## 新增功能

### 1. 屏幕阅读器兼容性增强(a11y)

- 无障碍工具库:src/shared/lib/a11y.ts
- aria-live Hook:src/shared/hooks/use-aria-live.ts
- a11y 组件:skip-link/visually-hidden/focus-trap/aria-status
- 增强 UI:table.tsx 系统性 ARIA role,dialog.tsx aria-modal
- 审计文档:docs/accessibility/a11y-audit.md(WCAG 2.1 AA 清单)

### 2. 视觉回归测试

- 测试套件:tests/visual/(homepage + 3 个 dashboard)
- 3 视口(desktop/tablet/mobile)× 2 主题(light/dark)
- 动态元素遮罩,避免误报
- playwright.config.ts 新增 visual-chromium 项目
- 文档:docs/testing/visual-regression.md

### 3. 短信/微信推送渠道集成

- 新模块:src/modules/notifications/
- 4 个渠道:SMS(阿里云/腾讯云)、WeChat(公众号)、Email(SMTP)、In-App
- 分发器按用户偏好并行多渠道发送
- 外部 SDK 动态 import,Mock 模式开发可用
- 文档:docs/notifications/channels.md

### 4. 漏洞扫描 CI 集成

- CI security-scan job:npm audit + Snyk + Trivy FS + OWASP ZAP
- 独立工作流 security.yml:每周一深度扫描 + 容器镜像扫描
- 配置:suppressions.json + .trivyignore
- 本地脚本:security-scan.sh/ps1
- 文档:docs/security/scanning.md(SLA 分级)

### 5. 灾备方案

- 脚本:backup-verify/backup-offsite-sync/dr-drill/failover/health-check
- CI 增强:备份后校验+异地同步,每周灾备演练
- 独立工作流 dr-drill.yml:每周一凌晨 4 点自动演练
- 文档:docs/dr/dr-plan.md(RTO 4h/RPO 24h)+ dr-runbook.md(6 故障场景)

## 验证

- npx tsc --noEmit:0 错误
- npm run lint:0 错误 0 警告
420 lines
11 KiB
Bash
420 lines
11 KiB
Bash
#!/bin/bash
#
# Database failover script.
# Usage: ./failover.sh [--auto] [--primary URL] [--standby URL]
# Switches the application from the primary database to the standby
# database when the primary has failed.

# Abort on any expansion of an unset variable.
set -o nounset
|
|
|
|
# Print the usage text (options, environment variables, exit codes) to stdout.
# The first line embeds $0, i.e. the name the script was invoked with.
show_help() {
  printf '%s\n' \
    "用法: $0 [选项]" \
    '数据库故障切换脚本,将应用从主库切换到备库' \
    '' \
    '选项:' \
    '--auto 半自动模式(检测失败后自动切换,需先确认)' \
    '--primary URL 主库连接 URL(默认从 DATABASE_URL 读取)' \
    '--standby URL 备库连接 URL(必需,从 DATABASE_URL_STANDBY 读取)' \
    '--app-url URL 应用健康检查 URL(默认 http://localhost:8015)' \
    '--no-restart 不重启应用(仅更新配置)' \
    '--dry-run 演练模式,只输出步骤不实际执行' \
    '--help, -h 显示帮助信息' \
    '' \
    '环境变量:' \
    'DATABASE_URL 主库连接 URL' \
    'DATABASE_URL_STANDBY 备库连接 URL(必需)' \
    'FAILOVER_APP_URL 应用健康检查 URL(默认 http://localhost:8015)' \
    'FAILOVER_APP_NAME 应用容器名(默认 nextjs-app)' \
    'FAILOVER_CONFIG_FILE 配置文件路径(默认 .env.local)' \
    'FAILOVER_LOG_FILE 切换日志路径(默认 docs/dr/logs/failover.log)' \
    '' \
    '退出码:' \
    '0 切换成功' \
    '1 切换失败'
}
|
|
|
|
# ---------------------------------------------------------------------------
# Command-line parsing
# ---------------------------------------------------------------------------
# Flags start disabled; URL values start empty so they can be filled from the
# environment after parsing.
AUTO_MODE=0
PRIMARY_URL=""
STANDBY_URL=""
APP_URL=""
NO_RESTART=0
DRY_RUN=0

# Exit with status 1 unless the option named in $1 is followed by a value.
# $2 is the count of arguments still unparsed, including the option itself.
_failover_require_value() {
  if [ "$2" -lt 2 ]; then
    echo "ERROR: $1 requires an argument" >&2
    exit 1
  fi
}

while (( $# > 0 )); do
  case "$1" in
    -h|--help)
      show_help
      exit 0
      ;;
    --auto)
      AUTO_MODE=1
      shift
      ;;
    --no-restart)
      NO_RESTART=1
      shift
      ;;
    --dry-run)
      DRY_RUN=1
      shift
      ;;
    --primary)
      _failover_require_value "$1" "$#"
      PRIMARY_URL="$2"
      shift 2
      ;;
    --standby)
      _failover_require_value "$1" "$#"
      STANDBY_URL="$2"
      shift 2
      ;;
    --app-url)
      _failover_require_value "$1" "$#"
      APP_URL="$2"
      shift 2
      ;;
    *)
      echo "ERROR: Unknown argument: $1" >&2
      exit 1
      ;;
  esac
done
|
|
|
|
# ---------------------------------------------------------------------------
# Effective configuration
# ---------------------------------------------------------------------------
# Precedence for each setting: command-line value, then environment variable,
# then the built-in default (where one exists).
PRIMARY_URL="${PRIMARY_URL:-${DATABASE_URL:-}}"
STANDBY_URL="${STANDBY_URL:-${DATABASE_URL_STANDBY:-}}"
APP_URL="${APP_URL:-${FAILOVER_APP_URL:-http://localhost:8015}}"
APP_NAME="${FAILOVER_APP_NAME:-nextjs-app}"
CONFIG_FILE="${FAILOVER_CONFIG_FILE:-.env.local}"
LOG_DIR="docs/dr/logs"
LOG_FILE="${FAILOVER_LOG_FILE:-$LOG_DIR/failover.log}"

# Both database URLs are mandatory; the standby is checked first.
if [ -z "$STANDBY_URL" ]; then
  echo "ERROR: Standby database URL not provided" >&2
  echo "Set DATABASE_URL_STANDBY or use --standby" >&2
  exit 1
fi

if [ -z "$PRIMARY_URL" ]; then
  echo "ERROR: Primary database URL not provided" >&2
  echo "Set DATABASE_URL or use --primary" >&2
  exit 1
fi

# Create the default log directory and the directory that actually holds
# LOG_FILE. FAILOVER_LOG_FILE may point outside LOG_DIR; without its parent
# directory every later append to the log would fail.
LOG_FILE_DIR="$(dirname -- "$LOG_FILE")"
if ! mkdir -p -- "$LOG_DIR" "$LOG_FILE_DIR"; then
  # Logging is best-effort: warn but do not block an emergency failover.
  echo "WARN: Cannot create log directory for $LOG_FILE" >&2
fi
|
|
|
|
# Logging helper.
# Writes one line prefixed with a UTC ISO-8601 timestamp to stdout and appends
# the same line to $LOG_FILE.
# Arguments: $1 - message text
log() {
  local stamp
  stamp="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
  printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOG_FILE"
}
|
|
|
|
# Log a message with an "ERROR: " prefix. The console copy produced by log
# is redirected to stderr.
# Arguments: $1 - error description
log_error() {
  local message="ERROR: $1"
  log "$message" 1>&2
}
|
|
|
|
# Split a database URL of the form
#   scheme://user[:password]@host[:port]/dbname[?query]
# into its components.
#
# Arguments: $1 - database URL
# Outputs:   one line "user|pass|host|port|dbname" on stdout
#
# Behavior:
#   - The authority part ends at the first "/", "?" or "#" after the scheme,
#     so query strings containing "/", ":" or "@" do not confuse the parser.
#   - user/password are split from the host at the LAST "@" of the authority,
#     so a password containing "@" is preserved.
#   - A missing port defaults to 3306 (the script talks to the DB with the
#     mysql client).
#   - A missing password or database name yields an empty field.
#   - Bracketed IPv6 hosts ("[::1]:3306") are returned without the brackets.
#
# Limitations: values are not percent-decoded, and a password containing "|"
# cannot be represented in the pipe-separated output format.
parse_db_url() {
  local url="$1"
  local rest authority path userinfo hostport
  local user="" pass="" host="" port="" dbname=""

  rest="${url#*://}"              # drop "scheme://"
  authority="${rest%%[/?#]*}"     # user:pass@host:port
  path="${rest:${#authority}}"    # "/dbname?query", "?query" or ""

  if [[ "$authority" == *@* ]]; then
    userinfo="${authority%@*}"
    hostport="${authority##*@}"
    user="${userinfo%%:*}"
    if [[ "$userinfo" == *:* ]]; then
      pass="${userinfo#*:}"
    fi
  else
    hostport="$authority"
  fi

  if [[ "$hostport" == \[*\]* ]]; then
    # IPv6 literal: "[addr]" optionally followed by ":port".
    host="${hostport#\[}"
    host="${host%%\]*}"
    hostport="${hostport#*\]}"
    port="${hostport#:}"
  elif [[ "$hostport" == *:* ]]; then
    host="${hostport%%:*}"
    port="${hostport##*:}"
  else
    host="$hostport"
  fi
  port="${port:-3306}"

  if [[ "$path" == /* ]]; then
    dbname="${path#/}"
    dbname="${dbname%%[?#]*}"     # strip query string / fragment
  fi

  printf '%s|%s|%s|%s|%s\n' "$user" "$pass" "$host" "$port" "$dbname"
}
|
|
|
|
# Check whether a database accepts connections and answers "SELECT 1".
#
# Globals:   DRY_RUN (read)
# Arguments: $1 - database URL (parsed with parse_db_url)
# Returns:   0 if the query succeeds (always 0 in dry-run mode), 1 otherwise
#
# Notes:
#   - The password is handed to the client through the MYSQL_PWD environment
#     variable instead of "-p<password>", so it does not show up in the
#     process list, and an empty password no longer triggers the interactive
#     password prompt.
#   - A connect timeout keeps the check from hanging when the host is
#     unreachable, which is the very situation this script exists for.
#   - Query output is discarded; only the exit status matters.
check_db_health() {
  local url="$1"
  local parsed user pass host port dbname

  parsed=$(parse_db_url "$url")
  IFS='|' read -r user pass host port dbname <<<"$parsed"

  log "Checking database health: ${host}:${port}/${dbname}"

  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would check: mysql -h $host -P $port -u $user -e 'SELECT 1'"
    return 0
  fi

  if MYSQL_PWD="$pass" mysql --connect-timeout=10 \
      -h "$host" -P "$port" -u "$user" \
      -e "SELECT 1;" >/dev/null 2>&1; then
    log " Database is healthy"
    return 0
  fi

  log " Database is NOT reachable"
  return 1
}
|
|
|
|
# Probe the application's health endpoint with curl.
#
# Globals:   DRY_RUN (read)
# Arguments: $1 - URL to request
# Returns:   0 when the request succeeds, in dry-run mode, or when curl is not
#            installed (the check is skipped with a warning); 1 when the
#            request fails.
check_app_health() {
  local endpoint="$1"

  log "Checking application health: $endpoint"

  # Dry-run: describe the probe only.
  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would check: curl -f $endpoint"
    return 0
  fi

  # Without curl the check cannot run; it is skipped rather than failed.
  if ! command -v curl >/dev/null 2>&1; then
    log " WARN: curl not available, skipping app health check"
    return 0
  fi

  if ! curl -sf -o /dev/null -m 10 "$endpoint" 2>/dev/null; then
    log " Application is NOT healthy"
    return 1
  fi

  log " Application is healthy"
  return 0
}
|
|
|
|
# Promote the standby database to primary (only relevant for a
# primary/replica setup).
#
# Globals:   STANDBY_URL, DRY_RUN (read)
# Returns:   0 on success, or when the standby reports no replication status
#            (treated as a standalone server); 1 when the replication status
#            cannot be queried or a promotion statement fails.
#
# Notes:
#   - A failed "SHOW SLAVE STATUS" (connection or permission error) is now an
#     error. Previously it was indistinguishable from "not a replica", so the
#     promotion was skipped and reported as successful.
#   - The password is passed via MYSQL_PWD rather than on the command line.
#   - NOTE(review): the SLAVE statements are legacy syntax; newer MySQL
#     releases use the REPLICA spelling. Confirm against the server version
#     in use.
promote_standby() {
  log "Promoting standby to primary..."

  local parsed user pass host port dbname
  parsed=$(parse_db_url "$STANDBY_URL")
  IFS='|' read -r user pass host port dbname <<<"$parsed"

  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would promote standby: STOP SLAVE; RESET SLAVE ALL; SET GLOBAL read_only=OFF;"
    return 0
  fi

  # Connection options shared by every statement below.
  local -a conn=(--connect-timeout=10 -h "$host" -P "$port" -u "$user")
  local slave_status

  # Non-empty output means the standby is configured as a replica.
  if ! slave_status=$(MYSQL_PWD="$pass" mysql "${conn[@]}" \
      -e "SHOW SLAVE STATUS\G" 2>/dev/null); then
    log_error "Failed to query replication status on standby"
    return 1
  fi

  if [ -n "$slave_status" ]; then
    log " Standby is a slave, promoting..."

    # Stop replication and discard the replica configuration.
    if MYSQL_PWD="$pass" mysql "${conn[@]}" \
        -e "STOP SLAVE; RESET SLAVE ALL;" 2>/dev/null; then
      log " Replication stopped and reset"
    else
      log_error "Failed to stop replication"
      return 1
    fi

    # Allow writes on the promoted server.
    if MYSQL_PWD="$pass" mysql "${conn[@]}" \
        -e "SET GLOBAL read_only=OFF; SET GLOBAL super_read_only=OFF;" 2>/dev/null; then
      log " Read-only mode disabled"
    else
      log_error "Failed to disable read-only mode"
      return 1
    fi
  else
    log " Standby is not a slave (standalone), skipping promotion"
  fi

  log " Standby promoted successfully"
  return 0
}
|
|
|
|
# Point the application's configuration at the standby database.
#
# Globals:   CONFIG_FILE, STANDBY_URL, DRY_RUN (read); DATABASE_URL (exported)
# Returns:   0 on success, 1 if the configuration file cannot be written
#
# Behavior:
#   - An existing config file is copied to "<file>.bak.<epoch>" first. A
#     failed backup is logged as a warning but does not block the failover.
#   - An existing DATABASE_URL= line is replaced in place; otherwise the
#     setting is appended, or the file is created.
#   - The URL is escaped before being used as a sed replacement: "&", "\" and
#     the "|" delimiter are special there, and "&" is common in query strings
#     ("?ssl=true&timeout=5"). Unescaped, they would corrupt the written value.
#   - The password is masked in the dry-run log line.
update_config() {
  log "Updating application configuration..."

  if [ "$DRY_RUN" -eq 1 ]; then
    local shown="$STANDBY_URL"
    local mask_re='^([^:]+://[^:/@]*):.*@(.*)$'
    if [[ "$shown" =~ $mask_re ]]; then
      shown="${BASH_REMATCH[1]}:***@${BASH_REMATCH[2]}"
    fi
    log " [DRY-RUN] Would update $CONFIG_FILE: DATABASE_URL=$shown"
    return 0
  fi

  if [ -f "$CONFIG_FILE" ]; then
    # Keep a timestamped copy of the current configuration.
    local backup
    backup="${CONFIG_FILE}.bak.$(date +%s)"
    if cp "$CONFIG_FILE" "$backup"; then
      log " Backed up original config to $backup"
    else
      log " WARN: Could not back up $CONFIG_FILE, continuing without backup"
    fi

    if grep -q "^DATABASE_URL=" "$CONFIG_FILE"; then
      # Escape characters that are special in a sed replacement string.
      local escaped
      escaped=$(printf '%s\n' "$STANDBY_URL" | sed -e 's/[\\&|]/\\&/g')
      if ! sed -i.bak "s|^DATABASE_URL=.*|DATABASE_URL=${escaped}|" "$CONFIG_FILE"; then
        log " ERROR: Failed to update DATABASE_URL in $CONFIG_FILE"
        return 1
      fi
      rm -f "${CONFIG_FILE}.bak" 2>/dev/null || true
      log " Updated DATABASE_URL in $CONFIG_FILE"
    else
      # Start a fresh line if the file does not end with a newline, so the
      # new setting is not glued onto the last existing line.
      if [ -s "$CONFIG_FILE" ] && [ -n "$(tail -c 1 "$CONFIG_FILE")" ]; then
        printf '\n' >> "$CONFIG_FILE"
      fi
      if ! printf 'DATABASE_URL=%s\n' "$STANDBY_URL" >> "$CONFIG_FILE"; then
        log " ERROR: Failed to append DATABASE_URL to $CONFIG_FILE"
        return 1
      fi
      log " Added DATABASE_URL to $CONFIG_FILE"
    fi
  else
    log " WARN: Config file $CONFIG_FILE not found, creating new one"
    if ! printf 'DATABASE_URL=%s\n' "$STANDBY_URL" > "$CONFIG_FILE"; then
      log " ERROR: Failed to create $CONFIG_FILE"
      return 1
    fi
  fi

  # Also update the environment of the current shell session.
  export DATABASE_URL="$STANDBY_URL"
  log " Configuration updated"
  return 0
}
|
|
|
|
# Restart the application so it picks up the new database configuration.
#
# Globals:   NO_RESTART, DRY_RUN, APP_NAME, STANDBY_URL (read)
# Returns:   0 when the restart succeeded, was skipped (--no-restart,
#            dry-run) or Docker is unavailable (manual restart requested);
#            1 when "docker restart" fails.
#
# The standby URL shown in the manual-restart hint has its password masked,
# because every log line is also appended to the log file.
restart_app() {
  if [ "$NO_RESTART" -eq 1 ]; then
    log "Skipping application restart (--no-restart)"
    return 0
  fi

  log "Restarting application..."

  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would restart: docker restart $APP_NAME"
    return 0
  fi

  if command -v docker >/dev/null 2>&1; then
    log " Restarting Docker container: $APP_NAME"
    if docker restart "$APP_NAME" 2>/dev/null; then
      log " Container restarted"
      # Give the application a moment to start.
      log " Waiting for application to start..."
      sleep 5
      return 0
    else
      log_error "Failed to restart container $APP_NAME"
      return 1
    fi
  else
    local shown="$STANDBY_URL"
    local mask_re='^([^:]+://[^:/@]*):.*@(.*)$'
    if [[ "$shown" =~ $mask_re ]]; then
      shown="${BASH_REMATCH[1]}:***@${BASH_REMATCH[2]}"
    fi
    log " WARN: Docker not available, please restart application manually"
    log " Updated DATABASE_URL: $shown"
  fi

  return 0
}
|
|
|
|
# ---------------------------------------------------------------------------
# Main flow
# ---------------------------------------------------------------------------
# Steps: 1) check primary, 2) check standby, 3) promote standby,
# 4) update config and restart the app, 5) verify the app is healthy.

# Print a database URL with the password replaced by "***". URLs without a
# password are printed unchanged. Used for everything written to the terminal
# or the log file so credentials are never persisted there.
mask_db_url() {
  local url="$1"
  local re='^([^:]+://[^:/@]*):.*@(.*)$'
  if [[ "$url" =~ $re ]]; then
    printf '%s:***@%s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}"
  else
    printf '%s\n' "$url"
  fi
}

PRIMARY_URL_SAFE="$(mask_db_url "$PRIMARY_URL")"
STANDBY_URL_SAFE="$(mask_db_url "$STANDBY_URL")"

log "========================================"
log "Database Failover Started"
log "========================================"
log "Mode: $([ "$AUTO_MODE" -eq 1 ] && echo "semi-auto" || echo "manual")"
log "Dry-run: $([ "$DRY_RUN" -eq 1 ] && echo "yes" || echo "no")"
log "Primary: $PRIMARY_URL_SAFE"
log "Standby: $STANDBY_URL_SAFE"
log ""

# Step 1: check the primary database.
log "[1/5] Checking primary database health..."
PRIMARY_HEALTHY=0
if check_db_health "$PRIMARY_URL"; then
  PRIMARY_HEALTHY=1
  log " Primary is healthy"
  if [ "$DRY_RUN" -eq 1 ]; then
    # In dry-run mode the health check is simulated and always succeeds.
    # Continue so the remaining steps are actually shown.
    log " [DRY-RUN] Health result is simulated, continuing to show remaining steps"
  elif [ "$AUTO_MODE" -eq 0 ]; then
    log " Primary is healthy. Failover not needed."
    log " Use --auto to force failover even if primary is healthy"
    log "========================================"
    log "Failover Cancelled (Primary Healthy)"
    log "========================================"
    exit 0
  fi
else
  log " Primary is NOT healthy, proceeding with failover"
fi

# Semi-automatic mode: require an explicit confirmation before switching.
if [ "$AUTO_MODE" -eq 1 ] && [ "$DRY_RUN" -eq 0 ]; then
  echo ""
  echo "WARNING: About to failover from primary to standby."
  echo " Primary: $PRIMARY_URL_SAFE"
  echo " Standby: $STANDBY_URL_SAFE"
  echo ""
  CONFIRM=""
  # -r keeps backslashes literal; a closed stdin leaves CONFIRM empty.
  read -r -p "Type 'FAILover' to confirm: " CONFIRM
  if [ "$CONFIRM" != "FAILover" ]; then
    log "Failover cancelled by user"
    exit 1
  fi
fi

# Step 2: the standby must be reachable before anything is changed.
log ""
log "[2/5] Checking standby database health..."
if ! check_db_health "$STANDBY_URL"; then
  log_error "Standby is also not healthy, cannot failover"
  log "========================================"
  log "Failover FAILED (Standby Unhealthy)"
  log "========================================"
  exit 1
fi

# Step 3: promote the standby to primary.
log ""
log "[3/5] Promoting standby to primary..."
if ! promote_standby; then
  log_error "Failed to promote standby"
  exit 1
fi

# Step 4: update the application configuration and restart.
log ""
log "[4/5] Updating application configuration and restarting..."
if ! update_config; then
  # Restarting with the old configuration would just reconnect to the
  # failed primary, so stop here.
  log_error "Failed to update application configuration"
  log " Manual intervention required"
  exit 1
fi
if ! restart_app; then
  log_error "Failed to restart application"
  log " Manual intervention required"
  exit 1
fi

# Step 5: verify the failover.
log ""
log "[5/5] Verifying failover..."
sleep 3

# Poll the application health endpoint, up to 5 attempts.
APP_HEALTHY=0
for i in 1 2 3 4 5; do
  if check_app_health "$APP_URL"; then
    APP_HEALTHY=1
    break
  fi
  # No point in waiting after the final attempt.
  if [ "$i" -lt 5 ]; then
    log " Retry $i/5 in 5 seconds..."
    sleep 5
  fi
done

if [ "$APP_HEALTHY" -eq 0 ]; then
  log_error "Application is not healthy after failover"
  log " Check application logs and configuration"
  log "========================================"
  log "Failover FAILED (App Unhealthy)"
  log "========================================"
  exit 1
fi

# Check the database connection through the application.
log " Verifying database connection via application..."
if [ "$DRY_RUN" -eq 0 ]; then
  if ! command -v curl >/dev/null 2>&1; then
    # check_app_health already tolerates a missing curl; stay consistent
    # instead of failing a completed failover on a missing tool.
    log " WARN: curl not available, skipping verification via application"
  elif curl -sf -m 10 "$APP_URL" >/dev/null 2>&1; then
    log " Application responding successfully"
  else
    log_error "Application not responding"
    exit 1
  fi
fi

log ""
log "========================================"
log "Failover Completed Successfully"
log "========================================"
log "Primary (old): $PRIMARY_URL_SAFE"
log "Standby (new): $STANDBY_URL_SAFE"
log "Application: $APP_URL"
log "Log file: $LOG_FILE"
log ""
log "Post-failover checklist:"
log " 1. Verify application functionality"
log " 2. Update monitoring alerts"
log " 3. Notify stakeholders"
log " 4. Plan primary database recovery"
log " 5. Schedule post-mortem review"
log ""
exit 0
|