feat(P2): 实现质量保障类5项功能(无障碍/视觉回归/通知渠道/漏洞扫描/灾备)

## 新增功能

### 1. 屏幕阅读器兼容性增强(a11y)
- 无障碍工具库:src/shared/lib/a11y.ts
- aria-live Hook:src/shared/hooks/use-aria-live.ts
- a11y 组件:skip-link/visually-hidden/focus-trap/aria-status
- 增强 UI:table.tsx 系统性 ARIA role,dialog.tsx aria-modal
- 审计文档:docs/accessibility/a11y-audit.md(WCAG 2.1 AA 清单)

### 2. 视觉回归测试
- 测试套件:tests/visual/(homepage + 3 个 dashboard)
- 3 视口(desktop/tablet/mobile)× 2 主题(light/dark)
- 动态元素遮罩,避免误报
- playwright.config.ts 新增 visual-chromium 项目
- 文档:docs/testing/visual-regression.md

### 3. 短信/微信推送渠道集成
- 新模块:src/modules/notifications/
- 4 个渠道:SMS(阿里云/腾讯云)、WeChat(公众号)、Email(SMTP)、In-App
- 分发器按用户偏好并行多渠道发送
- 外部 SDK 动态 import,Mock 模式开发可用
- 文档:docs/notifications/channels.md

### 4. 漏洞扫描 CI 集成
- CI security-scan job:npm audit + Snyk + Trivy FS + OWASP ZAP
- 独立工作流 security.yml:每周一深度扫描 + 容器镜像扫描
- 配置:suppressions.json + .trivyignore
- 本地脚本:security-scan.sh/ps1
- 文档:docs/security/scanning.md(SLA 分级)

### 5. 灾备方案
- 脚本:backup-verify/backup-offsite-sync/dr-drill/failover/health-check
- CI 增强:备份后校验+异地同步,每周灾备演练
- 独立工作流 dr-drill.yml:每周一凌晨 4 点自动演练
- 文档:docs/dr/dr-plan.md(RTO 4h/RPO 24h)+ dr-runbook.md(6 故障场景)

## 验证
- npx tsc --noEmit:0 错误
- npm run lint:0 错误 0 警告
This commit is contained in:
SpecialX
2026-06-17 20:18:29 +08:00
parent b86255f0ea
commit 6585e10c6f
53 changed files with 7491 additions and 37 deletions

419
scripts/failover.sh Normal file
View File

@@ -0,0 +1,419 @@
#!/bin/bash
# Database failover script.
# Usage: ./failover.sh [--auto] [--primary URL] [--standby URL]
# Switches the application from the primary database to the standby
# database when the primary has failed.
# Abort on expansion of any unset variable instead of silently using "".
set -u
# Print the usage text (options, environment variables, exit codes) to stdout.
show_help() {
  # Only the first line needs expansion (the script path); everything else
  # is emitted verbatim through a quoted heredoc.
  printf '用法: %s [选项]\n' "$0"
  cat <<'EOF'
数据库故障切换脚本,将应用从主库切换到备库
选项:
--auto 半自动模式(检测失败后自动切换,需先确认)
--primary URL 主库连接 URL(默认从 DATABASE_URL 读取)
--standby URL 备库连接 URL(必需,从 DATABASE_URL_STANDBY 读取)
--app-url URL 应用健康检查 URL(默认 http://localhost:8015)
--no-restart 不重启应用(仅更新配置)
--dry-run 演练模式,只输出步骤不实际执行
--help, -h 显示帮助信息
环境变量:
DATABASE_URL 主库连接 URL
DATABASE_URL_STANDBY 备库连接 URL(必需)
FAILOVER_APP_URL 应用健康检查 URL(默认 http://localhost:8015)
FAILOVER_APP_NAME 应用容器名(默认 nextjs-app)
FAILOVER_CONFIG_FILE 配置文件路径(默认 .env.local)
FAILOVER_LOG_FILE 切换日志路径(默认 docs/dr/logs/failover.log)
退出码:
0 切换成功
1 切换失败
EOF
}
# Command-line parsing. Every option starts from its "off"/empty default;
# empty URL values are filled in later from environment variables.
AUTO_MODE=0
NO_RESTART=0
DRY_RUN=0
PRIMARY_URL=""
STANDBY_URL=""
APP_URL=""
while (( $# > 0 )); do
  case "$1" in
    -h|--help)
      show_help
      exit 0
      ;;
    --auto)
      AUTO_MODE=1
      shift
      ;;
    --no-restart)
      NO_RESTART=1
      shift
      ;;
    --dry-run)
      DRY_RUN=1
      shift
      ;;
    --primary|--standby|--app-url)
      # All three options consume exactly one value.
      if (( $# < 2 )); then
        echo "ERROR: $1 requires an argument" >&2
        exit 1
      fi
      case "$1" in
        --primary) PRIMARY_URL="$2" ;;
        --standby) STANDBY_URL="$2" ;;
        --app-url) APP_URL="$2" ;;
      esac
      shift 2
      ;;
    *)
      echo "ERROR: Unknown argument: $1" >&2
      exit 1
      ;;
  esac
done
# Configuration. Precedence: command-line value, then environment variable,
# then built-in default.
PRIMARY_URL="${PRIMARY_URL:-${DATABASE_URL:-}}"
STANDBY_URL="${STANDBY_URL:-${DATABASE_URL_STANDBY:-}}"
APP_URL="${APP_URL:-${FAILOVER_APP_URL:-http://localhost:8015}}"
APP_NAME="${FAILOVER_APP_NAME:-nextjs-app}"
CONFIG_FILE="${FAILOVER_CONFIG_FILE:-.env.local}"
LOG_DIR="docs/dr/logs"
LOG_FILE="${FAILOVER_LOG_FILE:-$LOG_DIR/failover.log}"
# Both database URLs are mandatory.
if [ -z "$STANDBY_URL" ]; then
  echo "ERROR: Standby database URL not provided" >&2
  echo "Set DATABASE_URL_STANDBY or use --standby" >&2
  exit 1
fi
if [ -z "$PRIMARY_URL" ]; then
  echo "ERROR: Primary database URL not provided" >&2
  echo "Set DATABASE_URL or use --primary" >&2
  exit 1
fi
# Create the directory that will actually hold the log file. When
# FAILOVER_LOG_FILE points outside $LOG_DIR, creating only $LOG_DIR would
# leave the real target directory missing and every log write would fail.
# A failure here is reported but does not abort: a failover should not be
# blocked merely because its log cannot be written.
if ! mkdir -p -- "$(dirname -- "$LOG_FILE")"; then
  echo "WARN: Cannot create log directory for $LOG_FILE; log lines may not be persisted" >&2
fi
# Emit one log line, prefixed with a UTC ISO-8601 timestamp, to stdout and
# append the same line to $LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
log() {
  local stamp line
  stamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  line="[$stamp] $1"
  printf '%s\n' "$line" | tee -a "$LOG_FILE"
}
# Log a message with an "ERROR: " prefix through log(); the console copy is
# redirected to stderr.
# Arguments: $1 - error description
log_error() {
  local message="ERROR: $1"
  log "$message" 1>&2
}
# Split a database URL of the form
#   scheme://[user[:password]@]host[:port]/dbname[?query]
# into its components.
# Arguments: $1 - connection URL
# Outputs:   one line "user|password|host|port|dbname" on stdout
# Notes:
#   - The port defaults to 3306 (the MySQL default; this script drives the
#     mysql client) when the URL does not carry one.
#   - The password is split off at the LAST "@" of the authority part, so an
#     unencoded "@" inside the password is tolerated.
#   - Percent-encoded characters are NOT decoded, and a "|" inside any
#     component will confuse callers that split the result on "|".
parse_db_url() {
  local url="$1"
  local rest authority path userinfo hostport
  local user="" pass="" host="" port="" dbname=""

  # Drop "scheme://".
  rest="${url#*://}"

  # Authority is everything before the first "/", the remainder is the path.
  case "$rest" in
    */*)
      authority="${rest%%/*}"
      path="${rest#*/}"
      ;;
    *)
      authority="$rest"
      path=""
      ;;
  esac
  dbname="${path%%[?]*}"

  # Separate credentials from the host part.
  case "$authority" in
    *@*)
      userinfo="${authority%@*}"
      hostport="${authority##*@}"
      ;;
    *)
      userinfo=""
      hostport="$authority"
      ;;
  esac
  # A query string may directly follow the host when there is no path.
  hostport="${hostport%%[?]*}"

  case "$userinfo" in
    *:*)
      user="${userinfo%%:*}"
      pass="${userinfo#*:}"
      ;;
    *)
      user="$userinfo"
      ;;
  esac

  case "$hostport" in
    *:*)
      host="${hostport%:*}"
      port="${hostport##*:}"
      ;;
    *)
      host="$hostport"
      ;;
  esac
  port="${port:-3306}"

  printf '%s|%s|%s|%s|%s\n' "$user" "$pass" "$host" "$port" "$dbname"
}
# Probe a database by running "SELECT 1" through the mysql client.
# Globals:   DRY_RUN (read)
# Arguments: $1 - connection URL (split with parse_db_url)
# Returns:   0 if the query succeeds (always 0 in dry-run mode), 1 otherwise
check_db_health() {
  local url="$1"
  local parsed user pass host port dbname
  # Same bound as the 10 s used for the application health check, so an
  # unreachable host cannot stall the failover on the TCP connect.
  local -r connect_timeout=10

  parsed=$(parse_db_url "$url")
  IFS='|' read -r user pass host port dbname <<<"$parsed"

  log "Checking database health: ${host}:${port}/${dbname}"
  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would check: mysql -h $host -P $port -u $user -e 'SELECT 1'"
    return 0
  fi

  local -a mysql_args=(
    -h "$host" -P "$port" -u "$user"
    "--connect-timeout=${connect_timeout}"
  )
  # A bare "-p" (empty password) makes mysql prompt interactively, which
  # would hang an unattended run; only pass the option when there is a value.
  # NOTE(review): a password on the command line is visible in the process
  # list; consider a --defaults-extra-file option file instead.
  if [ -n "$pass" ]; then
    mysql_args+=("-p${pass}")
  fi

  # Only the exit status matters; the result set and client warnings are
  # discarded.
  if mysql "${mysql_args[@]}" -e "SELECT 1;" >/dev/null 2>&1; then
    log " Database is healthy"
    return 0
  fi
  log " Database is NOT reachable"
  return 1
}
# Probe the application with an HTTP request via curl.
# Globals:   DRY_RUN (read)
# Arguments: $1 - URL to request
# Returns:   0 when the request succeeds, in dry-run mode, or when curl is
#            not installed (the check is skipped with a warning); 1 otherwise
check_app_health() {
  local target="$1"
  log "Checking application health: $target"

  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would check: curl -f $target"
    return 0
  fi

  if ! command -v curl >/dev/null 2>&1; then
    log " WARN: curl not available, skipping app health check"
    return 0
  fi

  # -f turns HTTP error statuses into a non-zero exit; -m caps the request
  # at 10 seconds.
  if ! curl -sf -o /dev/null -m 10 "$target" 2>/dev/null; then
    log " Application is NOT healthy"
    return 1
  fi

  log " Application is healthy"
  return 0
}
# Promote the standby to primary when it is running as a replica: stop and
# reset replication, then switch off read-only mode. A standalone standby
# (empty SHOW SLAVE STATUS result) needs no promotion.
# Globals:   STANDBY_URL, DRY_RUN (read)
# Returns:   0 on success, in dry-run mode, or when no promotion is needed;
#            1 if the replication status cannot be read or a step fails
# NOTE(review): uses the legacy SLAVE statement names; confirm that the
# target server version still accepts them.
promote_standby() {
  log "Promoting standby to primary..."
  local parsed user pass host port
  parsed=$(parse_db_url "$STANDBY_URL")
  # The trailing "_" swallows the database name, which is not needed here.
  IFS='|' read -r user pass host port _ <<<"$parsed"

  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would promote standby: STOP SLAVE; RESET SLAVE ALL; SET GLOBAL read_only=OFF;"
    return 0
  fi

  local -a mysql_args=(-h "$host" -P "$port" -u "$user" --connect-timeout=10)
  # A bare "-p" (empty password) would make mysql prompt interactively.
  if [ -n "$pass" ]; then
    mysql_args+=("-p${pass}")
  fi

  # Is the standby a replica? A failed query must not be mistaken for
  # "standalone": that would report success while the server is still
  # replicating and read-only.
  local replica_status
  if ! replica_status=$(mysql "${mysql_args[@]}" \
    -e "SHOW SLAVE STATUS\G" 2>/dev/null); then
    log_error "Failed to query replication status on standby"
    return 1
  fi

  if [ -z "$replica_status" ]; then
    log " Standby is not a slave (standalone), skipping promotion"
    log " Standby promoted successfully"
    return 0
  fi

  log " Standby is a slave, promoting..."
  # Stop replication.
  if ! mysql "${mysql_args[@]}" \
    -e "STOP SLAVE; RESET SLAVE ALL;" 2>/dev/null; then
    log_error "Failed to stop replication"
    return 1
  fi
  log " Replication stopped and reset"

  # Switch off read-only mode.
  if ! mysql "${mysql_args[@]}" \
    -e "SET GLOBAL read_only=OFF; SET GLOBAL super_read_only=OFF;" 2>/dev/null; then
    log_error "Failed to disable read-only mode"
    return 1
  fi
  log " Read-only mode disabled"

  log " Standby promoted successfully"
  return 0
}
# Point the application at the standby by rewriting DATABASE_URL in the
# config file (creating the file when it does not exist) and exporting
# DATABASE_URL for the current process.
# Globals:   DRY_RUN, CONFIG_FILE, STANDBY_URL (read); DATABASE_URL (exported)
# Returns:   0 on success or in dry-run mode; 1 if the file cannot be written
update_config() {
  log "Updating application configuration..."

  # Never print the password: log a masked copy of the URL.
  local masked_url="$STANDBY_URL"
  local cred_re='^([^:]+://[^:/@]*:)(.*)@(.*)$'
  if [[ "$STANDBY_URL" =~ $cred_re ]]; then
    masked_url="${BASH_REMATCH[1]}***@${BASH_REMATCH[3]}"
  fi

  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would update $CONFIG_FILE: DATABASE_URL=$masked_url"
    return 0
  fi

  if [ -f "$CONFIG_FILE" ]; then
    # Keep a timestamped copy of the original. A failed backup is reported
    # but does not stop the switch, which matters more at this point.
    local backup_file
    backup_file="${CONFIG_FILE}.bak.$(date +%s)"
    if cp -p -- "$CONFIG_FILE" "$backup_file"; then
      log " Backed up original config to $backup_file"
    else
      log " WARN: Could not back up $CONFIG_FILE, continuing without a backup"
    fi

    if grep -q "^DATABASE_URL=" "$CONFIG_FILE"; then
      # Escape the characters that are special in a sed replacement
      # ("\", "&" and the "|" delimiter). URLs with query strings routinely
      # contain "&", which sed would otherwise expand to the matched text.
      local escaped_url
      escaped_url=$(printf '%s' "$STANDBY_URL" | sed -e 's/[\\&|]/\\&/g')
      if ! sed -i.bak "s|^DATABASE_URL=.*|DATABASE_URL=${escaped_url}|" "$CONFIG_FILE"; then
        log_error "Failed to update DATABASE_URL in $CONFIG_FILE"
        return 1
      fi
      # "-i.bak" is the sed -i spelling accepted by both GNU and BSD sed;
      # the extra copy it leaves behind is not needed.
      rm -f -- "${CONFIG_FILE}.bak" 2>/dev/null || true
      log " Updated DATABASE_URL in $CONFIG_FILE"
    else
      # If the file does not end with a newline, add one first so the new
      # entry does not get glued onto the last existing line.
      if [ -s "$CONFIG_FILE" ] && [ -n "$(tail -c 1 "$CONFIG_FILE")" ]; then
        printf '\n' >> "$CONFIG_FILE"
      fi
      if ! printf 'DATABASE_URL=%s\n' "$STANDBY_URL" >> "$CONFIG_FILE"; then
        log_error "Failed to append DATABASE_URL to $CONFIG_FILE"
        return 1
      fi
      log " Added DATABASE_URL to $CONFIG_FILE"
    fi
  else
    log " WARN: Config file $CONFIG_FILE not found, creating new one"
    if ! printf 'DATABASE_URL=%s\n' "$STANDBY_URL" > "$CONFIG_FILE"; then
      log_error "Failed to create $CONFIG_FILE"
      return 1
    fi
  fi

  # Also update the environment of the current session.
  export DATABASE_URL="$STANDBY_URL"
  log " Configuration updated"
  return 0
}
# Restart the application container so it picks up the new configuration.
# Globals:   NO_RESTART, DRY_RUN, APP_NAME, STANDBY_URL (read)
# Returns:   0 when the container was restarted, the restart was skipped
#            (--no-restart, dry-run) or docker is not installed (manual
#            restart requested); 1 when "docker restart" fails
restart_app() {
  if [ "$NO_RESTART" -eq 1 ]; then
    log "Skipping application restart (--no-restart)"
    return 0
  fi

  log "Restarting application..."
  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would restart: docker restart $APP_NAME"
    return 0
  fi

  if ! command -v docker >/dev/null 2>&1; then
    # The hint below goes to the console and the log file, so the password
    # part of the URL is masked before it is printed.
    local masked_url="$STANDBY_URL"
    local cred_re='^([^:]+://[^:/@]*:)(.*)@(.*)$'
    if [[ "$STANDBY_URL" =~ $cred_re ]]; then
      masked_url="${BASH_REMATCH[1]}***@${BASH_REMATCH[3]}"
    fi
    log " WARN: Docker not available, please restart application manually"
    log " Updated DATABASE_URL: $masked_url"
    return 0
  fi

  log " Restarting Docker container: $APP_NAME"
  if ! docker restart "$APP_NAME" 2>/dev/null; then
    log_error "Failed to restart container $APP_NAME"
    return 1
  fi
  log " Container restarted"

  # Give the application a moment to come up before the caller probes it.
  log " Waiting for application to start..."
  sleep 5
  return 0
}
# Main flow: check primary -> confirm -> check standby -> promote standby ->
# update config and restart -> verify the application.

# Replace the password part of a connection URL with "***" so credentials
# never reach the console or the log file.
# Arguments: $1 - connection URL
# Outputs:   the masked URL on stdout (unchanged if it carries no password)
mask_db_url() {
  local url="$1"
  local cred_re='^([^:]+://[^:/@]*:)(.*)@(.*)$'
  if [[ "$url" =~ $cred_re ]]; then
    printf '%s***@%s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[3]}"
  else
    printf '%s\n' "$url"
  fi
}

PRIMARY_URL_MASKED=$(mask_db_url "$PRIMARY_URL")
STANDBY_URL_MASKED=$(mask_db_url "$STANDBY_URL")

if [ "$AUTO_MODE" -eq 1 ]; then
  MODE_LABEL="semi-auto"
else
  MODE_LABEL="manual"
fi
if [ "$DRY_RUN" -eq 1 ]; then
  DRY_RUN_LABEL="yes"
else
  DRY_RUN_LABEL="no"
fi

log "========================================"
log "Database Failover Started"
log "========================================"
log "Mode: $MODE_LABEL"
log "Dry-run: $DRY_RUN_LABEL"
log "Primary: $PRIMARY_URL_MASKED"
log "Standby: $STANDBY_URL_MASKED"
log ""

# Step 1: check the health of the primary database.
log "[1/5] Checking primary database health..."
PRIMARY_HEALTHY=0
if check_db_health "$PRIMARY_URL"; then
  PRIMARY_HEALTHY=1
  if [ "$DRY_RUN" -eq 1 ]; then
    # In dry-run mode the health check is only simulated and always reports
    # success. Stopping here would hide every remaining step, which defeats
    # the purpose of a rehearsal.
    log " [DRY-RUN] Primary health was not actually checked, continuing with remaining steps"
  else
    log " Primary is healthy"
    if [ "$AUTO_MODE" -eq 0 ]; then
      log " Primary is healthy. Failover not needed."
      log " Use --auto to force failover even if primary is healthy"
      log "========================================"
      log "Failover Cancelled (Primary Healthy)"
      log "========================================"
      exit 0
    fi
  fi
else
  log " Primary is NOT healthy, proceeding with failover"
fi

# Semi-automatic mode: ask the operator for explicit confirmation.
if [ "$AUTO_MODE" -eq 1 ] && [ "$DRY_RUN" -eq 0 ]; then
  echo ""
  echo "WARNING: About to failover from primary to standby."
  echo " Primary: $PRIMARY_URL_MASKED"
  echo " Standby: $STANDBY_URL_MASKED"
  echo ""
  # -r keeps backslashes literal. On EOF (no terminal) CONFIRM ends up
  # empty, which is treated as "not confirmed" below.
  read -r -p "Type 'FAILover' to confirm: " CONFIRM
  if [ "$CONFIRM" != "FAILover" ]; then
    log "Failover cancelled by user"
    exit 1
  fi
fi

# Step 2: check the health of the standby database.
log ""
log "[2/5] Checking standby database health..."
if ! check_db_health "$STANDBY_URL"; then
  log_error "Standby is also not healthy, cannot failover"
  log "========================================"
  log "Failover FAILED (Standby Unhealthy)"
  log "========================================"
  exit 1
fi

# Step 3: promote the standby to primary.
log ""
log "[3/5] Promoting standby to primary..."
if ! promote_standby; then
  log_error "Failed to promote standby"
  exit 1
fi

# Step 4: update the application configuration and restart.
log ""
log "[4/5] Updating application configuration and restarting..."
# Restarting with an unchanged configuration would reconnect the application
# to the failed primary, so a failed update stops the run here.
if ! update_config; then
  log_error "Failed to update application configuration"
  log " Manual intervention required"
  exit 1
fi
if ! restart_app; then
  log_error "Failed to restart application"
  log " Manual intervention required"
  exit 1
fi

# Step 5: verify the switch.
log ""
log "[5/5] Verifying failover..."
sleep 3

# Application health, up to 5 attempts.
APP_HEALTHY=0
for i in 1 2 3 4 5; do
  if check_app_health "$APP_URL"; then
    APP_HEALTHY=1
    break
  fi
  # No point in waiting after the final attempt.
  if [ "$i" -lt 5 ]; then
    log " Retry $i/5 in 5 seconds..."
    sleep 5
  fi
done
if [ "$APP_HEALTHY" -eq 0 ]; then
  log_error "Application is not healthy after failover"
  log " Check application logs and configuration"
  log "========================================"
  log "Failover FAILED (App Unhealthy)"
  log "========================================"
  exit 1
fi

# Database connectivity as seen through the application.
log " Verifying database connection via application..."
if [ "$DRY_RUN" -eq 0 ]; then
  if ! command -v curl >/dev/null 2>&1; then
    # Same policy as check_app_health: a missing curl means the check is
    # skipped, not that the failover failed.
    log " WARN: curl not available, skipping application verification"
  elif curl -sf -m 10 "$APP_URL" >/dev/null 2>&1; then
    log " Application responding successfully"
  else
    log_error "Application not responding"
    exit 1
  fi
fi

log ""
log "========================================"
log "Failover Completed Successfully"
log "========================================"
log "Primary (old): $PRIMARY_URL_MASKED"
log "Standby (new): $STANDBY_URL_MASKED"
log "Application: $APP_URL"
log "Log file: $LOG_FILE"
log ""
log "Post-failover checklist:"
log " 1. Verify application functionality"
log " 2. Update monitoring alerts"
log " 3. Notify stakeholders"
log " 4. Plan primary database recovery"
log " 5. Schedule post-mortem review"
log ""
exit 0