feat(P2): 实现质量保障类5项功能(无障碍/视觉回归/通知渠道/漏洞扫描/灾备)
## 新增功能

### 1. 屏幕阅读器兼容性增强(a11y)
- 无障碍工具库:src/shared/lib/a11y.ts
- aria-live Hook:src/shared/hooks/use-aria-live.ts
- a11y 组件:skip-link/visually-hidden/focus-trap/aria-status
- 增强 UI:table.tsx 系统性 ARIA role,dialog.tsx aria-modal
- 审计文档:docs/accessibility/a11y-audit.md(WCAG 2.1 AA 清单)

### 2. 视觉回归测试
- 测试套件:tests/visual/(homepage + 3 个 dashboard)
- 3 视口(desktop/tablet/mobile)× 2 主题(light/dark)
- 动态元素遮罩,避免误报
- playwright.config.ts 新增 visual-chromium 项目
- 文档:docs/testing/visual-regression.md

### 3. 短信/微信推送渠道集成
- 新模块:src/modules/notifications/
- 4 个渠道:SMS(阿里云/腾讯云)、WeChat(公众号)、Email(SMTP)、In-App
- 分发器按用户偏好并行多渠道发送
- 外部 SDK 动态 import,Mock 模式开发可用
- 文档:docs/notifications/channels.md

### 4. 漏洞扫描 CI 集成
- CI security-scan job:npm audit + Snyk + Trivy FS + OWASP ZAP
- 独立工作流 security.yml:每周一深度扫描 + 容器镜像扫描
- 配置:suppressions.json + .trivyignore
- 本地脚本:security-scan.sh/ps1
- 文档:docs/security/scanning.md(SLA 分级)

### 5. 灾备方案
- 脚本:backup-verify/backup-offsite-sync/dr-drill/failover/health-check
- CI 增强:备份后校验 + 异地同步,每周灾备演练
- 独立工作流 dr-drill.yml:每周一凌晨 4 点自动演练
- 文档:docs/dr/dr-plan.md(RTO 4h/RPO 24h)+ dr-runbook.md(6 故障场景)

## 验证
- npx tsc --noEmit:0 错误
- npm run lint:0 错误 0 警告
This commit is contained in:
342
scripts/backup-offsite-sync.sh
Normal file
342
scripts/backup-offsite-sync.sh
Normal file
@@ -0,0 +1,342 @@
|
||||
#!/bin/bash
|
||||
# 异地备份同步脚本
|
||||
# 用法: ./backup-offsite-sync.sh
|
||||
# 将本地备份同步到远程存储(S3/OSS/NFS),支持校验和清理
|
||||
|
||||
set -u
|
||||
|
||||
# Print usage information (options, environment variables, required tools and
# exit codes) to stdout.
show_help() {
  # Unquoted delimiter on purpose: $0 must expand to the invoked script path.
  cat <<EOF
用法: $0 [选项]
异地备份同步脚本,将本地备份同步到远程存储

选项:
  --backend TYPE    远程存储后端类型: s3|oss|nfs|none
  --no-cleanup      不清理远程过期备份
  --no-verify       不校验同步结果
  --help, -h        显示帮助信息

环境变量:
  BACKUP_DIR                     本地备份目录(默认 ./backups)
  BACKUP_OFFSITE_BACKEND         远程后端类型: s3|oss|nfs|none (默认 none)
  BACKUP_OFFSITE_REMOTE          远程目标路径
                                   - s3: s3://bucket-name/path
                                   - oss: oss://bucket-name/path
                                   - nfs: /mnt/nfs/backup-path
  BACKUP_OFFSITE_BUCKET          存储桶名称(仅 s3/oss)
  BACKUP_OFFSITE_ACCESS_KEY      访问密钥
  BACKUP_OFFSITE_SECRET_KEY      秘密密钥
  BACKUP_OFFSITE_REGION          区域(默认 us-east-1)
  BACKUP_OFFSITE_RETENTION_DAYS  远程保留天数(默认 90)

需要工具:
  s3:  aws-cli (aws) 或 rclone
  oss: ossutil 或 rclone
  nfs: rsync (NFS 应已挂载到 BACKUP_OFFSITE_REMOTE)

退出码:
  0  同步成功
  1  同步失败
EOF
}
|
||||
|
||||
# Command-line parsing.
# Flags default to "off"; --backend overrides BACKUP_OFFSITE_BACKEND.
NO_CLEANUP=0
NO_VERIFY=0

# Parse the script arguments.
# Arguments: the script's "$@"
# Sets:      NO_CLEANUP, NO_VERIFY, BACKUP_OFFSITE_BACKEND (only for --backend)
# Exits:     0 after --help, 1 on a malformed or unknown argument
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --help|-h)
        show_help
        exit 0
        ;;
      --backend)
        # An empty value would silently fall back to the default "none" later
        # on and disable the sync with exit code 0, so reject it here.
        if [ $# -lt 2 ] || [ -z "$2" ]; then
          echo "ERROR: --backend requires an argument" >&2
          exit 1
        fi
        BACKUP_OFFSITE_BACKEND="$2"
        shift 2
        ;;
      --backend=*)
        BACKUP_OFFSITE_BACKEND="${1#--backend=}"
        if [ -z "$BACKUP_OFFSITE_BACKEND" ]; then
          echo "ERROR: --backend requires an argument" >&2
          exit 1
        fi
        shift
        ;;
      --no-cleanup)
        NO_CLEANUP=1
        shift
        ;;
      --no-verify)
        NO_VERIFY=1
        shift
        ;;
      *)
        echo "ERROR: Unknown argument: $1" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
|
||||
|
||||
# Resolve the configuration from the environment, applying defaults.
BACKUP_DIR="${BACKUP_DIR:-./backups}"
BACKEND="${BACKUP_OFFSITE_BACKEND:-none}"
REMOTE="${BACKUP_OFFSITE_REMOTE:-}"
BUCKET="${BACKUP_OFFSITE_BUCKET:-}"
ACCESS_KEY="${BACKUP_OFFSITE_ACCESS_KEY:-}"
SECRET_KEY="${BACKUP_OFFSITE_SECRET_KEY:-}"
REGION="${BACKUP_OFFSITE_REGION:-us-east-1}"
RETENTION_DAYS="${BACKUP_OFFSITE_RETENTION_DAYS:-90}"

# RETENTION_DAYS is later used in numeric comparisons, date arithmetic and
# `find -mtime`; fail fast instead of letting those steps misbehave quietly.
case "$RETENTION_DAYS" in
  ''|*[!0-9]*)
    echo "ERROR: BACKUP_OFFSITE_RETENTION_DAYS must be a non-negative integer: $RETENTION_DAYS" >&2
    exit 1
    ;;
esac

# Banner describing this run.
echo "=== Offsite Backup Sync ==="
echo "Time: $(date -u +"%Y-%m-%dT%H:%M:%SZ")"
echo "Backend: $BACKEND"
echo "Local: $BACKUP_DIR"
echo "Remote: $REMOTE"
echo ""
|
||||
|
||||
# Backend "none" means offsite sync is switched off: succeed without doing work.
if [ "$BACKEND" = "none" ]; then
  echo "INFO: BACKUP_OFFSITE_BACKEND=none, offsite sync disabled"
  echo "To enable, set BACKUP_OFFSITE_BACKEND to s3, oss, or nfs"
  exit 0
fi

# Only the three implemented backends are accepted from here on.
case "$BACKEND" in
  s3|oss|nfs) ;;
  *)
    echo "ERROR: Invalid backend: $BACKEND (must be s3, oss, nfs, or none)" >&2
    exit 1
    ;;
esac

# The local backup directory must exist.
if [ ! -d "$BACKUP_DIR" ]; then
  echo "ERROR: Local backup directory does not exist: $BACKUP_DIR" >&2
  exit 1
fi

# Count local backup files with a glob instead of parsing `ls` output, so odd
# file names and matching directories cannot skew the count.
LOCAL_FILES=0
for backup_path in "$BACKUP_DIR"/db_backup_*.sql.gz; do
  if [ -f "$backup_path" ]; then
    LOCAL_FILES=$((LOCAL_FILES + 1))
  fi
done
if [ "$LOCAL_FILES" -eq 0 ]; then
  echo "ERROR: No backup files found in $BACKUP_DIR" >&2
  exit 1
fi
echo "INFO: Found $LOCAL_FILES local backup files"

# A remote target is mandatory; show a backend-specific example when missing.
if [ -z "$REMOTE" ]; then
  echo "ERROR: BACKUP_OFFSITE_REMOTE not set" >&2
  echo "Example for $BACKEND:" >&2
  case "$BACKEND" in
    s3) echo " s3://my-bucket/backups/" >&2 ;;
    oss) echo " oss://my-bucket/backups/" >&2 ;;
    nfs) echo " /mnt/nfs/backups/" >&2 ;;
  esac
  exit 1
fi
|
||||
|
||||
# Ensure a required command-line tool is available.
# Arguments: one or more tool names; the check passes when ANY of them is found
#            on PATH (e.g. `check_tool aws rclone`). With a single name the
#            behavior and messages are the same as before.
# Globals:   BACKEND (read, only for the error message)
# Exits:     1 when none of the tools is available
check_tool() {
  local candidate
  for candidate in "$@"; do
    if command -v "$candidate" >/dev/null 2>&1; then
      return 0
    fi
  done
  echo "ERROR: Required tool not found: $*" >&2
  echo "Please install $* to use the $BACKEND backend" >&2
  exit 1
}
|
||||
|
||||
# Prepare credentials for the selected backend.
# Globals: BACKEND, ACCESS_KEY, SECRET_KEY, REGION, REMOTE (read)
#   s3:  exports AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY / AWS_DEFAULT_REGION
#        when both keys are set (otherwise the ambient AWS config is used).
#   oss: writes ~/.ossutilconfig when both keys are set.
#   nfs: no credentials; only checks that REMOTE is an existing directory.
# Exits:   1 when the NFS path is missing or the OSS config cannot be written
setup_credentials() {
  case "$BACKEND" in
    s3)
      if [ -n "$ACCESS_KEY" ] && [ -n "$SECRET_KEY" ]; then
        export AWS_ACCESS_KEY_ID="$ACCESS_KEY"
        export AWS_SECRET_ACCESS_KEY="$SECRET_KEY"
        export AWS_DEFAULT_REGION="$REGION"
      fi
      ;;
    oss)
      if [ -n "$ACCESS_KEY" ] && [ -n "$SECRET_KEY" ]; then
        local oss_config oss_tmp
        oss_config=~/.ossutilconfig

        # Keep the FIRST backup only: on later runs the existing config is the
        # file this script generated, and copying it over .bak would destroy
        # the user's original configuration.
        if [ -f "$oss_config" ] && [ ! -e "$oss_config.bak" ]; then
          cp -p "$oss_config" "$oss_config.bak" 2>/dev/null \
            || echo "WARN: Could not back up $oss_config" >&2
        fi

        # The file holds a secret key: create it with mode 0600 (mktemp) and
        # move it into place atomically so it is never world-readable.
        if ! oss_tmp=$(mktemp "${oss_config}.XXXXXX"); then
          echo "ERROR: Could not create temporary ossutil config" >&2
          exit 1
        fi
        # NOTE(review): ossutil's documented config keys are accessKeyID /
        # accessKeySecret; confirm that ossutil actually honors the key names
        # written below before relying on the OSS backend.
        cat > "$oss_tmp" <<EOF
[Credentials]
provider = oss
accessKey = $ACCESS_KEY
secretKey = $SECRET_KEY
[Default]
endpoint = oss-${REGION}.aliyuncs.com
EOF
        chmod 600 "$oss_tmp"
        if ! mv -f "$oss_tmp" "$oss_config"; then
          rm -f "$oss_tmp"
          echo "ERROR: Could not write $oss_config" >&2
          exit 1
        fi
      fi
      ;;
    nfs)
      # NFS is expected to be mounted already; there is nothing to configure.
      if [ ! -d "$REMOTE" ]; then
        echo "ERROR: NFS remote directory does not exist: $REMOTE" >&2
        echo "Please ensure NFS is mounted at this path" >&2
        exit 1
      fi
      ;;
  esac
}
|
||||
|
||||
# Copy db_backup_*.sql.gz from BACKUP_DIR to REMOTE with rclone.
# `copy` (not `sync`) is deliberate: `rclone sync` deletes destination files
# that no longer exist locally, which would defeat the longer remote retention
# and differs from `aws s3 sync`, which never deletes without --delete.
# Returns: 0 on success, 1 on failure
_rclone_copy_backups() {
  echo "Using rclone"
  if ! rclone copy "$BACKUP_DIR" "$REMOTE" --include "db_backup_*.sql.gz"; then
    echo "ERROR: rclone sync failed" >&2
    return 1
  fi
  return 0
}

# Upload the local backups to the configured backend.
# Globals: BACKEND, BACKUP_DIR, REMOTE (read)
# Tools:   s3 -> aws, else rclone; oss -> ossutil, else rclone;
#          nfs -> rsync, else cp
# Returns: 0 on success, 1 on any failure
sync_to_remote() {
  echo ""
  echo "[1/3] Syncing backups to $BACKEND..."
  case "$BACKEND" in
    s3)
      if command -v aws >/dev/null 2>&1; then
        echo "Using aws-cli"
        if ! aws s3 sync "$BACKUP_DIR/" "$REMOTE" \
          --exclude "*" --include "db_backup_*.sql.gz" \
          --no-progress; then
          echo "ERROR: aws s3 sync failed" >&2
          return 1
        fi
      elif command -v rclone >/dev/null 2>&1; then
        _rclone_copy_backups || return 1
      else
        echo "ERROR: Neither aws-cli nor rclone found" >&2
        return 1
      fi
      ;;
    oss)
      if command -v ossutil >/dev/null 2>&1; then
        echo "Using ossutil"
        if ! ossutil cp -r "$BACKUP_DIR/" "$REMOTE" \
          --include "db_backup_*.sql.gz" -f; then
          echo "ERROR: ossutil sync failed" >&2
          return 1
        fi
      elif command -v rclone >/dev/null 2>&1; then
        _rclone_copy_backups || return 1
      else
        echo "ERROR: Neither ossutil nor rclone found" >&2
        return 1
      fi
      ;;
    nfs)
      if ! mkdir -p "$REMOTE"; then
        echo "ERROR: Could not create remote directory: $REMOTE" >&2
        return 1
      fi
      if command -v rsync >/dev/null 2>&1; then
        echo "Using rsync"
        if ! rsync -av --include="db_backup_*.sql.gz" --exclude="*" \
          "$BACKUP_DIR/" "$REMOTE/"; then
          echo "ERROR: rsync failed" >&2
          return 1
        fi
      else
        echo "Using cp (rsync not available)"
        # -p keeps the modification time: the remote cleanup is mtime based,
        # and a plain cp would reset it on every run so nothing would expire.
        if ! cp -p "$BACKUP_DIR"/db_backup_*.sql.gz "$REMOTE/"; then
          echo "ERROR: cp failed" >&2
          return 1
        fi
      fi
      ;;
  esac
  echo " PASS: Sync completed"
  return 0
}
|
||||
|
||||
# Compare the number of backups on the remote with the local count.
# Globals: NO_VERIFY, BACKEND, REMOTE, LOCAL_FILES (read); REMOTE_FILES (written)
# Output:  progress lines on stdout; a WARN line when the remote has fewer
#          files (expected once remote cleanup has removed expired backups)
# Returns: always 0 - the comparison is advisory only
verify_sync() {
  if [ "$NO_VERIFY" -eq 1 ]; then
    echo ""
    echo "[2/3] Verification skipped (--no-verify)"
    return 0
  fi
  echo ""
  echo "[2/3] Verifying sync result..."

  local name_regex='db_backup_.*\.sql\.gz'
  local remote_path
  REMOTE_FILES=0

  # `grep -c` prints "0" AND exits non-zero when nothing matches, so the old
  # `grep -c ... || echo 0` produced "0<newline>0" and broke the numeric test
  # below. `|| true` only neutralises the exit status.
  case "$BACKEND" in
    s3)
      if command -v aws >/dev/null 2>&1; then
        REMOTE_FILES=$(aws s3 ls "$REMOTE" --recursive 2>/dev/null | grep -c "$name_regex" || true)
      elif command -v rclone >/dev/null 2>&1; then
        REMOTE_FILES=$(rclone lsf "$REMOTE" --include "db_backup_*.sql.gz" 2>/dev/null | wc -l)
      fi
      ;;
    oss)
      if command -v ossutil >/dev/null 2>&1; then
        REMOTE_FILES=$(ossutil ls "$REMOTE" 2>/dev/null | grep -c "$name_regex" || true)
      elif command -v rclone >/dev/null 2>&1; then
        REMOTE_FILES=$(rclone lsf "$REMOTE" --include "db_backup_*.sql.gz" 2>/dev/null | wc -l)
      fi
      ;;
    nfs)
      # Count via glob rather than parsing `ls` output.
      for remote_path in "$REMOTE"/db_backup_*.sql.gz; do
        if [ -e "$remote_path" ]; then
          REMOTE_FILES=$((REMOTE_FILES + 1))
        fi
      done
      ;;
  esac

  # Normalise: empty output becomes 0 and padding from `wc -l` is dropped.
  REMOTE_FILES=$(( ${REMOTE_FILES:-0} + 0 ))

  echo " Local files: $LOCAL_FILES"
  echo " Remote files: $REMOTE_FILES"

  if [ "$REMOTE_FILES" -lt "$LOCAL_FILES" ]; then
    echo " WARN: Remote has fewer files than local (some may have been cleaned up)"
  else
    echo " PASS: File count verified"
  fi
  return 0
}
|
||||
|
||||
# Delete remote backups older than RETENTION_DAYS.
# Globals: NO_CLEANUP, BACKEND, REMOTE, RETENTION_DAYS (read)
#   s3:  age is taken from the YYYYMMDD_HHMMSS stamp in the file name.
#   oss: not implemented; points at bucket lifecycle rules.
#   nfs: age is the file modification time (find -mtime).
# Returns: 0 normally, 1 when RETENTION_DAYS is not a non-negative integer
cleanup_remote() {
  if [ "$NO_CLEANUP" -eq 1 ]; then
    echo ""
    echo "[3/3] Cleanup skipped (--no-cleanup)"
    return 0
  fi
  echo ""
  echo "[3/3] Cleaning up remote backups older than $RETENTION_DAYS days..."

  case "$RETENTION_DAYS" in
    ''|*[!0-9]*)
      echo " ERROR: Invalid retention period: $RETENTION_DAYS" >&2
      return 1
      ;;
  esac

  case "$BACKEND" in
    s3)
      if command -v aws >/dev/null 2>&1; then
        local cutoff bucket key file_name file_day _day _time _size

        # The cutoff does not depend on the file: compute it once.
        # GNU date first, BSD/macOS date as fallback.
        cutoff=$(date -d "-$RETENTION_DAYS days" +%Y%m%d 2>/dev/null \
          || date -v-"${RETENTION_DAYS}"d +%Y%m%d 2>/dev/null)
        if [ -z "$cutoff" ]; then
          echo " WARN: Could not compute cutoff date, skipping cleanup" >&2
          return 0
        fi

        # `aws s3 ls --recursive` prints keys relative to the BUCKET (prefix
        # included), so the delete URL is s3://<bucket>/<key>. Appending the
        # key to REMOTE duplicated the prefix and never matched any object.
        bucket=${REMOTE#s3://}
        bucket=${bucket%%/*}

        # Listing columns: date, time, size, key (the key may contain spaces,
        # hence it is the last `read` variable).
        while read -r _day _time _size key; do
          file_name=${key##*/}
          case "$file_name" in
            db_backup_*.sql.gz) ;;
            *) continue ;;
          esac
          if [[ "$file_name" =~ ([0-9]{8})_[0-9]{6} ]]; then
            file_day=${BASH_REMATCH[1]}
            if [ "$file_day" -lt "$cutoff" ]; then
              echo " Deleting: $key"
              # stdin is detached so the command cannot eat the listing.
              if ! aws s3 rm "s3://$bucket/$key" </dev/null; then
                echo " WARN: Failed to delete $key" >&2
              fi
            fi
          fi
        done < <(aws s3 ls "$REMOTE" --recursive 2>/dev/null)
      else
        echo " INFO: aws-cli not available, remote cleanup skipped for S3"
        return 0
      fi
      ;;
    oss)
      # No time-based cleanup is implemented for OSS; nothing is deleted here,
      # so do not claim that cleanup completed.
      echo " INFO: For OSS, configure lifecycle rules in the console for automatic cleanup"
      echo " INFO: Manual cleanup with retention $RETENTION_DAYS days"
      return 0
      ;;
    nfs)
      if [ -d "$REMOTE" ]; then
        if find "$REMOTE" -type f -name "db_backup_*.sql.gz" \
          -mtime "+$RETENTION_DAYS" -delete; then
          echo " Cleaned up files older than $RETENTION_DAYS days"
        else
          echo " WARN: Some expired files could not be removed from $REMOTE" >&2
        fi
      fi
      ;;
  esac
  echo " PASS: Cleanup completed"
  return 0
}
|
||||
|
||||
# Run the pipeline: credentials -> sync -> verify -> cleanup.
setup_credentials

# A failed upload is fatal: there is nothing to verify or clean up.
if ! sync_to_remote; then
  exit 1
fi

# Do not report success when verification signals a failure.
if ! verify_sync; then
  echo "ERROR: Sync verification failed" >&2
  exit 1
fi

# The backups are already offsite at this point, so a cleanup problem is
# reported but does not fail the run.
if ! cleanup_remote; then
  echo "WARN: Remote cleanup did not complete" >&2
fi

echo ""
echo "=== Offsite Sync Complete ==="
exit 0
|
||||
221
scripts/backup-verify.sh
Normal file
221
scripts/backup-verify.sh
Normal file
@@ -0,0 +1,221 @@
|
||||
#!/bin/bash
|
||||
# 备份完整性校验脚本
|
||||
# 用法: ./backup-verify.sh [backup_file] [--min-size BYTES]
|
||||
# 不传参数时校验最新备份
|
||||
|
||||
set -u
|
||||
|
||||
# Print usage information (arguments, options, environment variables and exit
# codes) to stdout.
show_help() {
  # Unquoted delimiter on purpose: $0 must expand to the invoked script path.
  cat <<EOF
用法: $0 [backup_file] [选项]
备份完整性校验脚本

参数:
  backup_file       要校验的备份文件路径(不传时校验最新备份)

选项:
  --min-size BYTES  最小文件大小阈值(字节),默认 1024
  --no-sql-check    跳过 SQL 语法校验(不连接数据库)
  --help, -h        显示帮助信息

环境变量:
  BACKUP_DIR              备份目录(默认 ./backups)
  DATABASE_URL            数据库连接 URL(用于 SQL 语法校验)
  BACKUP_VERIFY_MIN_SIZE  最小文件大小(字节,默认 1024)

退出码:
  0  校验通过
  1  校验失败
EOF
}
|
||||
|
||||
# Command-line parsing.
BACKUP_FILE=""
MIN_SIZE="${BACKUP_VERIFY_MIN_SIZE:-1024}"
NO_SQL_CHECK=0

# Parse the script arguments.
# Arguments: the script's "$@"
# Sets:      BACKUP_FILE (first positional), MIN_SIZE, NO_SQL_CHECK
# Exits:     0 after --help, 1 on a malformed or unknown argument
parse_args() {
  while [ $# -gt 0 ]; do
    case "$1" in
      --help|-h)
        show_help
        exit 0
        ;;
      --min-size)
        if [ $# -lt 2 ]; then
          echo "ERROR: --min-size requires an argument" >&2
          exit 1
        fi
        MIN_SIZE="$2"
        shift 2
        ;;
      --min-size=*)
        MIN_SIZE="${1#--min-size=}"
        shift
        ;;
      --no-sql-check)
        NO_SQL_CHECK=1
        shift
        ;;
      -*)
        # A mistyped option must not be mistaken for the backup file name.
        echo "ERROR: Unknown argument: $1" >&2
        exit 1
        ;;
      *)
        if [ -z "$BACKUP_FILE" ]; then
          BACKUP_FILE="$1"
        else
          echo "ERROR: Unknown argument: $1" >&2
          exit 1
        fi
        shift
        ;;
    esac
  done

  # A non-numeric threshold would make the later `-lt` test error out and
  # fall through to the PASS branch, so reject it up front.
  case "$MIN_SIZE" in
    ''|*[!0-9]*)
      echo "ERROR: minimum size must be a non-negative integer: $MIN_SIZE" >&2
      exit 1
      ;;
  esac
}

parse_args "$@"
|
||||
|
||||
BACKUP_DIR="${BACKUP_DIR:-./backups}"

# Without an explicit file, pick the most recently modified backup. A glob with
# `-nt` avoids parsing `ls` output, which breaks on unusual file names.
if [ -z "$BACKUP_FILE" ]; then
  for candidate in "$BACKUP_DIR"/db_backup_*.sql.gz; do
    # An unmatched glob stays literal; skip anything that is not a real file.
    [ -f "$candidate" ] || continue
    if [ -z "$BACKUP_FILE" ] || [ "$candidate" -nt "$BACKUP_FILE" ]; then
      BACKUP_FILE="$candidate"
    fi
  done
  if [ -z "$BACKUP_FILE" ]; then
    echo "ERROR: No backup file found in $BACKUP_DIR" >&2
    echo "Hint: Run scripts/backup-db.sh first or specify a file path" >&2
    exit 1
  fi
fi

# Report header.
echo "=== Backup Verification Report ==="
echo "File: $BACKUP_FILE"
echo "Time: $(date -u +"%Y-%m-%dT%H:%M:%SZ")"
echo ""
|
||||
|
||||
ERRORS=0
WARNINGS=0

# Step 1: the backup file must exist.
echo "[1/4] Checking file existence..."
if [ ! -f "$BACKUP_FILE" ]; then
  echo " FAIL: File does not exist: $BACKUP_FILE"
  exit 1
fi
echo " PASS: File exists"

# Step 2: the file must reach the minimum size.
echo "[2/4] Checking file size..."
# A non-numeric threshold would make the `-lt` test below error out and fall
# through to the PASS branch.
case "$MIN_SIZE" in
  ''|*[!0-9]*)
    echo " FAIL: Invalid minimum size threshold: $MIN_SIZE"
    exit 1
    ;;
esac
# GNU stat first, BSD/macOS stat as fallback.
FILE_SIZE=$(stat -c%s "$BACKUP_FILE" 2>/dev/null || stat -f%z "$BACKUP_FILE" 2>/dev/null)
if [ -z "$FILE_SIZE" ]; then
  echo " WARN: Could not determine file size"
  WARNINGS=$((WARNINGS + 1))
elif [ "$FILE_SIZE" -lt "$MIN_SIZE" ]; then
  echo " FAIL: File size ${FILE_SIZE} bytes is below threshold ${MIN_SIZE} bytes"
  echo " This may indicate a corrupted or empty backup"
  exit 1
else
  echo " PASS: File size ${FILE_SIZE} bytes (threshold: ${MIN_SIZE} bytes)"
fi

# Step 3: the gzip container must be intact.
echo "[3/4] Verifying gzip integrity..."
if ! gunzip -t "$BACKUP_FILE" 2>/dev/null; then
  echo " FAIL: gzip integrity check failed - file may be corrupted"
  exit 1
fi
echo " PASS: gzip integrity verified"

# Step 4: sanity-check the decompressed SQL.
echo "[4/4] Verifying SQL content..."
# Never fall back to a predictable /tmp name: another local user could
# pre-create it. Try the GNU form first, then the BSD/macOS form.
if ! TEMP_SQL=$(mktemp 2>/dev/null || mktemp -t backup_verify 2>/dev/null); then
  echo " FAIL: Could not create temporary file"
  exit 1
fi
# Single quotes: the path is expanded when the trap fires, so quoting stays
# intact even for unusual temp paths. The error file is the one written by the
# optional SQL syntax check further down.
trap 'rm -f "$TEMP_SQL" "/tmp/backup_verify_errors_$$.txt" 2>/dev/null' EXIT

if ! gunzip -c "$BACKUP_FILE" > "$TEMP_SQL" 2>/dev/null; then
  echo " FAIL: Could not decompress backup file"
  exit 1
fi

# The decompressed dump must not be empty.
SQL_SIZE=$(stat -c%s "$TEMP_SQL" 2>/dev/null || stat -f%z "$TEMP_SQL" 2>/dev/null)
if [ -z "$SQL_SIZE" ] || [ "$SQL_SIZE" -eq 0 ]; then
  echo " FAIL: Decompressed SQL file is empty"
  exit 1
fi
echo " PASS: Decompressed size: ${SQL_SIZE} bytes"

# Look for the mysqldump header.
if grep -q -e "MySQL dump" -e "mysqldump" "$TEMP_SQL" 2>/dev/null; then
  echo " PASS: mysqldump header found"
else
  echo " WARN: mysqldump header not found (may not be a standard mysqldump file)"
  WARNINGS=$((WARNINGS + 1))
fi

# Rough statement count (lines containing a semicolon). `grep -c` prints "0"
# AND exits non-zero when nothing matches, so `|| echo 0` would yield
# "0<newline>0" and break the numeric test; `|| true` only clears the status.
STMT_COUNT=$(grep -c ";" "$TEMP_SQL" 2>/dev/null || true)
STMT_COUNT=${STMT_COUNT:-0}
if [ "$STMT_COUNT" -lt 10 ]; then
  echo " WARN: Low statement count (${STMT_COUNT} semicolons)"
  WARNINGS=$((WARNINGS + 1))
else
  echo " PASS: Found ${STMT_COUNT} SQL statements"
fi

# Number of CREATE TABLE statements (informational).
CREATE_COUNT=$(grep -ci "CREATE TABLE" "$TEMP_SQL" 2>/dev/null || true)
CREATE_COUNT=${CREATE_COUNT:-0}
echo " INFO: Found ${CREATE_COUNT} CREATE TABLE statements"

# Error markers inside the dump mean the backup command itself failed.
if grep -qi "ERROR at line" "$TEMP_SQL" 2>/dev/null; then
  echo " FAIL: Found error markers in SQL file"
  exit 1
fi
|
||||
|
||||
# Split a DATABASE_URL of the form scheme://user[:password]@host[:port]/dbname.
# Arguments: $1 - the URL
# Sets:      DB_USER, DB_PASS, DB_HOST, DB_PORT (3306 when the URL has no port)
parse_database_url() {
  local rest userinfo hostport
  rest=${1#*://}
  DB_USER=""; DB_PASS=""; DB_HOST=""; DB_PORT=3306
  case "$rest" in
    *@*)
      userinfo=${rest%@*}    # split at the LAST '@'
      hostport=${rest##*@}
      ;;
    *)
      userinfo=""
      hostport=$rest
      ;;
  esac
  hostport=${hostport%%/*}
  hostport=${hostport%%\?*}
  DB_USER=${userinfo%%:*}
  case "$userinfo" in
    *:*) DB_PASS=${userinfo#*:} ;;
  esac
  DB_HOST=${hostport%%:*}
  case "$hostport" in
    *:*) DB_PORT=${hostport##*:} ;;
  esac
  case "$DB_PORT" in
    ''|*[!0-9]*) DB_PORT=3306 ;;
  esac
}

# Run the mysql client with credentials from a private option file, so the
# password never shows up in the process list.
run_mysql() {
  mysql --defaults-extra-file="$MYSQL_CNF" "$@"
}

# Optional SQL syntax check: replay the dump into a throw-away database.
if [ "${NO_SQL_CHECK:-0}" -eq 1 ]; then
  echo " SKIP: SQL syntax check skipped (--no-sql-check)"
elif [ -z "${DATABASE_URL:-}" ]; then
  echo " SKIP: DATABASE_URL not set, skipping SQL syntax check"
else
  echo " Performing SQL syntax check via mysql..."
  parse_database_url "$DATABASE_URL"

  if [ -z "$DB_HOST" ] || [ -z "$DB_USER" ]; then
    echo " WARN: Could not parse DATABASE_URL, skipping SQL syntax check"
    WARNINGS=$((WARNINGS + 1))
  elif grep -qiE '^(USE|CREATE DATABASE)[[:space:]]' "$TEMP_SQL" 2>/dev/null; then
    # Such a dump selects its own database, so replaying it would write to
    # that database instead of the scratch one.
    echo " WARN: Dump contains USE/CREATE DATABASE statements, skipping SQL syntax check"
    WARNINGS=$((WARNINGS + 1))
  elif ! MYSQL_CNF=$(mktemp 2>/dev/null || mktemp -t backup_verify_cnf 2>/dev/null); then
    echo " WARN: Could not create temp option file for syntax check, skipping"
    WARNINGS=$((WARNINGS + 1))
  else
    # Scratch database for the replay.
    TEMP_DB="verify_$(date +%s)_$$"
    ERROR_FILE="/tmp/backup_verify_errors_$$.txt"
    # Supersedes the earlier EXIT trap: also removes the option file.
    trap 'rm -f "${TEMP_SQL:-}" "${ERROR_FILE:-}" "${MYSQL_CNF:-}" 2>/dev/null' EXIT

    chmod 600 "$MYSQL_CNF"
    # Option-file values are double-quoted; escape backslashes and quotes.
    cnf_pass=${DB_PASS//\\/\\\\}
    cnf_pass=${cnf_pass//\"/\\\"}
    {
      printf '[client]\n'
      printf 'host="%s"\n' "$DB_HOST"
      printf 'port=%s\n' "$DB_PORT"
      printf 'user="%s"\n' "$DB_USER"
      printf 'password="%s"\n' "$cnf_pass"
    } > "$MYSQL_CNF"

    if run_mysql -e "CREATE DATABASE \`$TEMP_DB\`;" 2>"$ERROR_FILE"; then
      # --force keeps going after errors so every syntax error is collected.
      run_mysql --force "$TEMP_DB" < "$TEMP_SQL" > /dev/null 2>"$ERROR_FILE" || true

      # Only syntax errors count; other execution errors are ignored.
      SYNTAX_ERRORS=$(grep -ci "You have an error in your SQL syntax" "$ERROR_FILE" 2>/dev/null || true)
      SYNTAX_ERRORS=${SYNTAX_ERRORS:-0}

      # Drop the scratch database before reporting, on both outcomes.
      if [ "$SYNTAX_ERRORS" -gt 0 ]; then
        echo " FAIL: Found $SYNTAX_ERRORS SQL syntax errors"
        grep -i "You have an error in your SQL syntax" "$ERROR_FILE" | head -3
        run_mysql -e "DROP DATABASE IF EXISTS \`$TEMP_DB\`;" 2>/dev/null || true
        exit 1
      else
        echo " PASS: SQL syntax check passed (no syntax errors)"
      fi
      run_mysql -e "DROP DATABASE IF EXISTS \`$TEMP_DB\`;" 2>/dev/null || true
    else
      echo " WARN: Could not create temp database for syntax check, skipping"
      WARNINGS=$((WARNINGS + 1))
    fi
  fi
fi
|
||||
|
||||
echo ""
|
||||
echo "=== Verification Summary ==="
|
||||
echo "Errors: $ERRORS"
|
||||
echo "Warnings: $WARNINGS"
|
||||
if [ "$ERRORS" -gt 0 ]; then
|
||||
echo "Result: FAILED"
|
||||
exit 1
|
||||
fi
|
||||
echo "Result: PASSED"
|
||||
exit 0
|
||||
420
scripts/dr-drill.ps1
Normal file
420
scripts/dr-drill.ps1
Normal file
@@ -0,0 +1,420 @@
|
||||
<#
|
||||
.SYNOPSIS
|
||||
灾备演练脚本(Windows PowerShell 版本)
|
||||
.DESCRIPTION
|
||||
自动化灾备演练:从备份恢复到测试数据库,验证数据完整性
|
||||
.EXAMPLE
|
||||
.\dr-drill.ps1
|
||||
.EXAMPLE
|
||||
.\dr-drill.ps1 -BackupFile "backups\db_backup_20260617_020000.sql.gz" -TestDb "next_edu_dr_drill"
|
||||
.EXAMPLE
|
||||
.\dr-drill.ps1 -NoCleanup
|
||||
.PARAMETER BackupFile
|
||||
指定备份文件(不指定则使用最新备份)
|
||||
.PARAMETER TestDb
|
||||
测试数据库名(默认 next_edu_dr_drill)
|
||||
.PARAMETER NoCleanup
|
||||
演练后不清理测试数据库
|
||||
.PARAMETER ReportDir
|
||||
报告输出目录(默认 docs\dr\reports)
|
||||
.PARAMETER Help
|
||||
显示帮助信息
|
||||
#>
|
||||
|
||||
[CmdletBinding()]
|
||||
param(
|
||||
[Parameter(Position = 0)]
|
||||
[string]$BackupFile = "",
|
||||
|
||||
[Parameter()]
|
||||
[string]$TestDb = "",
|
||||
|
||||
[Parameter()]
|
||||
[switch]$NoCleanup,
|
||||
|
||||
[Parameter()]
|
||||
[string]$ReportDir = "",
|
||||
|
||||
[Parameter()]
|
||||
[switch]$Help
|
||||
)
|
||||
|
||||
# 显示帮助
|
||||
if ($Help) {
|
||||
Get-Help $MyInvocation.MyCommand.Path -Detailed
|
||||
exit 0
|
||||
}
|
||||
|
||||
# 配置
|
||||
$ErrorActionPreference = "Stop"
|
||||
|
||||
$DatabaseUrl = $env:DATABASE_URL
|
||||
if ([string]::IsNullOrEmpty($DatabaseUrl)) {
|
||||
Write-Host "ERROR: DATABASE_URL not set" -ForegroundColor Red
|
||||
exit 1
|
||||
}
|
||||
|
||||
$BackupDir = if ($env:BACKUP_DIR) { $env:BACKUP_DIR } else { ".\backups" }
|
||||
if ([string]::IsNullOrEmpty($TestDb)) {
|
||||
$TestDb = if ($env:DR_DRILL_TEST_DB) { $env:DR_DRILL_TEST_DB } else { "next_edu_dr_drill" }
|
||||
}
|
||||
if ([string]::IsNullOrEmpty($ReportDir)) {
|
||||
$ReportDir = if ($env:DR_DRILL_REPORT_DIR) { $env:DR_DRILL_REPORT_DIR } else { "docs\dr\reports" }
|
||||
}
|
||||
|
||||
$Timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
|
||||
$ReportFile = Join-Path $ReportDir "dr_drill_${Timestamp}.md"
|
||||
|
||||
# 解析 DATABASE_URL
|
||||
# 格式: mysql://user:password@host:port/dbname
|
||||
# Split a DATABASE_URL (mysql://user:password@host:port/dbname) into its parts.
# Returns a hashtable with User, Pass, Host, Port (3306 when the URL has no
# port) and DbName. Writes an error and exits with code 1 when the URL is not
# an absolute URL with a host.
function Parse-DatabaseUrl {
    param([string]$Url)

    try {
        $uri = [System.Uri]$Url
        if (-not $uri.IsAbsoluteUri -or [string]::IsNullOrEmpty($uri.Host)) {
            throw "DATABASE_URL must be an absolute URL with a host"
        }

        $userInfo = $uri.UserInfo -split ':', 2
        # Credentials with special characters must be percent-encoded inside a
        # URL; decode them, otherwise mysql receives e.g. "p%40ss" for "p@ss".
        $password = ""
        if ($userInfo.Length -gt 1) {
            $password = [System.Uri]::UnescapeDataString($userInfo[1])
        }

        return @{
            User   = [System.Uri]::UnescapeDataString($userInfo[0])
            Pass   = $password
            Host   = $uri.Host
            Port   = if ($uri.Port -gt 0) { $uri.Port } else { 3306 }
            DbName = [System.Uri]::UnescapeDataString($uri.AbsolutePath.TrimStart('/'))
        }
    }
    catch {
        Write-Host "ERROR: Invalid DATABASE_URL format" -ForegroundColor Red
        exit 1
    }
}
|
||||
|
||||
$db = Parse-DatabaseUrl $DatabaseUrl
|
||||
|
||||
# 创建报告目录
|
||||
if (-not (Test-Path $ReportDir)) {
|
||||
New-Item -ItemType Directory -Path $ReportDir -Force | Out-Null
|
||||
}
|
||||
|
||||
# 初始化报告
|
||||
# Create (or overwrite) the report file with the drill header.
# Reads the script-level $TestDb, $db, $BackupFile and $ReportFile.
function Init-Report {
    # The timestamp carries a "Z" suffix, so it has to be UTC; formatting the
    # local time with a literal Z mislabels it.
    $startedUtc = (Get-Date).ToUniversalTime().ToString("yyyy-MM-ddTHH:mm:ss'Z'")

    # The header may be written before a backup has been selected; show a
    # placeholder instead of an empty field in that case.
    $backupLabel = if ([string]::IsNullOrEmpty($BackupFile)) { "(未指定,使用最新备份)" } else { $BackupFile }

    $content = @"
# 灾备演练报告

- **演练时间**: $startedUtc
- **测试数据库**: $TestDb
- **源数据库**: $($db.DbName)
- **数据库主机**: $($db.Host):$($db.Port)
- **备份文件**: $backupLabel

## 演练步骤

"@
    Set-Content -Path $ReportFile -Value $content -Encoding UTF8
}
|
||||
|
||||
function Append-Report {
|
||||
param([string]$Content)
|
||||
Add-Content -Path $ReportFile -Value $Content -Encoding UTF8
|
||||
}
|
||||
|
||||
# Append one step section to the report and print a separator on the console.
#   Step   - step label, e.g. "2 - 创建测试数据库"
#   Status - PASSED, FAILED, WARN or SKIPPED (anything else counts as success)
#   Detail - markdown body for the step
function Step-Result {
    param(
        [string]$Step,
        [string]$Status,
        [string]$Detail
    )

    Append-Report "### 步骤 $Step`: $Status"
    Append-Report ""
    Append-Report $Detail
    Append-Report ""

    # WARN and SKIPPED get their own marker: reporting them as a success hides
    # exactly the steps a reviewer has to look at.
    switch ($Status) {
        "FAILED"  { Append-Report "❌ 步骤失败" }
        "WARN"    { Append-Report "⚠️ 步骤完成(有警告)" }
        "SKIPPED" { Append-Report "⏭️ 步骤已跳过" }
        default   { Append-Report "✅ 步骤成功" }
    }

    Append-Report ""
    Write-Host "---"
}
|
||||
|
||||
# MySQL 执行函数
|
||||
# Run a single SQL statement through the mysql command-line client.
#   Query    - SQL text passed via -e
#   Database - optional database to select
#   Silent   - discard stderr instead of merging it into the result
#   Scalar   - add -s -N (no table frame, no column names)
# Returns the client's output. Throws when mysql exits with a non-zero code,
# which is what lets the callers' try/catch blocks detect failures.
# Connection settings come from the script-level $db hashtable.
function Invoke-MySql {
    param(
        [string]$Query,
        [string]$Database = "",
        [switch]$Silent,
        [switch]$Scalar
    )

    # The password is handed over through MYSQL_PWD rather than "-p<password>":
    # it stays out of the process command line, and an empty password no
    # longer turns into a bare "-p" that prompts interactively.
    $mysqlArgs = @("-h", $db.Host, "-P", $db.Port, "-u", $db.User)
    if (-not [string]::IsNullOrEmpty($Database)) {
        $mysqlArgs += $Database
    }
    $mysqlArgs += @("-e", $Query)

    if ($Scalar) {
        $mysqlArgs += @("-s", "-N")
    }

    $savedPassword = $env:MYSQL_PWD
    try {
        $env:MYSQL_PWD = $db.Pass
        # Function-local override: with "Stop", redirected stderr output of a
        # native command can surface as a terminating error even when the
        # command succeeded. The exit code is the authoritative signal.
        $ErrorActionPreference = "Continue"
        if ($Silent) {
            $result = & mysql @mysqlArgs 2>$null
        }
        else {
            $result = & mysql @mysqlArgs 2>&1
        }
        $exitCode = $LASTEXITCODE
    }
    finally {
        $env:MYSQL_PWD = $savedPassword
    }

    if ($exitCode -ne 0) {
        throw "mysql exited with code $exitCode"
    }
    return $result
}
|
||||
|
||||
# 检查 mysql 命令
|
||||
if (-not (Get-Command mysql -ErrorAction SilentlyContinue)) {
|
||||
Write-Host "ERROR: mysql client not found in PATH" -ForegroundColor Red
|
||||
Write-Host "Please install MySQL client tools" -ForegroundColor Red
|
||||
exit 1
|
||||
}
|
||||
|
||||
Write-Host "=== Disaster Recovery Drill ===" -ForegroundColor Cyan
|
||||
Write-Host "Time: $(Get-Date -Format 'yyyy-MM-ddTHH:mm:ssZ')"
|
||||
Write-Host "Test DB: $TestDb"
|
||||
Write-Host "Source DB: $($db.DbName)@$($db.Host):$($db.Port)"
|
||||
Write-Host "Report: $ReportFile"
|
||||
Write-Host ""
|
||||
|
||||
Init-Report
|
||||
$drillStart = Get-Date
|
||||
$overallStatus = "SUCCESS"
|
||||
|
||||
# 步骤 1: 查找备份文件
|
||||
Write-Host "[1/6] Locating backup file..."
|
||||
if ([string]::IsNullOrEmpty($BackupFile)) {
|
||||
$backupPattern = Join-Path $BackupDir "db_backup_*.sql.gz"
|
||||
$latestBackup = Get-ChildItem -Path $backupPattern -ErrorAction SilentlyContinue |
|
||||
Sort-Object LastWriteTime -Descending |
|
||||
Select-Object -First 1
|
||||
if ($latestBackup) {
|
||||
$BackupFile = $latestBackup.FullName
|
||||
}
|
||||
else {
|
||||
Write-Host " FAIL: No backup file found in $BackupDir" -ForegroundColor Red
|
||||
Step-Result "1 - 定位备份文件" "FAILED" "未找到备份文件于 $BackupDir"
|
||||
Append-Report "## 演练结果: ❌ FAILED`n`n演练失败,未找到备份文件"
|
||||
exit 1
|
||||
}
|
||||
}
|
||||
|
||||
if (-not (Test-Path $BackupFile)) {
|
||||
Write-Host " FAIL: Backup file not found: $BackupFile" -ForegroundColor Red
|
||||
Step-Result "1 - 定位备份文件" "FAILED" "备份文件不存在: $BackupFile"
|
||||
exit 1
|
||||
}
|
||||
|
||||
$backupSize = (Get-Item $BackupFile).Length
|
||||
Write-Host " PASS: Found backup: $BackupFile ($backupSize bytes)" -ForegroundColor Green
|
||||
Step-Result "1 - 定位备份文件" "PASSED" "备份文件: ``$BackupFile`` ($backupSize bytes)"
|
||||
|
||||
# 步骤 2: 创建测试数据库
|
||||
Write-Host "[2/6] Creating test database..."
|
||||
try {
|
||||
Invoke-MySql -Query "DROP DATABASE IF EXISTS ``$TestDb``;" -Silent
|
||||
Invoke-MySql -Query "CREATE DATABASE ``$TestDb`` CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;" -Silent
|
||||
Write-Host " PASS: Test database created: $TestDb" -ForegroundColor Green
|
||||
Step-Result "2 - 创建测试数据库" "PASSED" "测试数据库 ``$TestDb`` 创建成功"
|
||||
}
|
||||
catch {
|
||||
Write-Host " FAIL: Could not create test database" -ForegroundColor Red
|
||||
Step-Result "2 - 创建测试数据库" "FAILED" "创建测试数据库 ``$TestDb`` 失败"
|
||||
$overallStatus = "FAILED"
|
||||
Append-Report "## 演练结果: ❌ FAILED"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Step 3: restore the backup into the test database.
Write-Host "[3/6] Restoring backup to test database..."
$restoreStart = Get-Date
$tempSqlFile = [System.IO.Path]::GetTempFileName()
$tempErrFile = [System.IO.Path]::GetTempFileName()
$savedMysqlPwd = $env:MYSQL_PWD
try {
    # Decompress byte-for-byte with .NET GZipStream. The dump is never routed
    # through the PowerShell text pipeline, which would re-encode it (and can
    # corrupt non-ASCII data) and hold the whole file in memory.
    # .NET resolves relative paths against the process directory, not the
    # PowerShell location, so resolve the path first.
    $backupFullPath = (Resolve-Path -LiteralPath $BackupFile).Path
    $inStream = [System.IO.File]::OpenRead($backupFullPath)
    try {
        $gzStream = New-Object System.IO.Compression.GZipStream($inStream, [System.IO.Compression.CompressionMode]::Decompress)
        try {
            $outStream = [System.IO.File]::Create($tempSqlFile)
            try {
                $gzStream.CopyTo($outStream)
            }
            finally {
                $outStream.Dispose()
            }
        }
        finally {
            $gzStream.Dispose()
        }
    }
    finally {
        $inStream.Dispose()
    }

    # Feed the dump to mysql via stdin redirection (binary safe) and fail the
    # step when the client reports an error. The password travels in
    # MYSQL_PWD so it is not part of the command line.
    $env:MYSQL_PWD = $db.Pass
    $mysqlArgs = @("-h", $db.Host, "-P", "$($db.Port)", "-u", $db.User, $TestDb)
    $restoreProc = Start-Process -FilePath "mysql" -ArgumentList $mysqlArgs -NoNewWindow -Wait -PassThru -RedirectStandardInput $tempSqlFile -RedirectStandardError $tempErrFile
    if ($restoreProc.ExitCode -ne 0) {
        $mysqlError = (Get-Content -LiteralPath $tempErrFile -ErrorAction SilentlyContinue | Select-Object -First 5) -join "; "
        throw "mysql exited with code $($restoreProc.ExitCode): $mysqlError"
    }

    $restoreEnd = Get-Date
    $restoreDuration = ($restoreEnd - $restoreStart).TotalSeconds
    Write-Host " PASS: Restore completed in $([int]$restoreDuration)s" -ForegroundColor Green
    Step-Result "3 - 从备份恢复" "PASSED" "恢复完成,耗时 $([int]$restoreDuration) 秒"
}
catch {
    Write-Host " FAIL: Restore failed: $_" -ForegroundColor Red
    Step-Result "3 - 从备份恢复" "FAILED" "从备份恢复失败: $_"
    $overallStatus = "FAILED"
    if (-not $NoCleanup) {
        try { Invoke-MySql -Query "DROP DATABASE IF EXISTS ``$TestDb``;" -Silent } catch {}
    }
    Append-Report "## 演练结果: ❌ FAILED"
    exit 1
}
finally {
    # Runs on success and on failure (including the exit in the catch block).
    $env:MYSQL_PWD = $savedMysqlPwd
    Remove-Item -LiteralPath $tempSqlFile, $tempErrFile -Force -ErrorAction SilentlyContinue
}
|
||||
|
||||
# 步骤 4: 数据完整性检查
|
||||
Write-Host "[4/6] Running data integrity checks..."
|
||||
$testTables = Invoke-MySql -Query "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='$TestDb';" -Silent -Scalar
|
||||
$sourceTables = Invoke-MySql -Query "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='$($db.DbName)';" -Silent -Scalar
|
||||
|
||||
Write-Host " Test DB tables: $testTables"
|
||||
Write-Host " Source DB tables: $sourceTables"
|
||||
|
||||
$testRecords = Invoke-MySql -Query "SELECT SUM(table_rows) FROM information_schema.tables WHERE table_schema='$TestDb';" -Silent -Scalar
|
||||
$sourceRecords = Invoke-MySql -Query "SELECT SUM(table_rows) FROM information_schema.tables WHERE table_schema='$($db.DbName)';" -Silent -Scalar
|
||||
|
||||
Write-Host " Test DB records: $testRecords"
|
||||
Write-Host " Source DB records: $sourceRecords"
|
||||
|
||||
$integrityDetail = @"
|
||||
| 指标 | 测试库 | 源库 |
|
||||
|------|--------|------|
|
||||
| 表数量 | $testTables | $sourceTables |
|
||||
| 记录数(近似) | $testRecords | $sourceRecords |
|
||||
"@
|
||||
|
||||
if ([int]$testTables -ge [int]$sourceTables) {
|
||||
Write-Host " PASS: Table count matches" -ForegroundColor Green
|
||||
Step-Result "4 - 数据完整性检查" "PASSED" $integrityDetail
|
||||
}
|
||||
else {
|
||||
Write-Host " WARN: Test DB has fewer tables than source" -ForegroundColor Yellow
|
||||
Step-Result "4 - 数据完整性检查" "WARN" "$integrityDetail`n`n⚠️ 测试库表数量少于源库"
|
||||
}
|
||||
|
||||
# Step 5: smoke tests against the restored database.
Write-Host "[5/6] Running smoke tests..."
$smokePassed = 0
$smokeFailed = 0
$smokeDetail = ""

# Tests 1 and 2: application tables. A missing table is only a warning.
foreach ($smokeTable in @("users", "schools")) {
    try {
        $rowCount = Invoke-MySql -Query "SELECT COUNT(*) FROM $smokeTable;" -Database $TestDb -Silent -Scalar
        # A failing native command does not throw by itself; without this
        # check a missing table would be counted as a pass.
        if ($LASTEXITCODE -ne 0) { throw "mysql exited with code $LASTEXITCODE" }
        $smokePassed++
        $smokeDetail += "- ✅ $smokeTable 表查询成功: $rowCount 条记录`n"
        Write-Host " PASS: $smokeTable table query: $rowCount records" -ForegroundColor Green
    }
    catch {
        $smokeDetail += "- ⚠️ $smokeTable 表不存在或查询失败`n"
        Write-Host " WARN: $smokeTable table not found or query failed" -ForegroundColor Yellow
    }
}

# Test 3: the restored schema must contain at least one base table.
try {
    $baseTableCount = Invoke-MySql -Query "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='$TestDb' AND table_type='BASE TABLE';" -Silent -Scalar
    if ($LASTEXITCODE -ne 0) { throw "mysql exited with code $LASTEXITCODE" }
    if ([int]$baseTableCount -gt 0) {
        $smokePassed++
        $smokeDetail += "- ✅ 基础表查询成功: $baseTableCount 个基础表`n"
        Write-Host " PASS: Base table query: $baseTableCount tables" -ForegroundColor Green
    }
    else {
        $smokeFailed++
        $smokeDetail += "- ❌ 基础表查询失败`n"
        Write-Host " FAIL: Base table query failed" -ForegroundColor Red
    }
}
catch {
    $smokeFailed++
    $smokeDetail += "- ❌ 基础表查询失败`n"
    Write-Host " FAIL: Base table query failed" -ForegroundColor Red
}

# A failed smoke test fails the step and the whole drill; reporting PASSED
# unconditionally would let a broken restore look healthy.
$smokeStatus = "PASSED"
if ($smokeFailed -gt 0) {
    $smokeStatus = "FAILED"
    $overallStatus = "FAILED"
}
Step-Result "5 - 冒烟测试" $smokeStatus "通过: $smokePassed, 失败: $smokeFailed`n`n$smokeDetail"
|
||||
|
||||
# 步骤 6: 清理测试数据库
|
||||
Write-Host "[6/6] Cleaning up test database..."
|
||||
if ($NoCleanup) {
|
||||
Write-Host " SKIP: Cleanup skipped (--NoCleanup)" -ForegroundColor Yellow
|
||||
Step-Result "6 - 清理测试数据库" "SKIPPED" "演练后保留测试数据库 ``$TestDb``"
|
||||
}
|
||||
else {
|
||||
try {
|
||||
Invoke-MySql -Query "DROP DATABASE IF EXISTS ``$TestDb``;" -Silent
|
||||
Write-Host " PASS: Test database dropped: $TestDb" -ForegroundColor Green
|
||||
Step-Result "6 - 清理测试数据库" "PASSED" "测试数据库 ``$TestDb`` 已删除"
|
||||
}
|
||||
catch {
|
||||
Write-Host " WARN: Could not drop test database (manual cleanup required)" -ForegroundColor Yellow
|
||||
Step-Result "6 - 清理测试数据库" "WARN" "⚠️ 无法删除测试数据库 ``$TestDb``,需手动清理"
|
||||
}
|
||||
}
|
||||
|
||||
# 生成总结
|
||||
$drillEnd = Get-Date
|
||||
$drillDuration = ($drillEnd - $drillStart).TotalSeconds
|
||||
|
||||
Append-Report "## 演练结果"
|
||||
Append-Report ""
|
||||
if ($overallStatus -eq "SUCCESS") {
|
||||
Append-Report "**状态**: ✅ 成功"
|
||||
}
|
||||
else {
|
||||
Append-Report "**状态**: ❌ 失败"
|
||||
}
|
||||
Append-Report "**总耗时**: $([int]$drillDuration) 秒"
|
||||
Append-Report "**备份文件**: ``$BackupFile``"
|
||||
Append-Report "**测试数据库**: ``$TestDb``"
|
||||
Append-Report ""
|
||||
Append-Report "## RTO/RPO 评估"
|
||||
Append-Report ""
|
||||
Append-Report "- **RTO 目标**: 4 小时"
|
||||
Append-Report "- **本次恢复耗时**: $([int]$restoreDuration) 秒 ($([int]($restoreDuration / 60)) 分钟)"
|
||||
if ($restoreDuration -lt 14400) {
|
||||
Append-Report "- **RTO 评估**: ✅ 达标"
|
||||
}
|
||||
else {
|
||||
Append-Report "- **RTO 评估**: ⚠️ 需关注"
|
||||
}
|
||||
Append-Report "- **RPO 目标**: 24 小时(取决于备份频率)"
|
||||
Append-Report ""
|
||||
|
||||
Write-Host ""
|
||||
Write-Host "=== Drill Summary ===" -ForegroundColor Cyan
|
||||
Write-Host "Status: $overallStatus"
|
||||
Write-Host "Duration: $([int]$drillDuration)s"
|
||||
Write-Host "Report: $ReportFile"
|
||||
Write-Host ""
|
||||
|
||||
if ($overallStatus -eq "SUCCESS") {
|
||||
exit 0
|
||||
}
|
||||
else {
|
||||
exit 1
|
||||
}
|
||||
369
scripts/dr-drill.sh
Normal file
369
scripts/dr-drill.sh
Normal file
@@ -0,0 +1,369 @@
|
||||
#!/bin/bash
|
||||
# 灾备演练脚本
|
||||
# 用法: ./dr-drill.sh
|
||||
# 自动化灾备演练:从备份恢复到测试数据库,验证数据完整性
|
||||
|
||||
set -u
|
||||
|
||||
show_help() {
|
||||
cat <<EOF
|
||||
用法: $0 [选项]
|
||||
灾备演练脚本,自动化测试备份恢复流程
|
||||
|
||||
选项:
|
||||
--backup FILE 指定备份文件(不指定则使用最新备份)
|
||||
--test-db NAME 测试数据库名(默认 next_edu_dr_drill)
|
||||
--no-cleanup 演练后不清理测试数据库
|
||||
--report-dir DIR 报告输出目录(默认 docs/dr/reports)
|
||||
--help, -h 显示帮助信息
|
||||
|
||||
环境变量:
|
||||
DATABASE_URL 数据库连接 URL(必需)
|
||||
BACKUP_DIR 备份目录(默认 ./backups)
|
||||
DR_DRILL_TEST_DB 测试数据库名(默认 next_edu_dr_drill)
|
||||
DR_DRILL_REPORT_DIR 报告目录(默认 docs/dr/reports)
|
||||
|
||||
退出码:
|
||||
0 演练成功
|
||||
1 演练失败
|
||||
EOF
|
||||
}
|
||||
|
||||
# 解析参数
|
||||
BACKUP_FILE=""
|
||||
NO_CLEANUP=0
|
||||
REPORT_DIR=""
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--help|-h)
|
||||
show_help
|
||||
exit 0
|
||||
;;
|
||||
--backup)
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "ERROR: --backup requires an argument" >&2
|
||||
exit 1
|
||||
fi
|
||||
BACKUP_FILE="$2"
|
||||
shift 2
|
||||
;;
|
||||
--test-db)
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "ERROR: --test-db requires an argument" >&2
|
||||
exit 1
|
||||
fi
|
||||
DR_DRILL_TEST_DB="$2"
|
||||
shift 2
|
||||
;;
|
||||
--no-cleanup)
|
||||
NO_CLEANUP=1
|
||||
shift
|
||||
;;
|
||||
--report-dir)
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "ERROR: --report-dir requires an argument" >&2
|
||||
exit 1
|
||||
fi
|
||||
REPORT_DIR="$2"
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
echo "ERROR: Unknown argument: $1" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# 配置
|
||||
DATABASE_URL="${DATABASE_URL:-}"
|
||||
BACKUP_DIR="${BACKUP_DIR:-./backups}"
|
||||
TEST_DB="${DR_DRILL_TEST_DB:-next_edu_dr_drill}"
|
||||
REPORT_DIR="${REPORT_DIR:-${DR_DRILL_REPORT_DIR:-docs/dr/reports}}"
|
||||
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
|
||||
REPORT_FILE="$REPORT_DIR/dr_drill_${TIMESTAMP}.md"
|
||||
|
||||
# 检查 DATABASE_URL
|
||||
if [ -z "$DATABASE_URL" ]; then
|
||||
echo "ERROR: DATABASE_URL not set" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Parse DATABASE_URL of the form scheme://user[:pass]@host[:port]/dbname[?params].
# Credentials are everything before the first "@". The port is optional and
# falls back to 3306 (the MySQL default) so that port-less URLs still yield a
# usable host/port pair for the mysql client calls below.
_db_rest=${DATABASE_URL#*://}
DB_USER=""
DB_PASS=""
case "$_db_rest" in
  *@*)
    _db_userinfo=${_db_rest%%@*}
    _db_rest=${_db_rest#*@}
    DB_USER=${_db_userinfo%%:*}
    case "$_db_userinfo" in
      *:*) DB_PASS=${_db_userinfo#*:} ;;
    esac
    ;;
esac
# host[:port] ends at the first "/" or "?" after the credentials.
_db_hostport=${_db_rest%%[/?]*}
DB_HOST=${_db_hostport%%:*}
DB_PORT=""
case "$_db_hostport" in
  *:*) DB_PORT=${_db_hostport#*:} ;;
esac
case "$DB_PORT" in
  '' | *[!0-9]*) DB_PORT=3306 ;;
esac
# Database name is the path segment up to the query string, if any.
DB_NAME=""
case "$_db_rest" in
  */*)
    DB_NAME=${_db_rest#*/}
    DB_NAME=${DB_NAME%%\?*}
    ;;
esac
|
||||
|
||||
# 创建报告目录
|
||||
mkdir -p "$REPORT_DIR"
|
||||
|
||||
# 初始化报告
|
||||
init_report() {
|
||||
cat > "$REPORT_FILE" <<EOF
|
||||
# 灾备演练报告
|
||||
|
||||
- **演练时间**: $(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
||||
- **测试数据库**: $TEST_DB
|
||||
- **源数据库**: $DB_NAME
|
||||
- **数据库主机**: $DB_HOST:$DB_PORT
|
||||
- **备份文件**: $BACKUP_FILE
|
||||
|
||||
## 演练步骤
|
||||
|
||||
EOF
|
||||
}
|
||||
|
||||
# Append one line of text to the drill report.
# Globals:   REPORT_FILE (read) - path of the Markdown report being written
# Arguments: $1 - text to append; a trailing newline is added
append_report() {
  # printf rather than echo: echo treats a bare "-n"/"-e" argument as an
  # option and would silently drop it from the report.
  printf '%s\n' "$1" >> "$REPORT_FILE"
}
|
||||
|
||||
# Record the outcome of one drill step in the report.
# Arguments: $1 - step label (e.g. "2 - ...")
#            $2 - status: PASSED | FAILED | WARN | SKIPPED
#            $3 - detail text (Markdown, may span several lines)
# Outputs:   appends a section to the report via append_report and prints a
#            "---" separator on stdout
step_result() {
  local step="$1"
  local status="$2"
  local detail="$3"
  append_report "### 步骤 $step: $status"
  append_report ""
  append_report "$detail"
  append_report ""
  # Only genuinely successful steps get the success marker; warnings and
  # skipped steps are labelled as such instead of being reported as passed.
  case "$status" in
    FAILED) append_report "❌ 步骤失败" ;;
    WARN) append_report "⚠️ 步骤存在警告" ;;
    SKIPPED) append_report "⏭️ 步骤已跳过" ;;
    *) append_report "✅ 步骤成功" ;;
  esac
  append_report ""
  echo "---"
}
|
||||
|
||||
echo "=== Disaster Recovery Drill ==="
|
||||
echo "Time: $(date -u +"%Y-%m-%dT%H:%M:%SZ")"
|
||||
echo "Test DB: $TEST_DB"
|
||||
echo "Source DB: $DB_NAME@$DB_HOST:$DB_PORT"
|
||||
echo "Report: $REPORT_FILE"
|
||||
echo ""
|
||||
|
||||
init_report
|
||||
DRILL_START=$(date +%s)
|
||||
OVERALL_STATUS="SUCCESS"
|
||||
|
||||
# 步骤 1: 查找备份文件
|
||||
echo "[1/6] Locating backup file..."
|
||||
if [ -z "$BACKUP_FILE" ]; then
|
||||
BACKUP_FILE=$(ls -t "$BACKUP_DIR"/db_backup_*.sql.gz 2>/dev/null | head -1)
|
||||
if [ -z "$BACKUP_FILE" ]; then
|
||||
echo " FAIL: No backup file found in $BACKUP_DIR"
|
||||
step_result "1 - 定位备份文件" "FAILED" "未找到备份文件于 $BACKUP_DIR"
|
||||
OVERALL_STATUS="FAILED"
|
||||
append_report "## 演练结果: ❌ FAILED"
|
||||
append_report ""
|
||||
append_report "演练失败,未找到备份文件"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ! -f "$BACKUP_FILE" ]; then
|
||||
echo " FAIL: Backup file not found: $BACKUP_FILE"
|
||||
step_result "1 - 定位备份文件" "FAILED" "备份文件不存在: $BACKUP_FILE"
|
||||
OVERALL_STATUS="FAILED"
|
||||
append_report "## 演练结果: ❌ FAILED"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
BACKUP_SIZE=$(stat -c%s "$BACKUP_FILE" 2>/dev/null || stat -f%z "$BACKUP_FILE" 2>/dev/null)
|
||||
echo " PASS: Found backup: $BACKUP_FILE (${BACKUP_SIZE} bytes)"
|
||||
step_result "1 - 定位备份文件" "PASSED" "备份文件: \`$BACKUP_FILE\` (${BACKUP_SIZE} bytes)"
|
||||
|
||||
# Step 2: create the test database
echo "[2/6] Creating test database..."

# Safety guard: the statements below DROP $TEST_DB. Refuse to continue when the
# test database name is empty or names the source database (for example via
# --test-db). The comparison is case-insensitive because MySQL database names
# are case-insensitive on some platforms.
_test_db_lc=$(printf '%s' "$TEST_DB" | tr '[:upper:]' '[:lower:]')
_source_db_lc=$(printf '%s' "$DB_NAME" | tr '[:upper:]' '[:lower:]')
if [ -z "$TEST_DB" ] || [ "$_test_db_lc" = "$_source_db_lc" ]; then
  echo " FAIL: Test database name must be non-empty and differ from the source database"
  step_result "2 - 创建测试数据库" "FAILED" "测试数据库名 \`$TEST_DB\` 为空或与源数据库相同,已中止以避免删除源数据库"
  OVERALL_STATUS="FAILED"
  append_report "## 演练结果: ❌ FAILED"
  exit 1
fi

# Drop any test database left over from a previous drill (best effort).
mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" \
  -e "DROP DATABASE IF EXISTS \`$TEST_DB\`;" 2>/dev/null

if mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" \
  -e "CREATE DATABASE \`$TEST_DB\` CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;" 2>/dev/null; then
  echo " PASS: Test database created: $TEST_DB"
  step_result "2 - 创建测试数据库" "PASSED" "测试数据库 \`$TEST_DB\` 创建成功"
else
  echo " FAIL: Could not create test database"
  step_result "2 - 创建测试数据库" "FAILED" "创建测试数据库 \`$TEST_DB\` 失败"
  OVERALL_STATUS="FAILED"
  append_report "## 演练结果: ❌ FAILED"
  exit 1
fi
|
||||
|
||||
# Step 3: restore the backup into the test database
echo "[3/6] Restoring backup to test database..."
RESTORE_START=$(date +%s)
# The pipeline runs in a subshell with pipefail enabled so that a gunzip
# failure (corrupt or truncated archive) fails the step even if mysql accepts
# the partial input and exits 0. The subshell keeps pipefail from leaking into
# the rest of the script.
if (
  set -o pipefail
  gunzip -c "$BACKUP_FILE" \
    | mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" "$TEST_DB" 2>/dev/null
); then
  RESTORE_END=$(date +%s)
  RESTORE_DURATION=$((RESTORE_END - RESTORE_START))
  echo " PASS: Restore completed in ${RESTORE_DURATION}s"
  step_result "3 - 从备份恢复" "PASSED" "恢复完成,耗时 ${RESTORE_DURATION} 秒"
else
  echo " FAIL: Restore failed"
  step_result "3 - 从备份恢复" "FAILED" "从备份恢复失败"
  OVERALL_STATUS="FAILED"
  # Best-effort cleanup of the partially restored database.
  if [ "$NO_CLEANUP" -eq 0 ]; then
    mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" \
      -e "DROP DATABASE IF EXISTS \`$TEST_DB\`;" 2>/dev/null || true
  fi
  append_report "## 演练结果: ❌ FAILED"
  exit 1
fi
|
||||
|
||||
# 步骤 4: 数据完整性检查
|
||||
echo "[4/6] Running data integrity checks..."
|
||||
# 获取测试数据库表数量
|
||||
TEST_TABLES=$(mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" \
|
||||
-e "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='$TEST_DB';" \
|
||||
-s -N 2>/dev/null || echo 0)
|
||||
|
||||
# 获取源数据库表数量
|
||||
SOURCE_TABLES=$(mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" \
|
||||
-e "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='$DB_NAME';" \
|
||||
-s -N 2>/dev/null || echo 0)
|
||||
|
||||
echo " Test DB tables: $TEST_TABLES"
|
||||
echo " Source DB tables: $SOURCE_TABLES"
|
||||
|
||||
# 获取测试数据库总记录数
|
||||
TEST_RECORDS=$(mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" \
|
||||
-e "SELECT SUM(table_rows) FROM information_schema.tables WHERE table_schema='$TEST_DB';" \
|
||||
-s -N 2>/dev/null || echo 0)
|
||||
|
||||
# 获取源数据库总记录数
|
||||
SOURCE_RECORDS=$(mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" \
|
||||
-e "SELECT SUM(table_rows) FROM information_schema.tables WHERE table_schema='$DB_NAME';" \
|
||||
-s -N 2>/dev/null || echo 0)
|
||||
|
||||
echo " Test DB records: $TEST_RECORDS"
|
||||
echo " Source DB records: $SOURCE_RECORDS"
|
||||
|
||||
INTEGRITY_DETAIL="| 指标 | 测试库 | 源库 |
|
||||
|------|--------|------|
|
||||
| 表数量 | $TEST_TABLES | $SOURCE_TABLES |
|
||||
| 记录数(近似) | $TEST_RECORDS | $SOURCE_RECORDS |"
|
||||
|
||||
if [ "$TEST_TABLES" -ge "$SOURCE_TABLES" ]; then
|
||||
echo " PASS: Table count matches"
|
||||
step_result "4 - 数据完整性检查" "PASSED" "$INTEGRITY_DETAIL"
|
||||
else
|
||||
echo " WARN: Test DB has fewer tables than source"
|
||||
step_result "4 - 数据完整性检查" "WARN" "$INTEGRITY_DETAIL
|
||||
|
||||
⚠️ 测试库表数量少于源库"
|
||||
fi
|
||||
|
||||
# Step 5: smoke tests against the restored database
echo "[5/6] Running smoke tests..."
SMOKE_PASSED=0
SMOKE_FAILED=0
SMOKE_DETAIL=""

# Test 1: row count of the users table. A missing table or failed query is
# reported as a warning only and is not counted as a failure.
USER_COUNT=$(mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" "$TEST_DB" \
  -e "SELECT COUNT(*) FROM users;" -s -N 2>/dev/null || echo "N/A")
if [ "$USER_COUNT" != "N/A" ]; then
  SMOKE_PASSED=$((SMOKE_PASSED + 1))
  SMOKE_DETAIL="${SMOKE_DETAIL}- ✅ users 表查询成功: ${USER_COUNT} 条记录
"
  echo " PASS: users table query: $USER_COUNT records"
else
  SMOKE_DETAIL="${SMOKE_DETAIL}- ⚠️ users 表不存在或查询失败
"
  echo " WARN: users table not found or query failed"
fi

# Test 2: row count of the schools table (same warning-only policy).
SCHOOL_COUNT=$(mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" "$TEST_DB" \
  -e "SELECT COUNT(*) FROM schools;" -s -N 2>/dev/null || echo "N/A")
if [ "$SCHOOL_COUNT" != "N/A" ]; then
  SMOKE_PASSED=$((SMOKE_PASSED + 1))
  SMOKE_DETAIL="${SMOKE_DETAIL}- ✅ schools 表查询成功: ${SCHOOL_COUNT} 条记录
"
  echo " PASS: schools table query: $SCHOOL_COUNT records"
else
  SMOKE_DETAIL="${SMOKE_DETAIL}- ⚠️ schools 表不存在或查询失败
"
  echo " WARN: schools table not found or query failed"
fi

# Test 3: the restored schema must contain at least one base table.
JOIN_TEST=$(mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" "$TEST_DB" \
  -e "SELECT COUNT(*) FROM information_schema.tables WHERE table_schema='$TEST_DB' AND table_type='BASE TABLE';" \
  -s -N 2>/dev/null || echo "0")
# Normalise anything that is not a plain integer so the numeric test below
# cannot abort with "integer expression expected".
case "$JOIN_TEST" in
  '' | *[!0-9]*) JOIN_TEST=0 ;;
esac
if [ "$JOIN_TEST" -gt 0 ]; then
  SMOKE_PASSED=$((SMOKE_PASSED + 1))
  SMOKE_DETAIL="${SMOKE_DETAIL}- ✅ 基础表查询成功: ${JOIN_TEST} 个基础表
"
  echo " PASS: Base table query: $JOIN_TEST tables"
else
  SMOKE_DETAIL="${SMOKE_DETAIL}- ❌ 基础表查询失败
"
  SMOKE_FAILED=$((SMOKE_FAILED + 1))
  echo " FAIL: Base table query failed"
fi

# Derive the step status from the failure counter instead of always reporting
# PASSED, and propagate a failure to the overall drill result.
if [ "$SMOKE_FAILED" -gt 0 ]; then
  SMOKE_STATUS="FAILED"
  OVERALL_STATUS="FAILED"
else
  SMOKE_STATUS="PASSED"
fi
step_result "5 - 冒烟测试" "$SMOKE_STATUS" "通过: $SMOKE_PASSED, 失败: $SMOKE_FAILED

$SMOKE_DETAIL"
|
||||
|
||||
# 步骤 6: 清理测试数据库
|
||||
echo "[6/6] Cleaning up test database..."
|
||||
if [ "$NO_CLEANUP" -eq 1 ]; then
|
||||
echo " SKIP: Cleanup skipped (--no-cleanup)"
|
||||
step_result "6 - 清理测试数据库" "SKIPPED" "演练后保留测试数据库 \`$TEST_DB\`"
|
||||
else
|
||||
if mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" \
|
||||
-e "DROP DATABASE IF EXISTS \`$TEST_DB\`;" 2>/dev/null; then
|
||||
echo " PASS: Test database dropped: $TEST_DB"
|
||||
step_result "6 - 清理测试数据库" "PASSED" "测试数据库 \`$TEST_DB\` 已删除"
|
||||
else
|
||||
echo " WARN: Could not drop test database (manual cleanup required)"
|
||||
step_result "6 - 清理测试数据库" "WARN" "⚠️ 无法删除测试数据库 \`$TEST_DB\`,需手动清理"
|
||||
fi
|
||||
fi
|
||||
|
||||
# 生成总结
|
||||
DRILL_END=$(date +%s)
|
||||
DRILL_DURATION=$((DRILL_END - DRILL_START))
|
||||
|
||||
append_report "## 演练结果"
|
||||
append_report ""
|
||||
if [ "$OVERALL_STATUS" = "SUCCESS" ]; then
|
||||
append_report "**状态**: ✅ 成功"
|
||||
else
|
||||
append_report "**状态**: ❌ 失败"
|
||||
fi
|
||||
append_report "**总耗时**: ${DRILL_DURATION} 秒"
|
||||
append_report "**备份文件**: \`$BACKUP_FILE\`"
|
||||
append_report "**测试数据库**: \`$TEST_DB\`"
|
||||
append_report ""
|
||||
append_report "## RTO/RPO 评估"
|
||||
append_report ""
|
||||
append_report "- **RTO 目标**: 4 小时"
|
||||
append_report "- **本次恢复耗时**: ${RESTORE_DURATION} 秒 ($(( RESTORE_DURATION / 60 )) 分钟)"
|
||||
if [ -n "${RESTORE_DURATION:-}" ] && [ "$RESTORE_DURATION" -lt 14400 ]; then
|
||||
append_report "- **RTO 评估**: ✅ 达标"
|
||||
else
|
||||
append_report "- **RTO 评估**: ⚠️ 需关注"
|
||||
fi
|
||||
append_report "- **RPO 目标**: 24 小时(取决于备份频率)"
|
||||
append_report ""
|
||||
|
||||
echo ""
|
||||
echo "=== Drill Summary ==="
|
||||
echo "Status: $OVERALL_STATUS"
|
||||
echo "Duration: ${DRILL_DURATION}s"
|
||||
echo "Report: $REPORT_FILE"
|
||||
echo ""
|
||||
|
||||
if [ "$OVERALL_STATUS" = "SUCCESS" ]; then
|
||||
exit 0
|
||||
else
|
||||
exit 1
|
||||
fi
|
||||
419
scripts/failover.sh
Normal file
419
scripts/failover.sh
Normal file
@@ -0,0 +1,419 @@
|
||||
#!/bin/bash
|
||||
# 故障切换脚本
|
||||
# 用法: ./failover.sh [--auto] [--primary URL] [--standby URL]
|
||||
# 用于主数据库故障时切换到备库
|
||||
|
||||
set -u
|
||||
|
||||
show_help() {
|
||||
cat <<EOF
|
||||
用法: $0 [选项]
|
||||
数据库故障切换脚本,将应用从主库切换到备库
|
||||
|
||||
选项:
|
||||
--auto 半自动模式(检测失败后自动切换,需先确认)
|
||||
--primary URL 主库连接 URL(默认从 DATABASE_URL 读取)
|
||||
--standby URL 备库连接 URL(必需,从 DATABASE_URL_STANDBY 读取)
|
||||
--app-url URL 应用健康检查 URL(默认 http://localhost:8015)
|
||||
--no-restart 不重启应用(仅更新配置)
|
||||
--dry-run 演练模式,只输出步骤不实际执行
|
||||
--help, -h 显示帮助信息
|
||||
|
||||
环境变量:
|
||||
DATABASE_URL 主库连接 URL
|
||||
DATABASE_URL_STANDBY 备库连接 URL(必需)
|
||||
FAILOVER_APP_URL 应用健康检查 URL(默认 http://localhost:8015)
|
||||
FAILOVER_APP_NAME 应用容器名(默认 nextjs-app)
|
||||
FAILOVER_CONFIG_FILE 配置文件路径(默认 .env.local)
|
||||
FAILOVER_LOG_FILE 切换日志路径(默认 docs/dr/logs/failover.log)
|
||||
|
||||
退出码:
|
||||
0 切换成功
|
||||
1 切换失败
|
||||
EOF
|
||||
}
|
||||
|
||||
# 解析参数
|
||||
AUTO_MODE=0
|
||||
PRIMARY_URL=""
|
||||
STANDBY_URL=""
|
||||
APP_URL=""
|
||||
NO_RESTART=0
|
||||
DRY_RUN=0
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--help|-h)
|
||||
show_help
|
||||
exit 0
|
||||
;;
|
||||
--auto)
|
||||
AUTO_MODE=1
|
||||
shift
|
||||
;;
|
||||
--primary)
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "ERROR: --primary requires an argument" >&2
|
||||
exit 1
|
||||
fi
|
||||
PRIMARY_URL="$2"
|
||||
shift 2
|
||||
;;
|
||||
--standby)
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "ERROR: --standby requires an argument" >&2
|
||||
exit 1
|
||||
fi
|
||||
STANDBY_URL="$2"
|
||||
shift 2
|
||||
;;
|
||||
--app-url)
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "ERROR: --app-url requires an argument" >&2
|
||||
exit 1
|
||||
fi
|
||||
APP_URL="$2"
|
||||
shift 2
|
||||
;;
|
||||
--no-restart)
|
||||
NO_RESTART=1
|
||||
shift
|
||||
;;
|
||||
--dry-run)
|
||||
DRY_RUN=1
|
||||
shift
|
||||
;;
|
||||
*)
|
||||
echo "ERROR: Unknown argument: $1" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# 配置
|
||||
PRIMARY_URL="${PRIMARY_URL:-${DATABASE_URL:-}}"
|
||||
STANDBY_URL="${STANDBY_URL:-${DATABASE_URL_STANDBY:-}}"
|
||||
APP_URL="${APP_URL:-${FAILOVER_APP_URL:-http://localhost:8015}}"
|
||||
APP_NAME="${FAILOVER_APP_NAME:-nextjs-app}"
|
||||
CONFIG_FILE="${FAILOVER_CONFIG_FILE:-.env.local}"
|
||||
LOG_DIR="docs/dr/logs"
|
||||
LOG_FILE="${FAILOVER_LOG_FILE:-$LOG_DIR/failover.log}"
|
||||
|
||||
# 检查必需参数
|
||||
if [ -z "$STANDBY_URL" ]; then
|
||||
echo "ERROR: Standby database URL not provided" >&2
|
||||
echo "Set DATABASE_URL_STANDBY or use --standby" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ -z "$PRIMARY_URL" ]; then
|
||||
echo "ERROR: Primary database URL not provided" >&2
|
||||
echo "Set DATABASE_URL or use --primary" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Create the directory that will hold the failover log. It is derived from
# LOG_FILE rather than LOG_DIR so that a FAILOVER_LOG_FILE override pointing
# outside LOG_DIR still gets its parent directory created.
mkdir -p "$(dirname -- "$LOG_FILE")"
|
||||
|
||||
# Timestamped logger.
# Globals:   LOG_FILE (read) - file the line is appended to
# Arguments: $1 - message text
# Outputs:   "[<UTC ISO-8601 timestamp>] <message>" on stdout and in LOG_FILE
log() {
  local stamp
  stamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
  printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOG_FILE"
}
|
||||
|
||||
# Log an error: same as log, prefixed with "ERROR: ", with the console copy
# redirected to stderr.
# Arguments: $1 - error message
log_error() {
  local text="ERROR: $1"
  log "$text" >&2
}
|
||||
|
||||
# Split a database URL into its components.
# Arguments: $1 - URL of the form scheme://user[:pass]@host[:port]/dbname[?params]
# Outputs:   "user|pass|host|port|dbname" on stdout
# Notes:     - credentials are everything before the first "@"
#            - a missing or non-numeric port falls back to 3306 (MySQL default)
#            - the database name stops at the first "?" so query parameters
#              (even ones containing "/") never leak into it
parse_db_url() {
  local url="$1"
  local user="" pass="" host port="" dbname=""
  local rest userinfo hostport

  rest=${url#*://} # drop the scheme
  case "$rest" in
    *@*)
      userinfo=${rest%%@*}
      rest=${rest#*@}
      user=${userinfo%%:*}
      case "$userinfo" in
        *:*) pass=${userinfo#*:} ;;
      esac
      ;;
  esac

  # host[:port] ends at the first "/" or "?".
  hostport=${rest%%[/?]*}
  host=${hostport%%:*}
  case "$hostport" in
    *:*) port=${hostport#*:} ;;
  esac
  case "$port" in
    '' | *[!0-9]*) port=3306 ;;
  esac

  case "$rest" in
    */*)
      dbname=${rest#*/}
      dbname=${dbname%%\?*}
      ;;
  esac

  printf '%s\n' "$user|$pass|$host|$port|$dbname"
}
|
||||
|
||||
# Probe a database with "SELECT 1".
# Globals:   DRY_RUN (read)
# Arguments: $1 - database URL (parsed with parse_db_url)
# Returns:   0 if the server answered (or in dry-run mode), 1 otherwise
check_db_health() {
  local url="$1"
  local parsed
  parsed=$(parse_db_url "$url")
  local user pass host port dbname
  IFS='|' read -r user pass host port dbname <<<"$parsed"

  log "Checking database health: ${host}:${port}/${dbname}"
  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would check: mysql -h $host -P $port -u $user -e 'SELECT 1'"
    return 0
  fi

  # Both stdout and stderr are discarded: only the exit status matters, and
  # the query result would otherwise be printed between the log lines.
  # The connect timeout keeps an unreachable host from stalling the failover.
  if mysql --connect-timeout=10 -h "$host" -P "$port" -u "$user" -p"$pass" \
    -e "SELECT 1;" >/dev/null 2>&1; then
    log " Database is healthy"
    return 0
  fi
  log " Database is NOT reachable"
  return 1
}
|
||||
|
||||
# Probe the application over HTTP.
# Globals:   DRY_RUN (read)
# Arguments: $1 - URL to request
# Returns:   0 when the request succeeds, in dry-run mode, or when curl is not
#            installed (the check is skipped with a warning); 1 otherwise
check_app_health() {
  local url="$1"
  log "Checking application health: $url"

  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would check: curl -f $url"
    return 0
  fi

  if ! command -v curl >/dev/null 2>&1; then
    log " WARN: curl not available, skipping app health check"
    return 0
  fi

  if ! curl -sf -o /dev/null -m 10 "$url" 2>/dev/null; then
    log " Application is NOT healthy"
    return 1
  fi

  log " Application is healthy"
  return 0
}
|
||||
|
||||
# Promote the standby to primary when it is configured as a replica.
# Globals:   STANDBY_URL, DRY_RUN (read)
# Returns:   0 on success, when the standby is standalone, or in dry-run mode;
#            1 when stopping replication or disabling read-only mode fails
promote_standby() {
  log "Promoting standby to primary..."
  local parsed
  parsed=$(parse_db_url "$STANDBY_URL")
  local user pass host port dbname
  IFS='|' read -r user pass host port dbname <<<"$parsed"

  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would promote standby: STOP SLAVE; RESET SLAVE ALL; SET GLOBAL read_only=OFF;"
    return 0
  fi

  # Non-empty SHOW SLAVE STATUS output means replication is configured.
  # The result is kept in a local so it does not leak into the global scope.
  local slave_status
  if ! slave_status=$(mysql -h "$host" -P "$port" -u "$user" -p"$pass" \
    -e "SHOW SLAVE STATUS\G" 2>/dev/null); then
    # A failed query (e.g. missing privilege) used to be indistinguishable
    # from "not a replica"; surface it, then continue as before.
    log " WARN: Could not query replication status; treating standby as standalone"
    slave_status=""
  fi

  if [ -n "$slave_status" ]; then
    log " Standby is a slave, promoting..."
    # Stop replication and discard the replica configuration.
    if mysql -h "$host" -P "$port" -u "$user" -p"$pass" \
      -e "STOP SLAVE; RESET SLAVE ALL;" 2>/dev/null; then
      log " Replication stopped and reset"
    else
      log_error "Failed to stop replication"
      return 1
    fi
    # Allow writes on the promoted server.
    if mysql -h "$host" -P "$port" -u "$user" -p"$pass" \
      -e "SET GLOBAL read_only=OFF; SET GLOBAL super_read_only=OFF;" 2>/dev/null; then
      log " Read-only mode disabled"
    else
      log_error "Failed to disable read-only mode"
      return 1
    fi
  else
    log " Standby is not a slave (standalone), skipping promotion"
  fi

  log " Standby promoted successfully"
  return 0
}
|
||||
|
||||
# Point the application configuration at the standby database.
# Globals:   CONFIG_FILE, STANDBY_URL, DRY_RUN (read); DATABASE_URL (exported)
# Returns:   0 on success or in dry-run mode, 1 if rewriting CONFIG_FILE fails
update_config() {
  log "Updating application configuration..."
  if [ "$DRY_RUN" -eq 1 ]; then
    # Mask the password: this line also ends up in the persistent log file.
    local masked_url
    masked_url=$(printf '%s\n' "$STANDBY_URL" | sed 's|\(://[^:/@]*\):[^@]*@|\1:***@|')
    log " [DRY-RUN] Would update $CONFIG_FILE: DATABASE_URL=$masked_url"
    return 0
  fi

  if [ -f "$CONFIG_FILE" ]; then
    # Keep a timestamped copy of the original configuration.
    cp "$CONFIG_FILE" "${CONFIG_FILE}.bak.$(date +%s)"
    log " Backed up original config to ${CONFIG_FILE}.bak.*"

    if grep -q "^DATABASE_URL=" "$CONFIG_FILE"; then
      # Escape the characters that are special in a sed replacement ("&", the
      # "|" delimiter and "\"); URLs with query strings such as
      # "?ssl=true&charset=utf8" would otherwise be written back corrupted.
      local escaped_url
      escaped_url=$(printf '%s\n' "$STANDBY_URL" | sed 's/[&|\\]/\\&/g')
      if ! sed -i.bak "s|^DATABASE_URL=.*|DATABASE_URL=$escaped_url|" "$CONFIG_FILE"; then
        log "ERROR: Failed to update DATABASE_URL in $CONFIG_FILE" >&2
        return 1
      fi
      rm -f "${CONFIG_FILE}.bak" 2>/dev/null || true
      log " Updated DATABASE_URL in $CONFIG_FILE"
    else
      # Make sure the new entry starts on its own line even when the file
      # does not end with a newline.
      if [ -n "$(tail -c 1 "$CONFIG_FILE")" ]; then
        printf '\n' >> "$CONFIG_FILE"
      fi
      printf 'DATABASE_URL=%s\n' "$STANDBY_URL" >> "$CONFIG_FILE"
      log " Added DATABASE_URL to $CONFIG_FILE"
    fi
  else
    log " WARN: Config file $CONFIG_FILE not found, creating new one"
    printf 'DATABASE_URL=%s\n' "$STANDBY_URL" > "$CONFIG_FILE"
  fi

  # Also update the environment for the remainder of this script run.
  export DATABASE_URL="$STANDBY_URL"
  log " Configuration updated"
  return 0
}
|
||||
|
||||
# Restart the application container so it picks up the new configuration.
# Globals:   NO_RESTART, DRY_RUN, APP_NAME, STANDBY_URL (read)
# Returns:   0 on success, when the restart is skipped, or when docker is not
#            available (manual restart requested); 1 if "docker restart" fails
restart_app() {
  if [ "$NO_RESTART" -eq 1 ]; then
    log "Skipping application restart (--no-restart)"
    return 0
  fi

  log "Restarting application..."
  if [ "$DRY_RUN" -eq 1 ]; then
    log " [DRY-RUN] Would restart: docker restart $APP_NAME"
    return 0
  fi

  if command -v docker >/dev/null 2>&1; then
    log " Restarting Docker container: $APP_NAME"
    if docker restart "$APP_NAME" 2>/dev/null; then
      log " Container restarted"
      # Give the application a moment to start.
      log " Waiting for application to start..."
      sleep 5
      return 0
    else
      log_error "Failed to restart container $APP_NAME"
      return 1
    fi
  else
    log " WARN: Docker not available, please restart application manually"
    # Mask the password before the URL reaches the console and the log file.
    local masked_url
    masked_url=$(printf '%s\n' "$STANDBY_URL" | sed 's|\(://[^:/@]*\):[^@]*@|\1:***@|')
    log " Updated DATABASE_URL: $masked_url"
  fi
  return 0
}
|
||||
|
||||
# Main flow
# The connection URLs embed the database password; mask it before the URLs are
# written to the console and to the persistent log file.
PRIMARY_URL_MASKED=$(printf '%s\n' "$PRIMARY_URL" | sed 's|\(://[^:/@]*\):[^@]*@|\1:***@|')
STANDBY_URL_MASKED=$(printf '%s\n' "$STANDBY_URL" | sed 's|\(://[^:/@]*\):[^@]*@|\1:***@|')

log "========================================"
log "Database Failover Started"
log "========================================"
log "Mode: $([ "$AUTO_MODE" -eq 1 ] && echo "semi-auto" || echo "manual")"
log "Dry-run: $([ "$DRY_RUN" -eq 1 ] && echo "yes" || echo "no")"
log "Primary: $PRIMARY_URL_MASKED"
log "Standby: $STANDBY_URL_MASKED"
log ""
|
||||
|
||||
# 步骤 1: 检测主库健康状态
|
||||
log "[1/5] Checking primary database health..."
|
||||
PRIMARY_HEALTHY=0
|
||||
if check_db_health "$PRIMARY_URL"; then
|
||||
PRIMARY_HEALTHY=1
|
||||
log " Primary is healthy"
|
||||
if [ "$AUTO_MODE" -eq 0 ]; then
|
||||
log " Primary is healthy. Failover not needed."
|
||||
log " Use --auto to force failover even if primary is healthy"
|
||||
log "========================================"
|
||||
log "Failover Cancelled (Primary Healthy)"
|
||||
log "========================================"
|
||||
exit 0
|
||||
fi
|
||||
else
|
||||
log " Primary is NOT healthy, proceeding with failover"
|
||||
fi
|
||||
|
||||
# 半自动模式确认
|
||||
if [ "$AUTO_MODE" -eq 1 ] && [ "$DRY_RUN" -eq 0 ]; then
|
||||
echo ""
|
||||
echo "WARNING: About to failover from primary to standby."
|
||||
echo " Primary: $PRIMARY_URL"
|
||||
echo " Standby: $STANDBY_URL"
|
||||
echo ""
|
||||
read -p "Type 'FAILover' to confirm: " CONFIRM
|
||||
if [ "$CONFIRM" != "FAILover" ]; then
|
||||
log "Failover cancelled by user"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# 步骤 2: 检查备库健康
|
||||
log ""
|
||||
log "[2/5] Checking standby database health..."
|
||||
if ! check_db_health "$STANDBY_URL"; then
|
||||
log_error "Standby is also not healthy, cannot failover"
|
||||
log "========================================"
|
||||
log "Failover FAILED (Standby Unhealthy)"
|
||||
log "========================================"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 步骤 3: 提升备库为主库
|
||||
log ""
|
||||
log "[3/5] Promoting standby to primary..."
|
||||
if ! promote_standby; then
|
||||
log_error "Failed to promote standby"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Step 4: update the application configuration and restart
log ""
log "[4/5] Updating application configuration and restarting..."
# Do not restart the application on top of a configuration that could not be
# written.
if ! update_config; then
  log_error "Failed to update application configuration"
  log " Manual intervention required"
  exit 1
fi
if ! restart_app; then
  log_error "Failed to restart application"
  log " Manual intervention required"
  exit 1
fi
|
||||
|
||||
# 步骤 5: 验证切换成功
|
||||
log ""
|
||||
log "[5/5] Verifying failover..."
|
||||
sleep 3
|
||||
|
||||
# 检查应用健康
|
||||
APP_HEALTHY=0
|
||||
for i in 1 2 3 4 5; do
|
||||
if check_app_health "$APP_URL"; then
|
||||
APP_HEALTHY=1
|
||||
break
|
||||
fi
|
||||
log " Retry $i/5 in 5 seconds..."
|
||||
sleep 5
|
||||
done
|
||||
|
||||
if [ "$APP_HEALTHY" -eq 0 ]; then
|
||||
log_error "Application is not healthy after failover"
|
||||
log " Check application logs and configuration"
|
||||
log "========================================"
|
||||
log "Failover FAILED (App Unhealthy)"
|
||||
log "========================================"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 检查数据库连接(通过应用)
|
||||
log " Verifying database connection via application..."
|
||||
if [ "$DRY_RUN" -eq 0 ]; then
|
||||
if curl -sf -m 10 "$APP_URL" >/dev/null 2>&1; then
|
||||
log " Application responding successfully"
|
||||
else
|
||||
log_error "Application not responding"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
# Final summary. Passwords embedded in the URLs are masked before logging.
log ""
log "========================================"
log "Failover Completed Successfully"
log "========================================"
log "Primary (old): $(printf '%s\n' "$PRIMARY_URL" | sed 's|\(://[^:/@]*\):[^@]*@|\1:***@|')"
log "Standby (new): $(printf '%s\n' "$STANDBY_URL" | sed 's|\(://[^:/@]*\):[^@]*@|\1:***@|')"
log "Application: $APP_URL"
log "Log file: $LOG_FILE"
log ""
log "Post-failover checklist:"
log " 1. Verify application functionality"
log " 2. Update monitoring alerts"
log " 3. Notify stakeholders"
log " 4. Plan primary database recovery"
log " 5. Schedule post-mortem review"
log ""
exit 0
|
||||
253
scripts/health-check.sh
Normal file
253
scripts/health-check.sh
Normal file
@@ -0,0 +1,253 @@
|
||||
#!/bin/bash
|
||||
# 健康检查脚本
|
||||
# 用法: ./health-check.sh
|
||||
# 检查应用、数据库、磁盘空间、备份新鲜度,输出 JSON 报告
|
||||
|
||||
set -u
|
||||
|
||||
show_help() {
|
||||
cat <<EOF
|
||||
用法: $0 [选项]
|
||||
系统健康检查脚本,输出 JSON 格式报告
|
||||
|
||||
选项:
|
||||
--app-url URL 应用健康检查 URL(默认 http://localhost:8015)
|
||||
--no-app 跳过应用健康检查
|
||||
--no-db 跳过数据库检查
|
||||
--no-disk 跳过磁盘空间检查
|
||||
--no-backup 跳过备份新鲜度检查
|
||||
--disk-threshold PCT 磁盘空间阈值百分比(默认 90)
|
||||
--backup-max-age HRS 备份最大年龄(小时,默认 24)
|
||||
--help, -h 显示帮助信息
|
||||
|
||||
环境变量:
|
||||
DATABASE_URL 数据库连接 URL
|
||||
HEALTH_CHECK_URL 应用健康检查 URL(默认 http://localhost:8015)
|
||||
BACKUP_DIR 备份目录(默认 ./backups)
|
||||
HEALTH_CHECK_DISK_THRESHOLD 磁盘阈值(默认 90)
|
||||
HEALTH_CHECK_BACKUP_MAX_AGE 备份最大年龄(小时,默认 24)
|
||||
|
||||
退出码:
|
||||
0 健康
|
||||
1 异常
|
||||
EOF
|
||||
}
|
||||
|
||||
# 解析参数
|
||||
CHECK_APP=1
|
||||
CHECK_DB=1
|
||||
CHECK_DISK=1
|
||||
CHECK_BACKUP=1
|
||||
APP_URL=""
|
||||
DISK_THRESHOLD=""
|
||||
BACKUP_MAX_AGE=""
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--help|-h)
|
||||
show_help
|
||||
exit 0
|
||||
;;
|
||||
--app-url)
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "ERROR: --app-url requires an argument" >&2
|
||||
exit 1
|
||||
fi
|
||||
APP_URL="$2"
|
||||
shift 2
|
||||
;;
|
||||
--no-app) CHECK_APP=0; shift ;;
|
||||
--no-db) CHECK_DB=0; shift ;;
|
||||
--no-disk) CHECK_DISK=0; shift ;;
|
||||
--no-backup) CHECK_BACKUP=0; shift ;;
|
||||
--disk-threshold)
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "ERROR: --disk-threshold requires an argument" >&2
|
||||
exit 1
|
||||
fi
|
||||
DISK_THRESHOLD="$2"
|
||||
shift 2
|
||||
;;
|
||||
--backup-max-age)
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "ERROR: --backup-max-age requires an argument" >&2
|
||||
exit 1
|
||||
fi
|
||||
BACKUP_MAX_AGE="$2"
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
echo "ERROR: Unknown argument: $1" >&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# 配置
|
||||
APP_URL="${APP_URL:-${HEALTH_CHECK_URL:-http://localhost:8015}}"
|
||||
BACKUP_DIR="${BACKUP_DIR:-./backups}"
|
||||
DISK_THRESHOLD="${DISK_THRESHOLD:-${HEALTH_CHECK_DISK_THRESHOLD:-90}}"
|
||||
BACKUP_MAX_AGE="${BACKUP_MAX_AGE:-${HEALTH_CHECK_BACKUP_MAX_AGE:-24}}"
|
||||
DATABASE_URL="${DATABASE_URL:-}"
|
||||
|
||||
# Escape a string for embedding inside a JSON string literal.
# Arguments: $1 - raw text
# Outputs:   the text with backslash, double quote, tab, newline and carriage
#            return replaced by their JSON escape sequences
json_escape() {
  local text=$1
  # Backslashes first, so the escapes added below are not doubled.
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\t'/\\t}
  # Raw line breaks are not allowed inside JSON strings; multi-line details
  # would otherwise make the whole report unparseable.
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  printf '%s\n' "$text"
}
|
||||
|
||||
# 检查结果数组
|
||||
RESULTS=""
|
||||
OVERALL_STATUS="healthy"
|
||||
CHECKS_PASSED=0
|
||||
CHECKS_FAILED=0
|
||||
CHECKS_WARNED=0
|
||||
|
||||
# Record the outcome of one health check.
# Globals:   RESULTS (appended), OVERALL_STATUS, CHECKS_PASSED, CHECKS_FAILED,
#            CHECKS_WARNED (updated)
# Arguments: $1 - check name
#            $2 - status: pass | fail | warn
#            $3 - human-readable message
#            $4 - optional detail text; omitted from the entry when empty
# Returns:   always 0
add_result() {
  local name="$1"
  local status="$2"
  local message="$3"
  local detail="${4:-}"

  local escaped_message escaped_detail
  escaped_message=$(json_escape "$message")
  escaped_detail=$(json_escape "$detail")

  local result_entry
  result_entry=" {\"name\": \"$name\", \"status\": \"$status\", \"message\": \"$escaped_message\""
  if [ -n "$detail" ]; then
    result_entry="$result_entry, \"detail\": \"$escaped_detail\""
  fi
  result_entry="$result_entry }"

  # Entries are joined with ",<newline>" to form the body of a JSON array.
  if [ -z "$RESULTS" ]; then
    RESULTS="$result_entry"
  else
    RESULTS="$RESULTS,
$result_entry"
  fi

  case "$status" in
    pass) CHECKS_PASSED=$((CHECKS_PASSED + 1)) ;;
    fail)
      CHECKS_FAILED=$((CHECKS_FAILED + 1))
      OVERALL_STATUS="unhealthy"
      ;;
    warn)
      CHECKS_WARNED=$((CHECKS_WARNED + 1))
      # A warning only downgrades a healthy status; it never masks a failure.
      if [ "$OVERALL_STATUS" = "healthy" ]; then
        OVERALL_STATUS="degraded"
      fi
      ;;
  esac
  # Explicit success: the function's status must not depend on which branch
  # ran last.
  return 0
}
|
||||
|
||||
# 1. Application health check
if [ "$CHECK_APP" -eq 1 ]; then
  if command -v curl >/dev/null 2>&1; then
    # curl prints the status code through -w even when it exits non-zero, so
    # the fallback must replace the value instead of being appended to it
    # (which produced codes such as "000000" or "500000"). -f is dropped so
    # that HTTP error responses are classified by their status code below.
    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -m 10 "$APP_URL" 2>/dev/null) || HTTP_CODE="000"
    if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "302" ]; then
      add_result "app" "pass" "Application is healthy" "HTTP $HTTP_CODE from $APP_URL"
    elif [ "$HTTP_CODE" = "000" ]; then
      add_result "app" "fail" "Application is not reachable" "Cannot connect to $APP_URL"
    else
      add_result "app" "fail" "Application returned error" "HTTP $HTTP_CODE from $APP_URL"
    fi
  else
    add_result "app" "warn" "curl not available, skipping app check" ""
  fi
fi
|
||||
|
||||
# 2. Database connectivity check
if [ "$CHECK_DB" -eq 1 ]; then
  if [ -z "$DATABASE_URL" ]; then
    add_result "database" "warn" "DATABASE_URL not set, skipping DB check" ""
  else
    # Parse DATABASE_URL (scheme://user[:pass]@host[:port]/dbname[?params]).
    # The port is optional and defaults to 3306, the MySQL default.
    _hc_rest=${DATABASE_URL#*://}
    DB_USER=""
    DB_PASS=""
    case "$_hc_rest" in
      *@*)
        _hc_userinfo=${_hc_rest%%@*}
        _hc_rest=${_hc_rest#*@}
        DB_USER=${_hc_userinfo%%:*}
        case "$_hc_userinfo" in
          *:*) DB_PASS=${_hc_userinfo#*:} ;;
        esac
        ;;
    esac
    _hc_hostport=${_hc_rest%%[/?]*}
    DB_HOST=${_hc_hostport%%:*}
    DB_PORT=""
    case "$_hc_hostport" in
      *:*) DB_PORT=${_hc_hostport#*:} ;;
    esac
    case "$DB_PORT" in
      '' | *[!0-9]*) DB_PORT=3306 ;;
    esac
    DB_NAME=""
    case "$_hc_rest" in
      */*)
        DB_NAME=${_hc_rest#*/}
        DB_NAME=${DB_NAME%%\?*}
        ;;
    esac

    if command -v mysql >/dev/null 2>&1; then
      # stdout is discarded as well as stderr: this script's stdout is the
      # JSON report, and the SELECT 1 result would corrupt it.
      if mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" \
        -e "SELECT 1;" >/dev/null 2>&1; then
        # Fetch the server version for the detail text.
        DB_VERSION=$(mysql -h "$DB_HOST" -P "$DB_PORT" -u "$DB_USER" -p"$DB_PASS" \
          -e "SELECT VERSION();" -s -N 2>/dev/null || echo "unknown")
        add_result "database" "pass" "Database connection successful" "Host: $DB_HOST:$DB_PORT, DB: $DB_NAME, Version: $DB_VERSION"
      else
        add_result "database" "fail" "Database connection failed" "Cannot connect to $DB_HOST:$DB_PORT/$DB_NAME"
      fi
    else
      add_result "database" "warn" "mysql client not available, skipping DB check" ""
    fi
  fi
fi
|
||||
|
||||
# 3. Disk space check for the filesystem holding the current directory
if [ "$CHECK_DISK" -eq 1 ]; then
  # df may wrap a long device name onto its own line; joining every
  # non-header line keeps the column positions used below stable.
  DISK_INFO=$(df -h . 2>/dev/null | awk 'NR > 1 { printf "%s ", $0 }')
  DISK_USE_PCT=""
  if [ -n "$DISK_INFO" ]; then
    DISK_USE_PCT=$(echo "$DISK_INFO" | awk '{print $5}' | sed 's/%//')
    DISK_USE_HUMAN=$(echo "$DISK_INFO" | awk '{print $3}')
    DISK_TOTAL_HUMAN=$(echo "$DISK_INFO" | awk '{print $2}')
    DISK_AVAIL_HUMAN=$(echo "$DISK_INFO" | awk '{print $4}')
    DISK_MOUNT=$(echo "$DISK_INFO" | awk '{print $6}')
  fi

  case "$DISK_USE_PCT" in
    '' | *[!0-9]*)
      # No output, or a usage column that is not a plain integer: report a
      # warning instead of failing the numeric comparisons below.
      add_result "disk" "warn" "Could not determine disk usage" ""
      ;;
    *)
      if [ "$DISK_USE_PCT" -ge "$DISK_THRESHOLD" ]; then
        add_result "disk" "fail" "Disk space critical" "Usage: ${DISK_USE_PCT}% (threshold: ${DISK_THRESHOLD}%), Used: ${DISK_USE_HUMAN}/${DISK_TOTAL_HUMAN}, Available: ${DISK_AVAIL_HUMAN}, Mount: ${DISK_MOUNT}"
      elif [ "$DISK_USE_PCT" -ge $((DISK_THRESHOLD - 10)) ]; then
        # Within 10 percentage points of the threshold.
        add_result "disk" "warn" "Disk space warning" "Usage: ${DISK_USE_PCT}% (threshold: ${DISK_THRESHOLD}%), Used: ${DISK_USE_HUMAN}/${DISK_TOTAL_HUMAN}, Available: ${DISK_AVAIL_HUMAN}, Mount: ${DISK_MOUNT}"
      else
        add_result "disk" "pass" "Disk space OK" "Usage: ${DISK_USE_PCT}%, Used: ${DISK_USE_HUMAN}/${DISK_TOTAL_HUMAN}, Available: ${DISK_AVAIL_HUMAN}, Mount: ${DISK_MOUNT}"
      fi
      ;;
  esac
fi
|
||||
|
||||
# 4. 备份新鲜度检查
|
||||
if [ "$CHECK_BACKUP" -eq 1 ]; then
|
||||
if [ -d "$BACKUP_DIR" ]; then
|
||||
LATEST_BACKUP=$(ls -t "$BACKUP_DIR"/db_backup_*.sql.gz 2>/dev/null | head -1)
|
||||
if [ -n "$LATEST_BACKUP" ]; then
|
||||
# 获取备份文件修改时间(秒)
|
||||
BACKUP_MTIME=$(stat -c%Y "$LATEST_BACKUP" 2>/dev/null || stat -f%m "$LATEST_BACKUP" 2>/dev/null)
|
||||
CURRENT_TIME=$(date +%s)
|
||||
BACKUP_AGE_HOURS=$(( (CURRENT_TIME - BACKUP_MTIME) / 3600 ))
|
||||
|
||||
BACKUP_SIZE=$(stat -c%s "$LATEST_BACKUP" 2>/dev/null || stat -f%z "$LATEST_BACKUP" 2>/dev/null)
|
||||
BACKUP_SIZE_HUMAN=$(echo "$BACKUP_SIZE" | awk '{split("B KB MB GB TB",v);i=1;while($1>=1024&&i<5){$1/=1024;i++};printf "%.1f%s",$1,v[i]}')
|
||||
|
||||
if [ "$BACKUP_AGE_HOURS" -gt "$BACKUP_MAX_AGE" ]; then
|
||||
add_result "backup" "fail" "Backup is stale" "Latest backup is ${BACKUP_AGE_HOURS}h old (max: ${BACKUP_MAX_AGE}h), File: $(basename "$LATEST_BACKUP"), Size: $BACKUP_SIZE_HUMAN"
|
||||
elif [ "$BACKUP_AGE_HOURS" -gt $((BACKUP_MAX_AGE / 2)) ]; then
|
||||
add_result "backup" "warn" "Backup getting old" "Latest backup is ${BACKUP_AGE_HOURS}h old (max: ${BACKUP_MAX_AGE}h), File: $(basename "$LATEST_BACKUP"), Size: $BACKUP_SIZE_HUMAN"
|
||||
else
|
||||
add_result "backup" "pass" "Backup is fresh" "Latest backup is ${BACKUP_AGE_HOURS}h old, File: $(basename "$LATEST_BACKUP"), Size: $BACKUP_SIZE_HUMAN"
|
||||
fi
|
||||
else
|
||||
add_result "backup" "fail" "No backup files found" "No db_backup_*.sql.gz files in $BACKUP_DIR"
|
||||
fi
|
||||
else
|
||||
add_result "backup" "warn" "Backup directory does not exist" "$BACKUP_DIR"
|
||||
fi
|
||||
fi
|
||||
|
||||
# 输出 JSON 报告
|
||||
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
cat <<EOF
|
||||
{
|
||||
"timestamp": "$TIMESTAMP",
|
||||
"status": "$OVERALL_STATUS",
|
||||
"summary": {
|
||||
"total": $((CHECKS_PASSED + CHECKS_FAILED + CHECKS_WARNED)),
|
||||
"passed": $CHECKS_PASSED,
|
||||
"failed": $CHECKS_FAILED,
|
||||
"warned": $CHECKS_WARNED
|
||||
},
|
||||
"checks": [
|
||||
$RESULTS
|
||||
]
|
||||
}
|
||||
EOF
|
||||
|
||||
if [ "$OVERALL_STATUS" = "unhealthy" ]; then
|
||||
exit 1
|
||||
fi
|
||||
exit 0
|
||||
137
scripts/security-scan.ps1
Normal file
137
scripts/security-scan.ps1
Normal file
@@ -0,0 +1,137 @@
|
||||
# Local security scan script (Windows PowerShell)
# Usage: .\scripts\security-scan.ps1
# Runs npm audit and a Trivy filesystem scan and prints a colored report.
# Exit code: 0 = no high-severity findings, 1 = high-severity findings present

# Keep going after non-terminating errors; failures are tracked via $script:HasHigh.
$ErrorActionPreference = "Continue"

# The project root is the parent of the directory containing this script.
$ProjectRoot = Resolve-Path "$PSScriptRoot\.."
Set-Location $ProjectRoot

# Failure flag: set to 1 by Write-Fail and read when choosing the exit code.
$script:HasHigh = 0
|
||||
|
||||
# Prints $msg as a section banner framed by two cyan rule lines.
function Write-Header($msg) {
    $rule = "================================================"
    Write-Host $rule -ForegroundColor Cyan
    Write-Host " $msg" -ForegroundColor Cyan
    Write-Host $rule -ForegroundColor Cyan
}
|
||||
# Status-line helpers: each prints $msg to the host with a colored severity tag.
function Write-Pass($msg) {
    Write-Host "[PASS] $msg" -ForegroundColor Green
}

function Write-Warn2($msg) {
    Write-Host "[WARN] $msg" -ForegroundColor Yellow
}

# Besides printing, Write-Fail sets the script-wide failure flag to 1.
function Write-Fail($msg) {
    Write-Host "[FAIL] $msg" -ForegroundColor Red
    $script:HasHigh = 1
}

function Write-Info2($msg) {
    Write-Host "[INFO] $msg" -ForegroundColor Blue
}
|
||||
|
||||
# Returns $true when Get-Command can resolve $name, otherwise $false.
# Lookup errors are silenced so a missing command yields $false, not an error.
function Test-Command($name) {
    $resolved = Get-Command $name -ErrorAction SilentlyContinue
    return [bool]$resolved
}
|
||||
|
||||
Write-Header "本地安全扫描"
Write-Info2 "项目目录: $ProjectRoot"
Write-Host ""

# ------------------------------------------------
# 1. npm audit
# ------------------------------------------------
Write-Header "1/2 npm audit (依赖审计)"

if (-not (Test-Command "npm")) {
    Write-Fail "未检测到 npm,请先安装 Node.js"
    exit 1
}

# Capture the JSON report in memory instead of a fixed-name file under %TEMP%.
$auditRaw = (npm audit --json 2>$null | Out-String).Trim()

if ($auditRaw) {
    try {
        $audit = $auditRaw | ConvertFrom-Json
        $v = $audit.metadata.vulnerabilities
        # A report without the summary (e.g. an npm error document) must not
        # be read as "zero vulnerabilities"; route it to the fallback below.
        if ($null -eq $v) { throw "npm audit report has no vulnerability summary" }

        $critical = if ($v.critical) { [int]$v.critical } else { 0 }
        $high     = if ($v.high)     { [int]$v.high }     else { 0 }
        $moderate = if ($v.moderate) { [int]$v.moderate } else { 0 }
        $low      = if ($v.low)      { [int]$v.low }      else { 0 }

        Write-Host -NoNewline " critical: "; Write-Host -NoNewline "$critical " -ForegroundColor Red
        Write-Host -NoNewline " high: "; Write-Host -NoNewline "$high " -ForegroundColor Red
        Write-Host -NoNewline " moderate: "; Write-Host -NoNewline "$moderate " -ForegroundColor Yellow
        Write-Host -NoNewline " low: "; Write-Host "$low" -ForegroundColor Green

        if ($critical -gt 0 -or $high -gt 0) {
            Write-Fail "npm audit 发现 critical/high 漏洞"
        } else {
            Write-Pass "npm audit 无 critical/high 漏洞"
        }
    } catch {
        Write-Warn2 "npm audit 报告解析失败,显示原始输出"
        npm audit --audit-level=moderate
        # Counts are unavailable, so rely on npm's own failure threshold:
        # a non-zero exit at --audit-level=high marks the scan as failed.
        npm audit --audit-level=high 2>$null | Out-Null
        if ($LASTEXITCODE -ne 0) {
            Write-Fail "npm audit 发现 critical/high 漏洞"
        }
    }

    Set-Content -Path "$ProjectRoot\audit-report.json" -Value $auditRaw -Encoding utf8
    Write-Info2 "报告已保存: audit-report.json"
} else {
    # No output means the audit cannot vouch for a clean result: fail closed.
    Write-Fail "npm audit 未生成报告"
}
Write-Host ""
|
||||
|
||||
# ------------------------------------------------
# 2. Trivy filesystem scan
# ------------------------------------------------
Write-Header "2/2 Trivy FS Scan (文件系统扫描)"

if (-not (Test-Command "trivy")) {
    Write-Warn2 "未检测到 trivy,跳过文件系统扫描"
    Write-Info2 "安装 Trivy: https://aquasecurity.github.io/trivy/latest/getting-started/installation/"
} else {
    $trivyReport = "$ProjectRoot\trivy-fs-report.json"

    # Drop any report left by an earlier run so a failed scan cannot be
    # mistaken for fresh results.
    if (Test-Path $trivyReport) { Remove-Item $trivyReport -Force }

    trivy fs --format json --output $trivyReport --exit-code 0 . 2>$null
    if ($LASTEXITCODE -eq 0) {
        Write-Pass "Trivy 扫描完成"
    } else {
        Write-Warn2 "Trivy 扫描返回非零状态(可能存在漏洞)"
    }

    if (Test-Path $trivyReport) {
        try {
            $trivy = Get-Content $trivyReport -Raw | ConvertFrom-Json

            # Flatten the vulnerabilities of every result; results without any are skipped.
            $allVulns = @($trivy.Results | ForEach-Object { $_.Vulnerabilities } | Where-Object { $_ })

            $total    = $allVulns.Count
            $critical = @($allVulns | Where-Object { $_.Severity -eq "CRITICAL" }).Count
            $high     = @($allVulns | Where-Object { $_.Severity -eq "HIGH" }).Count
            $medium   = @($allVulns | Where-Object { $_.Severity -eq "MEDIUM" }).Count
            $low      = @($allVulns | Where-Object { $_.Severity -eq "LOW" }).Count

            Write-Host -NoNewline " 总计: $total critical: "; Write-Host -NoNewline "$critical " -ForegroundColor Red
            Write-Host -NoNewline " high: "; Write-Host -NoNewline "$high " -ForegroundColor Red
            Write-Host -NoNewline " medium: "; Write-Host -NoNewline "$medium " -ForegroundColor Yellow
            Write-Host -NoNewline " low: "; Write-Host "$low" -ForegroundColor Green

            if ($critical -gt 0 -or $high -gt 0) {
                Write-Fail "Trivy 发现 critical/high 漏洞"
            } else {
                Write-Pass "Trivy 无 critical/high 漏洞"
            }
            Write-Info2 "报告已保存: trivy-fs-report.json"
        } catch {
            # An unreadable report gives no verdict; fail closed instead of
            # letting the script exit 0.
            Write-Fail "Trivy 报告解析失败"
        }
    }

    Write-Host ""
    Write-Info2 "Trivy 表格视图:"
    trivy fs --format table --exit-code 0 .
}
Write-Host ""
|
||||
|
||||
# ------------------------------------------------
# Summary
# ------------------------------------------------
Write-Header "扫描汇总"

# $script:HasHigh is 1 if any earlier step called Write-Fail.
if ($script:HasHigh -ne 0) {
    Write-Fail "发现高危漏洞,请尽快处理 (exit 1)"
    Write-Host " SLA: critical 24h / high 7d / medium 30d / low 90d" -ForegroundColor Blue
    exit 1
}

Write-Pass "未发现高危漏洞 (exit 0)"
exit 0
|
||||
133
scripts/security-scan.sh
Normal file
133
scripts/security-scan.sh
Normal file
@@ -0,0 +1,133 @@
|
||||
#!/bin/bash
# Local security scan script (Linux/macOS)
# Usage: ./scripts/security-scan.sh
# Runs npm audit and a Trivy filesystem scan and prints a colored report.
# Exit status: 0 = no high-severity findings, 1 = high-severity findings present

# Note: errexit (-e) is not enabled, so commands whose failure matters are
# checked explicitly.
set -uo pipefail

# ANSI color escapes; stored unexpanded and interpreted later by `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# The project root is the parent of the directory containing this script.
# Abort if it cannot be resolved or entered: without -e the scan would
# otherwise continue in whatever directory the caller happened to be in.
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" || {
  echo "[FAIL] 无法定位项目根目录" >&2
  exit 1
}
cd "$PROJECT_ROOT" || {
  echo "[FAIL] 无法进入项目目录: $PROJECT_ROOT" >&2
  exit 1
}

# Failure flag: set to 1 by print_err and read when choosing the exit status.
HAS_HIGH=0
|
||||
|
||||
# Prints $1 as a section banner framed by two cyan rule lines.
# Globals:   CYAN, NC (read)
# Arguments: $1 - banner text
# Outputs:   three lines on stdout
print_header() {
  local rule="================================================"
  echo -e "${CYAN}${rule}${NC}"
  echo -e "${CYAN} $1${NC}"
  echo -e "${CYAN}${rule}${NC}"
}
|
||||
|
||||
# Status-line helpers: each prints $1 on stdout with a colored severity tag.
# Globals: GREEN, YELLOW, RED, BLUE, NC (read); HAS_HIGH (written by print_err)
print_ok() {
  echo -e "${GREEN}[PASS]${NC} $1"
}

print_warn() {
  echo -e "${YELLOW}[WARN]${NC} $1"
}

# Besides printing, print_err sets the script-wide failure flag to 1.
print_err() {
  echo -e "${RED}[FAIL]${NC} $1"
  HAS_HIGH=1
}

print_info() {
  echo -e "${BLUE}[INFO]${NC} $1"
}
|
||||
|
||||
# Reports whether a command is available.
# Arguments: $1 - command name
# Returns:   the status of `command -v` (0 when found); prints nothing
command_exists() {
  command -v "$1" >/dev/null 2>&1
}
|
||||
|
||||
print_header "本地安全扫描"
print_info "项目目录: $PROJECT_ROOT"
echo ""

# ------------------------------------------------
# 1. npm audit
# ------------------------------------------------
print_header "1/2 npm audit (依赖审计)"

if ! command_exists npm; then
  print_err "未检测到 npm,请先安装 Node.js"
  exit 1
fi

# Stage the raw report in a private, unpredictable temp file rather than a
# fixed name in the shared /tmp directory.
AUDIT_TMP="$(mktemp "${TMPDIR:-/tmp}/audit-report.XXXXXX")" || {
  print_err "无法创建临时文件"
  exit 1
}

# npm audit exits non-zero when it finds vulnerabilities, so its status is
# deliberately ignored here; the verdict comes from the report contents.
npm audit --json > "$AUDIT_TMP" 2>/dev/null || true

if [ -s "$AUDIT_TMP" ]; then
  # Extract "critical high moderate low" in one jq pass. `objects` yields
  # nothing when the vulnerability summary is absent (e.g. an npm error
  # document), so a broken report is never read as "zero vulnerabilities".
  AUDIT_COUNTS=""
  if command_exists jq; then
    AUDIT_COUNTS="$(jq -r '
      .metadata.vulnerabilities | objects
      | [.critical, .high, .moderate, .low] | map(. // 0) | @tsv
    ' "$AUDIT_TMP" 2>/dev/null)" || AUDIT_COUNTS=""
    [ -n "$AUDIT_COUNTS" ] || print_warn "npm audit 报告解析失败,显示原始输出"
  else
    print_warn "未安装 jq,跳过漏洞计数,显示原始报告"
  fi
  read -r CRITICAL HIGH MODERATE LOW <<<"$AUDIT_COUNTS"

  if [[ "$CRITICAL" =~ ^[0-9]+$ && "$HIGH" =~ ^[0-9]+$ ]]; then
    echo -e " critical: ${RED}${CRITICAL}${NC} high: ${RED}${HIGH}${NC} moderate: ${YELLOW}${MODERATE}${NC} low: ${GREEN}${LOW}${NC}"

    if [ "$CRITICAL" -gt 0 ] || [ "$HIGH" -gt 0 ]; then
      print_err "npm audit 发现 critical/high 漏洞"
    else
      print_ok "npm audit 无 critical/high 漏洞"
    fi
  else
    # Counts are unavailable: show the human-readable report, then rely on
    # npm's own failure threshold so high/critical findings still fail the scan.
    npm audit --audit-level=moderate || print_warn "npm audit 发现漏洞"
    if ! npm audit --audit-level=high >/dev/null 2>&1; then
      print_err "npm audit 发现 critical/high 漏洞"
    fi
  fi

  # Save the report to the project root. Redirecting through cat creates the
  # file with the default umask permissions instead of mktemp's 0600.
  if cat -- "$AUDIT_TMP" > "$PROJECT_ROOT/audit-report.json"; then
    print_info "报告已保存: audit-report.json"
  else
    print_warn "报告保存失败: audit-report.json"
  fi
else
  # No output means the audit cannot vouch for a clean result: fail closed.
  print_err "npm audit 未生成报告"
fi
rm -f -- "$AUDIT_TMP"
echo ""
|
||||
|
||||
# ------------------------------------------------
# 2. Trivy filesystem scan
# ------------------------------------------------
print_header "2/2 Trivy FS Scan (文件系统扫描)"

if ! command_exists trivy; then
  print_warn "未检测到 trivy,跳过文件系统扫描"
  print_info "安装 Trivy: https://aquasecurity.github.io/trivy/latest/getting-started/installation/"
else
  TRIVY_REPORT="$PROJECT_ROOT/trivy-fs-report.json"

  # Drop any report left by an earlier run so a failed scan cannot be
  # mistaken for fresh results.
  rm -f -- "$TRIVY_REPORT"

  if trivy fs --format json --output "$TRIVY_REPORT" --exit-code 0 . >/dev/null 2>&1; then
    print_ok "Trivy 扫描完成"
  else
    print_warn "Trivy 扫描返回非零状态(可能存在漏洞)"
  fi

  if [ -f "$TRIVY_REPORT" ]; then
    if command_exists jq; then
      # One jq pass yields "total critical high medium low"; on a parse error
      # it yields nothing, which is treated as a failure rather than as zero.
      TRIVY_COUNTS="$(jq -r '
        [.Results[]?.Vulnerabilities[]?.Severity] as $sev
        | [($sev | length)]
          + [("CRITICAL", "HIGH", "MEDIUM", "LOW") as $lvl
             | ($sev | map(select(. == $lvl)) | length)]
        | @tsv
      ' "$TRIVY_REPORT" 2>/dev/null)" || TRIVY_COUNTS=""
      read -r TOTAL CRITICAL HIGH MEDIUM LOW <<<"$TRIVY_COUNTS"

      if [[ "$CRITICAL" =~ ^[0-9]+$ && "$HIGH" =~ ^[0-9]+$ ]]; then
        echo -e " 总计: ${TOTAL} critical: ${RED}${CRITICAL}${NC} high: ${RED}${HIGH}${NC} medium: ${YELLOW}${MEDIUM}${NC} low: ${GREEN}${LOW}${NC}"

        if [ "$CRITICAL" -gt 0 ] || [ "$HIGH" -gt 0 ]; then
          print_err "Trivy 发现 critical/high 漏洞"
        else
          print_ok "Trivy 无 critical/high 漏洞"
        fi
      else
        print_err "Trivy 报告解析失败"
      fi
    else
      # Without jq the report cannot be counted; let Trivy itself decide by
      # exiting non-zero when HIGH or CRITICAL findings exist.
      print_warn "未安装 jq,改用 Trivy 退出码判断 critical/high 漏洞"
      if trivy fs --severity HIGH,CRITICAL --exit-code 1 . >/dev/null 2>&1; then
        print_ok "Trivy 无 critical/high 漏洞"
      else
        print_err "Trivy 发现 critical/high 漏洞"
      fi
    fi
    print_info "报告已保存: trivy-fs-report.json"
  fi

  # Human-readable table view; --exit-code 0 keeps findings from failing it.
  echo ""
  print_info "Trivy 表格视图:"
  trivy fs --format table --exit-code 0 . || true
fi
echo ""
|
||||
|
||||
# ------------------------------------------------
# Summary
# ------------------------------------------------
print_header "扫描汇总"

# HAS_HIGH is 1 if any earlier step called print_err.
if [ "$HAS_HIGH" -ne 0 ]; then
  print_err "发现高危漏洞,请尽快处理 (exit 1)"
  echo -e " ${BLUE}SLA:${NC} critical 24h / high 7d / medium 30d / low 90d"
  exit 1
fi

print_ok "未发现高危漏洞 (exit 0)"
exit 0
|
||||
Reference in New Issue
Block a user