强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

GoAccess 日志分析完全指南 / 10 - 自动化集成

10 - 自动化集成

10.1 概述

手动运行 GoAccess 适合临时分析,但生产环境中需要自动化来确保报告的及时性和一致性。本章介绍如何将 GoAccess 集成到自动化工作流中:

  • 定时报告生成(日报、周报、月报)
  • 异常告警(流量突增、错误率飙升)
  • CI/CD 流水线集成
  • 邮件/消息通知
  • 与外部系统联动

10.2 定时报告

10.2.1 Cron 定时任务

# Open the current user's crontab for editing
crontab -e
# Daily report: every day at 01:00; stdout and stderr are appended to the cron log
0 1 * * * /usr/local/bin/daily_report.sh >> /var/log/goaccess_cron.log 2>&1

# Weekly report: every Monday at 02:00
0 2 * * 1 /usr/local/bin/weekly_report.sh >> /var/log/goaccess_cron.log 2>&1

# Monthly report: on the 1st of each month at 03:00
0 3 1 * * /usr/local/bin/monthly_report.sh >> /var/log/goaccess_cron.log 2>&1

# Hourly report at minute 0 (intended for the dashboard)
0 * * * * /usr/local/bin/hourly_report.sh >> /var/log/goaccess_cron.log 2>&1

10.2.2 日报脚本

#!/bin/bash
# daily_report.sh — build the GoAccess HTML report for yesterday's traffic.
#
# Reads:  LOG_FILE (nginx access log, COMBINED format)
# Writes: REPORT_DIR/YYYY-MM-DD.html
# Exit:   0 on success, 1 when goaccess fails or the report is missing/empty

set -euo pipefail

LOG_FILE="/var/log/nginx/access.log"
REPORT_DIR="/var/www/html/stats/daily"
DATE=$(date -d "yesterday" +%Y-%m-%d)
# Log timestamps carry English month abbreviations; force the C locale so %b
# does not follow whatever locale the caller happens to run under.
LOG_DATE=$(LC_ALL=C date -d "yesterday" +%d/%b/%Y)
REPORT="${REPORT_DIR}/${DATE}.html"

mkdir -p "${REPORT_DIR}"

# goaccess diagnostics are captured and replayed only when the run fails, so the
# cron log stays quiet on success but still explains a failure.
ERR_FILE=$(mktemp)
trap 'rm -f -- "${ERR_FILE}"' EXIT

# Keep only yesterday's lines (matched on the "[dd/Mon/yyyy:" timestamp prefix)
# and feed them to goaccess. The pipeline is tested with `if` because under
# `set -e` a bare failing pipeline would end the script before any message.
# NOTE(review): --html-title, --exclude and the pairing of -o with
# --process-and-exit are kept as published; confirm them against the installed
# goaccess man page.
if ! awk -v day="${LOG_DATE}" 'index($0, "[" day ":") > 0' "${LOG_FILE}" | \
  goaccess --log-format=COMBINED \
  -o "${REPORT}" \
  --html-title="日报 ${DATE}" \
  --html-prefs='{"theme":"bright","perPage":30}' \
  --exclude='(bot|crawler|spider)' \
  --exclude='\.(css|js|jpg|png|gif|ico|svg|woff2?)$' \
  --process-and-exit - 2>"${ERR_FILE}"; then
    echo "[${DATE}] 日报生成失败" >&2
    cat "${ERR_FILE}" >&2
    exit 1
fi

# Verify that a non-empty report was written
if [ -s "${REPORT}" ]; then
    echo "[${DATE}] 日报生成成功: ${REPORT} ($(du -h "${REPORT}" | cut -f1))"
else
    echo "[${DATE}] 日报生成失败" >&2
    exit 1
fi

# Remove reports older than 90 days; a cleanup problem must not turn a
# successful run into a failure.
find "${REPORT_DIR}" -name "*.html" -mtime +90 -delete 2>/dev/null || true

10.2.3 周报脚本

#!/bin/bash
# weekly_report.sh — build the GoAccess HTML report for the previous
# Monday-to-Sunday week.
#
# Reads:  rotated and current nginx access logs under LOG_DIR
# Writes: REPORT_DIR/<ISO-year>-W<ISO-week>.html
# Exit:   non-zero when goaccess fails or the report is missing/empty

set -euo pipefail

LOG_DIR="/var/log/nginx"
REPORT_DIR="/var/www/html/stats/weekly"

# Day of week, 1 (Monday) .. 7 (Sunday). Last week's Monday is DOW+6 days back
# and last week's Sunday is DOW days back, whichever weekday this runs on.
# ("last monday -7 days" lands two weeks back when the script runs on a Monday.)
DOW=$(date +%u)

# %G is the ISO week-based year that belongs with %V; %Y disagrees with it
# around New Year (2024-12-30 is in week 01 of ISO year 2025).
WEEK=$(date -d "$(( DOW + 6 )) days ago" +%G-W%V)
REPORT="${REPORT_DIR}/${WEEK}.html"

mkdir -p "${REPORT_DIR}"

# The seven dates of last week as they appear in log timestamps (dd/Mon/yyyy).
# LC_ALL=C keeps the month abbreviation in English.
DAYS=""
for (( back = DOW + 6; back >= DOW; back-- )); do
    DAYS+="$(LC_ALL=C date -d "${back} days ago" +%d/%b/%Y) "
done

# goaccess diagnostics are replayed only on failure
ERR_FILE=$(mktemp)
trap 'rm -f -- "${ERR_FILE}"' EXIT

# Merge all available logs and keep the lines whose timestamp date is one of
# the seven wanted days. Each line is tested on its own, so the order in which
# the files are concatenated does not matter.
# NOTE(review): --html-title, --exclude and the pairing of -o with
# --process-and-exit are kept as published; confirm them against the installed
# goaccess man page.
if ! {
  zcat "${LOG_DIR}"/access.log.*.gz 2>/dev/null || true
  cat "${LOG_DIR}"/access.log.1 2>/dev/null || true
  cat "${LOG_DIR}"/access.log 2>/dev/null || true
} | awk -v days="${DAYS}" '
  BEGIN { n = split(days, d, " "); for (i = 1; i <= n; i++) wanted[d[i]] = 1 }
  match($0, /\[[0-9][0-9]\/[A-Za-z][A-Za-z][A-Za-z]\/[0-9][0-9][0-9][0-9]:/) &&
    (substr($0, RSTART + 1, 11) in wanted)
' | goaccess --log-format=COMBINED \
  -o "${REPORT}" \
  --html-title="周报 ${WEEK}" \
  --html-prefs='{"theme":"bright","perPage":50}' \
  --exclude='(bot|crawler|spider)' \
  --exclude='\.(css|js|jpg|png|gif|ico|svg|woff2?)$' \
  --process-and-exit - 2>"${ERR_FILE}"; then
    echo "周报生成失败: ${REPORT}" >&2
    cat "${ERR_FILE}" >&2
    exit 1
fi

if [ ! -s "${REPORT}" ]; then
    echo "周报生成失败: ${REPORT}" >&2
    exit 1
fi

echo "周报已生成: ${REPORT}"

10.2.4 月报脚本

#!/bin/bash
# monthly_report.sh — build the GoAccess HTML report for the previous calendar
# month.
#
# Reads:  rotated and current nginx access logs under LOG_DIR
# Writes: REPORT_DIR/YYYY-MM.html
# Exit:   non-zero when goaccess fails or the report is missing/empty

set -euo pipefail

LOG_DIR="/var/log/nginx"
REPORT_DIR="/var/www/html/stats/monthly"

# Step back from the 1st of the current month so the result is the previous
# month on any day of the month (plain "last month" on the 31st can overflow
# into the current month).
THIS_MONTH_START=$(date +%Y-%m-01)
MONTH=$(date -d "${THIS_MONTH_START} -1 month" +%Y-%m)
# Same month as it appears inside log timestamps, e.g. "Oct/2024"
LOG_MONTH=$(LC_ALL=C date -d "${THIS_MONTH_START} -1 month" +%b/%Y)
REPORT="${REPORT_DIR}/${MONTH}.html"

mkdir -p "${REPORT_DIR}"

# goaccess diagnostics are replayed only on failure
ERR_FILE=$(mktemp)
trap 'rm -f -- "${ERR_FILE}"' EXIT

# Merge all available logs and keep last month's lines. The filtering is done
# here with awk: the previous --date-range="${MONTH//-/01}-${MONTH//-/31}"
# expanded to a string such as "20240105-20243105", which is not a date range.
# NOTE(review): --html-title, --exclude and the pairing of -o with
# --process-and-exit are kept as published; confirm them against the installed
# goaccess man page.
if ! {
  zcat "${LOG_DIR}"/access.log.*.gz 2>/dev/null || true
  cat "${LOG_DIR}"/access.log.1 2>/dev/null || true
  cat "${LOG_DIR}"/access.log 2>/dev/null || true
} | awk -v mon="${LOG_MONTH}" '
  match($0, /\[[0-9][0-9]\//) && substr($0, RSTART + 4, 9) == mon ":"
' | goaccess --log-format=COMBINED \
  -o "${REPORT}" \
  --html-title="月报 ${MONTH}" \
  --html-prefs='{"theme":"bright","perPage":50}' \
  --exclude='(bot|crawler|spider)' \
  --exclude='\.(css|js|jpg|png|gif|ico|svg|woff2?)$' \
  --process-and-exit - 2>"${ERR_FILE}"; then
    echo "月报生成失败: ${REPORT}" >&2
    cat "${ERR_FILE}" >&2
    exit 1
fi

if [ ! -s "${REPORT}" ]; then
    echo "月报生成失败: ${REPORT}" >&2
    exit 1
fi

echo "月报已生成: ${REPORT}"

10.3 告警机制

10.3.1 错误率告警

#!/bin/bash
# error_alert.sh — check the recent 5xx / 404 rates and raise alerts.
#
# Looks at the last 1000 lines of LOG_FILE. Every alert is appended to
# ALERT_LOG; a 5xx alert is additionally mailed to ALERT_EMAIL and posted to a
# webhook, a 404 alert additionally logs the five most frequent 404 URLs.

LOG_FILE="/var/log/nginx/access.log"
ALERT_LOG="/var/log/goaccess_alerts.log"
THRESHOLD_5XX=1.0    # 5xx error-rate threshold (percent)
THRESHOLD_404=5.0    # 404 error-rate threshold (percent)
# The recipient in the published script was mangled by e-mail obfuscation
# ("[email protected]"); replace this placeholder with the real address.
ALERT_EMAIL="admin@example.com"

# percent COUNT TOTAL — print COUNT as a percentage of TOTAL, two decimals.
# awk is used instead of bc so values below 1 keep their leading zero.
percent() {
    awk -v c="$1" -v t="$2" 'BEGIN { printf "%.2f", c * 100 / t }'
}

# exceeds VALUE LIMIT — succeed when VALUE is numerically greater than LIMIT
exceeds() {
    awk -v v="$1" -v lim="$2" 'BEGIN { exit !(v + 0 > lim + 0) }'
}

# Snapshot the last 1000 log lines
RECENT=$(tail -n 1000 "${LOG_FILE}")

# Test the text itself: `echo "" | wc -l` reports 1, so a line count taken
# from an empty snapshot can never be 0.
if [ -z "${RECENT}" ]; then
    echo "日志为空,跳过检查"
    exit 0
fi
TOTAL=$(printf '%s\n' "${RECENT}" | wc -l)

# Count errors (grep -c exits 1 on zero matches, hence `|| true`)
COUNT_5XX=$(printf '%s\n' "${RECENT}" | grep -cE '" 5[0-9]{2} ' || true)
COUNT_404=$(printf '%s\n' "${RECENT}" | grep -c '" 404 ' || true)

PCT_5XX=$(percent "${COUNT_5XX}" "${TOTAL}")
PCT_404=$(percent "${COUNT_404}" "${TOTAL}")

# 5xx alert
if exceeds "${PCT_5XX}" "${THRESHOLD_5XX}"; then
    MSG="[ALERT] 5xx 错误率异常: ${PCT_5XX}% (阈值: ${THRESHOLD_5XX}%)"
    echo "$(date '+%Y-%m-%d %H:%M:%S') ${MSG}" >> "${ALERT_LOG}"

    # Mail alert
    echo "${MSG}" | mail -s "⚠️ 5xx 错误率告警" "${ALERT_EMAIL}"

    # Webhook (e.g. WeCom, DingTalk, Slack). MSG is built above from numbers
    # only, so it is safe to embed in the JSON string as is.
    curl -s -X POST "https://hook.example.com/alert" \
      -H "Content-Type: application/json" \
      -d "{\"text\": \"${MSG}\"}"
fi

# 404 alert
if exceeds "${PCT_404}" "${THRESHOLD_404}"; then
    MSG="[ALERT] 404 错误率异常: ${PCT_404}% (阈值: ${THRESHOLD_404}%)"
    echo "$(date '+%Y-%m-%d %H:%M:%S') ${MSG}" >> "${ALERT_LOG}"

    # Five most frequent URLs among the 404 responses
    TOP_404=$(printf '%s\n' "${RECENT}" | grep '" 404 ' | \
      awk '{print $7}' | sort | uniq -c | sort -rn | head -5)

    echo "Top 404 URLs:" >> "${ALERT_LOG}"
    echo "${TOP_404}" >> "${ALERT_LOG}"
fi

10.3.2 流量异常告警

#!/bin/bash
# traffic_alert.sh — alert on sudden traffic spikes or drops.
#
# Counts the requests logged during the last CURRENT_WINDOW seconds (whole
# minutes, excluding the minute still in progress), compares the count with a
# moving average kept in HISTORICAL_FILE, mails an alert when the change is
# above +200% or below -80%, then updates the average.

LOG_FILE="/var/log/nginx/access.log"
# NOTE(review): this is persistent state under a predictable /tmp name; a
# directory writable only by the service user would be safer.
HISTORICAL_FILE="/tmp/historical_traffic.txt"
CURRENT_WINDOW=300  # window length in seconds (5 minutes)
# The recipient in the published script was mangled by e-mail obfuscation
# ("[email protected]"); replace this placeholder with the real address.
ALERT_EMAIL="admin@example.com"

# One "[dd/Mon/yyyy:HH:MM:" timestamp prefix per minute of the window.
# (`tail -300` would select the last 300 *lines*, not the last 300 seconds.)
PATTERNS=$(
    for (( m = CURRENT_WINDOW / 60; m >= 1; m-- )); do
        LC_ALL=C date -d "${m} minutes ago" '+[%d/%b/%Y:%H:%M:'
    done
)

# Requests inside the window: lines containing any of the prefixes.
# This scans the whole log file on every run.
CURRENT=$(grep -cF -e "${PATTERNS}" "${LOG_FILE}" 2>/dev/null || true)
CURRENT=${CURRENT:-0}

# Load the historical average; seed it with the current count when the file
# is missing or does not contain a plain non-negative integer.
AVG=""
if [ -f "${HISTORICAL_FILE}" ]; then
    AVG=$(cat "${HISTORICAL_FILE}")
fi
case "${AVG}" in
    '' | *[!0-9]*)
        AVG=${CURRENT}
        echo "${AVG}" > "${HISTORICAL_FILE}"
        ;;
esac

# Relative change in percent (0 when there is no baseline yet)
if [ "${AVG}" -gt 0 ]; then
    CHANGE=$(awk -v cur="${CURRENT}" -v avg="${AVG}" \
      'BEGIN { printf "%.2f", (cur - avg) * 100 / avg }')
else
    CHANGE=0
fi

# Spike alert (growth > 200%)
if awk -v c="${CHANGE}" 'BEGIN { exit !(c + 0 > 200) }'; then
    MSG="[ALERT] 流量突增: ${CURRENT} 请求/5分钟 (历史均值: ${AVG}, 增长: ${CHANGE}%)"
    echo "${MSG}" | mail -s "📈 流量突增告警" "${ALERT_EMAIL}"
fi

# Drop alert (decline > 80%)
if awk -v c="${CHANGE}" 'BEGIN { exit !(c + 0 < -80) }'; then
    MSG="[ALERT] 流量突降: ${CURRENT} 请求/5分钟 (历史均值: ${AVG}, 下降: ${CHANGE}%)"
    echo "${MSG}" | mail -s "📉 流量突降告警" "${ALERT_EMAIL}"
fi

# Update the moving average: 9 parts history, 1 part current, integer division
NEW_AVG=$(( (AVG * 9 + CURRENT) / 10 ))
echo "${NEW_AVG}" > "${HISTORICAL_FILE}"

10.3.3 完整告警框架

#!/bin/bash
# goaccess_alert.sh — GoAccess alerting framework

set -euo pipefail

# ============ Configuration ============
LOG_FILE="/var/log/nginx/access.log"
CONFIG_FILE="/etc/goaccess/alert.conf"
ALERT_LOG="/var/log/goaccess_alerts.log"

# Alert channels
# The address in the published script was mangled by e-mail obfuscation
# ("[email protected]"); replace this placeholder with the real recipient.
ALERT_EMAIL="admin@example.com"
WEBHOOK_URL="https://hooks.slack.com/services/xxx"
WEBHOOK_ENABLED=true

# ============ Load overrides ============
# Optional: when CONFIG_FILE cannot be sourced the defaults above stay in
# effect; errors from sourcing it are deliberately ignored.
source "${CONFIG_FILE}" 2>/dev/null || true

# ============ 函数定义 ============
# Append one timestamped entry to the alert log.
# Globals:   ALERT_LOG (appended to)
# Arguments: $1 - severity label, $2 - message text
log_alert() {
    local severity="$1"
    local text="$2"

    printf '%s [%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "${severity}" "${text}" \
      >> "${ALERT_LOG}"
}

# Mail an alert to the configured recipient (best effort).
# Globals:   ALERT_EMAIL (read)
# Arguments: $1 - subject line, $2 - message body
# Returns:   always 0; a failing `mail` is ignored and its stderr discarded
send_email() {
    local topic="$1"
    local content="$2"

    if ! echo "${content}" | mail -s "${topic}" "${ALERT_EMAIL}" 2>/dev/null; then
        return 0
    fi
}

# Escape a string so it can be placed inside a JSON string literal
# (backslash, double quote, newline, carriage return, tab).
_json_escape() {
    local s="$1"
    local bs='\' dq='"' nl=$'\n' cr=$'\r' tab=$'\t'

    # Backslashes first, so the escapes added below are not escaped again
    s=${s//"${bs}"/"${bs}${bs}"}
    s=${s//"${dq}"/"${bs}${dq}"}
    s=${s//"${nl}"/"${bs}n"}
    s=${s//"${cr}"/"${bs}r"}
    s=${s//"${tab}"/"${bs}t"}
    printf '%s' "${s}"
}

# Post a text message to the webhook as {"text": "..."} (best effort).
# Globals:   WEBHOOK_ENABLED (read; posts only when it equals "true"),
#            WEBHOOK_URL (read)
# Arguments: $1 - plain message text; it is JSON-escaped here, so quotes or
#                 backslashes in it cannot break the payload
# Returns:   always 0; a failing curl is ignored and its stderr discarded
send_webhook() {
    local text="$1"
    local payload

    if [ "${WEBHOOK_ENABLED}" = true ]; then
        payload=$(printf '{"text": "%s"}' "$(_json_escape "${text}")")
        curl -s -X POST "${WEBHOOK_URL}" \
          -H "Content-Type: application/json" \
          -d "${payload}" 2>/dev/null || true
    fi
}

# Fan one alert out to every channel: alert log, e-mail and webhook.
# Arguments: $1 - severity label, $2 - short title, $3 - detail message
# Calls:     log_alert, send_email, send_webhook
send_alert() {
    local severity="$1"
    local headline="$2"
    local detail="$3"
    local tag="[${severity}]"

    log_alert "${severity}" "${detail}"
    send_email "${tag} ${headline}" "${detail}"
    send_webhook "${tag} ${headline}: ${detail}"
}

# ============ 检查项 ============

# Check 1: 5xx error rate over the most recent 5000 log lines.
# Globals:   LOG_FILE (read)
# Calls:     send_alert
# Returns:   1 when the rate is above 1.0% (a CRITICAL alert is sent),
#            0 otherwise (a WARNING alert is sent when the rate is above 0.5%)
check_5xx_rate() {
    local total errors rate level

    # A single pass over a single snapshot: the line total and the error count
    # come from the same lines even while the log is being appended to.
    read -r total errors rate level < <(
        tail -n 5000 "${LOG_FILE}" | awk '
            /" 5[0-9][0-9] / { errors++ }
            END {
                pct = (NR > 0) ? errors * 100 / NR : 0
                level = (pct > 1.0) ? "CRITICAL" : ((pct > 0.5) ? "WARNING" : "OK")
                printf "%d %d %.2f %s\n", NR, errors, pct, level
            }'
    ) || true

    case "${level:-OK}" in
        CRITICAL)
            send_alert "CRITICAL" "5xx 错误率" "当前: ${rate}% (阈值: 1.0%) | 错误数: ${errors}/${total}"
            return 1
            ;;
        WARNING)
            send_alert "WARNING" "5xx 错误率" "当前: ${rate}% (阈值: 0.5%) | 错误数: ${errors}/${total}"
            ;;
    esac
    return 0
}

# Check 2: share of 404 responses among the most recent 5000 log lines.
# Globals:   LOG_FILE (read)
# Calls:     send_alert (WARNING when the share is above 10%)
# Returns:   always 0
check_404_rate() {
    local rate over

    # One pass over one snapshot, so the total and the 404 count always
    # describe the same lines.
    read -r rate over < <(
        tail -n 5000 "${LOG_FILE}" | awk '
            /" 404 / { hits++ }
            END {
                pct = (NR > 0) ? hits * 100 / NR : 0
                printf "%.2f %d\n", pct, (pct > 10)
            }'
    ) || true

    if [ "${over:-0}" = "1" ]; then
        send_alert "WARNING" "404 比例过高" "当前: ${rate}% (阈值: 10%)"
    fi
    return 0
}

# Check 3: request volume of the busiest client IP.
# Looks at the last 36000 log lines; the alert text labels this as "per hour",
# but the window is line-based, not time-based.
# Globals:   LOG_FILE (read)
# Arguments: $1 - optional maximum request count per IP (default 1000)
# Calls:     send_alert (WARNING when the busiest IP exceeds the maximum)
# Returns:   always 0
check_rate_limit() {
    local threshold="${1:-1000}"
    local count="" ip=""

    # Tally requests per first field and keep the largest tally. Nothing is
    # printed for an empty log, which leaves count empty.
    read -r count ip < <(
        tail -n 36000 "${LOG_FILE}" | awk '
            { seen[$1]++ }
            END {
                for (addr in seen) {
                    if (seen[addr] > max) { max = seen[addr]; top = addr }
                }
                if (max > 0) print max, top
            }'
    ) || true

    # An empty or missing log yields no count; comparing "" with -gt would be
    # an error and, under `set -e`, would end the calling script.
    if [ -z "${count}" ]; then
        return 0
    fi

    if [ "${count}" -gt "${threshold}" ]; then
        send_alert "WARNING" "单 IP 请求频率过高" "IP: ${ip}, 请求数: ${count}/小时 (阈值: ${threshold})"
    fi
    return 0
}

# Check 4: slow requests (needs a log format that records a duration field).
# A request counts as slow when any whitespace-separated field is a plain
# decimal number greater than 5.0.
# Globals:   LOG_FILE (read)
# Calls:     send_alert (WARNING when more than 50 of the last 1000 lines are slow)
# Returns:   always 0
check_slow_requests() {
    local log_with_time="${LOG_FILE}"
    local slow_count

    # `break` after the first matching field: each request is counted once even
    # when its line carries several timing fields.
    slow_count=$(tail -n 1000 "${log_with_time}" | awk '
        {
            for (i = 1; i <= NF; i++) {
                if ($i ~ /^[0-9]+\.[0-9]+$/ && $i + 0 > 5.0) { slow++; break }
            }
        }
        END { print slow + 0 }') || slow_count=0

    if [ "${slow_count:-0}" -gt 50 ]; then
        send_alert "WARNING" "慢请求过多" "最近 1000 条请求中有 ${slow_count} 条耗时超过 5 秒"
    fi
    return 0
}

# ============ Run checks ============
# Every check is run even when an earlier one reports a problem: the script
# runs under `set -e`, so a bare call to a check that returns non-zero
# (check_5xx_rate does so on a CRITICAL alert) would end the script on the spot.
# The exit status is 1 when any check returned non-zero, 0 otherwise.
echo "开始告警检查..."
status=0
check_5xx_rate || status=1
check_404_rate || status=1
check_rate_limit || status=1
check_slow_requests || status=1
echo "告警检查完成"
exit "${status}"

10.4 邮件通知

10.4.1 配置邮件发送

# Install mailutils
sudo apt install -y mailutils

# Configure SMTP (external SMTP server).
# The user and sender addresses in the published snippet were mangled by
# e-mail obfuscation ("[email protected]"); the addresses below are placeholders.
# NOTE(review): ssl-verify=ignore presumably disables TLS certificate
# verification — confirm, and prefer a verifying setting for real servers.
cat > ~/.mailrc << 'EOF'
set smtp=smtps://smtp.example.com:465
set smtp-auth=login
set smtp-auth-user=user@example.com
set smtp-auth-password=your_password
set ssl-verify=ignore
set from="GoAccess <goaccess@example.com>"
EOF

# The file holds a password: restrict it to its owner
chmod 600 ~/.mailrc

10.4.2 发送 HTML 邮件报告

#!/bin/bash
# email_html_report.sh — mail yesterday's daily GoAccess report as an HTML
# message.
#
# Exit: 0 when `mail` accepted the message, 1 when the report file is missing
#       or `mail` fails.

# The addresses in the published script were mangled by e-mail obfuscation
# ("[email protected]"); replace these placeholders with real addresses.
RECIPIENT="admin@example.com"
SENDER="GoAccess Reports <goaccess@example.com>"
DATE=$(date -d yesterday +%Y-%m-%d)
REPORT="/var/www/html/stats/daily/${DATE}.html"

if [ ! -f "${REPORT}" ]; then
    echo "报告文件不存在: ${REPORT}" >&2
    exit 1
fi

# Send the report as the message body, with extra headers given via -a.
# The result is checked so that success is only reported when `mail` succeeded.
if ! mail -s "📊 网站访问日报 - ${DATE}" \
     -a "Content-Type: text/html; charset=UTF-8" \
     -a "From: ${SENDER}" \
     "${RECIPIENT}" < "${REPORT}"; then
    echo "邮件发送失败: ${RECIPIENT}" >&2
    exit 1
fi

echo "邮件已发送至 ${RECIPIENT}"

10.4.3 发送摘要邮件

#!/bin/bash
# email_summary.sh — mail a compact HTML summary of yesterday's traffic:
# total requests, distinct client addresses, status-code distribution and the
# five most requested paths.

LOG_FILE="/var/log/nginx/access.log"
DATE=$(date -d yesterday +%Y-%m-%d)
# Date as it appears in log timestamps; the C locale keeps the month in English
LOG_DATE=$(LC_ALL=C date -d yesterday +'%d/%b/%Y')
# The recipient in the published script was mangled by e-mail obfuscation
# ("[email protected]"); replace this placeholder with the real address.
RECIPIENT="admin@example.com"

# Filter yesterday's lines once and reuse them for every section
DAY_LOG=$(mktemp) || exit 1
trap 'rm -f -- "${DAY_LOG}"' EXIT
awk -v d="${LOG_DATE}" 'index($0, "[" d ":") > 0' "${LOG_FILE}" > "${DAY_LOG}"

# Escape HTML metacharacters in stdin. Request paths and status fields come
# straight from the access log, i.e. from clients, and must not be copied into
# the message as markup.
html_escape() {
    sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g' -e 's/"/\&quot;/g'
}

# Build the summary
{
  echo "<html><body style='font-family:Arial,sans-serif;'>"
  echo "<h2>📊 网站访问日报 - ${DATE}</h2>"

  # Total requests
  TOTAL=$(wc -l < "${DAY_LOG}")
  echo "<p><strong>总请求:</strong> ${TOTAL}</p>"

  # Distinct client addresses (first field)
  UV=$(awk '{print $1}' "${DAY_LOG}" | sort -u | wc -l)
  echo "<p><strong>独立访客:</strong> ${UV}</p>"

  # Status-code distribution; awk computes count and share in one pass
  echo "<h3>状态码分布</h3><table border='1' cellpadding='5' style='border-collapse:collapse;'>"
  echo "<tr><th>状态码</th><th>请求数</th><th>占比</th></tr>"
  awk '{ gsub(/"/, "", $9); seen[$9]++ }
       END { for (c in seen) printf "%d %.1f %s\n", seen[c], seen[c] * 100 / NR, c }' "${DAY_LOG}" | \
    sort -rn | html_escape | \
    while read -r count pct code; do
        echo "<tr><td>${code}</td><td>${count}</td><td>${pct}%</td></tr>"
    done
  echo "</table>"

  # Top 5 pages
  echo "<h3>热门页面 Top 5</h3><ol>"
  awk '{gsub(/"/, "", $7); print $7}' "${DAY_LOG}" | sort | uniq -c | sort -rn | head -5 | \
    html_escape | \
    while read -r count url; do
        echo "<li><code>${url}</code> — ${count} 次</li>"
    done
  echo "</ol>"

  echo "<hr><p style='color:#999;font-size:12px;'>由 GoAccess 自动生成 | $(date)</p>"
  echo "</body></html>"
} | mail -s "📊 网站访问日报 - ${DATE}" \
     -a "Content-Type: text/html; charset=UTF-8" \
     "${RECIPIENT}"

echo "摘要邮件已发送"

10.5 Webhook 通知

10.5.1 企业微信通知

#!/bin/bash
# wechat_notify.sh — post a markdown message to a WeCom (企业微信) webhook.
#
# Usage: wechat_notify.sh TITLE CONTENT
# TITLE and CONTENT are plain text; they are JSON-escaped here, so quotes,
# backslashes or line breaks in them cannot break the request body.

WEBHOOK_URL="https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=YOUR_KEY"
TITLE="${1:-}"
CONTENT="${2:-}"

# Escape a string for use inside a JSON string literal
json_escape() {
    local s="$1"
    local bs='\' dq='"' nl=$'\n' cr=$'\r' tab=$'\t'

    # Backslashes first, so the escapes added below are not escaped again
    s=${s//"${bs}"/"${bs}${bs}"}
    s=${s//"${dq}"/"${bs}${dq}"}
    s=${s//"${nl}"/"${bs}n"}
    s=${s//"${cr}"/"${bs}r"}
    s=${s//"${tab}"/"${bs}t"}
    printf '%s' "${s}"
}

# "\\n" in the format prints the two characters \n, the JSON line break
# between the heading and the content.
PAYLOAD=$(printf '{"msgtype": "markdown", "markdown": {"content": "## %s\\n%s"}}' \
  "$(json_escape "${TITLE}")" "$(json_escape "${CONTENT}")")

curl -s -X POST "${WEBHOOK_URL}" \
  -H "Content-Type: application/json" \
  -d "${PAYLOAD}"

10.5.2 钉钉通知

#!/bin/bash
# dingtalk_notify.sh — post a markdown message to a DingTalk (钉钉) robot
# webhook.
#
# Usage: dingtalk_notify.sh TITLE CONTENT
# TITLE and CONTENT are plain text; they are JSON-escaped here, so quotes,
# backslashes or line breaks in them cannot break the request body.

WEBHOOK_URL="https://oapi.dingtalk.com/robot/send?access_token=YOUR_TOKEN"
TITLE="${1:-}"
CONTENT="${2:-}"

# Escape a string for use inside a JSON string literal
json_escape() {
    local s="$1"
    local bs='\' dq='"' nl=$'\n' cr=$'\r' tab=$'\t'

    # Backslashes first, so the escapes added below are not escaped again
    s=${s//"${bs}"/"${bs}${bs}"}
    s=${s//"${dq}"/"${bs}${dq}"}
    s=${s//"${nl}"/"${bs}n"}
    s=${s//"${cr}"/"${bs}r"}
    s=${s//"${tab}"/"${bs}t"}
    printf '%s' "${s}"
}

SAFE_TITLE=$(json_escape "${TITLE}")
SAFE_CONTENT=$(json_escape "${CONTENT}")

# "\\n" in the format prints the two characters \n, the JSON line break
# between the heading and the content.
PAYLOAD=$(printf '{"msgtype": "markdown", "markdown": {"title": "%s", "text": "## %s\\n%s"}}' \
  "${SAFE_TITLE}" "${SAFE_TITLE}" "${SAFE_CONTENT}")

curl -s -X POST "${WEBHOOK_URL}" \
  -H "Content-Type: application/json" \
  -d "${PAYLOAD}"

10.5.3 Slack 通知

#!/bin/bash
# slack_notify.sh — post a text message to a Slack incoming webhook.
#
# Usage: slack_notify.sh TEXT
# TEXT is plain text; it is JSON-escaped here, so quotes, backslashes or line
# breaks in it cannot break the request body.

WEBHOOK_URL="https://hooks.slack.com/services/xxx/yyy/zzz"
TEXT="${1:-}"

# Escape a string for use inside a JSON string literal
json_escape() {
    local s="$1"
    local bs='\' dq='"' nl=$'\n' cr=$'\r' tab=$'\t'

    # Backslashes first, so the escapes added below are not escaped again
    s=${s//"${bs}"/"${bs}${bs}"}
    s=${s//"${dq}"/"${bs}${dq}"}
    s=${s//"${nl}"/"${bs}n"}
    s=${s//"${cr}"/"${bs}r"}
    s=${s//"${tab}"/"${bs}t"}
    printf '%s' "${s}"
}

PAYLOAD=$(printf '{"text": "%s"}' "$(json_escape "${TEXT}")")

curl -s -X POST "${WEBHOOK_URL}" \
  -H "Content-Type: application/json" \
  -d "${PAYLOAD}"

10.6 CI/CD 集成

10.6.1 GitHub Actions

# .github/workflows/goaccess-report.yml
# Builds a GoAccess HTML report from a downloaded access log, stores it as a
# workflow artifact and publishes it to GitHub Pages.
name: Generate GoAccess Report

on:
  schedule:
    - cron: '0 1 * * *'  # every day at 01:00 UTC
  workflow_dispatch:      # allow manual runs

jobs:
  generate-report:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install GoAccess
        # Refresh the package index first; installing against a stale index
        # can fail on a fresh runner image.
        run: |
          sudo apt-get update
          sudo apt-get install -y goaccess

      - name: Download logs
        run: |
          # Fetch the log from the server over SSH.
          # NOTE(review): no SSH key is configured in this workflow, and
          # StrictHostKeyChecking=no skips host-key verification — confirm how
          # the runner is meant to authenticate and pin the host key.
          mkdir -p logs
          scp -o StrictHostKeyChecking=no \
            user@your-server:/var/log/nginx/access.log \
            logs/access.log

      - name: Generate report
        run: |
          # Write the report into its own directory so that only the report,
          # not the checkout or the raw log, is published below.
          mkdir -p public
          goaccess logs/access.log \
            --log-format=COMBINED \
            -o public/report.html \
            --html-title="Daily Report - $(date -d yesterday +%Y-%m-%d)" \
            --process-and-exit

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: goaccess-report
          path: public/report.html

      - name: Deploy to GitHub Pages
        uses: peaceiris/actions-gh-pages@v3
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          # Publishing "./" would push the whole workspace, including
          # logs/access.log, to the Pages branch.
          publish_dir: ./public
          destination_dir: reports

10.6.2 GitLab CI

# .gitlab-ci.yml
# Scheduled pipeline that copies the nginx access log from a server and builds
# a GoAccess HTML report, kept as a job artifact.
stages:
  - analyze
  - deploy

goaccess-report:
  stage: analyze
  image: debian:bookworm
  before_script:
    # goaccess builds the report; openssh-client provides scp
    - apt-get update && apt-get install -y goaccess openssh-client
  script:
    - mkdir -p reports
    # NOTE(review): StrictHostKeyChecking=no skips host-key verification, and
    # no SSH key is set up in this job — confirm how it authenticates
    - scp -o StrictHostKeyChecking=no user@server:/var/log/nginx/access.log .
    - goaccess access.log --log-format=COMBINED -o reports/report.html --process-and-exit
  artifacts:
    paths:
      - reports/
    expire_in: 30 days
  # Run for scheduled pipelines only
  only:
    - schedules

10.7 综合自动化脚本

#!/bin/bash
# goaccess_auto.sh — all-in-one GoAccess automation script

set -e
set -u
set -o pipefail

# ============ Settings ============
LOG_DIR=/var/log/nginx
REPORT_BASE=/var/www/html/stats
DB_FILE=/var/lib/goaccess/state.db
ALERT_THRESHOLDS_FILE=/etc/goaccess/thresholds.conf

# Today's date, weekday number (1 = Monday .. 7 = Sunday) and zero-padded day
# of the month
DATE=$(date +%Y-%m-%d)
DOW=$(date +%u)
DOM=$(date +%d)

# ============ Logging ============
# Print all arguments on one line, prefixed with a bracketed timestamp.
log() {
    local stamp
    stamp=$(date '+%Y-%m-%d %H:%M:%S')
    printf '[%s] %s\n' "${stamp}" "$*"
}

# ============ 报告生成 ============
# Build one GoAccess HTML report from the gzip-rotated logs plus the current
# access log under LOG_DIR.
# Globals:   LOG_DIR (read)
# Calls:     log
# Arguments: $1 - report kind, used in log messages only
#            $2 - report title
#            $3 - output file path
#            $4.. - optional extra goaccess arguments
# Returns:   0 when goaccess succeeded and wrote a non-empty file, 1 otherwise
# Note:      the same log set is read for every report kind; nothing here
#            restricts the data to a day, week or month.
# NOTE(review): --html-title, --exclude and the pairing of -o with
# --process-and-exit are kept as published; confirm them against the installed
# goaccess man page.
generate_report() {
    local type="$1" title="$2" output="$3"
    local extra_args=("${@:4}")
    local err_file rc=0

    log "生成${type}报告: ${output}"

    # goaccess diagnostics are captured and replayed only on failure
    err_file=$(mktemp) || return 1

    # `|| rc=$?` keeps a goaccess failure from ending the caller under `set -e`
    # before the failure is logged. The ${arr[@]+...} form expands to nothing
    # for an empty array, which `set -u` rejects as unbound on bash < 4.4.
    {
      zcat "${LOG_DIR}"/access.log.*.gz 2>/dev/null || true
      cat "${LOG_DIR}"/access.log 2>/dev/null || true
    } | goaccess --log-format=COMBINED \
      -o "${output}" \
      --html-title="${title}" \
      --html-prefs='{"theme":"bright","perPage":30}' \
      --exclude='(bot|crawler|spider|Bytespider|GPTBot)' \
      --exclude='\.(css|js|jpg|png|gif|ico|svg|woff2?)$' \
      --process-and-exit \
      ${extra_args[@]+"${extra_args[@]}"} - 2>"${err_file}" || rc=$?

    # Success needs both a clean exit and a non-empty file; a file left over
    # from an earlier run must not hide a failed run.
    if [ "${rc}" -eq 0 ] && [ -s "${output}" ]; then
        rm -f -- "${err_file}"
        log "  ✓ 成功 ($(du -h "${output}" | cut -f1))"
        return 0
    fi

    log "  ✗ 失败"
    cat "${err_file}" >&2
    rm -f -- "${err_file}"
    return 1
}

# ============ 主逻辑 ============
log "开始自动化任务"

# 每日报告
mkdir -p "${REPORT_BASE}/daily"
generate_report "日报" "日报 ${DATE}" "${REPORT_BASE}/daily/${DATE}.html"

# 每周报告(周一)
if [ "${DOW}" = "1" ]; then
    WEEK=$(date -d "last week" +%Y-W%V)
    mkdir -p "${REPORT_BASE}/weekly"
    generate_report "周报" "周报 ${WEEK}" "${REPORT_BASE}/weekly/${WEEK}.html"
fi

# 每月报告(1日)
if [ "${DOM}" = "01" ]; then
    MONTH=$(date -d "last month" +%Y-%m)
    mkdir -p "${REPORT_BASE}/monthly"
    generate_report "月报" "月报 ${MONTH}" "${REPORT_BASE}/monthly/${MONTH}.html"
fi

# 清理旧报告
find "${REPORT_BASE}/daily" -name "*.html" -mtime +90 -delete 2>/dev/null || true
find "${REPORT_BASE}/weekly" -name "*.html" -mtime +180 -delete 2>/dev/null || true

# 执行告警检查
/usr/local/bin/goaccess_alert.sh || true

log "自动化任务完成"

10.8 systemd Timer 替代 Cron

# /etc/systemd/system/goaccess-auto.service
# One-shot service that runs the GoAccess automation script once per start.
[Unit]
Description=GoAccess Automation Task
# Ordered after the network and nginx units
After=network.target nginx.service

[Service]
Type=oneshot
ExecStart=/usr/local/bin/goaccess_auto.sh
# NOTE(review): the script reads /var/log/nginx and writes under
# /var/www/html/stats — confirm that www-data has the needed permissions
User=www-data
Group=www-data
# Script output goes to the journal
StandardOutput=journal
StandardError=journal
# /etc/systemd/system/goaccess-auto.timer
# Timer for the service of the same name (goaccess-auto.service).
[Unit]
Description=Run GoAccess automation daily

[Timer]
# Every day at 01:00
OnCalendar=*-*-* 01:00:00
# Run a missed activation once the system is up again
Persistent=true
# Add a random delay of up to 300 seconds to each activation
RandomizedDelaySec=300

[Install]
WantedBy=timers.target
# Reload unit files, then enable the timer and start it right away
sudo systemctl daemon-reload
sudo systemctl enable --now goaccess-auto.timer

# Show the timer's status and next run
sudo systemctl list-timers goaccess-auto.timer

10.9 小结

| 场景 | 方案 | 工具 |
| --- | --- | --- |
| 定时报告 | Cron / systemd Timer | Shell 脚本 |
| 错误告警 | 定期检查 + 阈值 | Shell + mail/curl |
| 流量告警 | 滑动窗口监控 | Shell + Webhook |
| 邮件通知 | HTML 邮件 / 摘要邮件 | mail 命令 |
| 消息通知 | 企业微信 / 钉钉 / Slack | Webhook |
| CI/CD | GitHub Actions / GitLab CI | YAML 配置 |

下一章

下一章将介绍 GoAccess 的 Docker 部署方案,包括容器化部署、日志收集、实时监控和与 Nginx 的集成。

11 - Docker 部署


扩展阅读