强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

AWK & SED 生产力教程 / 第 12 章:报告生成

第 12 章:报告生成

一份好的报告,能让数据说话。AWK 和 SED 是生成结构化报告的利器。

12.1 文本报告基础

表格格式化

# Sample input used by the examples in this chapter.
# Columns (space-separated): name, department, salary, bonus
printf '%s\n' \
    'Alice Engineering 15000 2000' \
    'Bob Marketing 12000 1500' \
    'Carol Engineering 16000 2500' \
    'Dave Sales 11000 1000' \
    'Eve Engineering 14000 1800' \
    > data.txt

# Plain table: text columns left-aligned, numeric columns right-aligned
$ awk '
BEGIN { row_fmt = "%-12s %-15s %10d %10d\n" }
{ printf row_fmt, $1, $2, $3, $4 }' data.txt

# Same table with a header row and a separator line
$ awk '
BEGIN {
    head_fmt = "%-12s %-15s %10s %10s\n"
    row_fmt  = "%-12s %-15s %10d %10d\n"
    printf head_fmt, "姓名", "部门", "薪资", "奖金"
    printf head_fmt, "----------", "-------------", "----------", "----------"
}
{ printf row_fmt, $1, $2, $3, $4 }' data.txt

Unicode 表格

$ awk '
BEGIN {
    # Box-drawing borders: top edge, header/body divider, bottom edge
    top_edge    = "┌────────────┬───────────────┬──────────┬──────────┐"
    divider     = "├────────────┼───────────────┼──────────┼──────────┤"
    bottom_edge = "└────────────┴───────────────┴──────────┴──────────┘"

    print top_edge
    printf "│ %-10s │ %-13s │ %8s │ %8s │\n", "姓名", "部门", "薪资", "奖金"
    print divider
}
{ printf "│ %-10s │ %-13s │ %8d │ %8d │\n", $1, $2, $3, $4 }
END { print bottom_edge }' data.txt

汇总统计表

# Per-department summary (head count, salary total, bonus total, average salary)
# followed by a grand-total row.
# Input columns: $1 name, $2 department, $3 salary, $4 bonus
$ awk '
{
    dept_count[$2]++
    dept_salary[$2] += $3
    dept_bonus[$2]  += $4
}
END {
    text_fmt = "%-15s %6s %12s %12s %12s\n"
    row_fmt  = "%-15s %6d %12d %12d %12.0f\n"

    printf text_fmt, "部门", "人数", "总薪资", "总奖金", "平均薪资"
    printf text_fmt, "===============", "======", "============", "============", "============"

    total_count = 0
    total_salary = 0
    total_bonus = 0

    # Iteration order of "for (d in ...)" is unspecified in awk
    for (d in dept_count) {
        printf row_fmt, d, dept_count[d], dept_salary[d], dept_bonus[d], dept_salary[d] / dept_count[d]
        total_count  += dept_count[d]
        total_salary += dept_salary[d]
        total_bonus  += dept_bonus[d]
    }

    # With an empty input file total_count stays 0; report an average of 0
    # instead of aborting with a division-by-zero error.
    overall_avg = (total_count > 0) ? total_salary / total_count : 0

    printf text_fmt, "===============", "======", "============", "============", "============"
    printf row_fmt, "合计", total_count, total_salary, total_bonus, overall_avg
}' data.txt

12.2 CSV 报告生成

生成 CSV 文件

# Convert the data to CSV; the fifth column is salary + bonus
$ awk '
BEGIN { csv_fmt = "%s,%s,%d,%d,%d\n" }
{ printf csv_fmt, $1, $2, $3, $4, $3 + $4 }' data.txt > report.csv

# Same conversion, preceded by a header row
$ awk '
BEGIN {
    csv_fmt = "%s,%s,%d,%d,%d\n"
    print "姓名,部门,薪资,奖金,总收入"
}
{ printf csv_fmt, $1, $2, $3, $4, $3 + $4 }' data.txt > report.csv

从现有数据生成 CSV

# Build a CSV from df output.
# -P selects the POSIX output format, which keeps each filesystem on a single
# line, so the field numbers used below stay valid even for long device names.
$ df -hP | awk 'NR>1 {
    gsub(/%/, "", $5)    # drop the percent sign from the capacity column
    printf "%s,%s,%s,%s,%s,%s\n", $1, $2, $3, $4, $5, $6
}' > disk_report.csv

# Build a CSV from ps output: fields 1-4 plus field 11
# (USER, PID, %CPU, %MEM and the first word of COMMAND in the usual "ps aux" layout)
$ ps aux | awk 'NR>1 {
    printf "%s,%s,%s,%s,%s\n", $1, $2, $3, $4, $11
}' > process_report.csv

CSV 格式化

# Prepend a header row to an existing CSV
$ echo "Name,Department,Salary" | cat - data.csv > report_with_header.csv

# Display the CSV as aligned columns
$ column -t -s, report.csv

# Sort by the 3rd column, numerically, in descending order; the header row
# is copied first so it stays on top.
# -k3,3 limits the sort key to field 3 only. A bare -k3 would make the key run
# from field 3 to the end of the line, so the following columns (and their
# commas) would leak into the numeric comparison.
$ head -n 1 report.csv > sorted_report.csv
$ tail -n +2 report.csv | sort -t, -k3,3nr >> sorted_report.csv

12.3 HTML 报告生成

基本 HTML 表格

# Render data.txt as a standalone HTML page with one table row per input line.
# NOTE: strftime() is not part of POSIX awk; it is available in gawk.
$ awk '
BEGIN {
    print "<!DOCTYPE html>"
    print "<html>"
    print "<head>"
    # The page contains non-ASCII text, so declare the encoding explicitly;
    # otherwise a browser may guess a different charset and show mojibake.
    print "<meta charset=\"UTF-8\">"
    print "<style>"
    print "  body { font-family: Arial, sans-serif; margin: 20px; }"
    print "  table { border-collapse: collapse; width: 80%; margin: 20px auto; }"
    print "  th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }"
    print "  th { background-color: #4CAF50; color: white; }"
    print "  tr:nth-child(even) { background-color: #f2f2f2; }"
    print "  .right { text-align: right; }"
    print "</style>"
    print "</head>"
    print "<body>"
    print "<h1 style=\"text-align:center\">员工薪资报告</h1>"
    print "<table>"
    print "<tr><th>姓名</th><th>部门</th><th class=\"right\">薪资</th><th class=\"right\">奖金</th><th class=\"right\">总收入</th></tr>"
}
{
    # One row per employee; the last cell is salary + bonus
    printf "<tr><td>%s</td><td>%s</td><td class=\"right\">%d</td><td class=\"right\">%d</td><td class=\"right\">%d</td></tr>\n", $1, $2, $3, $4, $3+$4
}
END {
    print "</table>"
    print "<p style=\"text-align:center; color: #666;\">生成时间: " strftime("%Y-%m-%d %H:%M:%S") "</p>"
    print "</body>"
    print "</html>"
}' data.txt > report.html

带统计的 HTML 报告

cat > generate_report.awk << 'EOF'
# generate_report.awk: HTML report with summary cards, a detail table and
# per-department statistics.
# Input columns: $1 name, $2 department, $3 salary, $4 bonus
BEGIN {
    print "<!DOCTYPE html>"
    print "<html><head>"
    print "<meta charset=\"UTF-8\">"
    print "<style>"
    print "  body { font-family: 'Segoe UI', Arial, sans-serif; margin: 40px; background: #f5f5f5; }"
    print "  .container { max-width: 900px; margin: 0 auto; background: white; padding: 30px; border-radius: 8px; box-shadow: 0 2px 10px rgba(0,0,0,0.1); }"
    print "  h1 { color: #2c3e50; text-align: center; }"
    print "  table { width: 100%; border-collapse: collapse; margin: 20px 0; }"
    print "  th { background: #3498db; color: white; padding: 12px; }"
    print "  td { padding: 10px; border-bottom: 1px solid #ddd; }"
    print "  tr:hover { background: #f5f5f5; }"
    print "  .summary { display: flex; justify-content: space-around; margin: 20px 0; }"
    print "  .stat-box { text-align: center; padding: 20px; background: #ecf0f1; border-radius: 8px; }"
    print "  .stat-number { font-size: 24px; font-weight: bold; color: #2c3e50; }"
    print "  .stat-label { color: #7f8c8d; }"
    print "</style></head><body>"
    print "<div class=\"container\">"
    print "<h1>📊 数据分析报告</h1>"
}
{
    # Keep every line for the detail table and accumulate the aggregates
    data[NR] = $0
    # Count distinct departments explicitly; length(array) is not available
    # in every awk implementation.
    if (!($2 in dept_count)) dept_total++
    dept_count[$2]++
    dept_salary[$2] += $3
    total_salary += $3
    total_count++
}
END {
    # An empty input leaves total_count at 0; show an average of 0 rather
    # than aborting with a division-by-zero error.
    avg_salary = (total_count > 0) ? total_salary / total_count : 0

    # Summary cards
    printf "<div class=\"summary\">"
    printf "<div class=\"stat-box\"><div class=\"stat-number\">%d</div><div class=\"stat-label\">总人数</div></div>", total_count
    printf "<div class=\"stat-box\"><div class=\"stat-number\">%d</div><div class=\"stat-label\">总薪资</div></div>", total_salary
    printf "<div class=\"stat-box\"><div class=\"stat-number\">%.0f</div><div class=\"stat-label\">平均薪资</div></div>", avg_salary
    printf "<div class=\"stat-box\"><div class=\"stat-number\">%d</div><div class=\"stat-label\">部门数</div></div>", dept_total
    printf "</div>"

    # Detail table: one row per stored input line
    print "<table>"
    print "<tr><th>姓名</th><th>部门</th><th>薪资</th><th>奖金</th><th>总收入</th></tr>"
    for (i=1; i<=NR; i++) {
        split(data[i], a, " ")
        printf "<tr><td>%s</td><td>%s</td><td>%d</td><td>%d</td><td>%d</td></tr>\n", a[1], a[2], a[3], a[4], a[3]+a[4]
    }
    print "</table>"

    # Per-department statistics (iteration order is unspecified)
    print "<h2>部门统计</h2>"
    print "<table>"
    print "<tr><th>部门</th><th>人数</th><th>平均薪资</th></tr>"
    for (d in dept_count)
        printf "<tr><td>%s</td><td>%d</td><td>%.0f</td></tr>\n", d, dept_count[d], dept_salary[d]/dept_count[d]
    print "</table>"

    print "</div></body></html>"
}
EOF

$ awk -f generate_report.awk data.txt > report.html

12.4 Markdown 报告

# Markdown report: a detail table followed by per-department averages.
# NOTE: strftime() is not part of POSIX awk; it is available in gawk.
$ awk '
BEGIN {
    detail_row = "| %-10s | %-13s | %8d | %8d | %8d |\n"
    dept_row   = "| %-13s | %6d | %12.0f |\n"

    # Each print appends one newline, so a trailing "\n" yields the blank line
    print "# 员工薪资报告\n"
    print "生成时间: " strftime("%Y-%m-%d %H:%M:%S") "\n"
    print "## 详细数据\n"
    printf "| %-10s | %-13s | %8s | %8s | %8s |\n", "姓名", "部门", "薪资", "奖金", "总收入"
    printf "|%-11s|%-14s|%-9s|%-9s|%-9s|\n", "-----------", "--------------", "---------", "---------", "---------"
}
{
    printf detail_row, $1, $2, $3, $4, $3 + $4
    dept_count[$2]++
    dept_salary[$2] += $3
}
END {
    print "\n## 部门统计\n"
    printf "| %-13s | %6s | %12s |\n", "部门", "人数", "平均薪资"
    printf "|%-14s|%-7s|%-13s|\n", "--------------", "-------", "-------------"
    for (dept in dept_count)
        printf dept_row, dept, dept_count[dept], dept_salary[dept] / dept_count[dept]
}' data.txt > report.md

12.5 ASCII 图表

柱状图

# Horizontal bar chart of the average salary per department:
# one block character per full 1000 units.
$ awk '
# Return a string made of n block characters
function bar(n,    out, k) {
    out = ""
    for (k = 0; k < n; k++) out = out "█"
    return out
}
{
    dept_count[$2]++
    dept_salary[$2] += $3
}
END {
    for (dept in dept_count) {
        mean = dept_salary[dept] / dept_count[dept]
        printf "%-13s |%s %10.0f\n", dept, bar(int(mean / 1000)), mean
    }
}' data.txt

带刻度的柱状图

# Bar chart of the salary total per department, scaled so that the largest
# total fills the whole bar, with an axis and scale labels underneath.
$ awk '
{
    category[$2] += $3
}
END {
    # Largest total; every bar is drawn relative to it
    max_val = 0
    for (c in category) {
        if (category[c] > max_val) max_val = category[c]
    }

    # Draw the bars
    bar_width = 40
    for (c in category) {
        # If every total is 0, max_val is 0 as well: draw empty bars
        # instead of dividing by zero.
        bar_len = (max_val > 0) ? int(category[c] / max_val * bar_width) : 0
        printf "%-15s │", c
        for (i = 0; i < bar_len; i++) printf "█"
        for (i = bar_len; i < bar_width; i++) printf "░"
        printf "│ %10d\n", category[c]
    }

    # Axis line
    printf "%-15s └", ""
    for (i = 0; i < bar_width; i++) printf "─"
    printf "┘\n"

    # Scale labels: 0 is left-aligned under the first bar cell and max_val is
    # right-aligned under the last one (a right-aligned 0 would end up ten
    # cells inside the bar).
    printf "%-15s  %-10d%*d\n", "", 0, bar_width - 10, max_val
}' data.txt

饼图(文本版)

# Text "pie chart": share of the salary total per department,
# one square per full 2 percent.
$ awk '
# Return a string made of n square characters
function squares(n,    out, k) {
    out = ""
    for (k = 0; k < n; k++) out = out "■"
    return out
}
{
    total += $3
    dept_salary[$2] += $3
}
END {
    printf "部门薪资占比:\n\n"
    for (dept in dept_salary) {
        share = dept_salary[dept] / total * 100
        printf "%-13s %5.1f%% %s\n", dept, share, squares(int(share / 2))
    }
}' data.txt

12.6 报告邮件发送

#!/bin/bash
# send_report.sh — generate the HTML report and send it by e-mail
#
# Usage:   ./send_report.sh
# Env:     RECIPIENT  address the report is sent to (default: admin@example.com)
# Needs:   generate_report.awk and data.txt in the current directory

# Recipient is configurable; example.com is a placeholder domain.
RECIPIENT="${RECIPIENT:-admin@example.com}"
REPORT_FILE="report_$(date +%Y%m%d).html"

# Generate the report; stop here if awk fails so that no broken or empty
# report gets mailed.
if ! awk -f generate_report.awk data.txt > "$REPORT_FILE"; then
    echo "报告生成失败" >&2
    exit 1
fi

# Send the mail (requires mailx or sendmail to be installed).
# NOTE(review): the meaning of -a differs between mailx implementations
# (extra header in some, file attachment in others) — confirm on the target system.
if command -v mailx >/dev/null 2>&1; then
    if mailx -s "数据报告 $(date +%Y-%m-%d)" \
             -a "Content-Type: text/html" \
             "$RECIPIENT" < "$REPORT_FILE"; then
        echo "报告已发送至 $RECIPIENT"
    else
        # Only claim success when mailx actually accepted the message
        echo "邮件发送失败,报告已保存至: $REPORT_FILE" >&2
        exit 1
    fi
else
    echo "请安装 mailx 以发送邮件"
    echo "报告已保存至: $REPORT_FILE"
fi

12.7 综合实战

🏢 场景:月度运营报告

#!/bin/bash
# monthly_report.sh — generate last month's operations report (HTML)
# from the nginx access logs found in LOG_DIR.
#
# NOTE(review): the parsing below assumes the access log uses the usual
# layout where field 4 is the timestamp ("[10/May/2024:13:55:36"), field 7 the
# request path, field 9 the status code and field 10 the response size —
# confirm against the server's log_format.

# Previous month as YYYY-MM (used in the title and the file name).
# Counting back from the 15th avoids month-end surprises: "last month"
# evaluated on the 31st can resolve to a day in the current month.
# GNU date is tried first, BSD date is the fallback.
MONTH=$(date -d "$(date +%Y-%m-15) -1 month" +%Y-%m 2>/dev/null || date -v15d -v-1m +%Y-%m)

# The same month written the way it appears in log timestamps, e.g. "May/2024".
# LC_ALL=C keeps the month abbreviation in English regardless of the locale.
LOG_MONTH=$(LC_ALL=C date -d "${MONTH}-01" +%b/%Y 2>/dev/null \
    || LC_ALL=C date -j -f "%Y-%m-%d" "${MONTH}-01" +%b/%Y)

LOG_DIR="/var/log/nginx"
REPORT_DIR="/var/reports"
mkdir -p "$REPORT_DIR" || exit 1

REPORT_FILE="${REPORT_DIR}/monthly_${MONTH}.html"

# Write the contents of every access log (current and rotated) to stdout.
# Rotated logs compressed with gzip are decompressed on the fly.
read_logs() {
    local f
    for f in "${LOG_DIR}"/access.log*; do
        [[ -f "$f" ]] || continue    # also covers the "no file matched" case
        case "$f" in
            *.gz) gzip -dc -- "$f" ;;
            *)    cat -- "$f" ;;
        esac
    done
}

{
    cat << 'HEADER'
<!DOCTYPE html>
<html><head>
<meta charset="UTF-8">
<style>
body { font-family: Arial, sans-serif; margin: 40px; }
.card { background: #f8f9fa; border-radius: 8px; padding: 20px; margin: 10px 0; }
h1 { color: #333; }
table { width: 100%; border-collapse: collapse; }
th, td { padding: 8px; border-bottom: 1px solid #ddd; }
th { background: #007bff; color: white; }
</style>
</head><body>
<h1>📊 月度运营报告</h1>
HEADER

    echo "<h2>${MONTH} 数据概览</h2>"
    echo "<div class='card'>"

    # Extract the statistics for the selected month from the logs
    read_logs | awk -v month="$LOG_MONTH" '
    # Request paths come from clients, so escape them before writing HTML
    function html_escape(s) {
        gsub(/&/, "\\&amp;", s)
        gsub(/</, "\\&lt;", s)
        gsub(/>/, "\\&gt;", s)
        return s
    }
    {
        split($4, t, ":")
        date = substr(t[1], 2)            # e.g. "10/May/2024"
        if (substr(date, 4) == month) {   # compare the "May/2024" part
            total++
            bytes += $10
            if ($9 >= 400) errors++
            if (!($1 in ips)) visitors++  # count each client address once
            ips[$1]++
            paths[$7]++
        }
    }
    END {
        printf "<p>总请求数: <strong>%d</strong></p>\n", total
        printf "<p>独立访客: <strong>%d</strong></p>\n", visitors
        printf "<p>错误请求: <strong>%d</strong></p>\n", errors
        printf "<p>总流量: <strong>%.2f GB</strong></p>\n", bytes/1073741824

        print "</div>"
        print "<h2>热门页面</h2>"
        print "<table><tr><th>页面</th><th>访问次数</th></tr>"
        # List the ten most requested paths, busiest first: repeatedly pick
        # the current maximum and remove it from the array.
        for (rank = 1; rank <= 10; rank++) {
            top = ""
            top_hits = 0
            for (p in paths)
                if (paths[p] > top_hits) { top = p; top_hits = paths[p] }
            if (top_hits == 0) break
            printf "<tr><td>%s</td><td>%d</td></tr>\n", html_escape(top), top_hits
            delete paths[top]
        }
        print "</table>"
    }'

    echo "</body></html>"
} > "$REPORT_FILE"

echo "报告已生成: ${REPORT_FILE}"

12.8 报告生成速查

# Plain-text table: field 1 left-aligned in 20 columns, field 2 as a right-aligned integer
awk '{printf "%-20s %10d\n", $1, $2}' data.txt

# CSV: first three fields, comma-separated, written to report.csv
awk '{printf "%s,%s,%d\n", $1, $2, $3}' data.txt > report.csv

# HTML table: <table> opened in BEGIN, one <tr> per input line, closed in END
awk 'BEGIN{print "<table>"} {printf "<tr><td>%s</td><td>%d</td></tr>\n", $1, $2} END{print "</table>"}' data.txt

# Markdown table: header and separator rows printed once in BEGIN, then one row per line
awk 'BEGIN{printf "| Name | Value |\n|------|-------|\n"} {printf "| %s | %d |\n", $1, $2}' data.txt

# ASCII bar chart: label from field 1, then one block character per 10 units of field 2
awk '{printf "%-15s ", $1; for(i=0;i<$2/10;i++) printf "█"; printf "\n"}' data.txt

扩展阅读


下一章:第 13 章:脚本编写 — 错误处理、调试、可维护性、模块化。