强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

Varnish Cache 运维教程 / 第07章:VCL 高级编程

第07章:VCL 高级编程

7.1 高级条件逻辑

7.1.1 复杂条件组合

sub vcl_recv {
    # Examples of combining several conditions in one test.

    # Scenario: caching policy for API requests. A lookup is forced only
    # when ALL of the following hold:
    #   - the method is GET
    #   - the URL is a versioned API path
    #   - no Authorization header is present
    #   - the Cookie header does not carry "session="
    #   - the Accept header mentions JSON
    if (req.method == "GET"
        && req.url ~ "^/api/v[0-9]+/"
        && !req.http.Authorization
        && req.http.Cookie !~ "session="
        && req.http.Accept ~ "application/json") {
        return (hash);
    }

    # Scenario: mobile clients. Tag the request when the User-Agent looks
    # mobile, the URL is not one of the listed static file types, and the
    # Cookie header does not contain "desktop=true".
    if (req.http.User-Agent ~ "(?i)(mobile|android|iphone)"
        && req.url !~ "\.(css|js|jpg|png|gif)$"
        && req.http.Cookie !~ "desktop=true") {
        set req.http.X-Device = "mobile";
    }

    # Mixing OR and AND: GET requests for product or category pages that
    # do not carry an X-No-Cache header go to cache lookup.
    if (req.method == "GET"
        && !req.http.X-No-Cache
        && (req.url ~ "^/products/" || req.url ~ "^/categories/")) {
        return (hash);
    }
}

7.1.2 多路分支(VCL 没有 switch 语句,使用 if / elseif 链实现)

import std;

sub vcl_recv {
    # Multi-way branching on the Host header.
    # VCL has no switch statement, so the dispatch is written as an
    # if / elseif / else chain; exactly one branch runs.
    if (req.http.Host == "api.example.com") {
        set req.backend_hint = api_backend;
    } elseif (req.http.Host == "static.example.com") {
        set req.backend_hint = static_backend;
    } elseif (req.http.Host == "admin.example.com") {
        set req.backend_hint = admin_backend;
        call check_admin_ip;
    } else {
        set req.backend_hint = web_backend;
    }

    # Multi-way branching on the URL path with regular expressions.
    # The query string is stripped once into a temporary header so that the
    # "$"-anchored patterns below see the path only; the header is removed
    # again before leaving the sub so it never reaches the backend.
    set req.http.X-Match-Path = regsub(req.url, "\?.*$", "");
    if (req.http.X-Match-Path ~ "^/products/[0-9]+$") {
        set req.http.X-Handler = "product-detail";
    } elseif (req.http.X-Match-Path ~ "^/products/$") {
        set req.http.X-Handler = "product-list";
    } elseif (req.http.X-Match-Path ~ "^/categories/") {
        set req.http.X-Handler = "category";
    } elseif (req.http.X-Match-Path ~ "\.(css|js|jpg|png|gif|webp)$") {
        set req.http.X-Handler = "static";
    } else {
        set req.http.X-Handler = "default";
    }
    unset req.http.X-Match-Path;
}

7.1.3 条件赋值模式

sub vcl_recv {
    # Conditional assignment.
    # VCL has no "cond ? a : b" ternary operator; the equivalent is an
    # explicit if / else that assigns in both branches.
    if (req.http.X-Debug-Mode == "true") {
        set req.http.X-Debug = "1";
    } else {
        set req.http.X-Debug = "0";
    }

    # Multi-level conditional assignment: start from a default and let the
    # first matching rule override it. Values are seconds, stored as strings.
    set req.http.X-Cache-TTL = "300";  # default: 5 minutes
    if (req.url ~ "\.(css|js)$") {
        set req.http.X-Cache-TTL = "3600";  # 1 hour
    } elseif (req.url ~ "\.(jpg|png|gif)$") {
        set req.http.X-Cache-TTL = "604800";  # 7 days
    } elseif (req.url ~ "^/api/") {
        set req.http.X-Cache-TTL = "60";  # 1 minute
    }
}

7.2 正则表达式高级用法

7.2.1 捕获组(编号反向引用 \1、\2)

sub vcl_recv {
    # Pull both numeric IDs out of /products/<id>/reviews/<id>.
    # The same pattern is applied twice; \1 yields the product ID and \2
    # the review ID.
    if (req.url ~ "^/products/([0-9]+)/reviews/([0-9]+)$") {
        set req.http.X-Product-ID = regsub(
            req.url,
            "^/products/([0-9]+)/reviews/([0-9]+)$",
            "\1");
        set req.http.X-Review-ID = regsub(
            req.url,
            "^/products/([0-9]+)/reviews/([0-9]+)$",
            "\2");
    }

    # Derive the tenant name from the left-most label of <label>.example.com.
    if (req.http.Host ~ "^([a-z0-9-]+)\.example\.com$") {
        set req.http.X-Tenant = regsub(
            req.http.Host,
            "^([a-z0-9-]+)\.example\.com$",
            "\1");
    }
}

7.2.2 正则替换

sub vcl_recv {
    # regsub()    replaces only the FIRST match.
    # regsuball() replaces EVERY match.

    # Remove tracking parameters (utm_*, fbclid, gclid) from the query string.
    # regsuball is required here: a URL normally carries several of them.
    # The separator that preceded the parameter ("?" or "&") is kept via \1
    # so the remaining parameters stay attached to the query string.
    set req.url = regsuball(req.url, "([?&])(utm_[^&=]*|fbclid|gclid)=[^&]*", "\1");

    # Tidy up the separators left behind by the removal above:
    #   "a=1&&b=2" -> "a=1&b=2"
    set req.url = regsuball(req.url, "&&+", "&");
    #   "?&b=2"    -> "?b=2"
    set req.url = regsub(req.url, "\?&", "?");
    #   "?a=1&" or a bare trailing "?" -> dropped
    set req.url = regsub(req.url, "[?&]+$", "");

    # Normalise the path by collapsing repeated slashes.
    # Only the path is rewritten: the query string is parked in a temporary
    # header first, so a value such as "?next=http://host/" is left intact.
    set req.http.X-Tmp-Query = regsub(req.url, "^[^?]*", "");
    set req.url = regsuball(regsub(req.url, "\?.*$", ""), "//+", "/") + req.http.X-Tmp-Query;
    unset req.http.X-Tmp-Query;
}

sub vcl_deliver {
    # Expose the request URL in a debug response header.
    # The original call replaced "&" with "&", which is a no-op; the
    # replacement is restored to the HTML entity (presumably lost when the
    # snippet was entity-decoded) and regsuball is used so every "&" is
    # escaped, not just the first one.
    set resp.http.X-Original-URL = regsuball(req.url, "&", "&amp;");
}

7.2.3 正则性能优化

sub vcl_recv {
    # Discouraged: one large pattern that tries to cover every case.
    # if (req.url ~ "^/(products|categories|brands)(/([a-z0-9-]+))?(/[0-9]+)?/?$")

    # Preferred here: match in stages, starting with a cheap prefix test.
    if (req.url ~ "^/(products|categories|brands)/") {
        # The first path segment becomes the section name.
        set req.http.X-Section = regsub(req.url, "^/(products|categories|brands)/.*", "\1");

        # Refine further, for product URLs only.
        if (req.url ~ "^/products/[0-9]+$") {
            set req.http.X-Type = "detail";
        } else if (req.url ~ "^/products/$") {
            set req.http.X-Type = "list";
        }
    }

    # Do not reach for a regex when a plain string operation is enough.
    if (req.url ~ "^/api/") {
        # Placeholder: use a string function from a VMOD here
        # (for example std.strstr -- TODO confirm it is available in the
        # Varnish version in use).
    }
}

7.3 ACL(访问控制列表)

7.3.1 ACL 定义

# Basic ACL: loopback plus two private ranges, with one address carved out.
acl local {
    "localhost";      # 127.0.0.1 and ::1
    "192.168.0.0"/24; # the 192.168.0.x subnet
    "10.0.0.0"/8;     # the 10.x.x.x subnet
    !"192.168.0.100"; # leading "!" negates the entry: exclude this one IP
}

# Administrator ACL: addresses allowed to reach the admin area.
acl admin {
    "192.168.1.10";   # office network
    "192.168.1.11";
    "203.0.113.0"/24; # VPN network
}

# CDN node ACL: address ranges treated as CDN edge nodes.
# NOTE(review): 203.0.113.0/24 also appears in the admin ACL as the VPN
# network -- confirm the overlap is intended.
acl cdn_nodes {
    "198.51.100.0"/24;
    "203.0.113.0"/24;
}

# Search-engine crawler ACL (ranges as labelled by the original author;
# verify against each vendor's published list before relying on them).
acl crawlers {
    "66.249.64.0"/19;   # Google
    "157.55.32.0"/19;   # Bing
    "123.125.71.0"/24;  # Baidu
}

7.3.2 ACL 使用

sub vcl_recv {
    # ACL-based access control.

    # PURGE is honoured only for clients matching the "local" ACL.
    if (req.method == "PURGE") {
        if (client.ip !~ local) {
            return (synth(403, "Forbidden"));
        }
        return (purge);
    }

    # The admin area is limited to the "admin" ACL.
    if (req.url ~ "^/admin" && client.ip !~ admin) {
        return (synth(403, "Admin access denied"));
    }

    # Debug endpoints are limited to the "local" ACL.
    if (req.url ~ "^/debug" && client.ip !~ local) {
        return (synth(403, "Debug access denied"));
    }

    # Mark requests that arrive from a CDN node.
    if (client.ip ~ cdn_nodes) {
        set req.http.X-CDN = "true";
    }
}

7.3.3 动态 ACL

# Approximating a "dynamic ACL" with request data.
sub vcl_recv {
    # The allowed addresses would ideally come from a database or a config
    # file. VCL itself cannot load them at run time; that requires a VMOD.

    # Header-based check instead: /internal/ is served only when the
    # request carries the expected shared secret.
    if (req.url ~ "^/internal/" && req.http.X-Internal-Secret != "your-secret-key") {
        return (synth(403, "Forbidden"));
    }
}

7.4 错误页面自定义

7.4.1 基本错误页面

sub vcl_synth {
    # Every page produced here is UTF-8 HTML, so the header is set once
    # up front instead of in each branch.
    set resp.http.Content-Type = "text/html; charset=utf-8";

    if (resp.status == 404) {
        # Custom "not found" page.
        synthetic({"
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>404 - 页面未找到</title>
    <style>
        body { font-family: "Microsoft YaHei", sans-serif; text-align: center; padding: 50px; }
        h1 { color: #333; }
        p { color: #666; }
        a { color: #007bff; }
    </style>
</head>
<body>
    <h1>404</h1>
    <p>抱歉,您访问的页面不存在。</p>
    <a href="/">返回首页</a>
</body>
</html>"});
    } elseif (resp.status == 503) {
        # Custom "service unavailable" page.
        synthetic({"
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>503 - 服务不可用</title>
    <style>
        body { font-family: "Microsoft YaHei", sans-serif; text-align: center; padding: 50px; }
        h1 { color: #e74c3c; }
        p { color: #666; }
    </style>
</head>
<body>
    <h1>503</h1>
    <p>服务器暂时不可用,请稍后再试。</p>
</body>
</html>"});
    } elseif (resp.status == 403) {
        # Custom "forbidden" page.
        synthetic({"
<!DOCTYPE html>
<html>
<head><title>403 Forbidden</title></head>
<body>
<h1>403 Forbidden</h1>
<p>Access denied.</p>
</body>
</html>"});
    } else {
        # Generic page for any other status: shows "<status> <reason>".
        synthetic({"
<!DOCTYPE html>
<html>
<head><title>"} + resp.status + " " + resp.reason + {"</title></head>
<body>
<h1>"} + resp.status + " " + resp.reason + {"</h1>
</body>
</html>"});
    }

    return (deliver);
}

7.4.2 后端错误页面

sub vcl_backend_error {
    # Maintenance page served when the backend fetch fails.
    # The page reloads itself every 30 seconds through the meta refresh tag.
    synthetic({"
<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <title>服务维护中</title>
    <meta http-equiv="refresh" content="30">
    <style>
        body { font-family: "Microsoft YaHei", sans-serif; text-align: center; padding: 100px; }
        h1 { color: #e67e22; }
        .countdown { font-size: 24px; color: #333; }
    </style>
</head>
<body>
    <h1>系统维护中</h1>
    <p>我们正在进行维护,请稍后访问。</p>
    <p>页面将在 <span class="countdown">30</span> 秒后自动刷新。</p>
</body>
</html>"});

    # Declare the body above as UTF-8 HTML.
    set beresp.http.Content-Type = "text/html; charset=utf-8";

    return (deliver);
}

7.4.3 错误重定向

sub vcl_synth {
    # Redirects driven by private 7xx status codes. The caller passes the
    # target URL in the reason string; it is copied into Location and the
    # status/reason are then rewritten to the real HTTP values.
    if (resp.status == 750) {
        # 750 -> permanent redirect
        set resp.http.Location = resp.reason;
        set resp.status = 301;
        set resp.reason = "Moved Permanently";
        return (deliver);
    } elseif (resp.status == 751) {
        # 751 -> temporary redirect
        set resp.http.Location = resp.reason;
        set resp.status = 302;
        set resp.reason = "Found";
        return (deliver);
    }
}

sub vcl_recv {
    if (req.http.Host ~ "^www\.(.+)$") {
        # Strip the "www." prefix: redirect to https://<bare host><url>.
        return (synth(750, "https://" + regsub(req.http.Host, "^www\.(.+)$", "\1") + req.url));
    } elseif (req.http.X-Forwarded-Proto !~ "https") {
        # Anything not marked as HTTPS by X-Forwarded-Proto is redirected
        # to the HTTPS version of the same host and URL.
        return (synth(750, "https://" + req.http.Host + req.url));
    }
}

7.5 自定义子程序模块化

7.5.1 模块化设计

# ==================== Authentication module ====================
# Records the outcome in req.http.X-Auth-Status ("missing", "invalid" or
# "valid") and rejects the request in the first two cases.
sub auth_check {
    if (!req.http.Authorization) {
        # No Authorization header at all.
        set req.http.X-Auth-Status = "missing";
        return (synth(401, "Unauthorized"));
    } elseif (req.http.X-API-Key != "valid-key") {
        # Token validation (example only): the key is a fixed literal.
        set req.http.X-Auth-Status = "invalid";
        return (synth(403, "Forbidden"));
    } else {
        set req.http.X-Auth-Status = "valid";
    }
}

# ==================== Rate-limit module ====================
sub rate_limit_check {
    # Note: core VCL has no built-in rate limiting;
    # a VMOD or an external service is needed for a real implementation.
    # This is a conceptual example only: it answers 429 when the request
    # header X-Rate-Limit-Remaining equals "0".
    if (req.http.X-Rate-Limit-Remaining == "0") {
        return (synth(429, "Too Many Requests"));
    }
}

# ==================== URL normalisation module ====================
# Rewrites req.url into a canonical form so equivalent URLs share one cache
# object. Steps: drop the fragment, drop tracking parameters, then collapse
# repeated slashes and strip a trailing slash in the PATH only.
sub normalize_url {
    # Drop a fragment identifier if one was sent.
    set req.url = regsub(req.url, "#.*$", "");

    # Remove every tracking parameter (regsuball, not just the first one).
    # The preceding "?" or "&" is kept via \1; without it, removing the first
    # parameter would turn "/p?utm_x=1&a=1" into the broken "/p&a=1".
    set req.url = regsuball(req.url, "([?&])(utm_[^&=]*|fbclid|gclid|_ga)=[^&]*", "\1");
    # Clean up the separators left behind: "&&" runs, "?&", trailing "?"/"&".
    set req.url = regsuball(req.url, "&&+", "&");
    set req.url = regsub(req.url, "\?&", "?");
    set req.url = regsub(req.url, "[?&]+$", "");

    # Park the query string in a temporary header so the slash handling
    # below cannot alter parameter values such as "?next=http://host/".
    set req.http.X-Norm-Query = regsub(req.url, "^[^?]*", "");
    set req.url = regsub(req.url, "\?.*$", "");

    # Collapse repeated slashes first, so "//" becomes "/" rather than an
    # empty URL once the trailing slash is stripped.
    set req.url = regsuball(req.url, "//+", "/");

    # Strip one trailing slash, but never reduce the root path "/".
    if (req.url != "/") {
        set req.url = regsub(req.url, "/$", "");
    }

    # Re-attach the query string and remove the temporary header.
    set req.url = req.url + req.http.X-Norm-Query;
    unset req.http.X-Norm-Query;
}

# ==================== Cache policy module ====================
sub set_cache_headers {
    if (req.http.X-Cache-TTL) {
        # Intentionally empty: the value is meant to be consumed in
        # vcl_backend_response.
    }

    # Record which request header the response should vary on: when
    # X-Language is present, X-Vary is set to "Accept-Language".
    if (req.http.X-Language) {
        set req.http.X-Vary = "Accept-Language";
    }
}

# ==================== Log tagging module ====================
# Classifies the request into req.http.X-Log-Category. Rules are tried in
# order and the first match wins; everything else is a "page".
sub log_tags {
    if (req.url ~ "^/api/") {
        set req.http.X-Log-Category = "api";
    } else if (req.url ~ "\.(css|js)$") {
        set req.http.X-Log-Category = "static";
    } else if (req.url ~ "\.(jpg|png|gif|webp)$") {
        set req.http.X-Log-Category = "image";
    } else {
        set req.http.X-Log-Category = "page";
    }
}

7.5.2 模块化调用

sub vcl_recv {
    # Run the shared modules first.
    call normalize_url;
    call log_tags;

    # Authentication applies to the admin API only.
    if (req.url ~ "^/api/admin/") {
        call auth_check;
    }

    # Rate limiting applies to every API request.
    if (req.url ~ "^/api/") {
        call rate_limit_check;
    }

    # Basic cache policy: bypass the cache for anything that is not a
    # GET/HEAD, and for requests carrying credentials; look up the rest.
    if ((req.method != "GET" && req.method != "HEAD") || req.http.Authorization) {
        return (pass);
    }

    return (hash);
}

7.6 子请求与内部重定向

7.6.1 restart 机制

sub vcl_recv {
    # Internal redirect via restart: a restart runs vcl_recv again.

    # Scenario: rewrite the URL, then process the request from the top.
    # Guarded by req.restarts == 0 so it happens at most once.
    if (req.restarts == 0 && req.http.X-Rewrite-To) {
        set req.url = req.http.X-Rewrite-To;
        unset req.http.X-Rewrite-To;
        return (restart);
    }

    # Scenario: on any restarted pass, switch to the fallback backend.
    if (req.restarts > 0) {
        set req.backend_hint = fallback_backend;
    }
}

sub vcl_backend_response {
    # Retry the backend fetch when the backend answers 503, at most 3 times.
    # Backend subs cannot read req.* (req.restarts is client-side only);
    # the backend-side counter for "return (retry)" is bereq.retries.
    if (beresp.status == 503 && bereq.retries < 3) {
        return (retry);
    }
}

sub vcl_deliver {
    # Expose the restart count in a response header, but only when the
    # request was restarted at least once.
    if (req.restarts > 0) {
        set resp.http.X-Restarts = req.restarts;
    }
}

7.6.2 负载均衡失败转移

import directors;

sub vcl_init {
    # Primary director: round-robin across the three web backends.
    new web = directors.round_robin();
    web.add_backend(web01);
    web.add_backend(web02);
    web.add_backend(web03);

    # Fallback director: a round-robin with a single member.
    new fallback = directors.round_robin();
    fallback.add_backend(fallback01);
}

sub vcl_recv {
    # Route every request through the primary director.
    set req.backend_hint = web.backend();

    # Give up with a 503 once the request has been restarted 3 times.
    # NOTE(review): this counter tracks client-side restarts; confirm that
    # something in the configuration actually issues "return (restart)".
    if (req.restarts >= 3) {
        return (synth(503, "All backends failed"));
    }
}

sub vcl_backend_response {
    if (beresp.status >= 500) {
        # Backend answered with a 5xx: retry the fetch.
        # NOTE(review): no explicit bound here -- presumably the max_retries
        # runtime parameter is what caps this; confirm.
        return (retry);
    }
}

sub vcl_backend_fetch {
    # On a retried fetch, switch to the fallback director.
    # req.* is not readable in backend subs; bereq.retries is the counter
    # for "return (retry)", so it is the one to test here.
    if (bereq.retries > 0) {
        set bereq.backend = fallback.backend();
    }
}

7.7 中间件模式

7.7.1 请求/响应管道

# Request processing pipeline: the stages run strictly in this order.
sub vcl_recv {
    # Stage 1: security checks
    call security_check;

    # Stage 2: URL normalisation
    call normalize_url;

    # Stage 3: authentication (protected paths only)
    if (req.url ~ "^/protected/") {
        call auth_check;
    }

    # Stage 4: routing
    call route_request;

    # Stage 5: cache decision
    call cache_decision;
}

# Response processing pipeline: the stages run strictly in this order.
sub vcl_deliver {
    # Stage 1: add security headers
    call add_security_headers;

    # Stage 2: add CORS headers
    call add_cors_headers;

    # Stage 3: add debug information (only when X-Debug is present)
    if (req.http.X-Debug) {
        call add_debug_headers;
    }

    # Stage 4: strip sensitive information
    call sanitize_headers;
}

7.7.2 洋葱模型中间件

# Layer 1: baseline security
sub vcl_recv {
    # Reject clients on the IP blocklist.
    if (client.ip ~ blocked_ips) {
        return (synth(403, "Blocked"));
    }

    # Request-rate check
    call rate_limit;

    # Hand over to the next layer
    call layer2_routing;
}

# Layer 2: routing
sub layer2_routing {
    # Dispatch on the Host header. VCL has no switch statement, so the
    # dispatch is an if / elseif / else chain; unknown hosts get a 404.
    if (req.http.Host == "api.example.com") {
        call layer3_api;
    } elseif (req.http.Host == "www.example.com") {
        call layer3_web;
    } else {
        return (synth(404, "Unknown host"));
    }
}

# Layer 3: API handling
sub layer3_api {
    # Route by API version: only /api/v1/ goes to the v1 backend,
    # everything else is served by v2.
    if (req.url !~ "^/api/v1/") {
        set req.backend_hint = api_v2_backend;
    } else {
        set req.backend_hint = api_v1_backend;
    }
}

7.8 性能优化技巧

7.8.1 减少正则匹配

sub vcl_recv {
    # Discouraged: several independent regex tests.
    # if (req.url ~ "^/a") { ... }
    # if (req.url ~ "^/b") { ... }
    # if (req.url ~ "^/c") { ... }

    # Preferred: one combined pattern. The matched section name is taken
    # straight from the capture group, so no per-value branching is needed
    # (VCL has no switch statement to branch on it anyway).
    if (req.url ~ "^/(a|b|c)/") {
        set req.http.X-Section = regsub(req.url, "^/(a|b|c)/.*", "\1");
    }

    # Better still: simple anchored prefix tests.
    if (req.url ~ "^/api/") {
        set req.http.X-Section = "api";
    } else if (req.url ~ "^/static/") {
        set req.http.X-Section = "static";
    }
}

7.8.2 避免不必要的计算

sub vcl_recv {
    # Return early so later checks are skipped.
    # PURGE is honoured only for clients in the "local" ACL; without this
    # check any client could evict objects from the cache.
    if (req.method == "PURGE") {
        if (client.ip !~ local) {
            return (synth(403, "Forbidden"));
        }
        return (purge);
    }

    # Do the expensive work only where it is needed.
    if (req.url ~ "^/api/") {
        # API-specific processing
        call api_processing;
    }

    # Fast path for static assets: drop cookies and go to cache lookup.
    if (req.url ~ "\.(css|js|jpg|png|gif|webp|svg|ico|woff2)$") {
        unset req.http.Cookie;
        return (hash);
    }
}

7.9 注意事项

重要

  1. VCL 没有循环语句,这是设计决策,确保高性能
  2. 子程序调用没有返回值,需要通过 req.http 传递状态
  3. restart 会重新执行整个请求处理流程,注意避免无限循环
  4. retry 仅在后端侧的 vcl_backend_response 和 vcl_backend_error 中可用,用于重试后端请求;重试次数由 bereq.retries 计数(不是 req.restarts)
  5. ACL 是编译时确定的,修改需要重新加载 VCL
  6. 正则表达式性能差异很大,简单的前缀匹配优于复杂正则
  7. 避免在高频路径上进行复杂的字符串操作

7.10 扩展阅读