Varnish Cache 运维教程 / 第08章:负载均衡与 Director
第08章:负载均衡与 Director
8.1 负载均衡概述
Varnish 通过 Director(导向器)机制实现后端服务器的负载均衡。Director 是一组后端服务器的集合,按照特定算法分配请求。
8.1.1 负载均衡的优势
| 优势 | 说明 |
|---|---|
| 高可用性 | 单个后端故障不影响整体服务 |
| 水平扩展 | 增加后端即可提升处理能力 |
| 负载分配 | 均匀分配请求,避免单点过载 |
| 故障隔离 | 自动剔除故障节点 |
8.1.2 Director 类型
| 类型 | 算法 | 适用场景 |
|---|---|---|
| `round_robin` | 轮询 | 通用场景,后端性能一致 |
| `fallback` | 回退 | 主备架构,故障转移 |
| `random` | 随机 | 简单负载分配 |
| `hash` | 哈希 | 会话保持,缓存亲和 |
8.2 基本 Director 配置
8.2.1 单后端配置
vcl 4.1;
# Single origin server with an inline health probe.
backend web01 {
    # Where to connect.
    .host = "192.168.1.10";
    .port = "80";

    # Upper bound on simultaneous connections to this origin.
    .max_connections = 300;

    # Timeouts: TCP connect, wait for first response byte, gap between bytes.
    .connect_timeout = 5s;
    .first_byte_timeout = 30s;
    .between_bytes_timeout = 10s;

    # Poll /health every 5s; healthy when at least 3 of the last 5 polls pass.
    .probe = {
        .url = "/health";
        .interval = 5s;
        .timeout = 3s;
        .threshold = 3;
        .window = 5;
    }
}
# Route every client request to the only backend, web01.
sub vcl_recv {
set req.backend_hint = web01;
}
8.2.2 多后端简单配置
vcl 4.1;
# Shared health probe. Declared ahead of the backends that reference it so
# `.probe = health_check` never depends on forward-reference support in the
# VCL compiler.
probe health_check {
    .url = "/health";
    .timeout = 2s;
    .interval = 5s;
    # Healthy when at least 3 of the last 5 polls succeed.
    .window = 5;
    .threshold = 3;
    .expected_response = 200;
}

backend web01 {
    .host = "192.168.1.10";
    .port = "80";
    .probe = health_check;
}

backend web02 {
    .host = "192.168.1.11";
    .port = "80";
    .probe = health_check;
}

backend web03 {
    .host = "192.168.1.12";
    .port = "80";
    .probe = health_check;
}
# std.random() comes from vmod_std; VCL has no built-in randombool().
import std;

# Manual backend selection without a director:
#   /api/*          -> web01
#   everything else -> web02 or web03, picked at random (50/50).
# Note this is a random split, not round-robin.
sub vcl_recv {
    if (req.url ~ "^/api/") {
        set req.backend_hint = web01;
    } else {
        # std.random(0, 100) yields a REAL in [0, 100); below 50 is half the time.
        if (std.random(0, 100) < 50.0) {
            set req.backend_hint = web02;
        } else {
            set req.backend_hint = web03;
        }
    }
}
8.3 Director 详解
8.3.1 Round-Robin Director
轮询是最简单的负载均衡算法,按顺序将请求分配给每个后端。
vcl 4.1;
import directors;
# Shared health probe. Declared ahead of the backends that reference it so
# `.probe = health_check` never depends on forward-reference support in the
# VCL compiler.
probe health_check {
    .url = "/health";
    .timeout = 2s;
    .interval = 5s;
    # Healthy when at least 3 of the last 5 polls succeed.
    .window = 5;
    .threshold = 3;
}

backend web01 {
    .host = "192.168.1.10";
    .port = "80";
    .probe = health_check;
}

backend web02 {
    .host = "192.168.1.11";
    .port = "80";
    .probe = health_check;
}

backend web03 {
    .host = "192.168.1.12";
    .port = "80";
    .probe = health_check;
}
# Build the round-robin pool once, when the VCL is loaded.
sub vcl_init {
# Create the round_robin director.
new web_director = directors.round_robin();
# Register the member backends.
web_director.add_backend(web01);
web_director.add_backend(web02);
web_director.add_backend(web03);
}
# Let the director choose the backend for each request.
sub vcl_recv {
# Ask the director for a backend.
set req.backend_hint = web_director.backend();
}
8.3.2 Fallback Director
回退 Director 按优先级选择后端,第一个健康的后端会被选中。
vcl 4.1;
import directors;
# Shared health probe. Declared ahead of the backends that reference it so
# `.probe = health_check` never depends on forward-reference support in the
# VCL compiler.
probe health_check {
    .url = "/health";
    .timeout = 2s;
    .interval = 5s;
    # Healthy when at least 3 of the last 5 polls succeed.
    .window = 5;
    .threshold = 3;
}

backend primary {
    .host = "192.168.1.10";
    .port = "80";
    .probe = health_check;
}

backend secondary {
    .host = "192.168.1.11";
    .port = "80";
    .probe = health_check;
}

backend tertiary {
    .host = "192.168.1.12";
    .port = "80";
    .probe = health_check;
}
# Build the fallback chain once, when the VCL is loaded.
sub vcl_init {
# Create the fallback director.
new failover = directors.fallback();
# Registration order is the priority order: the first backend added is tried first.
failover.add_backend(primary);
failover.add_backend(secondary);
failover.add_backend(tertiary);
}
# Send each request to whatever the fallback director selects.
sub vcl_recv {
set req.backend_hint = failover.backend();
}
8.3.3 Random Director
随机 Director 按权重随机选择后端。
vcl 4.1;
import directors;
# Shared health probe. Declared ahead of the backends that reference it so
# `.probe = health_check` never depends on forward-reference support in the
# VCL compiler.
probe health_check {
    .url = "/health";
    .timeout = 2s;
    .interval = 5s;
    # Healthy when at least 3 of the last 5 polls succeed.
    .window = 5;
    .threshold = 3;
}

backend web01 {
    .host = "192.168.1.10";
    .port = "80";
    .probe = health_check;
}

backend web02 {
    .host = "192.168.1.11";
    .port = "80";
    .probe = health_check;
}

backend web03 {
    .host = "192.168.1.12";
    .port = "80";
    .probe = health_check;
}
# Build the weighted random pool once, when the VCL is loaded.
sub vcl_init {
# Create the random director.
new random_director = directors.random();
# Register backends; the second argument is the relative weight.
random_director.add_backend(web01, 10); # weight 10
random_director.add_backend(web02, 5); # weight 5
random_director.add_backend(web03, 1); # weight 1
}
# Send each request to whatever the random director selects.
sub vcl_recv {
set req.backend_hint = random_director.backend();
}
8.3.4 Hash Director
哈希 Director 根据请求的特定属性(如 URL、Cookie)选择后端,确保同一请求总是到达同一后端。
vcl 4.1;
import directors;
# Shared health probe. Declared ahead of the backends that reference it so
# `.probe = health_check` never depends on forward-reference support in the
# VCL compiler.
probe health_check {
    .url = "/health";
    .timeout = 2s;
    .interval = 5s;
    # Healthy when at least 3 of the last 5 polls succeed.
    .window = 5;
    .threshold = 3;
}

backend cache01 {
    .host = "192.168.1.10";
    .port = "80";
    .probe = health_check;
}

backend cache02 {
    .host = "192.168.1.11";
    .port = "80";
    .probe = health_check;
}

backend cache03 {
    .host = "192.168.1.12";
    .port = "80";
    .probe = health_check;
}
# Build the hash pool once, when the VCL is loaded. All members get weight 1.0.
sub vcl_init {
# Create the hash director.
new hash_director = directors.hash();
hash_director.add_backend(cache01, 1.0);
hash_director.add_backend(cache02, 1.0);
hash_director.add_backend(cache03, 1.0);
}
# Pick the backend from a hash of a request attribute. The active line keys on
# the URL; the commented-out lines show alternative keys.
sub vcl_recv {
# Hash on the URL (cache affinity).
set req.backend_hint = hash_director.backend(req.url);
# Alternative: hash on the client IP.
# set req.backend_hint = hash_director.backend(client.ip);
# Alternative: hash on the session ID carried in the Cookie header.
# if (req.http.Cookie ~ "session_id=") {
# set req.backend_hint = hash_director.backend(
# regsub(req.http.Cookie, ".*session_id=([^;]+).*", "\1")
# );
# }
}
8.4 健康检查
8.4.1 健康检查配置
# Basic health check: GET /health every 5 seconds, 2-second timeout.
probe basic_health {
    .url = "/health";
    .interval = 5s;
    .timeout = 2s;
    # Healthy when at least 3 of the last 5 polls succeed.
    .threshold = 3;
    .window = 5;
}
# Advanced health check: sends a hand-written HTTP request instead of `.url`.
# Each string below is one request line.
probe advanced_health {
    .request =
        "GET /health HTTP/1.1"
        "Host: backend.local"
        "Connection: close"
        "User-Agent: Varnish-Health-Check";
    .timeout = 3s;
    .interval = 10s;
    # Healthy when at least 8 of the last 10 polls succeed.
    .window = 10;
    .threshold = 8;
    # The status code is the only part of the response a probe evaluates.
    # Varnish Cache probes have no response-body matching attribute (a `.match`
    # line fails VCL compilation), so the endpoint must signal problems through
    # a non-200 status.
    .expected_response = 200;
}
# Minimal probe with neither .url nor .request set.
# NOTE(review): despite its name this is not a TCP-only check. With no .url,
# Varnish Cache is documented to probe "/" over HTTP and expect a 200, so the
# backend must answer that request successfully. Confirm against the VCL
# reference for the deployed version.
probe tcp_health {
.timeout = 1s;
.interval = 5s;
.window = 5;
.threshold = 3;
}
8.4.2 健康检查参数详解
| 参数 | 说明 | 默认值 |
|---|---|---|
| `.url` | 检查的 URL 路径 | `/` |
| `.request` | 自定义 HTTP 请求 | - |
| `.timeout` | 超时时间 | 2s |
| `.interval` | 检查间隔 | 5s |
| `.window` | 滑动窗口大小 | 8 |
| `.threshold` | 健康阈值 | 3 |
| `.expected_response` | 期望的状态码 | 200 |
| `.match` | 响应体匹配(开源版 Varnish Cache 的 probe 不支持该属性,使用会导致 VCL 编译失败) | - |
| `.initial` | 初始状态(启动时视为成功的探测次数) | threshold - 1 |
8.4.3 健康检查端点实现
# Flask 示例:健康检查端点
from flask import Flask, jsonify
import psutil
app = Flask(__name__)
@app.route('/health')
def health_check():
    """Basic liveness endpoint.

    Returns:
        A JSON response with ``status`` fixed to ``"healthy"`` and
        ``timestamp`` set to the current UTC time in ISO-8601 form.
    """
    # Imported locally: the snippet's import block does not bring in datetime,
    # so the original call raised NameError on the first request.
    from datetime import datetime

    return jsonify({
        "status": "healthy",
        "timestamp": datetime.utcnow().isoformat()
    })
@app.route('/health/detailed')
def detailed_health():
    """Detailed health endpoint covering CPU, memory and root-disk usage.

    Returns:
        A ``(response, status_code)`` tuple. The JSON body carries one entry
        per check plus an overall ``status``. The status code is 200 when every
        check is healthy and 503 otherwise.
    """
    # Sample each metric once so the reported value is the one that was judged.
    # NOTE(review): psutil documents that cpu_percent() without an interval
    # measures since the previous call, so the first request after start-up may
    # report 0.0. Confirm whether a short interval is acceptable here.
    cpu_pct = psutil.cpu_percent()
    mem_pct = psutil.virtual_memory().percent
    disk_pct = psutil.disk_usage('/').percent

    def _check(value, limit):
        # A check is healthy while the value is strictly below its limit.
        return {
            "status": "healthy" if value < limit else "unhealthy",
            "value": value
        }

    checks = {
        "cpu": _check(cpu_pct, 80),
        "memory": _check(mem_pct, 80),
        "disk": _check(disk_pct, 90)
    }

    # Derive the overall status from the individual checks. It used to be
    # hard-coded to "healthy", which made the 503 branch unreachable.
    all_ok = all(entry["status"] == "healthy" for entry in checks.values())
    health = {
        "status": "healthy" if all_ok else "unhealthy",
        "checks": checks
    }

    status_code = 200 if all_ok else 503
    return jsonify(health), status_code
8.4.4 查看健康检查状态
# Show the health state of every backend.
varnishadm backend.list
# Example output:
# 200 Backend name Admin Probe
# web01(192.168.1.10,,80) probe Success 5/5
# web02(192.168.1.11,,80) probe Success 3/5
# web03(192.168.1.12,,80) probe Sick 0/5
# Show detailed probe information (-p).
varnishadm backend.list -p
8.5 高级负载均衡策略
8.5.1 基于 URL 的路由
vcl 4.1;
import directors;
# Shared health probe. Declared ahead of the backends that reference it so
# `.probe = health_check` never depends on forward-reference support in the
# VCL compiler.
probe health_check {
    .url = "/health";
    .timeout = 2s;
    .interval = 5s;
    # Healthy when at least 3 of the last 5 polls succeed.
    .window = 5;
    .threshold = 3;
}

# API servers
backend api01 {
    .host = "192.168.1.20";
    .port = "80";
    .probe = health_check;
}

backend api02 {
    .host = "192.168.1.21";
    .port = "80";
    .probe = health_check;
}

# Web servers
backend web01 {
    .host = "192.168.1.10";
    .port = "80";
    .probe = health_check;
}

backend web02 {
    .host = "192.168.1.11";
    .port = "80";
    .probe = health_check;
}

# Static asset server
backend static01 {
    .host = "192.168.1.30";
    .port = "80";
    .probe = health_check;
}
# Build one round-robin pool per traffic class when the VCL is loaded.
sub vcl_init {
# API server pool.
new api_pool = directors.round_robin();
api_pool.add_backend(api01);
api_pool.add_backend(api02);
# Web server pool.
new web_pool = directors.round_robin();
web_pool.add_backend(web01);
web_pool.add_backend(web02);
# Static asset server pool.
new static_pool = directors.round_robin();
static_pool.add_backend(static01);
}
# Route by URL path:
#   /api/*               -> api_pool
#   static file suffixes -> static_pool
#   everything else      -> web_pool
sub vcl_recv {
    if (req.url ~ "^/api/") {
        set req.backend_hint = api_pool.backend();
    } elseif (req.url ~ "\.(css|js|jpg|png|gif|webp|svg|ico|woff2)(\?.*)?$") {
        # req.url includes the query string, so an optional "?..." is allowed
        # after the suffix. Without it, cache-busted URLs such as
        # /app.css?v=3 would miss the static pool.
        set req.backend_hint = static_pool.backend();
    } else {
        set req.backend_hint = web_pool.backend();
    }
}
8.5.2 基于域名的路由
vcl 4.1;
import directors;
# Shared health probe. Declared ahead of the backends that reference it so
# `.probe = health_check` never depends on forward-reference support in the
# VCL compiler.
probe health_check {
    .url = "/health";
    .timeout = 2s;
    .interval = 5s;
    # Healthy when at least 3 of the last 5 polls succeed.
    .window = 5;
    .threshold = 3;
}

backend blog01 {
    .host = "192.168.1.40";
    .port = "80";
    .probe = health_check;
}

backend shop01 {
    .host = "192.168.1.50";
    .port = "80";
    .probe = health_check;
}

backend main01 {
    .host = "192.168.1.10";
    .port = "80";
    .probe = health_check;
}
# Route by Host header. VCL has no switch/case statement, so the dispatch is
# an if / elsif chain.
#   blog.example.com -> blog01
#   shop.example.com -> shop01
#   www.example.com  -> main01
#   example.com      -> redirect to https://www.example.com<url>
#   anything else    -> 404
sub vcl_recv {
    if (req.http.Host == "blog.example.com") {
        set req.backend_hint = blog01;
    } elsif (req.http.Host == "shop.example.com") {
        set req.backend_hint = shop01;
    } elsif (req.http.Host == "www.example.com") {
        set req.backend_hint = main01;
    } elsif (req.http.Host == "example.com") {
        # 750 is a private pseudo-status; vcl_synth below turns it into a 301,
        # using the reason text as the redirect target.
        return (synth(750, "https://www.example.com" + req.url));
    } else {
        return (synth(404, "Unknown host"));
    }
}

# Turn the internal 750 pseudo-status into a real redirect. Without this the
# client would receive a literal "750" response and no Location header.
sub vcl_synth {
    if (resp.status == 750) {
        set resp.http.Location = resp.reason;
        set resp.status = 301;
        set resp.reason = "Moved Permanently";
        return (deliver);
    }
}
8.5.3 灰度发布
vcl 4.1;
import directors;
# Shared health probe. Declared ahead of the backends that reference it so
# `.probe = health_check` never depends on forward-reference support in the
# VCL compiler.
probe health_check {
    .url = "/health";
    .timeout = 2s;
    .interval = 5s;
    # Healthy when at least 3 of the last 5 polls succeed.
    .window = 5;
    .threshold = 3;
}

# Current production release.
backend stable {
    .host = "192.168.1.10";
    .port = "80";
    .probe = health_check;
}

# New release receiving a slice of the traffic.
backend canary {
    .host = "192.168.1.20";
    .port = "80";
    .probe = health_check;
}
# std.random() comes from vmod_std; VCL has no built-in randombool().
import std;

# Canary release: about 10% of requests go to the new version, and selected
# users or an opt-in cookie always do. The three methods run in order, so
# methods 2 and 3 override the random choice made by method 1.
sub vcl_recv {
    # Method 1: random split. std.random(0, 100) yields a REAL in [0, 100),
    # so "< 10" is true for roughly 10% of requests. The draw happens per
    # request and is not sticky per client.
    if (std.random(0, 100) < 10.0) {
        set req.backend_hint = canary;
        set req.http.X-Backend-Version = "canary";
    } else {
        set req.backend_hint = stable;
        set req.http.X-Backend-Version = "stable";
    }

    # Method 2: specific user IDs always get the canary.
    if (req.http.X-User-ID ~ "^(1000|1001|1002|1003|1004|1005|1006|1007|1008|1009)$") {
        set req.backend_hint = canary;
        set req.http.X-Backend-Version = "canary";
    }

    # Method 3: opt-in through a cookie flag.
    if (req.http.Cookie ~ "canary=true") {
        set req.backend_hint = canary;
        set req.http.X-Backend-Version = "canary";
    }
}
8.6 故障转移
8.6.1 自动故障转移
vcl 4.1;
import directors;
# Shared health probe. Declared ahead of the backends that reference it so
# `.probe = health_check` never depends on forward-reference support in the
# VCL compiler.
probe health_check {
    .url = "/health";
    .timeout = 2s;
    .interval = 5s;
    # Healthy when at least 3 of the last 5 polls succeed.
    .window = 5;
    .threshold = 3;
}

backend primary {
    .host = "192.168.1.10";
    .port = "80";
    .probe = health_check;
}

backend secondary {
    .host = "192.168.1.11";
    .port = "80";
    .probe = health_check;
}

backend tertiary {
    .host = "192.168.1.12";
    .port = "80";
    .probe = health_check;
}
# Build the fallback chain when the VCL is loaded. Registration order is the
# priority order: primary, then secondary, then tertiary.
sub vcl_init {
new failover = directors.fallback();
failover.add_backend(primary);
failover.add_backend(secondary);
failover.add_backend(tertiary);
}
# Send each request to whatever the fallback director selects.
sub vcl_recv {
set req.backend_hint = failover.backend();
}
# Retry the fetch when the backend answers with a 5xx, up to three retries.
# Backend-side subroutines cannot read req.*; the retry counter there is
# bereq.retries.
sub vcl_backend_response {
    if (beresp.status >= 500 && bereq.retries < 3) {
        return (retry);
    }
    # Record how many retries this fetch needed. vcl_deliver drops the header
    # again on cache hits.
    if (bereq.retries > 0) {
        set beresp.http.X-Failover-Count = bereq.retries;
    }
}

# On a retry, move to the next backend explicitly. The fallback director
# cannot do this: it keeps returning the first backend its probe considers
# healthy, which is the same backend that just produced the 5xx.
# This explicit choice does not consult probe health.
sub vcl_backend_fetch {
    if (bereq.retries == 1) {
        set bereq.backend = secondary;
    } elsif (bereq.retries >= 2) {
        set bereq.backend = tertiary;
    }
}

# Expose the failover count only on the response that actually went through
# the retried fetch. req.restarts is not used because `return (retry)` does
# not increment it.
sub vcl_deliver {
    if (obj.hits > 0) {
        unset resp.http.X-Failover-Count;
    }
}
8.6.2 Grace 模式故障转移
# std.healthy() comes from vmod_std and needs this import to compile.
import std;

# Choose how stale an object this request will accept, based on the health
# of the backend currently selected in req.backend_hint.
# Stale delivery also requires the object itself to have been stored with a
# grace period of at least the value requested here.
sub vcl_recv {
    if (std.healthy(req.backend_hint)) {
        # Healthy backend: tolerate only briefly stale content.
        set req.grace = 30s;
    } else {
        # Sick backend: accept content up to one hour past its TTL.
        set req.grace = 1h;
    }
}
# Deliver whatever the cache lookup found.
# The earlier version branched on backend health but returned deliver on both
# branches, so the check had no effect. Its `return (miss)` fall-through is not
# an accepted return from vcl_hit in current Varnish releases.
# NOTE(review): stale objects are refreshed in the background by Varnish
# itself; confirm for the deployed version.
sub vcl_hit {
    return (deliver);
}
8.7 Director 监控
8.7.1 查看 Director 状态
# There is no "directors.list" CLI command; backend.list is the command for
# backend state.
# NOTE(review): recent releases are expected to list directors in the same
# output; confirm on the deployed version.
varnishadm backend.list
# Same listing with detailed probe information.
varnishadm backend.list -p
8.7.2 监控脚本
#!/bin/bash
# monitor-backends.sh - backend health monitor
#
# Once a minute: print an ALERT line for every backend that
# `varnishadm backend.list` reports as sick, then print the backend and
# fetch counters.

VARNISHADM="varnishadm"
VARNISHSTAT="varnishstat"

while true; do
    echo "=== Backend Status $(date) ==="

    # IFS= and -r keep each line intact (no whitespace trimming, no
    # backslash processing).
    $VARNISHADM backend.list | while IFS= read -r line; do
        # Case-insensitive match, because the casing of the health column
        # differs between Varnish releases.
        if printf '%s\n' "$line" | grep -qi "sick"; then
            echo "ALERT: Backend is sick - $line"
            # Hook for an alerting integration:
            # send_alert "Backend sick: $line"
        fi
    done

    echo "--- Statistics ---"
    # Counters are read with varnishstat; the management CLI has no "stats"
    # command. -1 prints the counters once and exits.
    $VARNISHSTAT -1 | grep -E "backend|fetch"

    sleep 60
done
8.8 注意事项
重要
- Director 是在 `vcl_init` 中初始化的,不能在运行时修改
- 健康检查会消耗后端资源,合理设置检查间隔
- 使用 `fallback` Director 时,注意后端的添加顺序(优先级)
- `hash` Director 的 key 选择直接影响负载分布
- 故障转移时注意避免请求循环
- 灰度发布时确保 canary 版本的健康检查端点正确
8.9 业务场景
场景一:读写分离架构
vcl 4.1;
import directors;
# Shared health probe. Declared ahead of the backends that reference it so
# `.probe = health_check` never depends on forward-reference support in the
# VCL compiler.
probe health_check {
    .url = "/health";
    .timeout = 2s;
    .interval = 5s;
    # Healthy when at least 3 of the last 5 polls succeed.
    .window = 5;
    .threshold = 3;
}

# Handles write traffic.
backend master {
    .host = "192.168.1.10";
    .port = "80";
    .probe = health_check;
}

# Read replicas.
backend slave01 {
    .host = "192.168.1.11";
    .port = "80";
    .probe = health_check;
}

backend slave02 {
    .host = "192.168.1.12";
    .port = "80";
    .probe = health_check;
}
# Build the round-robin pool of read replicas when the VCL is loaded.
sub vcl_init {
new read_pool = directors.round_robin();
read_pool.add_backend(slave01);
read_pool.add_backend(slave02);
}
# Read/write split: POST, PUT, DELETE and PATCH go to master and bypass the
# cache; every other method is served from the read pool.
sub vcl_recv {
    if (req.method != "POST" && req.method != "PUT" &&
        req.method != "DELETE" && req.method != "PATCH") {
        # Read path: round-robin across the replicas.
        set req.backend_hint = read_pool.backend();
    } else {
        # Write path: always the master, never cached.
        set req.backend_hint = master;
        return (pass);
    }
}
场景二:多数据中心
vcl 4.1;
import directors;
# Shared health probe. Declared ahead of the backends that reference it so
# `.probe = health_check` never depends on forward-reference support in the
# VCL compiler.
probe health_check {
    .url = "/health";
    .timeout = 3s;
    .interval = 10s;
    # Healthy when at least 3 of the last 5 polls succeed.
    .window = 5;
    .threshold = 3;
}

# Data center 1
backend dc1_web01 {
    .host = "10.0.1.10";
    .port = "80";
    .probe = health_check;
}

backend dc1_web02 {
    .host = "10.0.1.11";
    .port = "80";
    .probe = health_check;
}

# Data center 2
backend dc2_web01 {
    .host = "10.0.2.10";
    .port = "80";
    .probe = health_check;
}

backend dc2_web02 {
    .host = "10.0.2.11";
    .port = "80";
    .probe = health_check;
}
# Two-level setup: a round-robin pool per data center, wrapped in a fallback
# director that prefers the local pool and switches to the remote pool when
# the local one has no healthy member.
sub vcl_init {
    # Local data center.
    new local_dc = directors.round_robin();
    local_dc.add_backend(dc1_web01);
    local_dc.add_backend(dc1_web02);

    # Remote data center (used only as fallback).
    new remote_dc = directors.round_robin();
    remote_dc.add_backend(dc2_web01);
    remote_dc.add_backend(dc2_web02);

    # Global failover across the two pools. The pools themselves are nested
    # here, not one server from each, so every server takes part and both
    # pools keep balancing internally.
    new global = directors.fallback();
    global.add_backend(local_dc.backend());
    global.add_backend(remote_dc.backend());
}

# Route through the global director. Selecting local_dc directly would never
# fail over to the remote data center.
sub vcl_recv {
    set req.backend_hint = global.backend();
}