PostgreSQL 完全指南 / 05 - SQL 进阶
第 05 章 · SQL 进阶
本章介绍 PostgreSQL 中最强大的 SQL 特性:CTE(公用表表达式)、窗口函数、LATERAL 连接、JSON/JSONB 操作等。
5.1 CTE(Common Table Expression)公用表表达式
CTE 使用 WITH 子句定义临时结果集,使复杂查询更清晰。
基本 CTE
-- 查询薪资高于部门平均薪资的员工
WITH dept_avg AS (
SELECT department, AVG(salary) AS avg_salary
FROM employees
GROUP BY department
)
SELECT e.name, e.salary, d.avg_salary
FROM employees e
JOIN dept_avg d ON e.department = d.department
WHERE e.salary > d.avg_salary;
多 CTE 链式引用
WITH
active_employees AS (
SELECT * FROM employees WHERE is_active = TRUE
),
dept_stats AS (
SELECT department, COUNT(*) AS cnt, AVG(salary)::NUMERIC(12,2) AS avg_sal
FROM active_employees
GROUP BY department
),
top_dept AS (
SELECT department FROM dept_stats WHERE avg_sal > 15000
)
SELECT ae.name, ae.salary, ae.department
FROM active_employees ae
WHERE ae.department IN (SELECT department FROM top_dept)
ORDER BY ae.salary DESC;
递归 CTE(Recursive CTE)
递归 CTE 是处理层级数据(组织架构、树形结构)的利器。
-- 创建组织架构表
CREATE TABLE org_chart (
id SERIAL PRIMARY KEY,
name TEXT NOT NULL,
manager_id INT REFERENCES org_chart(id)
);
INSERT INTO org_chart (name, manager_id) VALUES
('CEO', NULL),
('CTO', 1),
('CFO', 1),
('VP Engineering', 2),
('VP Product', 2),
('Senior Dev', 4),
('Junior Dev', 4),
('Accountant', 3);
-- 递归查询:CEO → 底层的完整汇报链
WITH RECURSIVE hierarchy AS (
-- 锚点:顶层(无上级)
SELECT id, name, manager_id, 0 AS depth,
name::TEXT AS path
FROM org_chart
WHERE manager_id IS NULL
UNION ALL
-- 递归:连接下级
SELECT o.id, o.name, o.manager_id, h.depth + 1,
h.path || ' → ' || o.name
FROM org_chart o
INNER JOIN hierarchy h ON o.manager_id = h.id
)
SELECT
repeat(' ', depth) || name AS org_tree,
depth,
path
FROM hierarchy
ORDER BY path;
输出:
org_tree | depth | path
--------------------+-------+--------------------------------------------
CEO | 0 | CEO
CTO | 1 | CEO → CTO
VP Engineering | 2 | CEO → CTO → VP Engineering
Junior Dev | 3 | CEO → CTO → VP Engineering → Junior Dev
Senior Dev | 3 | CEO → CTO → VP Engineering → Senior Dev
VP Product | 2 | CEO → CTO → VP Product
CFO | 1 | CEO → CFO
Accountant | 2 | CEO → CFO → Accountant
物化 CTE(MATERIALIZED / NOT MATERIALIZED)
-- 强制物化(结果存入临时表,多次引用时不重复计算)
WITH expensive_query AS MATERIALIZED (
SELECT * FROM large_table WHERE complex_condition(id)
)
SELECT * FROM expensive_query a
JOIN expensive_query b ON a.id = b.parent_id;
-- 强制不物化(展开为子查询,可能更优)
WITH simple_filter AS NOT MATERIALIZED (
SELECT id, name FROM users WHERE is_active
)
SELECT * FROM simple_filter WHERE id > 100;
💡 技巧:PG 优化器会自动决定是否物化 CTE。如果 CTE 被引用多次且结果集不大,MATERIALIZED 通常更快;如果 CTE 只引用一次且可下推条件,NOT MATERIALIZED 更好。
5.2 窗口函数(Window Functions)
窗口函数是 PostgreSQL 最强大的分析能力之一,它可以在不折叠行的情况下执行聚合计算。
核心概念
-- 语法
<窗口函数>() OVER (
[PARTITION BY <分组列>]
[ORDER BY <排序列> [ASC|DESC]]
[ROWS|RANGE <帧范围>]
)
常用窗口函数
| 函数 | 作用 | 示例 |
|---|---|---|
ROW_NUMBER() | 行号(无并列) | 1, 2, 3, 4 |
RANK() | 排名(有并列跳号) | 1, 2, 2, 4 |
DENSE_RANK() | 排名(有并列不跳号) | 1, 2, 2, 3 |
NTILE(n) | 分成 n 组 | 1, 1, 2, 2 |
LAG(col, n) | 前第 n 行的值 | 前一行 |
LEAD(col, n) | 后第 n 行的值 | 后一行 |
FIRST_VALUE(col) | 窗口内第一行的值 | — |
LAST_VALUE(col) | 窗口内最后一行的值 | — |
NTH_VALUE(col, n) | 窗口内第 n 行的值 | — |
SUM() OVER | 累计/滑动求和 | — |
AVG() OVER | 累计/滑动平均 | — |
COUNT() OVER | 累计/滑动计数 | — |
实战示例
-- 创建示例数据
CREATE TABLE sales (
id SERIAL PRIMARY KEY,
sale_date DATE NOT NULL,
product TEXT NOT NULL,
region TEXT NOT NULL,
amount NUMERIC(12,2) NOT NULL
);
INSERT INTO sales (sale_date, product, region, amount) VALUES
('2024-01-15', 'Laptop', 'North', 5000),
('2024-01-20', 'Phone', 'South', 3000),
('2024-02-10', 'Laptop', 'North', 5500),
('2024-02-15', 'Phone', 'East', 2800),
('2024-03-05', 'Laptop', 'West', 4800),
('2024-03-10', 'Phone', 'North', 3200),
('2024-03-20', 'Laptop', 'South', 5200),
('2024-04-01', 'Phone', 'East', 3100),
('2024-04-15', 'Laptop', 'North', 5300),
('2024-04-20', 'Phone', 'West', 2900);
-- 排名:每个地区销售额排名
SELECT
region, product, amount,
ROW_NUMBER() OVER (PARTITION BY region ORDER BY amount DESC) AS row_num,
RANK() OVER (PARTITION BY region ORDER BY amount DESC) AS rank,
DENSE_RANK() OVER (PARTITION BY region ORDER BY amount DESC) AS dense_rank
FROM sales;
-- 累计求和:每月累计销售额
SELECT
sale_date, product, amount,
SUM(amount) OVER (ORDER BY sale_date) AS running_total
FROM sales;
-- 移动平均:3 行滑动平均
SELECT
sale_date, amount,
AVG(amount) OVER (ORDER BY sale_date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg_3
FROM sales;
-- 同比/环比(LAG/LEAD)
SELECT
sale_date, amount,
LAG(amount, 1) OVER (ORDER BY sale_date) AS prev_amount,
amount - LAG(amount, 1) OVER (ORDER BY sale_date) AS diff,
ROUND(
(amount - LAG(amount, 1) OVER (ORDER BY sale_date))
/ NULLIF(LAG(amount, 1) OVER (ORDER BY sale_date), 0) * 100,
2) AS pct_change
FROM sales;
-- 每个产品类别内占比
SELECT
product, region, amount,
ROUND(amount / SUM(amount) OVER (PARTITION BY product) * 100, 2) AS pct_of_product,
ROUND(amount / SUM(amount) OVER () * 100, 2) AS pct_of_total
FROM sales;
窗口帧(Window Frame)
ROWS BETWEEN ... AND ... — 按物理行
RANGE BETWEEN ... AND ... — 按值范围
GROUPS BETWEEN ... AND ... — 按分组(PG 11+)
帧边界:
UNBOUNDED PRECEDING — 从窗口第一行
n PRECEDING — 前 n 行
CURRENT ROW — 当前行
n FOLLOWING — 后 n 行
UNBOUNDED FOLLOWING — 到窗口最后一行
-- 默认帧:RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
-- 有 ORDER BY 时默认使用上述帧
-- 累计总和
SUM(amount) OVER (ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
-- 前后各一行的平均值
AVG(amount) OVER (ORDER BY date ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
-- 从当前行到最后一行的总和
SUM(amount) OVER (ORDER BY date ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)
WINDOW 子句(复用窗口定义)
SELECT
region, product, amount,
ROW_NUMBER() OVER w AS row_num,
SUM(amount) OVER w AS running_total,
AVG(amount) OVER w AS running_avg
FROM sales
WINDOW w AS (PARTITION BY region ORDER BY sale_date);
5.3 LATERAL 连接
LATERAL 允许子查询引用前面表的列,实现"每行执行一次子查询"。
-- 每个部门薪资最高的 3 名员工
SELECT d.name AS dept, e.*
FROM departments d
CROSS JOIN LATERAL (
SELECT emp.name, emp.salary
FROM employees emp
WHERE emp.department = d.name
ORDER BY emp.salary DESC
LIMIT 3
) e;
-- 为每个产品生成日期序列
SELECT p.product_name, d.dt
FROM products p
CROSS JOIN LATERAL (
SELECT generate_series('2024-01-01'::DATE, '2024-12-31'::DATE, '1 month') AS dt
) d;
-- 每个用户的最后一条订单
SELECT u.name, o.*
FROM users u
LEFT JOIN LATERAL (
SELECT order_id, total, created_at
FROM orders
WHERE user_id = u.id
ORDER BY created_at DESC
LIMIT 1
) o ON TRUE;
💡 技巧:LATERAL 本质上等价于 SQL 的 for 循环——对左表的每一行,执行一次右子查询。在需要 Top-N per group 的场景中非常实用。
5.4 JSON / JSONB 操作
PostgreSQL 的 JSONB 类型使其可以充当文档数据库。
创建和插入
CREATE TABLE events (
id SERIAL PRIMARY KEY,
data JSONB NOT NULL
);
INSERT INTO events (data) VALUES
('{"type": "click", "user_id": 1, "page": "/home", "timestamp": "2024-03-15T10:00:00Z", "tags": ["web", "mobile"]}'),
('{"type": "purchase", "user_id": 2, "amount": 99.99, "items": [{"sku": "A1", "qty": 2}, {"sku": "B2", "qty": 1}]}'),
('{"type": "click", "user_id": 1, "page": "/products", "tags": ["web"]}');
提取数据
-- -> 返回 JSON 对象(保留 JSON 类型)
SELECT data -> 'type' AS type_json FROM events;
-- "click", "purchase"
-- ->> 返回文本
SELECT data ->> 'type' AS type_text FROM events;
-- click, purchase
-- 嵌套访问
SELECT data -> 'items' -> 0 ->> 'sku' AS first_sku FROM events;
-- A1
-- #> 路径访问(返回 JSON)
SELECT data #> '{items,0,sku}' FROM events;
-- #>> 路径访问(返回文本)
SELECT data #>> '{items,0,sku}' FROM events;
JSONB 查询操作符
-- @> 包含
SELECT * FROM events WHERE data @> '{"type": "click"}';
-- <@ 被包含
SELECT * FROM events WHERE '{"type": "click"}'::JSONB <@ data;
-- ? 键存在
SELECT * FROM events WHERE data ? 'items';
-- ?| 任一键存在
SELECT * FROM events WHERE data ?| ARRAY['items', 'tags'];
-- ?& 所有键存在
SELECT * FROM events WHERE data ?& ARRAY['type', 'user_id'];
-- @? JSONPath 查询(PG 12+)
SELECT * FROM events WHERE data @? '$.items[*] ? (@.qty > 1)';
JSONB 索引
-- GIN 索引(支持 @>, ?, ?|, ?&)
CREATE INDEX idx_events_data ON events USING GIN (data);
-- JSONB Path 操作符索引
CREATE INDEX idx_events_type ON events USING GIN ((data -> 'type'));
-- 表达式索引
CREATE INDEX idx_events_user ON events ((data ->> 'user_id'));
JSONB 修改操作
-- || 合并/更新
UPDATE events SET data = data || '{"new_key": "new_value"}' WHERE id = 1;
-- - 删除键
UPDATE events SET data = data - 'new_key' WHERE id = 1;
-- #- 按路径删除
UPDATE events SET data = data #- '{items,0}' WHERE id = 2;
-- jsonb_set 按路径设置值
UPDATE events SET data = jsonb_set(data, '{type}', '"pageview"') WHERE id = 1;
-- jsonb_insert 按路径插入新值
UPDATE events SET data = jsonb_insert(data, '{tags,-1}', '"tablet"') WHERE id = 1;
JSON 聚合函数
-- jsonb_agg:将行聚合成 JSON 数组
SELECT data ->> 'type' AS type, jsonb_agg(data ->> 'page') AS pages
FROM events
WHERE data ? 'page'
GROUP BY data ->> 'type';
-- jsonb_object_agg:将行聚合成 JSON 对象
SELECT jsonb_object_agg(id, data ->> 'type') FROM events;
-- jsonb_build_object:构建 JSON 对象
SELECT jsonb_build_object(
'total', COUNT(*),
'types', jsonb_agg(DISTINCT data ->> 'type')
) FROM events;
JSON_TABLE(PG 17+)
-- JSON_TABLE:将 JSON 数组展开为关系表
SELECT jt.*
FROM events e,
JSON_TABLE(e.data, '$.items[*]' COLUMNS (
sku TEXT PATH '$.sku',
qty INT PATH '$.qty'
)) jt
WHERE e.data ->> 'type' = 'purchase';
5.5 其他高级查询技巧
GROUPING SETS / ROLLUP / CUBE
-- ROLLUP:逐级汇总
SELECT
COALESCE(department, '【合计】') AS department,
COALESCE(is_active::TEXT, '【小计】') AS is_active,
COUNT(*), AVG(salary)::NUMERIC(12,2)
FROM employees
GROUP BY ROLLUP(department, is_active);
-- CUBE:所有维度组合
SELECT department, is_active, COUNT(*)
FROM employees
GROUP BY CUBE(department, is_active);
-- GROUPING SETS:指定分组组合
SELECT department, is_active, COUNT(*)
FROM employees
GROUP BY GROUPING SETS (
(department, is_active),
(department),
(is_active),
()
);
FILTER 子句
-- 带过滤的聚合(PG 9.4+)
SELECT
department,
COUNT(*) AS total,
COUNT(*) FILTER (WHERE salary > 15000) AS high_salary,
COUNT(*) FILTER (WHERE hire_date > '2024-01-01') AS new_hire,
AVG(salary) FILTER (WHERE is_active) AS avg_active_salary
FROM employees
GROUP BY department;
VALUES 表构造器
-- 用 VALUES 创建临时数据集
SELECT * FROM (VALUES
(1, 'Jan', 100),
(2, 'Feb', 150),
(3, 'Mar', 200)
) AS months(id, name, sales)
ORDER BY sales DESC;
业务场景
| 场景 | 推荐技术 |
|---|---|
| 组织架构/树形查询 | 递归 CTE |
| 报表累计/同比/环比 | 窗口函数 (LAG, LEAD, SUM OVER) |
| Top-N 每组取前 N | LATERAL + LIMIT 或 ROW_NUMBER |
| 文档存储/灵活 schema | JSONB + GIN 索引 |
| 多维分析报表 | ROLLUP / CUBE / GROUPING SETS |
| 条件聚合统计 | FILTER 子句 |