OpenCV 计算机视觉完全教程 / 第 12 章 — 视频处理
第 12 章 — 视频处理
12.1 视频读取
import cv2

# Open a video file
cap = cv2.VideoCapture("video.mp4")
# Or open a camera (0 = default camera)
# cap = cv2.VideoCapture(0)

# Verify the capture opened before touching its properties
if not cap.isOpened():
    print("无法打开视频")
    exit()

# Query basic video properties
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# Some containers/streams report fps == 0; avoid dividing by zero
duration = total_frames / fps if fps > 0 else 0
print(f"分辨率: {width}×{height}")
print(f"帧率: {fps:.1f} FPS")
print(f"总帧数: {total_frames}")
print(f"时长: {duration:.1f} 秒")

# Read frames one by one until the stream ends
while True:
    ret, frame = cap.read()
    if not ret:  # end of file or read error
        break
    # Per-frame processing goes here (grayscale conversion as an example)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    cv2.imshow("视频", frame)
    # waitKey also pumps the GUI event loop; press 'q' to quit
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
视频属性常量
| 属性 | 常量 | 说明 |
|---|---|---|
| 帧率 | CAP_PROP_FPS | 每秒帧数 |
| 宽度 | CAP_PROP_FRAME_WIDTH | 帧宽度 |
| 高度 | CAP_PROP_FRAME_HEIGHT | 帧高度 |
| 总帧数 | CAP_PROP_FRAME_COUNT | 总帧数 |
| 当前帧 | CAP_PROP_POS_FRAMES | 当前位置 |
| 时间戳 | CAP_PROP_POS_MSEC | 当前时间(ms) |
| 编解码器 | CAP_PROP_FOURCC | 4 字符编码 |
12.2 视频写入
import cv2
import numpy as np

cap = cv2.VideoCapture(0)  # camera

# Camera parameters; some drivers report fps == 0, so fall back to 30
fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Create the VideoWriter
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # codec
# Common choices: 'mp4v', 'XVID', 'MJPG', 'H264'
out = cv2.VideoWriter("output.mp4", fourcc, fps, (w, h))

while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Optional per-frame processing
    processed = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # VideoWriter expects 3-channel frames of size (w, h): convert back
    processed = cv2.cvtColor(processed, cv2.COLOR_GRAY2BGR)
    out.write(processed)  # write the frame
    cv2.imshow("录制中...", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
out.release()
cv2.destroyAllWindows()
常用编解码器
| 编解码器 | FourCC | 后缀 | 说明 |
|---|---|---|---|
| H.264 | avc1 / H264 | .mp4 | 压缩率高,兼容性好 |
| H.265 | HEVC | .mp4 | 更高压缩率 |
| MJPEG | MJPG | .avi | 无损/低压缩 |
| XVID | XVID | .avi | MPEG-4 |
| VP9 | VP09 | .webm | 开源 |
12.3 帧差分运动检测
"""
motion_detector.py — 基于帧差分的运动检测
"""
import cv2
import numpy as np
def detect_motion():
    """Detect motion from the default camera via frame differencing.

    Consecutive blurred grayscale frames are differenced and thresholded;
    contours above a minimum area are boxed as motion regions.
    Press 'q' to quit. Returns nothing; shows two debug windows.
    """
    cap = cv2.VideoCapture(0)
    # First frame is the reference; bail out cleanly if the camera fails
    ret, prev_frame = cap.read()
    if not ret:
        cap.release()
        return
    prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
    # Heavy blur suppresses sensor noise before differencing
    prev_gray = cv2.GaussianBlur(prev_gray, (21, 21), 0)
    # Morphology kernel is loop-invariant: build it once
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (21, 21), 0)
        # Frame differencing: changed pixels light up in the delta image
        delta = cv2.absdiff(prev_gray, gray)
        _, thresh = cv2.threshold(delta, 25, 255, cv2.THRESH_BINARY)
        # Morphological cleanup: dilate to merge blobs, erode to trim
        thresh = cv2.dilate(thresh, kernel, iterations=2)
        thresh = cv2.erode(thresh, kernel, iterations=1)
        # Locate moving regions
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL,
                                       cv2.CHAIN_APPROX_SIMPLE)
        result = frame.copy()
        motion_count = 0
        for cnt in contours:
            area = cv2.contourArea(cnt)
            if area < 500:  # ignore tiny noise blobs
                continue
            motion_count += 1
            x, y, w, h = cv2.boundingRect(cnt)
            cv2.rectangle(result, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(result, f"Motions: {motion_count}", (10, 30),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
        cv2.imshow("运动检测", result)
        cv2.imshow("差分掩码", thresh)
        prev_gray = gray  # current frame becomes the next reference
        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
12.4 背景减除
import cv2

cap = cv2.VideoCapture("video.mp4")

# Method 1: MOG2 (Gaussian mixture model)
bg_subtractor = cv2.createBackgroundSubtractorMOG2(
    history=500,        # number of frames used to build the model
    varThreshold=16,    # squared Mahalanobis distance threshold
    detectShadows=True  # shadows are marked in the mask (gray, not white)
)
# Method 2: KNN
# bg_subtractor = cv2.createBackgroundSubtractorKNN(
#     history=500, dist2Threshold=400, detectShadows=True
# )

while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Apply background subtraction -> foreground mask (also updates model)
    fg_mask = bg_subtractor.apply(frame)
    # Current background model estimate (not displayed here)
    bg_image = bg_subtractor.getBackgroundImage()
    cv2.imshow("前景掩码", fg_mask)
    cv2.imshow("原始", frame)
    if cv2.waitKey(30) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
背景减除方法对比
| 方法 | 类 | 速度 | 效果 | 适用 |
|---|---|---|---|---|
| MOG2 | createBackgroundSubtractorMOG2 | 快 | 好 | 通用 |
| KNN | createBackgroundSubtractorKNN | 中 | 好 | 复杂场景 |
| GMG | bgsegm.createBackgroundSubtractorGMG | 慢 | 一般 | 静态背景 |
12.5 光流估计
12.5.1 稀疏光流(Lucas-Kanade)
import cv2
import numpy as np

cap = cv2.VideoCapture("video.mp4")

# Lucas-Kanade parameters
lk_params = dict(
    winSize=(15, 15),
    maxLevel=2,
    criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03)
)

# Read the first frame; abort if the video yields nothing
ret, old_frame = cap.read()
if not ret:
    cap.release()
    raise SystemExit(1)
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

# Initial feature points (goodFeaturesToTrack may return None
# when the scene has no trackable corners)
p0 = cv2.goodFeaturesToTrack(old_gray, maxCorners=100,
                             qualityLevel=0.3, minDistance=7)

# Random colors for drawing tracks
colors = np.random.randint(0, 255, (100, 3))
mask = np.zeros_like(old_frame)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    if p0 is not None and len(p0) > 0:
        # Track the feature points into the new frame
        p1, st, err = cv2.calcOpticalFlowPyrLK(
            old_gray, gray, p0, None, **lk_params
        )
        if p1 is not None:
            # Keep only successfully tracked points (status == 1)
            good_new = p1[st == 1]
            good_old = p0[st == 1]
            # Draw the tracks
            for i, (new, old) in enumerate(zip(good_new, good_old)):
                a, b = new.ravel()
                c, d = old.ravel()
                mask = cv2.line(mask, (int(a), int(b)), (int(c), int(d)),
                                colors[i % 100].tolist(), 2)
                frame = cv2.circle(frame, (int(a), int(b)), 3,
                                   colors[i % 100].tolist(), -1)
            # Tracked points seed the next iteration
            p0 = good_new.reshape(-1, 1, 2)
    result = cv2.add(frame, mask)
    cv2.imshow("光流追踪", result)
    old_gray = gray
    if cv2.waitKey(30) & 0xFF == ord('q'):
        break
    # Re-detect features when too few survive tracking
    if p0 is None or len(p0) < 20:
        p0 = cv2.goodFeaturesToTrack(gray, maxCorners=100,
                                     qualityLevel=0.3, minDistance=7)
        mask = np.zeros_like(old_frame)

cap.release()
cv2.destroyAllWindows()
12.5.2 稠密光流(Farneback)
import cv2
import numpy as np

cap = cv2.VideoCapture("video.mp4")

# Read the first frame; abort if the video yields nothing
ret, old_frame = cap.read()
if not ret:
    cap.release()
    raise SystemExit(1)
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)

# HSV canvas for flow visualization: saturation fixed at maximum
hsv = np.zeros_like(old_frame)
hsv[..., 1] = 255

while True:
    ret, frame = cap.read()
    if not ret:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Dense optical flow: one (dx, dy) vector per pixel
    flow = cv2.calcOpticalFlowFarneback(
        old_gray, gray, None,
        pyr_scale=0.5,   # pyramid scale between levels
        levels=3,        # number of pyramid levels
        winsize=15,      # averaging window size
        iterations=3,    # iterations per pyramid level
        poly_n=5,        # polynomial expansion neighborhood
        poly_sigma=1.2,  # Gaussian std-dev for the expansion
        flags=0
    )
    # Convert (dx, dy) to polar coordinates
    magnitude, angle = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    # HSV visualization: hue = direction, value = speed
    hsv[..., 0] = angle * 180 / np.pi / 2  # radians -> OpenCV hue (0-179)
    hsv[..., 2] = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)
    rgb = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
    cv2.imshow("稠密光流", rgb)
    old_gray = gray
    if cv2.waitKey(30) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
12.6 目标追踪器
import cv2

cap = cv2.VideoCapture(0)

# Pick a tracker; OpenCV 4.x recommends CSRT or KCF
tracker = cv2.TrackerCSRT_create()
# Alternatives:
# tracker = cv2.TrackerKCF_create()
# tracker = cv2.legacy.TrackerMOSSE_create()

# Read the first frame and let the user select the target ROI
ret, frame = cap.read()
if not ret:  # camera produced no frame
    cap.release()
    raise SystemExit(1)
bbox = cv2.selectROI("选择目标", frame, False)
tracker.init(frame, bbox)

while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Advance the tracker with the new frame
    success, bbox = tracker.update(frame)
    if success:
        x, y, w, h = [int(v) for v in bbox]
        cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(frame, "Tracking", (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
    else:
        cv2.putText(frame, "Lost!", (100, 80),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
    cv2.imshow("追踪", frame)
    if cv2.waitKey(30) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
追踪器对比
| 追踪器 | 速度 | 精度 | 鲁棒性 | 说明 |
|---|---|---|---|---|
| CSRT | ★★★☆☆ | ★★★★★ | ★★★★ | 最佳精度(推荐) |
| KCF | ★★★★☆ | ★★★★ | ★★★ | 平衡选择 |
| MOSSE | ★★★★★ | ★★★ | ★★ | 最快 |
| MedianFlow | ★★★★☆ | ★★★ | ★★ | 匀速运动 |
12.7 视频处理最佳实践
"""
video_utils.py — 视频处理工具集
"""
import cv2
import time
def get_video_info(path):
    """Return a dict of basic metadata for the video at *path*.

    Keys: width, height, fps, total_frames, codec (FourCC as int),
    duration_sec. duration_sec is 0.0 when fps is reported as 0
    (common for live streams), avoiding a ZeroDivisionError.
    """
    cap = cv2.VideoCapture(path)
    try:
        info = {
            "width": int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            "height": int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),
            "fps": cap.get(cv2.CAP_PROP_FPS),
            "total_frames": int(cap.get(cv2.CAP_PROP_FRAME_COUNT)),
            "codec": int(cap.get(cv2.CAP_PROP_FOURCC)),
        }
        fps = info["fps"]
        # Guard fps == 0, same convention as the reading example above
        info["duration_sec"] = info["total_frames"] / fps if fps > 0 else 0.0
    finally:
        cap.release()  # release even if a property query raises
    return info
def process_video(input_path, output_path, process_func):
    """Generic frame-by-frame video processing pipeline.

    Reads input_path, applies process_func(frame) -> frame (expected to
    keep the same size and 3-channel BGR format) to each frame, and
    writes the result to output_path. Prints progress every 100 frames.
    """
    cap = cv2.VideoCapture(input_path)
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0  # fall back when fps unknown
    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
    frame_idx = 0
    start_time = time.time()
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            processed = process_func(frame)
            out.write(processed)
            frame_idx += 1
            # Progress report; skip when total == 0 (live streams) to
            # avoid dividing by zero in the percentage/ETA math
            if frame_idx % 100 == 0 and total > 0:
                elapsed = time.time() - start_time
                speed = frame_idx / elapsed
                eta = (total - frame_idx) / speed
                print(f"进度: {frame_idx}/{total} ({frame_idx/total*100:.1f}%) "
                      f"速度: {speed:.1f} FPS ETA: {eta:.0f}s")
    finally:
        # Always release, or the output file may be left unfinalized
        cap.release()
        out.release()
    print(f"处理完成: {output_path}")
12.8 扩展阅读
本章小结: 掌握了视频读取与写入、帧差分运动检测、背景减除、光流估计和目标追踪等核心视频处理技术。