OpenCV 计算机视觉完全教程 / 第 09 章 — 几何变换
第 09 章 — 几何变换
9.1 变换类型总览
| 变换 | 自由度 | 保持性质 | 矩阵大小 |
|---|---|---|---|
| 平移 | 2 | 形状不变 | 2×3 |
| 刚体(欧氏) | 3 | 长度不变 | 2×3 |
| 相似 | 4 | 角度不变 | 2×3 |
| 仿射 | 6 | 平行线不变 | 2×3 |
| 透视(投影) | 8 | 直线不变 | 3×3 |
9.2 缩放(Scaling)
import cv2
import numpy as np
# Scaling demo: resize one image by target size, by scale factor, and with
# each interpolation method in turn.
img = cv2.imread("photo.jpg")
h, w = img.shape[:2]
# Method 1: specify the target size explicitly.
resized = cv2.resize(img, (640, 480))
# Method 2: pass None as the size and give per-axis scale factors instead.
resized_half = cv2.resize(img, None, fx=0.5, fy=0.5)
# Method 3: compare the different interpolation methods.
methods = {
    "INTER_NEAREST": cv2.INTER_NEAREST,  # nearest neighbour (fastest)
    "INTER_LINEAR": cv2.INTER_LINEAR,  # bilinear (default)
    "INTER_CUBIC": cv2.INTER_CUBIC,  # bicubic (good quality)
    "INTER_LANCZOS4": cv2.INTER_LANCZOS4,  # Lanczos (highest quality)
    "INTER_AREA": cv2.INTER_AREA,  # pixel area (recommended when shrinking)
}
# Resize to the same 320x240 target with every method and report the
# resulting array shape next to the method's name.
for name, method in methods.items():
    result = cv2.resize(img, (320, 240), interpolation=method)
    print(f"{name}: {result.shape}")
插值方法选择指南
| 场景 | 推荐方法 | 说明 |
|---|---|---|
| 放大图像 | INTER_CUBIC / INTER_LINEAR | 质量优先 |
| 缩小图像 | INTER_AREA | 避免摩尔纹 |
| 实时处理 | INTER_NEAREST | 速度优先 |
| 超高质量 | INTER_LANCZOS4 | 计算量大 |
9.3 平移(Translation)
import cv2
import numpy as np
# Translation demo: shift an image with a 2x3 affine matrix.
img = cv2.imread("photo.jpg")
h, w = img.shape[:2]
# A translation matrix has the form [[1, 0, tx], [0, 1, ty]].
tx, ty = 100, 50  # 100 px to the right, 50 px down
M = np.float32([[1, 0, tx], [0, 1, ty]])
# Apply the affine transform; the output canvas is (w, h), the input's own
# dimensions.
shifted = cv2.warpAffine(img, M, (w, h))
# Same shift, but with the border mode set to cv2.BORDER_REFLECT for the
# area uncovered by the shift.
shifted_border = cv2.warpAffine(img, M, (w, h),
                                borderMode=cv2.BORDER_REFLECT)
9.4 旋转(Rotation)
import cv2
import numpy as np
# Rotation demo, method 1: rotate about the image centre on a canvas of the
# original size.
img = cv2.imread("photo.jpg")
h, w = img.shape[:2]
# Method 1: use getRotationMatrix2D (recommended).
center = (w // 2, h // 2)  # rotation centre
angle = 45  # counter-clockwise angle
scale = 1.0  # scale factor
M = cv2.getRotationMatrix2D(center, angle, scale)
# The output canvas is (w, h), the same size as the input.
rotated = cv2.warpAffine(img, M, (w, h))
# 方法 2: 旋转后不裁剪(计算新尺寸)
def rotate_bound(image, angle):
    """Rotate ``image`` by ``angle`` degrees on a canvas resized to fit it.

    Args:
        image: Array whose first two dimensions are (height, width).
        angle: Rotation angle passed to ``cv2.getRotationMatrix2D``.

    Returns:
        The result of ``cv2.warpAffine`` on the enlarged canvas.
    """
    height, width = image.shape[:2]
    center_x, center_y = width / 2, height / 2
    matrix = cv2.getRotationMatrix2D((center_x, center_y), angle, 1.0)

    # Magnitudes of the first row's rotation entries drive the bounding box.
    abs_cos, abs_sin = np.abs(matrix[0, 0]), np.abs(matrix[0, 1])

    # Bounding-box size of the rotated image, truncated to whole pixels.
    out_w = int(height * abs_sin + width * abs_cos)
    out_h = int(height * abs_cos + width * abs_sin)

    # Shift the translation column so the old centre lands on the new centre.
    matrix[0, 2] += (out_w / 2) - center_x
    matrix[1, 2] += (out_h / 2) - center_y

    return cv2.warpAffine(image, matrix, (out_w, out_h))
# Rotate by 45 degrees with the helper that resizes the canvas to fit.
rotated_full = rotate_bound(img, 45)
旋转矩阵
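旋转矩阵 (2×3),数学坐标系(y 轴向上)下的标准形式:
$$
M = \begin{bmatrix} \cos\theta & -\sin\theta & t_x \\ \sin\theta & \cos\theta & t_y \end{bmatrix}
$$
θ = 旋转角度(逆时针为正)
tx, ty = 平移分量
注意:图像坐标系的 y 轴向下,因此 cv2.getRotationMatrix2D 返回的矩阵中 sin 项符号相反:第一行为 [α, β, (1−α)·cx − β·cy],第二行为 [−β, α, β·cx + (1−α)·cy],其中 α = scale·cos θ,β = scale·sin θ,(cx, cy) 为旋转中心。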
9.5 仿射变换(Affine Transformation)
仿射变换保持平行线不变,由 3 个点对确定。
import cv2
import numpy as np
# Affine demo: (1) fit a transform to three point pairs, (2) compose a
# translation with a rotation into one matrix.
img = cv2.imread("photo.jpg")
h, w = img.shape[:2]
# Method 1: compute the transform matrix from 3 point correspondences.
pts_src = np.float32([[50, 50], [200, 50], [50, 200]])
pts_dst = np.float32([[10, 100], [200, 50], [100, 250]])
M = cv2.getAffineTransform(pts_src, pts_dst)
warped = cv2.warpAffine(img, M, (w, h))
# Method 2: combine several transforms (translation + rotation + scale).
T = np.float32([[1, 0, 50], [0, 1, 30]])  # translation
R = cv2.getRotationMatrix2D((w // 2, h // 2), 30, 1)  # rotation (scale 1)
# Two 2x3 matrices cannot be multiplied with each other, so each one is
# first lifted to a 3x3 homogeneous matrix by appending the row [0, 0, 1].
homogeneous_row = np.array([[0, 0, 1]], dtype=np.float64)
T_h = np.vstack([T, homogeneous_row])
R_h = np.vstack([R, homogeneous_row])
# The transform applied first goes on the right:
# R_h @ T_h translates first and rotates afterwards.
# The top two rows of the product are the 2x3 matrix warpAffine expects.
combined = (R_h @ T_h)[:2]
warped_combined = cv2.warpAffine(img, combined, (w, h))
9.6 透视变换(Perspective Transformation)
透视变换由 4 个点对确定,可以矫正任意四边形。
import cv2
import numpy as np
# Perspective demo: map a hand-picked quadrilateral onto an upright
# rectangle, then map the result back.
img = cv2.imread("document.jpg")
h, w = img.shape[:2]
# Source quadrilateral: the document's four corners (annotated by hand or
# detected automatically).
pts_src = np.float32([
    [56, 65],  # top-left
    [368, 52],  # top-right
    [389, 390],  # bottom-right
    [43, 382]  # bottom-left
])
# Destination rectangle, listed in the same corner order as pts_src.
pts_dst = np.float32([
    [0, 0],
    [400, 0],
    [400, 500],
    [0, 500]
])
# Compute the perspective transform matrix (3x3).
M = cv2.getPerspectiveTransform(pts_src, pts_dst)
# Apply the perspective transform onto a (400, 500) canvas, matching the
# destination rectangle.
warped = cv2.warpPerspective(img, M, (400, 500))
# Inverse perspective transform: swap the two point sets and warp back
# onto a canvas of the original size (w, h).
M_inv = cv2.getPerspectiveTransform(pts_dst, pts_src)
original = cv2.warpPerspective(warped, M_inv, (w, h))
9.7 实战:自动文档扫描
"""
document_scanner.py — 自动检测文档边缘并校正
"""
import cv2
import numpy as np
def order_points(pts):
    """Order four corner points as [top-left, top-right, bottom-right, bottom-left].

    The corners are sorted by polar angle around their centroid, which in
    image coordinates (y pointing down) walks them clockwise as seen on
    screen. The cycle is then rotated to start at the corner with the
    smallest ``x + y``. Unlike picking the extremes of ``x + y`` and
    ``y - x`` independently, this never assigns one input point to two
    output slots (which happened for diamond-like quadrilaterals).

    Args:
        pts: Four (x, y) points; anything convertible to an array with
            exactly eight elements, e.g. shape (4, 2) or a (4, 1, 2)
            contour. The quadrilateral is expected to be convex.

    Returns:
        A (4, 2) ``float32`` array in the order
        [top-left, top-right, bottom-right, bottom-left].

    Raises:
        ValueError: If ``pts`` does not contain exactly four 2-D points.
    """
    # Compute in float64 and only narrow to float32 for the returned array.
    pts = np.asarray(pts, dtype=np.float64).reshape(4, 2)

    # Angle of each corner as seen from the centroid. Ascending order is
    # top-left -> top-right -> bottom-right -> bottom-left when y points down.
    centred = pts - pts.mean(axis=0)
    angles = np.arctan2(centred[:, 1], centred[:, 0])
    ring = pts[np.argsort(angles, kind="stable")]

    # Rotate the cycle so it starts at the corner with the smallest x + y.
    start = int(np.argmin(ring.sum(axis=1)))
    return np.roll(ring, -start, axis=0).astype(np.float32)
def four_point_transform(image, pts):
    """Warp the quadrilateral ``pts`` of ``image`` onto an upright rectangle.

    Args:
        image: Source image passed to ``cv2.warpPerspective``.
        pts: Four corner points, in any order accepted by ``order_points``.

    Returns:
        The warped image of size (max_width, max_height), where each
        dimension is the longer of the two opposing edges, truncated to int.
    """
    corners = order_points(pts)
    top_left, top_right, bottom_right, bottom_left = corners

    # Output width: the longer of the bottom and top edges.
    bottom_edge = int(np.linalg.norm(bottom_right - bottom_left))
    top_edge = int(np.linalg.norm(top_right - top_left))
    max_width = max(bottom_edge, top_edge)

    # Output height: the longer of the right and left edges.
    right_edge = int(np.linalg.norm(top_right - bottom_right))
    left_edge = int(np.linalg.norm(top_left - bottom_left))
    max_height = max(right_edge, left_edge)

    # Destination corners, in the same order as ``corners``.
    last_col, last_row = max_width - 1, max_height - 1
    target = np.array(
        [[0, 0], [last_col, 0], [last_col, last_row], [0, last_row]],
        dtype=np.float32,
    )

    homography = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, homography, (max_width, max_height))
def scan_document(image_path):
    """Detect a document in a photo, rectify it and binarise it.

    Edges are detected on a copy resized to a height of 500 px. The largest
    contours are searched for one that approximates to four points, and
    those points are scaled back to the full-resolution image for the
    perspective warp.

    Args:
        image_path: Path of the photo to scan.

    Returns:
        The adaptive-thresholded, perspective-corrected image, or ``None``
        when the image cannot be read or no four-point contour is found
        (a message is printed in either case).
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; without this guard the next line would fail
        # with an unrelated AttributeError.
        print(f"无法读取图像: {image_path}")
        return None
    orig = img.copy()
    # Scale factor between the original and the 500 px tall working copy.
    ratio = img.shape[0] / 500.0
    img = cv2.resize(img, (int(img.shape[1] / ratio), 500))

    # Pre-processing: grayscale -> blur -> Canny edges.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(blurred, 50, 200)

    # Find contours and keep the five with the largest area.
    contours, _ = cv2.findContours(edged, cv2.RETR_LIST,
                                   cv2.CHAIN_APPROX_SIMPLE)
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

    # Take the first (largest) contour whose polygon approximation has
    # exactly four vertices.
    screen_cnt = None
    for cnt in contours:
        peri = cv2.arcLength(cnt, True)
        approx = cv2.approxPolyDP(cnt, 0.02 * peri, True)
        if len(approx) == 4:
            screen_cnt = approx
            break
    if screen_cnt is None:
        print("未检测到文档边缘")
        return None

    # Corners were found on the resized copy; multiply by ratio to express
    # them in the coordinates of the full-resolution original.
    warped = four_point_transform(orig, screen_cnt.reshape(4, 2) * ratio)

    # Binarise for a "scanned" look.
    warped_gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    scanned = cv2.adaptiveThreshold(
        warped_gray, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY,
        blockSize=11, C=10
    )
    return scanned
# 使用
# result = scan_document("document_photo.jpg")
# if result is not None:
# cv2.imwrite("scanned.jpg", result)
9.8 相机去畸变
import cv2
import numpy as np
# Camera intrinsic matrix (must be obtained through calibration).
# The values below assume calibration has already been done.
camera_matrix = np.array([
    [800, 0, 320],
    [0, 800, 240],
    [0, 0, 1]
], dtype=np.float64)
# NOTE(review): presumably the four coefficients are (k1, k2, p1, p2) --
# confirm against the calibration output.
dist_coeffs = np.array([-0.2, 0.1, 0, 0], dtype=np.float64)
img = cv2.imread("distorted.jpg")
h, w = img.shape[:2]
# Method 1: undistort in a single call.
undistorted = cv2.undistort(img, camera_matrix, dist_coeffs)
# Method 2: build remap tables (more flexible).
# roi is returned together with the new camera matrix but is not used below.
new_camera_mtx, roi = cv2.getOptimalNewCameraMatrix(
    camera_matrix, dist_coeffs, (w, h), 1, (w, h)
)
map_x, map_y = cv2.initUndistortRectifyMap(
    camera_matrix, dist_coeffs, None, new_camera_mtx, (w, h), cv2.CV_32FC1
)
# Apply the precomputed maps with bilinear interpolation.
undistorted2 = cv2.remap(img, map_x, map_y, cv2.INTER_LINEAR)
9.9 极坐标与对数极坐标变换
import cv2
import numpy as np
# Polar demo: linear-polar and log-polar transforms about the image centre,
# followed by the inverse of the linear-polar transform.
img = cv2.imread("photo.jpg")
h, w = img.shape[:2]
center = (w // 2, h // 2)
# `flags` carries the interpolation method together with the WARP_* bits.
# Passing only a WARP_* bit leaves the interpolation bits at 0, which is
# cv2.INTER_NEAREST, so cv2.INTER_LINEAR is requested explicitly.
# NOTE: the OpenCV documentation marks linearPolar/logPolar as deprecated in
# favour of cv2.warpPolar; they are kept here to match the section's API.
# Linear polar transform
polar = cv2.linearPolar(img, center, maxRadius=min(center),
                        flags=cv2.INTER_LINEAR | cv2.WARP_FILL_OUTLIERS)
# Log-polar transform
log_polar = cv2.logPolar(img, center, maxRadius=min(center),
                         flags=cv2.INTER_LINEAR | cv2.WARP_FILL_OUTLIERS)
# Inverse transform; WARP_FILL_OUTLIERS zero-fills destination pixels that
# have no counterpart in the polar image instead of leaving them unset.
back = cv2.linearPolar(
    polar, center, maxRadius=min(center),
    flags=cv2.INTER_LINEAR | cv2.WARP_FILL_OUTLIERS | cv2.WARP_INVERSE_MAP)
9.10 变换方法对比
| 变换 | 所需点数 | 函数 | 保持性质 |
|---|---|---|---|
| 平移 | 1 | 手动矩阵 | 全等 |
| 相似(旋转+等比缩放+平移) | 2 | estimateAffinePartial2D | 角度/长度比例 |
| 仿射 | 3 | getAffineTransform | 平行线 |
| 透视 | 4 | getPerspectiveTransform | 直线 |
| RANSAC仿射 | 5+ | estimateAffine2D | 鲁棒拟合 |
9.11 扩展阅读
本章小结: 掌握了缩放、平移、旋转、仿射变换、透视变换等几何变换,学会了文档自动扫描和相机去畸变等实际应用。