GCC 完全指南 / 13 - GCC 扩展特性

13 - GCC 扩展特性

了解 GCC 独有的扩展属性（attribute）、内置函数、向量化和语句表达式等强大特性。

13.1 GCC 扩展概述

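GCC 提供了大量 C/C++ 标准之外的扩展特性，这些特性增强了语言的表达力和编译器的优化能力。默认的 GNU 方言（GCC 8–14 为 -std=gnu17，GCC 15 起为 -std=gnu23）启用全部扩展；-std=c17 等严格 ISO 模式只会禁用与标准冲突的扩展（例如 typeof、asm 关键字，此时可改用 __typeof__、__asm__），其余扩展仍可使用，配合 -pedantic 才会对其给出警告。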

# 使用 GNU 扩展（默认）
gcc -std=gnu17 -o hello main.c   # 扩展已启用

# 禁用扩展，严格遵循标准
gcc -std=c17 -pedantic -o hello main.c  # 警告非标准用法

13.2 `__attribute__` 属性

__attribute__ 是 GCC 最重要的扩展机制，允许对函数、变量、类型等添加元信息。

函数属性

属性	说明
`noreturn`	函数不会返回
`format`	printf/scanf 风格参数检查
`unused`	标记函数为可能未使用
`always_inline`	总是内联
`noinline`	禁止内联
`flatten`	内联所有被调用的函数
`constructor`	在 main() 之前执行
`destructor`	在 main() 之后执行
`deprecated`	标记为已弃用
`visibility`	控制符号可见性
`weak`	弱符号
`aligned`	对齐要求
`packed`	紧凑排列
`hot`	标记为热点函数（优先优化）
`cold`	标记为冷函数（很少执行，按体积而非速度优化）
`target`	指定目标特性
`error`	调用时产生编译错误
`warning`	调用时产生编译警告
`malloc`	标记为内存分配函数
`returns_nonnull`	返回值不为 NULL
`nonnull`	参数不为 NULL

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * fatal_error - print a fatal diagnostic to stderr and terminate the process.
 * @msg: text printed after the "FATAL: " prefix.
 *
 * The noreturn attribute tells GCC that control never comes back to the
 * caller; the body ends in exit(1), so the function indeed never returns.
 */
__attribute__((noreturn))
void fatal_error(const char *msg) {
    fprintf(stderr, "FATAL: %s\n", msg);
    exit(1);
    // Not reached: exit() does not return.
}

/*
 * my_printf - printf-style wrapper that forwards its arguments to vprintf().
 * @fmt: printf format string; the variadic arguments follow it.
 *
 * format(printf, 1, 2) asks GCC to check the variadic arguments against the
 * format string: parameter 1 is the format, checking starts at parameter 2.
 */
__attribute__((format(printf, 1, 2)))
void my_printf(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    (void)vprintf(fmt, ap);
    va_end(ap);
}

/*
 * init_library - announce start-up on stdout.
 * The constructor attribute makes GCC arrange for it to run before main().
 */
__attribute__((constructor))
void init_library(void)
{
    fputs("Library initialized\n", stdout);
}

/*
 * cleanup_library - announce shutdown on stdout.
 * The destructor attribute makes GCC arrange for it to run after main()
 * returns or exit() is called.
 */
__attribute__((destructor))
void cleanup_library(void)
{
    fputs("Library cleaned up\n", stdout);
}

/*
 * old_function - intentionally empty placeholder for a retired API.
 *
 * The deprecated attribute makes GCC warn wherever the function is used and
 * include the given message in that warning.
 */
__attribute__((deprecated("Use new_function() instead")))
void old_function(void) {}

/*
 * fast_add - return a + b.
 *
 * always_inline asks GCC to inline every call to this inline function,
 * independent of the restrictions that otherwise apply to inlining.
 */
static inline __attribute__((always_inline))
int fast_add(int a, int b)
{
    const int sum = a + b;

    return sum;
}

/*
 * debuggable_function - empty placeholder body.
 *
 * noinline keeps GCC from inlining calls to it, so it stays a separate
 * function (for example, one that a debugger can set a breakpoint on).
 */
__attribute__((noinline))
void debuggable_function(void) { /* kept out of line to ease debugging */ }

/*
 * error_handler - report an error code on stderr.
 * @code: value printed after "Error: ".
 *
 * cold marks the function as unlikely to be executed: GCC optimizes it for
 * size rather than speed and treats branches leading to it as unlikely.
 */
__attribute__((cold))
void error_handler(int code) {
    fprintf(stderr, "Error: %d\n", code);
}

/*
 * compute_intensive - double every element of an array in place.
 * @data: array of at least @n doubles.
 * @n:    number of elements to process; nothing happens when n <= 0.
 *
 * hot marks the function as a hot spot so GCC optimizes it more aggressively.
 */
__attribute__((hot))
void compute_intensive(double *data, int n)
{
    int idx = 0;

    while (idx < n) {
        data[idx] = data[idx] * 2.0;
        ++idx;
    }
}

/*
 * avx2_function - element-wise sum c[i] = a[i] + b[i] for i in [0, n).
 * @a, @b: input arrays of at least @n floats.
 * @c:     output array of at least @n floats.
 * @n:     element count; nothing happens when n <= 0.
 *
 * target("avx2") compiles this one function with AVX2 enabled regardless of
 * the command-line -m options, so it must only run on CPUs that support AVX2.
 */
__attribute__((target("avx2")))
void avx2_function(float *a, float *b, float *c, int n)
{
    int k = 0;

    while (k < n) {
        const float sum = a[k] + b[k];

        c[k] = sum;
        k++;
    }
}

/*
 * error attribute: if a call to this function is still present after
 * dead-code elimination, GCC fails the build with the given message.
 * The warning attribute works the same way but only emits a warning.
 */
extern void __compile_error_stub(void)
    __attribute__((error("Must implement this function")));

变量属性

// aligned: request a minimum alignment for the variable
int aligned_var __attribute__((aligned(64)));  // 64-byte alignment (a common cache-line size)

// packed: lay the members out back to back, with no padding between them
struct __attribute__((packed)) PackedStruct {
    char a;    // 1 byte
    int b;     // 4 bytes (assuming a 4-byte int)
    char c;    // 1 byte
};
// sizeof(struct PackedStruct) == 6 with a 4-byte int (no padding)

// section: place the variable in a named section
__attribute__((section(".mydata")))
int special_var = 42;

// unused: suppress the "unused variable" warning
__attribute__((unused)) int maybe_unused_var;

// weak: emit a weak symbol
__attribute__((weak)) int default_value = 100;

// deprecated: warn wherever the variable is used
__attribute__((deprecated)) int old_api_var;

/*
 * auto_free - cleanup handler for heap pointers declared with
 * __attribute__((cleanup(auto_free))).
 * @p: address of the pointer variable that is going out of scope.
 *
 * GCC calls the handler with a pointer to the variable, i.e. a char ** for a
 * char * variable.  char ** does not convert implicitly to void **, so the
 * parameter is a plain void * and the stored pointer is copied out of it.
 */
void auto_free(void *p) {
    void *target;

    __builtin_memcpy(&target, p, sizeof target);
    free(target);            /* free(NULL) is a no-op */
}

/*
 * cleanup_demo - show the cleanup attribute on a local variable.
 *
 * The attribute only applies to automatic (function-scope) variables; the
 * handler runs on every path that leaves the variable's scope.
 */
void cleanup_demo(void) {
    char *msg __attribute__((cleanup(auto_free))) = malloc(100);

    if (msg == NULL) {
        return;              /* auto_free(&msg) still runs here */
    }
    msg[0] = '\0';
}   /* auto_free(&msg) runs here and frees the buffer */

// vector_size: declare a vector type; the argument is the total size in bytes
typedef int v4si __attribute__((vector_size(16)));  // 16 bytes: 4 ints (assuming a 4-byte int)
typedef float v8sf __attribute__((vector_size(32))); // 32 bytes: 8 floats (assuming a 4-byte float)

类型属性

// aligned: every object of this type gets at least 16-byte alignment
typedef struct {
    int x, y;
} __attribute__((aligned(16))) AlignedPoint;

// packed: members are laid out without padding between them
typedef struct __attribute__((packed)) {
    char a;
    int b;
    char c;
} PackedData;

// transparent_union: a function parameter of this union type accepts an
// argument of any member type without an explicit cast
typedef union {
    int i;
    float f;
    void *p;
} __attribute__((transparent_union)) Value;
// Intended use: pass an int / float / void * directly where a Value is expected.
// NOTE(review): GCC requires every member to have the same machine
// representation as the first one; where void * is wider than int (LP64) GCC
// may drop the attribute with "union cannot be made transparent" -- confirm
// on the target before relying on this example.

13.3 `__builtin_*` 内置函数

GCC 提供了大量编译器内置函数，直接映射到硬件指令或编译器优化。

位操作

#include <stdio.h>
#include <stdint.h>

/*
 * reverse_bits8 - return v with its eight bits in reversed order.
 *
 * GCC has no bit-reverse builtin (__builtin_bitreverse8 exists only in
 * Clang), so swap progressively smaller groups: nibbles, bit pairs, bits.
 */
static unsigned char reverse_bits8(unsigned char v) {
    v = (unsigned char)(((v & 0xF0u) >> 4) | ((v & 0x0Fu) << 4));
    v = (unsigned char)(((v & 0xCCu) >> 2) | ((v & 0x33u) << 2));
    v = (unsigned char)(((v & 0xAAu) >> 1) | ((v & 0x55u) << 1));
    return v;
}

/*
 * Demonstrates GCC's bit-manipulation builtins on fixed sample values and
 * prints each result.  Binary literals (0b...) are a GNU extension before C23.
 */
int main(void) {
    unsigned int x = 0b10110000;

    // Count leading zeros (result is undefined for x == 0)
    printf("CLZ: %d\n", __builtin_clz(x));       // 24 (32-bit: 0x000000B0 has 24 leading zeros)

    // Count trailing zeros (result is undefined for x == 0)
    printf("CTZ: %d\n", __builtin_ctz(x));       // 4

    // Population count: number of 1 bits
    printf("POP: %d\n", __builtin_popcount(x));  // 3

    // Parity: 1 when the number of 1 bits is odd
    printf("PARITY: %d\n", __builtin_parity(x)); // 1 (odd number of 1 bits)

    // Byte-order swap
    uint32_t val = 0x12345678;
    printf("BSWAP: 0x%08x\n", (unsigned int)__builtin_bswap32(val));  // 0x78563412

    // Bit reversal; printed bit by bit because the %b conversion is C23-only
    unsigned char b = 0b11010010;
    unsigned char rev = reverse_bits8(b);        // 0b01001011
    printf("REV: 0b");
    for (int bit = 7; bit >= 0; bit--) {
        putchar(((rev >> bit) & 1u) ? '1' : '0');
    }
    putchar('\n');

    return 0;
}

溢出检测

#include <stdio.h>
#include <limits.h>

/*
 * Demonstrates the type-generic overflow builtins.  Each one stores the
 * (possibly wrapped) result through its third argument and returns nonzero
 * when the mathematically exact result does not fit in that object.
 */
int main(void) {
    /* Signed addition: INT_MAX + 1 is not representable in an int. */
    const int int_lhs = INT_MAX;
    const int int_rhs = 1;
    int int_sum;
    const int add_overflowed = __builtin_add_overflow(int_lhs, int_rhs, &int_sum);
    if (add_overflowed) {
        printf("加法溢出！\n");
    }

    /* Signed multiplication: LLONG_MAX * 2 is not representable in long long. */
    const long long wide_lhs = LLONG_MAX;
    const long long wide_rhs = 2;
    long long wide_product;
    const int mul_overflowed = __builtin_mul_overflow(wide_lhs, wide_rhs, &wide_product);
    if (mul_overflowed) {
        printf("乘法溢出！\n");
    }

    /* Unsigned subtraction: 0 - 1 wraps around, which is reported as overflow. */
    const unsigned int minuend = 0;
    const unsigned int subtrahend = 1;
    unsigned int difference;
    const int sub_overflowed = __builtin_sub_overflow(minuend, subtrahend, &difference);
    if (sub_overflowed) {
        printf("无符号减法下溢！\n");
    }

    return 0;
}

内存操作

// Memory builtins: builtin forms of the <string.h> functions that GCC can expand inline
__builtin_memcpy(dst, src, n);   // stands in for memcpy
__builtin_memset(dst, val, n);   // stands in for memset
__builtin_memmove(dst, src, n);  // stands in for memmove
__builtin_memcmp(a, b, n);       // stands in for memcmp

// Prefetch: __builtin_prefetch(addr, rw, locality)
//   rw:       0 = prepare for a read (default), 1 = prepare for a write
//   locality: 0 = no temporal locality ... 3 = high temporal locality (default)
__builtin_prefetch(ptr);         // hint the CPU to prefetch the data at ptr
__builtin_prefetch(ptr, 0, 3);   // prefetch for reading, high temporal locality
__builtin_prefetch(ptr, 1, 3);   // prefetch for writing, high temporal locality

不可达和预期

/*
 * check_range - return x, which the caller guarantees lies in [0, 10].
 *
 * __builtin_unreachable() tells GCC the out-of-range path can never execute;
 * that silences the missing-return warning and enables optimization, but
 * actually reaching it is undefined behavior.
 */
int check_range(int x)
{
    const int in_range = (x >= 0) && (x <= 10);

    if (!in_range) {
        __builtin_unreachable();
    }
    return x;
}

// Branch expectation: __builtin_expect(expr, expected) evaluates to expr and
// tells GCC which value expr is expected to have
if (__builtin_expect(error, 0)) {  // hint: error is almost always 0
    handle_error();
}

// Common definitions, as used in the Linux kernel.
// !!(x) normalizes any nonzero value to 1 so it can be compared with 1 / 0.
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

// Typical use: mark the error path as the unlikely one
if (unlikely(ptr == NULL)) {
    return -EINVAL;
}

CPU 特性检测

#include <stdio.h>

/*
 * Prints one line for each of SSE 4.2, AVX2 and AVX-512F that
 * __builtin_cpu_supports() reports as available on the running CPU.
 * NOTE(review): the feature names used here are x86 ones, so this snippet is
 * presumably x86-only -- confirm before building for other targets.
 */
int main(void) {
    // Query CPU features at run time; each call names one feature as a string literal
    if (__builtin_cpu_supports("sse4.2")) {
        printf("SSE 4.2 supported\n");
    }
    if (__builtin_cpu_supports("avx2")) {
        printf("AVX2 supported\n");
    }
    if (__builtin_cpu_supports("avx512f")) {
        printf("AVX-512 supported\n");
    }
    return 0;
}

原子操作内置函数

#include <stdio.h>

/* Shared counter; the functions below access it only through __atomic builtins. */
int counter = 0;

/* increment - atomically add 1 to counter with sequentially consistent ordering. */
void increment(void)
{
    (void)__atomic_fetch_add(&counter, 1, __ATOMIC_SEQ_CST);
}

/* read_counter - atomically load and return the current value of counter. */
int read_counter(void)
{
    int snapshot;

    __atomic_load(&counter, &snapshot, __ATOMIC_SEQ_CST);
    return snapshot;
}

/*
 * compare_and_swap - strong compare-and-exchange on *ptr.
 * @ptr: object to update.
 * @old: value *ptr is expected to hold.
 * @new: value stored into *ptr when the expectation holds.
 *
 * Returns nonzero if *ptr equalled old and was replaced by new, 0 otherwise.
 * old is a by-value copy, so the value actually observed on failure is not
 * reported back to the caller.
 */
int compare_and_swap(int *ptr, int old, int new)
{
    const _Bool swapped = __atomic_compare_exchange(
        ptr, &old, &new, 0 /* strong variant */,
        __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);

    return swapped;
}

13.4 语句表达式（Statement Expressions）

GCC 允许在表达式中使用复合语句（代码块），最后一个表达式的值作为整个块的值。

// 语句表达式的通用形式
({ 表达式1; 表达式2; ...; 最终值; })

/*
 * SAFE_MAX(a, b) - larger of a and b, evaluating each argument exactly once.
 *
 * Implemented as a statement expression: the value of the last expression in
 * ({ ... }) is the value of the whole macro.  __typeof__ is used instead of
 * typeof so the macro also compiles in strict ISO modes (-std=c11, -std=c17),
 * where typeof is not a keyword, and __extension__ keeps -pedantic quiet.
 * The temporaries carry a macro-specific name so they cannot shadow a caller
 * variable that appears in the arguments.
 */
#define SAFE_MAX(a, b) __extension__ ({                      \
    __typeof__(a) safe_max_a_ = (a);                         \
    __typeof__(b) safe_max_b_ = (b);                         \
    safe_max_a_ > safe_max_b_ ? safe_max_a_ : safe_max_b_;   \
})

/*
 * LOG_AND_RETURN(expr) - evaluate expr once, print it, and yield its value.
 *
 * The value is printed with "%d", so expr must have type int (or promote to
 * int).  __typeof__ is used instead of typeof so the macro also compiles in
 * strict ISO modes (-std=c11, -std=c17), where typeof is not a keyword.
 */
#define LOG_AND_RETURN(expr) __extension__ ({            \
    __typeof__(expr) log_and_return_value_ = (expr);     \
    printf("Result: %d\n", log_and_return_value_);       \
    log_and_return_value_;                               \
})

/*
 * Shows that SAFE_MAX evaluates each argument once even when the arguments
 * have side effects.
 */
int main(void) {
    int x = 3, y = 5;
    int max = SAFE_MAX(x++, y++);
    // x is incremented exactly once, and so is y
    printf("max=%d, x=%d, y=%d\n", max, x, y);
    // Output: max=5, x=4, y=6
    return 0;
}

13.5 `typeof` 和 `__auto_type`

// typeof yields the type of its operand
int x = 42;
typeof(x) y = 10;    // y has the same type as x (int)

/*
 * SWAP(a, b) - exchange the values of two lvalues of compatible type.
 *
 * a is evaluated twice and b twice, so neither argument may have side
 * effects.  The do { ... } while (0) wrapper makes the macro behave like a
 * single statement.  __typeof__ is used instead of typeof so the macro also
 * compiles in strict ISO modes (-std=c11, -std=c17), where typeof is not a
 * keyword; the temporary's name is macro-specific to avoid shadowing.
 */
#define SWAP(a, b) do {                 \
    __typeof__(a) swap_tmp_ = (a);      \
    (a) = (b);                          \
    (b) = swap_tmp_;                    \
} while (0)

// __auto_type: GCC deduces the variable's type from its initializer (similar to C++ auto)
__auto_type val = some_function();  // val gets the return type of some_function()

13.6 指定初始化器

// C99 standard: designated initializers for arrays
int arr[5] = {[0] = 10, [3] = 40};
// arr = {10, 0, 0, 40, 0}

// GCC extension: range designators
// (a second, alternative definition of arr -- the two examples cannot share one scope)
int arr[10] = {[0 ... 4] = 1, [5 ... 9] = 2};
// arr = {1,1,1,1,1,2,2,2,2,2}

// Designated initializers for structs (C99 standard); members may appear in any order
struct Point p = {.y = 10, .x = 5};

/*
 * handle_char - print the class (lowercase letter, uppercase letter, digit)
 * of c; prints nothing for any other character.
 *
 * Uses GCC case ranges: "case low ... high:" matches every value from low to
 * high inclusive.  The spaces around "..." are needed with integer bounds.
 */
void handle_char(char c)
{
    const char *label = NULL;

    switch (c) {
    case 'a' ... 'z':
        label = "小写字母\n";
        break;
    case 'A' ... 'Z':
        label = "大写字母\n";
        break;
    case '0' ... '9':
        label = "数字\n";
        break;
    default:
        break;
    }

    if (label != NULL) {
        fputs(label, stdout);
    }
}

13.7 向量化扩展

#include <stdio.h>

// Vector types: vector_size gives the total size in bytes (16 bytes = 4 elements here)
typedef int v4si __attribute__((vector_size(16)));
typedef float v4sf __attribute__((vector_size(16)));

/*
 * Demonstrates element-wise arithmetic on GCC vector types and prints the
 * integer sum.  d and z are computed only to illustrate the operators; they
 * are never read afterwards.
 */
int main(void) {
    v4si a = {1, 2, 3, 4};
    v4si b = {5, 6, 7, 8};
    v4si c = a + b;       // element-wise add: {6, 8, 10, 12}
    v4si d = a * b;       // element-wise multiply: {5, 12, 21, 32}

    // Copy the vector into a plain array for printing
    // (GCC also allows reading elements directly by subscript, e.g. c[0])
    int result[4];
    __builtin_memcpy(result, &c, sizeof(result));
    printf("{%d, %d, %d, %d}\n", result[0], result[1], result[2], result[3]);

    // Floating-point vectors
    v4sf x = {1.0f, 2.0f, 3.0f, 4.0f};
    v4sf y = {0.5f, 0.5f, 0.5f, 0.5f};
    v4sf z = x * y;       // {0.5, 1.0, 1.5, 2.0}

    return 0;
}

向量操作

// Operator overview for a 4-element int vector type.
// (Fragment: the initializers and the assignment below belong inside a function body.)
typedef int v4si __attribute__((vector_size(16)));

v4si a = {1, 2, 3, 4};
v4si b = {5, 6, 7, 8};

v4si add = a + b;         // element-wise addition
v4si sub = a - b;         // element-wise subtraction
v4si mul = a * b;         // element-wise multiplication
v4si div = a / b;         // element-wise division
v4si and = a & b;         // element-wise bitwise AND
v4si or  = a | b;         // element-wise bitwise OR
v4si cmp = a > b;         // element-wise comparison; each element is 0 (false) or -1 (true)

// Element access by subscript
int first = a[0];         // read the first element
a[2] = 10;               // overwrite the third element

13.8 可变长度数组（VLA）

/*
 * process - fill a variable-length array with 0 .. n-1.
 * @n: number of elements; the call does nothing when n <= 0.
 *
 * VLAs are standard C99 (optional since C11).  The array lives on the stack,
 * so n must stay small enough not to overflow it.
 */
void process(int n) {
    if (n <= 0) {
        return;          /* a VLA with a non-positive size is undefined behavior */
    }

    int arr[n];          /* VLA: size is determined at run time */
    for (int i = 0; i < n; i++) {
        arr[i] = i;
    }
}

// Flexible array member: standard C99, neither a GCC extension nor a VLA.
// The struct is allocated with extra space after it for the data elements.
struct DynArray {
    int size;
    int data[];          // flexible array member; must be the last member
};

// __builtin_alloca: allocate memory in the calling function's stack frame;
// it is released automatically when that function returns
void *ptr = __builtin_alloca(100);  // 100 bytes on the stack

13.9 Labels as Values（标签地址）

#include <stdio.h>

/*
 * execute_bytecode - run a tiny bytecode program with computed-goto dispatch.
 * @code: instruction stream.  Each instruction starts with one opcode byte
 *        (0 = ADD, 1 = SUB, 2 = MUL, 3 = HALT); ADD, SUB and MUL are followed
 *        by two operand bytes.
 * @len:  number of bytes in @code.
 *
 * Prints the result of every arithmetic instruction.  Returns 0 when HALT is
 * executed and -1 when the program is malformed: NULL @code, an unknown
 * opcode, operands running past @len, or no HALT within @len bytes.
 */
int execute_bytecode(const unsigned char *code, int len) {
    enum { OP_ADD, OP_SUB, OP_MUL, OP_HALT, OP_COUNT };

    /* &&label yields the address of a label as a void * (labels as values). */
    static void *const dispatch_table[OP_COUNT] = {
        [OP_ADD] = &&DO_ADD, [OP_SUB] = &&DO_SUB,
        [OP_MUL] = &&DO_MUL, [OP_HALT] = &&DO_HALT
    };

    int a, b, pc = 0;

    if (code == NULL) {
        return -1;
    }

    while (pc < len) {
        const unsigned char op = code[pc++];

        /* Never index the jump table with an opcode it has no entry for. */
        if (op >= OP_COUNT) {
            return -1;
        }
        /* Every opcode except HALT reads two operand bytes. */
        if (op != OP_HALT && len - pc < 2) {
            return -1;
        }

        goto *dispatch_table[op];   /* computed goto */

    DO_ADD:
        a = code[pc++]; b = code[pc++];
        printf("%d + %d = %d\n", a, b, a + b);
        continue;

    DO_SUB:
        a = code[pc++]; b = code[pc++];
        printf("%d - %d = %d\n", a, b, a - b);
        continue;

    DO_MUL:
        a = code[pc++]; b = code[pc++];
        printf("%d * %d = %d\n", a, b, a * b);
        continue;

    DO_HALT:
        return 0;
    }

    return -1;   /* ran off the end of the program without reaching HALT */
}

要点回顾

要点	核心内容
`__attribute__`	函数、变量、类型的元信息标注
`__builtin_*`	编译器内置函数，映射到硬件指令或优化
语句表达式	`({ ...; value; })`，安全宏的关键
`typeof` / `__auto_type`	类型推断，通用宏的基础
向量类型	`vector_size` 创建 SIMD 类型
范围初始化	`[0 ... 9] = value`，`case 'a' ... 'z':`

注意事项

可移植性: GCC 扩展在 Clang 中大部分也支持（Clang 追求 GCC 兼容性），但 MSVC 不支持。跨平台项目应谨慎使用。

-pedantic 警告: 使用 -std=c11 -pedantic 时，GCC 扩展会产生警告。使用 -std=gnu11 启用扩展。

__builtin_unreachable() 的风险: 如果程序实际到达了标记为不可达的位置，会导致未定义行为。

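向量类型的可移植性: vector_size 类型本身不依赖硬件 SIMD——目标架构没有对应指令（或向量宽度超出硬件支持）时，GCC 会用更窄的向量或标量指令合成这些运算，代码仍可编译运行，只是性能收益不再有保证；真正的可移植性限制在于它是 GNU 扩展（Clang 支持，MSVC 不支持）。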

扩展阅读

GCC Extensions — C 语言扩展
C++ Extensions — C++ 扩展
GCC Built-in Functions — 内置函数完整列表
Using the GNU Compiler Collection — GCC 完整手册

下一步

→ 14 - Sanitizers：学习使用 ASan、TSan、UBSan 进行运行时错误检测。