Hunspell 拼写检查完全教程 / 第 07 章:编程接口
第 07 章:编程接口
7.1 概述
Hunspell 提供了多种编程语言的绑定,核心是 C/C++ 库 libhunspell。本章涵盖各主流语言的集成方案。
| 语言 | 绑定库 | 安装方式 | 成熟度 |
|---|---|---|---|
| C/C++ | libhunspell | 系统包 | ★★★★★ |
| Python | pyhunspell / pyhunspell2 / pyspellchecker | pip | ★★★★ |
| Node.js | nspell / nodehun / hunspell-asm | npm | ★★★★ |
| Go | gohunspell / go-spelling | go get | ★★★ |
| Rust | hunspell-rs / hunspell-sys | cargo | ★★★ |
| Java | hunspell-bridj / jhunspell | maven | ★★ |
| C#/.NET | Hunspell4Net / WeCantSpell.Hunspell | nuget | ★★★ |
| PHP | pspell (PHP 扩展,底层基于 Aspell) | pecl | ★★★ |
7.2 C API
7.2.1 头文件
/* hunspell.h — 核心 API 声明 */
#include <hunspell/hunspell.h>
7.2.2 核心 API 函数
| 函数 | 说明 | 参数 |
|---|---|---|
| Hunspell_create(affpath, dpath) | 创建词典句柄 | aff 和 dic 文件路径 |
| Hunspell_destroy(pHunspell) | 释放句柄 | 句柄指针 |
| Hunspell_spell(pHunspell, word) | 检查单词是否正确 | 单词;返回非零=正确,0=错误 |
| Hunspell_suggest(pHunspell, slst, word) | 获取建议列表 | 建议数组指针、单词 |
| Hunspell_add(pHunspell, word) | 添加单词到会话词典 | 单词 |
| Hunspell_add_with_affix(pHunspell, word, model) | 以词根为模板添加 | 单词、模型词 |
| Hunspell_remove(pHunspell, word) | 从词典中移除 | 单词 |
| Hunspell_analyze(pHunspell, slst, word) | 形态学分析 | 结果数组、单词 |
| Hunspell_stem(pHunspell, slst, word) | 词干提取 | 结果数组、单词 |
| Hunspell_generate(pHunspell, slst, word, word2) | 生成词形 | 结果数组、词、模型 |
| Hunspell_free_list(pHunspell, slst, n) | 释放建议/分析列表 | 数组指针、数量 |
7.2.3 完整 C 示例
/* spellcheck_example.c — Hunspell C API 完整示例 */
#include <hunspell/hunspell.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WORD_LEN 256
#define MAX_SUGGESTIONS 20
/*
 * Walk through the Hunspell C API: load a dictionary, spell-check a few
 * words, print suggestions, stems and morphological analyses, add words to
 * the run-time dictionary, then release the handle.
 *
 * argv[1]/argv[2] (both required together) override the default .aff/.dic
 * paths. Returns 0 on success, 1 when the dictionary cannot be loaded.
 */
int main(int argc, char *argv[]) {
    /* Both paths are replaced together, and only when both are supplied. */
    int custom_dict = (argc >= 3);
    const char *aff_path = custom_dict ? argv[1] : "/usr/share/hunspell/en_US.aff";
    const char *dic_path = custom_dict ? argv[2] : "/usr/share/hunspell/en_US.dic";

    /* 1. Create the Hunspell handle. */
    Hunhandle *handle = Hunspell_create(aff_path, dic_path);
    if (handle == NULL) {
        fprintf(stderr, "错误:无法加载词典 %s / %s\n", aff_path, dic_path);
        return 1;
    }
    printf("词典加载成功: %s\n", dic_path);

    /* 2. Spell-check a NULL-terminated list of sample words. */
    const char *test_words[] = {
        "hello", "world", "helo", "programming",
        "progrmming", "correct", "corect", NULL
    };
    printf("\n=== 拼写检查 ===\n");
    for (const char **w = test_words; *w != NULL; w++) {
        const char *verdict = Hunspell_spell(handle, *w) ? "✓ 正确" : "✗ 错误";
        printf(" %-15s → %s\n", *w, verdict);
    }

    /* 3. Suggestions: print at most the first five per word. */
    printf("\n=== 拼写建议 ===\n");
    const char *misspelled[] = {"helo", "progrmming", "wrold", NULL};
    for (const char **w = misspelled; *w != NULL; w++) {
        char **sug_list = NULL;
        int count = Hunspell_suggest(handle, &sug_list, *w);
        int shown = (count < 5) ? count : 5;

        printf(" '%s' 的建议 (%d 个):", *w, count);
        for (int j = 0; j < shown; j++) {
            printf(" %s", sug_list[j]);
        }
        printf("\n");
        /* The list is handed back to the library for release. */
        Hunspell_free_list(handle, &sug_list, count);
    }

    /* 4. Stemming followed by morphological analysis for each word. */
    printf("\n=== 形态学分析 ===\n");
    const char *analyze_words[] = {"running", "wolves", "unhappiness", NULL};
    for (const char **w = analyze_words; *w != NULL; w++) {
        char **stem_list = NULL;
        int stem_count = Hunspell_stem(handle, &stem_list, *w);
        printf(" '%s' → 词干:", *w);
        for (int j = 0; j < stem_count; j++) {
            printf(" %s", stem_list[j]);
        }
        printf("\n");
        Hunspell_free_list(handle, &stem_list, stem_count);

        char **morph_list = NULL;
        int morph_count = Hunspell_analyze(handle, &morph_list, *w);
        if (morph_count <= 0) {
            continue; /* nothing to print or release for this word */
        }
        printf(" '%s' → 形态:", *w);
        for (int j = 0; j < morph_count; j++) {
            printf(" [%s]", morph_list[j]);
        }
        printf("\n");
        Hunspell_free_list(handle, &morph_list, morph_count);
    }

    /* 5. Add words to the in-memory dictionary and re-check them. */
    printf("\n=== 添加单词 ===\n");
    Hunspell_add(handle, "Hunspell");
    Hunspell_add(handle, "Nemeth");
    int check1 = Hunspell_spell(handle, "Hunspell");
    int check2 = Hunspell_spell(handle, "Nemeth");
    printf(" 添加后 'Hunspell': %s\n", check1 ? "✓" : "✗");
    printf(" 添加后 'Nemeth': %s\n", check2 ? "✓" : "✗");

    /* 6. Clean up. */
    Hunspell_destroy(handle);
    printf("\n资源已释放\n");
    return 0;
}
编译运行:
gcc spellcheck_example.c -o spellcheck_example $(pkg-config --cflags --libs hunspell)
./spellcheck_example
7.2.4 批量检查文本
/* spellcheck_text.c — 批量检查文本文件 */
#include <hunspell/hunspell.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAX_WORD 256
/* 从文本中提取下一个单词 */
/*
 * Read the next word from fp into word.
 *
 * A word starts at an alphabetic character and continues through letters,
 * apostrophes and hyphens (so "don't" and "well-known" stay whole).
 * Trailing apostrophes/hyphens are stripped, so a quoted 'hello' or a
 * dangling "word-" is returned as the bare word instead of being reported
 * as a misspelling. Characters that do not fit in the buffer are consumed
 * but dropped; the result is always NUL-terminated.
 *
 * fp       stream to read from
 * word     destination buffer
 * max_len  capacity of word in bytes, including the terminating NUL
 *
 * Returns 0 when a word was stored, -2 when a newline was reached before
 * any word (lets the caller count lines), and -1 at end of input or when
 * max_len cannot hold even a one-character word.
 */
int next_word(FILE *fp, char *word, int max_len) {
    int c;
    int len = 0;

    /* Need room for at least one character plus the NUL. */
    if (max_len < 2) {
        if (max_len == 1) {
            word[0] = '\0';
        }
        return -1;
    }

    /* Skip everything that cannot start a word. */
    while ((c = fgetc(fp)) != EOF && !isalpha(c)) {
        if (c == '\n') {
            return -2;
        }
    }
    if (c == EOF) {
        return -1;
    }

    /* Collect the word; keep reading past the capacity so the stream stays in sync. */
    word[len++] = (char)c;
    while ((c = fgetc(fp)) != EOF && (isalpha(c) || c == '\'' || c == '-')) {
        if (len < max_len - 1) {
            word[len++] = (char)c;
        }
    }
    /* Push the delimiter back so a newline is still seen by the next call. */
    if (c != EOF) {
        ungetc(c, fp);
    }

    /* word[0] is always a letter, so at least one character survives. */
    while (len > 1 && (word[len - 1] == '\'' || word[len - 1] == '-')) {
        len--;
    }
    word[len] = '\0';
    return 0;
}
/*
 * Spell-check every word of the text file named by argv[1] and print each
 * misspelling with its line number and up to two suggestions, followed by
 * a summary line.
 *
 * argv[2]/argv[3] (both required together) override the default .aff/.dic
 * paths. Returns 0 on success, 1 on usage error or when the dictionary or
 * the input file cannot be opened.
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "用法: %s <文件> [词典路径]\n", argv[0]);
        return 1;
    }

    int custom_dict = (argc >= 4);
    const char *aff = custom_dict ? argv[2] : "/usr/share/hunspell/en_US.aff";
    const char *dic = custom_dict ? argv[3] : "/usr/share/hunspell/en_US.dic";

    Hunhandle *H = Hunspell_create(aff, dic);
    if (H == NULL) {
        fprintf(stderr, "加载词典失败\n");
        return 1;
    }

    FILE *fp = fopen(argv[1], "r");
    if (fp == NULL) {
        perror("打开文件失败");
        Hunspell_destroy(H);
        return 1;
    }

    char word[MAX_WORD];
    int line = 1;
    int total = 0;
    int errors = 0;
    int status;

    /* next_word: -1 ends the input, -2 marks a line break, anything else is a word. */
    while ((status = next_word(fp, word, MAX_WORD)) != -1) {
        if (status == -2) {
            line++;
            continue;
        }
        total++;
        if (Hunspell_spell(H, word)) {
            continue;
        }

        char **sugs = NULL;
        int n = Hunspell_suggest(H, &sugs, word);
        printf("行 %d: '%s'", line, word);
        if (n > 0) {
            printf(" → 建议: %s", sugs[0]);
            if (n > 1) {
                printf(", %s", sugs[1]);
            }
        }
        printf("\n");
        Hunspell_free_list(H, &sugs, n);
        errors++;
    }

    /* Guard the percentage against an empty file. */
    double error_rate = (total > 0) ? 100.0 * errors / total : 0.0;
    printf("\n总计: %d 词, %d 拼写错误 (%.1f%%)\n",
           total, errors, error_rate);

    fclose(fp);
    Hunspell_destroy(H);
    return 0;
}
7.3 Python
7.3.1 方案对比
| 包名 | 说明 | 推荐度 |
|---|---|---|
| pyspellchecker | 纯 Python 实现,不依赖 Hunspell 二进制 | ★★★★★ |
| pyhunspell | libhunspell 的 Python 绑定 | ★★★ |
| pyhunspell2 | pyhunspell 的现代版本 | ★★★ |
| cyhunspell | Cython 绑定 | ★★ |
| hunspell-interface | 子进程包装 | ★★ |
7.3.2 pyspellchecker(推荐)
pip install pyspellchecker
#!/usr/bin/env python3
"""Basic pyspellchecker usage example."""
from spellchecker import SpellChecker

# 1. Create a checker instance (English by default).
spell = SpellChecker()

# 2. Check whether a word is known. Membership and indexing both consult the
#    loaded word-frequency list; indexing yields the frequency count.
print("hello" in spell)             # → True
print("helo" in spell)              # → False
print(spell["hello"])               # → positive frequency (word is known)
print(spell["helo"])                # → 0 (word is not in the dictionary)

# 3. Single most likely correction.
print(spell.correction("helo"))     # → "hello"
print(spell.correction("wrold"))    # → "world"

# 4. Full candidate set.
print(spell.candidates("helo"))     # → {'helo', 'hello', 'helot', ...}
print(spell.candidates("wrold"))    # → {'world', 'wold', 'would', ...}

# 5. Check a whole text: unknown() returns the words it does not recognise.
text = "This sentense has a few typose in it"
misspelled = spell.unknown(text.split())
print(f"拼写错误: {misspelled}")     # → {'sentense', 'typose'}

# 6. Best correction and candidate set for every unknown word.
for word in misspelled:
    correction = spell.correction(word)
    candidates = spell.candidates(word)
    print(f" '{word}' → '{correction}' (候选: {candidates})")
7.3.3 pyspellchecker 高级功能
#!/usr/bin/env python3
"""pyspellchecker 高级功能"""
from spellchecker import SpellChecker
import re
# ========== Multi-language support ==========
# One checker per language code; the codes must be ones pyspellchecker ships.
spell_en, spell_fr, spell_de = (
    SpellChecker(language=code) for code in ('en', 'fr', 'de')
)

# ========== Custom dictionary ==========
spell = SpellChecker()
# Load extra words from a text file.
spell.word_frequency.load_text_file('.hunspell/project.dic')
# Or load them from an in-memory collection.
custom_words = {'API', 'JSON', 'HTTP', 'GraphQL', 'Docker', 'Kubernetes'}
spell.word_frequency.load_words(custom_words)
# Add single words one at a time.
for term in ('TypeScript', 'Golang'):
    spell.word_frequency.add(term)
# There is no dedicated "ignore" call here; ignoring a word is done by
# adding it to this instance's word list, as above.

# ========== Word frequency ==========
# Indexing the checker prints the stored frequency of each word.
for probe in ('the', 'hello', 'zyzzyva'):
    print(spell[probe])
def spellcheck_text(text: str, spell: "SpellChecker") -> list[dict]:
    """Spell-check ``text`` and describe every unknown word.

    Words are runs of ASCII letters, apostrophes and hyphens; words shorter
    than three characters are skipped.

    Args:
        text: The text to scan.
        spell: Object providing ``unknown(words)``, ``correction(word)`` and
            ``candidates(word)`` (a pyspellchecker ``SpellChecker``).

    Returns:
        One dict per unknown word, in text order, with keys ``word``,
        ``position`` (character offset of that occurrence in ``text``),
        ``correction`` and ``candidates`` (at most five entries).
    """
    results = []
    # finditer yields the offset of the actual match. Searching for the word
    # text again could land inside an earlier token the pattern skipped
    # (e.g. the "abc" inside "1abc").
    for match in re.finditer(r"\b[a-zA-Z'-]+\b", text):
        word = match.group()
        if len(word) < 3:  # skip short words
            continue
        if not spell.unknown([word]):
            continue
        # candidates() may yield None when nothing is close enough.
        candidates = spell.candidates(word) or ()
        results.append({
            'word': word,
            'position': match.start(),
            'correction': spell.correction(word),
            'candidates': list(candidates)[:5]
        })
    return results
# Usage: print the offset, the unknown word and its best correction.
text = "This documnet explians the basc usage of pyspellcheker"
results = spellcheck_text(text, spell)
for r in results:
    print(
        f" 位置 {r['position']}: '{r['word']}' → '{r['correction']}'"
    )
7.3.4 pyhunspell(libhunspell 绑定)
# 安装(需要系统安装 libhunspell-dev)
pip install pyhunspell
# 或
pip install pyhunspell2
#!/usr/bin/env python3
"""pyhunspell 使用示例"""
import hunspell
# 1. 创建 Hunspell 对象
# 参数: aff 文件路径, dic 文件路径
# 1. Create the HunSpell object from the .aff and .dic file paths.
AFF_PATH = '/usr/share/hunspell/en_US.aff'
DIC_PATH = '/usr/share/hunspell/en_US.dic'
hobj = hunspell.HunSpell(AFF_PATH, DIC_PATH)

# 2. Spell check: a known word first, then a misspelled one.
for probe in ('hello', 'helo'):
    print(hobj.spell(probe))

# 3. Suggestions for a misspelled word.
suggestions = hobj.suggest('helo')
print(f"建议: {suggestions}")

# 4. Add a word to the run-time dictionary and check it again.
hobj.add('Hunspell')
print(hobj.spell('Hunspell'))

# 5. Stemming.
stems = hobj.stem('running')
print(f"词干: {stems}")

# 6. Morphological analysis.
analysis = hobj.analyze('unhappiness')
print(f"形态: {analysis}")

# 7. Generate a word form of 'happy' modelled on 'unhappy'.
generated = hobj.generate('happy', 'unhappy')
print(f"生成: {generated}")
7.3.5 子进程方案(无需编译绑定)
#!/usr/bin/env python3
"""使用 subprocess 调用 hunspell 命令行(无需绑定库)"""
import subprocess
import re
class HunspellChecker:
    """Thin wrapper around the ``hunspell`` command-line tool.

    Every call spawns one ``hunspell`` process and parses its stdout, so no
    compiled binding is required.
    """

    def __init__(self, dictionary: str = "en_US", personal_dict: str = None):
        """Store the dictionary name (``-d``) and optional personal dictionary (``-p``)."""
        self.dictionary = dictionary
        self.personal_dict = personal_dict

    def _run(self, args: list[str], input_text: str = "") -> str:
        """Run ``hunspell`` with ``args``, feed ``input_text`` on stdin, return stdout."""
        cmd = ["hunspell"] + args
        if self.personal_dict:
            cmd.extend(["-p", self.personal_dict])
        result = subprocess.run(
            cmd, input=input_text,
            capture_output=True, text=True
        )
        return result.stdout

    def check_word(self, word: str) -> bool:
        """Return True when hunspell reports no misspelling for ``word``."""
        output = self._run(["-d", self.dictionary, "-l"], word)
        # -l prints only the misspelled tokens of the input. Since the input
        # is just this word, any output means it (or a part of it) is wrong.
        # A substring test on the raw output would miss that case.
        return not output.strip()

    def suggest(self, word: str, limit: int = 5) -> list[str]:
        """Return at most ``limit`` suggestions for ``word`` ([] when there are none)."""
        # The hunspell CLI has no "limit" option: -L means "print lines with
        # misspelled words", so a following number would be read as an input
        # file name. The limit is therefore applied to the parsed result.
        output = self._run(["-a", "-d", self.dictionary], word)
        for line in output.strip().split("\n"):
            if line.startswith("&"):
                match = re.match(r"& \S+ \d+ \d+: (.+)", line)
                if match:
                    found = [s.strip() for s in match.group(1).split(",")]
                    return found[:max(limit, 0)]
        return []

    def find_misspellings(self, text: str) -> list[dict]:
        """Return a ``{'word', 'suggestions'}`` dict for every misspelling in ``text``."""
        output = self._run(["-a", "-d", self.dictionary], text)
        errors = []
        for line in output.strip().split("\n"):
            if line.startswith("&"):
                # "& word count offset: s1, s2, ..." - misspelled, with suggestions.
                match = re.match(r"& (\S+) \d+ \d+: (.+)", line)
                if match:
                    errors.append({
                        'word': match.group(1),
                        'suggestions': [s.strip() for s in match.group(2).split(",")]
                    })
            elif line.startswith("#"):
                # "# word offset" - misspelled, no suggestions.
                match = re.match(r"# (\S+) \d+", line)
                if match:
                    errors.append({
                        'word': match.group(1),
                        'suggestions': []
                    })
        return errors

    def get_stem(self, word: str) -> str:
        """Return the text after "->" on the first such ``hunspell -s`` line, else ``word``."""
        output = self._run(["-s", "-d", self.dictionary], word)
        for line in output.strip().split("\n"):
            if "->" in line:
                return line.split("->")[1].strip()
        return word
# Usage example: check a correct and an incorrect word, then ask for
# suggestions and scan a short sentence.
checker = HunspellChecker("en_US")
for sample in ("hello", "helo"):
    print(checker.check_word(sample))
print(checker.suggest("helo"))
print(checker.find_misspellings("This sentense has typose"))
7.4 Node.js
7.4.1 nspell(推荐)
// nspell 基本使用
const nspell = require('nspell');
const fs = require('fs');
// 加载词典
// Load the dictionary files and build the checker.
const aff = fs.readFileSync('/usr/share/hunspell/en_US.aff');
const dic = fs.readFileSync('/usr/share/hunspell/en_US.dic');
const spell = nspell(aff, dic);

// Spell check.
console.log(spell.correct('hello')); // true
console.log(spell.correct('helo')); // false

// Suggestions.
console.log(spell.suggest('helo'));
// ['hello', 'Helo', 'helot', 'help']

// NOTE: nspell does not provide stemming or morphological analysis, so
// there is no spell.stem(); use a libhunspell binding when stems are needed.

// Add a word to the in-memory (personal) dictionary.
spell.add('Hunspell');
console.log(spell.correct('Hunspell')); // true

// The optional second argument is a *model* word already in the dictionary:
// the new word inherits the model's affix rules.
spell.add('API', 'word');
spell.add('APIs', 'word');

// Remove a word again.
spell.remove('Hunspell');
console.log(spell.correct('Hunspell')); // false
7.4.2 Express.js 中间件
// spellcheck_middleware.js — Express.js 拼写检查中间件
const nspell = require('nspell');
const fs = require('fs');
const path = require('path');
/**
 * Holds one nspell checker per language and checks free text against them.
 */
class SpellCheckerService {
  /**
   * @param {Object<string, string>} dicts  language code → dictionary path
   *   without extension (".aff" and ".dic" are appended).
   * @param {?string} customDictPath  optional file with one extra word per
   *   line; lines starting with "#" are ignored. The words are added to
   *   every loaded checker.
   */
  constructor(dicts, customDictPath = null) {
    this.checkers = {};
    for (const [lang, dictPath] of Object.entries(dicts)) {
      const aff = fs.readFileSync(`${dictPath}.aff`);
      const dic = fs.readFileSync(`${dictPath}.dic`);
      this.checkers[lang] = nspell(aff, dic);
    }

    if (customDictPath && fs.existsSync(customDictPath)) {
      // Trim before filtering so CRLF endings and whitespace-only lines do
      // not end up as empty words.
      const customWords = fs.readFileSync(customDictPath, 'utf-8')
        .split(/\r?\n/)
        .map(line => line.trim())
        .filter(line => line && !line.startsWith('#'));
      for (const checker of Object.values(this.checkers)) {
        customWords.forEach(word => checker.add(word));
      }
    }
  }

  /**
   * Return the distinct misspelled words of `text`.
   *
   * Words are runs of ASCII letters, apostrophes and hyphens; words shorter
   * than three characters are skipped. Each distinct spelling is checked
   * once. De-duplication is case-sensitive because spelling is: "english"
   * must still be reported after a correct "English".
   *
   * @param {string} text
   * @param {string} [lang='en']
   * @returns {{word: string, suggestions: string[]}[]} up to five suggestions each
   * @throws {Error} when no checker is loaded for `lang`
   */
  checkText(text, lang = 'en') {
    const checker = this.checkers[lang];
    if (!checker) throw new Error(`不支持的语言: ${lang}`);

    const words = text.match(/\b[a-zA-Z'-]+\b/g) || [];
    const errors = [];
    const seen = new Set();
    for (const word of words) {
      if (word.length < 3 || seen.has(word)) continue;
      seen.add(word);
      if (!checker.correct(word)) {
        errors.push({
          word,
          suggestions: checker.suggest(word).slice(0, 5)
        });
      }
    }
    return errors;
  }
}
// 初始化服务
// Build the shared service: English dictionary plus the project word list.
const spellService = new SpellCheckerService(
  { en: '/usr/share/hunspell/en_US' },
  path.join(__dirname, '.hunspell', 'project.dic')
);

/**
 * Express middleware: spell-checks `req.body.text` (language from
 * `req.body.lang`, default "en") and stores `{ errors, count }` on
 * `req.spellcheck`.
 *
 * `req.spellcheck` is always set (empty result when there is no text), so
 * downstream handlers can read it unconditionally. A request the service
 * rejects (e.g. an unsupported language) is answered with HTTP 400 instead
 * of surfacing as an unhandled exception.
 */
function spellcheckMiddleware(req, res, next) {
  req.spellcheck = { errors: [], count: 0 };

  const text = req.body && req.body.text;
  if (typeof text === 'string' && text) {
    const lang = req.body.lang || 'en';
    try {
      const errors = spellService.checkText(text, lang);
      req.spellcheck = { errors, count: errors.length };
    } catch (err) {
      res.status(400).json({ error: err.message });
      return;
    }
  }
  next();
}

// API route: the middleware does the work, the handler returns the result.
const express = require('express');
const app = express();
app.use(express.json());
app.post('/api/spellcheck', spellcheckMiddleware, (req, res) => {
  res.json({
    errors: req.spellcheck.errors,
    count: req.spellcheck.count
  });
});
app.listen(3000, () => console.log('拼写检查服务运行在 :3000'));
7.4.3 Browserify / Webpack 打包
// browser_spell.js — 浏览器端拼写检查
// 使用 nspell 的浏览器兼容版本
import nspell from 'nspell';
/**
 * Fetch the .aff/.dic pair for `lang` and build an nspell checker from it.
 *
 * The files are read as text: nspell accepts strings, and `Buffer` is a
 * Node.js global that is not available in browsers unless the bundler adds
 * a polyfill.
 *
 * @param {string} [lang='en-US'] directory and base name under /dictionaries
 * @returns {Promise<object>} the nspell instance
 * @throws {Error} when either file cannot be fetched
 */
async function loadDictionary(lang = 'en-US') {
  const fetchText = async (ext) => {
    const response = await fetch(`/dictionaries/${lang}/${lang}.${ext}`);
    // fetch() only rejects on network failure; a 404 page must not be
    // parsed as a dictionary.
    if (!response.ok) {
      throw new Error(`Failed to load ${lang}.${ext}: HTTP ${response.status}`);
    }
    return response.text();
  };

  const [aff, dic] = await Promise.all([fetchText('aff'), fetchText('dic')]);
  return nspell(aff, dic);
}
// 使用
const spell = await loadDictionary('en_US');

/**
 * Spell-check the current value of an input element.
 *
 * Words are runs of ASCII letters, apostrophes and hyphens; words shorter
 * than three characters are ignored.
 *
 * @param {{value: string}} inputElement
 * @returns {{word: string, suggestions: string[]}[]} one entry per
 *   misspelled occurrence, each with at most three suggestions
 */
function checkInputElement(inputElement) {
  const tokens = inputElement.value.match(/\b[a-zA-Z'-]+\b/g) || [];
  const errors = [];
  for (const token of tokens) {
    if (token.length < 3) continue;
    if (spell.correct(token)) continue;
    errors.push({ word: token, suggestions: spell.suggest(token).slice(0, 3) });
  }
  return errors;
}
7.5 Go
7.5.1 gohunspell
go get github.com/kapsteur/gohunspell
// main.go — Go Hunspell 示例
package main
import (
"fmt"
"log"
"strings"
gohunspell "github.com/kapsteur/gohunspell"
)
// main loads the en_US dictionary, prints a check mark or cross for a few
// sample words, prints up to five suggestions for some misspellings, and
// prints the stems of "running".
func main() {
	// 1. Create the Hunspell instance.
	const (
		affPath = "/usr/share/hunspell/en_US.aff"
		dicPath = "/usr/share/hunspell/en_US.dic"
	)
	hunspell, err := gohunspell.NewHunspell(affPath, dicPath)
	if err != nil {
		log.Fatalf("加载词典失败: %v", err)
	}
	defer hunspell.DeleteHunspell()

	// 2. Spell check.
	for _, word := range []string{"hello", "helo", "world", "wrold", "programming"} {
		status := "✗"
		if hunspell.Spell(word) {
			status = "✓"
		}
		fmt.Printf(" %s %s\n", status, word)
	}

	// 3. Suggestions, capped at five per word.
	fmt.Println("\n=== 建议 ===")
	for _, word := range []string{"helo", "wrold", "progrmming"} {
		suggestions := hunspell.Suggest(word)
		limit := min(5, len(suggestions))
		fmt.Printf(" '%s' → %s\n", word, strings.Join(suggestions[:limit], ", "))
	}

	// 4. Stemming.
	fmt.Println("\n=== 词干 ===")
	stems := hunspell.Stem("running")
	fmt.Printf(" 'running' → %s\n", strings.Join(stems, ", "))
}
// min returns the smaller of a and b (b when they are equal).
func min(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
7.5.2 Go HTTP 拼写检查服务
// server.go — Go HTTP 拼写检查 API
package main
import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"sync"

	gohunspell "github.com/kapsteur/gohunspell"
)
// Wire types of the spell-check HTTP API and the server state.
type (
	// SpellRequest is the JSON request body: the text to check and the
	// language code selecting the dictionary.
	SpellRequest struct {
		Text string `json:"text"`
		Lang string `json:"lang"`
	}

	// SpellError describes one misspelled word and its suggestions.
	SpellError struct {
		Word        string   `json:"word"`
		Suggestions []string `json:"suggestions"`
	}

	// SpellResponse is the JSON response body: all errors and their count.
	SpellResponse struct {
		Errors []SpellError `json:"errors"`
		Count  int          `json:"count"`
	}

	// SpellServer maps language codes to Hunspell checkers; mu guards the map.
	SpellServer struct {
		checkers map[string]*gohunspell.Hunspell
		mu       sync.RWMutex
	}
)
// NewSpellServer loads one Hunspell checker per entry of dicts, which maps
// a language code to its {aff path, dic path} pair. It returns the first
// load error, wrapped with the language code.
func NewSpellServer(dicts map[string][2]string) (*SpellServer, error) {
	loaded := make(map[string]*gohunspell.Hunspell)
	for lang, paths := range dicts {
		affPath, dicPath := paths[0], paths[1]
		checker, err := gohunspell.NewHunspell(affPath, dicPath)
		if err != nil {
			return nil, fmt.Errorf("加载 %s 词典失败: %w", lang, err)
		}
		loaded[lang] = checker
	}
	return &SpellServer{checkers: loaded}, nil
}
// HandleSpellcheck decodes a SpellRequest from the request body, checks
// every distinct word of at least three bytes, and writes a SpellResponse
// as JSON. Unknown languages fall back to "en"; if no checker is available
// at all the request is rejected with 400 instead of dereferencing nil.
//
// NOTE(review): extractWords is not defined in this listing and is expected
// to be provided elsewhere in package main.
func (s *SpellServer) HandleSpellcheck(w http.ResponseWriter, r *http.Request) {
	var req SpellRequest
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "无效请求", http.StatusBadRequest)
		return
	}

	errors, ok := s.collectErrors(req.Lang, extractWords(req.Text))
	if !ok {
		http.Error(w, "不支持的语言", http.StatusBadRequest)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	resp := SpellResponse{Errors: errors, Count: len(errors)}
	if err := json.NewEncoder(w).Encode(resp); err != nil {
		log.Printf("spellcheck: writing response failed: %v", err)
	}
}

// collectErrors spell-checks words with the checker for lang (falling back
// to "en") and reports false when no checker exists. The exclusive lock is
// held for the whole pass because HTTP handlers run concurrently and a
// libhunspell handle must not be used from several goroutines at once.
func (s *SpellServer) collectErrors(lang string, words []string) ([]SpellError, bool) {
	s.mu.Lock()
	defer s.mu.Unlock()

	checker, ok := s.checkers[lang]
	if !ok {
		checker = s.checkers["en"] // default to English
	}
	if checker == nil {
		return nil, false
	}

	errors := make([]SpellError, 0)
	seen := make(map[string]bool)
	for _, word := range words {
		if len(word) < 3 || seen[word] {
			continue
		}
		seen[word] = true
		if checker.Spell(word) {
			continue
		}
		suggestions := checker.Suggest(word)
		if len(suggestions) > 5 {
			suggestions = suggestions[:5]
		}
		errors = append(errors, SpellError{
			Word:        word,
			Suggestions: suggestions,
		})
	}
	return errors, true
}
// main builds a server with the en_US dictionary, registers the
// spell-check endpoint and serves HTTP on port 8080 until it fails.
func main() {
	dictionaries := map[string][2]string{
		"en": {"/usr/share/hunspell/en_US.aff", "/usr/share/hunspell/en_US.dic"},
	}

	server, err := NewSpellServer(dictionaries)
	if err != nil {
		log.Fatal(err)
	}

	http.HandleFunc("/api/spellcheck", server.HandleSpellcheck)
	log.Println("拼写检查服务运行在 :8080")
	// ListenAndServe only returns on failure, always with a non-nil error.
	serveErr := http.ListenAndServe(":8080", nil)
	log.Fatal(serveErr)
}
7.6 Rust
7.6.1 hunspell-rs
# Cargo.toml
[dependencies]
hunspell-rs = "0.4"
// src/main.rs — Rust Hunspell 示例
use hunspell_rs::{CheckResult, Hunspell};

/// Demonstrates checking, suggesting, analysing, stemming and adding words.
fn main() {
    // 1. Create the Hunspell instance. The binding is mutable because
    //    `add` changes the run-time dictionary.
    let aff_path = "/usr/share/hunspell/en_US.aff";
    let dic_path = "/usr/share/hunspell/en_US.dic";
    let mut hunspell = Hunspell::new(aff_path, dic_path);

    // `check` reports a `CheckResult`, not a bool; map it once here.
    let is_correct =
        |h: &Hunspell, word: &str| matches!(h.check(word), CheckResult::FoundInDictionary);

    // 2. Spell check.
    let test_words = vec!["hello", "helo", "world", "wrold"];
    println!("=== 拼写检查 ===");
    for word in &test_words {
        let correct = is_correct(&hunspell, word);
        println!(" {} {}", if correct { "✓" } else { "✗" }, word);
    }

    // 3. Suggestions, capped at five per word.
    println!("\n=== 建议 ===");
    let misspelled = vec!["helo", "wrold"];
    for word in &misspelled {
        let suggestions = hunspell.suggest(word);
        println!(" '{}' → {:?}", word, &suggestions[..suggestions.len().min(5)]);
    }

    // 4. Morphological analysis.
    println!("\n=== 形态分析 ===");
    let analysis = hunspell.analyze("unhappiness");
    println!(" 'unhappiness' → {:?}", analysis);

    // 5. Stemming.
    let stems = hunspell.stem("running");
    println!(" 'running' → {:?}", stems);

    // 6. Add a word to the run-time dictionary and check it again.
    hunspell.add("Hunspell");
    println!("\n 添加后 'Hunspell': {}", is_correct(&hunspell, "Hunspell"));
}
7.7 PHP
7.7.1 pspell 扩展
# 安装 pspell 扩展
sudo apt install php-pspell
# 或编译 PHP 时启用 --with-pspell
<?php
// spellcheck.php — PHP pspell example.
// NOTE: the pspell extension is backed by GNU Aspell, not libhunspell; it
// is shown here as PHP's built-in spell-checking interface.

// 1. Load the dictionary together with a personal wordlist file.
//    pspell_add_to_personal()/pspell_save_wordlist() only work on a
//    dictionary opened with a personal wordlist.
$personal_path = "/tmp/personal_dict.txt";
$dict = pspell_new_personal($personal_path, "en", "", "", "", PSPELL_FAST | PSPELL_RUN_TOGETHER);
if (!$dict) {
    die("错误:无法加载词典\n");
}

// 2. Spell check.
$test_words = ["hello", "helo", "world", "wrold", "programming"];
echo "=== 拼写检查 ===\n";
foreach ($test_words as $word) {
    $correct = pspell_check($dict, $word);
    echo sprintf(" %s %s\n", $correct ? "✓" : "✗", $word);
}

// 3. Suggestions (first five per word).
echo "\n=== 建议 ===\n";
$misspelled = ["helo", "wrold", "progrmming"];
foreach ($misspelled as $word) {
    $suggestions = pspell_suggest($dict, $word);
    echo sprintf(" '%s' → %s\n", $word, implode(", ", array_slice($suggestions, 0, 5)));
}

// 4. Add words to the personal wordlist and persist it.
//    pspell_save_wordlist() takes only the dictionary; the target file is
//    the one given to pspell_new_personal() above.
pspell_add_to_personal($dict, "API");
pspell_add_to_personal($dict, "JSON");
if (!pspell_save_wordlist($dict)) {
    fwrite(STDERR, "警告:无法保存个人词典 {$personal_path}\n");
}
echo "\n 添加后 'API': " . (pspell_check($dict, "API") ? "✓" : "✗") . "\n";
// 5. 文本检查函数
/**
 * Spell-check a text and list its misspelled words.
 *
 * Everything except word characters, whitespace, apostrophes and hyphens
 * is removed before the text is split on whitespace; tokens shorter than
 * three bytes are skipped.
 *
 * @param string $text Text to check.
 * @param mixed  $dict Dictionary handle passed to pspell_check()/pspell_suggest().
 * @return array One ['word' => ..., 'suggestions' => ...] entry per
 *               misspelled token, with at most five suggestions each.
 */
function spellcheck_text(string $text, $dict): array {
    $cleaned = preg_replace('/[^\w\s\'-]/', '', $text);
    $tokens = preg_split('/\s+/', $cleaned);

    $errors = [];
    foreach ($tokens as $token) {
        if (strlen($token) >= 3 && !pspell_check($dict, $token)) {
            $top = array_slice(pspell_suggest($dict, $token), 0, 5);
            $errors[] = ['word' => $token, 'suggestions' => $top];
        }
    }
    return $errors;
}
// Usage: check a sample sentence and print each misspelling with its suggestions.
$text = "This documnet explians the basc usage of pspell.";
$results = spellcheck_text($text, $dict);
echo "\n=== 文本检查 ===\n";
foreach ($results as $r) {
    $joined = implode(', ', $r['suggestions']);
    echo sprintf(" '%s' → 建议: %s\n", $r['word'], $joined);
}
?>
7.8 多语言集成策略
7.8.1 统一接口设计
#!/usr/bin/env python3
"""多语言拼写检查器 — 统一接口"""
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional
@dataclass
class SpellResult:
    """Outcome of checking a single word against one language backend."""

    # The word exactly as it was checked.
    word: str
    # Verdict returned by the backend's check().
    is_correct: bool
    # Suggestions from the backend; empty when the word is correct.
    suggestions: list[str]
    # Language key the word was checked under.
    language: str
class SpellCheckerBackend(ABC):
    """Abstract interface every spell-checking backend must implement."""

    @abstractmethod
    def check(self, word: str) -> bool:
        """Return whether ``word`` is spelled correctly."""

    @abstractmethod
    def suggest(self, word: str, limit: int = 5) -> list[str]:
        """Return replacement suggestions for ``word``; ``limit`` is the requested maximum."""

    @abstractmethod
    def add(self, word: str) -> None:
        """Make the backend accept ``word`` from now on."""
class HunspellBackend(SpellCheckerBackend):
    """Backend that shells out to the ``hunspell`` command-line tool.

    Words passed to ``add`` are kept in memory for this instance only and
    are accepted by ``check`` without calling hunspell.
    """

    def __init__(self, dictionary: str, personal_dict: Optional[str] = None):
        self.dictionary = dictionary
        self.personal_dict = personal_dict
        self._custom_words = set()

    def _command(self, mode: str) -> list[str]:
        """Build the hunspell command line for ``mode`` (e.g. "-l" or "-a")."""
        args = ["hunspell", mode, "-d", self.dictionary]
        # Apply the personal dictionary to every invocation so check() and
        # suggest() agree on which words are known.
        if self.personal_dict:
            args.extend(["-p", self.personal_dict])
        return args

    def check(self, word: str) -> bool:
        if word in self._custom_words:
            return True
        import subprocess
        result = subprocess.run(
            self._command("-l"),
            input=word, capture_output=True, text=True
        )
        # -l prints only the misspelled tokens of the input. Since the input
        # is just this word, any output means it is wrong.
        return not result.stdout.strip()

    def suggest(self, word: str, limit: int = 5) -> list[str]:
        import subprocess, re
        # The hunspell CLI has no "limit" option: -L means "print lines with
        # misspelled words", so a following number would be read as an input
        # file name. The limit is applied to the parsed result instead.
        result = subprocess.run(
            self._command("-a"), input=word, capture_output=True, text=True
        )
        for line in result.stdout.strip().split("\n"):
            if line.startswith("&"):
                match = re.match(r"& \S+ \d+ \d+: (.+)", line)
                if match:
                    found = [s.strip() for s in match.group(1).split(",")]
                    return found[:max(limit, 0)]
        return []

    def add(self, word: str) -> None:
        self._custom_words.add(word)
class MultiLanguageSpellChecker:
    """Registry of per-language backends with a single checking entry point."""

    def __init__(self):
        self._backends: dict[str, SpellCheckerBackend] = {}

    def register(self, language: str, backend: SpellCheckerBackend):
        """Associate ``backend`` with ``language``, replacing any previous one."""
        self._backends[language] = backend

    def check(self, word: str, language: str) -> SpellResult:
        """Check ``word`` with the backend registered for ``language``.

        Suggestions are requested only when the word is not correct.

        Raises:
            ValueError: no backend is registered for ``language``.
        """
        backend = self._backends.get(language)
        if not backend:
            raise ValueError(f"未注册语言: {language}")

        is_correct = backend.check(word)
        suggestions = [] if is_correct else backend.suggest(word)
        return SpellResult(
            word=word,
            is_correct=is_correct,
            suggestions=suggestions,
            language=language
        )

    def check_multilingual(self, text: str, detect_lang=None) -> list[SpellResult]:
        """Check each word of mixed-language ``text``.

        Words are runs of three or more characters from the ranges in the
        pattern below. ``detect_lang(word)`` picks the language key per word
        ("en" when no detector is given); words whose language has no
        registered backend are skipped.
        """
        import re
        pattern = r'\b[a-zA-Z\u00C0-\u024F\u0400-\u04FF]{3,}\b'
        results = []
        for token in re.findall(pattern, text):
            if detect_lang:
                lang = detect_lang(token)
            else:
                lang = "en"
            if lang not in self._backends:
                continue
            results.append(self.check(token, lang))
        return results
# Usage example: register one Hunspell backend per language.
checker = MultiLanguageSpellChecker()
for _lang, _dictionary in (("en", "en_US"), ("fr", "fr"), ("de", "de_DE")):
    checker.register(_lang, HunspellBackend(_dictionary))

# Single-language check.
result = checker.check("helo", "en")
print(f"'{result.word}' → {'✓' if result.is_correct else '✗'} {result.suggestions}")

# Mixed-language check. Simplification: the detector treats every word as English.
def _assume_english(_word):
    return "en"

text = "This English text avec du Français und Deutsch mischung"
results = checker.check_multilingual(text, detect_lang=_assume_english)
7.9 本章小结
| 语言 | 推荐方案 | 安装命令 | 适用场景 |
|---|---|---|---|
| C/C++ | libhunspell | apt install libhunspell-dev | 底层集成 |
| Python | pyspellchecker | pip install pyspellchecker | 快速开发 |
| Node.js | nspell | npm install nspell | Web 应用 |
| Go | gohunspell | go get github.com/kapsteur/gohunspell | 微服务 |
| Rust | hunspell-rs | cargo add hunspell-rs | 高性能服务 |
| PHP | pspell | apt install php-pspell | Web 后端 |
扩展阅读