Nim 完全指南 / 09 字符串处理

第 09 章：字符串处理

9.1 字符串基础

Nim 的 string 类型是具有值语义的可变字节序列：赋值会得到一份独立的副本，修改副本不会影响原字符串。字符串本身不强制编码，但源码字面量和标准库通常按 UTF-8 处理。

# Creating strings
let s1 = "Hello, World!"        # ordinary literal (escape sequences apply)
# Triple-quoted literal: may span several lines, escape sequences are NOT
# processed, and embedded "quotes" need no escaping. Anything written after
# the opening quotes -- including a `#` -- is part of the string, so this
# explanation has to live outside the literal.
let s2 = """多行
字符串，包含 "引号" 也无需转义"""
let s3: string = ""              # empty string
let s4 = newString(10)           # length 10; fill via s4[i] before reading it

# Strings have value semantics: assignment yields an independent copy
let a = "hello"
var b = a
b.add(" world")
echo a  # "hello" (unaffected by the change to b)
echo b  # "hello world"

9.2 字符串拼接

# The & operator (recommended for building a new string)
let first = "Hello"
let second = "World"
let greeting = first & ", " & second & "!"
echo greeting  # Hello, World!

# add appends in place to an existing string
var result = ""
result.add("Hello")
result.add(", ")
result.add("World")
echo result  # Hello, World!

# join concatenates the elements of a sequence with a separator
import std/strutils
let words = @["Nim", "is", "awesome"]
echo words.join(" ")       # "Nim is awesome"
echo words.join(", ")      # "Nim, is, awesome"
echo words.join("")        # "Nimisawesome"

# repeat concatenates n copies of the string
echo "Ha".repeat(3)        # "HaHaHa"
echo "=-".repeat(20)       # "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-"

9.3 字符串插值（strformat）

strformat 模块提供 Python 风格的格式化字符串：

import std/strformat

let name = "Nim"
let version = 2
let pi = 3.14159

# Basic interpolation
echo &"Hello, {name}!"            # Hello, Nim!
echo &"Version: {version}"        # Version: 2

# Fixed number of decimals
echo &"Pi = {pi:.2f}"            # Pi = 3.14
echo &"Pi = {pi:.6f}"            # Pi = 3.141590

# Width and alignment. A string literal inside {} needs double quotes
# ('left' is not a valid char literal), so the format string itself is
# written with triple quotes.
echo &"""|{"left":<10}|{"center":^10}|{"right":>10}|"""
# |left      |  center  |     right|

# Number formats
let price = 49.99
echo &"Price: ${price:.2f}"       # Price: $49.99
echo &"Hex: {255:#x}"             # Hex: 0xff
echo &"Oct: {255:#o}"             # Oct: 0o377
echo &"Bin: {255:#b}"             # Bin: 0b11111111
echo &"Sci: {1234567.89:.2e}"     # Sci: 1.23e+06

# Percentages: strformat has no `%` presentation type, so scale the value
# and append the sign literally.
let rate = 0.856
echo &"Rate: {rate * 100:.1f}%"   # Rate: 85.6%

# Padding with a custom fill character (total width 20 and 8)
echo &"""|{"x":*^20}|"""         # |*********x**********|
echo &"|{42:0>8}|"               # |00000042|

9.4 字符串格式化（strutils）

import std/[strutils, wordwrap]

# formatFloat: FloatFormatMode offers ffDefault, ffDecimal and ffScientific.
# There is no thousands-separator mode; use insertSep (below) for grouping.
echo formatFloat(3.14159, ffDecimal, 2)      # "3.14"
echo formatFloat(1234.5, ffDecimal, 1)       # "1234.5"
echo formatFloat(1234.5, ffScientific, 6)    # "1.234500e+03"

# align / alignLeft / center pad the string to the given width
echo align("hello", 20)           # right-aligned
echo alignLeft("hello", 20)       # left-aligned
echo center("hello", 20, '*')     # centred, padded with '*'

# Integer formatting: toOct/toHex/toBin take the number of digits to emit
echo insertSep($1234567890, ',')   # "1,234,567,890"
echo toOct(255, 3)                 # "377"
echo toHex(255, 2)                 # "FF"
echo toBin(255, 8)                 # "11111111"

# Line wrapping lives in std/wordwrap (wrapWords), not in strutils
let longText = "这是一段很长很长的文字，需要在适当的位置换行显示。"
echo wrapWords(longText, 20)

9.5 字符串搜索与替换

import std/strutils

let text = "Hello, World! Hello, Nim!"

# Searching
echo text.find("Hello")       # 0  (index of the first occurrence)
echo text.find("Hello", 5)    # 14 (search starts at index 5)
echo text.rfind("Hello")      # 14 (last occurrence)

# Containment checks
echo text.contains("World")   # true
echo "World" in text           # true
echo text.startsWith("Hello") # true
echo text.endsWith("Nim!")    # true

# Replacing: strutils.replace always replaces every occurrence
echo text.replace("Hello", "Hi")           # "Hi, World! Hi, Nim!"

# Replacing only the first occurrence: replace has no count parameter,
# so locate the match with find and splice the string by hand.
let firstHit = text.find("Hello")
let onceReplaced =
  if firstHit >= 0:
    text[0 ..< firstHit] & "Hi" & text[firstHit + "Hello".len .. ^1]
  else:
    text
echo onceReplaced                          # "Hi, World! Hello, Nim!"

# Trimming
echo "  Hello  ".strip()           # "Hello"
echo "***Hello***".strip(chars = {'*'})  # "Hello"

# Splitting on a separator
let csv = "apple,banana,cherry,date"
let parts = csv.split(",")
echo parts  # @["apple", "banana", "cherry", "date"]

# Splitting into lines
let multiLine = """Line 1
Line 2
Line 3"""
let lines = multiLine.splitLines()
echo lines  # @["Line 1", "Line 2", "Line 3"]

9.6 Unicode 处理

import std/unicode
from std/strutils import toHex, isDigit

let text = "你好，世界！Hello!"

# len counts bytes, runeLen counts Unicode code points.
# The six CJK characters/punctuation marks take 3 bytes each: 6*3 + 6 = 24.
echo text.len              # bytes: 24
echo text.runeLen           # code points: 12

# Iterate over the code points
for rune in text.runes:
  echo rune, " (U+", toHex(int(rune), 4), ")"

# Indexing by code point
let s = "你好世界"
let firstRune = s.runeAt(0)
echo firstRune              # 你
echo s.runeOffset(1)        # 3 (byte offset of the second code point)

# Unicode-aware case conversion
echo toUpper("hello")       # "HELLO"
echo toLower("HELLO")       # "hello"
echo capitalize("hello")    # "Hello"

# Reversing by code point rather than by byte
echo reversed("你好世界")   # "界世好你"

# Classification. std/unicode provides isAlpha and isWhiteSpace for Rune;
# it has no isDigit, so the ASCII check from strutils is used on a char.
echo isAlpha(Rune('A'))       # true
echo isDigit('5')             # true
echo isWhiteSpace(Rune(' '))  # true

9.7 正则表达式

import std/re

# Sample text containing a phone number and an e-mail address
let text = "我的电话是 138-1234-5678，邮箱是 user@example.com"

# Does the pattern occur anywhere in the text?
echo text.contains(re"\d{3}-\d{4}-\d{4}")   # true

# Collect every match
let emails = text.findAll(re"[\w.]+@[\w.]+\.\w+")
echo emails  # @["user@example.com"]

# Capture groups. `match` is anchored at the start of the string, so use
# `find`, which searches the whole text and fills one slot per group.
var phoneParts: array[3, string]
if text.find(re"(\d{3})-(\d{4})-(\d{4})", phoneParts) >= 0:
  echo "电话: ", phoneParts[0], "-", phoneParts[1], "-", phoneParts[2]

# Replace every match
let censored = text.replace(re"\d{3}-\d{4}-\d{4}", "***-****-****")
echo censored  # 我的电话是 ***-****-****，邮箱是 user@example.com

# Split on a pattern
let parts = "one1two2three3four".split(re"\d")
echo parts  # @["one", "two", "three", "four"]

9.8 字符串与序列互转

# mapIt and toSeq come from sequtils; split, join and parseInt from strutils
import std/[strutils, sequtils]

# String -> sequence of integers
let csv = "1,2,3,4,5"
let nums = csv.split(",").mapIt(parseInt(it))
echo nums  # @[1, 2, 3, 4, 5]

# Sequence -> string
let words = @["Nim", "is", "fast"]
echo words.join(" ")  # "Nim is fast"

# String -> sequence of chars
let chars = toSeq("Hello")
echo chars  # @['H', 'e', 'l', 'l', 'o']

# Sequence of chars -> string (join stringifies each element itself)
let s = chars.join()
echo s  # "Hello"

# Number -> string
echo $42          # "42"
echo $3.14        # "3.14"
echo $true        # "true"
echo intToStr(42, 6)  # "000042" (zero-padded to 6 digits)

9.9 字符串切片

import std/unicode

let s = "Hello, World!"

# Slices index bytes, not characters
echo s[0 ..< 5]        # "Hello"
echo s.substr(7, 11)   # "World"
echo s[7 .. s.high]    # "World!"

# Byte slices can cut a multi-byte character in half; for CJK and other
# non-ASCII text, slice by code point with the unicode module instead.
let zh = "你好世界"
echo runeSubStr(zh, 0, 2)  # "你好"
echo runeSubStr(zh, 2, 2)  # "世界"

9.10 实战示例

🏢 场景：模板引擎

import std/[strutils, re, tables, sequtils]

type TemplateEngine = object
  ## Minimal template engine: substitutes `{{key}}` placeholders with the
  ## values registered through `setVar`.
  variables: Table[string, string]

proc newTemplateEngine(): TemplateEngine =
  ## Returns an engine with no variables defined.
  TemplateEngine(variables: initTable[string, string]())

proc setVar(te: var TemplateEngine, key, value: string) =
  ## Defines `key`, or overwrites its previous value.
  te.variables[key] = value

proc render(te: TemplateEngine, templateStr: string): string =
  ## Returns `templateStr` with every `{{key}}` whose key is defined replaced
  ## by its value. Unknown or unterminated placeholders are left untouched.
  ##
  ## The template is scanned once from left to right and substituted text is
  ## never scanned again, so a value that itself contains `{{other}}` is
  ## emitted literally and the result does not depend on table order.
  result = newStringOfCap(templateStr.len)
  var pos = 0
  while pos < templateStr.len:
    let open = templateStr.find("{{", pos)
    let close = if open < 0: -1 else: templateStr.find("}}", open + 2)
    if close < 0:
      # No complete placeholder remains: copy the tail and stop.
      result.add templateStr[pos .. ^1]
      break
    result.add templateStr[pos ..< open]
    let key = templateStr[open + 2 ..< close]
    if key in te.variables:
      result.add te.variables[key]
      pos = close + 2
    else:
      # Not a known key: keep the braces and resume right after them, so a
      # placeholder nested inside (e.g. "{{ {{name}} }}") is still found.
      result.add "{{"
      pos = open + 2

var te = newTemplateEngine()
te.setVar("name", "张三")
te.setVar("company", "Nim科技")
te.setVar("year", "2026")

let tpl = """尊敬的 {{name}}：

欢迎加入 {{company}}！
感谢您在 {{year}} 年选择我们。

此致
{{company}} 团队"""

echo te.render(tpl)

🏢 场景：日志格式化

import std/[strformat, times, strutils]
from std/os import extractFilename  # used by log to shorten file paths

type
  LogLevel = enum
    ## Severity levels, ordered from least to most severe.
    Debug, Info, Warning, Error

  Logger = object
    ## Console logger that drops messages below `minLevel`.
    minLevel: LogLevel

proc newLogger(minLevel: LogLevel = Info): Logger =
  ## Returns a logger that prints messages at `minLevel` or above.
  Logger(minLevel: minLevel)

func levelColor(level: LogLevel): string =
  ## ANSI escape sequence used to colour the level tag. The case has no
  ## `else`, so adding a level without a colour is a compile error.
  case level
  of Debug: "\e[36m"
  of Info: "\e[32m"
  of Warning: "\e[33m"
  of Error: "\e[31m"

proc log(lg: Logger, level: LogLevel, message: string,
         file: string = "", line: int = 0) =
  ## Prints `message` to stdout with a timestamp and a coloured level tag.
  ## When `file` is non-empty, " [name:line]" is added, where name is `file`
  ## without its directory part. Messages below `lg.minLevel` are skipped.
  if level < lg.minLevel:
    return

  let timestamp = now().format("yyyy-MM-dd HH:mm:ss")
  let levelStr = ($level).alignLeft(7)  # pad so the columns line up
  let location =
    if file.len > 0: &" [{extractFilename(file)}:{line}]"
    else: ""

  echo &"[{timestamp}] {levelColor(level)}{levelStr}\e[0m{location} {message}"

# The helpers below are templates rather than procs so that
# instantiationInfo() reports the caller's file and line.
template debug(lg: Logger, msg: string) =
  lg.log(Debug, msg, instantiationInfo().filename, instantiationInfo().line)

template info(lg: Logger, msg: string) =
  lg.log(Info, msg, instantiationInfo().filename, instantiationInfo().line)

template warn(lg: Logger, msg: string) =
  lg.log(Warning, msg, instantiationInfo().filename, instantiationInfo().line)

template error(lg: Logger, msg: string) =
  lg.log(Error, msg, instantiationInfo().filename, instantiationInfo().line)

var logger = newLogger(Debug)
logger.info("应用启动")
logger.debug("加载配置文件")
logger.warn("配置项缺失")
logger.error("数据库连接失败")

本章小结

操作	方法	示例
拼接	`&`	`"a" & "b"`
格式化	`&"{}"`	`&"值: {x:.2f}"`
查找	`find`, `contains`	`s.find("hi")`
替换	`replace`	`s.replace("a", "b")`
分割	`split`	`s.split(",")`
连接	`join`	`@["a","b"].join(",")`
长度	`len`, `runeLen`	字节数/字符数
Unicode	`unicode` 模块	`runes`, `runeLen`
正则	`re` 模块	`re"\d+"`

练习

实现一个简单的 Markdown → HTML 转换器（处理标题、粗体、链接）
编写一个 CSV 解析器，支持引号内含逗号的情况
使用正则表达式提取一段文本中的所有 URL
实现一个支持变量插值的模板引擎

扩展阅读

← 上一章：数据结构 | 下一章：面向对象编程 →