强曰为道
与天地相似,故不违。知周乎万物,而道济天下,故不过。旁行而不流,乐天知命,故不忧。
文档目录

Nim 完全指南 / 09 字符串处理

第 09 章:字符串处理

9.1 字符串基础

Nim 的 string 类型是具有值语义的可变字节序列:赋值会得到一份独立的副本。字符串本身不记录编码,标准库按惯例把内容视为 UTF-8 文本。

# Creating strings
let s1 = "Hello, World!"        # ordinary literal (escape sequences apply)
# Triple-quoted literal: may span several lines and processes no escapes.
# Everything after the opening quotes is string content, so a trailing `#`
# remark on that line would end up inside the string; keep remarks up here.
let s2 = """多行
字符串,包含 "引号" 也无需转义"""
let s3: string = ""              # empty string
let s4 = newString(10)           # length 10, every byte is '\0'

# Strings have value semantics: assignment copies, so `b` is independent of `a`
var a = "hello"
var b = a
b.add(" world")
echo a  # hello (unaffected by the change to b)
echo b  # hello world

9.2 字符串拼接

# Concatenate with the & operator (recommended); builds a new string
let first = "Hello"
let second = "World"
let greeting = first & ", " & second & "!"
echo greeting  # Hello, World!

# Append with add (modifies the string in place)
var result = ""
result.add("Hello")
result.add(", ")
result.add("World")
echo result  # Hello, World!

# join: glue the elements of a seq together with a separator
import std/strutils
let words = @["Nim", "is", "awesome"]
echo words.join(" ")       # Nim is awesome
echo words.join(", ")      # Nim, is, awesome
echo words.join("")        # Nimisawesome

# repeat: n copies of the string back to back
echo "Ha".repeat(3)        # HaHaHa
echo "=-".repeat(20)       # "=-" twenty times: =-=-=-=-... (40 characters)

9.3 字符串插值(strformat)

strformat 模块提供 Python 风格的格式化字符串:

import std/strformat

let name = "Nim"
let version = 2
let pi = 3.14159

# Basic interpolation: any Nim expression may appear between the braces
echo &"Hello, {name}!"            # Hello, Nim!
echo &"Version: {version}"        # Version: 2

# Floating-point precision
echo &"Pi = {pi:.2f}"            # Pi = 3.14
echo &"Pi = {pi:.6f}"            # Pi = 3.141590

# Width and alignment. The braces hold Nim expressions, so text has to be a
# string value; 'left' in single quotes is not a valid character literal.
let (leftText, centerText, rightText) = ("left", "center", "right")
echo &"|{leftText:<10}|{centerText:^10}|{rightText:>10}|"
# |left      |  center  |     right|

# Number formats
let price = 49.99
echo &"Price: ${price:.2f}"       # Price: $49.99
echo &"Hex: {255:#x}"             # Hex: 0xff
echo &"Oct: {255:#o}"             # Oct: 0o377
echo &"Bin: {255:#b}"             # Bin: 0b11111111
echo &"Sci: {1234567.89:.2e}"     # Sci: 1.23e+06

# Percentages: the float presentation types are e/E/f/F/g/G only, there is
# no '%' type, so scale the value and append the sign by hand.
let rate = 0.856
echo &"Rate: {rate * 100:.1f}%"   # Rate: 85.6%

# Fill characters (fill char, then alignment, then total width)
echo &"|{'x':*^20}|"             # |*********x**********|
echo &"|{42:0>8}|"               # |00000042|

9.4 字符串格式化(strutils)

import std/strutils

# formatFloat
echo formatFloat(3.14159, ffDecimal, 2)     # 3.14
# FloatFormatMode offers only ffDefault, ffDecimal and ffScientific; for
# thousands grouping, run insertSep over the integer digits.
let fixedParts = formatFloat(1234.5, ffDecimal, 1).split('.')
echo insertSep(fixedParts[0], ',') & "." & fixedParts[1]   # 1,234.5
# The default precision is 16 digits; ask for 6 to get the usual short form.
echo formatFloat(1234.5, ffScientific, 6)    # 1.234500e+03

# align / alignLeft / center pad the text to a total width
echo align("hello", 20)           # right-aligned
echo alignLeft("hello", 20)       # left-aligned
echo center("hello", 20, '*')     # centered, padded with '*'

# Integer formatting
echo insertSep($1234567890, ',')   # 1,234,567,890
echo toOct(255, 3)                 # 377 (the digit count is a required argument)
echo toHex(255, 2)                 # FF (with no length the result is zero-padded
                                   # to the full width of the integer type)
echo toBin(255, 8)                 # 11111111 (digit count is required)

# Line wrapping is provided by std/wordwrap (wrapWords), not by strutils
import std/wordwrap
let longText = "这是一段很长很长的文字,需要在适当的位置换行显示。"
echo wrapWords(longText, 20)

9.5 字符串搜索与替换

import std/strutils

let text = "Hello, World! Hello, Nim!"

# Searching: find/rfind return a byte index, or -1 when nothing matches
echo text.find("Hello")       # 0  (first occurrence)
echo text.find("Hello", 5)    # 14 (search starts at index 5)
echo text.rfind("Hello")      # 14 (last occurrence)

# Containment and prefix/suffix tests
echo text.contains("World")   # true
echo "World" in text           # true
echo text.startsWith("Hello") # true
echo text.endsWith("Nim!")    # true

# Replace every occurrence
echo text.replace("Hello", "Hi")           # Hi, World! Hi, Nim!

# Replace only the first occurrence: strutils.replace has no count
# argument, so locate the match and splice the text around it.
let firstHit = text.find("Hello")
if firstHit >= 0:
  echo text[0 ..< firstHit] & "Hi" & text[firstHit + "Hello".len .. ^1]
  # Hi, World! Hello, Nim!

# Trimming
echo "  Hello  ".strip()           # Hello
echo "***Hello***".strip(chars = {'*'})  # Hello

# Splitting on a separator
let csv = "apple,banana,cherry,date"
let parts = csv.split(",")
echo parts  # @["apple", "banana", "cherry", "date"]

# Splitting into lines
let multiLine = """Line 1
Line 2
Line 3"""
let lines = multiLine.splitLines()
echo lines  # @["Line 1", "Line 2", "Line 3"]

9.6 Unicode 处理

import std/unicode

# toHex and the ASCII isDigit come from strutils; import just those two so
# nothing else clashes with the same-named procs in std/unicode.
from std/strutils import toHex, isDigit

let text = "你好,世界!Hello!"

# len counts bytes, runeLen counts Unicode code points
echo text.len              # bytes: 20 (4 CJK characters x 3 bytes + 8 ASCII bytes)
echo text.runeLen           # code points: 12

# Iterate code point by code point
for rune in text.runes:
  echo rune, " (U+", rune.int.toHex(4), ")"

# Access by code-point position
let s = "你好世界"
let firstRune = s.runeAt(0)
echo firstRune              # 你
echo s.runeOffset(1)        # 3 (byte offset of the second character)

# Unicode-aware case conversion
echo toUpper("hello")       # HELLO
echo toLower("HELLO")       # hello
echo capitalize("hello")    # Hello

# Reverse by code point rather than by byte
echo reversed("你好世界")   # 界世好你

# Character classification
echo isAlpha('A'.Rune)      # true
echo isDigit('5')           # true (std/unicode has no isDigit for Rune;
                            # this is the ASCII check from strutils)
echo isWhiteSpace(' '.Rune) # true (the Rune overload is isWhiteSpace;
                            # unicode.isSpace takes a whole string)

9.7 正则表达式

import std/re

# Sample text; user@example.com is a placeholder address
let text = "我的电话是 138-1234-5678,邮箱是 user@example.com"

# Does the pattern occur anywhere in the text?
echo text.contains(re"\d{3}-\d{4}-\d{4}")   # true

# Collect every match
let emails = text.findAll(re"[\w.]+@[\w.]+\.\w+")
echo emails  # @["user@example.com"]

# Capture groups. `match` only succeeds when the pattern matches at the very
# start of the string, and the groups are written into an array, so search
# with `find` and give it one slot per group.
var phone: array[3, string]
if text.find(re"(\d{3})-(\d{4})-(\d{4})", phone) >= 0:
  echo "电话: ", phone[0], "-", phone[1], "-", phone[2]   # 电话: 138-1234-5678

# Replace every match
let censored = text.replace(re"\d{3}-\d{4}-\d{4}", "***-****-****")
echo censored  # 我的电话是 ***-****-****,邮箱是 user@example.com

# Split on a pattern
let parts = "one1two2three3four".split(re"\d")
echo parts  # @["one", "two", "three", "four"]

9.8 字符串与序列互转

import std/strutils

# mapIt and toSeq are defined in std/sequtils, not in strutils
import std/sequtils

# String -> seq of numbers
let csv = "1,2,3,4,5"
let nums = csv.split(",").mapIt(parseInt(it))
echo nums  # @[1, 2, 3, 4, 5]

# Seq -> string
let words = @["Nim", "is", "fast"]
echo words.join(" ")  # Nim is fast

# String -> seq of chars
let chars = "Hello".toSeq
echo chars  # @['H', 'e', 'l', 'l', 'o']

# Seq of chars -> string (join stringifies each element itself)
let s = chars.join("")
echo s  # Hello

# Other values -> string
echo $42          # 42
echo $3.14        # 3.14
echo $true        # true
echo intToStr(42, 6)  # 000042 (zero-padded to at least 6 digits)

9.9 字符串切片

let s = "Hello, World!"

# Slicing works on byte indices; both ends of a..b are inclusive
echo s[0..4]     # Hello
echo s[7..11]    # World
echo s[7..^1]    # World! (^1 is the last byte)

# Caution: a byte slice can cut through the middle of a multi-byte character.
# For Chinese and other non-ASCII text, slice by rune with std/unicode.
import std/unicode
let zh = "你好世界"
echo zh.runeSubStr(0, 2)  # 你好
echo zh.runeSubStr(2, 2)  # 世界

9.10 实战示例

🏢 场景:模板引擎

import std/[strutils, re, tables, sequtils]

type TemplateEngine = object
  ## Substitutes `{{key}}` placeholders with registered values.
  variables: Table[string, string]  ## placeholder name -> replacement text

proc newTemplateEngine(): TemplateEngine =
  ## Returns an engine with no variables registered.
  TemplateEngine(variables: initTable[string, string]())

proc setVar(te: var TemplateEngine, key, value: string) =
  ## Registers `value` as the replacement for `{{key}}`, overwriting any
  ## value stored under the same key.
  te.variables[key] = value

proc render(te: TemplateEngine, templateStr: string): string =
  ## Returns `templateStr` with every `{{key}}` whose key is registered
  ## replaced by its value.
  ##
  ## The template is scanned once from left to right. Substituted values are
  ## copied verbatim and never re-scanned, so a value that itself contains
  ## `{{...}}` cannot trigger further substitutions and the result does not
  ## depend on the table's iteration order. Placeholders with an unknown key
  ## and unterminated `{{` are left in the output unchanged.
  const
    opener = "{{"
    closer = "}}"
  result = newStringOfCap(templateStr.len)
  var pos = 0
  while pos < templateStr.len:
    let openAt = templateStr.find(opener, pos)
    let closeAt =
      if openAt < 0: -1
      else: templateStr.find(closer, openAt + opener.len)
    if closeAt < 0:
      # No complete placeholder remains; the rest is literal text.
      result.add templateStr[pos .. ^1]
      break
    let key = templateStr[openAt + opener.len ..< closeAt]
    if key in te.variables:
      result.add templateStr[pos ..< openAt]
      result.add te.variables[key]
      pos = closeAt + closer.len
    else:
      # Unknown key: emit a single brace and rescan from the next byte, so
      # that input such as "{{{name}}" still expands the inner "{{name}}".
      result.add templateStr[pos .. openAt]
      pos = openAt + 1

# Template text; {{name}}, {{company}} and {{year}} are the placeholders.
let tpl = """尊敬的 {{name}}:

欢迎加入 {{company}}!
感谢您在 {{year}} 年选择我们。

此致
{{company}} 团队"""

# Register one value per placeholder, then print the rendered text.
var te = newTemplateEngine()
for entry in [("name", "张三"), ("company", "Nim科技"), ("year", "2026")]:
  te.setVar(entry[0], entry[1])

echo te.render(tpl)

🏢 场景:日志格式化

import std/[strformat, times, strutils]

# extractFilename lives in std/os, which the snippet's import line omits.
from std/os import extractFilename

type
  LogLevel = enum  ## Severity levels, lowest first; filtering compares them with `<`.
    Debug, Info, Warning, Error

  Logger = object
    ## Console logger that drops messages below `minLevel`.
    minLevel: LogLevel

const levelColor: array[LogLevel, string] = [
  Debug: "\e[36m", Info: "\e[32m", Warning: "\e[33m", Error: "\e[31m"
]  ## ANSI colour escape used for each level's label

proc newLogger(minLevel: LogLevel = Info): Logger =
  ## Returns a logger that prints messages at `minLevel` and above.
  Logger(minLevel: minLevel)

proc log(lg: Logger, level: LogLevel, message: string,
         file: string = "", line: int = 0) =
  ## Prints `message` to stdout as
  ## `[timestamp] LEVEL [file:line] message`, with the level label coloured.
  ##
  ## Nothing is printed when `level` is below `lg.minLevel`. The
  ## `[file:line]` part is omitted when `file` is empty; only the file name
  ## component of `file` is shown.
  if level < lg.minLevel:
    return

  let timestamp = now().format("yyyy-MM-dd HH:mm:ss")
  # Pad to 7 columns ("Warning" is the longest label) so messages line up.
  let levelStr = ($level).alignLeft(7)
  let location =
    if file.len > 0: &" [{extractFilename(file)}:{line}]"
    else: ""

  echo &"[{timestamp}] {levelColor[level]}{levelStr}\e[0m{location} {message}"

template debug(lg: Logger, msg: string) =
  ## Logs `msg` at Debug level, tagged with the file and line of the call site.
  let callSite = instantiationInfo()
  lg.log(Debug, msg, callSite.filename, callSite.line)

template info(lg: Logger, msg: string) =
  ## Logs `msg` at Info level, tagged with the file and line of the call site.
  let callSite = instantiationInfo()
  lg.log(Info, msg, callSite.filename, callSite.line)

template warn(lg: Logger, msg: string) =
  ## Logs `msg` at Warning level, tagged with the file and line of the call site.
  let callSite = instantiationInfo()
  lg.log(Warning, msg, callSite.filename, callSite.line)

template error(lg: Logger, msg: string) =
  ## Logs `msg` at Error level, tagged with the file and line of the call site.
  let callSite = instantiationInfo()
  lg.log(Error, msg, callSite.filename, callSite.line)

# The logger is never modified after construction, so bind it with `let`.
# Debug as the minimum level means all four messages below are printed.
let logger = newLogger(Debug)
logger.info("应用启动")
logger.debug("加载配置文件")
logger.warn("配置项缺失")
logger.error("数据库连接失败")

本章小结

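| 操作 | 方法 | 示例 |
| --- | --- | --- |
| 拼接 | `&` | `"a" & "b"` |
| 格式化 | `&"{}"` | `&"值: {x:.2f}"` |
| 查找 | `find`, `contains` | `s.find("hi")` |
| 替换 | `replace` | `s.replace("a", "b")` |
| 分割 | `split` | `s.split(",")` |
| 连接 | `join` | `@["a","b"].join(",")` |
| 长度 | `len`, `runeLen` | 字节数/字符数 |
| Unicode | `unicode` 模块 | `runes`, `runeLen` |
| 正则 | `re` 模块 | `re"\d+"` |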

练习

  1. 实现一个简单的 Markdown → HTML 转换器(处理标题、粗体、链接)
  2. 编写一个 CSV 解析器,支持引号内含逗号的情况
  3. 使用正则表达式提取一段文本中的所有 URL
  4. 实现一个支持变量插值的模板引擎

扩展阅读


上一章:数据结构 | 下一章:面向对象编程