Nim 完全指南 / 09 字符串处理
第 09 章:字符串处理
9.1 字符串基础
Nim 的 string 类型是具有值语义的可变字节序列:赋值会复制内容,因此修改副本不会影响原字符串。语言本身不强制编码,但源码字面量和标准库约定使用 UTF-8。
# Creating strings
let s1 = "Hello, World!" # ordinary literal; backslash escapes are processed
# Triple-quoted literal: may span several lines, escapes are NOT processed and
# embedded double quotes need no escaping. A `#` placed inside the quotes is
# string content rather than a comment, so this explanation must stay outside.
let s2 = """多行
字符串,包含 "引号" 也无需转义"""
let s3: string = "" # empty string
let s4 = newString(10) # length 10, every byte is '\0'

# Strings have value semantics: assignment copies the content, so the copy
# can be modified without touching the original.
let a = "hello"
var b = a
b.add(" world")
echo a # "hello" (unaffected)
echo b # "hello world"
9.2 字符串拼接
import std/strutils # provides join and repeat

# Concatenate with the & operator (recommended for one-off expressions)
let first = "Hello"
let second = "World"
let greeting = first & ", " & second & "!"
echo greeting # Hello, World!

# Append in place with add (mutates the existing string)
var buf = ""
buf.add("Hello")
buf.add(", ")
buf.add("World")
echo buf # Hello, World

# join: concatenate the items of a sequence with a separator
let words = @["Nim", "is", "awesome"]
echo words.join(" ") # "Nim is awesome"
echo words.join(", ") # "Nim, is, awesome"
echo words.join("") # "Nimisawesome"

# repeat: the whole string is repeated n times
echo "Ha".repeat(3) # "HaHaHa"
echo "=-".repeat(20) # "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-" (40 chars)
9.3 字符串插值(strformat)
strformat 模块提供 Python 风格的格式化字符串:
import std/strformat

let name = "Nim"
let version = 2
let pi = 3.14159

# Basic interpolation
echo &"Hello, {name}!" # Hello, Nim!
echo &"Version: {version}" # Version: 2

# Float precision
echo &"Pi = {pi:.2f}" # Pi = 3.14
echo &"Pi = {pi:.6f}" # Pi = 3.141590

# Width and alignment. Single quotes delimit one-character `char` literals in
# Nim, so multi-character text is interpolated through string variables.
let leftText = "left"
let centerText = "center"
let rightText = "right"
echo &"|{leftText:<10}|{centerText:^10}|{rightText:>10}|"
# |left      |  center  |     right|

# Number formats
let price = 49.99
echo &"Price: ${price:.2f}" # Price: $49.99
echo &"Hex: {255:#x}" # Hex: 0xff
echo &"Oct: {255:#o}" # Oct: 0o377
echo &"Bin: {255:#b}" # Bin: 0b11111111
echo &"Sci: {1234567.89:.2e}" # Sci: 1.23e+06

# Percentages: strformat has no '%' presentation type for floats, so scale
# the value and write the percent sign literally.
let rate = 0.856
echo &"Rate: {rate * 100:.1f}%" # Rate: 85.6%

# Fill characters (the fill precedes the alignment flag)
echo &"|{'x':*^20}|" # |*********x**********|  (9 + 1 + 10 = 20 wide)
echo &"|{42:0>8}|" # |00000042|
9.4 字符串格式化(strutils)
import std/[strutils, wordwrap]

# formatFloat(value, mode, precision); always pass the precision explicitly
# when a specific number of digits is expected.
echo formatFloat(3.14159, ffDecimal, 2) # "3.14"
echo formatFloat(1234.5, ffScientific, 6) # "1.234500e+03"

# FloatFormatMode only offers ffDefault, ffDecimal and ffScientific. For
# thousands separators, run insertSep over the integer digits and re-attach
# the fractional part (shown here for a non-negative value).
let fixedPoint = formatFloat(1234.5, ffDecimal, 1) # "1234.5"
let dotAt = fixedPoint.find('.')
echo insertSep(fixedPoint[0 ..< dotAt], ',') & fixedPoint[dotAt .. ^1]
# "1,234.5"

# align / justify to a minimum width
echo align("hello", 20) # right-aligned
echo alignLeft("hello", 20) # left-aligned
echo center("hello", 20, '*') # centred, padded with '*'

# Integer formatting
echo insertSep($1234567890, ',') # "1,234,567,890"
# toOct and toBin require the output length; toHex without a length pads to
# the full width of the integer type, so pass the length there as well.
echo toOct(255, 3) # "377"
echo toHex(255, 2) # "FF"
echo toBin(255, 8) # "11111111"

# Line wrapping lives in std/wordwrap (wrapWords), not in strutils
let longText = "这是一段很长很长的文字,需要在适当的位置换行显示。"
echo wrapWords(longText, 20)
9.5 字符串搜索与替换
import std/strutils

let text = "Hello, World! Hello, Nim!"

# Searching
echo text.find("Hello") # 0 (index of the first occurrence)
echo text.find("Hello", 5) # 14 (search starts at index 5)
echo text.rfind("Hello") # 14 (last occurrence)

# Containment checks
echo text.contains("World") # true
echo "World" in text # true
echo text.startsWith("Hello") # true
echo text.endsWith("Nim!") # true

# Replacing
echo text.replace("Hello", "Hi") # "Hi, World! Hi, Nim!"
# strutils.replace always replaces every occurrence and takes no count
# argument; to replace only the first one, splice around the index from find.
let needle = "Hello"
let firstAt = text.find(needle)
let replacedOnce =
  if firstAt < 0: text # nothing to replace
  else: text[0 ..< firstAt] & "Hi" & text[firstAt + needle.len .. ^1]
echo replacedOnce # "Hi, World! Hello, Nim!"

# Trimming
echo " Hello ".strip() # "Hello"
echo "***Hello***".strip(chars = {'*'}) # "Hello"

# Splitting
let csv = "apple,banana,cherry,date"
let parts = csv.split(",")
echo parts # @["apple", "banana", "cherry", "date"]

# Splitting into lines
let multiLine = """Line 1
Line 2
Line 3"""
let textLines = multiLine.splitLines()
echo textLines # @["Line 1", "Line 2", "Line 3"]
9.6 Unicode 处理
import std/[strutils, unicode] # strutils supplies toHex and isDigit(char)

let text = "你好,世界!Hello!"

# len counts bytes, runeLen counts Unicode code points
echo text.len # bytes: 20 (4 CJK characters x 3 bytes + 8 ASCII bytes)
echo text.runeLen # code points: 12

# Iterate over code points instead of bytes
for rune in text.runes:
  echo rune, " (U+", rune.int.toHex(4), ")"

# Indexing by code point
let s = "你好世界"
let firstRune = s.runeAt(0)
echo firstRune # 你
echo s.runeOffset(1) # 3 (byte offset of the second code point)

# Case conversion of whole strings
echo toUpper("hello") # "HELLO"
echo toLower("HELLO") # "hello"
echo capitalize("hello") # "Hello"

# Reverse by code point rather than by byte
echo reversed("你好世界") # "界世好你"

# Classification. std/unicode offers isAlpha and isWhiteSpace for Rune but no
# digit test; for ASCII digits use strutils.isDigit on a char.
echo isAlpha('A'.Rune) # true
echo isDigit('5') # true
echo isWhiteSpace(' '.Rune) # true
9.7 正则表达式
import std/re

# Sample text containing a phone number and an e-mail address
let text = "我的电话是 138-1234-5678,邮箱是 user@example.com"

# Test whether the pattern occurs anywhere in the text
echo text.contains(re"\d{3}-\d{4}-\d{4}") # true

# Collect every match
let emails = text.findAll(re"[\w.]+@[\w.]+\.\w+")
echo emails # @["user@example.com"]

# Capture groups: `find` searches the whole text (`match` only succeeds at
# the start position) and fills one array slot per group.
var groups: array[3, string]
if text.find(re"(\d{3})-(\d{4})-(\d{4})", groups) >= 0:
  echo "电话: ", groups[0], "-", groups[1], "-", groups[2]
  # 电话: 138-1234-5678

# Replace every match
let censored = text.replace(re"\d{3}-\d{4}-\d{4}", "***-****-****")
echo censored # 我的电话是 ***-****-****,邮箱是 user@example.com

# Split on a pattern
let parts = "one1two2three3four".split(re"\d")
echo parts # @["one", "two", "three", "four"]
9.8 字符串与序列互转
import std/[sequtils, strutils] # sequtils supplies mapIt and toSeq

# String -> seq[int]; parseInt raises ValueError on a non-numeric field
let csv = "1,2,3,4,5"
let nums = csv.split(",").mapIt(parseInt(it))
echo nums # @[1, 2, 3, 4, 5]

# seq[string] -> string
let words = @["Nim", "is", "fast"]
echo words.join(" ") # "Nim is fast"

# String -> seq[char]
let chars = toSeq("Hello")
echo chars # @['H', 'e', 'l', 'l', 'o']

# seq[char] -> string (join stringifies each item; the separator is empty)
let s = chars.join("")
echo s # "Hello"

# Other values -> string
echo $42 # "42"
echo $3.14 # "3.14"
echo $true # "true"
echo intToStr(42, 6) # "000042" (zero-padded to at least 6 digits)
9.9 字符串切片
import std/unicode # runeSubStr

let s = "Hello, World!"

# Index-based slices operate on bytes
echo s[0 ..< 5] # "Hello"
echo s[7 .. 11] # "World"
echo s[7 .. s.high] # "World!"

# Byte slices can cut a multi-byte character in half, so text such as
# Chinese is sliced by code point through the unicode module instead.
let zh = "你好世界"
echo runeSubStr(zh, 0, 2) # "你好"
echo runeSubStr(zh, 2, 2) # "世界"
9.10 实战示例
🏢 场景:模板引擎
import std/[strutils, re, tables, sequtils]
type TemplateEngine = object
  ## Minimal `{{key}}` placeholder-substitution engine.
  variables: Table[string, string] ## placeholder name -> replacement text

proc newTemplateEngine(): TemplateEngine =
  ## Returns an engine with no variables defined.
  TemplateEngine(variables: initTable[string, string]())

proc setVar(te: var TemplateEngine, key, value: string) =
  ## Defines (or overwrites) the replacement text for `{{key}}`.
  te.variables[key] = value

proc render(te: TemplateEngine, templateStr: string): string =
  ## Returns `templateStr` with every `{{key}}` whose key has been set via
  ## `setVar` replaced by its value.
  ##
  ## The template is scanned once from left to right and inserted values are
  ## never rescanned, so a value that itself contains `{{other}}` is emitted
  ## literally. The output therefore does not depend on the table's iteration
  ## order. Placeholders with unknown keys are left untouched. A key that
  ## contains `}}` can never match.
  result = newStringOfCap(templateStr.len)
  var pos = 0
  while pos < templateStr.len:
    let opening = templateStr.find("{{", pos)
    let closing =
      if opening < 0: -1
      else: templateStr.find("}}", opening + 2)
    if closing < 0:
      # No complete placeholder remains: copy the tail verbatim.
      result.add templateStr[pos .. ^1]
      break
    result.add templateStr[pos ..< opening]
    let key = templateStr[opening + 2 ..< closing]
    if key in te.variables:
      result.add te.variables[key]
      pos = closing + 2
    else:
      # Not a known placeholder: keep one brace and retry from the next
      # character, so a nested opener such as "{{{name}}" is still found.
      result.add '{'
      pos = opening + 1

var te = newTemplateEngine()
te.setVar("name", "张三")
te.setVar("company", "Nim科技")
te.setVar("year", "2026")
let tpl = """尊敬的 {{name}}:
欢迎加入 {{company}}!
感谢您在 {{year}} 年选择我们。
此致
{{company}} 团队"""
echo te.render(tpl)
🏢 场景:日志格式化
import std/[os, strformat, strutils, times] # os supplies extractFilename

type
  LogLevel = enum
    ## Severity levels in ascending order; the order drives the filter in `log`.
    Debug, Info, Warning, Error
  Logger = object
    minLevel: LogLevel ## messages below this level are dropped

const levelColor: array[LogLevel, string] = [
  # ANSI colour escape emitted in front of each level name
  Debug: "\e[36m", Info: "\e[32m", Warning: "\e[33m", Error: "\e[31m"]

proc newLogger(minLevel: LogLevel = Info): Logger =
  ## Returns a logger that prints messages at `minLevel` and above.
  Logger(minLevel: minLevel)

proc log(lg: Logger, level: LogLevel, message: string,
         file: string = "", line: int = 0) =
  ## Prints one line of the form `[timestamp] LEVEL [file:line] message` to
  ## stdout, with the level name wrapped in an ANSI colour.
  ## Nothing is printed when `level` is below `lg.minLevel`; the location part
  ## is omitted when `file` is empty.
  if level < lg.minLevel:
    return
  let timestamp = now().format("yyyy-MM-dd HH:mm:ss")
  let levelStr = ($level).alignLeft(7) # pad so the columns line up
  let location =
    if file.len > 0: &" [{extractFilename(file)}:{line}]"
    else: ""
  let color = levelColor[level]
  echo &"[{timestamp}] {color}{levelStr}\e[0m{location} {message}"

# The wrappers are templates so that instantiationInfo() reports the place
# where the wrapper is used.
template debug(lg: Logger, msg: string) =
  lg.log(Debug, msg, instantiationInfo().filename, instantiationInfo().line)

template info(lg: Logger, msg: string) =
  lg.log(Info, msg, instantiationInfo().filename, instantiationInfo().line)

template warn(lg: Logger, msg: string) =
  lg.log(Warning, msg, instantiationInfo().filename, instantiationInfo().line)

template error(lg: Logger, msg: string) =
  lg.log(Error, msg, instantiationInfo().filename, instantiationInfo().line)

let logger = newLogger(Debug)
logger.info("应用启动")
logger.debug("加载配置文件")
logger.warn("配置项缺失")
logger.error("数据库连接失败")
本章小结
| 操作 | 方法 | 示例 |
|---|---|---|
| 拼接 | & | "a" & "b" |
| 格式化 | &"{}" | &"值: {x:.2f}" |
| 查找 | find, contains | s.find("hi") |
| 替换 | replace | s.replace("a", "b") |
| 分割 | split | s.split(",") |
| 连接 | join | @["a","b"].join(",") |
| 长度 | len, runeLen | 字节数/字符数 |
| Unicode | unicode 模块 | runes, runeLen |
| 正则 | re 模块 | re"\d+" |
练习
- 实现一个简单的 Markdown → HTML 转换器(处理标题、粗体、链接)
- 编写一个 CSV 解析器,支持引号内含逗号的情况
- 使用正则表达式提取一段文本中的所有 URL
- 实现一个支持变量插值的模板引擎
扩展阅读
← 上一章:数据结构 | 下一章:面向对象编程 →