I tend to do something along these lines:
import std/[strutils,parseutils]
type
  TokKind = enum ## Category the lexer loop assigns to each token.
    def, ## not produced by the code in this file; presumably meant for
         ## definition keywords (see `defKeywords`) -- TODO confirm
    id,  ## identifier that is not listed in `keywords`
    str, ## text found after a double quote
    num, ## run of decimal digits
    kw,  ## identifier whose text is listed in `keywords`
    uk,  ## single character matched by no other rule
    op   ## operator text

  Token = object ## One lexeme together with its category.
    label: TokKind ## which kind of token this is
    value: string  ## the token's text as sliced from the input
const
  # Identifiers that the lexer relabels from `id` to `kw`.
  keywords = ["let", "const", "var", "if", "else"]
  # Subset of `keywords`; not referenced by the code in this file.
  defKeywords: array[3, string] = ["let", "const", "var"]
  # Complete operator spellings (one- and two-character forms).
  operators = ["=", "+", "-", "/", "*", "^", "==", "++", "--"]
  # Characters that can begin an operator; used as a `case` branch label.
  operatorStart = {'=', '+', '-', '/', '*', '^'}
# Lex the example file into `tokens`, then print every token.
let compilationTarget = readFile("/tmp/lex/example.txt")
var tokens: seq[Token]
echo compilationTarget

# Cursor into `compilationTarget`; starts past any leading whitespace.
var idx = compilationTarget.skipWhitespace()

proc parseUntil(label: TokKind, until: set[char]|char): Token =
  ## Builds a `label` token from the text between `idx` and the first
  ## occurrence of `until` (exclusive), advancing `idx` past that text.
  result.label = label
  idx += compilationTarget.parseUntil(result.value, until, start = idx)

proc parseWhile(label: TokKind, validChars: set[char]): Token =
  ## Builds a `label` token from the run of `validChars` starting at `idx`,
  ## advancing `idx` past that run.
  result.label = label
  idx += compilationTarget.parseWhile(result.value, validChars, start = idx)

while idx < compilationTarget.len:
  let tok =
    case compilationTarget[idx]
    of '"':
      # Step over the opening quote first; otherwise parseUntil stops
      # immediately on it, consumes nothing, and the loop never advances.
      inc idx
      var lit = Token(label: str)
      if idx < compilationTarget.len:
        lit = str.parseUntil('"')
      # Step over the closing quote, if the literal was terminated.
      if idx < compilationTarget.len:
        inc idx
      lit
    of Digits:
      # Integers only; '.' and 'e' are not accepted, and a leading '-' is
      # lexed as a separate `op` token.
      num.parseWhile(Digits)
    of operatorStart:
      # Longest match: prefer a two-character operator listed in
      # `operators`, otherwise emit the single character. This keeps
      # adjacent operators such as `=-` from being glued into one token.
      var sym = Token(label: op, value: $compilationTarget[idx])
      if idx + 1 < compilationTarget.len:
        let pair = compilationTarget[idx .. idx + 1]
        if pair in operators:
          sym.value = pair
      idx += sym.value.len
      sym
    of IdentStartChars:
      var word = id.parseWhile(IdentChars)
      if word.value in keywords:
        word.label = kw
      word
    else:
      # Anything unrecognised becomes a one-character `uk` token so the
      # cursor always moves forward.
      let other = Token(label: uk, value: $compilationTarget[idx])
      inc idx
      other
  tokens.add tok
  idx += compilationTarget.skipWhitespace(idx)

for cTk in tokens:
  echo cTk
Run