I tend to do something along these lines:
    
    
    import std/[strutils,parseutils]
    
    type
      TokKind = enum
        ## Category assigned to each lexed token.
        def ## not produced by the lexer in this file; presumably "definition"
        id  ## identifier
        str ## string literal
        num ## run of digits
        kw  ## identifier whose spelling is listed in `keywords`
        uk  ## fallback for any character no other rule claims
        op  ## operator
      Token = object
        ## One lexed unit: its category plus the text it was read from.
        label: TokKind
        value: string
    
    const
      # Reserved words: an identifier spelled like one of these is tagged `kw`.
      keywords = ["let", "const", "var", "if", "else"]
      # The subset of keywords that introduce a definition.
      defKeywords: array[3, string] = ["let", "const", "var"]
      # Operator spellings, one- and two-character.
      operators = ["=", "+", "-", "/", "*", "^", "==", "++", "--"]
      # Characters that can begin an operator.
      operatorStart = {'=', '+', '-', '/', '*', '^'}
    
    # Read the whole input file into memory and show it before lexing.
    let compilationTarget = readFile("/tmp/lex/example.txt")
    echo compilationTarget

    # Tokens are appended here in the order they are read from the input.
    var tokens: seq[Token] = @[]
    
    # Cursor into `compilationTarget`. Between tokens it always rests on the
    # first character of the next token, or at the end of the input.
    var idx = compilationTarget.skipWhitespace()

    # The helpers are declared once, ahead of the loop, instead of being
    # re-declared on every iteration. Both advance the shared `idx` cursor.
    proc parseUntil(label: TokKind, until: set[char]|char): Token =
      ## Consumes characters from `idx` up to, but not including, the first
      ## occurrence of `until` (or the end of the input) and returns them as a
      ## token tagged with `label`.
      result.label = label
      idx += compilationTarget.parseUntil(result.value, until, start = idx)

    proc parseWhile(label: TokKind, validChars: set[char]): Token =
      ## Consumes the longest run of `validChars` starting at `idx` and returns
      ## it as a token tagged with `label`.
      result.label = label
      idx += compilationTarget.parseWhile(
        result.value, validChars, start = idx)

    while idx < compilationTarget.len:
      # Every branch below consumes at least one character, which is what
      # guarantees the loop terminates.
      let tok =
        case compilationTarget[idx]
        of '\"':
          # `idx` sits on the opening quote. parseUntil stops at once when the
          # current character is the terminator, so the quote must be stepped
          # over first or the cursor would never move.
          inc idx
          let lit = str.parseUntil('\"')
          if idx < compilationTarget.len:
            inc idx # closing quote; missing when the literal is unterminated
          lit
        of Digits:
          # Digits only; add {'.', 'e'} (or parse until whitespace) for
          # floats. A sign is not part of the number: `-1` is `-` then `1`.
          num.parseWhile(Digits)
        of operatorStart:
          # Longest match against `operators`, so `==` is a single token while
          # `=-` is `=` followed by `-`. Starting from the current character
          # ensures progress even if the tables fall out of sync.
          var longest = $compilationTarget[idx]
          for candidate in operators:
            if candidate.len > longest.len and
                compilationTarget.continuesWith(candidate, idx):
              longest = candidate
          idx += longest.len
          Token(label: op, value: longest)
        of IdentStartChars:
          var word = id.parseWhile(IdentChars)
          if word.value in keywords:
            word.label = kw
          word
        else:
          # Anything no other rule claims becomes a one-character `uk`
          # (presumably "unknown") token.
          let stray = Token(label: uk, value: $compilationTarget[idx])
          inc idx
          stray
      tokens.add tok

      idx += compilationTarget.skipWhitespace(idx)
    
    
    # Dump every token, one per line, using the default object formatting.
    for token in tokens.items:
      echo $token
    
    
    Run

Reply via email to