Hi Michael,

MA> But I resort to Unicon or Perl for the use of regular expressions at this
MA> point, as I'm a Rebol newby and find parsing in Rebol beyond my skills at 
MA> this point.

Until you get up to speed on PARSE, FIND/ANY can often be used for
simpler pattern matching. I also did a quick little pattern matcher
similar to VB's Like operator; it isn't anything fancy, but I included
it below.

-- Gregg                         


REBOL [
        Title:   "VB Like Operator Module"
        Date:    10-Sep-2003
        Version: 0.0.3
        File:    %like.r
        Author:  "Gregg Irwin"
        Email:   [EMAIL PROTECTED]
        Purpose: {
        The LIKE? function is a first crack at something like
        VB's Like operator. i.e. a *very* simple RegEx engine. The
        real purpose was to help me get acquainted with parse.
        }

        Comment: {
                A hyphen (-) can appear either at the beginning (after an
                exclamation point if one is used) or at the end of charlist
                to match itself. In any other location, the hyphen is used to
                identify a range of characters.

                When a range of characters is specified, they must appear in
                ascending sort order (from lowest to highest). [A-Z] is a valid
                pattern, but [Z-A] is not.

                The character sequence [] is considered a zero-length string ("").

          *     Zero or more characters
          ?     Any single character
          #     Any single digit
        [list]  Any single char in list (character class)
        [!list] Any single char not in list

        Meta chars, except "]", can be used in character classes.

        "]" can be used by itself, as a regular char, but not in a
        character class.
        }

        History: [
        0.0.1 [03-Sep-2001 "Initial Release." Gregg]
        0.0.2 [19-Mar-2002 "Fixed negated char class syntax" Gregg]
        0.0.3 [10-Sep-2003
            {Rediscovered this and beefed up the char group syntax so it
             matches the VB spec better. Still in progress though.}
            {Renamed some things too.}
            {Cleaned things up (a little) and reorganized.}
            Gregg
        ]
        ]

]

vb-like-op: make object! [
    ; Context holding the pattern translator behind LIKE?.
    ;
    ; EXPAND-PATTERN turns a VB-Like pattern into a block of PARSE rules.
    ; The words it emits (any-single-digit, any-single-char,
    ; to-next-real-char, to-end) are the fields defined right below.

    any-char: complement charset ""
    digit: charset [#"0" - #"9"]
    non-digit: complement digit
    any-single-digit: [1 digit]             ; emitted for "#"
    any-single-char: 'skip ; [1 any-char]   ; emitted for "?"
    ;any-multi-char:  [any any-char]
    ;any-multi-char-to:  [any any-char to]
    ; Characters that start a pattern construct. "!" is deliberately not
    ; listed: it only has a special meaning as the first character inside
    ; [...], and no top-level rule consumes it. Listing it here made
    ; pattern translation stop silently at the first bare "!", dropping
    ; the rest of the pattern.
    wild-chars: charset "*?[#"
    non-wild-chars: complement wild-chars
    valid-group-chars: complement charset "]"
    to-next-real-char: 'thru                ; emitted for "*"
    to-end: [to end]                        ; replaces a trailing "*"


    expand-pattern: func [
        {Convert a VB Like operator spec into a set of PARSE rules.}
        pattern [string!] "VB Like pattern (* ? # [list] [!list])"
        /local plain-chars dig star any-one char-group emit tmp result
    ][
        ; Returns a block of PARSE rules. If the pattern cannot be fully
        ; translated (for example an unclosed "["), only the rules emitted
        ; up to that point are returned.

        ; Append one translated element to RESULT.
        emit: func [arg][
            ; OK, this is ugly. If you put *[ in your pattern, it causes
            ; problems because * = thru (right now) and you can't say
            ; "thru bitset!" in a parse rule. So, what I do in that case
            ; is remove the thru and replace it with something I think
            ; will work: skip everything NOT in the set, then match one
            ; character that is in the set.
            either all [
                not empty? result
                'to-next-real-char = last result
                bitset! = type? arg
            ][
                change back tail result reduce ['any complement arg arg]
            ][
                append result arg
            ]
        ]

        ; One rule per pattern construct; each emits its translation.
        plain-chars: [copy tmp some non-wild-chars (emit copy tmp)]
        dig:         ["#" (emit 'any-single-digit)]
        star:        ["*" (emit 'to-next-real-char)]
        any-one:     ["?" (emit 'any-single-char)]
        char-group:  [
            "[" copy tmp some valid-group-chars "]"
            (emit make-group-charset tmp)
        ]

        result: copy []
        parse/all pattern [
            some [char-group | plain-chars | dig | star | any-one]
        ]
        ; If the last thing in our pattern is thru, it won't work so we
        ; remove the trailing thru and replace it with "to end".
        if (last result) =? 'to-next-real-char [
            change back tail result 'to-end
        ]
        result
    ]


    ; LIKE? is exported to the global context via SET.
    set 'like? func [
        "Emulates the VB Like operator."
        string  [any-string!] "The string you want to check"
        pattern [any-string!] "The pattern you want to check the string against"
    ][
        ; The whole STRING must be consumed by the generated rules.
        parse/all string expand-pattern pattern
    ]


    make-group-charset: func [
        {Take a char-group spec and convert it to a charset.}
        string "Text found between [ and ] in the pattern"
        /local
            add-group-char add-group-range dash non-dash
            rules group-chars char char-1 char-2 comp result
    ][
        ; Returns a bitset; it is complemented when the spec starts
        ; with "!".

        ; Single characters are collected in the string that is the first
        ; item of GROUP-CHARS; ranges are appended after it as
        ; char - char triples.
        add-group-char: func [char][
            if not none? char [append first group-chars char]
        ]
        add-group-range: func [char-1 char-2][
            append group-chars reduce [to-char char-1 '- to-char char-2]
        ]
        dash: charset "-"
        non-dash: complement dash
        rules: [
            ; optional leading "!" negates the class
            [copy char opt #"!" (comp: char)]
            ; a dash right at the start stands for itself
            [copy char opt dash (add-group-char char)]
            some [
                  copy char-1 non-dash dash copy char-2 non-dash
                  (add-group-range char-1 char-2)
                | copy char non-dash (add-group-char char)
            ]
            ; a dash right at the end stands for itself
            [copy char opt dash (add-group-char char)]
            end
        ]
        group-chars: reduce [copy ""]
        ; /all is required: without it PARSE skips whitespace, so a space
        ; or tab listed in the class (e.g. "[ ]" or "[ -~]") was dropped.
        parse/all string rules
        ;print mold group-chars
        result: charset group-chars
        either comp [complement result][result]
    ]
    ; "ABCa-z!012" in PARSE rules is ["ABC" #"a" - #"z" "!012"]


]

test: on

if test [
    ; TEST-LIKE runs LIKE? on a single string/pattern pair.
    ;   /expect  print a failure report when the outcome differs from EXPECTED
    ;   /show    print the inputs, the outcome and the generated PARSE rules
    test-like: func [
        str [string!]
        pat [string!]
        /expect expected
        /show
        /local outcome
    ][
        outcome: like? str pat

        if show [
            print ["Str:   " tab str]
            print ["RegEx: " tab pat]
            print ["Result:" tab outcome]
            print ["Parse: " tab mold vb-like-op/expand-pattern pat]
            prin newline
        ]

        ; Only compare when the caller supplied an expectation.
        if expect [
            if outcome <> expected [
                print [
                    "^/TEST FAILED!^/"
                    tab str newline
                    tab pat newline
                    tab "Returned:" outcome newline
                ]
            ]
        ]
    ]

    ; Flat table of triples: subject string, pattern, expected outcome.
    ; REDUCE turns the TRUE / FALSE words into logic values.
    ; NOTE(review): the first subject string contains "[EMAIL PROTECTED]",
    ; which looks like a mail-archive redaction rather than the original
    ; data -- confirm against the original script.
    test-data: reduce [
        "abc_()[EMAIL PROTECTED]"  "abc*def?�[xyz]" true

        ; literal char classes
        "abc_defx"  "abc*def[xyz]"  true
        "abc_defx"  "abc?def[xyz]"  true
        "abc__defx" "abc??def[!xyz]" false
        "abc__defx" "abc??def[xyz]" true

        ; ranges, plain and negated (this pair is listed twice)
        "abc_defx"  {abc?def[!x-z]} false
        "abc_defx"  {abc?def[x-z]}  true

        "abc_defx"  {abc?def[!x-z]} false
        "abc_defx"  {abc?def[x-z]}  true

        ; star handling
        "abcdxxxxxx"    "abc?*"     true
        "avbcvz"        "a*z"       true
        "12345_xxx"     "*_*"       true
        "filename.txtdfdf" "*.txt*" true
        "abcdefg"       "ab*f[ghi]" true

        ; meta chars used inside char classes
        "]ab*&&&fg�?$^^^- `~�019["
        "]ab[*]*f[ghi]�[?]?*`~[����]###[[]"
            true

        "]ab*&&&fg�?$^^^- `~�019["
        "]ab[*]*f[ghi]�[?]?*^- ??![���]###[[]"
            false

        ; digits
        "Gregg 12340 Irwin" "* ####*"   true
        " 12340"            "* ####*"   true
        "Gregg 123400"      "* ####*"   true
        " 12340 Irwin"      "* ####*"   true

        ; a literal "[" via the [[] class
        "Looking for [ in text"
        "Looking for?[[]*"
            true

        "Looking for [ in text"
        "*[[]*"
            true

        "Looking for ] in text"
        "*[[]*"
            false
    ]

    ; Walk the table three values at a time.
    foreach [subject mask wanted] test-data [
        test-like/expect subject mask wanted ;/show
    ]

    print "Tests complete."

    halt
]

-- 
To unsubscribe from the list, just send an email to 
lists at rebol.com with unsubscribe as the subject.

Reply via email to