Gabe Black has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/56339 )

Change subject: arch: Beef up some regular expressions in the ucode asm.
......................................................................

arch: Beef up some regular expressions in the ucode asm.

This makes the code that determines the bounds of a block, or the
extent of a microop's, directive's, or macro's arguments, behave more
like you'd expect in the presence of singly or triply quoted strings
(using either single or double quote characters) and comments.

Change-Id: If63fecb87d2150cb42cd0c9ec19db3992e073935
---
M src/arch/ucasmlib/assembler.py
M src/arch/ucasmlib/block.py
2 files changed, 54 insertions(+), 5 deletions(-)



diff --git a/src/arch/ucasmlib/assembler.py b/src/arch/ucasmlib/assembler.py
index 072c87e..e9a5e1b 100644
--- a/src/arch/ucasmlib/assembler.py
+++ b/src/arch/ucasmlib/assembler.py
@@ -140,10 +140,28 @@

     # Leading whitespace.
     body_line_re = r'\s*'
-    # First non-whitespace, non } character.
-    body_line_re += r'[^\s\}]'
-    # Non-newline or escaped characters.
-    body_line_re += r'([^\n\\]|(\\[.\n]))*'
+    # Find but don't consume the first non-whitespace, non } character.
+    body_line_re += r'(?![\s}])'
+    # Followed by any of these...
+    body_line_re += r'('
+
+    # A singly double quoted string.
+    body_line_re += r'("(?!"")([^"\n\\]|(\\[.\n]))*")|'
+    # A singly single quoted string.
+    body_line_re += r"('(?!'')(([^'\n\\]|(\\[.\n]))*)')|"
+    # A triply double quoted string.
+    body_line_re += r'("""(("(?!""))|[^"])*""")|'
+    # A triply single quoted string.
+    body_line_re += r"('''(('(?!''))|[^'])*''')|"
+    # A single line comment.
+    body_line_re += r'([#]([^\n]*)(?=\n))|'
+    # A multi-line comment.
+    body_line_re += r'(/\*([^*]|(\*(?=/)))*\*/)|'
+    # Non-newline, non-quote, non-comment, or escaped character.
+    body_line_re += r'([^\n\"\'\#/\\]|(\\[\n\"\'\#])|(/(?!\*)))'
+
+    body_line_re += r')+'
+

     #
     # The regular expression to match an entire body.
diff --git a/src/arch/ucasmlib/block.py b/src/arch/ucasmlib/block.py
index 788e13f..e6f1cad 100644
--- a/src/arch/ucasmlib/block.py
+++ b/src/arch/ucasmlib/block.py
@@ -123,10 +123,27 @@
         t.lexer.pop_state()
         return t

+
+    # Params are composed of some sequence of one or more of these...
+    params_re = r'('
+
+    # A singly double quoted string.
+    params_re += r'("(?!"")([^"\n\\]|(\\[.\n]))*")|'
+    # A singly single quoted string.
+    params_re += r"('(?!'')([^'\n\\]|(\\[.\n]))*')|"
+    # A triply double quoted string.
+    params_re += r'("""(("(?!""))|[^"])*""")|'
+    # A triply single quoted string.
+    params_re += r"('''(('(?!''))|[^'])*''')|"
+    # Non-newline, non-quote, non-semicolon, or escaped character.
+    params_re += r'([^\n\"\';\\]|(\\[\n\"\';]))'
+
+    params_re += r')+'
+
     # Parameters are a string of text which don't contain an unescaped
     # statement statement terminator, ie a newline or semi colon.
+    @lex.TOKEN(params_re)
     def t_params_PARAMS(self, t):
-        r'([^\n;\\]|(\\[.\n]))+'
         t.lexer.lineno += t.value.count('\n')
         unescapeParamsRE = re.compile(r'(\\[\n;\\])')
         def unescapeParams(mo):

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/56339
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: If63fecb87d2150cb42cd0c9ec19db3992e073935
Gerrit-Change-Number: 56339
Gerrit-PatchSet: 1
Gerrit-Owner: Gabe Black <gabe.bl...@gmail.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to