Gabe Black has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/56339 )
Change subject: arch: Beef up some regular expressions in the ucode asm.
......................................................................
arch: Beef up some regular expressions in the ucode asm.
This will make the code that determines the bounds of a block, or the
extent of a microop/directive/macro's arguments, work more like you'd
expect with respect to singly or triply quoted strings (using either
single or double quotes), and comments.
Change-Id: If63fecb87d2150cb42cd0c9ec19db3992e073935
---
M src/arch/ucasmlib/assembler.py
M src/arch/ucasmlib/block.py
2 files changed, 54 insertions(+), 5 deletions(-)
diff --git a/src/arch/ucasmlib/assembler.py b/src/arch/ucasmlib/assembler.py
index 072c87e..e9a5e1b 100644
--- a/src/arch/ucasmlib/assembler.py
+++ b/src/arch/ucasmlib/assembler.py
@@ -140,10 +140,28 @@
# Leading whitespace.
body_line_re = r'\s*'
- # First non-whitespace, non } character.
- body_line_re += r'[^\s\}]'
- # Non-newline or escaped characters.
- body_line_re += r'([^\n\\]|(\\[.\n]))*'
+ # Find but don't consume the first non-whitespace, non } character.
+ body_line_re += r'(?![\s}])'
+ # Followed by any of these...
+ body_line_re += r'('
+
+ # A singly double quoted string.
+ body_line_re += r'("(?!"")([^"\n\\]|(\\[.\n]))*")|'
+ # A singly single quoted string.
+ body_line_re += r"('(?!'')(([^'\n\\]|(\\[.\n]))*)')|"
+ # A triply double quoted string.
+ body_line_re += r'("""(("(?!""))|[^"])*""")|'
+ # A triply single quoted string.
+ body_line_re += r"('''(('(?!''))|[^'])*''')|"
+ # A single line comment.
+ body_line_re += r'([#]([^\n]*)(?=\n))|'
+ # A multi-line comment.
+ body_line_re += r'(/\*([^*]|(\*(?=/)))*\*/)|'
+ # Non-newline, non-quote, non-comment, or escaped character.
+ body_line_re += r'([^\n\"\'\#/\\]|(\\[\n\"\'\#])|(/(?!\*)))'
+
+ body_line_re += r')+'
+
#
# The regular expression to match an entire body.
diff --git a/src/arch/ucasmlib/block.py b/src/arch/ucasmlib/block.py
index 788e13f..e6f1cad 100644
--- a/src/arch/ucasmlib/block.py
+++ b/src/arch/ucasmlib/block.py
@@ -123,10 +123,27 @@
t.lexer.pop_state()
return t
+
+ # Params are composed of some sequence of one or more of these...
+ params_re = r'('
+
+ # A singly double quoted string.
+ params_re += r'("(?!"")([^"\n\\]|(\\[.\n]))*")|'
+ # A singly single quoted string.
+ params_re += r"('(?!'')([^'\n\\]|(\\[.\n]))*')|"
+ # A triply double quoted string.
+ params_re += r'("""(("(?!""))|[^"])*""")|'
+ # A triply single quoted string.
+ params_re += r"('''(('(?!''))|[^'])*''')|"
+ # Non-newline, non-quote, non-semicolon, or escaped character.
+ params_re += r'([^\n\"\';\\]|(\\[\n\"\';]))'
+
+ params_re += r')+'
+
# Parameters are a string of text which doesn't contain an unescaped
# statement terminator, i.e. a newline or semicolon.
+ @lex.TOKEN(params_re)
def t_params_PARAMS(self, t):
- r'([^\n;\\]|(\\[.\n]))+'
t.lexer.lineno += t.value.count('\n')
unescapeParamsRE = re.compile(r'(\\[\n;\\])')
def unescapeParams(mo):
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/56339
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings
Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: If63fecb87d2150cb42cd0c9ec19db3992e073935
Gerrit-Change-Number: 56339
Gerrit-PatchSet: 1
Gerrit-Owner: Gabe Black <gabe.bl...@gmail.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s