Gabe Black has uploaded this change for review. ( https://gem5-review.googlesource.com/c/public/gem5/+/56336 )

Change subject: arch,arch-x86: Separate out the block parser for ucode asm.
......................................................................

arch,arch-x86: Separate out the block parser for ucode asm.

This separation makes it possible to enter the block parser recursively,
which will be necessary to allow expanding macros.

Change-Id: I7fd45f8b03d7ac22fcc6d2b8715845d1b10af12d
---
M src/arch/SConscript
M src/arch/micro_asm.py
M src/arch/micro_asm_test.py
A src/arch/ucasmlib/__init__.py
A src/arch/ucasmlib/assembler.py
A src/arch/ucasmlib/block.py
A src/arch/ucasmlib/parser.py
M src/arch/x86/isa/macroop.isa
M src/arch/x86/isa/microasm.isa
M src/arch/x86/isa/rom.isa
10 files changed, 652 insertions(+), 433 deletions(-)



diff --git a/src/arch/SConscript b/src/arch/SConscript
index 3034dac..8a48e17 100644
--- a/src/arch/SConscript
+++ b/src/arch/SConscript
@@ -106,6 +106,7 @@

 parser_files = Glob('isa_parser/*.py')
 micro_asm_py = File('micro_asm.py')
+ucasmlib_py = Glob('ucasmlib/*.py')

 # import ply here because SCons screws with sys.path when performing actions.
 import ply
@@ -209,7 +210,7 @@
             source_gen('generic_cpu_exec_%d.cc' % i)

     # Actually create the builder.
-    sources = [desc, micro_asm_py] + parser_files
+    sources = [desc, ucasmlib_py, micro_asm_py] + parser_files
     IsaDescBuilder(target=gen, source=sources, env=env)
     return gen

diff --git a/src/arch/micro_asm.py b/src/arch/micro_asm.py
index 6e3a3a0..80ec697 100644
--- a/src/arch/micro_asm.py
+++ b/src/arch/micro_asm.py
@@ -24,431 +24,4 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-import os
-import sys
-import re
-import traceback
-# get type names
-from types import *
-
-from ply import lex
-from ply import yacc
-
-##########################################################################
-#
-# Base classes for use outside of the assembler
-#
-##########################################################################
-
-class MicroContainer:
-    def __init__(self, name):
-        self.microops = []
-        self.name = name
-        self.directives = {}
-        self.micro_classes = {}
-        self.labels = {}
-
-    def add_microop(self, name, microop):
-        microop.mnemonic = name
-        microop.micropc = len(self.microops)
-        self.microops.append(microop)
-
-    def add_label(self, label):
-        self.labels[label.name] = len(self.microops)
-
-    def __str__(self):
-        string = "%s:\n" % self.name
-        for microop in self.microops:
-            string += "  %s\n" % microop
-        return string
-
-class CombinationalMacroop(MicroContainer):
-    pass
-
-class RomMacroop:
-    def __init__(self, name, target):
-        self.name = name
-        self.target = target
-
-    def __str__(self):
-        return "%s: %s\n" % (self.name, self.target)
-
-class Rom(MicroContainer):
-    def __init__(self, name):
-        super().__init__(name)
-        self.externs = {}
-
-    def add_label(self, label):
-        super().add_label(label)
-        if label.is_extern:
-            self.externs[label.name] = len(self.microops)
-
-##########################################################################
-#
-# Support classes
-#
-##########################################################################
-
-class Block:
-    def __init__(self):
-        self.statements = []
-
-class Statement:
-    def __init__(self, name, is_directive=False):
-        self.name = name
-        self.is_directive = is_directive
-
-class Microop(Statement):
-    def __init__(self, name, params):
-        super().__init__(name)
-        self.params = params
-
-    def handle(self, assembler, container):
-        microop = assembler.microops.get(self.name, None)
-        if not microop:
-            raise Exception(f'Unrecongized mnemonic: "{self.name}"')
-
-        try:
-            microop = eval(f'_cls({self.params})',
-                    {'_cls': microop}, assembler.symbols)
-        except:
-            print_error(f'Error instantiating microop "{self.name}"')
-            raise
-
-        container.add_microop(self.name, microop)
-
-class Directive(Statement):
-    def __init__(self, name, params):
-        super().__init__(name, True)
-        self.params = params
-
-    def handle(self, assembler, container):
-        directive = container.directives.get(self.name, None)
-        if not directive:
-            raise Exception(f'Unrecognized directive: "{self.name}"')
-        local = {'_dir': directive}
-        try:
-            eval(f'_dir({self.params})',
-                    {'_dir': directive}, assembler.symbols)
-        except:
-            print_error(f'Error executing directive "{self.name}"')
-            raise
-
-class Label(Statement):
-    def __init__(self, name, is_extern):
-        super().__init__(name)
-        self.is_extern = is_extern
-
-    def handle(self, assembler, container):
-        container.add_label(self)
-
-##########################################################################
-#
-# Functions that handle common tasks
-#
-##########################################################################
-
-def print_error(message):
-    print()
-    print("*** %s" % message)
-    print()
-
-class MicroAssembler:
-    ######################################################################
-    #
-    # Lexer specification
-    #
-    ######################################################################
-
-    # Error handler.  Just call exit.  Output formatted to work under
-    # Emacs compile-mode.  Optional 'print_traceback' arg, if set to True,
-    # prints a Python stack backtrace too (can be handy when trying to
-    # debug the parser itself).
-    def error(self, lineno, string, print_traceback = False):
-        # Print a Python stack backtrace if requested.
-        if (print_traceback):
-            traceback.print_exc()
-        if lineno != 0:
-            line_str = "%d:" % lineno
-        else:
-            line_str = ""
-        sys.exit("%s %s" % (line_str, string))
-
-    reserved = ('DEF', 'MACROOP', 'ROM', 'EXTERN')
-
-    tokens = reserved + (
-            # identifier
-            'ID',
-            # arguments for microops and directives
-            'PARAMS',
-
-            'LPAREN', 'RPAREN',
-            'LBRACE', 'RBRACE',
-            'COLON', 'SEMI', 'DOT',
-            'NEWLINE'
-            )
-
-    # New lines are ignored at the top level, but they end statements in the
-    # assembler
-    states = (
-        ('asm', 'exclusive'),
-        ('params', 'exclusive'),
-        ('header', 'exclusive'),
-    )
-
-    reserved_map = { }
-    for r in reserved:
-        reserved_map[r.lower()] = r
-
-    # Ignore comments
-    def t_ANY_COMMENT(self, t):
-        r'\#[^\n]*(?=\n)'
-
-    def t_ANY_MULTILINECOMMENT(self, t):
-        r'/\*([^/]|((?<!\*)/))*\*/'
-
-    # A colon marks the end of a label. It should follow an ID which will
-    # put the lexer in the "params" state. Seeing the colon will put it back
-    # in the "asm" state since it knows it saw a label and not a mnemonic.
-    def t_params_COLON(self, t):
-        r':'
-        t.lexer.pop_state()
-        return t
-
-    # Parameters are a string of text which don't contain an unescaped
-    # statement statement terminator, ie a newline or semi colon.
-    def t_params_PARAMS(self, t):
-        r'([^\n;\\]|(\\[\n;\\]))+'
-        t.lineno += t.value.count('\n')
-        unescapeParamsRE = re.compile(r'(\\[\n;\\])')
-        def unescapeParams(mo):
-            val = mo.group(0)
-            return val[1]
-        t.value = unescapeParamsRE.sub(unescapeParams, t.value)
-        t.lexer.pop_state()
-        return t
-
-    # An "ID" in the micro assembler is either a label, directive, or mnemonic
-    # If it's either a directive or a mnemonic, it will be optionally followed
-    # by parameters. If it's a label, the following colon will make the lexer
-    # stop looking for parameters.
-    def t_asm_ID(self, t):
-        r'[A-Za-z_]\w*'
-        t.type = self.reserved_map.get(t.value, 'ID')
-        # If the ID is really "extern", we shouldn't start looking for
-        # parameters yet. The real ID, the label itself, is coming up.
-        if t.type != 'EXTERN':
-            t.lexer.push_state('params')
-        return t
-
-    def t_header_ID(self, t):
-        r'[A-Za-z_]\w*'
-        return t
-
-    # If there is a label and you're -not- in the assembler (which would be
-    # caught above), don't start looking for parameters.
-    def t_ANY_ID(self, t):
-        r'[A-Za-z_]\w*'
-        t.type = self.reserved_map.get(t.value, 'ID')
-        if t.type == 'MACROOP':
-            t.lexer.push_state('asm')
-            t.lexer.push_state('header')
-        elif t.type == 'ROM':
-            t.lexer.push_state('asm')
-            t.lexer.push_state('header')
-        return t
-
-    # Braces enter and exit micro assembly
-    def t_header_LBRACE(self, t):
-        r'\{'
-        t.lexer.pop_state()
-        return t
-
-    def t_asm_RBRACE(self, t):
-        r'\}'
-        t.lexer.pop_state()
-        return t
-
-    # In the micro assembler, do line counting but also return a token. The
-    # token is needed by the parser to detect the end of a statement.
-    def t_asm_NEWLINE(self, t):
-        r'\n+'
-        t.lineno += t.value.count('\n')
-        return t
-
-    # A newline or semi colon when looking for params signals that the
-    # statement is over and the lexer should go back to looking for regular
-    # assembly.
-    def t_params_NEWLINE(self, t):
-        r'\n+'
-        t.lineno += t.value.count('\n')
-        t.lexer.pop_state()
-        return t
-
-    def t_params_SEMI(self, t):
-        r';'
-        t.lexer.pop_state()
-        return t
-
-    # Unless handled specially above, track newlines only for line counting.
-    def t_ANY_NEWLINE(self, t):
-        r'\n+'
-        t.lineno += t.value.count('\n')
-
-    # Basic regular expressions to pick out simple tokens
-    t_ANY_LPAREN = r'\('
-    t_ANY_RPAREN = r'\)'
-    t_ANY_SEMI   = r';'
-    t_ANY_DOT    = r'\.'
-
-    t_ANY_ignore = ' \t\x0c'
-
-    def t_ANY_error(self, t):
-        error(t.lineno, "illegal character '%s'" % t.value[0])
-        t.skip(1)
-
-    ######################################################################
-    #
-    # Parser specification
-    #
-    ######################################################################
-
-    # A file which may have one or more "object" defined in it.
-    def p_file(self, t):
-        'file : opt_objects'
-
-    # The objects are optional.
-    def p_opt_objects(self, t):
-        '''opt_objects : objects
-                       |'''
-
-    # One or more objects.
-    def p_objects(self, t):
-        '''objects : objects object
-                   | object'''
-
-    # Objects can be of various types.
-    def p_object(self, t):
-        '''object : rom_block
-                  | macroop_def'''
-
-    # Defines a section of microcode that should go in the current ROM.
-    def p_rom_block(self, t):
-        'rom_block : DEF ROM block SEMI'
-        if not self.rom:
-            print_error("Rom block found, but no Rom object specified.")
-            raise TypeError("Rom block found, but nowhere to put it.")
-        for statement in t[3].statements:
-            statement.handle(self, self.rom)
-        t[0] = self.rom
-
-    # Defines a macroop that jumps to an external label in the ROM.
-    def p_macroop_jump(self, t):
-        'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
-        if not self.rom_macroop_type:
-            print_error("ROM based macroop found, but no ROM macroop " +
-                "class was specified.")
-            raise TypeError("ROM based macroop found, but no ROM macroop " +
-                "class was specified.")
-        macroop = self.rom_macroop_type(t[3], t[5])
-        self.macroops[t[3]] = macroop
-
-
-    # Defines a macroop that is combinationally generated.
-    def p_macroop_def(self, t):
-        'macroop_def : DEF MACROOP ID block SEMI'
-        try:
-            curop = self.macro_type(t[3])
-        except TypeError:
-            print_error("Error creating macroop object.")
-            raise
-        for statement in t[4].statements:
-            statement.handle(self, curop)
-        self.macroops[t[3]] = curop
-
-    # A block of statements
-    def p_block(self, t):
-        'block : LBRACE statements RBRACE'
-        block = Block()
-        block.statements = t[2]
-        t[0] = block
-
-    # One or more statements
-    def p_statements_0(self, t):
-        'statements : statement'
-        t[0] = [t[1]] if t[1] else []
-
-    def p_statements_1(self, t):
-        'statements : statements statement'
-        if t[2]:
-            t[1].append(t[2])
-        t[0] = t[1]
-
-    # A statement can be of various types.
-    def p_statement(self, t):
-        '''statement : empty_statement
-                     | label
-                     | microop
-                     | directive'''
-        t[0] = t[1]
-
-    # Parameters are optional.
-    def p_opt_params_0(self, t):
-        'opt_params : PARAMS'
-        t[0] = t[1]
-
-    def p_opt_params_1(self, t):
-        'opt_params :'
-        t[0] = ""
-
-    # Statements are often ended by newlines or a semi colon.
-    def p_end_of_statement(self, t):
-        '''end_of_statement : NEWLINE
-                            | SEMI'''
-        pass
-
-    # Ignore empty statements.
-    def p_empty_statement(self, t):
-        'empty_statement : end_of_statement'
-        pass
-
-    # A label in the microcode.
-    def p_label_0(self, t):
-        'label : EXTERN ID COLON'
-        t[0] = Label(t[2], True)
-
-    def p_label_1(self, t):
-        'label : ID COLON'
-        t[0] = Label(t[1], False)
-
-    # A microop with optional parameters.
-    def p_microop(self, t):
-        'microop : ID opt_params end_of_statement'
-        t[0] = Microop(t[1], t[2])
-
-    # Directives for the macroop.
-    def p_directive(self, t):
-        'directive : DOT ID opt_params end_of_statement'
-        t[0] = Directive(t[2], t[3])
-
-    # Parse error handler.  Note that the argument here is the offending
-    # *token*, not a grammar symbol (hence the need to use t.value)
-    def p_error(self, t):
-        if t:
-            error(t.lineno, "syntax error at '%s'" % t.value)
-        else:
-            error(0, "unknown syntax error", True)
-
-    def __init__(self, macro_type, microops, rom=None, rom_macroop_type=None):
-        self.lexer = lex.lex(object=self)
-        self.parser = yacc.yacc(module=self)
-        self.macro_type = macro_type
-        self.macroops = {}
-        self.microops = microops
-        self.rom = rom
-        self.rom_macroop_type = rom_macroop_type
-        self.symbols = {}
-
-    def assemble(self, asm):
-        self.parser.parse(asm, lexer=self.lexer)
-        return self.macroops
+from .ucasmlib.assembler import MicroAssembler
diff --git a/src/arch/micro_asm_test.py b/src/arch/micro_asm_test.py
index 08c2412..45dab1e 100755
--- a/src/arch/micro_asm_test.py
+++ b/src/arch/micro_asm_test.py
@@ -24,7 +24,8 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-from micro_asm import MicroAssembler, CombinationalMacroop, RomMacroop, Rom
+from ucasmlib import CombinationalMacroop, RomMacroop, Rom
+from ucasmlib.assembler import MicroAssembler

 class Bah(object):
     def __init__(self):
diff --git a/src/arch/ucasmlib/__init__.py b/src/arch/ucasmlib/__init__.py
new file mode 100644
index 0000000..983f813
--- /dev/null
+++ b/src/arch/ucasmlib/__init__.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2003-2005 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+##########################################################################
+#
+# Base classes for use outside of the assembler
+#
+##########################################################################
+
+class MicroContainer:
+    def __init__(self, name):
+        self.microops = []
+        self.name = name
+        self.directives = {}
+        self.micro_classes = {}
+        self.labels = {}
+
+    def add_microop(self, name, microop):
+        microop.mnemonic = name
+        microop.micropc = len(self.microops)
+        self.microops.append(microop)
+
+    def add_label(self, label):
+        self.labels[label.name] = len(self.microops)
+
+    def __str__(self):
+        string = "%s:\n" % self.name
+        for microop in self.microops:
+            string += "  %s\n" % microop
+        return string
+
+class CombinationalMacroop(MicroContainer):
+    pass
+
+class RomMacroop:
+    def __init__(self, name, target):
+        self.name = name
+        self.target = target
+
+    def __str__(self):
+        return "%s: %s\n" % (self.name, self.target)
+
+class Rom(MicroContainer):
+    def __init__(self, name):
+        super().__init__(name)
+        self.externs = {}
+
+    def add_label(self, label):
+        super().add_label(label)
+        if label.is_extern:
+            self.externs[label.name] = len(self.microops)
diff --git a/src/arch/ucasmlib/assembler.py b/src/arch/ucasmlib/assembler.py
new file mode 100644
index 0000000..c5bf506
--- /dev/null
+++ b/src/arch/ucasmlib/assembler.py
@@ -0,0 +1,220 @@
+# Copyright (c) 2003-2005 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from .block import BlockParser
+from .parser import ParserBase
+
+from ply import lex
+from ply import yacc
+
+class MicroAssembler(ParserBase):
+    ######################################################################
+    #
+    # Lexer specification
+    #
+    ######################################################################
+
+    reserved = ('DEF', 'MACROOP', 'ROM')
+
+    tokens = reserved + (
+            # identifier
+            'ID',
+            # the body of a block
+            'BODY',
+
+            'LPAREN', 'RPAREN',
+            'LBRACE', 'RBRACE',
+            'SEMI'
+            )
+
+    # New lines are ignored at the top level, but they end statements in the
+    # assembler
+    states = (
+        ('asm', 'exclusive'),
+        ('header', 'exclusive'),
+    )
+
+    reserved_map = { }
+    for r in reserved:
+        reserved_map[r.lower()] = r
+
+    def t_ANY_ID(self, t):
+        r'[A-Za-z_]\w*'
+        t.type = self.reserved_map.get(t.value, 'ID')
+        if t.type == 'MACROOP':
+            t.lexer.push_state('asm')
+            t.lexer.push_state('header')
+        elif t.type == 'ROM':
+            t.lexer.push_state('asm')
+            t.lexer.push_state('header')
+        return t
+
+    def t_header_ID(self, t):
+        r'[A-Za-z_]\w*'
+        return t
+
+    # Braces enter and exit blocks.
+    def t_header_LBRACE(self, t):
+        r'\{'
+        t.lexer.pop_state()
+        return t
+
+
+    #
+    # Blocks have lines in them which are terminated with unescaped newlines,
+    # and end when a line starts with a }.
+    #
+
+    #
+    # The regular expression to match a single line of a body.
+    #
+
+    # Leading whitespace.
+    body_line_re = r'\s*'
+    # First non-whitespace, non } character.
+    body_line_re += r'[^\s\}]'
+    # Unescaped non-newline characters, or any escaped one (even a newline).
+    body_line_re += r'([^\n\\]|(\\(.|\n)))*'
+
+    #
+    # The regular expression to match an entire body.
+    #
+
+    # Leading whitespace for entire body.
+    body_re = r'\s*'
+    # Any number of body lines followed by a new line.
+    body_re += r'(' + body_line_re + r'\n)*'
+    # One body line which is not followed by a new line.
+    body_re += r'(' + body_line_re + r')'
+    # Trailing whitespace for entire body.
+    body_re += r'\s*'
+
+    @lex.TOKEN(body_re)
+    def t_asm_BODY(self, t):
+        # Keep the body's first line number so it can be parsed separately.
+        t.value = (t.lineno, t.value)
+        t.lexer.lineno += t.value[1].count('\n')
+        return t
+
+    def t_asm_RBRACE(self, t):
+        r'\}'
+        t.lexer.pop_state()
+        return t
+
+    # Unless handled specially above, track newlines only for line counting.
+    def t_ANY_NEWLINE(self, t):
+        r'\n+'
+        t.lexer.lineno += t.value.count('\n')
+
+    # Basic regular expressions to pick out simple tokens
+    t_ANY_LPAREN = r'\('
+    t_ANY_RPAREN = r'\)'
+    t_ANY_SEMI = r';'
+
+    t_ANY_ignore = ' \t\x0c'
+
+    ######################################################################
+    #
+    # Parser specification
+    #
+    ######################################################################
+
+    # A file which may have one or more "object" defined in it.
+    def p_file(self, t):
+        'file : opt_objects'
+
+    # The objects are optional.
+    def p_opt_objects(self, t):
+        '''opt_objects : objects
+                       |'''
+
+    # One or more objects.
+    def p_objects(self, t):
+        '''objects : objects object
+                   | object'''
+
+    # Objects can be of various types.
+    def p_object(self, t):
+        '''object : rom_block
+                  | macroop_def'''
+
+    # Defines a section of microcode that should go in the current ROM.
+    def p_rom_block(self, t):
+        'rom_block : DEF ROM block SEMI'
+        if not self.rom:
+            self.print_error("Rom block found, but no Rom object specified.")
+            raise TypeError("Rom block found, but nowhere to put it.")
+        for statement in t[3]:
+            statement.handle(self, self.rom)
+        t[0] = self.rom
+
+    # Defines a macroop that jumps to an external label in the ROM.
+    def p_macroop_jump(self, t):
+        'macroop_def : DEF MACROOP ID LPAREN ID RPAREN SEMI'
+        if not self.rom_macroop_type:
+            self.print_error("ROM based macroop found, but no ROM macroop "
+                "class was specified.")
+            raise TypeError("ROM based macroop found, but no ROM macroop "
+                "class was specified.")
+        macroop = self.rom_macroop_type(t[3], t[5])
+        self.macroops[t[3]] = macroop
+
+
+    # Defines a macroop that is combinationally generated.
+    def p_macroop_def(self, t):
+        'macroop_def : DEF MACROOP ID block SEMI'
+        try:
+            curop = self.macro_type(t[3])
+        except TypeError:
+            self.print_error("Error creating macroop object.")
+            raise
+        for statement in t[4]:
+            statement.handle(self, curop)
+        self.macroops[t[3]] = curop
+
+    # A block parses to a list of statements; an empty block is an empty list.
+    def p_block_0(self, t):
+        'block : LBRACE BODY RBRACE'
+        starting_line, text = t[2]
+        t[0] = BlockParser().parse(starting_line, text)
+
+    def p_block_1(self, t):
+        'block : LBRACE RBRACE'
+        t[0] = []
+
+    def __init__(self, macro_type, microops, rom=None, rom_macroop_type=None):
+        self.lexer = lex.lex(object=self)
+        self.parser = yacc.yacc(module=self)
+        self.macro_type = macro_type
+        self.macroops = {}
+        self.microops = microops
+        self.rom = rom
+        self.rom_macroop_type = rom_macroop_type
+        self.symbols = {}
+
+    def assemble(self, asm):
+        self.parser.parse(asm, lexer=self.lexer)
+        return self.macroops
diff --git a/src/arch/ucasmlib/block.py b/src/arch/ucasmlib/block.py
new file mode 100644
index 0000000..c3e8b02
--- /dev/null
+++ b/src/arch/ucasmlib/block.py
@@ -0,0 +1,264 @@
+# Copyright (c) 2003-2005 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from .parser import ParserBase
+
+import re
+
+from ply import lex
+from ply import yacc
+
+##########################################################################
+#
+# Support classes
+#
+##########################################################################
+
+class Statement:
+    def __init__(self, name, is_directive=False):
+        self.name = name
+        self.is_directive = is_directive
+
+class Microop(Statement):
+    def __init__(self, name, params):
+        super().__init__(name)
+        self.params = params
+
+    def handle(self, assembler, container):
+        microop = assembler.microops.get(self.name, None)
+        if not microop:
+            raise Exception(f'Unrecognized mnemonic: "{self.name}"')
+        # Call the registered microop class with the params text as arguments.
+        try:
+            microop = eval(f'_cls({self.params})',
+                    {'_cls': microop}, assembler.symbols)
+        except Exception:
+            assembler.print_error(f'Error instantiating microop "{self.name}"')
+            raise
+
+        container.add_microop(self.name, microop)
+
+class Directive(Statement):
+    def __init__(self, name, params):
+        super().__init__(name, True)
+        self.params = params
+
+    def handle(self, assembler, container):
+        directive = container.directives.get(self.name, None)
+        if not directive:
+            raise Exception(f'Unrecognized directive: "{self.name}"')
+        # Call the container's directive with the params text as arguments.
+        try:
+            eval(f'_dir({self.params})',
+                    {'_dir': directive}, assembler.symbols)
+        except Exception:
+            assembler.print_error(f'Error executing directive "{self.name}"')
+            raise
+
+class Label(Statement):
+    def __init__(self, name, is_extern):
+        super().__init__(name)
+        self.is_extern = is_extern
+
+    def handle(self, assembler, container):
+        container.add_label(self)
+
+class BlockParser(ParserBase):
+    ######################################################################
+    #
+    # Lexer specification
+    #
+    ######################################################################
+
+    reserved = ('EXTERN',)
+
+    tokens = reserved + (
+            # identifier
+            'ID',
+            # arguments for microops and directives
+            'PARAMS',
+
+            'DOT', 'COLON',
+            'NEWLINE', 'SEMI',
+            )
+
+    # New lines are ignored at the top level, but they end statements in the
+    # assembler
+    states = (
+        ('params', 'exclusive'),
+    )
+
+    reserved_map = { }
+    for r in reserved:
+        reserved_map[r.lower()] = r
+
+    # A colon marks the end of a label. It should follow an ID which will
+    # put the lexer in the "params" state. Seeing the colon will put it back
+    # in the INITIAL state since it knows it saw a label and not a mnemonic.
+    def t_params_COLON(self, t):
+        r':'
+        t.lexer.pop_state()
+        return t
+
+    # Parameters are a string of text which don't contain an unescaped
+    # statement terminator, i.e. a newline or semicolon.
+    def t_params_PARAMS(self, t):
+        r'([^\n;\\]|(\\(.|\n)))+'
+        t.lexer.lineno += t.value.count('\n')
+        unescapeParamsRE = re.compile(r'(\\[\n;\\])')
+        def unescapeParams(mo):
+            val = mo.group(0)
+            return val[1]
+        t.value = unescapeParamsRE.sub(unescapeParams, t.value)
+        t.lexer.pop_state()
+        return t
+
+    # An "ID" in the micro assembler is either a label, directive, or mnemonic
+    # If it's either a directive or a mnemonic, it will be optionally followed
+    # by parameters. If it's a label, the following colon will make the lexer
+    # stop looking for parameters.
+    def t_INITIAL_ID(self, t):
+        r'[A-Za-z_]\w*'
+        t.type = self.reserved_map.get(t.value, 'ID')
+        # If the ID is really "extern", we shouldn't start looking for
+        # parameters yet. The real ID, the label itself, is coming up.
+        if t.type != 'EXTERN':
+            t.lexer.push_state('params')
+        return t
+
+    # In the micro assembler, do line counting but also return a token. The
+    # token is needed by the parser to detect the end of a statement.
+    def t_INITIAL_NEWLINE(self, t):
+        r'\n+'
+        t.lexer.lineno += t.value.count('\n')
+        return t
+
+    # A newline or semi colon when looking for params signals that the
+    # statement is over and the lexer should go back to looking for regular
+    # assembly.
+    def t_params_NEWLINE(self, t):
+        r'\n+'
+        t.lexer.lineno += t.value.count('\n')
+        t.lexer.pop_state()
+        return t
+
+    def t_params_SEMI(self, t):
+        r';'
+        t.lexer.pop_state()
+        return t
+
+    # Basic regular expressions to pick out simple tokens
+    t_ANY_SEMI   = r';'
+    t_ANY_DOT    = r'\.'
+
+    t_ANY_ignore = ' \t\x0c'
+
+    ######################################################################
+    #
+    # Parser specification
+    #
+    ######################################################################
+
+    # A block of statements
+    def p_block(self, t):
+        'block : opt_statements'
+        t[0] = t[1]
+
+    # Having statements is optional.
+    def p_opt_statements_0(self, t):
+        'opt_statements : statements'
+        t[0] = t[1]
+
+    def p_opt_statements_1(self, t):
+        'opt_statements :'
+        t[0] = []
+
+    # One or more statements.
+    def p_statements_0(self, t):
+        'statements : statement'
+        t[0] = [t[1]] if t[1] else []
+
+    def p_statements_1(self, t):
+        'statements : statements statement'
+        if t[2]:
+            t[1].append(t[2])
+        t[0] = t[1]
+
+    # A statement can be of various types.
+    def p_statement(self, t):
+        '''statement : empty_statement
+                     | label
+                     | microop
+                     | directive'''
+        t[0] = t[1]
+
+    # Parameters are optional.
+    def p_opt_params_0(self, t):
+        'opt_params : PARAMS'
+        t[0] = t[1]
+
+    def p_opt_params_1(self, t):
+        'opt_params :'
+        t[0] = ""
+
+    # Statements are often ended by newlines or a semi colon.
+    def p_end_of_statement(self, t):
+        '''end_of_statement : NEWLINE
+                            | SEMI'''
+        pass
+
+    # Ignore empty statements.
+    def p_empty_statement(self, t):
+        'empty_statement : end_of_statement'
+        pass
+
+    # A label in the microcode.
+    def p_label_0(self, t):
+        'label : EXTERN ID COLON'
+        t[0] = Label(t[2], True)
+
+    def p_label_1(self, t):
+        'label : ID COLON'
+        t[0] = Label(t[1], False)
+
+    # A microop with optional parameters.
+    def p_microop(self, t):
+        'microop : ID opt_params end_of_statement'
+        t[0] = Microop(t[1], t[2])
+
+    # Directives for the macroop.
+    def p_directive(self, t):
+        'directive : DOT ID opt_params end_of_statement'
+        t[0] = Directive(t[2], t[3])
+
+    def __init__(self):
+        self.lexer = lex.lex(object=self)
+        self.parser = yacc.yacc(module=self)
+        self.statements = []
+
+    def parse(self, starting_lineno, text):
+        self.lexer.lineno = starting_lineno
+        return self.parser.parse(text, lexer=self.lexer)
diff --git a/src/arch/ucasmlib/parser.py b/src/arch/ucasmlib/parser.py
new file mode 100644
index 0000000..43e10d2
--- /dev/null
+++ b/src/arch/ucasmlib/parser.py
@@ -0,0 +1,73 @@
+# Copyright (c) 2003-2005 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import sys
+import traceback
+
+class ParserBase:
+    """Error reporting and comment-skipping rules shared by parsers.
+
+    The t_* and p_* methods follow the PLY naming convention. The
+    docstrings of the t_* rules are their regular expressions, so they
+    must not be reworded.
+    """
+
+    # Declared static so that it works both as ParserBase.print_error(msg)
+    # and when called on an instance. Without the decorator an instance
+    # call would pass the instance as 'message' and raise a TypeError.
+    @staticmethod
+    def print_error(message):
+        """Print 'message' prefixed with '***', with a blank line on
+        either side."""
+        print()
+        print("*** %s" % message)
+        print()
+
+    ######################################################################
+    #
+    # Lexer specification
+    #
+    ######################################################################
+
+    # Error handler.  Just call exit.  Output formatted to work under
+    # Emacs compile-mode.  Optional 'print_traceback' arg, if set to True,
+    # prints a Python stack backtrace too (can be handy when trying to
+    # debug the parser itself).
+    def error(self, lineno, string, print_traceback = False):
+        """Exit the program with an error message.
+
+        lineno: line number to prefix the message with; 0 means "no line
+            number available" and leaves the prefix empty.
+        string: the error text.
+        print_traceback: if true, print the current exception's traceback
+            before exiting.
+
+        Raises SystemExit (via sys.exit) with the formatted message.
+        """
+        # Print a Python stack backtrace if requested.
+        if (print_traceback):
+            traceback.print_exc()
+        if lineno != 0:
+            line_str = "%d:" % lineno
+        else:
+            line_str = ""
+        sys.exit("%s %s" % (line_str, string))
+
+    # Ignore comments.  These rules return nothing, so no token is
+    # produced for the matched text.
+    def t_ANY_COMMENT(self, t):
+        r'\#[^\n]*(?=\n)'
+
+    def t_ANY_MULTILINECOMMENT(self, t):
+        r'/\*([^/]|((?<!\*)/))*\*/'
+
+    # Lexer error handler: report the first character that could not be
+    # matched.
+    def t_ANY_error(self, t):
+        self.error(t.lineno, "illegal character '%s'" % t.value[0])
+        # Only reached if error() is overridden to return instead of
+        # exiting.  skip() is a method of the lexer, not of the token.
+        t.lexer.skip(1)
+
+    # Parse error handler.  Note that the argument here is the offending
+    # *token*, not a grammar symbol (hence the need to use t.value)
+    def p_error(self, t):
+        if t:
+            self.error(t.lineno, f"syntax error at '{t.value}'")
+        else:
+            # No token to blame: report without a line number and include
+            # a traceback.
+            self.error(0, "unknown syntax error", True)
diff --git a/src/arch/x86/isa/macroop.isa b/src/arch/x86/isa/macroop.isa
index 07f8a02..6073756 100644
--- a/src/arch/x86/isa/macroop.isa
+++ b/src/arch/x86/isa/macroop.isa
@@ -131,7 +131,7 @@
 }};

 let {{
-    from micro_asm import CombinationalMacroop, RomMacroop
+    from ucasmlib import CombinationalMacroop, RomMacroop
     class X86Macroop(CombinationalMacroop):
         def setAdjustEnv(self, val):
             self.adjust_env = val
diff --git a/src/arch/x86/isa/microasm.isa b/src/arch/x86/isa/microasm.isa
index 7d621b7..88ecfbc 100644
--- a/src/arch/x86/isa/microasm.isa
+++ b/src/arch/x86/isa/microasm.isa
@@ -52,7 +52,8 @@
     sys.path[0:0] = ["src/arch/x86/isa/"]
     from insts import microcode
     # print microcode
-    from micro_asm import MicroAssembler, RomMacroop
+    from ucasmlib import RomMacroop
+    from ucasmlib.assembler import MicroAssembler
     mainRom = X86MicrocodeRom('main ROM')
     assembler = MicroAssembler(X86Macroop, microopClasses, mainRom, RomMacroop)

diff --git a/src/arch/x86/isa/rom.isa b/src/arch/x86/isa/rom.isa
index d53c9de..289efc8 100644
--- a/src/arch/x86/isa/rom.isa
+++ b/src/arch/x86/isa/rom.isa
@@ -25,7 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 let {{
-    from micro_asm import Rom
+    from ucasmlib import Rom

     class X86MicrocodeRom(Rom):
         def getDeclaration(self):

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/56336
To unsubscribe, or for help writing mail filters, visit https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: develop
Gerrit-Change-Id: I7fd45f8b03d7ac22fcc6d2b8715845d1b10af12d
Gerrit-Change-Number: 56336
Gerrit-PatchSet: 1
Gerrit-Owner: Gabe Black <gabe.bl...@gmail.com>
Gerrit-MessageType: newchange
_______________________________________________
gem5-dev mailing list -- gem5-dev@gem5.org
To unsubscribe send an email to gem5-dev-le...@gem5.org
%(web_page_url)slistinfo%(cgiext)s/%(_internal_name)s

Reply via email to