Copilot commented on code in PR #13126:
URL: https://github.com/apache/trafficserver/pull/13126#discussion_r3163707867


##########
tools/hrw4u/src/ast_nodes.py:
##########
@@ -0,0 +1,152 @@
+#
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+from typing import Union
+
+
+@dataclass(frozen=True, kw_only=True)
+class Node:
+    line: int
+
+
+@dataclass(frozen=True)
+class Target:
+    namespace: str | None
+    field: str
+
+    @staticmethod
+    def from_dotted(name: str) -> Target:
+        # TODO: the grammar lexes dotted paths as a single IDENT token;
+        # ideally the grammar would split namespace/field so this
+        # heuristic isn't needed.
+        dot = name.rfind(".")
+        if dot == -1:
+            return Target(namespace=None, field=name)
+        return Target(namespace=name[:dot], field=name[dot + 1:])
+
+
+@dataclass(frozen=True, kw_only=True)
+class Assignment(Node):
+    target: Target
+    operator: str       # "=" or "+="
+    value: str | int | bool | tuple
+
+
+@dataclass(frozen=True, kw_only=True)
+class FunctionCall(Node):
+    name: str
+    args: tuple[str | int | bool, ...]
+

Review Comment:
   The AST currently collapses multiple value kinds into plain `str` (e.g., 
IDENT values, STRING literals, and `$param` references all become `str`). That 
loses critical semantic information (quoted vs unquoted, variable-ref vs string 
literal, param-ref vs literal text), which will break downstream codegen/static 
analysis that needs to reproduce correct hrw4u/header_rewrite output. Consider 
introducing explicit value nodes (e.g., StringLiteral/IdentValue/ParamRef) or a 
tagged Value type that preserves the original kind (and possibly raw token 
text) instead of using bare `str` for all of these cases.



##########
tools/hrw4u/src/ast_visitor.py:
##########
@@ -0,0 +1,293 @@
+#
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import annotations
+
+from hrw4u.hrw4uVisitor import hrw4uVisitor
+from hrw4u.ast_nodes import (
+    HRW4UAST,
+    Section,
+    Assignment,
+    FunctionCall,
+    Break,
+    Target,
+    IfBlock,
+    ElifBranch,
+    BoolLiteral,
+    Comparison,
+    LogicalOp,
+    NotOp,
+    IdentCondition,
+    ProcParam,
+    VarDecl,
+    VarSection,
+    UseDirective,
+    ProcedureDecl,
+)
+
+
+class ASTVisitor(hrw4uVisitor):
+    """ANTLR visitor that walks an HRW4U parse tree and produces an AST for HRW4U."""
+
+    # Only visitProgram is overridden from the ANTLR visitor interface;
+    # all other traversal uses private _visit_* helpers so that each
+    # method has an explicit return type and full control over how
+    # child results are assembled into parent AST nodes.
+
+    def visitProgram(self, ctx):
+        items = []
+        for item in ctx.programItem():
+            if item.useDirective() is not None:
+                items.append(self._visit_use_directive(item.useDirective()))
+            elif item.procedureDecl() is not None:
+                items.append(self._visit_procedure_decl(item.procedureDecl()))
+            elif item.section() is not None:
+                items.append(self._visit_section(item.section()))
+        return HRW4UAST(body=tuple(items))
+
+    def _visit_use_directive(self, ctx):
+        return UseDirective(
+            spec=ctx.QUALIFIED_IDENT().getText(),
+            line=ctx.start.line,
+        )
+
+    def _visit_procedure_decl(self, ctx):
+        name = ctx.QUALIFIED_IDENT().getText()
+        params = ()
+        if ctx.paramList():
+            params = tuple(
+                self._visit_proc_param(p) for p in ctx.paramList().param()
+            )
+        body = tuple(self._visit_body(ctx.block().blockItem()))
+        return ProcedureDecl(name=name, params=params, body=body, line=ctx.start.line)
+
+    def _visit_proc_param(self, ctx):
+        name = ctx.IDENT().getText()
+        default = self._extract_value(ctx.value()) if ctx.value() else None
+        return ProcParam(name=name, default=default, line=ctx.start.line)
+
+    def _visit_section(self, ctx):
+        if ctx.varSection() is not None:
+            return self._visit_var_section(ctx.varSection(), "txn")
+        if ctx.sessionVarSection() is not None:
+            return self._visit_var_section(ctx.sessionVarSection(), "session")
+        name = ctx.name.text
+        body = self._visit_body(ctx.sectionBody())
+        return Section(type=name, body=tuple(body), line=ctx.start.line)
+
+    def _visit_var_section(self, ctx, scope):
+        decls = []
+        for var_item in ctx.variables().variablesItem():
+            if var_item.variableDecl() is not None:
+                decls.append(self._visit_var_decl(var_item.variableDecl()))
+        return VarSection(scope=scope, declarations=tuple(decls), line=ctx.start.line)
+
+    def _visit_var_decl(self, ctx):
+        return VarDecl(
+            name=ctx.name.text,
+            type_name=ctx.typeName.text,
+            slot=int(ctx.slot.text) if ctx.slot else None,
+            line=ctx.start.line,
+        )
+
+    def _visit_body(self, items):
+        """Shared helper for sectionBody and blockItem lists."""
+        result = []
+        for item in items:
+            if item.statement() is not None:
+                result.append(self._visit_statement(item.statement()))
+            elif item.conditional() is not None:
+                result.append(self._visit_conditional(item.conditional()))
+        return result
+
+    def _visit_statement(self, ctx):
+        line = ctx.start.line
+        if ctx.BREAK():
+            return Break(line=line)
+        if ctx.functionCall():
+            return self._visit_function_call(ctx.functionCall())
+        if ctx.EQUAL():
+            target = Target.from_dotted(ctx.lhs.text)
+            value = self._extract_value(ctx.value())
+            return Assignment(target=target, operator="=", value=value, line=line)
+        if ctx.PLUSEQUAL():
+            target = Target.from_dotted(ctx.lhs.text)
+            value = self._extract_value(ctx.value())
+            return Assignment(target=target, operator="+=", value=value, line=line)
+        if ctx.op:
+            return FunctionCall(name=ctx.op.text, args=(), line=line)
+        raise ValueError(f"Unhandled statement alternative at line {line}")
+
+    def _visit_function_call(self, ctx):
+        name = ctx.funcName.text
+        args = ()
+        if ctx.argumentList():
+            args = tuple(
+                self._extract_value(v) for v in ctx.argumentList().value()
+            )
+        return FunctionCall(name=name, args=args, line=ctx.start.line)
+
+    def _extract_value(self, ctx):
+        if ctx.number is not None:
+            return int(ctx.number.text)
+        if ctx.str_ is not None:
+            return ctx.str_.text[1:-1]
+        if ctx.TRUE():
+            return True
+        if ctx.FALSE():
+            return False
+        if ctx.ident is not None:
+            return ctx.ident.text
+        if ctx.ip():
+            return ctx.ip().getText()
+        if ctx.iprange():
+            return tuple(ip.getText() for ip in ctx.iprange().ip())
+        if ctx.paramRef():
+            return ctx.paramRef().getText()
+        return ctx.getText()

Review Comment:
   `_extract_value()` strips quotes from STRING tokens (`ctx.str_.text[1:-1]`). 
This makes a quoted literal like `"foo"` indistinguishable from an unquoted 
IDENT value `foo` in the resulting AST (both become `'foo'`), and similarly 
`"$tag"` vs `$tag`. If downstream visitors rely on the AST for correct codegen, 
they need to know whether the original value was quoted or a reference. 
Consider returning a tagged value / literal node here rather than a plain 
Python `str`.



##########
tools/hrw4u/src/ast_visitor.py:
##########
@@ -0,0 +1,293 @@
+#
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import annotations
+
+from hrw4u.hrw4uVisitor import hrw4uVisitor
+from hrw4u.ast_nodes import (
+    HRW4UAST,
+    Section,
+    Assignment,
+    FunctionCall,
+    Break,
+    Target,
+    IfBlock,
+    ElifBranch,
+    BoolLiteral,
+    Comparison,
+    LogicalOp,
+    NotOp,
+    IdentCondition,
+    ProcParam,
+    VarDecl,
+    VarSection,
+    UseDirective,
+    ProcedureDecl,
+)
+
+
+class ASTVisitor(hrw4uVisitor):
+    """ANTLR visitor that walks an HRW4U parse tree and produces an AST for HRW4U."""
+
+    # Only visitProgram is overridden from the ANTLR visitor interface;
+    # all other traversal uses private _visit_* helpers so that each
+    # method has an explicit return type and full control over how
+    # child results are assembled into parent AST nodes.
+
+    def visitProgram(self, ctx):
+        items = []
+        for item in ctx.programItem():
+            if item.useDirective() is not None:
+                items.append(self._visit_use_directive(item.useDirective()))
+            elif item.procedureDecl() is not None:
+                items.append(self._visit_procedure_decl(item.procedureDecl()))
+            elif item.section() is not None:
+                items.append(self._visit_section(item.section()))
+        return HRW4UAST(body=tuple(items))

Review Comment:
   Most hrw4u Python modules annotate function signatures (at least return 
types), but `ASTVisitor` methods here are untyped. Adding return/arg 
annotations (e.g., `visitProgram(...) -> HRW4UAST`, helpers returning 
`BodyNode`/`ConditionExpr`, and a shared Value type) will make future 
grammar/AST evolution safer and easier to review.



##########
tools/hrw4u/src/ast_visitor.py:
##########
@@ -0,0 +1,293 @@
+#
+#  Licensed to the Apache Software Foundation (ASF) under one
+#  or more contributor license agreements.  See the NOTICE file
+#  distributed with this work for additional information
+#  regarding copyright ownership.  The ASF licenses this file
+#  to you under the Apache License, Version 2.0 (the
+#  "License"); you may not use this file except in compliance
+#  with the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+from __future__ import annotations
+
+from hrw4u.hrw4uVisitor import hrw4uVisitor
+from hrw4u.ast_nodes import (
+    HRW4UAST,
+    Section,
+    Assignment,
+    FunctionCall,
+    Break,
+    Target,
+    IfBlock,
+    ElifBranch,
+    BoolLiteral,
+    Comparison,
+    LogicalOp,
+    NotOp,
+    IdentCondition,
+    ProcParam,
+    VarDecl,
+    VarSection,
+    UseDirective,
+    ProcedureDecl,
+)
+
+
+class ASTVisitor(hrw4uVisitor):
+    """ANTLR visitor that walks an HRW4U parse tree and produces an AST for HRW4U."""
+
+    # Only visitProgram is overridden from the ANTLR visitor interface;
+    # all other traversal uses private _visit_* helpers so that each
+    # method has an explicit return type and full control over how
+    # child results are assembled into parent AST nodes.
+
+    def visitProgram(self, ctx):
+        items = []
+        for item in ctx.programItem():
+            if item.useDirective() is not None:
+                items.append(self._visit_use_directive(item.useDirective()))
+            elif item.procedureDecl() is not None:
+                items.append(self._visit_procedure_decl(item.procedureDecl()))
+            elif item.section() is not None:
+                items.append(self._visit_section(item.section()))

Review Comment:
   `visitProgram()` silently ignores any `programItem` that isn't a use 
directive, procedure declaration, or section; `commentLine()` items currently 
fall through the same way and are skipped. Given the goal of catching 
visitor/grammar drift early, it would be better to handle `commentLine()` 
explicitly and raise on any truly unrecognized `programItem` alternative, so 
that new grammar constructs don't get dropped without a failure.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to