areusch commented on code in PR #12087:
URL: https://github.com/apache/tvm/pull/12087#discussion_r924081353


##########
tests/python/contrib/test_uma/test_uma_lowering_with_umalower.py:
##########
@@ -0,0 +1,115 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import pytest
+import pathlib
+
+import tvm
+from tests.python.contrib.test_uma.test_uma_utils import _create_schedule, _generate_io_arrays
+from tvm import topi
+from tvm.relay.backend.contrib.uma._template.passes import MyAiHwConv2dPass
+import tvm.testing
+from tvm import te
+from tvm.relay.backend.contrib.uma.api.lower import UMALower
+from tvm.relay.backend.contrib.uma.api.utils import PassPhase
+
+
+def _conv2d_te_definition(shapes: dict) -> list:
+    n, w, h, ci, kw, kh, co = (
+        shapes["n"],
+        shapes["w"],
+        shapes["h"],
+        shapes["ci"],
+        shapes["kw"],
+        shapes["kh"],
+        shapes["co"],
+    )
+    ifmap = te.placeholder((n, ci, w, h), dtype="float32", name="ifmap")
+    weights = te.placeholder((co, ci, kw, kh), dtype="float32", name="weights")
+    result = topi.nn.conv2d_nchw(ifmap, weights, stride=1, padding=[kw // 2, kh // 2], dilation=1)
+    return [ifmap, weights, result]
+
+
+def _pepare_conv2d_schedule(shapes, use_external_conv2d_impl=True):
+    placeholders = _conv2d_te_definition(shapes)
+
+    uma_path = pathlib.Path(str(tvm.relay.backend.contrib.uma.__file__)).parent.absolute()
+    conv2d_file = uma_path / "_template" / "conv2dnchw.cc"
+
+    with conv2d_file.open() as f:
+        sch_tir = _create_schedule(
+            placeholders, f, use_external_conv2d_impl=use_external_conv2d_impl
+        )
+    return placeholders, sch_tir
+
+
+def _run_external_conv2d(dut_io_arrays, conv2d_shapes, target):
+    # Run conv2d with external function
+    placeholders, schedule = _pepare_conv2d_schedule(conv2d_shapes)
+
+    uma_lower = UMALower("lower_test")
+    uma_lower._tir_passes.append((PassPhase.TIR_PHASE_0, MyAiHwConv2dPass()))
+    with tvm.transform.PassContext():
+        tir_mod = uma_lower._lower_stir_to_nstir(schedule.mod["main"])
+
+    ifmap_data, weight_data, result_data = dut_io_arrays
+
+    llvm_conv2d_mod = tvm.build(tir_mod, placeholders, target=target, name="test_external_conv2d")
+    llvm_conv2d_mod(ifmap_data, weight_data, result_data)
+
+
+def _run_reference_conv2d(reference_io_arrays, conv2d_shapes, target):
+    placeholders, schedule = _pepare_conv2d_schedule(conv2d_shapes)
+    ref_mod = tvm.build(schedule.mod, placeholders, target=target, name="test_reference_conv2d")
+    ifmap, weights, result = reference_io_arrays
+    ref_mod(ifmap, weights, result)
+
+
+def _prepare_io_arrays(conv2d_shapes, dev):
+    dut_io_arrays = _generate_io_arrays(conv2d_shapes, dev)
+    _, _, ref_result = _generate_io_arrays(conv2d_shapes, dev)
+    reference_io_arrays = [dut_io_arrays[0], dut_io_arrays[1], ref_result]
+    return dut_io_arrays, reference_io_arrays
+
+
+@pytest.mark.parametrize(
+    "n, w, h, ci, kw, kh, co",
+    [
+        (1, 224, 224, 3, 3, 3, 4),
+        (1, 224, 224, 3, 5, 5, 4),
+        (1, 224, 224, 3, 7, 7, 4),
+        (1, 224, 320, 3, 7, 7, 4),
+        (1, 224, 224, 3, 7, 7, 4),
+    ],
+)
+def test_lower_with_uma(n, w, h, ci, kw, kh, co):
+    target = tvm.target.Target(target="llvm", host="llvm")
+    dev = tvm.device(target.kind.name, 0)
+    conv2d_shapes = dict(n=n, w=w, h=h, ci=ci, kw=kw, kh=kh, co=co)
+
+    dut_io_arrays, reference_io_arrays = _prepare_io_arrays(conv2d_shapes, dev)
+
+    _run_external_conv2d(dut_io_arrays, conv2d_shapes, target)
+    _run_reference_conv2d(reference_io_arrays, conv2d_shapes, target)
+
+    # compare results
+    dut_results = dut_io_arrays[2].numpy()
+    ref_results = reference_io_arrays[2].numpy()
+    tvm.testing.assert_allclose(dut_results, ref_results, rtol=1e-5)
+
+
+if __name__ == "__main__":
+    test_lower_with_uma(1, 224, 224, 3, 3, 3, 4)

Review Comment:
   consider using `tvm.testing.main()` here instead of calling the test function directly
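   
   i.e. the entry point would become:
   ```
   if __name__ == "__main__":
       tvm.testing.main()
   ```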



##########
python/tvm/relay/backend/contrib/uma/_template/backend.py:
##########
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""UMA backend for the my_ai_hw accelerator"""
+from .passes import MyAiHwConv2dPass
+from ..api.utils import PassPhase
+from ..backend import UMABackend
+from .codegen import gen_includes, gen_replace_call_extern
+from .patterns import conv2d_pattern
+
+
+class MyAiHwBackend(UMABackend):
+    """UMA backend for the MyAiHw accelerator."""
+
+    def __init__(self):
+        super().__init__()
+
+        #######################################################################

Review Comment:
   this might be a bit of a personal preference, but i often find long comment separators can make the code harder to parse as a whole. my personal vote is to remove these, though i don't know if there is a specific line in the style guide about this



##########
include/tvm/relay/transform.h:
##########
@@ -509,6 +509,8 @@ TVM_DLL Pass SimplifyExpr();
  *
  * \param config All available targets.
  *
+ * \param config All available targets.

Review Comment:
   nit: remove



##########
cmake/config.cmake:
##########
@@ -296,9 +296,6 @@ set(USE_VTA_FPGA OFF)
 # Whether use Thrust
 set(USE_THRUST OFF)
 
-# Whether use cuRAND

Review Comment:
   why this change?



##########
python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc:
##########
@@ -0,0 +1,76 @@
+/*
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+*/
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C"
+#endif
+    int

Review Comment:
   if possible could you write a docstring for this? could be helpful for folks to understand the parameters at this level, since C++ is often a familiar starting point
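   
   e.g. a rough sketch (parameter meanings inferred from the loop nest, so please double-check):
   ```
   /*!
    * \brief Conv2D with NCHW layout, implicit zero padding, stride 1 and dilation 1.
    * \param ifmap Input feature map, shape (ic, ih, iw).
    * \param weights Convolution weights, shape (oc, ic, kh, kw).
    * \param result Output feature map, shape (oc, ih, iw).
    * \param oc Number of output channels.
    * \param iw Input feature map width.
    * \param ih Input feature map height.
    * \param ic Number of input channels.
    * \param kh Kernel height.
    * \param kw Kernel width.
    * \return 0 on success, -1 if the temporary buffer allocation fails.
    */
   ```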



##########
python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc:
##########
@@ -0,0 +1,76 @@
+/*
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+*/
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C"
+#endif
+    int
+    my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic,
+                        int kh, int kw) {
+
+  int kw_low = kw / 2;
+  int kh_low = kh / 2;
+  int kw_high = iw + kw / 2;
+  int kh_high = ih + kh / 2;
+
+  int padded_iw = iw + 2 * kw_low;
+  int padded_ih = ih + 2 * kh_low;
+
+  float* pad_temp = (float*)malloc(
+      (((ic * padded_iw * padded_ih) + (padded_ih * padded_iw)) + padded_iw) * sizeof(float));
+
+  if (pad_temp == NULL) {
+    return -1;
+  }
+
+  for (int i1 = 0; i1 < ic; ++i1) {
+    for (int i2 = 0; i2 < padded_ih; ++i2) {
+      for (int i3 = 0; i3 < padded_iw; ++i3) {
+        ((float*)pad_temp)[(((i1 * padded_iw * padded_ih) + (i2 * padded_iw)) + i3)] =
+            (((((kh_low <= i2) && (i2 < kh_high)) && (kw_low <= i3)) && (i3 < kw_high))
+                 ? ifmap[((((i1 * iw * ih) + ((i2 - kh_low) * iw)) + i3 - kw_low))]
+                 : 0.000000e+00f);
+      }
+    }
+  }
+  for (int i11 = 0; i11 < oc; ++i11) {
+    for (int i21 = 0; i21 < ih; ++i21) {
+      for (int i31 = 0; i31 < iw; ++i31) {
+        for (int i4 = 0; i4 < ic; ++i4) {
+          for (int i5 = 0; i5 < kh; ++i5) {
+            for (int i6 = 0; i6 < kw; ++i6) {
+              int cse_var_1 = (((i11 * iw * ih) + (i21 * iw)) + i31);
+              if (((i4 == 0) && (i5 == 0)) && (i6 == 0)) {
+                result[cse_var_1] = 0.000000e+00f;
+              }
+              result[cse_var_1] =
+                  (result[cse_var_1] +
+                   (((float*)
+                         pad_temp)[i4 * padded_iw * padded_ih + (i21 + i5) * padded_iw + i31 + i6] *
+                    weights[((((i11 * ic * kh * kw) + (i4 * kh * kw)) + (i5 * kw)) + i6)]));
+            }
+          }
+        }
+      }
+    }
+  }
+  free(pad_temp);
+  return 0;
+}

Review Comment:
   nit: newline at end of file



##########
python/tvm/relay/backend/contrib/uma/_template/passes.py:
##########
@@ -0,0 +1,137 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Transform passes for the my_ai_hw accelerator"""
+
+import tvm
+from tvm import relay, tir
+from tvm.relay.backend.contrib.uma.api.utils import add_llvm_to_block
+
+
+@tvm.tir.transform.prim_func_pass(opt_level=2)
+class MyAiHwConv2dPass:
+    def transform_function(
+        self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
+    ) -> tvm.tir.PrimFunc:
+        return self._my_ai_hw_conv2d_pass(func, mod, ctx)
+
+    @staticmethod

Review Comment:
   what's the benefit of declaring this as staticmethod?



##########
python/tvm/relay/backend/contrib/uma/_template/backend.py:
##########
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""UMA backend for the my_ai_hw accelerator"""
+from .passes import MyAiHwConv2dPass
+from ..api.utils import PassPhase
+from ..backend import UMABackend
+from .codegen import gen_includes, gen_replace_call_extern
+from .patterns import conv2d_pattern
+
+
+class MyAiHwBackend(UMABackend):
+    """UMA backend for the MyAiHw accelerator."""
+
+    def __init__(self):
+        super().__init__()
+
+        #######################################################################
+        # Target configuration
+        #######################################################################
+        self._register_target_attr("dimension")
+
+        #######################################################################
+        # Relay to Relay function registration
+        #######################################################################
+        self._register_pattern("conv2d", conv2d_pattern())
+
+        #######################################################################
+        # Relay to TIR function registration
+        #######################################################################
+        self._register_tir_pass(PassPhase.TIR_PHASE_0, MyAiHwConv2dPass())

Review Comment:
   do we need to make the phase numbers part of an enum, or should it be more like
   ```
   self._register_tir_pass(PassPhase.TIR_PHASE, 0, MyAiHwConv2dPass())
   ```



##########
python/tvm/relay/backend/contrib/uma/_template/conv2dnchw.cc:
##########
@@ -0,0 +1,76 @@
+/*
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+*/
+#include <stdlib.h>
+
+#ifdef __cplusplus
+extern "C"
+#endif
+    int
+    my_ai_hw_conv2dnchw(float* ifmap, float* weights, float* result, int oc, int iw, int ih, int ic,
+                        int kh, int kw) {
+
+  int kw_low = kw / 2;
+  int kh_low = kh / 2;
+  int kw_high = iw + kw / 2;
+  int kh_high = ih + kh / 2;
+
+  int padded_iw = iw + 2 * kw_low;
+  int padded_ih = ih + 2 * kh_low;
+
+  float* pad_temp = (float*)malloc(

Review Comment:
   should this be `TVMBackendAllocWorkspace`? or do we intend to show use of a standard memory allocator here?
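   
   if the former, a rough (untested) sketch using the workspace API from `tvm/runtime/c_backend_api.h`; device_type/device_id are shown as CPU/0 purely for illustration:
   ```
   #include <tvm/runtime/c_backend_api.h>

   float* pad_temp = (float*)TVMBackendAllocWorkspace(
       kDLCPU, 0, (uint64_t)(ic * padded_ih * padded_iw) * sizeof(float), kDLFloat, 32);
   if (pad_temp == NULL) {
     return -1;
   }
   /* ... conv2d body unchanged ... */
   TVMBackendFreeWorkspace(kDLCPU, 0, pad_temp);
   ```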



##########
python/tvm/relay/backend/contrib/uma/_template/passes.py:
##########
@@ -0,0 +1,137 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Transform passes for the my_ai_hw accelerator"""
+
+import tvm
+from tvm import relay, tir
+from tvm.relay.backend.contrib.uma.api.utils import add_llvm_to_block
+
+
+@tvm.tir.transform.prim_func_pass(opt_level=2)
+class MyAiHwConv2dPass:
+    def transform_function(

Review Comment:
   suggest docstring here since this is an example
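   
   e.g. a sketch of what i have in mind:
   ```
   def transform_function(
       self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
   ) -> tvm.tir.PrimFunc:
       """Rewrite the conv2d_nchw TIR block in `func` into a call to the
       external my_ai_hw_conv2dnchw implementation."""
       return self._my_ai_hw_conv2d_pass(func, mod, ctx)
   ```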



##########
python/tvm/relay/backend/contrib/uma/_template/passes.py:
##########
@@ -0,0 +1,137 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Transform passes for the my_ai_hw accelerator"""
+
+import tvm
+from tvm import relay, tir
+from tvm.relay.backend.contrib.uma.api.utils import add_llvm_to_block
+
+
+@tvm.tir.transform.prim_func_pass(opt_level=2)
+class MyAiHwConv2dPass:
+    def transform_function(
+        self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
+    ) -> tvm.tir.PrimFunc:
+        return self._my_ai_hw_conv2d_pass(func, mod, ctx)
+
+    @staticmethod
+    def _my_ai_hw_conv2d_pass(func, mod, ctx):
+        _found_blocks = []
+        _loops = dict()
+        _handles = []
+        _entry_node = None
+        _external_function_name = "my_ai_hw_conv2dnchw"
+        _tvm_block_match_name = "conv2d_nchw"
+
+        def _has_block(name: str, func) -> bool:
+            """
+            Determine if a tir.block with `name` exists in `func`
+            """
+
+            def _hb(op):
+                if isinstance(op, tvm.tir.Block):
+                    _found_blocks.append(op.name_hint)
+
+            _found_blocks = []
+            tvm.tir.stmt_functor.post_order_visit(func.body, _hb)
+            return name in _found_blocks
+
+        def _transform_function(
+            func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
+        ) -> tvm.tir.PrimFunc:
+            def _replace_conv2d(op):
+                if op == _entry_node:
+                    irb = tvm.tir.ir_builder.create()
+                    # Collection of buffer address
+                    buffers = [b[1].data for b in _handles]
+                    # extraction of loop offsets
+                    for k, v in _loops.items():
+                        assert v.min.value == 0
+                    offset_order = ["co", "w", "h", "ci", "kh", "kw"]
+                    offsets = [_loops[i].extent.value for i in offset_order]
+                    args = buffers + offsets
+                    external_call = tvm.tir.Evaluate(
+                        tir_call(irb, True, _external_function_name, *args)
+                    )
+                    mac_calls = tvm.tir.SeqStmt([external_call])
+                    irb.emit(mac_calls)
+                    irb_result = irb.get()
+                    return irb_result
+                return op
+
+            sch = tir.Schedule(func)
+
+            if _has_block(_tvm_block_match_name, func):
+                conv2d_block = sch.get_block(_tvm_block_match_name)
+                rv_loops = sch.get_loops(conv2d_block)
+                assert len(rv_loops) == 7
+                loops = dict(
+                    n=rv_loops[0],
+                    co=rv_loops[1],
+                    h=rv_loops[2],
+                    w=rv_loops[3],
+                    ci=rv_loops[4],
+                    kh=rv_loops[5],
+                    kw=rv_loops[6],
+                )
+                _entry_node = sch.get(rv_loops[1])
+                _loops = {k: sch.get(v) for k, v in loops.items()}
+                _handles = func.buffer_map.items()
+
+                x = tvm.tir.stmt_functor.ir_transform(func.body, None, _replace_conv2d, ["tir.For"])
+                return func.with_body(x)
+            else:
+                return func
+
+        r = _transform_function(func, mod, ctx)
+        return r
+
+
+def tir_call(ib: tvm.tir.ir_builder, extern: bool, name: str, *args):
+    """
+    ib: ir_builder
+    extern: bool
+        True  --> tvm.tir.call_extern
+        False --> tvm.tir.call_packed
+    name: str
+        function name
+    *args:
+        arguments for function call
+    """
+
+    def buf_from_array(ib, arr, dtype):
+        # Allocate enough memory to store the whole array
+        var = ib.allocate("int32", (len(arr),), scope="global")
+        for i, v in enumerate(arr):
+            var[i] = v
+        # Declare a buffer, which is basically a view on the chunk of memory that we allocated previously
+        buf = tvm.tir.decl_buffer((len(arr),), dtype, data=var, scope="global")

Review Comment:
   should we use arr.shape?



##########
python/tvm/relay/backend/contrib/uma/_template/passes.py:
##########
@@ -0,0 +1,137 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Transform passes for the my_ai_hw accelerator"""
+
+import tvm
+from tvm import relay, tir
+from tvm.relay.backend.contrib.uma.api.utils import add_llvm_to_block
+
+
+@tvm.tir.transform.prim_func_pass(opt_level=2)
+class MyAiHwConv2dPass:
+    def transform_function(
+        self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
+    ) -> tvm.tir.PrimFunc:
+        return self._my_ai_hw_conv2d_pass(func, mod, ctx)
+
+    @staticmethod
+    def _my_ai_hw_conv2d_pass(func, mod, ctx):
+        _found_blocks = []
+        _loops = dict()
+        _handles = []
+        _entry_node = None
+        _external_function_name = "my_ai_hw_conv2dnchw"

Review Comment:
   this one and the one below could be a class-level constant, e.g. `_EXTERNAL_FUNCTION_NAME = ...`
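   
   e.g.
   ```
   class MyAiHwConv2dPass:
       _EXTERNAL_FUNCTION_NAME = "my_ai_hw_conv2dnchw"
       _TVM_BLOCK_MATCH_NAME = "conv2d_nchw"
   ```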



##########
python/tvm/relay/backend/contrib/uma/_template/run.py:
##########
@@ -0,0 +1,88 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER
+
+from tvm.testing.aot import compile_and_run, AOTTestModel, AOTTestRunner
+
+import tvm
+from tvm import relay
+from tvm.relay.backend.contrib.uma._template.backend import MyAiHwBackend
+from tvm.relay import transform
+from collections import OrderedDict
+
+import numpy as np
+import tarfile
+from pathlib import Path
+import onnx
+
+from tvm.testing.aot import (
+    AOTTestModel,
+    AOTTestRunner,
+    generate_ref_data,
+    compile_and_run,
+)
+
+
+def create_conv2d(groups=1, test_runner=AOT_DEFAULT_RUNNER, weight_shape=32):

Review Comment:
   is it possible to use this with Project API rather than AOT_DEFAULT_RUNNER? e.g. following `test_crt.py`.



##########
python/tvm/relay/backend/contrib/uma/_template/passes.py:
##########
@@ -0,0 +1,137 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Transform passes for the my_ai_hw accelerator"""
+
+import tvm
+from tvm import relay, tir
+from tvm.relay.backend.contrib.uma.api.utils import add_llvm_to_block
+
+
+@tvm.tir.transform.prim_func_pass(opt_level=2)
+class MyAiHwConv2dPass:
+    def transform_function(
+        self, func: tvm.tir.PrimFunc, mod: tvm.ir.IRModule, ctx: tvm.ir.transform.PassContext
+    ) -> tvm.tir.PrimFunc:
+        return self._my_ai_hw_conv2d_pass(func, mod, ctx)
+
+    @staticmethod
+    def _my_ai_hw_conv2d_pass(func, mod, ctx):
+        _found_blocks = []
+        _loops = dict()
+        _handles = []
+        _entry_node = None
+        _external_function_name = "my_ai_hw_conv2dnchw"
+        _tvm_block_match_name = "conv2d_nchw"
+
+        def _has_block(name: str, func) -> bool:

Review Comment:
   can `func` be typed?
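   
   e.g. `def _has_block(name: str, func: tvm.tir.PrimFunc) -> bool:`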



##########
python/tvm/relay/backend/contrib/uma/uma_cli.py:
##########
@@ -0,0 +1,92 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+    UMA Command Line Interface (CLI)
+
+    Tool to create code skeletons for an easy integration of
+    new AI hardware accelerators/libraries into TVM using UMA
+"""
+
+import argparse
+import os
+import shutil
+import sys
+from inflection import camelize, underscore
+
+
+def _parse_args():
+    parser = argparse.ArgumentParser(description="UMA Interface command line interface")
+    parser.add_argument(
+        "--add_hardware",
+        type=str,
+        required=True,
+    )
+    parser.add_argument(
+        "--tutorial",
+        type=str,
+    )
+    args = parser.parse_args()
+    return args
+
+
+def replace_template_name(
+    files: list, template_name: str, add_hw_name: str, template_source: str = "_template"
+) -> None:
+    """
+    Replace names in template skeleton code by new name
+    """
+    for f in files:
+        with open(f) as read_file:
+            data = read_file.read()
+        for case in [underscore, camelize]:
+            data = data.replace(case(template_name), case(add_hw_name))
+        data = data.replace(template_source, underscore(add_hw_name))
+        with open(f, "w") as write_file:
+            write_file.write(data)
+
+
+def main():

Review Comment:
   just also wondering if this can be integrated with tvmc instead?



##########
python/tvm/relay/backend/contrib/uma/api/codegen.py:
##########
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Codegen base class of the Universal Modular Accelerator Interface (UMA)"""
+
+from typing import Callable
+import tvm
+
+
+class UMACodegen(object):
+    """
+    Codegen base class of the Universal Modular Accelerator Interface (UMA)
+    """
+
+    def __init__(self, target_name: str) -> None:
+        self.target_name = target_name
+
+    def _register_codegen(self, fmt: str = "c", **kwargs) -> None:
+        if fmt == "c":
+            self._register_c_codegen(**kwargs)
+        else:
+            raise RuntimeError(f'Unsupported codegen format "{fmt}"')
+
+    def _register_c_codegen(
+        self,
+        includes: Callable[[], str] = None,
+        replace_call_extern: Callable[[tvm.ir.container.Array], str] = None,
+    ) -> None:
+        if includes is not None:
+            tvm._ffi.register_func(
+                "relay.ext.uma.codegen_c_includes_{}".format(self.target_name), includes

Review Comment:
   suggest f-string e.g. `f"relay.ext.uma.codegen_c_includes_{self.target_name}"`



##########
src/relay/backend/contrib/uma/targets.cc:
##########
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file relay/backend/contrib/uma/targets.cc
+ *
+ * \brief this file contains the targets for the Universal Modular Accelerator Interface (UMA).
+ */
+
+#include <tvm/relay/transform.h>
+#include <tvm/target/target.h>
+
+namespace tvm {
+
+namespace relay {
+namespace contrib {
+namespace uma {
+tvm::transform::Pass RelayToTIR(String target_name);
+runtime::Module TIRToRuntime(IRModule mod, Target target);
+}  // namespace uma
+}  // namespace contrib
+}  // namespace relay
+
+TVM_REGISTER_GLOBAL("relay.backend.contrib.uma.RegisterTarget")
+    .set_body_typed([](String target_name, Map<String, ObjectRef> attr_options) {
+      auto target_kind =
+          ::tvm::TargetKindRegEntry::RegisterOrGet(target_name)
+              .set_name()
+              .set_device_type(kDLCPU)
+              .add_attr_option<Array<String>>("keys")
+              .add_attr_option<String>("tag")
+              .add_attr_option<String>("device")
+              .add_attr_option<String>("model")
+              .add_attr_option<Array<String>>("libs")
+              .add_attr_option<Target>("host")
+              .add_attr_option<Integer>("from_device")
+              .set_attr<FTVMRelayToTIR>(tvm::attr::kRelayToTIR,
+                                        relay::contrib::uma::RelayToTIR(target_name))
+              .set_attr<FTVMTIRToRuntime>("TIRToRuntime", relay::contrib::uma::TIRToRuntime);
+
+      for (auto& attr_option : attr_options) {
+        try {
+          target_kind.add_attr_option<String>(attr_option.first,
+                                              Downcast<String>(attr_option.second));
+          continue;
+        } catch (...) {

Review Comment:
   rather than catch, can you just attempt to downcast, e.g.
   
   ```
   if (const auto* str = attr_option.second.as<StringObj>()) {
     target_kind.add_attr_option<String>(attr_option.first, GetRef<String>(str));
   } ...
   ```
   etc



##########
tests/python/contrib/test_uma/test_partition.py:
##########
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+
+import tvm
+
+from tvm.relay.backend.contrib.uma.api import UMAPartitioner
+from tvm.relay.op.contrib.register import get_pattern_table
+from tvm.relay.testing import resnet, mlp
+
+
+def test_partition_table():
+    partitioner = UMAPartitioner("test_partition")
+    assert get_pattern_table("test_partition") is None
+
+    partitioner.register()
+
+    assert get_pattern_table("test_partition") is not None
+
+
+@pytest.mark.parametrize(
+    "workload,backend,merge,expected_partitions",
+    [
+        ("resnet", "dnnl", False, 17),
+        ("resnet", "dnnl", True, 17),

Review Comment:
   possible to express expected_partitions as something slightly easier to port, in case these models change? e.g. expected_partition_layers and then compute the # of expected partitions by looking for layers named that in the model and asserting there is at least one?
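   
   a rough sketch of what i mean (`_count_offloaded_partitions` is a hypothetical helper):
   ```
   def _count_offloaded_partitions(mod) -> int:
       """Count the Relay functions carrying a "Compiler" attribute, i.e. the
       subgraphs that were handed to the external codegen."""
       return sum(
           1
           for gv in mod.get_global_vars()
           if mod[gv].attrs is not None and "Compiler" in mod[gv].attrs
       )
   ```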



##########
src/relay/backend/contrib/uma/relay_to_tir.cc:
##########
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file relay/backend/contrib/uma/codegen.cc
+ *
+ * \brief this file contains the target hooks for the Universal Modular Accelerator Interface (UMA).
+ */
+
+#include <tvm/ir/error.h>
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/attrs/annotation.h>
+#include <tvm/relay/expr.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/target/target.h>
+#include <tvm/tir/function.h>
+
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace uma {
+
+/*!
+ * \brief This mutator outlines functions that are marked with a named
+ * "Compiler" attribute. Functions that do not match this condition remain
+ * unaltered.
+ */
+class OutlineCompilerFunctionsMutator : public MixedModeMutator {

Review Comment:
   i think this already exists in ethosu, can we factor it out instead of duplicating?



##########
python/tvm/relay/backend/contrib/uma/tutorial.md:
##########
@@ -0,0 +1,195 @@
+<!--- Licensed to the Apache Software Foundation (ASF) under one -->
+<!--- or more contributor license agreements.  See the NOTICE file -->
+<!--- distributed with this work for additional information -->
+<!--- regarding copyright ownership.  The ASF licenses this file -->
+<!--- to you under the Apache License, Version 2.0 (the -->
+<!--- "License"); you may not use this file except in compliance -->
+<!--- with the License.  You may obtain a copy of the License at -->
+
+<!---   http://www.apache.org/licenses/LICENSE-2.0 -->
+
+<!--- Unless required by applicable law or agreed to in writing, -->
+<!--- software distributed under the License is distributed on an -->
+<!--- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -->
+<!--- KIND, either express or implied.  See the License for the -->
+<!--- specific language governing permissions and limitations -->
+<!--- under the License. -->
+
+Making your hardware accelerator TVM-ready with UMA 

Review Comment:
   could this be a rst tutorial in gallery/?



##########
src/relay/backend/contrib/uma/relay_to_tir.cc:
##########
@@ -0,0 +1,174 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file relay/backend/contrib/uma/codegen.cc
+ *
+ * \brief this file contains the target hooks for the Universal Modular Accelerator Interface (UMA).
+ */
+
+#include <tvm/ir/error.h>
+#include <tvm/relay/analysis.h>
+#include <tvm/relay/attrs/annotation.h>
+#include <tvm/relay/expr.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/target/target.h>
+#include <tvm/tir/function.h>
+
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace uma {
+
+/*!
+ * \brief This mutator outlines functions that are marked with a named
+ * "Compiler" attribute. Functions that do not match this condition remain
+ * unaltered.
+ */
+class OutlineCompilerFunctionsMutator : public MixedModeMutator {
+ public:
+  explicit OutlineCompilerFunctionsMutator(const IRModule& mod, const std::string& compiler_name)
+      : mod_(mod), compiler_name_(compiler_name) {}
+
+  Expr VisitExpr_(const LetNode* op) final {
+    auto pre_visit = [this](const LetNode* op) {
+      Expr var = this->VisitExpr(op->var);
+      Expr value = this->VisitExpr(op->value);
+
+      // Outlineable function no longer needs let binding
+      if (this->CanOutlineExpr(value)) {
+        this->memo_[var] = value;
+      }
+    };
+    auto post_visit = [this](const LetNode* op) {
+      // Rely on the Memoizer to cache pre-visit values
+      Expr value = this->VisitExpr(op->value);
+      Expr body = this->VisitExpr(op->body);
+      auto expr = GetRef<Expr>(op);
+
+      // Drop the let binding
+      if (this->CanOutlineExpr(value)) {
+        this->memo_[expr] = this->VisitExpr(op->body);
+      } else {
+        Var var = Downcast<Var>(this->VisitExpr(op->var));
+        if (var.same_as(op->var) && value.same_as(op->value) && body.same_as(op->body)) {
+          this->memo_[expr] = expr;
+        } else {
+          this->memo_[expr] = Let(var, value, body);
+        }
+      }
+    };
+    ExpandANormalForm(op, pre_visit, post_visit);
+    return memo_[GetRef<Expr>(op)];
+  }
+
+  Expr Rewrite_(const CallNode* pre, const Expr& post) override {
+    Call call = Downcast<Call>(post);
+    if (CanOutlineExpr(call->op)) {
+      Function func = Downcast<Function>(call->op);
+      auto gv_name = func->GetAttr<String>("global_symbol").value_or("");
+      ICHECK_NE(gv_name, "")
+          << "Function to be outlined must have global_symbol attribute, but didn't.";
+      GlobalVar gv(gv_name);
+      if (func->checked_type_.defined()) {
+        gv->checked_type_ = func->checked_type();
+      }
+      mod_->Update(gv, func);
+      return Call(gv, call->args, call->attrs, call->type_args);
+    }
+    return post;
+  }
+
+ private:
+  /*!
+   * \brief Check if the expr is a function and has the same
+   * compiler name as compiler_name_.
+   *
+   * \param expr The input expr.
+   * \return True if is outlineable else False.
+   */
+  bool CanOutlineExpr(const Expr& expr) {
+    if (!expr->IsInstance<FunctionNode>()) {
+      return false;
+    }
+    Function func = Downcast<Function>(expr);
+    auto compiler = func->GetAttr<String>(attr::kCompiler);
+    if (!compiler.defined()) {
+      return false;
+    }
+    if (compiler != compiler_name_) {
+      return false;
+    }
+    return true;
+  }
+
+  /*! \brief The module that the pass will run on. */
+  IRModule mod_;
+  /*! \brief The name of the compiler to enable outlining on external functions for. */
+  std::string compiler_name_;
+};
+
+/*!
+ * \brief A pass to outline compiler specific functions.
+ */
+tvm::transform::Pass OutlineCompilerFunctions(const std::string& compiler_name) {
+  runtime::TypedPackedFunc<IRModule(IRModule, transform::PassContext)> pass_func =
+      [=](IRModule mod, transform::PassContext ctx) {
+        GlobalVar gv = mod->GetGlobalVar("main");
+        Function main_func = Downcast<Function>(mod->Lookup("main"));
+        auto new_main_body =
+            OutlineCompilerFunctionsMutator(mod, compiler_name).VisitExpr(main_func->body);
+        if (!new_main_body.same_as(main_func->body)) {
+          Function new_main_func = WithFields(main_func, main_func->params, new_main_body);
+          mod->Update(gv, new_main_func);
+        }
+        return mod;
+      };
+  return tvm::transform::CreateModulePass(pass_func, 0,
+                                          "relay.backend.contrib.uma.OutlineCompilerFunctions", {});
+}
+
+TVM_REGISTER_GLOBAL("relay.ext.uma.OutlineCompilerFunctions")
+    .set_body_typed(OutlineCompilerFunctions);
+
+/*!
+ * \brief This pass will lower NPU functions in a Relay module to scheduled TIR prim functions.

Review Comment:
   nit: fix comment



##########
tests/python/contrib/test_uma/test_partition.py:
##########
@@ -0,0 +1,71 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+
+import tvm
+
+from tvm.relay.backend.contrib.uma.api import UMAPartitioner
+from tvm.relay.op.contrib.register import get_pattern_table
+from tvm.relay.testing import resnet, mlp
+
+
+def test_partition_table():
+    partitioner = UMAPartitioner("test_partition")
+    assert get_pattern_table("test_partition") is None
+
+    partitioner.register()
+
+    assert get_pattern_table("test_partition") is not None
+
+
+@pytest.mark.parametrize(
+    "workload,backend,merge,expected_partitions",
+    [
+        ("resnet", "dnnl", False, 17),
+        ("resnet", "dnnl", True, 17),
+        ("mlp", "dnnl", False, 1),
+        ("resnet", "cutlass", False, 2),
+        ("resnet", "cutlass", True, 2),
+        ("mlp", "cutlass", False, 4),
+        ("mlp", "cutlass", True, 2),
+    ],
+)
+def test_existing_pattern_tables(workload, backend, merge, expected_partitions):
+    partitioner = UMAPartitioner(backend + "_uma", merge)
+    pattern_table = get_pattern_table(backend)
+
+    for entry in pattern_table:
+        partitioner.add_pattern(*entry)
+
+    if workload == "resnet":
+        net = resnet.get_net(1, 10)
+    elif workload == "mlp":
+        net = mlp.get_net(1, 10)

Review Comment:
   add
   ```
   else:
     assert False, f"don't know how to find workload for {workload}"
   ```



##########
python/tvm/relay/backend/contrib/uma/_template/backend.py:
##########
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one

Review Comment:
   i think maybe we could fix this with a dir-level comment in an `__init__.py`. did you guys exclude this file to avoid folks importing from `_template`?



##########
src/relay/backend/contrib/uma/tir_to_runtime.cc:
##########
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <cmath>
+#include <fstream>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "../../../../runtime/file_utils.h"
+#include "../../../../target/source/codegen_c.h"
+#include "../../../../target/source/codegen_c_host.h"
+
+namespace tvm {
+using namespace tir;
+namespace relay {
+namespace contrib {
+namespace uma {
+
+class UMACodegen : public codegen::CodeGenCHost {
+ public:
+  explicit UMACodegen(String target_str) : target_str_(target_str) {}
+
+  void Init(bool output_ssa, bool emit_asserts) {
+    auto includes_pf =
+        tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_includes_" + target_str_);
+    ICHECK(includes_pf);
+    String includes = (*includes_pf)();
+    decl_stream << includes;
+    std::unordered_set<std::string> devices;
+    devices.insert(target_str_);
+    CodeGenCHost::Init(output_ssa, emit_asserts, target_str_, devices);
+  }
+
+  /*!
+   * \brief Emit code that offloads a subgraph to the UMA target
+   *
+   * \return string of code that offloads a subgraph to the UMA target
+   */
+  void AddFunction(const PrimFunc& prim_func) { CodeGenC::AddFunction(prim_func); }
+
+ private:
+  String target_str_;
+
+  using codegen::CodeGenCHost::VisitStmt_;
+
+  /*!  * \brief Emits target specific APIs for every call_extern */
+  void VisitExpr_(const CallNode* op, std::ostream& os) final {
+    if (!op->op.same_as(builtin::call_extern())) {
+      CodeGenCHost::VisitExpr_(op, os);
+      return;
+    }
+    auto replace_call_extern_pf =

Review Comment:
   this might be a bit hacky (e.g. how many PackedFuncs do we have to register per codegen?). possible to leverage ExprVisitor instead?



##########
python/tvm/relay/backend/contrib/uma/_template/run.py:
##########
@@ -0,0 +1,88 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER
+
+from tvm.testing.aot import compile_and_run, AOTTestModel, AOTTestRunner
+
+import tvm
+from tvm import relay
+from tvm.relay.backend.contrib.uma._template.backend import MyAiHwBackend
+from tvm.relay import transform
+from collections import OrderedDict
+
+import numpy as np
+import tarfile
+from pathlib import Path
+import onnx
+
+from tvm.testing.aot import (
+    AOTTestModel,
+    AOTTestRunner,
+    generate_ref_data,
+    compile_and_run,
+)
+
+
+def create_conv2d(groups=1, test_runner=AOT_DEFAULT_RUNNER, weight_shape=32):
+    dtype = "float32"
+    ishape = (1, 32, 14, 14)
+    wshape = (32, weight_shape, 3, 3)
+    pass_config = {"tir.usmp.enable": True}
+    test_runner = AOTTestRunner(
+        makefile=test_runner.makefile,
+        prologue=test_runner.prologue,
+        epilogue=test_runner.epilogue,
+        includes=test_runner.includes,
+        parameters=test_runner.parameters,
+        pass_config=pass_config,
+    )
+    data0 = relay.var("data", shape=ishape, dtype=dtype)
+    weight0 = relay.var("weight", shape=wshape, dtype=dtype)
+    out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=groups)
+    main_f = relay.Function([data0, weight0], out)
+    mod = tvm.IRModule()
+    mod["main"] = main_f
+    mod = transform.InferType()(mod)
+    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
+    w1_data = np.random.uniform(0, 1, wshape).astype(dtype)
+    inputs = OrderedDict([("data", i_data), ("weight", w1_data)])
+    output_list = generate_ref_data(mod, inputs)
+    return mod, inputs, output_list, test_runner
+
+
+def main():
+    mod, inputs, output_list, test_runner = create_conv2d()
+
+    uma_backend = MyAiHwBackend()
+    uma_backend.register()
+    mod = uma_backend.partition(mod)
+    target = tvm.target.Target("my_ai_hw", host=tvm.target.Target("c"))
+
+    export_directory = tvm.contrib.utils.tempdir(keep_for_debug=True).path
+    print(f"Generated files are in {export_directory}")
+    compile_and_run(
+        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
+        test_runner,
+        interface_api="c",
+        use_unpacked_api=True,
+        target=target,
+        test_dir=str(export_directory),
+    )
+
+
+if __name__ == "__main__":
+    main()

Review Comment:
   is it possible to use this from `tvmc`? just curious whether you need this script or not



##########
src/relay/backend/contrib/uma/tir_to_runtime.cc:
##########
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+#include <cmath>
+#include <fstream>
+#include <map>
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "../../../../runtime/file_utils.h"
+#include "../../../../target/source/codegen_c.h"
+#include "../../../../target/source/codegen_c_host.h"
+
+namespace tvm {
+using namespace tir;
+namespace relay {
+namespace contrib {
+namespace uma {
+
+class UMACodegen : public codegen::CodeGenCHost {
+ public:
+  explicit UMACodegen(String target_str) : target_str_(target_str) {}
+
+  void Init(bool output_ssa, bool emit_asserts) {
+    auto includes_pf =
+        tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_includes_" + target_str_);
+    ICHECK(includes_pf);
+    String includes = (*includes_pf)();
+    decl_stream << includes;
+    std::unordered_set<std::string> devices;
+    devices.insert(target_str_);
+    CodeGenCHost::Init(output_ssa, emit_asserts, target_str_, devices);
+  }
+
+  /*!
+   * \brief Emit code that offloads a subgraph to the UMA target
+   *
+   * \return string of code that offloads a subgraph to the UMA target
+   */
+  void AddFunction(const PrimFunc& prim_func) { CodeGenC::AddFunction(prim_func); }
+
+ private:
+  String target_str_;
+
+  using codegen::CodeGenCHost::VisitStmt_;
+
+  /*!  * \brief Emits target specific APIs for every call_extern */
+  void VisitExpr_(const CallNode* op, std::ostream& os) final {
+    if (!op->op.same_as(builtin::call_extern())) {
+      CodeGenCHost::VisitExpr_(op, os);
+      return;
+    }
+    auto replace_call_extern_pf =
+        tvm::runtime::Registry::Get("relay.ext.uma.codegen_c_replace_call_extern_" + target_str_);
+    if (replace_call_extern_pf == nullptr) {
+      CodeGenCHost::VisitExpr_(op, os);
+    } else {
+      // - function type (void) still gets printed before CallNode if extern call is wrapped in
+      // EvaluateNode
+      // - VarNode arguments might have "wrong" name_hints. The correct variable name is determined
+      // in C++ through GetVarID
+      String api_string = (*replace_call_extern_pf)(op->args);
+      os << api_string;
+    }
+    return;
+  }
+};
+
+runtime::Module TIRToRuntime(IRModule mod, Target target) {
+  bool output_ssa = false;
+  bool emit_asserts = false;
+  UMACodegen codegen(target->kind->name);
+  Array<String> function_names;
+  codegen.Init(output_ssa, emit_asserts);
+  for (auto kv : mod->functions) {
+    auto prim_func = Downcast<PrimFunc>(kv.second);
+    auto global_symbol = prim_func->GetAttr<String>(tvm::attr::kGlobalSymbol);
+    function_names.push_back(global_symbol.value());
+    codegen.AddFunction(prim_func);
+  }
+  std::string code = codegen.Finish();
+  return codegen::CSourceModuleCreate(code, "c", function_names);

Review Comment:
   this is a great question. in C runtime, we're building out a facility to invoke accelerator drivers e.g. before/after the whole inference run and before/after each call to an accelerator-offloaded layer. in C++ runtime, it's assumed that any such setup is handled via an appropriate runtime::Module subclass either in GetFunction, constructor, or in a custom PackedFunc override returned from GetFunction. However, AOT doesn't currently support calling out to those functions yet so the c++ strategy is not quite ready to be discussed in this PR in detail.
   
   could you provide a bit more detail about the kinds of drivers you want to invoke?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org
