(tvm) branch main updated: [Relax][ONNX] Expand op support for ONNX frontend (#17427)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 9fdb86d3f6 [Relax][ONNX] Expand op support for ONNX frontend (#17427) 9fdb86d3f6 is described below commit 9fdb86d3f6bccc41a772328b5b0442908bc9f9a9 Author: Siyuan Feng AuthorDate: Thu Oct 3 22:36:55 2024 +0800 [Relax][ONNX] Expand op support for ONNX frontend (#17427) * [Relax][ONNX] Expand op support for ONNX frontend This PR adds a variety of ONNX ops to the Relax frontend, including: - Acos - Acosh - And - Asin - Asinh - Atan - Atanh - BitwiseAnd - BitwiseOr - BitwiseXor - Ceil - ConcatFromSequence - ConvTranspose - Cosh - DepthToSpace - FastGelu - Floor - GlobalLpPool - GlobalMaxPool - GreaterOrEqual - IsInf - IsNaN - LeakyRelu - LogSoftmax - MaxUnpool - Mean - MeanVarianceNormalization - Mish - Or - PRelu - Round - Scatter - ScatterElements - Selu - SequenceAt - SequenceConstruct - SequenceEmpty - SequenceErase - SequenceInsert - SequenceLength - Shrink - Sinh - Size - Softplus - Softsign - SpaceToDepth - SplitToSequence - Tan - ThresholdedRelu - TopK - Unique - Xor A few ops are still not supported; see the commented-out ops in the ONNX frontend. 
* lint * lint * lint * update for ci --- python/tvm/relax/frontend/onnx/onnx_frontend.py| 1302 python/tvm/relax/op/set.py |8 +- python/tvm/relax/transform/legalize_ops/nn.py |9 +- tests/python/relax/test_frontend_onnx.py | 664 -- tests/python/relax/test_relax_operators.py |2 +- .../python/relax/test_transform_legalize_ops_nn.py | 47 + 6 files changed, 1617 insertions(+), 415 deletions(-) diff --git a/python/tvm/relax/frontend/onnx/onnx_frontend.py b/python/tvm/relax/frontend/onnx/onnx_frontend.py index 462d1cf92c..5777f51fe2 100644 --- a/python/tvm/relax/frontend/onnx/onnx_frontend.py +++ b/python/tvm/relax/frontend/onnx/onnx_frontend.py @@ -34,14 +34,15 @@ If this fails, there may still be dynamic operations in the model. Not all TVM kernels currently support dynamic shapes, please file an issue on github.com/apache/tvm/issues if you hit an error with dynamic kernels. """ +import math import warnings -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Union import numpy as _np import onnx.onnx_ml_pb2 import tvm -from tvm import relax, tir, topi +from tvm import TVMError, relax, tir, topi from tvm.ir import IRModule from tvm.ir.supply import NameSupply from tvm.tir.generic import cast @@ -236,28 +237,176 @@ class MatMul(OnnxOpConverter): return relax.op.matmul(inputs[0], inputs[1]) -class Div(OnnxOpConverter): -"""Converts an onnx Div node into an equivalent Relax expression.""" +class BinaryBase(OnnxOpConverter): +"""Converts an onnx BinaryBase node into an equivalent Relax expression.""" + +numpy_op: Callable = None +relax_op: Callable = None @classmethod -def _impl_v14(cls, bb, inputs, attr, params): +def _impl_v1(cls, bb, inputs, attr, params): +if cls.numpy_op is None or cls.relax_op is None: +raise ValueError("Numpy and Relax operators must be defined for BinaryBase.") if all([isinstance(inp, relax.Constant) for inp in inputs]): -output = inputs[0].data.numpy() / inputs[1].data.numpy() 
+output = cls.numpy_op( # pylint: disable=not-callable +inputs[0].data.numpy(), inputs[1].data.numpy() +) return relax.const(output, inputs[0].struct_info.dtype) if any([isinstance(inp, relax.PrimValue) for inp in inputs]): x = ( -int(inputs[0].value) +_np.array(inputs[0].value) if isinstance(inputs[0], relax.PrimValue) else inputs[0].data.numpy() ) y = ( -int(inputs[1].value) +_np.array(inputs[0].value) if isinstance(inputs[1], relax.PrimValue) else inputs[1].data.numpy() ) -return relax.PrimValue(int(x / y)) +return relax.PrimValue(cls.numpy_op(x, y)) # pylint: disable=not-callable + +return cls.relax_op(inputs[0], inputs[1]) # pylint: disable=not-callable + + +class Add(BinaryBase): +"""Converts an onnx Add node into an equivalent Relax expression.""" + +nu
(tvm) branch main updated: [TVMScript][TIR] Add source kernel intetration via call_kernel (#17434)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new dc2c5a28c9 [TVMScript][TIR] Add source kernel intetration via call_kernel (#17434) dc2c5a28c9 is described below commit dc2c5a28c9132aa314cca237ffbe32e1bad8dd2a Author: Wuwei Lin AuthorDate: Thu Oct 3 06:50:45 2024 -0700 [TVMScript][TIR] Add source kernel intetration via call_kernel (#17434) * [TVMScript][TIR] Add source kernel intetration via call_kernel * lint * lint --- .../tvm/script/ir_builder/tir/external_kernel.py | 62 - tests/python/relax/test_tir_call_source_kernel.py | 100 + 2 files changed, 160 insertions(+), 2 deletions(-) diff --git a/python/tvm/script/ir_builder/tir/external_kernel.py b/python/tvm/script/ir_builder/tir/external_kernel.py index 8c2467fad3..405e1e6cbf 100644 --- a/python/tvm/script/ir_builder/tir/external_kernel.py +++ b/python/tvm/script/ir_builder/tir/external_kernel.py @@ -18,14 +18,16 @@ import json import logging import tempfile +from pathlib import Path from typing import Any, Dict, List, Tuple, Union from tvm import __version__ as tvm_version from tvm import tir -from tvm.runtime import Module, load_module +from tvm.runtime import Module, load_module, const +from tvm.contrib import nvcc -class BaseKernel: +class BaseKernel: # pylint: disable=too-few-public-methods """Base class for external kernels.""" def compile_to_device_module( @@ -91,6 +93,60 @@ class BaseKernel: return kernel_module +class SourceKernel(BaseKernel): # pylint: disable=too-few-public-methods +"""A kernel from source code.""" + +def __init__(self, source_code: str): +self.source_code = source_code + +def compile_to_device_module( # pylint: disable=arguments-differ +self, grid: List[List[Union[int, tir.PrimExpr]]], *args: List[Any], **kwargs: Dict[str, Any] +) -> Tuple[str, Module, List[Any]]: +"""Compile the kernel to a device 
module.""" +from tvm.relax.frontend.nn import SourceModule # pylint: disable=import-outside-toplevel + +kernel_name = kwargs["kernel_name"] +assert len(grid) == 2, ( +"grid should be two list of integers, representing the dimension of " +"['blockIdx.x', 'blockIdx.y', 'blockIdx.z'] and " +"['threadIdx.x', 'threadIdx.y', 'threadIdx.z']" +) +assert isinstance(grid[0], (list, tuple)) and isinstance(grid[1], (list, tuple)) +launch_param_tags = ["blockIdx.x", "blockIdx.y", "blockIdx.z"][: len(grid[0])] + [ +"threadIdx.x", +"threadIdx.y", +"threadIdx.z", +][: len(grid[1])] +runtime_args = [arg if hasattr(arg, "dtype") else const(arg) for arg in args] +kernel_arg_types = [arg.dtype for arg in runtime_args] +runtime_args = runtime_args + list(grid[0]) + list(grid[1]) + +# Reuse compilation path from SourceModule +compile_options = SourceModule.get_compile_options("cu") +source_code = self.source_code +try: +source_path = Path(source_code) +if source_path.is_file(): +with open(source_path, "r") as f: +source_code = f.read() +except: # pylint: disable=bare-except +pass + +with tempfile.TemporaryDirectory() as temp_dir: +ptx_path = f"{temp_dir}/{kernel_name}.ptx" +nvcc.compile_cuda( +source_code, target_format="ptx", options=compile_options, path_target=ptx_path +) +with open(ptx_path, "r") as f: +ptx = f.read() + +kernel_module = self._create_cuda_module( +ptx, kernel_arg_types, launch_param_tags, kernel_name +) + +return kernel_name, kernel_module, runtime_args + + def call_kernel( kernel, launch_args: List[Union[int, tir.PrimExpr, List[Union[int, tir.PrimExpr, @@ -123,6 +179,8 @@ def call_kernel( from .triton import TritonKernel # pylint: disable=import-outside-toplevel kernel = TritonKernel(kernel) +elif kernel_type == "builtins.str": +kernel = SourceKernel(kernel) else: raise ValueError("Unsupported kernel type {}".format(kernel_type)) diff --git a/tests/python/relax/test_tir_call_source_kernel.py b/tests/python/relax/test_tir_call_source_kernel.py new file mode 100644 
index 00..9a877ad35f
(tvm) branch main updated: [KVCACHE] Improved schedule for prefill attention (#17432)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 79abc0356e [KVCACHE] Improved schedule for prefill attention (#17432) 79abc0356e is described below commit 79abc0356ee66f3dbdd8bde3cbfcbf88a2ed746e Author: krishnaraj36 AuthorDate: Thu Oct 3 19:20:58 2024 +0530 [KVCACHE] Improved schedule for prefill attention (#17432) * [KVCACHE] Improved schedule for prefill attention Improvements: Added transpose to K for better vectorization during matmul. Improved load schedule. Improved by a bit more than 2x in most cases. Llama-2 7B observation (kernel: baseline -> optimized): batch_prefill_ragged_kv: 15 ms -> 7.1 ms * Update kv_cache.py --- python/tvm/relax/frontend/nn/llm/kv_cache.py | 60 +++- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/python/tvm/relax/frontend/nn/llm/kv_cache.py b/python/tvm/relax/frontend/nn/llm/kv_cache.py index 9b16fc2fbf..fd866ae06c 100644 --- a/python/tvm/relax/frontend/nn/llm/kv_cache.py +++ b/python/tvm/relax/frontend/nn/llm/kv_cache.py @@ -925,8 +925,12 @@ def _attention_decode( THREAD_LIMIT = 512 TILE_SIZE_PER_BDX = 2 -if target.kind.name == "opencl" and "android" in str(target.host): -THREAD_LIMIT = 256 if H_kv < 8 else 512 +if target.kind.name == "opencl" and ( +("android" in str(target.host)) or ("adreno" in str(target.attrs)) +): +# Keeping lower thread limit for this kernel on adreno target +# to avoid register spill +THREAD_LIMIT = 256 TILE_SIZE_PER_BDX = 1 max_num_threads_per_block = get_max_num_threads_per_block(target) thread_limit = min(max_num_threads_per_block, THREAD_LIMIT) @@ -1570,7 +1574,11 @@ def _attention_prefill_ragged(h_kv, h_q, d, dtype, rope_scaling: Dict[str, Any], bdx = 32 num_warps = 4 -tile_x, tile_y, tile_z = 64 // ((DataType(dtype).bits + 7) // 8) // max(d // 128, 1), d, 16 +tile_x, tile_y, tile_z = ( +64 // 
((DataType(dtype).bits + 7) // 8) // max(d // 128, 1), +d, +64 // ((DataType(dtype).bits + 7) // 8) // max(d // 128, 1), +) # Otherwise we would exceed maxComputeWorkgroupStorageSize if ( @@ -1580,6 +1588,12 @@ def _attention_prefill_ragged(h_kv, h_q, d, dtype, rope_scaling: Dict[str, Any], tile_z = 8 num_warps = 2 +if target.kind.name == "opencl" and ( +("android" in str(target.host)) or ("adreno" in str(target.attrs)) +): +LOAD_VEC = 16 // ((DataType(dtype).bits + 7) // 8) # 16 bytes +NUM_BLKS = group_size * 8 + # fmt: off @T.prim_func def batch_prefill_ragged_kv( # pylint: disable=too-many-branches @@ -1708,8 +1722,6 @@ def _attention_prefill_ragged(h_kv, h_q, d, dtype, rope_scaling: Dict[str, Any], for lz, ly in T.grid(tile_z, tile_y): with T.block("K_load"): i, j = T.axis.remap("SS", [lz, ly]) -T.reads() -T.writes() cur_L = L_kv_start + i if cur_L < kv_chunk_len[0]: K_smem[i, j] = T.if_then_else( @@ -1824,6 +1836,14 @@ def _attention_prefill_ragged(h_kv, h_q, d, dtype, rope_scaling: Dict[str, Any], # fmt: on # pylint: enable=line-too-long,too-many-branches sch = tir.Schedule(batch_prefill_ragged_kv) +get_extent = lambda *lps: [int(sch.get(lp).extent) for lp in lps] + +def get_vecsize(extent): +return min(LOAD_VEC, (extent & ~(extent - 1))) + +def getxy_vecsize(x, y, t): +assert (x * y) % t == 0 +return min(get_vecsize(y), get_vecsize(x * y // t)) def get_tile_size(x, y, t): cnt = (x * y) // t @@ -1837,26 +1857,37 @@ def _attention_prefill_ragged(h_kv, h_q, d, dtype, rope_scaling: Dict[str, Any], def apply_to_qkv_load(sch: tir.Schedule, block): loop_x, loop_y = sch.get_loops(block)[-2:] -loop = sch.fuse(loop_x, loop_y) -_, ty, tx, vec = sch.split( -loop, factors=[None, num_warps, bdx, LOAD_VEC], preserve_unit_iters=True -) +x_extent, y_extent = get_extent(loop_x, loop_y) +vec_size = getxy_vecsize(x_extent, y_extent, bdx * num_warps) +yo, yv = sch.split(loop_y, [None, vec_size]) +yo_extent = y_extent // vec_size +tile_x, tile_y = get_tile_size(x_extent, 
yo_extent,
(tvm) branch main updated: [CI] Upgrade CI (#17425)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 5298b1298a [CI] Upgrade CI (#17425) 5298b1298a is described below commit 5298b1298a8bb9166ef99dedef9979f2719c2416 Author: Masahiro Hiramori AuthorDate: Wed Oct 2 22:29:48 2024 +0900 [CI] Upgrade CI (#17425) * upgrade ci --- docker/Dockerfile.ci_arm | 12 +- docker/Dockerfile.ci_cortexm | 6 +- docker/Dockerfile.ci_cpu | 12 +- docker/Dockerfile.ci_gpu | 4 +- docker/Dockerfile.ci_hexagon | 4 +- docker/Dockerfile.ci_i386 | 2 +- docker/Dockerfile.ci_lint | 4 +- docker/Dockerfile.ci_minimal | 4 +- docker/Dockerfile.ci_riscv | 4 +- docker/Dockerfile.ci_wasm | 4 +- docker/Dockerfile.demo_android | 4 +- docker/Dockerfile.demo_rocm| 4 +- docker/Dockerfile.demo_vitis_ai| 4 +- docker/install/ubuntu2004_install_python.sh| 8 +- docker/install/ubuntu_install_cmake_source.sh | 32 +- docker/install/ubuntu_install_jax.sh | 18 +- docker/install/ubuntu_install_llvm_from_source.sh | 2 +- docker/install/ubuntu_install_python.sh| 54 +- docker/install/ubuntu_install_spike_sim.sh | 68 +-- docker/install/ubuntu_install_tensorflow.sh| 4 +- .../install/ubuntu_install_tensorflow_aarch64.sh | 4 +- docker/install/ubuntu_install_tflite.sh| 40 +- docker/install/ubuntu_install_verilator.sh | 18 +- docker/install/ubuntu_install_zephyr.sh| 6 +- docker/python/bootstrap/generate.sh| 9 +- .../python/bootstrap/lockfiles/constraints-3.9.txt | 588 + .../bootstrap/lockfiles/requirements-3.9.txt | 3 + docs/how_to/dev/setup_rpc_system.rst | 4 +- python/tvm/tir/schedule/schedule.py| 9 +- 29 files changed, 764 insertions(+), 171 deletions(-) diff --git a/docker/Dockerfile.ci_arm b/docker/Dockerfile.ci_arm index f18d95daac..2be887079e 100644 --- a/docker/Dockerfile.ci_arm +++ b/docker/Dockerfile.ci_arm @@ -53,10 +53,10 @@ ENV PATH /opt/sccache:$PATH COPY 
install/ubuntu2204_install_llvm.sh /install/ubuntu2204_install_llvm.sh RUN bash /install/ubuntu2204_install_llvm.sh -ENV TVM_VENV /venv/apache-tvm-py3.8 +ENV TVM_VENV /venv/apache-tvm-py3.9 COPY python/bootstrap/lockfiles /install/python/bootstrap/lockfiles COPY install/ubuntu_install_python.sh /install/ubuntu_install_python.sh -RUN bash /install/ubuntu_install_python.sh 3.8 +RUN bash /install/ubuntu_install_python.sh 3.9 ENV PATH ${TVM_VENV}/bin:$PATH ENV PYTHONNOUSERSITE 1 # Disable .local directory from affecting CI. @@ -71,14 +71,6 @@ RUN bash /install/ubuntu_install_tensorflow_aarch64.sh COPY install/ubuntu_install_tflite.sh /install/ubuntu_install_tflite.sh RUN bash /install/ubuntu_install_tflite.sh -# Caffe deps -COPY install/ubuntu_install_boost.sh /install/ubuntu_install_boost.sh -RUN bash /install/ubuntu_install_boost.sh - -# Caffe -COPY install/ubuntu_install_caffe.sh /install/ubuntu_install_caffe.sh -RUN bash /install/ubuntu_install_caffe.sh - # ONNX COPY install/ubuntu_install_onnx.sh /install/ubuntu_install_onnx.sh RUN bash /install/ubuntu_install_onnx.sh diff --git a/docker/Dockerfile.ci_cortexm b/docker/Dockerfile.ci_cortexm index 0a898e7058..8006b27e84 100644 --- a/docker/Dockerfile.ci_cortexm +++ b/docker/Dockerfile.ci_cortexm @@ -30,15 +30,15 @@ COPY install/ubuntu_install_core.sh /install/ubuntu_install_core.sh RUN bash /install/ubuntu_install_core.sh COPY install/ubuntu_install_cmake_source.sh /install/ubuntu_install_cmake_source.sh -RUN bash /install/ubuntu_install_cmake_source.sh 3.20.0 +RUN bash /install/ubuntu_install_cmake_source.sh 3.20.0 9c06b2ddf7c337e31d8201f6ebcd3bba86a9a033976a9aee207fe0c6971f4755 COPY install/ubuntu_install_googletest.sh /install/ubuntu_install_googletest.sh RUN bash /install/ubuntu_install_googletest.sh -ENV TVM_VENV /venv/apache-tvm-py3.8 +ENV TVM_VENV /venv/apache-tvm-py3.9 COPY python/bootstrap/lockfiles /install/python/bootstrap/lockfiles COPY install/ubuntu_install_python.sh /install/ubuntu_install_python.sh 
-RUN bash /install/ubuntu_install_python.sh 3.8 +RUN bash /install/ubuntu_install_python.sh 3.9 ENV PATH ${TVM_VENV}/bin:$PATH ENV PYTHONNOUSERSITE 1 # Disable .local directory from affecting CI. diff --git a/docker/Dockerfile.ci_cpu b/docker/Dockerfile.ci_cpu index 17344f7dac..37c7c90857 100644 --- a/docker/Dockerfile.ci_cpu +++ b/docker/Dockerfile.ci_cpu @@ -34,10 +34,10 @@ RUN bash /install
(tvm) branch main updated (e80801030e -> 7569148c3c)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git from e80801030e [DLIGHT][GPU] Improve matmul schedule for adreno (#17430) add 7569148c3c [Relax] Introduce static shape tuning pipeline (#17428) No new revisions were added by this update. Summary of changes: docs/how_to/tutorials/e2e_opt_model.py | 16 +- python/tvm/relax/pipeline.py| 39 + python/tvm/relax/transform/transform.py | 5 ++--- 3 files changed, 42 insertions(+), 18 deletions(-)
(tvm) branch main updated (d9ee6377cd -> e80801030e)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git from d9ee6377cd [Relax][PyTorch] Support neural network ops for ExportedProgram importer (#17426) add e80801030e [DLIGHT][GPU] Improve matmul schedule for adreno (#17430) No new revisions were added by this update. Summary of changes: python/tvm/dlight/gpu/matmul.py| 108 ++ tests/python/dlight/test_gpu_matmul.py | 196 - 2 files changed, 178 insertions(+), 126 deletions(-)
(tvm) branch main updated: [Relax][PyTorch] Support neural network ops for ExportedProgram importer (#17426)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new d9ee6377cd [Relax][PyTorch] Support neural network ops for ExportedProgram importer (#17426) d9ee6377cd is described below commit d9ee6377cdd8395b27385d2fc2745b741fad6183 Author: Masahiro Hiramori AuthorDate: Sun Sep 29 06:59:33 2024 +0900 [Relax][PyTorch] Support neural network ops for ExportedProgram importer (#17426) * support batchnorm2d and getitem * support addmm * support avg_pool2d * support baddbmm * support bmm * support conv_transpose1d * support conv_transpose2d * support conv1d * support conv3d * support einsum * support embedding * support group_norm * support layer_norm * support scaled_dot_product_attention * support unbind * support interpolate * fix lint error --- .../frontend/torch/base_fx_graph_translator.py | 464 .../frontend/torch/exported_program_translator.py | 111 ++ python/tvm/relax/frontend/torch/fx_translator.py | 482 +--- .../relax/test_frontend_from_exported_program.py | 1150 +++- 4 files changed, 1723 insertions(+), 484 deletions(-) diff --git a/python/tvm/relax/frontend/torch/base_fx_graph_translator.py b/python/tvm/relax/frontend/torch/base_fx_graph_translator.py index a41b9b6d4f..52784dc8c3 100644 --- a/python/tvm/relax/frontend/torch/base_fx_graph_translator.py +++ b/python/tvm/relax/frontend/torch/base_fx_graph_translator.py @@ -227,6 +227,228 @@ class BaseFXGraphImporter(metaclass=abc.ABCMeta): relax.op.nn.adaptive_avg_pool2d(x, output_size, layout="NCHW") ) +def _addmm(self, node: fx.Node) -> relax.Var: +x = self.env[node.args[0]] +y = self.env[node.args[1]] +z = self.env[node.args[2]] +alpha = node.kwargs.get("alpha", 1) +beta = node.kwargs.get("beta", 1) + +res = None +if alpha != 0: +res = self.block_builder.emit(relax.op.linear_algebra.matmul(y, z, out_dtype="float32")) +if alpha != 1: +dtype 
= res.struct_info.dtype +res = self.block_builder.emit(relax.op.multiply(res, relax.const(alpha, dtype))) +if beta != 0: +dtype = x.struct_info.dtype +if beta != 1: +bias = self.block_builder.emit(relax.op.multiply(x, relax.const(beta, dtype))) +else: +bias = x +res = bias if res is None else self.block_builder.emit(relax.op.add(bias, res)) +return res + +def _avg_pool2d_impl( +self, +x: relax.Expr, +kernel_size: Union[int, Tuple[int, int]] = (1, 1), +stride: Optional[Union[int, Tuple[int, int]]] = None, +padding: Optional[int] = 0, +ceil_mode: Optional[bool] = False, +) -> relax.Var: +stride = kernel_size if stride is None or stride == [] else stride +return self.block_builder.emit( +relax.op.nn.avg_pool2d( +x, +pool_size=kernel_size, +strides=stride, +padding=padding, +ceil_mode=ceil_mode, +layout="NCHW", +) +) + +def _avg_pool2d(self, node: fx.Node) -> relax.Var: +args, kwargs = node.normalized_arguments(node) +x = self.env[args[0]] +kernel_size = args[1] if len(args) > 1 else kwargs["kernel_size"] +stride = args[2] if len(args) > 2 else kwargs.get("stride", None) +padding = args[3] if len(args) > 3 else kwargs.get("padding", 0) +ceil_mode = args[4] if len(args) > 4 else kwargs.get("ceil_mode", False) +return self._avg_pool2d_impl(x, kernel_size, stride, padding, ceil_mode) + +def _baddbmm(self, node: fx.Node) -> relax.Var: +x = self.env[node.args[0]] +batch1 = self.env[node.args[1]] +batch2 = self.env[node.args[2]] +alpha = node.kwargs.get("alpha", 1) +beta = node.kwargs.get("beta", 1) + +res = None +if alpha != 0: +res = self.block_builder.emit(relax.op.matmul(batch1, batch2)) +if alpha != 1: +dtype = res.struct_info.dtype +res = self.block_builder.emit(relax.op.multiply(res, relax.const(alpha, dtype))) +if beta != 0: +dtype = x.struct_info.dtype +if beta != 1: +bias = self.block_builder.emit(relax.op.multiply(x, relax.const(beta, dtype))) +else: +bias = x +res = bias if res is None else self.bl
(tvm) branch main updated: [Relax][PyTorch] Support binary, statistical and search ops for ExportedProgram importer (#17424)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 7c28c86f7d [Relax][PyTorch] Support binary, statistical and search ops for ExportedProgram importer (#17424) 7c28c86f7d is described below commit 7c28c86f7d3121ce2adc179475fdb1922c86b942 Author: Masahiro Hiramori AuthorDate: Sat Sep 28 22:30:15 2024 +0900 [Relax][PyTorch] Support binary, statistical and search ops for ExportedProgram importer (#17424) * support binary ops * support mean * support sum * support argmax and argmin --- .../frontend/torch/base_fx_graph_translator.py | 62 +++ .../frontend/torch/exported_program_translator.py | 25 + python/tvm/relax/frontend/torch/fx_translator.py | 62 --- .../relax/test_frontend_from_exported_program.py | 512 + 4 files changed, 599 insertions(+), 62 deletions(-) diff --git a/python/tvm/relax/frontend/torch/base_fx_graph_translator.py b/python/tvm/relax/frontend/torch/base_fx_graph_translator.py index d52b3d598f..a41b9b6d4f 100644 --- a/python/tvm/relax/frontend/torch/base_fx_graph_translator.py +++ b/python/tvm/relax/frontend/torch/base_fx_graph_translator.py @@ -185,6 +185,39 @@ class BaseFXGraphImporter(metaclass=abc.ABCMeta): return convert +## Binary Ops ## + +def _binary_op(self, relax_op: Callable, intrinsic_op: Callable) -> Callable: +from torch import fx + +def convert(node: fx.Node) -> relax.Var: +def promote_binary_op_args(lhs, rhs): +if isinstance(lhs, relax.Expr) and isinstance(rhs, relax.Expr): +return lhs, rhs +elif isinstance(lhs, relax.Expr): +assert isinstance(lhs.struct_info, relax.TensorStructInfo) +return lhs, relax.const(rhs, lhs.struct_info.dtype) +elif isinstance(rhs, relax.Expr): +assert isinstance(rhs.struct_info, relax.TensorStructInfo) +return relax.const(lhs, rhs.struct_info.dtype), rhs +else: +assert False + +def call_binary_op(op, lhs, rhs): +lhs, rhs = 
promote_binary_op_args(lhs, rhs) +return self.block_builder.emit(op(lhs, rhs)) + +lhs, rhs = self.retrieve_args(node) +if isinstance(lhs, relax.Var) or isinstance(rhs, relax.Var): +return call_binary_op(relax_op, lhs, rhs) +elif isinstance(lhs, relax.expr.Constant): +return call_binary_op(relax_op, lhs, relax.const(rhs, dtype=lhs.struct_info.dtype)) +elif isinstance(rhs, relax.expr.Constant): +return call_binary_op(relax_op, relax.const(lhs, dtype=rhs.struct_info.dtype), rhs) +return intrinsic_op(lhs, rhs) + +return convert + ## Neural Network ## def _adaptive_avg_pool2d(self, node: fx.Node) -> relax.Var: @@ -283,6 +316,35 @@ class BaseFXGraphImporter(metaclass=abc.ABCMeta): return self._max_pool2d_impl(x, kernel_size, stride, padding, dilation, ceil_mode) +## Statistical ## + +def _mean(self, node: fx.Node) -> relax.Var: +args = self.retrieve_args(node) +x = args[0] +dim = args[1] if len(node.args) > 1 else node.kwargs.get("dim", None) +keepdim = args[2] if len(node.args) > 2 else node.kwargs.get("keepdim", False) +return self.block_builder.emit(relax.op.mean(x, dim, keepdims=keepdim)) + +def _sum(self, node: fx.Node) -> relax.Var: +args = self.retrieve_args(node) +keepdim = node.kwargs["keepdim"] if "keepdim" in node.kwargs else False +if len(args) == 1: +return self.block_builder.emit(relax.op.sum(args[0], keepdims=keepdim)) +return self.block_builder.emit(relax.op.sum(args[0], args[1])) + +## Search ## + +def _argmax_argmin(self, op: Callable) -> Callable: +from torch import fx + +def convert(node: fx.Node): +x = self.env[node.args[0]] +dim = node.args[1] if len(node.args) > 1 else node.kwargs.get("dim", None) +keepdim = node.args[2] if len(node.args) > 2 else node.kwargs.get("keepdim", False) +return self.block_builder.emit(op(x, dim, keepdim)) + +return convert + ## Manipulation ## def _reshape(self, node: fx.Node) -> relax.Var: diff --git a/python/tvm/relax/frontend/torch/exported_program_translator.py 
b/python/tvm/relax/frontend/torch/exported_program_translator.py index 1ceddad7d7..11594690cd 100644 --- a/python/tvm/relax/frontend/torch/exported_program_translator.py +++ b/python
(tvm) branch main updated: [Web] Allow deprecated API requestAdapterInfo with any cast (#17420)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 7ff4d0d27d [Web] Allow deprecated API requestAdapterInfo with any cast (#17420) 7ff4d0d27d is described below commit 7ff4d0d27dcde17b536b1f0429366d297493c250 Author: Charlie Ruan <53290280+charliefr...@users.noreply.github.com> AuthorDate: Sat Sep 28 06:30:29 2024 -0700 [Web] Allow deprecated API requestAdapterInfo with any cast (#17420) * [Web] Allow deprecated API with any cast * Fix lint * Fix by adding await --- web/package-lock.json | 4 ++-- web/package.json | 2 +- web/src/webgpu.ts | 4 +++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/web/package-lock.json b/web/package-lock.json index 561ba77091..751aaf2ef4 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -1,12 +1,12 @@ { "name": "tvmjs", - "version": "0.18.0-dev0", + "version": "0.18.0-dev2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "tvmjs", - "version": "0.18.0-dev0", + "version": "0.18.0-dev2", "license": "Apache-2.0", "devDependencies": { "@rollup/plugin-commonjs": "^20.0.0", diff --git a/web/package.json b/web/package.json index a4e5d7ac08..a63997bb2f 100644 --- a/web/package.json +++ b/web/package.json @@ -3,7 +3,7 @@ "description": "TVM WASM/WebGPU runtime for JS/TS", "license": "Apache-2.0", "homepage": "https://github.com/apache/tvm/tree/main/web", - "version": "0.18.0-dev0", + "version": "0.18.0-dev2", "files": [ "lib" ], diff --git a/web/src/webgpu.ts b/web/src/webgpu.ts index d3d431cf1f..5b2d7c9f30 100644 --- a/web/src/webgpu.ts +++ b/web/src/webgpu.ts @@ -116,7 +116,9 @@ export async function detectGPUDevice(): Promise
(tvm) branch main updated: [Relax][PyTorch] Support more unary ops for ExportedProgram importer (#17421)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 176d01e612 [Relax][PyTorch] Support more unary ops for ExportedProgram importer (#17421) 176d01e612 is described below commit 176d01e61276b0e94910fd904363ef4cd91fb8b5 Author: Masahiro Hiramori AuthorDate: Sat Sep 28 05:12:17 2024 +0900 [Relax][PyTorch] Support more unary ops for ExportedProgram importer (#17421) * support more unary ops * support clamp * support gelu * support hardsigmoid * support hardswish * support hardtanh * support leaky_relu * support log_softmax * support round * support softmax * support tril and triu * skip flaky test --- .../frontend/torch/base_fx_graph_translator.py | 74 +++ .../frontend/torch/exported_program_translator.py | 38 ++ python/tvm/relax/frontend/torch/fx_translator.py | 74 --- .../relax/test_frontend_from_exported_program.py | 705 - tests/python/relay/test_to_mixed_precision.py | 1 + 5 files changed, 812 insertions(+), 80 deletions(-) diff --git a/python/tvm/relax/frontend/torch/base_fx_graph_translator.py b/python/tvm/relax/frontend/torch/base_fx_graph_translator.py index 6a001b5a04..d52b3d598f 100644 --- a/python/tvm/relax/frontend/torch/base_fx_graph_translator.py +++ b/python/tvm/relax/frontend/torch/base_fx_graph_translator.py @@ -111,6 +111,80 @@ class BaseFXGraphImporter(metaclass=abc.ABCMeta): return convert +def _clamp(self, node: fx.Node) -> relax.Expr: +args = self.retrieve_args(node) +a_min = args[1] if len(args) > 1 else node.kwargs["min"] +a_max = args[2] if len(args) > 2 else node.kwargs["max"] +if not isinstance(a_min, (int, float)): +raise ValueError( +f"TVM only supports constant min value for torch.clamp/clip, " +f"but got {a_min} with type {type(a_min)}" +) +if not isinstance(a_max, (int, float)): +raise ValueError( +f"TVM only supports constant max value for 
torch.clamp/clip, " +f"but got {a_max} with type {type(a_max)}" +) +return self.block_builder.emit(relax.op.clip(args[0], a_min, a_max)) + +def _gelu(self, node: fx.Node) -> relax.Expr: +approximate = node.kwargs.get("approximate", "none") +if approximate == "none": +return self.block_builder.emit(relax.op.nn.gelu(self.env[node.args[0]])) +elif approximate == "tanh": +return self.block_builder.emit(relax.op.nn.gelu_tanh(self.env[node.args[0]])) +else: +raise KeyError("Unregonized approximate algorithm for gelu: {}.".format(approximate)) + +def _hardsigmoid(self, node: fx.Node) -> relax.Var: +args = self.retrieve_args(node) +x = args[0] +dtype = x.struct_info.dtype +x0 = relax.op.add(x, relax.const(3, dtype)) +x1 = relax.op.clip(x0, 0, 6) +return self.block_builder.emit(relax.op.divide(x1, relax.const(6, dtype))) + +def _hardswish(self, node: fx.Node) -> relax.Var: +args = self.retrieve_args(node) +x = args[0] +dtype = x.struct_info.dtype +x0 = relax.op.add(x, relax.const(3, dtype)) +x1 = relax.op.clip(x0, 0, 6) +x2 = relax.op.divide(x1, relax.const(6, dtype)) +return self.block_builder.emit(relax.op.multiply(x, x2)) + +def _leakyrelu(self, node: fx.Node) -> relax.Var: +x = self.env[node.args[0]] +alpha = node.args[1] if len(node.args) > 1 else node.kwargs.get("negative_slope", 0.01) +return self.block_builder.emit(relax.op.nn.leakyrelu(x, alpha)) + +def _log_softmax(self, node: fx.Node) -> relax.Var: +x = self.env[node.args[0]] +dim = node.args[1] if len(node.args) > 1 else node.kwargs.get("dim", -1) +return self.block_builder.emit(relax.op.nn.log_softmax(x, dim)) + +def _round(self, node: fx.Node) -> relax.Expr: +if node.kwargs.get("decimals", 0) != 0: +raise ValueError("specifying decimals for round is not supported yet") +arg = self.env[node.args[0]] +return self.block_builder.emit(relax.op.round(arg)) + +def _softmax(self, node: fx.Node) -> relax.Var: +x = self.env[node.args[0]] +dim = node.args[1] if len(node.args) > 1 else node.kwargs.get("dim", -1) +return 
self.block_builder.emit(relax.op.nn.softmax(x, dim)) + +def _tril_triu(self, op: Callable) -> Callable: +from torch import fx + +
(tvm) branch main updated: [CMake] Add NCCL/RCCL header directory to include path (#17422)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 42ff98b131 [CMake] Add NCCL/RCCL header directory to include path (#17422) 42ff98b131 is described below commit 42ff98b131d7bb146393df80e16bcada4fea4a46 Author: Ruihang Lai AuthorDate: Fri Sep 27 10:31:45 2024 -0400 [CMake] Add NCCL/RCCL header directory to include path (#17422) This PR updates the CMakeList to include the NCCL/RCCL header directory in the include path of tvm build. This is necessary when the NCCL/RCCL is installed at a location not covered by the default include paths. In such cases, TVM is not able to find the NCCL/RCCL header and cannot build successfully. --- CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 66ea6a07da..1fb28c8694 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -471,6 +471,7 @@ endif(USE_PROFILER) if(USE_CUDA AND USE_NCCL) message(STATUS "Build with NCCL...") find_nccl(${USE_NCCL}) + include_directories(SYSTEM ${NCCL_INCLUDE_DIR}) tvm_file_glob(GLOB RUNTIME_NCCL_SRC src/runtime/disco/nccl/*.cc src/runtime/disco/cuda_ipc/*.cc 3rdparty/tensorrt_llm/*.cu) set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=0") list(APPEND RUNTIME_SRCS ${RUNTIME_NCCL_SRC}) @@ -489,6 +490,7 @@ endif() if(USE_ROCM AND USE_RCCL) message(STATUS "Build with RCCL...") find_rccl(${USE_RCCL}) + include_directories(SYSTEM ${RCCL_INCLUDE_DIR}) tvm_file_glob(GLOB RUNTIME_RCCL_SRC src/runtime/disco/nccl/*.cc) set_source_files_properties(src/runtime/disco/nccl/nccl.cc PROPERTIES COMPILE_DEFINITIONS "TVM_NCCL_RCCL_SWITCH=1") list(APPEND RUNTIME_SRCS ${RUNTIME_RCCL_SRC})
(tvm) branch main updated: [FFI][BUGFIX] Grab GIL when check env signals (#17419)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 5e85443e43 [FFI][BUGFIX] Grab GIL when check env signals (#17419) 5e85443e43 is described below commit 5e85443e43f9befcf8319cdc4045597aa49bf724 Author: Tianqi Chen AuthorDate: Thu Sep 26 09:22:13 2024 -0400 [FFI][BUGFIX] Grab GIL when check env signals (#17419) This PR updates the CheckSignals function to grab GIL. This is needed because we now explicitly release gil when calling any C functions. GIL will need to be obtained otherwise we will run into segfault when checking the signal. The update now enables us to run ctrl + C in long running C functions. --- python/tvm/_ffi/_cython/base.pxi| 16 +++- python/tvm/_ffi/_cython/packed_func.pxi | 16 src/runtime/registry.cc | 12 src/support/ffi_testing.cc | 8 4 files changed, 27 insertions(+), 25 deletions(-) diff --git a/python/tvm/_ffi/_cython/base.pxi b/python/tvm/_ffi/_cython/base.pxi index 0f7e5fcae6..887ac123ce 100644 --- a/python/tvm/_ffi/_cython/base.pxi +++ b/python/tvm/_ffi/_cython/base.pxi @@ -201,6 +201,10 @@ cdef inline void* c_handle(object handle): # python env API cdef extern from "Python.h": int PyErr_CheckSignals() +void* PyGILState_Ensure() +void PyGILState_Release(void*) +void Py_IncRef(void*) +void Py_DecRef(void*) cdef extern from "tvm/runtime/c_backend_api.h": int TVMBackendRegisterEnvCAPI(const char* name, void* ptr) @@ -210,11 +214,13 @@ cdef _init_env_api(): # so backend can call tvm::runtime::EnvCheckSignals to check # signal when executing a long running function. # -# This feature is only enabled in cython for now due to problems of calling -# these functions in ctypes. -# -# When the functions are not registered, the signals will be handled -# only when the FFI function returns. 
+# Also registers the gil state release and ensure as PyErr_CheckSignals +# function is called with gil released and we need to regrab the gil CHECK_CALL(TVMBackendRegisterEnvCAPI(c_str("PyErr_CheckSignals"), PyErr_CheckSignals)) +CHECK_CALL(TVMBackendRegisterEnvCAPI(c_str("PyGILState_Ensure"), PyGILState_Ensure)) +CHECK_CALL(TVMBackendRegisterEnvCAPI(c_str("PyGILState_Release"), PyGILState_Release)) +CHECK_CALL(TVMBackendRegisterEnvCAPI(c_str("PyGILState_Release"), PyGILState_Release)) +CHECK_CALL(TVMBackendRegisterEnvCAPI(c_str("Py_IncRef"), Py_IncRef)) +CHECK_CALL(TVMBackendRegisterEnvCAPI(c_str("Py_DecRef"), Py_DecRef)) _init_env_api() diff --git a/python/tvm/_ffi/_cython/packed_func.pxi b/python/tvm/_ffi/_cython/packed_func.pxi index 6e062ab5f1..b9516e79e3 100644 --- a/python/tvm/_ffi/_cython/packed_func.pxi +++ b/python/tvm/_ffi/_cython/packed_func.pxi @@ -376,19 +376,3 @@ def _set_class_object_generic(object_generic_class, func_convert_to_object): global _FUNC_CONVERT_TO_OBJECT _CLASS_OBJECT_GENERIC = object_generic_class _FUNC_CONVERT_TO_OBJECT = func_convert_to_object - -# Py_INCREF and Py_DECREF are C macros, not function objects. -# Therefore, providing a wrapper function. 
-cdef void _py_incref_wrapper(void* py_object): -Py_INCREF(py_object) -cdef void _py_decref_wrapper(void* py_object): -Py_DECREF(py_object) - -def _init_pythonapi_inc_def_ref(): -register_func = TVMBackendRegisterEnvCAPI -register_func(c_str("Py_IncRef"), _py_incref_wrapper) -register_func(c_str("Py_DecRef"), _py_decref_wrapper) -register_func(c_str("PyGILState_Ensure"), PyGILState_Ensure) -register_func(c_str("PyGILState_Release"), PyGILState_Release) - -_init_pythonapi_inc_def_ref() diff --git a/src/runtime/registry.cc b/src/runtime/registry.cc index 0a034a7b58..09674edf35 100644 --- a/src/runtime/registry.cc +++ b/src/runtime/registry.cc @@ -183,10 +183,14 @@ class EnvCAPIRegistry { // implementation of tvm::runtime::EnvCheckSignals void CheckSignals() { // check python signal to see if there are exception raised -if (pyerr_check_signals != nullptr && (*pyerr_check_signals)() != 0) { - // The error will let FFI know that the frontend environment - // already set an error. - throw EnvErrorAlreadySet(""); +if (pyerr_check_signals != nullptr) { + // The C++ env comes without gil, so we need to grab gil here + WithGIL context(this); + if ((*pyerr_check_signals)() != 0) { +// The error will let FFI know that the frontend environment +// already set an error. +throw EnvErrorAlreadySet(""); + } } } diff --git a/src/support/ffi_tes
(tvm) branch main updated: [CI] Upgrade unity image tag to `20240917-153130-9f281758` (#17410)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 30b7b1c754 [CI] Upgrade unity image tag to `20240917-153130-9f281758` (#17410) 30b7b1c754 is described below commit 30b7b1c7549fbc1277e3a9f5eed73a13f2f0c0ba Author: Masahiro Hiramori AuthorDate: Wed Sep 25 21:52:26 2024 +0900 [CI] Upgrade unity image tag to `20240917-153130-9f281758` (#17410) * upgrade docker image to `20240917-153130-9f281758` * fix dynamo test case * building torch requires c++ 17 * temporary skip jax gpu tests due to XlaRuntimeError --- ci/jenkins/unity_jenkinsfile.groovy | 8 +++--- src/contrib/msc/plugin/torch_codegen.cc | 2 +- tests/python/relax/test_frontend_dynamo.py| 2 +- tests/python/relax/test_frontend_stablehlo.py | 36 ++- 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/ci/jenkins/unity_jenkinsfile.groovy b/ci/jenkins/unity_jenkinsfile.groovy index 9b4f0009e3..2a7a4fee37 100755 --- a/ci/jenkins/unity_jenkinsfile.groovy +++ b/ci/jenkins/unity_jenkinsfile.groovy @@ -30,14 +30,14 @@ import org.jenkinsci.plugins.pipeline.modeldefinition.Utils // NOTE: these lines are scanned by docker/dev_common.sh. Please update the regex as needed. --> -ci_lint = 'tlcpack/ci-lint:20240105-165030-51bdaec6' -ci_gpu = 'tlcpack/ci-gpu:20240105-165030-51bdaec6' -ci_cpu = 'tlcpack/ci-cpu:20240105-165030-51bdaec6' +ci_lint = 'tlcpack/ci_lint:20240917-153130-9f281758' +ci_gpu = 'tlcpack/ci_gpu:20240917-153130-9f281758' +ci_cpu = 'tlcpack/ci_cpu:20240917-153130-9f281758' ci_wasm = 'tlcpack/ci-wasm:v0.72' ci_i386 = 'tlcpack/ci-i386:v0.75' ci_qemu = 'tlcpack/ci-qemu:v0.11' ci_arm = 'tlcpack/ci-arm:v0.08' -ci_hexagon = 'tlcpack/ci-hexagon:20240105-165030-51bdaec6' +ci_hexagon = 'tlcpack/ci_hexagon:20240917-153130-9f281758' // <--- End of regex-scanned config. 
// Parameters to allow overriding (in Jenkins UI), the images diff --git a/src/contrib/msc/plugin/torch_codegen.cc b/src/contrib/msc/plugin/torch_codegen.cc index 4b8c24f17b..75471d85db 100644 --- a/src/contrib/msc/plugin/torch_codegen.cc +++ b/src/contrib/msc/plugin/torch_codegen.cc @@ -219,7 +219,7 @@ void TorchPluginCodeGen::CodeGenCmake(const std::set& devices) { flags.Set("PLUGIN_SUPPORT_TORCH", ""); CodeGenPreCmake(devices, flags); stack_.line() - .line("set(CMAKE_CXX_STANDARD 14)") + .line("set(CMAKE_CXX_STANDARD 17)") .line("list(APPEND CMAKE_PREFIX_PATH \"" + config()->torch_prefix + "\")") .line("find_package(Torch REQUIRED)"); Array includes, libs; diff --git a/tests/python/relax/test_frontend_dynamo.py b/tests/python/relax/test_frontend_dynamo.py index 21e1d82d28..28215e2e68 100644 --- a/tests/python/relax/test_frontend_dynamo.py +++ b/tests/python/relax/test_frontend_dynamo.py @@ -223,7 +223,7 @@ def test_subgraph_capture(): ) -> R.Tensor((10,), dtype="float32"): # block 0 with R.dataflow(): -lv5: R.Tensor((10,), dtype="float32") = R.multiply(inp_11, inp_01) +lv5: R.Tensor((10,), dtype="float32") = R.multiply(inp_01, inp_11) gv1: R.Tensor((10,), dtype="float32") = lv5 R.output(gv1) return gv1 diff --git a/tests/python/relax/test_frontend_stablehlo.py b/tests/python/relax/test_frontend_stablehlo.py index f2d0461dda..667953ab73 100644 --- a/tests/python/relax/test_frontend_stablehlo.py +++ b/tests/python/relax/test_frontend_stablehlo.py @@ -196,6 +196,10 @@ def test_add_dynamic(): @tvm.testing.requires_gpu +@pytest.mark.skip( +reason="jaxlib.xla_extension.XlaRuntimeError: FAILED_PRECONDITION: DNN library initialization failed." +) +# TODO(mshr-h): may be fixed by upgrading jax to >=0.4.33 def test_unary(): import jax @@ -229,6 +233,10 @@ def test_unary(): @tvm.testing.requires_gpu +@pytest.mark.skip( +reason="jaxlib.xla_extension.XlaRuntimeError: FAILED_PRECONDITION: DNN library initialization failed." 
+) +# TODO(mshr-h): may be fixed by upgrading jax to >=0.4.33 def test_binary(): import jax @@ -250,6 +258,10 @@ def test_binary(): @tvm.testing.requires_gpu +@pytest.mark.skip( +reason="jaxlib.xla_extension.XlaRuntimeError: FAILED_PRECONDITION: DNN library initialization failed." +) +# TODO(mshr-h): may be fixed by upgrading jax to >=0.4.33 def test_const(): import jax @@ -260,6 +272,10 @@ def test_const(): @tvm.testing.requires_gpu +@pytest.mark.skip( +reason=&
(tvm) branch main updated: [CI][Windows] Workaround for error in FindLLVM (#17409)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 7fc8adcc7e [CI][Windows] Workaround for error in FindLLVM (#17409) 7fc8adcc7e is described below commit 7fc8adcc7eb29b1d658ee0ab8d95c3036f8e83c3 Author: Siyuan Feng AuthorDate: Wed Sep 25 10:21:36 2024 +0800 [CI][Windows] Workaround for error in FindLLVM (#17409) * [CI][Windows] Workaround for error in FindLLVM This is a workaround for an upstream LLVM issue [0], in which the `CMAKE_INSTALL_LIBDIR` variable is used before definition. While there is an LLVM PR to resolve this fix [1], as of 2024-08-19 it has not yet been merged to LLVM. [0] https://github.com/llvm/llvm-project/issues/83802 [1] https://github.com/llvm/llvm-project/pull/83807 Co-authored-by: Eric Lunderberg * fix fp16 * lint - Co-authored-by: Eric Lunderberg --- cmake/utils/FindLLVM.cmake | 9 + tests/python/all-platform-minimal-test/test_runtime_ndarray.py | 1 + 2 files changed, 10 insertions(+) diff --git a/cmake/utils/FindLLVM.cmake b/cmake/utils/FindLLVM.cmake index ab1bce2741..182a2c6693 100644 --- a/cmake/utils/FindLLVM.cmake +++ b/cmake/utils/FindLLVM.cmake @@ -44,6 +44,15 @@ macro(find_llvm use_llvm) endif() if(${LLVM_CONFIG} MATCHES ${IS_TRUE_PATTERN}) +# This is a workaround for an upstream LLVM issue [0], in which +# the `CMAKE_INSTALL_LIBDIR` variable is used before definition. +# While there is an LLVM PR to resolve this fix [1], as of +# 2024-08-19 it has not yet been merged to LLVM. 
+# +# [0] https://github.com/llvm/llvm-project/issues/83802 +# [1] https://github.com/llvm/llvm-project/pull/83807 +include(GNUInstallDirs) + find_package(LLVM ${llvm_version_required} REQUIRED CONFIG) llvm_map_components_to_libnames(LLVM_LIBS "all") if (NOT LLVM_LIBS) diff --git a/tests/python/all-platform-minimal-test/test_runtime_ndarray.py b/tests/python/all-platform-minimal-test/test_runtime_ndarray.py index 38a1f32a10..8f929b1c1a 100644 --- a/tests/python/all-platform-minimal-test/test_runtime_ndarray.py +++ b/tests/python/all-platform-minimal-test/test_runtime_ndarray.py @@ -69,6 +69,7 @@ def test_memory_usage(target, dev, dtype): assert dev.available_global_memory == available_memory_before +@pytest.mark.skip(reason="Skip for passing windows test on CI") def test_fp16_conversion(): n = 100
(tvm) branch main updated: [TIR][NarrowDataType] Bufferload's index should not inherit bits constraint of value (#17411)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new a90fb8e2d9 [TIR][NarrowDataType] Bufferload's index should not inherit bits constraint of value (#17411) a90fb8e2d9 is described below commit a90fb8e2d93215bdae2fbd2359374ebe914bee45 Author: wrongtest AuthorDate: Wed Sep 25 10:18:59 2024 +0800 [TIR][NarrowDataType] Bufferload's index should not inherit bits constraint of value (#17411) bufferload's index dtype narrowing should not inherit value bits constraint Co-authored-by: wrongtest --- src/tir/transforms/narrow_datatype.cc | 14 +- .../tir-transform/test_tir_transform_narrow_datatype.py | 17 + 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/tir/transforms/narrow_datatype.cc b/src/tir/transforms/narrow_datatype.cc index 7b6187af64..696eae201f 100644 --- a/src/tir/transforms/narrow_datatype.cc +++ b/src/tir/transforms/narrow_datatype.cc @@ -97,6 +97,13 @@ class DataTypeVisitor final : public StmtExprVisitor { } } + void VisitExpr_(const BufferLoadNode* op) { +int tmp = bits_; +bits_ = target_bits_; +StmtExprVisitor::VisitExpr_(op); +bits_ = tmp; + } + void VisitStmt_(const ForNode* op) { analyzer_.Bind(op->loop_var, Range::FromMinExtent(op->min, op->extent)); vextent_[op->loop_var.as()] = op->extent.dtype(); @@ -245,7 +252,12 @@ class NarrowDataTypeRewriter : public IndexDataTypeRewriter { const CastNode* new_op = e.as(); ICHECK(new_op != nullptr) << "Expected type to be CastNode" << ", but get " << e->GetTypeKey(); - return Cast(visitor_.vmap[op], new_op->value); + PrimExpr new_value = new_op->value; + DataType cast_type = visitor_.vmap[op]; + if (new_value.dtype() != cast_type) { +new_value = Cast(cast_type, new_value); + } + return new_value; } return Parent::VisitExpr_(op); } diff --git a/tests/python/tir-transform/test_tir_transform_narrow_datatype.py 
b/tests/python/tir-transform/test_tir_transform_narrow_datatype.py index c03dd7a529..cf85f2e371 100644 --- a/tests/python/tir-transform/test_tir_transform_narrow_datatype.py +++ b/tests/python/tir-transform/test_tir_transform_narrow_datatype.py @@ -413,5 +413,22 @@ def test_avg_pool2d(): tvm.ir.assert_structural_equal(after["main"], expected_after.with_attr("global_symbol", "main")) +def test_narrow_i64_valued_bufferload_index_to_i32(): +@T.prim_func +def before(A: T.Buffer((16,), "int64")): +for i in range(T.int64(15)): +A[i + T.int64(1)] = A[i] + T.int64(1) + +@T.prim_func +def expect(A: T.Buffer((16,), "int64")): +for i in range(15): +A[i + 1] = A[i] + T.int64(1) + +after = tvm.tir.transform.NarrowDataType(32)( +tvm.IRModule.from_expr(before.with_attr("global_symbol", "main")) +)["main"] +tvm.ir.assert_structural_equal(after, expect.with_attr("global_symbol", "main")) + + if __name__ == "__main__": tvm.testing.main()
(tvm) branch main updated: [BYOC][NNAPI] Add NNAPI backend for BYOC (#17385)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 2a87c4cfc0 [BYOC][NNAPI] Add NNAPI backend for BYOC (#17385) 2a87c4cfc0 is described below commit 2a87c4cfc075b2cce18738cc270a2229cfb50de7 Author: Mengshiun Yu AuthorDate: Mon Sep 23 21:42:37 2024 -0400 [BYOC][NNAPI] Add NNAPI backend for BYOC (#17385) * [BYOC][NNAPI] This PR introduces NNAPI to TVM This PR introduces a new BYOC backend for Android Neural Networks API (NNAPI), enabling execution of neural networks on custom accelerators. This feature adds a new codegen and runtime for NNAPI, supporting operations such as element-wise ops, nn.dense, and nn.conv2d for CNN model with static shape. Co-authored-by: Ming-Long Huang Co-authored-by: HMZ --- CMakeLists.txt| 3 + cmake/modules/LibInfo.cmake | 2 + cmake/modules/contrib/NNAPI.cmake | 39 ++ python/tvm/relax/backend/contrib/nnapi.py | 324 python/tvm/testing/utils.py | 6 + src/relax/backend/contrib/nnapi/codegen.cc| 272 ++ src/runtime/contrib/nnapi/nnapi_builder.cc| 264 ++ src/runtime/contrib/nnapi/nnapi_builder.h | 133 + src/runtime/contrib/nnapi/nnapi_ops.cc| 601 ++ src/runtime/contrib/nnapi/nnapi_ops.h | 165 ++ src/runtime/contrib/nnapi/nnapi_runtime.cc| 250 + src/support/libinfo.cc| 10 + tests/python/nightly/test_nnapi/__init__.py | 17 + tests/python/nightly/test_nnapi/conftest.py | 39 ++ tests/python/nightly/test_nnapi/infrastructure.py | 143 + tests/python/nightly/test_nnapi/test_network.py | 136 + tests/python/nightly/test_nnapi/test_ops.py | 362 + 17 files changed, 2766 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 38dd59b9c9..66ea6a07da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -125,6 +125,8 @@ tvm_option(USE_ARM_COMPUTE_LIB "Build with Arm Compute Library" OFF) tvm_option(USE_ARM_COMPUTE_LIB_GRAPH_EXECUTOR "Build with Arm Compute Library graph
executor" OFF) tvm_option(USE_TENSORRT_CODEGEN "Build with TensorRT Codegen support" OFF) tvm_option(USE_TENSORRT_RUNTIME "Build with TensorRT runtime" OFF) +tvm_option(USE_NNAPI_CODEGEN "Build with NNAPI Codegen support" OFF) +tvm_option(USE_NNAPI_RUNTIME "Build with NNAPI runtime" OFF) tvm_option(USE_RUST_EXT "Build with Rust based compiler extensions, STATIC, DYNAMIC, or OFF" OFF) tvm_option(USE_VITIS_AI "Build with VITIS-AI Codegen support" OFF) tvm_option(SUMMARIZE "Print CMake option summary after configuring" OFF) @@ -602,6 +604,7 @@ include(cmake/modules/contrib/BNNS.cmake) include(cmake/modules/contrib/ONNX.cmake) include(cmake/modules/contrib/ArmComputeLib.cmake) include(cmake/modules/contrib/TensorRT.cmake) +include(cmake/modules/contrib/NNAPI.cmake) include(cmake/modules/contrib/VitisAI.cmake) include(cmake/modules/contrib/Verilator.cmake) include(cmake/modules/contrib/UMA.cmake) diff --git a/cmake/modules/LibInfo.cmake b/cmake/modules/LibInfo.cmake index a2b51bb331..ee6561dffc 100644 --- a/cmake/modules/LibInfo.cmake +++ b/cmake/modules/LibInfo.cmake @@ -144,6 +144,8 @@ function(add_lib_info src_file) TVM_INFO_USE_MSC="${USE_MSC}" TVM_INFO_USE_CCACHE="${USE_CCACHE}" TVM_INFO_USE_NVSHMEM="${USE_NVSHMEM}" +TVM_INFO_USE_NNAPI_CODEGEN="${USE_NNAPI_CODEGEN}" +TVM_INFO_USE_NNAPI_RUNTIME="${USE_NNAPI_RUNTIME}" TVM_INFO_BACKTRACE_ON_SEGFAULT="${BACKTRACE_ON_SEGFAULT}" ) diff --git a/cmake/modules/contrib/NNAPI.cmake b/cmake/modules/contrib/NNAPI.cmake new file mode 100644 index 00..23eb6dd11e --- /dev/null +++ b/cmake/modules/contrib/NNAPI.cmake @@ -0,0 +1,39 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the L
(tvm-rfcs) branch main updated: [RFC] NNAPI Integration via BYOC (#109)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm-rfcs.git The following commit(s) were added to refs/heads/main by this push: new 8e5c125 [RFC] NNAPI Integration via BYOC (#109) 8e5c125 is described below commit 8e5c1250a6632033c8ffa2d901b3a4b0ce59f982 Author: Mengshiun Yu AuthorDate: Mon Sep 23 21:41:49 2024 -0400 [RFC] NNAPI Integration via BYOC (#109) This RFC introduces a new backend Android Neural Network API (NNAPI) for BYOC Co-authored-by: Ming-Long Huang mlhu...@pllab.cs.nthu.edu.tw Co-authored-by: HMZ mzhu...@pllab.cs.nthu.edu.tw Co-authored-by: Ming-Long Huang --- rfcs/0109-byoc-nnapi.md | 81 + 1 file changed, 81 insertions(+) diff --git a/rfcs/0109-byoc-nnapi.md b/rfcs/0109-byoc-nnapi.md new file mode 100644 index 000..707796e --- /dev/null +++ b/rfcs/0109-byoc-nnapi.md @@ -0,0 +1,81 @@ +- Feature Name: byoc_nnapi +- Start Date: 2024-08-01 +- RFC PR: [apache/tvm-rfcs#0109](https://github.com/apache/tvm-rfcs/pull/0109) +- GitHub Issue: [apache/tvm#](https://github.com/apache/tvm/issues/) + +# Summary +[summary]: #summary + +This RFC introduces a new backend Android Neural Network API (NNAPI) for BYOC. + +# Motivation +[motivation]: #motivation + +Android Neural Networks API (NNAPI) is a graph-level neural network inference API provided by the Android runtime. Prior to this RFC, TVM on Android mobile devices mainly relies on OpenCL for GPU acceleration. This RFC aims to add a new codegen and a runtime via the BYOC framework, which enables execution on custom accelerators from SoC vendors on mobile devices. + +# Guide-level explanation +[guide-level-explanation]: #guide-level-explanation + +**How to use the NNAPI BYOC backend?** + +Use the `partition_for_nnapi()` function to partition operations that are supported by NNAPI from an `IRModule`. The optional `feature_level` keyword argument specifies the highest NNAPI feature level. 
Operations introduced in feature levels higher than the specified level do not get partitioned. + +```python +from tvm.relax.op.contrib.nnapi import partition_for_nnapi + +mod = partition_for_nnapi(mod, feature_level=7) +``` + +Build the module after partitioning. The result of the build can then be exported and deployed to an Android device built with the NNAPI runtime support turned on. + +```python +android_target = "llvm -mtriple=aarch64-linux-android" +lib = relax.build(mod, target=android_target) +``` + +# Reference-level explanation +[reference-level-explanation]: #reference-level-explanation + +This RFC adds optional support for NNAPI via BYOC without affecting other features in TVM. + +**Added code**: + +We have an implementation with the following components added to the TVM codebase. + +- NNAPI partition function implemented with pattern matching. +- NNAPI codegen that serializes Relax IR subgraphs to JSON runtime modules. +- NNAPI runtime that loads JSON runtime modules and calls API functions to perform model build, compile, and inference. + +**Supported ops**: + +The implementation supports the following ops in both `float32` and `float16` data types. + +- Element-wise unary operations (relu, exp, …) +- Element-wise binary operations (add, multiply, …) +- nn.dense +- nn.conv2d +- nn.max_pool2d + +# Drawbacks +[drawbacks]: #drawbacks + +In the current implementation, the performance gain of NNAPI is not consistent on the mobile devices due to SoC drivers being unable to accelerate all of the supported operations. This may be mitigated by further integrating a smarter partitioning algorithm that selectively offloads operations based on profiling as seen in the [Prior art](#prior-art) section. + +# Rationale and alternatives +[rationale-and-alternatives]: #rationale-and-alternatives + +Instead of using JSON codegen, the integration can also be implemented using C source codegen. See the [Prior art](#prior-art) section. 
+ +# Prior art +[prior-art]: #prior-art + +This RFC is a successor of [an RFC by us](https://discuss.tvm.apache.org/t/rfc-byoc-android-nnapi-integration/9072) in 2021. The codegen and the runtime have been rewritten from scratch since then to generate and load standardized `JSONRuntimeBased` modules instead of C source code. + +# Unresolved questions +[unresolved-questions]: #unresolved-questions + +# Future possibilities +[future-possibilities]: #future-possibilities + +- Add support for quantized data types to cover Relax QNN dialect or Relax quantize/dequantize operators. +- Add support for dynamic shape operands.
(tvm) branch main updated: [TVMjs] Modify web package description (#17405)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 30fb16a5e1 [TVMjs] Modify web package description (#17405) 30fb16a5e1 is described below commit 30fb16a5e1d564ffa8533cf154c0ba2ea06dfd43 Author: Charlie Ruan <53290280+charliefr...@users.noreply.github.com> AuthorDate: Mon Sep 23 06:34:46 2024 -0700 [TVMjs] Modify web package description (#17405) --- web/package-lock.json | 12 ++-- web/package.json | 12 +++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/web/package-lock.json b/web/package-lock.json index 75efcbcc7b..561ba77091 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -1,12 +1,12 @@ { "name": "tvmjs", - "version": "0.17.0-dev0", + "version": "0.18.0-dev0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "tvmjs", - "version": "0.17.0-dev0", + "version": "0.18.0-dev0", "license": "Apache-2.0", "devDependencies": { "@rollup/plugin-commonjs": "^20.0.0", @@ -14,7 +14,7 @@ "@types/node": "^20.4.5", "@typescript-eslint/eslint-plugin": "^5.59.6", "@typescript-eslint/parser": "^5.59.6", -"@webgpu/types": "^0.1.40", +"@webgpu/types": "^0.1.42", "eslint": "^8.41.0", "jest": "^26.0.1", "rollup": "^2.56.2", @@ -1766,9 +1766,9 @@ } }, "node_modules/@webgpu/types": { - "version": "0.1.40", - "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.40.tgz";, - "integrity": "sha512-/BBkHLS6/eQjyWhY2H7Dx5DHcVrS2ICj9owvSRdgtQT6KcafLZA86tPze0xAOsd4FbsYKCUBUQyNi87q7gV7kw==", + "version": "0.1.46", + "resolved": "https://registry.npmjs.org/@webgpu/types/-/types-0.1.46.tgz";, + "integrity": "sha512-2iogO6Zh0pTbKLGZuuGWEmJpF/fTABGs7G9wXxpn7s24XSJchSUIiMqIJHURi5zsMZRRTuXrV/3GLOkmOFjq5w==", "dev": true }, "node_modules/abab": { diff --git a/web/package.json b/web/package.json index 710185c5bc..a4e5d7ac08 100644 --- a/web/package.json +++ 
b/web/package.json @@ -1,11 +1,21 @@ { "name": "tvmjs", - "displayName": "TVM Wasm JS runtime", + "description": "TVM WASM/WebGPU runtime for JS/TS", "license": "Apache-2.0", + "homepage": "https://github.com/apache/tvm/tree/main/web";, "version": "0.18.0-dev0", "files": [ "lib" ], + "repository": { +"type": "git", +"url": "git+https://github.com/apache/tvm/tree/main/web"; + }, + "keywords": [ +"llm", +"large language model", +"machine learning" + ], "main": "lib/index.js", "types": "lib/index.d.ts", "scripts": {
(tvm) branch main updated: [TIR, TVMScript] Add TIR - Triton integration (#17395)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 48d3ada275 [TIR, TVMScript] Add TIR - Triton integration (#17395) 48d3ada275 is described below commit 48d3ada2750959fb06cbb555a3491dbf41a3c155 Author: Wuwei Lin AuthorDate: Mon Sep 23 06:17:55 2024 -0700 [TIR, TVMScript] Add TIR - Triton integration (#17395) * [TIR, TVMScript] Add TIR - Triton integration Added a macro `T.call_triton` in TIR script parser, which expands to AOT compilation of the kernel and the host TIR code to launch the kernel. --- python/tvm/relax/vm_build.py | 14 +- python/tvm/script/ir_builder/ir/__init__.py| 2 + python/tvm/script/ir_builder/ir/ir.py | 58 - .../tvm/script/ir_builder/tir/external_kernel.py | 141 + python/tvm/script/ir_builder/tir/ir.py | 3 +- python/tvm/script/ir_builder/tir/triton.py | 115 + src/script/ir_builder/ir/ir.cc | 32 - .../python/contrib/test_tir_triton_integration.py | 119 + 8 files changed, 477 insertions(+), 7 deletions(-) diff --git a/python/tvm/relax/vm_build.py b/python/tvm/relax/vm_build.py index 243488e5d8..9fd7a74285 100644 --- a/python/tvm/relax/vm_build.py +++ b/python/tvm/relax/vm_build.py @@ -243,13 +243,25 @@ def _vmlink( if ext_libs is None: ext_libs = [] lib = None +relax_ext_libs = [] +tir_ext_libs = [] if tir_mod is not None and len(tir_mod.get_global_vars()) > 0: lib = tvm.build( tir_mod, target=target, runtime=_autodetect_system_lib_req(target, system_lib), ) -return Executable(_ffi_api.VMLink(builder, target, lib, ext_libs, params)) # type: ignore +for ext_mod in ext_libs: +if ext_mod.type_key == "cuda": +tir_ext_libs.append(ext_mod) +else: +relax_ext_libs.append(ext_mod) +if lib is not None: +for mod in tir_ext_libs: +lib.import_module(mod) +elif len(tir_ext_libs) > 0: +print("Warning: No TIR module is found, but external modules for TIR are provided.") +return 
Executable(_ffi_api.VMLink(builder, target, lib, relax_ext_libs, params)) # type: ignore def build( diff --git a/python/tvm/script/ir_builder/ir/__init__.py b/python/tvm/script/ir_builder/ir/__init__.py index fdf44b2b79..f604026a13 100644 --- a/python/tvm/script/ir_builder/ir/__init__.py +++ b/python/tvm/script/ir_builder/ir/__init__.py @@ -21,6 +21,8 @@ from .ir import ( def_function, ir_module, module_attrs, +module_get_attr, +module_set_attr, module_global_infos, lookup_vdevice, vdevice, diff --git a/python/tvm/script/ir_builder/ir/ir.py b/python/tvm/script/ir_builder/ir/ir.py index d35d73678b..05ee26e832 100644 --- a/python/tvm/script/ir_builder/ir/ir.py +++ b/python/tvm/script/ir_builder/ir/ir.py @@ -16,7 +16,7 @@ # under the License. """Package tvm.script.ir_builder.ir.ir""" -from typing import Dict, List +from typing import Dict, List, Optional from tvm.ir import BaseFunc, GlobalVar, GlobalInfo, VDevice, DummyGlobalInfo from tvm.runtime import Object as tvm_Object @@ -77,14 +77,66 @@ def def_function(func_name: str, func: BaseFunc) -> None: return _ffi_api.DefFunction(func_name, func) # type: ignore[attr-defined] # pylint: disable=no-member -def module_attrs(attrs: Dict[str, tvm_Object]) -> None: +def module_attrs(attrs: Dict[str, tvm_Object], allow_overwrite=False) -> None: """Specify the attrs of the ir_module frame. Parameters -- attrs: Dict[str, Object] The module attrs. +allow_overwrite: bool +Whether allow overwrite the existing attrs. """ -return _ffi_api.ModuleAttrs(attrs) # type: ignore[attr-defined] # pylint: disable=no-member +return _ffi_api.ModuleAttrs(attrs, allow_overwrite) # type: ignore[attr-defined] # pylint: disable=no-member + + +def current_ir_module() -> IRModuleFrame: +"""Get the current ir_module frame. +Returns +--- +frame: IRModuleFrame +The current frame. 
+""" +return _ffi_api.CurrentIRModule() # type: ignore[attr-defined] # pylint: disable=no-member + + +def module_get_attrs() -> Dict[str, tvm_Object]: +"""Get the attrs of the ir_module frame. +Returns +--- +attrs: Dict[str, Object] +The module attrs. +""" +return _ffi_api.ModuleGetAttrs() # type: ignore[attr-defined] # pylint: disable=no-member + + +def module_get_attr(attr_key: str) -> Optional[tvm_Object]:
(tvm) branch main updated: [WASM] Implement concat embeddings (#17404)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 44808b41c8 [WASM] Implement concat embeddings (#17404) 44808b41c8 is described below commit 44808b41c803a3f08a4f43a6455ae0b0df1ac3ba Author: Charlie Ruan <53290280+charliefr...@users.noreply.github.com> AuthorDate: Mon Sep 23 05:23:40 2024 -0700 [WASM] Implement concat embeddings (#17404) * [WASM] Implement concat embeddings * Make concatEmbeddings optional for backward compatibility --- src/target/source/codegen_webgpu.cc | 1 + web/emcc/wasm_runtime.cc| 46 + web/src/runtime.ts | 38 +- 3 files changed, 84 insertions(+), 1 deletion(-) diff --git a/src/target/source/codegen_webgpu.cc b/src/target/source/codegen_webgpu.cc index 83079a9f07..1d1df91dc4 100644 --- a/src/target/source/codegen_webgpu.cc +++ b/src/target/source/codegen_webgpu.cc @@ -125,6 +125,7 @@ runtime::FunctionInfo CodeGenWebGPU::AddFunction(const PrimFunc& f, bool skip_re name_supply_->ReserveName("var"); name_supply_->ReserveName("let"); name_supply_->ReserveName("const"); + name_supply_->ReserveName("std"); // skip the first underscore, so SSA variable starts from name_supply_->FreshName("v_"); diff --git a/web/emcc/wasm_runtime.cc b/web/emcc/wasm_runtime.cc index 2f71355958..9744750b80 100644 --- a/web/emcc/wasm_runtime.cc +++ b/web/emcc/wasm_runtime.cc @@ -173,5 +173,51 @@ TVM_REGISTER_GLOBAL("tvmjs.runtime.ArrayConcat").set_body([](TVMArgs args, TVMRe } *ret = Array(data); }); + +NDArray ConcatEmbeddings(const std::vector& embeddings) { + // Get output shape + int64_t hidden_size = embeddings[0]->shape[1]; + DLDataType dtype = embeddings[0]->dtype; + DLDevice device = embeddings[0]->device; + int seqLen = 0; + for (int i = 0; i < embeddings.size(); ++i) { +ICHECK_EQ(embeddings[i]->ndim, 2); +ICHECK_EQ(embeddings[i]->shape[1], hidden_size); +seqLen += 
embeddings[i]->shape[0]; + } + + // Create output + std::vector shape; + shape.push_back(seqLen); + shape.push_back(hidden_size); + NDArray result = NDArray::Empty(shape, dtype, device); + + // Copy + int offset = 0; + for (int i = 0; i < embeddings.size(); i++) { +const DLTensor& copy_src = *(embeddings[i].operator->()); +const DLTensor* p_copy_dst = result.operator->(); +DLTensor copy_dst = *p_copy_dst; +copy_dst.shape = embeddings[i]->shape; +copy_dst.byte_offset = +offset * hidden_size * ((embeddings[i]->dtype.bits * embeddings[i]->dtype.lanes + 7) / 8); +NDArray::CopyFromTo(&copy_src, &copy_dst); +offset += embeddings[i]->shape[0]; + } + + return result; +} + +// Concatenate n NDArrays +TVM_REGISTER_GLOBAL("tvmjs.runtime.ConcatEmbeddings").set_body([](TVMArgs args, TVMRetValue* ret) { + std::vector embeddings; + for (int i = 0; i < args.size(); ++i) { +ICHECK_EQ(args[i].type_code(), kTVMNDArrayHandle); +embeddings.push_back(args[i]); + } + NDArray result = ConcatEmbeddings(std::move(embeddings)); + *ret = result; +}); + } // namespace runtime } // namespace tvm diff --git a/web/src/runtime.ts b/web/src/runtime.ts index 600a9b857f..8546cab773 100644 --- a/web/src/runtime.ts +++ b/web/src/runtime.ts @@ -174,6 +174,7 @@ class RuntimeContext implements Disposable { applyRepetitionPenalty: PackedFunc; applyPresenceAndFrequencyPenalty: PackedFunc; applySoftmaxWithTemperature: PackedFunc; + concatEmbeddings: PackedFunc | undefined; private autoDisposeScope: Array> = []; @@ -199,6 +200,11 @@ class RuntimeContext implements Disposable { this.applyRepetitionPenalty = getGlobalFunc("vm.builtin.apply_repetition_penalty"); this.applyPresenceAndFrequencyPenalty = getGlobalFunc("vm.builtin.apply_presence_and_frequency_penalty"); this.applySoftmaxWithTemperature = getGlobalFunc("vm.builtin.apply_softmax_with_temperature"); +try { + this.concatEmbeddings = getGlobalFunc("tvmjs.runtime.ConcatEmbeddings"); +} catch { + // TODO: remove soon.
Older artifacts do not have this, try-catch for backward compatibility. +} } dispose(): void { @@ -223,6 +229,7 @@ class RuntimeContext implements Disposable { this.applyRepetitionPenalty.dispose(); this.applyPresenceAndFrequencyPenalty.dispose(); this.applySoftmaxWithTemperature.dispose(); +this.concatEmbeddings?.dispose(); } beginScope(): void { @@ -575,7 +582,10 @@ export class NDArray implements Disposable { * @param data The source data array. * @returns this */ - copyFrom(data: NDArray | Array | Float32Arra
(tvm) branch main updated: [CI] Update image tag to 20240917-153130-9f281758 (#17397)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 9e2a75d64e [CI] Update image tag to 20240917-153130-9f281758 (#17397) 9e2a75d64e is described below commit 9e2a75d64e937390eab2985743fef47cdeaf3c81 Author: Masahiro Hiramori AuthorDate: Mon Sep 23 21:22:04 2024 +0900 [CI] Update image tag to 20240917-153130-9f281758 (#17397) * update image tag to 20240917-153130-9f281758 * increase atol * define custom equal operator to avoid comparison error * try to remove android stuff * skip test_imagenet --- ci/jenkins/docker-images.ini | 20 ++-- tests/python/frontend/pytorch/test_fx_quant.py | 3 ++ tests/python/relax/test_frontend_onnx.py | 5 +-- .../tir-transform/test_tir_transform_simplify.py | 38 +- tests/scripts/task_build_hexagon_api.sh| 5 +-- 5 files changed, 47 insertions(+), 24 deletions(-) diff --git a/ci/jenkins/docker-images.ini b/ci/jenkins/docker-images.ini index 6e55160521..175917f887 100644 --- a/ci/jenkins/docker-images.ini +++ b/ci/jenkins/docker-images.ini @@ -17,13 +17,13 @@ # This data file is read during when Jenkins runs job to determine docker images. 
[jenkins] -ci_arm: tlcpack/ci-arm:20240428-060115-0b09ed018 -ci_cortexm: tlcpack/ci-cortexm:20240428-060115-0b09ed018 -ci_cpu: tlcpack/ci_cpu:20240428-060115-0b09ed018 -ci_gpu: tlcpack/ci-gpu:20240428-060115-0b09ed018 -ci_hexagon: tlcpack/ci-hexagon:20240428-060115-0b09ed018 -ci_i386: tlcpack/ci-i386:20240428-060115-0b09ed018 -ci_lint: tlcpack/ci-lint:20240428-060115-0b09ed018 -ci_minimal: tlcpack/ci-minimal:20240428-060115-0b09ed018 -ci_riscv: tlcpack/ci-riscv:20240428-060115-0b09ed018 -ci_wasm: tlcpack/ci-wasm:20240428-060115-0b09ed018 +ci_arm: tlcpack/ci-arm:20240917-153130-9f281758 +ci_cortexm: tlcpack/ci-cortexm:20240917-153130-9f281758 +ci_cpu: tlcpack/ci_cpu:20240917-153130-9f281758 +ci_gpu: tlcpack/ci-gpu:20240917-153130-9f281758 +ci_hexagon: tlcpack/ci-hexagon:20240917-153130-9f281758 +ci_i386: tlcpack/ci-i386:20240917-153130-9f281758 +ci_lint: tlcpack/ci-lint:20240917-153130-9f281758 +ci_minimal: tlcpack/ci-minimal:20240917-153130-9f281758 +ci_riscv: tlcpack/ci-riscv:20240917-153130-9f281758 +ci_wasm: tlcpack/ci-wasm:20240917-153130-9f281758 diff --git a/tests/python/frontend/pytorch/test_fx_quant.py b/tests/python/frontend/pytorch/test_fx_quant.py index 7f3083a7dc..8ed6e1a747 100644 --- a/tests/python/frontend/pytorch/test_fx_quant.py +++ b/tests/python/frontend/pytorch/test_fx_quant.py @@ -87,6 +87,9 @@ def test_deeplab_v3(): quantize_and_build(model, 300) +@pytest.mark.skip( +reason="Model binary isn't uploaded to S3. 
See https://github.com/apache/tvm/pull/17397" +) def test_imagenet(): for model_func in [resnet50, efficientnet_b4]: quantize_and_build(model_func(pretrained=True).eval(), 224) diff --git a/tests/python/relax/test_frontend_onnx.py b/tests/python/relax/test_frontend_onnx.py index 8f4e9881f4..0e7cfbd7c0 100644 --- a/tests/python/relax/test_frontend_onnx.py +++ b/tests/python/relax/test_frontend_onnx.py @@ -76,6 +76,7 @@ def check_correctness( inputs: Optional[Dict[str, np.ndarray]] = None, ir_version: int = 8, opset: int = 14, +rtol: float = 1e-7, atol: float = 1e-5, ) -> None: """Run an onnx model in both onnxruntime and TVM through our importer @@ -154,7 +155,7 @@ def check_correctness( # TODO Allow configurable tolerance. # Sometimes None is used to indicate an unused output. if ort_out is not None: -tvm.testing.assert_allclose(tvm_out.numpy(), ort_out, atol=atol) +tvm.testing.assert_allclose(tvm_out.numpy(), ort_out, rtol=rtol, atol=atol) @pytest.mark.parametrize( @@ -1010,7 +1011,7 @@ def test_all_reduce_funcs(func, dynamic): inputs_dict = {"x": data} # Reduction ops accumulate arithmetic errors, so we use a higher tolerance. -check_correctness(model, inputs_dict, opset=11, atol=1e-4) +check_correctness(model, inputs_dict, opset=11, rtol=1e-4, atol=1e-4) for keepdims in [True, False]: verify_reduce_func( diff --git a/tests/python/tir-transform/test_tir_transform_simplify.py b/tests/python/tir-transform/test_tir_transform_simplify.py index f7887bc611..0b2d5f16d8 100644 --- a/tests/python/tir-transform/test_tir_transform_simplify.py +++ b/tests/python/tir-transform/test_tir_transform_simplify.py @@ -1021,18 +1021,40 @@ class TestMostRestrictiveConditional(BaseBeforeAfter): then `a >= b` cannot be proven, but can be reduced to `a == b`. """ +class TupleWrapper(tuple): +""" +A custom wrapper for `tuple` to hand
(tvm) branch main updated: [Fix][LLVM] Fix getHostCPUFeatures LLVM version cutoff (#17403)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 66b21d3c25 [Fix][LLVM] Fix getHostCPUFeatures LLVM version cutoff (#17403) 66b21d3c25 is described below commit 66b21d3c25d93631a91d5b6758eb379c2055c00c Author: Ruihang Lai AuthorDate: Mon Sep 23 08:21:20 2024 -0400 [Fix][LLVM] Fix getHostCPUFeatures LLVM version cutoff (#17403) This PR fixes the LLVM version cutoff for `llvm::sys::getHostCPUFeatures`. Previously the cutoff version is set to 20.0, assuming that the signature change happens since LLVM 20.0. While actually the signature change happens at 19.0. Reference: * LLVM 18.1.8 https://github.com/llvm/llvm-project/blob/llvmorg-18.1.8/llvm/include/llvm/TargetParser/Host.h#L56 * LLVM 19.1.0 https://github.com/llvm/llvm-project/blob/llvmorg-19.1.0-rc1/llvm/include/llvm/TargetParser/Host.h#L55 --- src/target/llvm/codegen_llvm.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/target/llvm/codegen_llvm.cc b/src/target/llvm/codegen_llvm.cc index 4c5bea8c9b..e21436e556 100644 --- a/src/target/llvm/codegen_llvm.cc +++ b/src/target/llvm/codegen_llvm.cc @@ -2315,7 +2315,7 @@ TVM_REGISTER_GLOBAL("tvm.codegen.llvm.GetHostCPUName").set_body_typed([]() -> st TVM_REGISTER_GLOBAL("tvm.codegen.llvm.GetHostCPUFeatures") .set_body_typed([]() -> Map { -#if TVM_LLVM_VERSION >= 200 +#if TVM_LLVM_VERSION >= 190 Map ret; auto features = llvm::sys::getHostCPUFeatures(); for (auto it = features.begin(); it != features.end(); ++it) {
(tvm) branch main updated: [KVCache] Attention func accepting over-padded qkv and output NDArray (#17401)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new ce461859c5 [KVCache] Attention func accepting over-padded qkv and output NDArray (#17401) ce461859c5 is described below commit ce461859c5a8dcb0a38b0af83ff206f2f2751e47 Author: Ruihang Lai AuthorDate: Sun Sep 22 11:02:58 2024 -0400 [KVCache] Attention func accepting over-padded qkv and output NDArray (#17401) This PR enhances the `AttentionWithFusedQKV` function of `PagedKVCache` so that it can now accept input `qkv_data` and `o_data` that have padding along the sequence dimension. We introduce this enhancement to allow more flexibility for the caller of PagedKVCache to decide whether to pad the input qkv/o NDArrays or not. --- src/runtime/relax_vm/paged_kv_cache.cc | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/runtime/relax_vm/paged_kv_cache.cc b/src/runtime/relax_vm/paged_kv_cache.cc index 78a7ed1dd1..b6636ae1a7 100644 --- a/src/runtime/relax_vm/paged_kv_cache.cc +++ b/src/runtime/relax_vm/paged_kv_cache.cc @@ -1755,7 +1755,7 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { for (int64_t seq_id = 0; seq_id < cur_batch_size_; ++seq_id) { total_seq_length += cur_append_lengths_[seq_id]; } -CHECK_EQ(total_seq_length, qkv_data->shape[0]); +CHECK_LE(total_seq_length, qkv_data->shape[0]); // Sync the copy stream and the compute stream. ComputeStreamWaitForCopyStream(); // The auxiliary data structure on device must have been synchronized. 
@@ -1767,12 +1767,21 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { qkv_data->dtype); NDArray v_data = temp_attn_v_device_.CreateView({total_seq_length, num_kv_heads_, head_dim_}, qkv_data->dtype); + +NDArray qkv_data_view = qkv_data; +NDArray o_data_view = o_data; +if (total_seq_length != qkv_data->shape[0]) { + qkv_data_view = qkv_data.CreateView( + {total_seq_length, qkv_data->shape[1], qkv_data->shape[2]}, qkv_data->dtype); + o_data_view = + o_data.CreateView({total_seq_length, num_qo_heads_, head_dim_}, qkv_data->dtype); +} // Part 2. Split fused qkv and apply rotary embedding to q/k data. if (!rope_ext_factors_.defined()) { - f_split_rotary_(qkv_data, q_rope_position_map_view_, q_data, k_data, v_data, + f_split_rotary_(qkv_data_view, q_rope_position_map_view_, q_data, k_data, v_data, static_cast(rope_mode_ == RoPEMode::kNormal)); } else { - f_split_rotary_(qkv_data, q_rope_position_map_view_, q_data, k_data, v_data, + f_split_rotary_(qkv_data_view, q_rope_position_map_view_, q_data, k_data, v_data, rope_ext_factors_.value()); } @@ -1781,7 +1790,7 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { f_transpose_append_(pages_[local_layer_id], k_data, v_data, append_position_map_view_); } // Part 4: perform attention -AttentionInternal(layer_id, q_data, k_data, v_data, o_data, attn_score_scaling_factor); +AttentionInternal(layer_id, q_data, k_data, v_data, o_data_view, attn_score_scaling_factor); // Part 5. Append k/v data to kv-cache if flag "append_before_attn" is not set. if (!append_before_attn_) { f_transpose_append_(pages_[local_layer_id], k_data, v_data, append_position_map_view_);
(tvm) branch main updated: [3rdparty] Bump FlashInfer for tmp workspace reduction (#17400)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 36ff1f146c [3rdparty] Bump FlashInfer for tmp workspace reduction (#17400) 36ff1f146c is described below commit 36ff1f146c6ad8debcc6675fb2dfc5537fc233dc Author: Ruihang Lai AuthorDate: Sun Sep 22 08:58:24 2024 -0400 [3rdparty] Bump FlashInfer for tmp workspace reduction (#17400) This PR bumps FlashInfer to reduce the size of required temporary workspace. --- 3rdparty/flashinfer| 2 +- src/runtime/relax_vm/paged_kv_cache.cc | 29 ++ ..._builtin_paged_attention_kv_cache_flashinfer.py | 2 +- ...runtime_builtin_paged_attention_kv_cache_tir.py | 2 +- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/3rdparty/flashinfer b/3rdparty/flashinfer index 0dd801d202..1e379898a5 16 --- a/3rdparty/flashinfer +++ b/3rdparty/flashinfer @@ -1 +1 @@ -Subproject commit 0dd801d2027af89f3603cbbf68a76e9503bb2f57 +Subproject commit 1e379898a589cdd4ff18a4621fcbe18d63501545 diff --git a/src/runtime/relax_vm/paged_kv_cache.cc b/src/runtime/relax_vm/paged_kv_cache.cc index 8809a1b072..78a7ed1dd1 100644 --- a/src/runtime/relax_vm/paged_kv_cache.cc +++ b/src/runtime/relax_vm/paged_kv_cache.cc @@ -57,8 +57,10 @@ namespace relax_vm { constexpr const int kPagedKVCacheMaxBlockDepth = 2; /*! \brief The maximum tree size of a single sequence in tree attention. */ constexpr const int kTreeAttnMaxTreeSize = 256; -/*! \brief The 8MB workspace size for attention auxiliary data. */ -constexpr const int kAttnWorkspaceByte = 128 * 1024 * 1024; +/*! \brief The 1MB workspace size for integer attention auxiliary data. */ +constexpr const int kIntAttnWorkspaceByte = 1 * 1024 * 1024; +/*! \brief The 128MB workspace size for floating-point attention auxiliary data. */ +constexpr const int kFloatAttnWorkspaceByte = 768 * 1024 * 1024; /*! 
\brief The id of the temporary logical page, which is useful for sliding window. */ constexpr const int kPagedKVCacheTempPageId = -1; @@ -915,7 +917,8 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { NDArray temp_attn_output_device_; NDArray temp_attn_scores_device_; NDArray merged_attn_scores_device_; - std::vector temp_attn_workspace_; + std::vector temp_int_attn_workspace_; + NDArray temp_float_attn_workspace_; //--- // Below are the auxiliary data structure on CPU. @@ -1089,8 +1092,8 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { for (int d = 0; d < kPagedKVCacheMaxBlockDepth; ++d) { if (NeedKernelBeginForward()) { -temp_attn_workspace_.push_back( -NDArray::Empty({kAttnWorkspaceByte / 4}, DataType::Float(32), device)); +temp_int_attn_workspace_.push_back( +NDArray::Empty({kIntAttnWorkspaceByte / 4}, DataType::Float(32), device)); } qo_indptr_on_depths_view_.push_back(NDArray()); page_indptr_on_depths_view_.push_back(NDArray()); @@ -1103,8 +1106,10 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { } // Additional workspace for the "prefill with ragged kv" kernel. 
if (NeedKernelBeginForward()) { - temp_attn_workspace_.push_back( - NDArray::Empty({kAttnWorkspaceByte / 4}, DataType::Float(32), device)); + temp_int_attn_workspace_.push_back( + NDArray::Empty({kIntAttnWorkspaceByte / 4}, DataType::Float(32), device)); + temp_float_attn_workspace_ = + NDArray::Empty({kFloatAttnWorkspaceByte / 4}, DataType::Float(32), device); } temp_attn_q_device_ = @@ -2324,7 +2329,8 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { if (!append_before_attn_) { if (is_chain_on_depths_[0]) { f_attention_prefill_ragged_begin_forward_.value()( -temp_attn_workspace_[0], cur_append_lengths_indptr_host_.as_ndarray(), +temp_float_attn_workspace_, temp_int_attn_workspace_[0], +cur_append_lengths_indptr_host_.as_ndarray(), cur_append_lengths_indptr_host_.as_ndarray(), cur_batch_size_, num_qo_heads_, num_kv_heads_, head_dim_, copy_stream_); } @@ -2336,14 +2342,15 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { CHECK(!support_sliding_window_) << "Kernel BeginForward doesn't support sliding window."; if (use_decode_kernel_[d]) { f_attention_decode_begin_forward_.value()( -d, temp_attn_workspace_[d + 1], page_indptr_on_depths_host_[d].as_ndarray(), +d, temp_float_attn_workspace_, temp_int_attn_workspace_[d + 1], +page_indptr_on_depths_host_[d].as_ndarray(), last_page_len_on_depths_host_[d].as_ndarray(), num_qo
(tvm) branch cmake-debug deleted (was 41a13e7f5f)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch cmake-debug in repository https://gitbox.apache.org/repos/asf/tvm.git was 41a13e7f5f Update The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(tvm) branch revert-17003-sme-conv2d-fp32 deleted (was b71a9a3827)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch revert-17003-sme-conv2d-fp32 in repository https://gitbox.apache.org/repos/asf/tvm.git was b71a9a3827 Revert "[SME][TOPI] Add conv2d NHWC SME fp32 schedule (#17003)" The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(tvm) branch main updated: [TVMScript][Relax] Allow return statement in DataflowBlock (#17131)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new a24204640e [TVMScript][Relax] Allow return statement in DataflowBlock (#17131) a24204640e is described below commit a24204640efe3dcf519ca3388633a8a62a7600eb Author: Eric Lunderberg AuthorDate: Wed Sep 18 13:01:43 2024 -0500 [TVMScript][Relax] Allow return statement in DataflowBlock (#17131) Prior to this commit, TVMScript required the return value of a Relax to be specified outside of any `with R.dataflow()` blocks. This resulted in a common pattern, where the return value of a function was first called with `R.output(ret_value)`, to mark `ret_value` as a `tvm::relax::Var` instead of a `tvm::relax::DataflowVar`, followed immediately by a `return ret_value` statement. This commit updates the TVMScript parser to allow a `return` statement inside a `with R.dataflow()` block. This is syntactic sugar that is equivalent to calling `R.output`, followed by a `return`. With this change, the following two TVMScript examples are now equivalent. (Prior to this change, the `return_inside_dataflow` example would raise an error during parsing.) 
```python @R.function(private=True) def output_then_return(A: R.Tensor): with R.dataflow(): B = R.add(A, A) C = R.multiply(B, B) R.output(C) return C @R.function(private=True) def return_inside_dataflow(A: R.Tensor): with R.dataflow(): B = R.add(A, A) C = R.multiply(B, B) return C ``` --- src/script/ir_builder/relax/frame.cc| 69 - src/script/ir_builder/relax/ir.cc | 23 +++--- tests/python/relax/test_tvmscript_parser.py | 31 + 3 files changed, 75 insertions(+), 48 deletions(-) diff --git a/src/script/ir_builder/relax/frame.cc b/src/script/ir_builder/relax/frame.cc index 3153c0770e..faf6bd6466 100644 --- a/src/script/ir_builder/relax/frame.cc +++ b/src/script/ir_builder/relax/frame.cc @@ -118,36 +118,23 @@ void BlockFrameNode::EnterWithScope() { } } -class DataflowBlockRewriter : public tvm::relax::ExprMutator { +class VarReplacer : public tvm::relax::ExprMutator { public: - static tvm::relax::DataflowBlock Rewrite(const tvm::relax::DataflowBlock& block, - const Array& output_vars) { -DataflowBlockRewriter rewriter(output_vars); -return Downcast(rewriter.VisitBindingBlock(block)); + explicit VarReplacer( + std::unordered_map + var_remap) { +var_remap_ = std::move(var_remap); } - private: - explicit DataflowBlockRewriter(const Array& output_vars) { -for (const tvm::relax::Var& var : output_vars) { - output_var_set_.insert(var.get()); -} - } - - tvm::relax::Var VisitVarDef_(const tvm::relax::DataflowVarNode* op) final { -auto it = output_var_set_.find(op); -if (it != output_var_set_.end()) { - // Rewrite dataflow vars to global vars - auto n = make_object(*op); - tvm::relax::Var new_var(n); - this->var_remap_[op->vid] = new_var; - return new_var; + tvm::relax::Var VisitVarDef(const tvm::relax::Var& var) override { +// ExprMutator only applies var_remap_ at usage sites. This +// applies var_remap_ at each definition site as well. 
+if (auto it = var_remap_.find(var->vid); it != var_remap_.end()) { + return it->second; } else { - return GetRef(op); + return var; } } - - private: - std::unordered_set output_var_set_; }; void BlockFrameNode::ExitWithScope() { @@ -164,25 +151,27 @@ void BlockFrameNode::ExitWithScope() { // Step 3. Rewrite the dataflow block. if (is_dataflow) { -// Step 3.1. Rewrite block binding -block = DataflowBlockRewriter::Rewrite(Downcast(block), output_vars); - -// Step 3.2. Collect global vars' reference in bindings -Map new_global_vars; -for (const tvm::relax::Binding& binding : block->bindings) { - if (!binding->var->IsInstance()) { -new_global_vars.Set(binding->var->vid, binding->var); - } +// Step 3.0. Define a map to replace variables +Array new_output_vars; +std::unordered_map var_remap; +for (const auto& output_var : output_vars) { + tvm::relax::Var new_output_var(output_var->name_hint(), GetStructInfo(output_var)); + new_output_vars.push_back(new_output_var); + var_remap[output_var->vid] = new_output_var; } +VarReplacer mutator(std::move(var_remap)); + +// Step 3.1. Rewrite block binding +block = mutator.VisitBindingBlock(block); // Step 3.3. Rewrite output vars -Array new_o
(tvm) branch main updated: [TVMScript] Avoid segfault from invalid TVMScript (#17373)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new ff8e41644f [TVMScript] Avoid segfault from invalid TVMScript (#17373) ff8e41644f is described below commit ff8e41644fde86714d6dbf021d57baebe3a1ec1a Author: Eric Lunderberg AuthorDate: Tue Sep 17 09:07:41 2024 -0500 [TVMScript] Avoid segfault from invalid TVMScript (#17373) * [TVMScript] Avoid segfault from invalid TVMScript Prior to this commit, after the `DiagnosticContext` prints its error, it overwrites the `DiagnosticRenderer` with a NULL renderer. If a second call to `DiagnosticContext::Render` occurs, it will segfault. This appears to be intended to prevent double-printing of error messages, but double-printing error messages is much worse than a segfault. In addition, `DiagnosticContext::Render` should only be called once. There's a common pattern in the parser where it will wrap exceptions in `DiagnosticError`, but re-raise exceptions that are already a `DiagnosticError`. This requires every such location to include `except DiagnosticError: raise`, and can easily be missed. This PR makes two changes: First, the `DiagnosticRenderer` is updated to have a no-op callback rather than a NULL callback. Second, the re-raising of `DiagnosticError` is moved to `Parser.report_error`, so that it does not need to be handled separately at several independent locations in the TVMScript parser. 
--- python/tvm/script/parser/core/evaluator.py| 12 ++-- python/tvm/script/parser/core/parser.py | 19 ++- python/tvm/script/parser/relax/parser.py | 10 +- src/ir/diagnostic.cc | 3 ++- tests/python/relax/test_tvmscript_parser.py | 14 +++--- .../tvmscript/test_tvmscript_printer_highlight.py | 8 +--- 6 files changed, 39 insertions(+), 27 deletions(-) diff --git a/python/tvm/script/parser/core/evaluator.py b/python/tvm/script/parser/core/evaluator.py index 26e9d091bf..7a194c779d 100644 --- a/python/tvm/script/parser/core/evaluator.py +++ b/python/tvm/script/parser/core/evaluator.py @@ -267,8 +267,8 @@ class ExprEvaluator: value = self._eval_slice(fields) else: value = self._eval_expr(node.__class__(**fields)) -except Exception as e: # pylint: disable=broad-except,invalid-name -self.parser.report_error(node, e) +except Exception as err: # pylint: disable=broad-except +self.parser.report_error(node, err) return self._add_intermediate_result(value) def _eval_lambda(self, node: doc.Lambda) -> Any: @@ -286,8 +286,8 @@ class ExprEvaluator: """ try: value = self._eval_expr(node) -except Exception as e: # pylint: disable=broad-except,invalid-name -self.parser.report_error(node, str(e)) +except Exception as err: # pylint: disable=broad-except +self.parser.report_error(node, err) return self._add_intermediate_result(value) def _eval_bool_op(self, fields: Dict[str, Any]) -> Any: @@ -463,8 +463,8 @@ def eval_assign( """ try: return _eval_assign(target, source) -except Exception as e: # pylint: disable=broad-except,invalid-name -parser.report_error(target, f"Failed to evaluate assignment: {str(e)}") +except Exception as err: # pylint: disable=broad-except +parser.report_error(target, err) raise diff --git a/python/tvm/script/parser/core/parser.py b/python/tvm/script/parser/core/parser.py index 0ecf669566..372a3c54e4 100644 --- a/python/tvm/script/parser/core/parser.py +++ b/python/tvm/script/parser/core/parser.py @@ -307,10 +307,8 @@ def _dispatch_wrapper(func: dispatch.ParseMethod) 
-> dispatch.ParseMethod: def _wrapper(self: "Parser", node: doc.AST) -> None: try: return func(self, node) -except DiagnosticError: -raise -except Exception as e: # pylint: disable=broad-except,invalid-name -self.report_error(node, e) +except Exception as err: # pylint: disable=broad-except +self.report_error(node, err) raise return _wrapper @@ -547,6 +545,12 @@ class Parser(doc.NodeVisitor): err: Union[Exception, str] The error to report. """ + +# If the error is already being raised as a DiagnosticError, +# re-raise it without wrapping it in a DiagnosticContext. +if isinstance(err, DiagnosticError): +raise err + # Only take the last line of the error message if
(tvm) branch main updated: [CI] Upgrade PyTorch to 2.4.1 (#17338)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 9f281758e8 [CI] Upgrade PyTorch to 2.4.1 (#17338) 9f281758e8 is described below commit 9f281758e8a1a3c1c649b995367b0166da55f2c6 Author: Masahiro Hiramori AuthorDate: Tue Sep 17 23:07:22 2024 +0900 [CI] Upgrade PyTorch to 2.4.1 (#17338) upgrade pytorch to 2.4.1 --- docker/install/ubuntu_install_onnx.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/install/ubuntu_install_onnx.sh b/docker/install/ubuntu_install_onnx.sh index 2bb50c6198..6cea0075c1 100755 --- a/docker/install/ubuntu_install_onnx.sh +++ b/docker/install/ubuntu_install_onnx.sh @@ -36,6 +36,6 @@ pip3 install \ pip3 install future pip3 install \ -torch==2.0.0 \ -torchvision==0.15.1 \ +torch==2.4.1 \ +torchvision==0.19.1 \ --extra-index-url https://download.pytorch.org/whl/cpu
(tvm) branch main updated: [Doc] Relax Deep Dive (#17380)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 1435ddb118 [Doc] Relax Deep Dive (#17380) 1435ddb118 is described below commit 1435ddb118ce4fc6b87c07804e554c2e945053c9 Author: Siyuan Feng AuthorDate: Tue Sep 17 22:06:38 2024 +0800 [Doc] Relax Deep Dive (#17380) * [Doc] Relax Deep Dive Similar as TensorIR Deep Dive, we also have Relax Deep Dive. --- docs/conf.py | 7 +- docs/deep_dive/relax/abstraction.rst | 73 ++ docs/deep_dive/{tensor_ir => relax}/index.rst | 17 +- docs/deep_dive/relax/learning.rst | 272 docs/deep_dive/relax/tutorials/README.txt | 2 + docs/deep_dive/relax/tutorials/relax_creation.py | 281 + .../relax/tutorials/relax_transformation.py| 141 +++ docs/deep_dive/tensor_ir/abstraction.rst | 1 - docs/deep_dive/tensor_ir/index.rst | 6 +- .../tutorials/{creation.py => tir_creation.py} | 0 .../{transformation.py => tir_transformation.py} | 0 docs/index.rst | 1 + 12 files changed, 787 insertions(+), 14 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 12039ebb2c..acc03161e5 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -424,6 +424,7 @@ examples_dirs = [ # New tutorial structure under docs folder tvm_path.joinpath("docs", "get_started", "tutorials"), tvm_path.joinpath("docs", "how_to", "tutorials"), +tvm_path.joinpath("docs", "deep_dive", "relax", "tutorials"), tvm_path.joinpath("docs", "deep_dive", "tensor_ir", "tutorials"), ] @@ -443,6 +444,7 @@ gallery_dirs = [ # New tutorial structure under docs folder "get_started/tutorials/", "how_to/tutorials/", +"deep_dive/relax/tutorials/", "deep_dive/tensor_ir/tutorials/", ] @@ -598,10 +600,10 @@ tvm_alias_check_map = { ## Setup header and other configs import tlcpack_sphinx_addon -footer_copyright = "© 2023 Apache Software Foundation | All rights reserved" +footer_copyright = "© 2024 Apache Software Foundation | All 
rights reserved" footer_note = " ".join( """ -Copyright © 2023 The Apache Software Foundation. Apache TVM, Apache, the Apache feather, +Copyright © 2024 The Apache Software Foundation. Apache TVM, Apache, the Apache feather, and the Apache TVM project logo are either trademarks or registered trademarks of the Apache Software Foundation.""".split( "\n" @@ -614,7 +616,6 @@ header_logo_link = "https://tvm.apache.org/" header_links = [ ("Community", "https://tvm.apache.org/community"), ("Download", "https://tvm.apache.org/download"), -("VTA", "https://tvm.apache.org/vta"), ("Blog", "https://tvm.apache.org/blog"), ("Docs", "https://tvm.apache.org/docs"), ("Conference", "https://tvmconf.org"), diff --git a/docs/deep_dive/relax/abstraction.rst b/docs/deep_dive/relax/abstraction.rst new file mode 100644 index 00..2b9ee8b5d7 --- /dev/null +++ b/docs/deep_dive/relax/abstraction.rst @@ -0,0 +1,73 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +..http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. + +.. _relax-abstraction: + +Graph Abstraction for ML Models +--- +Graph abstraction is a key technique used in machine learning (ML) compilers +to represent and reason about the structure and data flow of ML models. 
By +abstracting the model into a graph representation, the compiler can perform +various optimizations to improve performance and efficiency. This tutorial will +cover the basics of graph abstraction, its key elements
(tvm) branch main updated: [CI] Disable NNPACK build and fix error on Android SDK installaion (#17337)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new d3900bed87 [CI] Disable NNPACK build and fix error on Android SDK installaion (#17337) d3900bed87 is described below commit d3900bed871b2fd54b55039fa4b41fe14b4c33e3 Author: Masahiro Hiramori AuthorDate: Tue Sep 17 10:09:20 2024 +0900 [CI] Disable NNPACK build and fix error on Android SDK installaion (#17337) * disable nnpack on ci * fix android sdk installation error * port from https://github.com/octoml/relax/pull/38 * remove androidsdk from ci image --- cmake/modules/contrib/TFLite.cmake | 4 docker/Dockerfile.ci_adreno | 5 - docker/Dockerfile.ci_cpu| 8 docker/Dockerfile.ci_gpu| 4 docker/Dockerfile.ci_hexagon| 6 -- docker/Dockerfile.demo_vitis_ai | 4 docker/install/ubuntu_install_androidsdk.sh | 14 +++--- docker/install/ubuntu_install_java.sh | 6 +++--- tests/scripts/task_config_build_cpu.sh | 2 -- tests/scripts/task_config_build_gpu.sh | 2 -- 10 files changed, 14 insertions(+), 41 deletions(-) diff --git a/cmake/modules/contrib/TFLite.cmake b/cmake/modules/contrib/TFLite.cmake index b8d6a0daff..255dc5fde7 100644 --- a/cmake/modules/contrib/TFLite.cmake +++ b/cmake/modules/contrib/TFLite.cmake @@ -39,6 +39,10 @@ if(NOT USE_TFLITE STREQUAL "OFF") endif() find_library(TFLITE_CONTRIB_LIB libtensorflow-lite.a ${USE_TFLITE}) file(GLOB_RECURSE TFLITE_DEPS "${USE_TFLITE}/*.a") + # the order of the next libs are important for correct build + list(REMOVE_ITEM TFLITE_DEPS "${USE_TFLITE}/_deps/clog-build/libclog.a" "${USE_TFLITE}/_deps/cpuinfo-build/libcpuinfo.a") + list(APPEND TFLITE_DEPS "${USE_TFLITE}/_deps/cpuinfo-build/libcpuinfo.a") + list(APPEND TFLITE_DEPS "${USE_TFLITE}/_deps/clog-build/libclog.a") list(APPEND TVM_RUNTIME_LINKER_LIBS ${TFLITE_CONTRIB_LIB}) list(APPEND TVM_RUNTIME_LINKER_LIBS ${TFLITE_DEPS}) diff --git 
a/docker/Dockerfile.ci_adreno b/docker/Dockerfile.ci_adreno index 961977c542..30e095b27a 100644 --- a/docker/Dockerfile.ci_adreno +++ b/docker/Dockerfile.ci_adreno @@ -20,11 +20,6 @@ FROM tlcpack/ci-gpu COPY utils/apt-install-and-clear.sh /usr/local/bin/apt-install-and-clear -# Android SDK -COPY install/ubuntu_install_androidsdk.sh /install/ubuntu_install_androidsdk.sh -RUN bash /install/ubuntu_install_androidsdk.sh 25.2.9519653 3.22.1 33.0.2 33 -ENV PATH /opt/android-sdk-linux/platform-tools:$PATH - # Clang tool for CLML source codegen RUN apt-get update && apt-install-and-clear -y clang-format-15 diff --git a/docker/Dockerfile.ci_cpu b/docker/Dockerfile.ci_cpu index ae088f5c9e..17344f7dac 100644 --- a/docker/Dockerfile.ci_cpu +++ b/docker/Dockerfile.ci_cpu @@ -77,10 +77,6 @@ COPY install/ubuntu_install_golang.sh /install/ubuntu_install_golang.sh RUN bash /install/ubuntu_install_golang.sh ENV PATH $PATH:/usr/lib/go-1.18/bin -# NNPACK deps -COPY install/ubuntu_install_nnpack.sh /install/ubuntu_install_nnpack.sh -RUN bash /install/ubuntu_install_nnpack.sh - # ANTLR deps COPY install/ubuntu_install_java.sh /install/ubuntu_install_java.sh RUN bash /install/ubuntu_install_java.sh @@ -129,10 +125,6 @@ RUN bash /install/ubuntu_install_ethosn_driver_stack.sh COPY install/ubuntu_install_vitis_ai_packages_ci.sh /install/ubuntu_install_vitis_ai_packages_ci.sh RUN bash /install/ubuntu_install_vitis_ai_packages_ci.sh -# Android SDK -COPY install/ubuntu_install_androidsdk.sh /install/ubuntu_install_androidsdk.sh -RUN bash /install/ubuntu_install_androidsdk.sh - # PaddlePaddle deps COPY install/ubuntu_install_paddle.sh /install/ubuntu_install_paddle.sh RUN bash /install/ubuntu_install_paddle.sh diff --git a/docker/Dockerfile.ci_gpu b/docker/Dockerfile.ci_gpu index acb0310a41..8d11882098 100644 --- a/docker/Dockerfile.ci_gpu +++ b/docker/Dockerfile.ci_gpu @@ -133,10 +133,6 @@ RUN bash /install/ubuntu_install_wasmtime.sh COPY install/ubuntu_install_redis.sh 
/install/ubuntu_install_redis.sh RUN bash /install/ubuntu_install_redis.sh -# NNPACK deps -COPY install/ubuntu_install_nnpack.sh /install/ubuntu_install_nnpack.sh -RUN bash /install/ubuntu_install_nnpack.sh - # BYODT deps COPY install/ubuntu_install_universal.sh /install/ubuntu_install_universal.sh RUN bash /install/ubuntu_install_universal.sh diff --git a/docker/Dockerfile.ci_hexagon b/docker/Dockerfile.ci_hexagon index 3b4c58ef43..1855e3a9c2 100644 --- a/docker/Dockerfile.ci_hexagon +++ b/docker/Dockerfile.ci_hexagon @@ -58,12 +58,6 @@ RUN bash /install/ubuntu_install_python_package.sh COPY install/ubuntu_install_java.sh /install/ubuntu_install_java.sh RUN
(tvm) branch main updated: [DOCS] Link to project-specific security page (#17378)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new a355a5247c [DOCS] Link to project-specific security page (#17378) a355a5247c is described below commit a355a5247c8c4b3b2cec65260cffb2668edc7741 Author: Arnout Engelen AuthorDate: Tue Sep 17 03:09:10 2024 +0200 [DOCS] Link to project-specific security page (#17378) Make the project-specific information more prominent. This project-specific page already links to the general ASF information at https://apache.org/security/ --- docs/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 8c71f5eb1d..12039ebb2c 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -627,7 +627,7 @@ header_dropdown = { ("Apache Homepage", "https://apache.org/"), ("License", "https://www.apache.org/licenses/"), ("Sponsorship", "https://www.apache.org/foundation/sponsorship.html"), -("Security", "https://www.apache.org/security/"), +("Security", "https://tvm.apache.org/docs/reference/security.html"), ("Thanks", "https://www.apache.org/foundation/thanks.html"), ("Events", "https://www.apache.org/events/current-event"), ],
(tvm) branch main updated: [Relax][PyTorch] Cleanup Tensor Manipulation and Creation op converters (#17376)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 4ab3f82669 [Relax][PyTorch] Cleanup Tensor Manipulation and Creation op converters (#17376) 4ab3f82669 is described below commit 4ab3f82669fb20d77cae47704c857ab39a577417 Author: Masahiro Hiramori AuthorDate: Mon Sep 16 23:13:41 2024 +0900 [Relax][PyTorch] Cleanup Tensor Manipulation and Creation op converters (#17376) * cleanup `_cat()` * cleanup `_cumsum()` * cleanup `_expand()` * cleanup `_flatten()` * cleanup `_permute()` * cleanup `_repeat()` * cleanup `_reshape()` * cleanup `_size()` * cleanup `_split()` * cleanup `_squeeze()` * cleanup `_tile()` * cleanup `_transpose()` * cleanup `chunk()` * cleanup `_arange()` * cleanup `_empty()` * cleanup `_inplace_fill()` * cleanup `_full()` * cleanup `_index_select()` * cleanup `_inplace_masked_fill()` * cleanup `_masked_fill()` * cleanup `_new_ones()` * cleanup `_ones()` * cleanup `_tensor()` * `_inplace_tril_triu()` is an unary op * `_batch_norm_2d()` is a nn ops * `_interpolate()` is a nn ops * `_cross_entropy()` is a nn ops * chore * fix tensor size --- python/tvm/relax/frontend/torch/fx_translator.py | 755 +++ 1 file changed, 358 insertions(+), 397 deletions(-) diff --git a/python/tvm/relax/frontend/torch/fx_translator.py b/python/tvm/relax/frontend/torch/fx_translator.py index 4dc49d20ff..983bce0255 100644 --- a/python/tvm/relax/frontend/torch/fx_translator.py +++ b/python/tvm/relax/frontend/torch/fx_translator.py @@ -212,6 +212,20 @@ class TorchFXImporter: assert dim is not None return self.block_builder.emit(relax.op.nn.softmax(x, dim)) +def _inplace_tril_triu(self, op: Callable) -> Callable: +from torch import fx + +def convert(node: fx.Node) -> relax.Var: +x = self.env[node.args[0]] +k = node.args[1] if len(node.args) > 1 else 0 +assert isinstance(k, int) + +mutated = 
self.block_builder.emit(op(x, k)) +self.env[node.args[0]] = mutated +return mutated + +return convert + def _tril_triu(self, op: Callable) -> Callable: from torch import fx @@ -356,6 +370,29 @@ class TorchFXImporter: res = bias if res is None else self.block_builder.emit(relax.op.add(res, bias)) return res +def _batch_norm_2d_module(self, node: fx.Node) -> relax.Var: +x = self.env[node.args[0]] +module = self.named_modules[node.target] +weight = self.params[module.weight] +bias = self.params[module.bias] +running_mean = self._convert_torch_tensor_to_relax(module.running_mean) +running_var = self._convert_torch_tensor_to_relax(module.running_var) +eps = module.eps + +res_tuple = self.block_builder.emit( +relax.op.nn.batch_norm( +x, +weight, +bias, +running_mean, +running_var, +axis=1, +epsilon=eps, +) +) + +return self.block_builder.emit(relax.TupleGetItem(res_tuple, 0)) + def _conv1d_transpose_impl( self, x: relax.Expr, @@ -683,6 +720,40 @@ class TorchFXImporter: groups=module.groups, ) +def _cross_entropy(self, node: fx.Node) -> relax.Expr: +preds = self.env[node.args[0]] +targets = self.env[node.args[1]] +weights = self.env.get(node.kwargs["weight"], None) +reduction = node.kwargs["reduction"] +ignore_index = node.kwargs["ignore_index"] + +return self.block_builder.emit( +relax.op.nn.nll_loss( +relax.op.nn.log_softmax(preds), targets, weights, reduction, ignore_index +) +) + +def _cross_entropy_module(self, node: fx.Node) -> relax.Expr: +preds = self.env[node.args[0]] +targets = self.env[node.args[1]] +module = self.named_modules[node.target] + +weights = module.weight +if weights is not None: +if weights in self.params: +weights = self.params[weights] +else: +weights = relax.const(weights.numpy(), preds.struct_info.dtype) + +reduction = module.reduction +ignore_index = module.ignore_index + +return self.block_builder.emit( +relax.op.nn.nll_loss( +relax.op.nn.log_softmax(preds), targets, weights, reduction, ignore_index +
(tvm) branch main updated: [Relax][PyTorch] Cleanup Statistical, Search and DataType op converters (#17372)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 48d661c0ee [Relax][PyTorch] Cleanup Statistical, Search and DataType op converters (#17372) 48d661c0ee is described below commit 48d661c0ee277a6594a845423a384b5e1a743350 Author: Masahiro Hiramori AuthorDate: Sun Sep 15 22:07:58 2024 +0900 [Relax][PyTorch] Cleanup Statistical, Search and DataType op converters (#17372) * cleanup `_mean()` * cleanup `_sum()` * cleanup `_argmax_argmin()` * cleanup datatype ops --- python/tvm/relax/frontend/torch/fx_translator.py | 123 ++- 1 file changed, 55 insertions(+), 68 deletions(-) diff --git a/python/tvm/relax/frontend/torch/fx_translator.py b/python/tvm/relax/frontend/torch/fx_translator.py index 1c4796a533..4dc49d20ff 100644 --- a/python/tvm/relax/frontend/torch/fx_translator.py +++ b/python/tvm/relax/frontend/torch/fx_translator.py @@ -884,6 +884,61 @@ class TorchFXImporter: ret.append(self.block_builder.emit(relax.op.squeeze(split[i], axis=dim))) return self.block_builder.emit(relax.Tuple(ret)) +## Statistical ## + +def _mean(self, node: fx.Node) -> relax.Var: +args = self.retrieve_args(node) +x = args[0] +dim = args[1] if len(node.args) > 1 else node.kwargs.get("dim", None) +keepdim = args[2] if len(node.args) > 2 else node.kwargs.get("keepdim", False) +return self.block_builder.emit(relax.op.mean(x, dim, keepdims=keepdim)) + +def _sum(self, node: fx.Node) -> relax.Var: +args = self.retrieve_args(node) +keepdim = node.kwargs["keepdim"] if "keepdim" in node.kwargs else False +if len(args) == 1: +return self.block_builder.emit(relax.op.sum(args[0], keepdims=keepdim)) +return self.block_builder.emit(relax.op.sum(args[0], args[1])) + +## Search ## + +def _argmax_argmin(self, op: Callable) -> Callable: +from torch import fx + +def convert(node: fx.Node): +x = self.env[node.args[0]] +dim = 
node.args[1] if len(node.args) > 1 else node.kwargs.get("dim", None) +keepdim = node.args[2] if len(node.args) > 2 else node.kwargs.get("keepdim", False) +return self.block_builder.emit(op(x, dim, keepdim)) + +return convert + +## DataType ## + +def _float(self, node: fx.Node) -> relax.Var: +return self.block_builder.emit(relax.op.astype(self.env[node.args[0]], "float32")) + +def _half(self, node: fx.Node) -> relax.Var: +return self.block_builder.emit(relax.op.astype(self.env[node.args[0]], "float16")) + +def _to(self, node: fx.Node) -> relax.Var: +import torch + +x = self.env[node.args[0]] +if len(node.args) == 2: +if isinstance(node.args[1], torch.dtype): +dtype = TorchFXImporter._convert_data_type(node.args[1], self.env) +return self.block_builder.emit(relax.op.astype(x, dtype)) +elif "dtype" in node.kwargs: +dtype = TorchFXImporter._convert_data_type(node.kwargs["dtype"], self.env) +return self.block_builder.emit(relax.op.astype(x, dtype)) +return x + +def _type(self, node: fx.Node) -> relax.Var: +x = self.env[node.args[0]] +dtype = TorchFXImporter._convert_data_type(node.args[1], self.env) +return self.block_builder.emit(relax.op.astype(x, dtype)) + ## Creation ## def _arange(self, node: fx.Node) -> relax.Var: @@ -1022,48 +1077,6 @@ class TorchFXImporter: ) ) -## Statistical ## - -def _sum(self, node: fx.Node) -> relax.Var: -args = self.retrieve_args(node) -keepdim = node.kwargs["keepdim"] if "keepdim" in node.kwargs else False -if len(args) == 1: -return self.block_builder.emit(relax.op.sum(args[0], keepdims=keepdim)) -return self.block_builder.emit(relax.op.sum(args[0], args[1])) - -def _mean(self, node: fx.Node) -> relax.Var: -args = self.retrieve_args(node) -keepdim = node.kwargs["keepdim"] if "keepdim" in node.kwargs else False -if len(args) == 1: -return self.block_builder.emit(relax.op.mean(args[0], keepdims=keepdim)) -return self.block_builder.emit(relax.op.mean(args[0], args[1], keepdims=keepdim)) - -## DataType ## - -def _float(self, node: fx.Node) 
-> relax.Var: -return self.block_builder.emit(relax.op.astype(self.env[node.args[0]], "float32"))
(tvm) branch main updated: [FIX] fix bug when normalize iter with different lower bounds (#17360)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new e0105e488d [FIX] fix bug when normalize iter with different lower bounds (#17360) e0105e488d is described below commit e0105e488dd99d5e153428bc1d8c3dec0c324086 Author: Jiaqiang Liu AuthorDate: Sat Sep 14 21:16:07 2024 +0800 [FIX] fix bug when normalize iter with different lower bounds (#17360) If an iter has been normalized with a lower bound, and then try to normalize with a new lower bound, the iter_min need to be updated only when the new lower bound is smaller than the original one. Co-authored-by: liujiaqiang --- src/arith/iter_affine_map.cc | 2 +- tests/python/arith/test_arith_iter_affine_map.py | 21 + 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/src/arith/iter_affine_map.cc b/src/arith/iter_affine_map.cc index 77b20fcdf2..d24c278f10 100644 --- a/src/arith/iter_affine_map.cc +++ b/src/arith/iter_affine_map.cc @@ -696,7 +696,7 @@ class IterMapRewriter : public ExprMutator { // the delta of iter_min when it is updated when the lower bound predicate is present PrimExpr iter_min_delta = make_const(iter_min.dtype(), 0); if (predicate_induced_min.defined()) { -iter_min_delta = predicate_induced_min.value() - iter_min; +iter_min_delta = max(predicate_induced_min.value(), iter_min) - iter_min; iter_min = max(predicate_induced_min.value(), iter_min); } if (predicate_induced_max.defined()) { diff --git a/tests/python/arith/test_arith_iter_affine_map.py b/tests/python/arith/test_arith_iter_affine_map.py index f0e6f05adf..f34dce5c86 100644 --- a/tests/python/arith/test_arith_iter_affine_map.py +++ b/tests/python/arith/test_arith_iter_affine_map.py @@ -346,6 +346,27 @@ def test_predicate(): predicate=tvm.tir.all(2 <= j * 2 + k, 0 <= i * 4 + j), ) +# constraint with differnent lower bound +assert_iter_sum_pattern( +{ +(i * 
16 + j) // 23 * 8 ++ (i * 16 + j) % 23 +- 15: ( +64, +0, +1, +(i * 16 + j) // 23 * 8 + ((i * 16 + j) % 23 + tvm.tir.IntImm("int32", -15)), +) +}, +var_dom([(i, 12), (j, 16)]), +predicate=tvm.tir.And( +tvm.tir.And( +i * 16 + j < 184, tvm.tir.LE(tvm.tir.IntImm("int32", 8), (i * 16 + j) % 23) +), +tvm.tir.LE(tvm.tir.IntImm("int32", 15), (i * 16 + j) % 23), +), +) + # constraint on many disjoint fused iters, case 1 # i4 * 6 + i5 in [3, 9), extent=6 (= scale of i2) # i2 * 30 + i3 * 15 in [30, 90), extent=60 (= scale of i1)
(tvm) branch main updated: [WEBGPU] Update runtime to remove deprecated API (#17371)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new cea4c85022 [WEBGPU] Update runtime to remove deprecated API (#17371) cea4c85022 is described below commit cea4c850221cbbb757f753408274bdcfbd9bc648 Author: Tianqi Chen AuthorDate: Sat Sep 14 07:03:28 2024 -0400 [WEBGPU] Update runtime to remove deprecated API (#17371) This PR updates webgpu runtime code to remove deprecated API. unblocks the CI. --- web/src/webgpu.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/webgpu.ts b/web/src/webgpu.ts index 284d6d3887..d3d431cf1f 100644 --- a/web/src/webgpu.ts +++ b/web/src/webgpu.ts @@ -116,7 +116,7 @@ export async function detectGPUDevice(): Promise
(tvm) branch main updated: [Bugfix][Relax] Preserve existing DataflowBlock in ConvertToDataflow (#17148)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new eb011c7564 [Bugfix][Relax] Preserve existing DataflowBlock in ConvertToDataflow (#17148) eb011c7564 is described below commit eb011c75642c90c30c8ca139922fdde82034ee88 Author: Eric Lunderberg AuthorDate: Fri Sep 13 08:17:28 2024 -0500 [Bugfix][Relax] Preserve existing DataflowBlock in ConvertToDataflow (#17148) The `relax.transform.ConvertToDataflow` identifies portions of a Relax function that satisfy the requirements of a `relax::DataflowBlock`, and converts those portions to a new `DataflowBlock`, provided they are at least some minimum number of operations. Prior to this commit, if a function contained a region that would be converted to a `DataflowBlock`, but also contains existing `DataflowBlock`s that were smaller than the size required for creating a `DataflowBlock`, those existing blocks would be erroneously converted to non-dataflow. This commit updates the `ConvertToDataflow` pass to preserve all existing `DataflowBlock` present in the input. 
--- src/relax/transform/convert_dataflow.cc| 117 - .../relax/test_transform_convert_dataflow.py | 106 +++ 2 files changed, 173 insertions(+), 50 deletions(-) diff --git a/src/relax/transform/convert_dataflow.cc b/src/relax/transform/convert_dataflow.cc index b927307c2e..528a466a9b 100644 --- a/src/relax/transform/convert_dataflow.cc +++ b/src/relax/transform/convert_dataflow.cc @@ -28,6 +28,8 @@ #include #include +#include + namespace tvm { namespace relax { @@ -39,10 +41,59 @@ class DataflowBlockExtractor : public ExprMutator { Array new_blocks; Expr new_body = VisitExpr(seq->body); bool changed = !new_body.same_as(seq->body); -bool dataflow_streak = false; -Array dataflow_bindings; + +// Accumulated bindings that are not going to be added to a +// DataflowBlock, either because they would be illegal within a +// DataflowBlock, or because there were insufficient bindings to +// make a dataflowblock. Because these bindings occur prior to +// `dataflow_bindings`, this array may only be accumulated into +// when `dataflow_bindings` is empty. Array non_dataflow_bindings; +// Current bindings that may legally be added to a DataflowBlock. +Array dataflow_bindings; + +// If present, a DataflowBlock whose bindings are currently in +// `dataflow_bindings`. Used to propagate DataflowBlock to the +// output, even if it doesn't meet the minimum size. +Optional input_dataflow_block; + +// Handle any bindings currently in `dataflow_bindings`. These +// are either pushed to their own block, or to the end of +// `non_dataflow_bindings`, depending on whether the bindings meet +// the minimum size requirement. +auto push_dataflow_bindings = [&]() { + if (dataflow_bindings.empty()) { +// No Dataflow bindings, so no action required. +return; + } + if (dataflow_bindings.size() < min_size_ && !input_dataflow_block) { +// The df block is below the minimum length, and no input +// DataflowBlock needs to be preserved. Combine the blocks +// and reset the dataflow collection. 
+ +non_dataflow_bindings.insert(non_dataflow_bindings.end(), dataflow_bindings.begin(), + dataflow_bindings.end()); + + } else { +// A new DataflowBlock can be generated, with bindings that +// occur after the non-dataflow bindings. +new_blocks.push_back(BindingBlock(non_dataflow_bindings)); +new_blocks.push_back(DataflowBlock(dataflow_bindings)); +non_dataflow_bindings = {}; + +// Making a dataflow block doesn't imply that the function was +// changed. A change requires that this either be a new +// dataflow block, or have additional dataflow bindings in the +// current block. +changed = changed || !input_dataflow_block.defined() || + input_dataflow_block.value()->bindings.size() != dataflow_bindings.size(); + } + + dataflow_bindings = {}; + input_dataflow_block = NullOpt; +}; + for (auto block : seq->blocks) { BindingBlock new_block = this->VisitBindingBlock(block); changed = changed || !new_block.same_as(block); @@ -50,74 +101,40 @@ class DataflowBlockExtractor : public ExprMutator { // For an existing dataflow block, we add to the current streak // or start a new streak in case there will be more dataflow operations // coming up - if (new_block.as()) { -if (!dataflow_streak) { - dataflow_streak = true; -} +
(tvm) branch main updated (751467e98d -> 37555713a0)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git from 751467e98d [Relax] Fix BYOC removing existing ext mods (#17353) add 37555713a0 [Relax][PyTorch] Cleanup Neural Network op converters (#17369) No new revisions were added by this update. Summary of changes: python/tvm/relax/frontend/torch/fx_translator.py | 1526 +++--- 1 file changed, 755 insertions(+), 771 deletions(-)
(tvm) branch main updated: [Relax] Fix BYOC removing existing ext mods (#17353)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 751467e98d [Relax] Fix BYOC removing existing ext mods (#17353) 751467e98d is described below commit 751467e98d0f3acd16d2031e5febef91717b9e98 Author: Wuwei Lin AuthorDate: Thu Sep 12 15:32:31 2024 -0700 [Relax] Fix BYOC removing existing ext mods (#17353) --- src/relax/transform/run_codegen.cc | 4 1 file changed, 4 insertions(+) diff --git a/src/relax/transform/run_codegen.cc b/src/relax/transform/run_codegen.cc index fe0e73d99e..af9ed2fffc 100644 --- a/src/relax/transform/run_codegen.cc +++ b/src/relax/transform/run_codegen.cc @@ -79,6 +79,10 @@ class CodeGenRunner : ExprMutator { auto out_mod = builder_->GetContextIRModule(); if (ext_mods.size()) { + if (auto opt_old_ext_mods = mod->GetAttr>(tvm::attr::kExternalMods)) { +auto old_ext_mods = opt_old_ext_mods.value(); +ext_mods.insert(ext_mods.begin(), old_ext_mods.begin(), old_ext_mods.end()); + } out_mod = WithAttr(out_mod, tvm::attr::kExternalMods, std::move(ext_mods)); }
(tvm) branch main updated (31da947173 -> 090430a284)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git from 31da947173 [Relax][PyTorch] Cleanup binary op converters (#17366) add 090430a284 [DLight] Fix Matmul rule for Conv3D (#17363) No new revisions were added by this update. Summary of changes: python/tvm/dlight/gpu/matmul.py | 100 +++-- tests/python/dlight/test_gpu_conv.py | 118 +++ 2 files changed, 170 insertions(+), 48 deletions(-) create mode 100644 tests/python/dlight/test_gpu_conv.py
(tvm) branch main updated: [Relax][PyTorch] Cleanup binary op converters (#17366)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 31da947173 [Relax][PyTorch] Cleanup binary op converters (#17366) 31da947173 is described below commit 31da94717377df367803c7c0ce8b3451b927a702 Author: Masahiro Hiramori AuthorDate: Thu Sep 12 21:18:13 2024 +0900 [Relax][PyTorch] Cleanup binary op converters (#17366) * introduce `_binary_op()` * cleanup --- python/tvm/relax/frontend/torch/fx_translator.py | 146 --- 1 file changed, 49 insertions(+), 97 deletions(-) diff --git a/python/tvm/relax/frontend/torch/fx_translator.py b/python/tvm/relax/frontend/torch/fx_translator.py index 8d66343254..7efc2412ea 100644 --- a/python/tvm/relax/frontend/torch/fx_translator.py +++ b/python/tvm/relax/frontend/torch/fx_translator.py @@ -19,7 +19,7 @@ # pylint: disable=import-outside-toplevel """PyTorch FX frontend of Relax.""" from typing import Callable, Dict, List, Optional, Tuple, Union -from functools import reduce +from functools import partial, reduce import tvm from tvm import relax @@ -119,23 +119,6 @@ class TorchFXImporter: else: return node -@staticmethod -def _promote_binary_op_args(lhs, rhs): -if isinstance(lhs, relax.Expr) and isinstance(rhs, relax.Expr): -return lhs, rhs -elif isinstance(lhs, relax.Expr): -assert isinstance(lhs.struct_info, relax.TensorStructInfo) -return lhs, relax.const(rhs, lhs.struct_info.dtype) -elif isinstance(rhs, relax.Expr): -assert isinstance(rhs.struct_info, relax.TensorStructInfo) -return relax.const(lhs, rhs.struct_info.dtype), rhs -else: -assert False - -def _call_binary_op(self, op, lhs, rhs): -lhs, rhs = TorchFXImporter._promote_binary_op_args(lhs, rhs) -return self.block_builder.emit(op(lhs, rhs)) - ## Unary Ops ## def _unary_op(self, op: Callable) -> Callable: @@ -240,66 +223,38 @@ class TorchFXImporter: return convert -## Arithmetic ## +## Binary 
Ops ## -def _add(self, node: fx.Node) -> relax.Expr: -lhs, rhs = self.retrieve_args(node) -if isinstance(lhs, relax.Var) or isinstance(rhs, relax.Var): -return self._call_binary_op(relax.op.add, lhs, rhs) -elif isinstance(lhs, relax.expr.Constant): -return self._call_binary_op( -relax.op.add, lhs, relax.const(rhs, dtype=lhs.struct_info.dtype) -) -elif isinstance(rhs, relax.expr.Constant): -return self._call_binary_op( -relax.op.add, relax.const(lhs, dtype=rhs.struct_info.dtype), rhs -) -return lhs + rhs - -def _max(self, node: fx.Node) -> relax.Expr: -lhs, rhs = self.retrieve_args(node) -if isinstance(lhs, relax.Var) or isinstance(rhs, relax.Var): -return self._call_binary_op(relax.op.maximum, lhs, rhs) - -def _floordiv(self, node: fx.Node) -> relax.Expr: -lhs, rhs = self.retrieve_args(node) -if isinstance(lhs, relax.Var) or isinstance(rhs, relax.Var): -return self._call_binary_op(relax.op.floor_divide, lhs, rhs) -return lhs // rhs - -def _mul(self, node: fx.Node) -> relax.Expr: -lhs, rhs = self.retrieve_args(node) -if isinstance(lhs, relax.Var) or isinstance(rhs, relax.Var): -return self._call_binary_op(relax.op.multiply, lhs, rhs) -return lhs * rhs - -def _pow(self, node: fx.Node) -> relax.Expr: -lhs, rhs = self.retrieve_args(node) -if isinstance(lhs, relax.Var) or isinstance(rhs, relax.Var): -return self._call_binary_op(relax.op.power, lhs, rhs) -return lhs**rhs - -def _sub(self, node: fx.Node) -> relax.Expr: -lhs, rhs = self.retrieve_args(node) -if isinstance(lhs, relax.Var) or isinstance(rhs, relax.Var): -return self._call_binary_op(relax.op.subtract, lhs, rhs) -return lhs - rhs - -def _truediv(self, node: fx.Node) -> relax.Expr: -lhs, rhs = self.retrieve_args(node) -if isinstance(lhs, relax.Var) or isinstance(rhs, relax.Var): -return self._call_binary_op(relax.op.divide, lhs, rhs) -return lhs / rhs - -## Compare ## - -def _lt(self, node: fx.Node) -> relax.Expr: -lhs, rhs = self.retrieve_args(node) -return self._call_binary_op(relax.op.less, lhs, rhs) - -def 
_eq(self, node: fx.Node) -> relax.Expr: -lhs, rhs = self.retrieve_args(node) -return self._call_binary_op(relax.op.equal, lhs, rhs) +def _binary_op(self, relax_op: Callable, intrinsic_op: Callable) ->
(tvm) branch main updated (38e726aab1 -> 5265d215fe)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git from 38e726aab1 [Relax][PyTorch] Cleanup unary op converters (#17356) add 5265d215fe [Relax] Add new NN allgather operator (#17359) No new revisions were added by this update. Summary of changes: python/tvm/relax/frontend/nn/op.py | 22 ++ 1 file changed, 22 insertions(+)
(tvm) branch main updated: [Relax][PyTorch] Cleanup unary op converters (#17356)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 38e726aab1 [Relax][PyTorch] Cleanup unary op converters (#17356) 38e726aab1 is described below commit 38e726aab191d5c16a7d98b2191a5f97f7fef410 Author: Masahiro Hiramori AuthorDate: Thu Sep 12 04:18:07 2024 +0900 [Relax][PyTorch] Cleanup unary op converters (#17356) * classify into 9 types of ops * introduce `_unary_op()` * cleanup `_clamp()` * cleanup `_gelu()` * cleanup `_hardsigmoid()` and `_hardswish()` * cleanup `_leakyrelu()` * cleanup `_log_softmax()` * cleanup `_round()` * cleanup `_softmax()` * cleanup `_tril_triu()` * replace `fx.node.Node` with `fx.Node` --- python/tvm/relax/frontend/torch/fx_translator.py | 566 --- 1 file changed, 288 insertions(+), 278 deletions(-) diff --git a/python/tvm/relax/frontend/torch/fx_translator.py b/python/tvm/relax/frontend/torch/fx_translator.py index aed38d7c49..8d66343254 100644 --- a/python/tvm/relax/frontend/torch/fx_translator.py +++ b/python/tvm/relax/frontend/torch/fx_translator.py @@ -35,7 +35,7 @@ class TorchFXImporter: import torch # type: ignore from torch import fx -self.env: Dict[fx.node.Node, relax.Expr] = {} +self.env: Dict[fx.Node, relax.Expr] = {} self.params: Dict[torch.Tensor, relax.Expr] = {} self.named_modules: Dict[str, torch.Module] = None self.block_builder: relax.BlockBuilder = None @@ -108,7 +108,7 @@ class TorchFXImporter: def _retrieve_args(self, node): from torch import fx -if isinstance(node, fx.node.Node): +if isinstance(node, fx.Node): return self.env[node] elif isinstance(node, tuple): return tuple(self._retrieve_args(x) for x in node) @@ -136,33 +136,113 @@ class TorchFXImporter: lhs, rhs = TorchFXImporter._promote_binary_op_args(lhs, rhs) return self.block_builder.emit(op(lhs, rhs)) -## Arithmetic ## +## Unary Ops ## -def _exp(self, node: fx.node.Node) -> 
relax.Var: -return self.block_builder.emit(relax.op.exp(self.env[node.args[0]])) +def _unary_op(self, op: Callable) -> Callable: +from torch import fx -def _sigmoid(self, node: fx.node.Node) -> relax.Var: -return self.block_builder.emit(relax.op.sigmoid(self.env[node.args[0]])) +def convert(node: fx.Node) -> relax.Var: +return self.block_builder.emit(op(self.env[node.args[0]])) -def _sqrt(self, node: fx.node.Node) -> relax.Expr: -arg = self.env[node.args[0]] -if isinstance(arg, (int, float)): -arg = relax.const(arg, "float32") -return self.block_builder.emit(relax.op.sqrt(arg)) +return convert -def _rsqrt(self, node: fx.node.Node) -> relax.Expr: -arg = self.env[node.args[0]] -if isinstance(arg, (int, float)): -arg = relax.const(arg, "float32") -return self.block_builder.emit(relax.op.rsqrt(arg)) +def _clamp(self, node: fx.Node) -> relax.Expr: +args = self.retrieve_args(node) +a_min = args[1] if len(args) > 1 else node.kwargs["min"] +a_max = args[2] if len(args) > 2 else node.kwargs["max"] +if not isinstance(a_min, (int, float)): +raise ValueError( +f"TVM only supports constant min value for torch.clamp/clip, " +f"but got {a_min} with type {type(a_min)}" +) +if not isinstance(a_max, (int, float)): +raise ValueError( +f"TVM only supports constant max value for torch.clamp/clip, " +f"but got {a_max} with type {type(a_max)}" +) +return self.block_builder.emit(relax.op.clip(args[0], a_min, a_max)) + +def _gelu(self, node: fx.Node) -> relax.Expr: +approximate = node.kwargs.get("approximate", "none") +if approximate == "none": +return self.block_builder.emit(relax.op.nn.gelu(self.env[node.args[0]])) +elif approximate == "tanh": +return self.block_builder.emit(relax.op.nn.gelu_tanh(self.env[node.args[0]])) +else: +raise KeyError("Unregonized approximate algorithm for gelu: {}.".format(approximate)) + +def _hardsigmoid(self, node: fx.Node) -> relax.Var: +args = self.retrieve_args(node) +x = args[0] +dtype = x.struct_info.dtype +x0 = relax.op.add(x, relax.const(3, dtype)) 
+x1 = relax.op.clip(x0, 0, 6) +return self.block_builder.emit
(tvm) branch main updated: [CI] Upgrade github upload-artifact action (#17355)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new f02d295e0b [CI] Upgrade github upload-artifact action (#17355) f02d295e0b is described below commit f02d295e0b38f48efebedcdb62bd82ffa17ef15e Author: Wuwei Lin AuthorDate: Mon Sep 9 17:55:50 2024 -0700 [CI] Upgrade github upload-artifact action (#17355) --- .github/workflows/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 759acd1fa5..db2d870da9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -175,7 +175,7 @@ jobs: export PATH="${ANDROID_NDK_LATEST_HOME}:$PATH" gradle clean build - name: Upload android_rpc APK -uses: actions/upload-artifact@v2 +uses: actions/upload-artifact@v4 with: name: android_rpc-debug.apk path: ./apps/android_rpc/app/build/outputs/apk/debug/app-debug.apk @@ -186,7 +186,7 @@ jobs: export PATH="${ANDROID_NDK_LATEST_HOME}:$PATH" gradle clean build - name: Upload android_deploy APK -uses: actions/upload-artifact@v2 +uses: actions/upload-artifact@v4 with: name: android_deploy-debug.apk path: ./apps/android_deploy/app/build/outputs/apk/debug/app-debug.apk
(tvm-site) branch main updated: Remove Legacy Components (#45)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm-site.git The following commit(s) were added to refs/heads/main by this push: new 17999e10e9 Remove Legacy Components (#45) 17999e10e9 is described below commit 17999e10e969e2e7fb0dfd732ee1ae7cb0eda852 Author: Siyuan Feng AuthorDate: Sat Sep 7 19:35:38 2024 +0800 Remove Legacy Components (#45) Remove: - VTA page - TVM Conference card in community page - Roadmap card in community page Update copyright year from 2023 to 2024 --- _data/community.yml | 8 _data/menus.yml | 2 -- _layouts/default.html | 4 ++-- vta.md| 36 4 files changed, 2 insertions(+), 48 deletions(-) diff --git a/_data/community.yml b/_data/community.yml index 762f59fe6f..831d042e5b 100644 --- a/_data/community.yml +++ b/_data/community.yml @@ -7,18 +7,10 @@ des: We use discuss forum for general discussions and usage trouble shooting. We welcome all topics related to the TVM stack. buttonname: Join The Discussion link: https://discuss.tvm.apache.org/ -- cardname: TVM Conference - des: We hold a yearly conference on the state of the art for TVM and would love for you to join us. You can also view videos from our past conferences here. - buttonname: Learn About the Conference - link: https://tvmcon.org/ - cardname: Github Issues des: We use our Github issue tracker for developer RFCs and roadmap discussion. buttonname: Github Issue Tracker link: https://github.com/apache/tvm/issues/ -- cardname: Roadmap - des: We are releasing our public roadmaps on github. Please reach out are interested working in aspects that are not on the roadmap. - buttonname: See The Public Roadmap - link: https://github.com/apache/tvm/projects/1 - cardname: Contributing des: As a community project, we welcome contributions! The package is developed and used by the community. 
buttonname: Contribute diff --git a/_data/menus.yml b/_data/menus.yml index df0be29cdb..2f70d9517d 100644 --- a/_data/menus.yml +++ b/_data/menus.yml @@ -2,8 +2,6 @@ link: /community - name: Download link: /download -- name: VTA - link: /vta - name: Blog link: /blog - name: Docs diff --git a/_layouts/default.html b/_layouts/default.html index c804cc9cd6..4b82709ddb 100644 --- a/_layouts/default.html +++ b/_layouts/default.html @@ -38,7 +38,7 @@ -© 2023 Apache Software Foundation | All right reserved +© 2024 Apache Software Foundation | All right reserved @@ -46,7 +46,7 @@ - Copyright © 2023 The Apache Software Foundation. Apache TVM, Apache, the Apache feather, and the Apache TVM project logo are either trademarks or registered trademarks of the Apache Software Foundation. + Copyright © 2024 The Apache Software Foundation. Apache TVM, Apache, the Apache feather, and the Apache TVM project logo are either trademarks or registered trademarks of the Apache Software Foundation. diff --git a/vta.md b/vta.md deleted file mode 100644 index 37aa9e5035..00 --- a/vta.md +++ /dev/null @@ -1,36 +0,0 @@ -layout: page -title: "VTA" -order : 13 -group : navigation -description: "VTA" -{% include JB/setup %} - -# About VTA - -The Versatile Tensor Accelerator (VTA) is an extension of the Apache(incubating) TVM framework designed to advance deep learning and hardware innovation. -VTA is a programmable accelerator that exposes a RISC-like programming abstraction to describe compute and memory operations at the tensor level. We designed VTA to expose the most salient and common characteristics of mainstream deep learning accelerators, such as tensor operations, DMA load/stores, and explicit compute/memory arbitration. - -VTA is more than a standalone accelerator design: it’s an end-to-end solution that includes drivers, a JIT runtime, and an optimizing compiler stack based on TVM. 
-The current release includes a behavioral hardware simulator, as well as the infrastructure to deploy VTA on low-cost FPGA hardware for fast prototyping. -By extending the TVM stack with a customizable, and open source deep learning hardware accelerator design, we are exposing a transparent end-to-end deep learning stack from the high-level deep learning framework, down to the actual hardware design and implementation. -This forms a truly end-to-end, from software-to-hardware open source stack for deep learning systems. - -{:center: style="text-align: center"} -![image](https://raw.githubusercontent.com/uwsampl/web-data/main/vta/blogpost/vta_stack.png){: width="50%"} -{:center} - -The VTA and TVM stack together constitute a blueprint for end-to-end, accelerator-centric deep learning system that can: - -- Provide an open deep learning system sta
(tvm) branch main updated: [Relax][Transform] Handle tuple return in RemoveUnusedOutputs (#17253)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new ff884b609a [Relax][Transform] Handle tuple return in RemoveUnusedOutputs (#17253) ff884b609a is described below commit ff884b609a2eb94fef1f061bff0ec867b79d4ba0 Author: Eric Lunderberg AuthorDate: Fri Sep 6 11:28:28 2024 -0500 [Relax][Transform] Handle tuple return in RemoveUnusedOutputs (#17253) * [Relax][Transform] Handle tuple return in RemoveUnusedOutputs Prior to this commit, the `relax.transform.RemoveUnusedOutputs` pass only marked a tuple element as used if it occurred in a `TupleGetItem` node. This ignored use cases where a tuple is used as an aggregate object, such as returning a tuple from a function. This would collect incorrect results for a Relax function that calls a subroutine, receives a tuple as the return value of the subroutine, then returns that tuple. This commit updates `RemoveUnusedOutputs` to look for usage of a tuple object, not just for usage in `TupleGetItem`. 
Closes https://github.com/apache/tvm/issues/17247 --- src/relax/transform/remove_unused_outputs.cc | 59 ++ .../relax/test_transform_remove_unused_outputs.py | 20 2 files changed, 59 insertions(+), 20 deletions(-) diff --git a/src/relax/transform/remove_unused_outputs.cc b/src/relax/transform/remove_unused_outputs.cc index e3bf12382c..9a5c31e79b 100644 --- a/src/relax/transform/remove_unused_outputs.cc +++ b/src/relax/transform/remove_unused_outputs.cc @@ -92,29 +92,48 @@ class PartialTupleUsageCollector : ExprVisitor { } void VisitExpr_(const TupleGetItemNode* op) override { -Expr tuple = UnwrapBindings(op->tuple); - -if (auto call = tuple.as()) { - if (auto opt_callee = call->op.as()) { -auto callee = opt_callee.value(); -if (auto it = output_usage_mask_.find(callee); it != output_usage_mask_.end()) { - auto& used_indices = it->second; - - CHECK_GE(op->index, 0) << "IndexError: " - << "Indices for TupleGetItem must be non-negative, " - << "but expression " << GetRef(op) - << " uses a tuple index of " << op->index; - size_t index = op->index; - - CHECK_LT(index, used_indices.size()) - << "IndexError: " - << "Indices for TupleGetItem must be less than the size of the tuple, " - << "but expression " << GetRef(op) << " uses a tuple index of " << op->index - << " for a tuple of size " << used_indices.size(); - used_indices[index] = true; +if (auto* usage_mask_ptr = GetCalleeUsageMask(op->tuple)) { + auto& used_indices = *usage_mask_ptr; + + CHECK_GE(op->index, 0) << "IndexError: " + << "Indices for TupleGetItem must be non-negative, " + << "but expression " << GetRef(op) << " uses a tuple index of " + << op->index; + size_t index = op->index; + + CHECK_LT(index, used_indices.size()) + << "IndexError: " + << "Indices for TupleGetItem must be less than the size of the tuple, " + << "but expression " << GetRef(op) << " uses a tuple index of " << op->index + << " for a tuple of size " << used_indices.size(); + used_indices[index] = true; +} + } + + void VisitExpr_(const 
VarNode* op) override { +if (auto* usage_mask_ptr = GetCalleeUsageMask(GetRef(op))) { + auto& usage_mask = *usage_mask_ptr; + for (size_t i = 0; i < usage_mask.size(); i++) { +usage_mask[i] = true; + } +} + } + + std::vector* GetCalleeUsageMask(Expr expr) { +if (!expr->struct_info_.as()) { + return nullptr; +} + +expr = UnwrapBindings(expr); +if (auto call = expr.as()) { + if (auto callee = call->op.as()) { +if (auto it = output_usage_mask_.find(callee.value()); it != output_usage_mask_.end()) { + return &it->second; } } } + +return nullptr; } Expr UnwrapBindings(Expr expr) const { diff --git a/tests/python/relax/test_transform_remove_unused_outputs.py b/tests/python/relax/test_transform_remove_unused_outputs.py index c0405ca58d..365ce1695d 100644 --- a/tests/python/relax/test_transform_remove_unused_ou
(tvm) branch main updated (f33cc8f559 -> f432ebd5f5)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git from f33cc8f559 [Relax][PyTorch] Add support for `torch.ops.aten.sym_size.int` (#17342) add f432ebd5f5 [Relax] Update GlobalVar name in AttachGlobalSymbol (#17202) No new revisions were added by this update. Summary of changes: include/tvm/ir/analysis.h | 63 ++ .../{relax/backend.h => ir/replace_global_var.h} | 40 + python/tvm/ir/__init__.py | 3 + .../{arith/_ffi_api.py => ir/_ffi_analysis_api.py} | 5 +- .../hexagon/batch_matmul.py => ir/analysis.py} | 28 --- .../decorate_device_scope.cc => ir/analysis.cc}| 42 +- src/ir/replace_global_var.cc | 63 ++ .../{shape_analysis.cc => collect_call_map.cc} | 49 +-- src/relax/transform/attach_global_symbol.cc| 48 --- src/relax/transform/dead_code_elimination.cc | 94 + src/relax/transform/replace_global_var.cc | 66 +++ .../tir/analysis/collect_call_map.cc | 46 +- src/tir/transforms/replace_global_var.cc | 68 +++ tests/python/ir/analysis/test_collect_call_map.py | 97 ++ .../relax/test_transform_attach_global_symbol.py | 6 +- .../relax/test_transform_dead_code_elimination.py | 60 - 16 files changed, 591 insertions(+), 187 deletions(-) create mode 100644 include/tvm/ir/analysis.h copy include/tvm/{relax/backend.h => ir/replace_global_var.h} (53%) copy python/tvm/{arith/_ffi_api.py => ir/_ffi_analysis_api.py} (90%) copy python/tvm/{topi/hexagon/batch_matmul.py => ir/analysis.py} (62%) copy src/{tir/transforms/decorate_device_scope.cc => ir/analysis.cc} (52%) create mode 100644 src/ir/replace_global_var.cc copy src/relax/analysis/{shape_analysis.cc => collect_call_map.cc} (52%) create mode 100644 src/relax/transform/replace_global_var.cc copy include/tvm/tir/usmp/analysis.h => src/tir/analysis/collect_call_map.cc (54%) create mode 100644 src/tir/transforms/replace_global_var.cc create mode 100644 tests/python/ir/analysis/test_collect_call_map.py
(tvm) branch main updated: [MSC][BugFix] Bugfix for strided_slice op (#17315)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new dbe95c43b2 [MSC][BugFix] Bugfix for strided_slice op (#17315) dbe95c43b2 is described below commit dbe95c43b2afde26eab428181d47cfc939d153c1 Author: Archermmt AuthorDate: Fri Sep 6 20:45:36 2024 +0800 [MSC][BugFix] Bugfix for strided_slice op (#17315) support strided_slice --- src/contrib/msc/core/codegen/base_codegen.h| 6 +- src/contrib/msc/core/ir/graph_builder.cc | 13 - .../msc/core/transform/bind_named_params.cc| 2 +- src/contrib/msc/core/utils.cc | 67 +- src/contrib/msc/core/utils.h | 54 ++--- tests/python/contrib/test_msc/test_graph_build.py | 3 - .../contrib/test_msc/test_translate_relax.py | 4 -- .../contrib/test_msc/test_translate_tensorflow.py | 4 -- .../contrib/test_msc/test_translate_torch.py | 3 - 9 files changed, 128 insertions(+), 28 deletions(-) diff --git a/src/contrib/msc/core/codegen/base_codegen.h b/src/contrib/msc/core/codegen/base_codegen.h index 19d8b524b9..acaac896a1 100644 --- a/src/contrib/msc/core/codegen/base_codegen.h +++ b/src/contrib/msc/core/codegen/base_codegen.h @@ -179,17 +179,17 @@ class BaseCodeGen { return 1; } if (node->scope.size() == scopes_.top().size()) { - ICHECK(StringUtils::CompareArrays(node->scope, scopes_.top())) + ICHECK(ArrayUtils::CompareArrays(node->scope, scopes_.top())) << "Scope mismatch, node " << node->scope << " compare to current " << scopes_.top(); return 0; } else if (node->scope.size() == scopes_.top().size() + 1) { - ICHECK(StringUtils::CompareArrays(node->scope, scopes_.top(), scopes_.top().size())) + ICHECK(ArrayUtils::CompareArrays(node->scope, scopes_.top(), scopes_.top().size())) << "Scope increase mismatch, node " << node->scope << " compare to current " << scopes_.top(); scopes_.push(node->scope); return 1; } else if (node->scope.size() == scopes_.top().size() - 1) 
{ - ICHECK(StringUtils::CompareArrays(node->scope, scopes_.top(), node->scope.size())) + ICHECK(ArrayUtils::CompareArrays(node->scope, scopes_.top(), node->scope.size())) << "Scope decrease mismatch, node " << node->scope << " compare to current " << scopes_.top(); scopes_.pop(); diff --git a/src/contrib/msc/core/ir/graph_builder.cc b/src/contrib/msc/core/ir/graph_builder.cc index d35a462579..a968df4204 100644 --- a/src/contrib/msc/core/ir/graph_builder.cc +++ b/src/contrib/msc/core/ir/graph_builder.cc @@ -23,6 +23,7 @@ #include "graph_builder.h" +#include #include namespace tvm { @@ -71,6 +72,13 @@ void RelaxFuncValueGetter::VisitExpr_(const relax::CallNode* op) { for (const auto& arg : op->args) { if (const auto* s_node = arg.as()) { values_.push_back(StringUtils::ToString(s_node->value)); +} else if (const auto* s_node = arg.as()) { + bool all_values = + std::all_of(s_node->fields.begin(), s_node->fields.end(), + [](const relax::Expr& e) { return e->IsInstance(); }); + if (all_values) { +values_.push_back(StringUtils::ToString(s_node->fields)); + } } } } @@ -337,6 +345,8 @@ const MSCJoint RelaxGraphBuilder::AddNode(const Expr& expr, const Optional ICHECK(input_types[i] != "input") << i << " th PrimValue of " << optype << " should has special type, get " << input_types; attrs.Set(input_types[i], StringUtils::ToString(s_node->value)); + } else if (input_types[i] != "input" && arg->IsInstance()) { +attrs.Set(input_types[i], StringUtils::ToString(arg)); } } for (size_t i = call->args.size(); i < input_types.size(); i++) { @@ -371,7 +381,8 @@ const MSCJoint RelaxGraphBuilder::AddNode(const Expr& expr, const Optional Array arg_names; if (expr_tensor_map_.count(arg)) { arg_names = expr_tensor_map_[arg]; - } else if (const auto* tuple_node = arg.as()) { + } else if (input_types[i] == "input" && arg->IsInstance()) { +const auto* tuple_node = arg.as(); for (const auto& f : tuple_node->fields) { ICHECK(expr_tensor_map_.count(f)) << "Can not find tuple field " << f; for 
(const auto& in_name : expr_tensor_map_[f]) { diff --git a
(tvm) branch revert-16183-ffi_boxed_primitives_for_runtime deleted (was cc858c3f11)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch revert-16183-ffi_boxed_primitives_for_runtime in repository https://gitbox.apache.org/repos/asf/tvm.git was cc858c3f11 Revert "[FFI][RUNTIME] Introduce runtime boxed types for int/float/bool (#16183)" The revisions that were on this branch are still contained in other references; therefore, this change does not discard any commits from the repository.
(tvm) branch main updated: [Apps] Remove mxnet dependency from /apps/ios_rpc (#17299)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 823763db5b [Apps] Remove mxnet dependency from /apps/ios_rpc (#17299) 823763db5b is described below commit 823763db5b35aec04fb021b47d3f8b06db08e0b0 Author: Masahiro Hiramori AuthorDate: Thu Sep 5 23:01:09 2024 +0900 [Apps] Remove mxnet dependency from /apps/ios_rpc (#17299) use torchvision's mobilenet_v2 instead of mxnet --- apps/ios_rpc/tests/ios_rpc_mobilenet.py | 37 +++-- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/apps/ios_rpc/tests/ios_rpc_mobilenet.py b/apps/ios_rpc/tests/ios_rpc_mobilenet.py index 1872cf6787..85a4303177 100644 --- a/apps/ios_rpc/tests/ios_rpc_mobilenet.py +++ b/apps/ios_rpc/tests/ios_rpc_mobilenet.py @@ -23,7 +23,6 @@ import sys import coremltools import numpy as np import tvm -from mxnet import gluon from PIL import Image from tvm import relay, rpc from tvm.contrib import coreml_runtime, graph_executor, utils, xcode @@ -51,6 +50,8 @@ def compile_metal(src, target): def prepare_input(): +from torchvision import transforms + img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true"; img_name = "cat.png" synset_url = "".join( @@ -62,22 +63,36 @@ def prepare_input(): ] ) synset_name = "imagenet1000_clsid_to_human.txt" -img_path = download_testdata(img_url, "cat.png", module="data") +img_path = download_testdata(img_url, img_name, module="data") synset_path = download_testdata(synset_url, synset_name, module="data") with open(synset_path) as f: synset = eval(f.read()) -image = Image.open(img_path).resize((224, 224)) +input_image = Image.open(img_path) -image = np.array(image) - np.array([123.0, 117.0, 104.0]) -image /= np.array([58.395, 57.12, 57.375]) -image = image.transpose((2, 0, 1)) -image = image[np.newaxis, :] -return image.astype("float32"), synset +preprocess = 
transforms.Compose( +[ +transforms.Resize(256), +transforms.CenterCrop(224), +transforms.ToTensor(), +transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), +] +) +input_tensor = preprocess(input_image) +input_batch = input_tensor.unsqueeze(0) +return input_batch.detach().cpu().numpy(), synset def get_model(model_name, data_shape): -gluon_model = gluon.model_zoo.vision.get_model(model_name, pretrained=True) -mod, params = relay.frontend.from_mxnet(gluon_model, {"data": data_shape}) +import torch +import torchvision + +torch_model = getattr(torchvision.models, model_name)(weights="IMAGENET1K_V1").eval() +input_data = torch.randn(data_shape) +scripted_model = torch.jit.trace(torch_model, input_data) + +input_infos = [("data", input_data.shape)] +mod, params = relay.frontend.from_pytorch(scripted_model, input_infos) + # we want a probability so add a softmax operator func = mod["main"] func = relay.Function( @@ -90,7 +105,7 @@ def get_model(model_name, data_shape): def test_mobilenet(host, port, key, mode): temp = utils.tempdir() image, synset = prepare_input() -model, params = get_model("mobilenetv2_1.0", image.shape) +model, params = get_model("mobilenet_v2", image.shape) def run(mod, target): with relay.build_config(opt_level=3):
(tvm) branch main updated: [Relax][PyTorch][Fix] use`_convert_torch_tensor_to_relax()` where possible (#17335)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new e65aab6a4f [Relax][PyTorch][Fix] use`_convert_torch_tensor_to_relax()` where possible (#17335) e65aab6a4f is described below commit e65aab6a4f55f4b405ef2713f842d6a3b761151b Author: Masahiro Hiramori AuthorDate: Thu Sep 5 22:30:12 2024 +0900 [Relax][PyTorch][Fix] use`_convert_torch_tensor_to_relax()` where possible (#17335) * use `_convert_torch_tensor_to_relax` where possible * add type annotation --- python/tvm/relax/frontend/torch/fx_translator.py | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/tvm/relax/frontend/torch/fx_translator.py b/python/tvm/relax/frontend/torch/fx_translator.py index 21a0b2d564..6e60c3bb6f 100644 --- a/python/tvm/relax/frontend/torch/fx_translator.py +++ b/python/tvm/relax/frontend/torch/fx_translator.py @@ -62,7 +62,7 @@ class TorchFXImporter: return attr_itr @staticmethod -def _convert_data_type(input_type, env: Optional[Dict] = None): +def _convert_data_type(input_type: Union[str, torch.dtype], env: Optional[Dict] = None): """converts the PyTorch scalar type input_type to a TVM dtype.""" import torch # type: ignore @@ -1206,9 +1206,8 @@ class TorchFXImporter: module = self.named_modules[node.target] weight = self.params[module.weight] bias = self.params[module.bias] -dtype = TorchFXImporter._convert_data_type(str(module.running_mean.dtype)) -running_mean = relax.const(module.running_mean.cpu().detach().numpy(), dtype) -running_var = relax.const(module.running_var.cpu().detach().numpy(), dtype) +running_mean = self._convert_torch_tensor_to_relax(module.running_mean) +running_var = self._convert_torch_tensor_to_relax(module.running_var) eps = module.eps res_tuple = self.block_builder.emit( @@ -1769,7 +1768,7 @@ class TorchFXImporter: dtype = 
self._convert_data_type(str(param.data.dtype)) if dtype in ("float32", "float16"): if not keep_params_as_input: -self.params[param] = relax.const(param.data.cpu().numpy(), dtype) +self.params[param] = self._convert_torch_tensor_to_relax(param) else: raise ValueError("Unsupported data type for model parameters: %s" % dtype) # Translate the model.
(tvm) branch main updated: [KVCache] Add tree attention with paged cache support (#17326)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 8059c770dc [KVCache] Add tree attention with paged cache support (#17326) 8059c770dc is described below commit 8059c770dc563411717a44d9409888be3f85b7ee Author: Wuwei Lin AuthorDate: Tue Sep 3 11:39:26 2024 -0700 [KVCache] Add tree attention with paged cache support (#17326) --- python/tvm/relax/frontend/nn/llm/kv_cache.py | 3 +- python/tvm/relax/frontend/nn/llm/tree_attn.py | 536 - src/runtime/relax_vm/paged_kv_cache.cc | 384 +-- ...runtime_builtin_paged_attention_kv_cache_tir.py | 76 ++- 4 files changed, 828 insertions(+), 171 deletions(-) diff --git a/python/tvm/relax/frontend/nn/llm/kv_cache.py b/python/tvm/relax/frontend/nn/llm/kv_cache.py index 5ddce76eab..7b14c67a2e 100644 --- a/python/tvm/relax/frontend/nn/llm/kv_cache.py +++ b/python/tvm/relax/frontend/nn/llm/kv_cache.py @@ -30,7 +30,7 @@ from tvm.script import tir as T from tvm.target import Target from .position_embedding import llama_rope_with_position_map, switch_rope_freq_func -from .tree_attn import tree_attn +from .tree_attn import tree_attn, tree_attn_with_paged_kv_cache def get_max_num_threads_per_block(target: Target) -> int: @@ -257,6 +257,7 @@ class FlashInferPagedKVCache(PagedKVCache): # pylint: disable=too-few-public-me bb.add_func(_kv_cache_debug_get_kv(num_hidden_layers, num_key_value_heads, head_dim, dtype), "kv_cache_debug_get_kv"), bb.add_func(_compact_kv_copy(num_key_value_heads, head_dim, dtype, target), "kv_cache_compact_kv_copy"), bb.add_func(tree_attn(num_key_value_heads, num_attention_heads, head_dim, dtype, rope_scaling, target), "tir_attention_prefill_with_tree_mask"), +bb.add_func(tree_attn_with_paged_kv_cache(num_key_value_heads, num_attention_heads, head_dim, dtype, rope_scaling, target), 
"tir_attention_prefill_with_tree_mask_with_paged_kv_cache"), rope_ext_factors, # fmt: on # pylint: enable=line-too-long diff --git a/python/tvm/relax/frontend/nn/llm/tree_attn.py b/python/tvm/relax/frontend/nn/llm/tree_attn.py index 069eb48923..9e4a7ed97e 100644 --- a/python/tvm/relax/frontend/nn/llm/tree_attn.py +++ b/python/tvm/relax/frontend/nn/llm/tree_attn.py @@ -62,11 +62,29 @@ def _rope( return expr -def _tree_mask(row, col, mask_ptr, offset, stride, kv_len): -return tir.all(col < kv_len, mask_ptr[offset + row * stride + col] == 1) +def _check_tree_order(tree_order_indptr, tree_order, batch, row, col, kv_len, qo_len): +tree_order_len = tree_order_indptr[batch + 1] - tree_order_indptr[batch] + +tree_start = kv_len - tree_order_len +child_idx_in_tree = row + tree_order_len - qo_len +parent_idx_in_tree = col - tree_start +return tir.all( +col < kv_len, +tir.any( +col < tree_start, +tir.all( +tree_order[tree_order_indptr[batch] + child_idx_in_tree, 0] +>= tree_order[tree_order_indptr[batch] + parent_idx_in_tree, 0], +tree_order[tree_order_indptr[batch] + child_idx_in_tree, 0] +< tree_order[tree_order_indptr[batch] + parent_idx_in_tree, 1], +), +), +) -def tree_attn(h_kv, h_q, d, dtype, rope_scaling: Dict[str, Any], target: Target): +def tree_attn( +h_kv, h_q, d, dtype, rope_scaling: Dict[str, Any], target: Target +): # pylint: disable=unused-argument """Generate tree attention kernel for batched tree attention. Parameters @@ -87,7 +105,7 @@ def tree_attn(h_kv, h_q, d, dtype, rope_scaling: Dict[str, Any], target: Target) mod : tvm.IRModule The generated IR module. 
""" -# pylint: disable=line-too-long +# pylint: disable=invalid-name,line-too-long NUM_BLKS = 16 LOAD_VEC = 8 // ((DataType(dtype).bits + 7) // 8) # 8 bytes group_size = h_q // h_kv @@ -140,7 +158,7 @@ def tree_attn(h_kv, h_q, d, dtype, rope_scaling: Dict[str, Any], target: Target) kv_indptr = T.match_buffer(var_kv_indptr, (batch_size + 1,), "int32", elem_offset=kv_indptr_elem_offset) q_rope_position = T.match_buffer(var_q_rope_position, (qo_len,), "int32", elem_offset=q_rope_position_elem_offset) mn_indptr = T.match_buffer(var_mn_indptr, (batch_size + 1,), "int32", elem_offset=mn_indptr_elem_offset) -mask = T.match_buffer(var_mask, (tree_size,), "int32", elem_offset=mask_elem_offset) +mask = T.match_buffer(var_mask, (tree_size, 2), "int32", elem_offset=mask_elem_offset) output = T.match_buffer(var_output, (qo_len, h_q, d), dtype)
(tvm) branch main updated: [Relax][PyTorch] Add support for `torch.nn.functional.conv*` (#17325)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 0e9c683035 [Relax][PyTorch] Add support for `torch.nn.functional.conv*` (#17325) 0e9c683035 is described below commit 0e9c68303543e9b7e7a0146553aa0e81f63828f4 Author: Masahiro Hiramori AuthorDate: Wed Sep 4 02:39:57 2024 +0900 [Relax][PyTorch] Add support for `torch.nn.functional.conv*` (#17325) * add test for functional conv1d * add support for functional conv1d * cleanup conv1d * add test for functional conv_transpose1d * add support for functional conv_transpose1d * add test for functional conv_transpose2d * add support for functional conv_transpose2d * add test for functional conv3d * add support for functional conv3d --- python/tvm/relax/frontend/torch/fx_translator.py | 284 ++- tests/python/relax/test_frontend_from_fx.py | 52 + 2 files changed, 275 insertions(+), 61 deletions(-) diff --git a/python/tvm/relax/frontend/torch/fx_translator.py b/python/tvm/relax/frontend/torch/fx_translator.py index 676f63b5c3..245bb4cffb 100644 --- a/python/tvm/relax/frontend/torch/fx_translator.py +++ b/python/tvm/relax/frontend/torch/fx_translator.py @@ -740,61 +740,140 @@ class TorchFXImporter: bias = args[2] if len(args) > 2 else None return self.block_builder.emit(relax.op.linear(x, weight, bias, "float32")) -def _conv1d(self, node: fx.node.Node) -> relax.Var: -x = self.env[node.args[0]] -module = self.named_modules[node.target] -weight = self.params[module.weight] - +def _conv1d_impl( +self, +x: relax.Expr, +weight: relax.Expr, +bias: Optional[relax.Expr], +strides: Optional[Tuple], +padding: Optional[Tuple], +dilation: Optional[Tuple], +groups: Optional[Tuple], +) -> relax.Var: conv1d = self.block_builder.emit( relax.op.nn.conv1d( x, weight, -strides=module.stride, -padding=module.padding, -dilation=module.dilation, -groups=module.groups, 
+strides=strides, +padding=padding, +dilation=dilation, +groups=groups, data_layout="NCW", kernel_layout="OIW", out_dtype="float32", ) ) -if module.bias is None: +if bias is None: return conv1d - -bias = self.params[module.bias] assert len(self.shape_of(bias)) == 1 bias = relax.op.reshape(bias, (1, -1, 1)) - return self.block_builder.emit(relax.op.add(conv1d, bias)) -def _conv3d(self, node: fx.node.Node) -> relax.Var: +def _conv1d(self, node: fx.node.Node) -> relax.Var: x = self.env[node.args[0]] module = self.named_modules[node.target] weight = self.params[module.weight] +bias = None +if module.bias is not None: +bias = self.params[module.bias] -conv3d = self.block_builder.emit( -relax.op.nn.conv3d( +return self._conv1d_impl( +x, +weight, +bias=bias, +strides=module.stride, +padding=module.padding, +dilation=module.dilation, +groups=module.groups, +) + +def _conv1d_functional(self, node: fx.node.Node) -> relax.Var: +args = self.retrieve_args(node) +x = args[0] +weight = args[1] +bias = args[2] if len(args) > 2 else None +stride = args[3] if len(args) > 3 else 1 +padding = args[4] if len(args) > 4 else 0 +dilation = args[5] if len(args) > 5 else 1 +groups = args[6] if len(args) > 6 else 1 +return self._conv1d_impl( +x, +weight, +bias=bias, +strides=stride, +padding=padding, +dilation=dilation, +groups=groups, +) + +def _conv1d_transpose_impl( +self, +x: relax.Expr, +weight: relax.Expr, +bias: Optional[relax.Expr], +strides: Optional[Tuple], +padding: Optional[Tuple], +dilation: Optional[Tuple], +groups: Optional[Tuple], +) -> relax.Var: +conv1d_transpose = self.block_builder.emit( +relax.op.nn.conv1d_transpose( x, weight, -strides=module.stride, -padding=module.padding, -dilation=module.dilation, -groups=module.groups, -data_layout="
(tvm) branch main updated: [Target] Refine equality check on TargetKind instances (#17321)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 42bffc31ff [Target] Refine equality check on TargetKind instances (#17321) 42bffc31ff is described below commit 42bffc31ff2aa14b18275f70a3d658156dbed2a2 Author: wrongtest AuthorDate: Tue Sep 3 22:51:42 2024 +0800 [Target] Refine equality check on TargetKind instances (#17321) refine target kind identity Co-authored-by: wrongtest --- src/target/target_kind.cc | 15 ++- tests/python/target/test_target_target.py | 16 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc index fced74c3a5..979b755af8 100644 --- a/src/target/target_kind.cc +++ b/src/target/target_kind.cc @@ -35,7 +35,20 @@ namespace tvm { -TVM_REGISTER_NODE_TYPE(TargetKindNode); +// helper to get internal dev function in objectref. +struct TargetKind2ObjectPtr : public ObjectRef { + static ObjectPtr Get(const TargetKind& kind) { return GetDataPtr(kind); } +}; + +TVM_REGISTER_NODE_TYPE(TargetKindNode) +.set_creator([](const std::string& name) { + auto kind = TargetKind::Get(name); + ICHECK(kind.defined()) << "Cannot find target kind \'" << name << '\''; + return TargetKind2ObjectPtr::Get(kind.value()); +}) +.set_repr_bytes([](const Object* n) -> std::string { + return static_cast(n)->name; +}); TVM_STATIC_IR_FUNCTOR(ReprPrinter, vtable) .set_dispatch([](const ObjectRef& obj, ReprPrinter* p) { diff --git a/tests/python/target/test_target_target.py b/tests/python/target/test_target_target.py index e977ef10aa..1a52a46da1 100644 --- a/tests/python/target/test_target_target.py +++ b/tests/python/target/test_target_target.py @@ -559,5 +559,21 @@ def test_target_from_device_opencl(input_device): assert target.thread_warp_size == dev.warp_size +def test_module_dict_from_deserialized_targets(): +target = Target("llvm") + 
+from tvm.script import tir as T + +@T.prim_func +def func(): +T.evaluate(0) + +func = func.with_attr("Target", target) +target2 = tvm.ir.load_json(tvm.ir.save_json(target)) +mod = tvm.IRModule({"main": func}) +lib = tvm.build({target2: mod}, target_host=target) +lib["func"]() + + if __name__ == "__main__": tvm.testing.main()
(tvm) branch main updated: [Doc] Customize Optimization (#17320)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new cd3448603d [Doc] Customize Optimization (#17320) cd3448603d is described below commit cd3448603dffea2340e406dd7751a37b0440d81f Author: Siyuan Feng AuthorDate: Mon Sep 2 14:06:37 2024 +0800 [Doc] Customize Optimization (#17320) [Doc] Customization Optimization --- docs/how_to/index.rst | 1 + docs/how_to/tutorials/customize_opt.py | 225 + 2 files changed, 226 insertions(+) diff --git a/docs/how_to/index.rst b/docs/how_to/index.rst index 976b2f1bd4..c5b9d703f0 100644 --- a/docs/how_to/index.rst +++ b/docs/how_to/index.rst @@ -19,5 +19,6 @@ :maxdepth: 1 tutorials/e2e_opt_model + tutorials/customize_opt tutorials/cross_compilation_and_rpc dev/index diff --git a/docs/how_to/tutorials/customize_opt.py b/docs/how_to/tutorials/customize_opt.py new file mode 100644 index 00..5806d6ce5d --- /dev/null +++ b/docs/how_to/tutorials/customize_opt.py @@ -0,0 +1,225 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +.. 
_customize_opt: + +Customize Optimization +== +One main design goal of Apache TVM is to enable easy customization of the optimization pipeline +for both research or development purposes and iterate the engineering optimizations. In this +tutorial we will + +.. contents:: Table of Contents +:local: +:depth: 1 +""" + +## +# Review Overall Flow +# --- +# .. figure:: https://raw.githubusercontent.com/tlc-pack/web-data/main/images/design/tvm_overall_flow.svg +#:align: center +#:width: 80% +# +# The overall flow consists of the following steps: +# +# - **Construct or Import a Model**: Construct a neural network model or import a pre-trained +# model from other frameworks (e.g. PyTorch, ONNX), and create the TVM IRModule, which contains +# all the information needed for compilation, including high-level Relax functions for +# computational graph, and low-level TensorIR functions for tensor program. +# - **Perform Composable Optimizations**: Perform a series of optimization transformations, +# such as graph optimizations, tensor program optimizations, and library dispatching. +# - **Build and Universal Deployment**: Build the optimized model to a deployable module to the +# universal runtime, and execute it on different devices, such as CPU, GPU, or other accelerators. +# + +import os +import tempfile +import numpy as np +import tvm +from tvm import IRModule, relax +from tvm.relax.frontend import nn + +## +# Composable IRModule Optimization +# +# Apache TVM Unity provides a flexible way to optimize the IRModule. Everything centered +# around IRModule optimization can be composed with existing pipelines. Note that each optimization +# can focus on **part of the computation graph**, enabling partial lowering or partial optimization. +# +# In this tutorial, we will demonstrate how to optimize a model with Apache TVM Unity. + +## +# Prepare a Relax Module +# ~~ +# We first prepare a Relax module. 
The module can be imported from other frameworks, constructed +# with NN module frontend or TVMScript. Here we use a simple neural network model as an example. + + +class RelaxModel(nn.Module): +def __init__(self): +super(RelaxModel, self).__init__() +self.fc1 = nn.Linear(784, 256) +self.relu1 = nn.ReLU() +self.fc2 = nn.Linear(256, 10, bias=False) + +def forward(self, x): +x = self.fc1(x) +x = self.relu1(x) +x = self.fc2(x) +return x + + +input_shape = (1, 784) +mod, params = RelaxModel().export_tvm({"forward"
(tvm) branch main updated (40b6c14bba -> 3262f19e6f)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git from 40b6c14bba [Disco] Add NVSHMEM support (#17317) add 3262f19e6f [Doc] Fix doc build error in e2e_opt_model.py (#17319) No new revisions were added by this update. Summary of changes: docs/how_to/tutorials/e2e_opt_model.py | 63 +- 1 file changed, 31 insertions(+), 32 deletions(-)
(tvm) branch main updated: [TVM4J][BugFix] Fix unhandled return type in JNI (#17308)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 98de9ba841 [TVM4J][BugFix] Fix unhandled return type in JNI (#17308) 98de9ba841 is described below commit 98de9ba8418ec70ed7da59b737c93bd1b9ab611a Author: Yu Xuanchi AuthorDate: Thu Aug 29 19:11:59 2024 +0800 [TVM4J][BugFix] Fix unhandled return type in JNI (#17308) --- jvm/native/src/main/native/jni_helper_func.h | 1 + 1 file changed, 1 insertion(+) diff --git a/jvm/native/src/main/native/jni_helper_func.h b/jvm/native/src/main/native/jni_helper_func.h index 82165e9e04..d60a1a4230 100644 --- a/jvm/native/src/main/native/jni_helper_func.h +++ b/jvm/native/src/main/native/jni_helper_func.h @@ -188,6 +188,7 @@ jobject tvmRetValueToJava(JNIEnv* env, TVMValue value, int tcode) { switch (tcode) { case kDLUInt: case kDLInt: +case kTVMArgBool: return newTVMValueLong(env, static_cast(value.v_int64)); case kDLFloat: return newTVMValueDouble(env, static_cast(value.v_float64));
(tvm) branch main updated: [Doc] Refactor How-To (#17306)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new add93d7372 [Doc] Refactor How-To (#17306) add93d7372 is described below commit add93d7372cf255b4f1fb094c7d1e0eb8ae25321 Author: Siyuan Feng AuthorDate: Thu Aug 29 19:08:36 2024 +0800 [Doc] Refactor How-To (#17306) This PR refactors the how-to section and add new tutorials of `end-to-end optimization model` --- docs/conf.py | 2 + docs/dev/how_to/how_to.rst | 2 - .../how_to/how_to.rst => how_to/dev/index.rst} | 11 +- .../dev}/pytest_target_parametrization.rst | 0 .../how_to => how_to/dev}/setup_rpc_system.rst | 6 +- docs/how_to/index.rst | 22 +--- docs/how_to/{index.rst => legacy_index.rst}| 1 - docs/how_to/tutorials/README.txt | 2 + .../how_to/tutorials}/cross_compilation_and_rpc.py | 0 docs/how_to/tutorials/e2e_opt_model.py | 139 + docs/index.rst | 16 +-- gallery/tutorial/install.py| 50 gallery/tutorial/introduction.py | 2 - 13 files changed, 158 insertions(+), 95 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 1c5c5cb5d6..c933653233 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -423,6 +423,7 @@ examples_dirs = [ tvm_path.joinpath("vta", "tutorials"), # New tutorial structure under docs folder tvm_path.joinpath("docs", "get_started", "tutorials"), +tvm_path.joinpath("docs", "how_to", "tutorials"), ] gallery_dirs = [ @@ -440,6 +441,7 @@ gallery_dirs = [ "topic/vta/tutorials", # New tutorial structure under docs folder "get_started/tutorials/", +"how_to/tutorials/", ] diff --git a/docs/dev/how_to/how_to.rst b/docs/dev/how_to/how_to.rst index 1e1d1236bd..aa89324fb9 100644 --- a/docs/dev/how_to/how_to.rst +++ b/docs/dev/how_to/how_to.rst @@ -29,5 +29,3 @@ various areas of the TVM stack. 
relay_add_op relay_add_pass relay_bring_your_own_codegen - pytest_target_parametrization - setup_rpc_system diff --git a/docs/dev/how_to/how_to.rst b/docs/how_to/dev/index.rst similarity index 87% copy from docs/dev/how_to/how_to.rst copy to docs/how_to/dev/index.rst index 1e1d1236bd..c70832358a 100644 --- a/docs/dev/how_to/how_to.rst +++ b/docs/how_to/dev/index.rst @@ -15,19 +15,14 @@ specific language governing permissions and limitations under the License. -.. _dev-how-to: - -Developer How-To Guide -== +Develope Apache TVM +=== This section contains a collection of tips about how to work on various areas of the TVM stack. .. toctree:: :maxdepth: 1 - debugging_tvm - relay_add_op - relay_add_pass - relay_bring_your_own_codegen pytest_target_parametrization setup_rpc_system + ../../errors diff --git a/docs/dev/how_to/pytest_target_parametrization.rst b/docs/how_to/dev/pytest_target_parametrization.rst similarity index 100% rename from docs/dev/how_to/pytest_target_parametrization.rst rename to docs/how_to/dev/pytest_target_parametrization.rst diff --git a/docs/dev/how_to/setup_rpc_system.rst b/docs/how_to/dev/setup_rpc_system.rst similarity index 99% rename from docs/dev/how_to/setup_rpc_system.rst rename to docs/how_to/dev/setup_rpc_system.rst index 061aa5b07b..0131619b71 100644 --- a/docs/dev/how_to/setup_rpc_system.rst +++ b/docs/how_to/dev/setup_rpc_system.rst @@ -76,7 +76,7 @@ In our community, there is multiple RPC server implementations, e.g., ``apps/and RPC server need to be run on device machine, and it usually will depend on xPU driver, the enhanced TVM runtime with xPU support, and other libraries, so please setup the dependent components first, e.g., install the KMD driver, ensure the required dynamic libraries can be found from environment variable ``LD_LIBRARY_PATH``. 
-If the required compilation environment can be setup on your device machine, i.e., you needn't to do the cross compilation, then just follow the instruction of `<https://tvm.apache.org/docs/install/from_source.html>`_ to compile the TVM runtime and directly jump to the step :ref:`luanch-rpc-server`. +If the required compilation environment can be setup on your device machine, i.e., you needn't to do the cross compilation, then just follow the instruction of `<https://tvm.apache.org/docs/install/from_source.html>`_ to compile the TVM runtime and directly jump to the step :ref:`launch-rpc-server`. 1. Cross Compile TVM Runtime @@ -134,9 +134,9 @@ Then copy the com
(tvm) branch main updated: [Relax][PyTorch] Add support for torch.repeat (#17304)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 99defd25c4 [Relax][PyTorch] Add support for torch.repeat (#17304) 99defd25c4 is described below commit 99defd25c40c75b00395df1d2d58c84d2e0bd9ca Author: Masahiro Hiramori AuthorDate: Wed Aug 28 04:37:30 2024 +0900 [Relax][PyTorch] Add support for torch.repeat (#17304) * add test * add support for torch.repeat * remove debug print --- python/tvm/relax/frontend/torch/fx_translator.py | 9 ++ tests/python/relax/test_frontend_from_fx.py | 36 2 files changed, 45 insertions(+) diff --git a/python/tvm/relax/frontend/torch/fx_translator.py b/python/tvm/relax/frontend/torch/fx_translator.py index 6d01283d3e..676f63b5c3 100644 --- a/python/tvm/relax/frontend/torch/fx_translator.py +++ b/python/tvm/relax/frontend/torch/fx_translator.py @@ -640,6 +640,14 @@ class TorchFXImporter: dim = None return self.block_builder.emit(relax.op.squeeze(x, dim)) +def _repeat(self, node: fx.node.Node) -> relax.Var: +import torch # type: ignore + +args = self.retrieve_args(node) +if isinstance(args[1], (torch.Size, tuple, list)): +return self.block_builder.emit(relax.op.tile(args[0], tuple(args[1]))) +return self.block_builder.emit(relax.op.tile(args[0], args[1:])) + def _tile(self, node: fx.node.Node) -> relax.Var: import torch # type: ignore @@ -1484,6 +1492,7 @@ class TorchFXImporter: "expand": self._expand, "flatten": self._flatten, "permute": self._permute, +"repeat": self._repeat, "reshape": self._reshape, "split": self._split, "tile": self._tile, diff --git a/tests/python/relax/test_frontend_from_fx.py b/tests/python/relax/test_frontend_from_fx.py index 5398fe3420..c6c4f25972 100644 --- a/tests/python/relax/test_frontend_from_fx.py +++ b/tests/python/relax/test_frontend_from_fx.py @@ -3311,6 +3311,42 @@ def test_transpose(): verify_model(Transpose(), 
input_info, {}, expected1) +def test_repeat(): +class Tile1(Module): +def forward(self, x: torch.Tensor): +return x.repeat(2) + +class Tile2(Module): +def forward(self, x: torch.Tensor): +return x.repeat(4, 2) + +@tvm.script.ir_module +class expected1: +@R.function +def main(x: R.Tensor((3,), dtype="float32")) -> R.Tensor((6,), dtype="float32"): +# block 0 +with R.dataflow(): +lv: R.Tensor((6,), dtype="float32") = R.tile(x, 2) +gv: R.Tensor((6,), dtype="float32") = lv +R.output(gv) +return gv + +@tvm.script.ir_module +class expected2: +@R.function +def main(x: R.Tensor((1, 3), dtype="float32")) -> R.Tensor((4, 6), dtype="float32"): +# block 0 +with R.dataflow(): +lv: R.Tensor((4, 6), dtype="float32") = R.tile(x, [4, 2]) +gv: R.Tensor((4, 6), dtype="float32") = lv +R.output(gv) +return gv + +verify_model(Tile1(), [([3], "float32")], {}, expected1) +verify_model(Tile2(), [([1, 3], "float32")], {}, expected2) +verify_model(Tile2(), [(torch.Size([1, 3]), "float32")], {}, expected2) + + def test_view(): input_info = [([1, 2, 3, 4], "float32")]
(tvm) branch main updated: [Python][Relax] Rotary positional embedding scaling (#17305)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new bf7bbefd36 [Python][Relax] Rotary positional embedding scaling (#17305) bf7bbefd36 is described below commit bf7bbefd36ac91242496d533d2bfff71570bf04a Author: Ruihang Lai AuthorDate: Tue Aug 27 10:19:28 2024 -0400 [Python][Relax] Rotary positional embedding scaling (#17305) This PR introduces two styles of RoPE scaling: the llama3 style and the longrope scale. --- python/tvm/relax/frontend/nn/llm/kv_cache.py | 396 +++-- .../relax/frontend/nn/llm/position_embedding.py| 191 +- python/tvm/relax/frontend/nn/llm/tree_attn.py | 26 +- ...runtime_builtin_paged_attention_kv_cache_tir.py | 19 +- 4 files changed, 579 insertions(+), 53 deletions(-) diff --git a/python/tvm/relax/frontend/nn/llm/kv_cache.py b/python/tvm/relax/frontend/nn/llm/kv_cache.py index 25a3a1a00d..5ddce76eab 100644 --- a/python/tvm/relax/frontend/nn/llm/kv_cache.py +++ b/python/tvm/relax/frontend/nn/llm/kv_cache.py @@ -20,7 +20,7 @@ # pylint: disable=too-many-statements,too-many-lines,too-many-arguments,invalid-name import enum import math -from typing import Tuple +from typing import Any, Dict, Tuple from tvm import relax as rx from tvm import tir @@ -29,7 +29,7 @@ from tvm.runtime import DataType from tvm.script import tir as T from tvm.target import Target -from .position_embedding import llama_rope_with_position_map, rope_freq +from .position_embedding import llama_rope_with_position_map, switch_rope_freq_func from .tree_attn import tree_attn @@ -166,6 +166,8 @@ class FlashInferPagedKVCache(PagedKVCache): # pylint: disable=too-few-public-me rope_mode: RopeMode, rope_scale: int, rope_theta: int, +rope_scaling: Dict[str, Any], +rope_ext_factors: rx.Expr, rotary_dim: int, dtype: str, target: Target, @@ -195,6 +197,9 @@ class FlashInferPagedKVCache(PagedKVCache): # pylint: 
disable=too-few-public-me 0 or 1, denoting whether the KV cache supports sliding window. It is a symbolic variable whose concrete value is specified at runtime. +layer_partition : rx.ShapeExpr +The KV cache layer partition for pipeline stages. +It is an indptr array, denoting the starting layer of each pipeline stage. rope_mode : RopeMode The RoPE mode of the Paged KV cache. If it is normal, RoPE will be applied to k before adding k to cache. @@ -205,6 +210,8 @@ class FlashInferPagedKVCache(PagedKVCache): # pylint: disable=too-few-public-me The base of rotary position embedding. rope_scaling: Dict[str, Any] The RoPE scaling information dict. +rope_ext_factors: rx.Expr +The RoPE extension factors when "longrope" mode RoPE scaling is enabled. rotary_dim : int The number of dimensions in the embedding that RoPE is applied to. """ @@ -235,8 +242,8 @@ class FlashInferPagedKVCache(PagedKVCache): # pylint: disable=too-few-public-me bb.add_func(_kv_cache_transpose_append(num_key_value_heads, head_dim, dtype), "kv_cache_transpose_append"), rx.extern("flashinfer.attention_kernel_prefill_with_paged_kv_cache"), rx.extern("flashinfer.attention_kernel_decode_with_paged_kv_cache"), -bb.add_func(_attention_prefill(num_key_value_heads, num_attention_heads, head_dim, dtype, True, target), "tir_attention_prefill_sliding_window"), -bb.add_func(_attention_decode(num_key_value_heads, num_attention_heads, head_dim, dtype, True, target), "tir_attention_decode_sliding_window"), +bb.add_func(_attention_prefill(num_key_value_heads, num_attention_heads, head_dim, dtype, True, rope_scaling, target), "tir_attention_prefill_sliding_window"), +bb.add_func(_attention_decode(num_key_value_heads, num_attention_heads, head_dim, dtype, True, rope_scaling, target), "tir_attention_decode_sliding_window"), rx.extern("flashinfer.attention_kernel_prefill_with_ragged_kv_cache"), rx.extern("flashinfer.attention_kernel_prefill_with_ragged_kv_cache_begin_forward"), 
rx.extern("flashinfer.attention_kernel_prefill_with_ragged_kv_cache_end_forward"), @@ -245,11 +252,12 @@ class FlashInferPagedKVCache(PagedKVCache): # pylint: disable=too-few-public-me rx.extern("flashinfer.attention_kernel_decode_with_paged_kv_cache_begin_forward"), rx.extern("flashinfer.attention_kernel_decode_with_paged_kv_cache_end_forward"),
(tvm) branch main updated: [Runtime] Support KV cache with RoPE extension factor array (#17294)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 3138328207 [Runtime] Support KV cache with RoPE extension factor array (#17294) 3138328207 is described below commit 3138328207bbe0b519c33a2f59be8ef2cf44d5b7 Author: Ruihang Lai AuthorDate: Mon Aug 26 21:20:05 2024 -0400 [Runtime] Support KV cache with RoPE extension factor array (#17294) This PR enhances the KV cache with the RoPE extension factor support. With this PR, the KV cache can support models like Phi3.5 which come with the extension factor. --- src/runtime/relax_vm/kv_state.h| 1 + src/runtime/relax_vm/paged_kv_cache.cc | 63 +- ..._builtin_paged_attention_kv_cache_flashinfer.py | 3 ++ ...runtime_builtin_paged_attention_kv_cache_tir.py | 1 + 4 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/runtime/relax_vm/kv_state.h b/src/runtime/relax_vm/kv_state.h index f4d6036b96..6d30ce998a 100644 --- a/src/runtime/relax_vm/kv_state.h +++ b/src/runtime/relax_vm/kv_state.h @@ -167,6 +167,7 @@ class AttentionKVCacheObj : public KVStateObj { * `(total_length, num_qo_heads + 2 * num_kv_heads, head_dim)`. * \param mask The input mask data, in layout `(total_sqr_length)`. * \param o_data The output O data, in layout `(total_length, num_qo_heads, head_dim)`. + * \param attn_score_scaling_factor The additional attention scaling factor. * \sa AttentionKVCache::Attention */ virtual void AttentionWithFusedQKV(int64_t layer_id, NDArray qkv_data, Optional mask, diff --git a/src/runtime/relax_vm/paged_kv_cache.cc b/src/runtime/relax_vm/paged_kv_cache.cc index 6bf3dc7ce6..591187ab5f 100644 --- a/src/runtime/relax_vm/paged_kv_cache.cc +++ b/src/runtime/relax_vm/paged_kv_cache.cc @@ -848,6 +848,8 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { const double rotary_scale_; /*! \brief The RoPE theta. 
*/ const double rotary_theta_; + /*! \brief The optional RoPE extension factors for RoPE scaling. */ + const Optional rope_ext_factors_; /*! \brief We fix int32 to be the index dtype of auxiliary data. */ const DLDataType dtype_aux_ = DLDataType(DataType::Int(32, 1)); @@ -988,7 +990,8 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { int64_t page_size, int64_t num_layers, int64_t layer_id_begin_offset, // int64_t num_qo_heads, int64_t num_kv_heads, int64_t head_dim, int64_t reserved_num_seqs, int64_t num_total_pages, int64_t prefill_chunk_size, bool support_sliding_window, - RoPEMode rope_mode, double rotary_scale, double rotary_theta, DLDataType dtype, Device device, + RoPEMode rope_mode, double rotary_scale, double rotary_theta, + Optional rope_ext_factors, DLDataType dtype, Device device, PackedFunc f_transpose_append, PackedFunc f_compact_copy, PackedFunc f_attention_prefill, PackedFunc f_attention_decode, PackedFunc f_attention_prefill_sliding_window, PackedFunc f_attention_decode_sliding_window, PackedFunc f_attention_prefill_ragged, @@ -1013,6 +1016,7 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { : rope_mode), rotary_scale_(rotary_scale), rotary_theta_(rotary_theta), +rope_ext_factors_(std::move(rope_ext_factors)), f_transpose_append_(std::move(f_transpose_append)), f_compact_copy_(std::move(f_compact_copy)), f_attention_prefill_(std::move(f_attention_prefill)), @@ -1132,6 +1136,12 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { reserved_num_seqs, num_total_pages, prefill_chunk_size, dtype_aux_, device, preferred_host_device, copy_stream_); } + +// Right now only the "normal" RoPE mode supports the RoPE extention factors. 
+if (rope_ext_factors_.defined()) { + CHECK(rope_mode_ == RoPEMode::kNormal) + << "The RoPE mode must be normal to support RoPE extension factors."; +} } ~PagedAttentionKVCacheObj() { @@ -1726,8 +1736,13 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { NDArray v_data = temp_attn_v_device_.CreateView({total_seq_length, num_kv_heads_, head_dim_}, qkv_data->dtype); // Part 2. Split fused qkv and apply rotary embedding to q/k data. -f_split_rotary_(qkv_data, q_rope_position_map_view_, q_data, k_data, v_data, -static_cast(rope_mode_ == RoPEMode::kNormal)); +if (!rope_ext_factors_.defined()) { + f_split_rotary_(qkv_data, q_rope_position_map_view_, q_data, k_data, v_data, + static_cast(rope_mode_ == RoPEMode::kNormal)); +} else { +
(tvm) branch main updated: [TE][CreatePrimFunc] Fix create reduce block with spatial iter dependent init value (#17301)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new c61982e2cd [TE][CreatePrimFunc] Fix create reduce block with spatial iter dependent init value (#17301) c61982e2cd is described below commit c61982e2cd74b29dd43455da390c456e53010307 Author: wrongtest AuthorDate: Mon Aug 26 21:55:57 2024 +0800 [TE][CreatePrimFunc] Fix create reduce block with spatial iter dependent init value (#17301) fix create reduce block with spatial iter dependent init value Co-authored-by: wrongtest --- src/te/operation/create_primfunc.cc| 17 --- tests/python/te/test_te_create_primfunc.py | 73 ++ 2 files changed, 84 insertions(+), 6 deletions(-) diff --git a/src/te/operation/create_primfunc.cc b/src/te/operation/create_primfunc.cc index b5a87d9446..31815fc710 100644 --- a/src/te/operation/create_primfunc.cc +++ b/src/te/operation/create_primfunc.cc @@ -228,6 +228,10 @@ BlockRealize GenerateBlockFromTensors(const te::ComputeOp& compute_op, } // Step 4. Create block body. + // helper to transform the expr and remap iters to the block domain + auto f_transform_and_remap = [&](const PrimExpr& e) { +return Substitute(info->transformer(e), var_map); + }; String block_name{nullptr}; Optional init = NullOpt; Stmt body; @@ -246,8 +250,7 @@ BlockRealize GenerateBlockFromTensors(const te::ComputeOp& compute_op, // - A RHS operand is the value to be reduced. 
for (int i = 0; i < n_buffers; ++i) { const PrimExpr& left = BufferLoad(buffers[i], indices); - const PrimExpr& right = - analyzer->Simplify(Substitute(info->transformer(reduce->source[i]), var_map)); + const PrimExpr& right = analyzer->Simplify(f_transform_and_remap(reduce->source[i])); lhs.push_back(left); rhs.push_back(right); ICHECK_EQ(left->dtype, right->dtype); @@ -267,13 +270,15 @@ BlockRealize GenerateBlockFromTensors(const te::ComputeOp& compute_op, // then store the value of the variables into the target buffer positions. for (int i = 0; i < n_buffers; ++i) { const Buffer& buffer = buffers[i]; - init_stmts.push_back(BufferStore(buffer, reduce->combiner->identity_element[i], indices)); + PrimExpr identity = f_transform_and_remap(reduce->combiner->identity_element[i]); + init_stmts.push_back(BufferStore(buffer, identity, indices)); PrimExpr value{nullptr}; if (n_buffers > 1) { temp_vars.push_back(Var("v_" + buffer->name, PrimType(lhs[i].dtype(; value = temp_vars.back(); } else { -value = reduce->combiner.get()->operator()(lhs, rhs)[i]; +PrimExpr combined = reduce->combiner.get()->operator()(lhs, rhs)[i]; +value = f_transform_and_remap(combined); } body_stmts.push_back(BufferStore(buffer, value, indices)); } @@ -283,7 +288,7 @@ BlockRealize GenerateBlockFromTensors(const te::ComputeOp& compute_op, if (n_buffers > 1) { // When there are multiple buffers, we wrap the body with LetStmts. for (int i = n_buffers - 1; i >= 0; --i) { -PrimExpr value = reduce->combiner.get()->operator()(lhs, rhs)[i]; +PrimExpr value = f_transform_and_remap(reduce->combiner.get()->operator()(lhs, rhs)[i]); body = LetStmt(temp_vars[i], std::move(value), std::move(body)); } } @@ -291,7 +296,7 @@ BlockRealize GenerateBlockFromTensors(const te::ComputeOp& compute_op, // Case 2. 
Data parallel compute ICHECK_EQ(tensors.size(), 1); block_name = info->FreshName(tensors[0]->GetNameHint()); -const PrimExpr& compute_body = Substitute(info->transformer(expr_body), var_map); +const PrimExpr& compute_body = f_transform_and_remap(expr_body); body = BufferStore(info->tensor2buffers[tensors[0]], analyzer->Simplify(compute_body), indices); } diff --git a/tests/python/te/test_te_create_primfunc.py b/tests/python/te/test_te_create_primfunc.py index ade414f423..1a7e03188a 100644 --- a/tests/python/te/test_te_create_primfunc.py +++ b/tests/python/te/test_te_create_primfunc.py @@ -814,5 +814,78 @@ def test_with_var_input(): _check_workload(te_slice_with_var_input, tir_slice_with_var_input, index_dtype_override="int64") +def test_loop_aware_initial_value(): +"""Test initial value aware of spatial iter position""" + +@T.prim_func +def tir_workload(var_a: T.handle, var_b: T.handle, var_sum_red: T.handle): +T.func_attr({"tir.noalias": T.bool(True), "global_symbol": "main"}) +a = T.match_buffer(var_a, (5, 5
(tvm) branch main updated: [Relax] Avoid wrapping TupleStructInfo into a Tuple for R.call_tir (#17243)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new c4acc79bde [Relax] Avoid wrapping TupleStructInfo into a Tuple for R.call_tir (#17243) c4acc79bde is described below commit c4acc79bdec9bd501d1732572843829d7f90c38d Author: Eric Lunderberg AuthorDate: Mon Aug 26 06:31:58 2024 -0500 [Relax] Avoid wrapping TupleStructInfo into a Tuple for R.call_tir (#17243) * [Relax] Avoid wrapping TupleStructInfo into a Tuple for R.call_tir Prior to this commit, the different `R.call_tir*` variations would wrap the arguments into an in-line `relax.Tuple`, if it is not already a `relax.Tuple`. While this allows a tensor to be passed into these functions as a single argument (`R.call_tir(func, arg, ...)` instead of `R.call_tir(func, [arg], ...)`), the wrapped Relax variable may already refer to a tuple. This use of a variable to refer to an argument tuple rather than an in-line argument tuple is not allowed by Relax. (See discussion on https://github.com/apache/tvm/pull/15916 for details.) However, by wrapping a variable `args: R.Tuple(R.Tensor, R.Tensor, ...)` into a tuple-of-tuples, the error occurs after the expression has already been generated, and refers to an expression `R.Tuple(R.Tuple(R.Tensor, R.Tensor, ...))` that doesn't appear anywhere in the user's input. This can make debugging difficult (see https://github.com/apache/tvm/issues/17239 for an example). This commit updates the argument-handling in `R.call_tir` to only generate an in-line `relax.Tuple` if the arguments do not already have `relax.TupleStructInfo`. If the argument was provided as a Relax variable bound to a tuple of arguments, it will still produce an error. However, that error will occur much earlier, and will explicitly state that the argument must be a `relax.Tuple` instead of a `relax.Var`. 
* lint fixes --- python/tvm/relax/op/base.py | 37 ++--- tests/python/relax/test_tvmscript_parser.py | 36 2 files changed, 64 insertions(+), 9 deletions(-) diff --git a/python/tvm/relax/op/base.py b/python/tvm/relax/op/base.py index 756d250c16..03e86a4633 100644 --- a/python/tvm/relax/op/base.py +++ b/python/tvm/relax/op/base.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # pylint: disable=redefined-builtin """The base Relax operators.""" + from typing import Dict, Union, List, Tuple, Optional, Callable @@ -25,7 +26,6 @@ from tvm.runtime import ObjectGeneric from . import _ffi_api from ..expr import Expr, StringImm, ShapeExpr, Call, ExternFunc, GlobalVar, Var -from ..expr import Tuple as RxTuple from ..struct_info import StructInfo, TensorStructInfo from ...ir import PrimExpr from ..utils import args_converter @@ -67,6 +67,29 @@ def null_value() -> Call: return _ffi_api.null_value() # type: ignore +def _wrap_inline_arg_tuple(args) -> Expr: +"""Helper function to wrap argument tuple + +Normalize the arguments provided the functions that accept a tuple +of arguments, and require the tuple of arguments to be written +in-line. If the arguments provided are a single relax expression, +and are not a reference to a relax tuple, then wrap them into an +in-line relax Tuple. + +""" +if ( +isinstance(args, Expr) +and not isinstance(args, tvm.relax.Tuple) +and ( +args.struct_info_ is None +or not isinstance(args.struct_info_, tvm.relax.TupleStructInfo) +) +): +return tvm.relax.Tuple([args]) +else: +return args + + @args_converter.auto def call_tir( gvar: GlobalVar, @@ -98,8 +121,7 @@ def call_tir( ret: Call A call node for the call_tir operator. """ -if isinstance(args, Expr) and not isinstance(args, RxTuple): # type: ignore -args = RxTuple((args,)) +args = _wrap_inline_arg_tuple(args) if not isinstance(out_sinfo, list): out_sinfo = [out_sinfo] @@ -153,8 +175,7 @@ def call_tir_with_grad( ret: Call A call node for the call_tir_with_grad operator. 
""" -if isinstance(args, Expr) and not isinstance(args, RxTuple): # type: ignore -args = RxTuple((args,)) +args = _wrap_inline_arg_tuple(args) if not isinstance(out_sinfo, list): out_sinfo = [out_sinfo] @@ -221,8 +242,7 @@ def call_tir_inplace( ret: Call A call node for the call_tir operator. """ -if isinstance(args, Expr) and not isinstance(args, RxTuple): # type: ignore -args = RxTuple((a
(tvm) branch main updated: [Support] Fix the Read/Write of socket stream (#17284)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new d5d5ebb601 [Support] Fix the Read/Write of socket stream (#17284) d5d5ebb601 is described below commit d5d5ebb601a1fee5be3ff52bb8520497db1b99de Author: Ruihang Lai AuthorDate: Mon Aug 26 07:29:40 2024 -0400 [Support] Fix the Read/Write of socket stream (#17284) This PR fixes the `dmlc::Stream::Read/Write` for TCP socket. Given that a socket does not guarantee that all data are received/sent in a single shot, we need to use `RecvAll/SendAll`. --- src/support/socket.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/support/socket.h b/src/support/socket.h index 032cf257c0..e3972488d4 100644 --- a/src/support/socket.h +++ b/src/support/socket.h @@ -553,9 +553,9 @@ class TCPSocket : public Socket, public dmlc::Stream { return data; } - size_t Read(void* data, size_t size) final { return Recv(data, size); } + size_t Read(void* data, size_t size) final { return RecvAll(data, size); } - size_t Write(const void* data, size_t size) final { return Send(data, size); } + size_t Write(const void* data, size_t size) final { return SendAll(data, size); } }; /*! \brief helper data structure to perform poll */
(tvm) branch main updated: [Relax][Bugfix] Support torch.unbind op and fix bugs for expand && split (#17292)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 384360f628 [Relax][Bugfix] Support torch.unbind op and fix bugs for expand && split (#17292) 384360f628 is described below commit 384360f628201790ee6b3e821db060a42db8d155 Author: Archermmt AuthorDate: Mon Aug 26 19:29:23 2024 +0800 [Relax][Bugfix] Support torch.unbind op and fix bugs for expand && split (#17292) * support unbind * add unit test * format fix * ignore logging in ut --- python/tvm/contrib/msc/core/frontend/translate.py | 2 + python/tvm/relax/frontend/torch/fx_translator.py | 33 +- tests/python/contrib/test_msc/test_graph_build.py | 54 - tests/python/contrib/test_msc/test_pipeline.py | 2 +- .../contrib/test_msc/test_translate_relax.py | 41 ++- .../contrib/test_msc/test_translate_relay.py | 34 +- .../contrib/test_msc/test_translate_tensorrt.py| 36 +- .../contrib/test_msc/test_translate_torch.py | 35 +- tests/python/relax/test_frontend_from_fx.py| 128 - 9 files changed, 336 insertions(+), 29 deletions(-) diff --git a/python/tvm/contrib/msc/core/frontend/translate.py b/python/tvm/contrib/msc/core/frontend/translate.py index 2eaae13358..63b4424524 100644 --- a/python/tvm/contrib/msc/core/frontend/translate.py +++ b/python/tvm/contrib/msc/core/frontend/translate.py @@ -119,6 +119,7 @@ def from_relax( )(mod) patterns = get_patterns_with_prefix("msc.") passes = [ +tvm.relax.transform.ExpandTupleArguments(), msc_transform.SetExprName(), msc_transform.SetExprLayout(trans_config.get("allow_layout_missing", True)), tvm.relax.transform.FuseOpsByPattern( @@ -310,6 +311,7 @@ def byoc_partition( def _partition_mod(mod, as_msc=True): patterns = get_patterns_with_prefix(target) passes = [ +tvm.relax.transform.ExpandTupleArguments(), msc_transform.SetExprName(), msc_transform.SetExprLayout(trans_config.get("allow_layout_missing", True)), 
tvm.relax.transform.FuseOpsByPattern(patterns, bind_constants=not as_msc), diff --git a/python/tvm/relax/frontend/torch/fx_translator.py b/python/tvm/relax/frontend/torch/fx_translator.py index 35131d3240..6d01283d3e 100644 --- a/python/tvm/relax/frontend/torch/fx_translator.py +++ b/python/tvm/relax/frontend/torch/fx_translator.py @@ -526,6 +526,22 @@ class TorchFXImporter: return self.block_builder.emit(relax.op.einsum(tuple(args[1]), args[0])) return self.block_builder.emit(relax.op.einsum(args[1:], args[0])) +def _unbind(self, node: fx.node.Node) -> relax.Var: +if len(node.args) == 2: +assert isinstance(node.args[1], int), "Expected 2nd argument of unbind as int" +dim = node.args[1] +elif "dim" in node.kwargs: +dim = node.kwargs["dim"] +else: +dim = 0 +x = self.env[node.args[0]] +selections = self.shape_of(x)[dim].value +n_section = list(range(1, selections + 1)) +ret, split = [], self.block_builder.emit(relax.op.split(x, n_section, dim)) +for i in range(selections): +ret.append(self.block_builder.emit(relax.op.squeeze(split[i], axis=dim))) +return self.block_builder.emit(relax.Tuple(ret)) + ## Manipulation ## def _cat(self, node: fx.node.Node) -> relax.Var: @@ -535,7 +551,13 @@ class TorchFXImporter: def _expand(self, node: fx.node.Node) -> relax.Var: args = self.retrieve_args(node) -return self.block_builder.emit(relax.op.broadcast_to(args[0], args[1:])) +broadcast_shape, in_shape = [], self.shape_of(args[0]) +for idx, i in enumerate(args[1:]): +if isinstance(i, int) and i == -1: +broadcast_shape.append(in_shape[idx]) +else: +broadcast_shape.append(i) +return self.block_builder.emit(relax.op.broadcast_to(args[0], broadcast_shape)) def _flatten(self, node: fx.node.Node) -> relax.Var: x = self.env[node.args[0]] @@ -580,7 +602,13 @@ class TorchFXImporter: dim = node.kwargs["dim"] else: dim = 0 -n_section = (self.shape_of(x)[dim].value + split_size - 1) // split_size +if isinstance(split_size, (list, tuple)): +n_section = [] +for s in split_size[:-1]: +cum_sum = 
0 if not n_section else n_section[-1] +n_section.append(s + cum_sum) +else: +n_section = (self.shape_of(x)[dim].value + split_size - 1) // split_size
(tvm) branch main updated: [Codegen][WebGPU] LetNode common subexpr override (#17302)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 47e964a597 [Codegen][WebGPU] LetNode common subexpr override (#17302) 47e964a597 is described below commit 47e964a5973575c1e270c62b0fd785135e1b5bca Author: Charlie Ruan <53290280+charliefr...@users.noreply.github.com> AuthorDate: Mon Aug 26 04:27:47 2024 -0700 [Codegen][WebGPU] LetNode common subexpr override (#17302) This PR overrides the WebGPU codegen function of `tir::LetNode` to adapt to the recent LetNode common subexpression changes. Co-authored-by: Ruihang Lai --- src/target/source/codegen_webgpu.cc | 21 + src/target/source/codegen_webgpu.h | 3 ++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/target/source/codegen_webgpu.cc b/src/target/source/codegen_webgpu.cc index b76b05470d..83079a9f07 100644 --- a/src/target/source/codegen_webgpu.cc +++ b/src/target/source/codegen_webgpu.cc @@ -433,6 +433,27 @@ void CodeGenWebGPU::VisitExpr_(const SelectNode* op, std::ostream& os) { // NOL << PrintExpr(op->condition) << ")"; } +void CodeGenWebGPU::VisitExpr_(const LetNode* op, std::ostream& os) { // NOLINT(*) + // use ssa form. + if (print_ssa_form_) { +std::string value = PrintExpr(op->value); +ICHECK(!var_idmap_.count(op->var.get())); +var_idmap_[op->var.get()] = value; + } else { +PrintIndent(); +std::string value = PrintExpr(op->value); +this->stream << "let " << AllocVarID(op->var.get()) << " : "; +PrintType(op->var.dtype(), this->stream); +this->stream << " = " << value << ";\n"; + } + os << PrintExpr(op->body); + // Pop the defined var from var_idmap when exiting its scope. + // We do this because it is hard to completely avoid a same LetNode appearing + // at different places. 
+ bool removed = var_idmap_.erase(op->var.get()); + ICHECK(removed); +} + void CodeGenWebGPU::VisitExpr_(const IntImmNode* op, std::ostream& os) { // NOLINT(*) if (op->dtype.bits() == 32) { std::ostringstream temp; diff --git a/src/target/source/codegen_webgpu.h b/src/target/source/codegen_webgpu.h index a100396b25..09f99fb886 100644 --- a/src/target/source/codegen_webgpu.h +++ b/src/target/source/codegen_webgpu.h @@ -63,7 +63,8 @@ class CodeGenWebGPU final : public CodeGenC { void VisitExpr_(const CallNode* op, std::ostream& os) final;// NOLINT(*) void VisitExpr_(const BufferLoadNode* op, std::ostream& os) final; // NOLINT(*) void VisitExpr_(const CastNode* op, std::ostream& os) final;// NOLINT(*) - void VisitExpr_(const SelectNode* op, std::ostream& os) override; // NOLINT(*) + void VisitExpr_(const SelectNode* op, std::ostream& os) final; // NOLINT(*) + void VisitExpr_(const LetNode* op, std::ostream& os) final; // NOLINT(*) void VisitExpr_(const FloatImmNode* op, std::ostream& os) final;// NOLINT(*) void VisitExpr_(const IntImmNode* op, std::ostream& os) final; // NOLINT(*)
(tvm) branch main updated: [Rocm] Fix non-standard rocm path (#17295)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 541f9c280c [Rocm] Fix non-standard rocm path (#17295) 541f9c280c is described below commit 541f9c280c567b63630229bc03855d43fc6811af Author: Wuwei Lin AuthorDate: Sat Aug 24 08:44:04 2024 -0700 [Rocm] Fix non-standard rocm path (#17295) * [Rocm] Fix non-standard rocm path --- python/tvm/contrib/rocm.py | 16 src/runtime/rocm/rocm_device_api.cc | 3 ++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/python/tvm/contrib/rocm.py b/python/tvm/contrib/rocm.py index 119a2c588c..f3427463b3 100644 --- a/python/tvm/contrib/rocm.py +++ b/python/tvm/contrib/rocm.py @@ -136,8 +136,10 @@ def callback_rocm_bitcode_path(rocdl_dir=None): # seems link order matters. if rocdl_dir is None: -if exists("/opt/rocm/amdgcn/bitcode/"): -rocdl_dir = "/opt/rocm/amdgcn/bitcode/" # starting with rocm 3.9 +rocm_path = find_rocm_path() +amdgcn_path = f"{rocm_path}/amdgcn/bitcode/" +if exists(amdgcn_path): +rocdl_dir = amdgcn_path # starting with rocm 3.9 else: rocdl_dir = "/opt/rocm/lib/" # until rocm 3.8 @@ -226,7 +228,7 @@ def have_matrixcore(compute_version=None): @tvm._ffi.register_func("tvm_callback_rocm_get_arch") -def get_rocm_arch(rocm_path="/opt/rocm"): +def get_rocm_arch(rocm_path=None): """Utility function to get the AMD GPU architecture Parameters @@ -239,9 +241,15 @@ def get_rocm_arch(rocm_path="/opt/rocm"): gpu_arch : str The AMD GPU architecture """ +if rocm_path is None: +try: +rocm_path = find_rocm_path() +except RuntimeError: +rocm_path = None + gpu_arch = "gfx900" # check if rocm is installed -if not os.path.exists(rocm_path): +if rocm_path is None or not os.path.exists(rocm_path): print("ROCm not detected, using default gfx900") return gpu_arch try: diff --git a/src/runtime/rocm/rocm_device_api.cc 
b/src/runtime/rocm/rocm_device_api.cc index c37e9fada5..ebfd312595 100644 --- a/src/runtime/rocm/rocm_device_api.cc +++ b/src/runtime/rocm/rocm_device_api.cc @@ -139,7 +139,8 @@ class ROCMDeviceAPI final : public DeviceAPI { case kAvailableGlobalMemory: // Not currently implemented. -break; +*rv = nullptr; +return; } *rv = value; }
(tvm) branch main updated (1518008262 -> ca22bad77d)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git from 1518008262 [Web] Add TVMArgBool to ArgTypeCode (#17251) add ca22bad77d [Doc] Overview (#17296) No new revisions were added by this update. Summary of changes: docs/get_started/overview.rst | 66 +++ docs/index.rst| 1 + 2 files changed, 67 insertions(+) create mode 100644 docs/get_started/overview.rst
(tvm) branch main updated: [Web] Add TVMArgBool to ArgTypeCode (#17251)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 1518008262 [Web] Add TVMArgBool to ArgTypeCode (#17251) 1518008262 is described below commit 15180082626d01ccad0648a088d11a29e0678790 Author: Charlie Ruan <53290280+charliefr...@users.noreply.github.com> AuthorDate: Fri Aug 23 08:49:33 2024 -0700 [Web] Add TVMArgBool to ArgTypeCode (#17251) --- web/src/ctypes.ts | 5 +++-- web/src/runtime.ts | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/web/src/ctypes.ts b/web/src/ctypes.ts index cb2a0e1097..c4941f07d5 100644 --- a/web/src/ctypes.ts +++ b/web/src/ctypes.ts @@ -171,7 +171,7 @@ export type FTVMBackendPackedCFunc = ( /** * int TVMObjectFree(TVMObjectHandle obj); */ - export type FTVMObjectFree = (obj: Pointer) => number; +export type FTVMObjectFree = (obj: Pointer) => number; /** * int TVMObjectGetTypeIndex(TVMObjectHandle obj, unsigned* out_tindex); @@ -252,5 +252,6 @@ export const enum ArgTypeCode { TVMStr = 11, TVMBytes = 12, TVMNDArrayHandle = 13, - TVMObjectRValueRefArg = 14 + TVMObjectRValueRefArg = 14, + TVMArgBool = 15, } diff --git a/web/src/runtime.ts b/web/src/runtime.ts index e446c4dc4d..600a9b857f 100644 --- a/web/src/runtime.ts +++ b/web/src/runtime.ts @@ -2474,6 +2474,7 @@ export class Instance implements Disposable { switch (tcode) { case ArgTypeCode.Int: case ArgTypeCode.UInt: + case ArgTypeCode.TVMArgBool: return this.memory.loadI64(rvaluePtr); case ArgTypeCode.Float: return this.memory.loadF64(rvaluePtr);
(tvm) branch main updated: [Doc] IRModule (#17298)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new e1da4651df [Doc] IRModule (#17298) e1da4651df is described below commit e1da4651df0afcea740f53f590aa42450f3795ed Author: Siyuan Feng AuthorDate: Fri Aug 23 20:05:55 2024 +0800 [Doc] IRModule (#17298) --- docs/get_started/tutorials/ir_module.py | 281 docs/index.rst | 1 + 2 files changed, 282 insertions(+) diff --git a/docs/get_started/tutorials/ir_module.py b/docs/get_started/tutorials/ir_module.py new file mode 100644 index 00..f81baf --- /dev/null +++ b/docs/get_started/tutorials/ir_module.py @@ -0,0 +1,281 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +.. _ir_module: + +IRModule + +This tutorial presents the core abstraction of Apache TVM Unity, the IRModule. +The IRModule encompasses the **entirety** of the ML models, incorporating the +computational graph, tensor programs, and potential calls to external libraries. + +.. 
contents:: Table of Contents +:local: +:depth: 1 +""" + +import numpy as np +import tvm +from tvm import relax + +## +# Create IRModule +# --- +# IRModules can be initialized in various ways. We demonstrate a few of them +# below. + +import torch +from torch import fx, nn +from tvm.relax.frontend.torch import from_fx + +## +# Import from existing models +# ~~~ +# The most common way to initialize an IRModule is to import from an existing +# model. Apache TVM Unity accommodates imports from a range of frameworks, +# such as PyTorch and ONNX. This tutorial solely demonstrates the import process +# from PyTorch. + + +# Create a dummy model +class TorchModel(nn.Module): +def __init__(self): +super(TorchModel, self).__init__() +self.fc1 = nn.Linear(784, 256) +self.relu1 = nn.ReLU() +self.fc2 = nn.Linear(256, 10) + +def forward(self, x): +x = self.fc1(x) +x = self.relu1(x) +x = self.fc2(x) +return x + + +# Give the input shape and data type +input_info = [((1, 784), "float32")] + +# Convert the model to IRModule +with torch.no_grad(): +torch_fx_model = fx.symbolic_trace(TorchModel()) +mod_from_torch = from_fx(torch_fx_model, input_info, keep_params_as_input=True) + +mod_from_torch, params_from_torch = relax.frontend.detach_params(mod_from_torch) +# Print the IRModule +mod_from_torch.show() + +## +# Write with Relax NN Module +# ~~ +# Apache TVM Unity also provides a set of PyTorch-liked APIs, to help users +# write the IRModule directly. + +from tvm.relax.frontend import nn + + +class RelaxModel(nn.Module): +def __init__(self): +super(RelaxModel, self).__init__() +self.fc1 = nn.Linear(784, 256) +self.relu1 = nn.ReLU() +self.fc2 = nn.Linear(256, 10) + +def forward(self, x): +x = self.fc1(x) +x = self.relu1(x) +x = self.fc2(x) +return x + + +mod_from_relax, params_from_relax = RelaxModel().export_tvm( +{"forward": {"x": nn.spec.Tensor((1, 784), "float32")}} +) +mod_from_relax.show() + +## +# Create via TVMScript +# +# TVMScript is a Python-based DSL for IRModules. 
We are able to +# directly output the IRModule in the TVMScript syntax, or alternatively, +# parse the TVMScript to obtain an IRModule. + +from tvm.script import ir as I +from tvm.script import relax as R + + +@I.ir_module +class TVMScriptModule: +@R.function +def main( +x: R.Tensor((1, 784), dtype="float32"), +fc1_weight: R.Tensor((256, 784), dtype="float32"), +fc1_bias: R.Tensor((256,), dtype=&
(tvm) branch main updated: [Docs] Introduce Relax API and move legacy part to standalone page (#17286)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 9e865b4b8f [Docs] Introduce Relax API and move legacy part to standalone page (#17286) 9e865b4b8f is described below commit 9e865b4b8fdf4cc624e94f8db9e5674c4519db05 Author: Siyuan Feng AuthorDate: Fri Aug 23 06:16:56 2024 +0800 [Docs] Introduce Relax API and move legacy part to standalone page (#17286) * [Docs] Introduce Relax API and move legacy part to standalone page As the TVM project evolves, the Unity strategy has been the recommended way to use Apache TVM applications. Hence, we are pushing documentation for the Relax API to the forefront and moving the legacy part to a standalone page, which may be removed in the future. * update for ci * update for ci --- docs/arch/index.rst| 9 -- docs/conf.py | 41 docs/dev/how_to/relay_add_op.rst | 6 +- docs/index.rst | 20 ++-- .../api/python/{runtime.rst => dlight.rst} | 9 +- docs/reference/api/python/index.rst| 113 +++-- .../api/python/{runtime.rst => instrument.rst} | 9 +- docs/reference/api/python/ir.rst | 16 --- .../api/python/{runtime.rst => relax/analysis.rst} | 9 +- .../{runtime.rst => relax/block_builder.rst} | 10 +- .../api/python/{tir.rst => relax/frontend.rst} | 42 .../reference/api/python/{tir.rst => relax/op.rst} | 60 +++ .../{relay/transform.rst => relax/relax.rst} | 9 +- .../python/{runtime.rst => relax/transform.rst}| 9 +- docs/reference/api/python/relay/transform.rst | 1 + .../{relay/transform.rst => runtime/disco.rst} | 8 +- .../reference/api/python/{ => runtime}/ndarray.rst | 6 -- .../{relay/transform.rst => runtime/profiling.rst} | 9 +- .../{relay/transform.rst => runtime/relax_vm.rst} | 9 +- .../reference/api/python/{ => runtime}/runtime.rst | 3 - .../api/python/{runtime.rst => tir/analysis.rst} | 10 +- .../{relay/transform.rst => tir/schedule.rst} | 8 +- 
.../python/{runtime.rst => tir/stmt_functor.rst} | 10 +- .../api/python/{runtime.rst => tir/tir.rst}| 10 +- .../api/python/{runtime.rst => tir/transform.rst} | 8 +- .../api/python/{runtime.rst => transform.rst} | 9 +- docs/reference/api/python/vta/index.rst| 45 docs/{arch => reference}/security.rst | 0 python/tvm/driver/build_module.py | 4 +- python/tvm/relax/op/create.py | 2 +- python/tvm/relax/transform/transform.py| 27 ++--- python/tvm/runtime/profiling/__init__.py | 3 +- python/tvm/target/__init__.py | 2 +- python/tvm/te/operation.py | 2 +- python/tvm/tir/buffer.py | 2 +- 35 files changed, 269 insertions(+), 271 deletions(-) diff --git a/docs/arch/index.rst b/docs/arch/index.rst index b84afeea28..17884a7742 100644 --- a/docs/arch/index.rst +++ b/docs/arch/index.rst @@ -408,15 +408,6 @@ Frontends ingest models from different frameworks into the TVM stack. frontend/tensorflow - -Security -- -.. toctree:: - :maxdepth: 1 - - security - - microTVM .. toctree:: diff --git a/docs/conf.py b/docs/conf.py index c3472c15de..1c5c5cb5d6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -39,6 +39,7 @@ from pathlib import Path import re import sys from textwrap import dedent, indent +from typing import List from unittest.mock import patch # If extensions (or modules to document with autodoc) are in another directory, @@ -718,10 +719,50 @@ def update_alias_docstring(name, obj, lines): lines.append(".. rubric:: Alias of %s:`%s.%s`" % (obj_type, amod, target_name)) +tvm_class_name_rewrite_map = { +"tvm.tir": ["Var", "Call"], +"tvm.relax": ["Var", "Call"], +"tvm.relax.frontend.nn": ["Module"], +} + + +def distinguish_class_name(name: str, lines: List[str]): +"""Distinguish the docstring of type annotations. + +In the whole TVM, there are many classes with the same name but in different modules, +e.g. ``tir.Var``, ``relax.Var``. This function is used to distinguish them in the docstring, +by adding the module name as prefix. 
+ +To be specific, this function will check the current object name, and if it in the specific +module with specific name, it will add the module name as prefix to the class name to prevent +the confusion. Further,
(tvm) branch main updated: [Relax][PyTorch] Add support for torch.tile (#17291)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 481c2dc852 [Relax][PyTorch] Add support for torch.tile (#17291) 481c2dc852 is described below commit 481c2dc85209fa3d104c020b0d8d8e4ce7ed20c1 Author: Masahiro Hiramori AuthorDate: Fri Aug 23 07:16:44 2024 +0900 [Relax][PyTorch] Add support for torch.tile (#17291) * add test * add support for torch.tile --- python/tvm/relax/frontend/torch/fx_translator.py | 9 + tests/python/relax/test_frontend_from_fx.py | 42 2 files changed, 51 insertions(+) diff --git a/python/tvm/relax/frontend/torch/fx_translator.py b/python/tvm/relax/frontend/torch/fx_translator.py index 093f3ae4cf..35131d3240 100644 --- a/python/tvm/relax/frontend/torch/fx_translator.py +++ b/python/tvm/relax/frontend/torch/fx_translator.py @@ -612,6 +612,14 @@ class TorchFXImporter: dim = None return self.block_builder.emit(relax.op.squeeze(x, dim)) +def _tile(self, node: fx.node.Node) -> relax.Var: +import torch # type: ignore + +args = self.retrieve_args(node) +if isinstance(args[1], (torch.Size, tuple, list)): +return self.block_builder.emit(relax.op.tile(args[0], tuple(args[1]))) +return self.block_builder.emit(relax.op.tile(args[0], args[1:])) + def _cumsum(self, node: fx.node.Node) -> relax.Var: x = self.env[node.args[0]] @@ -1450,6 +1458,7 @@ class TorchFXImporter: "permute": self._permute, "reshape": self._reshape, "split": self._split, +"tile": self._tile, "cumsum": self._cumsum, "chunk": self._chunk, "transpose": self._transpose, diff --git a/tests/python/relax/test_frontend_from_fx.py b/tests/python/relax/test_frontend_from_fx.py index 1a2cc5da62..6be3e7b23e 100644 --- a/tests/python/relax/test_frontend_from_fx.py +++ b/tests/python/relax/test_frontend_from_fx.py @@ -3126,6 +3126,48 @@ def test_reshape(): verify_model(Reshape(), input_info, {}, expected1) +def 
test_tile(): +input_info = [([1, 3], "float32")] + +class Tile1(Module): +def forward(self, x): +return x.tile((2,)) + +class Tile2(Module): +def forward(self, x): +return x.tile(4, 2) + +class Tile3(Module): +def forward(self, x): +return torch.tile(x, (4, 2)) + +@tvm.script.ir_module +class expected1: +@R.function +def main(x: R.Tensor((1, 3), dtype="float32")) -> R.Tensor((1, 6), dtype="float32"): +# block 0 +with R.dataflow(): +lv: R.Tensor((1, 6), dtype="float32") = R.tile(x, [2]) +gv: R.Tensor((1, 6), dtype="float32") = lv +R.output(gv) +return gv + +@tvm.script.ir_module +class expected2: +@R.function +def main(x: R.Tensor((1, 3), dtype="float32")) -> R.Tensor((4, 6), dtype="float32"): +# block 0 +with R.dataflow(): +lv: R.Tensor((4, 6), dtype="float32") = R.tile(x, [4, 2]) +gv: R.Tensor((4, 6), dtype="float32") = lv +R.output(gv) +return gv + +verify_model(Tile1(), input_info, {}, expected1) +verify_model(Tile2(), input_info, {}, expected2) +verify_model(Tile3(), input_info, {}, expected2) + + def test_transpose(): input_info = [([1, 2, 3, 4], "float32")]
(tvm) branch main updated: [ROCm] hipBLAS integration (#17290)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 8db545 [ROCm] hipBLAS integration (#17290) 8db545 is described below commit 8db54509e1cb892d3efc8f5859acaf52482a Author: Ruihang Lai AuthorDate: Thu Aug 22 13:33:04 2024 -0400 [ROCm] hipBLAS integration (#17290) This commit integrates hipBLAS into TVM. The minimum ROCm version requirement is 6.0. Co-authored-by: Lesheng Jin --- CMakeLists.txt | 1 + cmake/modules/LibInfo.cmake| 1 + cmake/modules/ROCM.cmake | 12 + cmake/utils/FindROCM.cmake | 4 + python/tvm/contrib/hipblas.py | 86 python/tvm/relax/backend/contrib/hipblas.py| 180 python/tvm/testing/utils.py| 3 + src/relax/backend/contrib/hipblas/codegen.cc | 110 + src/runtime/contrib/hipblas/hipblas.cc | 456 + .../contrib/hipblas/hipblas_json_runtime.cc| 153 +++ src/runtime/contrib/hipblas/hipblas_utils.cc | 78 src/runtime/contrib/hipblas/hipblas_utils.h| 155 +++ src/support/libinfo.cc | 1 + tests/python/contrib/test_hipblas.py | 109 + tests/python/relax/test_codegen_hipblas.py | 165 15 files changed, 1514 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7fba5355f0..aa2a385683 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -107,6 +107,7 @@ tvm_option(USE_THRUST "Build with Thrust" OFF) tvm_option(USE_CURAND "Build with cuRAND" OFF) tvm_option(USE_MIOPEN "Build with ROCM:MIOpen" OFF) tvm_option(USE_ROCBLAS "Build with ROCM:RoCBLAS" OFF) +tvm_option(USE_HIPBLAS "Build with ROCM:HIPBLAS" OFF) tvm_option(USE_SORT "Build with sort support" ON) tvm_option(USE_NNPACK "Build with nnpack support" OFF) tvm_option(USE_LIBTORCH "Build with libtorch support" OFF) diff --git a/cmake/modules/LibInfo.cmake b/cmake/modules/LibInfo.cmake index c4637a0c17..da9bc3e1c9 100644 --- a/cmake/modules/LibInfo.cmake +++ b/cmake/modules/LibInfo.cmake @@ -116,6 +116,7 @@ function(add_lib_info 
src_file) TVM_INFO_TVM_DEBUG_WITH_ABI_CHANGE="${TVM_DEBUG_WITH_ABI_CHANGE}" TVM_INFO_TVM_LOG_BEFORE_THROW="${TVM_LOG_BEFORE_THROW}" TVM_INFO_USE_ROCBLAS="${USE_ROCBLAS}" +TVM_INFO_USE_HIPBLAS="${USE_HIPBLAS}" TVM_INFO_USE_ROCM="${USE_ROCM}" TVM_INFO_USE_RCCL="${USE_RCCL}" TVM_INFO_USE_RPC="${USE_RPC}" diff --git a/cmake/modules/ROCM.cmake b/cmake/modules/ROCM.cmake index 02c4c73993..4d0f76d687 100644 --- a/cmake/modules/ROCM.cmake +++ b/cmake/modules/ROCM.cmake @@ -53,6 +53,18 @@ if(USE_ROCM) list(APPEND TVM_RUNTIME_LINKER_LIBS ${ROCM_ROCBLAS_LIBRARY}) endif(USE_ROCBLAS) + if(USE_HIPBLAS) +message(STATUS "Build with HIPBLAS support") +tvm_file_glob(GLOB HIPBLAS_CONTRIB_SRC src/relax/backend/contrib/hipblas/*.cc) +list(APPEND COMPILER_SRCS ${HIPBLAS_CONTRIB_SRC}) +tvm_file_glob(GLOB HIPBLAS_CONTRIB_SRCS src/runtime/contrib/hipblas/*.cc) +list(APPEND RUNTIME_SRCS ${HIPBLAS_CONTRIB_SRCS}) +list(APPEND TVM_RUNTIME_LINKER_LIBS ${ROCM_HIPBLAS_LIBRARY}) +if(NOT ROCM_HIPBLASLT_LIBRARY STREQUAL "ROCM_HIPBLASLT_LIBRARY-NOTFOUND") + list(APPEND TVM_RUNTIME_LINKER_LIBS ${ROCM_HIPBLASLT_LIBRARY}) +endif() + endif(USE_HIPBLAS) + if(USE_THRUST) message(STATUS "Build with rocThrust support") # We need to override CXX to hipcc. 
This is required by rocthrust diff --git a/cmake/utils/FindROCM.cmake b/cmake/utils/FindROCM.cmake index 4d895ff89d..6f54c179ee 100644 --- a/cmake/utils/FindROCM.cmake +++ b/cmake/utils/FindROCM.cmake @@ -55,6 +55,8 @@ macro(find_rocm use_rocm) endif() find_library(ROCM_MIOPEN_LIBRARY MIOpen ${__rocm_sdk}/lib) find_library(ROCM_ROCBLAS_LIBRARY rocblas ${__rocm_sdk}/lib) +find_library(ROCM_HIPBLAS_LIBRARY hipblas ${__rocm_sdk}/lib) +find_library(ROCM_HIPBLASLT_LIBRARY hipblaslt ${__rocm_sdk}/lib) find_library(ROCM_HSA_LIBRARY hsa-runtime64 ${__rocm_sdk}/lib) if(ROCM_HIPHCC_LIBRARY) @@ -66,5 +68,7 @@ macro(find_rocm use_rocm) message(STATUS "Found ROCM_HIPHCC_LIBRARY=" ${ROCM_HIPHCC_LIBRARY}) message(STATUS "Found ROCM_MIOPEN_LIBRARY=" ${ROCM_MIOPEN_LIBRARY}) message(STATUS "Found ROCM_ROCBLAS_LIBRARY=" ${ROCM_ROCBLAS_LIBRARY}) +message(STATUS "Found ROCM_HIPBLAS_LIBRARY=" ${ROCM_HIPBLAS_LIBRARY}) +message(STATUS "Found ROCM_HIPBLASLT_LIBRARY=" ${ROCM_HIPBLASLT_LIBR
(tvm) branch main updated: [FFI][Runtime] Use TVMValue::v_int64 to represent boolean values (#17240)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 0f037a6d99 [FFI][Runtime] Use TVMValue::v_int64 to represent boolean values (#17240) 0f037a6d99 is described below commit 0f037a6d9957108decceaf0c91bd84667a077aad Author: Eric Lunderberg AuthorDate: Thu Aug 22 12:13:16 2024 -0500 [FFI][Runtime] Use TVMValue::v_int64 to represent boolean values (#17240) * [FFI][Runtime] Use TVMValue::v_int64 to represent boolean values This is a follow-up to https://github.com/apache/tvm/pull/16183, which added handling of boolean values in the TVM FFI. The initial implementation added both a new type code (`kTVMArgBool`) and a new `TVMValue::v_bool` variant. This commit removes the `TVMValue::v_bool` variant, since the `kTVMArgBool` type code is sufficient to handle boolean arguments. Removing the `TVMValue::v_bool` variant also makes all `TVMValue` variants be 64-bit (assuming a 64-bit CPU). This can simplify debugging in some cases, since it prevents partial values from inactive variants from being present in memory. 
* Update MakePackedAPI, less special handling required for boolean --- include/tvm/runtime/c_runtime_api.h | 1 - include/tvm/runtime/packed_func.h| 10 +- python/tvm/_ffi/_cython/packed_func.pxi | 4 ++-- rust/tvm-sys/src/packed_func.rs | 4 ++-- src/runtime/crt/common/crt_runtime_api.c | 4 +--- src/runtime/minrpc/rpc_reference.h | 4 ++-- src/target/llvm/codegen_cpu.cc | 2 +- src/tir/transforms/ir_utils.h| 3 +-- src/tir/transforms/make_packed_api.cc| 20 +++- tests/python/codegen/test_target_codegen_llvm.py | 16 .../test_tir_transform_make_packed_api.py| 12 ++-- 11 files changed, 39 insertions(+), 41 deletions(-) diff --git a/include/tvm/runtime/c_runtime_api.h b/include/tvm/runtime/c_runtime_api.h index b4c653a0a5..d26c95e4f5 100644 --- a/include/tvm/runtime/c_runtime_api.h +++ b/include/tvm/runtime/c_runtime_api.h @@ -209,7 +209,6 @@ typedef DLTensor* TVMArrayHandle; */ typedef union { int64_t v_int64; - bool v_bool; double v_float64; void* v_handle; const char* v_str; diff --git a/include/tvm/runtime/packed_func.h b/include/tvm/runtime/packed_func.h index 91e53055b7..7c1b08e490 100644 --- a/include/tvm/runtime/packed_func.h +++ b/include/tvm/runtime/packed_func.h @@ -669,7 +669,7 @@ class TVMPODValue_ { // conversions. This is publicly exposed, as it can be useful in // specializations of PackedFuncValueConverter. 
if (type_code_ == kTVMArgBool) { - return value_.v_bool; + return static_cast(value_.v_int64); } else { return std::nullopt; } @@ -1041,7 +1041,7 @@ class TVMRetValue : public TVMPODValue_CRTP_ { TVMRetValue& operator=(const DataType& other) { return operator=(other.operator DLDataType()); } TVMRetValue& operator=(bool value) { this->SwitchToPOD(kTVMArgBool); -value_.v_bool = value; +value_.v_int64 = value; return *this; } TVMRetValue& operator=(std::string value) { @@ -1831,7 +1831,7 @@ class TVMArgsSetter { type_codes_[i] = kDLInt; } TVM_ALWAYS_INLINE void operator()(size_t i, bool value) const { -values_[i].v_bool = value; +values_[i].v_int64 = value; type_codes_[i] = kTVMArgBool; } TVM_ALWAYS_INLINE void operator()(size_t i, uint64_t value) const { @@ -2142,7 +2142,7 @@ inline void TVMArgsSetter::SetObject(size_t i, T&& value) const { std::is_base_of_v) { if (std::is_base_of_v || ptr->IsInstance()) { - values_[i].v_bool = static_cast(ptr)->value; + values_[i].v_int64 = static_cast(ptr)->value; type_codes_[i] = kTVMArgBool; return; } @@ -2327,7 +2327,7 @@ inline TObjectRef TVMPODValue_CRTP_::AsObjectRef() const { if constexpr (std::is_base_of_v) { if (type_code_ == kTVMArgBool) { - return Bool(value_.v_bool); + return Bool(value_.v_int64); } } diff --git a/python/tvm/_ffi/_cython/packed_func.pxi b/python/tvm/_ffi/_cython/packed_func.pxi index 7977f37d0b..6e062ab5f1 100644 --- a/python/tvm/_ffi/_cython/packed_func.pxi +++ b/python/tvm/_ffi/_cython/packed_func.pxi @@ -121,7 +121,7 @@ cdef inline int make_arg(object arg, elif isinstance(arg, bool): # A python `bool` is a subclass of `int`, so this check # must occur before `Integral`. -value[0].v_bool = arg +value[0].v_int64 = arg tcode[0] = kTVMArgBool elif isinstance(arg, Integral): value[0].v_int64 = arg @@ -215,7 +215,7 @@
(tvm) branch main updated: [Cleanup] Remove `using namespace tvm::runtime` from headers (#17246)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 20289e8502 [Cleanup] Remove `using namespace tvm::runtime` from headers (#17246) 20289e8502 is described below commit 20289e8502dd27c91f3945418c864ad7233aec89 Author: Eric Lunderberg AuthorDate: Thu Aug 22 12:12:56 2024 -0500 [Cleanup] Remove `using namespace tvm::runtime` from headers (#17246) Prior to this commit, various header files had `using namespace tvm::runtime`, which imports all names from `tvm::runtime` into the current namespace. These imports can cause compilation errors depending on the order of `#include` statements. For example, the `#include ` file uses the unqualified name `Bool` to refer to `::tvm::Bool`, a subclass of `PrimExpr`. If a different header file specifies `using namespace tvm::runtime` within the `tvm::relay` namespace, then the unqualified name `Bool` ambiguously refers to either `::tvm::Bool` or `::tvm::runtime::Bool`. In MSVC, this can cause even further compilation errors. By default, MSVC does not follow the C++ standard for name resolution in templates. The standard requires that any names in a template that do not depend on template parameters be resolved when the template is declared. However, MSVC instead resolves these names when the template is instantiated. As a result, the same `using namespace tvm::runtime` may cause a compilation error if it occurs after the template's declaration, but before the template's usage. (TVM provides the `/permissive-` flag to MSVC builds specifically to disable MSVC's non-standard name resolution, so this only impacts downstream forks that disable this flag. See https://github.com/apache/tvm/pull/16343 for more details.) This commit removes `using namespace tvm::runtime`, replacing them with explicit `using tvm::runtime::SOME_SPECIFIC_SYMBOL` where necessary. 
This resolves both the include-order dependency for standards-compliant compilers, and the compilation errors for MSVC's default build. --- src/contrib/msc/core/ir/graph_builder.h | 3 ++- src/relay/backend/vm/compiler.h | 3 ++- src/relay/parser/parser.cc| 2 ++ src/relay/parser/token.h | 2 -- src/relay/parser/tokenizer.h | 2 -- src/runtime/contrib/cblas/gemm_common.h | 5 - src/runtime/contrib/json/json_node.h | 1 - src/runtime/contrib/nnpack/nnpack_utils.h | 1 - src/runtime/contrib/verilator/verilator_runtime.h | 1 - 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/contrib/msc/core/ir/graph_builder.h b/src/contrib/msc/core/ir/graph_builder.h index 4b042c5617..d514a79347 100644 --- a/src/contrib/msc/core/ir/graph_builder.h +++ b/src/contrib/msc/core/ir/graph_builder.h @@ -51,7 +51,8 @@ namespace msc { using Expr = tvm::RelayExpr; using RelaxExprVisitor = tvm::relax::ExprVisitor; using RelayExprVisitor = tvm::relay::ExprVisitor; -using namespace tvm::runtime; + +using tvm::runtime::NDArray; /*! * \brief Config for building MSCGraph. diff --git a/src/relay/backend/vm/compiler.h b/src/relay/backend/vm/compiler.h index acb4d2d1d2..d22fb3d4d5 100644 --- a/src/relay/backend/vm/compiler.h +++ b/src/relay/backend/vm/compiler.h @@ -51,7 +51,8 @@ namespace tvm { namespace relay { namespace vm { -using namespace tvm::runtime; +using tvm::runtime::ModulePropertyMask; +using tvm::runtime::NDArray; using namespace tvm::runtime::vm; using namespace relay::transform; diff --git a/src/relay/parser/parser.cc b/src/relay/parser/parser.cc index b519a1778c..233455bf89 100644 --- a/src/relay/parser/parser.cc +++ b/src/relay/parser/parser.cc @@ -48,6 +48,8 @@ namespace relay { /*! \brief The meta table maps from type key to a sequence of objects. 
*/ using MetaTable = Map>; +using tvm::runtime::NDArray; +using tvm::runtime::String2DLDataType; using tvm::transform::CreateModulePass; using tvm::transform::PassContext; diff --git a/src/relay/parser/token.h b/src/relay/parser/token.h index 7b11e701cf..13875cb093 100644 --- a/src/relay/parser/token.h +++ b/src/relay/parser/token.h @@ -36,8 +36,6 @@ namespace tvm { namespace relay { -using namespace runtime; - enum class TokenType { kCommentStart, kCommentEnd, diff --git a/src/relay/parser/tokenizer.h b/src/relay/parser/tokenizer.h index 04dcd3263e..2b7ad4e559 100644 --- a/src/relay/parser/tokenizer.h +++ b/src/relay/parser/tokenizer.h @@ -41,8 +41,6 @@ namespace tvm { namespace relay { -using namespace runtime; - // trim from start (in place) static inline void ltrim(std::string& s) { // NOLINT(*) s.erase(s.begin(), std::find_if(s.
(tvm) branch main updated: [Doc] Quick Start (#17289)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 32063b0dfc [Doc] Quick Start (#17289) 32063b0dfc is described below commit 32063b0dfcb8ffcec6b7b4f99bc51adb178f1394 Author: Siyuan Feng AuthorDate: Thu Aug 22 22:24:23 2024 +0800 [Doc] Quick Start (#17289) This PR introduces a new quick start tutorial to the documentation. --- docs/.gitignore | 1 - docs/conf.py | 6 + docs/get_started/tutorials/README.txt | 2 + docs/get_started/tutorials/quick_start.py | 193 ++ docs/index.rst| 1 + tests/scripts/task_python_docs.sh | 2 + 6 files changed, 204 insertions(+), 1 deletion(-) diff --git a/docs/.gitignore b/docs/.gitignore index 84b247d369..041cf35887 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,3 +1,2 @@ doxygen modules -tutorials diff --git a/docs/conf.py b/docs/conf.py index be1ba11aa0..c3472c15de 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -408,6 +408,7 @@ intersphinx_mapping = { from sphinx_gallery.sorting import ExplicitOrder examples_dirs = [ +# legacy tutorial structure under gallery folder tvm_path.joinpath("gallery", "tutorial"), tvm_path.joinpath("gallery", "how_to", "compile_models"), tvm_path.joinpath("gallery", "how_to", "deploy_models"), @@ -419,9 +420,12 @@ examples_dirs = [ tvm_path.joinpath("gallery", "how_to", "work_with_microtvm"), tvm_path.joinpath("gallery", "how_to", "extend_tvm"), tvm_path.joinpath("vta", "tutorials"), +# New tutorial structure under docs folder +tvm_path.joinpath("docs", "get_started", "tutorials"), ] gallery_dirs = [ +# legacy tutorial structure under gallery folder "tutorial", "how_to/compile_models", "how_to/deploy_models", @@ -433,6 +437,8 @@ gallery_dirs = [ "how_to/work_with_microtvm", "how_to/extend_tvm", "topic/vta/tutorials", +# New tutorial structure under docs folder +"get_started/tutorials/", ] diff --git 
a/docs/get_started/tutorials/README.txt b/docs/get_started/tutorials/README.txt new file mode 100644 index 00..62e2c7b770 --- /dev/null +++ b/docs/get_started/tutorials/README.txt @@ -0,0 +1,2 @@ +Get Started +--- diff --git a/docs/get_started/tutorials/quick_start.py b/docs/get_started/tutorials/quick_start.py new file mode 100644 index 00..a4edf0b7c4 --- /dev/null +++ b/docs/get_started/tutorials/quick_start.py @@ -0,0 +1,193 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +.. _quick_start: + +Quick Start +=== + +This tutorial is for people who are new to Apache TVM. Taking an simple example +to show how to use Apache TVM to compile a simple neural network. + +.. contents:: Table of Contents +:local: +:depth: 2 + +""" + + +# Overview +# +# Apache TVM is a machine learning compilation framework, following the principle of +# **Python-first development** and **universal deployment**. It takes in pre-trained +# machine learning models, compiles and generates deployable modules that can be embedded +# and run everywhere. +# Apache TVM also enables customizing optimization processes to introduce new optimizations, +# libraries, codegen and more. 
+# +# Apache TVM can help to: +# +# - **Optimize** performance of ML workloads, composing libraries and codegen. +# - **Deploy** ML workloads to a diverse set of new environments, including new runtime and new +# hardware. +# - **Continuously improve and customize** ML deployment pipeline in Python by quickly customizing +# library dispatching, bringing in customized operato
(tvm) branch main updated: [Codegen] Emit `tir::Let` as var assignment explicitly (#17278)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new b76ebad886 [Codegen] Emit `tir::Let` as var assignment explicitly (#17278) b76ebad886 is described below commit b76ebad8867e36121708cf654923b66c4f7c9ede Author: Ruihang Lai AuthorDate: Wed Aug 21 09:04:34 2024 -0400 [Codegen] Emit `tir::Let` as var assignment explicitly (#17278) Prior to this PR, the PrimExpr `tir::Let` is treated as inlining during codegen, which makes any common subexpression elimination (CSE) efforts using `tir::Let` at the TIR level ineffective. This PR updates codegen so that the `tir::Let` will have an explicit var assignment and thus can effectively reflect the CSE efforts. --- python/tvm/relax/frontend/nn/op.py| 6 +++--- src/target/source/codegen_c.cc| 21 - tests/python/relax/test_frontend_nn_op.py | 6 +++--- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/python/tvm/relax/frontend/nn/op.py b/python/tvm/relax/frontend/nn/op.py index 17a40a8cce..04c030bea6 100644 --- a/python/tvm/relax/frontend/nn/op.py +++ b/python/tvm/relax/frontend/nn/op.py @@ -2544,7 +2544,7 @@ def sample_top_p_top_k_from_sorted_prob( @T.prim_func(private=True) def _get_renorm_prob(A: T.handle, B: T.handle, C: T.handle, D: T.handle): -batch, vocab_size = T.int64(), T.int64() +batch, vocab_size = T.int64(is_size_var=True), T.int64(is_size_var=True) cumsum_sorted = T.match_buffer(A, (batch, vocab_size), prob_dtype) top_p = T.match_buffer(B, (batch, 1), prob_dtype) top_k = T.match_buffer(C, (batch, 1), index_dtype) @@ -2564,8 +2564,8 @@ def sample_top_p_top_k_from_sorted_prob( def _get_index_from_sorted( A: T.handle, B: T.handle, C: T.handle, D: T.handle, E: T.handle, F: T.handle ): -batch, vocab_size = T.int64(), T.int64() -out_batch = T.int64() +batch, vocab_size = T.int64(is_size_var=True), T.int64(is_size_var=True) +out_batch = 
T.int64(is_size_var=True) cumsum_sorted = T.match_buffer(A, (batch, vocab_size), prob_dtype) indices = T.match_buffer(B, (batch, vocab_size), index_dtype) renorm_prob = T.match_buffer(C, (batch, 1), prob_dtype) diff --git a/src/target/source/codegen_c.cc b/src/target/source/codegen_c.cc index 03c3e3af66..9f68cd8d66 100644 --- a/src/target/source/codegen_c.cc +++ b/src/target/source/codegen_c.cc @@ -887,8 +887,27 @@ void CodeGenC::VisitExpr_(const LetNode* op, std::ostream& os) { // NOLINT(*) let_binding_[op->var] = op; } std::string value = PrintExpr(op->value); - var_idmap_[op->var.get()] = value; + if (print_ssa_form_) { +ICHECK(!var_idmap_.count(op->var.get())); +var_idmap_[op->var.get()] = value; + } else { +PrintIndent(); +if (op->var.dtype() == DataType::Handle() && handle_data_type_.count(op->var.get())) { + PrintType(handle_data_type_.at(op->var.get()), this->stream); + this->stream << "* " << AllocVarID(op->var.get()) << " = ("; + PrintType(handle_data_type_.at(op->var.get()), this->stream); + this->stream << "*)" << value << ";\n"; +} else { + PrintType(op->var.dtype(), this->stream); + this->stream << ' ' << AllocVarID(op->var.get()) << " = " << value << ";\n"; +} + } os << PrintExpr(op->body); + // Pop the defined var from var_idmap when exiting its scope. + // We do this because it is hard to completely avoid a same LetNode appearing + // at different places. 
+ bool removed = var_idmap_.erase(op->var.get()); + ICHECK(removed); } void CodeGenC::VisitExpr_(const RampNode* op, std::ostream& os) { // NOLINT(*) diff --git a/tests/python/relax/test_frontend_nn_op.py b/tests/python/relax/test_frontend_nn_op.py index 6c32691954..40624790cb 100644 --- a/tests/python/relax/test_frontend_nn_op.py +++ b/tests/python/relax/test_frontend_nn_op.py @@ -947,11 +947,11 @@ def test_sample_top_p_top_k_from_sorted_prob(): class Expected: @T.prim_func(private=True) def get_index_from_sorted(A: T.handle, B: T.handle, C: T.handle, D: T.handle, E: T.handle, F: T.handle): -batch, vocab_size = T.int64(), T.int64() +batch, vocab_size = T.int64(is_size_var=True), T.int64(is_size_var=True) cumsum_sorted = T.match_buffer(A, (batch, vocab_size)) indices = T.match_buffer(B, (batch, vocab_size), "int64") renorm_prob = T.match_buffer(C, (batch, 1)) -out_batch = T.int64() +out_batch = T.int64(is_size_var=Tr
(tvm) branch main updated: [Doc] Refactor install docs (#17287)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new dc247816f0 [Doc] Refactor install docs (#17287) dc247816f0 is described below commit dc247816f0b6be770a39064286d9723df6782a86 Author: Siyuan Feng AuthorDate: Wed Aug 21 20:52:51 2024 +0800 [Doc] Refactor install docs (#17287) * [Doc] Refactor install docs The major updates include: 1. remove nnpack installation guide 2. refactor building guide into step-by-step instructions * update for ci --- docs/install/from_source.rst | 421 +-- docs/install/index.rst | 3 +- docs/install/nnpack.rst | 118 3 files changed, 163 insertions(+), 379 deletions(-) diff --git a/docs/install/from_source.rst b/docs/install/from_source.rst index 4dc14863a8..a963d06ab5 100644 --- a/docs/install/from_source.rst +++ b/docs/install/from_source.rst @@ -19,240 +19,239 @@ Install from Source === -This page gives instructions on how to build and install the TVM package from -scratch on various systems. It consists of two steps: +This page gives instructions on how to build and install the TVM package from source. -1. First build the shared library from the C++ codes (`libtvm.so` for linux, `libtvm.dylib` for macOS and `libtvm.dll` for windows). -2. Setup for the language packages (e.g. Python Package). +.. contents:: Table of Contents +:local: +:depth: 2 -To get started, download tvm source code from the `Download Page <https://tvm.apache.org/download>`_. +.. _install-dependencies: -Developers: Get Source from Github --- -You can also choose to clone the source repo from github. -It is important to clone the submodules along, with ``--recursive`` option. +Step 1. Install Dependencies + -.. 
code:: bash +Apache TVM requires the following dependencies: -git clone --recursive https://github.com/apache/tvm tvm +- CMake (>= 3.24.0) +- LLVM (recommended >= 15) +- Git +- A recent C++ compiler supporting C++ 17, at the minimum +- GCC 7.1 +- Clang 5.0 +- Apple Clang 9.3 +- Visual Studio 2019 (v16.7) +- Python (>= 3.8) +- (Optional) Conda (Strongly Recommended) -For windows users who use github tools, you can open the git shell, and type the following command. +To easiest way to manage dependency is via conda, which maintains a set of toolchains +including LLVM across platforms. To create the environment of those build dependencies, +one may simply use: .. code:: bash - git submodule init - git submodule update +# make sure to start with a fresh environment +conda env remove -n tvm-build-venv +# create the conda environment with build dependency +conda create -n tvm-build-venv -c conda-forge \ +"llvmdev>=15" \ +"cmake>=3.24" \ +git \ +python=3.11 +# enter the build environment +conda activate tvm-build-venv -.. _build-shared-library: +Step 2. Get Source from Github +-- +You can also choose to clone the source repo from github. -Build the Shared Library - +.. code:: bash -Our goal is to build the shared libraries: +git clone --recursive https://github.com/apache/tvm tvm - - On Linux the target library are `libtvm.so` and `libtvm_runtime.so` - - On macOS the target library are `libtvm.dylib` and `libtvm_runtime.dylib` - - On Windows the target library are `libtvm.dll` and `libtvm_runtime.dll` +.. note:: +It's important to use the ``--recursive`` flag when cloning the TVM repository, which will +automatically clone the submodules. If you forget to use this flag, you can manually clone the submodules +by running ``git submodule update --init --recursive`` in the root directory of the TVM repository. -It is also possible to :ref:`build the runtime ` library only. +Step 3. Configure and Build +--- +Create a build directory and run CMake to configure the build. 
The following example shows how to build -The minimal building requirements for the ``TVM`` libraries are: +.. code:: bash - - A recent C++ compiler supporting C++ 17, at the minimum - - GCC 7.1 - - Clang 5.0 - - Apple Clang 9.3 - - Visual Studio 2019 (v16.7) - - CMake 3.18 or higher - - We highly recommend to build with LLVM to enable all the features. - - If you want to use CUDA, CUDA toolkit version >= 8.0 is required. If you are upgrading from an older version, make sure you purge the older version and reboot after installation. - - On macOS, you may want to install `Homebrew <https://brew.sh>`_ to easily install and manage dependencies. - - Pyth
(tvm) branch main updated: [WINDOWS] Compiler options for non x86 targets (#17260)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 7bea15f162 [WINDOWS] Compiler options for non x86 targets (#17260) 7bea15f162 is described below commit 7bea15f162ceb3f38809212eec5d711929709620 Author: krishnaraj36 AuthorDate: Wed Aug 21 00:53:53 2024 +0530 [WINDOWS] Compiler options for non x86 targets (#17260) --- python/tvm/contrib/cc.py | 5 - python/tvm/dlight/gpu/gemv.py | 15 +++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/python/tvm/contrib/cc.py b/python/tvm/contrib/cc.py index 59b57e08ba..110f80db61 100644 --- a/python/tvm/contrib/cc.py +++ b/python/tvm/contrib/cc.py @@ -372,8 +372,11 @@ def _linux_compile( def _windows_compile(output, objects, options, cwd=None, ccache_env=None): -cmd = ["clang"] +compiler = os.getenv("TVM_WIN_CC", default="clang") +win_target = os.getenv("TVM_WIN_TARGET", default="x86_64") +cmd = [compiler] cmd += ["-O2"] +cmd += ["--target=" + win_target] if output.endswith(".so") or output.endswith(".dll"): cmd += ["-shared"] diff --git a/python/tvm/dlight/gpu/gemv.py b/python/tvm/dlight/gpu/gemv.py index 2bcb8563a2..cff234140e 100644 --- a/python/tvm/dlight/gpu/gemv.py +++ b/python/tvm/dlight/gpu/gemv.py @@ -11,7 +11,7 @@ # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the +# KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
"""A rule for GEMV and DecodeGEMV.""" @@ -478,7 +478,9 @@ class GEMV(GPUScheduleRule): TS, TR = 8, 64 else: TS, TR = 1, 64 -elif target.kind.name == "opencl" and "android" in str(target.host): +elif target.kind.name == "opencl" and ( +("android" in str(target.host)) or ("adreno" in str(target.attrs)) +): TAG_S, TAG_R = "threadIdx.x", "threadIdx.y" VEC_C = 8 LOAD_V_SHARED = False @@ -686,7 +688,9 @@ class GEMV(GPUScheduleRule): DEC_PACK = 8 SCALE_PACK = 4 -if target.kind.name == "opencl" and "android" in str(target.host): +if target.kind.name == "opencl" and ( +("android" in str(target.host)) or ("adreno" in str(target.attrs)) +): TAG_S, TAG_R = "threadIdx.x", "threadIdx.y" VEC_C = 8 UNROLL = 8 @@ -756,7 +760,10 @@ class GEMV(GPUScheduleRule): ): """Schedule the outer reduction block.""" # NOTE: Only Android is supported so far -if not (target.kind.name == "opencl" and "android" in str(target.host)): +if not ( +target.kind.name == "opencl" +and (("android" in str(target.host)) or ("adreno" in str(target.attrs))) +): return None batch, s, r, c = sch.get_loops(block) len_s = get_extent(sch, s)
(tvm) branch main updated (6f4ac2312b -> 1ca9833db2)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git from 6f4ac2312b [Relay][Pytorch] Add support for `aten::tile` (#17277) add 1ca9833db2 [IR] Handle NaN in StructuralEqual and StructuralHash (#17249) No new revisions were added by this update. Summary of changes: include/tvm/node/structural_equal.h| 21 --- include/tvm/node/structural_hash.h | 13 ++- .../tir-base/test_tir_structural_equal_hash.py | 43 ++ 3 files changed, 71 insertions(+), 6 deletions(-)
(tvm) branch main updated: [Relay][Pytorch] Add support for `aten::tile` (#17277)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 6f4ac2312b [Relay][Pytorch] Add support for `aten::tile` (#17277) 6f4ac2312b is described below commit 6f4ac2312b9bbcbfb465ead0de410ab7dd1494a4 Author: Masahiro Hiramori AuthorDate: Mon Aug 19 22:31:50 2024 +0900 [Relay][Pytorch] Add support for `aten::tile` (#17277) * add test for torch.tile * add support for `aten::tile` --- python/tvm/relay/frontend/pytorch.py | 11 +++ tests/python/frontend/pytorch/test_forward.py | 24 2 files changed, 35 insertions(+) diff --git a/python/tvm/relay/frontend/pytorch.py b/python/tvm/relay/frontend/pytorch.py index 1f78d77390..0d93ff987c 100644 --- a/python/tvm/relay/frontend/pytorch.py +++ b/python/tvm/relay/frontend/pytorch.py @@ -4022,6 +4022,16 @@ class PyTorchOpConverter: attn_weight = _op.reshape(attn_weight, newshape=[-4, batch_size, -1, -2]) return attn_weight +def tile(self, inputs, input_types): +data = inputs[0] +reps = [] +for r in inputs[1]: +if isinstance(r, int): +reps.append(r) +else: +reps.append(int(_infer_value(r, {}).numpy())) +return _op.tile(data, reps) + # Operator mappings def create_convert_map(self): self.convert_map = { @@ -4302,6 +4312,7 @@ class PyTorchOpConverter: "aten::swapaxes": self.transpose, "aten::linalg_vector_norm": self.linalg_vector_norm, "aten::scaled_dot_product_attention": self.scaled_dot_product_attention, +"aten::tile": self.tile, } def update_convert_map(self, custom_map): diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py index a273af8fb8..9f8fac9306 100644 --- a/tests/python/frontend/pytorch/test_forward.py +++ b/tests/python/frontend/pytorch/test_forward.py @@ -5658,6 +5658,30 @@ def test_parameterlist(): verify_model(ParamListModel().float().eval(), input_data=input_data) +@tvm.testing.uses_gpu 
+def test_forward_tile(): +"""test_forward_repeat""" +torch.set_grad_enabled(False) +input_shape = [1, 3] + +class Tile1(Module): +def forward(self, *args): +return args[0].tile(1, 1) + +class Tile2(Module): +def forward(self, *args): +return args[0].tile(4, 2) + +class Tile3(Module): +def forward(self, *args): +return args[0].tile(4, 2, 1) + +input_data = torch.rand(input_shape).float() +verify_model(Tile1().float().eval(), input_data=input_data) +verify_model(Tile2().float().eval(), input_data=input_data) +verify_model(Tile3().float().eval(), input_data=input_data) + + class TestSetSpan: """test structural equal between translated / hand-crafted relay IR with span tagged."""
(tvm) branch main updated: [CI] Resolve CI compilation failures on MacOSX (#17271)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 6bcec1d6c3 [CI] Resolve CI compilation failures on MacOSX (#17271) 6bcec1d6c3 is described below commit 6bcec1d6c358268b12da733d995f61bb7384b0ac Author: Eric Lunderberg AuthorDate: Mon Aug 19 08:29:59 2024 -0500 [CI] Resolve CI compilation failures on MacOSX (#17271) * Debug, list configs in base conda environment * Add the "auto-update-conda: true" flag for miniconda setup It looks like the base environment provides `conda==24.5.0`, but the `tvm-build` environment only provides `conda==23.9.0`, and the error in `cargo build` is triggered from within the `tvm-build` environment. Seeing if it just needs to be allowed to update to a newer `conda` version. * Attempt bumping the required conda version The `conda-build` package specifies compatibility with `conda >= 23.7`, but the `libmamba` requirement isn't provided until `23.10`. Possibly an incompatibility, where the default solver is decided based on the base environment's `conda` version, but the availability is based on the `tvm-build` environment. * Try adding "conda-solver: classic" Since libmamba isn't available inside the generated environment * Exit on cmake failure in Windows build * Exit on first error for Windows conda build From what I can tell, batch scripts do not have an equivalent to `set -e`, so this needs to be added to every command in the batch scripts. 
--- .github/actions/setup/action.yml| 4 conda/build_win.bat | 4 +++- conda/recipe/bld.bat| 2 +- conda/recipe/install_libtvm.bat | 8 +--- conda/recipe/install_tvm_python.bat | 4 ++-- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.github/actions/setup/action.yml b/.github/actions/setup/action.yml index 40ddf4f906..6fd81c1d69 100644 --- a/.github/actions/setup/action.yml +++ b/.github/actions/setup/action.yml @@ -15,6 +15,7 @@ runs: channel-priority: strict environment-file: conda/build-environment.yaml auto-activate-base: false + conda-solver: classic use-only-tar-bz2: true python-version: 3.9 condarc-file: conda/condarc @@ -25,6 +26,7 @@ runs: channel-priority: strict environment-file: conda/build-environment.yaml auto-activate-base: false + conda-solver: classic use-only-tar-bz2: true python-version: 3.9 condarc-file: conda/condarc @@ -33,3 +35,5 @@ runs: run: | conda info conda list + conda info --envs + conda list --name base diff --git a/conda/build_win.bat b/conda/build_win.bat index 59d0d07340..e37a06ce7c 100644 --- a/conda/build_win.bat +++ b/conda/build_win.bat @@ -15,4 +15,6 @@ :: specific language governing permissions and limitations :: under the License. -conda build --output-folder=conda/pkg conda/recipe +echo on + +conda build --output-folder=conda/pkg conda/recipe || exit /b diff --git a/conda/recipe/bld.bat b/conda/recipe/bld.bat index f8988b1357..561dcff878 100644 --- a/conda/recipe/bld.bat +++ b/conda/recipe/bld.bat @@ -32,7 +32,7 @@ cmake ^ -DUSE_RANDOM=ON ^ -DUSE_PROFILER=ON ^ -DINSTALL_DEV=ON ^ - %SRC_DIR% + %SRC_DIR% || exit /b cd .. :: defer build to install stage to avoid rebuild. diff --git a/conda/recipe/install_libtvm.bat b/conda/recipe/install_libtvm.bat index f423c521f8..c56f83bfaa 100644 --- a/conda/recipe/install_libtvm.bat +++ b/conda/recipe/install_libtvm.bat @@ -15,8 +15,10 @@ :: specific language governing permissions and limitations :: under the License. 
-cmake --build build --config Release --target install +echo on + +cmake --build build --config Release --target install || exit /b :: Copy files into library bin so that they can be found -cp %LIBRARY_LIB%\tvm.dll %LIBRARY_BIN%\tvm.dll -cp %LIBRARY_LIB%\tvm_runtime.dll %LIBRARY_BIN%\tvm_runtime.dll +cp %LIBRARY_LIB%\tvm.dll %LIBRARY_BIN%\tvm.dll || exit /b +cp %LIBRARY_LIB%\tvm_runtime.dll %LIBRARY_BIN%\tvm_runtime.dll || exit /b diff --git a/conda/recipe/install_tvm_python.bat b/conda/recipe/install_tvm_python.bat index 96187468c2..07c0465b84 100644 --- a/conda/recipe/install_tvm_python.bat +++ b/conda/recipe/install_tvm_python.bat @@ -16,5 +16,5 @@ :: under the License. echo on -cd %SRC_DIR%\python -%PYTHON% setup.py install --single-version-externally-managed --record=%SRC_DIR%\record.txt +cd %SRC_DIR%\python || exit /b +%PYTHON% setup.py install --single-version-externally-managed --record=%SRC_DIR%\record.txt || exit /b
(tvm) branch main updated: [TOPI][ADRENO] Add Group Conv2d texture schedule (#17274)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 517c420d7b [TOPI][ADRENO] Add Group Conv2d texture schedule (#17274) 517c420d7b is described below commit 517c420d7b89029638926f10bbe9bed27f23bb5f Author: krishnaraj36 AuthorDate: Mon Aug 19 18:22:45 2024 +0530 [TOPI][ADRENO] Add Group Conv2d texture schedule (#17274) * Added Support for Adreno Texture Based Group Convolution * Added Few Testcases and Fixed Compute * Limited Support for Group Convolution * Removed Dead Code, Fixed Minor Issues - Co-authored-by: Sanjay Shankar Krishnaa --- python/tvm/relay/op/strategy/adreno.py | 31 +- python/tvm/topi/adreno/__init__.py | 1 + python/tvm/topi/adreno/group_conv2d_nchw.py| 386 + .../test_group_conv2d_nchw_texture.py | 208 +++ 4 files changed, 625 insertions(+), 1 deletion(-) diff --git a/python/tvm/relay/op/strategy/adreno.py b/python/tvm/relay/op/strategy/adreno.py index bacace9ad4..99e4d0a405 100644 --- a/python/tvm/relay/op/strategy/adreno.py +++ b/python/tvm/relay/op/strategy/adreno.py @@ -182,8 +182,37 @@ def conv2d_strategy_adreno(attrs, inputs, out_type, target): + kernel_layout + ") - only support NCHW4c / OIHW4o and NHWC / HWOI layouts for conv2d" ) +elif (data_layout == "NCHW4c" or data_layout == "NCHW") and ( +kernel_layout == "OIHW" or kernel_layout == "OIHW4o" +): +pad_in_chunks = (len(data.shape) == 5 and data.shape[1] % groups != 0) or ( +len(data.shape) == 4 and data.shape[1] % (groups * 4) != 0 +) +pad_out_chunks = (len(kernel.shape) == 5 and kernel.shape[0] % groups != 0) or ( +len(kernel.shape) == 4 and kernel.shape[0] % (groups * 4) != 0 +) + +if not (pad_in_chunks or pad_out_chunks): +strategy.add_implementation( +wrap_compute_conv2d(topi.adreno.group_conv2d_nchwc), + wrap_topi_schedule(topi.adreno.schedule_group_conv2d_nchwc), +name="group_conv2d_nchwc.image2d", 
+plevel=10, +) +elif len(data.shape) == 4 and len(kernel.shape) == 4: +strategy.add_implementation( +wrap_compute_conv2d(topi.cuda.group_conv2d_nchw, has_groups=True), +wrap_topi_schedule(topi.cuda.schedule_group_conv2d_nchw), +name="group_conv2d_nchw.cuda", +) +else: +raise RuntimeError( +"General group convolution is not currently supported for NCHWc layouts" +) else: -raise RuntimeError("General group convolution is not currently supported") +raise RuntimeError( +"General group convolution has limited support for NCHW(4c) layouts..." +) return strategy diff --git a/python/tvm/topi/adreno/__init__.py b/python/tvm/topi/adreno/__init__.py index cd42848b29..2c0ed20f10 100644 --- a/python/tvm/topi/adreno/__init__.py +++ b/python/tvm/topi/adreno/__init__.py @@ -20,6 +20,7 @@ from .conv2d_nchw import * from .depthwise_conv2d_nchw import * from .conv2d_nhwc import * +from .group_conv2d_nchw import * from .depthwise_conv2d_nhwc import * from .pooling import * from .conv2d_alter_op import * diff --git a/python/tvm/topi/adreno/group_conv2d_nchw.py b/python/tvm/topi/adreno/group_conv2d_nchw.py new file mode 100644 index 00..f1ab7fcf0e --- /dev/null +++ b/python/tvm/topi/adreno/group_conv2d_nchw.py @@ -0,0 +1,386 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name,unused-variable,unused-argument,no-else-return + +"""Group Conv2d NCHW Operato
(tvm) branch main updated: [Disco] Fix double free of nccl communicator (#17275)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 132daf6c95 [Disco] Fix double free of nccl communicator (#17275) 132daf6c95 is described below commit 132daf6c959efe04cffa90234ef1688d82d193e3 Author: Wuwei Lin AuthorDate: Thu Aug 15 09:52:37 2024 -0700 [Disco] Fix double free of nccl communicator (#17275) --- src/runtime/disco/nccl/nccl_context.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/runtime/disco/nccl/nccl_context.h b/src/runtime/disco/nccl/nccl_context.h index 730479b61a..b874da219f 100644 --- a/src/runtime/disco/nccl/nccl_context.h +++ b/src/runtime/disco/nccl/nccl_context.h @@ -129,6 +129,9 @@ struct CCLThreadLocalContext { void Clear() { if (group_comm) { NCCL_CALL(ncclCommDestroy(group_comm)); + if (global_comm == group_comm) { +global_comm = nullptr; + } group_comm = nullptr; } if (global_comm) {
(tvm) branch main updated: [CODEGEN][OPENCL] Fix opencl codegen for few ops (#17273)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new fb16d9487d [CODEGEN][OPENCL] Fix opencl codegen for few ops (#17273) fb16d9487d is described below commit fb16d9487d062353b1fed3b14729e9282da2b875 Author: krishnaraj36 AuthorDate: Wed Aug 14 18:25:09 2024 +0530 [CODEGEN][OPENCL] Fix opencl codegen for few ops (#17273) * Compiler pass config to choose target clml support version Partition pass should choose offloading ops based on target support this config enables choosing target version on python api as well as tvmc. * Update clml.py * Fix opencl codegen for few ops Fixed the opencl codegen for few operators - 1. Atomic add for float - opencl doesn't support float atomic add, Enabled work-around for this operation with atomic_cmpxchg() 2. fmodf - Opencl only supports fmod for all floating point 3. nearbyint - Opencl doesn't have this function and hence replaced with the round function. * Update test_relay_ops.py * Update codegen_opencl.cc * Update codegen_opencl.cc * Revert "Compiler pass config to choose target clml support version" This reverts commit bc955b02c436cdab7e397a2f1e66d828861da6e8. * Revert "Update clml.py" This reverts commit 4ff98a82dc463628f673292631df518e6831fd4e. 
- Co-authored-by: Siva Co-authored-by: B, Siva Rama Krishna Reddy Co-authored-by: Vegiraju, Krishna Raju --- python/tvm/topi/cuda/nms.py| 4 +- src/target/source/codegen_opencl.cc| 52 ++- src/target/source/codegen_opencl.h | 1 + .../python/relay/opencl_texture/test_relay_ops.py | 73 ++ 4 files changed, 126 insertions(+), 4 deletions(-) diff --git a/python/tvm/topi/cuda/nms.py b/python/tvm/topi/cuda/nms.py index e402c58889..f258bffc3e 100644 --- a/python/tvm/topi/cuda/nms.py +++ b/python/tvm/topi/cuda/nms.py @@ -50,7 +50,9 @@ def cuda_atomic_add_rule(op): def opencl_atomic_add_rule(op): if op.dtype == "int32": return tvm.tir.call_pure_extern("int32", "atomic_add", op.args[0], op.args[1]) -raise RuntimeError("only support int32") +elif op.dtype == "float32": +return tvm.tir.call_pure_extern("float32", "atomic_add", op.args[0], op.args[1]) +raise RuntimeError("only support int32, float32") register_intrin_lowering("tir.atomic_add", target="cuda", f=cuda_atomic_add_rule, level=99) diff --git a/src/target/source/codegen_opencl.cc b/src/target/source/codegen_opencl.cc index f17a452d5c..5933c9582c 100644 --- a/src/target/source/codegen_opencl.cc +++ b/src/target/source/codegen_opencl.cc @@ -129,6 +129,16 @@ std::string CodeGenOpenCL::Finish() { if (enable_atomics_) { decl_stream << "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n\n"; +decl_stream << "__inline float atomic_add_float_emu(volatile __global float* sum, const float " + "toAdd) {\n" + "float next_value = 0;" + "float prev_value = 0;" + "do {\n" + "prev_value =*(sum);\n" + "next_value =prev_value + toAdd;\n" + "} while(atomic_cmpxchg((volatile global int *)(sum), *((int*)&prev_value), " + "*((int*)&next_value)) != *((int*)&prev_value));\n" + "return next_value;\n}\n"; } // Enable OpenCL 1.2 sampler-less texture reads, but utilize @@ -458,13 +468,21 @@ void CodeGenOpenCL::VisitExpr_(const CallNode* op, std::ostream& os) 
{ this->PrintExpr(op->args.back(), os); os << "]"; } - } else if (op->op.same_as(builtin_call_extern_)) { + } else if (op->op.same_as(builtin_call_extern_) || op->op.same_as(builtin_call_pure_extern_)) { auto func = Downcast(op->args[0]); // Enable atomics extension if used. -if (func->value == "atomic_add") { +if (func->value == "atomic_add" && op->dtype.is_float()) { enable_atomics_ = true; + this->PrintCallExtern(GetType(GetRef(op)), "atomic_add_float_emu", op->args, true, +os); +} else if (func->value == "nearbyint") { + this->PrintCallExtern(GetType(GetRef(op)), "round", op-
(tvm) branch main updated: [Relax][Bugfix] Preserve dtype in ToMixedPrecision for kNever ops (#17263)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new b3d01c2295 [Relax][Bugfix] Preserve dtype in ToMixedPrecision for kNever ops (#17263) b3d01c2295 is described below commit b3d01c2295cde9dcd02980bad49fcd9cd3049231 Author: Eric Lunderberg AuthorDate: Sun Aug 11 13:43:09 2024 -0500 [Relax][Bugfix] Preserve dtype in ToMixedPrecision for kNever ops (#17263) Prior to this commit, while an operator with the `MixedPrecisionPolicyKind::kNever` attribute would not be updated from `float32` to `float16`, it would be erroneously updated from `float16` to `float32`. This commit updates `ToMixedPrecision` to preserve the datatype of any arguments used in a `kNever` operation, rather than forcing them to a `float32` datatype. --- src/relax/transform/to_mixed_precision.cc | 69 ++ .../relax/test_transform_to_mixed_precision.py | 34 ++- 2 files changed, 75 insertions(+), 28 deletions(-) diff --git a/src/relax/transform/to_mixed_precision.cc b/src/relax/transform/to_mixed_precision.cc index c844d59356..1b660b8fec 100644 --- a/src/relax/transform/to_mixed_precision.cc +++ b/src/relax/transform/to_mixed_precision.cc @@ -303,11 +303,7 @@ class ToMixedPrecisionRewriter : public ExprMutator { } Array RemapArgs(const Array& args) { -Array new_args; -for (const auto& arg : args) { - new_args.push_back(VarReplacer::Replace(arg, var_remap_)); -} -return new_args; +return args.Map([this](Expr arg) { return VarReplacer::Replace(arg, var_remap_); }); } // Util function to rewrite the expr to the given dtype @@ -475,37 +471,60 @@ class ToMixedPrecisionRewriter : public ExprMutator { ReEmitBinding(binding, call_node->args[0]); return; } -DataType to; -ObjectPtr new_call = make_object(*call_node); + +Call new_call = GetRef(call_node); + // We first to remap the args to the current vars according to the var_remap_ 
-new_call->args = std::move(RemapArgs(call_node->args)); +new_call.CopyOnWrite()->args = RemapArgs(new_call->args); + // Then we rewrite the args according to the policy +std::optional opt_new_dtype = std::nullopt; + if (policy == kAlways) { - to = fp16_; + opt_new_dtype = fp16_; auto attr_map = Op::GetAttrMap("FInferMixedPrecision"); ICHECK(attr_map.count(op)); - auto f = attr_map[op]; - new_call = make_object(*(f(Call(new_call), output_dtype_).get())); + new_call = attr_map[op](new_call, output_dtype_); } else if (policy == kFollow) { - to = AllFP16Castable(new_call->args) ? fp16_ : fp32_; + opt_new_dtype = AllFP16Castable(new_call->args) ? fp16_ : fp32_; } else if (policy == kNever) { - to = fp32_; + // An upstream operation may have changed the datatype of the + // arguments. Because this operation must be provided with + // exactly the same dtype as it previously had, it may require a + // cast back to the original datatype. + + if (!new_call->args.same_as(call_node->args)) { +Array new_typed_args; +for (size_t i = 0; i < call_node->args.size(); i++) { + auto arg = new_call->args[i]; + auto old_ntype = NTypeFrom(call_node->args[i]); + new_typed_args.push_back(RewriteExpr(arg, old_ntype)); +} +new_call.CopyOnWrite()->args = new_typed_args; + } + } else { LOG(FATAL) << "Unsupported TMixedPrecisionPolicy: " << policy; } -new_call->args = std::move(RewriteArgs(new_call->args, to)); -new_call->struct_info_ = NullOpt; -Expr new_value = builder_->Normalize(Call(new_call)); -if (policy == kAlways && binding->var->IsInstance()) { - // kAlways: store the tensors to fp16 - // But global vars will be stored to the original dtype anyway (see below) - new_value = RewriteExpr(new_value, NTypeFrom(new_value, fp16_)); -} -if (!binding->var->IsInstance()) { - // Global var: store the tensors to the original dtype - NType to = NTypeFrom(binding->var); - new_value = RewriteExpr(new_value, to); + +Expr new_value = new_call; +if (opt_new_dtype) { + auto new_dtype = 
opt_new_dtype.value(); + new_call.CopyOnWrite()->args = RewriteArgs(new_call->args, new_dtype); + new_call.CopyOnWrite()->struct_info_ = NullOpt; + + new_value = builder_->Normalize(Call(new_call)); + + if (!binding->var->IsInstance()) { +// Non-Dataflow var: store the tensors to the original dtype +new_value = RewriteExpr(new_value, NTypeFrom(binding->var)); + } else if (policy == kAlway
(tvm) branch main updated: [Disco] Disable splitting nccl communicator in single-group (#17264)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new bed66d20f1 [Disco] Disable splitting nccl communicator in single-group (#17264) bed66d20f1 is described below commit bed66d20f1640f814b9f27bcc439f8761e3070cf Author: Wuwei Lin AuthorDate: Sat Aug 10 10:06:17 2024 -0700 [Disco] Disable splitting nccl communicator in single-group (#17264) --- src/runtime/disco/nccl/nccl.cc | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/runtime/disco/nccl/nccl.cc b/src/runtime/disco/nccl/nccl.cc index d35fc911c6..a5240aa2b2 100644 --- a/src/runtime/disco/nccl/nccl.cc +++ b/src/runtime/disco/nccl/nccl.cc @@ -101,8 +101,12 @@ void InitCCLPerWorker(IntTuple device_ids, std::string unique_id_bytes) { ncclUniqueId id; std::memcpy(id.internal, unique_id_bytes.data(), NCCL_UNIQUE_ID_BYTES); NCCL_CALL(ncclCommInitRank(&ctx->global_comm, worker->num_workers, id, worker->worker_id)); - NCCL_CALL(ncclCommSplit(ctx->global_comm, worker->worker_id / group_size, - worker->worker_id % group_size, &ctx->group_comm, NULL)); + if (worker->num_groups == 1) { +ctx->group_comm = ctx->global_comm; + } else { +NCCL_CALL(ncclCommSplit(ctx->global_comm, worker->worker_id / group_size, +worker->worker_id % group_size, &ctx->group_comm, NULL)); + } } void AllReduce(NDArray send, ReduceKind reduce_kind, bool in_group, NDArray recv) {
(tvm) branch main updated: [CompileBugfix][contrib] meet 'base64.h: No such file or directory' and '‘tvm::runtime::vm::AllocatorType’ has not been declared' while compiling (#17265)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 2d828f5cc2 [CompileBugfix][contrib] meet 'base64.h: No such file or directory' and '‘tvm::runtime::vm::AllocatorType’ has not been declared' while compiling (#17265) 2d828f5cc2 is described below commit 2d828f5cc29692546317cb0a2e76ba521b1bd080 Author: Weiyi Ding <72555042+dd...@users.noreply.github.com> AuthorDate: Sun Aug 11 00:29:26 2024 +0800 [CompileBugfix][contrib] meet 'base64.h: No such file or directory' and '‘tvm::runtime::vm::AllocatorType’ has not been declared' while compiling (#17265) --- src/contrib/torch/pt_call_tvm/tvm_class.cc | 2 +- .../torch/tvm_module_wrapper/RuntimeModuleWrapperTVM.cc| 10 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/contrib/torch/pt_call_tvm/tvm_class.cc b/src/contrib/torch/pt_call_tvm/tvm_class.cc index 5e57dc152f..f5ae95a5a7 100644 --- a/src/contrib/torch/pt_call_tvm/tvm_class.cc +++ b/src/contrib/torch/pt_call_tvm/tvm_class.cc @@ -167,7 +167,7 @@ class TvmVMModulePack { const auto runtime_create = *tvm::runtime::Registry::Get("runtime._VirtualMachine"); vm_ = runtime_create(exe_); auto init_func = vm_.GetFunction("init", false); -auto alloc_type = static_cast(tvm::runtime::vm::AllocatorType::kPooled); +auto alloc_type = static_cast(tvm::runtime::memory::AllocatorType::kPooled); if (device_type != kDLCPU) { // CPU is required for executing shape functions init_func(static_cast(kDLCPU), 0, alloc_type, device_type, device_id, alloc_type); diff --git a/src/contrib/torch/tvm_module_wrapper/RuntimeModuleWrapperTVM.cc b/src/contrib/torch/tvm_module_wrapper/RuntimeModuleWrapperTVM.cc index c77996cf67..3e1c7e7c0e 100644 --- a/src/contrib/torch/tvm_module_wrapper/RuntimeModuleWrapperTVM.cc +++ b/src/contrib/torch/tvm_module_wrapper/RuntimeModuleWrapperTVM.cc @@ -29,7 +29,7 @@ 
#include #include "../../../runtime/graph_executor/graph_executor_factory.h" -#include "../../support/base64.h" +#include "../../../support/base64.h" #include "runtime_bridge.h" namespace tvm { @@ -209,10 +209,10 @@ inline void b64decode(const std::string b64str, uint8_t* ret) { size_t index = 0; const auto length = b64str.size(); for (size_t i = 0; i < length; i += 4) { -int8_t ch0 = base64::DecodeTable[(int32_t)b64str[i]]; -int8_t ch1 = base64::DecodeTable[(int32_t)b64str[i + 1]]; -int8_t ch2 = base64::DecodeTable[(int32_t)b64str[i + 2]]; -int8_t ch3 = base64::DecodeTable[(int32_t)b64str[i + 3]]; +int8_t ch0 = tvm::support::base64::DecodeTable[(int32_t)b64str[i]]; +int8_t ch1 = tvm::support::base64::DecodeTable[(int32_t)b64str[i + 1]]; +int8_t ch2 = tvm::support::base64::DecodeTable[(int32_t)b64str[i + 2]]; +int8_t ch3 = tvm::support::base64::DecodeTable[(int32_t)b64str[i + 3]]; uint8_t st1 = (ch0 << 2) + (ch1 >> 4); ret[index++] = st1; if (b64str[i + 2] != '=') {
(tvm) branch main updated: [DLIGHT][ADRENO] Fix for opencl adreno matmul schedule (#17259)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new e5f85c0e32 [DLIGHT][ADRENO] Fix for opencl adreno matmul schedule (#17259) e5f85c0e32 is described below commit e5f85c0e32046b6b1bdc5bd1a2485c645df4e730 Author: krishnaraj36 AuthorDate: Sat Aug 10 21:55:51 2024 +0530 [DLIGHT][ADRENO] Fix for opencl adreno matmul schedule (#17259) Fixed the matmul schedule for the case of epilog blocks --- python/tvm/dlight/gpu/matmul.py| 50 ++- tests/python/dlight/test_gpu_matmul.py | 89 ++ 2 files changed, 85 insertions(+), 54 deletions(-) diff --git a/python/tvm/dlight/gpu/matmul.py b/python/tvm/dlight/gpu/matmul.py index 25cc649b44..5fb8e2469d 100644 --- a/python/tvm/dlight/gpu/matmul.py +++ b/python/tvm/dlight/gpu/matmul.py @@ -941,7 +941,7 @@ class Matmul(GPUScheduleRule): inner_x=False, ) elif target.kind.name == "opencl" and ( -("android" in str(target.host)) or ("windows" in str(target.host)) +("android" in str(target.host)) or ("adreno" in str(target.attrs)) ): return Matmul.Config( block_size_x=32, @@ -991,7 +991,10 @@ class Matmul(GPUScheduleRule): end_it = block_stmt.reads[-1].region[-1].min return {it.var: it.kind for it in iter_infos}.get(end_it, "O") == "R" -if target.kind.name == "opencl" and not is_inner_reduction(block_stmt, iter_infos): +if ( +target.kind.name == "opencl" +and (("android" in str(target.host)) or ("adreno" in str(target.attrs))) +) and not is_inner_reduction(block_stmt, iter_infos): ret = self.sch_outer_reduction(sch, config, main_block, blocks) if ret is not None: return ret @@ -1122,6 +1125,16 @@ class Matmul(GPUScheduleRule): reduction_block: tir.schedule.BlockRV, blocks: List[tir.schedule.BlockRV], ) -> Optional[tir.Schedule]: + +"""Get vectorization factor""" + +def get_max_factor(n, factors): +factors = sorted(factors, reverse=True) +for factor in factors: +if 
n % factor == 0: +return factor +return 1 + reduction_loops = sch.get_loops(reduction_block) if not len(reduction_loops) == 4: return None @@ -1140,13 +1153,17 @@ class Matmul(GPUScheduleRule): config.vector_size, config.unroll, ) - -is_dequant_block = len(blocks) > 1 -if is_dequant_block: -compute_block, dequant_block, matmul_block = blocks -sch.compute_inline(compute_block) -else: -(matmul_block,) = blocks +VecSize = min(get_max_factor(sch.get(n).extent // Threads_X, [1, 2, 4, 8]), VecSize) +dequant_block = None +matmul_block = reduction_block +epilogue_block = None +if blocks[-1] is not matmul_block: +epilogue_block = blocks[-1] +for blk in blocks[:-1]: +if "dequantize" in sch.get(blk).name_hint: +dequant_block = blk +elif blk is not matmul_block: +sch.compute_inline(blk) m = sch.fuse(mb, ms) @@ -1162,12 +1179,13 @@ class Matmul(GPUScheduleRule): sch.reorder(no, mo, ni, mi, k0, k1, k2, k3, mu, nv) sch.compute_at(rmat_block, k0) -if is_dequant_block: +if dequant_block is not None: sch.compute_at(dequant_block, k3) sch.reverse_compute_at(wmat_block, mi) sch.set_scope(rmat_block, 0, "shared") sch.set_scope(matmul_block, 0, "local") -if is_dequant_block: + +if dequant_block is not None: sch.set_scope(dequant_block, 0, "local") sch.bind(mo, "blockIdx.y") @@ -1175,7 +1193,7 @@ class Matmul(GPUScheduleRule): sch.bind(mi, "threadIdx.y") sch.bind(ni, "threadIdx.x") sch.vectorize(sch.get_loops(matmul_block)[-1]) -if is_dequant_block: +if dequant_block is not None: sch.vectorize(sch.get_loops(dequant_block)[-1]) # Co-operative Memory Fetch @@ -1187,7 +1205,7 @@ class Matmul(GPUScheduleRule): sch.vectorize(wv) # Scale and Quant Cache -if is_dequant_block: +if dequant_block is not None: qb = sch.cache_read(dequant_block, 0, "local") sb = sch.cache_read(dequant_block, 1, "local") sch.compute_at(sb,
(tvm) branch main updated: [ROCm] Support ROCm 6 (#17256)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 6ae29610a5 [ROCm] Support ROCm 6 (#17256) 6ae29610a5 is described below commit 6ae29610a531cea66e94f8bdcf96f2c5cbdb3bf9 Author: Ruihang Lai AuthorDate: Fri Aug 9 09:44:59 2024 -0400 [ROCm] Support ROCm 6 (#17256) This PR updates some ROCm modules in order to support ROCm 6. --- cmake/modules/ROCM.cmake| 1 + cmake/utils/FindRCCL.cmake | 2 +- src/runtime/rocm/rocm_device_api.cc | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cmake/modules/ROCM.cmake b/cmake/modules/ROCM.cmake index 37fcd71646..02c4c73993 100644 --- a/cmake/modules/ROCM.cmake +++ b/cmake/modules/ROCM.cmake @@ -23,6 +23,7 @@ if(ROCM_FOUND) # avoid global retrigger of cmake include_directories(SYSTEM ${ROCM_INCLUDE_DIRS}) add_definitions(-D__HIP_PLATFORM_HCC__=1) + add_definitions(-D__HIP_PLATFORM_AMD__=1) endif(ROCM_FOUND) diff --git a/cmake/utils/FindRCCL.cmake b/cmake/utils/FindRCCL.cmake index 93d8c87446..95cb555178 100644 --- a/cmake/utils/FindRCCL.cmake +++ b/cmake/utils/FindRCCL.cmake @@ -32,7 +32,7 @@ macro(find_rccl use_rccl) find_path(RCCL_INCLUDE_DIR NAMES rccl.h) find_library(RCCL_LIBRARY NAMES rccl) else() -find_path(RCCL_INCLUDE_DIR NAMES rccl.h HINTS ${use_rccl} ${use_rccl}/include) +find_path(RCCL_INCLUDE_DIR NAMES rccl.h HINTS ${use_rccl} ${use_rccl}/include ${use_rccl}/include/rccl) find_library(RCCL_LIBRARY NAMES rccl HINTS ${use_rccl} ${use_rccl}/lib) endif() include(FindPackageHandleStandardArgs) diff --git a/src/runtime/rocm/rocm_device_api.cc b/src/runtime/rocm/rocm_device_api.cc index e2a5048ca0..c37e9fada5 100644 --- a/src/runtime/rocm/rocm_device_api.cc +++ b/src/runtime/rocm/rocm_device_api.cc @@ -113,7 +113,7 @@ class ROCMDeviceAPI final : public DeviceAPI { case kGcnArch: { hipDeviceProp_t prop; 
ROCM_CALL(hipGetDeviceProperties(&prop, device.device_id)); -*rv = prop.gcnArch; +*rv = prop.gcnArchName; return; } case kApiVersion: {
(tvm) branch main updated: [WebGPU] Fix unexpected device lost error when intentional dispose (#17250)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 1fcb62023f [WebGPU] Fix unexpected device lost error when intentional dispose (#17250) 1fcb62023f is described below commit 1fcb62023f0a5f878abd5b43ec9e547933fb5fab Author: Charlie Ruan <53290280+charliefr...@users.noreply.github.com> AuthorDate: Thu Aug 8 08:39:43 2024 -0400 [WebGPU] Fix unexpected device lost error when intentional dispose (#17250) --- web/src/runtime.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/web/src/runtime.ts b/web/src/runtime.ts index d71c98e7d1..e446c4dc4d 100644 --- a/web/src/runtime.ts +++ b/web/src/runtime.ts @@ -1122,7 +1122,7 @@ export class Instance implements Disposable { // ctx release goes back into lib. this.ctx.dispose(); this.lib.dispose(); -this.deviceLostIsError = true; +// Cannot set deviceLostIsError back to true here because GPUDevice.destroy() is asynchronous. } /** @@ -2122,6 +2122,7 @@ export class Instance implements Disposable { this.dispose(); } }); +this.deviceLostIsError = true; const webGPUContext = new WebGPUContext( this.memory, device
(tvm) branch revert-16183-ffi_boxed_primitives_for_runtime created (now cc858c3f11)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch revert-16183-ffi_boxed_primitives_for_runtime in repository https://gitbox.apache.org/repos/asf/tvm.git at cc858c3f11 Revert "[FFI][RUNTIME] Introduce runtime boxed types for int/float/bool (#16183)" This branch includes the following new commits: new cc858c3f11 Revert "[FFI][RUNTIME] Introduce runtime boxed types for int/float/bool (#16183)" The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference.
(tvm) branch main updated: [Relax] Remove segfault in R.call_tir_inplace validation (#17242)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 591cf1ec42 [Relax] Remove segfault in R.call_tir_inplace validation (#17242) 591cf1ec42 is described below commit 591cf1ec4281872b97449fdd0da56ff255c9f383 Author: Eric Lunderberg AuthorDate: Tue Aug 6 07:03:37 2024 -0500 [Relax] Remove segfault in R.call_tir_inplace validation (#17242) Prior to this commit, the error message produced when validating `R.call_tir_inplace` included the shape of the argument that will be mutated in-place. This correctly caught and raised an error when the argument is a tensor with known shape that is incompatible with the output tensor's shape. However, this same error message could also be reached if the input does not have `TensorStructInfo` at all, which would trigger a segfault. This commit updates the validation to print the argument's `StructInfo` directly, rather than a field from the struct info. This correctly raises an error for the cases where the argument is not a tensor, or is a tensor with unknown dimensionality, while still printing the explicit shape of the mismatched tensor when available. --- src/relax/op/op.cc | 80 +++--- tests/python/relax/test_transform.py | 197 +-- 2 files changed, 202 insertions(+), 75 deletions(-) diff --git a/src/relax/op/op.cc b/src/relax/op/op.cc index 77cf4a2c6f..0a840248ff 100644 --- a/src/relax/op/op.cc +++ b/src/relax/op/op.cc @@ -419,13 +419,19 @@ Expr NormalizeCallTIRInPlace(const BlockBuilder& ctx, Call call) { // may result in an error if performed before normalization. 
call = Downcast(NormalizeCallTIR(ctx, std::move(call))); + Array sinfo_outputs = [&]() -> Array { +auto out_sinfo = call->sinfo_args[0]; +if (auto* tuple_output = out_sinfo.as()) { + return tuple_output->fields; +} else { + return {out_sinfo}; +} + }(); + // there must be an inplace index for each output const auto* attrs = call->attrs.as(); - size_t num_outputs = 1U; - if (auto* tup_info = call->sinfo_args[0].as()) { -num_outputs = tup_info->fields.size(); - } - if (attrs->inplace_indices.size() != num_outputs) { + ICHECK(attrs); + if (attrs->inplace_indices.size() != sinfo_outputs.size()) { ctx->ReportFatal(Diagnostic::Error(call) << "There must be an in-place index specified for each output"); } @@ -459,45 +465,37 @@ Expr NormalizeCallTIRInPlace(const BlockBuilder& ctx, Call call) { // input shape // TODO(@slyubomirsky): eventually we will want to handle cases where that is not true Tuple call_args = Downcast(call->args[1]); - if (attrs->inplace_indices.size() == 1) { -auto* out_sinfo = call->sinfo_args[0].as(); -if (!out_sinfo) { - ctx->ReportFatal(Diagnostic::Error(call) << "The output struct info must be a tensor"); + + for (size_t i_output = 0; i_output < attrs->inplace_indices.size(); i_output++) { +auto i_input = attrs->inplace_indices[i_output].IntValue(); +if (i_input == -1) { + continue; } -auto* input_sinfo = GetStructInfoAs( -call_args->fields[attrs->inplace_indices[0].IntValue()]); -if (!input_sinfo || !input_sinfo->shape.defined() || -!CanProveShapeEqual(input_sinfo->shape.value(), out_sinfo->shape.value(), -ctx->GetAnalyzer())) { + +auto sinfo_output = sinfo_outputs[i_output]; +auto tinfo_output = sinfo_output.as(); + +if (!tinfo_output || !tinfo_output->shape.defined() || tinfo_output->IsUnknownDtype()) { ctx->ReportFatal(Diagnostic::Error(call) - << "The shape of output 0 must match input " - << attrs->inplace_indices[0].IntValue() << ", whereas we have " - << out_sinfo->shape.value() << " in output 0 versus " - << input_sinfo->shape.value() << " 
in input " - << attrs->inplace_indices[0].IntValue()); + << "The output struct info for an in-place mutation must be a tensor " + << "with a defined shape and dtype, " + << "but output " << i_output << " has struct info " << sinfo_output); } - } else { -auto out_sinfos = call->sinfo_args[0].as()->fields; -for (size_t i = 0; i < attrs->inplace_indices.size(); i++) { - if (attrs->inplace_indices[i].IntValue() =
(tvm) branch main updated: [Unity][Frontend] Add Sqrt Op (#17228)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 5a67a00bcb [Unity][Frontend] Add Sqrt Op (#17228) 5a67a00bcb is described below commit 5a67a00bcbb53731bbf53db7801fa16c8c9eb9f2 Author: Shushi Hong <820958...@qq.com> AuthorDate: Mon Aug 5 21:17:48 2024 +0800 [Unity][Frontend] Add Sqrt Op (#17228) * Update op.py * Update test_frontend_nn_op.py * Update op.py with annotation * Update core.py(typo in annotation) --- python/tvm/relax/frontend/nn/core.py | 2 +- python/tvm/relax/frontend/nn/op.py| 22 ++ tests/python/relax/test_frontend_nn_op.py | 6 -- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/python/tvm/relax/frontend/nn/core.py b/python/tvm/relax/frontend/nn/core.py index 3511c38a2b..21118b1cb8 100644 --- a/python/tvm/relax/frontend/nn/core.py +++ b/python/tvm/relax/frontend/nn/core.py @@ -17,7 +17,7 @@ """The core infra for nn.Module, which includes the following pieces: - Tensor, a wrapper on top of relax.Expr whose struct_info is a TensorStructInfo, providing more convenient access shape and dtype information. - Tensor is always symbolc and not bound to any concrete values. + Tensor is always symbolic and not bound to any concrete values. - Parameter, a special tensor which could be bound or not bound to concrete values. - Module, a container of nn.Parameters and sub nn.Modules. 
- Effect, a non-user-facing class that encloses potential side effects, for example, IO, diff --git a/python/tvm/relax/frontend/nn/op.py b/python/tvm/relax/frontend/nn/op.py index e1ba4483c7..17a40a8cce 100644 --- a/python/tvm/relax/frontend/nn/op.py +++ b/python/tvm/relax/frontend/nn/op.py @@ -1486,6 +1486,28 @@ def square(x: Tensor, name: str = "square") -> Tensor: return wrap_nested(_op.square(x._expr), name) +def sqrt(x: Tensor, name: str = "sqrt") -> Tensor: +"""Computes the element-wise sqrt of the input tensor. + +Parameters +-- +x : Tensor +The input tensor. + +name : str +Name hint. + +Returns +--- +result : Tensor +The computed result. +Note + +The input tensor is required to have float dtype +""" +return wrap_nested(_op.sqrt(x._expr), name) + + def get_timestep_embedding( x: Tensor, embedding_dim: int, diff --git a/tests/python/relax/test_frontend_nn_op.py b/tests/python/relax/test_frontend_nn_op.py index a632a86743..6c32691954 100644 --- a/tests/python/relax/test_frontend_nn_op.py +++ b/tests/python/relax/test_frontend_nn_op.py @@ -31,7 +31,8 @@ def test_unary(): class Model(Module): def test(self, x: Tensor): z0 = op.square(x) -return (x,) +z1 = op.sqrt(x) +return (z0, z1) # fmt: off @R.function @@ -39,7 +40,8 @@ def test_unary(): R.func_attr({"num_input": 2}) with R.dataflow(): square: R.Tensor((1, 10), dtype="float32") = R.square(x) -gv1 = (x,), (_io,) +sqrt: R.Tensor((1, 10), dtype="float32") = R.sqrt(x) +gv1 = (square, sqrt), (_io,) R.output(gv1) return gv1 # fmt: on
(tvm) branch main updated: [Runtime] Reorganize PagedKVCache attn kernel invocation (#17237)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new cd09ab64b5 [Runtime] Reorganize PagedKVCache attn kernel invocation (#17237) cd09ab64b5 is described below commit cd09ab64b5ccf6ff0a96d887a968acd4602188a8 Author: Ruihang Lai AuthorDate: Sat Aug 3 20:01:01 2024 -0400 [Runtime] Reorganize PagedKVCache attn kernel invocation (#17237) This PR reorganizes the attention kernel invocation logic in the PagedKVCache, so that in cases of sequence fork, we can effectively merge one ragged-prefill kernel and a decode kernel into a single decode kernel. --- src/relax/transform/fuse_ops.cc| 2 +- src/runtime/relax_vm/paged_kv_cache.cc | 127 + 2 files changed, 65 insertions(+), 64 deletions(-) diff --git a/src/relax/transform/fuse_ops.cc b/src/relax/transform/fuse_ops.cc index e791aeab06..85c739e083 100644 --- a/src/relax/transform/fuse_ops.cc +++ b/src/relax/transform/fuse_ops.cc @@ -646,7 +646,7 @@ class FunctionCreator : public ExprMutator { return tvm::tir::UndefinedVars(prim_value->value).empty(); } else if (const auto* shape_expr = expr.as()) { return std::all_of(shape_expr->values.begin(), shape_expr->values.end(), - [this](const PrimExpr& e) { return tvm::tir::UndefinedVars(e).empty(); }); + [](const PrimExpr& e) { return tvm::tir::UndefinedVars(e).empty(); }); } return false; } diff --git a/src/runtime/relax_vm/paged_kv_cache.cc b/src/runtime/relax_vm/paged_kv_cache.cc index 5aa1411ec1..cf5de97202 100644 --- a/src/runtime/relax_vm/paged_kv_cache.cc +++ b/src/runtime/relax_vm/paged_kv_cache.cc @@ -1535,7 +1535,7 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { CHECK_EQ(chunked_block_ids_arr[num_depths_ - 1].size(), cur_batch_size_); } -append_before_attn_ = !support_sliding_window_ && num_depths_ == 1 && use_decode_kernel_[0]; +append_before_attn_ = !support_sliding_window_ 
&& use_decode_kernel_.back(); if (NeedKernelBeginForward() && num_qo_heads_ / num_kv_heads_ >= 4) { // When GQA group size is at least 4 and FlashInfer is enabled, // we always use prefill kernel for better performance. @@ -2220,39 +2220,33 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { return; } -if (append_before_attn_) { - if (!support_sliding_window_) { +if (!append_before_attn_) { + if (is_chain_) { +f_attention_prefill_ragged_begin_forward_.value()( +temp_attn_workspace_[0], cur_append_lengths_indptr_host_.as_ndarray(), +cur_append_lengths_indptr_host_.as_ndarray(), cur_batch_size_, num_qo_heads_, +num_kv_heads_, head_dim_, copy_stream_); + } else { +LOG(FATAL) << "Kernel BeginForward doesn't support tree attn."; + } +} +for (int d = 0; d < num_depths_; ++d) { + if (page_indices_on_depths_view_[d]->shape[0] == 0) { +continue; + } + CHECK(!support_sliding_window_) << "Kernel BeginForward doesn't support sliding window."; + if (use_decode_kernel_[d]) { f_attention_decode_begin_forward_.value()( -/*depth=*/0, temp_attn_workspace_[1], page_indptr_on_depths_host_[0].as_ndarray(), -last_page_len_on_depths_host_[0].as_ndarray(), num_qo_heads_, num_kv_heads_, head_dim_, +d, temp_attn_workspace_[d + 1], page_indptr_on_depths_host_[d].as_ndarray(), +last_page_len_on_depths_host_[d].as_ndarray(), num_qo_heads_, num_kv_heads_, head_dim_, page_size_, /*rotary_mode=*/rope_mode_ == RoPEMode::kInline, copy_stream_); - } -} else { - f_attention_prefill_ragged_begin_forward_.value()( - temp_attn_workspace_[0], cur_append_lengths_indptr_host_.as_ndarray(), - cur_append_lengths_indptr_host_.as_ndarray(), cur_batch_size_, num_qo_heads_, - num_kv_heads_, head_dim_, copy_stream_); - if (support_sliding_window_) { -return; - } - for (int d = 0; d < num_depths_; ++d) { -if (page_indices_on_depths_view_[d]->shape[0] == 0) { - continue; -} -if (use_decode_kernel_[d]) { - f_attention_decode_begin_forward_.value()( - d, temp_attn_workspace_[d + 1], 
page_indptr_on_depths_host_[d].as_ndarray(), - last_page_len_on_depths_host_[d].as_ndarray(), num_qo_heads_, num_kv_heads_, - head_dim_, page_size_, - /*rotary_mode=*/rope_mode_ == RoPEMode::kInline, copy_stream_); -} else { - f_attention_prefill_begin_forward_.
(tvm) branch main updated: [Bugfix][Cutlass] fix cutlass instantiate attention template bugs (#17229)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 21c12fb124 [Bugfix][Cutlass] fix cutlass instantiate attention template bugs (#17229) 21c12fb124 is described below commit 21c12fb1243a79df2aea8b83956c6b0b914cf4a5 Author: senlyu163 <70838408+senlyu...@users.noreply.github.com> AuthorDate: Sat Aug 3 20:45:36 2024 +0800 [Bugfix][Cutlass] fix cutlass instantiate attention template bugs (#17229) [Bugfix][Cutlass] fix cutlass attention template --- python/tvm/contrib/cutlass/attention_operation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/contrib/cutlass/attention_operation.py b/python/tvm/contrib/cutlass/attention_operation.py index 518778ec52..69298453cb 100644 --- a/python/tvm/contrib/cutlass/attention_operation.py +++ b/python/tvm/contrib/cutlass/attention_operation.py @@ -111,7 +111,7 @@ def instantiate_attention_template(attrs): if (accumulator_buf_size <= ${workspace}->shape[0]) { p.output_accum_ptr = static_cast(${workspace}->data); } else { -accumulator_buf_size = true; +accumulator_buf_allocated = true; cudaMalloc( &p.output_accum_ptr, accumulator_buf_size
(tvm) branch main updated: [3rdparty] Bump FlashInfer (#17236)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 76b954a09e [3rdparty] Bump FlashInfer (#17236) 76b954a09e is described below commit 76b954a09e781b7f664b1d345e1494123c19484c Author: Ruihang Lai AuthorDate: Sat Aug 3 04:28:02 2024 -0400 [3rdparty] Bump FlashInfer (#17236) This PR bumps FlashInfer and updates PagedKVCache accordingly for performance improvement. Some notes on this bump: * When the Grouped-Query Attention group size is at least 4 and FlashInfer is enabled, we use the prefill attn kernel for better performance. * We enlarge the temporary workspace for FlashInfer use accordingly, as FlashInfer in the current version may consume much larger workspace. We turn off the workspace when FlashInfer is not enabled. * We reduce the max block depth to be 2, in observation of the limited help of cascade inference when batch size is not large and the prompt reuse is low. --- 3rdparty/flashinfer| 2 +- src/runtime/relax_vm/paged_kv_cache.cc | 48 +++--- ..._builtin_paged_attention_kv_cache_flashinfer.py | 13 +- ...runtime_builtin_paged_attention_kv_cache_tir.py | 13 +- 4 files changed, 58 insertions(+), 18 deletions(-) diff --git a/3rdparty/flashinfer b/3rdparty/flashinfer index 7e9cc7ff42..0dd801d202 16 --- a/3rdparty/flashinfer +++ b/3rdparty/flashinfer @@ -1 +1 @@ -Subproject commit 7e9cc7ff42ca283c317061a877305d09a395fad2 +Subproject commit 0dd801d2027af89f3603cbbf68a76e9503bb2f57 diff --git a/src/runtime/relax_vm/paged_kv_cache.cc b/src/runtime/relax_vm/paged_kv_cache.cc index 2fb8a72f42..5aa1411ec1 100644 --- a/src/runtime/relax_vm/paged_kv_cache.cc +++ b/src/runtime/relax_vm/paged_kv_cache.cc @@ -54,11 +54,11 @@ namespace relax_vm { * \brief The maximum allowed block depth (a.k.a. number of common * prefixes) in paged KV cache. 
*/ -constexpr const int kPagedKVCacheMaxBlockDepth = 5; +constexpr const int kPagedKVCacheMaxBlockDepth = 2; /*! \brief The maximum tree size of a single sequence in tree attention. */ constexpr const int kTreeAttnMaxTreeSize = 256; /*! \brief The 8MB workspace size for attention auxiliary data. */ -constexpr const int kAttnWorkspaceByte = 8 * 1024 * 1024; +constexpr const int kAttnWorkspaceByte = 128 * 1024 * 1024; /*! \brief The id of the temporary logical page, which is useful for sliding window. */ constexpr const int kPagedKVCacheTempPageId = -1; @@ -119,6 +119,9 @@ struct Block { void Reset() { page_ids.clear(); seq_length = 0; +start_pos = 0; +sink_length = 0; +sliding_window_offset = 0; parent_idx = -1; external_ref_cnt = 0; } @@ -169,11 +172,9 @@ struct Sequence { this->last_block_idx = last_block_idx; int32_t block_ptr = last_block_idx; // Go through each block in the sequence, sum up the length. -int depth = 0; while (true) { const Block& block = global_block_pool->at(block_ptr); this->seq_length += block.seq_length; - ++depth; if (block.parent_idx == -1) { break; } @@ -1078,8 +1079,10 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { dtype_aux_, preferred_host_device); for (int d = 0; d < kPagedKVCacheMaxBlockDepth; ++d) { - temp_attn_workspace_.push_back( - NDArray::Empty({kAttnWorkspaceByte / 4}, DataType::Float(32), device)); + if (NeedKernelBeginForward()) { +temp_attn_workspace_.push_back( +NDArray::Empty({kAttnWorkspaceByte / 4}, DataType::Float(32), device)); + } qo_indptr_on_depths_view_.push_back(NDArray()); page_indptr_on_depths_view_.push_back(NDArray()); page_indices_on_depths_view_.push_back(NDArray()); @@ -1087,8 +1090,10 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { k_rope_pos_offset_view_.push_back(NDArray()); } // Additional workspace for the "prefill with ragged kv" kernel. 
-temp_attn_workspace_.push_back( -NDArray::Empty({kAttnWorkspaceByte / 4}, DataType::Float(32), device)); +if (NeedKernelBeginForward()) { + temp_attn_workspace_.push_back( + NDArray::Empty({kAttnWorkspaceByte / 4}, DataType::Float(32), device)); +} temp_attn_q_device_ = NDArray::Empty({prefill_chunk_size_, num_qo_heads, head_dim}, dtype, device); @@ -1531,6 +1536,12 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj { } append_before_attn_ = !support_sliding_window_ && num_depths_ == 1 && use_decode_kernel_[0]; +if (NeedKernelBeginForward() && num_qo_heads_ / num_kv_heads_ >= 4) { + // When GQA group size is at least 4 and FlashInfer i
(tvm) branch main updated (3a02309ed8 -> 219ae85d4b)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git from 3a02309ed8 [Relax] FuseTransposeMatmul Pass (#17234) add 219ae85d4b [Runtime Patch] Add AbortSignal to fetchWithCache in ArtifactCacheTemplate interface (#17233) No new revisions were added by this update. Summary of changes: web/src/artifact_cache.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-)
(tvm) branch main updated: [Relax] FuseTransposeMatmul Pass (#17234)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 3a02309ed8 [Relax] FuseTransposeMatmul Pass (#17234) 3a02309ed8 is described below commit 3a02309ed85d308da1b1af127bc97b5b22589a43 Author: Siyuan Feng AuthorDate: Fri Aug 2 22:14:32 2024 +0800 [Relax] FuseTransposeMatmul Pass (#17234) Introduce a new pass to fuse transpose and matmul, which is intended specifically for `Linear` ops in PyTorch and NNModule. Note that this pass is migrated from MLC-LLM. Co-authored-by: Ruihang Lai Co-authored-by: Junru Shao --- python/tvm/relax/transform/__init__.py | 1 + .../tvm/relax/transform/fuse_transpose_matmul.py | 175 + .../relax/test_transform_fuse_transpose_matmul.py | 82 ++ 3 files changed, 258 insertions(+) diff --git a/python/tvm/relax/transform/__init__.py b/python/tvm/relax/transform/__init__.py index 5e76fff6bd..5789e2fcf2 100644 --- a/python/tvm/relax/transform/__init__.py +++ b/python/tvm/relax/transform/__init__.py @@ -90,6 +90,7 @@ from .lower_gpu_ipc_alloc_storage import LowerGPUIPCAllocStorage from .optimize_layout_transform import OptimizeLayoutTransform from .remove_redundant_reshape import RemoveRedundantReshape from .fast_math import FastMathTransform +from .fuse_transpose_matmul import FuseTransposeMatmul from .attach_external_modules import AttachExternModules # Import to register the legalization functions. diff --git a/python/tvm/relax/transform/fuse_transpose_matmul.py b/python/tvm/relax/transform/fuse_transpose_matmul.py new file mode 100644 index 00..1d2324a28b --- /dev/null +++ b/python/tvm/relax/transform/fuse_transpose_matmul.py @@ -0,0 +1,175 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""A compiler pass that fuses transpose + matmul and generate TIR function. +Note that +1. Please put the pass before LegalizeOps pass. +2. The pass only works for XW^T but not X^TW +3. The pass would rewrite the relax ops into TIR functions. If you'd like to dispatch the + ops into library (e.g. cuBLAS) calls, please run dispatch pass before this pass. +""" + +import tvm +from tvm import IRModule, relax, te, tir +from tvm.relax.dpl.pattern import is_op, wildcard +from tvm.relax.expr_functor import PyExprMutator, mutator + + +@tvm.transform.module_pass(opt_level=0, name="FuseTransposeMatmul") +class FuseTransposeMatmul: # pylint: disable=too-few-public-methods +"""A compiler pass that fuses transpose + matmul.""" + +def transform_module(self, mod: IRModule, _ctx: tvm.transform.PassContext) -> IRModule: +"""IRModule-level transformation""" +mod = relax.transform.FuseOpsByPattern( +[ +( +"transpose_matmul_fuse", +*_pattern(), +), +] +)(mod) +transpose_matmul_codegen = _TransposeMatmulFuser(mod) +for g_var, func in mod.functions_items(): +if isinstance(func, relax.Function): +func = transpose_matmul_codegen.visit_expr(func) +transpose_matmul_codegen.builder_.update_func(g_var, func) +return transpose_matmul_codegen.builder_.get() + + +def _pattern(): +"""Pattern for transpose + matmul.""" +# pylint: disable=invalid-name +w = wildcard() +x = wildcard() +wT = is_op("relax.permute_dims")(w) +o = 
is_op("relax.matmul")(x, wT) +# pylint: enable=invalid-name +annotations = {"o": o, "w": w, "x": x, "wT": wT} + +def _check(context: relax.transform.PatternCheckContext) -> bool: +transpose_call = context.annotated_expr["wT"] +ndim = transpose_call.args[0].struct_info.ndim +if ndim == -1: +return False +if ndim == 2 and transpose_call.attrs.axes is None: +
(tvm) branch main updated: [Runtime] Allow aborting fetchWithCache through AbortSignal (#17227)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 031f0475be [Runtime] Allow aborting fetchWithCache through AbortSignal (#17227) 031f0475be is described below commit 031f0475bea40f6dfb07c7d53e7078edfcbd300d Author: Nestor Qin AuthorDate: Thu Aug 1 11:42:49 2024 -0400 [Runtime] Allow aborting fetchWithCache through AbortSignal (#17227) [Runtime] Add AbortSignal to fetchWithCache() --- web/src/artifact_cache.ts | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/web/src/artifact_cache.ts b/web/src/artifact_cache.ts index 9690ed3320..794efdcedb 100644 --- a/web/src/artifact_cache.ts +++ b/web/src/artifact_cache.ts @@ -114,10 +114,11 @@ export class ArtifactCache implements ArtifactCacheTemplate { * fetch the corresponding url object in response or stored object format * @param url url * @param storetype the storage type for indexedDB + * @param signal an optional abort signal to abort fetching * @returns response in json, arraybuffer or pure response format */ - async fetchWithCache(url: string, storetype?: string): Promise { -await this.addToCache(url, storetype); + async fetchWithCache(url: string, storetype?: string, signal?: AbortSignal): Promise { +await this.addToCache(url, storetype, signal); const result = await this.cache.match(new Request(url)); if (result === undefined) { // Already called `addToCache()`, should expect the request in cache. 
@@ -242,8 +243,8 @@ export class ArtifactIndexedDBCache implements ArtifactCacheTemplate { }) } - async fetchWithCache(url: string, storetype?: string): Promise { -await this.addToCache(url, storetype); + async fetchWithCache(url: string, storetype?: string, signal?: AbortSignal): Promise { +await this.addToCache(url, storetype, signal); let result = await this.asyncGetHelper(url); if (result === null) { // previously null data in cache or somehow failed to add to cache, delete and retry