[GitHub] [tvm] Meteorix commented on a change in pull request #7147: [CUDA][PASS]Legalize tensorcore
Meteorix commented on a change in pull request #7147: URL: https://github.com/apache/tvm/pull/7147#discussion_r547098688 ## File path: python/tvm/topi/nn/batch_matmul.py ## @@ -59,3 +60,25 @@ def batch_matmul(x, y, oshape=None): lambda b, i, j: te.sum(x[b if XB != 1 else 0, i, k] * y[b if YB != 1 else 0, j, k], axis=k), tag="batch_matmul", ) + + +@tvm.target.generic_func +def batch_matmul_legalize(attrs, inputs, types): +"""Legalizes Conv2D op. Review comment: thanks, fixed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] kongroo commented on a change in pull request #7147: [CUDA][PASS]Legalize tensorcore
kongroo commented on a change in pull request #7147: URL: https://github.com/apache/tvm/pull/7147#discussion_r547096507 ## File path: python/tvm/topi/nn/batch_matmul.py ## @@ -59,3 +60,25 @@ def batch_matmul(x, y, oshape=None): lambda b, i, j: te.sum(x[b if XB != 1 else 0, i, k] * y[b if YB != 1 else 0, j, k], axis=k), tag="batch_matmul", ) + + +@tvm.target.generic_func +def batch_matmul_legalize(attrs, inputs, types): +"""Legalizes Conv2D op. Review comment: typo, should be batch_matmul This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] Meteorix commented on a change in pull request #7146: [CUDA]batch_matmul tensorcore schedule
Meteorix commented on a change in pull request #7146: URL: https://github.com/apache/tvm/pull/7146#discussion_r547093078 ## File path: python/tvm/topi/cuda/batch_matmul_tensorcore.py ## @@ -0,0 +1,274 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name,too-many-locals,unused-variable,unused-argument +"""cuda batch_matmul operators""" +import tvm +from tvm import autotvm +from tvm import te +from ..utils import traverse_inline, get_const_tuple +from .tensor_intrin import intrin_wmma_load_matrix_A, \ +intrin_wmma_load_matrix_W, intrin_wmma_store_matrix, intrin_wmma_gemm + +@autotvm.register_topi_compute("batch_matmul_tensorcore.cuda") +def batch_matmul_tensorcore(cfg, x, y, out_shape=None): +"""batch matmul tensorcore operator on cuda""" +# todo: deal with out_shape for broadcast, liuxin.ai +return batch_matmul_tensorcore_cuda(x, y) + + +@autotvm.register_topi_schedule("batch_matmul_tensorcore.cuda") +def schedule_batch_matmul_tensorcore(cfg, outs): +"""Schedule for batch_matmul operator using Tensorcore + +Parameters +-- +outs: Array of Tensor + The computation graph description of batch_matmul + in the format of an array of tensors. + +Returns +--- +s: Schedule +The computation schedule for the op. 
+""" +outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs +s = te.create_schedule([x.op for x in outs]) + +def _schedule(cfg, s, C): +A, B = s[C].op.input_tensors +batch, m_dim, k_dim = get_const_tuple(A.shape) +batch, n_dim, k_dim = get_const_tuple(B.shape) +out_dtype = C.dtype +# inline astype fp16 +s[A].compute_inline() +s[B].compute_inline() + +# Explicit memory access +AS = s.cache_read(A, 'shared', [C]) +BS = s.cache_read(B, 'shared', [C]) +AF = s.cache_read(AS, 'wmma.matrix_a', [C]) +BF = s.cache_read(BS, 'wmma.matrix_b', [C]) +CF = s.cache_write(C, 'wmma.accumulator') +CS = s.cache_read(CF, 'shared', [C]) + +# fallback support +target = tvm.target.Target.current() +if cfg.is_fallback: +ref_log = autotvm.tophub.load_reference_log( +target.kind.name, target.model, 'batch_matmul_tensorcore.cuda') +cfg.fallback_with_reference_log(ref_log) + +# Deal with op fusion, such as bias/relu and slice after padding +if C.op not in s.outputs and "injective" in s.outputs[0].tag: +s[C].compute_inline() +C = s.outputs[0].output(0) + +# create tuning space +cfg.define_knob("block_row_warps", [1, 2, 4]) +cfg.define_knob("block_col_warps", [1, 2, 4]) +cfg.define_knob("warp_row_tiles", [1, 2, 4]) +cfg.define_knob("warp_col_tiles", [1, 2, 4]) +cfg.define_knob("chunk", [1, 2, 4, 8]) +cfg.define_knob("offset", [0, 8]) +cfg.define_knob("offsetCS", [0, 8]) +cfg.define_knob("vec", [1, 2, 4, 8]) + +# Ensure that the default parameters are applicable when autotvm is not in use +if (m_dim % 32 == 0 and n_dim % 8 == 0): +cfg.define_knob("wmma_m", [32, 16, 8]) +elif (m_dim % 16 == 0 and n_dim % 16 == 0): +cfg.define_knob("wmma_m", [16, 8, 32]) +elif (m_dim % 8 == 0 and n_dim % 32 == 0): +cfg.define_knob("wmma_m", [8, 16, 32]) + +warp_size = 32 +wmma_k = 16 +block_row_warps = cfg["block_row_warps"].val +block_col_warps = cfg["block_col_warps"].val +warp_row_tiles = cfg["warp_row_tiles"].val +warp_col_tiles = cfg["warp_col_tiles"].val +chunk = cfg["chunk"].val +offset = 
cfg["offset"].val +offsetCS = cfg["offsetCS"].val +wmma_m = cfg["wmma_m"].val +vec = cfg["vec"].val + +if wmma_m == 16: +wmma_n = 16 +elif wmma_m == 8: +wmma_n = 32 +elif wmma_m == 32: +wmma_n = 8 + +# Define the stride of intrin functions +AS_align = chunk * wmma_k + offset +BS_align = chunk * wmma_k + offset +CS_align = warp_col_tiles *
[GitHub] [tvm] Meteorix commented on a change in pull request #7146: [CUDA]batch_matmul tensorcore schedule
Meteorix commented on a change in pull request #7146: URL: https://github.com/apache/tvm/pull/7146#discussion_r547091793 ## File path: python/tvm/relay/op/strategy/cuda.py ## @@ -657,6 +657,20 @@ def batch_matmul_strategy_cuda(attrs, inputs, out_type, target): name="batch_matmul_cublas.cuda", plevel=15, ) +if target.kind.name == "cuda" and nvcc.have_tensorcore(tvm.gpu(0).compute_version): +x, y = inputs +B, M, K = get_const_tuple(x.shape) +B, N, K = get_const_tuple(y.shape) +# "The shape of (M, K, N) must be multiple of (16, 16, 16) or (32, 16, 8) or (8, 16, 32) for now" +if ((M % 8 == 0 and K % 16 == 0 and N % 32 == 0) or \ +(M % 16 == 0 and K % 16 == 0 and N % 16 == 0) or \ +(M % 32 == 0 and K % 16 == 0 and N % 8 == 0)): Review comment: I just kept this the same as the code for dense_tensorcore: https://github.com/apache/tvm/blob/main/python/tvm/relay/op/strategy/cuda.py#L679 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] Meteorix commented on a change in pull request #7146: [CUDA]batch_matmul tensorcore schedule
Meteorix commented on a change in pull request #7146: URL: https://github.com/apache/tvm/pull/7146#discussion_r547091707 ## File path: python/tvm/topi/cuda/batch_matmul_tensorcore.py ## @@ -0,0 +1,274 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name,too-many-locals,unused-variable,unused-argument +"""cuda batch_matmul operators""" +import tvm +from tvm import autotvm +from tvm import te +from ..utils import traverse_inline, get_const_tuple +from .tensor_intrin import intrin_wmma_load_matrix_A, \ +intrin_wmma_load_matrix_W, intrin_wmma_store_matrix, intrin_wmma_gemm + +@autotvm.register_topi_compute("batch_matmul_tensorcore.cuda") +def batch_matmul_tensorcore(cfg, x, y, out_shape=None): +"""batch matmul tensorcore operator on cuda""" +# todo: deal with out_shape for broadcast, liuxin.ai +return batch_matmul_tensorcore_cuda(x, y) + + +@autotvm.register_topi_schedule("batch_matmul_tensorcore.cuda") +def schedule_batch_matmul_tensorcore(cfg, outs): +"""Schedule for batch_matmul operator using Tensorcore + +Parameters +-- +outs: Array of Tensor + The computation graph description of batch_matmul + in the format of an array of tensors. + +Returns +--- +s: Schedule +The computation schedule for the op. 
+""" +outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs +s = te.create_schedule([x.op for x in outs]) + +def _schedule(cfg, s, C): +A, B = s[C].op.input_tensors +batch, m_dim, k_dim = get_const_tuple(A.shape) +batch, n_dim, k_dim = get_const_tuple(B.shape) +out_dtype = C.dtype +# inline astype fp16 +s[A].compute_inline() +s[B].compute_inline() + +# Explicit memory access +AS = s.cache_read(A, 'shared', [C]) +BS = s.cache_read(B, 'shared', [C]) +AF = s.cache_read(AS, 'wmma.matrix_a', [C]) +BF = s.cache_read(BS, 'wmma.matrix_b', [C]) +CF = s.cache_write(C, 'wmma.accumulator') +CS = s.cache_read(CF, 'shared', [C]) + +# fallback support +target = tvm.target.Target.current() +if cfg.is_fallback: +ref_log = autotvm.tophub.load_reference_log( +target.kind.name, target.model, 'batch_matmul_tensorcore.cuda') +cfg.fallback_with_reference_log(ref_log) + +# Deal with op fusion, such as bias/relu and slice after padding +if C.op not in s.outputs and "injective" in s.outputs[0].tag: +s[C].compute_inline() +C = s.outputs[0].output(0) + +# create tuning space +cfg.define_knob("block_row_warps", [1, 2, 4]) +cfg.define_knob("block_col_warps", [1, 2, 4]) +cfg.define_knob("warp_row_tiles", [1, 2, 4]) +cfg.define_knob("warp_col_tiles", [1, 2, 4]) +cfg.define_knob("chunk", [1, 2, 4, 8]) +cfg.define_knob("offset", [0, 8]) +cfg.define_knob("offsetCS", [0, 8]) +cfg.define_knob("vec", [1, 2, 4, 8]) + +# Ensure that the default parameters are applicable when autotvm is not in use +if (m_dim % 32 == 0 and n_dim % 8 == 0): +cfg.define_knob("wmma_m", [32, 16, 8]) +elif (m_dim % 16 == 0 and n_dim % 16 == 0): +cfg.define_knob("wmma_m", [16, 8, 32]) +elif (m_dim % 8 == 0 and n_dim % 32 == 0): +cfg.define_knob("wmma_m", [8, 16, 32]) + +warp_size = 32 +wmma_k = 16 +block_row_warps = cfg["block_row_warps"].val +block_col_warps = cfg["block_col_warps"].val +warp_row_tiles = cfg["warp_row_tiles"].val +warp_col_tiles = cfg["warp_col_tiles"].val +chunk = cfg["chunk"].val +offset = 
cfg["offset"].val +offsetCS = cfg["offsetCS"].val +wmma_m = cfg["wmma_m"].val +vec = cfg["vec"].val + +if wmma_m == 16: +wmma_n = 16 +elif wmma_m == 8: +wmma_n = 32 +elif wmma_m == 32: +wmma_n = 8 + +# Define the stride of intrin functions +AS_align = chunk * wmma_k + offset +BS_align = chunk * wmma_k + offset +CS_align = warp_col_tiles *
[GitHub] [tvm] ANSHUMAN87 commented on a change in pull request #7107: [Tutorial] Add output validation to sparse tutorial
ANSHUMAN87 commented on a change in pull request #7107: URL: https://github.com/apache/tvm/pull/7107#discussion_r547091469 ## File path: tests/scripts/task_ci_python_setup.sh ## @@ -31,3 +31,4 @@ set -o pipefail echo "Addtiional setup in" ${CI_IMAGE_NAME} python3 -m pip install --user tlcpack-sphinx-addon==0.1.3 synr==0.2.1 +python3 -m pip install --user tokenizers==0.9.4 transformers==4.0.1 Review comment: Okay, I will revert this change and check the error. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] jcf94 commented on pull request #7146: [CUDA]batch_matmul tensorcore schedule
jcf94 commented on pull request #7146: URL: https://github.com/apache/tvm/pull/7146#issuecomment-749364296 @Meteorix, great thanks for your PR! The code looks good to me. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] ANSHUMAN87 opened a new pull request #7148: [Frontend][Tensorflow] Sparse_Dense Op CSR scheduling issue resolved for Cuda & X86
ANSHUMAN87 opened a new pull request #7148: URL: https://github.com/apache/tvm/pull/7148 This is a follow-up PR. 1. It resolves the CSR scheduling issue for both CUDA and x86. 2. It also enables the corresponding test cases in the TensorFlow frontend. cc @tkonolige, @FrozenGene! This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] jcf94 commented on a change in pull request #7146: [CUDA]batch_matmul tensorcore schedule
jcf94 commented on a change in pull request #7146: URL: https://github.com/apache/tvm/pull/7146#discussion_r547085365 ## File path: python/tvm/relay/op/strategy/cuda.py ## @@ -657,6 +657,20 @@ def batch_matmul_strategy_cuda(attrs, inputs, out_type, target): name="batch_matmul_cublas.cuda", plevel=15, ) +if target.kind.name == "cuda" and nvcc.have_tensorcore(tvm.gpu(0).compute_version): +x, y = inputs +B, M, K = get_const_tuple(x.shape) +B, N, K = get_const_tuple(y.shape) +# "The shape of (M, K, N) must be multiple of (16, 16, 16) or (32, 16, 8) or (8, 16, 32) for now" +if ((M % 8 == 0 and K % 16 == 0 and N % 32 == 0) or \ +(M % 16 == 0 and K % 16 == 0 and N % 16 == 0) or \ +(M % 32 == 0 and K % 16 == 0 and N % 8 == 0)): Review comment: Will it be better to also add data type check here or use some other user defined options? TensorCore needs to be computed in float16, but I'm not sure if this will bring any loss in precision if we just try to transform all float32 batch_matmul ops to compute in lower precision. Besides, TensorCore can also support datatype like int8 in some higher cuda versions. ## File path: python/tvm/topi/cuda/batch_matmul_tensorcore.py ## @@ -0,0 +1,274 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# pylint: disable=invalid-name,too-many-locals,unused-variable,unused-argument +"""cuda batch_matmul operators""" +import tvm +from tvm import autotvm +from tvm import te +from ..utils import traverse_inline, get_const_tuple +from .tensor_intrin import intrin_wmma_load_matrix_A, \ +intrin_wmma_load_matrix_W, intrin_wmma_store_matrix, intrin_wmma_gemm + +@autotvm.register_topi_compute("batch_matmul_tensorcore.cuda") +def batch_matmul_tensorcore(cfg, x, y, out_shape=None): +"""batch matmul tensorcore operator on cuda""" +# todo: deal with out_shape for broadcast, liuxin.ai +return batch_matmul_tensorcore_cuda(x, y) + + +@autotvm.register_topi_schedule("batch_matmul_tensorcore.cuda") +def schedule_batch_matmul_tensorcore(cfg, outs): +"""Schedule for batch_matmul operator using Tensorcore + +Parameters +-- +outs: Array of Tensor + The computation graph description of batch_matmul + in the format of an array of tensors. + +Returns +--- +s: Schedule +The computation schedule for the op. +""" +outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs +s = te.create_schedule([x.op for x in outs]) + +def _schedule(cfg, s, C): +A, B = s[C].op.input_tensors +batch, m_dim, k_dim = get_const_tuple(A.shape) +batch, n_dim, k_dim = get_const_tuple(B.shape) +out_dtype = C.dtype +# inline astype fp16 +s[A].compute_inline() +s[B].compute_inline() + +# Explicit memory access +AS = s.cache_read(A, 'shared', [C]) +BS = s.cache_read(B, 'shared', [C]) +AF = s.cache_read(AS, 'wmma.matrix_a', [C]) +BF = s.cache_read(BS, 'wmma.matrix_b', [C]) +CF = s.cache_write(C, 'wmma.accumulator') +CS = s.cache_read(CF, 'shared', [C]) + +# fallback support +target = tvm.target.Target.current() +if cfg.is_fallback: +ref_log = autotvm.tophub.load_reference_log( +target.kind.name, target.model, 'batch_matmul_tensorcore.cuda') +cfg.fallback_with_reference_log(ref_log) + +# Deal with op fusion, such as bias/relu and slice after padding +if C.op not in s.outputs and "injective" in s.outputs[0].tag: 
+s[C].compute_inline() +C = s.outputs[0].output(0) + +# create tuning space +cfg.define_knob("block_row_warps", [1, 2, 4]) +cfg.define_knob("block_col_warps", [1, 2, 4]) +cfg.define_knob("warp_row_tiles", [1, 2, 4]) +cfg.define_knob("warp_col_tiles", [1, 2, 4]) +cfg.define_knob("chunk", [1, 2, 4, 8]) +cfg.define_knob("offset", [0, 8]) +cfg.define_knob("offsetCS", [0, 8]) +cfg.define_knob("vec", [1, 2, 4, 8]) + +# Ensure that the default
[GitHub] [tvm] codeislife99 commented on a change in pull request #7125: Sparse reshape op
codeislife99 commented on a change in pull request #7125: URL: https://github.com/apache/tvm/pull/7125#discussion_r547087287 ## File path: python/tvm/relay/op/transform.py ## @@ -1320,3 +1320,52 @@ def adv_index(inputs): Output tensor. """ return _make.adv_index(Tuple(inputs)) + + +def sparsereshape(sparse_indices, sparse_values, prev_shape, new_shape): +""" +Reshape a Sparse Tensor Review comment: Can you explain how this convention is different from the `sparse_to_dense` operator. I could only find that operator as an example of existing representations. ## File path: python/tvm/relay/op/transform.py ## @@ -1320,3 +1320,52 @@ def adv_index(inputs): Output tensor. """ return _make.adv_index(Tuple(inputs)) + + +def sparsereshape(sparse_indices, sparse_values, prev_shape, new_shape): +""" +Reshape a Sparse Tensor Review comment: Can you explain how this convention is different from the `sparse_to_dense` operator. I could only find that operator as an example of existing representations ? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 commented on a change in pull request #7125: Sparse reshape op
codeislife99 commented on a change in pull request #7125: URL: https://github.com/apache/tvm/pull/7125#discussion_r547086641 ## File path: src/relay/op/tensor/transform.cc ## @@ -1553,6 +1553,52 @@ RELAY_REGISTER_OP("meshgrid") .set_attr("FTVMCompute", MeshgridCompute) .set_attr("TOpPattern", kInjective); +TVM_REGISTER_NODE_TYPE(SparseReshapeAttrs); + +bool SparseReshapeRel(const Array& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { + // types: [sparse_indices, sparse_values, result] + // ICHECK_EQ(types.size(), 3); Review comment: Done. ## File path: src/relay/op/tensor/transform.cc ## @@ -1553,6 +1553,52 @@ RELAY_REGISTER_OP("meshgrid") .set_attr("FTVMCompute", MeshgridCompute) .set_attr("TOpPattern", kInjective); +TVM_REGISTER_NODE_TYPE(SparseReshapeAttrs); + +bool SparseReshapeRel(const Array& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { + // types: [sparse_indices, sparse_values, result] + // ICHECK_EQ(types.size(), 3); + auto sparse_indices = types[0].as(); + const auto* param = attrs.as(); + CHECK(param != nullptr); Review comment: Done. 
## File path: src/relay/op/tensor/transform.cc ## @@ -1553,6 +1553,52 @@ RELAY_REGISTER_OP("meshgrid") .set_attr("FTVMCompute", MeshgridCompute) .set_attr("TOpPattern", kInjective); +TVM_REGISTER_NODE_TYPE(SparseReshapeAttrs); + +bool SparseReshapeRel(const Array& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { + // types: [sparse_indices, sparse_values, result] + // ICHECK_EQ(types.size(), 3); + auto sparse_indices = types[0].as(); + const auto* param = attrs.as(); + CHECK(param != nullptr); + Array new_sparse_indices_shape{sparse_indices->shape[0], + static_cast((param->new_shape).size())}; + reporter->Assign(types[2], TensorType(new_sparse_indices_shape, sparse_indices->dtype)); + return true; +} + +Array SparseReshapeCompute(const Attrs& attrs, const Array& inputs, + const Type& out_type) { + // ICHECK_EQ(inputs.size(), 2); Review comment: Done. ## File path: src/relay/op/tensor/transform.cc ## @@ -1553,6 +1553,52 @@ RELAY_REGISTER_OP("meshgrid") .set_attr("FTVMCompute", MeshgridCompute) .set_attr("TOpPattern", kInjective); +TVM_REGISTER_NODE_TYPE(SparseReshapeAttrs); + +bool SparseReshapeRel(const Array& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { + // types: [sparse_indices, sparse_values, result] + // ICHECK_EQ(types.size(), 3); + auto sparse_indices = types[0].as(); + const auto* param = attrs.as(); + CHECK(param != nullptr); + Array new_sparse_indices_shape{sparse_indices->shape[0], + static_cast((param->new_shape).size())}; + reporter->Assign(types[2], TensorType(new_sparse_indices_shape, sparse_indices->dtype)); + return true; +} + +Array SparseReshapeCompute(const Attrs& attrs, const Array& inputs, + const Type& out_type) { + // ICHECK_EQ(inputs.size(), 2); + const auto* param = attrs.as(); + CHECK(param != nullptr); Review comment: Done. 
## File path: src/relay/op/tensor/transform.cc ## @@ -1553,6 +1553,52 @@ RELAY_REGISTER_OP("meshgrid") .set_attr("FTVMCompute", MeshgridCompute) .set_attr("TOpPattern", kInjective); +TVM_REGISTER_NODE_TYPE(SparseReshapeAttrs); + +bool SparseReshapeRel(const Array& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { + // types: [sparse_indices, sparse_values, result] + // ICHECK_EQ(types.size(), 3); + auto sparse_indices = types[0].as(); + const auto* param = attrs.as(); + CHECK(param != nullptr); + Array new_sparse_indices_shape{sparse_indices->shape[0], + static_cast((param->new_shape).size())}; + reporter->Assign(types[2], TensorType(new_sparse_indices_shape, sparse_indices->dtype)); + return true; +} + +Array SparseReshapeCompute(const Attrs& attrs, const Array& inputs, + const Type& out_type) { + // ICHECK_EQ(inputs.size(), 2); + const auto* param = attrs.as(); + CHECK(param != nullptr); + return {topi::SparseReshape(inputs[0], inputs[1], param->prev_shape, param->new_shape)}; +} + +Expr MakeSparseReshape(Expr sparse_indices, Expr sparse_values, Array prev_shape, + Array new_shape) { + auto attrs = make_object(); + attrs->prev_shape = std::move(prev_shape); + attrs->new_shape = std::move(new_shape); + static const Op& op = Op::Get("sparsereshape"); + return Call(op, {sparse_indices, sparse_values}, Attrs(attrs), {}); +} +
[GitHub] [tvm] Meteorix commented on a change in pull request #7146: [CUDA]batch_matmul tensorcore schedule
Meteorix commented on a change in pull request #7146: URL: https://github.com/apache/tvm/pull/7146#discussion_r547086764 ## File path: python/tvm/topi/cuda/batch_matmul_tensorcore.py ## @@ -0,0 +1,275 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name,too-many-locals,unused-variable,unused-argument +"""cuda batch_matmul operators""" +import tvm +from tvm import autotvm +from tvm import te +from ..utils import traverse_inline, get_const_tuple +from .tensor_intrin import intrin_wmma_load_matrix_A, \ +intrin_wmma_load_matrix_W, intrin_wmma_store_matrix, intrin_wmma_gemm + +@autotvm.register_topi_compute("batch_matmul_tensorcore.cuda") +def batch_matmul_tensorcore(cfg, x, y, out_shape=None): +"""batch matmul tensorcore operator on cuda""" +# todo: deal with out_shape for broadcast, liuxin.ai +return batch_matmul_tensorcore_cuda(x, y) + + +@autotvm.register_topi_schedule("batch_matmul_tensorcore.cuda") +def schedule_batch_matmul_tensorcore(cfg, outs): +"""Schedule for batch_matmul operator using Tensorcore + +Parameters +-- +outs: Array of Tensor + The computation graph description of batch_matmul + in the format of an array of tensors. + +Returns +--- +s: Schedule +The computation schedule for the op. 
+""" +outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs +s = te.create_schedule([x.op for x in outs]) + +def _schedule(cfg, s, C): +A, B = s[C].op.input_tensors +batch, m_dim, k_dim = get_const_tuple(A.shape) +batch, n_dim, k_dim = get_const_tuple(B.shape) +out_dtype = C.dtype +# inline astype fp16 +s[A].compute_inline() +s[B].compute_inline() + +# Explicit memory access +AS = s.cache_read(A, 'shared', [C]) +BS = s.cache_read(B, 'shared', [C]) +AF = s.cache_read(AS, 'wmma.matrix_a', [C]) +BF = s.cache_read(BS, 'wmma.matrix_b', [C]) +CF = s.cache_write(C, 'wmma.accumulator') +CS = s.cache_read(CF, 'shared', [C]) + +# fallback support +target = tvm.target.Target.current() +if cfg.is_fallback: +ref_log = autotvm.tophub.load_reference_log( +target.kind.name, target.model, 'batch_matmul_tensorcore.cuda') +cfg.fallback_with_reference_log(ref_log) + +# ??? Deal with op fusion, such as bias and relu ??? is this needed? Review comment: fixed This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 commented on a change in pull request #7125: Sparse reshape op
codeislife99 commented on a change in pull request #7125: URL: https://github.com/apache/tvm/pull/7125#discussion_r547086590 ## File path: python/tvm/relay/op/_transform.py ## @@ -63,6 +63,8 @@ _reg.register_injective_schedule("sparse_to_dense") _reg.register_injective_schedule("matrix_set_diag") _reg.register_injective_schedule("adv_index") +_reg.register_injective_schedule("sparsereshape") Review comment: Done. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] Meteorix opened a new pull request #7147: [CUDA][PASS]Legalize tensorcore
Meteorix opened a new pull request #7147: URL: https://github.com/apache/tvm/pull/7147 Add a legalize pass that pads dense/conv2d/batch_matmul ops to legal shapes so that they can use Tensor Cores on the CUDA target. To limit the overhead introduced by padding, we count the `extra_flops` and set the threshold to 2x, which is conservative compared to the speedup of Tensor Cores. This PR depends on https://github.com/apache/tvm/pull/7146 . @jcf94 @merrymercy could you also help review this PR? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] jcf94 commented on a change in pull request #7146: [CUDA]batch_matmul tensorcore schedule
jcf94 commented on a change in pull request #7146: URL: https://github.com/apache/tvm/pull/7146#discussion_r547081523 ## File path: python/tvm/topi/cuda/batch_matmul_tensorcore.py ## @@ -0,0 +1,275 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name,too-many-locals,unused-variable,unused-argument +"""cuda batch_matmul operators""" +import tvm +from tvm import autotvm +from tvm import te +from ..utils import traverse_inline, get_const_tuple +from .tensor_intrin import intrin_wmma_load_matrix_A, \ +intrin_wmma_load_matrix_W, intrin_wmma_store_matrix, intrin_wmma_gemm + +@autotvm.register_topi_compute("batch_matmul_tensorcore.cuda") +def batch_matmul_tensorcore(cfg, x, y, out_shape=None): +"""batch matmul tensorcore operator on cuda""" +# todo: deal with out_shape for broadcast, liuxin.ai +return batch_matmul_tensorcore_cuda(x, y) + + +@autotvm.register_topi_schedule("batch_matmul_tensorcore.cuda") +def schedule_batch_matmul_tensorcore(cfg, outs): +"""Schedule for batch_matmul operator using Tensorcore + +Parameters +-- +outs: Array of Tensor + The computation graph description of batch_matmul + in the format of an array of tensors. + +Returns +--- +s: Schedule +The computation schedule for the op. 
+""" +outs = [outs] if isinstance(outs, te.tensor.Tensor) else outs +s = te.create_schedule([x.op for x in outs]) + +def _schedule(cfg, s, C): +A, B = s[C].op.input_tensors +batch, m_dim, k_dim = get_const_tuple(A.shape) +batch, n_dim, k_dim = get_const_tuple(B.shape) +out_dtype = C.dtype +# inline astype fp16 +s[A].compute_inline() +s[B].compute_inline() + +# Explicit memory access +AS = s.cache_read(A, 'shared', [C]) +BS = s.cache_read(B, 'shared', [C]) +AF = s.cache_read(AS, 'wmma.matrix_a', [C]) +BF = s.cache_read(BS, 'wmma.matrix_b', [C]) +CF = s.cache_write(C, 'wmma.accumulator') +CS = s.cache_read(CF, 'shared', [C]) + +# fallback support +target = tvm.target.Target.current() +if cfg.is_fallback: +ref_log = autotvm.tophub.load_reference_log( +target.kind.name, target.model, 'batch_matmul_tensorcore.cuda') +cfg.fallback_with_reference_log(ref_log) + +# ??? Deal with op fusion, such as bias and relu ??? is this needed? Review comment: typo? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] Meteorix opened a new pull request #7146: [CUDA]batch_matmul tensorcore schedule
Meteorix opened a new pull request #7146: URL: https://github.com/apache/tvm/pull/7146 Add batch_matmul tensorcore schedule for bert inference. It shows better performance than cublas batch_matmul kernel. @jcf94 @merrymercy could you help review this pr? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] FrozenGene commented on pull request #7132: [Auto Scheduler] Mali Support
FrozenGene commented on pull request #7132: URL: https://github.com/apache/tvm/pull/7132#issuecomment-749329511 Thanks everyone @comaniac @jcf94 @giuseros This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[tvm] branch main updated (24cd869 -> 98ca771)
This is an automated email from the ASF dual-hosted git repository. zhaowu pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git. from 24cd869 [COMMUNITY] @jcf94 -> Committer (#7141) add 98ca771 [Auto Scheduler] Mali Support (#7132) No new revisions were added by this update. Summary of changes: python/tvm/auto_scheduler/relay_integration.py | 8 +- python/tvm/relay/op/strategy/mali.py | 55 python/tvm/topi/mali/conv2d.py | 75 --- python/tvm/topi/nn/conv2d.py | 13 +- src/auto_scheduler/search_policy/sketch_policy.cc | 65 ++ src/auto_scheduler/search_task.cc | 16 +++ src/relay/backend/build_module.cc | 5 +- .../{tune_network_x86.py => tune_network_mali.py} | 138 + 8 files changed, 278 insertions(+), 97 deletions(-) copy tutorials/auto_scheduler/{tune_network_x86.py => tune_network_mali.py} (76%)
[GitHub] [tvm] FrozenGene merged pull request #7132: [Auto Scheduler] Mali Support
FrozenGene merged pull request #7132: URL: https://github.com/apache/tvm/pull/7132 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] comaniac commented on a change in pull request #7145: [AutoScheduler][Bugfix] Hardware params is not serialized properly
comaniac commented on a change in pull request #7145: URL: https://github.com/apache/tvm/pull/7145#discussion_r547055396 ## File path: python/tvm/auto_scheduler/search_task.py ## @@ -221,10 +221,6 @@ def __init__( target_host = Target(target_host) self.dag = compute_dag Review comment: I don't think we can access the constructed object in this constructor? The point is that both SearchTask and ComputeDAG have to be reconstructed during deserialization, so we need to call their C++ constructors in `__setstate__`. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] comaniac commented on a change in pull request #7145: [AutoScheduler][Bugfix] Hardware params is not serialized properly
comaniac commented on a change in pull request #7145: URL: https://github.com/apache/tvm/pull/7145#discussion_r547055396 ## File path: python/tvm/auto_scheduler/search_task.py ## @@ -221,10 +221,6 @@ def __init__( target_host = Target(target_host) self.dag = compute_dag Review comment: I don't think we can access the constructed object in this constructor? The point is that both SearchTask and ComputeDAG have to be reconstructed during deserialization, so we need to call their C++ constructors in `__setstate__`. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] jcf94 commented on a change in pull request #7145: [AutoScheduler][Bugfix] Hardware params is not serialized properly
jcf94 commented on a change in pull request #7145: URL: https://github.com/apache/tvm/pull/7145#discussion_r547038245 ## File path: python/tvm/auto_scheduler/search_task.py ## @@ -221,10 +221,6 @@ def __init__( target_host = Target(target_host) self.dag = compute_dag Review comment: The search task also has a ComputeDAG member, can we just use `self.compute_dag` after `__init_handle_by_constructor__`? Hmm... it seems I have not fully understood the "reconstruct ComputeDAG" part. ## File path: python/tvm/auto_scheduler/search_task.py ## @@ -314,30 +310,26 @@ def __getstate__(self): def __setstate__(self, state): self.dag = state["dag"] -self.workload_key = state["workload_key"] # Register the workload if needed try: -workload = json.loads(self.workload_key) +workload = json.loads(state["workload_key"]) except Exception: # pylint: disable=broad-except -raise RuntimeError("Invalid workload key %s" % self.workload_key) +raise RuntimeError("Invalid workload key %s" % state["workload_key"]) # The workload from a compute DAG does not have arguments and is not registered # by default so we register it here. If the workload has already been registered, # the later registration overrides the prvious one. if len(workload) == 1: register_workload_tensors(workload[0], self.dag.tensors) -self.target = state["target"] -self.target_host = state["target_host"] -self.hardware_params = state["hardware_params"] self.__init_handle_by_constructor__( _ffi_api.SearchTask, self.dag, Review comment: The same as the comment above, can we just build a local variable `dag` here, and it will be processed by the constructor to set it to `self.compute_dag`? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] comaniac opened a new pull request #7145: [AutoScheduler][Bugfix] Hardware params is not serialized properly
comaniac opened a new pull request #7145: URL: https://github.com/apache/tvm/pull/7145 The previous implementation of the task serialization mechanism has a potential problem, which is a bit tricky so I didn't notice that before. Previously, I defined a set of attributes in the Python object (e.g., `hardware_params`) and used them as the state when serializing a task object in Python. However, in the case that users don't provide `hardware_params`, the SearchTask constructor in C++ invokes `GetDefaultHardwareParams` to get the default hardware parameters. These default hardware parameters aren't exposed to the Python object because of the attribute with the same name. It wouldn't be a big deal because when a task is deserialized, we follow the same process to get the default hardware parameters. However, since `GetDefaultHardwareParams` may access hardware context (e.g., CUDA context), this prevents tasks from being deserialized in parallel (with multiprocessing). This PR removes all attributes in `SearchTask` Python object to make sure we access the up-to-date C++ object attributes when serializing a task. The only exception is `self.dag` because we need it to reconstruct a ComputeDAG. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] comaniac opened a new pull request #7144: [AutoScheduler] Support string processing to records
comaniac opened a new pull request #7144: URL: https://github.com/apache/tvm/pull/7144 Current record processing APIs in auto_scheduler are based on files. This PR adds two APIs to manipulate tuning records in memory. cc @merrymercy @jcf94 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] junrushao1994 merged pull request #7141: [COMMUNITY] @jcf94 -> Committer
junrushao1994 merged pull request #7141: URL: https://github.com/apache/tvm/pull/7141 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 commented on a change in pull request #7126: Sparse fill empty rows op
codeislife99 commented on a change in pull request #7126: URL: https://github.com/apache/tvm/pull/7126#discussion_r546961924 ## File path: python/tvm/relay/op/transform.py ## @@ -1320,3 +1320,84 @@ def adv_index(inputs): Output tensor. """ return _make.adv_index(Tuple(inputs)) + + +def sparsefillemptyrows(sparse_indices, sparse_values, dense_shape, default_value): +""" +Fill first column of the empty rows with default values for a sparse array. + +Parameters +-- +sparse_indices : relay.Expr +A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the +number of sparse values and n_dim is the number of dimensions of the dense_shape + +sparse_values : relay.Expr +A 1-D tensor[N] containing the sparse values for the sparse indices. + +dense_shape : relay.Expr +A list of integers. Shape of the dense output tensor. + +default_value : relay.Expr +A 0-D tensor containing the default value for the remaining locations. +Defaults to 0. + +Returns +--- +TupleWrapper with the following four outputs + +new_sparse_indices : relay.Expr +A 2-D tensor[N + dense_shape[0], n_dim] of integers containing location of new sparse +indices where N is the number of sparse values. It is filled with -1 at to_be_discarded +indices. + +empty_row_indicator : relay.Expr +A 1-D Boolean tensor[dense_shape[0]] indicating whether the particular row is empty + +new_sparse_values : relay.Expr +A 1-D tensor[dense_shape[0]] containing the sparse values for the sparse indices. It is +filled with -1 at to_be_discarded indices. + +slice_element_index : relay.Expr +A 1-D tensor containing the amount of elements in the sparse_indices and new_sparse_values +expression to be sliced in a future op discarding non-useful elements in new_sparse_indices +and new_sparse_values + +Examples +--- + +.. 
code-block:: python + +sparse_indices = [[0, 1], + [0, 3], + [2, 0], + [3, 1]] +sparse_values = [1, 2, 3, 4] +default_value = [10] +dense_shape = [5, 6] +new_sparse_indices, empty_row_indicator, new_sparse_values, slice_element_index = +relay.sparsereshape( +sparse_indices, +sparse_values, +prev_shape, +new_shape) +new_sparse_indices = [[0, 1], + [0, 3], + [2, 0], + [3, 1], + [1, 0], + [4, 0], + [-1, -1], Review comment: Yeah, in the short-term it will be used for TF. https://www.tensorflow.org/api_docs/python/tf/sparse/fill_empty_rows This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] comaniac opened a new pull request #7143: [AutoScheduler] Python based measure callbacks
comaniac opened a new pull request #7143: URL: https://github.com/apache/tvm/pull/7143 The current auto_scheduler callbacks can only be implemented in C++. This PR exposes the interface of measure callbacks to Python so that people can plug in their own callback functions easily. Note: looks like I cannot pass the `SearchPolicy` node to the `PackedFunc`. Specifically, `callback_func(policy, inputs, results);` results in a type mismatch error. This also prevents us from introducing the Python API for `SearchCallback`. Any advice is appreciated. cc @merrymercy @jcf94 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] zhiics commented on pull request #7002: Created CSourceMetaData module for model metadata
zhiics commented on pull request #7002: URL: https://github.com/apache/tvm/pull/7002#issuecomment-749223654 Thanks @manupa-arm @areusch This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] zhiics merged pull request #7002: Created CSourceMetaData module for model metadata
zhiics merged pull request #7002: URL: https://github.com/apache/tvm/pull/7002 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[tvm] branch main updated: Created CSourceMetaData module for model metadata (#7002)
This is an automated email from the ASF dual-hosted git repository. zhic pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 9713d67 Created CSourceMetaData module for model metadata (#7002) 9713d67 is described below commit 9713d675c64ae3075e10be5acadeef1328a44bb5 Author: manupa-arm AuthorDate: Mon Dec 21 22:07:33 2020 + Created CSourceMetaData module for model metadata (#7002) * Created CSourceMetaData module for model metadata * Currently, there is a MetaData module to capture constants conditionaly if the runtime modules implement const init PackedFuncs. However, this one relies on a load process in which the metadata is created on volatile memory that may be not usable in uTVM environments. * There is a need for model level metadata that is valid across all runtime modules such as the func registry when creating a system-lib. * This commit implements a CSoureMetaData module to hold func registry that collects function names from the runtime module and generates a c source file to be linked with final artifact. 
* Modified and added export_library for utvm Change-Id: Ie2e8e2aea1a66520f03fe8af7cc5bdf27339ea10 * Created CSourceMetaData module for model metadata * fixed llvm_module to return null pfs for get_symbol and get_const_vars Change-Id: I84810e0695d4d6fb314af2469117f965eed71b51 * Created CSourceMetaData module for model metadata *fixed bundle_deploy tests Change-Id: I0d1332a4abbb6830531784c59264021bbbd7148a * Created CSourceMetaData module for model metadata *fixed export_library not to insert "options" when targeting tar *fixed unit tests Change-Id: Ia1686889498b71af66f1a0311a059154ad3c2c3e * Created CSourceMetaData module for model metadata * enable wasm to support csource metadata module * disabled non DSOExportables from using csource metadata module Change-Id: Ie09beaad35cbc2ef738d1d24d91e249b5e099569 * Created CSourceMetaData module for model metadata * changed const pfs to be called only on external modules or DSOExportable modules Change-Id: I6ad28f166c0fc27a2548c851bf9287ec805550d1 * Created CSourceMetaData module for model metadata * CSourceMetadata module wrapper is only created for c/llvm targets Change-Id: I13cb4140c17e2e1f91d495b15a1ff7eeab9fb14d * Created CSourceMetaData module for model metadata *target should be defined to use csourcemetdata module Change-Id: Id8e55b23d0007a79c550334de2c0fec63d40171f * Created CSourceMetaData module for model metadata * reinstate llvm func registry Change-Id: I53e0754b6fb533637f08b25e98064d8c04092de4 * Created CSourceMetaData module for model metadata * addressed comments and fixed bugs Change-Id: I26401685dc803aeaf7642c865df88d683419e859 * Created CSourceMetaData module for model metadata * addressed a missed comment Change-Id: I65e65c30bc780a946f3f1b8372c40a49a5c20582 * Created CSourceMetaData module for model metadata * te build interface should only include c-source metadata if targetting "c" Change-Id: Ie23cb8c6231c1f2de6d2827084774e3510288098 * Created CSourceMetaData module for model metadata * c_source modules 
should be created only if they are non-DSO exportable Change-Id: I53f2f8e9caa41f133446f8881b9dc541ebeee8cc * Created CSourceMetaData module for model metadata * documetation misalignment in source_module.cc Change-Id: I83e2c29b1f2980ca65a694304720dc58a5cb7879 * Created CSourceMetaData module for model metadata * typo : same object file written as a dependency in the Makefile Change-Id: I8becc4196d286cfb6372768687b3c836799dcb78 * Created CSourceMetaData module for model metadata * removed unused param from a brief Change-Id: Ie4db2aca3b7ea147bd8c65ef5d1cc2146f530e76 * Created CSourceMetaData module for model metadata * made export library use c as the format for c source modules Change-Id: Ie2fd6204414f0fa43988a8082d18af7a3225e237 * Created CSourceMetaData module for model metadata *addressed a nit Change-Id: I6084b8c06ddfaaece295439dbab589e6e202b664 --- apps/bundle_deploy/build_model.py | 2 - python/tvm/driver/build_module.py | 12 ++ python/tvm/micro/build.py | 14 +- python/tvm/runtime/module.py | 40 +++-- src/relay/backend/build_module.cc | 10 +- src/relay/backend/contrib/codegen_c/codegen.cc | 26 ++-
[GitHub] [tvm] Wheest opened a new pull request #7142: Asymmetric padding in conv2d workload
Wheest opened a new pull request #7142: URL: https://github.com/apache/tvm/pull/7142 The goal of this pull request is to make asymmetric padding a first-class citizen in 2D convolution in TOPI. The current workload description has `"hpad"` and `"wpad"`, however this is not representative of all of the possible configurations. Most TOPI conv2d implementations in TVM already support asymmetric padding, so I think this should be reflected in the workload description. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] kevinthesun commented on a change in pull request #7137: [Torch] Fix PyTorch NMS conversion for negative scores
kevinthesun commented on a change in pull request #7137: URL: https://github.com/apache/tvm/pull/7137#discussion_r546923370 ## File path: python/tvm/relay/frontend/pytorch.py ## @@ -1857,16 +1857,18 @@ def nms(self, inputs, input_types): scores = inputs[1] iou_threshold = inputs[2] +num_boxes = _op.shape_of(scores) + +# TVM NMS assumes score > 0 +scores = scores - _op.min(scores) + _op.const(1.0) # Generate data with shape (1, num_anchors, 5) scores = AttrCvt(op_name="expand_dims", extras={"axis": -1, "num_newaxis": 1})([scores], {}) - -# Prepare input data for get_valid_counts data = _op.concatenate([scores, boxes], -1) data = _op.expand_dims(data, 0, 1) -# Leverage get_valid_counts to sort the data and clear invalid boxes -ct, data, indices = get_relay_op("get_valid_counts")( -data, score_threshold=-1.0, id_index=-1, score_index=0 -) +# PyTorch NMS doesn't have score_threshold, so no need to run get_valid_count Review comment: torchvision nms doesn't filter out invalid boxes before nms, which can be super slow. Filtering out negative score boxes should have no effect on the results. Probably we can discuss more about what we can get from this change. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] kevinthesun commented on a change in pull request #7137: [Torch] Fix PyTorch NMS conversion for negative scores
kevinthesun commented on a change in pull request #7137: URL: https://github.com/apache/tvm/pull/7137#discussion_r546923370 ## File path: python/tvm/relay/frontend/pytorch.py ## @@ -1857,16 +1857,18 @@ def nms(self, inputs, input_types): scores = inputs[1] iou_threshold = inputs[2] +num_boxes = _op.shape_of(scores) + +# TVM NMS assumes score > 0 +scores = scores - _op.min(scores) + _op.const(1.0) # Generate data with shape (1, num_anchors, 5) scores = AttrCvt(op_name="expand_dims", extras={"axis": -1, "num_newaxis": 1})([scores], {}) - -# Prepare input data for get_valid_counts data = _op.concatenate([scores, boxes], -1) data = _op.expand_dims(data, 0, 1) -# Leverage get_valid_counts to sort the data and clear invalid boxes -ct, data, indices = get_relay_op("get_valid_counts")( -data, score_threshold=-1.0, id_index=-1, score_index=0 -) +# PyTorch NMS doesn't have score_threshold, so no need to run get_valid_count Review comment: torchvision nms doesn't filter out invalid boxes before nms, which can be super slow. Filtering out negative score boxes should have no effect on the results. Probably we can discuss more about what we can get from this change. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[tvm] branch main updated (82942fb -> 53c0641)
This is an automated email from the ASF dual-hosted git repository. mbrookhart pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git. from 82942fb [TOPI] Simplify GPU NMS IR and optimize a bit (#7136) add 53c0641 Fix a few OpNode argument field descriptions when registered (#7140) No new revisions were added by this update. Summary of changes: src/relay/op/annotation/annotation.cc | 6 ++ src/relay/op/device_copy.cc | 1 + src/relay/op/image/grid_sample.cc | 3 ++- src/relay/op/memory/memory.cc | 2 +- src/relay/op/nn/pooling.cc| 2 ++ src/relay/op/tensor/transform.cc | 7 +++ src/relay/op/vm/vm.cc | 4 +++- src/target/source/intrin_rule_cuda.cc | 12 8 files changed, 34 insertions(+), 3 deletions(-)
[GitHub] [tvm] mbrookhart commented on pull request #7140: Fix a few OpNode argument field descriptions when registered
mbrookhart commented on pull request #7140: URL: https://github.com/apache/tvm/pull/7140#issuecomment-749189788 Thanks @domin1985 @junrushao1994 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] mbrookhart merged pull request #7140: Fix a few OpNode argument field descriptions when registered
mbrookhart merged pull request #7140: URL: https://github.com/apache/tvm/pull/7140 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] mbrookhart commented on a change in pull request #7126: Sparse fill empty rows op
mbrookhart commented on a change in pull request #7126: URL: https://github.com/apache/tvm/pull/7126#discussion_r546916764 ## File path: python/tvm/relay/op/transform.py ## @@ -1320,3 +1320,84 @@ def adv_index(inputs): Output tensor. """ return _make.adv_index(Tuple(inputs)) + + +def sparsefillemptyrows(sparse_indices, sparse_values, dense_shape, default_value): +""" +Fill first column of the empty rows with default values for a sparse array. + +Parameters +-- +sparse_indices : relay.Expr +A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the +number of sparse values and n_dim is the number of dimensions of the dense_shape + +sparse_values : relay.Expr +A 1-D tensor[N] containing the sparse values for the sparse indices. + +dense_shape : relay.Expr +A list of integers. Shape of the dense output tensor. + +default_value : relay.Expr +A 0-D tensor containing the default value for the remaining locations. +Defaults to 0. + +Returns +--- +TupleWrapper with the following four outputs + +new_sparse_indices : relay.Expr +A 2-D tensor[N + dense_shape[0], n_dim] of integers containing location of new sparse +indices where N is the number of sparse values. It is filled with -1 at to_be_discarded +indices. + +empty_row_indicator : relay.Expr +A 1-D Boolean tensor[dense_shape[0]] indicating whether the particular row is empty + +new_sparse_values : relay.Expr +A 1-D tensor[dense_shape[0]] containing the sparse values for the sparse indices. It is +filled with -1 at to_be_discarded indices. + +slice_element_index : relay.Expr +A 1-D tensor containing the amount of elements in the sparse_indices and new_sparse_values +expression to be sliced in a future op discarding non-useful elements in new_sparse_indices +and new_sparse_values + +Examples +--- + +.. 
code-block:: python + +sparse_indices = [[0, 1], + [0, 3], + [2, 0], + [3, 1]] +sparse_values = [1, 2, 3, 4] +default_value = [10] +dense_shape = [5, 6] +new_sparse_indices, empty_row_indicator, new_sparse_values, slice_element_index = +relay.sparsereshape( +sparse_indices, +sparse_values, +prev_shape, +new_shape) +new_sparse_indices = [[0, 1], + [0, 3], + [2, 0], + [3, 1], + [1, 0], + [4, 0], + [-1, -1], Review comment: We have it set up that way because that's how MXNet does it (TOPI's original opset was mostly cloned from MXNet), and MXNet is fairly conservative about static shapes. From a performance perspective, if the expectation is that we'll always use this with a dynamic strided slice after, I'd think it would be better to just emit a dynamic shape. If we have another usecase, however, I think this is fine. Do you have a framework level usecase in mind? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 commented on a change in pull request #7126: Sparse fill empty rows op
codeislife99 commented on a change in pull request #7126: URL: https://github.com/apache/tvm/pull/7126#discussion_r546895484 ## File path: python/tvm/relay/op/transform.py ## @@ -1320,3 +1320,84 @@ def adv_index(inputs): Output tensor. """ return _make.adv_index(Tuple(inputs)) + + +def sparsefillemptyrows(sparse_indices, sparse_values, dense_shape, default_value): +""" +Fill first column of the empty rows with default values for a sparse array. + +Parameters +-- +sparse_indices : relay.Expr +A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the +number of sparse values and n_dim is the number of dimensions of the dense_shape + +sparse_values : relay.Expr +A 1-D tensor[N] containing the sparse values for the sparse indices. + +dense_shape : relay.Expr +A list of integers. Shape of the dense output tensor. + +default_value : relay.Expr +A 0-D tensor containing the default value for the remaining locations. +Defaults to 0. + +Returns +--- +TupleWrapper with the following four outputs + +new_sparse_indices : relay.Expr +A 2-D tensor[N + dense_shape[0], n_dim] of integers containing location of new sparse +indices where N is the number of sparse values. It is filled with -1 at to_be_discarded +indices. + +empty_row_indicator : relay.Expr +A 1-D Boolean tensor[dense_shape[0]] indicating whether the particular row is empty + +new_sparse_values : relay.Expr +A 1-D tensor[dense_shape[0]] containing the sparse values for the sparse indices. It is +filled with -1 at to_be_discarded indices. + +slice_element_index : relay.Expr +A 1-D tensor containing the amount of elements in the sparse_indices and new_sparse_values +expression to be sliced in a future op discarding non-useful elements in new_sparse_indices +and new_sparse_values + +Examples +--- + +.. 
code-block:: python + +sparse_indices = [[0, 1], + [0, 3], + [2, 0], + [3, 1]] +sparse_values = [1, 2, 3, 4] +default_value = [10] +dense_shape = [5, 6] +new_sparse_indices, empty_row_indicator, new_sparse_values, slice_element_index = +relay.sparsereshape( +sparse_indices, +sparse_values, +prev_shape, +new_shape) +new_sparse_indices = [[0, 1], + [0, 3], + [2, 0], + [3, 1], + [1, 0], + [4, 0], + [-1, -1], Review comment: No I haven't considered that. I noticed that for input dependent ops like `non_maximum_suppression`, they used nms with fixed size + strided slice and I just borrowed that idea. Do you mind pointing me to an implementation similar to what you are talking about ? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 commented on a change in pull request #7126: Sparse fill empty rows op
codeislife99 commented on a change in pull request #7126: URL: https://github.com/apache/tvm/pull/7126#discussion_r546893836 ## File path: python/tvm/relay/op/_transform.py ## @@ -63,6 +63,7 @@ _reg.register_injective_schedule("sparse_to_dense") _reg.register_injective_schedule("matrix_set_diag") _reg.register_injective_schedule("adv_index") +_reg.register_injective_schedule("sparsefillemptyrows") Review comment: Changed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] tkonolige commented on a change in pull request #7126: Sparse fill empty rows op
tkonolige commented on a change in pull request #7126: URL: https://github.com/apache/tvm/pull/7126#discussion_r546891906 ## File path: python/tvm/relay/op/transform.py ## @@ -1320,3 +1320,84 @@ def adv_index(inputs): Output tensor. """ return _make.adv_index(Tuple(inputs)) + + +def sparsefillemptyrows(sparse_indices, sparse_values, dense_shape, default_value): +""" +Fill first column of the empty rows with default values for a sparse array. + +Parameters +-- +sparse_indices : relay.Expr +A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the +number of sparse values and n_dim is the number of dimensions of the dense_shape + +sparse_values : relay.Expr +A 1-D tensor[N] containing the sparse values for the sparse indices. + +dense_shape : relay.Expr +A list of integers. Shape of the dense output tensor. + +default_value : relay.Expr +A 0-D tensor containing the default value for the remaining locations. +Defaults to 0. + +Returns +--- +TupleWrapper with the following four outputs + +new_sparse_indices : relay.Expr +A 2-D tensor[N + dense_shape[0], n_dim] of integers containing location of new sparse +indices where N is the number of sparse values. It is filled with -1 at to_be_discarded +indices. + +empty_row_indicator : relay.Expr +A 1-D Boolean tensor[dense_shape[0]] indicating whether the particular row is empty + +new_sparse_values : relay.Expr +A 1-D tensor[dense_shape[0]] containing the sparse values for the sparse indices. It is +filled with -1 at to_be_discarded indices. + +slice_element_index : relay.Expr +A 1-D tensor containing the amount of elements in the sparse_indices and new_sparse_values +expression to be sliced in a future op discarding non-useful elements in new_sparse_indices +and new_sparse_values + +Examples +--- + +.. 
code-block:: python + +sparse_indices = [[0, 1], + [0, 3], + [2, 0], + [3, 1]] +sparse_values = [1, 2, 3, 4] +default_value = [10] +dense_shape = [5, 6] +new_sparse_indices, empty_row_indicator, new_sparse_values, slice_element_index = +relay.sparsereshape( +sparse_indices, +sparse_values, +prev_shape, +new_shape) +new_sparse_indices = [[0, 1], + [0, 3], + [2, 0], + [3, 1], + [1, 0], + [4, 0], + [-1, -1], Review comment: Ok, I understand now. Could you add a little more documentation to that effect? I would also fill the values associated with invalid indices with zeros instead of -1. That way even if someone uses the indices, the effect would be nothing. Also, have you considered using dynamic shape instead? That would avoid the need for the negative indices. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] Wheest edited a comment on pull request #6137: Better grouped convolution for CPU targets
Wheest edited a comment on pull request #6137: URL: https://github.com/apache/tvm/pull/6137#issuecomment-749136716 Hello there, updating this pull request to be up-to-date with the latest `main` branch. In terms of things remaining to do: - [x] [Consider using compute_at and vectorize data load](https://github.com/apache/tvm/pull/6137#discussion_r463852066) - did not get an improvement. - [x] [We should support asymmetric padding like other compute / schedule.](https://github.com/apache/tvm/pull/6137#pullrequestreview-459474121) - this is implemented in GSPC, however requires extending `get_workload` for Conv2D generally. I began working on this in `505c127`, but have reverted it, and will have this as its own pull request in the coming days. - [ ] [Pack in alter_op_layout for kernel](https://github.com/apache/tvm/pull/6137#discussion_r463844394): have been working on this, but have an issue. My data is being passed to my `group_conv2d_NCHWc.x86` in the `conv2d_NCHWc` format (5D input data), rather than the GSPC format (6D input data), despite my changes to the x86 `_alter_conv2d_layout`. [See this branch](https://github.com/Wheest/incubator-tvm/tree/wheest/gspc-dev-alter-op). Some guidance or pointers would be appreciated @FrozenGene. In the interests of more transparent development, [here's part of my test suite](https://github.com/Wheest/tvm-grouped-conv-test). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 commented on a change in pull request #7126: Sparse fill empty rows op
codeislife99 commented on a change in pull request #7126: URL: https://github.com/apache/tvm/pull/7126#discussion_r546888428 ## File path: python/tvm/relay/op/transform.py ## @@ -1320,3 +1320,84 @@ def adv_index(inputs): Output tensor. """ return _make.adv_index(Tuple(inputs)) + + +def sparsefillemptyrows(sparse_indices, sparse_values, dense_shape, default_value): +""" +Fill first column of the empty rows with default values for a sparse array. + +Parameters +-- +sparse_indices : relay.Expr +A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the +number of sparse values and n_dim is the number of dimensions of the dense_shape + +sparse_values : relay.Expr +A 1-D tensor[N] containing the sparse values for the sparse indices. + +dense_shape : relay.Expr +A list of integers. Shape of the dense output tensor. + +default_value : relay.Expr +A 0-D tensor containing the default value for the remaining locations. +Defaults to 0. + +Returns +--- +TupleWrapper with the following four outputs + +new_sparse_indices : relay.Expr +A 2-D tensor[N + dense_shape[0], n_dim] of integers containing location of new sparse +indices where N is the number of sparse values. It is filled with -1 at to_be_discarded +indices. + +empty_row_indicator : relay.Expr +A 1-D Boolean tensor[dense_shape[0]] indicating whether the particular row is empty + +new_sparse_values : relay.Expr +A 1-D tensor[dense_shape[0]] containing the sparse values for the sparse indices. It is +filled with -1 at to_be_discarded indices. + +slice_element_index : relay.Expr +A 1-D tensor containing the amount of elements in the sparse_indices and new_sparse_values +expression to be sliced in a future op discarding non-useful elements in new_sparse_indices +and new_sparse_values + +Examples +--- + +.. 
code-block:: python + +sparse_indices = [[0, 1], + [0, 3], + [2, 0], + [3, 1]] +sparse_values = [1, 2, 3, 4] +default_value = [10] +dense_shape = [5, 6] +new_sparse_indices, empty_row_indicator, new_sparse_values, slice_element_index = +relay.sparsereshape( +sparse_indices, +sparse_values, +prev_shape, +new_shape) +new_sparse_indices = [[0, 1], + [0, 3], + [2, 0], + [3, 1], + [1, 0], + [4, 0], + [-1, -1], Review comment: Let me know if you have any better suggestions for implementation, else I will stick with this. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 commented on a change in pull request #7126: Sparse fill empty rows op
codeislife99 commented on a change in pull request #7126: URL: https://github.com/apache/tvm/pull/7126#discussion_r546888139 ## File path: python/tvm/relay/op/transform.py ## @@ -1320,3 +1320,84 @@ def adv_index(inputs): Output tensor. """ return _make.adv_index(Tuple(inputs)) + + +def sparsefillemptyrows(sparse_indices, sparse_values, dense_shape, default_value): +""" +Fill first column of the empty rows with default values for a sparse array. + +Parameters +-- +sparse_indices : relay.Expr +A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the +number of sparse values and n_dim is the number of dimensions of the dense_shape + +sparse_values : relay.Expr +A 1-D tensor[N] containing the sparse values for the sparse indices. + +dense_shape : relay.Expr +A list of integers. Shape of the dense output tensor. + +default_value : relay.Expr +A 0-D tensor containing the default value for the remaining locations. +Defaults to 0. + +Returns +--- +TupleWrapper with the following four outputs + +new_sparse_indices : relay.Expr +A 2-D tensor[N + dense_shape[0], n_dim] of integers containing location of new sparse +indices where N is the number of sparse values. It is filled with -1 at to_be_discarded +indices. + +empty_row_indicator : relay.Expr +A 1-D Boolean tensor[dense_shape[0]] indicating whether the particular row is empty + +new_sparse_values : relay.Expr +A 1-D tensor[dense_shape[0]] containing the sparse values for the sparse indices. It is +filled with -1 at to_be_discarded indices. + +slice_element_index : relay.Expr +A 1-D tensor containing the amount of elements in the sparse_indices and new_sparse_values +expression to be sliced in a future op discarding non-useful elements in new_sparse_indices +and new_sparse_values + +Examples +--- + +.. 
code-block:: python + +sparse_indices = [[0, 1], + [0, 3], + [2, 0], + [3, 1]] +sparse_values = [1, 2, 3, 4] +default_value = [10] +dense_shape = [5, 6] +new_sparse_indices, empty_row_indicator, new_sparse_values, slice_element_index = +relay.sparsereshape( +sparse_indices, +sparse_values, +prev_shape, +new_shape) +new_sparse_indices = [[0, 1], + [0, 3], + [2, 0], + [3, 1], + [1, 0], + [4, 0], + [-1, -1], Review comment: So, if you look at the description, `N + dense_shape[0]` is the dim0 shape of the tensor. This is because we can't know beforehand how many rows are missing. This operation would be followed by a strided_slice, with begin as 0 and end as `slice_element_index`, to remove the extra rows and achieve the desired output. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] Wheest edited a comment on pull request #6137: Better grouped convolution for CPU targets
Wheest edited a comment on pull request #6137: URL: https://github.com/apache/tvm/pull/6137#issuecomment-749136716 Hello there, updating this pull request to be up-to-date with the latest `main` branch. In terms of things remaining to do: - [x] [Consider using compute_at and vectorize data load](https://github.com/apache/tvm/pull/6137#discussion_r463852066) - did not get an improvement. - [x] [We should support asymmetric padding like other compute / schedule.](https://github.com/apache/tvm/pull/6137#pullrequestreview-459474121) - this is implemented in GSPC, however requires extending `get_workload` for Conv2D generally. I began working on this in `505c127`, but have reverted it, and will have this as its own pull request in the coming days. - [ ] [Pack in alter_op_layout for kernel](https://github.com/apache/tvm/pull/6137#discussion_r463844394): have been working on this, but have an issue. My data is being passed to my `group_conv2d_NCHWc.x86` in the `conv2d_NCHWc` format (5D input data), rather than the GSPC format (6D input data), despite my changes to the x86 `_alter_conv2d_layout`. [See this branch](https://github.com/Wheest/incubator-tvm/tree/wheest/gspc-dev-alter-op). Some guidance or pointers would be appreciated @FrozenGene. In the interests of more transparent development, [here's part of my test suite](https://github.com/Wheest/tvm-grouped-conv-test). **EDIT** My first commit `76c908b` failed a linting check. Locally I ran `bash ./tests/scripts/task_lint.sh`, and it worked before and after the commit. However, on CI I got a lot of new formatting suggestions, which I am following now. Why am I not getting these linting errors locally? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] Wheest edited a comment on pull request #6137: Better grouped convolution for CPU targets
Wheest edited a comment on pull request #6137: URL: https://github.com/apache/tvm/pull/6137#issuecomment-749136716 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] Wheest edited a comment on pull request #6137: Better grouped convolution for CPU targets
Wheest edited a comment on pull request #6137: URL: https://github.com/apache/tvm/pull/6137#issuecomment-749136716 Hello there, updating this pull request to be up-to-date with the latest `main` branch. In terms of things remaining to do: - [x] [Consider using compute_at and vectorize data load](https://github.com/apache/tvm/pull/6137#discussion_r463852066) - did not get an improvement. - [x] [We should support asymmetric padding like other compute / schedule.](https://github.com/apache/tvm/pull/6137#pullrequestreview-459474121) - this is implemented in GSPC, however requires extending `get_workload` for Conv2D generally. I began working on this in `505c127`, but have reverted it, and will have this as its own pull request in the coming days. - [ ] [Pack in alter_op_layout for kernel](https://github.com/apache/tvm/pull/6137#discussion_r463844394): have been working on this, but have an issue. My data is being passed to my `group_conv2d_NCHWc.x86` in the `conv2d_NCHWc` format (5D input data), rather than the GSPC format (6D input data), despite my changes to the x86 `_alter_conv2d_layout`. [See this branch](https://github.com/Wheest/incubator-tvm/tree/wheest/gspc-dev-alter-op). Some guidance or pointers would be appreciated @FrozenGene. In the interests of more transparent development, [here's part of my test suite](https://github.com/Wheest/tvm-grouped-conv-test). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] Wheest commented on pull request #6137: Better grouped convolution for CPU targets
Wheest commented on pull request #6137: URL: https://github.com/apache/tvm/pull/6137#issuecomment-749136716 Hello there, updating this pull request to be up-to-date with the latest `main` branch. In terms of things remaining to do: - [x] [Consider using compute_at and vectorize data load](https://github.com/apache/tvm/pull/6137#discussion_r463852066) - did not get an improvement. - [x] [We should support asymmetric padding like other compute / schedule.](https://github.com/apache/tvm/pull/6137#pullrequestreview-459474121) - this is implemented in GSPC, however requires extending `get_workload` for Conv2D generally. I began working on this in `505c127`, but have reverted it, and will have this as its own pull request in the coming days. - [ ] [Pack in alter_op_layout for kernel](https://github.com/apache/tvm/pull/6137#discussion_r463844394): have been working on this, but have an issue. My data is being passed to my `group_conv2d_NCHWc.x86` in the `conv2d_NCHWc` format (5D input data), rather than the GSPC format (6D input data), despite my changes to the x86 `_alter_conv2d_layout`. [See this branch](https://github.com/Wheest/incubator-tvm/tree/wheest/gspc-dev-alter-op). Some guidance or pointers would be appreciated. In the interests of more transparent development, [here's part of my test suite](https://github.com/Wheest/tvm-grouped-conv-test). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] TylerADavis commented on a change in pull request #7134: Add `is_floating_point()` test and better type support in `verify_model_vm()`
TylerADavis commented on a change in pull request #7134: URL: https://github.com/apache/tvm/pull/7134#discussion_r546871956 ## File path: tests/python/frontend/pytorch/test_forward.py ## @@ -1889,21 +1889,73 @@ def _get_default_vm_targets(): return [tgt for (tgt, _) in tvm.testing.enabled_targets()] -def verify_script_model(pt_model, ishapes, targets): +def verify_script_model(pt_model, ishapes, targets, idtype=None): script_module = torch.jit.script(pt_model) verify_model_vm(script_module, ishapes, targets=targets) +if idtype: +verify_model_vm(script_module, ishapes, idtype=idtype, targets=targets) +else: +verify_model_vm(script_module, ishapes, targets=targets) + Review comment: Done. I've cleaned up `verify_script_model()` and changed the handling of default arguments in `verify_model_vm()` so that passing in `None` will result in a `torch.float` being used. Let me know if the new approach looks good. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] TylerADavis commented on a change in pull request #7134: Add `is_floating_point()` test and better type support in `verify_model_vm()`
TylerADavis commented on a change in pull request #7134: URL: https://github.com/apache/tvm/pull/7134#discussion_r546871956 ## File path: tests/python/frontend/pytorch/test_forward.py ## @@ -1889,21 +1889,73 @@ def _get_default_vm_targets(): return [tgt for (tgt, _) in tvm.testing.enabled_targets()] -def verify_script_model(pt_model, ishapes, targets): +def verify_script_model(pt_model, ishapes, targets, idtype=None): script_module = torch.jit.script(pt_model) verify_model_vm(script_module, ishapes, targets=targets) +if idtype: +verify_model_vm(script_module, ishapes, idtype=idtype, targets=targets) +else: +verify_model_vm(script_module, ishapes, targets=targets) + Review comment: Done. I've cleaned up `verify_script_model()` and changed the handling of default arguments in `verify_model_vm()` so that passing in `None` will result in a `torch.float` being used. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] tqchen opened a new pull request #7141: [COMMUNITY] @jcf94 -> Committer
tqchen opened a new pull request #7141: URL: https://github.com/apache/tvm/pull/7141 Please join us to welcome @jcf94 as a new committer. He is one of the major contributors to the latest AutoScheduler(AutoTVMv2). He also actively supports the community on answering questions related to auto scheduling. - [Commits History](https://github.com/apache/incubator-tvm/commits?author=jcf94) - [Code Review](https://github.com/apache/incubator-tvm/pulls?utf8=%E2%9C%93=reviewed-by:jcf94) - [Community Forum Summary](https://discuss.tvm.apache.org/u/jcf94/summary) This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] tkonolige commented on a change in pull request #7107: [Tutorial] Add output validation to sparse tutorial
tkonolige commented on a change in pull request #7107: URL: https://github.com/apache/tvm/pull/7107#discussion_r546840492 ## File path: tests/scripts/task_ci_python_setup.sh ## @@ -31,3 +31,4 @@ set -o pipefail echo "Addtiional setup in" ${CI_IMAGE_NAME} python3 -m pip install --user tlcpack-sphinx-addon==0.1.3 synr==0.2.1 +python3 -m pip install --user tokenizers==0.9.4 transformers==4.0.1 Review comment: I'm not sure which commit was broken. Can you remove these and then we can debug? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] tkonolige commented on a change in pull request #7126: Sparse fill empty rows op
tkonolige commented on a change in pull request #7126: URL: https://github.com/apache/tvm/pull/7126#discussion_r546836442 ## File path: python/tvm/relay/op/transform.py ## @@ -1320,3 +1320,84 @@ def adv_index(inputs): Output tensor. """ return _make.adv_index(Tuple(inputs)) + + +def sparsefillemptyrows(sparse_indices, sparse_values, dense_shape, default_value): +""" +Fill first column of the empty rows with default values for a sparse array. + +Parameters +-- +sparse_indices : relay.Expr +A 2-D tensor[N, n_dim] of integers containing location of sparse values, where N is the +number of sparse values and n_dim is the number of dimensions of the dense_shape + +sparse_values : relay.Expr +A 1-D tensor[N] containing the sparse values for the sparse indices. + +dense_shape : relay.Expr +A list of integers. Shape of the dense output tensor. + +default_value : relay.Expr +A 0-D tensor containing the default value for the remaining locations. +Defaults to 0. + +Returns +--- +TupleWrapper with the following four outputs + +new_sparse_indices : relay.Expr +A 2-D tensor[N + dense_shape[0], n_dim] of integers containing location of new sparse +indices where N is the number of sparse values. It is filled with -1 at to_be_discarded Review comment: What is `to_be_discarded`? It is not mentioned anywhere. ## File path: python/tvm/relay/op/_transform.py ## @@ -63,6 +63,7 @@ _reg.register_injective_schedule("sparse_to_dense") _reg.register_injective_schedule("matrix_set_diag") _reg.register_injective_schedule("adv_index") +_reg.register_injective_schedule("sparsefillemptyrows") Review comment: I think `sparse_fill_empty_rows` fits better with our current naming convention. 
## File path: src/relay/op/tensor/transform.cc ## @@ -1553,6 +1553,63 @@ RELAY_REGISTER_OP("meshgrid") .set_attr("FTVMCompute", MeshgridCompute) .set_attr("TOpPattern", kInjective); +TVM_REGISTER_NODE_TYPE(SparseFillEmptyRowsAttrs); + +bool SparseFillEmptyRowsRel(const Array& types, int num_inputs, const Attrs& attrs, +const TypeReporter& reporter) { + // types: [ sparse_indices, sparse_values, default_values, result] + ICHECK_EQ(types.size(), 4); + ICHECK_EQ(num_inputs, 3); + std::vector fields; + auto sparse_indices = types[0].as(); + auto default_value = types[2].as(); + const auto* param = attrs.as(); + CHECK(param != nullptr); + + Array sp_ordered_output_shape; + sp_ordered_output_shape.push_back(param->dense_shape[0] + sparse_indices->shape[0]); + if (sparse_indices->shape.size() > 1) { +sp_ordered_output_shape.push_back(sparse_indices->shape[1]); + } + fields.push_back(TensorType(sp_ordered_output_shape, sparse_indices->dtype)); + fields.push_back(TensorType(Array{param->dense_shape[0]}, tvm::DataType::Bool())); + fields.push_back(TensorType(Array{sp_ordered_output_shape[0]}, default_value->dtype)); + fields.push_back(TensorType(Array{1}, tvm::DataType::Int(32))); + reporter->Assign(types[3], TupleType(Array(fields))); + return true; +} + +Array SparseFillEmptyRowsCompute(const Attrs& attrs, const Array& inputs, + const Type& out_type) { + CHECK_EQ(inputs.size(), 3); + const auto* param = attrs.as(); + CHECK(param != nullptr); + return {topi::SparseFillEmptyRows(inputs[0], inputs[1], inputs[2], param->dense_shape)}; +} + +Expr MakeSparseFillEmptyRows(Expr sparse_indices, Expr sparse_values, Expr default_value, + Array dense_shape) { + auto attrs = make_object(); + attrs->dense_shape = std::move(dense_shape); + static const Op& op = Op::Get("sparsefillemptyrows"); + return Call(op, {sparse_indices, sparse_values, default_value}, Attrs(attrs), {}); +} + +TVM_REGISTER_GLOBAL("relay.op._make.sparsefillemptyrows").set_body_typed(MakeSparseFillEmptyRows); + 
+RELAY_REGISTER_OP("sparsefillemptyrows") +.describe(R"code(Return twice of normal addition of two tensors. Review comment: Could you update the description. ## File path: src/relay/op/tensor/transform.cc ## @@ -1553,6 +1553,63 @@ RELAY_REGISTER_OP("meshgrid") .set_attr("FTVMCompute", MeshgridCompute) .set_attr("TOpPattern", kInjective); +TVM_REGISTER_NODE_TYPE(SparseFillEmptyRowsAttrs); + +bool SparseFillEmptyRowsRel(const Array& types, int num_inputs, const Attrs& attrs, +const TypeReporter& reporter) { + // types: [ sparse_indices, sparse_values, default_values, result] + ICHECK_EQ(types.size(), 4); + ICHECK_EQ(num_inputs, 3); + std::vector fields; + auto sparse_indices = types[0].as(); + auto default_value = types[2].as(); + const auto* param = attrs.as(); + CHECK(param != nullptr); + + Array sp_ordered_output_shape; +
[GitHub] [tvm] tkonolige commented on a change in pull request #7125: Sparse reshape op
tkonolige commented on a change in pull request #7125: URL: https://github.com/apache/tvm/pull/7125#discussion_r546831211 ## File path: python/tvm/relay/op/_transform.py ## @@ -63,6 +63,8 @@ _reg.register_injective_schedule("sparse_to_dense") _reg.register_injective_schedule("matrix_set_diag") _reg.register_injective_schedule("adv_index") +_reg.register_injective_schedule("sparsereshape") Review comment: I think `sparse_reshape` would be a more appropriate name given the above naming conventions. ## File path: python/tvm/relay/op/transform.py ## @@ -1320,3 +1320,52 @@ def adv_index(inputs): Output tensor. """ return _make.adv_index(Tuple(inputs)) + + +def sparsereshape(sparse_indices, sparse_values, prev_shape, new_shape): +""" +Reshape a Sparse Tensor Review comment: Could you note that this function only supports tensors in COO format, not CSR? In other parts of the codebase, we tend to use CSR. ## File path: src/relay/op/tensor/transform.cc ## @@ -1553,6 +1553,52 @@ RELAY_REGISTER_OP("meshgrid") .set_attr("FTVMCompute", MeshgridCompute) .set_attr("TOpPattern", kInjective); +TVM_REGISTER_NODE_TYPE(SparseReshapeAttrs); + +bool SparseReshapeRel(const Array& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { + // types: [sparse_indices, sparse_values, result] + // ICHECK_EQ(types.size(), 3); + auto sparse_indices = types[0].as(); + const auto* param = attrs.as(); + CHECK(param != nullptr); + Array new_sparse_indices_shape{sparse_indices->shape[0], + static_cast((param->new_shape).size())}; + reporter->Assign(types[2], TensorType(new_sparse_indices_shape, sparse_indices->dtype)); + return true; +} + +Array SparseReshapeCompute(const Attrs& attrs, const Array& inputs, + const Type& out_type) { + // ICHECK_EQ(inputs.size(), 2); + const auto* param = attrs.as(); + CHECK(param != nullptr); + return {topi::SparseReshape(inputs[0], inputs[1], param->prev_shape, param->new_shape)}; +} + +Expr MakeSparseReshape(Expr sparse_indices, Expr 
sparse_values, Array prev_shape, + Array new_shape) { + auto attrs = make_object(); + attrs->prev_shape = std::move(prev_shape); + attrs->new_shape = std::move(new_shape); + static const Op& op = Op::Get("sparsereshape"); + return Call(op, {sparse_indices, sparse_values}, Attrs(attrs), {}); +} + +TVM_REGISTER_GLOBAL("relay.op._make.sparsereshape").set_body_typed(MakeSparseReshape); + +RELAY_REGISTER_OP("sparsereshape") +.describe(R"code(Return twice of normal addition of two tensors. Review comment: Update the description ## File path: src/relay/op/tensor/transform.cc ## @@ -1553,6 +1553,52 @@ RELAY_REGISTER_OP("meshgrid") .set_attr("FTVMCompute", MeshgridCompute) .set_attr("TOpPattern", kInjective); +TVM_REGISTER_NODE_TYPE(SparseReshapeAttrs); + +bool SparseReshapeRel(const Array& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { + // types: [sparse_indices, sparse_values, result] + // ICHECK_EQ(types.size(), 3); Review comment: uncomment or remove ## File path: src/relay/op/tensor/transform.cc ## @@ -1553,6 +1553,52 @@ RELAY_REGISTER_OP("meshgrid") .set_attr("FTVMCompute", MeshgridCompute) .set_attr("TOpPattern", kInjective); +TVM_REGISTER_NODE_TYPE(SparseReshapeAttrs); + +bool SparseReshapeRel(const Array& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { + // types: [sparse_indices, sparse_values, result] + // ICHECK_EQ(types.size(), 3); + auto sparse_indices = types[0].as(); + const auto* param = attrs.as(); + CHECK(param != nullptr); Review comment: ```suggestion ICHECK(param != nullptr); ``` ## File path: src/relay/op/tensor/transform.cc ## @@ -1553,6 +1553,52 @@ RELAY_REGISTER_OP("meshgrid") .set_attr("FTVMCompute", MeshgridCompute) .set_attr("TOpPattern", kInjective); +TVM_REGISTER_NODE_TYPE(SparseReshapeAttrs); + +bool SparseReshapeRel(const Array& types, int num_inputs, const Attrs& attrs, + const TypeReporter& reporter) { + // types: [sparse_indices, sparse_values, result] + // 
ICHECK_EQ(types.size(), 3); + auto sparse_indices = types[0].as(); + const auto* param = attrs.as(); + CHECK(param != nullptr); + Array new_sparse_indices_shape{sparse_indices->shape[0], + static_cast((param->new_shape).size())}; + reporter->Assign(types[2], TensorType(new_sparse_indices_shape, sparse_indices->dtype)); + return true; +} + +Array SparseReshapeCompute(const Attrs& attrs, const Array& inputs, + const Type& out_type) { + // ICHECK_EQ(inputs.size(), 2); Review comment:
[GitHub] [tvm] Laurawly commented on pull request #7136: [TOPI] Simplify GPU NMS IR and optimize a bit
Laurawly commented on pull request #7136: URL: https://github.com/apache/tvm/pull/7136#issuecomment-749093671 Thanks @masahi @mbrookhart This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] Laurawly merged pull request #7136: [TOPI] Simplify GPU NMS IR and optimize a bit
Laurawly merged pull request #7136: URL: https://github.com/apache/tvm/pull/7136 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[tvm] branch main updated: [TOPI] Simplify GPU NMS IR and optimize a bit (#7136)
This is an automated email from the ASF dual-hosted git repository. laurawly pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 82942fb [TOPI] Simplify GPU NMS IR and optimize a bit (#7136) 82942fb is described below commit 82942fb33fd6e3572897c815af16905c4f75c2a4 Author: masahi AuthorDate: Tue Dec 22 02:17:26 2020 +0900 [TOPI] Simplify GPU NMS IR and optimize a bit (#7136) * remove get_valid_counts from pytorch nms * fix pytorch nms for negative score * merge reset by -1 * move max_out_size handling to triangle loop * update torch nms test * fuse the last two kernels * parallelize the first kernel * merge first and last kernel * remove unnecessary cases * fix typo * revert pytorch frontend change * fuse rearrange step with triangle loop * fix max_output_size handling * check if already surpressed * fix topi vision test by wrapping tir const around int argument * fix for num anchors = 0 case * fix missing zero init of num valid boxes when the input is empty * add some comments and missing doc * typo fix * add a guard against zero dim grid / thread block inside ir_buidlder * typo fix * trigger CI --- python/tvm/tir/ir_builder.py | 4 + python/tvm/topi/cuda/nms.py | 279 +++ 2 files changed, 102 insertions(+), 181 deletions(-) diff --git a/python/tvm/tir/ir_builder.py b/python/tvm/tir/ir_builder.py index 75c5c29..6dcc858 100644 --- a/python/tvm/tir/ir_builder.py +++ b/python/tvm/tir/ir_builder.py @@ -21,6 +21,7 @@ from tvm.ir import container as _container, PointerType, PrimType from . import stmt as _stmt from . import expr as _expr +from . 
import op class WithScope(object): @@ -200,6 +201,9 @@ class IRBuilder(object): node = _expr.StringImm(node) if isinstance(value, string_types): value = _expr.StringImm(value) +# thread_extent could be zero for dynamic workloads +if attr_key == "thread_extent": +value = op.max(1, value) self.emit(lambda x: _stmt.AttrStmt(node, attr_key, value, x)) def for_range(self, begin, end, name="i", dtype="int32", for_type="serial"): diff --git a/python/tvm/topi/cuda/nms.py b/python/tvm/topi/cuda/nms.py index 2733970..cea287e 100644 --- a/python/tvm/topi/cuda/nms.py +++ b/python/tvm/topi/cuda/nms.py @@ -51,68 +51,8 @@ def atomic_add(x, y): return tvm.tir.call_intrin(y.dtype, "tir.atomic_add", x, y) -def rearrange_indices_out_ir(data, output, valid_box_count): -"""Hybrid routine to rearrange nms output to -move all valid entries to top. - -Parameters --- -data : tvm.te.Tensor or numpy NDArray -NMS output. 3-D tensor with shape -[batch_size, num_anchors, 6] or -[batch_size, num_anchors, 5], or 2-D -tensor with shape [batch_size, num_anchors]. - -one: tvm.tir.const -Constant one with the same dtype as data. - -batch_size: tvm.tir.IntImm or tvm.tir.Var -Batch size. We need to pass it in since hybrid script doesn't support -binding variable to symbolic dim. - -num_anchors: tvm.tir.IntImm or tvm.tir.Var -Number of anchors. - -Returns ---- -output : tvm.te.Tensor or numpy NDArray -2-D tensor with shape [batch_size, num_anchors]. - -valid_box_count : tvm.te.Tensor or numpy NDArray -Tensor with shape [batch_size, 1], indicates -the valid number of boxes. 
-""" -batch_size = data.shape[0] -num_anchors = data.shape[1] - -ib = tvm.tir.ir_builder.create() - -data = ib.buffer_ptr(data) -valid_box_count = ib.buffer_ptr(valid_box_count) -output = ib.buffer_ptr(output) - -with ib.new_scope(): -i = te.thread_axis("blockIdx.x") -ib.scope_attr(i, "thread_extent", batch_size) -valid_idx = ib.allocate("int32", (1,), name="valid_idx", scope="local") -valid_idx[0] = 0 -with ib.for_range(0, num_anchors, name="j") as j: -with ib.if_scope(data[i, j] >= 0): -with ib.if_scope(data[i, j] > num_anchors): -output[i, valid_idx[0]] = 0 -valid_idx[0] = valid_idx[0] + 1 -with ib.else_scope(): -output[i, valid_idx[0]] = data[i, j] -valid_idx[0] = valid_idx[0] + 1 -with ib.else_scope(): -with ib.if_scope(data[i, j] < -num_anchors): -output[i, valid_idx[0]] = 0 -valid_idx[0] = valid_idx[0] + 1 -with ib.if_scope(j >= valid_idx[0]): -output[i, j] = -1 -valid_box_count[i, 0] = valid_idx[0] - -return ib.get() +def
[GitHub] [tvm] manupa-arm commented on pull request #7002: Created CSourceMetaData module for model metadata
manupa-arm commented on pull request #7002: URL: https://github.com/apache/tvm/pull/7002#issuecomment-749003721 Ping @areusch @zhiics. Let me know if you have any more concerns; I'll be disappearing for the holidays from tomorrow, so I would like to address anything outstanding today. Thanks! This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] tqchen commented on pull request #7127: Added additional information to the from_onnx tutorial
tqchen commented on pull request #7127: URL: https://github.com/apache/tvm/pull/7127#issuecomment-748981949 Thanks @hogepodge ! This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] tqchen commented on pull request #7093: [TFLite] add support for float16
tqchen commented on pull request #7093: URL: https://github.com/apache/tvm/pull/7093#issuecomment-748982915 Thanks @euntaik @giuseros @FrozenGene ! This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] tqchen merged pull request #7093: [TFLite] add support for float16
tqchen merged pull request #7093: URL: https://github.com/apache/tvm/pull/7093 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[tvm] branch main updated: [TFLite] add support for float16 (#7093)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 9914685 [TFLite] add support for float16 (#7093) 9914685 is described below commit 991468502f3a629560a4c284b73ce52094573523 Author: eric AuthorDate: Mon Dec 21 22:49:11 2020 +0900 [TFLite] add support for float16 (#7093) * [TFLite] add support for float16 * add testi case * add test case * add comments --- python/tvm/relay/frontend/tflite.py | 61 +--- tests/python/frontend/tflite/test_forward.py | 35 +++- 2 files changed, 79 insertions(+), 17 deletions(-) diff --git a/python/tvm/relay/frontend/tflite.py b/python/tvm/relay/frontend/tflite.py index 54eeb9d..a55eb16 100644 --- a/python/tvm/relay/frontend/tflite.py +++ b/python/tvm/relay/frontend/tflite.py @@ -325,6 +325,7 @@ class OperatorConverter(object): return { TensorType.UINT8: np.uint8, TensorType.INT8: np.int8, +TensorType.FLOAT16: np.float16, TensorType.FLOAT32: np.float32, TensorType.INT32: np.int32, TensorType.INT64: np.int64, @@ -362,6 +363,8 @@ class OperatorConverter(object): return "int8" if tensor_type == TensorType.UINT8: return "uint8" +if tensor_type == TensorType.FLOAT16: +return "float16" if tensor_type == TensorType.FLOAT32: return "float32" if tensor_type == TensorType.INT32: @@ -1991,20 +1994,33 @@ class OperatorConverter(object): weight_tensor_type_str = self.get_tensor_type_str(weight_tensor_type) in_expr = self.get_expr(input_tensor_idx) -weight_value = self.get_tensor_value(weight_tensor) - -# TFLite kernel layout: -# convolution: -# OC KH KW IC, we require KH KW IC OC (HWIO) -# depthwise convolution: -# 1 KH KW C(input_c * depth_multiplier), we require -# KH KW IC M (depth_multiplier) (HWOI) -if is_depthwise_conv: -weight_value = weight_value.reshape(kernel_h, kernel_w, input_c, depth_multiplier) + +# TFLite converts float32 models to float16 models by 
introducing +# a Dequantize op in every op that contains a float32 values. +# (weights, biases, and constants etc. ) +# So conv op may have weight and bias as tensors instead of values. +if self.has_expr(weight_tensor.tensor_idx): +weight_expr = self.get_expr(weight_tensor.tensor_idx) +if is_depthwise_conv: +weight_expr = _op.reshape( +weight_expr, (kernel_h, kernel_w, input_c, depth_multiplier) +) +else: +weight_expr = _op.transpose(weight_expr, axes=(1, 2, 3, 0)) else: -weight_value = weight_value.transpose((1, 2, 3, 0)) +weight_value = self.get_tensor_value(weight_tensor) +# TFLite kernel layout: +# convolution: +# OC KH KW IC, we require KH KW IC OC (HWIO) +# depthwise convolution: +# 1 KH KW C(input_c * depth_multiplier), we require +# KH KW IC M (depth_multiplier) (HWOI) +if is_depthwise_conv: +weight_value = weight_value.reshape(kernel_h, kernel_w, input_c, depth_multiplier) +else: +weight_value = weight_value.transpose((1, 2, 3, 0)) -weight_expr = self.exp_tab.new_const(weight_value, dtype=weight_tensor_type_str) +weight_expr = self.exp_tab.new_const(weight_value, dtype=weight_tensor_type_str) if padding == Padding.VALID: pass @@ -2039,9 +2055,12 @@ class OperatorConverter(object): # bias tensor type should be INT32 (quantization) or FLOAT32 assert bias_tensor_type in (TensorType.INT32, TensorType.FLOAT32) bias_tensor_type_str = self.get_tensor_type_str(bias_tensor_type) -bias_expr = self.exp_tab.new_const( -self.get_tensor_value(bias_tensor), dtype=bias_tensor_type_str -) +if self.has_expr(bias_tensor.tensor_idx): +bias_expr = self.get_expr(bias_tensor.tensor_idx) +else: +bias_expr = self.exp_tab.new_const( +self.get_tensor_value(bias_tensor), dtype=bias_tensor_type_str +) channel_axis = 3 out = _op.nn.bias_add(out, bias_expr, axis=channel_axis) @@ -2870,10 +2889,22 @@ class OperatorConverter(object): def convert_dequantize(self, op): """Convert TFLite Dequantize""" +try: +from tflite.TensorType import TensorType +except ImportError: +raise
[tvm] branch main updated: [CUDA] Parallel Cuda Mergesort (#7099)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new 38273ee [CUDA] Parallel Cuda Mergesort (#7099) 38273ee is described below commit 38273eeb39bd9b1ef642bd8e940e732f19ee98e8 Author: Matthew Brookhart AuthorDate: Mon Dec 21 06:48:29 2020 -0700 [CUDA] Parallel Cuda Mergesort (#7099) --- python/tvm/driver/build_module.py | 2 +- python/tvm/topi/cuda/sort.py | 292 +++-- tests/python/relay/test_any.py | 7 +- tests/python/relay/test_op_level6.py | 8 +- tests/python/topi/python/test_topi_argwhere.py | 7 +- 5 files changed, 234 insertions(+), 82 deletions(-) diff --git a/python/tvm/driver/build_module.py b/python/tvm/driver/build_module.py index 058bd62..dc9d741 100644 --- a/python/tvm/driver/build_module.py +++ b/python/tvm/driver/build_module.py @@ -277,7 +277,7 @@ def _build_for_device(input_mod, target, target_host): lambda f: "calling_conv" not in f.attrs or f.attrs["calling_conv"].value != CallingConv.DEVICE_KERNEL_LAUNCH ), -tvm.tir.transform.Apply(lambda f: f.with_attr("target", target)), +tvm.tir.transform.Apply(lambda f: f.with_attr("target", target_host)), tvm.tir.transform.LowerTVMBuiltin(), tvm.tir.transform.LowerDeviceStorageAccessInfo(), tvm.tir.transform.LowerCustomDatatypes(), diff --git a/python/tvm/topi/cuda/sort.py b/python/tvm/topi/cuda/sort.py index ea14905..039ebe3 100644 --- a/python/tvm/topi/cuda/sort.py +++ b/python/tvm/topi/cuda/sort.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, too-many-statements, singleton-comparison, unused-argument +# pylint: disable=invalid-name, no-member, too-many-locals, too-many-arguments, too-many-statements, singleton-comparison, unused-argument, no-else-return """Sort related operators """ import tvm from tvm import te @@ -62,7 +62,9 @@ def _schedule_sort(outs): return s -def sort_ir(data, values_out, axis, is_ascend, indices_out=None): +def sort_ir( +data, values_out, values_out_swap, axis, is_ascend, indices_out=None, indices_out_swap=None +): """Low level IR to do nms sorting on the GPU, same usage as tvm.contrib.sort.argsort on the CPU. Parameters @@ -70,8 +72,11 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None): data: Buffer Buffer of input data. Data will be sorted in place. -output : Buffer -Output buffer of indicies of sorted tensor with same shape as data. +values_out : Buffer +Output buffer of values of sorted tensor with same shape as data. + +values_out_swap : Buffer +Output buffer of values with same shape as data to use as swap. axis : Int Axis long which to sort the input tensor. @@ -79,11 +84,21 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None): is_ascend : Boolean Whether to sort in ascending or descending order. +indicess_out : Buffer +Output buffer of indices of sorted tensor with same shape as data. + +indices_out_swap : Buffer +Output buffer of indices with same shape as data to use as swap. + Returns --- stmt : Stmt The result IR statement. 
""" + +def ceil_div(a, b): +return tvm.tir.indexdiv(a + b - 1, b) + axis_mul_before = 1 axis_mul_after = 1 shape = data.shape @@ -94,64 +109,182 @@ def sort_ir(data, values_out, axis, is_ascend, indices_out=None): axis_mul_before *= value elif i > axis: axis_mul_after *= value -max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads) + ib = tvm.tir.ir_builder.create() + data = ib.buffer_ptr(data) values_out = ib.buffer_ptr(values_out) +values_out_swap = ib.buffer_ptr(values_out_swap) if indices_out is not None: indices_out = ib.buffer_ptr(indices_out) -nthread_tx = max_threads -nthread_bx = shape[axis] // max_threads + 1 +assert indices_out_swap is not None +indices_out_swap = ib.buffer_ptr(indices_out_swap) -tx = te.thread_axis("threadIdx.x") -bx = te.thread_axis("blockIdx.x") -ib.scope_attr(tx, "thread_extent", nthread_tx) -ib.scope_attr(bx, "thread_extent", nthread_bx) -tid = bx * nthread_tx + tx -temp_data = ib.allocate(values_out.dtype, (1,), name="temp_data", scope="local") -if indices_out is not None: -temp_index = ib.allocate(indices_out.dtype, (1,), name="temp_index", scope="local") +# Set up threading +
[GitHub] [tvm] tqchen merged pull request #7099: [CUDA] Parallel Cuda Mergesort
tqchen merged pull request #7099: URL: https://github.com/apache/tvm/pull/7099 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[tvm] branch main updated: Added additional information to the from_onnx tutorial (#7127)
This is an automated email from the ASF dual-hosted git repository. tqchen pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git The following commit(s) were added to refs/heads/main by this push: new bf5e248 Added additional information to the from_onnx tutorial (#7127) bf5e248 is described below commit bf5e248d7b52de21f1c92f7cb88293223cd7a32d Author: Chris Hoge AuthorDate: Mon Dec 21 05:47:12 2020 -0800 Added additional information to the from_onnx tutorial (#7127) --- tutorials/frontend/from_onnx.py | 23 +-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/tutorials/frontend/from_onnx.py b/tutorials/frontend/from_onnx.py index 1557ea5..1b969bc 100644 --- a/tutorials/frontend/from_onnx.py +++ b/tutorials/frontend/from_onnx.py @@ -60,7 +60,11 @@ onnx_model = onnx.load(model_path) ## # Load a test image # - -# A single cat dominates the examples! +# A single cat dominates the examples! This model takes a single input image of size +# 224x224 and outputs a scaled image that is 3x greater than the input along each +# axis, a 672x672 image. Re-scale the cat image to fit this input shape then +# convert to `YCbCr`. The super resolution model will then be applied to the +# luminance (`Y`) channel. from PIL import Image img_url = "https://github.com/dmlc/mxnet.js/blob/main/data/cat.png?raw=true; @@ -73,6 +77,14 @@ x = np.array(img_y)[np.newaxis, np.newaxis, :, :] ## # Compile the model with relay # - +# Typically ONNX models mix model input values with parameter values, with +# the input having the name `1`. This model dependent, and you should check +# with the documentation for your model to determine the full input and +# parameter name space. +# +# Passing in the shape dictionary to the `relay.frontend.from_onnx` method +# tells relay which ONNX parameters are inputs, and which are parameters, and +# provides a static definition of the input size. 
target = "llvm" input_name = "1" @@ -91,7 +103,9 @@ tvm_output = intrp.evaluate()(tvm.nd.array(x.astype(dtype)), **params).asnumpy() ## # Display results # - -# We put input and output image neck to neck +# We put input and output image neck to neck. The luminance channel, `Y` is the output +# from the model. The chroma channels `Cb` and `Cr` are resized to match with a simple +# bicubic algorithm. The image is then recombined and converted back to `RGB`. from matplotlib import pyplot as plt out_y = Image.fromarray(np.uint8((tvm_output[0, 0]).clip(0, 255)), mode="L") @@ -112,3 +126,8 @@ plt.show() # into a static shapes at compile time. If this fails, there may still be dynamic # operations in the model. Not all TVM kernels currently support dynamic shapes, # please file an issue on discuss.tvm.apache.org if you hit an error with dynamic kernels. +# +# This particular model was build using an older version of ONNX. During the import +# phase ONNX importer will run the ONNX verifier, which may throw a `Mismatched attribute type` +# warning. Because TVM supports a number of different ONNX versions, the Relay model +# will still be valid.
[GitHub] [tvm] tqchen merged pull request #7127: Added additional information to the from_onnx tutorial
tqchen merged pull request #7127: URL: https://github.com/apache/tvm/pull/7127 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 commented on a change in pull request #7125: Sparse reshape op
codeislife99 commented on a change in pull request #7125: URL: https://github.com/apache/tvm/pull/7125#discussion_r546677613 ## File path: include/tvm/topi/transform.h ## @@ -1386,6 +1386,85 @@ inline Array meshgrid(const Array& inputs, const std::string& in return result; } +/*! + * \brief Compute new sparse indices and return them after the sparsereshape operation + * + * \param sparse_indices Indices where values of the dense tensor exist + * \param sparse_values Values at the above indices respectively + * \param prev_shape Old Shape of the sparse tensor corresponding to sparse_indices + * \param new_shape Desired Shape of the sparse tensor which will correspond to output + * \param name The name of the operation + * \param tag The tag to mark the operation + * + * \return A Tensor whose op member is the sparsereshape operation + */ + +inline Array SparseReshape(const Tensor& sparse_indices, const Tensor& sparse_values, + const Tensor& prev_shape, const Tensor& new_shape, + const std::string name = "T_sparsereshape", + std::string tag = kInjective) { + Array result; + Array new_sparse_indices_shape{sparse_indices->shape[0], new_shape->shape[0]}; + std::vector multipliers(GetConstInt(prev_shape->shape[0]), 1); + std::vector dividers(GetConstInt(new_shape->shape[0]), 1); + + tvm::te::compute(Array{1}, [&](const Array& indices) { Review comment: Cleaned this up as per offline discussion. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] FrozenGene commented on pull request #7132: [Auto Scheduler] Mali Support
FrozenGene commented on pull request #7132: URL: https://github.com/apache/tvm/pull/7132#issuecomment-748942493 @comaniac @jcf94 @giuseros I have addressed the comments. Please take another look. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] FrozenGene commented on a change in pull request #7132: [Auto Scheduler] Mali Support
FrozenGene commented on a change in pull request #7132: URL: https://github.com/apache/tvm/pull/7132#discussion_r546664255 ## File path: python/tvm/relay/op/strategy/mali.py ## @@ -69,6 +71,36 @@ def conv2d_strategy_mali(attrs, inputs, out_type, target): raise RuntimeError( "Unsupported weight layout {} for conv2d NCHW".format(kernel_layout) ) +elif layout == "NHWC": +assert kernel_layout == "HWIO" +if not is_auto_scheduler_enabled(): +logger.error("conv2d NHWC layout is not enabled for mali with autotvm.") +strategy.add_implementation( +wrap_compute_conv2d(topi.nn.conv2d_nhwc, need_auto_scheduler_layout=True), +naive_schedule, +name="conv2d_nhwc.mali", +) +is_winograd_applicable = False +if len(kernel.shape) == 4: +kernel_h, kernel_w, _, _ = get_const_tuple(kernel.shape) +is_winograd_applicable = ( +"float" in data.dtype +and "float" in kernel.dtype +and kernel_h == 3 +and kernel_w == 3 +and stride_h == 1 +and stride_w == 1 +and dilation_h == 1 +and dilation_w == 1 +) Review comment: I thought about it for a while, and I think the current way is acceptable. Winograd on CUDA is encapsulated because we need complex logic to distinguish it from TensorCore, but the mali target doesn't need this. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 commented on a change in pull request #7125: Sparse reshape op
codeislife99 commented on a change in pull request #7125: URL: https://github.com/apache/tvm/pull/7125#discussion_r546652775 ## File path: python/tvm/relay/op/transform.py ## @@ -1320,3 +1320,47 @@ def adv_index(inputs): Output tensor. """ return _make.adv_index(Tuple(inputs)) + + +def sparsereshape(sparse_indices, sparse_values, prev_shape, new_shape): +""" +Reshape a Sparse Tensor + +Parameters +-- +inputs : List[relay.Expr] +Input tensor and indices. +The first tensor is input data and rests are indices. + +Returns +--- +result: relay.Expr +Output tensor. +Examples + +.. code-block:: python + +sparse_indices = [[0, 0, 0], + [0, 0, 1], + [0, 1, 0], + [1, 0, 0], + [1, 2, 3]] + +sparse_values = [7, 5, 6, 3, 9] + +prev_shape = [2, 3, 4] + +new_shape = [9, -1] + +relay.sparsereshape(sparse_indices, +sparse_values, +prev_shape, +new_shape) + = [[0, 0], + [0, 1], + [1, 2], + [4, 2], + [8, 1]] + Review comment: Done. ## File path: python/tvm/topi/transform.py ## @@ -931,3 +931,47 @@ def adv_index(data, indices): Output tensor """ return cpp.adv_index(data, indices) + + +def sparsereshape(sparse_indices, sparse_values, prev_shape, new_shape): +""" +Reshape a Sparse Tensor + +Parameters +-- +inputs : List[relay.Expr] +Input tensor and indices. +The first tensor is input data and rests are indices. + +Returns +--- +result: relay.Expr +Output tensor. +Examples + +.. code-block:: python + +sparse_indices = [[0, 0, 0], + [0, 0, 1], + [0, 1, 0], + [1, 0, 0], + [1, 2, 3]] + +sparse_values = [7, 5, 6, 3, 9] + +prev_shape = [2, 3, 4] + +new_shape = [9, -1] + +relay.sparsereshape(sparse_indices, +sparse_values, +prev_shape, +new_shape) + = [[0, 0], + [0, 1], + [1, 2], + [4, 2], + [8, 1]] + Review comment: Done. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 commented on a change in pull request #7125: Sparse reshape op
codeislife99 commented on a change in pull request #7125: URL: https://github.com/apache/tvm/pull/7125#discussion_r546652731 ## File path: python/tvm/topi/transform.py ## @@ -931,3 +931,47 @@ def adv_index(data, indices): Output tensor """ return cpp.adv_index(data, indices) + + +def sparsereshape(sparse_indices, sparse_values, prev_shape, new_shape): +""" +Reshape a Sparse Tensor + +Parameters +-- +inputs : List[relay.Expr] +Input tensor and indices. +The first tensor is input data and rests are indices. + +Returns +--- +result: relay.Expr +Output tensor. +Examples + Review comment: Done. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 commented on a change in pull request #7125: Sparse reshape op
codeislife99 commented on a change in pull request #7125: URL: https://github.com/apache/tvm/pull/7125#discussion_r546652595 ## File path: include/tvm/topi/transform.h ## @@ -1386,6 +1386,85 @@ inline Array meshgrid(const Array& inputs, const std::string& in return result; } +/*! + * \brief Compute new sparse indices and return them after the sparsereshape operation + * + * \param sparse_indices Indices where values of the dense tensor exist + * \param sparse_values Values at the above indices respectively + * \param prev_shape Old Shape of the sparse tensor corresponding to sparse_indices + * \param new_shape Desired Shape of the sparse tensor which will correspond to output + * \param name The name of the operation + * \param tag The tag to mark the operation + * + * \return A Tensor whose op member is the sparsereshape operation + */ + Review comment: Done. ## File path: python/tvm/relay/op/transform.py ## @@ -1320,3 +1320,47 @@ def adv_index(inputs): Output tensor. """ return _make.adv_index(Tuple(inputs)) + + +def sparsereshape(sparse_indices, sparse_values, prev_shape, new_shape): +""" +Reshape a Sparse Tensor + +Parameters +-- +inputs : List[relay.Expr] +Input tensor and indices. +The first tensor is input data and rests are indices. + +Returns +--- +result: relay.Expr +Output tensor. +Examples + Review comment: Done. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 edited a comment on pull request #7126: Sparse fill empty rows op
codeislife99 edited a comment on pull request #7126: URL: https://github.com/apache/tvm/pull/7126#issuecomment-748921727 cc : @trevor-m @comaniac @zhiics PTAL This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] codeislife99 commented on pull request #7126: Sparse fill empty rows op
codeislife99 commented on pull request #7126: URL: https://github.com/apache/tvm/pull/7126#issuecomment-748921727 cc : @trevor-m @comaniac @zhiics This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] FrozenGene commented on a change in pull request #7132: [Auto Scheduler] Mali Support
FrozenGene commented on a change in pull request #7132: URL: https://github.com/apache/tvm/pull/7132#discussion_r546625878 ## File path: python/tvm/topi/nn/conv2d.py ## @@ -382,7 +382,15 @@ def conv2d_nhwc( if auto_scheduler_rewritten_layout: # Infer shape for the rewritten layout # todo(merrymercy): wrap this with a more general interface. -if len(Filter.shape) >= 10: +if len(Filter.shape) == 17: Review comment: Yes, I will add a comment explaining why we get this value. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] FrozenGene commented on a change in pull request #7132: [Auto Scheduler] Mali Support
FrozenGene commented on a change in pull request #7132: URL: https://github.com/apache/tvm/pull/7132#discussion_r546625280 ## File path: src/auto_scheduler/search_task.cc ## @@ -90,6 +90,22 @@ HardwareParams HardwareParamsNode::GetDefaultHardwareParams(const Target& target int max_vthread_extent = warp_size / 4; return HardwareParams(-1, 16, 64, max_shared_memory_per_block, max_local_memory_per_block, max_threads_per_block, max_vthread_extent, warp_size); + } else if (target->kind->device_type == kDLOpenCL) { +if (target->GetAttr("device", "") == "mali") { + // We can not use device api to get attr like CUDA + // because like Mali target is normally on the remote machine + int max_shared_memory_per_block = 32768; + int max_local_memory_per_block = INT32_MAX; // skip the check on local memory + int max_threads_per_block = 256; + int warp_size = 1; + int max_vthread_extent = 1; + return HardwareParams(-1, 16, 64, max_shared_memory_per_block, max_local_memory_per_block, +max_threads_per_block, max_vthread_extent, warp_size); Review comment: This is the default target parameter for mali. I will write how to get these parameter and pass them to `SearchTask` in the tutorial of Mali so that you could overwrite default parameter if you have need. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] FrozenGene commented on a change in pull request #7132: [Auto Scheduler] Mali Support
FrozenGene commented on a change in pull request #7132: URL: https://github.com/apache/tvm/pull/7132#discussion_r546625280 ## File path: src/auto_scheduler/search_task.cc ## @@ -90,6 +90,22 @@ HardwareParams HardwareParamsNode::GetDefaultHardwareParams(const Target& target int max_vthread_extent = warp_size / 4; return HardwareParams(-1, 16, 64, max_shared_memory_per_block, max_local_memory_per_block, max_threads_per_block, max_vthread_extent, warp_size); + } else if (target->kind->device_type == kDLOpenCL) { +if (target->GetAttr("device", "") == "mali") { + // We can not use device api to get attr like CUDA + // because like Mali target is normally on the remote machine + int max_shared_memory_per_block = 32768; + int max_local_memory_per_block = INT32_MAX; // skip the check on local memory + int max_threads_per_block = 256; + int warp_size = 1; + int max_vthread_extent = 1; + return HardwareParams(-1, 16, 64, max_shared_memory_per_block, max_local_memory_per_block, +max_threads_per_block, max_vthread_extent, warp_size); Review comment: This is the default target parameter for mali. I will write how to get these parameter and pass them to `SearchTask` in the tutorial of Mali. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] FrozenGene commented on a change in pull request #7132: [Auto Scheduler] Mali Support
FrozenGene commented on a change in pull request #7132: URL: https://github.com/apache/tvm/pull/7132#discussion_r546624511 ## File path: python/tvm/topi/mali/conv2d.py ## @@ -545,6 +577,19 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): return None +@conv2d_winograd_nhwc.register(["mali"]) +def conv2d_winograd_nhwc_cuda( Review comment: Yes. Thanks! New commit will resolve this! This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] domin1985 opened a new pull request #7140: Fix a few OpNode argument field descriptions when registered
domin1985 opened a new pull request #7140: URL: https://github.com/apache/tvm/pull/7140 Fix a few missed OpNode argument field descriptions when registered. @mbrookhart @junrushao1994 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] giuseros commented on a change in pull request #7132: [Auto Scheduler] Mali Support
giuseros commented on a change in pull request #7132: URL: https://github.com/apache/tvm/pull/7132#discussion_r546591992 ## File path: python/tvm/topi/nn/conv2d.py ## @@ -382,7 +382,15 @@ def conv2d_nhwc( if auto_scheduler_rewritten_layout: # Infer shape for the rewritten layout # todo(merrymercy): wrap this with a more general interface. -if len(Filter.shape) >= 10: +if len(Filter.shape) == 17: Review comment: Could you maybe add some comments about the constants? ## File path: python/tvm/topi/mali/conv2d.py ## @@ -545,6 +577,19 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): return None +@conv2d_winograd_nhwc.register(["mali"]) +def conv2d_winograd_nhwc_cuda( Review comment: Should this be called `conv2d_winograd_nhwc_mali`? ## File path: src/auto_scheduler/search_task.cc ## @@ -90,6 +90,22 @@ HardwareParams HardwareParamsNode::GetDefaultHardwareParams(const Target& target int max_vthread_extent = warp_size / 4; return HardwareParams(-1, 16, 64, max_shared_memory_per_block, max_local_memory_per_block, max_threads_per_block, max_vthread_extent, warp_size); + } else if (target->kind->device_type == kDLOpenCL) { +if (target->GetAttr("device", "") == "mali") { + // We can not use device api to get attr like CUDA + // because like Mali target is normally on the remote machine + int max_shared_memory_per_block = 32768; + int max_local_memory_per_block = INT32_MAX; // skip the check on local memory + int max_threads_per_block = 256; + int warp_size = 1; + int max_vthread_extent = 1; + return HardwareParams(-1, 16, 64, max_shared_memory_per_block, max_local_memory_per_block, +max_threads_per_block, max_vthread_extent, warp_size); Review comment: Is this referring to a specific target? Either way, could you add a bit more explanation about the constants? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org
[GitHub] [tvm] masahi opened a new issue #7139: [TEST][FLAKY] test_topi_math.py:test_ewise
masahi opened a new issue #7139: URL: https://github.com/apache/tvm/issues/7139 https://ci.tlcpack.ai/blue/organizations/jenkins/tvm/detail/PR-7136/8/pipeline This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org