[tvm] branch main updated (bd61d18 -> a374cdd)
This is an automated email from the ASF dual-hosted git repository. liangfu pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git. from bd61d18 [Relay] s/SEScope/VirtualDevice/g (#9759) add a374cdd [Runtime][Pipeline Executor] Add the map logic of global input and subgraph input. (#9751) No new revisions were added by this update. Summary of changes: python/tvm/contrib/pipeline_executor.py | 138 src/runtime/pipeline/pipeline_executor.cc| 25 +++- src/runtime/pipeline/pipeline_executor.h | 48 --- src/runtime/pipeline/pipeline_scheduler.cc | 2 +- src/runtime/pipeline/pipeline_scheduler.h| 3 +- src/runtime/pipeline/pipeline_struct.h | 181 --- tests/python/relay/test_pipeline_executor.py | 16 ++- 7 files changed, 306 insertions(+), 107 deletions(-)
[tvm] branch main updated (06fc788 -> 421dbf1)
This is an automated email from the ASF dual-hosted git repository. liangfu pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git. from 06fc788 [RISCV] Add support for llvm parameter -mabi (-target-abi) (#8860) add 421dbf1 [Community] @manupa-arm -> Committer (#8870) No new revisions were added by this update. Summary of changes: CONTRIBUTORS.md | 1 + 1 file changed, 1 insertion(+)
[tvm-vta] branch main updated: Chisel Pipelined GEMM (#30)
This is an automated email from the ASF dual-hosted git repository. liangfu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm-vta.git The following commit(s) were added to refs/heads/main by this push: new c18a224 Chisel Pipelined GEMM (#30) c18a224 is described below commit c18a22475d8ccb5447da4591c709d347687fb2b8 Author: Abhijit Davare AuthorDate: Tue Jun 22 18:03:09 2021 -0700 Chisel Pipelined GEMM (#30) * Reset to 644 file permissions * Add json files to src/test/resources for testing * Add new TensorGemmPipelinedSplit module and rename existing TensorGemm to TensorGemmOrig * Tests for TensorGemmPipelinedSplit, TensorGemmOrig, and associated submodules * Add jackson plugin dependency and stricter Scala checks * Remove debug prints * Rename x.json and y.json to gemm_1uop_overflow_offset.json and gemm_2uop_overflow_cascaded.json respectively * All occurrences of '\( ' replaced with '\(' * Add linting rule to flag spaces after lparen characters * Remove comment * Rename TensorGemmOrig to TensorGemmSimple --- hardware/chisel/build.sbt | 8 + hardware/chisel/scalastyle-config.xml | 5 + .../chisel/src/main/scala/core/TensorGemm.scala| 418 ++-- hardware/chisel/src/test/resources/.gitignore | 1 + .../test/resources/gemm_1uop_overflow_offset.json | 188 ++ .../resources/gemm_2uop_overflow_cascaded.json | 188 ++ .../chisel/src/test/scala/unittest/GemmTest.scala | 90 +++ .../chisel/src/test/scala/unittest/Generic.scala | 0 .../src/test/scala/unittest/TensorAluTest.scala| 0 .../test/scala/unittest/TensorGemmJsonTest.scala | 280 .../src/test/scala/unittest/TensorGemmTest.scala | 742 + 11 files changed, 1874 insertions(+), 46 deletions(-) diff --git a/hardware/chisel/build.sbt b/hardware/chisel/build.sbt index 7efd59d..851f5ab 100644 --- a/hardware/chisel/build.sbt +++ b/hardware/chisel/build.sbt @@ -68,5 +68,13 @@ val defaultVersions = Map( libraryDependencies ++= Seq("chisel3","chisel-iotesters").map { dep: String => "edu.berkeley.cs" %% 
dep % sys.props.getOrElse(dep + "Version", defaultVersions(dep)) } +libraryDependencies ++= Seq( + "com.fasterxml.jackson.core" % "jackson-databind" % "2.10.3", + "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.10.3" +) + +scalacOptions += "-language:reflectiveCalls" +scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature", "-Xfatal-warnings") + scalacOptions ++= scalacOptionsVersion(scalaVersion.value) javacOptions ++= javacOptionsVersion(scalaVersion.value) diff --git a/hardware/chisel/scalastyle-config.xml b/hardware/chisel/scalastyle-config.xml index 1252900..89196be 100644 --- a/hardware/chisel/scalastyle-config.xml +++ b/hardware/chisel/scalastyle-config.xml @@ -71,6 +71,11 @@ + + + LPAREN + + diff --git a/hardware/chisel/src/main/scala/core/TensorGemm.scala b/hardware/chisel/src/main/scala/core/TensorGemm.scala index e977552..f63de94 100644 --- a/hardware/chisel/src/main/scala/core/TensorGemm.scala +++ b/hardware/chisel/src/main/scala/core/TensorGemm.scala @@ -21,12 +21,11 @@ package vta.core import chisel3._ import chisel3.util._ -import chisel3.experimental._ import vta.util.config._ import scala.math.pow /** Pipelined multiply and accumulate */ -class MAC(aBits: Int = 8, bBits: Int = 8, cBits: Int = 16) extends Module { +class MAC(aBits: Int = 8, bBits: Int = 8, cBits: Int = 16, flopIn: Boolean = false) extends Module { val outBits = Math.max(aBits + bBits, cBits) + 1 val io = IO(new Bundle { val a = Input(SInt(aBits.W)) @@ -34,16 +33,15 @@ class MAC(aBits: Int = 8, bBits: Int = 8, cBits: Int = 16) extends Module { val c = Input(SInt(cBits.W)) val y = Output(SInt(outBits.W)) }) + val mult = Wire(SInt((aBits + bBits).W)) - val add = Wire(SInt(outBits.W)) - val rA = RegNext(io.a) - val rB = RegNext(io.b) - val rC = RegNext(io.c) + val rA = if (flopIn) RegNext(io.a) else io.a + val rB = if (flopIn) RegNext(io.b) else io.b + val rC = if (flopIn) RegNext(io.c) else io.c mult := rA * rB - add := rC +& mult - - io.y := add + val addV = if 
(flopIn) {rC +& mult} else {RegNext(rC +& mult)} + io.y := addV } /** PipeAdder @@ -86,28 +84,31 @@ class Adder(aBits: Int = 8, bBits: Int = 8) extends Module { } /** Pipelined DotProduct based on MAC and PipeAdder */ -class DotProduct(aBits: Int = 8, bBits: Int = 8, size: Int = 16) extends Module { +class DotProduct(aBits: Int = 8, bBits: Int = 8, blockIn: Int = 16) extends Module { val errorMsg = s"\n\n[VTA] [DotProduct] size must be greater
[tvm] branch main updated (86a8504 -> 25f0252)
This is an automated email from the ASF dual-hosted git repository. liangfu pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/tvm.git. from 86a8504 [Frontend][MXNet] add _npi_subtract_scalar (#7191) add 25f0252 Makes sure g_last_error is null terminated. (#7190) No new revisions were added by this update. Summary of changes: src/runtime/crt/common/crt_runtime_api.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-)
[tvm-vta] branch main updated: Enable Supported Xilinx target ZCU104 with Hardware Preset (#20)
This is an automated email from the ASF dual-hosted git repository. liangfu pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tvm-vta.git The following commit(s) were added to refs/heads/main by this push: new 57db5a7 Enable Supported Xilinx target ZCU104 with Hardware Preset (#20) 57db5a7 is described below commit 57db5a718c74a788c98120ebbe1230797be698c8 Author: Daniel Steger AuthorDate: Wed Dec 9 19:34:15 2020 -0800 Enable Supported Xilinx target ZCU104 with Hardware Preset (#20) * targets: Added zcu104 vta target This commit adds support for the Xilinx zcu104 development board. Currently, TVM-VTA does not support a production board thus this commit. Leveraging a supported board which is integrated into vivado provides board presets properly configuring peripherals and IO for development. This means that the project can be used for further bsp development using the xsa. * scripts: Enable applying board preset in vivado Enable fpga_board and fpga_board_ver properties for Xilinx Platforms. This enables the hardware project to produce a usable xsa which contains the board presets. The board preset contains board specific config such as IO/PS/DDR settings. The end goal of this commit is to allow the output products of TVM-VTA to be used in bsp creation and.. produce more meaningful output products. --- config/pkg_config.py | 23 +-- config/vta_config.py | 14 ++ config/zcu104_sample.json | 13 + hardware/xilinx/scripts/vivado.tcl | 9 + 4 files changed, 57 insertions(+), 2 deletions(-) diff --git a/config/pkg_config.py b/config/pkg_config.py index 2fe1c4c..a324f3e 100644 --- a/config/pkg_config.py +++ b/config/pkg_config.py @@ -91,7 +91,7 @@ class PkgConfig(object): # List of source files that can be used to build standalone library. 
self.lib_source = [] self.lib_source += glob.glob("%s/src/*.cc" % vta_hw_path) -if self.TARGET in ["pynq", "ultra96"]: +if self.TARGET in ["pynq", "ultra96", "zcu104"]: # add pynq drivers for any board that uses pynq driver stack (see pynq.io) self.lib_source += glob.glob("%s/src/pynq/*.cc" % vta_hw_path) elif self.TARGET in ["de10nano"]: @@ -102,7 +102,7 @@ class PkgConfig(object): ] # Linker flags -if self.TARGET in ["pynq", "ultra96"]: +if self.TARGET in ["pynq", "ultra96", "zcu104"]: self.ldflags = [ "-L/usr/lib", "-l:libcma.so"] @@ -152,6 +152,23 @@ class PkgConfig(object): elif self.TARGET == "ultra96": self.fpga_device = "xczu3eg-sbva484-1-e" self.fpga_family = "zynq-ultrascale+" +self.fpga_board = None +self.fpga_board_rev = None +self.fpga_freq = 333 +self.fpga_per = 2 +self.fpga_log_axi_bus_width = 7 +self.axi_prot_bits = '010' +# IP register address map +self.ip_reg_map_range = "0x1000" +self.fetch_base_addr = "0xA000" +self.load_base_addr = "0xA0001000" +self.compute_base_addr = "0xA0002000" +self.store_base_addr = "0xA0003000" +elif self.TARGET == "zcu104": +self.fpga_device = "xczu7ev-ffvc1156-2-e" +self.fpga_family = "zynq-ultrascale+" +self.fpga_board = "xilinx.com:zcu104:part0" +self.fpga_board_rev = "1.1" self.fpga_freq = 333 self.fpga_per = 2 self.fpga_log_axi_bus_width = 7 @@ -166,6 +183,8 @@ class PkgConfig(object): # By default, we use the pynq parameters self.fpga_device = "xc7z020clg484-1" self.fpga_family = "zynq-7000" +self.fpga_board = None +self.fpga_board_rev = None self.fpga_freq = 100 self.fpga_per = 7 self.fpga_log_axi_bus_width = 6 diff --git a/config/vta_config.py b/config/vta_config.py index 6396ae5..84bba62 100644 --- a/config/vta_config.py +++ b/config/vta_config.py @@ -40,6 +40,8 @@ def gen_target_name(pkg): return "VTA_TARGET_DE10_NANO" elif pkg.TARGET == "ultra96": return "VTA_TARGET_ULTRA96" +elif pkg.TARGET == "zcu104": +return "VTA_TARGET_ZCU104" else: return None @@ -70,6 +72,8 @@ def gen_tcl_vivado(pkg, file): 
fo.write("\nconst TARGET {}".format(p
[incubator-tvm] branch main updated (9564925 -> c7ff885)
This is an automated email from the ASF dual-hosted git repository. liangfu pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git. from 9564925 [Relay][Frontend][Onnx] Allow A to B broadcasting of batch_matmul and reverse strided slice (#6681) add c7ff885 Add µTVM Zephyr support + QEMU regression test (#6603) No new revisions were added by this update. Summary of changes: include/tvm/runtime/crt/error_codes.h | 1 + include/tvm/runtime/crt/utvm_rpc_server.h | 24 +- python/tvm/exec/rpc_server.py | 69 --- python/tvm/micro/__init__.py | 2 +- python/tvm/micro/artifact.py | 108 +++- .../micro_kernel => micro/contrib}/__init__.py | 0 python/tvm/micro/contrib/base.py | 67 +++ python/tvm/micro/contrib/zephyr.py | 621 + python/tvm/micro/debugger.py | 25 +- python/tvm/micro/micro_binary.py | 15 +- python/tvm/micro/micro_library.py | 13 +- python/tvm/micro/session.py| 50 +- python/tvm/micro/transport.py | 238 .../graph_tuner => micro/transport}/__init__.py| 15 +- python/tvm/micro/transport/base.py | 299 ++ python/tvm/micro/transport/debug.py| 63 +++ python/tvm/micro/transport/file_descriptor.py | 105 python/tvm/micro/transport/subprocess.py | 67 +++ python/tvm/micro/transport/wakeup.py | 74 +++ src/runtime/crt/host/main.cc | 19 +- src/runtime/crt/utvm_rpc_server/rpc_server.cc | 50 +- src/runtime/micro/micro_session.cc | 136 - tests/lint/check_file_type.py | 3 + tests/micro/qemu/.gitignore| 2 + tests/micro/qemu/test_zephyr.py| 143 + tests/micro/qemu/zephyr-runtime/.gitignore | 3 + tests/micro/qemu/zephyr-runtime/CMakeLists.txt | 27 + .../micro/qemu/zephyr-runtime/crt/crt_config.h | 22 +- .../qemu/zephyr-runtime/prj.conf} | 21 +- .../zephyr-runtime/qemu-hack/qemu-system-i386} | 26 +- .../micro/qemu/zephyr-runtime/sample.yaml | 12 +- tests/micro/qemu/zephyr-runtime/src/main.c | 238 tests/python/unittest/test_crt.py | 3 +- tests/python/unittest/test_micro_artifact.py | 137 + tests/scripts/task_python_microtvm.sh | 9 + 35 files changed, 
2244 insertions(+), 463 deletions(-) copy python/tvm/{topi/arm_cpu/cortex_m7/micro_kernel => micro/contrib}/__init__.py (100%) create mode 100644 python/tvm/micro/contrib/base.py create mode 100644 python/tvm/micro/contrib/zephyr.py delete mode 100644 python/tvm/micro/transport.py copy python/tvm/{autotvm/graph_tuner => micro/transport}/__init__.py (69%) create mode 100644 python/tvm/micro/transport/base.py create mode 100644 python/tvm/micro/transport/debug.py create mode 100644 python/tvm/micro/transport/file_descriptor.py create mode 100644 python/tvm/micro/transport/subprocess.py create mode 100644 python/tvm/micro/transport/wakeup.py create mode 100644 tests/micro/qemu/.gitignore create mode 100644 tests/micro/qemu/test_zephyr.py create mode 100644 tests/micro/qemu/zephyr-runtime/.gitignore create mode 100644 tests/micro/qemu/zephyr-runtime/CMakeLists.txt copy src/runtime/crt/crt_config-template.h => tests/micro/qemu/zephyr-runtime/crt/crt_config.h (77%) copy tests/{scripts/task_python_ethosn_tests.sh => micro/qemu/zephyr-runtime/prj.conf} (74%) mode change 100755 => 100644 copy tests/{lint/cppdocs.sh => micro/qemu/zephyr-runtime/qemu-hack/qemu-system-i386} (64%) copy conda/conda_build_config.yaml => tests/micro/qemu/zephyr-runtime/sample.yaml (88%) create mode 100644 tests/micro/qemu/zephyr-runtime/src/main.c create mode 100644 tests/python/unittest/test_micro_artifact.py
[incubator-tvm] branch master updated: [VTA][Xilinx] Update to Vivado 2020.1 and Pynq 2.5 (#6402)
This is an automated email from the ASF dual-hosted git repository. liangfu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git The following commit(s) were added to refs/heads/master by this push: new 3451ccb [VTA][Xilinx] Update to Vivado 2020.1 and Pynq 2.5 (#6402) 3451ccb is described below commit 3451ccb14384c2d98738efa6ff9b073f74d87ad2 Author: Thierry Moreau AuthorDate: Sat Sep 5 03:54:59 2020 -0700 [VTA][Xilinx] Update to Vivado 2020.1 and Pynq 2.5 (#6402) * vivado version update * update docs --- 3rdparty/vta-hw | 2 +- docs/vta/install.rst | 24 +--- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/3rdparty/vta-hw b/3rdparty/vta-hw index db65157..9b178fd 16 --- a/3rdparty/vta-hw +++ b/3rdparty/vta-hw @@ -1 +1 @@ -Subproject commit db65157208ec8fabb7b548c94596211b9db04190 +Subproject commit 9b178fdb387bffc708f2448a82e85b4737239aed diff --git a/docs/vta/install.rst b/docs/vta/install.rst index fe6b468..e47f84d 100644 --- a/docs/vta/install.rst +++ b/docs/vta/install.rst @@ -114,8 +114,8 @@ Setup your Pynq board based on the `Pynq board getting started tutorial http://www.pynq.io/board.html>`_ (released February 22rd 2019), and have imaged your SD card with it (we recommend the free `Etcher <https://etcher.io/>`_ program). -* For this test setup, follow the `"Connect to a Computer" <http://pynq.readthedocs.io/en/latest/getting_started.html#connect-to-a-computer>`_ Ethernet setup instructions. To be able to talk to the board, make sure to `assign your computer a static IP address <http://pynq.readthedocs.io/en/latest/appendix.html#assign-your-computer-a-static-ip>`_ +* Make sure that you've downloaded the latest Pynq image, `PYNQ-Z1 v2.5 <http://www.pynq.io/board.html>`_, and have imaged your SD card with it (we recommend the free `Etcher <https://etcher.io/>`_ program). 
+* For this test setup, follow the `"Connect to a Computer" <https://pynq.readthedocs.io/en/latest/getting_started/pynq_z1_setup.html>`_ Ethernet setup instructions. To be able to talk to the board, make sure to `assign your computer a static IP address <https://pynq.readthedocs.io/en/latest/appendix.html#assign-your-computer-a-static-ip>`_ Once the board is powered on and connected to your development machine, try connecting to it to make sure you've properly set up your Pynq board: @@ -156,6 +156,8 @@ The build process should take roughly 5 minutes. cd build cmake .. make runtime vta -j2 + # FIXME (tmoreau89): remove this step by fixing the cmake build + make clean; make runtime vta -j2 # Build VTA RPC server (takes 1 min) cd .. sudo ./apps/vta_rpc/start_rpc_server.sh # pw is 'xilinx' @@ -310,33 +312,33 @@ If you're interested in generating the Xilinx FPGA bitstream on your own instead Xilinx Toolchain Installation ^ -We recommend using Vivado 2018.3 since our scripts have been tested to work on this version of the Xilinx toolchains. +We recommend using Vivado 2020.1 since our scripts have been tested to work on this version of the Xilinx toolchains. Our guide is written for Linux (Ubuntu) installation. -You’ll need to install Xilinx’ FPGA compilation toolchain, `Vivado HL WebPACK 2018.3 <https://www.xilinx.com/products/design-tools/vivado.html>`_, which a license-free version of the Vivado HLx toolchain. +You’ll need to install Xilinx’ FPGA compilation toolchain, `Vivado HL WebPACK 2020.1 <https://www.xilinx.com/products/design-tools/vivado.html>`_, which a license-free version of the Vivado HLx toolchain. Obtaining and Launching the Vivado GUI Installer """""""""""""""""""""""""""""""""""""""""""""""" -1. Go to the `download webpage <https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vivado-design-tools/2018-3.html>`_, and download the Linux Self Extracting Web Installer for Vivado HLx 2018.3: WebPACK and Editions. +1. 
Go to the `download webpage <https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vivado-design-tools/2020-1.html>`_, and download the Linux Self Extracting Web Installer for Vivado HLx 2020.1: WebPACK and Editions. 2. You’ll have to sign in with a Xilinx account. This requires a Xilinx account creation that will take 2 minutes. -3. Complete the Name and Address Verification by clicking “Next”, and you will get the opportunity to download a binary file, called ``Xilinx_Vivado_SDK_Web_2018.3_1207_2324_Lin64.bin``. +3. Complete the Name and Address Verification by clic
[incubator-tvm] branch master updated (9e1fe82 -> c815d28)
This is an automated email from the ASF dual-hosted git repository. liangfu pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git. from 9e1fe82 [BYOC][JSON] json_node.h should include data_type.h (#6224) add c815d28 [uTVM] fix crt building and running error (#6231) No new revisions were added by this update. Summary of changes: include/tvm/runtime/crt/module.h| 7 +++ src/support/str_escape.h| 4 +++- src/target/source/codegen_c_host.cc | 7 --- 3 files changed, 14 insertions(+), 4 deletions(-)
[incubator-tvm] branch master updated (dff715a -> 3fda73f)
This is an automated email from the ASF dual-hosted git repository. liangfu pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git. from dff715a Fix error message in Buffer::vstore, NFC (#6056) add 3fda73f [RUNTIME][CRT] init TVMPackedFunc's name (#6044) No new revisions were added by this update. Summary of changes: src/runtime/crt/common/packed_func.c | 3 +++ 1 file changed, 3 insertions(+)
[incubator-tvm] branch master updated: µTVM CRT modifications for on-device RPC server (#5921)
This is an automated email from the ASF dual-hosted git repository. liangfu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git The following commit(s) were added to refs/heads/master by this push: new d6ceba0 µTVM CRT modifications for on-device RPC server (#5921) d6ceba0 is described below commit d6ceba044b2427d493575c26749164aef2efaf30 Author: Andrew Reusch AuthorDate: Sun Jul 12 02:28:31 2020 -0700 µTVM CRT modifications for on-device RPC server (#5921) * Reorganize CRT into parts, public API, and add standalone build. * Create a make-based build in src/runtime/crt. This is intended to be built in build/standalone_crt (generated by running ninja standalone_crt in build/). Its job is to build CRT without depending on headers not explicitly allowed in CRT. * Create a "public-facing" CRT API targeted to firmware running alongside CRT in include/tvm/runtime/crt. Developers who are integrating the CRT are the target of this API. * Reorganize CRT internally into common/ and graph_runtime/ pieces. Build each pieces as a separate statically-linked library. * Slim down TVMGraphRuntime public-facing API to just the functions that are used externally. * Updates to apps/bundle_deploy to make this work. * Add TVMFuncRegistry, CRT test infrastructure, and tests. * Also add error_codes.h, a file containing error codes returned by CRT. * Add TVMErrorf() * [API_CHANGE] Integrate func registry into CRT. * NOTE: This changes the default API for functions exposed under the CRT by the TVMFuncCall API. `resource_handle` is now always given as a new 6th parameter. * `resource_handle` is NULL when invoked on a global function and a pointer to the module owning the function otherwise. * Generalize arena-based memory manager. * lint * Fix git-clang-format arg parsing * add apache header * add mutable func registry tests * git-clang-format * fix more lint * Move memory_test to crttests. 
* fix tests * checkpoint * checkpoint * bundle_deploy demo_static works * rm debug printf * git-clang-format * fix lint * add asf header * pylint * update build configs for jenkins * make regression compiler happy * fix build errors in regression GCC * address comments * git-clang-format * fix for 32-bit cpp regression * fix incorrect use of memcpy and tests for 32-bit * clang-format --- CMakeLists.txt | 1 + Makefile | 3 + apps/bundle_deploy/Makefile| 48 ++- apps/bundle_deploy/build_model.py | 18 +- apps/bundle_deploy/bundle.c| 2 + apps/bundle_deploy/bundle_static.c | 63 ++-- .../{runtime.c => crt_config/crt_config.h} | 26 +- cmake/config.cmake | 3 + cmake/modules/StandaloneCrt.cmake | 151 ++ include/tvm/runtime/c_backend_api.h| 4 +- .../runtime.cc => include/tvm/runtime/crt/crt.h| 42 +-- include/tvm/runtime/crt/error_codes.h | 55 include/tvm/runtime/crt/func_registry.h| 137 + include/tvm/runtime/crt/graph_runtime.h| 115 +++ include/tvm/runtime/crt/memory.h | 12 +- {src => include/tvm}/runtime/crt/module.h | 22 +- include/tvm/runtime/crt/packed_func.h | 78 + .../tvm/runtime/crt/platform.h | 49 ++- python/tvm/micro/func_registry.py | 76 + src/runtime/crt/.gitignore | 1 + src/runtime/crt/Makefile | 57 src/runtime/crt/{ => common}/crt_backend_api.c | 11 +- src/runtime/crt/common/crt_runtime_api.c | 335 + src/runtime/crt/common/func_registry.c | 152 ++ src/runtime/crt/{ => common}/memory.c | 275 +++-- src/runtime/crt/{ => common}/ndarray.c | 7 +- .../crt/{packed_func.h => common/packed_func.c}| 104 +++ src/runtime/crt/crt_runtime_api.c | 97 -- src/runtime/crt/graph_runtime.h| 215 - .../crt/{ => graph_runtime}/graph_runtime.c| 62 ++-- src/runtime/crt/{ => graph_runtime}/load_json.c| 7 +- .../runtime.c => src/runtime/crt/host/crt_config.h | 28 +- .../runtime/crt/internal/common/func_registry.h} | 36 +-- .../tvm/runtime/crt/internal/common}/logging.h | 8 +- .../tvm/runtime/crt/internal/common/memory.h
[incubator-tvm] branch v0.6 updated: [BACKPORT-0.6][Bugfix][VTA] Enable streamlined GEMM execution (#5893)
This is an automated email from the ASF dual-hosted git repository. liangfu pushed a commit to branch v0.6 in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git The following commit(s) were added to refs/heads/v0.6 by this push: new 6ae8939 [BACKPORT-0.6][Bugfix][VTA] Enable streamlined GEMM execution (#5893) 6ae8939 is described below commit 6ae89396130d0207ff7182a11ae769165cbc5564 Author: Liangfu Chen AuthorDate: Tue Jun 23 14:32:27 2020 +0800 [BACKPORT-0.6][Bugfix][VTA] Enable streamlined GEMM execution (#5893) * [BACKPORT-0.6][Bugfix][VTA] Enable streamlined GEMM execution * [BACKPORT-0.6][Bugfix][VTA] Fix an issue in updating uop_idx in the TensorGemm module --- .../chisel/src/main/scala/core/TensorGemm.scala| 61 -- 1 file changed, 46 insertions(+), 15 deletions(-) diff --git a/vta/hardware/chisel/src/main/scala/core/TensorGemm.scala b/vta/hardware/chisel/src/main/scala/core/TensorGemm.scala index 3f5f387..f982176 100644 --- a/vta/hardware/chisel/src/main/scala/core/TensorGemm.scala +++ b/vta/hardware/chisel/src/main/scala/core/TensorGemm.scala @@ -46,7 +46,10 @@ class MAC(aBits: Int = 8, bBits: Int = 8, cBits: Int = 16) extends Module { io.y := add } -/** Pipelined adder */ +/** PipeAdder + * + * This unit loads input bits into register and performs addition in the next cycle + */ class PipeAdder(aBits: Int = 8, bBits: Int = 8) extends Module { val outBits = Math.max(aBits, bBits) + 1 val io = IO(new Bundle { @@ -61,6 +64,27 @@ class PipeAdder(aBits: Int = 8, bBits: Int = 8) extends Module { io.y := add } +/** Adder + * + * This unit wires input bits to an adder directly. + * The output comes out of combinational logic without waiting for another cycle. 
+ */ +class Adder(aBits: Int = 8, bBits: Int = 8) extends Module { + val outBits = Math.max(aBits, bBits) + 1 + val io = IO(new Bundle { +val a = Input(SInt(aBits.W)) +val b = Input(SInt(bBits.W)) +val y = Output(SInt(outBits.W)) + }) + val add = Wire(SInt(outBits.W)) + val rA = Wire(SInt(aBits.W)) + val rB = Wire(SInt(bBits.W)) + rA := io.a + rB := io.b + add := rA +& rB + io.y := add +} + /** Pipelined DotProduct based on MAC and PipeAdder */ class DotProduct(aBits: Int = 8, bBits: Int = 8, size: Int = 16) extends Module { @@ -80,9 +104,11 @@ class DotProduct(aBits: Int = 8, bBits: Int = 8, size: Int = 16) val m = Seq.fill(s(0))(Module(new MAC(aBits, bBits, cBits = 1))) // # of total vector pairs val a = Seq.tabulate(p)( i => - Seq.fill(s(i + 1))(Module(new PipeAdder( -aBits = (b + i + 1), -bBits = (b + i + 1) // # adders within each layer + Seq.fill(s(i + 1))( +if (i == 0) + Module(new PipeAdder(aBits = (b + i + 1), bBits = (b + i + 1))) +else + Module(new Adder(aBits = (b + i + 1), bBits = (b + i + 1) // # adders within each layer // Vector MACs for (i <- 0 until s(0)) { @@ -126,8 +152,9 @@ class MatrixVectorMultiplication(implicit p: Parameters) extends Module { }) val dot = Seq.fill(size)( Module(new DotProduct(aBits = inpBits, bBits = wgtBits, size))) - val acc = Seq.fill(size)( -Module(new Pipe(UInt(accBits.W), latency = log2Ceil(size) + 1))) + // Latency is defined as two in the following, because there is one cycle in the MAC module, + // and another cycle in the pipelined adders as the first layer of the accumulator + val acc = Seq.fill(size)(Module(new Pipe(UInt(accBits.W), latency = 2))) val add = Seq.fill(size)(Wire(SInt(accBits.W))) val vld = Wire(Vec(size, Bool())) @@ -188,7 +215,9 @@ class TensorGemm(debug: Boolean = false)(implicit p: Parameters) val wgt_i = Reg(chiselTypeOf(dec.uop_end)) val pBits = log2Ceil(p(CoreKey).blockOut) + 1 val inflight = Reg(UInt(pBits.W)) - val wrpipe = Module(new Pipe(chiselTypeOf(dec.uop_end), latency = pBits)) + // 
Latency is defined as two in the following, because there is one cycle in the MAC module, + // and another cycle in the pipelined adders as the first layer of the accumulator + val wrpipe = Module(new Pipe(chiselTypeOf(dec.uop_end), latency = 2)) val done = inflight === 0.U & ((state === sExe & cnt_o === dec.lp_0 - 1.U & @@ -236,11 +265,14 @@ class TensorGemm(debug: Boolean = false)(implicit p: Parameters) when(state === sIdle) { inflight := 0.U }.elsewhen(!dec.reset) { -when(state === sReadTensor) { // issue a tensor - inflight := inflight + 1.U -}.elsewhen(mvc.io.acc_o.data.valid) { // commit a tensor - inflight := inflight - 1.U -} +when((state === sReadTensor) && mvc.io.acc_o.data.valid) { // issue & commit + inflight := inflight +}.elsewhen(state === sReadTensor) { // issue a tensor +inflight := inflight + 1.U + } + .elsewhen(mvc.io.acc_o.data.valid) { // commit a tensor +inflight := infl
[incubator-tvm] branch master updated: [CI] Update the ci-gpu to the latest build with the new vulkansdk. (#5571)
This is an automated email from the ASF dual-hosted git repository. liangfu pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git The following commit(s) were added to refs/heads/master by this push: new 97b5b78 [CI] Update the ci-gpu to the latest build with the new vulkansdk. (#5571) 97b5b78 is described below commit 97b5b7850c6fd675b875ca5bf2f3d26dac0ef3bb Author: Tianqi Chen AuthorDate: Tue May 12 02:41:04 2020 -0700 [CI] Update the ci-gpu to the latest build with the new vulkansdk. (#5571) --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 9280740..60ee142 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -44,7 +44,7 @@ // ci_lint = "tvmai/ci-lint:v0.61" -ci_gpu = "tvmai/ci-gpu:v0.63" +ci_gpu = "tvmai/ci-gpu:v0.64" ci_cpu = "tvmai/ci-cpu:v0.62" ci_wasm = "tvmai/ci-wasm:v0.60" ci_i386 = "tvmai/ci-i386:v0.52"