[tvm] branch main updated (bd61d18 -> a374cdd)

2021-12-17 Thread liangfu
This is an automated email from the ASF dual-hosted git repository.

liangfu pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git.


from bd61d18  [Relay] s/SEScope/VirtualDevice/g (#9759)
 add a374cdd  [Runtime][Pipeline Executor] Add the map logic of global 
input and subgraph input. (#9751)

No new revisions were added by this update.

Summary of changes:
 python/tvm/contrib/pipeline_executor.py  | 138 
 src/runtime/pipeline/pipeline_executor.cc|  25 +++-
 src/runtime/pipeline/pipeline_executor.h |  48 ---
 src/runtime/pipeline/pipeline_scheduler.cc   |   2 +-
 src/runtime/pipeline/pipeline_scheduler.h|   3 +-
 src/runtime/pipeline/pipeline_struct.h   | 181 ---
 tests/python/relay/test_pipeline_executor.py |  16 ++-
 7 files changed, 306 insertions(+), 107 deletions(-)


[tvm] branch main updated (06fc788 -> 421dbf1)

2021-08-29 Thread liangfu
This is an automated email from the ASF dual-hosted git repository.

liangfu pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git.


from 06fc788  [RISCV] Add support for llvm parameter -mabi (-target-abi) 
(#8860)
 add 421dbf1  [Community] @manupa-arm -> Committer (#8870)

No new revisions were added by this update.

Summary of changes:
 CONTRIBUTORS.md | 1 +
 1 file changed, 1 insertion(+)


[tvm-vta] branch main updated: Chisel Pipelined GEMM (#30)

2021-06-22 Thread liangfu
This is an automated email from the ASF dual-hosted git repository.

liangfu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm-vta.git


The following commit(s) were added to refs/heads/main by this push:
 new c18a224  Chisel Pipelined GEMM (#30)
c18a224 is described below

commit c18a22475d8ccb5447da4591c709d347687fb2b8
Author: Abhijit Davare 
AuthorDate: Tue Jun 22 18:03:09 2021 -0700

Chisel Pipelined GEMM (#30)

* Reset to 644 file permissions

* Add json files to src/test/resources for testing

* Add new TensorGemmPipelinedSplit module and rename existing TensorGemm to 
TensorGemmOrig

* Tests for TensorGemmPipelinedSplit, TensorGemmOrig, and associated 
submodules

* Add jackson plugin dependency and stricter Scala checks

* Remove debug prints

* Rename x.json and y.json to gemm_1uop_overflow_offset.json and 
gemm_2uop_overflow_cascaded.json respectively

* All occurrences of '\( ' replaced with '\('

* Add linting rule to flag spaces after lparen characters

* Remove comment

* Rename TensorGemmOrig to TensorGemmSimple
---
 hardware/chisel/build.sbt  |   8 +
 hardware/chisel/scalastyle-config.xml  |   5 +
 .../chisel/src/main/scala/core/TensorGemm.scala| 418 ++--
 hardware/chisel/src/test/resources/.gitignore  |   1 +
 .../test/resources/gemm_1uop_overflow_offset.json  | 188 ++
 .../resources/gemm_2uop_overflow_cascaded.json | 188 ++
 .../chisel/src/test/scala/unittest/GemmTest.scala  |  90 +++
 .../chisel/src/test/scala/unittest/Generic.scala   |   0
 .../src/test/scala/unittest/TensorAluTest.scala|   0
 .../test/scala/unittest/TensorGemmJsonTest.scala   | 280 
 .../src/test/scala/unittest/TensorGemmTest.scala   | 742 +
 11 files changed, 1874 insertions(+), 46 deletions(-)

diff --git a/hardware/chisel/build.sbt b/hardware/chisel/build.sbt
index 7efd59d..851f5ab 100644
--- a/hardware/chisel/build.sbt
+++ b/hardware/chisel/build.sbt
@@ -68,5 +68,13 @@ val defaultVersions = Map(
 libraryDependencies ++= Seq("chisel3","chisel-iotesters").map {
   dep: String => "edu.berkeley.cs" %% dep % sys.props.getOrElse(dep + 
"Version", defaultVersions(dep)) }
 
+libraryDependencies ++= Seq(
+  "com.fasterxml.jackson.core" % "jackson-databind" % "2.10.3",
+  "com.fasterxml.jackson.module" %% "jackson-module-scala" % "2.10.3"
+)
+
+scalacOptions += "-language:reflectiveCalls"
+scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature", 
"-Xfatal-warnings")
+
 scalacOptions ++= scalacOptionsVersion(scalaVersion.value)
 javacOptions ++= javacOptionsVersion(scalaVersion.value)
diff --git a/hardware/chisel/scalastyle-config.xml 
b/hardware/chisel/scalastyle-config.xml
index 1252900..89196be 100644
--- a/hardware/chisel/scalastyle-config.xml
+++ b/hardware/chisel/scalastyle-config.xml
@@ -71,6 +71,11 @@
  
  
  
+ 
+  
+   LPAREN
+  
+ 
  
  
  
diff --git a/hardware/chisel/src/main/scala/core/TensorGemm.scala 
b/hardware/chisel/src/main/scala/core/TensorGemm.scala
index e977552..f63de94 100644
--- a/hardware/chisel/src/main/scala/core/TensorGemm.scala
+++ b/hardware/chisel/src/main/scala/core/TensorGemm.scala
@@ -21,12 +21,11 @@ package vta.core
 
 import chisel3._
 import chisel3.util._
-import chisel3.experimental._
 import vta.util.config._
 import scala.math.pow
 
 /** Pipelined multiply and accumulate */
-class MAC(aBits: Int = 8, bBits: Int = 8, cBits: Int = 16) extends Module {
+class MAC(aBits: Int = 8, bBits: Int = 8, cBits: Int = 16, flopIn: Boolean = 
false) extends Module {
   val outBits = Math.max(aBits + bBits, cBits) + 1
   val io = IO(new Bundle {
 val a = Input(SInt(aBits.W))
@@ -34,16 +33,15 @@ class MAC(aBits: Int = 8, bBits: Int = 8, cBits: Int = 16) 
extends Module {
 val c = Input(SInt(cBits.W))
 val y = Output(SInt(outBits.W))
   })
+
   val mult = Wire(SInt((aBits + bBits).W))
-  val add = Wire(SInt(outBits.W))
-  val rA = RegNext(io.a)
-  val rB = RegNext(io.b)
-  val rC = RegNext(io.c)
+  val rA = if (flopIn) RegNext(io.a) else io.a
+  val rB = if (flopIn) RegNext(io.b) else io.b
+  val rC = if (flopIn) RegNext(io.c) else io.c
 
   mult := rA * rB
-  add := rC +& mult
-
-  io.y := add
+  val addV = if (flopIn) {rC +& mult} else {RegNext(rC +& mult)}
+  io.y := addV
 }
 
 /** PipeAdder
@@ -86,28 +84,31 @@ class Adder(aBits: Int = 8, bBits: Int = 8) extends Module {
 }
 
 /** Pipelined DotProduct based on MAC and PipeAdder */
-class DotProduct(aBits: Int = 8, bBits: Int = 8, size: Int = 16) extends 
Module {
+class DotProduct(aBits: Int = 8, bBits: Int = 8, blockIn: Int = 16) extends 
Module {
   val errorMsg =
 s"\n\n[VTA] [DotProduct] size must be greater than 4 and a power o

[tvm] branch main updated (86a8504 -> 25f0252)

2021-01-04 Thread liangfu
This is an automated email from the ASF dual-hosted git repository.

liangfu pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git.


from 86a8504  [Frontend][MXNet] add _npi_subtract_scalar (#7191)
 add 25f0252  Makes sure g_last_error is null terminated. (#7190)

No new revisions were added by this update.

Summary of changes:
 src/runtime/crt/common/crt_runtime_api.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)



[tvm-vta] branch main updated: Enable Supported Xilinx target ZCU104 with Hardware Preset (#20)

2020-12-09 Thread liangfu
This is an automated email from the ASF dual-hosted git repository.

liangfu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm-vta.git


The following commit(s) were added to refs/heads/main by this push:
 new 57db5a7  Enable Supported Xilinx target ZCU104 with Hardware Preset 
(#20)
57db5a7 is described below

commit 57db5a718c74a788c98120ebbe1230797be698c8
Author: Daniel Steger 
AuthorDate: Wed Dec 9 19:34:15 2020 -0800

Enable Supported Xilinx target ZCU104 with Hardware Preset (#20)

* targets: Added zcu104 vta target

This commit adds support for the Xilinx zcu104 development board.
Currently, TVM-VTA does not support a production board, hence this
commit. Leveraging a supported board that is integrated into Vivado
provides board presets that properly configure peripherals and IO for
development. This means that the project can be used for further
bsp development using the xsa.

* scripts: Enable applying board preset in vivado

Enable fpga_board and fpga_board_rev properties for
Xilinx Platforms. This enables the hardware project to
produce a usable xsa which contains the board presets.
The board preset contains board specific config such as
IO/PS/DDR settings.

The end goal of this commit is to allow the output products
of TVM-VTA to be used in BSP creation and to produce more
meaningful output products.
---
 config/pkg_config.py   | 23 +--
 config/vta_config.py   | 14 ++
 config/zcu104_sample.json  | 13 +
 hardware/xilinx/scripts/vivado.tcl |  9 +
 4 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/config/pkg_config.py b/config/pkg_config.py
index 2fe1c4c..a324f3e 100644
--- a/config/pkg_config.py
+++ b/config/pkg_config.py
@@ -91,7 +91,7 @@ class PkgConfig(object):
 # List of source files that can be used to build standalone library.
 self.lib_source = []
 self.lib_source += glob.glob("%s/src/*.cc" % vta_hw_path)
-if self.TARGET in ["pynq", "ultra96"]:
+if self.TARGET in ["pynq", "ultra96", "zcu104"]:
 # add pynq drivers for any board that uses pynq driver stack (see 
pynq.io)
 self.lib_source += glob.glob("%s/src/pynq/*.cc" % vta_hw_path)
 elif self.TARGET in ["de10nano"]:
@@ -102,7 +102,7 @@ class PkgConfig(object):
 ]
 
 # Linker flags
-if self.TARGET in ["pynq", "ultra96"]:
+if self.TARGET in ["pynq", "ultra96", "zcu104"]:
 self.ldflags = [
 "-L/usr/lib",
 "-l:libcma.so"]
@@ -152,6 +152,23 @@ class PkgConfig(object):
 elif self.TARGET == "ultra96":
 self.fpga_device = "xczu3eg-sbva484-1-e"
 self.fpga_family = "zynq-ultrascale+"
+self.fpga_board = None
+self.fpga_board_rev = None
+self.fpga_freq = 333
+self.fpga_per = 2
+self.fpga_log_axi_bus_width = 7
+self.axi_prot_bits = '010'
+# IP register address map
+self.ip_reg_map_range = "0x1000"
+self.fetch_base_addr = "0xA000"
+self.load_base_addr = "0xA0001000"
+self.compute_base_addr = "0xA0002000"
+self.store_base_addr = "0xA0003000"
+elif self.TARGET == "zcu104":
+self.fpga_device = "xczu7ev-ffvc1156-2-e"
+self.fpga_family = "zynq-ultrascale+"
+self.fpga_board = "xilinx.com:zcu104:part0"
+self.fpga_board_rev = "1.1"
 self.fpga_freq = 333
 self.fpga_per = 2
 self.fpga_log_axi_bus_width = 7
@@ -166,6 +183,8 @@ class PkgConfig(object):
 # By default, we use the pynq parameters
 self.fpga_device = "xc7z020clg484-1"
 self.fpga_family = "zynq-7000"
+self.fpga_board = None
+self.fpga_board_rev = None
 self.fpga_freq = 100
 self.fpga_per = 7
 self.fpga_log_axi_bus_width = 6
diff --git a/config/vta_config.py b/config/vta_config.py
index 6396ae5..84bba62 100644
--- a/config/vta_config.py
+++ b/config/vta_config.py
@@ -40,6 +40,8 @@ def gen_target_name(pkg):
 return "VTA_TARGET_DE10_NANO"
 elif pkg.TARGET == "ultra96":
 return "VTA_TARGET_ULTRA96"
+elif pkg.TARGET == "zcu104":
+return "VTA_TARGET_ZCU104"
 else:
 return None
 
@@ -70,6 +72,8 @@ def gen_tcl_vivado(pkg, file):
 fo.write("\nconst TARGET {}".format(pkg.TARGET)

[incubator-tvm] branch main updated (9564925 -> c7ff885)

2020-10-15 Thread liangfu
This is an automated email from the ASF dual-hosted git repository.

liangfu pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git.


from 9564925  [Relay][Frontend][Onnx] Allow A to B broadcasting of 
batch_matmul and reverse strided slice (#6681)
 add c7ff885  Add µTVM Zephyr support + QEMU regression test (#6603)

No new revisions were added by this update.

Summary of changes:
 include/tvm/runtime/crt/error_codes.h  |   1 +
 include/tvm/runtime/crt/utvm_rpc_server.h  |  24 +-
 python/tvm/exec/rpc_server.py  |  69 ---
 python/tvm/micro/__init__.py   |   2 +-
 python/tvm/micro/artifact.py   | 108 +++-
 .../micro_kernel => micro/contrib}/__init__.py |   0
 python/tvm/micro/contrib/base.py   |  67 +++
 python/tvm/micro/contrib/zephyr.py | 621 +
 python/tvm/micro/debugger.py   |  25 +-
 python/tvm/micro/micro_binary.py   |  15 +-
 python/tvm/micro/micro_library.py  |  13 +-
 python/tvm/micro/session.py|  50 +-
 python/tvm/micro/transport.py  | 238 
 .../graph_tuner => micro/transport}/__init__.py|  15 +-
 python/tvm/micro/transport/base.py | 299 ++
 python/tvm/micro/transport/debug.py|  63 +++
 python/tvm/micro/transport/file_descriptor.py  | 105 
 python/tvm/micro/transport/subprocess.py   |  67 +++
 python/tvm/micro/transport/wakeup.py   |  74 +++
 src/runtime/crt/host/main.cc   |  19 +-
 src/runtime/crt/utvm_rpc_server/rpc_server.cc  |  50 +-
 src/runtime/micro/micro_session.cc | 136 -
 tests/lint/check_file_type.py  |   3 +
 tests/micro/qemu/.gitignore|   2 +
 tests/micro/qemu/test_zephyr.py| 143 +
 tests/micro/qemu/zephyr-runtime/.gitignore |   3 +
 tests/micro/qemu/zephyr-runtime/CMakeLists.txt |  27 +
 .../micro/qemu/zephyr-runtime/crt/crt_config.h |  22 +-
 .../qemu/zephyr-runtime/prj.conf}  |  21 +-
 .../zephyr-runtime/qemu-hack/qemu-system-i386} |  26 +-
 .../micro/qemu/zephyr-runtime/sample.yaml  |  12 +-
 tests/micro/qemu/zephyr-runtime/src/main.c | 238 
 tests/python/unittest/test_crt.py  |   3 +-
 tests/python/unittest/test_micro_artifact.py   | 137 +
 tests/scripts/task_python_microtvm.sh  |   9 +
 35 files changed, 2244 insertions(+), 463 deletions(-)
 copy python/tvm/{topi/arm_cpu/cortex_m7/micro_kernel => 
micro/contrib}/__init__.py (100%)
 create mode 100644 python/tvm/micro/contrib/base.py
 create mode 100644 python/tvm/micro/contrib/zephyr.py
 delete mode 100644 python/tvm/micro/transport.py
 copy python/tvm/{autotvm/graph_tuner => micro/transport}/__init__.py (69%)
 create mode 100644 python/tvm/micro/transport/base.py
 create mode 100644 python/tvm/micro/transport/debug.py
 create mode 100644 python/tvm/micro/transport/file_descriptor.py
 create mode 100644 python/tvm/micro/transport/subprocess.py
 create mode 100644 python/tvm/micro/transport/wakeup.py
 create mode 100644 tests/micro/qemu/.gitignore
 create mode 100644 tests/micro/qemu/test_zephyr.py
 create mode 100644 tests/micro/qemu/zephyr-runtime/.gitignore
 create mode 100644 tests/micro/qemu/zephyr-runtime/CMakeLists.txt
 copy src/runtime/crt/crt_config-template.h => 
tests/micro/qemu/zephyr-runtime/crt/crt_config.h (77%)
 copy tests/{scripts/task_python_ethosn_tests.sh => 
micro/qemu/zephyr-runtime/prj.conf} (74%)
 mode change 100755 => 100644
 copy tests/{lint/cppdocs.sh => 
micro/qemu/zephyr-runtime/qemu-hack/qemu-system-i386} (64%)
 copy conda/conda_build_config.yaml => 
tests/micro/qemu/zephyr-runtime/sample.yaml (88%)
 create mode 100644 tests/micro/qemu/zephyr-runtime/src/main.c
 create mode 100644 tests/python/unittest/test_micro_artifact.py



[incubator-tvm] branch master updated: [VTA][Xilinx] Update to Vivado 2020.1 and Pynq 2.5 (#6402)

2020-09-05 Thread liangfu
This is an automated email from the ASF dual-hosted git repository.

liangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git


The following commit(s) were added to refs/heads/master by this push:
 new 3451ccb  [VTA][Xilinx] Update to Vivado 2020.1 and Pynq 2.5 (#6402)
3451ccb is described below

commit 3451ccb14384c2d98738efa6ff9b073f74d87ad2
Author: Thierry Moreau 
AuthorDate: Sat Sep 5 03:54:59 2020 -0700

[VTA][Xilinx] Update to Vivado 2020.1 and Pynq 2.5 (#6402)

* vivado version update

* update docs
---
 3rdparty/vta-hw  |  2 +-
 docs/vta/install.rst | 24 +---
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/3rdparty/vta-hw b/3rdparty/vta-hw
index db65157..9b178fd 16
--- a/3rdparty/vta-hw
+++ b/3rdparty/vta-hw
@@ -1 +1 @@
-Subproject commit db65157208ec8fabb7b548c94596211b9db04190
+Subproject commit 9b178fdb387bffc708f2448a82e85b4737239aed
diff --git a/docs/vta/install.rst b/docs/vta/install.rst
index fe6b468..e47f84d 100644
--- a/docs/vta/install.rst
+++ b/docs/vta/install.rst
@@ -114,8 +114,8 @@ Setup your Pynq board based on the `Pynq board getting 
started tutorial http://www.pynq.io/board.html>`_ (released February 22rd 2019), and have 
imaged your SD card with it (we recommend the free `Etcher 
<https://etcher.io/>`_ program).
-* For this test setup, follow the `"Connect to a Computer" 
<http://pynq.readthedocs.io/en/latest/getting_started.html#connect-to-a-computer>`_
 Ethernet setup instructions. To be able to talk to the board, make sure to 
`assign your computer a static IP address 
<http://pynq.readthedocs.io/en/latest/appendix.html#assign-your-computer-a-static-ip>`_
+* Make sure that you've downloaded the latest Pynq image, `PYNQ-Z1 v2.5 
<http://www.pynq.io/board.html>`_, and have imaged your SD card with it (we 
recommend the free `Etcher <https://etcher.io/>`_ program).
+* For this test setup, follow the `"Connect to a Computer" 
<https://pynq.readthedocs.io/en/latest/getting_started/pynq_z1_setup.html>`_ 
Ethernet setup instructions. To be able to talk to the board, make sure to 
`assign your computer a static IP address 
<https://pynq.readthedocs.io/en/latest/appendix.html#assign-your-computer-a-static-ip>`_
 
 Once the board is powered on and connected to your development machine, try 
connecting to it to make sure you've properly set up your Pynq board:
 
@@ -156,6 +156,8 @@ The build process should take roughly 5 minutes.
cd build
cmake ..
make runtime vta -j2
+   # FIXME (tmoreau89): remove this step by fixing the cmake build
+   make clean; make runtime vta -j2
# Build VTA RPC server (takes 1 min)
cd ..
sudo ./apps/vta_rpc/start_rpc_server.sh # pw is 'xilinx'
@@ -310,33 +312,33 @@ If you're interested in generating the Xilinx FPGA 
bitstream on your own instead
 Xilinx Toolchain Installation
 ^
 
-We recommend using Vivado 2018.3 since our scripts have been tested to work on 
this version of the Xilinx toolchains.
+We recommend using Vivado 2020.1 since our scripts have been tested to work on 
this version of the Xilinx toolchains.
 Our guide is written for Linux (Ubuntu) installation.
 
-You’ll need to install Xilinx’ FPGA compilation toolchain, `Vivado HL WebPACK 
2018.3 <https://www.xilinx.com/products/design-tools/vivado.html>`_, which a 
license-free version of the Vivado HLx toolchain.
+You’ll need to install Xilinx’ FPGA compilation toolchain, `Vivado HL WebPACK 
2020.1 <https://www.xilinx.com/products/design-tools/vivado.html>`_, which a 
license-free version of the Vivado HLx toolchain.
 
 Obtaining and Launching the Vivado GUI Installer
 """"""""""""""""""""""""""""""""""""""""""""""""
 
-1. Go to the `download webpage 
<https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vivado-design-tools/2018-3.html>`_,
 and download the Linux Self Extracting Web Installer for Vivado HLx 2018.3: 
WebPACK and Editions.
+1. Go to the `download webpage 
<https://www.xilinx.com/support/download/index.html/content/xilinx/en/downloadNav/vivado-design-tools/2020-1.html>`_,
 and download the Linux Self Extracting Web Installer for Vivado HLx 2020.1: 
WebPACK and Editions.
 2. You’ll have to sign in with a Xilinx account. This requires a Xilinx 
account creation that will take 2 minutes.
-3. Complete the Name and Address Verification by clicking “Next”, and you will 
get the opportunity to download a binary file, called 
``Xilinx_Vivado_SDK_Web_2018.3_1207_2324_Lin64.bin``.
+3. Complete the Name and Address Verification by clicking “Next”, and you will

[incubator-tvm] branch master updated (9e1fe82 -> c815d28)

2020-08-09 Thread liangfu
This is an automated email from the ASF dual-hosted git repository.

liangfu pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git.


from 9e1fe82  [BYOC][JSON] json_node.h should include data_type.h (#6224)
 add c815d28  [uTVM] fix crt building and running error (#6231)

No new revisions were added by this update.

Summary of changes:
 include/tvm/runtime/crt/module.h| 7 +++
 src/support/str_escape.h| 4 +++-
 src/target/source/codegen_c_host.cc | 7 ---
 3 files changed, 14 insertions(+), 4 deletions(-)



[incubator-tvm] branch master updated (dff715a -> 3fda73f)

2020-07-16 Thread liangfu
This is an automated email from the ASF dual-hosted git repository.

liangfu pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git.


from dff715a  Fix error message in Buffer::vstore, NFC (#6056)
 add 3fda73f  [RUNTIME][CRT] init TVMPackedFunc's name (#6044)

No new revisions were added by this update.

Summary of changes:
 src/runtime/crt/common/packed_func.c | 3 +++
 1 file changed, 3 insertions(+)



[incubator-tvm] branch master updated: µTVM CRT modifications for on-device RPC server (#5921)

2020-07-12 Thread liangfu
This is an automated email from the ASF dual-hosted git repository.

liangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git


The following commit(s) were added to refs/heads/master by this push:
 new d6ceba0  µTVM CRT modifications for on-device RPC server (#5921)
d6ceba0 is described below

commit d6ceba044b2427d493575c26749164aef2efaf30
Author: Andrew Reusch 
AuthorDate: Sun Jul 12 02:28:31 2020 -0700

µTVM CRT modifications for on-device RPC server (#5921)

* Reorganize CRT into parts, public API, and add standalone build.

 * Create a make-based build in src/runtime/crt. This is intended to
   be built in build/standalone_crt (generated by running ninja
   standalone_crt in build/). Its job is to build CRT without
   depending on headers not explicitly allowed in CRT.
 * Create a "public-facing" CRT API targeted to firmware running
   alongside CRT in include/tvm/runtime/crt. Developers who are
   integrating the CRT are the target of this API.
 * Reorganize CRT internally into common/ and graph_runtime/
   pieces. Build each piece as a separate statically-linked library.
 * Slim down TVMGraphRuntime public-facing API to just the functions
   that are used externally.
 * Updates to apps/bundle_deploy to make this work.

* Add TVMFuncRegistry, CRT test infrastructure, and tests.

 * Also add error_codes.h, a file containing error codes returned by CRT.

* Add TVMErrorf()

* [API_CHANGE] Integrate func registry into CRT.

 * NOTE: This changes the default API for functions exposed under the
   CRT by the TVMFuncCall API. `resource_handle` is now always given
   as a new 6th parameter.
 * `resource_handle` is NULL when invoked on a global function and a
   pointer to the module owning the function otherwise.

* Generalize arena-based memory manager.

* lint

* Fix git-clang-format arg parsing

* add apache header

* add mutable func registry tests

* git-clang-format

* fix more lint

* Move memory_test to crttests.

* fix tests

* checkpoint

* checkpoint

* bundle_deploy demo_static works

* rm debug printf

* git-clang-format

* fix lint

* add asf header

* pylint

* update build configs for jenkins

* make regression compiler happy

* fix build errors in regression GCC

* address comments

* git-clang-format

* fix for 32-bit cpp regression

* fix incorrect use of memcpy and tests for 32-bit

* clang-format
---
 CMakeLists.txt |   1 +
 Makefile   |   3 +
 apps/bundle_deploy/Makefile|  48 ++-
 apps/bundle_deploy/build_model.py  |  18 +-
 apps/bundle_deploy/bundle.c|   2 +
 apps/bundle_deploy/bundle_static.c |  63 ++--
 .../{runtime.c => crt_config/crt_config.h} |  26 +-
 cmake/config.cmake |   3 +
 cmake/modules/StandaloneCrt.cmake  | 151 ++
 include/tvm/runtime/c_backend_api.h|   4 +-
 .../runtime.cc => include/tvm/runtime/crt/crt.h|  42 +--
 include/tvm/runtime/crt/error_codes.h  |  55 
 include/tvm/runtime/crt/func_registry.h| 137 +
 include/tvm/runtime/crt/graph_runtime.h| 115 +++
 include/tvm/runtime/crt/memory.h   |  12 +-
 {src => include/tvm}/runtime/crt/module.h  |  22 +-
 include/tvm/runtime/crt/packed_func.h  |  78 +
 .../tvm/runtime/crt/platform.h |  49 ++-
 python/tvm/micro/func_registry.py  |  76 +
 src/runtime/crt/.gitignore |   1 +
 src/runtime/crt/Makefile   |  57 
 src/runtime/crt/{ => common}/crt_backend_api.c |  11 +-
 src/runtime/crt/common/crt_runtime_api.c   | 335 +
 src/runtime/crt/common/func_registry.c | 152 ++
 src/runtime/crt/{ => common}/memory.c  | 275 +++--
 src/runtime/crt/{ => common}/ndarray.c |   7 +-
 .../crt/{packed_func.h => common/packed_func.c}| 104 +++
 src/runtime/crt/crt_runtime_api.c  |  97 --
 src/runtime/crt/graph_runtime.h| 215 -
 .../crt/{ => graph_runtime}/graph_runtime.c|  62 ++--
 src/runtime/crt/{ => graph_runtime}/load_json.c|   7 +-
 .../runtime.c => src/runtime/crt/host/crt_config.h |  28 +-
 .../runtime/crt/internal/common/func_registry.h}   |  36 +--
 .../tvm/runtime/crt/internal/common}/logging.h |   8 +-
 .../tvm/runtime/crt/internal/common/memory.h   

[incubator-tvm] branch v0.6 updated: [BACKPORT-0.6][Bugfix][VTA] Enable streamlined GEMM execution (#5893)

2020-06-23 Thread liangfu
This is an automated email from the ASF dual-hosted git repository.

liangfu pushed a commit to branch v0.6
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git


The following commit(s) were added to refs/heads/v0.6 by this push:
 new 6ae8939  [BACKPORT-0.6][Bugfix][VTA] Enable streamlined GEMM execution 
(#5893)
6ae8939 is described below

commit 6ae89396130d0207ff7182a11ae769165cbc5564
Author: Liangfu Chen 
AuthorDate: Tue Jun 23 14:32:27 2020 +0800

[BACKPORT-0.6][Bugfix][VTA] Enable streamlined GEMM execution (#5893)

* [BACKPORT-0.6][Bugfix][VTA] Enable streamlined GEMM execution

* [BACKPORT-0.6][Bugfix][VTA] Fix an issue in updating uop_idx in the 
TensorGemm module
---
 .../chisel/src/main/scala/core/TensorGemm.scala| 61 --
 1 file changed, 46 insertions(+), 15 deletions(-)

diff --git a/vta/hardware/chisel/src/main/scala/core/TensorGemm.scala 
b/vta/hardware/chisel/src/main/scala/core/TensorGemm.scala
index 3f5f387..f982176 100644
--- a/vta/hardware/chisel/src/main/scala/core/TensorGemm.scala
+++ b/vta/hardware/chisel/src/main/scala/core/TensorGemm.scala
@@ -46,7 +46,10 @@ class MAC(aBits: Int = 8, bBits: Int = 8, cBits: Int = 16) 
extends Module {
   io.y := add
 }
 
-/** Pipelined adder */
+/** PipeAdder
+  *
+  * This unit loads input bits into register and performs addition in the next 
cycle
+  */
 class PipeAdder(aBits: Int = 8, bBits: Int = 8) extends Module {
   val outBits = Math.max(aBits, bBits) + 1
   val io = IO(new Bundle {
@@ -61,6 +64,27 @@ class PipeAdder(aBits: Int = 8, bBits: Int = 8) extends 
Module {
   io.y := add
 }
 
+/** Adder
+  *
+  * This unit wires input bits to an adder directly.
+  * The output comes out of combinational logic without waiting for another 
cycle.
+  */
+class Adder(aBits: Int = 8, bBits: Int = 8) extends Module {
+  val outBits = Math.max(aBits, bBits) + 1
+  val io = IO(new Bundle {
+val a = Input(SInt(aBits.W))
+val b = Input(SInt(bBits.W))
+val y = Output(SInt(outBits.W))
+  })
+  val add = Wire(SInt(outBits.W))
+  val rA = Wire(SInt(aBits.W))
+  val rB = Wire(SInt(bBits.W))
+  rA := io.a
+  rB := io.b
+  add := rA +& rB
+  io.y := add
+}
+
 /** Pipelined DotProduct based on MAC and PipeAdder */
 class DotProduct(aBits: Int = 8, bBits: Int = 8, size: Int = 16)
 extends Module {
@@ -80,9 +104,11 @@ class DotProduct(aBits: Int = 8, bBits: Int = 8, size: Int 
= 16)
   val m = Seq.fill(s(0))(Module(new MAC(aBits, bBits, cBits = 1))) // # of 
total vector pairs
   val a = Seq.tabulate(p)(
 i =>
-  Seq.fill(s(i + 1))(Module(new PipeAdder(
-aBits = (b + i + 1),
-bBits = (b + i + 1) // # adders within each layer
+  Seq.fill(s(i + 1))(
+if (i == 0)
+  Module(new PipeAdder(aBits = (b + i + 1), bBits = (b + i + 1)))
+else
+  Module(new Adder(aBits = (b + i + 1), bBits = (b + i + 1) // # 
adders within each layer
 
   // Vector MACs
   for (i <- 0 until s(0)) {
@@ -126,8 +152,9 @@ class MatrixVectorMultiplication(implicit p: Parameters) 
extends Module {
   })
   val dot = Seq.fill(size)(
 Module(new DotProduct(aBits = inpBits, bBits = wgtBits, size)))
-  val acc = Seq.fill(size)(
-Module(new Pipe(UInt(accBits.W), latency = log2Ceil(size) + 1)))
+  // Latency is defined as two in the following, because there is one cycle in 
the MAC module,
+  // and another cycle in the pipelined adders as the first layer of the 
accumulator
+  val acc = Seq.fill(size)(Module(new Pipe(UInt(accBits.W), latency = 2)))
   val add = Seq.fill(size)(Wire(SInt(accBits.W)))
   val vld = Wire(Vec(size, Bool()))
 
@@ -188,7 +215,9 @@ class TensorGemm(debug: Boolean = false)(implicit p: 
Parameters)
   val wgt_i = Reg(chiselTypeOf(dec.uop_end))
   val pBits = log2Ceil(p(CoreKey).blockOut) + 1
   val inflight = Reg(UInt(pBits.W))
-  val wrpipe = Module(new Pipe(chiselTypeOf(dec.uop_end), latency = pBits))
+  // Latency is defined as two in the following, because there is one cycle in 
the MAC module,
+  // and another cycle in the pipelined adders as the first layer of the 
accumulator
+  val wrpipe = Module(new Pipe(chiselTypeOf(dec.uop_end), latency = 2))
   val done = inflight === 0.U &
 ((state === sExe &
   cnt_o === dec.lp_0 - 1.U &
@@ -236,11 +265,14 @@ class TensorGemm(debug: Boolean = false)(implicit p: 
Parameters)
   when(state === sIdle) {
 inflight := 0.U
   }.elsewhen(!dec.reset) {
-when(state === sReadTensor) { // issue a tensor
-  inflight := inflight + 1.U
-}.elsewhen(mvc.io.acc_o.data.valid) { // commit a tensor
-  inflight := inflight - 1.U
-}
+when((state === sReadTensor) && mvc.io.acc_o.data.valid) { // issue & 
commit
+  inflight := inflight
+}.elsewhen(state === sReadTensor) { // issue a tensor
+inflight := inflight + 1.U
+  }
+  .elsewhen(mvc.io.acc_o.data.valid) { // commit a tensor
+inflight := infl

[incubator-tvm] branch master updated: [CI] Update the ci-gpu to the latest build with the new vulkansdk. (#5571)

2020-05-12 Thread liangfu
This is an automated email from the ASF dual-hosted git repository.

liangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git


The following commit(s) were added to refs/heads/master by this push:
 new 97b5b78  [CI] Update the ci-gpu to the latest build with the new 
vulkansdk. (#5571)
97b5b78 is described below

commit 97b5b7850c6fd675b875ca5bf2f3d26dac0ef3bb
Author: Tianqi Chen 
AuthorDate: Tue May 12 02:41:04 2020 -0700

[CI] Update the ci-gpu to the latest build with the new vulkansdk. (#5571)
---
 Jenkinsfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 9280740..60ee142 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -44,7 +44,7 @@
 //
 
 ci_lint = "tvmai/ci-lint:v0.61"
-ci_gpu = "tvmai/ci-gpu:v0.63"
+ci_gpu = "tvmai/ci-gpu:v0.64"
 ci_cpu = "tvmai/ci-cpu:v0.62"
 ci_wasm = "tvmai/ci-wasm:v0.60"
 ci_i386 = "tvmai/ci-i386:v0.52"