[GitHub] [tvm] huajsj commented on a diff in pull request #11557: [Runtime][PipelineExecutor] Tutorial of using pipeline executor.

GitBox Fri, 22 Jul 2022 00:37:39 -0700


huajsj commented on code in PR #11557:
URL: https://github.com/apache/tvm/pull/11557#discussion_r927376733



##########
gallery/how_to/work_with_relay/using_pipeline_executor.py:
##########
@@ -0,0 +1,250 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""
+Using Pipeline Executor in Relay
+=================================
+**Author**: `Hua Jiang <https://github.com/huajsj>`_
+
+This is a short tutorial on how to use "Pipeline Executor" with Relay.
+"""
+import tvm
+from tvm import te
+import numpy as np
+from tvm.contrib import graph_executor as runtime
+from tvm.relay.op.contrib.cutlass import partition_for_cutlass
+from tvm import relay
+from tvm.relay import testing
+import tvm.testing
+from tvm.contrib.cutlass import (
+    has_cutlass,
+    num_cutlass_partitions,
+    finalize_modules,
+    finalize_modules_vm,
+)
+
+img_size = 8
+#######################################################################
+# Create a simple network, this network can be a pre-trained model too.
+# ---------------------------------------------------------------------
+# Let's create a very simple network for demonstration.
+# It consists of convolution, batch normalization, dense, and ReLU activation.
+def get_network():
+    out_channels = 16
+    batch_size = 1
+    data = relay.var("data", relay.TensorType((batch_size, 3, img_size, 
img_size), "float16"))
+    dense_weight = relay.var(
+        "dweight", relay.TensorType((batch_size, 16 * img_size * img_size), 
"float16")
+    )
+    weight = relay.var("weight")
+    second_weight = relay.var("second_weight")
+    bn_gamma = relay.var("bn_gamma")
+    bn_beta = relay.var("bn_beta")
+    bn_mmean = relay.var("bn_mean")
+    bn_mvar = relay.var("bn_var")
+    simple_net = relay.nn.conv2d(
+        data=data, weight=weight, kernel_size=(3, 3), channels=out_channels, 
padding=(1, 1)
+    )
+    simple_net = relay.nn.batch_norm(simple_net, bn_gamma, bn_beta, bn_mmean, 
bn_mvar)[0]
+    simple_net = relay.nn.relu(simple_net)
+    simple_net = relay.nn.batch_flatten(simple_net)
+    simple_net = relay.nn.dense(simple_net, dense_weight)
+    simple_net = relay.Function(relay.analysis.free_vars(simple_net), 
simple_net)
+    data_shape = (batch_size, 3, img_size, img_size)
+    net, params = testing.create_workload(simple_net)
+    return net, params, data_shape
+
+
+net, params, data_shape = get_network()
+###########################################
+# Splitting the network into two subgraphs.
+# -----------------------------------------
+# The 'graph_split' function, taken from a unit test, is just an example.
+# Users can write their own customized logic to split the graph.
+import inspect
+import os
+
+test_path = os.path.dirname(inspect.getfile(lambda: None))
+os.sys.path.append(os.path.join(test_path, "../../../tests/python/relay"))
+from test_pipeline_executor import graph_split
+
+###########################################
+# Splitting the network into two subgraphs.
+split_config = [{"op_name": "nn.relu", "op_index": 0}]
+subgraphs = graph_split(net["main"], split_config, params)
+###########################################################
+# The generated subgraphs should look something like below.
+
+"""
+#subgraphs[0])
+
+ def @main(%data: Tensor[(1, 3, img_size, img_size), float16]) {
+  %0 = nn.conv2d(%data, meta[relay.Constant][0] /* ty=Tensor[(16, 3, 3, 3), 
float16] */, padding=[1, 1, 1, 1], channels=16, kernel_size=[3, 3]) /* 
ty=Tensor[(1, 16, img_size, img_size), float16] */;
+  %1 = nn.batch_norm(%0, meta[relay.Constant][1] /* ty=Tensor[(16), float16] 
*/, meta[relay.Constant][2] /* ty=Tensor[(16), float16]*/, 
meta[relay.Constant][3] /* ty=Tensor[(16), float16] */, meta[relay.Constant][4] 
/* ty=Tensor[(16), float16] */) /* ty=(Tensor[(1,16, img_size, img_size), 
float16], Tensor[(16), float16], Tensor[(16), float16]) */;
+  %2 = %1.0;
+  nn.relu(%2) /* ty=Tensor[(1, 16, img_size, img_size), float16] */
+ }
+
+#subgraphs[1]
+
+ def @main(%data_n_0: Tensor[(1, 16, 8, 8), float16] /* ty=Tensor[(1, 16, 8, 
8), float16] */) {
+  %0 = nn.batch_flatten(%data_n_0) /* ty=Tensor[(1, 1024), float16] */;
+  nn.dense(%0, meta[relay.Constant][0] /* ty=Tensor[(1, 1024), float16] */, 
units=None) /* ty=Tensor[(1, 1), float16] */
+ }
+
+"""
+
+# sphinx_gallery_start_ignore
+from tvm import testing
+
+testing.utils.install_request_hook(depth=3)
+# sphinx_gallery_end_ignore
+
+#########################################
+# Build the subgraph with cutlass target.
+# ---------------------------------------
+
+cutlass = tvm.target.Target(
+    {
+        "kind": "cutlass",
+        "sm": int(tvm.target.Target("cuda").arch.split("_")[1]),
+        "use_3xtf32": True,
+        "split_k_slices": [1],
+        "profile_all_alignments": False,
+        "find_first_valid": True,
+        "use_multiprocessing": True,
+        "use_fast_math": False,
+        "tmp_dir": "./tmp",
+    },
+    host=tvm.target.Target("llvm"),
+)
+
+
+def cutlass_build(mod, target, params=None, target_host=None, 
mod_name="default"):
+    target = [target, cutlass]
+    lib = relay.build_module.build(
+        mod, target=target, params=params, target_host=target_host, 
mod_name=mod_name
+    )
+    return lib
+
+
+###########################################################
+# Run the two subgraphs in pipeline with pipeline executor.
+# ---------------------------------------------------------
+# Set 'USE_PIPELINE_EXECUTOR' to ON and 'USE_CUTLASS' to ON in cmake.
+from tvm.contrib import graph_executor, pipeline_executor, 
pipeline_executor_build
+
+#########################################
+# Create subgraph pipeline configuration.
+# Associate a subgraph module with a target.
+# Use CUTLASS BYOC to build the second subgraph module.
+mod0, mod1 = subgraphs[0], subgraphs[1]
+# Use cutlass as the codegen.
+mod1 = partition_for_cutlass(mod1)
+#################################################
+# Get the pipeline executor configuration object.
+pipe_config = pipeline_executor_build.PipelineConfig()
+###########################################################################
+# Set the compile target of the subgraph module.
+pipe_config[mod0].target = "llvm"
+pipe_config[mod0].dev = tvm.cpu(0)
+###############################################################################
+# Set the cpu affinity for control flow, for example using cpu 0 for control 
flow.
+pipe_config[mod1].cpu_affinity = "0"
+##############################################################
+# Set the compile target of the second subgraph module as cuda.
+pipe_config[mod1].target = "cuda"
+pipe_config[mod1].dev = tvm.device("cuda", 0)
+pipe_config[mod1].build_func = cutlass_build
+pipe_config[mod1].export_cc = "nvcc"
+#################################################################################
+# Set the cpu affinity for control flow, for example using cpu 1 for control flow.
+pipe_config[mod1].cpu_affinity = "1"
+pipe_config["input"]["data"].connect(pipe_config[mod0]["input"]["data"])
+pipe_config[mod0]["output"][0].connect(pipe_config[mod1]["input"]["data_n_0"])
+pipe_config[mod1]["output"]["0"].connect(pipe_config["output"][0])

Review Comment:
   These three lines connect the subgraphs to build the pipeline; they are not
related to affinity. I have added a detailed explanation.
   
   "data" and "data_n_0" come from subgraphs, which is a list of subgraphs;
print(subgraphs[0]) and print(subgraphs[1]) will show these names. If a name
that does not exist is given here, the API will throw an error.
   
   pipe_config[mod0]["output"][0] means "the first output interface" of "mod0".
The "0" on line 178 is a typo; fixed.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

[GitHub] [tvm] huajsj commented on a diff in pull request #11557: [Runtime][PipelineExecutor] Tutorial of using pipeline executor.

Reply via email to