[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-15 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r652035337



##
File path: src/relay/transforms/fp32_to_fp16.h
##
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file fp32_to_fp16.h
+ * \brief Utilities and common types used for FP32->FP16 pass.
+ */
+#ifndef TVM_RELAY_TRANSFORMS_FP32_TO_FP16_H_
+#define TVM_RELAY_TRANSFORMS_FP32_TO_FP16_H_
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace tvm {
+namespace relay {
+
+struct FP16OpDType {
+  DataType accumulation_dtype;
+  DataType output_dtype;
+};
+
+// GREEN colored ops should always be done in FP16 due to the speed and memory savings
+// GRAY colored ops can be done in FP16 but don't have speedups to justify a dedicated cast.
+// RED colored ops should not be done in FP16 due to numerical reasons.
+enum FP16ConversionCategory { RED, GRAY, GREEN };
+
+using OpStringSet = std::unordered_set<std::string>;
+
+// Default lists inspired from TF's classifications:

Review comment:
   This is now done.
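
A rough Python sketch of what the RED/GRAY/GREEN scheme in the hunk above amounts to (the op names are illustrative only, not the pass's actual default lists, which live in fp32_to_fp16.h; ops missing from every list fall back to RED with a warning, as noted later in this thread):

    import warnings

    GREEN_OPS = {"nn.conv2d", "nn.dense"}            # always worth casting to fp16
    GRAY_OPS = {"add", "nn.relu", "nn.batch_norm"}   # fp16 only if neighbours already are
    RED_OPS = {"exp", "nn.softmax"}                  # keep fp32 for numerical safety

    def color_of(op_name):
        if op_name in GREEN_OPS:
            return "GREEN"
        if op_name in GRAY_OPS:
            return "GRAY"
        if op_name not in RED_OPS:
            warnings.warn(f"Op {op_name} is not registered; defaulting to RED")
        return "RED"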

##
File path: src/relay/transforms/to_mixed_precision.cc
##
@@ -0,0 +1,356 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *
+ * \file to_mixed_precision.cc
+ * \brief Automatic mixed precision for relay graphs. i.e. turn a graph into fp16 form.
+ */
+#include "to_mixed_precision.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+
+// A callable which hashes std::pair
+struct pair_hash {
+  template <class T1, class T2>
+  std::size_t operator()(const std::pair<T1, T2>& pair) const {
+    auto h1 = std::hash<T1>()(pair.first);
+    auto h2 = std::hash<T2>()(pair.second);
+
+    // Use boost's combine_hash strategy
+    return h1 ^ (h1 + 0x9e3779b9 + (h2 << 6) + (h2 >> 2));
+  }
+};
+
+// A map of a parent node and a wanted dtype to existing nodes casted to the wanted dtype
+using CachedCastNodes = std::unordered_map<std::pair<const ExprNode*, DataType>, Expr, pair_hash>;
+
+// A function which maps CallNodes to their initial conversion color
+using ColorFunc = std::function;
+
+// A function which maps MIXED_PRECISION_ALWAYS CallNodes to wanted 
accumulation and output dtypes
+using OutputDtypeFunc = std::function;
+
+class MixedPrecisionPass : public MixedModeMutator {
+ private:
+  CachedCastNodes cast_nodes_cache;
+  const ColorFunc colorer;
+  const OutputDtypeFunc output_dtype_func;
+  const DataType mixed_precision_type;
+
+  Attrs GetNewAttrs(const CallNode* call, const DataType& accumulation_dtype) 
const {
+/* If the accumulation dtype is in the attributes make a copy and mutate 
the field. */
+Attrs new_attrs = Attrs(call->attrs);
+if (new_attrs.get() != nullptr) {
+  // TODO(AndrewZhaoLuo): Figure out a better way to do this
+  // modify output_dtype attributes (accumulation dtypes for ops)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, 

[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-14 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r651431025



##
File path: src/relay/transforms/fp32_to_fp16.cc
##
@@ -0,0 +1,337 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *
+ * \file fp32_to_fp16.cc
+ * \brief Rewrite a graph into an fp16 form.
+ */
+#include "fp32_to_fp16.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+
+// A callable which hashes std::pair
+struct pair_hash {
+  template <class T1, class T2>
+  std::size_t operator()(const std::pair<T1, T2>& pair) const {
+    auto h1 = std::hash<T1>()(pair.first);
+    auto h2 = std::hash<T2>()(pair.second);
+
+    // Use boost's combine_hash strategy
+    return h1 ^ (h1 + 0x9e3779b9 + (h2 << 6) + (h2 >> 2));
+  }
+};
+
+// A map of a parent node and a wanted dtype to existing nodes casted to the wanted dtype
+using CachedCastNodes = std::unordered_map<std::pair<const ExprNode*, DataType>, Expr, pair_hash>;
+
+// A function which maps CallNodes to their initial conversion color
+using ColorFunc = std::function<FP16ConversionCategory(const CallNode*)>;
+
+// A function which maps green CallNodes to wanted accumulation and output dtypes
+using OutputDtypeFunc = std::function<FP16OpDType(const CallNode*)>;
+
+class AmpGraphCreator : public ExprMutator {
+ private:
+  CachedCastNodes cast_nodes_cache;
+  const ColorFunc colorer;
+  const OutputDtypeFunc output_dtype_func;
+
+  Attrs GetNewAttrs(const CallNode* call, const DataType& accumulation_dtype) const {
+    /* If the accumulation dtype is in the attributes make a copy and mutate the field. */
+Attrs new_attrs = Attrs(call->attrs);
+if (new_attrs.get() != nullptr) {
+  // TODO(AndrewZhaoLuo): Figure out a better way to do this
+  // modify output_dtype attributes (accumulation dtypes for ops)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = 
new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  }
+
+  // modify dtype attributes (creating new tensors of type dtype)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsDType(attrs, accumulation_dtype);
+  }
+}
+
+return new_attrs;
+  }
+
+  template <typename T>
+  void ModifyAttrsOutputDType(const T* attrs, const DataType& accumulation_dtype) const {
+    /*
+     Helper template to modify relevant attributes with out_dtype type.
+     These represent accumulation dtypes for some operations e.g.
+     conv2d might take in fp16 and give a fp32 result.
+     Attrs is const because we get it as a const.
+     */
+    T* mutable_attrs = const_cast<T*>(attrs);
+
+    DataType cur_type = (mutable_attrs->out_dtype);
+    if (cur_type.is_float() || cur_type.is_void()) mutable_attrs->out_dtype = accumulation_dtype;
+  }
+
+  template <typename T>
+  void ModifyAttrsDType(const T* attrs, const DataType& accumulation_dtype) const {
+/*
+ Helper template to modify relevant attributes with dtype type.
+ This determines the output dtype for some ops. For example
+ zeros creates a tensor of zeros of the specified dtype.
+ Attrs is const 

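The out_dtype attribute this helper rewrites acts as an accumulation dtype: a green op can take fp16 inputs yet accumulate (and return) fp32, which the pass then casts back down. A minimal Relay sketch of the resulting pattern, mirroring the expected module in the unit tests quoted later in this thread:

    import tvm
    from tvm import relay

    data = relay.var("data", shape=(1, 3, 32, 32), dtype="float16")
    weight = relay.var("weight", shape=(5, 3, 3, 3), dtype="float16")
    # fp16 inputs, fp32 accumulation requested through out_dtype ...
    conv = relay.nn.conv2d(data, weight, strides=(1, 1), padding=(1, 1), out_dtype="float32")
    # ... and a cast back to fp16 so downstream ops keep running in half precision
    graph = relay.cast(conv, "float16")
    mod = tvm.relay.transform.InferType()(tvm.IRModule.from_expr(graph))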
[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-14 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r651430536



##
File path: src/relay/transforms/fp32_to_fp16.h
##
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file fp32_to_fp16.h
+ * \brief Utilities and common types used for FP32->FP16 pass.
+ */
+#ifndef TVM_RELAY_TRANSFORMS_FP32_TO_FP16_H_
+#define TVM_RELAY_TRANSFORMS_FP32_TO_FP16_H_
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace tvm {
+namespace relay {
+
+struct FP16OpDType {
+  DataType accumulation_dtype;
+  DataType output_dtype;
+};
+
+// GREEN colored ops should always be done in FP16 due to the speed and memory savings
+// GRAY colored ops can be done in FP16 but don't have speedups to justify a dedicated cast.
+// RED colored ops should not be done in FP16 due to numerical reasons.
+enum FP16ConversionCategory { RED, GRAY, GREEN };

Review comment:
   I've implemented the suggestions listed.
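
FP16OpDType above pairs an accumulation dtype with an output dtype per op. A rough Python sketch of the default behaviour the tests rely on ("accumulate to fp32, output fp16"); the op list here is illustrative, the real defaults are defined in C++:

    # Sketch of an OutputDtypeFunc: ops that expose an out_dtype attribute
    # (conv, dense, ...) accumulate in fp32 for accuracy but hand fp16 to their
    # consumers; everything else stays fp16 end to end.
    OUT_DTYPE_OPS = {"nn.conv2d", "nn.dense", "nn.batch_matmul"}

    def output_dtypes(op_name):
        if op_name in OUT_DTYPE_OPS:
            return ("float32", "float16")  # (accumulation_dtype, output_dtype)
        return ("float16", "float16")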




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org




[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-14 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r651148846



##
File path: src/relay/transforms/to_mixed_precision.cc
##
@@ -0,0 +1,356 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *
+ * \file to_mixed_precision.cc
+ * \brief Automatic mixed precision for relay graphs. i.e. turn a graph into fp16 form.
+ */
+#include "to_mixed_precision.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+
+// A callable which hashes std::pair
+struct pair_hash {
+  template <class T1, class T2>
+  std::size_t operator()(const std::pair<T1, T2>& pair) const {
+    auto h1 = std::hash<T1>()(pair.first);
+    auto h2 = std::hash<T2>()(pair.second);
+
+    // Use boost's combine_hash strategy
+    return h1 ^ (h1 + 0x9e3779b9 + (h2 << 6) + (h2 >> 2));
+  }
+};
+
+// A map of a parent node and a wanted dtype to existing nodes casted to the wanted dtype
+using CachedCastNodes = std::unordered_map<std::pair<const ExprNode*, DataType>, Expr, pair_hash>;
+
+// A function which maps CallNodes to their initial conversion color
+using ColorFunc = std::function;
+
+// A function which maps MIXED_PRECISION_ALWAYS CallNodes to wanted 
accumulation and output dtypes
+using OutputDtypeFunc = std::function;
+
+class MixedPrecisionPass : public MixedModeMutator {
+ private:
+  CachedCastNodes cast_nodes_cache;
+  const ColorFunc colorer;
+  const OutputDtypeFunc output_dtype_func;
+  const DataType mixed_precision_type;
+
+  Attrs GetNewAttrs(const CallNode* call, const DataType& accumulation_dtype) 
const {
+/* If the accumulation dtype is in the attributes make a copy and mutate 
the field. */
+Attrs new_attrs = Attrs(call->attrs);
+if (new_attrs.get() != nullptr) {
+  // TODO(AndrewZhaoLuo): Figure out a better way to do this
+  // modify output_dtype attributes (accumulation dtypes for ops)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = 
new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  }
+
+  // modify dtype attributes (creating new tensors of type dtype)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsDType(attrs, accumulation_dtype);
+  }
+}
+
+return new_attrs;
+  }
+
+  template <typename T>
+  void ModifyAttrsOutputDType(const T* attrs, const DataType& accumulation_dtype) const {
+    /*
+     Helper template to modify relevant attributes with out_dtype type.
+     These represent accumulation dtypes for some operations e.g.
+     conv2d might take in fp16 and give a fp32 result.
+     Attrs is const because we get it as a const.
+     */
+    T* mutable_attrs = const_cast<T*>(attrs);
+
+    DataType cur_type = (mutable_attrs->out_dtype);
+    if (cur_type.is_float() || cur_type.is_void()) mutable_attrs->out_dtype = accumulation_dtype;
+  }
+
+  template <typename T>
+  void ModifyAttrsDType(const T* attrs, const DataType& accumulation_dtype) const {
+/*
+ Helper template to modify relevant attributes with dtype type.
+ This determines 

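The helper cut off above, ModifyAttrsDType, targets dtype attributes rather than out_dtype, i.e. ops that create fresh tensors such as zeros/ones. In Relay terms the rewrite amounts to something like:

    from tvm import relay

    # An op with a `dtype` attribute creates a fresh tensor of that dtype;
    # rewriting the attribute lets the pass produce fp16 tensors directly
    # instead of creating fp32 ones and inserting a cast afterwards.
    z_fp32 = relay.zeros(shape=(2, 2), dtype="float32")   # before the rewrite
    z_fp16 = relay.zeros(shape=(2, 2), dtype="float16")   # after the rewrite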
[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-10 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r649467676



##
File path: src/relay/transforms/fp32_to_fp16.cc
##
@@ -0,0 +1,332 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *
+ * \file fp32_to_fp16.cc
+ * \brief Rewrite a graph into an fp16 form.
+ */
+#include "fp32_to_fp16.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+
+// A callable which hashes std::pair
+struct pair_hash {
+  template 
+  std::size_t operator()(const std::pair& pair) const {
+auto h1 = std::hash()(pair.first);
+auto h2 = std::hash()(pair.second);
+
+return h1 ^ (h2 << 1);
+  }
+};
+
+// A map of a parent node and a wanted dtype to existing nodes casted to the 
wanted dtype
+using CachedCastNodes = std::unordered_map, Expr, pair_hash>;
+
+// A function which maps CallNodes to their initial conversion color
+using ColorFunc = std::function;
+
+// A function which maps green CallNodes to wanted accumulation and output 
dtypes
+using OutputDtypeFunc = std::function;
+
+class AmpGraphCreator : public ExprMutator {
+ private:
+  CachedCastNodes cast_nodes_cache;
+  const ColorFunc colorer;
+  const OutputDtypeFunc output_dtype_func;
+
+  Attrs GetNewAttrs(const CallNode* call, const DataType& accumulation_dtype) 
const {
+/* If the accumulation dtype is in the attributes make a copy and mutate 
the field. */
+Attrs new_attrs = Attrs(call->attrs);
+if (new_attrs.get() != nullptr) {
+  // TODO(AndrewZhaoLuo): Figure out a better way to do this
+  // modify output_dtype attributes (accumulation dtypes for ops)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = 
new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  }
+
+  // modify dtype attributes (creating new tensors of type dtype)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsDType(attrs, accumulation_dtype);
+  }
+}
+
+return new_attrs;
+  }
+
+  template 
+  void ModifyAttrsOutputDType(const T* attrs, const DataType& 
accumulation_dtype) const {
+/*
+ Helper template to modify relevant attributes with out_dtype type.
+ These represent accumulation dtypes for some operations e.g.
+ conv2d might take in fp16 and give a fp32 result.
+ Attrs is const because we get it as a const.
+ */
+T* mutable_attrs = const_cast(attrs);
+if ((mutable_attrs->out_dtype).is_float()) mutable_attrs->out_dtype = 
accumulation_dtype;
+  }
+
+  template 
+  void ModifyAttrsDType(const T* attrs, const DataType& accumulation_dtype) 
const {
+/*
+ Helper template to modify relevant attributes with dtype type.
+ This determines the output dtype for some ops. For example
+ zeros creates a tensor of zeros of the specified dtype.
+ Attrs is const because we get it as a const.
+*/
+T* mutable_attrs = const_cast(attrs);
+if ((mutable_attrs->dtype).is_float()) 

[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-09 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r648649618



##
File path: python/tvm/relay/transform/transform.py
##
@@ -1199,3 +1198,20 @@ def FakeQuantizationToInteger():
 The registered SimplifyExpr pass.
 """
 return _ffi_api.FakeQuantizationToInteger()
+
+
+def AMPRewrite():

Review comment:
   Hmm, I would prefer `ToMixedPrecision` still if that is fine with you.
   
   The example you list only works for me because it exists under the `amp` 
namespace. `AutoCast` by itself without being part of `torch.cuda.amp` does not 
show mixed precision.








[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-09 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r648649618



##
File path: python/tvm/relay/transform/transform.py
##
@@ -1199,3 +1198,20 @@ def FakeQuantizationToInteger():
 The registered SimplifyExpr pass.
 """
 return _ffi_api.FakeQuantizationToInteger()
+
+
+def AMPRewrite():

Review comment:
   Hmm, I would prefer `ToMixedPrecision` still if that is fine with you.
   
   The example you list only works for me because it exists under the `amp` 
namespace. `AutoCast` by itself without being part of torch.cuda.amp does not 
show mixed precision.








[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-09 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r648649618



##
File path: python/tvm/relay/transform/transform.py
##
@@ -1199,3 +1198,20 @@ def FakeQuantizationToInteger():
 The registered SimplifyExpr pass.
 """
 return _ffi_api.FakeQuantizationToInteger()
+
+
+def AMPRewrite():

Review comment:
   Hmm, I would prefer `ToMixedPrecision` still if that is fine with you.
   
   The example you list only works for me because it exists under the amp 
namespace.








[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-09 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r648649618



##
File path: python/tvm/relay/transform/transform.py
##
@@ -1199,3 +1198,20 @@ def FakeQuantizationToInteger():
 The registered SimplifyExpr pass.
 """
 return _ffi_api.FakeQuantizationToInteger()
+
+
+def AMPRewrite():

Review comment:
   I'll go with autocast.








[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-09 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r648609224



##
File path: src/relay/transforms/fp32_to_fp16.cc
##
@@ -0,0 +1,337 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *
+ * \file fp32_to_fp16.cc
+ * \brief Rewrite a graph into an fp16 form.
+ */
+#include "fp32_to_fp16.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+
+// A callable which hashes std::pair
+struct pair_hash {
+  template 
+  std::size_t operator()(const std::pair& pair) const {
+auto h1 = std::hash()(pair.first);
+auto h2 = std::hash()(pair.second);
+
+// Use boost's combine_hash strategy
+return h1 ^ (h1 + 0x9e3779b9 + (h2 << 6) + (h2 >> 2));
+  }
+};
+
+// A map of a parent node and a wanted dtype to existing nodes casted to the 
wanted dtype
+using CachedCastNodes = std::unordered_map, Expr, pair_hash>;
+
+// A function which maps CallNodes to their initial conversion color
+using ColorFunc = std::function;
+
+// A function which maps green CallNodes to wanted accumulation and output 
dtypes
+using OutputDtypeFunc = std::function;
+
+class AmpGraphCreator : public ExprMutator {
+ private:
+  CachedCastNodes cast_nodes_cache;
+  const ColorFunc colorer;
+  const OutputDtypeFunc output_dtype_func;
+
+  Attrs GetNewAttrs(const CallNode* call, const DataType& accumulation_dtype) 
const {
+/* If the accumulation dtype is in the attributes make a copy and mutate 
the field. */
+Attrs new_attrs = Attrs(call->attrs);
+if (new_attrs.get() != nullptr) {
+  // TODO(AndrewZhaoLuo): Figure out a better way to do this
+  // modify output_dtype attributes (accumulation dtypes for ops)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = 
new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  }
+
+  // modify dtype attributes (creating new tensors of type dtype)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsDType(attrs, accumulation_dtype);
+  }
+}
+
+return new_attrs;
+  }
+
+  template 
+  void ModifyAttrsOutputDType(const T* attrs, const DataType& 
accumulation_dtype) const {
+/*
+ Helper template to modify relevant attributes with out_dtype type.
+ These represent accumulation dtypes for some operations e.g.
+ conv2d might take in fp16 and give a fp32 result.
+ Attrs is const because we get it as a const.
+ */
+T* mutable_attrs = const_cast(attrs);
+
+DataType cur_type = (mutable_attrs->out_dtype);
+if (cur_type.is_float() || cur_type.is_void()) mutable_attrs->out_dtype = 
accumulation_dtype;
+  }
+
+  template 
+  void ModifyAttrsDType(const T* attrs, const DataType& accumulation_dtype) 
const {
+/*
+ Helper template to modify relevant attributes with dtype type.
+ This determines the output dtype for some ops. For example
+ zeros creates a tensor of zeros of the specified dtype.
+ Attrs is const 

[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-09 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r648617582



##
File path: tests/python/relay/test_fp32_to_fp16_transform.py
##
@@ -0,0 +1,328 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Unit tests for testing FP32 -> FP16 pass"""
+from typing import Any, Dict, List
+
+import numpy as np
+import tvm
+from tvm import relay
+from tvm.relay.testing import lstm
+from tvm.relay.transform import AMPRewrite
+from tvm.relay.transform.transform import InferType
+
+
+def run_module(mod: tvm.runtime.Module, mod_params: Dict[str, Any]) -> List:
+dev = tvm.device("llvm", 0)
+intrp = relay.create_executor("debug", mod, device=dev, target="llvm")
+result = intrp.evaluate()(**mod_params)
+if isinstance(result, tvm.runtime.container.ADT):
+result = [r.asnumpy() for r in result]
+return result
+else:
+return [result.asnumpy()]
+
+
+def verify_fp32_fp16_output_close(
+    mod: tvm.runtime.Module, mod_params: Dict[str, Any], rtol: float = 1e-3, atol: float = 0
+) -> tvm.runtime.Module:
+mod = InferType()(mod)
+result_fp32 = run_module(mod, mod_params)
+fp16_mod = AMPRewrite()(mod)
+result_fp16 = run_module(fp16_mod, mod_params)
+
+# Ensure the results are close
+for fp32, fp16 in zip(result_fp32, result_fp16):
+np.testing.assert_allclose(fp32, fp16, rtol=rtol, atol=atol)
+
+return fp16_mod
+
+
+def test_lstm():
+"""A small stress test on a single unrolled lstm unit.
+
+Has internal functions and let statements the pass must work on.
+"""
+np.random.seed(5628)
+units = 3
+iterations = 5
+    mod, mod_params = lstm.get_workload(iterations=iterations, num_hidden=units)
+
+# This is an unrolled lstm so each data should be the previous results but
+# we don't care, we just want to stress test things.
+for i in range(iterations):
+mod_params["data" if i == 0 else f"data{i}"] = np.random.uniform(
+-10, 10, (1, units)
+).astype("float32")
+
+verify_fp32_fp16_output_close(mod, mod_params, rtol=0.01, atol=0.01)
+
+
+def test_convert_single_conv():
+"""Conv is a green listed operation meaning it will always use fp16 
workload.
+
+By default it accumulates to fp32 and outputs fp16.
+"""
+np.random.seed(208)
+
+data_shape = (1, 3, 32, 32)
+weight_shape = (5, 3, 3, 3)
+data = relay.var("data", shape=data_shape, dtype="float32")
+weight = relay.var("weight", shape=weight_shape, dtype="float32")
+conv = relay.nn.conv2d(data, weight, strides=(1, 1), padding=(1, 1), 
out_dtype="float32")
+mod = tvm.IRModule.from_expr(conv)
+mod = tvm.relay.transform.InferType()(mod)
+
+mod_params = {
+"data": np.random.uniform(-1, 1, size=data_shape).astype("float32"),
+"weight": np.random.uniform(-1, 1, 
size=weight_shape).astype("float32"),
+}
+fp16_mod = verify_fp32_fp16_output_close(mod, mod_params, atol=0.01, 
rtol=1e-3)
+
+expected_mod = tvm.IRModule.from_expr(
+relay.cast(
+relay.nn.conv2d(
+relay.cast(data, "float16"),
+relay.cast(weight, "float16"),
+strides=(1, 1),
+padding=(1, 1),
+out_dtype="float32",
+),
+"float16",
+)
+)
+expected_mod = tvm.relay.transform.InferType()(expected_mod)
+
+assert not tvm.ir.structural_equal(fp16_mod, mod)
+assert tvm.ir.structural_equal(fp16_mod, expected_mod)
+
+
+def test_convert_conv_bn():
+"""Conv is green and batch norm is gray. As Conv should output fp16 
batch_norm should be green."""
+np.random.seed(208)
+
+data_shape = (1, 3, 32, 32)
+weight_shape = (5, 3, 3, 3)
+data = relay.var("data", shape=data_shape, dtype="float32")
+weight = relay.var("weight", shape=weight_shape, dtype="float32")
+conv = relay.nn.conv2d(data, weight, strides=(1, 1), padding=(1, 1), 
out_dtype="float32")
+
+bn_shape = [5]
+gamma = relay.var("gamma", shape=bn_shape)
+beta = relay.var("beta", shape=bn_shape)
+moving_mean = relay.var("moving_mean", shape=bn_shape)
+moving_var = 

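A minimal standalone sketch of driving the pass the same way these tests do (conv2d is green-listed, so its inputs get cast to fp16):

    import tvm
    from tvm import relay
    from tvm.relay.transform import AMPRewrite

    data = relay.var("data", shape=(1, 3, 32, 32), dtype="float32")
    weight = relay.var("weight", shape=(5, 3, 3, 3), dtype="float32")
    mod = tvm.IRModule.from_expr(relay.nn.conv2d(data, weight, padding=(1, 1)))
    mod = tvm.relay.transform.InferType()(mod)

    # conv2d is green-listed, so the rewritten module casts data/weight to
    # float16 and, by default, keeps a float32 accumulation via out_dtype.
    fp16_mod = AMPRewrite()(mod)
    print(fp16_mod)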
[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-09 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r648611836



##
File path: src/relay/transforms/fp32_to_fp16.cc
##
@@ -0,0 +1,337 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *
+ * \file fp32_to_fp16.cc
+ * \brief Rewrite a graph into an fp16 form.
+ */
+#include "fp32_to_fp16.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+
+// A callable which hashes std::pair
+struct pair_hash {
+  template 
+  std::size_t operator()(const std::pair& pair) const {
+auto h1 = std::hash()(pair.first);
+auto h2 = std::hash()(pair.second);
+
+// Use boost's combine_hash strategy
+return h1 ^ (h1 + 0x9e3779b9 + (h2 << 6) + (h2 >> 2));
+  }
+};
+
+// A map of a parent node and a wanted dtype to existing nodes casted to the 
wanted dtype
+using CachedCastNodes = std::unordered_map, Expr, pair_hash>;
+
+// A function which maps CallNodes to their initial conversion color
+using ColorFunc = std::function;
+
+// A function which maps green CallNodes to wanted accumulation and output 
dtypes
+using OutputDtypeFunc = std::function;
+
+class AmpGraphCreator : public ExprMutator {
+ private:
+  CachedCastNodes cast_nodes_cache;
+  const ColorFunc colorer;
+  const OutputDtypeFunc output_dtype_func;
+
+  Attrs GetNewAttrs(const CallNode* call, const DataType& accumulation_dtype) 
const {
+/* If the accumulation dtype is in the attributes make a copy and mutate 
the field. */
+Attrs new_attrs = Attrs(call->attrs);
+if (new_attrs.get() != nullptr) {
+  // TODO(AndrewZhaoLuo): Figure out a better way to do this
+  // modify output_dtype attributes (accumulation dtypes for ops)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = 
new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  }
+
+  // modify dtype attributes (creating new tensors of type dtype)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsDType(attrs, accumulation_dtype);
+  }
+}
+
+return new_attrs;
+  }
+
+  template 
+  void ModifyAttrsOutputDType(const T* attrs, const DataType& 
accumulation_dtype) const {
+/*
+ Helper template to modify relevant attributes with out_dtype type.
+ These represent accumulation dtypes for some operations e.g.
+ conv2d might take in fp16 and give a fp32 result.
+ Attrs is const because we get it as a const.
+ */
+T* mutable_attrs = const_cast(attrs);
+
+DataType cur_type = (mutable_attrs->out_dtype);
+if (cur_type.is_float() || cur_type.is_void()) mutable_attrs->out_dtype = 
accumulation_dtype;
+  }
+
+  template 
+  void ModifyAttrsDType(const T* attrs, const DataType& accumulation_dtype) 
const {
+/*
+ Helper template to modify relevant attributes with dtype type.
+ This determines the output dtype for some ops. For example
+ zeros creates a tensor of zeros of the specified dtype.
+ Attrs is const 

[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-09 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r648609224



##
File path: src/relay/transforms/fp32_to_fp16.cc
##
@@ -0,0 +1,337 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *
+ * \file fp32_to_fp16.cc
+ * \brief Rewrite a graph into an fp16 form.
+ */
+#include "fp32_to_fp16.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+
+// A callable which hashes std::pair
+struct pair_hash {
+  template 
+  std::size_t operator()(const std::pair& pair) const {
+auto h1 = std::hash()(pair.first);
+auto h2 = std::hash()(pair.second);
+
+// Use boost's combine_hash strategy
+return h1 ^ (h1 + 0x9e3779b9 + (h2 << 6) + (h2 >> 2));
+  }
+};
+
+// A map of a parent node and a wanted dtype to existing nodes casted to the 
wanted dtype
+using CachedCastNodes = std::unordered_map, Expr, pair_hash>;
+
+// A function which maps CallNodes to their initial conversion color
+using ColorFunc = std::function;
+
+// A function which maps green CallNodes to wanted accumulation and output 
dtypes
+using OutputDtypeFunc = std::function;
+
+class AmpGraphCreator : public ExprMutator {
+ private:
+  CachedCastNodes cast_nodes_cache;
+  const ColorFunc colorer;
+  const OutputDtypeFunc output_dtype_func;
+
+  Attrs GetNewAttrs(const CallNode* call, const DataType& accumulation_dtype) 
const {
+/* If the accumulation dtype is in the attributes make a copy and mutate 
the field. */
+Attrs new_attrs = Attrs(call->attrs);
+if (new_attrs.get() != nullptr) {
+  // TODO(AndrewZhaoLuo): Figure out a better way to do this
+  // modify output_dtype attributes (accumulation dtypes for ops)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = 
new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  } else if (auto attrs = new_attrs.as()) {
+ModifyAttrsOutputDType(attrs, accumulation_dtype);
+  }
+
+  // modify dtype attributes (creating new tensors of type dtype)
+  if (auto attrs = new_attrs.as()) {
+ModifyAttrsDType(attrs, accumulation_dtype);
+  }
+}
+
+return new_attrs;
+  }
+
+  template 
+  void ModifyAttrsOutputDType(const T* attrs, const DataType& 
accumulation_dtype) const {
+/*
+ Helper template to modify relevant attributes with out_dtype type.
+ These represent accumulation dtypes for some operations e.g.
+ conv2d might take in fp16 and give a fp32 result.
+ Attrs is const because we get it as a const.
+ */
+T* mutable_attrs = const_cast(attrs);
+
+DataType cur_type = (mutable_attrs->out_dtype);
+if (cur_type.is_float() || cur_type.is_void()) mutable_attrs->out_dtype = 
accumulation_dtype;
+  }
+
+  template 
+  void ModifyAttrsDType(const T* attrs, const DataType& accumulation_dtype) 
const {
+/*
+ Helper template to modify relevant attributes with dtype type.
+ This determines the output dtype for some ops. For example
+ zeros creates a tensor of zeros of the specified dtype.
+ Attrs is const 

[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-09 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r648601951



##
File path: src/relay/transforms/fp32_to_fp16.h
##
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file fp32_to_fp16.h
+ * \brief Utilities and common types used for FP32->FP16 pass.
+ */
+#ifndef TVM_RELAY_TRANSFORMS_FP32_TO_FP16_H_
+#define TVM_RELAY_TRANSFORMS_FP32_TO_FP16_H_
+
+#include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+namespace tvm {
+namespace relay {
+
+struct FP16OpDType {
+  DataType accumulation_dtype;
+  DataType output_dtype;
+};
+
+// GREEN colored ops should always be done in FP16 due to the speed and memory savings
+// GRAY colored ops can be done in FP16 but don't have speedups to justify a dedicated cast.
+// RED colored ops should not be done in FP16 due to numerical reasons.
+enum FP16ConversionCategory { RED, GRAY, GREEN };
+
+using OpStringSet = std::unordered_set<std::string>;
+
+// Default lists inspired from TF's classifications:

Review comment:
   Good advice. 
   
   I'll use better terms instead of RED/GRAY/GREEN.
   I'll also make the warning messages configurable by the user.
   For registering attributes to each op, I think it's probably a good idea, but do you have an example of this strategy I could look at?
   User-defined rules from Python are a goal I will try for. It might take a little longer, though.
   
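One possible shape of the per-op attribute-registration strategy asked about above, sketched in Python. The attribute key and the handler's signature/return format are hypothetical; the only real API used is tvm.ir.register_op_attr:

    import tvm

    # "FTVMMixedPrecisionConversionType" is a made-up attribute key here. The
    # point is just that each op can carry its own conversion rule, which the
    # pass then looks up instead of consulting global RED/GRAY/GREEN lists.
    @tvm.ir.register_op_attr("nn.conv2d", "FTVMMixedPrecisionConversionType")
    def conv2d_mixed_precision_rule(call_node, mixed_precision_type):
        # (category, accumulation dtype, output dtype)
        return ["ALWAYS", "float32", mixed_precision_type]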








[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-09 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r648598356



##
File path: src/relay/transforms/fp32_to_fp16.cc
##
@@ -0,0 +1,337 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *
+ * \file fp32_to_fp16.cc

Review comment:
   I'll change it to something like `amp.cc` when we decide what we want to call the pass.
   
   I would like the file names to closely match the user-facing name, e.g. `ToMixedPrecision` --> `to_mixed_precision.cc`.








[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-09 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r648596936



##
File path: python/tvm/relay/transform/transform.py
##
@@ -1199,3 +1198,20 @@ def FakeQuantizationToInteger():
 The registered SimplifyExpr pass.
 """
 return _ffi_api.FakeQuantizationToInteger()
+
+
+def AMPRewrite():

Review comment:
   I disagree. All the passes have names that are verbs describing what they do, while `AMP` is a noun. Maybe `AutoCast` would be better, but it doesn't capture the mixed-precision nature.
   
   Maybe `ToMixedPrecision` would be a better name?








[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-08 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r647779530



##
File path: src/relay/transforms/fp32_to_fp16.cc
##
@@ -0,0 +1,330 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *
+ * \file fp32_to_fp16.cc
+ * \brief Rewrite a graph into an fp16 form.
+ */
+#include "fp32_to_fp16.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+
+// A callable which hashes std::pair
+struct pair_hash {
+  template <class T1, class T2>
+  std::size_t operator()(const std::pair<T1, T2>& pair) const {
+    auto h1 = std::hash<T1>()(pair.first);
+    auto h2 = std::hash<T2>()(pair.second);
+
+return h1 ^ (h2 << 1);

Review comment:
   Done.
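
The change referenced here replaces the weak `h1 ^ (h2 << 1)` mix with the boost-style hash_combine formula seen in the newer revisions of this diff; the same mixing step written out in Python for clarity:

    def hash_combine(h1: int, h2: int) -> int:
        # Golden-ratio constant plus shifts spreads bits better than a plain
        # xor/shift of the two hashes. In the C++ code this runs on size_t,
        # so it wraps modulo 2**64, which the mask reproduces here.
        return (h1 ^ (h1 + 0x9e3779b9 + (h2 << 6) + (h2 >> 2))) & 0xFFFFFFFFFFFFFFFF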








[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-08 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r647776502



##
File path: tests/python/frontend/mxnet/test_forward.py
##
@@ -1223,6 +1221,8 @@ def verify(shape, axis=1, fix_gamma=False):
 
 @tvm.testing.uses_gpu
 def test_forward_instance_norm():
+np.random.seed(90)
+

Review comment:
   Oh ok, that's an interesting idea. I had a failure where the passing rtol was 1.05e-5, so I'm just going to increase the tolerance.
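
Roughly, the fix described here is a fixed seed plus a slightly looser tolerance; a self-contained illustration with made-up numbers:

    import numpy as np

    np.random.seed(90)  # fixed seed keeps the comparison reproducible
    a = np.random.uniform(-1, 1, (2, 3)).astype("float32")
    b = (a * (1 + 1.05e-5)).astype("float32")  # simulate the observed 1.05e-5 divergence
    # rtol=1e-5 would reject this; a slightly looser rtol accepts it
    np.testing.assert_allclose(a, b, rtol=2e-5)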








[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-08 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r647776445



##
File path: src/relay/transforms/fp32_to_fp16.cc
##
@@ -0,0 +1,332 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *
+ * \file fp32_to_fp16.cc
+ * \brief Rewrite a graph into an fp16 form.
+ */
+#include "fp32_to_fp16.h"
+
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "pattern_utils.h"
+
+namespace tvm {
+namespace relay {
+
+// A callable which hashes std::pair
+struct pair_hash {
+  template 
+  std::size_t operator()(const std::pair& pair) const {
+auto h1 = std::hash()(pair.first);
+auto h2 = std::hash()(pair.second);
+
+return h1 ^ (h2 << 1);
+  }
+};
+
+// A map of a parent node and a wanted dtype to existing nodes casted to the 
wanted dtype
+using CachedCastNodes = std::unordered_map, Expr, pair_hash>;
+
+// A function which maps CallNodes to their initial conversion color
+using ColorFunc = std::function;
+
+// A function which maps green CallNodes to wanted accumulation and output 
dtypes
+using OutputDtypeFunc = std::function;
+
+class AmpGraphCreator : public ExprMutator {
+ private:
+  CachedCastNodes cast_nodes_cache;
+  const ColorFunc colorer;
+  const OutputDtypeFunc output_dtype_func;
+
+  Attrs GetNewAttrs(const CallNode* call, const DataType& accumulation_dtype) 
const {
+/* If the accumulation dtype is in the attributes make a copy and mutate 
the field. */
+    Attrs new_attrs = Attrs(call->attrs);
+    if (new_attrs.get() != nullptr) {
+      // TODO(AndrewZhaoLuo): Figure out a better way to do this
+      // modify output_dtype attributes (accumulation dtypes for ops)
+      if (auto attrs = new_attrs.as<Conv1DAttrs>()) {
+        ModifyAttrsOutputDType(attrs, accumulation_dtype);
+      } else if (auto attrs = new_attrs.as<Conv1DTransposeAttrs>()) {
+        ModifyAttrsOutputDType(attrs, accumulation_dtype);
+      } else if (auto attrs = new_attrs.as<Conv2DAttrs>()) {
+        ModifyAttrsOutputDType(attrs, accumulation_dtype);
+      } else if (auto attrs = new_attrs.as<Conv2DTransposeAttrs>()) {
+        ModifyAttrsOutputDType(attrs, accumulation_dtype);
+      } else if (auto attrs = new_attrs.as<Conv2DWinogradAttrs>()) {
+        ModifyAttrsOutputDType(attrs, accumulation_dtype);
+      } else if (auto attrs = new_attrs.as<Conv2DWinogradNNPACKWeightTransformAttrs>()) {
+        ModifyAttrsOutputDType(attrs, accumulation_dtype);
+      } else if (auto attrs = new_attrs.as<DeformableConv2DAttrs>()) {
+        ModifyAttrsOutputDType(attrs, accumulation_dtype);
+      } else if (auto attrs = new_attrs.as<Conv3DAttrs>()) {
+        ModifyAttrsOutputDType(attrs, accumulation_dtype);
+      } else if (auto attrs = new_attrs.as<Conv3DTransposeAttrs>()) {
+        ModifyAttrsOutputDType(attrs, accumulation_dtype);
+      } else if (auto attrs = new_attrs.as<Conv3DWinogradAttrs>()) {
+        ModifyAttrsOutputDType(attrs, accumulation_dtype);
+      } else if (auto attrs = new_attrs.as<DenseAttrs>()) {
+        ModifyAttrsOutputDType(attrs, accumulation_dtype);
+      } else if (auto attrs = new_attrs.as<BatchMatmulAttrs>()) {
+        ModifyAttrsOutputDType(attrs, accumulation_dtype);
+      }
+
+      // modify dtype attributes (creating new tensors of type dtype)
+      if (auto attrs = new_attrs.as<InitOpAttrs>()) {
+        ModifyAttrsDType(attrs, accumulation_dtype);
+      }
+    }
+
+    return new_attrs;
+  }
+
+  template <typename T>
+  void ModifyAttrsOutputDType(const T* attrs, const DataType& accumulation_dtype) const {
+    /*
+     Helper template to modify relevant attributes with out_dtype type.
+     These represent accumulation dtypes for some operations e.g.
+     conv2d might take in fp16 and give a fp32 result.
+     Attrs is const because we get it as a const.
+     */
+    T* mutable_attrs = const_cast<T*>(attrs);
+    if ((mutable_attrs->out_dtype).is_float()) mutable_attrs->out_dtype = accumulation_dtype;
+  }
+
+  template <typename T>
+  void ModifyAttrsDType(const T* attrs, const DataType& accumulation_dtype) const {
+    /*
+     Helper template to modify relevant attributes with dtype type.
+     This determines the output dtype for some ops. For example
+     zeros creates a tensor of zeros of the specified dtype.
+     Attrs is const because we get it as a const.
+    */
+    T* mutable_attrs = const_cast<T*>(attrs);
+    if ((mutable_attrs->dtype).is_float()) mutable_attrs->dtype = accumulation_dtype;
+  }

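The (parent node, wanted dtype) cache above is what lets the mutator reuse a single cast node when several consumers need the same value in the same dtype. A minimal sketch of that lookup, assuming the CachedCastNodes alias above and a Cast(expr, dtype) helper like the one pattern_utils.h provides (the CachedCast name itself is hypothetical, not the PR's exact code):

// Sketch only. Assumes the CachedCastNodes alias and relay's Cast helper are in scope.
Expr CachedCast(CachedCastNodes* cache, const Expr& expr, const DataType& wanted_dtype) {
  auto key = std::make_pair(expr.get(), wanted_dtype);
  auto it = cache->find(key);
  if (it != cache->end()) {
    // This value was already cast to wanted_dtype elsewhere; reuse that node.
    return it->second;
  }
  // A fuller version would also skip the cast when expr already has wanted_dtype.
  Expr casted = Cast(expr, wanted_dtype);
  (*cache)[key] = casted;
  return casted;
}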
[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-07 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r646890463



##
File path: src/relay/transforms/fp32_to_fp16.h
##
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file fp32_to_fp16.h
+ * \brief Utilities and common types used for FP32->FP16 pass.
+ */
+#ifndef TVM_RELAY_TRANSFORMS_FP32_TO_FP16_H_
+#define TVM_RELAY_TRANSFORMS_FP32_TO_FP16_H_
+
+#include <tvm/ir/op.h>
+#include <tvm/relay/expr.h>
+#include <tvm/relay/function.h>
+
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+namespace tvm {
+namespace relay {
+
+struct FP16OpDType {
+  DataType accumulation_dtype;
+  DataType output_dtype;
+};
+
+// GREEN colored ops should always be done in FP16 due to the speed and memory savings
+// GRAY colored ops can be done in FP16 but don't have speedups to justify a dedicated cast.
+// RED colored ops should not be done in FP16 due to numerical reasons.
+enum FP16ConversionCategory { RED, GRAY, GREEN };

Review comment:
   By default it would be RED and a warning would be emitted.
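A rough sketch of that default, assuming the FP16ConversionCategory enum from the header and a flattened op-name-to-color map (op_to_color and LookupColor are assumed names, not the PR's exact code):

// Sketch only: ops found in none of the three lists fall back to RED and warn.
FP16ConversionCategory LookupColor(
    const std::string& op_name,
    const std::unordered_map<std::string, FP16ConversionCategory>& op_to_color) {
  auto it = op_to_color.find(op_name);
  if (it == op_to_color.end()) {
    LOG(WARNING) << "Op " << op_name
                 << " is not in any conversion list; defaulting to RED (kept in fp32).";
    return RED;
  }
  return it->second;
}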








[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8069: [Relay] [Pass] Add FP16 model conversion pass

2021-06-07 Thread GitBox


AndrewZhaoLuo commented on a change in pull request #8069:
URL: https://github.com/apache/tvm/pull/8069#discussion_r646890115



##
File path: python/tvm/relay/transform/transform.py
##
@@ -1145,6 +1144,21 @@ def AnnotateSpans():
     Returns
     -------
     ret : tvm.transform.Pass
-        The regsistered AnnotateSpans pass.
+        The registered AnnotateSpans pass.
     """
     return _ffi_api.AnnotateSpans()
+
+
+def RewriteFP16():

Review comment:
   Good idea. Done.

##
File path: src/relay/transforms/fp32_to_fp16.h
##
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file fp32_to_fp16.h
+ * \brief Utilities and common types used for FP32->FP16 pass.
+ */
+#ifndef TVM_RELAY_TRANSFORMS_FP32_TO_FP16_H_
+#define TVM_RELAY_TRANSFORMS_FP32_TO_FP16_H_
+
+#include <tvm/ir/op.h>
+#include <tvm/relay/expr.h>
+#include <tvm/relay/function.h>
+
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <utility>
+#include <vector>
+
+namespace tvm {
+namespace relay {
+
+struct FP16OpDType {
+  DataType accumulation_dtype;
+  DataType output_dtype;
+};
+
+// GREEN colored ops should always be done in FP16 due to the speed and memory savings
+// GRAY colored ops can be done in FP16 but don't have speedups to justify a dedicated cast.
+// RED colored ops should not be done in FP16 due to numerical reasons.
+enum FP16ConversionCategory { RED, GRAY, GREEN };
+
+using OpStringSet = std::unordered_set<std::string>;
+
+// Default lists inspired from TF's classifications:
+// github.com/tensorflow/tensorflow/blob/v2.5.0/tensorflow/core/grappler/optimizers/auto_mixed_precision_lists.h
+// They have a bias toward Nvidia Tensor Cores so modify lists per your hardware choice.
+OpStringSet DEFAULT_GREEN_LIST({
+"nn.conv1d",
+"nn.conv2d",
+"nn.conv3d",
+"nn.conv1d_transpose",
+"nn.conv2d_transpose",
+"nn.conv3d_transpose",
+"nn.dense",
+"nn.batch_matmul",
+});
+OpStringSet DEFAULT_GRAY_LIST({
+// These ops add new data or change shape
+"nn.pad",
+"nn.batch_flatten",
+"concatenate",
+"zeros",
+"split",
+"squeeze",
+"transpose",
+"expand_dims",
+"reshape",
+"dyn.reshape",
+"broadcast_to_like",
+"dyn.broadcast_to",
+"strided_slice",
+"dyn.strided_slice",
+"take",
+"argwhere",
+"where",
+"tile",
+"dyn.tile",
+"scatter",
+"full",
+"dyn.full",
+// Comparison
+"less",
+"greater",
+"less_equal",
+"greater_equal",
+// By definition copy and cast will become green or red based on inputs
+"copy",
+"cast",
+"cast_like",
+// Simple arithmetic
+"add",
+"subtract",
+"multiply",
+"divide",
+"nn.bias_add",
+"nn.batch_norm",
+"sum",
+"mean",
+"sqrt",
+"shape_of",
+// Simple activations
+"max",
+"min",
+"maximum",
+"minimum",
+"nn.relu",
+"nn.leaky_relu",
+"nn.prelu",
+"nn.dropout",
+// Complicated activations which saturate in a narrow range
+"sigmoid",
+"tanh",
+// Pooling operations
+"nn.max_pool1d",
+"nn.max_pool2d",
+"nn.max_pool3d",
+"nn.avg_pool1d",
+"nn.avg_pool2d",
+"nn.avg_pool3d",
+// "nn.global_max_pool1d", // does not exist yet
+"nn.global_max_pool2d",
+// "nn.global_max_pool3d", // does not exist yet
+// "nn.global_avg_pool1d", // does not exist yet
+"nn.global_avg_pool2d",
+// "nn.global_avg_pool3d", // does not exist yet
+"nn.adaptive_max_pool1d",
+"nn.adaptive_max_pool2d",
+"nn.adaptive_max_pool3d",
+"nn.adaptive_avg_pool1d",
+"nn.adaptive_avg_pool2d",
+"nn.adaptive_avg_pool3d",
+});
+OpStringSet DEFAULT_RED_LIST({
+// In general if |f(x)| >> |x| for expected inputs then put the op here.
+"exp",
+"power",
+"nn.cross_entropy",
+"nn.cross_entropy_with_logits",
+"nn.softmax",
+"nn.l2_normalize",
+// Error function doesn't seem to be able to be lowered into fp16 version in llvm.
+// Move to gray list when it does.
+"erf",
+});
+
+class DefaultFP16Colorer {
+  /* The default class to initially color ops for conversion using lists.
+
+  Creates a callable which given a CallNode* returns the node's color.
+  */
+ private:
+
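To make the accumulation/output distinction concrete: a GREEN op such as nn.conv2d can take fp16 inputs, accumulate in fp32 for numerical safety, and still hand an fp16 tensor to the next op. A minimal sketch of an OutputDtypeFunc with that behavior, assuming the FP16OpDType struct quoted above (DefaultGreenOpDtypes is an assumed name, not the PR's exact code):

// Sketch only: GREEN ops accumulate in fp32 but produce fp16 outputs for downstream ops.
FP16OpDType DefaultGreenOpDtypes(const CallNode* call) {
  (void)call;  // A fuller version could special-case individual ops here.
  return FP16OpDType{DataType::Float(32), DataType::Float(16)};
}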