[GitHub] KellenSunderland commented on a change in pull request #12530: Implement mkldnn convolution fusion and quantization.

2018-10-05 Thread GitBox
KellenSunderland commented on a change in pull request #12530: Implement mkldnn 
convolution fusion and quantization.
URL: https://github.com/apache/incubator-mxnet/pull/12530#discussion_r222948018
 
 

 ##
 File path: tests/python/mkl/test_subgraph.py
 ##
 @@ -0,0 +1,472 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+import os
+import mxnet as mx
+import numpy as np
+import unittest
+import ctypes
+from mxnet.io import NDArrayIter
+from mxnet.module import Module
+from mxnet.symbol import Symbol
+from importlib import import_module
+from numpy.testing import assert_allclose
+from mxnet.base import SymbolHandle, check_call, _LIB, mx_uint, c_str
+from mxnet.test_utils import DummyIter
+curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
+sys.path.append(os.path.join(curr_path, '../unittest/'))
+from common import with_seed
+
+DATA_SHAPE=[(4, 4, 10, 10), (32, 3, 24, 24), (64, 8, 64, 64)]
+DATA_LABEL=[(4, 10), (32, 10), (64, 10)]
+MIN_VALUE=-1.0
+MAX_VALUE=1.0
+
+def check_qsym_calibrated(qsym):
+  assert ''.join(qsym.attr_dict().keys()).find('quantized_sg_mkldnn_conv') != -1
+  for k, v in qsym.attr_dict().items():
+    if k.find('quantized_sg_mkldnn_conv') != -1:
+      assert 'min_calib_range' in v
+      assert 'max_calib_range' in v
+    if k.find('_quantize') != -1:
+      assert v['out_type'] == 'uint8'
+
+def check_qsym_forward(qsym, qarg_params, qaux_params, batch, data_shape, label_shape):
+  mod = mx.mod.Module(symbol=qsym, context=mx.current_context())
+  mod.bind(for_training=False,
+           data_shapes=[('data', data_shape)],
+           label_shapes=[('softmax_label', label_shape)])
+  mod.set_params(qarg_params, qaux_params)
+  mod.forward(batch, is_train=False)
+  for output in mod.get_outputs():
+    output.wait_to_read()
+  return output
+
+def check_quantize(sym, arg_params, aux_params, data_shape, label_shape, batch, sym_output):
+  excluded_sym_names = []
+  if mx.current_context() == mx.cpu():
+    excluded_sym_names += ['fc']
+  calib_data = mx.nd.random.uniform(shape=data_shape)
+  calib_data = NDArrayIter(data=calib_data)
+  calib_data = DummyIter(calib_data)
+  calib_layer = lambda name: name.endswith('_output')
+  qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(sym=sym,
+                                                                   arg_params=arg_params,
+                                                                   aux_params=aux_params,
+                                                                   ctx=mx.current_context(),
+                                                                   excluded_sym_names=excluded_sym_names,
+                                                                   quantized_dtype='uint8',
+                                                                   calib_mode='naive',
+                                                                   calib_data=calib_data,
+                                                                   calib_layer=calib_layer,
+                                                                   calib_quantize_op=True,
+                                                                   num_calib_examples=20)
+  qsym = qsym.get_backend_symbol("MKLDNN_POST_QUANTIZE")
+  check_qsym_calibrated(qsym)
+  qsym_output = check_qsym_forward(qsym, qarg_params, qaux_params, batch, data_shape, label_shape)
+
+  diff = mx.nd.abs(sym_output - qsym_output.astype(sym_output.dtype))
+  cond = mx.nd.lesser(2, diff).sum().asscalar()
+  assert cond == 0
+
+@with_seed()
+def check_fusion(sym, data_shape, label_shape, attrs_op):
+  dev = mx.cpu()
+  mod = Module(symbol=sym)
+  mod.bind(data_shapes=[('data', data_shape)], label_shapes=[('softmax_label', label_shape)])
+  mod.init_params(mx.init.Normal(0.5))
+  arg_params, aux_params = mod.get_params()
+
+  data = [mx.random.uniform(MIN_VALUE, MAX_VALUE, shape=shape, ctx=dev) for _, shape in mod.data_shapes]
+  batch = mx.io.DataBatch(data, [])
+
+  mod.forward(batch, is_train=False)
+  for output in mod.get_outputs():
+    output.wait_to_read()
+
+  sym_sg = sym.get_backend_symbol("MKLDNN")
+  mod_sg = Module(symbol=sym)
+  

[GitHub] KellenSunderland commented on a change in pull request #12530: Implement mkldnn convolution fusion and quantization.

2018-10-05 Thread GitBox
KellenSunderland commented on a change in pull request #12530: Implement mkldnn 
convolution fusion and quantization.
URL: https://github.com/apache/incubator-mxnet/pull/12530#discussion_r222947557
 
 

 ##
 File path: src/c_api/c_api_symbolic.cc
 ##
 @@ -696,3 +696,21 @@ int MXSetCalibTableToQuantizedSymbol(SymbolHandle qsym_handle,
   *ret_qsym_handle = s;
   API_END_HANDLE_ERROR(delete s);
 }
+
+int MXGenBackendSubgraph(SymbolHandle sym_handle, const char *backend,
 
 Review comment:
   I guess my point is that 'backend' is an overloaded term here, so to me it's confusing to say you're converting a symbol to a backend-specific symbol.
   
   Am I understanding correctly that, when you're fusing ops and calling this function, the symbol you begin with (sym_handle) represents a graph of NNVM ops using our default MXNet backend, and the symbol you're converting to represents a fused operator targeting an MKLDNN backend?
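   
   For reference, a minimal sketch of the conversion being discussed, as exercised by the test code quoted earlier in this thread (assumes an MXNet build with the MKLDNN subgraph backend enabled; the tiny network is illustrative):
   
   ```
   import mxnet as mx
   
   # A plain FP32 graph of default NNVM ops.
   data = mx.sym.Variable('data')
   conv = mx.sym.Convolution(data, num_filter=8, kernel=(3, 3), name='conv')
   act = mx.sym.Activation(conv, act_type='relu', name='relu')
   
   # Rewrite the graph for the "MKLDNN" backend property: eligible op
   # sequences (e.g. conv + relu) are replaced by fused subgraph nodes.
   sym_sg = act.get_backend_symbol('MKLDNN')
   print(sym_sg.tojson())  # fused nodes appear in place of the originals
   ```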




[GitHub] KellenSunderland commented on a change in pull request #12530: Implement mkldnn convolution fusion and quantization.

2018-10-05 Thread GitBox
KellenSunderland commented on a change in pull request #12530: Implement mkldnn 
convolution fusion and quantization.
URL: https://github.com/apache/incubator-mxnet/pull/12530#discussion_r222943278
 
 

 ##
 File path: src/ndarray/ndarray.cc
 ##
 @@ -1592,8 +1601,12 @@ void NDArray::Save(dmlc::Stream *strm) const {
     save_data = nd_cpu.data();
   } else {
     this->WaitToRead();
-    save_data = this->data();
     nd_cpu = *this;
+#if MXNET_USE_MKLDNN == 1
+    if (nd_cpu.IsMKLDNNData())
 
 Review comment:
   I just bring it up because I was corrected in another PR.  Not sure if it's 
formalized anywhere.




[GitHub] KellenSunderland commented on a change in pull request #12530: Implement mkldnn convolution fusion and quantization.

2018-10-04 Thread GitBox
KellenSunderland commented on a change in pull request #12530: Implement mkldnn 
convolution fusion and quantization.
URL: https://github.com/apache/incubator-mxnet/pull/12530#discussion_r222583387
 
 

 ##
 File path: tests/python/mkl/test_subgraph.py
 ##
 @@ -0,0 +1,472 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+import os
+import mxnet as mx
+import numpy as np
+import unittest
+import ctypes
+from mxnet.io import NDArrayIter
+from mxnet.module import Module
+from mxnet.symbol import Symbol
+from importlib import import_module
+from numpy.testing import assert_allclose
+from mxnet.base import SymbolHandle, check_call, _LIB, mx_uint, c_str
+from mxnet.test_utils import DummyIter
+curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
+sys.path.append(os.path.join(curr_path, '../unittest/'))
+from common import with_seed
+
+DATA_SHAPE=[(4, 4, 10, 10), (32, 3, 24, 24), (64, 8, 64, 64)]
+DATA_LABEL=[(4, 10), (32, 10), (64, 10)]
+MIN_VALUE=-1.0
+MAX_VALUE=1.0
+
+def check_qsym_calibrated(qsym):
+  assert ''.join(qsym.attr_dict().keys()).find('quantized_sg_mkldnn_conv') != -1
+  for k, v in qsym.attr_dict().items():
+    if k.find('quantized_sg_mkldnn_conv') != -1:
+      assert 'min_calib_range' in v
+      assert 'max_calib_range' in v
+    if k.find('_quantize') != -1:
+      assert v['out_type'] == 'uint8'
+
+def check_qsym_forward(qsym, qarg_params, qaux_params, batch, data_shape, label_shape):
+  mod = mx.mod.Module(symbol=qsym, context=mx.current_context())
+  mod.bind(for_training=False,
+           data_shapes=[('data', data_shape)],
+           label_shapes=[('softmax_label', label_shape)])
+  mod.set_params(qarg_params, qaux_params)
+  mod.forward(batch, is_train=False)
+  for output in mod.get_outputs():
+    output.wait_to_read()
+  return output
+
+def check_quantize(sym, arg_params, aux_params, data_shape, label_shape, batch, sym_output):
+  excluded_sym_names = []
+  if mx.current_context() == mx.cpu():
+    excluded_sym_names += ['fc']
+  calib_data = mx.nd.random.uniform(shape=data_shape)
+  calib_data = NDArrayIter(data=calib_data)
+  calib_data = DummyIter(calib_data)
+  calib_layer = lambda name: name.endswith('_output')
+  qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(sym=sym,
+                                                                   arg_params=arg_params,
+                                                                   aux_params=aux_params,
+                                                                   ctx=mx.current_context(),
+                                                                   excluded_sym_names=excluded_sym_names,
+                                                                   quantized_dtype='uint8',
+                                                                   calib_mode='naive',
+                                                                   calib_data=calib_data,
+                                                                   calib_layer=calib_layer,
+                                                                   calib_quantize_op=True,
+                                                                   num_calib_examples=20)
+  qsym = qsym.get_backend_symbol("MKLDNN_POST_QUANTIZE")
+  check_qsym_calibrated(qsym)
+  qsym_output = check_qsym_forward(qsym, qarg_params, qaux_params, batch, data_shape, label_shape)
+
+  diff = mx.nd.abs(sym_output - qsym_output.astype(sym_output.dtype))
+  cond = mx.nd.lesser(2, diff).sum().asscalar()
+  assert cond == 0
+
+@with_seed()
+def check_fusion(sym, data_shape, label_shape, attrs_op):
+  dev = mx.cpu()
+  mod = Module(symbol=sym)
+  mod.bind(data_shapes=[('data', data_shape)], label_shapes=[('softmax_label', label_shape)])
+  mod.init_params(mx.init.Normal(0.5))
+  arg_params, aux_params = mod.get_params()
+
+  data = [mx.random.uniform(MIN_VALUE, MAX_VALUE, shape=shape, ctx=dev) for _, shape in mod.data_shapes]
+  batch = mx.io.DataBatch(data, [])
+
+  mod.forward(batch, is_train=False)
+  for output in mod.get_outputs():
+    output.wait_to_read()
+
+  sym_sg = sym.get_backend_symbol("MKLDNN")
+  mod_sg = Module(symbol=sym)
+  

[GitHub] KellenSunderland commented on a change in pull request #12530: Implement mkldnn convolution fusion and quantization.

2018-10-04 Thread GitBox
KellenSunderland commented on a change in pull request #12530: Implement mkldnn 
convolution fusion and quantization.
URL: https://github.com/apache/incubator-mxnet/pull/12530#discussion_r222582000
 
 

 ##
 File path: src/operator/quantization/mkldnn/mkldnn_quantize-inl.h
 ##
 @@ -75,6 +75,11 @@ static void MKLDNNQuantizeComputeKer(const std::vector<NDArray>& inputs,
   auto i_mpd = i_mem->get_primitive_desc();
   auto i_desc = i_mpd.desc();
   mkldnn::memory::format i_fmt = static_cast<mkldnn::memory::format>(i_desc.data.format);
+  if (i_fmt == mkldnn::memory::format::nchw ||
+      i_fmt == mkldnn::memory::format::nChw8c ||
+      i_fmt == mkldnn_nChw16c) {
+    i_fmt = mkldnn::memory::format::nhwc;
+  }
 
 Review comment:
   Question for both of you: If we quantize to int8 and store the params, will 
they be stored in nhwc by default (and thus not need to be converted each time 
the model is loaded)?
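   
   For context, a hedged sketch of the save path in question: quantize_model returns the quantized params as ordinary NDArrays, and the NDArray::Save hunk quoted elsewhere in this thread appears to convert any MKLDNN-internal layout back to the default layout before serializing, so a layout such as nhwc would be re-derived at load time rather than persisted. The names and values below are illustrative only:
   
   ```
   import mxnet as mx
   
   # Illustrative int8 weight; in practice this would come from quantize_model.
   qarg_params = {'conv_weight_quantize': mx.nd.zeros((8, 3, 3, 3), dtype='int8')}
   
   # Serialization uses the default layout; any MKLDNN-specific format is
   # dropped here and recreated the first time the loaded array is used.
   mx.nd.save('quantized.params', qarg_params)
   reloaded = mx.nd.load('quantized.params')
   print(reloaded['conv_weight_quantize'].dtype)  # int8 is preserved
   ```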




[GitHub] KellenSunderland commented on a change in pull request #12530: Implement mkldnn convolution fusion and quantization.

2018-10-04 Thread GitBox
KellenSunderland commented on a change in pull request #12530: Implement mkldnn 
convolution fusion and quantization.
URL: https://github.com/apache/incubator-mxnet/pull/12530#discussion_r222580648
 
 

 ##
 File path: src/ndarray/ndarray.cc
 ##
 @@ -1592,8 +1601,12 @@ void NDArray::Save(dmlc::Stream *strm) const {
     save_data = nd_cpu.data();
   } else {
     this->WaitToRead();
-    save_data = this->data();
     nd_cpu = *this;
+#if MXNET_USE_MKLDNN == 1
+    if (nd_cpu.IsMKLDNNData())
 
 Review comment:
   Nit: I believe the project encourages braces even on single-line conditionals.




[GitHub] KellenSunderland commented on a change in pull request #12530: Implement mkldnn convolution fusion and quantization.

2018-10-04 Thread GitBox
KellenSunderland commented on a change in pull request #12530: Implement mkldnn 
convolution fusion and quantization.
URL: https://github.com/apache/incubator-mxnet/pull/12530#discussion_r222580110
 
 

 ##
 File path: src/c_api/c_api_symbolic.cc
 ##
 @@ -696,3 +696,21 @@ int MXSetCalibTableToQuantizedSymbol(SymbolHandle qsym_handle,
   *ret_qsym_handle = s;
   API_END_HANDLE_ERROR(delete s);
 }
+
+int MXGenBackendSubgraph(SymbolHandle sym_handle, const char *backend,
 
 Review comment:
   By 'backend' in this context, do you mean the symbol that's a placeholder for the fused mkldnn call?




[GitHub] KellenSunderland commented on a change in pull request #12530: Implement mkldnn convolution fusion and quantization.

2018-10-04 Thread GitBox
KellenSunderland commented on a change in pull request #12530: Implement mkldnn 
convolution fusion and quantization.
URL: https://github.com/apache/incubator-mxnet/pull/12530#discussion_r222577892
 
 

 ##
 File path: python/mxnet/contrib/quantization.py
 ##
 @@ -53,6 +53,7 @@ def _quantize_params(qsym, params):
     qsym : Symbol
         Quantized symbol from FP32 symbol.
     params : dict of str->NDArray
+    th_dict: dict of min/max pairs of layers' output
 
 Review comment:
   I may be misunderstanding something here, but is the thresholding applied to the output?  My understanding was that it's usually applied to the weights during quantization.
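   
   For what it's worth, here is a minimal sketch of what the 'naive' calibration in this PR appears to collect, judging from calib_layer = lambda name: name.endswith('_output') in the tests: per-layer min/max statistics of layer *outputs* over the calibration batches, while weights are quantized from their own min/max without calibration data. The helper below is illustrative, not the PR's code:
   
   ```
   import mxnet as mx
   
   def naive_thresholds(outputs_by_name):
       """Map layer-output name -> (min, max) observed across calibration batches."""
       th_dict = {}
       for name, batches in outputs_by_name.items():
           lo = min(float(b.min().asscalar()) for b in batches)
           hi = max(float(b.max().asscalar()) for b in batches)
           th_dict[name] = (lo, hi)
       return th_dict
   
   outs = {'conv_output': [mx.nd.random.uniform(-1, 1, shape=(4, 8, 10, 10))]}
   print(naive_thresholds(outs))  # e.g. {'conv_output': (-0.99..., 0.99...)}
   ```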




[GitHub] KellenSunderland commented on a change in pull request #12530: Implement mkldnn convolution fusion and quantization.

2018-10-04 Thread GitBox
KellenSunderland commented on a change in pull request #12530: Implement mkldnn 
convolution fusion and quantization.
URL: https://github.com/apache/incubator-mxnet/pull/12530#discussion_r222576926
 
 

 ##
 File path: python/mxnet/contrib/quantization.py
 ##
 @@ -40,7 +40,7 @@
 from ..module import Module
 
 
-def _quantize_params(qsym, params):
+def _quantize_params(qsym, params, th_dict):
 
 Review comment:
   Probably worthwhile calling it thresh_dict or thresholds_dict. Still a pretty concise name, and it could avoid confusion.




[GitHub] KellenSunderland commented on a change in pull request #12530: Implement mkldnn convolution fusion and quantization.

2018-10-02 Thread GitBox
KellenSunderland commented on a change in pull request #12530: Implement mkldnn 
convolution fusion and quantization.
URL: https://github.com/apache/incubator-mxnet/pull/12530#discussion_r221867338
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -199,33 +248,45 @@ Graph QuantizeGraph(Graph &&src) {
       // the new_node.
       *new_node = *node;
       new_node->inputs.clear();
-      for (const auto& e : node->inputs) {
-        NodePtr mirror_node = mirror_map.at(e.node.get());
-        NodeEntry mirror_entry = NodeEntry{
-          mirror_node, e.index, e.version};
-        // if input node is quantized operator, add dequantize node
-        if (NeedQuantize(e.node, excluded_nodes)) {
-          // here we calculate the output number (exclude min/max, in order to
-          // calculate min/max index from mirror node) based on assumption that
-          // there is only 1 min and 1 max output from mirror node (which is
-          // currently true)
-          size_t num_outputs = mirror_node->num_outputs() - 2;
-          uint32_t min_index = num_outputs + 2 * e.index;
-          uint32_t max_index = num_outputs + 2 * e.index + 1;
-          NodePtr dequantize_node = CreateNode("_contrib_dequantize",
-            e.node->attrs.name + "_dequantize");
-          dequantize_node->inputs.emplace_back(mirror_entry);
-          dequantize_node->inputs.emplace_back(NodeEntry{mirror_node, min_index, 0});
-          dequantize_node->inputs.emplace_back(NodeEntry{mirror_node, max_index, 0});
-          dequantize_node->op()->attr_parser(&(dequantize_node->attrs));
+      if (node->is_variable() && node->attrs.name == "data") {
+        // Instert identity for data to collect calib for it.
 
 Review comment:
   Instert -> Insert
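   
   Unrelated to the typo: the index arithmetic in the quoted hunk is easy to misread, so here is a worked instance (values illustrative, following the hunk's own assumption of exactly one min and one max output per mirror node):
   
   ```
   # A quantized conv mirror node produces out_data, min_range, max_range.
   total_outputs = 3
   num_outputs = total_outputs - 2              # data outputs only -> 1
   e_index = 0                                  # entry index of the consumed data output
   min_index = num_outputs + 2 * e_index        # -> 1, the min_range output
   max_index = num_outputs + 2 * e_index + 1    # -> 2, the max_range output
   assert (min_index, max_index) == (1, 2)
   ```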




[GitHub] KellenSunderland commented on a change in pull request #12530: Implement mkldnn convolution fusion and quantization.

2018-10-02 Thread GitBox
KellenSunderland commented on a change in pull request #12530: Implement mkldnn 
convolution fusion and quantization.
URL: https://github.com/apache/incubator-mxnet/pull/12530#discussion_r221866332
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -89,17 +89,42 @@ std::vector<NodeEntry> OfflineParams(std::vector<NodeEntry>&& outputs,
   return outputs;
 }
 
-inline bool NeedQuantize(NodePtr node, const std::unordered_set<NodePtr> excluded_nodes) {
+inline bool NeedQuantize(NodePtr node, const std::unordered_set<std::string>& excluded_nodes) {
+  static auto& quantized_op_map = Op::GetAttr<mxnet::FQuantizedOp>("FQuantizedOp");
-  return quantized_op_map.count(node->op()) && !excluded_nodes.count(node);
+  static auto& fexec_type = nnvm::Op::GetAttr<FExecType>("FExecType");
+  const auto& op = node->op();
+  if (op && quantized_op_map.count(op)) {
+    bool need = true;
+    if (excluded_nodes.count(node->attrs.name)) {
+      need = false;
+    } else if (node->attrs.subgraphs.size()) {
 
 Review comment:
   Nit: consider
   ```
   else if (!node->attrs.subgraphs.empty())
   ```
   A little more explicit / readable.




[GitHub] KellenSunderland commented on a change in pull request #12530: Implement mkldnn convolution fusion and quantization.

2018-10-02 Thread GitBox
KellenSunderland commented on a change in pull request #12530: Implement mkldnn 
convolution fusion and quantization.
URL: https://github.com/apache/incubator-mxnet/pull/12530#discussion_r221862468
 
 

 ##
 File path: src/operator/quantization/quantize_graph_pass.cc
 ##
 @@ -178,19 +226,20 @@ Graph QuantizeGraph(Graph &&src) {
       // If the new_node op registered attr FNeedRequantize, insert requantize node after it.
       // Here it's assumed that the quantized_op node only produces three outputs:
       // out_data, min_range, and max_range.
-      if (need_requantize_map.count(new_node->op()) > 0
-          && need_requantize_map[new_node->op()](new_node->attrs)) {
+      if (need_requantize_map.count(new_node->op()) > 0 &&
+          need_requantize_map[new_node->op()](new_node->attrs)) {
         NodePtr requantize_node = Node::Create();
         requantize_node->attrs.op = Op::Get("_contrib_requantize");
         requantize_node->attrs.name = "requantize_" + node->attrs.name;
         if (requantize_node->op()->attr_parser != nullptr) {
           requantize_node->op()->attr_parser(&(requantize_node->attrs));
         }
         for (size_t i = 0; i < 3; ++i) {
-          requantize_node->inputs.emplace_back(NodeEntry{new_node, static_cast<uint32_t>(i), 0});
+          requantize_node->inputs.emplace_back(
+              NodeEntry{new_node, static_cast<uint32_t>(i), 0});
         }
         new_node = requantize_node;
-      }
+    }
 
 Review comment:
   Nit: whitespace seems off here.

