sxjscience commented on pull request #6699:
URL: https://github.com/apache/incubator-tvm/pull/6699#issuecomment-71989
I've verified the TVM integration with six NLP backbones in GluonNLP: BERT, ALBERT, ELECTRA, MobileBERT, RoBERTa, and BART:
```python
import mxnet as mx
import numpy as np
import gluonnlp
from gluonnlp.models import get_backbone
import numpy.testing as npt
import tvm
from tvm import relay
import tvm.contrib.graph_runtime as runtime

mx.npx.set_np()

# Map AWS instance types to the TVM targets used for compilation.
instance_info = {
    'g4': {'target': "cuda -model=t4", 'use_gpu': True},
    'c4': {'target': 'llvm -mcpu=core-avx2 -libs=cblas', 'use_gpu': False},
    'c5': {'target': 'llvm -mcpu=skylake-avx512 -libs=cblas', 'use_gpu': False},
    'p3': {'target': 'cuda -model=v100', 'use_gpu': True}
}

def test_backbone(model_name, batch_size=2, seq_length=128, instance='g4',
                  required_pass=None, opt_level=3):
    if required_pass is None:
        required_pass = ["FastMath"]
    model_cls, cfg, tokenizer, backbone_param_path, _ = get_backbone(model_name)
    model = model_cls.from_cfg(cfg)
    model.load_parameters(backbone_param_path)
    model.hybridize()
    # Random inputs: token ids, token types, and valid lengths.
    token_ids = mx.np.random.randint(0, cfg.MODEL.vocab_size,
                                     (batch_size, seq_length), dtype=np.int32)
    token_types = mx.np.random.randint(0, 2, (batch_size, seq_length),
                                       dtype=np.int32)
    valid_length = mx.np.random.randint(seq_length // 2, seq_length,
                                        (batch_size,), dtype=np.int32)
    if 'bart' in model_name:
        # BART is an encoder-decoder model: feed the same ids/lengths to both.
        mx_out = model(token_ids, valid_length, token_ids, valid_length)
        shape_dict = {
            'data0': token_ids.shape,
            'data1': valid_length.shape,
            'data2': token_ids.shape,
            'data3': valid_length.shape,
        }
        dtype_dict = {
            'data0': token_ids.dtype.name,
            'data1': valid_length.dtype.name,
            'data2': token_ids.dtype.name,
            'data3': valid_length.dtype.name,
        }
    elif 'roberta' in model_name or 'xlmr' in model_name:
        # RoBERTa-style models take no token-type input.
        mx_out = model(token_ids, valid_length)
        shape_dict = {
            'data0': token_ids.shape,
            'data1': valid_length.shape,
        }
        dtype_dict = {
            'data0': token_ids.dtype.name,
            'data1': valid_length.dtype.name,
        }
    else:
        mx_out = model(token_ids, token_types, valid_length)
        shape_dict = {
            'data0': token_ids.shape,
            'data1': token_types.shape,
            'data2': valid_length.shape
        }
        dtype_dict = {
            'data0': token_ids.dtype.name,
            'data1': token_types.dtype.name,
            'data2': valid_length.dtype.name
        }
    # Export the hybridized symbol and parameters, then convert to Relay.
    sym = model._cached_graph[1]
    params = {}
    for k, v in model.collect_params().items():
        params[v._var_name] = tvm.nd.array(v.data().asnumpy())
    mod, params = relay.frontend.from_mxnet(sym, shape=shape_dict,
                                            dtype=dtype_dict, arg_params=params)
    target = instance_info[instance]['target']
    use_gpu = instance_info[instance]['use_gpu']
    with relay.build_config(opt_level=opt_level, required_pass=required_pass):
        graph, lib, cparams = relay.build(mod, target, params=params)
    if use_gpu:
        ctx = tvm.gpu()
    else:
        ctx = tvm.cpu()
    rt = runtime.create(graph, lib, ctx)
    rt.set_input(**cparams)
    if 'bart' in model_name:
        rt.set_input(data0=token_ids, data1=valid_length,
                     data2=token_ids, data3=valid_length)
    elif 'roberta' in model_name:
        rt.set_input(data0=token_ids, data1=valid_length)
    else:
        rt.set_input(data0=token_ids, data1=token_types, data2=valid_length)
    rt.run()
    # Compare every TVM output against the MXNet ground truth.
    for i in range(rt.get_num_outputs()):
        out = rt.get_output(i)
        if rt.get_num_outputs() == 1:
            mx_out_gt = mx_out.asnumpy()
        else:
            mx_out_gt = mx_out[i].asnumpy()
        if 'mobilebert' in model_name and len(out.shape) == 3:
            # For MobileBERT, skip the first token position, which has
            # a larger numerical error.
            npt.assert_allclose(out.asnumpy()[:, 1:, :], mx_out_gt[:, 1:, :],
                                rtol=6e-2, atol=6e-2)
        else:
            npt.assert_allclose(out.asnumpy(), mx_out_gt, rtol=6e-2, atol=6e-2)

# test_backbone('google_en_cased_bert_base', instance='g4')
test_model_names = ['google_albert_base_v2',
                    'google_en_cased_bert_base',
                    'google_electra_small',
                    'google_uncased_mobilebert',
                    'fairseq_roberta_base',
                    'fairseq_bart_base']
for model_name in test_model_names:
    test_backbone(model_name, instance='g4')
```
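As a usage note, the same script covers the CPU targets in `instance_info`; for example, the `c5` entry compiles with `llvm -mcpu=skylake-avx512 -libs=cblas`:
```python
# Run the same verification on a c5 (skylake-avx512 CPU) target.
test_backbone('google_en_cased_bert_base', instance='c5')
```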
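For completeness, a minimal sketch of how one might also time the compiled module, assuming the `rt` GraphModule and `ctx` created inside `test_backbone`; the `benchmark` helper name is illustrative and not part of this PR:
```python
import numpy as np

def benchmark(rt, ctx, number=10, repeat=3):
    # Time the compiled GraphModule's "run" function with TVM's time_evaluator.
    ftimer = rt.module.time_evaluator("run", ctx, number=number, repeat=repeat)
    prof_res = np.array(ftimer().results) * 1000  # per-repeat means, in ms
    print("Mean inference time: %.2f ms (std %.2f ms)"
          % (np.mean(prof_res), np.std(prof_res)))
```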