Mohamad11Dab opened a new issue, #18000:
URL: https://github.com/apache/tvm/issues/18000
A simple PyTorch model exported to ONNX shows small numeric mismatches
(~0.75%) between ONNXRuntime and TVM (0.17.0).
This may be due to operator fusion or precision loss in BatchNorm
lowering.
### Expected behavior
Outputs should remain within rtol=1e-2, atol=1e-3 bounds across all elements.
### Actual behavior
```
––––– MISMATCH DETECTED –––––
Not equal to tolerance rtol=0.01, atol=0.001
Mismatched elements: 124 / 16384 (0.757%)
Max absolute difference: 1.2893372
Max relative difference: 1.0156949
x: array([[[[ 1.930062e+00, 1.751171e+00, 3.203031e-01, ...,
3.307423e+01, -1.868098e+01, -5.810167e+01],
[ 9.250920e-01, 3.591827e+00, 1.927757e+00, ...,...
y: array([[[[ 1.930138e+00, 1.751273e+00, 3.202525e-01, ...,
3.307463e+01, -1.868023e+01, -5.810147e+01],
[ 9.251037e-01, 3.591987e+00, 1.927869e+00, ...,...
```
### Environment
TVM: 0.17.0
ONNXRuntime: 1.16.3
### Steps to reproduce
```python
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
'..')))
import torch
import torch.nn as nn
import torch.nn.functional as F
import tempfile
import onnx
import onnxruntime as ort
from numpy.testing import assert_allclose
import tvm
from tvm import relay
from tvm.contrib import graph_executor
import numpy as np
import nas_model_2
class SimpleBugModel(nn.Module):
    """Five-stage model used to reproduce the ONNXRuntime/TVM mismatch.

    Stages, in execution order: a 1x1 ``Conv2d`` (3 -> 16 channels), a
    ``GroupNorm`` with 4 groups, ``nas_model_2.TanWrapper``, a
    ``BatchNorm2d`` and a 5x5 ``Conv2d`` (16 -> 16 channels, padding 2).
    """

    def __init__(self):
        super().__init__()
        # Attribute names and creation order are kept as-is so parameter
        # names and the order of random initialisation do not change.
        self.input_conv = nn.Conv2d(in_channels=3, out_channels=16,
                                    kernel_size=1)
        self.block0 = nn.GroupNorm(num_groups=4, num_channels=16)
        self.block2 = nas_model_2.TanWrapper()
        self.block3 = nn.BatchNorm2d(num_features=16)
        self.block4 = nn.Conv2d(in_channels=16, out_channels=16,
                                kernel_size=5, padding=2)

    def forward(self, x):
        """Feed ``x`` through every stage in order and return the result."""
        stages = (
            self.input_conv,
            self.block0,
            self.block2,
            self.block3,
            self.block4,
        )
        out = x
        for stage in stages:
            out = stage(out)
        return out
def main():
    """Compare ONNXRuntime and TVM outputs for ``SimpleBugModel``.

    The model is put in eval mode, exported to ONNX (opset 19) with a random
    ``(1, 3, 32, 32)`` float32 input, and run once through ONNXRuntime on the
    CPU provider and once through a TVM Relay build (``llvm`` target,
    ``opt_level=4``). The two outputs are compared with ``assert_allclose``
    (``rtol=1e-2``, ``atol=1e-3``); a mismatch is printed rather than raised.
    """
    model = SimpleBugModel()
    model.eval()
    dummy = torch.randn(1, 3, 32, 32, dtype=torch.float32)
    input_array = dummy.numpy()

    # Export into a temporary directory so the .onnx file is removed when the
    # block exits. The previous NamedTemporaryFile(delete=False) left one
    # stray file behind on every run.
    with tempfile.TemporaryDirectory() as tmp_dir:
        onnx_path = os.path.join(tmp_dir, 'model.onnx')
        torch.onnx.export(model, dummy, onnx_path, opset_version=19,
                          input_names=['input'], output_names=['output'])

        # Reference result from ONNXRuntime.
        ort_sess = ort.InferenceSession(onnx_path,
                                        providers=['CPUExecutionProvider'])
        ort_out = ort_sess.run(None, {'input': input_array})[0]

        # Load the graph while the file still exists; TVM imports it below.
        onnx_model = onnx.load(onnx_path)

    # Import the ONNX graph into Relay and build it for the local CPU.
    shape_dict = {'input': input_array.shape}
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict,
                                           freeze_params=True)
    with tvm.transform.PassContext(opt_level=4):
        lib = relay.build(mod, target='llvm', params=params)

    m = graph_executor.GraphModule(lib['default'](tvm.cpu()))
    m.set_input('input', tvm.nd.array(input_array))
    m.run()
    tvm_out = m.get_output(0).numpy()

    try:
        assert_allclose(ort_out, tvm_out, rtol=1e-2, atol=1e-3,
                        equal_nan=True)
    except AssertionError as e:
        print('––––– MISMATCH DETECTED –––––')
        print(e)
    except Exception as e:
        # Report anything else the comparison raises, without a traceback.
        print('––––– UNEXPECTED ERROR DURING COMPARISON –––––')
        print(f'{type(e).__name__}: {e}')


if __name__ == '__main__':
    main()
```
```python
@basic_unit
class TanWrapper(nni_nn.Module):
    """Module whose forward pass applies ``torch.tan`` to its input."""

    def forward(self, x):
        """Return ``torch.tan(x)``."""
        tangent = torch.tan(x)
        return tangent
### Triage
* needs-triage
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]