Cookiee235 opened a new issue, #17270: URL: https://github.com/apache/tvm/issues/17270
### Actual behavior

```
Traceback (most recent call last):
  File "/share_container/optfuzz/res/bugs/inconsis222.py", line 258, in <module>
    np.testing.assert_allclose(before_outputs, after_outputs, 1e-3, 1e-3)
  File "/root/miniconda3/lib/python3.12/site-packages/numpy/testing/_private/utils.py", line 1504, in assert_allclose
    assert_array_compare(compare, actual, desired, err_msg=str(err_msg),
  File "/root/miniconda3/lib/python3.12/contextlib.py", line 81, in inner
    return func(*args, **kwds)
           ^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/lib/python3.12/site-packages/numpy/testing/_private/utils.py", line 718, in assert_array_compare
    flagged = func_assert_same_pos(x, y, func=isnan, hasval='nan')
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/root/miniconda3/lib/python3.12/site-packages/numpy/testing/_private/utils.py", line 688, in func_assert_same_pos
    raise AssertionError(msg)
AssertionError:
Not equal to tolerance rtol=0.001, atol=0.001

x and y nan location mismatch:
 x: array([[ 7.936000e+04, 8.032000e+04, 8.128000e+04, 8.224000e+04, 8.320000e+04, 8.416000e+04, 8.512000e+04, 8.608000e+04, 7.168000e+04, 7.252000e+04, 1.898367e+16, 7.420000e+04,...
 y: array([[ 7.936000e+04, 8.032000e+04, 8.128000e+04, 8.224000e+04, 8.320000e+04, 8.416000e+04, 8.512000e+04, 8.608000e+04, nan, 7.252000e+04, 7.336000e+04, 7.420000e+04,...
```

### Steps to reproduce

<details>
<summary>This is a complex test case; I cannot reduce it further because the root cause is unknown</summary>

```python
import tvm
from tvm import relax
import numpy as np

metadata = tvm.ir.load_json("""{
  "root": 1,
  "nodes": [
    {"type_key": ""},
    {"type_key": "Map", "keys": ["relax.expr.Constant"], "data": [2]},
    {"type_key": "Array", "data": [3]},
    {"type_key": "relax.expr.Constant", "attrs": {"_checked_type_": "11", "data": "0", "span": "0", "struct_info_": "4"}},
    {"type_key": "relax.TensorStructInfo", "attrs": {"dtype": "float32", "ndim": "2", "shape": "5", "span": "0", "vdevice": "0"}},
    {"type_key": "relax.expr.ShapeExpr", "attrs": {"_checked_type_": "10", "span": "0", "struct_info_": "9", "values": "6"}},
    {"type_key": "Array", "data": [7, 8]},
    {"type_key": "IntImm", "attrs": {"dtype": "int64", "span": "0", "value": "16"}},
    {"type_key": "IntImm", "attrs": {"dtype": "int64", "span": "0", "value": "16"}},
    {"type_key": "relax.ShapeStructInfo", "attrs": {"ndim": "2", "span": "0", "values": "6"}},
    {"type_key": "relax.ShapeType", "attrs": {"ndim": "2", "span": "0"}},
    {"type_key": "relax.DynTensorType", "attrs": {"dtype": "float32", "ndim": "2", "span": "0"}}
  ],
  "b64ndarrays": [
\"P6G0lvBAXt0AAAAAAAAAAAEAAAAAAAAAAgAAAAIgAQAQAAAAAAAAABAAAAAAAAAAAAQAAAAAAAAAAAAAAACAPwAAAEAAAEBAAACAQAAAoEAAAMBAAADgQAAAAEEAABBBAAAgQQAAMEEAAEBBAABQQQAAYEEAAHBBAACAQQAAiEEAAJBBAACYQQAAoEEAAKhBAACwQQAAuEEAAMBBAADIQQAA0EEAANhBAADgQQAA6EEAAPBBAAD4QQAAAEIAAARCAAAIQgAADEIAABBCAAAUQgAAGEIAABxCAAAgQgAAJEIAAChCAAAsQgAAMEIAADRCAAA4QgAAPEIAAEBCAABEQgAASEIAAExCAABQQgAAVEIAAFhCAABcQgAAYEIAAGRCAABoQgAAbEIAAHBCAAB0QgAAeEIAAHxCAACAQgAAgkIAAIRCAACGQgAAiEIAAIpCAACMQgAAjkIAAJBCAACSQgAAlEIAAJZCAACYQgAAmkIAAJxCAACeQgAAoEIAAKJCAACkQgAApkIAAKhCAACqQgAArEIAAK5CAACwQgAAskIAALRCAAC2QgAAuEIAALpCAAC8QgAAvkIAAMBCAADCQgAAxEIAAMZCAADIQgAAykIAAMxCAADOQgAA0EIAANJCAADUQgAA1kIAANhCAADaQgAA3EIAAN5CAADgQgAA4kIAAORCAADmQgAA6EIAAOpCAADsQgAA7kIAAPBCAADyQgAA9EIAAPZCAAD4QgAA+kIAAPxCAAD+QgAAAEMAAAFDAAACQwAAA0MAAARDAAAFQwAABkMAAAdDAAAIQwAACUMAAApDAAALQwAADEMAAA1DAAAOQwAAD0MAABBDAAARQwAAEkMAABNDAAAUQwAAFUMAABZDAAAXQwAAGEMAABlDAAAaQwAAG0MAABxDAAAdQwAAHkMAAB9DAAAgQwAAIUMAACJDAAAjQwAAJEMAACVDAAAmQwAAJ0MAAChDAAApQwAAKkMAA CtDAAAsQwAALUMAAC5DAAAvQwAAMEMAADFDAAAyQwAAM0MAADRDAAA1QwAANkMAADdDAAA4QwAAOUMAADpDAAA7QwAAPEMAAD1DAAA+QwAAP0MAAEBDAABBQwAAQkMAAENDAABEQwAARUMAAEZDAABHQwAASEMAAElDAABKQwAAS0MAAExDAABNQwAATkMAAE9DAABQQwAAUUMAAFJDAABTQwAAVEMAAFVDAABWQwAAV0MAAFhDAABZQwAAWkMAAFtDAABcQwAAXUMAAF5DAABfQwAAYEMAAGFDAABiQwAAY0MAAGRDAABlQwAAZkMAAGdDAABoQwAAaUMAAGpDAABrQwAAbEMAAG1DAABuQwAAb0MAAHBDAABxQwAAckMAAHNDAAB0QwAAdUMAAHZDAAB3QwAAeEMAAHlDAAB6QwAAe0MAAHxDAAB9QwAAfkMAAH9D\" ], \"attrs\": {\"tvm_version\": \"0.17.dev0\"} }""") from tvm.script import ir as I from tvm.script import tir as T from tvm.script import relax as R @I.ir_module class Module: @T.prim_func(private=True) def add(A: T.Buffer((T.int64(16), T.int64(16)), "float32"), B: T.Buffer((T.int64(16), T.int64(16)), "float32"), T_add: T.Buffer((T.int64(16), T.int64(16)), "float32")): T.func_attr({"tir.noalias": T.bool(True)}) # with T.block("root"): for ax0, ax1 in T.grid(T.int64(16), T.int64(16)): with T.block("T_add"): v_ax0, v_ax1 = T.axis.remap("SS", [ax0, ax1]) T.reads(A[v_ax0, v_ax1], B[v_ax0, v_ax1]) T.writes(T_add[v_ax0, v_ax1]) T_add[v_ax0, v_ax1] = A[v_ax0, v_ax1] + B[v_ax0, v_ax1] @T.prim_func(private=True) def cast(gv: T.Buffer((T.int64(16), T.int64(16)), "float32"), compute: T.Buffer((T.int64(16), T.int64(16)), "int64")): T.func_attr({"tir.noalias": T.bool(True)}) # with T.block("root"): for i0, i1 in T.grid(T.int64(16), T.int64(16)): with T.block("compute"): v_i0, v_i1 = T.axis.remap("SS", [i0, i1]) T.reads(gv[v_i0, v_i1]) T.writes(compute[v_i0, v_i1]) compute[v_i0, v_i1] = T.Cast("int64", gv[v_i0, v_i1]) @T.prim_func(private=True) def matmul(x: T.Buffer((T.int64(1), T.int64(16)), "float32"), weight: T.Buffer((T.int64(16), T.int64(32)), "float32"), matmul: T.Buffer((T.int64(1), T.int64(32)), "float32")): T.func_attr({"tir.noalias": T.bool(True)}) # with T.block("root"): for i0, i1, k in T.grid(T.int64(1), T.int64(32), T.int64(16)): with T.block("matmul"): v_i0, v_i1, v_k = T.axis.remap("SSR", [i0, i1, k]) T.reads(x[v_i0, v_k], weight[v_k, v_i1]) T.writes(matmul[v_i0, v_i1]) with T.init(): matmul[v_i0, v_i1] = T.float32(0) matmul[v_i0, v_i1] = matmul[v_i0, v_i1] + x[v_i0, v_k] * weight[v_k, v_i1] @T.prim_func(private=True) def reshape(gv: T.Buffer((T.int64(16), T.int64(16)), "float32"), T_reshape: T.Buffer((T.int64(256),), "float32")): T.func_attr({"tir.noalias": T.bool(True)}) # with T.block("root"): for ax0 in range(T.int64(256)): with T.block("T_reshape"): v_ax0 = T.axis.spatial(T.int64(256), ax0) T.reads(gv[v_ax0 % T.int64(256) // T.int64(16), v_ax0 % T.int64(16)]) 
                T.writes(T_reshape[v_ax0])
                T_reshape[v_ax0] = gv[v_ax0 % T.int64(256) // T.int64(16), v_ax0 % T.int64(16)]

    @T.prim_func(private=True)
    def reshape1(temp: T.Buffer((T.int64(16),), "float32"), T_reshape: T.Buffer((T.int64(1), T.int64(16)), "float32")):
        T.func_attr({"tir.noalias": T.bool(True)})
        # with T.block("root"):
        for ax0, ax1 in T.grid(T.int64(1), T.int64(16)):
            with T.block("T_reshape"):
                v_ax0, v_ax1 = T.axis.remap("SS", [ax0, ax1])
                T.reads(temp[v_ax1 % T.int64(16)])
                T.writes(T_reshape[v_ax0, v_ax1])
                T_reshape[v_ax0, v_ax1] = temp[v_ax1 % T.int64(16)]

    @T.prim_func(private=True)
    def reshape2(gv: T.Buffer((T.int64(16), T.int64(16)), "int64"), T_reshape: T.Buffer((T.int64(256),), "int64")):
        T.func_attr({"tir.noalias": T.bool(True)})
        # with T.block("root"):
        for ax0 in range(T.int64(256)):
            with T.block("T_reshape"):
                v_ax0 = T.axis.spatial(T.int64(256), ax0)
                T.reads(gv[v_ax0 % T.int64(256) // T.int64(16), v_ax0 % T.int64(16)])
                T.writes(T_reshape[v_ax0])
                T_reshape[v_ax0] = gv[v_ax0 % T.int64(256) // T.int64(16), v_ax0 % T.int64(16)]

    @T.prim_func(private=True)
    def reshape3(temp: T.Buffer((T.int64(32),), "int64"), T_reshape: T.Buffer((T.int64(32),), "int64")):
        T.func_attr({"tir.noalias": T.bool(True)})
        # with T.block("root"):
        for ax0 in range(T.int64(32)):
            with T.block("T_reshape"):
                v_ax0 = T.axis.spatial(T.int64(32), ax0)
                T.reads(temp[v_ax0 % T.int64(32)])
                T.writes(T_reshape[v_ax0])
                T_reshape[v_ax0] = temp[v_ax0 % T.int64(32)]

    @T.prim_func(private=True)
    def strided_slice(tensor_1dim: T.Buffer((T.int64(256),), "float32"), T_strided_slice_with_axes: T.Buffer((T.int64(16),), "float32")):
        T.func_attr({"tir.noalias": T.bool(True)})
        # with T.block("root"):
        for ax0 in range(T.int64(16)):
            with T.block("T_strided_slice_with_axes"):
                v_ax0 = T.axis.spatial(T.int64(16), ax0)
                T.reads(tensor_1dim[v_ax0])
                T.writes(T_strided_slice_with_axes[v_ax0])
                T_strided_slice_with_axes[v_ax0] = tensor_1dim[v_ax0]

    @T.prim_func(private=True)
    def strided_slice1(tensor_1dim: T.Buffer((T.int64(256),), "int64"), T_strided_slice_with_axes: T.Buffer((T.int64(32),), "int64")):
        T.func_attr({"tir.noalias": T.bool(True)})
        # with T.block("root"):
        for ax0 in range(T.int64(32)):
            with T.block("T_strided_slice_with_axes"):
                v_ax0 = T.axis.spatial(T.int64(32), ax0)
                T.reads(tensor_1dim[v_ax0])
                T.writes(T_strided_slice_with_axes[v_ax0])
                T_strided_slice_with_axes[v_ax0] = tensor_1dim[v_ax0]

    @T.prim_func(private=True)
    def take(var_weight_table: T.handle, routing_table: T.Buffer((T.int64(32),), "int64"), T_take: T.Buffer((T.int64(16), T.int64(32)), "float32")):
        T.func_attr({"tir.noalias": T.bool(True)})
        weight_table_size = T.int64()
        weight_table = T.match_buffer(var_weight_table, (T.int64(16), weight_table_size))
        # with T.block("root"):
        for ax0, ax1 in T.grid(T.int64(16), T.int64(32)):
            with T.block("T_take"):
                v_ax0, v_ax1 = T.axis.remap("SS", [ax0, ax1])
                T.reads(weight_table[v_ax0, routing_table[v_ax1]], routing_table[v_ax1])
                T.writes(T_take[v_ax0, v_ax1])
                T_take[v_ax0, v_ax1] = weight_table[v_ax0, routing_table[v_ax1]]

    @R.function
    def main_7(x: R.Tensor((1, 16), dtype="float32"), weight_table: R.Tensor((16, "weight_table_size"), dtype="float32"), routing_table: R.Tensor((32,), dtype="int64")) -> R.Tensor((1, 32), dtype="float32"):
        weight_table_size = T.int64()
        cls = Module
        with R.dataflow():
            weight = R.call_tir(cls.take, (weight_table, routing_table), out_sinfo=R.Tensor((16, 32), dtype="float32"))
            out = R.call_tir(cls.matmul, (x, weight), out_sinfo=R.Tensor((1, 32), dtype="float32"))
            R.output(out)
        return out
    @R.function
    def main() -> R.Tensor((1, 32), dtype="float32"):
        cls = Module
        gv = R.call_tir(cls.add, (metadata["relax.expr.Constant"][0], metadata["relax.expr.Constant"][0]), out_sinfo=R.Tensor((16, 16), dtype="float32"))
        tensor_1dim = R.call_tir(cls.reshape, (gv,), out_sinfo=R.Tensor((256,), dtype="float32"))
        temp = R.call_tir(cls.strided_slice, (tensor_1dim,), out_sinfo=R.Tensor((16,), dtype="float32"))
        para0 = R.call_tir(cls.reshape1, (temp,), out_sinfo=R.Tensor((1, 16), dtype="float32"))
        para1: R.Tensor((16, 16), dtype="float32") = gv
        gv_1 = R.call_tir(cls.cast, (gv,), out_sinfo=R.Tensor((16, 16), dtype="int64"))
        tensor_1dim_1 = R.call_tir(cls.reshape2, (gv_1,), out_sinfo=R.Tensor((256,), dtype="int64"))
        temp_1 = R.call_tir(cls.strided_slice1, (tensor_1dim_1,), out_sinfo=R.Tensor((32,), dtype="int64"))
        para2 = R.call_tir(cls.reshape3, (temp_1,), out_sinfo=R.Tensor((32,), dtype="int64"))
        res: R.Tensor((1, 32), dtype="float32") = cls.main_7(para0, para1, para2)
        return res


def compile_mod(mod, func_name, target, *inputs):
    ex = relax.build(mod, target='llvm')
    vm = relax.VirtualMachine(ex, tvm.cpu())
    mod_outputs = vm[f'{func_name}'](*inputs)
    mod_outputs = mod_outputs.numpy()
    return mod_outputs


mod = Module
before_outputs = compile_mod(mod, 'main', 'llvm')
mod = relax.transform.FoldConstant()(mod)
mod = relax.transform.ReorderTakeAfterMatmul()(mod)
after_outputs = compile_mod(mod, 'main', 'llvm')
np.testing.assert_allclose(before_outputs, after_outputs, 1e-3, 1e-3)
```

</details>

CC @Lunderberg @junrushao
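If it helps narrow this down, here is a minimal triage sketch, assuming each pass can be applied to the module on its own; it reuses `Module` and `compile_mod` from the script above and runs `FoldConstant` and `ReorderTakeAfterMatmul` one at a time to see which transform first introduces the NaN mismatch:

```python
import numpy as np
import tvm
from tvm import relax

# Baseline: run the unmodified module (reuses `Module` and `compile_mod`
# from the reproducer above).
baseline = compile_mod(Module, 'main', 'llvm')

# Apply each transform in isolation and compare against the baseline.
for name, pass_ctor in [
    ("FoldConstant", relax.transform.FoldConstant),
    ("ReorderTakeAfterMatmul", relax.transform.ReorderTakeAfterMatmul),
]:
    transformed = pass_ctor()(Module)
    outputs = compile_mod(transformed, 'main', 'llvm')
    # equal_nan=False, so a NaN appearing on only one side counts as a mismatch.
    ok = np.allclose(baseline, outputs, rtol=1e-3, atol=1e-3, equal_nan=False)
    print(f"{name}: {'matches baseline' if ok else 'MISMATCH'}")
```

If each pass matches the baseline on its own, the divergence comes from the combination of the two passes rather than from a single transform.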