The verification code I extracted is:
```
import logging
import numpy as np
import tvm
import random
import sys
import math
import timeit
from tvm import relay
from tvm import autotvm
def numpyBaseline(M, K, N):
    np_repeat = 100
    np_running_time = timeit.timeit(setup='import numpy\n'
                                          'M = ' + str(M) + '\n'
                                          'K = ' + str(K) + '\n'
                                          'N = ' + str(N) + '\n'
                                          'dtype = "float32"\n'
                                          'a = numpy.random.rand(M, K).astype(dtype)\n'
                                          'b = numpy.random.rand(K, N).astype(dtype)\n',
                                    stmt='answer = numpy.dot(a, b)',
                                    number=np_repeat)
    print("Numpy running time: %f" % (np_running_time / np_repeat))
def buildandevaluation(s, A, B, C, a, b, c, ctx, c_np):
    # note: relay.build_config only affects relay builds, not tvm.build of a TE schedule
    with relay.build_config(opt_level=3):
        func = tvm.build(s, [A, B, C], target=target, name='gemm')
    assert func
    func(a, b, c)
    # print(func)
    # print(func.get_source())
    # print(func.get_function('gemm'))
    # with open("gemm.ll", "w", encoding='utf-8') as f:
    #     f.write(str(func.get_source()))
    # from tvm.contrib import util
    # temp = util.tempdir()
    # path_dso = temp.relpath("temp.so")
    # path = temp.relpath('lib.tar')
    # func.export_library(path_dso)
    # m = tvm.module.load(path_dso)
    # print(m.get_source())
    tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
    evaluator = func.time_evaluator(func.entry_name, ctx, number=100)
    print('time: %f' % evaluator(a, b, c).mean)
    # print(tvm.lower(s, [A, B, C], simple_mode=True))
###########################################################################################################
def schedule_defination_gemm_dense_default_nopack(M, K, N, dtype, kts):
    '''e2e dense nopack (default) schedule'''
    data = tvm.placeholder((M, K), name='data', dtype=dtype)
    weight = tvm.placeholder((N, K), name='weight', dtype=dtype)
    # create tuning space
    cfg = autotvm.get_config()
    cfg.define_split("tile_y", M, num_outputs=2, policy="oracle")
    cfg.define_split("tile_x", N, num_outputs=2, policy="oracle")
    cfg.define_split("tile_k", K, num_outputs=2, policy="oracle")
    # vec = cfg["tile_k"].size[-1]
    vec = kts
    k = tvm.reduce_axis((0, K // vec), "k")
    # k = tvm.reduce_axis((0, math.ceil(K / vec)), "k")
    CC = tvm.compute((M, N, vec),
                     lambda z, y, x: tvm.sum(data[z, k * vec + x].astype(dtype) *
                                             weight[y, k * vec + x].astype(dtype),
                                             axis=k))
    kk = tvm.reduce_axis((0, vec), "kk")
    C = tvm.compute((M, N), lambda y, x: tvm.sum(CC[y, x, kk], axis=kk))
    s = tvm.create_schedule(C.op)
    return s, [data, weight, C]
def schedule_defination_gemm_dense_pack_default(M, K, N, dtype, bn):
    '''e2e dense pack schedule'''
    data = tvm.placeholder((M, K), name='data', dtype=dtype)
    weight = tvm.placeholder((N, K), name='weight', dtype=dtype)
    # create tuning space
    cfg = autotvm.get_config()
    cfg.define_split("tile_y", M, num_outputs=3, policy="verbose")
    cfg.define_split("tile_x", N, num_outputs=3, policy="verbose")
    cfg.define_split("tile_k", K, num_outputs=2, policy="verbose")
    # packw_bn = cfg["tile_x"].size[-1]
    packw_bn = bn
    packw_shape = (N // packw_bn, K, packw_bn)
    packw = tvm.compute(packw_shape,
                        lambda z, y, x: weight[z * packw_bn + x, y],
                        name="packed_weight")
    k = tvm.reduce_axis((0, K), name="k")
    C = tvm.compute((M, N),
                    lambda y, x: tvm.sum(
                        data[y, k].astype(dtype) *
                        packw[tvm.indexdiv(x, packw_bn), k,
                              tvm.indexmod(x, packw_bn)].astype(dtype),
                        axis=k))
    s = tvm.create_schedule(C.op)
    return s, [data, weight, C]
###########################################################################################################
def schedule_optimization_dense_default_nopack(s, C, mts, nts):
    kk, = s[C].op.reduce_axis
    # yo, yi = cfg["tile_y"].apply(s, C, y)
    # xo, xi = cfg["tile_x"].apply(s, C, x)
    yo, xo, yi, xi = s[C].tile(C.op.axis[0], C.op.axis[1], mts, nts)
    s[C].reorder(yo, xo, yi, xi)
    xyo = s[C].fuse(yo, xo)
    s[C].parallel(xyo)
    # s[C].unroll(kk)
    CC, = s[C].op.input_tensors
    s[CC].compute_at(s[C], xyo)
    z, y, x = s[CC].op.axis
    k, = s[CC].op.reduce_axis
    yz = s[CC].fuse(z, y)
    s[CC].reorder(k, yz, x)
    # s[CC].unroll(yz)
    s[CC].vectorize(x)
    data, weight, = s[CC].op.input_tensors
    print(tvm.lower(s, [data, weight, CC, C], simple_mode=True))
def schedule_optimization_dense_pack_default(s, C, mts, kts, nts):
    A, packedB = s[C].op.input_tensors
    z, y, x = s[packedB].op.axis
    s[packedB].reorder(z, x, y)
    s[packedB].parallel(z)
    s[packedB].vectorize(x)
    CC = s.cache_write(C, "global")
    k, = s[CC].op.reduce_axis
    # yo, yi = cfg["tile_y"].apply(s, C, y)
    # xo, xi = cfg["tile_x"].apply(s, C, x)
    yto, yi = s[C].split(C.op.axis[0], factor=mts)
    xto, xi = s[C].split(C.op.axis[1], factor=nts)
    yt, yo = s[C].split(yto, factor=4)
    xt, xo = s[C].split(xto, factor=2)
    # yo, xo, yi, xi = s[C].tile(C.op.axis[0], C.op.axis[1], mts, nts)
    s[C].reorder(yt, xt, yo, xo, yi, xi)
    yxt = s[C].fuse(yt, xt)
    s[C].parallel(yxt)
    xyo = s[C].fuse(yo, xo)
    # s[C].unroll(yi)
    s[C].vectorize(xi)
    s[CC].compute_at(s[C], xyo)
    y, x = s[CC].op.axis
    # ko, ki = cfg["tile_k"].apply(s, CC, k)
    ko, ki = s[CC].split(k, factor=kts)
    s[CC].reorder(ko, ki, y, x)
    s[CC].vectorize(x)
    # s[CC].unroll(y)
    # s[CC].unroll(ki)
    weight, = s[packedB].op.input_tensors
    print(tvm.lower(s, [A, weight, packedB, CC, C], simple_mode=True))
###########################################################################################################
def dense_nopack_0_T(M, K, N, dtype, mts, kts, nts):
    s, [data, weight, C] = schedule_defination_gemm_dense_default_nopack(M, K, N, dtype, kts)
    schedule_optimization_dense_default_nopack(s, C, mts, nts)
    return s, [data, weight, C]
def dense_pack_default(M, K, N, dtype, bn, mts, kts, nts):
    s, [data, weight, C] = schedule_defination_gemm_dense_pack_default(M, K, N, dtype, bn)
    schedule_optimization_dense_pack_default(s, C, mts, kts, nts)
    return s, [data, weight, C]
###########################################################################################################
if __name__ == '__main__':
    M = int(sys.argv[1])
    K = int(sys.argv[2])
    N = int(sys.argv[3])
    mts = int(sys.argv[4])
    kts = int(sys.argv[5])
    nts = int(sys.argv[6])
    random.seed(30)
    target = 'llvm -mcpu=core-avx2'
    dtype = 'float32'
    ctx = tvm.context(target, 0)
    k = tvm.reduce_axis((0, K), 'k')
    A = tvm.placeholder((M, K), name='A')
    B = tvm.placeholder((K, N), name='B')
    BT = tvm.placeholder((N, K), name='BT')
    C = tvm.compute((M, N), lambda x, y: tvm.sum(A[x, k] * B[k, y], axis=k), name='C')
    CT = tvm.compute((M, N), lambda x, y: tvm.sum(A[x, k] * BT[y, k], axis=k), name='CT')
    a_np = np.random.rand(M, K).astype(dtype)
    b_np = np.random.rand(K, N).astype(dtype)
    bt_np = np.random.rand(N, K).astype(dtype)
    c_np = a_np.dot(b_np)
    ct_np = a_np.dot(bt_np.T)
    a = tvm.nd.array(a_np, ctx)
    b = tvm.nd.array(b_np, ctx)
    bt = tvm.nd.array(bt_np, ctx)
    # initialize the outputs with zeros so the correctness check cannot pass on stale data
    c = tvm.nd.array(np.zeros((M, N), dtype=dtype), ctx)
    ct = tvm.nd.array(np.zeros((M, N), dtype=dtype), ctx)
    # numpyBaseline(M, K, N)
    s = tvm.create_schedule(C.op)
    st = tvm.create_schedule(CT.op)
    print("dense_nopack_0_T")
    s, [data, weight, out] = dense_nopack_0_T(M, K, N, dtype, mts, kts, nts)
    buildandevaluation(s, data, weight, out, a, bt, ct, ctx, ct_np)
```
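
To compare several tile sizes against each other, a small sweep on top of the functions above can rebuild the schedule for each candidate and check every result against the numpy reference before timing it. This is only a minimal sketch: the helper name `sweep_tile_sizes` and the candidate list are illustrative and not part of the original script, and it assumes the same pre-0.7 TVM API the script already uses (and that each `kts` divides `K`).

```
# Minimal sketch: sweep candidate (mts, kts, nts) tile sizes, verify each
# schedule against numpy.dot, then time it. Candidate values are illustrative.
def sweep_tile_sizes(M, K, N, dtype, ctx, target, candidates):
    a_np = np.random.rand(M, K).astype(dtype)
    bt_np = np.random.rand(N, K).astype(dtype)
    ref = a_np.dot(bt_np.T)                                   # numpy reference
    a = tvm.nd.array(a_np, ctx)
    bt = tvm.nd.array(bt_np, ctx)
    for mts, kts, nts in candidates:                          # kts must divide K
        s, [data, weight, out] = dense_nopack_0_T(M, K, N, dtype, mts, kts, nts)
        func = tvm.build(s, [data, weight, out], target=target, name='gemm')
        c = tvm.nd.array(np.zeros((M, N), dtype=dtype), ctx)  # start from zeros
        func(a, bt, c)
        # correctness first, then performance
        tvm.testing.assert_allclose(c.asnumpy(), ref, rtol=1e-5)
        t = func.time_evaluator(func.entry_name, ctx, number=100)(a, bt, c).mean
        print('tile (mts=%d, kts=%d, nts=%d): %f s' % (mts, kts, nts, t))

# e.g. sweep_tile_sizes(M, K, N, dtype, ctx, target, [(4, 4, 16), (8, 4, 32)])
```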