MasterJH5574 commented on code in PR #12246: URL: https://github.com/apache/tvm/pull/12246#discussion_r933939479
########## tests/python/unittest/test_meta_schedule_space_cpu.py: ########## @@ -1201,6 +1201,180 @@ def gmm_2(X: T.Buffer[(1, 128, 128), "float32"], Y: T.Buffer[(1, 128, 128), "flo ) +def test_cpu_grp(): + # fmt: off + @T.prim_func + def grp_0(inputs: T.Buffer[(1, 56, 56, 64), "float32"], weight: T.Buffer[(3, 3, 16, 128), "float32"], conv2d_nhwc: T.Buffer[(1, 28, 28, 128), "float32"]) -> None: + # function attr dict + T.func_attr({"global_symbol": "main", "tir.noalias": True}) + # body + with T.block("root"): + T.reads() + T.writes() + T.block_attr({"meta_schedule.parallel":288, "meta_schedule.unroll_explicit":16, "meta_schedule.vectorize":64}) + PadInput = T.alloc_buffer([1, 58, 58, 64], dtype="float32") + conv2d_nhwc_global = T.alloc_buffer([1, 28, 28, 128], dtype="float32") + for i0_0, i1_0, i2_0, i3_0 in T.grid(1, 7, 1, 2): + for ax0, ax1, ax2, ax3 in T.grid(1, 9, 57, 32): + with T.block("PadInput"): + i0 = T.axis.spatial(1, ax0) + i1 = T.axis.spatial(58, i1_0 * 8 + ax1) + i2 = T.axis.spatial(58, ax2) + i3 = T.axis.spatial(64, i3_0 * 32 + ax3) + T.reads(inputs[i0, i1 - 1, i2 - 1, i3]) + T.writes(PadInput[i0, i1, i2, i3]) + PadInput[i0, i1, i2, i3] = T.if_then_else(1 <= i1 and i1 < 57 and 1 <= i2 and i2 < 57, inputs[i0, i1 - 1, i2 - 1, i3], T.float32(0), dtype="float32") + for i0_1, i1_1, i2_1, i3_1 in T.grid(1, 4, 1, 1): + for i4_0, i5_0, i6_0, i0_2, i1_2, i2_2, i3_2, i4_1, i5_1, i6_1, i0_3, i1_3, i2_3, i3_3 in T.grid(1, 3, 8, 1, 1, 4, 4, 3, 1, 2, 1, 1, 7, 16): + with T.block("conv2d_nhwc"): + n = T.axis.spatial(1, i0_3 + i0_0 + i0_1 + i0_2) + h = T.axis.spatial(28, i1_0 * 4 + i1_1 + i1_2 + i1_3) + w = T.axis.spatial(28, i2_0 * 28 + i2_1 * 28 + i2_2 * 7 + i2_3) + co = T.axis.spatial(128, i3_0 * 64 + i3_1 * 64 + i3_2 * 16 + i3_3) + rh = T.axis.reduce(3, i4_0 * 3 + i4_1) + rw = T.axis.reduce(3, i5_0 + i5_1) + rc = T.axis.reduce(16, i6_0 * 2 + i6_1) + T.reads(PadInput[n, h * 2 + rh, w * 2 + rw, co // 32 * 16 + rc], weight[rh, rw, rc, co]) + T.writes(conv2d_nhwc_global[n, h, w, co]) + T.block_attr({"meta_schedule.tiling_structure":"SSRSRS"}) + with T.init(): + conv2d_nhwc_global[n, h, w, co] = T.float32(0) + conv2d_nhwc_global[n, h, w, co] = conv2d_nhwc_global[n, h, w, co] + PadInput[n, h * 2 + rh, w * 2 + rw, co // 32 * 16 + rc] * weight[rh, rw, rc, co] + for ax0, ax1, ax2, ax3 in T.grid(1, 1, 28, 64): + with T.block("conv2d_nhwc_global"): + v0 = T.axis.spatial(1, ax0) + v1 = T.axis.spatial(28, i1_0 * 4 + i1_1 + ax1) + v2 = T.axis.spatial(28, ax2) + v3 = T.axis.spatial(128, i3_0 * 64 + ax3) + T.reads(conv2d_nhwc_global[v0, v1, v2, v3]) + T.writes(conv2d_nhwc[v0, v1, v2, v3]) + conv2d_nhwc[v0, v1, v2, v3] = conv2d_nhwc_global[v0, v1, v2, v3] + @T.prim_func Review Comment: Just out of curiosity, do we need blank lines in between 👀? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@tvm.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org