On 4/24/24 12:22, Robin Dapp wrote:
>> The dynamic icounts look sane (vs. Apr 10 snapshot) except for a
>> regression in x264 which is likely independent of the chaos going on.
>>
>>      Apr 10     |     Apr 23      |
>>   109f1b28fc94  |  6f0a646dd2fc   |
>> ----------------+-----------------+--------
>> 276,584,692,883 | 277,816,987,018 |  -0.45%
>> 913,452,236,000 | 927,291,935,180 |  -1.52%
>> 903,916,092,805 | 915,364,006,176 |  -1.27%
> x264 uses widening arithmetic so it could be the reverts.
> Can you compare the hot functions (e.g. x264_pixel_sad_16x16)

Function                                     old     new   delta
x264_pixel_sad_x4_16x8.lto_priv             5188    5288    +100
x264_pixel_sad_x4_8x16.lto_priv             5844    5924     +80
x264_pixel_sad_x3_16x8.lto_priv             3904    3980     +76
x264_pixel_sad_x4_16x16.lto_priv             834     898     +64
x264_pixel_sad_x3_8x16.lto_priv             4408    4468     +60
x264_pixel_sad_x4_8x8.lto_priv              3010    3058     +48
x264_pixel_sad_x3_8x8.lto_priv              2290    2338     +48
...
...
x264_pixel_sad_x4_4x8.lto_priv              1366    1362      -4
x264_pixel_sad_x4_4x4.lto_priv               716     712      -4
x264_pixel_sad_4x8.lto_priv                  332     328      -4
x264_pixel_sad_4x4.lto_priv                  172     168      -4
hpel_filter.lto_priv                         984     980      -4



> if anything stands out surrounding the vwadd.wv for example?

Yeah it does: not specifically in the routine you mentioned above, but
in its various brethren. See the attached objdump of
x264_pixel_sad_x4_16x16 () for the 2 cases (old build first, then new).

-Vineet
# objdump listing of x264_pixel_sad_x4_16x16 (RISC-V vector code).
# Spans 0x21872..0x21bb3 = 834 bytes, i.e. the "old" size in the size table.
#
# Register use as seen in this listing:
#   a0      base of the common block; each step reads 16 bytes from it as
#           four 4-byte groups at +0/+4/+8/+12, for 16 steps (until a0+256)
#   a1..a4  bases of the four blocks compared against it; each is advanced
#           by a5 after every step (presumably the row stride - confirm)
#   a6      output pointer (kept in s0); receives four 32-bit sums
# Four near-identical loops follow, one per a1..a4 block.
0000000000021872 <x264_pixel_sad_x4_16x16.lto_priv.0>:
# Prologue: vl=4 at e32, zero v5 and copy it into accumulators v6-v9
# (v5 stays the zero source until the last loop), save s0-s3, and set up
# the a0-based cursors plus the end marker a7 = a0+256.
   21872:       vsetivli        zero,4,e32,m1,ta,ma
   21876:       vmv.v.i v5,0
   2187a:       add     sp,sp,-32
   2187c:       add     t4,a0,4
   21880:       vmv1r.v v7,v5
   21884:       vmv1r.v v8,v5
   21888:       vmv1r.v v9,v5
   2188c:       vmv1r.v v6,v5
   21890:       add     t5,a0,8
   21894:       add     t6,a0,12
   21898:       sd      s0,24(sp)
   2189a:       sd      s1,16(sp)
   2189c:       mv      s0,a6
   2189e:       sd      s2,8(sp)
   218a0:       sd      s3,0(sp)
   218a2:       mv      a6,t4
   218a4:       mv      t2,t5
   218a6:       mv      t0,t6
   218a8:       mv      t1,a0
   218aa:       add     a7,a0,256
   218ae:       mv      t3,a0
# Loop 1 (a1 block): load four 4-byte groups from each side at e8.
   218b0:       vsetvli zero,zero,e8,mf4,ta,ma
   218b4:       add     s3,a1,4
   218b8:       add     s2,a1,8
   218bc:       vle8.v  v3,(s3)
   218c0:       vle8.v  v14,(a6)
   218c4:       vle8.v  v2,(s2)
   218c8:       vle8.v  v13,(t2)
   218cc:       add     s1,a1,12
   218d0:       vle8.v  v12,(t0)
   218d4:       vle8.v  v11,(t3)
   218d8:       vle8.v  v10,(a1)
   218dc:       vle8.v  v1,(s1)
# Widening unsigned subtract: 8-bit inputs -> 16-bit differences.
   218e0:       vwsubu.vv       v4,v14,v3
   218e4:       vwsubu.vv       v3,v13,v2
   218e8:       add     t3,t3,16
   218ea:       add     a6,a6,16
   218ec:       add     t2,t2,16
   218ee:       vwsubu.vv       v2,v12,v1
   218f2:       vwsubu.vv       v1,v11,v10
# Absolute value at e16: vmsle.vi ...,-1 flags the negative lanes and the
# masked vneg.v flips only those (mask-undisturbed policy keeps the rest).
# Each vwadd.wv then adds the 16-bit result into a 32-bit accumulator in
# place: destination and wide source are the same register.
   218f6:       vsetvli zero,zero,e16,mf2,ta,mu
   218fa:       vmsle.vi        v0,v4,-1
   218fe:       vmsle.vi        v12,v3,-1
   21902:       vmsle.vi        v11,v2,-1
   21906:       vneg.v  v4,v4,v0.t
   2190a:       vmv1r.v v0,v12
   2190e:       vmsle.vi        v10,v1,-1
   21912:       vwadd.wv        v9,v9,v4
   21916:       vneg.v  v3,v3,v0.t
   2191a:       vmv1r.v v0,v11
   2191e:       add     t0,t0,16
   21920:       vwadd.wv        v8,v8,v3
   21924:       vneg.v  v2,v2,v0.t
   21928:       vmv1r.v v0,v10
   2192c:       add     a1,a1,a5
   2192e:       vwadd.wv        v7,v7,v2
   21932:       vneg.v  v1,v1,v0.t
   21936:       vwadd.wv        v6,v6,v1
   2193a:       bne     t3,a7,218b0 <x264_pixel_sad_x4_16x16.lto_priv.0+0x3e>
# Reduction 1: add the four accumulators at e32, vredsum with a zero seed,
# store the scalar to 0(s0). Accumulators are reset from v5 and the
# a0-based cursors are rewound for the next loop.
   2193e:       vsetvli zero,zero,e32,m1,ta,ma
   21942:       vadd.vv v1,v6,v9
   21946:       li      a6,0
   21948:       vmv.s.x v2,a6
   2194c:       vadd.vv v1,v1,v8
   21950:       vmv1r.v v9,v5
   21954:       vmv1r.v v8,v5
   21958:       vadd.vv v1,v1,v7
   2195c:       vmv1r.v v6,v5
   21960:       vmv1r.v v7,v5
   21964:       vredsum.vs      v1,v1,v2
   21968:       mv      t2,t6
   2196a:       mv      t0,t5
   2196c:       mv      t3,t4
   2196e:       mv      a1,a0
   21970:       vmv.x.s a6,v1
   21974:       sw      a6,0(s0)
# Loop 2 (a2 block): same load / subtract / abs / accumulate sequence.
   21978:       vsetvli zero,zero,e8,mf4,ta,ma
   2197c:       add     s2,a2,4
   21980:       add     s1,a2,8
   21984:       vle8.v  v3,(s2)
   21988:       vle8.v  v14,(t3)
   2198c:       vle8.v  v2,(s1)
   21990:       vle8.v  v13,(t0)
   21994:       add     a6,a2,12
   21998:       vle8.v  v12,(t2)
   2199c:       vle8.v  v11,(a1)
   219a0:       vle8.v  v10,(a2)
   219a4:       vle8.v  v1,(a6)
   219a8:       vwsubu.vv       v4,v14,v3
   219ac:       vwsubu.vv       v3,v13,v2
   219b0:       add     a1,a1,16
   219b2:       add     t3,t3,16
   219b4:       add     t0,t0,16
   219b6:       vwsubu.vv       v2,v12,v1
   219ba:       vwsubu.vv       v1,v11,v10
   219be:       vsetvli zero,zero,e16,mf2,ta,mu
   219c2:       vmsle.vi        v0,v4,-1
   219c6:       vmsle.vi        v12,v3,-1
   219ca:       vmsle.vi        v11,v2,-1
   219ce:       vneg.v  v4,v4,v0.t
   219d2:       vmv1r.v v0,v12
   219d6:       vmsle.vi        v10,v1,-1
   219da:       vwadd.wv        v9,v9,v4
   219de:       vneg.v  v3,v3,v0.t
   219e2:       vmv1r.v v0,v11
   219e6:       add     t2,t2,16
   219e8:       vwadd.wv        v7,v7,v3
   219ec:       vneg.v  v2,v2,v0.t
   219f0:       vmv1r.v v0,v10
   219f4:       add     a2,a2,a5
   219f6:       vwadd.wv        v8,v8,v2
   219fa:       vneg.v  v1,v1,v0.t
   219fe:       vwadd.wv        v6,v6,v1
   21a02:       bne     a1,a7,21978 <x264_pixel_sad_x4_16x16.lto_priv.0+0x106>
# Reduction 2: sum accumulators, reduce, store to 4(s0); reset and rewind.
   21a06:       vsetvli zero,zero,e32,m1,ta,ma
   21a0a:       vadd.vv v1,v6,v9
   21a0e:       li      a1,0
   21a10:       vmv.s.x v2,a1
   21a14:       vadd.vv v1,v1,v7
   21a18:       vmv1r.v v9,v5
   21a1c:       vmv1r.v v7,v5
   21a20:       vadd.vv v1,v1,v8
   21a24:       vmv1r.v v6,v5
   21a28:       vmv1r.v v8,v5
   21a2c:       vredsum.vs      v1,v1,v2
   21a30:       mv      a2,a0
   21a32:       mv      t3,t6
   21a34:       mv      a0,t5
   21a36:       mv      a1,t4
   21a38:       vmv.x.s a6,v1
   21a3c:       sw      a6,4(s0)
# Loop 3 (a3 block): same sequence; a0/a1/a2 are reused here as cursors.
   21a40:       vsetvli zero,zero,e8,mf4,ta,ma
   21a44:       add     t2,a3,4
   21a48:       add     t0,a3,8
   21a4c:       vle8.v  v3,(t2)
   21a50:       vle8.v  v14,(a1)
   21a54:       vle8.v  v2,(t0)
   21a58:       vle8.v  v13,(a0)
   21a5c:       add     a6,a3,12
   21a60:       vle8.v  v12,(t3)
   21a64:       vle8.v  v11,(a2)
   21a68:       vle8.v  v10,(a3)
   21a6c:       vle8.v  v1,(a6)
   21a70:       vwsubu.vv       v4,v14,v3
   21a74:       vwsubu.vv       v3,v13,v2
   21a78:       add     a2,a2,16
   21a7a:       add     a1,a1,16
   21a7c:       add     a0,a0,16
   21a7e:       vwsubu.vv       v2,v12,v1
   21a82:       vwsubu.vv       v1,v11,v10
   21a86:       vsetvli zero,zero,e16,mf2,ta,mu
   21a8a:       vmsle.vi        v0,v4,-1
   21a8e:       vmsle.vi        v12,v3,-1
   21a92:       vmsle.vi        v11,v2,-1
   21a96:       vneg.v  v4,v4,v0.t
   21a9a:       vmv1r.v v0,v12
   21a9e:       vmsle.vi        v10,v1,-1
   21aa2:       vwadd.wv        v9,v9,v4
   21aa6:       vneg.v  v3,v3,v0.t
   21aaa:       vmv1r.v v0,v11
   21aae:       add     t3,t3,16
   21ab0:       vwadd.wv        v8,v8,v3
   21ab4:       vneg.v  v2,v2,v0.t
   21ab8:       vmv1r.v v0,v10
   21abc:       add     a3,a3,a5
   21abe:       vwadd.wv        v7,v7,v2
   21ac2:       vneg.v  v1,v1,v0.t
   21ac6:       vwadd.wv        v6,v6,v1
   21aca:       bne     a7,a2,21a40 <x264_pixel_sad_x4_16x16.lto_priv.0+0x1ce>
# Reduction 3: store to 8(s0). Only v6-v8 are reset; the last loop uses
# v5 itself as the fourth accumulator and v9 as a load temporary.
   21ace:       vsetvli zero,zero,e32,m1,ta,ma
   21ad2:       vadd.vv v1,v6,v9
   21ad6:       li      a2,0
   21ad8:       vmv.s.x v2,a2
   21adc:       vadd.vv v1,v1,v8
   21ae0:       vmv1r.v v6,v5
   21ae4:       vmv1r.v v8,v5
   21ae8:       vadd.vv v1,v1,v7
   21aec:       vmv1r.v v7,v5
   21af0:       vredsum.vs      v1,v1,v2
   21af4:       vmv.x.s a3,v1
   21af8:       sw      a3,8(s0)
# Loop 4 (a4 block): reads the a0-side data through t1/t4/t5/t6 directly.
   21afa:       vsetvli zero,zero,e8,mf4,ta,ma
   21afe:       add     a1,a4,4
   21b02:       add     a2,a4,8
   21b06:       vle8.v  v3,(a1)
   21b0a:       vle8.v  v13,(t4)
   21b0e:       vle8.v  v2,(a2)
   21b12:       vle8.v  v12,(t5)
   21b16:       add     a3,a4,12
   21b1a:       vle8.v  v11,(t6)
   21b1e:       vle8.v  v10,(t1)
   21b22:       vle8.v  v9,(a4)
   21b26:       vle8.v  v1,(a3)
   21b2a:       vwsubu.vv       v4,v13,v3
   21b2e:       vwsubu.vv       v3,v12,v2
   21b32:       add     t1,t1,16
   21b34:       add     t4,t4,16
   21b36:       add     t5,t5,16
   21b38:       vwsubu.vv       v2,v11,v1
   21b3c:       vwsubu.vv       v1,v10,v9
   21b40:       vsetvli zero,zero,e16,mf2,ta,mu
   21b44:       vmsle.vi        v0,v4,-1
   21b48:       vmsle.vi        v11,v3,-1
   21b4c:       vmsle.vi        v10,v2,-1
   21b50:       vneg.v  v4,v4,v0.t
   21b54:       vmv1r.v v0,v11
   21b58:       vmsle.vi        v9,v1,-1
   21b5c:       vwadd.wv        v8,v8,v4
   21b60:       vneg.v  v3,v3,v0.t
   21b64:       vmv1r.v v0,v10
   21b68:       add     t6,t6,16
   21b6a:       vwadd.wv        v7,v7,v3
   21b6e:       vneg.v  v2,v2,v0.t
   21b72:       vmv1r.v v0,v9
   21b76:       add     a4,a4,a5
   21b78:       vwadd.wv        v5,v5,v2
   21b7c:       vneg.v  v1,v1,v0.t
   21b80:       vwadd.wv        v6,v6,v1
   21b84:       bne     a7,t1,21afa <x264_pixel_sad_x4_16x16.lto_priv.0+0x288>
# Reduction 4: store to 12(s0), then restore s0-s3 and return.
   21b88:       vsetvli zero,zero,e32,m1,ta,ma
   21b8c:       vadd.vv v1,v6,v8
   21b90:       li      a4,0
   21b92:       vmv.s.x v2,a4
   21b96:       vadd.vv v1,v1,v7
   21b9a:       vadd.vv v1,v1,v5
   21b9e:       vredsum.vs      v1,v1,v2
   21ba2:       vmv.x.s a5,v1
   21ba6:       sw      a5,12(s0)
   21ba8:       ld      s0,24(sp)
   21baa:       ld      s1,16(sp)
   21bac:       ld      s2,8(sp)
   21bae:       ld      s3,0(sp)
   21bb0:       add     sp,sp,32
   21bb2:       ret
# objdump listing of x264_pixel_sad_x4_16x16 (RISC-V vector code).
# Spans 0x21892..0x21c13 = 898 bytes, i.e. the "new" size in the size table.
#
# Register use as seen in this listing:
#   a0      base of the common block; each step reads 16 bytes from it as
#           four 4-byte groups at +0/+4/+8/+12, for 16 steps (until a0+256)
#   a1..a4  bases of the four blocks compared against it; each is advanced
#           by a5 after every step (presumably the row stride - confirm)
#   a6      output pointer (kept in s0); receives four 32-bit sums
#
# Point of interest: in every loop each 32-bit accumulator is first copied
# with vmv1r.v and the following vwadd.wv takes that copy as its wide
# source, so destination and source no longer share a register. That is
# 4 extra 4-byte moves per loop x 4 loops = 64 bytes, the +64 size delta.
0000000000021892 <x264_pixel_sad_x4_16x16.lto_priv.0>:
# Prologue: vl=4 at e32, zero v5 and copy it into accumulators v6-v9
# (v5 stays the zero source until the last loop), save s0-s3, and set up
# the a0-based cursors plus the end marker a7 = a0+256.
   21892:       vsetivli        zero,4,e32,m1,ta,ma
   21896:       vmv.v.i v5,0
   2189a:       add     sp,sp,-32
   2189c:       add     t4,a0,4
   218a0:       vmv1r.v v7,v5
   218a4:       vmv1r.v v8,v5
   218a8:       vmv1r.v v9,v5
   218ac:       vmv1r.v v6,v5
   218b0:       add     t5,a0,8
   218b4:       add     t6,a0,12
   218b8:       sd      s0,24(sp)
   218ba:       sd      s1,16(sp)
   218bc:       mv      s0,a6
   218be:       sd      s2,8(sp)
   218c0:       sd      s3,0(sp)
   218c2:       mv      a6,t4
   218c4:       mv      t2,t5
   218c6:       mv      t0,t6
   218c8:       mv      t1,a0
   218ca:       add     a7,a0,256
   218ce:       mv      t3,a0
# Loop 1 (a1 block): load four 4-byte groups from each side at e8.
   218d0:       vsetvli zero,zero,e8,mf4,ta,ma
   218d4:       add     s3,a1,4
   218d8:       add     s2,a1,8
   218dc:       vle8.v  v3,(s3)
   218e0:       vle8.v  v14,(a6)
   218e4:       vle8.v  v2,(s2)
   218e8:       vle8.v  v13,(t2)
   218ec:       add     s1,a1,12
   218f0:       vle8.v  v12,(t0)
   218f4:       vle8.v  v11,(t3)
   218f8:       vle8.v  v10,(a1)
   218fc:       vle8.v  v1,(s1)
# Widening unsigned subtract: 8-bit inputs -> 16-bit differences.
   21900:       vwsubu.vv       v4,v14,v3
   21904:       vwsubu.vv       v3,v13,v2
   21908:       add     t3,t3,16
   2190a:       add     a6,a6,16
   2190c:       add     t2,t2,16
   2190e:       vwsubu.vv       v2,v12,v1
   21912:       vwsubu.vv       v1,v11,v10
# Absolute value at e16: vmsle.vi ...,-1 flags the negative lanes and the
# masked vneg.v flips only those (mask-undisturbed policy keeps the rest).
   21916:       vsetvli zero,zero,e16,mf2,ta,mu
   2191a:       vmsle.vi        v0,v4,-1
   2191e:       vmsle.vi        v12,v3,-1
   21922:       vmsle.vi        v11,v2,-1
   21926:       vneg.v  v4,v4,v0.t
   2192a:       vmv1r.v v0,v12
   2192e:       vmsle.vi        v10,v1,-1
   21932:       add     t0,t0,16
   21934:       vneg.v  v3,v3,v0.t
   21938:       vmv1r.v v0,v11
   2193c:       add     a1,a1,a5
   2193e:       vneg.v  v2,v2,v0.t
   21942:       vmv1r.v v0,v10
# Accumulate: each accumulator (v9, v8, v7, v6) is copied to a scratch
# register first, then vwadd.wv writes accumulator = copy + 16-bit value.
   21946:       vmv1r.v v10,v9
   2194a:       vneg.v  v1,v1,v0.t
   2194e:       vwadd.wv        v9,v10,v4
   21952:       vmv1r.v v4,v8
   21956:       vwadd.wv        v8,v4,v3
   2195a:       vmv1r.v v3,v7
   2195e:       vwadd.wv        v7,v3,v2
   21962:       vmv1r.v v2,v6
   21966:       vwadd.wv        v6,v2,v1
   2196a:       bne     t3,a7,218d0 <x264_pixel_sad_x4_16x16.lto_priv.0+0x3e>
# Reduction 1: add the four accumulators at e32, vredsum with a zero seed,
# store the scalar to 0(s0). Accumulators are reset from v5 and the
# a0-based cursors are rewound for the next loop.
   2196e:       vsetvli zero,zero,e32,m1,ta,ma
   21972:       vadd.vv v1,v6,v9
   21976:       li      a6,0
   21978:       vmv.s.x v2,a6
   2197c:       vadd.vv v1,v1,v8
   21980:       vmv1r.v v9,v5
   21984:       vmv1r.v v8,v5
   21988:       vadd.vv v1,v1,v7
   2198c:       vmv1r.v v6,v5
   21990:       vmv1r.v v7,v5
   21994:       vredsum.vs      v1,v1,v2
   21998:       mv      t2,t6
   2199a:       mv      t0,t5
   2199c:       mv      t3,t4
   2199e:       mv      a1,a0
   219a0:       vmv.x.s a6,v1
   219a4:       sw      a6,0(s0)
# Loop 2 (a2 block): same load / subtract / abs sequence.
   219a8:       vsetvli zero,zero,e8,mf4,ta,ma
   219ac:       add     s2,a2,4
   219b0:       add     s1,a2,8
   219b4:       vle8.v  v3,(s2)
   219b8:       vle8.v  v14,(t3)
   219bc:       vle8.v  v2,(s1)
   219c0:       vle8.v  v13,(t0)
   219c4:       add     a6,a2,12
   219c8:       vle8.v  v12,(t2)
   219cc:       vle8.v  v11,(a1)
   219d0:       vle8.v  v10,(a2)
   219d4:       vle8.v  v1,(a6)
   219d8:       vwsubu.vv       v4,v14,v3
   219dc:       vwsubu.vv       v3,v13,v2
   219e0:       add     a1,a1,16
   219e2:       add     t3,t3,16
   219e4:       add     t0,t0,16
   219e6:       vwsubu.vv       v2,v12,v1
   219ea:       vwsubu.vv       v1,v11,v10
   219ee:       vsetvli zero,zero,e16,mf2,ta,mu
   219f2:       vmsle.vi        v0,v4,-1
   219f6:       vmsle.vi        v12,v3,-1
   219fa:       vmsle.vi        v11,v2,-1
   219fe:       vneg.v  v4,v4,v0.t
   21a02:       vmv1r.v v0,v12
   21a06:       vmsle.vi        v10,v1,-1
   21a0a:       add     t2,t2,16
   21a0c:       vneg.v  v3,v3,v0.t
   21a10:       vmv1r.v v0,v11
   21a14:       add     a2,a2,a5
   21a16:       vneg.v  v2,v2,v0.t
   21a1a:       vmv1r.v v0,v10
# Accumulate via copies (v9, v7, v8, v6), as in loop 1.
   21a1e:       vmv1r.v v10,v9
   21a22:       vneg.v  v1,v1,v0.t
   21a26:       vwadd.wv        v9,v10,v4
   21a2a:       vmv1r.v v4,v7
   21a2e:       vwadd.wv        v7,v4,v3
   21a32:       vmv1r.v v3,v8
   21a36:       vwadd.wv        v8,v3,v2
   21a3a:       vmv1r.v v2,v6
   21a3e:       vwadd.wv        v6,v2,v1
   21a42:       bne     a1,a7,219a8 <x264_pixel_sad_x4_16x16.lto_priv.0+0x116>
# Reduction 2: sum accumulators, reduce, store to 4(s0); reset and rewind.
   21a46:       vsetvli zero,zero,e32,m1,ta,ma
   21a4a:       vadd.vv v1,v6,v9
   21a4e:       li      a1,0
   21a50:       vmv.s.x v2,a1
   21a54:       vadd.vv v1,v1,v7
   21a58:       vmv1r.v v9,v5
   21a5c:       vmv1r.v v7,v5
   21a60:       vadd.vv v1,v1,v8
   21a64:       vmv1r.v v6,v5
   21a68:       vmv1r.v v8,v5
   21a6c:       vredsum.vs      v1,v1,v2
   21a70:       mv      a2,a0
   21a72:       mv      t3,t6
   21a74:       mv      a0,t5
   21a76:       mv      a1,t4
   21a78:       vmv.x.s a6,v1
   21a7c:       sw      a6,4(s0)
# Loop 3 (a3 block): same sequence; a0/a1/a2 are reused here as cursors.
   21a80:       vsetvli zero,zero,e8,mf4,ta,ma
   21a84:       add     t2,a3,4
   21a88:       add     t0,a3,8
   21a8c:       vle8.v  v3,(t2)
   21a90:       vle8.v  v14,(a1)
   21a94:       vle8.v  v2,(t0)
   21a98:       vle8.v  v13,(a0)
   21a9c:       add     a6,a3,12
   21aa0:       vle8.v  v12,(t3)
   21aa4:       vle8.v  v11,(a2)
   21aa8:       vle8.v  v10,(a3)
   21aac:       vle8.v  v1,(a6)
   21ab0:       vwsubu.vv       v4,v14,v3
   21ab4:       vwsubu.vv       v3,v13,v2
   21ab8:       add     a2,a2,16
   21aba:       add     a1,a1,16
   21abc:       add     a0,a0,16
   21abe:       vwsubu.vv       v2,v12,v1
   21ac2:       vwsubu.vv       v1,v11,v10
   21ac6:       vsetvli zero,zero,e16,mf2,ta,mu
   21aca:       vmsle.vi        v0,v4,-1
   21ace:       vmsle.vi        v12,v3,-1
   21ad2:       vmsle.vi        v11,v2,-1
   21ad6:       vneg.v  v4,v4,v0.t
   21ada:       vmv1r.v v0,v12
   21ade:       vmsle.vi        v10,v1,-1
   21ae2:       add     t3,t3,16
   21ae4:       vneg.v  v3,v3,v0.t
   21ae8:       vmv1r.v v0,v11
   21aec:       add     a3,a3,a5
   21aee:       vneg.v  v2,v2,v0.t
   21af2:       vmv1r.v v0,v10
# Accumulate via copies (v9, v8, v7, v6), as in loop 1.
   21af6:       vmv1r.v v10,v9
   21afa:       vneg.v  v1,v1,v0.t
   21afe:       vwadd.wv        v9,v10,v4
   21b02:       vmv1r.v v4,v8
   21b06:       vwadd.wv        v8,v4,v3
   21b0a:       vmv1r.v v3,v7
   21b0e:       vwadd.wv        v7,v3,v2
   21b12:       vmv1r.v v2,v6
   21b16:       vwadd.wv        v6,v2,v1
   21b1a:       bne     a7,a2,21a80 <x264_pixel_sad_x4_16x16.lto_priv.0+0x1ee>
# Reduction 3: store to 8(s0). Only v6-v8 are reset; the last loop uses
# v5 itself as the fourth accumulator and v9 as a load temporary.
   21b1e:       vsetvli zero,zero,e32,m1,ta,ma
   21b22:       vadd.vv v1,v6,v9
   21b26:       li      a2,0
   21b28:       vmv.s.x v2,a2
   21b2c:       vadd.vv v1,v1,v8
   21b30:       vmv1r.v v6,v5
   21b34:       vmv1r.v v8,v5
   21b38:       vadd.vv v1,v1,v7
   21b3c:       vmv1r.v v7,v5
   21b40:       vredsum.vs      v1,v1,v2
   21b44:       vmv.x.s a3,v1
   21b48:       sw      a3,8(s0)
# Loop 4 (a4 block): reads the a0-side data through t1/t4/t5/t6 directly.
   21b4a:       vsetvli zero,zero,e8,mf4,ta,ma
   21b4e:       add     a1,a4,4
   21b52:       add     a2,a4,8
   21b56:       vle8.v  v3,(a1)
   21b5a:       vle8.v  v13,(t4)
   21b5e:       vle8.v  v2,(a2)
   21b62:       vle8.v  v12,(t5)
   21b66:       add     a3,a4,12
   21b6a:       vle8.v  v11,(t6)
   21b6e:       vle8.v  v10,(t1)
   21b72:       vle8.v  v9,(a4)
   21b76:       vle8.v  v1,(a3)
   21b7a:       vwsubu.vv       v4,v13,v3
   21b7e:       vwsubu.vv       v3,v12,v2
   21b82:       add     t1,t1,16
   21b84:       add     t4,t4,16
   21b86:       add     t5,t5,16
   21b88:       vwsubu.vv       v2,v11,v1
   21b8c:       vwsubu.vv       v1,v10,v9
   21b90:       vsetvli zero,zero,e16,mf2,ta,mu
   21b94:       vmsle.vi        v0,v4,-1
   21b98:       vmsle.vi        v11,v3,-1
   21b9c:       vmsle.vi        v10,v2,-1
   21ba0:       vneg.v  v4,v4,v0.t
   21ba4:       vmv1r.v v0,v11
   21ba8:       vmsle.vi        v9,v1,-1
   21bac:       add     t6,t6,16
   21bae:       vneg.v  v3,v3,v0.t
   21bb2:       vmv1r.v v0,v10
   21bb6:       add     a4,a4,a5
   21bb8:       vneg.v  v2,v2,v0.t
   21bbc:       vmv1r.v v0,v9
# Accumulate via copies (v8, v7, v5, v6).
   21bc0:       vmv1r.v v9,v8
   21bc4:       vneg.v  v1,v1,v0.t
   21bc8:       vwadd.wv        v8,v9,v4
   21bcc:       vmv1r.v v4,v7
   21bd0:       vwadd.wv        v7,v4,v3
   21bd4:       vmv1r.v v3,v5
   21bd8:       vwadd.wv        v5,v3,v2
   21bdc:       vmv1r.v v2,v6
   21be0:       vwadd.wv        v6,v2,v1
   21be4:       bne     a7,t1,21b4a <x264_pixel_sad_x4_16x16.lto_priv.0+0x2b8>
# Reduction 4: store to 12(s0), then restore s0-s3 and return.
   21be8:       vsetvli zero,zero,e32,m1,ta,ma
   21bec:       vadd.vv v1,v6,v8
   21bf0:       li      a4,0
   21bf2:       vmv.s.x v2,a4
   21bf6:       vadd.vv v1,v1,v7
   21bfa:       vadd.vv v1,v1,v5
   21bfe:       vredsum.vs      v1,v1,v2
   21c02:       vmv.x.s a5,v1
   21c06:       sw      a5,12(s0)
   21c08:       ld      s0,24(sp)
   21c0a:       ld      s1,16(sp)
   21c0c:       ld      s2,8(sp)
   21c0e:       ld      s3,0(sp)
   21c10:       add     sp,sp,32
   21c12:       ret

Reply via email to