# HG changeset patch
# User Rajesh Paulraj
# Date 1447764366 -19800
# Tue Nov 17 18:16:06 2015 +0530
# Node ID 069c502d4957f36bb5887158b13dfe94d4e0f737
# Parent e8f9a60d4cd9e73c9f2baf05c2ccda5af1892b46
asm: fix intrapred_planar32x32 sse2 code for main12
intra_planar_32x32 7.13x
# HG changeset patch
# User Rajesh Paulraj
# Date 1447410710 -19800
# Fri Nov 13 16:01:50 2015 +0530
# Node ID 7991d313fffb10f5a067dba69241ced1be73c013
# Parent 25264cd68abe8789351cfbcb8e100e0238fd3ca8
asm: enable intrapred_planar32x32 sse4 code for main12
diff -r 25264cd68abe -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1447331262 -19800
# Thu Nov 12 17:57:42 2015 +0530
# Node ID fcfb9bd67e1e00a745ce0bb6f151aa9851c197c1
# Parent 45ea73c63c12c66e5e5e777e80853c8b3cadf101
asm: fix intrapred_planar32x32 avx2 code for main12
sse4:
intra_planar_32x32 5.09x
# HG changeset patch
# User Rajesh Paulraj
# Date 1446527171 -19800
# Tue Nov 03 10:36:11 2015 +0530
# Node ID d2c889865d6ff99cfeab4b69e898a4d0514f2440
# Parent 544dfa2c3a16efd0f679374ccc654aa4aefb1a49
asm: fix intrapred_planar16x16 avx2 code for main12
sse4:
intra_planar_16x16 4.42x
# HG changeset patch
# User Rajesh Paulraj
# Date 1446012729 -19800
# Wed Oct 28 11:42:09 2015 +0530
# Node ID 544dfa2c3a16efd0f679374ccc654aa4aefb1a49
# Parent 6563218ce342c30bfd4f9bc172a1dab510e6e55b
asm: fix intrapred_planar16x16 sse2 code for main12
intra_planar_16x16 7.00x
# HG changeset patch
# User Rajesh Paulraj
# Date 1444288440 -19800
# Thu Oct 08 12:44:00 2015 +0530
# Node ID 8a91a65e9290075c2dfcc192c076682e9b3c2514
# Parent 0e3aeb97e206b04521b13666c5c4bf4681748bb7
asm: fix main12 avx2 for luma_vsp and luma_hvpp
diff -r 0e3aeb97e206 -r 8a91a65e9290
# HG changeset patch
# User Rajesh Paulraj
# Date 1444041513 -19800
# Mon Oct 05 16:08:33 2015 +0530
# Node ID 8dc9dfe33c370e5bc09863ab1062568662d46e37
# Parent 5f73ada8caa0c62cc7540799966bde7536861bf7
asm: fix main12 avx2 for chroma_vpp/vps/vsp/vss
diff -r 5f73ada8caa0 -r 8dc9dfe33c37
# HG changeset patch
# User Rajesh Paulraj
# Date 1443702239 -19800
# Thu Oct 01 17:53:59 2015 +0530
# Node ID 5f73ada8caa0c62cc7540799966bde7536861bf7
# Parent b2889a2a87f8194fa5587496e8f5752ca13b8d9f
asm: fix main12 avx2 for chroma_hps, chroma_hpp
diff -r b2889a2a87f8 -r 5f73ada8caa0
# HG changeset patch
# User Rajesh Paulraj
# Date 1443683779 -19800
# Thu Oct 01 12:46:19 2015 +0530
# Node ID b2889a2a87f8194fa5587496e8f5752ca13b8d9f
# Parent 6e7761bdfe23addb862483f8407b388800de7d92
asm: fix Main12 luma_hps avx2
luma_hps[ 4x4] - improved 1182.34c -> 914.36c
luma_
# HG changeset patch
# User Rajesh Paulraj
# Date 1443517300 -19800
# Tue Sep 29 14:31:40 2015 +0530
# Node ID 96144b807717679d61cbe4fd117895a501410f02
# Parent 8411b3d5b07c65bfcf2813133bb8daa665526ac4
asm: fix sad[64xN] avx2 code for main12
diff -r 8411b3d5b07c -r 96144b807717 source
# HG changeset patch
# User Rajesh Paulraj
# Date 1443515696 -19800
# Tue Sep 29 14:04:56 2015 +0530
# Node ID 8411b3d5b07c65bfcf2813133bb8daa665526ac4
# Parent d7cde5dbec838f53e87faceac989d6cd987bfc72
asm: fix sad[48x64] avx2 code for main12
diff -r d7cde5dbec83 -r 8411b3d5b07c source
# HG changeset patch
# User Rajesh Paulraj
# Date 1443515427 -19800
# Tue Sep 29 14:00:27 2015 +0530
# Node ID d7cde5dbec838f53e87faceac989d6cd987bfc72
# Parent ff279fe6bcccbc5dbe384194a1332b1e96595b61
asm: fix sad[32xN] avx2 code for main12
diff -r ff279fe6bccc -r d7cde5dbec83 source
# HG changeset patch
# User Rajesh Paulraj
# Date 1443508666 -19800
# Tue Sep 29 12:07:46 2015 +0530
# Node ID ff279fe6bcccbc5dbe384194a1332b1e96595b61
# Parent f4c267f28487161fa78c43cabb30dc4f4f82570c
asm: fix sad[16x64] avx2 code for main12
diff -r f4c267f28487 -r ff279fe6bccc source
# HG changeset patch
# User Rajesh Paulraj
# Date 1442307006 -19800
# Tue Sep 15 14:20:06 2015 +0530
# Node ID 4fe52a4c976193e8ddbeb16b5d3edeeac4fab6c8
# Parent 31d45b06c3ee6d747c8d4f50f171aa6479b1c60f
asm: avx2 8bpp code for chroma_p2s[16xN] for i420, i422, improved over 25% than
SSE
diff
# HG changeset patch
# User Rajesh Paulraj
# Date 1442305574 -19800
# Tue Sep 15 13:56:14 2015 +0530
# Node ID 31d45b06c3ee6d747c8d4f50f171aa6479b1c60f
# Parent 01a21374067d5ee6f4b4fe6eb512afeb44974f39
asm: avx2 8bpp code for convert_p2s[16xN],improved over 25% than SSE
avx2:
convert_p2s
# HG changeset patch
# User Rajesh Paulraj
# Date 1442296528 -19800
# Tue Sep 15 11:25:28 2015 +0530
# Node ID 5fd2ef7bbf09f771d479a11eec2256d02fadf1cf
# Parent 365f7ed4d89628d49cd6af8d81d4edc01f73ffad
asm: avx2 code for sad_x4_64xN, improved over 40% than SSE
avx2:
sad_x4[64x16] 75.32x
# HG changeset patch
# User Rajesh Paulraj
# Date 1442296837 -19800
# Tue Sep 15 11:30:37 2015 +0530
# Node ID 01a21374067d5ee6f4b4fe6eb512afeb44974f39
# Parent 5fd2ef7bbf09f771d479a11eec2256d02fadf1cf
asm: avx2 code for sad_x4_48x64, improved over 25% than SSE
diff -r 5fd2ef7bbf09 -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1441881122 -19800
# Thu Sep 10 16:02:02 2015 +0530
# Node ID 6bbfbfc5611e804938a49bb83689c7d81d1844c1
# Parent 365f7ed4d89628d49cd6af8d81d4edc01f73ffad
asm: pixelavg_pp[48x64] avx2 8bpp code
avx2:
avg_pp[48x64] 31.62x 1687.89
# HG changeset patch
# User Rajesh Paulraj
# Date 1440482220 -19800
# Tue Aug 25 11:27:00 2015 +0530
# Node ID 65feb1620237d624296276635b2f658c0b1b1719
# Parent 8a414544bfbf64b119fa6dd2e23cef8cb89d0a54
asm: avx2 code for intra_ang_16 mode 17, improved over 65% than SSE asm
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1440413739 -19800
# Mon Aug 24 16:25:39 2015 +0530
# Node ID 8a414544bfbf64b119fa6dd2e23cef8cb89d0a54
# Parent a28a863393994d8fb1d58c721352d9b4ec8c46ee
asm: replace movu+vinserti128 by vbroadcasti128 instruction
diff -r a28a86339399 -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1440161042 -19800
# Fri Aug 21 18:14:02 2015 +0530
# Node ID 54894c40cac59b4854ded180632dbd255d4382f9
# Parent 61ff378a924c6b6c70fc180a2d02d67c535435a4
asm: avx2 code for intra_ang_16 mode 16, improved over 63% than SSE asm
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1440159642 -19800
# Fri Aug 21 17:50:42 2015 +0530
# Node ID 6e8a47ea76867245443b51651f77945b3e1e2cae
# Parent 127ba27bc703f6a255388e3bf194262b48fed7b7
asm: avx2 code for intra_ang_16 mode 14, improved over 59% than SSE asm
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1440160356 -19800
# Fri Aug 21 18:02:36 2015 +0530
# Node ID 61ff378a924c6b6c70fc180a2d02d67c535435a4
# Parent 6e8a47ea76867245443b51651f77945b3e1e2cae
asm: avx2 code for intra_ang_16 mode 15, improved over 60% than SSE asm
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1440158537 -19800
# Fri Aug 21 17:32:17 2015 +0530
# Node ID 127ba27bc703f6a255388e3bf194262b48fed7b7
# Parent a7ce76f4bc1456cffd048170929aa4f5202d005a
asm: avx2 code for intra_ang_16 mode 13, improved over 58% than SSE asm
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1440157611 -19800
# Fri Aug 21 17:16:51 2015 +0530
# Node ID a7ce76f4bc1456cffd048170929aa4f5202d005a
# Parent e9c65dc48171d57ef8073aeca936fa41055f083d
asm: avx2 code for intra_ang_16 mode 12, improved over 43% than previous avx2
asm
diff
# HG changeset patch
# User Rajesh Paulraj
# Date 1440156144 -19800
# Fri Aug 21 16:52:24 2015 +0530
# Node ID e9c65dc48171d57ef8073aeca936fa41055f083d
# Parent f63273fa3137fef2f6898c686b68ee12608acd31
asm: avx2 code for intra_ang_16 mode 9, improved over 53% than previous avx2 asm
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1438851223 -19800
# Thu Aug 06 14:23:43 2015 +0530
# Node ID 913ca368a61423268207b85247ec5b81b8be905d
# Parent ff4a5208eff874ac6b9781cb863afc1571852647
asm: avx2 code for intra_ang_16 mode 8, replace old avx2 code
with less constant table and
# HG changeset patch
# User Rajesh Paulraj
# Date 1438850543 -19800
# Thu Aug 06 14:12:23 2015 +0530
# Node ID 7195cd4420b97000a7ad7fb2a0861994223f5da4
# Parent ac57cc4fedfc9fab8311a67a49e6c1c6259f6ec3
asm: avx2 code for intra_ang_16 mode 6, replace old avx2 code
with less constant table and
# HG changeset patch
# User Rajesh Paulraj
# Date 1438851008 -19800
# Thu Aug 06 14:20:08 2015 +0530
# Node ID ff4a5208eff874ac6b9781cb863afc1571852647
# Parent 7195cd4420b97000a7ad7fb2a0861994223f5da4
asm: avx2 code for intra_ang_16 mode 7, replace old avx2 code
with less constant table and
# HG changeset patch
# User Rajesh Paulraj
# Date 1438847047 -19800
# Thu Aug 06 13:14:07 2015 +0530
# Node ID ac57cc4fedfc9fab8311a67a49e6c1c6259f6ec3
# Parent e5d57775bbef81e37bc028c27d61a0a20e64bc9e
asm: new algorithm for intra_ang_16 mode 5, improved over 16% than previous
avx2 code
# HG changeset patch
# User Rajesh Paulraj
# Date 1438846738 -19800
# Thu Aug 06 13:08:58 2015 +0530
# Node ID e5d57775bbef81e37bc028c27d61a0a20e64bc9e
# Parent 4078c3fa7b2a362cdab1b1ea54e13a29ae0ef4f2
asm: new algorithm for intra_ang_16 modes 4 & 32, improved over 15%
than previous
# HG changeset patch
# User Rajesh Paulraj
# Date 1438846342 -19800
# Thu Aug 06 13:02:22 2015 +0530
# Node ID 4078c3fa7b2a362cdab1b1ea54e13a29ae0ef4f2
# Parent 377a996a8d74110f838ff2e3cef1c42781d6d730
asm: new algorithm for intra_ang_16 modes 3 & 33, improved over 23%
than previous
asm)
>
> On Thu, Aug 6, 2015 at 10:41 AM, Deepthi Nandakumar <
> deep...@multicorewareinc.com> wrote:
>
>> Please be sure to mention what is the baseline - for instance, what is
>> 1075 cycles?
>>
>> On Wed, Aug 5, 2015 at 6:06 PM, wrote:
>>
>>&
# HG changeset patch
# User Rajesh Paulraj
# Date 1438768214 -19800
# Wed Aug 05 15:20:14 2015 +0530
# Node ID 6b16959ee2ca309ab71119d0a6ae1d8d67cfd83a
# Parent 80f43039f4286205441125fff66f89c8a512afeb
asm: avx2 code for intra_ang_16 mode 5
intra_ang_16x16[ 5] - improved 1022.83
# HG changeset patch
# User Rajesh Paulraj
# Date 1438767148 -19800
# Wed Aug 05 15:02:28 2015 +0530
# Node ID 80f43039f4286205441125fff66f89c8a512afeb
# Parent 4a71c4261e5a7955a7ecdda61db1f20744254b0e
asm: avx2 code for intra_ang_16 modes 4 & 32
intra_ang_16x16[ 4] - improved 102
# HG changeset patch
# User Rajesh Paulraj
# Date 1438766294 -19800
# Wed Aug 05 14:48:14 2015 +0530
# Node ID 4a71c4261e5a7955a7ecdda61db1f20744254b0e
# Parent 3fa7f6838098854de79d3800b2d775dabaf45705
asm: avx2 code for intra_ang_16 modes 3 & 33
intra_ang_16x16[ 3] - improved 107
Please ignore this patch. I will send the updated patch.
2015-07-29 16:33 GMT+05:30 :
> # HG changeset patch
> # User Rajesh Paulraj
> # Date 1438162042 -19800
> # Wed Jul 29 14:57:22 2015 +0530
> # Node ID 90bf2117cff7e5727c43e65f926870e8874b
# HG changeset patch
# User Rajesh Paulraj
# Date 1438162042 -19800
# Wed Jul 29 14:57:22 2015 +0530
# Node ID 90bf2117cff7e5727c43e65f926870e8874b9ed2
# Parent e08a245054434090ccb1fc1b985f955a66711157
asm: avx2 code for intra_ang_16 modes 3 & 33
intra_ang_16x16[ 3] - improved 107
# HG changeset patch
# User Rajesh Paulraj
# Date 1436444469 -19800
# Thu Jul 09 17:51:09 2015 +0530
# Node ID 55c41fad48cf4a3af08cecc55deccbd34aadd252
# Parent 83bc6fac1fb54e9d5241c5c10d8578811a355273
asm: frameInitLowres avx2 code for 8bpp and 10bpp
8bpp:
avx2: downscale 30.38x
# HG changeset patch
# User Rajesh Paulraj
# Date 1435735579 -19800
# Wed Jul 01 12:56:19 2015 +0530
# Node ID ab8cfdf88aad580381f2fcc11533c4cd1d2f1250
# Parent 2f345c1c0d8e2351e5aaae5f3e0e017b5810f32e
asm: pixelavg_pp[8xN] sse2 code for 10bpp
avg_pp[ 8x4] 5.12x125.34
ok. sure.
On Mon, Jun 29, 2015 at 11:38 AM, Praveen Tiwari <
prav...@multicorewareinc.com> wrote:
> You would like to visit 8bpp code as well.
>
> Regards,
> Praveen
>
> On Mon, Jun 29, 2015 at 11:24 AM, Rajesh Paulraj <
> raj...@multicorewareinc.com> wrote:
>
isabled
>
> On Fri, Jun 26, 2015 at 5:40 PM, Rajesh Paulraj <
> raj...@multicorewareinc.com> wrote:
>
>> yes. It looks like we need to optimize sse2 code. I will work on this.
>>
>> On Fri, Jun 26, 2015 at 5:31 PM, Praveen Tiwari <
>> prav...@multicore
5] [PATCH] asm: pixelavg_pp[8xN] avx2 code for 10bpp
> To: x265-devel@videolan.org
>
>
> # HG changeset patch
> # User Rajesh Paulraj
> # Date 1435311076 -19800
> # Fri Jun 26 15:01:16 2015 +0530
> # Node ID 956401f1a679f1e71181b704d64e4acdb6f1a93f
> # Parent d64227e54
3:14 PM
> Subject: [x265] [PATCH] asm: pixelavg_pp[8xN] avx2 code for 10bpp
> To: x265-devel@videolan.org
>
>
> # HG changeset patch
> # User Rajesh Paulraj
> # Date 1435311076 -19800
> # Fri Jun 26 15:01:16 2015 +0530
> # Node ID
# HG changeset patch
# User Rajesh Paulraj
# Date 1435311076 -19800
# Fri Jun 26 15:01:16 2015 +0530
# Node ID 956401f1a679f1e71181b704d64e4acdb6f1a93f
# Parent d64227e54233d1646c55bcb4b0b831e5340009ed
asm: pixelavg_pp[8xN] avx2 code for 10bpp
avx2:
avg_pp[ 8x4] 4.39x145.09
# HG changeset patch
# User Rajesh Paulraj
# Date 1435311677 -19800
# Fri Jun 26 15:11:17 2015 +0530
# Node ID 818b70b015513a01993af0c48e4714cf4fd8dc84
# Parent 956401f1a679f1e71181b704d64e4acdb6f1a93f
asm: avx2 10bit code for planecopy_cp(10660.20 -> 5685.80)
avx2:
planecopy_cp 19.
# HG changeset patch
# User Rajesh Paulraj
# Date 1435230598 -19800
# Thu Jun 25 16:39:58 2015 +0530
# Node ID 1f24ff6471506c0ff5fd4addce149169976f845b
# Parent a0de1e88f3b1a10d6f8cf656a95e6ec37e1bc134
asm: avx2 10bit code for sign primitive(356.91 -> 242.00)
avx2:
calSign 9.08x242
# HG changeset patch
# User Rajesh Paulraj
# Date 1435229751 -19800
# Thu Jun 25 16:25:51 2015 +0530
# Node ID a0de1e88f3b1a10d6f8cf656a95e6ec37e1bc134
# Parent b1af4c36f48a4500a4912373ebcda9a5540b5c15
asm: sse4 10bit code for sign primitive
calSign 6.16x356.91 2197.63
Sorry. Both are the same primitive. I will correct it and resend the two
patches.
On Thu, Jun 25, 2015 at 3:34 PM, Deepthi Nandakumar <
deep...@multicorewareinc.com> wrote:
>
>
> On Thu, Jun 25, 2015 at 2:19 PM, wrote:
>
>> # HG changeset patch
>> # User Rajesh Paulr
# HG changeset patch
# User Rajesh Paulraj
# Date 1435220688 -19800
# Thu Jun 25 13:54:48 2015 +0530
# Node ID c8d1630fc5ccb85aa7d98a198895bad31ccc33b0
# Parent 26e8eff8eb5abc1c2fa5dd94f59f620c6040caf9
asm: pixelavg_pp[8xN] avx2 code for 10bpp
avx2:
avg_pp[ 8x4] 4.39x145.09
# HG changeset patch
# User Rajesh Paulraj
# Date 1435220155 -19800
# Thu Jun 25 13:45:55 2015 +0530
# Node ID 26e8eff8eb5abc1c2fa5dd94f59f620c6040caf9
# Parent 430625004ef81ba9e9e398d4cf12a68a1cd4b664
asm: avx2 10bit code for planecopy_cp(10660.20 -> 5685.80)
avx2:
planecopy_cp 19.
# HG changeset patch
# User Rajesh Paulraj
# Date 1435219198 -19800
# Thu Jun 25 13:29:58 2015 +0530
# Node ID a03487d6295cf89b065eff36e5c1ec4ee4253243
# Parent b1af4c36f48a4500a4912373ebcda9a5540b5c15
asm: sse4 10bit code for sign primitive
calSign 6.16x356.91 2197.63
# HG changeset patch
# User Rajesh Paulraj
# Date 1435219457 -19800
# Thu Jun 25 13:34:17 2015 +0530
# Node ID 430625004ef81ba9e9e398d4cf12a68a1cd4b664
# Parent a03487d6295cf89b065eff36e5c1ec4ee4253243
asm: avx2 10bit code for sign primitive(356.91 -> 242.00)
avx2:
calSign 9.08x242
# HG changeset patch
# User Rajesh Paulraj
# Date 1435150086 -19800
# Wed Jun 24 18:18:06 2015 +0530
# Node ID 9d8d2bf23696f2329aa553604898e460cb10bf84
# Parent 6b51492f87f036b6e58f5a92d2b3e85da4e57906
asm: pixelavg_pp[12x16],[24x32] avx2 code for 10bpp
avx2:
avg_pp[24x32] 14.35x 965.89
# HG changeset patch
# User Rajesh Paulraj
# Date 1435149234 -19800
# Wed Jun 24 18:03:54 2015 +0530
# Node ID 6b51492f87f036b6e58f5a92d2b3e85da4e57906
# Parent 3a5cd130f9084147168c02f26de102faf59d193b
asm: pixelavg_pp[32xN],[64xN],48x64 avx2 code for 10bpp
avx2:
avg_pp[ 32x8] 13.95x
# HG changeset patch
# User Rajesh Paulraj
# Date 1434981872 -19800
# Mon Jun 22 19:34:32 2015 +0530
# Node ID d4c7638a0d5b842ca2657969b0f1a2bcd8a82d0b
# Parent 83a7d824442455ba5e0a6b53ea68e6b7043845de
asm: pixelavg_pp[16xN] avx2 code for 10bpp
avx2:
avg_pp[ 16x4] 9.60x140.07
# HG changeset patch
# User Rajesh Paulraj
# Date 1434001187 -19800
# Thu Jun 11 11:09:47 2015 +0530
# Node ID 07e20891148d8c645ab8955d0537ed145b8e0976
# Parent 6245476add8f0562e3ccb657f572ff94fe96adf0
asm: interp_4tap_vert_X[6xN] avx2 10bit code for i420,i422
avx2:
chroma_vpp[ 6x8][i420
# HG changeset patch
# User Rajesh Paulraj
# Date 1433928862 -19800
# Wed Jun 10 15:04:22 2015 +0530
# Node ID e62e1cdeb0f4d11a52c9d71bf2f33d798e6a27f0
# Parent 6245476add8f0562e3ccb657f572ff94fe96adf0
asm: interp_4tap_vert_X[64xN] avx2 10bit code for i444
avx2:
chroma_vpp[64x64
# HG changeset patch
# User Rajesh Paulraj
# Date 1433849724 -19800
# Tue Jun 09 17:05:24 2015 +0530
# Node ID a776f1048ec69418f59be0399082722523c2b3d6
# Parent 8f9f36c1fd4799cf31a3fe99ffcf8f83d4ee2d45
asm: interp_4tap_vert_X[i444][16xN, 32xN, 12x16, 24x32] avx2 10bit code
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1433843287 -19800
# Tue Jun 09 15:18:07 2015 +0530
# Node ID 97e1a9097a80ac3c290ca7eae3fe8ddb5b3029fd
# Parent 2c7b1c9c83d4e54128f3d0687c2548b28e17a4fd
asm: interp_4tap_vert_X[48x64] avx2 10bit code for i444
avx2:
chroma_vpp[48x64
# HG changeset patch
# User Rajesh Paulraj
# Date 1433847184 -19800
# Tue Jun 09 16:23:04 2015 +0530
# Node ID 8f9f36c1fd4799cf31a3fe99ffcf8f83d4ee2d45
# Parent 97e1a9097a80ac3c290ca7eae3fe8ddb5b3029fd
asm: interp_4tap_vert_X[i422][16xN, 32xN, 12x32, 24x64] avx2 10bit code
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1433842142 -19800
# Tue Jun 09 14:59:02 2015 +0530
# Node ID 2c7b1c9c83d4e54128f3d0687c2548b28e17a4fd
# Parent 5994f794ff36e881e3f992e78b2167a0e4ad4768
asm: interp_4tap_vert_X[12x16],[24x32] avx2 10bit code for i420
avx2:
chroma_vpp[12x16
# HG changeset patch
# User Rajesh Paulraj
# Date 1433840941 -19800
# Tue Jun 09 14:39:01 2015 +0530
# Node ID 5994f794ff36e881e3f992e78b2167a0e4ad4768
# Parent b252468dde7ffca57da27575388d95ce538945d2
asm: interp_4tap_vert_X[32xN] avx2 10bit code for i420
avx2:
chroma_vpp[32x32
This patch and the following patch "asm: interp_4tap_vert_X[16xN] avx2
10bit code for i420" have not been pushed yet.
On Fri, Jun 5, 2015 at 6:59 PM, wrote:
> # HG changeset patch
> # User Rajesh Paulraj
> # Date 1433336549 -19800
> # Wed Jun 03 18:32:29
# HG changeset patch
# User Rajesh Paulraj
# Date 1433336549 -19800
# Wed Jun 03 18:32:29 2015 +0530
# Node ID 462553e87d22db5d6939ab565b5893cd96ba2eba
# Parent 43afbde189f390c74f580b0d377731b498c7f7ce
asm: align tab_ChromaCoeffV constant to 32 bytes bound
and modify all chroma vertical
# HG changeset patch
# User Rajesh Paulraj
# Date 1433507999 -19800
# Fri Jun 05 18:09:59 2015 +0530
# Node ID 521da585af406880e86a2975cf44617225efe0e3
# Parent 462553e87d22db5d6939ab565b5893cd96ba2eba
asm: interp_4tap_vert_X[16xN] avx2 10bit code for i420
avx2:
chroma_vpp[16x16
# HG changeset patch
# User Rajesh Paulraj
# Date 1432811286 -19800
# Thu May 28 16:38:06 2015 +0530
# Node ID ac15f079bd838b1aa874b2787035a7f52e2b2c1e
# Parent 09b0056ca229c87288ef0169ed2d169b706b237b
asm: chroma_hpp[i422][8xN, 16xN, 32xN, 6x16, 12x32, 24x64] avx2 code for 16bpp
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1432811555 -19800
# Thu May 28 16:42:35 2015 +0530
# Node ID cd016c5093499e7e3d90ea34a909141d7e94027f
# Parent ac15f079bd838b1aa874b2787035a7f52e2b2c1e
asm: chroma_hpp[i444][8xN, 16xN, 32xN, 12x16, 24x32, 48x64] avx2 code for 16bpp
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1432806848 -19800
# Thu May 28 15:24:08 2015 +0530
# Node ID a0df6b36abaaa849d733febb589416cead40d5b8
# Parent a9d27027fd7702f328544eabc285120b77b4129c
asm: avx2 10bit code for chroma_hpp[64xN] for i444
avx2:
chroma_hpp[64x16] 5.71x
# HG changeset patch
# User Rajesh Paulraj
# Date 1432807224 -19800
# Thu May 28 15:30:24 2015 +0530
# Node ID 09b0056ca229c87288ef0169ed2d169b706b237b
# Parent a0df6b36abaaa849d733febb589416cead40d5b8
asm: avx2 10bit code for chroma_hpp[48x64] for i444(37333.09 -> 22624.32)
diff
# HG changeset patch
# User Rajesh Paulraj
# Date 1432806088 -19800
# Thu May 28 15:11:28 2015 +0530
# Node ID a9d27027fd7702f328544eabc285120b77b4129c
# Parent f1ab5ba015c00dc2bfb8927e047c0c6d5f974144
asm: avx2 10bit code for chroma_hpp[6x8] for i420(2458.88 -> 907.85)
diff -r f1ab5ba01
# HG changeset patch
# User Rajesh Paulraj
# Date 1432805111 -19800
# Thu May 28 14:55:11 2015 +0530
# Node ID f1ab5ba015c00dc2bfb8927e047c0c6d5f974144
# Parent 3c2a050acbd7ee16f44f19ce170a701774c0e460
asm: avx2 10bit code for chroma_hpp[12x16],[24x32] for i420
avx2:
chroma_hpp[12x16
# HG changeset patch
# User Rajesh Paulraj
# Date 1432711490 -19800
# Wed May 27 12:54:50 2015 +0530
# Node ID 3c2a050acbd7ee16f44f19ce170a701774c0e460
# Parent 0de4de0f5e1a020de61bde7b981d8ab1fcbb6b74
asm: avx2 10bit code for chroma_hpp[32xN] for i420
avx2:
chroma_hpp[ 32x8] 6.38x
# HG changeset patch
# User Rajesh Paulraj
# Date 1432711326 -19800
# Wed May 27 12:52:06 2015 +0530
# Node ID 0de4de0f5e1a020de61bde7b981d8ab1fcbb6b74
# Parent c3664f2048569576fa4626e85961d73108c13cb1
asm: avx2 10bit code for chroma_hpp[16xN] for i420
avx2:
chroma_hpp[ 16x4] 7.99x
# HG changeset patch
# User Rajesh Paulraj
# Date 1432711171 -19800
# Wed May 27 12:49:31 2015 +0530
# Node ID c3664f2048569576fa4626e85961d73108c13cb1
# Parent 18939c0e321f08207fa0a383939bc44485773013
asm: avx2 10bit code for chroma_hpp[8xN] for i420
avx2:
chroma_hpp[ 8x2] 5.64x
# HG changeset patch
# User Rajesh Paulraj
# Date 1432111013 -19800
# Wed May 20 14:06:53 2015 +0530
# Node ID 453218d87a4ee8667b013b1f36465533df06330b
# Parent 9b31a8a7bd57efededcc3884eec09f649394
asm: avx2 10bit code for luma_hpp[4xN]
avx2:
luma_hpp[ 4x4] 4.59x423.90
Please ignore this patch. I need to rename the macro and will resend the patch.
On Wed, May 20, 2015 at 12:04 PM, wrote:
> # HG changeset patch
> # User Rajesh Paulraj
> # Date 1432102514 -19800
> # Wed May 20 11:45:14 2015 +0530
> # Node ID 0fce0242d05d385afa69c003fbade0477fda
# HG changeset patch
# User Rajesh Paulraj
# Date 1432102514 -19800
# Wed May 20 11:45:14 2015 +0530
# Node ID 0fce0242d05d385afa69c003fbade0477fda43a2
# Parent 9b31a8a7bd57efededcc3884eec09f649394
asm: avx2 10bit code for luma_hpp[4xN]
avx2:
luma_hpp[ 4x4] 4.59x423.90
# HG changeset patch
# User Rajesh Paulraj
# Date 1432024172 -19800
# Tue May 19 13:59:32 2015 +0530
# Node ID 6fad8107d1a6bebf92d7b38e57528b3cedf5cbd6
# Parent 9d394ee847ae33abb2a3ae06bf934eb5ebac3d03
asm: avx2 10bit code for luma_hpp[48x64] (82440.47 -> 44731.61)
diff -r 9d394ee847ae
# HG changeset patch
# User Rajesh Paulraj
# Date 1432024895 -19800
# Tue May 19 14:11:35 2015 +0530
# Node ID 9d394ee847ae33abb2a3ae06bf934eb5ebac3d03
# Parent b7f9e65a33ade32c1f14b04d69cce50cecde8ab5
asm: avx2 10bit code for luma_hpp[24x32] (18855.08 -> 10742.66)
diff -r b7f9e65a33ad
# HG changeset patch
# User Rajesh Paulraj
# Date 1432021220 -19800
# Tue May 19 13:10:20 2015 +0530
# Node ID b7f9e65a33ade32c1f14b04d69cce50cecde8ab5
# Parent 569f678f36d731690115b27ed244970f3bc822a8
asm: avx2 10bit code for luma_hpp[12x16] (5154.47 -> 3632.88)
diff -r 569f678f36d7
# HG changeset patch
# User Rajesh Paulraj
# Date 1432020349 -19800
# Tue May 19 12:55:49 2015 +0530
# Node ID 569f678f36d731690115b27ed244970f3bc822a8
# Parent d0f54566d1f457f00fc071c47cbb04186e4da99e
asm: avx2 10bit code for luma_hpp[32xN],[64xN]
avx2:
luma_hpp[ 32x8] 8.32x
# HG changeset patch
# User Rajesh Paulraj
# Date 1432019267 -19800
# Tue May 19 12:37:47 2015 +0530
# Node ID d0f54566d1f457f00fc071c47cbb04186e4da99e
# Parent 712f3f1950098d1603a662944359978e19e39752
asm: avx2 10bit code for luma_hpp[16xN]
avx2:
luma_hpp[ 16x4] 7.81x955.42
# HG changeset patch
# User Rajesh Paulraj
# Date 1432018444 -19800
# Tue May 19 12:24:04 2015 +0530
# Node ID 712f3f1950098d1603a662944359978e19e39752
# Parent d7b100e51e828833eee006f1da93e499ac161d28
asm: avx2 10bit code for luma_hpp[8xN]
avx2:
luma_hpp[ 8x4] 7.30x507.64
# HG changeset patch
# User Rajesh Paulraj
# Date 1429883568 -19800
# Fri Apr 24 19:22:48 2015 +0530
# Node ID 7788614584bc7267c3724f717e2c2fefa2579876
# Parent 22037a908cb58adf8be2600e5dd038a1b6d9348e
asm: avx2 10bit code for add_ps for chroma sizes 16xN, 32xN, reuse luma code
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1429883436 -19800
# Fri Apr 24 19:20:36 2015 +0530
# Node ID 22037a908cb58adf8be2600e5dd038a1b6d9348e
# Parent a35fafa25df2c82fec9e44d95f0a29ba835b48ea
asm: avx2 10bit code for add_ps[16x16],[32x32],[64x64]
add_ps[16x16](19.29x), add_ps
# HG changeset patch
# User Rajesh Paulraj
# Date 1429796254 -19800
# Thu Apr 23 19:07:34 2015 +0530
# Node ID cd0c8df9e9bb9e8ceebfa84532e6dca8d50916ee
# Parent cec68d3e37ef15c571cfa7f2784a12e944a2e2a7
asm: avx2 10bit code for sub_ps[16x16],[32x32],[64x64]
sub_ps[16x16](13.23x), sub_ps
# HG changeset patch
# User Rajesh Paulraj
# Date 1429796441 -19800
# Thu Apr 23 19:10:41 2015 +0530
# Node ID 4a04e3bafeaaffeb582bcb0ffea71900e92c7386
# Parent cd0c8df9e9bb9e8ceebfa84532e6dca8d50916ee
asm: avx2 10bit code for sub_ps for chroma sizes 16xN, 32xN, reuse luma code
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1429712526 -19800
# Wed Apr 22 19:52:06 2015 +0530
# Node ID eb7ba0bea5b6bac28ea39030062c31b5ed504487
# Parent 86268e498680951069c48b681eef830b0aa37873
asm: avx2 10bit code for scale2D_64to32
AVX2:
scale2D_64to32 17.07x 3873.16 44301.99
# HG changeset patch
# User Rajesh Paulraj
# Date 1429266115 -19800
# Fri Apr 17 15:51:55 2015 +0530
# Node ID d299770bebb21f7e1b9603b150087d38fdbf3f68
# Parent d967fc6a12dcfb2984172d5fadb1f06148ab4eef
asm: avx2 10bit code for chroma_p2s[16xN],[24xN],[32xN], reuse luma code
diff -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1429249760 -19800
# Fri Apr 17 11:19:20 2015 +0530
# Node ID d967fc6a12dcfb2984172d5fadb1f06148ab4eef
# Parent 925a29ed81a4697655d9716f93ff29b246a17756
asm: avx2 10bit code for convert_p2s[24xN],[48x64]
convert_p2s[24x32](20.90x
# HG changeset patch
# User Rajesh Paulraj
# Date 1429174083 -19800
# Thu Apr 16 14:18:03 2015 +0530
# Node ID 925a29ed81a4697655d9716f93ff29b246a17756
# Parent 8d14e23666ec5fb90a85d90032f52d3fb3e6fe4a
asm: avx2 10bit code for convert_p2s[32xN],[64xN]
convert_p2s[32x8](15.77x
# HG changeset patch
# User Rajesh Paulraj
# Date 1429188456 -19800
# Thu Apr 16 18:17:36 2015 +0530
# Node ID 8d14e23666ec5fb90a85d90032f52d3fb3e6fe4a
# Parent 7be1172ec816298c32f588908e1b6f0fa214d349
asm: avx2 10bit code for convert_p2s[16xN]
convert_p2s[16x4](10.44x), convert_p2s
# HG changeset patch
# User Rajesh Paulraj
# Date 1429163674 -19800
# Thu Apr 16 11:24:34 2015 +0530
# Node ID 7dec3ef187cbf3c1fd5ebfdce1172dc577e11dfe
# Parent 96cc21821c57e15fbb72d690745c4ceb28e6aca5
asm: sse4 10bit code for chroma_p2s[2xN] for i420, i422
chroma_p2s[i420][2x4](1.71x
# HG changeset patch
# User Rajesh Paulraj
# Date 1429161813 -19800
# Thu Apr 16 10:53:33 2015 +0530
# Node ID 96cc21821c57e15fbb72d690745c4ceb28e6aca5
# Parent fbfcf28a6485f61ea5ed707e4ca04b9e348f29e2
asm: sse4 10bit code for chroma_p2s[6xN] for i420, i422
chroma_p2s[i420][6x8](2.89x
# HG changeset patch
# User Rajesh Paulraj
# Date 1429164423 -19800
# Thu Apr 16 11:37:03 2015 +0530
# Node ID 9248bece15a1ec8439210c0e517fb3f3bc305080
# Parent 7dec3ef187cbf3c1fd5ebfdce1172dc577e11dfe
asm: sse version 10bit code for chroma_p2s, reuse luma code
diff -r 7dec3ef187cb -r
# HG changeset patch
# User Rajesh Paulraj
# Date 1429104844 -19800
# Wed Apr 15 19:04:04 2015 +0530
# Node ID 42d5394a0eb22f7e1126aca2dfa98f5d01d373b0
# Parent f9c0e1f233cc15ccce4eb96adef11583af082f33
asm: ssse3 10bit code for convert_p2s[12xN],[48x64]
convert_p2s[12x16](11.37x
# HG changeset patch
# User Rajesh Paulraj
# Date 1429105570 -19800
# Wed Apr 15 19:16:10 2015 +0530
# Node ID fbfcf28a6485f61ea5ed707e4ca04b9e348f29e2
# Parent 42d5394a0eb22f7e1126aca2dfa98f5d01d373b0
asm: ssse3 10bit code for chroma_p2s[4x2],[8x2],[8x6]
chroma_p2s[i420][4x2](2.52x
# HG changeset patch
# User Rajesh Paulraj
# Date 1429018549 -19800
# Tue Apr 14 19:05:49 2015 +0530
# Node ID ecc15210425c2430a184c5647b0a97cdd2a3e597
# Parent bd2972af8c4f1712936a477c76640ac0581e6ded
asm: ssse3 10bit code for convert_p2s[32xN],[64xN]
convert_p2s[32x8](9.51x
# HG changeset patch
# User Rajesh Paulraj
# Date 1429019326 -19800
# Tue Apr 14 19:18:46 2015 +0530
# Node ID 30b44e2ddf5983289b12ee379d2b1b87a187ebed
# Parent ecc15210425c2430a184c5647b0a97cdd2a3e597
asm: ssse3 10bit code for convert_p2s[24xN]
convert_p2s[24x32](14.57x)
diff -r
1 - 100 of 151 matches
Mail list logo