# HG changeset patch
# User Yuvaraj Venkatesh <[email protected]>
# Date 1381153465 -19800
#      Mon Oct 07 19:14:25 2013 +0530
# Node ID 52ee436b58f9aa48757063bd678672d0ab56be01
# Parent  c010342f7605c86867824f5b525a8f84c0d2de1c
Replace the vector-class implementation of getResidual4 with SSE2 intrinsics.

diff -r c010342f7605 -r 52ee436b58f9 source/common/vec/pixel8.inc
--- a/source/common/vec/pixel8.inc      Sun Oct 06 02:09:00 2013 -0500
+++ b/source/common/vec/pixel8.inc      Mon Oct 07 19:14:25 2013 +0530
@@ -29,19 +29,35 @@
 
 void getResidual4(pixel *fenc, pixel *pred, short *resi, int stride)
 {
-    for (int y = 0; y < 4; y++)
-    {
-        Vec16uc f;
-        f.fromUint32(*(uint32_t*)fenc);
-        Vec16uc p;
-        p.fromUint32(*(uint32_t*)pred);
-        Vec8s r = extend_low(f) - extend_low(p);
-        store_partial(const_int(8), resi, r);
+    __m128i T00, T01, T02; 
 
-        fenc += stride;
-        pred += stride;
-        resi += stride;
-    }
+    T00 = _mm_cvtsi32_si128(*(uint32_t*)fenc);
+    T01 = _mm_cvtsi32_si128(*(uint32_t*)pred);
+       T00 = _mm_unpacklo_epi8(T00, _mm_setzero_si128());
+       T01 = _mm_unpacklo_epi8(T01, _mm_setzero_si128());
+       T02 = _mm_sub_epi16(T00, T01);
+    _mm_storel_epi64((__m128i*)resi, T02);
+
+       T00 = _mm_cvtsi32_si128(*(uint32_t*)(fenc + stride));
+    T01 = _mm_cvtsi32_si128(*(uint32_t*)(pred + stride));
+       T00 = _mm_unpacklo_epi8(T00, _mm_setzero_si128());
+       T01 = _mm_unpacklo_epi8(T01, _mm_setzero_si128());
+       T02 = _mm_sub_epi16(T00, T01);
+    _mm_storel_epi64((__m128i*)(resi + stride), T02);
+
+       T00 = _mm_cvtsi32_si128(*(uint32_t*)(fenc + (2) * stride));
+    T01 = _mm_cvtsi32_si128(*(uint32_t*)(pred + (2) * stride));
+       T00 = _mm_unpacklo_epi8(T00, _mm_setzero_si128());
+       T01 = _mm_unpacklo_epi8(T01, _mm_setzero_si128());
+       T02 = _mm_sub_epi16(T00, T01);
+    _mm_storel_epi64((__m128i*)(resi + (2) * stride), T02);
+
+       T00 = _mm_cvtsi32_si128(*(uint32_t*)(fenc + (3) * stride));
+    T01 = _mm_cvtsi32_si128(*(uint32_t*)(pred + (3) * stride));
+       T00 = _mm_unpacklo_epi8(T00, _mm_setzero_si128());
+       T01 = _mm_unpacklo_epi8(T01, _mm_setzero_si128());
+       T02 = _mm_sub_epi16(T00, T01);
+    _mm_storel_epi64((__m128i*)(resi + (3) * stride), T02);
 }
 
 void getResidual8(pixel *fenc, pixel *pred, short *resi, int stride)
_______________________________________________
x265-devel mailing list
[email protected]
https://mailman.videolan.org/listinfo/x265-devel

Reply via email to