Skip to site navigation (Press enter)

[webkit-changes] [138936] trunk/Source/WebCore

rgabor Mon, 07 Jan 2013 06:29:49 -0800

Title: [138936] trunk/Source/WebCore

Revision: 138936
Author: rga...@webkit.org
Date: 2013-01-07 06:31:32 -0800 (Mon, 07 Jan 2013)

Log Message

Optimizing RGBA16, RGB16, ARGB16, BGRA16 unpacking functions with NEON intrinsics
https://bugs.webkit.org/show_bug.cgi?id=103614


Reviewed by Zoltan Herczeg.

Optimizing RGBA16, RGB16, ARGB16, BGRA16 unpacking functions in GraphicsContext3D
with ARM NEON intrinsics. The optimized functions are 2-3x faster than the originals.

* platform/graphics/GraphicsContext3D.cpp:
(WebCore):
* platform/graphics/cpu/arm/GraphicsContext3DNEON.h:
(WebCore::SIMD::unpackOneRowOfRGBA16LittleToRGBA8):
(SIMD):
(WebCore::SIMD::unpackOneRowOfRGB16LittleToRGBA8):
(WebCore::SIMD::unpackOneRowOfARGB16LittleToRGBA8):
(WebCore::SIMD::unpackOneRowOfBGRA16LittleToRGBA8):

Modified Paths

trunk/Source/WebCore/ChangeLog
trunk/Source/WebCore/platform/graphics/GraphicsContext3D.cpp
trunk/Source/WebCore/platform/graphics/cpu/arm/GraphicsContext3DNEON.h

Diff

Modified: trunk/Source/WebCore/ChangeLog (138935 => 138936)


--- trunk/Source/WebCore/ChangeLog	2013-01-07 13:26:13 UTC (rev 138935)
+++ trunk/Source/WebCore/ChangeLog	2013-01-07 14:31:32 UTC (rev 138936)
@@ -1,3 +1,22 @@
+2013-01-07  Gabor Rapcsanyi  <rga...@webkit.org>
+
+        Optimizing RGBA16, RGB16, ARGB16, BGRA16 unpacking functions with NEON intrinsics
+        https://bugs.webkit.org/show_bug.cgi?id=103614
+
+        Reviewed by Zoltan Herczeg.
+
+        Optimizing RGBA16, RGB16, ARGB16, BGRA16 unpacking functions in GraphicsContext3D
+        with ARM NEON intrinsics. The optimized functions are 2-3x faster than the originals.
+
+        * platform/graphics/GraphicsContext3D.cpp:
+        (WebCore):
+        * platform/graphics/cpu/arm/GraphicsContext3DNEON.h:
+        (WebCore::SIMD::unpackOneRowOfRGBA16LittleToRGBA8):
+        (SIMD):
+        (WebCore::SIMD::unpackOneRowOfRGB16LittleToRGBA8):
+        (WebCore::SIMD::unpackOneRowOfARGB16LittleToRGBA8):
+        (WebCore::SIMD::unpackOneRowOfBGRA16LittleToRGBA8):
+
 2013-01-07  Christophe Dumez  <christophe.du...@intel.com>
 
         Regression(r138786): Causes webaudio tests to crash

Modified: trunk/Source/WebCore/platform/graphics/GraphicsContext3D.cpp (138935 => 138936)


--- trunk/Source/WebCore/platform/graphics/GraphicsContext3D.cpp	2013-01-07 13:26:13 UTC (rev 138935)
+++ trunk/Source/WebCore/platform/graphics/GraphicsContext3D.cpp	2013-01-07 14:31:32 UTC (rev 138936)
@@ -392,6 +392,9 @@
 
 void unpackOneRowOfRGBA16LittleToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
 {
+#if HAVE(ARM_NEON_INTRINSICS)
+    SIMD::unpackOneRowOfRGBA16LittleToRGBA8(source, destination, pixelsPerRow);
+#endif
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16LittleTo8(source[0]);
         destination[1] = convertColor16LittleTo8(source[1]);
@@ -428,6 +431,9 @@
 
 void unpackOneRowOfRGB16LittleToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
 {
+#if HAVE(ARM_NEON_INTRINSICS)
+    SIMD::unpackOneRowOfRGB16LittleToRGBA8(source, destination, pixelsPerRow);
+#endif
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16LittleTo8(source[0]);
         destination[1] = convertColor16LittleTo8(source[1]);
@@ -476,6 +482,9 @@
 
 void unpackOneRowOfARGB16LittleToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
 {
+#if HAVE(ARM_NEON_INTRINSICS)
+    SIMD::unpackOneRowOfARGB16LittleToRGBA8(source, destination, pixelsPerRow);
+#endif
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16LittleTo8(source[1]);
         destination[1] = convertColor16LittleTo8(source[2]);
@@ -530,6 +539,9 @@
 
 void unpackOneRowOfBGRA16LittleToRGBA8(const uint16_t* source, uint8_t* destination, unsigned int pixelsPerRow)
 {
+#if HAVE(ARM_NEON_INTRINSICS)
+    SIMD::unpackOneRowOfBGRA16LittleToRGBA8(source, destination, pixelsPerRow);
+#endif
     for (unsigned int i = 0; i < pixelsPerRow; ++i) {
         destination[0] = convertColor16LittleTo8(source[2]);
         destination[1] = convertColor16LittleTo8(source[1]);

Modified: trunk/Source/WebCore/platform/graphics/cpu/arm/GraphicsContext3DNEON.h (138935 => 138936)


--- trunk/Source/WebCore/platform/graphics/cpu/arm/GraphicsContext3DNEON.h	2013-01-07 13:26:13 UTC (rev 138935)
+++ trunk/Source/WebCore/platform/graphics/cpu/arm/GraphicsContext3DNEON.h	2013-01-07 14:31:32 UTC (rev 138936)
@@ -34,6 +34,86 @@
 
 namespace SIMD {
 
+ALWAYS_INLINE void unpackOneRowOfRGBA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
+{
+    unsigned componentsPerRow = pixelsPerRow * 4;
+    unsigned tailComponents = componentsPerRow % 16;
+    unsigned componentsSize = componentsPerRow - tailComponents;
+    const uint8_t* src = reinterpret_cast<const uint8_t*>(source);
+
+    for (unsigned i = 0; i < componentsSize; i += 16) {
+        uint8x16x2_t components = vld2q_u8(src + i * 2);
+        vst1q_u8(destination + i, components.val[1]);
+    }
+
+    source += componentsSize;
+    destination += componentsSize;
+    pixelsPerRow = tailComponents / 4;
+}
+
+ALWAYS_INLINE void unpackOneRowOfRGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
+{
+    unsigned componentsPerRow = pixelsPerRow * 3;
+    unsigned tailComponents = componentsPerRow % 24;
+    unsigned componentsSize = componentsPerRow - tailComponents;
+
+    uint8x8_t componentA = vdup_n_u8(0xFF);
+    for (unsigned i = 0; i < componentsSize; i += 24) {
+        uint16x8x3_t RGB16 = vld3q_u16(source + i);
+        uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(RGB16.val[0], 8));
+        uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(RGB16.val[1], 8));
+        uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(RGB16.val[2], 8));
+        uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}};
+        vst4_u8(destination, RGBA8);
+        destination += 32;
+    }
+
+    source += componentsSize;
+    pixelsPerRow = tailComponents / 3;
+}
+
+ALWAYS_INLINE void unpackOneRowOfARGB16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
+{
+    unsigned componentsPerRow = pixelsPerRow * 4;
+    unsigned tailComponents = componentsPerRow % 32;
+    unsigned componentsSize = componentsPerRow - tailComponents;
+
+    for (unsigned i = 0; i < componentsSize; i += 32) {
+        uint16x8x4_t ARGB16 = vld4q_u16(source + i);
+        uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(ARGB16.val[0], 8));
+        uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(ARGB16.val[1], 8));
+        uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(ARGB16.val[2], 8));
+        uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(ARGB16.val[3], 8));
+        uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}};
+        vst4_u8(destination + i, RGBA8);
+    }
+
+    source += componentsSize;
+    destination += componentsSize;
+    pixelsPerRow = tailComponents / 4;
+}
+
+ALWAYS_INLINE void unpackOneRowOfBGRA16LittleToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
+{
+    unsigned componentsPerRow = pixelsPerRow * 4;
+    unsigned tailComponents = componentsPerRow % 32;
+    unsigned componentsSize = componentsPerRow - tailComponents;
+
+    for (unsigned i = 0; i < componentsSize; i += 32) {
+        uint16x8x4_t ARGB16 = vld4q_u16(source + i);
+        uint8x8_t componentB = vqmovn_u16(vshrq_n_u16(ARGB16.val[0], 8));
+        uint8x8_t componentG = vqmovn_u16(vshrq_n_u16(ARGB16.val[1], 8));
+        uint8x8_t componentR = vqmovn_u16(vshrq_n_u16(ARGB16.val[2], 8));
+        uint8x8_t componentA = vqmovn_u16(vshrq_n_u16(ARGB16.val[3], 8));
+        uint8x8x4_t RGBA8 = {{componentR, componentG, componentB, componentA}};
+        vst4_u8(destination + i, RGBA8);
+    }
+
+    source += componentsSize;
+    destination += componentsSize;
+    pixelsPerRow = tailComponents / 4;
+}
+
 ALWAYS_INLINE void unpackOneRowOfRGBA4444ToRGBA8(const uint16_t*& source, uint8_t*& destination, unsigned& pixelsPerRow)
 {
     unsigned tailPixels = pixelsPerRow % 8;

_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
http://lists.webkit.org/mailman/listinfo/webkit-changes