Log Message
Optimizing blend filter to ARM-NEON with intrinsics https://bugs.webkit.org/show_bug.cgi?id=90949
Reviewed by Zoltan Herczeg. The feBlend SVG filter modes can be sped up greatly with ARM-NEON since we are able to process 2 pixels (8 channels) at the same time. The code is written with NEON intrinsics and it doesn't affect the generic implementation - it has the same behaviour as the original algorithm. With this NEON optimization the calculation is ~4.5 times faster for each mode. Existing tests cover this issue. * CMakeLists.txt: * GNUmakefile.list.am: * Target.pri: * WebCore.gypi: * WebCore.vcproj/WebCore.vcproj: * WebCore.xcodeproj/project.pbxproj: * platform/graphics/filters/FEBlend.cpp: (WebCore::FEBlend::platformApplyGeneric): (WebCore): (WebCore::FEBlend::platformApplySoftware): * platform/graphics/filters/FEBlend.h: (FEBlend): * platform/graphics/filters/arm/FEBlendNEON.h: Added. (WebCore): (FEBlendUtilitiesNEON): (WebCore::FEBlendUtilitiesNEON::div255): integer division by 255 (WebCore::FEBlendUtilitiesNEON::normal): calculate normal mode blending for two pixels (WebCore::FEBlendUtilitiesNEON::multiply): calculate multiply mode blending for two pixels (WebCore::FEBlendUtilitiesNEON::screen): calculate screen mode blending for two pixels (WebCore::FEBlendUtilitiesNEON::darken): calculate darken mode blending for two pixels (WebCore::FEBlendUtilitiesNEON::lighten): calculate lighten mode blending for two pixels (WebCore::FEBlend::platformApplyNEON):
Modified Paths
- trunk/Source/WebCore/CMakeLists.txt
- trunk/Source/WebCore/ChangeLog
- trunk/Source/WebCore/GNUmakefile.list.am
- trunk/Source/WebCore/Target.pri
- trunk/Source/WebCore/WebCore.gypi
- trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj
- trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj
- trunk/Source/WebCore/platform/graphics/filters/FEBlend.cpp
- trunk/Source/WebCore/platform/graphics/filters/FEBlend.h
Added Paths
Diff
Modified: trunk/Source/WebCore/CMakeLists.txt (122581 => 122582)
--- trunk/Source/WebCore/CMakeLists.txt 2012-07-13 15:15:40 UTC (rev 122581)
+++ trunk/Source/WebCore/CMakeLists.txt 2012-07-13 15:31:06 UTC (rev 122582)
@@ -1246,6 +1246,7 @@
platform/graphics/filters/SourceGraphic.cpp
platform/graphics/filters/arm/NEONHelpers.h
+ platform/graphics/filters/arm/FEBlendNEON.h
platform/graphics/filters/arm/FECompositeArithmeticNEON.h
platform/graphics/filters/arm/FEGaussianBlurNEON.h
platform/graphics/filters/arm/FELightingNEON.cpp
Modified: trunk/Source/WebCore/ChangeLog (122581 => 122582)
--- trunk/Source/WebCore/ChangeLog 2012-07-13 15:15:40 UTC (rev 122581)
+++ trunk/Source/WebCore/ChangeLog 2012-07-13 15:31:06 UTC (rev 122582)
@@ -1,3 +1,41 @@
+2012-07-13 Gabor Rapcsanyi <rga...@webkit.org>
+
+ Optimizing blend filter to ARM-NEON with intrinsics
+ https://bugs.webkit.org/show_bug.cgi?id=90949
+
+ Reviewed by Zoltan Herczeg.
+
+ The feBlend SVG filter modes can be sped up greatly with ARM-NEON since
+ we are able to process 2 pixels (8 channels) at the same time.
+ The code is written with NEON intrinsics and it doesn't affect the
+ generic implementation - it has the same behaviour as the original algorithm.
+ With this NEON optimization the calculation is ~4.5 times faster for each mode.
+
+ Existing tests cover this issue.
+
+ * CMakeLists.txt:
+ * GNUmakefile.list.am:
+ * Target.pri:
+ * WebCore.gypi:
+ * WebCore.vcproj/WebCore.vcproj:
+ * WebCore.xcodeproj/project.pbxproj:
+ * platform/graphics/filters/FEBlend.cpp:
+ (WebCore::FEBlend::platformApplyGeneric):
+ (WebCore):
+ (WebCore::FEBlend::platformApplySoftware):
+ * platform/graphics/filters/FEBlend.h:
+ (FEBlend):
+ * platform/graphics/filters/arm/FEBlendNEON.h: Added.
+ (WebCore):
+ (FEBlendUtilitiesNEON):
+ (WebCore::FEBlendUtilitiesNEON::div255): integer division by 255
+ (WebCore::FEBlendUtilitiesNEON::normal): calculate normal mode blending for two pixels
+ (WebCore::FEBlendUtilitiesNEON::multiply): calculate multiply mode blending for two pixels
+ (WebCore::FEBlendUtilitiesNEON::screen): calculate screen mode blending for two pixels
+ (WebCore::FEBlendUtilitiesNEON::darken): calculate darken mode blending for two pixels
+ (WebCore::FEBlendUtilitiesNEON::lighten): calculate lighten mode blending for two pixels
+ (WebCore::FEBlend::platformApplyNEON):
+
2012-07-13 Ilya Tikhonovsky <loi...@chromium.org>
Web Inspector: native memory instrumentation: extract instrumentation methods into MemoryClassInfo
Modified: trunk/Source/WebCore/GNUmakefile.list.am (122581 => 122582)
--- trunk/Source/WebCore/GNUmakefile.list.am 2012-07-13 15:15:40 UTC (rev 122581)
+++ trunk/Source/WebCore/GNUmakefile.list.am 2012-07-13 15:31:06 UTC (rev 122582)
@@ -3288,6 +3288,7 @@
Source/WebCore/platform/graphics/filters/SpotLightSource.cpp \
Source/WebCore/platform/graphics/filters/SpotLightSource.h \
Source/WebCore/platform/graphics/filters/arm/NEONHelpers.h \
+ Source/WebCore/platform/graphics/filters/arm/FEBlendNEON.h \
Source/WebCore/platform/graphics/filters/arm/FECompositeArithmeticNEON.h \
Source/WebCore/platform/graphics/filters/arm/FEGaussianBlurNEON.h \
Source/WebCore/platform/graphics/filters/arm/FELightingNEON.cpp \
Modified: trunk/Source/WebCore/Target.pri (122581 => 122582)
--- trunk/Source/WebCore/Target.pri 2012-07-13 15:15:40 UTC (rev 122581)
+++ trunk/Source/WebCore/Target.pri 2012-07-13 15:31:06 UTC (rev 122582)
@@ -2278,6 +2278,7 @@
platform/graphics/filters/SourceAlpha.h \
platform/graphics/filters/SourceGraphic.h \
platform/graphics/filters/arm/NEONHelpers.h \
+ platform/graphics/filters/arm/FEBlendNEON.h \
platform/graphics/filters/arm/FECompositeArithmeticNEON.h \
platform/graphics/filters/arm/FEGaussianBlurNEON.h \
platform/graphics/filters/arm/FELightingNEON.h \
Modified: trunk/Source/WebCore/WebCore.gypi (122581 => 122582)
--- trunk/Source/WebCore/WebCore.gypi 2012-07-13 15:15:40 UTC (rev 122581)
+++ trunk/Source/WebCore/WebCore.gypi 2012-07-13 15:31:06 UTC (rev 122582)
@@ -3714,6 +3714,7 @@
'platform/graphics/filters/SpotLightSource.cpp',
'platform/graphics/filters/SpotLightSource.h',
'platform/graphics/filters/arm/NEONHelpers.h',
+ 'platform/graphics/filters/arm/FEBlendNEON.h',
'platform/graphics/filters/arm/FECompositeArithmeticNEON.h',
'platform/graphics/filters/arm/FEGaussianBlurNEON.h',
'platform/graphics/filters/arm/FELightingNEON.cpp',
Modified: trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj (122581 => 122582)
--- trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj 2012-07-13 15:15:40 UTC (rev 122581)
+++ trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj 2012-07-13 15:31:06 UTC (rev 122582)
@@ -31549,6 +31549,10 @@
>
</File>
<File
+ RelativePath="..\platform\graphics\filters\arm\FEBlendNEON.h"
+ >
+ </File>
+ <File
RelativePath="..\platform\graphics\filters\FEColorMatrix.cpp"
>
</File>
Modified: trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj (122581 => 122582)
--- trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj 2012-07-13 15:15:40 UTC (rev 122581)
+++ trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj 2012-07-13 15:31:06 UTC (rev 122582)
@@ -10332,6 +10332,7 @@
938192040F87E1EC00D5352A /* BinaryPropertyList.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = BinaryPropertyList.h; sourceTree = "<group>"; };
9382AAB10D8C386100F357A6 /* NodeWithIndex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = NodeWithIndex.h; sourceTree = "<group>"; };
9382DF5710A8D5C900925652 /* ColorSpace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ColorSpace.h; sourceTree = "<group>"; };
+ 938C496815AD67E300FCB2E1 /* FEBlendNEON.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FEBlendNEON.h; sourceTree = "<group>"; };
938E65F009F09840008A48EC /* JSHTMLElementWrapperFactory.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = JSHTMLElementWrapperFactory.h; sourceTree = "<group>"; };
938E65F609F0985D008A48EC /* JSHTMLElementWrapperFactory.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = JSHTMLElementWrapperFactory.cpp; sourceTree = "<group>"; };
938E662509F09956008A48EC /* HTMLCanvasElement.idl */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = HTMLCanvasElement.idl; sourceTree = "<group>"; };
@@ -15125,6 +15126,7 @@
49ECEB5C1499790D00CDD3A4 /* arm */ = {
isa = PBXGroup;
children = (
+ 938C496815AD67E300FCB2E1 /* FEBlendNEON.h */,
49ECEB5E1499790D00CDD3A4 /* FECompositeArithmeticNEON.h */,
49ECEB601499790D00CDD3A4 /* FEGaussianBlurNEON.h */,
49ECEB611499790D00CDD3A4 /* FELightingNEON.cpp */,
Modified: trunk/Source/WebCore/platform/graphics/filters/FEBlend.cpp (122581 => 122582)
--- trunk/Source/WebCore/platform/graphics/filters/FEBlend.cpp 2012-07-13 15:15:40 UTC (rev 122581)
+++ trunk/Source/WebCore/platform/graphics/filters/FEBlend.cpp 2012-07-13 15:31:06 UTC (rev 122582)
@@ -24,6 +24,7 @@
#if ENABLE(FILTERS)
#include "FEBlend.h"
+#include "FEBlendNEON.h"
#include "Filter.h"
#include "FloatPoint.h"
@@ -86,26 +87,12 @@
return ((std::max((255 - alphaA) * colorB + colorA * 255, (255 - alphaB) * colorA + colorB * 255)) / 255);
}
-void FEBlend::platformApplySoftware()
+void FEBlend::platformApplyGeneric(PassRefPtr<Uint8ClampedArray> pixelArrayA, PassRefPtr<Uint8ClampedArray> pixelArrayB,
+ Uint8ClampedArray* dstPixelArray, unsigned pixelArrayLength)
{
- FilterEffect* in = inputEffect(0);
- FilterEffect* in2 = inputEffect(1);
+ RefPtr<Uint8ClampedArray> srcPixelArrayA = pixelArrayA;
+ RefPtr<Uint8ClampedArray> srcPixelArrayB = pixelArrayB;
- ASSERT(m_mode > FEBLEND_MODE_UNKNOWN);
- ASSERT(m_mode <= FEBLEND_MODE_LIGHTEN);
-
- Uint8ClampedArray* dstPixelArray = createPremultipliedImageResult();
- if (!dstPixelArray)
- return;
-
- IntRect effectADrawingRect = requestedRegionOfInputImageData(in->absolutePaintRect());
- RefPtr<Uint8ClampedArray> srcPixelArrayA = in->asPremultipliedImage(effectADrawingRect);
-
- IntRect effectBDrawingRect = requestedRegionOfInputImageData(in2->absolutePaintRect());
- RefPtr<Uint8ClampedArray> srcPixelArrayB = in2->asPremultipliedImage(effectBDrawingRect);
-
- unsigned pixelArrayLength = srcPixelArrayA->length();
- ASSERT(pixelArrayLength == srcPixelArrayB->length());
for (unsigned pixelOffset = 0; pixelOffset < pixelArrayLength; pixelOffset += 4) {
unsigned char alphaA = srcPixelArrayA->item(pixelOffset + 3);
unsigned char alphaB = srcPixelArrayB->item(pixelOffset + 3);
@@ -143,6 +130,45 @@
}
}
+void FEBlend::platformApplySoftware() // Reads both inputs as premultiplied pixel arrays and blends them into the result buffer (NEON path when available, generic path otherwise).
+{
+ FilterEffect* in = inputEffect(0);
+ FilterEffect* in2 = inputEffect(1);
+
+ ASSERT(m_mode > FEBLEND_MODE_UNKNOWN);
+ ASSERT(m_mode <= FEBLEND_MODE_LIGHTEN);
+
+ Uint8ClampedArray* dstPixelArray = createPremultipliedImageResult();
+ if (!dstPixelArray) // No result buffer was created, so there is nothing to write.
+ return;
+
+ IntRect effectADrawingRect = requestedRegionOfInputImageData(in->absolutePaintRect());
+ RefPtr<Uint8ClampedArray> srcPixelArrayA = in->asPremultipliedImage(effectADrawingRect);
+
+ IntRect effectBDrawingRect = requestedRegionOfInputImageData(in2->absolutePaintRect());
+ RefPtr<Uint8ClampedArray> srcPixelArrayB = in2->asPremultipliedImage(effectBDrawingRect);
+
+ unsigned pixelArrayLength = srcPixelArrayA->length(); // Length in bytes (4 per pixel); both inputs are expected to match.
+ ASSERT(pixelArrayLength == srcPixelArrayB->length());
+
+#if HAVE(ARM_NEON_INTRINSICS)
+ if (pixelArrayLength >= 8) // At least two pixels: platformApplyNEON consumes 8 bytes per step.
+ platformApplyNEON(srcPixelArrayA->data(), srcPixelArrayB->data(), dstPixelArray->data(), pixelArrayLength);
+ else { // If there is just one pixel we expand it to two.
+ ASSERT(pixelArrayLength > 0); // NOTE(review): debug-only check; with an empty array a release build would still read 4 bytes below - confirm callers never reach here with length 0.
+ uint32_t sourceA[2] = {0, 0}; // Second element stays zero and acts as padding for the missing pixel.
+ uint32_t sourceBAndDest[2] = {0, 0}; // Serves as both source B and destination of the NEON call.
+
+ sourceA[0] = reinterpret_cast<uint32_t*>(srcPixelArrayA->data())[0];
+ sourceBAndDest[0] = reinterpret_cast<uint32_t*>(srcPixelArrayB->data())[0];
+ platformApplyNEON(reinterpret_cast<uint8_t*>(sourceA), reinterpret_cast<uint8_t*>(sourceBAndDest), reinterpret_cast<uint8_t*>(sourceBAndDest), 8);
+ reinterpret_cast<uint32_t*>(dstPixelArray->data())[0] = sourceBAndDest[0]; // Copy back only the first (real) pixel.
+ }
+#else
+ platformApplyGeneric(srcPixelArrayA, srcPixelArrayB, dstPixelArray, pixelArrayLength);
+#endif
+}
+
void FEBlend::dump()
{
}
Modified: trunk/Source/WebCore/platform/graphics/filters/FEBlend.h (122581 => 122582)
--- trunk/Source/WebCore/platform/graphics/filters/FEBlend.h 2012-07-13 15:15:40 UTC (rev 122581)
+++ trunk/Source/WebCore/platform/graphics/filters/FEBlend.h 2012-07-13 15:31:06 UTC (rev 122582)
@@ -45,6 +45,11 @@
BlendModeType blendMode() const;
bool setBlendMode(BlendModeType);
+ void platformApplyGeneric(PassRefPtr<Uint8ClampedArray> pixelArrayA, PassRefPtr<Uint8ClampedArray> pixelArrayB,
+ Uint8ClampedArray* dstPixelArray, unsigned pixelArrayLength);
+ void platformApplyNEON(unsigned char* srcPixelArrayA, unsigned char* srcPixelArrayB, unsigned char* dstPixelArray,
+ unsigned colorArrayLength);
+
virtual void platformApplySoftware();
virtual void dump();
Added: trunk/Source/WebCore/platform/graphics/filters/arm/FEBlendNEON.h (0 => 122582)
--- trunk/Source/WebCore/platform/graphics/filters/arm/FEBlendNEON.h (rev 0)
+++ trunk/Source/WebCore/platform/graphics/filters/arm/FEBlendNEON.h 2012-07-13 15:31:06 UTC (rev 122582)
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2012 University of Szeged
+ * Copyright (C) 2012 Gabor Rapcsanyi
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF SZEGED ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL UNIVERSITY OF SZEGED OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef FEBlendNEON_h
+#define FEBlendNEON_h
+
+#include <wtf/Platform.h>
+
+#if ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS)
+
+#include "FEBlend.h"
+#include <arm_neon.h>
+
+namespace WebCore {
+
+class FEBlendUtilitiesNEON { // Per-mode blend formulas evaluated on eight 16-bit lanes at once (platformApplyNEON feeds it two 4-channel pixels widened from 8 bits).
+public:
+ static inline uint16x8_t div255(uint16x8_t num, uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne) // Per-lane division by 255 using only shifts, a multiply and adds.
+ {
+ uint16x8_t quotient = vshrq_n_u16(num, 8); // num / 256 as a first estimate of num / 255.
+ uint16x8_t remainder = vaddq_u16(vsubq_u16(num, vmulq_u16(sixteenConst255, quotient)), sixteenConstOne); // num - 255 * quotient + 1.
+ return vaddq_u16(quotient, vshrq_n_u16(remainder, 8)); // Add one more when the adjusted remainder reaches 256.
+ }
+
+ static inline uint16x8_t normal(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t, // Result: (255 - alphaA) * pixelB / 255 + pixelA; alphaB is unused.
+ uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
+ {
+ uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA);
+ uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB);
+ uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne);
+ return vaddq_u16(tmp3, pixelA);
+ }
+
+ static inline uint16x8_t multiply(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, // Result: ((255 - alphaA) * pixelB + (255 - alphaB + pixelB) * pixelA) / 255.
+ uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
+ {
+ uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA);
+ uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB);
+ uint16x8_t tmp3 = vaddq_u16(vsubq_u16(sixteenConst255, alphaB), pixelB);
+ uint16x8_t tmp4 = vmulq_u16(tmp3, pixelA);
+ uint16x8_t tmp5 = vaddq_u16(tmp2, tmp4); // NOTE(review): the 16-bit sum presumably relies on premultiplied inputs (color <= alpha) to stay in range - confirm.
+ return div255(tmp5, sixteenConst255, sixteenConstOne);
+ }
+
+ static inline uint16x8_t screen(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t, uint16x8_t, // Result: pixelA + pixelB - pixelA * pixelB / 255; both alpha arguments are unused.
+ uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
+ {
+ uint16x8_t tmp1 = vaddq_u16(pixelA, pixelB);
+ uint16x8_t tmp2 = vmulq_u16(pixelA, pixelB);
+ uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne);
+ return vsubq_u16(tmp1, tmp3);
+ }
+
+ static inline uint16x8_t darken(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, // Result: per-lane minimum of the two candidates computed below.
+ uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
+ {
+ uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA);
+ uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB);
+ uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne);
+ uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA); // First candidate: (255 - alphaA) * pixelB / 255 + pixelA.
+
+ uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB);
+ uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA);
+ uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne);
+ uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB); // Second candidate: (255 - alphaB) * pixelA / 255 + pixelB.
+
+ return vminq_u16(tmp4, tmp8);
+ }
+
+ static inline uint16x8_t lighten(uint16x8_t pixelA, uint16x8_t pixelB, uint16x8_t alphaA, uint16x8_t alphaB, // Result: per-lane maximum of the two candidates computed below.
+ uint16x8_t sixteenConst255, uint16x8_t sixteenConstOne)
+ {
+ uint16x8_t tmp1 = vsubq_u16(sixteenConst255, alphaA);
+ uint16x8_t tmp2 = vmulq_u16(tmp1, pixelB);
+ uint16x8_t tmp3 = div255(tmp2, sixteenConst255, sixteenConstOne);
+ uint16x8_t tmp4 = vaddq_u16(tmp3, pixelA); // First candidate: (255 - alphaA) * pixelB / 255 + pixelA.
+
+ uint16x8_t tmp5 = vsubq_u16(sixteenConst255, alphaB);
+ uint16x8_t tmp6 = vmulq_u16(tmp5, pixelA);
+ uint16x8_t tmp7 = div255(tmp6, sixteenConst255, sixteenConstOne);
+ uint16x8_t tmp8 = vaddq_u16(tmp7, pixelB); // Second candidate: (255 - alphaB) * pixelA / 255 + pixelB.
+
+ return vmaxq_u16(tmp4, tmp8);
+ }
+};
+
+void FEBlend::platformApplyNEON(unsigned char* srcPixelArrayA, unsigned char* srcPixelArrayB, unsigned char* dstPixelArray, // Blends two 4-byte pixels per step according to m_mode. NOTE(review): non-inline member definition in a header; fine only if a single translation unit includes it - confirm.
+ unsigned colorArrayLength) // Length in bytes. NOTE(review): the loop reads 8 bytes per step, so callers appear to need a length of at least 8 - confirm.
+{
+ uint8_t* sourcePixelA = reinterpret_cast<uint8_t*>(srcPixelArrayA);
+ uint8_t* sourcePixelB = reinterpret_cast<uint8_t*>(srcPixelArrayB);
+ uint8_t* destinationPixel = reinterpret_cast<uint8_t*>(dstPixelArray);
+
+ uint16x8_t sixteenConst255 = vdupq_n_u16(255); // Constants hoisted out of the loop and passed to every helper.
+ uint16x8_t sixteenConstOne = vdupq_n_u16(1);
+
+ unsigned colorOffset = 0;
+ while (colorOffset < colorArrayLength) { // Each iteration handles 8 bytes, i.e. two pixels.
+ unsigned char alphaA1 = srcPixelArrayA[colorOffset + 3]; // Fourth byte of each pixel is used as its alpha.
+ unsigned char alphaB1 = srcPixelArrayB[colorOffset + 3];
+ unsigned char alphaA2 = srcPixelArrayA[colorOffset + 7];
+ unsigned char alphaB2 = srcPixelArrayB[colorOffset + 7];
+
+ uint16x8_t doubblePixelA = vmovl_u8(vld1_u8(sourcePixelA + colorOffset)); // Load 8 bytes and widen them to 16-bit lanes.
+ uint16x8_t doubblePixelB = vmovl_u8(vld1_u8(sourcePixelB + colorOffset));
+ uint16x8_t alphaA = vcombine_u16(vdup_n_u16(alphaA1), vdup_n_u16(alphaA2)); // Broadcast each pixel's alpha across that pixel's four lanes.
+ uint16x8_t alphaB = vcombine_u16(vdup_n_u16(alphaB1), vdup_n_u16(alphaB2));
+
+ uint16x8_t result;
+ switch (m_mode) {
+ case FEBLEND_MODE_NORMAL:
+ result = FEBlendUtilitiesNEON::normal(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
+ break;
+ case FEBLEND_MODE_MULTIPLY:
+ result = FEBlendUtilitiesNEON::multiply(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
+ break;
+ case FEBLEND_MODE_SCREEN:
+ result = FEBlendUtilitiesNEON::screen(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
+ break;
+ case FEBLEND_MODE_DARKEN:
+ result = FEBlendUtilitiesNEON::darken(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
+ break;
+ case FEBLEND_MODE_LIGHTEN:
+ result = FEBlendUtilitiesNEON::lighten(doubblePixelA, doubblePixelB, alphaA, alphaB, sixteenConst255, sixteenConstOne);
+ break;
+ case FEBLEND_MODE_UNKNOWN:
+ default:
+ result = vdupq_n_u16(0); // Unknown mode: all channels are set to zero before the alpha bytes are written below.
+ break;
+ }
+
+ vst1_u8(destinationPixel + colorOffset, vmovn_u16(result)); // Narrow to 8 bits and store all 8 channels; the two alpha bytes are overwritten below.
+
+ unsigned char alphaR1 = 255 - ((255 - alphaA1) * (255 - alphaB1)) / 255; // Result alpha is computed in scalar code, independent of the blend mode.
+ unsigned char alphaR2 = 255 - ((255 - alphaA2) * (255 - alphaB2)) / 255;
+
+ dstPixelArray[colorOffset + 3] = alphaR1;
+ dstPixelArray[colorOffset + 7] = alphaR2;
+
+ colorOffset += 8;
+ if (colorOffset > colorArrayLength) { // Overshoot means only one pixel was left (odd pixel count).
+ ASSERT(colorOffset - 4 == colorArrayLength);
+ colorOffset = colorArrayLength - 8; // Step back so the final iteration covers the last two pixels; one of them is computed a second time.
+ }
+ }
+}
+
+} // namespace WebCore
+
+#endif // ENABLE(FILTERS) && HAVE(ARM_NEON_INTRINSICS)
+
+#endif // FEBlendNEON_h
_______________________________________________ webkit-changes mailing list webkit-changes@lists.webkit.org http://lists.webkit.org/mailman/listinfo/webkit-changes