vcl/Library_vcl.mk                       |   16 ++++++
 vcl/inc/ScanlineTools.hxx                |   28 +++++++++++
 vcl/source/bitmap/ScanlineTools.cxx      |   30 ++++++++++++
 vcl/source/bitmap/ScanlineToolsAVX2.cxx  |   64 ++++++++++++++++++++++++++
 vcl/source/bitmap/ScanlineToolsSSE2.cxx  |   76 +++++++++++++++++++++++++++++++
 vcl/source/bitmap/ScanlineToolsSSSE3.cxx |   61 ++++++++++++++++++++++++
 6 files changed, 275 insertions(+)

New commits:
commit 81910410d22c060f9901b129697ea43a25cfbd99
Author: Tomaž Vajngerl <tomaz.vajng...@collabora.co.uk>
Date:   Thu Jul 13 22:02:19 2017 +0200

    scanline tools: convert RGBA <-> BGRA + vectorized fast paths
    
    This adds a tool to convert a scanline from RGBA color channel
    order to BGRA color channel order and back. It also includes the
    vectorized fast path to accelerate it with SSE2 (~1.7x faster),
    SSSE3 (~4x faster), AVX2 (~8x faster).
    
    Change-Id: Ic427eed15d3cef40f9ad87220fb6b71770673c92

diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index a0113a2e85d4..93981505a600 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -411,6 +411,22 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
     vcl/backendtest/outputdevice/rectangle \
 ))
 
+$(eval $(call gb_Library_add_cxxobjects,vcl,\
+    vcl/source/bitmap/ScanlineTools, $(gb_LinkTarget_EXCEPTIONFLAGS) \
+))
+# The three SIMD variants below are additionally compiled with $(INTRINSICS_CXXFLAGS).
+$(eval $(call gb_Library_add_cxxobjects,vcl,\
+    vcl/source/bitmap/ScanlineToolsSSE2, $(gb_LinkTarget_EXCEPTIONFLAGS) 
$(INTRINSICS_CXXFLAGS)\
+))
+
+$(eval $(call gb_Library_add_cxxobjects,vcl,\
+    vcl/source/bitmap/ScanlineToolsSSSE3, $(gb_LinkTarget_EXCEPTIONFLAGS) 
$(INTRINSICS_CXXFLAGS)\
+))
+
+$(eval $(call gb_Library_add_cxxobjects,vcl,\
+    vcl/source/bitmap/ScanlineToolsAVX2, $(gb_LinkTarget_EXCEPTIONFLAGS) 
$(INTRINSICS_CXXFLAGS)\
+))
+
 $(eval $(call gb_Library_add_cobjects,vcl,\
     vcl/source/filter/jpeg/transupp \
 ))
diff --git a/vcl/inc/ScanlineTools.hxx b/vcl/inc/ScanlineTools.hxx
new file mode 100644
index 000000000000..898019903a15
--- /dev/null
+++ b/vcl/inc/ScanlineTools.hxx
@@ -0,0 +1,28 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#ifndef INCLUDED_VCL_INC_SCANLINETOOLS_HXX
+#define INCLUDED_VCL_INC_SCANLINETOOLS_HXX
+
+#include <vcl/dllapi.h>
+
+namespace vcl {
+namespace scanline {
+
+bool VCL_DLLPUBLIC swapABCDtoCBAD(sal_uInt8* pSource, sal_uInt8* pDestination, 
sal_Int32 nScanlineSize); // portable version: per 4-byte pixel swaps bytes 0 and 2; always returns true
+bool VCL_DLLPUBLIC swapABCDtoCBAD_SSE2(sal_uInt8* pSource, sal_uInt8* 
pDestination, sal_Int32 nScanlineSize); // 16 bytes per step; returns false when built without LO_SSE2_AVAILABLE
+bool VCL_DLLPUBLIC swapABCDtoCBAD_SSSE3(sal_uInt8* pSource, sal_uInt8* 
pDestination, sal_Int32 nScanlineSize); // 16 bytes per step; returns false when built without LO_SSSE3_AVAILABLE
+bool VCL_DLLPUBLIC swapABCDtoCBAD_AVX2(sal_uInt8* pSource, sal_uInt8* 
pDestination, sal_Int32 nScanlineSize); // 32 bytes per step; returns false when built without LO_AVX2_AVAILABLE
+
+}} // end vcl::scanline
+
+#endif // INCLUDED_VCL_INC_SCANLINETOOLS_HXX
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/bitmap/ScanlineTools.cxx 
b/vcl/source/bitmap/ScanlineTools.cxx
new file mode 100644
index 000000000000..ceefd0f2c30f
--- /dev/null
+++ b/vcl/source/bitmap/ScanlineTools.cxx
@@ -0,0 +1,30 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include "ScanlineTools.hxx"
+
+namespace vcl {
+namespace scanline {
+
+bool swapABCDtoCBAD(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize)
+{
+    for (sal_Int32 i = 0; i < nScanlineSize; i += 4, pSource += 4, pDestination += 4)
+    {
+        // Bytes 0 and 2 trade places; gather before storing so pSource == pDestination is safe.
+        const sal_uInt8 aSwapped[4] = { pSource[2], pSource[1], pSource[0], pSource[3] };
+        for (int n = 0; n < 4; ++n)
+            pDestination[n] = aSwapped[n];
+    }
+    return true; // the portable path always succeeds
+}
+
+}} // end vcl::scanline
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/bitmap/ScanlineToolsAVX2.cxx 
b/vcl/source/bitmap/ScanlineToolsAVX2.cxx
new file mode 100644
index 000000000000..1ef386f8256b
--- /dev/null
+++ b/vcl/source/bitmap/ScanlineToolsAVX2.cxx
@@ -0,0 +1,64 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include "ScanlineTools.hxx"
+
+#include <tools/simdsupport.hxx>
+
+#if defined(LO_AVX2_AVAILABLE)
+#include <immintrin.h>
+#endif
+
+namespace vcl {
+namespace scanline {
+
+#if defined(LO_AVX2_AVAILABLE)
+bool swapABCDtoCBAD_AVX2(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize)
+{
+    // Shuffle indices: in every 4-byte pixel bytes 0 and 2 trade places, bytes 1 and 3 stay.
+    __m256i aShuffleMask = _mm256_set_epi8(31, 28, 29, 30, 27, 24, 25, 26,
+                                           23, 20, 21, 22, 19, 16, 17, 18,
+                                           15, 12, 13, 14, 11,  8,  9, 10,
+                                            7,  4,  5,  6,  3,  0,  1,  2);
+
+    sal_Int32 nBlocks = nScanlineSize / 32; // whole 32-byte blocks (8 pixels each)
+    if (nBlocks > 0)
+    {
+        __m256i* pSource256      = reinterpret_cast<__m256i*>(pSource);
+        __m256i* pDestination256 = reinterpret_cast<__m256i*>(pDestination);
+
+        for (sal_Int32 x = 0; x < nBlocks; ++x, ++pDestination256, ++pSource256)
+        {
+            _mm256_storeu_si256(pDestination256, _mm256_shuffle_epi8(_mm256_loadu_si256(pSource256), aShuffleMask));
+        }
+    }
+    // Step over the blocks converted above; what is left is done one pixel at a time.
+    pSource += nBlocks * 32;
+    pDestination += nBlocks * 32;
+
+    for (sal_Int32 i = nBlocks * 32; i < nScanlineSize; i += 4, pSource += 4, pDestination += 4)
+    {
+        // Gather before storing so an in-place call (pSource == pDestination) matches the vector loop.
+        const sal_uInt8 aSwapped[4] = { pSource[2], pSource[1], pSource[0], pSource[3] };
+        for (int n = 0; n < 4; ++n)
+            pDestination[n] = aSwapped[n];
+    }
+    return true;
+}
+#else
+bool swapABCDtoCBAD_AVX2(sal_uInt8* /*pSource*/, sal_uInt8* /*pDestination*/, sal_Int32 /*nScanlineSize*/)
+{
+    return false; // compiled without LO_AVX2_AVAILABLE: nothing is converted
+}
+#endif
+
+}} // end vcl::scanline
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/bitmap/ScanlineToolsSSE2.cxx 
b/vcl/source/bitmap/ScanlineToolsSSE2.cxx
new file mode 100644
index 000000000000..1f52fb6c5ec7
--- /dev/null
+++ b/vcl/source/bitmap/ScanlineToolsSSE2.cxx
@@ -0,0 +1,76 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include "ScanlineTools.hxx"
+
+#include <tools/simdsupport.hxx>
+
+#if defined(LO_SSE2_AVAILABLE)
+#include <emmintrin.h>
+#endif
+
+namespace vcl {
+namespace scanline {
+
+#if defined(LO_SSE2_AVAILABLE)
+bool swapABCDtoCBAD_SSE2(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize)
+{
+    // Whole 16-byte blocks (4 pixels each) are converted by the vector loop.
+    sal_Int32 nBlocks = nScanlineSize / 16;
+
+    if (nBlocks > 0)
+    {
+        __m128i* pSource128      = reinterpret_cast<__m128i*>(pSource);
+        __m128i* pDestination128 = reinterpret_cast<__m128i*>(pDestination);
+        // Keeps bytes 1 and 3 of every pixel (G and A in the lane sketches below).
+        __m128i agmask = _mm_set1_epi32(0xFF00FF00);
+
+        for (sal_Int32 x = 0; x < nBlocks; ++x, ++pDestination128, ++pSource128)
+        {
+            // RGBA RGBA RGBA RGBA
+            __m128i rgba = _mm_loadu_si128(pSource128);
+
+            // 0G0A 0G0A 0G0A 0G0A
+            __m128i ag = _mm_and_si128(agmask, rgba);
+            // R0B0 R0B0 R0B0 R0B0
+            __m128i rb = _mm_andnot_si128(agmask, rgba);
+
+            // Swap R and B by exchanging the two 16-bit halves of every pixel
+            // B0R0 B0R0 B0R0 B0R0
+            __m128i br = _mm_shufflehi_epi16(_mm_shufflelo_epi16(rb, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
+
+            //     B0R0 B0R0 B0R0 B0R0
+            // or  0G0A 0G0A 0G0A 0G0A
+            _mm_storeu_si128(pDestination128, _mm_or_si128(ag, br));
+        }
+    }
+    // Step over the blocks converted above; what is left is done one pixel at a time.
+    pSource += nBlocks * 16;
+    pDestination += nBlocks * 16;
+
+    for (sal_Int32 i = nBlocks * 16; i < nScanlineSize; i += 4, pSource += 4, pDestination += 4)
+    {
+        // Gather before storing so an in-place call (pSource == pDestination) matches the vector loop.
+        const sal_uInt8 aSwapped[4] = { pSource[2], pSource[1], pSource[0], pSource[3] };
+        for (int n = 0; n < 4; ++n)
+            pDestination[n] = aSwapped[n];
+    }
+    return true;
+}
+#else
+bool swapABCDtoCBAD_SSE2(sal_uInt8* /*pSource*/, sal_uInt8* /*pDestination*/, sal_Int32 /*nScanlineSize*/)
+{
+    return false; // compiled without LO_SSE2_AVAILABLE: nothing is converted
+}
+#endif
+
+}} // end vcl::scanline
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/bitmap/ScanlineToolsSSSE3.cxx 
b/vcl/source/bitmap/ScanlineToolsSSSE3.cxx
new file mode 100644
index 000000000000..bced320b4809
--- /dev/null
+++ b/vcl/source/bitmap/ScanlineToolsSSSE3.cxx
@@ -0,0 +1,61 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ */
+
+#include "ScanlineTools.hxx"
+
+#include <tools/simdsupport.hxx>
+
+#if defined(LO_SSSE3_AVAILABLE)
+#include <tmmintrin.h>
+#endif
+
+namespace vcl {
+namespace scanline {
+
+#if defined(LO_SSSE3_AVAILABLE)
+bool swapABCDtoCBAD_SSSE3(sal_uInt8* pSource, sal_uInt8* pDestination, sal_Int32 nScanlineSize)
+{
+    // Shuffle indices: in every 4-byte pixel bytes 0 and 2 trade places, bytes 1 and 3 stay.
+    __m128i aShuffleMask = _mm_set_epi8(15, 12, 13, 14, 11, 8, 9, 10, 7, 4, 5, 6, 3, 0, 1, 2);
+
+    sal_Int32 nBlocks = nScanlineSize / 16; // whole 16-byte blocks (4 pixels each)
+    if (nBlocks > 0)
+    {
+        __m128i* pSource128      = reinterpret_cast<__m128i*>(pSource);
+        __m128i* pDestination128 = reinterpret_cast<__m128i*>(pDestination);
+
+        for (sal_Int32 x = 0; x < nBlocks; ++x, ++pDestination128, ++pSource128)
+        {
+            // _mm_lddqu_si128 is the SSE3 unaligned load; _mm_shuffle_epi8 is the part that needs SSSE3
+            _mm_storeu_si128(pDestination128, _mm_shuffle_epi8(_mm_lddqu_si128(pSource128), aShuffleMask));
+        }
+    }
+    // Step over the blocks converted above; what is left is done one pixel at a time.
+    pSource += nBlocks * 16;
+    pDestination += nBlocks * 16;
+
+    for (sal_Int32 i = nBlocks * 16; i < nScanlineSize; i += 4, pSource += 4, pDestination += 4)
+    {
+        // Gather before storing so an in-place call (pSource == pDestination) matches the vector loop.
+        const sal_uInt8 aSwapped[4] = { pSource[2], pSource[1], pSource[0], pSource[3] };
+        for (int n = 0; n < 4; ++n)
+            pDestination[n] = aSwapped[n];
+    }
+    return true;
+}
+#else
+bool swapABCDtoCBAD_SSSE3(sal_uInt8* /*pSource*/, sal_uInt8* /*pDestination*/, sal_Int32 /*nScanlineSize*/)
+{
+    return false; // compiled without LO_SSSE3_AVAILABLE: nothing is converted
+}
+#endif
+}} // end vcl::scanline
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
_______________________________________________
Libreoffice-commits mailing list
libreoffice-comm...@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits

Reply via email to