Hi,

On 28/03/2016 21:19, Matthieu Bouron wrote:
---
  libswscale/arm/yuv2rgb_neon.S | 88 +++++++++++++++++--------------------------
  1 file changed, 34 insertions(+), 54 deletions(-)

diff --git a/libswscale/arm/yuv2rgb_neon.S b/libswscale/arm/yuv2rgb_neon.S
index 124d7d3..6b911c8 100644
--- a/libswscale/arm/yuv2rgb_neon.S
+++ b/libswscale/arm/yuv2rgb_neon.S

[...]

@@ -94,25 +67,29 @@
  .ifc \ofmt,bgra
      compute_rgba        d8, d7, d6, d9, d12, d11, d10, d13
  .endif
+
+    vzip.8              d6, d10
+    vzip.8              d7, d11
+    vzip.8              d8, d12
+    vzip.8              d9, d13

It would be nice to add a comment here explaining the register layout that results from this interleaving.

      vst4.8              {q3, q4}, [\dst,:128]!
      vst4.8              {q5, q6}, [\dst,:128]!
-
  .endm
.macro process_1l ofmt
-    compute_premult     d28, d29, d30, d31
-    vld1.8              {q7}, [r4]!
-    compute             r2, d14, d15, \ofmt
+    compute_premult
+    vld2.8              {d14, d15}, [r4]!
+    compute             r2, \ofmt
  .endm
.macro process_2l ofmt
-    compute_premult     d28, d29, d30, d31
+    compute_premult
-    vld1.8              {q7}, [r4]!                                    @ first line of luma
-    compute             r2, d14, d15, \ofmt
+    vld2.8              {d14, d15}, [r4]!                              @ q7 = Y (interleaved)
+    compute             r2, \ofmt
-    vld1.8              {q7}, [r12]!                                   @ second line of luma
-    compute             r11, d14, d15, \ofmt
+    vld2.8              {d14, d15}, [r12]!                             @ q7 = Y (interleaved)
+    compute             r11, \ofmt
  .endm

What about adding a level of macro here? Something like:
.macro process_1l_internal ofmt src_addr res
    compute_premult
    vld2.8            {d14, d15}, [\src_addr]!
    compute        \res, \ofmt
.endm

(again, the naming could be changed, according to your own taste :-) )

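This way, we would get the following (note that process_2l would then run
compute_premult once per line instead of once for both lines; if that matters,
keep compute_premult in the callers rather than in the helper):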
.macro process_1l ofmt
    process_1l_internal \ofmt, r4, r2
.endm

.macro process_2l ofmt
    process_1l_internal \ofmt, r4,  r2
    process_1l_internal \ofmt, r12, r11
.endm

--
Ben

_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

Reply via email to