Sorry for the attachment; git send-email is giving me an unusual error when
I try to send this patch.
From 9e8ec4d51566cdda677b15e50240e8842ec6cd34 Mon Sep 17 00:00:00 2001
From: James Almer
Date: Mon, 28 Sep 2015 00:58:01 -0300
Subject: [PATCH] x86/hevc_sao: move 10/12bit functions into a separate file
Signed-off-by: James Almer
---
There's a bit of code duplication now (init functions), but it's cleaner
and should hopefully be easier to read.
libavcodec/x86/Makefile | 3 +-
libavcodec/x86/hevc_sao.asm | 394 +-
libavcodec/x86/hevc_sao_10bit.asm | 433 ++
3 files changed, 490 insertions(+), 340 deletions(-)
create mode 100644 libavcodec/x86/hevc_sao_10bit.asm
diff --git a/libavcodec/x86/Makefile b/libavcodec/x86/Makefile
index b3cfb0b..febaccd 100644
--- a/libavcodec/x86/Makefile
+++ b/libavcodec/x86/Makefile
@@ -140,7 +140,8 @@ YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_mc.o \
x86/hevc_deblock.o\
x86/hevc_idct.o \
x86/hevc_res_add.o\
- x86/hevc_sao.o
+ x86/hevc_sao.o\
+ x86/hevc_sao_10bit.o
YASM-OBJS-$(CONFIG_JPEG2000_DECODER) += x86/jpeg2000dsp.o
YASM-OBJS-$(CONFIG_MLP_DECODER)+= x86/mlpdsp.o
YASM-OBJS-$(CONFIG_MPEG4_DECODER) += x86/xvididct.o
diff --git a/libavcodec/x86/hevc_sao.asm b/libavcodec/x86/hevc_sao.asm
index fa45a24..888a28a 100644
--- a/libavcodec/x86/hevc_sao.asm
+++ b/libavcodec/x86/hevc_sao.asm
@@ -1,5 +1,5 @@
;**
-;* SIMD optimized SAO functions for HEVC decoding
+;* SIMD optimized SAO functions for HEVC 8bit decoding
;*
;* Copyright (c) 2013 Pierre-Edouard LEPERE
;* Copyright (c) 2014 James Almer
@@ -25,27 +25,18 @@
SECTION_RODATA 32
-pw_mask10: times 16 dw 0x03FF
-pw_mask12: times 16 dw 0x0FFF
-pw_m2: times 16 dw -2
pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
pb_eo: db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1
-cextern pw_m1
-cextern pw_1
-cextern pw_2
cextern pb_1
cextern pb_2
SECTION .text
-%define MAX_PB_SIZE 64
-%define PADDING_SIZE 32 ; AV_INPUT_BUFFER_PADDING_SIZE
-
;**
;SAO Band Filter
;**
-%macro HEVC_SAO_BAND_FILTER_INIT 1
+%macro HEVC_SAO_BAND_FILTER_INIT 0
andleftq, 31
movd xm0, leftd
addleftq, 1
@@ -76,9 +67,6 @@ SECTION .text
%endif
%if ARCH_X86_64
-%if %1 > 8
-mova m13, [pw_mask %+ %1]
-%endif
pxor m14, m14
%else ; ARCH_X86_32
@@ -90,9 +78,6 @@ SECTION .text
mova [rsp+mmsize*5], m5
mova [rsp+mmsize*6], m6
pxor m0, m0
-%if %1 > 8
-mova m1, [pw_mask %+ %1]
-%endif
%assign MMSIZE mmsize
%define m14 m0
%define m13 m1
@@ -103,49 +88,49 @@ DEFINE_ARGS dst, src, dststride, srcstride, offset, height
mov heightd, r7m
%endmacro
-%macro HEVC_SAO_BAND_FILTER_COMPUTE 3
-psraw %2, %3, %1-5
+%macro HEVC_SAO_BAND_FILTER_COMPUTE 2
+psraw %1, %2, 3
%if ARCH_X86_64
-pcmpeqw m10, %2, m0
-pcmpeqw m11, %2, m1
-pcmpeqw m12, %2, m2
-pcmpeqw %2, m3
+pcmpeqw m10, %1, m0
+pcmpeqw m11, %1, m1
+pcmpeqw m12, %1, m2
+pcmpeqw %1, m3
pand m10, m4
pand m11, m5
pand m12, m6
-pand %2, m7
+pand %1, m7
por m10, m11
-por m12, %2
+por m12, %1
por m10, m12
-paddw %3, m10
+paddw %2, m10
%else ; ARCH_X86_32
-pcmpeqw m4, %2, [rsp+MMSIZE*0]
-pcmpeqw m5, %2, [rsp+MMSIZE*1]
-pcmpeqw m6, %2, [rsp+MMSIZE*2]
-pcmpeqw %2, [rsp+MMSIZE*3]
+pcmpeqw m4, %1, [rsp+MMSIZE*0]
+pcmpeqw m5, %1, [rsp+MMSIZE*1]
+pcmpeqw m6, %1, [rsp+MMSIZE*2]
+pcmpeqw %1, [rsp+MMSIZE*3]
pand m4, [rsp+MMSIZE*4]
pand m5, [rsp+MMSIZE*5]
pand m6, [rsp+MMSIZE*6]
-pand %2, m7
+pand %1, m7
por m4, m5
-por m6, %2
+por m6, %1
por m4, m6
-paddw %3, m4
+paddw %2, m4
%endif ; ARCH
%endmacro
;void ff_hevc_sao_band_filter__