Declaring the stride parameters of the H.264 DSP functions as ptrdiff_t instead of int saves SIMD-optimized functions from having to sign-extend their line size argument manually before they can do pointer arithmetic with it. --- libavcodec/aarch64/h264dsp_init_aarch64.c | 37 +++++++++++------------ libavcodec/arm/h264dsp_init_arm.c | 37 +++++++++++------------ libavcodec/h264_mb.c | 6 ++-- libavcodec/h264_mb_template.c | 2 +- libavcodec/h264dsp.h | 49 ++++++++++++++++--------------- libavcodec/h264dsp_template.c | 36 +++++++++++------------ libavcodec/ppc/h264dsp.c | 31 +++++++++---------- libavcodec/x86/h264_deblock.asm | 20 ++++++------- libavcodec/x86/h264_deblock_10bit.asm | 14 ++++----- libavcodec/x86/h264_idct.asm | 48 +++++++++++++++--------------- libavcodec/x86/h264_idct_10bit.asm | 18 ++++++------ libavcodec/x86/h264_weight.asm | 4 +-- libavcodec/x86/h264_weight_10bit.asm | 4 +-- libavcodec/x86/h264dsp_init.c | 22 +++++++------- tests/checkasm/h264dsp.c | 4 +-- 15 files changed, 168 insertions(+), 164 deletions(-)
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c index b106f11..25acf00 100644 --- a/libavcodec/aarch64/h264dsp_init_aarch64.c +++ b/libavcodec/aarch64/h264dsp_init_aarch64.c @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <stddef.h> #include <stdint.h> #include "libavutil/attributes.h" @@ -25,48 +26,48 @@ #include "libavutil/aarch64/cpu.h" #include "libavcodec/h264dsp.h" -void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, +void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); -void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, +void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); -void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, +void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); -void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, +void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); -void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height, +void ff_weight_h264_pixels_16_neon(uint8_t *dst, ptrdiff_t stride, int height, int log2_den, int weight, int offset); -void ff_weight_h264_pixels_8_neon(uint8_t *dst, int stride, int height, +void ff_weight_h264_pixels_8_neon(uint8_t *dst, ptrdiff_t stride, int height, int log2_den, int weight, int offset); -void ff_weight_h264_pixels_4_neon(uint8_t *dst, int stride, int height, +void ff_weight_h264_pixels_4_neon(uint8_t *dst, ptrdiff_t stride, int height, int log2_den, int weight, int offset); -void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, int stride, +void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_den, int weightd, int 
weights, int offset); -void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, int stride, +void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_den, int weightd, int weights, int offset); -void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride, +void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_den, int weightd, int weights, int offset); -void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride); -void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride); +void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, ptrdiff_t stride); +void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, ptrdiff_t stride); void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset, - int16_t *block, int stride, + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[6*8]); void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset, - int16_t *block, int stride, + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[6*8]); void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset, - int16_t *block, int stride, + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[6*8]); -void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, int stride); -void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, int stride); +void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, ptrdiff_t stride); +void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, ptrdiff_t stride); void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset, - int16_t *block, int stride, + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[6*8]); av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth, diff --git a/libavcodec/arm/h264dsp_init_arm.c b/libavcodec/arm/h264dsp_init_arm.c index 7afd350..ea0f643 100644 --- a/libavcodec/arm/h264dsp_init_arm.c +++ b/libavcodec/arm/h264dsp_init_arm.c @@ 
-18,6 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include <stddef.h> #include <stdint.h> #include "libavutil/attributes.h" @@ -25,48 +26,48 @@ #include "libavcodec/h264dsp.h" #include "libavcodec/arm/startcode.h" -void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, +void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); -void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, +void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); -void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, +void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); -void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, +void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); -void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height, +void ff_weight_h264_pixels_16_neon(uint8_t *dst, ptrdiff_t stride, int height, int log2_den, int weight, int offset); -void ff_weight_h264_pixels_8_neon(uint8_t *dst, int stride, int height, +void ff_weight_h264_pixels_8_neon(uint8_t *dst, ptrdiff_t stride, int height, int log2_den, int weight, int offset); -void ff_weight_h264_pixels_4_neon(uint8_t *dst, int stride, int height, +void ff_weight_h264_pixels_4_neon(uint8_t *dst, ptrdiff_t stride, int height, int log2_den, int weight, int offset); -void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, int stride, +void ff_biweight_h264_pixels_16_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_den, int weightd, int weights, int offset); -void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, int stride, +void ff_biweight_h264_pixels_8_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_den, int weightd, int 
weights, int offset); -void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, int stride, +void ff_biweight_h264_pixels_4_neon(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_den, int weightd, int weights, int offset); -void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, int stride); -void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, int stride); +void ff_h264_idct_add_neon(uint8_t *dst, int16_t *block, ptrdiff_t stride); +void ff_h264_idct_dc_add_neon(uint8_t *dst, int16_t *block, ptrdiff_t stride); void ff_h264_idct_add16_neon(uint8_t *dst, const int *block_offset, - int16_t *block, int stride, + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[6*8]); void ff_h264_idct_add16intra_neon(uint8_t *dst, const int *block_offset, - int16_t *block, int stride, + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[6*8]); void ff_h264_idct_add8_neon(uint8_t **dest, const int *block_offset, - int16_t *block, int stride, + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[6*8]); -void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, int stride); -void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, int stride); +void ff_h264_idct8_add_neon(uint8_t *dst, int16_t *block, ptrdiff_t stride); +void ff_h264_idct8_dc_add_neon(uint8_t *dst, int16_t *block, ptrdiff_t stride); void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset, - int16_t *block, int stride, + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[6*8]); static av_cold void h264dsp_init_neon(H264DSPContext *c, const int bit_depth, diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c index 6f3c719..a0b5bbc 100644 --- a/libavcodec/h264_mb.c +++ b/libavcodec/h264_mb.c @@ -617,8 +617,8 @@ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h, int linesize, uint8_t *dest_y, int p) { - void (*idct_add)(uint8_t *dst, int16_t *block, int stride); - void (*idct_dc_add)(uint8_t *dst, int16_t *block, int stride); + void 
(*idct_add)(uint8_t *dst, int16_t *block, ptrdiff_t stride); + void (*idct_dc_add)(uint8_t *dst, int16_t *block, ptrdiff_t stride); int i; int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1]; block_offset += 16 * p; @@ -725,7 +725,7 @@ static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264Sl int linesize, uint8_t *dest_y, int p) { - void (*idct_add)(uint8_t *dst, int16_t *block, int stride); + void (*idct_add)(uint8_t *dst, int16_t *block, ptrdiff_t stride); int i; block_offset += 16 * p; if (!IS_INTRA4x4(mb_type)) { diff --git a/libavcodec/h264_mb_template.c b/libavcodec/h264_mb_template.c index 1f583df..28d075b 100644 --- a/libavcodec/h264_mb_template.c +++ b/libavcodec/h264_mb_template.c @@ -49,7 +49,7 @@ static av_noinline void FUNC(hl_decode_mb)(const H264Context *h, H264SliceContex int i, j; const int *block_offset = &h->block_offset[0]; const int transform_bypass = !SIMPLE && (sl->qscale == 0 && h->ps.sps->transform_bypass); - void (*idct_add)(uint8_t *dst, int16_t *block, int stride); + void (*idct_add)(uint8_t *dst, int16_t *block, ptrdiff_t stride); const int block_h = 16 >> h->chroma_y_shift; const int chroma422 = CHROMA422(h); diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h index 3a5b25b..0316eae 100644 --- a/libavcodec/h264dsp.h +++ b/libavcodec/h264dsp.h @@ -27,12 +27,13 @@ #ifndef AVCODEC_H264DSP_H #define AVCODEC_H264DSP_H +#include <stddef.h> #include <stdint.h> -typedef void (*h264_weight_func)(uint8_t *block, int stride, int height, +typedef void (*h264_weight_func)(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset); typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, - int stride, int height, int log2_denom, + ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset); /** @@ -44,32 +45,32 @@ typedef struct H264DSPContext { h264_biweight_func biweight_h264_pixels_tab[4]; /* loop filter */ - void (*h264_v_loop_filter_luma)(uint8_t *pix 
/*align 16*/, int stride, + void (*h264_v_loop_filter_luma)(uint8_t *pix /*align 16*/, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, int stride, + void (*h264_h_loop_filter_luma)(uint8_t *pix /*align 4 */, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, int stride, + void (*h264_h_loop_filter_luma_mbaff)(uint8_t *pix /*align 16*/, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); /* v/h_loop_filter_luma_intra: align 16 */ - void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, + void (*h264_v_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta); - void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, + void (*h264_h_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta); void (*h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix /*align 16*/, - int stride, int alpha, int beta); - void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, int stride, + ptrdiff_t stride, int alpha, int beta); + void (*h264_v_loop_filter_chroma)(uint8_t *pix /*align 8*/, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); - void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, int stride, + void (*h264_h_loop_filter_chroma)(uint8_t *pix /*align 4*/, ptrdiff_t stride, int alpha, int beta, int8_t *tc0); void (*h264_h_loop_filter_chroma_mbaff)(uint8_t *pix /*align 8*/, - int stride, int alpha, int beta, + ptrdiff_t stride, int alpha, int beta, int8_t *tc0); void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/, - int stride, int alpha, int beta); + ptrdiff_t stride, int alpha, int beta); void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix /*align 8*/, - int stride, int alpha, int beta); + ptrdiff_t stride, int alpha, int beta); void (*h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix /*align 8*/, - int stride, int alpha, int beta); + ptrdiff_t stride, int alpha, 
int beta); // h264_loop_filter_strength: simd only. the C version is inlined in h264_loopfilter.c void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], @@ -78,33 +79,33 @@ typedef struct H264DSPContext { /* IDCT */ void (*h264_idct_add)(uint8_t *dst /*align 4*/, - int16_t *block /*align 16*/, int stride); + int16_t *block /*align 16*/, ptrdiff_t stride); void (*h264_idct8_add)(uint8_t *dst /*align 8*/, - int16_t *block /*align 16*/, int stride); + int16_t *block /*align 16*/, ptrdiff_t stride); void (*h264_idct_dc_add)(uint8_t *dst /*align 4*/, - int16_t *block /*align 16*/, int stride); + int16_t *block /*align 16*/, ptrdiff_t stride); void (*h264_idct8_dc_add)(uint8_t *dst /*align 8*/, - int16_t *block /*align 16*/, int stride); + int16_t *block /*align 16*/, ptrdiff_t stride); void (*h264_idct_add16)(uint8_t *dst /*align 16*/, const int *blockoffset, - int16_t *block /*align 16*/, int stride, + int16_t *block /*align 16*/, ptrdiff_t stride, const uint8_t nnzc[15 * 8]); void (*h264_idct8_add4)(uint8_t *dst /*align 16*/, const int *blockoffset, - int16_t *block /*align 16*/, int stride, + int16_t *block /*align 16*/, ptrdiff_t stride, const uint8_t nnzc[15 * 8]); void (*h264_idct_add8)(uint8_t **dst /*align 16*/, const int *blockoffset, - int16_t *block /*align 16*/, int stride, + int16_t *block /*align 16*/, ptrdiff_t stride, const uint8_t nnzc[15 * 8]); void (*h264_idct_add16intra)(uint8_t *dst /*align 16*/, const int *blockoffset, int16_t *block /*align 16*/, - int stride, const uint8_t nnzc[15 * 8]); + ptrdiff_t stride, const uint8_t nnzc[15 * 8]); void (*h264_luma_dc_dequant_idct)(int16_t *output, int16_t *input /*align 16*/, int qmul); void (*h264_chroma_dc_dequant_idct)(int16_t *block, int qmul); /* bypass-transform */ - void (*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, int stride); - void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, int stride); + void 
(*h264_add_pixels8_clear)(uint8_t *dst, int16_t *block, ptrdiff_t stride); + void (*h264_add_pixels4_clear)(uint8_t *dst, int16_t *block, ptrdiff_t stride); /** * Search buf from the start for up to size bytes. Return the index diff --git a/libavcodec/h264dsp_template.c b/libavcodec/h264dsp_template.c index c2d1394..ed09f6f 100644 --- a/libavcodec/h264dsp_template.c +++ b/libavcodec/h264dsp_template.c @@ -30,7 +30,7 @@ #define op_scale1(x) block[x] = av_clip_pixel( (block[x]*weight + offset) >> log2_denom ) #define op_scale2(x) dst[x] = av_clip_pixel( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1)) #define H264_WEIGHT(W) \ -static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, int stride, int height, \ +static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, ptrdiff_t stride, int height, \ int log2_denom, int weight, int offset) \ { \ int y; \ @@ -60,7 +60,7 @@ static void FUNCC(weight_h264_pixels ## W)(uint8_t *_block, int stride, int heig op_scale1(15); \ } \ } \ -static void FUNCC(biweight_h264_pixels ## W)(uint8_t *_dst, uint8_t *_src, int stride, int height, \ +static void FUNCC(biweight_h264_pixels ## W)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride, int height, \ int log2_denom, int weightd, int weights, int offset) \ { \ int y; \ @@ -149,15 +149,15 @@ static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma)(uint8_t *_p } } } -static void FUNCC(h264_v_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +static void FUNCC(h264_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) { FUNCC(h264_loop_filter_luma)(pix, stride, sizeof(pixel), 4, alpha, beta, tc0); } -static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +static void FUNCC(h264_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) { FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); } -static void 
FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +static void FUNCC(h264_h_loop_filter_luma_mbaff)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) { FUNCC(h264_loop_filter_luma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); } @@ -215,15 +215,15 @@ static av_always_inline av_flatten void FUNCC(h264_loop_filter_luma_intra)(uint8 pix += ystride; } } -static void FUNCC(h264_v_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta) +static void FUNCC(h264_v_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) { FUNCC(h264_loop_filter_luma_intra)(pix, stride, sizeof(pixel), 4, alpha, beta); } -static void FUNCC(h264_h_loop_filter_luma_intra)(uint8_t *pix, int stride, int alpha, int beta) +static void FUNCC(h264_h_loop_filter_luma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) { FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); } -static void FUNCC(h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta) +static void FUNCC(h264_h_loop_filter_luma_mbaff_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) { FUNCC(h264_loop_filter_luma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); } @@ -261,23 +261,23 @@ static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma)(uint8_t * } } } -static void FUNCC(h264_v_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +static void FUNCC(h264_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) { FUNCC(h264_loop_filter_chroma)(pix, stride, sizeof(pixel), 2, alpha, beta, tc0); } -static void FUNCC(h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +static void FUNCC(h264_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) { FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); } 
-static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) { FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); } -static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) { FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); } -static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) +static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) { FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); } @@ -306,23 +306,23 @@ static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uin pix += ystride; } } -static void FUNCC(h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta) +static void FUNCC(h264_v_loop_filter_chroma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) { FUNCC(h264_loop_filter_chroma_intra)(pix, stride, sizeof(pixel), 2, alpha, beta); } -static void FUNCC(h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta) +static void FUNCC(h264_h_loop_filter_chroma_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) { FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); } -static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta) +static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) { FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); } -static void 
FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, int stride, int alpha, int beta) +static void FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) { FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); } -static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta) +static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) { FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); } diff --git a/libavcodec/ppc/h264dsp.c b/libavcodec/ppc/h264dsp.c index ce514e3..0a1ca78 100644 --- a/libavcodec/ppc/h264dsp.c +++ b/libavcodec/ppc/h264dsp.c @@ -20,6 +20,7 @@ #include "config.h" +#include <stddef.h> #include <stdint.h> #include <string.h> @@ -77,7 +78,7 @@ va_u32 = vec_splat((vec_u32)va_u8, 0); \ vec_ste(va_u32, element, (uint32_t*)dst); -static void h264_idct_add_altivec(uint8_t *dst, int16_t *block, int stride) +static void h264_idct_add_altivec(uint8_t *dst, int16_t *block, ptrdiff_t stride) { vec_s16 va0, va1, va2, va3; vec_s16 vz0, vz1, vz2, vz3; @@ -192,7 +193,7 @@ static void h264_idct_add_altivec(uint8_t *dst, int16_t *block, int stride) vec_st( hv, 0, dest ); \ } -static void h264_idct8_add_altivec(uint8_t *dst, int16_t *dct, int stride) +static void h264_idct8_add_altivec(uint8_t *dst, int16_t *dct, ptrdiff_t stride) { vec_s16 s0, s1, s2, s3, s4, s5, s6, s7; vec_s16 d0, d1, d2, d3, d4, d5, d6, d7; @@ -238,7 +239,7 @@ static void h264_idct8_add_altivec(uint8_t *dst, int16_t *dct, int stride) ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel); } -static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *block, int stride, int size) +static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *block, ptrdiff_t stride, int size) { vec_s16 dc16; vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner; @@ -284,18 
+285,18 @@ static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *bl } } -static void h264_idct_dc_add_altivec(uint8_t *dst, int16_t *block, int stride) +static void h264_idct_dc_add_altivec(uint8_t *dst, int16_t *block, ptrdiff_t stride) { h264_idct_dc_add_internal(dst, block, stride, 4); } -static void h264_idct8_dc_add_altivec(uint8_t *dst, int16_t *block, int stride) +static void h264_idct8_dc_add_altivec(uint8_t *dst, int16_t *block, ptrdiff_t stride) { h264_idct_dc_add_internal(dst, block, stride, 8); } static void h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, - int16_t *block, int stride, + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[15 * 8]) { int i; @@ -309,7 +310,7 @@ static void h264_idct_add16_altivec(uint8_t *dst, const int *block_offset, } static void h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset, - int16_t *block, int stride, + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[15 * 8]) { int i; @@ -320,7 +321,7 @@ static void h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset, } static void h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, - int16_t *block, int stride, + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[15 * 8]) { int i; @@ -334,7 +335,7 @@ static void h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset, } static void h264_idct_add8_altivec(uint8_t **dest, const int *block_offset, - int16_t *block, int stride, + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[15 * 8]) { int i, j; @@ -593,7 +594,7 @@ static inline vec_u8 h264_deblock_q1(register vec_u8 p0, q1 = newq1; \ } -static void h264_v_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { +static void h264_v_loop_filter_luma_altivec(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) { if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) >= 0) { register vec_u8 p2 = vec_ld(-3*stride, pix); @@ -610,7 +611,7 @@ static void 
h264_v_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, } } -static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) { +static void h264_h_loop_filter_luma_altivec(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) { register vec_u8 line0, line1, line2, line3, line4, line5; if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) < 0) @@ -622,7 +623,7 @@ static void h264_h_loop_filter_luma_altivec(uint8_t *pix, int stride, int alpha, } static av_always_inline -void weight_h264_W_altivec(uint8_t *block, int stride, int height, +void weight_h264_W_altivec(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset, int w) { int y, aligned; @@ -668,7 +669,7 @@ void weight_h264_W_altivec(uint8_t *block, int stride, int height, } static av_always_inline -void biweight_h264_W_altivec(uint8_t *dst, uint8_t *src, int stride, int height, +void biweight_h264_W_altivec(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset, int w) { int y, dst_aligned, src_aligned; @@ -733,12 +734,12 @@ void biweight_h264_W_altivec(uint8_t *dst, uint8_t *src, int stride, int height, } #define H264_WEIGHT(W) \ -static void weight_h264_pixels ## W ## _altivec(uint8_t *block, int stride, int height, \ +static void weight_h264_pixels ## W ## _altivec(uint8_t *block, ptrdiff_t stride, int height, \ int log2_denom, int weight, int offset) \ { \ weight_h264_W_altivec(block, stride, height, log2_denom, weight, offset, W); \ }\ -static void biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, int stride, int height, \ +static void biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, \ int log2_denom, int weightd, int weights, int offset) \ { \ biweight_h264_W_altivec(dst, src, stride, height, log2_denom, weightd, weights, offset, W); \ diff --git a/libavcodec/x86/h264_deblock.asm 
b/libavcodec/x86/h264_deblock.asm index 33fd5a9..7c476b8 100644 --- a/libavcodec/x86/h264_deblock.asm +++ b/libavcodec/x86/h264_deblock.asm @@ -283,7 +283,7 @@ cextern pb_3 %if ARCH_X86_64 ;----------------------------------------------------------------------------- -; void ff_deblock_v_luma(uint8_t *pix, int stride, int alpha, int beta, +; void ff_deblock_v_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, ; int8_t *tc0) ;----------------------------------------------------------------------------- %macro DEBLOCK_LUMA 0 @@ -329,7 +329,7 @@ cglobal deblock_v_luma_8, 5,5,10 RET ;----------------------------------------------------------------------------- -; void ff_deblock_h_luma(uint8_t *pix, int stride, int alpha, int beta, +; void ff_deblock_h_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, ; int8_t *tc0) ;----------------------------------------------------------------------------- INIT_MMX cpuname @@ -391,7 +391,7 @@ DEBLOCK_LUMA %macro DEBLOCK_LUMA 2 ;----------------------------------------------------------------------------- -; void ff_deblock_v8_luma(uint8_t *pix, int stride, int alpha, int beta, +; void ff_deblock_v8_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, ; int8_t *tc0) ;----------------------------------------------------------------------------- cglobal deblock_%1_luma_8, 5,5,8,2*%2 @@ -440,7 +440,7 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2 RET ;----------------------------------------------------------------------------- -; void ff_deblock_h_luma(uint8_t *pix, int stride, int alpha, int beta, +; void ff_deblock_h_luma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, ; int8_t *tc0) ;----------------------------------------------------------------------------- INIT_MMX cpuname @@ -639,7 +639,7 @@ DEBLOCK_LUMA v, 16 %endif ;----------------------------------------------------------------------------- -; void ff_deblock_v_luma_intra(uint8_t *pix, int stride, int alpha, int beta) +; void 
ff_deblock_v_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) ;----------------------------------------------------------------------------- %if WIN64 cglobal deblock_%1_luma_intra_8, 4,6,16,0x10 @@ -699,7 +699,7 @@ cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50 INIT_MMX cpuname %if ARCH_X86_64 ;----------------------------------------------------------------------------- -; void ff_deblock_h_luma_intra(uint8_t *pix, int stride, int alpha, int beta) +; void ff_deblock_h_luma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) ;----------------------------------------------------------------------------- cglobal deblock_h_luma_intra_8, 4,9,0,0x80 movsxd r7, r1d @@ -802,7 +802,7 @@ INIT_MMX mmxext %define t6 r6 ;----------------------------------------------------------------------------- -; void ff_deblock_v_chroma(uint8_t *pix, int stride, int alpha, int beta, +; void ff_deblock_v_chroma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, ; int8_t *tc0) ;----------------------------------------------------------------------------- cglobal deblock_v_chroma_8, 5,6 @@ -817,7 +817,7 @@ cglobal deblock_v_chroma_8, 5,6 RET ;----------------------------------------------------------------------------- -; void ff_deblock_h_chroma(uint8_t *pix, int stride, int alpha, int beta, +; void ff_deblock_h_chroma(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, ; int8_t *tc0) ;----------------------------------------------------------------------------- cglobal deblock_h_chroma_8, 5,7 @@ -871,7 +871,7 @@ ff_chroma_inter_body_mmxext: %define t6 r5 ;------------------------------------------------------------------------------ -; void ff_deblock_v_chroma_intra(uint8_t *pix, int stride, int alpha, int beta) +; void ff_deblock_v_chroma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) ;------------------------------------------------------------------------------ cglobal deblock_v_chroma_intra_8, 4,5 CHROMA_V_START @@ -885,7 +885,7 @@ 
cglobal deblock_v_chroma_intra_8, 4,5 RET ;------------------------------------------------------------------------------ -; void ff_deblock_h_chroma_intra(uint8_t *pix, int stride, int alpha, int beta) +; void ff_deblock_h_chroma_intra(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) ;------------------------------------------------------------------------------ cglobal deblock_h_chroma_intra_8, 4,6 CHROMA_H_START diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index d049c62..887e249 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -152,7 +152,7 @@ cextern pw_4 %macro DEBLOCK_LUMA 0 ;----------------------------------------------------------------------------- -; void ff_deblock_v_luma_10(uint16_t *pix, int stride, int alpha, int beta, +; void ff_deblock_v_luma_10(uint16_t *pix, ptrdiff_t stride, int alpha, int beta, ; int8_t *tc0) ;----------------------------------------------------------------------------- cglobal deblock_v_luma_10, 5,5,8*(mmsize/16) @@ -599,7 +599,7 @@ DEBLOCK_LUMA_64 %if ARCH_X86_64 ;----------------------------------------------------------------------------- -; void ff_deblock_v_luma_intra_10(uint16_t *pix, int stride, int alpha, +; void ff_deblock_v_luma_intra_10(uint16_t *pix, ptrdiff_t stride, int alpha, ; int beta) ;----------------------------------------------------------------------------- %macro DEBLOCK_LUMA_INTRA_64 0 @@ -651,7 +651,7 @@ cglobal deblock_v_luma_intra_10, 4,7,16 REP_RET ;----------------------------------------------------------------------------- -; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha, +; void ff_deblock_h_luma_intra_10(uint16_t *pix, ptrdiff_t stride, int alpha, ; int beta) ;----------------------------------------------------------------------------- cglobal deblock_h_luma_intra_10, 4,7,16 @@ -722,7 +722,7 @@ DEBLOCK_LUMA_INTRA_64 %macro DEBLOCK_LUMA_INTRA 0 
;----------------------------------------------------------------------------- -; void ff_deblock_v_luma_intra_10(uint16_t *pix, int stride, int alpha, +; void ff_deblock_v_luma_intra_10(uint16_t *pix, ptrdiff_t stride, int alpha, ; int beta) ;----------------------------------------------------------------------------- cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16) @@ -751,7 +751,7 @@ cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16) RET ;----------------------------------------------------------------------------- -; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha, +; void ff_deblock_h_luma_intra_10(uint16_t *pix, ptrdiff_t stride, int alpha, ; int beta) ;----------------------------------------------------------------------------- cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16) @@ -848,7 +848,7 @@ DEBLOCK_LUMA_INTRA %macro DEBLOCK_CHROMA 0 ;----------------------------------------------------------------------------- -; void ff_deblock_v_chroma_10(uint16_t *pix, int stride, int alpha, int beta, +; void ff_deblock_v_chroma_10(uint16_t *pix, ptrdiff_t stride, int alpha, int beta, ; int8_t *tc0) ;----------------------------------------------------------------------------- cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16) @@ -883,7 +883,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16) %endif ;----------------------------------------------------------------------------- -; void ff_deblock_v_chroma_intra_10(uint16_t *pix, int stride, int alpha, +; void ff_deblock_v_chroma_intra_10(uint16_t *pix, ptrdiff_t stride, int alpha, ; int beta) ;----------------------------------------------------------------------------- cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16) diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm index 313791a..d90b0c7 100644 --- a/libavcodec/x86/h264_idct.asm +++ b/libavcodec/x86/h264_idct.asm @@ -55,7 +55,7 @@ cextern pw_1 SECTION .text -; %1=uint8_t *dst, 
%2=int16_t *block, %3=int stride +; %1=uint8_t *dst, %2=int16_t *block, %3=ptrdiff_t stride %macro IDCT4_ADD 3 ; Load dct coeffs movq m0, [%2] @@ -80,7 +80,7 @@ SECTION .text %endmacro INIT_MMX mmx -; void ff_h264_idct_add_8_mmx(uint8_t *dst, int16_t *block, int stride) +; void ff_h264_idct_add_8_mmx(uint8_t *dst, int16_t *block, ptrdiff_t stride) cglobal h264_idct_add_8, 3, 3, 0 IDCT4_ADD r0, r1, r2 RET @@ -163,7 +163,7 @@ cglobal h264_idct_add_8, 3, 3, 0 mova [%2+56], m7 %endmacro -; %1=uint8_t *dst, %2=int16_t *block, %3=int stride +; %1=uint8_t *dst, %2=int16_t *block, %3=ptrdiff_t stride %macro IDCT8_ADD_MMX_END 3-4 IDCT8_1D_FULL %2 mova [%2 ], m5 @@ -202,7 +202,7 @@ cglobal h264_idct_add_8, 3, 3, 0 %endmacro INIT_MMX mmx -; void ff_h264_idct8_add_8_mmx(uint8_t *dst, int16_t *block, int stride) +; void ff_h264_idct8_add_8_mmx(uint8_t *dst, int16_t *block, ptrdiff_t stride) cglobal h264_idct8_add_8, 3, 4, 0 %assign pad 128+4-(stack_offset&7) SUB rsp, pad @@ -217,7 +217,7 @@ cglobal h264_idct8_add_8, 3, 4, 0 ADD rsp, pad RET -; %1=uint8_t *dst, %2=int16_t *block, %3=int stride +; %1=uint8_t *dst, %2=int16_t *block, %3=ptrdiff_t stride %macro IDCT8_ADD_SSE 4 IDCT8_1D_FULL %2 %if ARCH_X86_64 @@ -270,7 +270,7 @@ cglobal h264_idct8_add_8, 3, 4, 0 %endmacro INIT_XMM sse2 -; void ff_h264_idct8_add_8_sse2(uint8_t *dst, int16_t *block, int stride) +; void ff_h264_idct8_add_8_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride) cglobal h264_idct8_add_8, 3, 4, 10 IDCT8_ADD_SSE r0, r1, r2, r3 RET @@ -307,7 +307,7 @@ cglobal h264_idct8_add_8, 3, 4, 10 %endmacro INIT_MMX mmxext -; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride) +; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, ptrdiff_t stride) %if ARCH_X86_64 cglobal h264_idct_dc_add_8, 3, 4, 0 movsx r3, word [r1] @@ -316,7 +316,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0 DC_ADD_MMXEXT_OP movh, r0, r2, r3 RET -; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int 
stride) +; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, ptrdiff_t stride) cglobal h264_idct8_dc_add_8, 3, 4, 0 movsx r3, word [r1] mov dword [r1], 0 @@ -326,7 +326,7 @@ cglobal h264_idct8_dc_add_8, 3, 4, 0 DC_ADD_MMXEXT_OP mova, r0, r2, r3 RET %else -; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride) +; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, ptrdiff_t stride) cglobal h264_idct_dc_add_8, 2, 3, 0 movsx r2, word [r1] mov dword [r1], 0 @@ -335,7 +335,7 @@ cglobal h264_idct_dc_add_8, 2, 3, 0 DC_ADD_MMXEXT_OP movh, r0, r1, r2 RET -; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride) +; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, ptrdiff_t stride) cglobal h264_idct8_dc_add_8, 2, 3, 0 movsx r2, word [r1] mov dword [r1], 0 @@ -349,7 +349,7 @@ cglobal h264_idct8_dc_add_8, 2, 3, 0 INIT_MMX mmx ; void ff_h264_idct_add16_8_mmx(uint8_t *dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6 * 8]) cglobal h264_idct_add16_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride, nnzc, cntr, coeff, picreg xor r5, r5 @@ -372,7 +372,7 @@ cglobal h264_idct_add16_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride, REP_RET ; void ff_h264_idct8_add4_8_mmx(uint8_t *dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6 * 8]) cglobal h264_idct8_add4_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride, nnzc, cntr, coeff, picreg %assign pad 128+4-(stack_offset&7) @@ -406,7 +406,7 @@ cglobal h264_idct8_add4_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride, INIT_MMX mmxext ; void ff_h264_idct_add16_8_mmxext(uint8_t *dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6 * 8]) cglobal h264_idct_add16_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, 
coeff, dst2, picreg xor r5, r5 @@ -453,7 +453,7 @@ cglobal h264_idct_add16_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride INIT_MMX mmx ; void ff_h264_idct_add16intra_8_mmx(uint8_t *dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6 * 8]) cglobal h264_idct_add16intra_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride, nnzc, cntr, coeff, picreg xor r5, r5 @@ -478,7 +478,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + npicregs, 0, dst, block_offset, block, st INIT_MMX mmxext ; void ff_h264_idct_add16intra_8_mmxext(uint8_t *dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6 * 8]) cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg xor r5, r5 @@ -522,7 +522,7 @@ cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, s REP_RET ; void ff_h264_idct8_add4_8_mmxext(uint8_t *dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6 * 8]) cglobal h264_idct8_add4_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg %assign pad 128+4-(stack_offset&7) @@ -584,7 +584,7 @@ cglobal h264_idct8_add4_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride INIT_XMM sse2 ; void ff_h264_idct8_add4_8_sse2(uint8_t *dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6 * 8]) cglobal h264_idct8_add4_8, 5, 8 + npicregs, 10, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg xor r5, r5 @@ -661,7 +661,7 @@ h264_idct_add8_mmx_plane: rep ret ; void ff_h264_idct_add8_8_mmx(uint8_t **dest, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6 * 8]) cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, 
stride, nnzc, cntr, coeff, dst2, picreg mov r5, 16 @@ -727,7 +727,7 @@ h264_idct_add8_mmxext_plane: INIT_MMX mmxext ; void ff_h264_idct_add8_8_mmxext(uint8_t **dest, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6 * 8]) cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg mov r5, 16 @@ -749,7 +749,7 @@ cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, call h264_idct_add8_mmxext_plane RET -; r0 = uint8_t *dst, r2 = int16_t *block, r3 = int stride, r6=clobbered +; r0 = uint8_t *dst, r2 = int16_t *block, r3 = ptrdiff_t stride, r6=clobbered h264_idct_dc_add8_mmxext: movd m0, [r2 ] ; 0 0 X D mov word [r2+ 0], 0 @@ -769,7 +769,7 @@ h264_idct_dc_add8_mmxext: ALIGN 16 INIT_XMM sse2 -; r0 = uint8_t *dst (clobbered), r2 = int16_t *block, r3 = int stride +; r0 = uint8_t *dst (clobbered), r2 = int16_t *block, r3 = ptrdiff_t stride h264_add8x4_idct_sse2: movq m0, [r2+ 0] movq m1, [r2+ 8] @@ -811,7 +811,7 @@ h264_add8x4_idct_sse2: %endmacro ; void ff_h264_idct_add16_8_sse2(uint8_t *dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6 * 8]) cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8 %if ARCH_X86_64 @@ -859,7 +859,7 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8 %endmacro ; void ff_h264_idct_add16intra_8_sse2(uint8_t *dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6 * 8]) cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8 %if ARCH_X86_64 @@ -911,7 +911,7 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8 %endmacro ; void ff_h264_idct_add8_8_sse2(uint8_t **dest, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6 * 8]) cglobal h264_idct_add8_8, 5, 7 + ARCH_X86_64, 8 add r2, 512 diff --git 
a/libavcodec/x86/h264_idct_10bit.asm b/libavcodec/x86/h264_idct_10bit.asm index b7d5105..326e710 100644 --- a/libavcodec/x86/h264_idct_10bit.asm +++ b/libavcodec/x86/h264_idct_10bit.asm @@ -32,7 +32,7 @@ pd_32: times 4 dd 32 SECTION .text ;----------------------------------------------------------------------------- -; void ff_h264_idct_add_10(pixel *dst, int16_t *block, int stride) +; void ff_h264_idct_add_10(pixel *dst, int16_t *block, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro STORE_DIFFx2 6 psrad %1, 6 @@ -88,7 +88,7 @@ IDCT_ADD_10 ;----------------------------------------------------------------------------- ; void ff_h264_idct_add16_10(pixel *dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6*8]) ;----------------------------------------------------------------------------- ;;;;;;; NO FATE SAMPLES TRIGGER THIS @@ -159,7 +159,7 @@ INIT_XMM avx IDCT_ADD16_10 ;----------------------------------------------------------------------------- -; void ff_h264_idct_dc_add_10(pixel *dst, int16_t *block, int stride) +; void ff_h264_idct_dc_add_10(pixel *dst, int16_t *block, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro IDCT_DC_ADD_OP_10 3 pxor m5, m5 @@ -201,7 +201,7 @@ cglobal h264_idct_dc_add_10,3,3 RET ;----------------------------------------------------------------------------- -; void ff_h264_idct8_dc_add_10(pixel *dst, int16_t *block, int stride) +; void ff_h264_idct8_dc_add_10(pixel *dst, int16_t *block, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro IDCT8_DC_ADD 0 cglobal h264_idct8_dc_add_10,3,4,7 @@ -225,7 +225,7 @@ IDCT8_DC_ADD ;----------------------------------------------------------------------------- ; void ff_h264_idct_add16intra_10(pixel *dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t 
*block, ptrdiff_t stride, ; const uint8_t nnzc[6*8]) ;----------------------------------------------------------------------------- %macro AC 1 @@ -299,7 +299,7 @@ IDCT_ADD16INTRA_10 %assign last_block 36 ;----------------------------------------------------------------------------- ; void ff_h264_idct_add8_10(pixel **dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6*8]) ;----------------------------------------------------------------------------- %macro IDCT_ADD8 0 @@ -334,7 +334,7 @@ INIT_XMM avx IDCT_ADD8 ;----------------------------------------------------------------------------- -; void ff_h264_idct8_add_10(pixel *dst, int16_t *block, int stride) +; void ff_h264_idct8_add_10(pixel *dst, int16_t *block, ptrdiff_t stride) ;----------------------------------------------------------------------------- %macro IDCT8_1D 2 SWAP 0, 1 @@ -418,7 +418,7 @@ IDCT_ADD8 %endif %endmacro -; %1=uint8_t *dst, %2=int16_t *block, %3=int stride +; %1=uint8_t *dst, %2=int16_t *block, %3=ptrdiff_t stride %macro IDCT8_ADD_SSE_END 3 IDCT8_1D_FULL %2 mova [%2 ], m6 @@ -542,7 +542,7 @@ IDCT8_ADD ;----------------------------------------------------------------------------- ; void ff_h264_idct8_add4_10(pixel **dst, const int *block_offset, -; int16_t *block, int stride, +; int16_t *block, ptrdiff_t stride, ; const uint8_t nnzc[6*8]) ;----------------------------------------------------------------------------- ;;;;;;; NO FATE SAMPLES TRIGGER THIS diff --git a/libavcodec/x86/h264_weight.asm b/libavcodec/x86/h264_weight.asm index 9ad26de..e259298 100644 --- a/libavcodec/x86/h264_weight.asm +++ b/libavcodec/x86/h264_weight.asm @@ -28,11 +28,11 @@ SECTION .text ;----------------------------------------------------------------------------- ; biweight pred: ; -; void ff_h264_biweight_16_sse2(uint8_t *dst, uint8_t *src, int stride, +; void ff_h264_biweight_16_sse2(uint8_t *dst, uint8_t *src, ptrdiff_t stride, ; int height, 
int log2_denom, int weightd, ; int weights, int offset); ; and -; void ff_h264_weight_16_sse2(uint8_t *dst, int stride, int height, +; void ff_h264_weight_16_sse2(uint8_t *dst, ptrdiff_t stride, int height, ; int log2_denom, int weight, int offset); ;----------------------------------------------------------------------------- diff --git a/libavcodec/x86/h264_weight_10bit.asm b/libavcodec/x86/h264_weight_10bit.asm index 961ec8c..582a4ce 100644 --- a/libavcodec/x86/h264_weight_10bit.asm +++ b/libavcodec/x86/h264_weight_10bit.asm @@ -35,7 +35,7 @@ cextern pw_1 SECTION .text ;----------------------------------------------------------------------------- -; void ff_h264_weight_16_10(uint8_t *dst, int stride, int height, +; void ff_h264_weight_16_10(uint8_t *dst, ptrdiff_t stride, int height, ; int log2_denom, int weight, int offset); ;----------------------------------------------------------------------------- %macro WEIGHT_PROLOGUE 0 @@ -151,7 +151,7 @@ WEIGHT_FUNC_HALF_MM ;----------------------------------------------------------------------------- -; void ff_h264_biweight_16_10(uint8_t *dst, uint8_t *src, int stride, +; void ff_h264_biweight_16_10(uint8_t *dst, uint8_t *src, ptrdiff_t stride, ; int height, int log2_denom, int weightd, ; int weights, int offset); ;----------------------------------------------------------------------------- diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c index 134d594..89100d9 100644 --- a/libavcodec/x86/h264dsp_init.c +++ b/libavcodec/x86/h264dsp_init.c @@ -29,7 +29,7 @@ #define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \ void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT(uint8_t *dst, \ int16_t *block, \ - int stride); + ptrdiff_t stride); IDCT_ADD_FUNC(, 8, mmx) IDCT_ADD_FUNC(, 10, sse2) @@ -48,7 +48,7 @@ IDCT_ADD_FUNC(8, 10, avx) #define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \ void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ (uint8_t *dst, const int *block_offset, \ - int16_t *block, int 
stride, const uint8_t nnzc[6 * 8]); + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[6 * 8]); IDCT_ADD_REP_FUNC(8, 4, 8, mmx) IDCT_ADD_REP_FUNC(8, 4, 8, mmxext) @@ -70,7 +70,7 @@ IDCT_ADD_REP_FUNC(, 16intra, 10, avx) #define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \ void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ (uint8_t **dst, const int *block_offset, \ - int16_t *block, int stride, const uint8_t nnzc[6 * 8]); + int16_t *block, ptrdiff_t stride, const uint8_t nnzc[6 * 8]); IDCT_ADD_REP_FUNC2(, 8, 8, mmx) IDCT_ADD_REP_FUNC2(, 8, 8, mmxext) @@ -92,13 +92,13 @@ void ff_h264_loop_filter_strength_mmxext(int16_t bS[2][4][4], uint8_t nnz[40], #define LF_FUNC(DIR, TYPE, DEPTH, OPT) \ void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix, \ - int stride, \ + ptrdiff_t stride, \ int alpha, \ int beta, \ int8_t *tc0); #define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \ void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix, \ - int stride, \ + ptrdiff_t stride, \ int alpha, \ int beta); @@ -131,7 +131,7 @@ LF_FUNCS(uint16_t, 10) #if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL LF_FUNC(v8, luma, 8, mmxext) -static void deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, +static void deblock_v_luma_8_mmxext(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) { if ((tc0[0] & tc0[1]) >= 0) @@ -140,7 +140,7 @@ static void deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, ff_deblock_v8_luma_8_mmxext(pix + 8, stride, alpha, beta, tc0 + 2); } LF_IFUNC(v8, luma_intra, 8, mmxext) -static void deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, +static void deblock_v_luma_intra_8_mmxext(uint8_t *pix, ptrdiff_t stride, int alpha, int beta) { ff_deblock_v8_luma_intra_8_mmxext(pix + 0, stride, alpha, beta); @@ -155,13 +155,13 @@ LF_IFUNC(v, luma_intra, 10, mmxext) /* weighted prediction */ #define H264_WEIGHT(W, OPT) \ -void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, int stride, \ +void 
ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, ptrdiff_t stride, \ int height, int log2_denom, \ int weight, int offset); #define H264_BIWEIGHT(W, OPT) \ void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, uint8_t *src, \ - int stride, int height, \ + ptrdiff_t stride, int height, \ int log2_denom, int weightd, \ int weights, int offset); @@ -181,7 +181,7 @@ H264_BIWEIGHT_MMX(4) #define H264_WEIGHT_10(W, DEPTH, OPT) \ void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \ - int stride, \ + ptrdiff_t stride, \ int height, \ int log2_denom, \ int weight, \ @@ -190,7 +190,7 @@ void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \ #define H264_BIWEIGHT_10(W, DEPTH, OPT) \ void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \ uint8_t *src, \ - int stride, \ + ptrdiff_t stride, \ int height, \ int log2_denom, \ int weightd, \ diff --git a/tests/checkasm/h264dsp.c b/tests/checkasm/h264dsp.c index bbdf74b..97b381f 100644 --- a/tests/checkasm/h264dsp.c +++ b/tests/checkasm/h264dsp.c @@ -218,7 +218,7 @@ static void check_idct(void) H264DSPContext h; int bit_depth, sz; int x, y, dc; - declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, int stride); + declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst, int16_t *block, ptrdiff_t stride); for (bit_depth = 8; bit_depth <= 10; bit_depth++) { ff_h264dsp_init(&h, bit_depth, 1); @@ -231,7 +231,7 @@ static void check_idct(void) dct8x8(coef, bit_depth); for (dc = 0; dc <= 1; dc++) { - void (*idct)(uint8_t *, int16_t *, int); + void (*idct)(uint8_t *, int16_t *, ptrdiff_t); switch ((sz << 1) | dc) { case (4 << 1) | 0: idct = h.h264_idct_add; break; case (4 << 1) | 1: idct = h.h264_idct_dc_add; break; -- 2.7.3 _______________________________________________ libav-devel mailing list libav-devel@libav.org https://lists.libav.org/mailman/listinfo/libav-devel