On 21/08/15 4:19 AM, Anton Khirnov wrote:
> +
> +    add       dstq, dststrideq
> +    add       srcq, srcstrideq
> +
> +%assign i (i + 1)
> +%endrep
> +
> +    dec heightq

This and every other case should use heightd rather than heightq. There's no
guarantee the high bits will be zero on every x86_64 target.
This is the source of the crashes I was getting.

> +    jg .loop
> +    RET
> +%endmacro
> +
> +INIT_XMM sse2
> +GET_PIXELS 4,  8, 1
> +GET_PIXELS 8,  8, 1
> +GET_PIXELS 12, 8, 3
> +GET_PIXELS 16, 8, 2
> +GET_PIXELS 24, 8, 3
> +GET_PIXELS 32, 8, 3
> +GET_PIXELS 48, 8, 3
> +GET_PIXELS 64, 8, 3
> +
> +GET_PIXELS 4,  10, 1
> +GET_PIXELS 8,  10, 1
> +GET_PIXELS 12, 10, 3
> +GET_PIXELS 16, 10, 2
> +GET_PIXELS 24, 10, 3
> +GET_PIXELS 32, 10, 3
> +GET_PIXELS 48, 10, 3
> +GET_PIXELS 64, 10, 3
> +
> +; hevc_qpel_h/v_<w>_8(int16_t *dst, ptrdiff_t dststride,
> +;                     uint8_t *src, ptrdiff_t srcstride,
> +;                     int height, int mx, int my, int *mcbuffer)
> +
> +; 8-bit qpel interpolation
> +; %1: block width
> +; %2: 0 - horizontal; 1 - vertical
> +%macro QPEL_8 2
> +%if %2
> +    %define postfix    v
> +    %define mvfrac     myq

Same here and in the %else branch below: rename this to mvfracq and add an mvfracd.

> +    %define pixstride  srcstrideq
> +    %define pixstride3 sstride3q
> +    %define src_m3     srcm3q
> +%else
> +    %define postfix    h
> +    %define mvfrac     mxq
> +    %define pixstride  1
> +    %define pixstride3 3
> +    %define src_m3     (srcq - 3)
> +%endif
> +
> +cglobal hevc_qpel_ %+ postfix %+ _ %+ %1 %+ _8, 8, 10, 7, dst, dststride, 
> src, srcstride, height, mx, my, sstride3, srcm3, coeffsreg
> +%if %2
> +    and       mvfrac, 0x3
> +%endif
> +    dec       mvfrac
> +    shl       mvfrac, 4

Use mvfracd on these three; it will clear the high bits for the mova below.

> +    lea       coeffsregq, [hevc_qpel_coeffs8]
> +    mova      m0, [coeffsregq + mvfrac]

Then use mvfracq here. Replicate this on every function, of course.

> +
> +%macro PUT_WEIGHTED_PRED 3
> +%if %1
> +cglobal hevc_put_weighted_pred_avg_ %+ %2 %+ _ %+ %3, 11, 11, 8, denom, 
> weight0, weight1, offset0, offset1, dst, dststride, src0, src1, srcstride, 
> height
> +%else
> +cglobal hevc_put_weighted_pred_ %+ %2 %+ _ %+ %3, 8, 8, 8, denom, weight0, 
> offset0, dst, dststride, src0, srcstride, height
> +%endif
> +    and heightq,    0x7fffffff

You should be able to remove this after the above changes.

_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to