Hi, On Wed, Sep 6, 2017 at 4:04 PM, Ashish Pratap Singh <ashk43...@gmail.com> wrote:
> +#define MAX_ALIGN 32 > +#define ALIGN_CEIL(x) ((x) + ((x) % MAX_ALIGN ? MAX_ALIGN - (x) % > MAX_ALIGN : 0)) > ../libavutil/macros.h:#define FFALIGN(x, a) (((x)+(a)-1)&~((a)-1)) > > +static const AVOption vmafmotion_options[] = { > + { NULL } > +}; > #define vmafmotion_options NULL? > +FRAMESYNC_DEFINE_CLASS(vmafmotion, VMAFMotionContext, fs); > src/libavfilter/vf_vmafmotion.c:59:1: warning: unused function 'vmafmotion_framesync_preinit' [-Wunused-function] FRAMESYNC_DEFINE_CLASS(vmafmotion, VMAFMotionContext, fs); ^ src/libavfilter/framesync2.h:298:12: note: expanded from macro 'FRAMESYNC_DEFINE_CLASS' static int name##_framesync_preinit(AVFilterContext *ctx) { \ ^ <scratch space>:44:1: note: expanded from here vmafmotion_framesync_preinit > +static inline int floorn(int n, int m) > +{ > + return n - n % m; > +} > + > +static inline int ceiln(int n, int m) > +{ > + return n % m ? n + (m - n % m) : n; > +} > + > +static void convolution_x(const int *filter, int filt_w, const uint16_t > *src, > + uint16_t *dst, int w, int h, ptrdiff_t > src_stride, > + ptrdiff_t dst_stride) > I would move sad() convolution_x/y() as function pointers into a VMAFMotionDSPContext. In ffmpeg, we always express "stride" in bytes, not in pixels. In SIMD, it will become clear why. Same comment for _y. > +{ > + int radius = filt_w / 2; > + int borders_left = ceiln(radius, 1); > + int borders_right = floorn(w - (filt_w - radius), 1); > For cases where m is 1, ceiln() and floorn() just return n, so this can be simplified. Same comment for _y. > +#define conv_y_fn(type, bits) \ > + static void convolution_y_##bits##bit(const int *filter, int filt_w, > \ > + const type *src, uint16_t *dst, > \ > + int w, int h, ptrdiff_t > src_stride, \ > + ptrdiff_t dst_stride) \ > +{ \ > + int radius = filt_w / 2; \ > + int borders_top = ceiln(radius, 1); \ > + int borders_bottom = floorn(h - (filt_w - radius), 1); \ > + int i, j, k; \ > + int sum = 0; \ > + \ > + for (i = 0; i < borders_top; i++) { \ > + for (j = 0; j < w; j++) { \ > + sum = 0; \ > + for (k = 0; k < filt_w; k++) { \ > + int i_tap = FFABS(i - radius + k); \ > + if (i_tap >= h) { \ > + i_tap = h - (i_tap - h + 1); \ > + } \ > + sum += filter[k] * src[i_tap * src_stride + j]; \ > + } \ > + dst[i * dst_stride + j] = sum >> N; \ > Same comment for bottom 2: I would decrease the shift in this expression by a few bits. The reason is that you can use the extra bits as fractional bits to increase precision. E.g. for 8-bits, the input is 8bits and the filter is 15, but the uint16_t can hold 16 bits (let's say 15 so we can use signed instructions also) data, so instead of 8*15>>15=8 bits, we can shift by just 8bits to get 15 bits (full-width) uint16_t. Same for 10-bit, where we can shift by 10 instead of 15. The sad calculation then indeed has to downshift the result again (*score = (double) (sad * 1.0 / (w * h)); becomes sad * 1.0 / (w * h << (15-bits)). > +void convolution_f32(const int *filter, int filt_w, const void *src, > + uint16_t *dst, uint16_t *tmp, int w, int h, > + ptrdiff_t src_stride, ptrdiff_t dst_stride, uint8_t > type) > +{ > + if(type == 8) { > Rename "type" to bitdepth. > + if (s->desc->comp[0].depth <= 8) { > + ref_px_stride = ref_stride / sizeof(uint8_t); > + convolution_f32(s->filter, 5, (const uint8_t *) ref->data[0], > + s->blur_data, s->temp_data, s->width, s->height, > + ref_px_stride, px_stride, 8); > + } else { > + ref_px_stride = ref_stride / sizeof(uint16_t); > + convolution_f32(s->filter, 5, (const uint16_t *) ref->data[0], > + s->blur_data, s->temp_data, s->width, s->height, > + ref_px_stride, px_stride, 10); > + } > The if and resulting cast are unnecessary f you express stride in bytes instead of pixels. + memcpy(s->prev_blur_data, s->blur_data, data_sz); > Hm... Try FFSWAP with pointers. > +static int query_formats(AVFilterContext *ctx) > +{ > + static const enum AVPixelFormat pix_fmts[] = { > + AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P, > + AV_PIX_FMT_YUV444P10LE, AV_PIX_FMT_YUV422P10LE, > AV_PIX_FMT_YUV420P10LE, > Remove the LE suffix, this filter handles native endian. > +static int activate(AVFilterContext *ctx) > +{ > + VMAFMotionContext *s = ctx->priv; > + return ff_framesync2_activate(&s->fs); > +} > + > + > +static av_cold void uninit(AVFilterContext *ctx) > One newline too many in between the two functions. > +#ifndef MOTION_TOOLS_H_ > +#define MOTION_TOOLS_H_ > AVFILTER_VMAFMOTION_H > +#define N 15 > + > +static const float FILTER_5[5] = { > + 0.054488685, > + 0.244201342, > + 0.402619947, > + 0.244201342, > + 0.054488685 > +}; > > +void convolution_f32(const int *filter, int filt_width, const void *src, > + uint16_t *dst, uint16_t *tmp, int w, int h, > + ptrdiff_t src_stride, ptrdiff_t dst_stride, uint8_t > type); > These 3 should be in the C file. I would say N is a very generic name but if it compiles I guess it's fine :-). Ronald _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel