> I am testing this right now with ffplay.c changes, this now compiles.
> Will post an update once the ffplay changes work.
>
> Made the viz_type an enum as Stefano suggested for further expansion.
>
Made some more changes to get data out to the video filter properly. I had
to change the SampleFormat enum order to get around the merge formats
issue. Like Stefano said, this requires changes to the filter graph
building code in order to work without the hack. I also need to use a
dedicated filter for splitting audio instead of giving aviz two outputs as
below, since that crashes after a few buffers of audio data are pushed
through the filter.
The ffplay test code for this is also attached.
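For reference, here is a rough sketch of how I expect the wiring to look once
a split filter exists ("asplit" is a hypothetical name here; the rest uses the
same API and variables as the ffplay changes below, error paths elided):

    AVFilterContext *flt_split;
    /* src -> asplit; branch 0 stays in the audio chain, branch 1 feeds aviz */
    if (!(flt_split = avfilter_open(avfilter_get_by_name("asplit"), "split"))) goto the_end;
    if (avfilter_init_filter(flt_split, NULL, NULL) < 0) goto the_end;
    if (avfilter_link(afilt_src, 0, flt_split, 0) < 0) goto the_end;
    if (avfilter_link(flt_split, 0, afilt_out, 0) < 0) goto the_end; /* main audio chain */
    if (avfilter_link(flt_split, 1, flt_aviz, 0) < 0) goto the_end;  /* visualization branch */
    if (avfilter_link(flt_aviz, 0, flt_oviz, 0) < 0) goto the_end;   /* aviz then keeps only the video output */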
--- a/af_aviz.c
+++ b/af_aviz.c
@@ -1 +1,459 @@
+/*
+ * copyright (c) 2010 S.N. Hemanth Meenakshisundaram <[email protected]>
+ * based on visualization code in ffplay.c by Fabrice Bellard
+ * and libavcodec/audioconvert.c by Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+/**
+ * @file
+ * aviz audio filter
+ */
+
+#include "avfilter.h"
+#include "libavcodec/audioconvert.h"
+#include "libavcodec/avfft.h"
+#include "libavcodec/colorspace.h"
+#include "libavcodec/fft.h"
+#include "libavformat/avformat.h"
+#include "libavutil/pixdesc.h"
+
+/* TODO: Add more visualizations */
+enum AVizType {
+ AVIZ_NONE = 0,
+ AVIZ_TIME_DOMAIN,
+ AVIZ_FREQ_DOMAIN
+};
+
+typedef struct {
+ short *sample_array; ///< array of recent audio samples to be used for visualization
+ int sample_array_index; ///< index of data being used
+ RDFTContext *rdft; ///< DFT context for converting data into frequency domain
+ int rdft_bits; ///< DFT bits
+ FFTSample *rdft_data; ///< frequency domain data
+ int nb_frequency; ///< number of frequencies in visualization
+ int nb_channels; ///< number of channels of audio data
+ int64_t sample_rate; ///< sample rate of audio
+ int screen_width; ///< width of screen
+ int screen_height; ///< height of screen
+ int freqviz_xpos; ///< stored x position for frequency visualization
+ int hsub, vsub; ///< chroma subsampling values of required output frames
+ int i_start; ///< index of audio data at which to start visualizing
+ enum AVizType viz_type; ///< visualize frequency or time domain data
+ unsigned char fgcolor1[4]; ///< foreground color 1 in YUV
+ unsigned char fgcolor2[4]; ///< foreground color 2 in YUV
+ unsigned char bgcolor[4]; ///< background color in YUV
+ AVFilterBufferRef *vizpic; ///< buffer that stores the visualized picture data
+    void (*app_callback) (void *, int *, int *, int *, int *, int *);
+                                       ///< callback to get the delay value and updated display properties from the app
+ void *app_ctx; ///< context provided by app to be passed back in callback
+} AVizContext;
+
+#define SAMPLE_ARRAY_SIZE (2*65536)
+
+#define RGB_TO_YUV(yuvcolor, red, green, blue) {\
+ yuvcolor[0] = RGB_TO_Y(red, green, blue);\
+ yuvcolor[1] = RGB_TO_U(red, green, blue, 0);\
+ yuvcolor[2] = RGB_TO_V(red, green, blue, 0);\
+ yuvcolor[3] = 0xFF;\
+}
+
+static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
+{
+ AVizContext *aviz = ctx->priv;
+ int rdft_bits = 0, nb_freq = 0;
+
+ av_log(NULL, AV_LOG_ERROR, "Getting to filt init\n");
+ aviz->sample_array = av_malloc(SAMPLE_ARRAY_SIZE * sizeof(short));
+ if (args){
+ sscanf(args, "%d:%d:%d:%ld", &aviz->screen_width, &aviz->screen_height,
+ (int *)&aviz->viz_type, (long int *)&aviz->app_callback);
+ }
+
+ aviz->app_ctx = opaque;
+
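+    /* Find the smallest power of two >= 2*screen_height; half of it (the
+     * number of RDFT output bins) gives at least one frequency bin per
+     * pixel row of the visualization. */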
+ for (rdft_bits = 1; (1<<rdft_bits) < 2*aviz->screen_height; rdft_bits++)
+ ;
+ nb_freq = 1<<(rdft_bits-1);
+
+ aviz->rdft = av_rdft_init(rdft_bits, DFT_R2C);
+ aviz->rdft_bits = rdft_bits;
+ aviz->rdft_data = av_malloc(4*nb_freq*sizeof(*aviz->rdft_data));
+ aviz->nb_frequency = nb_freq;
+
+ /* TODO: Add error checking and configure callback function if there is going to be one */
+
+ RGB_TO_YUV(aviz->bgcolor, 0, 0, 0);
+ RGB_TO_YUV(aviz->fgcolor1, 0xFF, 0xFF, 0xFF);
+ RGB_TO_YUV(aviz->fgcolor2, 0x00, 0x00, 0xFF);
+
+ return 0;
+}
+
+static av_cold void uninit(AVFilterContext *ctx)
+{
+ AVizContext *aviz = ctx->priv;
+ av_free(aviz->sample_array);
+ av_rdft_end(aviz->rdft);
+ av_free(aviz->rdft_data);
+ if (aviz->vizpic)
+ avfilter_unref_buffer(aviz->vizpic);
+}
+
+static int input_config_props(AVFilterLink *link)
+{
+ AVFilterContext *ctx = link->dst;
+ AVizContext *aviz = ctx->priv;
+
+ aviz->nb_channels = avcodec_channel_layout_num_channels(link->channel_layout);
+ aviz->sample_rate = link->sample_rate;
+
+ /* We expect framework to insert appropriate resample filter when using the aviz filter */
+ if (link->format != SAMPLE_FMT_S16) {
+ av_log(ctx, AV_LOG_ERROR, "Input samples must be in S16 format\n");
+ return AVERROR(EINVAL);
+ }
+
+ return 0;
+}
+
+static int output_aconfig_props(AVFilterLink *link)
+{
+ return 0;
+}
+
+static int output_vconfig_props(AVFilterLink *link)
+{
+ AVizContext *aviz = link->src->priv;
+
+ /**
+ * Store chroma subsampling values so we can generate visualization frames
+ * in the required format.
+ */
+ const AVPixFmtDescriptor *pix_desc = &av_pix_fmt_descriptors[link->format];
+ aviz->hsub = pix_desc->log2_chroma_w;
+ aviz->vsub = pix_desc->log2_chroma_h;
+
+ aviz->vizpic = avfilter_get_video_buffer(link, AV_PERM_WRITE | AV_PERM_PRESERVE |
+ AV_PERM_REUSE2,
+ aviz->screen_width, aviz->screen_height);
+ return 0;
+}
+
+static void filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref)
+{
+ AVizContext *aviz = link->dst->priv;
+ AVFilterLink *outlink = link->dst->outputs[0];
+ AVFilterBufferRefAudioProps *sample_props;
+ int size, len;
+ uint8_t *audio_data = samplesref->data[0];
+ AVFILTER_GET_BUFFER_REF_AUDIO_PROPS(sample_props, samplesref);
+
+ /* We update this since this frame may have a different number of channels */
+ aviz->nb_channels = avcodec_channel_layout_num_channels(sample_props->channel_layout);
+ aviz->sample_rate = sample_props->sample_rate;
+
+ size = sample_props->size / sizeof(short);
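+    /* Append the incoming samples to the circular sample_array, wrapping
+     * around at the end of the buffer */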
+ while (size > 0) {
+ len = SAMPLE_ARRAY_SIZE - aviz->sample_array_index;
+ if (len > size)
+ len = size;
+ /* We definitely need a copy of data since we keep old audio data around for visualization */
+ memcpy(aviz->sample_array + aviz->sample_array_index, audio_data, len * sizeof(short));
+ audio_data += len;
+ aviz->sample_array_index += len;
+ if (aviz->sample_array_index >= SAMPLE_ARRAY_SIZE)
+ aviz->sample_array_index = 0;
+ size -= len;
+ }
+    /*
+     * Pass the reference on to the next filter; calling this function itself
+     * here would recurse down the chain with the wrong filter context.
+     * FIXME: Remove the forwarding and unref the buffer here once a splitter
+     * is in place and aviz sits outside the main audio filter chain.
+     */
+    avfilter_filter_samples(outlink, samplesref);
+}
+
+#define SET_PIXEL(picref, yuv_color, x, y, hsub, vsub) { \
+ luma_pos = ((x) ) + ((y) ) * picref->linesize[0]; \
+ chroma_pos1 = ((x) >> (hsub)) + ((y) >> (vsub)) * picref->linesize[1]; \
+ chroma_pos2 = ((x) >> (hsub)) + ((y) >> (vsub)) * picref->linesize[2]; \
+ picref->data[0][luma_pos ] = (yuv_color[3] * yuv_color[0] + (255 - yuv_color[3]) * picref->data[0][luma_pos ]) >> 8; \
+ picref->data[1][chroma_pos1] = (yuv_color[3] * yuv_color[1] + (255 - yuv_color[3]) * picref->data[1][chroma_pos1]) >> 8; \
+ picref->data[2][chroma_pos2] = (yuv_color[3] * yuv_color[2] + (255 - yuv_color[3]) * picref->data[2][chroma_pos2]) >> 8; \
+}
+
+static inline void fillrect(AVFilterBufferRef *picref, unsigned int x, unsigned int y,
+ unsigned int width, unsigned int height,
+ unsigned char yuv_color[4], int hsub, int vsub)
+{
+ int i, plane;
+ uint8_t *p;
+
+ if (yuv_color[3] != 0xFF) {
+ unsigned int j, luma_pos, chroma_pos1, chroma_pos2;
+
+ for (j = 0; j < height; j++)
+ for (i = 0; i < width; i++)
+ SET_PIXEL(picref, yuv_color, (i+x), (y+j), hsub, vsub);
+
+ } else {
+ for (plane = 0; plane < 3 && picref->data[plane]; plane++) {
+ int hsub1 = plane == 1 || plane == 2 ? hsub : 0;
+ int vsub1 = plane == 1 || plane == 2 ? vsub : 0;
+
+ p = picref->data[plane] + (y >> vsub1) * picref->linesize[plane] + (x >> hsub1);
+ for (i = 0; i < (height >> vsub1); i++) {
+ memset(p, yuv_color[plane], (width >> hsub1));
+ p += picref->linesize[plane];
+ }
+ }
+ }
+}
+
+/* More visualization routines can be added here in the future */
+
+static void time_domain_visualize(AVizContext *aviz)
+{
+ AVFilterBufferRef *pic = aviz->vizpic;
+ int i, h, h2, ch, y1, x, y, ys;
+ int width = aviz->screen_width, height = aviz->screen_height;
+ int nb_channels = aviz->nb_channels;
+ uint8_t *fgcolor1 = aviz->fgcolor1, *fgcolor2 = aviz->fgcolor2;
+ int hsub = aviz->hsub, vsub = aviz->vsub;
+ short *sample_array = aviz->sample_array;
+
+ fillrect(pic, 0, 0, width, height, aviz->bgcolor, hsub, vsub);
+
+ /* total height for one channel */
+ h = height / nb_channels;
+    /* half the per-channel graph height, less a 10% margin */
+ h2 = (h * 9) / 20;
+ for (ch = 0; ch < nb_channels; ch++) {
+ i = aviz->i_start + ch;
+ y1 = ch*h + (h/2); /* position of center line */
+ for(x = 0; x < width; x++) {
+ y = (sample_array[i] * h2) >> 15;
+ if (y < 0) {
+ y = -y;
+ ys = y1 - y;
+ } else {
+ ys = y1;
+ }
+ fillrect(pic, 0+x, ys, 1, y, fgcolor1, hsub, vsub);
+ i += nb_channels;
+ if (i >= SAMPLE_ARRAY_SIZE)
+ i -= SAMPLE_ARRAY_SIZE;
+ }
+ }
+
+ for (ch = 1; ch < nb_channels; ch++) {
+ y = ch * h;
+ fillrect(pic, 0, y, width, 1, fgcolor2, hsub, vsub);
+ }
+}
+
+static void frequency_domain_visualize(AVizContext *aviz)
+{
+ AVFilterBufferRef *pic = aviz->vizpic;
+ int i, ch, x, y;
+ uint8_t fgcolor[4];
+ double w;
+ FFTSample *data[2];
+ int width = aviz->screen_width, height = aviz->screen_height;
+ int nb_channels = aviz->nb_channels;
+ int nb_display_channels = FFMIN(nb_channels, 2);
+ int nb_freq = aviz->nb_frequency;
+ int hsub = aviz->hsub, vsub = aviz->vsub;
+ short *sample_array = aviz->sample_array;
+
+ fillrect(pic, 0, 0, width, height, aviz->bgcolor, hsub, vsub);
+ for (ch = 0; ch < nb_display_channels; ch++) {
+ data[ch] = aviz->rdft_data + 2*nb_freq*ch;
+ i = aviz->i_start + ch;
+ for (x = 0; x < 2*nb_freq; x++) {
+ w = (x-nb_freq)*(1.0/nb_freq);
+ data[ch][x]= sample_array[i]*(1.0-w*w);
+ i += nb_channels;
+ if (i >= SAMPLE_ARRAY_SIZE)
+ i -= SAMPLE_ARRAY_SIZE;
+ }
+ av_rdft_calc(aviz->rdft, data[ch]);
+ }
+ w = 1/sqrt(nb_freq);
+    /* Least efficient way to do this; we should access the data directly, but it's more than fast enough. */
+ for (y = 0; y < height; y++) {
+ int a = sqrt(w*sqrt(data[0][2*y+0]*data[0][2*y+0] + data[0][2*y+1]*data[0][2*y+1]));
+ int b = (nb_display_channels == 2 ) ? sqrt(w*sqrt(data[1][2*y+0]*data[1][2*y+0]
+ + data[1][2*y+1]*data[1][2*y+1])) : a;
+ a = FFMIN(a,255);
+ b = FFMIN(b,255);
+ RGB_TO_YUV(fgcolor, a, b, (a+b)/2);
+
+        fillrect(pic, aviz->freqviz_xpos, height-y-1, 1, 1, fgcolor, hsub, vsub);
+ }
+ aviz->freqviz_xpos++;
+ if (aviz->freqviz_xpos >= width)
+ aviz->freqviz_xpos = 0;
+}
+
+static inline int compute_mod(int a, int b)
+{
+ a = a % b;
+ if (a >= 0)
+ return a;
+ else
+ return a + b;
+}
+
+/*
+ * Calculates the index at which to start visualizing and reconfigures the
+ * DFT if parameters such as the screen height have changed.
+ */
+static void calc_index_and_config_aviz(AVFilterLink *link, int delay, int audio_cb_time,
+ int new_width, int new_height, int new_viz_type)
+{
+ AVizContext *aviz = link->src->priv;
+ int i, i_start, x;
+ int data_used;
+ int channels = aviz->nb_channels;
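+    /* one S16 sample frame is 2 bytes per channel; the app reports the delay in bytes */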
+ int n = 2*channels;
+
+ if (new_height != aviz->screen_height || new_width != aviz->screen_width) {
+
+ avfilter_unref_buffer(aviz->vizpic);
+ aviz->vizpic = avfilter_get_video_buffer(link, AV_PERM_WRITE |
+ AV_PERM_PRESERVE | AV_PERM_REUSE2,
+ new_width, new_height);
+ }
+    if (new_height != aviz->screen_height) {
+        int rdft_bits, nb_freq;
+
+        for (rdft_bits = 1; (1<<rdft_bits) < 2*new_height; rdft_bits++)
+            ;
+        nb_freq = 1<<(rdft_bits-1);
+
+        /* free the old transform context and buffer before reconfiguring */
+        av_rdft_end(aviz->rdft);
+        av_free(aviz->rdft_data);
+
+        aviz->rdft = av_rdft_init(rdft_bits, DFT_R2C);
+        aviz->rdft_bits = rdft_bits;
+        aviz->rdft_data = av_malloc(4*nb_freq*sizeof(*aviz->rdft_data));
+        aviz->nb_frequency = nb_freq;
+        aviz->screen_height = new_height;
+    }
+
+    aviz->screen_width = new_width;
+    aviz->viz_type = new_viz_type;
+
+    /* number of stored samples per channel that one visualized frame consumes */
+    data_used = aviz->viz_type == AVIZ_TIME_DOMAIN ? aviz->screen_width :
+                aviz->viz_type == AVIZ_FREQ_DOMAIN ? (2*aviz->nb_frequency) : 0;
+
+ /* Client will send delay as zero if audio is paused */
+ if (delay == 0 || new_viz_type == AVIZ_NONE)
+ return;
+
+ delay /= n;
+
+ /* to be more precise, we take into account the time spent since
+ the last buffer computation */
+ if (audio_cb_time) {
+ int time_diff = av_gettime() - audio_cb_time;
+ delay -= (time_diff * aviz->sample_rate) / 1000000;
+ }
+
+ delay += 2*data_used;
+ if (delay < data_used)
+ delay = data_used;
+
+ i_start = x = compute_mod(aviz->sample_array_index - delay * channels, SAMPLE_ARRAY_SIZE);
+
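+    /* Heuristic borrowed from ffplay: scan the last 1000 samples for a zero
+     * crossing ahead of a large downward swing and start drawing there, so
+     * the time-domain display stays roughly phase-locked between frames. */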
+ if (aviz->viz_type == AVIZ_TIME_DOMAIN) {
+ int h = INT_MIN;
+ for (i = 0; i < 1000; i += channels) {
+ int idx = (SAMPLE_ARRAY_SIZE + x - i) % SAMPLE_ARRAY_SIZE;
+ int a = aviz->sample_array[idx];
+ int b = aviz->sample_array[(idx + 4*channels)%SAMPLE_ARRAY_SIZE];
+ int c = aviz->sample_array[(idx + 5*channels)%SAMPLE_ARRAY_SIZE];
+ int d = aviz->sample_array[(idx + 9*channels)%SAMPLE_ARRAY_SIZE];
+ int score = a-d;
+ if (h < score && (b^c) < 0) {
+ h = score;
+ i_start = idx;
+ }
+ }
+ }
+ aviz->i_start = i_start;
+}
+
+/* Called on the video output link when the downstream filter needs, or is ready to receive, new picture data */
+static int request_frame(AVFilterLink *link)
+{
+ AVizContext *aviz = link->src->priv;
+ int delay = 0, audio_cb_time = 0, new_width = 0, new_height = 0, new_viz_type = 0;
+ /**
+ * Here we use a callback provided by app to get parameters required to
+ * configure visualization.
+ * Changed parameters may include audio position reached by playback,
+ * screen dimensions or visualization type.
+ */
+    aviz->app_callback(aviz->app_ctx, &delay, &audio_cb_time, &new_width, &new_height, &new_viz_type);
+
+ calc_index_and_config_aviz(link, delay, audio_cb_time, new_width, new_height, new_viz_type);
+
+ switch (aviz->viz_type) {
+ case AVIZ_TIME_DOMAIN:
+ time_domain_visualize(aviz);
+ break;
+ case AVIZ_FREQ_DOMAIN:
+ frequency_domain_visualize(aviz);
+ break;
+    default:
+        fillrect(aviz->vizpic, 0, 0, aviz->screen_width, aviz->screen_height,
+                 aviz->bgcolor, aviz->hsub, aviz->vsub);
+        break;
+ }
+
+ avfilter_start_frame(link, avfilter_ref_buffer(aviz->vizpic, ~0));
+ avfilter_draw_slice(link, 0, link->h, 1);
+ avfilter_end_frame(link);
+
+ return 0;
+}
+
+AVFilter avfilter_af_aviz = {
+    .name        = "aviz",
+    .description = NULL_IF_CONFIG_SMALL("Visualize input audio samples and output picture frames"),
+
+    .init   = init,
+    .uninit = uninit,
+
+    .priv_size = sizeof(AVizContext),
+
+    .inputs  = (AVFilterPad[]) {{ .name           = "default",
+                                  .type           = AVMEDIA_TYPE_AUDIO,
+                                  .filter_samples = filter_samples,
+                                  .config_props   = input_config_props,
+                                  .min_perms      = AV_PERM_READ, },
+                                { .name = NULL }},
+    .outputs = (AVFilterPad[]) {{ .name          = "default",
+                                  .type          = AVMEDIA_TYPE_AUDIO,
+                                  .config_props  = output_aconfig_props, },
+                                { .name          = "default2",
+                                  .type          = AVMEDIA_TYPE_VIDEO,
+                                  .config_props  = output_vconfig_props,
+                                  .request_frame = request_frame, },
+                                { .name = NULL }},
+};

--- a/ffplay.c
+++ b/ffplay.c
@@ -213,6 +213,7 @@
#if CONFIG_AVFILTER
AVFilterContext *out_video_filter; ///<the last filter in the video chain
AVFilterContext *out_audio_filter; ///<the last filter in the audio chain
+ AVFilterContext *out_aviz_filter; ///<the output filter for audio visualization
AVFilterGraph *agraph;
#endif
@@ -2356,6 +2357,16 @@
*aviz_type = is->show_audio;
}
+static void avfilter_aviz_callback(VideoState *is, int *delay, int *aud_cb_time,
+ int *width, int *height, int *aviz_type)
+{
+ *delay = audio_write_get_buf_size(is);
+ *aud_cb_time = audio_callback_time;
+ *width = is->width;
+ *height = is->height;
+ *aviz_type = is->show_audio;
+}
+
/* open a given stream. Return 0 if OK */
static int stream_component_open(VideoState *is, int stream_index)
{
@@ -2464,8 +2475,24 @@
av_freep(&afilters);
- } else
- if (avfilter_link(afilt_src, 0, afilt_out, 0) < 0) goto the_end;
+ } else {
+ // if (avfilter_link(afilt_src, 0, afilt_out, 0) < 0) goto the_end;
+ char args[255];
+ AVFilterContext *flt_aviz, *flt_oviz;
+ snprintf(args, 255, "640:480:1:%ld", (long int) avfilter_aviz_callback);
+ av_log(NULL, AV_LOG_ERROR, "Getting to filt config start\n");
+ if (!(flt_aviz = avfilter_open(avfilter_get_by_name("aviz"), "aviz"))) goto the_end;
+ if (avfilter_init_filter(flt_aviz, args, is) < 0) goto the_end;
+ if (!(flt_oviz = avfilter_open(&output_filter, "out"))) goto the_end;
+ if (avfilter_init_filter(flt_oviz, NULL, NULL) < 0) goto the_end;
+ if (avfilter_link(afilt_src, 0, flt_aviz, 0) < 0) goto the_end;
+ if (avfilter_link(flt_aviz, 0, afilt_out, 0) < 0) goto the_end;
+ if (avfilter_link(flt_aviz, 1, flt_oviz, 0) < 0) goto the_end;
+ if ((ret = avfilter_graph_add_filter(is->agraph, flt_aviz)) < 0) goto the_end;
+ if ((ret = avfilter_graph_add_filter(is->agraph, flt_oviz)) < 0) goto the_end;
+ is->out_aviz_filter = flt_oviz;
+ av_log(NULL, AV_LOG_ERROR, "Getting to filt config end\n");
+ }
if ((ret = avfilter_graph_add_filter(is->agraph, afilt_src)) < 0) goto the_end;
if ((ret = avfilter_graph_add_filter(is->agraph, afilt_out)) < 0) goto the_end;