I am testing this right now with ffplay.c changes, this now compiles.
Will post an update once the ffplay changes work.
Made the viz_type an enum as Stefano suggested for further expansion.
Regards,
Hemanth
---
libavfilter/Makefile | 1 +
libavfilter/af_aviz.c | 436 ++++++++++++++++++++++++++++++++++++++++++++++
libavfilter/allfilters.c | 1 +
3 files changed, 438 insertions(+), 0 deletions(-)
create mode 100644 libavfilter/af_aviz.c
diff --git a/libavfilter/Makefile b/libavfilter/Makefile
index de5be6c..47c59b7 100644
--- a/libavfilter/Makefile
+++ b/libavfilter/Makefile
@@ -14,6 +14,7 @@ OBJS = allfilters.o \
graphparser.o \
parseutils.o \
+OBJS-$(CONFIG_AVIZ_FILTER) += af_aviz.o
OBJS-$(CONFIG_RESAMPLE_FILTER) += af_resample.o
OBJS-$(CONFIG_ASPECT_FILTER) += vf_aspect.o
diff --git a/libavfilter/af_aviz.c b/libavfilter/af_aviz.c
new file mode 100644
index 0000000..f89b794
--- /dev/null
+++ b/libavfilter/af_aviz.c
@@ -0,0 +1,436 @@
+/*
+ * copyright (c) 2010 S.N. Hemanth Meenakshisundaram <[email protected]>
+ * based on code in libavcodec/aviz.c by Fabrice Bellard
+ * and libavcodec/audioconvert.c by Michael Niedermayer
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * aviz audio filter
+ */
+
+#include "avfilter.h"
+#include "libavcodec/audioconvert.h"
+#include "libavcodec/avfft.h"
+#include "libavcodec/colorspace.h"
+#include "libavcodec/fft.h"
+#include "libavformat/avformat.h"
+#include "libavutil/pixdesc.h"
+
+/* TODO: Add more visualizations */
+/* Selects which visualization request_frame() renders into the output picture. */
+enum AVizType {
+    AVIZ_NONE = 0,      ///< no visualization; frames are filled with the background color
+    AVIZ_TIME_DOMAIN,   ///< oscilloscope-style waveform of the buffered samples
+    AVIZ_FREQ_DOMAIN    ///< RDFT-based spectrum visualization
+};
+
+typedef struct {
+    short *sample_array;        ///< ring buffer of recent audio samples used for visualization
+    int sample_array_index;     ///< current write position within sample_array
+    RDFTContext *rdft;          ///< RDFT context for converting data into the frequency domain
+    int rdft_bits;              ///< log2 of the RDFT size
+    FFTSample *rdft_data;       ///< frequency-domain work buffer (2*nb_frequency samples per channel)
+    int nb_frequency;           ///< number of frequencies in the visualization
+    int nb_channels;            ///< number of channels of audio data
+    int64_t sample_rate;        ///< sample rate of the audio
+    int screen_width;           ///< width of the output picture
+    int screen_height;          ///< height of the output picture
+    int freqviz_xpos;           ///< stored x position for the frequency visualization
+    int hsub, vsub;             ///< chroma subsampling of the negotiated output pixel format
+    int i_start;                ///< index into sample_array at which to start visualizing
+    enum AVizType viz_type;     ///< visualize frequency- or time-domain data
+    unsigned char fgcolor1[4];  ///< foreground color 1 in YUV (+ alpha in [3])
+    unsigned char fgcolor2[4];  ///< foreground color 2 in YUV (+ alpha in [3])
+    unsigned char bgcolor[4];   ///< background color in YUV (+ alpha in [3])
+    AVFilterBufferRef *vizpic;  ///< buffer that stores the visualized picture data
+    int (*app_callback) (int *, int *, int *, int *, int *);
+                                ///< application callback filling in delay, callback time,
+                                ///< width, height and visualization type (see request_frame)
+} AVizContext;
+
+#define SAMPLE_ARRAY_SIZE (2*65536)
+
+#define RGB_TO_YUV(yuvcolor, red, green, blue) {\
+ yuvcolor[0] = RGB_TO_Y(red, green, blue);\
+ yuvcolor[1] = RGB_TO_U(red, green, blue, 0);\
+ yuvcolor[2] = RGB_TO_V(red, green, blue, 0);\
+ yuvcolor[3] = 0xFF;\
+}\
+
+/**
+ * Initialize the filter: allocate the sample ring buffer and an RDFT
+ * context sized for the requested screen height, and precompute the
+ * YUV colors used by the visualizations.
+ *
+ * args format: "width:height:viz_type:callback_address"
+ * Returns 0 on success, AVERROR(ENOMEM) if an allocation fails.
+ */
+static av_cold int init(AVFilterContext *ctx, const char *args, void *opaque)
+{
+    AVizContext *aviz = ctx->priv;
+    int rdft_bits = 0, nb_freq = 0;
+
+    aviz->sample_array = av_malloc(SAMPLE_ARRAY_SIZE * sizeof(short));
+    if (!aviz->sample_array)
+        return AVERROR(ENOMEM);
+
+    if (args) {
+        /* FIXME: smuggling a function pointer through the argument string is
+         * unsafe and non-portable; replace with an opaque-pointer mechanism
+         * once the filter API provides one. */
+        sscanf(args, "%d:%d:%d:%ld", &aviz->screen_width, &aviz->screen_height,
+               (int *)&aviz->viz_type, (long int *)&aviz->app_callback);
+    }
+
+    /* smallest power of two >= 2*screen_height */
+    for (rdft_bits = 1; (1<<rdft_bits) < 2*aviz->screen_height; rdft_bits++)
+        ;
+    nb_freq = 1<<(rdft_bits-1);
+
+    aviz->rdft         = av_rdft_init(rdft_bits, DFT_R2C);
+    aviz->rdft_bits    = rdft_bits;
+    aviz->rdft_data    = av_malloc(4*nb_freq*sizeof(*aviz->rdft_data));
+    aviz->nb_frequency = nb_freq;
+    if (!aviz->rdft || !aviz->rdft_data)
+        return AVERROR(ENOMEM);  /* partial allocations are released in uninit() */
+
+    RGB_TO_YUV(aviz->bgcolor,  0,    0,    0);
+    RGB_TO_YUV(aviz->fgcolor1, 0xFF, 0xFF, 0xFF);
+    RGB_TO_YUV(aviz->fgcolor2, 0x00, 0x00, 0xFF);
+
+    return 0;
+}
+
+/** Release the sample history, the RDFT state and any cached output picture. */
+static av_cold void uninit(AVFilterContext *ctx)
+{
+    AVizContext *s = ctx->priv;
+
+    av_free(s->sample_array);
+    av_rdft_end(s->rdft);
+    av_free(s->rdft_data);
+    if (s->vizpic)
+        avfilter_unref_buffer(s->vizpic);
+}
+
+/**
+ * Configure the audio input link: record channel count and sample rate,
+ * and reject any sample format other than packed signed 16-bit.
+ */
+static int input_config_props(AVFilterLink *link)
+{
+    AVFilterContext *ctx = link->dst;
+    AVizContext *s = ctx->priv;
+
+    s->nb_channels = avcodec_channel_layout_num_channels(link->channel_layout);
+    s->sample_rate = link->sample_rate;
+
+    /* The framework is expected to insert a resample filter upstream
+     * when the source does not already deliver S16. */
+    if (link->format != SAMPLE_FMT_S16) {
+        av_log(ctx, AV_LOG_ERROR, "Input samples must be in S16 format\n");
+        return AVERROR(EINVAL);
+    }
+    return 0;
+}
+
+/**
+ * Configure the video output link: cache the chroma subsampling of the
+ * negotiated pixel format so visualization frames can be drawn directly
+ * in that format.
+ */
+static int output_config_props(AVFilterLink *link)
+{
+    /* This callback belongs to an *output* pad, so the filter owning the
+     * pad is link->src; link->dst is the downstream filter (the original
+     * code read the wrong context through link->dst). */
+    AVFilterContext *ctx = link->src;
+    AVizContext *aviz = ctx->priv;
+    const AVPixFmtDescriptor *pix_desc = &av_pix_fmt_descriptors[link->format];
+
+    aviz->hsub = pix_desc->log2_chroma_w;
+    aviz->vsub = pix_desc->log2_chroma_h;
+
+    return 0;
+}
+
+/**
+ * Buffer incoming audio for later visualization: copy the S16 samples of
+ * samplesref into the sample_array ring buffer (wrapping at
+ * SAMPLE_ARRAY_SIZE), then release the reference.
+ */
+static void filter_samples(AVFilterLink *link, AVFilterBufferRef *samplesref)
+{
+    AVizContext *aviz = link->dst->priv;
+    AVFilterBufferRefAudioProps *sample_props;
+    int size, len;
+    uint8_t *audio_data = samplesref->data[0];
+    AVFILTER_GET_BUFFER_REF_AUDIO_PROPS(sample_props, samplesref);
+
+    /* Update per-frame since this frame may have a different number of channels */
+    aviz->nb_channels = avcodec_channel_layout_num_channels(sample_props->channel_layout);
+    aviz->sample_rate = sample_props->sample_rate;
+
+    /* sample_props->size is in bytes; 'size' counts S16 samples */
+    size = sample_props->size / sizeof(short);
+    while (size > 0) {
+        len = SAMPLE_ARRAY_SIZE - aviz->sample_array_index;
+        if (len > size)
+            len = size;
+        /* We definitely need a copy of data since we keep old audio data around for visualization */
+        memcpy(aviz->sample_array + aviz->sample_array_index, audio_data, len * sizeof(short));
+        /* advance by BYTES: the original advanced the uint8_t pointer by a
+         * sample count, re-copying half of each chunk */
+        audio_data += len * sizeof(short);
+        aviz->sample_array_index += len;
+        if (aviz->sample_array_index >= SAMPLE_ARRAY_SIZE)
+            aviz->sample_array_index = 0;
+        size -= len;
+    }
+    /*
+     * We no longer need the reference, pass it on to the next filter
+     * FIXME: Remove this once a splitter is in place and aviz is outside the main
+     * audio filter chain.
+     */
+    avfilter_unref_buffer(samplesref);
+}
+
+/**
+ * Alpha-blend yuv_color into the pixel of picref at (x, y).
+ * The blend factor is yuv_color[3] in 0..255, divided by 256 via the shift,
+ * so even a fully opaque blend darkens slightly; fillrect() only takes this
+ * path for non-opaque colors.
+ * NOTE: expects luma_pos, chroma_pos1 and chroma_pos2 to be declared in the
+ * enclosing scope (see fillrect()).
+ */
+#define SET_PIXEL(picref, yuv_color, x, y, hsub, vsub) { \
+    luma_pos    = ((x)          ) + ((y)          ) * picref->linesize[0]; \
+    chroma_pos1 = ((x) >> (hsub)) + ((y) >> (vsub)) * picref->linesize[1]; \
+    chroma_pos2 = ((x) >> (hsub)) + ((y) >> (vsub)) * picref->linesize[2]; \
+    picref->data[0][luma_pos   ] = (yuv_color[3] * yuv_color[0] + (255 - yuv_color[3]) * picref->data[0][luma_pos   ]) >> 8; \
+    picref->data[1][chroma_pos1] = (yuv_color[3] * yuv_color[1] + (255 - yuv_color[3]) * picref->data[1][chroma_pos1]) >> 8; \
+    picref->data[2][chroma_pos2] = (yuv_color[3] * yuv_color[2] + (255 - yuv_color[3]) * picref->data[2][chroma_pos2]) >> 8; \
+}
+
+/**
+ * Fill a width x height rectangle at (x, y) of picref with yuv_color.
+ *
+ * Non-opaque colors (alpha != 0xFF) are alpha-blended pixel by pixel via
+ * SET_PIXEL; opaque colors are written with one memset run per line per
+ * plane, honoring the hsub/vsub chroma subsampling.
+ * NOTE(review): the rectangle is not clipped against the picture bounds;
+ * callers must pass in-range coordinates.
+ */
+static inline void fillrect(AVFilterBufferRef *picref, unsigned int x, unsigned int y,
+                            unsigned int width, unsigned int height,
+                            unsigned char yuv_color[4], int hsub, int vsub)
+{
+    int i, plane;
+    uint8_t *p;
+
+    if (yuv_color[3] != 0xFF) {
+        /* blended fill, pixel by pixel */
+        unsigned int j, luma_pos, chroma_pos1, chroma_pos2;
+
+        for (j = 0; j < height; j++)
+            for (i = 0; i < width; i++)
+                SET_PIXEL(picref, yuv_color, (i+x), (y+j), hsub, vsub);
+
+    } else {
+        /* opaque fill: chroma planes (1 and 2) are subsampled, luma is not */
+        for (plane = 0; plane < 3 && picref->data[plane]; plane++) {
+            int hsub1 = plane == 1 || plane == 2 ? hsub : 0;
+            int vsub1 = plane == 1 || plane == 2 ? vsub : 0;
+
+            p = picref->data[plane] + (y >> vsub1) * picref->linesize[plane] + (x >> hsub1);
+            for (i = 0; i < (height >> vsub1); i++) {
+                memset(p, yuv_color[plane], (width >> hsub1));
+                p += picref->linesize[plane];
+            }
+        }
+    }
+}
+
+/* More visualization routines can be added here in the future */
+
+/**
+ * Draw an oscilloscope-style waveform of the buffered samples into
+ * aviz->vizpic, one horizontal band per channel, starting at sample
+ * index aviz->i_start.
+ */
+static void time_domain_visualize(AVizContext *aviz)
+{
+    AVFilterBufferRef *pic = aviz->vizpic;
+    int i, h, h2, ch, y1, x, y, ys;
+    int width = aviz->screen_width, height = aviz->screen_height;
+    int nb_channels = aviz->nb_channels;
+    uint8_t *fgcolor1 = aviz->fgcolor1, *fgcolor2 = aviz->fgcolor2;
+    int hsub = aviz->hsub, vsub = aviz->vsub;
+    short *sample_array = aviz->sample_array;
+
+    fillrect(pic, 0, 0, width, height, aviz->bgcolor, hsub, vsub);
+
+    /* total height for one channel */
+    h = height / nb_channels;
+    /* graph height / 2 (90% of the half-band, leaving a small margin) */
+    h2 = (h * 9) / 20;
+    for (ch = 0; ch < nb_channels; ch++) {
+        /* samples are interleaved, so step by nb_channels per column */
+        i = aviz->i_start + ch;
+        y1 = ch*h + (h/2); /* position of center line */
+        for (x = 0; x < width; x++) {
+            /* scale the 16-bit sample to the band's half-height */
+            y = (sample_array[i] * h2) >> 15;
+            if (y < 0) {
+                y = -y;
+                ys = y1 - y;
+            } else {
+                ys = y1;
+            }
+            /* 1-pixel-wide vertical bar growing from the center line */
+            fillrect(pic, 0+x, ys, 1, y, fgcolor1, hsub, vsub);
+            i += nb_channels;
+            if (i >= SAMPLE_ARRAY_SIZE)
+                i -= SAMPLE_ARRAY_SIZE;
+        }
+    }
+
+    /* separator lines between the per-channel bands */
+    for (ch = 1; ch < nb_channels; ch++) {
+        y = ch * h;
+        fillrect(pic, 0, y, width, 1, fgcolor2, hsub, vsub);
+    }
+}
+
+/**
+ * Draw one column of an RDFT-based spectrum into aviz->vizpic at the
+ * scrolling x position aviz->freqviz_xpos (up to two channels are mapped
+ * onto the red/green components of the column color).
+ */
+static void frequency_domain_visualize(AVizContext *aviz)
+{
+    AVFilterBufferRef *pic = aviz->vizpic;
+    int i, ch, x, y;
+    uint8_t fgcolor[4];
+    double w;
+    FFTSample *data[2];
+    int width = aviz->screen_width, height = aviz->screen_height;
+    int nb_channels = aviz->nb_channels;
+    int nb_display_channels = FFMIN(nb_channels, 2);
+    int nb_freq = aviz->nb_frequency;
+    int hsub = aviz->hsub, vsub = aviz->vsub;
+    short *sample_array = aviz->sample_array;
+
+    fillrect(pic, 0, 0, width, height, aviz->bgcolor, hsub, vsub);
+    for (ch = 0; ch < nb_display_channels; ch++) {
+        data[ch] = aviz->rdft_data + 2*nb_freq*ch;
+        i = aviz->i_start + ch;
+        for (x = 0; x < 2*nb_freq; x++) {
+            /* Welch window to reduce spectral leakage */
+            w = (x-nb_freq)*(1.0/nb_freq);
+            data[ch][x] = sample_array[i]*(1.0-w*w);
+            i += nb_channels;
+            if (i >= SAMPLE_ARRAY_SIZE)
+                i -= SAMPLE_ARRAY_SIZE;
+        }
+        av_rdft_calc(aviz->rdft, data[ch]);
+    }
+    w = 1/sqrt(nb_freq);
+    //least efficient way to do this, we should of course directly access it but its more than fast enough
+    for (y = 0; y < height; y++) {
+        int a = sqrt(w*sqrt(data[0][2*y+0]*data[0][2*y+0] + data[0][2*y+1]*data[0][2*y+1]));
+        int b = (nb_display_channels == 2) ? sqrt(w*sqrt(data[1][2*y+0]*data[1][2*y+0]
+                + data[1][2*y+1]*data[1][2*y+1])) : a;
+        a = FFMIN(a, 255);
+        b = FFMIN(b, 255);
+        RGB_TO_YUV(fgcolor, a, b, (a+b)/2);
+
+        /* Draw at the scrolling column and clamp the row into the picture:
+         * the original drew every column at x == 0 (freqviz_xpos was
+         * incremented but never used) and wrote at row height-y, which is
+         * one past the bottom edge when y == 0. */
+        fillrect(pic, aviz->freqviz_xpos, height-1-y, 1, 1, fgcolor, hsub, vsub);
+    }
+    aviz->freqviz_xpos++;
+    if (aviz->freqviz_xpos >= width)
+        aviz->freqviz_xpos = 0;
+}
+
+/* Mathematical modulus: result is always in [0, b) for positive b. */
+static inline int compute_mod(int a, int b)
+{
+    int r = a % b;
+    return r < 0 ? r + b : r;
+}
+
+/*
+ * Compute the index into the sample ring buffer at which visualization
+ * should start, and reconfigure the RDFT if the screen height has changed.
+ */
+static void calc_index_and_config_aviz(AVizContext *aviz, int delay, int audio_cb_time,
+                                       int new_width, int new_height, int new_viz_type)
+{
+    int i, i_start, x;
+    int data_used;
+    int channels = aviz->nb_channels;
+    int n = 2*channels;
+
+    if (new_height != aviz->screen_height) {
+        int rdft_bits, nb_freq;
+
+        /* Size the transform from the NEW height (the original used the
+         * stale aviz->screen_height here) ... */
+        for (rdft_bits = 1; (1<<rdft_bits) < 2*new_height; rdft_bits++)
+            ;
+        nb_freq = 1<<(rdft_bits-1);
+
+        /* ... and release the previous context and buffer, which the
+         * original leaked on every resize. */
+        av_rdft_end(aviz->rdft);
+        av_free(aviz->rdft_data);
+
+        aviz->rdft          = av_rdft_init(rdft_bits, DFT_R2C);
+        aviz->rdft_bits     = rdft_bits;
+        aviz->rdft_data     = av_malloc(4*nb_freq*sizeof(*aviz->rdft_data));
+        aviz->nb_frequency  = nb_freq;
+        aviz->screen_height = new_height;
+    }
+
+    /* Samples per channel consumed by one frame of the CURRENT viz type. */
+    data_used = aviz->viz_type == AVIZ_TIME_DOMAIN ? aviz->screen_width :
+                aviz->viz_type == AVIZ_FREQ_DOMAIN ? (2*aviz->nb_frequency) : 0;
+
+    aviz->screen_width = new_width;
+    aviz->viz_type     = new_viz_type;
+
+    /* Client will send delay as zero if audio is paused */
+    if (delay == 0 || new_viz_type == AVIZ_NONE)
+        return;
+
+    delay /= n;  /* presumably bytes -> samples per channel (S16) — TODO confirm against caller */
+
+    /* to be more precise, we take into account the time spent since
+       the last buffer computation */
+    if (audio_cb_time) {
+        int time_diff = av_gettime() - audio_cb_time;
+        delay -= (time_diff * aviz->sample_rate) / 1000000;
+    }
+
+    delay += 2*data_used;
+    if (delay < data_used)
+        delay = data_used;
+
+    i_start = x = compute_mod(aviz->sample_array_index - delay * channels, SAMPLE_ARRAY_SIZE);
+
+    if (aviz->viz_type == AVIZ_TIME_DOMAIN) {
+        /* Search backwards for a start point near a zero crossing so the
+         * waveform stays roughly phase-stable between frames. */
+        int h = INT_MIN;
+        for (i = 0; i < 1000; i += channels) {
+            int idx = (SAMPLE_ARRAY_SIZE + x - i) % SAMPLE_ARRAY_SIZE;
+            int a = aviz->sample_array[idx];
+            int b = aviz->sample_array[(idx + 4*channels) % SAMPLE_ARRAY_SIZE];
+            int c = aviz->sample_array[(idx + 5*channels) % SAMPLE_ARRAY_SIZE];
+            int d = aviz->sample_array[(idx + 9*channels) % SAMPLE_ARRAY_SIZE];
+            int score = a - d;
+            /* (b^c) < 0 <=> b and c have opposite signs, i.e. a zero crossing */
+            if (h < score && (b^c) < 0) {
+                h = score;
+                i_start = idx;
+            }
+        }
+    }
+    aviz->i_start = i_start;
+}
+
+/* The output visualization filter calls this when it needs or is ready to receive new picture data */
+static int request_frame(AVFilterLink *link)
+{
+    /* request_frame runs on an OUTPUT link, so this filter is link->src;
+     * the original read the downstream filter's priv via link->dst. */
+    AVizContext *aviz = link->src->priv;
+    int delay = 0, audio_cb_time = 0, new_width = 0, new_height = 0, new_viz_type = 0;
+
+    /* init() only sets app_callback when args were supplied, and vizpic is
+     * never allocated in this file (NOTE(review): confirm the application
+     * side provides it); guard both before dereferencing. */
+    if (!aviz->app_callback || !aviz->vizpic)
+        return AVERROR(EINVAL);
+
+    /**
+     * Here we use a callback provided by app to get parameters required to
+     * configure visualization.
+     * Changed parameters may include audio position reached by playback,
+     * screen dimensions or visualization type.
+     */
+    aviz->app_callback(&delay, &audio_cb_time, &new_width, &new_height, &new_viz_type);
+
+    calc_index_and_config_aviz(aviz, delay, audio_cb_time, new_width, new_height, new_viz_type);
+
+    switch (aviz->viz_type) {
+    case AVIZ_TIME_DOMAIN:
+        time_domain_visualize(aviz);
+        break;
+    case AVIZ_FREQ_DOMAIN:
+        frequency_domain_visualize(aviz);
+        break;
+    default:
+        /* AVIZ_NONE (or unknown): emit a background-colored frame */
+        fillrect(aviz->vizpic, 0, 0, aviz->screen_width, aviz->screen_height,
+                 aviz->bgcolor, aviz->hsub, aviz->vsub);
+        break;
+    }
+
+    avfilter_start_frame(link, avfilter_ref_buffer(aviz->vizpic, ~0));
+    avfilter_draw_slice(link, 0, link->h, 1);
+    avfilter_end_frame(link);
+
+    return 0;
+}
+
+AVFilter avfilter_af_aviz = {
+    .name        = "aviz",
+    .description = NULL_IF_CONFIG_SMALL("Visualize input audio samples and output picture frames"),
+
+    .init        = init,
+    .uninit      = uninit,
+
+    .priv_size   = sizeof(AVizContext),
+
+    /* audio input: samples are buffered into the ring buffer by filter_samples */
+    .inputs      = (AVFilterPad[]) {{ .name           = "default",
+                                      .type           = AVMEDIA_TYPE_AUDIO,
+                                      .filter_samples = filter_samples,
+                                      .config_props   = input_config_props,
+                                      .min_perms      = AV_PERM_READ, },
+                                    { .name = NULL}},
+    /* video output: frames are rendered on demand by request_frame */
+    .outputs     = (AVFilterPad[]) {{ .name           = "default",
+                                      .type           = AVMEDIA_TYPE_VIDEO,
+                                      .config_props   = output_config_props,
+                                      .request_frame  = request_frame, },
+                                    { .name = NULL}},
+};
diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
index ffd95db..7c947cc 100644
--- a/libavfilter/allfilters.c
+++ b/libavfilter/allfilters.c
@@ -34,6 +34,7 @@ void avfilter_register_all(void)
return;
initialized = 1;
+ REGISTER_FILTER (AVIZ, aviz, af);
REGISTER_FILTER (RESAMPLE, resample, af);
REGISTER_FILTER (ASPECT, aspect, vf);
_______________________________________________
FFmpeg-soc mailing list
[email protected]
https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-soc