Implemented an SNR for audio files which takes psychoacoustic masking into account. This results in an SNR which is closer to how humans perceive sound, compared to tiny_psnr, which directly compares audio signals.
Signed-off-by: Senjuti Kundu <senjutikund...@gmail.com> --- tests/psy_snr.c | 421 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 421 insertions(+) create mode 100644 tests/psy_snr.c diff --git a/tests/psy_snr.c b/tests/psy_snr.c new file mode 100644 index 0000000..94041ed --- /dev/null +++ b/tests/psy_snr.c @@ -0,0 +1,421 @@ +/* + * Copyright (c) 2003 Michael Niedermayer <michae...@gmx.at> + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * Psy-SNR - Psychoacoustic SNR for audio files. + * Author - Senjuti Kundu <senjutikund...@gmail.com> + * Input format - tiny_psysnr <file1> <file2> + * [<elem size>|u8|s16|f32|f64 [<shift> [<skip bytes> [<shift search range>]]]] + * WAV headers are skipped automatically. + * SIZE can be changed to adjust window size as need be. + * compile using gcc psysnr.c $(pkg-config --cflags --libs libavformat libavcodec) + * -I /usr/local/include/libavcodec/ + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <math.h> +#include <float.h> +#include <limits.h> + +#include "libavutil/intfloat.h" +#include "libavutil/intreadwrite.h" +#include "libavcodec/avfft.h" +#include "libavutil/mem.h" + +#define FFMIN(a, b) ((a) > (b) ? 
(b) : (a)) +#define FFMAX(a, b) ((a) > (b) ? (a) : (b)) +#define F 100 +//size should be close to 20k +#define SIZE 1024 +#define db_fw_rollof 4 +#define db_bw_rollof 4 +#define db_attenuation 0.1 + +uint64_t exp16_table[21] = { + 65537, + 65538, + 65540, + 65544, + 65552, + 65568, + 65600, + 65664, + 65793, + 66050, + 66568, + 67616, + 69763, + 74262, + 84150, + 108051, + 178145, + 484249, + 3578144, + 195360063, + 582360139072LL, +}; + +#if 0 +// 16.16 fixpoint exp() +static unsigned int exp16(unsigned int a){ + int i; + int out= 1<<16; + + for(i=19;i>=0;i--){ + if(a&(1<<i)) + out= (out*exp16_table[i] + (1<<15))>>16; + } + + return out; +} +#endif + +// 16.16 fixpoint log() +static int64_t log16(uint64_t a) +{ + int i; + int out = 0; + + if (a < 1 << 16) + return -log16((1LL << 32) / a); + a <<= 16; + + for (i = 20; i >= 0; i--) { + int64_t b = exp16_table[i]; + if (a < (b << 16)) + continue; + out |= 1 << i; + a = ((a / b) << 16) + (((a % b) << 16) + b / 2) / b; + } + return out; +} + +static uint64_t int_sqrt(uint64_t a) +{ + uint64_t ret = 0; + uint64_t ret_sq = 0; + int s; + + for (s = 31; s >= 0; s--) { + uint64_t b = ret_sq + (1ULL << (s * 2)) + (ret << s) * 2; + if (b <= a) { + ret_sq = b; + ret += 1ULL << s; + } + } + return ret; +} + +static int16_t get_s16l(uint8_t *p) +{ + union { + uint16_t u; + int16_t s; + } v; + v.u = p[0] | p[1] << 8; + return v.s; +} + +static float get_f32l(uint8_t *p) +{ + union av_intfloat32 v; + v.i = p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24; + return v.f; +} + +static double get_f64l(uint8_t *p) +{ + return av_int2double(AV_RL64(p)); +} + +static float* get_mask_array(int tempsize){ + //modelling the mask function as a parabole. Others can be + //explored as need be. 
y=(-(x-mid)2+c)/c + int i = 0; + float* maskingfunc = malloc(tempsize*sizeof(float)); + maskingfunc[tempsize/2] = exp(-db_attenuation*log(10)); + for (i = (tempsize/2)+1; i<tempsize; i++){ + maskingfunc[i] = maskingfunc[i-1]*exp(-(db_fw_rollof*log(10))/(20*(i-(tempsize/2))));; + } + for(i = (tempsize/2)-1; i >= 0; i--){ + maskingfunc[i] = maskingfunc[i+1]*exp(-(2*db_bw_rollof*log(10))/(20*((tempsize/2)-i))); + } + return maskingfunc; +} + +static float* get_mask(FFTComplex* a, int tempsize, float* maskingfunc){ + int i = 0; + int j = 0; + float* mask = malloc(tempsize*sizeof(float)); + float self = 0; + float next = 0; + float prev = 0; + + for (i = 1; i<tempsize; i++){ + self = maskingfunc[tempsize/2]*sqrt((a[i].re*a[i].re)+(a[i].im*a[i].im)); + prev = maskingfunc[(tempsize/2)-1]*sqrt((a[i-1].re*a[i-1].re)+(a[i-1].im*a[i-1].im)); + mask[i] = FFMAX(self,prev); + } + for (i = tempsize-1; i>=0; i--){ + self = maskingfunc[tempsize/2]*sqrt((a[i].re*a[i].re)+(a[i].im*a[i].im)); + next = maskingfunc[tempsize/2+1]*sqrt((a[i+1].re*a[i+1].re)+(a[i+1].im*a[i+1].im)); + mask[i] = FFMAX(self,next); + } + return mask; +} + +static double get_psy_sse(FFTComplex* a,FFTComplex* b, float* mask, int tempsize){ + int i = 0; + double sse = 0; + for (i = 0; i<tempsize; i++){ + sse += (sqrt(((a[i].re-b[i].re)*(a[i].re-b[i].re))+ + ((a[i].im-b[i].im)*(a[i].im-b[i].im))))/(mask[i]+1); + } + return sse; +} + +static int run_psnr(FILE *f[2], int len, int shift, int skip_bytes) +{ + int i, j; + uint64_t sse = 0; + double sse_d = 0.0; + uint8_t buf[2][SIZE]; + int64_t max = (1LL << (8 * len)) - 1; + int size0 = 0; + int size1 = 0; + uint64_t maxdist = 0; + double maxdist_d = 0.0; + int noseek; + + + noseek = fseek(f[0], 0, SEEK_SET) || + fseek(f[1], 0, SEEK_SET); + + if (!noseek) { + for (i = 0; i < 2; i++) { + uint8_t *p = buf[i]; + if (fread(p, 1, 12, f[i]) != 12) + return 1; + if (!memcmp(p, "RIFF", 4) && + !memcmp(p + 8, "WAVE", 4)) { + if (fread(p, 1, 8, f[i]) != 8) + return 1; + while 
(memcmp(p, "data", 4)) { + int s = p[4] | p[5] << 8 | p[6] << 16 | p[7] << 24; + fseek(f[i], s, SEEK_CUR); + if (fread(p, 1, 8, f[i]) != 8) + return 1; + } + } else { + fseek(f[i], -12, SEEK_CUR); + } + } + + fseek(f[shift < 0], abs(shift), SEEK_CUR); + + fseek(f[0], skip_bytes, SEEK_CUR); + fseek(f[1], skip_bytes, SEEK_CUR); + } + + fflush(stdout); + for (;;) { + int s0 = fread(buf[0], 1, SIZE, f[0]); + int s1 = fread(buf[1], 1, SIZE, f[1]); + int tempsize = FFMIN(s0,s1); + DECLARE_ALIGNED(32, FFTComplex, fftcomplexa)[SIZE/len]; + DECLARE_ALIGNED(32, FFTComplex, fftcomplexb)[SIZE/len]; + + for (j = 0; j < tempsize; j += len) { + switch (len) { + case 1: + case 2: { + int64_t a = buf[0][j]; + int64_t b = buf[1][j]; + int dist; + if (len == 2) { + fftcomplexa[j/len].re = get_s16l(buf[0] + j); + fftcomplexb[j/len].re = get_s16l(buf[1] + j); + fftcomplexa[j/len].im = 0; + fftcomplexb[j/len].im = 0; + } else { + fftcomplexa[j/len].re = buf[0][j]; + fftcomplexb[j/len].re = buf[1][j]; + fftcomplexa[j/len].im = 0; + fftcomplexb[j/len].im = 0; + } + dist = abs(fftcomplexa[j/len].re-fftcomplexb[j/len].re); + if (dist > maxdist) + maxdist = dist; + break; + break; + } + case 4: + case 8: { + double dist, a, b; + if (len == 8) { + fftcomplexa[j/len].re = (float) get_f64l(buf[0] + j); + fftcomplexb[j/len].re = (float) get_f64l(buf[1] + j); + fftcomplexa[j/len].im = 0; + fftcomplexb[j/len].im = 0; + } else { + fftcomplexa[j/len].re = (float) get_f32l(buf[0] + j); + fftcomplexb[j/len].re = (float) get_f32l(buf[1] + j); + fftcomplexa[j/len].im = 0; + fftcomplexb[j/len].im = 0; + } + dist = abs(fftcomplexa[j/len].re-fftcomplexb[j/len].re); + if (dist > maxdist_d) + maxdist_d = dist; + break; + } + } + } + + for(;j<SIZE;j+=len){ + fftcomplexa[j/len].re = 0; + fftcomplexb[j/len].re = 0; + fftcomplexa[j/len].im = 0; + fftcomplexb[j/len].im = 0; + } + + size0 += s0; + size1 += s1; + if (s0 + s1 <= 0) + break; + + FFTContext* fftcontexta = av_fft_init(floor(log2(SIZE/len)),0); + 
av_fft_permute (fftcontexta, fftcomplexa); + int temp = 0; + av_fft_calc (fftcontexta, fftcomplexa); + FFTContext* fftcontextb = av_fft_init(floor(log2(SIZE/len)),0); + av_fft_permute (fftcontextb, fftcomplexb); + av_fft_calc (fftcontextb, fftcomplexb); + + float* maskingfunc = get_mask_array(SIZE/len); + float* mask = get_mask(fftcomplexa, SIZE/len, maskingfunc); + double psysse = get_psy_sse(fftcomplexa,fftcomplexb, mask, SIZE/len); + free(maskingfunc); + free(mask); + sse+=psysse; + sse_d+=psysse; + } + + fflush(stdout); + i = FFMIN(size0, size1) / len; + if (!i) + i = 1; + + switch (len) { + case 1: + case 2: { + uint64_t psnr; + uint64_t dev = int_sqrt(((sse / i) * F * F) + (((sse % i) * F * F) + i / 2) / i); + if (sse) + psnr = ((2 * log16(max << 16) + log16(i) - log16(sse)) * + 284619LL * F + (1LL << 31)) / (1LL << 32); + else + psnr = 1000 * F - 1; // floating point free infinity :) + + printf("stddev:%5d.%02d PSYSNR:%3d.%02d MAXDIFF:%5"PRIu64" bytes:%9d/%9d\n", + (int)(dev / F), (int)(dev % F), + (int)(psnr / F), (int)(psnr % F), + maxdist, size0, size1); + return psnr; + } + case 4: + case 8: { + char psnr_str[64]; + double psnr = INT_MAX; + double dev = sqrt(sse_d / i); + uint64_t scale = (len == 4) ? (1ULL << 24) : (1ULL << 32); + + if (sse_d) { + psnr = 2 * log(DBL_MAX) - log(i / sse_d); + snprintf(psnr_str, sizeof(psnr_str), "%5.02f", psnr); + } else + snprintf(psnr_str, sizeof(psnr_str), "inf"); + + maxdist = maxdist_d * scale; + + printf("stddev:%10.2f PSYSNR:%s MAXDIFF:%10"PRIu64" bytes:%9d/%9d\n", + dev * scale, psnr_str, maxdist, size0, size1); + return psnr; + } + } + return -1; +} + +int main(int argc, char *argv[]) +{ + FILE *f[2]; + int len = 1; + int shift_first= argc < 5 ? 0 : atoi(argv[4]); + int skip_bytes = argc < 6 ? 0 : atoi(argv[5]); + int shift_last = shift_first + (argc < 7 ? 
0 : atoi(argv[6])); + int shift; + int max_psnr = -1; + int max_psnr_shift = 0; + + if (argc > 3) { + if (!strcmp(argv[3], "u8")) { + len = 1; + } else if (!strcmp(argv[3], "s16")) { + len = 2; + } else if (!strcmp(argv[3], "f32")) { + len = 4; + } else if (!strcmp(argv[3], "f64")) { + len = 8; + } else { + char *end; + len = strtol(argv[3], &end, 0); + if (*end || len < 1 || len > 2) { + fprintf(stderr, "Unsupported sample format: %s\nSupported: u8, s16, f32, f64\n", argv[3]); + return 1; + } + } + } + + if (argc < 3) { + printf("tiny_psysnr <file1> <file2> [<elem size>|u8|s16|f32|f64 [<shift> [<skip bytes> [<shift search range>]]]]\n"); + printf("WAV headers are skipped automatically.\n"); + return 1; + } + + f[0] = fopen(argv[1], "rb"); + f[1] = fopen(argv[2], "rb"); + if (!f[0] || !f[1]) { + fprintf(stderr, "Could not open input files.\n"); + return 1; + } + + for (shift = shift_first; shift <= shift_last; shift++) { + int psnr = run_psnr(f, len, shift, skip_bytes); + if (psnr > max_psnr || (shift < 0 && psnr == max_psnr)) { + max_psnr = psnr; + max_psnr_shift = shift; + } + } + if (shift_last > shift_first) + printf("Best PSNR is %3d.%02d for shift %i\n", (int)(max_psnr / F), (int)(max_psnr % F), max_psnr_shift); + return 0; +} -- 1.9.1 _______________________________________________ ffmpeg-devel mailing list ffmpeg-devel@ffmpeg.org http://ffmpeg.org/mailman/listinfo/ffmpeg-devel