[FFmpeg-devel] [PATCH] psy_snr:Psychoacoustic SNR for audio files
Senjuti Kundu
senjutikundu93 at gmail.com
Thu Oct 30 19:03:51 CET 2014
Implemented an SNR for audio files which takes psychoacoustic masking into account. This results in an SNR which is closer to how humans perceive sound, compared to tiny_psnr, which directly compares audio signals.
Signed-off-by: Senjuti Kundu <senjutikundu93 at gmail.com>
---
tests/psy_snr.c | 421 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 421 insertions(+)
create mode 100644 tests/psy_snr.c
diff --git a/tests/psy_snr.c b/tests/psy_snr.c
new file mode 100644
index 0000000..94041ed
--- /dev/null
+++ b/tests/psy_snr.c
@@ -0,0 +1,421 @@
+/*
+ * Copyright (c) 2003 Michael Niedermayer <michaelni at gmx.at>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/*
+ * Psy-SNR - Psychoacoustic SNR for audio files.
+ * Author - Senjuti Kundu <senjutikundu93 at gmail.com>
+ * Input format - tiny_psysnr <file1> <file2>
+ * [<elem size>|u8|s16|f32|f64 [<shift> [<skip bytes> [<shift search range>]]]]
+ * WAV headers are skipped automatically.
+ * SIZE can be changed to adjust the window size as needed.
+ * Compile using: gcc psy_snr.c $(pkg-config --cflags --libs libavformat libavcodec)
+ * -I /usr/local/include/libavcodec/
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <math.h>
+#include <float.h>
+#include <limits.h>
+
+#include "libavutil/intfloat.h"
+#include "libavutil/intreadwrite.h"
+#include "libavcodec/avfft.h"
+#include "libavutil/mem.h"
+
+#define FFMIN(a, b) ((a) > (b) ? (b) : (a)) /* smaller of a and b; arguments may be evaluated twice */
+#define FFMAX(a, b) ((a) > (b) ? (a) : (b)) /* larger of a and b; arguments may be evaluated twice */
+#define F 100 /* decimal scale of the integer results: printed as value / F "." value % F */
+// Original author's note: size should be close to 20k (the rationale is not stated in this file).
+#define SIZE 1024 /* bytes read from each input file per analysis window */
+#define db_fw_rollof 4 /* roll-off used by get_mask_array() towards higher indices */
+#define db_bw_rollof 4 /* roll-off used by get_mask_array() towards lower indices (doubled there) */
+#define db_attenuation 0.1 /* centre entry of the masking function is 10^(-db_attenuation) */
+
+uint64_t exp16_table[21] = { /* entry i is exp(2^i / 65536) in 16.16 fixed point, i.e. about 65536 * e^(2^(i-16)); entry 16 is e itself */
+ 65537,
+ 65538,
+ 65540,
+ 65544,
+ 65552,
+ 65568,
+ 65600,
+ 65664,
+ 65793,
+ 66050,
+ 66568,
+ 67616,
+ 69763,
+ 74262,
+ 84150,
+ 108051,
+ 178145,
+ 484249,
+ 3578144,
+ 195360063,
+ 582360139072LL,
+};
+
+#if 0
+/* 16.16 fixed-point exp(); compiled out by the enclosing #if 0.
+ * Starts from 1.0 (1 << 16) and, for every set bit i in 0..19 of a,
+ * multiplies the running value by exp16_table[i], rounding and shifting
+ * the product back down by 16 bits. */
+static unsigned int exp16(unsigned int a){
+ int i;
+ int out= 1<<16;
+
+ for(i=19;i>=0;i--){
+ if(a&(1<<i))
+ out= (out*exp16_table[i] + (1<<15))>>16;
+ }
+
+ return out;
+}
+#endif
+
+/* 16.16 fixed-point natural logarithm.
+ *
+ * a is a 16.16 fixed-point value (65536 represents 1.0); the result is
+ * log(a / 65536) in the same 16.16 format. The result is assembled bit by
+ * bit: for each table entry exp16_table[i] = exp(2^i / 65536), from the
+ * largest down, a is divided by the entry whenever it is at least that
+ * large, and bit i of the result is set.
+ *
+ * Values below 1.0 are handled as -log16(1 / a).
+ *
+ * NOTE: a == 0 divides by zero in that reciprocal, so callers must not
+ * pass 0. The internal "a <<= 16" also drops high bits once a reaches 2^48.
+ */
+static int64_t log16(uint64_t a)
+{
+ int i;
+ int out = 0;
+
+ if (a < 1 << 16) /* a < 1.0 */
+ return -log16((1LL << 32) / a); /* (1 << 32) / a is the 16.16 reciprocal of a */
+ a <<= 16; /* 16 extra fractional bits for the divisions below */
+
+ for (i = 20; i >= 0; i--) {
+ int64_t b = exp16_table[i];
+ if (a < (b << 16)) /* remaining value is smaller than exp(2^i / 65536) */
+ continue;
+ out |= 1 << i;
+ a = ((a / b) << 16) + (((a % b) << 16) + b / 2) / b; /* a = (a << 16) / b, rounded, without widening a first */
+ }
+ return out;
+}
+/* Integer square root: returns the largest value whose square does not
+ * exceed a. The root is built one bit at a time, from bit 31 down to
+ * bit 0; ret_sq always holds ret * ret so no multiplication by ret is
+ * needed inside the loop. */
+static uint64_t int_sqrt(uint64_t a)
+{
+ uint64_t ret = 0;
+ uint64_t ret_sq = 0;
+ int s;
+
+ for (s = 31; s >= 0; s--) {
+ uint64_t b = ret_sq + (1ULL << (s * 2)) + (ret << s) * 2; /* (ret + 2^s)^2 */
+ if (b <= a) { /* bit s fits into the root: keep it */
+ ret_sq = b;
+ ret += 1ULL << s;
+ }
+ }
+ return ret;
+}
+/* Read a 16-bit sample stored low byte first at p[0..1] and return it as
+ * a signed value. The unsigned bit pattern is reinterpreted through a
+ * union instead of being cast. */
+static int16_t get_s16l(uint8_t *p)
+{
+ union {
+ uint16_t u;
+ int16_t s;
+ } v;
+ v.u = p[0] | p[1] << 8; /* little-endian: p[0] is the low byte */
+ return v.s;
+}
+
+/**
+ * Read a 32-bit float stored low byte first at p[0..3].
+ *
+ * @param p pointer to at least 4 readable bytes
+ * @return the float whose bit pattern is p[0] | p[1]<<8 | p[2]<<16 | p[3]<<24
+ */
+static float get_f32l(uint8_t *p)
+{
+    union av_intfloat32 v;
+
+    /* Widen each byte to uint32_t before shifting: shifting a promoted int
+     * by 24 would reach the sign bit whenever p[3] >= 0x80, which is
+     * undefined behaviour for a signed type. */
+    v.i = (uint32_t)p[0]       |
+          (uint32_t)p[1] <<  8 |
+          (uint32_t)p[2] << 16 |
+          (uint32_t)p[3] << 24;
+    return v.f;
+}
+/* Read a 64-bit float from the 8 bytes at p.
+ * AV_RL64() and av_int2double() come from the libavutil headers included
+ * above; presumably they read a little-endian 64-bit integer and
+ * reinterpret its bits as a double (confirm against libavutil). */
+static double get_f64l(uint8_t *p)
+{
+ return av_int2double(AV_RL64(p));
+}
+
+/**
+ * Build the masking (spreading) function table.
+ *
+ * With mid = tempsize / 2 the table is defined recursively:
+ *   m[mid]     = 10^(-db_attenuation)
+ *   m[mid + k] = m[mid + k - 1] * 10^(-db_fw_rollof     / (20 * k))   for k >= 1
+ *   m[mid - k] = m[mid - k + 1] * 10^(-2 * db_bw_rollof / (20 * k))   for k >= 1
+ * so the attenuation added per step shrinks as 1/k on both sides of the
+ * centre, and the lower side falls off twice as fast in dB.
+ *
+ * @param tempsize number of entries to generate
+ * @return malloc()ed array of tempsize floats that the caller must free(),
+ *         or NULL if tempsize is not positive or the allocation fails
+ */
+static float* get_mask_array(int tempsize){
+    const double ln10 = log(10);
+    const int mid = tempsize / 2;
+    float *maskingfunc;
+    int i;
+
+    if (tempsize <= 0)
+        return NULL;
+
+    maskingfunc = malloc(tempsize * sizeof(*maskingfunc));
+    if (!maskingfunc)
+        return NULL;
+
+    maskingfunc[mid] = exp(-db_attenuation * ln10);
+    for (i = mid + 1; i < tempsize; i++)
+        maskingfunc[i] = maskingfunc[i - 1] * exp(-(db_fw_rollof * ln10) / (20 * (i - mid)));
+    for (i = mid - 1; i >= 0; i--)
+        maskingfunc[i] = maskingfunc[i + 1] * exp(-(2 * db_bw_rollof * ln10) / (20 * (mid - i)));
+
+    return maskingfunc;
+}
+
+/* Magnitude of one complex spectrum bin. */
+static double bin_magnitude(const FFTComplex *c)
+{
+    return sqrt((c->re * c->re) + (c->im * c->im));
+}
+
+/**
+ * Compute a masking threshold for every bin of a spectrum.
+ *
+ * With mid = tempsize / 2, mask[i] is the largest of
+ *   - the magnitude of bin i     weighted by maskingfunc[mid],
+ *   - the magnitude of bin i - 1 weighted by maskingfunc[mid - 1],
+ *   - the magnitude of bin i + 1 weighted by maskingfunc[mid + 1].
+ * A neighbour that would lie outside the spectrum (before bin 0 or after
+ * the last bin) is left out instead of being read out of bounds.
+ *
+ * @param a           spectrum with tempsize bins
+ * @param tempsize    number of bins; maskingfunc must have as many entries
+ * @param maskingfunc table as produced by get_mask_array(tempsize)
+ * @return malloc()ed array of tempsize floats that the caller must free(),
+ *         or NULL on invalid arguments or allocation failure
+ */
+static float* get_mask(FFTComplex* a, int tempsize, float* maskingfunc){
+    const int mid = tempsize / 2;
+    float *mask;
+    int i;
+
+    /* mid - 1 and mid + 1 must both be valid maskingfunc indices */
+    if (!a || !maskingfunc || tempsize < 3)
+        return NULL;
+
+    mask = malloc(tempsize * sizeof(*mask));
+    if (!mask)
+        return NULL;
+
+    for (i = 0; i < tempsize; i++) {
+        float level = maskingfunc[mid] * bin_magnitude(&a[i]);
+
+        if (i > 0) {
+            float prev = maskingfunc[mid - 1] * bin_magnitude(&a[i - 1]);
+            level = FFMAX(level, prev);
+        }
+        if (i < tempsize - 1) {
+            float next = maskingfunc[mid + 1] * bin_magnitude(&a[i + 1]);
+            level = FFMAX(level, next);
+        }
+        mask[i] = level;
+    }
+    return mask;
+}
+
+/**
+ * Masked spectral distance between two spectra.
+ *
+ * For every bin the magnitude of the complex difference a[k] - b[k] is
+ * divided by (mask[k] + 1); the quotients are summed over all bins.
+ *
+ * @param a        first spectrum, tempsize bins
+ * @param b        second spectrum, tempsize bins
+ * @param mask     per-bin masking levels, tempsize entries
+ * @param tempsize number of bins to accumulate
+ * @return the accumulated sum
+ */
+static double get_psy_sse(FFTComplex* a,FFTComplex* b, float* mask, int tempsize){
+    double total = 0;
+    int k;
+
+    for (k = 0; k < tempsize; k++) {
+        const FFTComplex *x = &a[k];
+        const FFTComplex *y = &b[k];
+
+        total += (sqrt(((x->re - y->re) * (x->re - y->re)) +
+                       ((x->im - y->im) * (x->im - y->im)))) / (mask[k] + 1);
+    }
+    return total;
+}
+
+/**
+ * Decode the sample of len bytes stored at p into a float.
+ * len 1 is an unsigned byte, len 2 goes through get_s16l(), len 4 through
+ * get_f32l() and len 8 through get_f64l(); any other size yields 0.
+ */
+static float load_sample(uint8_t *p, int len)
+{
+    switch (len) {
+    case 1:
+        return p[0];
+    case 2:
+        return get_s16l(p);
+    case 4:
+        return get_f32l(p);
+    case 8:
+        return (float) get_f64l(p);
+    default:
+        return 0;
+    }
+}
+
+/**
+ * Compare two audio streams window by window and print a psychoacoustic SNR.
+ *
+ * Both files are rewound (when seekable), RIFF/WAVE headers are skipped up
+ * to the payload of the "data" chunk, then shift and skip_bytes are applied.
+ * The streams are read in windows of SIZE bytes; each window is converted
+ * to complex samples (zero padded), transformed with an FFT, and the masked
+ * distance from get_psy_sse() is accumulated. One result line is printed to
+ * stdout.
+ *
+ * @param f          the two input files
+ * @param len        bytes per sample: 1 (u8), 2 (s16), 4 (f32) or 8 (f64)
+ * @param shift      bytes to skip in f[0] when positive, in f[1] (by the
+ *                   absolute value) when negative
+ * @param skip_bytes bytes to skip in both files after the header
+ * @return for len 1/2 the PSNR multiplied by F; for len 4/8 the PSNR
+ *         truncated to int; 1 if a header could not be read; -1 if len is
+ *         unsupported or a required allocation fails
+ */
+static int run_psnr(FILE *f[2], int len, int shift, int skip_bytes)
+{
+    const int is_float = len == 4 || len == 8;
+    int i, j;
+    uint64_t sse = 0;
+    double sse_d = 0.0;
+    uint8_t buf[2][SIZE];
+    int size0 = 0;
+    int size1 = 0;
+    uint64_t maxdist = 0;
+    double maxdist_d = 0.0;
+    int noseek;
+    int failed = 0;
+    FFTContext *fft;
+    float *maskingfunc;
+
+    if (len != 1 && len != 2 && len != 4 && len != 8)
+        return -1;
+
+    /* len is validated above, so SIZE/len is a power of two between 128 and 1024 */
+    DECLARE_ALIGNED(32, FFTComplex, fftcomplexa)[SIZE/len];
+    DECLARE_ALIGNED(32, FFTComplex, fftcomplexb)[SIZE/len];
+
+    noseek = fseek(f[0], 0, SEEK_SET) ||
+             fseek(f[1], 0, SEEK_SET);
+
+    if (!noseek) {
+        for (i = 0; i < 2; i++) {
+            uint8_t *p = buf[i];
+            if (fread(p, 1, 12, f[i]) != 12)
+                return 1;
+            if (!memcmp(p, "RIFF", 4) &&
+                !memcmp(p + 8, "WAVE", 4)) {
+                if (fread(p, 1, 8, f[i]) != 8)
+                    return 1;
+                /* walk the chunk list until the "data" chunk header is found */
+                while (memcmp(p, "data", 4)) {
+                    /* unsigned assembly: a top byte >= 0x80 must not turn
+                     * the chunk size into a negative (backwards) seek */
+                    uint32_t s = (uint32_t)p[4]       | (uint32_t)p[5] <<  8 |
+                                 (uint32_t)p[6] << 16 | (uint32_t)p[7] << 24;
+                    fseek(f[i], s, SEEK_CUR);
+                    if (fread(p, 1, 8, f[i]) != 8)
+                        return 1;
+                }
+            } else {
+                fseek(f[i], -12, SEEK_CUR);
+            }
+        }
+
+        fseek(f[shift < 0], abs(shift), SEEK_CUR);
+
+        fseek(f[0], skip_bytes, SEEK_CUR);
+        fseek(f[1], skip_bytes, SEEK_CUR);
+    }
+
+    /* One FFT context and one masking table serve every window and both
+     * signals; they are released once after the loop. */
+    fft         = av_fft_init((int)floor(log2(SIZE/len)), 0);
+    maskingfunc = get_mask_array(SIZE/len);
+    if (!fft || !maskingfunc) {
+        if (fft)
+            av_fft_end(fft);
+        free(maskingfunc);
+        return -1;
+    }
+
+    fflush(stdout);
+    for (;;) {
+        int s0 = fread(buf[0], 1, SIZE, f[0]);
+        int s1 = fread(buf[1], 1, SIZE, f[1]);
+        int tempsize = FFMIN(s0,s1);
+        float *mask;
+        double psysse;
+
+        if (s0 + s1 <= 0)
+            break;
+        size0 += s0;
+        size1 += s1;
+
+        for (j = 0; j < tempsize; j += len) {
+            const int k = j / len;
+            double dist;
+
+            fftcomplexa[k].re = load_sample(buf[0] + j, len);
+            fftcomplexb[k].re = load_sample(buf[1] + j, len);
+            fftcomplexa[k].im = 0;
+            fftcomplexb[k].im = 0;
+
+            /* fabs(), not abs(): abs() would truncate float differences
+             * to an integer before taking the absolute value */
+            dist = fabs(fftcomplexa[k].re - fftcomplexb[k].re);
+            if (is_float) {
+                if (dist > maxdist_d)
+                    maxdist_d = dist;
+            } else if ((uint64_t)dist > maxdist) {
+                maxdist = (uint64_t)dist;
+            }
+        }
+
+        /* zero-pad the part of the window that was not filled by both files */
+        for (; j < SIZE; j += len) {
+            fftcomplexa[j/len].re = 0;
+            fftcomplexb[j/len].re = 0;
+            fftcomplexa[j/len].im = 0;
+            fftcomplexb[j/len].im = 0;
+        }
+
+        av_fft_permute(fft, fftcomplexa);
+        av_fft_calc   (fft, fftcomplexa);
+        av_fft_permute(fft, fftcomplexb);
+        av_fft_calc   (fft, fftcomplexb);
+
+        mask = get_mask(fftcomplexa, SIZE/len, maskingfunc);
+        if (!mask) {
+            failed = 1;
+            break;
+        }
+        psysse = get_psy_sse(fftcomplexa, fftcomplexb, mask, SIZE/len);
+        free(mask);
+
+        sse   += psysse; /* integer accumulator: the fraction is dropped on every addition */
+        sse_d += psysse;
+    }
+
+    av_fft_end(fft);
+    free(maskingfunc);
+    if (failed)
+        return -1;
+
+    fflush(stdout);
+    i = FFMIN(size0, size1) / len;
+    if (!i)
+        i = 1;
+
+    if (!is_float) {
+        /* Largest sample value. Computed only for len 1/2: a shift by
+         * 8 * len bits would be undefined for len == 8. */
+        const int64_t max = (1LL << (8 * len)) - 1;
+        uint64_t psnr;
+        uint64_t dev = int_sqrt(((sse / i) * F * F) + (((sse % i) * F * F) + i / 2) / i);
+
+        if (sse)
+            /* 284619 is 65536 * 10 / ln(10): converts the 16.16 natural log to dB */
+            psnr = ((2 * log16(max << 16) + log16(i) - log16(sse)) *
+                    284619LL * F + (1LL << 31)) / (1LL << 32);
+        else
+            psnr = 1000 * F - 1; // floating point free infinity :)
+
+        printf("stddev:%5d.%02d PSYSNR:%3d.%02d MAXDIFF:%5"PRIu64" bytes:%9d/%9d\n",
+               (int)(dev / F), (int)(dev % F),
+               (int)(psnr / F), (int)(psnr % F),
+               maxdist, size0, size1);
+        return psnr;
+    } else {
+        char psnr_str[64];
+        double psnr = INT_MAX;
+        double dev = sqrt(sse_d / i);
+        uint64_t scale = (len == 4) ? (1ULL << 24) : (1ULL << 32);
+
+        if (sse_d) {
+            psnr = 2 * log(DBL_MAX) - log(i / sse_d);
+            snprintf(psnr_str, sizeof(psnr_str), "%5.02f", psnr);
+        } else
+            snprintf(psnr_str, sizeof(psnr_str), "inf");
+
+        maxdist = maxdist_d * scale;
+
+        printf("stddev:%10.2f PSYSNR:%s MAXDIFF:%10"PRIu64" bytes:%9d/%9d\n",
+               dev * scale, psnr_str, maxdist, size0, size1);
+        return psnr;
+    }
+}
+
+/**
+ * Command-line entry point.
+ *
+ * Usage: tiny_psysnr <file1> <file2>
+ *        [<elem size>|u8|s16|f32|f64 [<shift> [<skip bytes> [<shift search range>]]]]
+ *
+ * Runs run_psnr() once for every shift in [shift, shift + search range]
+ * and, when more than one shift was tried, prints the best result.
+ *
+ * @return 0 on success, 1 on a usage error, an unsupported sample format
+ *         or when an input file cannot be opened
+ */
+int main(int argc, char *argv[])
+{
+    FILE *f[2];
+    int len = 1;
+    int shift_first, skip_bytes, shift_last;
+    int shift;
+    int max_psnr = -1;
+    int max_psnr_shift = 0;
+
+    /* Reject a too-short command line before looking at any argument. */
+    if (argc < 3) {
+        printf("tiny_psysnr <file1> <file2> [<elem size>|u8|s16|f32|f64 [<shift> [<skip bytes> [<shift search range>]]]]\n");
+        printf("WAV headers are skipped automatically.\n");
+        return 1;
+    }
+
+    shift_first = argc < 5 ? 0 : atoi(argv[4]);
+    skip_bytes  = argc < 6 ? 0 : atoi(argv[5]);
+    shift_last  = shift_first + (argc < 7 ? 0 : atoi(argv[6]));
+
+    if (argc > 3) {
+        if (!strcmp(argv[3], "u8")) {
+            len = 1;
+        } else if (!strcmp(argv[3], "s16")) {
+            len = 2;
+        } else if (!strcmp(argv[3], "f32")) {
+            len = 4;
+        } else if (!strcmp(argv[3], "f64")) {
+            len = 8;
+        } else {
+            /* A bare number is taken as the element size; only 1 and 2 are accepted. */
+            char *end;
+            len = strtol(argv[3], &end, 0);
+            if (*end || len < 1 || len > 2) {
+                fprintf(stderr, "Unsupported sample format: %s\nSupported: u8, s16, f32, f64\n", argv[3]);
+                return 1;
+            }
+        }
+    }
+
+    f[0] = fopen(argv[1], "rb");
+    f[1] = fopen(argv[2], "rb");
+    if (!f[0] || !f[1]) {
+        fprintf(stderr, "Could not open input files.\n");
+        /* close whichever of the two did open */
+        if (f[0])
+            fclose(f[0]);
+        if (f[1])
+            fclose(f[1]);
+        return 1;
+    }
+
+    for (shift = shift_first; shift <= shift_last; shift++) {
+        int psnr = run_psnr(f, len, shift, skip_bytes);
+        /* on a tie, a negative shift replaces the current best */
+        if (psnr > max_psnr || (shift < 0 && psnr == max_psnr)) {
+            max_psnr = psnr;
+            max_psnr_shift = shift;
+        }
+    }
+    if (shift_last > shift_first)
+        printf("Best PSNR is %3d.%02d for shift %i\n", (int)(max_psnr / F), (int)(max_psnr % F), max_psnr_shift);
+
+    fclose(f[0]);
+    fclose(f[1]);
+    return 0;
+}
--
1.9.1
More information about the ffmpeg-devel
mailing list