[FFmpeg-devel] [PATCH] psy_snr:Psychoacoustic SNR for audio files

Michael Niedermayer michaelni at gmx.at
Thu Oct 30 20:50:51 CET 2014


On Thu, Oct 30, 2014 at 11:33:51PM +0530, Senjuti Kundu wrote:
> Implemented an SNR for audio files which takes into account psychoacoustic masking. This results in an SNR which is closer to how humans perceive sound, compared to tiny_psnr, which directly compares audio signals
> 
> Signed-off-by: Senjuti Kundu <senjutikundu93 at gmail.com>
> ---
>  tests/psy_snr.c | 421 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 421 insertions(+)
>  create mode 100644 tests/psy_snr.c
> 
> diff --git a/tests/psy_snr.c b/tests/psy_snr.c
> new file mode 100644
> index 0000000..94041ed
> --- /dev/null
> +++ b/tests/psy_snr.c
> @@ -0,0 +1,421 @@
> +/*
> + * Copyright (c) 2003 Michael Niedermayer <michaelni at gmx.at>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/*
> + * Psy-SNR - Psychoacoustic SNR for audio files.
> + * Author - Senjuti Kundu <senjutikundu93 at gmail.com>
> + * Input format - tiny_psysnr <file1> <file2>
> + *              [<elem size>|u8|s16|f32|f64 [<shift> [<skip bytes> [<shift search range>]]]]
> + * WAV headers are skipped automatically.
> + * SIZE can be changed to adjust window size as need be.
> + * compile using gcc psysnr.c $(pkg-config --cflags --libs libavformat libavcodec)
> + * -I /usr/local/include/libavcodec/
> + */
> +
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <inttypes.h>
> +#include <math.h>
> +#include <float.h>
> +#include <limits.h>
> +
> +#include "libavutil/intfloat.h"
> +#include "libavutil/intreadwrite.h"
> +#include "libavcodec/avfft.h"
> +#include "libavutil/mem.h"
> +
> +#define FFMIN(a, b) ((a) > (b) ? (b) : (a))
> +#define FFMAX(a, b) ((a) > (b) ? (a) : (b))
> +#define F 100
> +//size should be close to 20k
> +#define SIZE 1024

> +#define db_fw_rollof 4
> +#define db_bw_rollof 4
> +#define db_attenuation 0.1

#define (macro) names should be all uppercase


> +
> +uint64_t exp16_table[21] = {
> +           65537,
> +           65538,
> +           65540,
> +           65544,
> +           65552,
> +           65568,
> +           65600,
> +           65664,
> +           65793,
> +           66050,
> +           66568,
> +           67616,
> +           69763,
> +           74262,
> +           84150,
> +          108051,
> +          178145,
> +          484249,
> +         3578144,
> +       195360063,
> +    582360139072LL,
> +};
> +
> +#if 0
> +// 16.16 fixpoint exp()
> +static unsigned int exp16(unsigned int a){
> +    int i;
> +    int out= 1<<16;
> +
> +    for(i=19;i>=0;i--){
> +        if(a&(1<<i))
> +            out= (out*exp16_table[i] + (1<<15))>>16;
> +    }
> +
> +    return out;
> +}
> +#endif
> +
> +// 16.16 fixpoint log()
> +static int64_t log16(uint64_t a)
> +{
> +    int i;
> +    int out = 0;
> +
> +    if (a < 1 << 16)
> +        return -log16((1LL << 32) / a);
> +    a <<= 16;
> +
> +    for (i = 20; i >= 0; i--) {
> +        int64_t b = exp16_table[i];
> +        if (a < (b << 16))
> +            continue;
> +        out |= 1 << i;
> +        a    = ((a / b) << 16) + (((a % b) << 16) + b / 2) / b;
> +    }
> +    return out;
> +}
> +
> +static uint64_t int_sqrt(uint64_t a)
> +{
> +    uint64_t ret    = 0;
> +    uint64_t ret_sq = 0;
> +    int s;
> +
> +    for (s = 31; s >= 0; s--) {
> +        uint64_t b = ret_sq + (1ULL << (s * 2)) + (ret << s) * 2;
> +        if (b <= a) {
> +            ret_sq = b;
> +            ret   += 1ULL << s;
> +        }
> +    }
> +    return ret;
> +}
> +

These functions look duplicated from tiny_psnr; it would be better
to share them.


> +static int16_t get_s16l(uint8_t *p)
> +{
> +    union {
> +        uint16_t u;
> +        int16_t  s;
> +    } v;
> +    v.u = p[0] | p[1] << 8;
> +    return v.s;
> +}
> +
> +static float get_f32l(uint8_t *p)
> +{
> +    union av_intfloat32 v;
> +    v.i = p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
> +    return v.f;
> +}
> +
> +static double get_f64l(uint8_t *p)
> +{
> +    return av_int2double(AV_RL64(p));
> +}
> +
> +static float* get_mask_array(int tempsize){
> +    //modelling the mask function as a parabole. Others can be
> +    //explored as need be. y=(-(x-mid)2+c)/c
> +    int i = 0;
> +    float* maskingfunc = malloc(tempsize*sizeof(float));

> +    maskingfunc[tempsize/2] = exp(-db_attenuation*log(10));


> +    for (i = (tempsize/2)+1; i<tempsize; i++){
> +        maskingfunc[i] = maskingfunc[i-1]*exp(-(db_fw_rollof*log(10))/(20*(i-(tempsize/2))));;

double semicolon (;;) at the end of the line above



> +    }
> +    for(i = (tempsize/2)-1; i >= 0; i--){

> +        maskingfunc[i] = maskingfunc[i+1]*exp(-(2*db_bw_rollof*log(10))/(20*((tempsize/2)-i)));

this can be simplified (e.g. exp(-x*log(10)) is pow(10, -x), and the constant factor 2/20 reduces to 1/10)


> +    }
> +    return maskingfunc;
> +}
> +
> +static float* get_mask(FFTComplex* a, int tempsize, float* maskingfunc){
> +    int i = 0;
> +    int j = 0;
> +    float* mask = malloc(tempsize*sizeof(float));
> +    float self = 0;
> +    float next = 0;
> +    float prev = 0;

missing malloc failure check


> +
> +    for (i = 1; i<tempsize; i++){
> +        self = maskingfunc[tempsize/2]*sqrt((a[i].re*a[i].re)+(a[i].im*a[i].im));
> +        prev = maskingfunc[(tempsize/2)-1]*sqrt((a[i-1].re*a[i-1].re)+(a[i-1].im*a[i-1].im));

the complex magnitude ("abs") computation, sqrt(re*re + im*im), can be factored out


[...]

> +        FFTContext* fftcontexta = av_fft_init(floor(log2(SIZE/len)),0);

mixing declarations and statements causes problems with some compilers
also see av_log2(); this doesn't need floats to calculate the argument

Also, has psy_snr been compared to a known-correct reference
implementation, or been tested in some other way?

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Democracy is the form of government in which you can choose your dictator
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20141030/806abd8c/attachment.asc>


More information about the ffmpeg-devel mailing list