FFmpeg
psymodel.h
Go to the documentation of this file.
1 /*
2  * audio encoder psychoacoustic model
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #ifndef AVCODEC_PSYMODEL_H
23 #define AVCODEC_PSYMODEL_H
24 
25 #include "avcodec.h"
26 
/**
 * Maximum possible number of bands.
 *
 * Sizes the per-channel band array (FFPsyChannel.psy_bands) and the
 * per-group coupling flags (FFPsyChannelGroup.coupling).
 */
#define PSY_MAX_BANDS 128
/**
 * Maximum number of channels.
 *
 * Sizes the channel pointer array of a channel group (FFPsyChannelGroup.ch).
 */
#define PSY_MAX_CHANS 20
31 
/* cutoff for VBR is purposely increased, since LP filtering actually
 * hinders VBR performance rather than the opposite
 */

/**
 * Derive a lowpass cutoff from the stream bitrate.
 *
 * When bit_rate is zero the result is sample_rate / 2. Otherwise the result
 * is the smallest of three estimates computed from the per-channel bitrate
 * (bit_rate / channels), additionally capped at 22000 and at sample_rate / 2.
 *
 * Every parameter is parenthesized in the expansion, so arbitrary
 * expressions (e.g. "a + b") may be passed. Parameters are still evaluated
 * more than once: do not pass expressions with side effects.
 *
 * @param bit_rate    total bitrate; zero selects the sample_rate / 2 fallback
 * @param channels    number of channels sharing bit_rate (must be non-zero
 *                    whenever bit_rate is non-zero, it is used as a divisor)
 * @param sample_rate sampling rate of the stream
 */
#define AAC_CUTOFF_FROM_BITRATE(bit_rate,channels,sample_rate) ((bit_rate) ? FFMIN3(FFMIN3( \
    FFMAX((bit_rate)/(channels)/5, (bit_rate)/(channels)*15/32 - 5500), \
    3000 + (bit_rate)/(channels)/4, \
    12000 + (bit_rate)/(channels)/16), \
    22000, \
    (sample_rate) / 2): ((sample_rate) / 2))

/**
 * Cutoff for a codec context.
 *
 * With AV_CODEC_FLAG_QSCALE set in s->flags the cutoff is s->sample_rate / 2;
 * otherwise it is derived from s->bit_rate, s->channels and s->sample_rate
 * through AAC_CUTOFF_FROM_BITRATE().
 *
 * @param s pointer expression to the codec context (evaluated several times)
 */
#define AAC_CUTOFF(s) ( \
    ((s)->flags & AV_CODEC_FLAG_QSCALE) \
    ? (s)->sample_rate / 2 \
    : AAC_CUTOFF_FROM_BITRATE((s)->bit_rate, (s)->channels, (s)->sample_rate) \
)
46 
/**
 * single band psychoacoustic information
 */
typedef struct FFPsyBand {
    int   bits;      /* NOTE(review): presumably a bit count for this band - confirm against the model implementation */
    float energy;    /* band energy; part of the band info set by FFPsyModel.analyze */
    float threshold; /* band threshold; part of the band info set by FFPsyModel.analyze */
    float spread;    /* Energy spread over the band */
} FFPsyBand;
56 
57 /**
58  * single channel psychoacoustic information
59  */
60 typedef struct FFPsyChannel {
61  FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< channel bands information
62  float entropy; ///< total PE for this channel
63 } FFPsyChannel;
64 
65 /**
66  * psychoacoustic information for an arbitrary group of channels
67  */
68 typedef struct FFPsyChannelGroup {
69  FFPsyChannel *ch[PSY_MAX_CHANS]; ///< pointers to the individual channels in the group
70  uint8_t num_ch; ///< number of channels in this group
71  uint8_t coupling[PSY_MAX_BANDS]; ///< allow coupling for this band in the group
73 
/**
 * windowing related information
 */
typedef struct FFPsyWindowInfo {
    int   window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next
    int   window_shape;   ///< window shape (sine/KBD/whatever)
    int   num_windows;    ///< number of windows in a frame
    int   grouping[8];    ///< window grouping (for e.g. AAC)
    float clipping[8];    ///< maximum absolute normalized intensity in the given window for clip avoidance
    int  *window_sizes;   ///< sequence of window sizes inside one frame (for e.g. WMA)
} FFPsyWindowInfo;
85 
86 /**
87  * context used by psychoacoustic model
88  */
89 typedef struct FFPsyContext {
90  AVCodecContext *avctx; ///< encoder context
91  const struct FFPsyModel *model; ///< encoder-specific model functions
92 
93  FFPsyChannel *ch; ///< single channel information
94  FFPsyChannelGroup *group; ///< channel group information
95  int num_groups; ///< number of channel groups
96  int cutoff; ///< lowpass frequency cutoff for analysis
97 
98  uint8_t **bands; ///< scalefactor band sizes for possible frame sizes
99  int *num_bands; ///< number of scalefactor bands for possible frame sizes
100  int num_lens; ///< number of scalefactor band sets
101 
102  struct {
103  int size; ///< size of the bitresevoir in bits
104  int bits; ///< number of bits used in the bitresevoir
105  int alloc; ///< number of bits allocated by the psy, or -1 if no allocation was done
106  } bitres;
107 
108  void* model_priv_data; ///< psychoacoustic model implementation private data
109 } FFPsyContext;
110 
111 /**
112  * codec-specific psychoacoustic model implementation
113  */
114 typedef struct FFPsyModel {
115  const char *name;
116  int (*init) (FFPsyContext *apc);
117 
118  /**
119  * Suggest window sequence for channel.
120  *
121  * @param ctx model context
122  * @param audio samples for the current frame
123  * @param la lookahead samples (NULL when unavailable)
124  * @param channel number of channel element to analyze
125  * @param prev_type previous window type
126  *
127  * @return suggested window information in a structure
128  */
129  FFPsyWindowInfo (*window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type);
130 
131  /**
132  * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels.
133  *
134  * @param ctx model context
135  * @param channel channel number of the first channel in the group to perform analysis on
136  * @param coeffs array of pointers to the transformed coefficients
137  * @param wi window information for the channels in the group
138  */
139  void (*analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi);
140 
141  void (*end) (FFPsyContext *apc);
142 } FFPsyModel;
143 
144 /**
145  * Initialize psychoacoustic model.
146  *
147  * @param ctx model context
148  * @param avctx codec context
149  * @param num_lens number of possible frame lengths
150  * @param bands scalefactor band lengths for all frame lengths
151  * @param num_bands number of scalefactor bands for all frame lengths
152  * @param num_groups number of channel groups
153  * @param group_map array with # of channels in group - 1, for each group
154  *
155  * @return zero if successful, a negative value if not
156  */
157 int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
158  const uint8_t **bands, const int *num_bands,
159  int num_groups, const uint8_t *group_map);
160 
161 /**
162  * Determine what group a channel belongs to.
163  *
164  * @param ctx psymodel context
165  * @param channel channel to locate the group for
166  *
167  * @return pointer to the FFPsyChannelGroup this channel belongs to
168  */
170 
171 /**
172  * Cleanup model context at the end.
173  *
174  * @param ctx model context
175  */
177 
178 
179 /**************************************************************************
180  * Audio preprocessing stuff. *
181  * This should be moved into some audio filter eventually. *
182  **************************************************************************/
184 
185 /**
186  * psychoacoustic model audio preprocessing initialization
187  */
189 
/**
 * Preprocess several channels of an audio frame in order to compress it better.
 *
 * @param ctx      preprocessing context
 * @param audio    samples to be filtered (in place)
 * @param channels number of channels to preprocess
 */
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels);
198 
/**
 * Cleanup audio preprocessing module.
 *
 * @param ctx preprocessing context
 */
void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx);
203 
204 #endif /* AVCODEC_PSYMODEL_H */
#define PSY_MAX_BANDS
maximum possible number of bands
Definition: psymodel.h:28
int num_groups
number of channel groups
Definition: psymodel.h:95
uint8_t ** bands
scalefactor band sizes for possible frame sizes
Definition: psymodel.h:98
FFPsyChannelGroup * group
channel group information
Definition: psymodel.h:94
static av_cold int init(AVCodecContext *avctx)
Definition: avrndec.c:35
channels
Definition: aptx.c:30
static int analyze(const uint8_t *buf, int size, int packet_size, int probe)
Definition: mpegts.c:574
psychoacoustic information for an arbitrary group of channels
Definition: psymodel.h:68
int alloc
number of bits allocated by the psy, or -1 if no allocation was done
Definition: psymodel.h:105
int * num_bands
number of scalefactor bands for possible frame sizes
Definition: psymodel.h:99
int * window_sizes
sequence of window sizes inside one frame (for eg. WMA)
Definition: psymodel.h:83
uint8_t
struct FFPsyPreprocessContext * ff_psy_preprocess_init(AVCodecContext *avctx)
psychoacoustic model audio preprocessing initialization
Definition: psymodel.c:103
int size
size of the bit reservoir in bits
Definition: psymodel.h:103
static av_cold int end(AVCodecContext *avctx)
Definition: avrndec.c:90
context used by psychoacoustic model
Definition: psymodel.h:89
single band psychoacoustic information
Definition: psymodel.h:50
AVCodecContext * avctx
Definition: psymodel.c:94
single channel psychoacoustic information
Definition: psymodel.h:60
int bits
Definition: psymodel.h:51
int num_windows
number of windows in a frame
Definition: psymodel.h:80
float energy
Definition: psymodel.h:52
codec-specific psychoacoustic model implementation
Definition: psymodel.h:114
static SDL_Window * window
Definition: ffplay.c:367
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
uint8_t num_ch
number of channels in this group
Definition: psymodel.h:70
AVFormatContext * ctx
Definition: movenc.c:48
void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx)
Cleanup audio preprocessing module.
Definition: psymodel.c:152
float entropy
total PE for this channel
Definition: psymodel.h:62
Libavcodec external API header.
main external API structure.
Definition: avcodec.h:1570
void * model_priv_data
psychoacoustic model implementation private data
Definition: psymodel.h:108
int bits
number of bits used in the bit reservoir
Definition: psymodel.h:104
#define PSY_MAX_CHANS
maximum number of channels
Definition: psymodel.h:30
static const float bands[]
FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)
Determine what group a channel belongs to.
Definition: psymodel.c:73
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:79
int cutoff
lowpass frequency cutoff for analysis
Definition: psymodel.h:96
const struct FFPsyModel * model
encoder-specific model functions
Definition: psymodel.h:91
uint8_t pi<< 24) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_U8,(uint64_t)((*(const uint8_t *) pi-0x80U))<< 56) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_U8,(*(const uint8_t *) pi-0x80)*(1.0f/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_U8,(*(const uint8_t *) pi-0x80)*(1.0/(1<< 7))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S16,(*(const int16_t *) pi >>8)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S16,(uint64_t)(*(const int16_t *) pi)<< 48) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S16,*(const int16_t *) pi *(1.0f/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S16,*(const int16_t *) pi *(1.0/(1<< 15))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S32,(*(const int32_t *) pi >>24)+0x80) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_S32,(uint64_t)(*(const int32_t *) pi)<< 32) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S32,*(const int32_t *) pi *(1.0f/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S32,*(const int32_t *) pi *(1.0/(1U<< 31))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_S64,(*(const int64_t *) pi >>56)+0x80) CONV_FUNC(AV_SAMPLE_FMT_FLT, float, AV_SAMPLE_FMT_S64,*(const int64_t *) pi *(1.0f/(UINT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_DBL, double, AV_SAMPLE_FMT_S64,*(const int64_t *) pi *(1.0/(UINT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(lrintf(*(const float *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(lrintf(*(const float *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_FLT, av_clipl_int32(llrintf(*(const float *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_FLT, llrintf(*(const float *) pi *(UINT64_C(1)<< 63))) CONV_FUNC(AV_SAMPLE_FMT_U8, uint8_t, AV_SAMPLE_FMT_DBL, av_clip_uint8(lrint(*(const double *) pi *(1<< 7))+0x80)) CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_DBL, 
av_clip_int16(lrint(*(const double *) pi *(1<< 15)))) CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_DBL, av_clipl_int32(llrint(*(const double *) pi *(1U<< 31)))) CONV_FUNC(AV_SAMPLE_FMT_S64, int64_t, AV_SAMPLE_FMT_DBL, llrint(*(const double *) pi *(UINT64_C(1)<< 63)))#define FMT_PAIR_FUNC(out, in) static conv_func_type *const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB *AV_SAMPLE_FMT_NB]={FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_U8), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S16), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S32), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_FLT), FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_DBL), 
FMT_PAIR_FUNC(AV_SAMPLE_FMT_U8, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S32, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_S64), FMT_PAIR_FUNC(AV_SAMPLE_FMT_S64, AV_SAMPLE_FMT_S64),};static void cpy1(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, len);}static void cpy2(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, 2 *len);}static void cpy4(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, 4 *len);}static void cpy8(uint8_t **dst, const uint8_t **src, int len){memcpy(*dst,*src, 8 *len);}AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt, int channels, const int *ch_map, int flags){AudioConvert *ctx;conv_func_type *f=fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt)+AV_SAMPLE_FMT_NB *av_get_packed_sample_fmt(in_fmt)];if(!f) return NULL;ctx=av_mallocz(sizeof(*ctx));if(!ctx) return NULL;if(channels==1){in_fmt=av_get_planar_sample_fmt(in_fmt);out_fmt=av_get_planar_sample_fmt(out_fmt);}ctx->channels=channels;ctx->conv_f=f;ctx->ch_map=ch_map;if(in_fmt==AV_SAMPLE_FMT_U8||in_fmt==AV_SAMPLE_FMT_U8P) memset(ctx->silence, 0x80, sizeof(ctx->silence));if(out_fmt==in_fmt &&!ch_map){switch(av_get_bytes_per_sample(in_fmt)){case 1:ctx->simd_f=cpy1;break;case 2:ctx->simd_f=cpy2;break;case 4:ctx->simd_f=cpy4;break;case 8:ctx->simd_f=cpy8;break;}}if(HAVE_X86ASM &&1) swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels);if(ARCH_ARM) swri_audio_convert_init_arm(ctx, out_fmt, in_fmt, channels);if(ARCH_AARCH64) swri_audio_convert_init_aarch64(ctx, out_fmt, in_fmt, channels);return ctx;}void swri_audio_convert_free(AudioConvert **ctx){av_freep(ctx);}int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len){int ch;int off=0;const int os=(out->planar?1:out->ch_count)*out->bps;unsigned 
misaligned=0;av_assert0(ctx->channels==out->ch_count);if(ctx->in_simd_align_mask){int planes=in->planar?in->ch_count:1;unsigned m=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) in->ch[ch];misaligned|=m &ctx->in_simd_align_mask;}if(ctx->out_simd_align_mask){int planes=out->planar?out->ch_count:1;unsigned m=0;for(ch=0;ch< planes;ch++) m|=(intptr_t) out->ch[ch];misaligned|=m &ctx->out_simd_align_mask;}if(ctx->simd_f &&!ctx->ch_map &&!misaligned){off=len &~15;av_assert1(off >=0);av_assert1(off<=len);av_assert2(ctx->channels==SWR_CH_MAX||!in->ch[ctx->channels]);if(off >0){if(out->planar==in->planar){int planes=out->planar?out->ch_count:1;for(ch=0;ch< planes;ch++){ctx->simd_f(out-> ch ch
Definition: audioconvert.c:56
void ff_psy_end(FFPsyContext *ctx)
Cleanup model context at the end.
Definition: psymodel.c:83
const char * name
Definition: psymodel.h:115
int
windowing related information
Definition: psymodel.h:77
channel
Use these values when setting the channel map with ebur128_set_channel().
Definition: ebur128.h:39
int num_lens
number of scalefactor band sets
Definition: psymodel.h:100
FFPsyChannel * ch
single channel information
Definition: psymodel.h:93
AVCodecContext * avctx
encoder context
Definition: psymodel.h:90
float threshold
Definition: psymodel.h:53
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels)
Preprocess several channels of an audio frame in order to compress it better.
Definition: psymodel.c:139
float spread
Definition: psymodel.h:54
int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, const uint8_t **bands, const int *num_bands, int num_groups, const uint8_t *group_map)
Initialize psychoacoustic model.
Definition: psymodel.c:31