FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
psymodel.h
Go to the documentation of this file.
1 /*
2  * audio encoder psychoacoustic model
3  * Copyright (C) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #ifndef AVCODEC_PSYMODEL_H
23 #define AVCODEC_PSYMODEL_H
24 
25 #include "avcodec.h"
26 
27 /** maximum possible number of bands */
28 #define PSY_MAX_BANDS 128
29 /** maximum number of channels */
30 #define PSY_MAX_CHANS 20
31 
/**
 * Derive a lowpass cutoff frequency from the per-channel bitrate.
 *
 * With a non-zero bit_rate the result is the smallest of three bitrate-based
 * estimates, further limited to 22000 and to sample_rate / 2. With a zero
 * bit_rate the result is simply sample_rate / 2.
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments (e.g. "a + b") keep their intended meaning.
 *
 * @note Arguments are evaluated more than once; do not pass expressions
 *       with side effects.
 * @note channels must be non-zero whenever bit_rate is non-zero, since the
 *       expansion divides by it.
 * @note Relies on FFMIN3() and FFMAX() being in scope; they are not defined
 *       in this header.
 */
#define AAC_CUTOFF_FROM_BITRATE(bit_rate,channels,sample_rate) ((bit_rate) ? FFMIN3(FFMIN3( \
    FFMAX((bit_rate)/(channels)/5, (bit_rate)/(channels)*15/32 - 5500), \
    3000 + (bit_rate)/(channels)/4, \
    12000 + (bit_rate)/(channels)/16), \
    22000, \
    (sample_rate) / 2) : ((sample_rate) / 2))

/**
 * Select the lowpass cutoff for codec context s.
 *
 * The cutoff for VBR is purposely increased, since LP filtering actually
 * hinders VBR performance rather than the opposite: when CODEC_FLAG_QSCALE
 * is set in s->flags the cutoff is s->sample_rate / 2, otherwise it is
 * computed by AAC_CUTOFF_FROM_BITRATE() from s->bit_rate, s->channels and
 * s->sample_rate.
 *
 * @param s pointer expression to a context providing the flags, bit_rate,
 *          channels and sample_rate members (evaluated more than once)
 */
#define AAC_CUTOFF(s) ( \
    ((s)->flags & CODEC_FLAG_QSCALE) \
    ? (s)->sample_rate / 2 \
    : AAC_CUTOFF_FROM_BITRATE((s)->bit_rate, (s)->channels, (s)->sample_rate) \
)
46 
47 /**
48  * single band psychoacoustic information (element type of FFPsyChannel.psy_bands)
49  */
50 typedef struct FFPsyBand {
51  int bits; ///< bit count associated with this band (exact meaning not visible in this header -- TODO confirm against the model implementation)
52  float energy; ///< band energy; FFPsyModel.analyze is documented as setting this
53  float threshold; ///< band threshold; FFPsyModel.analyze is documented as setting this
54  float spread; /* Energy spread over the band */
55 } FFPsyBand;
56 
57 /**
58  * single channel psychoacoustic information
59  */
60 typedef struct FFPsyChannel {
61  FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< per-band information for this channel (room for PSY_MAX_BANDS entries)
62  float entropy; ///< total PE (presumably perceptual entropy -- TODO confirm) for this channel
63 } FFPsyChannel;
64 
65 /**
66  * psychoacoustic information for an arbitrary group of channels
67  */
68 typedef struct FFPsyChannelGroup {
69  FFPsyChannel *ch[PSY_MAX_CHANS]; ///< pointers to the individual channels in the group
70  uint8_t num_ch; ///< number of channels in this group
71  uint8_t coupling[PSY_MAX_BANDS]; ///< allow coupling for this band in the group
73 
74 /**
75  * windowing related information
76  */
77 typedef struct FFPsyWindowInfo {
78  int window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next
79  int window_shape; ///< window shape (sine/KBD/whatever)
80  int num_windows; ///< number of windows in a frame
81  int grouping[8]; ///< window grouping (for e.g. AAC)
82  float clipping[8]; ///< maximum absolute normalized intensity in the given window for clip avoidance
83  int *window_sizes; ///< sequence of window sizes inside one frame (for eg. WMA)
85 
86 /**
87  * context used by psychoacoustic model
88  */
89 typedef struct FFPsyContext {
90  AVCodecContext *avctx; ///< encoder context
91  const struct FFPsyModel *model; ///< encoder-specific model functions
92 
93  FFPsyChannel *ch; ///< single channel information
94  FFPsyChannelGroup *group; ///< channel group information
95  int num_groups; ///< number of channel groups
96  int cutoff; ///< lowpass frequency cutoff for analysis
97 
98  uint8_t **bands; ///< scalefactor band sizes for possible frame sizes
99  int *num_bands; ///< number of scalefactor bands for possible frame sizes
100  int num_lens; ///< number of scalefactor band sets
101 
102  struct {
103  int size; ///< size of the bit reservoir in bits
104  int bits; ///< number of bits used in the bit reservoir
105  int alloc; ///< number of bits allocated by the psy, or -1 if no allocation was done
106  } bitres;
107 
108  void* model_priv_data; ///< psychoacoustic model implementation private data
109 } FFPsyContext;
110 
111 /**
112  * codec-specific psychoacoustic model implementation
113  */
114 typedef struct FFPsyModel {
115  const char *name; ///< name of the model
116  int (*init) (FFPsyContext *apc); ///< model setup callback taking the psy context; the int result presumably follows the zero / negative-error convention documented for ff_psy_init -- TODO confirm
117 
118  /**
119  * Suggest window sequence for channel.
120  *
121  * @param ctx model context
122  * @param audio samples for the current frame
123  * @param la lookahead samples (NULL when unavailable)
124  * @param channel number of channel element to analyze
125  * @param prev_type previous window type
126  *
127  * @return suggested window information in a structure
128  */
129  FFPsyWindowInfo (*window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type);
130 
131  /**
132  * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels.
133  *
134  * @param ctx model context
135  * @param channel channel number of the first channel in the group to perform analysis on
136  * @param coeffs array of pointers to the transformed coefficients
137  * @param wi window information for the channels in the group
138  */
139  void (*analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi);
140 
141  void (*end) (FFPsyContext *apc); ///< model teardown callback taking the psy context (presumably invoked from ff_psy_end -- TODO confirm)
142 } FFPsyModel;
143 
144 /**
145  * Initialize psychoacoustic model.
146  *
147  * @param ctx model context
148  * @param avctx codec context
149  * @param num_lens number of possible frame lengths
150  * @param bands scalefactor band lengths for all frame lengths
151  * @param num_bands number of scalefactor bands for all frame lengths
152  * @param num_groups number of channel groups
153  * @param group_map array holding, for each group, the number of channels in that group minus one
154  *
155  * @return zero if successful, a negative value if not
156  */
157 int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
158  const uint8_t **bands, const int *num_bands,
159  int num_groups, const uint8_t *group_map);
160 
161 /**
162  * Determine what group a channel belongs to.
163  *
164  * @param ctx psymodel context
165  * @param channel channel to locate the group for
166  *
167  * @return pointer to the FFPsyChannelGroup this channel belongs to
168  */
170 
171 /**
172  * Cleanup model context at the end.
173  *
174  * @param ctx model context
175  */
177 
178 
179 /**************************************************************************
180  * Audio preprocessing stuff. *
181  * This should be moved into some audio filter eventually. *
182  **************************************************************************/
184 
185 /**
186  * psychoacoustic model audio preprocessing initialization
187  */
189 
190 /**
191  * Preprocess several channels of an audio frame in order to compress it better.
192  *
193  * @param ctx preprocessing context
194  * @param audio samples to be filtered (in place)
195  * @param channels number of channels to preprocess
196  */
197 void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels);
198 
199 /**
200  * Cleanup audio preprocessing module.
201  */
203 
204 #endif /* AVCODEC_PSYMODEL_H */
#define PSY_MAX_BANDS
maximum possible number of bands
Definition: psymodel.h:28
int num_groups
number of channel groups
Definition: psymodel.h:95
int grouping[8]
window grouping (for e.g. AAC)
Definition: psymodel.h:81
uint8_t ** bands
scalefactor band sizes for possible frame sizes
Definition: psymodel.h:98
FFPsyChannelGroup * group
channel group information
Definition: psymodel.h:94
FFPsyBand psy_bands[PSY_MAX_BANDS]
channel bands information
Definition: psymodel.h:61
psychoacoustic information for an arbitrary group of channels
Definition: psymodel.h:68
int alloc
number of bits allocated by the psy, or -1 if no allocation was done
Definition: psymodel.h:105
int * num_bands
number of scalefactor bands for possible frame sizes
Definition: psymodel.h:99
int * window_sizes
sequence of window sizes inside one frame (for eg. WMA)
Definition: psymodel.h:83
uint8_t
struct FFPsyPreprocessContext * ff_psy_preprocess_init(AVCodecContext *avctx)
psychoacoustic model audio preprocessing initialization
Definition: psymodel.c:103
int size
size of the bit reservoir in bits
Definition: psymodel.h:103
context used by psychoacoustic model
Definition: psymodel.h:89
single band psychoacoustic information
Definition: psymodel.h:50
AVCodecContext * avctx
Definition: psymodel.c:94
single channel psychoacoustic information
Definition: psymodel.h:60
FFPsyChannel * ch[PSY_MAX_CHANS]
pointers to the individual channels in the group
Definition: psymodel.h:69
int bits
Definition: psymodel.h:51
int num_windows
number of windows in a frame
Definition: psymodel.h:80
float energy
Definition: psymodel.h:52
codec-specific psychoacoustic model implementation
Definition: psymodel.h:114
void(* analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi)
Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels...
Definition: psymodel.h:139
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
uint8_t num_ch
number of channels in this group
Definition: psymodel.h:70
AVFormatContext * ctx
Definition: movenc.c:48
void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx)
Cleanup audio preprocessing module.
Definition: psymodel.c:152
float entropy
total PE for this channel
Definition: psymodel.h:62
Libavcodec external API header.
FFPsyWindowInfo(* window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type)
Suggest window sequence for channel.
Definition: psymodel.h:129
main external API structure.
Definition: avcodec.h:1649
void * model_priv_data
psychoacoustic model implementation private data
Definition: psymodel.h:108
int bits
number of bits used in the bit reservoir
Definition: psymodel.h:104
void(* end)(FFPsyContext *apc)
Definition: psymodel.h:141
#define PSY_MAX_CHANS
maximum number of channels
Definition: psymodel.h:30
FFPsyChannelGroup * ff_psy_find_group(FFPsyContext *ctx, int channel)
Determine what group a channel belongs to.
Definition: psymodel.c:73
int window_shape
window shape (sine/KBD/whatever)
Definition: psymodel.h:79
int cutoff
lowpass frequency cutoff for analysis
Definition: psymodel.h:96
const struct FFPsyModel * model
encoder-specific model functions
Definition: psymodel.h:91
void ff_psy_end(FFPsyContext *ctx)
Cleanup model context at the end.
Definition: psymodel.c:83
struct FFPsyContext::@88 bitres
uint8_t coupling[PSY_MAX_BANDS]
allow coupling for this band in the group
Definition: psymodel.h:71
const char * name
Definition: psymodel.h:115
windowing related information
Definition: psymodel.h:77
int num_lens
number of scalefactor band sets
Definition: psymodel.h:100
static const int16_t coeffs[]
FFPsyChannel * ch
single channel information
Definition: psymodel.h:93
int(* init)(FFPsyContext *apc)
Definition: psymodel.h:116
AVCodecContext * avctx
encoder context
Definition: psymodel.h:90
float threshold
Definition: psymodel.h:53
void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels)
Preprocess several channels of an audio frame in order to compress it better.
Definition: psymodel.c:139
float spread
Definition: psymodel.h:54
int window_type[3]
window type (short/long/transitional, etc.) - current, previous and next
Definition: psymodel.h:78
float clipping[8]
maximum absolute normalized intensity in the given window for clip avoidance
Definition: psymodel.h:82
int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, const uint8_t **bands, const int *num_bands, int num_groups, const uint8_t *group_map)
Initialize psychoacoustic model.
Definition: psymodel.c:31