FFmpeg
audio_mix_matrix.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2011 Michael Niedermayer (michaelni@gmx.at)
3  * Copyright (c) 2012 Justin Ruggles <justin.ruggles@gmail.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <stdint.h>
23 
24 #include "libavutil/common.h"
25 #include "libavutil/libm.h"
26 #include "libavutil/samplefmt.h"
27 #include "avresample.h"
28 #include "internal.h"
29 #include "audio_data.h"
30 #include "audio_mix.h"
31 
32 /* channel positions */
/* Each value below is used as a row or column index into the 64x64 working
 * matrix built by avresample_build_matrix().  That function pairs index i
 * with layout bit (1ULL << i) when routing identical input/output channels,
 * so a position is expected to equal the bit number of the matching AV_CH_*
 * flag (the AV_CH_* values themselves are defined outside this file). */
33 #define FRONT_LEFT 0
34 #define FRONT_RIGHT 1
35 #define FRONT_CENTER 2
36 #define LOW_FREQUENCY 3
37 #define BACK_LEFT 4
38 #define BACK_RIGHT 5
39 #define FRONT_LEFT_OF_CENTER 6
40 #define FRONT_RIGHT_OF_CENTER 7
41 #define BACK_CENTER 8
42 #define SIDE_LEFT 9
43 #define SIDE_RIGHT 10
44 #define TOP_CENTER 11
45 #define TOP_FRONT_LEFT 12
46 #define TOP_FRONT_CENTER 13
47 #define TOP_FRONT_RIGHT 14
48 #define TOP_BACK_LEFT 15
49 #define TOP_BACK_CENTER 16
50 #define TOP_BACK_RIGHT 17
51 #define STEREO_LEFT 29
52 #define STEREO_RIGHT 30
53 #define WIDE_LEFT 31
54 #define WIDE_RIGHT 32
55 #define SURROUND_DIRECT_LEFT 33
56 #define SURROUND_DIRECT_RIGHT 34
57 #define LOW_FREQUENCY_2 35
58 
/* Multiplied with surround_mix_level for the same-side surround coefficients
 * in the AV_MATRIX_ENCODING_DPLII branches of avresample_build_matrix(). */
59 #define SQRT3_2 1.22474487139158904909 /* sqrt(3/2) */
60 
61 static av_always_inline int even(uint64_t layout)
62 {
63  return (!layout || !!(layout & (layout - 1)));
64 }
65 
66 static int sane_layout(uint64_t layout)
67 {
68  /* check that there is at least 1 front speaker */
69  if (!(layout & AV_CH_LAYOUT_SURROUND))
70  return 0;
71 
72  /* check for left/right symmetry */
73  if (!even(layout & (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT)) ||
74  !even(layout & (AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT)) ||
75  !even(layout & (AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT)) ||
79  !even(layout & (AV_CH_STEREO_LEFT | AV_CH_STEREO_RIGHT)) ||
80  !even(layout & (AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT)) ||
82  return 0;
83 
84  return 1;
85 }
86 
87 int avresample_build_matrix(uint64_t in_layout, uint64_t out_layout,
88  double center_mix_level, double surround_mix_level,
89  double lfe_mix_level, int normalize,
90  double *matrix_out, int stride,
91  enum AVMatrixEncoding matrix_encoding)
92 {
93  int i, j, out_i, out_j;
94  double matrix[64][64] = {{0}};
95  int64_t unaccounted;
96  double maxcoef = 0;
97  int in_channels, out_channels;
98 
99  if ((out_layout & AV_CH_LAYOUT_STEREO_DOWNMIX) == AV_CH_LAYOUT_STEREO_DOWNMIX) {
100  out_layout = AV_CH_LAYOUT_STEREO;
101  }
102 
103  unaccounted = in_layout & ~out_layout;
104 
105  in_channels = av_get_channel_layout_nb_channels( in_layout);
106  out_channels = av_get_channel_layout_nb_channels(out_layout);
107 
108  memset(matrix_out, 0, out_channels * stride * sizeof(*matrix_out));
109 
110  /* check if layouts are supported */
111  if (!in_layout || in_channels > AVRESAMPLE_MAX_CHANNELS)
112  return AVERROR(EINVAL);
113  if (!out_layout || out_channels > AVRESAMPLE_MAX_CHANNELS)
114  return AVERROR(EINVAL);
115 
116  /* check if layouts are unbalanced or abnormal */
117  if (!sane_layout(in_layout) || !sane_layout(out_layout))
118  return AVERROR_PATCHWELCOME;
119 
120  /* route matching input/output channels */
121  for (i = 0; i < 64; i++) {
122  if (in_layout & out_layout & (1ULL << i))
123  matrix[i][i] = 1.0;
124  }
125 
126  /* mix front center to front left/right */
127  if (unaccounted & AV_CH_FRONT_CENTER) {
128  if ((out_layout & AV_CH_LAYOUT_STEREO) == AV_CH_LAYOUT_STEREO) {
129  if ((in_layout & AV_CH_LAYOUT_STEREO) == AV_CH_LAYOUT_STEREO) {
130  matrix[FRONT_LEFT ][FRONT_CENTER] += center_mix_level;
131  matrix[FRONT_RIGHT][FRONT_CENTER] += center_mix_level;
132  } else {
133  matrix[FRONT_LEFT ][FRONT_CENTER] += M_SQRT1_2;
134  matrix[FRONT_RIGHT][FRONT_CENTER] += M_SQRT1_2;
135  }
136  } else
137  return AVERROR_PATCHWELCOME;
138  }
139  /* mix front left/right to center */
140  if (unaccounted & AV_CH_LAYOUT_STEREO) {
141  if (out_layout & AV_CH_FRONT_CENTER) {
142  matrix[FRONT_CENTER][FRONT_LEFT ] += M_SQRT1_2;
143  matrix[FRONT_CENTER][FRONT_RIGHT] += M_SQRT1_2;
144  /* mix left/right/center to center */
145  if (in_layout & AV_CH_FRONT_CENTER)
146  matrix[FRONT_CENTER][FRONT_CENTER] = center_mix_level * M_SQRT2;
147  } else
148  return AVERROR_PATCHWELCOME;
149  }
150  /* mix back center to back, side, or front */
151  if (unaccounted & AV_CH_BACK_CENTER) {
152  if (out_layout & AV_CH_BACK_LEFT) {
153  matrix[BACK_LEFT ][BACK_CENTER] += M_SQRT1_2;
154  matrix[BACK_RIGHT][BACK_CENTER] += M_SQRT1_2;
155  } else if (out_layout & AV_CH_SIDE_LEFT) {
156  matrix[SIDE_LEFT ][BACK_CENTER] += M_SQRT1_2;
157  matrix[SIDE_RIGHT][BACK_CENTER] += M_SQRT1_2;
158  } else if (out_layout & AV_CH_FRONT_LEFT) {
159  if (matrix_encoding == AV_MATRIX_ENCODING_DOLBY ||
160  matrix_encoding == AV_MATRIX_ENCODING_DPLII) {
161  if (unaccounted & (AV_CH_BACK_LEFT | AV_CH_SIDE_LEFT)) {
162  matrix[FRONT_LEFT ][BACK_CENTER] -= surround_mix_level * M_SQRT1_2;
163  matrix[FRONT_RIGHT][BACK_CENTER] += surround_mix_level * M_SQRT1_2;
164  } else {
165  matrix[FRONT_LEFT ][BACK_CENTER] -= surround_mix_level;
166  matrix[FRONT_RIGHT][BACK_CENTER] += surround_mix_level;
167  }
168  } else {
169  matrix[FRONT_LEFT ][BACK_CENTER] += surround_mix_level * M_SQRT1_2;
170  matrix[FRONT_RIGHT][BACK_CENTER] += surround_mix_level * M_SQRT1_2;
171  }
172  } else if (out_layout & AV_CH_FRONT_CENTER) {
173  matrix[FRONT_CENTER][BACK_CENTER] += surround_mix_level * M_SQRT1_2;
174  } else
175  return AVERROR_PATCHWELCOME;
176  }
177  /* mix back left/right to back center, side, or front */
178  if (unaccounted & AV_CH_BACK_LEFT) {
179  if (out_layout & AV_CH_BACK_CENTER) {
180  matrix[BACK_CENTER][BACK_LEFT ] += M_SQRT1_2;
181  matrix[BACK_CENTER][BACK_RIGHT] += M_SQRT1_2;
182  } else if (out_layout & AV_CH_SIDE_LEFT) {
183  /* if side channels do not exist in the input, just copy back
184  channels to side channels, otherwise mix back into side */
185  if (in_layout & AV_CH_SIDE_LEFT) {
186  matrix[SIDE_LEFT ][BACK_LEFT ] += M_SQRT1_2;
187  matrix[SIDE_RIGHT][BACK_RIGHT] += M_SQRT1_2;
188  } else {
189  matrix[SIDE_LEFT ][BACK_LEFT ] += 1.0;
190  matrix[SIDE_RIGHT][BACK_RIGHT] += 1.0;
191  }
192  } else if (out_layout & AV_CH_FRONT_LEFT) {
193  if (matrix_encoding == AV_MATRIX_ENCODING_DOLBY) {
194  matrix[FRONT_LEFT ][BACK_LEFT ] -= surround_mix_level * M_SQRT1_2;
195  matrix[FRONT_LEFT ][BACK_RIGHT] -= surround_mix_level * M_SQRT1_2;
196  matrix[FRONT_RIGHT][BACK_LEFT ] += surround_mix_level * M_SQRT1_2;
197  matrix[FRONT_RIGHT][BACK_RIGHT] += surround_mix_level * M_SQRT1_2;
198  } else if (matrix_encoding == AV_MATRIX_ENCODING_DPLII) {
199  matrix[FRONT_LEFT ][BACK_LEFT ] -= surround_mix_level * SQRT3_2;
200  matrix[FRONT_LEFT ][BACK_RIGHT] -= surround_mix_level * M_SQRT1_2;
201  matrix[FRONT_RIGHT][BACK_LEFT ] += surround_mix_level * M_SQRT1_2;
202  matrix[FRONT_RIGHT][BACK_RIGHT] += surround_mix_level * SQRT3_2;
203  } else {
204  matrix[FRONT_LEFT ][BACK_LEFT ] += surround_mix_level;
205  matrix[FRONT_RIGHT][BACK_RIGHT] += surround_mix_level;
206  }
207  } else if (out_layout & AV_CH_FRONT_CENTER) {
208  matrix[FRONT_CENTER][BACK_LEFT ] += surround_mix_level * M_SQRT1_2;
209  matrix[FRONT_CENTER][BACK_RIGHT] += surround_mix_level * M_SQRT1_2;
210  } else
211  return AVERROR_PATCHWELCOME;
212  }
213  /* mix side left/right into back or front */
214  if (unaccounted & AV_CH_SIDE_LEFT) {
215  if (out_layout & AV_CH_BACK_LEFT) {
216  /* if back channels do not exist in the input, just copy side
217  channels to back channels, otherwise mix side into back */
218  if (in_layout & AV_CH_BACK_LEFT) {
219  matrix[BACK_LEFT ][SIDE_LEFT ] += M_SQRT1_2;
220  matrix[BACK_RIGHT][SIDE_RIGHT] += M_SQRT1_2;
221  } else {
222  matrix[BACK_LEFT ][SIDE_LEFT ] += 1.0;
223  matrix[BACK_RIGHT][SIDE_RIGHT] += 1.0;
224  }
225  } else if (out_layout & AV_CH_BACK_CENTER) {
226  matrix[BACK_CENTER][SIDE_LEFT ] += M_SQRT1_2;
227  matrix[BACK_CENTER][SIDE_RIGHT] += M_SQRT1_2;
228  } else if (out_layout & AV_CH_FRONT_LEFT) {
229  if (matrix_encoding == AV_MATRIX_ENCODING_DOLBY) {
230  matrix[FRONT_LEFT ][SIDE_LEFT ] -= surround_mix_level * M_SQRT1_2;
231  matrix[FRONT_LEFT ][SIDE_RIGHT] -= surround_mix_level * M_SQRT1_2;
232  matrix[FRONT_RIGHT][SIDE_LEFT ] += surround_mix_level * M_SQRT1_2;
233  matrix[FRONT_RIGHT][SIDE_RIGHT] += surround_mix_level * M_SQRT1_2;
234  } else if (matrix_encoding == AV_MATRIX_ENCODING_DPLII) {
235  matrix[FRONT_LEFT ][SIDE_LEFT ] -= surround_mix_level * SQRT3_2;
236  matrix[FRONT_LEFT ][SIDE_RIGHT] -= surround_mix_level * M_SQRT1_2;
237  matrix[FRONT_RIGHT][SIDE_LEFT ] += surround_mix_level * M_SQRT1_2;
238  matrix[FRONT_RIGHT][SIDE_RIGHT] += surround_mix_level * SQRT3_2;
239  } else {
240  matrix[FRONT_LEFT ][SIDE_LEFT ] += surround_mix_level;
241  matrix[FRONT_RIGHT][SIDE_RIGHT] += surround_mix_level;
242  }
243  } else if (out_layout & AV_CH_FRONT_CENTER) {
244  matrix[FRONT_CENTER][SIDE_LEFT ] += surround_mix_level * M_SQRT1_2;
245  matrix[FRONT_CENTER][SIDE_RIGHT] += surround_mix_level * M_SQRT1_2;
246  } else
247  return AVERROR_PATCHWELCOME;
248  }
249  /* mix left-of-center/right-of-center into front left/right or center */
250  if (unaccounted & AV_CH_FRONT_LEFT_OF_CENTER) {
251  if (out_layout & AV_CH_FRONT_LEFT) {
252  matrix[FRONT_LEFT ][FRONT_LEFT_OF_CENTER ] += 1.0;
253  matrix[FRONT_RIGHT][FRONT_RIGHT_OF_CENTER] += 1.0;
254  } else if (out_layout & AV_CH_FRONT_CENTER) {
257  } else
258  return AVERROR_PATCHWELCOME;
259  }
260  /* mix LFE into front left/right or center */
261  if (unaccounted & AV_CH_LOW_FREQUENCY) {
262  if (out_layout & AV_CH_FRONT_CENTER) {
263  matrix[FRONT_CENTER][LOW_FREQUENCY] += lfe_mix_level;
264  } else if (out_layout & AV_CH_FRONT_LEFT) {
265  matrix[FRONT_LEFT ][LOW_FREQUENCY] += lfe_mix_level * M_SQRT1_2;
266  matrix[FRONT_RIGHT][LOW_FREQUENCY] += lfe_mix_level * M_SQRT1_2;
267  } else
268  return AVERROR_PATCHWELCOME;
269  }
270 
271  /* transfer internal matrix to output matrix and calculate maximum
272  per-channel coefficient sum */
273  for (out_i = i = 0; out_i < out_channels && i < 64; i++) {
274  double sum = 0;
275  for (out_j = j = 0; out_j < in_channels && j < 64; j++) {
276  matrix_out[out_i * stride + out_j] = matrix[i][j];
277  sum += fabs(matrix[i][j]);
278  if (in_layout & (1ULL << j))
279  out_j++;
280  }
281  maxcoef = FFMAX(maxcoef, sum);
282  if (out_layout & (1ULL << i))
283  out_i++;
284  }
285 
286  /* normalize */
287  if (normalize && maxcoef > 1.0) {
288  for (i = 0; i < out_channels; i++)
289  for (j = 0; j < in_channels; j++)
290  matrix_out[i * stride + j] /= maxcoef;
291  }
292 
293  return 0;
294 }
#define BACK_LEFT
#define AV_CH_TOP_FRONT_RIGHT
#define AV_CH_LAYOUT_SURROUND
#define FRONT_RIGHT_OF_CENTER
#define AV_CH_TOP_FRONT_LEFT
#define M_SQRT1_2
Definition: mathematics.h:58
#define AV_CH_SURROUND_DIRECT_RIGHT
#define AV_CH_LAYOUT_STEREO
#define SQRT3_2
int av_get_channel_layout_nb_channels(uint64_t channel_layout)
Return the number of channels in the channel layout.
#define AV_CH_WIDE_LEFT
#define AV_CH_TOP_BACK_LEFT
static int sane_layout(uint64_t layout)
#define LOW_FREQUENCY
#define AV_CH_WIDE_RIGHT
#define AV_CH_LOW_FREQUENCY
#define AV_CH_BACK_LEFT
#define BACK_CENTER
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define FFMAX(a, b)
Definition: common.h:94
#define AV_CH_STEREO_RIGHT
See AV_CH_STEREO_LEFT.
#define AV_CH_LAYOUT_STEREO_DOWNMIX
external API header
#define FRONT_CENTER
#define AV_CH_FRONT_LEFT_OF_CENTER
#define AV_CH_FRONT_CENTER
#define AV_CH_FRONT_RIGHT_OF_CENTER
#define AVERROR_PATCHWELCOME
Not yet implemented in FFmpeg, patches welcome.
Definition: error.h:62
#define FRONT_LEFT
#define SIDE_RIGHT
int avresample_build_matrix(uint64_t in_layout, uint64_t out_layout, double center_mix_level, double surround_mix_level, double lfe_mix_level, int normalize, double *matrix_out, int stride, enum AVMatrixEncoding matrix_encoding)
#define AV_CH_FRONT_LEFT
#define AVRESAMPLE_MAX_CHANNELS
Definition: avresample.h:104
static av_always_inline int even(uint64_t layout)
Replacements for frequently missing libm functions.
#define AV_CH_TOP_BACK_RIGHT
#define FRONT_RIGHT
#define FRONT_LEFT_OF_CENTER
#define BACK_RIGHT
#define AV_CH_BACK_CENTER
#define AV_CH_SIDE_RIGHT
#define M_SQRT2
Definition: mathematics.h:61
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:104
common internal and external API header
#define AV_CH_SURROUND_DIRECT_LEFT
#define AV_CH_FRONT_RIGHT
#define av_always_inline
Definition: attributes.h:39
AVMatrixEncoding
#define SIDE_LEFT
#define AV_CH_SIDE_LEFT
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later.That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another.Frame references ownership and permissions
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel layout
#define AV_CH_BACK_RIGHT
#define AV_CH_STEREO_LEFT
Stereo downmix.