[FFmpeg-devel] [PATCH] HE-AAC v1 decoder

Alex Converse alex.converse
Wed Jan 27 18:14:16 CET 2010


2010/1/27 M?ns Rullg?rd <mans at mansr.com>:
> Alex Converse <alex.converse at gmail.com> writes:
>> More seriously, attached is a patch that implements SBR and integrates
>> it with AAC to make an HE-AAC v1 decoder. A filterbank compatible with
>> ff_float_to_int16_interleave_c is missing but otherwise the code is
>> complete.
>
> Does this mean it won't work on systems that don't have an ASM
> version?
>

At the moment yes. It looks like I will need to rewrite the filterbank
anyway but rest of the code is ready for review.

>> Note that the decoder spends 90+% of its time in the filterbank and
>> will be extremely slow on any system that does not have a asm
>> version of scalarproduct_float().
>
> Good thing it's an easy function to write asm for. ;-)
>
>> +static int array_min_int16(int16_t *array, int nel)
>> +{
>> + ? ?int i, min = array[0];
>> + ? ?for (i = 1; i < nel; i++)
>> + ? ? ? ?if (array[i] < min)
>> + ? ? ? ? ? ?min = array[i];
>> + ? ?return min;
>> +}
>
> How long are these arrays typically? ?Is this called a lot? ?There's a
> SIMD possibility here.
>

They are fairly short (less than 64 elements). I don't want to go the
SIMD route unless some sort of profiling is showing that we are
spending a measurable amount of time there but further C optimization
is always welcome.


>> +
>> +static void sbr_grid_copy(SBRData *dst, const SBRData *src) {
>> + ? ?//These variables are saved from the previous frame rather than copied
>> + ? ?dst->bs_freq_res[0] = dst->bs_freq_res[dst->bs_num_env[1]];
>> + ? ?dst->bs_num_env[0] ?= dst->bs_num_env[1];
>> +
>> + ? ?//These variables are read from the bitstream and therefore copied
>
> Maybe there's a good reason for that copying, but the comment sure
> doesn't make it any clearer.
>
>> + ? ?memcpy(dst->bs_freq_res+1, src->bs_freq_res+1, sizeof(dst->bs_freq_res)-1);
>> + ? ?memcpy(dst->bs_num_env+1, ?src->bs_num_env+1, ?sizeof(dst->bs_num_env)-1);
>> + ? ?memcpy(dst->bs_var_bord, ? src->bs_var_bord, ? sizeof(dst->bs_var_bord));
>> + ? ?memcpy(dst->bs_rel_bord, ? src->bs_rel_bord, ? sizeof(dst->bs_rel_bord));
>> + ? ?memcpy(dst->bs_num_rel, ? ?src->bs_num_rel, ? ?sizeof(dst->bs_rel_bord));
>> + ? ?dst->bs_amp_res ? = src->bs_amp_res;
>> + ? ?dst->bs_num_noise = src->bs_num_noise;
>> + ? ?dst->bs_pointer ? = src->bs_pointer;
>> + ? ?dst->bs_frame_class = src->bs_frame_class;
>> +}
>> +

What I'm trying to say is that when bitstream coupling is used,
variables read from the bitstream this frame are copied from the left
channel. Values that are saved from the previous frame are saved based
on the old right channel values.

>> +/// SBR Envelope and Noise Floor Decoding (14496-3 sp04 p201)
>> +static void sbr_env_noise_floors(SpectralBandReplication *sbr, SBRData *ch_data,
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? int ch)
>> +{
>> + ? ?int delta = (ch == 1 && sbr->bs_coupling == 1) ? 2 : 1;
>> + ? ?int i, k, l;
>> + ? ?const int temp = sbr->n[1] & 1;
>> + ? ?for (l = 0; l < ch_data->bs_num_env[1]; l++) {
>> + ? ? ? ?if (ch_data->bs_df_env[l]) {
>> + ? ? ? ? ? ?// bs_freq_res[0] == bs_freq_res[bs_num_env[1]] from prev frame
>> + ? ? ? ? ? ?if (ch_data->bs_freq_res[l + 1] == ch_data->bs_freq_res[l]) {
>> + ? ? ? ? ? ? ? ?for (k = 0; k < sbr->n[ch_data->bs_freq_res[l + 1]]; k++)
>> + ? ? ? ? ? ? ? ? ? ?ch_data->env_facs[l + 1][k] = ch_data->env_facs[l][k] + delta * ch_data->bs_data_env[l][k];

Top neighbor + new value

>> + ? ? ? ? ? ?} else if (ch_data->bs_freq_res[l + 1]) {
>> + ? ? ? ? ? ? ? ?for (k = 0; k < sbr->n[ch_data->bs_freq_res[l + 1]]; k++) {
>> + ? ? ? ? ? ? ? ? ? ?i = (k + temp) >> 1; // find i such that f_tablelow[i] <= f_tablehigh[k] < f_tablelow[i + 1]
>> + ? ? ? ? ? ? ? ? ? ?ch_data->env_facs[l + 1][k] = ch_data->env_facs[l][i] + delta * ch_data->bs_data_env[l][k];

Previous row of (index (k+even_odd)/2) + new value

>> + ? ? ? ? ? ? ? ?}
>> + ? ? ? ? ? ?} else {
>> + ? ? ? ? ? ? ? ?for (k = 0; k < sbr->n[ch_data->bs_freq_res[l + 1]]; k++) {
>> + ? ? ? ? ? ? ? ? ? ?i = k ? 2*k - temp : 0; // find i such that f_tablehigh[i] == f_tablelow[k]
>> + ? ? ? ? ? ? ? ? ? ?ch_data->env_facs[l + 1][k] = ch_data->env_facs[l][i] + delta * ch_data->bs_data_env[l][k];

Previous row of (index 2*k-even_odd, clipping to zero if k is zero) + new value

>> + ? ? ? ? ? ? ? ?}
>> + ? ? ? ? ? ?}
>> + ? ? ? ?} else {
>> + ? ? ? ? ? ?ch_data->env_facs[l + 1][0] = delta * ch_data->bs_data_env[l][0];
>> + ? ? ? ? ? ?for (k = 1; k < sbr->n[ch_data->bs_freq_res[l + 1]]; k++)
>> + ? ? ? ? ? ? ? ?ch_data->env_facs[l + 1][k] = ch_data->env_facs[l + 1][k - 1] + delta * ch_data->bs_data_env[l][k];

Left neighbor + new value

>> + ? ? ? ?}
>> + ? ?}
>> +
>> + ? ?for (l = 0; l < ch_data->bs_num_noise; l++) {
>> + ? ? ? ?if (ch_data->bs_df_noise[l])
>> + ? ? ? ? ? ?for (k = 0; k < sbr->n_q; k++)
>> + ? ? ? ? ? ? ? ?ch_data->noise_facs[l + 1][k] = ch_data->noise_facs[l][k] + delta * ch_data->bs_data_noise[l][k];
>> + ? ? ? ?else {
>> + ? ? ? ? ? ?ch_data->noise_facs[l + 1][0] = delta * ch_data->bs_data_noise[l][0];
>> + ? ? ? ? ? ?for (k = 1; k < sbr->n_q; k++)
>> + ? ? ? ? ? ? ? ?ch_data->noise_facs[l + 1][k] = ch_data->noise_facs[l + 1][k - 1] + delta * ch_data->bs_data_noise[l][k];
>> + ? ? ? ?}
>> + ? ?}
>> +
>> + ? ?//assign 0th elements of (env|noise)_facs from last elements
>> + ? ?memcpy( ?ch_data->env_facs[0], ? ch_data->env_facs[ch_data->bs_num_env[1]],
>> + ? ? ? ? ? sizeof( ?ch_data->env_facs[0]));
>> + ? ?memcpy(ch_data->noise_facs[0], ch_data->noise_facs[ch_data->bs_num_noise ],
>> + ? ? ? ? ? sizeof(ch_data->noise_facs[0]));
>> +}
>
> This function is impossible to read.

It seems pretty straight forward to me.

>
>> +/** High Frequency Generation (14496-3 sp04 p214+) and Inverse Filtering
>> + * (14496-3 sp04 p214)
>> + */
>> +static void sbr_hf_inverse_filter(float (*alpha0)[2], float (*alpha1)[2],
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?float X_low[32][40][2], int k0)
>> +{
>> + ? ?int i, j, k, n;
>> + ? ?for (k = 0; k < k0; k++) {
>> + ? ? ? ?float phi[3][2][2], dk;
>> +
>> + ? ? ? ?for (i = 0; i < 3; i++) {
>> + ? ? ? ? ? ?for (j = 0; j < 2; j++) {
>> + ? ? ? ? ? ? ? ?unsigned int idxtmp1 = ENVELOPE_ADJUSTMENT_OFFSET - i;
>> + ? ? ? ? ? ? ? ?unsigned int idxtmp2 = ENVELOPE_ADJUSTMENT_OFFSET - (j + 1);
>> +
>> + ? ? ? ? ? ? ? ?phi[i][j][0] = 0.0f;
>> + ? ? ? ? ? ? ? ?phi[i][j][1] = 0.0f;
>> +
>> + ? ? ? ? ? ? ? ?if (i <= j + 1)
>> + ? ? ? ? ? ? ? ?for (n = 0; n < 16 * 2 + 6; n++) {
>> + ? ? ? ? ? ? ? ? ? ?unsigned int idx1 = n + idxtmp1;
>> + ? ? ? ? ? ? ? ? ? ?unsigned int idx2 = n + idxtmp2;
>> + ? ? ? ? ? ? ? ? ? ?phi[i][j][0] += X_low[k][idx1][0] * X_low[k][idx2][0] +
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?X_low[k][idx1][1] * X_low[k][idx2][1];
>> + ? ? ? ? ? ? ? ? ? ?if (i != j + 1) { // imaginary part
>> + ? ? ? ? ? ? ? ? ? ? ? ?phi[i][j][1] += X_low[k][idx1][1] * X_low[k][idx2][0] -
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ?X_low[k][idx1][0] * X_low[k][idx2][1];
>> + ? ? ? ? ? ? ? ? ? ?}
>> + ? ? ? ? ? ? ? ?}
>> + ? ? ? ? ? ?}
>> + ? ? ? ?}
>
> This seems to be asking for SIMD.
>
>> + ? ? ? ?dk = phi[2][1][0] * phi[1][0][0] -
>> + ? ? ? ? ? ? ? (phi[1][1][0] * phi[1][1][0] + phi[1][1][1] * phi[1][1][1]) / 1.000001f;
>> +
>> + ? ? ? ?if (!dk) {
>> + ? ? ? ? ? ?alpha1[k][0] = 0;
>> + ? ? ? ? ? ?alpha1[k][1] = 0;
>> + ? ? ? ?} else {
>> + ? ? ? ? ? ?float temp_real, temp_im;
>> + ? ? ? ? ? ?temp_real = phi[0][0][0] * phi[1][1][0] -
>> + ? ? ? ? ? ? ? ? ? ? ? ?phi[0][0][1] * phi[1][1][1] -
>> + ? ? ? ? ? ? ? ? ? ? ? ?phi[0][1][0] * phi[1][0][0];
>> + ? ? ? ? ? ?temp_im ? = phi[0][0][0] * phi[1][1][1] +
>> + ? ? ? ? ? ? ? ? ? ? ? ?phi[0][0][1] * phi[1][1][0] -
>> + ? ? ? ? ? ? ? ? ? ? ? ?phi[0][1][1] * phi[1][0][0];
>> +
>> + ? ? ? ? ? ?alpha1[k][0] = temp_real / dk;
>> + ? ? ? ? ? ?alpha1[k][1] = temp_im ? / dk;
>> + ? ? ? ?}
>> +
>> + ? ? ? ?if (!phi[1][0][0]) {
>> + ? ? ? ? ? ?alpha0[k][0] = 0;
>> + ? ? ? ? ? ?alpha0[k][1] = 0;
>> + ? ? ? ?} else {
>> + ? ? ? ? ? ?float temp_real, temp_im;
>> + ? ? ? ? ? ?temp_real = phi[0][0][0] + alpha1[k][0] * phi[1][1][0] +
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? alpha1[k][1] * phi[1][1][1];
>> + ? ? ? ? ? ?temp_im ? = phi[0][0][1] + alpha1[k][1] * phi[1][1][0] -
>> + ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? alpha1[k][0] * phi[1][1][1];
>> +
>> + ? ? ? ? ? ?alpha0[k][0] = -temp_real / phi[1][0][0];
>> + ? ? ? ? ? ?alpha0[k][1] = -temp_im ? / phi[1][0][0];
>> + ? ? ? ?}
>> +
>> + ? ? ? ?if (alpha1[k][0] * alpha1[k][0] + alpha1[k][1] * alpha1[k][1] >= 16.0f ||
>> + ? ? ? ? ? alpha0[k][0] * alpha0[k][0] + alpha0[k][1] * alpha0[k][1] >= 16.0f) {
>> + ? ? ? ? ? ?alpha1[k][0] = 0;
>> + ? ? ? ? ? ?alpha1[k][1] = 0;
>> + ? ? ? ? ? ?alpha0[k][0] = 0;
>> + ? ? ? ? ? ?alpha0[k][1] = 0;
>> + ? ? ? ?}
>
> These calculations look a little unstable.

If phi[1][0][0] is very small then when alpha blows up it will trigger
the condition at the end that zeros everything out.

>
>> +static const DECLARE_ALIGNED_16(float, sbr_qmf_window_us)[640] = {
>> + ? ? 0.0000000000, -0.0005525286, -0.0005617692, -0.0004947518,
>> + ? ?-0.0004875227, -0.0004893791, -0.0005040714, -0.0005226564,
>> + ? ?-0.0005466565, -0.0005677802, -0.0005870930, -0.0006132747,
>> + ? ?-0.0006312493, -0.0006540333, -0.0006777690, -0.0006941614,
>> + ? ?-0.0007157736, -0.0007255043, -0.0007440941, -0.0007490598,
>> + ? ?-0.0007681371, -0.0007724848, -0.0007834332, -0.0007779869,
>> + ? ?-0.0007803664, -0.0007801449, -0.0007757977, -0.0007630793,
>> + ? ?-0.0007530001, -0.0007319357, -0.0007215391, -0.0006917937,
>> + ? ?-0.0006650415, -0.0006341594, -0.0005946118, -0.0005564576,
>> + ? ?-0.0005145572, -0.0004606325, -0.0004095121, -0.0003501175,
>> + ? ?-0.0002896981, -0.0002098337, -0.0001446380, -0.0000617334,
>> + ? ? 0.0000134949, ?0.0001094383, ?0.0002043017, ?0.0002949531,
>> + ? ? 0.0004026540, ?0.0005107388, ?0.0006239376, ?0.0007458025,
>> + ? ? 0.0008608443, ?0.0009885988, ?0.0011250155, ?0.0012577884,
>
> Where do these numbers come from? ?The way they're written here some
> have 10 significant digits, others only 6. ?Is that intentional?
>

They are copied verbatim from the spec. So yes?



More information about the ffmpeg-devel mailing list