[FFmpeg-devel] [PATCH 03/14] libavcodec/libavutil: Implementation of AAC_fixed_decoder (LC-module) [3/4]
Nedeljko Babic
Nedeljko.Babic at imgtec.com
Wed May 20 09:39:43 CEST 2015
I will resend a patch with changes according to all the remarks.
Thanks,
- Nedeljko
________________________________________
Od: ffmpeg-devel-bounces at ffmpeg.org [ffmpeg-devel-bounces at ffmpeg.org] u ime korisnika Michael Niedermayer [michaelni at gmx.at]
Poslato: 16. maj 2015 2:12
Za: FFmpeg development discussions and patches
Tema: Re: [FFmpeg-devel] [PATCH 03/14] libavcodec/libavutil: Implementation of AAC_fixed_decoder (LC-module) [3/4]
On Thu, May 14, 2015 at 03:33:55PM +0200, Nedeljko Babic wrote:
> From: Djordje Pesut <djordje.pesut at imgtec.com>
>
> Add fixed point implementation
>
> Signed-off-by: Nedeljko Babic <nedeljko.babic at imgtec.com>
> ---
> libavcodec/aac.h | 101 ++++++++--
> libavcodec/aacdec.c | 5 +
> libavcodec/aacdec_fixed.c | 446 +++++++++++++++++++++++++++++++++++++++++++
> libavcodec/aacdec_template.c | 433 ++++++++++++++++++++++++++++-------------
> libavcodec/lpc.h | 15 +-
> libavcodec/mdct_template.c | 5 +
> libavutil/fixed_dsp.c | 70 ++++++-
> libavutil/fixed_dsp.h | 53 +++++
the changes to libavutil should be split into a seperate patch
> 8 files changed, 965 insertions(+), 163 deletions(-)
> create mode 100644 libavcodec/aacdec_fixed.c
>
> diff --git a/libavcodec/aac.h b/libavcodec/aac.h
> index 23ec085..571f8b9 100644
> --- a/libavcodec/aac.h
> +++ b/libavcodec/aac.h
> @@ -30,9 +30,62 @@
> #ifndef AVCODEC_AAC_H
> #define AVCODEC_AAC_H
>
> +#ifndef USE_FIXED
> +#define USE_FIXED 0
> +#endif
> +
> +#if USE_FIXED
> +
> +#define FFT_FLOAT 0
> +#define FFT_FIXED_32 1
> +
> +#define AAC_RENAME(x) x ## _fixed
> +#define AAC_RENAME_32(x) x ## _fixed_32
> +#define INTFLOAT int
> +#define SHORTFLOAT int16_t
> +#define AAC_FLOAT aac_float_t
i think *_t is reserved in POSIX to some extend
[...]
> +#include <assert.h>
we use av_assert*()
> +#include <errno.h>
is that really used ?
[...]
> +/**
> + * Apply dependent channel coupling (applied before IMDCT).
> + *
> + * @param index index into coupling gain array
> + */
> +static void apply_dependent_coupling_fixed(AACContext *ac,
> + SingleChannelElement *target,
> + ChannelElement *cce, int index)
> +{
> + IndividualChannelStream *ics = &cce->ch[0].ics;
> + const uint16_t *offsets = ics->swb_offset;
> + int *dest = target->coeffs;
> + const int *src = cce->ch[0].coeffs;
> + int g, i, group, k, idx = 0;
> + if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
> + av_log(ac->avctx, AV_LOG_ERROR,
> + "Dependent coupling is not supported together with LTP\n");
> + return;
> + }
> + for (g = 0; g < ics->num_window_groups; g++) {
> + for (i = 0; i < ics->max_sfb; i++, idx++) {
> + if (cce->ch[0].band_type[idx] != ZERO_BT) {
> + const int gain = cce->coup.gain[index][idx];
> + int shift, round, c, tmp;
> +
> + if (gain < 0) {
> + c = -cce_scale_fixed[-gain & 7];
> + shift = (-gain-1024) >> 3;
> + }
> + else {
> + c = cce_scale_fixed[gain & 7];
> + shift = (gain-1024) >> 3;
> + }
> +
> + if (shift < 0) {
> + shift = -shift;
> + round = 1 << (shift - 1);
> +
> + for (group = 0; group < ics->group_len[g]; group++) {
> + for (k = offsets[i]; k < offsets[i + 1]; k++) {
> + tmp = (int)(((int64_t)src[group * 128 + k] * c + \
> + (int64_t)0x1000000000) >> 37);
> + dest[group * 128 + k] += (tmp + round) >> shift;
> + }
> + }
> + }
there is something wrong with the indention level
[...]
> memcpy(sce->ltp_state, sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
> @@ -2341,22 +2472,27 @@ static void update_ltp(AACContext *ac, SingleChannelElement *sce)
> static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
> {
> IndividualChannelStream *ics = &sce->ics;
> - float *in = sce->coeffs;
> - float *out = sce->ret;
> - float *saved = sce->saved;
> - const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
> - const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
> - const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
> - float *buf = ac->buf_mdct;
> - float *temp = ac->temp;
> + INTFLOAT *in = sce->coeffs;
> + INTFLOAT *out = sce->ret;
> + INTFLOAT *saved = sce->saved;
> + const INTFLOAT *swindow = ics->use_kb_window[0] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128);
> + const INTFLOAT *lwindow_prev = ics->use_kb_window[1] ? AAC_RENAME(ff_aac_kbd_long_1024) : AAC_RENAME(ff_sine_1024);
> + const INTFLOAT *swindow_prev = ics->use_kb_window[1] ? AAC_RENAME(ff_aac_kbd_short_128) : AAC_RENAME(ff_sine_128);
> + INTFLOAT *buf = ac->buf_mdct;
> + INTFLOAT *temp = ac->temp;
> int i;
>
> // imdct
> if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
> for (i = 0; i < 1024; i += 128)
> ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
> - } else
> + } else {
> ac->mdct.imdct_half(&ac->mdct, buf, in);
> +#if USE_FIXED
> + for (i=0; i<1024; i++)
> + buf[i] = (buf[i] + 4) >> 3;
> +#endif /* USE_FIXED */
> + }
>
> /* window overlapping
> * NOTE: To simplify the overlapping code, all 'meaningless' short to long
> @@ -2368,7 +2504,7 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
> (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
> ac->fdsp->vector_fmul_window( out, saved, buf, lwindow_prev, 512);
> } else {
> - memcpy( out, saved, 448 * sizeof(float));
> + memcpy( out, saved, 448 * sizeof(INTFLOAT));
>
> if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
> ac->fdsp->vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, 64);
> @@ -2376,65 +2512,71 @@ static void imdct_and_windowing(AACContext *ac, SingleChannelElement *sce)
> ac->fdsp->vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, 64);
> ac->fdsp->vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, 64);
> ac->fdsp->vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, 64);
> - memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
> + memcpy( out + 448 + 4*128, temp, 64 * sizeof(INTFLOAT));
> } else {
> ac->fdsp->vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, 64);
> - memcpy( out + 576, buf + 64, 448 * sizeof(float));
> + memcpy( out + 576, buf + 64, 448 * sizeof(INTFLOAT));
> }
> }
>
> // buffer update
> if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
> - memcpy( saved, temp + 64, 64 * sizeof(float));
> + memcpy( saved, temp + 64, 64 * sizeof(INTFLOAT));
> ac->fdsp->vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 64);
> ac->fdsp->vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
> ac->fdsp->vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
> - memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
> + memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(INTFLOAT));
> } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
> - memcpy( saved, buf + 512, 448 * sizeof(float));
> - memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
> + memcpy( saved, buf + 512, 448 * sizeof(INTFLOAT));
> + memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(INTFLOAT));
> } else { // LONG_STOP or ONLY_LONG
> - memcpy( saved, buf + 512, 512 * sizeof(float));
> + memcpy( saved, buf + 512, 512 * sizeof(INTFLOAT));
these should use sizeof(*saved) or something not sizeof(INTFLOAT)
so as to make it more robust
[...]
> diff --git a/libavutil/fixed_dsp.h b/libavutil/fixed_dsp.h
> index ff6f365..73859c0 100644
> --- a/libavutil/fixed_dsp.h
> +++ b/libavutil/fixed_dsp.h
> @@ -54,6 +54,8 @@
> #include "libavcodec/mathops.h"
>
> typedef struct AVFixedDSPContext {
> + /* assume len is a multiple of 16, and arrays are 32-byte aligned */
> +
> /**
> * Overlap/add with window function.
> * Used primarily by MDCT-based audio codecs.
> @@ -92,6 +94,57 @@ typedef struct AVFixedDSPContext {
> */
> void (*vector_fmul_window)(int32_t *dst, const int32_t *src0, const int32_t *src1, const int32_t *win, int len);
>
> + /**
> + * Fixed-point multiplication that calculates the product of two vectors of
> + * integers and stores the result in a vector of integers.
> + *
> + * @param dst output vector
> + * constraints: 32-byte aligned
> + * @param src0 first input vector
> + * constraints: 32-byte aligned
> + * @param src1 second input vector
> + * constraints: 32-byte aligned
> + * @param len number of elements in the input
> + * constraints: multiple of 16
> + */
> + void (*vector_fmul)(int *dst, const int *src0, const int *src1,
> + int len);
> +
> + void (*vector_fmul_reverse)(int *dst, const int *src0, const int *src1, int len);
> + /**
> + * Calculate the product of two vectors of integers, add a third vector of
> + * integers and store the result in a vector of integers.
> + *
> + * @param dst output vector
> + * constraints: 32-byte aligned
> + * @param src0 first input vector
> + * constraints: 32-byte aligned
> + * @param src1 second input vector
> + * constraints: 32-byte aligned
> + * @param src1 third input vector
> + * constraints: 32-byte aligned
> + * @param len number of elements in the input
> + * constraints: multiple of 16
> + */
> + void (*vector_fmul_add)(int *dst, const int *src0, const int *src1,
> + const int *src2, int len);
> +
> + /**
> + * Calculate the scalar product of two vectors of integers.
> + * @param v1 first vector, 16-byte aligned
> + * @param v2 second vector, 16-byte aligned
> + * @param len length of vectors, multiple of 4
> + */
> + int (*scalarproduct_fixed)(const int *v1, const int *v2, int len);
> +
> + /**
> + * Calculate the sum and difference of two vectors of integers.
> + *
> + * @param v1 first input vector, sum output, 16-byte aligned
> + * @param v2 second input vector, difference output, 16-byte aligned
> + * @param len length of vectors, multiple of 4
> + */
> + void (*butterflies_fixed)(int *av_restrict v1, int *av_restrict v2, int len);
these ignore the 1.0 point / amount of shifting done
the documentation should be sufficient so that someone could write
a working implementation based on just the documentation
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
Concerning the gods, I have no means of knowing whether they exist or not
or of what sort they may be, because of the obscurity of the subject, and
the brevity of human life -- Protagoras
More information about the ffmpeg-devel
mailing list