[FFmpeg-devel] [PATCH 2/2] avcodec/vorbisenc: Implement transient detection in Vorbis encoder

Tue Mar 28 00:45:59 EEST 2017

On 27 March 2017 at 03:58, Tyler Jones <tdjones879 at gmail.com> wrote:

> The existing AAC psychoacoustic system is used to detect transients within
> the
> vorbis encoder. This is useful, in general, as an initial step in later
> utilizing
> a complex psychoacoustic model for the vorbis encoder, but more
> specifically
> allows the cancellation of pre-echo effects that frequently occur with this
> codec.
>
> Signed-off-by: Tyler Jones <tdjones879 at gmail.com>
> ---
>  libavcodec/psymodel.c  |  1 +
>  libavcodec/vorbisenc.c | 60 ++++++++++++++++++++++++++++++
> ++++++++++++++++++++
>  2 files changed, 61 insertions(+)
>
> diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c
> index 2b5f111..38831ce 100644
> --- a/libavcodec/psymodel.c
> +++ b/libavcodec/psymodel.c
> @@ -62,6 +62,7 @@ av_cold int ff_psy_init(FFPsyContext *ctx,
> AVCodecContext *avctx, int num_lens,
>
>      switch (ctx->avctx->codec_id) {
>      case AV_CODEC_ID_AAC:
> +    case AV_CODEC_ID_VORBIS:
>          ctx->model = &ff_aac_psy_model;
>          break;
>      }
> diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c
> index 2974ca2..e4ec822 100644
> --- a/libavcodec/vorbisenc.c
> +++ b/libavcodec/vorbisenc.c
> @@ -33,6 +33,8 @@
>  #include "vorbis.h"
>  #include "vorbis_enc_data.h"
>
> +#include "psymodel.h"
> +
>  #define BITSTREAM_WRITER_LE
>  #include "put_bits.h"
>
> @@ -126,6 +128,9 @@ typedef struct vorbis_enc_context {
>      vorbis_enc_mode *modes;
>
>      int64_t next_pts;
> +
> +    FFPsyContext psy;
> +    struct FFPsyPreprocessContext* psypp;
>  } vorbis_enc_context;
>
>  #define MAX_CHANNELS     2
> @@ -1024,10 +1029,38 @@ static int vorbis_encode_frame(AVCodecContext
> *avctx, AVPacket *avpkt,
>      vorbis_enc_context *venc = avctx->priv_data;
>      float **audio = frame ? (float **)frame->extended_data : NULL;
>      int samples = frame ? frame->nb_samples : 0;
> +    float *samples2, *la, *overlap;
>      vorbis_enc_mode *mode;
>      vorbis_enc_mapping *mapping;
>      PutBitContext pb;
>      int i, ret;
> +    int start_ch, ch, chans, cur_channel;
> +    FFPsyWindowInfo windows[MAX_CHANNELS];
> +    enum WindowSequence window_sequence[MAX_CHANNELS];
> +
> +    if (!avctx->frame_number)
> +        return 0;
> +
> +    if (venc->psypp)
> +        ff_psy_preprocess(venc->psypp, audio, venc->channels);
> +
> +    if (frame) {
> +        start_ch = 0;
> +        cur_channel = 0;
> +        for (i = 0; i < venc->channels - 1; i++) {
> +            FFPsyWindowInfo* wi = windows + start_ch;
> +            chans = 2;
> +            for (ch = 0; ch < 2; ch++) {
> +                cur_channel = start_ch + ch;
> +                overlap = &audio[cur_channel][0];
> +                samples2 = overlap + 1024;
> +                la = samples2 + (448+64);
> +                wi[ch] = venc->psy.model->window(&venc->psy, samples2,
> la,
> +                                                 cur_channel,
> window_sequence[0]);
>

window_sequence[0] must hold the previous frame's window type, not the
current frame's. To keep it across calls, you'll need to add
"enum WindowSequence window_sequence[MAX_CHANNELS];" to the main
encoder context instead of declaring it as a local here.

After that, check each wi[i] for EIGHT_SHORT. If it is EIGHT_SHORT,
you'll need to modify the encoder to do 8 small transforms and signal
that, so that the decoder knows what to do.

> +            }
> +            start_ch += chans;
> +        }
> +    }
>
>      if (!apply_window_and_mdct(venc, audio, samples))
>          return 0;
> @@ -1158,7 +1191,10 @@ static av_cold int vorbis_encode_close(AVCodecContext
> *avctx)
>
>      ff_mdct_end(&venc->mdct[0]);
>      ff_mdct_end(&venc->mdct[1]);
> +    ff_psy_end(&venc->psy);
>
> +    if (venc->psypp)
> +        ff_psy_preprocess_end(venc->psypp);
>      av_freep(&avctx->extradata);
>
>      return 0 ;
> @@ -1168,6 +1204,10 @@ static av_cold int vorbis_encode_init(AVCodecContext
> *avctx)
>  {
>      vorbis_enc_context *venc = avctx->priv_data;
>      int ret;
> +    const uint8_t *sizes[MAX_CHANNELS];
> +    uint8_t grouping[MAX_CHANNELS];
> +    int lengths[MAX_CHANNELS];
> +    int samplerate_index;
>
>      if (avctx->channels != 2) {
>          av_log(avctx, AV_LOG_ERROR, "Current FFmpeg Vorbis encoder only
> supports 2 channels.\n");
> @@ -1190,6 +1230,26 @@ static av_cold int vorbis_encode_init(AVCodecContext
> *avctx)
>
>      avctx->frame_size = 1 << (venc->log2_blocksize[0] - 1);
>
> +    for (samplerate_index = 0; samplerate_index < 16; samplerate_index++)
> +        if (avctx->sample_rate == mpeg4audio_sample_rates[
> samplerate_index])
> +            break;
> +    if (samplerate_index == 16 ||
> +        samplerate_index >= ff_vorbis_swb_size_1024_len ||
> +        samplerate_index >= ff_vorbis_swb_size_128_len)
> +        av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n",
> avctx->sample_rate);
> +
> +    sizes[0]   = ff_vorbis_swb_size_1024[samplerate_index];
> +    sizes[1]   = ff_vorbis_swb_size_128[samplerate_index];
> +    lengths[0] = ff_vorbis_num_swb_1024[samplerate_index];
> +    lengths[1] = ff_vorbis_num_swb_128[samplerate_index];
> +    grouping[0] = 1;
> +
> +    if ((ret = ff_psy_init(&venc->psy, avctx, 2,
> +                           sizes, lengths,
> +                           1, grouping)) < 0)
> +        goto error;
> +    venc->psypp = ff_psy_preprocess_init(avctx);
> +
>      return 0;
>  error:
>      vorbis_encode_close(avctx);
> --
> 2.7.4
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>