[FFmpeg-devel] [PATCH 2/2] avcodec/vorbisenc: Implement transient detection in Vorbis encoder
Rostislav Pehlivanov
atomnuker at gmail.com
Tue Mar 28 00:45:59 EEST 2017
On 27 March 2017 at 03:58, Tyler Jones <tdjones879 at gmail.com> wrote:
> The existing AAC psychoacoustic system is used to detect transients within
> the vorbis encoder. This is useful, in general, as an initial step in later
> utilizing a complex psychoacoustic model for the vorbis encoder, but more
> specifically allows the cancellation of pre-echo effects that frequently
> occur with this codec.
>
> Signed-off-by: Tyler Jones <tdjones879 at gmail.com>
> ---
> libavcodec/psymodel.c | 1 +
> libavcodec/vorbisenc.c | 60 ++++++++++++++++++++++++++++++
> ++++++++++++++++++++
> 2 files changed, 61 insertions(+)
>
> diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c
> index 2b5f111..38831ce 100644
> --- a/libavcodec/psymodel.c
> +++ b/libavcodec/psymodel.c
> @@ -62,6 +62,7 @@ av_cold int ff_psy_init(FFPsyContext *ctx,
> AVCodecContext *avctx, int num_lens,
>
> switch (ctx->avctx->codec_id) {
> case AV_CODEC_ID_AAC:
> + case AV_CODEC_ID_VORBIS:
> ctx->model = &ff_aac_psy_model;
> break;
> }
> diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c
> index 2974ca2..e4ec822 100644
> --- a/libavcodec/vorbisenc.c
> +++ b/libavcodec/vorbisenc.c
> @@ -33,6 +33,8 @@
> #include "vorbis.h"
> #include "vorbis_enc_data.h"
>
> +#include "psymodel.h"
> +
> #define BITSTREAM_WRITER_LE
> #include "put_bits.h"
>
> @@ -126,6 +128,9 @@ typedef struct vorbis_enc_context {
> vorbis_enc_mode *modes;
>
> int64_t next_pts;
> +
> + FFPsyContext psy;
> + struct FFPsyPreprocessContext* psypp;
> } vorbis_enc_context;
>
> #define MAX_CHANNELS 2
> @@ -1024,10 +1029,38 @@ static int vorbis_encode_frame(AVCodecContext
> *avctx, AVPacket *avpkt,
> vorbis_enc_context *venc = avctx->priv_data;
> float **audio = frame ? (float **)frame->extended_data : NULL;
> int samples = frame ? frame->nb_samples : 0;
> + float *samples2, *la, *overlap;
> vorbis_enc_mode *mode;
> vorbis_enc_mapping *mapping;
> PutBitContext pb;
> int i, ret;
> + int start_ch, ch, chans, cur_channel;
> + FFPsyWindowInfo windows[MAX_CHANNELS];
> + enum WindowSequence window_sequence[MAX_CHANNELS];
> +
> + if (!avctx->frame_number)
> + return 0;
> +
> + if (venc->psypp)
> + ff_psy_preprocess(venc->psypp, audio, venc->channels);
> +
> + if (frame) {
> + start_ch = 0;
> + cur_channel = 0;
> + for (i = 0; i < venc->channels - 1; i++) {
> + FFPsyWindowInfo* wi = windows + start_ch;
> + chans = 2;
> + for (ch = 0; ch < 2; ch++) {
> + cur_channel = start_ch + ch;
> + overlap = &audio[cur_channel][0];
> + samples2 = overlap + 1024;
> + la = samples2 + (448+64);
> + wi[ch] = venc->psy.model->window(&venc->psy, samples2,
> la,
> + cur_channel,
> window_sequence[0]);
>
window_sequence[0] must hold the previous frame's window type, not the
current frame's. As written it is a local array, so it is uninitialized on
every call and cannot carry state from one frame to the next. You'll need
to add enum WindowSequence window_sequence[MAX_CHANNELS]; to the main
encoder context.
After that, check the wi[i] for EIGHT_SHORT, and if it is EIGHT_SHORT,
you'll need to modify the encoder to do 8 small transforms and signal that
so that the decoder knows what to do.
> + }
> + start_ch += chans;
> + }
> + }
>
> if (!apply_window_and_mdct(venc, audio, samples))
> return 0;
> @@ -1158,7 +1191,10 @@ static av_cold int vorbis_encode_close(AVCodecContext
> *avctx)
>
> ff_mdct_end(&venc->mdct[0]);
> ff_mdct_end(&venc->mdct[1]);
> + ff_psy_end(&venc->psy);
>
> + if (venc->psypp)
> + ff_psy_preprocess_end(venc->psypp);
> av_freep(&avctx->extradata);
>
> return 0 ;
> @@ -1168,6 +1204,10 @@ static av_cold int vorbis_encode_init(AVCodecContext
> *avctx)
> {
> vorbis_enc_context *venc = avctx->priv_data;
> int ret;
> + const uint8_t *sizes[MAX_CHANNELS];
> + uint8_t grouping[MAX_CHANNELS];
> + int lengths[MAX_CHANNELS];
> + int samplerate_index;
>
> if (avctx->channels != 2) {
> av_log(avctx, AV_LOG_ERROR, "Current FFmpeg Vorbis encoder only
> supports 2 channels.\n");
> @@ -1190,6 +1230,26 @@ static av_cold int vorbis_encode_init(AVCodecContext
> *avctx)
>
> avctx->frame_size = 1 << (venc->log2_blocksize[0] - 1);
>
> + for (samplerate_index = 0; samplerate_index < 16; samplerate_index++)
> + if (avctx->sample_rate == mpeg4audio_sample_rates[
> samplerate_index])
> + break;
> + if (samplerate_index == 16 ||
> + samplerate_index >= ff_vorbis_swb_size_1024_len ||
> + samplerate_index >= ff_vorbis_swb_size_128_len)
> + av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n",
> avctx->sample_rate);
> +
> + sizes[0] = ff_vorbis_swb_size_1024[samplerate_index];
> + sizes[1] = ff_vorbis_swb_size_128[samplerate_index];
> + lengths[0] = ff_vorbis_num_swb_1024[samplerate_index];
> + lengths[1] = ff_vorbis_num_swb_128[samplerate_index];
> + grouping[0] = 1;
> +
> + if ((ret = ff_psy_init(&venc->psy, avctx, 2,
> + sizes, lengths,
> + 1, grouping)) < 0)
> + goto error;
> + venc->psypp = ff_psy_preprocess_init(avctx);
> +
> return 0;
> error:
> vorbis_encode_close(avctx);
> --
> 2.7.4
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
More information about the ffmpeg-devel mailing list