[FFmpeg-devel] [PATCH 2/2] avcodec/vorbisenc: Implement transient detection in Vorbis encoder
Tyler Jones
tdjones879 at gmail.com
Mon Mar 27 05:58:33 EEST 2017
The existing AAC psychoacoustic system is used to detect transients within the
Vorbis encoder. In general, this is a useful first step toward later adopting
a more complex psychoacoustic model for the Vorbis encoder; more specifically,
it allows the cancellation of pre-echo effects that frequently occur with this
codec.
Signed-off-by: Tyler Jones <tdjones879 at gmail.com>
---
libavcodec/psymodel.c | 1 +
libavcodec/vorbisenc.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+)
diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c
index 2b5f111..38831ce 100644
--- a/libavcodec/psymodel.c
+++ b/libavcodec/psymodel.c
@@ -62,6 +62,7 @@ av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens,
switch (ctx->avctx->codec_id) {
case AV_CODEC_ID_AAC:
+ case AV_CODEC_ID_VORBIS:
ctx->model = &ff_aac_psy_model;
break;
}
diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c
index 2974ca2..e4ec822 100644
--- a/libavcodec/vorbisenc.c
+++ b/libavcodec/vorbisenc.c
@@ -33,6 +33,8 @@
#include "vorbis.h"
#include "vorbis_enc_data.h"
+#include "psymodel.h"
+
#define BITSTREAM_WRITER_LE
#include "put_bits.h"
@@ -126,6 +128,9 @@ typedef struct vorbis_enc_context {
vorbis_enc_mode *modes;
int64_t next_pts;
+
+ FFPsyContext psy;
+ struct FFPsyPreprocessContext* psypp;
} vorbis_enc_context;
#define MAX_CHANNELS 2
@@ -1024,10 +1029,38 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
vorbis_enc_context *venc = avctx->priv_data;
float **audio = frame ? (float **)frame->extended_data : NULL;
int samples = frame ? frame->nb_samples : 0;
+ float *samples2, *la, *overlap;
vorbis_enc_mode *mode;
vorbis_enc_mapping *mapping;
PutBitContext pb;
int i, ret;
+ int start_ch, ch, chans, cur_channel;
+ FFPsyWindowInfo windows[MAX_CHANNELS];
+ enum WindowSequence window_sequence[MAX_CHANNELS];
+
+ if (!avctx->frame_number)
+ return 0;
+
+ if (venc->psypp)
+ ff_psy_preprocess(venc->psypp, audio, venc->channels);
+
+ if (frame) {
+ start_ch = 0;
+ cur_channel = 0;
+ for (i = 0; i < venc->channels - 1; i++) {
+ FFPsyWindowInfo* wi = windows + start_ch;
+ chans = 2;
+ for (ch = 0; ch < 2; ch++) {
+ cur_channel = start_ch + ch;
+ overlap = &audio[cur_channel][0];
+ samples2 = overlap + 1024;
+ la = samples2 + (448+64);
+ wi[ch] = venc->psy.model->window(&venc->psy, samples2, la,
+ cur_channel, window_sequence[0]);
+ }
+ start_ch += chans;
+ }
+ }
if (!apply_window_and_mdct(venc, audio, samples))
return 0;
@@ -1158,7 +1191,10 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx)
ff_mdct_end(&venc->mdct[0]);
ff_mdct_end(&venc->mdct[1]);
+ ff_psy_end(&venc->psy);
+ if (venc->psypp)
+ ff_psy_preprocess_end(venc->psypp);
av_freep(&avctx->extradata);
return 0 ;
@@ -1168,6 +1204,10 @@ static av_cold int vorbis_encode_init(AVCodecContext *avctx)
{
vorbis_enc_context *venc = avctx->priv_data;
int ret;
+ const uint8_t *sizes[MAX_CHANNELS];
+ uint8_t grouping[MAX_CHANNELS];
+ int lengths[MAX_CHANNELS];
+ int samplerate_index;
if (avctx->channels != 2) {
av_log(avctx, AV_LOG_ERROR, "Current FFmpeg Vorbis encoder only supports 2 channels.\n");
@@ -1190,6 +1230,26 @@ static av_cold int vorbis_encode_init(AVCodecContext *avctx)
avctx->frame_size = 1 << (venc->log2_blocksize[0] - 1);
+ for (samplerate_index = 0; samplerate_index < 16; samplerate_index++)
+ if (avctx->sample_rate == mpeg4audio_sample_rates[samplerate_index])
+ break;
+ if (samplerate_index == 16 ||
+ samplerate_index >= ff_vorbis_swb_size_1024_len ||
+ samplerate_index >= ff_vorbis_swb_size_128_len)
+ av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", avctx->sample_rate);
+
+ sizes[0] = ff_vorbis_swb_size_1024[samplerate_index];
+ sizes[1] = ff_vorbis_swb_size_128[samplerate_index];
+ lengths[0] = ff_vorbis_num_swb_1024[samplerate_index];
+ lengths[1] = ff_vorbis_num_swb_128[samplerate_index];
+ grouping[0] = 1;
+
+ if ((ret = ff_psy_init(&venc->psy, avctx, 2,
+ sizes, lengths,
+ 1, grouping)) < 0)
+ goto error;
+ venc->psypp = ff_psy_preprocess_init(avctx);
+
return 0;
error:
vorbis_encode_close(avctx);
--
2.7.4
More information about the ffmpeg-devel
mailing list