[FFmpeg-devel] [PATCH] libspeex Speex encoding

Michael Niedermayer michaelni
Tue Oct 27 15:33:38 CET 2009


On Sun, Oct 25, 2009 at 09:04:45AM -0400, Justin Ruggles wrote:
> Hi,
> 
> This patch combines parts of my previous libspeex encoding patch with
> parts of the one sent by Art Clarke.
> 
> The rate control is not as intuitive to use as I would like, but it
> works.  libspeex has the option to have the library choose a CBR bitrate
> based on a quality setting.  Providing that option doesn't really fit
> well into our current system since there is no way to tell if the user
> is specifying CBR quality or VBR quality.  So instead it just uses
> bitrate for CBR and quality for VBR like our other audio encoders.  The
> default bitrate of 64kbps is higher than the maximum Speex bitrate, so
> at least it will be good quality by default.

[...]
> +static av_cold int encode_init(AVCodecContext *avctx)
> +{
> +    LibSpeexEncContext *s = avctx->priv_data;
> +    const SpeexMode *mode;
> +    uint8_t *header_data;
> +    int header_size;
> +    int32_t complexity;
> +
> +    /* channels */
> +    if (avctx->channels < 1 || avctx->channels > 2) {
> +        av_log(avctx, AV_LOG_ERROR, "Invalid channels (%d). Only stereo and "
> +               "mono are supported\n", avctx->channels);
> +        return -1;
> +    }
> +
> +    /* sample rate and encoding mode */
> +    switch (avctx->sample_rate) {
> +    case  8000: mode = &speex_nb_mode;  break;
> +    case 16000: mode = &speex_wb_mode;  break;
> +    case 32000: mode = &speex_uwb_mode; break;
> +    default:
> +        av_log(avctx, AV_LOG_ERROR, "Sample rate of %d Hz is not supported. "
> +               "Resample to 8, 16, or 32 kHz.\n", avctx->sample_rate);
> +        return -1;
> +    }
> +
> +    /* initialize libspeex */
> +    s->enc_state = speex_encoder_init(mode);
> +    if (!s->enc_state) {
> +        av_log(avctx, AV_LOG_ERROR, "Error initializing libspeex\n");
> +        return -1;
> +    }
> +    speex_init_header(&s->header, avctx->sample_rate, avctx->channels, mode);
> +
> +    /* rate control method and parameters */
> +    if (avctx->flags & CODEC_FLAG_QSCALE) {
> +        /* VBR */
> +        s->header.vbr = 1;
> +        speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR, &s->header.vbr);
> +        s->vbr_quality = av_clipf(avctx->global_quality / (float)FF_QP2LAMBDA,
> +                                  0.0f, 10.0f);
> +        speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR_QUALITY, &s->vbr_quality);
> +        avctx->bit_rate = 0;
> +    } else {
> +        /* CBR */
> +        s->header.bitrate = avctx->bit_rate;
> +        speex_encoder_ctl(s->enc_state, SPEEX_SET_BITRATE, &s->header.bitrate);
> +        speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE, &s->header.bitrate);
> +        /* stereo side information adds about 800 bps to the base bitrate */

> +        avctx->bit_rate = s->header.bitrate + (avctx->channels == 2 ? 800 : 0);

avctx->bit_rate is set by the user, not by the encoder; the encoder should not overwrite it.


> +    }
> +
> +    /* set encoding complexity */
> +    if (avctx->compression_level > FF_COMPRESSION_DEFAULT) {
> +        complexity = av_clip(avctx->compression_level, 0, 10);
> +        speex_encoder_ctl(s->enc_state, SPEEX_SET_COMPLEXITY, &complexity);
> +    }
> +    speex_encoder_ctl(s->enc_state, SPEEX_GET_COMPLEXITY, &complexity);

> +    avctx->compression_level = complexity;

Same here: avctx->compression_level is set by the user, not by the encoder.


[...]
> +static int encode_frame(AVCodecContext *avctx, uint8_t *frame, int buf_size,
> +                        void *data)
> +{
> +    LibSpeexEncContext *s = avctx->priv_data;
> +    void *samples = data;
> +    int nframes, i;
> +
> +    if (!avctx->frame_size)
> +        return 0;
> +
> +    /* handle last packet, which may have fewer frames-per-packet and/or
> +       fewer samples in the last frame */
> +    nframes = s->header.frames_per_packet;
> +    if (avctx->frame_size < nframes * s->header.frame_size) {
> +        nframes = (avctx->frame_size + s->header.frame_size - 1) /
> +                  s->header.frame_size;
> +        if (avctx->frame_size != s->header.frame_size * nframes) {
> +            /* allocate new buffer to pad last frame */
> +            int new_samples_size;

> +            avctx->frame_size = nframes * s->header.frame_size;

I am not sure whether this violates the API, but at the very least I would
say it is unexpected by the application.


> +            new_samples_size  = avctx->frame_size * avctx->channels *
> +                                (avctx->sample_fmt == SAMPLE_FMT_FLT ?
> +                                sizeof(float) : sizeof(int16_t));
> +            samples = av_mallocz(new_samples_size);
> +            if (!samples)
> +                return AVERROR(ENOMEM);
> +            memcpy(samples, data, new_samples_size);

I think the application is, or at least should be, required to allocate full
frames even for the possibly smaller last one.


> +        }
> +    }
> +
> +    /* encode Speex frames */
> +    speex_bits_reset(&s->bits);
> +    if (avctx->sample_fmt == SAMPLE_FMT_FLT) {
> +        float *samples_flt = samples;
> +

> +        /* scale floating point samples to 16-bit range as required by libspeex */
> +        if (avctx->sample_fmt == SAMPLE_FMT_FLT)
> +            for (i = 0; i < avctx->frame_size * avctx->channels; i++)
> +                samples_flt[i] *= 32767.0;

the audio encode function is:
int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size,
                         const short *samples);
samples is const, so the encoder must not scale the input buffer in place.


[...]
> +AVCodec libspeex_encoder = {
> +    "libspeex",
> +    CODEC_TYPE_AUDIO,
> +    CODEC_ID_SPEEX,
> +    sizeof(LibSpeexEncContext),
> +    encode_init,
> +    encode_frame,
> +    encode_close,
> +    NULL,
> +    .capabilities = CODEC_CAP_SMALL_LAST_FRAME,
> +    .sample_fmts = (const enum SampleFormat[]){ SAMPLE_FMT_S16, SAMPLE_FMT_FLT,
> +                                                SAMPLE_FMT_NONE },
> +    .long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"),

The .supported_samplerates and .channel_layouts fields are missing.


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

The educated differ from the uneducated as much as the living from the
dead. -- Aristotle 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20091027/33a3dbce/attachment.pgp>



More information about the ffmpeg-devel mailing list