00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00084 #include <speex/speex.h>
00085 #include <speex/speex_header.h>
00086 #include <speex/speex_stereo.h>
00087
00088 #include "libavutil/channel_layout.h"
00089 #include "libavutil/common.h"
00090 #include "libavutil/opt.h"
00091 #include "avcodec.h"
00092 #include "internal.h"
00093 #include "audio_frame_queue.h"
00094
00095
00096 typedef struct {
00097 AVClass *class;
00098 SpeexBits bits;
00099 SpeexHeader header;
00100 void *enc_state;
00101 int frames_per_packet;
00102 float vbr_quality;
00103 int cbr_quality;
00104 int abr;
00105 int vad;
00106 int dtx;
00107 int pkt_frame_count;
00108 AudioFrameQueue afq;
00109 } LibSpeexEncContext;
00110
00111 static av_cold void print_enc_params(AVCodecContext *avctx,
00112 LibSpeexEncContext *s)
00113 {
00114 const char *mode_str = "unknown";
00115
00116 av_log(avctx, AV_LOG_DEBUG, "channels: %d\n", avctx->channels);
00117 switch (s->header.mode) {
00118 case SPEEX_MODEID_NB: mode_str = "narrowband"; break;
00119 case SPEEX_MODEID_WB: mode_str = "wideband"; break;
00120 case SPEEX_MODEID_UWB: mode_str = "ultra-wideband"; break;
00121 }
00122 av_log(avctx, AV_LOG_DEBUG, "mode: %s\n", mode_str);
00123 if (s->header.vbr) {
00124 av_log(avctx, AV_LOG_DEBUG, "rate control: VBR\n");
00125 av_log(avctx, AV_LOG_DEBUG, " quality: %f\n", s->vbr_quality);
00126 } else if (s->abr) {
00127 av_log(avctx, AV_LOG_DEBUG, "rate control: ABR\n");
00128 av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
00129 } else {
00130 av_log(avctx, AV_LOG_DEBUG, "rate control: CBR\n");
00131 av_log(avctx, AV_LOG_DEBUG, " bitrate: %d bps\n", avctx->bit_rate);
00132 }
00133 av_log(avctx, AV_LOG_DEBUG, "complexity: %d\n",
00134 avctx->compression_level);
00135 av_log(avctx, AV_LOG_DEBUG, "frame size: %d samples\n",
00136 avctx->frame_size);
00137 av_log(avctx, AV_LOG_DEBUG, "frames per packet: %d\n",
00138 s->frames_per_packet);
00139 av_log(avctx, AV_LOG_DEBUG, "packet size: %d\n",
00140 avctx->frame_size * s->frames_per_packet);
00141 av_log(avctx, AV_LOG_DEBUG, "voice activity detection: %d\n", s->vad);
00142 av_log(avctx, AV_LOG_DEBUG, "discontinuous transmission: %d\n", s->dtx);
00143 }
00144
00145 static av_cold int encode_init(AVCodecContext *avctx)
00146 {
00147 LibSpeexEncContext *s = avctx->priv_data;
00148 const SpeexMode *mode;
00149 uint8_t *header_data;
00150 int header_size;
00151 int32_t complexity;
00152
00153
00154 if (avctx->channels < 1 || avctx->channels > 2) {
00155 av_log(avctx, AV_LOG_ERROR, "Invalid channels (%d). Only stereo and "
00156 "mono are supported\n", avctx->channels);
00157 return AVERROR(EINVAL);
00158 }
00159
00160
00161 switch (avctx->sample_rate) {
00162 case 8000: mode = &speex_nb_mode; break;
00163 case 16000: mode = &speex_wb_mode; break;
00164 case 32000: mode = &speex_uwb_mode; break;
00165 default:
00166 av_log(avctx, AV_LOG_ERROR, "Sample rate of %d Hz is not supported. "
00167 "Resample to 8, 16, or 32 kHz.\n", avctx->sample_rate);
00168 return AVERROR(EINVAL);
00169 }
00170
00171
00172 s->enc_state = speex_encoder_init(mode);
00173 if (!s->enc_state) {
00174 av_log(avctx, AV_LOG_ERROR, "Error initializing libspeex\n");
00175 return -1;
00176 }
00177 speex_init_header(&s->header, avctx->sample_rate, avctx->channels, mode);
00178
00179
00180 if (avctx->flags & CODEC_FLAG_QSCALE) {
00181
00182 s->header.vbr = 1;
00183 s->vad = 1;
00184 speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR, &s->header.vbr);
00185 s->vbr_quality = av_clipf(avctx->global_quality / (float)FF_QP2LAMBDA,
00186 0.0f, 10.0f);
00187 speex_encoder_ctl(s->enc_state, SPEEX_SET_VBR_QUALITY, &s->vbr_quality);
00188 } else {
00189 s->header.bitrate = avctx->bit_rate;
00190 if (avctx->bit_rate > 0) {
00191
00192 if (s->abr) {
00193 speex_encoder_ctl(s->enc_state, SPEEX_SET_ABR,
00194 &s->header.bitrate);
00195 speex_encoder_ctl(s->enc_state, SPEEX_GET_ABR,
00196 &s->header.bitrate);
00197 } else {
00198 speex_encoder_ctl(s->enc_state, SPEEX_SET_BITRATE,
00199 &s->header.bitrate);
00200 speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
00201 &s->header.bitrate);
00202 }
00203 } else {
00204
00205 speex_encoder_ctl(s->enc_state, SPEEX_SET_QUALITY,
00206 &s->cbr_quality);
00207 speex_encoder_ctl(s->enc_state, SPEEX_GET_BITRATE,
00208 &s->header.bitrate);
00209 }
00210
00211
00212 avctx->bit_rate = s->header.bitrate + (avctx->channels == 2 ? 800 : 0);
00213 }
00214
00215
00216 if (s->vad)
00217 speex_encoder_ctl(s->enc_state, SPEEX_SET_VAD, &s->vad);
00218
00219
00220 if (s->dtx) {
00221 speex_encoder_ctl(s->enc_state, SPEEX_SET_DTX, &s->dtx);
00222 if (!(s->abr || s->vad || s->header.vbr))
00223 av_log(avctx, AV_LOG_WARNING, "DTX is not much of use without ABR, VAD or VBR\n");
00224 }
00225
00226
00227 if (avctx->compression_level > FF_COMPRESSION_DEFAULT) {
00228 complexity = av_clip(avctx->compression_level, 0, 10);
00229 speex_encoder_ctl(s->enc_state, SPEEX_SET_COMPLEXITY, &complexity);
00230 }
00231 speex_encoder_ctl(s->enc_state, SPEEX_GET_COMPLEXITY, &complexity);
00232 avctx->compression_level = complexity;
00233
00234
00235 avctx->frame_size = s->header.frame_size;
00236 s->header.frames_per_packet = s->frames_per_packet;
00237
00238
00239 speex_encoder_ctl(s->enc_state, SPEEX_GET_LOOKAHEAD, &avctx->delay);
00240 ff_af_queue_init(avctx, &s->afq);
00241
00242
00243
00244
00245 header_data = speex_header_to_packet(&s->header, &header_size);
00246
00247
00248 avctx->extradata = av_malloc(header_size + FF_INPUT_BUFFER_PADDING_SIZE);
00249 if (!avctx->extradata) {
00250 speex_header_free(header_data);
00251 speex_encoder_destroy(s->enc_state);
00252 av_log(avctx, AV_LOG_ERROR, "memory allocation error\n");
00253 return AVERROR(ENOMEM);
00254 }
00255 #if FF_API_OLD_ENCODE_AUDIO
00256 avctx->coded_frame = avcodec_alloc_frame();
00257 if (!avctx->coded_frame) {
00258 av_freep(&avctx->extradata);
00259 speex_header_free(header_data);
00260 speex_encoder_destroy(s->enc_state);
00261 av_log(avctx, AV_LOG_ERROR, "memory allocation error\n");
00262 return AVERROR(ENOMEM);
00263 }
00264 #endif
00265
00266
00267 memcpy(avctx->extradata, header_data, header_size);
00268 avctx->extradata_size = header_size;
00269 speex_header_free(header_data);
00270
00271
00272 speex_bits_init(&s->bits);
00273
00274 print_enc_params(avctx, s);
00275 return 0;
00276 }
00277
00278 static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
00279 const AVFrame *frame, int *got_packet_ptr)
00280 {
00281 LibSpeexEncContext *s = avctx->priv_data;
00282 int16_t *samples = frame ? (int16_t *)frame->data[0] : NULL;
00283 int ret;
00284
00285 if (samples) {
00286
00287 if (avctx->channels == 2)
00288 speex_encode_stereo_int(samples, s->header.frame_size, &s->bits);
00289 speex_encode_int(s->enc_state, samples, &s->bits);
00290 s->pkt_frame_count++;
00291 if ((ret = ff_af_queue_add(&s->afq, frame) < 0))
00292 return ret;
00293 } else {
00294
00295 if (!s->pkt_frame_count)
00296 return 0;
00297
00298 while (s->pkt_frame_count < s->frames_per_packet) {
00299 speex_bits_pack(&s->bits, 15, 5);
00300 s->pkt_frame_count++;
00301 }
00302 }
00303
00304
00305 if (s->pkt_frame_count == s->frames_per_packet) {
00306 s->pkt_frame_count = 0;
00307 if ((ret = ff_alloc_packet2(avctx, avpkt, speex_bits_nbytes(&s->bits))))
00308 return ret;
00309 ret = speex_bits_write(&s->bits, avpkt->data, avpkt->size);
00310 speex_bits_reset(&s->bits);
00311
00312
00313 ff_af_queue_remove(&s->afq, s->frames_per_packet * avctx->frame_size,
00314 &avpkt->pts, &avpkt->duration);
00315
00316 avpkt->size = ret;
00317 *got_packet_ptr = 1;
00318 return 0;
00319 }
00320 return 0;
00321 }
00322
00323 static av_cold int encode_close(AVCodecContext *avctx)
00324 {
00325 LibSpeexEncContext *s = avctx->priv_data;
00326
00327 speex_bits_destroy(&s->bits);
00328 speex_encoder_destroy(s->enc_state);
00329
00330 ff_af_queue_close(&s->afq);
00331 #if FF_API_OLD_ENCODE_AUDIO
00332 av_freep(&avctx->coded_frame);
00333 #endif
00334 av_freep(&avctx->extradata);
00335
00336 return 0;
00337 }
00338
00339 #define OFFSET(x) offsetof(LibSpeexEncContext, x)
00340 #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
00341 static const AVOption options[] = {
00342 { "abr", "Use average bit rate", OFFSET(abr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
00343 { "cbr_quality", "Set quality value (0 to 10) for CBR", OFFSET(cbr_quality), AV_OPT_TYPE_INT, { .i64 = 8 }, 0, 10, AE },
00344 { "frames_per_packet", "Number of frames to encode in each packet", OFFSET(frames_per_packet), AV_OPT_TYPE_INT, { .i64 = 1 }, 1, 8, AE },
00345 { "vad", "Voice Activity Detection", OFFSET(vad), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
00346 { "dtx", "Discontinuous Transmission", OFFSET(dtx), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
00347 { NULL },
00348 };
00349
00350 static const AVClass class = {
00351 .class_name = "libspeex",
00352 .item_name = av_default_item_name,
00353 .option = options,
00354 .version = LIBAVUTIL_VERSION_INT,
00355 };
00356
00357 static const AVCodecDefault defaults[] = {
00358 { "b", "0" },
00359 { "compression_level", "3" },
00360 { NULL },
00361 };
00362
00363 AVCodec ff_libspeex_encoder = {
00364 .name = "libspeex",
00365 .type = AVMEDIA_TYPE_AUDIO,
00366 .id = AV_CODEC_ID_SPEEX,
00367 .priv_data_size = sizeof(LibSpeexEncContext),
00368 .init = encode_init,
00369 .encode2 = encode_frame,
00370 .close = encode_close,
00371 .capabilities = CODEC_CAP_DELAY,
00372 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S16,
00373 AV_SAMPLE_FMT_NONE },
00374 .channel_layouts = (const uint64_t[]){ AV_CH_LAYOUT_MONO,
00375 AV_CH_LAYOUT_STEREO,
00376 0 },
00377 .supported_samplerates = (const int[]){ 8000, 16000, 32000, 0 },
00378 .long_name = NULL_IF_CONFIG_SMALL("libspeex Speex"),
00379 .priv_class = &class,
00380 .defaults = defaults,
00381 };