[FFmpeg-devel] [PATCH 2/2] avcodec/amr*dec: add multichannel support

Sun Zhenliang hisunzhenliang at outlook.com
Fri Oct 1 04:17:15 EEST 2021


On Oct 1, 2021, 05:01 +0800, Paul B Mahol <onemda at gmail.com> wrote:
> Signed-off-by: Paul B Mahol <onemda at gmail.com>
> ---
> libavcodec/amr_parser.c | 39 ++++++++++++++++++--------
> libavcodec/amrnbdec.c | 54 +++++++++++++++++++++++-------------
> libavcodec/amrwbdec.c | 59 +++++++++++++++++++++++++--------------
> libavformat/amr.c | 61 +++++++++++++++++++++++++++++------------
> 4 files changed, 143 insertions(+), 70 deletions(-)
>
> diff --git a/libavcodec/amr_parser.c b/libavcodec/amr_parser.c
> index 2659cb40d7..222d8e05e9 100644
> --- a/libavcodec/amr_parser.c
> +++ b/libavcodec/amr_parser.c
> @@ -39,6 +39,7 @@ typedef struct AMRParseContext {
> ParseContext pc;
> uint64_t cumulated_size;
> uint64_t block_count;
> + int current_channel;
> int remaining;
> } AMRParseContext;
>
> @@ -57,21 +58,35 @@ static int amr_parse(AVCodecParserContext *s1,
> if (s1->flags & PARSER_FLAG_COMPLETE_FRAMES) {
> next = buf_size;
> } else {
> - if (s->remaining) {
> - next = s->remaining;
> - } else {
> - int mode = (buf[0] >> 3) & 0x0F;
> -
> - if (avctx->codec_id == AV_CODEC_ID_AMR_NB) {
> - next = amrnb_packed_size[mode];
> - } else if (avctx->codec_id == AV_CODEC_ID_AMR_WB) {
> - next = amrwb_packed_size[mode];
> + int offset = 0;
> +
> + for (int ch = s->current_channel; ch < avctx->channels; ch++) {
> + if (s->remaining) {
> + next = s->remaining;
> + } else {
> + int mode = (buf[offset] >> 3) & 0x0F;
> +
> + if (avctx->codec_id == AV_CODEC_ID_AMR_NB) {
> + next = amrnb_packed_size[mode];
> + } else if (avctx->codec_id == AV_CODEC_ID_AMR_WB) {
> + next = amrwb_packed_size[mode];
> + }
> + }
> +
> + offset += next;
> + if (offset >= buf_size) {
> + s->remaining = offset - buf_size;
> + next = END_NOT_FOUND;
> + s->current_channel = ch;
> + break;
> + } else {
> + s->remaining = 0;
> + s->current_channel = 0;
> }
> }
>
> - s->remaining = next - FFMIN(buf_size, next);
> - if (s->remaining)
> - next = END_NOT_FOUND;
> + if (s->remaining == 0)
> + next = offset;
>
> if (next != END_NOT_FOUND) {
> if (s->cumulated_size < UINT64_MAX - next) {
> diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c
> index e366a09976..472fa85f87 100644
> --- a/libavcodec/amrnbdec.c
> +++ b/libavcodec/amrnbdec.c
> @@ -145,6 +145,10 @@ typedef struct AMRContext {
>
> } AMRContext;
>
> +typedef struct AMRChannelsContext {
> + AMRContext ch[2];
> +} AMRChannelsContext;
> +
> /** Double version of ff_weighted_vector_sumf() */
> static void weighted_vector_sumd(double *out, const double *in_a,
> const double *in_b, double weight_coeff_a,
> @@ -159,20 +163,24 @@ static void weighted_vector_sumd(double *out, const double *in_a,
>
> static av_cold int amrnb_decode_init(AVCodecContext *avctx)
> {
> - AMRContext *p = avctx->priv_data;
> + AMRChannelsContext *s = avctx->priv_data;
> int i;
>
> - if (avctx->channels > 1) {
> - avpriv_report_missing_feature(avctx, "multi-channel AMR");
> + if (avctx->channels > 2) {
> + avpriv_report_missing_feature(avctx, ">2 channel AMR");
> return AVERROR_PATCHWELCOME;
> }
>
> - avctx->channels = 1;
> - avctx->channel_layout = AV_CH_LAYOUT_MONO;
> + if (!avctx->channels) {
> + avctx->channels = 1;
> + avctx->channel_layout = AV_CH_LAYOUT_MONO;
> + }
> if (!avctx->sample_rate)
> avctx->sample_rate = 8000;
> - avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
> + avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
>
> + for (int ch = 0; ch < avctx->channels; ch++) {
> + AMRContext *p = &s->ch[ch];
> // p->excitation always points to the same position in p->excitation_buf
> p->excitation = &p->excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1];
>
> @@ -188,6 +196,7 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx)
> ff_acelp_vectors_init(&p->acelpv_ctx);
> ff_celp_filter_init(&p->celpf_ctx);
> ff_celp_math_init(&p->celpm_ctx);
> + }
>
> return 0;
> }
> @@ -949,25 +958,30 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
> int *got_frame_ptr, AVPacket *avpkt)
> {
>
> - AMRContext *p = avctx->priv_data; // pointer to private data
> + AMRChannelsContext *s = avctx->priv_data; // pointer to private data
> AVFrame *frame = data;
> const uint8_t *buf = avpkt->data;
> int buf_size = avpkt->size;
> - float *buf_out; // pointer to the output data buffer
> - int i, subframe, ret;
> - float fixed_gain_factor;
> - AMRFixed fixed_sparse = {0}; // fixed vector up to anti-sparseness processing
> - float spare_vector[AMR_SUBFRAME_SIZE]; // extra stack space to hold result from anti-sparseness processing
> - float synth_fixed_gain; // the fixed gain that synthesis should use
> - const float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use
> + int ret;
>
> /* get output buffer */
> frame->nb_samples = AMR_BLOCK_SIZE;
> if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> return ret;
> - buf_out = (float *)frame->data[0];
> +
> + for (int ch = 0; ch < avctx->channels; ch++) {
> + AMRContext *p = &s->ch[ch];
> + float fixed_gain_factor;
> + AMRFixed fixed_sparse = {0}; // fixed vector up to anti-sparseness processing
> + float spare_vector[AMR_SUBFRAME_SIZE]; // extra stack space to hold result from anti-sparseness processing
> + float synth_fixed_gain; // the fixed gain that synthesis should use
> + const float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use
> + float *buf_out = (float *)frame->extended_data[ch];
> + int channel_size;
> + int i, subframe;
>
> p->cur_frame_mode = unpack_bitstream(p, buf, buf_size);
> + channel_size = frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for rounding and +8 for TOC
> if (p->cur_frame_mode == NO_DATA) {
> av_log(avctx, AV_LOG_ERROR, "Corrupt bitstream\n");
> return AVERROR_INVALIDDATA;
> @@ -1072,11 +1086,13 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data,
> * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */
> p->acelpv_ctx.weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3],
> 0.84, 0.16, LP_FILTER_ORDER);
> + buf += channel_size;
> + buf_size -= channel_size;
> + }
>
> *got_frame_ptr = 1;
>
> - /* return the amount of bytes consumed if everything was OK */
> - return frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for rounding and +8 for TOC
> + return avpkt->size;
> }
>
>
> @@ -1085,10 +1101,10 @@ const AVCodec ff_amrnb_decoder = {
> .long_name = NULL_IF_CONFIG_SMALL("AMR-NB (Adaptive Multi-Rate NarrowBand)"),
> .type = AVMEDIA_TYPE_AUDIO,
> .id = AV_CODEC_ID_AMR_NB,
> - .priv_data_size = sizeof(AMRContext),
> + .priv_data_size = sizeof(AMRChannelsContext),
> .init = amrnb_decode_init,
> .decode = amrnb_decode_frame,
> .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
> - .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
> + .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
> AV_SAMPLE_FMT_NONE },
> };
> diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c
> index a6c1d894d7..998dd82791 100644
> --- a/libavcodec/amrwbdec.c
> +++ b/libavcodec/amrwbdec.c
> @@ -93,21 +93,30 @@ typedef struct AMRWBContext {
>
> } AMRWBContext;
>
> +typedef struct AMRWBChannelsContext {
> + AMRWBContext ch[2];
> +} AMRWBChannelsContext;
> +
> static av_cold int amrwb_decode_init(AVCodecContext *avctx)
> {
> - AMRWBContext *ctx = avctx->priv_data;
> + AMRWBChannelsContext *s = avctx->priv_data;
> int i;
>
> - if (avctx->channels > 1) {
> - avpriv_report_missing_feature(avctx, "multi-channel AMR");
> + if (avctx->channels > 2) {
> + avpriv_report_missing_feature(avctx, ">2 channel AMR");
> return AVERROR_PATCHWELCOME;
> }
>
> - avctx->channels = 1;
> - avctx->channel_layout = AV_CH_LAYOUT_MONO;
> + if (!avctx->channels) {
> + avctx->channels = 1;
> + avctx->channel_layout = AV_CH_LAYOUT_MONO;
> + }
> if (!avctx->sample_rate)
> avctx->sample_rate = 16000;
> - avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
> + avctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
> +
> + for (int ch = 0; ch < avctx->channels; ch++) {
> + AMRWBContext *ctx = &s->ch[ch];
>
> av_lfg_init(&ctx->prng, 1);
>
> @@ -124,6 +133,7 @@ static av_cold int amrwb_decode_init(AVCodecContext *avctx)
> ff_acelp_vectors_init(&ctx->acelpv_ctx);
> ff_celp_filter_init(&ctx->celpf_ctx);
> ff_celp_math_init(&ctx->celpm_ctx);
> + }
>
> return 0;
> }
> @@ -1094,13 +1104,21 @@ static void update_sub_state(AMRWBContext *ctx)
> static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
> int *got_frame_ptr, AVPacket *avpkt)
> {
> - AMRWBContext *ctx = avctx->priv_data;
> + AMRWBChannelsContext *s = avctx->priv_data;
> AVFrame *frame = data;
> - AMRWBFrame *cf = &ctx->frame;
> const uint8_t *buf = avpkt->data;
> int buf_size = avpkt->size;
> + int sub, i, ret;
> +
> + /* get output buffer */
> + frame->nb_samples = 4 * AMRWB_SFR_SIZE_16k;
> + if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> + return ret;
> +
> + for (int ch = 0; ch < avctx->channels; ch++) {
> + AMRWBContext *ctx = &s->ch[ch];
> + AMRWBFrame *cf = &ctx->frame;
> int expected_fr_size, header_size;
> - float *buf_out;
> float spare_vector[AMRWB_SFR_SIZE]; // extra stack space to hold result from anti-sparseness processing
> float fixed_gain_factor; // fixed gain correction factor (gamma)
> float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use
> @@ -1110,13 +1128,7 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
> float hb_exc[AMRWB_SFR_SIZE_16k]; // excitation for the high frequency band
> float hb_samples[AMRWB_SFR_SIZE_16k]; // filtered high-band samples from synthesis
> float hb_gain;
> - int sub, i, ret;
> -
> - /* get output buffer */
> - frame->nb_samples = 4 * AMRWB_SFR_SIZE_16k;
> - if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
> - return ret;
> - buf_out = (float *)frame->data[0];
> + float *buf_out = (float *)frame->extended_data[ch];
>
> header_size = decode_mime_header(ctx, buf);
> expected_fr_size = ((cf_sizes_wb[ctx->fr_cur_mode] + 7) >> 3) + 1;
> @@ -1127,9 +1139,10 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
> if (ctx->fr_cur_mode == NO_DATA || !ctx->fr_quality) {
> /* The specification suggests a "random signal" and
> "a muting technique" to "gradually decrease the output level". */
> - av_samples_set_silence(&frame->data[0], 0, frame->nb_samples, 1, AV_SAMPLE_FMT_FLT);
> - *got_frame_ptr = 1;
> - return expected_fr_size;
> + av_samples_set_silence(&frame->extended_data[ch], 0, frame->nb_samples, 1, AV_SAMPLE_FMT_FLT);
> + buf += expected_fr_size;
> + buf_size -= expected_fr_size;
> + continue;
> }
> if (ctx->fr_cur_mode > MODE_SID) {
> av_log(avctx, AV_LOG_ERROR,
> @@ -1270,9 +1283,13 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
> memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(ctx->isp[3][0]));
> memcpy(ctx->isf_past_final, ctx->isf_cur, LP_ORDER * sizeof(float));
>
> + buf += expected_fr_size;
> + buf_size -= expected_fr_size;
> + }
> +
> *got_frame_ptr = 1;
>
> - return expected_fr_size;
> + return avpkt->size;
> }
>
> const AVCodec ff_amrwb_decoder = {
> @@ -1280,7 +1297,7 @@ const AVCodec ff_amrwb_decoder = {
> .long_name = NULL_IF_CONFIG_SMALL("AMR-WB (Adaptive Multi-Rate WideBand)"),
> .type = AVMEDIA_TYPE_AUDIO,
> .id = AV_CODEC_ID_AMR_WB,
> - .priv_data_size = sizeof(AMRWBContext),
> + .priv_data_size = sizeof(AMRWBChannelsContext),
> .init = amrwb_decode_init,
> .decode = amrwb_decode_frame,
> .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_CHANNEL_CONF,
> diff --git a/libavformat/amr.c b/libavformat/amr.c
> index 8e79caee18..3f73c51a57 100644
> --- a/libavformat/amr.c
> +++ b/libavformat/amr.c
> @@ -21,13 +21,12 @@
>
> /*
> Write and read amr data according to RFC3267, http://www.ietf.org/rfc/rfc3267.txt?number=3267
> -
> -Only mono files are supported.
> -
> */
>
> #include "libavutil/channel_layout.h"
> +#include "libavutil/intreadwrite.h"
> #include "avformat.h"
> +#include "avio_internal.h"
> #include "internal.h"
> #include "rawdec.h"
> #include "rawenc.h"
> @@ -36,8 +35,10 @@ typedef struct AMRContext {
> FFRawDemuxerContext rawctx;
> } AMRContext;
>
> -static const char AMR_header[] = "#!AMR\n";
> -static const char AMRWB_header[] = "#!AMR-WB\n";
> +static const uint8_t AMR_header[6] = "#!AMR\x0a";
> +static const uint8_t AMRMC_header[12] = "#!AMR_MC1.0\x0a";
> +static const uint8_t AMRWB_header[9] = "#!AMR-WB\x0a";
> +static const uint8_t AMRWBMC_header[15] = "#!AMR-WB_MC1.0\x0a";
>
> static const uint8_t amrnb_packed_size[16] = {
> 13, 14, 16, 18, 20, 21, 27, 32, 6, 1, 1, 1, 1, 1, 1, 1
> @@ -69,7 +70,7 @@ static int amr_probe(const AVProbeData *p)
> {
> // Only check for "#!AMR" which could be amr-wb, amr-nb.
> // This will also trigger multichannel files: "#!AMR_MC1.0\n" and
> - // "#!AMR-WB_MC1.0\n" (not supported)
> + // "#!AMR-WB_MC1.0\n"
>
> if (!memcmp(p->buf, AMR_header, 5))
> return AVPROBE_SCORE_MAX;
> @@ -82,35 +83,59 @@ static int amr_read_header(AVFormatContext *s)
> {
> AVIOContext *pb = s->pb;
> AVStream *st;
> - uint8_t header[9];
> + uint8_t header[19];
> + int back = 0, ret;
> +
> + ret = ffio_ensure_seekback(s->pb, 19);
> + if (ret < 0)
> + return ret;
>
> - if (avio_read(pb, header, 6) != 6)
> + if (avio_read(pb, header, 19) != 19)
> return AVERROR_INVALIDDATA;
There are header-only AMR files that contain nothing but
"#!AMR\n", so unconditionally reading 19 bytes will fail on
them. It would be better to check the headers of the
different AMR variants the way this patch does:
https://patchwork.ffmpeg.org/project/ffmpeg/patch/TYCPR01MB59827312A79D5E772DE0D6AAC1DC9@TYCPR01MB5982.jpnprd01.prod.outlook.com/
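For illustration, a minimal standalone sketch (not the linked patch; the
helper name amr_match_header is made up here) of matching the known magics
longest-first against however many bytes were actually read, so a
header-only file containing just "#!AMR\n" is still recognized:

#include <stdint.h>
#include <string.h>

/* Known AMR file magics, longest first so the longer variants are
 * preferred when enough bytes are available. */
static const struct { const char *magic; size_t len; } amr_magics[] = {
    { "#!AMR-WB_MC1.0\n", 15 },
    { "#!AMR_MC1.0\n",    12 },
    { "#!AMR-WB\n",        9 },
    { "#!AMR\n",           6 },
};

/* Return the length of the matched magic, or 0 if none matches. */
static size_t amr_match_header(const uint8_t *buf, size_t buf_size)
{
    for (size_t i = 0; i < sizeof(amr_magics) / sizeof(amr_magics[0]); i++)
        if (buf_size >= amr_magics[i].len &&
            !memcmp(buf, amr_magics[i].magic, amr_magics[i].len))
            return amr_magics[i].len;
    return 0;
}

The demuxer could then read up to 19 bytes while tolerating a short read,
call the helper on whatever was read, and seek back only the surplus
bytes, instead of returning AVERROR_INVALIDDATA whenever the file is
shorter than 19 bytes.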
>
> st = avformat_new_stream(s, NULL);
> if (!st)
> return AVERROR(ENOMEM);
> - if (memcmp(header, AMR_header, 6)) {
> - if (avio_read(pb, header + 6, 3) != 3)
> - return AVERROR_INVALIDDATA;
> - if (memcmp(header, AMRWB_header, 9)) {
> - return -1;
> - }
> -
> + if (!memcmp(header, AMR_header, sizeof(AMR_header))) {
> + st->codecpar->codec_tag = MKTAG('s', 'a', 'm', 'r');
> + st->codecpar->codec_id = AV_CODEC_ID_AMR_NB;
> + st->codecpar->sample_rate = 8000;
> + st->codecpar->channels = 1;
> + st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
> + back = 19 - sizeof(AMR_header);
> + } else if (!memcmp(header, AMRWB_header, sizeof(AMRWB_header))) {
> st->codecpar->codec_tag = MKTAG('s', 'a', 'w', 'b');
> st->codecpar->codec_id = AV_CODEC_ID_AMR_WB;
> st->codecpar->sample_rate = 16000;
> - } else {
> + st->codecpar->channels = 1;
> + st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
> + back = 19 - sizeof(AMRWB_header);
> + } else if (!memcmp(header, AMRMC_header, sizeof(AMRMC_header))) {
> st->codecpar->codec_tag = MKTAG('s', 'a', 'm', 'r');
> st->codecpar->codec_id = AV_CODEC_ID_AMR_NB;
> st->codecpar->sample_rate = 8000;
> + st->codecpar->channels = AV_RL32(header + 12);
> + back = 15 - sizeof(AMRMC_header);
> + } else if (!memcmp(header, AMRWBMC_header, sizeof(AMRWBMC_header))) {
> + st->codecpar->codec_tag = MKTAG('s', 'a', 'w', 'b');
> + st->codecpar->codec_id = AV_CODEC_ID_AMR_WB;
> + st->codecpar->sample_rate = 16000;
> + st->codecpar->channels = AV_RL32(header + 15);
> + back = 15 - sizeof(AMRWBMC_header);
> + } else {
> + return AVERROR_INVALIDDATA;
> }
> - st->codecpar->channels = 1;
> - st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
> +
> + if (st->codecpar->channels < 1)
> + return AVERROR_INVALIDDATA;
> +
> st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
> ffstream(st)->need_parsing = AVSTREAM_PARSE_FULL_RAW;
> avpriv_set_pts_info(st, 64, 1, st->codecpar->sample_rate);
>
> + if (back > 0)
> + avio_seek(pb, -back, SEEK_CUR);
> +
> return 0;
> }
>
> --
> 2.33.0
>