[FFmpeg-devel] [PATCH] Add Lame info tag to the Xing/Info header of mp3 files

Michael Niedermayer michaelni at gmx.at
Fri May 23 15:01:13 CEST 2014


On Thu, May 22, 2014 at 05:31:09PM -0700, Giovanni Motta wrote:
> Add Lame info tag (with delay, padding, replay gain, signal peak, etc..)
> to the Xing/Info header of mp3 files.
> 
> A few notes/comments:
> 
> - Fate initially failed for lavf.mp3; this is expected.
>   The test result has been updated with new CRC.
> 
> - The Lame info tag is generated by libmp3lame and passed to the mp3enc via extradata.
> 
> - To keep the size of the tag constant and simplify the code, vbr_scale is always added.
> 
> - The Lame string vendor in the tag is fixed length, so vendor is trimmed
>   to 9 bytes and padded with 0x20 if shorter.
>   Note: the current vendor string in ffmpeg is too long. The lame tags
>   currently generated doesn't work and the delay added after the string is
>   never parsed correctly.
> 
> - replay_gain and find_peak need a version of lame patched with
>   libmp3lame/lame.c Revision 1.367 (patch tracker item #66): http://sourceforge.net/p/lame/patches/66/
>   They have no effect otherwise.
> 
> - find_peak_sample only works if Lame is configured with --enable-decoder.
>   It has no effect otherwise.
> 
> - Some fields in the Lame tag are not set because not accessible from
>   the set/get API (preset and noise shaping, for example). I will bring this to
>   the attention of the Lame developers and help there with any change if we
>   decide to merge the patch.
> 
> Thanks
> 
> Giovanni
> 
> 
> 
> ---
>  libavcodec/libmp3lame.c | 256 +++++++++++++++++++++++++++++++++++++++++++++++-
>  libavformat/mp3enc.c    | 112 ++++++++++++++++-----
>  tests/ref/lavf-fate/mp3 |   2 +-
>  3 files changed, 344 insertions(+), 26 deletions(-)

please split this patch between libavcodec and libavformat


> 
> diff --git a/libavcodec/libmp3lame.c b/libavcodec/libmp3lame.c
> index a8c39cc..8f8b5ee 100644
> --- a/libavcodec/libmp3lame.c
> +++ b/libavcodec/libmp3lame.c
> @@ -26,6 +26,7 @@
> 
>  #include <lame/lame.h>
> 
> +#include "libavutil/avstring.h"
>  #include "libavutil/channel_layout.h"
>  #include "libavutil/common.h"
>  #include "libavutil/float_dsp.h"
> @@ -40,6 +41,9 @@
> 
>  #define BUFFER_SIZE (7200 + 2 * MPA_FRAME_SIZE + MPA_FRAME_SIZE / 4+1000) // FIXME: Buffer size to small? Adding 1000 to make up for it.
> 
> +/* size of the Lame Info tag added to the Xing/Info header */
> +#define LAME_EXT_SIZE 40
> +
>  typedef struct LAMEContext {
>      AVClass *class;
>      AVCodecContext *avctx;
> @@ -48,6 +52,8 @@ typedef struct LAMEContext {
>      int buffer_index;
>      int buffer_size;
>      int reservoir;
> +    int find_peak_sample;
> +    int replay_gain;
>      int joint_stereo;
>      int abr;
>      float *samples_flt[2];

> @@ -93,6 +99,10 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
> 
>      s->avctx = avctx;
> 
> +    /* extradata is used to store the Lame Info tag */
> +    avctx->extradata = NULL;
> +    avctx->extradata_size = 0;

not needed



> +
>      /* initialize LAME and get defaults */
>      if ((s->gfp = lame_init()) == NULL)
>          return AVERROR(ENOMEM);
> @@ -131,6 +141,12 @@ static av_cold int mp3lame_encode_init(AVCodecContext *avctx)
>      /* bit reservoir usage */
>      lame_set_disable_reservoir(s->gfp, !s->reservoir);
> 
> +    /* find peak sample */
> +    lame_set_decode_on_the_fly(s->gfp, s->find_peak_sample);
> +
> +    /* compute replay gain */
> +    lame_set_findReplayGain(s->gfp, s->replay_gain);
> +
>      /* set specified parameters */
>      if (lame_init_params(s->gfp) < 0) {
>          ret = -1;
> @@ -176,6 +192,226 @@ error:
>                         s->buffer_size - s->buffer_index);                   \
>  } while (0)
> 
> +/* flow and comments adapted from PutLameVBR() in Lame's VbrTag.c */
> +static int GetLameInfoTag(lame_global_flags const *gfp, uint8_t * extradata)
> +{
> +    int nBytesWritten = 0;
> +
> +    int enc_delay = lame_get_encoder_delay(gfp);      // encoder delay
> +    int enc_padding = lame_get_encoder_padding(gfp);  // encoder padding
> +
> +    /* recall: gfp->VBR_q is for example set by the switch -V */
> +    /*         gfp->quality by -q, -h, -f, etc.               */
> +    int nQuality = (100 - 10 * lame_get_VBR_q(gfp) - lame_get_quality(gfp));
> +
> +    const char *szVersion = get_lame_very_short_version();
> +    uint8_t nVBR;
> +    uint8_t nRevision = 0x00;
> +    uint8_t nRevMethod;
> +
> +    /* numbering is different in vbr_mode vs. Lame tag */
> +    uint8_t vbr_type_translator[] = { 1, 5, 3, 2, 4, 0, 3 };
> +
> +    uint8_t nLowpass =
> +        (((lame_get_lowpassfreq(gfp) / 100.0) + .5) > 255 ?
> +                255 : (lame_get_lowpassfreq(gfp) / 100.0) + .5);
> +
> +    uint32_t nPeakSignalAmplitude = 0;
> +    uint16_t nRadioReplayGain = 0;
> +    uint16_t nAudiophileReplayGain = 0;
> +
> +    uint8_t nNoiseShaping = 0;
> +    uint8_t nStereoMode = 0;
> +    int bNonOptimal = 0;
> +    uint8_t nSourceFreq = 0;
> +
> +    uint8_t nMisc = 0;
> +
> +    size_t  nMusicLength = 0;
> +    uint16_t nMusicCRC = 0;
> +    uint16_t nTagCRC = 0;
> +
> +    /* psy model type: Gpsycho or NsPsytune */
> +    unsigned char bExpNPsyTune = lame_get_exp_nspsytune(gfp) & 1;
> +    unsigned char bSafeJoint = (lame_get_exp_nspsytune(gfp) & 2) != 0;
> +
> +    unsigned char bNoGapMore = 0;
> +    unsigned char bNoGapPrevious = 0;
> +
> +    int nNoGapCount = lame_get_nogap_total(gfp);
> +    int nNoGapCurr = lame_get_nogap_currentindex(gfp);
> +
> +    uint8_t nAthType = lame_get_ATHtype(gfp);  /* 4 bits */
> +    int nInSampleRate = lame_get_in_samplerate(gfp);
> +    uint8_t nFlags = 0;
> +
> +    /* if ABR, {store bitrate <= 255} else {store "-b"} */
> +    int nABRBitrate;
> +    switch (lame_get_VBR(gfp)) {
> +    case vbr_abr:
> +        nABRBitrate = lame_get_VBR_mean_bitrate_kbps(gfp);
> +        break;
> +    case vbr_off:
> +        nABRBitrate = lame_get_brate(gfp);
> +        break;
> +    default:          // vbr modes
> +        nABRBitrate = lame_get_VBR_mean_bitrate_kbps(gfp);
> +    }
> +
> +    /* revision and vbr method */
> +    if (lame_get_VBR(gfp) < sizeof(vbr_type_translator))
> +        nVBR = vbr_type_translator[lame_get_VBR(gfp)];
> +    else
> +        nVBR = 0x00;    // unknown
> +    nRevMethod = 0x10 * nRevision + nVBR;
> +
> +
> +    /* ReplayGain */
> +    if (lame_get_findReplayGain(gfp)) {
> +        int nRadioGain = lame_get_RadioGain(gfp);
> +        if (nRadioGain > 0x1FE)
> +            nRadioGain = 0x1FE;
> +        if (nRadioGain < -0x1FE)
> +            nRadioGain = -0x1FE;
> +
> +        nRadioReplayGain = 0x2000;  // set name code
> +        nRadioReplayGain |= 0xC00;  // set originator code to `determined automatically'
> +
> +        if (nRadioGain >= 0) {
> +            nRadioReplayGain |= nRadioGain;  // set gain adjustment
> +        } else {
> +            nRadioReplayGain |= 0x200;       // set the sign bit
> +            nRadioReplayGain |= -nRadioGain; // set gain adjustment
> +        }
> +    }
> +
> +    /* peak sample */
> +    if (lame_get_decode_on_the_fly(gfp))
> +        nPeakSignalAmplitude = abs((int) ((lame_get_PeakSample(gfp) / 32767.0) * pow(2, 23) + .5));
> +
> +    /* nogap */
> +    if (nNoGapCount != -1) {
> +        if (nNoGapCurr > 0)
> +            bNoGapPrevious = 1;
> +
> +        if (nNoGapCurr < nNoGapCount - 1)
> +            bNoGapMore = 1;
> +    }
> +
> +    /* flags */
> +    nFlags = nAthType + (bExpNPsyTune << 4)
> +        + (bSafeJoint << 5)
> +        + (bNoGapMore << 6)
> +        + (bNoGapPrevious << 7);
> +
> +    if (nQuality < 0)
> +        nQuality = 0;
> +
> +    /* stereo mode field... a bit ugly */
> +    switch (lame_get_mode(gfp)) {
> +    case MONO:
> +        nStereoMode = 0;
> +        break;
> +    case STEREO:
> +        nStereoMode = 1;
> +        break;
> +    case DUAL_CHANNEL:
> +        nStereoMode = 2;
> +        break;
> +    case JOINT_STEREO:
> +        if (lame_get_force_ms(gfp))
> +            nStereoMode = 4;
> +        else
> +            nStereoMode = 3;
> +        break;
> +    case NOT_SET:
> +        // FALLTHROUGH
> +    default:
> +        nStereoMode = 7;
> +        break;
> +    }
> +
> +    /* intensity stereo : nStereoMode = 6. IS is not implemented */
> +    if (nInSampleRate <= 32000)
> +        nSourceFreq = 0x00;
> +    else if (nInSampleRate == 48000)
> +        nSourceFreq = 0x02;
> +    else if (nInSampleRate > 48000)
> +        nSourceFreq = 0x03;
> +    else
> +        nSourceFreq = 0x01;   // default is 44100Hz
> +
> +    /* check if the user overrided the default LAME behaviour with some nasty options */
> +    if ((lame_get_no_short_blocks(gfp) == 1) ||     // short blocks dispensed
> +        (lame_get_force_short_blocks(gfp) == 1) ||  // short blocks forced
> +        ((lame_get_lowpassfreq(gfp) == -1) && (lame_get_highpassfreq(gfp) == -1)) ||    // "-k"
> +        (lame_get_scale_left(gfp) < lame_get_scale_right(gfp)) ||
> +        (lame_get_scale_left(gfp) > lame_get_scale_right(gfp)) ||
> +        (lame_get_disable_reservoir(gfp) && lame_get_brate(gfp) < 320) ||
> +        lame_get_noATH(gfp) || lame_get_ATHonly(gfp) || (nAthType == 0) ||
> +        (nInSampleRate <= 32000))
> +        bNonOptimal = 1;
> +
> +    nMisc = nNoiseShaping + (nStereoMode << 2)
> +        + (bNonOptimal << 5)
> +        + (nSourceFreq << 6);
> +
> +    /* write all this information to extradata */
> +    AV_WB32(&extradata[nBytesWritten], nQuality);
> +    nBytesWritten += 4;
> +
> +    av_strlcpy((char *) &extradata[nBytesWritten], szVersion, 9);
> +    nBytesWritten += 9;
> +
> +    extradata[nBytesWritten] = nRevMethod;
> +    nBytesWritten++;
> +
> +    extradata[nBytesWritten] = nLowpass;
> +    nBytesWritten++;
> +
> +    AV_WB32(&extradata[nBytesWritten], nPeakSignalAmplitude);
> +    nBytesWritten += 4;
> +
> +    AV_WB16(&extradata[nBytesWritten], nRadioReplayGain);
> +    nBytesWritten += 2;
> +
> +    AV_WB16(&extradata[nBytesWritten], nAudiophileReplayGain);
> +    nBytesWritten += 2;
> +
> +    extradata[nBytesWritten] = nFlags;
> +    nBytesWritten++;
> +
> +    if (nABRBitrate >= 255)
> +        extradata[nBytesWritten] = 0xFF;
> +    else
> +        extradata[nBytesWritten] = nABRBitrate;
> +    nBytesWritten++;
> +
> +    extradata[nBytesWritten] = enc_delay >> 4;
> +    extradata[nBytesWritten + 1] = (enc_delay << 4) + (enc_padding >> 8);
> +    extradata[nBytesWritten + 2] = enc_padding;
> +    nBytesWritten += 3;
> +
> +    extradata[nBytesWritten] = nMisc;
> +    nBytesWritten++;
> +
> +    extradata[nBytesWritten++] = 0;   // unused in rev0
> +
> +    AV_WB16(&extradata[nBytesWritten], 0);
> +    nBytesWritten += 2;
> +
> +    AV_WB32(&extradata[nBytesWritten], (int) nMusicLength);
> +    nBytesWritten += 4;
> +
> +    AV_WB16(&extradata[nBytesWritten], nMusicCRC);
> +    nBytesWritten += 2;
> +
> +    AV_WB16(&extradata[nBytesWritten], nTagCRC);
> +    nBytesWritten += 2;

see libavcodec/bytestream.h, above can be simplified using it



> +
> +    return nBytesWritten;
> +}
> +
>  static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
>                                  const AVFrame *frame, int *got_packet_ptr)
>  {
> @@ -213,6 +449,18 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
>      } else {
>          lame_result = lame_encode_flush(s->gfp, s->buffer + s->buffer_index,
>                                          s->buffer_size - s->buffer_index);
> +        /* get Lame Info tag and store it in extradata */
> +        if(avctx->extradata == NULL) {
> +            avctx->extradata = av_malloc(LAME_EXT_SIZE);
> +            avctx->extradata_size = GetLameInfoTag(s->gfp, avctx->extradata);
> +            if (avctx->extradata_size != LAME_EXT_SIZE) {
> +                av_log(avctx, AV_LOG_ERROR,
> +                       "error storing Lame info tag (%d bytes written instead of %d)\n",
> +                       avctx->extradata_size, LAME_EXT_SIZE);
> +            } else {
> +                av_log(avctx, AV_LOG_DEBUG, "lame info tag succesfully stored\n");
> +            }

that looks wrong, you set extradata_size to something else than the
allocated size then you check it and print an error but dont
handle it beyond that,
if extradata_size is bigger than the allocated space this will
be inconsistent and crash if accessed


> +        }
>      }
>      if (lame_result < 0) {
>          if (lame_result == -1) {
> @@ -267,9 +515,11 @@ static int mp3lame_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
>  #define OFFSET(x) offsetof(LAMEContext, x)
>  #define AE AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
>  static const AVOption options[] = {
> -    { "reservoir",    "use bit reservoir", OFFSET(reservoir),    AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
> -    { "joint_stereo", "use joint stereo",  OFFSET(joint_stereo), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
> -    { "abr",          "use ABR",           OFFSET(abr),          AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
> +    { "reservoir",        "use bit reservoir",   OFFSET(reservoir),        AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
> +    { "find_peak_sample", "find peak sample",    OFFSET(find_peak_sample), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
> +    { "replay_gain",      "compute replay gain", OFFSET(replay_gain),      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },
> +    { "joint_stereo",     "use joint stereo",    OFFSET(joint_stereo),     AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AE },
> +    { "abr",              "use ABR",             OFFSET(abr),              AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AE },

reformating existing options and adding new ones should be in
seperate patches to keep git diff and blame readable


[...]

> +
> +    avio_flush(s->pb);
>      avio_seek(s->pb, 0, SEEK_END);
>  }
> 
> @@ -363,7 +431,7 @@ static int mp3_write_trailer(struct AVFormatContext *s)
>          avio_write(s->pb, buf, ID3v1_TAG_SIZE);
>      }
> 
> -    if (mp3->xing_offset)
> +    if (mp3->write_xing && mp3->xing_offset)
>          mp3_update_xing(s);
> 
>      return 0;

> @@ -384,7 +452,7 @@ static int query_codec(enum AVCodecID id, int std_compliance)
>  AVOutputFormat ff_mp2_muxer = {
>      .name              = "mp2",
>      .long_name         = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
> -    .mime_type         = "audio/mpeg",
> +    .mime_type         = "audio/x-mpeg",

like ubitux already said, this doesnt belong in here, please
read over your patch before submitting it to make sure it doesnt
contain things that you didnt intent to be in it ... could be theres
more that we dont recognize but you of course would know which
changes you want to submit ...

[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

In fact, the RIAA has been known to suggest that students drop out
of college or go to community college in order to be able to afford
settlements. -- The RIAA
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 181 bytes
Desc: Digital signature
URL: <https://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20140523/ac8097f9/attachment.asc>


More information about the ffmpeg-devel mailing list