[FFmpeg-devel] [PATCH] ALAC Encoder

Mon Aug 18 18:25:10 CEST 2008

Hi,

On Monday 18 Aug 2008 3:46:23 am Michael Niedermayer wrote:
> On Mon, Aug 18, 2008 at 02:38:24AM +0530, Jai Menon wrote:
> > Hi,
> >
> > On Sunday 17 Aug 2008 5:17:52 pm Michael Niedermayer wrote:
> > > On Sun, Aug 17, 2008 at 11:17:10AM +0530, Jai Menon wrote:
>
> [...]
>
> [...]
>
> > Index: libavcodec/alacenc.c
> > ===================================================================
> > --- libavcodec/alacenc.c	(revision 14818)
> > +++ libavcodec/alacenc.c	(working copy)
> > @@ -33,15 +33,58 @@
> >
> >  #define ALAC_ESCAPE_CODE          0x1FF
> >  #define ALAC_MAX_LPC_ORDER        30
> > +#define DEFAULT_MAX_PRED_ORDER    6
> > +#define DEFAULT_MIN_PRED_ORDER    4
> > +#define ALAC_MAX_LPC_PRECISION    9
> > +#define ALAC_MAX_LPC_SHIFT        9
>
> ok
>
> > +#define ALAC_CHMODE_LEFT_RIGHT    1
> > +#define ALAC_CHMODE_LEFT_SIDE     8
> > +#define ALAC_CHMODE_RIGHT_SIDE    9
> > +#define ALAC_CHMODE_MID_SIDE     10
> > +
> >
> > +typedef struct RiceContext {
> > +    int history_mult;
> > +    int initial_history;
> > +    int k_modifier;
> > +    int rice_modifier;
> > +} RiceContext;
> > +
> > +typedef struct LPCContext {
> > +    int lpc_order;
> > +    int lpc_coeff[ALAC_MAX_LPC_ORDER+1];
> > +    int lpc_quant;
> > +} LPCContext;
> > +
> > +typedef struct AlacEncodeContext {
> > +    int compression_level;
> > +    int max_coded_frame_size;
> > +    int write_sample_size;
> > +    int32_t sample_buf[MAX_CHANNELS][DEFAULT_FRAME_SIZE];
>
> ok
>
> > +    int32_t predictor_buf[DEFAULT_FRAME_SIZE];
> >      int interlacing_shift;
> >      int interlacing_leftweight;
> >      PutBitContext pbctx;
> >
> > +    RiceContext rc;
> > +    LPCContext lpc[MAX_CHANNELS];
>
> ok
>
> >      DSPContext dspctx;
> >      AVCodecContext *avctx;
> >  } AlacEncodeContext;
> >
> >
> >
> > +static void init_sample_buffers(AlacEncodeContext *s, int16_t *input_samples)
> > +{
> > +    int ch, i;
> > +
> > +    for(ch=0;ch<s->avctx->channels;ch++) {
> > +        int16_t *sptr = input_samples + ch;
> > +        for(i=0;i<s->avctx->frame_size;i++) {
> > +            s->sample_buf[ch][i] = *sptr;
> > +            sptr += s->avctx->channels;
> > +        }
> > +    }
> > +}
> > +
> >  static void encode_scalar(AlacEncodeContext *s, int x, int k, int write_sample_size)
> >  {
> >      int divisor, q, r;
>
> ok
>
> > @@ -71,7 +114,7 @@
> >
> >  static void write_frame_header(AlacEncodeContext *s, int is_verbatim)
> >  {
> > -    put_bits(&s->pbctx, 3,  s->channels-1);                 // No. of channels -1
> > +    put_bits(&s->pbctx, 3,  s->avctx->channels-1);          // No. of channels -1
> >      put_bits(&s->pbctx, 16, 0);                             // Seems to be zero
> >      put_bits(&s->pbctx, 1,  1);                             // Sample count is in the header
> >      put_bits(&s->pbctx, 2,  0);                             // FIXME: Wasted bytes field
>
> ok
>
> > @@ -79,6 +122,205 @@
> >      put_bits(&s->pbctx, 32, s->avctx->frame_size);          // No. of samples in the frame
> >  }
> >
> > +static void calc_predictor_params(AlacEncodeContext *s, int ch)
> > +{
> > +    int32_t coefs[MAX_LPC_ORDER][MAX_LPC_ORDER];
> > +    int shift[MAX_LPC_ORDER];
> > +    int opt_order;
> > +
> > +    opt_order = ff_lpc_calc_coefs(&s->dspctx, s->sample_buf[ch], s->avctx->frame_size, DEFAULT_MIN_PRED_ORDER, DEFAULT_MAX_PRED_ORDER,
> > +                                  ALAC_MAX_LPC_PRECISION, coefs, shift, 1, ORDER_METHOD_EST, ALAC_MAX_LPC_SHIFT, 1);
> > +
> > +    s->lpc[ch].lpc_order = opt_order;
> > +    s->lpc[ch].lpc_quant = shift[opt_order-1];
> > +    memcpy(s->lpc[ch].lpc_coeff, coefs[opt_order-1], opt_order*sizeof(int));
> > +}
> > +
>
> I think this should be using AVCodecContext.min/max_prediction_order
>
> > +static int estimate_stereo_mode(int32_t *left_ch, int32_t *right_ch, int n)
> > +{
> > +    int i, best;
> > +    int32_t lt, rt;
> > +    uint64_t sum[4];
> > +    uint64_t score[4];
> > +
> > +    /* calculate sum of 2nd order residual for each channel */
> > +    sum[0] = sum[1] = sum[2] = sum[3] = 0;
> > +    for(i=2; i<n; i++) {
> > +        lt = left_ch[i] - 2*left_ch[i-1] + left_ch[i-2];
> > +        rt = right_ch[i] - 2*right_ch[i-1] + right_ch[i-2];
> > +        sum[2] += FFABS((lt + rt) >> 1);
> > +        sum[3] += FFABS(lt - rt);
> > +        sum[0] += FFABS(lt);
> > +        sum[1] += FFABS(rt);
> > +    }
> > +
> > +    /* calculate score for each mode */
> > +    score[0] = sum[0] + sum[1];
> > +    score[1] = sum[0] + sum[3];
> > +    score[2] = sum[1] + sum[3];
> > +    score[3] = sum[2] + sum[3];
> > +
> > +    /* return mode with lowest score */
> > +    best = 0;
> > +    for(i=1; i<4; i++) {
> > +        if(score[i] < score[best]) {
> > +            best = i;
> > +        }
> > +    }
>
> ok
>
> > +    if(best == 0) {
> > +        return ALAC_CHMODE_LEFT_RIGHT;
> > +    } else if(best == 1) {
> > +        return ALAC_CHMODE_LEFT_SIDE;
> > +    } else if(best == 2) {
> > +        return ALAC_CHMODE_RIGHT_SIDE;
> > +    } else {
> > +        return ALAC_CHMODE_MID_SIDE;
> > +    }
> > +}
>
> I think best could simply be returned
>
> > +
> > +static void alac_stereo_decorrelation(AlacEncodeContext *s)
> > +{
> > +    int32_t *left = s->sample_buf[0], *right = s->sample_buf[1];
> > +    int i, mode, n = s->avctx->frame_size;
> > +
> > +    mode = estimate_stereo_mode(left, right, n);
> > +
> > +    if(mode == ALAC_CHMODE_LEFT_RIGHT) {
> > +        s->interlacing_leftweight = 0;
> > +        s->interlacing_shift = 0;
> > +        return;
> > +    }
> > +
> > +    if(mode == ALAC_CHMODE_LEFT_SIDE) {
> > +        for(i=0; i<n; i++) {
> > +            right[i] = left[i] - right[i];
> > +        }
> > +        s->interlacing_leftweight = 1;
> > +        s->interlacing_shift = 0;
> > +
> > +    } else {
> > +        int32_t tmp;
> > +        for(i=0; i<n; i++) {
> > +            tmp = left[i];
> > +            left[i] = (tmp + right[i]) >> 1;
> > +            right[i] = tmp - right[i];
> > +        }
> > +        s->interlacing_leftweight = 1;
> > +        s->interlacing_shift = 1;
> > +    }
>
> I think one mode is missing

I left out the right-side mode because I really don't see how the decoder 
could support it without accidentally swapping channels. Or am I missing 
something?

Regards,

Jai Menon