[FFmpeg-devel] [PATCH] RoQ video encoder (take 4)

Fri Jun 22 22:15:16 CEST 2007

Hi

On Wed, Jun 20, 2007 at 12:32:03PM +0200, Vitor wrote:
[...]
> 
> >[...]
> >  
> >>+/**
> >>+ * Get macroblocks from parts of the image
> >>+ */
> >>+static void get_frame_mb(AVFrame *frame, int x, int y, uint8_t mb[], int 
> >>dim)
> >>+{
> >>+    int i, j, cp;
> >>+    int stride = frame->linesize[0];
> >>+
> >>+    for (i=0; i<dim; i++)
> >>+        for (j=0; j<dim; j++)
> >>+            for (cp=0; cp<3; cp++)
> >>+                mb[i*3*dim+j*3+cp] = frame->data[cp][(y+i)*stride+x+j];
> >>+}
> >>    
> >
> >why is the planar data converted to packed?
> >  
> 
> It simplified some code...

which code?
also a 3 byte compare where 2 are weighted by CHROMA_BIAS is very SIMD
unfriendly (=slow)

[...]
> 
> [...]
> >>+
> >>+
> >>+
> >>+#define SPOOL_TYPECODE(type)        \
> >>+do {\
> >>+    int a;\
> >>+    typeSpool |= ((type) & 3) << (14 - typeSpoolLength);\
> >>+    typeSpoolLength += 2;\
> >>+    if (typeSpoolLength == 16) {\
> >>+        bytestream_put_le16(&enc->out_buf, typeSpool); \
> >>+        for (a=0; a<argumentSpoolLength; a++)\
> >>+            bytestream_put_byte(&enc->out_buf, argumentSpool[a]); \
> >>+        typeSpoolLength = 0;\
> >>+        typeSpool = 0;\
> >>+        argumentSpoolLength = 0;\
> >>+    }\
> >>+} while(0)
> >>    
> >
> >instead of this complicated fifo spool thingy, why not
> >
> >use
> >bytestream_put_byte(&out, arg); instead of
> >SPOOL_ARGUMENT(arg)
> >
> >and
> >
> >write_typecode(){
> >    spool |= (type & 3) << (14 - spool_length);
> >    spool_length += 2;
> >    if (spool_length == 16){
> >        AV_WL16(typep, spool);
> >        spool= 0;
> >        typep= out;
> >        out+=2;
> >        spool_length=0;
> >    }
> >}
> >  
> 
> I didn't do exactly as you suggested. If you still think it is still not
> ok, I'll try to polish it some more.

its better but its still a mess, if you can clean it up further that would be
definitly welcome

> 
> >>+typedef struct {
> >>+    int d[2];
> >>+} motion_vect;
> >>    
> >
> >why not drop this struct and use int [2] directly?
> >  
> 
> For being able do do motion1=motion2;

is 
memcpy(a,b, sizeof(a));
a[x]
so much worse than
a=b;
a.d[x]
?
copy is uglier, access is nicer ...

anyway i dont mind keeping struct motion_vect if you prefer

[...]
> +#include <stdio.h>

hmm what part needs stdio ?

[...]

> +#define ROQ_LAMBDA_MAX (uint64_t) FF_LAMBDA_MAX/1024

hmm the /1024 will decrease the range of lambda significantly

also a lambda X specified by the user should have the same meaning
independant of the codec (i dont know if thats the case or not)

[...]
> +static void unpack_roq_qcell(uint8_t cb2[], roq_qcell *qcell, uint8_t u[4*4*3])
> +{
> +    uint8_t base[4][4*3];
> +    int n,cp,i;
> +    static const int offsets[4] = {0, 2, 8, 10};
> +
> +    for(i=0;i<4;i++)
> +        memcpy(base[i], cb2 + qcell->idx[i]*2*2*3, 2*2*3);
> +
> +    for(cp=0;cp<3;cp++)
> +        for(n=0;n<4;n++) {
> +            u[cp+3*(offsets[n]  )] = base[n][cp+3*0];
> +            u[cp+3*(offsets[n]+1)] = base[n][cp+3*1];
> +            u[cp+3*(offsets[n]+4)] = base[n][cp+3*2];
> +            u[cp+3*(offsets[n]+5)] = base[n][cp+3*3];
> +        }

    for(n=0;n<4;n++) {
        memcpy(u+3*(offsets[n]  ), base[n]+3*0, 6);
        memcpy(u+3*(offsets[n]+4), base[n]+3*2, 6);
    }

[...]
> +// FIXME Could use DSPContext.sse, but it is not so speed critical (used
> +// just for motion estimation).
> +static int block_sse(uint8_t **buf1, uint8_t **buf2, int x1, int y1, int x2, int y2,
> +                     int stride, int size)
> +{
> +    int i, j, k, bias;
> +    int sse=0;
> +
> +    for (k=0; k<3; k++) {
> +        bias = (k ? CHROMA_BIAS : 1);
> +        for (i=0; i<size; i++)
> +            for (j=0; j<size; j++)
> +                sse += (bias*square(buf1[k][(y1+i)*stride + x1+j] -
> +                                    buf2[k][(y2+i)*stride + x2+j]) + 2)/4;
> +    }

why /4 ?
and the /4 can be factored out and done outside of the loops

> +
> +    return sse;
> +}
> +
> +static int eval_motion_dist(RoqContext *enc, int x, int y, int mx, int my,
> +                             int size)
> +{

mx/my could be a int[2] or motion_vect which would simplify the code which
calls this function

[...]
> +/**
> + * Returns distortion between two macroblocks
> + */
> +static inline int squared_diff_macroblock(uint8_t a[], uint8_t b[], int size)
> +{
> +    int sdiff=0;
> +    int n, cp, bias;
> +
> +    for(cp=0;cp<3;cp++) {
> +        bias = (cp ? CHROMA_BIAS : 1);
> +        for(n=0;n<size*size;n++)
> +            sdiff += (bias*square(b[cp+3*n] - a[cp+3*n]) + 2)/4;
> +    }
> +
> +    return sdiff;
> +}

duplicate of block_sse()

> +
> +typedef struct
> +{
> +    int eval_dist[4];
> +    int best_bit_use;
> +    int best_coding;
> +
> +    int subCels[4];
> +
> +    int motionX, motionY;
> +    int cbEntry;
> +} subcel_evaluation_t;
> +
> +typedef struct
> +{
> +    int eval_dist[4];
> +    int best_coding;
> +
> +    subcel_evaluation_t subCels[4];
> +
> +    int motionX, motionY;
> +    int cbEntry;
> +
> +    int sourceX, sourceY;
> +} cel_evaluation_t;

*X/*Y could be int[2] or motion_vect

also iam tempted to suggest to merge these 2 structs, they are pretty much the
same, but maybe it cant be done cleanly?

[...]
> +/**
> + * Find the codebook with the lowest distortion from an image
> + */
> +
> +static int index_mb(uint8_t cluster[], uint8_t cb[], int numCB,

empty line between comment and function

> +                    int *outIndex, int dim)
> +{
> +    int diff;
> +    int pick=0, lDiff;
> +    int i=0;
> +
> +    lDiff = INT_MAX;
> +
> +    /* Diff against the others */
> +    for (i=0; i<numCB; i++) {

i=0 is done twice

> +        diff = squared_diff_macroblock(cluster, cb + i*dim*dim*3, dim);

the declaration could be merged with this (int diff = squared_...)

[...]
> +/**
> + * Gets distortion for all options available to a subcel
> + */
> +static void gather_data_for_subcel(subcel_evaluation_t *subcel, int x,
> +                                   int y, RoqContext *enc, roq_tempdata_t *tempData)
> +{
> +    uint8_t mb4[4*4*3];
> +    uint8_t mb2[2*2*3];
> +    int cluster_index;
> +    int i, best_dist;
> +
> +    static const int bitsUsed[4] = {2, 10, 10, 34};
> +
> +    if (enc->framesSinceKeyframe >= 1) {
> +        subcel->motionX = enc->this_motion4[y*enc->width/16 + x/4].d[0];
> +        subcel->motionY = enc->this_motion4[y*enc->width/16 + x/4].d[1];
> +
> +        subcel->eval_dist[RoQ_ID_FCC] =
> +            eval_motion_dist(enc, x, y,
> +                             enc->this_motion4[y*enc->width/16 + x/4].d[0],
> +                             enc->this_motion4[y*enc->width/16 + x/4].d[1], 4);
> +    } else
> +        subcel->eval_dist[RoQ_ID_FCC] = INT_MAX;

maybe setting all eval_dist to INT_MAX at some central place would look
simpler then these else cases

[...]
> +/**
> + * Gathers SE data for all feasible possibilities
> + */
> +static void gather_cel_data(RoqContext *enc, roq_tempdata_t *tempData)
> +{
> +    int i;
> +
> +    for (i=0; i<enc->width*enc->height/64; i++)
> +        gather_data_for_cel(tempData->cel_evals + i, enc, tempData);
> +}

does this single loop merit a function?

[...]
> +            for (j=0; j<4; j++)
> +                bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].y[j]);
> +
> +            bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].u);
> +            bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].v);

bytestream_put_buffer(6);

[...]
> +/* NOTE: Typecodes must be spooled AFTER arguments!! */
> +static void write_typecode(RoqContext *enc, uint8_t type, int *typeSpool,
> +                           int *typeSpoolLength, uint8_t **args,
> +                           uint8_t *argumentSpool)
> +{
> +    int a;
> +    *typeSpool |= ((type) & 3) << (14 - *typeSpoolLength);
> +    *typeSpoolLength += 2;
> +    if (*typeSpoolLength == 16) {
> +        bytestream_put_le16(&enc->out_buf, *typeSpool);
> +        for (a=0; a < *args - argumentSpool; a++)
> +            bytestream_put_byte(&enc->out_buf, argumentSpool[a]);
> +        *typeSpoolLength = 0;
> +        *typeSpool = 0;
> +        *args = argumentSpool;
> +    }
> +}

maybe some of the arguments passed to this function could be put into a
struct? it would avoid passing them every time

[...]
> +    /* Flush the remainder of the argument/type spool */
> +    while (typeSpoolLength)
> +        write_typecode(enc, 0x0, &typeSpool, &typeSpoolLength, &args,
> +                       argumentSpool);
> +
> +
> +}

empty lines

> +
> +
> +/**
> + * Create a single YUV cell from a 2x2 section of the image
> + */
> +static inline void frame_block_to_cell(uint8_t *block, uint8_t **data,
> +                                       int top, int left, int *stride)
> +{
> +    int i, j, u=0, v=0;
> +
> +    for (i=0; i<2; i++)
> +        for (j=0; j<2; j++) {
> +            *block++ = data[0][(top+i)*stride[0] +
> +                               left + j];
> +
> +            u       += data[1][(top+i)*stride[0] +
> +                               left + j];
> +
> +            v       += data[2][(top+i)*stride[0] +
> +                               left + j];
> +        }
> +
> +    *block++ = (u+2)/4;
> +    *block++ = (v+2)/4;
> +}

why is the left + j on a seperate line?
also a
int x= (top+i)*stride[0] + left + j;
would simplify this code

[...]
> +            results->y[0] = *buf++;
> +            results->y[1] = *buf++;
> +            results->y[2] = *buf++;
> +            results->y[3] = *buf++;

for(j=0; j<4; j++)

[...]
> +    enc->framesSinceKeyframe++;
> +
> +}

empty line

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Many that live deserve death. And some that die deserve life. Can you give
it to them? Then do not be too eager to deal out death in judgement. For
even the very wise cannot see all ends. -- Gandalf
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070622/c8490655/attachment.pgp>