[FFmpeg-devel] [PATCH] RoQ video encoder

Michael Niedermayer michaelni
Fri May 11 03:55:13 CEST 2007


Hi

On Thu, May 10, 2007 at 10:20:46AM +0200, Vitor wrote:
> Hi,
> 
> Now that the muxer and the audio encoder are in svn, I'm sending the 
> video encoder. Suggestions (and criticism) expected and welcome!
> 
> -Vitor

[...]

> +#define ENLARGE_ELEMENT(x,y)    \
> +    src = &image4[(y*4)+x];\
> +    memcpy(&image8[(y*16)+(x*2)], src, sizeof(roq_pixel_t));\
> +    memcpy(&image8[(y*16)+(x*2)+1], src, sizeof(roq_pixel_t));\
> +    memcpy(&image8[(y*16)+(x*2)+8], src, sizeof(roq_pixel_t));\
> +    memcpy(&image8[(y*16)+(x*2)+9], src, sizeof(roq_pixel_t))

copying 1 struct into another can be done by
a=b
instead of
memcpy(&a, &b, sizeof(roq_pixel_t));

which is IMHO cleaner


> +
> +
> +static inline void enlarge(roq_pixel_t *image4, roq_pixel_t *image8)
> +{
> +    roq_pixel_t *src;
> +
> +    ENLARGE_ELEMENT(0,0);
> +    ENLARGE_ELEMENT(1,0);
> +    ENLARGE_ELEMENT(2,0);
> +    ENLARGE_ELEMENT(3,0);
> +
> +    ENLARGE_ELEMENT(0,1);
> +    ENLARGE_ELEMENT(1,1);
> +    ENLARGE_ELEMENT(2,1);
> +    ENLARGE_ELEMENT(3,1);
> +
> +    ENLARGE_ELEMENT(0,2);
> +    ENLARGE_ELEMENT(1,2);
> +    ENLARGE_ELEMENT(2,2);
> +    ENLARGE_ELEMENT(3,2);
> +
> +    ENLARGE_ELEMENT(0,3);
> +    ENLARGE_ELEMENT(1,3);
> +    ENLARGE_ELEMENT(2,3);
> +    ENLARGE_ELEMENT(3,3);
> +}

this can be done with 2 simple loops instead of the "forced" 16x code 
duplication


> +
> +/**
> + * Temporary vars
> + */
> +typedef struct
> +{
> +    roq_cel_evaluation_t *cel_evals;
> +    roq_possibility_list_t *plists;
> +    roq_yuvcluster4_t *yuvClusters;
> +    roq_sort_option_t *sortOptions;
> +    roq_sort_option_t **sortOptionsSorted;
> +
> +    roq_pixel_t *reconstruct;
> +
> +    uint8_t *outbuffer;
> +
> +    int f2i4[256];
> +    int i2f4[256];
> +    int f2i2[256];
> +    int i2f2[256];
> +
> +    int numCB4;
> +    int numCB2;
> +
> +    int mainChunkSize;
> +
> +    roq_codebooks_t codebooks;
> +} roq_tempdata_t;

I think the roq_ prefix on all the roq private stuff does not make much sense


> +
> +static void free_temp_data(roq_tempdata_t *tempData)
> +{
> +    if (tempData->cel_evals)
> +        av_free(tempData->cel_evals);
> +    if (tempData->plists)
> +        av_free(tempData->plists);
> +    if (tempData->yuvClusters)
> +        av_free(tempData->yuvClusters);
> +    if (tempData->sortOptions)
> +        av_free(tempData->sortOptions);
> +    if (tempData->sortOptionsSorted)
> +        av_free(tempData->sortOptionsSorted);
> +    if (tempData->reconstruct)
> +        av_free(tempData->reconstruct);
> +    if (tempData->outbuffer)
> +        av_free(tempData->outbuffer);
> +}

av_free(NULL) is completely safe so the checks are unneeded


> +
> +/**
> + * Initializes cel evaluators and sets their source coordinates
> + */
> +static int create_cel_evals(roq_encoder_t *enc, roq_tempdata_t *tempData)
> +{
> +    int width, height;
> +    int n,x,y;
> +
> +    width = enc->width;
> +    height= enc->height;
> +
> +    tempData->cel_evals = av_malloc(width*height/64 * sizeof(roq_cel_evaluation_t));
> +    if (!tempData->cel_evals)
> +        return 0;

code in libav* generally returns negative values on error, so
I would suggest that, for consistency, the roq encoder should do that
too


[...]
> +    for (y=0; y<h; y+=4) {
> +        for (x=0; x<w; x+=4) {
> +            /* Copy data */
> +            blit(image + (y*w)+x, w, &blocks2[0], 8, 2, 2);
> +            blit(image + (y*w)+x+2, w, &blocks2[2], 8, 2, 2);
> +            blit(image + ((y+2)*w)+x, w, &blocks2[4], 8, 2, 2);
> +            blit(image + ((y+2)*w)+x+2, w, &blocks2[6], 8, 2, 2);

this could be vertically aligned like

blit(image + ( y   *w)+x  , w, &blocks2[0], 8, 2, 2);
blit(image + ( y   *w)+x+2, w, &blocks2[2], 8, 2, 2);
blit(image + ((y+2)*w)+x  , w, &blocks2[4], 8, 2, 2);
blit(image + ((y+2)*w)+x+2, w, &blocks2[6], 8, 2, 2);


[...]
> +/**
> + * Template code to find the codebook with the lowest median squared error from an image
> + */
> +#define GET_LOWEST_CB_MSE(FUNCT, CBTYPE, COMMAND1, COMMAND2) \

MSE normally stands for mean squared error, and squared_diff_yuv returns
neither median nor mean ...


> +static int FUNCT(const roq_pixel_t *image, int width, CBTYPE *cb, int numCB, uint8_t *outIndex) \
> +{ \
> +    int diff; \
> +    int pick, lDiff; \
> +    int i=0; \
> +\
> +    COMMAND1 \
> +    lDiff = COMMAND2 \
> +    pick = 0; \
> +\
> +    /* Diff against the others */ \
> +    for (i=1; i<numCB; i++) { \
> +        COMMAND1 \
> +        diff = COMMAND2 \
> +        if (diff < lDiff) { \
> +            lDiff = diff; \
> +            pick = i; \
> +        } \
> +    } \

lDiff=INT_MAX
for(i=0; i<numCB; i++) {
 ...


[...]
> +typedef struct
> +{
> +    int dx, dy;
> +} roq_motionsearch_vector_t;
> +
> +/**
> + * Performs motion searching on an image at an offset, sets outDX and outDY to motion offset
> + */
> +static int motion_search(roq_encoder_t *enc, const roq_pixel_t *image, int x, int y, int8_t *outDX, int8_t *outDY, uint32_t d)
> +{
> +    roq_motionsearch_vector_t offsets[9] = {
> +        {0,0},
> +        {0,-1},
> +        {-1,-1},
> +        {-1,0},
> +        {-1,1},
> +        {0,1},
> +        {1,1},
> +        {1,0},
> +        {1,-1},
> +    };
> +
> +    int diffs[9];
> +    int diffPick, lowestDiff;
> +
> +    int w,h,i;
> +
> +    uint32_t rx,ry;
> +
> +    int finalDX, finalDY;
> +
> +    int step;
> +
> +    w = enc->width;
> +    h = enc->height;
> +
> +    finalDX = 0;
> +    finalDY = 0;
> +
> +    /* Simple three-step search */

implement a normal predictive zonal search or use the existing code from
motion_est.c, but it might be hard to use the existing code ...

3 step search is ancient, low quality and slow

predictive zonal search simply tries the motion vectors of the left, top and
top right blocks, the motion vector of the previous frame at the current
position, and the motion vectors from the previous frame one block to the
right and one block down; it also tries the 0,0 vector
and the median of the left/top/topright blocks' vectors

the vector chosen after this step is simply the one with the lowest (M)SE or
other comparison function

in the next step a zonal search is done; the simplest variant just tries the
vectors (-1,0) (1,0) (0,-1) (0,1) relative to the current best vector until
a local minimum is found

using the existing motion_est.c would allow many different comparison
functions to be used and also many variations of the exact motion vector
search, but the motion_est.c code is a little messy and entangled with
mpegvideo stuff, so I am not insisting on it being used; if you still want
to try, then I suggest that you look at svq1.c and snow.c, which are other
non-mpeg encoders using it


[...]
> +    subcel->eval_mse[ROQ_EVALTYPE_CODEBOOK] = cb4_entry_mse(image + poffset, w, tempData->codebooks.cb4, tempData->codebooks.numCB4, &subcel->cbEntry);
> +
> +    subcel->eval_mse[ROQ_EVALTYPE_SUBDIVIDE] = cb2_entry_mse(image + poffset, w, tempData->codebooks.cb2, tempData->codebooks.numCB2, &subcel->subCels[0]);
> +    subcel->eval_mse[ROQ_EVALTYPE_SUBDIVIDE] += cb2_entry_mse(image + poffset + 2, w, tempData->codebooks.cb2, tempData->codebooks.numCB2, &subcel->subCels[1]);
> +    subcel->eval_mse[ROQ_EVALTYPE_SUBDIVIDE] += cb2_entry_mse(image + poffset + (w*2), w, tempData->codebooks.cb2, tempData->codebooks.numCB2, &subcel->subCels[2]);
> +    subcel->eval_mse[ROQ_EVALTYPE_SUBDIVIDE] += cb2_entry_mse(image + poffset + (w*2) + 2, w, tempData->codebooks.cb2, tempData->codebooks.numCB2, &subcel->subCels[3]);

IMHO 172 chars per line is too much


[...]
> +    /* Disallowed = later in the list */
> +    if (!p2[0]->allowed) {
> +        if (p1[0]->allowed)
> +            return -1;
> +        else
> +            return 0;
> +    }
> +
> +    if (!p1[0]->allowed) {
> +        if (p2[0]->allowed)
> +            return 1;
> +        else
> +            return 0;
> +    }

the second return 0 cannot be reached


[...]
> +            for (j=0; j<step; j++) {
> +                if (c[j] != result[i*step + j]) {
> +                    mismatch = 1;
> +                    break;
> +                }
> +            }

memcmp()


[...]

> +/* NeuQuant Neural-Net Quantization Algorithm functions */

what is the advantage of this neural net quantization stuff compared to much
simpler and faster algorithms like
ELBG (http://citeseer.ist.psu.edu/patan01enhanced.html) ?

note, you can use tests/tiny_psnr to compare raw (yuv/rgb/whatever) videos


[...]
> +    switch (avctx->width) {
> +    case 0:
> +        return -1;
> +    case 16:
> +    case 32:
> +    case 64:
> +    case 128:
> +    case 256:
> +    case 512:
> +    case 1024:
> +    case 2048:
> +    case 4096:
> +    case 8192:
> +    case 16384:
> +    case 32768:
> +        break;
> +    default:
> +        if (avctx->width > 65535) {
> +            av_log(avctx, AV_LOG_ERROR, "Width must be < 65536\n");
> +            return -1;
> +        } else {
> +            av_log(avctx, AV_LOG_ERROR, "Warning: Width not power of two\n");
> +        }
> +    };

to test for power of 2 try

if(x&(x-1)) ...


[...]
> +static int roq_encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data)
> +{
> +    roq_encoder_t *enc = avctx->priv_data;
> +    int i, numPixels;
> +    roq_pixel_t *pixel;
> +    int rate;
> +    writebuffer_t wb;
> +    AVFrame *frame= data;
> +
> +    numPixels = enc->width * enc->height;
> +
> +    pixel = enc->pixels;
> +    for (i=0; i < numPixels; i++) {
> +        (pixel+i)->y = frame->data[0][i];
> +        (pixel+i)->u = frame->data[1][i];
> +        (pixel+i)->v = frame->data[2][i];
> +    }

what's this good for? why isn't the code using the planar yuv data as is,
rather than converting it to these odd roq_pixel_t structs?


[...]
> +static enum PixelFormat roq_pixelformats[] =
> +{
> +    PIX_FMT_YUV444P,

roq is a YV12 codec, not a YUV 4:4:4 codec AFAIK, so this is just wrong;
it's less wrong than RGB but it's still not correct


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Observe your enemies, for they first find out your faults. -- Antisthenes
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: not available
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070511/e901d860/attachment.pgp>



More information about the ffmpeg-devel mailing list