[FFmpeg-devel] [PATCH] RV30/40 decoder
Michael Niedermayer
michaelni
Sun Sep 16 23:10:43 CEST 2007
Hi
On Sun, Sep 16, 2007 at 07:58:51PM +0300, Kostya wrote:
> Here is my RV30/40 decoder developed during this GSoC.
>
> It still outputs slightly incorrect picture and needs fixing of
> B-frames motion vector reconstruction(prediction) but it should
> be stable now and picture is quite recognizable.
>
> VLC tables are omitted from this patch, they can be found
> at svn://svn.mplayerhq.hu/soc/rv40 files rv40vlc.h and rv40vlc2.h
[...]
> +/**
> + * This table is used for retrieving current intra type
> + * basing on its neighbours and adjustment provided by
> + * code read and decoded before.
> + *
> + * This is really three-dimensional matrix with dimensions
> + * [-1..9][-1..9][0..9], first and second coordinates
> + * are detemined by top and left neighbours (-1 if unavailable).
> + */
> +static const uint8_t rv30_itype_from_context[900] = {
you could half the size of this table by using the low and high 4bit
instead of 2 bytes, this also would fit naturaly with the code which
uses them in pairs
[...]
> + RV40_MB_P_MIX16x16, // inter 16x16 block very similar to Intra16x16
not doxygen compatible
[...]
> +/** Slice information saved for truncated slices */
> +typedef struct SavedSliceInfo{
> + uint8_t *data; ///< bitstream data
> + int data_size; ///< data size
> + int bits_used; ///< bits used up to last decoded block
> + int mb_x, mb_y; ///< coordinates of the last decoded block
> +}SavedSliceInfo;
truncated slices are an error in the parser or demuxer and MUST be corrected
there
(note this is the most important complaint in this review)
> +
> +/** Decoder context */
> +typedef struct RV40DecContext{
> + MpegEncContext s;
> + int mb_bits; ///< bits needed to read MB offet in slice header
> + int *intra_types_hist; ///< old block types, used for prediction
> + int *intra_types; ///< block types
> + int intra_types_stride; ///< stride for block types data
why dont you use MpegEncContext.b4_stride ?
> + int block_start; ///< start of slice in blocks
> + int ptype; ///< picture type
unused
> + int quant; ///< quantizer
why dont you use MpegEncContext.qscale?
> +
> + int vlc_set; ///< index of currently selected VLC set
> + RV40VLC *cur_vlcs; ///< VLC set used for current frame decoding
> + int bits; ///< slice size in bits
> + H264PredContext h; ///< functions for 4x4 and 16x16 intra block prediction
> + SliceInfo si; ///< current slice information
> + SliceInfo prev_si; ///< info for the saved slice
> + uint8_t *slice_data; ///< saved slice data
> + int has_slice; ///< has previously saved slice
> + int skip_blocks; ///< blocks to skip (interframe slice only)
why dont you use MpegEncContext.mb_skip_run?
> +
> + int *mb_type; ///< internal macroblock types
> + int block_type; ///< current block type
> + int luma_vlc; ///< which VLC set will be used for luma blocks decoding
> + int chroma_vlc; ///< which VLC set will be used for chroma blocks decoding
> + int is16; ///< current block has additional 16x16 specific features or not
> + int dmv[4][2]; ///< differential motion vectors for the current macroblock
this as well has a entry in MpegEncContext
(i mean if you wouldnt use MpegEncContext at all thats fine but if you do use
it, you could as well use its fields instead of duplicating them ...)
> +
> + int truncated; ///< flag signalling that slice ended prematurely
> + SavedSliceInfo ssi; ///< data for truncated slice
> +
> + int rv30; ///< indicates which RV variasnt is currently decoded
> + int rpr; ///< one field size in RV30 slice header
> +
> + int avail[4]; ///< whether left, top, top rights and top left MBs are available
> +}RV40DecContext;
> +
> +static RV40VLC intra_vlcs[NUM_INTRA_TABLES], inter_vlcs[NUM_INTER_TABLES];
> +static VLC aic_top_vlc;
> +static VLC aic_mode1_vlc[AIC_MODE1_NUM], aic_mode2_vlc[AIC_MODE2_NUM];
> +static VLC mbinfo_vlc, ptype_vlc[NUM_PTYPE_VLCS], btype_vlc[NUM_BTYPE_VLCS];
> +
> +/**
> + * @defgroup vlc RV40 VLC generating functions
> + * @{
> + */
> +
> +/**
> + * Generate VLC from codeword lengths
> + */
> +static int rv40_gen_vlc(const uint8_t *bits2, int size, VLC *vlc)
> +{
> + int i;
> + int counts[17], codes[17];
> + uint16_t *cw, *syms;
> + uint8_t *bits;
> + int maxbits = 0, realsize;
> + int ret;
> +
> + memset(counts, 0, 16 * sizeof(int));
int counts[17]={0};
> +
> + cw = av_mallocz(size * 2);
> + syms = av_malloc(size * 2);
> + bits = av_malloc(size);
these could as well use use arrays on the stack, simplifying the source
> +
> + realsize = 0;
> + for(i = 0; i < size; i++){
> + if(bits2[i]){
> + bits[realsize] = bits2[i];
> + syms[realsize] = i;
> + realsize++;
> + if(bits2[i] > maxbits)
> + maxbits = bits2[i];
FFMAX
> + }
> + }
> +
> + size = realsize;
> + for(i = 0; i < size; i++)
> + counts[bits[i]]++;
cant these loops be merged?
[...]
> + for(i = 0; i < NUM_INTRA_TABLES; i++){
> + for(j = 0; j < 2; j++)
> + rv40_gen_vlc(rv40_intra_cbppatvlc_pointers[i][j], CBPPAT_VLC_SIZE, &intra_vlcs[i].cbppattern[j]);
> + for(j = 0; j < 2; j++)
> + for(k = 0; k < 4; k++)
> + rv40_gen_vlc(rv40_intra_cbpvlc_pointers[i][j][k], CBP_VLC_SIZE, &intra_vlcs[i].cbp[j][k]);
> + for(j = 0; j < 4; j++)
> + rv40_gen_vlc(rv40_intra_firstpatvlc_pointers[i][j], FIRSTBLK_VLC_SIZE, &intra_vlcs[i].first_pattern[j]);
> + for(j = 0; j < 2; j++)
> + rv40_gen_vlc(rv40_intra_secondpatvlc_pointers[i][j], OTHERBLK_VLC_SIZE, &intra_vlcs[i].second_pattern[j]);
> + for(j = 0; j < 2; j++)
> + rv40_gen_vlc(rv40_intra_thirdpatvlc_pointers[i][j], OTHERBLK_VLC_SIZE, &intra_vlcs[i].third_pattern[j]);
> + rv40_gen_vlc(rv40_intra_coeffvlc_pointers[i], COEFF_VLC_SIZE, &intra_vlcs[i].coefficient);
> + }
> +
> + for(i = 0; i < NUM_INTER_TABLES; i++){
> + rv40_gen_vlc(rv40_inter_cbppatvlc_pointers[i], CBPPAT_VLC_SIZE, &inter_vlcs[i].cbppattern[0]);
> + for(j = 0; j < 4; j++)
> + rv40_gen_vlc(rv40_inter_cbpvlc_pointers[i][j], CBP_VLC_SIZE, &inter_vlcs[i].cbp[0][j]);
> + for(j = 0; j < 2; j++)
> + rv40_gen_vlc(rv40_inter_firstpatvlc_pointers[i][j], FIRSTBLK_VLC_SIZE, &inter_vlcs[i].first_pattern[j]);
> + for(j = 0; j < 2; j++)
> + rv40_gen_vlc(rv40_inter_secondpatvlc_pointers[i][j], OTHERBLK_VLC_SIZE, &inter_vlcs[i].second_pattern[j]);
> + for(j = 0; j < 2; j++)
> + rv40_gen_vlc(rv40_inter_thirdpatvlc_pointers[i][j], OTHERBLK_VLC_SIZE, &inter_vlcs[i].third_pattern[j]);
> + rv40_gen_vlc(rv40_inter_coeffvlc_pointers[i], COEFF_VLC_SIZE, &inter_vlcs[i].coefficient);
> + }
some of these loops can be merged
[...]
> +/**
> + * Real Video 4.0 inverse transform
> + * Code is almost the same as in SVQ3, only scaling is different
> + */
> +static void rv40_intra_inv_transform(DCTELEM *block, const int offset){
> + int temp[16];
> + unsigned int i;
this still is duplicate of the svq3 code
[...]
> +static inline void decode_subblock(DCTELEM *dst, int code, GetBitContext *gb, VLC *vlc)
> +{
> + int coeffs[4];
> +
> + coeffs[0] = modulo_three_table[code][0];
> + coeffs[1] = modulo_three_table[code][1];
> + coeffs[2] = modulo_three_table[code][2];
> + coeffs[3] = modulo_three_table[code][3];
> + decode_coeff(dst , coeffs[0], 3, gb, vlc);
> + decode_coeff(dst+1, coeffs[1], 2, gb, vlc);
> + decode_coeff(dst+8, coeffs[2], 2, gb, vlc);
> + decode_coeff(dst+9, coeffs[3], 2, gb, vlc);
> +}
> +
> +// slightly modified version for the third subblock
> +static inline void decode_subblock2(DCTELEM *dst, int code, GetBitContext *gb, VLC *vlc)
not doxygen compatible and a
if(variant2){
decode_coeff(dst+1, coeffs[2], 2, gb, vlc);
decode_coeff(dst+9, coeffs[3], 2, gb, vlc);
}else{
decode_coeff(dst+9, coeffs[2], 2, gb, vlc);
decode_coeff(dst+1, coeffs[3], 2, gb, vlc);
}
would avoid the near duplicate function
[...]
> +/**
> + * Dequantize ordinary 4x4 block
> + * @todo optimize
> + */
> +static inline void rv40_dequant4x4(DCTELEM *block, int offset, int Qdc, int Q)
> +{
> + int i, j;
> +
> + block += offset;
> + block[0] = (block[0] * Qdc + 8) >> 4;
> + for(i = 0; i < 4; i++)
> + for(j = 0; j < 4; j++)
> + if(i || j)
> + block[j + i*8] = (block[j + i*8] * Q + 8) >> 4;
for(i = 0; i < 4; i++)
for(j = !i; j < 4; j++)
block[j + i*8] = (block[j + i*8] * Q + 8) >> 4;
(does the check only 4 instead of 16 times)
[...]
> +static int rv30_parse_slice_header(RV40DecContext *r, GetBitContext *gb, SliceInfo *si)
> +{
please split rv30 and rv40 into seperate files (rv30/rv40/common code) and
merge rv30/rv40 functions if they are nearly idenitcal
> + int t, mb_bits;
> + int w = r->s.width, h = r->s.height;
> + int i, mb_size;
> +
> + memset(si, 0, sizeof(SliceInfo));
> + si->type = -1;
> + get_bits(gb, 3);
> + si->type = get_bits(gb, 2);
huh?
[...]
> +static inline int get_omega(GetBitContext *gb);
could you reorder functions so as to avoid such stuff?
> +
> +/**
> + * Decode 4x4 intra types array
> + */
> +static int rv30_decode_intra_types(RV40DecContext *r, GetBitContext *gb, int *dst)
> +{
> + int i, j;
> + int A, B;
> + int *ptr;
> + int code;
> +
> + for(i = 0; i < 4; i++, dst += r->intra_types_stride){
> + ptr = dst;
> + for(j = 0; j < 4; j+= 2){
> + code = (get_omega(gb) - 1) << 1;
> + if(code >= 81*2){
> + av_log(r->s.avctx, AV_LOG_ERROR, "Incorrect intra prediction code\n");
> + return -1;
> + }
> + A = ptr[-r->intra_types_stride] + 1;
> + B = ptr[-1] + 1;
> + *ptr++ = rv30_itype_from_context[A * 90 + B * 9 + rv30_itype_code[code + 0]];
> + if(ptr[-1] == 9){
> + av_log(r->s.avctx, AV_LOG_ERROR, "Incorrect intra prediction mode\n");
> + return -1;
> + }
> + A = ptr[-r->intra_types_stride] + 1;
> + B = ptr[-1] + 1;
> + *ptr++ = rv30_itype_from_context[A * 90 + B * 9 + rv30_itype_code[code + 1]];
> + if(ptr[-1] == 9){
> + av_log(r->s.avctx, AV_LOG_ERROR, "Incorrect intra prediction mode\n");
> + return -1;
> + }
a goto error would simplify this
also maybe a for(k<2) could simplify this
[...]
> + if(s->pict_type == P_TYPE){
> + if(prev_type == RV40_MB_SKIP) prev_type = RV40_MB_P_16x16;
> + prev_type = block_num_to_ptype_vlc_num[prev_type];
> + q = get_vlc2(gb, ptype_vlc[prev_type].table, PTYPE_VLC_BITS, 1);
> + if(q < PBTYPE_ESCAPE)
> + return q;
> + q = get_vlc2(gb, ptype_vlc[prev_type].table, PTYPE_VLC_BITS, 1);
> + av_log(NULL,0,"Dquant for P-frame\n");
please dont av_log(NULL, 0
av_log takes AVCodecContext and the log level
[...]
> + if(s->pict_type == P_TYPE && r->block_type == RV40_MB_SKIP)
> + r->mb_type[mb_pos] = RV40_MB_P_16x16;
> + if(s->pict_type == B_TYPE && r->block_type == RV40_MB_SKIP)
> + r->mb_type[mb_pos] = RV40_MB_B_INTERP;
following is more readable
if(r->block_type == RV40_MB_SKIP){
if(s->pict_type == P_TYPE)
r->mb_type[mb_pos] = RV40_MB_P_16x16;
...
}
[...]
> + if(r->rv30){
> + if(rv30_decode_intra_types(r, gb, intra_types) < 0)
> + return -1;
> + }else{
> + if(rv40_decode_intra_types(r, gb, intra_types) < 0)
> + return -1;
> + }
> + r->chroma_vlc = 0;
> + r->luma_vlc = 1;
> + }else{
> + t = get_bits(gb, 2);
> + for(i = 0; i < 16; i++)
> + intra_types[(i & 3) + (i>>2) * r->intra_types_stride] = t;
> + r->chroma_vlc = 0;
> + r->luma_vlc = 2;
> + }
> + r->cur_vlcs = choose_vlc_set(r->si.quant, r->si.vlc_set, 0);
> + }else{
> + for(i = 0; i < 16; i++)
> + intra_types[(i & 3) + (i>>2) * r->intra_types_stride] = 0;
> + r->cur_vlcs = choose_vlc_set(r->si.quant, r->si.vlc_set, 1);
> + if(r->mb_type[mb_pos] == RV40_MB_P_MIX16x16){
> + r->is16 = 1;
> + r->chroma_vlc = 1;
> + r->luma_vlc = 2;
> + r->cur_vlcs = choose_vlc_set(r->si.quant, r->si.vlc_set, 0);
> + }
> + }
> + return rv40_decode_cbp(gb, r->cur_vlcs, r->is16);
functions which are used in bith rv30 and rv40 should not be named rv40_...
[...]
> +static void rv40_postprocess(RV40DecContext *r)
> +{
unused
[...]
> + avctx->max_b_frames = 8;
unneeded
[...]
> +/**
> + * Find slice offsets and return them in array.
> + */
> +static int* find_slice_offsets(uint8_t *buf, int buf_size, int *slices)
> +{
> + int *offsets;
> + int i;
> + int hdr = AV_RB32(buf) & KEY_MASK;
> +
> + offsets = av_malloc(sizeof(int)*12);//just to avoid unnecessary reallocs
> + offsets[0] = 0;
> + *slices = 1;
> + for(i = 4; i < buf_size; i += 4){
> + if((AV_RB32(buf + i) & KEY_MASK) == hdr){
> + (*slices)++;
> + offsets = av_realloc(offsets, *slices * sizeof(int));
> + offsets[*slices - 1] = i;
> + }
> + }
> + return offsets;
> +}
unused
[...]
> + if(avctx->slice_count){
> + slice_count = avctx->slice_count;
> + slice_offset = avctx->slice_offset;
AVCodecContext.slice_count and slice_offset is deprecated, they break
remuxing, cause thread issues with the demuxer writing these into the
decoder context, ...
[...]
--
Michael GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
No human being will ever know the Truth, for even if they happen to say it
by chance, they would not even known they had done so. -- Xenophanes
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20070916/2477d1c7/attachment.pgp>
More information about the ffmpeg-devel
mailing list