[FFmpeg-devel] [PATCH 01/15] Choose h264 chroma dc dequant function dynamically.

Jason Garrett-Glaser jason
Fri Mar 11 06:04:37 CET 2011


On Thu, Mar 10, 2011 at 2:27 PM, Oskar Arvidsson
<arvidsson.oskar at gmail.com> wrote:
> Needed for high bit depth h264 decoding.
> ---
>  libavcodec/dsputil.h  |    1 +
>  libavcodec/h264.c     |   29 ++++-------------------------
>  libavcodec/h264dsp.c  |    1 +
>  libavcodec/h264dsp.h  |    1 +
>  libavcodec/h264idct.c |   22 ++++++++++++++++++++++
>  5 files changed, 29 insertions(+), 25 deletions(-)
>
> diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
> index 3e55d13..8e348d1 100644
> --- a/libavcodec/dsputil.h
> +++ b/libavcodec/dsputil.h
> @@ -64,6 +64,7 @@ void ff_h264_idct_add16intra_c(uint8_t *dst, const int *blockoffset, DCTELEM *bl
> ?void ff_h264_idct8_add4_c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
> ?void ff_h264_idct_add8_c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
>
> +void ff_h264_chroma_dc_dequant_idct_c(DCTELEM *block, int qmul);
> ?void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul);
> ?void ff_svq3_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qp);
> ?void ff_svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
> diff --git a/libavcodec/h264.c b/libavcodec/h264.c
> index 5ebf929..ade1d47 100644
> --- a/libavcodec/h264.c
> +++ b/libavcodec/h264.c
> @@ -292,27 +292,6 @@ static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
> ?#undef xStride
> ?#undef stride
>
> -static void chroma_dc_dequant_idct_c(DCTELEM *block, int qmul){
> - ? ?const int stride= 16*2;
> - ? ?const int xStride= 16;
> - ? ?int a,b,c,d,e;
> -
> - ? ?a= block[stride*0 + xStride*0];
> - ? ?b= block[stride*0 + xStride*1];
> - ? ?c= block[stride*1 + xStride*0];
> - ? ?d= block[stride*1 + xStride*1];
> -
> - ? ?e= a-b;
> - ? ?a= a+b;
> - ? ?b= c-d;
> - ? ?c= c+d;
> -
> - ? ?block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
> - ? ?block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
> - ? ?block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
> - ? ?block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
> -}
> -
> ?#if 0
> ?static void chroma_dc_dct_c(DCTELEM *block){
> ? ? const int stride= 16*2;
> @@ -1290,15 +1269,15 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
> ? ? ? ? ? ? }else{
> ? ? ? ? ? ? ? ? if(is_h264){
> ? ? ? ? ? ? ? ? ? ? if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
> - ? ? ? ? ? ? ? ? ? ? ? ?chroma_dc_dequant_idct_c(h->mb + 16*16 ? ? , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
> + ? ? ? ? ? ? ? ? ? ? ? ?h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16 ? ? , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
> ? ? ? ? ? ? ? ? ? ? if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
> - ? ? ? ? ? ? ? ? ? ? ? ?chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
> + ? ? ? ? ? ? ? ? ? ? ? ?h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
> ? ? ? ? ? ? ? ? ? ? h->h264dsp.h264_idct_add8(dest, block_offset,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? h->mb, uvlinesize,
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? ? h->non_zero_count_cache);
> ? ? ? ? ? ? ? ? }else{
> - ? ? ? ? ? ? ? ? ? ?chroma_dc_dequant_idct_c(h->mb + 16*16 ? ? , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
> - ? ? ? ? ? ? ? ? ? ?chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
> + ? ? ? ? ? ? ? ? ? ?h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16 ? ? , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
> + ? ? ? ? ? ? ? ? ? ?h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
> ? ? ? ? ? ? ? ? ? ? for(i=16; i<16+8; i++){
> ? ? ? ? ? ? ? ? ? ? ? ? if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
> ? ? ? ? ? ? ? ? ? ? ? ? ? ? uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
> diff --git a/libavcodec/h264dsp.c b/libavcodec/h264dsp.c
> index c3ee06d..56e606c 100644
> --- a/libavcodec/h264dsp.c
> +++ b/libavcodec/h264dsp.c
> @@ -283,6 +283,7 @@ void ff_h264dsp_init(H264DSPContext *c)
> ? ? c->h264_idct_add8 ? ? ?= ff_h264_idct_add8_c;
> ? ? c->h264_idct_add16intra= ff_h264_idct_add16intra_c;
> ? ? c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_c;
> + ? ?c->h264_chroma_dc_dequant_idct= ff_h264_chroma_dc_dequant_idct_c;
>
> ? ? c->weight_h264_pixels_tab[0]= weight_h264_pixels16x16_c;
> ? ? c->weight_h264_pixels_tab[1]= weight_h264_pixels16x8_c;
> diff --git a/libavcodec/h264dsp.h b/libavcodec/h264dsp.h
> index 2491c90..bdbb4a3 100644
> --- a/libavcodec/h264dsp.h
> +++ b/libavcodec/h264dsp.h
> @@ -68,6 +68,7 @@ typedef struct H264DSPContext{
> ? ? void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
> ? ? void (*h264_idct_add16intra)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
> ? ? void (*h264_luma_dc_dequant_idct)(DCTELEM *output, DCTELEM *input/*align 16*/, int qmul);
> + ? ?void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul);
> ?}H264DSPContext;
>
> ?void ff_h264dsp_init(H264DSPContext *c);
> diff --git a/libavcodec/h264idct.c b/libavcodec/h264idct.c
> index f5b05ac..7c96f42 100644
> --- a/libavcodec/h264idct.c
> +++ b/libavcodec/h264idct.c
> @@ -250,4 +250,26 @@ void ff_h264_luma_dc_dequant_idct_c(DCTELEM *output, DCTELEM *input, int qmul){
> ? ? ? ? output[stride* 4+offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
> ? ? ? ? output[stride* 5+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
> ? ? }
> +#undef stride
> +}
> +
> +void ff_h264_chroma_dc_dequant_idct_c(DCTELEM *block, int qmul){
> + ? ?const int stride= 16*2;
> + ? ?const int xStride= 16;
> + ? ?int a,b,c,d,e;
> +
> + ? ?a= block[stride*0 + xStride*0];
> + ? ?b= block[stride*0 + xStride*1];
> + ? ?c= block[stride*1 + xStride*0];
> + ? ?d= block[stride*1 + xStride*1];
> +
> + ? ?e= a-b;
> + ? ?a= a+b;
> + ? ?b= c-d;
> + ? ?c= c+d;
> +
> + ? ?block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
> + ? ?block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
> + ? ?block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
> + ? ?block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
> ?}
> --
> 1.7.3.5
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at mplayerhq.hu
> https://lists.mplayerhq.hu/mailman/listinfo/ffmpeg-devel
>

Looks good to me; I was going to do this myself for unrelated reasons
so I could asm-ize it.

Jason



More information about the ffmpeg-devel mailing list