[FFmpeg-cvslog] r21280 - in trunk/libavcodec: h264.c h264.h h264_loopfilter.c
michael
subversion
Mon Jan 18 06:15:31 CET 2010
Author: michael
Date: Mon Jan 18 06:15:31 2010
New Revision: 21280
Log:
Perform reference remapping at fill_cache() time instead of in the
loop filter. This removes one obstacle of getting ff_h264_filter_mb_fast()
bitexact. code is maybe 0.1% faster
Modified:
trunk/libavcodec/h264.c
trunk/libavcodec/h264.h
trunk/libavcodec/h264_loopfilter.c
Modified: trunk/libavcodec/h264.c
==============================================================================
--- trunk/libavcodec/h264.c Mon Jan 18 04:57:01 2010 (r21279)
+++ trunk/libavcodec/h264.c Mon Jan 18 06:15:31 2010 (r21280)
@@ -2087,16 +2087,35 @@ static int decode_slice_header(H264Conte
}
for(j=0; j<2; j++){
+ int id_list[16];
int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
+ for(i=0; i<16; i++){
+ id_list[i]= 60;
+ if(h->ref_list[j][i].data[0]){
+ int k;
+ uint8_t *base= h->ref_list[j][i].base[0];
+ for(k=0; k<h->short_ref_count; k++)
+ if(h->short_ref[k]->base[0] == base){
+ id_list[i]= k;
+ break;
+ }
+ for(k=0; k<h->long_ref_count; k++)
+ if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
+ id_list[i]= h->short_ref_count + k;
+ break;
+ }
+ }
+ }
+
ref2frm[0]=
ref2frm[1]= -1;
for(i=0; i<16; i++)
- ref2frm[i+2]= 4*h->ref_list[j][i].frame_num
+ ref2frm[i+2]= 4*id_list[i]
+(h->ref_list[j][i].reference&3);
ref2frm[18+0]=
ref2frm[18+1]= -1;
for(i=16; i<48; i++)
- ref2frm[i+4]= 4*h->ref_list[j][i].frame_num
+ ref2frm[i+4]= 4*id_list[(i-16)>>1]
+(h->ref_list[j][i].reference&3);
}
Modified: trunk/libavcodec/h264.h
==============================================================================
--- trunk/libavcodec/h264.h Mon Jan 18 04:57:01 2010 (r21279)
+++ trunk/libavcodec/h264.h Mon Jan 18 06:15:31 2010 (r21280)
@@ -834,11 +834,20 @@ static av_always_inline int fill_caches(
}
ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
+ if(for_deblock){
+ int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
+ *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
+ ref += h->b8_stride;
+ *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
+ *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
+ }else{
*(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
*(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
ref += h->b8_stride;
*(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
*(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
+ }
b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
for(y=0; y<4; y++){
@@ -1029,16 +1038,24 @@ static av_always_inline int fill_caches(
*(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
*(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
*(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
+ if(for_deblock){
+ int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ h->ref_cache[list][scan8[0] + 0 - 1*8]=
+ h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
+ h->ref_cache[list][scan8[0] + 2 - 1*8]=
+ h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
+ }else{
h->ref_cache[list][scan8[0] + 0 - 1*8]=
h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
h->ref_cache[list][scan8[0] + 2 - 1*8]=
h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
+ }
}else{
*(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
*(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
*(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
*(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
- *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
+ *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= (((for_deblock||top_type) ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
}
for(i=0; i<2; i++){
@@ -1048,13 +1065,19 @@ static av_always_inline int fill_caches(
const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
*(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
*(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
+ if(for_deblock){
+ int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[i]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ h->ref_cache[list][cache_idx ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]];
+ h->ref_cache[list][cache_idx+8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]];
+ }else{
h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
+ }
}else{
*(uint32_t*)h->mv_cache [list][cache_idx ]=
*(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
h->ref_cache[list][cache_idx ]=
- h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
+ h->ref_cache[list][cache_idx+8]= (for_deblock||left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
}
}
Modified: trunk/libavcodec/h264_loopfilter.c
==============================================================================
--- trunk/libavcodec/h264_loopfilter.c Mon Jan 18 04:57:01 2010 (r21279)
+++ trunk/libavcodec/h264_loopfilter.c Mon Jan 18 06:15:31 2010 (r21280)
@@ -445,8 +445,6 @@ static av_always_inline void filter_mb_d
int edge;
const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
const int mbm_type = s->current_picture.mb_type[mbm_xy];
- int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
- int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
@@ -513,7 +511,6 @@ static av_always_inline void filter_mb_d
/* mbn_xy: neighbor macroblock */
const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
const int mbn_type = s->current_picture.mb_type[mbn_xy];
- int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
int16_t bS[4];
int qp;
@@ -553,7 +550,7 @@ static av_always_inline void filter_mb_d
int v = 0;
for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
- v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] |
+ v |= h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] |
h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U |
FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
}
@@ -562,7 +559,7 @@ static av_always_inline void filter_mb_d
v=0;
for( l = 0; !v && l < 2; l++ ) {
int ln= 1-l;
- v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] |
+ v |= h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] |
h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U |
FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
}
@@ -588,7 +585,7 @@ static av_always_inline void filter_mb_d
{
bS[i] = 0;
for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
- if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] |
+ if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] |
h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U |
FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
bS[i] = 1;
@@ -600,7 +597,7 @@ static av_always_inline void filter_mb_d
bS[i] = 0;
for( l = 0; l < 2; l++ ) {
int ln= 1-l;
- if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] |
+ if( h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] |
h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U |
FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
bS[i] = 1;
More information about the ffmpeg-cvslog
mailing list