[Ffmpeg-cvslog] CVS: ffmpeg/libavcodec h264.c,1.198,1.199
Loren Merritt CVS
lorenm
Wed Mar 22 13:41:05 CET 2006
Update of /cvsroot/ffmpeg/ffmpeg/libavcodec
In directory mail:/var2/tmp/cvs-serv8056
Modified Files:
h264.c
Log Message:
1.5x faster write_back_motion, 1-3% faster overall
Index: h264.c
===================================================================
RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/h264.c,v
retrieving revision 1.198
retrieving revision 1.199
diff -u -d -r1.198 -r1.199
--- h264.c 13 Mar 2006 23:46:47 -0000 1.198
+++ h264.c 22 Mar 2006 12:41:02 -0000 1.199
@@ -715,7 +715,7 @@
}
h->mv_cache_clean[list]= 0;
- if(IS_INTER(top_type)){
+ if(USES_LIST(top_type, list)){
const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
*(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
@@ -735,7 +735,7 @@
}
//FIXME unify cleanup or sth
- if(IS_INTER(left_type[0])){
+ if(USES_LIST(left_type[0], list)){
const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
*(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
@@ -749,7 +749,7 @@
h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
}
- if(IS_INTER(left_type[1])){
+ if(USES_LIST(left_type[1], list)){
const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
*(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
@@ -767,7 +767,7 @@
if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
continue;
- if(IS_INTER(topleft_type)){
+ if(USES_LIST(topleft_type, list)){
const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
*(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
@@ -777,7 +777,7 @@
h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
}
- if(IS_INTER(topright_type)){
+ if(USES_LIST(topright_type, list)){
const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
*(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
@@ -801,14 +801,14 @@
if( h->pps.cabac ) {
/* XXX beurk, Load mvd */
- if(IS_INTER(topleft_type)){
+ if(USES_LIST(topleft_type, list)){
const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
*(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy];
}else{
*(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0;
}
- if(IS_INTER(top_type)){
+ if(USES_LIST(top_type, list)){
const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
*(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
*(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
@@ -820,7 +820,7 @@
*(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
*(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
}
- if(IS_INTER(left_type[0])){
+ if(USES_LIST(left_type[0], list)){
const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
*(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
*(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
@@ -828,7 +828,7 @@
*(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
*(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
}
- if(IS_INTER(left_type[1])){
+ if(USES_LIST(left_type[1], list)){
const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
*(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
*(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
@@ -1424,26 +1424,8 @@
for(list=0; list<2; list++){
int y;
- if(!USES_LIST(mb_type, list)){
- if(1){ //FIXME skip or never read if mb_type doesn't use it
- for(y=0; y<4; y++){
- *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
- *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
- }
- if( h->pps.cabac ) {
- /* FIXME needed ? */
- for(y=0; y<4; y++){
- *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]=
- *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0;
- }
- }
- for(y=0; y<2; y++){
- s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
- s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
- }
- }
+ if(!USES_LIST(mb_type, list))
continue;
- }
for(y=0; y<4; y++){
*(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
@@ -1455,17 +1437,22 @@
*(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
}
}
- for(y=0; y<2; y++){
- s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
- s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
+
+ {
+ uint8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
+ ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
+ ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
+ ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
+ ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
}
}
if(h->slice_type == B_TYPE && h->pps.cabac){
if(IS_8X8(mb_type)){
- h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
- h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
- h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
+ uint8_t *direct_table = &h->direct_table[b8_xy];
+ direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
+ direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
+ direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
}
}
}
More information about the ffmpeg-cvslog mailing list