[FFmpeg-devel] [PATCH] lavfi/lut: use FF_CEIL_RSHIFT for chroma w/h rounding.

Clément Bœsch ubitux at gmail.com
Sat May 11 01:04:24 CEST 2013


---
We might consider adding a FF_CEIL_RSHIFT2 macro using the second form
shown below (f2), which, according to Jason and Ronald, might be more
interesting when the shift amount is a constant.

For the record:

% cat a.c
/* Ceiling of x / 2^n via negate, right-shift, negate. Relies on >> of a
 * negative int being an arithmetic shift (implementation-defined in C). */
int f1(int x, int n) { return -((-x) >> n); }
/* Ceiling of x / 2^n by adding (1<<n) - 1 to x before the right shift. */
int f2(int x, int n) { return (x + (1<<n) - 1) >> n; }

% gcc -O2 -c a.c && objdump -d -Mintel a.o

a.o:     file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <f1>:
   0:	89 f8                	mov    eax,edi
   2:	89 f1                	mov    ecx,esi
   4:	f7 d8                	neg    eax
   6:	d3 f8                	sar    eax,cl
   8:	f7 d8                	neg    eax
   a:	c3                   	ret
   b:	0f 1f 44 00 00       	nop    DWORD PTR [rax+rax*1+0x0]

0000000000000010 <f2>:
  10:	89 f1                	mov    ecx,esi
  12:	b8 01 00 00 00       	mov    eax,0x1
  17:	d3 e0                	shl    eax,cl
  19:	8d 44 07 ff          	lea    eax,[rdi+rax*1-0x1]
  1d:	d3 f8                	sar    eax,cl
  1f:	c3                   	ret

% clang -O2 -c a.c && objdump -d -Mintel a.o

a.o:     file format elf64-x86-64

Disassembly of section .text:

0000000000000000 <f1>:
   0:	f7 df                	neg    edi
   2:	40 88 f1             	mov    cl,sil
   5:	d3 ff                	sar    edi,cl
   7:	f7 df                	neg    edi
   9:	89 f8                	mov    eax,edi
   b:	c3                   	ret
   c:	0f 1f 40 00          	nop    DWORD PTR [rax+0x0]

0000000000000010 <f2>:
  10:	89 f1                	mov    ecx,esi
  12:	b8 01 00 00 00       	mov    eax,0x1
  17:	d3 e0                	shl    eax,cl
  19:	8d 44 07 ff          	lea    eax,[rdi+rax*1-0x1]
  1d:	d3 f8                	sar    eax,cl
  1f:	c3                   	ret
---
 libavfilter/vf_lut.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/libavfilter/vf_lut.c b/libavfilter/vf_lut.c
index d544419..070cf55 100644
--- a/libavfilter/vf_lut.c
+++ b/libavfilter/vf_lut.c
@@ -305,13 +305,14 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in)
         for (plane = 0; plane < 4 && in->data[plane]; plane++) {
             int vsub = plane == 1 || plane == 2 ? lut->vsub : 0;
             int hsub = plane == 1 || plane == 2 ? lut->hsub : 0;
+            int h = FF_CEIL_RSHIFT(inlink->h, vsub);
+            int w = FF_CEIL_RSHIFT(inlink->w, hsub);
 
             inrow  = in ->data[plane];
             outrow = out->data[plane];
 
-            for (i = 0; i < (in->height + (1<<vsub) - 1)>>vsub; i ++) {
+            for (i = 0; i < h; i++) {
                 const uint8_t *tab = lut->lut[plane];
-                int w = (inlink->w + (1<<hsub) - 1)>>hsub;
                 for (j = 0; j < w; j++)
                     outrow[j] = tab[inrow[j]];
                 inrow  += in ->linesize[plane];
-- 
1.8.2.2



More information about the ffmpeg-devel mailing list