FFmpeg
 All Data Structures Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vf_ilpack.c
Go to the documentation of this file.
1 /*
2  * This file is part of MPlayer.
3  *
4  * MPlayer is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * MPlayer is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with MPlayer; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <inttypes.h>
23 
24 #include "config.h"
25 #include "mp_msg.h"
26 #include "cpudetect.h"
27 
28 #include "img_format.h"
29 #include "mp_image.h"
30 #include "vf.h"
31 #include "libavutil/attributes.h"
32 
/**
 * Signature shared by every line packer in this file.
 *
 * A packer combines one line of planar luma (y) and chroma (u, v) samples
 * into packed Y U Y V bytes at dst. w is the line width in luma samples;
 * us and vs are signed element offsets the interpolating packers use to
 * reach the second chroma line they blend with (they read u[2*us] and
 * v[2*vs]).
 */
typedef void (pack_func_t)(unsigned char *dst, unsigned char *y,
                           unsigned char *u, unsigned char *v, int w, int us, int vs);

/** Per-instance state of the filter. */
struct vf_priv_s {
    int mode;             /* 0: nearest-neighbour chroma, otherwise linear */
    pack_func_t *pack[2]; /* packers chosen in vf_open(), indexed per row by ilpack() */
};
40 
41 static void pack_nn_C(unsigned char *dst, unsigned char *y,
42  unsigned char *u, unsigned char *v, int w,
43  int av_unused us, int av_unused vs)
44 {
45  int j;
46  for (j = w/2; j; j--) {
47  *dst++ = *y++;
48  *dst++ = *u++;
49  *dst++ = *y++;
50  *dst++ = *v++;
51  }
52 }
53 
/**
 * Pack one line, blending each chroma sample 7:1 with the sample two
 * strides away.
 *
 * Output layout is Y0 U Y1 V for each of the w/2 luma pairs, where
 * U = (u[2*us] + 7*u[0]) >> 3 and V = (v[2*vs] + 7*v[0]) >> 3.
 * us and vs may be negative to reach a line above the current one.
 */
static void pack_li_0_C(unsigned char *dst, unsigned char *y,
                        unsigned char *u, unsigned char *v, int w, int us, int vs)
{
    const int u_far = us + us;
    const int v_far = vs + vs;
    int pairs = w / 2;

    while (pairs) {
        dst[0] = y[0];
        dst[1] = (u[u_far] + 7 * u[0]) >> 3;
        dst[2] = y[1];
        dst[3] = (v[v_far] + 7 * v[0]) >> 3;
        dst += 4;
        y   += 2;
        u   += 1;
        v   += 1;
        pairs--;
    }
}
66 
/**
 * Pack one line, blending each chroma sample 5:3 with the sample two
 * strides away.
 *
 * Output layout is Y0 U Y1 V for each of the w/2 luma pairs, where
 * U = (3*u[2*us] + 5*u[0]) >> 3 and V = (3*v[2*vs] + 5*v[0]) >> 3.
 * us and vs may be negative to reach a line above the current one.
 */
static void pack_li_1_C(unsigned char *dst, unsigned char *y,
                        unsigned char *u, unsigned char *v, int w, int us, int vs)
{
    const int u_far = us + us;
    const int v_far = vs + vs;
    int pairs = w / 2;

    while (pairs) {
        dst[0] = y[0];
        dst[1] = (3 * u[u_far] + 5 * u[0]) >> 3;
        dst[2] = y[1];
        dst[3] = (3 * v[v_far] + 5 * v[0]) >> 3;
        dst += 4;
        y   += 2;
        u   += 1;
        v   += 1;
        pairs--;
    }
}
79 
80 #if HAVE_MMX
81 static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
82  unsigned char *u, unsigned char *v, int w,
83  int av_unused us, int av_unused vs)
84 {
85  __asm__ volatile (""
86  ASMALIGN(4)
87  "1: \n\t"
88  "movq (%0), %%mm1 \n\t"
89  "movq (%0), %%mm2 \n\t"
90  "movq (%1), %%mm4 \n\t"
91  "movq (%2), %%mm6 \n\t"
92  "punpcklbw %%mm6, %%mm4 \n\t"
93  "punpcklbw %%mm4, %%mm1 \n\t"
94  "punpckhbw %%mm4, %%mm2 \n\t"
95 
96  "add $8, %0 \n\t"
97  "add $4, %1 \n\t"
98  "add $4, %2 \n\t"
99  "movq %%mm1, (%3) \n\t"
100  "movq %%mm2, 8(%3) \n\t"
101  "add $16, %3 \n\t"
102  "decl %4 \n\t"
103  "jnz 1b \n\t"
104  "emms \n\t"
105  :
106  : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8)
107  : "memory"
108  );
109  pack_nn_C(dst, y, u, v, (w&7), 0, 0);
110 }
111 
112 #if HAVE_EBX_AVAILABLE
/*
 * MMX version of pack_li_0_C(): pack one line, computing each chroma byte
 * as (far + 7*near) >> 3, where "near" is the sample at u/v and "far" the
 * sample at u[2*us] / v[2*vs].
 *
 * Each loop iteration consumes 16 luma, 8 U and 8 V bytes and writes 32
 * packed bytes; the loop counter is w/16. The C version is then called
 * with (w & 15) for the rest of the line.
 *
 * Register use: S = y, D = dst, a = u, b = v, c = loop counter, d = us,
 * BP = vs (BP is saved with push and restored with pop around the loop).
 *
 * NOTE(review): the asm advances S, D, a, b and c although they are
 * declared as input-only operands, and the C variables passed to
 * pack_li_0_C() afterwards are not updated by the asm - confirm the tail
 * call really processes the end of the line.
 * NOTE(review): when w < 16 the counter starts at 0 and "decl" runs before
 * the first test, so the loop does not stop after zero iterations.
 * NOTE(review): ".Lli0" is a fixed label name; it would clash if this asm
 * were ever emitted twice in one object file.
 */
113 static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
114  unsigned char *u, unsigned char *v, int w, int us, int vs)
115 {
116  __asm__ volatile (""
117  "push %%"REG_BP" \n\t"
118 #if ARCH_X86_64
119  "mov %6, %%"REG_BP" \n\t"
120 #else
 /* 32-bit: d holds &us; vs is fetched from the word right after it */
121  "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
122  "movl (%%"REG_d"), %%"REG_d" \n\t"
123 #endif
 /* mm0 = 0, used to widen bytes to 16-bit words */
124  "pxor %%mm0, %%mm0 \n\t"
125 
126  ASMALIGN(4)
127  ".Lli0: \n\t"
 /* first half: luma bytes 0-7 with the low 4 chroma bytes */
128  "movq (%%"REG_S"), %%mm1 \n\t"
129  "movq (%%"REG_S"), %%mm2 \n\t"
130 
131  "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
132  "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
133  "punpcklbw %%mm0, %%mm4 \n\t"
134  "punpcklbw %%mm0, %%mm6 \n\t"
135  "movq (%%"REG_a"), %%mm3 \n\t"
136  "movq (%%"REG_b"), %%mm5 \n\t"
137  "punpcklbw %%mm0, %%mm3 \n\t"
138  "punpcklbw %%mm0, %%mm5 \n\t"
 /* seven additions of the near sample: mm4/mm6 = far + 7*near */
139  "paddw %%mm3, %%mm4 \n\t"
140  "paddw %%mm5, %%mm6 \n\t"
141  "paddw %%mm3, %%mm4 \n\t"
142  "paddw %%mm5, %%mm6 \n\t"
143  "paddw %%mm3, %%mm4 \n\t"
144  "paddw %%mm5, %%mm6 \n\t"
145  "paddw %%mm3, %%mm4 \n\t"
146  "paddw %%mm5, %%mm6 \n\t"
147  "paddw %%mm3, %%mm4 \n\t"
148  "paddw %%mm5, %%mm6 \n\t"
149  "paddw %%mm3, %%mm4 \n\t"
150  "paddw %%mm5, %%mm6 \n\t"
151  "paddw %%mm3, %%mm4 \n\t"
152  "paddw %%mm5, %%mm6 \n\t"
153  "psrlw $3, %%mm4 \n\t"
154  "psrlw $3, %%mm6 \n\t"
 /* back to bytes, interleave U with V, then with luma */
155  "packuswb %%mm4, %%mm4 \n\t"
156  "packuswb %%mm6, %%mm6 \n\t"
157  "punpcklbw %%mm6, %%mm4 \n\t"
158  "punpcklbw %%mm4, %%mm1 \n\t"
159  "punpckhbw %%mm4, %%mm2 \n\t"
160 
161  "movq %%mm1, (%%"REG_D") \n\t"
162  "movq %%mm2, 8(%%"REG_D") \n\t"
163 
 /* second half: luma bytes 8-15 with the high 4 chroma bytes */
164  "movq 8(%%"REG_S"), %%mm1 \n\t"
165  "movq 8(%%"REG_S"), %%mm2 \n\t"
166 
167  "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
168  "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
169  "punpckhbw %%mm0, %%mm4 \n\t"
170  "punpckhbw %%mm0, %%mm6 \n\t"
171  "movq (%%"REG_a"), %%mm3 \n\t"
172  "movq (%%"REG_b"), %%mm5 \n\t"
173  "punpckhbw %%mm0, %%mm3 \n\t"
174  "punpckhbw %%mm0, %%mm5 \n\t"
175  "paddw %%mm3, %%mm4 \n\t"
176  "paddw %%mm5, %%mm6 \n\t"
177  "paddw %%mm3, %%mm4 \n\t"
178  "paddw %%mm5, %%mm6 \n\t"
179  "paddw %%mm3, %%mm4 \n\t"
180  "paddw %%mm5, %%mm6 \n\t"
181  "paddw %%mm3, %%mm4 \n\t"
182  "paddw %%mm5, %%mm6 \n\t"
183  "paddw %%mm3, %%mm4 \n\t"
184  "paddw %%mm5, %%mm6 \n\t"
185  "paddw %%mm3, %%mm4 \n\t"
186  "paddw %%mm5, %%mm6 \n\t"
187  "paddw %%mm3, %%mm4 \n\t"
188  "paddw %%mm5, %%mm6 \n\t"
189  "psrlw $3, %%mm4 \n\t"
190  "psrlw $3, %%mm6 \n\t"
191  "packuswb %%mm4, %%mm4 \n\t"
192  "packuswb %%mm6, %%mm6 \n\t"
193  "punpcklbw %%mm6, %%mm4 \n\t"
194  "punpcklbw %%mm4, %%mm1 \n\t"
195  "punpckhbw %%mm4, %%mm2 \n\t"
196 
 /* advance: 16 luma, 8 U, 8 V, 32 output bytes per iteration */
197  "add $16, %%"REG_S" \n\t"
198  "add $8, %%"REG_a" \n\t"
199  "add $8, %%"REG_b" \n\t"
200 
201  "movq %%mm1, 16(%%"REG_D") \n\t"
202  "movq %%mm2, 24(%%"REG_D") \n\t"
203  "add $32, %%"REG_D" \n\t"
204 
205  "decl %%ecx \n\t"
206  "jnz .Lli0 \n\t"
207  "emms \n\t"
208  "pop %%"REG_BP" \n\t"
209  :
210  : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
211 #if ARCH_X86_64
212  "d" ((x86_reg)us), "r" ((x86_reg)vs)
213 #else
214  "d" (&us)
215 #endif
216  : "memory"
217  );
 /* remaining (w & 15) luma samples go through the C implementation */
218  pack_li_0_C(dst, y, u, v, (w&15), us, vs);
219 }
220 
/*
 * MMX version of pack_li_1_C(): pack one line, computing each chroma byte
 * as (3*far + 5*near) >> 3, where "near" is the sample at u/v and "far"
 * the sample at u[2*us] / v[2*vs].
 *
 * Each loop iteration consumes 16 luma, 8 U and 8 V bytes and writes 32
 * packed bytes; the loop counter is w/16. The C version is then called
 * with (w & 15) for the rest of the line.
 *
 * Register use: S = y, D = dst, a = u, b = v, c = loop counter, d = us,
 * BP = vs (BP is saved with push and restored with pop around the loop).
 *
 * NOTE(review): the asm advances S, D, a, b and c although they are
 * declared as input-only operands, and the C variables passed to
 * pack_li_1_C() afterwards are not updated by the asm - confirm the tail
 * call really processes the end of the line.
 * NOTE(review): when w < 16 the counter starts at 0 and "decl" runs before
 * the first test, so the loop does not stop after zero iterations.
 * NOTE(review): ".Lli1" is a fixed label name; it would clash if this asm
 * were ever emitted twice in one object file.
 */
221 static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
222  unsigned char *u, unsigned char *v, int w, int us, int vs)
223 {
224  __asm__ volatile (""
225  "push %%"REG_BP" \n\t"
226 #if ARCH_X86_64
227  "mov %6, %%"REG_BP" \n\t"
228 #else
 /* 32-bit: d holds &us; vs is fetched from the word right after it */
229  "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
230  "movl (%%"REG_d"), %%"REG_d" \n\t"
231 #endif
 /* mm0 = 0, used to widen bytes to 16-bit words */
232  "pxor %%mm0, %%mm0 \n\t"
233 
234  ASMALIGN(4)
235  ".Lli1: \n\t"
 /* first half: luma bytes 0-7 with the low 4 chroma bytes */
236  "movq (%%"REG_S"), %%mm1 \n\t"
237  "movq (%%"REG_S"), %%mm2 \n\t"
238 
239  "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
240  "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
241  "punpcklbw %%mm0, %%mm4 \n\t"
242  "punpcklbw %%mm0, %%mm6 \n\t"
243  "movq (%%"REG_a"), %%mm3 \n\t"
244  "movq (%%"REG_b"), %%mm5 \n\t"
245  "punpcklbw %%mm0, %%mm3 \n\t"
246  "punpcklbw %%mm0, %%mm5 \n\t"
 /* triple the far sample: x + x + saved copy */
247  "movq %%mm4, %%mm7 \n\t"
248  "paddw %%mm4, %%mm4 \n\t"
249  "paddw %%mm7, %%mm4 \n\t"
250  "movq %%mm6, %%mm7 \n\t"
251  "paddw %%mm6, %%mm6 \n\t"
252  "paddw %%mm7, %%mm6 \n\t"
 /* five additions of the near sample: mm4/mm6 = 3*far + 5*near */
253  "paddw %%mm3, %%mm4 \n\t"
254  "paddw %%mm5, %%mm6 \n\t"
255  "paddw %%mm3, %%mm4 \n\t"
256  "paddw %%mm5, %%mm6 \n\t"
257  "paddw %%mm3, %%mm4 \n\t"
258  "paddw %%mm5, %%mm6 \n\t"
259  "paddw %%mm3, %%mm4 \n\t"
260  "paddw %%mm5, %%mm6 \n\t"
261  "paddw %%mm3, %%mm4 \n\t"
262  "paddw %%mm5, %%mm6 \n\t"
263  "psrlw $3, %%mm4 \n\t"
264  "psrlw $3, %%mm6 \n\t"
 /* back to bytes, interleave U with V, then with luma */
265  "packuswb %%mm4, %%mm4 \n\t"
266  "packuswb %%mm6, %%mm6 \n\t"
267  "punpcklbw %%mm6, %%mm4 \n\t"
268  "punpcklbw %%mm4, %%mm1 \n\t"
269  "punpckhbw %%mm4, %%mm2 \n\t"
270 
271  "movq %%mm1, (%%"REG_D") \n\t"
272  "movq %%mm2, 8(%%"REG_D") \n\t"
273 
 /* second half: luma bytes 8-15 with the high 4 chroma bytes */
274  "movq 8(%%"REG_S"), %%mm1 \n\t"
275  "movq 8(%%"REG_S"), %%mm2 \n\t"
276 
277  "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
278  "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
279  "punpckhbw %%mm0, %%mm4 \n\t"
280  "punpckhbw %%mm0, %%mm6 \n\t"
281  "movq (%%"REG_a"), %%mm3 \n\t"
282  "movq (%%"REG_b"), %%mm5 \n\t"
283  "punpckhbw %%mm0, %%mm3 \n\t"
284  "punpckhbw %%mm0, %%mm5 \n\t"
285  "movq %%mm4, %%mm7 \n\t"
286  "paddw %%mm4, %%mm4 \n\t"
287  "paddw %%mm7, %%mm4 \n\t"
288  "movq %%mm6, %%mm7 \n\t"
289  "paddw %%mm6, %%mm6 \n\t"
290  "paddw %%mm7, %%mm6 \n\t"
291  "paddw %%mm3, %%mm4 \n\t"
292  "paddw %%mm5, %%mm6 \n\t"
293  "paddw %%mm3, %%mm4 \n\t"
294  "paddw %%mm5, %%mm6 \n\t"
295  "paddw %%mm3, %%mm4 \n\t"
296  "paddw %%mm5, %%mm6 \n\t"
297  "paddw %%mm3, %%mm4 \n\t"
298  "paddw %%mm5, %%mm6 \n\t"
299  "paddw %%mm3, %%mm4 \n\t"
300  "paddw %%mm5, %%mm6 \n\t"
301  "psrlw $3, %%mm4 \n\t"
302  "psrlw $3, %%mm6 \n\t"
303  "packuswb %%mm4, %%mm4 \n\t"
304  "packuswb %%mm6, %%mm6 \n\t"
305  "punpcklbw %%mm6, %%mm4 \n\t"
306  "punpcklbw %%mm4, %%mm1 \n\t"
307  "punpckhbw %%mm4, %%mm2 \n\t"
308 
 /* advance: 16 luma, 8 U, 8 V, 32 output bytes per iteration */
309  "add $16, %%"REG_S" \n\t"
310  "add $8, %%"REG_a" \n\t"
311  "add $8, %%"REG_b" \n\t"
312 
313  "movq %%mm1, 16(%%"REG_D") \n\t"
314  "movq %%mm2, 24(%%"REG_D") \n\t"
315  "add $32, %%"REG_D" \n\t"
316 
317  "decl %%ecx \n\t"
318  "jnz .Lli1 \n\t"
319  "emms \n\t"
320  "pop %%"REG_BP" \n\t"
321  :
322  : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
323 #if ARCH_X86_64
324  "d" ((x86_reg)us), "r" ((x86_reg)vs)
325 #else
326  "d" (&us)
327 #endif
328  : "memory"
329  );
 /* remaining (w & 15) luma samples go through the C implementation */
330  pack_li_1_C(dst, y, u, v, (w&15), us, vs);
331 }
332 #endif /* HAVE_EBX_AVAILABLE */
333 #endif
334 
338 
339 static void ilpack(unsigned char *dst, unsigned char *src[3],
340  int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2])
341 {
342  int i;
343  unsigned char *y, *u, *v;
344  int ys = srcstride[0], us = srcstride[1], vs = srcstride[2];
345  int a, b;
346 
347  y = src[0];
348  u = src[1];
349  v = src[2];
350 
351  pack_nn(dst, y, u, v, w, 0, 0);
352  y += ys; dst += dststride;
353  pack_nn(dst, y, u+us, v+vs, w, 0, 0);
354  y += ys; dst += dststride;
355  for (i=2; i<h-2; i++) {
356  a = (i&2) ? 1 : -1;
357  b = (i&1) ^ ((i&2)>>1);
358  pack[b](dst, y, u, v, w, us*a, vs*a);
359  y += ys;
360  if ((i&3) == 1) {
361  u -= us;
362  v -= vs;
363  } else {
364  u += us;
365  v += vs;
366  }
367  dst += dststride;
368  }
369  pack_nn(dst, y, u, v, w, 0, 0);
370  y += ys; dst += dststride; u += us; v += vs;
371  pack_nn(dst, y, u, v, w, 0, 0);
372 }
373 
374 
375 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
376 {
377  mp_image_t *dmpi;
378 
379  // hope we'll get DR buffer:
380  dmpi=ff_vf_get_image(vf->next, IMGFMT_YUY2,
382  mpi->w, mpi->h);
383 
384  ilpack(dmpi->planes[0], mpi->planes, dmpi->stride[0], mpi->stride, mpi->w, mpi->h, vf->priv->pack);
385 
386  return ff_vf_next_put_image(vf,dmpi, pts);
387 }
388 
389 static int config(struct vf_instance *vf,
390  int width, int height, int d_width, int d_height,
391  unsigned int flags, unsigned int outfmt)
392 {
393  /* FIXME - also support UYVY output? */
394  return ff_vf_next_config(vf, width, height, d_width, d_height, flags, IMGFMT_YUY2);
395 }
396 
397 
398 static int query_format(struct vf_instance *vf, unsigned int fmt)
399 {
400  /* FIXME - really any YUV 4:2:0 input format should work */
401  switch (fmt) {
402  case IMGFMT_YV12:
403  case IMGFMT_IYUV:
404  case IMGFMT_I420:
406  }
407  return 0;
408 }
409 
410 static int vf_open(vf_instance_t *vf, char *args)
411 {
412  vf->config=config;
414  vf->put_image=put_image;
415  vf->priv = calloc(1, sizeof(struct vf_priv_s));
416  vf->priv->mode = 1;
417  if (args) sscanf(args, "%d", &vf->priv->mode);
418 
419  pack_nn = pack_nn_C;
420  pack_li_0 = pack_li_0_C;
421  pack_li_1 = pack_li_1_C;
422 #if HAVE_MMX
423  if(ff_gCpuCaps.hasMMX) {
424  pack_nn = pack_nn_MMX;
425 #if HAVE_EBX_AVAILABLE
426  pack_li_0 = pack_li_0_MMX;
427  pack_li_1 = pack_li_1_MMX;
428 #endif
429  }
430 #endif
431 
432  switch(vf->priv->mode) {
433  case 0:
434  vf->priv->pack[0] = vf->priv->pack[1] = pack_nn;
435  break;
436  default:
438  "ilpack: unknown mode %d (fallback to linear)\n",
439  vf->priv->mode);
440  /* Fallthrough */
441  case 1:
442  vf->priv->pack[0] = pack_li_0;
443  vf->priv->pack[1] = pack_li_1;
444  break;
445  }
446 
447  return 1;
448 }
449 
451  "4:2:0 planar -> 4:2:2 packed reinterlacer",
452  "ilpack",
453  "Richard Felker",
454  "",
455  vf_open,
456  NULL
457 };