FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
vf_ilpack.c
Go to the documentation of this file.
1 /*
2  * This file is part of MPlayer.
3  *
4  * MPlayer is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation; either version 2 of the License, or
7  * (at your option) any later version.
8  *
9  * MPlayer is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License along
15  * with MPlayer; if not, write to the Free Software Foundation, Inc.,
16  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
17  */
18 
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <inttypes.h>
23 
24 #include "config.h"
25 #include "mp_msg.h"
26 #include "cpudetect.h"
27 
28 #include "img_format.h"
29 #include "mp_image.h"
30 #include "vf.h"
31 #include "libavutil/attributes.h"
32 #include "libavutil/x86/asm.h"
33 
/**
 * Signature shared by every row packer in this file.
 *
 * A packer reads one row of planar luma (y) and chroma (u, v) and writes
 * one row of packed output to dst.
 *
 * @param dst output row
 * @param y   luma input row
 * @param u   first chroma input row
 * @param v   second chroma input row
 * @param w   row width in luma samples
 * @param us  offset, in bytes, applied (doubled) to u by the interpolating
 *            packers; ignored by the nearest-neighbour ones
 * @param vs  same as us, for v
 */
typedef void pack_func_t(unsigned char *dst,
                         unsigned char *y,
                         unsigned char *u,
                         unsigned char *v,
                         int w,
                         int us,
                         int vs);
36 
37 struct vf_priv_s {
38  int mode;
40 };
41 
/**
 * Pack one row of planar Y/U/V into packed Y U Y V order, reusing each
 * chroma sample for two luma samples (no interpolation).
 *
 * @param dst output row, receives 4 bytes per pair of luma samples
 * @param y   luma row, 2 bytes consumed per pair
 * @param u   first chroma row, 1 byte consumed per pair
 * @param v   second chroma row, 1 byte consumed per pair
 * @param w   width in luma samples; w/2 pairs are written
 * @param us  unused (present to match pack_func_t)
 * @param vs  unused (present to match pack_func_t)
 */
static void pack_nn_C(unsigned char *dst, unsigned char *y,
                      unsigned char *u, unsigned char *v, int w,
                      int us, int vs)
{
    int pairs = w / 2;

    /* The strides only matter to the interpolating packers. */
    (void)us;
    (void)vs;

    while (pairs--) {
        dst[0] = y[0];
        dst[1] = *u++;
        dst[2] = y[1];
        dst[3] = *v++;
        dst += 4;
        y   += 2;
    }
}
54 
/**
 * Pack one row of planar Y/U/V into packed Y U Y V order, blending each
 * chroma sample with the one located 2*us (resp. 2*vs) bytes away.
 *
 * The blend is (7 * current + 1 * other) >> 3.
 *
 * @param dst output row, receives 4 bytes per pair of luma samples
 * @param y   luma row
 * @param u   first chroma row (current line)
 * @param v   second chroma row (current line)
 * @param w   width in luma samples; w/2 pairs are written
 * @param us  half the byte offset from u to the other chroma line (may be negative)
 * @param vs  half the byte offset from v to the other chroma line (may be negative)
 */
static void pack_li_0_C(unsigned char *dst, unsigned char *y,
                        unsigned char *u, unsigned char *v, int w, int us, int vs)
{
    const int u_far = us + us;
    const int v_far = vs + vs;
    int pairs = w / 2;

    while (pairs--) {
        dst[0] = y[0];
        dst[1] = (u[u_far] + 7 * u[0]) >> 3;
        dst[2] = y[1];
        dst[3] = (v[v_far] + 7 * v[0]) >> 3;
        dst += 4;
        y   += 2;
        u++;
        v++;
    }
}
67 
/**
 * Pack one row of planar Y/U/V into packed Y U Y V order, blending each
 * chroma sample with the one located 2*us (resp. 2*vs) bytes away.
 *
 * The blend is (5 * current + 3 * other) >> 3.
 *
 * @param dst output row, receives 4 bytes per pair of luma samples
 * @param y   luma row
 * @param u   first chroma row (current line)
 * @param v   second chroma row (current line)
 * @param w   width in luma samples; w/2 pairs are written
 * @param us  half the byte offset from u to the other chroma line (may be negative)
 * @param vs  half the byte offset from v to the other chroma line (may be negative)
 */
static void pack_li_1_C(unsigned char *dst, unsigned char *y,
                        unsigned char *u, unsigned char *v, int w, int us, int vs)
{
    const int u_far = us + us;
    const int v_far = vs + vs;
    int pairs = w / 2;

    while (pairs--) {
        dst[0] = y[0];
        dst[1] = (3 * u[u_far] + 5 * u[0]) >> 3;
        dst[2] = y[1];
        dst[3] = (3 * v[v_far] + 5 * v[0]) >> 3;
        dst += 4;
        y   += 2;
        u++;
        v++;
    }
}
80 
81 #if HAVE_MMX
82 static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
83  unsigned char *u, unsigned char *v, int w,
84  int av_unused us, int av_unused vs)
85 {
86  __asm__ volatile (""
87  ASMALIGN(4)
88  "1: \n\t"
89  "movq (%0), %%mm1 \n\t"
90  "movq (%0), %%mm2 \n\t"
91  "movq (%1), %%mm4 \n\t"
92  "movq (%2), %%mm6 \n\t"
93  "punpcklbw %%mm6, %%mm4 \n\t"
94  "punpcklbw %%mm4, %%mm1 \n\t"
95  "punpckhbw %%mm4, %%mm2 \n\t"
96 
97  "add $8, %0 \n\t"
98  "add $4, %1 \n\t"
99  "add $4, %2 \n\t"
100  "movq %%mm1, (%3) \n\t"
101  "movq %%mm2, 8(%3) \n\t"
102  "add $16, %3 \n\t"
103  "decl %4 \n\t"
104  "jnz 1b \n\t"
105  "emms \n\t"
106  :
107  : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8)
108  : "memory"
109  );
110  pack_nn_C(dst, y, u, v, (w&7), 0, 0);
111 }
112 
113 #if HAVE_EBX_AVAILABLE
114 static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
115  unsigned char *u, unsigned char *v, int w, int us, int vs)
116 {
117  __asm__ volatile (""
118  "push %%"REG_BP" \n\t"
119 #if ARCH_X86_64
120  "mov %6, %%"REG_BP" \n\t"
121 #else
122  "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
123  "movl (%%"REG_d"), %%"REG_d" \n\t"
124 #endif
125  "pxor %%mm0, %%mm0 \n\t"
126 
127  ASMALIGN(4)
128  ".Lli0: \n\t"
129  "movq (%%"REG_S"), %%mm1 \n\t"
130  "movq (%%"REG_S"), %%mm2 \n\t"
131 
132  "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
133  "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
134  "punpcklbw %%mm0, %%mm4 \n\t"
135  "punpcklbw %%mm0, %%mm6 \n\t"
136  "movq (%%"REG_a"), %%mm3 \n\t"
137  "movq (%%"REG_b"), %%mm5 \n\t"
138  "punpcklbw %%mm0, %%mm3 \n\t"
139  "punpcklbw %%mm0, %%mm5 \n\t"
140  "paddw %%mm3, %%mm4 \n\t"
141  "paddw %%mm5, %%mm6 \n\t"
142  "paddw %%mm3, %%mm4 \n\t"
143  "paddw %%mm5, %%mm6 \n\t"
144  "paddw %%mm3, %%mm4 \n\t"
145  "paddw %%mm5, %%mm6 \n\t"
146  "paddw %%mm3, %%mm4 \n\t"
147  "paddw %%mm5, %%mm6 \n\t"
148  "paddw %%mm3, %%mm4 \n\t"
149  "paddw %%mm5, %%mm6 \n\t"
150  "paddw %%mm3, %%mm4 \n\t"
151  "paddw %%mm5, %%mm6 \n\t"
152  "paddw %%mm3, %%mm4 \n\t"
153  "paddw %%mm5, %%mm6 \n\t"
154  "psrlw $3, %%mm4 \n\t"
155  "psrlw $3, %%mm6 \n\t"
156  "packuswb %%mm4, %%mm4 \n\t"
157  "packuswb %%mm6, %%mm6 \n\t"
158  "punpcklbw %%mm6, %%mm4 \n\t"
159  "punpcklbw %%mm4, %%mm1 \n\t"
160  "punpckhbw %%mm4, %%mm2 \n\t"
161 
162  "movq %%mm1, (%%"REG_D") \n\t"
163  "movq %%mm2, 8(%%"REG_D") \n\t"
164 
165  "movq 8(%%"REG_S"), %%mm1 \n\t"
166  "movq 8(%%"REG_S"), %%mm2 \n\t"
167 
168  "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
169  "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
170  "punpckhbw %%mm0, %%mm4 \n\t"
171  "punpckhbw %%mm0, %%mm6 \n\t"
172  "movq (%%"REG_a"), %%mm3 \n\t"
173  "movq (%%"REG_b"), %%mm5 \n\t"
174  "punpckhbw %%mm0, %%mm3 \n\t"
175  "punpckhbw %%mm0, %%mm5 \n\t"
176  "paddw %%mm3, %%mm4 \n\t"
177  "paddw %%mm5, %%mm6 \n\t"
178  "paddw %%mm3, %%mm4 \n\t"
179  "paddw %%mm5, %%mm6 \n\t"
180  "paddw %%mm3, %%mm4 \n\t"
181  "paddw %%mm5, %%mm6 \n\t"
182  "paddw %%mm3, %%mm4 \n\t"
183  "paddw %%mm5, %%mm6 \n\t"
184  "paddw %%mm3, %%mm4 \n\t"
185  "paddw %%mm5, %%mm6 \n\t"
186  "paddw %%mm3, %%mm4 \n\t"
187  "paddw %%mm5, %%mm6 \n\t"
188  "paddw %%mm3, %%mm4 \n\t"
189  "paddw %%mm5, %%mm6 \n\t"
190  "psrlw $3, %%mm4 \n\t"
191  "psrlw $3, %%mm6 \n\t"
192  "packuswb %%mm4, %%mm4 \n\t"
193  "packuswb %%mm6, %%mm6 \n\t"
194  "punpcklbw %%mm6, %%mm4 \n\t"
195  "punpcklbw %%mm4, %%mm1 \n\t"
196  "punpckhbw %%mm4, %%mm2 \n\t"
197 
198  "add $16, %%"REG_S" \n\t"
199  "add $8, %%"REG_a" \n\t"
200  "add $8, %%"REG_b" \n\t"
201 
202  "movq %%mm1, 16(%%"REG_D") \n\t"
203  "movq %%mm2, 24(%%"REG_D") \n\t"
204  "add $32, %%"REG_D" \n\t"
205 
206  "decl %%ecx \n\t"
207  "jnz .Lli0 \n\t"
208  "emms \n\t"
209  "pop %%"REG_BP" \n\t"
210  :
211  : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
212 #if ARCH_X86_64
213  "d" ((x86_reg)us), "r" ((x86_reg)vs)
214 #else
215  "d" (&us)
216 #endif
217  : "memory"
218  );
219  pack_li_0_C(dst, y, u, v, (w&15), us, vs);
220 }
221 
222 static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
223  unsigned char *u, unsigned char *v, int w, int us, int vs)
224 {
225  __asm__ volatile (""
226  "push %%"REG_BP" \n\t"
227 #if ARCH_X86_64
228  "mov %6, %%"REG_BP" \n\t"
229 #else
230  "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
231  "movl (%%"REG_d"), %%"REG_d" \n\t"
232 #endif
233  "pxor %%mm0, %%mm0 \n\t"
234 
235  ASMALIGN(4)
236  ".Lli1: \n\t"
237  "movq (%%"REG_S"), %%mm1 \n\t"
238  "movq (%%"REG_S"), %%mm2 \n\t"
239 
240  "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
241  "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
242  "punpcklbw %%mm0, %%mm4 \n\t"
243  "punpcklbw %%mm0, %%mm6 \n\t"
244  "movq (%%"REG_a"), %%mm3 \n\t"
245  "movq (%%"REG_b"), %%mm5 \n\t"
246  "punpcklbw %%mm0, %%mm3 \n\t"
247  "punpcklbw %%mm0, %%mm5 \n\t"
248  "movq %%mm4, %%mm7 \n\t"
249  "paddw %%mm4, %%mm4 \n\t"
250  "paddw %%mm7, %%mm4 \n\t"
251  "movq %%mm6, %%mm7 \n\t"
252  "paddw %%mm6, %%mm6 \n\t"
253  "paddw %%mm7, %%mm6 \n\t"
254  "paddw %%mm3, %%mm4 \n\t"
255  "paddw %%mm5, %%mm6 \n\t"
256  "paddw %%mm3, %%mm4 \n\t"
257  "paddw %%mm5, %%mm6 \n\t"
258  "paddw %%mm3, %%mm4 \n\t"
259  "paddw %%mm5, %%mm6 \n\t"
260  "paddw %%mm3, %%mm4 \n\t"
261  "paddw %%mm5, %%mm6 \n\t"
262  "paddw %%mm3, %%mm4 \n\t"
263  "paddw %%mm5, %%mm6 \n\t"
264  "psrlw $3, %%mm4 \n\t"
265  "psrlw $3, %%mm6 \n\t"
266  "packuswb %%mm4, %%mm4 \n\t"
267  "packuswb %%mm6, %%mm6 \n\t"
268  "punpcklbw %%mm6, %%mm4 \n\t"
269  "punpcklbw %%mm4, %%mm1 \n\t"
270  "punpckhbw %%mm4, %%mm2 \n\t"
271 
272  "movq %%mm1, (%%"REG_D") \n\t"
273  "movq %%mm2, 8(%%"REG_D") \n\t"
274 
275  "movq 8(%%"REG_S"), %%mm1 \n\t"
276  "movq 8(%%"REG_S"), %%mm2 \n\t"
277 
278  "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
279  "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
280  "punpckhbw %%mm0, %%mm4 \n\t"
281  "punpckhbw %%mm0, %%mm6 \n\t"
282  "movq (%%"REG_a"), %%mm3 \n\t"
283  "movq (%%"REG_b"), %%mm5 \n\t"
284  "punpckhbw %%mm0, %%mm3 \n\t"
285  "punpckhbw %%mm0, %%mm5 \n\t"
286  "movq %%mm4, %%mm7 \n\t"
287  "paddw %%mm4, %%mm4 \n\t"
288  "paddw %%mm7, %%mm4 \n\t"
289  "movq %%mm6, %%mm7 \n\t"
290  "paddw %%mm6, %%mm6 \n\t"
291  "paddw %%mm7, %%mm6 \n\t"
292  "paddw %%mm3, %%mm4 \n\t"
293  "paddw %%mm5, %%mm6 \n\t"
294  "paddw %%mm3, %%mm4 \n\t"
295  "paddw %%mm5, %%mm6 \n\t"
296  "paddw %%mm3, %%mm4 \n\t"
297  "paddw %%mm5, %%mm6 \n\t"
298  "paddw %%mm3, %%mm4 \n\t"
299  "paddw %%mm5, %%mm6 \n\t"
300  "paddw %%mm3, %%mm4 \n\t"
301  "paddw %%mm5, %%mm6 \n\t"
302  "psrlw $3, %%mm4 \n\t"
303  "psrlw $3, %%mm6 \n\t"
304  "packuswb %%mm4, %%mm4 \n\t"
305  "packuswb %%mm6, %%mm6 \n\t"
306  "punpcklbw %%mm6, %%mm4 \n\t"
307  "punpcklbw %%mm4, %%mm1 \n\t"
308  "punpckhbw %%mm4, %%mm2 \n\t"
309 
310  "add $16, %%"REG_S" \n\t"
311  "add $8, %%"REG_a" \n\t"
312  "add $8, %%"REG_b" \n\t"
313 
314  "movq %%mm1, 16(%%"REG_D") \n\t"
315  "movq %%mm2, 24(%%"REG_D") \n\t"
316  "add $32, %%"REG_D" \n\t"
317 
318  "decl %%ecx \n\t"
319  "jnz .Lli1 \n\t"
320  "emms \n\t"
321  "pop %%"REG_BP" \n\t"
322  :
323  : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
324 #if ARCH_X86_64
325  "d" ((x86_reg)us), "r" ((x86_reg)vs)
326 #else
327  "d" (&us)
328 #endif
329  : "memory"
330  );
331  pack_li_1_C(dst, y, u, v, (w&15), us, vs);
332 }
333 #endif /* HAVE_EBX_AVAILABLE */
334 #endif
335 
339 
340 static void ilpack(unsigned char *dst, unsigned char *src[3],
341  int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2])
342 {
343  int i;
344  unsigned char *y, *u, *v;
345  int ys = srcstride[0], us = srcstride[1], vs = srcstride[2];
346  int a, b;
347 
348  y = src[0];
349  u = src[1];
350  v = src[2];
351 
352  pack_nn(dst, y, u, v, w, 0, 0);
353  y += ys; dst += dststride;
354  pack_nn(dst, y, u+us, v+vs, w, 0, 0);
355  y += ys; dst += dststride;
356  for (i=2; i<h-2; i++) {
357  a = (i&2) ? 1 : -1;
358  b = (i&1) ^ ((i&2)>>1);
359  pack[b](dst, y, u, v, w, us*a, vs*a);
360  y += ys;
361  if ((i&3) == 1) {
362  u -= us;
363  v -= vs;
364  } else {
365  u += us;
366  v += vs;
367  }
368  dst += dststride;
369  }
370  pack_nn(dst, y, u, v, w, 0, 0);
371  y += ys; dst += dststride; u += us; v += vs;
372  pack_nn(dst, y, u, v, w, 0, 0);
373 }
374 
375 
376 static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
377 {
378  mp_image_t *dmpi;
379 
380  // hope we'll get DR buffer:
381  dmpi=ff_vf_get_image(vf->next, IMGFMT_YUY2,
383  mpi->w, mpi->h);
384 
385  ilpack(dmpi->planes[0], mpi->planes, dmpi->stride[0], mpi->stride, mpi->w, mpi->h, vf->priv->pack);
386 
387  return ff_vf_next_put_image(vf,dmpi, pts);
388 }
389 
390 static int config(struct vf_instance *vf,
391  int width, int height, int d_width, int d_height,
392  unsigned int flags, unsigned int outfmt)
393 {
394  /* FIXME - also support UYVY output? */
395  return ff_vf_next_config(vf, width, height, d_width, d_height, flags, IMGFMT_YUY2);
396 }
397 
398 
399 static int query_format(struct vf_instance *vf, unsigned int fmt)
400 {
401  /* FIXME - really any YUV 4:2:0 input format should work */
402  switch (fmt) {
403  case IMGFMT_YV12:
404  case IMGFMT_IYUV:
405  case IMGFMT_I420:
407  }
408  return 0;
409 }
410 
411 static int vf_open(vf_instance_t *vf, char *args)
412 {
413  vf->config=config;
415  vf->put_image=put_image;
416  vf->priv = calloc(1, sizeof(struct vf_priv_s));
417  vf->priv->mode = 1;
418  if (args) sscanf(args, "%d", &vf->priv->mode);
419 
420  pack_nn = pack_nn_C;
421  pack_li_0 = pack_li_0_C;
422  pack_li_1 = pack_li_1_C;
423 #if HAVE_MMX
424  if(ff_gCpuCaps.hasMMX) {
425  pack_nn = pack_nn_MMX;
426 #if HAVE_EBX_AVAILABLE
427  pack_li_0 = pack_li_0_MMX;
428  pack_li_1 = pack_li_1_MMX;
429 #endif
430  }
431 #endif
432 
433  switch(vf->priv->mode) {
434  case 0:
435  vf->priv->pack[0] = vf->priv->pack[1] = pack_nn;
436  break;
437  default:
439  "ilpack: unknown mode %d (fallback to linear)\n",
440  vf->priv->mode);
441  /* Fallthrough */
442  case 1:
443  vf->priv->pack[0] = pack_li_0;
444  vf->priv->pack[1] = pack_li_1;
445  break;
446  }
447 
448  return 1;
449 }
450 
452  "4:2:0 planar -> 4:2:2 packed reinterlacer",
453  "ilpack",
454  "Richard Felker",
455  "",
456  vf_open,
457  NULL
458 };