FFmpeg
vf_spp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License along
17  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
18  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19  */
20 
21 
22 #include "libavutil/attributes.h"
23 #include "libavutil/cpu.h"
24 #include "libavutil/crc.h"
25 #include "libavutil/x86/asm.h"
26 #include "libavfilter/vf_spp.h"
27 
28 #if HAVE_MMX_INLINE
/**
 * Hard-threshold and requantize one 8x8 block of coefficients using MMX.
 *
 * Each coefficient is run through a saturating add/subtract sequence that
 * yields 0 when its magnitude is at most threshold1 and (x + 4) >> 3
 * otherwise.  The results are written to dst in a fixed shuffled order
 * that is hard-coded in the punpck sequence below.
 *
 * @param dst          output block, all 64 entries are overwritten
 * @param src          input block of 64 coefficients
 * @param qp           quantizer; threshold1 is qp * 16 - 1 (bias is 0)
 * @param permutation  unused here; the output order is baked into the asm
 *
 * NOTE(review): no emms is issued here; presumably the caller clears the
 * MMX state before any x87 code runs -- confirm.
 */
static void hardthresh_mmx(int16_t dst[64], const int16_t src[64],
                           int qp, const uint8_t *permutation)
{
    int bias = 0; //FIXME
    unsigned int threshold1;

    threshold1 = qp * ((1<<4) - bias) - 1;

/*
 * Process four quadwords (16 coefficients).
 * Expects mm4 = threshold1 + 1, mm5 = threshold1 + 5, mm6 = threshold1 - 4,
 * each replicated into all four 16-bit lanes.  After thresholding, the four
 * registers are interleaved word-wise and stored to dst0..dst3.
 */
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3)    \
    "movq " #src0 ", %%mm0      \n"                                     \
    "movq " #src1 ", %%mm1      \n"                                     \
    "movq " #src2 ", %%mm2      \n"                                     \
    "movq " #src3 ", %%mm3      \n"                                     \
    "psubw    %%mm4, %%mm0      \n"                                     \
    "psubw    %%mm4, %%mm1      \n"                                     \
    "psubw    %%mm4, %%mm2      \n"                                     \
    "psubw    %%mm4, %%mm3      \n"                                     \
    "paddusw  %%mm5, %%mm0      \n"                                     \
    "paddusw  %%mm5, %%mm1      \n"                                     \
    "paddusw  %%mm5, %%mm2      \n"                                     \
    "paddusw  %%mm5, %%mm3      \n"                                     \
    "paddw    %%mm6, %%mm0      \n"                                     \
    "paddw    %%mm6, %%mm1      \n"                                     \
    "paddw    %%mm6, %%mm2      \n"                                     \
    "paddw    %%mm6, %%mm3      \n"                                     \
    "psubusw  %%mm6, %%mm0      \n"                                     \
    "psubusw  %%mm6, %%mm1      \n"                                     \
    "psubusw  %%mm6, %%mm2      \n"                                     \
    "psubusw  %%mm6, %%mm3      \n"                                     \
    "psraw       $3, %%mm0      \n"                                     \
    "psraw       $3, %%mm1      \n"                                     \
    "psraw       $3, %%mm2      \n"                                     \
    "psraw       $3, %%mm3      \n"                                     \
                                                                        \
    "movq     %%mm0, %%mm7      \n"                                     \
    "punpcklwd %%mm2, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm2, %%mm7     \n" /*C*/                               \
    "movq     %%mm1, %%mm2      \n"                                     \
    "punpcklwd %%mm3, %%mm1     \n" /*B*/                               \
    "punpckhwd %%mm3, %%mm2     \n" /*D*/                               \
    "movq     %%mm0, %%mm3      \n"                                     \
    "punpcklwd %%mm1, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm7, %%mm3     \n" /*C*/                               \
    "punpcklwd %%mm2, %%mm7     \n" /*B*/                               \
    "punpckhwd %%mm2, %%mm1     \n" /*D*/                               \
                                                                        \
    "movq     %%mm0, " #dst0 "  \n"                                     \
    "movq     %%mm7, " #dst1 "  \n"                                     \
    "movq     %%mm3, " #dst2 "  \n"                                     \
    "movq     %%mm1, " #dst3 "  \n"

    __asm__ volatile(
        /* Broadcast the three 32-bit constants into four 16-bit lanes each. */
        "movd        %2, %%mm4      \n"
        "movd        %3, %%mm5      \n"
        "movd        %4, %%mm6      \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        "packssdw %%mm6, %%mm6      \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        "packssdw %%mm6, %%mm6      \n"
        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1),  (%0), 8(%0), 64(%0), 72(%0))
        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
        : /* no outputs: dst is written through the pointer */
        /* "rm" rather than "g": movd cannot take an immediate operand. */
        : "r" (src), "r" (dst),
          "rm" (threshold1+1), "rm" (threshold1+5), "rm" (threshold1-4) //FIXME maybe more accurate than needed?
        /* The asm reads src[] and writes dst[]; without this clobber the
         * compiler may reorder the dst[0] store below around the asm. */
        : "memory"
    );
    /* The DC coefficient is never thresholded, only rounded and scaled. */
    dst[0] = (src[0] + 4) >> 3;
}
98 
/**
 * Soft-threshold and requantize one 8x8 block of coefficients using MMX.
 *
 * Each coefficient has its magnitude reduced by threshold1 (clamped at
 * zero by the unsigned-saturating subtract), then 4 is added and the
 * result is shifted right by 3.  The results are written to dst in a
 * fixed shuffled order that is hard-coded in the punpck sequence below.
 *
 * @param dst          output block, all 64 entries are overwritten
 * @param src          input block of 64 coefficients
 * @param qp           quantizer; threshold1 is qp * 16 - 1 (bias is 0)
 * @param permutation  unused here; the output order is baked into the asm
 *
 * NOTE(review): no emms is issued here; presumably the caller clears the
 * MMX state before any x87 code runs -- confirm.
 */
static void softthresh_mmx(int16_t dst[64], const int16_t src[64],
                           int qp, const uint8_t *permutation)
{
    int bias = 0; //FIXME
    unsigned int threshold1;

    threshold1 = qp*((1<<4) - bias) - 1;

/*
 * Process four quadwords (16 coefficients).
 * Expects mm4 = threshold1 and mm5 = 4, each replicated into all four
 * 16-bit lanes.  mm6/mm7 hold per-lane sign masks: XOR with the mask
 * before and after the saturating subtract applies the shrink to the
 * magnitude while keeping the sign.
 */
#undef REQUANT_CORE
#define REQUANT_CORE(dst0, dst1, dst2, dst3, src0, src1, src2, src3)    \
    "movq " #src0 ", %%mm0      \n"                                     \
    "movq " #src1 ", %%mm1      \n"                                     \
    "pxor     %%mm6, %%mm6      \n"                                     \
    "pxor     %%mm7, %%mm7      \n"                                     \
    "pcmpgtw  %%mm0, %%mm6      \n"                                     \
    "pcmpgtw  %%mm1, %%mm7      \n"                                     \
    "pxor     %%mm6, %%mm0      \n"                                     \
    "pxor     %%mm7, %%mm1      \n"                                     \
    "psubusw  %%mm4, %%mm0      \n"                                     \
    "psubusw  %%mm4, %%mm1      \n"                                     \
    "pxor     %%mm6, %%mm0      \n"                                     \
    "pxor     %%mm7, %%mm1      \n"                                     \
    "movq " #src2 ", %%mm2      \n"                                     \
    "movq " #src3 ", %%mm3      \n"                                     \
    "pxor     %%mm6, %%mm6      \n"                                     \
    "pxor     %%mm7, %%mm7      \n"                                     \
    "pcmpgtw  %%mm2, %%mm6      \n"                                     \
    "pcmpgtw  %%mm3, %%mm7      \n"                                     \
    "pxor     %%mm6, %%mm2      \n"                                     \
    "pxor     %%mm7, %%mm3      \n"                                     \
    "psubusw  %%mm4, %%mm2      \n"                                     \
    "psubusw  %%mm4, %%mm3      \n"                                     \
    "pxor     %%mm6, %%mm2      \n"                                     \
    "pxor     %%mm7, %%mm3      \n"                                     \
                                                                        \
    "paddsw   %%mm5, %%mm0      \n"                                     \
    "paddsw   %%mm5, %%mm1      \n"                                     \
    "paddsw   %%mm5, %%mm2      \n"                                     \
    "paddsw   %%mm5, %%mm3      \n"                                     \
    "psraw       $3, %%mm0      \n"                                     \
    "psraw       $3, %%mm1      \n"                                     \
    "psraw       $3, %%mm2      \n"                                     \
    "psraw       $3, %%mm3      \n"                                     \
                                                                        \
    "movq     %%mm0, %%mm7      \n"                                     \
    "punpcklwd %%mm2, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm2, %%mm7     \n" /*C*/                               \
    "movq     %%mm1, %%mm2      \n"                                     \
    "punpcklwd %%mm3, %%mm1     \n" /*B*/                               \
    "punpckhwd %%mm3, %%mm2     \n" /*D*/                               \
    "movq     %%mm0, %%mm3      \n"                                     \
    "punpcklwd %%mm1, %%mm0     \n" /*A*/                               \
    "punpckhwd %%mm7, %%mm3     \n" /*C*/                               \
    "punpcklwd %%mm2, %%mm7     \n" /*B*/                               \
    "punpckhwd %%mm2, %%mm1     \n" /*D*/                               \
                                                                        \
    "movq     %%mm0, " #dst0 "  \n"                                     \
    "movq     %%mm7, " #dst1 "  \n"                                     \
    "movq     %%mm3, " #dst2 "  \n"                                     \
    "movq     %%mm1, " #dst3 "  \n"

    __asm__ volatile(
        /* Broadcast threshold1 and the rounding constant 4 into all lanes. */
        "movd        %2, %%mm4      \n"
        "movd        %3, %%mm5      \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        "packssdw %%mm4, %%mm4      \n"
        "packssdw %%mm5, %%mm5      \n"
        REQUANT_CORE(  (%1),  8(%1), 16(%1), 24(%1),  (%0), 8(%0), 64(%0), 72(%0))
        REQUANT_CORE(32(%1), 40(%1), 48(%1), 56(%1),16(%0),24(%0), 48(%0), 56(%0))
        REQUANT_CORE(64(%1), 72(%1), 80(%1), 88(%1),32(%0),40(%0), 96(%0),104(%0))
        REQUANT_CORE(96(%1),104(%1),112(%1),120(%1),80(%0),88(%0),112(%0),120(%0))
        : /* no outputs: dst is written through the pointer */
        /* "rm" rather than "g": movd cannot take an immediate operand. */
        : "r" (src), "r" (dst), "rm" (threshold1), "rm" (4) //FIXME maybe more accurate than needed?
        /* The asm reads src[] and writes dst[]; without this clobber the
         * compiler may reorder the dst[0] store below around the asm. */
        : "memory"
    );

    /* The DC coefficient is never thresholded, only rounded and scaled. */
    dst[0] = (src[0] + 4) >> 3;
}
176 
177 static void store_slice_mmx(uint8_t *dst, const int16_t *src,
178  int dst_stride, int src_stride,
179  int width, int height, int log2_scale,
180  const uint8_t dither[8][8])
181 {
182  int y;
183 
184  for (y = 0; y < height; y++) {
185  uint8_t *dst1 = dst;
186  const int16_t *src1 = src;
187  __asm__ volatile(
188  "movq (%3), %%mm3 \n"
189  "movq (%3), %%mm4 \n"
190  "movd %4, %%mm2 \n"
191  "pxor %%mm0, %%mm0 \n"
192  "punpcklbw %%mm0, %%mm3 \n"
193  "punpckhbw %%mm0, %%mm4 \n"
194  "psraw %%mm2, %%mm3 \n"
195  "psraw %%mm2, %%mm4 \n"
196  "movd %5, %%mm2 \n"
197  "1: \n"
198  "movq (%0), %%mm0 \n"
199  "movq 8(%0), %%mm1 \n"
200  "paddw %%mm3, %%mm0 \n"
201  "paddw %%mm4, %%mm1 \n"
202  "psraw %%mm2, %%mm0 \n"
203  "psraw %%mm2, %%mm1 \n"
204  "packuswb %%mm1, %%mm0 \n"
205  "movq %%mm0, (%1) \n"
206  "add $16, %0 \n"
207  "add $8, %1 \n"
208  "cmp %2, %1 \n"
209  " jb 1b \n"
210  : "+r" (src1), "+r"(dst1)
211  : "r"(dst + width), "r"(dither[y]), "g"(log2_scale), "g"(MAX_LEVEL - log2_scale)
212  );
213  src += src_stride;
214  dst += dst_stride;
215  }
216 }
217 
218 #endif /* HAVE_MMX_INLINE */
219 
221 {
222 #if HAVE_MMX_INLINE
223  int cpu_flags = av_get_cpu_flags();
224 
225  if (cpu_flags & AV_CPU_FLAG_MMX) {
226  static const uint32_t mmx_idct_perm_crc = 0xe5e8adc4;
227  uint32_t idct_perm_crc =
229  s->dct->idct_permutation,
230  sizeof(s->dct->idct_permutation));
231  int64_t bps;
232  s->store_slice = store_slice_mmx;
233  av_opt_get_int(s->dct, "bits_per_sample", 0, &bps);
234  if (bps <= 8 && idct_perm_crc == mmx_idct_perm_crc) {
235  switch (s->mode) {
236  case 0: s->requantize = hardthresh_mmx; break;
237  case 1: s->requantize = softthresh_mmx; break;
238  }
239  }
240  }
241 #endif
242 }
r
const char * r
Definition: vf_curves.c:116
src1
const pixel * src1
Definition: h264pred_template.c:421
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:101
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:52
crc.h
av_cold
#define av_cold
Definition: attributes.h:90
width
#define width
s
#define s(width, name)
Definition: cbs_vp9.c:256
g
const char * g
Definition: vf_curves.c:117
vf_spp.h
av_opt_get_int
int av_opt_get_int(void *obj, const char *name, int search_flags, int64_t *out_val)
Definition: opt.c:978
SPPContext
Definition: vf_spp.h:31
MAX_LEVEL
#define MAX_LEVEL
Definition: rl.h:36
cpu.h
bps
unsigned bps
Definition: movenc.c:1647
asm.h
height
#define height
av_crc_get_table
const AVCRC * av_crc_get_table(AVCRCId crc_id)
Get an initialized standard CRC table.
Definition: crc.c:374
attributes.h
AV_CRC_32_IEEE
@ AV_CRC_32_IEEE
Definition: crc.h:52
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:29
av_crc
uint32_t av_crc(const AVCRC *ctx, uint32_t crc, const uint8_t *buffer, size_t length)
Calculate the CRC of a block.
Definition: crc.c:392
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
ff_spp_init_x86
av_cold void ff_spp_init_x86(SPPContext *s)
Definition: vf_spp.c:220
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:58