Go to the documentation of this file.
/*
 * Scratch temporaries for the inline-asm helper macros in this file, plus
 * the matching named output operands.
 *
 * DECLARE_*      expand to plain local declarations: two doubles (db_1, db_2)
 *                and one uint32_t (it_1).
 * RESTRICT_ASM_* expand to GCC extended-asm output operands named
 *                [db_1]/[db_2]/[it_1]. "=&f" / "=&r" mark them as
 *                early-clobber outputs in a floating-point / general
 *                register respectively.
 *
 * MMI_PCMPGTUB and MMI_BTOH reference %[db_1] and %[db_2], so any asm
 * statement that expands those helpers must list both RESTRICT_ASM_DOUBLE_*
 * operands. it_1 is not referenced in the visible part of this file.
 */
30 #define DECLARE_DOUBLE_1 double db_1
31 #define DECLARE_DOUBLE_2 double db_2
32 #define DECLARE_UINT32_T uint32_t it_1
33 #define RESTRICT_ASM_DOUBLE_1 [db_1]"=&f"(db_1)
34 #define RESTRICT_ASM_DOUBLE_2 [db_2]"=&f"(db_2)
35 #define RESTRICT_ASM_UINT32_T [it_1]"=&r"(it_1)
/*
 * MMI_PCMPGTUB(dst, src1, src2): asm text for a per-byte unsigned
 * "greater than" compare, built from instructions the ISA does provide.
 *
 *   db_1 = (src1 == src2)                per byte
 *   db_2 = (max_unsigned(src1, src2) == src1), i.e. src1 >= src2
 *   dst  = db_2 ^ db_1                   set only where src1 > src2
 *
 * The arguments are stringified (#src1 ...), so callers pass operand
 * references such as %[ftmp0]. dst is written only by the final
 * instruction, so it may name the same register as src1 or src2.
 * Clobbers %[db_1] and %[db_2].
 */
37 #define MMI_PCMPGTUB(dst, src1, src2) \
38 "pcmpeqb %[db_1], "#src1", "#src2" \n\t" \
39 "pmaxub %[db_2], "#src1", "#src2" \n\t" \
40 "pcmpeqb %[db_2], %[db_2], "#src1" \n\t" \
41 "pxor "#dst", %[db_2], %[db_1] \n\t"
/*
 * MMI_BTOH(dst_l, dst_r, src): asm text that widens the eight signed bytes
 * of src into two registers of four sign-extended halfwords.
 *
 *   db_1  = 0
 *   db_2  = (0 > src) per signed byte    0xff where the byte is negative
 *   dst_r = low  four bytes of src interleaved with their sign bytes
 *   dst_l = high four bytes of src interleaved with their sign bytes
 *
 * dst_r is written before src is read for dst_l, so dst_r must not name the
 * same register as src. Clobbers %[db_1] and %[db_2].
 */
43 #define MMI_BTOH(dst_l, dst_r, src) \
44 "pxor %[db_1], %[db_1], %[db_1] \n\t" \
45 "pcmpgtb %[db_2], %[db_1], "#src" \n\t" \
46 "punpcklbh "#dst_r", "#src", %[db_2] \n\t" \
47 "punpckhbh "#dst_l", "#src", %[db_2] \n\t"
/*
 * MMI_VP8_LOOP_FILTER: asm text that filters eight pixels at once across an
 * edge. The caller has loaded the rows/columns into %[p3]..%[p0] and
 * %[q0]..%[q3]; on exit %[p2]..%[q2] hold the filtered bytes.
 *
 * Operands the enclosing asm statement must provide:
 *   inputs   %[thresh], %[e], %[i]  (general registers, one byte each)
 *   pixels   %[p3] %[p2] %[p1] %[p0] %[q0] %[q1] %[q2] %[q3]
 *   scratch  %[ftmp0]..%[ftmp7], %[hev], %[mask], %[tmp0],
 *            plus %[db_1]/%[db_2] used by MMI_PCMPGTUB and MMI_BTOH
 * PSRLB_MMI and PSRAB_MMI are defined outside this view; presumably they are
 * per-byte logical/arithmetic right shifts (TODO confirm).
 *
 * Stages, in instruction order:
 *  1. Broadcast thresh to every byte; hev = max(|p1-p0|, |q1-q0|) > thresh.
 *     The max is kept in ftmp0 for stage 3.
 *  2. ftmp1 = sat(2*|p0-q0|) + shifted |p1-q1|; mask = ftmp1 > e.
 *  3. mask = max(mask, ftmp0, |p3-p2|, |p2-p1|, |q3-q2|, |q2-q1|);
 *     mask = ~(mask > i), i.e. all-ones where every limit is respected.
 *  4. ftmp7 = 0x80 in every byte; p2..q2 are XORed with it so the unsigned
 *     pixels can be handled with signed saturating arithmetic.
 *  5. w = sat_s8(3*(q0-p0) + sat(p1-q1)), computed in halfwords and packed;
 *     w &= mask (kept in ftmp1).
 *  6. For lanes with hev set (ftmp2 = w & hev): add 4 and 3 with signed
 *     saturation, shift each via PSRAB_MMI, then q0 -= first result,
 *     p0 += second result.
 *  7. hev is inverted; w &= ~hev and is widened to halfwords
 *     (ftmp5 = high half, ftmp6 = low half).
 *  8. a = (27*w + 63) >> 7 (0x1b, 0x3f, shift 7): q0 -= a, p0 += a, then the
 *     0x80 bias is removed from q0/p0.
 *  9. a = (18*w + 63) >> 7 (0x12): applied to q1/p1, bias removed.
 * 10. a = (9*w + 63) >> 7, formed as (w << 3) + w: applied to q2/p2, bias
 *     removed.
 *
 * Register reuse is dense (ftmp0..ftmp4 are recycled between stages), so
 * the instruction order must not be changed.
 */
49 #define MMI_VP8_LOOP_FILTER \
51 "dmtc1 %[thresh], %[ftmp3] \n\t" \
52 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
53 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
54 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
55 "pasubub %[ftmp0], %[p1], %[p0] \n\t" \
56 "pasubub %[ftmp1], %[q1], %[q0] \n\t" \
57 "pmaxub %[ftmp0], %[ftmp0], %[ftmp1] \n\t" \
58 MMI_PCMPGTUB(%[hev], %[ftmp0], %[ftmp3]) \
60 "pasubub %[ftmp1], %[p0], %[q0] \n\t" \
61 "paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
62 "pasubub %[ftmp2], %[p1], %[q1] \n\t" \
63 "li %[tmp0], 0x09 \n\t" \
64 "dmtc1 %[tmp0], %[ftmp3] \n\t" \
65 PSRLB_MMI(%[ftmp2], %[ftmp3], %[ftmp4], %[ftmp5], %[ftmp2]) \
66 "paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
67 "dmtc1 %[e], %[ftmp3] \n\t" \
68 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
69 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
70 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
71 MMI_PCMPGTUB(%[mask], %[ftmp1], %[ftmp3]) \
72 "pmaxub %[mask], %[mask], %[ftmp0] \n\t" \
73 "pasubub %[ftmp1], %[p3], %[p2] \n\t" \
74 "pasubub %[ftmp2], %[p2], %[p1] \n\t" \
75 "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
76 "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \
77 "pasubub %[ftmp1], %[q3], %[q2] \n\t" \
78 "pasubub %[ftmp2], %[q2], %[q1] \n\t" \
79 "pmaxub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \
80 "pmaxub %[mask], %[mask], %[ftmp1] \n\t" \
81 "dmtc1 %[i], %[ftmp3] \n\t" \
82 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
83 "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
84 "punpcklwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
85 MMI_PCMPGTUB(%[mask], %[mask], %[ftmp3]) \
86 "pcmpeqw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" \
87 "pxor %[mask], %[mask], %[ftmp3] \n\t" \
89 "li %[tmp0], 0x80808080 \n\t" \
90 "dmtc1 %[tmp0], %[ftmp7] \n\t" \
91 "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t" \
92 "pxor %[p2], %[p2], %[ftmp7] \n\t" \
93 "pxor %[p1], %[p1], %[ftmp7] \n\t" \
94 "pxor %[p0], %[p0], %[ftmp7] \n\t" \
95 "pxor %[q0], %[q0], %[ftmp7] \n\t" \
96 "pxor %[q1], %[q1], %[ftmp7] \n\t" \
97 "pxor %[q2], %[q2], %[ftmp7] \n\t" \
98 "psubsb %[ftmp4], %[p1], %[q1] \n\t" \
99 "psubb %[ftmp5], %[q0], %[p0] \n\t" \
100 MMI_BTOH(%[ftmp1], %[ftmp0], %[ftmp5]) \
101 MMI_BTOH(%[ftmp3], %[ftmp2], %[ftmp4]) \
103 "paddh %[ftmp5], %[ftmp0], %[ftmp0] \n\t" \
104 "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t" \
105 "paddh %[ftmp0], %[ftmp2], %[ftmp0] \n\t" \
107 "paddh %[ftmp5], %[ftmp1], %[ftmp1] \n\t" \
108 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" \
109 "paddh %[ftmp1], %[ftmp3], %[ftmp1] \n\t" \
111 "packsshb %[ftmp1], %[ftmp0], %[ftmp1] \n\t" \
112 "pand %[ftmp1], %[ftmp1], %[mask] \n\t" \
113 "pand %[ftmp2], %[ftmp1], %[hev] \n\t" \
114 "li %[tmp0], 0x04040404 \n\t" \
115 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
116 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
117 "paddsb %[ftmp3], %[ftmp2], %[ftmp0] \n\t" \
118 "li %[tmp0], 0x0B \n\t" \
119 "dmtc1 %[tmp0], %[ftmp4] \n\t" \
120 PSRAB_MMI(%[ftmp3], %[ftmp4], %[ftmp5], %[ftmp6], %[ftmp3]) \
121 "li %[tmp0], 0x03030303 \n\t" \
122 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
123 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
124 "paddsb %[ftmp4], %[ftmp2], %[ftmp0] \n\t" \
125 "li %[tmp0], 0x0B \n\t" \
126 "dmtc1 %[tmp0], %[ftmp2] \n\t" \
127 PSRAB_MMI(%[ftmp4], %[ftmp2], %[ftmp5], %[ftmp6], %[ftmp4]) \
128 "psubsb %[q0], %[q0], %[ftmp3] \n\t" \
129 "paddsb %[p0], %[p0], %[ftmp4] \n\t" \
131 "pcmpeqw %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
132 "pxor %[hev], %[hev], %[ftmp0] \n\t" \
133 "pand %[ftmp1], %[ftmp1], %[hev] \n\t" \
134 MMI_BTOH(%[ftmp5], %[ftmp6], %[ftmp1]) \
135 "li %[tmp0], 0x07 \n\t" \
136 "dmtc1 %[tmp0], %[ftmp2] \n\t" \
137 "li %[tmp0], 0x001b001b \n\t" \
138 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
139 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
140 "li %[tmp0], 0x003f003f \n\t" \
141 "dmtc1 %[tmp0], %[ftmp0] \n\t" \
142 "punpcklwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \
144 "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
145 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
146 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
148 "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
149 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
150 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
152 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
153 "psubsb %[q0], %[q0], %[ftmp4] \n\t" \
154 "pxor %[q0], %[q0], %[ftmp7] \n\t" \
155 "paddsb %[p0], %[p0], %[ftmp4] \n\t" \
156 "pxor %[p0], %[p0], %[ftmp7] \n\t" \
157 "li %[tmp0], 0x00120012 \n\t" \
158 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
159 "punpcklwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t" \
161 "pmullh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
162 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
163 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
165 "pmullh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
166 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
167 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
169 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
170 "psubsb %[q1], %[q1], %[ftmp4] \n\t" \
171 "pxor %[q1], %[q1], %[ftmp7] \n\t" \
172 "paddsb %[p1], %[p1], %[ftmp4] \n\t" \
173 "pxor %[p1], %[p1], %[ftmp7] \n\t" \
174 "li %[tmp0], 0x03 \n\t" \
175 "dmtc1 %[tmp0], %[ftmp1] \n\t" \
177 "psllh %[ftmp3], %[ftmp6], %[ftmp1] \n\t" \
178 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t" \
179 "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" \
180 "psrah %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
182 "psllh %[ftmp4], %[ftmp5], %[ftmp1] \n\t" \
183 "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \
184 "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \
185 "psrah %[ftmp4], %[ftmp4], %[ftmp2] \n\t" \
187 "packsshb %[ftmp4], %[ftmp3], %[ftmp4] \n\t" \
188 "psubsb %[q2], %[q2], %[ftmp4] \n\t" \
189 "pxor %[q2], %[q2], %[ftmp7] \n\t" \
190 "paddsb %[p2], %[p2], %[ftmp4] \n\t" \
191 "pxor %[p2], %[p2], %[ftmp7] \n\t"
/*
 * PUT_VP8_EPEL4_H6_MMI(src, dst): asm text for one row of a 4-pixel-wide,
 * six-tap horizontal filter.
 *
 * Loads four bytes at src offsets 0, -1, -2, +1, +2, +3, widens each to
 * halfwords by interleaving with %[ftmp0], and accumulates
 *
 *   filter2*s[0] - filter1*s[-1] + filter0*s[-2]
 *     + filter3*s[1] - filter4*s[2] + filter5*s[3] + ff_pw_64
 *
 * with saturating halfword adds/subtracts. The sum is shifted right
 * arithmetically by %[ftmp4], packed to unsigned bytes with saturation and
 * four bytes are stored at dst. This is the same tap pattern as the C macro
 * FILTER_6TAP with stride 1.
 *
 * Operands expected from the enclosing asm: %[ftmp0]..%[ftmp5],
 * %[filter0]..%[filter5], %[ff_pw_64].
 * Assumes the caller zeroed %[ftmp0] and put the shift count in %[ftmp4]
 * (neither is set here - confirm at the call site).
 * MMI_ULWC1 / MMI_SWC1 are defined outside this view.
 */
193 #define PUT_VP8_EPEL4_H6_MMI(src, dst) \
194 MMI_ULWC1(%[ftmp1], src, 0x00) \
195 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
196 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
198 MMI_ULWC1(%[ftmp1], src, -0x01) \
199 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
200 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
201 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
203 MMI_ULWC1(%[ftmp1], src, -0x02) \
204 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
205 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
206 "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
208 MMI_ULWC1(%[ftmp1], src, 0x01) \
209 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
210 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
212 MMI_ULWC1(%[ftmp1], src, 0x02) \
213 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
214 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
215 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
217 MMI_ULWC1(%[ftmp1], src, 0x03) \
218 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
219 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
220 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
222 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
223 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
224 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
225 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
227 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * PUT_VP8_EPEL4_H4_MMI(src, dst): asm text for one row of a 4-pixel-wide,
 * four-tap horizontal filter.
 *
 * Loads four bytes at src offsets 0, -1, +1, +2, widens them to halfwords
 * against %[ftmp0] and computes
 *
 *   filter2*s[0] - filter1*s[-1] + filter3*s[1] - filter4*s[2] + ff_pw_64
 *
 * then shifts right arithmetically by %[ftmp4], packs to unsigned bytes with
 * saturation and stores four bytes at dst. Same tap pattern as the C macro
 * FILTER_4TAP with stride 1.
 *
 * Operands expected from the enclosing asm: %[ftmp0]..%[ftmp5],
 * %[filter1]..%[filter4], %[ff_pw_64].
 * Assumes the caller zeroed %[ftmp0] and put the shift count in %[ftmp4]
 * (confirm at the call site).
 *
 * NOTE(review): the filter4 term is subtracted with non-saturating "psubh",
 * whereas the other EPEL macros in this file use "psubsh" at the same step -
 * confirm this is intentional.
 */
230 #define PUT_VP8_EPEL4_H4_MMI(src, dst) \
231 MMI_ULWC1(%[ftmp1], src, 0x00) \
232 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
233 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
235 MMI_ULWC1(%[ftmp1], src, -0x01) \
236 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
237 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
238 "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
240 MMI_ULWC1(%[ftmp1], src, 0x01) \
241 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
242 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
244 MMI_ULWC1(%[ftmp1], src, 0x02) \
245 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
246 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
247 "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
249 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
251 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
252 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
254 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
255 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride): asm text for one row of a
 * 4-pixel-wide, six-tap vertical filter.
 *
 * src1 is a scratch pointer register that is overwritten: it is stepped to
 * src - srcstride, src - 2*srcstride, then src + srcstride,
 * src + 2*srcstride and src + 3*srcstride. src itself is not modified.
 * Four bytes are loaded from each row, widened against %[ftmp0] and combined
 * as
 *
 *   filter2*r[0] - filter1*r[-1] + filter0*r[-2]
 *     + filter3*r[1] - filter4*r[2] + filter5*r[3] + ff_pw_64
 *
 * (r[k] = row at src + k*srcstride), using saturating halfword arithmetic.
 * The result is shifted right by %[ftmp4], packed to unsigned bytes with
 * saturation and four bytes are stored at dst.
 *
 * Operands expected from the enclosing asm: %[ftmp0]..%[ftmp5],
 * %[filter0]..%[filter5], %[ff_pw_64].
 * Assumes the caller zeroed %[ftmp0] and put the shift count in %[ftmp4]
 * (confirm at the call site).
 */
258 #define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride) \
259 MMI_ULWC1(%[ftmp1], src, 0x00) \
260 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
261 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
263 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
264 MMI_ULWC1(%[ftmp1], src1, 0x00) \
265 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
266 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
267 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
269 PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \
270 MMI_ULWC1(%[ftmp1], src1, 0x00) \
271 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
272 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
273 "paddsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
275 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
276 MMI_ULWC1(%[ftmp1], src1, 0x00) \
277 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
278 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
280 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
281 MMI_ULWC1(%[ftmp1], src1, 0x00) \
282 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
283 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
284 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
286 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
287 MMI_ULWC1(%[ftmp1], src1, 0x00) \
288 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
289 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
290 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
292 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
294 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
295 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
296 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
298 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride): asm text for one row of a
 * 4-pixel-wide, four-tap vertical filter.
 *
 * src1 is a scratch pointer register that is overwritten: it is set to
 * src - srcstride, then src + srcstride and src + 2*srcstride. src itself is
 * not modified. Four bytes are loaded from each row, widened against
 * %[ftmp0] and combined as
 *
 *   filter2*r[0] - filter1*r[-1] + filter3*r[1] - filter4*r[2] + ff_pw_64
 *
 * (r[k] = row at src + k*srcstride). The result is shifted right by
 * %[ftmp4], packed to unsigned bytes with saturation and four bytes are
 * stored at dst.
 *
 * Operands expected from the enclosing asm: %[ftmp0]..%[ftmp5],
 * %[filter1]..%[filter4], %[ff_pw_64].
 * Assumes the caller zeroed %[ftmp0] and put the shift count in %[ftmp4]
 * (confirm at the call site).
 */
301 #define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride) \
302 MMI_ULWC1(%[ftmp1], src, 0x00) \
303 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
304 "pmullh %[ftmp3], %[ftmp2], %[filter2] \n\t" \
306 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
307 MMI_ULWC1(%[ftmp1], src1, 0x00) \
308 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
309 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
310 "psubsh %[ftmp5], %[ftmp3], %[ftmp2] \n\t" \
312 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
313 MMI_ULWC1(%[ftmp1], src1, 0x00) \
314 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
315 "pmullh %[ftmp3], %[ftmp2], %[filter3] \n\t" \
317 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
318 MMI_ULWC1(%[ftmp1], src1, 0x00) \
319 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
320 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
321 "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
323 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \
325 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_64] \n\t" \
326 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
327 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
329 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * PUT_VP8_EPEL8_H6_MMI(src, dst): asm text for one row of an 8-pixel-wide,
 * six-tap horizontal filter.
 *
 * Loads eight bytes at src offsets 0, -1, -2, +1, +2, +3. Each load is split
 * into a low half (ftmp2) and a high half (ftmp3) of four halfwords by
 * interleaving with %[ftmp0]. Both halves accumulate
 *
 *   filter2*s[0] - filter1*s[-1] + filter0*s[-2]
 *     + filter3*s[1] - filter4*s[2] + filter5*s[3] + ff_pw_64
 *
 * with saturating halfword arithmetic (low half in ftmp5/ftmp7, high half in
 * ftmp6/ftmp8). The sums are shifted right by %[ftmp4], packed together to
 * eight unsigned bytes with saturation and stored at dst.
 *
 * Operands expected from the enclosing asm: %[ftmp0]..%[ftmp8],
 * %[filter0]..%[filter5], %[ff_pw_64].
 * Assumes the caller zeroed %[ftmp0] and put the shift count in %[ftmp4]
 * (confirm at the call site).
 * MMI_ULDC1 / MMI_SDC1 are defined outside this view.
 */
332 #define PUT_VP8_EPEL8_H6_MMI(src, dst) \
333 MMI_ULDC1(%[ftmp1], src, 0x00) \
334 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
335 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
336 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
337 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
339 MMI_ULDC1(%[ftmp1], src, -0x01) \
340 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
341 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
342 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
343 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
344 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
345 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
347 MMI_ULDC1(%[ftmp1], src, -0x02) \
348 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
349 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
350 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
351 "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \
352 "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
353 "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
355 MMI_ULDC1(%[ftmp1], src, 0x01) \
356 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
357 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
358 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
359 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
361 MMI_ULDC1(%[ftmp1], src, 0x02) \
362 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
363 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
364 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
365 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
366 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
367 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
369 MMI_ULDC1(%[ftmp1], src, 0x03) \
370 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
371 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
372 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
373 "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \
374 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
375 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
377 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
378 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
380 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
381 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
382 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
383 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
384 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
386 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * PUT_VP8_EPEL8_H4_MMI(src, dst): asm text for one row of an 8-pixel-wide,
 * four-tap horizontal filter.
 *
 * Loads eight bytes at src offsets 0, -1, +1, +2. Each load is split into
 * low (ftmp2) and high (ftmp3) halves of four halfwords by interleaving with
 * %[ftmp0]. Both halves compute
 *
 *   filter2*s[0] - filter1*s[-1] + filter3*s[1] - filter4*s[2] + ff_pw_64
 *
 * with saturating halfword arithmetic. The sums are shifted right by
 * %[ftmp4], packed to eight unsigned bytes with saturation and stored at
 * dst.
 *
 * Operands expected from the enclosing asm: %[ftmp0]..%[ftmp8],
 * %[filter1]..%[filter4], %[ff_pw_64].
 * Assumes the caller zeroed %[ftmp0] and put the shift count in %[ftmp4]
 * (confirm at the call site).
 */
389 #define PUT_VP8_EPEL8_H4_MMI(src, dst) \
390 MMI_ULDC1(%[ftmp1], src, 0x00) \
391 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
392 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
393 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
394 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
396 MMI_ULDC1(%[ftmp1], src, -0x01) \
397 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
398 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
399 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
400 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
401 "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
402 "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
404 MMI_ULDC1(%[ftmp1], src, 0x01) \
405 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
406 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
407 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
408 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
410 MMI_ULDC1(%[ftmp1], src, 0x02) \
411 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
412 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
413 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
414 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
415 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
416 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
418 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
419 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
421 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
422 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
423 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
424 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
426 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
427 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride): asm text for one row of
 * an 8-pixel-wide, six-tap vertical filter.
 *
 * src1 is a scratch pointer register that is overwritten: it is stepped to
 * src - srcstride, src - 2*srcstride, then src + srcstride,
 * src + 2*srcstride and src + 3*srcstride. src itself is not modified.
 * Eight bytes are loaded from each row and split into low (ftmp2) and high
 * (ftmp3) halves of four halfwords against %[ftmp0]. Both halves compute
 *
 *   filter2*r[0] - filter1*r[-1] + filter0*r[-2]
 *     + filter3*r[1] - filter4*r[2] + filter5*r[3] + ff_pw_64
 *
 * (r[k] = row at src + k*srcstride) with saturating halfword arithmetic.
 * The sums are shifted right by %[ftmp4], packed to eight unsigned bytes
 * with saturation and stored at dst.
 *
 * Operands expected from the enclosing asm: %[ftmp0]..%[ftmp8],
 * %[filter0]..%[filter5], %[ff_pw_64].
 * Assumes the caller zeroed %[ftmp0] and put the shift count in %[ftmp4]
 * (confirm at the call site).
 */
430 #define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride) \
431 MMI_ULDC1(%[ftmp1], src, 0x00) \
432 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
433 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
434 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
435 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
437 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
438 MMI_ULDC1(%[ftmp1], src1, 0x00) \
439 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
440 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
441 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
442 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
443 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
444 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
446 PTR_SUBU ""#src1", "#src1", "#srcstride" \n\t" \
447 MMI_ULDC1(%[ftmp1], src1, 0x00) \
448 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
449 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
450 "pmullh %[ftmp2], %[ftmp2], %[filter0] \n\t" \
451 "pmullh %[ftmp3], %[ftmp3], %[filter0] \n\t" \
452 "paddsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
453 "paddsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
455 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
456 MMI_ULDC1(%[ftmp1], src1, 0x00) \
457 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
458 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
459 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
460 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
462 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
463 MMI_ULDC1(%[ftmp1], src1, 0x00) \
464 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
465 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
466 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
467 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
468 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
469 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
471 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
472 MMI_ULDC1(%[ftmp1], src1, 0x00) \
473 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
474 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
475 "pmullh %[ftmp2], %[ftmp2], %[filter5] \n\t" \
476 "pmullh %[ftmp3], %[ftmp3], %[filter5] \n\t" \
477 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
478 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
480 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
481 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
483 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
484 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
485 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
486 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
487 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
489 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride): asm text for one row of
 * an 8-pixel-wide, four-tap vertical filter.
 *
 * src1 is a scratch pointer register that is overwritten: it is set to
 * src - srcstride, then src + srcstride and src + 2*srcstride. src itself is
 * not modified. Eight bytes are loaded from each row and split into low
 * (ftmp2) and high (ftmp3) halves of four halfwords against %[ftmp0]. Both
 * halves compute
 *
 *   filter2*r[0] - filter1*r[-1] + filter3*r[1] - filter4*r[2] + ff_pw_64
 *
 * (r[k] = row at src + k*srcstride) with saturating halfword arithmetic.
 * The sums are shifted right by %[ftmp4], packed to eight unsigned bytes
 * with saturation and stored at dst.
 *
 * Operands expected from the enclosing asm: %[ftmp0]..%[ftmp8],
 * %[filter1]..%[filter4], %[ff_pw_64].
 * Assumes the caller zeroed %[ftmp0] and put the shift count in %[ftmp4]
 * (confirm at the call site).
 */
492 #define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride) \
493 MMI_ULDC1(%[ftmp1], src, 0x00) \
494 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
495 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
496 "pmullh %[ftmp5], %[ftmp2], %[filter2] \n\t" \
497 "pmullh %[ftmp6], %[ftmp3], %[filter2] \n\t" \
499 PTR_SUBU ""#src1", "#src", "#srcstride" \n\t" \
500 MMI_ULDC1(%[ftmp1], src1, 0x00) \
501 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
502 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
503 "pmullh %[ftmp2], %[ftmp2], %[filter1] \n\t" \
504 "pmullh %[ftmp3], %[ftmp3], %[filter1] \n\t" \
505 "psubsh %[ftmp7], %[ftmp5], %[ftmp2] \n\t" \
506 "psubsh %[ftmp8], %[ftmp6], %[ftmp3] \n\t" \
508 PTR_ADDU ""#src1", "#src", "#srcstride" \n\t" \
509 MMI_ULDC1(%[ftmp1], src1, 0x00) \
510 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
511 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
512 "pmullh %[ftmp5], %[ftmp2], %[filter3] \n\t" \
513 "pmullh %[ftmp6], %[ftmp3], %[filter3] \n\t" \
515 PTR_ADDU ""#src1", "#src1", "#srcstride" \n\t" \
516 MMI_ULDC1(%[ftmp1], src1, 0x00) \
517 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
518 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
519 "pmullh %[ftmp2], %[ftmp2], %[filter4] \n\t" \
520 "pmullh %[ftmp3], %[ftmp3], %[filter4] \n\t" \
521 "psubsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
522 "psubsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
524 "paddsh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \
525 "paddsh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" \
527 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_64] \n\t" \
528 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_64] \n\t" \
529 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
530 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
531 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
533 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * PUT_VP8_BILINEAR8_H_MMI(src, dst): asm text for one row of an
 * 8-pixel-wide horizontal two-tap (bilinear) blend.
 *
 * Loads eight bytes at src and at src + 1, splits each into low/high halves
 * of four halfwords against %[ftmp0], and computes
 *
 *   a*s[0] + b*s[1] + ff_pw_4
 *
 * per lane with saturating halfword adds. The sums are shifted right by
 * %[ftmp4], packed to eight unsigned bytes with saturation and stored at
 * dst.
 *
 * Operands expected from the enclosing asm: %[ftmp0]..%[ftmp6], %[a], %[b],
 * %[ff_pw_4].
 * Assumes the caller zeroed %[ftmp0] and put the shift count in %[ftmp4]
 * (confirm at the call site).
 */
536 #define PUT_VP8_BILINEAR8_H_MMI(src, dst) \
537 MMI_ULDC1(%[ftmp1], src, 0x00) \
538 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
539 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
540 "pmullh %[ftmp5], %[ftmp2], %[a] \n\t" \
541 "pmullh %[ftmp6], %[ftmp3], %[a] \n\t" \
543 MMI_ULDC1(%[ftmp1], src, 0x01) \
544 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
545 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
546 "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \
547 "pmullh %[ftmp3], %[ftmp3], %[b] \n\t" \
548 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
549 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
551 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \
552 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \
553 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
554 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
556 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
557 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * PUT_VP8_BILINEAR4_H_MMI(src, dst): asm text for one row of a 4-pixel-wide
 * horizontal two-tap (bilinear) blend.
 *
 * Loads four bytes at src and at src + 1, widens them to halfwords against
 * %[ftmp0], and computes a*s[0] + b*s[1] + ff_pw_4 per lane with saturating
 * halfword adds. The sum is shifted right by %[ftmp4], packed to unsigned
 * bytes with saturation and four bytes are stored at dst.
 *
 * Operands expected from the enclosing asm: %[ftmp0]..%[ftmp4], %[a], %[b],
 * %[ff_pw_4].
 * Assumes the caller zeroed %[ftmp0] and put the shift count in %[ftmp4]
 * (confirm at the call site).
 */
560 #define PUT_VP8_BILINEAR4_H_MMI(src, dst) \
561 MMI_ULWC1(%[ftmp1], src, 0x00) \
562 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
563 "pmullh %[ftmp3], %[ftmp2], %[a] \n\t" \
565 MMI_ULWC1(%[ftmp1], src, 0x01) \
566 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
567 "pmullh %[ftmp2], %[ftmp2], %[b] \n\t" \
568 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
570 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \
571 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
573 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
574 MMI_SWC1(%[ftmp1], dst, 0x00)
/*
 * PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride): asm text for one row of
 * an 8-pixel-wide vertical two-tap (bilinear) blend.
 *
 * src1 is a scratch pointer register that is overwritten with
 * src + sstride. Eight bytes are loaded from src and from src1, split into
 * low/high halves of four halfwords against %[ftmp0], and combined as
 *
 *   c*r[0] + d*r[1] + ff_pw_4        (r[1] = row at src + sstride)
 *
 * with saturating halfword adds. The sums are shifted right by %[ftmp4],
 * packed to eight unsigned bytes with saturation and stored at dst.
 *
 * Operands expected from the enclosing asm: %[ftmp0]..%[ftmp6], %[c], %[d],
 * %[ff_pw_4].
 * Assumes the caller zeroed %[ftmp0] and put the shift count in %[ftmp4]
 * (confirm at the call site).
 */
577 #define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride) \
578 MMI_ULDC1(%[ftmp1], src, 0x00) \
579 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
580 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
581 "pmullh %[ftmp5], %[ftmp2], %[c] \n\t" \
582 "pmullh %[ftmp6], %[ftmp3], %[c] \n\t" \
584 PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \
585 MMI_ULDC1(%[ftmp1], src1, 0x00) \
586 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
587 "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \
588 "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \
589 "pmullh %[ftmp3], %[ftmp3], %[d] \n\t" \
590 "paddsh %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \
591 "paddsh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" \
593 "paddsh %[ftmp5], %[ftmp5], %[ff_pw_4] \n\t" \
594 "paddsh %[ftmp6], %[ftmp6], %[ff_pw_4] \n\t" \
595 "psrah %[ftmp5], %[ftmp5], %[ftmp4] \n\t" \
596 "psrah %[ftmp6], %[ftmp6], %[ftmp4] \n\t" \
598 "packushb %[ftmp1], %[ftmp5], %[ftmp6] \n\t" \
599 MMI_SDC1(%[ftmp1], dst, 0x00)
/*
 * PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride): asm text for one row of
 * a 4-pixel-wide vertical two-tap (bilinear) blend.
 *
 * src1 is a scratch pointer register that is overwritten with
 * src + sstride. Four bytes are loaded from src and from src1, widened to
 * halfwords against %[ftmp0], and combined as c*r[0] + d*r[1] + ff_pw_4 with
 * saturating halfword adds. The sum is shifted right by %[ftmp4], packed to
 * unsigned bytes with saturation and four bytes are stored at dst.
 *
 * Operands expected from the enclosing asm: %[ftmp0]..%[ftmp4], %[c], %[d],
 * %[ff_pw_4].
 * Assumes the caller zeroed %[ftmp0] and put the shift count in %[ftmp4]
 * (confirm at the call site).
 */
602 #define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride) \
603 MMI_ULWC1(%[ftmp1], src, 0x00) \
604 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
605 "pmullh %[ftmp3], %[ftmp2], %[c] \n\t" \
607 PTR_ADDU ""#src1", "#src", "#sstride" \n\t" \
608 MMI_ULWC1(%[ftmp1], src1, 0x00) \
609 "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \
610 "pmullh %[ftmp2], %[ftmp2], %[d] \n\t" \
611 "paddsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t" \
613 "paddsh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t" \
614 "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t" \
616 "packushb %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \
617 MMI_SWC1(%[ftmp1], dst, 0x00)
621 {0x0000000000000000, 0x0006000600060006, 0x007b007b007b007b,
622 0x000c000c000c000c, 0x0001000100010001, 0x0000000000000000},
624 {0x0002000200020002, 0x000b000b000b000b, 0x006c006c006c006c,
625 0x0024002400240024, 0x0008000800080008, 0x0001000100010001},
627 {0x0000000000000000, 0x0009000900090009, 0x005d005d005d005d,
628 0x0032003200320032, 0x0006000600060006, 0x0000000000000000},
630 {0x0003000300030003, 0x0010001000100010, 0x004d004d004d004d,
631 0x004d004d004d004d, 0x0010001000100010, 0x0003000300030003},
633 {0x0000000000000000, 0x0006000600060006, 0x0032003200320032,
634 0x005d005d005d005d, 0x0009000900090009, 0x0000000000000000},
636 {0x0001000100010001, 0x0008000800080008, 0x0024002400240024,
637 0x006c006c006c006c, 0x000b000b000b000b, 0x0002000200020002},
639 {0x0000000000000000, 0x0001000100010001, 0x000c000c000c000c,
640 0x007b007b007b007b, 0x0006000600060006, 0x0000000000000000}
/*
 * FILTER_6TAP(src, F, stride): six-tap filter for one output sample.
 *
 * Evaluates
 *   F[2]*s(0) - F[1]*s(-1) + F[0]*s(-2) + F[3]*s(1) - F[4]*s(2) + F[5]*s(3)
 * where s(k) = src[x + k*stride], adds the rounding constant 64, shifts
 * right by 7 and maps the result through the lookup table `cm`.
 *
 * Not self-contained: `x` (current sample index) and `cm` (lookup table)
 * must be in scope where the macro is expanded.
 *
 * Every use of an argument is parenthesized so expression arguments such as
 * `src + 1` or `2 * pitch` expand with the intended precedence. Arguments
 * are evaluated more than once, so they must be free of side effects.
 */
#define FILTER_6TAP(src, F, stride)                                            \
    cm[((F)[2] * (src)[x + 0 * (stride)] - (F)[1] * (src)[x - 1 * (stride)] +  \
        (F)[0] * (src)[x - 2 * (stride)] + (F)[3] * (src)[x + 1 * (stride)] -  \
        (F)[4] * (src)[x + 2 * (stride)] + (F)[5] * (src)[x + 3 * (stride)] +  \
        64) >> 7]
/*
 * FILTER_4TAP(src, F, stride): four-tap filter for one output sample.
 *
 * Evaluates
 *   F[2]*s(0) - F[1]*s(-1) + F[3]*s(1) - F[4]*s(2)
 * where s(k) = src[x + k*stride], adds the rounding constant 64, shifts
 * right by 7 and maps the result through the lookup table `cm`. F[0] and
 * F[5] are not read.
 *
 * Not self-contained: `x` (current sample index) and `cm` (lookup table)
 * must be in scope where the macro is expanded.
 *
 * Every use of an argument is parenthesized so expression arguments such as
 * `src + 1` or `2 * pitch` expand with the intended precedence. Arguments
 * are evaluated more than once, so they must be free of side effects.
 */
#define FILTER_4TAP(src, F, stride)                                            \
    cm[((F)[2] * (src)[x + 0 * (stride)] - (F)[1] * (src)[x - 1 * (stride)] +  \
        (F)[3] * (src)[x + 1 * (stride)] - (F)[4] * (src)[x + 2 * (stride)] +  \
        64) >> 7]
654 { 0, 6, 123, 12, 1, 0 },
655 { 2, 11, 108, 36, 8, 1 },
656 { 0, 9, 93, 50, 6, 0 },
657 { 3, 16, 77, 77, 16, 3 },
658 { 0, 6, 50, 93, 9, 0 },
659 { 1, 8, 36, 108, 11, 2 },
660 { 0, 1, 12, 123, 6, 0 },
/*
 * Fixed-point helpers (16 fractional bits).
 *
 * MUL_20091(a) = a + ((a * 20091) >> 16), i.e. a scaled by (1 + 20091/65536).
 * MUL_35468(a) = (a * 35468) >> 16,       i.e. a scaled by 35468/65536.
 * NOTE(review): these look like the VP8 inverse-transform constants -
 * confirm against the codec specification.
 *
 * The argument is evaluated more than once in MUL_20091, so it must be free
 * of side effects.
 */
663 #define MUL_20091(a) ((((a) * 20091) >> 16) + (a))
664 #define MUL_35468(a) (((a) * 35468) >> 16)
/*
 * clip_int8(n) = cm[n + 0x80] - 0x80: looks n up in the table `cm` with a
 * +128 bias and removes the bias again. `cm` must be in scope at the point
 * of expansion; presumably it is a clamp-to-[0,255] table, which would make
 * this a clamp to the signed 8-bit range (TODO confirm).
 */
667 #define clip_int8(n) (cm[(n) + 0x80] - 0x80)
684 f1 =
FFMIN(
a + 4, 127) >> 3;
685 f2 =
FFMIN(
a + 3, 127) >> 3;
708 f1 =
FFMIN(
a + 4, 127) >> 3;
709 f2 =
FFMIN(
a + 3, 127) >> 3;
756 a0 = (27 *
w + 63) >> 7;
757 a1 = (18 *
w + 63) >> 7;
758 a2 = (9 *
w + 63) >> 7;
787 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
798 MMI_ULDC1(%[
q0], %[dst], 0x0)
799 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t"
800 MMI_ULDC1(%[p0], %[tmp0], 0x0)
801 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
802 MMI_ULDC1(%[p1], %[tmp0], 0x0)
803 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
804 MMI_ULDC1(%[p2], %[tmp0], 0x0)
805 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
806 MMI_ULDC1(%[p3], %[tmp0], 0x0)
807 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
808 MMI_ULDC1(%[
q1], %[tmp0], 0x0)
809 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
810 MMI_ULDC1(%[q2], %[tmp0], 0x0)
811 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
812 MMI_ULDC1(%[q3], %[tmp0], 0x0)
815 MMI_USDC1(%[
q0], %[dst], 0x0)
816 PTR_SUBU "%[tmp0], %[dst], %[stride] \n\t"
817 MMI_USDC1(%[p0], %[tmp0], 0x0)
818 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
819 MMI_USDC1(%[p1], %[tmp0], 0x0)
820 PTR_SUBU "%[tmp0], %[tmp0], %[stride] \n\t"
821 MMI_USDC1(%[p2], %[tmp0], 0x0)
822 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
823 MMI_USDC1(%[
q1], %[tmp0], 0x0)
824 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
825 MMI_USDC1(%[q2], %[tmp0], 0x0)
827 [p3]
"=&f"(ftmp[0]), [p2]
"=&f"(ftmp[1]),
828 [p1]
"=&f"(ftmp[2]), [p0]
"=&f"(ftmp[3]),
829 [
q0]
"=&f"(ftmp[4]), [
q1]
"=&f"(ftmp[5]),
830 [q2]
"=&f"(ftmp[6]), [q3]
"=&f"(ftmp[7]),
831 [ftmp0]
"=&f"(ftmp[8]), [ftmp1]
"=&f"(ftmp[9]),
832 [ftmp2]
"=&f"(ftmp[10]), [ftmp3]
"=&f"(ftmp[11]),
833 [
hev]
"=&f"(ftmp[12]), [
mask]
"=&f"(ftmp[13]),
834 [ftmp4]
"=&f"(ftmp[14]), [ftmp5]
"=&f"(ftmp[15]),
835 [ftmp6]
"=&f"(ftmp[16]), [ftmp7]
"=&f"(ftmp[17]),
836 [dst]
"+&r"(dst), [tmp0]
"=&r"(
tmp[0]),
846 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
850 for (
i = 0;
i < 8;
i++)
852 int hv =
hev(dst +
i * 1,
stride, hev_thresh);
861 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
872 MMI_ULDC1(%[p3], %[dst], -0x04)
873 PTR_ADDU "%[tmp0], %[dst], %[stride] \n\t"
874 MMI_ULDC1(%[p2], %[tmp0], -0x04)
875 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
876 MMI_ULDC1(%[p1], %[tmp0], -0x04)
877 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
878 MMI_ULDC1(%[p0], %[tmp0], -0x04)
879 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
880 MMI_ULDC1(%[
q0], %[tmp0], -0x04)
881 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
882 MMI_ULDC1(%[
q1], %[tmp0], -0x04)
883 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
884 MMI_ULDC1(%[q2], %[tmp0], -0x04)
885 PTR_ADDU "%[tmp0], %[tmp0], %[stride] \n\t"
886 MMI_ULDC1(%[q3], %[tmp0], -0x04)
889 %[
q0], %[
q1], %[q2], %[q3],
890 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
894 %[
q0], %[
q1], %[q2], %[q3],
895 %[ftmp1], %[ftmp2], %[ftmp3], %[ftmp4])
897 MMI_USDC1(%[p3], %[dst], -0x04)
898 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
899 MMI_USDC1(%[p2], %[dst], -0x04)
900 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
901 MMI_USDC1(%[p1], %[dst], -0x04)
902 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
903 MMI_USDC1(%[p0], %[dst], -0x04)
904 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
905 MMI_USDC1(%[
q0], %[dst], -0x04)
906 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
907 MMI_USDC1(%[
q1], %[dst], -0x04)
908 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
909 MMI_USDC1(%[q2], %[dst], -0x04)
910 PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
911 MMI_USDC1(%[q3], %[dst], -0x04)
913 [p3]
"=&f"(ftmp[0]), [p2]
"=&f"(ftmp[1]),
914 [p1]
"=&f"(ftmp[2]), [p0]
"=&f"(ftmp[3]),
915 [
q0]
"=&f"(ftmp[4]), [
q1]
"=&f"(ftmp[5]),
916 [q2]
"=&f"(ftmp[6]), [q3]
"=&f"(ftmp[7]),
917 [ftmp0]
"=&f"(ftmp[8]), [ftmp1]
"=&f"(ftmp[9]),
918 [ftmp2]
"=&f"(ftmp[10]), [ftmp3]
"=&f"(ftmp[11]),
919 [
hev]
"=&f"(ftmp[12]), [
mask]
"=&f"(ftmp[13]),
920 [ftmp4]
"=&f"(ftmp[14]), [ftmp5]
"=&f"(ftmp[15]),
921 [ftmp6]
"=&f"(ftmp[16]), [ftmp7]
"=&f"(ftmp[17]),
922 [dst]
"+&r"(dst), [tmp0]
"=&r"(
tmp[0]),
932 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
936 for (
i = 0;
i < 8;
i++)
938 int hv =
hev(dst +
i *
stride, 1, hev_thresh);
953 MMI_LDC1(%[ftmp0], %[
dc], 0x00)
954 MMI_LDC1(%[ftmp1], %[
dc], 0x08)
955 MMI_LDC1(%[ftmp2], %[
dc], 0x10)
956 MMI_LDC1(%[ftmp3], %[
dc], 0x18)
957 "paddsh %[ftmp4], %[ftmp0], %[ftmp3] \n\t"
958 "psubsh %[ftmp5], %[ftmp0], %[ftmp3] \n\t"
959 "paddsh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
960 "psubsh %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
961 "paddsh %[ftmp0], %[ftmp4], %[ftmp6] \n\t"
962 "paddsh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
963 "psubsh %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
964 "psubsh %[ftmp3], %[ftmp5], %[ftmp7] \n\t"
965 MMI_SDC1(%[ftmp0], %[
dc], 0x00)
966 MMI_SDC1(%[ftmp1], %[
dc], 0x08)
967 MMI_SDC1(%[ftmp2], %[
dc], 0x10)
968 MMI_SDC1(%[ftmp3], %[
dc], 0x18)
969 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
970 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
971 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
972 [ftmp6]
"=&f"(ftmp[6]),
974 [ftmp7]
"=&f"(ftmp[7])
975 : [
dc]
"r"((uint8_t*)
dc)
994 block[3][0][0] = (
dc[12] +
dc[15] + 3 +
dc[13] +
dc[14]) >> 3;
995 block[3][1][0] = (
dc[12] -
dc[15] + 3 +
dc[13] -
dc[14]) >> 3;
996 block[3][2][0] = (
dc[12] +
dc[15] + 3 -
dc[13] -
dc[14]) >> 3;
997 block[3][3][0] = (
dc[12] -
dc[15] + 3 -
dc[13] +
dc[14]) >> 3;
1000 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1001 MMI_SDC1(%[ftmp0], %[
dc], 0x00)
1002 MMI_SDC1(%[ftmp0], %[
dc], 0x08)
1003 MMI_SDC1(%[ftmp0], %[
dc], 0x10)
1004 MMI_SDC1(%[ftmp0], %[
dc], 0x18)
1006 [ftmp0]
"=&f"(ftmp[0])
1007 : [
dc]
"r"((uint8_t *)
dc)
1011 int t00, t01, t02, t03,
t10,
t11,
t12, t13, t20, t21, t22, t23, t30, t31, t32, t33;
1013 t00 =
dc[0] +
dc[12];
1015 t20 =
dc[2] +
dc[14];
1016 t30 =
dc[3] +
dc[15];
1018 t03 =
dc[0] -
dc[12];
1019 t13 =
dc[1] -
dc[13];
1020 t23 =
dc[2] -
dc[14];
1021 t33 =
dc[3] -
dc[15];
1023 t01 =
dc[4] +
dc[ 8];
1025 t21 =
dc[6] +
dc[10];
1026 t31 =
dc[7] +
dc[11];
1028 t02 =
dc[4] -
dc[ 8];
1030 t22 =
dc[6] -
dc[10];
1031 t32 =
dc[7] -
dc[11];
1063 block[2][0][0] = (
dc[8] +
dc[11] + 3 +
dc[9] +
dc[10]) >> 3;
1064 block[2][1][0] = (
dc[8] -
dc[11] + 3 +
dc[9] -
dc[10]) >> 3;
1065 block[2][2][0] = (
dc[8] +
dc[11] + 3 -
dc[9] -
dc[10]) >> 3;
1066 block[2][3][0] = (
dc[8] -
dc[11] + 3 -
dc[9] +
dc[10]) >> 3;
1068 block[3][0][0] = (
dc[12] +
dc[15] + 3 +
dc[13] +
dc[14]) >> 3;
1069 block[3][1][0] = (
dc[12] -
dc[15] + 3 +
dc[13] -
dc[14]) >> 3;
1070 block[3][2][0] = (
dc[12] +
dc[15] + 3 -
dc[13] -
dc[14]) >> 3;
1071 block[3][3][0] = (
dc[12] -
dc[15] + 3 -
dc[13] +
dc[14]) >> 3;
1082 int val = (
dc[0] + 3) >> 3;
1113 ff_ph_4e7b_u.
i = 0x4e7b4e7b4e7b4e7bULL;
1114 ff_ph_22a3_u.
i = 0x22a322a322a322a3ULL;
1117 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1118 MMI_LDC1(%[ftmp1], %[
block], 0x00)
1119 MMI_LDC1(%[ftmp2], %[
block], 0x08)
1120 MMI_LDC1(%[ftmp3], %[
block], 0x10)
1121 MMI_LDC1(%[ftmp4], %[
block], 0x18)
1123 "li %[tmp0], 0x02 \n\t"
1124 "mtc1 %[tmp0], %[ftmp11] \n\t"
1127 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
1129 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
1131 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
1132 "pmulhh %[ftmp7], %[ftmp9], %[ff_ph_22a3] \n\t"
1134 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
1135 "pmulhh %[ftmp8], %[ftmp9], %[ff_ph_22a3] \n\t"
1137 "pmulhh %[ftmp9], %[ftmp2], %[ff_ph_4e7b] \n\t"
1138 "paddh %[ftmp9], %[ftmp9], %[ftmp2] \n\t"
1140 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
1141 "paddh %[ftmp10], %[ftmp10], %[ftmp4] \n\t"
1144 "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t"
1145 "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
1147 "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t"
1148 "psubh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
1150 "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t"
1151 "paddh %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
1153 "psubh %[ftmp4], %[ftmp5], %[ftmp7] \n\t"
1154 "psubh %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1156 MMI_SDC1(%[ftmp0], %[
block], 0x00)
1157 MMI_SDC1(%[ftmp0], %[
block], 0x08)
1158 MMI_SDC1(%[ftmp0], %[
block], 0x10)
1159 MMI_SDC1(%[ftmp0], %[
block], 0x18)
1162 %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1165 "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t"
1167 "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t"
1169 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t"
1170 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
1171 "psubh %[ftmp7], %[ftmp9], %[ftmp4] \n\t"
1172 "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t"
1173 "psubh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1175 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t"
1176 "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t"
1177 "paddh %[ftmp8], %[ftmp9], %[ftmp2] \n\t"
1178 "pmulhh %[ftmp10], %[ftmp2], %[ff_ph_4e7b] \n\t"
1179 "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1181 "li %[tmp0], 0x03 \n\t"
1182 "mtc1 %[tmp0], %[ftmp11] \n\t"
1183 "paddh %[ftmp1], %[ftmp5], %[ftmp8] \n\t"
1184 "paddh %[ftmp1], %[ftmp1], %[ff_pw_4] \n\t"
1185 "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t"
1186 "paddh %[ftmp2], %[ftmp6], %[ftmp7] \n\t"
1187 "paddh %[ftmp2], %[ftmp2], %[ff_pw_4] \n\t"
1188 "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t"
1189 "psubh %[ftmp3], %[ftmp6], %[ftmp7] \n\t"
1190 "paddh %[ftmp3], %[ftmp3], %[ff_pw_4] \n\t"
1191 "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t"
1192 "psubh %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
1193 "paddh %[ftmp4], %[ftmp4], %[ff_pw_4] \n\t"
1194 "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
1197 %[ftmp5], %[ftmp6], %[ftmp7], %[ftmp8])
1199 MMI_LWC1(%[ftmp5], %[dst0], 0x00)
1200 MMI_LWC1(%[ftmp6], %[dst1], 0x00)
1201 MMI_LWC1(%[ftmp7], %[dst2], 0x00)
1202 MMI_LWC1(%[ftmp8], %[dst3], 0x00)
1204 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1205 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1206 "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1207 "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1209 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1210 "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1211 "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1212 "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1214 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1215 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1216 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1217 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1219 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1220 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1221 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1222 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1223 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1224 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1225 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1226 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1227 [ftmp8]
"=&f"(ftmp[8]), [ftmp9]
"=&f"(ftmp[9]),
1228 [ftmp10]
"=&f"(ftmp[10]), [ftmp11]
"=&f"(ftmp[11]),
1232 : [dst0]
"r"(dst), [dst1]
"r"(dst+
stride),
1235 [ff_ph_4e7b]
"f"(ff_ph_4e7b_u.
f), [ff_ph_22a3]
"f"(ff_ph_22a3_u.
f)
1242 for (
i = 0;
i < 4;
i++) {
1258 for (
i = 0;
i < 4;
i++) {
1261 t2 = MUL_35468(
tmp[4 +
i]) - MUL_20091(
tmp[12 +
i]);
1262 t3 = MUL_20091(
tmp[4 +
i]) + MUL_35468(
tmp[12 +
i]);
1283 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1284 "mtc1 %[dc], %[ftmp5] \n\t"
1285 MMI_LWC1(%[ftmp1], %[dst0], 0x00)
1286 MMI_LWC1(%[ftmp2], %[dst1], 0x00)
1287 MMI_LWC1(%[ftmp3], %[dst2], 0x00)
1288 MMI_LWC1(%[ftmp4], %[dst3], 0x00)
1289 "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1290 "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1291 "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1292 "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1293 "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1294 "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1295 "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1296 "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1297 "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1298 "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1299 "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1300 "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1301 "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1302 MMI_SWC1(%[ftmp1], %[dst0], 0x00)
1303 MMI_SWC1(%[ftmp2], %[dst1], 0x00)
1304 MMI_SWC1(%[ftmp3], %[dst2], 0x00)
1305 MMI_SWC1(%[ftmp4], %[dst3], 0x00)
1306 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1307 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1308 [ftmp4]
"=&f"(ftmp[4]),
1310 [ftmp5]
"=&f"(ftmp[5])
1311 : [dst0]
"r"(dst), [dst1]
"r"(dst+
stride),
1321 for (
i = 0;
i < 4;
i++) {
1351 int flim_I,
int hev_thresh)
1358 int flim_I,
int hev_thresh)
1366 int flim_E,
int flim_I,
int hev_thresh)
1373 int flim_E,
int flim_I,
int hev_thresh)
1381 int flim_E,
int flim_I,
int hev_thresh)
1385 for (
i = 0;
i < 16;
i++)
1387 int hv =
hev(dst +
i * 1,
stride, hev_thresh);
1396 int flim_E,
int flim_I,
int hev_thresh)
1400 for (
i = 0;
i < 16;
i++)
1402 int hv =
hev(dst +
i *
stride, 1, hev_thresh);
1411 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
1418 ptrdiff_t
stride,
int flim_E,
int flim_I,
int hev_thresh)
1428 for (
i = 0;
i < 16;
i++)
1437 for (
i = 0;
i < 16;
i++)
1443 ptrdiff_t srcstride,
int h,
int x,
int y)
1453 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1454 MMI_ULDC1(%[ftmp0], %[
src], 0x00)
1455 "ldl %[tmp0], 0x0f(%[src]) \n\t"
1456 "ldr %[tmp0], 0x08(%[src]) \n\t"
1457 MMI_ULDC1(%[ftmp1], %[addr0], 0x00)
1458 "ldl %[tmp1], 0x0f(%[addr0]) \n\t"
1459 "ldr %[tmp1], 0x08(%[addr0]) \n\t"
1460 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1461 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1462 "sdl %[tmp0], 0x0f(%[dst]) \n\t"
1463 "sdr %[tmp0], 0x08(%[dst]) \n\t"
1464 "addiu %[h], %[h], -0x02 \n\t"
1465 MMI_SDC1(%[ftmp1], %[addr1], 0x00)
1466 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1467 "sdl %[tmp1], 0x0f(%[addr1]) \n\t"
1468 "sdr %[tmp1], 0x08(%[addr1]) \n\t"
1469 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1470 "bnez %[h], 1b \n\t"
1471 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1472 [tmp0]
"=&r"(
tmp[0]), [tmp1]
"=&r"(
tmp[1]),
1474 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1475 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
1477 : [dststride]
"r"((
mips_reg)dststride),
1478 [srcstride]
"r"((
mips_reg)srcstride)
1484 for (
i = 0;
i <
h;
i++, dst += dststride,
src += srcstride)
1485 memcpy(dst,
src, 16);
1490 ptrdiff_t srcstride,
int h,
int x,
int y)
1500 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1501 MMI_ULDC1(%[ftmp0], %[
src], 0x00)
1502 "ldl %[tmp0], 0x07(%[addr0]) \n\t"
1503 "ldr %[tmp0], 0x00(%[addr0]) \n\t"
1504 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1505 MMI_SDC1(%[ftmp0], %[dst], 0x00)
1506 "addiu %[h], %[h], -0x02 \n\t"
1507 "sdl %[tmp0], 0x07(%[addr1]) \n\t"
1508 "sdr %[tmp0], 0x00(%[addr1]) \n\t"
1509 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1510 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1511 "bnez %[h], 1b \n\t"
1512 : [ftmp0]
"=&f"(ftmp[0]), [tmp0]
"=&r"(
tmp[0]),
1514 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1515 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
1517 : [dststride]
"r"((
mips_reg)dststride),
1518 [srcstride]
"r"((
mips_reg)srcstride)
1524 for (
i = 0;
i <
h;
i++, dst += dststride,
src += srcstride)
1525 memcpy(dst,
src, 8);
1530 ptrdiff_t srcstride,
int h,
int x,
int y)
1540 PTR_ADDU "%[addr0], %[src], %[srcstride] \n\t"
1541 MMI_LWC1(%[ftmp0], %[
src], 0x00)
1542 "lwl %[tmp0], 0x03(%[addr0]) \n\t"
1543 "lwr %[tmp0], 0x00(%[addr0]) \n\t"
1544 PTR_ADDU "%[addr1], %[dst], %[dststride] \n\t"
1545 MMI_SWC1(%[ftmp0], %[dst], 0x00)
1546 "addiu %[h], %[h], -0x02 \n\t"
1547 "swl %[tmp0], 0x03(%[addr1]) \n\t"
1548 "swr %[tmp0], 0x00(%[addr1]) \n\t"
1549 PTR_ADDU "%[src], %[addr0], %[srcstride] \n\t"
1550 PTR_ADDU "%[dst], %[addr1], %[dststride] \n\t"
1551 "bnez %[h], 1b \n\t"
1552 : [ftmp0]
"=&f"(ftmp[0]), [tmp0]
"=&r"(
tmp[0]),
1554 [addr0]
"=&r"(addr[0]), [addr1]
"=&r"(addr[1]),
1555 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
1557 : [dststride]
"r"((
mips_reg)dststride),
1558 [srcstride]
"r"((
mips_reg)srcstride)
1564 for (
i = 0;
i <
h;
i++, dst += dststride,
src += srcstride)
1565 memcpy(dst,
src, 4);
1570 ptrdiff_t srcstride,
int h,
int mx,
int my)
1607 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1608 "li %[tmp0], 0x07 \n\t"
1609 "mtc1 %[tmp0], %[ftmp4] \n\t"
1619 "addiu %[h], %[h], -0x01 \n\t"
1620 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1621 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1622 "bnez %[h], 1b \n\t"
1623 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1624 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1625 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1626 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1627 [ftmp8]
"=&f"(ftmp[8]),
1628 [tmp0]
"=&r"(
tmp[0]),
1630 [dst1]
"=&r"(dst1), [
src1]
"=&r"(
src1),
1632 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1634 [srcstride]
"r"((
mips_reg)srcstride),
1635 [dststride]
"r"((
mips_reg)dststride),
1637 [filter3]
"f"(filter3.
f), [filter4]
"f"(filter4.
f)
1645 for (y = 0; y <
h; y++) {
1646 for (x = 0; x < 16; x++)
1655 ptrdiff_t srcstride,
int h,
int mx,
int my)
1683 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1684 "li %[tmp0], 0x07 \n\t"
1685 "mtc1 %[tmp0], %[ftmp4] \n\t"
1690 "addiu %[h], %[h], -0x01 \n\t"
1691 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1692 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1693 "bnez %[h], 1b \n\t"
1694 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1695 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1696 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1697 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1698 [ftmp8]
"=&f"(ftmp[8]),
1699 [tmp0]
"=&r"(
tmp[0]),
1702 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1704 [srcstride]
"r"((
mips_reg)srcstride),
1705 [dststride]
"r"((
mips_reg)dststride),
1707 [filter3]
"f"(filter3.
f), [filter4]
"f"(filter4.
f)
1715 for (y = 0; y <
h; y++) {
1716 for (x = 0; x < 8; x++)
1725 ptrdiff_t srcstride,
int h,
int mx,
int my)
1748 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1749 "li %[tmp0], 0x07 \n\t"
1750 "mtc1 %[tmp0], %[ftmp4] \n\t"
1755 "addiu %[h], %[h], -0x01 \n\t"
1756 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1757 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1758 "bnez %[h], 1b \n\t"
1759 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1760 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1761 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1762 [tmp0]
"=&r"(
tmp[0]),
1765 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1767 [srcstride]
"r"((
mips_reg)srcstride),
1768 [dststride]
"r"((
mips_reg)dststride),
1770 [filter3]
"f"(filter3.
f), [filter4]
"f"(filter4.
f)
1778 for (y = 0; y <
h; y++) {
1779 for (x = 0; x < 4; x++)
1788 ptrdiff_t srcstride,
int h,
int mx,
int my)
1829 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1830 "li %[tmp0], 0x07 \n\t"
1831 "mtc1 %[tmp0], %[ftmp4] \n\t"
1841 "addiu %[h], %[h], -0x01 \n\t"
1842 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1843 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1844 "bnez %[h], 1b \n\t"
1845 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1846 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1847 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1848 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1849 [ftmp8]
"=&f"(ftmp[8]),
1850 [tmp0]
"=&r"(
tmp[0]),
1852 [dst1]
"=&r"(dst1), [
src1]
"=&r"(
src1),
1854 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1856 [srcstride]
"r"((
mips_reg)srcstride),
1857 [dststride]
"r"((
mips_reg)dststride),
1859 [filter2]
"f"(filter2.
f), [filter3]
"f"(filter3.
f),
1860 [filter4]
"f"(filter4.
f), [filter5]
"f"(filter5.
f)
1868 for (y = 0; y <
h; y++) {
1869 for (x = 0; x < 16; x++)
1878 ptrdiff_t srcstride,
int h,
int mx,
int my)
1909 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1910 "li %[tmp0], 0x07 \n\t"
1911 "mtc1 %[tmp0], %[ftmp4] \n\t"
1916 "addiu %[h], %[h], -0x01 \n\t"
1917 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1918 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1919 "bnez %[h], 1b \n\t"
1920 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1921 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1922 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1923 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
1924 [ftmp8]
"=&f"(ftmp[8]),
1925 [tmp0]
"=&r"(
tmp[0]),
1928 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1930 [srcstride]
"r"((
mips_reg)srcstride),
1931 [dststride]
"r"((
mips_reg)dststride),
1933 [filter2]
"f"(filter2.
f), [filter3]
"f"(filter3.
f),
1934 [filter4]
"f"(filter4.
f), [filter5]
"f"(filter5.
f)
1942 for (y = 0; y <
h; y++) {
1943 for (x = 0; x < 8; x++)
1952 ptrdiff_t srcstride,
int h,
int mx,
int my)
1979 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1980 "li %[tmp0], 0x07 \n\t"
1981 "mtc1 %[tmp0], %[ftmp4] \n\t"
1986 "addiu %[h], %[h], -0x01 \n\t"
1987 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
1988 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
1989 "bnez %[h], 1b \n\t"
1990 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
1991 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
1992 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
1993 [tmp0]
"=&r"(
tmp[0]),
1996 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
1998 [srcstride]
"r"((
mips_reg)srcstride),
1999 [dststride]
"r"((
mips_reg)dststride),
2001 [filter2]
"f"(filter2.
f), [filter3]
"f"(filter3.
f),
2002 [filter4]
"f"(filter4.
f), [filter5]
"f"(filter5.
f)
2010 for (y = 0; y <
h; y++) {
2011 for (x = 0; x < 4; x++)
2020 ptrdiff_t srcstride,
int h,
int mx,
int my)
2057 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2058 "li %[tmp0], 0x07 \n\t"
2059 "mtc1 %[tmp0], %[ftmp4] \n\t"
2069 "addiu %[h], %[h], -0x01 \n\t"
2070 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2071 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2072 "bnez %[h], 1b \n\t"
2073 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2074 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2075 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2076 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2077 [ftmp8]
"=&f"(ftmp[8]),
2078 [tmp0]
"=&r"(
tmp[0]),
2080 [
src0]
"=&r"(
src0), [dst0]
"=&r"(dst0),
2083 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2085 [srcstride]
"r"((
mips_reg)srcstride),
2086 [dststride]
"r"((
mips_reg)dststride),
2088 [filter3]
"f"(filter3.
f), [filter4]
"f"(filter4.
f)
2096 for (y = 0; y <
h; y++) {
2097 for (x = 0; x < 16; x++)
2106 ptrdiff_t srcstride,
int h,
int mx,
int my)
2134 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2135 "li %[tmp0], 0x07 \n\t"
2136 "mtc1 %[tmp0], %[ftmp4] \n\t"
2141 "addiu %[h], %[h], -0x01 \n\t"
2142 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2143 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2144 "bnez %[h], 1b \n\t"
2145 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2146 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2147 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2148 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2149 [ftmp8]
"=&f"(ftmp[8]),
2150 [tmp0]
"=&r"(
tmp[0]),
2154 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2156 [srcstride]
"r"((
mips_reg)srcstride),
2157 [dststride]
"r"((
mips_reg)dststride),
2159 [filter3]
"f"(filter3.
f), [filter4]
"f"(filter4.
f)
2167 for (y = 0; y <
h; y++) {
2168 for (x = 0; x < 8; x++)
2177 ptrdiff_t srcstride,
int h,
int mx,
int my)
2201 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2202 "li %[tmp0], 0x07 \n\t"
2203 "mtc1 %[tmp0], %[ftmp4] \n\t"
2208 "addiu %[h], %[h], -0x01 \n\t"
2209 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2210 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2211 "bnez %[h], 1b \n\t"
2212 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2213 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2214 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2215 [tmp0]
"=&r"(
tmp[0]),
2219 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2221 [srcstride]
"r"((
mips_reg)srcstride),
2222 [dststride]
"r"((
mips_reg)dststride),
2224 [filter3]
"f"(filter3.
f), [filter4]
"f"(filter4.
f)
2232 for (y = 0; y <
h; y++) {
2233 for (x = 0; x < 4; x++)
2242 ptrdiff_t srcstride,
int h,
int mx,
int my)
2283 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2284 "li %[tmp0], 0x07 \n\t"
2285 "mtc1 %[tmp0], %[ftmp4] \n\t"
2295 "addiu %[h], %[h], -0x01 \n\t"
2296 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2297 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2298 "bnez %[h], 1b \n\t"
2299 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2300 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2301 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2302 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2303 [ftmp8]
"=&f"(ftmp[8]),
2304 [tmp0]
"=&r"(
tmp[0]),
2306 [
src0]
"=&r"(
src0), [dst0]
"=&r"(dst0),
2309 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2311 [srcstride]
"r"((
mips_reg)srcstride),
2312 [dststride]
"r"((
mips_reg)dststride),
2314 [filter2]
"f"(filter2.
f), [filter3]
"f"(filter3.
f),
2315 [filter4]
"f"(filter4.
f), [filter5]
"f"(filter5.
f)
2323 for (y = 0; y <
h; y++) {
2324 for (x = 0; x < 16; x++)
2333 ptrdiff_t srcstride,
int h,
int mx,
int my)
2365 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2366 "li %[tmp0], 0x07 \n\t"
2367 "mtc1 %[tmp0], %[ftmp4] \n\t"
2372 "addiu %[h], %[h], -0x01 \n\t"
2373 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2374 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2375 "bnez %[h], 1b \n\t"
2376 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2377 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2378 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2379 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
2380 [ftmp8]
"=&f"(ftmp[8]),
2381 [tmp0]
"=&r"(
tmp[0]),
2385 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2387 [srcstride]
"r"((
mips_reg)srcstride),
2388 [dststride]
"r"((
mips_reg)dststride),
2390 [filter2]
"f"(filter2.
f), [filter3]
"f"(filter3.
f),
2391 [filter4]
"f"(filter4.
f), [filter5]
"f"(filter5.
f)
2399 for (y = 0; y <
h; y++) {
2400 for (x = 0; x < 8; x++)
2409 ptrdiff_t srcstride,
int h,
int mx,
int my)
2437 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2438 "li %[tmp0], 0x07 \n\t"
2439 "mtc1 %[tmp0], %[ftmp4] \n\t"
2444 "addiu %[h], %[h], -0x01 \n\t"
2445 PTR_ADDU "%[src], %[src], %[srcstride] \n\t"
2446 PTR_ADDU "%[dst], %[dst], %[dststride] \n\t"
2447 "bnez %[h], 1b \n\t"
2448 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2449 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2450 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2451 [tmp0]
"=&r"(
tmp[0]),
2455 [dst]
"+&r"(dst), [
src]
"+&r"(
src)
2457 [srcstride]
"r"((
mips_reg)srcstride),
2458 [dststride]
"r"((
mips_reg)dststride),
2460 [filter2]
"f"(filter2.
f), [filter3]
"f"(filter3.
f),
2461 [filter4]
"f"(filter4.
f), [filter5]
"f"(filter5.
f)
2469 for (y = 0; y <
h; y++) {
2470 for (x = 0; x < 4; x++)
2479 ptrdiff_t srcstride,
int h,
int mx,
int my)
2483 uint8_t *
tmp = tmp_array;
2487 tmp = tmp_array + 16;
2493 uint8_t tmp_array[560];
2494 uint8_t *
tmp = tmp_array;
2498 for (y = 0; y <
h + 3; y++) {
2499 for (x = 0; x < 16; x++)
2505 tmp = tmp_array + 16;
2508 for (y = 0; y <
h; y++) {
2509 for (x = 0; x < 16; x++)
2518 ptrdiff_t srcstride,
int h,
int mx,
int my)
2522 uint8_t *
tmp = tmp_array;
2526 tmp = tmp_array + 8;
2532 uint8_t tmp_array[152];
2533 uint8_t *
tmp = tmp_array;
2537 for (y = 0; y <
h + 3; y++) {
2538 for (x = 0; x < 8; x++)
2544 tmp = tmp_array + 8;
2547 for (y = 0; y <
h; y++) {
2548 for (x = 0; x < 8; x++)
2557 ptrdiff_t srcstride,
int h,
int mx,
int my)
2561 uint8_t *
tmp = tmp_array;
2565 tmp = tmp_array + 4;
2571 uint8_t tmp_array[44];
2572 uint8_t *
tmp = tmp_array;
2576 for (y = 0; y <
h + 3; y++) {
2577 for (x = 0; x < 4; x++)
2582 tmp = tmp_array + 4;
2585 for (y = 0; y <
h; y++) {
2586 for (x = 0; x < 4; x++)
2595 ptrdiff_t srcstride,
int h,
int mx,
int my)
2599 uint8_t *
tmp = tmp_array;
2601 src -= 2 * srcstride;
2603 tmp = tmp_array + 32;
2609 uint8_t tmp_array[592];
2610 uint8_t *
tmp = tmp_array;
2612 src -= 2 * srcstride;
2614 for (y = 0; y <
h + 5; y++) {
2615 for (x = 0; x < 16; x++)
2621 tmp = tmp_array + 32;
2624 for (y = 0; y <
h; y++) {
2625 for (x = 0; x < 16; x++)
2634 ptrdiff_t srcstride,
int h,
int mx,
int my)
2638 uint8_t *
tmp = tmp_array;
2640 src -= 2 * srcstride;
2642 tmp = tmp_array + 16;
2648 uint8_t tmp_array[168];
2649 uint8_t *
tmp = tmp_array;
2651 src -= 2 * srcstride;
2653 for (y = 0; y <
h + 5; y++) {
2654 for (x = 0; x < 8; x++)
2660 tmp = tmp_array + 16;
2663 for (y = 0; y <
h; y++) {
2664 for (x = 0; x < 8; x++)
2673 ptrdiff_t srcstride,
int h,
int mx,
int my)
2677 uint8_t *
tmp = tmp_array;
2679 src -= 2 * srcstride;
2681 tmp = tmp_array + 8;
2687 uint8_t tmp_array[52];
2688 uint8_t *
tmp = tmp_array;
2690 src -= 2 * srcstride;
2692 for (y = 0; y <
h + 5; y++) {
2693 for (x = 0; x < 4; x++)
2699 tmp = tmp_array + 8;
2702 for (y = 0; y <
h; y++) {
2703 for (x = 0; x < 4; x++)
2712 ptrdiff_t srcstride,
int h,
int mx,
int my)
2716 uint8_t *
tmp = tmp_array;
2720 tmp = tmp_array + 16;
2726 uint8_t tmp_array[560];
2727 uint8_t *
tmp = tmp_array;
2731 for (y = 0; y <
h + 3; y++) {
2732 for (x = 0; x < 16; x++)
2738 tmp = tmp_array + 16;
2741 for (y = 0; y <
h; y++) {
2742 for (x = 0; x < 16; x++)
2751 ptrdiff_t srcstride,
int h,
int mx,
int my)
2755 uint8_t *
tmp = tmp_array;
2759 tmp = tmp_array + 8;
2765 uint8_t tmp_array[152];
2766 uint8_t *
tmp = tmp_array;
2770 for (y = 0; y <
h + 3; y++) {
2771 for (x = 0; x < 8; x++)
2777 tmp = tmp_array + 8;
2780 for (y = 0; y <
h; y++) {
2781 for (x = 0; x < 8; x++)
2790 ptrdiff_t srcstride,
int h,
int mx,
int my)
2794 uint8_t *
tmp = tmp_array;
2798 tmp = tmp_array + 4;
2804 uint8_t tmp_array[44];
2805 uint8_t *
tmp = tmp_array;
2809 for (y = 0; y <
h + 3; y++) {
2810 for (x = 0; x < 4; x++)
2816 tmp = tmp_array + 4;
2819 for (y = 0; y <
h; y++) {
2820 for (x = 0; x < 4; x++)
2829 ptrdiff_t srcstride,
int h,
int mx,
int my)
2833 uint8_t *
tmp = tmp_array;
2835 src -= 2 * srcstride;
2837 tmp = tmp_array + 32;
2843 uint8_t tmp_array[592];
2844 uint8_t *
tmp = tmp_array;
2846 src -= 2 * srcstride;
2848 for (y = 0; y <
h + 5; y++) {
2849 for (x = 0; x < 16; x++)
2855 tmp = tmp_array + 32;
2858 for (y = 0; y <
h; y++) {
2859 for (x = 0; x < 16; x++)
2868 ptrdiff_t srcstride,
int h,
int mx,
int my)
2872 uint8_t *
tmp = tmp_array;
2874 src -= 2 * srcstride;
2876 tmp = tmp_array + 16;
2882 uint8_t tmp_array[168];
2883 uint8_t *
tmp = tmp_array;
2885 src -= 2 * srcstride;
2887 for (y = 0; y <
h + 5; y++) {
2888 for (x = 0; x < 8; x++)
2894 tmp = tmp_array + 16;
2897 for (y = 0; y <
h; y++) {
2898 for (x = 0; x < 8; x++)
2907 ptrdiff_t srcstride,
int h,
int mx,
int my)
2911 uint8_t *
tmp = tmp_array;
2913 src -= 2 * srcstride;
2915 tmp = tmp_array + 8;
2921 uint8_t tmp_array[52];
2922 uint8_t *
tmp = tmp_array;
2924 src -= 2 * srcstride;
2926 for (y = 0; y <
h + 5; y++) {
2927 for (x = 0; x < 4; x++)
2933 tmp = tmp_array + 8;
2936 for (y = 0; y <
h; y++) {
2937 for (x = 0; x < 4; x++)
2946 ptrdiff_t sstride,
int h,
int mx,
int my)
2977 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2978 "li %[tmp0], 0x03 \n\t"
2979 "mtc1 %[tmp0], %[ftmp4] \n\t"
2980 "pshufh %[a], %[a], %[ftmp0] \n\t"
2981 "pshufh %[b], %[b], %[ftmp0] \n\t"
2991 "addiu %[h], %[h], -0x01 \n\t"
2992 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
2993 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
2994 "bnez %[h], 1b \n\t"
2995 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
2996 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
2997 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
2998 [ftmp6]
"=&f"(ftmp[6]),
2999 [tmp0]
"=&r"(
tmp[0]),
3001 [dst0]
"=&r"(dst0), [
src0]
"=&r"(
src0),
3003 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
3004 [
a]
"+&f"(
a.f), [
b]
"+&f"(
b.f)
3011 int a = 8 - mx,
b = mx;
3014 for (y = 0; y <
h; y++) {
3015 for (x = 0; x < 16; x++)
3016 dst[x] = (
a *
src[x] +
b *
src[x + 1] + 4) >> 3;
3024 ptrdiff_t sstride,
int h,
int mx,
int my)
3046 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3047 "li %[tmp0], 0x03 \n\t"
3048 "mtc1 %[tmp0], %[ftmp4] \n\t"
3049 "pshufh %[c], %[c], %[ftmp0] \n\t"
3050 "pshufh %[d], %[d], %[ftmp0] \n\t"
3060 "addiu %[h], %[h], -0x01 \n\t"
3061 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3062 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3063 "bnez %[h], 1b \n\t"
3064 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3065 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3066 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
3067 [ftmp6]
"=&f"(ftmp[6]),
3068 [tmp0]
"=&r"(
tmp[0]),
3070 [
src0]
"=&r"(
src0), [dst0]
"=&r"(dst0),
3073 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
3074 [
c]
"+&f"(
c.f), [
d]
"+&f"(
d.f)
3081 int c = 8 - my,
d = my;
3084 for (y = 0; y <
h; y++) {
3085 for (x = 0; x < 16; x++)
3086 dst[x] = (
c *
src[x] +
d *
src[x + sstride] + 4) >> 3;
3094 ptrdiff_t sstride,
int h,
int mx,
int my)
3098 uint8_t *
tmp = tmp_array;
3103 int a = 8 - mx,
b = mx;
3104 int c = 8 - my,
d = my;
3106 uint8_t tmp_array[528];
3107 uint8_t *
tmp = tmp_array;
3109 for (y = 0; y <
h + 1; y++) {
3110 for (x = 0; x < 16; x++)
3118 for (y = 0; y <
h; y++) {
3119 for (x = 0; x < 16; x++)
3120 dst[x] = (
c *
tmp[x] +
d *
tmp[x + 16] + 4) >> 3;
3128 ptrdiff_t sstride,
int h,
int mx,
int my)
3149 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3150 "li %[tmp0], 0x03 \n\t"
3151 "mtc1 %[tmp0], %[ftmp4] \n\t"
3152 "pshufh %[a], %[a], %[ftmp0] \n\t"
3153 "pshufh %[b], %[b], %[ftmp0] \n\t"
3158 "addiu %[h], %[h], -0x01 \n\t"
3159 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3160 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3161 "bnez %[h], 1b \n\t"
3162 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3163 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3164 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
3165 [ftmp6]
"=&f"(ftmp[6]),
3166 [tmp0]
"=&r"(
tmp[0]),
3169 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
3170 [
a]
"+&f"(
a.f), [
b]
"+&f"(
b.f)
3177 int a = 8 - mx,
b = mx;
3180 for (y = 0; y <
h; y++) {
3181 for (x = 0; x < 8; x++)
3182 dst[x] = (
a *
src[x] +
b *
src[x + 1] + 4) >> 3;
3190 ptrdiff_t sstride,
int h,
int mx,
int my)
3212 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3213 "li %[tmp0], 0x03 \n\t"
3214 "mtc1 %[tmp0], %[ftmp4] \n\t"
3215 "pshufh %[c], %[c], %[ftmp0] \n\t"
3216 "pshufh %[d], %[d], %[ftmp0] \n\t"
3221 "addiu %[h], %[h], -0x01 \n\t"
3222 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3223 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3224 "bnez %[h], 1b \n\t"
3225 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3226 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3227 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
3228 [ftmp6]
"=&f"(ftmp[6]),
3229 [tmp0]
"=&r"(
tmp[0]),
3233 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
3234 [
c]
"+&f"(
c.f), [
d]
"+&f"(
d.f)
3241 int c = 8 - my,
d = my;
3244 for (y = 0; y <
h; y++) {
3245 for (x = 0; x < 8; x++)
3246 dst[x] = (
c *
src[x] +
d *
src[x + sstride] + 4) >> 3;
3254 ptrdiff_t sstride,
int h,
int mx,
int my)
3258 uint8_t *
tmp = tmp_array;
3263 int a = 8 - mx,
b = mx;
3264 int c = 8 - my,
d = my;
3266 uint8_t tmp_array[136];
3267 uint8_t *
tmp = tmp_array;
3269 for (y = 0; y <
h + 1; y++) {
3270 for (x = 0; x < 8; x++)
3278 for (y = 0; y <
h; y++) {
3279 for (x = 0; x < 8; x++)
3280 dst[x] = (
c *
tmp[x] +
d *
tmp[x + 8] + 4) >> 3;
3288 ptrdiff_t sstride,
int h,
int mx,
int my)
3306 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3307 "li %[tmp0], 0x03 \n\t"
3308 "mtc1 %[tmp0], %[ftmp4] \n\t"
3309 "pshufh %[a], %[a], %[ftmp0] \n\t"
3310 "pshufh %[b], %[b], %[ftmp0] \n\t"
3315 "addiu %[h], %[h], -0x01 \n\t"
3316 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3317 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3318 "bnez %[h], 1b \n\t"
3319 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3320 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3321 [ftmp4]
"=&f"(ftmp[4]),
3322 [tmp0]
"=&r"(
tmp[0]),
3326 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
3327 [
a]
"+&f"(
a.f), [
b]
"+&f"(
b.f)
3334 int a = 8 - mx,
b = mx;
3337 for (y = 0; y <
h; y++) {
3338 for (x = 0; x < 4; x++)
3339 dst[x] = (
a *
src[x] +
b *
src[x + 1] + 4) >> 3;
3347 ptrdiff_t sstride,
int h,
int mx,
int my)
3366 "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
3367 "li %[tmp0], 0x03 \n\t"
3368 "mtc1 %[tmp0], %[ftmp4] \n\t"
3369 "pshufh %[c], %[c], %[ftmp0] \n\t"
3370 "pshufh %[d], %[d], %[ftmp0] \n\t"
3375 "addiu %[h], %[h], -0x01 \n\t"
3376 PTR_ADDU "%[src], %[src], %[sstride] \n\t"
3377 PTR_ADDU "%[dst], %[dst], %[dstride] \n\t"
3378 "bnez %[h], 1b \n\t"
3379 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
3380 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
3381 [ftmp4]
"=&f"(ftmp[4]),
3382 [tmp0]
"=&r"(
tmp[0]),
3387 [dst]
"+&r"(dst), [
src]
"+&r"(
src),
3388 [
c]
"+&f"(
c.f), [
d]
"+&f"(
d.f)
3395 int c = 8 - my,
d = my;
3398 for (y = 0; y <
h; y++) {
3399 for (x = 0; x < 4; x++)
3400 dst[x] = (
c *
src[x] +
d *
src[x + sstride] + 4) >> 3;
3408 ptrdiff_t sstride,
int h,
int mx,
int my)
3412 uint8_t *
tmp = tmp_array;
3417 int a = 8 - mx,
b = mx;
3418 int c = 8 - my,
d = my;
3420 uint8_t tmp_array[36];
3421 uint8_t *
tmp = tmp_array;
3423 for (y = 0; y <
h + 1; y++) {
3424 for (x = 0; x < 4; x++)
3432 for (y = 0; y <
h; y++) {
3433 for (x = 0; x < 4; x++)
3434 dst[x] = (
c *
tmp[x] +
d *
tmp[x + 4] + 4) >> 3;
#define PUT_VP8_EPEL4_V6_MMI(src, src1, dst, srcstride)
static const uint8_t q1[256]
void ff_put_vp8_epel4_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define FILTER_4TAP(src, F, stride)
static av_always_inline void vp8_filter_common_isnot4tap(uint8_t *p, ptrdiff_t stride)
void ff_vp8_h_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
const union av_intfloat64 ff_pw_4
void ff_vp8_v_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
#define DECLARE_VAR_LOW32
void ff_put_vp8_pixels16_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
void ff_put_vp8_bilinear16_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce then the filter should push the output frames on the output link immediately As an exception to the previous rule if the input frame is enough to produce several output frames then the filter needs output only at least one per link The additional frames can be left buffered in the filter
#define RESTRICT_ASM_DOUBLE_1
void ff_vp8_h_loop_filter_simple_mmi(uint8_t *dst, ptrdiff_t stride, int flim)
#define PUT_VP8_EPEL8_V4_MMI(src, src1, dst, srcstride)
void ff_vp8_luma_dc_wht_mmi(int16_t block[4][4][16], int16_t dc[16])
static av_always_inline int vp8_simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
const union av_intfloat64 ff_pw_64
#define PUT_VP8_BILINEAR4_H_MMI(src, dst)
#define TRANSPOSE_4H(fr_i0, fr_i1, fr_i2, fr_i3, fr_t0, fr_t1, fr_t2, fr_t3)
brief: Transpose 4X4 half word packaged data.
void ff_put_vp8_bilinear16_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_bilinear8_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
static av_always_inline void vp8_filter_common_is4tap(uint8_t *p, ptrdiff_t stride)
void ff_put_vp8_epel8_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static double val(void *priv, double ch)
static const uint64_t fourtap_subpel_filters[7][6]
void ff_put_vp8_epel16_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel8_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static const uint16_t mask[17]
static av_always_inline void vp8_v_loop_filter8_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel4_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel16_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_BILINEAR4_V_MMI(src, src1, dst, sstride)
#define PUT_VP8_BILINEAR8_V_MMI(src, src1, dst, sstride)
void ff_vp8_h_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel8_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_idct_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
void ff_put_vp8_epel4_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static const uint8_t q0[256]
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior, as they are not representable as absolute values of their type.
#define FILTER_6TAP(src, F, stride)
void ff_put_vp8_epel4_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_BILINEAR8_H_MMI(src, dst)
#define RESTRICT_ASM_UINT32_T
#define PUT_VP8_EPEL4_V4_MMI(src, src1, dst, srcstride)
void ff_put_vp8_bilinear4_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel8_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel8_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define MMI_VP8_LOOP_FILTER
void ff_put_vp8_epel4_h6v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static av_always_inline void vp8_v_loop_filter8_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel16_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_EPEL4_H4_MMI(src, dst)
void ff_vp8_v_loop_filter16_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c... (truncated)
#define RESTRICT_ASM_DOUBLE_2
void ff_vp8_v_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static av_always_inline void vp8_h_loop_filter8_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
void ff_vp8_idct_dc_add4y_mmi(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
void ff_put_vp8_epel8_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define DECLARE_ALIGNED(n, t, v)
void ff_put_vp8_epel16_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear8_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_bilinear4_v_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_put_vp8_epel8_h6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_vp8_idct_dc_add_mmi(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
void ff_vp8_idct_dc_add4uv_mmi(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
The reader does not expect b to be semantically unsigned here, and if the code is changed by maybe adding a cast, a division or other, the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: SUINT is the C unsigned type, but it holds a signed int. To use the same example: SUINT a ... (truncated)
#define DECLARE_VAR_ALL64
static av_always_inline int vp8_normal_limit(uint8_t *p, ptrdiff_t stride, int E, int I)
void ff_put_vp8_epel16_h4v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_epel4_v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear16_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define i(width, name, range_min, range_max)
void ff_put_vp8_bilinear8_h_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
void ff_vp8_v_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_v_loop_filter16_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_put_vp8_epel16_h4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_bilinear4_hv_mmi(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
#define PUT_VP8_EPEL8_V6_MMI(src, src1, dst, srcstride)
void ff_put_vp8_epel16_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
#define PUT_VP8_EPEL8_H4_MMI(src, dst)
void ff_put_vp8_epel8_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
static av_always_inline void vp8_h_loop_filter8_inner_mmi(uint8_t *dst, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_h_loop_filter8uv_inner_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
void ff_vp8_luma_dc_wht_dc_mmi(int16_t block[4][4][16], int16_t dc[16])
#define RESTRICT_ASM_LOW32
void ff_put_vp8_epel16_h4v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_pixels4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
#define TRANSPOSE_8B(fr_i0, fr_i1, fr_i2, fr_i3, fr_i4, fr_i5, fr_i6, fr_i7, fr_t0, fr_t1, fr_t2, fr_t3)
brief: Transpose 8x8 byte packed data.
#define PUT_VP8_EPEL4_H6_MMI(src, dst)
void ff_put_vp8_epel4_h6v4_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
void ff_put_vp8_epel4_v6_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my)
void ff_put_vp8_pixels8_mmi(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y)
The exact code depends on how similar the blocks are and how related they are to the block
#define PUT_VP8_EPEL8_H6_MMI(src, dst)
#define RESTRICT_ASM_ALL64
void ff_vp8_h_loop_filter8uv_mmi(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride, int flim_E, int flim_I, int hev_thresh)
static const uint8_t subpel_filters[7][6]