FFmpeg
mpegvideo_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized mpegvideo
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "mpegvideo_mips.h"
27 
29  int n, int qscale)
30 {
    /*
     * H.263 intra-block dequantization implemented with Loongson MMI
     * inline assembly; the coefficients in block[] are rewritten in place.
     *
     * NOTE(review): the first line of this definition is absent from this
     * listing. The cross-reference index at the end of the file gives the
     * prototype as
     *   void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s,
     *                                         int16_t *block, int n, int qscale)
     *
     * s      - context; reads h263_aic, ac_pred, y_dc_scale, c_dc_scale,
     *          block_last_index[] and inter_scantable.raster_end[].
     * block  - coefficient array, modified in place.
     * n      - block index; n < 4 selects y_dc_scale, otherwise c_dc_scale.
     * qscale - quantizer; the multiplier is qscale << 1 and, unless
     *          h263_aic is set, the offset is (qscale - 1) | 1.
     *
     * The lane-wise notes inside the asm assume the MMI instructions behave
     * like their MMX namesakes - TODO confirm against the Loongson manual.
     */
31  int64_t level, nCoeffs;
32  double ftmp[6];
33  mips_reg addr[1];
34  union mmi_intfloat64 qmul_u, qadd_u;
36 
37  qmul_u.i = qscale << 1;
38  av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
39 
    /*
     * Compute the DC value in C up front: scaled by the luma/chroma DC
     * scale when h263_aic is off, left untouched (and offset forced to 0)
     * when it is on. It is written back after the asm loop.
     */
40  if (!s->h263_aic) {
41  if (n<4)
42  level = block[0] * s->y_dc_scale;
43  else
44  level = block[0] * s->c_dc_scale;
45  qadd_u.i = (qscale-1) | 1;
46  } else {
47  qadd_u.i = 0;
48  level = block[0];
49  }
50 
    /* Index of the last coefficient to process; 63 when ac_pred is set. */
51  if(s->ac_pred)
52  nCoeffs = 63;
53  else
54  nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
55 
56  __asm__ volatile (
    /*
     * Setup: the two packsswh per register replicate qmul / qadd into all
     * four 16-bit lanes; ftmp0 becomes 0 - qadd and ftmp5 stays zero.
     */
57  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
58  "packsswh %[qmul], %[qmul], %[qmul] \n\t"
59  "packsswh %[qmul], %[qmul], %[qmul] \n\t"
60  "packsswh %[qadd], %[qadd], %[qadd] \n\t"
61  "packsswh %[qadd], %[qadd], %[qadd] \n\t"
62  "psubh %[ftmp0], %[ftmp0], %[qadd] \n\t"
63  "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
64  ".p2align 4 \n\t"
65 
    /*
     * Loop: eight coefficients (two 64-bit loads) per iteration.
     * %[block] is passed in as block + nCoeffs and %[nCoeffs] as the byte
     * offset 2 * (-nCoeffs), so the first iteration addresses block[0].
     */
66  "1: \n\t"
67  PTR_ADDU "%[addr0], %[block], %[nCoeffs] \n\t"
68  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
69  MMI_LDC1(%[ftmp2], %[addr0], 0x08)
70  "mov.d %[ftmp3], %[ftmp1] \n\t"
71  "mov.d %[ftmp4], %[ftmp2] \n\t"
72  "pmullh %[ftmp1], %[ftmp1], %[qmul] \n\t"
73  "pmullh %[ftmp2], %[ftmp2], %[qmul] \n\t"
    /* ftmp3/ftmp4 = per-lane mask of coefficients greater than zero. */
74  "pcmpgth %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
75  "pcmpgth %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
    /*
     * xor with the mask, add -qadd, xor with the mask again: positive
     * lanes end up as x * qmul + qadd, the others as x * qmul - qadd.
     */
76  "pxor %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
77  "pxor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
78  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
79  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
80  "pxor %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
81  "pxor %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
    /*
     * Lanes whose intermediate equals -qadd (i.e. the input was zero) are
     * cleared by the pandn so that zero coefficients stay zero.
     */
82  "pcmpeqh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
83  "pcmpeqh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
84  "pandn %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
85  "pandn %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
    /*
     * Advance the byte offset by 16; the stores still use addr0 computed
     * at the top of this iteration. Loop while the offset is <= 0.
     * NOTE(review): %[nCoeffs] is modified here but is listed as an
     * input-only operand below; GCC requires input operands to be left
     * unmodified - consider confirming/fixing the constraint.
     */
86  PTR_ADDIU "%[nCoeffs], %[nCoeffs], 0x10 \n\t"
87  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
88  MMI_SDC1(%[ftmp2], %[addr0], 0x08)
89  "blez %[nCoeffs], 1b \n\t"
90  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
91  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
92  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
94  [addr0]"=&r"(addr[0])
95  : [block]"r"((mips_reg)(block+nCoeffs)),
96  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
97  [qmul]"f"(qmul_u.f), [qadd]"f"(qadd_u.f)
98  : "memory"
99  );
100 
    /* The loop also rewrote block[0]; restore the DC value computed above. */
101  block[0] = level;
102 }
103 
105  int n, int qscale)
106 {
    /*
     * H.263 inter-block dequantization implemented with Loongson MMI
     * inline assembly; the coefficients in block[] are rewritten in place.
     * Unlike the intra variant there is no separate DC handling: block[0]
     * goes through the same loop as every other coefficient.
     *
     * NOTE(review): the first line of this definition is absent from this
     * listing. The cross-reference index at the end of the file gives the
     * prototype as
     *   void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s,
     *                                         int16_t *block, int n, int qscale)
     *
     * s      - context; reads block_last_index[] and
     *          inter_scantable.raster_end[] (h263_aic only in the assert).
     * block  - coefficient array, modified in place.
     * n      - block index, used to look up block_last_index[n].
     * qscale - quantizer; multiplier is qscale << 1, offset (qscale - 1) | 1.
     *
     * The lane-wise notes inside the asm assume the MMI instructions behave
     * like their MMX namesakes - TODO confirm against the Loongson manual.
     */
107  int64_t nCoeffs;
108  double ftmp[6];
109  mips_reg addr[1];
110  union mmi_intfloat64 qmul_u, qadd_u;
112 
113  qmul_u.i = qscale << 1;
114  qadd_u.i = (qscale - 1) | 1;
115  av_assert2(s->block_last_index[n]>=0 || s->h263_aic);
116  nCoeffs = s->inter_scantable.raster_end[s->block_last_index[n]];
117 
118  __asm__ volatile (
    /*
     * Setup: replicate qmul / qadd into all four 16-bit lanes,
     * ftmp0 = 0 - qadd, ftmp5 = 0.
     */
119  "packsswh %[qmul], %[qmul], %[qmul] \n\t"
120  "packsswh %[qmul], %[qmul], %[qmul] \n\t"
121  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
122  "packsswh %[qadd], %[qadd], %[qadd] \n\t"
123  "packsswh %[qadd], %[qadd], %[qadd] \n\t"
124  "psubh %[ftmp0], %[ftmp0], %[qadd] \n\t"
125  "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
126  ".p2align 4 \n\t"
    /*
     * Loop: eight coefficients per iteration. %[block] is block + nCoeffs
     * and %[nCoeffs] the byte offset 2 * (-nCoeffs), so the first
     * iteration addresses block[0].
     */
127  "1: \n\t"
128  PTR_ADDU "%[addr0], %[block], %[nCoeffs] \n\t"
129  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
130  MMI_LDC1(%[ftmp2], %[addr0], 0x08)
131  "mov.d %[ftmp3], %[ftmp1] \n\t"
132  "mov.d %[ftmp4], %[ftmp2] \n\t"
133  "pmullh %[ftmp1], %[ftmp1], %[qmul] \n\t"
134  "pmullh %[ftmp2], %[ftmp2], %[qmul] \n\t"
    /* ftmp3/ftmp4 = per-lane mask of coefficients greater than zero. */
135  "pcmpgth %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
136  "pcmpgth %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
    /*
     * xor / add -qadd / xor: positive lanes become x * qmul + qadd,
     * the others x * qmul - qadd.
     */
137  "pxor %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
138  "pxor %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
139  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
140  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
141  "pxor %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
142  "pxor %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
    /* Clear lanes whose intermediate equals -qadd (input was zero). */
143  "pcmpeqh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
144  "pcmpeqh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
145  "pandn %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
146  "pandn %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
    /*
     * Advance the byte offset by 16, store through the address computed
     * at the top of the iteration, loop while the offset is <= 0.
     * NOTE(review): %[nCoeffs] is modified here but is listed as an
     * input-only operand below; GCC requires input operands to be left
     * unmodified - consider confirming/fixing the constraint.
     */
147  PTR_ADDIU "%[nCoeffs], %[nCoeffs], 0x10 \n\t"
148  MMI_SDC1(%[ftmp1], %[addr0], 0x00)
149  MMI_SDC1(%[ftmp2], %[addr0], 0x08)
150  "blez %[nCoeffs], 1b \n\t"
151  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
152  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
153  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
155  [addr0]"=&r"(addr[0])
156  : [block]"r"((mips_reg)(block+nCoeffs)),
157  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
158  [qmul]"f"(qmul_u.f), [qadd]"f"(qadd_u.f)
159  : "memory"
160  );
161 }
162 
164  int n, int qscale)
165 {
    /*
     * MPEG-1 intra-block dequantization implemented with Loongson MMI
     * inline assembly; the coefficients in block[] are rewritten in place.
     * Per non-zero coefficient the loop computes, on the magnitude,
     *   v = (|x| * quant_matrix[i] * qscale) >> 3;  v = (v - 1) | 1;
     * and then restores the sign. Zero coefficients stay zero.
     *
     * NOTE(review): the first line of this definition is absent from this
     * listing. The cross-reference index at the end of the file gives the
     * prototype as
     *   void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s,
     *                                          int16_t *block, int n, int qscale)
     *
     * s      - context; reads block_last_index[], intra_scantable.raster_end[],
     *          y_dc_scale, c_dc_scale and intra_matrix.
     * block  - coefficient array, modified in place.
     * n      - block index; n < 4 selects y_dc_scale, otherwise c_dc_scale.
     * qscale - quantizer multiplied into every matrix entry.
     *
     * The lane-wise notes inside the asm assume the MMI instructions behave
     * like their MMX namesakes - TODO confirm against the Loongson manual.
     */
166  int64_t nCoeffs;
167  const uint16_t *quant_matrix;
168  int block0;
169  double ftmp[10];
170  uint64_t tmp[1];
171  mips_reg addr[1];
174 
175  av_assert2(s->block_last_index[n]>=0);
    /* Number of coefficients to process (last raster index + 1). */
176  nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
177 
    /* DC is scaled separately in C and written back after the asm loop. */
178  if (n<4)
179  block0 = block[0] * s->y_dc_scale;
180  else
181  block0 = block[0] * s->c_dc_scale;
182 
183  /* XXX: only mpeg1 */
184  quant_matrix = s->intra_matrix;
185 
186  __asm__ volatile (
    /*
     * Setup: ftmp0 = all-ones shifted right by 15 (a 1 in every 16-bit
     * lane), ftmp1 = qscale replicated into all four lanes, and
     * addr0 = the negative byte offset passed in as %[nCoeffs].
     */
187  "dli %[tmp0], 0x0f \n\t"
188  "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
189  "dmtc1 %[tmp0], %[ftmp4] \n\t"
190  "dmtc1 %[qscale], %[ftmp1] \n\t"
191  "psrlh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
192  "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
193  "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
194  "or %[addr0], %[nCoeffs], $0 \n\t"
195  ".p2align 4 \n\t"
196 
    /*
     * Loop: eight coefficients per iteration. %[block] and %[quant] point
     * nCoeffs elements past the start of their arrays and addr0 is the
     * negative byte offset, so the first iteration addresses element 0.
     */
197  "1: \n\t"
198  MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x00)
199  MMI_LDXC1(%[ftmp3], %[addr0], %[block], 0x08)
    /* Keep untouched copies for the zero test further down. */
200  "mov.d %[ftmp4], %[ftmp2] \n\t"
201  "mov.d %[ftmp5], %[ftmp3] \n\t"
202  MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x00)
203  MMI_LDXC1(%[ftmp7], %[addr0], %[quant], 0x08)
    /* ftmp6/ftmp7 = quant_matrix entries * qscale. */
204  "pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
205  "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
    /* ftmp8/ftmp9 = sign mask (0 > x); (x ^ mask) - mask gives |x|. */
206  "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
207  "pxor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
208  "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
209  "pcmpgth %[ftmp9], %[ftmp9], %[ftmp3] \n\t"
210  "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
211  "pxor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
212  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
213  "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
214  "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
215  "pmullh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
    /*
     * ftmp6/ftmp7 are reused as the "input was zero" mask; ftmp4 is then
     * reloaded with the shift count 3 once its copy has been compared.
     */
216  "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
217  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
218  "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
219  "dli %[tmp0], 0x03 \n\t"
220  "pcmpeqh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
221  "dmtc1 %[tmp0], %[ftmp4] \n\t"
222  "psrah %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
223  "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
    /* v = (v - 1) | 1, using the per-lane constant 1 held in ftmp0. */
224  "psubh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
225  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
226  "por %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
227  "por %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
    /* Restore the sign, then clear the lanes whose input was zero. */
228  "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
229  "pxor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
230  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
231  "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
232  "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
233  "pandn %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
234  MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x00)
235  MMI_SDXC1(%[ftmp7], %[addr0], %[block], 0x08)
    /* Advance by 16 bytes; loop while the offset is still negative. */
236  PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t"
237  "bltz %[addr0], 1b \n\t"
238  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
239  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
240  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
241  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
242  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
243  [tmp0]"=&r"(tmp[0]),
246  [addr0]"=&r"(addr[0])
247  : [block]"r"((mips_reg)(block+nCoeffs)),
248  [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
249  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
250  [qscale]"r"(qscale)
251  : "memory"
252  );
253 
    /* The loop also rewrote block[0]; restore the DC value computed above. */
254  block[0] = block0;
255 }
256 
258  int n, int qscale)
259 {
    /*
     * MPEG-1 inter-block dequantization implemented with Loongson MMI
     * inline assembly; the coefficients in block[] are rewritten in place.
     * Per non-zero coefficient the loop computes, on the magnitude,
     *   v = ((2 * |x| + 1) * quant_matrix[i] * qscale) >> 4;
     *   v = (v - 1) | 1;
     * and then restores the sign. Zero coefficients stay zero and there is
     * no separate DC handling.
     *
     * NOTE(review): the first line of this definition is absent from this
     * listing. The cross-reference index at the end of the file gives the
     * prototype as
     *   void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s,
     *                                          int16_t *block, int n, int qscale)
     *
     * s      - context; reads block_last_index[], intra_scantable.raster_end[]
     *          and inter_matrix.
     * block  - coefficient array, modified in place.
     * n      - block index, used to look up block_last_index[n].
     * qscale - quantizer multiplied into every matrix entry.
     *
     * The lane-wise notes inside the asm assume the MMI instructions behave
     * like their MMX namesakes - TODO confirm against the Loongson manual.
     */
260  int64_t nCoeffs;
261  const uint16_t *quant_matrix;
262  double ftmp[10];
263  uint64_t tmp[1];
264  mips_reg addr[1];
267 
268  av_assert2(s->block_last_index[n] >= 0);
    /* Number of coefficients to process (last raster index + 1). */
269  nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]] + 1;
270  quant_matrix = s->inter_matrix;
271 
272  __asm__ volatile (
    /*
     * Setup: ftmp0 = 1 in every 16-bit lane (all-ones >> 15), ftmp1 =
     * qscale replicated into all four lanes, addr0 = negative byte offset.
     */
273  "dli %[tmp0], 0x0f \n\t"
274  "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
275  "dmtc1 %[tmp0], %[ftmp4] \n\t"
276  "dmtc1 %[qscale], %[ftmp1] \n\t"
277  "psrlh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
278  "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
279  "packsswh %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
280  "or %[addr0], %[nCoeffs], $0 \n\t"
281  ".p2align 4 \n\t"
282 
    /*
     * Loop: eight coefficients per iteration, addressed as base pointer
     * (array start + nCoeffs elements) plus the negative byte offset.
     */
283  "1: \n\t"
284  MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x00)
285  MMI_LDXC1(%[ftmp3], %[addr0], %[block], 0x08)
    /* Keep untouched copies for the zero test further down. */
286  "mov.d %[ftmp4], %[ftmp2] \n\t"
287  "mov.d %[ftmp5], %[ftmp3] \n\t"
288  MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x00)
289  MMI_LDXC1(%[ftmp7], %[addr0], %[quant], 0x08)
    /* ftmp6/ftmp7 = quant_matrix entries * qscale. */
290  "pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
291  "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
    /* ftmp8/ftmp9 = sign mask (0 > x); (x ^ mask) - mask gives |x|. */
292  "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
293  "pxor %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
294  "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
295  "pcmpgth %[ftmp9], %[ftmp9], %[ftmp3] \n\t"
296  "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
297  "pxor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
298  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
299  "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
    /* 2 * |x| + 1, then multiply by the scaled matrix entry. */
300  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
301  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
302  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
303  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
304  "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
305  "pmullh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
    /*
     * ftmp6/ftmp7 are reused as the "input was zero" mask; ftmp4 is then
     * reloaded with the shift count 4 once its copy has been compared.
     */
306  "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
307  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
308  "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
309  "dli %[tmp0], 0x04 \n\t"
310  "pcmpeqh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
311  "dmtc1 %[tmp0], %[ftmp4] \n\t"
312  "psrah %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
313  "psrah %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
    /* v = (v - 1) | 1, using the per-lane constant 1 held in ftmp0. */
314  "psubh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
315  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
316  "por %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
317  "por %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
    /* Restore the sign, then clear the lanes whose input was zero. */
318  "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
319  "pxor %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
320  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
321  "psubh %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
322  "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
323  "pandn %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
324  MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x00)
325  MMI_SDXC1(%[ftmp7], %[addr0], %[block], 0x08)
    /* Advance by 16 bytes; loop while the offset is still negative. */
326  PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t"
327  "bltz %[addr0], 1b \n\t"
328  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
329  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
330  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
331  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
332  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
333  [tmp0]"=&r"(tmp[0]),
336  [addr0]"=&r"(addr[0])
337  : [block]"r"((mips_reg)(block+nCoeffs)),
338  [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
339  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
340  [qscale]"r"(qscale)
341  : "memory"
342  );
343 }
344 
346  int n, int qscale)
347 {
    /*
     * MPEG-2 intra-block dequantization implemented with Loongson MMI
     * inline assembly; the coefficients in block[] are rewritten in place.
     * Per non-zero coefficient the loop computes, on the magnitude,
     *   v = (|x| * quant_matrix[i] * qscale) >> 3;
     * and then restores the sign. Zero coefficients stay zero.
     *
     * NOTE(review): the first line of this definition is absent from this
     * listing. The cross-reference index at the end of the file gives the
     * prototype as
     *   void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s,
     *                                          int16_t *block, int n, int qscale)
     *
     * s      - context; reads block_last_index[], alternate_scan,
     *          intra_scantable.raster_end[], y_dc_scale, c_dc_scale and
     *          intra_matrix.
     * block  - coefficient array, modified in place.
     * n      - block index; n < 4 selects y_dc_scale, otherwise c_dc_scale.
     * qscale - quantizer multiplied into every matrix entry.
     *
     * The lane-wise notes inside the asm assume the MMI instructions behave
     * like their MMX namesakes - TODO confirm against the Loongson manual.
     */
348  uint64_t nCoeffs;
349  const uint16_t *quant_matrix;
350  int block0;
351  double ftmp[10];
352  uint64_t tmp[1];
353  mips_reg addr[1];
356 
357  assert(s->block_last_index[n]>=0);
358 
    /* Index of the last coefficient to process; 63 with alternate_scan. */
359  if (s->alternate_scan)
360  nCoeffs = 63;
361  else
362  nCoeffs = s->intra_scantable.raster_end[s->block_last_index[n]];
363 
    /* DC is scaled separately in C and written back after the asm loop. */
364  if (n < 4)
365  block0 = block[0] * s->y_dc_scale;
366  else
367  block0 = block[0] * s->c_dc_scale;
368 
369  quant_matrix = s->intra_matrix;
370 
371  __asm__ volatile (
    /*
     * Setup: ftmp0 = all-ones shifted right by 15 per lane (not used by
     * the loop below), ftmp9 = qscale replicated into all four 16-bit
     * lanes, addr0 = the negative byte offset passed in as %[nCoeffs].
     */
372  "dli %[tmp0], 0x0f \n\t"
373  "pcmpeqh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
374  "mtc1 %[tmp0], %[ftmp3] \n\t"
375  "mtc1 %[qscale], %[ftmp9] \n\t"
376  "psrlh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
377  "packsswh %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
378  "packsswh %[ftmp9], %[ftmp9], %[ftmp9] \n\t"
379  "or %[addr0], %[nCoeffs], $0 \n\t"
380  ".p2align 4 \n\t"
381 
    /*
     * Loop: eight coefficients per iteration, addressed as base pointer
     * (array start + nCoeffs elements) plus the negative byte offset.
     */
382  "1: \n\t"
383  MMI_LDXC1(%[ftmp1], %[addr0], %[block], 0x00)
384  MMI_LDXC1(%[ftmp2], %[addr0], %[block], 0x08)
    /* Keep untouched copies for the zero test further down. */
385  "mov.d %[ftmp3], %[ftmp1] \n\t"
386  "mov.d %[ftmp4], %[ftmp2] \n\t"
387  MMI_LDXC1(%[ftmp5], %[addr0], %[quant], 0x00)
388  MMI_LDXC1(%[ftmp6], %[addr0], %[quant], 0x08)
    /* ftmp5/ftmp6 = quant_matrix entries * qscale. */
389  "pmullh %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
390  "pmullh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
    /* ftmp7/ftmp8 = sign mask (0 > x); (x ^ mask) - mask gives |x|. */
391  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
392  "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
393  "pcmpgth %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
394  "pcmpgth %[ftmp8], %[ftmp8], %[ftmp2] \n\t"
395  "pxor %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
396  "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
397  "psubh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
398  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
399  "pmullh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
400  "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
    /*
     * ftmp5/ftmp6 are reused as the "input was zero" mask; ftmp3 is then
     * reloaded with the shift count 3 once its copy has been compared.
     */
401  "pxor %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
402  "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
403  "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
404  "dli %[tmp0], 0x03 \n\t"
405  "pcmpeqh %[ftmp6] , %[ftmp6], %[ftmp4] \n\t"
406  "mtc1 %[tmp0], %[ftmp3] \n\t"
407  "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
408  "psrah %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
    /* Restore the sign, then clear the lanes whose input was zero. */
409  "pxor %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
410  "pxor %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
411  "psubh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
412  "psubh %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
413  "pandn %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
414  "pandn %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
415  MMI_SDXC1(%[ftmp5], %[addr0], %[block], 0x00)
416  MMI_SDXC1(%[ftmp6], %[addr0], %[block], 0x08)
    /* Advance by 16 bytes; loop while the offset is <= 0. */
417  PTR_ADDIU "%[addr0], %[addr0], 0x10 \n\t"
418  "blez %[addr0], 1b \n\t"
419  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
420  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
421  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
422  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
423  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
424  [tmp0]"=&r"(tmp[0]),
427  [addr0]"=&r"(addr[0])
428  : [block]"r"((mips_reg)(block+nCoeffs)),
429  [quant]"r"((mips_reg)(quant_matrix+nCoeffs)),
430  [nCoeffs]"r"((mips_reg)(2*(-nCoeffs))),
431  [qscale]"r"(qscale)
432  : "memory"
433  );
434 
    /* The loop also rewrote block[0]; restore the DC value computed above. */
435  block[0]= block0;
436 }
437 
439 {
    /*
     * DCT-domain denoising step implemented with Loongson MMI inline
     * assembly. For each of the 64 coefficients in block[] it
     *   - adds the coefficient's magnitude to s->dct_error_sum[intra][i],
     *   - replaces the coefficient by its magnitude reduced by
     *     s->dct_offset[intra][i] (saturating subtract, so it does not go
     *     below zero), with the original sign restored.
     * s->dct_count[intra] is incremented once per call.
     *
     * NOTE(review): the signature line of this definition is absent from
     * this listing. The cross-reference index at the end of the file gives
     * the prototype as
     *   void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block)
     *
     * The lane-wise notes inside the asm assume the MMI instructions behave
     * like their MMX namesakes - TODO confirm against the Loongson manual.
     */
440  const int intra = s->mb_intra;
441  int *sum = s->dct_error_sum[intra];
442  uint16_t *offset = s->dct_offset[intra];
443  double ftmp[8];
444  mips_reg addr[1];
446 
447  s->dct_count[intra]++;
448 
449  __asm__ volatile(
    /* ftmp0 = 0, used as the high half when widening 16-bit lanes. */
450  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
    /* Loop: eight coefficients (16 bytes of block[]) per iteration. */
451  "1: \n\t"
452  MMI_LDC1(%[ftmp1], %[block], 0x00)
453  "pxor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
454  MMI_LDC1(%[ftmp3], %[block], 0x08)
455  "pxor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
    /* ftmp2/ftmp4 = sign mask (0 > x); (x ^ mask) - mask gives |x|. */
456  "pcmpgth %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
457  "pcmpgth %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
458  "pxor %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
459  "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
460  "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
461  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
    /*
     * Save |x| in ftmp5/ftmp7 for the error sum, then subtract the
     * per-coefficient offset from |x| with unsigned saturation.
     */
462  MMI_LDC1(%[ftmp6], %[offset], 0x00)
463  "mov.d %[ftmp5], %[ftmp1] \n\t"
464  "psubush %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
465  MMI_LDC1(%[ftmp6], %[offset], 0x08)
466  "mov.d %[ftmp7], %[ftmp3] \n\t"
467  "psubush %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
    /* Restore the sign and store the denoised coefficients. */
468  "pxor %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
469  "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
470  "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
471  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
472  MMI_SDC1(%[ftmp1], %[block], 0x00)
473  MMI_SDC1(%[ftmp3], %[block], 0x08)
    /*
     * Widen the eight saved magnitudes to 32-bit words by interleaving
     * with zero, and accumulate them into the next eight entries of sum[].
     */
474  "mov.d %[ftmp1], %[ftmp5] \n\t"
475  "mov.d %[ftmp3], %[ftmp7] \n\t"
476  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
477  "punpckhhw %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
478  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
479  "punpckhhw %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
480  MMI_LDC1(%[ftmp2], %[sum], 0x00)
481  "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
482  MMI_LDC1(%[ftmp2], %[sum], 0x08)
483  "paddw %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
484  MMI_LDC1(%[ftmp2], %[sum], 0x10)
485  "paddw %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
486  MMI_LDC1(%[ftmp2], %[sum], 0x18)
487  "paddw %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
488  MMI_SDC1(%[ftmp5], %[sum], 0x00)
489  MMI_SDC1(%[ftmp1], %[sum], 0x08)
490  MMI_SDC1(%[ftmp7], %[sum], 0x10)
491  MMI_SDC1(%[ftmp3], %[sum], 0x18)
    /*
     * Advance block and offset by 16 bytes and sum by 32 bytes; loop while
     * block is still below block1 (= block + 64 on entry).
     */
492  PTR_ADDIU "%[block], %[block], 0x10 \n\t"
493  PTR_ADDIU "%[sum], %[sum], 0x20 \n\t"
494  PTR_SUBU "%[addr0], %[block1], %[block] \n\t"
495  PTR_ADDIU "%[offset], %[offset], 0x10 \n\t"
496  "bgtz %[addr0], 1b \n\t"
497  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
498  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
499  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
500  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
502  [addr0]"=&r"(addr[0]),
503  [block]"+&r"(block), [sum]"+&r"(sum),
504  [offset]"+&r"(offset)
505  : [block1]"r"(block+64)
506  : "memory"
507  );
508 }
level
uint8_t level
Definition: svq3.c:204
int64_t
long long int64_t
Definition: coverity.c:34
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
mips_reg
#define mips_reg
Definition: asmdefs.h:46
ff_dct_unquantize_mpeg1_intra_mmi
void ff_dct_unquantize_mpeg1_intra_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
Definition: mpegvideo_mmi.c:163
mmiutils.h
quant
static const uint8_t quant[64]
Definition: vmixdec.c:71
s
#define s(width, name)
Definition: cbs_vp9.c:198
ff_dct_unquantize_mpeg2_intra_mmi
void ff_dct_unquantize_mpeg2_intra_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
Definition: mpegvideo_mmi.c:345
mmi_intfloat64::f
double f
Definition: asmdefs.h:105
ff_dct_unquantize_h263_intra_mmi
void ff_dct_unquantize_h263_intra_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
Definition: mpegvideo_mmi.c:28
block1
static int16_t block1[64]
Definition: dct.c:120
ff_dct_unquantize_h263_inter_mmi
void ff_dct_unquantize_h263_inter_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
Definition: mpegvideo_mmi.c:104
ff_denoise_dct_mmi
void ff_denoise_dct_mmi(MpegEncContext *s, int16_t *block)
Definition: mpegvideo_mmi.c:438
PTR_SUBU
#define PTR_SUBU
Definition: asmdefs.h:52
DECLARE_VAR_ALL64
#define DECLARE_VAR_ALL64
Definition: mmiutils.h:39
offset
it's the only field you need to keep assuming you have a context There is some magic you don't need to care about around this just let it vf offset
Definition: writing_filters.txt:86
av_assert2
#define av_assert2(cond)
assert() equivalent for checks that lie in speed-critical code.
Definition: avassert.h:67
mpegvideo_mips.h
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:49
DECLARE_VAR_ADDRT
#define DECLARE_VAR_ADDRT
Definition: mmiutils.h:41
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:50
mmi_intfloat64
Definition: asmdefs.h:103
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
mmi_intfloat64::i
int64_t i
Definition: asmdefs.h:104
RESTRICT_ASM_ADDRT
#define RESTRICT_ASM_ADDRT
Definition: mmiutils.h:42
RESTRICT_ASM_ALL64
#define RESTRICT_ASM_ALL64
Definition: mmiutils.h:40
MpegEncContext
MpegEncContext.
Definition: mpegvideo.h:67
ff_dct_unquantize_mpeg1_inter_mmi
void ff_dct_unquantize_mpeg1_inter_mmi(MpegEncContext *s, int16_t *block, int n, int qscale)
Definition: mpegvideo_mmi.c:257