FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
idctdsp_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized idctdsp
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "idctdsp_mips.h"
25 #include "constants.h"
26 
27 void ff_put_pixels_clamped_mmi(const int16_t *block,
28  uint8_t *av_restrict pixels, ptrdiff_t line_size)
29 {
30  const int16_t *p;
31  uint8_t *pix;
32 
33  p = block;
34  pix = pixels;
35 
36  __asm__ volatile (
37  "ldc1 $f0, 0+%3 \r\n"
38  "ldc1 $f2, 8+%3 \r\n"
39  "ldc1 $f4, 16+%3 \r\n"
40  "ldc1 $f6, 24+%3 \r\n"
41  "ldc1 $f8, 32+%3 \r\n"
42  "ldc1 $f10, 40+%3 \r\n"
43  "ldc1 $f12, 48+%3 \r\n"
44  "ldc1 $f14, 56+%3 \r\n"
45  "dadd $10, %0, %1 \r\n"
46  "packushb $f0, $f0, $f2 \r\n"
47  "packushb $f4, $f4, $f6 \r\n"
48  "packushb $f8, $f8, $f10 \r\n"
49  "packushb $f12, $f12, $f14 \r\n"
50  "sdc1 $f0, 0(%0) \r\n"
51  "sdc1 $f4, 0($10) \r\n"
52  "gssdxc1 $f8, 0($10, %1) \r\n"
53  "gssdxc1 $f12, 0(%0, %2) \r\n"
54  ::"r"(pix),"r"((int)line_size),
55  "r"((int)line_size*3),"m"(*p)
56  : "$10","memory"
57  );
58 
59  pix += line_size*4;
60  p += 32;
61 
62  __asm__ volatile (
63  "ldc1 $f0, 0+%3 \r\n"
64  "ldc1 $f2, 8+%3 \r\n"
65  "ldc1 $f4, 16+%3 \r\n"
66  "ldc1 $f6, 24+%3 \r\n"
67  "ldc1 $f8, 32+%3 \r\n"
68  "ldc1 $f10, 40+%3 \r\n"
69  "ldc1 $f12, 48+%3 \r\n"
70  "ldc1 $f14, 56+%3 \r\n"
71  "dadd $10, %0, %1 \r\n"
72  "packushb $f0, $f0, $f2 \r\n"
73  "packushb $f4, $f4, $f6 \r\n"
74  "packushb $f8, $f8, $f10 \r\n"
75  "packushb $f12, $f12, $f14 \r\n"
76  "sdc1 $f0, 0(%0) \r\n"
77  "sdc1 $f4, 0($10) \r\n"
78  "gssdxc1 $f8, 0($10, %1) \r\n"
79  "gssdxc1 $f12, 0(%0, %2) \r\n"
80  ::"r"(pix),"r"((int)line_size),
81  "r"((int)line_size*3),"m"(*p)
82  : "$10","memory"
83  );
84 }
85 
87  uint8_t *av_restrict pixels, ptrdiff_t line_size)
88 {
89  int64_t line_skip = line_size;
90  int64_t line_skip3;
91 
92  __asm__ volatile (
93  "dmtc1 %4, $f0 \n\t"
94  "daddu %1, %3, %3 \n\t"
95  "ldc1 $f2, 0(%2) \n\t"
96  "ldc1 $f10, 8(%2) \n\t"
97  "packsshb $f2, $f2, $f10 \n\t"
98  "ldc1 $f4, 16(%2) \n\t"
99  "ldc1 $f10, 24(%2) \n\t"
100  "packsshb $f4, $f4, $f10 \n\t"
101  "ldc1 $f6, 32(%2) \n\t"
102  "ldc1 $f10, 40(%2) \n\t"
103  "packsshb $f6, $f6, $f10 \n\t"
104  "ldc1 $f8, 48(%2) \n\t"
105  "ldc1 $f10, 56(%2) \n\t"
106  "packsshb $f8, $f8, $f10 \n\t"
107  "paddb $f2, $f2, $f0 \n\t"
108  "paddb $f4, $f4, $f0 \n\t"
109  "paddb $f6, $f6, $f0 \n\t"
110  "paddb $f8, $f8, $f0 \n\t"
111  "sdc1 $f2, 0(%0) \n\t"
112  "gssdxc1 $f4, 0(%0, %3) \n\t"
113  "gssdxc1 $f6, 0(%0, %1) \n\t"
114  "daddu %1, %1, %3 \n\t"
115  "gssdxc1 $f8, 0(%0, %1) \n\t"
116  "daddu $10, %1, %3 \n\t"
117  "daddu %0, %0, $10 \n\t"
118  "ldc1 $f2, 64(%2) \n\t"
119  "ldc1 $f10, 8+64(%2) \n\t"
120  "packsshb $f2, $f2, $f10 \n\t"
121  "ldc1 $f4, 16+64(%2) \n\t"
122  "ldc1 $f10, 24+64(%2) \n\t"
123  "packsshb $f4, $f4, $f10 \n\t"
124  "ldc1 $f6, 32+64(%2) \n\t"
125  "ldc1 $f10, 40+64(%2) \n\t"
126  "packsshb $f6, $f6, $f10 \n\t"
127  "ldc1 $f8, 48+64(%2) \n\t"
128  "ldc1 $f10, 56+64(%2) \n\t"
129  "packsshb $f8, $f8, $f10 \n\t"
130  "paddb $f2, $f2, $f0 \n\t"
131  "paddb $f4, $f4, $f0 \n\t"
132  "paddb $f6, $f6, $f0 \n\t"
133  "paddb $f8, $f8, $f0 \n\t"
134  "sdc1 $f2, 0(%0) \n\t"
135  "gssdxc1 $f4, 0(%0, %3) \n\t"
136  "daddu $10, %3, %3 \n\t"
137  "gssdxc1 $f6, 0(%0, $10) \n\t"
138  "gssdxc1 $f8, 0(%0, %1) \n\t"
139  : "+&r"(pixels),"=&r"(line_skip3)
140  : "r"(block),"r"(line_skip),"r"(ff_pb_80)
141  : "$10","memory"
142  );
143 }
144 
145 void ff_add_pixels_clamped_mmi(const int16_t *block,
146  uint8_t *av_restrict pixels, ptrdiff_t line_size)
147 {
148  const int16_t *p;
149  uint8_t *pix;
150  int i = 4;
151 
152  p = block;
153  pix = pixels;
154 
155  __asm__ volatile (
156  "xor $f14, $f14, $f14 \r\n"
157  ::
158  );
159 
160  do {
161  __asm__ volatile (
162  "ldc1 $f0, 0+%2 \r\n"
163  "ldc1 $f2, 8+%2 \r\n"
164  "ldc1 $f4, 16+%2 \r\n"
165  "ldc1 $f6, 24+%2 \r\n"
166  "ldc1 $f8, %0 \r\n"
167  "ldc1 $f12, %1 \r\n"
168  "mov.d $f10, $f8 \r\n"
169  "punpcklbh $f8, $f8, $f14 \r\n"
170  "punpckhbh $f10, $f10, $f14 \r\n"
171  "paddsh $f0, $f0, $f8 \r\n"
172  "paddsh $f2, $f2, $f10 \r\n"
173  "mov.d $f10, $f12 \r\n"
174  "punpcklbh $f12, $f12, $f14 \r\n"
175  "punpckhbh $f10, $f10, $f14 \r\n"
176  "paddsh $f4, $f4, $f12 \r\n"
177  "paddsh $f6, $f6, $f10 \r\n"
178  "packushb $f0, $f0, $f2 \r\n"
179  "packushb $f4, $f4, $f6 \r\n"
180  "sdc1 $f0, %0 \r\n"
181  "sdc1 $f4, %1 \r\n"
182  : "+m"(*pix),"+m"(*(pix+line_size))
183  : "m"(*p)
184  : "memory"
185  );
186 
187  pix += line_size*2;
188  p += 16;
189  } while (--i);
190 }
const uint64_t ff_pb_80
Definition: constants.c:53
uint8_t
void ff_put_signed_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_mmi.c:86
void ff_put_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_mmi.c:27
void ff_add_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_mmi.c:145
int pixels
Definition: avisynth_c.h:298
static int16_t block[64]
Definition: dct-test.c:110