FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
idctdsp_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized idctdsp
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  *
7  * This file is part of FFmpeg.
8  *
9  * FFmpeg is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * FFmpeg is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with FFmpeg; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22  */
23 
24 #include "idctdsp_mips.h"
25 #include "constants.h"
26 #include "libavutil/mips/asmdefs.h"
27 
/**
 * Store an 8x8 block of 16-bit coefficients as 8-bit pixels.
 *
 * Every row of eight int16_t values is narrowed to eight bytes with packushb
 * (halfword-to-byte pack with unsigned saturation) and written to one row of
 * the destination.  The work is done by two identical inline-asm statements
 * that each handle four rows; plain C advances both pointers in between.
 *
 * @param block     source coefficients; 64 values (128 bytes) are read
 * @param pixels    destination; 8 bytes are written to each of 8 rows
 * @param line_size byte distance between consecutive destination rows
 */
28 void ff_put_pixels_clamped_mmi(const int16_t *block,
29  uint8_t *av_restrict pixels, ptrdiff_t line_size)
30 {
31  double ftmp[8];
32  mips_reg addr[1];
33 
    /* Rows 0-3: load 4 rows x 8 coefficients (64 bytes) into ftmp0..ftmp7. */
34  __asm__ volatile (
35  "ldc1 %[ftmp0], 0x00(%[block]) \n\t"
36  "ldc1 %[ftmp1], 0x08(%[block]) \n\t"
37  "ldc1 %[ftmp2], 0x10(%[block]) \n\t"
38  "ldc1 %[ftmp3], 0x18(%[block]) \n\t"
39  "ldc1 %[ftmp4], 0x20(%[block]) \n\t"
40  "ldc1 %[ftmp5], 0x28(%[block]) \n\t"
41  "ldc1 %[ftmp6], 0x30(%[block]) \n\t"
42  "ldc1 %[ftmp7], 0x38(%[block]) \n\t"
    /* addr0 = pixels + line_size, i.e. the address of the second row. */
43  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
    /* Merge each pair of registers (one row) into 8 saturated bytes. */
44  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
45  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
46  "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
47  "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
    /* Row stores at pixels + 0, 1, 2 and 3 times line_size. */
48  "sdc1 %[ftmp0], 0x00(%[pixels]) \n\t"
49  "sdc1 %[ftmp2], 0x00(%[addr0]) \n\t"
50  "gssdxc1 %[ftmp4], 0x00(%[addr0], %[line_size]) \n\t"
51  "gssdxc1 %[ftmp6], 0x00(%[pixels], %[line_sizex3]) \n\t"
52  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
53  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
54  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
55  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
56  [addr0]"=&r"(addr[0]),
    /* pixels is declared in/out although no instruction above writes it. */
57  [pixels]"+&r"(pixels)
58  : [line_size]"r"((mips_reg)line_size),
59  [line_sizex3]"r"((mips_reg)(line_size*3)),
60  [block]"r"(block)
61  : "memory"
62  );
63 
    /* Step to the lower half: 4 destination rows and 4 * 8 coefficients. */
64  pixels += line_size*4;
65  block += 32;
66 
    /* Rows 4-7: same instruction sequence as the first statement. */
67  __asm__ volatile (
68  "ldc1 %[ftmp0], 0x00(%[block]) \n\t"
69  "ldc1 %[ftmp1], 0x08(%[block]) \n\t"
70  "ldc1 %[ftmp2], 0x10(%[block]) \n\t"
71  "ldc1 %[ftmp3], 0x18(%[block]) \n\t"
72  "ldc1 %[ftmp4], 0x20(%[block]) \n\t"
73  "ldc1 %[ftmp5], 0x28(%[block]) \n\t"
74  "ldc1 %[ftmp6], 0x30(%[block]) \n\t"
75  "ldc1 %[ftmp7], 0x38(%[block]) \n\t"
76  PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
77  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
78  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
79  "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
80  "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
81  "sdc1 %[ftmp0], 0x00(%[pixels]) \n\t"
82  "sdc1 %[ftmp2], 0x00(%[addr0]) \n\t"
83  "gssdxc1 %[ftmp4], 0x00(%[addr0], %[line_size]) \n\t"
84  "gssdxc1 %[ftmp6], 0x00(%[pixels], %[line_sizex3]) \n\t"
85  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
86  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
87  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
88  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
89  [addr0]"=&r"(addr[0]),
90  [pixels]"+&r"(pixels)
91  : [line_size]"r"((mips_reg)line_size),
92  [line_sizex3]"r"((mips_reg)(line_size*3)),
93  [block]"r"(block)
94  : "memory"
95  );
96 }
97 
/**
 * Store an 8x8 block of signed 16-bit coefficients as biased 8-bit pixels.
 *
 * NOTE(review): listing line 98, which carries the return type and function
 * name, is absent from this extract.  The cross-reference index at the end of
 * the page gives the signature as
 *   void ff_put_signed_pixels_clamped_mmi(const int16_t *block,
 *       uint8_t *av_restrict pixels, ptrdiff_t line_size)
 *
 * Every row of eight int16_t values is narrowed to eight bytes with packsshb
 * (halfword-to-byte pack with signed saturation); ff_pb_80 is then added to
 * each byte with paddb before the row is stored.  ff_pb_80 is defined in
 * constants.c and its value is not visible here -- presumably 0x80 in every
 * byte, TODO confirm.
 *
 * @param block     source coefficients; 64 values (128 bytes) are read
 * @param pixels    destination; 8 bytes are written to each of 8 rows
 * @param line_size byte distance between consecutive destination rows
 */
99  uint8_t *av_restrict pixels, ptrdiff_t line_size)
100 {
101  int64_t line_skip = line_size;
    /* The initial 0 is never used: the first asm instruction overwrites it. */
102  int64_t line_skip3 = 0;
103  double ftmp[5];
104  mips_reg addr[1];
105 
106  __asm__ volatile (
    /* Despite its name, line_skip3 holds 2 * line_skip at this point. */
107  PTR_ADDU "%[line_skip3], %[line_skip], %[line_skip] \n\t"
    /* Rows 0-3: ftmp0 is scratch for the upper four coefficients of a row. */
108  "ldc1 %[ftmp1], 0x00(%[block]) \n\t"
109  "ldc1 %[ftmp0], 0x08(%[block]) \n\t"
110  "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
111  "ldc1 %[ftmp2], 0x10(%[block]) \n\t"
112  "ldc1 %[ftmp0], 0x18(%[block]) \n\t"
113  "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
114  "ldc1 %[ftmp3], 0x20(%[block]) \n\t"
115  "ldc1 %[ftmp0], 0x28(%[block]) \n\t"
116  "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
    /* Decimal offsets 48 and 56 equal 0x30 and 0x38 (fourth row). */
117  "ldc1 %[ftmp4], 48(%[block]) \n\t"
118  "ldc1 %[ftmp0], 56(%[block]) \n\t"
119  "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
    /* Add the ff_pb_80 bias to every packed byte. */
120  "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
121  "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
122  "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
123  "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
    /* Store rows 0, 1 and 2 (row 2 uses the 2 * line_skip offset). */
124  "sdc1 %[ftmp1], 0x00(%[pixels]) \n\t"
125  "gssdxc1 %[ftmp2], 0x00(%[pixels], %[line_skip]) \n\t"
126  "gssdxc1 %[ftmp3], 0x00(%[pixels], %[line_skip3]) \n\t"
    /* line_skip3 becomes 3 * line_skip and keeps that value from here on. */
127  PTR_ADDU "%[line_skip3], %[line_skip3], %[line_skip] \n\t"
128  "gssdxc1 %[ftmp4], 0x00(%[pixels], %[line_skip3]) \n\t"
    /* addr0 = 4 * line_skip; advance pixels to row 4. */
129  PTR_ADDU "%[addr0], %[line_skip3], %[line_skip] \n\t"
130  PTR_ADDU "%[pixels], %[pixels], %[addr0] \n\t"
    /* Rows 4-7: same pack-and-bias sequence on the next 64 bytes of block. */
131  "ldc1 %[ftmp1], 0x40(%[block]) \n\t"
132  "ldc1 %[ftmp0], 0x48(%[block]) \n\t"
133  "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
134  "ldc1 %[ftmp2], 0x50(%[block]) \n\t"
135  "ldc1 %[ftmp0], 0x58(%[block]) \n\t"
136  "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
137  "ldc1 %[ftmp3], 0x60(%[block]) \n\t"
138  "ldc1 %[ftmp0], 0x68(%[block]) \n\t"
139  "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
140  "ldc1 %[ftmp4], 0x70(%[block]) \n\t"
141  "ldc1 %[ftmp0], 0x78(%[block]) \n\t"
142  "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
143  "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
144  "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
145  "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
146  "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
147  "sdc1 %[ftmp1], 0x00(%[pixels]) \n\t"
148  "gssdxc1 %[ftmp2], 0x00(%[pixels], %[line_skip]) \n\t"
    /* addr0 is reused as the 2 * line_skip offset for the third row here. */
149  PTR_ADDU "%[addr0], %[line_skip], %[line_skip] \n\t"
150  "gssdxc1 %[ftmp3], 0x00(%[pixels], %[addr0]) \n\t"
151  "gssdxc1 %[ftmp4], 0x00(%[pixels], %[line_skip3]) \n\t"
152  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
153  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
154  [ftmp4]"=&f"(ftmp[4]),
155  [addr0]"=&r"(addr[0]),
    /* Both pixels and line_skip3 are modified by the asm, hence in/out. */
156  [pixels]"+&r"(pixels), [line_skip3]"+&r"(line_skip3)
157  : [block]"r"(block),
158  [line_skip]"r"((mips_reg)line_skip),
159  [ff_pb_80]"f"(ff_pb_80)
160  : "memory"
161  );
162 }
163 
/**
 * Add an 8x8 block of 16-bit coefficients to existing 8-bit pixels in place.
 *
 * The asm loop runs four times and handles two rows per pass.  Each pass
 * widens the stored pixels to halfwords by interleaving them with a zeroed
 * register, adds the matching coefficients with paddh, narrows the sums back
 * to bytes with packushb (halfword-to-byte pack with unsigned saturation)
 * and writes the two rows back to the addresses they were read from.
 *
 * @param block     coefficients to add; 64 values (128 bytes) are read
 * @param pixels    rows of 8 bytes that are read, updated and written back
 * @param line_size byte distance between consecutive pixel rows
 */
164 void ff_add_pixels_clamped_mmi(const int16_t *block,
165  uint8_t *av_restrict pixels, ptrdiff_t line_size)
166 {
167  double ftmp[8];
168  uint64_t tmp[1];
169 
170  __asm__ volatile (
    /* tmp0 counts the four passes; ftmp0 is cleared to serve as zero. */
171  "li %[tmp0], 0x04 \n\t"
172  "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
173  "1: \n\t"
    /* Two rows of coefficients: ftmp1/ftmp2 and ftmp3/ftmp4. */
174  "ldc1 %[ftmp1], 0x00(%[block]) \n\t"
175  "ldc1 %[ftmp2], 0x08(%[block]) \n\t"
176  "ldc1 %[ftmp3], 0x10(%[block]) \n\t"
177  "ldc1 %[ftmp4], 0x18(%[block]) \n\t"
    /* Two rows of current pixels: ftmp5 (pixels) and ftmp6 (next row). */
178  "ldc1 %[ftmp5], 0x00(%[pixels]) \n\t"
179  "gsldxc1 %[ftmp6], 0x00(%[pixels], %[line_size]) \n\t"
    /* First row: widen low and high pixel halves against zero, then add. */
180  "mov.d %[ftmp7], %[ftmp5] \n\t"
181  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
182  "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
183  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
184  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
    /* Second row: same widening and addition, reusing ftmp7 as scratch. */
185  "mov.d %[ftmp7], %[ftmp6] \n\t"
186  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
187  "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
188  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
189  "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
    /* Narrow both rows back to bytes and overwrite the source pixels. */
190  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
191  "packushb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
192  "sdc1 %[ftmp1], 0x00(%[pixels]) \n\t"
193  "gssdxc1 %[ftmp3], 0x00(%[pixels], %[line_size]) \n\t"
    /* Next pass: block advances 0x20 bytes, pixels advances two rows. */
194  "addi %[tmp0], %[tmp0], -0x01 \n\t"
195  PTR_ADDIU "%[block], %[block], 0x20 \n\t"
196  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
197  PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
198  "bnez %[tmp0], 1b"
199  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
200  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
201  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
202  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
203  [tmp0]"=&r"(tmp[0]),
    /* Both pointers are advanced inside the loop, hence in/out operands. */
204  [pixels]"+&r"(pixels), [block]"+&r"(block)
205  : [line_size]"r"((mips_reg)line_size)
206  : "memory"
207  );
208 }
#define mips_reg
Definition: asmdefs.h:44
MIPS assembly defines from sys/asm.h but rewritten for use with C inline assembly (rather than from within .s files).
static int16_t block[64]
Definition: dct.c:113
const uint64_t ff_pb_80
Definition: constants.c:54
uint8_t
void ff_put_signed_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_mmi.c:98
#define PTR_ADDIU
Definition: asmdefs.h:48
void ff_put_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_mmi.c:28
void ff_add_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
Definition: idctdsp_mmi.c:164
int pixels
Definition: avisynth_c.h:298
static uint8_t tmp[8]
Definition: des.c:38
#define PTR_ADDU
Definition: asmdefs.h:47