35 "ldc1 %[ftmp0], 0x00(%[block]) \n\t"
36 "ldc1 %[ftmp1], 0x08(%[block]) \n\t"
37 "ldc1 %[ftmp2], 0x10(%[block]) \n\t"
38 "ldc1 %[ftmp3], 0x18(%[block]) \n\t"
39 "ldc1 %[ftmp4], 0x20(%[block]) \n\t"
40 "ldc1 %[ftmp5], 0x28(%[block]) \n\t"
41 "ldc1 %[ftmp6], 0x30(%[block]) \n\t"
42 "ldc1 %[ftmp7], 0x38(%[block]) \n\t"
43 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
44 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
45 "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
46 "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
47 "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
48 "sdc1 %[ftmp0], 0x00(%[pixels]) \n\t"
49 "sdc1 %[ftmp2], 0x00(%[addr0]) \n\t"
50 "gssdxc1 %[ftmp4], 0x00(%[addr0], %[line_size]) \n\t"
51 "gssdxc1 %[ftmp6], 0x00(%[pixels], %[line_sizex3]) \n\t"
52 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
53 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
54 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
55 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
56 [addr0]
"=&r"(addr[0]),
58 : [line_size]
"r"((
mips_reg)line_size),
59 [line_sizex3]
"r"((
mips_reg)(line_size*3)),
64 pixels += line_size*4;
68 "ldc1 %[ftmp0], 0x00(%[block]) \n\t"
69 "ldc1 %[ftmp1], 0x08(%[block]) \n\t"
70 "ldc1 %[ftmp2], 0x10(%[block]) \n\t"
71 "ldc1 %[ftmp3], 0x18(%[block]) \n\t"
72 "ldc1 %[ftmp4], 0x20(%[block]) \n\t"
73 "ldc1 %[ftmp5], 0x28(%[block]) \n\t"
74 "ldc1 %[ftmp6], 0x30(%[block]) \n\t"
75 "ldc1 %[ftmp7], 0x38(%[block]) \n\t"
76 PTR_ADDU "%[addr0], %[pixels], %[line_size] \n\t"
77 "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
78 "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
79 "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
80 "packushb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
81 "sdc1 %[ftmp0], 0x00(%[pixels]) \n\t"
82 "sdc1 %[ftmp2], 0x00(%[addr0]) \n\t"
83 "gssdxc1 %[ftmp4], 0x00(%[addr0], %[line_size]) \n\t"
84 "gssdxc1 %[ftmp6], 0x00(%[pixels], %[line_sizex3]) \n\t"
85 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
86 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
87 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
88 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
89 [addr0]
"=&r"(addr[0]),
91 : [line_size]
"r"((
mips_reg)line_size),
92 [line_sizex3]
"r"((
mips_reg)(line_size*3)),
101 int64_t line_skip = line_size;
102 int64_t line_skip3 = 0;
107 PTR_ADDU "%[line_skip3], %[line_skip], %[line_skip] \n\t"
108 "ldc1 %[ftmp1], 0x00(%[block]) \n\t"
109 "ldc1 %[ftmp0], 0x08(%[block]) \n\t"
110 "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
111 "ldc1 %[ftmp2], 0x10(%[block]) \n\t"
112 "ldc1 %[ftmp0], 0x18(%[block]) \n\t"
113 "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
114 "ldc1 %[ftmp3], 0x20(%[block]) \n\t"
115 "ldc1 %[ftmp0], 0x28(%[block]) \n\t"
116 "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
117 "ldc1 %[ftmp4], 48(%[block]) \n\t"
118 "ldc1 %[ftmp0], 56(%[block]) \n\t"
119 "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
120 "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
121 "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
122 "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
123 "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
124 "sdc1 %[ftmp1], 0x00(%[pixels]) \n\t"
125 "gssdxc1 %[ftmp2], 0x00(%[pixels], %[line_skip]) \n\t"
126 "gssdxc1 %[ftmp3], 0x00(%[pixels], %[line_skip3]) \n\t"
127 PTR_ADDU "%[line_skip3], %[line_skip3], %[line_skip] \n\t"
128 "gssdxc1 %[ftmp4], 0x00(%[pixels], %[line_skip3]) \n\t"
129 PTR_ADDU "%[addr0], %[line_skip3], %[line_skip] \n\t"
130 PTR_ADDU "%[pixels], %[pixels], %[addr0] \n\t"
131 "ldc1 %[ftmp1], 0x40(%[block]) \n\t"
132 "ldc1 %[ftmp0], 0x48(%[block]) \n\t"
133 "packsshb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
134 "ldc1 %[ftmp2], 0x50(%[block]) \n\t"
135 "ldc1 %[ftmp0], 0x58(%[block]) \n\t"
136 "packsshb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
137 "ldc1 %[ftmp3], 0x60(%[block]) \n\t"
138 "ldc1 %[ftmp0], 0x68(%[block]) \n\t"
139 "packsshb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
140 "ldc1 %[ftmp4], 0x70(%[block]) \n\t"
141 "ldc1 %[ftmp0], 0x78(%[block]) \n\t"
142 "packsshb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
143 "paddb %[ftmp1], %[ftmp1], %[ff_pb_80] \n\t"
144 "paddb %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t"
145 "paddb %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t"
146 "paddb %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t"
147 "sdc1 %[ftmp1], 0x00(%[pixels]) \n\t"
148 "gssdxc1 %[ftmp2], 0x00(%[pixels], %[line_skip]) \n\t"
149 PTR_ADDU "%[addr0], %[line_skip], %[line_skip] \n\t"
150 "gssdxc1 %[ftmp3], 0x00(%[pixels], %[addr0]) \n\t"
151 "gssdxc1 %[ftmp4], 0x00(%[pixels], %[line_skip3]) \n\t"
152 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
153 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
154 [ftmp4]
"=&f"(ftmp[4]),
155 [addr0]
"=&r"(addr[0]),
158 [line_skip]
"r"((
mips_reg)line_skip),
171 "li %[tmp0], 0x04 \n\t"
172 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
174 "ldc1 %[ftmp1], 0x00(%[block]) \n\t"
175 "ldc1 %[ftmp2], 0x08(%[block]) \n\t"
176 "ldc1 %[ftmp3], 0x10(%[block]) \n\t"
177 "ldc1 %[ftmp4], 0x18(%[block]) \n\t"
178 "ldc1 %[ftmp5], 0x00(%[pixels]) \n\t"
179 "gsldxc1 %[ftmp6], 0x00(%[pixels], %[line_size]) \n\t"
180 "mov.d %[ftmp7], %[ftmp5] \n\t"
181 "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
182 "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
183 "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
184 "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
185 "mov.d %[ftmp7], %[ftmp6] \n\t"
186 "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
187 "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
188 "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
189 "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
190 "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
191 "packushb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
192 "sdc1 %[ftmp1], 0x00(%[pixels]) \n\t"
193 "gssdxc1 %[ftmp3], 0x00(%[pixels], %[line_size]) \n\t"
194 "addi %[tmp0], %[tmp0], -0x01 \n\t"
195 PTR_ADDIU "%[block], %[block], 0x20 \n\t"
196 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
197 PTR_ADDU "%[pixels], %[pixels], %[line_size] \n\t"
199 : [ftmp0]
"=&f"(ftmp[0]), [ftmp1]
"=&f"(ftmp[1]),
200 [ftmp2]
"=&f"(ftmp[2]), [ftmp3]
"=&f"(ftmp[3]),
201 [ftmp4]
"=&f"(ftmp[4]), [ftmp5]
"=&f"(ftmp[5]),
202 [ftmp6]
"=&f"(ftmp[6]), [ftmp7]
"=&f"(ftmp[7]),
205 : [line_size]
"r"((
mips_reg)line_size)
MIPS assembly defines from sys/asm.h but rewritten for use with C inline assembly (rather than from within .S files).
void ff_put_signed_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
void ff_put_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
void ff_add_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)