FFmpeg
vp3dsp_idct_msa.c — VP3/Theora IDCT, DC-add and loop-filter routines for MIPS MSA.
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 gxw <guxiwei-hf@loongson.cn>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
#include "vp3dsp_mips.h"
#include "libavutil/mips/generic_macros_msa.h"
#include "libavutil/intreadwrite.h"
#include "libavcodec/rnd_avg.h"
25 
26 static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
27 {
28  v8i16 r0, r1, r2, r3, r4, r5, r6, r7, sign;
29  v4i32 r0_r, r0_l, r1_r, r1_l, r2_r, r2_l, r3_r, r3_l,
30  r4_r, r4_l, r5_r, r5_l, r6_r, r6_l, r7_r, r7_l;
31  v4i32 A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
32  v4i32 Ed, Gd, Add, Bdd, Fd, Hd;
33  v16u8 sign_l;
34  v16i8 d0, d1, d2, d3, d4, d5, d6, d7;
35  v4i32 c0, c1, c2, c3, c4, c5, c6, c7;
36  v4i32 f0, f1, f2, f3, f4, f5, f6, f7;
37  v4i32 sign_t;
38  v16i8 zero = {0};
39  v16i8 mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0};
40  v4i32 cnst64277w = {64277, 64277, 64277, 64277};
41  v4i32 cnst60547w = {60547, 60547, 60547, 60547};
42  v4i32 cnst54491w = {54491, 54491, 54491, 54491};
43  v4i32 cnst46341w = {46341, 46341, 46341, 46341};
44  v4i32 cnst36410w = {36410, 36410, 36410, 36410};
45  v4i32 cnst25080w = {25080, 25080, 25080, 25080};
46  v4i32 cnst12785w = {12785, 12785, 12785, 12785};
47  v4i32 cnst8w = {8, 8, 8, 8};
48  v4i32 cnst2048w = {2048, 2048, 2048, 2048};
49  v4i32 cnst128w = {128, 128, 128, 128};
50 
51  /* Extended input data */
52  LD_SH8(input, 8, r0, r1, r2, r3, r4, r5, r6, r7);
53  sign = __msa_clti_s_h(r0, 0);
54  r0_r = (v4i32) __msa_ilvr_h(sign, r0);
55  r0_l = (v4i32) __msa_ilvl_h(sign, r0);
56  sign = __msa_clti_s_h(r1, 0);
57  r1_r = (v4i32) __msa_ilvr_h(sign, r1);
58  r1_l = (v4i32) __msa_ilvl_h(sign, r1);
59  sign = __msa_clti_s_h(r2, 0);
60  r2_r = (v4i32) __msa_ilvr_h(sign, r2);
61  r2_l = (v4i32) __msa_ilvl_h(sign, r2);
62  sign = __msa_clti_s_h(r3, 0);
63  r3_r = (v4i32) __msa_ilvr_h(sign, r3);
64  r3_l = (v4i32) __msa_ilvl_h(sign, r3);
65  sign = __msa_clti_s_h(r4, 0);
66  r4_r = (v4i32) __msa_ilvr_h(sign, r4);
67  r4_l = (v4i32) __msa_ilvl_h(sign, r4);
68  sign = __msa_clti_s_h(r5, 0);
69  r5_r = (v4i32) __msa_ilvr_h(sign, r5);
70  r5_l = (v4i32) __msa_ilvl_h(sign, r5);
71  sign = __msa_clti_s_h(r6, 0);
72  r6_r = (v4i32) __msa_ilvr_h(sign, r6);
73  r6_l = (v4i32) __msa_ilvl_h(sign, r6);
74  sign = __msa_clti_s_h(r7, 0);
75  r7_r = (v4i32) __msa_ilvr_h(sign, r7);
76  r7_l = (v4i32) __msa_ilvl_h(sign, r7);
77 
78  /* Right part */
79  A = ((r1_r * cnst64277w) >> 16) + ((r7_r * cnst12785w) >> 16);
80  B = ((r1_r * cnst12785w) >> 16) - ((r7_r * cnst64277w) >> 16);
81  C = ((r3_r * cnst54491w) >> 16) + ((r5_r * cnst36410w) >> 16);
82  D = ((r5_r * cnst54491w) >> 16) - ((r3_r * cnst36410w) >> 16);
83  Ad = ((A - C) * cnst46341w) >> 16;
84  Bd = ((B - D) * cnst46341w) >> 16;
85  Cd = A + C;
86  Dd = B + D;
87  E = ((r0_r + r4_r) * cnst46341w) >> 16;
88  F = ((r0_r - r4_r) * cnst46341w) >> 16;
89  G = ((r2_r * cnst60547w) >> 16) + ((r6_r * cnst25080w) >> 16);
90  H = ((r2_r * cnst25080w) >> 16) - ((r6_r * cnst60547w) >> 16);
91  Ed = E - G;
92  Gd = E + G;
93  Add = F + Ad;
94  Bdd = Bd - H;
95  Fd = F - Ad;
96  Hd = Bd + H;
97  r0_r = Gd + Cd;
98  r7_r = Gd - Cd;
99  r1_r = Add + Hd;
100  r2_r = Add - Hd;
101  r3_r = Ed + Dd;
102  r4_r = Ed - Dd;
103  r5_r = Fd + Bdd;
104  r6_r = Fd - Bdd;
105 
106  /* Left part */
107  A = ((r1_l * cnst64277w) >> 16) + ((r7_l * cnst12785w) >> 16);
108  B = ((r1_l * cnst12785w) >> 16) - ((r7_l * cnst64277w) >> 16);
109  C = ((r3_l * cnst54491w) >> 16) + ((r5_l * cnst36410w) >> 16);
110  D = ((r5_l * cnst54491w) >> 16) - ((r3_l * cnst36410w) >> 16);
111  Ad = ((A - C) * cnst46341w) >> 16;
112  Bd = ((B - D) * cnst46341w) >> 16;
113  Cd = A + C;
114  Dd = B + D;
115  E = ((r0_l + r4_l) * cnst46341w) >> 16;
116  F = ((r0_l - r4_l) * cnst46341w) >> 16;
117  G = ((r2_l * cnst60547w) >> 16) + ((r6_l * cnst25080w) >> 16);
118  H = ((r2_l * cnst25080w) >> 16) - ((r6_l * cnst60547w) >> 16);
119  Ed = E - G;
120  Gd = E + G;
121  Add = F + Ad;
122  Bdd = Bd - H;
123  Fd = F - Ad;
124  Hd = Bd + H;
125  r0_l = Gd + Cd;
126  r7_l = Gd - Cd;
127  r1_l = Add + Hd;
128  r2_l = Add - Hd;
129  r3_l = Ed + Dd;
130  r4_l = Ed - Dd;
131  r5_l = Fd + Bdd;
132  r6_l = Fd - Bdd;
133 
134  /* Row 0 to 3 */
135  TRANSPOSE4x4_SW_SW(r0_r, r1_r, r2_r, r3_r,
136  r0_r, r1_r, r2_r, r3_r);
137  TRANSPOSE4x4_SW_SW(r0_l, r1_l, r2_l, r3_l,
138  r0_l, r1_l, r2_l, r3_l);
139  A = ((r1_r * cnst64277w) >> 16) + ((r3_l * cnst12785w) >> 16);
140  B = ((r1_r * cnst12785w) >> 16) - ((r3_l * cnst64277w) >> 16);
141  C = ((r3_r * cnst54491w) >> 16) + ((r1_l * cnst36410w) >> 16);
142  D = ((r1_l * cnst54491w) >> 16) - ((r3_r * cnst36410w) >> 16);
143  Ad = ((A - C) * cnst46341w) >> 16;
144  Bd = ((B - D) * cnst46341w) >> 16;
145  Cd = A + C;
146  Dd = B + D;
147  E = ((r0_r + r0_l) * cnst46341w) >> 16;
148  E += cnst8w;
149  F = ((r0_r - r0_l) * cnst46341w) >> 16;
150  F += cnst8w;
151  if (type == 1) { // HACK
152  E += cnst2048w;
153  F += cnst2048w;
154  }
155  G = ((r2_r * cnst60547w) >> 16) + ((r2_l * cnst25080w) >> 16);
156  H = ((r2_r * cnst25080w) >> 16) - ((r2_l * cnst60547w) >> 16);
157  Ed = E - G;
158  Gd = E + G;
159  Add = F + Ad;
160  Bdd = Bd - H;
161  Fd = F - Ad;
162  Hd = Bd + H;
163  A = (Gd + Cd) >> 4;
164  B = (Gd - Cd) >> 4;
165  C = (Add + Hd) >> 4;
166  D = (Add - Hd) >> 4;
167  E = (Ed + Dd) >> 4;
168  F = (Ed - Dd) >> 4;
169  G = (Fd + Bdd) >> 4;
170  H = (Fd - Bdd) >> 4;
171  if (type != 1) {
172  LD_SB8(dst, stride, d0, d1, d2, d3, d4, d5, d6, d7);
173  ILVR_B4_SW(zero, d0, zero, d1, zero, d2, zero, d3,
174  f0, f1, f2, f3);
175  ILVR_B4_SW(zero, d4, zero, d5, zero, d6, zero, d7,
176  f4, f5, f6, f7);
177  ILVR_H4_SW(zero, f0, zero, f1, zero, f2, zero, f3,
178  c0, c1, c2, c3);
179  ILVR_H4_SW(zero, f4, zero, f5, zero, f6, zero, f7,
180  c4, c5, c6, c7);
181  A += c0;
182  B += c7;
183  C += c1;
184  D += c2;
185  E += c3;
186  F += c4;
187  G += c5;
188  H += c6;
189  }
190  A = CLIP_SW_0_255(A);
191  B = CLIP_SW_0_255(B);
192  C = CLIP_SW_0_255(C);
193  D = CLIP_SW_0_255(D);
194  E = CLIP_SW_0_255(E);
195  F = CLIP_SW_0_255(F);
196  G = CLIP_SW_0_255(G);
197  H = CLIP_SW_0_255(H);
198  sign_l = __msa_or_v((v16u8)r1_r, (v16u8)r2_r);
199  sign_l = __msa_or_v(sign_l, (v16u8)r3_r);
200  sign_l = __msa_or_v(sign_l, (v16u8)r0_l);
201  sign_l = __msa_or_v(sign_l, (v16u8)r1_l);
202  sign_l = __msa_or_v(sign_l, (v16u8)r2_l);
203  sign_l = __msa_or_v(sign_l, (v16u8)r3_l);
204  sign_t = __msa_ceqi_w((v4i32)sign_l, 0);
205  Add = ((r0_r * cnst46341w) + (8 << 16)) >> 20;
206  if (type == 1) {
207  Bdd = Add + cnst128w;
208  Bdd = CLIP_SW_0_255(Bdd);
209  Ad = Bdd;
210  Bd = Bdd;
211  Cd = Bdd;
212  Dd = Bdd;
213  Ed = Bdd;
214  Fd = Bdd;
215  Gd = Bdd;
216  Hd = Bdd;
217  } else {
218  Ad = Add + c0;
219  Bd = Add + c1;
220  Cd = Add + c2;
221  Dd = Add + c3;
222  Ed = Add + c4;
223  Fd = Add + c5;
224  Gd = Add + c6;
225  Hd = Add + c7;
226  Ad = CLIP_SW_0_255(Ad);
227  Bd = CLIP_SW_0_255(Bd);
228  Cd = CLIP_SW_0_255(Cd);
229  Dd = CLIP_SW_0_255(Dd);
230  Ed = CLIP_SW_0_255(Ed);
231  Fd = CLIP_SW_0_255(Fd);
232  Gd = CLIP_SW_0_255(Gd);
233  Hd = CLIP_SW_0_255(Hd);
234  }
235  Ad = (v4i32)__msa_and_v((v16u8)Ad, (v16u8)sign_t);
236  Bd = (v4i32)__msa_and_v((v16u8)Bd, (v16u8)sign_t);
237  Cd = (v4i32)__msa_and_v((v16u8)Cd, (v16u8)sign_t);
238  Dd = (v4i32)__msa_and_v((v16u8)Dd, (v16u8)sign_t);
239  Ed = (v4i32)__msa_and_v((v16u8)Ed, (v16u8)sign_t);
240  Fd = (v4i32)__msa_and_v((v16u8)Fd, (v16u8)sign_t);
241  Gd = (v4i32)__msa_and_v((v16u8)Gd, (v16u8)sign_t);
242  Hd = (v4i32)__msa_and_v((v16u8)Hd, (v16u8)sign_t);
243  sign_t = __msa_ceqi_w(sign_t, 0);
244  A = (v4i32)__msa_and_v((v16u8)A, (v16u8)sign_t);
245  B = (v4i32)__msa_and_v((v16u8)B, (v16u8)sign_t);
246  C = (v4i32)__msa_and_v((v16u8)C, (v16u8)sign_t);
247  D = (v4i32)__msa_and_v((v16u8)D, (v16u8)sign_t);
248  E = (v4i32)__msa_and_v((v16u8)E, (v16u8)sign_t);
249  F = (v4i32)__msa_and_v((v16u8)F, (v16u8)sign_t);
250  G = (v4i32)__msa_and_v((v16u8)G, (v16u8)sign_t);
251  H = (v4i32)__msa_and_v((v16u8)H, (v16u8)sign_t);
252  r0_r = Ad + A;
253  r1_r = Bd + C;
254  r2_r = Cd + D;
255  r3_r = Dd + E;
256  r0_l = Ed + F;
257  r1_l = Fd + G;
258  r2_l = Gd + H;
259  r3_l = Hd + B;
260 
261  /* Row 4 to 7 */
262  TRANSPOSE4x4_SW_SW(r4_r, r5_r, r6_r, r7_r,
263  r4_r, r5_r, r6_r, r7_r);
264  TRANSPOSE4x4_SW_SW(r4_l, r5_l, r6_l, r7_l,
265  r4_l, r5_l, r6_l, r7_l);
266  A = ((r5_r * cnst64277w) >> 16) + ((r7_l * cnst12785w) >> 16);
267  B = ((r5_r * cnst12785w) >> 16) - ((r7_l * cnst64277w) >> 16);
268  C = ((r7_r * cnst54491w) >> 16) + ((r5_l * cnst36410w) >> 16);
269  D = ((r5_l * cnst54491w) >> 16) - ((r7_r * cnst36410w) >> 16);
270  Ad = ((A - C) * cnst46341w) >> 16;
271  Bd = ((B - D) * cnst46341w) >> 16;
272  Cd = A + C;
273  Dd = B + D;
274  E = ((r4_r + r4_l) * cnst46341w) >> 16;
275  E += cnst8w;
276  F = ((r4_r - r4_l) * cnst46341w) >> 16;
277  F += cnst8w;
278  if (type == 1) { // HACK
279  E += cnst2048w;
280  F += cnst2048w;
281  }
282  G = ((r6_r * cnst60547w) >> 16) + ((r6_l * cnst25080w) >> 16);
283  H = ((r6_r * cnst25080w) >> 16) - ((r6_l * cnst60547w) >> 16);
284  Ed = E - G;
285  Gd = E + G;
286  Add = F + Ad;
287  Bdd = Bd - H;
288  Fd = F - Ad;
289  Hd = Bd + H;
290  A = (Gd + Cd) >> 4;
291  B = (Gd - Cd) >> 4;
292  C = (Add + Hd) >> 4;
293  D = (Add - Hd) >> 4;
294  E = (Ed + Dd) >> 4;
295  F = (Ed - Dd) >> 4;
296  G = (Fd + Bdd) >> 4;
297  H = (Fd - Bdd) >> 4;
298  if (type != 1) {
299  ILVL_H4_SW(zero, f0, zero, f1, zero, f2, zero, f3,
300  c0, c1, c2, c3);
301  ILVL_H4_SW(zero, f4, zero, f5, zero, f6, zero, f7,
302  c4, c5, c6, c7);
303  A += c0;
304  B += c7;
305  C += c1;
306  D += c2;
307  E += c3;
308  F += c4;
309  G += c5;
310  H += c6;
311  }
312  A = CLIP_SW_0_255(A);
313  B = CLIP_SW_0_255(B);
314  C = CLIP_SW_0_255(C);
315  D = CLIP_SW_0_255(D);
316  E = CLIP_SW_0_255(E);
317  F = CLIP_SW_0_255(F);
318  G = CLIP_SW_0_255(G);
319  H = CLIP_SW_0_255(H);
320  sign_l = __msa_or_v((v16u8)r5_r, (v16u8)r6_r);
321  sign_l = __msa_or_v(sign_l, (v16u8)r7_r);
322  sign_l = __msa_or_v(sign_l, (v16u8)r4_l);
323  sign_l = __msa_or_v(sign_l, (v16u8)r5_l);
324  sign_l = __msa_or_v(sign_l, (v16u8)r6_l);
325  sign_l = __msa_or_v(sign_l, (v16u8)r7_l);
326  sign_t = __msa_ceqi_w((v4i32)sign_l, 0);
327  Add = ((r4_r * cnst46341w) + (8 << 16)) >> 20;
328  if (type == 1) {
329  Bdd = Add + cnst128w;
330  Bdd = CLIP_SW_0_255(Bdd);
331  Ad = Bdd;
332  Bd = Bdd;
333  Cd = Bdd;
334  Dd = Bdd;
335  Ed = Bdd;
336  Fd = Bdd;
337  Gd = Bdd;
338  Hd = Bdd;
339  } else {
340  Ad = Add + c0;
341  Bd = Add + c1;
342  Cd = Add + c2;
343  Dd = Add + c3;
344  Ed = Add + c4;
345  Fd = Add + c5;
346  Gd = Add + c6;
347  Hd = Add + c7;
348  Ad = CLIP_SW_0_255(Ad);
349  Bd = CLIP_SW_0_255(Bd);
350  Cd = CLIP_SW_0_255(Cd);
351  Dd = CLIP_SW_0_255(Dd);
352  Ed = CLIP_SW_0_255(Ed);
353  Fd = CLIP_SW_0_255(Fd);
354  Gd = CLIP_SW_0_255(Gd);
355  Hd = CLIP_SW_0_255(Hd);
356  }
357  Ad = (v4i32)__msa_and_v((v16u8)Ad, (v16u8)sign_t);
358  Bd = (v4i32)__msa_and_v((v16u8)Bd, (v16u8)sign_t);
359  Cd = (v4i32)__msa_and_v((v16u8)Cd, (v16u8)sign_t);
360  Dd = (v4i32)__msa_and_v((v16u8)Dd, (v16u8)sign_t);
361  Ed = (v4i32)__msa_and_v((v16u8)Ed, (v16u8)sign_t);
362  Fd = (v4i32)__msa_and_v((v16u8)Fd, (v16u8)sign_t);
363  Gd = (v4i32)__msa_and_v((v16u8)Gd, (v16u8)sign_t);
364  Hd = (v4i32)__msa_and_v((v16u8)Hd, (v16u8)sign_t);
365  sign_t = __msa_ceqi_w(sign_t, 0);
366  A = (v4i32)__msa_and_v((v16u8)A, (v16u8)sign_t);
367  B = (v4i32)__msa_and_v((v16u8)B, (v16u8)sign_t);
368  C = (v4i32)__msa_and_v((v16u8)C, (v16u8)sign_t);
369  D = (v4i32)__msa_and_v((v16u8)D, (v16u8)sign_t);
370  E = (v4i32)__msa_and_v((v16u8)E, (v16u8)sign_t);
371  F = (v4i32)__msa_and_v((v16u8)F, (v16u8)sign_t);
372  G = (v4i32)__msa_and_v((v16u8)G, (v16u8)sign_t);
373  H = (v4i32)__msa_and_v((v16u8)H, (v16u8)sign_t);
374  r4_r = Ad + A;
375  r5_r = Bd + C;
376  r6_r = Cd + D;
377  r7_r = Dd + E;
378  r4_l = Ed + F;
379  r5_l = Fd + G;
380  r6_l = Gd + H;
381  r7_l = Hd + B;
382  VSHF_B2_SB(r0_r, r4_r, r1_r, r5_r, mask, mask, d0, d1);
383  VSHF_B2_SB(r2_r, r6_r, r3_r, r7_r, mask, mask, d2, d3);
384  VSHF_B2_SB(r0_l, r4_l, r1_l, r5_l, mask, mask, d4, d5);
385  VSHF_B2_SB(r2_l, r6_l, r3_l, r7_l, mask, mask, d6, d7);
386 
387  /* Final sequence of operations over-write original dst */
388  ST_D1(d0, 0, dst);
389  ST_D1(d1, 0, dst + stride);
390  ST_D1(d2, 0, dst + 2 * stride);
391  ST_D1(d3, 0, dst + 3 * stride);
392  ST_D1(d4, 0, dst + 4 * stride);
393  ST_D1(d5, 0, dst + 5 * stride);
394  ST_D1(d6, 0, dst + 6 * stride);
395  ST_D1(d7, 0, dst + 7 * stride);
396 }
397 
/**
 * VP3 IDCT "put": transform @p block and overwrite @p dest with the result.
 * The coefficient block is cleared afterwards, as the VP3 DSP API requires.
 */
void ff_vp3_idct_put_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    /* type 1 selects the overwrite path (+128 bias) inside idct_msa() */
    idct_msa(dest, line_size, block, 1);
    memset(block, 0, 64 * sizeof(*block));
}
403 
/**
 * VP3 IDCT "add": transform @p block and add the result to the pixels
 * already in @p dest.  The coefficient block is cleared afterwards.
 */
void ff_vp3_idct_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    /* type 2 selects the additive path inside idct_msa() */
    idct_msa(dest, line_size, block, 2);
    memset(block, 0, 64 * sizeof(*block));
}
409 
/**
 * DC-only VP3 IDCT add: when only the DC coefficient is non-zero the whole
 * 8x8 IDCT collapses to adding a single rounded constant to every pixel.
 *
 * @param dest      8x8 destination pixel block, updated in place
 * @param line_size distance in bytes between destination rows
 * @param block     coefficient block; only block[0] is read, and it is
 *                  cleared before returning
 */
void ff_vp3_idct_dc_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
{
    /* DC contribution with rounding: (dc + 15) >> 5 */
    int i = (block[0] + 15) >> 5;
    v4i32 dc = {i, i, i, i};
    v16i8 d0, d1, d2, d3, d4, d5, d6, d7;
    v4i32 c0, c1, c2, c3, c4, c5, c6, c7;
    v4i32 e0, e1, e2, e3, e4, e5, e6, e7;
    v4i32 r0, r1, r2, r3, r4, r5, r6, r7;
    /* Packs byte 0 of each 32-bit lane of two vectors into 8 output bytes */
    v16i8 mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0};
    v16i8 zero = {0};

    /* Load the 8 destination rows and zero-extend bytes to 16-bit lanes */
    LD_SB8(dest, line_size, d0, d1, d2, d3, d4, d5, d6, d7);
    ILVR_B4_SW(zero, d0, zero, d1, zero, d2, zero, d3,
               c0, c1, c2, c3);
    ILVR_B4_SW(zero, d4, zero, d5, zero, d6, zero, d7,
               c4, c5, c6, c7);
    /* Right part: low four pixels of each row, widened to 32 bits */
    ILVR_H4_SW(zero, c0, zero, c1, zero, c2, zero, c3,
               e0, e1, e2, e3);
    ILVR_H4_SW(zero, c4, zero, c5, zero, c6, zero, c7,
               e4, e5, e6, e7);
    e0 += dc;
    e1 += dc;
    e2 += dc;
    e3 += dc;
    e4 += dc;
    e5 += dc;
    e6 += dc;
    e7 += dc;
    e0 = CLIP_SW_0_255(e0);
    e1 = CLIP_SW_0_255(e1);
    e2 = CLIP_SW_0_255(e2);
    e3 = CLIP_SW_0_255(e3);
    e4 = CLIP_SW_0_255(e4);
    e5 = CLIP_SW_0_255(e5);
    e6 = CLIP_SW_0_255(e6);
    e7 = CLIP_SW_0_255(e7);

    /* Left part: high four pixels of each row */
    ILVL_H4_SW(zero, c0, zero, c1, zero, c2, zero, c3,
               r0, r1, r2, r3);
    ILVL_H4_SW(zero, c4, zero, c5, zero, c6, zero, c7,
               r4, r5, r6, r7);
    r0 += dc;
    r1 += dc;
    r2 += dc;
    r3 += dc;
    r4 += dc;
    r5 += dc;
    r6 += dc;
    r7 += dc;
    r0 = CLIP_SW_0_255(r0);
    r1 = CLIP_SW_0_255(r1);
    r2 = CLIP_SW_0_255(r2);
    r3 = CLIP_SW_0_255(r3);
    r4 = CLIP_SW_0_255(r4);
    r5 = CLIP_SW_0_255(r5);
    r6 = CLIP_SW_0_255(r6);
    r7 = CLIP_SW_0_255(r7);
    /* Repack the clipped 32-bit lanes into 8-byte rows */
    VSHF_B2_SB(e0, r0, e1, r1, mask, mask, d0, d1);
    VSHF_B2_SB(e2, r2, e3, r3, mask, mask, d2, d3);
    VSHF_B2_SB(e4, r4, e5, r5, mask, mask, d4, d5);
    VSHF_B2_SB(e6, r6, e7, r7, mask, mask, d6, d7);

    /* Final sequence of operations over-write original dst */
    ST_D1(d0, 0, dest);
    ST_D1(d1, 0, dest + line_size);
    ST_D1(d2, 0, dest + 2 * line_size);
    ST_D1(d3, 0, dest + 3 * line_size);
    ST_D1(d4, 0, dest + 4 * line_size);
    ST_D1(d5, 0, dest + 5 * line_size);
    ST_D1(d6, 0, dest + 6 * line_size);
    ST_D1(d7, 0, dest + 7 * line_size);

    block[0] = 0;
}
486 
/**
 * VP3 vertical loop filter across a horizontal block edge.
 * Filters the two rows straddling the edge at @p first_pixel, using the
 * precomputed bounding_values table to limit the filter strength.
 *
 * @param first_pixel     pointer to the first row BELOW the edge
 * @param stride          distance in bytes between rows
 * @param bounding_values lookup table mapping the raw filter value to the
 *                        bounded correction (indexed by a signed filter
 *                        value; assumes the table accepts the full range
 *                        produced below — TODO confirm against callers)
 */
void ff_vp3_v_loop_filter_msa(uint8_t *first_pixel, ptrdiff_t stride,
                              int *bounding_values)
{
    int nstride = -stride;
    v4i32 e0, e1, f0, f1, g0, g1;
    v16i8 zero = {0};
    v16i8 d0, d1, d2, d3;
    v8i16 c0, c1, c2, c3;
    v8i16 r0;
    v8i16 cnst3h = {3, 3, 3, 3, 3, 3, 3, 3},
          cnst4h = {4, 4, 4, 4, 4, 4, 4, 4};
    v16i8 mask = {0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0};
    int16_t temp_16[8];      /* NOTE(review): assumes sufficient alignment for ST_SH — confirm */
    int temp_32[8];

    /* Load the 4 rows around the edge (2 above, 2 below) and widen */
    LD_SB4(first_pixel + nstride * 2, stride, d0, d1, d2, d3);
    ILVR_B4_SH(zero, d0, zero, d1, zero, d2, zero, d3,
               c0, c1, c2, c3);
    /* Raw filter value: (p0 - q1) + 3*(q0 - p1), rounded and >>3 */
    r0 = (c0 - c3) + (c2 - c1) * cnst3h;
    r0 += cnst4h;
    r0 = r0 >> 3;
    /* Get filter_value from bounding_values one by one (no vector gather) */
    ST_SH(r0, temp_16);
    for (int i = 0; i < 8; i++)
        temp_32[i] = bounding_values[temp_16[i]];
    LD_SW2(temp_32, 4, e0, e1);
    /* Apply the bounded correction to the two rows adjacent to the edge */
    ILVR_H2_SW(zero, c1, zero, c2, f0, g0);
    ILVL_H2_SW(zero, c1, zero, c2, f1, g1);
    f0 += e0;
    f1 += e1;
    g0 -= e0;
    g1 -= e1;
    f0 = CLIP_SW_0_255(f0);
    f1 = CLIP_SW_0_255(f1);
    g0 = CLIP_SW_0_255(g0);
    g1 = CLIP_SW_0_255(g1);
    VSHF_B2_SB(f0, f1, g0, g1, mask, mask, d1, d2);

    /* Final move to first_pixel */
    ST_D1(d1, 0, first_pixel + nstride);
    ST_D1(d2, 0, first_pixel);
}
529 
/**
 * VP3 horizontal loop filter across a vertical block edge.
 * Same filter as ff_vp3_v_loop_filter_msa but applied to the two columns
 * straddling the edge; rows are transposed so the same vector arithmetic
 * can be reused.
 *
 * @param first_pixel     pointer to the first column RIGHT of the edge
 * @param stride          distance in bytes between rows
 * @param bounding_values lookup table mapping the raw filter value to the
 *                        bounded correction (signed index — assumes the
 *                        table covers the produced range; TODO confirm)
 */
void ff_vp3_h_loop_filter_msa(uint8_t *first_pixel, ptrdiff_t stride,
                              int *bounding_values)
{
    v16i8 d0, d1, d2, d3, d4, d5, d6, d7;
    v8i16 c0, c1, c2, c3, c4, c5, c6, c7;
    v8i16 r0;
    v4i32 e0, e1, f0, f1, g0, g1;
    v16i8 zero = {0};
    v8i16 cnst3h = {3, 3, 3, 3, 3, 3, 3, 3},
          cnst4h = {4, 4, 4, 4, 4, 4, 4, 4};
    /* Interleaves bytes of two vectors pairwise for the column store */
    v16i8 mask = {0, 16, 4, 20, 8, 24, 12, 28, 0, 0, 0, 0, 0, 0, 0, 0};
    int16_t temp_16[8];  /* NOTE(review): assumes sufficient alignment for ST_SH — confirm */
    int temp_32[8];

    /* Load 8 rows starting 2 pixels left of the edge, widen, transpose
     * so c0..c3 hold the 4 columns around the edge */
    LD_SB8(first_pixel - 2, stride, d0, d1, d2, d3, d4, d5, d6, d7);
    ILVR_B4_SH(zero, d0, zero, d1, zero, d2, zero, d3,
               c0, c1, c2, c3);
    ILVR_B4_SH(zero, d4, zero, d5, zero, d6, zero, d7,
               c4, c5, c6, c7);
    TRANSPOSE8x8_SH_SH(c0, c1, c2, c3, c4, c5, c6, c7,
                       c0, c1, c2, c3, c4, c5, c6, c7);
    /* Raw filter value: (p0 - q1) + 3*(q0 - p1), rounded and >>3 */
    r0 = (c0 - c3) + (c2 - c1) * cnst3h;
    r0 += cnst4h;
    r0 = r0 >> 3;

    /* Get filter_value from bounding_values one by one (no vector gather) */
    ST_SH(r0, temp_16);
    for (int i = 0; i < 8; i++)
        temp_32[i] = bounding_values[temp_16[i]];
    LD_SW2(temp_32, 4, e0, e1);
    /* Apply the bounded correction to the two columns adjacent to the edge */
    ILVR_H2_SW(zero, c1, zero, c2, f0, g0);
    ILVL_H2_SW(zero, c1, zero, c2, f1, g1);
    f0 += e0;
    f1 += e1;
    g0 -= e0;
    g1 -= e1;
    f0 = CLIP_SW_0_255(f0);
    f1 = CLIP_SW_0_255(f1);
    g0 = CLIP_SW_0_255(g0);
    g1 = CLIP_SW_0_255(g1);
    VSHF_B2_SB(f0, g0, f1, g1, mask, mask, d1, d2);
    /* Final move to first_pixel: two bytes per row, written as halfwords */
    ST_H4(d1, 0, 1, 2, 3, first_pixel - 1, stride);
    ST_H4(d2, 0, 1, 2, 3, first_pixel - 1 + 4 * stride, stride);
}
575 
577  const uint8_t *src2, ptrdiff_t stride, int h)
578 {
579  if (h == 8) {
580  v16i8 d0, d1, d2, d3, d4, d5, d6, d7;
581  v16i8 c0, c1, c2, c3;
582  v4i32 a0, a1, a2, a3, b0, b1, b2, b3;
583  v4i32 e0, e1, e2;
584  v4i32 f0, f1, f2;
585  v4u32 t0, t1, t2, t3;
586  v16i8 mask = {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23};
587  int32_t value = 0xfefefefe;
588  v4i32 fmask = {value, value, value, value};
589 
590  LD_SB8(src1, stride, d0, d1, d2, d3, d4, d5, d6, d7);
591  VSHF_B2_SB(d0, d1, d2, d3, mask, mask, c0, c1);
592  VSHF_B2_SB(d4, d5, d6, d7, mask, mask, c2, c3);
593  a0 = (v4i32) __msa_pckev_d((v2i64)c1, (v2i64)c0);
594  a2 = (v4i32) __msa_pckod_d((v2i64)c1, (v2i64)c0);
595  a1 = (v4i32) __msa_pckev_d((v2i64)c3, (v2i64)c2);
596  a3 = (v4i32) __msa_pckod_d((v2i64)c3, (v2i64)c2);
597 
598  LD_SB8(src2, stride, d0, d1, d2, d3, d4, d5, d6, d7);
599  VSHF_B2_SB(d0, d1, d2, d3, mask, mask, c0, c1);
600  VSHF_B2_SB(d4, d5, d6, d7, mask, mask, c2, c3);
601  b0 = (v4i32) __msa_pckev_d((v2i64)c1, (v2i64)c0);
602  b2 = (v4i32) __msa_pckod_d((v2i64)c1, (v2i64)c0);
603  b1 = (v4i32) __msa_pckev_d((v2i64)c3, (v2i64)c2);
604  b3 = (v4i32) __msa_pckod_d((v2i64)c3, (v2i64)c2);
605 
606  e0 = (v4i32) __msa_xor_v((v16u8)a0, (v16u8)b0);
607  e0 = (v4i32) __msa_and_v((v16u8)e0, (v16u8)fmask);
608  t0 = ((v4u32)e0) >> 1;
609  e2 = (v4i32) __msa_and_v((v16u8)a0, (v16u8)b0);
610  t0 = t0 + (v4u32)e2;
611 
612  e1 = (v4i32) __msa_xor_v((v16u8)a1, (v16u8)b1);
613  e1 = (v4i32) __msa_and_v((v16u8)e1, (v16u8)fmask);
614  t1 = ((v4u32)e1) >> 1;
615  e2 = (v4i32) __msa_and_v((v16u8)a1, (v16u8)b1);
616  t1 = t1 + (v4u32)e2;
617 
618  f0 = (v4i32) __msa_xor_v((v16u8)a2, (v16u8)b2);
619  f0 = (v4i32) __msa_and_v((v16u8)f0, (v16u8)fmask);
620  t2 = ((v4u32)f0) >> 1;
621  f2 = (v4i32) __msa_and_v((v16u8)a2, (v16u8)b2);
622  t2 = t2 + (v4u32)f2;
623 
624  f1 = (v4i32) __msa_xor_v((v16u8)a3, (v16u8)b3);
625  f1 = (v4i32) __msa_and_v((v16u8)f1, (v16u8)fmask);
626  t3 = ((v4u32)f1) >> 1;
627  f2 = (v4i32) __msa_and_v((v16u8)a3, (v16u8)b3);
628  t3 = t3 + (v4u32)f2;
629 
630  ST_W8(t0, t1, 0, 1, 2, 3, 0, 1, 2, 3, dst, stride);
631  ST_W8(t2, t3, 0, 1, 2, 3, 0, 1, 2, 3, dst + 4, stride);
632  } else {
633  int i;
634 
635  for (i = 0; i < h; i++) {
636  uint32_t a, b;
637 
638  a = AV_RN32(&src1[i * stride]);
639  b = AV_RN32(&src2[i * stride]);
640  AV_WN32A(&dst[i * stride], no_rnd_avg32(a, b));
641  a = AV_RN32(&src1[i * stride + 4]);
642  b = AV_RN32(&src2[i * stride + 4]);
643  AV_WN32A(&dst[i * stride + 4], no_rnd_avg32(a, b));
644  }
645  }
646 }
VSHF_B2_SB
#define VSHF_B2_SB(...)
Definition: generic_macros_msa.h:681
LD_SB4
#define LD_SB4(...)
Definition: generic_macros_msa.h:299
stride
int stride
Definition: mace.c:144
no_rnd_avg32
static uint32_t no_rnd_avg32(uint32_t a, uint32_t b)
Definition: rnd_avg.h:36
ff_put_no_rnd_pixels_l2_msa
void ff_put_no_rnd_pixels_l2_msa(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride, int h)
Definition: vp3dsp_idct_msa.c:576
ILVR_H4_SW
#define ILVR_H4_SW(...)
Definition: generic_macros_msa.h:1458
vp3dsp_mips.h
t0
#define t0
Definition: regdef.h:28
b
#define b
Definition: input.c:41
ST_D1
#define ST_D1(in, idx, pdst)
Definition: generic_macros_msa.h:485
F
#define F(x)
AV_WN32A
#define AV_WN32A(p, v)
Definition: intreadwrite.h:538
t1
#define t1
Definition: regdef.h:29
c1
static const uint64_t c1
Definition: murmur3.c:49
D
D(D(float, sse)
Definition: rematrix_init.c:28
ILVR_B4_SW
#define ILVR_B4_SW(...)
Definition: generic_macros_msa.h:1413
A
#define A(x)
Definition: vp56_arith.h:28
ff_vp3_idct_dc_add_msa
void ff_vp3_idct_dc_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: vp3dsp_idct_msa.c:410
ILVL_H4_SW
#define ILVL_H4_SW(...)
Definition: generic_macros_msa.h:1352
generic_macros_msa.h
type
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf type
Definition: writing_filters.txt:86
TRANSPOSE8x8_SH_SH
#define TRANSPOSE8x8_SH_SH(...)
Definition: generic_macros_msa.h:2578
a1
#define a1
Definition: regdef.h:47
C
s EdgeDetect Foobar g libavfilter vf_edgedetect c libavfilter vf_foobar c edit libavfilter and add an entry for foobar following the pattern of the other filters edit libavfilter allfilters and add an entry for foobar following the pattern of the other filters configure make j< whatever > ffmpeg ffmpeg i you should get a foobar png with Lena edge detected That s your new playground is ready Some little details about what s going which in turn will define variables for the build system and the C
Definition: writing_filters.txt:58
mask
static const uint16_t mask[17]
Definition: lzw.c:38
intreadwrite.h
ILVR_H2_SW
#define ILVR_H2_SW(...)
Definition: generic_macros_msa.h:1442
TRANSPOSE4x4_SW_SW
#define TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, out0, out1, out2, out3)
Definition: generic_macros_msa.h:2586
E
#define E
Definition: avdct.c:32
int32_t
int32_t
Definition: audio_convert.c:194
H
F H1 F F H1 F F F F H1<-F-------F-------F v v v H2 H3 H2 ^ ^ ^ F-------F-------F-> H1<-F-------F-------F|||||||||F H1 F|||||||||F H1 Funavailable fullpel samples(outside the picture for example) shall be equalto the closest available fullpel sampleSmaller pel interpolation:--------------------------if diag_mc is set then points which lie on a line between 2 vertically, horizontally or diagonally adjacent halfpel points shall be interpolatedlinearly with rounding to nearest and halfway values rounded up.points which lie on 2 diagonals at the same time should only use the onediagonal not containing the fullpel point F--> O q O<--h1-> O q O<--F v \/v \/v O O O O O O O|/|\|q q q q q|/|\|O O O O O O O ^/\ ^/\ ^ h2--> O q O<--h3-> O q O<--h2 v \/v \/v O O O O O O O|\|/|q q q q q|\|/|O O O O O O O ^/\ ^/\ ^ F--> O q O<--h1-> O q O<--Fthe remaining points shall be bilinearly interpolated from theup to 4 surrounding halfpel and fullpel points, again rounding should be tonearest and halfway values rounded upcompliant Snow decoders MUST support 1-1/8 pel luma and 1/2-1/16 pel chromainterpolation at leastOverlapped block motion compensation:-------------------------------------FIXMELL band prediction:===================Each sample in the LL0 subband is predicted by the median of the left, top andleft+top-topleft samples, samples outside the subband shall be considered tobe 0. 
To reverse this prediction in the decoder apply the following.for(y=0;y< height;y++){ for(x=0;x< width;x++){ sample[y][x]+=median(sample[y-1][x], sample[y][x-1], sample[y-1][x]+sample[y][x-1]-sample[y-1][x-1]);}}sample[-1][ *]=sample[ *][-1]=0;width, height here are the width and height of the LL0 subband not of the finalvideoDequantization:===============FIXMEWavelet Transform:==================Snow supports 2 wavelet transforms, the symmetric biorthogonal 5/3 integertransform and an integer approximation of the symmetric biorthogonal 9/7daubechies wavelet.2D IDWT(inverse discrete wavelet transform) --------------------------------------------The 2D IDWT applies a 2D filter recursively, each time combining the4 lowest frequency subbands into a single subband until only 1 subbandremains.The 2D filter is done by first applying a 1D filter in the vertical directionand then applying it in the horizontal one. --------------- --------------- --------------- ---------------|LL0|HL0|||||||||||||---+---|HL1||L0|H0|HL1||LL1|HL1|||||LH0|HH0|||||||||||||-------+-------|-> L1 H1 LH1 HH1 LH1 HH1 LH1 HH1 this can end with a L or a H
Definition: snow.txt:555
AV_RN32
#define AV_RN32(p)
Definition: intreadwrite.h:364
idct_msa
static void idct_msa(uint8_t *dst, int stride, int16_t *input, int type)
Definition: vp3dsp_idct_msa.c:26
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
LD_SW2
#define LD_SW2(...)
Definition: generic_macros_msa.h:283
a
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:41
a0
#define a0
Definition: regdef.h:46
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
ff_vp3_h_loop_filter_msa
void ff_vp3_h_loop_filter_msa(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values)
Definition: vp3dsp_idct_msa.c:530
src1
#define src1
Definition: h264pred.c:139
LD_SH8
#define LD_SH8(...)
Definition: generic_macros_msa.h:339
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
ST_H4
#define ST_H4(in, idx0, idx1, idx2, idx3, pdst, stride)
Definition: generic_macros_msa.h:417
t3
#define t3
Definition: regdef.h:31
a2
#define a2
Definition: regdef.h:48
ST_W8
#define ST_W8(in0, in1, idx0, idx1, idx2, idx3, idx4, idx5, idx6, idx7, pdst, stride)
Definition: generic_macros_msa.h:470
value
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default value
Definition: writing_filters.txt:86
uint8_t
uint8_t
Definition: audio_convert.c:194
rnd_avg.h
G
#define G
Definition: huffyuvdsp.h:33
B
#define B
Definition: huffyuvdsp.h:32
ILVL_H2_SW
#define ILVL_H2_SW(...)
Definition: generic_macros_msa.h:1343
c2
static const uint64_t c2
Definition: murmur3.c:50
t2
#define t2
Definition: regdef.h:30
ff_vp3_v_loop_filter_msa
void ff_vp3_v_loop_filter_msa(uint8_t *first_pixel, ptrdiff_t stride, int *bounding_values)
Definition: vp3dsp_idct_msa.c:487
LD_SB8
#define LD_SB8(...)
Definition: generic_macros_msa.h:337
ILVR_B4_SH
#define ILVR_B4_SH(...)
Definition: generic_macros_msa.h:1412
ff_vp3_idct_put_msa
void ff_vp3_idct_put_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: vp3dsp_idct_msa.c:398
zero
#define zero
Definition: regdef.h:64
CLIP_SW_0_255
#define CLIP_SW_0_255(in)
Definition: generic_macros_msa.h:1002
ST_SH
#define ST_SH(...)
Definition: generic_macros_msa.h:47
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
h
h
Definition: vp9dsp_template.c:2038
a3
#define a3
Definition: regdef.h:49
ff_vp3_idct_add_msa
void ff_vp3_idct_add_msa(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
Definition: vp3dsp_idct_msa.c:404