h264pred_msa.c
/*
 * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/mips/generic_macros_msa.h"
#include "h264dsp_mips.h"

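/* Vertical prediction copies the pixel row directly above the block into
 * every row of the block.  A scalar sketch of what the two MSA routines
 * below implement (illustrative only, not part of the optimized path):
 *
 *     for (row = 0; row < height; row++)
 *         memcpy(dst + row * dst_stride, src, width);
 *
 * The 8x8 variant keeps the top row in two 32-bit GPR words (LW/SW); the
 * 16x16 variant holds it in a single 128-bit MSA register (LD_UB/ST_UB).
 */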
static void intra_predict_vert_8x8_msa(uint8_t *src, uint8_t *dst,
                                       int32_t dst_stride)
{
    uint32_t row;
    uint32_t src_data1, src_data2;

    src_data1 = LW(src);
    src_data2 = LW(src + 4);

    for (row = 8; row--;) {
        SW(src_data1, dst);
        SW(src_data2, (dst + 4));
        dst += dst_stride;
    }
}

static void intra_predict_vert_16x16_msa(uint8_t *src, uint8_t *dst,
                                         int32_t dst_stride)
{
    uint32_t row;
    v16u8 src0;

    src0 = LD_UB(src);

    for (row = 16; row--;) {
        ST_UB(src0, dst);
        dst += dst_stride;
    }
}

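/* Horizontal prediction fills each row of the block with that row's left
 * neighbor.  Multiplying a byte by 0x0101010101010101 broadcasts it across
 * all eight bytes of a 64-bit word; the 16x16 variant performs the same
 * broadcast into a 128-bit vector with __msa_fill_b.  Scalar sketch
 * (illustrative only):
 *
 *     for (row = 0; row < height; row++)
 *         memset(dst + row * dst_stride, src[row * src_stride], width);
 */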
static void intra_predict_horiz_8x8_msa(uint8_t *src, int32_t src_stride,
                                        uint8_t *dst, int32_t dst_stride)
{
    uint64_t out0, out1, out2, out3, out4, out5, out6, out7;

    out0 = src[0 * src_stride] * 0x0101010101010101;
    out1 = src[1 * src_stride] * 0x0101010101010101;
    out2 = src[2 * src_stride] * 0x0101010101010101;
    out3 = src[3 * src_stride] * 0x0101010101010101;
    out4 = src[4 * src_stride] * 0x0101010101010101;
    out5 = src[5 * src_stride] * 0x0101010101010101;
    out6 = src[6 * src_stride] * 0x0101010101010101;
    out7 = src[7 * src_stride] * 0x0101010101010101;

    SD4(out0, out1, out2, out3, dst, dst_stride);
    dst += (4 * dst_stride);
    SD4(out4, out5, out6, out7, dst, dst_stride);
}

static void intra_predict_horiz_16x16_msa(uint8_t *src, int32_t src_stride,
                                          uint8_t *dst, int32_t dst_stride)
{
    uint32_t row;
    uint8_t inp0, inp1, inp2, inp3;
    v16u8 src0, src1, src2, src3;

    for (row = 4; row--;) {
        inp0 = src[0];
        src += src_stride;
        inp1 = src[0];
        src += src_stride;
        inp2 = src[0];
        src += src_stride;
        inp3 = src[0];
        src += src_stride;

        src0 = (v16u8) __msa_fill_b(inp0);
        src1 = (v16u8) __msa_fill_b(inp1);
        src2 = (v16u8) __msa_fill_b(inp2);
        src3 = (v16u8) __msa_fill_b(inp3);

        ST_UB4(src0, src1, src2, src3, dst, dst_stride);
        dst += (4 * dst_stride);
    }
}

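/* DC prediction fills the block with the rounded average of the available
 * neighbors.  For 8x8 with both edges present that is 16 samples,
 * dc = (sum_top + sum_left + 8) >> 4; with a single edge it is 8 samples,
 * dc = (sum + 4) >> 3; with neither edge the mid-value 128 is used.  The
 * top-edge sum is built by chaining the horizontal adds
 * __msa_hadd_u_h/_w/_d and extracting the low word.
 */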
static void intra_predict_dc_8x8_msa(uint8_t *src_top, uint8_t *src_left,
                                     int32_t src_stride_left,
                                     uint8_t *dst, int32_t dst_stride,
                                     uint8_t is_above, uint8_t is_left)
{
    uint32_t row;
    uint32_t out, addition = 0;
    v16u8 src_above, store;
    v8u16 sum_above;
    v4u32 sum_top;
    v2u64 sum;

    if (is_left && is_above) {
        src_above = LD_UB(src_top);

        sum_above = __msa_hadd_u_h(src_above, src_above);
        sum_top = __msa_hadd_u_w(sum_above, sum_above);
        sum = __msa_hadd_u_d(sum_top, sum_top);
        addition = __msa_copy_u_w((v4i32) sum, 0);

        for (row = 0; row < 8; row++) {
            addition += src_left[row * src_stride_left];
        }

        addition = (addition + 8) >> 4;
        store = (v16u8) __msa_fill_b(addition);
    } else if (is_left) {
        for (row = 0; row < 8; row++) {
            addition += src_left[row * src_stride_left];
        }

        addition = (addition + 4) >> 3;
        store = (v16u8) __msa_fill_b(addition);
    } else if (is_above) {
        src_above = LD_UB(src_top);

        sum_above = __msa_hadd_u_h(src_above, src_above);
        sum_top = __msa_hadd_u_w(sum_above, sum_above);
        sum = __msa_hadd_u_d(sum_top, sum_top);
        sum = (v2u64) __msa_srari_d((v2i64) sum, 3);
        store = (v16u8) __msa_splati_b((v16i8) sum, 0);
    } else {
        store = (v16u8) __msa_ldi_b(128);
    }

    out = __msa_copy_u_w((v4i32) store, 0);

    for (row = 8; row--;) {
        SW(out, dst);
        SW(out, (dst + 4));
        dst += dst_stride;
    }
}

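/* With a 16-byte top edge the hadd chain leaves two partial sums in the
 * two 64-bit lanes, so the 16x16 version folds them with __msa_pckev_w
 * plus one more __msa_hadd_u_d before extracting the total.  The rounding
 * becomes (sum + 16) >> 5 for 32 samples and (sum + 8) >> 4 for 16.
 */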
static void intra_predict_dc_16x16_msa(uint8_t *src_top, uint8_t *src_left,
                                       int32_t src_stride_left,
                                       uint8_t *dst, int32_t dst_stride,
                                       uint8_t is_above, uint8_t is_left)
{
    uint32_t row;
    uint32_t addition = 0;
    v16u8 src_above, store;
    v8u16 sum_above;
    v4u32 sum_top;
    v2u64 sum;

    if (is_left && is_above) {
        src_above = LD_UB(src_top);

        sum_above = __msa_hadd_u_h(src_above, src_above);
        sum_top = __msa_hadd_u_w(sum_above, sum_above);
        sum = __msa_hadd_u_d(sum_top, sum_top);
        sum_top = (v4u32) __msa_pckev_w((v4i32) sum, (v4i32) sum);
        sum = __msa_hadd_u_d(sum_top, sum_top);
        addition = __msa_copy_u_w((v4i32) sum, 0);

        for (row = 0; row < 16; row++) {
            addition += src_left[row * src_stride_left];
        }

        addition = (addition + 16) >> 5;
        store = (v16u8) __msa_fill_b(addition);
    } else if (is_left) {
        for (row = 0; row < 16; row++) {
            addition += src_left[row * src_stride_left];
        }

        addition = (addition + 8) >> 4;
        store = (v16u8) __msa_fill_b(addition);
    } else if (is_above) {
        src_above = LD_UB(src_top);

        sum_above = __msa_hadd_u_h(src_above, src_above);
        sum_top = __msa_hadd_u_w(sum_above, sum_above);
        sum = __msa_hadd_u_d(sum_top, sum_top);
        sum_top = (v4u32) __msa_pckev_w((v4i32) sum, (v4i32) sum);
        sum = __msa_hadd_u_d(sum_top, sum_top);
        sum = (v2u64) __msa_srari_d((v2i64) sum, 4);
        store = (v16u8) __msa_splati_b((v16i8) sum, 0);
    } else {
        store = (v16u8) __msa_ldi_b(128);
    }

    for (row = 16; row--;) {
        ST_UB(store, dst);
        dst += dst_stride;
    }
}

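/* Constant-value DC predictors.  The 127 and 129 instances generated below
 * back the ff_vp8_pred*_127/129_dc_8_msa wrappers at the end of this file:
 * VP8 substitutes the constant 127 when the above row is unavailable and
 * 129 when the left column is unavailable.
 */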
#define INTRA_PREDICT_VALDC_8X8_MSA(val)                         \
static void intra_predict_##val##dc_8x8_msa(uint8_t *dst,       \
                                            int32_t dst_stride) \
{                                                               \
    uint32_t row, out;                                          \
    v16i8 store;                                                \
                                                                \
    store = __msa_ldi_b(val);                                   \
    out = __msa_copy_u_w((v4i32) store, 0);                     \
                                                                \
    for (row = 8; row--;) {                                     \
        SW(out, dst);                                           \
        SW(out, (dst + 4));                                     \
        dst += dst_stride;                                      \
    }                                                           \
}

INTRA_PREDICT_VALDC_8X8_MSA(127);
INTRA_PREDICT_VALDC_8X8_MSA(129);

#define INTRA_PREDICT_VALDC_16X16_MSA(val)                         \
static void intra_predict_##val##dc_16x16_msa(uint8_t *dst,       \
                                              int32_t dst_stride) \
{                                                                 \
    uint32_t row;                                                 \
    v16u8 store;                                                  \
                                                                  \
    store = (v16u8) __msa_ldi_b(val);                             \
                                                                  \
    for (row = 16; row--;) {                                      \
        ST_UB(store, dst);                                        \
        dst += dst_stride;                                        \
    }                                                             \
}

INTRA_PREDICT_VALDC_16X16_MSA(127);
INTRA_PREDICT_VALDC_16X16_MSA(129);

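/* H.264 plane prediction fits a linear surface through the border pixels:
 *
 *     pred(x, y) = clip((a + b * (x - 3) + c * (y - 3) + 16) >> 5)
 *
 * for 8x8, with a = 16 * (top[7] + left[7]), slope b = (17 * H + 16) >> 5
 * from the weighted sum H of top-edge differences and c likewise from the
 * left edge (for 16x16 the center offset becomes 7, the corners index 15,
 * and the slopes (5 * H + 32) >> 6).  Below, res0/res1 hold b/c: H is
 * computed vectorially via the shf_mask shuffle and a multiply-accumulate,
 * V with scalar loads from the left column, and the surface is evaluated
 * four pixels per v4i32 accumulator with the +16 rounding folded into res2.
 */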
static void intra_predict_plane_8x8_msa(uint8_t *src, int32_t stride)
{
    uint8_t lpcnt;
    int32_t res, res0, res1, res2, res3;
    uint64_t out0, out1;
    v16i8 shf_mask = { 3, 5, 2, 6, 1, 7, 0, 8, 3, 5, 2, 6, 1, 7, 0, 8 };
    v8i16 short_multiplier = { 1, 2, 3, 4, 1, 2, 3, 4 };
    v4i32 int_multiplier = { 0, 1, 2, 3 };
    v16u8 src_top;
    v8i16 vec9, vec10, vec11;
    v4i32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8;
    v2i64 sum;

    src_top = LD_UB(src - (stride + 1));
    src_top = (v16u8) __msa_vshf_b(shf_mask, (v16i8) src_top, (v16i8) src_top);

    vec9 = __msa_hsub_u_h(src_top, src_top);
    vec9 *= short_multiplier;
    vec8 = __msa_hadd_s_w(vec9, vec9);
    sum = __msa_hadd_s_d(vec8, vec8);

    res0 = __msa_copy_s_w((v4i32) sum, 0);

    res1 = (src[4 * stride - 1] - src[2 * stride - 1]) +
           2 * (src[5 * stride - 1] - src[stride - 1]) +
           3 * (src[6 * stride - 1] - src[-1]) +
           4 * (src[7 * stride - 1] - src[-stride - 1]);

    res0 *= 17;
    res1 *= 17;
    res0 = (res0 + 16) >> 5;
    res1 = (res1 + 16) >> 5;

    res3 = 3 * (res0 + res1);
    res2 = 16 * (src[7 * stride - 1] + src[-stride + 7] + 1);
    res = res2 - res3;

    vec8 = __msa_fill_w(res0);
    vec4 = __msa_fill_w(res);
    vec2 = __msa_fill_w(res1);
    vec5 = vec8 * int_multiplier;
    vec3 = vec8 * 4;

    for (lpcnt = 4; lpcnt--;) {
        vec0 = vec5;
        vec0 += vec4;
        vec1 = vec0 + vec3;
        vec6 = vec5;
        vec4 += vec2;
        vec6 += vec4;
        vec7 = vec6 + vec3;

        SRA_4V(vec0, vec1, vec6, vec7, 5);
        PCKEV_H2_SH(vec1, vec0, vec7, vec6, vec10, vec11);
        CLIP_SH2_0_255(vec10, vec11);
        PCKEV_B2_SH(vec10, vec10, vec11, vec11, vec10, vec11);

        out0 = __msa_copy_s_d((v2i64) vec10, 0);
        out1 = __msa_copy_s_d((v2i64) vec11, 0);
        SD(out0, src);
        src += stride;
        SD(out1, src);
        src += stride;

        vec4 += vec2;
    }
}

static void intra_predict_plane_16x16_msa(uint8_t *src, int32_t stride)
{
    uint8_t lpcnt;
    int32_t res0, res1, res2, res3;
    uint64_t load0, load1;
    v16i8 shf_mask = { 7, 8, 6, 9, 5, 10, 4, 11, 3, 12, 2, 13, 1, 14, 0, 15 };
    v8i16 short_multiplier = { 1, 2, 3, 4, 5, 6, 7, 8 };
    v4i32 int_multiplier = { 0, 1, 2, 3 };
    v16u8 src_top = { 0 };
    v8i16 vec9, vec10;
    v4i32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, res_add;

    load0 = LD(src - (stride + 1));
    load1 = LD(src - (stride + 1) + 9);

    INSERT_D2_UB(load0, load1, src_top);

    src_top = (v16u8) __msa_vshf_b(shf_mask, (v16i8) src_top, (v16i8) src_top);

    vec9 = __msa_hsub_u_h(src_top, src_top);
    vec9 *= short_multiplier;
    vec8 = __msa_hadd_s_w(vec9, vec9);
    res_add = (v4i32) __msa_hadd_s_d(vec8, vec8);

    res0 = __msa_copy_s_w(res_add, 0) + __msa_copy_s_w(res_add, 2);

    res1 = (src[8 * stride - 1] - src[6 * stride - 1]) +
           2 * (src[9 * stride - 1] - src[5 * stride - 1]) +
           3 * (src[10 * stride - 1] - src[4 * stride - 1]) +
           4 * (src[11 * stride - 1] - src[3 * stride - 1]) +
           5 * (src[12 * stride - 1] - src[2 * stride - 1]) +
           6 * (src[13 * stride - 1] - src[stride - 1]) +
           7 * (src[14 * stride - 1] - src[-1]) +
           8 * (src[15 * stride - 1] - src[-1 * stride - 1]);

    res0 *= 5;
    res1 *= 5;
    res0 = (res0 + 32) >> 6;
    res1 = (res1 + 32) >> 6;

    res3 = 7 * (res0 + res1);
    res2 = 16 * (src[15 * stride - 1] + src[-stride + 15] + 1);
    res2 -= res3;

    vec8 = __msa_fill_w(res0);
    vec4 = __msa_fill_w(res2);
    vec5 = __msa_fill_w(res1);
    vec6 = vec8 * 4;
    vec7 = vec8 * int_multiplier;

    for (lpcnt = 16; lpcnt--;) {
        vec0 = vec7;
        vec0 += vec4;
        vec1 = vec0 + vec6;
        vec2 = vec1 + vec6;
        vec3 = vec2 + vec6;

        SRA_4V(vec0, vec1, vec2, vec3, 5);
        PCKEV_H2_SH(vec1, vec0, vec3, vec2, vec9, vec10);
        CLIP_SH2_0_255(vec9, vec10);
        PCKEV_ST_SB(vec9, vec10, src);
        src += stride;

        vec4 += vec5;
    }
}

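/* 8x8 chroma DC prediction works on four 4x4 quadrants: the top-left
 * quadrant averages four top plus four left samples ((sum + 4) >> 3), the
 * top-right and bottom-left average only their own edge ((sum + 2) >> 2),
 * and the bottom-right averages the top-right and bottom-left edge sums.
 */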
static void intra_predict_dc_4blk_8x8_msa(uint8_t *src, int32_t stride)
{
    uint8_t lp_cnt;
    uint32_t src0, src1, src3, src2 = 0;
    uint32_t out0, out1, out2, out3;
    v16u8 src_top;
    v8u16 add;
    v4u32 sum;

    src_top = LD_UB(src - stride);
    add = __msa_hadd_u_h((v16u8) src_top, (v16u8) src_top);
    sum = __msa_hadd_u_w(add, add);
    src0 = __msa_copy_u_w((v4i32) sum, 0);
    src1 = __msa_copy_u_w((v4i32) sum, 1);

    for (lp_cnt = 0; lp_cnt < 4; lp_cnt++) {
        src0 += src[lp_cnt * stride - 1];
        src2 += src[(4 + lp_cnt) * stride - 1];
    }

    src0 = (src0 + 4) >> 3;
    src3 = (src1 + src2 + 4) >> 3;
    src1 = (src1 + 2) >> 2;
    src2 = (src2 + 2) >> 2;
    out0 = src0 * 0x01010101;
    out1 = src1 * 0x01010101;
    out2 = src2 * 0x01010101;
    out3 = src3 * 0x01010101;

    for (lp_cnt = 4; lp_cnt--;) {
        SW(out0, src);
        SW(out1, (src + 4));
        SW(out2, (src + 4 * stride));
        SW(out3, (src + 4 * stride + 4));
        src += stride;
    }
}

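/* Left-only 8x8 DC (top row unavailable): each 4-row half of the block is
 * filled with the average of its own four left neighbors, (sum + 2) >> 2,
 * broadcast across the full row width.
 */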
static void intra_predict_hor_dc_8x8_msa(uint8_t *src, int32_t stride)
{
    uint8_t lp_cnt;
    uint32_t src0 = 0, src1 = 0;
    uint64_t out0, out1;

    for (lp_cnt = 0; lp_cnt < 4; lp_cnt++) {
        src0 += src[lp_cnt * stride - 1];
        src1 += src[(4 + lp_cnt) * stride - 1];
    }

    src0 = (src0 + 2) >> 2;
    src1 = (src1 + 2) >> 2;
    out0 = src0 * 0x0101010101010101;
    out1 = src1 * 0x0101010101010101;

    for (lp_cnt = 4; lp_cnt--;) {
        SD(out0, src);
        SD(out1, (src + 4 * stride));
        src += stride;
    }
}

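/* Top-only 8x8 DC (left column unavailable): each 4-wide column half is
 * filled with the rounded average of its four top neighbors; both averages
 * are computed at once with __msa_srari_w(sum, 2).
 */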
static void intra_predict_vert_dc_8x8_msa(uint8_t *src, int32_t stride)
{
    uint8_t lp_cnt;
    uint32_t out0 = 0, out1 = 0;
    v16u8 src_top;
    v8u16 add;
    v4u32 sum;
    v4i32 res0, res1;

    src_top = LD_UB(src - stride);
    add = __msa_hadd_u_h(src_top, src_top);
    sum = __msa_hadd_u_w(add, add);
    sum = (v4u32) __msa_srari_w((v4i32) sum, 2);
    res0 = (v4i32) __msa_splati_b((v16i8) sum, 0);
    res1 = (v4i32) __msa_splati_b((v16i8) sum, 4);
    out0 = __msa_copy_u_w(res0, 0);
    out1 = __msa_copy_u_w(res1, 0);

    for (lp_cnt = 8; lp_cnt--;) {
        SW(out0, src);
        SW(out1, src + 4);
        src += stride;
    }
}

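/* The mad_cow variants appear to mirror the similarly named
 * pred8x8_mad_cow_dc_* functions in the C prediction code: 8x8 chroma DC
 * with only part of the border usable.  The name encodes which left-edge
 * halves and top row are available: l0t = upper-left half + top row,
 * 0lt = lower-left half + top row, l00 = upper-left half only,
 * 0l0 = lower-left half only.  In the l00/0l0 cases the half with no
 * neighbors at all is filled with 128 (0x80).
 */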
static void intra_predict_mad_cow_dc_l0t_8x8_msa(uint8_t *src, int32_t stride)
{
    uint8_t lp_cnt;
    uint32_t src0, src1, src2 = 0;
    uint32_t out0, out1, out2;
    v16u8 src_top;
    v8u16 add;
    v4u32 sum;

    src_top = LD_UB(src - stride);
    add = __msa_hadd_u_h(src_top, src_top);
    sum = __msa_hadd_u_w(add, add);
    src0 = __msa_copy_u_w((v4i32) sum, 0);
    src1 = __msa_copy_u_w((v4i32) sum, 1);

    for (lp_cnt = 0; lp_cnt < 4; lp_cnt++) {
        src2 += src[lp_cnt * stride - 1];
    }
    src2 = (src0 + src2 + 4) >> 3;
    src0 = (src0 + 2) >> 2;
    src1 = (src1 + 2) >> 2;
    out0 = src0 * 0x01010101;
    out1 = src1 * 0x01010101;
    out2 = src2 * 0x01010101;

    for (lp_cnt = 4; lp_cnt--;) {
        SW(out2, src);
        SW(out1, src + 4);
        SW(out0, src + stride * 4);
        SW(out1, src + stride * 4 + 4);
        src += stride;
    }
}

static void intra_predict_mad_cow_dc_0lt_8x8_msa(uint8_t *src, int32_t stride)
{
    uint8_t lp_cnt;
    uint32_t src0, src1, src2 = 0, src3;
    uint32_t out0, out1, out2, out3;
    v16u8 src_top;
    v8u16 add;
    v4u32 sum;

    src_top = LD_UB(src - stride);
    add = __msa_hadd_u_h(src_top, src_top);
    sum = __msa_hadd_u_w(add, add);
    src0 = __msa_copy_u_w((v4i32) sum, 0);
    src1 = __msa_copy_u_w((v4i32) sum, 1);

    for (lp_cnt = 0; lp_cnt < 4; lp_cnt++) {
        src2 += src[(4 + lp_cnt) * stride - 1];
    }

    src0 = (src0 + 2) >> 2;
    src3 = (src1 + src2 + 4) >> 3;
    src1 = (src1 + 2) >> 2;
    src2 = (src2 + 2) >> 2;

    out0 = src0 * 0x01010101;
    out1 = src1 * 0x01010101;
    out2 = src2 * 0x01010101;
    out3 = src3 * 0x01010101;

    for (lp_cnt = 4; lp_cnt--;) {
        SW(out0, src);
        SW(out1, src + 4);
        SW(out2, src + stride * 4);
        SW(out3, src + stride * 4 + 4);
        src += stride;
    }
}

static void intra_predict_mad_cow_dc_l00_8x8_msa(uint8_t *src, int32_t stride)
{
    uint8_t lp_cnt;
    uint32_t src0 = 0;
    uint64_t out0, out1;

    for (lp_cnt = 0; lp_cnt < 4; lp_cnt++) {
        src0 += src[lp_cnt * stride - 1];
    }

    src0 = (src0 + 2) >> 2;
    out0 = src0 * 0x0101010101010101;
    out1 = 0x8080808080808080;

    for (lp_cnt = 4; lp_cnt--;) {
        SD(out0, src);
        SD(out1, src + stride * 4);
        src += stride;
    }
}

static void intra_predict_mad_cow_dc_0l0_8x8_msa(uint8_t *src, int32_t stride)
{
    uint8_t lp_cnt;
    uint32_t src0 = 0;
    uint64_t out0, out1;

    for (lp_cnt = 0; lp_cnt < 4; lp_cnt++) {
        src0 += src[(4 + lp_cnt) * stride - 1];
    }

    src0 = (src0 + 2) >> 2;

    out0 = 0x8080808080808080;
    out1 = src0 * 0x0101010101010101;

    for (lp_cnt = 4; lp_cnt--;) {
        SD(out0, src);
        SD(out1, src + stride * 4);
        src += stride;
    }
}

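/* Exported entry points: thin adapters between the ptrdiff_t-stride
 * prototypes declared in h264dsp_mips.h and the int32_t-stride helpers
 * above.  The ff_h264_intra_pred_* variants also translate the
 * block-pointer convention into explicit top (src - stride) and left
 * (src - 1) neighbor pointers.
 */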
void ff_h264_intra_predict_plane_8x8_msa(uint8_t *src, ptrdiff_t stride)
{
    intra_predict_plane_8x8_msa(src, stride);
}

void ff_h264_intra_predict_dc_4blk_8x8_msa(uint8_t *src, ptrdiff_t stride)
{
    intra_predict_dc_4blk_8x8_msa(src, stride);
}

void ff_h264_intra_predict_hor_dc_8x8_msa(uint8_t *src, ptrdiff_t stride)
{
    intra_predict_hor_dc_8x8_msa(src, stride);
}

void ff_h264_intra_predict_vert_dc_8x8_msa(uint8_t *src, ptrdiff_t stride)
{
    intra_predict_vert_dc_8x8_msa(src, stride);
}

void ff_h264_intra_predict_mad_cow_dc_l0t_8x8_msa(uint8_t *src,
                                                  ptrdiff_t stride)
{
    intra_predict_mad_cow_dc_l0t_8x8_msa(src, stride);
}

void ff_h264_intra_predict_mad_cow_dc_0lt_8x8_msa(uint8_t *src,
                                                  ptrdiff_t stride)
{
    intra_predict_mad_cow_dc_0lt_8x8_msa(src, stride);
}

void ff_h264_intra_predict_mad_cow_dc_l00_8x8_msa(uint8_t *src,
                                                  ptrdiff_t stride)
{
    intra_predict_mad_cow_dc_l00_8x8_msa(src, stride);
}

void ff_h264_intra_predict_mad_cow_dc_0l0_8x8_msa(uint8_t *src,
                                                  ptrdiff_t stride)
{
    intra_predict_mad_cow_dc_0l0_8x8_msa(src, stride);
}

void ff_h264_intra_predict_plane_16x16_msa(uint8_t *src, ptrdiff_t stride)
{
    intra_predict_plane_16x16_msa(src, stride);
}

void ff_h264_intra_pred_vert_8x8_msa(uint8_t *src, ptrdiff_t stride)
{
    uint8_t *dst = src;

    intra_predict_vert_8x8_msa(src - stride, dst, stride);
}

void ff_h264_intra_pred_horiz_8x8_msa(uint8_t *src, ptrdiff_t stride)
{
    uint8_t *dst = src;

    intra_predict_horiz_8x8_msa(src - 1, stride, dst, stride);
}

void ff_h264_intra_pred_dc_16x16_msa(uint8_t *src, ptrdiff_t stride)
{
    uint8_t *src_top = src - stride;
    uint8_t *src_left = src - 1;
    uint8_t *dst = src;

    intra_predict_dc_16x16_msa(src_top, src_left, stride, dst, stride, 1, 1);
}

void ff_h264_intra_pred_vert_16x16_msa(uint8_t *src, ptrdiff_t stride)
{
    uint8_t *dst = src;

    intra_predict_vert_16x16_msa(src - stride, dst, stride);
}

void ff_h264_intra_pred_horiz_16x16_msa(uint8_t *src, ptrdiff_t stride)
{
    uint8_t *dst = src;

    intra_predict_horiz_16x16_msa(src - 1, stride, dst, stride);
}

void ff_h264_intra_pred_dc_left_16x16_msa(uint8_t *src, ptrdiff_t stride)
{
    uint8_t *src_top = src - stride;
    uint8_t *src_left = src - 1;
    uint8_t *dst = src;

    intra_predict_dc_16x16_msa(src_top, src_left, stride, dst, stride, 0, 1);
}

void ff_h264_intra_pred_dc_top_16x16_msa(uint8_t *src, ptrdiff_t stride)
{
    uint8_t *src_top = src - stride;
    uint8_t *src_left = src - 1;
    uint8_t *dst = src;

    intra_predict_dc_16x16_msa(src_top, src_left, stride, dst, stride, 1, 0);
}

void ff_h264_intra_pred_dc_128_8x8_msa(uint8_t *src, ptrdiff_t stride)
{
    uint8_t *src_top = src - stride;
    uint8_t *src_left = src - 1;
    uint8_t *dst = src;

    intra_predict_dc_8x8_msa(src_top, src_left, stride, dst, stride, 0, 0);
}

void ff_h264_intra_pred_dc_128_16x16_msa(uint8_t *src, ptrdiff_t stride)
{
    uint8_t *src_top = src - stride;
    uint8_t *src_left = src - 1;
    uint8_t *dst = src;

    intra_predict_dc_16x16_msa(src_top, src_left, stride, dst, stride, 0, 0);
}

void ff_vp8_pred8x8_127_dc_8_msa(uint8_t *src, ptrdiff_t stride)
{
    intra_predict_127dc_8x8_msa(src, stride);
}

void ff_vp8_pred8x8_129_dc_8_msa(uint8_t *src, ptrdiff_t stride)
{
    intra_predict_129dc_8x8_msa(src, stride);
}

void ff_vp8_pred16x16_127_dc_8_msa(uint8_t *src, ptrdiff_t stride)
{
    intra_predict_127dc_16x16_msa(src, stride);
}

void ff_vp8_pred16x16_129_dc_8_msa(uint8_t *src, ptrdiff_t stride)
{
    intra_predict_129dc_16x16_msa(src, stride);
}