FFmpeg
h264idct_loongarch.c
Go to the documentation of this file.
1 /*
2  * Loongson LSX/LASX optimized h264idct
3  *
4  * Copyright (c) 2023 Loongson Technology Corporation Limited
5  * Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
6  * Xiwei Gu <guxiwei-hf@loongson.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
25 #include "h264dsp_loongarch.h"
27 
28 void ff_h264_idct_add16_8_lsx(uint8_t *dst, const int32_t *blk_offset,
29  int16_t *block, int32_t dst_stride,
30  const uint8_t nzc[15 * 8])
31 {
32  int32_t i;
33 
34  for (i = 0; i < 16; i++) {
35  int32_t nnz = nzc[scan8[i]];
36 
37  if (nnz == 1 && ((dctcoef *) block)[i * 16]) {
38  ff_h264_idct_dc_add_8_lsx(dst + blk_offset[i],
39  block + i * 16 * sizeof(pixel),
40  dst_stride);
41  } else if (nnz) {
42  ff_h264_idct_add_8_lsx(dst + blk_offset[i],
43  block + i * 16 * sizeof(pixel),
44  dst_stride);
45  }
46  }
47 }
48 
49 void ff_h264_idct8_add4_8_lsx(uint8_t *dst, const int32_t *blk_offset,
50  int16_t *block, int32_t dst_stride,
51  const uint8_t nzc[15 * 8])
52 {
53  int32_t cnt;
54 
55  for (cnt = 0; cnt < 16; cnt += 4) {
56  int32_t nnz = nzc[scan8[cnt]];
57 
58  if (nnz == 1 && ((dctcoef *) block)[cnt * 16]) {
59  ff_h264_idct8_dc_add_8_lsx(dst + blk_offset[cnt],
60  block + cnt * 16 * sizeof(pixel),
61  dst_stride);
62  } else if (nnz) {
63  ff_h264_idct8_add_8_lsx(dst + blk_offset[cnt],
64  block + cnt * 16 * sizeof(pixel),
65  dst_stride);
66  }
67  }
68 }
69 
70 #if HAVE_LASX
71 void ff_h264_idct8_add4_8_lasx(uint8_t *dst, const int32_t *blk_offset,
72  int16_t *block, int32_t dst_stride,
73  const uint8_t nzc[15 * 8])
74 {
75  int32_t cnt;
76 
77  for (cnt = 0; cnt < 16; cnt += 4) {
78  int32_t nnz = nzc[scan8[cnt]];
79 
80  if (nnz == 1 && ((dctcoef *) block)[cnt * 16]) {
81  ff_h264_idct8_dc_add_8_lasx(dst + blk_offset[cnt],
82  block + cnt * 16 * sizeof(pixel),
83  dst_stride);
84  } else if (nnz) {
85  ff_h264_idct8_add_8_lasx(dst + blk_offset[cnt],
86  block + cnt * 16 * sizeof(pixel),
87  dst_stride);
88  }
89  }
90 }
91 #endif // #if HAVE_LASX
92 
93 void ff_h264_idct_add8_8_lsx(uint8_t **dst, const int32_t *blk_offset,
94  int16_t *block, int32_t dst_stride,
95  const uint8_t nzc[15 * 8])
96 {
97  int32_t i;
98 
99  for (i = 16; i < 20; i++) {
100  if (nzc[scan8[i]])
101  ff_h264_idct_add_8_lsx(dst[0] + blk_offset[i],
102  block + i * 16 * sizeof(pixel),
103  dst_stride);
104  else if (((dctcoef *) block)[i * 16])
105  ff_h264_idct_dc_add_8_lsx(dst[0] + blk_offset[i],
106  block + i * 16 * sizeof(pixel),
107  dst_stride);
108  }
109  for (i = 32; i < 36; i++) {
110  if (nzc[scan8[i]])
111  ff_h264_idct_add_8_lsx(dst[1] + blk_offset[i],
112  block + i * 16 * sizeof(pixel),
113  dst_stride);
114  else if (((dctcoef *) block)[i * 16])
115  ff_h264_idct_dc_add_8_lsx(dst[1] + blk_offset[i],
116  block + i * 16 * sizeof(pixel),
117  dst_stride);
118  }
119 }
120 
121 void ff_h264_idct_add8_422_8_lsx(uint8_t **dst, const int32_t *blk_offset,
122  int16_t *block, int32_t dst_stride,
123  const uint8_t nzc[15 * 8])
124 {
125  int32_t i;
126 
127  for (i = 16; i < 20; i++) {
128  if (nzc[scan8[i]])
129  ff_h264_idct_add_8_lsx(dst[0] + blk_offset[i],
130  block + i * 16 * sizeof(pixel),
131  dst_stride);
132  else if (((dctcoef *) block)[i * 16])
133  ff_h264_idct_dc_add_8_lsx(dst[0] + blk_offset[i],
134  block + i * 16 * sizeof(pixel),
135  dst_stride);
136  }
137  for (i = 20; i < 24; i++) {
138  if (nzc[scan8[i + 4]])
139  ff_h264_idct_add_8_lsx(dst[0] + blk_offset[i + 4],
140  block + i * 16 * sizeof(pixel),
141  dst_stride);
142  else if (((dctcoef *) block)[i * 16])
143  ff_h264_idct_dc_add_8_lsx(dst[0] + blk_offset[i + 4],
144  block + i * 16 * sizeof(pixel),
145  dst_stride);
146  }
147  for (i = 32; i < 36; i++) {
148  if (nzc[scan8[i]])
149  ff_h264_idct_add_8_lsx(dst[1] + blk_offset[i],
150  block + i * 16 * sizeof(pixel),
151  dst_stride);
152  else if (((dctcoef *) block)[i * 16])
153  ff_h264_idct_dc_add_8_lsx(dst[1] + blk_offset[i],
154  block + i * 16 * sizeof(pixel),
155  dst_stride);
156  }
157  for (i = 36; i < 40; i++) {
158  if (nzc[scan8[i + 4]])
159  ff_h264_idct_add_8_lsx(dst[1] + blk_offset[i + 4],
160  block + i * 16 * sizeof(pixel),
161  dst_stride);
162  else if (((dctcoef *) block)[i * 16])
163  ff_h264_idct_dc_add_8_lsx(dst[1] + blk_offset[i + 4],
164  block + i * 16 * sizeof(pixel),
165  dst_stride);
166  }
167 }
168 
169 void ff_h264_idct_add16_intra_8_lsx(uint8_t *dst, const int32_t *blk_offset,
170  int16_t *block, int32_t dst_stride,
171  const uint8_t nzc[15 * 8])
172 {
173  int32_t i;
174 
175  for (i = 0; i < 16; i++) {
176  if (nzc[scan8[i]])
177  ff_h264_idct_add_8_lsx(dst + blk_offset[i],
178  block + i * 16 * sizeof(pixel), dst_stride);
179  else if (((dctcoef *) block)[i * 16])
180  ff_h264_idct_dc_add_8_lsx(dst + blk_offset[i],
181  block + i * 16 * sizeof(pixel),
182  dst_stride);
183  }
184 }
ff_h264_idct_add16_8_lsx
void ff_h264_idct_add16_8_lsx(uint8_t *dst, const int32_t *blk_offset, int16_t *block, int32_t dst_stride, const uint8_t nzc[15 *8])
Definition: h264idct_loongarch.c:28
ff_h264_idct8_add4_8_lsx
void ff_h264_idct8_add4_8_lsx(uint8_t *dst, const int32_t *blk_offset, int16_t *block, int32_t dst_stride, const uint8_t nzc[15 *8])
Definition: h264idct_loongarch.c:49
ff_h264_idct8_add_8_lsx
void ff_h264_idct8_add_8_lsx(uint8_t *dst, int16_t *src, int dst_stride)
ff_h264_idct_add_8_lsx
void ff_h264_idct_add_8_lsx(uint8_t *dst, int16_t *src, int dst_stride)
ff_h264_idct_dc_add_8_lsx
void ff_h264_idct_dc_add_8_lsx(uint8_t *dst, int16_t *src, int dst_stride)
dctcoef
#define dctcoef
Definition: bit_depth_template.c:84
scan8
static const uint8_t scan8[16 *3+3]
Definition: h264_parse.h:40
ff_h264_idct8_dc_add_8_lsx
void ff_h264_idct8_dc_add_8_lsx(uint8_t *dst, int16_t *src, int dst_stride)
ff_h264_idct_add8_8_lsx
void ff_h264_idct_add8_8_lsx(uint8_t **dst, const int32_t *blk_offset, int16_t *block, int32_t dst_stride, const uint8_t nzc[15 *8])
Definition: h264idct_loongarch.c:93
h264dsp_loongarch.h
ff_h264_idct_add8_422_8_lsx
void ff_h264_idct_add8_422_8_lsx(uint8_t **dst, const int32_t *blk_offset, int16_t *block, int32_t dst_stride, const uint8_t nzc[15 *8])
Definition: h264idct_loongarch.c:121
pixel
uint8_t pixel
Definition: tiny_ssim.c:41
bit_depth_template.c
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:245
int32_t
int32_t
Definition: audioconvert.c:56
ff_h264_idct_add16_intra_8_lsx
void ff_h264_idct_add16_intra_8_lsx(uint8_t *dst, const int32_t *blk_offset, int16_t *block, int32_t dst_stride, const uint8_t nzc[15 *8])
Definition: h264idct_loongarch.c:169
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207