FFmpeg
dsp.h
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2024 Zhao Zhili
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef AVCODEC_AARCH64_H26X_DSP_H
22 #define AVCODEC_AARCH64_H26X_DSP_H
23 
24 #include <stddef.h>
25 #include <stdint.h>
26 
/*
 * NEON SAO filter prototypes. They are only declared here; the definitions
 * live outside this header.
 *
 * The band filters carry the shared ff_h26x_ prefix and take separate
 * destination and source strides. The edge filters are declared once per
 * codec prefix (ff_hevc_ and ff_vvc_) with identical signatures and take a
 * destination stride only.
 *
 * NOTE(review): the 8x8 / 16x16 part of each name presumably is the tile size
 * the routine works on; width and height are still passed explicitly --
 * confirm against the implementation.
 */
27 void ff_h26x_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src,
28  ptrdiff_t stride_dst, ptrdiff_t stride_src,
29  const int16_t *sao_offset_val, int sao_left_class,
30  int width, int height);
31 void ff_h26x_sao_band_filter_16x16_8_neon(uint8_t *_dst, const uint8_t *_src,
32  ptrdiff_t stride_dst, ptrdiff_t stride_src,
33  const int16_t *sao_offset_val, int sao_left_class,
34  int width, int height);
/* Edge filters, ff_hevc_ prefix. */
35 void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
36  const int16_t *sao_offset_val, int eo, int width, int height);
37 void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
38  const int16_t *sao_offset_val, int eo, int width, int height);
39 
/* Edge filters, ff_vvc_ prefix; same parameter lists as the ff_hevc_ pair. */
40 void ff_vvc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
41  const int16_t *sao_offset_val, int eo, int width, int height);
42 void ff_vvc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
43  const int16_t *sao_offset_val, int eo, int width, int height);
44 
/*
 * Declares six prototypes named ff_hevc_put_hevc_<fn>_h<N>_8_neon<ext> for
 * N = 4, 6, 8, 12, 16, 32, all sharing the parenthesised parameter list
 * `args`.
 *
 * The expansion already ends with ';', so the three invocations that follow
 * are written without a trailing semicolon. NEON8_FNPROTO, defined further
 * down, uses the opposite convention.
 *
 * This definition is #undef'd later in the header and replaced by a variant
 * that generates ff_vvc_put_ names.
 */
45 #define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \
46  void ff_hevc_put_hevc_##fn##_h4_8_neon##ext args; \
47  void ff_hevc_put_hevc_##fn##_h6_8_neon##ext args; \
48  void ff_hevc_put_hevc_##fn##_h8_8_neon##ext args; \
49  void ff_hevc_put_hevc_##fn##_h12_8_neon##ext args; \
50  void ff_hevc_put_hevc_##fn##_h16_8_neon##ext args; \
51  void ff_hevc_put_hevc_##fn##_h32_8_neon##ext args;
52 
/* qpel: int16_t destination, no destination stride; empty ext. */
53 NEON8_FNPROTO_PARTIAL_6(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
54  intptr_t mx, intptr_t my, int width),)
55 
/* qpel_uni: uint8_t destination with its own stride; empty ext. */
56 NEON8_FNPROTO_PARTIAL_6(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
57  ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width),)
58 
/* qpel_bi: as qpel_uni plus a second, int16_t source (src2); empty ext. */
59 NEON8_FNPROTO_PARTIAL_6(qpel_bi, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
60  ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t
61  mx, intptr_t my, int width),)
62 
/*
 * Declares nine prototypes named ff_hevc_put_hevc_<fn><N>_8_neon<ext> for
 * N = 4, 6, 8, 12, 16, 24, 32, 48, 64, all sharing the parenthesised
 * parameter list `args`. `ext` is pasted onto the end of each name and may be
 * empty.
 *
 * The last declaration has no ';', so each invocation must supply one.
 * NOTE(review): N presumably is the block width the routine is specialised
 * for -- confirm against the implementation.
 */
63 #define NEON8_FNPROTO(fn, args, ext) \
64  void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
65  void ff_hevc_put_hevc_##fn##6_8_neon##ext args; \
66  void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
67  void ff_hevc_put_hevc_##fn##12_8_neon##ext args; \
68  void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
69  void ff_hevc_put_hevc_##fn##24_8_neon##ext args; \
70  void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \
71  void ff_hevc_put_hevc_##fn##48_8_neon##ext args; \
72  void ff_hevc_put_hevc_##fn##64_8_neon##ext args
73 
/*
 * Four-entry subset of NEON8_FNPROTO: declares
 * ff_hevc_put_hevc_<fn><N>_8_neon<ext> for N = 4, 8, 16, 64 only.
 * The expansion has no trailing ';', so an invocation must supply it.
 *
 * No invocation of this form is visible in this header; the macro is
 * #undef'd and redefined with ff_vvc_put_ names further down.
 */
74 #define NEON8_FNPROTO_PARTIAL_4(fn, args, ext) \
75  void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
76  void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
77  void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
78  void ff_hevc_put_hevc_##fn##64_8_neon##ext args
79 
/*
 * Five-entry subset of NEON8_FNPROTO: declares
 * ff_hevc_put_hevc_<fn><N>_8_neon<ext> for N = 4, 8, 16, 32, 64 only.
 * The expansion has no trailing ';', so an invocation must supply it.
 *
 * No invocation is visible in this header; the macro stays defined for any
 * file that includes it.
 */
80 #define NEON8_FNPROTO_PARTIAL_5(fn, args, ext) \
81  void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
82  void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
83  void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
84  void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \
85  void ff_hevc_put_hevc_##fn##64_8_neon##ext args
86 
/*
 * pel_*_pixels prototypes; each invocation expands to nine
 * ff_hevc_put_hevc_<fn><N>_8_neon declarations with an empty ext.
 *
 *   pel_pixels      - int16_t destination, no destination stride.
 *   pel_bi_pixels   - uint8_t destination with stride, plus a second
 *                     int16_t source (src2).
 *   pel_bi_w_pixels - as pel_bi_pixels, plus denom, wx0/wx1 and ox0/ox1.
 *
 * NOTE(review): denom / wx / ox presumably are the weighted-prediction
 * shift, weights and offsets -- confirm against the callers.
 */
87 NEON8_FNPROTO(pel_pixels, (int16_t *dst,
88  const uint8_t *src, ptrdiff_t srcstride,
89  int height, intptr_t mx, intptr_t my, int width),);
90 
91 NEON8_FNPROTO(pel_bi_pixels, (uint8_t *dst, ptrdiff_t dststride,
92  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
93  int height, intptr_t mx, intptr_t my, int width),);
94 
95 NEON8_FNPROTO(pel_bi_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
96  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
97  int height, int denom, int wx0, int wx1,
98  int ox0, int ox1, intptr_t mx, intptr_t my, int width),);
99 
/*
 * epel_bi_{h,v,hv} prototypes. All four invocations share one parameter
 * list: uint8_t destination with stride, uint8_t source with stride, and a
 * second int16_t source (src2).
 *
 * epel_bi_hv is declared twice: once with an empty ext and once with ext
 * _i8mm, which yields names ending in _8_neon_i8mm.
 * NOTE(review): the _i8mm set presumably needs the Armv8.6 Int8 matrix
 * multiply extension at run time -- confirm where the pointers are selected.
 */
100 NEON8_FNPROTO(epel_bi_h, (uint8_t *dst, ptrdiff_t dststride,
101  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
102  int height, intptr_t mx, intptr_t my, int width),);
103 
104 NEON8_FNPROTO(epel_bi_v, (uint8_t *dst, ptrdiff_t dststride,
105  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
106  int height, intptr_t mx, intptr_t my, int width),);
107 
108 NEON8_FNPROTO(epel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
109  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
110  int height, intptr_t mx, intptr_t my, int width),);
111 
112 NEON8_FNPROTO(epel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
113  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
114  int height, intptr_t mx, intptr_t my, int width), _i8mm);
115 
/*
 * epel_v, pel_uni_pixels, pel_uni_w_pixels and epel_uni_{v,h,hv} prototypes.
 *
 *   epel_v            - int16_t destination, no destination stride.
 *   pel_uni_pixels    - uint8_t destination with stride.
 *   pel_uni_w_pixels  - as pel_uni_pixels, plus denom, wx, ox.
 *   epel_uni_{v,h,hv} - uint8_t destination with stride; epel_uni_hv is also
 *                       declared with ext _i8mm.
 *
 * Parameter spelling varies between invocations (dst / _dst,
 * dststride / _dststride). Names in a prototype do not affect the declared
 * type, so this is cosmetic only.
 */
116 NEON8_FNPROTO(epel_v, (int16_t *dst,
117  const uint8_t *src, ptrdiff_t srcstride,
118  int height, intptr_t mx, intptr_t my, int width),);
119 
120 NEON8_FNPROTO(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
121  const uint8_t *_src, ptrdiff_t _srcstride,
122  int height, intptr_t mx, intptr_t my, int width),);
123 
124 NEON8_FNPROTO(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
125  const uint8_t *_src, ptrdiff_t _srcstride,
126  int height, int denom, int wx, int ox,
127  intptr_t mx, intptr_t my, int width),);
128 
129 NEON8_FNPROTO(epel_uni_v, (uint8_t *dst, ptrdiff_t dststride,
130  const uint8_t *src, ptrdiff_t srcstride,
131  int height, intptr_t mx, intptr_t my, int width),);
132 
133 NEON8_FNPROTO(epel_uni_h, (uint8_t *dst, ptrdiff_t dststride,
134  const uint8_t *src, ptrdiff_t srcstride,
135  int height, intptr_t mx, intptr_t my, int width),);
136 
137 NEON8_FNPROTO(epel_uni_hv, (uint8_t *dst, ptrdiff_t _dststride,
138  const uint8_t *src, ptrdiff_t srcstride,
139  int height, intptr_t mx, intptr_t my, int width),);
140 
141 NEON8_FNPROTO(epel_uni_hv, (uint8_t *dst, ptrdiff_t _dststride,
142  const uint8_t *src, ptrdiff_t srcstride,
143  int height, intptr_t mx, intptr_t my, int width), _i8mm);
144 
/*
 * epel_uni_w_v, qpel_uni_w_v, epel_{h,hv} and epel_uni_w_h prototypes.
 *
 *   *_uni_w_*   - uint8_t destination with stride, plus denom, wx, ox.
 *   epel_h / hv - int16_t destination, no destination stride.
 *
 * epel_h, epel_hv and epel_uni_w_h are each declared twice, with an empty
 * ext and with ext _i8mm. The two *_uni_w_v sets have an empty ext only.
 */
145 NEON8_FNPROTO(epel_uni_w_v, (uint8_t *_dst, ptrdiff_t _dststride,
146  const uint8_t *_src, ptrdiff_t _srcstride,
147  int height, int denom, int wx, int ox,
148  intptr_t mx, intptr_t my, int width),);
149 
150 NEON8_FNPROTO(qpel_uni_w_v, (uint8_t *_dst, ptrdiff_t _dststride,
151  const uint8_t *_src, ptrdiff_t _srcstride,
152  int height, int denom, int wx, int ox,
153  intptr_t mx, intptr_t my, int width),);
154 
155 NEON8_FNPROTO(epel_h, (int16_t *dst,
156  const uint8_t *_src, ptrdiff_t _srcstride,
157  int height, intptr_t mx, intptr_t my, int width),);
158 
159 NEON8_FNPROTO(epel_hv, (int16_t *dst,
160  const uint8_t *src, ptrdiff_t srcstride,
161  int height, intptr_t mx, intptr_t my, int width), );
162 
163 NEON8_FNPROTO(epel_h, (int16_t *dst,
164  const uint8_t *_src, ptrdiff_t _srcstride,
165  int height, intptr_t mx, intptr_t my, int width), _i8mm);
166 
167 NEON8_FNPROTO(epel_hv, (int16_t *dst,
168  const uint8_t *src, ptrdiff_t srcstride,
169  int height, intptr_t mx, intptr_t my, int width), _i8mm);
170 
171 NEON8_FNPROTO(epel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
172  const uint8_t *_src, ptrdiff_t _srcstride,
173  int height, int denom, int wx, int ox,
174  intptr_t mx, intptr_t my, int width),);
175 
176 NEON8_FNPROTO(epel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
177  const uint8_t *_src, ptrdiff_t _srcstride,
178  int height, int denom, int wx, int ox,
179  intptr_t mx, intptr_t my, int width), _i8mm);
180 
/*
 * qpel_{h,v,hv} and qpel_uni_{v,hv} prototypes.
 *
 *   qpel_h / v / hv  - int16_t destination, no destination stride.
 *   qpel_uni_v / hv  - uint8_t destination with stride.
 *
 * qpel_h is declared through NEON8_FNPROTO with ext _i8mm only. The
 * ff_hevc_put_hevc_qpel_h<N>_8_neon names without that ext come from the
 * NEON8_FNPROTO_PARTIAL_6(qpel, ...) invocation near the top of the header
 * and cover N = 4, 6, 8, 12, 16, 32 only.
 * qpel_hv and qpel_uni_hv are declared both with an empty ext and with _i8mm.
 */
181 NEON8_FNPROTO(qpel_h, (int16_t *dst,
182  const uint8_t *_src, ptrdiff_t _srcstride,
183  int height, intptr_t mx, intptr_t my, int width), _i8mm);
184 
185 NEON8_FNPROTO(qpel_v, (int16_t *dst,
186  const uint8_t *src, ptrdiff_t srcstride,
187  int height, intptr_t mx, intptr_t my, int width),);
188 
189 NEON8_FNPROTO(qpel_hv, (int16_t *dst,
190  const uint8_t *src, ptrdiff_t srcstride,
191  int height, intptr_t mx, intptr_t my, int width),);
192 
193 NEON8_FNPROTO(qpel_hv, (int16_t *dst,
194  const uint8_t *src, ptrdiff_t srcstride,
195  int height, intptr_t mx, intptr_t my, int width), _i8mm);
196 
197 NEON8_FNPROTO(qpel_uni_v, (uint8_t *dst, ptrdiff_t dststride,
198  const uint8_t *src, ptrdiff_t srcstride,
199  int height, intptr_t mx, intptr_t my, int width),);
200 
201 NEON8_FNPROTO(qpel_uni_hv, (uint8_t *dst, ptrdiff_t dststride,
202  const uint8_t *src, ptrdiff_t srcstride,
203  int height, intptr_t mx, intptr_t my, int width),);
204 
205 NEON8_FNPROTO(qpel_uni_hv, (uint8_t *dst, ptrdiff_t dststride,
206  const uint8_t *src, ptrdiff_t srcstride,
207  int height, intptr_t mx, intptr_t my, int width), _i8mm);
208 
/*
 * qpel_uni_w_h, epel_uni_w_hv and qpel_uni_w_hv prototypes.
 *
 * All six invocations use the same parameter list: uint8_t destination with
 * stride, uint8_t source with stride, height, denom, wx, ox, mx, my, width.
 * Each function family is declared twice, once with an empty ext and once
 * with ext _i8mm.
 */
209 NEON8_FNPROTO(qpel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
210  const uint8_t *_src, ptrdiff_t _srcstride,
211  int height, int denom, int wx, int ox,
212  intptr_t mx, intptr_t my, int width),);
213 
214 NEON8_FNPROTO(qpel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
215  const uint8_t *_src, ptrdiff_t _srcstride,
216  int height, int denom, int wx, int ox,
217  intptr_t mx, intptr_t my, int width), _i8mm);
218 
219 NEON8_FNPROTO(epel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
220  const uint8_t *_src, ptrdiff_t _srcstride,
221  int height, int denom, int wx, int ox,
222  intptr_t mx, intptr_t my, int width),);
223 
224 NEON8_FNPROTO(epel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
225  const uint8_t *_src, ptrdiff_t _srcstride,
226  int height, int denom, int wx, int ox,
227  intptr_t mx, intptr_t my, int width), _i8mm);
228 
229 NEON8_FNPROTO(qpel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
230  const uint8_t *_src, ptrdiff_t _srcstride,
231  int height, int denom, int wx, int ox,
232  intptr_t mx, intptr_t my, int width),);
233 
234 NEON8_FNPROTO(qpel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
235  const uint8_t *_src, ptrdiff_t _srcstride,
236  int height, int denom, int wx, int ox,
237  intptr_t mx, intptr_t my, int width), _i8mm);
238 
/*
 * qpel_bi_v and qpel_bi_hv prototypes: uint8_t destination with stride,
 * uint8_t source with stride, and a second int16_t source (src2).
 * qpel_bi_hv is declared both with an empty ext and with ext _i8mm.
 */
239 NEON8_FNPROTO(qpel_bi_v, (uint8_t *dst, ptrdiff_t dststride,
240  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
241  int height, intptr_t mx, intptr_t my, int width),);
242 
243 NEON8_FNPROTO(qpel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
244  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
245  int height, intptr_t mx, intptr_t my, int width),);
246 
247 NEON8_FNPROTO(qpel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
248  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
249  int height, intptr_t mx, intptr_t my, int width), _i8mm);
250 
/*
 * From here on NEON8_FNPROTO_PARTIAL_4 generates ff_vvc_put_ names instead
 * of ff_hevc_put_hevc_ ones: it declares ff_vvc_put_<fn>_h<N>_8_neon<ext>
 * for N = 4, 8, 16, 32.
 *
 * Unlike the definition it replaces, this expansion ends with ';', so the
 * invocations that follow are written without a trailing semicolon.
 */
251 #undef NEON8_FNPROTO_PARTIAL_4
252 #define NEON8_FNPROTO_PARTIAL_4(fn, args, ext) \
253  void ff_vvc_put_##fn##_h4_8_neon##ext args; \
254  void ff_vvc_put_##fn##_h8_8_neon##ext args; \
255  void ff_vvc_put_##fn##_h16_8_neon##ext args; \
256  void ff_vvc_put_##fn##_h32_8_neon##ext args;
257 
/*
 * These prototypes take two const int8_t pointers (hf, vf) where the
 * ff_hevc_put_hevc_ prototypes take intptr_t mx, my.
 * NOTE(review): hf / vf presumably point at horizontal / vertical filter
 * coefficients -- confirm against the callers.
 */
258 NEON8_FNPROTO_PARTIAL_4(qpel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
259  const int8_t *hf, const int8_t *vf, int width),)
260 
261 NEON8_FNPROTO_PARTIAL_4(qpel_uni, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
262  ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width),)
263 
264 NEON8_FNPROTO_PARTIAL_4(epel, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
265  const int8_t *hf, const int8_t *vf, int width),)
266 
/*
 * From here on NEON8_FNPROTO_PARTIAL_6 declares
 * ff_vvc_put_<fn><N>_8_neon<ext> for N = 4, 8, 16, 32, 64, 128.
 *
 * Two differences from the definition it replaces: the size suffix has no
 * "_h" in front of it, and the expansion has no trailing ';', so every
 * invocation below must supply the semicolon.
 */
267 #undef NEON8_FNPROTO_PARTIAL_6
268 #define NEON8_FNPROTO_PARTIAL_6(fn, args, ext) \
269  void ff_vvc_put_##fn##4_8_neon##ext args; \
270  void ff_vvc_put_##fn##8_8_neon##ext args; \
271  void ff_vvc_put_##fn##16_8_neon##ext args; \
272  void ff_vvc_put_##fn##32_8_neon##ext args; \
273  void ff_vvc_put_##fn##64_8_neon##ext args; \
274  void ff_vvc_put_##fn##128_8_neon##ext args
275 
/*
 * ff_vvc_put_ prototypes for pel_pixels, pel_uni_pixels, pel_uni_w_pixels,
 * qpel_h and epel_h, six sizes each (4 .. 128).
 *
 *   pel_pixels        - int16_t destination, no destination stride.
 *   pel_uni_pixels    - uint8_t destination with stride.
 *   pel_uni_w_pixels  - as pel_uni_pixels, plus denom, wx, ox.
 *   qpel_h / epel_h   - int16_t destination; declared here with ext _i8mm
 *                       only.
 */
276 NEON8_FNPROTO_PARTIAL_6(pel_pixels, (int16_t *dst,
277  const uint8_t *src, ptrdiff_t srcstride, int height,
278  const int8_t *hf, const int8_t *vf, int width),);
279 
280 NEON8_FNPROTO_PARTIAL_6(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
281  const uint8_t *_src, ptrdiff_t _srcstride, int height,
282  const int8_t *hf, const int8_t *vf, int width),);
283 
284 NEON8_FNPROTO_PARTIAL_6(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
285  const uint8_t *_src, ptrdiff_t _srcstride,
286  int height, int denom, int wx, int ox,
287  const int8_t *hf, const int8_t *vf, int width),);
288 
289 NEON8_FNPROTO_PARTIAL_6(qpel_h, (int16_t *dst,
290  const uint8_t *_src, ptrdiff_t _srcstride, int height,
291  const int8_t *hf, const int8_t *vf, int width), _i8mm);
292 
293 NEON8_FNPROTO_PARTIAL_6(epel_h, (int16_t *dst,
294  const uint8_t *_src, ptrdiff_t _srcstride, int height,
295  const int8_t *hf, const int8_t *vf, int width), _i8mm);
296 
/*
 * ff_vvc_put_qpel_v prototypes, written out by hand instead of through a
 * macro. Only the v4 and v8 variants are declared in this header; both take
 * the same parameter list as the macro-generated ff_vvc_put_ prototypes with
 * an int16_t destination.
 */
297 void ff_vvc_put_qpel_v4_8_neon(int16_t *dst, const uint8_t *_src,
298  ptrdiff_t _srcstride, int height,
299  const int8_t *hf, const int8_t *vf, int width);
300 
301 void ff_vvc_put_qpel_v8_8_neon(int16_t *dst, const uint8_t *_src,
302  ptrdiff_t _srcstride, int height,
303  const int8_t *hf, const int8_t *vf, int width);
304 
/*
 * ff_vvc_put_ qpel_hv and epel_hv prototypes, six sizes each (4 .. 128).
 * All four invocations share one parameter list with an int16_t destination.
 * Each family is declared twice, once with an empty ext and once with ext
 * _i8mm.
 */
305 NEON8_FNPROTO_PARTIAL_6(qpel_hv, (int16_t *dst,
306  const uint8_t *src, ptrdiff_t srcstride, int height,
307  const int8_t *hf, const int8_t *vf, int width),);
308 
309 NEON8_FNPROTO_PARTIAL_6(qpel_hv, (int16_t *dst,
310  const uint8_t *src, ptrdiff_t srcstride, int height,
311  const int8_t *hf, const int8_t *vf, int width), _i8mm);
312 
313 NEON8_FNPROTO_PARTIAL_6(epel_hv, (int16_t *dst,
314  const uint8_t *src, ptrdiff_t srcstride, int height,
315  const int8_t *hf, const int8_t *vf, int width),);
316 
317 NEON8_FNPROTO_PARTIAL_6(epel_hv, (int16_t *dst,
318  const uint8_t *src, ptrdiff_t srcstride, int height,
319  const int8_t *hf, const int8_t *vf, int width), _i8mm);
320 
321 #endif
_dst
uint8_t * _dst
Definition: dsp.h:56
ff_hevc_sao_edge_filter_16x16_8_neon
void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
src
const uint8_t * src
Definition: dsp.h:88
ff_vvc_put_qpel_v8_8_neon
void ff_vvc_put_qpel_v8_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width)
height
int height
Definition: dsp.h:57
ff_hevc_sao_edge_filter_8x8_8_neon
void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
_src
const uint8_t * _src
Definition: dsp.h:56
mx
intptr_t mx
Definition: dsp.h:57
_srcstride
ptrdiff_t _srcstride
Definition: dsp.h:57
ff_vvc_put_qpel_v4_8_neon
void ff_vvc_put_qpel_v4_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, const int8_t *hf, const int8_t *vf, int width)
ff_h26x_sao_band_filter_16x16_8_neon
void ff_h26x_sao_band_filter_16x16_8_neon(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height)
my
intptr_t my
Definition: dsp.h:57
srcstride
ptrdiff_t srcstride
Definition: dsp.h:88
NEON8_FNPROTO
#define NEON8_FNPROTO(fn, args, ext)
width
int width
Definition: dsp.h:57
_dststride
ptrdiff_t _dststride
Definition: dsp.h:56
ff_vvc_sao_edge_filter_8x8_8_neon
void ff_vvc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
dst
int16_t * dst
Definition: dsp.h:87
ff_h26x_sao_band_filter_8x8_8_neon
void ff_h26x_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height)
NEON8_FNPROTO_PARTIAL_4
#define NEON8_FNPROTO_PARTIAL_4(fn, args, ext)
Definition: dsp.h:252
src2
const pixel * src2
Definition: h264pred_template.c:421
hf
const int8_t * hf
Definition: dsp.h:262
NEON8_FNPROTO_PARTIAL_6
#define NEON8_FNPROTO_PARTIAL_6(fn, args, ext)
Definition: dsp.h:45
ff_vvc_sao_edge_filter_16x16_8_neon
void ff_vvc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
vf
const int8_t * vf
Definition: dsp.h:262