FFmpeg
hevcdsp_init_aarch64.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020 Reimar Döffinger
3  * Copyright (c) 2023 xu fulong <839789740@qq.com>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 #include <stdint.h>
23 
24 #include "libavutil/attributes.h"
25 #include "libavutil/cpu.h"
26 #include "libavutil/aarch64/cpu.h"
27 #include "libavcodec/hevcdsp.h"
28 
/*
 * Chroma loop-filter routines, one per edge direction (v/h) and bit depth
 * (8/10/12).  They are only declared in this file; the definitions are
 * external (presumably the NEON assembly sources -- confirm).  All six share
 * one signature so they can be stored in the same HEVCDSPContext slots.
 */
void ff_hevc_v_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride,
                                         const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
void ff_hevc_v_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride,
                                          const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
void ff_hevc_v_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride,
                                          const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
void ff_hevc_h_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride,
                                         const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
void ff_hevc_h_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride,
                                          const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
void ff_hevc_h_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride,
                                          const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q);
/*
 * Residual-add routines for 4x4, 8x8, 16x16 and 32x32 blocks, each in an
 * 8-, 10- and 12-bit flavour.  Declarations only; the definitions are
 * external to this file.  _dst is typed uint8_t * for every bit depth, which
 * keeps all twelve assignable to the same add_residual[] slots.
 */
void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, const int16_t *coeffs,
                                     ptrdiff_t stride);
void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, const int16_t *coeffs,
                                      ptrdiff_t stride);
void ff_hevc_add_residual_4x4_12_neon(uint8_t *_dst, const int16_t *coeffs,
                                      ptrdiff_t stride);
void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, const int16_t *coeffs,
                                     ptrdiff_t stride);
void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, const int16_t *coeffs,
                                      ptrdiff_t stride);
void ff_hevc_add_residual_8x8_12_neon(uint8_t *_dst, const int16_t *coeffs,
                                      ptrdiff_t stride);
void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, const int16_t *coeffs,
                                       ptrdiff_t stride);
void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, const int16_t *coeffs,
                                        ptrdiff_t stride);
void ff_hevc_add_residual_16x16_12_neon(uint8_t *_dst, const int16_t *coeffs,
                                        ptrdiff_t stride);
void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, const int16_t *coeffs,
                                       ptrdiff_t stride);
void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, const int16_t *coeffs,
                                        ptrdiff_t stride);
void ff_hevc_add_residual_32x32_12_neon(uint8_t *_dst, const int16_t *coeffs,
                                        ptrdiff_t stride);
/*
 * Inverse-transform routines.  Declarations only; definitions are external.
 *
 *  - ff_hevc_idct_NxN_{8,10}_neon:    take the coefficient block plus col_limit.
 *  - ff_hevc_idct_NxN_dc_{8,10}_neon: take the coefficient block only.
 *  - ff_hevc_transform_luma_4x4_neon_8: note the "_neon_8" suffix order, which
 *    differs from every other symbol here; it is kept because it has to match
 *    the external definition.
 *
 * No 12-bit variants are declared.
 */
void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_32x32_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_32x32_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);
void ff_hevc_transform_luma_4x4_neon_8(int16_t *coeffs);
/*
 * 8-bit SAO routines.  Declarations only; definitions are external.
 * The band filter takes separate destination and source strides, whereas the
 * two edge filters take a destination stride only.
 */
void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src,
                                        ptrdiff_t stride_dst, ptrdiff_t stride_src,
                                        const int16_t *sao_offset_val, int sao_left_class,
                                        int width, int height);
void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
                                          const int16_t *sao_offset_val, int eo, int width, int height);
void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst,
                                        const int16_t *sao_offset_val, int eo, int width, int height);
/*
 * 8-bit qpel "h" routines without the _i8mm suffix, in three families:
 *
 *  - qpel_hN:     writes int16_t dst, no destination stride.
 *  - qpel_uni_hN: writes uint8_t dst with a destination stride.
 *  - qpel_bi_hN:  like uni, plus a second int16_t source (src2).
 *
 * Only N = 4, 6, 8, 12 and 16 are declared.  Declarations only; definitions
 * are external to this file.
 */
void ff_hevc_put_hevc_qpel_h4_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
                                     intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_h6_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
                                     intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_h8_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
                                     intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_h12_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
                                      intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_h16_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height,
                                      intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_uni_h4_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
                                         ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,
                                         int width);
void ff_hevc_put_hevc_qpel_uni_h6_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
                                         ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,
                                         int width);
void ff_hevc_put_hevc_qpel_uni_h8_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
                                         ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,
                                         int width);
void ff_hevc_put_hevc_qpel_uni_h12_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
                                          ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,
                                          int width);
void ff_hevc_put_hevc_qpel_uni_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
                                          ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my,
                                          int width);
void ff_hevc_put_hevc_qpel_bi_h4_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
                                        ptrdiff_t _srcstride, const int16_t *src2, int height,
                                        intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_bi_h6_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
                                        ptrdiff_t _srcstride, const int16_t *src2, int height,
                                        intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_bi_h8_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
                                        ptrdiff_t _srcstride, const int16_t *src2, int height,
                                        intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_bi_h12_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
                                         ptrdiff_t _srcstride, const int16_t *src2, int height,
                                         intptr_t mx, intptr_t my, int width);
void ff_hevc_put_hevc_qpel_bi_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src,
                                         ptrdiff_t _srcstride, const int16_t *src2, int height,
                                         intptr_t mx, intptr_t my, int width);
130 
/*
 * Declare the full set of nine 8-bit width variants of one routine family:
 *
 *     void ff_hevc_put_hevc_<fn><W>_8_neon<ext> <args>
 *
 * for W in 4, 6, 8, 12, 16, 24, 32, 48, 64.
 *
 *   fn   - family name pasted in front of the width (e.g. epel_v).
 *   args - the complete, parenthesised parameter list.
 *   ext  - suffix pasted after "neon"; may be empty or e.g. _i8mm.
 *
 * The expansion deliberately has no trailing semicolon: the invocation
 * supplies it.
 */
#define NEON8_FNPROTO(fn, args, ext) \
    void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##6_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##12_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##24_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##48_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##64_8_neon##ext args
141 
/*
 * Like NEON8_FNPROTO, but for families that only provide four width
 * variants: 4, 8, 16 and 64.  Parameters have the same meaning (family name,
 * parenthesised parameter list, optional suffix after "neon").
 *
 * No trailing semicolon in the expansion; the invocation supplies it.
 */
#define NEON8_FNPROTO_PARTIAL_4(fn, args, ext) \
    void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##64_8_neon##ext args
147 
/*
 * Like NEON8_FNPROTO, but for families that only provide five width
 * variants: 4, 8, 16, 32 and 64.  Parameters have the same meaning (family
 * name, parenthesised parameter list, optional suffix after "neon").
 *
 * No trailing semicolon in the expansion; the invocation supplies it.
 */
#define NEON8_FNPROTO_PARTIAL_5(fn, args, ext) \
    void ff_hevc_put_hevc_##fn##4_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##8_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##16_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##32_8_neon##ext args; \
    void ff_hevc_put_hevc_##fn##64_8_neon##ext args
154 
155 NEON8_FNPROTO(pel_pixels, (int16_t *dst,
156  const uint8_t *src, ptrdiff_t srcstride,
157  int height, intptr_t mx, intptr_t my, int width),);
158 
159 NEON8_FNPROTO(pel_bi_pixels, (uint8_t *dst, ptrdiff_t dststride,
160  const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2,
161  int height, intptr_t mx, intptr_t my, int width),);
162 
163 NEON8_FNPROTO(epel_bi_h, (uint8_t *dst, ptrdiff_t dststride,
164  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
165  int height, intptr_t mx, intptr_t my, int width),);
166 
167 NEON8_FNPROTO(epel_bi_v, (uint8_t *dst, ptrdiff_t dststride,
168  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
169  int height, intptr_t mx, intptr_t my, int width),);
170 
171 NEON8_FNPROTO(epel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
172  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
173  int height, intptr_t mx, intptr_t my, int width), _i8mm);
174 
175 NEON8_FNPROTO(epel_v, (int16_t *dst,
176  const uint8_t *src, ptrdiff_t srcstride,
177  int height, intptr_t mx, intptr_t my, int width),);
178 
179 NEON8_FNPROTO(pel_uni_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
180  const uint8_t *_src, ptrdiff_t _srcstride,
181  int height, intptr_t mx, intptr_t my, int width),);
182 
183 NEON8_FNPROTO(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride,
184  const uint8_t *_src, ptrdiff_t _srcstride,
185  int height, int denom, int wx, int ox,
186  intptr_t mx, intptr_t my, int width),);
187 
188 NEON8_FNPROTO(epel_uni_v, (uint8_t *dst, ptrdiff_t dststride,
189  const uint8_t *src, ptrdiff_t srcstride,
190  int height, intptr_t mx, intptr_t my, int width),);
191 
192 NEON8_FNPROTO(epel_uni_hv, (uint8_t *dst, ptrdiff_t _dststride,
193  const uint8_t *src, ptrdiff_t srcstride,
194  int height, intptr_t mx, intptr_t my, int width), _i8mm);
195 
196 NEON8_FNPROTO(epel_uni_w_v, (uint8_t *_dst, ptrdiff_t _dststride,
197  const uint8_t *_src, ptrdiff_t _srcstride,
198  int height, int denom, int wx, int ox,
199  intptr_t mx, intptr_t my, int width),);
200 
201 NEON8_FNPROTO_PARTIAL_4(qpel_uni_w_v, (uint8_t *_dst, ptrdiff_t _dststride,
202  const uint8_t *_src, ptrdiff_t _srcstride,
203  int height, int denom, int wx, int ox,
204  intptr_t mx, intptr_t my, int width),);
205 
206 NEON8_FNPROTO(epel_h, (int16_t *dst,
207  const uint8_t *_src, ptrdiff_t _srcstride,
208  int height, intptr_t mx, intptr_t my, int width), _i8mm);
209 
210 NEON8_FNPROTO(epel_hv, (int16_t *dst,
211  const uint8_t *src, ptrdiff_t srcstride,
212  int height, intptr_t mx, intptr_t my, int width), _i8mm);
213 
214 NEON8_FNPROTO(epel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
215  const uint8_t *_src, ptrdiff_t _srcstride,
216  int height, int denom, int wx, int ox,
217  intptr_t mx, intptr_t my, int width), _i8mm);
218 
219 NEON8_FNPROTO(qpel_h, (int16_t *dst,
220  const uint8_t *_src, ptrdiff_t _srcstride,
221  int height, intptr_t mx, intptr_t my, int width), _i8mm);
222 
223 NEON8_FNPROTO(qpel_v, (int16_t *dst,
224  const uint8_t *src, ptrdiff_t srcstride,
225  int height, intptr_t mx, intptr_t my, int width),);
226 
227 NEON8_FNPROTO(qpel_hv, (int16_t *dst,
228  const uint8_t *src, ptrdiff_t srcstride,
229  int height, intptr_t mx, intptr_t my, int width), _i8mm);
230 
231 NEON8_FNPROTO(qpel_uni_v, (uint8_t *dst, ptrdiff_t dststride,
232  const uint8_t *src, ptrdiff_t srcstride,
233  int height, intptr_t mx, intptr_t my, int width),);
234 
235 NEON8_FNPROTO(qpel_uni_hv, (uint8_t *dst, ptrdiff_t dststride,
236  const uint8_t *src, ptrdiff_t srcstride,
237  int height, intptr_t mx, intptr_t my, int width), _i8mm);
238 
239 NEON8_FNPROTO(qpel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
240  const uint8_t *_src, ptrdiff_t _srcstride,
241  int height, int denom, int wx, int ox,
242  intptr_t mx, intptr_t my, int width), _i8mm);
243 
244 NEON8_FNPROTO(epel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
245  const uint8_t *_src, ptrdiff_t _srcstride,
246  int height, int denom, int wx, int ox,
247  intptr_t mx, intptr_t my, int width), _i8mm);
248 
249 NEON8_FNPROTO_PARTIAL_5(qpel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
250  const uint8_t *_src, ptrdiff_t _srcstride,
251  int height, int denom, int wx, int ox,
252  intptr_t mx, intptr_t my, int width), _i8mm);
253 
254 NEON8_FNPROTO(qpel_bi_v, (uint8_t *dst, ptrdiff_t dststride,
255  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
256  int height, intptr_t mx, intptr_t my, int width),);
257 
258 NEON8_FNPROTO(qpel_bi_hv, (uint8_t *dst, ptrdiff_t dststride,
259  const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2,
260  int height, intptr_t mx, intptr_t my, int width), _i8mm);
261 
/*
 * Fill one (v, h) column of a [size][v][h] function table with the nine
 * width variants of a routine family:
 *
 *     member[i][v][h] = ff_hevc_put_hevc_<fn><W>_8_neon<ext>
 *
 * Index -> width: 1->4, 2->6, 3->8, 4->12, 5->16, 6->24, 7->32, 8->48, 9->64.
 * Index 0 is never written.
 *
 *   member - table lvalue, e.g. c->put_hevc_epel.
 *   v, h   - second and third table indices.
 *   fn     - family name pasted in front of the width.
 *   ext    - suffix pasted after "neon"; may be empty or e.g. _i8mm.
 *
 * Wrapped in do { } while (0) so the whole expansion is one statement: it
 * stays correct under an unbraced if/else, and the invocation supplies the
 * terminating semicolon.
 */
#define NEON8_FNASSIGN(member, v, h, fn, ext) \
    do { \
        (member)[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;  \
        (member)[2][v][h] = ff_hevc_put_hevc_##fn##6_8_neon##ext;  \
        (member)[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext;  \
        (member)[4][v][h] = ff_hevc_put_hevc_##fn##12_8_neon##ext; \
        (member)[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext; \
        (member)[6][v][h] = ff_hevc_put_hevc_##fn##24_8_neon##ext; \
        (member)[7][v][h] = ff_hevc_put_hevc_##fn##32_8_neon##ext; \
        (member)[8][v][h] = ff_hevc_put_hevc_##fn##48_8_neon##ext; \
        (member)[9][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext; \
    } while (0)
272 
/*
 * Table fill for families that only provide the 4, 8, 16 and 64 variants
 * (the set declared by NEON8_FNPROTO_PARTIAL_4).
 *
 *   index 1 -> 4, index 3 -> 8, index 5 -> 16,
 *   indices 7, 8 and 9 -> the 64 variant.
 *
 * Indices 0, 2, 4 and 6 are not written and keep whatever was installed
 * before.  Sharing the 64 routine across indices 7..9 mirrors how the h16
 * routines are shared elsewhere in this file; presumably the routine walks
 * the block using its width argument -- confirm against the assembly.
 *
 * Parameters are as for NEON8_FNASSIGN.  Wrapped in do { } while (0) so the
 * expansion is a single statement; the invocation supplies the semicolon.
 */
#define NEON8_FNASSIGN_PARTIAL_4(member, v, h, fn, ext) \
    do { \
        (member)[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;  \
        (member)[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext;  \
        (member)[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext; \
        (member)[7][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext; \
        (member)[8][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext; \
        (member)[9][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext; \
    } while (0)
280 
/*
 * Table fill for families that only provide the 4, 8, 16, 32 and 64 variants
 * (the set declared by NEON8_FNPROTO_PARTIAL_5).
 *
 *   index 1 -> 4, 3 -> 8, 5 -> 16, 7 -> 32, 9 -> 64.
 *
 * Indices 0, 2, 4, 6 and 8 are not written and keep whatever was installed
 * before.
 *
 * Parameters are as for NEON8_FNASSIGN.  Wrapped in do { } while (0) so the
 * expansion is a single statement; the invocation supplies the semicolon.
 */
#define NEON8_FNASSIGN_PARTIAL_5(member, v, h, fn, ext) \
    do { \
        (member)[1][v][h] = ff_hevc_put_hevc_##fn##4_8_neon##ext;  \
        (member)[3][v][h] = ff_hevc_put_hevc_##fn##8_8_neon##ext;  \
        (member)[5][v][h] = ff_hevc_put_hevc_##fn##16_8_neon##ext; \
        (member)[7][v][h] = ff_hevc_put_hevc_##fn##32_8_neon##ext; \
        (member)[9][v][h] = ff_hevc_put_hevc_##fn##64_8_neon##ext; \
    } while (0)
287 
289 {
290  int cpu_flags = av_get_cpu_flags();
291  if (!have_neon(cpu_flags)) return;
292 
293  if (bit_depth == 8) {
294  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_neon;
295  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_neon;
296  c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon;
297  c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon;
298  c->add_residual[2] = ff_hevc_add_residual_16x16_8_neon;
299  c->add_residual[3] = ff_hevc_add_residual_32x32_8_neon;
300  c->idct[0] = ff_hevc_idct_4x4_8_neon;
301  c->idct[1] = ff_hevc_idct_8x8_8_neon;
302  c->idct[2] = ff_hevc_idct_16x16_8_neon;
303  c->idct[3] = ff_hevc_idct_32x32_8_neon;
304  c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon;
305  c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
306  c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
307  c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
308  c->transform_4x4_luma = ff_hevc_transform_luma_4x4_neon_8;
309  c->sao_band_filter[0] =
310  c->sao_band_filter[1] =
311  c->sao_band_filter[2] =
312  c->sao_band_filter[3] =
313  c->sao_band_filter[4] = ff_hevc_sao_band_filter_8x8_8_neon;
314  c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8x8_8_neon;
315  c->sao_edge_filter[1] =
316  c->sao_edge_filter[2] =
317  c->sao_edge_filter[3] =
318  c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_16x16_8_neon;
319  c->put_hevc_qpel[1][0][1] = ff_hevc_put_hevc_qpel_h4_8_neon;
320  c->put_hevc_qpel[2][0][1] = ff_hevc_put_hevc_qpel_h6_8_neon;
321  c->put_hevc_qpel[3][0][1] = ff_hevc_put_hevc_qpel_h8_8_neon;
322  c->put_hevc_qpel[4][0][1] =
323  c->put_hevc_qpel[6][0][1] = ff_hevc_put_hevc_qpel_h12_8_neon;
324  c->put_hevc_qpel[5][0][1] =
325  c->put_hevc_qpel[7][0][1] =
326  c->put_hevc_qpel[8][0][1] =
327  c->put_hevc_qpel[9][0][1] = ff_hevc_put_hevc_qpel_h16_8_neon;
328  c->put_hevc_qpel_uni[1][0][1] = ff_hevc_put_hevc_qpel_uni_h4_8_neon;
329  c->put_hevc_qpel_uni[2][0][1] = ff_hevc_put_hevc_qpel_uni_h6_8_neon;
330  c->put_hevc_qpel_uni[3][0][1] = ff_hevc_put_hevc_qpel_uni_h8_8_neon;
331  c->put_hevc_qpel_uni[4][0][1] =
332  c->put_hevc_qpel_uni[6][0][1] = ff_hevc_put_hevc_qpel_uni_h12_8_neon;
333  c->put_hevc_qpel_uni[5][0][1] =
334  c->put_hevc_qpel_uni[7][0][1] =
335  c->put_hevc_qpel_uni[8][0][1] =
336  c->put_hevc_qpel_uni[9][0][1] = ff_hevc_put_hevc_qpel_uni_h16_8_neon;
337  c->put_hevc_qpel_bi[1][0][1] = ff_hevc_put_hevc_qpel_bi_h4_8_neon;
338  c->put_hevc_qpel_bi[2][0][1] = ff_hevc_put_hevc_qpel_bi_h6_8_neon;
339  c->put_hevc_qpel_bi[3][0][1] = ff_hevc_put_hevc_qpel_bi_h8_8_neon;
340  c->put_hevc_qpel_bi[4][0][1] =
341  c->put_hevc_qpel_bi[6][0][1] = ff_hevc_put_hevc_qpel_bi_h12_8_neon;
342  c->put_hevc_qpel_bi[5][0][1] =
343  c->put_hevc_qpel_bi[7][0][1] =
344  c->put_hevc_qpel_bi[8][0][1] =
345  c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_qpel_bi_h16_8_neon;
346 
347  NEON8_FNASSIGN(c->put_hevc_epel, 0, 0, pel_pixels,);
348  NEON8_FNASSIGN(c->put_hevc_epel, 1, 0, epel_v,);
349  NEON8_FNASSIGN(c->put_hevc_qpel, 0, 0, pel_pixels,);
350  NEON8_FNASSIGN(c->put_hevc_qpel, 1, 0, qpel_v,);
351  NEON8_FNASSIGN(c->put_hevc_epel_bi, 0, 0, pel_bi_pixels,);
352  NEON8_FNASSIGN(c->put_hevc_epel_bi, 0, 1, epel_bi_h,);
353  NEON8_FNASSIGN(c->put_hevc_epel_bi, 1, 0, epel_bi_v,);
354  NEON8_FNASSIGN(c->put_hevc_qpel_bi, 0, 0, pel_bi_pixels,);
355  NEON8_FNASSIGN(c->put_hevc_qpel_bi, 1, 0, qpel_bi_v,);
356  NEON8_FNASSIGN(c->put_hevc_epel_uni, 0, 0, pel_uni_pixels,);
357  NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 0, epel_uni_v,);
358  NEON8_FNASSIGN(c->put_hevc_qpel_uni, 0, 0, pel_uni_pixels,);
359  NEON8_FNASSIGN(c->put_hevc_qpel_uni, 1, 0, qpel_uni_v,);
360  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 0, 0, pel_uni_w_pixels,);
361  NEON8_FNASSIGN(c->put_hevc_qpel_uni_w, 0, 0, pel_uni_w_pixels,);
362  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 1, 0, epel_uni_w_v,);
363  NEON8_FNASSIGN_PARTIAL_4(c->put_hevc_qpel_uni_w, 1, 0, qpel_uni_w_v,);
364 
365  if (have_i8mm(cpu_flags)) {
366  NEON8_FNASSIGN(c->put_hevc_epel, 0, 1, epel_h, _i8mm);
367  NEON8_FNASSIGN(c->put_hevc_epel, 1, 1, epel_hv, _i8mm);
368  NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 1, epel_uni_hv, _i8mm);
369  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 0, 1, epel_uni_w_h ,_i8mm);
370  NEON8_FNASSIGN(c->put_hevc_epel_bi, 1, 1, epel_bi_hv, _i8mm);
371  NEON8_FNASSIGN(c->put_hevc_qpel, 0, 1, qpel_h, _i8mm);
372  NEON8_FNASSIGN(c->put_hevc_qpel, 1, 1, qpel_hv, _i8mm);
373  NEON8_FNASSIGN(c->put_hevc_qpel_uni, 1, 1, qpel_uni_hv, _i8mm);
374  NEON8_FNASSIGN(c->put_hevc_qpel_uni_w, 0, 1, qpel_uni_w_h, _i8mm);
375  NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 1, 1, epel_uni_w_hv, _i8mm);
376  NEON8_FNASSIGN_PARTIAL_5(c->put_hevc_qpel_uni_w, 1, 1, qpel_uni_w_hv, _i8mm);
377  NEON8_FNASSIGN(c->put_hevc_qpel_bi, 1, 1, qpel_bi_hv, _i8mm);
378  }
379 
380  }
381  if (bit_depth == 10) {
382  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_neon;
383  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_neon;
384  c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
385  c->add_residual[1] = ff_hevc_add_residual_8x8_10_neon;
386  c->add_residual[2] = ff_hevc_add_residual_16x16_10_neon;
387  c->add_residual[3] = ff_hevc_add_residual_32x32_10_neon;
388  c->idct[0] = ff_hevc_idct_4x4_10_neon;
389  c->idct[1] = ff_hevc_idct_8x8_10_neon;
390  c->idct[2] = ff_hevc_idct_16x16_10_neon;
391  c->idct[3] = ff_hevc_idct_32x32_10_neon;
392  c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_neon;
393  c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_neon;
394  c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_neon;
395  c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_neon;
396  }
397  if (bit_depth == 12) {
398  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_neon;
399  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_neon;
400  c->add_residual[0] = ff_hevc_add_residual_4x4_12_neon;
401  c->add_residual[1] = ff_hevc_add_residual_8x8_12_neon;
402  c->add_residual[2] = ff_hevc_add_residual_16x16_12_neon;
403  c->add_residual[3] = ff_hevc_add_residual_32x32_12_neon;
404  }
405 }
NEON8_FNPROTO_PARTIAL_5
#define NEON8_FNPROTO_PARTIAL_5(fn, args, ext)
Definition: hevcdsp_init_aarch64.c:148
ff_hevc_v_loop_filter_chroma_10_neon
void ff_hevc_v_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
NEON8_FNASSIGN
#define NEON8_FNASSIGN(member, v, h, fn, ext)
Definition: hevcdsp_init_aarch64.c:262
ff_hevc_idct_4x4_dc_8_neon
void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs)
ff_hevc_put_hevc_qpel_h4_8_neon
void ff_hevc_put_hevc_qpel_h4_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_bi_h6_8_neon
void ff_hevc_put_hevc_qpel_bi_h6_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_transform_luma_4x4_neon_8
void ff_hevc_transform_luma_4x4_neon_8(int16_t *coeffs)
ff_hevc_sao_edge_filter_8x8_8_neon
void ff_hevc_sao_edge_filter_8x8_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
_stride
ptrdiff_t _stride
Definition: h264pred_template.c:411
ff_hevc_idct_16x16_8_neon
void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit)
ff_hevc_put_hevc_qpel_bi_h12_8_neon
void ff_hevc_put_hevc_qpel_bi_h12_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_h8_8_neon
void ff_hevc_put_hevc_qpel_h8_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:103
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:245
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:52
ff_hevc_add_residual_16x16_10_neon
void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_32x32_8_neon
void ff_hevc_idct_32x32_8_neon(int16_t *coeffs, int col_limit)
ff_hevc_add_residual_4x4_10_neon
void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_8x8_8_neon
void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit)
ff_hevc_idct_16x16_dc_10_neon
void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs)
av_cold
#define av_cold
Definition: attributes.h:90
width
#define width
ff_hevc_idct_32x32_10_neon
void ff_hevc_idct_32x32_10_neon(int16_t *coeffs, int col_limit)
ff_hevc_add_residual_32x32_12_neon
void ff_hevc_add_residual_32x32_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_put_hevc_qpel_uni_h8_8_neon
void ff_hevc_put_hevc_qpel_uni_h8_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_uni_h12_8_neon
void ff_hevc_put_hevc_qpel_uni_h12_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_bi_h8_8_neon
void ff_hevc_put_hevc_qpel_bi_h8_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
hevcdsp.h
NEON8_FNASSIGN_PARTIAL_5
#define NEON8_FNASSIGN_PARTIAL_5(member, v, h, fn, ext)
Definition: hevcdsp_init_aarch64.c:281
ff_hevc_put_hevc_qpel_bi_h4_8_neon
void ff_hevc_put_hevc_qpel_bi_h4_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_idct_16x16_dc_8_neon
void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs)
ff_hevc_idct_32x32_dc_10_neon
void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs)
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
have_i8mm
#define have_i8mm(flags)
Definition: cpu.h:29
ff_hevc_put_hevc_qpel_uni_h4_8_neon
void ff_hevc_put_hevc_qpel_uni_h4_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_sao_band_filter_8x8_8_neon
void ff_hevc_sao_band_filter_8x8_8_neon(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height)
ff_hevc_add_residual_8x8_8_neon
void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_v_loop_filter_chroma_12_neon
void ff_hevc_v_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
cpu.h
ff_hevc_add_residual_16x16_12_neon
void ff_hevc_add_residual_16x16_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_4x4_10_neon
void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit)
ff_hevc_add_residual_8x8_12_neon
void ff_hevc_add_residual_8x8_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_put_hevc_qpel_h6_8_neon
void ff_hevc_put_hevc_qpel_h6_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_idct_4x4_dc_10_neon
void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs)
have_neon
#define have_neon(flags)
Definition: cpu.h:26
ff_hevc_h_loop_filter_chroma_12_neon
void ff_hevc_h_loop_filter_chroma_12_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
NEON8_FNPROTO
#define NEON8_FNPROTO(fn, args, ext)
Definition: hevcdsp_init_aarch64.c:131
height
#define height
ff_hevc_idct_8x8_10_neon
void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit)
HEVCDSPContext
Definition: hevcdsp.h:47
attributes.h
ff_hevc_idct_4x4_8_neon
void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit)
ff_hevc_idct_32x32_dc_8_neon
void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs)
src2
const pixel * src2
Definition: h264pred_template.c:422
ff_hevc_idct_8x8_dc_8_neon
void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs)
ff_hevc_put_hevc_qpel_uni_h6_8_neon
void ff_hevc_put_hevc_qpel_uni_h6_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_h12_8_neon
void ff_hevc_put_hevc_qpel_h12_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_put_hevc_qpel_uni_h16_8_neon
void ff_hevc_put_hevc_qpel_uni_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_idct_8x8_dc_10_neon
void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs)
ff_hevc_dsp_init_aarch64
av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
Definition: hevcdsp_init_aarch64.c:288
stride
#define stride
Definition: h264pred_template.c:537
ff_hevc_sao_edge_filter_16x16_8_neon
void ff_hevc_sao_edge_filter_16x16_8_neon(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
ff_hevc_add_residual_32x32_8_neon
void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_v_loop_filter_chroma_8_neon
void ff_hevc_v_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_put_hevc_qpel_h16_8_neon
void ff_hevc_put_hevc_qpel_h16_8_neon(int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_idct_16x16_10_neon
void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit)
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
ff_hevc_h_loop_filter_chroma_10_neon
void ff_hevc_h_loop_filter_chroma_10_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_add_residual_4x4_8_neon
void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
cpu.h
ff_hevc_add_residual_32x32_10_neon
void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
NEON8_FNPROTO_PARTIAL_4
#define NEON8_FNPROTO_PARTIAL_4(fn, args, ext)
Definition: hevcdsp_init_aarch64.c:142
ff_hevc_add_residual_8x8_10_neon
void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_h_loop_filter_chroma_8_neon
void ff_hevc_h_loop_filter_chroma_8_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_add_residual_16x16_8_neon
void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
NEON8_FNASSIGN_PARTIAL_4
#define NEON8_FNASSIGN_PARTIAL_4(member, v, h, fn, ext)
Definition: hevcdsp_init_aarch64.c:273
ff_hevc_put_hevc_qpel_bi_h16_8_neon
void ff_hevc_put_hevc_qpel_bi_h16_8_neon(uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
ff_hevc_add_residual_4x4_12_neon
void ff_hevc_add_residual_4x4_12_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)