FFmpeg
hevcdsp_init_neon.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/attributes.h"
22 #include "libavutil/arm/cpu.h"
23 #include "libavcodec/hevcdsp.h"
24 #include "libavcodec/avcodec.h"
25 #include "hevcdsp_arm.h"
26 
/*
 * SAO wrappers. Both are defined further down in this file; they are declared
 * up front so the definitions have a visible prior prototype.
 */
void ff_hevc_sao_band_filter_neon_8_wrapper(uint8_t *_dst, const uint8_t *_src,
                                            ptrdiff_t stride_dst,
                                            ptrdiff_t stride_src,
                                            const int16_t *sao_offset_val,
                                            int sao_left_class,
                                            int width, int height);
void ff_hevc_sao_edge_filter_neon_8_wrapper(uint8_t *_dst, const uint8_t *_src,
                                            ptrdiff_t stride_dst,
                                            const int16_t *sao_offset_val,
                                            int eo, int width, int height);

/*
 * Everything below is only declared here; the definitions live outside this
 * file (presumably in the NEON assembly sources -- confirm against the build).
 */

/* Deblocking filters: the luma variants take an extra _beta argument. */
void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride,
                                     int _beta, const int *_tc,
                                     const uint8_t *_no_p,
                                     const uint8_t *_no_q);
void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride,
                                     int _beta, const int *_tc,
                                     const uint8_t *_no_p,
                                     const uint8_t *_no_q);
void ff_hevc_v_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride,
                                       const int *_tc,
                                       const uint8_t *_no_p,
                                       const uint8_t *_no_q);
void ff_hevc_h_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride,
                                       const int *_tc,
                                       const uint8_t *_no_p,
                                       const uint8_t *_no_q);

/* Residual addition, one routine per block size and bit depth (8 / 10). */
void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, const int16_t *coeffs,
                                     ptrdiff_t stride);
void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, const int16_t *coeffs,
                                      ptrdiff_t stride);
void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, const int16_t *coeffs,
                                     ptrdiff_t stride);
void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, const int16_t *coeffs,
                                      ptrdiff_t stride);
void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, const int16_t *coeffs,
                                       ptrdiff_t stride);
void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, const int16_t *coeffs,
                                        ptrdiff_t stride);
void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, const int16_t *coeffs,
                                       ptrdiff_t stride);
void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, const int16_t *coeffs,
                                        ptrdiff_t stride);

/* DC-only inverse transforms. */
void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs);
void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs);
void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs);

/* Full inverse transforms. */
void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_32x32_8_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit);
void ff_hevc_idct_32x32_10_neon(int16_t *coeffs, int col_limit);

/* 4x4 luma transform (8-bit). */
void ff_hevc_transform_luma_4x4_neon_8(int16_t *coeffs);
71 
/*
 * Declares one ff_hevc_put_pixels_wN_neon_8 routine. All of them share this
 * signature; the wN suffix presumably names the block width handled by the
 * routine (TODO confirm against the assembly). The macro deliberately has no
 * trailing ';' so each use below reads like an ordinary declaration.
 */
#define PUT_PIXELS(name)                                \
    void name(int16_t *dst, const uint8_t *src,         \
              ptrdiff_t srcstride, int height,          \
              intptr_t mx, intptr_t my, int width)

PUT_PIXELS(ff_hevc_put_pixels_w2_neon_8);
PUT_PIXELS(ff_hevc_put_pixels_w4_neon_8);
PUT_PIXELS(ff_hevc_put_pixels_w6_neon_8);
PUT_PIXELS(ff_hevc_put_pixels_w8_neon_8);
PUT_PIXELS(ff_hevc_put_pixels_w12_neon_8);
PUT_PIXELS(ff_hevc_put_pixels_w16_neon_8);
PUT_PIXELS(ff_hevc_put_pixels_w24_neon_8);
PUT_PIXELS(ff_hevc_put_pixels_w32_neon_8);
PUT_PIXELS(ff_hevc_put_pixels_w48_neon_8);
PUT_PIXELS(ff_hevc_put_pixels_w64_neon_8);

#undef PUT_PIXELS
87 
/*
 * File-local dispatch tables. The wrappers in this file index them as
 * [my][mx]; they start out zero-initialized (static storage) and are
 * populated by the DSP init code further down in this file.
 */
static void (*put_hevc_qpel_neon[4][4])(int16_t *dst, ptrdiff_t dststride,
                                        const uint8_t *src, ptrdiff_t srcstride,
                                        int height, int width);
static void (*put_hevc_qpel_uw_neon[4][4])(uint8_t *dst, ptrdiff_t dststride,
                                           const uint8_t *_src, ptrdiff_t _srcstride,
                                           int width, int height,
                                           const int16_t *src2, ptrdiff_t src2stride);

/* Wrappers defined later in this file that dispatch through the tables above. */
void ff_hevc_put_qpel_neon_wrapper(int16_t *dst, const uint8_t *src,
                                   ptrdiff_t srcstride, int height,
                                   intptr_t mx, intptr_t my, int width);
void ff_hevc_put_qpel_uni_neon_wrapper(uint8_t *dst, ptrdiff_t dststride,
                                       const uint8_t *src, ptrdiff_t srcstride,
                                       int height, intptr_t mx, intptr_t my,
                                       int width);
void ff_hevc_put_qpel_bi_neon_wrapper(uint8_t *dst, ptrdiff_t dststride,
                                      const uint8_t *src, ptrdiff_t srcstride,
                                      const int16_t *src2, int height,
                                      intptr_t mx, intptr_t my, int width);
/*
 * Declares one ff_hevc_put_qpel_*_neon_8 routine. The signature matches the
 * element type of put_hevc_qpel_neon[][] so the routines can be stored there.
 * No trailing ';' in the macro: each use supplies its own.
 */
#define QPEL_FUNC(name)                                         \
    void name(int16_t *dst, ptrdiff_t dststride,                \
              const uint8_t *src, ptrdiff_t srcstride,          \
              int height, int width)

/* Vertical-only and horizontal-only variants. */
QPEL_FUNC(ff_hevc_put_qpel_v1_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_v2_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_v3_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_h1_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_h2_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_h3_neon_8);

/* Combined horizontal + vertical variants. */
QPEL_FUNC(ff_hevc_put_qpel_h1v1_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_h1v2_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_h1v3_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_h2v1_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_h2v2_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_h2v3_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_h3v1_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_h3v2_neon_8);
QPEL_FUNC(ff_hevc_put_qpel_h3v3_neon_8);

#undef QPEL_FUNC
119 
/*
 * Declares one ff_hevc_put_qpel_uw_pixels_wN_neon_8 routine. These are
 * installed directly as put_hevc_qpel_uni[..][0][0] callbacks by the init
 * code in this file, so the parameter list follows that callback shape
 * (height, mx, my, width).
 *
 * The macro must not end in ';': every use below already supplies one, and a
 * second ';' would leave an empty declaration at file scope, which ISO C
 * does not permit.
 */
#define QPEL_FUNC_UW_PIX(name)                                  \
    void name(uint8_t *dst, ptrdiff_t dststride,                \
              const uint8_t *_src, ptrdiff_t _srcstride,        \
              int height, intptr_t mx, intptr_t my, int width)

QPEL_FUNC_UW_PIX(ff_hevc_put_qpel_uw_pixels_w4_neon_8);
QPEL_FUNC_UW_PIX(ff_hevc_put_qpel_uw_pixels_w8_neon_8);
QPEL_FUNC_UW_PIX(ff_hevc_put_qpel_uw_pixels_w16_neon_8);
QPEL_FUNC_UW_PIX(ff_hevc_put_qpel_uw_pixels_w24_neon_8);
QPEL_FUNC_UW_PIX(ff_hevc_put_qpel_uw_pixels_w32_neon_8);
QPEL_FUNC_UW_PIX(ff_hevc_put_qpel_uw_pixels_w48_neon_8);
QPEL_FUNC_UW_PIX(ff_hevc_put_qpel_uw_pixels_w64_neon_8);

#undef QPEL_FUNC_UW_PIX
131 
/*
 * Declares one ff_hevc_put_qpel_uw_*_neon_8 routine. The signature matches
 * the element type of put_hevc_qpel_uw_neon[][]; note that width comes before
 * height here, and that src2 / src2stride are extra trailing arguments.
 *
 * The macro must not end in ';': every use below already supplies one, and a
 * second ';' would leave an empty declaration at file scope, which ISO C
 * does not permit.
 */
#define QPEL_FUNC_UW(name)                                      \
    void name(uint8_t *dst, ptrdiff_t dststride,                \
              const uint8_t *_src, ptrdiff_t _srcstride,        \
              int width, int height,                            \
              const int16_t *src2, ptrdiff_t src2stride)

QPEL_FUNC_UW(ff_hevc_put_qpel_uw_pixels_neon_8);

/* Vertical-only and horizontal-only variants. */
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_v1_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_v2_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_v3_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h1_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h2_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h3_neon_8);

/* Combined horizontal + vertical variants. */
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h1v1_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h1v2_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h1v3_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h2v1_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h2v2_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h2v3_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h3v1_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h3v2_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h3v3_neon_8);

#undef QPEL_FUNC_UW
152 
/* Band-filter routine taking a prebuilt 32-entry offset table; defined outside this file. */
void ff_hevc_sao_band_filter_neon_8(uint8_t *dst, const uint8_t *src,
                                    ptrdiff_t stride_dst, ptrdiff_t stride_src,
                                    int width, int height,
                                    int16_t *offset_table);

/**
 * Expand the SAO band parameters into a 32-entry offset table and run the
 * NEON band filter with it.
 *
 * The table starts out all zero. Four consecutive entries, beginning at
 * index sao_left_class and wrapping around modulo 32, are then loaded from
 * sao_offset_val[1] .. sao_offset_val[4]; sao_offset_val[0] is not read.
 *
 * @param _dst            destination, forwarded unchanged
 * @param _src            source, forwarded unchanged
 * @param stride_dst      destination stride, forwarded unchanged
 * @param stride_src      source stride, forwarded unchanged
 * @param sao_offset_val  offsets; elements 1..4 are read
 * @param sao_left_class  index of the first table entry that gets an offset
 * @param width           forwarded unchanged
 * @param height          forwarded unchanged
 */
void ff_hevc_sao_band_filter_neon_8_wrapper(uint8_t *_dst, const uint8_t *_src,
                                            ptrdiff_t stride_dst, ptrdiff_t stride_src,
                                            const int16_t *sao_offset_val, int sao_left_class,
                                            int width, int height)
{
    int16_t band_offsets[32] = { 0 };
    int i;

    /* '& 31' keeps the index inside the table and provides the wrap-around. */
    for (i = 1; i <= 4; i++)
        band_offsets[(sao_left_class + (i - 1)) & 31] = sao_offset_val[i];

    ff_hevc_sao_band_filter_neon_8(_dst, _src, stride_dst, stride_src,
                                   width, height, band_offsets);
}
170 
171 void ff_hevc_sao_edge_filter_neon_8(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int width, int height,
172  int a_stride, int b_stride, const int16_t *sao_offset_val, const uint8_t *edge_idx);
173 
174 void ff_hevc_sao_edge_filter_neon_8_wrapper(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, const int16_t *sao_offset_val,
175  int eo, int width, int height) {
176  static uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
177  static const int8_t pos[4][2][2] = {
178  { { -1, 0 }, { 1, 0 } }, // horizontal
179  { { 0, -1 }, { 0, 1 } }, // vertical
180  { { -1, -1 }, { 1, 1 } }, // 45 degree
181  { { 1, -1 }, { -1, 1 } }, // 135 degree
182  };
183  uint8_t *dst = _dst;
184  const uint8_t *src = _src;
185  int a_stride, b_stride;
186  ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
187 
188  a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
189  b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src;
190 
191  ff_hevc_sao_edge_filter_neon_8(dst, src, stride_dst, stride_src, width, height, a_stride, b_stride, sao_offset_val, edge_idx);
192 }
193 
194 void ff_hevc_put_qpel_neon_wrapper(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride,
195  int height, intptr_t mx, intptr_t my, int width) {
196 
197  put_hevc_qpel_neon[my][mx](dst, MAX_PB_SIZE, src, srcstride, height, width);
198 }
199 
200 void ff_hevc_put_qpel_uni_neon_wrapper(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
201  int height, intptr_t mx, intptr_t my, int width) {
202 
203  put_hevc_qpel_uw_neon[my][mx](dst, dststride, src, srcstride, width, height, NULL, 0);
204 }
205 
206 void ff_hevc_put_qpel_bi_neon_wrapper(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride,
207  const int16_t *src2,
208  int height, intptr_t mx, intptr_t my, int width) {
209  put_hevc_qpel_uw_neon[my][mx](dst, dststride, src, srcstride, width, height, src2, MAX_PB_SIZE);
210 }
211 
213 {
214  if (bit_depth == 8) {
215  int x;
216  c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_neon;
217  c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_neon;
218  c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_neon;
219  c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_neon;
220  c->sao_band_filter[0] = ff_hevc_sao_band_filter_neon_8_wrapper;
221  c->sao_band_filter[1] = ff_hevc_sao_band_filter_neon_8_wrapper;
222  c->sao_band_filter[2] = ff_hevc_sao_band_filter_neon_8_wrapper;
223  c->sao_band_filter[3] = ff_hevc_sao_band_filter_neon_8_wrapper;
224  c->sao_band_filter[4] = ff_hevc_sao_band_filter_neon_8_wrapper;
225  c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_neon_8_wrapper;
226  c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_neon_8_wrapper;
227  c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_neon_8_wrapper;
228  c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_neon_8_wrapper;
229  c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_neon_8_wrapper;
230  c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon;
231  c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon;
232  c->add_residual[2] = ff_hevc_add_residual_16x16_8_neon;
233  c->add_residual[3] = ff_hevc_add_residual_32x32_8_neon;
234  c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_neon;
235  c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_neon;
236  c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_neon;
237  c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_neon;
238  c->idct[0] = ff_hevc_idct_4x4_8_neon;
239  c->idct[1] = ff_hevc_idct_8x8_8_neon;
240  c->idct[2] = ff_hevc_idct_16x16_8_neon;
241  c->idct[3] = ff_hevc_idct_32x32_8_neon;
242  c->transform_4x4_luma = ff_hevc_transform_luma_4x4_neon_8;
243  put_hevc_qpel_neon[1][0] = ff_hevc_put_qpel_v1_neon_8;
244  put_hevc_qpel_neon[2][0] = ff_hevc_put_qpel_v2_neon_8;
245  put_hevc_qpel_neon[3][0] = ff_hevc_put_qpel_v3_neon_8;
246  put_hevc_qpel_neon[0][1] = ff_hevc_put_qpel_h1_neon_8;
247  put_hevc_qpel_neon[0][2] = ff_hevc_put_qpel_h2_neon_8;
248  put_hevc_qpel_neon[0][3] = ff_hevc_put_qpel_h3_neon_8;
249  put_hevc_qpel_neon[1][1] = ff_hevc_put_qpel_h1v1_neon_8;
250  put_hevc_qpel_neon[1][2] = ff_hevc_put_qpel_h2v1_neon_8;
251  put_hevc_qpel_neon[1][3] = ff_hevc_put_qpel_h3v1_neon_8;
252  put_hevc_qpel_neon[2][1] = ff_hevc_put_qpel_h1v2_neon_8;
253  put_hevc_qpel_neon[2][2] = ff_hevc_put_qpel_h2v2_neon_8;
254  put_hevc_qpel_neon[2][3] = ff_hevc_put_qpel_h3v2_neon_8;
255  put_hevc_qpel_neon[3][1] = ff_hevc_put_qpel_h1v3_neon_8;
256  put_hevc_qpel_neon[3][2] = ff_hevc_put_qpel_h2v3_neon_8;
257  put_hevc_qpel_neon[3][3] = ff_hevc_put_qpel_h3v3_neon_8;
258  put_hevc_qpel_uw_neon[1][0] = ff_hevc_put_qpel_uw_v1_neon_8;
259  put_hevc_qpel_uw_neon[2][0] = ff_hevc_put_qpel_uw_v2_neon_8;
260  put_hevc_qpel_uw_neon[3][0] = ff_hevc_put_qpel_uw_v3_neon_8;
261  put_hevc_qpel_uw_neon[0][1] = ff_hevc_put_qpel_uw_h1_neon_8;
262  put_hevc_qpel_uw_neon[0][2] = ff_hevc_put_qpel_uw_h2_neon_8;
263  put_hevc_qpel_uw_neon[0][3] = ff_hevc_put_qpel_uw_h3_neon_8;
264  put_hevc_qpel_uw_neon[1][1] = ff_hevc_put_qpel_uw_h1v1_neon_8;
265  put_hevc_qpel_uw_neon[1][2] = ff_hevc_put_qpel_uw_h2v1_neon_8;
266  put_hevc_qpel_uw_neon[1][3] = ff_hevc_put_qpel_uw_h3v1_neon_8;
267  put_hevc_qpel_uw_neon[2][1] = ff_hevc_put_qpel_uw_h1v2_neon_8;
268  put_hevc_qpel_uw_neon[2][2] = ff_hevc_put_qpel_uw_h2v2_neon_8;
269  put_hevc_qpel_uw_neon[2][3] = ff_hevc_put_qpel_uw_h3v2_neon_8;
270  put_hevc_qpel_uw_neon[3][1] = ff_hevc_put_qpel_uw_h1v3_neon_8;
271  put_hevc_qpel_uw_neon[3][2] = ff_hevc_put_qpel_uw_h2v3_neon_8;
272  put_hevc_qpel_uw_neon[3][3] = ff_hevc_put_qpel_uw_h3v3_neon_8;
273  for (x = 3; x < 10; x++) {
274  if (x == 4) continue;
275  c->put_hevc_qpel[x][1][0] = ff_hevc_put_qpel_neon_wrapper;
276  c->put_hevc_qpel[x][0][1] = ff_hevc_put_qpel_neon_wrapper;
277  c->put_hevc_qpel[x][1][1] = ff_hevc_put_qpel_neon_wrapper;
278  c->put_hevc_qpel_uni[x][1][0] = ff_hevc_put_qpel_uni_neon_wrapper;
279  c->put_hevc_qpel_uni[x][0][1] = ff_hevc_put_qpel_uni_neon_wrapper;
280  c->put_hevc_qpel_uni[x][1][1] = ff_hevc_put_qpel_uni_neon_wrapper;
281  c->put_hevc_qpel_bi[x][1][0] = ff_hevc_put_qpel_bi_neon_wrapper;
282  c->put_hevc_qpel_bi[x][0][1] = ff_hevc_put_qpel_bi_neon_wrapper;
283  c->put_hevc_qpel_bi[x][1][1] = ff_hevc_put_qpel_bi_neon_wrapper;
284  }
285  c->put_hevc_qpel[0][0][0] = ff_hevc_put_pixels_w2_neon_8;
286  c->put_hevc_qpel[1][0][0] = ff_hevc_put_pixels_w4_neon_8;
287  c->put_hevc_qpel[2][0][0] = ff_hevc_put_pixels_w6_neon_8;
288  c->put_hevc_qpel[3][0][0] = ff_hevc_put_pixels_w8_neon_8;
289  c->put_hevc_qpel[4][0][0] = ff_hevc_put_pixels_w12_neon_8;
290  c->put_hevc_qpel[5][0][0] = ff_hevc_put_pixels_w16_neon_8;
291  c->put_hevc_qpel[6][0][0] = ff_hevc_put_pixels_w24_neon_8;
292  c->put_hevc_qpel[7][0][0] = ff_hevc_put_pixels_w32_neon_8;
293  c->put_hevc_qpel[8][0][0] = ff_hevc_put_pixels_w48_neon_8;
294  c->put_hevc_qpel[9][0][0] = ff_hevc_put_pixels_w64_neon_8;
295 
296  c->put_hevc_qpel_uni[1][0][0] = ff_hevc_put_qpel_uw_pixels_w4_neon_8;
297  c->put_hevc_qpel_uni[3][0][0] = ff_hevc_put_qpel_uw_pixels_w8_neon_8;
298  c->put_hevc_qpel_uni[5][0][0] = ff_hevc_put_qpel_uw_pixels_w16_neon_8;
299  c->put_hevc_qpel_uni[6][0][0] = ff_hevc_put_qpel_uw_pixels_w24_neon_8;
300  c->put_hevc_qpel_uni[7][0][0] = ff_hevc_put_qpel_uw_pixels_w32_neon_8;
301  c->put_hevc_qpel_uni[8][0][0] = ff_hevc_put_qpel_uw_pixels_w48_neon_8;
302  c->put_hevc_qpel_uni[9][0][0] = ff_hevc_put_qpel_uw_pixels_w64_neon_8;
303  }
304 
305  if (bit_depth == 10) {
306  c->add_residual[0] = ff_hevc_add_residual_4x4_10_neon;
307  c->add_residual[1] = ff_hevc_add_residual_8x8_10_neon;
308  c->add_residual[2] = ff_hevc_add_residual_16x16_10_neon;
309  c->add_residual[3] = ff_hevc_add_residual_32x32_10_neon;
310 
311  c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_neon;
312  c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_neon;
313  c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_neon;
314  c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_neon;
315 
316  c->idct[0] = ff_hevc_idct_4x4_10_neon;
317  c->idct[1] = ff_hevc_idct_8x8_10_neon;
318  c->idct[2] = ff_hevc_idct_16x16_10_neon;
319  c->idct[3] = ff_hevc_idct_32x32_10_neon;
320  }
321 }
bit_depth
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:226
QPEL_FUNC
#define QPEL_FUNC(name)
Definition: hevcdsp_init_neon.c:99
ff_hevc_add_residual_4x4_10_neon
void ff_hevc_add_residual_4x4_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_16x16_dc_10_neon
void ff_hevc_idct_16x16_dc_10_neon(int16_t *coeffs)
ff_hevc_idct_32x32_8_neon
void ff_hevc_idct_32x32_8_neon(int16_t *coeffs, int col_limit)
ff_hevc_idct_16x16_dc_8_neon
void ff_hevc_idct_16x16_dc_8_neon(int16_t *coeffs)
ff_hevc_add_residual_8x8_8_neon
void ff_hevc_add_residual_8x8_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_h_loop_filter_chroma_neon
void ff_hevc_h_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_idct_32x32_dc_8_neon
void ff_hevc_idct_32x32_dc_8_neon(int16_t *coeffs)
_stride
ptrdiff_t _stride
Definition: h264pred_template.c:411
ff_hevc_idct_4x4_dc_8_neon
void ff_hevc_idct_4x4_dc_8_neon(int16_t *coeffs)
ff_hevc_v_loop_filter_luma_neon
void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_sao_edge_filter_neon_8_wrapper
void ff_hevc_sao_edge_filter_neon_8_wrapper(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, const int16_t *sao_offset_val, int eo, int width, int height)
Definition: hevcdsp_init_neon.c:174
ff_hevc_idct_8x8_dc_10_neon
void ff_hevc_idct_8x8_dc_10_neon(int16_t *coeffs)
ff_hevc_put_qpel_uni_neon_wrapper
void ff_hevc_put_qpel_uni_neon_wrapper(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_init_neon.c:200
ff_hevc_idct_16x16_10_neon
void ff_hevc_idct_16x16_10_neon(int16_t *coeffs, int col_limit)
av_cold
#define av_cold
Definition: attributes.h:90
ff_hevc_v_loop_filter_chroma_neon
void ff_hevc_v_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
width
#define width
ff_hevc_sao_band_filter_neon_8
void ff_hevc_sao_band_filter_neon_8(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int width, int height, int16_t *offset_table)
ff_hevc_idct_8x8_10_neon
void ff_hevc_idct_8x8_10_neon(int16_t *coeffs, int col_limit)
hevcdsp.h
ff_hevc_sao_edge_filter_neon_8
void ff_hevc_sao_edge_filter_neon_8(uint8_t *dst, const uint8_t *src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int width, int height, int a_stride, int b_stride, const int16_t *sao_offset_val, const uint8_t *edge_idx)
NULL
#define NULL
Definition: coverity.c:32
cpu.h
ff_hevc_put_qpel_bi_neon_wrapper
void ff_hevc_put_qpel_bi_neon_wrapper(uint8_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride, const int16_t *src2, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_init_neon.c:206
ff_hevc_idct_4x4_dc_10_neon
void ff_hevc_idct_4x4_dc_10_neon(int16_t *coeffs)
ff_hevc_idct_4x4_8_neon
void ff_hevc_idct_4x4_8_neon(int16_t *coeffs, int col_limit)
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can, and in some cases has, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not ...
Definition: undefined.txt:32
ff_hevc_add_residual_8x8_10_neon
void ff_hevc_add_residual_8x8_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
hevcdsp_arm.h
ff_hevc_add_residual_32x32_8_neon
void ff_hevc_add_residual_32x32_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_idct_32x32_dc_10_neon
void ff_hevc_idct_32x32_dc_10_neon(int16_t *coeffs)
ff_hevc_idct_32x32_10_neon
void ff_hevc_idct_32x32_10_neon(int16_t *coeffs, int col_limit)
height
#define height
HEVCDSPContext
Definition: hevcdsp.h:47
attributes.h
ff_hevc_transform_luma_4x4_neon_8
void ff_hevc_transform_luma_4x4_neon_8(int16_t *coeffs)
put_hevc_qpel_neon
static void(* put_hevc_qpel_neon[4][4])(int16_t *dst, ptrdiff_t dststride, const uint8_t *src, ptrdiff_t srcstride, int height, int width)
Definition: hevcdsp_init_neon.c:88
offset_table
static const uint8_t offset_table[]
Definition: escape130.c:42
put_hevc_qpel_uw_neon
static void(* put_hevc_qpel_uw_neon[4][4])(uint8_t *dst, ptrdiff_t dststride, const uint8_t *_src, ptrdiff_t _srcstride, int width, int height, const int16_t *src2, ptrdiff_t src2stride)
Definition: hevcdsp_init_neon.c:90
MAX_PB_SIZE
#define MAX_PB_SIZE
Definition: hevcdsp.h:32
src2
const pixel * src2
Definition: h264pred_template.c:422
avcodec.h
stride
#define stride
Definition: h264pred_template.c:537
ff_hevc_add_residual_16x16_10_neon
void ff_hevc_add_residual_16x16_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
QPEL_FUNC_UW
#define QPEL_FUNC_UW(name)
Definition: hevcdsp_init_neon.c:132
ff_hevc_add_residual_4x4_8_neon
void ff_hevc_add_residual_4x4_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
pos
unsigned int pos
Definition: spdifenc.c:412
AV_INPUT_BUFFER_PADDING_SIZE
#define AV_INPUT_BUFFER_PADDING_SIZE
Definition: defs.h:40
PUT_PIXELS
#define PUT_PIXELS(name)
Definition: hevcdsp_init_neon.c:72
ff_hevc_add_residual_32x32_10_neon
void ff_hevc_add_residual_32x32_10_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_h_loop_filter_luma_neon
void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, const int *_tc, const uint8_t *_no_p, const uint8_t *_no_q)
ff_hevc_idct_16x16_8_neon
void ff_hevc_idct_16x16_8_neon(int16_t *coeffs, int col_limit)
QPEL_FUNC_UW_PIX
#define QPEL_FUNC_UW_PIX(name)
Definition: hevcdsp_init_neon.c:120
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
ff_hevc_sao_band_filter_neon_8_wrapper
void ff_hevc_sao_band_filter_neon_8_wrapper(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src, const int16_t *sao_offset_val, int sao_left_class, int width, int height)
Definition: hevcdsp_init_neon.c:155
ff_hevc_idct_8x8_8_neon
void ff_hevc_idct_8x8_8_neon(int16_t *coeffs, int col_limit)
ff_hevc_add_residual_16x16_8_neon
void ff_hevc_add_residual_16x16_8_neon(uint8_t *_dst, const int16_t *coeffs, ptrdiff_t stride)
ff_hevc_put_qpel_neon_wrapper
void ff_hevc_put_qpel_neon_wrapper(int16_t *dst, const uint8_t *src, ptrdiff_t srcstride, int height, intptr_t mx, intptr_t my, int width)
Definition: hevcdsp_init_neon.c:194
ff_hevc_idct_8x8_dc_8_neon
void ff_hevc_idct_8x8_dc_8_neon(int16_t *coeffs)
ff_hevc_idct_4x4_10_neon
void ff_hevc_idct_4x4_10_neon(int16_t *coeffs, int col_limit)
ff_hevc_dsp_init_neon
av_cold void ff_hevc_dsp_init_neon(HEVCDSPContext *c, const int bit_depth)
Definition: hevcdsp_init_neon.c:212