FFmpeg
me_cmp_init_aarch64.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include <stdint.h>
20 
21 #include "config.h"
22 #include "libavutil/attributes.h"
23 #include "libavutil/aarch64/cpu.h"
24 #include "libavcodec/mpegvideo.h"
25 
26 int ff_pix_abs16_neon(MpegEncContext *s, const uint8_t *blk1, const uint8_t *blk2,
27  ptrdiff_t stride, int h);
28 int ff_pix_abs16_xy2_neon(MpegEncContext *s, const uint8_t *blk1, const uint8_t *blk2,
29  ptrdiff_t stride, int h);
30 int ff_pix_abs16_x2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
31  ptrdiff_t stride, int h);
32 int ff_pix_abs16_y2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
33  ptrdiff_t stride, int h);
34 int ff_pix_abs8_neon(MpegEncContext *s, const uint8_t *blk1, const uint8_t *blk2,
35  ptrdiff_t stride, int h);
36 
37 int sse16_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
38  ptrdiff_t stride, int h);
39 int sse8_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
40  ptrdiff_t stride, int h);
41 int sse4_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
42  ptrdiff_t stride, int h);
43 
44 int vsad16_neon(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
45  ptrdiff_t stride, int h);
46 int vsad_intra16_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy,
47  ptrdiff_t stride, int h) ;
48 int vsad_intra8_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy,
49  ptrdiff_t stride, int h) ;
50 int vsse16_neon(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
51  ptrdiff_t stride, int h);
52 int vsse_intra16_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy,
53  ptrdiff_t stride, int h);
54 int nsse16_neon(int multiplier, const uint8_t *s, const uint8_t *s2,
55  ptrdiff_t stride, int h);
56 int nsse16_neon_wrapper(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
57  ptrdiff_t stride, int h);
58 int pix_median_abs16_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
59  ptrdiff_t stride, int h);
60 int pix_median_abs8_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
61  ptrdiff_t stride, int h);
62 int ff_pix_abs8_x2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
63  ptrdiff_t stride, int h);
64 int ff_pix_abs8_y2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
65  ptrdiff_t stride, int h);
66 int ff_pix_abs8_xy2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
67  ptrdiff_t stride, int h);
68 
69 int nsse8_neon(int multiplier, const uint8_t *s, const uint8_t *s2,
70  ptrdiff_t stride, int h);
71 int nsse8_neon_wrapper(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
72  ptrdiff_t stride, int h);
73 
74 int vsse8_neon(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
75  ptrdiff_t stride, int h);
76 
77 int vsse_intra8_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy,
78  ptrdiff_t stride, int h);
79 
80 #if HAVE_DOTPROD
81 int sse16_neon_dotprod(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
82  ptrdiff_t stride, int h);
83 int vsse_intra16_neon_dotprod(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
84  ptrdiff_t stride, int h);
85 #endif
86 
88 {
90 
91  if (have_neon(cpu_flags)) {
92  c->pix_abs[0][0] = ff_pix_abs16_neon;
93  c->pix_abs[0][1] = ff_pix_abs16_x2_neon;
94  c->pix_abs[0][2] = ff_pix_abs16_y2_neon;
95  c->pix_abs[0][3] = ff_pix_abs16_xy2_neon;
96  c->pix_abs[1][0] = ff_pix_abs8_neon;
97  c->pix_abs[1][1] = ff_pix_abs8_x2_neon;
98  c->pix_abs[1][2] = ff_pix_abs8_y2_neon;
99  c->pix_abs[1][3] = ff_pix_abs8_xy2_neon;
100 
101  c->sad[0] = ff_pix_abs16_neon;
102  c->sad[1] = ff_pix_abs8_neon;
103  c->sse[0] = sse16_neon;
104  c->sse[1] = sse8_neon;
105  c->sse[2] = sse4_neon;
106 
107  c->vsad[0] = vsad16_neon;
108  c->vsad[4] = vsad_intra16_neon;
109  c->vsad[5] = vsad_intra8_neon;
110 
111  c->vsse[0] = vsse16_neon;
112  c->vsse[1] = vsse8_neon;
113 
114  c->vsse[4] = vsse_intra16_neon;
115  c->vsse[5] = vsse_intra8_neon;
116 
117  c->nsse[0] = nsse16_neon_wrapper;
118  c->nsse[1] = nsse8_neon_wrapper;
119 
120  c->median_sad[0] = pix_median_abs16_neon;
121  c->median_sad[1] = pix_median_abs8_neon;
122  }
123 
124 #if HAVE_DOTPROD
125  if (have_dotprod(cpu_flags)) {
126  c->sse[0] = sse16_neon_dotprod;
127  c->vsse[4] = vsse_intra16_neon_dotprod;
128  }
129 #endif
130 }
131 
132 int nsse16_neon_wrapper(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
133  ptrdiff_t stride, int h)
134 {
135  if (c)
136  return nsse16_neon(c->avctx->nsse_weight, s1, s2, stride, h);
137  else
138  return nsse16_neon(8, s1, s2, stride, h);
139 }
140 
141 int nsse8_neon_wrapper(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
142  ptrdiff_t stride, int h)
143 {
144  if (c)
145  return nsse8_neon(c->avctx->nsse_weight, s1, s2, stride, h);
146  else
147  return nsse8_neon(8, s1, s2, stride, h);
148 }
ff_pix_abs16_xy2_neon
int ff_pix_abs16_xy2_neon(MpegEncContext *s, const uint8_t *blk1, const uint8_t *blk2, ptrdiff_t stride, int h)
ff_pix_abs8_neon
int ff_pix_abs8_neon(MpegEncContext *s, const uint8_t *blk1, const uint8_t *blk2, ptrdiff_t stride, int h)
nsse8_neon_wrapper
int nsse8_neon_wrapper(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
Definition: me_cmp_init_aarch64.c:141
mpegvideo.h
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:107
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
vsse_intra16_neon
int vsse_intra16_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy, ptrdiff_t stride, int h)
vsad16_neon
int vsad16_neon(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
dummy
int dummy
Definition: motion.c:66
nsse16_neon
int nsse16_neon(int multiplier, const uint8_t *s, const uint8_t *s2, ptrdiff_t stride, int h)
ff_pix_abs16_neon
int ff_pix_abs16_neon(MpegEncContext *s, const uint8_t *blk1, const uint8_t *blk2, ptrdiff_t stride, int h)
av_cold
#define av_cold
Definition: attributes.h:90
ff_pix_abs16_y2_neon
int ff_pix_abs16_y2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
ff_pix_abs8_xy2_neon
int ff_pix_abs8_xy2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
s
#define s(width, name)
Definition: cbs_vp9.c:198
ff_pix_abs8_x2_neon
int ff_pix_abs8_x2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
MECmpContext
Definition: me_cmp.h:55
pix_median_abs8_neon
int pix_median_abs8_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
ff_pix_abs16_x2_neon
int ff_pix_abs16_x2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
sse8_neon
int sse8_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
nsse16_neon_wrapper
int nsse16_neon_wrapper(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
Definition: me_cmp_init_aarch64.c:132
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not [...] c
Definition: undefined.txt:32
vsad_intra16_neon
int vsad_intra16_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy, ptrdiff_t stride, int h)
have_neon
#define have_neon(flags)
Definition: cpu.h:26
attributes.h
ff_pix_abs8_y2_neon
int ff_pix_abs8_y2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
nsse8_neon
int nsse8_neon(int multiplier, const uint8_t *s, const uint8_t *s2, ptrdiff_t stride, int h)
ff_me_cmp_init_aarch64
av_cold void ff_me_cmp_init_aarch64(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp_init_aarch64.c:87
stride
#define stride
Definition: h264pred_template.c:537
vsad_intra8_neon
int vsad_intra8_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy, ptrdiff_t stride, int h)
AVCodecContext
main external API structure.
Definition: avcodec.h:451
have_dotprod
#define have_dotprod(flags)
Definition: cpu.h:28
vsse8_neon
int vsse8_neon(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
pix_median_abs16_neon
int pix_median_abs16_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
vsse_intra8_neon
int vsse_intra8_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy, ptrdiff_t stride, int h)
sse16_neon
int sse16_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
h
h
Definition: vp9dsp_template.c:2070
cpu.h
vsse16_neon
int vsse16_neon(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
sse4_neon
int sse4_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
MpegEncContext
MpegEncContext.
Definition: mpegvideo.h:73