FFmpeg
me_cmp_init_aarch64.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include <stdint.h>
20 
21 #include "config.h"
22 #include "libavutil/attributes.h"
23 #include "libavutil/aarch64/cpu.h"
24 #include "libavcodec/mpegvideo.h"
25 
26 int ff_pix_abs16_neon(MpegEncContext *s, const uint8_t *blk1, const uint8_t *blk2,
27  ptrdiff_t stride, int h);
28 int ff_pix_abs16_xy2_neon(MpegEncContext *s, const uint8_t *blk1, const uint8_t *blk2,
29  ptrdiff_t stride, int h);
30 int ff_pix_abs16_x2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
31  ptrdiff_t stride, int h);
32 int ff_pix_abs16_y2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
33  ptrdiff_t stride, int h);
34 int ff_pix_abs8_neon(MpegEncContext *s, const uint8_t *blk1, const uint8_t *blk2,
35  ptrdiff_t stride, int h);
36 
37 int sse16_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
38  ptrdiff_t stride, int h);
39 int sse8_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
40  ptrdiff_t stride, int h);
41 int sse4_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
42  ptrdiff_t stride, int h);
43 
44 int vsad16_neon(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
45  ptrdiff_t stride, int h);
46 int vsad_intra16_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy,
47  ptrdiff_t stride, int h) ;
48 int vsad_intra8_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy,
49  ptrdiff_t stride, int h) ;
50 int vsse16_neon(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
51  ptrdiff_t stride, int h);
52 int vsse_intra16_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy,
53  ptrdiff_t stride, int h);
54 int nsse16_neon(int multiplier, const uint8_t *s, const uint8_t *s2,
55  ptrdiff_t stride, int h);
56 int nsse16_neon_wrapper(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
57  ptrdiff_t stride, int h);
58 int pix_median_abs16_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
59  ptrdiff_t stride, int h);
60 int pix_median_abs8_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
61  ptrdiff_t stride, int h);
62 int ff_pix_abs8_x2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
63  ptrdiff_t stride, int h);
64 int ff_pix_abs8_y2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
65  ptrdiff_t stride, int h);
66 int ff_pix_abs8_xy2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
67  ptrdiff_t stride, int h);
68 
69 int nsse8_neon(int multiplier, const uint8_t *s, const uint8_t *s2,
70  ptrdiff_t stride, int h);
71 int nsse8_neon_wrapper(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
72  ptrdiff_t stride, int h);
73 
74 int vsse8_neon(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
75  ptrdiff_t stride, int h);
76 
77 int vsse_intra8_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy,
78  ptrdiff_t stride, int h);
79 
81 {
83 
84  if (have_neon(cpu_flags)) {
85  c->pix_abs[0][0] = ff_pix_abs16_neon;
86  c->pix_abs[0][1] = ff_pix_abs16_x2_neon;
87  c->pix_abs[0][2] = ff_pix_abs16_y2_neon;
88  c->pix_abs[0][3] = ff_pix_abs16_xy2_neon;
89  c->pix_abs[1][0] = ff_pix_abs8_neon;
90  c->pix_abs[1][1] = ff_pix_abs8_x2_neon;
91  c->pix_abs[1][2] = ff_pix_abs8_y2_neon;
92  c->pix_abs[1][3] = ff_pix_abs8_xy2_neon;
93 
94  c->sad[0] = ff_pix_abs16_neon;
95  c->sad[1] = ff_pix_abs8_neon;
96  c->sse[0] = sse16_neon;
97  c->sse[1] = sse8_neon;
98  c->sse[2] = sse4_neon;
99 
100  c->vsad[0] = vsad16_neon;
101  c->vsad[4] = vsad_intra16_neon;
102  c->vsad[5] = vsad_intra8_neon;
103 
104  c->vsse[0] = vsse16_neon;
105  c->vsse[1] = vsse8_neon;
106 
107  c->vsse[4] = vsse_intra16_neon;
108  c->vsse[5] = vsse_intra8_neon;
109 
110  c->nsse[0] = nsse16_neon_wrapper;
111  c->nsse[1] = nsse8_neon_wrapper;
112 
113  c->median_sad[0] = pix_median_abs16_neon;
114  c->median_sad[1] = pix_median_abs8_neon;
115  }
116 }
117 
118 int nsse16_neon_wrapper(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
119  ptrdiff_t stride, int h)
120 {
121  if (c)
122  return nsse16_neon(c->avctx->nsse_weight, s1, s2, stride, h);
123  else
124  return nsse16_neon(8, s1, s2, stride, h);
125 }
126 
127 int nsse8_neon_wrapper(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2,
128  ptrdiff_t stride, int h)
129 {
130  if (c)
131  return nsse8_neon(c->avctx->nsse_weight, s1, s2, stride, h);
132  else
133  return nsse8_neon(8, s1, s2, stride, h);
134 }
ff_pix_abs16_xy2_neon
int ff_pix_abs16_xy2_neon(MpegEncContext *s, const uint8_t *blk1, const uint8_t *blk2, ptrdiff_t stride, int h)
ff_pix_abs8_neon
int ff_pix_abs8_neon(MpegEncContext *s, const uint8_t *blk1, const uint8_t *blk2, ptrdiff_t stride, int h)
nsse8_neon_wrapper
int nsse8_neon_wrapper(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
Definition: me_cmp_init_aarch64.c:127
mpegvideo.h
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:103
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:52
vsse_intra16_neon
int vsse_intra16_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy, ptrdiff_t stride, int h)
vsad16_neon
int vsad16_neon(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
dummy
int dummy
Definition: motion.c:66
nsse16_neon
int nsse16_neon(int multiplier, const uint8_t *s, const uint8_t *s2, ptrdiff_t stride, int h)
ff_pix_abs16_neon
int ff_pix_abs16_neon(MpegEncContext *s, const uint8_t *blk1, const uint8_t *blk2, ptrdiff_t stride, int h)
av_cold
#define av_cold
Definition: attributes.h:90
ff_pix_abs16_y2_neon
int ff_pix_abs16_y2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
ff_pix_abs8_xy2_neon
int ff_pix_abs8_xy2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
s
#define s(width, name)
Definition: cbs_vp9.c:198
s1
#define s1
Definition: regdef.h:38
ff_pix_abs8_x2_neon
int ff_pix_abs8_x2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
MECmpContext
Definition: me_cmp.h:55
pix_median_abs8_neon
int pix_median_abs8_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
ff_pix_abs16_x2_neon
int ff_pix_abs16_x2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
sse8_neon
int sse8_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
nsse16_neon_wrapper
int nsse16_neon_wrapper(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
Definition: me_cmp_init_aarch64.c:118
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, and accessing outside allocated space. Undefined behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem like nitpicking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can lead, and in some cases has led, to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not matter [excerpt truncated]
Definition: undefined.txt:32
s2
#define s2
Definition: regdef.h:39
vsad_intra16_neon
int vsad_intra16_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy, ptrdiff_t stride, int h)
have_neon
#define have_neon(flags)
Definition: cpu.h:26
attributes.h
ff_pix_abs8_y2_neon
int ff_pix_abs8_y2_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
nsse8_neon
int nsse8_neon(int multiplier, const uint8_t *s, const uint8_t *s2, ptrdiff_t stride, int h)
ff_me_cmp_init_aarch64
av_cold void ff_me_cmp_init_aarch64(MECmpContext *c, AVCodecContext *avctx)
Definition: me_cmp_init_aarch64.c:80
stride
#define stride
Definition: h264pred_template.c:537
vsad_intra8_neon
int vsad_intra8_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy, ptrdiff_t stride, int h)
AVCodecContext
main external API structure.
Definition: avcodec.h:445
vsse8_neon
int vsse8_neon(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
pix_median_abs16_neon
int pix_median_abs16_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
vsse_intra8_neon
int vsse_intra8_neon(MpegEncContext *c, const uint8_t *s, const uint8_t *dummy, ptrdiff_t stride, int h)
sse16_neon
int sse16_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
h
h
Definition: vp9dsp_template.c:2038
cpu.h
vsse16_neon
int vsse16_neon(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h)
sse4_neon
int sse4_neon(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2, ptrdiff_t stride, int h)
MpegEncContext
MpegEncContext.
Definition: mpegvideo.h:73