FFmpeg
vf_bwdif_init_aarch64.c
Go to the documentation of this file.
1 /*
2  * bwdif aarch64 NEON optimisations
3  *
4  * Copyright (c) 2023 John Cox <jc@kynesim.co.uk>
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "libavutil/common.h"
24 #include "libavfilter/bwdifdsp.h"
25 #include "libavutil/aarch64/cpu.h"
26 
/*
 * Prototypes for the NEON kernels wrapped by the helpers in this file.
 * Their definitions live outside this file.
 */

/* Counterpart of ff_bwdif_filter_edge_c(). */
void ff_bwdif_filter_edge_neon(void *dst1,
                               const void *prev1, const void *cur1, const void *next1,
                               int w,
                               int prefs, int mrefs, int prefs2, int mrefs2,
                               int parity, int clip_max, int spat);

/* Counterpart of ff_bwdif_filter_intra_c(). */
void ff_bwdif_filter_intra_neon(void *dst1, const void *cur1,
                                int w,
                                int prefs, int mrefs, int prefs3, int mrefs3,
                                int parity, int clip_max);

/* Counterpart of ff_bwdif_filter_line_c(). */
void ff_bwdif_filter_line_neon(void *dst1,
                               const void *prev1, const void *cur1, const void *next1,
                               int w,
                               int prefs, int mrefs, int prefs2, int mrefs2,
                               int prefs3, int mrefs3, int prefs4, int mrefs4,
                               int parity, int clip_max);

/* Counterpart of ff_bwdif_filter_line3_c(); takes explicit dst/src strides. */
void ff_bwdif_filter_line3_neon(void *dst1, int d_stride,
                                const void *prev1, const void *cur1, const void *next1,
                                int s_stride,
                                int w, int parity, int clip_max);
42 
43 
/*
 * filter_line3 wrapper: hand as much of the row as is safe to the NEON
 * kernel and let ff_bwdif_filter_line3_c() finish whatever is left.
 *
 * dst1/prev1/cur1/next1 are advanced by w0 bytes for the C tail, i.e. one
 * byte per sample. The NEON kernel is only given work when clip_max is 255;
 * for any other value it is called with a width of 0 and the C code
 * processes the whole row.
 */
static void filter_line3_helper(void *dst1, int d_stride,
                                const void *prev1, const void *cur1, const void *next1, int s_stride,
                                int w, int parity, int clip_max)
{
    // Asm works on 16 byte chunks
    // If w is a multiple of 16 then all is good - if not then if width rounded
    // up to nearest 16 will fit in both src & dst strides then allow the asm
    // to write over the padding bytes as that is almost certainly faster than
    // having to invoke the C version to clean up the tail.
    const int w1 = FFALIGN(w, 16);

    // The rounded-up width must not exceed either stride, otherwise the asm
    // would run past the end of the row. A negative stride never satisfies
    // this, so such rows always take the C tail path.
    const int padded_fits = w1 <= d_stride && w1 <= s_stride;

    int w0;
    if (clip_max != 255)
        w0 = 0;
    else if (padded_fits)
        w0 = w;
    else
        w0 = w & ~15;

    ff_bwdif_filter_line3_neon(dst1, d_stride,
                               prev1, cur1, next1, s_stride,
                               w0, parity, clip_max);

    if (w0 < w) {
        ff_bwdif_filter_line3_c((char *)dst1 + w0, d_stride,
                                (const char *)prev1 + w0,
                                (const char *)cur1 + w0,
                                (const char *)next1 + w0, s_stride,
                                w - w0, parity, clip_max);
    }
}
66 
/*
 * filter_line wrapper: the NEON kernel handles the leading multiple-of-16
 * part of the row, ff_bwdif_filter_line_c() handles the remaining samples.
 *
 * The NEON kernel is only given work when clip_max is 255; otherwise it is
 * called with a width of 0 and the C code processes all w samples.
 * Pointers are advanced by w0 bytes for the C tail.
 */
static void filter_line_helper(void *dst1, const void *prev1, const void *cur1, const void *next1,
                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
                               int prefs3, int mrefs3, int prefs4, int mrefs4,
                               int parity, int clip_max)
{
    // Largest multiple of 16 not exceeding w (or 0 when NEON is bypassed)
    const int w0 = clip_max != 255 ? 0 : w & ~15;

    ff_bwdif_filter_line_neon(dst1, prev1, cur1, next1,
                              w0, prefs, mrefs, prefs2, mrefs2,
                              prefs3, mrefs3, prefs4, mrefs4,
                              parity, clip_max);

    if (w0 < w) {
        // Source pointers stay const-qualified; only dst is written through
        ff_bwdif_filter_line_c((char *)dst1 + w0,
                               (const char *)prev1 + w0,
                               (const char *)cur1 + w0,
                               (const char *)next1 + w0,
                               w - w0, prefs, mrefs, prefs2, mrefs2,
                               prefs3, mrefs3, prefs4, mrefs4,
                               parity, clip_max);
    }
}
81 
/*
 * filter_edge wrapper: the NEON kernel handles the leading multiple-of-16
 * part of the row, ff_bwdif_filter_edge_c() handles the remaining samples.
 *
 * The NEON kernel is only given work when clip_max is 255; otherwise it is
 * called with a width of 0 and the C code processes all w samples.
 * Pointers are advanced by w0 bytes for the C tail.
 */
static void filter_edge_helper(void *dst1, const void *prev1, const void *cur1, const void *next1,
                               int w, int prefs, int mrefs, int prefs2, int mrefs2,
                               int parity, int clip_max, int spat)
{
    // Largest multiple of 16 not exceeding w (or 0 when NEON is bypassed)
    const int w0 = clip_max != 255 ? 0 : w & ~15;

    ff_bwdif_filter_edge_neon(dst1, prev1, cur1, next1,
                              w0, prefs, mrefs, prefs2, mrefs2,
                              parity, clip_max, spat);

    if (w0 < w) {
        // Source pointers stay const-qualified; only dst is written through
        ff_bwdif_filter_edge_c((char *)dst1 + w0,
                               (const char *)prev1 + w0,
                               (const char *)cur1 + w0,
                               (const char *)next1 + w0,
                               w - w0, prefs, mrefs, prefs2, mrefs2,
                               parity, clip_max, spat);
    }
}
96 
/*
 * filter_intra wrapper: the NEON kernel handles the leading multiple-of-16
 * part of the row, ff_bwdif_filter_intra_c() handles the remaining samples.
 *
 * The NEON kernel is only given work when clip_max is 255; otherwise it is
 * called with a width of 0 and the C code processes all w samples.
 * Pointers are advanced by w0 bytes for the C tail.
 */
static void filter_intra_helper(void *dst1, const void *cur1, int w, int prefs, int mrefs,
                                int prefs3, int mrefs3, int parity, int clip_max)
{
    // Largest multiple of 16 not exceeding w (or 0 when NEON is bypassed)
    const int w0 = clip_max != 255 ? 0 : w & ~15;

    ff_bwdif_filter_intra_neon(dst1, cur1, w0,
                               prefs, mrefs, prefs3, mrefs3,
                               parity, clip_max);

    if (w0 < w) {
        // cur1 stays const-qualified; only dst is written through
        ff_bwdif_filter_intra_c((char *)dst1 + w0,
                                (const char *)cur1 + w0,
                                w - w0, prefs, mrefs, prefs3, mrefs3,
                                parity, clip_max);
    }
}
108 
109 void
111 {
112  const int cpu_flags = av_get_cpu_flags();
113 
114  if (bit_depth != 8)
115  return;
116 
117  if (!have_neon(cpu_flags))
118  return;
119 
120  s->filter_intra = filter_intra_helper;
121  s->filter_line = filter_line_helper;
122  s->filter_edge = filter_edge_helper;
123  s->filter_line3 = filter_line3_helper;
124 }
125 
ff_bwdif_filter_intra_c
void ff_bwdif_filter_intra_c(void *dst1, const void *cur1, int w, int prefs, int mrefs, int prefs3, int mrefs3, int parity, int clip_max)
Definition: bwdifdsp.c:118
ff_bwdif_filter_line3_c
static void ff_bwdif_filter_line3_c(void *dst1, int d_stride, const void *prev1, const void *cur1, const void *next1, int s_stride, int w, int parity, int clip_max)
Definition: bwdifdsp.h:57
ff_bwdif_filter_line_neon
void ff_bwdif_filter_line_neon(void *dst1, const void *prev1, const void *cur1, const void *next1, int w, int prefs, int mrefs, int prefs2, int mrefs2, int prefs3, int mrefs3, int prefs4, int mrefs4, int parity, int clip_max)
w
uint8_t w
Definition: llviddspenc.c:38
filter_line3_helper
static void filter_line3_helper(void *dst1, int d_stride, const void *prev1, const void *cur1, const void *next1, int s_stride, int w, int parity, int clip_max)
Definition: vf_bwdif_init_aarch64.c:44
ff_bwdif_filter_edge_c
void ff_bwdif_filter_edge_c(void *dst1, const void *prev1, const void *cur1, const void *next1, int w, int prefs, int mrefs, int prefs2, int mrefs2, int parity, int clip_max, int spat)
Definition: bwdifdsp.c:146
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:107
bit_depth
static void bit_depth(AudioStatsContext *s, const uint64_t *const mask, uint8_t *depth)
Definition: af_astats.c:246
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
ff_bwdif_filter_intra_neon
void ff_bwdif_filter_intra_neon(void *dst1, const void *cur1, int w, int prefs, int mrefs, int prefs3, int mrefs3, int parity, int clip_max)
ff_bwdif_filter_line3_neon
void ff_bwdif_filter_line3_neon(void *dst1, int d_stride, const void *prev1, const void *cur1, const void *next1, int s_stride, int w, int parity, int clip_max)
s
#define s(width, name)
Definition: cbs_vp9.c:198
ff_bwdif_init_aarch64
void ff_bwdif_init_aarch64(BWDIFDSPContext *s, int bit_depth)
Definition: vf_bwdif_init_aarch64.c:110
filter_edge_helper
static void filter_edge_helper(void *dst1, const void *prev1, const void *cur1, const void *next1, int w, int prefs, int mrefs, int prefs2, int mrefs2, int parity, int clip_max, int spat)
Definition: vf_bwdif_init_aarch64.c:82
BWDIFDSPContext
Definition: bwdifdsp.h:25
parity
mcdeint parity
Definition: vf_mcdeint.c:281
have_neon
#define have_neon(flags)
Definition: cpu.h:26
filter_line_helper
static void filter_line_helper(void *dst1, const void *prev1, const void *cur1, const void *next1, int w, int prefs, int mrefs, int prefs2, int mrefs2, int prefs3, int mrefs3, int prefs4, int mrefs4, int parity, int clip_max)
Definition: vf_bwdif_init_aarch64.c:67
ff_bwdif_filter_line_c
void ff_bwdif_filter_line_c(void *dst1, const void *prev1, const void *cur1, const void *next1, int w, int prefs, int mrefs, int prefs2, int mrefs2, int prefs3, int mrefs3, int prefs4, int mrefs4, int parity, int clip_max)
Definition: bwdifdsp.c:128
common.h
bwdifdsp.h
ff_bwdif_filter_edge_neon
void ff_bwdif_filter_edge_neon(void *dst1, const void *prev1, const void *cur1, const void *next1, int w, int prefs, int mrefs, int prefs2, int mrefs2, int parity, int clip_max, int spat)
filter_intra_helper
static void filter_intra_helper(void *dst1, const void *cur1, int w, int prefs, int mrefs, int prefs3, int mrefs3, int parity, int clip_max)
Definition: vf_bwdif_init_aarch64.c:97
FFALIGN
#define FFALIGN(x, a)
Definition: macros.h:78
cpu.h