FFmpeg
swscale_unscaled.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "config.h"
20 #include "libswscale/swscale.h"
22 #include "libavutil/aarch64/cpu.h"
23 
/*
 * Comma-separated initializer entries for the coefficient table passed to the
 * NEON conversion routines. Must be expanded where `c` points to the
 * conversion context. Entry order: v2r, u2g, v2g, u2b.
 * The trailing comma is intentional; the expansion is meant to sit inside a
 * brace-enclosed initializer.
 */
#define YUV_TO_RGB_TABLE       \
    c->yuv2rgb_v2r_coeff,      \
    c->yuv2rgb_u2g_coeff,      \
    c->yuv2rgb_v2g_coeff,      \
    c->yuv2rgb_u2b_coeff,
29 
/*
 * DECLARE_FF_YUVX_TO_RGBX_FUNCS(ifmt, ofmt)
 *
 * For a three-plane YUV input format and a single-plane RGB output format,
 * emits:
 *   - the prototype of the external routine ff_<ifmt>_to_<ofmt>_neon(), and
 *   - a static <ifmt>_to_<ofmt>_neon_wrapper() that takes the
 *     (c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride) argument set
 *     and forwards to that routine.
 *
 * The wrapper passes c->srcW as the width and srcSliceH as the height,
 * advances dst[0] by srcSliceY rows, hands over the three source planes with
 * their strides, the table built from YUV_TO_RGB_TABLE,
 * c->yuv2rgb_y_offset shifted right by 6 and c->yuv2rgb_y_coeff.
 * It returns whatever the external routine returns.
 */
#define DECLARE_FF_YUVX_TO_RGBX_FUNCS(ifmt, ofmt) \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
                                 uint8_t *dst, int linesize, \
                                 const uint8_t *srcY, int linesizeY, \
                                 const uint8_t *srcU, int linesizeU, \
                                 const uint8_t *srcV, int linesizeV, \
                                 const int16_t *table, \
                                 int y_offset, \
                                 int y_coeff); \
 \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsInternal *c, const uint8_t *const src[], \
                                           const int srcStride[], int srcSliceY, \
                                           int srcSliceH, uint8_t *const dst[], \
                                           const int dstStride[]) { \
    const int16_t coeffs[] = { YUV_TO_RGB_TABLE }; \
    uint8_t *const out = dst[0] + srcSliceY * dstStride[0]; \
 \
    return ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
                                        out, dstStride[0], \
                                        src[0], srcStride[0], \
                                        src[1], srcStride[1], \
                                        src[2], srcStride[2], \
                                        coeffs, \
                                        c->yuv2rgb_y_offset >> 6, \
                                        c->yuv2rgb_y_coeff); \
}
55 
/*
 * DECLARE_FF_YUVX_TO_GBRP_FUNCS(ifmt, ofmt)
 *
 * Three-plane-output counterpart of DECLARE_FF_YUVX_TO_RGBX_FUNCS: emits the
 * prototype of ff_<ifmt>_to_<ofmt>_neon() (which takes two extra destination
 * pointer/stride pairs at the end of its argument list) and a static
 * <ifmt>_to_<ofmt>_neon_wrapper() that forwards to it.
 *
 * The wrapper advances each of dst[0], dst[1] and dst[2] by srcSliceY rows of
 * its own stride. dst[0] goes in the regular destination slot, and dst[1] and
 * dst[2] are passed as the trailing dst1/dst2 arguments. All other arguments
 * match the single-plane wrapper. The return value is that of the external
 * routine.
 */
#define DECLARE_FF_YUVX_TO_GBRP_FUNCS(ifmt, ofmt) \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
                                 uint8_t *dst, int linesize, \
                                 const uint8_t *srcY, int linesizeY, \
                                 const uint8_t *srcU, int linesizeU, \
                                 const uint8_t *srcV, int linesizeV, \
                                 const int16_t *table, \
                                 int y_offset, \
                                 int y_coeff, \
                                 uint8_t *dst1, int linesize1, \
                                 uint8_t *dst2, int linesize2); \
 \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsInternal *c, const uint8_t *const src[], \
                                           const int srcStride[], int srcSliceY, \
                                           int srcSliceH, uint8_t *const dst[], \
                                           const int dstStride[]) { \
    const int16_t coeffs[] = { YUV_TO_RGB_TABLE }; \
    uint8_t *const out0 = dst[0] + srcSliceY * dstStride[0]; \
    uint8_t *const out1 = dst[1] + srcSliceY * dstStride[1]; \
    uint8_t *const out2 = dst[2] + srcSliceY * dstStride[2]; \
 \
    return ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
                                        out0, dstStride[0], \
                                        src[0], srcStride[0], \
                                        src[1], srcStride[1], \
                                        src[2], srcStride[2], \
                                        coeffs, \
                                        c->yuv2rgb_y_offset >> 6, \
                                        c->yuv2rgb_y_coeff, \
                                        out1, dstStride[1], \
                                        out2, dstStride[2]); \
}
85 
/*
 * Instantiate the prototype and wrapper for every supported output of one
 * three-plane YUV input format: argb, rgba, abgr and bgra through
 * DECLARE_FF_YUVX_TO_RGBX_FUNCS, and gbrp through
 * DECLARE_FF_YUVX_TO_GBRP_FUNCS.
 */
#define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx)   \
    DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, argb)     \
    DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, rgba)     \
    DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, abgr)     \
    DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, bgra)     \
    DECLARE_FF_YUVX_TO_GBRP_FUNCS(yuvx, gbrp)
92 
95 
/*
 * DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt)
 *
 * For a two-plane input format (one Y plane, one chroma plane srcC) and a
 * single-plane RGB output format, emits:
 *   - the prototype of the external routine ff_<ifmt>_to_<ofmt>_neon(), and
 *   - a static <ifmt>_to_<ofmt>_neon_wrapper() that takes the
 *     (c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride) argument set
 *     and forwards to that routine.
 *
 * The wrapper passes c->srcW and srcSliceH as the dimensions, dst[0] advanced
 * by srcSliceY rows, src[0] and src[1] with their strides, the table built
 * from YUV_TO_RGB_TABLE, c->yuv2rgb_y_offset shifted right by 6 and
 * c->yuv2rgb_y_coeff. It returns whatever the external routine returns.
 */
#define DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt) \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
                                 uint8_t *dst, int linesize, \
                                 const uint8_t *srcY, int linesizeY, \
                                 const uint8_t *srcC, int linesizeC, \
                                 const int16_t *table, \
                                 int y_offset, \
                                 int y_coeff); \
 \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsInternal *c, const uint8_t *const src[], \
                                           const int srcStride[], int srcSliceY, \
                                           int srcSliceH, uint8_t *const dst[], \
                                           const int dstStride[]) { \
    const int16_t coeffs[] = { YUV_TO_RGB_TABLE }; \
    uint8_t *const out = dst[0] + srcSliceY * dstStride[0]; \
 \
    return ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
                                        out, dstStride[0], \
                                        src[0], srcStride[0], \
                                        src[1], srcStride[1], \
                                        coeffs, \
                                        c->yuv2rgb_y_offset >> 6, \
                                        c->yuv2rgb_y_coeff); \
}
118 
/*
 * DECLARE_FF_NVX_TO_GBRP_FUNCS(ifmt, ofmt)
 *
 * Three-plane-output counterpart of DECLARE_FF_NVX_TO_RGBX_FUNCS: emits the
 * prototype of ff_<ifmt>_to_<ofmt>_neon() (which takes two extra destination
 * pointer/stride pairs at the end of its argument list) and a static
 * <ifmt>_to_<ofmt>_neon_wrapper() that forwards to it.
 *
 * The wrapper advances each of dst[0], dst[1] and dst[2] by srcSliceY rows of
 * its own stride. dst[0] goes in the regular destination slot, and dst[1] and
 * dst[2] are passed as the trailing dst1/dst2 arguments. The return value is
 * that of the external routine.
 */
#define DECLARE_FF_NVX_TO_GBRP_FUNCS(ifmt, ofmt) \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
                                 uint8_t *dst, int linesize, \
                                 const uint8_t *srcY, int linesizeY, \
                                 const uint8_t *srcC, int linesizeC, \
                                 const int16_t *table, \
                                 int y_offset, \
                                 int y_coeff, \
                                 uint8_t *dst1, int linesize1, \
                                 uint8_t *dst2, int linesize2); \
 \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsInternal *c, const uint8_t *const src[], \
                                           const int srcStride[], int srcSliceY, \
                                           int srcSliceH, uint8_t *const dst[], \
                                           const int dstStride[]) { \
    const int16_t coeffs[] = { YUV_TO_RGB_TABLE }; \
    uint8_t *const out0 = dst[0] + srcSliceY * dstStride[0]; \
    uint8_t *const out1 = dst[1] + srcSliceY * dstStride[1]; \
    uint8_t *const out2 = dst[2] + srcSliceY * dstStride[2]; \
 \
    return ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
                                        out0, dstStride[0], \
                                        src[0], srcStride[0], \
                                        src[1], srcStride[1], \
                                        coeffs, \
                                        c->yuv2rgb_y_offset >> 6, \
                                        c->yuv2rgb_y_coeff, \
                                        out1, dstStride[1], \
                                        out2, dstStride[2]); \
}
145 
146 void ff_nv24_to_yuv420p_chroma_neon(uint8_t *dst1, int dstStride1,
147  uint8_t *dst2, int dstStride2,
148  const uint8_t *src, int srcStride,
149  int w, int h);
150 
151 static int nv24_to_yuv420p_neon_wrapper(SwsInternal *c, const uint8_t *const src[],
152  const int srcStride[], int srcSliceY, int srcSliceH,
153  uint8_t *const dst[], const int dstStride[])
154 {
155  uint8_t *dst1 = dst[1] + dstStride[1] * srcSliceY / 2;
156  uint8_t *dst2 = dst[2] + dstStride[2] * srcSliceY / 2;
157 
158  ff_copyPlane(src[0], srcStride[0], srcSliceY, srcSliceH, c->srcW,
159  dst[0], dstStride[0]);
160 
161  if (c->srcFormat == AV_PIX_FMT_NV24)
162  ff_nv24_to_yuv420p_chroma_neon(dst1, dstStride[1], dst2, dstStride[2],
163  src[1], srcStride[1], c->srcW / 2, srcSliceH);
164  else
165  ff_nv24_to_yuv420p_chroma_neon(dst2, dstStride[2], dst1, dstStride[1],
166  src[1], srcStride[1], c->srcW / 2, srcSliceH);
167 
168  return srcSliceH;
169 }
170 
/*
 * Instantiate the prototype and wrapper for every supported output of one
 * two-plane input format: argb, rgba, abgr and bgra through
 * DECLARE_FF_NVX_TO_RGBX_FUNCS, and gbrp through
 * DECLARE_FF_NVX_TO_GBRP_FUNCS.
 */
#define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx)   \
    DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, argb)     \
    DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, rgba)     \
    DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, abgr)     \
    DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, bgra)     \
    DECLARE_FF_NVX_TO_GBRP_FUNCS(nvx, gbrp)
177 
180 
/* We need a 16 pixel width alignment. This constraint can easily be removed
 * for input reading but for the output which is 4-bytes per pixel (RGBA) the
 * assembly might be writing as much as 4*15=60 extra bytes at the end of the
 * line, which won't fit the 32-bytes buffer alignment.
 *
 * SET_FF_NVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd)
 *
 * Expands where `c` points to the conversion context. Installs
 * <ifmt>_to_<ofmt>_neon_wrapper as c->convert_unscaled when all of these hold:
 *   - c->srcFormat is AV_PIX_FMT_<IFMT> and c->dstFormat is AV_PIX_FMT_<OFMT>,
 *   - c->srcH is even,
 *   - c->srcW is a multiple of 16,
 *   - accurate_rnd evaluates to zero.
 * Otherwise c->convert_unscaled is left untouched.
 *
 * accurate_rnd is parenthesized before negation so that an expression
 * argument (e.g. `flags & MASK`) is negated as a whole. */
#define SET_FF_NVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd) do { \
    if (c->srcFormat == AV_PIX_FMT_##IFMT \
        && c->dstFormat == AV_PIX_FMT_##OFMT \
        && !(c->srcH & 1) \
        && !(c->srcW & 15) \
        && !(accurate_rnd)) { \
        c->convert_unscaled = ifmt##_to_##ofmt##_neon_wrapper; \
    } \
} while (0)
193 
/*
 * Run SET_FF_NVX_TO_RGBX_FUNC for one input format against each supported
 * output format in turn: ARGB, RGBA, ABGR, BGRA, GBRP.
 * accurate_rnd is forwarded unchanged to every invocation.
 */
#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd)              \
    do {                                                                 \
        SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, argb, ARGB, accurate_rnd);     \
        SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, rgba, RGBA, accurate_rnd);     \
        SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, abgr, ABGR, accurate_rnd);     \
        SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, bgra, BGRA, accurate_rnd);     \
        SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, gbrp, GBRP, accurate_rnd);     \
    } while (0)
201 
203  int accurate_rnd = c->flags & SWS_ACCURATE_RND;
204 
205  SET_FF_NVX_TO_ALL_RGBX_FUNC(nv12, NV12, accurate_rnd);
206  SET_FF_NVX_TO_ALL_RGBX_FUNC(nv21, NV21, accurate_rnd);
207  SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv420p, YUV420P, accurate_rnd);
208  SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv422p, YUV422P, accurate_rnd);
209 
210  if (c->dstFormat == AV_PIX_FMT_YUV420P &&
211  (c->srcFormat == AV_PIX_FMT_NV24 || c->srcFormat == AV_PIX_FMT_NV42) &&
212  !(c->srcH & 1) && !(c->srcW & 15) && !accurate_rnd)
213  c->convert_unscaled = nv24_to_yuv420p_neon_wrapper;
214 }
215 
217 {
218  int cpu_flags = av_get_cpu_flags();
219  if (have_neon(cpu_flags))
221 }
get_unscaled_swscale_neon
static void get_unscaled_swscale_neon(SwsInternal *c)
Definition: swscale_unscaled.c:202
w
uint8_t w
Definition: llviddspenc.c:38
SET_FF_NVX_TO_ALL_RGBX_FUNC
#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd)
Definition: swscale_unscaled.c:194
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:107
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS
#define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx)
Definition: swscale_unscaled.c:86
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
ff_get_unscaled_swscale_aarch64
void ff_get_unscaled_swscale_aarch64(SwsInternal *c)
Definition: swscale_unscaled.c:216
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
have_neon
#define have_neon(flags)
Definition: cpu.h:26
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:199
ff_copyPlane
void ff_copyPlane(const uint8_t *src, int srcStride, int srcSliceY, int srcSliceH, int width, uint8_t *dst, int dstStride)
Definition: swscale_unscaled.c:125
AV_PIX_FMT_NV24
@ AV_PIX_FMT_NV24
planar YUV 4:4:4, 24bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:371
swscale_internal.h
ff_nv24_to_yuv420p_chroma_neon
void ff_nv24_to_yuv420p_chroma_neon(uint8_t *dst1, int dstStride1, uint8_t *dst2, int dstStride2, const uint8_t *src, int srcStride, int w, int h)
AV_PIX_FMT_NV42
@ AV_PIX_FMT_NV42
as above, but U and V bytes are swapped
Definition: pixfmt.h:372
SwsInternal
Definition: swscale_internal.h:330
DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS
#define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx)
Definition: swscale_unscaled.c:171
nv24_to_yuv420p_neon_wrapper
static int nv24_to_yuv420p_neon_wrapper(SwsInternal *c, const uint8_t *const src[], const int srcStride[], int srcSliceY, int srcSliceH, uint8_t *const dst[], const int dstStride[])
Definition: swscale_unscaled.c:151
h
h
Definition: vp9dsp_template.c:2070
cpu.h
src
#define src
Definition: vp8dsp.c:248
swscale.h