FFmpeg
swscale_unscaled.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "config.h"
22 #include "libswscale/swscale.h"
24 #include "libavutil/arm/cpu.h"
25 
26 #if HAVE_AS_DN_DIRECTIVE
/*
 * RGBA-to-NV12 conversion routines defined outside this file (the _neon
 * suffix suggests NEON assembly; the implementation is not visible here).
 *
 * The wrappers in this file call them with, in order: the packed source rows,
 * the luma plane, the chroma plane, the picture width, the number of rows to
 * convert, the luma / chroma / source strides and the context's
 * input_rgb2yuv_table.
 *
 * The _32 variant is selected when SWS_ACCURATE_RND is requested and the _16
 * variant otherwise; presumably the suffix is the intermediate precision in
 * bits (TODO confirm against the implementation).
 */
void rgbx_to_nv12_neon_32(const uint8_t *src, uint8_t *y, uint8_t *chroma,
                          int width, int height,
                          int y_stride, int c_stride, int src_stride,
                          int32_t coeff_tbl[9]);

void rgbx_to_nv12_neon_16(const uint8_t *src, uint8_t *y, uint8_t *chroma,
                          int width, int height,
                          int y_stride, int c_stride, int src_stride,
                          int32_t coeff_tbl[9]);
36 
37 static int rgbx_to_nv12_neon_32_wrapper(SwsInternal *context, const uint8_t *const src[],
38  const int srcStride[], int srcSliceY, int srcSliceH,
39  uint8_t *const dst[], const int dstStride[]) {
40 
41  rgbx_to_nv12_neon_32(src[0] + srcSliceY * srcStride[0],
42  dst[0] + srcSliceY * dstStride[0],
43  dst[1] + (srcSliceY / 2) * dstStride[1],
44  context->srcW, srcSliceH,
45  dstStride[0], dstStride[1], srcStride[0],
46  context->input_rgb2yuv_table);
47 
48  return 0;
49 }
50 
51 static int rgbx_to_nv12_neon_16_wrapper(SwsInternal *context, const uint8_t *const src[],
52  const int srcStride[], int srcSliceY, int srcSliceH,
53  uint8_t *const dst[], int dstStride[]) {
54 
55  rgbx_to_nv12_neon_16(src[0] + srcSliceY * srcStride[0],
56  dst[0] + srcSliceY * dstStride[0],
57  dst[1] + (srcSliceY / 2) * dstStride[1],
58  context->srcW, srcSliceH,
59  dstStride[0], dstStride[1], srcStride[0],
60  context->input_rgb2yuv_table);
61 
62  return 0;
63 }
64 
/*
 * Initializer list holding the four chroma-to-RGB coefficients of the context,
 * in the order v2r, u2g, v2g, u2b. It expands against a pointer named `c`
 * that must be in scope at the point of use, and ends with a trailing comma,
 * so it is meant to be used as `{ YUV_TO_RGB_TABLE }`.
 */
#define YUV_TO_RGB_TABLE \
    c->yuv2rgb_v2r_coeff, c->yuv2rgb_u2g_coeff, \
    c->yuv2rgb_v2g_coeff, c->yuv2rgb_u2b_coeff,

/*
 * For a three-plane input format <ifmt> and a packed output format <ofmt>,
 * declare the external converter ff_<ifmt>_to_<ofmt>_neon() and define the
 * static adapter <ifmt>_to_<ofmt>_neon_wrapper() around it.
 *
 * The adapter builds a four-entry coefficient table from the context with
 * YUV_TO_RGB_TABLE, then converts c->srcW x srcSliceH pixels from the source
 * planes src[0], src[1] and src[2] into dst[0], starting srcSliceY rows into
 * the destination. The source pointers are passed through without any slice
 * offset, and the luma offset is handed over as c->yuv2rgb_y_offset >> 6.
 * The adapter ignores the converter's return value and always returns 0.
 */
#define DECLARE_FF_YUVX_TO_RGBX_FUNCS(ifmt, ofmt) \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
                                 uint8_t *dst, int linesize, \
                                 const uint8_t *srcY, int linesizeY, \
                                 const uint8_t *srcU, int linesizeU, \
                                 const uint8_t *srcV, int linesizeV, \
                                 const int16_t *table, \
                                 int y_offset, \
                                 int y_coeff); \
 \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsInternal *c, const uint8_t *const src[], \
                                           const int srcStride[], int srcSliceY, \
                                           int srcSliceH, uint8_t *const dst[], \
                                           const int dstStride[]) \
{ \
    const int16_t coeff_tbl[] = { YUV_TO_RGB_TABLE }; \
    uint8_t *const rgb_out = dst[0] + srcSliceY * dstStride[0]; \
    const int luma_offset = c->yuv2rgb_y_offset >> 6; \
 \
    ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
                                 rgb_out, dstStride[0], \
                                 src[0], srcStride[0], \
                                 src[1], srcStride[1], \
                                 src[2], srcStride[2], \
                                 coeff_tbl, \
                                 luma_offset, \
                                 c->yuv2rgb_y_coeff); \
 \
    return 0; \
}

99 #define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx) \
100 DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, argb) \
101 DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, rgba) \
102 DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, abgr) \
103 DECLARE_FF_YUVX_TO_RGBX_FUNCS(yuvx, bgra) \
104 
107 
/*
 * For a two-plane input format <ifmt> (one luma plane, one chroma plane) and
 * a packed output format <ofmt>, declare the external converter
 * ff_<ifmt>_to_<ofmt>_neon() and define the static adapter
 * <ifmt>_to_<ofmt>_neon_wrapper() around it.
 *
 * The adapter builds a four-entry coefficient table from the context with
 * YUV_TO_RGB_TABLE, then converts c->srcW x srcSliceH pixels from src[0] and
 * src[1] into dst[0], starting srcSliceY rows into the destination. The
 * source pointers are passed through without any slice offset, and the luma
 * offset is handed over as c->yuv2rgb_y_offset >> 6. The adapter ignores the
 * converter's return value and always returns 0.
 */
#define DECLARE_FF_NVX_TO_RGBX_FUNCS(ifmt, ofmt) \
int ff_##ifmt##_to_##ofmt##_neon(int w, int h, \
                                 uint8_t *dst, int linesize, \
                                 const uint8_t *srcY, int linesizeY, \
                                 const uint8_t *srcC, int linesizeC, \
                                 const int16_t *table, \
                                 int y_offset, \
                                 int y_coeff); \
 \
static int ifmt##_to_##ofmt##_neon_wrapper(SwsInternal *c, const uint8_t *const src[], \
                                           const int srcStride[], int srcSliceY, \
                                           int srcSliceH, uint8_t *const dst[], \
                                           const int dstStride[]) \
{ \
    const int16_t coeff_tbl[] = { YUV_TO_RGB_TABLE }; \
    uint8_t *const rgb_out = dst[0] + srcSliceY * dstStride[0]; \
    const int luma_offset = c->yuv2rgb_y_offset >> 6; \
 \
    ff_##ifmt##_to_##ofmt##_neon(c->srcW, srcSliceH, \
                                 rgb_out, dstStride[0], \
                                 src[0], srcStride[0], \
                                 src[1], srcStride[1], \
                                 coeff_tbl, \
                                 luma_offset, \
                                 c->yuv2rgb_y_coeff); \
 \
    return 0; \
}

133 #define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx) \
134 DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, argb) \
135 DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, rgba) \
136 DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, abgr) \
137 DECLARE_FF_NVX_TO_RGBX_FUNCS(nvx, bgra) \
138 
141 
/* We need a 16 pixel width alignment. This constraint can easily be removed
 * for input reading but for the output which is 4-bytes per pixel (RGBA) the
 * assembly might be writing as much as 4*15=60 extra bytes at the end of the
 * line, which won't fit the 32-bytes buffer alignment.
 *
 * Installs <ifmt>_to_<ofmt>_neon_wrapper as c->convert_unscaled when the
 * context converts AV_PIX_FMT_<IFMT> to AV_PIX_FMT_<OFMT>, the source height
 * is even, the source width is a multiple of 16 and accurate_rnd is zero.
 * Otherwise c->convert_unscaled is left untouched.
 *
 * The macro reads and writes a pointer named `c` that must be in scope at the
 * point of use. accurate_rnd is parenthesized before negation so that an
 * expression argument (e.g. `flags & MASK`) is negated as a whole. */
#define SET_FF_NVX_TO_RGBX_FUNC(ifmt, IFMT, ofmt, OFMT, accurate_rnd) do { \
    if (c->srcFormat == AV_PIX_FMT_##IFMT \
        && c->dstFormat == AV_PIX_FMT_##OFMT \
        && !(c->srcH & 1) \
        && !(c->srcW & 15) \
        && !(accurate_rnd)) { \
        c->convert_unscaled = ifmt##_to_##ofmt##_neon_wrapper; \
    } \
} while (0)
155 
/*
 * Run SET_FF_NVX_TO_RGBX_FUNC for one input format (function-name prefix nvx,
 * pixel-format suffix NVX) against each of the four packed outputs:
 * ARGB, RGBA, ABGR and BGRA.
 */
#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd)          \
    do {                                                             \
        SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, argb, ARGB, accurate_rnd); \
        SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, rgba, RGBA, accurate_rnd); \
        SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, abgr, ABGR, accurate_rnd); \
        SET_FF_NVX_TO_RGBX_FUNC(nvx, NVX, bgra, BGRA, accurate_rnd); \
    } while (0)
162 
164  int accurate_rnd = c->flags & SWS_ACCURATE_RND;
165  if (c->srcFormat == AV_PIX_FMT_RGBA
166  && c->dstFormat == AV_PIX_FMT_NV12
167  && (c->srcW >= 16)) {
168  c->convert_unscaled = accurate_rnd ? rgbx_to_nv12_neon_32_wrapper
169  : rgbx_to_nv12_neon_16_wrapper;
170  }
171 
172  SET_FF_NVX_TO_ALL_RGBX_FUNC(nv12, NV12, accurate_rnd);
173  SET_FF_NVX_TO_ALL_RGBX_FUNC(nv21, NV21, accurate_rnd);
174  SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv420p, YUV420P, accurate_rnd);
175  SET_FF_NVX_TO_ALL_RGBX_FUNC(yuv422p, YUV422P, accurate_rnd);
176 }
177 
179 {
180  int cpu_flags = av_get_cpu_flags();
181  if (have_neon(cpu_flags))
183 }
184 #else
186 {
187 }
188 #endif
get_unscaled_swscale_neon
static void get_unscaled_swscale_neon(SwsInternal *c)
Definition: swscale_unscaled.c:202
SET_FF_NVX_TO_ALL_RGBX_FUNC
#define SET_FF_NVX_TO_ALL_RGBX_FUNC(nvx, NVX, accurate_rnd)
Definition: swscale_unscaled.c:194
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1639
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:107
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:56
DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS
#define DECLARE_FF_YUVX_TO_ALL_RGBX_FUNCS(yuvx)
Definition: swscale_unscaled.c:86
AV_PIX_FMT_RGBA
@ AV_PIX_FMT_RGBA
packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
Definition: pixfmt.h:100
context
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf default minimum maximum flags name is the option keep it simple and lowercase description are in without and describe what they for example set the foo of the bar offset is the offset of the field in your context
Definition: writing_filters.txt:91
cpu.h
ff_get_unscaled_swscale_arm
void ff_get_unscaled_swscale_arm(SwsInternal *c)
Definition: swscale_unscaled.c:185
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
height
#define height
Definition: dsp.h:85
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
have_neon
#define have_neon(flags)
Definition: cpu.h:26
SWS_ACCURATE_RND
#define SWS_ACCURATE_RND
Definition: swscale.h:199
swscale_internal.h
SwsInternal
Definition: swscale_internal.h:330
AV_PIX_FMT_NV12
@ AV_PIX_FMT_NV12
planar YUV 4:2:0, 12bpp, 1 plane for Y and 1 plane for the UV components, which are interleaved (firs...
Definition: pixfmt.h:96
DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS
#define DECLARE_FF_NVX_TO_ALL_RGBX_FUNCS(nvx)
Definition: swscale_unscaled.c:171
int32_t
int32_t
Definition: audioconvert.c:56
width
#define width
Definition: dsp.h:85
src
#define src
Definition: vp8dsp.c:248
swscale.h