FFmpeg
sw_rgb.c
Go to the documentation of this file.
1 /*
2  *
3  * This file is part of FFmpeg.
4  *
5  * FFmpeg is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * FFmpeg is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License along
16  * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
18  */
19 
20 #include <string.h>
21 
22 #include "libavutil/common.h"
23 #include "libavutil/intreadwrite.h"
24 #include "libavutil/mem_internal.h"
25 #include "libavutil/pixdesc.h"
26 
27 #include "libswscale/rgb2rgb.h"
28 #include "libswscale/swscale.h"
30 
31 #include "checkasm.h"
32 
/*
 * Fill the first `size` bytes of `buf` with pseudo-random data, writing one
 * 32-bit word (AV_WN32 of rnd()) per step of 4 bytes. If `size` is not a
 * multiple of 4 the last store runs past `size`, so the buffer must have room
 * for that.
 *
 * Both arguments are parenthesized so that expressions such as
 * `(uint8_t *)p` or `a * b` can be passed safely.
 */
#define randomize_buffers(buf, size)        \
    do {                                    \
        int j;                              \
        for (j = 0; j < (size); j += 4)     \
            AV_WN32((buf) + j, rnd());      \
    } while (0)
39 
/* Values passed as src_size to the routines exercised by check_shuffle_bytes(). */
40 static const uint8_t width[] = {12, 16, 20, 32, 36, 128};
/* {w, h, s} triples passed as width, height and srcStride by check_uyvy_to_422p(). */
41 static const struct {uint8_t w, h, s;} planes[] = {
42  {12,16,12}, {16,16,16}, {20,23,25}, {32,18,48}, {8,128,16}, {128,128,128}
43 };
44 
/* Dimensions used to size the scratch buffers of the tests below. */
45 #define MAX_STRIDE 128
46 #define MAX_HEIGHT 128
47 
48 static void check_shuffle_bytes(void * func, const char * report)
49 {
50  int i;
51  LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE]);
52  LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE]);
53  LOCAL_ALIGNED_32(uint8_t, dst0, [MAX_STRIDE]);
54  LOCAL_ALIGNED_32(uint8_t, dst1, [MAX_STRIDE]);
55 
56  declare_func(void, const uint8_t *src, uint8_t *dst, int src_size);
57 
58  memset(dst0, 0, MAX_STRIDE);
59  memset(dst1, 0, MAX_STRIDE);
61  memcpy(src1, src0, MAX_STRIDE);
62 
63  if (check_func(func, "%s", report)) {
64  for (i = 0; i < 6; i ++) {
65  call_ref(src0, dst0, width[i]);
66  call_new(src1, dst1, width[i]);
67  if (memcmp(dst0, dst1, MAX_STRIDE))
68  fail();
69  }
70  bench_new(src0, dst0, width[5]);
71  }
72 }
73 
74 static void check_uyvy_to_422p(void)
75 {
76  int i;
77 
78  LOCAL_ALIGNED_32(uint8_t, src0, [MAX_STRIDE * MAX_HEIGHT * 2]);
79  LOCAL_ALIGNED_32(uint8_t, src1, [MAX_STRIDE * MAX_HEIGHT * 2]);
80  LOCAL_ALIGNED_32(uint8_t, dst_y_0, [MAX_STRIDE * MAX_HEIGHT]);
81  LOCAL_ALIGNED_32(uint8_t, dst_y_1, [MAX_STRIDE * MAX_HEIGHT]);
82  LOCAL_ALIGNED_32(uint8_t, dst_u_0, [(MAX_STRIDE/2) * MAX_HEIGHT]);
83  LOCAL_ALIGNED_32(uint8_t, dst_u_1, [(MAX_STRIDE/2) * MAX_HEIGHT]);
84  LOCAL_ALIGNED_32(uint8_t, dst_v_0, [(MAX_STRIDE/2) * MAX_HEIGHT]);
85  LOCAL_ALIGNED_32(uint8_t, dst_v_1, [(MAX_STRIDE/2) * MAX_HEIGHT]);
86 
87  declare_func(void, uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
88  const uint8_t *src, int width, int height,
89  int lumStride, int chromStride, int srcStride);
90 
92  memcpy(src1, src0, MAX_STRIDE * MAX_HEIGHT * 2);
93 
94  if (check_func(uyvytoyuv422, "uyvytoyuv422")) {
95  for (i = 0; i < 6; i ++) {
96  memset(dst_y_0, 0, MAX_STRIDE * MAX_HEIGHT);
97  memset(dst_y_1, 0, MAX_STRIDE * MAX_HEIGHT);
98  memset(dst_u_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
99  memset(dst_u_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
100  memset(dst_v_0, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
101  memset(dst_v_1, 0, (MAX_STRIDE/2) * MAX_HEIGHT);
102 
103  call_ref(dst_y_0, dst_u_0, dst_v_0, src0, planes[i].w, planes[i].h,
104  MAX_STRIDE, MAX_STRIDE / 2, planes[i].s);
105  call_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[i].w, planes[i].h,
106  MAX_STRIDE, MAX_STRIDE / 2, planes[i].s);
107  if (memcmp(dst_y_0, dst_y_1, MAX_STRIDE * MAX_HEIGHT) ||
108  memcmp(dst_u_0, dst_u_1, (MAX_STRIDE/2) * MAX_HEIGHT) ||
109  memcmp(dst_v_0, dst_v_1, (MAX_STRIDE/2) * MAX_HEIGHT))
110  fail();
111  }
112  bench_new(dst_y_1, dst_u_1, dst_v_1, src1, planes[5].w, planes[5].h,
113  MAX_STRIDE, MAX_STRIDE / 2, planes[5].s);
114  }
115 }
116 
/* Buffer sizing for check_rgb24toyv12(): BUFSIZE bytes per luma buffer,
 * i.e. NUM_LINES lines of MAX_LINE_SIZE bytes. */
117 #define NUM_LINES 5
118 #define MAX_LINE_SIZE 1920
119 #define BUFSIZE (NUM_LINES * MAX_LINE_SIZE)
120 
/**
 * Tolerant buffer comparison.
 *
 * @param ref      reference bytes
 * @param test     bytes to compare against ref
 * @param n        number of bytes to compare
 * @param accuracy largest absolute per-byte difference that is still accepted
 * @return 1 if any pair of bytes differs by more than accuracy, 0 otherwise
 */
static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
{
    const uint8_t *const end = ref + n;

    while (ref < end) {
        int delta = *ref++ - *test++;

        if (delta < 0)
            delta = -delta;
        if (delta > accuracy)
            return 1;
    }
    return 0;
}
129 
130 static void check_rgb24toyv12(SwsContext *sws)
131 {
132  static const int input_sizes[] = {16, 128, 512, MAX_LINE_SIZE, -MAX_LINE_SIZE};
133  SwsInternal *ctx = sws_internal(sws);
134 
135  LOCAL_ALIGNED_32(uint8_t, src, [BUFSIZE * 3]);
136  LOCAL_ALIGNED_32(uint8_t, buf_y_0, [BUFSIZE]);
137  LOCAL_ALIGNED_32(uint8_t, buf_y_1, [BUFSIZE]);
138  LOCAL_ALIGNED_32(uint8_t, buf_u_0, [BUFSIZE / 4]);
139  LOCAL_ALIGNED_32(uint8_t, buf_u_1, [BUFSIZE / 4]);
140  LOCAL_ALIGNED_32(uint8_t, buf_v_0, [BUFSIZE / 4]);
141  LOCAL_ALIGNED_32(uint8_t, buf_v_1, [BUFSIZE / 4]);
142 
143  declare_func(void, const uint8_t *src, uint8_t *ydst, uint8_t *udst,
144  uint8_t *vdst, int width, int height, int lumStride,
145  int chromStride, int srcStride, const int32_t *rgb2yuv);
146 
148 
149  for (int isi = 0; isi < FF_ARRAY_ELEMS(input_sizes); isi++) {
150  int input_size = input_sizes[isi];
151  int negstride = input_size < 0;
152  const char *negstride_str = negstride ? "_negstride" : "";
153  int width = FFABS(input_size);
154  int linesize = width + 32;
155  /* calculate height based on specified width to use the entire buffer. */
156  int height = (BUFSIZE / linesize) & ~1;
157  uint8_t *src0 = src;
158  uint8_t *src1 = src;
159  uint8_t *dst_y_0 = buf_y_0;
160  uint8_t *dst_y_1 = buf_y_1;
161  uint8_t *dst_u_0 = buf_u_0;
162  uint8_t *dst_u_1 = buf_u_1;
163  uint8_t *dst_v_0 = buf_v_0;
164  uint8_t *dst_v_1 = buf_v_1;
165 
166  if (negstride) {
167  src0 += (height - 1) * (linesize * 3);
168  src1 += (height - 1) * (linesize * 3);
169  dst_y_0 += (height - 1) * linesize;
170  dst_y_1 += (height - 1) * linesize;
171  dst_u_0 += ((height / 2) - 1) * (linesize / 2);
172  dst_u_1 += ((height / 2) - 1) * (linesize / 2);
173  dst_v_0 += ((height / 2) - 1) * (linesize / 2);
174  dst_v_1 += ((height / 2) - 1) * (linesize / 2);
175  linesize *= -1;
176  }
177 
178  if (check_func(ff_rgb24toyv12, "rgb24toyv12_%d_%d%s", width, height, negstride_str)) {
179  memset(buf_y_0, 0xFF, BUFSIZE);
180  memset(buf_y_1, 0xFF, BUFSIZE);
181  memset(buf_u_0, 0xFF, BUFSIZE / 4);
182  memset(buf_u_1, 0xFF, BUFSIZE / 4);
183  memset(buf_v_0, 0xFF, BUFSIZE / 4);
184  memset(buf_v_1, 0xFF, BUFSIZE / 4);
185 
186  call_ref(src0, dst_y_0, dst_u_0, dst_v_0, width, height,
187  linesize, linesize / 2, linesize * 3, ctx->input_rgb2yuv_table);
188  call_new(src1, dst_y_1, dst_u_1, dst_v_1, width, height,
189  linesize, linesize / 2, linesize * 3, ctx->input_rgb2yuv_table);
190  if (cmp_off_by_n(buf_y_0, buf_y_1, BUFSIZE, 1) ||
191  cmp_off_by_n(buf_u_0, buf_u_1, BUFSIZE / 4, 1) ||
192  cmp_off_by_n(buf_v_0, buf_v_1, BUFSIZE / 4, 1))
193  fail();
194  bench_new(src1, dst_y_1, dst_u_1, dst_v_1, width, height,
195  linesize, linesize / 2, linesize * 3, ctx->input_rgb2yuv_table);
196  }
197  }
198 }
199 
200 #undef NUM_LINES
201 #undef MAX_LINE_SIZE
202 #undef BUFSIZE
203 
204 static void check_interleave_bytes(void)
205 {
206  LOCAL_ALIGNED_16(uint8_t, src0_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
207  LOCAL_ALIGNED_16(uint8_t, src1_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
208  LOCAL_ALIGNED_16(uint8_t, dst0_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]);
209  LOCAL_ALIGNED_16(uint8_t, dst1_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]);
210  // Intentionally using unaligned buffers, as this function doesn't have
211  // any alignment requirements.
212  uint8_t *src0 = src0_buf + 1;
213  uint8_t *src1 = src1_buf + 1;
214  uint8_t *dst0 = dst0_buf + 2;
215  uint8_t *dst1 = dst1_buf + 2;
216 
217  declare_func(void, const uint8_t *, const uint8_t *,
218  uint8_t *, int, int, int, int, int);
219 
222 
223  if (check_func(interleaveBytes, "interleave_bytes")) {
224  for (int i = 0; i <= 16; i++) {
225  // Try all widths [1,16], and try one random width.
226 
227  int w = i > 0 ? i : (1 + (rnd() % (MAX_STRIDE-2)));
228  int h = 1 + (rnd() % (MAX_HEIGHT-2));
229 
230  int src0_offset = 0, src0_stride = MAX_STRIDE;
231  int src1_offset = 0, src1_stride = MAX_STRIDE;
232  int dst_offset = 0, dst_stride = 2 * MAX_STRIDE;
233 
234  memset(dst0, 0, 2 * MAX_STRIDE * MAX_HEIGHT);
235  memset(dst1, 0, 2 * MAX_STRIDE * MAX_HEIGHT);
236 
237  // Try different combinations of negative strides
238  if (i & 1) {
239  src0_offset = (h-1)*src0_stride;
240  src0_stride = -src0_stride;
241  }
242  if (i & 2) {
243  src1_offset = (h-1)*src1_stride;
244  src1_stride = -src1_stride;
245  }
246  if (i & 4) {
247  dst_offset = (h-1)*dst_stride;
248  dst_stride = -dst_stride;
249  }
250 
251  call_ref(src0 + src0_offset, src1 + src1_offset, dst0 + dst_offset,
252  w, h, src0_stride, src1_stride, dst_stride);
253  call_new(src0 + src0_offset, src1 + src1_offset, dst1 + dst_offset,
254  w, h, src0_stride, src1_stride, dst_stride);
255  // Check a one pixel-pair edge around the destination area,
256  // to catch overwrites past the end.
257  checkasm_check(uint8_t, dst0, 2*MAX_STRIDE, dst1, 2*MAX_STRIDE,
258  2 * w + 2, h + 1, "dst");
259  }
260 
261  bench_new(src0, src1, dst1, 127, MAX_HEIGHT,
263  }
264  if (check_func(interleaveBytes, "interleave_bytes_aligned")) {
265  // Bench the function in a more typical case, with aligned
266  // buffers and widths.
267  bench_new(src0_buf, src1_buf, dst1_buf, 128, MAX_HEIGHT,
269  }
270 }
271 
272 static void check_deinterleave_bytes(void)
273 {
274  LOCAL_ALIGNED_16(uint8_t, src_buf, [2*MAX_STRIDE*MAX_HEIGHT+2]);
275  LOCAL_ALIGNED_16(uint8_t, dst0_u_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
276  LOCAL_ALIGNED_16(uint8_t, dst0_v_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
277  LOCAL_ALIGNED_16(uint8_t, dst1_u_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
278  LOCAL_ALIGNED_16(uint8_t, dst1_v_buf, [MAX_STRIDE*MAX_HEIGHT+1]);
279  // Intentionally using unaligned buffers, as this function doesn't have
280  // any alignment requirements.
281  uint8_t *src = src_buf + 2;
282  uint8_t *dst0_u = dst0_u_buf + 1;
283  uint8_t *dst0_v = dst0_v_buf + 1;
284  uint8_t *dst1_u = dst1_u_buf + 1;
285  uint8_t *dst1_v = dst1_v_buf + 1;
286 
287  declare_func(void, const uint8_t *src, uint8_t *dst1, uint8_t *dst2,
288  int width, int height, int srcStride,
289  int dst1Stride, int dst2Stride);
290 
292 
293  if (check_func(deinterleaveBytes, "deinterleave_bytes")) {
294  for (int i = 0; i <= 16; i++) {
295  // Try all widths [1,16], and try one random width.
296 
297  int w = i > 0 ? i : (1 + (rnd() % (MAX_STRIDE-2)));
298  int h = 1 + (rnd() % (MAX_HEIGHT-2));
299 
300  int src_offset = 0, src_stride = 2 * MAX_STRIDE;
301  int dst_u_offset = 0, dst_u_stride = MAX_STRIDE;
302  int dst_v_offset = 0, dst_v_stride = MAX_STRIDE;
303 
304  memset(dst0_u, 0, MAX_STRIDE * MAX_HEIGHT);
305  memset(dst0_v, 0, MAX_STRIDE * MAX_HEIGHT);
306  memset(dst1_u, 0, MAX_STRIDE * MAX_HEIGHT);
307  memset(dst1_v, 0, MAX_STRIDE * MAX_HEIGHT);
308 
309  // Try different combinations of negative strides
310  if (i & 1) {
311  src_offset = (h-1)*src_stride;
312  src_stride = -src_stride;
313  }
314  if (i & 2) {
315  dst_u_offset = (h-1)*dst_u_stride;
316  dst_u_stride = -dst_u_stride;
317  }
318  if (i & 4) {
319  dst_v_offset = (h-1)*dst_v_stride;
320  dst_v_stride = -dst_v_stride;
321  }
322 
323  call_ref(src + src_offset, dst0_u + dst_u_offset, dst0_v + dst_v_offset,
324  w, h, src_stride, dst_u_stride, dst_v_stride);
325  call_new(src + src_offset, dst1_u + dst_u_offset, dst1_v + dst_v_offset,
326  w, h, src_stride, dst_u_stride, dst_v_stride);
327  // Check a one pixel-pair edge around the destination area,
328  // to catch overwrites past the end.
329  checkasm_check(uint8_t, dst0_u, MAX_STRIDE, dst1_u, MAX_STRIDE,
330  w + 1, h + 1, "dst_u");
331  checkasm_check(uint8_t, dst0_v, MAX_STRIDE, dst1_v, MAX_STRIDE,
332  w + 1, h + 1, "dst_v");
333  }
334 
335  bench_new(src, dst1_u, dst1_v, 127, MAX_HEIGHT,
337  }
338  if (check_func(deinterleaveBytes, "deinterleave_bytes_aligned")) {
339  // Bench the function in a more typical case, with aligned
340  // buffers and widths.
341  bench_new(src_buf, dst1_u_buf, dst1_v_buf, 128, MAX_HEIGHT,
343  }
344 }
345 
/* Largest line width (in pixels) used by the rgb_to_y / rgb_to_uv / rgba_to_a checks. */
346 #define MAX_LINE_SIZE 1920
/* Line widths exercised by those checks. */
347 static const int input_sizes[] = {8, 128, 1080, MAX_LINE_SIZE};
/* Source pixel formats iterated by those checks.
 * NOTE(review): the initializer entries are missing from this listing
 * (listing lines 349-354); restore them from the original file. */
348 static const enum AVPixelFormat rgb_formats[] = {
355 };
356 
/*
 * Check ctx->lumToYV12 for every format in rgb_formats[] and every width in
 * input_sizes[]: run call_ref and call_new on the same random input and
 * compare the first w * 2 output bytes.
 */
357 static void check_rgb_to_y(SwsContext *sws)
358 {
359  SwsInternal *ctx = sws_internal(sws);
360 
361  LOCAL_ALIGNED_16(uint8_t, src24, [MAX_LINE_SIZE * 3]);
362  LOCAL_ALIGNED_16(uint8_t, src32, [MAX_LINE_SIZE * 4]);
363  LOCAL_ALIGNED_32(uint8_t, dst0_y, [MAX_LINE_SIZE * 2]);
364  LOCAL_ALIGNED_32(uint8_t, dst1_y, [MAX_LINE_SIZE * 2]);
365 
366  declare_func(void, uint8_t *dst, const uint8_t *src,
367  const uint8_t *unused1, const uint8_t *unused2, int width,
368  uint32_t *rgb2yuv, void *opq);
369 
370  randomize_buffers(src24, MAX_LINE_SIZE * 3);
371  randomize_buffers(src32, MAX_LINE_SIZE * 4);
372 
373  for (int i = 0; i < FF_ARRAY_ELEMS(rgb_formats); i++) {
 /* NOTE(review): `desc` is used below but its declaration is missing from
  * this listing; presumably it is the descriptor of rgb_formats[i]
  * (compare check_rgb_to_uv) - confirm against the original file. */
375 
376  sws->src_format = rgb_formats[i];
 /* NOTE(review): a listing line is missing here (377); presumably the
  * call that refreshes ctx->lumToYV12 for the new format - confirm. */
378 
379  for (int j = 0; j < FF_ARRAY_ELEMS(input_sizes); j++) {
380  int w = input_sizes[j];
381 
382  if (check_func(ctx->lumToYV12, "%s_to_y_%d", desc->name, w)) {
 /* 3-component formats read the 3-bytes-per-pixel buffer, others the 4-bytes one. */
383  const uint8_t *src = desc->nb_components == 3 ? src24 : src32;
384  memset(dst0_y, 0xFA, MAX_LINE_SIZE * 2);
385  memset(dst1_y, 0xFA, MAX_LINE_SIZE * 2);
386 
387  call_ref(dst0_y, src, NULL, NULL, w, ctx->input_rgb2yuv_table, NULL);
388  call_new(dst1_y, src, NULL, NULL, w, ctx->input_rgb2yuv_table, NULL);
389 
 /* Only the first w * 2 bytes of each output are compared. */
390  if (memcmp(dst0_y, dst1_y, w * 2))
391  fail();
392 
393  if (desc->nb_components == 3 ||
394  // only bench native endian formats
 /* NOTE(review): the rest of this condition (listing line 395) is missing. */
396  bench_new(dst1_y, src, NULL, NULL, w, ctx->input_rgb2yuv_table, NULL);
397  }
398  }
399  }
400 }
401 
/*
 * Check ctx->chrToYV12 for every format in rgb_formats[], once with
 * horizontal chroma subsampling enabled and once without, and for every width
 * in input_sizes[]: run call_ref and call_new on the same random input and
 * compare the first w * 2 bytes of both the U and the V output.
 */
402 static void check_rgb_to_uv(SwsContext *sws)
403 {
404  SwsInternal *ctx = sws_internal(sws);
405 
406  LOCAL_ALIGNED_16(uint8_t, src24, [MAX_LINE_SIZE * 3]);
407  LOCAL_ALIGNED_16(uint8_t, src32, [MAX_LINE_SIZE * 4]);
408  LOCAL_ALIGNED_16(uint8_t, dst0_u, [MAX_LINE_SIZE * 2]);
409  LOCAL_ALIGNED_16(uint8_t, dst0_v, [MAX_LINE_SIZE * 2]);
410  LOCAL_ALIGNED_16(uint8_t, dst1_u, [MAX_LINE_SIZE * 2]);
411  LOCAL_ALIGNED_16(uint8_t, dst1_v, [MAX_LINE_SIZE * 2]);
412 
413  declare_func(void, uint8_t *dstU, uint8_t *dstV,
414  const uint8_t *src1, const uint8_t *src2, const uint8_t *src3,
415  int width, uint32_t *pal, void *opq);
416 
417  randomize_buffers(src24, MAX_LINE_SIZE * 3);
418  randomize_buffers(src32, MAX_LINE_SIZE * 4);
419 
 /* Two iterations per format: even i = subsampled, odd i = not subsampled. */
420  for (int i = 0; i < 2 * FF_ARRAY_ELEMS(rgb_formats); i++) {
421  enum AVPixelFormat src_fmt = rgb_formats[i / 2];
422  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(src_fmt);
423 
424  ctx->chrSrcHSubSample = (i % 2) ? 0 : 1;
425  sws->src_format = src_fmt;
426  sws->dst_format = ctx->chrSrcHSubSample ? AV_PIX_FMT_YUV420P : AV_PIX_FMT_YUV444P;
 /* NOTE(review): a listing line is missing here (427); presumably the
  * call that refreshes ctx->chrToYV12 for the new settings - confirm. */
428 
429  for (int j = 0; j < FF_ARRAY_ELEMS(input_sizes); j++) {
 /* Output width is halved when horizontal chroma subsampling is on. */
430  int w = input_sizes[j] >> ctx->chrSrcHSubSample;
431 
432  if (check_func(ctx->chrToYV12, "%s_to_uv%s_%d", desc->name,
433  ctx->chrSrcHSubSample ? "_half" : "",
434  input_sizes[j])) {
435  const uint8_t *src = desc->nb_components == 3 ? src24 : src32;
436  memset(dst0_u, 0xFF, MAX_LINE_SIZE * 2);
437  memset(dst0_v, 0xFF, MAX_LINE_SIZE * 2);
438  memset(dst1_u, 0xFF, MAX_LINE_SIZE * 2);
439  memset(dst1_v, 0xFF, MAX_LINE_SIZE * 2);
440 
441  call_ref(dst0_u, dst0_v, NULL, src, src, w, ctx->input_rgb2yuv_table, NULL);
442  call_new(dst1_u, dst1_v, NULL, src, src, w, ctx->input_rgb2yuv_table, NULL);
443 
444  if (memcmp(dst0_u, dst1_u, w * 2) || memcmp(dst0_v, dst1_v, w * 2))
445  fail();
446 
447  if (desc->nb_components == 3 ||
448  // only bench native endian formats
 /* NOTE(review): the rest of this condition (listing line 449) is missing. */
450  bench_new(dst1_u, dst1_v, NULL, src, src, w, ctx->input_rgb2yuv_table, NULL);
451  }
452  }
453  }
454 }
455 
/*
 * Check ctx->alpToYV12 for every format in rgb_formats[] that has at least
 * 4 components and for every width in input_sizes[]: run call_ref and
 * call_new on the same input and compare the first w * 2 output bytes.
 */
456 static void check_rgba_to_a(SwsContext *sws)
457 {
458  SwsInternal *ctx = sws_internal(sws);
459 
460  LOCAL_ALIGNED_16(uint8_t, src, [MAX_LINE_SIZE * 4]);
461  LOCAL_ALIGNED_32(uint8_t, dst0_y, [MAX_LINE_SIZE * 2]);
462  LOCAL_ALIGNED_32(uint8_t, dst1_y, [MAX_LINE_SIZE * 2]);
463 
464  declare_func(void, uint8_t *dst, const uint8_t *src1,
465  const uint8_t *src2, const uint8_t *src3, int width,
466  uint32_t *rgb2yuv, void *opq);
467 
 /* NOTE(review): a listing line is missing here (468); as shown, `src` is
  * never initialized - presumably it is randomized here. Confirm. */
469 
470  for (int i = 0; i < FF_ARRAY_ELEMS(rgb_formats); i++) {
 /* NOTE(review): the declaration of `desc` (listing line 471) is missing;
  * presumably the descriptor of rgb_formats[i] - confirm. */
 /* Formats with fewer than 4 components are skipped. */
472  if (desc->nb_components < 4)
473  continue;
474 
475  sws->src_format = rgb_formats[i];
 /* NOTE(review): a listing line is missing here (476); presumably the
  * call that refreshes ctx->alpToYV12 for the new format - confirm. */
477 
478  for (int j = 0; j < FF_ARRAY_ELEMS(input_sizes); j++) {
479  int w = input_sizes[j];
480 
481  if (check_func(ctx->alpToYV12, "%s_to_y_%d", desc->name, w)) {
482  memset(dst0_y, 0xFA, MAX_LINE_SIZE * 2);
483  memset(dst1_y, 0xFA, MAX_LINE_SIZE * 2);
484 
 /* The pixel data is passed as the third source argument here. */
485  call_ref(dst0_y, NULL, NULL, src, w, ctx->input_rgb2yuv_table, NULL);
486  call_new(dst1_y, NULL, NULL, src, w, ctx->input_rgb2yuv_table, NULL);
487 
488  if (memcmp(dst0_y, dst1_y, w * 2))
489  fail();
490 
491  // only bench native endian formats
 /* NOTE(review): the condition guarding this bench (listing line 492) is missing. */
493  bench_new(dst1_y, NULL, NULL, src, w, ctx->input_rgb2yuv_table, NULL);
494  }
495  }
496  }
497 }
498 
499 
/* Destination pixel formats iterated by the yuv2packed1/2/X checks.
 * NOTE(review): the initializer entries are missing from this listing
 * (listing lines 501-532); restore them from the original file. */
500 static const int packed_rgb_fmts[] = {
533 };
534 
/* Number of pixels per line used by the yuv2packed1/2/X checks. */
535 #define INPUT_SIZE 512
536 
/*
 * Check c->yuv2packed1 for every format in packed_rgb_fmts[] and every value
 * in alpha_values[] (passed as the uvalpha argument): run call_ref and
 * call_new on the same random 14-bit input and compare line_size output bytes.
 */
537 static void check_yuv2packed1(void)
538 {
539  static const int alpha_values[] = {0, 2048, 4096};
540 
 /* NOTE(review): the opening of this declaration (listing line 541,
  * presumably a declare_func-style macro) is missing from the listing. */
542  void, SwsInternal *c, const int16_t *lumSrc,
543  const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
544  const int16_t *alpSrc, uint8_t *dest,
545  int dstW, int uvalpha, int y);
546 
547  const int16_t *luma;
548  const int16_t *chru[2];
549  const int16_t *chrv[2];
550  const int16_t *alpha;
551 
552  LOCAL_ALIGNED_8(int32_t, src_y, [2 * INPUT_SIZE]);
553  LOCAL_ALIGNED_8(int32_t, src_u, [2 * INPUT_SIZE]);
554  LOCAL_ALIGNED_8(int32_t, src_v, [2 * INPUT_SIZE]);
555  LOCAL_ALIGNED_8(int32_t, src_a, [2 * INPUT_SIZE]);
556 
 /* Output buffers sized for 4 int32_t per pixel. */
557  LOCAL_ALIGNED_8(uint8_t, dst0, [INPUT_SIZE * sizeof(int32_t[4])]);
558  LOCAL_ALIGNED_8(uint8_t, dst1, [INPUT_SIZE * sizeof(int32_t[4])]);
559 
560  randomize_buffers((uint8_t*)src_y, 2 * INPUT_SIZE * sizeof(int32_t));
561  randomize_buffers((uint8_t*)src_u, 2 * INPUT_SIZE * sizeof(int32_t));
562  randomize_buffers((uint8_t*)src_v, 2 * INPUT_SIZE * sizeof(int32_t));
563  randomize_buffers((uint8_t*)src_a, 2 * INPUT_SIZE * sizeof(int32_t));
564 
565  /* Limit to 14 bit input range */
 /* The mask clears the top two bits of both 16-bit halves of each word,
  * since the buffers are later read as int16_t. */
566  for (int i = 0; i < 2 * INPUT_SIZE; i++) {
567  src_y[i] &= 0x3FFF3FFF;
568  src_a[i] &= 0x3FFF3FFF;
569  src_u[i] &= 0x3FFF3FFF;
570  src_v[i] &= 0x3FFF3FFF;
571  }
572 
573  luma = (int16_t *)src_y;
574  alpha = (int16_t *)src_a;
 /* The two chroma lines are the two INPUT_SIZE-word halves of src_u / src_v. */
575  for (int i = 0; i < 2; i++) {
576  chru[i] = (int16_t *)(src_u + i*INPUT_SIZE);
577  chrv[i] = (int16_t *)(src_v + i*INPUT_SIZE);
578  }
579 
580  for (int fmi = 0; fmi < FF_ARRAY_ELEMS(packed_rgb_fmts); fmi++) {
 /* NOTE(review): the declaration of `desc` (listing line 581) is missing;
  * presumably the descriptor of packed_rgb_fmts[fmi] - confirm. */
582  int line_size = INPUT_SIZE * desc->comp[0].step;
583  SwsContext *sws;
584  SwsInternal *c;
585 
 /* For bitstream formats line_size is divided by 8, rounding up
  * (presumably converting bits to bytes - confirm). */
586  if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM)
587  line_size = AV_CEIL_RSHIFT(line_size, 3);
588 
 /* NOTE(review): the statement that creates `sws` (listing lines 589-591)
  * is missing; as shown, `sws` is tested without being assigned. */
592  if (!sws)
593  fail();
594 
595  c = sws_internal(sws);
596 
597  for (int ai = 0; ai < FF_ARRAY_ELEMS(alpha_values); ai++) {
598  const int chr_alpha = alpha_values[ai];
599  if (check_func(c->yuv2packed1, "yuv2%s_1_%d_%d", desc->name, chr_alpha, INPUT_SIZE)) {
600  memset(dst0, 0xFF, INPUT_SIZE * sizeof(int32_t[4]));
601  memset(dst1, 0xFF, INPUT_SIZE * sizeof(int32_t[4]));
602 
603  call_ref(c, luma, chru, chrv, alpha, dst0, INPUT_SIZE, chr_alpha, 0);
604  call_new(c, luma, chru, chrv, alpha, dst1, INPUT_SIZE, chr_alpha, 0);
605 
606  if (memcmp(dst0, dst1, line_size))
607  fail();
608 
609  bench_new(c, luma, chru, chrv, alpha, dst1, INPUT_SIZE, chr_alpha, 0);
610  }
611  }
612 
613  sws_freeContext(sws);
614  }
615 }
616 
/*
 * Check c->yuv2packed2 for every format in packed_rgb_fmts[] and every value
 * in alpha_values[] (used for both the yalpha and the uvalpha argument): run
 * call_ref and call_new on the same random 14-bit input and compare
 * line_size output bytes.
 */
617 static void check_yuv2packed2(void)
618 {
619  static const int alpha_values[] = {0, 2048, 4096};
620 
 /* NOTE(review): the opening of this declaration (listing line 621,
  * presumably a declare_func-style macro) is missing from the listing. */
622  void, SwsInternal *c, const int16_t *lumSrc[2],
623  const int16_t *chrUSrc[2], const int16_t *chrVSrc[2],
624  const int16_t *alpSrc[2], uint8_t *dest,
625  int dstW, int yalpha, int uvalpha, int y);
626 
627  const int16_t *luma[2];
628  const int16_t *chru[2];
629  const int16_t *chrv[2];
630  const int16_t *alpha[2];
631 
632  LOCAL_ALIGNED_8(int32_t, src_y, [2 * INPUT_SIZE]);
633  LOCAL_ALIGNED_8(int32_t, src_u, [2 * INPUT_SIZE]);
634  LOCAL_ALIGNED_8(int32_t, src_v, [2 * INPUT_SIZE]);
635  LOCAL_ALIGNED_8(int32_t, src_a, [2 * INPUT_SIZE]);
636 
 /* Output buffers sized for 4 int32_t per pixel. */
637  LOCAL_ALIGNED_8(uint8_t, dst0, [INPUT_SIZE * sizeof(int32_t[4])]);
638  LOCAL_ALIGNED_8(uint8_t, dst1, [INPUT_SIZE * sizeof(int32_t[4])]);
639 
640  randomize_buffers((uint8_t*)src_y, 2 * INPUT_SIZE * sizeof(int32_t));
641  randomize_buffers((uint8_t*)src_u, 2 * INPUT_SIZE * sizeof(int32_t));
642  randomize_buffers((uint8_t*)src_v, 2 * INPUT_SIZE * sizeof(int32_t));
643  randomize_buffers((uint8_t*)src_a, 2 * INPUT_SIZE * sizeof(int32_t));
644 
645  /* Limit to 14 bit input range */
 /* The mask clears the top two bits of both 16-bit halves of each word,
  * since the buffers are later read as int16_t. */
646  for (int i = 0; i < 2 * INPUT_SIZE; i++) {
647  src_y[i] &= 0x3FFF3FFF;
648  src_u[i] &= 0x3FFF3FFF;
649  src_v[i] &= 0x3FFF3FFF;
650  src_a[i] &= 0x3FFF3FFF;
651  }
652 
 /* Each of the two input lines is one INPUT_SIZE-word half of its buffer. */
653  for (int i = 0; i < 2; i++) {
654  luma[i] = (int16_t *)(src_y + i*INPUT_SIZE);
655  chru[i] = (int16_t *)(src_u + i*INPUT_SIZE);
656  chrv[i] = (int16_t *)(src_v + i*INPUT_SIZE);
657  alpha[i] = (int16_t *)(src_a + i*INPUT_SIZE);
658  }
659 
660  for (int fmi = 0; fmi < FF_ARRAY_ELEMS(packed_rgb_fmts); fmi++) {
 /* NOTE(review): the declaration of `desc` (listing line 661) is missing;
  * presumably the descriptor of packed_rgb_fmts[fmi] - confirm. */
662  int line_size = INPUT_SIZE * desc->comp[0].step;
663  SwsContext *sws;
664  SwsInternal *c;
665 
 /* For bitstream formats line_size is divided by 8, rounding up
  * (presumably converting bits to bytes - confirm). */
666  if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM)
667  line_size = AV_CEIL_RSHIFT(line_size, 3);
668 
 /* NOTE(review): the statement that creates `sws` (listing lines 669-671)
  * is missing; as shown, `sws` is tested without being assigned. */
672  if (!sws)
673  fail();
674 
675  c = sws_internal(sws);
676 
677  for (int ai = 0; ai < FF_ARRAY_ELEMS(alpha_values); ai++) {
 /* Luma and chroma blend factors always take the same value here. */
678  const int lum_alpha = alpha_values[ai];
679  const int chr_alpha = alpha_values[ai];
680  if (check_func(c->yuv2packed2, "yuv2%s_2_%d_%d", desc->name, lum_alpha, INPUT_SIZE)) {
681  memset(dst0, 0xFF, INPUT_SIZE * sizeof(int32_t[4]));
682  memset(dst1, 0xFF, INPUT_SIZE * sizeof(int32_t[4]));
683 
684  call_ref(c, luma, chru, chrv, alpha, dst0, INPUT_SIZE, lum_alpha, chr_alpha, 0);
685  call_new(c, luma, chru, chrv, alpha, dst1, INPUT_SIZE, lum_alpha, chr_alpha, 0);
686 
687  if (memcmp(dst0, dst1, line_size))
688  fail();
689 
690  bench_new(c, luma, chru, chrv, alpha, dst1, INPUT_SIZE, lum_alpha, chr_alpha, 0);
691  }
692  }
693 
694  sws_freeContext(sws);
695  }
696 }
697 
/*
 * Check c->yuv2packedX for every format in packed_rgb_fmts[] and every filter
 * length in filter_sizes[] (used for both luma and chroma): run call_ref and
 * call_new on the same random 14-bit input and compare line_size output bytes.
 */
698 static void check_yuv2packedX(void)
699 {
700 #define LARGEST_FILTER 16
701  static const int filter_sizes[] = {2, 16};
702 
 /* NOTE(review): the opening of this declaration (listing line 703,
  * presumably a declare_func-style macro) is missing from the listing. */
704  void, SwsInternal *c, const int16_t *lumFilter,
705  const int16_t **lumSrcx, int lumFilterSize,
706  const int16_t *chrFilter, const int16_t **chrUSrcx,
707  const int16_t **chrVSrcx, int chrFilterSize,
708  const int16_t **alpSrcx, uint8_t *dest,
709  int dstW, int y);
710 
711  const int16_t *luma[LARGEST_FILTER];
712  const int16_t *chru[LARGEST_FILTER];
713  const int16_t *chrv[LARGEST_FILTER];
714  const int16_t *alpha[LARGEST_FILTER];
715 
716  LOCAL_ALIGNED_8(int16_t, luma_filter, [LARGEST_FILTER]);
717  LOCAL_ALIGNED_8(int16_t, chr_filter, [LARGEST_FILTER]);
718 
 /* NOTE(review): the declarations of src_y, src_u, src_v and src_a (listing
  * lines 719-722) are missing; judging by the loops below they presumably
  * hold LARGEST_FILTER * INPUT_SIZE int32_t each - confirm. */
723 
724  LOCAL_ALIGNED_8(uint8_t, dst0, [INPUT_SIZE * sizeof(int32_t[4])]);
725  LOCAL_ALIGNED_8(uint8_t, dst1, [INPUT_SIZE * sizeof(int32_t[4])]);
726 
727  randomize_buffers((uint8_t*)src_y, LARGEST_FILTER * INPUT_SIZE * sizeof(int32_t));
728  randomize_buffers((uint8_t*)src_u, LARGEST_FILTER * INPUT_SIZE * sizeof(int32_t));
729  randomize_buffers((uint8_t*)src_v, LARGEST_FILTER * INPUT_SIZE * sizeof(int32_t));
730  randomize_buffers((uint8_t*)src_a, LARGEST_FILTER * INPUT_SIZE * sizeof(int32_t));
731 
732  /* Limit to 14 bit input range */
 /* The mask clears the top two bits of both 16-bit halves of each word,
  * since the buffers are later read as int16_t. */
733  for (int i = 0; i < LARGEST_FILTER * INPUT_SIZE; i++) {
734  src_y[i] &= 0x3FFF3FFF;
735  src_u[i] &= 0x3FFF3FFF;
736  src_v[i] &= 0x3FFF3FFF;
737  src_a[i] &= 0x3FFF3FFF;
738  }
739 
 /* One input line pointer per filter tap, INPUT_SIZE words apart. */
740  for (int i = 0; i < LARGEST_FILTER; i++) {
741  luma[i] = (int16_t *)(src_y + i*INPUT_SIZE);
742  chru[i] = (int16_t *)(src_u + i*INPUT_SIZE);
743  chrv[i] = (int16_t *)(src_v + i*INPUT_SIZE);
744  alpha[i] = (int16_t *)(src_a + i*INPUT_SIZE);
745  }
746 
747  for (int fmi = 0; fmi < FF_ARRAY_ELEMS(packed_rgb_fmts); fmi++) {
 /* NOTE(review): the declaration of `desc` (listing line 748) is missing;
  * presumably the descriptor of packed_rgb_fmts[fmi] - confirm. */
749  int line_size = INPUT_SIZE * desc->comp[0].step;
750  SwsContext *sws;
751  SwsInternal *c;
752 
 /* For bitstream formats line_size is divided by 8, rounding up
  * (presumably converting bits to bytes - confirm). */
753  if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM)
754  line_size = AV_CEIL_RSHIFT(line_size, 3);
755 
 /* NOTE(review): the statement that creates `sws` (listing lines 756-758)
  * is missing; as shown, `sws` is tested without being assigned. */
759  if (!sws)
760  fail();
761 
762  c = sws_internal(sws);
763 
764  for (int fsi = 0; fsi < FF_ARRAY_ELEMS(filter_sizes); fsi++) {
765  const int luma_filter_size = filter_sizes[fsi];
766  const int chr_filter_size = filter_sizes[fsi];
767 
 /* Every tap gets the same small negative weight, then one randomly
  * chosen tap is overwritten with (1 << 13) - 1, so the taps sum to
  * roughly 1 << 12 for both filter sizes used here. */
768  for (int i = 0; i < luma_filter_size; i++)
769  luma_filter[i] = -((1 << 12) / (luma_filter_size - 1));
770  luma_filter[rnd() % luma_filter_size] = (1 << 13) - 1;
771 
772  for (int i = 0; i < chr_filter_size; i++)
773  chr_filter[i] = -((1 << 12) / (chr_filter_size - 1));
774  chr_filter[rnd() % chr_filter_size] = (1 << 13) - 1;
775 
776  if (check_func(c->yuv2packedX, "yuv2%s_X_%d_%d", desc->name, luma_filter_size, INPUT_SIZE)) {
777  memset(dst0, 0xFF, INPUT_SIZE * sizeof(int32_t[4]));
778  memset(dst1, 0xFF, INPUT_SIZE * sizeof(int32_t[4]));
779 
780  call_ref(c, luma_filter, luma, luma_filter_size,
781  chr_filter, chru, chrv, chr_filter_size,
782  alpha, dst0, INPUT_SIZE, 0);
783 
784  call_new(c, luma_filter, luma, luma_filter_size,
785  chr_filter, chru, chrv, chr_filter_size,
786  alpha, dst1, INPUT_SIZE, 0);
787 
788  if (memcmp(dst0, dst1, line_size))
789  fail();
790 
791  bench_new(c, luma_filter, luma, luma_filter_size,
792  chr_filter, chru, chrv, chr_filter_size,
793  alpha, dst1, INPUT_SIZE, 0);
794  }
795  }
796 
797  sws_freeContext(sws);
798  }
799 }
800 
801 #undef INPUT_SIZE
802 #undef LARGEST_FILTER
803 
/*
 * Test entry point: runs every check in this file and calls report() after
 * each group.
 * NOTE(review): the function header (listing line 804) is missing from this
 * listing, as are several statements flagged below; restore them from the
 * original file.
 */
805 {
806  SwsContext *sws;
807 
 /* NOTE(review): a listing line is missing here (808). */
809 
 /* One check per byte-shuffle permutation. */
810  check_shuffle_bytes(shuffle_bytes_2103, "shuffle_bytes_2103");
811  report("shuffle_bytes_2103");
812 
813  check_shuffle_bytes(shuffle_bytes_0321, "shuffle_bytes_0321");
814  report("shuffle_bytes_0321");
815 
816  check_shuffle_bytes(shuffle_bytes_1230, "shuffle_bytes_1230");
817  report("shuffle_bytes_1230");
818 
819  check_shuffle_bytes(shuffle_bytes_3012, "shuffle_bytes_3012");
820  report("shuffle_bytes_3012");
821 
822  check_shuffle_bytes(shuffle_bytes_3210, "shuffle_bytes_3210");
823  report("shuffle_bytes_3210");
824 
825  check_shuffle_bytes(shuffle_bytes_3102, "shuffle_bytes_3102");
826  report("shuffle_bytes_3102");
827 
828  check_shuffle_bytes(shuffle_bytes_2013, "shuffle_bytes_2013");
829  report("shuffle_bytes_2013");
830 
831  check_shuffle_bytes(shuffle_bytes_1203, "shuffle_bytes_1203");
832  report("shuffle_bytes_1203");
833 
834  check_shuffle_bytes(shuffle_bytes_2130, "shuffle_bytes_2130");
835  report("shuffle_bytes_2130");
836 
837  {
838  /* rgb24tobgr24 operates on 3-byte pixels, so test widths must be
839  * multiples of 3 to avoid reading past the source buffer. */
840  static const int rgb24_width[] = {3, 12, 24, 36, 48, 126, 1920 * 3};
841  int i;
842 #define RGB24_BENCH_WIDTH (1920 * 3)
 /* NOTE(review): the declarations of src0 and src1 (listing lines 843-844)
  * are missing from this listing. */
845  LOCAL_ALIGNED_32(uint8_t, dst0, [RGB24_BENCH_WIDTH]);
846  LOCAL_ALIGNED_32(uint8_t, dst1, [RGB24_BENCH_WIDTH]);
847 
848  declare_func(void, const uint8_t *src, uint8_t *dst, int src_size);
849 
850  memset(dst0, 0, RGB24_BENCH_WIDTH);
851  memset(dst1, 0, RGB24_BENCH_WIDTH);
 /* NOTE(review): a listing line is missing here (852); presumably the
  * initialization of src0 - confirm. */
853  memcpy(src1, src0, RGB24_BENCH_WIDTH);
854 
855  if (check_func(rgb24tobgr24, "rgb24tobgr24")) {
856  for (i = 0; i < FF_ARRAY_ELEMS(rgb24_width); i++) {
857  call_ref(src0, dst0, rgb24_width[i]);
858  call_new(src1, dst1, rgb24_width[i]);
 /* Output has the same size as the input for this conversion. */
859  if (memcmp(dst0, dst1, rgb24_width[i]))
860  fail();
861  }
 /* NOTE(review): a listing line is missing here (862); presumably the
  * bench_new call - confirm. */
863  }
864 #undef RGB24_BENCH_WIDTH
865  }
866  report("rgb24tobgr24");
867 
868  {
869  /* rgb32tobgr24: 4-byte pixels → 3-byte pixels.
870  * Test widths must be multiples of 4 (one pixel).
871  * Sizes chosen to exercise each codepath tier:
872  * 4 = scalar only (1 pixel)
873  * 16 = scalar only (4 pixels, loop iteration)
874  * 32 = medium only
875  * 48 = medium + scalar
876  * 64 = fast only (exact)
877  * 68 = fast + scalar (skip medium)
878  * 100 = fast + medium + scalar (all tiers)
879  * 128 = fast only (multi-iteration)
880  * 1920*4 = fast only (benchmark width)
881  */
882  static const int rgb32_widths[] = {4, 16, 32, 48, 64, 68, 100, 128, 1920 * 4};
883 #define RGB32_BENCH_WIDTH (1920 * 4)
 /* Destination holds 3/4 of the source size plus 8 guard bytes. */
884 #define RGB32_DST_SIZE (RGB32_BENCH_WIDTH * 3 / 4 + 8)
 /* NOTE(review): the declarations of src0 and src1 (listing lines 885-886)
  * are missing from this listing. */
887  LOCAL_ALIGNED_32(uint8_t, dst0, [RGB32_DST_SIZE]);
888  LOCAL_ALIGNED_32(uint8_t, dst1, [RGB32_DST_SIZE]);
889 
890  declare_func(void, const uint8_t *src, uint8_t *dst, int src_size);
891 
 /* NOTE(review): a listing line is missing here (892); presumably the
  * initialization of src0 - confirm. */
893  memcpy(src1, src0, RGB32_BENCH_WIDTH);
894 
895  if (check_func(rgb32tobgr24, "rgb32tobgr24")) {
896  for (int i = 0; i < FF_ARRAY_ELEMS(rgb32_widths); i++) {
897  int out_size = rgb32_widths[i] * 3 / 4;
 /* 0xAA fill: the byte right after the output must keep this value,
  * otherwise the function wrote past its output. */
898  memset(dst0, 0xAA, RGB32_DST_SIZE);
899  memset(dst1, 0xAA, RGB32_DST_SIZE);
900  call_ref(src0, dst0, rgb32_widths[i]);
901  call_new(src1, dst1, rgb32_widths[i]);
902  if (memcmp(dst0, dst1, out_size) ||
903  dst0[out_size] != 0xAA ||
904  dst1[out_size] != 0xAA)
905  fail();
906  }
 /* NOTE(review): a listing line is missing here (907); presumably the
  * bench_new call - confirm. */
908  }
909 #undef RGB32_DST_SIZE
910 #undef RGB32_BENCH_WIDTH
911  }
912  report("rgb32tobgr24");
913 
914  {
915  /* rgb24tobgr32: 3-byte pixels → 4-byte pixels.
916  * Test widths must be multiples of 3 (one pixel).
917  * Sizes chosen to exercise each codepath tier:
918  * 3 = scalar only (1 pixel)
919  * 12 = scalar only (4 pixels, loop iteration)
920  * 24 = medium only
921  * 36 = medium + scalar
922  * 48 = fast only (exact)
923  * 51 = fast + scalar (skip medium)
924  * 126 = fast + medium + scalar (all tiers)
925  * 1920*3 = fast only (benchmark width)
926  */
927  static const int rgb24to32_widths[] = {3, 12, 24, 36, 48, 51, 126, 1920 * 3};
928 #define RGB24TO32_BENCH_WIDTH (1920 * 3)
 /* Destination holds 4/3 of the source size plus 8 guard bytes. */
929 #define RGB24TO32_DST_SIZE (RGB24TO32_BENCH_WIDTH * 4 / 3 + 8)
 /* NOTE(review): the declarations of src0 and src1 (listing lines 930-931)
  * are missing from this listing. */
932  LOCAL_ALIGNED_32(uint8_t, dst0, [RGB24TO32_DST_SIZE]);
933  LOCAL_ALIGNED_32(uint8_t, dst1, [RGB24TO32_DST_SIZE]);
934 
935  declare_func(void, const uint8_t *src, uint8_t *dst, int src_size);
936 
 /* NOTE(review): a listing line is missing here (937); presumably the
  * initialization of src0 - confirm. */
938  memcpy(src1, src0, RGB24TO32_BENCH_WIDTH);
939 
940  if (check_func(rgb24tobgr32, "rgb24tobgr32")) {
941  for (int i = 0; i < FF_ARRAY_ELEMS(rgb24to32_widths); i++) {
942  int out_size = rgb24to32_widths[i] * 4 / 3;
 /* 0xAA fill: the byte right after the output must keep this value,
  * otherwise the function wrote past its output. */
943  memset(dst0, 0xAA, RGB24TO32_DST_SIZE);
944  memset(dst1, 0xAA, RGB24TO32_DST_SIZE);
945  call_ref(src0, dst0, rgb24to32_widths[i]);
946  call_new(src1, dst1, rgb24to32_widths[i]);
947  if (memcmp(dst0, dst1, out_size) ||
948  dst0[out_size] != 0xAA ||
949  dst1[out_size] != 0xAA)
950  fail();
951  }
 /* NOTE(review): a listing line is missing here (952); presumably the
  * bench_new call - confirm. */
953  }
954 #undef RGB24TO32_DST_SIZE
955 #undef RGB24TO32_BENCH_WIDTH
956  }
957  report("rgb24tobgr32");
958 
 /* NOTE(review): the calls preceding the next three report() lines (listing
  * lines 959, 962, 965) are missing; presumably check_uyvy_to_422p(),
  * check_interleave_bytes() and check_deinterleave_bytes() - confirm. */
960  report("uyvytoyuv422");
961 
963  report("interleave_bytes");
964 
966  report("deinterleave_bytes");
967 
 /* NOTE(review): the statement that creates `sws` (listing lines 968-970)
  * is missing; as shown, `sws` is tested without being assigned. */
971  if (!sws)
972  fail();
973 
974  check_rgb_to_y(sws);
975  report("rgb_to_y");
976 
977  check_rgb_to_uv(sws);
978  report("rgb_to_uv");
979 
980  check_rgba_to_a(sws);
981  report("rgba_to_a");
982 
983  check_rgb24toyv12(sws);
984  report("rgb24toyv12");
985 
986  sws_freeContext(sws);
987 
 /* NOTE(review): the calls preceding the next three report() lines (listing
  * lines 988, 991, 994) are missing; presumably check_yuv2packed1(),
  * check_yuv2packed2() and check_yuv2packedX() - confirm. */
989  report("yuv2packed1");
990 
992  report("yuv2packed2");
993 
995  report("yuv2packedX");
996 }
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:66
declare_func_emms
#define declare_func_emms(cpu_flags, ret,...)
Definition: checkasm.h:220
planes
static const struct @583 planes[]
AVPixelFormat
AVPixelFormat
Pixel format.
Definition: pixfmt.h:71
AV_PIX_FMT_BGR48LE
@ AV_PIX_FMT_BGR48LE
packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as little-endian
Definition: pixfmt.h:146
rgb32tobgr24
void(* rgb32tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb.c:35
shuffle_bytes_3012
void(* shuffle_bytes_3012)(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb.c:57
AV_PIX_FMT_BGRA64BE
@ AV_PIX_FMT_BGRA64BE
packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
Definition: pixfmt.h:204
mem_internal.h
AV_PIX_FMT_RGB444LE
@ AV_PIX_FMT_RGB444LE
packed RGB 4:4:4, 16bpp, (msb)4X 4R 4G 4B(lsb), little-endian, X=unused/undefined
Definition: pixfmt.h:136
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:3456
MAX_STRIDE
#define MAX_STRIDE
Definition: sw_rgb.c:45
src1
const pixel * src1
Definition: h264pred_template.c:420
rgb_formats
static enum AVPixelFormat rgb_formats[]
Definition: sw_rgb.c:348
h
uint8_t h
Definition: sw_rgb.c:41
sws_freeContext
void sws_freeContext(SwsContext *swsContext)
Free the swscaler context swsContext.
Definition: utils.c:2285
out_size
static int out_size
Definition: movenc.c:56
pixdesc.h
AV_PIX_FMT_RGBA64BE
@ AV_PIX_FMT_RGBA64BE
packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as big-endian
Definition: pixfmt.h:202
SWS_BITEXACT
@ SWS_BITEXACT
Definition: swscale.h:157
check_func
#define check_func(func,...)
Definition: checkasm.h:213
shuffle_bytes_3210
void(* shuffle_bytes_3210)(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb.c:58
test
Definition: idctdsp.c:35
rgb2yuv
static const char rgb2yuv[]
Definition: vf_scale_vulkan.c:86
AV_PIX_FMT_RGB32_1
#define AV_PIX_FMT_RGB32_1
Definition: pixfmt.h:512
RGB32_BENCH_WIDTH
#define RGB32_BENCH_WIDTH
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
AV_PIX_FMT_BGRA
@ AV_PIX_FMT_BGRA
packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
Definition: pixfmt.h:102
call_ref
#define call_ref(...)
Definition: checkasm.h:229
BUFSIZE
#define BUFSIZE
Definition: sw_rgb.c:119
check_shuffle_bytes
static void check_shuffle_bytes(void *func, const char *report)
Definition: sw_rgb.c:48
AV_PIX_FMT_RGB555BE
@ AV_PIX_FMT_RGB555BE
packed RGB 5:5:5, 16bpp, (msb)1X 5R 5G 5B(lsb), big-endian , X=unused/undefined
Definition: pixfmt.h:114
check_yuv2packedX
static void check_yuv2packedX(void)
Definition: sw_rgb.c:698
s
uint8_t s
Definition: sw_rgb.c:41
shuffle_bytes_2130
void(* shuffle_bytes_2130)(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb.c:61
check_yuv2packed1
static void check_yuv2packed1(void)
Definition: sw_rgb.c:537
fail
#define fail()
Definition: checkasm.h:223
RGB24TO32_BENCH_WIDTH
#define RGB24TO32_BENCH_WIDTH
checkasm.h
cmp_off_by_n
static int cmp_off_by_n(const uint8_t *ref, const uint8_t *test, size_t n, int accuracy)
Definition: sw_rgb.c:121
AV_PIX_FMT_BGR8
@ AV_PIX_FMT_BGR8
packed RGB 3:3:2, 8bpp, (msb)2B 3G 3R(lsb)
Definition: pixfmt.h:90
randomize_buffers
#define randomize_buffers(buf, size)
Definition: sw_rgb.c:33
rnd
#define rnd()
Definition: checkasm.h:206
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
check_rgb_to_y
static void check_rgb_to_y(SwsContext *sws)
Definition: sw_rgb.c:357
intreadwrite.h
AV_CEIL_RSHIFT
#define AV_CEIL_RSHIFT(a, b)
Definition: common.h:60
check_uyvy_to_422p
static void check_uyvy_to_422p(void)
Definition: sw_rgb.c:74
input_sizes
static const int input_sizes[]
Definition: sw_rgb.c:347
shuffle_bytes_1230
void(* shuffle_bytes_1230)(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb.c:56
LOCAL_ALIGNED_16
#define LOCAL_ALIGNED_16(t, v,...)
Definition: mem_internal.h:130
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
AV_PIX_FMT_RGB4
@ AV_PIX_FMT_RGB4
packed RGB 1:2:1 bitstream, 4bpp, (msb)1R 2G 1B(lsb), a byte contains two pixels, the first pixel in the byte is the one composed by the 4 msb bits
Definition: pixfmt.h:94
shuffle_bytes_2103
void(* shuffle_bytes_2103)(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb.c:55
AV_PIX_FMT_YUV420P
@ AV_PIX_FMT_YUV420P
planar YUV 4:2:0, 12bpp, (1 Cr & Cb sample per 2x2 Y samples)
Definition: pixfmt.h:73
LARGEST_FILTER
#define LARGEST_FILTER
AV_PIX_FMT_RGBA
@ AV_PIX_FMT_RGBA
packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
Definition: pixfmt.h:100
LOCAL_ALIGNED_8
#define LOCAL_ALIGNED_8(t, v,...)
Definition: mem_internal.h:128
FFABS
#define FFABS(a)
Absolute value. Note: INT_MIN / INT64_MIN result in undefined behavior as they are not representable as absolute values of their type.
Definition: common.h:74
INPUT_SIZE
#define INPUT_SIZE
Definition: sw_rgb.c:535
interleaveBytes
void(* interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst, int width, int height, int src1Stride, int src2Stride, int dstStride)
Definition: rgb2rgb.c:88
checkasm_check_sw_rgb
void checkasm_check_sw_rgb(void)
Definition: sw_rgb.c:804
AV_PIX_FMT_RGB565LE
@ AV_PIX_FMT_RGB565LE
packed RGB 5:6:5, 16bpp, (msb) 5R 6G 5B(lsb), little-endian
Definition: pixfmt.h:113
shuffle_bytes_3102
void(* shuffle_bytes_3102)(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb.c:59
call_new
#define call_new(...)
Definition: checkasm.h:237
NULL
#define NULL
Definition: coverity.c:32
LOCAL_ALIGNED_32
#define LOCAL_ALIGNED_32(t, v,...)
Definition: mem_internal.h:132
AV_PIX_FMT_RGB48LE
@ AV_PIX_FMT_RGB48LE
packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, the 2-byte value for each R/G/B component is stored as little-endian
Definition: pixfmt.h:110
AV_PIX_FMT_BGR565LE
@ AV_PIX_FMT_BGR565LE
packed BGR 5:6:5, 16bpp, (msb) 5B 6G 5R(lsb), little-endian
Definition: pixfmt.h:118
AV_PIX_FMT_RGBA64LE
@ AV_PIX_FMT_RGBA64LE
packed RGBA 16:16:16:16, 64bpp, 16R, 16G, 16B, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
Definition: pixfmt.h:203
RGB24TO32_DST_SIZE
#define RGB24TO32_DST_SIZE
check_rgb24toyv12
static void check_rgb24toyv12(SwsContext *sws)
Definition: sw_rgb.c:130
AV_PIX_FMT_RGB8
@ AV_PIX_FMT_RGB8
packed RGB 3:3:2, 8bpp, (msb)3R 3G 2B(lsb)
Definition: pixfmt.h:93
ff_sws_rgb2rgb_init
av_cold void ff_sws_rgb2rgb_init(void)
Definition: rgb2rgb.c:127
abs
#define abs(x)
Definition: cuda_runtime.h:35
AV_PIX_FMT_BGR4
@ AV_PIX_FMT_BGR4
packed RGB 1:2:1 bitstream, 4bpp, (msb)1B 2G 1R(lsb), a byte contains two pixels, the first pixel in the byte is the one composed by the 4 msb bits
Definition: pixfmt.h:91
RGB32_DST_SIZE
#define RGB32_DST_SIZE
AV_PIX_FMT_BGR555BE
@ AV_PIX_FMT_BGR555BE
packed BGR 5:5:5, 16bpp, (msb)1X 5B 5G 5R(lsb), big-endian , X=unused/undefined
Definition: pixfmt.h:119
AV_PIX_FMT_ABGR
@ AV_PIX_FMT_ABGR
packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
Definition: pixfmt.h:101
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not ...
Definition: undefined.txt:32
AV_PIX_FMT_BGR4_BYTE
@ AV_PIX_FMT_BGR4_BYTE
packed RGB 1:2:1, 8bpp, (msb)1B 2G 1R(lsb)
Definition: pixfmt.h:92
MAX_HEIGHT
#define MAX_HEIGHT
Definition: sw_rgb.c:46
rgb24tobgr32
void(* rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb.c:38
AV_PIX_FMT_RGB24
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:75
height
#define height
Definition: dsp.h:89
check_interleave_bytes
static void check_interleave_bytes(void)
Definition: sw_rgb.c:204
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:87
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
AV_PIX_FMT_RGB444BE
@ AV_PIX_FMT_RGB444BE
packed RGB 4:4:4, 16bpp, (msb)4X 4R 4G 4B(lsb), big-endian, X=unused/undefined
Definition: pixfmt.h:137
MAX_LINE_SIZE
#define MAX_LINE_SIZE
Definition: sw_rgb.c:346
AV_PIX_FMT_FLAG_BITSTREAM
#define AV_PIX_FMT_FLAG_BITSTREAM
All values of a component are bit-wise packed end to end.
Definition: pixdesc.h:124
shuffle_bytes_0321
void(* shuffle_bytes_0321)(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb.c:54
AV_PIX_FMT_BGR444BE
@ AV_PIX_FMT_BGR444BE
packed BGR 4:4:4, 16bpp, (msb)4X 4B 4G 4R(lsb), big-endian, X=unused/undefined
Definition: pixfmt.h:139
AV_PIX_FMT_RGB32
#define AV_PIX_FMT_RGB32
Definition: pixfmt.h:511
AV_PIX_FMT_BGR565BE
@ AV_PIX_FMT_BGR565BE
packed BGR 5:6:5, 16bpp, (msb) 5B 6G 5R(lsb), big-endian
Definition: pixfmt.h:117
check_rgba_to_a
static void check_rgba_to_a(SwsContext *sws)
Definition: sw_rgb.c:456
SwsContext::dst_format
int dst_format
Destination pixel format.
Definition: swscale.h:256
AV_PIX_FMT_ARGB
@ AV_PIX_FMT_ARGB
packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
Definition: pixfmt.h:99
AV_PIX_FMT_BGRA64LE
@ AV_PIX_FMT_BGRA64LE
packed RGBA 16:16:16:16, 64bpp, 16B, 16G, 16R, 16A, the 2-byte value for each R/G/B/A component is stored as little-endian
Definition: pixfmt.h:205
uyvytoyuv422
void(* uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride)
Definition: rgb2rgb.c:97
report
#define report
Definition: checkasm.h:226
AV_PIX_FMT_RGB555LE
@ AV_PIX_FMT_RGB555LE
packed RGB 5:5:5, 16bpp, (msb)1X 5R 5G 5B(lsb), little-endian, X=unused/undefined
Definition: pixfmt.h:115
ff_rgb24toyv12
void(* ff_rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t *udst, uint8_t *vdst, int width, int height, int lumStride, int chromStride, int srcStride, const int32_t *rgb2yuv)
Height should be a multiple of 2 and width should be a multiple of 2.
Definition: rgb2rgb.c:81
AV_PIX_FMT_RGB48BE
@ AV_PIX_FMT_RGB48BE
packed RGB 16:16:16, 48bpp, 16R, 16G, 16B, the 2-byte value for each R/G/B component is stored as big-endian
Definition: pixfmt.h:109
packed_rgb_fmts
static const int packed_rgb_fmts[]
Definition: sw_rgb.c:500
bench_new
#define bench_new(...)
Definition: checkasm.h:428
ff_sws_init_scale
void ff_sws_init_scale(SwsInternal *c)
Definition: swscale.c:698
src2
const pixel * src2
Definition: h264pred_template.c:421
common.h
swscale_internal.h
width
static const uint8_t width[]
Definition: sw_rgb.c:40
AV_PIX_FMT_RGB4_BYTE
@ AV_PIX_FMT_RGB4_BYTE
packed RGB 1:2:1, 8bpp, (msb)1R 2G 1B(lsb)
Definition: pixfmt.h:95
w
uint8_t w
Definition: sw_rgb.c:41
SwsInternal
Definition: swscale_internal.h:334
check_rgb_to_uv
static void check_rgb_to_uv(SwsContext *sws)
Definition: sw_rgb.c:402
deinterleaveBytes
void(* deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t *dst2, int width, int height, int srcStride, int dst1Stride, int dst2Stride)
Definition: rgb2rgb.c:91
sws_getContext
SwsContext * sws_getContext(int srcW, int srcH, enum AVPixelFormat srcFormat, int dstW, int dstH, enum AVPixelFormat dstFormat, int flags, SwsFilter *srcFilter, SwsFilter *dstFilter, const double *param)
Allocate and return an SwsContext.
Definition: utils.c:1954
check_yuv2packed2
static void check_yuv2packed2(void)
Definition: sw_rgb.c:617
AV_CPU_FLAG_MMX
#define AV_CPU_FLAG_MMX
standard MMX
Definition: cpu.h:32
shuffle_bytes_2013
void(* shuffle_bytes_2013)(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb.c:60
shuffle_bytes_1203
void(* shuffle_bytes_1203)(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb.c:62
ref
static int ref[MAX_W *MAX_W]
Definition: jpeg2000dwt.c:117
AV_CPU_FLAG_MMXEXT
#define AV_CPU_FLAG_MMXEXT
SSE integer functions or AMD MMX ext.
Definition: cpu.h:33
AV_PIX_FMT_YUV444P
@ AV_PIX_FMT_YUV444P
planar YUV 4:4:4, 24bpp, (1 Cr & Cb sample per 1x1 Y samples)
Definition: pixfmt.h:78
rgb24tobgr24
void(* rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
Definition: rgb2rgb.c:39
AV_PIX_FMT_RGB565BE
@ AV_PIX_FMT_RGB565BE
packed RGB 5:6:5, 16bpp, (msb) 5R 6G 5B(lsb), big-endian
Definition: pixfmt.h:112
src0
const pixel *const src0
Definition: h264pred_template.c:419
desc
const char * desc
Definition: libsvtav1.c:82
SwsContext::src_format
int src_format
Source pixel format.
Definition: swscale.h:255
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes of an image.
Definition: pixdesc.h:69
AV_PIX_FMT_BGR555LE
@ AV_PIX_FMT_BGR555LE
packed BGR 5:5:5, 16bpp, (msb)1X 5B 5G 5R(lsb), little-endian, X=unused/undefined
Definition: pixfmt.h:120
declare_func
#define declare_func(ret,...)
Definition: checkasm.h:218
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
RGB24_BENCH_WIDTH
#define RGB24_BENCH_WIDTH
int32_t
int32_t
Definition: audioconvert.c:56
sws_internal
static SwsInternal * sws_internal(const SwsContext *sws)
Definition: swscale_internal.h:78
SWS_ACCURATE_RND
@ SWS_ACCURATE_RND
Force bit-exact output.
Definition: swscale.h:156
checkasm_check
#define checkasm_check(prefix,...)
Definition: checkasm.h:472
check_deinterleave_bytes
static void check_deinterleave_bytes(void)
Definition: sw_rgb.c:272
SwsContext
Main external API structure.
Definition: swscale.h:206
AV_PIX_FMT_BGR444LE
@ AV_PIX_FMT_BGR444LE
packed BGR 4:4:4, 16bpp, (msb)4X 4B 4G 4R(lsb), little-endian, X=unused/undefined
Definition: pixfmt.h:138
rgb2rgb.h
src
#define src
Definition: vp8dsp.c:248
swscale.h
AV_PIX_FMT_BGR48BE
@ AV_PIX_FMT_BGR48BE
packed RGB 16:16:16, 48bpp, 16B, 16G, 16R, the 2-byte value for each R/G/B component is stored as big-endian
Definition: pixfmt.h:145