32 #define randomize_buffers(buf, size) \ 35 for (j = 0; j < size; j+=4) \ 36 AV_WN32(buf + j, rnd()); \ 46 d = ((filterSize - 1) * 8 + dither[0]) >> 4;
47 for ( i = 0; i < dstW; i++) {
54 for (j = 0; j < filterSize; j++){
55 t.val = (
int)src[j][i + offset] * (
int)filter[j];
65 int fsi, osi, isi,
i, j;
67 #define LARGEST_FILTER 16 68 #define FILTER_SIZES 4 69 static const int filter_sizes[
FILTER_SIZES] = {1, 4, 8, 16};
70 #define LARGEST_INPUT_SIZE 512 72 static const int input_sizes[
INPUT_SIZES] = {128, 144, 256, 512};
75 int filterSize,
const int16_t **
src,
uint8_t *dest,
89 memset(dither, d_val, LARGEST_INPUT_SIZE);
98 dstW = input_sizes[isi];
99 for(osi = 0; osi < 64; osi += 16){
101 src =
av_malloc(
sizeof(int16_t*) * filter_sizes[fsi]);
102 vFilterData =
av_malloc((filter_sizes[fsi] + 2) *
sizeof(
union VFilterData));
103 memset(vFilterData, 0, (filter_sizes[fsi] + 2) *
sizeof(
union VFilterData));
104 for(i = 0; i < filter_sizes[fsi]; ++
i){
106 vFilterData[
i].src = src[
i];
107 for(j = 0; j < 4; ++j)
108 vFilterData[i].
coeff[j + 4] = filter_coeff[i];
111 memset(dst0, 0, LARGEST_INPUT_SIZE *
sizeof(dst0[0]));
112 memset(dst1, 0, LARGEST_INPUT_SIZE *
sizeof(dst1[0]));
118 ref_function(&filter_coeff[0], filter_sizes[fsi], src, dst0, dstW - osi, dither, osi);
121 call_new((
const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
122 if (memcmp(dst0, dst1, LARGEST_INPUT_SIZE *
sizeof(dst0[0])))
124 if(dstW == LARGEST_INPUT_SIZE)
125 bench_new((
const int16_t*)vFilterData, filter_sizes[fsi], src, dst1, dstW - osi, dither, osi);
138 #define SRC_PIXELS 128 142 #define MAX_FILTER_WIDTH 40 143 #define FILTER_SIZES 5 144 static const int filter_sizes[
FILTER_SIZES] = { 4, 8, 16, 32, 40 };
146 #define HSCALE_PAIRS 2 152 int i, j, fsi, hpi,
width;
168 const int32_t *filterPos,
int filterSize);
178 width = filter_sizes[fsi];
180 ctx->
srcBpc = hscale_pairs[hpi][0];
181 ctx->
dstBpc = hscale_pairs[hpi][1];
200 for (j = 0; j <
width; j++) {
201 filter[i * width + j] = -((1 << 14) / (width - 1));
203 filter[i * width + (
rnd() %
width)] = ((1 << 15) - 1);
211 filter[SRC_PIXELS * width +
i] =
rnd();
216 memset(dst0, 0, SRC_PIXELS *
sizeof(dst0[0]));
217 memset(dst1, 0, SRC_PIXELS *
sizeof(dst1[0]));
219 call_ref(
NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
220 call_new(
NULL, dst1, SRC_PIXELS, src, filter, filterPos, width);
221 if (memcmp(dst0, dst1, SRC_PIXELS *
sizeof(dst0[0])))
223 bench_new(
NULL, dst0, SRC_PIXELS, src, filter, filterPos, width);
void checkasm_check_sw_scale(void)
void(* hcScale)(struct SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, const int16_t *filter, const int32_t *filterPos, int filterSize)
static void check_hscale(void)
Memory handling functions.
#define LARGEST_INPUT_SIZE
static void ref_function(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
av_warn_unused_result int sws_init_context(struct SwsContext *sws_context, SwsFilter *srcFilter, SwsFilter *dstFilter)
Initialize the swscaler context sws_context.
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this, just let it be. vf offset
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can, and in some cases has, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c...
#define LOCAL_ALIGNED_32(t, v,...)
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, then the filter should push the output frames on the output link immediately. As an exception to the previous rule, if the input frame is enough to produce several output frames, then the filter needs to output only at least one per link. The additional frames can be left buffered in the filter.
int hLumFilterSize
Horizontal filter size for luma/alpha pixels.
SwsFunc ff_getSwsFunc(SwsContext *c)
Return function pointer to fastest main scaler path function depending on architecture and available ...
static const uint8_t dither[8][8]
int hChrFilterSize
Horizontal filter size for chroma pixels.
void sws_freeContext(struct SwsContext *swsContext)
Free the swscaler context swsContext.
#define randomize_buffers(buf, size)
#define declare_func_emms(cpu_flags, ret,...)
int dstW
Width of destination luma/alpha planes.
#define LOCAL_ALIGNED_8(t, v,...)
yuv2planarX_fn yuv2planeX
#define AV_CPU_FLAG_MMX
standard MMX
#define check_func(func,...)
struct SwsContext * sws_alloc_context(void)
Allocate an empty SwsContext.
common internal and external API header
static void check_yuv2yuvX(void)
static const double coeff[2][5]
static double val(void *priv, double ch)