FFmpeg
diracdsp_init.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2010 David Conrad
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/x86/cpu.h"
22 #include "libavcodec/diracdsp.h"
23 #include "fpel.h"
24 
/*
 * Prototypes of the x86 SIMD routines referenced by this file.
 * Their definitions live outside this translation unit.
 */

/* add_rect_clamped variants */
void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int,
                             const int16_t *, int, int, int);
void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int,
                              const int16_t *, int, int, int);

/* add_dirac_obmc variants; the number in the name is 8, 16 or 32 */
void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride,
                            const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride,
                             const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride,
                             const uint8_t *obmc_weight, int yblen);

void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);

/*
 * put_rect_clamped / put_signed_rect_clamped variants.
 * Note that the MMX/SSE2 routines take an int16_t source while the
 * 10-bit SSE4 routine is declared with a uint8_t source pointer.
 */
void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride,
                             const int16_t *src, int src_stride,
                             int width, int height);
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                              const int16_t *src, int src_stride,
                              int width, int height);
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride,
                                    const int16_t *src, int src_stride,
                                    int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                                     const int16_t *src, int src_stride,
                                     int width, int height);
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride,
                                        const uint8_t *src, int src_stride,
                                        int width, int height);

/* dequant_subband variant */
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                                const int qf, const int qs,
                                int tot_v, int tot_h);
42 
43 #if HAVE_X86ASM
44 
/*
 * HPEL_FILTER(MMSIZE, EXT)
 *
 * Declares the row kernels ff_dirac_hpel_filter_v_EXT and
 * ff_dirac_hpel_filter_h_EXT, and defines dirac_hpel_filter_EXT(), which
 * invokes them once per row for `height` rows, advancing all four
 * pointers by `stride` after each row.
 *
 * Per row, in this order:
 *   1. the v kernel is called on (dstv, src), both shifted MMSIZE bytes
 *      to the left, with a count of width + MMSIZE + 5;
 *   2. the h kernel is called on (dsth, src);
 *   3. the h kernel is called on (dstc, dstv), i.e. it reads the dstv row
 *      passed to step 1 (presumably the reason step 1 comes first).
 */
#define HPEL_FILTER(MMSIZE, EXT)                                               \
    void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int); \
    void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int);      \
                                                                               \
    static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv,        \
                                          uint8_t *dstc, const uint8_t *src,   \
                                          int stride, int width, int height)   \
    {                                                                          \
        for (; height; height--) {                                             \
            ff_dirac_hpel_filter_v_ ## EXT(dstv - MMSIZE, src - MMSIZE,        \
                                           stride, width + MMSIZE + 5);        \
            ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width);                  \
            ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width);                 \
                                                                               \
            dsth += stride;                                                    \
            dstv += stride;                                                    \
            dstc += stride;                                                    \
            src  += stride;                                                    \
        }                                                                      \
    }
64 
/*
 * PIXFUNC(PFX, IDX, EXT)
 *
 * Stores the EXT flavour of the PFX (put/avg) pixel wrappers into slot IDX
 * of c->PFX_dirac_pixels_tab: row 1 receives the pixels16 wrapper and
 * row 2 the pixels32 wrapper. Row 0 (pixels8) is intentionally left alone;
 * its assignment is kept below as a disabled line.
 *
 * Expects a pointer named `c` to be in scope at the expansion site.
 *
 * The two stores are wrapped in do { } while (0) so the macro expands to a
 * single statement and stays correct under an unbraced if/else. Callers
 * terminate the invocation with a semicolon.
 */
#define PIXFUNC(PFX, IDX, EXT)                                                 \
    do {                                                                       \
        /*MMXDISABLEDc->PFX ## _dirac_pixels_tab[0][IDX] = PFX ## _dirac_pixels8_ ## EXT;*/ \
        c->PFX ## _dirac_pixels_tab[1][IDX] = PFX ## _dirac_pixels16_ ## EXT;  \
        c->PFX ## _dirac_pixels_tab[2][IDX] = PFX ## _dirac_pixels32_ ## EXT;  \
    } while (0)
69 
/*
 * DIRAC_PIXOP(OPNAME, EXT)
 *
 * Defines OPNAME_dirac_pixels16_EXT() and OPNAME_dirac_pixels32_EXT().
 *
 * When h is a multiple of 4, each wrapper forwards src[0] to
 * ff_OPNAME_pixels16_EXT; the 32-wide wrapper calls it twice, the second
 * time 16 bytes further into both dst and src[0].
 * For any other h, the wrapper hands the whole src array to the matching
 * ff_OPNAME_dirac_pixels{16,32}_c routine instead.
 */
#define DIRAC_PIXOP(OPNAME, EXT)                                               \
static void OPNAME ## _dirac_pixels16_ ## EXT(uint8_t *dst,                    \
                                              const uint8_t *src[5],           \
                                              int stride, int h)               \
{                                                                              \
    if ((h & 3) == 0) {                                                        \
        ff_ ## OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);            \
        return;                                                                \
    }                                                                          \
    ff_ ## OPNAME ## _dirac_pixels16_c(dst, src, stride, h);                   \
}                                                                              \
static void OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst,                    \
                                              const uint8_t *src[5],           \
                                              int stride, int h)               \
{                                                                              \
    if ((h & 3) == 0) {                                                        \
        ff_ ## OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h);  \
        ff_ ## OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h);  \
        return;                                                                \
    }                                                                          \
    ff_ ## OPNAME ## _dirac_pixels32_c(dst, src, stride, h);                   \
}
89 
90 DIRAC_PIXOP(put, mmx)
91 DIRAC_PIXOP(avg, mmx)
92 DIRAC_PIXOP(avg, mmxext)
93 
94 DIRAC_PIXOP(put, sse2)
95 DIRAC_PIXOP(avg, sse2)
96 
97 #if !ARCH_X86_64
98 HPEL_FILTER(8, mmx)
99 #endif
100 HPEL_FILTER(16, sse2)
101 
102 #endif // HAVE_X86ASM
103 
105 {
106 #if HAVE_X86ASM
107  int mm_flags = av_get_cpu_flags();
108 
109  if (EXTERNAL_MMX(mm_flags)) {
110  c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
111 #if !ARCH_X86_64
112  c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
113  c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
114  c->dirac_hpel_filter = dirac_hpel_filter_mmx;
115  c->add_rect_clamped = ff_add_rect_clamped_mmx;
116  c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx;
117 #endif
118  PIXFUNC(put, 0, mmx);
119  PIXFUNC(avg, 0, mmx);
120  }
121 
122  if (EXTERNAL_MMXEXT(mm_flags)) {
123  PIXFUNC(avg, 0, mmxext);
124  }
125 
126  if (EXTERNAL_SSE2(mm_flags)) {
127  c->dirac_hpel_filter = dirac_hpel_filter_sse2;
128  c->add_rect_clamped = ff_add_rect_clamped_sse2;
129  c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;
130 
131  c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
132  c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
133 
134  c->put_dirac_pixels_tab[1][0] = put_dirac_pixels16_sse2;
135  c->avg_dirac_pixels_tab[1][0] = avg_dirac_pixels16_sse2;
136  c->put_dirac_pixels_tab[2][0] = put_dirac_pixels32_sse2;
137  c->avg_dirac_pixels_tab[2][0] = avg_dirac_pixels32_sse2;
138  }
139 
140  if (EXTERNAL_SSE4(mm_flags)) {
141  c->dequant_subband[1] = ff_dequant_subband_32_sse4;
142  c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
143  }
144 #endif
145 }
stride
int stride
Definition: mace.c:144
cpu.h
ff_put_rect_clamped_sse2
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
ff_add_dirac_obmc16_mmx
void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:98
PIXFUNC
#define PIXFUNC(PFX, WIDTH)
Definition: diracdsp.c:215
ff_diracdsp_init_x86
void ff_diracdsp_init_x86(DiracDSPContext *c)
Definition: diracdsp_init.c:104
diracdsp.h
width
#define width
ff_add_dirac_obmc8_mmx
void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
ff_put_signed_rect_clamped_10_sse4
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height)
src
#define src
Definition: vp8dsp.c:255
ff_add_rect_clamped_mmx
void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int)
c
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit-picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can, and in some cases has, led to effects beyond the output of computations. The signed integer overflow problem in speed-critical code: code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not [...] c
Definition: undefined.txt:32
avg
#define avg(a, b, c, d)
Definition: colorspacedsp_template.c:28
ff_put_signed_rect_clamped_mmx
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
height
#define height
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
ff_put_rect_clamped_mmx
void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
DiracDSPContext
Definition: diracdsp.h:30
ff_add_rect_clamped_sse2
void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int)
ff_add_dirac_obmc16_sse2
void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
fpel.h
ff_add_dirac_obmc32_mmx
void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
ff_put_signed_rect_clamped_sse2
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
ff_add_dirac_obmc32_sse2
void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
ff_dequant_subband_32_sse4
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h)
EXTERNAL_MMX
#define EXTERNAL_MMX(flags)
Definition: cpu.h:56
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57