FFmpeg
diracdsp_init.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2010 David Conrad
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/x86/cpu.h"
22 #include "libavcodec/diracdsp.h"
23 #include "fpel.h"
24 
/*
 * Declarations for the MMX / MMXEXT pixel wrappers that DIRAC_PIXOP defines
 * further down in this file. DECL_DIRAC_PIXOP itself is not defined here
 * (presumably it comes from diracdsp.h -- TODO confirm).
 */
DECL_DIRAC_PIXOP(put, mmx);
DECL_DIRAC_PIXOP(avg, mmx);
DECL_DIRAC_PIXOP(avg, mmxext);

/*
 * SSE2 pixel wrappers. These are written out by hand below instead of being
 * generated through DIRAC_PIXOP; only 16- and 32-pixel widths exist.
 */
void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5],
                                int stride, int h);
void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5],
                                int stride, int h);
void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5],
                                int stride, int h);
void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5],
                                int stride, int h);
33 
/*
 * Prototypes of routines that have no definition in this file (presumably
 * implemented in the accompanying x86 assembly -- TODO confirm).
 */

/* add_rect_clamped: parameters are unnamed in the original declarations. */
void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int,
                             const int16_t *, int, int, int);
void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int,
                              const int16_t *, int, int, int);

/* OBMC accumulation, one routine per block width (8 / 16 / 32). */
void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride,
                            const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride,
                             const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride,
                             const uint8_t *obmc_weight, int yblen);

void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);
void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride,
                              const uint8_t *obmc_weight, int yblen);

/*
 * Rectangle store helpers. The MMX/SSE2 variants take an int16_t source,
 * whereas the "_10_sse4" variant is declared with a uint8_t source pointer.
 */
void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride,
                             const int16_t *src, int src_stride,
                             int width, int height);
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                              const int16_t *src, int src_stride,
                              int width, int height);
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride,
                                    const int16_t *src, int src_stride,
                                    int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride,
                                     const int16_t *src, int src_stride,
                                     int width, int height);
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride,
                                        const uint8_t *src, int src_stride,
                                        int width, int height);

/* Subband dequantisation (SSE4). */
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride,
                                const int qf, const int qs,
                                int tot_v, int tot_h);
51 
52 #if HAVE_X86ASM
53 
/*
 * HPEL_FILTER(MMSIZE, EXT)
 *
 * Declares the external ff_dirac_hpel_filter_v_<EXT> / _h_<EXT> routines and
 * defines a static row-by-row driver, dirac_hpel_filter_<EXT>(), that fills
 * three destination planes from one source plane:
 *   dstv <- vertical filter of src
 *   dsth <- horizontal filter of src
 *   dstc <- horizontal filter of dstv (so it must run after the vertical pass)
 *
 * The vertical pass starts MMSIZE bytes to the left of the row and covers
 * width + MMSIZE + 5 samples -- presumably so the horizontal pass over dstv
 * has the neighbouring samples it needs; confirm against the asm.
 * All four pointers advance by `stride` after every row.
 */
#define HPEL_FILTER(MMSIZE, EXT) \
    void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, const uint8_t *, int, int); \
    void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, const uint8_t *, int); \
    \
    static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, \
                                          const uint8_t *src, int stride, \
                                          int width, int height) \
    { \
        for (; height--; \
             dsth += stride, dstv += stride, dstc += stride, src += stride) { \
            ff_dirac_hpel_filter_v_ ## EXT(dstv - MMSIZE, src - MMSIZE, \
                                           stride, width + MMSIZE + 5); \
            ff_dirac_hpel_filter_h_ ## EXT(dsth, src, width); \
            ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width); \
        } \
    }
73 
/*
 * PIXFUNC(PFX, IDX, EXT)
 *
 * Stores ff_<PFX>_dirac_pixels16_<EXT> and ff_<PFX>_dirac_pixels32_<EXT> into
 * slot IDX of rows 1 and 2 of c-><PFX>_dirac_pixels_tab. A context pointer
 * named `c` must be in scope at the point of use.
 *
 * Row 0 (the 8-pixel-wide entry) is intentionally not written; that
 * assignment is kept only as the disabled "MMXDISABLED" line below.
 *
 * The expansion is wrapped in do { } while (0) so that it forms exactly one
 * statement. This keeps both assignments under the controlling condition when
 * the macro is used in an unbraced if/else, and matches the
 * `do {} while (0)` stub used when HAVE_X86ASM is disabled.
 */
#define PIXFUNC(PFX, IDX, EXT) \
    do { \
        /*MMXDISABLEDc->PFX ## _dirac_pixels_tab[0][IDX] = ff_ ## PFX ## _dirac_pixels8_ ## EXT;*/ \
        c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT; \
        c->PFX ## _dirac_pixels_tab[2][IDX] = ff_ ## PFX ## _dirac_pixels32_ ## EXT; \
    } while (0)
78 
/*
 * DIRAC_PIXOP(OPNAME2, OPNAME, EXT)
 *
 * Generates ff_<OPNAME2>_dirac_pixels{8,16,32}_<EXT>(). Each wrapper picks
 * one of two paths based on the row count h:
 *   - h is a multiple of 4: call <OPNAME>_pixels{8,16}_<EXT> on src[0] only
 *     (the 32-wide wrapper issues two 16-wide calls, 16 bytes apart);
 *   - otherwise: defer to ff_<OPNAME2>_dirac_pixels{8,16,32}_c, passing the
 *     whole src[] array through.
 * NOTE(review): the multiple-of-4 restriction presumably reflects how many
 * rows the fast routines process per iteration -- confirm.
 */
#define DIRAC_PIXOP(OPNAME2, OPNAME, EXT) \
void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{ \
    if ((h & 3) == 0) { \
        OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h); \
    } else { \
        ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h); \
    } \
} \
void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{ \
    if ((h & 3) == 0) { \
        OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h); \
    } else { \
        ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h); \
    } \
} \
void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h) \
{ \
    if ((h & 3) == 0) { \
        OPNAME ## _pixels16_ ## EXT(dst,      src[0],      stride, h); \
        OPNAME ## _pixels16_ ## EXT(dst + 16, src[0] + 16, stride, h); \
    } else { \
        ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h); \
    } \
}

/* Instantiate the wrappers: put/avg for MMX, avg only for MMXEXT. */
DIRAC_PIXOP(put, ff_put, mmx)
DIRAC_PIXOP(avg, ff_avg, mmx)
DIRAC_PIXOP(avg, ff_avg, mmxext)
107 
/**
 * 16-pixel-wide "put" wrapper for SSE2.
 *
 * When h is a multiple of 4 the work is handed to ff_put_pixels16_sse2(),
 * which only reads src[0]; any other h goes to ff_put_dirac_pixels16_c()
 * with the full src[] array.
 */
void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        ff_put_pixels16_sse2(dst, src[0], stride, h);
        return;
    }

    ff_put_dirac_pixels16_c(dst, src, stride, h);
}
/**
 * 16-pixel-wide "avg" wrapper for SSE2.
 *
 * When h is a multiple of 4 the work is handed to ff_avg_pixels16_sse2(),
 * which only reads src[0]; any other h goes to ff_avg_dirac_pixels16_c()
 * with the full src[] array.
 */
void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        ff_avg_pixels16_sse2(dst, src[0], stride, h);
        return;
    }

    ff_avg_dirac_pixels16_c(dst, src, stride, h);
}
/**
 * 32-pixel-wide "put" wrapper for SSE2.
 *
 * When h is a multiple of 4 the block is handled as two adjacent 16-byte
 * columns, each passed to ff_put_pixels16_sse2() using src[0]; any other h
 * goes to ff_put_dirac_pixels32_c() with the full src[] array.
 */
void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        /* left half at offset 0, right half at offset 16 */
        for (int x = 0; x < 32; x += 16)
            ff_put_pixels16_sse2(dst + x, src[0] + x, stride, h);
        return;
    }

    ff_put_dirac_pixels32_c(dst, src, stride, h);
}
/**
 * 32-pixel-wide "avg" wrapper for SSE2.
 *
 * When h is a multiple of 4 the block is handled as two adjacent 16-byte
 * columns, each passed to ff_avg_pixels16_sse2() using src[0]; any other h
 * goes to ff_avg_dirac_pixels32_c() with the full src[] array.
 */
void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
{
    if ((h & 3) == 0) {
        /* left half at offset 0, right half at offset 16 */
        for (int x = 0; x < 32; x += 16)
            ff_avg_pixels16_sse2(dst + x, src[0] + x, stride, h);
        return;
    }

    ff_avg_dirac_pixels32_c(dst, src, stride, h);
}
140 
141 #else // HAVE_X86ASM
142 
/*
 * Variants used when HAVE_X86ASM is off.
 *
 * HPEL_FILTER only emits a prototype for dirac_hpel_filter_<EXT>(); no body
 * is provided in this branch. NOTE(review): the init routine still names
 * these symbols, so this presumably relies on the EXTERNAL_* checks being
 * compile-time false so the references are discarded -- confirm.
 */
#define HPEL_FILTER(MMSIZE, EXT) \
    void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, \
                                   const uint8_t *src, int stride, \
                                   int width, int height);

/* PIXFUNC degenerates to an empty statement. */
#define PIXFUNC(PFX, IDX, EXT) do { } while (0)
148 
149 #endif // HAVE_X86ASM
150 
151 #if !ARCH_X86_64
152 HPEL_FILTER(8, mmx)
153 #endif
154 HPEL_FILTER(16, sse2)
155 
157 {
158  int mm_flags = av_get_cpu_flags();
159 
160  if (EXTERNAL_MMX(mm_flags)) {
161  c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
162 #if !ARCH_X86_64
163  c->add_dirac_obmc[1] = ff_add_dirac_obmc16_mmx;
164  c->add_dirac_obmc[2] = ff_add_dirac_obmc32_mmx;
165  c->dirac_hpel_filter = dirac_hpel_filter_mmx;
166  c->add_rect_clamped = ff_add_rect_clamped_mmx;
167  c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_mmx;
168 #endif
169  PIXFUNC(put, 0, mmx);
170  PIXFUNC(avg, 0, mmx);
171  }
172 
173  if (EXTERNAL_MMXEXT(mm_flags)) {
174  PIXFUNC(avg, 0, mmxext);
175  }
176 
177  if (EXTERNAL_SSE2(mm_flags)) {
178  c->dirac_hpel_filter = dirac_hpel_filter_sse2;
179  c->add_rect_clamped = ff_add_rect_clamped_sse2;
180  c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;
181 
182  c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
183  c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
184 
185  c->put_dirac_pixels_tab[1][0] = ff_put_dirac_pixels16_sse2;
186  c->avg_dirac_pixels_tab[1][0] = ff_avg_dirac_pixels16_sse2;
187  c->put_dirac_pixels_tab[2][0] = ff_put_dirac_pixels32_sse2;
188  c->avg_dirac_pixels_tab[2][0] = ff_avg_dirac_pixels32_sse2;
189  }
190 
191  if (EXTERNAL_SSE4(mm_flags)) {
192  c->dequant_subband[1] = ff_dequant_subband_32_sse4;
193  c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
194  }
195 }
stride
int stride
Definition: mace.c:144
HPEL_FILTER
#define HPEL_FILTER(MMSIZE, EXT)
Definition: diracdsp_init.c:143
cpu.h
ff_put_rect_clamped_sse2
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
ff_put_dirac_pixels16_sse2
void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
ff_add_dirac_obmc16_mmx
void ff_add_dirac_obmc16_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
DECL_DIRAC_PIXOP
DECL_DIRAC_PIXOP(put, mmx)
ff_diracdsp_init_x86
void ff_diracdsp_init_x86(DiracDSPContext *c)
Definition: diracdsp_init.c:156
src
#define src
Definition: vp8dsp.c:254
diracdsp.h
width
#define width
ff_add_dirac_obmc8_mmx
void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
ff_put_pixels16_sse2
void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
ff_put_signed_rect_clamped_10_sse4
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height)
ff_add_rect_clamped_mmx
void ff_add_rect_clamped_mmx(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int)
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_avg_pixels16_sse2
void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
ff_avg_dirac_pixels32_sse2
void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
avg
#define avg(a, b, c, d)
Definition: colorspacedsp_template.c:28
ff_put_signed_rect_clamped_mmx
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
ff_avg_dirac_pixels16_sse2
void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
height
#define height
EXTERNAL_SSE2
#define EXTERNAL_SSE2(flags)
Definition: cpu.h:59
ff_put_rect_clamped_mmx
void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
DiracDSPContext
Definition: diracdsp.h:30
uint8_t
uint8_t
Definition: audio_convert.c:194
ff_add_rect_clamped_sse2
void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int)
ff_put_dirac_pixels32_sse2
void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
ff_add_dirac_obmc16_sse2
void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
EXTERNAL_SSE4
#define EXTERNAL_SSE4(flags)
Definition: cpu.h:68
fpel.h
ff_add_dirac_obmc32_mmx
void ff_add_dirac_obmc32_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
ff_put_signed_rect_clamped_sse2
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height)
PIXFUNC
#define PIXFUNC(PFX, IDX, EXT)
Definition: diracdsp_init.c:147
ff_add_dirac_obmc32_sse2
void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen)
h
h
Definition: vp9dsp_template.c:2038
ff_dequant_subband_32_sse4
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h)
EXTERNAL_MMX
#define EXTERNAL_MMX(flags)
Definition: cpu.h:56
EXTERNAL_MMXEXT
#define EXTERNAL_MMXEXT(flags)
Definition: cpu.h:57