FFmpeg
dirac_dwt_init.c
Go to the documentation of this file.
1 /*
2  * x86 optimized discrete wavelet transform
3  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
4  * Copyright (c) 2010 David Conrad
5  *
6  * This file is part of FFmpeg.
7  *
8  * FFmpeg is free software; you can redistribute it and/or
9  * modify it under the terms of the GNU Lesser General Public
10  * License as published by the Free Software Foundation; either
11  * version 2.1 of the License, or (at your option) any later version.
12  *
13  * FFmpeg is distributed in the hope that it will be useful,
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  * Lesser General Public License for more details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with FFmpeg; if not, write to the Free Software
20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21  */
22 
23 #include "libavutil/x86/cpu.h"
24 #include "libavcodec/dirac_dwt.h"
25 
/*
 * COMPOSE_VERTICAL(ext, align) emits the C glue for one SIMD flavour of the
 * inverse-DWT compose routines:
 *   - prototypes for the external ff_*##ext kernels, which take int16_t rows;
 *   - static wrappers with uint8_t* parameters that cast the rows to int16_t,
 *     process the elements outside the aligned part of the row with the scalar
 *     COMPOSE_* macros, and hand the row to the kernel.
 *
 * ext:   suffix pasted onto every generated identifier (e.g. _sse2).
 * align: granularity used to split a row between kernel and scalar code
 *        (presumably the number of int16_t elements per SIMD step). The
 *        "& ~(align-1)" / "& (align-1)" arithmetic only rounds correctly when
 *        it is a power of two. The parameter is parenthesized at every use so
 *        that an expression argument cannot change the mask.
 *
 * Vertical wrappers run the scalar loop over [width_align, width) and call the
 * kernel with width_align. Horizontal wrappers call the kernel with the full
 * width w first and then run the scalar loop over the last (w/2) % align
 * output pairs.
 */
#define COMPOSE_VERTICAL(ext, align) \
void ff_vertical_compose53iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
void ff_vertical_compose_dirac53iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int width); \
void ff_vertical_compose_dd137iL0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
void ff_vertical_compose_dd97iH0##ext(int16_t *b0, int16_t *b1, int16_t *b2, int16_t *b3, int16_t *b4, int width); \
void ff_vertical_compose_haar##ext(int16_t *b0, int16_t *b1, int width); \
void ff_horizontal_compose_haar0i##ext(int16_t *b, int16_t *tmp, int w);\
void ff_horizontal_compose_haar1i##ext(int16_t *b, int16_t *tmp, int w);\
\
static void vertical_compose53iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
{ \
    int i, width_align = width&~((align)-1); \
    int16_t *b0 = (int16_t *)_b0; \
    int16_t *b1 = (int16_t *)_b1; \
    int16_t *b2 = (int16_t *)_b2; \
\
    /* scalar tail: elements past the last multiple of align */ \
    for(i=width_align; i<width; i++) \
        b1[i] = COMPOSE_53iL0(b0[i], b1[i], b2[i]); \
\
    ff_vertical_compose53iL0##ext(b0, b1, b2, width_align); \
} \
\
static void vertical_compose_dirac53iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, int width) \
{ \
    int i, width_align = width&~((align)-1); \
    int16_t *b0 = (int16_t *)_b0; \
    int16_t *b1 = (int16_t *)_b1; \
    int16_t *b2 = (int16_t *)_b2; \
\
    for(i=width_align; i<width; i++) \
        b1[i] = COMPOSE_DIRAC53iH0(b0[i], b1[i], b2[i]); \
\
    ff_vertical_compose_dirac53iH0##ext(b0, b1, b2, width_align); \
} \
\
static void vertical_compose_dd137iL0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
                                           uint8_t *_b3, uint8_t *_b4, int width) \
{ \
    int i, width_align = width&~((align)-1); \
    int16_t *b0 = (int16_t *)_b0; \
    int16_t *b1 = (int16_t *)_b1; \
    int16_t *b2 = (int16_t *)_b2; \
    int16_t *b3 = (int16_t *)_b3; \
    int16_t *b4 = (int16_t *)_b4; \
\
    /* 5-tap filter: the middle row b2 is the one rewritten */ \
    for(i=width_align; i<width; i++) \
        b2[i] = COMPOSE_DD137iL0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
\
    ff_vertical_compose_dd137iL0##ext(b0, b1, b2, b3, b4, width_align); \
} \
\
static void vertical_compose_dd97iH0##ext(uint8_t *_b0, uint8_t *_b1, uint8_t *_b2, \
                                          uint8_t *_b3, uint8_t *_b4, int width) \
{ \
    int i, width_align = width&~((align)-1); \
    int16_t *b0 = (int16_t *)_b0; \
    int16_t *b1 = (int16_t *)_b1; \
    int16_t *b2 = (int16_t *)_b2; \
    int16_t *b3 = (int16_t *)_b3; \
    int16_t *b4 = (int16_t *)_b4; \
\
    for(i=width_align; i<width; i++) \
        b2[i] = COMPOSE_DD97iH0(b0[i], b1[i], b2[i], b3[i], b4[i]); \
\
    ff_vertical_compose_dd97iH0##ext(b0, b1, b2, b3, b4, width_align); \
} \
static void vertical_compose_haar##ext(uint8_t *_b0, uint8_t *_b1, int width) \
{ \
    int i, width_align = width&~((align)-1); \
    int16_t *b0 = (int16_t *)_b0; \
    int16_t *b1 = (int16_t *)_b1; \
\
    for(i=width_align; i<width; i++) { \
        b0[i] = COMPOSE_HAARiL0(b0[i], b1[i]); \
        /* order matters: the high-pass step reads the b0[i] just written */ \
        b1[i] = COMPOSE_HAARiH0(b1[i], b0[i]); \
    } \
\
    ff_vertical_compose_haar##ext(b0, b1, width_align); \
} \
static void horizontal_compose_haar0i##ext(uint8_t *_b, uint8_t *_tmp, int w)\
{\
    int w2= w>>1;\
    /* first output pair handled by the scalar loop below */ \
    int x= w2 - (w2&((align)-1));\
    int16_t *b = (int16_t *)_b; \
    int16_t *tmp = (int16_t *)_tmp; \
\
    ff_horizontal_compose_haar0i##ext(b, tmp, w);\
\
    /* interleave: even outputs from tmp, odd outputs from the second half of b */ \
    for (; x < w2; x++) {\
        b[2*x  ] = tmp[x];\
        b[2*x+1] = COMPOSE_HAARiH0(b[x+w2], tmp[x]);\
    }\
}\
static void horizontal_compose_haar1i##ext(uint8_t *_b, uint8_t *_tmp, int w)\
{\
    int w2= w>>1;\
    int x= w2 - (w2&((align)-1));\
    int16_t *b = (int16_t *)_b; \
    int16_t *tmp = (int16_t *)_tmp; \
\
    ff_horizontal_compose_haar1i##ext(b, tmp, w);\
\
    /* same interleave as haar0i, with a rounding right shift by one */ \
    for (; x < w2; x++) {\
        b[2*x  ] = (tmp[x] + 1)>>1;\
        b[2*x+1] = (COMPOSE_HAARiH0(b[x+w2], tmp[x]) + 1)>>1;\
    }\
}

/* Instantiate the wrappers for the SSE2 kernels with an alignment of 8. */
COMPOSE_VERTICAL(_sse2, 8)
136 
/* External SSSE3 kernel for the horizontal DD 9/7 compose step. */
void ff_horizontal_compose_dd97i_ssse3(int16_t *_b, int16_t *_tmp, int w);

/**
 * Horizontal DD 9/7 compose wrapper around the SSSE3 kernel.
 *
 * Casts both buffers to int16_t, runs the kernel over the row, then produces
 * the last (w/2) % 8 output pairs in C.
 *
 * @param _b   row buffer, accessed as int16_t; rewritten in place
 * @param _tmp scratch buffer, accessed as int16_t
 * @param w    row width in coefficients
 */
static void horizontal_compose_dd97i_ssse3(uint8_t *_b, uint8_t *_tmp, int w)
{
    int w2= w>>1;
    int x= w2 - (w2&7);          /* first output pair left to the scalar loop */
    int16_t *b = (int16_t *)_b;
    int16_t *tmp = (int16_t *)_tmp;

    /* The kernel must run first: the scalar loop below only reads tmp and
     * never fills it. NOTE(review): presumably the kernel is what populates
     * tmp, including the entries at x-1, x+1 and x+2 read by the 5-tap filter
     * at the row edges - confirm against the assembly. */
    ff_horizontal_compose_dd97i_ssse3(b, tmp, w);

    for (; x < w2; x++) {
        b[2*x  ] = (tmp[x] + 1)>>1;
        b[2*x+1] = (COMPOSE_DD97iH0(tmp[x-1], tmp[x], b[x+w2], tmp[x+1], tmp[x+2]) + 1)>>1;
    }
}
153 
155 {
156  int mm_flags = av_get_cpu_flags();
157 
158  if (!(mm_flags & AV_CPU_FLAG_SSE2))
159  return;
160 
161  switch (type) {
162  case DWT_DIRAC_DD9_7:
163  d->vertical_compose_l0.tap3 = vertical_compose53iL0_sse2;
164  d->vertical_compose_h0.tap5 = vertical_compose_dd97iH0_sse2;
165  break;
166  case DWT_DIRAC_LEGALL5_3:
167  d->vertical_compose_l0.tap3 = vertical_compose53iL0_sse2;
168  d->vertical_compose_h0.tap3 = vertical_compose_dirac53iH0_sse2;
169  break;
170  case DWT_DIRAC_DD13_7:
171  d->vertical_compose_l0.tap5 = vertical_compose_dd137iL0_sse2;
172  d->vertical_compose_h0.tap5 = vertical_compose_dd97iH0_sse2;
173  break;
174  case DWT_DIRAC_HAAR0:
175  d->vertical_compose = vertical_compose_haar_sse2;
176  d->horizontal_compose = horizontal_compose_haar0i_sse2;
177  break;
178  case DWT_DIRAC_HAAR1:
179  d->vertical_compose = vertical_compose_haar_sse2;
180  d->horizontal_compose = horizontal_compose_haar1i_sse2;
181  break;
182  }
183 
184  if (!(mm_flags & AV_CPU_FLAG_SSSE3))
185  return;
186 
187  switch (type) {
188  case DWT_DIRAC_DD9_7:
190  break;
191  }
192 }
cpu.h
horizontal_compose_dd97i_ssse3
static void horizontal_compose_dd97i_ssse3(uint8_t *_b, uint8_t *_tmp, int w)
Definition: dirac_dwt_init.c:139
w
uint8_t w
Definition: llviddspenc.c:38
b
#define b
Definition: input.c:42
dirac_dwt.h
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
AV_CPU_FLAG_SSSE3
#define AV_CPU_FLAG_SSSE3
Conroe SSSE3 functions.
Definition: cpu.h:44
DWTContext::vertical_compose
vertical_compose_2tap vertical_compose
one set of lowpass and highpass combined
Definition: dirac_dwt.h:71
type
it's the only field you need to keep, assuming you have a context. There is some magic you don't need to care about around this; just let it vf type
Definition: writing_filters.txt:86
DWT_DIRAC_HAAR1
@ DWT_DIRAC_HAAR1
Definition: dirac_dwt.h:84
DWT_DIRAC_HAAR0
@ DWT_DIRAC_HAAR0
Definition: dirac_dwt.h:83
DWTContext::tap5
vertical_compose_5tap tap5
Definition: dirac_dwt.h:66
DWTContext::horizontal_compose
void(* horizontal_compose)(uint8_t *b, uint8_t *tmp, int width)
Definition: dirac_dwt.h:72
tmp
static uint8_t tmp[40]
Definition: aes_ctr.c:52
dwt_type
dwt_type
Definition: dirac_dwt.h:77
DWTContext::vertical_compose_l0
union DWTContext::@109 vertical_compose_l0
DWT_DIRAC_DD13_7
@ DWT_DIRAC_DD13_7
Definition: dirac_dwt.h:82
AV_CPU_FLAG_SSE2
#define AV_CPU_FLAG_SSE2
PIV SSE2 functions.
Definition: cpu.h:37
COMPOSE_DD97iH0
#define COMPOSE_DD97iH0(b0, b1, b2, b3, b4)
Definition: dirac_dwt.h:104
COMPOSE_VERTICAL
#define COMPOSE_VERTICAL(ext, align)
Definition: dirac_dwt_init.c:26
ff_horizontal_compose_dd97i_ssse3
void ff_horizontal_compose_dd97i_ssse3(int16_t *_b, int16_t *_tmp, int w)
ff_spatial_idwt_init_x86
void ff_spatial_idwt_init_x86(DWTContext *d, enum dwt_type type)
Definition: dirac_dwt_init.c:154
DWTContext::tap3
vertical_compose_3tap tap3
Definition: dirac_dwt.h:65
DWTContext
Definition: dirac_dwt.h:54
DWTContext::vertical_compose_h0
union DWTContext::@109 vertical_compose_h0
DWT_DIRAC_LEGALL5_3
@ DWT_DIRAC_LEGALL5_3
Definition: dirac_dwt.h:81
DWT_DIRAC_DD9_7
@ DWT_DIRAC_DD9_7
Definition: dirac_dwt.h:80