FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
mpegvideoencdsp.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include "config.h"
20 #include <stdint.h>
21 #if HAVE_ALTIVEC_H
22 #include <altivec.h>
23 #endif
24 
25 #include "libavutil/attributes.h"
26 #include "libavutil/cpu.h"
27 #include "libavutil/ppc/cpu.h"
31 
32 #if HAVE_ALTIVEC
33 
34 #if HAVE_VSX
/**
 * Sum of the squared pixel values of a 16x16 block (VSX build).
 *
 * @param pix       address of the first pixel of the block; 16 bytes are
 *                  loaded from each of 16 consecutive rows
 * @param line_size byte distance between the start of two successive rows
 * @return the accumulated sum of squares
 */
static int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    const vector unsigned int zero =
        (const vector unsigned int) vec_splat_u32(0);
    vector unsigned int acc = (vector unsigned int) vec_splat_u32(0);
    vector signed int total;
    int result = 0;
    int row;

    for (row = 0; row < 16; row++, pix += line_size) {
        /* Fetch one row of 16 pixels; the address may be unaligned. */
        vector unsigned char px = vec_vsx_ld(0, pix);

        /* Multiply every pixel by itself and accumulate into four
         * 32-bit partial sums. */
        acc = vec_msum(px, px, acc);
    }

    /* Collapse the four partial sums into a single total. */
    total = vec_sums((vector signed int) acc, (vector signed int) zero);
    /* Broadcast element 3 (the total) to every lane so that the one
     * element written by vec_ste() is the total. */
    total = vec_splat(total, 3);
    vec_ste(total, 0, &result);

    return result;
}
61 #else
/**
 * Sum of the squared pixel values of a 16x16 block (plain AltiVec build,
 * used when VSX loads are not available).
 *
 * @param pix       address of the first pixel of the block; 16 bytes are
 *                  consumed from each of 16 consecutive rows
 * @param line_size byte distance between the start of two successive rows
 * @return the accumulated sum of squares
 */
static int pix_norm1_altivec(uint8_t *pix, int line_size)
{
    const vector unsigned int zero =
        (const vector unsigned int) vec_splat_u32(0);
    /* Realignment pattern, derived once from the initial address.
     * NOTE(review): reusing it for every row presumably relies on all rows
     * sharing the same alignment (line_size a multiple of 16) - confirm. */
    vector unsigned char align_mask = vec_lvsl(0, pix);
    vector unsigned int acc = (vector unsigned int) vec_splat_u32(0);
    vector signed int total;
    int result = 0;
    int row;

    for (row = 0; row < 16; row++, pix += line_size) {
        /* Two aligned loads cover the possibly unaligned 16 pixels; the
         * permute extracts the wanted bytes. */
        vector unsigned char lo = vec_ld(0, pix);
        vector unsigned char hi = vec_ld(15, pix);
        vector unsigned char px = vec_perm(lo, hi, align_mask);

        /* Multiply every pixel by itself and accumulate into four
         * 32-bit partial sums. */
        acc = vec_msum(px, px, acc);
    }

    /* Collapse the four partial sums into a single total. */
    total = vec_sums((vector signed int) acc, (vector signed int) zero);
    /* Broadcast element 3 (the total) to every lane so that the one
     * element written by vec_ste() is the total. */
    total = vec_splat(total, 3);
    vec_ste(total, 0, &result);

    return result;
}
89 #endif /* HAVE_VSX */
90 
91 #if HAVE_VSX
/**
 * Sum of the pixel values of a 16x16 block (VSX build).
 *
 * @param pix       address of the first pixel of the block; 16 bytes are
 *                  loaded from each of 16 consecutive rows
 * @param line_size byte distance between the start of two successive rows
 * @return the accumulated pixel sum
 */
static int pix_sum_altivec(uint8_t *pix, int line_size)
{
    /* s is only written through vec_ste(); initialise it like
     * pix_norm1_altivec() does so it never appears uninitialised. */
    int i, s = 0;
    const vector unsigned int zero =
        (const vector unsigned int) vec_splat_u32(0);
    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
    vector signed int sumdiffs;

    for (i = 0; i < 16; i++) {
        /* Read the potentially unaligned 16 pixels into t1. */
        vector unsigned char t1 = vec_vsx_ld(0, pix);

        /* Add each 4 pixel group together and put 4 results into sad. */
        sad = vec_sum4s(t1, sad);

        pix += line_size;
    }

    /* Sum up the four partial sums, and put the result into s. */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    /* Broadcast element 3 (the total) to every lane so that the one
     * element written by vec_ste() is the total. */
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
119 #else
/**
 * Sum of the pixel values of a 16x16 block (plain AltiVec build, used when
 * VSX loads are not available).
 *
 * @param pix       address of the first pixel of the block; 16 bytes are
 *                  consumed from each of 16 consecutive rows
 * @param line_size byte distance between the start of two successive rows
 * @return the accumulated pixel sum
 */
static int pix_sum_altivec(uint8_t *pix, int line_size)
{
    /* s is only written through vec_ste(); initialise it like
     * pix_norm1_altivec() does so it never appears uninitialised. */
    int i, s = 0;
    const vector unsigned int zero =
        (const vector unsigned int) vec_splat_u32(0);
    /* Realignment pattern, derived once from the initial address.
     * NOTE(review): reusing it for every row presumably relies on all rows
     * sharing the same alignment (line_size a multiple of 16) - confirm. */
    vector unsigned char perm = vec_lvsl(0, pix);
    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
    vector signed int sumdiffs;

    for (i = 0; i < 16; i++) {
        /* Read the potentially unaligned 16 pixels into t1. */
        vector unsigned char pixl = vec_ld(0, pix);
        vector unsigned char pixr = vec_ld(15, pix);
        vector unsigned char t1   = vec_perm(pixl, pixr, perm);

        /* Add each 4 pixel group together and put 4 results into sad. */
        sad = vec_sum4s(t1, sad);

        pix += line_size;
    }

    /* Sum up the four partial sums, and put the result into s. */
    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
    /* Broadcast element 3 (the total) to every lane so that the one
     * element written by vec_ste() is the total. */
    sumdiffs = vec_splat(sumdiffs, 3);
    vec_ste(sumdiffs, 0, &s);

    return s;
}
148 
149 #endif /* HAVE_VSX */
150 
151 #endif /* HAVE_ALTIVEC */
152 
154  AVCodecContext *avctx)
155 {
156 #if HAVE_ALTIVEC
158  return;
159 
160  c->pix_norm1 = pix_norm1_altivec;
161  c->pix_sum = pix_sum_altivec;
162 #endif /* HAVE_ALTIVEC */
163 }
const char * s
Definition: avisynth_c.h:631
av_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c, AVCodecContext *avctx)
Macro definitions for various function/variable attributes.
uint8_t
#define av_cold
Definition: attributes.h:74
#define PPC_ALTIVEC(flags)
Definition: cpu.h:26
#define t1
Definition: regdef.h:29
#define zero
Definition: regdef.h:64
int(* pix_sum)(uint8_t *pix, int line_size)
perm
Definition: f_perms.c:74
main external API structure.
Definition: avcodec.h:1502
int(* pix_norm1)(uint8_t *pix, int line_size)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:76
Contains misc utility macros and inline functions.
static double c[64]