FFmpeg
h264dsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
#include "config.h"

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/util_altivec.h"

#include "libavcodec/h264dec.h"
#include "libavcodec/h264dsp.h"
35 
36 #if HAVE_ALTIVEC
37 
38 /****************************************************************************
39  * IDCT transform:
40  ****************************************************************************/
41 
/*
 * One-dimensional pass of the 4x4 inverse transform.
 *
 * vb0..vb3 are the four input vectors and va0..va3 receive the outputs.
 * The caller must have vec_s16 temporaries named vz0..vz3 in scope.
 * The expansion deliberately has no trailing semicolon.
 */
#define VEC_1D_DCT(vb0,vb1,vb2,vb3,va0,va1,va2,va3)                              \
    /* stage 1: even/odd butterflies */                                          \
    vz0 = vec_add(vb0, vb2);                            /* t0 = Y0 + Y2        */ \
    vz1 = vec_sub(vb0, vb2);                            /* t1 = Y0 - Y2        */ \
    vz2 = vec_sub(vec_sra(vb1, vec_splat_u16(1)), vb3); /* t2 = (Y1 >> 1) - Y3 */ \
    vz3 = vec_add(vb1, vec_sra(vb3, vec_splat_u16(1))); /* t3 = Y1 + (Y3 >> 1) */ \
    /* stage 2: combine into the outputs */                                      \
    va0 = vec_add(vz0, vz3);                            /* x0 = t0 + t3        */ \
    va1 = vec_add(vz1, vz2);                            /* x1 = t1 + t2        */ \
    va2 = vec_sub(vz1, vz2);                            /* x2 = t1 - t2        */ \
    va3 = vec_sub(vz0, vz3)                             /* x3 = t0 - t3        */
55 
/*
 * Transpose a 4x4 matrix of 16-bit elements held in the first four lanes
 * of a0..a3.
 *
 * On exit the first four lanes of b0..b3 hold columns 0..3 of the input
 * (i.e. the transposed rows); the remaining lanes are filled with copies
 * of a0's elements and carry no information.  a0..a3 are used as scratch
 * between the merge rounds and are clobbered.
 * The expansion has no trailing semicolon.
 */
#define VEC_TRANSPOSE_4(a0,a1,a2,a3,b0,b1,b2,b3) \
    b0 = vec_mergeh( a0, a0 ); \
    b1 = vec_mergeh( a1, a0 ); \
    b2 = vec_mergeh( a2, a0 ); \
    b3 = vec_mergeh( a3, a0 ); \
    a0 = vec_mergeh( b0, b2 ); \
    a1 = vec_mergel( b0, b2 ); \
    a2 = vec_mergeh( b1, b3 ); \
    a3 = vec_mergel( b1, b3 ); \
    b0 = vec_mergeh( a0, a2 ); \
    b1 = vec_mergel( a0, a2 ); \
    b2 = vec_mergeh( a1, a3 ); \
    b3 = vec_mergel( a1, a3 )
69 
/*
 * Load the destination pixels into the caller's `vdst`.
 *
 * NOTE: the parameter `d` is never used; both variants read the variable
 * `dst` from the enclosing scope (and the only user invokes the macro with
 * an empty argument).
 *
 * Big-endian: aligned vec_ld() into `vdst_orig`, then vec_perm() with the
 * caller's `vdst_mask` (and `zero_u8v`) to bring the bytes at `dst` to the
 * front.  Little-endian: a single vec_vsx_ld() straight from `dst`.
 * The big-endian expansion ends with a semicolon, the little-endian one
 * does not.
 */
#if HAVE_BIGENDIAN
#define vdst_load(d) \
    vdst_orig = vec_ld(0, dst); \
    vdst = vec_perm(vdst_orig, zero_u8v, vdst_mask);
#else
#define vdst_load(d) vdst = vec_vsx_ld(0, dst)
#endif
77 
/*
 * Add one row of 16-bit residuals (va) to the pixels at `dst` and write the
 * first 32-bit word of the result back.
 *
 * Steps: load the pixels (vdst_load), widen them to 16 bit by merging with
 * zero, add va, pack back to bytes with unsigned saturation, replicate the
 * first 32-bit word across the vector and store one element with vec_ste().
 *
 * Relies on these names from the enclosing scope: dst, element, vdst,
 * vdst_ss, va_u8, va_u32, zero_u8v, zero_s16v (plus whatever vdst_load
 * needs).  `va` is overwritten with the sum.
 * The expansion already ends with a semicolon.
 */
#define VEC_LOAD_U8_ADD_S16_STORE_U8(va) \
    vdst_load(); \
    vdst_ss = (vec_s16) VEC_MERGEH(zero_u8v, vdst); \
    va = vec_add(va, vdst_ss); \
    va_u8 = vec_packsu(va, zero_s16v); \
    va_u32 = vec_splat((vec_u32)va_u8, 0); \
    vec_ste(va_u32, element, (uint32_t*)dst);
85 
86 static void h264_idct_add_altivec(uint8_t *dst, int16_t *block, int stride)
87 {
88  vec_s16 va0, va1, va2, va3;
89  vec_s16 vz0, vz1, vz2, vz3;
90  vec_s16 vtmp0, vtmp1, vtmp2, vtmp3;
91  vec_u8 va_u8;
92  vec_u32 va_u32;
93  vec_s16 vdst_ss;
94  const vec_u16 v6us = vec_splat_u16(6);
95  vec_u8 vdst, vdst_orig;
96  vec_u8 vdst_mask = vec_lvsl(0, dst);
97  int element = ((unsigned long)dst & 0xf) >> 2;
98  LOAD_ZERO;
99 
100  block[0] += 32; /* add 32 as a DC-level for rounding */
101 
102  vtmp0 = vec_ld(0,block);
103  vtmp1 = vec_sld(vtmp0, vtmp0, 8);
104  vtmp2 = vec_ld(16,block);
105  vtmp3 = vec_sld(vtmp2, vtmp2, 8);
106  memset(block, 0, 16 * sizeof(int16_t));
107 
108  VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);
109  VEC_TRANSPOSE_4(va0,va1,va2,va3,vtmp0,vtmp1,vtmp2,vtmp3);
110  VEC_1D_DCT(vtmp0,vtmp1,vtmp2,vtmp3,va0,va1,va2,va3);
111 
112  va0 = vec_sra(va0,v6us);
113  va1 = vec_sra(va1,v6us);
114  va2 = vec_sra(va2,v6us);
115  va3 = vec_sra(va3,v6us);
116 
117  VEC_LOAD_U8_ADD_S16_STORE_U8(va0);
118  dst += stride;
119  VEC_LOAD_U8_ADD_S16_STORE_U8(va1);
120  dst += stride;
121  VEC_LOAD_U8_ADD_S16_STORE_U8(va2);
122  dst += stride;
123  VEC_LOAD_U8_ADD_S16_STORE_U8(va3);
124 }
125 
/*
 * One-dimensional pass of the 8x8 inverse transform on eight vec_s16
 * vectors.
 *
 * s0..s7 are the inputs and d0..d7 receive the outputs.  The expansion is
 * a braced block that declares its own temporaries (a*v, b*v) and needs
 * the shift-count vectors `onev` and `twov` from the enclosing scope.
 * The scalar formula each statement implements is given in the comment
 * directly above it.
 */
#define IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7) {\
    /* a0 = SRC(0) + SRC(4); */ \
    vec_s16 a0v = vec_add(s0, s4); \
    /* a2 = SRC(0) - SRC(4); */ \
    vec_s16 a2v = vec_sub(s0, s4); \
    /* a4 = (SRC(2)>>1) - SRC(6); */ \
    vec_s16 a4v = vec_sub(vec_sra(s2, onev), s6); \
    /* a6 = (SRC(6)>>1) + SRC(2); */ \
    vec_s16 a6v = vec_add(vec_sra(s6, onev), s2); \
    /* b0 = a0 + a6; */ \
    vec_s16 b0v = vec_add(a0v, a6v); \
    /* b2 = a2 + a4; */ \
    vec_s16 b2v = vec_add(a2v, a4v); \
    /* b4 = a2 - a4; */ \
    vec_s16 b4v = vec_sub(a2v, a4v); \
    /* b6 = a0 - a6; */ \
    vec_s16 b6v = vec_sub(a0v, a6v); \
    /* a1 = SRC(5) - SRC(3) - SRC(7) - (SRC(7)>>1); */ \
    /* a1 = (SRC(5)-SRC(3)) - (SRC(7) + (SRC(7)>>1)); */ \
    vec_s16 a1v = vec_sub( vec_sub(s5, s3), vec_add(s7, vec_sra(s7, onev)) ); \
    /* a3 = SRC(7) + SRC(1) - SRC(3) - (SRC(3)>>1); */ \
    /* a3 = (SRC(7)+SRC(1)) - (SRC(3) + (SRC(3)>>1)); */ \
    vec_s16 a3v = vec_sub( vec_add(s7, s1), vec_add(s3, vec_sra(s3, onev)) );\
    /* a5 = SRC(7) - SRC(1) + SRC(5) + (SRC(5)>>1); */ \
    /* a5 = (SRC(7)-SRC(1)) + SRC(5) + (SRC(5)>>1); */ \
    vec_s16 a5v = vec_add( vec_sub(s7, s1), vec_add(s5, vec_sra(s5, onev)) );\
    /* a7 = SRC(5)+SRC(3) + SRC(1) + (SRC(1)>>1); */ \
    vec_s16 a7v = vec_add( vec_add(s5, s3), vec_add(s1, vec_sra(s1, onev)) );\
    /* b1 = (a7>>2) + a1; */ \
    vec_s16 b1v = vec_add( vec_sra(a7v, twov), a1v); \
    /* b3 = a3 + (a5>>2); */ \
    vec_s16 b3v = vec_add(a3v, vec_sra(a5v, twov)); \
    /* b5 = (a3>>2) - a5; */ \
    vec_s16 b5v = vec_sub( vec_sra(a3v, twov), a5v); \
    /* b7 = a7 - (a1>>2); */ \
    vec_s16 b7v = vec_sub( a7v, vec_sra(a1v, twov)); \
    /* DST(0, b0 + b7); */ \
    d0 = vec_add(b0v, b7v); \
    /* DST(1, b2 + b5); */ \
    d1 = vec_add(b2v, b5v); \
    /* DST(2, b4 + b3); */ \
    d2 = vec_add(b4v, b3v); \
    /* DST(3, b6 + b1); */ \
    d3 = vec_add(b6v, b1v); \
    /* DST(4, b6 - b1); */ \
    d4 = vec_sub(b6v, b1v); \
    /* DST(5, b4 - b3); */ \
    d5 = vec_sub(b4v, b3v); \
    /* DST(6, b2 - b5); */ \
    d6 = vec_sub(b2v, b5v); \
    /* DST(7, b0 - b7); */ \
    d7 = vec_sub(b0v, b7v); \
}
179 
/*
 * Endian-specific helpers used by ALTIVEC_STORE_SUM_CLIP to read and write
 * 8 destination bytes.
 *
 * GET_2PERM(ldv, stv, d)  - big-endian: fills ldv/stv with the vec_lvsl /
 *                           vec_lvsr permute masks for address d.
 *                           little-endian: expands to an empty block and
 *                           leaves both variables untouched.
 * dstv_load(d)            - declares `dstv` holding the bytes at d.
 *                           big-endian: two aligned loads (declared as
 *                           `hv` and `lv`) combined with `perm_ldv`.
 *                           little-endian: one vec_vsx_ld().
 * dest_unligned_store(d)  - writes `idstsum8` back to d.
 *                           big-endian: merges the new bytes into `lv` and
 *                           a reloaded `hv` using `sel`, `perm_stv` and
 *                           `zero_u8v`, then stores both aligned vectors.
 *                           little-endian: permutes `idstsum8` with the
 *                           previously loaded `dstv` and stores the result
 *                           with vec_vsx_st().
 *
 * All of them declare variables, so they must be expanded inside their own
 * block, and they depend on names supplied by the expanding macro.
 */
#if HAVE_BIGENDIAN
#define GET_2PERM(ldv, stv, d) \
    ldv = vec_lvsl(0, d); \
    stv = vec_lvsr(8, d);
#define dstv_load(d) \
    vec_u8 hv = vec_ld( 0, d ); \
    vec_u8 lv = vec_ld( 7, d); \
    vec_u8 dstv = vec_perm( hv, lv, (vec_u8)perm_ldv );
#define dest_unligned_store(d) \
    vec_u8 edgehv; \
    vec_u8 bodyv = vec_perm( idstsum8, idstsum8, perm_stv ); \
    vec_u8 edgelv = vec_perm( sel, zero_u8v, perm_stv ); \
    lv = vec_sel( lv, bodyv, edgelv ); \
    vec_st( lv, 7, d ); \
    hv = vec_ld( 0, d ); \
    edgehv = vec_perm( zero_u8v, sel, perm_stv ); \
    hv = vec_sel( hv, bodyv, edgehv ); \
    vec_st( hv, 0, d );
#else

#define GET_2PERM(ldv, stv, d) {}
#define dstv_load(d) vec_u8 dstv = vec_vsx_ld(0, d)
#define dest_unligned_store(d)\
    vec_u8 dst8 = vec_perm((vec_u8)idstsum8, dstv, vcprm(2,3,s2,s3));\
    vec_vsx_st(dst8, 0, d)
#endif /* HAVE_BIGENDIAN */
206 
/*
 * Add one row of 8x8 IDCT output to the destination pixels and store it.
 *
 * dest      address of the 8 destination pixels
 * idctv     vec_s16 row of transform output (still scaled by 64)
 * perm_ldv, perm_stv, sel
 *           only referenced by the big-endian load/store helpers
 *
 * The row is shifted right by 6, added to the zero-extended pixels with
 * signed saturation, packed to bytes with unsigned saturation and written
 * back through dest_unligned_store().  Needs `sixv`, `zero_u8v` and
 * `zero_s16v` from the enclosing scope.
 */
#define ALTIVEC_STORE_SUM_CLIP(dest, idctv, perm_ldv, perm_stv, sel) { \
    /* unaligned load */ \
    dstv_load(dest); \
    vec_s16 idct_sh6 = vec_sra(idctv, sixv); \
    vec_u16 dst16 = (vec_u16)VEC_MERGEH(zero_u8v, dstv); \
    vec_s16 idstsum = vec_adds(idct_sh6, (vec_s16)dst16); \
    vec_u8 idstsum8 = vec_packsu(zero_s16v, idstsum); \
    /* unaligned store */ \
    dest_unligned_store(dest);\
}
217 
/**
 * Apply the 8x8 inverse transform to a coefficient block and add the
 * result to the destination pixels.
 *
 * dct[0] is biased by 32 for the final >> 6, and all 64 coefficients are
 * cleared once they have been loaded.
 *
 * @param dst    top-left pixel of the 8x8 destination area
 * @param dct    64 coefficients; loaded with vec_ld(), so presumably
 *               16-byte aligned (TODO confirm against callers)
 * @param stride distance in bytes between destination rows
 */
static void h264_idct8_add_altivec(uint8_t *dst, int16_t *dct, int stride)
{
    vec_s16 s0, s1, s2, s3, s4, s5, s6, s7;
    vec_s16 d0, d1, d2, d3, d4, d5, d6, d7;
    vec_s16 idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7;

    /* permute masks for the big-endian unaligned load/store; the
     * little-endian GET_2PERM leaves them unset and nothing reads them */
    vec_u8 perm_ldv, perm_stv;
    GET_2PERM(perm_ldv, perm_stv, dst);

    /* shift counts referenced by name inside the IDCT/store macros */
    const vec_u16 onev = vec_splat_u16(1);
    const vec_u16 twov = vec_splat_u16(2);
    const vec_u16 sixv = vec_splat_u16(6);

    /* byte-select mask: first 8 bytes clear, last 8 bytes set */
    const vec_u8 sel = (vec_u8) {0,0,0,0,0,0,0,0,-1,-1,-1,-1,-1,-1,-1,-1};
    LOAD_ZERO;

    dct[0] += 32; // rounding for the >>6 at the end

    /* one 16-byte vector per row of eight coefficients */
    s0 = vec_ld(0x00, (int16_t*)dct);
    s1 = vec_ld(0x10, (int16_t*)dct);
    s2 = vec_ld(0x20, (int16_t*)dct);
    s3 = vec_ld(0x30, (int16_t*)dct);
    s4 = vec_ld(0x40, (int16_t*)dct);
    s5 = vec_ld(0x50, (int16_t*)dct);
    s6 = vec_ld(0x60, (int16_t*)dct);
    s7 = vec_ld(0x70, (int16_t*)dct);
    memset(dct, 0, 64 * sizeof(int16_t));

    /* first pass, transpose, second pass */
    IDCT8_1D_ALTIVEC(s0, s1, s2, s3, s4, s5, s6, s7,
                     d0, d1, d2, d3, d4, d5, d6, d7);

    TRANSPOSE8( d0, d1, d2, d3, d4, d5, d6, d7 );

    IDCT8_1D_ALTIVEC(d0, d1, d2, d3, d4, d5, d6, d7,
                     idct0, idct1, idct2, idct3, idct4, idct5, idct6, idct7);

    /* scale, add to the picture and store, one row at a time */
    ALTIVEC_STORE_SUM_CLIP(&dst[0*stride], idct0, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[1*stride], idct1, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[2*stride], idct2, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[3*stride], idct3, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[4*stride], idct4, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[5*stride], idct5, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[6*stride], idct6, perm_ldv, perm_stv, sel);
    ALTIVEC_STORE_SUM_CLIP(&dst[7*stride], idct7, perm_ldv, perm_stv, sel);
}
263 
264 #if HAVE_BIGENDIAN
265 #define DST_LD vec_ld
266 #else
267 #define DST_LD vec_vsx_ld
268 #endif
269 static av_always_inline void h264_idct_dc_add_internal(uint8_t *dst, int16_t *block, int stride, int size)
270 {
271  vec_s16 dc16;
272  vec_u8 dcplus, dcminus, v0, v1, v2, v3, aligner;
273  vec_s32 v_dc32;
274  LOAD_ZERO;
275  DECLARE_ALIGNED(16, int, dc);
276  int i;
277 
278  dc = (block[0] + 32) >> 6;
279  block[0] = 0;
280  v_dc32 = vec_lde(0, &dc);
281  dc16 = VEC_SPLAT16((vec_s16)v_dc32, 1);
282 
283  if (size == 4)
284  dc16 = VEC_SLD16(dc16, zero_s16v, 8);
285  dcplus = vec_packsu(dc16, zero_s16v);
286  dcminus = vec_packsu(vec_sub(zero_s16v, dc16), zero_s16v);
287 
288 #if HAVE_BIGENDIAN
289  aligner = vec_lvsr(0, dst);
290  dcplus = vec_perm(dcplus, dcplus, aligner);
291  dcminus = vec_perm(dcminus, dcminus, aligner);
292 #endif
293 
294  for (i = 0; i < size; i += 4) {
295  v0 = DST_LD(0, dst+0*stride);
296  v1 = DST_LD(0, dst+1*stride);
297  v2 = DST_LD(0, dst+2*stride);
298  v3 = DST_LD(0, dst+3*stride);
299 
300  v0 = vec_adds(v0, dcplus);
301  v1 = vec_adds(v1, dcplus);
302  v2 = vec_adds(v2, dcplus);
303  v3 = vec_adds(v3, dcplus);
304 
305  v0 = vec_subs(v0, dcminus);
306  v1 = vec_subs(v1, dcminus);
307  v2 = vec_subs(v2, dcminus);
308  v3 = vec_subs(v3, dcminus);
309 
310  VEC_ST(v0, 0, dst+0*stride);
311  VEC_ST(v1, 0, dst+1*stride);
312  VEC_ST(v2, 0, dst+2*stride);
313  VEC_ST(v3, 0, dst+3*stride);
314 
315  dst += 4*stride;
316  }
317 }
318 
/**
 * DC-only inverse transform for a 4x4 block: adds the rounded block[0] to
 * the 4x4 area at dst and clears block[0]
 * (see h264_idct_dc_add_internal()).
 */
static void h264_idct_dc_add_altivec(uint8_t *dst, int16_t *block, int stride)
{
    h264_idct_dc_add_internal(dst, block, stride, 4);
}
323 
/**
 * DC-only inverse transform for an 8x8 block: adds the rounded block[0] to
 * the 8x8 area at dst and clears block[0]
 * (see h264_idct_dc_add_internal()).
 */
static void h264_idct8_dc_add_altivec(uint8_t *dst, int16_t *block, int stride)
{
    h264_idct_dc_add_internal(dst, block, stride, 8);
}
328 
329 static void h264_idct_add16_altivec(uint8_t *dst, const int *block_offset,
330  int16_t *block, int stride,
331  const uint8_t nnzc[15 * 8])
332 {
333  int i;
334  for(i=0; i<16; i++){
335  int nnz = nnzc[ scan8[i] ];
336  if(nnz){
337  if(nnz==1 && block[i*16]) h264_idct_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
338  else h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride);
339  }
340  }
341 }
342 
343 static void h264_idct_add16intra_altivec(uint8_t *dst, const int *block_offset,
344  int16_t *block, int stride,
345  const uint8_t nnzc[15 * 8])
346 {
347  int i;
348  for(i=0; i<16; i++){
349  if(nnzc[ scan8[i] ]) h264_idct_add_altivec(dst + block_offset[i], block + i*16, stride);
350  else if(block[i*16]) h264_idct_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
351  }
352 }
353 
354 static void h264_idct8_add4_altivec(uint8_t *dst, const int *block_offset,
355  int16_t *block, int stride,
356  const uint8_t nnzc[15 * 8])
357 {
358  int i;
359  for(i=0; i<16; i+=4){
360  int nnz = nnzc[ scan8[i] ];
361  if(nnz){
362  if(nnz==1 && block[i*16]) h264_idct8_dc_add_altivec(dst + block_offset[i], block + i*16, stride);
363  else h264_idct8_add_altivec(dst + block_offset[i], block + i*16, stride);
364  }
365  }
366 }
367 
368 static void h264_idct_add8_altivec(uint8_t **dest, const int *block_offset,
369  int16_t *block, int stride,
370  const uint8_t nnzc[15 * 8])
371 {
372  int i, j;
373  for (j = 1; j < 3; j++) {
374  for(i = j * 16; i < j * 16 + 4; i++){
375  if(nnzc[ scan8[i] ])
376  h264_idct_add_altivec(dest[j-1] + block_offset[i], block + i*16, stride);
377  else if(block[i*16])
378  h264_idct_dc_add_altivec(dest[j-1] + block_offset[i], block + i*16, stride);
379  }
380  }
381 }
382 
/*
 * Interleave four byte vectors in place with two rounds of
 * vec_mergeh/vec_mergel.
 *
 * Afterwards every group of four consecutive bytes holds one byte from
 * each of the original r0, r1, r2, r3 (in that order); the updated r0
 * covers the first four source bytes, r1 the next four, and so on.
 * r4..r7 are temporaries local to the expansion, so the arguments must
 * not be named r4..r7.
 */
#define transpose4x16(r0, r1, r2, r3) { \
    register vec_u8 r4; \
    register vec_u8 r5; \
    register vec_u8 r6; \
    register vec_u8 r7; \
    \
    r4 = vec_mergeh(r0, r2); /*0, 2 set 0*/ \
    r5 = vec_mergel(r0, r2); /*0, 2 set 1*/ \
    r6 = vec_mergeh(r1, r3); /*1, 3 set 0*/ \
    r7 = vec_mergel(r1, r3); /*1, 3 set 1*/ \
    \
    r0 = vec_mergeh(r4, r6); /*all set 0*/ \
    r1 = vec_mergel(r4, r6); /*all set 1*/ \
    r2 = vec_mergeh(r5, r7); /*all set 2*/ \
    r3 = vec_mergel(r5, r7); /*all set 3*/ \
}
399 
400 static inline void write16x4(uint8_t *dst, int dst_stride,
401  register vec_u8 r0, register vec_u8 r1,
402  register vec_u8 r2, register vec_u8 r3) {
403  DECLARE_ALIGNED(16, unsigned char, result)[64];
404  uint32_t *src_int = (uint32_t *)result, *dst_int = (uint32_t *)dst;
405  int int_dst_stride = dst_stride/4;
406 
407  vec_st(r0, 0, result);
408  vec_st(r1, 16, result);
409  vec_st(r2, 32, result);
410  vec_st(r3, 48, result);
411  /* FIXME: there has to be a better way!!!! */
412  *dst_int = *src_int;
413  *(dst_int+ int_dst_stride) = *(src_int + 1);
414  *(dst_int+ 2*int_dst_stride) = *(src_int + 2);
415  *(dst_int+ 3*int_dst_stride) = *(src_int + 3);
416  *(dst_int+ 4*int_dst_stride) = *(src_int + 4);
417  *(dst_int+ 5*int_dst_stride) = *(src_int + 5);
418  *(dst_int+ 6*int_dst_stride) = *(src_int + 6);
419  *(dst_int+ 7*int_dst_stride) = *(src_int + 7);
420  *(dst_int+ 8*int_dst_stride) = *(src_int + 8);
421  *(dst_int+ 9*int_dst_stride) = *(src_int + 9);
422  *(dst_int+10*int_dst_stride) = *(src_int + 10);
423  *(dst_int+11*int_dst_stride) = *(src_int + 11);
424  *(dst_int+12*int_dst_stride) = *(src_int + 12);
425  *(dst_int+13*int_dst_stride) = *(src_int + 13);
426  *(dst_int+14*int_dst_stride) = *(src_int + 14);
427  *(dst_int+15*int_dst_stride) = *(src_int + 15);
428 }
429 
/** @brief Reads 16 rows starting at src and transposes their first six
           byte columns into the vectors r8..r13.

    After the expansion r8 holds column 0 of all 16 rows, r9 column 1, and
    so on up to r13 (column 5).  Nothing is written to memory.
    r0..r7, r14 and r15 are temporaries local to the expansion, so the
    output arguments must not use those names.
    @todo FIXME: see if we can't spare some vec_lvsl() by factoring them
    out of unaligned_load() */
#define readAndTranspose16x6(src, src_stride, r8, r9, r10, r11, r12, r13) {\
    register vec_u8 r0 = unaligned_load(0, src); \
    register vec_u8 r1 = unaligned_load( src_stride, src); \
    register vec_u8 r2 = unaligned_load(2* src_stride, src); \
    register vec_u8 r3 = unaligned_load(3* src_stride, src); \
    register vec_u8 r4 = unaligned_load(4* src_stride, src); \
    register vec_u8 r5 = unaligned_load(5* src_stride, src); \
    register vec_u8 r6 = unaligned_load(6* src_stride, src); \
    register vec_u8 r7 = unaligned_load(7* src_stride, src); \
    register vec_u8 r14 = unaligned_load(14*src_stride, src); \
    register vec_u8 r15 = unaligned_load(15*src_stride, src); \
    \
    r8 = unaligned_load( 8*src_stride, src); \
    r9 = unaligned_load( 9*src_stride, src); \
    r10 = unaligned_load(10*src_stride, src); \
    r11 = unaligned_load(11*src_stride, src); \
    r12 = unaligned_load(12*src_stride, src); \
    r13 = unaligned_load(13*src_stride, src); \
    \
    /*Merge first pairs*/ \
    r0 = vec_mergeh(r0, r8); /*0, 8*/ \
    r1 = vec_mergeh(r1, r9); /*1, 9*/ \
    r2 = vec_mergeh(r2, r10); /*2,10*/ \
    r3 = vec_mergeh(r3, r11); /*3,11*/ \
    r4 = vec_mergeh(r4, r12); /*4,12*/ \
    r5 = vec_mergeh(r5, r13); /*5,13*/ \
    r6 = vec_mergeh(r6, r14); /*6,14*/ \
    r7 = vec_mergeh(r7, r15); /*7,15*/ \
    \
    /*Merge second pairs*/ \
    r8 = vec_mergeh(r0, r4); /*0,4, 8,12 set 0*/ \
    r9 = vec_mergel(r0, r4); /*0,4, 8,12 set 1*/ \
    r10 = vec_mergeh(r1, r5); /*1,5, 9,13 set 0*/ \
    r11 = vec_mergel(r1, r5); /*1,5, 9,13 set 1*/ \
    r12 = vec_mergeh(r2, r6); /*2,6,10,14 set 0*/ \
    r13 = vec_mergel(r2, r6); /*2,6,10,14 set 1*/ \
    r14 = vec_mergeh(r3, r7); /*3,7,11,15 set 0*/ \
    r15 = vec_mergel(r3, r7); /*3,7,11,15 set 1*/ \
    \
    /*Third merge*/ \
    r0 = vec_mergeh(r8, r12); /*0,2,4,6,8,10,12,14 set 0*/ \
    r1 = vec_mergel(r8, r12); /*0,2,4,6,8,10,12,14 set 1*/ \
    r2 = vec_mergeh(r9, r13); /*0,2,4,6,8,10,12,14 set 2*/ \
    r4 = vec_mergeh(r10, r14); /*1,3,5,7,9,11,13,15 set 0*/ \
    r5 = vec_mergel(r10, r14); /*1,3,5,7,9,11,13,15 set 1*/ \
    r6 = vec_mergeh(r11, r15); /*1,3,5,7,9,11,13,15 set 2*/ \
    /* Don't need to compute 3 and 7*/ \
    \
    /*Final merge*/ \
    r8 = vec_mergeh(r0, r4); /*all set 0*/ \
    r9 = vec_mergel(r0, r4); /*all set 1*/ \
    r10 = vec_mergeh(r1, r5); /*all set 2*/ \
    r11 = vec_mergel(r1, r5); /*all set 3*/ \
    r12 = vec_mergeh(r2, r6); /*all set 4*/ \
    r13 = vec_mergel(r2, r6); /*all set 5*/ \
    /* Don't need to compute 14 and 15*/ \
    \
}
491 
492 // out: o = |x-y| < a
493 static inline vec_u8 diff_lt_altivec ( register vec_u8 x,
494  register vec_u8 y,
495  register vec_u8 a) {
496 
497  register vec_u8 diff = vec_subs(x, y);
498  register vec_u8 diffneg = vec_subs(y, x);
499  register vec_u8 o = vec_or(diff, diffneg); /* |x-y| */
500  o = (vec_u8)vec_cmplt(o, a);
501  return o;
502 }
503 
504 static inline vec_u8 h264_deblock_mask ( register vec_u8 p0,
505  register vec_u8 p1,
506  register vec_u8 q0,
507  register vec_u8 q1,
508  register vec_u8 alpha,
509  register vec_u8 beta) {
510 
511  register vec_u8 mask;
512  register vec_u8 tempmask;
513 
514  mask = diff_lt_altivec(p0, q0, alpha);
515  tempmask = diff_lt_altivec(p1, p0, beta);
516  mask = vec_and(mask, tempmask);
517  tempmask = diff_lt_altivec(q1, q0, beta);
518  mask = vec_and(mask, tempmask);
519 
520  return mask;
521 }
522 
523 // out: newp1 = clip((p2 + ((p0 + q0 + 1) >> 1)) >> 1, p1-tc0, p1+tc0)
524 static inline vec_u8 h264_deblock_q1(register vec_u8 p0,
525  register vec_u8 p1,
526  register vec_u8 p2,
527  register vec_u8 q0,
528  register vec_u8 tc0) {
529 
530  register vec_u8 average = vec_avg(p0, q0);
531  register vec_u8 temp;
532  register vec_u8 unclipped;
533  register vec_u8 ones;
534  register vec_u8 max;
535  register vec_u8 min;
536  register vec_u8 newp1;
537 
538  temp = vec_xor(average, p2);
539  average = vec_avg(average, p2); /*avg(p2, avg(p0, q0)) */
540  ones = vec_splat_u8(1);
541  temp = vec_and(temp, ones); /*(p2^avg(p0, q0)) & 1 */
542  unclipped = vec_subs(average, temp); /*(p2+((p0+q0+1)>>1))>>1 */
543  max = vec_adds(p1, tc0);
544  min = vec_subs(p1, tc0);
545  newp1 = vec_max(min, unclipped);
546  newp1 = vec_min(max, newp1);
547  return newp1;
548 }
549 
/*
 * Filter the two pixels adjacent to the edge, updating p0 and q0 in place.
 *
 * The signed correction is computed in unsigned bytes with a bias of 160
 * (A0v = 10 << 4 in every byte), split into its positive part (delta) and
 * negative part (deltaneg), each limited to tc0masked, and then applied
 * with saturating adds/subtracts.  Where tc0masked is zero the pixels are
 * left unchanged.
 * p1 and q1 are only read.  All temporaries are local to the expansion.
 */
#define h264_deblock_p0_q0(p0, p1, q0, q1, tc0masked) { \
    \
    const vec_u8 A0v = vec_sl(vec_splat_u8(10), vec_splat_u8(4)); \
    \
    register vec_u8 pq0bit = vec_xor(p0,q0); \
    register vec_u8 q1minus; \
    register vec_u8 p0minus; \
    register vec_u8 stage1; \
    register vec_u8 stage2; \
    register vec_u8 vec160; \
    register vec_u8 delta; \
    register vec_u8 deltaneg; \
    \
    q1minus = vec_nor(q1, q1); /* 255 - q1 */ \
    stage1 = vec_avg(p1, q1minus); /* (p1 - q1 + 256)>>1 */ \
    stage2 = vec_sr(stage1, vec_splat_u8(1)); /* (p1 - q1 + 256)>>2 = 64 + (p1 - q1) >> 2 */ \
    p0minus = vec_nor(p0, p0); /* 255 - p0 */ \
    stage1 = vec_avg(q0, p0minus); /* (q0 - p0 + 256)>>1 */ \
    pq0bit = vec_and(pq0bit, vec_splat_u8(1)); \
    stage2 = vec_avg(stage2, pq0bit); /* 32 + ((q0 - p0)&1 + (p1 - q1) >> 2 + 1) >> 1 */ \
    stage2 = vec_adds(stage2, stage1); /* 160 + ((p0 - q0) + (p1 - q1) >> 2 + 1) >> 1 */ \
    vec160 = vec_ld(0, &A0v); \
    deltaneg = vec_subs(vec160, stage2); /* -d */ \
    delta = vec_subs(stage2, vec160); /* d */ \
    deltaneg = vec_min(tc0masked, deltaneg); \
    delta = vec_min(tc0masked, delta); \
    p0 = vec_subs(p0, deltaneg); \
    q0 = vec_subs(q0, delta); \
    p0 = vec_adds(p0, delta); \
    q0 = vec_adds(q0, deltaneg); \
}
581 
/*
 * Luma deblocking of 16 edge positions held in six byte vectors.
 *
 * p2..q2  pixel vectors on both sides of the edge; p1, p0, q0 and q1 are
 *         updated in place, p2 and q2 are only read
 * alpha, beta
 *         scalar thresholds, broadcast to every byte via the aligned
 *         scratch buffer `temp`
 * tc0     pointer to four int8 clip values; each one is replicated four
 *         times, so it applies to four consecutive positions.  Positions
 *         whose tc0 is negative are excluded from filtering.
 *
 * The clip limit used for p0/q0 starts at tc0 and is raised by one for
 * each side (p, q) on which the |x2 - x0| < beta test passes; the mask
 * bytes are all-ones there, so subtracting the mask adds one.
 */
#define h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0) { \
    DECLARE_ALIGNED(16, unsigned char, temp)[16]; \
    register vec_u8 alphavec; \
    register vec_u8 betavec; \
    register vec_u8 mask; \
    register vec_u8 p1mask; \
    register vec_u8 q1mask; \
    register vector signed char tc0vec; \
    register vec_u8 finaltc0; \
    register vec_u8 tc0masked; \
    register vec_u8 newp1; \
    register vec_u8 newq1; \
    \
    temp[0] = alpha; \
    temp[1] = beta; \
    alphavec = vec_ld(0, temp); \
    betavec = vec_splat(alphavec, 0x1); \
    alphavec = vec_splat(alphavec, 0x0); \
    mask = h264_deblock_mask(p0, p1, q0, q1, alphavec, betavec); /*if in block */ \
    \
    AV_COPY32(temp, tc0); \
    tc0vec = vec_ld(0, (signed char*)temp); \
    tc0vec = vec_mergeh(tc0vec, tc0vec); \
    tc0vec = vec_mergeh(tc0vec, tc0vec); \
    mask = vec_and(mask, vec_cmpgt(tc0vec, vec_splat_s8(-1))); /* if tc0[i] >= 0 */ \
    finaltc0 = vec_and((vec_u8)tc0vec, mask); /* tc = tc0 */ \
    \
    p1mask = diff_lt_altivec(p2, p0, betavec); \
    p1mask = vec_and(p1mask, mask); /* if ( |p2 - p0| < beta) */ \
    tc0masked = vec_and(p1mask, (vec_u8)tc0vec); \
    finaltc0 = vec_sub(finaltc0, p1mask); /* tc++ */ \
    newp1 = h264_deblock_q1(p0, p1, p2, q0, tc0masked); \
    /*end if*/ \
    \
    q1mask = diff_lt_altivec(q2, q0, betavec); \
    q1mask = vec_and(q1mask, mask); /* if ( |q2 - q0| < beta ) */\
    tc0masked = vec_and(q1mask, (vec_u8)tc0vec); \
    finaltc0 = vec_sub(finaltc0, q1mask); /* tc++ */ \
    newq1 = h264_deblock_q1(p0, q1, q2, q0, tc0masked); \
    /*end if*/ \
    \
    h264_deblock_p0_q0(p0, p1, q0, q1, finaltc0); \
    p1 = newp1; \
    q1 = newq1; \
}
627 
628 static void h264_v_loop_filter_luma_altivec(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) {
629 
630  if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) >= 0) {
631  register vec_u8 p2 = vec_ld(-3*stride, pix);
632  register vec_u8 p1 = vec_ld(-2*stride, pix);
633  register vec_u8 p0 = vec_ld(-1*stride, pix);
634  register vec_u8 q0 = vec_ld(0, pix);
635  register vec_u8 q1 = vec_ld(stride, pix);
636  register vec_u8 q2 = vec_ld(2*stride, pix);
637  h264_loop_filter_luma_altivec(p2, p1, p0, q0, q1, q2, alpha, beta, tc0);
638  vec_st(p1, -2*stride, pix);
639  vec_st(p0, -1*stride, pix);
640  vec_st(q0, 0, pix);
641  vec_st(q1, stride, pix);
642  }
643 }
644 
645 static void h264_h_loop_filter_luma_altivec(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0) {
646 
647  register vec_u8 line0, line1, line2, line3, line4, line5;
648  if ((tc0[0] & tc0[1] & tc0[2] & tc0[3]) < 0)
649  return;
650  readAndTranspose16x6(pix-3, stride, line0, line1, line2, line3, line4, line5);
651  h264_loop_filter_luma_altivec(line0, line1, line2, line3, line4, line5, alpha, beta, tc0);
652  transpose4x16(line1, line2, line3, line4);
653  write16x4(pix-2, stride, line1, line2, line3, line4);
654 }
655 
656 static av_always_inline
657 void weight_h264_W_altivec(uint8_t *block, int stride, int height,
658  int log2_denom, int weight, int offset, int w)
659 {
660  int y, aligned;
661  vec_u8 vblock;
662  vec_s16 vtemp, vweight, voffset, v0, v1;
663  vec_u16 vlog2_denom;
664  DECLARE_ALIGNED(16, int32_t, temp)[4];
665  LOAD_ZERO;
666 
667  offset <<= log2_denom;
668  if(log2_denom) offset += 1<<(log2_denom-1);
669  temp[0] = log2_denom;
670  temp[1] = weight;
671  temp[2] = offset;
672 
673  vtemp = (vec_s16)vec_ld(0, temp);
674 #if !HAVE_BIGENDIAN
675  vtemp =(vec_s16)vec_perm(vtemp, vtemp, vcswapi2s(0,1,2,3));
676 #endif
677  vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
678  vweight = vec_splat(vtemp, 3);
679  voffset = vec_splat(vtemp, 5);
680  aligned = !((unsigned long)block & 0xf);
681 
682  for (y = 0; y < height; y++) {
683  vblock = vec_ld(0, block);
684 
685  v0 = (vec_s16)VEC_MERGEH(zero_u8v, vblock);
686  v1 = (vec_s16)VEC_MERGEL(zero_u8v, vblock);
687 
688  if (w == 16 || aligned) {
689  v0 = vec_mladd(v0, vweight, zero_s16v);
690  v0 = vec_adds(v0, voffset);
691  v0 = vec_sra(v0, vlog2_denom);
692  }
693  if (w == 16 || !aligned) {
694  v1 = vec_mladd(v1, vweight, zero_s16v);
695  v1 = vec_adds(v1, voffset);
696  v1 = vec_sra(v1, vlog2_denom);
697  }
698  vblock = vec_packsu(v0, v1);
699  vec_st(vblock, 0, block);
700 
701  block += stride;
702  }
703 }
704 
705 static av_always_inline
706 void biweight_h264_W_altivec(uint8_t *dst, uint8_t *src, int stride, int height,
707  int log2_denom, int weightd, int weights, int offset, int w)
708 {
709  int y, dst_aligned, src_aligned;
710  vec_u8 vsrc, vdst;
711  vec_s16 vtemp, vweights, vweightd, voffset, v0, v1, v2, v3;
712  vec_u16 vlog2_denom;
713  DECLARE_ALIGNED(16, int32_t, temp)[4];
714  LOAD_ZERO;
715 
716  offset = ((offset + 1) | 1) << log2_denom;
717  temp[0] = log2_denom+1;
718  temp[1] = weights;
719  temp[2] = weightd;
720  temp[3] = offset;
721 
722  vtemp = (vec_s16)vec_ld(0, temp);
723 #if !HAVE_BIGENDIAN
724  vtemp =(vec_s16)vec_perm(vtemp, vtemp, vcswapi2s(0,1,2,3));
725 #endif
726  vlog2_denom = (vec_u16)vec_splat(vtemp, 1);
727  vweights = vec_splat(vtemp, 3);
728  vweightd = vec_splat(vtemp, 5);
729  voffset = vec_splat(vtemp, 7);
730  dst_aligned = !((unsigned long)dst & 0xf);
731  src_aligned = !((unsigned long)src & 0xf);
732 
733  for (y = 0; y < height; y++) {
734  vdst = vec_ld(0, dst);
735  vsrc = vec_ld(0, src);
736 
737  v0 = (vec_s16)VEC_MERGEH(zero_u8v, vdst);
738  v1 = (vec_s16)VEC_MERGEL(zero_u8v, vdst);
739  v2 = (vec_s16)VEC_MERGEH(zero_u8v, vsrc);
740  v3 = (vec_s16)VEC_MERGEL(zero_u8v, vsrc);
741 
742  if (w == 8) {
743  if (src_aligned)
744  v3 = v2;
745  else
746  v2 = v3;
747  }
748 
749  if (w == 16 || dst_aligned) {
750  v0 = vec_mladd(v0, vweightd, zero_s16v);
751  v2 = vec_mladd(v2, vweights, zero_s16v);
752 
753  v0 = vec_adds(v0, voffset);
754  v0 = vec_adds(v0, v2);
755  v0 = vec_sra(v0, vlog2_denom);
756  }
757  if (w == 16 || !dst_aligned) {
758  v1 = vec_mladd(v1, vweightd, zero_s16v);
759  v3 = vec_mladd(v3, vweights, zero_s16v);
760 
761  v1 = vec_adds(v1, voffset);
762  v1 = vec_adds(v1, v3);
763  v1 = vec_sra(v1, vlog2_denom);
764  }
765  vdst = vec_packsu(v0, v1);
766  vec_st(vdst, 0, dst);
767 
768  dst += stride;
769  src += stride;
770  }
771 }
772 
/*
 * Define weight_h264_pixels<W>_altivec() and
 * biweight_h264_pixels<W>_altivec() for block width W.
 *
 * Both are thin wrappers that forward their arguments to the generic
 * always-inline helpers with W appended as the width argument.
 */
#define H264_WEIGHT(W) \
static void weight_h264_pixels ## W ## _altivec(uint8_t *block, ptrdiff_t stride, int height, \
                                                int log2_denom, int weight, int offset) \
{ \
    weight_h264_W_altivec(block, stride, height, log2_denom, weight, offset, W); \
}\
static void biweight_h264_pixels ## W ## _altivec(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, \
                                                  int log2_denom, int weightd, int weights, int offset) \
{ \
    biweight_h264_W_altivec(dst, src, stride, height, log2_denom, weightd, weights, offset, W); \
}

/* instantiate the 16- and 8-pixel-wide variants */
H264_WEIGHT(16)
H264_WEIGHT( 8)
787 #endif /* HAVE_ALTIVEC */
788 
790  const int chroma_format_idc)
791 {
792 #if HAVE_ALTIVEC
794  return;
795 
796  if (bit_depth == 8) {
797  c->h264_idct_add = h264_idct_add_altivec;
798  if (chroma_format_idc <= 1)
799  c->h264_idct_add8 = h264_idct_add8_altivec;
800  c->h264_idct_add16 = h264_idct_add16_altivec;
801  c->h264_idct_add16intra = h264_idct_add16intra_altivec;
802  c->h264_idct_dc_add= h264_idct_dc_add_altivec;
803  c->h264_idct8_dc_add = h264_idct8_dc_add_altivec;
804  c->h264_idct8_add = h264_idct8_add_altivec;
805  c->h264_idct8_add4 = h264_idct8_add4_altivec;
806  c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_altivec;
807  c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_altivec;
808 
809  c->weight_h264_pixels_tab[0] = weight_h264_pixels16_altivec;
810  c->weight_h264_pixels_tab[1] = weight_h264_pixels8_altivec;
811  c->biweight_h264_pixels_tab[0] = biweight_h264_pixels16_altivec;
812  c->biweight_h264_pixels_tab[1] = biweight_h264_pixels8_altivec;
813  }
814 #endif /* HAVE_ALTIVEC */
815 }
Memory handling functions.
else temp
Definition: vf_mcdeint.c:256
#define vec_s32
Definition: util_altivec.h:39
#define LOAD_ZERO
Definition: util_altivec.h:45
The reader does not expect b to be semantically here and if the code is changed by maybe adding a a division or other the signedness will almost certainly be mistaken To avoid this confusion a new type was SUINT is the C unsigned type but it holds a signed int to use the same example SUINT a
Definition: undefined.txt:36
GLfloat v0
Definition: opengl_enc.c:107
static const uint8_t q1[256]
Definition: twofish.c:96
#define src
Definition: vp8dsp.c:254
H.264 DSP functions.
Macro definitions for various function/variable attributes.
The exact code depends on how similar the blocks are and how related they are to the block
uint8_t
#define av_cold
Definition: attributes.h:82
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
h264_weight_func weight_h264_pixels_tab[4]
Definition: h264dsp.h:44
void(* h264_idct_add16intra)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp.h:99
#define DECLARE_ALIGNED(n, t, v)
Declare a variable that is aligned in memory.
Definition: mem.h:112
#define height
void(* h264_idct_add)(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp.h:81
ptrdiff_t size
Definition: opengl_enc.c:101
void(* h264_idct8_dc_add)(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp.h:87
void(* h264_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp.h:50
#define vec_s16
Definition: util_altivec.h:37
static int aligned(int val)
Definition: dashdec.c:178
#define s2
Definition: regdef.h:39
static const uint16_t mask[17]
Definition: lzw.c:38
#define PPC_ALTIVEC(flags)
Definition: cpu.h:25
#define s0
Definition: regdef.h:37
#define H264_WEIGHT(W)
static const uint8_t q0[256]
Definition: twofish.c:77
#define s5
Definition: regdef.h:42
static void idct6(int pre_mant[6])
Calculate 6-point IDCT of the pre-mantissas.
Definition: eac3dec.c:166
void(* h264_idct_add16)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp.h:90
h264_biweight_func biweight_h264_pixels_tab[4]
Definition: h264dsp.h:45
Context for storing H.264 DSP functions.
Definition: h264dsp.h:42
uint8_t w
Definition: llviddspenc.c:38
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
Definition: af_astats.c:205
int32_t
H.264 / AVC / MPEG-4 part10 codec.
av_cold void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
Definition: h264dsp.c:789
#define vec_u8
Definition: util_altivec.h:34
#define s4
Definition: regdef.h:41
void(* h264_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp.h:48
#define s3
Definition: regdef.h:40
void(* h264_idct8_add)(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp.h:83
#define vec_u32
Definition: util_altivec.h:38
static const int16_t alpha[]
Definition: ilbcdata.h:55
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2]...the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so...,+,-,+,-,+,+,-,+,-,+,...hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32-hcoeff[1]-hcoeff[2]-...a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2}an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||.........intra?||||:Block01:yes no||||:Block02:.................||||:Block03::y DC::ref index:||||:Block04::cb DC::motion x:||||.........:cr DC::motion 
y:||||.................|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------------------------------|||Y subbands||Cb subbands||Cr subbands||||------||------||------|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||------||------||------||||------||------||------|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||------||------||------||||------||------||------|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||------||------||------||||------||------||------|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------------------------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction------------|\Dequantization-------------------\||Reference frames|\IDWT|--------------|Motion\|||Frame 0||Frame 1||Compensation.OBMC v-------|--------------|--------------.\------> Frame n output Frame Frame<----------------------------------/|...|-------------------Range Coder:============Binary Range Coder:-------------------The implemented range coder is an adapted version based upon"Range encoding: an algorithm for removing redundancy from a digitised message."by G.N.N.Martin.The symbols encoded by the Snow range coder are bits(0|1).The associated probabilities are not fix but change depending on the symbol mix seen so far.bit seen|new state---------+-----------------------------------------------0|256-state_transition_table[256-old_state];1|state_transition_table[old_state];state_transition_table={0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 
106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:-------------------------FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector Prediction:=========================1.the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff)*mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
static const uint8_t scan8[16 *3+3]
Definition: h264dec.h:644
void(* h264_idct8_add4)(uint8_t *dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp.h:93
#define zero_u8v
Definition: util_altivec.h:47
#define s1
Definition: regdef.h:38
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:93
static int weight(int i, int blen, int offset)
Definition: diracdec.c:1557
Contains misc utility macros and inline functions.
#define zero_s16v
Definition: util_altivec.h:50
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:105
static av_always_inline int diff(const uint32_t a, const uint32_t b)
#define xf(width, name, var, range_min, range_max, subs,...)
Definition: cbs_av1.c:708
#define vec_u16
Definition: util_altivec.h:36
and forward the result(frame or status change) to the corresponding input.If nothing is possible
#define s6
Definition: regdef.h:43
void(* h264_idct_dc_add)(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp.h:85
#define av_always_inline
Definition: attributes.h:39
void(* h264_idct_add8)(uint8_t **dst, const int *blockoffset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp.h:96
#define stride
float min