FFmpeg
idctdsp_alpha.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/attributes.h"
22 #include "libavcodec/idctdsp.h"
23 #include "idctdsp_alpha.h"
24 #include "asm.h"
25 
/* Optimized assembler routines; they are only declared here.  The C code
   they were derived from is kept under "#if 0" in this file. */
void put_pixels_clamped_mvi_asm(const int16_t *block,
                                uint8_t *pixels,
                                ptrdiff_t line_size);
void add_pixels_clamped_mvi_asm(const int16_t *block,
                                uint8_t *pixels,
                                ptrdiff_t line_size);
30 
/* File-scope function pointers with the same signature as the clamp
   routines.
   NOTE(review): the declarator lines were missing from this listing and
   were restored from the file's own symbol index; presumably these are
   assigned in ff_idctdsp_init_alpha() -- confirm against upstream. */
void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
                                 ptrdiff_t line_size);
void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
                                 ptrdiff_t line_size);
35 
36 #if 0
37 /* These functions were the base for the optimized assembler routines,
38  and remain here for documentation purposes. */
/**
 * Reference C version of the "put" routine: clamp an 8x8 block of 16-bit
 * values to the range 0..255 and store it as 8 rows of 8 bytes.
 *
 * @param block     source values, 8 per row, 8 rows
 * @param pixels    destination of the first row
 * @param line_size offset added to pixels to reach the next row
 */
static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                                   ptrdiff_t line_size)
{
    /* 0x00ff00ff00ff00ff: the upper clamp bound (255) in every 16-bit lane */
    const uint64_t lane_max = zap(-1, 0xaa);
    int row;

    for (row = 0; row < 8; row++) {
        uint64_t left, right;

        /* First four values of the row: clamp below at 0, above at 255,
           then pack and store them at pixels[0..3]. */
        left = ldq(block);
        left = maxsw4(left, 0);
        left = minsw4(left, lane_max);
        stl(pkwb(left), pixels);

        /* Remaining four values of the row, stored at pixels[4..7]. */
        right = ldq(block + 4);
        right = maxsw4(right, 0);
        right = minsw4(right, lane_max);
        stl(pkwb(right), pixels + 4);

        block  += 8;
        pixels += line_size;
    }
}
62 
/**
 * Reference C version of the "add" routine: add an 8x8 block of 16-bit
 * values to the existing pixels, clamp each result to 0..255 and write
 * it back in place.
 *
 * @param block     values to add, 8 per row, 8 rows
 * @param pixels    first row of the pixels to update
 * @param line_size offset added to pixels to reach the next row
 */
void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                            ptrdiff_t line_size)
{
    /* Both constants are built with zap() instead of being loaded, which
       keeps this a leaf function (mainly for the hack value ;-). */
    const uint64_t lane_max   = zap(-1, 0xaa);  /* 0x00ff00ff00ff00ff */
    const uint64_t word_pairs = zap(-1, 0x33);
    const uint64_t sign_bits  = word_pairs ^ (word_pairs >> 1);
                                                /* 0x8000800080008000 */
    int row;

    for (row = 0; row < 8; row++) {
        uint64_t coef_lo = ldq(block);
        uint64_t coef_hi = ldq(block + 4);
        uint64_t cur_lo, top_lo;
        uint64_t cur_hi, top_hi;

        /* Signed subword add (MMX paddw): with bit 15 of every lane
           cleared, a 64-bit add cannot carry from one lane into the next;
           XORing the saved bits back in afterwards completes the
           lane-wise sum. */
        cur_lo   = unpkbw(ldl(pixels));
        top_lo   = coef_lo & sign_bits;
        coef_lo  = (coef_lo & ~sign_bits) + cur_lo;
        coef_lo ^= top_lo;
        /* Clamp each lane to 0..255. */
        coef_lo  = maxsw4(coef_lo, 0);
        coef_lo  = minsw4(coef_lo, lane_max);

        /* Same steps for the next four pixels. */
        cur_hi   = unpkbw(ldl(pixels + 4));
        top_hi   = coef_hi & sign_bits;
        coef_hi  = (coef_hi & ~sign_bits) + cur_hi;
        coef_hi ^= top_hi;
        coef_hi  = maxsw4(coef_hi, 0);
        coef_hi  = minsw4(coef_hi, lane_max);

        /* Both halves are written only after both have been read. */
        stl(pkwb(coef_lo), pixels);
        stl(pkwb(coef_hi), pixels + 4);

        block  += 8;
        pixels += line_size;
    }
}
106 #endif
107 
109  unsigned high_bit_depth)
110 {
111  /* amask clears all bits that correspond to present features. */
112  if (amask(AMASK_MVI) == 0) {
115  }
116 
119 
120  if (!high_bit_depth && !avctx->lowres &&
121  (avctx->idct_algo == FF_IDCT_AUTO)) {
125  }
126 }
#define minsw4(a, b)
Definition: asm.h:137
#define maxsw4(a, b)
Definition: asm.h:141
void(* put_pixels_clamped)(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
Definition: idctdsp.h:55
#define ldq(p)
Definition: asm.h:59
void(* add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
Definition: idctdsp_alpha.c:33
void(* put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
Definition: idctdsp_alpha.c:31
Macro definitions for various function/variable attributes.
The exact code depends on how similar the blocks are and how related they are to the block
uint8_t
#define av_cold
Definition: attributes.h:82
void ff_simple_idct_axp(int16_t *block)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
void ff_simple_idct_put_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
#define pkwb(a)
Definition: asm.h:144
int lowres
low resolution decoding, 1-> 1/2 size, 2->1/4 size
Definition: avcodec.h:2807
#define AMASK_MVI
Definition: asm.h:40
#define FF_IDCT_AUTO
Definition: avcodec.h:2772
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:259
#define amask
Definition: asm.h:99
void add_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
av_cold void ff_idctdsp_init_alpha(IDCTDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth)
#define stl(l, p)
Definition: asm.h:76
int idct_algo
IDCT algorithm, see FF_IDCT_* below.
Definition: avcodec.h:2771
typedef void(APIENTRY *FF_PFNGLACTIVETEXTUREPROC)(GLenum texture)
void put_pixels_clamped_mvi_asm(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size)
void(* idct_add)(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
Definition: idctdsp.h:79
void(* idct_put)(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
block -> idct -> clip to unsigned 8 bit -> dest.
Definition: idctdsp.h:72
main external API structure.
Definition: avcodec.h:1568
#define zap
Definition: asm.h:97
#define unpkbw(a)
Definition: asm.h:146
int pixels
Definition: avisynth_c.h:390
#define ldl(p)
Definition: asm.h:64
void(* idct)(int16_t *block)
Definition: idctdsp.h:65
void ff_simple_idct_add_axp(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
void(* add_pixels_clamped)(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
Definition: idctdsp.h:61