FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
hpeldsp_rnd_template.c
Go to the documentation of this file.
1 /*
2  * DSP utils mmx functions are compiled twice for rnd/no_rnd
3  * Copyright (c) 2000, 2001 Fabrice Bellard
4  * Copyright (c) 2003-2004 Michael Niedermayer <michaelni@gmx.at>
5  *
6  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
7  * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
8  * and improved by Zdenek Kabelac <kabi@users.sf.net>
9  *
10  * This file is part of FFmpeg.
11  *
12  * FFmpeg is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU Lesser General Public
14  * License as published by the Free Software Foundation; either
15  * version 2.1 of the License, or (at your option) any later version.
16  *
17  * FFmpeg is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20  * Lesser General Public License for more details.
21  *
22  * You should have received a copy of the GNU Lesser General Public
23  * License along with FFmpeg; if not, write to the Free Software
24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25  */
26 
27 // put_pixels
/*
 * put, pixels8_x2: for every row, combine the 8 source bytes starting at
 * pixels with the 8 source bytes starting at pixels + 1 (the horizontal
 * neighbours) through PAVGBP and store the 8 result bytes to block.
 *
 * block     - destination; 8 bytes are written per row
 * pixels    - source; 9 bytes are read per row (8-byte loads at offsets 0 and 1)
 * line_size - byte distance between consecutive rows; the same stride is
 *             applied to both block and pixels
 * h         - number of rows; one pass of the loop handles 4 rows and the
 *             loop only exits when h reaches exactly 0 ("subl $4" / "jnz"),
 *             so h has to be a positive multiple of 4
 *
 * PAVGBP and MOVQ_BFE are macros defined outside this listing. Judging by
 * the registers that are stored afterwards, PAVGBP(a, b, r, c, d, p) leaves
 * the combination of (a, b) in r and of (c, d) in p - presumably a byte-wise
 * average whose rounding depends on the rnd/no_rnd instantiation; confirm
 * against the macro definitions. MOVQ_BFE(mm6) presumably preloads a
 * constant into mm6 for that macro - TODO confirm.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 * NOTE(review): mm0-mm5 are written by the asm but the clobber list only
 * names REG_a and "memory"; no emms is visible in this function.
 */
28 static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
29 {
30  MOVQ_BFE(mm6);
31  __asm__ volatile(
    /* REG_a = 2 * line_size, the step used to advance two rows at a time */
32  "lea (%3, %3), %%"REG_a" \n\t"
33  ".p2align 3 \n\t"
34  "1: \n\t"
    /* rows 0 and 1: mm0/mm1 = row 0 at offsets 0/1, mm2/mm3 = row 1 at offsets 0/1 */
35  "movq (%1), %%mm0 \n\t"
36  "movq 1(%1), %%mm1 \n\t"
37  "movq (%1, %3), %%mm2 \n\t"
38  "movq 1(%1, %3), %%mm3 \n\t"
39  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
40  "movq %%mm4, (%2) \n\t"
41  "movq %%mm5, (%2, %3) \n\t"
    /* step source and destination down by two rows */
42  "add %%"REG_a", %1 \n\t"
43  "add %%"REG_a", %2 \n\t"
    /* rows 2 and 3: same sequence as above */
44  "movq (%1), %%mm0 \n\t"
45  "movq 1(%1), %%mm1 \n\t"
46  "movq (%1, %3), %%mm2 \n\t"
47  "movq 1(%1, %3), %%mm3 \n\t"
48  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
49  "movq %%mm4, (%2) \n\t"
50  "movq %%mm5, (%2, %3) \n\t"
51  "add %%"REG_a", %1 \n\t"
52  "add %%"REG_a", %2 \n\t"
    /* four rows finished; loop until the row counter hits zero */
53  "subl $4, %0 \n\t"
54  "jnz 1b \n\t"
55  :"+g"(h), "+S"(pixels), "+D"(block)
56  :"r"((x86_reg)line_size)
57  :REG_a, "memory");
58 }
59 
/*
 * put, pixels16_x2: 16-byte-wide variant of the horizontal combine. Each
 * row is processed as two 8-byte halves; for each half the bytes at offset
 * k are combined with the bytes at offset k + 1 through PAVGBP and the
 * result is stored to block at offset k (k = 0 and k = 8).
 *
 * block     - destination; 16 bytes are written per row
 * pixels    - source; 17 bytes are read per row (8-byte loads at offsets
 *             0, 1, 8 and 9)
 * line_size - byte distance between consecutive rows, applied to both
 *             block and pixels
 * h         - number of rows; 4 rows are handled per loop pass and the loop
 *             exits only when h reaches exactly 0, so h has to be a
 *             positive multiple of 4
 *
 * PAVGBP and MOVQ_BFE come from outside this listing. From the stores that
 * follow it, PAVGBP(a, b, r, c, d, p) evidently yields r from (a, b) and p
 * from (c, d); presumably a byte-wise average with rnd/no_rnd-dependent
 * rounding - confirm against the macro definition. MOVQ_BFE(mm6)
 * presumably sets up a constant in mm6 for it - TODO confirm.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 */
60 static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
61 {
62  MOVQ_BFE(mm6);
63  __asm__ volatile(
    /* REG_a = 2 * line_size (two-row step) */
64  "lea (%3, %3), %%"REG_a" \n\t"
65  ".p2align 3 \n\t"
66  "1: \n\t"
    /* rows 0 and 1, left 8 bytes */
67  "movq (%1), %%mm0 \n\t"
68  "movq 1(%1), %%mm1 \n\t"
69  "movq (%1, %3), %%mm2 \n\t"
70  "movq 1(%1, %3), %%mm3 \n\t"
71  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
72  "movq %%mm4, (%2) \n\t"
73  "movq %%mm5, (%2, %3) \n\t"
    /* rows 0 and 1, right 8 bytes */
74  "movq 8(%1), %%mm0 \n\t"
75  "movq 9(%1), %%mm1 \n\t"
76  "movq 8(%1, %3), %%mm2 \n\t"
77  "movq 9(%1, %3), %%mm3 \n\t"
78  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
79  "movq %%mm4, 8(%2) \n\t"
80  "movq %%mm5, 8(%2, %3) \n\t"
    /* move both pointers down by two rows */
81  "add %%"REG_a", %1 \n\t"
82  "add %%"REG_a", %2 \n\t"
    /* rows 2 and 3, left 8 bytes */
83  "movq (%1), %%mm0 \n\t"
84  "movq 1(%1), %%mm1 \n\t"
85  "movq (%1, %3), %%mm2 \n\t"
86  "movq 1(%1, %3), %%mm3 \n\t"
87  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
88  "movq %%mm4, (%2) \n\t"
89  "movq %%mm5, (%2, %3) \n\t"
    /* rows 2 and 3, right 8 bytes */
90  "movq 8(%1), %%mm0 \n\t"
91  "movq 9(%1), %%mm1 \n\t"
92  "movq 8(%1, %3), %%mm2 \n\t"
93  "movq 9(%1, %3), %%mm3 \n\t"
94  PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
95  "movq %%mm4, 8(%2) \n\t"
96  "movq %%mm5, 8(%2, %3) \n\t"
97  "add %%"REG_a", %1 \n\t"
98  "add %%"REG_a", %2 \n\t"
    /* four rows finished; repeat until the row counter is zero */
99  "subl $4, %0 \n\t"
100  "jnz 1b \n\t"
101  :"+g"(h), "+S"(pixels), "+D"(block)
102  :"r"((x86_reg)line_size)
103  :REG_a, "memory");
104 }
105 
/*
 * put, pixels8_y2: for every output row, combine the 8 source bytes of one
 * row with the 8 bytes of the row directly below it (vertical neighbours)
 * through PAVGBP and store the 8 result bytes to block.
 *
 * block     - destination; 8 bytes are written per row
 * pixels    - source; 8 bytes are read from each of h + 1 rows, because
 *             the last output row also needs the row below it
 * line_size - byte distance between consecutive rows, applied to both
 *             block and pixels
 * h         - number of output rows; 4 rows are handled per loop pass and
 *             the loop exits only when h reaches exactly 0, so h has to be
 *             a positive multiple of 4
 *
 * Each source row is loaded once: the row loaded last in one half of the
 * loop body is kept in a register (mm2, then mm0) and reused as the upper
 * row of the next pair.
 *
 * PAVGBP and MOVQ_BFE come from outside this listing. From the stores that
 * follow it, PAVGBP(a, b, r, c, d, p) evidently yields r from (a, b) and p
 * from (c, d); presumably a byte-wise average with rnd/no_rnd-dependent
 * rounding - confirm against the macro definition. MOVQ_BFE(mm6)
 * presumably sets up a constant in mm6 for it - TODO confirm.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 */
106 static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
107 {
108  MOVQ_BFE(mm6);
109  __asm__ volatile(
    /* REG_a = 2 * line_size (two-row step) */
110  "lea (%3, %3), %%"REG_a" \n\t"
    /* prime mm0 with source row 0 before entering the loop */
111  "movq (%1), %%mm0 \n\t"
112  ".p2align 3 \n\t"
113  "1: \n\t"
    /* mm1 = row 1, mm2 = row 2; outputs: (row1,row0) -> mm4, (row2,row1) -> mm5 */
114  "movq (%1, %3), %%mm1 \n\t"
115  "movq (%1, %%"REG_a"),%%mm2 \n\t"
116  PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
117  "movq %%mm4, (%2) \n\t"
118  "movq %%mm5, (%2, %3) \n\t"
119  "add %%"REG_a", %1 \n\t"
120  "add %%"REG_a", %2 \n\t"
    /* mm2 still holds the previous row 2; load the next two rows into mm1
       and mm0 so that mm0 carries the last row into the next loop pass */
121  "movq (%1, %3), %%mm1 \n\t"
122  "movq (%1, %%"REG_a"),%%mm0 \n\t"
123  PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
124  "movq %%mm4, (%2) \n\t"
125  "movq %%mm5, (%2, %3) \n\t"
126  "add %%"REG_a", %1 \n\t"
127  "add %%"REG_a", %2 \n\t"
    /* four output rows finished; repeat until the row counter is zero */
128  "subl $4, %0 \n\t"
129  "jnz 1b \n\t"
130  :"+g"(h), "+S"(pixels), "+D"(block)
131  :"r"((x86_reg)line_size)
132  :REG_a, "memory");
133 }
134 
/*
 * avg, pixels16_x2: 16-byte-wide horizontal combine that is merged with the
 * existing destination. For each 8-byte half of a row, the source bytes at
 * offset k and k + 1 are combined with PAVGB, that result is combined with
 * the 8 bytes already in block using PAVGB_MMX, and the outcome is written
 * back to block (k = 0 and k = 8).
 *
 * block     - destination; 16 bytes per row are read and then overwritten
 * pixels    - source; 17 bytes are read per row (8-byte loads at offsets
 *             0, 1, 8 and 9)
 * line_size - byte distance between consecutive rows, applied to both
 *             block and pixels
 * h         - number of rows; one row is handled per pass of the do/while
 *             loop, whose body runs before h is tested, so h must be at
 *             least 1 on entry
 *
 * PAVGB, PAVGB_MMX, MOVQ_BFE and JUMPALIGN come from outside this listing.
 * From the register that is stored afterwards, the third macro argument is
 * evidently the result register and mm6 (prepared by MOVQ_BFE) is passed as
 * the fourth; presumably these are byte-wise averages, with PAVGB's rounding
 * depending on the rnd/no_rnd instantiation - confirm against the macro
 * definitions.
 *
 * Asm operands: %0 = *block (read/write memory), %1 = *pixels (memory).
 * NOTE(review): "1%1", "8%1", "9%1" and "8%0" glue a displacement in front
 * of the operand text, which only assembles if the compiler prints the
 * memory operand in a form that accepts a leading displacement, such as
 * "(reg)" - confirm this holds for all supported compilers.
 */
135 static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
136 {
137  MOVQ_BFE(mm6);
138  JUMPALIGN();
139  do {
140  __asm__ volatile(
    /* left 8 bytes: mm0 = src[0..7], mm1 = src[1..8], mm3 = current dst[0..7] */
141  "movq %1, %%mm0 \n\t"
142  "movq 1%1, %%mm1 \n\t"
143  "movq %0, %%mm3 \n\t"
144  PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
145  PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6)
146  "movq %%mm0, %0 \n\t"
    /* right 8 bytes: same sequence at byte offset 8 */
147  "movq 8%1, %%mm0 \n\t"
148  "movq 9%1, %%mm1 \n\t"
149  "movq 8%0, %%mm3 \n\t"
150  PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
151  PAVGB_MMX(%%mm3, %%mm2, %%mm0, %%mm6)
152  "movq %%mm0, 8%0 \n\t"
153  :"+m"(*block)
154  :"m"(*pixels)
155  :"memory");
    /* advance one row in both buffers */
156  pixels += line_size;
157  block += line_size;
158  } while (--h);
159 }
160 
/*
 * avg, pixels8_y2: vertical combine merged with the existing destination.
 * For every output row, the 8 source bytes of one row are combined with the
 * 8 bytes of the row directly below it through PAVGBP; that result is then
 * combined with the 8 bytes already in block using PAVGB_MMX and written
 * back to block.
 *
 * block     - destination; 8 bytes per row are read and then overwritten
 * pixels    - source; 8 bytes are read from each of h + 1 rows, because
 *             the last output row also needs the row below it
 * line_size - byte distance between consecutive rows, applied to both
 *             block and pixels
 * h         - number of output rows; 4 rows are handled per loop pass and
 *             the loop exits only when h reaches exactly 0, so h has to be
 *             a positive multiple of 4
 *
 * PAVGBP, PAVGB_MMX and MOVQ_BFE come from outside this listing. From the
 * registers that are stored afterwards, PAVGBP(a, b, r, c, d, p) evidently
 * yields r from (a, b) and p from (c, d), and PAVGB_MMX's third argument is
 * its result register with mm6 (prepared by MOVQ_BFE) as the fourth;
 * presumably byte-wise averages, with PAVGBP's rounding depending on the
 * rnd/no_rnd instantiation - confirm against the macro definitions.
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 */
161 static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
162 {
163  MOVQ_BFE(mm6);
164  __asm__ volatile(
    /* REG_a = 2 * line_size (two-row step) */
165  "lea (%3, %3), %%"REG_a" \n\t"
    /* prime mm0 with source row 0 before entering the loop */
166  "movq (%1), %%mm0 \n\t"
167  ".p2align 3 \n\t"
168  "1: \n\t"
    /* mm1 = row 1, mm2 = row 2; (row1,row0) -> mm4, (row2,row1) -> mm5 */
169  "movq (%1, %3), %%mm1 \n\t"
170  "movq (%1, %%"REG_a"), %%mm2 \n\t"
171  PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
    /* merge with the current destination rows; the results land in mm0 and
       mm1, which are free now, while mm2 (row 2) is kept for the next half */
172  "movq (%2), %%mm3 \n\t"
173  PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6)
174  "movq (%2, %3), %%mm3 \n\t"
175  PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
176  "movq %%mm0, (%2) \n\t"
177  "movq %%mm1, (%2, %3) \n\t"
178  "add %%"REG_a", %1 \n\t"
179  "add %%"REG_a", %2 \n\t"
180 
    /* second half: mm2 is the upper row; the next two rows go to mm1 and mm0 */
181  "movq (%1, %3), %%mm1 \n\t"
182  "movq (%1, %%"REG_a"), %%mm0 \n\t"
183  PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
    /* results go to mm2 and mm1 here so that mm0 (the last row loaded)
       survives as the upper row for the next loop pass */
184  "movq (%2), %%mm3 \n\t"
185  PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6)
186  "movq (%2, %3), %%mm3 \n\t"
187  PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
188  "movq %%mm2, (%2) \n\t"
189  "movq %%mm1, (%2, %3) \n\t"
190  "add %%"REG_a", %1 \n\t"
191  "add %%"REG_a", %2 \n\t"
192 
    /* four output rows finished; repeat until the row counter is zero */
193  "subl $4, %0 \n\t"
194  "jnz 1b \n\t"
195  :"+g"(h), "+S"(pixels), "+D"(block)
196  :"r"((x86_reg)line_size)
197  :REG_a, "memory");
198 }