39 "ldc1 $f4, 16+%3 \r\n"
40 "ldc1 $f6, 24+%3 \r\n"
41 "ldc1 $f8, 32+%3 \r\n"
42 "ldc1 $f10, 40+%3 \r\n"
43 "ldc1 $f12, 48+%3 \r\n"
44 "ldc1 $f14, 56+%3 \r\n"
45 "dadd $10, %0, %1 \r\n"
46 "packushb $f0, $f0, $f2 \r\n"
47 "packushb $f4, $f4, $f6 \r\n"
48 "packushb $f8, $f8, $f10 \r\n"
49 "packushb $f12, $f12, $f14 \r\n"
50 "sdc1 $f0, 0(%0) \r\n"
51 "sdc1 $f4, 0($10) \r\n"
52 "gssdxc1 $f8, 0($10, %1) \r\n"
53 "gssdxc1 $f12, 0(%0, %2) \r\n"
54 ::
"r"(pix),
"r"((
int)line_size),
55 "r"((
int)line_size*3),
"m"(*p)
65 "ldc1 $f4, 16+%3 \r\n"
66 "ldc1 $f6, 24+%3 \r\n"
67 "ldc1 $f8, 32+%3 \r\n"
68 "ldc1 $f10, 40+%3 \r\n"
69 "ldc1 $f12, 48+%3 \r\n"
70 "ldc1 $f14, 56+%3 \r\n"
71 "dadd $10, %0, %1 \r\n"
72 "packushb $f0, $f0, $f2 \r\n"
73 "packushb $f4, $f4, $f6 \r\n"
74 "packushb $f8, $f8, $f10 \r\n"
75 "packushb $f12, $f12, $f14 \r\n"
76 "sdc1 $f0, 0(%0) \r\n"
77 "sdc1 $f4, 0($10) \r\n"
78 "gssdxc1 $f8, 0($10, %1) \r\n"
79 "gssdxc1 $f12, 0(%0, %2) \r\n"
80 ::
"r"(pix),
"r"((
int)line_size),
81 "r"((
int)line_size*3),
"m"(*p)
89 int64_t line_skip = line_size;
94 "daddu %1, %3, %3 \n\t"
95 "ldc1 $f2, 0(%2) \n\t"
96 "ldc1 $f10, 8(%2) \n\t"
97 "packsshb $f2, $f2, $f10 \n\t"
98 "ldc1 $f4, 16(%2) \n\t"
99 "ldc1 $f10, 24(%2) \n\t"
100 "packsshb $f4, $f4, $f10 \n\t"
101 "ldc1 $f6, 32(%2) \n\t"
102 "ldc1 $f10, 40(%2) \n\t"
103 "packsshb $f6, $f6, $f10 \n\t"
104 "ldc1 $f8, 48(%2) \n\t"
105 "ldc1 $f10, 56(%2) \n\t"
106 "packsshb $f8, $f8, $f10 \n\t"
107 "paddb $f2, $f2, $f0 \n\t"
108 "paddb $f4, $f4, $f0 \n\t"
109 "paddb $f6, $f6, $f0 \n\t"
110 "paddb $f8, $f8, $f0 \n\t"
111 "sdc1 $f2, 0(%0) \n\t"
112 "gssdxc1 $f4, 0(%0, %3) \n\t"
113 "gssdxc1 $f6, 0(%0, %1) \n\t"
114 "daddu %1, %1, %3 \n\t"
115 "gssdxc1 $f8, 0(%0, %1) \n\t"
116 "daddu $10, %1, %3 \n\t"
117 "daddu %0, %0, $10 \n\t"
118 "ldc1 $f2, 64(%2) \n\t"
119 "ldc1 $f10, 8+64(%2) \n\t"
120 "packsshb $f2, $f2, $f10 \n\t"
121 "ldc1 $f4, 16+64(%2) \n\t"
122 "ldc1 $f10, 24+64(%2) \n\t"
123 "packsshb $f4, $f4, $f10 \n\t"
124 "ldc1 $f6, 32+64(%2) \n\t"
125 "ldc1 $f10, 40+64(%2) \n\t"
126 "packsshb $f6, $f6, $f10 \n\t"
127 "ldc1 $f8, 48+64(%2) \n\t"
128 "ldc1 $f10, 56+64(%2) \n\t"
129 "packsshb $f8, $f8, $f10 \n\t"
130 "paddb $f2, $f2, $f0 \n\t"
131 "paddb $f4, $f4, $f0 \n\t"
132 "paddb $f6, $f6, $f0 \n\t"
133 "paddb $f8, $f8, $f0 \n\t"
134 "sdc1 $f2, 0(%0) \n\t"
135 "gssdxc1 $f4, 0(%0, %3) \n\t"
136 "daddu $10, %3, %3 \n\t"
137 "gssdxc1 $f6, 0(%0, $10) \n\t"
138 "gssdxc1 $f8, 0(%0, %1) \n\t"
139 :
"+&r"(
pixels),
"=&r"(line_skip3)
156 "xor $f14, $f14, $f14 \r\n"
162 "ldc1 $f0, 0+%2 \r\n"
163 "ldc1 $f2, 8+%2 \r\n"
164 "ldc1 $f4, 16+%2 \r\n"
165 "ldc1 $f6, 24+%2 \r\n"
168 "mov.d $f10, $f8 \r\n"
169 "punpcklbh $f8, $f8, $f14 \r\n"
170 "punpckhbh $f10, $f10, $f14 \r\n"
171 "paddsh $f0, $f0, $f8 \r\n"
172 "paddsh $f2, $f2, $f10 \r\n"
173 "mov.d $f10, $f12 \r\n"
174 "punpcklbh $f12, $f12, $f14 \r\n"
175 "punpckhbh $f10, $f10, $f14 \r\n"
176 "paddsh $f4, $f4, $f12 \r\n"
177 "paddsh $f6, $f6, $f10 \r\n"
178 "packushb $f0, $f0, $f2 \r\n"
179 "packushb $f4, $f4, $f6 \r\n"
182 :
"+m"(*pix),
"+m"(*(pix+line_size))
void ff_put_signed_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
void ff_put_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)
void ff_add_pixels_clamped_mmi(const int16_t *block, uint8_t *av_restrict pixels, ptrdiff_t line_size)