39 int w,
int h,
int sides)
44 last_line = buf + (height - 1) * wrap;
50 "movd (%0), %%mm0 \n\t"
51 "punpcklbw %%mm0, %%mm0 \n\t"
52 "punpcklwd %%mm0, %%mm0 \n\t"
53 "punpckldq %%mm0, %%mm0 \n\t"
54 "movq %%mm0, -8(%0) \n\t"
55 "movq -8(%0, %2), %%mm1 \n\t"
56 "punpckhbw %%mm1, %%mm1 \n\t"
57 "punpckhwd %%mm1, %%mm1 \n\t"
58 "punpckhdq %%mm1, %%mm1 \n\t"
59 "movq %%mm1, (%0, %2) \n\t"
65 "r" (ptr + wrap * height));
69 "movd (%0), %%mm0 \n\t"
70 "punpcklbw %%mm0, %%mm0 \n\t"
71 "punpcklwd %%mm0, %%mm0 \n\t"
72 "punpckldq %%mm0, %%mm0 \n\t"
73 "movq %%mm0, -8(%0) \n\t"
74 "movq %%mm0, -16(%0) \n\t"
75 "movq -8(%0, %2), %%mm1 \n\t"
76 "punpckhbw %%mm1, %%mm1 \n\t"
77 "punpckhwd %%mm1, %%mm1 \n\t"
78 "punpckhdq %%mm1, %%mm1 \n\t"
79 "movq %%mm1, (%0, %2) \n\t"
80 "movq %%mm1, 8(%0, %2) \n\t"
91 "movd (%0), %%mm0 \n\t"
92 "punpcklbw %%mm0, %%mm0 \n\t"
93 "punpcklwd %%mm0, %%mm0 \n\t"
94 "movd %%mm0, -4(%0) \n\t"
95 "movd -4(%0, %2), %%mm1 \n\t"
96 "punpcklbw %%mm1, %%mm1 \n\t"
97 "punpckhwd %%mm1, %%mm1 \n\t"
98 "punpckhdq %%mm1, %%mm1 \n\t"
99 "movd %%mm1, (%0, %2) \n\t"
105 "r" (ptr + wrap * height));
110 for (i = 0; i < h; i += 4) {
111 ptr = buf - (i + 1) * wrap - w;
114 "movq (%1, %0), %%mm0 \n\t"
115 "movq %%mm0, (%0) \n\t"
116 "movq %%mm0, (%0, %2) \n\t"
117 "movq %%mm0, (%0, %2, 2) \n\t"
118 "movq %%mm0, (%0, %3) \n\t"
125 "r" (ptr + width + 2 * w));
130 for (i = 0; i < h; i += 4) {
131 ptr = last_line + (i + 1) * wrap - w;
134 "movq (%1, %0), %%mm0 \n\t"
135 "movq %%mm0, (%0) \n\t"
136 "movq %%mm0, (%0, %2) \n\t"
137 "movq %%mm0, (%0, %2, 2) \n\t"
138 "movq %%mm0, (%0, %3) \n\t"
145 "r" (ptr + width + 2 * w));