00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022 #include "libavcodec/dsputil.h"
00023
00024 #define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
00025 #define SET_RND(regd) __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
00026 #define WAVG2B "wavg2b"
00027 #include "dsputil_iwmmxt_rnd_template.c"
00028 #undef DEF
00029 #undef SET_RND
00030 #undef WAVG2B
00031
00032 #define DEF(x, y) x ## _ ## y ##_iwmmxt
00033 #define SET_RND(regd) __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
00034 #define WAVG2B "wavg2br"
00035 #include "dsputil_iwmmxt_rnd_template.c"
00036 #undef DEF
00037 #undef SET_RND
00038 #undef WAVG2BR
00039
00040
/*
 * OP(AVG) expands to the complete body of an 8-pixel-wide vertical half-pel
 * ("y2") copy: every output row is AVG applied to two vertically adjacent
 * source rows.  AVG is the mnemonic of the byte-average instruction to use.
 *
 * The expansion uses the enclosing function's block, pixels, line_size and h:
 *   - pixels may have any alignment: its low three bits go to wcgr1 and each
 *     row is fetched as two aligned 64-bit loads merged with walignr1, so
 *     16 bytes are read per source row starting at the rounded-down address;
 *   - block is written with one 64-bit wstrd per row
 *     (NOTE(review): presumably has to be 8-byte aligned -- confirm);
 *   - two rows are produced per loop iteration and the loop only ends when
 *     h reaches exactly zero, so h must be a positive even number;
 *   - h + 1 source rows are read in total.
 *
 * "cc" is listed as clobbered because "subs" rewrites the condition flags.
 * The iwMMXt registers wr0-wr6 and wcgr1 are used without being declared.
 */
#define OP(AVG)                                                         \
    __asm__ volatile (                                                  \
        /* alignment: byte offset -> wcgr1, pointer rounded down */     \
        "and r12, %[pixels], #7 \n\t"                                   \
        "bic %[pixels], %[pixels], #7 \n\t"                             \
        "tmcr wcgr1, r12 \n\t"                                          \
                                                                        \
        /* first source row -> wr4 */                                   \
        "wldrd wr0, [%[pixels]] \n\t"                                   \
        "wldrd wr1, [%[pixels], #8] \n\t"                               \
        "add %[pixels], %[pixels], %[line_size] \n\t"                   \
        "walignr1 wr4, wr0, wr1 \n\t"                                   \
                                                                        \
        "1: \n\t"                                                       \
                                                                        \
        /* next source row -> wr5, emit AVG(wr4, wr5) */                \
        "wldrd wr2, [%[pixels]] \n\t"                                   \
        "wldrd wr3, [%[pixels], #8] \n\t"                               \
        "add %[pixels], %[pixels], %[line_size] \n\t"                   \
        "pld [%[pixels]] \n\t"                                          \
        "walignr1 wr5, wr2, wr3 \n\t"                                   \
        AVG " wr6, wr4, wr5 \n\t"                                       \
        "wstrd wr6, [%[block]] \n\t"                                    \
        "add %[block], %[block], %[line_size] \n\t"                     \
                                                                        \
        /* next source row -> wr4, emit AVG(wr4, wr5) */                \
        "wldrd wr0, [%[pixels]] \n\t"                                   \
        "wldrd wr1, [%[pixels], #8] \n\t"                               \
        "add %[pixels], %[pixels], %[line_size] \n\t"                   \
        "walignr1 wr4, wr0, wr1 \n\t"                                   \
        "pld [%[pixels]] \n\t"                                          \
        AVG " wr6, wr4, wr5 \n\t"                                       \
        "wstrd wr6, [%[block]] \n\t"                                    \
        "add %[block], %[block], %[line_size] \n\t"                     \
                                                                        \
        /* two rows done per pass */                                    \
        "subs %[h], %[h], #2 \n\t"                                      \
        "bne 1b \n\t"                                                   \
        : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h)          \
        : [line_size]"r"(line_size)                                     \
        : "cc", "memory", "r12");

/**
 * 8-wide vertical half-pel copy using the rounding byte average (wavg2br).
 *
 * @param block     destination, advanced by line_size per row
 * @param pixels    source, any alignment; rows are line_size bytes apart
 * @param line_size byte distance between rows, shared by source and destination
 * @param h         number of rows to write; must be even and greater than zero
 */
void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    OP("wavg2br");
}

/**
 * Same as put_pixels8_y2_iwmmxt() but using the non-rounding byte average
 * (wavg2b).
 */
void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
{
    OP("wavg2b");
}
#undef OP
00087
00088 void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size)
00089 {
00090 uint8_t *pixels2 = pixels + line_size;
00091
00092 __asm__ volatile (
00093 "mov r12, #4 \n\t"
00094 "1: \n\t"
00095 "pld [%[pixels], %[line_size2]] \n\t"
00096 "pld [%[pixels2], %[line_size2]] \n\t"
00097 "wldrd wr4, [%[pixels]] \n\t"
00098 "wldrd wr5, [%[pixels2]] \n\t"
00099 "pld [%[block], #32] \n\t"
00100 "wunpckelub wr6, wr4 \n\t"
00101 "wldrd wr0, [%[block]] \n\t"
00102 "wunpckehub wr7, wr4 \n\t"
00103 "wldrd wr1, [%[block], #8] \n\t"
00104 "wunpckelub wr8, wr5 \n\t"
00105 "wldrd wr2, [%[block], #16] \n\t"
00106 "wunpckehub wr9, wr5 \n\t"
00107 "wldrd wr3, [%[block], #24] \n\t"
00108 "add %[block], %[block], #32 \n\t"
00109 "waddhss wr10, wr0, wr6 \n\t"
00110 "waddhss wr11, wr1, wr7 \n\t"
00111 "waddhss wr12, wr2, wr8 \n\t"
00112 "waddhss wr13, wr3, wr9 \n\t"
00113 "wpackhus wr14, wr10, wr11 \n\t"
00114 "wpackhus wr15, wr12, wr13 \n\t"
00115 "wstrd wr14, [%[pixels]] \n\t"
00116 "add %[pixels], %[pixels], %[line_size2] \n\t"
00117 "subs r12, r12, #1 \n\t"
00118 "wstrd wr15, [%[pixels2]] \n\t"
00119 "add %[pixels2], %[pixels2], %[line_size2] \n\t"
00120 "bne 1b \n\t"
00121 : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2)
00122 : [line_size2]"r"(line_size << 1)
00123 : "cc", "memory", "r12");
00124 }
00125
00126 static void clear_blocks_iwmmxt(DCTELEM *blocks)
00127 {
00128 __asm__ volatile(
00129 "wzero wr0 \n\t"
00130 "mov r1, #(128 * 6 / 32) \n\t"
00131 "1: \n\t"
00132 "wstrd wr0, [%0] \n\t"
00133 "wstrd wr0, [%0, #8] \n\t"
00134 "wstrd wr0, [%0, #16] \n\t"
00135 "wstrd wr0, [%0, #24] \n\t"
00136 "subs r1, r1, #1 \n\t"
00137 "add %0, %0, #32 \n\t"
00138 "bne 1b \n\t"
00139 : "+r"(blocks)
00140 :
00141 : "r1"
00142 );
00143 }
00144
/**
 * Do-nothing function with the same parameter list as the pixel copy
 * routines in this file; none of its arguments is read or written.
 */
static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    (void)block;
    (void)pixels;
    (void)line_size;
    (void)h;
}
00149
00150
00151
00152
00153 int mm_flags = FF_MM_IWMMXT;
00154
00155 void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
00156 {
00157 if (avctx->dsp_mask) {
00158 if (avctx->dsp_mask & FF_MM_FORCE)
00159 mm_flags |= (avctx->dsp_mask & 0xffff);
00160 else
00161 mm_flags &= ~(avctx->dsp_mask & 0xffff);
00162 }
00163
00164 if (!(mm_flags & FF_MM_IWMMXT)) return;
00165
00166 c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
00167
00168 c->clear_blocks = clear_blocks_iwmmxt;
00169
00170 c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
00171 c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt;
00172 c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt;
00173 c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt;
00174 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt;
00175 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt;
00176 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt;
00177 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt;
00178
00179 c->put_pixels_tab[1][0] = put_pixels8_iwmmxt;
00180 c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt;
00181 c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt;
00182 c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt;
00183 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt;
00184 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt;
00185 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt;
00186 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt;
00187
00188 c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt;
00189 c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt;
00190 c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt;
00191 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt;
00192 c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt;
00193 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt;
00194 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt;
00195 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt;
00196
00197 c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt;
00198 c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt;
00199 c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt;
00200 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt;
00201 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt;
00202 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
00203 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
00204 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
00205 }