00001
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #include "preprocessor.h"
00048
/*
 * Everything up to the matching #endif at the end of this file is compiled
 * only when a target-specific implementation is announced
 * (TARGET_SPECIFIC_VECT32_REALMUL defined) and neither FORCE_ALL_GENERICS nor
 * FORCE_GENERIC_VECT32_REALMUL has been defined to override it.
 */
00049 #if !defined(FORCE_ALL_GENERICS) && \
00050 !defined(FORCE_GENERIC_VECT32_REALMUL) && \
00051 defined(TARGET_SPECIFIC_VECT32_REALMUL)
00052
/*
 * DSP32_REALMUL_END_KERNEL_X_FCT(x_num, data)
 *
 * Compiler-dependent front end for DSP32_REALMUL_END_KERNEL_X_FCT__:
 *  - GCC: the generated function is additionally marked
 *    __attribute__((__naked__)).
 *  - IAR (__ICCAVR32__): the generator macro is used without any attribute.
 *
 * Both variants forward x_num and data unchanged. No definition is provided
 * for any other compiler.
 */
00053 #if __GNUC__
00054 # define DSP32_REALMUL_END_KERNEL_X_FCT(x_num, data) __attribute__((__naked__)) DSP32_REALMUL_END_KERNEL_X_FCT__(x_num, data)
00055 #elif __ICCAVR32__
00056 # define DSP32_REALMUL_END_KERNEL_X_FCT(x_num, data) DSP32_REALMUL_END_KERNEL_X_FCT__(x_num, data)
00057 #endif
00058
00059
/*
 * ASM_INSTRUCT_COMPACKED(str) / ASM_INSTRUCT_EXTENDED(str)
 *
 * Wrap an assembly instruction string so it can be adapted per assembler:
 *  - GCC: the string is passed through unchanged.
 *  - IAR (__ICCAVR32__): ":C" (COMPACKED) or ":E" (EXTENDED) is appended to
 *    the instruction text.
 *
 * NOTE(review): the suffixes presumably select the compact vs. extended
 * instruction encoding in the IAR assembler -- confirm against the IAR
 * documentation.
 */
00060 #if __GNUC__
00061 # define ASM_INSTRUCT_COMPACKED(str) str
00062 # define ASM_INSTRUCT_EXTENDED(str) str
00063 #elif __ICCAVR32__
00064 # define ASM_INSTRUCT_COMPACKED(str) str":C"
00065 # define ASM_INSTRUCT_EXTENDED(str) str":E"
00066 #endif
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
/*
 * Tail fragment for zero remaining elements: expands to nothing, so no
 * instruction is emitted between the surrounding assembly strings.
 */
00081 #define DSP32_REALMULTIPLICATION_0(r_vect1, r_vect2, real)
00082
/*
 * Assembly text fragment that multiplies ONE 32-bit element.
 *
 *   r_vect1 - register holding the destination address (written at offset 0)
 *   r_vect2 - register holding the source address (read at offset 0)
 *   real    - register holding the multiplier
 *
 * Sequence: load the word into r0, signed multiply into the r4/r5 pair, then
 * build the result in r3 as (r5 << (32 - DSP32_QB)) | (r4 >> DSP32_QB) and
 * store it. Overwrites r0, r3, r4 and r5.
 *
 * NOTE(review): ASTRINGZ is presumably the stringification helper from
 * preprocessor.h -- confirm.
 */
00083 #define DSP32_REALMULTIPLICATION_1(r_vect1, r_vect2, real) \
00084 "ld.w r0, "ASTRINGZ(r_vect2)"[0x0]\n\t" \
00085 \
00086 "muls.d r4, r0, "ASTRINGZ(real)"\n\t" \
00087 "lsl r3, r5, "ASTRINGZ(32-DSP32_QB)"\n\t" \
00088 "or r3, r3, r4 >> "ASTRINGZ(DSP32_QB)"\n\t" \
00089 "st.w "ASTRINGZ(r_vect1)"[0x0], r3\n\t"
00090
/*
 * Assembly text fragment that multiplies TWO consecutive 32-bit elements.
 *
 *   r_vect1 - register holding the destination address (written at offset 0)
 *   r_vect2 - register holding the source address (read at offset 0)
 *   real    - register holding the multiplier
 *
 * Sequence: one double-word load fills r0/r1; r1 and r0 are each multiplied
 * by `real` (product in the r4/r5 pair) and reduced to
 * (r5 << (32 - DSP32_QB)) | (r4 >> DSP32_QB), giving r3 and r2 respectively;
 * one double-word store writes the r2/r3 pair back.
 * Overwrites r0 through r5.
 */
00091 #define DSP32_REALMULTIPLICATION_2(r_vect1, r_vect2, real) \
00092 "ld.d r0, "ASTRINGZ(r_vect2)"[0x0]\n\t" \
00093 \
00094 "muls.d r4, r1, "ASTRINGZ(real)"\n\t" \
00095 "lsl r3, r5, "ASTRINGZ(32-DSP32_QB)"\n\t" \
00096 "or r3, r3, r4 >> "ASTRINGZ(DSP32_QB)"\n\t" \
00097 \
00098 "muls.d r4, r0, "ASTRINGZ(real)"\n\t" \
00099 "lsl r2, r5, "ASTRINGZ(32-DSP32_QB)"\n\t" \
00100 "or r2, r2, r4 >> "ASTRINGZ(DSP32_QB)"\n\t" \
00101 "st.d "ASTRINGZ(r_vect1)"[0x0], r2\n\t"
00102
/*
 * Assembly text fragment that multiplies THREE consecutive 32-bit elements.
 *
 *   r_vect1 - register holding the destination address
 *   r_vect2 - register holding the source address
 *   real    - register holding the multiplier
 *
 * The first two elements are handled as a pair (double-word load at offset
 * 0x0, two multiplies, double-word store at offset 0x0); the third element is
 * handled on its own with word accesses at offset 0x8. Each product is
 * reduced to (r5 << (32 - DSP32_QB)) | (r4 >> DSP32_QB).
 * Overwrites r0 through r5.
 */
00103 #define DSP32_REALMULTIPLICATION_3(r_vect1, r_vect2, real) \
00104 "ld.d r0, "ASTRINGZ(r_vect2)"[0x0]\n\t" \
00105 \
00106 "muls.d r4, r1, "ASTRINGZ(real)"\n\t" \
00107 "lsl r3, r5, "ASTRINGZ(32-DSP32_QB)"\n\t" \
00108 "or r3, r3, r4 >> "ASTRINGZ(DSP32_QB)"\n\t" \
00109 \
00110 "muls.d r4, r0, "ASTRINGZ(real)"\n\t" \
00111 "lsl r2, r5, "ASTRINGZ(32-DSP32_QB)"\n\t" \
00112 "or r2, r2, r4 >> "ASTRINGZ(DSP32_QB)"\n\t" \
00113 "st.d "ASTRINGZ(r_vect1)"[0x0], r2\n\t" \
00114 \
00115 "ld.w r0, "ASTRINGZ(r_vect2)"[0x8]\n\t" \
00116 \
00117 "muls.d r4, r0, "ASTRINGZ(real)"\n\t" \
00118 "lsl r3, r5, "ASTRINGZ(32-DSP32_QB)"\n\t" \
00119 "or r3, r3, r4 >> "ASTRINGZ(DSP32_QB)"\n\t" \
00120 "st.w "ASTRINGZ(r_vect1)"[0x8], r3\n\t"
00121
00122
00123
00124
00125
00126
00127
/*
 * DSP32_REALMUL_END_KERNEL_X_FCT__(x_num, data)
 *
 * Generates the static tail function dsp32_vect_realmul_end_kernel_x<x_num>
 * (vect1, vect2, real), which processes the last x_num elements of a vector.
 *
 * The generated body is a single assembly block: it saves r0-r7 and lr, runs
 * DSP32_REALMULTIPLICATION_<x_num> with r12 as destination address, r11 as
 * source address and r10 as multiplier, then restores r0-r7 and pops the
 * saved lr value into pc, which returns to the caller.
 *
 * The `data` parameter is not used by the expansion.
 *
 * NOTE(review): r12/r11/r10 are presumably the registers in which the AVR32
 * calling convention passes the three arguments, and TPASTE2 is presumably
 * the token-pasting helper from preprocessor.h -- confirm both.
 */
00128 #define DSP32_REALMUL_END_KERNEL_X_FCT__(x_num, data) \
00129 static void TPASTE2(dsp32_vect_realmul_end_kernel_x, x_num)(dsp32_t *vect1, dsp32_t *vect2, dsp32_t real) \
00130 { \
00131 __asm__ __volatile__ ( \
00132 "pushm r0-r7, lr\n\t" \
00133 TPASTE2(DSP32_REALMULTIPLICATION_, x_num)(r12, r11, r10) \
00134 "popm r0-r7, pc\n\t" \
00135 ); \
00136 }
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
/*
 * Main multiply kernel, written entirely in AVR32 assembly.
 *
 * Multiplies the elements of vect2 by `real` four at a time and stores the
 * results in vect1. The loop only runs while the element index is below
 * size - 3, so up to three trailing elements are left for the caller.
 *
 * Register usage inside the assembly block:
 *   r12 - destination base address (vect1)
 *   r11 - source base address (vect2)
 *   r10 - size, turned into the loop bound size - 3
 *   r9  - multiplier (real)
 *   lr  - element index, starts at 0 and advances by 2 per load/store pair
 *   r0-r5 - scratch for the loaded pair, the products and the results
 * NOTE(review): r12/r11/r10/r9 are presumably the AVR32 argument registers
 * for the four parameters -- confirm against the ABI.
 *
 * Each product is left in the r4/r5 pair by muls.d and reduced to
 * (r5 << (32 - DSP32_QB)) | (r4 >> DSP32_QB). When DSP_OPTI_ACCURACY is not
 * set in DSP_OPTIMIZATION the `or` instructions are compiled out, so the
 * contribution of the low product word (r4) is dropped.
 *
 * Returns (in r12) the final element index, which the caller uses as the
 * number of elements already processed.
 *
 * NOTE(review): both loop-bound tests use cp.h, which per the AVR32
 * instruction set compares only the lower halfwords of its operands. It looks
 * like sizes outside the signed 16-bit range would not be handled as
 * intended -- confirm before relying on large vectors.
 */
00152 #if __GNUC__
00153 __attribute__((__naked__))
00154 __attribute__((__noinline__))
00155 #elif __ICCAVR32__
00156 # pragma shadow_registers=full
00157 # pragma optimize=none no_inline
00158 #endif
00159 static int dsp32_vect_realmul_kernel_ext(dsp32_t *vect1, dsp32_t *vect2, int size, dsp32_t real)
00160 {
00161 __asm__ __volatile__ (
/* Save the working registers and the return address. */
00162 "pushm r0-r7, lr\n\t"
00163
/* lr = element index (0); r10 = size - 3 = loop bound. */
00164 "mov lr, 0\n\t"
00165 "sub r10, 3\n\t"
00166
/* Skip the loop entirely when fewer than four elements are available. */
00167 "cp.h lr, r10\n\t"
00168 ASM_INSTRUCT_COMPACKED("brge __dsp32_realmul_ext_end_loop")"\n"
00169
00170 "__dsp32_realmul_ext_loop:\n\t"
00171
/* First pair of the iteration: load two elements (index scaled by 4 bytes). */
00172 "ld.d r0, r11[lr << 2]\n\t"
00173
00174 "muls.d r4, r1, r9\n\t"
00175 "lsl r3, r5, "ASTRINGZ(32-DSP32_QB)"\n\t"
00176 #if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00177 "or r3, r3, r4 >> "ASTRINGZ(DSP32_QB)"\n\t"
00178 #endif
00179 "muls.d r4, r0, r9\n\t"
00180 "lsl r2, r5, "ASTRINGZ(32-DSP32_QB)"\n\t"
00181 #if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00182 "or r2, r2, r4 >> "ASTRINGZ(DSP32_QB)"\n\t"
00183 #endif
00184 "st.d r12[lr << 2], r2\n\t"
00185
/* Subtracting -2 advances the index by two elements. */
00186 "sub lr, -2\n\t"
00187
/* Second pair of the iteration: same sequence on the next two elements. */
00188 "ld.d r0, r11[lr << 2]\n\t"
00189
00190 "muls.d r4, r1, r9\n\t" \
00191 "lsl r3, r5, "ASTRINGZ(32-DSP32_QB)"\n\t"
00192 #if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00193 "or r3, r3, r4 >> "ASTRINGZ(DSP32_QB)"\n\t"
00194 #endif
00195 "muls.d r4, r0, r9\n\t"
00196 "lsl r2, r5, "ASTRINGZ(32-DSP32_QB)"\n\t"
00197 #if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00198 "or r2, r2, r4 >> "ASTRINGZ(DSP32_QB)"\n\t"
00199 #endif
00200 "st.d r12[lr << 2], r2\n\t"
00201
00202 "sub lr, -2\n\t"
00203
/* Keep looping while the index is still below size - 3. */
00204 "cp.h lr, r10\n\t"
00205 ASM_INSTRUCT_COMPACKED("brlt __dsp32_realmul_ext_loop")"\n"
00206
00207 "__dsp32_realmul_ext_end_loop:\n\t"
00208
/* Hand the final index back in r12, restore r0-r7 and return by popping the
   saved lr value into pc. */
00209 "mov r12, lr\n\t"
00210 "popm r0-r7, pc\n\t"
00211 );
00212
/* NOTE(review): the assembly block above already returns via popm ..., pc, so
   this statement is presumably only here to give the non-void function a
   return statement -- confirm for both compilers. */
00213 return 0;
00214 }
00215
00216
/*
 * Instantiate the four tail functions dsp32_vect_realmul_end_kernel_x0 ..
 * dsp32_vect_realmul_end_kernel_x3, one per possible count (0 to 3) of
 * elements left over after the main kernel. The second argument ("") is the
 * generator's `data` parameter, which its expansion does not use.
 */
00217 DSP32_REALMUL_END_KERNEL_X_FCT(0, "")
00218 DSP32_REALMUL_END_KERNEL_X_FCT(1, "")
00219 DSP32_REALMUL_END_KERNEL_X_FCT(2, "")
00220 DSP32_REALMUL_END_KERNEL_X_FCT(3, "")
00221
00222 void dsp32_vect_realmul(dsp32_t *vect1, dsp32_t *vect2, int size, dsp32_t real)
00223 {
00224 typedef void (*realmul_end_kernel_opti_t)(dsp32_t *, dsp32_t *, dsp32_t);
00225 static const realmul_end_kernel_opti_t realmul_end_kernel_opti[4] = {
00226 dsp32_vect_realmul_end_kernel_x0,
00227 dsp32_vect_realmul_end_kernel_x1,
00228 dsp32_vect_realmul_end_kernel_x2,
00229 dsp32_vect_realmul_end_kernel_x3
00230 };
00231 int n;
00232
00233 n = dsp32_vect_realmul_kernel_ext(vect1, vect2, size, real);
00234
00235
00236 realmul_end_kernel_opti[size&0x3](&vect1[n], &vect2[n], real);
00237 }
00238
00239 #endif