00001
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #include "dsp.h"
00048 #include "preprocessor.h"
00049
00050 #if !defined(FORCE_ALL_GENERICS) && \
00051 !defined(FORCE_GENERIC_VECT16_REALMUL) && \
00052 defined(TARGET_SPECIFIC_VECT16_REALMUL)
00053
/*
 * DSP16_REALMUL_END_KERNEL_X_FCT(x_num, data)
 *
 * Compiler-dependent front end for DSP16_REALMUL_END_KERNEL_X_FCT__: both
 * arguments are forwarded unchanged. With GCC the generated definition is
 * additionally prefixed with __attribute__((__naked__)); with IAR
 * (__ICCAVR32__) nothing is added. The macro is left undefined for any other
 * compiler.
 */
00054 #if defined(__GNUC__)
00055 # define DSP16_REALMUL_END_KERNEL_X_FCT(x_num, data) __attribute__((__naked__)) DSP16_REALMUL_END_KERNEL_X_FCT__(x_num, data)
00056 #elif defined(__ICCAVR32__)
00057 # define DSP16_REALMUL_END_KERNEL_X_FCT(x_num, data) DSP16_REALMUL_END_KERNEL_X_FCT__(x_num, data)
00058 #endif
00059
00060
/*
 * ASM_INSTRUCT_COMPACKED(str) / ASM_INSTRUCT_EXTENDED(str)
 *
 * Wrap an assembly instruction given as a string literal. With GCC the string
 * is passed through unchanged. With IAR (__ICCAVR32__) the literal ":C" or
 * ":E" is appended through string-literal concatenation.
 * NOTE(review): presumably these suffixes select the compact / extended
 * instruction encoding in the IAR assembler - confirm against the IAR manual.
 * Neither macro is defined for any other compiler.
 */
00061 #if defined(__GNUC__)
00062 # define ASM_INSTRUCT_COMPACKED(str) str
00063 # define ASM_INSTRUCT_EXTENDED(str) str
00064 #elif defined(__ICCAVR32__)
00065 # define ASM_INSTRUCT_COMPACKED(str) str":C"
00066 # define ASM_INSTRUCT_EXTENDED(str) str":E"
00067 #endif
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
/*
 * DSP16_REALMULTIPLICATION_<n>(r_vect1, r_vect2, real)
 *
 * Each macro expands to a sequence of assembly string literals that handles
 * exactly <n> 16-bit elements (n = 0..3): load from the address held in
 * register r_vect2, multiply by register `real`, shift the product right by
 * DSP16_QB and store the low halfword at the same offset from register
 * r_vect1. The arguments are register names that are spliced into the
 * instruction text through ASTRINGZ (defined elsewhere; presumably a
 * stringification helper - TODO confirm).
 *
 * All variants use r0, r1 and r2 as scratch registers.
 */

/* n = 0: no element left, expands to nothing. */
00082 #define DSP16_REALMULTIPLICATION_0(r_vect1, r_vect2, real)
00083
/* n = 1: one halfword load (ld.sh), one mul, one halfword store (st.h). */
00084 #define DSP16_REALMULTIPLICATION_1(r_vect1, r_vect2, real) \
00085 "ld.sh r2, "ASTRINGZ(r_vect2)"[0x0]\n\t" \
00086 /* r1 = (r2 * real) >> DSP16_QB, then store the low halfword */ \
00087 "mul r1, r2, "ASTRINGZ(real)"\n\t" \
00088 "lsr r1, "ASTRINGZ(DSP16_QB)"\n\t" \
00089 "st.h "ASTRINGZ(r_vect1)"[0x0], r1\n\t"
00090
/*
 * n = 2: one word load brings in two elements; the bottom (:b) and top (:t)
 * halfwords of r2 are multiplied separately with mulhh.w and both results are
 * written back with a single sthh.w.
 */
00091 #define DSP16_REALMULTIPLICATION_2(r_vect1, r_vect2, real) \
00092 "ld.w r2, "ASTRINGZ(r_vect2)"[0x0]\n\t" \
00093 /* r1 <- bottom halfword product, r0 <- top halfword product */ \
00094 "mulhh.w r1, r2:b, "ASTRINGZ(real)":b\n\t" \
00095 "lsr r1, "ASTRINGZ(DSP16_QB)"\n\t" \
00096 "mulhh.w r0, r2:t, "ASTRINGZ(real)":b\n\t" \
00097 "lsr r0, "ASTRINGZ(DSP16_QB)"\n\t" \
00098 "sthh.w "ASTRINGZ(r_vect1)"[0x0], r0:b, r1:b\n\t"
00099
/*
 * n = 3: the two-element sequence at byte offset 0x0 followed by the
 * one-element sequence at byte offset 0x4.
 */
00100 #define DSP16_REALMULTIPLICATION_3(r_vect1, r_vect2, real) \
00101 "ld.w r2, "ASTRINGZ(r_vect2)"[0x0]\n\t" \
00102 /* first two elements (same instructions as the n = 2 variant) */ \
00103 "mulhh.w r1, r2:b, "ASTRINGZ(real)":b\n\t" \
00104 "lsr r1, "ASTRINGZ(DSP16_QB)"\n\t" \
00105 "mulhh.w r0, r2:t, "ASTRINGZ(real)":b\n\t" \
00106 "lsr r0, "ASTRINGZ(DSP16_QB)"\n\t" \
00107 "sthh.w "ASTRINGZ(r_vect1)"[0x0], r0:b, r1:b\n\t" \
00108 /* third element, located 4 bytes after the first */ \
00109 "ld.sh r2, "ASTRINGZ(r_vect2)"[0x4]\n\t" \
00110 \
00111 "mul r1, r2, "ASTRINGZ(real)"\n\t" \
00112 "lsr r1, "ASTRINGZ(DSP16_QB)"\n\t" \
00113 "st.h "ASTRINGZ(r_vect1)"[0x4], r1\n\t"
00114
00115
00116
00117
00118
00119
/*
 * DSP16_REALMUL_END_KERNEL_X_FCT__(x_num, data)
 *
 * Generates the static function dsp16_vect_realmul_end_kernel_x<x_num>
 * (name built with TPASTE2, defined elsewhere; presumably a token-pasting
 * helper - TODO confirm). The function body is a single asm statement:
 *   - save r0-r3 and lr,
 *   - run DSP16_REALMULTIPLICATION_<x_num> on registers r12, r11, r10,
 *   - restore r0-r3 and pop the saved lr slot into pc, so control leaves the
 *     function from inside the asm.
 *
 * Passing r12/r11/r10 assumes the calling convention places vect1, vect2 and
 * real in those registers - TODO confirm against the AVR32 ABI.
 * The `data` parameter is not used by the expansion.
 */
00120 #define DSP16_REALMUL_END_KERNEL_X_FCT__(x_num, data) \
00121 static void TPASTE2(dsp16_vect_realmul_end_kernel_x, x_num)(dsp16_t *vect1, dsp16_t *vect2, dsp16_t real) \
00122 { \
00123 __asm__ __volatile__ ( \
00124 "pushm r0-r3, lr\n\t" \
00125 TPASTE2(DSP16_REALMULTIPLICATION_, x_num)(r12, r11, r10) \
00126 "popm r0-r3, pc\n\t" \
00127 ); \
00128 }
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
/*
 * dsp16_vect_realmul_kernel_ext - hand-written AVR32 assembly main loop.
 *
 * Walks vect2 four 16-bit elements per iteration, multiplies each element by
 * the bottom halfword of `real`, shifts the product right by DSP16_QB and
 * stores the low halfword at the same index of vect1.
 *
 * The body is one asm statement that saves r0-r7/lr on entry and finishes by
 * popping them back with pc in place of lr, so control leaves the function
 * from inside the asm. The trailing `return 0;` is presumably only there to
 * satisfy the compiler - TODO confirm.
 *
 * Register usage as written in the asm (assumes the calling convention passes
 * the four arguments in r12, r11, r10, r9 and returns the result in r12 -
 * TODO confirm against the AVR32 ABI):
 *   r12 = vect1 (destination)   r11 = vect2 (source)
 *   r10 = size, turned into the loop bound size - 3
 *   r9  = real                  lr  = running element index
 *   r0/r1 = loaded data         r4/r5 = products
 *
 * On exit r12 holds the final index: the number of elements handled by the
 * loop, always a multiple of 4 and 0 when size <= 3. The caller in this file
 * uses it as the index of the first element left for the tail kernels.
 *
 * NOTE(review): the loop bound is tested with cp.h (halfword compare), so
 * sizes that do not fit in 16 bits are presumably not supported - confirm.
 *
 * GCC builds mark the function naked and noinline; IAR builds use the
 * shadow_registers / optimize pragmas below instead.
 */
00144 #if defined(__GNUC__)
00145 __attribute__((__naked__))
00146 __attribute__((__noinline__))
00147 #elif defined(__ICCAVR32__)
00148 # pragma shadow_registers=full
00149 # pragma optimize=none no_inline
00150 #endif
00151 static int dsp16_vect_realmul_kernel_ext(dsp16_t *vect1, dsp16_t *vect2, int size, dsp16_t real)
00152 {
00153 __asm__ __volatile__ ( \
00154 "pushm r0-r7, lr\n\t" \
00155 /* lr = element index, r10 = size - 3 = loop bound */ \
00156 "mov lr, 0\n\t" \
00157 "sub r10, 3\n\t" \
00158 /* fewer than 4 elements: skip the loop entirely */ \
00159 "cp.h lr, r10\n\t" \
00160 ASM_INSTRUCT_COMPACKED("brge __dsp16_realmul_ext_end_loop")"\n" \
00161 \
00162 "__dsp16_realmul_ext_loop:\n\t" \
00163 /* fetch four 16-bit elements of vect2 into the r0/r1 pair */ \
00164 "ld.d r0, r11[lr << 1]\n\t" \
00165 /* elements held in r1: multiply both halves, store at index lr */ \
00166 "mulhh.w r4, r1:b, r9:b\n\t" \
00167 "lsr r4, "ASTRINGZ(DSP16_QB)"\n\t" \
00168 "mulhh.w r5, r1:t, r9:b\n\t" \
00169 "lsr r5, "ASTRINGZ(DSP16_QB)"\n\t" \
00170 "sthh.w r12[lr << 1], r5:b, r4:b\n\t" \
00171 /* lr += 2, then the two elements held in r0 */ \
00172 "sub lr, -2\n\t" \
00173 "mulhh.w r4, r0:b, r9:b\n\t" \
00174 "lsr r4, "ASTRINGZ(DSP16_QB)"\n\t" \
00175 "mulhh.w r5, r0:t, r9:b\n\t" \
00176 "lsr r5, "ASTRINGZ(DSP16_QB)"\n\t" \
00177 "sthh.w r12[lr << 1], r5:b, r4:b\n\t" \
00178 /* lr += 2: four elements done in this iteration */ \
00179 "sub lr, -2\n\t" \
00180 /* keep looping while lr < size - 3 */ \
00181 "cp.h lr, r10\n\t" \
00182 ASM_INSTRUCT_COMPACKED("brlt __dsp16_realmul_ext_loop")"\n" \
00183 \
00184 "__dsp16_realmul_ext_end_loop:\n\t" \
00185 /* hand the final index back in r12 and leave through popm ... pc */ \
00186 "mov r12, lr\n\t" \
00187 "popm r0-r7, pc\n\t" \
00188 ); \
00189
00190 return 0;
00191 }
00192
00193
/*
 * Instantiate the four tail kernels dsp16_vect_realmul_end_kernel_x0 .. _x3,
 * one per possible number of leftover elements (0 to 3). The second macro
 * argument ("") is not used by the macro expansion.
 */
00194 DSP16_REALMUL_END_KERNEL_X_FCT(0, "")
00195 DSP16_REALMUL_END_KERNEL_X_FCT(1, "")
00196 DSP16_REALMUL_END_KERNEL_X_FCT(2, "")
00197 DSP16_REALMUL_END_KERNEL_X_FCT(3, "")
00198
00199 void dsp16_vect_realmul(dsp16_t *vect1, dsp16_t *vect2, int size, dsp16_t real)
00200 {
00201 typedef void (*realmul_end_kernel_opti_t)(dsp16_t *, dsp16_t *, dsp16_t);
00202 static const realmul_end_kernel_opti_t realmul_end_kernel_opti[4] = {
00203 dsp16_vect_realmul_end_kernel_x0,
00204 dsp16_vect_realmul_end_kernel_x1,
00205 dsp16_vect_realmul_end_kernel_x2,
00206 dsp16_vect_realmul_end_kernel_x3
00207 };
00208 int n;
00209
00210 n = dsp16_vect_realmul_kernel_ext(vect1, vect2, size, real);
00211
00212
00213 realmul_end_kernel_opti[size&0x3](&vect1[n], &vect2[n], real);
00214 }
00215
00216 #endif
00217