00001
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #include "preprocessor.h"
00048
00049 #if !defined(FORCE_ALL_GENERICS) && \
00050 !defined(FORCE_GENERIC_VECT16_DOTMUL) && \
00051 defined(TARGET_SPECIFIC_VECT16_DOTMUL)
00052
/*
 * DSP16_DOTMUL_END_KERNEL_X_FCT(x_num, data)
 *
 * Compiler-specific front end for DSP16_DOTMUL_END_KERNEL_X_FCT__, which
 * generates one "tail" function handling x_num left-over elements.
 *  - GCC:  the generated definition is prefixed with
 *          __attribute__((__naked__)); the generated asm body saves and
 *          restores registers and returns by itself.
 *  - IAR (__ICCAVR32__): the generated definition is emitted unchanged.
 * No definition is provided for any other compiler.
 */
00053 #if __GNUC__
00054 # define DSP16_DOTMUL_END_KERNEL_X_FCT(x_num, data) __attribute__((__naked__)) DSP16_DOTMUL_END_KERNEL_X_FCT__(x_num, data)
00055 #elif __ICCAVR32__
00056 # define DSP16_DOTMUL_END_KERNEL_X_FCT(x_num, data) DSP16_DOTMUL_END_KERNEL_X_FCT__(x_num, data)
00057 #endif
00058
00059
/*
 * ASM_INSTRUCT_COMPACKED(str) / ASM_INSTRUCT_EXTENDED(str)
 *
 * Wrap an assembly mnemonic string so an instruction-format suffix can be
 * attached where the toolchain wants one.
 *  - GCC:  the string is passed through untouched.
 *  - IAR (__ICCAVR32__): ":C" or ":E" is appended by string concatenation.
 *    NOTE(review): presumably these select the compact / extended encoding
 *    in the IAR assembler -- confirm against the IAR AVR32 documentation.
 */
00060 #if __GNUC__
00061 # define ASM_INSTRUCT_COMPACKED(str) str
00062 # define ASM_INSTRUCT_EXTENDED(str) str
00063 #elif __ICCAVR32__
00064 # define ASM_INSTRUCT_COMPACKED(str) str":C"
00065 # define ASM_INSTRUCT_EXTENDED(str) str":E"
00066 #endif
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
/*
 * DSP16_DOTMULTIPLICATION_0: assembly fragment for zero left-over elements.
 * Expands to nothing, so the generated tail function only saves/restores
 * registers and returns.
 */
00081 #define DSP16_DOTMULTIPLICATION_0(r_vect1, r_vect2, r_vect3)
00082
/*
 * DSP16_DOTMULTIPLICATION_1: assembly fragment processing ONE element.
 *
 * r_vect1 / r_vect2 / r_vect3 are register names holding the destination
 * and the two source pointers; they are spliced into the instruction text
 * through ASTRINGZ (presumably a stringizing helper from preprocessor.h).
 *
 * Sequence: load one halfword from each source (ld.sh) into r2 and r3,
 * multiply them into r1, shift r1 right by DSP16_QB bits and store the low
 * halfword of r1 to r_vect1[0].
 * Registers written: r1, r2, r3.
 */
00083 #define DSP16_DOTMULTIPLICATION_1(r_vect1, r_vect2, r_vect3) \
00084 "ld.sh r2, "ASTRINGZ(r_vect2)"[0x0]\n\t" \
00085 "ld.sh r3, "ASTRINGZ(r_vect3)"[0x0]\n\t" \
00086 \
00087 "mul r1, r2, r3\n\t" \
00088 "lsr r1, "ASTRINGZ(DSP16_QB)"\n\t" \
00089 "st.h "ASTRINGZ(r_vect1)"[0x0], r1\n\t"
00090
/*
 * DSP16_DOTMULTIPLICATION_2: assembly fragment processing TWO elements.
 *
 * One 32-bit word (two packed halfword elements) is loaded from each source
 * into r2 and r3. The bottom halves are multiplied into r1 and the top
 * halves into r0 (mulhh.w); each product is shifted right by DSP16_QB bits.
 * sthh.w then packs the low halfwords of r0 and r1 into a single word stored
 * at r_vect1[0].
 * Registers written: r0, r1, r2, r3.
 * NOTE(review): the ld.w / sthh.w accesses presumably require the three
 * pointers to be word aligned -- confirm against the target's alignment rules.
 */
00091 #define DSP16_DOTMULTIPLICATION_2(r_vect1, r_vect2, r_vect3) \
00092 "ld.w r2, "ASTRINGZ(r_vect2)"[0x0]\n\t" \
00093 "ld.w r3, "ASTRINGZ(r_vect3)"[0x0]\n\t" \
00094 \
00095 "mulhh.w r1, r2:b, r3:b\n\t" \
00096 "lsr r1, "ASTRINGZ(DSP16_QB)"\n\t" \
00097 "mulhh.w r0, r2:t, r3:t\n\t" \
00098 "lsr r0, "ASTRINGZ(DSP16_QB)"\n\t" \
00099 "sthh.w "ASTRINGZ(r_vect1)"[0x0], r0:b, r1:b\n\t"
00100
/*
 * DSP16_DOTMULTIPLICATION_3: assembly fragment processing THREE elements.
 *
 * First part: the same two-element sequence as DSP16_DOTMULTIPLICATION_2
 * (word loads at byte offset 0, two mulhh.w products shifted right by
 * DSP16_QB, packed store with sthh.w at byte offset 0).
 * Second part: the same single-element sequence as
 * DSP16_DOTMULTIPLICATION_1, applied at byte offset 0x4, i.e. to the third
 * halfword element.
 * Registers written: r0, r1, r2, r3.
 */
00101 #define DSP16_DOTMULTIPLICATION_3(r_vect1, r_vect2, r_vect3) \
00102 "ld.w r2, "ASTRINGZ(r_vect2)"[0x0]\n\t" \
00103 "ld.w r3, "ASTRINGZ(r_vect3)"[0x0]\n\t" \
00104 \
00105 "mulhh.w r1, r2:b, r3:b\n\t" \
00106 "lsr r1, "ASTRINGZ(DSP16_QB)"\n\t" \
00107 "mulhh.w r0, r2:t, r3:t\n\t" \
00108 "lsr r0, "ASTRINGZ(DSP16_QB)"\n\t" \
00109 "sthh.w "ASTRINGZ(r_vect1)"[0x0], r0:b, r1:b\n\t" \
00110 \
00111 "ld.sh r2, "ASTRINGZ(r_vect2)"[0x4]\n\t" \
00112 "ld.sh r3, "ASTRINGZ(r_vect3)"[0x4]\n\t" \
00113 \
00114 "mul r1, r2, r3\n\t" \
00115 "lsr r1, "ASTRINGZ(DSP16_QB)"\n\t" \
00116 "st.h "ASTRINGZ(r_vect1)"[0x4], r1\n\t"
00117
00118
00119
00120
00121
00122
/*
 * DSP16_DOTMUL_END_KERNEL_X_FCT__(x_num, data)
 *
 * Generates the static function
 *     dsp16_vect_dotmul_end_kernel_x<x_num>(vect1, vect2, vect3)
 * whose body is a single asm statement:
 *   1. pushm saves r0-r3 and lr,
 *   2. DSP16_DOTMULTIPLICATION_<x_num> is expanded with r12, r11 and r10 as
 *      the destination / source pointer registers,
 *   3. popm restores r0-r3 and pops the saved lr into pc, which returns to
 *      the caller from inside the asm.
 * The asm names r12/r11/r10 directly instead of using operands; this assumes
 * the calling convention passes vect1, vect2, vect3 in those registers
 * (NOTE(review): confirm against the AVR32 ABI).
 * The `data` parameter is not used by this macro. TPASTE2 is presumably a
 * token-pasting helper from preprocessor.h.
 */
00123 #define DSP16_DOTMUL_END_KERNEL_X_FCT__(x_num, data) \
00124 static void TPASTE2(dsp16_vect_dotmul_end_kernel_x, x_num)(dsp16_t *vect1, dsp16_t *vect2, dsp16_t *vect3) \
00125 { \
00126 __asm__ __volatile__ ( \
00127 "pushm r0-r3, lr\n\t" \
00128 TPASTE2(DSP16_DOTMULTIPLICATION_, x_num)(r12, r11, r10) \
00129 "popm r0-r3, pc\n\t" \
00130 ); \
00131 }
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
/*
 * dsp16_vect_dotmul_kernel_ext: bulk kernel, four elements per iteration.
 *
 * For each group of four elements it stores
 *     (vect2[i] * vect3[i]) shifted right by DSP16_QB bits
 * into vect1[i] (only the low halfword of each shifted product is stored).
 * The loop runs while the element index is below size - 3, so the 0..3
 * trailing elements are left for the caller to process.
 *
 * The whole body is hand-written assembly that names r12, r11, r10 and r9
 * directly; this assumes the calling convention passes vect1, vect2, vect3
 * and size in those registers and returns the result in r12
 * (NOTE(review): confirm against the AVR32 ABI).
 *
 * Returns (through r12) the index of the first element NOT processed.
 * "popm r0-r7, pc" returns to the caller from inside the asm, so the
 * trailing "return 0;" is presumably never executed and only satisfies the
 * compiler.
 *
 * The function is declared naked/noinline for GCC and given the equivalent
 * pragmas for IAR; the asm defines plain labels
 * (__dsp16_dotmul_ext_loop / __dsp16_dotmul_ext_end_loop), so the body must
 * be emitted only once.
 *
 * NOTE(review): the loop bound is tested with cp.h, which per the AVR32
 * instruction set compares only the low halfwords of its operands; sizes
 * that do not fit a signed 16-bit value are presumably not supported --
 * confirm.
 */
00147 #if __GNUC__
00148 __attribute__((__naked__))
00149 __attribute__((__noinline__))
00150 #elif __ICCAVR32__
00151 # pragma shadow_registers=full
00152 # pragma optimize=none no_inline
00153 #endif
00154 static int dsp16_vect_dotmul_kernel_ext(dsp16_t *vect1, dsp16_t *vect2, dsp16_t *vect3, int size)
00155 {
00156 __asm__ __volatile__ ( \
00157 "pushm r0-r7, lr\n\t" \
00158 \
/* lr = element index (starts at 0); r9 = size - 3 = loop bound. */
00159 "mov lr, 0\n\t" \
00160 "sub r9, 3\n\t" \
00161 \
/* Skip the loop entirely when fewer than four elements are available. */
00162 "cp.h lr, r9\n\t" \
00163 ASM_INSTRUCT_COMPACKED("brge __dsp16_dotmul_ext_end_loop")"\n" \
00164 \
00165 "__dsp16_dotmul_ext_loop:\n\t" \
00166 \
/* Load four halfword elements of vect2 into r1:r0 and of vect3 into r3:r2. */
00167 "ld.d r0, r11[lr << 1]\n\t" \
00168 "ld.d r2, r10[lr << 1]\n\t" \
00169 \
/* First pair: products of the halves of r1 and r3, packed into one word. */
00170 "mulhh.w r4, r1:b, r3:b\n\t" \
00171 "lsr r4, "ASTRINGZ(DSP16_QB)"\n\t" \
00172 "mulhh.w r5, r1:t, r3:t\n\t" \
00173 "lsr r5, "ASTRINGZ(DSP16_QB)"\n\t" \
00174 "sthh.w r12[lr << 1], r5:b, r4:b\n\t" \
00175 \
/* "sub lr, -2" advances the index by two elements; second pair uses r0/r2. */
00176 "sub lr, -2\n\t" \
00177 "mulhh.w r4, r0:b, r2:b\n\t" \
00178 "lsr r4, "ASTRINGZ(DSP16_QB)"\n\t" \
00179 "mulhh.w r5, r0:t, r2:t\n\t" \
00180 "lsr r5, "ASTRINGZ(DSP16_QB)"\n\t" \
00181 "sthh.w r12[lr << 1], r5:b, r4:b\n\t" \
00182 \
00183 "sub lr, -2\n\t" \
00184 \
/* Continue while index < size - 3. */
00185 "cp.h lr, r9\n\t" \
00186 ASM_INSTRUCT_COMPACKED("brlt __dsp16_dotmul_ext_loop")"\n" \
00187 \
00188 "__dsp16_dotmul_ext_end_loop:\n\t" \
00189 \
/* Hand the number of processed elements back in r12 and return. */
00190 "mov r12, lr\n\t" \
00191 "popm r0-r7, pc\n\t" \
00192 ); \
00193
00194 return 0;
00195 }
00196
00197
/*
 * Instantiate the four tail functions dsp16_vect_dotmul_end_kernel_x0 ..
 * dsp16_vect_dotmul_end_kernel_x3, one per possible count (0..3) of elements
 * left over after the four-at-a-time bulk kernel. The second macro argument
 * ("") is not used by the generating macro.
 */
00198 DSP16_DOTMUL_END_KERNEL_X_FCT(0, "")
00199 DSP16_DOTMUL_END_KERNEL_X_FCT(1, "")
00200 DSP16_DOTMUL_END_KERNEL_X_FCT(2, "")
00201 DSP16_DOTMUL_END_KERNEL_X_FCT(3, "")
00202
00203 void dsp16_vect_dotmul(dsp16_t *vect1, dsp16_t *vect2, dsp16_t *vect3, int size)
00204 {
00205 typedef void (*dotmul_end_kernel_opti_t)(dsp16_t *, dsp16_t *, dsp16_t *);
00206 static const dotmul_end_kernel_opti_t dotmul_end_kernel_opti[4] = {
00207 dsp16_vect_dotmul_end_kernel_x0,
00208 dsp16_vect_dotmul_end_kernel_x1,
00209 dsp16_vect_dotmul_end_kernel_x2,
00210 dsp16_vect_dotmul_end_kernel_x3
00211 };
00212 int n;
00213
00214 n = dsp16_vect_dotmul_kernel_ext(vect1, vect2, vect3, size);
00215
00216
00217 dotmul_end_kernel_opti[size&0x3](&vect1[n], &vect2[n], &vect3[n]);
00218 }
00219
00220 #endif