00001
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #include "preprocessor.h"
00048
00049 #if !defined(FORCE_ALL_GENERICS) && \
00050 !defined(FORCE_GENERIC_VECT16_DOTDIV) && \
00051 defined(TARGET_SPECIFIC_VECT16_DOTDIV)
00052
/*
 * DSP16_DOTDIV_END_KERNEL_X_FCT(x_num, data)
 *
 * Compiler-specific front end of DSP16_DOTDIV_END_KERNEL_X_FCT__: both
 * arguments are forwarded untouched.
 *  - GCC: the generated definition is prefixed with the "naked" function
 *    attribute.
 *  - IAR (ICCAVR32): the definition is emitted as is.
 * Any other compiler is rejected here with an explicit diagnostic rather than
 * leaving the macro undefined.
 */
#if defined(__GNUC__)
#  define DSP16_DOTDIV_END_KERNEL_X_FCT(x_num, data) \
     __attribute__((__naked__)) DSP16_DOTDIV_END_KERNEL_X_FCT__(x_num, data)
#elif defined(__ICCAVR32__)
#  define DSP16_DOTDIV_END_KERNEL_X_FCT(x_num, data) \
     DSP16_DOTDIV_END_KERNEL_X_FCT__(x_num, data)
#else
#  error "DSP16_DOTDIV_END_KERNEL_X_FCT: unsupported compiler (GCC or IAR ICCAVR32 expected)"
#endif
00058
00059
/*
 * ASM_INSTRUCT_COMPACKED(insn) / ASM_INSTRUCT_EXTENDED(insn)
 *
 * Wrap one assembly instruction given as a string literal.
 *  - GCC: the string is passed through unchanged.
 *  - IAR (ICCAVR32): the literal ":C" (COMPACKED) or ":E" (EXTENDED) is
 *    appended by string-literal concatenation.
 * NOTE(review): the suffixes presumably select the compact / extended
 * instruction encoding in the IAR assembler - confirm against the IAR manual.
 * Any other compiler is rejected here with an explicit diagnostic rather than
 * leaving the macros undefined.
 */
#if defined(__GNUC__)
#  define ASM_INSTRUCT_COMPACKED(insn)  insn
#  define ASM_INSTRUCT_EXTENDED(insn)   insn
#elif defined(__ICCAVR32__)
#  define ASM_INSTRUCT_COMPACKED(insn)  insn ":C"
#  define ASM_INSTRUCT_EXTENDED(insn)   insn ":E"
#else
#  error "ASM_INSTRUCT_*: unsupported compiler (GCC or IAR ICCAVR32 expected)"
#endif
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
/*
 * DSP16_DOTDIVISION_0 / DSP16_DOTDIVISION_1 (r_out, r_num, r_den)
 *
 * Assembly fragments (string literals) for a tail of zero or one element.
 * The arguments are register names: r_out addresses the result vector,
 * r_num the numerators and r_den the denominators.
 *
 * _0 expands to nothing.
 * _1 loads one halfword from each input, shifts the numerator left by
 *    DSP16_QB, divides it (divs) by the denominator and stores the low
 *    halfword of the quotient at offset 0 of r_out.
 *    Registers named in the fragment: r0, r2, r3.
 */
#define DSP16_DOTDIVISION_0(r_out, r_num, r_den)

#define DSP16_DOTDIVISION_1(r_out, r_num, r_den) \
  /* numerator -> r2, denominator -> r3 */ \
  "ld.sh r2, " ASTRINGZ(r_num) "[0x0]\n\t" \
  "ld.sh r3, " ASTRINGZ(r_den) "[0x0]\n\t" \
  /* scale the numerator before dividing */ \
  "lsl r2, " ASTRINGZ(DSP16_QB) "\n\t" \
  "divs r0, r2, r3\n\t" \
  /* write the quotient back */ \
  "st.h " ASTRINGZ(r_out) "[0x0], r0\n\t"
00090
/*
 * DSP16_DOTDIVISION_2(r_out, r_num, r_den)
 *
 * Assembly fragment (string literal) for a tail of two elements. The
 * arguments are register names: r_out addresses the result vector, r_num the
 * numerators and r_den the denominators.
 *
 * For each of the two elements the numerator halfword is loaded, shifted left
 * by DSP16_QB and divided (divs) by the denominator halfword; the quotients
 * land in r2 and r4. Their low halfwords are then written with one sthh.w at
 * offset 0 of r_out.
 *
 * The store uses the r_out argument (it used to be hard-wired to r12); the
 * expansion is unchanged when r12 is passed.
 * Registers named in the fragment: r0, r1, r2, r4.
 */
#define DSP16_DOTDIVISION_2(r_out, r_num, r_den) \
  /* element 0 -> r2 */ \
  "ld.sh r0, " ASTRINGZ(r_num) "[0x0]\n\t" \
  "ld.sh r1, " ASTRINGZ(r_den) "[0x0]\n\t" \
  "lsl r0, " ASTRINGZ(DSP16_QB) "\n\t" \
  "divs r2, r0, r1\n\t" \
  /* element 1 -> r4 */ \
  "ld.sh r0, " ASTRINGZ(r_num) "[0x2]\n\t" \
  "ld.sh r1, " ASTRINGZ(r_den) "[0x2]\n\t" \
  "lsl r0, " ASTRINGZ(DSP16_QB) "\n\t" \
  "divs r4, r0, r1\n\t" \
  /* both quotients in a single word store */ \
  "sthh.w " ASTRINGZ(r_out) "[0x0], r2:b, r4:b\n\t"
00103
/*
 * DSP16_DOTDIVISION_3(r_out, r_num, r_den)
 *
 * Assembly fragment (string literal) for a tail of three elements. The
 * arguments are register names: r_out addresses the result vector, r_num the
 * numerators and r_den the denominators.
 *
 * Elements 0 and 1 are divided into r2 and r4 and written together with one
 * sthh.w at offset 0 of r_out; element 2 is divided into r0 and written with
 * st.h at byte offset 4. Every numerator is shifted left by DSP16_QB before
 * the divs.
 *
 * The paired store uses the r_out argument (it used to be hard-wired to r12,
 * unlike the st.h of the same fragment); the expansion is unchanged when r12
 * is passed.
 * Registers named in the fragment: r0, r1, r2, r3, r4.
 */
#define DSP16_DOTDIVISION_3(r_out, r_num, r_den) \
  /* element 0 -> r2 */ \
  "ld.sh r0, " ASTRINGZ(r_num) "[0x0]\n\t" \
  "ld.sh r1, " ASTRINGZ(r_den) "[0x0]\n\t" \
  "lsl r0, " ASTRINGZ(DSP16_QB) "\n\t" \
  "divs r2, r0, r1\n\t" \
  /* element 1 -> r4 */ \
  "ld.sh r0, " ASTRINGZ(r_num) "[0x2]\n\t" \
  "ld.sh r1, " ASTRINGZ(r_den) "[0x2]\n\t" \
  "lsl r0, " ASTRINGZ(DSP16_QB) "\n\t" \
  "divs r4, r0, r1\n\t" \
  /* elements 0 and 1 in a single word store */ \
  "sthh.w " ASTRINGZ(r_out) "[0x0], r2:b, r4:b\n\t" \
  /* element 2 -> r0, stored on its own */ \
  "ld.sh r2, " ASTRINGZ(r_num) "[0x4]\n\t" \
  "ld.sh r3, " ASTRINGZ(r_den) "[0x4]\n\t" \
  "lsl r2, " ASTRINGZ(DSP16_QB) "\n\t" \
  "divs r0, r2, r3\n\t" \
  "st.h " ASTRINGZ(r_out) "[0x4], r0\n\t"
00123
00124
00125
00126
00127
00128
00129
/*
 * DSP16_DOTDIV_END_KERNEL_X_FCT__(tail_len, unused_data)
 *
 * Defines the static function dsp16_vect_dotdiv_end_kernel_x<tail_len>.
 * Its whole body is one asm statement: r0-r7 and lr are pushed, the fragment
 * DSP16_DOTDIVISION_<tail_len> is expanded with r12 / r11 / r10 as the
 * vect1 / vect2 / vect3 registers, and the final popm restores r0-r7 and
 * loads pc, which leaves the function.
 *
 * The C parameters are never referenced by name; the fragment reads the
 * registers directly (assumes the three pointer arguments arrive in r12, r11
 * and r10 - see the AVR32 calling convention).
 * The second macro argument does not appear in the expansion.
 */
#define DSP16_DOTDIV_END_KERNEL_X_FCT__(tail_len, unused_data) \
  static void TPASTE2(dsp16_vect_dotdiv_end_kernel_x, tail_len)(dsp16_t *vect1, dsp16_t *vect2, dsp16_t *vect3) \
  { \
    __asm__ __volatile__ ( \
      /* save the working registers and the return address */ \
      "pushm r0-r7, lr\n\t" \
      /* divide the remaining tail_len elements */ \
      TPASTE2(DSP16_DOTDIVISION_, tail_len)(r12, r11, r10) \
      /* restore and return */ \
      "popm r0-r7, pc\n\t" \
    ); \
  }
00139
00140
00141
00142
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00154 #if __GNUC__
00155 __attribute__((__naked__))
00156 __attribute__((__noinline__))
00157 #elif __ICCAVR32__
00158 # pragma shadow_registers=full
00159 # pragma optimize=none no_inline
00160 #endif
00161 static int dsp16_vect_dotdiv_kernel_ext(dsp16_t *vect1, dsp16_t *vect2, dsp16_t *vect3, int size)
00162 {
00163 __asm__ __volatile__ ( \
00164 "pushm r0-r7, lr\n\t" \
00165 \
00166 "mov lr, 0\n\t" \
00167 "sub r9, 3\n\t" \
00168 \
00169 "cp.h lr, r9\n\t" \
00170 ASM_INSTRUCT_COMPACKED("brge __dsp16_dotdiv_ext_end_loop")"\n" \
00171 \
00172 "__dsp16_dotdiv_ext_loop:\n\t" \
00173 \
00174 "ld.sh r0, r11[0x0]\n\t" \
00175 "ld.sh r1, r10[0x0]\n\t" \
00176 "lsl r0, "ASTRINGZ(DSP16_QB)"\n\t" \
00177 "divs r2, r0, r1\n\t" \
00178 \
00179 "ld.sh r0, r11[0x2]\n\t" \
00180 "ld.sh r1, r10[0x2]\n\t" \
00181 "lsl r0, "ASTRINGZ(DSP16_QB)"\n\t" \
00182 "divs r4, r0, r1\n\t" \
00183 \
00184 "sthh.w r12[lr << 1], r2:b, r4:b\n\t" \
00185 "sub lr, -2\n\t" \
00186 \
00187 "ld.sh r0, r11[0x4]\n\t" \
00188 "ld.sh r1, r10[0x4]\n\t" \
00189 "lsl r0, "ASTRINGZ(DSP16_QB)"\n\t" \
00190 "divs r2, r0, r1\n\t" \
00191 \
00192 "ld.sh r0, r11[0x6]\n\t" \
00193 "ld.sh r1, r10[0x6]\n\t" \
00194 "lsl r0, "ASTRINGZ(DSP16_QB)"\n\t" \
00195 "divs r4, r0, r1\n\t" \
00196 \
00197 "sub r10, -8\n\t" \
00198 "sub r11, -8\n\t" \
00199 \
00200 "sthh.w r12[lr << 1], r2:b, r4:b\n\t" \
00201 "sub lr, -2\n\t" \
00202 \
00203 "cp.h lr, r9\n\t" \
00204 ASM_INSTRUCT_COMPACKED("brlt __dsp16_dotdiv_ext_loop")"\n" \
00205 \
00206 "__dsp16_dotdiv_ext_end_loop:\n\t" \
00207 \
00208 "mov r12, lr\n\t" \
00209 "popm r0-r7, pc\n\t" \
00210 ); \
00211
00212 return 0;
00213 }
00214
00215
00216 DSP16_DOTDIV_END_KERNEL_X_FCT(0, "")
00217 DSP16_DOTDIV_END_KERNEL_X_FCT(1, "")
00218 DSP16_DOTDIV_END_KERNEL_X_FCT(2, "")
00219 DSP16_DOTDIV_END_KERNEL_X_FCT(3, "")
00220
00221 void dsp16_vect_dotdiv(dsp16_t *vect1, dsp16_t *vect2, dsp16_t *vect3, int size)
00222 {
00223 typedef void (*dotdiv_end_kernel_opti_t)(dsp16_t *, dsp16_t *, dsp16_t *);
00224 static const dotdiv_end_kernel_opti_t dotdiv_end_kernel_opti[4] = {
00225 dsp16_vect_dotdiv_end_kernel_x0,
00226 dsp16_vect_dotdiv_end_kernel_x1,
00227 dsp16_vect_dotdiv_end_kernel_x2,
00228 dsp16_vect_dotdiv_end_kernel_x3
00229 };
00230 int n;
00231
00232 n = dsp16_vect_dotdiv_kernel_ext(vect1, vect2, vect3, size);
00233
00234
00235 dotdiv_end_kernel_opti[size&0x3](&vect1[n], &vect2[n], &vect3[n]);
00236 }
00237
00238 #endif