00001
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #include "preprocessor.h"
00048
00049 #if !defined(FORCE_ALL_GENERICS) && \
00050 !defined(FORCE_GENERIC_VECT32_CONVPART) && \
00051 defined(TARGET_SPECIFIC_VECT32_CONVPART)
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
/*
 * Assembly fragment (a concatenation of string literals) that accumulates
 * eight products into r_sum1 with eight macs.d instructions.
 *
 *   r_vect1  base register of the first buffer
 *   r_h      register pointing just past the eight elements of the second
 *            buffer that are consumed here; it is moved back by 32 bytes first
 *   r_i      element index into r_vect1 (shifted left by 2 to form a byte
 *            offset); advanced by 8 at the end ("sub ..., -8")
 *   r_sum1   accumulator register handed to macs.d
 *
 * Offsets into r_h run downward (24, 16, 8, 0) while offsets from r2 run
 * upward (0, 8, 16, 24), and each loaded pair is multiplied crosswise
 * (r5*r6, then r4*r7), so the r_h buffer is walked in the opposite direction
 * to the r_vect1 buffer.
 *
 * r2 and r4-r7 are hard-coded scratch registers and are overwritten, so they
 * must not be passed as any of the macro arguments.
 */
00063 #define DSP32_COMPUT_TAP_8(r_vect1, r_h, r_i, r_sum1) \
00064 "sub "ASTRINGZ(r_h)", 32\n\t" \
00065 "add r2, "ASTRINGZ(r_vect1)", "ASTRINGZ(r_i)" << 2\n\t" \
00066 \
00067 "ld.d r4, "ASTRINGZ(r_h)"[24]\n\t" \
00068 "ld.d r6, r2[0]\n\t" \
00069 \
00070 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00071 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00072 \
00073 "ld.d r4, "ASTRINGZ(r_h)"[16]\n\t" \
00074 "ld.d r6, r2[8]\n\t" \
00075 \
00076 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00077 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00078 \
00079 "ld.d r4, "ASTRINGZ(r_h)"[8]\n\t" \
00080 "ld.d r6, r2[16]\n\t" \
00081 \
00082 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00083 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00084 \
00085 "ld.d r4, "ASTRINGZ(r_h)"[0]\n\t" \
00086 "ld.d r6, r2[24]\n\t" \
00087 \
00088 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00089 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00090 \
00091 "sub "ASTRINGZ(r_i)", -8\n\t"
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
/*
 * Tail fragment for zero leftover elements: expands to nothing, so no
 * instruction is emitted after the 8-element loop.
 */
00103 #define DSP32_COMPUT_TAP_ENDING_0(r_vect1, r_h, r_i, r_sum1)
00104
/*
 * Tail fragment for one leftover element: loads one word through r_h with
 * pre-decrement addressing, one word from r_vect1 at index r_i (shifted left
 * by 2), and accumulates their product into r_sum1.
 * Overwrites r4 and r6; r_i is left unchanged.
 */
00105 #define DSP32_COMPUT_TAP_ENDING_1(r_vect1, r_h, r_i, r_sum1) \
00106 "ld.w r4, --"ASTRINGZ(r_h)"\n\t" \
00107 "ld.w r6, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 2]\n\t" \
00108 \
00109 "macs.d "ASTRINGZ(r_sum1)", r4, r6\n\t"
00110
/*
 * Tail fragment for two leftover elements: loads one double word through r_h
 * with pre-decrement addressing and one double word from r_vect1 at index r_i
 * (shifted left by 2), then accumulates the two crosswise products
 * (r5*r6, r4*r7) into r_sum1.
 * Overwrites r4-r7; r_i is left unchanged.
 */
00111 #define DSP32_COMPUT_TAP_ENDING_2(r_vect1, r_h, r_i, r_sum1) \
00112 "ld.d r4, --"ASTRINGZ(r_h)"\n\t" \
00113 "ld.d r6, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 2]\n\t" \
00114 \
00115 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00116 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t"
00117
00118
/*
 * Tail fragment for three leftover elements (three macs.d into r_sum1).
 * r_h is moved back by 12 bytes and r2 is set to r_vect1 + (r_i << 2); one
 * double-word pair (r_h[4] with r2[0]) is multiplied crosswise, then a single
 * word pair (r_h[0] with r2[8]) is multiplied.
 * Overwrites r2 and r4-r7; r_i is left unchanged.
 */
00119 #define DSP32_COMPUT_TAP_ENDING_3(r_vect1, r_h, r_i, r_sum1) \
00120 "sub "ASTRINGZ(r_h)", 12\n\t" \
00121 "add r2, "ASTRINGZ(r_vect1)", "ASTRINGZ(r_i)" << 2\n\t" \
00122 \
00123 "ld.d r4, "ASTRINGZ(r_h)"[4]\n\t" \
00124 "ld.d r6, r2[0]\n\t" \
00125 \
00126 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00127 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00128 \
00129 "ld.w r4, "ASTRINGZ(r_h)"[0]\n\t" \
00130 "ld.w r6, r2[8]\n\t" \
00131 \
00132 "macs.d "ASTRINGZ(r_sum1)", r4, r6\n\t"
00133
00134
/*
 * Tail fragment for four leftover elements (four macs.d into r_sum1).
 * r_h is moved back by 16 bytes and r2 is set to r_vect1 + (r_i << 2); two
 * double-word pairs (r_h[8] with r2[0], r_h[0] with r2[8]) are each
 * multiplied crosswise.
 * Overwrites r2 and r4-r7; r_i is left unchanged.
 */
00135 #define DSP32_COMPUT_TAP_ENDING_4(r_vect1, r_h, r_i, r_sum1) \
00136 "sub "ASTRINGZ(r_h)", 16\n\t" \
00137 "add r2, "ASTRINGZ(r_vect1)", "ASTRINGZ(r_i)" << 2\n\t" \
00138 \
00139 "ld.d r4, "ASTRINGZ(r_h)"[8]\n\t" \
00140 "ld.d r6, r2[0]\n\t" \
00141 \
00142 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00143 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00144 \
00145 "ld.d r4, "ASTRINGZ(r_h)"[0]\n\t" \
00146 "ld.d r6, r2[8]\n\t" \
00147 \
00148 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00149 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t"
00150
00151
/*
 * Tail fragment for five leftover elements (five macs.d into r_sum1).
 * r_h is moved back by 20 bytes and r2 is set to r_vect1 + (r_i << 2); two
 * double-word pairs (r_h[12] with r2[0], r_h[4] with r2[8]) are multiplied
 * crosswise, then a single word pair (r_h[0] with r2[16]) is multiplied.
 * Overwrites r2 and r4-r7; r_i is left unchanged.
 */
00152 #define DSP32_COMPUT_TAP_ENDING_5(r_vect1, r_h, r_i, r_sum1) \
00153 "sub "ASTRINGZ(r_h)", 20\n\t" \
00154 "add r2, "ASTRINGZ(r_vect1)", "ASTRINGZ(r_i)" << 2\n\t" \
00155 \
00156 "ld.d r4, "ASTRINGZ(r_h)"[12]\n\t" \
00157 "ld.d r6, r2[0]\n\t" \
00158 \
00159 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00160 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00161 \
00162 "ld.d r4, "ASTRINGZ(r_h)"[4]\n\t" \
00163 "ld.d r6, r2[8]\n\t" \
00164 \
00165 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00166 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00167 \
00168 "ld.w r4, "ASTRINGZ(r_h)"[0]\n\t" \
00169 "ld.w r6, r2[16]\n\t" \
00170 \
00171 "macs.d "ASTRINGZ(r_sum1)", r4, r6\n\t"
00172
/*
 * Tail fragment for six leftover elements (six macs.d into r_sum1).
 * r_h is moved back by 24 bytes and r2 is set to r_vect1 + (r_i << 2); three
 * double-word pairs (r_h[16] with r2[0], r_h[8] with r2[8], r_h[0] with
 * r2[16]) are each multiplied crosswise.
 * Overwrites r2 and r4-r7; r_i is left unchanged.
 */
00173 #define DSP32_COMPUT_TAP_ENDING_6(r_vect1, r_h, r_i, r_sum1) \
00174 "sub "ASTRINGZ(r_h)", 24\n\t" \
00175 "add r2, "ASTRINGZ(r_vect1)", "ASTRINGZ(r_i)" << 2\n\t" \
00176 \
00177 "ld.d r4, "ASTRINGZ(r_h)"[16]\n\t" \
00178 "ld.d r6, r2[0]\n\t" \
00179 \
00180 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00181 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00182 \
00183 "ld.d r4, "ASTRINGZ(r_h)"[8]\n\t" \
00184 "ld.d r6, r2[8]\n\t" \
00185 \
00186 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00187 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00188 \
00189 "ld.d r4, "ASTRINGZ(r_h)"[0]\n\t" \
00190 "ld.d r6, r2[16]\n\t" \
00191 \
00192 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00193 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t"
00194
/*
 * Tail fragment for seven leftover elements (seven macs.d into r_sum1).
 * r_h is moved back by 28 bytes and r2 is set to r_vect1 + (r_i << 2); three
 * double-word pairs (r_h[20] with r2[0], r_h[12] with r2[8], r_h[4] with
 * r2[16]) are multiplied crosswise, then a single word pair (r_h[0] with
 * r2[24]) is multiplied.
 * Overwrites r2 and r4-r7; r_i is left unchanged.
 */
00195 #define DSP32_COMPUT_TAP_ENDING_7(r_vect1, r_h, r_i, r_sum1) \
00196 "sub "ASTRINGZ(r_h)", 28\n\t" \
00197 "add r2, "ASTRINGZ(r_vect1)", "ASTRINGZ(r_i)" << 2\n\t" \
00198 \
00199 "ld.d r4, "ASTRINGZ(r_h)"[20]\n\t" \
00200 "ld.d r6, r2[0]\n\t" \
00201 \
00202 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00203 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00204 \
00205 "ld.d r4, "ASTRINGZ(r_h)"[12]\n\t" \
00206 "ld.d r6, r2[8]\n\t" \
00207 \
00208 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00209 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00210 \
00211 "ld.d r4, "ASTRINGZ(r_h)"[4]\n\t" \
00212 "ld.d r6, r2[16]\n\t" \
00213 \
00214 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00215 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00216 \
00217 "ld.w r4, "ASTRINGZ(r_h)"[0]\n\t" \
00218 "ld.w r6, r2[24]\n\t" \
00219 \
00220 "macs.d "ASTRINGZ(r_sum1)", r4, r6\n\t"
00221
00222
/*
 * Compiler-specific front end for DSP32_CONVPART_KERNEL_X_FCT__.
 * With GCC the generated function definition is prefixed with
 * __attribute__((__naked__)); with IAR (__ICCAVR32__) it is emitted unchanged.
 * NOTE(review): there is no #else branch, so the macro stays undefined for
 * any other compiler and its uses will not expand.
 */
00223 #if __GNUC__
00224 # define DSP32_CONVPART_KERNEL_X_FCT(x_num, data) __attribute__((__naked__)) DSP32_CONVPART_KERNEL_X_FCT__(x_num, data)
00225 #elif __ICCAVR32__
00226 # define DSP32_CONVPART_KERNEL_X_FCT(x_num, data) DSP32_CONVPART_KERNEL_X_FCT__(x_num, data)
00227 #endif
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
/*
 * Wrappers applied to an assembly instruction string.
 * With GCC both return the string unchanged; with IAR (__ICCAVR32__) they
 * append ":C" (ASM_INSTRUCT_COMPACKED) or ":E" (ASM_INSTRUCT_EXTENDED) to it.
 * NOTE(review): the suffixes presumably select the compact / extended
 * instruction encoding in the IAR assembler - confirm in the IAR documentation.
 * NOTE(review): there is no #else branch, so both macros stay undefined for
 * any other compiler.
 */
00259 #if __GNUC__
00260 # define ASM_INSTRUCT_COMPACKED(str) str
00261 # define ASM_INSTRUCT_EXTENDED(str) str
00262 #elif __ICCAVR32__
00263 # define ASM_INSTRUCT_COMPACKED(str) str":C"
00264 # define ASM_INSTRUCT_EXTENDED(str) str":E"
00265 #endif
00266
/*
 * Generates one static kernel, dsp32_vect_convpart_kernel_x<x_num>, whose
 * whole body is a single inline-assembly statement. x_num selects the
 * DSP32_COMPUT_TAP_ENDING_<x_num> tail pasted after the 8-element loop and is
 * also appended to the two local labels so that every instantiation has
 * distinct label names. The `data` argument is not used by the expansion.
 *
 * The assembly names r8-r12 directly instead of using operand constraints.
 * NOTE(review): this presumes the five C arguments arrive in r12, r11, r10,
 * r9, r8 in that order - confirm against the AVR32 calling convention.
 *
 * Outline of the assembly:
 *  - push r0-r7 and lr, reserve 12 bytes of stack;
 *  - swap r11 and r12 (through r1);
 *  - r9 += r8 << 2 and keep the result at sp[0x08];
 *  - r8 -= 7: this is the bound tested by the tap loop;
 *  - main loop, one pass per stored output word:
 *      save r11 at sp[0x04] and r10 at sp[0x00], reload r9 from sp[0x08];
 *      clear r0 and r1 (the accumulator given to the tap macros is r0;
 *      presumably macs.d uses the r1:r0 pair as a 64-bit sum) and clear lr,
 *      which serves as the element index;
 *      tap loop: DSP32_COMPUT_TAP_8(r12, r9, lr, r0), repeated while
 *      lr < r8 (cp.h / brlt);
 *      tail: DSP32_COMPUT_TAP_ENDING_<x_num>(r12, r9, lr, r0);
 *      shift r0 right by DSP32_QB and insert the low DSP32_QB bits of r1 at
 *      bit position 32-DSP32_QB (lsr / bfins);
 *      reload r11, store r0 through it with post-increment, advance r12 by
 *      4 bytes ("sub r12, -4");
 *      reload r10, subtract 1, and loop while the result is > 0 (brgt);
 *  - release the 12 bytes of stack and return by popping pc.
 *
 * NOTE(review): both loops are tested at the bottom, so one output word and
 * one full DSP32_COMPUT_TAP_8 pass are always executed; this looks like it
 * requires vect1_size >= 1 and vect3_size >= 8 - confirm with the callers.
 * NOTE(review): the tap bound is compared with cp.h; if that is a halfword
 * compare, sizes are expected to fit in 16 bits - TODO confirm.
 */
00267 #define DSP32_CONVPART_KERNEL_X_FCT__(x_num, data) \
00268 static void TPASTE2(dsp32_vect_convpart_kernel_x, x_num)(dsp32_t *vect1, dsp32_t *vect2, int vect1_size, dsp32_t *vect3, int vect3_size) \
00269 { \
00270 __asm__ __volatile__ ( \
00271 "pushm r0-r7, lr\n\t" \
00272 "sub sp, 12\n\t" \
00273 \
00274 "mov r1, r11\n\t" \
00275 "mov r11, r12\n\t" \
00276 "mov r12, r1\n\t" \
00277 \
00278 "add r9, r9, r8 << 2\n\t" \
00279 "stdsp sp[0x08], r9\n\t" \
00280 \
00281 "sub r8, 7\n" \
00282 \
00283 "__dsp32_convpart_loop_main"ASTRINGZ(x_num)":\n\t" \
00284 "stdsp sp[0x04], r11\n\t" \
00285 "stdsp sp[0x00], r10\n\t" \
00286 "lddsp r9, sp[0x08]\n\t" \
00287 \
00288 "mov r0, 0\n\t" \
00289 "mov r1, 0\n\t" \
00290 "mov lr, r0\n" \
00291 \
00292 "__dsp32_convpart_loop_tap"ASTRINGZ(x_num)":\n\t" \
00293 \
00294 DSP32_COMPUT_TAP_8(r12, r9, lr, r0) \
00295 \
00296 "cp.h lr, r8\n\t" \
00297 ASM_INSTRUCT_COMPACKED("brlt __dsp32_convpart_loop_tap"ASTRINGZ(x_num))"\n\t" \
00298 \
00299 TPASTE2(DSP32_COMPUT_TAP_ENDING_, x_num)(r12, r9, lr, r0) \
00300 \
00301 "lsr r0, "ASTRINGZ(DSP32_QB)"\n\t" \
00302 "bfins r0, r1, "ASTRINGZ(32-DSP32_QB)", "ASTRINGZ(DSP32_QB)"\n\t" \
00303 \
00304 "lddsp r11, sp[0x04]\n\t" \
00305 "st.w r11++, r0\n\t" \
00306 \
00307 "sub r12, -4\n\t" \
00308 \
00309 "lddsp r10, sp[0x00]\n\t" \
00310 "sub r10, 1\n\t" \
00311 "brgt __dsp32_convpart_loop_main"ASTRINGZ(x_num)"\n\t" \
00312 \
00313 "sub sp, -12\n\t" \
00314 "popm r0-r7, pc\n\t" \
00315 ); \
00316 }
00317
/*
 * Instantiate the eight kernels dsp32_vect_convpart_kernel_x0 through
 * dsp32_vect_convpart_kernel_x7, one for each DSP32_COMPUT_TAP_ENDING_<n>
 * tail (n = 0..7). The second macro argument is deliberately left empty.
 */
00318 DSP32_CONVPART_KERNEL_X_FCT(0, )
00319 DSP32_CONVPART_KERNEL_X_FCT(1, )
00320 DSP32_CONVPART_KERNEL_X_FCT(2, )
00321 DSP32_CONVPART_KERNEL_X_FCT(3, )
00322 DSP32_CONVPART_KERNEL_X_FCT(4, )
00323 DSP32_CONVPART_KERNEL_X_FCT(5, )
00324 DSP32_CONVPART_KERNEL_X_FCT(6, )
00325 DSP32_CONVPART_KERNEL_X_FCT(7, )
00326
/*
 * Partial-convolution entry point: picks one of the eight assembly kernels
 * defined in this file from the low three bits of vect3_size and calls it.
 *
 * vect1, vect2, vect3 and vect3_size are forwarded unchanged; the kernel's
 * third argument receives vect2_size - vect3_size + 1.
 * NOTE(review): presumably vect1 is the output buffer and vect2 / vect3 are
 * the two inputs - confirm against the public header.
 */
00327 void dsp32_vect_convpart(dsp32_t *vect1, dsp32_t *vect2, int vect2_size, dsp32_t *vect3, int vect3_size)
00328 {
00329 typedef void (*kernel_fn_t)(dsp32_t *, dsp32_t *, int, dsp32_t *, int);
/* Slot k holds the kernel instantiated with x_num == k. */
00330 static const kernel_fn_t kernel_by_remainder[8] = {
00331 dsp32_vect_convpart_kernel_x0, dsp32_vect_convpart_kernel_x1,
00332 dsp32_vect_convpart_kernel_x2, dsp32_vect_convpart_kernel_x3,
00333 dsp32_vect_convpart_kernel_x4, dsp32_vect_convpart_kernel_x5,
00334 dsp32_vect_convpart_kernel_x6, dsp32_vect_convpart_kernel_x7
00335 };
/* Low three bits of the vect3 length select the matching tail variant. */
00336 const int remainder = vect3_size & 0x7;
/* Value handed to the kernel as its third (size) argument. */
00337 const int kernel_count = vect2_size - vect3_size + 1;
00338
00339 kernel_by_remainder[remainder](vect1, vect2, kernel_count, vect3, vect3_size);
00340 }
00344
00345 #endif