00001
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #include "preprocessor.h"
00048
00049 #if !defined(FORCE_ALL_GENERICS) && \
00050 !defined(FORCE_GENERIC_FILT32_IIRPART) && \
00051 defined(TARGET_SPECIFIC_FILT32_IIRPART)
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
/*
 * DSP32_COMPUT_TAP_8 - assembly fragment that accumulates eight taps.
 *
 * All four arguments are register names; they are stringified with
 * ASTRINGZ() and pasted into the instruction text.
 *   r_vect1 : base address of the sample vector
 *   r_h     : coefficient pointer; it is moved back by 32 bytes first and
 *             the coefficients are then read at offsets 24, 16, 8 and 0
 *   r_i     : sample index; scaled by 4 (<< 2) to form a byte offset and
 *             advanced by 8 at the end ("sub r_i, -8")
 *   r_sum1  : accumulator register named in the macs.d instructions
 *
 * r2 and r4..r7 are used as scratch registers. Each ld.d fills a register
 * pair (r5:r4 for coefficients, r7:r6 for samples), which is why r5 and r7
 * appear as multiply operands without an explicit load.
 * NOTE(review): the crossed pairing (r5*r6, r4*r7) presumably multiplies
 * coefficients in descending address order by samples in ascending address
 * order - confirm the ld.d word/register ordering in the AVR32 ISA manual.
 */
00063 #define DSP32_COMPUT_TAP_8(r_vect1, r_h, r_i, r_sum1) \
00064 "sub "ASTRINGZ(r_h)", 32\n\t" \
00065 "add r2, "ASTRINGZ(r_vect1)", "ASTRINGZ(r_i)" << 2\n\t" \
00066 /* taps 1-2: coefficients at r_h[24], samples at r2[0] */ \
00067 "ld.d r4, "ASTRINGZ(r_h)"[24]\n\t" \
00068 "ld.d r6, r2[0]\n\t" \
00069 \
00070 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00071 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00072 /* taps 3-4: coefficients at r_h[16], samples at r2[8] */ \
00073 "ld.d r4, "ASTRINGZ(r_h)"[16]\n\t" \
00074 "ld.d r6, r2[8]\n\t" \
00075 \
00076 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00077 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00078 /* taps 5-6: coefficients at r_h[8], samples at r2[16] */ \
00079 "ld.d r4, "ASTRINGZ(r_h)"[8]\n\t" \
00080 "ld.d r6, r2[16]\n\t" \
00081 \
00082 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00083 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00084 /* taps 7-8: coefficients at r_h[0], samples at r2[24] */ \
00085 "ld.d r4, "ASTRINGZ(r_h)"[0]\n\t" \
00086 "ld.d r6, r2[24]\n\t" \
00087 \
00088 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00089 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00090 /* subtracting -8 advances the sample index by eight */ \
00091 "sub "ASTRINGZ(r_i)", -8\n\t"
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
/*
 * DSP32_COMPUT_TAP_ENDING_<k> macros emit the assembly for the k taps left
 * over after the 8-tap blocks; the kernel macros select one by pasting
 * their x_num argument onto the DSP32_COMPUT_TAP_ENDING_ prefix.
 * k == 0: nothing is left, so this variant expands to no instructions.
 */
00102 #define DSP32_COMPUT_TAP_ENDING_0(r_vect1, r_h, r_i, r_sum1)
00103
/*
 * Tail for one remaining tap: loads one coefficient word through r_h with
 * pre-decrement addressing ("--r_h"), loads the sample word at
 * r_vect1[r_i << 2] and accumulates their product into r_sum1.
 * Scratch registers: r4, r6. r_i is left unchanged.
 */
00104 #define DSP32_COMPUT_TAP_ENDING_1(r_vect1, r_h, r_i, r_sum1) \
00105 "ld.w r4, --"ASTRINGZ(r_h)"\n\t" \
00106 "ld.w r6, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 2]\n\t" \
00107 \
00108 "macs.d "ASTRINGZ(r_sum1)", r4, r6\n\t"
00109
/*
 * Tail for two remaining taps: one doubleword load of coefficients through
 * r_h with pre-decrement addressing, one doubleword load of samples at
 * r_vect1[r_i << 2], then two multiply-accumulates into r_sum1 using the
 * same crossed pairing (r5*r6, r4*r7) as DSP32_COMPUT_TAP_8.
 * Scratch registers: r4..r7. r_i is left unchanged.
 */
00110 #define DSP32_COMPUT_TAP_ENDING_2(r_vect1, r_h, r_i, r_sum1) \
00111 "ld.d r4, --"ASTRINGZ(r_h)"\n\t" \
00112 "ld.d r6, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 2]\n\t" \
00113 \
00114 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00115 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t"
00116
00117
/*
 * Tail for three remaining taps: r_h is moved back by 12 bytes and r2 is
 * set to r_vect1 + (r_i << 2); one doubleword pair (coefficients at
 * r_h[4], samples at r2[0]) is followed by a single word pair
 * (coefficient at r_h[0], sample at r2[8]).
 * Scratch registers: r2, r4..r7. r_i is left unchanged.
 */
00118 #define DSP32_COMPUT_TAP_ENDING_3(r_vect1, r_h, r_i, r_sum1) \
00119 "sub "ASTRINGZ(r_h)", 12\n\t" \
00120 "add r2, "ASTRINGZ(r_vect1)", "ASTRINGZ(r_i)" << 2\n\t" \
00121 /* taps 1-2 */ \
00122 "ld.d r4, "ASTRINGZ(r_h)"[4]\n\t" \
00123 "ld.d r6, r2[0]\n\t" \
00124 \
00125 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00126 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00127 /* tap 3: single words */ \
00128 "ld.w r4, "ASTRINGZ(r_h)"[0]\n\t" \
00129 "ld.w r6, r2[8]\n\t" \
00130 \
00131 "macs.d "ASTRINGZ(r_sum1)", r4, r6\n\t"
00132
00133
/*
 * Tail for four remaining taps: r_h is moved back by 16 bytes and r2 is
 * set to r_vect1 + (r_i << 2); two doubleword pairs are accumulated
 * (coefficients at r_h[8] and r_h[0], samples at r2[0] and r2[8]).
 * Scratch registers: r2, r4..r7. r_i is left unchanged.
 */
00134 #define DSP32_COMPUT_TAP_ENDING_4(r_vect1, r_h, r_i, r_sum1) \
00135 "sub "ASTRINGZ(r_h)", 16\n\t" \
00136 "add r2, "ASTRINGZ(r_vect1)", "ASTRINGZ(r_i)" << 2\n\t" \
00137 /* taps 1-2 */ \
00138 "ld.d r4, "ASTRINGZ(r_h)"[8]\n\t" \
00139 "ld.d r6, r2[0]\n\t" \
00140 \
00141 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00142 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00143 /* taps 3-4 */ \
00144 "ld.d r4, "ASTRINGZ(r_h)"[0]\n\t" \
00145 "ld.d r6, r2[8]\n\t" \
00146 \
00147 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00148 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t"
00149
00150
/*
 * Tail for five remaining taps: r_h is moved back by 20 bytes and r2 is
 * set to r_vect1 + (r_i << 2); two doubleword pairs (coefficients at
 * r_h[12] and r_h[4], samples at r2[0] and r2[8]) are followed by a single
 * word pair (coefficient at r_h[0], sample at r2[16]).
 * Scratch registers: r2, r4..r7. r_i is left unchanged.
 */
00151 #define DSP32_COMPUT_TAP_ENDING_5(r_vect1, r_h, r_i, r_sum1) \
00152 "sub "ASTRINGZ(r_h)", 20\n\t" \
00153 "add r2, "ASTRINGZ(r_vect1)", "ASTRINGZ(r_i)" << 2\n\t" \
00154 /* taps 1-2 */ \
00155 "ld.d r4, "ASTRINGZ(r_h)"[12]\n\t" \
00156 "ld.d r6, r2[0]\n\t" \
00157 \
00158 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00159 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00160 /* taps 3-4 */ \
00161 "ld.d r4, "ASTRINGZ(r_h)"[4]\n\t" \
00162 "ld.d r6, r2[8]\n\t" \
00163 \
00164 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00165 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00166 /* tap 5: single words */ \
00167 "ld.w r4, "ASTRINGZ(r_h)"[0]\n\t" \
00168 "ld.w r6, r2[16]\n\t" \
00169 \
00170 "macs.d "ASTRINGZ(r_sum1)", r4, r6\n\t"
00171
/*
 * Tail for six remaining taps: r_h is moved back by 24 bytes and r2 is
 * set to r_vect1 + (r_i << 2); three doubleword pairs are accumulated
 * (coefficients at r_h[16], r_h[8], r_h[0]; samples at r2[0], r2[8],
 * r2[16]).
 * Scratch registers: r2, r4..r7. r_i is left unchanged.
 */
00172 #define DSP32_COMPUT_TAP_ENDING_6(r_vect1, r_h, r_i, r_sum1) \
00173 "sub "ASTRINGZ(r_h)", 24\n\t" \
00174 "add r2, "ASTRINGZ(r_vect1)", "ASTRINGZ(r_i)" << 2\n\t" \
00175 /* taps 1-2 */ \
00176 "ld.d r4, "ASTRINGZ(r_h)"[16]\n\t" \
00177 "ld.d r6, r2[0]\n\t" \
00178 \
00179 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00180 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00181 /* taps 3-4 */ \
00182 "ld.d r4, "ASTRINGZ(r_h)"[8]\n\t" \
00183 "ld.d r6, r2[8]\n\t" \
00184 \
00185 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00186 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00187 /* taps 5-6 */ \
00188 "ld.d r4, "ASTRINGZ(r_h)"[0]\n\t" \
00189 "ld.d r6, r2[16]\n\t" \
00190 \
00191 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00192 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t"
00193
/*
 * Tail for seven remaining taps: r_h is moved back by 28 bytes and r2 is
 * set to r_vect1 + (r_i << 2); three doubleword pairs (coefficients at
 * r_h[20], r_h[12], r_h[4]; samples at r2[0], r2[8], r2[16]) are followed
 * by a single word pair (coefficient at r_h[0], sample at r2[24]).
 * Scratch registers: r2, r4..r7. r_i is left unchanged.
 */
00194 #define DSP32_COMPUT_TAP_ENDING_7(r_vect1, r_h, r_i, r_sum1) \
00195 "sub "ASTRINGZ(r_h)", 28\n\t" \
00196 "add r2, "ASTRINGZ(r_vect1)", "ASTRINGZ(r_i)" << 2\n\t" \
00197 /* taps 1-2 */ \
00198 "ld.d r4, "ASTRINGZ(r_h)"[20]\n\t" \
00199 "ld.d r6, r2[0]\n\t" \
00200 \
00201 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00202 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00203 /* taps 3-4 */ \
00204 "ld.d r4, "ASTRINGZ(r_h)"[12]\n\t" \
00205 "ld.d r6, r2[8]\n\t" \
00206 \
00207 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00208 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00209 /* taps 5-6 */ \
00210 "ld.d r4, "ASTRINGZ(r_h)"[4]\n\t" \
00211 "ld.d r6, r2[16]\n\t" \
00212 \
00213 "macs.d "ASTRINGZ(r_sum1)", r5, r6\n\t" \
00214 "macs.d "ASTRINGZ(r_sum1)", r4, r7\n\t" \
00215 /* tap 7: single words */ \
00216 "ld.w r4, "ASTRINGZ(r_h)"[0]\n\t" \
00217 "ld.w r6, r2[24]\n\t" \
00218 \
00219 "macs.d "ASTRINGZ(r_sum1)", r4, r6\n\t"
00220
00221
/*
 * Compiler-specific wrappers around the kernel generator macros
 * DSP32_IIRPART_NUM_KERNEL_X_FCT__ / DSP32_IIRPART_DEN_KERNEL_X_FCT__.
 * With GCC the generated function is marked __naked__, so the compiler
 * emits no prologue/epilogue of its own; the kernel bodies save and restore
 * registers themselves (pushm ... / popm ..., pc). With the IAR AVR32
 * compiler no attribute is added.
 * Neither wrapper is defined when neither __GNUC__ nor __ICCAVR32__ is set.
 */
00222 #if __GNUC__
00223 # define DSP32_IIRPART_NUM_KERNEL_X_FCT(x_num, data) __attribute__((__naked__)) DSP32_IIRPART_NUM_KERNEL_X_FCT__(x_num, data)
00224 # define DSP32_IIRPART_DEN_KERNEL_X_FCT(x_num, data) __attribute__((__naked__)) DSP32_IIRPART_DEN_KERNEL_X_FCT__(x_num, data)
00225 #elif __ICCAVR32__
00226 # define DSP32_IIRPART_NUM_KERNEL_X_FCT(x_num, data) DSP32_IIRPART_NUM_KERNEL_X_FCT__(x_num, data)
00227 # define DSP32_IIRPART_DEN_KERNEL_X_FCT(x_num, data) DSP32_IIRPART_DEN_KERNEL_X_FCT__(x_num, data)
00228 #endif
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
/*
 * Wrappers for an instruction string whose encoding size is to be chosen.
 * GCC: the string is passed through unchanged.
 * IAR: ":C" or ":E" is appended to the instruction text.
 * NOTE(review): the suffixes presumably ask the IAR assembler for the
 * compact or the extended encoding - confirm in the IAR AVR32 assembler
 * documentation. Only ASM_INSTRUCT_COMPACKED is used in the visible part
 * of this file (on the brge that leaves the 8-tap loop).
 */
00260 #if __GNUC__
00261 # define ASM_INSTRUCT_COMPACKED(str) str
00262 # define ASM_INSTRUCT_EXTENDED(str) str
00263 #elif __ICCAVR32__
00264 # define ASM_INSTRUCT_COMPACKED(str) str":C"
00265 # define ASM_INSTRUCT_EXTENDED(str) str":E"
00266 #endif
00267
/*
 * DSP32_IIRPART_NUM_KERNEL_X_FCT__(x_num, data)
 *
 * Generates the static function dsp32_filt_iirpart_num_kernel_x<x_num>,
 * written entirely in AVR32 assembly.
 *   x_num : 0..7; appended to the function name and to every local label,
 *           and used to select DSP32_COMPUT_TAP_ENDING_<x_num>, i.e. the
 *           number of taps left after the 8-tap blocks.
 *   data  : not referenced by the expansion.
 *
 * For each of vect1_size outputs the generated code accumulates vect3_size
 * products of coefficients (read backwards from vect3 + vect3_size) and
 * samples (read forwards from the current position in vect2), shifts the
 * 64-bit sum right by (DSP32_QB - prediv) and stores the low 32 bits to
 * the next word of vect1.
 *
 * NOTE(review): the register roles below assume the AVR32 calling
 * convention (r12 = vect1, r11 = vect2, r10 = vect1_size, r9 = vect3,
 * r8 = vect3_size, prediv on the stack) - confirm against the ABI.
 * The outer loop is tested at the bottom, so one output is produced even
 * if vect1_size is zero or negative.
 */
00268 #define DSP32_IIRPART_NUM_KERNEL_X_FCT__(x_num, data) \
00269 static void TPASTE2(dsp32_filt_iirpart_num_kernel_x, x_num)(dsp32_t *vect1, dsp32_t *vect2, int vect1_size, dsp32_t *vect3, int vect3_size, int prediv) \
00270 { \
00271 __asm__ __volatile__ ( \
00272 "pushm r0-r7, lr\n\t" \
00273 "sub sp, 12\n\t" \
00274 /* sp[48] lies just above the 9 pushed registers (36 bytes) plus the 12 reserved bytes; it is replaced in place by DSP32_QB minus its value (the shift count) */ \
00275 "lddsp r4, sp[48]\n\t" \
00276 "rsub r4, r4, "ASTRINGZ(DSP32_QB)"\n\t" \
00277 "stdsp sp[48], r4\n\t" \
00278 /* exchange r12 and r11 through r1: r12 becomes the sample pointer, r11 the output pointer */ \
00279 "mov r1, r11\n\t" \
00280 "mov r11, r12\n\t" \
00281 "mov r12, r1\n\t" \
00282 /* r9 += r8 * 4 (address just past the last coefficient), kept at sp[8] */ \
00283 "add r9, r9, r8 << 2\n\t" \
00284 "stdsp sp[0x08], r9\n\t" \
00285 /* r8 -= 7: bound tested before each 8-tap block */ \
00286 "sub r8, 7\n" \
00287 /* ---- one iteration per output sample ---- */ \
00288 "__dsp32_iirpart_num_loop_main"ASTRINGZ(x_num)":\n\t" \
00289 "stdsp sp[0x04], r11\n\t" \
00290 "stdsp sp[0x00], r10\n\t" \
00291 "lddsp r9, sp[0x08]\n\t" \
00292 /* clear the accumulator (r0, r1) and the tap index (lr) */ \
00293 "mov r0, 0\n\t" \
00294 "mov r1, 0\n\t" \
00295 "mov lr, r0\n" \
00296 \
00297 "__dsp32_iirpart_num_loop_tap"ASTRINGZ(x_num)":\n\t" \
00298 /* leave the block loop once lr >= r8 (halfword compare) */ \
00299 "cp.h lr, r8\n\t" \
00300 ASM_INSTRUCT_COMPACKED("brge __dsp32_iirpart_num_endloop_tap"ASTRINGZ(x_num))"\n\t" \
00301 \
00302 DSP32_COMPUT_TAP_8(r12, r9, lr, r0) \
00303 \
00304 "bral __dsp32_iirpart_num_loop_tap"ASTRINGZ(x_num)"\n" \
00305 "__dsp32_iirpart_num_endloop_tap"ASTRINGZ(x_num)":\n\t" \
00306 /* remaining x_num taps */ \
00307 TPASTE2(DSP32_COMPUT_TAP_ENDING_, x_num)(r12, r9, lr, r0) \
00308 /* r0 = (r0 >> s) | (r1 << (32 - s)), with s reloaded from sp[48] */ \
00309 "lddsp r4, sp[48]\n\t" \
00310 "rsub r11, r4, 32\n\t" \
00311 "lsr r0, r0, r4\n\t" \
00312 "lsl r1, r1, r11\n\t" \
00313 "or r0, r1\n\t" \
00314 /* store the result through the saved output pointer, post-incrementing it */ \
00315 "lddsp r11, sp[0x04]\n\t" \
00316 "st.w r11++, r0\n\t" \
00317 /* move the sample window forward by one word */ \
00318 "sub r12, -4\n\t" \
00319 /* decrement the saved output count and loop while it is still > 0 */ \
00320 "lddsp r10, sp[0x00]\n\t" \
00321 "sub r10, 1\n\t" \
00322 "brgt __dsp32_iirpart_num_loop_main"ASTRINGZ(x_num)"\n\t" \
00323 /* release the 12 local bytes; popm with pc in the list also returns */ \
00324 "sub sp, -12\n\t" \
00325 "popm r0-r7, pc\n\t" \
00326 ); \
00327 }
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349
00350
00351
00352
00353
00354
00355
00356
00357
/*
 * DSP32_IIRPART_DEN_KERNEL_X_FCT__(x_num, data)
 *
 * Generates the static function dsp32_filt_iirpart_den_kernel_x<x_num>,
 * written entirely in AVR32 assembly.
 *   x_num : 0..7; appended to the function name and to every local label,
 *           and used to select DSP32_COMPUT_TAP_ENDING_<x_num>, i.e. the
 *           number of taps left after the 8-tap blocks.
 *   data  : not referenced by the expansion.
 *
 * For each of vect1_size outputs the generated code accumulates vect3_size
 * products of coefficients (read backwards from vect3 + vect3_size) and
 * words read forwards from the current position in vect2, shifts the
 * 64-bit sum right by (DSP32_QB - prediv) and SUBTRACTS the low 32 bits
 * from the word already stored at the current position in vect1.
 *
 * NOTE(review): the register roles below assume the AVR32 calling
 * convention (r12 = vect1, r11 = vect2, r10 = vect1_size, r9 = vect3,
 * r8 = vect3_size, prediv on the stack) - confirm against the ABI.
 * The outer loop is tested at the bottom, so one output is processed even
 * if vect1_size is zero or negative.
 */
00358 #define DSP32_IIRPART_DEN_KERNEL_X_FCT__(x_num, data) \
00359 static void TPASTE2(dsp32_filt_iirpart_den_kernel_x, x_num)(dsp32_t *vect1, dsp32_t *vect2, int vect1_size, dsp32_t *vect3, int vect3_size, int prediv) \
00360 { \
00361 __asm__ __volatile__ ( \
00362 "pushm r0-r7, lr\n\t" \
00363 "sub sp, 12\n\t" \
00364 /* exchange r12 and r11 through r1: r12 becomes the input pointer, r11 the output pointer */ \
00365 "mov r1, r11\n\t" \
00366 "mov r11, r12\n\t" \
00367 "mov r12, r1\n\t" \
00368 /* r3 = DSP32_QB - (word at sp[48]); kept in r3 as the shift count for the whole call */ \
00369 "lddsp r3, sp[48]\n\t" \
00370 "rsub r3, r3, "ASTRINGZ(DSP32_QB)"\n\t" \
00371 /* r9 += r8 * 4 (address just past the last coefficient), kept at sp[8] */ \
00372 "add r9, r9, r8 << 2\n\t" \
00373 "stdsp sp[0x08], r9\n\t" \
00374 /* r8 -= 7: bound tested before each 8-tap block */ \
00375 "sub r8, 7\n" \
00376 /* ---- one iteration per output sample ---- */ \
00377 "__dsp32_iirpart_den_loop_main"ASTRINGZ(x_num)":\n\t" \
00378 "stdsp sp[0x04], r11\n\t" \
00379 "stdsp sp[0x00], r10\n\t" \
00380 "lddsp r9, sp[0x08]\n\t" \
00381 /* clear the accumulator (r0, r1) and the tap index (lr) */ \
00382 "mov r0, 0\n\t" \
00383 "mov r1, 0\n\t" \
00384 "mov lr, r0\n" \
00385 \
00386 "__dsp32_iirpart_den_loop_tap"ASTRINGZ(x_num)":\n\t" \
00387 /* leave the block loop once lr >= r8 (halfword compare) */ \
00388 "cp.h lr, r8\n\t" \
00389 ASM_INSTRUCT_COMPACKED("brge __dsp32_iirpart_den_endloop_tap"ASTRINGZ(x_num))"\n\t" \
00390 \
00391 DSP32_COMPUT_TAP_8(r12, r9, lr, r0) \
00392 \
00393 "bral __dsp32_iirpart_den_loop_tap"ASTRINGZ(x_num)"\n" \
00394 "__dsp32_iirpart_den_endloop_tap"ASTRINGZ(x_num)":\n\t" \
00395 /* remaining x_num taps */ \
00396 TPASTE2(DSP32_COMPUT_TAP_ENDING_, x_num)(r12, r9, lr, r0) \
00397 /* r0 = (r0 >> r3) | (r1 << (32 - r3)) */ \
00398 "lsr r0, r0, r3\n\t" \
00399 "rsub r2, r3, 32\n\t" \
00400 "lsl r1, r1, r2\n\t" \
00401 "or r0, r1\n\t" \
00402 /* read-modify-write: subtract the shifted sum from the word at the output pointer, then post-increment it */ \
00403 "lddsp r11, sp[0x04]\n\t" \
00404 "ld.w r2, r11[0x0]\n\t" \
00405 "sub r2, r0\n\t" \
00406 "st.w r11++, r2\n\t" \
00407 /* move the input window forward by one word */ \
00408 "sub r12, -4\n\t" \
00409 /* decrement the saved output count and loop while it is still > 0 */ \
00410 "lddsp r10, sp[0x00]\n\t" \
00411 "sub r10, 1\n\t" \
00412 "brgt __dsp32_iirpart_den_loop_main"ASTRINGZ(x_num)"\n\t" \
00413 /* release the 12 local bytes; popm with pc in the list also returns */ \
00414 "sub sp, -12\n\t" \
00415 "popm r0-r7, pc\n\t" \
00416 ); \
00417 }
00418
00419
00420
/*
 * Instantiate the eight numerator kernels
 * dsp32_filt_iirpart_num_kernel_x0 .. _x7, one per possible value of
 * (num_size & 0x7). The second macro argument is left empty; the generator
 * macro does not use it.
 */
00421 DSP32_IIRPART_NUM_KERNEL_X_FCT(0, )
00422 DSP32_IIRPART_NUM_KERNEL_X_FCT(1, )
00423 DSP32_IIRPART_NUM_KERNEL_X_FCT(2, )
00424 DSP32_IIRPART_NUM_KERNEL_X_FCT(3, )
00425 DSP32_IIRPART_NUM_KERNEL_X_FCT(4, )
00426 DSP32_IIRPART_NUM_KERNEL_X_FCT(5, )
00427 DSP32_IIRPART_NUM_KERNEL_X_FCT(6, )
00428 DSP32_IIRPART_NUM_KERNEL_X_FCT(7, )
00429
00430
/*
 * Instantiate the eight denominator kernels
 * dsp32_filt_iirpart_den_kernel_x0 .. _x7, one per possible value of
 * (den_size & 0x7). The second macro argument is left empty; the generator
 * macro does not use it.
 */
00431 DSP32_IIRPART_DEN_KERNEL_X_FCT(0, )
00432 DSP32_IIRPART_DEN_KERNEL_X_FCT(1, )
00433 DSP32_IIRPART_DEN_KERNEL_X_FCT(2, )
00434 DSP32_IIRPART_DEN_KERNEL_X_FCT(3, )
00435 DSP32_IIRPART_DEN_KERNEL_X_FCT(4, )
00436 DSP32_IIRPART_DEN_KERNEL_X_FCT(5, )
00437 DSP32_IIRPART_DEN_KERNEL_X_FCT(6, )
00438 DSP32_IIRPART_DEN_KERNEL_X_FCT(7, )
00439
00440 void dsp32_filt_iirpart(dsp32_t *vect1, dsp32_t *vect2, int size, dsp32_t *num, int num_size, dsp32_t *den, int den_size, int num_prediv, int den_prediv)
00441 {
00442 typedef void (*iirpart_kernel_opti_num_t)(dsp32_t *, dsp32_t *, int, dsp32_t *, int, int);
00443 static const iirpart_kernel_opti_num_t iirpart_kernel_opti_num[8] = {
00444 dsp32_filt_iirpart_num_kernel_x0,
00445 dsp32_filt_iirpart_num_kernel_x1,
00446 dsp32_filt_iirpart_num_kernel_x2,
00447 dsp32_filt_iirpart_num_kernel_x3,
00448 dsp32_filt_iirpart_num_kernel_x4,
00449 dsp32_filt_iirpart_num_kernel_x5,
00450 dsp32_filt_iirpart_num_kernel_x6,
00451 dsp32_filt_iirpart_num_kernel_x7
00452 };
00453 typedef void (*iirpart_kernel_opti_den_t)(dsp32_t *, dsp32_t *, int, dsp32_t *, int, int);
00454 static const iirpart_kernel_opti_den_t iirpart_kernel_opti_den[8] = {
00455 dsp32_filt_iirpart_den_kernel_x0,
00456 dsp32_filt_iirpart_den_kernel_x1,
00457 dsp32_filt_iirpart_den_kernel_x2,
00458 dsp32_filt_iirpart_den_kernel_x3,
00459 dsp32_filt_iirpart_den_kernel_x4,
00460 dsp32_filt_iirpart_den_kernel_x5,
00461 dsp32_filt_iirpart_den_kernel_x6,
00462 dsp32_filt_iirpart_den_kernel_x7
00463 };
00464 int n, m;
00465 long long sum;
00466
00467
00468 iirpart_kernel_opti_num[num_size&0x7](vect1, vect2, size - num_size + 1, num, num_size, num_prediv);
00469
00470
00471
00472 for(n=1; n<den_size; n++)
00473 {
00474 sum = 0;
00475 for(m=0; m<n; m++)
00476 sum += ((S64) den[m])*((S64) vect1[n-m-1]);
00477 vect1[n] -= (sum >> (DSP32_QB - den_prediv));
00478 }
00479
00480 iirpart_kernel_opti_den[den_size&0x7](&vect1[n], vect1 + 1, size - num_size - den_size, den, den_size, den_prediv);
00481 }
00482
00483 #endif