00001
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075 #include "dsp.h"
00076
00077 #if !defined(FORCE_ALL_GENERICS) && \
00078 !defined(FORCE_GENERIC_TRANS16_COMPLEXFFT) && \
00079 defined(TARGET_SPECIFIC_TRANS16_COMPLEXFFT)
00080
00081 #include "trans_dsp16_twiddle_factors.h"
00082
00083 #if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00084
/*
 * M_BUTTERFLY4_COMPUT() -- variant selected when DSP_OPTI_ACCURACY is set.
 *
 * Expands to a sequence of inline-assembly string literals performing one
 * in-place 4-point butterfly preceded by three twiddle multiplications.
 *
 * Read on entry:
 *   r4        base address of the butterfly
 *   r1, r2    byte offsets; the four words used are r4[0], r4[r1],
 *             r4[r1 << 1] and r4[r2]
 *   r0        twiddle word multiplied with the word at r4[r1 << 1]
 *   sp[0x1C]  doubleword loaded into the pair r11:r10; r10 is multiplied with
 *             the word at r4[r2] and r11 with the word at r4[r1]
 *
 * Each multiplication treats a word as a pair of halfwords (:t top, :b bottom
 * -- presumably the real and imaginary parts of a dsp16_complex_t; confirm
 * against dsp.h):
 *   top    = (t*t - b*b) << (16 - DSP16_QB), masked with 0xFFFF0000 (lr)
 *   bottom = (t*b + b*t) >> DSP16_QB
 * and the two halves are merged with `or`.
 * NOTE(review): if the `>>` operand shift of `or` is a logical shift and
 * DSP16_QB is below 16, bits above bit 15 of the shifted value are OR-ed into
 * the top halfword as well -- confirm this cannot disturb the result.
 *
 * The butterfly then forms the four sum/difference combinations of the
 * word at r4[0] and the three products, shifts every 32-bit result right by
 * 2 (arithmetic) and stores the low halfwords back to the four input slots.
 *
 * Written: r3, r5-r12, lr (lr first holds the mask, then is reused as a
 * temporary). r0, r1, r2 and r4 are not written.
 */
00085 #define M_BUTTERFLY4_COMPUT() \
00086 "ld.w r5, r4[0]\n\t" \
00087 "mov lr, -65536\n\t" \
00088 "ld.w r6, r4[r1 << 1]\n\t" \
00089 "mulnhh.w r12, r6:b, r0:b\n\t" \
00090 "machh.w r12, r6:t, r0:t\n\t" \
00091 "and r12, lr, r12 << "ASTRINGZ(16-DSP16_QB)"\n\t" \
00092 "mulhh.w r9, r6:t, r0:b\n\t" \
00093 "machh.w r9, r6:b, r0:t\n\t" \
00094 "or r6, r12, r9 >> "ASTRINGZ(DSP16_QB)"\n\t" \
00095 "ld.d r10, sp[0x1C]\n\t" \
00096 "ld.w r8, r4[r2]\n\t" \
00097 "mulnhh.w r12, r8:b, r10:b\n\t" \
00098 "machh.w r12, r8:t, r10:t\n\t" \
00099 "and r12, lr, r12 << "ASTRINGZ(16-DSP16_QB)"\n\t" \
00100 "mulhh.w r9, r8:t, r10:b\n\t" \
00101 "machh.w r9, r8:b, r10:t\n\t" \
00102 "or r8, r12, r9 >> "ASTRINGZ(DSP16_QB)"\n\t" \
00103 "ld.w r7, r4[r1]\n\t" \
00104 "mulnhh.w r12, r7:b, r11:b\n\t" \
00105 "machh.w r12, r7:t, r11:t\n\t" \
00106 "and r12, lr, r12 << "ASTRINGZ(16-DSP16_QB)"\n\t" \
00107 "mulhh.w r9, r7:t, r11:b\n\t" \
00108 "machh.w r9, r7:b, r11:t\n\t" \
00109 "or r7, r12, r9 >> "ASTRINGZ(DSP16_QB)"\n\t" \
00110 \
00111 "addhh.w r9, r5:t, r7:t\n\t" \
00112 "addhh.w r10, r6:t, r8:t\n\t" \
00113 "addhh.w r11, r5:b, r7:b\n\t" \
00114 "addhh.w r12, r6:b, r8:b\n\t" \
00115 \
00116 "add lr, r9, r10\n\t" \
00117 "add r3, r11, r12\n\t" \
00118 "asr lr, 2\n\t" \
00119 "asr r3, 2\n\t" \
00120 "sthh.w r4[0], lr:b, r3:b\n\t" \
00121 \
00122 "sub lr, r9, r10\n\t" \
00123 "sub r3, r11, r12\n\t" \
00124 "asr lr, 2\n\t" \
00125 "asr r3, 2\n\t" \
00126 "sthh.w r4[r1 << 1], lr:b, r3:b\n\t" \
00127 \
00128 "subhh.w r9, r5:t, r7:t\n\t" \
00129 "subhh.w r10, r6:t, r8:t\n\t" \
00130 "subhh.w r11, r5:b, r7:b\n\t" \
00131 "subhh.w r12, r6:b, r8:b\n\t" \
00132 \
00133 "add lr, r9, r12\n\t" \
00134 "sub r3, r11, r10\n\t" \
00135 "asr lr, 2\n\t" \
00136 "asr r3, 2\n\t" \
00137 "sthh.w r4[r1], lr:b, r3:b\n\t" \
00138 \
00139 "sub lr, r9, r12\n\t" \
00140 "add r3, r11, r10\n\t" \
00141 "asr lr, 2\n\t" \
00142 "asr r3, 2\n\t" \
00143 "sthh.w r4[r2], lr:b, r3:b\n\t"
00144
00145 #else
00146
/*
 * M_BUTTERFLY4_COMPUT() -- variant used when DSP_OPTI_ACCURACY is not set.
 *
 * Expands to a sequence of inline-assembly string literals performing one
 * in-place 4-point butterfly with three twiddle multiplications. Unlike the
 * accuracy variant, the products are never re-packed into 16-bit pairs:
 * each product stays in a full 32-bit register and only its top halfword
 * (:t) is fed into the halfword additions/subtractions.
 *
 * Read on entry:
 *   r4        base address of the butterfly
 *   r1, r2    byte offsets; the four words used are r4[0], r4[r1],
 *             r4[r1 << 1] and r4[r2]
 *   r0        twiddle word multiplied with the word at r4[r1 << 1]
 *   sp[0x1C]  doubleword loaded into the pair r7:r6; r6 is multiplied with
 *             the word at r4[r2] and r7 with the word at r4[r1]
 *
 * Product registers (first = -(b*b) + t*t, second = t*b + b*t):
 *   r8,  r9   for r4[r1 << 1]
 *   r12, lr   for r4[r2]
 *   r10, r11  for r4[r1]
 *
 * The word at r4[0] is pre-scaled instead of post-scaled: `asr r5, 2`
 * shifts the whole word, then bfexts/bfins rebuild the bottom halfword from
 * its sign-extended low 14 bits so both halfwords end up divided by 4.
 *
 * Written: r3, r5-r12, lr. r0, r1, r2 and r4 are not written.
 */
00147 #define M_BUTTERFLY4_COMPUT() \
00148 "ld.w r7, r4[r1 << 1]\n\t" \
00149 "mulnhh.w r8, r7:b, r0:b\n\t" \
00150 "machh.w r8, r7:t, r0:t\n\t" \
00151 "mulhh.w r9, r7:t, r0:b\n\t" \
00152 "machh.w r9, r7:b, r0:t\n\t" \
00153 \
00154 "ld.d r6, sp[0x1C]\n\t" \
00155 "ld.w r3, r4[r2]\n\t" \
00156 "mulnhh.w r12, r3:b, r6:b\n\t" \
00157 "machh.w r12, r3:t, r6:t\n\t" \
00158 "mulhh.w lr, r3:t, r6:b\n\t" \
00159 "machh.w lr, r3:b, r6:t\n\t" \
00160 \
00161 "ld.w r3, r4[r1]\n\t" \
00162 "mulnhh.w r10, r3:b, r7:b\n\t" \
00163 "machh.w r10, r3:t, r7:t\n\t" \
00164 "mulhh.w r11, r3:t, r7:b\n\t" \
00165 "machh.w r11, r3:b, r7:t\n\t" \
00166 \
00167 "ld.w r5, r4[0]\n\t" \
00168 "asr r5, 2\n\t" \
00169 "bfexts r3, r5, 0, 14\n\t" \
00170 "bfins r5, r3, 0, 16\n\t" \
00171 \
00172 "add r3, r8, r12\n\t" \
00173 "sub r12, r8, r12\n\t" \
00174 "add r6, r9, lr\n\t" \
00175 "sub lr, r9, lr\n\t" \
00176 \
00177 "addhh.w r8, r5:t, r10:t\n\t" \
00178 "subhh.w r10, r5:t, r10:t\n\t" \
00179 "addhh.w r9, r5:b, r11:t\n\t" \
00180 "subhh.w r11, r5:b, r11:t\n\t" \
00181 \
00182 "addhh.w r7, r8:b, r3:t\n\t" \
00183 "addhh.w r5, r9:b, r6:t\n\t" \
00184 "sthh.w r4[0], r7:b, r5:b\n\t" \
00185 \
00186 "subhh.w r7, r8:b, r3:t\n\t" \
00187 "subhh.w r5, r9:b, r6:t\n\t" \
00188 "sthh.w r4[r1 << 1], r7:b, r5:b\n\t" \
00189 \
00190 "addhh.w r7, r10:b, lr:t\n\t" \
00191 "subhh.w r5, r11:b, r12:t\n\t" \
00192 "sthh.w r4[r1], r7:b, r5:b\n\t" \
00193 \
00194 "subhh.w r7, r10:b, lr:t\n\t" \
00195 "addhh.w r5, r11:b, r12:t\n\t" \
00196 "sthh.w r4[r2], r7:b, r5:b\n\t"
00197
00198 #endif
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
/*
 * M_BUTTERFLY4_ZERO_COMPUT()
 *
 * Expands to a sequence of inline-assembly string literals performing one
 * in-place 4-point butterfly WITHOUT any twiddle multiplication (presumably
 * the case where every twiddle factor is unity -- confirm against the
 * algorithm description).
 *
 * Read on entry:
 *   r4        base address of the butterfly
 *   r1, r2    byte offsets; the four words used are r4[0], r4[r1],
 *             r4[r1 << 1] and r4[r2]
 *
 * With x0 = r4[0], x1 = r4[r1], x2 = r4[r1 << 1], x3 = r4[r2] and t/b the
 * top/bottom halfword of each word, the stores are:
 *   r4[0]       : top = ((x0+x1)+(x2+x3)).t >> 2, bottom = same on .b
 *   r4[r1 << 1] : top = ((x0+x1)-(x2+x3)).t >> 2, bottom = same on .b
 *   r4[r1]      : top = ((x0-x1).t + (x2-x3).b) >> 2,
 *                 bottom = ((x0-x1).b - (x2-x3).t) >> 2
 *   r4[r2]      : top = ((x0-x1).t - (x2-x3).b) >> 2,
 *                 bottom = ((x0-x1).b + (x2-x3).t) >> 2
 * Sums are computed in 32 bits, shifted right by 2 (arithmetic) and only
 * the low halfword of each result is stored.
 *
 * Written: r0, r3, r5-r12, lr. Note that r0 IS clobbered here, unlike in
 * M_BUTTERFLY4_COMPUT. r1, r2 and r4 are not written.
 */
00217 #define M_BUTTERFLY4_ZERO_COMPUT() \
00218 "ld.w r5, r4[0]\n\t" \
00219 "ld.w r6, r4[r1 << 1]\n\t" \
00220 "ld.w r8, r4[r2]\n\t" \
00221 "ld.w r7, r4[r1]\n\t" \
00222 \
00223 "addhh.w r9, r5:t, r7:t\n\t" \
00224 "addhh.w r10, r6:t, r8:t\n\t" \
00225 "addhh.w r11, r5:b, r7:b\n\t" \
00226 "addhh.w r12, r6:b, r8:b\n\t" \
00227 \
00228 "add lr, r9, r10\n\t" \
00229 "add r3, r11, r12\n\t" \
00230 "asr lr, 2\n\t" \
00231 "asr r3, 2\n\t" \
00232 "sthh.w r4[0], lr:b, r3:b\n\t" \
00233 \
00234 "sub lr, r9, r10\n\t" \
00235 "sub r0, r11, r12\n\t" \
00236 "asr lr, 2\n\t" \
00237 "asr r0, 2\n\t" \
00238 "sthh.w r4[r1 << 1], lr:b, r0:b\n\t" \
00239 \
00240 "subhh.w r9, r5:t, r7:t\n\t" \
00241 "subhh.w r10, r6:t, r8:t\n\t" \
00242 "subhh.w r11, r5:b, r7:b\n\t" \
00243 "subhh.w r12, r6:b, r8:b\n\t" \
00244 \
00245 "add lr, r9, r12\n\t" \
00246 "sub r0, r11, r10\n\t" \
00247 "asr lr, 2\n\t" \
00248 "asr r0, 2\n\t" \
00249 "sthh.w r4[r1], lr:b, r0:b\n\t" \
00250 \
00251 "sub lr, r9, r12\n\t" \
00252 "add r0, r11, r10\n\t" \
00253 "asr lr, 2\n\t" \
00254 "asr r0, 2\n\t" \
00255 "sthh.w r4[r2], lr:b, r0:b\n\t"
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
/*
 * M_BITREV_BUTTERFLY4_ZERO_COMPUT()
 *
 * Expands to a sequence of inline-assembly string literals that gathers four
 * input words through a bit-reversed index, performs a 4-point butterfly
 * without twiddle multiplication, and writes the four results to consecutive
 * words at r4[0], r4[4], r4[8] and r4[12].
 *
 * Read on entry:
 *   r1   index to be bit-reversed
 *   r2   left-shift amount applied to r1 before `brev` (reversing all 32
 *        bits of r1 << r2)
 *   r11  base address of the input words (indexed as r11[r9 << 2])
 *   r12  index step added between the four successive loads
 *   r4   destination address
 *
 * With x0..x3 the four words in load order (r5, r6, r7, r8) and t/b the
 * top/bottom halfword of each word, the stores are:
 *   r4[0]  : ((x0+x2) + (x1+x3)) >> 2 on each halfword
 *   r4[8]  : ((x0+x2) - (x1+x3)) >> 2 on each halfword
 *   r4[4]  : top = ((x0-x2).t + (x1-x3).b) >> 2,
 *            bottom = ((x0-x2).b - (x1-x3).t) >> 2
 *   r4[12] : top = ((x0-x2).t - (x1-x3).b) >> 2,
 *            bottom = ((x0-x2).b + (x1-x3).t) >> 2
 *
 * Written: r3, r5-r12, lr. r11 and r12 are overwritten by the butterfly,
 * so the caller has to reload them before expanding the macro again.
 * r1, r2 and r4 are not written.
 */
00279 #define M_BITREV_BUTTERFLY4_ZERO_COMPUT() \
00280 "lsl r9, r1, r2\n\t" \
00281 "brev r9\n\t" \
00282 \
00283 "ld.w r5, r11[r9 << 2]\n\t" \
00284 "add r9, r12\n\t" \
00285 "ld.w r6, r11[r9 << 2]\n\t" \
00286 "add r9, r12\n\t" \
00287 "ld.w r7, r11[r9 << 2]\n\t" \
00288 "add r9, r12\n\t" \
00289 "ld.w r8, r11[r9 << 2]\n\t" \
00290 \
00291 "addhh.w r11, r5:t, r7:t\n\t" \
00292 "addhh.w r12, r6:t, r8:t\n\t" \
00293 "addhh.w r9, r5:b, r7:b\n\t" \
00294 "addhh.w r10, r6:b, r8:b\n\t" \
00295 \
00296 "add lr, r9, r10\n\t" \
00297 "add r3, r11, r12\n\t" \
00298 "asr lr, 2\n\t" \
00299 "asr r3, 2\n\t" \
00300 "sthh.w r4[0], r3:b, lr:b\n\t" \
00301 \
00302 "sub lr, r9, r10\n\t" \
00303 "sub r3, r11, r12\n\t" \
00304 "asr lr, 2\n\t" \
00305 "asr r3, 2\n\t" \
00306 "sthh.w r4[8], r3:b, lr:b\n\t" \
00307 \
00308 "subhh.w r11, r5:t, r7:t\n\t" \
00309 "subhh.w r12, r6:t, r8:t\n\t" \
00310 "subhh.w r9, r5:b, r7:b\n\t" \
00311 "subhh.w r10, r6:b, r8:b\n\t" \
00312 \
00313 "sub lr, r9, r12\n\t" \
00314 "add r3, r11, r10\n\t" \
00315 "asr lr, 2\n\t" \
00316 "asr r3, 2\n\t" \
00317 "sthh.w r4[4], r3:b, lr:b\n\t" \
00318 \
00319 "add lr, r9, r12\n\t" \
00320 "sub r3, r11, r10\n\t" \
00321 "asr lr, 2\n\t" \
00322 "asr r3, 2\n\t" \
00323 "sthh.w r4[12], r3:b, lr:b\n\t"
00324
00325
/*
 * LO / HI: compiler-specific spelling of the assembler operators applied to a
 * symbol name in the `mov r1, LO(sym)` / `orh r1, HI(sym)` pairs of
 * dsp16_trans_complexfft to build a symbol address in a register
 * (presumably the low and high 16 bits of the address -- confirm against
 * the GNU as and IAR assembler manuals).
 * NOTE(review): neither macro is defined when the compiler is neither GCC
 * nor IAR; the inline assembly that uses them would then fail to compile.
 */
00326 #if (defined __GNUC__)
00327 # define LO "lo"
00328 # define HI "hi"
00329 #elif (defined __ICCAVR32__)
00330 # define LO "LWRD"
00331 # define HI "HWRD"
00332 #endif
00333
/*
 * ASM_INSTRUCT_COMPACKED(str) / ASM_INSTRUCT_EXTENDED(str): wrap an
 * instruction string literal. With GCC the string is passed through
 * unchanged; with IAR a ":C" or ":E" suffix is appended by string-literal
 * concatenation (presumably forcing the compact or extended instruction
 * encoding in the IAR assembler -- confirm against the IAR manual).
 * Only ASM_INSTRUCT_COMPACKED is used by the code visible in this file.
 */
00334 #if (defined __GNUC__)
00335 # define ASM_INSTRUCT_COMPACKED(str) str
00336 # define ASM_INSTRUCT_EXTENDED(str) str
00337 #elif (defined __ICCAVR32__)
00338 # define ASM_INSTRUCT_COMPACKED(str) str":C"
00339 # define ASM_INSTRUCT_EXTENDED(str) str":E"
00340 #endif
00341
/*
 * dsp16_trans_complexfft -- 16-bit complex FFT written entirely in AVR32
 * inline assembly.
 *
 * The function is declared naked/noinline (GCC) so the assembly below
 * provides its own prologue (pushm r0-r7, lr) and epilogue
 * (popm r0-r7, pc, which also returns).
 *
 * Parameters (the assembly reads r12, r11 and r10 directly on entry; this
 * assumes the calling convention passes the three arguments in those
 * registers, in that order -- confirm against the AVR32 ABI):
 *   vect1  (r12)  buffer that all butterflies write to and the later stages
 *                 read from (addressed through r4)
 *   vect2  (r11)  buffer read by the first, bit-reversed pass
 *   nlog   (r10)  the transform handles 1 << nlog words of 4 bytes
 *
 * Structure:
 *   1. __loop_init: one pass of M_BITREV_BUTTERFLY4_ZERO_COMPUT reading
 *      vect2 through a bit-reversed index and writing 4 consecutive words
 *      of vect1 per iteration.
 *   2. __loop_stage: repeated while the counter at sp[0x00] stays positive
 *      after being decremented by 2. Each stage multiplies the byte stride
 *      r1 by 4, runs M_BUTTERFLY4_ZERO_COMPUT over every group
 *      (__loop_r1), then for each further byte offset j = 4, 8, ... < r1
 *      fetches three twiddle words and runs M_BUTTERFLY4_COMPUT over every
 *      group (__loop_j / __loop_r2).
 *
 * Stack frame (36 bytes reserved with `sub sp, 36`):
 *   sp[0x00]  stage counter, initialised to nlog - 2
 *   sp[0x04]  base address of vect1
 *   sp[0x08]  end address: vect1 + ((1 << nlog) << 2)
 *   sp[0x0C]  vect2 during __loop_init; reused as the offset j afterwards
 *   sp[0x10]  (1 << nlog) >> 2 during __loop_init; reused as the running
 *             twiddle-table byte offset afterwards
 *   sp[0x14]  twiddle offset increment, starts at DSP16_N_TWIDDLE_FACTORS/4
 *             and is shifted right by 2 after every stage
 *   sp[0x18]  stride r1 of the current stage (starts at 4, *4 per stage)
 *   sp[0x1C]  8 bytes holding the register pair r7:r6 (second and third
 *             twiddle words) read back by M_BUTTERFLY4_COMPUT
 *
 * NOTE(review): the IAR branch below is tested with `#elif __ICCAVR32__`
 * and `#if __ICCAVR32__` while the rest of the file uses
 * `defined __ICCAVR32__`.
 */
00343 #if (defined __GNUC__)
00344 __attribute__((__naked__))
00345 __attribute__((__noinline__))
00346 #elif __ICCAVR32__
00347 # pragma shadow_registers=full
00348 # pragma optimize=none no_inline
00349 #endif
00350 void dsp16_trans_complexfft(dsp16_complex_t *vect1, dsp16_complex_t *vect2, int nlog)
00351 {
00352
00353 #if __ICCAVR32__
00354
00355
      /* IAR only: bare expression statements naming the twiddle tables, with
         diagnostic Pe174 suppressed around them (presumably so the tables
         stay referenced from C -- confirm). */
00356 # pragma diag_suppress=Pe174
00357 dsp16_twiddle_factors;
00358 #if !(DSP_OPTIMIZATION & DSP_OPTI_SIZE)
00359 dsp16_twiddle_factors2;
00360 #endif
00361
00362 # pragma diag_default=Pe174
00363 #endif
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375 __asm__ __volatile__ (
      /* Prologue: save r0-r7 and lr, reserve the 36-byte frame. */
00376 "pushm r0-r7, lr\n\t"
00377 "sub sp, 36\n\t"
00378
00379
      /* sp[0x00] = nlog - 2 (stage counter). */
00380 "sub r0, r10, 2\n\t"
00381 "stdsp sp[0x00], r0\n\t"
00382
00383
      /* r2 = 1 << nlog. */
00384 "mov r1, 1\n\t"
00385 "lsl r2, r1, r10\n\t"
00386
00387
      /* r4 = vect1, also kept at sp[0x04]. */
00388 "mov r4, r12\n\t"
00389
00390 "stdsp sp[0x04], r4\n\t"
00391
00392
      /* r0 = sp[0x08] = end address, vect1 + (r2 << 2). */
00393 "add r0, r4, r2 << 2\n\t"
00394 "stdsp sp[0x08], r0\n\t"
00395
00396
      /* sp[0x14] = initial twiddle offset increment. */
00397 "mov r1, "ASTRINGZ(DSP16_N_TWIDDLE_FACTORS/4)"\n\t"
00398 "stdsp sp[0x14], r1\n\t"
00399
00400
      /* r12 = (1 << nlog) >> 2: index step between the four gathered words. */
00401 "asr r12, r2, 2\n\t"
00402
      /* r1 = index fed to the bit reversal, starts at 0. */
00403 "mov r1, 0\n\t"
00404
00405 "mov r3, 0\n\t"
00406
      /* r2 = 32 - nlog: shift applied before brev in the macro. */
00407 "rsub r2, r10, 32\n\t"
00408
00409
      /* Spill r12 and r11 (vect2): the macro overwrites both. */
00410 "stdsp sp[0x10], r12\n\t"
00411 "stdsp sp[0x0C], r11\n"
00412
00413 "__loop_init:\n\t"
00414
      /* Reload the two registers clobbered by the previous iteration. */
00415 "lddsp r12, sp[0x10]\n\t"
00416 "lddsp r11, sp[0x0C]\n\t"
00417
00418
00419
00420 M_BITREV_BUTTERFLY4_ZERO_COMPUT()
00421
00422
00423
00424
      /* r1 += 4 (next index). */
00425 "sub r1, r1, -4\n\t"
00426
00427
      /* r4 += 16 (four words written); loop while end address r0 > r4. */
00428 "sub r4, r4, -16\n\t"
00429 "cp.w r0, r4\n\t"
00430
00431 "brgt __loop_init\n\t"
00432
00433
00434
00435 "lddsp r4, sp[0x04]\n\t"
00436
      /* sp[0x18] = 4: stride seed, multiplied by 4 at the top of each stage. */
00437 "mov r1, 4\n\t"
00438 "stdsp sp[0x18], r1\n"
00439
00440
00441 "__loop_stage:\n\t"
00442
00443 "lddsp r1, sp[0x18]\n\t"
00444
      /* r1 = stride of this stage = previous stride * 4. */
00445 "lsl r1, r1, 2\n\t"
00446 "stdsp sp[0x18], r1\n\t"
00447
00448
      /* Reset the running twiddle byte offset at sp[0x10]. */
00449 "mov r0, 0\n\t"
00450 "stdsp sp[0x10], r0\n\t"
00451
00452
00453
00454
      /* r4 = start of vect1. */
00455 "lddsp r4, sp[0x04]\n\t"
00456
00457
      /* r2 = 3 * r1: byte offset of the fourth butterfly input. */
00458 "mul r2, r1, 3\n"
00459
00460
00461 "__loop_r1:\n\t"
00462
00463
00464
      /* Butterfly without twiddle multiplication (offset j = 0). */
00465 M_BUTTERFLY4_ZERO_COMPUT()
00466
00467
00468
00469
00470
00471
00472
      /* Advance r4 by r1 << 2 bytes to the next group. */
00473 "add r4, r4, r1 << 2\n\t"
00474
      /* Loop while the end address is still above r4. */
00475 "lddsp r8, sp[0x08]\n\t"
00476 "cp.w r8, r4\n\t"
00477
00478
00479 "brgt __loop_r1\n\t"
00480
00481
00482
00483
      /* Skip the twiddled passes when r1 <= 4 (unsigned compare).
         NOTE(review): r1 was just produced by multiplying a value >= 4 by 4
         and is not written by M_BUTTERFLY4_ZERO_COMPUT, so this branch looks
         never-taken -- confirm. */
00484 "cp.w r1, 4\n\t"
00485 "brls __loop_stage_end\n\t"
00486
00487
00488
      /* j = 4: byte offset of the first twiddled butterfly in a group. */
00489 "mov r9, 4\n\t"
00490
00491 "stdsp sp[0x0C], r9\n"
00492
00493
00494
00495
00496
00497 "__loop_j:\n\t"
00498
00499
00500
      /* r4 = vect1 + j (r9 holds j on every entry to this label). */
00501 "lddsp r4, sp[0x04]\n\t"
00502
00503
00504 "add r4, r9\n\t"
00505
00506
00507
      /* Advance the running twiddle byte offset by the per-stage increment. */
00508 "lddsp r1, sp[0x14]\n\t"
00509
00510 "lddsp r9, sp[0x10]\n\t"
00511 "add r9, r1\n\t"
00512 "stdsp sp[0x10], r9\n\t"
00513
00514
      /* r1 = address of dsp16_twiddle_factors; r0 = first twiddle word,
         read at byte offset r9. */
00515 "mov r1, "LO"("ASTRINGZ(dsp16_twiddle_factors)")\n\t"
00516 "orh r1, "HI"("ASTRINGZ(dsp16_twiddle_factors)")\n\t"
00517 "ld.w r0, r1[r9]\n\t"
00518
00519 #if (DSP_OPTIMIZATION & DSP_OPTI_SIZE)
00520
00521
      /* Size-optimised build: only dsp16_twiddle_factors exists. The second
         (r7) and third (r6) twiddle words are derived from it by folding
         the offset back into the table range and negating/swapping
         halfwords (presumably exploiting the symmetry of the table --
         confirm against trans_dsp16_twiddle_factors.h). */
00522 "cp.w r9, "ASTRINGZ(DSP16_N_TWIDDLE_FACTORS/2)"\n\t"
00523 ASM_INSTRUCT_COMPACKED("brlt __address_indice_not_overflow")"\n\t"
00524
00525
      /* Offset 2*r9 is out of range: r10 = 2*N - 2*r9. */
00526 "mov r8, "ASTRINGZ(DSP16_N_TWIDDLE_FACTORS*2)"\n\t"
00527 "sub r10, r8, r9 << 1\n\t"
00528
00529
00530 "ld.w r8, r1[r10]\n\t"
00531
00532
      /* r7 = loaded word with its top halfword negated, bottom halfword
         taken unchanged from r8. */
00533 # if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00534 "lsr r7, r8, 16\n\t"
00535 "neg r7\n\t"
00536 "bfins r7, r8, 16, 16\n\t"
00537 "swap.h r7\n\t"
00538 # else
00539 "mov r7, r8\n\t"
00540 "neg r7\n\t"
00541 "bfins r7, r8, 0, 16\n\t"
00542 # endif
00543
00544
00545
00546 "cp.w r10, "ASTRINGZ((DSP16_N_TWIDDLE_FACTORS*2)/3)"\n\t"
00547 ASM_INSTRUCT_COMPACKED("brge __address_r_not_overflow1")"\n\t"
00548
00549
      /* r10 = r9 - r10; r6 = loaded word negated (both halfwords negated
         separately in the accuracy build, whole word otherwise). */
00550 "sub r10, r9, r10\n\t"
00551
00552 "ld.w r6, r1[r10]\n\t"
00553
00554
00555 # if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00556 "lsr r8, r6, 16\n\t"
00557 "neg r6\n\t"
00558 "neg r8\n\t"
00559 "bfins r6, r8, 16, 16\n\t"
00560 # else
00561 "neg r6\n\t"
00562 # endif
00563
00564 "bral __address_end_twiddle_factors\n"
00565 "__address_r_not_overflow1:\n\t"
00566
00567
      /* r10 = r10 - r9; r6 built like r7 above (top halfword negated). */
00568 "sub r10, r10, r9\n\t"
00569
00570
00571 "ld.w r8, r1[r10]\n\t"
00572
00573
00574 # if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00575 "lsr r6, r8, 16\n\t"
00576 "neg r6\n\t"
00577 "bfins r6, r8, 16, 16\n\t"
00578 "swap.h r6\n\t"
00579 # else
00580 "mov r6, r8\n\t"
00581 "neg r6\n\t"
00582 "bfins r6, r8, 0, 16\n\t"
00583 # endif
00584 "bral __address_end_twiddle_factors\n"
00585 "__address_indice_not_overflow:\n\t"
00586
00587
      /* Offset 2*r9 is in range: r7 is read directly. */
00588 "ld.w r7, r1[r9 << 1]\n\t"
00589
00590
00591 "cp.w r9, "ASTRINGZ(DSP16_N_TWIDDLE_FACTORS/3)"\n\t"
00592
00593 ASM_INSTRUCT_COMPACKED("brlt __address_r_not_overflow2")"\n\t"
00594
00595
      /* Offset 3*r9 is out of range: r10 = 2*N - 3*r9, r6 built with its
         top halfword negated. */
00596 "mov r8, "ASTRINGZ(DSP16_N_TWIDDLE_FACTORS*2)"\n\t"
00597 "sub r8, r8, r9\n\t"
00598 "sub r10, r8, r9 << 1\n\t"
00599
00600
00601 "ld.w r8, r1[r10]\n\t"
00602
00603 # if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00604 "lsr r6, r8, 16\n\t"
00605 "neg r6\n\t"
00606 "bfins r6, r8, 16, 16\n\t"
00607 "swap.h r6\n\t"
00608 # else
00609 "mov r6, r8\n\t"
00610 "neg r6\n\t"
00611 "bfins r6, r8, 0, 16\n\t"
00612 # endif
00613
00614 "bral __address_end_twiddle_factors\n"
00615 "__address_r_not_overflow2:\n\t"
00616
      /* Offset 3*r9 is in range: r6 is read directly. */
00617 "add r10, r9, r9 << 1\n\t"
00618 "ld.w r6, r1[r10]\n"
00619
00620 "__address_end_twiddle_factors:\n\t"
00621
      /* Park r7:r6 at sp[0x1C] for M_BUTTERFLY4_COMPUT. */
00622 "st.d sp[0x1C], r6\n\t"
00623 #else
00624
00625
      /* Default build: the second and third twiddle words are read as one
         doubleword from dsp16_twiddle_factors2 at byte offset r9 << 1 and
         parked at sp[0x1C]. */
00626 "mov r1, "LO"("ASTRINGZ(dsp16_twiddle_factors2)")\n\t"
00627 "orh r1, "HI"("ASTRINGZ(dsp16_twiddle_factors2)")\n\t"
00628
00629
00630
00631
00632 "ld.d r6, r1[r9 << 1]\n\t"
00633 "st.d sp[0x1C], r6\n\t"
00634
00635 #endif
00636
00637
      /* Restore r1 = stride of this stage (r1 was used as table pointer). */
00638 "lddsp r1, sp[0x18]\n"
00639
00640
00641 "__loop_r2:\n\t"
00642
00643
00644
      /* Twiddled butterfly; r0 and sp[0x1C] hold the three twiddle words. */
00645 M_BUTTERFLY4_COMPUT()
00646
00647
00648
00649
00650
      /* Advance r4 by r1 << 2 bytes to the same offset in the next group. */
00651 "add r4, r4, r1 << 2\n\t"
00652
00653
      /* Loop while the end address is still above r4. */
00654 "lddsp r8, sp[0x08]\n\t"
00655 "cp.w r8, r4\n\t"
00656
00657 "brgt __loop_r2\n\t"
00658
00659
00660
00661
      /* j += 4; continue while stride (sp[0x18]) > j. */
00662 "lddsp r9, sp[0x0C]\n\t"
00663 "sub r9, -4\n\t"
00664 "stdsp sp[0x0C], r9\n\t"
00665
00666 "lddsp r5, sp[0x18]\n\t"
00667
00668
00669 "cp.w r5, r9\n\t"
00670 "brgt __loop_j\n"
00671
00672 "__loop_stage_end:\n\t"
00673
00674
      /* Twiddle offset increment for the next stage = current >> 2. */
00675 "lddsp r1, sp[0x14]\n\t"
00676 "lsr r1, r1, 2\n\t"
00677 "stdsp sp[0x14], r1\n\t"
00678
00679
      /* Stage counter -= 2; the branch below tests the result of this `sub`
         (this relies on the intervening stdsp leaving the flags untouched). */
00680 "lddsp r1, sp[0x00]\n\t"
00681 "sub r1, 2\n\t"
00682 "stdsp sp[0x00], r1\n\t"
00683 "brgt __loop_stage\n"
00684
      /* Epilogue: release the frame, restore r0-r7 and return by popping the
         saved lr into pc.
         NOTE(review): no branch to __address_end is visible in this file. */
00685 "__address_end:\n\t"
00686 "sub sp, -36\n\t"
00687 "popm r0-r7, pc\n\t"
00688 #if (defined __GNUC__)
      /* GCC: no outputs; the tables are listed as "i" inputs.
         NOTE(review): the named operands are not referenced by the template
         strings above, which spell the symbol names via ASTRINGZ instead;
         presumably they only keep the symbols referenced -- confirm. */
00689 :
00690 :
00691 # if !(DSP_OPTIMIZATION & DSP_OPTI_SIZE)
00692 [VAR_TWIDDLE_FACTORS] "i" (dsp16_twiddle_factors),
00693 [VAR_TWIDDLE_FACTORS2] "i" (dsp16_twiddle_factors2)
00694 # else
00695 [VAR_TWIDDLE_FACTORS] "i" (dsp16_twiddle_factors)
00696 # endif
00697 #endif
00698 );
00699 }
00700
00701 #endif