00001
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075 #include "dsp.h"
00076
00077 #if !defined(FORCE_ALL_GENERICS) && \
00078 !defined(FORCE_GENERIC_TRANS16_REALCOMPLEXFFT) && \
00079 defined(TARGET_SPECIFIC_TRANS16_REALCOMPLEXFFT)
00080
00081 #include "trans_dsp16_twiddle_factors.h"
00082
/*
 * M_BUTTERFLY4_COMPUT() - assembly text (a sequence of string literals) for
 * one in-place radix-4 butterfly whose 2nd, 3rd and 4th inputs are first
 * multiplied by twiddle words. It is spliced into the __asm__ statement of
 * dsp16_trans_realcomplexfft() (inside the __loop_r2 loop).
 *
 * Every 32-bit word is handled as two signed 16-bit halves. The multiply
 * pattern used below (t*t - b*b for one half, t*b + b*t for the other) is a
 * complex product with the top halfword (":t") as real part and the bottom
 * halfword (":b") as imaginary part.
 *
 * Registers / memory read by the macro:
 *   r4        address of the first input word "a" (not multiplied)
 *   r1        byte offset; the inputs live at r4[0], r4[r1], r4[r1 << 1]
 *   r2        byte offset of the fourth input (the call site sets r2 = 3*r1)
 *   r0        twiddle word applied to the word at r4[r1 << 1]
 *   sp[0x1C]  double word holding the twiddle words applied to r4[r2] and
 *             r4[r1] (loaded into a register pair with ld.d)
 * Result: the four words at r4[0], r4[r1], r4[r1 << 1], r4[r2] are overwritten.
 * Clobbers: r3, r5-r12, lr. r0, r1, r2 and r4 are left unchanged.
 *
 * DSP_OPTI_ACCURACY variant: each product is rescaled by DSP16_QB bits and
 * repacked into one word, then the sums are formed on 32-bit values and
 * shifted right by 2 before being stored.
 * Other variant: the products stay 32-bit and only their top halfwords are
 * used (an implicit >> 16); "a" is pre-shifted right by 2 in both halves and
 * no shift is applied after the additions.
 *
 * NOTE(review): in the accuracy variant "or rX, r12, r9 >> DSP16_QB" shifts a
 * signed product. If that operand shift is a logical shift and DSP16_QB is 15,
 * bit 31 of a negative r9 lands in bit 16, i.e. in the least significant bit
 * of the top (real) half - confirm against the AVR32 instruction set manual.
 */
00083 #if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00084
00085 #define M_BUTTERFLY4_COMPUT() \
00086 "ld.w r5, r4[0]\n\t" /* r5 = a */ \
00087 "mov lr, -65536\n\t" /* lr = 0xFFFF0000, mask that keeps the top halfword */ \
00088 "ld.w r6, r4[r1 << 1]\n\t" /* r6 = b */ \
00089 "mulnhh.w r12, r6:b, r0:b\n\t" /* r12 = -(b.b x w.b) */ \
00090 "machh.w r12, r6:t, r0:t\n\t" /* r12 += b.t x w.t : real part of b x w */ \
00091 "and r12, lr, r12 << "ASTRINGZ(16-DSP16_QB)"\n\t" /* real part >> DSP16_QB, left in the top half */ \
00092 "mulhh.w r9, r6:t, r0:b\n\t" \
00093 "machh.w r9, r6:b, r0:t\n\t" /* r9 = b.t x w.b + b.b x w.t : imaginary part */ \
00094 "or r6, r12, r9 >> "ASTRINGZ(DSP16_QB)"\n\t" /* r6 = b x w repacked as one word */ \
00095 "ld.d r10, sp[0x1C]\n\t" /* register pair r10/r11 = the two remaining twiddle words */ \
00096 "ld.w r8, r4[r2]\n\t" /* r8 = d, multiplied by r10 with the same pattern */ \
00097 "mulnhh.w r12, r8:b, r10:b\n\t" \
00098 "machh.w r12, r8:t, r10:t\n\t" \
00099 "and r12, lr, r12 << "ASTRINGZ(16-DSP16_QB)"\n\t" \
00100 "mulhh.w r9, r8:t, r10:b\n\t" \
00101 "machh.w r9, r8:b, r10:t\n\t" \
00102 "or r8, r12, r9 >> "ASTRINGZ(DSP16_QB)"\n\t" \
00103 "ld.w r7, r4[r1]\n\t" /* r7 = c, multiplied by r11 with the same pattern */ \
00104 "mulnhh.w r12, r7:b, r11:b\n\t" \
00105 "machh.w r12, r7:t, r11:t\n\t" \
00106 "and r12, lr, r12 << "ASTRINGZ(16-DSP16_QB)"\n\t" \
00107 "mulhh.w r9, r7:t, r11:b\n\t" \
00108 "machh.w r9, r7:b, r11:t\n\t" \
00109 "or r7, r12, r9 >> "ASTRINGZ(DSP16_QB)"\n\t" \
00110 /* sums: r9 = a.t + c.t, r10 = b.t + d.t, r11 = a.b + c.b, r12 = b.b + d.b */ \
00111 "addhh.w r9, r5:t, r7:t\n\t" \
00112 "addhh.w r10, r6:t, r8:t\n\t" \
00113 "addhh.w r11, r5:b, r7:b\n\t" \
00114 "addhh.w r12, r6:b, r8:b\n\t" \
00115 /* word at r4[0] = { (r9 + r10) >> 2 , (r11 + r12) >> 2 } */ \
00116 "add lr, r9, r10\n\t" \
00117 "add r3, r11, r12\n\t" \
00118 "asr lr, 2\n\t" \
00119 "asr r3, 2\n\t" \
00120 "sthh.w r4[0], lr:b, r3:b\n\t" \
00121 /* word at r4[r1 << 1] = { (r9 - r10) >> 2 , (r11 - r12) >> 2 } */ \
00122 "sub lr, r9, r10\n\t" \
00123 "sub r3, r11, r12\n\t" \
00124 "asr lr, 2\n\t" \
00125 "asr r3, 2\n\t" \
00126 "sthh.w r4[r1 << 1], lr:b, r3:b\n\t" \
00127 /* differences: r9 = a.t - c.t, r10 = b.t - d.t, r11 = a.b - c.b, r12 = b.b - d.b */ \
00128 "subhh.w r9, r5:t, r7:t\n\t" \
00129 "subhh.w r10, r6:t, r8:t\n\t" \
00130 "subhh.w r11, r5:b, r7:b\n\t" \
00131 "subhh.w r12, r6:b, r8:b\n\t" \
00132 /* word at r4[r1] = { (r9 + r12) >> 2 , (r11 - r10) >> 2 } */ \
00133 "add lr, r9, r12\n\t" \
00134 "sub r3, r11, r10\n\t" \
00135 "asr lr, 2\n\t" \
00136 "asr r3, 2\n\t" \
00137 "sthh.w r4[r1], lr:b, r3:b\n\t" \
00138 /* word at r4[r2] = { (r9 - r12) >> 2 , (r11 + r10) >> 2 } */ \
00139 "sub lr, r9, r12\n\t" \
00140 "add r3, r11, r10\n\t" \
00141 "asr lr, 2\n\t" \
00142 "asr r3, 2\n\t" \
00143 "sthh.w r4[r2], lr:b, r3:b\n\t"
00144
00145 #else
00146
00147 #define M_BUTTERFLY4_COMPUT() \
00148 "ld.w r7, r4[r1 << 1]\n\t" /* r7 = b */ \
00149 "mulnhh.w r8, r7:b, r0:b\n\t" \
00150 "machh.w r8, r7:t, r0:t\n\t" /* r8 = b.t x w.t - b.b x w.b (32-bit) */ \
00151 "mulhh.w r9, r7:t, r0:b\n\t" \
00152 "machh.w r9, r7:b, r0:t\n\t" /* r9 = b.t x w.b + b.b x w.t (32-bit) */ \
00153 /* r6/r7 = the two remaining twiddle words; b in r7 is no longer needed */ \
00154 "ld.d r6, sp[0x1C]\n\t" \
00155 "ld.w r3, r4[r2]\n\t" /* r3 = d, multiplied by r6: r12 and lr hold the two product halves */ \
00156 "mulnhh.w r12, r3:b, r6:b\n\t" \
00157 "machh.w r12, r3:t, r6:t\n\t" \
00158 "mulhh.w lr, r3:t, r6:b\n\t" \
00159 "machh.w lr, r3:b, r6:t\n\t" \
00160 /* r3 = c, multiplied by r7: r10 and r11 hold the two product halves */ \
00161 "ld.w r3, r4[r1]\n\t" \
00162 "mulnhh.w r10, r3:b, r7:b\n\t" \
00163 "machh.w r10, r3:t, r7:t\n\t" \
00164 "mulhh.w r11, r3:t, r7:b\n\t" \
00165 "machh.w r11, r3:b, r7:t\n\t" \
00166 /* r5 = a with both halves shifted right by 2; the bottom half is re-signed from 14 bits */ \
00167 "ld.w r5, r4[0]\n\t" \
00168 "asr r5, 2\n\t" \
00169 "bfexts r3, r5, 0, 14\n\t" \
00170 "bfins r5, r3, 0, 16\n\t" \
00171 /* 32-bit sums and differences of the b and d products */ \
00172 "add r3, r8, r12\n\t" \
00173 "sub r12, r8, r12\n\t" \
00174 "add r6, r9, lr\n\t" \
00175 "sub lr, r9, lr\n\t" \
00176 /* a combined with the top halfwords of the c product */ \
00177 "addhh.w r8, r5:t, r10:t\n\t" \
00178 "subhh.w r10, r5:t, r10:t\n\t" \
00179 "addhh.w r9, r5:b, r11:t\n\t" \
00180 "subhh.w r11, r5:b, r11:t\n\t" \
00181 /* word at r4[0] = { r8.b + r3.t , r9.b + r6.t } */ \
00182 "addhh.w r7, r8:b, r3:t\n\t" \
00183 "addhh.w r5, r9:b, r6:t\n\t" \
00184 "sthh.w r4[0], r7:b, r5:b\n\t" \
00185 /* word at r4[r1 << 1] = { r8.b - r3.t , r9.b - r6.t } */ \
00186 "subhh.w r7, r8:b, r3:t\n\t" \
00187 "subhh.w r5, r9:b, r6:t\n\t" \
00188 "sthh.w r4[r1 << 1], r7:b, r5:b\n\t" \
00189 /* word at r4[r1] = { r10.b + lr.t , r11.b - r12.t } */ \
00190 "addhh.w r7, r10:b, lr:t\n\t" \
00191 "subhh.w r5, r11:b, r12:t\n\t" \
00192 "sthh.w r4[r1], r7:b, r5:b\n\t" \
00193 /* word at r4[r2] = { r10.b - lr.t , r11.b + r12.t } */ \
00194 "subhh.w r7, r10:b, lr:t\n\t" \
00195 "addhh.w r5, r11:b, r12:t\n\t" \
00196 "sthh.w r4[r2], r7:b, r5:b\n\t"
00197
00198 #endif
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
/*
 * M_BUTTERFLY4_ZERO_COMPUT() - assembly text for one in-place radix-4
 * butterfly without any twiddle multiplication. It is spliced into the
 * __loop_r1 loop of dsp16_trans_realcomplexfft().
 *
 * Each 32-bit word is treated as two signed 16-bit halves (":t" = top,
 * ":b" = bottom). With a = r4[0], c = r4[r1], b = r4[r1 << 1], d = r4[r2]:
 *   r4[0]       = { (a.t + c.t + b.t + d.t) >> 2 , (a.b + c.b + b.b + d.b) >> 2 }
 *   r4[r1 << 1] = { (a.t + c.t - b.t - d.t) >> 2 , (a.b + c.b - b.b - d.b) >> 2 }
 *   r4[r1]      = { (a.t - c.t + b.b - d.b) >> 2 , (a.b - c.b - b.t + d.t) >> 2 }
 *   r4[r2]      = { (a.t - c.t - b.b + d.b) >> 2 , (a.b - c.b + b.t - d.t) >> 2 }
 *
 * Reads:    r4 (base address), r1 (byte offset), r2 (byte offset of the
 *           fourth word; the call site sets r2 = 3 * r1).
 * Clobbers: r0, r3, r5-r12, lr. r1, r2 and r4 are left unchanged.
 */
00217 #define M_BUTTERFLY4_ZERO_COMPUT() \
00218 "ld.w r5, r4[0]\n\t" /* r5 = a */ \
00219 "ld.w r6, r4[r1 << 1]\n\t" /* r6 = b */ \
00220 "ld.w r8, r4[r2]\n\t" /* r8 = d */ \
00221 "ld.w r7, r4[r1]\n\t" /* r7 = c */ \
00222 /* sums: r9 = a.t + c.t, r10 = b.t + d.t, r11 = a.b + c.b, r12 = b.b + d.b */ \
00223 "addhh.w r9, r5:t, r7:t\n\t" \
00224 "addhh.w r10, r6:t, r8:t\n\t" \
00225 "addhh.w r11, r5:b, r7:b\n\t" \
00226 "addhh.w r12, r6:b, r8:b\n\t" \
00227 /* first output word, stored at r4[0] */ \
00228 "add lr, r9, r10\n\t" \
00229 "add r3, r11, r12\n\t" \
00230 "asr lr, 2\n\t" \
00231 "asr r3, 2\n\t" \
00232 "sthh.w r4[0], lr:b, r3:b\n\t" \
00233 /* output word stored at r4[r1 << 1]; r0 is used as scratch from here on */ \
00234 "sub lr, r9, r10\n\t" \
00235 "sub r0, r11, r12\n\t" \
00236 "asr lr, 2\n\t" \
00237 "asr r0, 2\n\t" \
00238 "sthh.w r4[r1 << 1], lr:b, r0:b\n\t" \
00239 /* differences: r9 = a.t - c.t, r10 = b.t - d.t, r11 = a.b - c.b, r12 = b.b - d.b */ \
00240 "subhh.w r9, r5:t, r7:t\n\t" \
00241 "subhh.w r10, r6:t, r8:t\n\t" \
00242 "subhh.w r11, r5:b, r7:b\n\t" \
00243 "subhh.w r12, r6:b, r8:b\n\t" \
00244 /* output word stored at r4[r1] */ \
00245 "add lr, r9, r12\n\t" \
00246 "sub r0, r11, r10\n\t" \
00247 "asr lr, 2\n\t" \
00248 "asr r0, 2\n\t" \
00249 "sthh.w r4[r1], lr:b, r0:b\n\t" \
00250 /* output word stored at r4[r2] */ \
00251 "sub lr, r9, r12\n\t" \
00252 "add r0, r11, r10\n\t" \
00253 "asr lr, 2\n\t" \
00254 "asr r0, 2\n\t" \
00255 "sthh.w r4[r2], lr:b, r0:b\n\t"
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265
00266
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
/*
 * M_BITREV_BUTTERFLY4_ZERO_COMPUT() - assembly text for the first pass of
 * dsp16_trans_realcomplexfft() (the __loop_init loop): it gathers four 16-bit
 * inputs in bit-reversed index order, applies a twiddle-free radix-4
 * butterfly and writes four 32-bit words at r4.
 *
 * Registers read by the macro:
 *   r1   running index; r9 = bit-reverse(r1 << r2) is the first input index
 *   r2   shift count (the call site sets it to 32 - nlog)
 *   r11  base address of the 16-bit input values
 *   r12  index step between the four inputs (the call site sets size / 4)
 *   r3   register whose bottom halfword is stored as the bottom half of the
 *        words at r4[0] and r4[8] (the call site sets r3 = 0)
 *   r4   address of the four output words (byte offsets 0, 4, 8, 12)
 *
 * With x0..x3 the inputs at indices r9, r9 + r12, r9 + 2*r12, r9 + 3*r12:
 *   r4[0]  = { x0 + x2 + x1 + x3 , r3.b }
 *   r4[8]  = { x0 + x2 - x1 - x3 , r3.b }
 *   r4[4]  = { x0 - x2 , x3 - x1 }
 *   r4[12] = { x0 - x2 , x1 - x3 }
 * every computed half being scaled by >> 2. The DSP_OPTI_ACCURACY variant
 * shifts after the additions, the other variant shifts the four inputs first.
 *
 * Clobbers: r5-r10, lr. r1, r2, r3, r4, r11 and r12 are left unchanged.
 */
00278 #if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00279
00280 # define M_BITREV_BUTTERFLY4_ZERO_COMPUT() \
00281 "lsl r9, r1, r2\n\t" \
00282 "brev r9\n\t" /* r9 = bit-reversed index of the first input */ \
00283 /* r5..r8 = x0..x3, sign-extended halfwords, r12 indices apart */ \
00284 "ld.sh r5, r11[r9 << 1]\n\t" \
00285 "add r9, r12\n\t" \
00286 "ld.sh r6, r11[r9 << 1]\n\t" \
00287 "add r9, r12\n\t" \
00288 "ld.sh r7, r11[r9 << 1]\n\t" \
00289 "add r9, r12\n\t" \
00290 "ld.sh r8, r11[r9 << 1]\n\t" \
00291 /* r9 = x0 + x2, r10 = x1 + x3 */ \
00292 "add r9, r5, r7\n\t" \
00293 "add r10, r6, r8\n\t" \
00294 /* r4[0] = { (r9 + r10) >> 2 , r3.b } */ \
00295 "add lr, r9, r10\n\t" \
00296 "asr lr, 2\n\t" \
00297 "sthh.w r4[0], lr:b, r3:b\n\t" \
00298 /* r4[8] = { (r9 - r10) >> 2 , r3.b } */ \
00299 "sub lr, r9, r10\n\t" \
00300 "asr lr, 2\n\t" \
00301 "sthh.w r4[8], lr:b, r3:b\n\t" \
00302 /* lr = (x0 - x2) >> 2 is shared by the two remaining stores */ \
00303 "sub lr, r5, r7\n\t" \
00304 \
00305 "asr lr, 2\n\t" \
00306 "sub r5, r8, r6\n\t" \
00307 "asr r5, 2\n\t" \
00308 "sthh.w r4[4], lr:b, r5:b\n\t" /* bottom half = (x3 - x1) >> 2 */ \
00309 \
00310 "sub r7, r6, r8\n\t" \
00311 "asr r7, 2\n\t" \
00312 "sthh.w r4[12], lr:b, r7:b\n\t" /* bottom half = (x1 - x3) >> 2 */
00313
00314 #else
00315
00316 # define M_BITREV_BUTTERFLY4_ZERO_COMPUT() \
00317 "lsl r9, r1, r2\n\t" \
00318 "brev r9\n\t" /* r9 = bit-reversed index of the first input */ \
00319 /* r5..r8 = x0..x3, each shifted right by 2 before any addition */ \
00320 "ld.sh r5, r11[r9 << 1]\n\t" \
00321 "add r9, r12\n\t" \
00322 "ld.sh r6, r11[r9 << 1]\n\t" \
00323 "add r9, r12\n\t" \
00324 "ld.sh r7, r11[r9 << 1]\n\t" \
00325 "add r9, r12\n\t" \
00326 "ld.sh r8, r11[r9 << 1]\n\t" \
00327 "asr r5, 2\n\t" \
00328 "asr r6, 2\n\t" \
00329 "asr r7, 2\n\t" \
00330 "asr r8, 2\n\t" \
00331 /* r9 = x0 + x2, r10 = x1 + x3 (pre-scaled values) */ \
00332 "add r9, r5, r7\n\t" \
00333 "add r10, r6, r8\n\t" \
00334 /* r4[0] = { r9 + r10 , r3.b } */ \
00335 "add lr, r9, r10\n\t" \
00336 "sthh.w r4[0], lr:b, r3:b\n\t" \
00337 /* r4[8] = { r9 - r10 , r3.b } */ \
00338 "sub lr, r9, r10\n\t" \
00339 "sthh.w r4[8], lr:b, r3:b\n\t" \
00340 /* lr = x0 - x2 is shared by the two remaining stores */ \
00341 "sub lr, r5, r7\n\t" \
00342 \
00343 "sub r5, r8, r6\n\t" \
00344 "sthh.w r4[4], lr:b, r5:b\n\t" /* bottom half = x3 - x1 */ \
00345 \
00346 "sub r7, r6, r8\n\t" \
00347 "sthh.w r4[12], lr:b, r7:b\n\t" /* bottom half = x1 - x3 */
00348
00349 #endif
00350
00351
/*
 * Assembler-dialect helpers, selected by compiler (GCC or IAR ICCAVR32).
 *
 * LO / HI: operator names pasted into the assembly text as
 * "mov r1, LO(symbol)" / "orh r1, HI(symbol)" to load the two parts of a
 * symbol address ("lo"/"hi" for GCC, "LWRD"/"HWRD" for IAR).
 *
 * NOTE(review): there is no #else branch, so with any other compiler these
 * macros stay undefined and the asm string concatenation below fails to
 * compile.
 */
00352 #if (defined __GNUC__)
00353 # define LO "lo"
00354 # define HI "hi"
00355 #elif (defined __ICCAVR32__)
00356 # define LO "LWRD"
00357 # define HI "HWRD"
00358 #endif
00359
/*
 * ASM_INSTRUCT_COMPACKED(str) / ASM_INSTRUCT_EXTENDED(str): wrap one
 * instruction string. GCC gets the string unchanged; IAR gets a ":C" or ":E"
 * suffix appended (presumably forcing the compact or the extended encoding
 * of the instruction - confirm in the IAR assembler manual).
 */
00360 #if (defined __GNUC__)
00361 # define ASM_INSTRUCT_COMPACKED(str) str
00362 # define ASM_INSTRUCT_EXTENDED(str) str
00363 #elif (defined __ICCAVR32__)
00364 # define ASM_INSTRUCT_COMPACKED(str) str":C"
00365 # define ASM_INSTRUCT_EXTENDED(str) str":E"
00366 #endif
00367
/*
 * dsp16_trans_realcomplexfft() - radix-4 FFT of a real 16-bit vector into a
 * complex 16-bit vector, written entirely as AVR32 inline assembly.
 *
 * vect1  output buffer. The code writes 2^nlog 32-bit words to it, one per
 *        point, as a pair of 16-bit halves (the butterfly arithmetic treats
 *        the top half as real part and the bottom half as imaginary part).
 * vect2  input buffer: 2^nlog signed 16-bit values, read by the first pass in
 *        bit-reversed index order.
 * nlog   base-2 logarithm of the number of points. Every pass is radix-4 (the
 *        stride is multiplied by 4 and the stage counter decremented by 2 per
 *        pass), so the passes only line up with the buffer when nlog is even.
 *        NOTE(review): the documented contract for nlog is not visible here.
 *
 * The function body is a single asm statement. Under GCC the function is
 * naked, so the asm saves r0-r7 and lr itself and returns by popping pc. The
 * code takes vect1 from r12, vect2 from r11 and nlog from r10 (presumably the
 * argument registers of the AVR32 calling convention).
 *
 * Stack frame (36 bytes):
 *   sp[0x00]  stage counter, starts at nlog - 2, decremented by 2 per pass
 *   sp[0x04]  vect1
 *   sp[0x08]  vect1 + 4 * 2^nlog (end pointer)
 *   sp[0x0C]  j, byte offset of the current column inside a group
 *   sp[0x10]  running byte offset into the twiddle table
 *   sp[0x14]  twiddle offset step, starts at DSP16_N_TWIDDLE_FACTORS / 4 and
 *             is shifted right by 2 after every pass
 *   sp[0x18]  stride in bytes, multiplied by 4 at the start of every pass
 *   sp[0x1C]  second and third twiddle words (8 bytes)
 *
 * Flow:
 *   __loop_init   first pass (M_BITREV_BUTTERFLY4_ZERO_COMPUT per 4 points)
 *   __loop_stage  one further pass:
 *     __loop_r1     column 0 of every group, twiddle-free butterflies
 *     __loop_j      columns j = 4, 8, ... < stride: fetch three twiddle words
 *       __loop_r2     twiddled butterflies for column j in every group
 *
 * Fix in this revision: __loop_stage is a do/while loop. With nlog == 2 the
 * stage counter is already 0 after the first pass, yet one more pass used to
 * run with a 16-byte stride and read/write beyond the 4-word vect1 buffer.
 * The counter is now tested before the stage loop is entered.
 */
00369 #if (defined __GNUC__)
00370 __attribute__((__naked__))
00371 __attribute__((__noinline__))
00372 #elif (defined __ICCAVR32__)
00373 # pragma shadow_registers=full
00374 # pragma optimize=none no_inline
00375 #endif
00376 void dsp16_trans_realcomplexfft(dsp16_complex_t *vect1, dsp16_t *vect2, int nlog)
00377 {
00378
00379 #if (defined __ICCAVR32__)
00380 // IAR only: bare expression statements naming the twiddle tables, with
00381 // diagnostic Pe174 silenced around them (presumably "expression has no effect").
00382 # pragma diag_suppress=Pe174
00383 dsp16_twiddle_factors;
00384 #if !(DSP_OPTIMIZATION & DSP_OPTI_SIZE)
00385 dsp16_twiddle_factors2;
00386 #endif
00387
00388 # pragma diag_default=Pe174
00389 #endif
00390
00391
00392
00393
00394
00395
00396
00397
00398
00399
00400
00401 __asm__ __volatile__ (
00402 "pushm r0-r7, lr\n\t"
00403 "sub sp, 36\n\t"
00404
00405 // sp[0x00] = nlog - 2 (stage counter)
00406 "sub r0, r10, 2\n\t"
00407 "stdsp sp[0x00], r0\n\t"
00408
00409 // r2 = size = 1 << nlog
00410 "mov r1, 1\n\t"
00411 "lsl r2, r1, r10\n\t"
00412
00413 // r4 = vect1
00414 "mov r4, r12\n\t"
00415 // sp[0x04] = vect1
00416 "stdsp sp[0x04], r4\n\t"
00417
00418 // r0 = sp[0x08] = vect1 + 4 * size (end pointer)
00419 "add r0, r4, r2 << 2\n\t"
00420 "stdsp sp[0x08], r0\n\t"
00421
00422 // sp[0x14] = initial twiddle offset step
00423 "mov r1, "ASTRINGZ(DSP16_N_TWIDDLE_FACTORS/4)"\n\t"
00424 "stdsp sp[0x14], r1\n\t"
00425
00426 // r12 = size / 4: index step between the four inputs of a first-pass butterfly
00427 "asr r12, r2, 2\n\t"
00428 // r1 = i = 0
00429 "mov r1, 0\n\t"
00430 // r3 = 0: bottom halfword stored by the first pass at r4[0] and r4[8]
00431 "mov r3, 0\n\t"
00432 // r2 = 32 - nlog: shift applied to i before the bit reversal
00433 "rsub r2, r10, 32\n"
00434
00435
00436 // First pass: do { butterfly on 4 points; i += 4; r4 += 16 bytes; } while (r4 < end)
00437
00438 "__loop_init:\n\t"
00439
00440
00441
00442 M_BITREV_BUTTERFLY4_ZERO_COMPUT()
00443
00444
00445
00446 // i += 4
00447 "sub r1, r1, -4\n\t"
00448
00449 // r4 += 16 bytes (four output words); loop while end pointer r0 > r4
00450 "sub r4, r4, -16\n\t"
00451 "cp.w r0, r4\n\t"
00452
00453 "brgt __loop_init\n\t"
00454
00455 // No further pass is due when the stage counter (nlog - 2) is <= 0. Without
// this test the do/while stage loop below would still run once and, for
// nlog == 2, access memory beyond vect1. r1 is free here: it is reloaded below.
"lddsp r1, sp[0x00]\n\t"
"cp.w r1, 0\n\t"
"brle __address_end\n\t"
00456
00457 "lddsp r4, sp[0x04]\n\t"
00458 // sp[0x18] = 4: stride in bytes before the first multiplication by 4
00459 "mov r1, 4\n\t"
00460 "stdsp sp[0x18], r1\n"
00461
00462
00463 "__loop_stage:\n\t"
00464 // r1 = stride = previous stride * 4, written back to sp[0x18]
00465 "lddsp r1, sp[0x18]\n\t"
00466
00467 "lsl r1, r1, 2\n\t"
00468 "stdsp sp[0x18], r1\n\t"
00469
00470 // sp[0x10] = 0: twiddle table offset restarts at every pass
00471 "mov r0, 0\n\t"
00472 "stdsp sp[0x10], r0\n\t"
00473
00474
00475
00476 // r4 = vect1
00477 "lddsp r4, sp[0x04]\n\t"
00478
00479 // r2 = 3 * stride: byte offset of the fourth butterfly input
00480 "mul r2, r1, 3\n"
00481
00482 // Column 0: twiddle-free butterfly in every group of 4 * stride bytes
00483 "__loop_r1:\n\t"
00484
00485
00486
00487 M_BUTTERFLY4_ZERO_COMPUT()
00488
00489
00490
00491
00492
00493
00494 // r4 += 4 * stride (next group); loop while end pointer > r4
00495 "add r4, r4, r1 << 2\n\t"
00496
00497 "lddsp r8, sp[0x08]\n\t"
00498 "cp.w r8, r4\n\t"
00499
00500
00501 "brgt __loop_r1\n\t"
00502
00503
00504
00505 // Skip the column loop when the stride is <= 4 bytes (unsigned compare)
00506 "cp.w r1, 4\n\t"
00507 "brls __loop_stage_end\n\t"
00508
00509
00510 // j = 4 bytes: first column handled with twiddle factors
00511 "mov r9, 4\n\t"
00512
00513 "stdsp sp[0x0C], r9\n"
00514
00515
00516
00517
00518 // On entry r9 = j
00519 "__loop_j:\n\t"
00520
00521
00522 // r4 = vect1 + j
00523 "lddsp r4, sp[0x04]\n\t"
00524
00525
00526 "add r4, r9\n\t"
00527
00528
00529 // r9 = sp[0x10] += sp[0x14]: byte offset of the first twiddle word
00530 "lddsp r1, sp[0x14]\n\t"
00531
00532 "lddsp r9, sp[0x10]\n\t"
00533 "add r9, r1\n\t"
00534 "stdsp sp[0x10], r9\n\t"
00535
00536 // r1 = address of dsp16_twiddle_factors; r0 = first twiddle word
00537 "mov r1, "LO"("ASTRINGZ(dsp16_twiddle_factors)")\n\t"
00538 "orh r1, "HI"("ASTRINGZ(dsp16_twiddle_factors)")\n\t"
00539 "ld.w r0, r1[r9]\n\t"
00540
00541 #if (DSP_OPTIMIZATION & DSP_OPTI_SIZE)
00542 // Size-optimised build: the second (-> r7) and third (-> r6) twiddle words
00543 // are derived from the same table. If r9 < N/2 (N = DSP16_N_TWIDDLE_FACTORS) go to the direct path.
00544 "cp.w r9, "ASTRINGZ(DSP16_N_TWIDDLE_FACTORS/2)"\n\t"
00545 ASM_INSTRUCT_COMPACKED("brlt __address_indice_not_overflow")"\n\t"
00546
00547 // r9 >= N/2: r10 = 2N - 2 * r9 (mirrored offset, presumably exploiting table symmetry)
00548 "mov r8, "ASTRINGZ(DSP16_N_TWIDDLE_FACTORS*2)"\n\t"
00549 "sub r10, r8, r9 << 1\n\t"
00550
00551 // r8 = table word at the mirrored offset
00552 "ld.w r8, r1[r10]\n\t"
00553
00554 // r7 = r8 with its top halfword negated, bottom halfword kept
00555 # if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00556 "lsr r7, r8, 16\n\t"
00557 "neg r7\n\t"
00558 "bfins r7, r8, 16, 16\n\t"
00559 "swap.h r7\n\t"
00560 # else
00561 "mov r7, r8\n\t"
00562 "neg r7\n\t"
00563 "bfins r7, r8, 0, 16\n\t"
00564 # endif
00565
00566
00567 // Third twiddle word: pick the formula from r10 = 2N - 2 * r9
00568 "cp.w r10, "ASTRINGZ((DSP16_N_TWIDDLE_FACTORS*2)/3)"\n\t"
00569 ASM_INSTRUCT_COMPACKED("brge __address_r_not_overflow1")"\n\t"
00570
00571 // r10 < 2N/3: offset = r9 - r10 = 3 * r9 - 2N
00572 "sub r10, r9, r10\n\t"
00573
00574 "ld.w r6, r1[r10]\n\t"
00575
00576 // r6 = table word with both halfwords negated
00577 # if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00578 "lsr r8, r6, 16\n\t"
00579 "neg r6\n\t"
00580 "neg r8\n\t"
00581 "bfins r6, r8, 16, 16\n\t"
00582 # else
00583 "neg r6\n\t"
00584 # endif
00585
00586 "bral __address_end_twiddle_factors\n"
00587 "__address_r_not_overflow1:\n\t"
00588
00589 // r10 >= 2N/3: offset = r10 - r9 = 2N - 3 * r9
00590 "sub r10, r10, r9\n\t"
00591
00592
00593 "ld.w r8, r1[r10]\n\t"
00594
00595 // r6 = r8 with its top halfword negated, bottom halfword kept
00596 # if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00597 "lsr r6, r8, 16\n\t"
00598 "neg r6\n\t"
00599 "bfins r6, r8, 16, 16\n\t"
00600 "swap.h r6\n\t"
00601 # else
00602 "mov r6, r8\n\t"
00603 "neg r6\n\t"
00604 "bfins r6, r8, 0, 16\n\t"
00605 # endif
00606 "bral __address_end_twiddle_factors\n"
00607 "__address_indice_not_overflow:\n\t"
00608
00609 // r9 < N/2: second twiddle word read directly at offset 2 * r9
00610 "ld.w r7, r1[r9 << 1]\n\t"
00611
00612 // if r9 < N/3 the third twiddle word can be read directly as well
00613 "cp.w r9, "ASTRINGZ(DSP16_N_TWIDDLE_FACTORS/3)"\n\t"
00614
00615 ASM_INSTRUCT_COMPACKED("brlt __address_r_not_overflow2")"\n\t"
00616
00617 // otherwise offset = 2N - r9 - 2 * r9 = 2N - 3 * r9
00618 "mov r8, "ASTRINGZ(DSP16_N_TWIDDLE_FACTORS*2)"\n\t"
00619 "sub r8, r8, r9\n\t"
00620 "sub r10, r8, r9 << 1\n\t"
00621
00622
00623 "ld.w r8, r1[r10]\n\t"
00624 // r6 = r8 with its top halfword negated, bottom halfword kept
00625 # if (DSP_OPTIMIZATION & DSP_OPTI_ACCURACY)
00626 "lsr r6, r8, 16\n\t"
00627 "neg r6\n\t"
00628 "bfins r6, r8, 16, 16\n\t"
00629 "swap.h r6\n\t"
00630 # else
00631 "mov r6, r8\n\t"
00632 "neg r6\n\t"
00633 "bfins r6, r8, 0, 16\n\t"
00634 # endif
00635
00636 "bral __address_end_twiddle_factors\n"
00637 "__address_r_not_overflow2:\n\t"
00638 // r10 = 3 * r9: third twiddle word read directly
00639 "add r10, r9, r9 << 1\n\t"
00640 "ld.w r6, r1[r10]\n"
00641
00642 "__address_end_twiddle_factors:\n\t"
00643 // sp[0x1C] = register pair r6/r7, reloaded by M_BUTTERFLY4_COMPUT()
00644 "st.d sp[0x1C], r6\n\t"
00645 #else
00646
00647 // Otherwise: both extra twiddle words come as one double word from dsp16_twiddle_factors2
00648 "mov r1, "LO"("ASTRINGZ(dsp16_twiddle_factors2)")\n\t"
00649 "orh r1, "HI"("ASTRINGZ(dsp16_twiddle_factors2)")\n\t"
00650
00651
00652
00653 // byte offset 2 * r9 into the second table; the pair is saved at sp[0x1C]
00654 "ld.d r6, r1[r9 << 1]\n\t"
00655 "st.d sp[0x1C], r6\n\t"
00656
00657 #endif
00658
00659 // r1 = stride again (r1 was used as table address above)
00660 "lddsp r1, sp[0x18]\n"
00661
00662 // Column j: twiddled butterfly in every group
00663 "__loop_r2:\n\t"
00664
00665
00666
00667 M_BUTTERFLY4_COMPUT()
00668
00669
00670
00671
00672 // r4 += 4 * stride (next group)
00673 "add r4, r4, r1 << 2\n\t"
00674
00675 // loop while end pointer > r4
00676 "lddsp r8, sp[0x08]\n\t"
00677 "cp.w r8, r4\n\t"
00678
00679 "brgt __loop_r2\n\t"
00680
00681
00682
00683 // j += 4 bytes (next column), kept in r9 and sp[0x0C]
00684 "lddsp r9, sp[0x0C]\n\t"
00685 "sub r9, -4\n\t"
00686 "stdsp sp[0x0C], r9\n\t"
00687
00688 "lddsp r5, sp[0x18]\n\t"
00689
00690 // loop while stride > j
00691 "cp.w r5, r9\n\t"
00692 "brgt __loop_j\n"
00693
00694 "__loop_stage_end:\n\t"
00695
00696 // twiddle offset step >>= 2 for the next pass
00697 "lddsp r1, sp[0x14]\n\t"
00698 "lsr r1, r1, 2\n\t"
00699 "stdsp sp[0x14], r1\n\t"
00700
00701 // stage counter -= 2; brgt tests the flags left by this sub
00702 "lddsp r1, sp[0x00]\n\t"
00703 "sub r1, 2\n\t"
00704 "stdsp sp[0x00], r1\n\t"
00705 "brgt __loop_stage\n"
00706
00707 "__address_end:\n\t"
00708 "sub sp, -36\n\t"
00709 "popm r0-r7, pc\n\t"
00710 #if (defined __GNUC__)
00711 :
00712 :
00713 # if !(DSP_OPTIMIZATION & DSP_OPTI_SIZE)
00714 [VAR_TWIDDLE_FACTORS] "i" (dsp16_twiddle_factors),
00715 [VAR_TWIDDLE_FACTORS2] "i" (dsp16_twiddle_factors2)
00716 # else
00717 [VAR_TWIDDLE_FACTORS] "i" (dsp16_twiddle_factors)
00718 # endif
00719 #endif
00720 );
00721 }
00722
00723 #endif