00001
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #include "preprocessor.h"
00048
00049 #if !defined(FORCE_ALL_GENERICS) && \
00050 !defined(FORCE_GENERIC_VECT16_CONVPART) && \
00051 defined(TARGET_SPECIFIC_VECT16_CONVPART)
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
/*
 * DSP16_COMPUT_TAP_4x4_0 - assembly text for one pass of the inner tap loop.
 *
 * Expands to a run of adjacent string literals intended to be pasted inside
 * an asm statement. Every macro argument is a register name that is turned
 * into text with ASTRINGZ (defined outside this file; presumably a
 * stringizing macro - confirm in preprocessor.h).
 *
 *   r_vect1         base register of the two indexed ld.d loads
 *   r_h             pointer register, pre-decremented by "ld.d r4, --r_h"
 *   r_i             index register; used scaled (<< 1) in the loads and
 *                   advanced by 4 between them ("sub r_i, -4")
 *   r_sum1..r_sum4  accumulator registers; each is the destination of four
 *                   machh.w instructions, i.e. 4 products x 4 accumulators
 *
 * The :t / :b suffixes pick the top or bottom halfword of a register.
 *
 * Scratch registers are hard-coded and overwritten: r4/r5 (loaded through
 * r_h), r6/r7 (first load through r_vect1) and r10/r11 (second load through
 * r_vect1, after r_i has been advanced). The code reads r5, r7 and r11 right
 * after "ld.d" into r4, r6 and r10, so it relies on ld.d filling a register
 * pair. r10/r11 are left loaded on exit; the DSP16_COMPUT_TAP_ENDING_x
 * macros read them without reloading.
 *
 * The register arguments must therefore not be any of r4-r7, r10, r11.
 */
00065 #define DSP16_COMPUT_TAP_4x4_0(r_vect1, r_h, r_i, r_sum1, r_sum2, r_sum3, r_sum4) \
00066 "ld.d r4, --"ASTRINGZ(r_h)"\n\t" \
00067 "ld.d r6, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 1]\n\t" \
00068 \
00069 "machh.w "ASTRINGZ(r_sum1)", r4:b, r7:t\n\t" \
00070 "machh.w "ASTRINGZ(r_sum1)", r4:t, r7:b\n\t" \
00071 "machh.w "ASTRINGZ(r_sum1)", r5:b, r6:t\n\t" \
00072 "machh.w "ASTRINGZ(r_sum1)", r5:t, r6:b\n\t" \
00073 \
00074 "sub "ASTRINGZ(r_i)", -4\n\t" \
00075 \
00076 "ld.d r10, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 1]\n\t" \
00077 \
00078 "machh.w "ASTRINGZ(r_sum2)", r4:b, r7:b\n\t" \
00079 "machh.w "ASTRINGZ(r_sum2)", r4:t, r6:t\n\t" \
00080 "machh.w "ASTRINGZ(r_sum2)", r5:b, r6:b\n\t" \
00081 "machh.w "ASTRINGZ(r_sum2)", r5:t, r11:t\n\t" \
00082 \
00083 "machh.w "ASTRINGZ(r_sum3)", r4:b, r6:t\n\t" \
00084 "machh.w "ASTRINGZ(r_sum3)", r4:t, r6:b\n\t" \
00085 "machh.w "ASTRINGZ(r_sum3)", r5:b, r11:t\n\t" \
00086 "machh.w "ASTRINGZ(r_sum3)", r5:t, r11:b\n\t" \
00087 \
00088 "machh.w "ASTRINGZ(r_sum4)", r4:b, r6:b\n\t" \
00089 "machh.w "ASTRINGZ(r_sum4)", r4:t, r11:t\n\t" \
00090 "machh.w "ASTRINGZ(r_sum4)", r5:b, r11:b\n\t" \
00091 "machh.w "ASTRINGZ(r_sum4)", r5:t, r10:t\n\t"
00092
00093
/*
 * DSP16_COMPUT_TAP_4x4_1 - inner tap-loop pass used when the kernel is
 * instantiated with x_num == 1.
 *
 * Same arguments, same hard-coded scratch registers (r4-r7, r10, r11) and
 * same 4 products x 4 accumulators layout as DSP16_COMPUT_TAP_4x4_0, with
 * two differences:
 *
 *   - after "ld.d r4, --r_h", the top halfword of r5 is replaced by a
 *     halfword loaded from r_h[8] ("ldins.h r5:t, r_h[8]");
 *   - the tap-side operands are consumed in the order r5:t, r4:b, r4:t, r5:b
 *     instead of r4:b, r4:t, r5:b, r5:t. The data-side operands (r7, r6,
 *     r11, r10 halves) appear in the same sequence as in the _0 variant.
 *
 * NOTE(review): this looks like a one-halfword shift of the tap window to
 * cope with a tap pointer that was rounded down to a word boundary by the
 * kernel - confirm against the kernel's pointer set-up before relying on it.
 */
00094 #define DSP16_COMPUT_TAP_4x4_1(r_vect1, r_h, r_i, r_sum1, r_sum2, r_sum3, r_sum4) \
00095 "ld.d r4, --"ASTRINGZ(r_h)"\n\t" \
00096 "ldins.h r5:t, "ASTRINGZ(r_h)"[8]\n\t" \
00097 "ld.d r6, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 1]\n\t" \
00098 \
00099 "machh.w "ASTRINGZ(r_sum1)", r5:t, r7:t\n\t" \
00100 "machh.w "ASTRINGZ(r_sum1)", r4:b, r7:b\n\t" \
00101 "machh.w "ASTRINGZ(r_sum1)", r4:t, r6:t\n\t" \
00102 "machh.w "ASTRINGZ(r_sum1)", r5:b, r6:b\n\t" \
00103 \
00104 "sub "ASTRINGZ(r_i)", -4\n\t" \
00105 \
00106 "ld.d r10, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 1]\n\t" \
00107 \
00108 "machh.w "ASTRINGZ(r_sum2)", r5:t, r7:b\n\t" \
00109 "machh.w "ASTRINGZ(r_sum2)", r4:b, r6:t\n\t" \
00110 "machh.w "ASTRINGZ(r_sum2)", r4:t, r6:b\n\t" \
00111 "machh.w "ASTRINGZ(r_sum2)", r5:b, r11:t\n\t" \
00112 \
00113 "machh.w "ASTRINGZ(r_sum3)", r5:t, r6:t\n\t" \
00114 "machh.w "ASTRINGZ(r_sum3)", r4:b, r6:b\n\t" \
00115 "machh.w "ASTRINGZ(r_sum3)", r4:t, r11:t\n\t" \
00116 "machh.w "ASTRINGZ(r_sum3)", r5:b, r11:b\n\t" \
00117 \
00118 "machh.w "ASTRINGZ(r_sum4)", r5:t, r6:b\n\t" \
00119 "machh.w "ASTRINGZ(r_sum4)", r4:b, r11:t\n\t" \
00120 "machh.w "ASTRINGZ(r_sum4)", r4:t, r11:b\n\t" \
00121 "machh.w "ASTRINGZ(r_sum4)", r5:b, r10:t\n\t"
00122
00123
/*
 * DSP16_COMPUT_TAP_4x4_2 - inner tap-loop pass used when the kernel is
 * instantiated with x_num == 2.
 *
 * The expansion is instruction-for-instruction the same as
 * DSP16_COMPUT_TAP_4x4_0: pre-decrementing doubleword load through r_h into
 * r4/r5, doubleword load from r_vect1[r_i << 1] into r6/r7, r_i advanced by
 * 4, second doubleword load into r10/r11, and four machh.w into each of
 * r_sum1..r_sum4. It exists as a separate name so that the kernel can select
 * it by pasting x_num onto the macro name.
 *
 * Overwrites r4-r7, r10 and r11; leaves r10/r11 loaded for the ENDING macro.
 */
00124 #define DSP16_COMPUT_TAP_4x4_2(r_vect1, r_h, r_i, r_sum1, r_sum2, r_sum3, r_sum4) \
00125 "ld.d r4, --"ASTRINGZ(r_h)"\n\t" \
00126 "ld.d r6, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 1]\n\t" \
00127 \
00128 "machh.w "ASTRINGZ(r_sum1)", r4:b, r7:t\n\t" \
00129 "machh.w "ASTRINGZ(r_sum1)", r4:t, r7:b\n\t" \
00130 "machh.w "ASTRINGZ(r_sum1)", r5:b, r6:t\n\t" \
00131 "machh.w "ASTRINGZ(r_sum1)", r5:t, r6:b\n\t" \
00132 \
00133 "sub "ASTRINGZ(r_i)", -4\n\t" \
00134 \
00135 "ld.d r10, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 1]\n\t" \
00136 \
00137 "machh.w "ASTRINGZ(r_sum2)", r4:b, r7:b\n\t" \
00138 "machh.w "ASTRINGZ(r_sum2)", r4:t, r6:t\n\t" \
00139 "machh.w "ASTRINGZ(r_sum2)", r5:b, r6:b\n\t" \
00140 "machh.w "ASTRINGZ(r_sum2)", r5:t, r11:t\n\t" \
00141 \
00142 "machh.w "ASTRINGZ(r_sum3)", r4:b, r6:t\n\t" \
00143 "machh.w "ASTRINGZ(r_sum3)", r4:t, r6:b\n\t" \
00144 "machh.w "ASTRINGZ(r_sum3)", r5:b, r11:t\n\t" \
00145 "machh.w "ASTRINGZ(r_sum3)", r5:t, r11:b\n\t" \
00146 \
00147 "machh.w "ASTRINGZ(r_sum4)", r4:b, r6:b\n\t" \
00148 "machh.w "ASTRINGZ(r_sum4)", r4:t, r11:t\n\t" \
00149 "machh.w "ASTRINGZ(r_sum4)", r5:b, r11:b\n\t" \
00150 "machh.w "ASTRINGZ(r_sum4)", r5:t, r10:t\n\t"
00151
00152
/*
 * DSP16_COMPUT_TAP_4x4_3 - inner tap-loop pass used when the kernel is
 * instantiated with x_num == 3.
 *
 * The expansion is instruction-for-instruction the same as
 * DSP16_COMPUT_TAP_4x4_1: it adds "ldins.h r5:t, r_h[8]" after the
 * pre-decrementing load through r_h and consumes the tap halfwords in the
 * order r5:t, r4:b, r4:t, r5:b. It exists as a separate name so that the
 * kernel can select it by pasting x_num onto the macro name.
 *
 * Overwrites r4-r7, r10 and r11; leaves r10/r11 loaded for the ENDING macro.
 */
00153 #define DSP16_COMPUT_TAP_4x4_3(r_vect1, r_h, r_i, r_sum1, r_sum2, r_sum3, r_sum4) \
00154 "ld.d r4, --"ASTRINGZ(r_h)"\n\t" \
00155 "ldins.h r5:t, "ASTRINGZ(r_h)"[8]\n\t" \
00156 "ld.d r6, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 1]\n\t" \
00157 \
00158 "machh.w "ASTRINGZ(r_sum1)", r5:t, r7:t\n\t" \
00159 "machh.w "ASTRINGZ(r_sum1)", r4:b, r7:b\n\t" \
00160 "machh.w "ASTRINGZ(r_sum1)", r4:t, r6:t\n\t" \
00161 "machh.w "ASTRINGZ(r_sum1)", r5:b, r6:b\n\t" \
00162 \
00163 "sub "ASTRINGZ(r_i)", -4\n\t" \
00164 \
00165 "ld.d r10, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 1]\n\t" \
00166 \
00167 "machh.w "ASTRINGZ(r_sum2)", r5:t, r7:b\n\t" \
00168 "machh.w "ASTRINGZ(r_sum2)", r4:b, r6:t\n\t" \
00169 "machh.w "ASTRINGZ(r_sum2)", r4:t, r6:b\n\t" \
00170 "machh.w "ASTRINGZ(r_sum2)", r5:b, r11:t\n\t" \
00171 \
00172 "machh.w "ASTRINGZ(r_sum3)", r5:t, r6:t\n\t" \
00173 "machh.w "ASTRINGZ(r_sum3)", r4:b, r6:b\n\t" \
00174 "machh.w "ASTRINGZ(r_sum3)", r4:t, r11:t\n\t" \
00175 "machh.w "ASTRINGZ(r_sum3)", r5:b, r11:b\n\t" \
00176 \
00177 "machh.w "ASTRINGZ(r_sum4)", r5:t, r6:b\n\t" \
00178 "machh.w "ASTRINGZ(r_sum4)", r4:b, r11:t\n\t" \
00179 "machh.w "ASTRINGZ(r_sum4)", r4:t, r11:b\n\t" \
00180 "machh.w "ASTRINGZ(r_sum4)", r5:b, r10:t\n\t"
00181
00182
00183
00184
00185
00186
00187
00188
00189
00190
00191
00192
00193
00194
00195
00196
/*
 * DSP16_COMPUT_TAP_ENDING_0 - tail of the tap computation for x_num == 0.
 *
 * Expands to nothing: this variant adds no products after the tap loop. The
 * parameter list is kept identical to the other ENDING macros so the kernel
 * can select any of them by pasting x_num onto the name.
 */
00197 #define DSP16_COMPUT_TAP_ENDING_0(r_vect1, r_h, r_i, r_sum1, r_sum2, r_sum3, r_sum4)
00198
/*
 * DSP16_COMPUT_TAP_ENDING_1 - tail of the tap computation for x_num == 1.
 *
 * Loads one halfword from r_h[0] into r4 and adds exactly one more product
 * to each accumulator, pairing r4:b with r11:t, r11:b, r10:t and r10:b for
 * r_sum1..r_sum4 respectively.
 *
 * r10/r11 are not loaded here: the macro depends on the values left in them
 * by the preceding DSP16_COMPUT_TAP_4x4_x expansion. r_vect1 and r_i are
 * accepted but unused. Overwrites r4.
 */
00199 #define DSP16_COMPUT_TAP_ENDING_1(r_vect1, r_h, r_i, r_sum1, r_sum2, r_sum3, r_sum4) \
00200 "ld.sh r4, "ASTRINGZ(r_h)"[0x0]\n\t" \
00201 \
00202 "machh.w "ASTRINGZ(r_sum1)", r4:b, r11:t\n\t" \
00203 "machh.w "ASTRINGZ(r_sum2)", r4:b, r11:b\n\t" \
00204 "machh.w "ASTRINGZ(r_sum3)", r4:b, r10:t\n\t" \
00205 "machh.w "ASTRINGZ(r_sum4)", r4:b, r10:b\n\t"
00206
00207
/*
 * DSP16_COMPUT_TAP_ENDING_2 - tail of the tap computation for x_num == 2.
 *
 * Loads one word through the pre-decremented r_h into r4, advances r_i by 4
 * and loads one halfword from r_vect1[r_i << 1] into r6. It then adds two
 * more products to each accumulator, using r4:b and r4:t against a window of
 * data halfwords that slides by one per accumulator:
 * r11:t,r11:b / r11:b,r10:t / r10:t,r10:b / r10:b,r6:b.
 *
 * r10/r11 are not loaded here: the macro depends on the values left in them
 * by the preceding DSP16_COMPUT_TAP_4x4_x expansion. Overwrites r4 and r6,
 * and modifies r_h and r_i.
 */
00208 #define DSP16_COMPUT_TAP_ENDING_2(r_vect1, r_h, r_i, r_sum1, r_sum2, r_sum3, r_sum4) \
00209 \
00210 "ld.w r4, --"ASTRINGZ(r_h)"\n\t" \
00211 \
00212 "sub "ASTRINGZ(r_i)", -4\n\t" \
00213 \
00214 "ld.sh r6, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 1]\n\t" \
00215 \
00216 "machh.w "ASTRINGZ(r_sum1)", r4:b, r11:t\n\t" \
00217 "machh.w "ASTRINGZ(r_sum1)", r4:t, r11:b\n\t" \
00218 \
00219 "machh.w "ASTRINGZ(r_sum2)", r4:b, r11:b\n\t" \
00220 "machh.w "ASTRINGZ(r_sum2)", r4:t, r10:t\n\t" \
00221 \
00222 "machh.w "ASTRINGZ(r_sum3)", r4:b, r10:t\n\t" \
00223 "machh.w "ASTRINGZ(r_sum3)", r4:t, r10:b\n\t" \
00224 \
00225 "machh.w "ASTRINGZ(r_sum4)", r4:b, r10:b\n\t" \
00226 "machh.w "ASTRINGZ(r_sum4)", r4:t, r6:b\n\t"
00227
00228
/*
 * DSP16_COMPUT_TAP_ENDING_3 - tail of the tap computation for x_num == 3.
 *
 * Moves r_h back by 4 and loads a doubleword from it into r4/r5, advances
 * r_i by 4 and loads one word from r_vect1[r_i << 1] into r6. It then adds
 * three more products to each accumulator, using r4:t, r5:b and r5:t (r4:b
 * is loaded but not used) against a window of data halfwords that slides by
 * one per accumulator:
 * r11:t,r11:b,r10:t / r11:b,r10:t,r10:b / r10:t,r10:b,r6:t / r10:b,r6:t,r6:b.
 *
 * r10/r11 are not loaded here: the macro depends on the values left in them
 * by the preceding DSP16_COMPUT_TAP_4x4_x expansion. Overwrites r4, r5 and
 * r6, and modifies r_h and r_i.
 *
 * The final line deliberately carries no line-continuation backslash, so the
 * definition ends here and cannot absorb whatever line follows it.
 */
00229 #define DSP16_COMPUT_TAP_ENDING_3(r_vect1, r_h, r_i, r_sum1, r_sum2, r_sum3, r_sum4) \
00230 \
00231 "sub "ASTRINGZ(r_h)", 4\n\t" \
00232 "ld.d r4, "ASTRINGZ(r_h)"\n\t" \
00233 \
00234 "sub "ASTRINGZ(r_i)", -4\n\t" \
00235 "ld.w r6, "ASTRINGZ(r_vect1)"["ASTRINGZ(r_i)" << 1]\n\t" \
00236 \
00237 "machh.w "ASTRINGZ(r_sum1)", r4:t, r11:t\n\t" \
00238 "machh.w "ASTRINGZ(r_sum1)", r5:b, r11:b\n\t" \
00239 "machh.w "ASTRINGZ(r_sum1)", r5:t, r10:t\n\t" \
00240 \
00241 "machh.w "ASTRINGZ(r_sum2)", r4:t, r11:b\n\t" \
00242 "machh.w "ASTRINGZ(r_sum2)", r5:b, r10:t\n\t" \
00243 "machh.w "ASTRINGZ(r_sum2)", r5:t, r10:b\n\t" \
00244 \
00245 "machh.w "ASTRINGZ(r_sum3)", r4:t, r10:t\n\t" \
00246 "machh.w "ASTRINGZ(r_sum3)", r5:b, r10:b\n\t" \
00247 "machh.w "ASTRINGZ(r_sum3)", r5:t, r6:t\n\t" \
00248 \
00249 "machh.w "ASTRINGZ(r_sum4)", r4:t, r10:b\n\t" \
00250 "machh.w "ASTRINGZ(r_sum4)", r5:b, r6:t\n\t" \
00251 "machh.w "ASTRINGZ(r_sum4)", r5:t, r6:b\n\t"
00252
00253
/*
 * DSP16_CONVPART_KERNEL_X_FCT(x_num, data) - compiler-specific front end for
 * DSP16_CONVPART_KERNEL_X_FCT__.
 *
 * With GCC the generated function is prefixed with
 * __attribute__((__naked__)); with the IAR AVR32 compiler (__ICCAVR32__) the
 * definition is emitted unchanged. The generated body is a single asm
 * statement that does its own register save/restore and return.
 *
 * No fallback is provided: with any other compiler this macro stays
 * undefined and the instantiations further down will not compile.
 */
00254 #if __GNUC__
00255 # define DSP16_CONVPART_KERNEL_X_FCT(x_num, data) __attribute__((__naked__)) DSP16_CONVPART_KERNEL_X_FCT__(x_num, data)
00256 #elif __ICCAVR32__
00257 # define DSP16_CONVPART_KERNEL_X_FCT(x_num, data) DSP16_CONVPART_KERNEL_X_FCT__(x_num, data)
00258 #endif
00259
/*
 * ASM_INSTRUCT_COMPACKED(str) / ASM_INSTRUCT_EXTENDED(str) - wrap the text of
 * one assembly instruction.
 *
 * With GCC both macros return the string unchanged. With the IAR AVR32
 * compiler they append ":C" or ":E" to it.
 * NOTE(review): the suffixes presumably force the compact / extended
 * instruction encoding in the IAR assembler - confirm in the IAR manual.
 *
 * In the code visible here only ASM_INSTRUCT_COMPACKED is used (on the
 * "brlt" of the tap loop). Neither macro is defined for other compilers.
 */
00260 #if __GNUC__
00261 # define ASM_INSTRUCT_COMPACKED(str) str
00262 # define ASM_INSTRUCT_EXTENDED(str) str
00263 #elif __ICCAVR32__
00264 # define ASM_INSTRUCT_COMPACKED(str) str":C"
00265 # define ASM_INSTRUCT_EXTENDED(str) str":E"
00266 #endif
00267
00268
00269
00270
00271
00272
00273
00274
00275
00276
00277
00278
00279
00280
00281
00282
00283
00284
00285
00286
00287
00288
00289
00290
00291
00292
00293
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303
00304
00305
00306
00307
/*
 * DSP16_CONVPART_KERNEL_X_FCT__(x_num, data) - defines one static kernel
 * function whose name is TPASTE2(dsp16_vect_convpart_kernel_x, x_num); the
 * dispatcher below refers to the results as dsp16_vect_convpart_kernel_x0
 * through dsp16_vect_convpart_kernel_x3.
 *
 * x_num selects, through TPASTE2, which DSP16_COMPUT_TAP_4x4_<x_num> and
 * DSP16_COMPUT_TAP_ENDING_<x_num> expansions are pasted into the body, and
 * is appended to both labels so every kernel gets its own label names.
 * `data` is not used anywhere in the expansion.
 *
 * The body is one asm statement that never names the C parameters; it reads
 * r12, r11, r10, r9 and r8 directly.
 * NOTE(review): this assumes the calling convention delivers the five
 * arguments in r12, r11, r10, r9, r8 in declaration order - confirm against
 * the AVR32 ABI.
 *
 * Sequence of the generated code:
 *   prologue
 *     - pushm r0-r7, lr and reserve 12 bytes of stack
 *     - swap r11 and r12 (through r1)
 *     - r9 = r9 + (r8 << 1), then clear its two low bits (andn with 0x3);
 *       the result is kept at sp[0x08]
 *     - r8 -= 3; r8 is the bound tested by the tap loop
 *   main loop (label __dsp16_convpart_loop_main<x_num>)
 *     - save r11 at sp[0x04] and r10 at sp[0x00]; reload r9 from sp[0x08]
 *     - clear r0, r1, r2, r3 (the four accumulators) and lr (the tap index)
 *     - tap loop (label __dsp16_convpart_loop_tap<x_num>): the TAP_4x4
 *       expansion is invoked with (r12, r9, lr, r3, r2, r1, r0), followed by
 *       "cp.h lr, r8" / "brlt". The test comes after the expansion, so the
 *       tap block runs at least once per main-loop iteration.
 *     - the ENDING expansion, with the same register arguments
 *     - each accumulator is shifted right arithmetically by DSP16_QB
 *     - r11 is reloaded from sp[0x04]; the bottom halfwords of r3,r2 are
 *       stored at r11[0x0] and those of r1,r0 at r11[0x4]
 *     - r11 and r12 both advance by 8
 *     - r10 is reloaded from sp[0x00] and decreased by 4; "brgt" repeats the
 *       main loop
 *   epilogue
 *     - release the 12 bytes of stack and popm r0-r7, pc
 *
 * Because r10 is only tested after being decreased at the bottom of the main
 * loop, the main loop also runs at least once, and each iteration stores
 * four halfword results.
 * NOTE(review): callers presumably guarantee a positive output count and at
 * least four taps; nothing in this body checks either.
 */
00308 #define DSP16_CONVPART_KERNEL_X_FCT__(x_num, data) \
00309 static void TPASTE2(dsp16_vect_convpart_kernel_x, x_num)(dsp16_t *vect1, dsp16_t *vect2, int vect1_size, dsp16_t *vect3, int vect3_size) \
00310 { \
00311 __asm__ __volatile__ ( \
00312 "pushm r0-r7, lr\n\t" \
00313 "sub sp, 12\n\t" \
00314 \
00315 "mov r1, r11\n\t" \
00316 "mov r11, r12\n\t" \
00317 "mov r12, r1\n\t" \
00318 \
00319 "add r9, r9, r8 << 1\n\t" \
00320 "mov lr, 0x3\n\t" \
00321 "andn r9, lr\n\t" \
00322 "stdsp sp[0x08], r9\n\t" \
00323 \
00324 "sub r8, 3\n" \
00325 \
00326 "__dsp16_convpart_loop_main"ASTRINGZ(x_num)":\n\t" \
00327 "stdsp sp[0x04], r11\n\t" \
00328 "stdsp sp[0x00], r10\n\t" \
00329 "lddsp r9, sp[0x08]\n\t" \
00330 \
00331 "mov r0, 0\n\t" \
00332 "mov r1, r0\n\t" \
00333 "mov r2, r0\n\t" \
00334 "mov r3, r0\n\t" \
00335 "mov lr, r0\n" \
00336 \
00337 "__dsp16_convpart_loop_tap"ASTRINGZ(x_num)":\n\t" \
00338 \
00339 TPASTE2(DSP16_COMPUT_TAP_4x4_, x_num)(r12, r9, lr, r3, r2, r1, r0) \
00340 \
00341 "cp.h lr, r8\n\t" \
00342 ASM_INSTRUCT_COMPACKED("brlt __dsp16_convpart_loop_tap"ASTRINGZ(x_num))"\n\t" \
00343 \
00344 TPASTE2(DSP16_COMPUT_TAP_ENDING_, x_num)(r12, r9, lr, r3, r2, r1, r0) \
00345 \
00346 "asr r3, "ASTRINGZ(DSP16_QB)"\n\t" \
00347 "asr r2, "ASTRINGZ(DSP16_QB)"\n\t" \
00348 "asr r1, "ASTRINGZ(DSP16_QB)"\n\t" \
00349 "asr r0, "ASTRINGZ(DSP16_QB)"\n\t" \
00350 \
00351 "lddsp r11, sp[0x04]\n\t" \
00352 "sthh.w r11[0x0], r3:b, r2:b\n\t" \
00353 "sthh.w r11[0x4], r1:b, r0:b\n\t" \
00354 \
00355 "sub r11, -8\n\t" \
00356 "sub r12, -8\n\t" \
00357 \
00358 "lddsp r10, sp[0x00]\n\t" \
00359 "sub r10, 4\n\t" \
00360 "brgt __dsp16_convpart_loop_main"ASTRINGZ(x_num)"\n\t" \
00361 \
00362 "sub sp, -12\n\t" \
00363 "popm r0-r7, pc\n\t" \
00364 ); \
00365 }
00366
00367
/*
 * Instantiate the four kernels, one per value of x_num (0..3). Each
 * expansion defines a static function dsp16_vect_convpart_kernel_x<x_num>
 * built from the matching TAP_4x4 / TAP_ENDING macro pair. The second
 * argument ("") is the `data` parameter, which the expansion does not use.
 */
00368 DSP16_CONVPART_KERNEL_X_FCT(0, "")
00369 DSP16_CONVPART_KERNEL_X_FCT(1, "")
00370 DSP16_CONVPART_KERNEL_X_FCT(2, "")
00371 DSP16_CONVPART_KERNEL_X_FCT(3, "")
00372
/*
 * dsp16_vect_convpart - public entry point; forwards the call to one of the
 * four assembly kernels defined above.
 *
 * The kernel is chosen by the two low bits of vect3_size
 * (vect3_size & 0x3), which matches the x_num each kernel was generated
 * with. vect1, vect2, vect3 and vect3_size are passed through unchanged; the
 * third argument handed to the kernel (its vect1_size parameter) is
 * vect2_size - vect3_size + 1.
 *
 * No argument is validated here.
 * NOTE(review): the kernels run their loops at least once, so callers
 * presumably must ensure vect2_size >= vect3_size and non-empty buffers -
 * confirm against the library documentation.
 */
00373 void dsp16_vect_convpart(dsp16_t *vect1, dsp16_t *vect2, int vect2_size, dsp16_t *vect3, int vect3_size)
00374 {
/* Signature shared by the four generated kernels. */
00375 typedef void (*convpart_kernel_opti_t)(dsp16_t *, dsp16_t *, int, dsp16_t *, int);
/* Dispatch table indexed by vect3_size & 0x3. */
00376 static const convpart_kernel_opti_t convpart_kernel_opti[4] = {
00377 dsp16_vect_convpart_kernel_x0,
00378 dsp16_vect_convpart_kernel_x1,
00379 dsp16_vect_convpart_kernel_x2,
00380 dsp16_vect_convpart_kernel_x3
00381 };
00382
00383
/* Third argument is the value the kernel receives as vect1_size. */
00384 convpart_kernel_opti[vect3_size&0x3](vect1, vect2, vect2_size - vect3_size + 1, vect3, vect3_size);
00385 }
00386
00387 #endif