00001
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #include "preprocessor.h"
00048
00049 #if !defined(FORCE_ALL_GENERICS) && \
00050 !defined(FORCE_GENERIC_VECT32_ADD) && \
00051 defined(TARGET_SPECIFIC_VECT32_ADD)
00052
/*
 * DSP32_ADD_END_KERNEL_X_FCT(x_num, data)
 *
 * Compiler-specific front end of DSP32_ADD_END_KERNEL_X_FCT__.
 *  - GCC: the generated function is additionally marked __naked__, so the
 *    assembly inside it is the whole function.
 *  - IAR (__ICCAVR32__): the arguments are forwarded unchanged.
 *  - Any other compiler: stop with a clear diagnostic instead of leaving the
 *    macro undefined, which would only surface later as obscure errors where
 *    the macro is instantiated.
 */
#if defined(__GNUC__)
#  define DSP32_ADD_END_KERNEL_X_FCT(x_num, data) \
     __attribute__((__naked__)) DSP32_ADD_END_KERNEL_X_FCT__(x_num, data)
#elif defined(__ICCAVR32__)
#  define DSP32_ADD_END_KERNEL_X_FCT(x_num, data) \
     DSP32_ADD_END_KERNEL_X_FCT__(x_num, data)
#else
#  error "DSP32_ADD_END_KERNEL_X_FCT: unsupported compiler (GCC or IAR for AVR32 expected)"
#endif
00058
00059
/*
 * ASM_INSTRUCT_COMPACKED(str) / ASM_INSTRUCT_EXTENDED(str)
 *
 * Wrap the text of one assembly instruction (a string literal).
 *  - GCC: the text is passed through untouched.
 *  - IAR (__ICCAVR32__): ":C" respectively ":E" is appended to the text.
 *    NOTE(review): presumably IAR's notation for forcing the compact or the
 *    extended instruction encoding - confirm against the IAR assembler manual.
 *  - Any other compiler: stop with a clear diagnostic instead of leaving the
 *    macros undefined.
 */
#if defined(__GNUC__)
#  define ASM_INSTRUCT_COMPACKED(str) str
#  define ASM_INSTRUCT_EXTENDED(str)  str
#elif defined(__ICCAVR32__)
#  define ASM_INSTRUCT_COMPACKED(str) str":C"
#  define ASM_INSTRUCT_EXTENDED(str)  str":E"
#else
#  error "ASM_INSTRUCT_*: unsupported compiler (GCC or IAR for AVR32 expected)"
#endif
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
/*
 * DSP32_ADDITION_0 - assembly fragment for zero left-over elements.
 * Expands to nothing, so it contributes no instruction to the asm string
 * it is pasted into. The three register arguments are ignored.
 */
#define DSP32_ADDITION_0(r_dst, r_src_a, r_src_b)
00081
/*
 * DSP32_ADDITION_1 - assembly fragment adding one 32-bit element.
 *
 *   r_dst    name of the register holding the destination address
 *   r_src_a  name of the register holding the first operand address
 *   r_src_b  name of the register holding the second operand address
 *
 * Loads one word from each source, adds them with the plain `add`
 * instruction and stores the sum at offset 0 of the destination.
 * Overwrites r1, r2 and r3; the code this fragment is pasted into has to
 * preserve them.
 */
#define DSP32_ADDITION_1(r_dst, r_src_a, r_src_b) \
  "ld.w r2, " ASTRINGZ(r_src_a) "[0x0]\n\t"   /* r2 <- a[0]      */ \
  "ld.w r3, " ASTRINGZ(r_src_b) "[0x0]\n\t"   /* r3 <- b[0]      */ \
  "add r1, r2, r3\n\t"                        /* r1 <- r2 + r3   */ \
  "st.w " ASTRINGZ(r_dst) "[0x0], r1\n\t"     /* dst[0] <- r1    */
00088
/*
 * DSP32_ADDITION_2 - assembly fragment adding two consecutive 32-bit elements.
 *
 *   r_dst    name of the register holding the destination address
 *   r_src_a  name of the register holding the first operand address
 *   r_src_b  name of the register holding the second operand address
 *
 * Each source pair is fetched with a single double-word load (into r0/r1 and
 * r2/r3), the two words are added separately with the plain `add`
 * instruction, and the r4/r5 result pair is written back with one
 * double-word store at offset 0.
 * Overwrites r0-r5; the code this fragment is pasted into has to preserve
 * them.
 */
#define DSP32_ADDITION_2(r_dst, r_src_a, r_src_b) \
  "ld.d r0, " ASTRINGZ(r_src_a) "[0x0]\n\t"   /* r1:r0 <- a[0..1]   */ \
  "ld.d r2, " ASTRINGZ(r_src_b) "[0x0]\n\t"   /* r3:r2 <- b[0..1]   */ \
  "add r5, r1, r3\n\t"                        /* r5 <- r1 + r3      */ \
  "add r4, r0, r2\n\t"                        /* r4 <- r0 + r2      */ \
  "st.d " ASTRINGZ(r_dst) "[0x0], r4\n\t"     /* dst[0..1] <- r5:r4 */
00096
/*
 * DSP32_ADDITION_3 - assembly fragment adding three consecutive 32-bit
 * elements.
 *
 *   r_dst    name of the register holding the destination address
 *   r_src_a  name of the register holding the first operand address
 *   r_src_b  name of the register holding the second operand address
 *
 * The first two elements are handled as one double-word load/add/store group
 * at offset 0; the third element is handled as a single word at byte offset
 * 8. All sums use the plain `add` instruction.
 * Overwrites r0-r5; the code this fragment is pasted into has to preserve
 * them.
 */
#define DSP32_ADDITION_3(r_dst, r_src_a, r_src_b) \
  /* elements 0 and 1: double-word group */ \
  "ld.d r0, " ASTRINGZ(r_src_a) "[0x0]\n\t" \
  "ld.d r2, " ASTRINGZ(r_src_b) "[0x0]\n\t" \
  "add r5, r1, r3\n\t" \
  "add r4, r0, r2\n\t" \
  "st.d " ASTRINGZ(r_dst) "[0x0], r4\n\t" \
  /* element 2: single word at byte offset 8 */ \
  "ld.w r2, " ASTRINGZ(r_src_a) "[0x8]\n\t" \
  "ld.w r3, " ASTRINGZ(r_src_b) "[0x8]\n\t" \
  "add r1, r2, r3\n\t" \
  "st.w " ASTRINGZ(r_dst) "[0x8], r1\n\t"
00110
00111
00112
00113
00114
00115
/*
 * DSP32_ADD_END_KERNEL_X_FCT__(x_num, data)
 *
 * Defines the file-local function
 *
 *     static void dsp32_vect_add_end_kernel_x<x_num>(dsp32_t *vect1,
 *                                                    dsp32_t *vect2,
 *                                                    dsp32_t *vect3);
 *
 * whose body is a single asm statement: push r0-r7 and lr, run the
 * DSP32_ADDITION_<x_num> fragment on registers r12, r11 and r10, then pop
 * r0-r7 and load the saved lr value into pc, which ends the function.
 *
 * NOTE(review): the asm assumes vect1, vect2 and vect3 arrive in r12, r11
 * and r10 (AVR32 calling convention) - confirm against the ABI document.
 *
 *   x_num  number of elements to add (selects the DSP32_ADDITION_ fragment)
 *   data   not used by this expansion
 */
#define DSP32_ADD_END_KERNEL_X_FCT__(x_num, data) \
  static void TPASTE2(dsp32_vect_add_end_kernel_x, x_num)(dsp32_t *vect1, \
                                                          dsp32_t *vect2, \
                                                          dsp32_t *vect3) \
  { \
    __asm__ __volatile__ ( \
      /* prologue: save the registers the fragment may overwrite */ \
      "pushm r0-r7, lr\n\t" \
      /* body: add x_num elements */ \
      TPASTE2(DSP32_ADDITION_, x_num)(r12, r11, r10) \
      /* epilogue: restore and return through pc */ \
      "popm r0-r7, pc\n\t" \
    ); \
  }
00125
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00140 #if __GNUC__
00141 __attribute__((__naked__))
00142 __attribute__((__noinline__))
00143 #elif __ICCAVR32__
00144 # pragma shadow_registers=full
00145 # pragma optimize=none no_inline
00146 #endif
00147 static int dsp32_vect_add_kernel_ext(dsp32_t *vect1, dsp32_t *vect2, dsp32_t *vect3, int size)
00148 {
00149 __asm__ __volatile__ ( \
00150 "pushm r0-r7, lr\n\t" \
00151 \
00152 "mov lr, 0\n\t" \
00153 "sub r9, 3\n\t" \
00154 \
00155 "cp.h lr, r9\n\t" \
00156 ASM_INSTRUCT_COMPACKED("brge __dsp32_add_ext_end_loop")"\n" \
00157 \
00158 "__dsp32_add_ext_loop:\n\t" \
00159 \
00160 "ld.d r0, r11[lr << 2]\n\t" \
00161 "ld.d r2, r10[lr << 2]\n\t" \
00162 \
00163 "add r5, r1, r3\n\t" \
00164 "add r4, r0, r2\n\t" \
00165 "st.d r12[lr << 2], r4\n\t" \
00166 \
00167 "sub lr, -2\n\t" \
00168 \
00169 "ld.d r0, r11[lr << 2]\n\t" \
00170 "ld.d r2, r10[lr << 2]\n\t" \
00171 \
00172 "add r5, r1, r3\n\t" \
00173 "add r4, r0, r2\n\t" \
00174 "st.d r12[lr << 2], r4\n\t" \
00175 \
00176 "sub lr, -2\n\t" \
00177 \
00178 "cp.h lr, r9\n\t" \
00179 ASM_INSTRUCT_COMPACKED("brlt __dsp32_add_ext_loop")"\n" \
00180 \
00181 "__dsp32_add_ext_end_loop:\n\t" \
00182 \
00183 "mov r12, lr\n\t" \
00184 "popm r0-r7, pc\n\t" \
00185 ); \
00186
00187 return 0;
00188 }
00189
00190
00191 DSP32_ADD_END_KERNEL_X_FCT(0, "")
00192 DSP32_ADD_END_KERNEL_X_FCT(1, "")
00193 DSP32_ADD_END_KERNEL_X_FCT(2, "")
00194 DSP32_ADD_END_KERNEL_X_FCT(3, "")
00195
00196 void dsp32_vect_add(dsp32_t *vect1, dsp32_t *vect2, dsp32_t *vect3, int size)
00197 {
00198 typedef void (*add_end_kernel_opti_t)(dsp32_t *, dsp32_t *, dsp32_t *);
00199 static const add_end_kernel_opti_t add_end_kernel_opti[4] = {
00200 dsp32_vect_add_end_kernel_x0,
00201 dsp32_vect_add_end_kernel_x1,
00202 dsp32_vect_add_end_kernel_x2,
00203 dsp32_vect_add_end_kernel_x3
00204 };
00205 int n;
00206
00207 n = dsp32_vect_add_kernel_ext(vect1, vect2, vect3, size);
00208
00209
00210 add_end_kernel_opti[size&0x3](&vect1[n], &vect2[n], &vect3[n]);
00211 }
00212
00213 #endif