00001
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047 #include "preprocessor.h"
00048
/*
 * Build guard for the target-specific 16-bit vector subtraction.
 * Everything up to the matching #endif is compiled only when
 * FORCE_ALL_GENERICS and FORCE_GENERIC_VECT16_SUB are both undefined
 * and TARGET_SPECIFIC_VECT16_SUB is defined.
 */
00049 #if !defined(FORCE_ALL_GENERICS) && \
00050 !defined(FORCE_GENERIC_VECT16_SUB) && \
00051 defined(TARGET_SPECIFIC_VECT16_SUB)
00052
/*
 * DSP16_SUB_END_KERNEL_X_FCT(x_num, data)
 *
 * Compiler-dependent front end for DSP16_SUB_END_KERNEL_X_FCT__, which
 * receives both arguments unchanged.
 *   - __GNUC__:     the generated function is prefixed with
 *                   __attribute__((__naked__)).
 *   - __ICCAVR32__: the generated function is emitted without any extra
 *                   decoration.
 * When neither macro evaluates to non-zero, this macro is left undefined.
 */
00053 #if __GNUC__
00054 # define DSP16_SUB_END_KERNEL_X_FCT(x_num, data) __attribute__((__naked__)) DSP16_SUB_END_KERNEL_X_FCT__(x_num, data)
00055 #elif __ICCAVR32__
00056 # define DSP16_SUB_END_KERNEL_X_FCT(x_num, data) DSP16_SUB_END_KERNEL_X_FCT__(x_num, data)
00057 #endif
00058
00059
/*
 * ASM_INSTRUCT_COMPACKED(str) / ASM_INSTRUCT_EXTENDED(str)
 *
 * Wrap an assembly instruction string literal.
 *   - __GNUC__:     the string is passed through unchanged.
 *   - __ICCAVR32__: the literal ":C" (COMPACKED) or ":E" (EXTENDED) is
 *                   appended by string-literal concatenation.
 * NOTE(review): the ":C"/":E" suffixes presumably select the compact or
 * extended instruction encoding in the IAR assembler -- confirm against the
 * IAR AVR32 documentation.
 */
00060 #if __GNUC__
00061 # define ASM_INSTRUCT_COMPACKED(str) str
00062 # define ASM_INSTRUCT_EXTENDED(str) str
00063 #elif __ICCAVR32__
00064 # define ASM_INSTRUCT_COMPACKED(str) str":C"
00065 # define ASM_INSTRUCT_EXTENDED(str) str":E"
00066 #endif
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
/*
 * DSP16_SUBTRACTION_<n>(r_vect1, r_vect2, r_vect3)
 *
 * Assembly fragments (string literals) that compute
 *     r_vect1[i] = r_vect2[i] - r_vect3[i]   for i = 0 .. n-1
 * on 16-bit elements. The three arguments are register names holding the
 * destination and the two source addresses; they are turned into text with
 * ASTRINGZ (presumably a stringification helper from preprocessor.h).
 * All variants use r0-r3 as scratch registers, so the surrounding code must
 * save and restore them.
 *
 * Variant 0: no element left, expands to nothing.
 */
00081 #define DSP16_SUBTRACTION_0(r_vect1, r_vect2, r_vect3)
00082
/*
 * Variant 1: one element. Loads one halfword from each source at offset 0
 * (ld.sh), subtracts them into r1 and stores the halfword result at offset 0
 * of the destination (st.h).
 */
00083 #define DSP16_SUBTRACTION_1(r_vect1, r_vect2, r_vect3) \
00084 "ld.sh r2, "ASTRINGZ(r_vect2)"[0x0]\n\t" \
00085 "ld.sh r3, "ASTRINGZ(r_vect3)"[0x0]\n\t" \
00086 \
00087 "sub r1, r2, r3\n\t" \
00088 "st.h "ASTRINGZ(r_vect1)"[0x0], r1\n\t"
00089
/*
 * Variant 2: two elements. Loads one 32-bit word (two packed halfwords) from
 * each source, subtracts the bottom halves into r1 and the top halves into
 * r0 (subhh.w), then stores both results as one packed word (sthh.w) with
 * r0's bottom half first and r1's bottom half second.
 */
00090 #define DSP16_SUBTRACTION_2(r_vect1, r_vect2, r_vect3) \
00091 "ld.w r2, "ASTRINGZ(r_vect2)"[0x0]\n\t" \
00092 "ld.w r3, "ASTRINGZ(r_vect3)"[0x0]\n\t" \
00093 \
00094 "subhh.w r1, r2:b, r3:b\n\t" \
00095 "subhh.w r0, r2:t, r3:t\n\t" \
00096 "sthh.w "ASTRINGZ(r_vect1)"[0x0], r0:b, r1:b\n\t"
00097
/*
 * Variant 3: three elements. Same sequence as variant 2 for the first two
 * elements, followed by the variant 1 sequence applied at byte offset 0x4
 * for the third element.
 */
00098 #define DSP16_SUBTRACTION_3(r_vect1, r_vect2, r_vect3) \
00099 "ld.w r2, "ASTRINGZ(r_vect2)"[0x0]\n\t" \
00100 "ld.w r3, "ASTRINGZ(r_vect3)"[0x0]\n\t" \
00101 \
00102 "subhh.w r1, r2:b, r3:b\n\t" \
00103 "subhh.w r0, r2:t, r3:t\n\t" \
00104 "sthh.w "ASTRINGZ(r_vect1)"[0x0], r0:b, r1:b\n\t" \
00105 \
00106 "ld.sh r2, "ASTRINGZ(r_vect2)"[0x4]\n\t" \
00107 "ld.sh r3, "ASTRINGZ(r_vect3)"[0x4]\n\t" \
00108 \
00109 "sub r1, r2, r3\n\t" \
00110 "st.h "ASTRINGZ(r_vect1)"[0x4], r1\n\t"
00111
00112
00113
00114
00115
00116
/*
 * DSP16_SUB_END_KERNEL_X_FCT__(x_num, data)
 *
 * Generates a static function named by TPASTE2(dsp16_vect_sub_end_kernel_x,
 * x_num) (presumably token pasting, e.g. dsp16_vect_sub_end_kernel_x2) with
 * the signature
 *     void f(dsp16_t *vect1, dsp16_t *vect2, dsp16_t *vect3);
 * Its body is a single asm statement that:
 *   1. pushes r0-r3 and lr,
 *   2. expands DSP16_SUBTRACTION_<x_num> with r12 as destination and
 *      r11 / r10 as the two sources,
 *   3. pops r0-r3 and loads the saved lr into pc, which returns to the
 *      caller.
 * NOTE(review): using r12, r11 and r10 for vect1, vect2 and vect3 assumes the
 * AVR32 calling convention passes the first three arguments in those
 * registers -- confirm against the ABI.
 *
 * x_num  number of elements handled (selects DSP16_SUBTRACTION_<x_num>).
 * data   not referenced by the expansion.
 */
00117 #define DSP16_SUB_END_KERNEL_X_FCT__(x_num, data) \
00118 static void TPASTE2(dsp16_vect_sub_end_kernel_x, x_num)(dsp16_t *vect1, dsp16_t *vect2, dsp16_t *vect3) \
00119 { \
00120 __asm__ __volatile__ ( \
00121 "pushm r0-r3, lr\n\t" \
00122 TPASTE2(DSP16_SUBTRACTION_, x_num)(r12, r11, r10) \
00123 "popm r0-r3, pc\n\t" \
00124 ); \
00125 }
00126
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
/*
 * Main subtraction kernel: computes vect1[i] = vect2[i] - vect3[i] four
 * 16-bit elements per loop iteration, for as long as at least four elements
 * remain.
 *
 * vect1  destination vector (addressed through r12 in the asm).
 * vect2  first source vector, the minuend (addressed through r11).
 * vect3  second source vector, the subtrahend (addressed through r10).
 * size   number of elements (read from r9 in the asm).
 *
 * Returns the number of elements processed: the loop counter is copied into
 * r12 just before returning, and the caller uses it as the start index of the
 * remaining 0-3 elements.
 *
 * The function is declared naked / non-inlined under GCC and carries
 * IAR-specific pragmas otherwise; the asm block saves and restores r0-r7 and
 * returns by popping the saved lr into pc.
 *
 * NOTE(review): the register-to-argument mapping assumes the AVR32 calling
 * convention (r12, r11, r10, r9 = first four arguments, r12 = return value)
 * -- confirm against the ABI.
 * NOTE(review): the loop bound is tested with cp.h; if that instruction
 * compares only the low 16 bits, sizes outside the signed 16-bit range would
 * not be handled correctly -- confirm against the AVR32 architecture manual.
 */
00141 #if __GNUC__
00142 __attribute__((__naked__))
00143 __attribute__((__noinline__))
00144 #elif __ICCAVR32__
00145 # pragma shadow_registers=full
00146 # pragma optimize=none no_inline
00147 #endif
00148 static int dsp16_vect_sub_kernel_ext(dsp16_t *vect1, dsp16_t *vect2, dsp16_t *vect3, int size)
00149 {
00150 __asm__ __volatile__ ( \
/* Save the scratch registers and the return address. */
00151 "pushm r0-r7, lr\n\t" \
00152 \
/* lr is the element index (starts at 0); r9 becomes size - 3, the loop bound. */
00153 "mov lr, 0\n\t" \
00154 "sub r9, 3\n\t" \
00155 \
/* Skip the loop entirely when 0 >= size - 3, i.e. fewer than 4 elements. */
00156 "cp.h lr, r9\n\t" \
00157 ASM_INSTRUCT_COMPACKED("brge __dsp16_sub_ext_end_loop")"\n" \
00158 \
00159 "__dsp16_sub_ext_loop:\n\t" \
00160 \
/* Load four elements from each source; lr << 1 converts the index to a byte offset. */
00161 "ld.d r0, r11[lr << 1]\n\t" \
00162 "ld.d r2, r10[lr << 1]\n\t" \
00163 \
/* First pair: subtract the halves of r1 / r3 and store them packed at index lr. */
00164 "subhh.w r4, r1:b, r3:b\n\t" \
00165 "subhh.w r5, r1:t, r3:t\n\t" \
00166 "sthh.w r12[lr << 1], r5:b, r4:b\n\t" \
00167 \
/* Subtracting -2 advances the index by two elements. */
00168 "sub lr, -2\n\t" \
/* Second pair: same operation on r0 / r2, stored at the advanced index. */
00169 "subhh.w r4, r0:b, r2:b\n\t" \
00170 "subhh.w r5, r0:t, r2:t\n\t" \
00171 "sthh.w r12[lr << 1], r5:b, r4:b\n\t" \
00172 \
00173 "sub lr, -2\n\t" \
00174 \
/* Loop while index < size - 3, i.e. while a full group of 4 remains. */
00175 "cp.h lr, r9\n\t" \
00176 ASM_INSTRUCT_COMPACKED("brlt __dsp16_sub_ext_loop")"\n" \
00177 \
00178 "__dsp16_sub_ext_end_loop:\n\t" \
00179 \
/* Hand the processed-element count back in r12, then restore registers and return. */
00180 "mov r12, lr\n\t" \
00181 "popm r0-r7, pc\n\t" \
00182 ); \
00183
/* Not reached when the popm above reloads pc; presumably kept so the non-void function has a return statement. */
00184 return 0;
00185 }
00186
00187
/*
 * Instantiate the four tail kernels dsp16_vect_sub_end_kernel_x0 .. _x3,
 * one per possible count (0 to 3) of elements left over after the
 * four-at-a-time main kernel. The second macro argument is passed as an
 * empty string; the generating macro does not reference it.
 */
00188 DSP16_SUB_END_KERNEL_X_FCT(0, "")
00189 DSP16_SUB_END_KERNEL_X_FCT(1, "")
00190 DSP16_SUB_END_KERNEL_X_FCT(2, "")
00191 DSP16_SUB_END_KERNEL_X_FCT(3, "")
00192
/*
 * Element-wise subtraction of two 16-bit vectors:
 *     vect1[i] = vect2[i] - vect3[i]   for i = 0 .. size-1
 *
 * vect1  destination vector.
 * vect2  first source vector (minuend).
 * vect3  second source vector (subtrahend).
 * size   number of elements in each vector.
 *
 * The work is split in two steps: the assembly kernel handles the elements in
 * groups of four and reports how many it processed, then one of four tail
 * kernels handles the remaining size & 0x3 elements.
 */
00193 void dsp16_vect_sub(dsp16_t *vect1, dsp16_t *vect2, dsp16_t *vect3, int size)
00194 {
00195 typedef void (*sub_end_kernel_opti_t)(dsp16_t *, dsp16_t *, dsp16_t *);
/* Tail kernels indexed by the number of leftover elements (0 to 3). */
00196 static const sub_end_kernel_opti_t sub_end_kernel_opti[4] = {
00197 dsp16_vect_sub_end_kernel_x0,
00198 dsp16_vect_sub_end_kernel_x1,
00199 dsp16_vect_sub_end_kernel_x2,
00200 dsp16_vect_sub_end_kernel_x3
00201 };
00202 int n;
00203
/* Bulk pass; n receives the kernel's return value, used below as the start index of the tail. */
00204 n = dsp16_vect_sub_kernel_ext(vect1, vect2, vect3, size);
00205
00206
/* Finish the last size & 0x3 elements, starting at index n in all three vectors. */
00207 sub_end_kernel_opti[size&0x3](&vect1[n], &vect2[n], &vect3[n]);
00208 }
00209
00210 #endif