00001
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048 #include "dsp.h"
00049
00050 #if defined(FORCE_ALL_GENERICS) || \
00051 defined(FORCE_GENERIC_FILT16_INTERPOLATION) || \
00052 !defined(TARGET_SPECIFIC_FILT16_INTERPOLATION)
00053
00054
/*
 * LOOP_UNROLL: number of multiply-accumulate steps emitted per pass of the
 * unrolled inner loop. It is also the size of the looped-kernel dispatch
 * table, the modulus used to pick the remainder kernel, and the largest tap
 * count routed to the loop-free kernels.
 *
 * LOOP_UNROLL_PLUS_ONE: size of the loop-free dispatch table, which is indexed
 * by a tap count in 0..LOOP_UNROLL. It is kept as a separate literal because
 * it is handed to MREPEAT as a repeat count (presumably MREPEAT needs a plain
 * integer literal there -- confirm against the preprocessor helpers).
 *
 * The kernels are instantiated by hand further down (x1..x6 loop-free,
 * x0..x5 looped), so raising LOOP_UNROLL above 6 also requires adding
 * instantiations.
 */
#define LOOP_UNROLL 6
#define LOOP_UNROLL_PLUS_ONE 7

/* Catch the two literals drifting apart: the loop-free table would then be
 * mis-sized for the indices the dispatcher uses. */
#if LOOP_UNROLL_PLUS_ONE != (LOOP_UNROLL + 1)
#error "LOOP_UNROLL_PLUS_ONE must be exactly LOOP_UNROLL + 1"
#endif
00057
/*
 * MREPEAT callback: expands to the name of the looped kernel whose suffix is
 * x_num, followed by a comma, so that repeated expansions form the body of an
 * array initializer. The second argument is ignored.
 */
#define DSP16_INTREPOLATION_FILTER_FUNCTION_NAME(x_num, data) TPASTE2(dsp16_filt_interpolation_kernel_x, x_num),
00060
/*
 * MREPEAT callback: expands to the name of the loop-free kernel whose suffix
 * is x_num, followed by a comma, so that repeated expansions form the body of
 * an array initializer. The second argument is ignored.
 */
#define DSP16_INTREPOLATION_NO_LOOP_FILTER_FUNCTION_NAME(x_num, data) TPASTE2(dsp16_filt_interpolation_no_loop_kernel_x, x_num),
00063
/*
 * One multiply-accumulate step, written as a complete statement (the trailing
 * semicolon is part of the expansion, so call sites need none).
 *
 * Relies on three names being in scope at the expansion site:
 *   sum    - the accumulator that is added to,
 *   ph     - coefficient pointer, read at offset +(x_num + data),
 *   pvect2 - sample pointer, read at offset -(x_num + data).
 */
#define DSP16_INTREPOLATION_FILTER(x_num, data) \
  sum += ph[(x_num) + (data)] * pvect2[-((x_num) + (data))];
00066
00067 #if LOOP_UNROLL > 4
00068
/*
 * Generates the static function dsp16_filt_interpolation_kernel_x<x_num>
 * (variant used when LOOP_UNROLL > 4). The second macro argument is unused.
 *
 * With n_tap = h_size / interpolation_ratio, the generated function walks
 * every input index n in [0, vect2_size) and every phase k in
 * [0, interpolation_ratio). For each pair it accumulates products of
 * coefficients read forwards from h[k * n_tap] and samples read backwards
 * from vect2[n + n_tap - 1]: first n_tap / LOOP_UNROLL unrolled passes of
 * LOOP_UNROLL steps (both pointers are moved after each pass), then x_num
 * trailing steps. The sum, shifted right by DSP16_QB, is written to the next
 * slot of vect1.
 *
 * All n_tap taps are covered only when x_num equals n_tap % LOOP_UNROLL; the
 * dispatcher selects the kernel accordingly.
 */
#define DSP16_INTERPOLATION_KERNEL_X_FCT(x_num, data) \
  static void TPASTE2(dsp16_filt_interpolation_kernel_x, x_num)(dsp16_t *vect1, dsp16_t *vect2, int vect2_size, dsp16_t *h, int h_size, int interpolation_ratio) \
  { \
    /* sum, ph and pvect2 are referenced by name from DSP16_INTREPOLATION_FILTER. */ \
    S32 sum; \
    dsp16_t *ph; \
    dsp16_t *pvect2; \
    dsp16_t *out = vect1; \
    const int taps_per_phase = h_size / interpolation_ratio; \
    const int full_passes = taps_per_phase / LOOP_UNROLL; \
    int in_idx; \
    int phase; \
    int pass; \
    \
    for (in_idx = 0; in_idx < vect2_size; in_idx++) \
    { \
      for (phase = 0; phase < interpolation_ratio; phase++) \
      { \
        sum = 0; \
        ph = h + phase * taps_per_phase; \
        pvect2 = vect2 + (in_idx + taps_per_phase - 1); \
        \
        /* Whole passes of LOOP_UNROLL steps each. */ \
        for (pass = full_passes; pass > 0; pass--) \
        { \
          MREPEAT(LOOP_UNROLL, DSP16_INTREPOLATION_FILTER, 0) \
          ph += LOOP_UNROLL; \
          pvect2 -= LOOP_UNROLL; \
        } \
        \
        /* Trailing steps that do not fill a whole pass. */ \
        MREPEAT(x_num, DSP16_INTREPOLATION_FILTER, 0); \
        \
        *out = sum >> DSP16_QB; \
        out++; \
      } \
    } \
  }
00100
00101 #else // LOOP_UNROLL <= 4
00102
/*
 * Generates the static function dsp16_filt_interpolation_kernel_x<x_num>
 * (variant used when LOOP_UNROLL <= 4). The second macro argument is unused.
 *
 * With n_tap = h_size / interpolation_ratio, the generated function walks
 * every input index n in [0, vect2_size) and every phase k in
 * [0, interpolation_ratio). The coefficient and sample pointers stay fixed
 * while a running offset advances by LOOP_UNROLL per unrolled pass; x_num
 * trailing steps then continue from that offset. Coefficients are read
 * forwards from h[k * n_tap], samples backwards from vect2[n + n_tap - 1].
 * The sum, shifted right by DSP16_QB, is written to the next slot of vect1.
 */
#define DSP16_INTERPOLATION_KERNEL_X_FCT(x_num, data) \
  static void TPASTE2(dsp16_filt_interpolation_kernel_x, x_num)(dsp16_t *vect1, dsp16_t *vect2, int vect2_size, dsp16_t *h, int h_size, int interpolation_ratio) \
  { \
    /* sum, ph and pvect2 are referenced by name from DSP16_INTREPOLATION_FILTER. */ \
    S32 sum; \
    dsp16_t *ph; \
    dsp16_t *pvect2; \
    dsp16_t *out = vect1; \
    const int taps_per_phase = h_size / interpolation_ratio; \
    int in_idx; \
    int phase; \
    int base; \
    \
    for (in_idx = 0; in_idx < vect2_size; in_idx++) \
    { \
      /* Newest sample of the window; shared by every phase of this input index. */ \
      pvect2 = vect2 + (in_idx + taps_per_phase - 1); \
      \
      for (phase = 0; phase < interpolation_ratio; phase++) \
      { \
        sum = 0; \
        ph = h + phase * taps_per_phase; \
        \
        /* Whole passes: base is the offset of the first tap of the pass. */ \
        base = 0; \
        while (base <= taps_per_phase - LOOP_UNROLL) \
        { \
          MREPEAT(LOOP_UNROLL, DSP16_INTREPOLATION_FILTER, base) \
          base += LOOP_UNROLL; \
        } \
        \
        /* Trailing steps continue from where the passes stopped. */ \
        MREPEAT(x_num, DSP16_INTREPOLATION_FILTER, base); \
        \
        *out = sum >> DSP16_QB; \
        out++; \
      } \
    } \
  }
00132
00133 #endif // LOOP_UNROLL > 4
00134
/*
 * Generates the static function dsp16_filt_interpolation_no_loop_kernel_x<x_num>:
 * a kernel whose per-output work is exactly x_num multiply-accumulate steps
 * with no inner tap loop. The second macro argument is unused.
 *
 * With n_tap = h_size / interpolation_ratio, the generated function walks
 * every input index n in [0, vect2_size) and every phase k in
 * [0, interpolation_ratio), accumulating x_num products of coefficients read
 * forwards from h[k * n_tap] and samples read backwards from
 * vect2[n + n_tap - 1]. The sum, shifted right by DSP16_QB, is written to the
 * next slot of vect1.
 */
#define DSP16_INTERPOLATION_NO_LOOP_KERNEL_X_FCT(x_num, data) \
  static void TPASTE2(dsp16_filt_interpolation_no_loop_kernel_x, x_num)(dsp16_t *vect1, dsp16_t *vect2, int vect2_size, dsp16_t *h, int h_size, int interpolation_ratio) \
  { \
    /* sum, ph and pvect2 are referenced by name from DSP16_INTREPOLATION_FILTER. */ \
    S32 sum; \
    dsp16_t *ph; \
    dsp16_t *pvect2; \
    dsp16_t *out = vect1; \
    const int taps_per_phase = h_size / interpolation_ratio; \
    int in_idx; \
    int phase; \
    \
    for (in_idx = 0; in_idx < vect2_size; in_idx++) \
    { \
      /* Newest sample of the window; shared by every phase of this input index. */ \
      pvect2 = vect2 + (in_idx + taps_per_phase - 1); \
      \
      for (phase = 0; phase < interpolation_ratio; phase++) \
      { \
        sum = 0; \
        ph = h + phase * taps_per_phase; \
        MREPEAT(x_num, DSP16_INTREPOLATION_FILTER, 0) \
        *out = sum >> DSP16_QB; \
        out++; \
      } \
    } \
  }
00160
00161 static void dsp16_filt_interpolation_no_loop_kernel_x0(dsp16_t *vect1, dsp16_t *vect2, int vect2_size, dsp16_t *h, int h_size, int interpolation_ratio)
00162 {
00163 int k, n;
00164 dsp16_t *pvect1;
00165 dsp16_t *pvect2;
00166
00167 pvect1 = vect1;
00168
00169 for(n=0; n<vect2_size; n++)
00170 {
00171 pvect2 = &vect2[n + 1];
00172 for(k=0; k<interpolation_ratio; k++)
00173 *pvect1++ = *pvect2;
00174 }
00175 }
00176 DSP16_INTERPOLATION_NO_LOOP_KERNEL_X_FCT(1, )
00177 DSP16_INTERPOLATION_NO_LOOP_KERNEL_X_FCT(2, )
00178 DSP16_INTERPOLATION_NO_LOOP_KERNEL_X_FCT(3, )
00179 DSP16_INTERPOLATION_NO_LOOP_KERNEL_X_FCT(4, )
00180 DSP16_INTERPOLATION_NO_LOOP_KERNEL_X_FCT(5, )
00181 DSP16_INTERPOLATION_NO_LOOP_KERNEL_X_FCT(6, )
00182
00183 DSP16_INTERPOLATION_KERNEL_X_FCT(0, )
00184 DSP16_INTERPOLATION_KERNEL_X_FCT(1, )
00185 DSP16_INTERPOLATION_KERNEL_X_FCT(2, )
00186 DSP16_INTERPOLATION_KERNEL_X_FCT(3, )
00187 DSP16_INTERPOLATION_KERNEL_X_FCT(4, )
00188 DSP16_INTERPOLATION_KERNEL_X_FCT(5, )
00189
00190 void dsp16_filt_interpolation(dsp16_t *vect1, dsp16_t *vect2, int vect2_size, dsp16_t *h, int h_size, int interpolation_ratio)
00191 {
00192 int n_tap;
00193 typedef void (*interpolation_kernel_opti_t)(dsp16_t *, dsp16_t *, int, dsp16_t *, int, int);
00194 static const interpolation_kernel_opti_t interpolation_end_kernel_opti[] = {
00195 MREPEAT(LOOP_UNROLL, DSP16_INTREPOLATION_FILTER_FUNCTION_NAME, )
00196 };
00197 static const interpolation_kernel_opti_t interpolation_no_loop_end_kernel_opti[] = {
00198 MREPEAT(LOOP_UNROLL_PLUS_ONE, DSP16_INTREPOLATION_NO_LOOP_FILTER_FUNCTION_NAME, )
00199 };
00200
00201 n_tap = h_size / interpolation_ratio;
00202
00203 if (n_tap <= LOOP_UNROLL)
00204 interpolation_no_loop_end_kernel_opti[n_tap](vect1, vect2, vect2_size, h, h_size, interpolation_ratio);
00205 else
00206 interpolation_end_kernel_opti[n_tap%LOOP_UNROLL](vect1, vect2, vect2_size, h, h_size, interpolation_ratio);
00207 }
00208
00209 #endif