Vector Optimized Library of Kernels  3.3.0
Architecture-tuned implementations of math kernels
volk_32fc_s32f_power_spectrum_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
40 #ifndef INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
41 #define INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H
42 
43 #include <inttypes.h>
44 #include <math.h>
45 #include <stdio.h>
46 
47 #ifdef LV_HAVE_GENERIC
48 
49 static inline void
51  const lv_32fc_t* complexFFTInput,
52  const float normalizationFactor,
53  unsigned int num_points)
54 {
55  // Calculate the Power of the complex point
56  const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
57 
58  // Calculate dBm
59  // 50 ohm load assumption
60  // 10 * log10 (v^2 / (2 * 50.0 * .001)) = 10 * log10( v^2 * 10)
61  // 75 ohm load assumption
62  // 10 * log10 (v^2 / (2 * 75.0 * .001)) = 10 * log10( v^2 * 15)
63 
64  /*
65  * For generic reference, the code below is a volk-optimized
66  * approach that also leverages a faster log2 calculation
67  * to calculate the log10:
68  * n*log10(x) = n*log2(x)/log2(10) = (n/log2(10)) * log2(x)
69  *
70  * Generic code:
71  *
72  * const float real = *inputPtr++ * iNormalizationFactor;
73  * const float imag = *inputPtr++ * iNormalizationFactor;
74  * realFFTDataPointsPtr = 10.0*log10f(((real * real) + (imag * imag)) + 1e-20);
75  * realFFTDataPointsPtr++;
76  *
77  */
78 
79  // Calc mag^2
80  volk_32fc_magnitude_squared_32f(logPowerOutput, complexFFTInput, num_points);
81 
82  // Finish ((real * real) + (imag * imag)) calculation:
83  volk_32f_s32f_multiply_32f(logPowerOutput, logPowerOutput, normFactSq, num_points);
84 
85  // The following calculates 10*log10(x) = 10*log2(x)/log2(10) = (10/log2(10))
86  // * log2(x)
87  volk_32f_log2_32f(logPowerOutput, logPowerOutput, num_points);
88  volk_32f_s32f_multiply_32f(
89  logPowerOutput, logPowerOutput, volk_log2to10factor, num_points);
90 }
91 #endif /* LV_HAVE_GENERIC */
92 
93 #ifdef LV_HAVE_NEON
94 #include <arm_neon.h>
96 
97 static inline void
99  const lv_32fc_t* complexFFTInput,
100  const float normalizationFactor,
101  unsigned int num_points)
102 {
103  float* logPowerOutputPtr = logPowerOutput;
104  const lv_32fc_t* complexFFTInputPtr = complexFFTInput;
105  const float iNormalizationFactor = 1.0 / normalizationFactor;
106  unsigned int number;
107  unsigned int quarter_points = num_points / 4;
108  float32x4x2_t fft_vec;
109  float32x4_t log_pwr_vec;
110  float32x4_t mag_squared_vec;
111 
112  const float inv_ln10_10 = 4.34294481903f; // 10.0/ln(10.)
113 
114  for (number = 0; number < quarter_points; number++) {
115  // Load
116  fft_vec = vld2q_f32((float*)complexFFTInputPtr);
117  // Prefetch next 4
118  __VOLK_PREFETCH(complexFFTInputPtr + 4);
119  // Normalize
120  fft_vec.val[0] = vmulq_n_f32(fft_vec.val[0], iNormalizationFactor);
121  fft_vec.val[1] = vmulq_n_f32(fft_vec.val[1], iNormalizationFactor);
122  mag_squared_vec = _vmagnitudesquaredq_f32(fft_vec);
123  log_pwr_vec = vmulq_n_f32(_vlogq_f32(mag_squared_vec), inv_ln10_10);
124  // Store
125  vst1q_f32(logPowerOutputPtr, log_pwr_vec);
126  // Move pointers ahead
127  complexFFTInputPtr += 4;
128  logPowerOutputPtr += 4;
129  }
130 
131  // deal with the rest
132  for (number = quarter_points * 4; number < num_points; number++) {
133  const float real = lv_creal(*complexFFTInputPtr) * iNormalizationFactor;
134  const float imag = lv_cimag(*complexFFTInputPtr) * iNormalizationFactor;
135 
136  *logPowerOutputPtr =
137  volk_log2to10factor * log2f_non_ieee(((real * real) + (imag * imag)));
138  complexFFTInputPtr++;
139  logPowerOutputPtr++;
140  }
141 }
142 
143 #endif /* LV_HAVE_NEON */
144 
145 
146 #ifdef LV_HAVE_RVV
147 #include <riscv_vector.h>
148 
149 #ifndef LOG_POLY_DEGREE
150 #define LOG_POLY_DEGREE 6
151 #endif
152 
153 static inline void volk_32fc_s32f_power_spectrum_32f_rvv(float* logPowerOutput,
154  const lv_32fc_t* complexFFTInput,
155  const float normalizationFactor,
156  unsigned int num_points)
157 {
158  size_t vlmax = __riscv_vsetvlmax_e32m2();
159 
160 #if LOG_POLY_DEGREE == 6
161  const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(3.1157899f, vlmax);
162  const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(-3.3241990f, vlmax);
163  const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.5988452f, vlmax);
164  const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.2315303f, vlmax);
165  const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(3.1821337e-1f, vlmax);
166  const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-3.4436006e-2f, vlmax);
167 #elif LOG_POLY_DEGREE == 5
168  const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(2.8882704548164776201f, vlmax);
169  const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-2.52074962577807006663f, vlmax);
170  const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(1.48116647521213171641f, vlmax);
171  const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-0.465725644288844778798f, vlmax);
172  const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.0596515482674574969533f, vlmax);
173 #elif LOG_POLY_DEGREE == 4
174  const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.61761038894603480148f, vlmax);
175  const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.75647175389045657003f, vlmax);
176  const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(0.688243882994381274313f, vlmax);
177  const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-0.107254423828329604454f, vlmax);
178 #elif LOG_POLY_DEGREE == 3
179  const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(2.28330284476918490682f, vlmax);
180  const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-1.04913055217340124191f, vlmax);
181  const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.204446009836232697516f, vlmax);
182 #else
183 #error
184 #endif
185 
186  const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax);
187  const vint32m2_t m1 = __riscv_vreinterpret_i32m2(cf1);
188  const vint32m2_t m2 = __riscv_vmv_v_x_i32m2(0x7FFFFF, vlmax);
189  const vint32m2_t c127 = __riscv_vmv_v_x_i32m2(127, vlmax);
190 
191  const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
192 
193  size_t n = num_points;
194  for (size_t vl; n > 0; n -= vl, complexFFTInput += vl, logPowerOutput += vl) {
195  vl = __riscv_vsetvl_e32m2(n);
196  vuint64m4_t vc = __riscv_vle64_v_u64m4((const uint64_t*)complexFFTInput, vl);
197  vfloat32m2_t vr = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vc, 0, vl));
198  vfloat32m2_t vi = __riscv_vreinterpret_f32m2(__riscv_vnsrl(vc, 32, vl));
199  vfloat32m2_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl);
200  v = __riscv_vfmul(v, normFactSq, vl);
201 
202  vfloat32m2_t a = __riscv_vfabs(v, vl);
203  vfloat32m2_t exp = __riscv_vfcvt_f(
204  __riscv_vsub(__riscv_vsra(__riscv_vreinterpret_i32m2(a), 23, vl), c127, vl),
205  vl);
206  vfloat32m2_t frac = __riscv_vreinterpret_f32m2(
207  __riscv_vor(__riscv_vand(__riscv_vreinterpret_i32m2(v), m2, vl), m1, vl));
208 
209  vfloat32m2_t mant = c0;
210  mant = __riscv_vfmadd(mant, frac, c1, vl);
211  mant = __riscv_vfmadd(mant, frac, c2, vl);
212 #if LOG_POLY_DEGREE >= 4
213  mant = __riscv_vfmadd(mant, frac, c3, vl);
214 #if LOG_POLY_DEGREE >= 5
215  mant = __riscv_vfmadd(mant, frac, c4, vl);
216 #if LOG_POLY_DEGREE >= 6
217  mant = __riscv_vfmadd(mant, frac, c5, vl);
218 #endif
219 #endif
220 #endif
221  v = __riscv_vfmacc(exp, mant, __riscv_vfsub(frac, cf1, vl), vl);
222  v = __riscv_vfmul(v, volk_log2to10factor, vl);
223 
224  __riscv_vse32(logPowerOutput, v, vl);
225  }
226 }
227 #endif /*LV_HAVE_RVV*/
228 
229 
230 #ifdef LV_HAVE_RVVSEG
231 #include <riscv_vector.h>
232 
233 #ifndef LOG_POLY_DEGREE
234 #define LOG_POLY_DEGREE 6
235 #endif
236 
237 static inline void
238 volk_32fc_s32f_power_spectrum_32f_rvvseg(float* logPowerOutput,
239  const lv_32fc_t* complexFFTInput,
240  const float normalizationFactor,
241  unsigned int num_points)
242 {
243  size_t vlmax = __riscv_vsetvlmax_e32m2();
244 
245 #if LOG_POLY_DEGREE == 6
246  const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(3.1157899f, vlmax);
247  const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(-3.3241990f, vlmax);
248  const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.5988452f, vlmax);
249  const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.2315303f, vlmax);
250  const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(3.1821337e-1f, vlmax);
251  const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-3.4436006e-2f, vlmax);
252 #elif LOG_POLY_DEGREE == 5
253  const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(2.8882704548164776201f, vlmax);
254  const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-2.52074962577807006663f, vlmax);
255  const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(1.48116647521213171641f, vlmax);
256  const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-0.465725644288844778798f, vlmax);
257  const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.0596515482674574969533f, vlmax);
258 #elif LOG_POLY_DEGREE == 4
259  const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(2.61761038894603480148f, vlmax);
260  const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(-1.75647175389045657003f, vlmax);
261  const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(0.688243882994381274313f, vlmax);
262  const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(-0.107254423828329604454f, vlmax);
263 #elif LOG_POLY_DEGREE == 3
264  const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(2.28330284476918490682f, vlmax);
265  const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-1.04913055217340124191f, vlmax);
266  const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(0.204446009836232697516f, vlmax);
267 #else
268 #error
269 #endif
270 
271  const vfloat32m2_t cf1 = __riscv_vfmv_v_f_f32m2(1.0f, vlmax);
272  const vint32m2_t m1 = __riscv_vreinterpret_i32m2(cf1);
273  const vint32m2_t m2 = __riscv_vmv_v_x_i32m2(0x7FFFFF, vlmax);
274  const vint32m2_t c127 = __riscv_vmv_v_x_i32m2(127, vlmax);
275 
276  const float normFactSq = 1.0 / (normalizationFactor * normalizationFactor);
277 
278  size_t n = num_points;
279  for (size_t vl; n > 0; n -= vl, complexFFTInput += vl, logPowerOutput += vl) {
280  vl = __riscv_vsetvl_e32m2(n);
281  vfloat32m2x2_t vc =
282  __riscv_vlseg2e32_v_f32m2x2((const float*)complexFFTInput, vl);
283  vfloat32m2_t vr = __riscv_vget_f32m2(vc, 0);
284  vfloat32m2_t vi = __riscv_vget_f32m2(vc, 1);
285  vfloat32m2_t v = __riscv_vfmacc(__riscv_vfmul(vi, vi, vl), vr, vr, vl);
286  v = __riscv_vfmul(v, normFactSq, vl);
287 
288  vfloat32m2_t a = __riscv_vfabs(v, vl);
289  vfloat32m2_t exp = __riscv_vfcvt_f(
290  __riscv_vsub(__riscv_vsra(__riscv_vreinterpret_i32m2(a), 23, vl), c127, vl),
291  vl);
292  vfloat32m2_t frac = __riscv_vreinterpret_f32m2(
293  __riscv_vor(__riscv_vand(__riscv_vreinterpret_i32m2(v), m2, vl), m1, vl));
294 
295  vfloat32m2_t mant = c0;
296  mant = __riscv_vfmadd(mant, frac, c1, vl);
297  mant = __riscv_vfmadd(mant, frac, c2, vl);
298 #if LOG_POLY_DEGREE >= 4
299  mant = __riscv_vfmadd(mant, frac, c3, vl);
300 #if LOG_POLY_DEGREE >= 5
301  mant = __riscv_vfmadd(mant, frac, c4, vl);
302 #if LOG_POLY_DEGREE >= 6
303  mant = __riscv_vfmadd(mant, frac, c5, vl);
304 #endif
305 #endif
306 #endif
307  v = __riscv_vfmacc(exp, mant, __riscv_vfsub(frac, cf1, vl), vl);
308  v = __riscv_vfmul(v, volk_log2to10factor, vl);
309 
310  __riscv_vse32(logPowerOutput, v, vl);
311  }
312 }
313 
314 #endif /*LV_HAVE_RVVSEG*/
315 
316 #endif /* INCLUDED_volk_32fc_s32f_power_spectrum_32f_a_H */
lv_cimag
#define lv_cimag(x)
Definition: volk_complex.h:98
log2f_non_ieee
static float log2f_non_ieee(float f)
Definition: volk_common.h:156
_vmagnitudesquaredq_f32
static float32x4_t _vmagnitudesquaredq_f32(float32x4x2_t cmplxValue)
Definition: volk_neon_intrinsics.h:74
volk_32fc_s32f_power_spectrum_32f_generic
static void volk_32fc_s32f_power_spectrum_32f_generic(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:50
volk_log2to10factor
#define volk_log2to10factor
Definition: volk_common.h:171
__VOLK_PREFETCH
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:68
_vlogq_f32
static float32x4_t _vlogq_f32(float32x4_t x)
Definition: volk_neon_intrinsics.h:210
lv_32fc_t
float complex lv_32fc_t
Definition: volk_complex.h:74
volk_32fc_s32f_power_spectrum_32f_neon
static void volk_32fc_s32f_power_spectrum_32f_neon(float *logPowerOutput, const lv_32fc_t *complexFFTInput, const float normalizationFactor, unsigned int num_points)
Definition: volk_32fc_s32f_power_spectrum_32f.h:98
bit128::f
float f[4]
Definition: volk_common.h:120
volk_neon_intrinsics.h
lv_creal
#define lv_creal(x)
Definition: volk_complex.h:96