Vector Optimized Library of Kernels  3.3.0
Architecture-tuned implementations of math kernels
volk_32f_s32f_s32f_mod_range_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2017 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
32 #ifndef INCLUDED_VOLK_32F_S32F_S32F_MOD_RANGE_32F_A_H
33 #define INCLUDED_VOLK_32F_S32F_S32F_MOD_RANGE_32F_A_H
34 
35 #ifdef LV_HAVE_GENERIC
36 
/*!
 * \brief Folds every input sample back into the range [lower_bound, upper_bound].
 *
 * A sample below lower_bound is raised by (count + 1) * distance and a sample
 * above upper_bound is lowered by (count + 1) * distance, where
 * distance = upper_bound - lower_bound and count is the truncated quotient of
 * the overshoot and distance. Samples for which neither comparison is true
 * (in-range values, values equal to a bound, NaN) are copied unchanged.
 *
 * \param outputVector destination buffer; num_points floats are written
 * \param inputVector  source buffer; num_points floats are read
 * \param lower_bound  lower edge of the target range
 * \param upper_bound  upper edge of the target range
 * \param num_points   number of samples to process
 *
 * NOTE(review): upper_bound is presumably expected to be strictly greater than
 * lower_bound; with a zero distance the quotient below is a division by zero
 * whose result is then converted to int.
 */
static inline void volk_32f_s32f_s32f_mod_range_32f_generic(float* outputVector,
                                                            const float* inputVector,
                                                            const float lower_bound,
                                                            const float upper_bound,
                                                            unsigned int num_points)
{
    float* outPtr = outputVector;
    const float* inPtr;
    const float distance = upper_bound - lower_bound;

    for (inPtr = inputVector; inPtr < inputVector + num_points; inPtr++) {
        float val = *inPtr;
        if (val < lower_bound) {
            // Overshoot below the range, as a positive number.
            float excess = lower_bound - val;
            // Whole range widths contained in the overshoot (truncated).
            signed int count = (int)(excess / distance);
            *outPtr = val + (count + 1) * distance;
        } else if (val > upper_bound) {
            // Overshoot above the range, as a positive number.
            float excess = val - upper_bound;
            signed int count = (int)(excess / distance);
            *outPtr = val - (count + 1) * distance;
        } else {
            *outPtr = val;
        }
        outPtr++;
    }
}
62 #endif /* LV_HAVE_GENERIC */
63 
64 
65 #ifdef LV_HAVE_AVX
66 #include <xmmintrin.h>
67 
/*!
 * \brief AVX kernel (unaligned loads/stores) that folds every input sample
 *        back into the range [lower_bound, upper_bound].
 *
 * Processes eight floats per iteration; the remaining num_points % 8 samples
 * are handed to volk_32f_s32f_s32f_mod_range_32f_generic.
 *
 * \param outputVector destination buffer; num_points floats are written
 * \param inputVector  source buffer; num_points floats are read
 * \param lower_bound  lower edge of the target range
 * \param upper_bound  upper edge of the target range
 * \param num_points   number of samples to process
 *
 * NOTE(review): the enclosing section only includes <xmmintrin.h>; the
 * _mm256_* intrinsics are normally declared through <immintrin.h> - confirm
 * that header is pulled in elsewhere.
 */
static inline void volk_32f_s32f_s32f_mod_range_32f_u_avx(float* outputVector,
                                                          const float* inputVector,
                                                          const float lower_bound,
                                                          const float upper_bound,
                                                          unsigned int num_points)
{
    const __m256 lower = _mm256_set1_ps(lower_bound);
    const __m256 upper = _mm256_set1_ps(upper_bound);
    const __m256 distance = _mm256_sub_ps(upper, lower);
    __m256 input, output;
    __m256 is_smaller, is_bigger;
    __m256 excess, adj;

    const float* inPtr = inputVector;
    float* outPtr = outputVector;
    const size_t eight_points = num_points / 8;
    for (size_t counter = 0; counter < eight_points; counter++) {
        input = _mm256_loadu_ps(inPtr);
        // lane masks: input < lower, input > upper
        is_smaller = _mm256_cmp_ps(
            input, lower, _CMP_LT_OQ); // 0x11: less than, ordered, non-signalling
        is_bigger = _mm256_cmp_ps(
            input, upper, _CMP_GT_OQ); // 0x1e: greater than, ordered, non-signalling
        // how far each lane is out of bounds (positive); 0 for in-range lanes
        excess = _mm256_and_ps(_mm256_sub_ps(lower, input), is_smaller);
        excess =
            _mm256_or_ps(_mm256_and_ps(_mm256_sub_ps(input, upper), is_bigger), excess);
        // number of range widths to shift by: int(excess / distance) + 1
        excess = _mm256_div_ps(excess, distance);
        // truncate towards zero via a float -> int32 -> float round trip
        excess = _mm256_cvtepi32_ps(_mm256_cvttps_epi32(excess));
        // plus 1
        adj = _mm256_set1_ps(1.0f);
        excess = _mm256_add_ps(excess, adj);
        // per-lane sign: +1 below the range, -1 above it, 0 inside it
        // (adj still holds 1.0f in all eight lanes at this point)
        adj = _mm256_and_ps(adj, is_smaller);
        adj = _mm256_or_ps(_mm256_and_ps(_mm256_set1_ps(-1.0f), is_bigger), adj);
        // scale by sign and distance, then apply the correction
        excess = _mm256_mul_ps(_mm256_mul_ps(excess, adj), distance);
        output = _mm256_add_ps(input, excess);
        _mm256_storeu_ps(outPtr, output);
        inPtr += 8;
        outPtr += 8;
    }

    // scalar tail for the samples that do not fill a whole vector
    volk_32f_s32f_s32f_mod_range_32f_generic(
        outPtr, inPtr, lower_bound, upper_bound, num_points - eight_points * 8);
}
/*!
 * \brief AVX kernel (aligned loads/stores) that folds every input sample back
 *        into the range [lower_bound, upper_bound].
 *
 * Processes eight floats per iteration with _mm256_load_ps/_mm256_store_ps,
 * so both buffers must satisfy the alignment those intrinsics require. The
 * remaining num_points % 8 samples are handed to
 * volk_32f_s32f_s32f_mod_range_32f_generic.
 *
 * \param outputVector destination buffer; num_points floats are written
 * \param inputVector  source buffer; num_points floats are read
 * \param lower_bound  lower edge of the target range
 * \param upper_bound  upper edge of the target range
 * \param num_points   number of samples to process
 *
 * NOTE(review): the enclosing section only includes <xmmintrin.h>; the
 * _mm256_* intrinsics are normally declared through <immintrin.h> - confirm
 * that header is pulled in elsewhere.
 */
static inline void volk_32f_s32f_s32f_mod_range_32f_a_avx(float* outputVector,
                                                          const float* inputVector,
                                                          const float lower_bound,
                                                          const float upper_bound,
                                                          unsigned int num_points)
{
    const __m256 lower = _mm256_set1_ps(lower_bound);
    const __m256 upper = _mm256_set1_ps(upper_bound);
    const __m256 distance = _mm256_sub_ps(upper, lower);
    __m256 input, output;
    __m256 is_smaller, is_bigger;
    __m256 excess, adj;

    const float* inPtr = inputVector;
    float* outPtr = outputVector;
    const size_t eight_points = num_points / 8;
    for (size_t counter = 0; counter < eight_points; counter++) {
        input = _mm256_load_ps(inPtr);
        // lane masks: input < lower, input > upper
        is_smaller = _mm256_cmp_ps(
            input, lower, _CMP_LT_OQ); // 0x11: less than, ordered, non-signalling
        is_bigger = _mm256_cmp_ps(
            input, upper, _CMP_GT_OQ); // 0x1e: greater than, ordered, non-signalling
        // how far each lane is out of bounds (positive); 0 for in-range lanes
        excess = _mm256_and_ps(_mm256_sub_ps(lower, input), is_smaller);
        excess =
            _mm256_or_ps(_mm256_and_ps(_mm256_sub_ps(input, upper), is_bigger), excess);
        // number of range widths to shift by: int(excess / distance) + 1
        excess = _mm256_div_ps(excess, distance);
        // truncate towards zero via a float -> int32 -> float round trip
        excess = _mm256_cvtepi32_ps(_mm256_cvttps_epi32(excess));
        // plus 1
        adj = _mm256_set1_ps(1.0f);
        excess = _mm256_add_ps(excess, adj);
        // per-lane sign: +1 below the range, -1 above it, 0 inside it
        // (adj still holds 1.0f in all eight lanes at this point)
        adj = _mm256_and_ps(adj, is_smaller);
        adj = _mm256_or_ps(_mm256_and_ps(_mm256_set1_ps(-1.0f), is_bigger), adj);
        // scale by sign and distance, then apply the correction
        excess = _mm256_mul_ps(_mm256_mul_ps(excess, adj), distance);
        output = _mm256_add_ps(input, excess);
        _mm256_store_ps(outPtr, output);
        inPtr += 8;
        outPtr += 8;
    }

    // scalar tail for the samples that do not fill a whole vector
    volk_32f_s32f_s32f_mod_range_32f_generic(
        outPtr, inPtr, lower_bound, upper_bound, num_points - eight_points * 8);
}
164 #endif /* LV_HAVE_AVX */
165 
166 
167 #ifdef LV_HAVE_SSE2
168 #include <xmmintrin.h>
169 
/*!
 * \brief SSE2 kernel (unaligned loads/stores) that folds every input sample
 *        back into the range [lower_bound, upper_bound].
 *
 * Processes four floats per iteration; the remaining num_points % 4 samples
 * are handed to volk_32f_s32f_s32f_mod_range_32f_generic.
 *
 * \param outputVector destination buffer; num_points floats are written
 * \param inputVector  source buffer; num_points floats are read
 * \param lower_bound  lower edge of the target range
 * \param upper_bound  upper edge of the target range
 * \param num_points   number of samples to process
 */
static inline void volk_32f_s32f_s32f_mod_range_32f_u_sse2(float* outputVector,
                                                           const float* inputVector,
                                                           const float lower_bound,
                                                           const float upper_bound,
                                                           unsigned int num_points)
{
    const __m128 lower = _mm_set_ps1(lower_bound);
    const __m128 upper = _mm_set_ps1(upper_bound);
    const __m128 distance = _mm_sub_ps(upper, lower);
    __m128 input, output;
    __m128 is_smaller, is_bigger;
    __m128 excess, adj;

    const float* inPtr = inputVector;
    float* outPtr = outputVector;
    const size_t quarter_points = num_points / 4;
    for (size_t counter = 0; counter < quarter_points; counter++) {
        input = _mm_loadu_ps(inPtr);
        // lane masks: input < lower, input > upper
        is_smaller = _mm_cmplt_ps(input, lower);
        is_bigger = _mm_cmpgt_ps(input, upper);
        // how far each lane is out of bounds (positive); 0 for in-range lanes
        excess = _mm_and_ps(_mm_sub_ps(lower, input), is_smaller);
        excess = _mm_or_ps(_mm_and_ps(_mm_sub_ps(input, upper), is_bigger), excess);
        // number of range widths to shift by: int(excess / distance) + 1
        excess = _mm_div_ps(excess, distance);
        // truncate towards zero via a float -> int32 -> float round trip
        excess = _mm_cvtepi32_ps(_mm_cvttps_epi32(excess));
        // plus 1
        adj = _mm_set_ps1(1.0f);
        excess = _mm_add_ps(excess, adj);
        // per-lane sign: +1 below the range, -1 above it, 0 inside it
        // (adj still holds {1.0f, 1.0f, 1.0f, 1.0f} at this point)
        adj = _mm_and_ps(adj, is_smaller);
        adj = _mm_or_ps(_mm_and_ps(_mm_set_ps1(-1.0f), is_bigger), adj);
        // scale by sign and distance, then apply the correction
        excess = _mm_mul_ps(_mm_mul_ps(excess, adj), distance);
        output = _mm_add_ps(input, excess);
        _mm_storeu_ps(outPtr, output);
        inPtr += 4;
        outPtr += 4;
    }

    // scalar tail for the samples that do not fill a whole vector
    volk_32f_s32f_s32f_mod_range_32f_generic(
        outPtr, inPtr, lower_bound, upper_bound, num_points - quarter_points * 4);
}
/*!
 * \brief SSE2 kernel (aligned loads/stores) that folds every input sample back
 *        into the range [lower_bound, upper_bound].
 *
 * Processes four floats per iteration with _mm_load_ps/_mm_store_ps, so both
 * buffers must satisfy the alignment those intrinsics require. The remaining
 * num_points % 4 samples are handed to
 * volk_32f_s32f_s32f_mod_range_32f_generic.
 *
 * \param outputVector destination buffer; num_points floats are written
 * \param inputVector  source buffer; num_points floats are read
 * \param lower_bound  lower edge of the target range
 * \param upper_bound  upper edge of the target range
 * \param num_points   number of samples to process
 */
static inline void volk_32f_s32f_s32f_mod_range_32f_a_sse2(float* outputVector,
                                                           const float* inputVector,
                                                           const float lower_bound,
                                                           const float upper_bound,
                                                           unsigned int num_points)
{
    const __m128 lower = _mm_set_ps1(lower_bound);
    const __m128 upper = _mm_set_ps1(upper_bound);
    const __m128 distance = _mm_sub_ps(upper, lower);
    __m128 input, output;
    __m128 is_smaller, is_bigger;
    __m128 excess, adj;

    const float* inPtr = inputVector;
    float* outPtr = outputVector;
    const size_t quarter_points = num_points / 4;
    for (size_t counter = 0; counter < quarter_points; counter++) {
        input = _mm_load_ps(inPtr);
        // lane masks: input < lower, input > upper
        is_smaller = _mm_cmplt_ps(input, lower);
        is_bigger = _mm_cmpgt_ps(input, upper);
        // how far each lane is out of bounds (positive); 0 for in-range lanes
        excess = _mm_and_ps(_mm_sub_ps(lower, input), is_smaller);
        excess = _mm_or_ps(_mm_and_ps(_mm_sub_ps(input, upper), is_bigger), excess);
        // number of range widths to shift by: int(excess / distance) + 1
        excess = _mm_div_ps(excess, distance);
        // truncate towards zero via a float -> int32 -> float round trip
        excess = _mm_cvtepi32_ps(_mm_cvttps_epi32(excess));
        // plus 1
        adj = _mm_set_ps1(1.0f);
        excess = _mm_add_ps(excess, adj);
        // per-lane sign: +1 below the range, -1 above it, 0 inside it
        // (adj still holds {1.0f, 1.0f, 1.0f, 1.0f} at this point)
        adj = _mm_and_ps(adj, is_smaller);
        adj = _mm_or_ps(_mm_and_ps(_mm_set_ps1(-1.0f), is_bigger), adj);
        // scale by sign and distance, then apply the correction
        excess = _mm_mul_ps(_mm_mul_ps(excess, adj), distance);
        output = _mm_add_ps(input, excess);
        _mm_store_ps(outPtr, output);
        inPtr += 4;
        outPtr += 4;
    }

    // scalar tail for the samples that do not fill a whole vector
    volk_32f_s32f_s32f_mod_range_32f_generic(
        outPtr, inPtr, lower_bound, upper_bound, num_points - quarter_points * 4);
}
261 #endif /* LV_HAVE_SSE2 */
262 
263 #ifdef LV_HAVE_SSE
264 #include <xmmintrin.h>
265 
/*!
 * \brief SSE kernel (unaligned loads/stores) that folds every input sample
 *        back into the range [lower_bound, upper_bound].
 *
 * Processes four floats per iteration; the remaining num_points % 4 samples
 * are handed to volk_32f_s32f_s32f_mod_range_32f_generic.
 *
 * \param outputVector destination buffer; num_points floats are written
 * \param inputVector  source buffer; num_points floats are read
 * \param lower_bound  lower edge of the target range
 * \param upper_bound  upper edge of the target range
 * \param num_points   number of samples to process
 *
 * NOTE(review): __m128i, _mm_cvttps_epi32 and _mm_cvtepi32_ps are SSE2
 * facilities (<emmintrin.h>), although this kernel is guarded by LV_HAVE_SSE
 * and the section includes only <xmmintrin.h> - confirm that SSE2 is available
 * wherever this kernel is built.
 */
static inline void volk_32f_s32f_s32f_mod_range_32f_u_sse(float* outputVector,
                                                          const float* inputVector,
                                                          const float lower_bound,
                                                          const float upper_bound,
                                                          unsigned int num_points)
{
    const __m128 lower = _mm_set_ps1(lower_bound);
    const __m128 upper = _mm_set_ps1(upper_bound);
    const __m128 distance = _mm_sub_ps(upper, lower);
    __m128 input, output;
    __m128 is_smaller, is_bigger;
    __m128 excess, adj;
    __m128i rounddown;

    const float* inPtr = inputVector;
    float* outPtr = outputVector;
    const size_t quarter_points = num_points / 4;
    for (size_t counter = 0; counter < quarter_points; counter++) {
        input = _mm_loadu_ps(inPtr);
        // lane masks: input < lower, input > upper
        is_smaller = _mm_cmplt_ps(input, lower);
        is_bigger = _mm_cmpgt_ps(input, upper);
        // how far each lane is out of bounds (positive); 0 for in-range lanes
        excess = _mm_and_ps(_mm_sub_ps(lower, input), is_smaller);
        excess = _mm_or_ps(_mm_and_ps(_mm_sub_ps(input, upper), is_bigger), excess);
        // number of range widths to shift by: int(excess / distance) + 1
        excess = _mm_div_ps(excess, distance);
        // truncate towards zero via a float -> int32 -> float round trip
        rounddown = _mm_cvttps_epi32(excess);
        excess = _mm_cvtepi32_ps(rounddown);
        // plus 1
        adj = _mm_set_ps1(1.0f);
        excess = _mm_add_ps(excess, adj);
        // per-lane sign: +1 below the range, -1 above it, 0 inside it
        // (adj still holds {1.0f, 1.0f, 1.0f, 1.0f} at this point)
        adj = _mm_and_ps(adj, is_smaller);
        adj = _mm_or_ps(_mm_and_ps(_mm_set_ps1(-1.0f), is_bigger), adj);
        // scale by sign and distance, then apply the correction
        excess = _mm_mul_ps(_mm_mul_ps(excess, adj), distance);
        output = _mm_add_ps(input, excess);
        _mm_storeu_ps(outPtr, output);
        inPtr += 4;
        outPtr += 4;
    }

    // scalar tail for the samples that do not fill a whole vector
    volk_32f_s32f_s32f_mod_range_32f_generic(
        outPtr, inPtr, lower_bound, upper_bound, num_points - quarter_points * 4);
}
/*!
 * \brief SSE kernel (aligned loads/stores) that folds every input sample back
 *        into the range [lower_bound, upper_bound].
 *
 * Processes four floats per iteration with _mm_load_ps/_mm_store_ps, so both
 * buffers must satisfy the alignment those intrinsics require. The remaining
 * num_points % 4 samples are handed to
 * volk_32f_s32f_s32f_mod_range_32f_generic.
 *
 * \param outputVector destination buffer; num_points floats are written
 * \param inputVector  source buffer; num_points floats are read
 * \param lower_bound  lower edge of the target range
 * \param upper_bound  upper edge of the target range
 * \param num_points   number of samples to process
 *
 * NOTE(review): __m128i, _mm_cvttps_epi32 and _mm_cvtepi32_ps are SSE2
 * facilities (<emmintrin.h>), although this kernel is guarded by LV_HAVE_SSE
 * and the section includes only <xmmintrin.h> - confirm that SSE2 is available
 * wherever this kernel is built.
 */
static inline void volk_32f_s32f_s32f_mod_range_32f_a_sse(float* outputVector,
                                                          const float* inputVector,
                                                          const float lower_bound,
                                                          const float upper_bound,
                                                          unsigned int num_points)
{
    const __m128 lower = _mm_set_ps1(lower_bound);
    const __m128 upper = _mm_set_ps1(upper_bound);
    const __m128 distance = _mm_sub_ps(upper, lower);
    __m128 input, output;
    __m128 is_smaller, is_bigger;
    __m128 excess, adj;
    __m128i rounddown;

    const float* inPtr = inputVector;
    float* outPtr = outputVector;
    const size_t quarter_points = num_points / 4;
    for (size_t counter = 0; counter < quarter_points; counter++) {
        input = _mm_load_ps(inPtr);
        // lane masks: input < lower, input > upper
        is_smaller = _mm_cmplt_ps(input, lower);
        is_bigger = _mm_cmpgt_ps(input, upper);
        // how far each lane is out of bounds (positive); 0 for in-range lanes
        excess = _mm_and_ps(_mm_sub_ps(lower, input), is_smaller);
        excess = _mm_or_ps(_mm_and_ps(_mm_sub_ps(input, upper), is_bigger), excess);
        // number of range widths to shift by: int(excess / distance) + 1
        excess = _mm_div_ps(excess, distance);
        // truncate towards zero via a float -> int32 -> float round trip
        rounddown = _mm_cvttps_epi32(excess);
        excess = _mm_cvtepi32_ps(rounddown);
        // plus 1
        adj = _mm_set_ps1(1.0f);
        excess = _mm_add_ps(excess, adj);
        // per-lane sign: +1 below the range, -1 above it, 0 inside it
        // (adj still holds {1.0f, 1.0f, 1.0f, 1.0f} at this point)
        adj = _mm_and_ps(adj, is_smaller);
        adj = _mm_or_ps(_mm_and_ps(_mm_set_ps1(-1.0f), is_bigger), adj);
        // scale by sign and distance, then apply the correction
        excess = _mm_mul_ps(_mm_mul_ps(excess, adj), distance);
        output = _mm_add_ps(input, excess);
        _mm_store_ps(outPtr, output);
        inPtr += 4;
        outPtr += 4;
    }

    // scalar tail for the samples that do not fill a whole vector
    volk_32f_s32f_s32f_mod_range_32f_generic(
        outPtr, inPtr, lower_bound, upper_bound, num_points - quarter_points * 4);
}
360 #endif /* LV_HAVE_SSE */
361 
362 #ifdef LV_HAVE_NEON
363 #include <arm_neon.h>
364 
/*!
 * \brief NEON kernel that folds every input sample back into the range
 *        [lower_bound, upper_bound].
 *
 * Processes four floats per iteration; the remaining num_points % 4 samples
 * are handed to volk_32f_s32f_s32f_mod_range_32f_generic.
 *
 * \param outputVector destination buffer; num_points floats are written
 * \param inputVector  source buffer; num_points floats are read
 * \param lower_bound  lower edge of the target range
 * \param upper_bound  upper edge of the target range
 * \param num_points   number of samples to process
 *
 * NOTE(review): the quotient excess / distance is formed with a reciprocal
 * estimate refined by a single Newton-Raphson step, not an exact division, so
 * quotients very close to an integer may truncate differently than in the
 * generic kernel - confirm this tolerance is acceptable.
 */
static inline void volk_32f_s32f_s32f_mod_range_32f_neon(float* outputVector,
                                                         const float* inputVector,
                                                         const float lower_bound,
                                                         const float upper_bound,
                                                         unsigned int num_points)
{
    const float32x4_t lower = vdupq_n_f32(lower_bound);
    const float32x4_t upper = vdupq_n_f32(upper_bound);
    const float32x4_t distance = vsubq_f32(upper, lower);
    const float32x4_t fzero = vdupq_n_f32(0.0f);
    const float32x4_t fone = vdupq_n_f32(1.0f);
    const float32x4_t fmone = vdupq_n_f32(-1.0f);

    // 1 / distance: estimate plus one refinement step. It depends only on the
    // bounds, so it is computed once instead of once per iteration.
    float32x4_t recip = vrecpeq_f32(distance);
    recip = vmulq_f32(recip, vrecpsq_f32(distance, recip));

    const float* inPtr = inputVector;
    float* outPtr = outputVector;
    const size_t quarter_points = num_points / 4;

    for (size_t counter = 0; counter < quarter_points; counter++) {
        float32x4_t input = vld1q_f32(inPtr);

        // lane masks: input < lower, input > upper
        uint32x4_t is_smaller = vcltq_f32(input, lower);
        uint32x4_t is_bigger = vcgtq_f32(input, upper);

        // how far each lane is out of bounds (positive); 0 for in-range lanes
        float32x4_t excess_low = vsubq_f32(lower, input);
        float32x4_t excess_high = vsubq_f32(input, upper);
        float32x4_t excess = vbslq_f32(is_smaller, excess_low, fzero);
        excess = vbslq_f32(is_bigger, excess_high, excess);

        // excess / distance via the precomputed reciprocal
        excess = vmulq_f32(excess, recip);

        // truncate via a float -> int32 -> float round trip
        int32x4_t excess_int = vcvtq_s32_f32(excess);
        excess = vcvtq_f32_s32(excess_int);

        // plus 1
        excess = vaddq_f32(excess, fone);

        // per-lane sign: +1 below the range, -1 above it, 0 inside it
        float32x4_t adj = vbslq_f32(is_smaller, fone, fzero);
        adj = vbslq_f32(is_bigger, fmone, adj);

        // scale by sign and distance, then apply the correction
        excess = vmulq_f32(vmulq_f32(excess, adj), distance);
        float32x4_t output = vaddq_f32(input, excess);

        vst1q_f32(outPtr, output);
        inPtr += 4;
        outPtr += 4;
    }

    // scalar tail for the samples that do not fill a whole vector
    volk_32f_s32f_s32f_mod_range_32f_generic(
        outPtr, inPtr, lower_bound, upper_bound, num_points - quarter_points * 4);
}
422 
423 #endif /* LV_HAVE_NEON */
424 
425 #ifdef LV_HAVE_NEONV8
426 #include <arm_neon.h>
427 
/*!
 * \brief NEON (ARMv8) kernel that folds every input sample back into the range
 *        [lower_bound, upper_bound].
 *
 * Processes eight floats per iteration as two independent four-lane vectors
 * and uses vdivq_f32 for the quotient. The remaining num_points % 8 samples
 * are handed to volk_32f_s32f_s32f_mod_range_32f_generic.
 *
 * \param outputVector destination buffer; num_points floats are written
 * \param inputVector  source buffer; num_points floats are read
 * \param lower_bound  lower edge of the target range
 * \param upper_bound  upper edge of the target range
 * \param num_points   number of samples to process
 */
static inline void volk_32f_s32f_s32f_mod_range_32f_neonv8(float* outputVector,
                                                           const float* inputVector,
                                                           const float lower_bound,
                                                           const float upper_bound,
                                                           unsigned int num_points)
{
    const float32x4_t lower = vdupq_n_f32(lower_bound);
    const float32x4_t upper = vdupq_n_f32(upper_bound);
    const float32x4_t distance = vsubq_f32(upper, lower);
    const float32x4_t fzero = vdupq_n_f32(0.0f);
    const float32x4_t fone = vdupq_n_f32(1.0f);
    const float32x4_t fmone = vdupq_n_f32(-1.0f);

    const float* inPtr = inputVector;
    float* outPtr = outputVector;
    const size_t eighth_points = num_points / 8;

    for (size_t counter = 0; counter < eighth_points; counter++) {
        __VOLK_PREFETCH(inPtr + 16);

        float32x4_t input0 = vld1q_f32(inPtr);
        float32x4_t input1 = vld1q_f32(inPtr + 4);

        // lane masks: input < lower, input > upper
        uint32x4_t is_smaller0 = vcltq_f32(input0, lower);
        uint32x4_t is_smaller1 = vcltq_f32(input1, lower);
        uint32x4_t is_bigger0 = vcgtq_f32(input0, upper);
        uint32x4_t is_bigger1 = vcgtq_f32(input1, upper);

        // how far each lane is out of bounds (positive); 0 for in-range lanes
        float32x4_t excess_low0 = vsubq_f32(lower, input0);
        float32x4_t excess_low1 = vsubq_f32(lower, input1);
        float32x4_t excess_high0 = vsubq_f32(input0, upper);
        float32x4_t excess_high1 = vsubq_f32(input1, upper);
        float32x4_t excess0 = vbslq_f32(is_smaller0, excess_low0, fzero);
        float32x4_t excess1 = vbslq_f32(is_smaller1, excess_low1, fzero);
        excess0 = vbslq_f32(is_bigger0, excess_high0, excess0);
        excess1 = vbslq_f32(is_bigger1, excess_high1, excess1);

        // excess / distance
        excess0 = vdivq_f32(excess0, distance);
        excess1 = vdivq_f32(excess1, distance);

        // truncate via a float -> int32 -> float round trip
        int32x4_t excess_int0 = vcvtq_s32_f32(excess0);
        int32x4_t excess_int1 = vcvtq_s32_f32(excess1);
        excess0 = vcvtq_f32_s32(excess_int0);
        excess1 = vcvtq_f32_s32(excess_int1);

        // plus 1
        excess0 = vaddq_f32(excess0, fone);
        excess1 = vaddq_f32(excess1, fone);

        // per-lane sign: +1 below the range, -1 above it, 0 inside it
        float32x4_t adj0 = vbslq_f32(is_smaller0, fone, fzero);
        float32x4_t adj1 = vbslq_f32(is_smaller1, fone, fzero);
        adj0 = vbslq_f32(is_bigger0, fmone, adj0);
        adj1 = vbslq_f32(is_bigger1, fmone, adj1);

        // scale by sign and distance, then apply the correction
        excess0 = vmulq_f32(vmulq_f32(excess0, adj0), distance);
        excess1 = vmulq_f32(vmulq_f32(excess1, adj1), distance);
        float32x4_t output0 = vaddq_f32(input0, excess0);
        float32x4_t output1 = vaddq_f32(input1, excess1);

        vst1q_f32(outPtr, output0);
        vst1q_f32(outPtr + 4, output1);
        inPtr += 8;
        outPtr += 8;
    }

    // scalar tail for the samples that do not fill a whole iteration
    volk_32f_s32f_s32f_mod_range_32f_generic(
        outPtr, inPtr, lower_bound, upper_bound, num_points - eighth_points * 8);
}
501 
502 #endif /* LV_HAVE_NEONV8 */
503 
504 #ifdef LV_HAVE_RVV
505 #include <riscv_vector.h>
506 
/*!
 * \brief RISC-V vector kernel that folds every input sample back into the
 *        range [lower_bound, upper_bound].
 *
 * Each iteration handles vl elements, where vl is what __riscv_vsetvl_e32m4
 * returns for the remaining count, so no separate scalar tail is needed.
 *
 * \param outputVector destination buffer; num_points floats are written
 * \param inputVector  source buffer; num_points floats are read
 * \param lower_bound  lower edge of the target range
 * \param upper_bound  upper edge of the target range
 * \param num_points   number of samples to process
 */
static inline void volk_32f_s32f_s32f_mod_range_32f_rvv(float* outputVector,
                                                        const float* inputVector,
                                                        const float lower_bound,
                                                        const float upper_bound,
                                                        unsigned int num_points)
{
    const float dist = upper_bound - lower_bound;
    size_t vlmax = __riscv_vsetvlmax_e32m4();
    vfloat32m4_t vdist = __riscv_vfmv_v_f_f32m4(dist, vlmax);
    vfloat32m4_t vmdist = __riscv_vfmv_v_f_f32m4(-dist, vlmax);
    vfloat32m4_t vupper = __riscv_vfmv_v_f_f32m4(upper_bound, vlmax);
    vfloat32m4_t vlower = __riscv_vfmv_v_f_f32m4(lower_bound, vlmax);
    size_t n = num_points;
    for (size_t vl; n > 0; n -= vl, outputVector += vl, inputVector += vl) {
        vl = __riscv_vsetvl_e32m4(n);
        vfloat32m4_t v = __riscv_vle32_v_f32m4(inputVector, vl);
        // overshoot below / above the range
        vfloat32m4_t vlt = __riscv_vfsub(vlower, v, vl);
        vfloat32m4_t vgt = __riscv_vfsub(v, vupper, vl);
        vbool8_t mlt = __riscv_vmflt(v, vlower, vl);
        // signed step: +dist where v < lower, -dist elsewhere
        vfloat32m4_t vmul = __riscv_vmerge(vmdist, vdist, mlt, vl);
        // overshoot / dist, picking the matching overshoot per lane
        vfloat32m4_t vcnt = __riscv_vfdiv(__riscv_vmerge(vgt, vlt, mlt, vl), vdist, vl);
        // truncate towards zero, add 1, convert back to float
        vcnt = __riscv_vfcvt_f(__riscv_vadd(__riscv_vfcvt_rtz_x(vcnt, vl), 1, vl), vl);
        vbool8_t mgt = __riscv_vmfgt(v, vupper, vl);
        // v += vcnt * vmul, applied only to out-of-range lanes (mask undisturbed)
        v = __riscv_vfmacc_mu(__riscv_vmor(mlt, mgt, vl), v, vcnt, vmul, vl);

        __riscv_vse32(outputVector, v, vl);
    }
}
535 #endif /*LV_HAVE_RVV*/
536 
537 #endif /* INCLUDED_VOLK_32F_S32F_S32F_MOD_RANGE_32F_A_H */
volk_32f_s32f_s32f_mod_range_32f_neon
static void volk_32f_s32f_s32f_mod_range_32f_neon(float *outputVector, const float *inputVector, const float lower_bound, const float upper_bound, unsigned int num_points)
Definition: volk_32f_s32f_s32f_mod_range_32f.h:365
volk_arch_defs.val
val
Definition: volk_arch_defs.py:57
volk_32f_s32f_s32f_mod_range_32f_u_sse
static void volk_32f_s32f_s32f_mod_range_32f_u_sse(float *outputVector, const float *inputVector, const float lower_bound, const float upper_bound, unsigned int num_points)
Definition: volk_32f_s32f_s32f_mod_range_32f.h:266
volk_32f_s32f_s32f_mod_range_32f_u_avx
static void volk_32f_s32f_s32f_mod_range_32f_u_avx(float *outputVector, const float *inputVector, const float lower_bound, const float upper_bound, unsigned int num_points)
Definition: volk_32f_s32f_s32f_mod_range_32f.h:68
__VOLK_PREFETCH
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:68
volk_32f_s32f_s32f_mod_range_32f_u_sse2
static void volk_32f_s32f_s32f_mod_range_32f_u_sse2(float *outputVector, const float *inputVector, const float lower_bound, const float upper_bound, unsigned int num_points)
Definition: volk_32f_s32f_s32f_mod_range_32f.h:170
volk_32f_s32f_s32f_mod_range_32f_a_sse
static void volk_32f_s32f_s32f_mod_range_32f_a_sse(float *outputVector, const float *inputVector, const float lower_bound, const float upper_bound, unsigned int num_points)
Definition: volk_32f_s32f_s32f_mod_range_32f.h:313
volk_32f_s32f_s32f_mod_range_32f_a_sse2
static void volk_32f_s32f_s32f_mod_range_32f_a_sse2(float *outputVector, const float *inputVector, const float lower_bound, const float upper_bound, unsigned int num_points)
Definition: volk_32f_s32f_s32f_mod_range_32f.h:215
volk_32f_s32f_s32f_mod_range_32f_a_avx
static void volk_32f_s32f_s32f_mod_range_32f_a_avx(float *outputVector, const float *inputVector, const float lower_bound, const float upper_bound, unsigned int num_points)
Definition: volk_32f_s32f_s32f_mod_range_32f.h:116
volk_32f_s32f_s32f_mod_range_32f_generic
static void volk_32f_s32f_s32f_mod_range_32f_generic(float *outputVector, const float *inputVector, const float lower_bound, const float upper_bound, unsigned int num_points)
Definition: volk_32f_s32f_s32f_mod_range_32f.h:37