Vector Optimized Library of Kernels  3.3.0
Architecture-tuned implementations of math kernels
volk_32f_s32f_convert_8i.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
60 #ifndef INCLUDED_volk_32f_s32f_convert_8i_u_H
61 #define INCLUDED_volk_32f_s32f_convert_8i_u_H
62 
63 #include <inttypes.h>
64 
65 static inline void volk_32f_s32f_convert_8i_single(int8_t* out, const float in)
66 {
67  const float min_val = INT8_MIN;
68  const float max_val = INT8_MAX;
69  if (in > max_val) {
70  *out = (int8_t)(max_val);
71  } else if (in < min_val) {
72  *out = (int8_t)(min_val);
73  } else {
74  *out = (int8_t)(rintf(in));
75  }
76 }
77 
78 #ifdef LV_HAVE_GENERIC
79 
/*!
 * \brief Portable reference kernel: scale each float and convert it to int8_t.
 *
 * Every input sample is multiplied by \p scalar and handed to
 * volk_32f_s32f_convert_8i_single(), which saturates and rounds it.
 *
 * \param outputVector Destination buffer, num_points int8_t values.
 * \param inputVector  Source buffer, num_points floats.
 * \param scalar       Factor applied to every sample before conversion.
 * \param num_points   Number of samples to convert.
 */
static inline void volk_32f_s32f_convert_8i_generic(int8_t* outputVector,
                                                    const float* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    for (unsigned int idx = 0; idx < num_points; ++idx) {
        const float scaled = inputVector[idx] * scalar;
        volk_32f_s32f_convert_8i_single(outputVector + idx, scaled);
    }
}
92 
93 #endif /* LV_HAVE_GENERIC */
94 
95 
96 #ifdef LV_HAVE_AVX2
97 #include <immintrin.h>
98 
/*!
 * \brief AVX2 kernel (unaligned loads/stores): scale floats and convert to int8_t.
 *
 * Processes 32 samples per iteration: multiply by \p scalar, clamp to
 * [INT8_MIN, INT8_MAX], convert to int32 and narrow to int8 with the saturating
 * pack instructions. Samples that do not fill a whole group of 32 are handled
 * one by one through volk_32f_s32f_convert_8i_single().
 *
 * \param outputVector Destination buffer, num_points int8_t values.
 * \param inputVector  Source buffer, num_points floats.
 * \param scalar       Factor applied to every sample before conversion.
 * \param num_points   Number of samples to convert.
 */
static inline void volk_32f_s32f_convert_8i_u_avx2(int8_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    const unsigned int num_blocks = num_points / 32;

    const __m256 lower_bound = _mm256_set1_ps((float)INT8_MIN);
    const __m256 upper_bound = _mm256_set1_ps((float)INT8_MAX);
    const __m256 scale = _mm256_set1_ps(scalar);

    const float* src = inputVector;
    int8_t* dst = outputVector;

    for (unsigned int block = 0; block < num_blocks; block++) {
        /* Load four vectors of eight floats and apply the scale factor. */
        __m256 f0 = _mm256_mul_ps(_mm256_loadu_ps(src), scale);
        __m256 f1 = _mm256_mul_ps(_mm256_loadu_ps(src + 8), scale);
        __m256 f2 = _mm256_mul_ps(_mm256_loadu_ps(src + 16), scale);
        __m256 f3 = _mm256_mul_ps(_mm256_loadu_ps(src + 24), scale);
        src += 32;

        /* Clamp to the int8 range while still in floating point. */
        f0 = _mm256_max_ps(_mm256_min_ps(f0, upper_bound), lower_bound);
        f1 = _mm256_max_ps(_mm256_min_ps(f1, upper_bound), lower_bound);
        f2 = _mm256_max_ps(_mm256_min_ps(f2, upper_bound), lower_bound);
        f3 = _mm256_max_ps(_mm256_min_ps(f3, upper_bound), lower_bound);

        const __m256i i0 = _mm256_cvtps_epi32(f0);
        const __m256i i1 = _mm256_cvtps_epi32(f1);
        const __m256i i2 = _mm256_cvtps_epi32(f2);
        const __m256i i3 = _mm256_cvtps_epi32(f3);

        /*
         * The 256-bit pack instructions work on each 128-bit lane separately,
         * so their output is interleaved. Selector 0xD8 picks the 64-bit
         * quarters in the order 0, 2, 1, 3, which restores sample order.
         */
        __m256i lo16 = _mm256_packs_epi32(i0, i1);
        lo16 = _mm256_permute4x64_epi64(lo16, 0xD8);
        __m256i hi16 = _mm256_packs_epi32(i2, i3);
        hi16 = _mm256_permute4x64_epi64(hi16, 0xD8);

        __m256i bytes = _mm256_packs_epi16(lo16, hi16);
        bytes = _mm256_permute4x64_epi64(bytes, 0xD8);

        _mm256_storeu_si256((__m256i*)dst, bytes);
        dst += 32;
    }

    /* Scalar tail for the remaining (num_points % 32) samples. */
    for (unsigned int idx = num_blocks * 32; idx < num_points; idx++) {
        float scaled = inputVector[idx] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[idx], scaled);
    }
}
157 
158 #endif /* LV_HAVE_AVX2 */
159 
160 #ifdef LV_HAVE_AVX512F
161 #include <immintrin.h>
162 
/*!
 * \brief AVX-512F kernel (unaligned loads/stores): scale floats and convert to
 *        int8_t.
 *
 * Processes 32 samples per iteration as two vectors of 16 floats: multiply by
 * \p scalar, clamp to [INT8_MIN, INT8_MAX], convert to int32 and narrow each
 * vector to 16 int8 values. Remaining samples go through
 * volk_32f_s32f_convert_8i_single().
 *
 * \param outputVector Destination buffer, num_points int8_t values.
 * \param inputVector  Source buffer, num_points floats.
 * \param scalar       Factor applied to every sample before conversion.
 * \param num_points   Number of samples to convert.
 */
static inline void volk_32f_s32f_convert_8i_u_avx512(int8_t* outputVector,
                                                     const float* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    const unsigned int num_blocks = num_points / 32;

    const __m512 scale = _mm512_set1_ps(scalar);
    const __m512 lower_bound = _mm512_set1_ps((float)INT8_MIN);
    const __m512 upper_bound = _mm512_set1_ps((float)INT8_MAX);

    const float* src = inputVector;
    int8_t* dst = outputVector;

    for (unsigned int block = 0; block < num_blocks; block++) {
        __m512 f0 = _mm512_mul_ps(_mm512_loadu_ps(src), scale);
        __m512 f1 = _mm512_mul_ps(_mm512_loadu_ps(src + 16), scale);
        src += 32;

        /* Clamp to the int8 range while still in floating point. */
        f0 = _mm512_max_ps(_mm512_min_ps(f0, upper_bound), lower_bound);
        f1 = _mm512_max_ps(_mm512_min_ps(f1, upper_bound), lower_bound);

        /* Narrow each int32 vector straight to 16 int8 values (saturating). */
        const __m128i bytes0 = _mm512_cvtsepi32_epi8(_mm512_cvtps_epi32(f0));
        const __m128i bytes1 = _mm512_cvtsepi32_epi8(_mm512_cvtps_epi32(f1));

        _mm_storeu_si128((__m128i*)dst, bytes0);
        _mm_storeu_si128((__m128i*)(dst + 16), bytes1);
        dst += 32;
    }

    /* Scalar tail for the remaining (num_points % 32) samples. */
    for (unsigned int idx = num_blocks * 32; idx < num_points; idx++) {
        float scaled = inputVector[idx] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[idx], scaled);
    }
}
216 
217 #endif /* LV_HAVE_AVX512F */
218 
219 
220 #ifdef LV_HAVE_SSE2
221 #include <emmintrin.h>
222 
/*!
 * \brief SSE2 kernel (unaligned loads/stores): scale floats and convert to int8_t.
 *
 * Processes 16 samples per iteration: multiply by \p scalar, clamp to
 * [INT8_MIN, INT8_MAX], convert to int32 and narrow to int8 with the saturating
 * pack instructions. Remaining samples go through
 * volk_32f_s32f_convert_8i_single().
 *
 * \param outputVector Destination buffer, num_points int8_t values.
 * \param inputVector  Source buffer, num_points floats.
 * \param scalar       Factor applied to every sample before conversion.
 * \param num_points   Number of samples to convert.
 */
static inline void volk_32f_s32f_convert_8i_u_sse2(int8_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    const unsigned int num_blocks = num_points / 16;

    const __m128 lower_bound = _mm_set_ps1((float)INT8_MIN);
    const __m128 upper_bound = _mm_set_ps1((float)INT8_MAX);
    const __m128 scale = _mm_set_ps1(scalar);

    const float* src = inputVector;
    int8_t* dst = outputVector;

    for (unsigned int block = 0; block < num_blocks; block++) {
        /* Load four vectors of four floats and apply the scale factor. */
        __m128 f0 = _mm_mul_ps(_mm_loadu_ps(src), scale);
        __m128 f1 = _mm_mul_ps(_mm_loadu_ps(src + 4), scale);
        __m128 f2 = _mm_mul_ps(_mm_loadu_ps(src + 8), scale);
        __m128 f3 = _mm_mul_ps(_mm_loadu_ps(src + 12), scale);
        src += 16;

        /* Clamp to the int8 range while still in floating point. */
        f0 = _mm_max_ps(_mm_min_ps(f0, upper_bound), lower_bound);
        f1 = _mm_max_ps(_mm_min_ps(f1, upper_bound), lower_bound);
        f2 = _mm_max_ps(_mm_min_ps(f2, upper_bound), lower_bound);
        f3 = _mm_max_ps(_mm_min_ps(f3, upper_bound), lower_bound);

        /* int32 -> int16 -> int8, keeping sample order. */
        const __m128i lo16 = _mm_packs_epi32(_mm_cvtps_epi32(f0), _mm_cvtps_epi32(f1));
        const __m128i hi16 = _mm_packs_epi32(_mm_cvtps_epi32(f2), _mm_cvtps_epi32(f3));
        const __m128i bytes = _mm_packs_epi16(lo16, hi16);

        _mm_storeu_si128((__m128i*)dst, bytes);
        dst += 16;
    }

    /* Scalar tail for the remaining (num_points % 16) samples. */
    for (unsigned int idx = num_blocks * 16; idx < num_points; idx++) {
        const float scaled = inputVector[idx] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[idx], scaled);
    }
}
278 
279 #endif /* LV_HAVE_SSE2 */
280 
281 
282 #ifdef LV_HAVE_SSE
283 #include <xmmintrin.h>
284 
285 static inline void volk_32f_s32f_convert_8i_u_sse(int8_t* outputVector,
286  const float* inputVector,
287  const float scalar,
288  unsigned int num_points)
289 {
290  const unsigned int quarterPoints = num_points / 4;
291 
292  const float* inputVectorPtr = (const float*)inputVector;
293  int8_t* outputVectorPtr = outputVector;
294 
295  const float min_val = INT8_MIN;
296  const float max_val = INT8_MAX;
297  const __m128 vmin_val = _mm_set_ps1(min_val);
298  const __m128 vmax_val = _mm_set_ps1(max_val);
299 
300  const __m128 vScalar = _mm_set_ps1(scalar);
301 
302  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
303 
304  for (unsigned int number = 0; number < quarterPoints; number++) {
305  __m128 ret = _mm_loadu_ps(inputVectorPtr);
306  inputVectorPtr += 4;
307 
308  ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
309 
310  _mm_store_ps(outputFloatBuffer, ret);
311  for (size_t inner_loop = 0; inner_loop < 4; inner_loop++) {
312  *outputVectorPtr++ = (int8_t)(rintf(outputFloatBuffer[inner_loop]));
313  }
314  }
315 
316  for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
317  const float r = inputVector[number] * scalar;
318  volk_32f_s32f_convert_8i_single(&outputVector[number], r);
319  }
320 }
321 
322 #endif /* LV_HAVE_SSE */
323 
324 
325 #endif /* INCLUDED_volk_32f_s32f_convert_8i_u_H */
326 #ifndef INCLUDED_volk_32f_s32f_convert_8i_a_H
327 #define INCLUDED_volk_32f_s32f_convert_8i_a_H
328 
329 #include <inttypes.h>
330 
331 #ifdef LV_HAVE_AVX2
332 #include <immintrin.h>
333 
/*!
 * \brief AVX2 kernel (aligned loads/stores): scale floats and convert to int8_t.
 *
 * Same algorithm as the unaligned AVX2 kernel, but uses _mm256_load_ps and
 * _mm256_store_si256, so both buffers are expected to satisfy the alignment
 * those intrinsics require. Processes 32 samples per iteration; remaining
 * samples go through volk_32f_s32f_convert_8i_single().
 *
 * \param outputVector Destination buffer, num_points int8_t values.
 * \param inputVector  Source buffer, num_points floats.
 * \param scalar       Factor applied to every sample before conversion.
 * \param num_points   Number of samples to convert.
 */
static inline void volk_32f_s32f_convert_8i_a_avx2(int8_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    const unsigned int num_blocks = num_points / 32;

    const __m256 lower_bound = _mm256_set1_ps((float)INT8_MIN);
    const __m256 upper_bound = _mm256_set1_ps((float)INT8_MAX);
    const __m256 scale = _mm256_set1_ps(scalar);

    const float* src = inputVector;
    int8_t* dst = outputVector;

    for (unsigned int block = 0; block < num_blocks; block++) {
        /* Load four vectors of eight floats and apply the scale factor. */
        __m256 f0 = _mm256_mul_ps(_mm256_load_ps(src), scale);
        __m256 f1 = _mm256_mul_ps(_mm256_load_ps(src + 8), scale);
        __m256 f2 = _mm256_mul_ps(_mm256_load_ps(src + 16), scale);
        __m256 f3 = _mm256_mul_ps(_mm256_load_ps(src + 24), scale);
        src += 32;

        /* Clamp to the int8 range while still in floating point. */
        f0 = _mm256_max_ps(_mm256_min_ps(f0, upper_bound), lower_bound);
        f1 = _mm256_max_ps(_mm256_min_ps(f1, upper_bound), lower_bound);
        f2 = _mm256_max_ps(_mm256_min_ps(f2, upper_bound), lower_bound);
        f3 = _mm256_max_ps(_mm256_min_ps(f3, upper_bound), lower_bound);

        const __m256i i0 = _mm256_cvtps_epi32(f0);
        const __m256i i1 = _mm256_cvtps_epi32(f1);
        const __m256i i2 = _mm256_cvtps_epi32(f2);
        const __m256i i3 = _mm256_cvtps_epi32(f3);

        /*
         * The 256-bit pack instructions work on each 128-bit lane separately,
         * so their output is interleaved. Selector 0xD8 picks the 64-bit
         * quarters in the order 0, 2, 1, 3, which restores sample order.
         */
        __m256i lo16 = _mm256_packs_epi32(i0, i1);
        lo16 = _mm256_permute4x64_epi64(lo16, 0xD8);
        __m256i hi16 = _mm256_packs_epi32(i2, i3);
        hi16 = _mm256_permute4x64_epi64(hi16, 0xD8);

        __m256i bytes = _mm256_packs_epi16(lo16, hi16);
        bytes = _mm256_permute4x64_epi64(bytes, 0xD8);

        _mm256_store_si256((__m256i*)dst, bytes);
        dst += 32;
    }

    /* Scalar tail for the remaining (num_points % 32) samples. */
    for (unsigned int idx = num_blocks * 32; idx < num_points; idx++) {
        const float scaled = inputVector[idx] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[idx], scaled);
    }
}
392 
393 #endif /* LV_HAVE_AVX2 */
394 
395 #ifdef LV_HAVE_AVX512F
396 #include <immintrin.h>
397 
/*!
 * \brief AVX-512F kernel (aligned loads/stores): scale floats and convert to
 *        int8_t.
 *
 * Same algorithm as the unaligned AVX-512 kernel, but uses _mm512_load_ps and
 * _mm_store_si128, so both buffers are expected to satisfy the alignment those
 * intrinsics require. Processes 32 samples per iteration; remaining samples go
 * through volk_32f_s32f_convert_8i_single().
 *
 * \param outputVector Destination buffer, num_points int8_t values.
 * \param inputVector  Source buffer, num_points floats.
 * \param scalar       Factor applied to every sample before conversion.
 * \param num_points   Number of samples to convert.
 */
static inline void volk_32f_s32f_convert_8i_a_avx512(int8_t* outputVector,
                                                     const float* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    const unsigned int num_blocks = num_points / 32;

    const __m512 scale = _mm512_set1_ps(scalar);
    const __m512 lower_bound = _mm512_set1_ps((float)INT8_MIN);
    const __m512 upper_bound = _mm512_set1_ps((float)INT8_MAX);

    const float* src = inputVector;
    int8_t* dst = outputVector;

    for (unsigned int block = 0; block < num_blocks; block++) {
        __m512 f0 = _mm512_mul_ps(_mm512_load_ps(src), scale);
        __m512 f1 = _mm512_mul_ps(_mm512_load_ps(src + 16), scale);
        src += 32;

        /* Clamp to the int8 range while still in floating point. */
        f0 = _mm512_max_ps(_mm512_min_ps(f0, upper_bound), lower_bound);
        f1 = _mm512_max_ps(_mm512_min_ps(f1, upper_bound), lower_bound);

        /* Narrow each int32 vector straight to 16 int8 values (saturating). */
        const __m128i bytes0 = _mm512_cvtsepi32_epi8(_mm512_cvtps_epi32(f0));
        const __m128i bytes1 = _mm512_cvtsepi32_epi8(_mm512_cvtps_epi32(f1));

        _mm_store_si128((__m128i*)dst, bytes0);
        _mm_store_si128((__m128i*)(dst + 16), bytes1);
        dst += 32;
    }

    /* Scalar tail for the remaining (num_points % 32) samples. */
    for (unsigned int idx = num_blocks * 32; idx < num_points; idx++) {
        float scaled = inputVector[idx] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[idx], scaled);
    }
}
451 
452 #endif /* LV_HAVE_AVX512F */
453 
454 
455 #ifdef LV_HAVE_SSE2
456 #include <emmintrin.h>
457 
/*!
 * \brief SSE2 kernel (aligned loads/stores): scale floats and convert to int8_t.
 *
 * Same algorithm as the unaligned SSE2 kernel, but uses _mm_load_ps and
 * _mm_store_si128, so both buffers are expected to satisfy the alignment those
 * intrinsics require. Processes 16 samples per iteration; remaining samples go
 * through volk_32f_s32f_convert_8i_single().
 *
 * \param outputVector Destination buffer, num_points int8_t values.
 * \param inputVector  Source buffer, num_points floats.
 * \param scalar       Factor applied to every sample before conversion.
 * \param num_points   Number of samples to convert.
 */
static inline void volk_32f_s32f_convert_8i_a_sse2(int8_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    const unsigned int num_blocks = num_points / 16;

    const __m128 lower_bound = _mm_set_ps1((float)INT8_MIN);
    const __m128 upper_bound = _mm_set_ps1((float)INT8_MAX);
    const __m128 scale = _mm_set_ps1(scalar);

    const float* src = inputVector;
    int8_t* dst = outputVector;

    for (unsigned int block = 0; block < num_blocks; block++) {
        /* Load four vectors of four floats and apply the scale factor. */
        __m128 f0 = _mm_mul_ps(_mm_load_ps(src), scale);
        __m128 f1 = _mm_mul_ps(_mm_load_ps(src + 4), scale);
        __m128 f2 = _mm_mul_ps(_mm_load_ps(src + 8), scale);
        __m128 f3 = _mm_mul_ps(_mm_load_ps(src + 12), scale);
        src += 16;

        /* Clamp to the int8 range while still in floating point. */
        f0 = _mm_max_ps(_mm_min_ps(f0, upper_bound), lower_bound);
        f1 = _mm_max_ps(_mm_min_ps(f1, upper_bound), lower_bound);
        f2 = _mm_max_ps(_mm_min_ps(f2, upper_bound), lower_bound);
        f3 = _mm_max_ps(_mm_min_ps(f3, upper_bound), lower_bound);

        /* int32 -> int16 -> int8, keeping sample order. */
        const __m128i lo16 = _mm_packs_epi32(_mm_cvtps_epi32(f0), _mm_cvtps_epi32(f1));
        const __m128i hi16 = _mm_packs_epi32(_mm_cvtps_epi32(f2), _mm_cvtps_epi32(f3));
        const __m128i bytes = _mm_packs_epi16(lo16, hi16);

        _mm_store_si128((__m128i*)dst, bytes);
        dst += 16;
    }

    /* Scalar tail for the remaining (num_points % 16) samples. */
    for (unsigned int idx = num_blocks * 16; idx < num_points; idx++) {
        const float scaled = inputVector[idx] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[idx], scaled);
    }
}
513 #endif /* LV_HAVE_SSE2 */
514 
515 
516 #ifdef LV_HAVE_SSE
517 #include <xmmintrin.h>
518 
519 static inline void volk_32f_s32f_convert_8i_a_sse(int8_t* outputVector,
520  const float* inputVector,
521  const float scalar,
522  unsigned int num_points)
523 {
524  const unsigned int quarterPoints = num_points / 4;
525 
526  const float* inputVectorPtr = (const float*)inputVector;
527  int8_t* outputVectorPtr = outputVector;
528 
529  const float min_val = INT8_MIN;
530  const float max_val = INT8_MAX;
531  const __m128 vmin_val = _mm_set_ps1(min_val);
532  const __m128 vmax_val = _mm_set_ps1(max_val);
533 
534  const __m128 vScalar = _mm_set_ps1(scalar);
535 
536  __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
537 
538  for (unsigned int number = 0; number < quarterPoints; number++) {
539  __m128 ret = _mm_load_ps(inputVectorPtr);
540  inputVectorPtr += 4;
541 
542  ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
543 
544  _mm_store_ps(outputFloatBuffer, ret);
545  for (size_t inner_loop = 0; inner_loop < 4; inner_loop++) {
546  *outputVectorPtr++ = (int8_t)(rintf(outputFloatBuffer[inner_loop]));
547  }
548  }
549 
550  for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
551  const float r = inputVector[number] * scalar;
552  volk_32f_s32f_convert_8i_single(&outputVector[number], r);
553  }
554 }
555 
556 #endif /* LV_HAVE_SSE */
557 
558 
559 #ifdef LV_HAVE_NEON
560 #include <arm_neon.h>
561 
/*!
 * \brief NEON kernel: scale floats and convert to int8_t.
 *
 * Processes 16 samples per iteration: multiply by \p scalar, clamp to
 * [INT8_MIN, INT8_MAX], round to the nearest integer, convert to int32 and
 * narrow to int8 with saturating moves. Remaining samples go through
 * volk_32f_s32f_convert_8i_single().
 *
 * Rounding is done by adding and then subtracting 1.5 * 2^23. For the clamped
 * range the sum lies in [2^23, 2^24), where consecutive floats are exactly 1
 * apart, so the addition itself rounds to an integer. With the default
 * round-to-nearest mode, ties go to even, which is what rintf() in the scalar
 * tail produces. The previous "add +/-0.5 and truncate" approach rounded ties
 * away from zero and turned 0.49999997f into 1, so results depended on whether
 * a sample fell in the vector body or in the tail.
 *
 * NOTE(review): the add/subtract pair must not be reassociated by the
 * compiler; confirm this kernel is not built with -ffast-math or similar.
 *
 * \param outputVector Destination buffer, num_points int8_t values.
 * \param inputVector  Source buffer, num_points floats.
 * \param scalar       Factor applied to every sample before conversion.
 * \param num_points   Number of samples to convert.
 */
static inline void volk_32f_s32f_convert_8i_neon(int8_t* outputVector,
                                                 const float* inputVector,
                                                 const float scalar,
                                                 unsigned int num_points)
{
    const unsigned int sixteenthPoints = num_points / 16;

    const float* inputVectorPtr = inputVector;
    int8_t* outputVectorPtr = outputVector;

    const float32x4_t vScalar = vdupq_n_f32(scalar);
    const float32x4_t vmin_val = vdupq_n_f32((float)INT8_MIN);
    const float32x4_t vmax_val = vdupq_n_f32((float)INT8_MAX);
    /* 1.5 * 2^23: pushes clamped values into the range where ulp == 1. */
    const float32x4_t vround = vdupq_n_f32(12582912.0f);

    for (unsigned int number = 0; number < sixteenthPoints; number++) {
        float32x4_t inputVal0 = vld1q_f32(inputVectorPtr);
        float32x4_t inputVal1 = vld1q_f32(inputVectorPtr + 4);
        float32x4_t inputVal2 = vld1q_f32(inputVectorPtr + 8);
        float32x4_t inputVal3 = vld1q_f32(inputVectorPtr + 12);
        inputVectorPtr += 16;

        // Scale and clip
        float32x4_t ret0 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal0, vScalar), vmax_val), vmin_val);
        float32x4_t ret1 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal1, vScalar), vmax_val), vmin_val);
        float32x4_t ret2 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal2, vScalar), vmax_val), vmin_val);
        float32x4_t ret3 =
            vmaxq_f32(vminq_f32(vmulq_f32(inputVal3, vScalar), vmax_val), vmin_val);

        // Round to nearest integer (ties to even) while still in float
        ret0 = vsubq_f32(vaddq_f32(ret0, vround), vround);
        ret1 = vsubq_f32(vaddq_f32(ret1, vround), vround);
        ret2 = vsubq_f32(vaddq_f32(ret2, vround), vround);
        ret3 = vsubq_f32(vaddq_f32(ret3, vround), vround);

        // Convert to int32 (values are already integral, so this is exact)
        int32x4_t intVal0 = vcvtq_s32_f32(ret0);
        int32x4_t intVal1 = vcvtq_s32_f32(ret1);
        int32x4_t intVal2 = vcvtq_s32_f32(ret2);
        int32x4_t intVal3 = vcvtq_s32_f32(ret3);

        // Narrow to int16 with saturation
        int16x8_t wide16_0 = vcombine_s16(vqmovn_s32(intVal0), vqmovn_s32(intVal1));
        int16x8_t wide16_1 = vcombine_s16(vqmovn_s32(intVal2), vqmovn_s32(intVal3));

        // Narrow to int8 with saturation
        int8x16_t result = vcombine_s8(vqmovn_s16(wide16_0), vqmovn_s16(wide16_1));

        vst1q_s8(outputVectorPtr, result);
        outputVectorPtr += 16;
    }

    /* Scalar tail for the remaining (num_points % 16) samples. */
    for (unsigned int number = sixteenthPoints * 16; number < num_points; number++) {
        float r = inputVector[number] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[number], r);
    }
}
639 #endif /* LV_HAVE_NEON */
640 
641 
642 #ifdef LV_HAVE_NEONV8
643 #include <arm_neon.h>
644 
/*!
 * \brief ARMv8 NEON kernel: scale floats and convert to int8_t.
 *
 * Processes 32 samples per iteration as eight vectors of four floats: multiply
 * by \p scalar, clamp to [INT8_MIN, INT8_MAX], convert to int32 with
 * vcvtnq_s32_f32 and narrow to int8 with saturating moves. Remaining samples go
 * through volk_32f_s32f_convert_8i_single().
 *
 * \param outputVector Destination buffer, num_points int8_t values.
 * \param inputVector  Source buffer, num_points floats.
 * \param scalar       Factor applied to every sample before conversion.
 * \param num_points   Number of samples to convert.
 */
static inline void volk_32f_s32f_convert_8i_neonv8(int8_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    const unsigned int num_blocks = num_points / 32;

    const float32x4_t scale = vdupq_n_f32(scalar);
    const float32x4_t lower_bound = vdupq_n_f32((float)INT8_MIN);
    const float32x4_t upper_bound = vdupq_n_f32((float)INT8_MAX);

    const float* src = inputVector;
    int8_t* dst = outputVector;

    for (unsigned int block = 0; block < num_blocks; block++) {
        int16x4_t half16[8];

        /* Fixed trip count of eight: one vector of four samples per pass. */
        for (unsigned int k = 0; k < 8; k++) {
            float32x4_t x = vmulq_f32(vld1q_f32(src + 4 * k), scale);
            x = vmaxq_f32(vminq_f32(x, upper_bound), lower_bound);
            /* Round-to-nearest conversion, then saturating narrow to int16. */
            half16[k] = vqmovn_s32(vcvtnq_s32_f32(x));
        }
        __VOLK_PREFETCH(src + 32);
        src += 32;

        /* Pair the int16 halves and narrow to int8 with saturation. */
        const int8x8_t b0 = vqmovn_s16(vcombine_s16(half16[0], half16[1]));
        const int8x8_t b1 = vqmovn_s16(vcombine_s16(half16[2], half16[3]));
        const int8x8_t b2 = vqmovn_s16(vcombine_s16(half16[4], half16[5]));
        const int8x8_t b3 = vqmovn_s16(vcombine_s16(half16[6], half16[7]));

        vst1q_s8(dst, vcombine_s8(b0, b1));
        vst1q_s8(dst + 16, vcombine_s8(b2, b3));
        dst += 32;
    }

    /* Scalar tail for the remaining (num_points % 32) samples. */
    for (unsigned int idx = num_blocks * 32; idx < num_points; idx++) {
        float scaled = inputVector[idx] * scalar;
        volk_32f_s32f_convert_8i_single(&outputVector[idx], scaled);
    }
}
738 #endif /* LV_HAVE_NEONV8 */
739 
740 
741 #ifdef LV_HAVE_RVV
742 #include <riscv_vector.h>
743 
744 static inline void volk_32f_s32f_convert_8i_rvv(int8_t* outputVector,
745  const float* inputVector,
746  const float scalar,
747  unsigned int num_points)
748 {
749  size_t n = num_points;
750  for (size_t vl; n > 0; n -= vl, inputVector += vl, outputVector += vl) {
751  vl = __riscv_vsetvl_e32m8(n);
752  vfloat32m8_t v = __riscv_vle32_v_f32m8(inputVector, vl);
753  vint16m4_t vi = __riscv_vfncvt_x(__riscv_vfmul(v, scalar, vl), vl);
754  __riscv_vse8(outputVector, __riscv_vnclip(vi, 0, 0, vl), vl);
755  }
756 }
757 #endif /*LV_HAVE_RVV*/
758 
759 #endif /* INCLUDED_volk_32f_s32f_convert_8i_a_H */
volk_32f_s32f_convert_8i_a_sse2
static void volk_32f_s32f_convert_8i_a_sse2(int8_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_8i.h:458
__VOLK_ATTR_ALIGNED
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:62
volk_32f_s32f_convert_8i_u_sse
static void volk_32f_s32f_convert_8i_u_sse(int8_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_8i.h:285
volk_32f_s32f_convert_8i_neon
static void volk_32f_s32f_convert_8i_neon(int8_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_8i.h:562
__VOLK_PREFETCH
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:68
volk_32f_s32f_convert_8i_u_sse2
static void volk_32f_s32f_convert_8i_u_sse2(int8_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_8i.h:223
volk_32f_s32f_convert_8i_a_sse
static void volk_32f_s32f_convert_8i_a_sse(int8_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_8i.h:519
volk_32f_s32f_convert_8i_single
static void volk_32f_s32f_convert_8i_single(int8_t *out, const float in)
Definition: volk_32f_s32f_convert_8i.h:65
volk_32f_s32f_convert_8i_generic
static void volk_32f_s32f_convert_8i_generic(int8_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_8i.h:80
rintf
static float rintf(float x)
Definition: config.h:45