Vector Optimized Library of Kernels  3.3.0
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_imag_32f.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2012, 2014 Free Software Foundation, Inc.
4  *
5  * This file is part of VOLK
6  *
7  * SPDX-License-Identifier: LGPL-3.0-or-later
8  */
9 
57 #ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
58 #define INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
59 
60 #include <inttypes.h>
61 #include <stdio.h>
62 
63 #ifdef LV_HAVE_AVX
64 #include <immintrin.h>
65 
66 static inline void volk_32fc_deinterleave_imag_32f_a_avx(float* qBuffer,
67  const lv_32fc_t* complexVector,
68  unsigned int num_points)
69 {
70  unsigned int number = 0;
71  const unsigned int eighthPoints = num_points / 8;
72  const float* complexVectorPtr = (const float*)complexVector;
73  float* qBufferPtr = qBuffer;
74 
75  __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
76  for (; number < eighthPoints; number++) {
77 
78  cplxValue1 = _mm256_load_ps(complexVectorPtr);
79  complexVectorPtr += 8;
80 
81  cplxValue2 = _mm256_load_ps(complexVectorPtr);
82  complexVectorPtr += 8;
83 
84  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
85  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
86 
87  // Arrange in q1q2q3q4 format
88  qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
89 
90  _mm256_store_ps(qBufferPtr, qValue);
91 
92  qBufferPtr += 8;
93  }
94 
95  number = eighthPoints * 8;
96  for (; number < num_points; number++) {
97  complexVectorPtr++;
98  *qBufferPtr++ = *complexVectorPtr++;
99  }
100 }
101 #endif /* LV_HAVE_AVX */
102 
103 #ifdef LV_HAVE_SSE
104 #include <xmmintrin.h>
105 
106 static inline void volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer,
107  const lv_32fc_t* complexVector,
108  unsigned int num_points)
109 {
110  unsigned int number = 0;
111  const unsigned int quarterPoints = num_points / 4;
112 
113  const float* complexVectorPtr = (const float*)complexVector;
114  float* qBufferPtr = qBuffer;
115 
116  __m128 cplxValue1, cplxValue2, iValue;
117  for (; number < quarterPoints; number++) {
118 
119  cplxValue1 = _mm_load_ps(complexVectorPtr);
120  complexVectorPtr += 4;
121 
122  cplxValue2 = _mm_load_ps(complexVectorPtr);
123  complexVectorPtr += 4;
124 
125  // Arrange in q1q2q3q4 format
126  iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
127 
128  _mm_store_ps(qBufferPtr, iValue);
129 
130  qBufferPtr += 4;
131  }
132 
133  number = quarterPoints * 4;
134  for (; number < num_points; number++) {
135  complexVectorPtr++;
136  *qBufferPtr++ = *complexVectorPtr++;
137  }
138 }
139 #endif /* LV_HAVE_SSE */
140 
141 #ifdef LV_HAVE_NEON
142 #include <arm_neon.h>
143 
144 static inline void volk_32fc_deinterleave_imag_32f_neon(float* qBuffer,
145  const lv_32fc_t* complexVector,
146  unsigned int num_points)
147 {
148  unsigned int number = 0;
149  unsigned int quarter_points = num_points / 4;
150  const float* complexVectorPtr = (float*)complexVector;
151  float* qBufferPtr = qBuffer;
152  float32x4x2_t complexInput;
153 
154  for (number = 0; number < quarter_points; number++) {
155  complexInput = vld2q_f32(complexVectorPtr);
156  vst1q_f32(qBufferPtr, complexInput.val[1]);
157  complexVectorPtr += 8;
158  qBufferPtr += 4;
159  }
160 
161  for (number = quarter_points * 4; number < num_points; number++) {
162  complexVectorPtr++;
163  *qBufferPtr++ = *complexVectorPtr++;
164  }
165 }
166 #endif /* LV_HAVE_NEON */
167 
168 #ifdef LV_HAVE_NEONV8
169 #include <arm_neon.h>
170 
171 static inline void volk_32fc_deinterleave_imag_32f_neonv8(float* qBuffer,
172  const lv_32fc_t* complexVector,
173  unsigned int num_points)
174 {
175  const unsigned int eighthPoints = num_points / 8;
176  const float* complexVectorPtr = (float*)complexVector;
177  float* qBufferPtr = qBuffer;
178 
179  for (unsigned int number = 0; number < eighthPoints; number++) {
180  float32x4x2_t cplx0 = vld2q_f32(complexVectorPtr);
181  float32x4x2_t cplx1 = vld2q_f32(complexVectorPtr + 8);
182  __VOLK_PREFETCH(complexVectorPtr + 32);
183 
184  vst1q_f32(qBufferPtr, cplx0.val[1]);
185  vst1q_f32(qBufferPtr + 4, cplx1.val[1]);
186 
187  complexVectorPtr += 16;
188  qBufferPtr += 8;
189  }
190 
191  for (unsigned int number = eighthPoints * 8; number < num_points; number++) {
192  complexVectorPtr++;
193  *qBufferPtr++ = *complexVectorPtr++;
194  }
195 }
196 #endif /* LV_HAVE_NEONV8 */
197 
198 #ifdef LV_HAVE_GENERIC
199 
200 static inline void volk_32fc_deinterleave_imag_32f_generic(float* qBuffer,
201  const lv_32fc_t* complexVector,
202  unsigned int num_points)
203 {
204  unsigned int number = 0;
205  const float* complexVectorPtr = (float*)complexVector;
206  float* qBufferPtr = qBuffer;
207  for (number = 0; number < num_points; number++) {
208  complexVectorPtr++;
209  *qBufferPtr++ = *complexVectorPtr++;
210  }
211 }
212 #endif /* LV_HAVE_GENERIC */
213 
214 
215 #endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_a_H */
216 
217 #ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_u_H
218 #define INCLUDED_volk_32fc_deinterleave_imag_32f_u_H
219 
220 #include <inttypes.h>
221 #include <stdio.h>
222 
223 #ifdef LV_HAVE_AVX
224 #include <immintrin.h>
225 
226 static inline void volk_32fc_deinterleave_imag_32f_u_avx(float* qBuffer,
227  const lv_32fc_t* complexVector,
228  unsigned int num_points)
229 {
230  unsigned int number = 0;
231  const unsigned int eighthPoints = num_points / 8;
232  const float* complexVectorPtr = (const float*)complexVector;
233  float* qBufferPtr = qBuffer;
234 
235  __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
236  for (; number < eighthPoints; number++) {
237 
238  cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
239  complexVectorPtr += 8;
240 
241  cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
242  complexVectorPtr += 8;
243 
244  complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
245  complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
246 
247  // Arrange in q1q2q3q4 format
248  qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
249 
250  _mm256_storeu_ps(qBufferPtr, qValue);
251 
252  qBufferPtr += 8;
253  }
254 
255  number = eighthPoints * 8;
256  for (; number < num_points; number++) {
257  complexVectorPtr++;
258  *qBufferPtr++ = *complexVectorPtr++;
259  }
260 }
261 #endif /* LV_HAVE_AVX */
262 
263 #ifdef LV_HAVE_RVV
264 #include <riscv_vector.h>
265 
266 static inline void volk_32fc_deinterleave_imag_32f_rvv(float* qBuffer,
267  const lv_32fc_t* complexVector,
268  unsigned int num_points)
269 {
270  const uint64_t* in = (const uint64_t*)complexVector;
271  size_t n = num_points;
272  for (size_t vl; n > 0; n -= vl, in += vl, qBuffer += vl) {
273  vl = __riscv_vsetvl_e64m8(n);
274  vuint64m8_t vc = __riscv_vle64_v_u64m8(in, vl);
275  __riscv_vse32((uint32_t*)qBuffer, __riscv_vnsrl(vc, 32, vl), vl);
276  }
277 }
278 #endif /*LV_HAVE_RVV*/
279 
280 #endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_u_H */
volk_32fc_deinterleave_imag_32f_generic
static void volk_32fc_deinterleave_imag_32f_generic(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:200
volk_32fc_deinterleave_imag_32f_neon
static void volk_32fc_deinterleave_imag_32f_neon(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:144
__VOLK_PREFETCH
#define __VOLK_PREFETCH(addr)
Definition: volk_common.h:68
volk_32fc_deinterleave_imag_32f_a_sse
static void volk_32fc_deinterleave_imag_32f_a_sse(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:106
lv_32fc_t
float complex lv_32fc_t
Definition: volk_complex.h:74
volk_32fc_deinterleave_imag_32f_a_avx
static void volk_32fc_deinterleave_imag_32f_a_avx(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:66
volk_32fc_deinterleave_imag_32f_u_avx
static void volk_32fc_deinterleave_imag_32f_u_avx(float *qBuffer, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_deinterleave_imag_32f.h:226