11 #ifndef INCLUDED_LIBVOLK_COMMON_H
12 #define INCLUDED_LIBVOLK_COMMON_H
// Compiler-abstraction macros, Microsoft `__declspec` flavour.
// NOTE(review): the `#if` that opens this branch is not visible in this
// excerpt; presumably it selects an MSVC-compatible front end -- confirm.
18 #define __VOLK_ATTR_ALIGNED(x) __declspec(align(x))
// Expands to nothing in this branch.
19 #define __VOLK_ATTR_UNUSED
20 #define __VOLK_ATTR_INLINE __forceinline
21 #define __VOLK_ATTR_DEPRECATED __declspec(deprecated)
// Symbol visibility uses dllexport / dllimport in this branch.
22 #define __VOLK_ATTR_EXPORT __declspec(dllexport)
23 #define __VOLK_ATTR_IMPORT __declspec(dllimport)
// Prefetch expands to nothing in this branch; `addr` is discarded.
24 #define __VOLK_PREFETCH(addr)
25 #define __VOLK_ASM __asm
26 #elif defined(__clang__)
// Clang branch: the same macro set expressed with GNU-style `__attribute__`.
30 #define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x)))
31 #define __VOLK_ATTR_UNUSED __attribute__((unused))
32 #define __VOLK_ATTR_INLINE __attribute__((always_inline))
33 #define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
34 #define __VOLK_ASM __asm__
// Export and import both map to default ELF-style visibility here.
35 #define __VOLK_ATTR_EXPORT __attribute__((visibility("default")))
36 #define __VOLK_ATTR_IMPORT __attribute__((visibility("default")))
// Prefetch forwards to the compiler builtin.
37 #define __VOLK_PREFETCH(addr) __builtin_prefetch(addr)
38 #elif defined __GNUC__
// GCC branch: the same macro set expressed with GNU-style `__attribute__`.
39 #define __VOLK_ATTR_ALIGNED(x) __attribute__((aligned(x)))
40 #define __VOLK_ATTR_UNUSED __attribute__((unused))
41 #define __VOLK_ATTR_INLINE __attribute__((always_inline))
42 #define __VOLK_ATTR_DEPRECATED __attribute__((deprecated))
43 #define __VOLK_ASM __asm__
// Two alternative EXPORT/IMPORT definitions follow: default visibility, or
// nothing at all.
// NOTE(review): the nested conditional that picks between them is not visible
// in this excerpt -- presumably a compiler-version check; confirm.
45 #define __VOLK_ATTR_EXPORT __attribute__((visibility("default")))
46 #define __VOLK_ATTR_IMPORT __attribute__((visibility("default")))
48 #define __VOLK_ATTR_EXPORT
49 #define __VOLK_ATTR_IMPORT
// Prefetch forwards to the compiler builtin.
51 #define __VOLK_PREFETCH(addr) __builtin_prefetch(addr)
// Second `__declspec`-based macro set.
// NOTE(review): the preprocessor directive selecting this branch is not
// visible in this excerpt; presumably it is the plain MSVC case -- confirm.
53 #define __VOLK_ATTR_ALIGNED(x) __declspec(align(x))
// Expands to nothing in this branch.
54 #define __VOLK_ATTR_UNUSED
55 #define __VOLK_ATTR_INLINE __forceinline
56 #define __VOLK_ATTR_DEPRECATED __declspec(deprecated)
57 #define __VOLK_ATTR_EXPORT __declspec(dllexport)
58 #define __VOLK_ATTR_IMPORT __declspec(dllimport)
// Prefetch expands to nothing in this branch; `addr` is discarded.
59 #define __VOLK_PREFETCH(addr)
60 #define __VOLK_ASM __asm
// Macro set in which every attribute macro expands to nothing; only
// __VOLK_ASM still expands to a keyword.
// NOTE(review): presumably the final `#else` fallback for unrecognised
// compilers; the directive itself is not visible in this excerpt.
// Note that __VOLK_ATTR_ALIGNED(x) requests no alignment in this branch.
62 #define __VOLK_ATTR_ALIGNED(x)
63 #define __VOLK_ATTR_UNUSED
64 #define __VOLK_ATTR_INLINE
65 #define __VOLK_ATTR_DEPRECATED
66 #define __VOLK_ATTR_EXPORT
67 #define __VOLK_ATTR_IMPORT
68 #define __VOLK_PREFETCH(addr)
69 #define __VOLK_ASM __asm__
// Disable compiler warnings 4244 and 4305 (conversion / truncation between
// types, per the trailing comments).
// NOTE(review): any guard restricting these pragmas to one compiler is not
// visible in this excerpt.
76 #pragma warning(disable : 4244) //'conversion' conversion from 'type1' to 'type2',
78 #pragma warning(disable : 4305) //'identifier' : truncation from 'type1' to 'type2'
// C-linkage wrappers. The `extern "C"` form is used only when the translation
// unit is C++ AND __GNUC__ evaluates to non-zero; a C++ compiler that does not
// define __GNUC__ does not satisfy this condition.
85 #if defined(__cplusplus) && (__GNUC__)
86 #define __VOLK_DECL_BEGIN extern "C" {
87 #define __VOLK_DECL_END }
// Empty alternative definitions.
// NOTE(review): the `#else` / `#endif` lines are not visible in this excerpt.
89 #define __VOLK_DECL_BEGIN
90 #define __VOLK_DECL_END
// VOLK_API resolves to the export attribute or to the import attribute.
// NOTE(review): the conditional choosing between the two definitions is not
// visible in this excerpt -- presumably "building the library" vs "using it".
98 #define VOLK_API __VOLK_ATTR_EXPORT
100 #define VOLK_API __VOLK_ATTR_IMPORT
112 #include <x86intrin.h>
// Cast an arbitrary pointer expression to `union bit128*` / `union bit256*`.
// The argument is parenthesised, so any pointer expression may be passed.
// NOTE(review): the union definitions are not visible in this excerpt; the
// cast performs no alignment check -- confirm that callers guarantee it.
147 #define bit128_p(x) ((union bit128*)(x))
148 #define bit256_p(x) ((union bit256*)(x))
// Base-2 logarithm of `f` via the C library.
158 float const result = log2f(f);
// An infinite result is replaced by 127.0f carrying the same sign (so -inf
// becomes -127.0f); every other value, including NaN, is returned unchanged.
164 return isinf(result) ? copysignf(127.0f, result) : result;
// Hex-float literal for the value shown in the trailing comment, which is
// numerically 10*log10(2). The literal has no `f` suffix, so its type is double.
// NOTE(review): presumably used to turn a log2 value into 10*log10 -- confirm
// at the call sites.
171 #define volk_log2to10factor (0x1.815182p1) // 3.01029995663981209120
// Coefficients a1..a13 (odd indices only) of a polynomial, given as exact
// hex-float literals.
// NOTE(review): presumably an arctangent approximation, judging by the
// accumulator name -- the enclosing signature is not visible in this excerpt.
182 const float a1 = +0x1.ffffeap-1f;
183 const float a3 = -0x1.55437p-2f;
184 const float a5 = +0x1.972be6p-3f;
185 const float a7 = -0x1.1436ap-3f;
186 const float a9 = +0x1.5785aap-4f;
187 const float a11 = -0x1.2f3004p-5f;
188 const float a13 = +0x1.01a37cp-7f;
// The polynomial is evaluated in the variable x*x.
190 const float x_times_x = x * x;
// Horner steps with fused multiply-add, from the a11 term down to a1.
// NOTE(review): the initial value of `arctan` (presumably a13) and whatever
// follows the last step are not visible in this excerpt.
192 arctan = fmaf(x_times_x, arctan, a11);
193 arctan = fmaf(x_times_x, arctan, a9);
194 arctan = fmaf(x_times_x, arctan, a7);
195 arctan = fmaf(x_times_x, arctan, a5);
196 arctan = fmaf(x_times_x, arctan, a3);
197 arctan = fmaf(x_times_x, arctan, a1);
// Polynomial coefficients as hex-float literals. s1 is close to -1/6 and s2
// close to 1/120, i.e. near the Taylor coefficients of sine.
// NOTE(review): presumably a sine approximation on a reduced range; the
// signature and the valid input range are not visible in this excerpt.
213 const float s1 = -0x1.555552p-3f;
214 const float s2 = +0x1.110be2p-7f;
215 const float s3 = -0x1.9ab22ap-13f;
217 const float x2 = x * x;
218 const float x3 = x2 * x;
// poly = s1 + x2 * (s2 + x2 * s3), built with fused multiply-adds.
220 float poly = fmaf(x2, s3, s2);
221 poly = fmaf(x2, poly, s1);
// Result: x + x^3 * poly.
222 return fmaf(x3, poly, x);
// Polynomial coefficients as hex-float literals. c1 is close to -1/2, i.e.
// near the leading Taylor coefficient of cosine.
// NOTE(review): presumably a cosine approximation on a reduced range; the
// signature and the valid input range are not visible in this excerpt.
235 const float c1 = -0x1.fffff4p-2f;
236 const float c2 = +0x1.554a46p-5f;
237 const float c3 = -0x1.661be2p-10f;
239 const float x2 = x * x;
// poly = c1 + x2 * (c2 + x2 * c3), built with fused multiply-adds.
241 float poly = fmaf(x2, c3, c2);
242 poly = fmaf(x2, poly, c1);
// Result: 1 + x^2 * poly.
243 return fmaf(x2, poly, 1.0f);
// 2/pi, and pi/2 split into a leading part plus a small correction term so the
// subtraction below can be done in two steps.
254 const float two_over_pi = 0x1.45f306p-1f;
255 const float pi_over_2_hi = 0x1.921fb6p+0f;
256 const float pi_over_2_lo = -0x1.777a5cp-25f;
// Multiple of pi/2 nearest to x, rounded per the current rounding mode.
258 float n_f =
rintf(x * two_over_pi);
// r = x - n_f * (pi_over_2_hi + pi_over_2_lo), applied as two fused steps.
261 float r = fmaf(-n_f, pi_over_2_hi, x);
262 r = fmaf(-n_f, pi_over_2_lo, r);
// Odd n selects cos_r, even n selects sin_r; bit 1 of n negates the result.
// NOTE(review): the definitions of `n`, `sin_r` and `cos_r` are not visible in
// this excerpt -- presumably n is n_f converted to an integer and the other two
// are polynomial evaluations at r; confirm.
268 float result = (n & 1) ? cos_r : sin_r;
269 return (n & 2) ? -result : result;
// 2/pi, and pi/2 split into a leading part plus a small correction term so the
// subtraction below can be done in two steps.
280 const float two_over_pi = 0x1.45f306p-1f;
281 const float pi_over_2_hi = 0x1.921fb6p+0f;
282 const float pi_over_2_lo = -0x1.777a5cp-25f;
// Multiple of pi/2 nearest to x, rounded per the current rounding mode.
284 float n_f =
rintf(x * two_over_pi);
// r = x - n_f * (pi_over_2_hi + pi_over_2_lo), applied as two fused steps.
287 float r = fmaf(-n_f, pi_over_2_hi, x);
288 r = fmaf(-n_f, pi_over_2_lo, r);
// Odd n selects sin_r, even n selects cos_r; bit 1 of (n + 1) negates the
// result.
// NOTE(review): the definitions of `n`, `sin_r` and `cos_r` are not visible in
// this excerpt -- presumably n is n_f converted to an integer and the other two
// are polynomial evaluations at r; confirm.
294 float result = (n & 1) ? sin_r : cos_r;
295 return ((n + 1) & 2) ? -result : result;
// pi/2 as a hex-float literal.
305 const float pi_2 = 0x1.921fb6p0f;
// Returns pi/2 carrying the sign of x.
// NOTE(review): the condition guarding this return is not visible in this
// excerpt -- presumably large-magnitude or infinite x; confirm.
314 return copysignf(pi_2, x);
// pi and pi/2 as hex-float literals.
// NOTE(review): this looks like a two-argument arctangent of (y, x); the
// signature and several statements/braces are not visible in this excerpt.
337 const float pi = 0x1.921fb6p1f;
338 const float pi_2 = 0x1.921fb6p0f;
// NaN in either argument is special-cased (the branch body is not visible).
341 if (isnan(x) || isnan(y)) {
// pi/4 for positive x, 3*pi/4 otherwise, with the sign of y.
// NOTE(review): presumably reached only when both x and y are infinite.
349 const float angle = (x > 0.f) ? (pi_2 / 2.f) : (3.f * pi_2 / 2.f);
350 return copysignf(angle, y);
// +-pi/2 with the sign of y (guarding condition not visible).
353 return copysignf(pi_2, y);
// +-0 for positive x, +-pi otherwise, with the sign of y (guarding condition
// not visible).
358 return (x > 0.f) ? copysignf(0.f, y) : copysignf(pi, y);
// x == +-0: the result is +-0 when y is also zero, otherwise +-pi/2, both with
// the sign of y.
// NOTE(review): fabs() here is the double-precision variant applied to float
// operands, while fabsf() is used elsewhere in this file; the comparisons give
// the same result, but fabsf would be consistent.
361 if (fabs(x) == 0.f) {
362 return (fabs(y) == 0.f) ? copysignf(0.f, y) : copysignf(pi_2, y);
// Divide the smaller magnitude by the larger, so |input| <= 1 for finite,
// non-NaN operands.
364 const int swap = fabs(x) < fabs(y);
365 const float numerator = swap ? x : y;
366 const float denominator = swap ? y : x;
367 float input = numerator / denominator;
// When the operands were swapped, the result becomes (+-pi/2 - result), with
// the sign of pi/2 taken from the sign of `input`.
// NOTE(review): the statement that computes `result` from `input` is not
// visible in this excerpt.
374 result = swap ? (input >= 0.f ? pi_2 : -pi_2) - result : result;
// Shift by +-pi with the sign of y.
// NOTE(review): presumably guarded by x < 0; the guard is not visible.
376 result += copysignf(pi, y);
// Polynomial coefficients c0..c3 as hex-float literals; c0 is just below 1.0.
// NOTE(review): the expression that combines them is not visible in this
// excerpt -- presumably a polynomial in u = x*x multiplied by x; confirm.
388 const float c0 = 0x1.ffffcep-1f;
389 const float c1 = 0x1.55b648p-3f;
390 const float c2 = 0x1.24d192p-4f;
391 const float c3 = 0x1.0a788p-4f;
393 const float u = x * x;
// pi/2 as a hex-float literal.
406 const float pi_2 = 0x1.921fb6p0f;
// Work on |x|; the sign is restored on return.
408 const float ax = fabsf(x);
// t = (1 - |x|) / 2 and s = sqrt(t).
// NOTE(review): presumably the half-angle reduction
// asin(a) = pi/2 - 2*asin(sqrt((1 - a)/2)) for |x| near 1; the branch condition
// and the computation of `inner` are not visible in this excerpt.
414 const float t = (1.0f - ax) * 0.5f;
415 const float s = sqrtf(t);
417 const float result = pi_2 - 2.0f * inner;
// Re-apply the sign of the original argument.
418 return copysignf(result, x);
426 const float pi_2 = 0x1.921fb6p0f;