Vector Optimized Library of Kernels  3.3.0
Architecture-tuned implementations of math kernels
kernel_tests.h
Go to the documentation of this file.
1 /* -*- c++ -*- */
2 /*
3  * Copyright 2014 - 2021 Free Software Foundation, Inc.
4  * Copyright 2023 - 2025 Magnus Lundmark <magnuslundmark@gmail.com>
5  *
6  * This file is part of VOLK
7  *
8  * SPDX-License-Identifier: LGPL-3.0-or-later
9  */
10 
11 #include "qa_utils.h"
12 
13 #include <volk/volk.h>
14 #include <cmath>
15 #include <limits>
16 #include <vector>
17 
18 // macros for initializing volk_test_case_t. Macros are needed to generate
19 // function names of the pattern kernel_name_*
20 
// Puppet kernels: all func_variants are collected from the puppet itself,
// while the name of the actual (puppet master) function is carried along so
// that results are written under that name.
//   func               - puppet kernel identifier (expanded into
//                        func##_get_func_desc() and func##_manual)
//   puppet_master_func - identifier of the real kernel, stringified only
//   test_params        - parameter object forwarded to volk_test_case_t
#define VOLK_INIT_PUPP(func, puppet_master_func, test_params)     \
    volk_test_case_t(func##_get_func_desc(),                      \
                     reinterpret_cast<void (*)()>(func##_manual), \
                     std::string(#func),                          \
                     std::string(#puppet_master_func),            \
                     test_params)
29 
// Builds a volk_test_case_t for a regular (non-puppet) kernel.
//   func        - kernel identifier; expanded into func##_get_func_desc() and
//                 func##_manual, and stringified for the test-case name
//   test_params - parameter object forwarded to volk_test_case_t
#define VOLK_INIT_TEST(func, test_params)                         \
    volk_test_case_t(func##_get_func_desc(),                      \
                     reinterpret_cast<void (*)()>(func##_manual), \
                     std::string(#func),                          \
                     test_params)
35 
// Appends one test case to the local `test_cases` vector of the enclosing
// function. The expansion already ends with ';', so call sites are written
// without a trailing semicolon.
#define QA(test_case) test_cases.push_back(test_case);
37 std::vector<volk_test_case_t> init_test_list(volk_test_params_t test_params)
38 {
39  const float inf = std::numeric_limits<float>::infinity();
40  const float nan = std::nanf("");
41 
42  // Some kernels need a lower tolerance
43  volk_test_params_t test_params_inacc = test_params.make_tol(1e-2);
44  volk_test_params_t test_params_inacc_tenth = test_params.make_tol(1e-1);
45 
46  volk_test_params_t test_params_power(test_params);
47  test_params_power.set_scalar(2.5);
48 
49  volk_test_params_t test_params_clamp(test_params);
50  test_params_clamp.set_scalar(-.5f);
51 
52  volk_test_params_t test_params_rotator(test_params);
53  test_params_rotator.set_scalar(std::polar(1.0f, 0.1f));
54  test_params_rotator.set_tol(1e-3);
55 
56  volk_test_params_t test_params_snf(test_params);
57  test_params_snf.set_scalar(0.5);
58  test_params_snf.set_tol(1e-4);
59 
60  std::vector<volk_test_case_t> test_cases;
61  QA(VOLK_INIT_PUPP(volk_64u_popcntpuppet_64u, volk_64u_popcnt, test_params))
62  QA(VOLK_INIT_PUPP(volk_16u_byteswappuppet_16u, volk_16u_byteswap, test_params))
63  QA(VOLK_INIT_PUPP(volk_32u_byteswappuppet_32u, volk_32u_byteswap, test_params))
64  QA(VOLK_INIT_PUPP(volk_32u_popcntpuppet_32u, volk_32u_popcnt, test_params))
65  QA(VOLK_INIT_PUPP(volk_64u_byteswappuppet_64u, volk_64u_byteswap, test_params))
66  QA(VOLK_INIT_PUPP(volk_32fc_s32fc_rotator2puppet_32fc,
67  volk_32fc_s32fc_x2_rotator2_32fc,
68  test_params_rotator))
70  volk_8u_conv_k7_r2puppet_8u, volk_8u_x4_conv_k7_r2_8u, test_params.make_tol(0)))
71  QA(VOLK_INIT_PUPP(volk_32f_x2_fm_detectpuppet_32f,
72  volk_32f_s32f_32f_fm_detect_32f,
74  QA(VOLK_INIT_TEST(volk_16ic_s32f_deinterleave_real_32f, test_params))
75  QA(VOLK_INIT_TEST(volk_16ic_deinterleave_real_8i, test_params))
76  QA(VOLK_INIT_TEST(volk_16ic_deinterleave_16i_x2, test_params))
77  QA(VOLK_INIT_TEST(volk_16ic_s32f_deinterleave_32f_x2, test_params))
78  QA(VOLK_INIT_TEST(volk_16ic_deinterleave_real_16i, test_params))
79  QA(VOLK_INIT_TEST(volk_16ic_magnitude_16i, test_params))
80  QA(VOLK_INIT_TEST(volk_16ic_s32f_magnitude_32f, test_params))
81  QA(VOLK_INIT_TEST(volk_16ic_convert_32fc, test_params))
82  QA(VOLK_INIT_TEST(volk_16ic_x2_multiply_16ic, test_params))
83  QA(VOLK_INIT_TEST(volk_16ic_x2_dot_prod_16ic, test_params))
84  QA(VOLK_INIT_TEST(volk_16i_s32f_convert_32f, test_params))
85  QA(VOLK_INIT_TEST(volk_16i_convert_8i, test_params))
86  QA(VOLK_INIT_TEST(volk_16i_32fc_dot_prod_32fc, test_params.make_absolute(1e-1)))
87  QA(VOLK_INIT_TEST(volk_32f_accumulator_s32f, test_params.make_absolute(2e-2)))
88  QA(VOLK_INIT_TEST(volk_32f_x2_add_32f, test_params))
89 
90  // Index kernels need identical values to test tie-breaking (first index wins)
91  volk_test_params_t test_params_index(test_params.make_tol(0));
92  test_params_index.add_float_edge_cases({
93  1.0f,
94  1.0f,
95  1.0f,
96  1.0f, // 4 identical (SSE lane width)
97  1.0f,
98  1.0f,
99  1.0f,
100  1.0f, // 8 total (AVX lane width)
101  1.0f,
102  1.0f,
103  1.0f,
104  1.0f, // 12
105  1.0f,
106  1.0f,
107  1.0f,
108  1.0f, // 16 total (AVX512 lane width)
109  });
110  QA(VOLK_INIT_TEST(volk_32f_index_max_16u, test_params_index))
111  QA(VOLK_INIT_TEST(volk_32f_index_max_32u, test_params_index))
112  QA(VOLK_INIT_TEST(volk_32f_index_min_16u, test_params_index))
113  QA(VOLK_INIT_TEST(volk_32f_index_min_32u, test_params_index))
114  QA(VOLK_INIT_TEST(volk_32fc_32f_multiply_32fc, test_params))
115  QA(VOLK_INIT_TEST(volk_32fc_32f_add_32fc, test_params))
116 
117  volk_test_params_t test_params_log2(test_params.make_absolute(5e-6));
118  test_params_log2.add_float_edge_cases({ -1.f, 0.f, inf, 65536.f });
119  QA(VOLK_INIT_TEST(volk_32f_log2_32f, test_params_log2))
120 
121  QA(VOLK_INIT_TEST(volk_32f_expfast_32f, test_params_inacc_tenth))
122  QA(VOLK_INIT_TEST(volk_32f_sin_32f, test_params))
123  QA(VOLK_INIT_TEST(volk_32f_cos_32f, test_params))
124  QA(VOLK_INIT_TEST(volk_32f_sincos_32f_x2, test_params))
125  QA(VOLK_INIT_TEST(volk_32f_tan_32f, test_params_inacc))
126 
127  volk_test_params_t test_params_atan(test_params);
128  test_params_atan.add_float_edge_cases({ std::nanf(""),
129  std::numeric_limits<float>::infinity(),
130  -std::numeric_limits<float>::infinity(),
131  0.0f,
132  -0.0f,
133  1e10f,
134  -1e10f,
135  1.0f,
136  -1.0f });
137  QA(VOLK_INIT_TEST(volk_32f_atan_32f, test_params_atan))
138 
139  volk_test_params_t test_params_asin(test_params);
140  test_params_asin.set_tol(1e-5);
141  test_params_asin.add_float_edge_cases({ std::nanf(""),
142  1.0f,
143  -1.0f,
144  0.0f,
145  -0.0f,
146  0.5f,
147  -0.5f,
148  0.99f,
149  -0.99f,
150  0.707107f,
151  -0.707107f });
152  QA(VOLK_INIT_TEST(volk_32f_asin_32f, test_params_asin))
153  QA(VOLK_INIT_TEST(volk_32f_acos_32f, test_params_asin))
154  QA(VOLK_INIT_TEST(volk_32fc_s32f_power_32fc, test_params_power))
155  QA(VOLK_INIT_TEST(volk_32f_s32f_calc_spectral_noise_floor_32f, test_params_snf))
156 
157  volk_test_params_t test_params_atan2(test_params);
158  test_params_atan2.add_complex_edge_cases(
159  { lv_cmake(0.0f, 0.0f), // atan2(0, 0) = 0
160  lv_cmake(0.0f, -0.0f), // atan2(-0, 0) = -0 (preserve sign)
161  lv_cmake(0.0f, 1.0f), // atan2(1, 0) = π/2
162  lv_cmake(0.0f, -1.0f), // atan2(-1, 0) = -π/2
163  lv_cmake(1.0f, 0.0f), // atan2(0, 1) = 0
164  lv_cmake(-1.0f, 0.0f), // atan2(0, -1) = π
165  lv_cmake(1.0f, 1.0f), // atan2(1, 1) = π/4
166  lv_cmake(-1.0f, 1.0f), // atan2(1, -1) = 3π/4
167  lv_cmake(-1.0f, -1.0f), // atan2(-1, -1) = -3π/4
168  lv_cmake(1.0f, -1.0f), // atan2(-1, 1) = -π/4
169  lv_cmake(inf, inf), // atan2(inf, inf) = π/4
170  lv_cmake(inf, -inf), // atan2(-inf, inf) = -π/4
171  lv_cmake(-inf, inf), // atan2(inf, -inf) = 3π/4
172  lv_cmake(-inf, -inf), // atan2(-inf, -inf) = -3π/4
173  lv_cmake(inf, 0.0f), // atan2(0, inf) = 0
174  lv_cmake(-inf, 0.0f), // atan2(0, -inf) = π
175  lv_cmake(1.0f, inf), // atan2(inf, 1) = π/2
176  lv_cmake(1.0f, -inf), // atan2(-inf, 1) = -π/2
177  lv_cmake(nan, 1.0f), // atan2(1, nan) = nan (propagate)
178  lv_cmake(1.0f, nan) }); // atan2(nan, 1) = nan (propagate)
179  QA(VOLK_INIT_TEST(volk_32fc_s32f_atan2_32f, test_params_atan2))
180  QA(VOLK_INIT_TEST(volk_32fc_x2_conjugate_dot_prod_32fc,
181  test_params.make_absolute(2e-2)))
182  QA(VOLK_INIT_TEST(volk_32fc_deinterleave_32f_x2, test_params))
183  QA(VOLK_INIT_TEST(volk_32fc_accumulator_s32fc, test_params.make_absolute(3e-2)))
184  QA(VOLK_INIT_TEST(volk_32fc_deinterleave_64f_x2, test_params))
185  QA(VOLK_INIT_TEST(volk_32fc_s32f_deinterleave_real_16i, test_params.make_tol(1)))
186  QA(VOLK_INIT_TEST(volk_32fc_deinterleave_imag_32f, test_params))
187  QA(VOLK_INIT_TEST(volk_32fc_deinterleave_real_32f, test_params))
188  QA(VOLK_INIT_TEST(volk_32fc_deinterleave_real_64f, test_params))
189  QA(VOLK_INIT_TEST(volk_32fc_x2_dot_prod_32fc, test_params.make_absolute(2e-2)))
190  QA(VOLK_INIT_TEST(volk_32fc_32f_dot_prod_32fc, test_params.make_absolute(1e-2)))
191 
192  // Complex index kernels: same magnitude values to test tie-breaking
193  volk_test_params_t test_params_index_fc(test_params.make_tol(0));
194  test_params_index_fc.add_complex_edge_cases({
195  lv_cmake(1.0f, 0.0f),
196  lv_cmake(1.0f, 0.0f), // 2 same magnitude
197  lv_cmake(0.0f, 1.0f),
198  lv_cmake(0.0f, 1.0f), // 4 (all |z|=1)
199  lv_cmake(1.0f, 0.0f),
200  lv_cmake(1.0f, 0.0f), // 6
201  lv_cmake(0.0f, 1.0f),
202  lv_cmake(0.0f, 1.0f), // 8 (covers AVX 8-wide)
203  });
204  QA(VOLK_INIT_TEST(volk_32fc_index_max_16u, test_params_index_fc))
205  QA(VOLK_INIT_TEST(volk_32fc_index_max_32u, test_params_index_fc))
206  QA(VOLK_INIT_TEST(volk_32fc_index_min_16u, test_params_index_fc))
207  QA(VOLK_INIT_TEST(volk_32fc_index_min_32u, test_params_index_fc))
208  QA(VOLK_INIT_TEST(volk_32fc_s32f_magnitude_16i, test_params.make_tol(1)))
209  QA(VOLK_INIT_TEST(volk_32fc_magnitude_32f, test_params_inacc_tenth))
210  QA(VOLK_INIT_TEST(volk_32fc_magnitude_squared_32f, test_params))
211  QA(VOLK_INIT_TEST(volk_32fc_x2_add_32fc, test_params))
212  QA(VOLK_INIT_TEST(volk_32fc_x2_multiply_32fc, test_params))
213  QA(VOLK_INIT_TEST(volk_32fc_x2_multiply_conjugate_32fc, test_params))
214  QA(VOLK_INIT_TEST(volk_32fc_x2_divide_32fc, test_params))
215  QA(VOLK_INIT_TEST(volk_32fc_conjugate_32fc, test_params))
216  QA(VOLK_INIT_TEST(volk_32f_s32f_convert_16i, test_params.make_tol(1)))
217  QA(VOLK_INIT_TEST(volk_32f_s32f_convert_32i, test_params.make_tol(1)))
218  QA(VOLK_INIT_TEST(volk_32f_convert_64f, test_params))
219  QA(VOLK_INIT_TEST(volk_32f_s32f_convert_8i, test_params.make_tol(1)))
220  QA(VOLK_INIT_TEST(volk_32fc_convert_16ic, test_params.make_tol(1)))
221  QA(VOLK_INIT_TEST(volk_32fc_s32f_power_spectrum_32f, test_params.make_tol(2e-6)))
222  QA(VOLK_INIT_TEST(volk_32fc_x2_square_dist_32f, test_params))
223  QA(VOLK_INIT_TEST(volk_32fc_x2_s32f_square_dist_scalar_mult_32f, test_params))
224  QA(VOLK_INIT_TEST(volk_32f_x2_divide_32f, test_params))
225  QA(VOLK_INIT_TEST(volk_32f_x2_dot_prod_32f, test_params.make_absolute(1.5e-2)))
226  QA(VOLK_INIT_TEST(volk_32f_x2_s32f_interleave_16ic, test_params.make_tol(1)))
227  QA(VOLK_INIT_TEST(volk_32f_x2_interleave_32fc, test_params))
228  QA(VOLK_INIT_TEST(volk_32f_x2_max_32f, test_params))
229  QA(VOLK_INIT_TEST(volk_32f_x2_min_32f, test_params))
230  QA(VOLK_INIT_TEST(volk_32f_x2_multiply_32f, test_params))
231  QA(VOLK_INIT_TEST(volk_32f_64f_multiply_64f, test_params))
232  QA(VOLK_INIT_TEST(volk_32f_64f_add_64f, test_params))
233  QA(VOLK_INIT_TEST(volk_32f_s32f_normalize, test_params))
234  QA(VOLK_INIT_TEST(volk_32f_s32f_power_32f, test_params))
235  QA(VOLK_INIT_TEST(volk_32f_reciprocal_32f, test_params.make_tol(6.15e-5)))
236  QA(VOLK_INIT_TEST(volk_32f_sqrt_32f, test_params_inacc))
237 
238  volk_test_params_t test_params_invsqrt(test_params.make_tol(1e-6));
239  test_params_invsqrt.add_float_edge_cases(
240  { -1.f, 1.f, 0.f, inf, 1e-2f, 1e2f, 1e-10, 1e10 });
241  QA(VOLK_INIT_TEST(volk_32f_invsqrt_32f, test_params_invsqrt))
242  QA(VOLK_INIT_TEST(volk_32f_s32f_stddev_32f, test_params_inacc))
243  QA(VOLK_INIT_TEST(volk_32f_stddev_and_mean_32f_x2, test_params.make_absolute(1e-5)))
244  QA(VOLK_INIT_TEST(volk_32f_x2_subtract_32f, test_params))
245  QA(VOLK_INIT_TEST(volk_32f_x3_sum_of_poly_32f, test_params.make_absolute(1e+3)))
246  QA(VOLK_INIT_TEST(volk_32i_x2_and_32i, test_params))
247  QA(VOLK_INIT_TEST(volk_32i_s32f_convert_32f, test_params))
248  QA(VOLK_INIT_TEST(volk_32i_x2_or_32i, test_params))
249  QA(VOLK_INIT_TEST(volk_32f_x2_dot_prod_16i, test_params.make_tol(1)))
250  QA(VOLK_INIT_TEST(volk_64f_convert_32f, test_params))
251  QA(VOLK_INIT_TEST(volk_64f_x2_max_64f, test_params))
252  QA(VOLK_INIT_TEST(volk_64f_x2_min_64f, test_params))
253  QA(VOLK_INIT_TEST(volk_64f_x2_multiply_64f, test_params))
254  QA(VOLK_INIT_TEST(volk_64f_x2_add_64f, test_params))
255  QA(VOLK_INIT_TEST(volk_64f_x2_dot_prod_64f, test_params))
256  QA(VOLK_INIT_TEST(volk_8ic_deinterleave_16i_x2, test_params))
257  QA(VOLK_INIT_TEST(volk_8ic_s32f_deinterleave_32f_x2, test_params))
258  QA(VOLK_INIT_TEST(volk_8ic_deinterleave_real_16i, test_params))
259  QA(VOLK_INIT_TEST(volk_8ic_s32f_deinterleave_real_32f, test_params))
260  QA(VOLK_INIT_TEST(volk_8ic_deinterleave_real_8i, test_params))
261  QA(VOLK_INIT_TEST(volk_8ic_x2_multiply_conjugate_16ic, test_params))
262  QA(VOLK_INIT_TEST(volk_8ic_x2_s32f_multiply_conjugate_32fc, test_params))
263  QA(VOLK_INIT_TEST(volk_8i_convert_16i, test_params))
264  QA(VOLK_INIT_TEST(volk_8i_s32f_convert_32f, test_params))
265  QA(VOLK_INIT_TEST(volk_8i_x2_add_saturated_8i, test_params))
266  QA(VOLK_INIT_TEST(volk_8u_x2_add_saturated_8u, test_params))
267  QA(VOLK_INIT_TEST(volk_16i_x2_add_saturated_16i, test_params))
268  QA(VOLK_INIT_TEST(volk_16u_x2_add_saturated_16u, test_params))
269  QA(VOLK_INIT_TEST(volk_32fc_s32fc_multiply2_32fc, test_params))
270  QA(VOLK_INIT_TEST(volk_32f_s32f_multiply_32f, test_params))
271  QA(VOLK_INIT_TEST(volk_32f_s32f_add_32f, test_params))
272  QA(VOLK_INIT_TEST(volk_32f_binary_slicer_32i, test_params))
273  QA(VOLK_INIT_TEST(volk_32f_binary_slicer_8i, test_params))
274  QA(VOLK_INIT_TEST(volk_32u_reverse_32u, test_params))
275  QA(VOLK_INIT_TEST(volk_32f_tanh_32f, test_params_inacc))
276  QA(VOLK_INIT_TEST(volk_32fc_x2_s32fc_multiply_conjugate_add2_32fc, test_params))
277  QA(VOLK_INIT_TEST(volk_32f_exp_32f, test_params))
278  QA(VOLK_INIT_PUPP(volk_32f_x2_powpuppet_32f, volk_32f_x2_pow_32f, test_params_inacc))
280  volk_32f_s32f_mod_rangepuppet_32f, volk_32f_s32f_s32f_mod_range_32f, test_params))
282  volk_8u_x3_encodepolarpuppet_8u, volk_8u_x3_encodepolar_8u_x2, test_params))
283  QA(VOLK_INIT_PUPP(volk_32f_8u_polarbutterflypuppet_32f,
284  volk_32f_8u_polarbutterfly_32f,
285  test_params))
286  QA(VOLK_INIT_PUPP(volk_32fc_s32f_power_spectral_densitypuppet_32f,
287  volk_32fc_s32f_x2_power_spectral_density_32f,
288  test_params))
290  volk_32f_s32f_clamppuppet_32f, volk_32f_s32f_x2_clamp_32f, test_params_clamp))
291  QA(VOLK_INIT_PUPP(volk_32f_s32f_convertpuppet_8u,
292  volk_32f_s32f_x2_convert_8u,
293  test_params.make_tol(1)))
294  // no one uses these, so don't test them
295  // VOLK_PROFILE(volk_16i_x5_add_quad_16i_x4, 1e-4, 2046, 10000, &results,
296  // benchmark_mode, kernel_regex); VOLK_PROFILE(volk_16i_branch_4_state_8, 1e-4, 2046,
297  // 10000, &results, benchmark_mode, kernel_regex); VOLK_PROFILE(volk_16i_max_star_16i,
298  // 0, 0, 204602, 10000, &results, benchmark_mode, kernel_regex);
299  // VOLK_PROFILE(volk_16i_max_star_horizontal_16i, 0, 0, 204602, 10000, &results,
300  // benchmark_mode, kernel_regex); VOLK_PROFILE(volk_16i_permute_and_scalar_add, 1e-4,
301  // 0, 2046, 10000, &results, benchmark_mode, kernel_regex);
302  // VOLK_PROFILE(volk_16i_x4_quad_max_star_16i, 1e-4, 0, 2046, 10000, &results,
303  // benchmark_mode, kernel_regex);
304 
305  return test_cases;
306 }
test_params
volk_test_params_t test_params(1e-6f, 327.f, 131071, 1987, false, "")
volk_test_params_t
Definition: qa_utils.h:63
volk_test_params_t::add_float_edge_cases
void add_float_edge_cases(const std::vector< float > &edge_cases)
Definition: qa_utils.h:97
volk_test_params_t::make_tol
volk_test_params_t make_tol(float tol)
Definition: qa_utils.h:125
volk_test_params_t::make_absolute
volk_test_params_t make_absolute(float tol)
Definition: qa_utils.h:118
volk_test_params_t::set_scalar
void set_scalar(lv_32fc_t scalar)
Definition: qa_utils.h:92
volk_test_params_t::set_tol
void set_tol(float tol)
Definition: qa_utils.h:91
lv_cmake
#define lv_cmake(r, i)
Definition: volk_complex.h:77
VOLK_INIT_PUPP
#define VOLK_INIT_PUPP(func, puppet_master_func, test_params)
Definition: kernel_tests.h:23
init_test_list
std::vector< volk_test_case_t > init_test_list(volk_test_params_t test_params)
Definition: kernel_tests.h:37
volk_test_params_t::add_complex_edge_cases
void add_complex_edge_cases(const std::vector< lv_32fc_t > &edge_cases)
Definition: qa_utils.h:101
VOLK_INIT_TEST
#define VOLK_INIT_TEST(func, test_params)
Definition: kernel_tests.h:30
QA
#define QA(test)
Definition: kernel_tests.h:36
qa_utils.h