47 #ifndef INCLUDED_volk_64u_popcnt_a_H
48 #define INCLUDED_volk_64u_popcnt_a_H
54 #ifdef LV_HAVE_GENERIC
/*
 * Portable bit-count body (LV_HAVE_GENERIC section).
 *
 * Each 32-bit half of `value` goes through the same sequence of parallel
 * partial sums; the count for the low half is kept in retVal64 while retVal
 * is reused for the high half.
 *
 * NOTE(review): the enclosing signature and the statements that combine the
 * two counts and store the result are not visible in this listing - confirm
 * against the full file.
 */
/* Low 32 bits of value. */
63 uint32_t retVal = (uint32_t)(value & 0x00000000FFFFFFFFull);
/* Add neighbouring bits: each 2-bit field now holds how many of its bits were set. */
65 retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
/* Add neighbouring 2-bit fields into 4-bit fields. */
66 retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
/* Add neighbouring nibbles; the mask leaves one count per byte. */
67 retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
/* Fold the byte counts together. No mask here: the stale upper bytes are
 * discarded by the mask on the next step. */
68 retVal = (retVal + (retVal >> 8));
/* Fold the two 16-bit halves; 6 bits are enough for a count of 0..32. */
69 retVal = (retVal + (retVal >> 16)) & 0x0000003F;
/* Keep the low-half count in a 64-bit variable. */
70 uint64_t retVal64 = retVal;
/* High 32 bits of value, shifted down, then the same reduction as above. */
73 retVal = (uint32_t)((value & 0xFFFFFFFF00000000ull) >> 32);
74 retVal = (retVal & 0x55555555) + (retVal >> 1 & 0x55555555);
75 retVal = (retVal & 0x33333333) + (retVal >> 2 & 0x33333333);
76 retVal = (retVal + (retVal >> 4)) & 0x0F0F0F0F;
77 retVal = (retVal + (retVal >> 8));
78 retVal = (retVal + (retVal >> 16)) & 0x0000003F;
87 #if LV_HAVE_SSE4_2 && LV_HAVE_64
89 #include <nmmintrin.h>
/*
 * Writes the number of set bits in `value` to *ret using the
 * _mm_popcnt_u64 intrinsic from <nmmintrin.h>.
 *
 * Only compiled when both LV_HAVE_SSE4_2 and LV_HAVE_64 evaluate non-zero.
 *
 * ret:   destination for the bit count.
 * value: 64-bit word whose set bits are counted.
 */
91 static inline void volk_64u_popcnt_a_sse4_2(uint64_t* ret,
const uint64_t value)
93 *ret = _mm_popcnt_u64(value);
100 #include <arm_neon.h>
/*
 * NEON bit-count body: count set bits per byte, then widen and add pairwise
 * until a single 64-bit total remains.
 *
 * NOTE(review): the enclosing signature is not visible in this listing;
 * `value` and `ret` are presumably its parameters - confirm against the
 * full file.
 */
103 uint8x8_t input_val, count8x8_val;
104 uint16x4_t count16x4_val;
105 uint32x2_t count32x2_val;
106 uint64x1_t count64x1_val;
/* Load the eight bytes of value as a vector of 8-bit lanes. */
108 input_val = vld1_u8((
unsigned char*)&value);
/* Per-byte count of set bits. */
109 count8x8_val = vcnt_u8(input_val);
/* Pairwise widening adds: 8 x u8 -> 4 x u16 -> 2 x u32 -> 1 x u64. */
110 count16x4_val = vpaddl_u8(count8x8_val);
111 count32x2_val = vpaddl_u16(count16x4_val);
112 count64x1_val = vpaddl_u32(count32x2_val);
/* Store the single 64-bit lane (the total) through ret. */
113 vst1_u64(ret, count64x1_val);
119 #ifdef LV_HAVE_NEONV8
120 #include <arm_neon.h>
/*
 * Writes the number of set bits in `value` to *ret using NEON intrinsics,
 * finishing with a single across-lanes add (vaddlv_u8).
 *
 * Only compiled when LV_HAVE_NEONV8 is defined.
 *
 * ret:   destination for the bit count.
 * value: 64-bit word whose set bits are counted.
 */
122 static inline void volk_64u_popcnt_neonv8(uint64_t* ret,
const uint64_t value)
/* Build a 64-bit vector from the scalar and view it as eight bytes. */
125 uint8x8_t input_val = vreinterpret_u8_u64(vcreate_u64(value));
/* Per-byte count of set bits. */
126 uint8x8_t count8x8_val = vcnt_u8(input_val);
/* Sum all eight byte counts into one scalar and store it. */
127 *ret = vaddlv_u8(count8x8_val);
132 #include <riscv_vector.h>
/*
 * Writes the number of set bits in `value` to *ret using RISC-V vector
 * intrinsics from <riscv_vector.h>.
 *
 * ret:   destination for the bit count.
 * value: 64-bit word whose set bits are counted.
 */
134 static inline void volk_64u_popcnt_rvv(uint64_t* ret,
const uint64_t value)
/* Place value in element 0 of a vector register (vl = 1), reinterpret that
 * register as a mask, and count the set mask bits over the first 64
 * positions - i.e. the 64 bits of value. */
136 *ret = __riscv_vcpop(__riscv_vreinterpret_b2(__riscv_vmv_s_x_u64m1(value, 1)), 64);
140 #ifdef LV_HAVE_RVA22V
141 #include <riscv_bitmanip.h>
/*
 * Writes the number of set bits in `value` to *ret using the scalar
 * __riscv_cpop_64 intrinsic from <riscv_bitmanip.h>.
 *
 * Only compiled when LV_HAVE_RVA22V is defined.
 *
 * ret:   destination for the bit count.
 * value: 64-bit word whose set bits are counted.
 */
143 static inline void volk_64u_popcnt_rva22(uint64_t* ret,
const uint64_t value)
145 *ret = __riscv_cpop_64(value);