15#ifndef INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_
16#define INCLUDE_VOLK_VOLK_RVV_INTRINSICS_H_
17#include <riscv_vector.h>
/*
 * RISCV_SHRINK2(op, T, S, v)
 *
 * Combines the two m1 parts of `v` with a single binary intrinsic call:
 *
 *     __riscv_<op>(part0, part1, vlmax)
 *
 * where partN is __riscv_vget_<T><S>m1(v, N) and vlmax is
 * __riscv_vsetvlmax_e<S>m1().
 *
 *   op - intrinsic name without the "__riscv_" prefix
 *   T  - element type letter pasted into the vget name
 *   S  - element width in bits pasted into the vget/vsetvlmax names
 *   v  - source vector; parts 0 and 1 are read, so it is presumably an
 *        m2 register group (confirm against callers)
 *
 * NOTE: `v` is expanded twice, so it must not have side effects.
 */
#define RISCV_SHRINK2(op, T, S, v)                    \
    __riscv_##op(__riscv_vget_##T##S##m1((v), 0),     \
                 __riscv_vget_##T##S##m1((v), 1),     \
                 __riscv_vsetvlmax_e##S##m1())
/*
 * RISCV_SHRINK4(op, T, S, v)
 *
 * Reduces the four m1 parts of `v` to one m1 value with a balanced tree of
 * binary intrinsic calls:
 *
 *     op(op(part0, part1), op(part2, part3))
 *
 * Every call is __riscv_<op>(a, b, vlmax) with
 * vlmax = __riscv_vsetvlmax_e<S>m1(), and partN is
 * __riscv_vget_<T><S>m1(v, N).
 *
 *   op - intrinsic name without the "__riscv_" prefix
 *   T  - element type letter pasted into the vget name
 *   S  - element width in bits pasted into the vget/vsetvlmax names
 *   v  - source vector; parts 0..3 are read, so it is presumably an
 *        m4 register group (confirm against callers)
 *
 * NOTE: `v` is expanded four times, so it must not have side effects.
 */
#define RISCV_SHRINK4(op, T, S, v)                                 \
    __riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1((v), 0),     \
                              __riscv_vget_##T##S##m1((v), 1),     \
                              __riscv_vsetvlmax_e##S##m1()),       \
                 __riscv_##op(__riscv_vget_##T##S##m1((v), 2),     \
                              __riscv_vget_##T##S##m1((v), 3),     \
                              __riscv_vsetvlmax_e##S##m1()),       \
                 __riscv_vsetvlmax_e##S##m1())
/*
 * RISCV_SHRINK8(op, T, S, v)
 *
 * Reduces the eight m1 parts of `v` to one m1 value with a balanced tree of
 * binary intrinsic calls:
 *
 *     op(op(op(part0, part1), op(part2, part3)),
 *        op(op(part4, part5), op(part6, part7)))
 *
 * Every call is __riscv_<op>(a, b, vlmax) with
 * vlmax = __riscv_vsetvlmax_e<S>m1(), and partN is
 * __riscv_vget_<T><S>m1(v, N).
 *
 *   op - intrinsic name without the "__riscv_" prefix
 *   T  - element type letter pasted into the vget name
 *   S  - element width in bits pasted into the vget/vsetvlmax names
 *   v  - source vector; parts 0..7 are read, so it is presumably an
 *        m8 register group (confirm against callers)
 *
 * NOTE: `v` is expanded eight times, so it must not have side effects.
 */
#define RISCV_SHRINK8(op, T, S, v)                                              \
    __riscv_##op(__riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1((v), 0),     \
                                           __riscv_vget_##T##S##m1((v), 1),     \
                                           __riscv_vsetvlmax_e##S##m1()),       \
                              __riscv_##op(__riscv_vget_##T##S##m1((v), 2),     \
                                           __riscv_vget_##T##S##m1((v), 3),     \
                                           __riscv_vsetvlmax_e##S##m1()),       \
                              __riscv_vsetvlmax_e##S##m1()),                    \
                 __riscv_##op(__riscv_##op(__riscv_vget_##T##S##m1((v), 4),     \
                                           __riscv_vget_##T##S##m1((v), 5),     \
                                           __riscv_vsetvlmax_e##S##m1()),       \
                              __riscv_##op(__riscv_vget_##T##S##m1((v), 6),     \
                                           __riscv_vget_##T##S##m1((v), 7),     \
                                           __riscv_vsetvlmax_e##S##m1()),       \
                              __riscv_vsetvlmax_e##S##m1()),                    \
                 __riscv_vsetvlmax_e##S##m1())
/*
 * RISCV_PERM4(f, v, vidx)
 *
 * Applies `f` separately to each of the four u8m1 parts of `v` and
 * reassembles the results with __riscv_vcreate_v_u8m1_u8m4. Each call is
 *
 *     f(__riscv_vget_u8m1(v, N), vidx, __riscv_vsetvlmax_e8m1())
 *
 * i.e. the data part comes first and the same `vidx` is used for every part.
 *
 *   f    - function or function-like macro taking (part, vidx, vl)
 *   v    - source vector whose parts 0..3 are read
 *   vidx - second argument forwarded unchanged to every call of `f`
 *
 * NOTE: `v` and `vidx` are each expanded four times, so neither may have
 * side effects.
 */
#define RISCV_PERM4(f, v, vidx)                                        \
    __riscv_vcreate_v_u8m1_u8m4(                                       \
        f(__riscv_vget_u8m1((v), 0), (vidx), __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1((v), 1), (vidx), __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1((v), 2), (vidx), __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1((v), 3), (vidx), __riscv_vsetvlmax_e8m1()))
/*
 * RISCV_LUT4(f, vtbl, v)
 *
 * Applies `f` separately to each of the four u8m1 parts of `v` and
 * reassembles the results with __riscv_vcreate_v_u8m1_u8m4. Each call is
 *
 *     f(vtbl, __riscv_vget_u8m1(v, N), __riscv_vsetvlmax_e8m1())
 *
 * i.e. the same `vtbl` comes first and the part of `v` is the second
 * argument (the reverse of RISCV_PERM4's argument order).
 *
 *   f    - function or function-like macro taking (vtbl, part, vl)
 *   vtbl - first argument forwarded unchanged to every call of `f`
 *   v    - source vector whose parts 0..3 are read
 *
 * NOTE: `vtbl` and `v` are each expanded four times, so neither may have
 * side effects.
 */
#define RISCV_LUT4(f, vtbl, v)                                         \
    __riscv_vcreate_v_u8m1_u8m4(                                       \
        f((vtbl), __riscv_vget_u8m1((v), 0), __riscv_vsetvlmax_e8m1()), \
        f((vtbl), __riscv_vget_u8m1((v), 1), __riscv_vsetvlmax_e8m1()), \
        f((vtbl), __riscv_vget_u8m1((v), 2), __riscv_vsetvlmax_e8m1()), \
        f((vtbl), __riscv_vget_u8m1((v), 3), __riscv_vsetvlmax_e8m1()))
/*
 * RISCV_PERM8(f, v, vidx)
 *
 * Applies `f` separately to each of the eight u8m1 parts of `v` and
 * reassembles the results with __riscv_vcreate_v_u8m1_u8m8. Each call is
 *
 *     f(__riscv_vget_u8m1(v, N), vidx, __riscv_vsetvlmax_e8m1())
 *
 * i.e. the data part comes first and the same `vidx` is used for every part.
 *
 *   f    - function or function-like macro taking (part, vidx, vl)
 *   v    - source vector whose parts 0..7 are read
 *   vidx - second argument forwarded unchanged to every call of `f`
 *
 * NOTE: `v` and `vidx` are each expanded eight times, so neither may have
 * side effects.
 */
#define RISCV_PERM8(f, v, vidx)                                        \
    __riscv_vcreate_v_u8m1_u8m8(                                       \
        f(__riscv_vget_u8m1((v), 0), (vidx), __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1((v), 1), (vidx), __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1((v), 2), (vidx), __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1((v), 3), (vidx), __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1((v), 4), (vidx), __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1((v), 5), (vidx), __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1((v), 6), (vidx), __riscv_vsetvlmax_e8m1()), \
        f(__riscv_vget_u8m1((v), 7), (vidx), __riscv_vsetvlmax_e8m1()))
/*
 * RISCV_VMFLTZ(T, v, vl)
 *
 * Expands to __riscv_vmslt(__riscv_vreinterpret_i<T>(v), 0, vl): `v` is
 * reinterpreted as the signed-integer vector type i<T> and compared against
 * zero with a signed less-than, for the first `vl` elements.
 *
 *   T  - type suffix pasted after "__riscv_vreinterpret_i" (e.g. 32m2)
 *   v  - vector to test
 *   vl - vector length forwarded to __riscv_vmslt
 *
 * NOTE(review): presumably used to get a "float < 0" mask from the sign bit
 * of a float vector; if so, -0.0 would also be flagged - confirm with callers.
 */
#define RISCV_VMFLTZ(T, v, vl) __riscv_vmslt(__riscv_vreinterpret_i##T((v)), 0, (vl))
90static inline vfloat32m2_t
93 const vfloat32m2_t c0 = __riscv_vfmv_v_f_f32m2(+0x1.a8a726p+1f, vlmax);
94 const vfloat32m2_t c1 = __riscv_vfmv_v_f_f32m2(-0x1.0b7f7ep+2f, vlmax);
95 const vfloat32m2_t c2 = __riscv_vfmv_v_f_f32m2(+0x1.05d9ccp+2f, vlmax);
96 const vfloat32m2_t c3 = __riscv_vfmv_v_f_f32m2(-0x1.4d476cp+1f, vlmax);
97 const vfloat32m2_t c4 = __riscv_vfmv_v_f_f32m2(+0x1.04fc3ap+0f, vlmax);
98 const vfloat32m2_t c5 = __riscv_vfmv_v_f_f32m2(-0x1.c97982p-3f, vlmax);
99 const vfloat32m2_t c6 = __riscv_vfmv_v_f_f32m2(+0x1.57aa42p-6f, vlmax);
102 vfloat32m2_t poly = c6;
103 poly = __riscv_vfmadd(poly, x, c5, vl);
104 poly = __riscv_vfmadd(poly, x, c4, vl);
105 poly = __riscv_vfmadd(poly, x, c3, vl);
106 poly = __riscv_vfmadd(poly, x, c2, vl);
107 poly = __riscv_vfmadd(poly, x, c1, vl);
108 poly = __riscv_vfmadd(poly, x, c0, vl);
static vfloat32m2_t __riscv_vlog2_poly_f32m2(vfloat32m2_t x, size_t vl, size_t vlmax)
Definition volk_rvv_intrinsics.h:91