86#ifndef INCLUDED_volk_32f_exp_32f_a_H
87#define INCLUDED_volk_32f_exp_32f_a_H
95 float* bPtr = bVector;
96 const float* aPtr = aVector;
98 unsigned int number = 0;
99 unsigned int quarterPoints = num_points / 4;
102 __m128 aVal, bVal, tmp, fx, mask, pow2n, z, y;
103 __m128 one, exp_hi, exp_lo, log2EF, half, exp_C1, exp_C2;
104 __m128 exp_p0, exp_p1, exp_p2, exp_p3, exp_p4, exp_p5;
123 for (; number < quarterPoints; number++) {
159 number = quarterPoints * 4;
160 for (; number < num_points; number++) {
161 *bPtr++ = expf(*aPtr++);
170#ifndef INCLUDED_volk_32f_exp_32f_u_H
171#define INCLUDED_volk_32f_exp_32f_u_H
174#include <emmintrin.h>
179 float* bPtr = bVector;
180 const float* aPtr = aVector;
182 unsigned int number = 0;
183 unsigned int quarterPoints = num_points / 4;
186 __m128 aVal, bVal, tmp, fx, mask, pow2n, z, y;
187 __m128 one, exp_hi, exp_lo, log2EF, half, exp_C1, exp_C2;
188 __m128 exp_p0, exp_p1, exp_p2, exp_p3, exp_p4, exp_p5;
208 for (; number < quarterPoints; number++) {
244 number = quarterPoints * 4;
245 for (; number < num_points; number++) {
246 *bPtr++ = expf(*aPtr++);
253#ifdef LV_HAVE_GENERIC
258 float* bPtr = bVector;
259 const float* aPtr = aVector;
260 unsigned int number = 0;
262 for (number = 0; number < num_points; number++) {
263 *bPtr++ = expf(*aPtr++);
FORCE_INLINE __m128i _mm_slli_epi32(__m128i a, int imm)
Definition: sse2neon.h:5213
FORCE_INLINE __m128 _mm_sub_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2788
float32x4_t __m128
Definition: sse2neon.h:366
FORCE_INLINE __m128i _mm_add_epi32(__m128i a, __m128i b)
Definition: sse2neon.h:2920
FORCE_INLINE __m128i _mm_set1_epi32(int)
Definition: sse2neon.h:4927
FORCE_INLINE void _mm_storeu_ps(float *p, __m128 a)
Definition: sse2neon.h:2745
FORCE_INLINE __m128 _mm_mul_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2152
FORCE_INLINE __m128i _mm_cvttps_epi32(__m128 a)
Definition: sse2neon.h:4139
FORCE_INLINE __m128 _mm_set1_ps(float _w)
Definition: sse2neon.h:2467
FORCE_INLINE __m128 _mm_cmpgt_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1244
FORCE_INLINE __m128 _mm_loadu_ps(const float *p)
Definition: sse2neon.h:1910
FORCE_INLINE __m128 _mm_setzero_ps(void)
Definition: sse2neon.h:2500
FORCE_INLINE __m128 _mm_and_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1171
FORCE_INLINE __m128 _mm_castsi128_ps(__m128i a)
Definition: sse2neon.h:3135
FORCE_INLINE __m128 _mm_add_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1150
FORCE_INLINE __m128 _mm_load_ps(const float *p)
Definition: sse2neon.h:1843
int64x2_t __m128i
Definition: sse2neon.h:375
FORCE_INLINE void _mm_store_ps(float *p, __m128 a)
Definition: sse2neon.h:2685
FORCE_INLINE __m128 _mm_min_ps(__m128 a, __m128 b)
Definition: sse2neon.h:2033
FORCE_INLINE __m128 _mm_cvtepi32_ps(__m128i a)
Definition: sse2neon.h:3816
FORCE_INLINE __m128 _mm_max_ps(__m128 a, __m128 b)
Definition: sse2neon.h:1985
static void volk_32f_exp_32f_a_sse2(float *bVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_exp_32f.h:93
static void volk_32f_exp_32f_generic(float *bVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_exp_32f.h:256
static void volk_32f_exp_32f_u_sse2(float *bVector, const float *aVector, unsigned int num_points)
Definition: volk_32f_exp_32f.h:177