Vector Optimized Library of Kernels 3.1.1
Architecture-tuned implementations of math kernels
 
Loading...
Searching...
No Matches
common.h
Go to the documentation of this file.
1#ifndef SSE2NEONCOMMON_H
2#define SSE2NEONCOMMON_H
3#include <cstdint>
4#if (defined(__aarch64__) || defined(_M_ARM64)) || defined(__arm__)
5#include "sse2neon.h"
6#elif defined(__x86_64__) || defined(__i386__)
7#include <emmintrin.h>
8#include <smmintrin.h>
9#include <tmmintrin.h>
10#include <wmmintrin.h>
11#include <x86intrin.h>
12#include <xmmintrin.h>
13
14// The Intrinsics Guide uses __int64, which maps to a different data type
15// depending on the data model
16#if !(defined(_WIN32) || defined(_WIN64) || defined(__int64))
17#if (defined(__x86_64__) || defined(__i386__))
18#define __int64 long long
19#else
20#define __int64 int64_t
21#endif
22#endif
23
/* Pick the compiler-specific alignment spelling. On GCC/Clang any existing
 * ALIGN_STRUCT definition is saved first so it can be restored after the
 * union below; other compilers get the __declspec form with no save/restore,
 * so ALIGN_STRUCT stays defined there. */
24#if defined(__GNUC__) || defined(__clang__)
25#pragma push_macro("ALIGN_STRUCT")
26#define ALIGN_STRUCT(x) __attribute__((aligned(x)))
27#else
28#define ALIGN_STRUCT(x) __declspec(align(x))
29#endif
30
/* 16-byte-aligned union that overlays one block of storage with arrays of
 * different lane widths; every integer member spans 16 bytes, and so does the
 * float member when float is 32 bits wide.
 * This definition sits in the x86 branch of the enclosing #if; the
 * cross-reference index of this listing points to sse2neon.h for the
 * definition used on ARM. */
31typedef union ALIGN_STRUCT(16) SIMDVec {
32 float m128_f32[4]; // as floats - DON'T USE. Added for convenience.
33 int8_t m128_i8[16]; // as signed 8-bit integers.
34 int16_t m128_i16[8]; // as signed 16-bit integers.
35 int32_t m128_i32[4]; // as signed 32-bit integers.
36 int64_t m128_i64[2]; // as signed 64-bit integers.
37 uint8_t m128_u8[16]; // as unsigned 8-bit integers.
38 uint16_t m128_u16[8]; // as unsigned 16-bit integers.
39 uint32_t m128_u32[4]; // as unsigned 32-bit integers.
40 uint64_t m128_u64[2]; // as unsigned 64-bit integers.
41} SIMDVec;
42
/* Restore whatever ALIGN_STRUCT meant before this header (GCC/Clang only). */
43#if defined(__GNUC__) || defined(__clang__)
44#pragma pop_macro("ALIGN_STRUCT")
45#endif
46
47/* Tunable testing configuration for precise testing */
48/* _mm_min|max_ps|ss|pd|sd */
/* Defaults to 0 unless the build defines it before this point.
 * NOTE(review): presumably mirrors the same-named switch in sse2neon.h --
 * confirm. */
49#ifndef SSE2NEON_PRECISE_MINMAX
50#define SSE2NEON_PRECISE_MINMAX (0)
51#endif
52#endif
53
/* Returns TEST_FAIL from the enclosing function when condition x is false.
 * NOTE(review): this expands to a bare `if` statement, so an unbraced use
 * directly in front of an `else` would attach that `else` to the macro's
 * `if`. Always use it as a standalone statement. */
54#define ASSERT_RETURN(x) \
55 if (!(x)) \
56 return TEST_FAIL;
57
58namespace SSE2NEON
59{
/* Integer objects declared here; the cross-reference index of this listing
 * places their definitions in common.cpp. */
65extern int32_t NaN;
66extern int64_t NaN64;
/* Read the storage of NaN / NaN64 as a float / double through a pointer cast.
 * NOTE(review): the ALL_BIT_1 names suggest both objects hold an all-ones bit
 * pattern -- confirm in common.cpp. The cast reads an integer object through
 * a floating-point lvalue; check that against the build's aliasing settings. */
67#define ALL_BIT_1_32 (*(float *) &NaN)
68#define ALL_BIT_1_64 (*(double *) &NaN64)
69
/* Compares two values word by word: both arguments are viewed as arrays of
 * int32_t and their first four words must match pairwise. Returns
 * TEST_SUCCESS when all four match; ASSERT_RETURN returns TEST_FAIL at the
 * first mismatch.
 * NOTE(review): the listing omits the signature line (71); the
 * cross-reference index gives it as `result_t validate128(T a, T b)`.
 * Assumes T is at least 16 bytes wide -- confirm at the call sites. */
70template <typename T>
72{
73 const int32_t *t1 = (const int32_t *) &a;
74 const int32_t *t2 = (const int32_t *) &b;
75
76 ASSERT_RETURN(t1[0] == t2[0]);
77 ASSERT_RETURN(t1[1] == t2[1]);
78 ASSERT_RETURN(t1[2] == t2[2]);
79 ASSERT_RETURN(t1[3] == t2[3]);
80 return TEST_SUCCESS;
81}
// Overloads for a 128-bit (__m128i) or 64-bit (__m64) vector together with
// signed or unsigned 64-bit values. The cross-reference index of this listing
// places the __m128i overloads in common.cpp.
// NOTE(review): i0/i1 and u0/u1 are presumably the expected lane values --
// confirm against the definitions.
82result_t validateInt64(__m128i a, int64_t i0, int64_t i1);
83result_t validateInt64(__m64 a, int64_t i0);
84result_t validateUInt64(__m128i a, uint64_t u0, uint64_t u1);
85result_t validateUInt64(__m64 a, uint64_t u0);
87 int32_t i0,
88 int32_t i1,
89 int32_t i2,
90 int32_t i3);
92 uint32_t u0,
93 uint32_t u1,
94 uint32_t u2,
95 uint32_t u3);
// __m64 overloads taking two 32-bit values. Note that the signed overload
// also names its parameters u0/u1.
// NOTE(review): the values are presumably the expected lanes of `a` --
// confirm against the definitions.
96result_t validateUInt32(__m64 a, uint32_t u0, uint32_t u1);
97result_t validateInt32(__m64 a, int32_t u0, int32_t u1);
99 int16_t i0,
100 int16_t i1,
101 int16_t i2,
102 int16_t i3,
103 int16_t i4,
104 int16_t i5,
105 int16_t i6,
106 int16_t i7);
// __m64 overload taking four signed 16-bit values.
// NOTE(review): presumably the expected lanes of `a` -- confirm against the
// definition.
107result_t validateInt16(__m64 a, int16_t i0, int16_t i1, int16_t i2, int16_t i3);
109 uint16_t u0,
110 uint16_t u1,
111 uint16_t u2,
112 uint16_t u3,
113 uint16_t u4,
114 uint16_t u5,
115 uint16_t u6,
116 uint16_t u7);
118 uint16_t u0,
119 uint16_t u1,
120 uint16_t u2,
121 uint16_t u3);
123 int8_t i0,
124 int8_t i1,
125 int8_t i2,
126 int8_t i3,
127 int8_t i4,
128 int8_t i5,
129 int8_t i6,
130 int8_t i7,
131 int8_t i8,
132 int8_t i9,
133 int8_t i10,
134 int8_t i11,
135 int8_t i12,
136 int8_t i13,
137 int8_t i14,
138 int8_t i15);
140 int8_t i0,
141 int8_t i1,
142 int8_t i2,
143 int8_t i3,
144 int8_t i4,
145 int8_t i5,
146 int8_t i6,
147 int8_t i7);
149 uint8_t u0,
150 uint8_t u1,
151 uint8_t u2,
152 uint8_t u3,
153 uint8_t u4,
154 uint8_t u5,
155 uint8_t u6,
156 uint8_t u7,
157 uint8_t u8,
158 uint8_t u9,
159 uint8_t u10,
160 uint8_t u11,
161 uint8_t u12,
162 uint8_t u13,
163 uint8_t u14,
164 uint8_t u15);
166 uint8_t u0,
167 uint8_t u1,
168 uint8_t u2,
169 uint8_t u3,
170 uint8_t u4,
171 uint8_t u5,
172 uint8_t u6,
173 uint8_t u7);
// Scalar pair checks for float and double, followed by the four-lane __m128
// check. The cross-reference index of this listing places all three
// definitions in common.cpp.
// NOTE(review): the comparison rule (exact, NaN handling, tolerance) is not
// visible in this header -- confirm in common.cpp.
174result_t validateSingleFloatPair(float a, float b);
175result_t validateSingleDoublePair(double a, double b);
176result_t validateFloat(__m128 a, float f0, float f1, float f2, float f3);
178 float f0,
179 float f1,
180 float f2,
181 float f3,
182 float epsilon);
184 float f0,
185 float f1,
186 float f2,
187 float f3,
188 float err);
// Double-precision (__m128d) checks taking two double values. Despite its
// name, this validateFloatError overload takes __m128d and double arguments.
// NOTE(review): `err` is presumably an allowed error bound -- confirm in
// common.cpp.
189result_t validateDouble(__m128d a, double d0, double d1);
190result_t validateFloatError(__m128d a, double d0, double d1, double err);
191
/* Spread the leading elements of array-like B (16, 8 or 4 of them, depending
 * on the lane width) into the matching validate* call for vector A.
 * B is parenthesized before indexing so that an expression argument such as
 * `ptr + 1` is indexed as a whole. B is evaluated once per element, so it
 * must not have side effects. */
192#define VALIDATE_INT8_M128(A, B) \
193 validateInt8(A, (B)[0], (B)[1], (B)[2], (B)[3], (B)[4], (B)[5], (B)[6], (B)[7], (B)[8], \
194 (B)[9], (B)[10], (B)[11], (B)[12], (B)[13], (B)[14], (B)[15])
195#define VALIDATE_UINT8_M128(A, B) \
196 validateUInt8(A, (B)[0], (B)[1], (B)[2], (B)[3], (B)[4], (B)[5], (B)[6], (B)[7], (B)[8], \
197 (B)[9], (B)[10], (B)[11], (B)[12], (B)[13], (B)[14], (B)[15])
198#define VALIDATE_INT16_M128(A, B) \
199 validateInt16(A, (B)[0], (B)[1], (B)[2], (B)[3], (B)[4], (B)[5], (B)[6], (B)[7])
200#define VALIDATE_UINT16_M128(A, B) \
201 validateUInt16(A, (B)[0], (B)[1], (B)[2], (B)[3], (B)[4], (B)[5], (B)[6], (B)[7])
202#define VALIDATE_INT32_M128(A, B) validateInt32(A, (B)[0], (B)[1], (B)[2], (B)[3])
203#define VALIDATE_UINT32_M128(A, B) validateUInt32(A, (B)[0], (B)[1], (B)[2], (B)[3])
204
/* Spread the leading elements of array-like B (8, 4 or 2 of them, depending
 * on the lane width) into the matching validate* call for vector A.
 * B is parenthesized before indexing so that an expression argument such as
 * `ptr + 1` is indexed as a whole. B is evaluated once per element, so it
 * must not have side effects. */
205#define VALIDATE_INT8_M64(A, B) \
206 validateInt8(A, (B)[0], (B)[1], (B)[2], (B)[3], (B)[4], (B)[5], (B)[6], (B)[7])
207#define VALIDATE_UINT8_M64(A, B) \
208 validateUInt8(A, (B)[0], (B)[1], (B)[2], (B)[3], (B)[4], (B)[5], (B)[6], (B)[7])
209#define VALIDATE_INT16_M64(A, B) validateInt16(A, (B)[0], (B)[1], (B)[2], (B)[3])
210#define VALIDATE_UINT16_M64(A, B) validateUInt16(A, (B)[0], (B)[1], (B)[2], (B)[3])
211#define VALIDATE_INT32_M64(A, B) validateInt32(A, (B)[0], (B)[1])
212#define VALIDATE_UINT32_M64(A, B) validateUInt32(A, (B)[0], (B)[1])
/* Returns TEST_FAIL from the enclosing function unless EXP evaluates to
 * TEST_SUCCESS. EXP is parenthesized so that an argument containing a
 * lower-precedence operator (conditional, bitwise, logical) is compared as a
 * whole. The expansion is deliberately left as a braced `if` so that call
 * sites with or without a trailing semicolon keep working. */
213#define CHECK_RESULT(EXP) \
214 if ((EXP) != TEST_SUCCESS) { \
215 return TEST_FAIL; \
216 }
/* Expands to TEST_IMPL(0) TEST_IMPL(1). TEST_IMPL is not defined in this
 * listing. NOTE(review): presumably each user defines TEST_IMPL before
 * expanding this macro -- confirm. */
217#define IMM_2_ITER \
218 TEST_IMPL(0) \
219 TEST_IMPL(1)
/* Expands IMM_2_ITER followed by TEST_IMPL(2) and TEST_IMPL(3), i.e. one
 * TEST_IMPL expansion for each index 0..3. */
220#define IMM_4_ITER \
221 IMM_2_ITER \
222 TEST_IMPL(2) \
223 TEST_IMPL(3)
/* Expands IMM_4_ITER followed by TEST_IMPL(4)..TEST_IMPL(7), i.e. one
 * TEST_IMPL expansion for each index 0..7. */
224#define IMM_8_ITER \
225 IMM_4_ITER \
226 TEST_IMPL(4) \
227 TEST_IMPL(5) \
228 TEST_IMPL(6) \
229 TEST_IMPL(7)
/* Expands IMM_8_ITER followed by TEST_IMPL(8)..TEST_IMPL(15), i.e. one
 * TEST_IMPL expansion for each index 0..15. */
230#define IMM_16_ITER \
231 IMM_8_ITER \
232 TEST_IMPL(8) \
233 TEST_IMPL(9) \
234 TEST_IMPL(10) \
235 TEST_IMPL(11) \
236 TEST_IMPL(12) \
237 TEST_IMPL(13) \
238 TEST_IMPL(14) \
239 TEST_IMPL(15)
/* Expands IMM_16_ITER followed by TEST_IMPL(16)..TEST_IMPL(31), i.e. one
 * TEST_IMPL expansion for each index 0..31. */
240#define IMM_32_ITER \
241 IMM_16_ITER \
242 TEST_IMPL(16) \
243 TEST_IMPL(17) \
244 TEST_IMPL(18) \
245 TEST_IMPL(19) \
246 TEST_IMPL(20) \
247 TEST_IMPL(21) \
248 TEST_IMPL(22) \
249 TEST_IMPL(23) \
250 TEST_IMPL(24) \
251 TEST_IMPL(25) \
252 TEST_IMPL(26) \
253 TEST_IMPL(27) \
254 TEST_IMPL(28) \
255 TEST_IMPL(29) \
256 TEST_IMPL(30) \
257 TEST_IMPL(31)
/* Expands IMM_32_ITER followed by TEST_IMPL(32)..TEST_IMPL(63), i.e. one
 * TEST_IMPL expansion for each index 0..63. */
258#define IMM_64_ITER \
259 IMM_32_ITER \
260 TEST_IMPL(32) \
261 TEST_IMPL(33) \
262 TEST_IMPL(34) \
263 TEST_IMPL(35) \
264 TEST_IMPL(36) \
265 TEST_IMPL(37) \
266 TEST_IMPL(38) \
267 TEST_IMPL(39) \
268 TEST_IMPL(40) \
269 TEST_IMPL(41) \
270 TEST_IMPL(42) \
271 TEST_IMPL(43) \
272 TEST_IMPL(44) \
273 TEST_IMPL(45) \
274 TEST_IMPL(46) \
275 TEST_IMPL(47) \
276 TEST_IMPL(48) \
277 TEST_IMPL(49) \
278 TEST_IMPL(50) \
279 TEST_IMPL(51) \
280 TEST_IMPL(52) \
281 TEST_IMPL(53) \
282 TEST_IMPL(54) \
283 TEST_IMPL(55) \
284 TEST_IMPL(56) \
285 TEST_IMPL(57) \
286 TEST_IMPL(58) \
287 TEST_IMPL(59) \
288 TEST_IMPL(60) \
289 TEST_IMPL(61) \
290 TEST_IMPL(62) \
291 TEST_IMPL(63)
/* Expands IMM_64_ITER followed by TEST_IMPL(64)..TEST_IMPL(127), i.e. one
 * TEST_IMPL expansion for each index 0..127. */
292#define IMM_128_ITER \
293 IMM_64_ITER \
294 TEST_IMPL(64) \
295 TEST_IMPL(65) \
296 TEST_IMPL(66) \
297 TEST_IMPL(67) \
298 TEST_IMPL(68) \
299 TEST_IMPL(69) \
300 TEST_IMPL(70) \
301 TEST_IMPL(71) \
302 TEST_IMPL(72) \
303 TEST_IMPL(73) \
304 TEST_IMPL(74) \
305 TEST_IMPL(75) \
306 TEST_IMPL(76) \
307 TEST_IMPL(77) \
308 TEST_IMPL(78) \
309 TEST_IMPL(79) \
310 TEST_IMPL(80) \
311 TEST_IMPL(81) \
312 TEST_IMPL(82) \
313 TEST_IMPL(83) \
314 TEST_IMPL(84) \
315 TEST_IMPL(85) \
316 TEST_IMPL(86) \
317 TEST_IMPL(87) \
318 TEST_IMPL(88) \
319 TEST_IMPL(89) \
320 TEST_IMPL(90) \
321 TEST_IMPL(91) \
322 TEST_IMPL(92) \
323 TEST_IMPL(93) \
324 TEST_IMPL(94) \
325 TEST_IMPL(95) \
326 TEST_IMPL(96) \
327 TEST_IMPL(97) \
328 TEST_IMPL(98) \
329 TEST_IMPL(99) \
330 TEST_IMPL(100) \
331 TEST_IMPL(101) \
332 TEST_IMPL(102) \
333 TEST_IMPL(103) \
334 TEST_IMPL(104) \
335 TEST_IMPL(105) \
336 TEST_IMPL(106) \
337 TEST_IMPL(107) \
338 TEST_IMPL(108) \
339 TEST_IMPL(109) \
340 TEST_IMPL(110) \
341 TEST_IMPL(111) \
342 TEST_IMPL(112) \
343 TEST_IMPL(113) \
344 TEST_IMPL(114) \
345 TEST_IMPL(115) \
346 TEST_IMPL(116) \
347 TEST_IMPL(117) \
348 TEST_IMPL(118) \
349 TEST_IMPL(119) \
350 TEST_IMPL(120) \
351 TEST_IMPL(121) \
352 TEST_IMPL(122) \
353 TEST_IMPL(123) \
354 TEST_IMPL(124) \
355 TEST_IMPL(125) \
356 TEST_IMPL(126) \
357 TEST_IMPL(127)
/* Expands IMM_128_ITER followed by TEST_IMPL(128)..TEST_IMPL(255), i.e. one
 * TEST_IMPL expansion for each index 0..255. */
358#define IMM_256_ITER \
359 IMM_128_ITER \
360 TEST_IMPL(128) \
361 TEST_IMPL(129) \
362 TEST_IMPL(130) \
363 TEST_IMPL(131) \
364 TEST_IMPL(132) \
365 TEST_IMPL(133) \
366 TEST_IMPL(134) \
367 TEST_IMPL(135) \
368 TEST_IMPL(136) \
369 TEST_IMPL(137) \
370 TEST_IMPL(138) \
371 TEST_IMPL(139) \
372 TEST_IMPL(140) \
373 TEST_IMPL(141) \
374 TEST_IMPL(142) \
375 TEST_IMPL(143) \
376 TEST_IMPL(144) \
377 TEST_IMPL(145) \
378 TEST_IMPL(146) \
379 TEST_IMPL(147) \
380 TEST_IMPL(148) \
381 TEST_IMPL(149) \
382 TEST_IMPL(150) \
383 TEST_IMPL(151) \
384 TEST_IMPL(152) \
385 TEST_IMPL(153) \
386 TEST_IMPL(154) \
387 TEST_IMPL(155) \
388 TEST_IMPL(156) \
389 TEST_IMPL(157) \
390 TEST_IMPL(158) \
391 TEST_IMPL(159) \
392 TEST_IMPL(160) \
393 TEST_IMPL(161) \
394 TEST_IMPL(162) \
395 TEST_IMPL(163) \
396 TEST_IMPL(164) \
397 TEST_IMPL(165) \
398 TEST_IMPL(166) \
399 TEST_IMPL(167) \
400 TEST_IMPL(168) \
401 TEST_IMPL(169) \
402 TEST_IMPL(170) \
403 TEST_IMPL(171) \
404 TEST_IMPL(172) \
405 TEST_IMPL(173) \
406 TEST_IMPL(174) \
407 TEST_IMPL(175) \
408 TEST_IMPL(176) \
409 TEST_IMPL(177) \
410 TEST_IMPL(178) \
411 TEST_IMPL(179) \
412 TEST_IMPL(180) \
413 TEST_IMPL(181) \
414 TEST_IMPL(182) \
415 TEST_IMPL(183) \
416 TEST_IMPL(184) \
417 TEST_IMPL(185) \
418 TEST_IMPL(186) \
419 TEST_IMPL(187) \
420 TEST_IMPL(188) \
421 TEST_IMPL(189) \
422 TEST_IMPL(190) \
423 TEST_IMPL(191) \
424 TEST_IMPL(192) \
425 TEST_IMPL(193) \
426 TEST_IMPL(194) \
427 TEST_IMPL(195) \
428 TEST_IMPL(196) \
429 TEST_IMPL(197) \
430 TEST_IMPL(198) \
431 TEST_IMPL(199) \
432 TEST_IMPL(200) \
433 TEST_IMPL(201) \
434 TEST_IMPL(202) \
435 TEST_IMPL(203) \
436 TEST_IMPL(204) \
437 TEST_IMPL(205) \
438 TEST_IMPL(206) \
439 TEST_IMPL(207) \
440 TEST_IMPL(208) \
441 TEST_IMPL(209) \
442 TEST_IMPL(210) \
443 TEST_IMPL(211) \
444 TEST_IMPL(212) \
445 TEST_IMPL(213) \
446 TEST_IMPL(214) \
447 TEST_IMPL(215) \
448 TEST_IMPL(216) \
449 TEST_IMPL(217) \
450 TEST_IMPL(218) \
451 TEST_IMPL(219) \
452 TEST_IMPL(220) \
453 TEST_IMPL(221) \
454 TEST_IMPL(222) \
455 TEST_IMPL(223) \
456 TEST_IMPL(224) \
457 TEST_IMPL(225) \
458 TEST_IMPL(226) \
459 TEST_IMPL(227) \
460 TEST_IMPL(228) \
461 TEST_IMPL(229) \
462 TEST_IMPL(230) \
463 TEST_IMPL(231) \
464 TEST_IMPL(232) \
465 TEST_IMPL(233) \
466 TEST_IMPL(234) \
467 TEST_IMPL(235) \
468 TEST_IMPL(236) \
469 TEST_IMPL(237) \
470 TEST_IMPL(238) \
471 TEST_IMPL(239) \
472 TEST_IMPL(240) \
473 TEST_IMPL(241) \
474 TEST_IMPL(242) \
475 TEST_IMPL(243) \
476 TEST_IMPL(244) \
477 TEST_IMPL(245) \
478 TEST_IMPL(246) \
479 TEST_IMPL(247) \
480 TEST_IMPL(248) \
481 TEST_IMPL(249) \
482 TEST_IMPL(250) \
483 TEST_IMPL(251) \
484 TEST_IMPL(252) \
485 TEST_IMPL(253) \
486 TEST_IMPL(254) \
487 TEST_IMPL(255)
488} // namespace SSE2NEON
489
490#endif
#define ASSERT_RETURN(x)
Definition common.h:54
Definition binding.cpp:7
int32_t NaN
Definition common.cpp:7
result_t
Definition common.h:60
@ TEST_UNIMPL
Definition common.h:63
@ TEST_SUCCESS
Definition common.h:61
@ TEST_FAIL
Definition common.h:62
result_t validateUInt64(__m128i a, uint64_t u0, uint64_t u1)
Definition common.cpp:25
result_t validateSingleFloatPair(float a, float b)
Definition common.cpp:272
result_t validateDouble(__m128d a, double d0, double d1)
Definition common.cpp:387
result_t validateSingleDoublePair(double a, double b)
Definition common.cpp:282
result_t validateInt16(__m128i a, int16_t i0, int16_t i1, int16_t i2, int16_t i3, int16_t i4, int16_t i5, int16_t i6, int16_t i7)
Definition common.cpp:76
result_t validateUInt32(__m128i a, uint32_t u0, uint32_t u1, uint32_t u2, uint32_t u3)
Definition common.cpp:54
result_t validateFloat(__m128 a, float f0, float f1, float f2, float f3)
Definition common.cpp:297
result_t validateInt64(__m128i a, int64_t i0, int64_t i1)
Definition common.cpp:10
result_t validateUInt8(__m128i a, uint8_t u0, uint8_t u1, uint8_t u2, uint8_t u3, uint8_t u4, uint8_t u5, uint8_t u6, uint8_t u7, uint8_t u8, uint8_t u9, uint8_t u10, uint8_t u11, uint8_t u12, uint8_t u13, uint8_t u14, uint8_t u15)
Definition common.cpp:212
int64_t NaN64
Definition common.cpp:8
result_t validateInt8(__m128i a, int8_t i0, int8_t i1, int8_t i2, int8_t i3, int8_t i4, int8_t i5, int8_t i6, int8_t i7, int8_t i8, int8_t i9, int8_t i10, int8_t i11, int8_t i12, int8_t i13, int8_t i14, int8_t i15)
Definition common.cpp:152
result_t validateInt32(__m128i a, int32_t i0, int32_t i1, int32_t i2, int32_t i3)
Definition common.cpp:40
result_t validateFloatError(__m128 a, float f0, float f1, float f2, float f3, float err)
Definition common.cpp:347
result_t validate128(T a, T b)
Definition common.h:71
result_t validateUInt16(__m128i a, uint16_t u0, uint16_t u1, uint16_t u2, uint16_t u3, uint16_t u4, uint16_t u5, uint16_t u6, uint16_t u7)
Definition common.cpp:108
result_t validateFloatEpsilon(__m128 a, float f0, float f1, float f2, float f3, float epsilon)
Definition common.cpp:307
float32x4_t __m128
Definition sse2neon.h:366
float32x4_t __m128d
Definition sse2neon.h:373
union ALIGN_STRUCT(16) SIMDVec
Definition sse2neon.h:522
int64x1_t __m64
Definition sse2neon.h:365
int64x2_t __m128i
Definition sse2neon.h:375
SIMDVec
Definition sse2neon.h:532