GNU Radio's BLOCKSTREAM Package
mipp.h
Go to the documentation of this file.
1 /*
2 The MIT License (MIT)
3 
4 Copyright (c) 2016 MIPP
5 
6 Permission is hereby granted, free of charge, to any person obtaining a copy
7 of this software and associated documentation files (the "Software"), to deal
8 in the Software without restriction, including without limitation the rights
9 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 copies of the Software, and to permit persons to whom the Software is
11 furnished to do so, subject to the following conditions:
12 
13 The above copyright notice and this permission notice shall be included in all
14 copies or substantial portions of the Software.
15 
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 SOFTWARE.
23 */
24 
25 /*
26  * mipp.h
27  *
28  * Created on: 5 oct. 2014
29  * Author: Adrien Cassagne
30  */
31 
32 #ifndef MY_INTRINSICS_PLUS_PLUS_H_
33 #define MY_INTRINSICS_PLUS_PLUS_H_
34 
35 #ifndef MIPP_NO_INTRINSICS
36 #if defined(__ARM_NEON__) || defined(__ARM_NEON)
37 #include <arm_neon.h>
38 #include "math/neon_mathfun.h"
39 #elif defined(__SSE__) || defined(__AVX__) || defined(__MIC__) || defined(__KNCNI__) || defined(__AVX512__) || defined(__AVX512F__)
40 // header for special functions: log, exp, sin, cos
41 #if !defined(__INTEL_COMPILER) && !defined(__ICL) && !defined(__ICC)
42 #if defined(__AVX__)
43 #include "math/avx_mathfun.h"
44 #elif defined(__SSE__)
45 #include "math/sse_mathfun.h"
46 #endif
47 #endif
48 #include <immintrin.h>
49 #ifdef __SSE__
50 #include <xmmintrin.h>
51 #endif
52 #ifdef __SSE2__
53 #include <emmintrin.h>
54 #endif
55 #ifdef __SSE3__
56 #include <pmmintrin.h>
57 #endif
58 #ifdef __SSSE3__
59 #include <tmmintrin.h>
60 #endif
61 #ifdef __SSE4_1__
62 #include <smmintrin.h>
63 #endif
64 #else
65 #include "mipp_scalar_op.h"
66 #endif
67 #else
68 #include "mipp_scalar_op.h"
69 #endif
70 
71 #include <unordered_map>
72 #include <typeindex>
73 #include <stdexcept>
74 #include <typeinfo>
75 #include <iostream>
76 #include <iomanip>
77 #include <cstddef>
78 #include <cassert>
79 #include <cstdint>
80 #include <string>
81 #include <vector>
82 #include <cmath>
83 #include <map>
84 
85 #if (defined(__GNUC__) || defined(__clang__) || defined(__llvm__)) && (defined(__linux__) || defined(__linux) || defined(__APPLE__))
86 #include <execinfo.h>
87 #include <unistd.h>
88 #include <cstdlib>
89 #endif
90 
91 #ifdef _MSC_VER
92 #ifndef NOMINMAX
93 #define NOMINMAX
94 #endif
95 #undef min
96 #undef max
97 #endif
98 
99 namespace mipp // My Intrinsics Plus Plus => mipp
100 {
101 // ------------------------------------------------------------------------------------------ myIntrinsics vector sizes
102 // --------------------------------------------------------------------------------------------------------------------
103 #ifndef MIPP_NO_INTRINSICS
104 // ------------------------------------------------------------------------------------------------------- ARM NEON-128
105 #if defined(__ARM_NEON__) || defined(__ARM_NEON)
// ARM NEON back-end description: names the instruction set and publishes its parameters
// through MIPP_* macros (16-byte alignment, 128-bit registers, a single lane).
// __aarch64__ selects the "v2" flavour and enables MIPP_64BIT; otherwise the "v1" flavour is used.
106  const std::string InstructionType = "NEON";
107  #define MIPP_NEON
108 
109  #define MIPP_REQUIRED_ALIGNMENT 16
110 #ifdef __aarch64__
111  const std::string InstructionFullType = InstructionType + "v2";
112  const std::string InstructionVersion = "2";
113  #define MIPP_NEONV2
114  #define MIPP_INSTR_VERSION 2
115  #define MIPP_64BIT
116 #else
117  const std::string InstructionFullType = InstructionType + "v1";
118  const std::string InstructionVersion = "1";
119  #define MIPP_NEONV1
120  #define MIPP_INSTR_VERSION 1
121 #endif
122  #define MIPP_BW
123  #define MIPP_REGISTER_SIZE 128
124  #define MIPP_LANES 1
125 
// Mask, full-register and half-register types used by every operation of this back-end.
126  using msk = uint32x4_t;
127  using reg = float32x4_t;
128  using reg_2 = float32x2_t; // half a full register
129 
130  template <int N>
131  inline reg cvt_reg(const msk m) {
132  return (reg)m;
133  }
134 
// Lists the optional NEON extensions known at compile time:
// "FMA" when __ARM_FEATURE_FMA is defined, otherwise nothing.
inline std::vector<std::string> InstructionExtensions()
{
	std::vector<std::string> enabled;
#ifdef __ARM_FEATURE_FMA
	enabled.emplace_back("FMA");
#endif
	return enabled;
}
143 
144 // -------------------------------------------------------------------------------------------------------- X86 AVX-512
145 #elif defined(__MIC__) || defined(__KNCNI__) || defined(__AVX512__) || defined(__AVX512F__)
// AVX-512 back-end description (also selected by the KNC macros __MIC__ / __KNCNI__):
// 64-byte alignment, 512-bit registers, four lanes, 64-bit element support.
146  const std::string InstructionType = "AVX512";
147  #define MIPP_AVX512
148 
149  #define MIPP_REQUIRED_ALIGNMENT 64
150  #define MIPP_64BIT
151 
// Mirror each compiler feature macro into a MIPP_AVX512* macro.
152 #if defined(__MIC__) || defined(__KNCNI__)
153  #define MIPP_AVX512KNC
154 #endif
155 #ifdef __AVX512F__
156  #define MIPP_AVX512F
157 #endif
158 #ifdef __AVX512BW__
159  #define MIPP_AVX512BW
160 #endif
161 #ifdef __AVX512CD__
162  #define MIPP_AVX512CD
163 #endif
164 #ifdef __AVX512ER__
165  #define MIPP_AVX512ER
166 #endif
167 #ifdef __AVX512PF__
168  #define MIPP_AVX512PF
169 #endif
170 #ifdef __AVX512DQ__
171  #define MIPP_AVX512DQ
172 #endif
173 #ifdef __AVX512VL__
174  #define MIPP_AVX512VL
175 #endif
176 
177  const std::string InstructionFullType = InstructionType;
178  const std::string InstructionVersion = "1";
179 
180  #define MIPP_INSTR_VERSION 1
181  #define MIPP_REGISTER_SIZE 512
182  #define MIPP_LANES 4
183 
// The mask type is 64 bits wide when the BW extension is available and 16 bits wide otherwise.
184 #ifdef __AVX512BW__
185  using msk = __mmask64;
186 #else
187  using msk = __mmask16;
188 #endif
189  using reg = __m512;
190  using reg_2 = __m256; // half a full register
192  template <int N>
193  inline reg cvt_reg(const msk m) {
194  throw std::runtime_error("mipp: Invalid mask size 'N' = " + std::to_string(N) + ".");
195  }
// Reports which AVX-512 feature macros were defined at compile time, as short tags
// appended in a fixed order: KNC, F, BW, CD, ER, PF, DQ, VL.
inline std::vector<std::string> InstructionExtensions()
{
	std::vector<std::string> tags;
#if defined(__MIC__) || defined(__KNCNI__)
	tags.emplace_back("KNC");
#endif
#ifdef __AVX512F__
	tags.emplace_back("F");
#endif
#ifdef __AVX512BW__
	tags.emplace_back("BW");
#endif
#ifdef __AVX512CD__
	tags.emplace_back("CD");
#endif
#ifdef __AVX512ER__
	tags.emplace_back("ER");
#endif
#ifdef __AVX512PF__
	tags.emplace_back("PF");
#endif
#ifdef __AVX512DQ__
	tags.emplace_back("DQ");
#endif
#ifdef __AVX512VL__
	tags.emplace_back("VL");
#endif
	return tags;
}
227 // -------------------------------------------------------------------------------------------------------- X86 AVX-256
228 #elif defined(__AVX__)
229  const std::string InstructionType = "AVX";
230  #define MIPP_AVX
232  #define MIPP_REQUIRED_ALIGNMENT 32
233  #define MIPP_64BIT
234 #ifdef __AVX2__
235  const std::string InstructionFullType = InstructionType + "2";
236  const std::string InstructionVersion = "2";
237  #define MIPP_AVX2
238  #define MIPP_INSTR_VERSION 2
239  #define MIPP_BW
240 #else
242  const std::string InstructionVersion = "1";
243  #define MIPP_AVX1
244  #define MIPP_INSTR_VERSION 1
245 #endif
246  #define MIPP_REGISTER_SIZE 256
247  #define MIPP_LANES 2
249  using msk = __m256i;
250  using reg = __m256;
251  using reg_2 = __m128; // half a full register
252 
253  template <int N>
254  inline reg cvt_reg(const msk m) {
255  return _mm256_castsi256_ps(m);
256  }
257 
// Lists the optional AVX extensions known at compile time:
// "FMA" when __FMA__ is defined, otherwise nothing.
inline std::vector<std::string> InstructionExtensions()
{
	std::vector<std::string> enabled;
#ifdef __FMA__
	enabled.emplace_back("FMA");
#endif
	return enabled;
}
267 // -------------------------------------------------------------------------------------------------------- X86 SSE-128
268 #elif defined(__SSE__)
// SSE back-end description: 16-byte alignment, 128-bit registers, a single lane.
// 64-bit and byte/word support (MIPP_64BIT, MIPP_BW) are only enabled from SSE2 upwards.
269  const std::string InstructionType = "SSE";
270  #define MIPP_SSE
271 
272  #define MIPP_REQUIRED_ALIGNMENT 16
273 #ifdef __SSE2__
274  #define MIPP_64BIT
275  #define MIPP_BW
276 #endif
// Pick the flavour from the highest SSE level the compiler advertises (4.2 down to 1).
// Note that the SSSE3 branch reports version string "3" with MIPP_INSTR_VERSION 31.
277 #ifdef __SSE4_2__
278  const std::string InstructionFullType = InstructionType + "4.2";
279  const std::string InstructionVersion = "4.2";
280  #define MIPP_SSE4_2
281  #define MIPP_INSTR_VERSION 42
282 #elif defined(__SSE4_1__)
283  const std::string InstructionFullType = InstructionType + "4.1";
284  const std::string InstructionVersion = "4.1";
285  #define MIPP_SSE4_1
286  #define MIPP_INSTR_VERSION 41
287 #elif defined(__SSSE3__)
288  const std::string InstructionFullType = "SSSE3";
289  const std::string InstructionVersion = "3";
290  #define MIPP_SSSE3
291  #define MIPP_INSTR_VERSION 31
292 #elif defined(__SSE3__)
293  const std::string InstructionFullType = InstructionType + "3";
294  const std::string InstructionVersion = "3";
295  #define MIPP_SSE3
296  #define MIPP_INSTR_VERSION 3
297 #elif defined(__SSE2__)
298  const std::string InstructionFullType = InstructionType + "2";
299  const std::string InstructionVersion = "2";
300  #define MIPP_SSE2
301  #define MIPP_INSTR_VERSION 2
302 #else
303  const std::string InstructionFullType = InstructionType;
304  const std::string InstructionVersion = "1";
305  #define MIPP_SSE1
306  #define MIPP_INSTR_VERSION 1
307 #endif
308  #define MIPP_REGISTER_SIZE 128
309  #define MIPP_LANES 1
310 
// Mask, full-register and half-register types used by every operation of this back-end.
311  using msk = __m128i;
312  using reg = __m128;
313  using reg_2 = __m128d; // half a full register (information is in the lower part of the 128 bit register)
315  template <int N>
316  inline reg cvt_reg(const msk m) {
317  return _mm_castsi128_ps(m);
318  }
319 
// The SSE back-end reports no optional extensions: the returned list is always empty.
inline std::vector<std::string> InstructionExtensions()
{
	return std::vector<std::string>();
}
326 // ------------------------------------------------------------------------------------------------- MIPP_NO_INTRINSICS
327 #else
// Fallback taken when intrinsics were not disabled by the user but none of the NEON / AVX-512 /
// AVX / SSE macros is defined. It also defines MIPP_NO_INTRINSICS itself, so later
// #ifdef MIPP_NO_INTRINSICS checks behave as in the user-disabled case.
328  const std::string InstructionType = "NO";
329  #define MIPP_NO
330 
331  const std::string InstructionFullType = "NO_INTRINSICS";
332  const std::string InstructionVersion = "1";
333 
334  #define MIPP_NO_INTRINSICS
335  #define MIPP_REQUIRED_ALIGNMENT 1
// 64-bit support is flagged only when UINTPTR_MAX has the 64-bit all-ones value.
336 #if UINTPTR_MAX == 0xffffffffffffffff
337 #define MIPP_64BIT
338 #endif
339  #define MIPP_BW
340  #define MIPP_INSTR_VERSION 1
// A register size of 0 is how the rest of the file recognises the scalar fallback.
341  #define MIPP_REGISTER_SIZE 0
342  #define MIPP_LANES 1
343 
344  using msk = uint8_t;
345  using reg = uint32_t;
346  using reg_2 = uint16_t;
347 
348  template <int N>
349  inline reg cvt_reg(const msk m) {
350  return (reg)m;
351  }
// The scalar fallback has no instruction-set extensions: the returned list is always empty.
inline std::vector<std::string> InstructionExtensions()
{
	return std::vector<std::string>();
}
358 #endif
359 
360 // ------------------------------------------------------------------------------------------------- MIPP_NO_INTRINSICS
361 #else
// Configuration used when the user defined MIPP_NO_INTRINSICS before including this header:
// scalar types stand in for the mask and register types.
362  const std::string InstructionType = "NO";
363  #define MIPP_NO
365  const std::string InstructionFullType = "NO_INTRINSICS";
366  const std::string InstructionVersion = "1";
367 
368  #define MIPP_REQUIRED_ALIGNMENT 1
// 64-bit support is flagged only when UINTPTR_MAX has the 64-bit all-ones value.
369 #if UINTPTR_MAX == 0xffffffffffffffff
370 #define MIPP_64BIT
371 #endif
372  #define MIPP_BW
373  #define MIPP_INSTR_VERSION 1
// A register size of 0 is how the rest of the file recognises the scalar fallback.
374  #define MIPP_REGISTER_SIZE 0
375  #define MIPP_LANES 1
377  using msk = uint8_t;
378  using reg = uint32_t;
379  using reg_2 = uint16_t;
380 
381  template <int N>
382  inline reg cvt_reg(const msk m) {
383  return (reg)m;
384  }
385 
// Scalar mode has no instruction-set extensions: the returned list is always empty.
inline std::vector<std::string> InstructionExtensions()
{
	return std::vector<std::string>();
}
391 #endif
394 constexpr uint32_t RegisterSizeBit = MIPP_REGISTER_SIZE;
395 constexpr uint32_t Lanes = MIPP_LANES;
396 
397 #ifdef MIPP_64BIT
398 const bool Support64Bit = true;
399 #else
400 const bool Support64Bit = false;
401 #endif
402 #ifdef MIPP_BW
403 const bool SupportByteWord = true;
404 #else
405 const bool SupportByteWord = false;
406 #endif
407 
408 typedef struct regx2 { reg val[2]; } regx2;
409 
410 template <typename T>
411 constexpr int32_t nElmtsPerRegister()
412 {
413 #ifdef MIPP_NO_INTRINSICS
414  return 1;
415 #else
416  return RegisterSizeBit / (8 * sizeof(T));
417 #endif
418 }
419 
420 template <typename T>
421 constexpr int32_t nElReg()
422 {
423 #ifdef MIPP_NO_INTRINSICS
424  return 1;
425 #else
426  return RegisterSizeBit / (8 * sizeof(T));
427 #endif
428 }
429 
430 template <typename T>
431 constexpr int32_t N()
432 {
433 #ifndef MIPP_NO_INTRINSICS
434  return mipp::nElReg<T>();
435 #else
436  return 1;
437 #endif
438 }
440 // --------------------------------------------------------------------------------------------------------------------
441 // --------------------------------------------------------------------------------------------------- memory allocator
442 template <typename T>
443 T* malloc(uint32_t nData)
444 {
445  T* ptr = nullptr;
447 #if !defined(MIPP_NO_INTRINSICS) && (defined(__SSE2__) || defined(__AVX__) || defined(__MIC__) || defined(__KNCNI__) || defined(__AVX512__) || defined(__AVX512F__))
448  ptr = (T*)_mm_malloc(nData * sizeof(T), mipp::RequiredAlignment);
449 #else
450  ptr = new T[nData];
451 #endif
452 
453  return ptr;
454 }
// Releases storage obtained from mipp::malloc: _mm_free on the x86 SIMD builds selected by
// the guard below, delete[] on every other build.
template <typename T>
void free(T* ptr)
{
#if defined(MIPP_NO_INTRINSICS) || !(defined(__SSE2__) || defined(__AVX__) || defined(__MIC__) || defined(__KNCNI__) || defined(__AVX512__) || defined(__AVX512F__))
	delete[] ptr;
#else
	_mm_free(ptr);
#endif
}
466 template <class T>
468 {
469  typedef T value_type;
471  template <class C> AlignedAllocator(const AlignedAllocator<C>& other) { }
472  T* allocate(std::size_t n) { return mipp::malloc<T>((int)n); }
473  void deallocate(T* p, std::size_t n) { mipp::free<T>(p); }
474 };
475 
476 // returns true if and only if storage allocated from ma1 can be deallocated from ma2, and vice versa.
477 // always returns true for stateless allocators.
478 template <class C1, class C2>
479 bool operator==(const AlignedAllocator<C1>& ma1, const AlignedAllocator<C2>& ma2) { return true; }
480 
481 template <class C1, class C2>
482 bool operator!=(const AlignedAllocator<C1>& ma1, const AlignedAllocator<C2>& ma2) { return !(ma1 == ma2); }
483 
484 // override vector type
// mipp::vector<T> is std::vector<T> with AlignedAllocator<T> as its allocator.
485 template<class T> using vector = std::vector<T, AlignedAllocator<T>>;
486 
487 // --------------------------------------------------------------------------------------------------- memory allocator
488 // --------------------------------------------------------------------------------------------------------------------
489 
490 // -------------------------------------------------------------------------------------------- myIntrinsics prototypes
491 // --------------------------------------------------------------------------------------------------------------------
492 
// Builds a textual stack trace to append to error messages.
// Returns an empty string unless MIPP_ENABLE_BACKTRACE is defined and the compiler/OS guard
// below holds; otherwise returns "\nBacktrace:" followed by one "\n<symbol>" entry per frame
// (at most 32 frames).
static inline std::string get_back_trace()
{
	std::string bt_str;
#if defined(MIPP_ENABLE_BACKTRACE) && (defined(__GNUC__) || defined(__clang__) || defined(__llvm__)) && (defined(__linux__) || defined(__linux) || defined(__APPLE__))
	const int bt_max_depth = 32;
	void *bt_array[bt_max_depth];

	const int depth = backtrace(bt_array, bt_max_depth);
	char** bt_symbs = backtrace_symbols(bt_array, depth);

	// backtrace_symbols() yields a null pointer when it cannot build the symbol table.
	if (bt_symbs != nullptr)
	{
		bt_str += "\nBacktrace:";
		for (int i = 0; i < depth; i++)
			bt_str += "\n" + std::string(bt_symbs[i]);

		// The table comes from the C allocator, so it must go back through std::free.
		// An unqualified free() here would resolve to the mipp::free<T> template of this
		// namespace, which releases memory with _mm_free or delete[].
		std::free(bt_symbs);
	}
#endif

	return bt_str;
}
511 
512 template <typename T>
513 static inline void errorMessage(std::string instr)
514 {
515  // define type names
516  std::unordered_map<std::type_index,std::string> type_names;
517  type_names[typeid(int8_t) ] = "int8_t";
518  type_names[typeid(uint8_t) ] = "uint8_t";
519  type_names[typeid(int16_t) ] = "int16_t";
520  type_names[typeid(uint16_t)] = "uint16_t";
521  type_names[typeid(int32_t) ] = "int32_t";
522  type_names[typeid(uint32_t)] = "uint32_t";
523  type_names[typeid(int64_t) ] = "int64_t";
524  type_names[typeid(uint64_t)] = "uint64_t";
525  type_names[typeid(float) ] = "float";
526  type_names[typeid(double) ] = "double";
527 
528  std::string message;
529  if (RegisterSizeBit == 0)
530  message = "mipp::" + instr + "<" + type_names[typeid(T)] + "> (" + InstructionFullType + ") is undefined!, "
531  "try to add -mfpu=neon-vfpv4, -msse4.2, -mavx, -march=native... at the compile time.";
532  else
533  message = "mipp::" + instr + "<" + type_names[typeid(T)] + "> (" + InstructionFullType + ") is undefined!";
534 
535  message += get_back_trace();
536 
537  throw std::runtime_error(message);
538 }
539 
540 template <int N>
541 static inline void errorMessage(std::string instr)
542 {
543  std::string message;
544  if (RegisterSizeBit == 0)
545  message = "mipp::" + instr + "<" + std::to_string(N) + "> (" + InstructionFullType + ") is undefined!, "
546  "try to add -mfpu=neon-vfpv4, -msse4.2, -mavx, -march=native... at the compile time.";
547  else
548  message = "mipp::" + instr + "<" + std::to_string(N) + "> (" + InstructionFullType + ") is undefined!";
549 
550  message += get_back_trace();
551 
552  throw std::runtime_error(message);
553 }
554 
555 template <typename T1, typename T2>
556 static inline void errorMessage(std::string instr)
557 {
558  // define type names
559  std::unordered_map<std::type_index,std::string> type_names;
560  type_names[typeid(int8_t )] = "int8_t";
561  type_names[typeid(uint8_t )] = "uint8_t";
562  type_names[typeid(int16_t )] = "int16_t";
563  type_names[typeid(uint16_t)] = "uint16_t";
564  type_names[typeid(int32_t )] = "int32_t";
565  type_names[typeid(uint32_t)] = "uint32_t";
566  type_names[typeid(int64_t )] = "int64_t";
567  type_names[typeid(uint64_t)] = "uint64_t";
568  type_names[typeid(float )] = "float";
569  type_names[typeid(double )] = "double";
571  std::string message;
572  if (RegisterSizeBit == 0)
573  message = "mipp::" + instr + "<" + type_names[typeid(T1)] + "," + type_names[typeid(T2)] + "> (" +
574  InstructionFullType + ") is undefined!, try to add -mfpu=neon, -msse4.2, -mavx, -march=native... "
575  "at the compile time.";
576  else
577  message = "mipp::" + instr + "<" + type_names[typeid(T1)] + "," + type_names[typeid(T2)] + "> (" +
578  InstructionFullType + ") is undefined!";
580  message += get_back_trace();
582  throw std::runtime_error(message);
583 }
// Generic definitions of the mipp operations. Each one reports the operation name through
// errorMessage (which throws std::runtime_error); the exit(-1) that follows is therefore
// not reached. Templates on <typename T> describe register operations for element type T,
// templates on <int N> describe mask operations for N elements.
// NOTE(review): these are presumably overridden by per-type specializations elsewhere in
// the file - confirm.
585 template <typename T> inline reg load (const T*) { errorMessage<T>("load"); exit(-1); }
586 template <typename T> inline reg loadu (const T*) { errorMessage<T>("loadu"); exit(-1); }
587 template <typename T> inline void store (T*, const reg) { errorMessage<T>("store"); exit(-1); }
588 template <typename T> inline void storeu (T*, const reg) { errorMessage<T>("storeu"); exit(-1); }
589 template <typename T> inline reg set (const T[nElReg<T>()]) { errorMessage<T>("set"); exit(-1); }
// The mask overload of set is declared with an unsized array parameter under MSVC only.
590 #ifdef _MSC_VER
591 template <int N> inline msk set (const bool[]) { errorMessage<N>("set"); exit(-1); }
592 #else
593 template <int N> inline msk set (const bool[N]) { errorMessage<N>("set"); exit(-1); }
594 #endif
595 template <typename T> inline reg set1 (const T) { errorMessage<T>("set1"); exit(-1); }
596 template <int N> inline msk set1 (const bool) { errorMessage<N>("set1"); exit(-1); }
597 template <typename T> inline reg set0 () { errorMessage<T>("set0"); exit(-1); }
598 template <int N> inline msk set0 () { errorMessage<N>("set0"); exit(-1); }
599 template <typename T> inline reg_2 low (const reg) { errorMessage<T>("low"); exit(-1); }
600 template <typename T> inline reg_2 high (const reg) { errorMessage<T>("high"); exit(-1); }
601 template <typename T> inline reg cmask (const uint32_t[nElReg<T>()]) { errorMessage<T>("cmask"); exit(-1); }
602 template <typename T> inline reg cmask2 (const uint32_t[nElReg<T>()/2]) { errorMessage<T>("cmask2"); exit(-1); }
603 template <typename T> inline reg shuff (const reg, const reg) { errorMessage<T>("shuff"); exit(-1); }
604 template <typename T> inline reg shuff2 (const reg, const reg) { errorMessage<T>("shuff2"); exit(-1); }
605 template <typename T> inline reg interleavelo (const reg, const reg) { errorMessage<T>("interleavelo"); exit(-1); }
606 template <typename T> inline reg interleavehi (const reg, const reg) { errorMessage<T>("interleavehi"); exit(-1); }
607 template <typename T> inline reg interleavelo2(const reg, const reg) { errorMessage<T>("interleavelo2"); exit(-1); }
608 template <typename T> inline reg interleavehi2(const reg, const reg) { errorMessage<T>("interleavehi2"); exit(-1); }
609 template <typename T> inline regx2 interleave (const reg, const reg) { errorMessage<T>("interleave"); exit(-1); }
610 template <typename T> inline regx2 interleave2 (const reg, const reg) { errorMessage<T>("interleave2"); exit(-1); }
611 template <typename T> inline reg interleave (const reg) { errorMessage<T>("interleave"); exit(-1); }
612 template <typename T> inline regx2 interleavex2 (const reg, const reg) { errorMessage<T>("interleavex2"); exit(-1); }
613 template <typename T> inline reg interleavex4 (const reg) { errorMessage<T>("interleavex4"); exit(-1); }
614 template <typename T> inline reg interleavex16(const reg) { errorMessage<T>("interleavex16"); exit(-1); }
615 template <typename T> inline void transpose ( reg[nElReg<T>()]) { errorMessage<T>("transpose"); exit(-1); }
616 template <typename T> inline void transpose8x8 ( reg[8]) { errorMessage<T>("transpose8x8"); exit(-1); }
617 template <typename T> inline void transpose2 ( reg[nElReg<T>()/2]) { errorMessage<T>("transpose2"); exit(-1); }
618 template <typename T> inline void transpose28x8( reg[8]) { errorMessage<T>("transpose28x8"); exit(-1); }
619 template <typename T> inline reg andb (const reg, const reg) { errorMessage<T>("andb"); exit(-1); }
620 template <int N> inline msk andb (const msk, const msk) { errorMessage<N>("andb"); exit(-1); }
621 template <typename T> inline reg andnb (const reg, const reg) { errorMessage<T>("andnb"); exit(-1); }
622 template <int N> inline msk andnb (const msk, const msk) { errorMessage<N>("andnb"); exit(-1); }
623 template <typename T> inline reg notb (const reg) { errorMessage<T>("notb"); exit(-1); }
624 template <int N> inline msk notb (const msk) { errorMessage<N>("notb"); exit(-1); }
625 template <typename T> inline reg orb (const reg, const reg) { errorMessage<T>("orb"); exit(-1); }
626 template <int N> inline msk orb (const msk, const msk) { errorMessage<N>("orb"); exit(-1); }
627 template <typename T> inline reg xorb (const reg, const reg) { errorMessage<T>("xorb"); exit(-1); }
628 template <int N> inline msk xorb (const msk, const msk) { errorMessage<N>("xorb"); exit(-1); }
629 template <typename T> inline reg lshift (const reg, const uint32_t) { errorMessage<T>("lshift"); exit(-1); }
630 template <int N> inline msk lshift (const msk, const uint32_t) { errorMessage<N>("lshift"); exit(-1); }
631 template <typename T> inline reg rshift (const reg, const uint32_t) { errorMessage<T>("rshift"); exit(-1); }
632 template <int N> inline msk rshift (const msk, const uint32_t) { errorMessage<N>("rshift"); exit(-1); }
633 template <typename T> inline msk cmpeq (const reg, const reg) { errorMessage<T>("cmpeq"); exit(-1); }
634 template <typename T> inline msk cmpneq (const reg, const reg) { errorMessage<T>("cmpneq"); exit(-1); }
635 template <typename T> inline msk cmplt (const reg, const reg) { errorMessage<T>("cmplt"); exit(-1); }
636 template <typename T> inline msk cmple (const reg, const reg) { errorMessage<T>("cmple"); exit(-1); }
637 template <typename T> inline msk cmpgt (const reg, const reg) { errorMessage<T>("cmpgt"); exit(-1); }
638 template <typename T> inline msk cmpge (const reg, const reg) { errorMessage<T>("cmpge"); exit(-1); }
639 template <typename T> inline reg add (const reg, const reg) { errorMessage<T>("add"); exit(-1); }
640 template <typename T> inline reg sub (const reg, const reg) { errorMessage<T>("sub"); exit(-1); }
641 template <typename T> inline reg mul (const reg, const reg) { errorMessage<T>("mul"); exit(-1); }
642 template <typename T> inline reg div (const reg, const reg) { errorMessage<T>("div"); exit(-1); }
643 template <typename T> inline reg min (const reg, const reg) { errorMessage<T>("min"); exit(-1); }
644 template <typename T> inline reg max (const reg, const reg) { errorMessage<T>("max"); exit(-1); }
645 template <typename T> inline reg msb (const reg) { errorMessage<T>("msb"); exit(-1); }
646 template <typename T> inline reg msb (const reg, const reg) { errorMessage<T>("msb"); exit(-1); }
647 template <typename T> inline msk sign (const reg) { errorMessage<T>("sign"); exit(-1); }
648 template <typename T> inline reg neg (const reg, const reg) { errorMessage<T>("neg"); exit(-1); }
649 template <typename T> inline reg neg (const reg, const msk) { errorMessage<T>("neg"); exit(-1); }
650 template <typename T> inline reg abs (const reg) { errorMessage<T>("abs"); exit(-1); }
651 template <typename T> inline reg sqrt (const reg) { errorMessage<T>("sqrt"); exit(-1); }
652 template <typename T> inline reg rsqrt (const reg) { errorMessage<T>("rsqrt"); exit(-1); }
653 template <typename T> inline reg log (const reg) { errorMessage<T>("log"); exit(-1); }
654 template <typename T> inline reg exp (const reg) { errorMessage<T>("exp"); exit(-1); }
655 template <typename T> inline reg sin (const reg) { errorMessage<T>("sin"); exit(-1); }
656 template <typename T> inline reg cos (const reg) { errorMessage<T>("cos"); exit(-1); }
657 template <typename T> inline void sincos (const reg, reg&, reg&) { errorMessage<T>("sincos"); exit(-1); }
658 template <typename T> inline reg fmadd (const reg, const reg, const reg) { errorMessage<T>("fmadd"); exit(-1); }
659 template <typename T> inline reg fnmadd (const reg, const reg, const reg) { errorMessage<T>("fnmadd"); exit(-1); }
660 template <typename T> inline reg fmsub (const reg, const reg, const reg) { errorMessage<T>("fmsub"); exit(-1); }
661 template <typename T> inline reg fnmsub (const reg, const reg, const reg) { errorMessage<T>("fnmsub"); exit(-1); }
662 template <typename T> inline reg blend (const reg, const reg, const msk) { errorMessage<T>("blend"); exit(-1); }
663 template <typename T> inline reg lrot (const reg) { errorMessage<T>("lrot"); exit(-1); }
664 template <typename T> inline reg rrot (const reg) { errorMessage<T>("rrot"); exit(-1); }
665 template <typename T> inline reg div2 (const reg) { errorMessage<T>("div2"); exit(-1); }
666 template <typename T> inline reg div4 (const reg) { errorMessage<T>("div4"); exit(-1); }
667 template <typename T> inline reg sat (const reg, T, T) { errorMessage<T>("sat"); exit(-1); }
668 template <typename T> inline reg round (const reg) { errorMessage<T>("round"); exit(-1); }
669 
// Generic definitions of the two-type operations (T1 -> T2). Like the single-type stubs they
// only report the operation through errorMessage<T1,T2>, which throws std::runtime_error;
// the exit(-1) that follows is not reached.

// cvt from a full register.
670 template <typename T1, typename T2>
671 inline reg cvt(const reg) {
672  errorMessage<T1,T2>("cvt");
673  exit(-1);
674 }
675 
// cvt from a half register (reg_2).
676 template <typename T1, typename T2>
677 inline reg cvt(const reg_2) {
678  errorMessage<T1,T2>("cvt");
679  exit(-1);
680 }
681 
// pack takes two registers and returns one.
682 template <typename T1, typename T2>
683 inline reg pack(const reg, const reg) {
684  errorMessage<T1,T2>("pack");
685  exit(-1);
686 }
687 
688 // ------------------------------------------------------------------------------------------------------------ aliases
689 // --------------------------------------------------------------------------------------------------------------------
// copysign<T> is an alias: both overloads forward their two arguments unchanged to neg<T>.
690 template <typename T> inline reg copysign(const reg r1, const reg r2) { return neg<T>(r1, r2); }
691 template <typename T> inline reg copysign(const reg r1, const msk r2) { return neg<T>(r1, r2); }
692 
693 // ------------------------------------------------------------------------------------------------------------ masking
694 // --------------------------------------------------------------------------------------------------------------------
695 
// Function-pointer types for one-, two- and three-operand register operations. They are used
// as non-type template parameters by the raw-register mask() / maskz() helpers.
// T does not appear in the pointed-to signature; it only parameterizes the alias.
696 template <typename T> using proto_i1 = reg (*)(const reg a);
697 template <typename T> using proto_i2 = reg (*)(const reg a, const reg b);
698 template <typename T> using proto_i3 = reg (*)(const reg a, const reg b, const reg c);
699 
700 template <typename T, proto_i1<T> I1>
701 inline reg mask(const msk m, const reg src, const reg a)
702 {
703  return blend<T>(I1(a), src, m);
704 }
705 
706 template <typename T, proto_i2<T> I2>
707 inline reg mask(const msk m, const reg src, const reg a, const reg b)
708 {
709  return blend<T>(I2(a, b), src, m);
710 }
711 
712 template <typename T, proto_i3<T> I3>
713 inline reg mask(const msk m, const reg src, const reg a, const reg b, const reg c)
714 {
715  return blend<T>(I3(a, b, c), src, m);
716 }
717 
718 template <typename T, proto_i1<T> I1>
719 inline reg maskz(const msk m, const reg a)
720 {
721  auto m_reg = cvt_reg<N<T>()>(m);
722  auto a_modif = I1(a);
723  return andb<T>(m_reg, a_modif);
724 }
725 
726 template <typename T, proto_i2<T> I2>
727 inline reg maskz(const msk m, const reg a, const reg b)
728 {
729  auto m_reg = cvt_reg<N<T>()>(m);
730  auto a_modif = I2(a, b);
731  return andb<T>(m_reg, a_modif);
732 }
733 
734 template <typename T, proto_i3<T> I3>
735 inline reg maskz(const msk m, const reg a, const reg b, const reg c)
736 {
737  auto m_reg = cvt_reg<N<T>()>(m);
738  auto a_modif = I3(a, b, c);
739  return andb<T>(m_reg, a_modif);
740 }
741 
742 // -------------------------------------------------------------------------------------------------------- obj masking
743 
// Forward declarations needed by the object-style mask()/maskz() helpers:
// the Reg<T> and Msk<N> wrapper classes and the two wrapper-level operations they call.
744 template <typename T>
745 class Reg;
746 
747 template <int N>
748 class Msk;
749 
750 template <typename T> inline Reg<T> blend(const Reg<T> v1, const Reg<T> v2, const Msk<N<T>()> m);
751 template <typename T> inline Reg<T> andb (const Reg<T> v1, const Reg<T> v2);
752 
// Function-pointer types for one-, two- and three-operand operations on Reg<T> wrappers.
753 template <typename T> using proto_I1 = Reg<T> (*)(const Reg<T> a);
754 template <typename T> using proto_I2 = Reg<T> (*)(const Reg<T> a, const Reg<T> b);
755 template <typename T> using proto_I3 = Reg<T> (*)(const Reg<T> a, const Reg<T> b, const Reg<T> c);
756 
757 template <typename T, proto_I1<T> I1>
758 inline Reg<T> mask(const Msk<N<T>()> m, const Reg<T> src, const Reg<T> a)
759 {
760 #ifndef MIPP_NO
761  return blend<T>(I1(a), src, m);
762 #else
763  return m.m ? I1(a) : src;
764 #endif
765 }
766 
767 template <typename T, proto_I2<T> I2>
768 inline Reg<T> mask(const Msk<N<T>()> m, const Reg<T> src, const Reg<T> a, const Reg<T> b)
769 {
770 #ifndef MIPP_NO
771  return blend<T>(I2(a, b), src, m);
772 #else
773  return m.m ? I2(a, b) : src;
774 #endif
775 }
776 
777 template <typename T, proto_I3<T> I3>
778 inline Reg<T> mask(const Msk<N<T>()> m, const Reg<T> src, const Reg<T> a, const Reg<T> b, const Reg<T> c)
779 {
780 #ifndef MIPP_NO
781  return blend<T>(I3(a, b, c), src, m);
782 #else
783  return m.m ? I3(a, b, c) : src;
784 #endif
785 }
786 
787 template <typename T, proto_I1<T> I1>
788 inline Reg<T> maskz(const Msk<N<T>()> m, const Reg<T> a)
789 {
790 #ifndef MIPP_NO
791  auto m_reg = m.template cvt_reg<T>();
792  auto a_modif = I1(a);
793  return andb<T>(m_reg, a_modif);
794 #else
795  return m.m ? I1(a) : Reg<T>((T)0);
796 #endif
797 }
798 
799 template <typename T, proto_I2<T> I2>
800 inline Reg<T> maskz(const Msk<N<T>()> m, const Reg<T> a, const Reg<T> b)
801 {
802 #ifndef MIPP_NO
803  auto m_reg = m.template cvt_reg<T>();
804  auto a_modif = I2(a, b);
805  return andb<T>(m_reg, a_modif);
806 #else
807  return m.m ? I2(a, b) : Reg<T>((T)0);
808 #endif
809 }
810 
811 template <typename T, proto_I3<T> I3>
812 inline Reg<T> maskz(const Msk<N<T>()> m, const Reg<T> a, const Reg<T> b, const Reg<T> c)
813 {
814 #ifndef MIPP_NO
815  auto m_reg = m.template cvt_reg<T>();
816  auto a_modif = I3(a, b, c);
817  return andb<T>(m_reg, a_modif);
818 #else
819  return m.m ? I3(a, b, c) : Reg<T>((T)0);
820 #endif
821 }
822 
823 // --------------------------------------------------------------------------------------- myIntrinsics implementations
824 // --------------------------------------------------------------------------------------------------------------------
825 
826 // --------------------------------------------------------------------------------------------------------------- dump
827 
828 template <typename T>
829 void dump(const mipp::reg r, std::ostream &stream = std::cout, const uint32_t elmtWidth = 6)
830 {
831  constexpr int32_t lane_size = (int32_t)(mipp::N<T>() / mipp::Lanes);
832 
833 // const T* data = (T*)&r;
834  T data[mipp::nElReg<T>()];
835  store<T>(data, r);
836 
837  stream << "[";
838  for (auto l = 0; l < (int)mipp::Lanes; l++)
839  {
840  for (auto i = 0; i < lane_size; i++)
841  stream << std::setw(elmtWidth) << +data[l * lane_size +i] << ((i < lane_size -1) ? ", " : "");
842  stream << ((l < mipp::Lanes -1) ? " | " : "");
843  }
844  stream << "]";
845 }
846 
847 template <int N>
848 void dump(const mipp::msk m, std::ostream &stream = std::cout, const uint32_t elmtWidth = 6)
849 {
850  constexpr int32_t lane_size = (int32_t)(N / mipp::Lanes);
851  constexpr int bits = mipp::RegisterSizeBit / N;
852 
853  const auto r = cvt_reg<N>(m);
854 
855  stream << "[";
856  if (bits == 8)
857  {
858  // const int8_t* data = (int8_t*)&r;
859  int8_t data[N];
860  store<int8_t>(data, r);
861 
862  for (auto l = 0; l < (int)mipp::Lanes; l++)
863  {
864  for (auto i = 0; i < lane_size; i++)
865  stream << std::setw(elmtWidth) << (data[l * lane_size +i] ? 1 : 0) << ((i < lane_size -1) ? ", " : "");
866  stream << ((l < (int)mipp::Lanes -1) ? " | " : "");
867  }
868  }
869  else if (bits == 16)
870  {
871  // const int16_t* data = (int16_t*)&r;
872  int16_t data[N];
873  store<int16_t>(data, r);
874 
875  for (auto l = 0; l < (int)mipp::Lanes; l++)
876  {
877  for (auto i = 0; i < lane_size; i++)
878  stream << std::setw(elmtWidth) << (data[l * lane_size +i] ? 1 : 0) << ((i < lane_size -1) ? ", " : "");
879  stream << ((l < (int)mipp::Lanes -1) ? " | " : "");
880  }
881  }
882  else if (bits == 32)
883  {
884  // const int32_t* data = (int32_t*)&r;
885  int32_t data[N];
886  store<int32_t>(data, r);
887 
888  for (auto l = 0; l < (int)mipp::Lanes; l++)
889  {
890  for (auto i = 0; i < lane_size; i++)
891  stream << std::setw(elmtWidth) << (data[l * lane_size +i] ? 1 : 0) << ((i < lane_size -1) ? ", " : "");
892  stream << ((l < (int)mipp::Lanes -1) ? " | " : "");
893  }
894  }
895  else if (bits == 64)
896  {
897  // const int64_t* data = (int64_t*)&r;
898  int64_t data[N];
899  store<int64_t>(data, r);
900 
901  for (auto l = 0; l < (int)mipp::Lanes; l++)
902  {
903  for (auto i = 0; i < lane_size; i++)
904  stream << std::setw(elmtWidth) << (data[l * lane_size +i] ? 1 : 0) << ((i < lane_size -1) ? ", " : "");
905  stream << ((l < (int)mipp::Lanes -1) ? " | " : "");
906  }
907  }
908 
909  stream << "]";
910 }
911 
912 // ---------------------------------------------------------------------------------------------------------- reduction
913 
914 template <typename T>
915 using red_op = reg (*)(const reg, const reg);
916 
917 template <typename T>
918 using Red_op = Reg<T> (*)(const Reg<T>, const Reg<T>);
919 
920 template <typename T>
921 using ld_op = reg (*)(const T*);
922 
923 template <typename T, red_op<T> OP>
925 {
926  static reg apply(const reg) {
927  errorMessage<T>("_reduction::apply");
928  exit(-1);
929  }
930 };
931 
932 template <typename T, Red_op<T> OP>
934 {
935  static Reg<T> apply(const Reg<T> r) {
936 #ifndef MIPP_NO_INTRINSICS
937  errorMessage<T>("_Reduction::apply");
938  exit(-1);
939 #else
940  return r;
941 #endif
942  }
943 };
944 
945 template <typename T, red_op<T> OP>
946 struct reduction
947 {
948  static reg apply(const reg r)
949  {
950  return _reduction<T,OP>::apply(r);
951  }
952 
953  static T apply_v(const reg r)
954  {
955  auto red = reduction<T,OP>::apply(r);
956 #ifdef _MSC_VER
957  return *((T*)&red);
958 #else
959 #pragma GCC diagnostic ignored "-Wstrict-aliasing"
960  return *((T*)&red);
961 #endif
962  }
963 
964  template <ld_op<T> LD = mipp::load<T>>
965  static T apply(const mipp::vector<T> &data)
966  {
967  return reduction<T,OP>::template apply<LD>(data.data(), data.size());
968  }
969 
970  template <ld_op<T> LD = mipp::loadu<T>>
971  static T apply(const std::vector<T> &data)
972  {
973  return reduction<T,OP>::template apply<LD>(data.data(), data.size());
974  }
975 
976  template <ld_op<T> LD = mipp::loadu<T>>
977  static T apply(const T *data, const uint32_t dataSize)
978  {
979  assert(dataSize > 0);
980  assert(dataSize % mipp::nElReg<T>() == 0);
981 
982  auto rRed = LD(&data[0]);
983  for (auto i = mipp::nElReg<T>(); i < dataSize; i += mipp::nElReg<T>())
984  rRed = OP(rRed, LD(&data[i]));
985  rRed = reduction<T,OP>::apply(rRed);
986 
987  T tRed[mipp::nElReg<T>()];
988  mipp::store<T>(tRed, rRed);
989 
990  return tRed[0];
991  }
992 };
993 
994 template <typename T, Red_op<T> OP>
995 struct Reduction
996 {
997  static Reg<T> apply(const Reg<T> r)
998  {
999  return _Reduction<T,OP>::apply(r);
1000  }
1001 
1002  static T apply_v(const Reg<T> r)
1003  {
1004  auto red = Reduction<T,OP>::apply(r);
1005  return red[0];
1006  }
1007 
1008  template <ld_op<T> LD = mipp::load<T>>
1009  static T apply(const mipp::vector<T> &data)
1010  {
1011  return Reduction<T,OP>::template apply<LD>(data.data(), data.size());
1012  }
1013 
1014  template <ld_op<T> LD = mipp::loadu<T>>
1015  static T apply(const std::vector<T> &data)
1016  {
1017  return Reduction<T,OP>::template apply<LD>(data.data(), data.size());
1018  }
1019 
1020  template <ld_op<T> LD = mipp::loadu<T>>
1021  static T apply(const T *data, const uint32_t dataSize)
1022  {
1023  assert(dataSize > 0);
1024  assert(dataSize % mipp::nElReg<T>() == 0);
1025 
1026 #ifndef MIPP_NO_INTRINSICS
1027  auto rRed = Reg<T>(LD(&data[0]));
1028 #else
1029  auto rRed = Reg<T>(data[0]);
1030 #endif
1031  for (auto i = mipp::nElReg<T>(); i < dataSize; i += mipp::nElReg<T>())
1032 #ifndef MIPP_NO_INTRINSICS
1033  rRed = OP(rRed, Reg<T>(LD(&data[i])));
1034 #else
1035  rRed = OP(rRed, Reg<T>(data[i]));
1036 #endif
1037  rRed = Reduction<T,OP>::apply(rRed);
1038 
1039  T tRed[mipp::nElReg<T>()];
1040  rRed.store(tRed);
1041 
1042  return tRed[0];
1043  }
1044 };
1045 
1046 // ------------------------------------------------------------------------- special reduction functions implementation
1047 
1048 template <typename T> inline T sum (const reg v) { return reduction<T,mipp::add<T>>::apply_v(v); }
1049 template <typename T> inline T hadd(const reg v) { return reduction<T,mipp::add<T>>::apply_v(v); }
1050 template <typename T> inline T hmul(const reg v) { return reduction<T,mipp::mul<T>>::apply_v(v); }
1051 template <typename T> inline T hmin(const reg v) { return reduction<T,mipp::min<T>>::apply_v(v); }
1052 template <typename T> inline T hmax(const reg v) { return reduction<T,mipp::max<T>>::apply_v(v); }
1053 
1054 // ------------------------------------------------------------------------------------------------- wrapper to objects
1055 #include "mipp_object.hxx"
1056 
1057 #ifndef MIPP_NO_INTRINSICS
1058 // ------------------------------------------------------------------------------------------------------- ARM NEON-128
1059 // --------------------------------------------------------------------------------------------------------------------
1060 #if defined(__ARM_NEON__) || defined(__ARM_NEON)
1061 #include "mipp_impl_NEON.hxx"
1062 // -------------------------------------------------------------------------------------------------------- X86 AVX-512
1063 // --------------------------------------------------------------------------------------------------------------------
1064 #elif defined(__MIC__) || defined(__KNCNI__) || defined(__AVX512__) || defined(__AVX512F__)
1065 #include "mipp_impl_AVX512.hxx"
1066 // -------------------------------------------------------------------------------------------------------- X86 AVX-256
1067 // --------------------------------------------------------------------------------------------------------------------
1068 #elif defined(__AVX__)
1069 #include "mipp_impl_AVX.hxx"
1070 // -------------------------------------------------------------------------------------------------------- X86 SSE-128
1071 // --------------------------------------------------------------------------------------------------------------------
1072 #elif defined(__SSE__)
1073 #include "mipp_impl_SSE.hxx"
1074 #endif
1075 #endif
1076 
1077 }
1078 
1079 #endif /* MY_INTRINSICS_PLUS_PLUS_H_ */
constexpr int32_t nElReg()
Definition: mipp.h:421
reg fnmadd(const reg, const reg, const reg)
Definition: mipp.h:659
Definition: mipp.h:408
void deallocate(T *p, std::size_t n)
Definition: mipp.h:473
T * allocate(std::size_t n)
Definition: mipp.h:472
T hmax(const reg v)
Definition: mipp.h:1052
reg shuff(const reg, const reg)
Definition: mipp.h:603
const std::string InstructionVersion
Definition: mipp.h:332
static T apply(const T *data, const uint32_t dataSize)
Definition: mipp.h:977
static Reg< T > apply(const Reg< T > r)
Definition: mipp.h:935
msk sign(const reg)
Definition: mipp.h:647
T hmin(const reg v)
Definition: mipp.h:1051
reg interleavelo(const reg, const reg)
Definition: mipp.h:605
reg set(const T[nElReg< T >()])
Definition: mipp.h:589
reg(*)(const reg a) proto_i1
Definition: mipp.h:696
reg sub(const reg, const reg)
Definition: mipp.h:640
uint16_t reg_2
Definition: mipp.h:346
constexpr uint32_t Lanes
Definition: mipp.h:395
msk cmpge(const reg, const reg)
Definition: mipp.h:638
constexpr int32_t N()
Definition: mipp.h:431
mipp::_Reduction
Definition: mipp.h:933
reg cmask(const uint32_t[nElReg< T >()])
Definition: mipp.h:601
reg set1(const T)
Definition: mipp.h:595
bool operator==(const AlignedAllocator< C1 > &ma1, const AlignedAllocator< C2 > &ma2)
Definition: mipp.h:479
reg sat(const reg, T, T)
Definition: mipp.h:667
mipp::reduction
Definition: mipp.h:946
reg cvt_reg(const msk m)
Definition: mipp.h:349
reg min(const reg, const reg)
Definition: mipp.h:643
Definition: mipp.h:748
uint32_t reg
Definition: mipp.h:345
reg xorb(const reg, const reg)
Definition: mipp.h:627
void transpose28x8(reg[8])
Definition: mipp.h:618
void sincos(const reg, reg &, reg &)
Definition: mipp.h:657
reg copysign(const reg r1, const reg r2)
Definition: mipp.h:690
static void errorMessage(std::string instr)
Definition: mipp.h:513
reg fmadd(const reg, const reg, const reg)
Definition: mipp.h:658
T hadd(const reg v)
Definition: mipp.h:1049
mipp::_reduction
Definition: mipp.h:924
reg msb(const reg)
Definition: mipp.h:645
reg andb(const reg, const reg)
Definition: mipp.h:619
#define MIPP_LANES
Definition: mipp.h:342
static T apply(const std::vector< T > &data)
Definition: mipp.h:1015
const bool Support64Bit
Definition: mipp.h:400
reg interleavelo2(const reg, const reg)
Definition: mipp.h:607
void free(T *ptr)
Definition: mipp.h:457
static std::string get_back_trace()
Definition: mipp.h:493
reg div2(const reg)
Definition: mipp.h:665
#define MIPP_REGISTER_SIZE
Definition: mipp.h:341
reg interleavex4(const reg)
Definition: mipp.h:613
static T apply(const T *data, const uint32_t dataSize)
Definition: mipp.h:1021
void dump(const mipp::reg r, std::ostream &stream=std::cout, const uint32_t elmtWidth=6)
Definition: mipp.h:829
reg loadu(const T *)
Definition: mipp.h:586
reg sin(const reg)
Definition: mipp.h:655
constexpr uint32_t RequiredAlignment
Definition: mipp.h:393
void transpose8x8(reg[8])
Definition: mipp.h:616
reg add(const reg, const reg)
Definition: mipp.h:639
Definition: mipp.h:467
reg interleavehi2(const reg, const reg)
Definition: mipp.h:608
static reg apply(const reg r)
Definition: mipp.h:948
reg notb(const reg)
Definition: mipp.h:623
mipp::Reduction
Definition: mipp.h:995
regx2 interleavex2(const reg, const reg)
Definition: mipp.h:612
reg lrot(const reg)
Definition: mipp.h:663
reg mask(const msk m, const reg src, const reg a)
Definition: mipp.h:701
void storeu(T *, const reg)
Definition: mipp.h:588
static T apply_v(const Reg< T > r)
Definition: mipp.h:1002
T sum(const reg v)
Definition: mipp.h:1048
static T apply(const mipp::vector< T > &data)
Definition: mipp.h:1009
reg abs(const reg)
Definition: mipp.h:650
reg(*)(const reg, const reg) red_op
Definition: mipp.h:915
void transpose(reg[nElReg< T >()])
Definition: mipp.h:615
reg maskz(const msk m, const reg a)
Definition: mipp.h:719
msk cmplt(const reg, const reg)
Definition: mipp.h:635
T hmul(const reg v)
Definition: mipp.h:1050
struct mipp::regx2 regx2
#define MIPP_REQUIRED_ALIGNMENT
Definition: mipp.h:335
msk cmple(const reg, const reg)
Definition: mipp.h:636
void store(T *, const reg)
Definition: mipp.h:587
static T apply_v(const reg r)
Definition: mipp.h:953
constexpr uint32_t RegisterSizeBit
Definition: mipp.h:394
uint8_t msk
Definition: mipp.h:344
reg cos(const reg)
Definition: mipp.h:656
reg pack(const reg, const reg)
Definition: mipp.h:683
reg max(const reg, const reg)
Definition: mipp.h:644
reg cmask2(const uint32_t[nElReg< T >()/2])
Definition: mipp.h:602
static T apply(const mipp::vector< T > &data)
Definition: mipp.h:965
reg orb(const reg, const reg)
Definition: mipp.h:625
T value_type
Definition: mipp.h:469
reg blend(const reg, const reg, const msk)
Definition: mipp.h:662
bool operator!=(const AlignedAllocator< C1 > &ma1, const AlignedAllocator< C2 > &ma2)
Definition: mipp.h:482
reg_2 low(const reg)
Definition: mipp.h:599
reg rshift(const reg, const uint32_t)
Definition: mipp.h:631
reg andnb(const reg, const reg)
Definition: mipp.h:621
reg(*)(const T *) ld_op
Definition: mipp.h:921
reg neg(const reg, const reg)
Definition: mipp.h:648
const std::string InstructionFullType
Definition: mipp.h:331
reg val[2]
Definition: mipp.h:408
std::vector< T, AlignedAllocator< T >> vector
Definition: mipp.h:485
reg fmsub(const reg, const reg, const reg)
Definition: mipp.h:660
reg set0()
Definition: mipp.h:597
AlignedAllocator()
Definition: mipp.h:470
constexpr int32_t nElmtsPerRegister()
Definition: mipp.h:411
reg shuff2(const reg, const reg)
Definition: mipp.h:604
reg div4(const reg)
Definition: mipp.h:666
Definition: mipp.h:745
std::vector< std::string > InstructionExtensions()
Definition: mipp.h:353
void store(T *data) const
Definition: mipp.h:188
reg rsqrt(const reg)
Definition: mipp.h:652
static reg apply(const reg)
Definition: mipp.h:926
msk cmpgt(const reg, const reg)
Definition: mipp.h:637
reg fnmsub(const reg, const reg, const reg)
Definition: mipp.h:661
static Reg< T > apply(const Reg< T > r)
Definition: mipp.h:997
void transpose2(reg[nElReg< T >()/2])
Definition: mipp.h:617
reg interleavex16(const reg)
Definition: mipp.h:614
regx2 interleave(const reg, const reg)
Definition: mipp.h:609
reg cvt(const reg)
Definition: mipp.h:671
reg rrot(const reg)
Definition: mipp.h:664
reg mul(const reg, const reg)
Definition: mipp.h:641
const std::string InstructionType
Definition: mipp.h:328
reg interleavehi(const reg, const reg)
Definition: mipp.h:606
reg(*)(const reg a, const reg b, const reg c) proto_i3
Definition: mipp.h:698
msk cmpeq(const reg, const reg)
Definition: mipp.h:633
reg log(const reg)
Definition: mipp.h:653
regx2 interleave2(const reg, const reg)
Definition: mipp.h:610
T * malloc(uint32_t nData)
Definition: mipp.h:443
reg lshift(const reg, const uint32_t)
Definition: mipp.h:629
msk cmpneq(const reg, const reg)
Definition: mipp.h:634
const bool SupportByteWord
Definition: mipp.h:403
reg exp(const reg)
Definition: mipp.h:654
reg sqrt(const reg)
Definition: mipp.h:651
reg div(const reg, const reg)
Definition: mipp.h:642
reg_2 high(const reg)
Definition: mipp.h:600
reg load(const T *)
Definition: mipp.h:585
static T apply(const std::vector< T > &data)
Definition: mipp.h:971
reg round(const reg)
Definition: mipp.h:668
reg(*)(const reg a, const reg b) proto_i2
Definition: mipp.h:697