32 #ifndef MY_INTRINSICS_PLUS_PLUS_H_
33 #define MY_INTRINSICS_PLUS_PLUS_H_
35 #ifndef MIPP_NO_INTRINSICS
36 #if defined(__ARM_NEON__) || defined(__ARM_NEON)
39 #elif defined(__SSE__) || defined(__AVX__) || defined(__MIC__) || defined(__KNCNI__) || defined(__AVX512__) || defined(__AVX512F__)
41 #if !defined(__INTEL_COMPILER) && !defined(__ICL) && !defined(__ICC)
44 #elif defined(__SSE__)
48 #include <immintrin.h>
50 #include <xmmintrin.h>
53 #include <emmintrin.h>
56 #include <pmmintrin.h>
59 #include <tmmintrin.h>
62 #include <smmintrin.h>
71 #include <unordered_map>
85 #if (defined(__GNUC__) || defined(__clang__) || defined(__llvm__)) && (defined(__linux__) || defined(__linux) || defined(__APPLE__))
103 #ifndef MIPP_NO_INTRINSICS
105 #if defined(__ARM_NEON__) || defined(__ARM_NEON)
109 #define MIPP_REQUIRED_ALIGNMENT 16
114 #define MIPP_INSTR_VERSION 2
120 #define MIPP_INSTR_VERSION 1
123 #define MIPP_REGISTER_SIZE 128
126 using msk = uint32x4_t;
127 using reg = float32x4_t;
128 using reg_2 = float32x2_t;
137 std::vector<std::string> ext;
138 #ifdef __ARM_FEATURE_FMA
139 ext.push_back(
"FMA");
145 #elif defined(__MIC__) || defined(__KNCNI__) || defined(__AVX512__) || defined(__AVX512F__)
149 #define MIPP_REQUIRED_ALIGNMENT 64
152 #if defined(__MIC__) || defined(__KNCNI__)
153 #define MIPP_AVX512KNC
159 #define MIPP_AVX512BW
162 #define MIPP_AVX512CD
165 #define MIPP_AVX512ER
168 #define MIPP_AVX512PF
171 #define MIPP_AVX512DQ
174 #define MIPP_AVX512VL
180 #define MIPP_INSTR_VERSION 1
181 #define MIPP_REGISTER_SIZE 512
194 throw std::runtime_error(
"mipp: Invalid mask size 'N' = " + std::to_string(
N) +
".");
199 std::vector<std::string> ext;
200 #if defined(__MIC__) || defined(__KNCNI__)
201 ext.push_back(
"KNC");
228 #elif defined(__AVX__)
232 #define MIPP_REQUIRED_ALIGNMENT 32
238 #define MIPP_INSTR_VERSION 2
244 #define MIPP_INSTR_VERSION 1
246 #define MIPP_REGISTER_SIZE 256
255 return _mm256_castsi256_ps(m);
260 std::vector<std::string> ext;
262 ext.push_back(
"FMA");
268 #elif defined(__SSE__)
272 #define MIPP_REQUIRED_ALIGNMENT 16
281 #define MIPP_INSTR_VERSION 42
282 #elif defined(__SSE4_1__)
286 #define MIPP_INSTR_VERSION 41
287 #elif defined(__SSSE3__)
291 #define MIPP_INSTR_VERSION 31
292 #elif defined(__SSE3__)
296 #define MIPP_INSTR_VERSION 3
297 #elif defined(__SSE2__)
301 #define MIPP_INSTR_VERSION 2
306 #define MIPP_INSTR_VERSION 1
308 #define MIPP_REGISTER_SIZE 128
313 using reg_2 = __m128d;
317 return _mm_castsi128_ps(m);
322 std::vector<std::string> ext;
334 #define MIPP_NO_INTRINSICS
335 #define MIPP_REQUIRED_ALIGNMENT 1
336 #if UINTPTR_MAX == 0xffffffffffffffff
340 #define MIPP_INSTR_VERSION 1
341 #define MIPP_REGISTER_SIZE 0
355 std::vector<std::string> ext;
368 #define MIPP_REQUIRED_ALIGNMENT 1
369 #if UINTPTR_MAX == 0xffffffffffffffff
373 #define MIPP_INSTR_VERSION 1
374 #define MIPP_REGISTER_SIZE 0
378 using reg = uint32_t;
379 using reg_2 = uint16_t;
388 std::vector<std::string> ext;
410 template <
typename T>
413 #ifdef MIPP_NO_INTRINSICS
420 template <
typename T>
423 #ifdef MIPP_NO_INTRINSICS
430 template <
typename T>
431 constexpr int32_t
N()
433 #ifndef MIPP_NO_INTRINSICS
434 return mipp::nElReg<T>();
442 template <
typename T>
447 #if !defined(MIPP_NO_INTRINSICS) && (defined(__SSE2__) || defined(__AVX__) || defined(__MIC__) || defined(__KNCNI__) || defined(__AVX512__) || defined(__AVX512F__))
456 template <
typename T>
459 #if !defined(MIPP_NO_INTRINSICS) && (defined(__SSE2__) || defined(__AVX__) || defined(__MIC__) || defined(__KNCNI__) || defined(__AVX512__) || defined(__AVX512F__))
472 T*
allocate(std::size_t n) {
return mipp::malloc<T>((int)n); }
478 template <
class C1,
class C2>
481 template <
class C1,
class C2>
// Alias for a std::vector of T whose storage is obtained through AlignedAllocator<T>.
485 template<
class T>
using vector = std::vector<T, AlignedAllocator<T>>;
496 #if defined(MIPP_ENABLE_BACKTRACE) && (defined(__GNUC__) || defined(__clang__) || defined(__llvm__)) && (defined(__linux__) || defined(__linux) || defined(__APPLE__))
497 const int bt_max_depth = 32;
498 void *bt_array[bt_max_depth];
500 size_t size = backtrace(bt_array, bt_max_depth);
501 char** bt_symbs = backtrace_symbols(bt_array, size);
503 bt_str +=
"\nBacktrace:";
504 for (
size_t i = 0; i < size; i++)
505 bt_str +=
"\n" + std::string(bt_symbs[i]);
512 template <
typename T>
516 std::unordered_map<std::type_index,std::string> type_names;
517 type_names[
typeid(int8_t) ] =
"int8_t";
518 type_names[
typeid(uint8_t) ] =
"uint8_t";
519 type_names[
typeid(int16_t) ] =
"int16_t";
520 type_names[
typeid(uint16_t)] =
"uint16_t";
521 type_names[
typeid(int32_t) ] =
"int32_t";
522 type_names[
typeid(uint32_t)] =
"uint32_t";
523 type_names[
typeid(int64_t) ] =
"int64_t";
524 type_names[
typeid(uint64_t)] =
"uint64_t";
525 type_names[
typeid(float) ] =
"float";
526 type_names[
typeid(double) ] =
"double";
530 message =
"mipp::" + instr +
"<" + type_names[
typeid(T)] +
"> (" +
InstructionFullType +
") is undefined!, "
531 "try to add -mfpu=neon-vfpv4, -msse4.2, -mavx, -march=native... at the compile time.";
533 message =
"mipp::" + instr +
"<" + type_names[
typeid(T)] +
"> (" +
InstructionFullType +
") is undefined!";
537 throw std::runtime_error(message);
545 message =
"mipp::" + instr +
"<" + std::to_string(
N) +
"> (" +
InstructionFullType +
") is undefined!, "
546 "try to add -mfpu=neon-vfpv4, -msse4.2, -mavx, -march=native... at the compile time.";
548 message =
"mipp::" + instr +
"<" + std::to_string(
N) +
"> (" + InstructionFullType +
") is undefined!";
552 throw std::runtime_error(message);
555 template <
typename T1,
typename T2>
559 std::unordered_map<std::type_index,std::string> type_names;
560 type_names[
typeid(int8_t )] =
"int8_t";
561 type_names[
typeid(uint8_t )] =
"uint8_t";
562 type_names[
typeid(int16_t )] =
"int16_t";
563 type_names[
typeid(uint16_t)] =
"uint16_t";
564 type_names[
typeid(int32_t )] =
"int32_t";
565 type_names[
typeid(uint32_t)] =
"uint32_t";
566 type_names[
typeid(int64_t )] =
"int64_t";
567 type_names[
typeid(uint64_t)] =
"uint64_t";
568 type_names[
typeid(float )] =
"float";
569 type_names[
typeid(double )] =
"double";
573 message =
"mipp::" + instr +
"<" + type_names[
typeid(T1)] +
"," + type_names[
typeid(T2)] +
"> (" +
575 "at the compile time.";
577 message =
"mipp::" + instr +
"<" + type_names[
typeid(T1)] +
"," + type_names[
typeid(T2)] +
"> (" +
582 throw std::runtime_error(message);
585 template <
typename T>
inline reg load (
const T*) { errorMessage<T>(
"load"); exit(-1); }
586 template <
typename T>
inline reg loadu (
const T*) { errorMessage<T>(
"loadu"); exit(-1); }
587 template <
typename T>
inline void store (T*,
const reg) { errorMessage<T>(
"store"); exit(-1); }
588 template <
typename T>
inline void storeu (T*,
const reg) { errorMessage<T>(
"storeu"); exit(-1); }
589 template <
typename T>
inline reg set (
const T[nElReg<T>()]) { errorMessage<T>(
"set"); exit(-1); }
591 template <
int N>
inline msk set (
const bool[]) { errorMessage<N>(
"set"); exit(-1); }
593 template <
int N>
inline msk set (
const bool[
N]) { errorMessage<N>(
"set"); exit(-1); }
595 template <
typename T>
inline reg set1 (
const T) { errorMessage<T>(
"set1"); exit(-1); }
596 template <
int N>
inline msk set1 (
const bool) { errorMessage<N>(
"set1"); exit(-1); }
597 template <
typename T>
inline reg set0 () { errorMessage<T>(
"set0"); exit(-1); }
598 template <
int N>
inline msk set0 () { errorMessage<N>(
"set0"); exit(-1); }
599 template <
typename T>
inline reg_2 low (
const reg) { errorMessage<T>(
"low"); exit(-1); }
600 template <
typename T>
inline reg_2 high (
const reg) { errorMessage<T>(
"high"); exit(-1); }
601 template <
typename T>
inline reg cmask (
const uint32_t[nElReg<T>()]) { errorMessage<T>(
"cmask"); exit(-1); }
602 template <
typename T>
inline reg cmask2 (
const uint32_t[nElReg<T>()/2]) { errorMessage<T>(
"cmask2"); exit(-1); }
603 template <
typename T>
inline reg shuff (
const reg,
const reg) { errorMessage<T>(
"shuff"); exit(-1); }
604 template <
typename T>
inline reg shuff2 (
const reg,
const reg) { errorMessage<T>(
"shuff2"); exit(-1); }
611 template <
typename T>
inline reg interleave (
const reg) { errorMessage<T>(
"interleave"); exit(-1); }
613 template <
typename T>
inline reg interleavex4 (
const reg) { errorMessage<T>(
"interleavex4"); exit(-1); }
615 template <
typename T>
inline void transpose (
reg[nElReg<T>()]) { errorMessage<T>(
"transpose"); exit(-1); }
616 template <
typename T>
inline void transpose8x8 (
reg[8]) { errorMessage<T>(
"transpose8x8"); exit(-1); }
617 template <
typename T>
inline void transpose2 (
reg[nElReg<T>()/2]) { errorMessage<T>(
"transpose2"); exit(-1); }
618 template <
typename T>
inline void transpose28x8(
reg[8]) { errorMessage<T>(
"transpose28x8"); exit(-1); }
619 template <
typename T>
inline reg andb (
const reg,
const reg) { errorMessage<T>(
"andb"); exit(-1); }
620 template <
int N>
inline msk andb (
const msk,
const msk) { errorMessage<N>(
"andb"); exit(-1); }
621 template <
typename T>
inline reg andnb (
const reg,
const reg) { errorMessage<T>(
"andnb"); exit(-1); }
622 template <
int N>
inline msk andnb (
const msk,
const msk) { errorMessage<N>(
"andnb"); exit(-1); }
623 template <
typename T>
inline reg notb (
const reg) { errorMessage<T>(
"notb"); exit(-1); }
624 template <
int N>
inline msk notb (
const msk) { errorMessage<N>(
"notb"); exit(-1); }
625 template <
typename T>
inline reg orb (
const reg,
const reg) { errorMessage<T>(
"orb"); exit(-1); }
626 template <
int N>
inline msk orb (
const msk,
const msk) { errorMessage<N>(
"orb"); exit(-1); }
627 template <
typename T>
inline reg xorb (
const reg,
const reg) { errorMessage<T>(
"xorb"); exit(-1); }
628 template <
int N>
inline msk xorb (
const msk,
const msk) { errorMessage<N>(
"xorb"); exit(-1); }
629 template <
typename T>
inline reg lshift (
const reg,
const uint32_t) { errorMessage<T>(
"lshift"); exit(-1); }
630 template <
int N>
inline msk lshift (
const msk,
const uint32_t) { errorMessage<N>(
"lshift"); exit(-1); }
631 template <
typename T>
inline reg rshift (
const reg,
const uint32_t) { errorMessage<T>(
"rshift"); exit(-1); }
632 template <
int N>
inline msk rshift (
const msk,
const uint32_t) { errorMessage<N>(
"rshift"); exit(-1); }
633 template <
typename T>
inline msk cmpeq (
const reg,
const reg) { errorMessage<T>(
"cmpeq"); exit(-1); }
634 template <
typename T>
inline msk cmpneq (
const reg,
const reg) { errorMessage<T>(
"cmpneq"); exit(-1); }
635 template <
typename T>
inline msk cmplt (
const reg,
const reg) { errorMessage<T>(
"cmplt"); exit(-1); }
636 template <
typename T>
inline msk cmple (
const reg,
const reg) { errorMessage<T>(
"cmple"); exit(-1); }
637 template <
typename T>
inline msk cmpgt (
const reg,
const reg) { errorMessage<T>(
"cmpgt"); exit(-1); }
638 template <
typename T>
inline msk cmpge (
const reg,
const reg) { errorMessage<T>(
"cmpge"); exit(-1); }
639 template <
typename T>
inline reg add (
const reg,
const reg) { errorMessage<T>(
"add"); exit(-1); }
640 template <
typename T>
inline reg sub (
const reg,
const reg) { errorMessage<T>(
"sub"); exit(-1); }
641 template <
typename T>
inline reg mul (
const reg,
const reg) { errorMessage<T>(
"mul"); exit(-1); }
642 template <
typename T>
inline reg div (
const reg,
const reg) { errorMessage<T>(
"div"); exit(-1); }
643 template <
typename T>
inline reg min (
const reg,
const reg) { errorMessage<T>(
"min"); exit(-1); }
644 template <
typename T>
inline reg max (
const reg,
const reg) { errorMessage<T>(
"max"); exit(-1); }
645 template <
typename T>
inline reg msb (
const reg) { errorMessage<T>(
"msb"); exit(-1); }
646 template <
typename T>
inline reg msb (
const reg,
const reg) { errorMessage<T>(
"msb"); exit(-1); }
647 template <
typename T>
inline msk sign (
const reg) { errorMessage<T>(
"sign"); exit(-1); }
648 template <
typename T>
inline reg neg (
const reg,
const reg) { errorMessage<T>(
"neg"); exit(-1); }
649 template <
typename T>
inline reg neg (
const reg,
const msk) { errorMessage<T>(
"neg"); exit(-1); }
650 template <
typename T>
inline reg abs (
const reg) { errorMessage<T>(
"abs"); exit(-1); }
651 template <
typename T>
inline reg sqrt (
const reg) { errorMessage<T>(
"sqrt"); exit(-1); }
652 template <
typename T>
inline reg rsqrt (
const reg) { errorMessage<T>(
"rsqrt"); exit(-1); }
653 template <
typename T>
inline reg log (
const reg) { errorMessage<T>(
"log"); exit(-1); }
654 template <
typename T>
inline reg exp (
const reg) { errorMessage<T>(
"exp"); exit(-1); }
655 template <
typename T>
inline reg sin (
const reg) { errorMessage<T>(
"sin"); exit(-1); }
656 template <
typename T>
inline reg cos (
const reg) { errorMessage<T>(
"cos"); exit(-1); }
657 template <
typename T>
inline void sincos (
const reg,
reg&,
reg&) { errorMessage<T>(
"sincos"); exit(-1); }
658 template <
typename T>
inline reg fmadd (
const reg,
const reg,
const reg) { errorMessage<T>(
"fmadd"); exit(-1); }
659 template <
typename T>
inline reg fnmadd (
const reg,
const reg,
const reg) { errorMessage<T>(
"fnmadd"); exit(-1); }
660 template <
typename T>
inline reg fmsub (
const reg,
const reg,
const reg) { errorMessage<T>(
"fmsub"); exit(-1); }
661 template <
typename T>
inline reg fnmsub (
const reg,
const reg,
const reg) { errorMessage<T>(
"fnmsub"); exit(-1); }
662 template <
typename T>
inline reg blend (
const reg,
const reg,
const msk) { errorMessage<T>(
"blend"); exit(-1); }
663 template <
typename T>
inline reg lrot (
const reg) { errorMessage<T>(
"lrot"); exit(-1); }
664 template <
typename T>
inline reg rrot (
const reg) { errorMessage<T>(
"rrot"); exit(-1); }
665 template <
typename T>
inline reg div2 (
const reg) { errorMessage<T>(
"div2"); exit(-1); }
666 template <
typename T>
inline reg div4 (
const reg) { errorMessage<T>(
"div4"); exit(-1); }
667 template <
typename T>
inline reg sat (
const reg, T, T) { errorMessage<T>(
"sat"); exit(-1); }
668 template <
typename T>
inline reg round (
const reg) { errorMessage<T>(
"round"); exit(-1); }
670 template <
typename T1,
typename T2>
672 errorMessage<T1,T2>(
"cvt");
676 template <
typename T1,
typename T2>
678 errorMessage<T1,T2>(
"cvt");
682 template <
typename T1,
typename T2>
684 errorMessage<T1,T2>(
"pack");
690 template <
typename T>
inline reg copysign(
const reg r1,
const reg r2) {
return neg<T>(r1, r2); }
691 template <
typename T>
inline reg copysign(
const reg r1,
const msk r2) {
return neg<T>(r1, r2); }
700 template <
typename T, proto_i1<T> I1>
703 return blend<T>(I1(a), src, m);
706 template <
typename T, proto_i2<T> I2>
709 return blend<T>(I2(a, b), src, m);
712 template <
typename T, proto_i3<T> I3>
715 return blend<T>(I3(a, b, c), src, m);
718 template <
typename T, proto_i1<T> I1>
721 auto m_reg = cvt_reg<N<T>()>(m);
722 auto a_modif = I1(a);
723 return andb<T>(m_reg, a_modif);
726 template <
typename T, proto_i2<T> I2>
729 auto m_reg = cvt_reg<N<T>()>(m);
730 auto a_modif = I2(a, b);
731 return andb<T>(m_reg, a_modif);
734 template <
typename T, proto_i3<T> I3>
737 auto m_reg = cvt_reg<N<T>()>(m);
738 auto a_modif = I3(a, b, c);
739 return andb<T>(m_reg, a_modif);
744 template <
typename T>
757 template <
typename T, proto_I1<T> I1>
761 return blend<T>(I1(a), src, m);
763 return m.m ? I1(a) : src;
767 template <
typename T, proto_I2<T> I2>
771 return blend<T>(I2(a, b), src, m);
773 return m.m ? I2(a, b) : src;
777 template <
typename T, proto_I3<T> I3>
781 return blend<T>(I3(a, b, c), src, m);
783 return m.m ? I3(a, b, c) : src;
787 template <
typename T, proto_I1<T> I1>
791 auto m_reg = m.template cvt_reg<T>();
792 auto a_modif = I1(a);
793 return andb<T>(m_reg, a_modif);
795 return m.m ? I1(a) :
Reg<T>((T)0);
799 template <
typename T, proto_I2<T> I2>
803 auto m_reg = m.template cvt_reg<T>();
804 auto a_modif = I2(a, b);
805 return andb<T>(m_reg, a_modif);
807 return m.m ? I2(a, b) :
Reg<T>((T)0);
811 template <
typename T, proto_I3<T> I3>
815 auto m_reg = m.template cvt_reg<T>();
816 auto a_modif = I3(a, b, c);
817 return andb<T>(m_reg, a_modif);
819 return m.m ? I3(a, b, c) :
Reg<T>((T)0);
828 template <
typename T>
829 void dump(
const mipp::reg r, std::ostream &stream = std::cout,
const uint32_t elmtWidth = 6)
831 constexpr int32_t lane_size = (int32_t)(mipp::N<T>() /
mipp::Lanes);
834 T data[mipp::nElReg<T>()];
840 for (
auto i = 0; i < lane_size; i++)
841 stream << std::setw(elmtWidth) << +data[l * lane_size +i] << ((i < lane_size -1) ?
", " :
"");
848 void dump(
const mipp::msk m, std::ostream &stream = std::cout,
const uint32_t elmtWidth = 6)
850 constexpr int32_t lane_size = (int32_t)(
N /
mipp::Lanes);
853 const auto r = cvt_reg<N>(m);
860 store<int8_t>(data, r);
864 for (
auto i = 0; i < lane_size; i++)
865 stream << std::setw(elmtWidth) << (data[l * lane_size +i] ? 1 : 0) << ((i < lane_size -1) ?
", " :
"");
866 stream << ((l < (int)
mipp::Lanes -1) ?
" | " :
"");
873 store<int16_t>(data, r);
877 for (
auto i = 0; i < lane_size; i++)
878 stream << std::setw(elmtWidth) << (data[l * lane_size +i] ? 1 : 0) << ((i < lane_size -1) ?
", " :
"");
879 stream << ((l < (int)
mipp::Lanes -1) ?
" | " :
"");
886 store<int32_t>(data, r);
890 for (
auto i = 0; i < lane_size; i++)
891 stream << std::setw(elmtWidth) << (data[l * lane_size +i] ? 1 : 0) << ((i < lane_size -1) ?
", " :
"");
892 stream << ((l < (int)
mipp::Lanes -1) ?
" | " :
"");
899 store<int64_t>(data, r);
903 for (
auto i = 0; i < lane_size; i++)
904 stream << std::setw(elmtWidth) << (data[l * lane_size +i] ? 1 : 0) << ((i < lane_size -1) ?
", " :
"");
905 stream << ((l < (int)
mipp::Lanes -1) ?
" | " :
"");
914 template <
typename T>
917 template <
typename T>
920 template <
typename T>
923 template <
typename T, red_op<T> OP>
927 errorMessage<T>(
"_reduction::apply");
932 template <
typename T, Red_op<T> OP>
936 #ifndef MIPP_NO_INTRINSICS
937 errorMessage<T>(
"_Reduction::apply");
945 template <
typename T, red_op<T> OP>
959 #pragma GCC diagnostic ignored "-Wstrict-aliasing"
964 template <ld_op<T> LD = mipp::load<T>>
970 template <ld_op<T> LD = mipp::loadu<T>>
971 static T
apply(
const std::vector<T> &data)
976 template <ld_op<T> LD = mipp::loadu<T>>
977 static T
apply(
const T *data,
const uint32_t dataSize)
979 assert(dataSize > 0);
980 assert(dataSize % mipp::nElReg<T>() == 0);
982 auto rRed = LD(&data[0]);
983 for (
auto i = mipp::nElReg<T>(); i < dataSize; i += mipp::nElReg<T>())
984 rRed = OP(rRed, LD(&data[i]));
987 T tRed[mipp::nElReg<T>()];
988 mipp::store<T>(tRed, rRed);
994 template <
typename T, Red_op<T> OP>
1008 template <ld_op<T> LD = mipp::load<T>>
1014 template <ld_op<T> LD = mipp::loadu<T>>
1015 static T
apply(
const std::vector<T> &data)
1020 template <ld_op<T> LD = mipp::loadu<T>>
1021 static T
apply(
const T *data,
const uint32_t dataSize)
1023 assert(dataSize > 0);
1024 assert(dataSize % mipp::nElReg<T>() == 0);
1026 #ifndef MIPP_NO_INTRINSICS
1027 auto rRed =
Reg<T>(LD(&data[0]));
1029 auto rRed =
Reg<T>(data[0]);
1031 for (
auto i = mipp::nElReg<T>(); i < dataSize; i += mipp::nElReg<T>())
1032 #ifndef MIPP_NO_INTRINSICS
1033 rRed = OP(rRed,
Reg<T>(LD(&data[i])));
1035 rRed = OP(rRed,
Reg<T>(data[i]));
1039 T tRed[mipp::nElReg<T>()];
1055 #include "mipp_object.hxx"
1057 #ifndef MIPP_NO_INTRINSICS
1060 #if defined(__ARM_NEON__) || defined(__ARM_NEON)
1061 #include "mipp_impl_NEON.hxx"
1064 #elif defined(__MIC__) || defined(__KNCNI__) || defined(__AVX512__) || defined(__AVX512F__)
1065 #include "mipp_impl_AVX512.hxx"
1068 #elif defined(__AVX__)
1069 #include "mipp_impl_AVX.hxx"
1072 #elif defined(__SSE__)
1073 #include "mipp_impl_SSE.hxx"
constexpr int32_t nElReg()
Definition: mipp.h:421
reg fnmadd(const reg, const reg, const reg)
Definition: mipp.h:659
void deallocate(T *p, std::size_t n)
Definition: mipp.h:473
T * allocate(std::size_t n)
Definition: mipp.h:472
T hmax(const reg v)
Definition: mipp.h:1052
reg shuff(const reg, const reg)
Definition: mipp.h:603
const std::string InstructionVersion
Definition: mipp.h:332
static T apply(const T *data, const uint32_t dataSize)
Definition: mipp.h:977
static Reg< T > apply(const Reg< T > r)
Definition: mipp.h:935
msk sign(const reg)
Definition: mipp.h:647
T hmin(const reg v)
Definition: mipp.h:1051
reg interleavelo(const reg, const reg)
Definition: mipp.h:605
reg set(const T[nElReg< T >()])
Definition: mipp.h:589
reg(*)(const reg a) proto_i1
Definition: mipp.h:696
reg sub(const reg, const reg)
Definition: mipp.h:640
uint16_t reg_2
Definition: mipp.h:346
constexpr uint32_t Lanes
Definition: mipp.h:395
msk cmpge(const reg, const reg)
Definition: mipp.h:638
constexpr int32_t N()
Definition: mipp.h:431
reg cmask(const uint32_t[nElReg< T >()])
Definition: mipp.h:601
reg set1(const T)
Definition: mipp.h:595
bool operator==(const AlignedAllocator< C1 > &ma1, const AlignedAllocator< C2 > &ma2)
Definition: mipp.h:479
reg sat(const reg, T, T)
Definition: mipp.h:667
reg cvt_reg(const msk m)
Definition: mipp.h:349
reg min(const reg, const reg)
Definition: mipp.h:643
uint32_t reg
Definition: mipp.h:345
reg xorb(const reg, const reg)
Definition: mipp.h:627
void transpose28x8(reg[8])
Definition: mipp.h:618
void sincos(const reg, reg &, reg &)
Definition: mipp.h:657
reg copysign(const reg r1, const reg r2)
Definition: mipp.h:690
static void errorMessage(std::string instr)
Definition: mipp.h:513
reg fmadd(const reg, const reg, const reg)
Definition: mipp.h:658
T hadd(const reg v)
Definition: mipp.h:1049
reg msb(const reg)
Definition: mipp.h:645
reg andb(const reg, const reg)
Definition: mipp.h:619
#define MIPP_LANES
Definition: mipp.h:342
static T apply(const std::vector< T > &data)
Definition: mipp.h:1015
const bool Support64Bit
Definition: mipp.h:400
reg interleavelo2(const reg, const reg)
Definition: mipp.h:607
void free(T *ptr)
Definition: mipp.h:457
static std::string get_back_trace()
Definition: mipp.h:493
reg div2(const reg)
Definition: mipp.h:665
#define MIPP_REGISTER_SIZE
Definition: mipp.h:341
reg interleavex4(const reg)
Definition: mipp.h:613
static T apply(const T *data, const uint32_t dataSize)
Definition: mipp.h:1021
void dump(const mipp::reg r, std::ostream &stream=std::cout, const uint32_t elmtWidth=6)
Definition: mipp.h:829
reg loadu(const T *)
Definition: mipp.h:586
reg sin(const reg)
Definition: mipp.h:655
constexpr uint32_t RequiredAlignment
Definition: mipp.h:393
void transpose8x8(reg[8])
Definition: mipp.h:616
reg add(const reg, const reg)
Definition: mipp.h:639
reg interleavehi2(const reg, const reg)
Definition: mipp.h:608
static reg apply(const reg r)
Definition: mipp.h:948
reg notb(const reg)
Definition: mipp.h:623
regx2 interleavex2(const reg, const reg)
Definition: mipp.h:612
reg lrot(const reg)
Definition: mipp.h:663
reg mask(const msk m, const reg src, const reg a)
Definition: mipp.h:701
void storeu(T *, const reg)
Definition: mipp.h:588
static T apply_v(const Reg< T > r)
Definition: mipp.h:1002
T sum(const reg v)
Definition: mipp.h:1048
static T apply(const mipp::vector< T > &data)
Definition: mipp.h:1009
reg abs(const reg)
Definition: mipp.h:650
reg(*)(const reg, const reg) red_op
Definition: mipp.h:915
void transpose(reg[nElReg< T >()])
Definition: mipp.h:615
reg maskz(const msk m, const reg a)
Definition: mipp.h:719
msk cmplt(const reg, const reg)
Definition: mipp.h:635
T hmul(const reg v)
Definition: mipp.h:1050
#define MIPP_REQUIRED_ALIGNMENT
Definition: mipp.h:335
msk cmple(const reg, const reg)
Definition: mipp.h:636
void store(T *, const reg)
Definition: mipp.h:587
static T apply_v(const reg r)
Definition: mipp.h:953
constexpr uint32_t RegisterSizeBit
Definition: mipp.h:394
uint8_t msk
Definition: mipp.h:344
reg cos(const reg)
Definition: mipp.h:656
reg pack(const reg, const reg)
Definition: mipp.h:683
reg max(const reg, const reg)
Definition: mipp.h:644
reg cmask2(const uint32_t[nElReg< T >()/2])
Definition: mipp.h:602
static T apply(const mipp::vector< T > &data)
Definition: mipp.h:965
reg orb(const reg, const reg)
Definition: mipp.h:625
T value_type
Definition: mipp.h:469
reg blend(const reg, const reg, const msk)
Definition: mipp.h:662
bool operator!=(const AlignedAllocator< C1 > &ma1, const AlignedAllocator< C2 > &ma2)
Definition: mipp.h:482
reg_2 low(const reg)
Definition: mipp.h:599
reg rshift(const reg, const uint32_t)
Definition: mipp.h:631
reg andnb(const reg, const reg)
Definition: mipp.h:621
reg(*)(const T *) ld_op
Definition: mipp.h:921
reg neg(const reg, const reg)
Definition: mipp.h:648
const std::string InstructionFullType
Definition: mipp.h:331
reg val[2]
Definition: mipp.h:408
std::vector< T, AlignedAllocator< T >> vector
Definition: mipp.h:485
reg fmsub(const reg, const reg, const reg)
Definition: mipp.h:660
reg set0()
Definition: mipp.h:597
AlignedAllocator()
Definition: mipp.h:470
constexpr int32_t nElmtsPerRegister()
Definition: mipp.h:411
reg shuff2(const reg, const reg)
Definition: mipp.h:604
reg div4(const reg)
Definition: mipp.h:666
std::vector< std::string > InstructionExtensions()
Definition: mipp.h:353
void store(T *data) const
Definition: mipp.h:188
reg rsqrt(const reg)
Definition: mipp.h:652
static reg apply(const reg)
Definition: mipp.h:926
msk cmpgt(const reg, const reg)
Definition: mipp.h:637
reg fnmsub(const reg, const reg, const reg)
Definition: mipp.h:661
static Reg< T > apply(const Reg< T > r)
Definition: mipp.h:997
void transpose2(reg[nElReg< T >()/2])
Definition: mipp.h:617
reg interleavex16(const reg)
Definition: mipp.h:614
regx2 interleave(const reg, const reg)
Definition: mipp.h:609
reg cvt(const reg)
Definition: mipp.h:671
reg rrot(const reg)
Definition: mipp.h:664
reg mul(const reg, const reg)
Definition: mipp.h:641
const std::string InstructionType
Definition: mipp.h:328
reg interleavehi(const reg, const reg)
Definition: mipp.h:606
reg(*)(const reg a, const reg b, const reg c) proto_i3
Definition: mipp.h:698
msk cmpeq(const reg, const reg)
Definition: mipp.h:633
reg log(const reg)
Definition: mipp.h:653
regx2 interleave2(const reg, const reg)
Definition: mipp.h:610
T * malloc(uint32_t nData)
Definition: mipp.h:443
reg lshift(const reg, const uint32_t)
Definition: mipp.h:629
msk cmpneq(const reg, const reg)
Definition: mipp.h:634
const bool SupportByteWord
Definition: mipp.h:403
reg exp(const reg)
Definition: mipp.h:654
reg sqrt(const reg)
Definition: mipp.h:651
reg div(const reg, const reg)
Definition: mipp.h:642
reg_2 high(const reg)
Definition: mipp.h:600
reg load(const T *)
Definition: mipp.h:585
static T apply(const std::vector< T > &data)
Definition: mipp.h:971
reg round(const reg)
Definition: mipp.h:668
reg(*)(const reg a, const reg b) proto_i2
Definition: mipp.h:697