28#ifndef VC_COMMON_SIMDARRAY_H_
29#define VC_COMMON_SIMDARRAY_H_
37#include "writemaskedvector.h"
38#include "simdarrayhelper.h"
39#include "simdmaskarray.h"
41#include "interleave.h"
42#include "indexsequence.h"
46namespace Vc_VERSIONED_NAMESPACE
/// Primary template of the vector-type selection helper; it is only declared
/// here and gets its definitions from partial specializations.
/// \tparam N           size value that is compared against each candidate's Size
/// \tparam Candidates  pack of vector types to choose from
template <std::size_t N, class... Candidates>
struct select_best_vector_type_impl;
59template <std::
size_t N,
class T>
struct select_best_vector_type_impl<N, T> {
63template <std::size_t N,
class T,
class... Candidates>
64struct select_best_vector_type_impl<N, T, Candidates...> {
65 using type =
typename std::conditional<
66 (N < T::Size),
typename select_best_vector_type_impl<N, Candidates...>::type,
69template <
class T, std::
size_t N>
70struct select_best_vector_type : select_best_vector_type_impl<N,
73#elif defined Vc_IMPL_AVX
79 Vc::Scalar::Vector<T>> {
87template <
typename T> T Vc_INTRINSIC Vc_PURE product_helper_(
const T &l,
const T &r) {
return l * r; }
88template <
typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(
const T &l,
const T &r) {
return l + r; }
92template <
typename T, std::
size_t N,
typename V, std::
size_t M>
93inline fixed_size_simd<T, N>
min(
const SimdArray<T, N, V, M> &x,
94 const SimdArray<T, N, V, M> &y);
95template <
typename T, std::
size_t N,
typename V, std::
size_t M>
96inline fixed_size_simd<T, N>
max(
const SimdArray<T, N, V, M> &x,
97 const SimdArray<T, N, V, M> &y);
104#define Vc_CURRENT_CLASS_NAME SimdArray
114template <
typename T, std::
size_t N,
typename VectorType_>
115class SimdArray<T, N, VectorType_, N>
117 static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
118 std::is_same<T, int32_t>::value ||
119 std::is_same<T, uint32_t>::value ||
120 std::is_same<T, int16_t>::value ||
121 std::is_same<T, uint16_t>::value,
122 "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
123 "int16_t, uint16_t }");
125 std::is_same<VectorType_,
126 typename Common::select_best_vector_type<T, N>::type>::value &&
127 VectorType_::size() == N,
128 "ERROR: leave the third and fourth template parameters with their defaults. They "
129 "are implementation details.");
132 static constexpr bool is_atomic =
true;
133 using VectorType = VectorType_;
134 using vector_type = VectorType;
135 using storage_type = vector_type;
136 using vectorentry_type =
typename vector_type::VectorEntryType;
137 using value_type = T;
138 using mask_type = fixed_size_simd_mask<T, N>;
139 using index_type = fixed_size_simd<int, N>;
140 static constexpr std::size_t size() {
return N; }
141 using Mask = mask_type;
142 using MaskType = Mask;
143 using MaskArgument =
const MaskType &;
144 using VectorEntryType = vectorentry_type;
145 using EntryType = value_type;
146 using IndexType = index_type;
147 using AsArg =
const SimdArray &;
148 using reference = Detail::ElementReference<SimdArray>;
149 static constexpr std::size_t Size = size();
153 Vc_INTRINSIC SimdArray() =
default;
156 Vc_INTRINSIC SimdArray(
const SimdArray &) =
default;
157 Vc_INTRINSIC SimdArray(SimdArray &&) =
default;
158 Vc_INTRINSIC SimdArray &operator=(
const SimdArray &) =
default;
161 Vc_INTRINSIC SimdArray(
const value_type &a) : data(a) {}
162 Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
163 Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
166 typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
167 Vc_INTRINSIC SimdArray(U a)
168 : SimdArray(static_cast<value_type>(a))
173 template <
class U,
class V,
class = enable_if<N == V::Size>>
174 Vc_INTRINSIC SimdArray(
const SimdArray<U, N, V> &x)
175 : data(simd_cast<vector_type>(internal_data(x)))
178 template <
class U,
class V,
class = enable_if<(N > V::Size && N <= 2 * V::Size)>,
180 Vc_INTRINSIC SimdArray(
const SimdArray<U, N, V> &x)
181 : data(simd_cast<vector_type>(internal_data(internal_data0(x)),
182 internal_data(internal_data1(x))))
185 template <
class U,
class V,
class = enable_if<(N > 2 * V::Size && N <= 4 * V::Size)>,
186 class = U,
class = U>
187 Vc_INTRINSIC SimdArray(
const SimdArray<U, N, V> &x)
188 : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
189 internal_data(internal_data1(internal_data0(x))),
190 internal_data(internal_data0(internal_data1(x))),
191 internal_data(internal_data1(internal_data1(x)))))
195 template <
typename V, std::
size_t Pieces, std::
size_t Index>
196 Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
197 : data(simd_cast<vector_type, Index>(x.data))
201 Vc_INTRINSIC SimdArray(
const std::initializer_list<value_type> &init)
204 Vc_ASSERT(init.size() == size());
210 typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
211 Vc_INTRINSIC SimdArray(
const V &x)
212 : data(simd_cast<vector_type>(x))
218 template <
typename U,
typename A,
220 enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
221 !std::is_same<A, simd_abi::fixed_size<N>>::value>>
222 Vc_INTRINSIC
operator Vector<U, A>()
const
224 return simd_cast<Vector<U, A>>(data);
226 operator fixed_size_simd<T, N> &()
228 return static_cast<fixed_size_simd<T, N> &
>(*this);
230 operator const fixed_size_simd<T, N> &()
const
232 return static_cast<const fixed_size_simd<T, N> &
>(*this);
235#include "gatherinterface.h"
236#include "scatterinterface.h"
238 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data() {}
239 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data(o) {}
240 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i) : data(i)
243 template <std::
size_t Offset>
244 explicit Vc_INTRINSIC SimdArray(
245 Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
248 data += value_type(Offset);
251 Vc_INTRINSIC
void setZero() { data.setZero(); }
252 Vc_INTRINSIC
void setZero(mask_type k) { data.setZero(internal_data(k)); }
253 Vc_INTRINSIC
void setZeroInverted() { data.setZeroInverted(); }
254 Vc_INTRINSIC
void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }
256 Vc_INTRINSIC
void setQnan() { data.setQnan(); }
257 Vc_INTRINSIC
void setQnan(mask_type m) { data.setQnan(internal_data(m)); }
260 template <
typename Op,
typename... Args>
261 static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
263 fixed_size_simd<T, N> r;
264 Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
268 template <
typename Op,
typename... Args>
269 static Vc_INTRINSIC
void callOperation(Op op, Args &&... args)
271 Common::unpackArgumentsAuto(op,
nullptr, std::forward<Args>(args)...);
274 static Vc_INTRINSIC fixed_size_simd<T, N>
Zero()
278 static Vc_INTRINSIC fixed_size_simd<T, N>
One()
286 static Vc_INTRINSIC fixed_size_simd<T, N> Random()
288 return fromOperation(Common::Operations::random());
293 class = enable_if<std::is_arithmetic<U>::value &&
294 Traits::is_load_store_flag<Flags>::value>>
295 explicit Vc_INTRINSIC SimdArray(
const U *mem, Flags f = Flags()) : data(mem, f)
299 template <
typename... Args> Vc_INTRINSIC
void load(Args &&... args)
301 data.load(std::forward<Args>(args)...);
304 template <
typename... Args> Vc_INTRINSIC
void store(Args &&... args)
const
306 data.store(std::forward<Args>(args)...);
309 Vc_INTRINSIC mask_type operator!()
const
311 return {private_init, !data};
314 Vc_INTRINSIC fixed_size_simd<T, N>
operator-()
const
316 return {private_init, -data};
320 Vc_INTRINSIC fixed_size_simd<T, N>
operator+()
const {
return *
this; }
322 Vc_INTRINSIC fixed_size_simd<T, N> operator~()
const
324 return {private_init, ~data};
327 template <
typename U,
328 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
329 Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N>
operator<<(U x)
const
331 return {private_init, data << x};
333 template <
typename U,
334 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
335 Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
340 template <
typename U,
341 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
342 Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x)
const
344 return {private_init, data >> x};
346 template <
typename U,
347 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
348 Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
354#define Vc_BINARY_OPERATOR_(op) \
355 Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs) \
357 data op## = rhs.data; \
360 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
361 Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
362 Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
363#undef Vc_BINARY_OPERATOR_
366 Vc_DEPRECATED(
"use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative()
const
373 Vc_INTRINSIC
static value_type get(
const SimdArray &o,
int i)
noexcept
377 template <
typename U>
378 Vc_INTRINSIC
static void set(SimdArray &o,
int i, U &&v)
noexcept(
379 noexcept(std::declval<value_type &>() = v))
// Mutable element access: returns a `reference` proxy
// (Detail::ElementReference<SimdArray>) built from this object and the index
// converted to int. No range check on i is performed here.
391 Vc_INTRINSIC reference operator[](
size_t i)
noexcept
// Backs up the noexcept specifier above: constructing `reference` from
// (SimdArray &, int) must itself be noexcept, otherwise compilation fails.
393 static_assert(
noexcept(reference{std::declval<SimdArray &>(), int()}),
"");
394 return {*
this, int(i)};
396 Vc_INTRINSIC value_type operator[](
size_t i)
const noexcept
398 return get(*
this,
int(i));
401 Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
const mask_type &k)
406 Vc_INTRINSIC
void assign(
const SimdArray &v,
const mask_type &k)
408 data.assign(v.data, internal_data(k));
412#define Vc_REDUCTION_FUNCTION_(name_) \
413 Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); } \
414 Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const \
416 return data.name_(internal_data(mask)); \
418 Vc_NOTHING_EXPECTING_SEMICOLON
419 Vc_REDUCTION_FUNCTION_(
min);
420 Vc_REDUCTION_FUNCTION_(
max);
421 Vc_REDUCTION_FUNCTION_(product);
422 Vc_REDUCTION_FUNCTION_(sum);
423#undef Vc_REDUCTION_FUNCTION_
424 Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum()
const
426 return {private_init, data.partialSum()};
429 template <
typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f)
const
431 return {private_init, data.apply(std::forward<F>(f))};
433 template <
typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f,
const mask_type &k)
const
435 return {private_init, data.apply(std::forward<F>(f), k)};
438 Vc_INTRINSIC fixed_size_simd<T, N>
shifted(
int amount)
const
440 return {private_init, data.shifted(amount)};
443 template <std::
size_t NN>
444 Vc_INTRINSIC fixed_size_simd<T, N>
shifted(
int amount,
const SimdArray<value_type, NN> &shiftIn)
447 return {private_init, data.shifted(amount, simd_cast<VectorType>(shiftIn))};
450 Vc_INTRINSIC fixed_size_simd<T, N> rotated(
int amount)
const
452 return {private_init, data.rotated(amount)};
456 Vc_DEPRECATED(
"use exponent(x) instead") Vc_INTRINSIC fixed_size_simd<T, N> exponent()
const
458 return {private_init,
exponent(data)};
461 Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(SimdArray x)
const
463 return {private_init, data.interleaveLow(x.data)};
465 Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(SimdArray x)
const
467 return {private_init, data.interleaveHigh(x.data)};
470 Vc_INTRINSIC fixed_size_simd<T, N> reversed()
const
472 return {private_init, data.reversed()};
475 Vc_INTRINSIC fixed_size_simd<T, N> sorted()
const
477 return {private_init, data.sorted()};
480 template <class G, class = decltype(std::declval<G>()(std::size_t())),
481 class = enable_if<!Traits::is_simd_vector<G>::value>>
482 Vc_INTRINSIC SimdArray(const G &gen) : data(gen)
485 template <
typename G>
static Vc_INTRINSIC fixed_size_simd<T, N> generate(
const G &gen)
487 return {private_init, VectorType::generate(gen)};
490 Vc_DEPRECATED(
"use copysign(x, y) instead")
491 Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x)
const
496 friend VectorType &internal_data<>(SimdArray &x);
497 friend const VectorType &internal_data<>(
const SimdArray &x);
500 Vc_INTRINSIC SimdArray(private_init_t, VectorType &&x) : data(std::move(x)) {}
502 Vc_FREE_STORE_OPERATORS_ALIGNED(
alignof(storage_type));
508 alignas(
static_cast<std::size_t
>(
509 Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value *
sizeof(VectorType_) /
510 VectorType_::size()>::value)) storage_type data;
512template <
typename T, std::
size_t N,
typename VectorType>
constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
513template <
typename T, std::
size_t N,
typename VectorType>
515template <
typename T, std::
size_t N,
typename VectorType>
519VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
523template <
typename T, std::
size_t N,
typename VectorType>
527const VectorType &internal_data(
const SimdArray<T, N, VectorType, N> &x)
533template <
class T> Vc_INTRINSIC T unwrap(
const T &x) {
return x; }
535template <
class T,
size_t N,
class V>
536Vc_INTRINSIC V unwrap(
const SimdArray<T, N, V, N> &x)
538 return internal_data(x);
541template <
class T,
size_t Pieces,
size_t Index>
542Vc_INTRINSIC
auto unwrap(
const Common::Segment<T, Pieces, Index> &x)
543 ->
decltype(x.to_fixed_size())
545 return unwrap(x.to_fixed_size());
549template <
typename T, std::
size_t N,
typename VectorType>
550template <
class MT,
class IT,
int Scale>
551Vc_INTRINSIC
void SimdArray<T, N, VectorType, N>::gatherImplementation(
552 const Common::GatherArguments<MT, IT, Scale> &args)
554 data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)));
556template <
typename T, std::
size_t N,
typename VectorType>
557template <
class MT,
class IT,
int Scale>
558Vc_INTRINSIC
void SimdArray<T, N, VectorType, N>::gatherImplementation(
559 const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
561 data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)),
566template <
typename T, std::
size_t N,
typename VectorType>
567template <
typename MT,
typename IT>
568inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
571 data.scatter(mem, unwrap(std::forward<IT>(indexes)));
573template <
typename T, std::
size_t N,
typename VectorType>
574template <
typename MT,
typename IT>
575inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
577 MaskArgument mask)
const
579 data.scatter(mem, unwrap(std::forward<IT>(indexes)), mask);
615template <
typename T,
size_t N,
typename V,
size_t Wt>
class SimdArray
617 static_assert(std::is_same<T, double>::value ||
618 std::is_same<T, float>::value ||
619 std::is_same<T, int32_t>::value ||
620 std::is_same<T, uint32_t>::value ||
621 std::is_same<T, int16_t>::value ||
622 std::is_same<T, uint16_t>::value,
"SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
624 std::is_same<V, typename Common::select_best_vector_type<T, N>::type>::value &&
626 "ERROR: leave the third and fourth template parameters with their defaults. They "
627 "are implementation details.");
630 std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
632 (N % V::size() == 0),
633 "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
634 "MIC::(u)short_v::size(), i.e. k * 16.");
636 using my_traits = SimdArrayTraits<T, N>;
637 static constexpr std::size_t N0 = my_traits::N0;
638 static constexpr std::size_t N1 = my_traits::N1;
639 using Split = Common::Split<N0>;
640 template <
typename U, std::
size_t K>
using CArray = U[K];
643 static constexpr bool is_atomic =
false;
646 static_assert(storage_type0::size() == N0,
"");
651 using vector_type = V;
652 using vectorentry_type =
typename storage_type0::vectorentry_type;
653 typedef vectorentry_type alias_type Vc_MAY_ALIAS;
674 static constexpr std::size_t
size() {
return N; }
681 using VectorEntryType = vectorentry_type;
688 using reference = Detail::ElementReference<SimdArray>;
720 return fromOperation(Common::Operations::random());
723 template <class G, class = decltype(std::declval<G>()(std::size_t())),
724 class = enable_if<!Traits::is_simd_vector<G>::value>>
726 : data0(gen), data1([&](std::size_t i) { return gen(i + storage_type0::size()); })
733 auto tmp = storage_type0::generate(gen);
738 return {std::move(tmp),
739 storage_type1::generate([&](std::size_t i) {
return gen(i + N0); })};
757 typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
765 SimdArray(
const SimdArray &) =
default;
766 SimdArray(SimdArray &&) =
default;
767 SimdArray &operator=(
const SimdArray &) =
default;
770 template <
typename U,
typename Flags = DefaultLoadTag,
771 typename = enable_if<std::is_arithmetic<U>::value &&
772 Traits::is_load_store_flag<Flags>::value>>
773 explicit Vc_INTRINSIC SimdArray(
const U *mem, Flags f = Flags())
774 : data0(mem, f), data1(mem + storage_type0::size(), f)
786 template <
typename U, std::size_t Extent,
typename Flags =
DefaultLoadTag,
787 typename = enable_if<std::is_arithmetic<U>::value &&
788 Traits::is_load_store_flag<Flags>::value>>
789 explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = Flags())
790 : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
796 template <
typename U, std::size_t Extent,
typename Flags =
DefaultLoadTag,
797 typename = enable_if<std::is_arithmetic<U>::value &&
798 Traits::is_load_store_flag<Flags>::value>>
799 explicit Vc_INTRINSIC SimdArray(
const CArray<U, Extent> &mem, Flags f = Flags())
800 : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
806 Vc_INTRINSIC SimdArray(
const std::initializer_list<value_type> &init)
808 , data1(init.begin() + storage_type0::size(),
Vc::
Unaligned)
810 Vc_ASSERT(init.size() == size());
813#include "gatherinterface.h"
814#include "scatterinterface.h"
816 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data0(), data1() {}
817 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data0(o), data1(o) {}
818 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i)
820 , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
821 storage_type0::size()>())
824 template <
size_t Offset>
825 explicit Vc_INTRINSIC SimdArray(
826 Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset> i)
828 , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
829 storage_type0::size() + Offset>())
834 template <
class W,
class = enable_if<
835 (Traits::is_simd_vector<W>::value &&
836 Traits::simd_vector_size<W>::value == N &&
837 !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
838 Traits::isSimdArray<W>::value))>>
839 Vc_INTRINSIC
explicit SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
844 template <
class W,
class = enable_if<
845 (Traits::isSimdArray<W>::value &&
846 Traits::simd_vector_size<W>::value == N &&
847 std::is_convertible<Traits::entry_type_of<W>, T>::value)>,
849 Vc_INTRINSIC SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
853 template <
class W, std::
size_t Pieces, std::
size_t Index>
854 Vc_INTRINSIC SimdArray(Common::Segment<W, Pieces, Index> &&x)
855 : data0(Common::Segment<W, 2 * Pieces, 2 * Index>{x.data})
856 , data1(Common::Segment<W, 2 * Pieces, 2 * Index + 1>{x.data})
862 template <
typename U,
typename A,
864 enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
865 !std::is_same<A, simd_abi::fixed_size<N>>::value>>
866 operator Vector<U, A>()
const
868 auto r = simd_cast<Vector<U, A>>(data0, data1);
871 Vc_INTRINSIC
operator fixed_size_simd<T, N> &()
873 return static_cast<fixed_size_simd<T, N> &
>(*this);
875 Vc_INTRINSIC
operator const fixed_size_simd<T, N> &()
const
877 return static_cast<const fixed_size_simd<T, N> &
>(*this);
882 Vc_INTRINSIC
void setZero()
887 Vc_INTRINSIC
void setZero(
const mask_type &k)
889 data0.setZero(Split::lo(k));
890 data1.setZero(Split::hi(k));
892 Vc_INTRINSIC
void setZeroInverted()
894 data0.setZeroInverted();
895 data1.setZeroInverted();
897 Vc_INTRINSIC
void setZeroInverted(
const mask_type &k)
899 data0.setZeroInverted(Split::lo(k));
900 data1.setZeroInverted(Split::hi(k));
904 Vc_INTRINSIC
void setQnan() {
908 Vc_INTRINSIC
void setQnan(
const mask_type &m) {
909 data0.setQnan(Split::lo(m));
910 data1.setQnan(Split::hi(m));
914 template <
typename Op,
typename... Args>
915 static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
917 fixed_size_simd<T, N> r = {
918 storage_type0::fromOperation(op, Split::lo(args)...),
921 storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
926 template <
typename Op,
typename... Args>
927 static Vc_INTRINSIC
void callOperation(Op op, Args &&... args)
929 storage_type0::callOperation(op, Split::lo(args)...);
930 storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
934 template <
typename U,
typename... Args> Vc_INTRINSIC
void load(
const U *mem, Args &&... args)
936 data0.load(mem, Split::lo(args)...);
938 data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
941 template <
typename U,
typename... Args> Vc_INTRINSIC
void store(U *mem, Args &&... args)
const
943 data0.store(mem, Split::lo(args)...);
945 data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
948 Vc_INTRINSIC mask_type operator!()
const
950 return {!data0, !data1};
953 Vc_INTRINSIC fixed_size_simd<T, N>
operator-()
const
955 return {-data0, -data1};
963 return {~data0, ~data1};
967 template <
typename U,
968 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
969 Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N>
operator<<(U x)
const
971 return {data0 << x, data1 << x};
973 template <
typename U,
974 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
975 Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
981 template <
typename U,
982 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
983 Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x)
const
985 return {data0 >> x, data1 >> x};
987 template <
typename U,
988 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
989 Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
997#define Vc_BINARY_OPERATOR_(op) \
998 Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs) \
1000 data0 op## = rhs.data0; \
1001 data1 op## = rhs.data1; \
1004 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
1005 Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
1006 Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
1007#undef Vc_BINARY_OPERATOR_
// Reads entry i of o: the address of o is reinterpreted as a pointer to
// const alias_type (the Vc_MAY_ALIAS typedef of vectorentry_type) and indexed
// with i; the result is returned as value_type. No range check on i is done
// here.
// NOTE(review): this presumes the entries are laid out contiguously starting
// at the address of the object - confirm against the data member layout.
1015 Vc_INTRINSIC
static value_type get(
const SimdArray &o,
int i)
noexcept
1017 return reinterpret_cast<const alias_type *
>(&o)[i];
// Writes v to entry i of o, using the same alias_type pointer view of the
// object that get() uses. No range check on i is done here.
1019 template <
typename U>
1020 Vc_INTRINSIC
static void set(SimdArray &o,
int i, U &&v)
noexcept(
// noexcept exactly when assigning v to a value_type lvalue is noexcept.
1021 noexcept(std::declval<value_type &>() = v))
1023 reinterpret_cast<alias_type *
>(&o)[i] = v;
1036 static_assert(
noexcept(reference{std::declval<SimdArray &>(), int()}),
"");
1037 return {*
this, int(i)};
1043 return get(*
this,
int(index));
1049 Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type>
operator()(
1052 return {*
this, mask};
1058 data0.assign(v.data0, internal_data0(k));
1059 data1.assign(v.data1, internal_data1(k));
1063#define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_) \
1065 template <typename ForSfinae = void> \
1066 Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1067 storage_type0::Size == storage_type1::Size, \
1068 value_type> name_##_impl() const \
1070 return binary_fun_(data0, data1).name_(); \
1073 template <typename ForSfinae = void> \
1074 Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value && \
1075 storage_type0::Size != storage_type1::Size, \
1076 value_type> name_##_impl() const \
1078 return scalar_fun_(data0.name_(), data1.name_()); \
1083 Vc_INTRINSIC value_type name_() const { return name_##_impl(); } \
1085 Vc_INTRINSIC value_type name_(const mask_type &mask) const \
1087 if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) { \
1088 return data1.name_(Split::hi(mask)); \
1089 } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) { \
1090 return data0.name_(Split::lo(mask)); \
1092 return scalar_fun_(data0.name_(Split::lo(mask)), \
1093 data1.name_(Split::hi(mask))); \
1096 Vc_NOTHING_EXPECTING_SEMICOLON
1099 Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
1100 Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
1101#undef Vc_REDUCTION_FUNCTION_
1105 auto ps0 = data0.partialSum();
1107 tmp[0] += ps0[data0.size() - 1];
1108 return {std::move(ps0), tmp.partialSum()};
1115 return {data0.apply(f), data1.apply(f)};
1118 template <
typename F>
1121 return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
1128 constexpr int SSize = Size;
1129 constexpr int SSize0 = storage_type0::Size;
1130 constexpr int SSize1 = storage_type1::Size;
1135 if (amount > -SSize0) {
1136 return {data0.shifted(amount), data1.shifted(amount, data0)};
1138 if (amount == -SSize0) {
1139 return {storage_type0(0), simd_cast<storage_type1>(data0)};
1141 if (amount < -SSize0) {
1142 return {storage_type0(0), simd_cast<storage_type1>(data0.shifted(
1147 if (amount >= SSize) {
1149 }
else if (amount >= SSize0) {
1151 simd_cast<storage_type0>(data1).shifted(amount - SSize0),
1153 }
else if (amount >= SSize1) {
1154 return {data0.shifted(amount, data1), storage_type1(0)};
1156 return {data0.shifted(amount, data1), data1.shifted(amount)};
1161 template <std::
size_t NN>
1163 !(std::is_same<storage_type0, storage_type1>::value &&
1168 constexpr int SSize = Size;
1173 return operator[](i);
1174 }
else if (i >= -SSize) {
1175 return shiftIn[i + SSize];
1180 return fixed_size_simd<T, N>([&](
int i) -> value_type {
1183 return operator[](i);
1184 }
else if (i < 2 * SSize) {
1185 return shiftIn[i - SSize];
1194 template <std::
size_t NN>
struct bisectable_shift
1195 :
public std::integral_constant<bool,
1196 std::is_same<storage_type0, storage_type1>::value &&
1202 template <std::
size_t NN>
1203 inline fixed_size_simd<T, N>
shifted(
1204 enable_if<bisectable_shift<NN>::value,
int> amount,
1205 const SimdArray<value_type, NN> &shiftIn)
const
1207 constexpr int SSize = Size;
1209 if (amount > -
static_cast<int>(storage_type0::Size)) {
1210 return {data0.shifted(amount, internal_data1(shiftIn)),
1211 data1.shifted(amount, data0)};
1213 if (amount == -
static_cast<int>(storage_type0::Size)) {
1214 return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
1216 if (amount > -SSize) {
1218 internal_data1(shiftIn)
1219 .shifted(amount +
static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1220 data0.shifted(amount +
static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1222 if (amount == -SSize) {
1225 if (amount > -2 * SSize) {
1226 return shiftIn.shifted(amount + SSize);
1232 if (amount <
static_cast<int>(storage_type0::Size)) {
1233 return {data0.shifted(amount, data1),
1234 data1.shifted(amount, internal_data0(shiftIn))};
1236 if (amount ==
static_cast<int>(storage_type0::Size)) {
1237 return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
1239 if (amount < SSize) {
1240 return {data1.shifted(amount -
static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
1241 internal_data0(shiftIn)
1242 .shifted(amount -
static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
1244 if (amount == SSize) {
1247 if (amount < 2 * SSize) {
1248 return shiftIn.shifted(amount - SSize);
1257 amount %= int(size());
1260 }
else if (amount < 0) {
1274 r.data1.load(&tmp[(amount + data0.size()) % size()],
Vc::Unaligned);
1277 auto &&d0cvtd = simd_cast<storage_type1>(data0);
1278 auto &&d1cvtd = simd_cast<storage_type0>(data1);
1279 constexpr int size0 = storage_type0::size();
1280 constexpr int size1 = storage_type1::size();
1282 if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
1283 return {std::move(d1cvtd), std::move(d0cvtd)};
1284 }
else if (amount < size1) {
1285 return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
1286 }
else if (amount == size1) {
1287 return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
1288 }
else if (
int(size()) - amount < size1) {
1289 return {data0.shifted(amount -
int(size()), d1cvtd.shifted(size1 - size0)),
1290 data1.shifted(amount -
int(size()), data0.shifted(size0 - size1))};
1291 }
else if (
int(size()) - amount == size1) {
1292 return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
1293 simd_cast<storage_type1>(data0.shifted(size0 - size1))};
1294 }
else if (amount <= size0) {
1295 return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1296 simd_cast<storage_type1>(data0.shifted(amount - size1))};
1298 return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1299 simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
1310 return {data0.interleaveLow(x.data0),
1311 simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
1314 Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(
const SimdArray &x)
const
1316 return interleaveHighImpl(
1318 std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1323 Vc_INTRINSIC fixed_size_simd<T, N> interleaveHighImpl(
const SimdArray &x, std::true_type)
const
1325 return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
1328 inline fixed_size_simd<T, N> interleaveHighImpl(
const SimdArray &x, std::false_type)
const
1330 return {data0.interleaveHigh(x.data0)
1331 .shifted(storage_type1::Size,
1332 simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
1333 data1.interleaveHigh(x.data1)};
1340 if (std::is_same<storage_type0, storage_type1>::value) {
1341 return {simd_cast<storage_type0>(data1).reversed(),
1342 simd_cast<storage_type1>(data0).reversed()};
1353 return {data0.shifted(storage_type1::Size, data1).reversed(),
1354 simd_cast<storage_type1>(data0.reversed().shifted(
1355 storage_type0::Size - storage_type1::Size))};
1363 std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
1369#ifdef Vc_DEBUG_SORTED
1370 std::cerr <<
"-- " << data0 << data1 <<
'\n';
1372 const auto a = data0.
sorted();
1374 const auto lo =
Vc::min(a, b);
1375 const auto hi =
Vc::max(a, b);
1376 return {lo.sorted(), hi.sorted()};
1380 Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::false_type)
const
1382 using SortableArray =
1383 fixed_size_simd<value_type, Common::NextPowerOfTwo<size()>::value>;
1384 auto sortable = simd_cast<SortableArray>(*
this);
1385 for (std::size_t i = Size; i < SortableArray::Size; ++i) {
1386 using limits = std::numeric_limits<value_type>;
1387 if (limits::has_infinity) {
1388 sortable[i] = limits::infinity();
1393 return simd_cast<fixed_size_simd<T, N>>(sortable.sorted());
1427 static constexpr std::size_t Size = size();
1430 Vc_DEPRECATED(
"use exponent(x) instead")
1437 Vc_DEPRECATED(
"use isnegative(x) instead") Vc_INTRINSIC
MaskType isNegative()
const
1443 Vc_DEPRECATED(
"use copysign(x, y) instead")
1452 friend storage_type0 &internal_data0<>(
SimdArray &x);
1453 friend storage_type1 &internal_data1<>(
SimdArray &x);
1454 friend const storage_type0 &internal_data0<>(
const SimdArray &x);
1455 friend const storage_type1 &internal_data1<>(
const SimdArray &x);
1458 Vc_INTRINSIC
SimdArray(storage_type0 &&x, storage_type1 &&y)
1459 : data0(std::move(x)), data1(std::move(y))
1463 Vc_FREE_STORE_OPERATORS_ALIGNED(
alignof(storage_type0));
1469 alignas(
static_cast<std::size_t
>(
1470 Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value *
sizeof(V) /
1471 V::size()>::value)) storage_type0 data0;
1472 storage_type1 data1;
1474#undef Vc_CURRENT_CLASS_NAME
1475template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1477template <
typename T, std::
size_t N,
typename V, std::
size_t M>
// Out-of-class definition of a SimdArray member template taking Common::GatherArguments.
// NOTE(review): the line naming the function is not visible in this excerpt; presumably
// this is the unmasked gatherImplementation overload - confirm.
// Both halves gather from args.address: data0 uses the low part of args.indexes
// (Split::lo), data1 the high part (Split::hi).
1481template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1482template <
class MT,
class IT,
int Scale>
1484 const Common::GatherArguments<MT, IT, Scale> &args)
1486 data0.gather(Common::make_gather<Scale>(
1487 args.address, Split::lo(Common::Operations::gather(), args.indexes)));
1488 data1.gather(Common::make_gather<Scale>(
1489 args.address, Split::hi(Common::Operations::gather(), args.indexes)));
// Masked gather: like the unmasked variant, data0 gathers with the low part of
// args.indexes and data1 with the high part, both reading from args.address.
// NOTE(review): the trailing arguments that hand `mask` to each half are not visible in
// this excerpt.
1491template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1492template <
class MT,
class IT,
int Scale>
1493inline void SimdArray<T, N, VectorType, M>::gatherImplementation(
1494 const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
1496 data0.gather(Common::make_gather<Scale>(
1497 args.address, Split::lo(Common::Operations::gather(), args.indexes)),
1499 data1.gather(Common::make_gather<Scale>(
1500 args.address, Split::hi(Common::Operations::gather(), args.indexes)),
// Unmasked scatter: data0 is scattered to `mem` using the low part of the index argument
// and data1 using the high part. The indexes are split with the Operations::gather() tag.
// `indexes` is only forwarded (std::forward) in its last use, i.e. for the high part.
1505template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1506template <
typename MT,
typename IT>
1507inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
1510 data0.scatter(mem, Split::lo(Common::Operations::gather(),
1513 data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
// Masked scatter: data0 is scattered with the low part of `indexes`, data1 with the high
// part; `indexes` is forwarded only in its last use.
// NOTE(review): the arguments passing `mask` on to each half are not visible in this
// excerpt.
1515template <
typename T, std::
size_t N,
typename VectorType, std::
size_t M>
1516template <
typename MT,
typename IT>
1517inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
1518 IT &&indexes, MaskArgument mask)
const
1520 data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
1523 data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
// Free accessor for a mutable SimdArray, returning a reference of type
// SimdArrayTraits<T, N>::storage_type0. Declared a friend of SimdArray.
// NOTE(review): the body is not visible here; presumably it returns x.data0.
1529template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1533typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1534 SimdArray<T, N, V, M> &x)
// Free accessor for a mutable SimdArray, returning a reference of type
// SimdArrayTraits<T, N>::storage_type1. Declared a friend of SimdArray.
// NOTE(review): the body is not visible here; presumably it returns x.data1.
1539template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1543typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1544 SimdArray<T, N, V, M> &x)
// Const overload of internal_data0: takes a const SimdArray and returns a const reference
// of type SimdArrayTraits<T, N>::storage_type0.
// NOTE(review): the body is not visible here; presumably it returns x.data0.
1549template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1553const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
1554 const SimdArray<T, N, V, M> &x)
// Const overload of internal_data1: takes a const SimdArray and returns a const reference
// of type SimdArrayTraits<T, N>::storage_type1.
// NOTE(review): the body is not visible here; presumably it returns x.data1.
1559template <
typename T, std::
size_t N,
typename V, std::
size_t M>
1563const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
1564 const SimdArray<T, N, V, M> &x)
// Only compiled for MSVC targeting SSE without AVX: out-of-class definition of the
// two-halves constructor of SimdArray<double, 8>. Unlike the generic constructor, data0 is
// initialized from `x` without std::move and data1 is first initialized with 0.
// NOTE(review): the statement that stores `y` is not visible in this excerpt.
1572#if defined Vc_MSVC && defined Vc_IMPL_SSE && !defined Vc_IMPL_AVX
1574Vc_INTRINSIC SimdArray<double, 8>::SimdArray(fixed_size_simd<double, 4> &&x,
1575 fixed_size_simd<double, 4> &&y)
1576 : data0(x), data1(0)
// Vc_FIXED_OP(op) generates `operator op` for two fixed_size_simd<T, N> operands that
// returns fixed_size_simd<T, N>:
//  - is_atomic: applies `op` to internal_data(a) and internal_data(b) and wraps the
//    result using private_init;
//  - !is_atomic: applies `op` separately to the internal_data0 and internal_data1 halves
//    and builds the result from the two partial results.
// The macro is expanded for the arithmetic, binary and shift operator lists.
1585#define Vc_FIXED_OP(op) \
1586 template <class T, int N, \
1587 class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type> \
1588 fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a, \
1589 const fixed_size_simd<T, N> &b) \
1591 return {private_init, internal_data(a) op internal_data(b)}; \
1593 template <class T, int N, \
1594 class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type, \
1596 fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a, \
1597 const fixed_size_simd<T, N> &b) \
1599 return {internal_data0(a) op internal_data0(b), \
1600 internal_data1(a) op internal_data1(b)}; \
1602Vc_ALL_ARITHMETICS(Vc_FIXED_OP);
1603Vc_ALL_BINARY(Vc_FIXED_OP);
1604Vc_ALL_SHIFTS(Vc_FIXED_OP);
// Comparison flavour of Vc_FIXED_OP: same atomic / non-atomic split as the arithmetic
// version, but the generated operators return fixed_size_simd_mask<T, N>.
// The macro is expanded for the comparison operator list.
1606#define Vc_FIXED_OP(op) \
1607 template <class T, int N, \
1608 class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type> \
1609 fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a, \
1610 const fixed_size_simd<T, N> &b) \
1612 return {private_init, internal_data(a) op internal_data(b)}; \
1614 template <class T, int N, \
1615 class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type, \
1617 fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a, \
1618 const fixed_size_simd<T, N> &b) \
1620 return {internal_data0(a) op internal_data0(b), \
1621 internal_data1(a) op internal_data1(b)}; \
1623Vc_ALL_COMPARES(Vc_FIXED_OP);
1629namespace result_vector_type_internal
/// Yields \p T with a reference removed first and top-level const/volatile removed after.
template <typename T>
using remove_cvref = typename std::remove_volatile<
    typename std::remove_const<typename std::remove_reference<T>::type>::type>::type;
/// std::integral_constant<bool, B> where B is true for integral \p T that is either
/// `long`, `unsigned long`, or has a larger size than `int`.
template <typename T>
using is_integer_larger_than_int = std::integral_constant<
    bool, (std::is_same<T, long>::value || std::is_same<T, unsigned long>::value ||
           sizeof(T) > sizeof(int)) &&
              std::is_integral<T>::value>;
1641 typename L,
typename R,
1642 std::size_t N = Traits::isSimdArray<L>::value ? Traits::simd_vector_size<L>::value
1643 : Traits::simd_vector_size<R>::value,
1644 bool = (Traits::isSimdArray<L>::value ||
1645 Traits::isSimdArray<R>::value) &&
1646 !(Traits::is_fixed_size_simd<L>::value &&
1647 Traits::is_fixed_size_simd<R>::value) &&
1648 ((std::is_arithmetic<remove_cvref<L>>::value &&
1649 !is_integer_larger_than_int<remove_cvref<L>>::value) ||
1650 (std::is_arithmetic<remove_cvref<R>>::value &&
1651 !is_integer_larger_than_int<remove_cvref<R>>::value) ||
1654 Traits::simd_vector_size<L>::value == Traits::simd_vector_size<R>::value)>
// Partial specialization chosen when the bool parameter of `evaluate` is true.
// `type` is a fixed_size_simd whose entry type is derived from the entry types of L and R:
//  - both integral and both smaller than int: no promotion to int takes place; for equal
//    sizes LScalar is used if it is unsigned and RScalar otherwise, for unequal sizes the
//    larger of the two types is used;
//  - in every other case: the type of `LScalar + RScalar`.
// NOTE(review): the remaining template arguments of fixed_size_simd (presumably the
// width N) are not visible in this excerpt.
1657template <
typename L,
typename R, std::
size_t N>
struct evaluate<L, R, N, true>
1660 using LScalar = Traits::entry_type_of<L>;
1661 using RScalar = Traits::entry_type_of<R>;
1663 template <
bool B,
typename T,
typename F>
1664 using conditional =
typename std::conditional<B, T, F>::type;
1677 using type = fixed_size_simd<
1678 conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
1679 sizeof(LScalar) <
sizeof(
int) &&
1680 sizeof(RScalar) <
sizeof(
int)),
1681 conditional<(
sizeof(LScalar) ==
sizeof(RScalar)),
1682 conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
1683 conditional<(
sizeof(LScalar) >
sizeof(RScalar)), LScalar, RScalar>>,
1684 decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
// Shorthand for result_vector_type_internal::evaluate<L, R>::type, i.e. the vector type
// that mixed operands L and R are converted to by the operators defined below.
1690template <
typename L,
typename R>
1691using result_vector_type =
typename result_vector_type_internal::evaluate<L, R>::type;
// Generates `operator op_` for mixed operand types L and R. Both operands are converted
// to `const result_vector_type<L, R> &` via static_cast and then passed on to
// Vc::Detail::operator op_. The return type is result_vector_type<L, R>.
// Expanded for the arithmetic and binary operator lists, then undefined again.
1693#define Vc_BINARY_OPERATORS_(op_) \
1695 template <typename L, typename R> \
1696 Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs) \
1698 using Return = result_vector_type<L, R>; \
1699 return Vc::Detail::operator op_( \
1700 static_cast<const Return &>(std::forward<L>(lhs)), \
1701 static_cast<const Return &>(std::forward<R>(rhs))); \
1721Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
1722Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
1724#undef Vc_BINARY_OPERATORS_
// Comparison flavour: generates `operator op_` for mixed operand types L and R. Both
// operands are converted to result_vector_type<L, R> (named Promote) by constructing it
// from the forwarded operand; the comparison of the two promoted values is returned as
// result_vector_type<L, R>::mask_type. Expanded for the comparison operator list.
1725#define Vc_BINARY_OPERATORS_(op_) \
1727 template <typename L, typename R> \
1728 Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs, \
1731 using Promote = result_vector_type<L, R>; \
1732 return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs)); \
1752Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
1754#undef Vc_BINARY_OPERATORS_
// Defines the unary function `name_` for SimdArray<T, N, V, M> and for
// fixed_size_simd<T, N>. Both overloads return
// fixed_size_simd<T, N>::fromOperation(Common::Operations::Forward_<name_>(), x).
1757#define Vc_FORWARD_UNARY_OPERATOR(name_) \
1759 template <typename T, std::size_t N, typename V, std::size_t M> \
1760 inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x) \
1762 return fixed_size_simd<T, N>::fromOperation( \
1763 Common::Operations::Forward_##name_(), x); \
1765 template <class T, int N> \
1766 fixed_size_simd<T, N> name_(const fixed_size_simd<T, N> &x) \
1768 return fixed_size_simd<T, N>::fromOperation( \
1769 Common::Operations::Forward_##name_(), x); \
1771 Vc_NOTHING_EXPECTING_SEMICOLON
// Like Vc_FORWARD_UNARY_OPERATOR, but for predicates: the generated `name_` overloads
// return fixed_size_simd_mask<T, N>::fromOperation(Common::Operations::Forward_<name_>(), x).
1773#define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_) \
1775 template <typename T, std::size_t N, typename V, std::size_t M> \
1776 inline fixed_size_simd_mask<T, N> name_(const SimdArray<T, N, V, M> &x) \
1778 return fixed_size_simd_mask<T, N>::fromOperation( \
1779 Common::Operations::Forward_##name_(), x); \
1781 template <class T, int N> \
1782 fixed_size_simd_mask<T, N> name_(const fixed_size_simd<T, N> &x) \
1784 return fixed_size_simd_mask<T, N>::fromOperation( \
1785 Common::Operations::Forward_##name_(), x); \
1787 Vc_NOTHING_EXPECTING_SEMICOLON
// Defines the binary function `name_` for two SimdArray<T, N, V, M> arguments. It returns
// fixed_size_simd<T, N>::fromOperation(Common::Operations::Forward_<name_>(), x, y).
1789#define Vc_FORWARD_BINARY_OPERATOR(name_) \
1791 template <typename T, std::size_t N, typename V, std::size_t M> \
1792 inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x, \
1793 const SimdArray<T, N, V, M> &y) \
1795 return fixed_size_simd<T, N>::fromOperation( \
1796 Common::Operations::Forward_##name_(), x, y); \
1798 Vc_NOTHING_EXPECTING_SEMICOLON
1805Vc_FORWARD_UNARY_OPERATOR(
abs);
1811Vc_FORWARD_UNARY_OPERATOR(
cos);
1812Vc_FORWARD_UNARY_OPERATOR(
exp);
1816template <
typename T, std::
size_t N>
1827template <
typename T, std::
size_t N>
1833template <
typename T, std::
size_t N>
1838Vc_FORWARD_UNARY_OPERATOR(
log);
1844Vc_FORWARD_UNARY_OPERATOR(
sin);
1846template <
typename T, std::
size_t N>
1853Vc_FORWARD_BINARY_OPERATOR(
min);
1854Vc_FORWARD_BINARY_OPERATOR(
max);
1856#undef Vc_FORWARD_UNARY_OPERATOR
1857#undef Vc_FORWARD_UNARY_BOOL_OPERATOR
1858#undef Vc_FORWARD_BINARY_OPERATOR
// Vc_DUMMY_ARG0..5 are appended to the parameter lists of the offset simd_cast overloads
// below. Two alternative sets of definitions exist: the first adds an extra defaulted
// parameter of a distinct type per macro, the second expands to nothing.
// NOTE(review): the preprocessor conditional selecting between the two sets is not
// visible in this excerpt.
1862#define Vc_DUMMY_ARG0 , int = 0
1863#define Vc_DUMMY_ARG1 , long = 0
1864#define Vc_DUMMY_ARG2 , short = 0
1865#define Vc_DUMMY_ARG3 , char = '0'
1866#define Vc_DUMMY_ARG4 , unsigned = 0u
1867#define Vc_DUMMY_ARG5 , unsigned short = 0u
1869#define Vc_DUMMY_ARG0
1870#define Vc_DUMMY_ARG1
1871#define Vc_DUMMY_ARG2
1872#define Vc_DUMMY_ARG3
1873#define Vc_DUMMY_ARG4
1874#define Vc_DUMMY_ARG5
1881template <
typename Return, std::size_t N,
typename T,
typename... From>
1882Vc_INTRINSIC Vc_CONST enable_if<
sizeof...(From) != 0, Return>
1883simd_cast_impl_smaller_input(
const From &... xs,
const T &last)
1885 Return r = simd_cast<Return>(xs...);
1886 for (
size_t i = 0; i < N; ++i) {
1887 r[i + N *
sizeof...(From)] =
static_cast<typename Return::EntryType
>(last[i]);
1891template <
typename Return, std::
size_t N,
typename T>
1892Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(
const T &last)
1894 Return r = Return();
1895 for (
size_t i = 0; i < N; ++i) {
1896 r[i] =
static_cast<typename Return::EntryType
>(last[i]);
1900template <
typename Return, std::size_t N,
typename T,
typename... From>
1901Vc_INTRINSIC Vc_CONST enable_if<
sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
1902 const From &... xs,
const T &last)
1904 Return r = simd_cast<Return>(xs...);
1905 for (
size_t i = N *
sizeof...(From); i < Return::Size; ++i) {
1906 r[i] =
static_cast<typename Return::EntryType
>(last[i - N *
sizeof...(From)]);
1910template <
typename Return, std::
size_t N,
typename T>
1911Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(
const T &last)
1913 Return r = Return();
1914 for (
size_t i = 0; i < Return::size(); ++i) {
1915 r[i] =
static_cast<typename Return::EntryType
>(last[i]);
// Forward declaration. Takes the leading inputs `xs` plus one trailing, unnamed argument
// of type T. The definition further down in this file returns simd_cast<Return>(xs...),
// i.e. the trailing argument is ignored.
1921template <
typename Return,
typename T,
typename... From>
1922Vc_INTRINSIC_L Vc_CONST_L Return
1923 simd_cast_without_last(
const From &... xs,
const T &) Vc_INTRINSIC_R Vc_CONST_R;
/// Compile-time predicate: \c value is \c true if all types of the pack are the same type.
template <typename... Ts> struct are_all_types_equal;

/// A single type is trivially equal to itself.
template <typename T>
struct are_all_types_equal<T> : public std::true_type
{
};

/// Two or more types: the first two must match and the check must hold for the pack
/// without its first element.
template <typename T0, typename T1, typename... Ts>
struct are_all_types_equal<T0, T1, Ts...>
    : public std::integral_constant<
          bool, are_all_types_equal<T1, Ts...>::value && std::is_same<T0, T1>::value>
{
};
// Forward declaration. Receives two argument packs of identical types, `a...` followed by
// `b...`. The definition further down in this file passes them on to
// simd_cast_interleaved_argument_order_1 together with an index sequence of length
// 2 * sizeof...(Ts).
1958template <
typename Return,
typename... Ts>
1959Vc_INTRINSIC Vc_CONST Return
1960 simd_cast_interleaved_argument_order(
const Ts &... a,
const Ts &... b);
// Forward declarations of the simd_cast_with_offset overload set. `offset` counts entries
// of the input. The overloads are told apart by their enable_if conditions only.

// offset == 0 with one or more inputs of identical type.
1964template <
typename Return, std::size_t offset,
typename From,
typename... Froms>
1965Vc_INTRINSIC Vc_CONST
1966 enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
1967 simd_cast_with_offset(
const From &x,
const Froms &... xs);
// Single input, 0 < offset < From::Size, offset is a multiple of Return::Size.
1969template <
typename Return, std::
size_t offset,
typename From>
1970Vc_INTRINSIC Vc_CONST
1971 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
1972 simd_cast_with_offset(
const From &x);
// Single input, 0 < offset < From::Size, offset is not a multiple of Return::Size and
// Return is a non-atomic SimdArray or SimdMaskArray.
1974template <
typename Return, std::
size_t offset,
typename From>
1975Vc_INTRINSIC Vc_CONST
1976 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1977 ((Traits::isSimdArray<Return>::value &&
1978 !Traits::isAtomicSimdArray<Return>::value) ||
1979 (Traits::isSimdMaskArray<Return>::value &&
1980 !Traits::isAtomicSimdMaskArray<Return>::value))),
1982 simd_cast_with_offset(
const From &x);
// Same offset condition as above, but Return is an atomic SimdArray or SimdMaskArray.
1984template <
typename Return, std::
size_t offset,
typename From>
1985Vc_INTRINSIC Vc_CONST
1986 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
1987 ((Traits::isSimdArray<Return>::value &&
1988 Traits::isAtomicSimdArray<Return>::value) ||
1989 (Traits::isSimdMaskArray<Return>::value &&
1990 Traits::isAtomicSimdMaskArray<Return>::value))),
1992 simd_cast_with_offset(
const From &x);
// Overload for offset >= From::Size with inputs of identical type: the first input lies
// entirely before the requested offset, so it is dropped and the call recurses on the
// remaining inputs with the offset reduced by From::Size.
1994template <
typename Return, std::size_t offset,
typename From,
typename... Froms>
1995Vc_INTRINSIC Vc_CONST enable_if<
1996 (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
1997 simd_cast_with_offset(
const From &,
const Froms &... xs)
1999 return simd_cast_with_offset<Return, offset - From::Size>(xs...);
2003template <
typename Return, std::
size_t offset,
typename From>
2004Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
// first_type_of<Ts...> is an alias for first_type_of_impl<Ts...>::type.
// NOTE(review): the body of first_type_of_impl is not visible in this excerpt; presumably
// its member `type` names the first template argument T - confirm.
2011template <
typename T,
typename... Ts>
struct first_type_of_impl
2015template <
typename... Ts>
using first_type_of =
typename first_type_of_impl<Ts...>::type;
// Forward declarations of the simd_cast_drop_arguments overload set (definitions follow
// further down in this file).

// Single input.
2018template <
typename Return,
typename From>
2019Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
// Inputs of identical type where sizeof...(Froms) inputs hold fewer entries than
// Return::Size.
2020template <
typename Return,
typename... Froms>
2021Vc_INTRINSIC Vc_CONST
2022 enable_if<(are_all_types_equal<Froms...>::value &&
2023 sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
2025 simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
// Inputs of identical type where 1 + sizeof...(Froms) inputs already hold at least
// Return::Size entries; the last parameter is unnamed.
2029template <
typename Return,
typename From,
typename... Froms>
2030Vc_INTRINSIC Vc_CONST enable_if<
2031 (are_all_types_equal<From, Froms...>::value &&
2032 (1 +
sizeof...(Froms)) * From::Size >= Return::Size &&
sizeof...(Froms) != 0),
2034simd_cast_drop_arguments(Froms... xs, From x, From);
// Two inputs where a single one already holds at least Return::Size entries; the second
// parameter is unnamed.
2035template <
typename Return,
typename From>
2036Vc_INTRINSIC Vc_CONST
2037 enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
2038 simd_cast_drop_arguments(From x, From);
2042#ifdef Vc_DEBUG_SIMD_CAST
/// Accepts a braced list of pointers and ignores it. vc_debug_ passes a parameter-pack
/// expansion of stream insertions through this function solely to get every element of
/// the expansion evaluated.
///
/// Declared `inline`: this is a non-template function defined in a header, so without
/// `inline` every translation unit that includes the header with Vc_DEBUG_SIMD_CAST
/// defined would emit its own definition (ODR violation, duplicate symbols at link time).
inline void debugDoNothing(const std::initializer_list<void *> &) {}
// Debug variant of vc_debug_: writes `prefix`, the first argument, every further argument
// preceded by ", ", and finally `suffix` to std::cerr. The pack expansion of the stream
// insertions is evaluated by placing it in the initializer list handed to debugDoNothing.
// NOTE(review): the parameter line declaring the pack `args` is not visible in this
// excerpt.
2044template <
typename T0,
typename... Ts>
2045inline void vc_debug_(
const char *prefix,
const char *suffix,
const T0 &arg0,
2048 std::cerr << prefix << arg0;
2049 debugDoNothing({&(std::cerr <<
", " << args)...});
2050 std::cerr << suffix;
// Variant of vc_debug_ with the same parameter types as the debug variant, but with all
// parameters unnamed, i.e. none of the arguments is used.
2053template <
typename T0,
typename... Ts>
2054Vc_INTRINSIC
void vc_debug_(
const char *,
const char *,
const T0 &,
const Ts &...)
/// Compile-time predicate: \c value is \c true exactly when \p A < \p B.
template <size_t A, size_t B>
struct is_less : public std::conditional<(A < B), std::true_type, std::false_type>::type {
};
// Compile-time predicate: `value` is true when N has at most one bit set, tested as
// ((N - 1) & N) == 0. Note that this expression is also true for N == 0.
// NOTE(review): the template header declaring N is not visible in this excerpt.
2067struct is_power_of_2 : public std::integral_constant<bool, ((N - 1) & N) == 0> {
// simd_cast from one or more native NativeType_<T, A> objects (A not a fixed-size ABI, all
// inputs of the same type) to a SimdArrayType_. Four overloads, selected via enable_if:
//  {1} Return is atomic and the inputs following the first hold fewer entries than
//      Return: all inputs are cast to Return::storage_type, wrapped using private_init.
//  {2} Return is atomic and the inputs following the first already hold at least
//      Return::Size entries: the last input is dropped (simd_cast_without_last).
//  {3} Return is not atomic and the inputs hold more entries than Return's left part
//      (Common::left_size): storage_type0 is filled via simd_cast_drop_arguments and
//      storage_type1 via simd_cast_with_offset at entry R0::Size.
//  {4} Return is not atomic and the inputs do not exceed the left part: the inputs are
//      cast to storage_type0 and storage_type1 is constructed from 0.
// Expanded for (SimdArray, Vc::Vector) and (SimdMaskArray, Vc::Mask).
2071#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \
2072 template <typename Return, typename T, typename A, typename... Froms> \
2073 Vc_INTRINSIC Vc_CONST enable_if< \
2074 (Traits::isAtomic##SimdArrayType_<Return>::value && \
2075 is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value && \
2076 are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2077 !detail::is_fixed_size_abi<A>::value), \
2079 simd_cast(NativeType_<T, A> x, Froms... xs) \
2081 vc_debug_("simd_cast{1}(", ")\n", x, xs...); \
2082 return {private_init, simd_cast<typename Return::storage_type>(x, xs...)}; \
2084 template <typename Return, typename T, typename A, typename... Froms> \
2085 Vc_INTRINSIC Vc_CONST enable_if< \
2086 (Traits::isAtomic##SimdArrayType_<Return>::value && \
2087 !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value && \
2088 are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2089 !detail::is_fixed_size_abi<A>::value), \
2091 simd_cast(NativeType_<T, A> x, Froms... xs) \
2093 vc_debug_("simd_cast{2}(", ")\n", x, xs...); \
2094 return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)}; \
2096 template <typename Return, typename T, typename A, typename... Froms> \
2097 Vc_INTRINSIC Vc_CONST \
2098 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2099 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2100 is_less<Common::left_size<Return::Size>(), \
2101 NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value && \
2102 are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2103 !detail::is_fixed_size_abi<A>::value), \
2105 simd_cast(NativeType_<T, A> x, Froms... xs) \
2107 vc_debug_("simd_cast{3}(", ")\n", x, xs...); \
2108 using R0 = typename Return::storage_type0; \
2109 using R1 = typename Return::storage_type1; \
2110 return {simd_cast_drop_arguments<R0, Froms...>(x, xs...), \
2111 simd_cast_with_offset<R1, R0::Size>(x, xs...)}; \
2113 template <typename Return, typename T, typename A, typename... Froms> \
2114 Vc_INTRINSIC Vc_CONST \
2115 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2116 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2117 !is_less<Common::left_size<Return::Size>(), \
2118 NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value && \
2119 are_all_types_equal<NativeType_<T, A>, Froms...>::value && \
2120 !detail::is_fixed_size_abi<A>::value), \
2122 simd_cast(NativeType_<T, A> x, Froms... xs) \
2124 vc_debug_("simd_cast{4}(", ")\n", x, xs...); \
2125 using R0 = typename Return::storage_type0; \
2126 using R1 = typename Return::storage_type1; \
2127 return {simd_cast<R0>(x, xs...), R1(0)}; \
2129 Vc_NOTHING_EXPECTING_SEMICOLON
2131Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
2132Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
2133#undef Vc_SIMDARRAY_CASTS
// simd_cast<Return, offset> from a single native NativeType_<T, A> to a SimdArrayType_.
// `offset` is counted in units of Return::Size. Three overloads, told apart by enable_if
// and by the Vc_DUMMY_ARGn parameter:
//  - atomic Return: forwards to simd_cast<Return::storage_type, offset>(x);
//  - non-atomic Return whose left part ends before the end of the input
//    (Return::Size * offset + left_size < input size): both halves are read from the
//    input via simd_cast_with_offset, the right half R0::Size entries after the left;
//  - non-atomic Return otherwise: only storage_type0 is read from the input (at entry
//    offset * Return::Size), storage_type1 is constructed from 0.
// Expanded for (SimdArray, Vc::Vector) and (SimdMaskArray, Vc::Mask).
2136#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_) \
2138 template <typename Return, int offset, typename T, typename A> \
2139 Vc_INTRINSIC Vc_CONST \
2140 enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return> \
2141 simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0) \
2143 vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x); \
2144 return {private_init, simd_cast<typename Return::storage_type, offset>(x)}; \
2147 template <typename Return, int offset, typename T, typename A> \
2148 Vc_INTRINSIC Vc_CONST \
2149 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2150 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2151 Return::Size * offset + Common::left_size<Return::Size>() < \
2152 NativeType_<T, A>::Size), \
2154 simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1) \
2156 vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x); \
2157 using R0 = typename Return::storage_type0; \
2158 constexpr int entries_offset = offset * Return::Size; \
2159 constexpr int entries_offset_right = entries_offset + R0::Size; \
2161 simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x), \
2162 simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
2167 template <typename Return, int offset, typename T, typename A> \
2168 Vc_INTRINSIC Vc_CONST \
2169 enable_if<(Traits::is##SimdArrayType_<Return>::value && \
2170 !Traits::isAtomic##SimdArrayType_<Return>::value && \
2171 Return::Size * offset + Common::left_size<Return::Size>() >= \
2172 NativeType_<T, A>::Size), \
2174 simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2) \
2176 vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x); \
2177 using R0 = typename Return::storage_type0; \
2178 using R1 = typename Return::storage_type1; \
2179 constexpr int entries_offset = offset * Return::Size; \
2180 return {simd_cast_with_offset<R0, entries_offset>(x), R1(0)}; \
2182 Vc_NOTHING_EXPECTING_SEMICOLON
2184Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
2185Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
2186#undef Vc_SIMDARRAY_CASTS
// simd_cast from one or more SimdArrayType_ objects of identical type to Return.
// Overloads, named after their debug tags:
//  indivisible / indivisible2: input is SimdArrayType_<T, N, V, N> (one storage member).
//      Forwards internal_data of every input to simd_cast; the "2" variant applies when
//      the inputs following the first already hold at least Return::Size entries and
//      therefore drops the last input (simd_cast_without_last).
//  bisectable / bisectable2: N != M and N is a power of two. Passes all internal_data0
//      followed by all internal_data1 to simd_cast_interleaved_argument_order; the "2"
//      variant drops the last input instead.
//  remaining / remaining2: N != M and N is not a power of two. Element-wise copy via
//      simd_cast_impl_smaller_input when all inputs together hold at most Return::Size
//      entries, otherwise via simd_cast_impl_larger_input.
//  single bisectable: one input with N >= 2 * Return::Size: only internal_data0 is cast.
//  single bisectable2: one input with Return::Size < N < 2 * Return::Size: both halves
//      are passed to simd_cast.
// Expanded for SimdArray and SimdMaskArray.
2189#define Vc_SIMDARRAY_CASTS(SimdArrayType_) \
2191 template <typename Return, typename T, std::size_t N, typename V, typename... From> \
2192 Vc_INTRINSIC Vc_CONST \
2193 enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \
2194 (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) && \
2195 !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \
2197 simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \
2199 vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...); \
2200 return simd_cast<Return>(internal_data(x0), internal_data(xs)...); \
2203 template <typename Return, typename T, std::size_t N, typename V, typename... From> \
2204 Vc_INTRINSIC Vc_CONST \
2205 enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value && \
2206 (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) && \
2207 !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value), \
2209 simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs) \
2211 vc_debug_(
"simd_cast{indivisible2}(",
")\n", x0, xs...); \
2212 return simd_cast_without_last<Return, \
2213 typename SimdArrayType_<T, N, V, N>::storage_type, \
2214 typename From::storage_type...>( \
2215 internal_data(x0), internal_data(xs)...); \
2218 template <
typename Return,
typename T, std::size_t N,
typename V, std::size_t M, \
2220 Vc_INTRINSIC Vc_CONST enable_if< \
2221 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2222 !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value && \
2223 is_less<N *
sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2225 simd_cast(
const SimdArrayType_<T, N, V, M> &x0,
const From &... xs) \
2227 vc_debug_(
"simd_cast{bisectable}(",
")\n", x0, xs...); \
2228 return simd_cast_interleaved_argument_order< \
2229 Return,
typename SimdArrayType_<T, N, V, M>::storage_type0, \
2230 typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)..., \
2231 internal_data1(x0), internal_data1(xs)...); \
2235 template <
typename Return,
typename T, std::size_t N,
typename V, std::size_t M, \
2237 Vc_INTRINSIC Vc_CONST enable_if< \
2238 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2239 !is_less<N *
sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
2241 simd_cast(
const SimdArrayType_<T, N, V, M> &x0,
const From &... xs) \
2243 vc_debug_(
"simd_cast{bisectable2}(",
")\n", x0, xs...); \
2244 return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>( \
2248 template <
typename Return,
typename T, std::size_t N,
typename V, std::size_t M, \
2250 Vc_INTRINSIC Vc_CONST enable_if< \
2251 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2252 N * (1 +
sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value), \
2254 simd_cast(
const SimdArrayType_<T, N, V, M> &x0,
const From &... xs) \
2256 vc_debug_(
"simd_cast{remaining}(",
")\n", x0, xs...); \
2257 return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>, \
2258 From...>(x0, xs...); \
2261 template <
typename Return,
typename T, std::size_t N,
typename V, std::size_t M, \
2263 Vc_INTRINSIC Vc_CONST enable_if< \
2264 (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value && \
2265 N * (1 +
sizeof...(From)) > Return::Size && !is_power_of_2<N>::value), \
2267 simd_cast(
const SimdArrayType_<T, N, V, M> &x0,
const From &... xs) \
2269 vc_debug_(
"simd_cast{remaining2}(",
")\n", x0, xs...); \
2270 return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>, \
2271 From...>(x0, xs...); \
2274 template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
2275 Vc_INTRINSIC Vc_CONST \
2276 enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return> \
2277 simd_cast(
const SimdArrayType_<T, N, V, M> &x) \
2279 vc_debug_(
"simd_cast{single bisectable}(",
")\n", x); \
2280 return simd_cast<Return>(internal_data0(x)); \
2282 template <typename Return, typename T, std::size_t N, typename V, std::size_t M> \
2283 Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size && \
2284 N < 2 * Return::Size && is_power_of_2<N>::value), \
2286 simd_cast(
const SimdArrayType_<T, N, V, M> &x) \
2288 vc_debug_(
"simd_cast{single bisectable2}(",
")\n", x); \
2289 return simd_cast<Return>(internal_data0(x), internal_data1(x)); \
2291 Vc_NOTHING_EXPECTING_SEMICOLON
2293Vc_SIMDARRAY_CASTS(SimdArray);
2294Vc_SIMDARRAY_CASTS(SimdMaskArray);
2295#undef Vc_SIMDARRAY_CASTS
// simd_cast from fixed_size_simd<T, N> inputs to a different type Return: every input is
// converted to `const SimdArray<T, N> &` via static_cast and passed on to the SimdArray
// overloads of simd_cast. Disabled when Return is fixed_size_simd<T, N> itself.
2296template <
class Return,
class T,
int N,
class... Ts,
2297 class = enable_if<!std::is_same<Return, fixed_size_simd<T, N>>::value>>
2298Vc_INTRINSIC Return
simd_cast(
const fixed_size_simd<T, N> &x,
const Ts &... xs)
2300 return simd_cast<Return>(
static_cast<const SimdArray<T, N> &
>(x),
2301 static_cast<const SimdArray<T, N> &
>(xs)...);
// simd_cast from fixed_size_simd_mask<T, N> inputs to a different type Return: every
// input is converted to `const SimdMaskArray<T, N> &` via static_cast and passed on to the
// SimdMaskArray overloads of simd_cast. Disabled when Return is
// fixed_size_simd_mask<T, N> itself.
2303template <
class Return,
class T,
int N,
class... Ts,
2304 class = enable_if<!std::is_same<Return, fixed_size_simd_mask<T, N>>::value>>
2305Vc_INTRINSIC Return
simd_cast(
const fixed_size_simd_mask<T, N> &x,
const Ts &... xs)
2307 return simd_cast<Return>(
static_cast<const SimdMaskArray<T, N> &
>(x),
2308 static_cast<const SimdMaskArray<T, N> &
>(xs)...);
// simd_cast<Return, offset> from a single SimdArrayType_ input; `offset` is counted in
// units of Return::Size. Overloads, named after their debug tags:
//  offset == 0: same as simd_cast<Return>(x).
//  forward: input with a single storage member (SimdArrayType_<T, N, V, N>), offset != 0:
//      forwards to simd_cast<Return, offset>(internal_data(x)).
//  right: requested range starts at or after the left part (Common::left_size<N>()) and
//      the left part is a multiple of Return::Size: casts from internal_data1 with the
//      offset reduced by left_size / Return::Size.
//  right, nofit: as "right", but the left part is not a multiple of Return::Size: uses
//      simd_cast_with_offset on internal_data1 with an entry offset.
//  left: requested range ends within the left part: casts from internal_data0.
//  copy scalars: requested range starts in the left part and ends beyond it: starts
//      from Return(0) and copies entries one by one, up to index min(N, range end).
// Expanded for SimdArray and SimdMaskArray.
2312#define Vc_SIMDARRAY_CASTS(SimdArrayType_) \
2314 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2316 Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast( \
2317 const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0) \
2319 vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x); \
2320 return simd_cast<Return>(x); \
2323 template <typename Return, int offset, typename T, std::size_t N, typename V> \
2324 Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast( \
2325 const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1) \
2327 vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x); \
2328 return simd_cast<Return, offset>(internal_data(x)); \
2331 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2333 Vc_INTRINSIC Vc_CONST \
2334 enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \
2335 offset != 0 && Common::left_size<N>() % Return::Size == 0), \
2337 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2) \
2339 vc_debug_("simd_cast{offset, right}(", ")\n", offset, x); \
2340 return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>( \
2341 internal_data1(x)); \
2345 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2347 Vc_INTRINSIC Vc_CONST \
2348 enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() && \
2349 offset != 0 && Common::left_size<N>() % Return::Size != 0), \
2351 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3) \
2353 vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x); \
2354 return simd_cast_with_offset<Return, \
2355 offset * Return::Size - Common::left_size<N>()>( \
2356 internal_data1(x)); \
2359 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2361 Vc_INTRINSIC Vc_CONST enable_if< \
2363 offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()), \
2365 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4) \
2367 vc_debug_("simd_cast{offset, left}(", ")\n", offset, x); \
2368 return simd_cast<Return, offset>(internal_data0(x)); \
2371 template <typename Return, int offset, typename T, std::size_t N, typename V, \
2373 Vc_INTRINSIC Vc_CONST \
2374 enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) && \
2375 offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
2377 simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5) \
2379 vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x); \
2380 using R = typename Return::EntryType; \
2381 Return r = Return(0); \
2382 for (std::size_t i = offset * Return::Size; \
2383 i < std::min(N, (offset + 1) * Return::Size); ++i) { \
2384 r[i - offset * Return::Size] = static_cast<R>(x[i]); \
2388 Vc_NOTHING_EXPECTING_SEMICOLON
2389Vc_SIMDARRAY_CASTS(SimdArray);
2390Vc_SIMDARRAY_CASTS(SimdMaskArray);
2391#undef Vc_SIMDARRAY_CASTS
// Single input: nothing to drop, plain simd_cast<Return>(x).
2393template <
typename Return,
typename From>
2394Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
2396 return simd_cast<Return>(x);
// Inputs of identical type where sizeof...(Froms) of them hold fewer entries than
// Return::Size: every input is needed, so all are passed on to simd_cast<Return>.
2398template <
typename Return,
typename... Froms>
2399Vc_INTRINSIC Vc_CONST
2400 enable_if<(are_all_types_equal<Froms...>::value &&
2401 sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
2403 simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
2405 return simd_cast<Return>(xs..., x);
// Inputs of identical type where 1 + sizeof...(Froms) of them already hold at least
// Return::Size entries: the last (unnamed) argument is dropped and the function recurses
// on the remaining ones.
2410template <
typename Return,
typename From,
typename... Froms>
2411Vc_INTRINSIC Vc_CONST enable_if<
2412 (are_all_types_equal<From, Froms...>::value &&
2413 (1 +
sizeof...(Froms)) * From::Size >= Return::Size &&
sizeof...(Froms) != 0),
2415simd_cast_drop_arguments(Froms... xs, From x, From)
2417 return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
// Two inputs where a single one already holds at least Return::Size entries: the second
// (unnamed) argument is dropped and the single-input overload is called.
2419template <
typename Return,
typename From>
2420Vc_INTRINSIC Vc_CONST
2421 enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
2422 simd_cast_drop_arguments(From x, From)
2424 return simd_cast_drop_arguments<Return>(x);
// 0 < offset < From::Size and offset is a multiple of Return::Size: the entry offset is
// converted to an offset in units of Return::Size and handed to simd_cast<Return, n>.
2428 template <
typename Return, std::
size_t offset,
typename From>
2429 Vc_INTRINSIC Vc_CONST
2430 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
2431 Return> simd_cast_with_offset(
const From &x)
2433 return simd_cast<Return, offset / Return::Size>(x);
// 0 < offset < From::Size, offset not a multiple of Return::Size, and Return is a
// non-atomic SimdArray or SimdMaskArray: the two storage members of Return are produced
// separately, the first at `offset` and the second R0::Size entries further on.
2435template <
typename Return, std::
size_t offset,
typename From>
2436Vc_INTRINSIC Vc_CONST
2437 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2438 ((Traits::isSimdArray<Return>::value &&
2439 !Traits::isAtomicSimdArray<Return>::value) ||
2440 (Traits::isSimdMaskArray<Return>::value &&
2441 !Traits::isAtomicSimdMaskArray<Return>::value))),
2443 simd_cast_with_offset(
const From &x)
2445 using R0 =
typename Return::storage_type0;
2446 using R1 =
typename Return::storage_type1;
2447 return {simd_cast_with_offset<R0, offset>(x),
2448 simd_cast_with_offset<R1, offset + R0::Size>(x)};
// 0 < offset < From::Size, offset not a multiple of Return::Size, and Return is an atomic
// SimdArray or SimdMaskArray: the input is first shifted by offset % Return::Size
// (x.shifted) and then cast with the remaining offset / Return::Size.
2450template <
typename Return, std::
size_t offset,
typename From>
2451Vc_INTRINSIC Vc_CONST
2452 enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
2453 ((Traits::isSimdArray<Return>::value &&
2454 Traits::isAtomicSimdArray<Return>::value) ||
2455 (Traits::isSimdMaskArray<Return>::value &&
2456 Traits::isAtomicSimdMaskArray<Return>::value))),
2458 simd_cast_with_offset(
const From &x)
2460 return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
// offset == 0 with one or more inputs of identical type: plain simd_cast<Return> of all
// inputs.
2462template <
typename Return, std::size_t offset,
typename From,
typename... Froms>
2463Vc_INTRINSIC Vc_CONST
2464 enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
2465 simd_cast_with_offset(
const From &x,
const Froms &... xs)
2467 return simd_cast<Return>(x, xs...);
// Casts all inputs except the last one: the trailing argument of type T is unnamed and
// ignored, the leading inputs `xs` are passed on to simd_cast<Return>.
2471template <
typename Return,
typename T,
typename... From>
2472Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(
const From &... xs,
const T &)
2474 return simd_cast<Return>(xs...);
// Overload set extract_interleaved<I>(a..., b...). It is used by
// simd_cast_interleaved_argument_order_1 to pick the I-th argument for each index of an
// index sequence. The overloads are selected on I (I == 0, I == 1, I > 1) and on whether
// more than one pair of arguments is present; the I > 1 overload recurses with I - 2.
// NOTE(review): most bodies and parts of the parameter lists are not visible in this
// excerpt; presumably I == 0 yields a0 and I == 1 yields b0, so that the resulting order
// is a0, b0, a1, b1, ... - confirm against the full source.
2483template <std::
size_t I,
typename T0>
2484Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(
const T0 &a0,
const T0 &)
2488template <std::
size_t I,
typename T0>
2489Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(
const T0 &,
const T0 &b0)
2496template <std::size_t I,
typename T0,
typename... Ts>
2497Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(
const T0 &a0,
2505template <std::size_t I,
typename T0,
typename... Ts>
2506Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(
const T0 &,
2514template <std::size_t I,
typename T0,
typename... Ts>
2515Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(
const T0 &,
2520 return extract_interleaved<I - 2, Ts...>(a..., b...);
// Helper taking an index_sequence: for every index it selects one argument via
// extract_interleaved<Index, Ts...>(a..., b...) and passes the selected arguments, in
// index order, to simd_cast<Return>.
// NOTE(review): the parameter line declaring the pack `b` is not visible in this excerpt.
2523template <
typename Return,
typename... Ts, std::size_t... Indexes>
2524Vc_INTRINSIC Vc_CONST Return
2525 simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>,
const Ts &... a,
2528 return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
// Entry point: builds an index sequence with 2 * sizeof...(Ts) entries (one per argument
// in `a...` and `b...`) and delegates to simd_cast_interleaved_argument_order_1.
2532template <
typename Return,
typename... Ts>
2533Vc_INTRINSIC Vc_CONST Return
2534 simd_cast_interleaved_argument_order(
const Ts &... a,
const Ts &... b)
2536 using seq = make_index_sequence<
sizeof...(Ts)*2>;
2537 return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
// Generates the conditional_assign overloads for binary (compound) assignment
// on SimdArray. Each expansion defines a function template that is enabled
// only for O == Operator::name_ and executes `lhs(mask) op_ rhs;`, i.e. the
// assignment operator op_ applied through the write-masked view of lhs.
// The macro is expanded once per supported operator and undefined afterwards.
// NOTE(review): the line declaring template parameter U and the lines holding
// the function's braces are missing from this listing.
2541#define Vc_CONDITIONAL_ASSIGN(name_, op_) \
2542 template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M, \
2544 Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign( \
2545 SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs) \
2547 lhs(mask) op_ rhs; \
2549 Vc_NOTHING_EXPECTING_SEMICOLON
2550Vc_CONDITIONAL_ASSIGN( Assign, =);
2551Vc_CONDITIONAL_ASSIGN( PlusAssign, +=);
2552Vc_CONDITIONAL_ASSIGN( MinusAssign, -=);
2553Vc_CONDITIONAL_ASSIGN( MultiplyAssign, *=);
2554Vc_CONDITIONAL_ASSIGN( DivideAssign, /=);
2555Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
2556Vc_CONDITIONAL_ASSIGN( XorAssign, ^=);
2557Vc_CONDITIONAL_ASSIGN( AndAssign, &=);
2558Vc_CONDITIONAL_ASSIGN( OrAssign, |=);
2559Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
2560Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
2561#undef Vc_CONDITIONAL_ASSIGN
// Generates the conditional_assign overloads for the increment and decrement
// operators on SimdArray. Each expansion defines a function template that is
// enabled only for O == Operator::name_, takes lhs and a mask, and returns a
// SimdArray<T, N, V, VN> by value. The expressions passed as expr_ apply
// ++ / -- (prefix or postfix) to the write-masked view lhs(mask).
// NOTE(review): the function body is on lines missing from this listing -
// presumably it returns expr_; confirm against the original header.
2563#define Vc_CONDITIONAL_ASSIGN(name_, expr_) \
2564 template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M> \
2565 Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>> \
2566 conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask) \
2570 Vc_NOTHING_EXPECTING_SEMICOLON
2571Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
2572Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
2573Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
2574Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
2575#undef Vc_CONDITIONAL_ASSIGN
// 4x4 transpose for SimdArray<T, N, V, N>, i.e. SimdArrays whose storage is a
// single vector of type V.
// r:     array of four pointers to the output SimdArrays.
// proxy: TransposeProxy holding the four input SimdArrays in proxy.in.
// The function unwraps outputs and inputs to their underlying V objects via
// internal_data() and delegates to the transpose_impl overload for V.
2579template <
typename T,
size_t N,
typename V>
2580inline void transpose_impl(
2581 TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
2582 const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
2583 SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
2585 V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2586 &internal_data(*r[2]), &internal_data(*r[3])};
2587 transpose_impl(TransposeTag<4, 4>(), &r2[0],
2588 TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2589 internal_data(std::get<1>(proxy.in)),
2590 internal_data(std::get<2>(proxy.in)),
2591 internal_data(std::get<3>(proxy.in))});
// Transpose with TransposeTag<2, 4>: four 2-element inputs
// (SimdArray<T, 2, V, 1>) are written into 4-element outputs
// (SimdArray<T, 4, V, 1>).
// `lo` receives the first half (internal_data0) of inputs 0..3 in order, `hi`
// receives the second half (internal_data1) of inputs 0..3 in order. Each
// destination slot is addressed through two levels of
// internal_data0/internal_data1.
// NOTE(review): `lo` and `hi` are declared on lines missing from this listing -
// presumably they refer to *r[0] and *r[1]; confirm against the original.
2594template <
typename T,
typename V>
2595inline void transpose_impl(
2596 TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
2597 const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
2598 SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
2602 internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
2603 internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
2604 internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
2605 internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
2606 internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
2607 internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
2608 internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
2609 internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
// 4x4 transpose overload for SimdArray<T, 1, V, 1> (one element per array).
// r:     array of four pointers to the output SimdArrays.
// proxy: TransposeProxy holding the four input SimdArrays in proxy.in.
// Outputs and inputs are unwrapped to their underlying V objects via
// internal_data() and the work is delegated to the transpose_impl overload
// for V.
2612template <
typename T,
typename V>
2613inline void transpose_impl(
2614 TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
2615 const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
2616 SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
2618 V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
2619 &internal_data(*r[2]), &internal_data(*r[3])};
2620 transpose_impl(TransposeTag<4, 4>(), &r2[0],
2621 TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
2622 internal_data(std::get<1>(proxy.in)),
2623 internal_data(std::get<2>(proxy.in)),
2624 internal_data(std::get<3>(proxy.in))});
// 4x4 transpose overload for SimdArray<T, N, V, 1>.
// The work is split into two TransposeTag<2, 4> transposes:
//  - the first halves (internal_data0) of the four inputs are transposed into
//    the outputs r[0] and r[1];
//  - the second halves (internal_data1) of the four inputs are transposed into
//    the outputs r[2] and r[3].
// NOTE(review): the half type H is hard-coded as SimdArray<T, 2>, so this
// presumably is only meant for N == 4 - confirm against the callers.
2627template <
typename T,
size_t N,
typename V>
2628inline void transpose_impl(
2629 TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
2630 const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
2631 SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
2633 SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
2634 SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
2635 using H = SimdArray<T, 2>;
2636 transpose_impl(TransposeTag<2, 4>(), &r0[0],
2637 TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
2638 internal_data0(std::get<1>(proxy.in)),
2639 internal_data0(std::get<2>(proxy.in)),
2640 internal_data0(std::get<3>(proxy.in))});
2641 transpose_impl(TransposeTag<2, 4>(), &r1[0],
2642 TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
2643 internal_data1(std::get<1>(proxy.in)),
2644 internal_data1(std::get<2>(proxy.in)),
2645 internal_data1(std::get<3>(proxy.in))});
// InterleaveImpl specialization for SimdArray<T, N, V, N>, i.e. SimdArrays
// whose storage is a single vector of type V.
// It declares two static member templates:
//  - interleave(data, i, vv...):   takes a pointer to mutable T data, an index
//    object i and a pack of const vectors;
//  - deinterleave(data, i, vv...): takes a pointer to const T data, an index
//    object i and a pack of mutable vectors.
// NOTE(review): both member bodies are on lines missing from this listing -
// presumably they forward to the InterleaveImpl of the underlying vector type
// V; confirm against the original header.
2686template <
class T,
size_t N,
class V,
size_t VSizeof>
2687struct InterleaveImpl<SimdArray<T, N, V, N>, N, VSizeof> {
2688 template <
class I,
class... VV>
2689 static Vc_INTRINSIC
void interleave(T *
const data,
const I &i,
const VV &... vv)
2693 template <
class I,
class... VV>
2694 static Vc_INTRINSIC
void deinterleave(T
const *
const data,
const I &i, VV &... vv)
// numeric_limits specialization for Vc::SimdArray<T, N, V, VN>.
// It derives publicly from numeric_limits<T>, so every member that is not
// redeclared here is inherited from the scalar specialization. Each visible
// member function returns the corresponding numeric_limits<T> value converted
// to the return type R.
// NOTE(review): the definition of R and some members are on lines missing from
// this listing - presumably R is the SimdArray type, so the scalar value is
// broadcast to all entries; confirm against the original header.
2735template <
typename T,
size_t N,
typename V,
size_t VN>
2736struct numeric_limits<
Vc::SimdArray<T, N, V, VN>> :
public numeric_limits<T> {
2743 static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
2745 return numeric_limits<T>::lowest();
2747 static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
2749 return numeric_limits<T>::epsilon();
2751 static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
2753 return numeric_limits<T>::round_error();
2755 static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
2757 return numeric_limits<T>::infinity();
2759 static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
2761 return numeric_limits<T>::quiet_NaN();
2763 static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
2765 return numeric_limits<T>::signaling_NaN();
2767 static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
2769 return numeric_limits<T>::denorm_min();
The main SIMD mask class.
Data-parallel arithmetic type with user-defined number of elements.
fixed_size_simd< T, N > sorted() const
Return a sorted copy of the vector.
fixed_size_simd< T, N > apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
Common::WriteMaskedVector< SimdArray, mask_type > operator()(const mask_type &mask)
Writemask the vector before an assignment.
static fixed_size_simd< T, N > IndexesFromZero()
Returns a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
fixed_size_simd< T, N > partialSum() const
Returns a vector containing the sum of all entries with smaller index.
reference operator[](size_t i) noexcept
This operator can be used to modify scalar entries of the vector.
fixed_size_simd< T, N > shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
SimdArray()=default
Construct a zero-initialized vector object.
static fixed_size_simd< T, N > generate(const G &gen)
Generate a vector object from return values of gen (static variant of fill).
static constexpr std::size_t size()
Returns N, the number of scalar components in an object of this type.
SimdArray(value_type a)
Broadcast Constructor.
value_type operator[](size_t index) const noexcept
This operator can be used to read scalar entries of the vector.
static fixed_size_simd< T, N > One()
Returns a vector with the entries initialized to one.
fixed_size_simd< T, N > rotated(int amount) const
Rotate vector entries to the left by amount.
static fixed_size_simd< T, N > Random()
Returns a vector with pseudo-random entries.
fixed_size_simd< T, N > apply(F &&f, const mask_type &k) const
Like apply(F &&f), but skips the entries where the mask k is not set.
static fixed_size_simd< T, N > Zero()
Returns a vector with the entries initialized to zero.
fixed_size_simd< T, N > reversed() const
Returns a vector with all components reversed.
value_type EntryType
The type of the elements (i.e. T)
fixed_size_simd< T, N > operator+() const
Returns a copy of itself.
T value_type
The type of the elements (i.e. T)
The main vector class for expressing data parallelism.
Vector sorted() const
Return a sorted copy of the vector.
Vector reversed() const
Returns a vector with all components reversed.
fixed_size_simd< T, N > asin(const SimdArray< T, N, V, M > &x)
Applies the std::asin function component-wise and concurrently.
fixed_size_simd_mask< T, N > isnan(const SimdArray< T, N, V, M > &x)
Applies the std::isnan function component-wise and concurrently.
fixed_size_simd< T, N > max(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::max function component-wise and concurrently.
fixed_size_simd< T, N > min(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::min function component-wise and concurrently.
fixed_size_simd< T, N > exponent(const SimdArray< T, N, V, M > &x)
Applies the exponent function component-wise and concurrently.
fixed_size_simd< T, N > log10(const SimdArray< T, N, V, M > &x)
Applies the std::log10 function component-wise and concurrently.
fixed_size_simd< T, N > rsqrt(const SimdArray< T, N, V, M > &x)
Applies the rsqrt function component-wise and concurrently.
fixed_size_simd< T, N > sin(const SimdArray< T, N, V, M > &x)
Applies the std::sin function component-wise and concurrently.
fixed_size_simd_mask< T, N > isfinite(const SimdArray< T, N, V, M > &x)
Applies the std::isfinite function component-wise and concurrently.
SimdArray< T, N > fma(const SimdArray< T, N > &a, const SimdArray< T, N > &b, const SimdArray< T, N > &c)
Applies the std::fma function component-wise and concurrently.
fixed_size_simd_mask< T, N > isinf(const SimdArray< T, N, V, M > &x)
Applies the std::isinf function component-wise and concurrently.
fixed_size_simd< T, N > log2(const SimdArray< T, N, V, M > &x)
Applies the std::log2 function component-wise and concurrently.
fixed_size_simd< T, N > copysign(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::copysign function component-wise and concurrently.
fixed_size_simd< T, N > exp(const SimdArray< T, N, V, M > &x)
Applies the std::exp function component-wise and concurrently.
fixed_size_simd< T, N > atan(const SimdArray< T, N, V, M > &x)
Applies the std::atan function component-wise and concurrently.
fixed_size_simd< T, N > reciprocal(const SimdArray< T, N, V, M > &x)
Applies the reciprocal function component-wise and concurrently.
fixed_size_simd< T, N > abs(const SimdArray< T, N, V, M > &x)
Applies the std::abs function component-wise and concurrently.
fixed_size_simd< T, N > trunc(const SimdArray< T, N, V, M > &x)
Applies the std::trunc function component-wise and concurrently.
fixed_size_simd_mask< T, N > isnegative(const SimdArray< T, N, V, M > &x)
Applies the isnegative function component-wise and concurrently.
fixed_size_simd< T, N > round(const SimdArray< T, N, V, M > &x)
Applies the std::round function component-wise and concurrently.
fixed_size_simd< T, N > log(const SimdArray< T, N, V, M > &x)
Applies the std::log function component-wise and concurrently.
fixed_size_simd< T, N > cos(const SimdArray< T, N, V, M > &x)
Applies the std::cos function component-wise and concurrently.
fixed_size_simd< T, N > sqrt(const SimdArray< T, N, V, M > &x)
Applies the std::sqrt function component-wise and concurrently.
result_vector_type< L, R > operator-(L &&lhs, R &&rhs)
Applies - component-wise and concurrently.
fixed_size_simd< T, N > floor(const SimdArray< T, N, V, M > &x)
Applies the std::floor function component-wise and concurrently.
SimdArray< T, N > frexp(const SimdArray< T, N > &x, SimdArray< int, N > *e)
Applies the std::frexp function component-wise and concurrently.
void sincos(const SimdArray< T, N > &x, SimdArray< T, N > *sin, SimdArray< T, N > *cos)
Determines sine and cosine concurrently and component-wise on x.
fixed_size_simd< T, N > atan2(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std::atan2 function component-wise and concurrently.
result_vector_type< L, R > operator+(L &&lhs, R &&rhs)
Applies + component-wise and concurrently.
fixed_size_simd< T, N > ceil(const SimdArray< T, N, V, M > &x)
Applies the std::ceil function component-wise and concurrently.
SimdArray< T, N > ldexp(const SimdArray< T, N > &x, const SimdArray< int, N > &e)
Applies the std::ldexp function component-wise and concurrently.
Adapter< S, T, N > shifted(const Adapter< S, T, N > &a, int shift)
Returns a new vectorized object where each entry is shifted by shift.
std::pair< V, V > interleave(const V &a, const V &b)
Interleaves the entries from a and b into two vectors of the same type.
constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero
The special object Vc::IndexesFromZero can be used to construct Vector objects initialized to the values 0, 1, 2, 3, 4, ...
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true.
constexpr UnalignedTag Unaligned
Use this object for a flags parameter to request unaligned loads and stores.
constexpr AlignedTag Aligned
Use this object for a flags parameter to request aligned loads and stores.
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/false.
void deinterleave(V *a, V *b, const M *memory, A align)
std::ostream & operator<<(std::ostream &out, const Vc::Vector< T, Abi > &v)
Prints the contents of a vector into a stream object.
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector types.
Vector Classes Namespace.
UnalignedTag DefaultLoadTag
The default load tag type uses unaligned (non-streaming) loads.
To simd_cast(From &&x, enable_if< std::is_same< To, Traits::decay< From > >::value >=nullarg)
Casts the argument x from type From to type To.
void assign(SimdizeDetail::Adapter< S, T, N > &a, size_t i, const S &x)
Assigns one scalar object x to a SIMD slot at offset i in the simdized object a.