// Vc 1.4.1 — SIMD Vector Classes for C++ — common/simdarray.h
1/* This file is part of the Vc library. {{{
2Copyright © 2013-2015 Matthias Kretz <kretz@kde.org>
3
4Redistribution and use in source and binary forms, with or without
5modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 * Neither the names of contributing organizations nor the
12 names of its contributors may be used to endorse or promote products
13 derived from this software without specific prior written permission.
14
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
26}}}*/
27
28#ifndef VC_COMMON_SIMDARRAY_H_
29#define VC_COMMON_SIMDARRAY_H_
30
31//#define Vc_DEBUG_SIMD_CAST 1
32//#define Vc_DEBUG_SORTED 1
33//#include "../IO"
34
35#include <array>
36
37#include "writemaskedvector.h"
38#include "simdarrayhelper.h"
39#include "simdmaskarray.h"
40#include "utility.h"
41#include "interleave.h"
42#include "indexsequence.h"
43#include "transpose.h"
44#include "macros.h"
45
46namespace Vc_VERSIONED_NAMESPACE
47{
48// select_best_vector_type {{{
49namespace Common
50{
53
/// \internal Picks the first candidate whose width does not exceed N.
template <std::size_t N, class... Candidates> struct select_best_vector_type_impl;
// last candidate; this one must work; assume it does:
template <std::size_t N, class T> struct select_best_vector_type_impl<N, T> {
    using type = T;
};
// check the next candidate; use it if N >= T::size(); recurse otherwise:
template <std::size_t N, class T, class... Candidates>
struct select_best_vector_type_impl<N, T, Candidates...> {
    using type = typename std::conditional<
        (N < T::Size), typename select_best_vector_type_impl<N, Candidates...>::type,
        T>::type;
};
// The candidate list is ordered widest-first, so SimdArray<T, N> is backed by the
// widest native vector the target supports that still fits into N elements.
// Vc::Scalar::Vector<T> (width 1) is the unconditional fallback.
template <class T, std::size_t N>
struct select_best_vector_type : select_best_vector_type_impl<N,
#ifdef Vc_IMPL_AVX2
                                     Vc::AVX2::Vector<T>,
#elif defined Vc_IMPL_AVX
                                     Vc::AVX::Vector<T>,
#endif
#ifdef Vc_IMPL_SSE
                                     Vc::SSE::Vector<T>,
#endif
                                     Vc::Scalar::Vector<T>> {
};
82} // namespace Common
83// }}}
84// internal namespace (product & sum helper) {{{1
85namespace internal
86{
// Named binary fold callbacks used by the reduction macros below (a named function,
// unlike a lambda or std::multiplies, can be passed where both halves are SimdArrays
// or scalars alike).
template <typename T> T Vc_INTRINSIC Vc_PURE product_helper_(const T &l, const T &r) { return l * r; }
template <typename T> T Vc_INTRINSIC Vc_PURE sum_helper_(const T &l, const T &r) { return l + r; }
89} // namespace internal
90
91// min & max declarations {{{1
// Element-wise minimum/maximum of two SimdArrays; declared here so class members
// below can refer to them, defined later in this file.
template <typename T, std::size_t N, typename V, std::size_t M>
inline fixed_size_simd<T, N> min(const SimdArray<T, N, V, M> &x,
                                 const SimdArray<T, N, V, M> &y);
template <typename T, std::size_t N, typename V, std::size_t M>
inline fixed_size_simd<T, N> max(const SimdArray<T, N, V, M> &x,
                                 const SimdArray<T, N, V, M> &y);
98
99// SimdArray class {{{1
102
103// atomic SimdArray {{{1
104#define Vc_CURRENT_CLASS_NAME SimdArray
/**\internal
 * "Atomic" SimdArray specialization, selected when N equals the width of the best
 * native vector type for T. The whole object is a single native SIMD vector
 * (`storage_type data`), so every operation forwards directly to it with no
 * splitting into halves.
 */
template <typename T, std::size_t N, typename VectorType_>
class SimdArray<T, N, VectorType_, N>
{
    // Only the six arithmetic types supported by every Vc backend are allowed.
    static_assert(std::is_same<T, double>::value || std::is_same<T, float>::value ||
                      std::is_same<T, int32_t>::value ||
                      std::is_same<T, uint32_t>::value ||
                      std::is_same<T, int16_t>::value ||
                      std::is_same<T, uint16_t>::value,
                  "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, "
                  "int16_t, uint16_t }");
    // The 3rd/4th template parameters are deduced implementation details;
    // this rejects user attempts to override them.
    static_assert(
        std::is_same<VectorType_,
                     typename Common::select_best_vector_type<T, N>::type>::value &&
            VectorType_::size() == N,
        "ERROR: leave the third and fourth template parameters with their defaults. They "
        "are implementation details.");

public:
    static constexpr bool is_atomic = true;
    using VectorType = VectorType_;
    using vector_type = VectorType;
    using storage_type = vector_type;
    using vectorentry_type = typename vector_type::VectorEntryType;
    using value_type = T;
    using mask_type = fixed_size_simd_mask<T, N>;
    using index_type = fixed_size_simd<int, N>;
    static constexpr std::size_t size() { return N; }
    using Mask = mask_type;
    using MaskType = Mask;
    using MaskArgument = const MaskType &;
    using VectorEntryType = vectorentry_type;
    using EntryType = value_type;
    using IndexType = index_type;
    using AsArg = const SimdArray &;
    using reference = Detail::ElementReference<SimdArray>;
    static constexpr std::size_t Size = size();
    static constexpr std::size_t MemoryAlignment = storage_type::MemoryAlignment;

    // zero init
    Vc_INTRINSIC SimdArray() = default;

    // default copy ctor/operator
    Vc_INTRINSIC SimdArray(const SimdArray &) = default;
    Vc_INTRINSIC SimdArray(SimdArray &&) = default;
    Vc_INTRINSIC SimdArray &operator=(const SimdArray &) = default;

    // broadcast: replicate a into all N elements
    Vc_INTRINSIC SimdArray(const value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &a) : data(a) {}
    Vc_INTRINSIC SimdArray(value_type &&a) : data(a) {}
    // broadcast from a plain int (e.g. `SimdArray<float, N> x = 1;`); restricted to
    // U == int so it never competes with the value_type overloads above
    template <
        typename U,
        typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
    Vc_INTRINSIC SimdArray(U a)
        : SimdArray(static_cast<value_type>(a))
    {
    }

    // implicit casts
    // ... from a SimdArray of equal N backed by a single equal-width vector
    template <class U, class V, class = enable_if<N == V::Size>>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(x)))
    {
    }
    // ... from a SimdArray stored in two pieces (converted with a 2-argument cast)
    template <class U, class V, class = enable_if<(N > V::Size && N <= 2 * V::Size)>,
              class = U>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(internal_data0(x)),
                                      internal_data(internal_data1(x))))
    {
    }
    // ... from a SimdArray stored in four pieces (converted with a 4-argument cast)
    template <class U, class V, class = enable_if<(N > 2 * V::Size && N <= 4 * V::Size)>,
              class = U, class = U>
    Vc_INTRINSIC SimdArray(const SimdArray<U, N, V> &x)
        : data(simd_cast<vector_type>(internal_data(internal_data0(internal_data0(x))),
                                      internal_data(internal_data1(internal_data0(x))),
                                      internal_data(internal_data0(internal_data1(x))),
                                      internal_data(internal_data1(internal_data1(x)))))
    {
    }

    // construct from the Index-th piece of a larger vector
    template <typename V, std::size_t Pieces, std::size_t Index>
    Vc_INTRINSIC SimdArray(Common::Segment<V, Pieces, Index> &&x)
        : data(simd_cast<vector_type, Index>(x.data))
    {
    }

    // initializer list; length must equal size() (checked in debug builds only)
    Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
        : data(init.begin(), Vc::Unaligned)
    {
        Vc_ASSERT(init.size() == size());
    }

    // implicit conversion from underlying vector_type
    template <
        typename V,
        typename = enable_if<Traits::is_simd_vector<V>::value && !Traits::isSimdArray<V>::value>>
    Vc_INTRINSIC SimdArray(const V &x)
        : data(simd_cast<vector_type>(x))
    {
    }

    // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
    // T implicitly convertible to U
    template <typename U, typename A,
              typename =
                  enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
                            !std::is_same<A, simd_abi::fixed_size<N>>::value>>
    Vc_INTRINSIC operator Vector<U, A>() const
    {
        return simd_cast<Vector<U, A>>(data);
    }
    // fixed_size_simd<T, N> derives from this SimdArray, so these are safe casts
    operator fixed_size_simd<T, N> &()
    {
        return static_cast<fixed_size_simd<T, N> &>(*this);
    }
    operator const fixed_size_simd<T, N> &() const
    {
        return static_cast<const fixed_size_simd<T, N> &>(*this);
    }

#include "gatherinterface.h"
#include "scatterinterface.h"

    // special-value constructors; forwarded to the native vector
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data() {}
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data(o) {}
    explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i) : data(i)
    {
    }
    // [Offset, Offset+1, ...]; used when this object is an upper piece of a split
    template <std::size_t Offset>
    explicit Vc_INTRINSIC SimdArray(
        Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset>)
        : data(Vc::IndexesFromZero)
    {
        data += value_type(Offset);
    }

    // zero all elements / only the elements selected (or deselected) by k
    Vc_INTRINSIC void setZero() { data.setZero(); }
    Vc_INTRINSIC void setZero(mask_type k) { data.setZero(internal_data(k)); }
    Vc_INTRINSIC void setZeroInverted() { data.setZeroInverted(); }
    Vc_INTRINSIC void setZeroInverted(mask_type k) { data.setZeroInverted(internal_data(k)); }

    // set all / masked elements to quiet NaN
    Vc_INTRINSIC void setQnan() { data.setQnan(); }
    Vc_INTRINSIC void setQnan(mask_type m) { data.setQnan(internal_data(m)); }

    // internal: execute specified Operation
    template <typename Op, typename... Args>
    static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
    {
        fixed_size_simd<T, N> r;
        Common::unpackArgumentsAuto(op, r.data, std::forward<Args>(args)...);
        return r;
    }

    // internal: execute specified Operation that produces no result
    template <typename Op, typename... Args>
    static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
    {
        Common::unpackArgumentsAuto(op, nullptr, std::forward<Args>(args)...);
    }

    static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
    {
        return SimdArray(Vc::Zero);
    }
    static Vc_INTRINSIC fixed_size_simd<T, N> One()
    {
        return SimdArray(Vc::One);
    }
    static Vc_INTRINSIC fixed_size_simd<T, N> IndexesFromZero()
    {
        return SimdArray(Vc::IndexesFromZero);
    }
    static Vc_INTRINSIC fixed_size_simd<T, N> Random()
    {
        return fromOperation(Common::Operations::random());
    }

    // load ctor
    template <class U, class Flags = DefaultLoadTag,
              class = enable_if<std::is_arithmetic<U>::value &&
                                Traits::is_load_store_flag<Flags>::value>>
    explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = Flags()) : data(mem, f)
    {
    }

    template <typename... Args> Vc_INTRINSIC void load(Args &&... args)
    {
        data.load(std::forward<Args>(args)...);
    }

    template <typename... Args> Vc_INTRINSIC void store(Args &&... args) const
    {
        data.store(std::forward<Args>(args)...);
    }

    // mask of elements that compare equal to zero
    Vc_INTRINSIC mask_type operator!() const
    {
        return {private_init, !data};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
    {
        return {private_init, -data};
    }

    // unary plus returns a copy
    Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }

    Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
    {
        return {private_init, ~data};
    }

    // scalar shift operators (integral T only)
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
    {
        return {private_init, data << x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
    {
        data <<= x;
        return *this;
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
    {
        return {private_init, data >> x};
    }
    template <typename U,
              typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
    Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
    {
        data >>= x;
        return *this;
    }

    // compound-assignment operators; each forwards element-wise to the native vector
#define Vc_BINARY_OPERATOR_(op)                                                          \
    Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs)             \
    {                                                                                    \
        data op## = rhs.data;                                                            \
        return *this;                                                                    \
    }
    Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
    Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
    Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
#undef Vc_BINARY_OPERATOR_

    Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
    {
        return {private_init, isnegative(data)};
    }

private:
    // accessors used by Detail::ElementReference for lvalue operator[]
    friend reference;
    Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
    {
        return o.data[i];
    }
    template <typename U>
    Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
        noexcept(std::declval<value_type &>() = v))
    {
        o.data[i] = v;
    }

public:
    // element access; the returned proxy routes reads/writes through get/set above
    Vc_INTRINSIC reference operator[](size_t i) noexcept
    {
        static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
        return {*this, int(i)};
    }
    Vc_INTRINSIC value_type operator[](size_t i) const noexcept
    {
        return get(*this, int(i));
    }

    // write-masked view: `x(mask) = expr;` assigns only where mask is true
    Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(const mask_type &k)
    {
        return {*this, k};
    }

    Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k)
    {
        data.assign(v.data, internal_data(k));
    }

    // reductions ////////////////////////////////////////////////////////
#define Vc_REDUCTION_FUNCTION_(name_)                                                    \
    Vc_INTRINSIC Vc_PURE value_type name_() const { return data.name_(); }               \
    Vc_INTRINSIC Vc_PURE value_type name_(mask_type mask) const                          \
    {                                                                                    \
        return data.name_(internal_data(mask));                                          \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
    Vc_REDUCTION_FUNCTION_(min);
    Vc_REDUCTION_FUNCTION_(max);
    Vc_REDUCTION_FUNCTION_(product);
    Vc_REDUCTION_FUNCTION_(sum);
#undef Vc_REDUCTION_FUNCTION_
    // prefix sum: element i of the result is the sum of elements [0, i]
    Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const
    {
        return {private_init, data.partialSum()};
    }

    // apply f to every element / only to the elements selected by k
    template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f) const
    {
        return {private_init, data.apply(std::forward<F>(f))};
    }
    template <typename F> Vc_INTRINSIC fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
    {
        return {private_init, data.apply(std::forward<F>(f), k)};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount) const
    {
        return {private_init, data.shifted(amount)};
    }

    // shift, filling the vacated elements from shiftIn instead of zero
    template <std::size_t NN>
    Vc_INTRINSIC fixed_size_simd<T, N> shifted(int amount, const SimdArray<value_type, NN> &shiftIn)
        const
    {
        return {private_init, data.shifted(amount, simd_cast<VectorType>(shiftIn))};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
    {
        return {private_init, data.rotated(amount)};
    }

    Vc_DEPRECATED("use exponent(x) instead") Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
    {
        return {private_init, exponent(data)};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(SimdArray x) const
    {
        return {private_init, data.interleaveLow(x.data)};
    }
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(SimdArray x) const
    {
        return {private_init, data.interleaveHigh(x.data)};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> reversed() const
    {
        return {private_init, data.reversed()};
    }

    Vc_INTRINSIC fixed_size_simd<T, N> sorted() const
    {
        return {private_init, data.sorted()};
    }

    // generator ctor: data[i] = gen(i) for i in [0, N); SFINAE'd so it never
    // shadows the vector conversion ctor above
    template <class G, class = decltype(std::declval<G>()(std::size_t())),
              class = enable_if<!Traits::is_simd_vector<G>::value>>
    Vc_INTRINSIC SimdArray(const G &gen) : data(gen)
    {
    }
    template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen)
    {
        return {private_init, VectorType::generate(gen)};
    }

    Vc_DEPRECATED("use copysign(x, y) instead")
    Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
    {
        return {private_init, Vc::copysign(data, x.data)};
    }

    // the free internal_data functions below are the only external access to `data`
    friend VectorType &internal_data<>(SimdArray &x);
    friend const VectorType &internal_data<>(const SimdArray &x);

    /// \internal wrap the native vector without any conversion
    Vc_INTRINSIC SimdArray(private_init_t, VectorType &&x) : data(std::move(x)) {}

    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type));

private:
    // The alignas attribute attached to the class declaration above is ignored by ICC
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
    // all compilers.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(VectorType_) /
                                 VectorType_::size()>::value)) storage_type data;
};
512template <typename T, std::size_t N, typename VectorType> constexpr std::size_t SimdArray<T, N, VectorType, N>::Size;
513template <typename T, std::size_t N, typename VectorType>
/// \internal mutable access to the native vector wrapped by an atomic SimdArray
template <typename T, std::size_t N, typename VectorType>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
VectorType &internal_data(SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
/// \internal read-only access to the native vector wrapped by an atomic SimdArray
template <typename T, std::size_t N, typename VectorType>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const VectorType &internal_data(const SimdArray<T, N, VectorType, N> &x)
{
    return x.data;
}
531
532// unwrap {{{2
// generic fallback: anything that is not a SimdArray/Segment passes through unchanged
template <class T> Vc_INTRINSIC T unwrap(const T &x) { return x; }

// atomic SimdArray: unwrap to the single native vector it stores
template <class T, size_t N, class V>
Vc_INTRINSIC V unwrap(const SimdArray<T, N, V, N> &x)
{
    return internal_data(x);
}

// Segment: materialize the piece as a fixed-size vector, then unwrap that
template <class T, size_t Pieces, size_t Index>
Vc_INTRINSIC auto unwrap(const Common::Segment<T, Pieces, Index> &x)
    -> decltype(x.to_fixed_size())
{
    return unwrap(x.to_fixed_size());
}
547
548// gatherImplementation {{{2
// gatherImplementation (atomic specialization): forwards to the native vector's
// gather after unwrapping the index container to a native type
template <typename T, std::size_t N, typename VectorType>
template <class MT, class IT, int Scale>
Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args)
{
    data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)));
}
// masked variant: only the elements selected by mask are loaded
template <typename T, std::size_t N, typename VectorType>
template <class MT, class IT, int Scale>
Vc_INTRINSIC void SimdArray<T, N, VectorType, N>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
{
    data.gather(Common::make_gather<Scale>(args.address, unwrap(args.indexes)),
                mask);
}
564
565// scatterImplementation {{{2
// scatterImplementation (atomic specialization): forwards to the native vector's
// scatter after unwrapping the index container to a native type
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
                                                                  IT &&indexes) const
{
    data.scatter(mem, unwrap(std::forward<IT>(indexes)));
}
// masked variant: only the elements selected by mask are stored
template <typename T, std::size_t N, typename VectorType>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, N>::scatterImplementation(MT *mem,
                                                                  IT &&indexes,
                                                                  MaskArgument mask) const
{
    data.scatter(mem, unwrap(std::forward<IT>(indexes)), mask);
}
581
582// generic SimdArray {{{1
615template <typename T, size_t N, typename V, size_t Wt> class SimdArray
616{
617 static_assert(std::is_same<T, double>::value ||
618 std::is_same<T, float>::value ||
619 std::is_same<T, int32_t>::value ||
620 std::is_same<T, uint32_t>::value ||
621 std::is_same<T, int16_t>::value ||
622 std::is_same<T, uint16_t>::value, "SimdArray<T, N> may only be used with T = { double, float, int32_t, uint32_t, int16_t, uint16_t }");
623 static_assert(
624 std::is_same<V, typename Common::select_best_vector_type<T, N>::type>::value &&
625 V::size() == Wt,
626 "ERROR: leave the third and fourth template parameters with their defaults. They "
627 "are implementation details.");
628 static_assert(
629 // either the EntryType and VectorEntryType of the main V are equal
630 std::is_same<typename V::EntryType, typename V::VectorEntryType>::value ||
631 // or N is a multiple of V::size()
632 (N % V::size() == 0),
633 "SimdArray<(un)signed short, N> on MIC only works correctly for N = k * "
634 "MIC::(u)short_v::size(), i.e. k * 16.");
635
636 using my_traits = SimdArrayTraits<T, N>;
637 static constexpr std::size_t N0 = my_traits::N0;
638 static constexpr std::size_t N1 = my_traits::N1;
639 using Split = Common::Split<N0>;
640 template <typename U, std::size_t K> using CArray = U[K];
641
642public:
643 static constexpr bool is_atomic = false;
644 using storage_type0 = typename my_traits::storage_type0;
645 using storage_type1 = typename my_traits::storage_type1;
646 static_assert(storage_type0::size() == N0, "");
647
651 using vector_type = V;
652 using vectorentry_type = typename storage_type0::vectorentry_type;
653 typedef vectorentry_type alias_type Vc_MAY_ALIAS;
654
656 using value_type = T;
657
660
663
674 static constexpr std::size_t size() { return N; }
675
679 using MaskType = Mask;
680 using MaskArgument = const MaskType &;
681 using VectorEntryType = vectorentry_type;
686 using AsArg = const SimdArray &;
687
688 using reference = Detail::ElementReference<SimdArray>;
689
691 static constexpr std::size_t MemoryAlignment =
695
698
700 static Vc_INTRINSIC fixed_size_simd<T, N> Zero()
701 {
702 return SimdArray(Vc::Zero);
703 }
704
706 static Vc_INTRINSIC fixed_size_simd<T, N> One()
707 {
708 return SimdArray(Vc::One);
709 }
710
713 {
715 }
716
718 static Vc_INTRINSIC fixed_size_simd<T, N> Random()
719 {
720 return fromOperation(Common::Operations::random());
721 }
722
723 template <class G, class = decltype(std::declval<G>()(std::size_t())),
724 class = enable_if<!Traits::is_simd_vector<G>::value>>
725 Vc_INTRINSIC SimdArray(const G &gen)
726 : data0(gen), data1([&](std::size_t i) { return gen(i + storage_type0::size()); })
727 {
728 }
729
    /// Initializes a new vector with gen(i) for each index i in [0, N).
    template <typename G> static Vc_INTRINSIC fixed_size_simd<T, N> generate(const G &gen) // {{{2
    {
        auto tmp = storage_type0::generate(gen);  // GCC bug: the order of evaluation in
                                                  // an initializer list is well-defined
                                                  // (front to back), but GCC 4.8 doesn't
                                                  // implement this correctly. Therefore
                                                  // we enforce correct order.
        return {std::move(tmp),
                storage_type1::generate([&](std::size_t i) { return gen(i + N0); })};
    }
742
745
747 SimdArray() = default;
749
752
754 Vc_INTRINSIC SimdArray(value_type a) : data0(a), data1(a) {}
755 template <
756 typename U,
757 typename = enable_if<std::is_same<U, int>::value && !std::is_same<int, value_type>::value>>
758 SimdArray(U a)
759 : SimdArray(static_cast<value_type>(a))
760 {
761 }
763
764 // default copy ctor/operator
765 SimdArray(const SimdArray &) = default;
766 SimdArray(SimdArray &&) = default;
767 SimdArray &operator=(const SimdArray &) = default;
768
769 // load ctor
770 template <typename U, typename Flags = DefaultLoadTag,
771 typename = enable_if<std::is_arithmetic<U>::value &&
772 Traits::is_load_store_flag<Flags>::value>>
773 explicit Vc_INTRINSIC SimdArray(const U *mem, Flags f = Flags())
774 : data0(mem, f), data1(mem + storage_type0::size(), f)
775 {
776 }
777
778// MSVC does overload resolution differently and takes the const U *mem overload (I hope)
779#ifndef Vc_MSVC
786 template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
787 typename = enable_if<std::is_arithmetic<U>::value &&
788 Traits::is_load_store_flag<Flags>::value>>
789 explicit Vc_INTRINSIC SimdArray(CArray<U, Extent> &mem, Flags f = Flags())
790 : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
791 {
792 }
796 template <typename U, std::size_t Extent, typename Flags = DefaultLoadTag,
797 typename = enable_if<std::is_arithmetic<U>::value &&
798 Traits::is_load_store_flag<Flags>::value>>
799 explicit Vc_INTRINSIC SimdArray(const CArray<U, Extent> &mem, Flags f = Flags())
800 : data0(&mem[0], f), data1(&mem[storage_type0::size()], f)
801 {
802 }
803#endif
804
805 // initializer list
806 Vc_INTRINSIC SimdArray(const std::initializer_list<value_type> &init)
807 : data0(init.begin(), Vc::Unaligned)
808 , data1(init.begin() + storage_type0::size(), Vc::Unaligned)
809 {
810 Vc_ASSERT(init.size() == size());
811 }
812
813#include "gatherinterface.h"
814#include "scatterinterface.h"
815
816 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerZero) : data0(), data1() {}
817 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerOne o) : data0(o), data1(o) {}
818 explicit Vc_INTRINSIC SimdArray(VectorSpecialInitializerIndexesFromZero i)
819 : data0(i)
820 , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
821 storage_type0::size()>())
822 {
823 }
824 template <size_t Offset>
825 explicit Vc_INTRINSIC SimdArray(
826 Common::AddOffset<VectorSpecialInitializerIndexesFromZero, Offset> i)
827 : data0(i)
828 , data1(Common::AddOffset<VectorSpecialInitializerIndexesFromZero,
829 storage_type0::size() + Offset>())
830 {
831 }
832
833 // explicit casts
834 template <class W, class = enable_if<
835 (Traits::is_simd_vector<W>::value &&
836 Traits::simd_vector_size<W>::value == N &&
837 !(std::is_convertible<Traits::entry_type_of<W>, T>::value &&
838 Traits::isSimdArray<W>::value))>>
839 Vc_INTRINSIC explicit SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
840 {
841 }
842
843 // implicit casts
844 template <class W, class = enable_if<
845 (Traits::isSimdArray<W>::value &&
846 Traits::simd_vector_size<W>::value == N &&
847 std::is_convertible<Traits::entry_type_of<W>, T>::value)>,
848 class = W>
849 Vc_INTRINSIC SimdArray(W &&x) : data0(Split::lo(x)), data1(Split::hi(x))
850 {
851 }
852
853 template <class W, std::size_t Pieces, std::size_t Index>
854 Vc_INTRINSIC SimdArray(Common::Segment<W, Pieces, Index> &&x)
855 : data0(Common::Segment<W, 2 * Pieces, 2 * Index>{x.data})
856 , data1(Common::Segment<W, 2 * Pieces, 2 * Index + 1>{x.data})
857 {
858 }
859
860 // implicit conversion to Vector<U, AnyAbi> for if Vector<U, AnyAbi>::size() == N and
861 // T implicitly convertible to U
862 template <typename U, typename A,
863 typename =
864 enable_if<std::is_convertible<T, U>::value && Vector<U, A>::Size == N &&
865 !std::is_same<A, simd_abi::fixed_size<N>>::value>>
866 operator Vector<U, A>() const
867 {
868 auto r = simd_cast<Vector<U, A>>(data0, data1);
869 return r;
870 }
871 Vc_INTRINSIC operator fixed_size_simd<T, N> &()
872 {
873 return static_cast<fixed_size_simd<T, N> &>(*this);
874 }
875 Vc_INTRINSIC operator const fixed_size_simd<T, N> &() const
876 {
877 return static_cast<const fixed_size_simd<T, N> &>(*this);
878 }
879
881
882 Vc_INTRINSIC void setZero()
883 {
884 data0.setZero();
885 data1.setZero();
886 }
887 Vc_INTRINSIC void setZero(const mask_type &k)
888 {
889 data0.setZero(Split::lo(k));
890 data1.setZero(Split::hi(k));
891 }
892 Vc_INTRINSIC void setZeroInverted()
893 {
894 data0.setZeroInverted();
895 data1.setZeroInverted();
896 }
897 Vc_INTRINSIC void setZeroInverted(const mask_type &k)
898 {
899 data0.setZeroInverted(Split::lo(k));
900 data1.setZeroInverted(Split::hi(k));
901 }
902
903
904 Vc_INTRINSIC void setQnan() {
905 data0.setQnan();
906 data1.setQnan();
907 }
908 Vc_INTRINSIC void setQnan(const mask_type &m) {
909 data0.setQnan(Split::lo(m));
910 data1.setQnan(Split::hi(m));
911 }
912
    /// \internal execute Op on both halves, splitting each argument with Split
    template <typename Op, typename... Args>
    static Vc_INTRINSIC fixed_size_simd<T, N> fromOperation(Op op, Args &&... args)
    {
        fixed_size_simd<T, N> r = {
            storage_type0::fromOperation(op, Split::lo(args)...),  // no forward here - it
                                                                   // could move and thus
                                                                   // break the next line
            storage_type1::fromOperation(op, Split::hi(std::forward<Args>(args))...)};
        return r;
    }
924
    /// \internal execute a result-less Op on both halves, splitting each argument
    template <typename Op, typename... Args>
    static Vc_INTRINSIC void callOperation(Op op, Args &&... args)
    {
        // lo() first without forwarding: a forward could move from args and break
        // the hi() call on the next line
        storage_type0::callOperation(op, Split::lo(args)...);
        storage_type1::callOperation(op, Split::hi(std::forward<Args>(args))...);
    }
932
933
    // load: low half from mem, high half from mem + storage_type0::size()
    template <typename U, typename... Args> Vc_INTRINSIC void load(const U *mem, Args &&... args)
    {
        data0.load(mem, Split::lo(args)...); // no forward here - it could move and thus
                                             // break the next line
        data1.load(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
    }
940
    // store: low half to mem, high half to mem + storage_type0::size()
    template <typename U, typename... Args> Vc_INTRINSIC void store(U *mem, Args &&... args) const
    {
        data0.store(mem, Split::lo(args)...); // no forward here - it could move and thus
                                              // break the next line
        data1.store(mem + storage_type0::size(), Split::hi(std::forward<Args>(args))...);
    }
947
948 Vc_INTRINSIC mask_type operator!() const
949 {
950 return {!data0, !data1};
951 }
952
953 Vc_INTRINSIC fixed_size_simd<T, N> operator-() const
954 {
955 return {-data0, -data1};
956 }
957
959 Vc_INTRINSIC fixed_size_simd<T, N> operator+() const { return *this; }
960
961 Vc_INTRINSIC fixed_size_simd<T, N> operator~() const
962 {
963 return {~data0, ~data1};
964 }
965
966 // left/right shift operators {{{2
967 template <typename U,
968 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
969 Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator<<(U x) const
970 {
971 return {data0 << x, data1 << x};
972 }
973 template <typename U,
974 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
975 Vc_INTRINSIC fixed_size_simd<T, N> &operator<<=(U x)
976 {
977 data0 <<= x;
978 data1 <<= x;
979 return *this;
980 }
981 template <typename U,
982 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
983 Vc_INTRINSIC Vc_CONST fixed_size_simd<T, N> operator>>(U x) const
984 {
985 return {data0 >> x, data1 >> x};
986 }
987 template <typename U,
988 typename = enable_if<std::is_integral<T>::value && std::is_integral<U>::value>>
989 Vc_INTRINSIC fixed_size_simd<T, N> &operator>>=(U x)
990 {
991 data0 >>= x;
992 data1 >>= x;
993 return *this;
994 }
995
996 // binary operators {{{2
997#define Vc_BINARY_OPERATOR_(op) \
998 Vc_INTRINSIC fixed_size_simd<T, N> &operator op##=(const SimdArray &rhs) \
999 { \
1000 data0 op## = rhs.data0; \
1001 data1 op## = rhs.data1; \
1002 return *this; \
1003 }
1004 Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATOR_);
1005 Vc_ALL_BINARY(Vc_BINARY_OPERATOR_);
1006 Vc_ALL_SHIFTS(Vc_BINARY_OPERATOR_);
1007#undef Vc_BINARY_OPERATOR_
1008
1009 // operator[] {{{2
1012
1013private:
1014 friend reference;
1015 Vc_INTRINSIC static value_type get(const SimdArray &o, int i) noexcept
1016 {
1017 return reinterpret_cast<const alias_type *>(&o)[i];
1018 }
1019 template <typename U>
1020 Vc_INTRINSIC static void set(SimdArray &o, int i, U &&v) noexcept(
1021 noexcept(std::declval<value_type &>() = v))
1022 {
1023 reinterpret_cast<alias_type *>(&o)[i] = v;
1024 }
1025
1026public:
1028
1034 Vc_INTRINSIC reference operator[](size_t i) noexcept
1035 {
1036 static_assert(noexcept(reference{std::declval<SimdArray &>(), int()}), "");
1037 return {*this, int(i)};
1038 }
1039
1041 Vc_INTRINSIC value_type operator[](size_t index) const noexcept
1042 {
1043 return get(*this, int(index));
1044 }
1046
1047 // operator(){{{2
1049 Vc_INTRINSIC Common::WriteMaskedVector<SimdArray, mask_type> operator()(
1050 const mask_type &mask)
1051 {
1052 return {*this, mask};
1053 }
1054
1056 Vc_INTRINSIC void assign(const SimdArray &v, const mask_type &k) //{{{2
1057 {
1058 data0.assign(v.data0, internal_data0(k));
1059 data1.assign(v.data1, internal_data1(k));
1060 }
1061
    // reductions {{{2
    // Generates min/max/product/sum. Each reduction consists of:
    //  - an unmasked _impl that combines both halves with a SIMD binary
    //    function when the halves have equal width, falling back to reducing
    //    each half separately and combining the scalar results otherwise;
    //  - the public unmasked entry point;
    //  - a masked variant that skips a half entirely when that half's mask
    //    part is empty.
    // (No comments inside the macro body: // would swallow the continuation.)
#define Vc_REDUCTION_FUNCTION_(name_, binary_fun_, scalar_fun_)                          \
private:                                                                                 \
    template <typename ForSfinae = void>                                                 \
    Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value &&                       \
                               storage_type0::Size == storage_type1::Size,               \
                           value_type> name_##_impl() const                              \
    {                                                                                    \
        return binary_fun_(data0, data1).name_();                                        \
    }                                                                                    \
                                                                                         \
    template <typename ForSfinae = void>                                                 \
    Vc_INTRINSIC enable_if<std::is_same<ForSfinae, void>::value &&                       \
                               storage_type0::Size != storage_type1::Size,               \
                           value_type> name_##_impl() const                              \
    {                                                                                    \
        return scalar_fun_(data0.name_(), data1.name_());                                \
    }                                                                                    \
                                                                                         \
public:                                                                                  \
                                                                                         \
    Vc_INTRINSIC value_type name_() const { return name_##_impl(); }                     \
                                                                                         \
    Vc_INTRINSIC value_type name_(const mask_type &mask) const                           \
    {                                                                                    \
        if (Vc_IS_UNLIKELY(Split::lo(mask).isEmpty())) {                                 \
            return data1.name_(Split::hi(mask));                                         \
        } else if (Vc_IS_UNLIKELY(Split::hi(mask).isEmpty())) {                          \
            return data0.name_(Split::lo(mask));                                         \
        } else {                                                                         \
            return scalar_fun_(data0.name_(Split::lo(mask)),                             \
                               data1.name_(Split::hi(mask)));                            \
        }                                                                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
    Vc_REDUCTION_FUNCTION_(min, Vc::min, std::min);
    Vc_REDUCTION_FUNCTION_(max, Vc::max, std::max);
    Vc_REDUCTION_FUNCTION_(product, internal::product_helper_, internal::product_helper_);
    Vc_REDUCTION_FUNCTION_(sum, internal::sum_helper_, internal::sum_helper_);
#undef Vc_REDUCTION_FUNCTION_
    /// Prefix sum: element i of the result is the sum of the elements [0, i].
    Vc_INTRINSIC Vc_PURE fixed_size_simd<T, N> partialSum() const //{{{2
    {
        auto ps0 = data0.partialSum();
        auto tmp = data1;
        // carry the running total of the low half into the high half
        tmp[0] += ps0[data0.size() - 1];
        return {std::move(ps0), tmp.partialSum()};
    }
1110
    // apply {{{2
    /// Returns a new vector with \p f applied to every element.
    template <typename F> inline fixed_size_simd<T, N> apply(F &&f) const
    {
        return {data0.apply(f), data1.apply(f)};
    }
    /// Returns a new vector with \p f applied to the elements where \p k is true.
    template <typename F>
    inline fixed_size_simd<T, N> apply(F &&f, const mask_type &k) const
    {
        return {data0.apply(f, Split::lo(k)), data1.apply(f, Split::hi(k))};
    }
1123
    // shifted {{{2
    /// Shifts the elements: result[i] == (*this)[i + amount] where the index is
    /// in range, 0 otherwise (this formula is spelled out literally in the
    /// shiftIn overload below).
    inline fixed_size_simd<T, N> shifted(int amount) const
    {
        constexpr int SSize = Size;
        constexpr int SSize0 = storage_type0::Size;
        constexpr int SSize1 = storage_type1::Size;
        if (amount == 0) {
            return *this;
        }
        if (amount < 0) {
            // negative amount: elements move towards higher indices, zeros
            // shift in at the low end
            if (amount > -SSize0) {
                return {data0.shifted(amount), data1.shifted(amount, data0)};
            }
            if (amount == -SSize0) {
                // data0 moves wholesale into the high half
                return {storage_type0(0), simd_cast<storage_type1>(data0)};
            }
            if (amount < -SSize0) {
                return {storage_type0(0), simd_cast<storage_type1>(data0.shifted(
                                              amount + SSize0))};
            }
            // unreachable: the three branches above are exhaustive for amount < 0
            return Zero();
        } else {
            if (amount >= SSize) {
                return Zero();
            } else if (amount >= SSize0) {
                // only (part of) data1 remains, moved into the low half
                return {
                    simd_cast<storage_type0>(data1).shifted(amount - SSize0),
                    storage_type1(0)};
            } else if (amount >= SSize1) {
                return {data0.shifted(amount, data1), storage_type1(0)};
            } else {
                return {data0.shifted(amount, data1), data1.shifted(amount)};
            }
        }
    }
1160
1161 template <std::size_t NN>
1162 inline enable_if<
1163 !(std::is_same<storage_type0, storage_type1>::value && // not bisectable
1164 N == NN),
1166 shifted(int amount, const SimdArray<value_type, NN> &shiftIn) const
1167 {
1168 constexpr int SSize = Size;
1169 if (amount < 0) {
1170 return fixed_size_simd<T, N>([&](int i) -> value_type {
1171 i += amount;
1172 if (i >= 0) {
1173 return operator[](i);
1174 } else if (i >= -SSize) {
1175 return shiftIn[i + SSize];
1176 }
1177 return 0;
1178 });
1179 }
1180 return fixed_size_simd<T, N>([&](int i) -> value_type {
1181 i += amount;
1182 if (i < SSize) {
1183 return operator[](i);
1184 } else if (i < 2 * SSize) {
1185 return shiftIn[i - SSize];
1186 }
1187 return 0;
1188 });
1189 }
1190
private:
    // workaround for MSVC not understanding the simpler and shorter expression of the boolean
    // expression directly in the enable_if below
    // True iff both storage halves have the same type and NN == N, i.e. the
    // shift can operate on whole halves instead of element by element.
    template <std::size_t NN> struct bisectable_shift
        : public std::integral_constant<bool,
                                        std::is_same<storage_type0, storage_type1>::value && // bisectable
                                        N == NN>
    {
    };
1200
public:
    /// Shift-with-shift-in for the bisectable case (equal-width halves): works
    /// on whole storage halves. Branches are ordered from small negative shifts
    /// through zero to large positive shifts; each case forwards the correct
    /// neighboring half (own data or shiftIn halves) as the shift-in source.
    template <std::size_t NN>
    inline fixed_size_simd<T, N> shifted(
        enable_if<bisectable_shift<NN>::value, int> amount,
        const SimdArray<value_type, NN> &shiftIn) const
    {
        constexpr int SSize = Size;
        if (amount < 0) {
            // shift-in comes from the high end of shiftIn
            if (amount > -static_cast<int>(storage_type0::Size)) {
                return {data0.shifted(amount, internal_data1(shiftIn)),
                        data1.shifted(amount, data0)};
            }
            if (amount == -static_cast<int>(storage_type0::Size)) {
                // exactly one half: halves move up wholesale
                return {storage_type0(internal_data1(shiftIn)), storage_type1(data0)};
            }
            if (amount > -SSize) {
                return {
                    internal_data1(shiftIn)
                        .shifted(amount + static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    data0.shifted(amount + static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
            }
            if (amount == -SSize) {
                // shifted entirely out; the result is shiftIn itself
                return shiftIn;
            }
            if (amount > -2 * SSize) {
                return shiftIn.shifted(amount + SSize);
            }
        }
        if (amount == 0) {
            return *this;
        }
        // positive shift: shift-in comes from the low end of shiftIn
        if (amount < static_cast<int>(storage_type0::Size)) {
            return {data0.shifted(amount, data1),
                    data1.shifted(amount, internal_data0(shiftIn))};
        }
        if (amount == static_cast<int>(storage_type0::Size)) {
            return {storage_type0(data1), storage_type1(internal_data0(shiftIn))};
        }
        if (amount < SSize) {
            return {data1.shifted(amount - static_cast<int>(storage_type0::Size), internal_data0(shiftIn)),
                    internal_data0(shiftIn)
                        .shifted(amount - static_cast<int>(storage_type0::Size), internal_data1(shiftIn))};
        }
        if (amount == SSize) {
            return shiftIn;
        }
        if (amount < 2 * SSize) {
            return shiftIn.shifted(amount - SSize);
        }
        return Zero();
    }
1252
1253 // rotated {{{2
1255 Vc_INTRINSIC fixed_size_simd<T, N> rotated(int amount) const
1256 {
1257 amount %= int(size());
1258 if (amount == 0) {
1259 return *this;
1260 } else if (amount < 0) {
1261 amount += size();
1262 }
1263
1264#ifdef Vc_MSVC
1265 // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use store
1266 // ->
1267 // load to implement the function instead.
1268 alignas(MemoryAlignment) T tmp[N + data0.size()];
1269 data0.store(&tmp[0], Vc::Aligned);
1270 data1.store(&tmp[data0.size()], Vc::Aligned);
1271 data0.store(&tmp[N], Vc::Unaligned);
1273 r.data0.load(&tmp[amount], Vc::Unaligned);
1274 r.data1.load(&tmp[(amount + data0.size()) % size()], Vc::Unaligned);
1275 return r;
1276#else
1277 auto &&d0cvtd = simd_cast<storage_type1>(data0);
1278 auto &&d1cvtd = simd_cast<storage_type0>(data1);
1279 constexpr int size0 = storage_type0::size();
1280 constexpr int size1 = storage_type1::size();
1281
1282 if (amount == size0 && std::is_same<storage_type0, storage_type1>::value) {
1283 return {std::move(d1cvtd), std::move(d0cvtd)};
1284 } else if (amount < size1) {
1285 return {data0.shifted(amount, d1cvtd), data1.shifted(amount, d0cvtd)};
1286 } else if (amount == size1) {
1287 return {data0.shifted(amount, d1cvtd), std::move(d0cvtd)};
1288 } else if (int(size()) - amount < size1) {
1289 return {data0.shifted(amount - int(size()), d1cvtd.shifted(size1 - size0)),
1290 data1.shifted(amount - int(size()), data0.shifted(size0 - size1))};
1291 } else if (int(size()) - amount == size1) {
1292 return {data0.shifted(-size1, d1cvtd.shifted(size1 - size0)),
1293 simd_cast<storage_type1>(data0.shifted(size0 - size1))};
1294 } else if (amount <= size0) {
1295 return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1296 simd_cast<storage_type1>(data0.shifted(amount - size1))};
1297 } else {
1298 return {data0.shifted(size1, d1cvtd).shifted(amount - size1, data0),
1299 simd_cast<storage_type1>(data0.shifted(amount - size1, d1cvtd))};
1300 }
1301 return *this;
1302#endif
1303 }
1304
    // interleaveLow/-High {{{2
    /// Returns {this[0], x[0], this[1], x[1], ...} — the low half of a full
    /// interleave of *this with \p x.
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveLow(const SimdArray &x) const
    {
        // return data0[0], x.data0[0], data0[1], x.data0[1], ...
        return {data0.interleaveLow(x.data0),
                simd_cast<storage_type1>(data0.interleaveHigh(x.data0))};
    }
    /// Returns the high half of a full interleave of *this with \p x;
    /// dispatches on whether the two storage halves have equal width.
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHigh(const SimdArray &x) const
    {
        return interleaveHighImpl(
            x,
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }
1320
private:
    // Equal-width halves: the high half of the interleave comes entirely from
    // interleaving data1 with x.data1.
    Vc_INTRINSIC fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::true_type) const
    {
        return {data1.interleaveLow(x.data1), data1.interleaveHigh(x.data1)};
    }
    // Unequal-width halves: stitch the tail of data0's interleave together with
    // data1's interleave via a shift.
    inline fixed_size_simd<T, N> interleaveHighImpl(const SimdArray &x, std::false_type) const
    {
        return {data0.interleaveHigh(x.data0)
                    .shifted(storage_type1::Size,
                             simd_cast<storage_type0>(data1.interleaveLow(x.data1))),
                data1.interleaveHigh(x.data1)};
    }
1335
public:
    /// Returns the elements in reverse order.
    inline fixed_size_simd<T, N> reversed() const //{{{2
    {
        if (std::is_same<storage_type0, storage_type1>::value) {
            // equal halves: reverse each half and swap them
            return {simd_cast<storage_type0>(data1).reversed(),
                    simd_cast<storage_type1>(data0).reversed()};
        } else {
#ifdef Vc_MSVC
            // MSVC fails to find a SimdArray::shifted function with 2 arguments. So use
            // store
            // -> load to implement the function instead.
            alignas(MemoryAlignment) T tmp[N];
            data1.reversed().store(&tmp[0], Vc::Aligned);
            data0.reversed().store(&tmp[data1.size()], Vc::Unaligned);
            return fixed_size_simd<T, N>{&tmp[0], Vc::Aligned};
#else
            // unequal halves: shift data1 into data0 before reversing, and take
            // the leftover reversed data0 elements for the new high half
            return {data0.shifted(storage_type1::Size, data1).reversed(),
                    simd_cast<storage_type1>(data0.reversed().shifted(
                        storage_type0::Size - storage_type1::Size))};
#endif
        }
    }
    /// Returns a vector with the elements sorted in ascending order; dispatches
    /// on whether the two storage halves have equal width.
    inline fixed_size_simd<T, N> sorted() const //{{{2
    {
        return sortedImpl(
            std::integral_constant<bool, storage_type0::Size == storage_type1::Size>());
    }

    // Equal-width halves: bitonic-style merge — sort one half ascending and the
    // other descending, take element-wise min/max, then sort each result.
    Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::true_type) const
    {
#ifdef Vc_DEBUG_SORTED
        std::cerr << "-- " << data0 << data1 << '\n';
#endif
        const auto a = data0.sorted();
        const auto b = data1.sorted().reversed();
        const auto lo = Vc::min(a, b);
        const auto hi = Vc::max(a, b);
        return {lo.sorted(), hi.sorted()};
    }
1378
1380 Vc_INTRINSIC fixed_size_simd<T, N> sortedImpl(std::false_type) const
1381 {
1382 using SortableArray =
1383 fixed_size_simd<value_type, Common::NextPowerOfTwo<size()>::value>;
1384 auto sortable = simd_cast<SortableArray>(*this);
1385 for (std::size_t i = Size; i < SortableArray::Size; ++i) {
1386 using limits = std::numeric_limits<value_type>;
1387 if (limits::has_infinity) {
1388 sortable[i] = limits::infinity();
1389 } else {
1391 }
1392 }
1393 return simd_cast<fixed_size_simd<T, N>>(sortable.sorted());
1394
1395 /* The following implementation appears to be less efficient. But this may need further
1396 * work.
1397 const auto a = data0.sorted();
1398 const auto b = data1.sorted();
1399#ifdef Vc_DEBUG_SORTED
1400 std::cerr << "== " << a << b << '\n';
1401#endif
1402 auto aIt = Vc::begin(a);
1403 auto bIt = Vc::begin(b);
1404 const auto aEnd = Vc::end(a);
1405 const auto bEnd = Vc::end(b);
1406 return SimdArray::generate([&](std::size_t) {
1407 if (aIt == aEnd) {
1408 return *(bIt++);
1409 }
1410 if (bIt == bEnd) {
1411 return *(aIt++);
1412 }
1413 if (*aIt < *bIt) {
1414 return *(aIt++);
1415 } else {
1416 return *(bIt++);
1417 }
1418 });
1419 */
1420 }
1421
1424
1427 static constexpr std::size_t Size = size();
1428
1430 Vc_DEPRECATED("use exponent(x) instead")
1431 Vc_INTRINSIC fixed_size_simd<T, N> exponent() const
1432 {
1433 return {exponent(data0), exponent(data1)};
1434 }
1435
1437 Vc_DEPRECATED("use isnegative(x) instead") Vc_INTRINSIC MaskType isNegative() const
1438 {
1439 return {isnegative(data0), isnegative(data1)};
1440 }
1441
1443 Vc_DEPRECATED("use copysign(x, y) instead")
1444 Vc_INTRINSIC fixed_size_simd<T, N> copySign(const SimdArray &x) const
1445 {
1446 return {Vc::copysign(data0, x.data0),
1447 Vc::copysign(data1, x.data1)};
1448 }
1450
    // internal_data0/1 {{{2
    // Grant the free accessor functions (defined below) direct access to the
    // two storage halves.
    friend storage_type0 &internal_data0<>(SimdArray &x);
    friend storage_type1 &internal_data1<>(SimdArray &x);
    friend const storage_type0 &internal_data0<>(const SimdArray &x);
    friend const storage_type1 &internal_data1<>(const SimdArray &x);

    /// Constructs from the two storage halves, moving them into place.
    Vc_INTRINSIC SimdArray(storage_type0 &&x, storage_type1 &&y) //{{{2
        : data0(std::move(x)), data1(std::move(y))
    {
    }

    // aligned operator new/delete so heap-allocated SimdArrays keep the
    // required vector alignment
    Vc_FREE_STORE_OPERATORS_ALIGNED(alignof(storage_type0));

private: //{{{2
    // The alignas attribute attached to the class declaration above is ignored by ICC
    // 17.0.0 (at least). So just move the alignas attribute down here where it works for
    // all compilers.
    alignas(static_cast<std::size_t>(
        Common::BoundedAlignment<Common::NextPowerOfTwo<N>::value * sizeof(V) /
                                 V::size()>::value)) storage_type0 data0;
    storage_type1 data1;
};
#undef Vc_CURRENT_CLASS_NAME
// Out-of-class definitions for the static constexpr members (required for
// ODR-use before C++17 inline variables).
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::Size;
template <typename T, std::size_t N, typename V, std::size_t M>
constexpr std::size_t SimdArray<T, N, V, M>::MemoryAlignment;
1479
1480// gatherImplementation {{{2
1481template <typename T, std::size_t N, typename VectorType, std::size_t M>
1482template <class MT, class IT, int Scale>
1484 const Common::GatherArguments<MT, IT, Scale> &args)
1485{
1486 data0.gather(Common::make_gather<Scale>(
1487 args.address, Split::lo(Common::Operations::gather(), args.indexes)));
1488 data1.gather(Common::make_gather<Scale>(
1489 args.address, Split::hi(Common::Operations::gather(), args.indexes)));
1490}
// Masked gather: like the unmasked variant, with the mask split alongside the
// index vector so each half gathers only its active lanes.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <class MT, class IT, int Scale>
inline void SimdArray<T, N, VectorType, M>::gatherImplementation(
    const Common::GatherArguments<MT, IT, Scale> &args, MaskArgument mask)
{
    data0.gather(Common::make_gather<Scale>(
                     args.address, Split::lo(Common::Operations::gather(), args.indexes)),
                 Split::lo(mask));
    data1.gather(Common::make_gather<Scale>(
                     args.address, Split::hi(Common::Operations::gather(), args.indexes)),
                 Split::hi(mask));
}
1503
// scatterImplementation {{{2
// Unmasked scatter: splits the index vector and forwards one scatter per
// storage half. (The gather operation tag is what Split uses to partition the
// index vector here — presumably shared between gather and scatter splitting.)
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                  IT &&indexes) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(),
                                 indexes));  // don't forward indexes - it could move and
                                             // thus break the next line
    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)));
}
// Masked scatter: mask is split alongside the index vector.
template <typename T, std::size_t N, typename VectorType, std::size_t M>
template <typename MT, typename IT>
inline void SimdArray<T, N, VectorType, M>::scatterImplementation(MT *mem,
                                                                  IT &&indexes, MaskArgument mask) const
{
    data0.scatter(mem, Split::lo(Common::Operations::gather(), indexes),
                  Split::lo(mask));  // don't forward indexes - it could move and
                                     // thus break the next line
    data1.scatter(mem, Split::hi(Common::Operations::gather(), std::forward<IT>(indexes)),
                  Split::hi(mask));
}
1526
// internal_data0/1 (SimdArray) {{{1
// Free accessors for the two storage halves; befriended by SimdArray above.
// (Vc_INTRINSIC is omitted for MSVC — presumably to work around a compiler
// issue; TODO confirm.)
/// Returns a mutable reference to the low storage half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
/// Returns a mutable reference to the high storage half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
/// Returns a const reference to the low storage half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type0 &internal_data0(
    const SimdArray<T, N, V, M> &x)
{
    return x.data0;
}
/// Returns a const reference to the high storage half of \p x.
template <typename T, std::size_t N, typename V, std::size_t M>
#ifndef Vc_MSVC
Vc_INTRINSIC
#endif
const typename SimdArrayTraits<T, N>::storage_type1 &internal_data1(
    const SimdArray<T, N, V, M> &x)
{
    return x.data1;
}
1568
// MSVC workaround for SimdArray(storage_type0, storage_type1) ctor{{{1
// MSVC sometimes stores x to data1. By first broadcasting 0 and then assigning y
// in the body the bug is supressed.
#if defined Vc_MSVC && defined Vc_IMPL_SSE && !defined Vc_IMPL_AVX
// Explicit specialization for the one affected instantiation (double, 8 with
// SSE): initialize data1 to zero in the init list, then assign y in the body.
template <>
Vc_INTRINSIC SimdArray<double, 8>::SimdArray(fixed_size_simd<double, 4> &&x,
                                             fixed_size_simd<double, 4> &&y)
    : data0(x), data1(0)
{
    data1 = y;
}
#endif
1581
// binary operators {{{
namespace Detail
{
// Generates the homogeneous fixed_size_simd operators. Atomic arrays (a single
// native vector) forward to the native operator on internal_data; non-atomic
// arrays recurse half by half. (No comments inside the macro bodies: // would
// swallow the line continuation.)
#define Vc_FIXED_OP(op)                                                                  \
    template <class T, int N,                                                            \
              class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type>   \
    fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a,                    \
                                      const fixed_size_simd<T, N> &b)                    \
    {                                                                                    \
        return {private_init, internal_data(a) op internal_data(b)};                     \
    }                                                                                    \
    template <class T, int N,                                                            \
              class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type,  \
              class = T>                                                                 \
    fixed_size_simd<T, N> operator op(const fixed_size_simd<T, N> &a,                    \
                                      const fixed_size_simd<T, N> &b)                    \
    {                                                                                    \
        return {internal_data0(a) op internal_data0(b),                                  \
                internal_data1(a) op internal_data1(b)};                                 \
    }
Vc_ALL_ARITHMETICS(Vc_FIXED_OP);
Vc_ALL_BINARY(Vc_FIXED_OP);
Vc_ALL_SHIFTS(Vc_FIXED_OP);
#undef Vc_FIXED_OP
// Same structure for comparison operators, which return a mask instead of a
// value vector.
#define Vc_FIXED_OP(op)                                                                  \
    template <class T, int N,                                                            \
              class = typename std::enable_if<fixed_size_simd<T, N>::is_atomic>::type>   \
    fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a,               \
                                           const fixed_size_simd<T, N> &b)               \
    {                                                                                    \
        return {private_init, internal_data(a) op internal_data(b)};                     \
    }                                                                                    \
    template <class T, int N,                                                            \
              class = typename std::enable_if<!fixed_size_simd<T, N>::is_atomic>::type,  \
              class = T>                                                                 \
    fixed_size_simd_mask<T, N> operator op(const fixed_size_simd<T, N> &a,               \
                                           const fixed_size_simd<T, N> &b)               \
    {                                                                                    \
        return {internal_data0(a) op internal_data0(b),                                  \
                internal_data1(a) op internal_data1(b)};                                 \
    }
Vc_ALL_COMPARES(Vc_FIXED_OP);
#undef Vc_FIXED_OP
}  // namespace Detail
1626
1627// }}}
1628// binary operators {{{1
namespace result_vector_type_internal
{
// Strips const/volatile and references.
template <typename T>
using remove_cvref = typename std::remove_cv<typename std::remove_reference<T>::type>::type;

// True for integral types that may be wider than int (long/unsigned long are
// included explicitly because their width is platform-dependent).
template <typename T>
using is_integer_larger_than_int = std::integral_constant<
    bool, std::is_integral<T>::value &&(sizeof(T) > sizeof(int) ||
                                        std::is_same<T, long>::value ||
                                        std::is_same<T, unsigned long>::value)>;

// Primary template: the bool parameter encodes whether the mixed L op R
// expression is allowed at all (one operand is a SimdArray, not both are
// fixed_size, and the other operand is either a small-enough scalar or a
// vector of matching width).
template <
    typename L, typename R,
    std::size_t N = Traits::isSimdArray<L>::value ? Traits::simd_vector_size<L>::value
                                                  : Traits::simd_vector_size<R>::value,
    bool = (Traits::isSimdArray<L>::value ||
            Traits::isSimdArray<R>::value) &&  // one of the operands must be a SimdArray
           !(Traits::is_fixed_size_simd<L>::value &&  // if both are fixed_size, use
             Traits::is_fixed_size_simd<R>::value) &&  // common/operators.h
           ((std::is_arithmetic<remove_cvref<L>>::value &&  // one of the operands is a
             !is_integer_larger_than_int<remove_cvref<L>>::value) ||  // scalar type
            (std::is_arithmetic<remove_cvref<R>>::value &&
             !is_integer_larger_than_int<remove_cvref<R>>::value) ||
            // or one of the operands is Vector<T> with Vector<T>::size() ==
            // SimdArray::size()
            Traits::simd_vector_size<L>::value == Traits::simd_vector_size<R>::value)>
struct evaluate;

// Enabled case: compute the result element type.
template <typename L, typename R, std::size_t N> struct evaluate<L, R, N, true>
{
private:
    using LScalar = Traits::entry_type_of<L>;
    using RScalar = Traits::entry_type_of<R>;

    template <bool B, typename T, typename F>
    using conditional = typename std::conditional<B, T, F>::type;

public:
    // In principle we want the exact same rules for SimdArray<T> ⨉ SimdArray<U> as the standard
    // defines for T ⨉ U. BUT: short ⨉ short returns int (because all integral types smaller than
    // int are promoted to int before any operation). This would imply that SIMD types with integral
    // types smaller than int are more or less useless - and you could use SimdArray<int> from the
    // start. Therefore we special-case those operations where the scalar type of both operands is
    // integral and smaller than int.
    // In addition, there is no generic support for 64-bit int SIMD types. Therefore
    // promotion to a 64-bit integral type (including `long` because it can potentially have 64
    // bits) also is not done. But if one of the operands is a scalar type that is larger than int
    // then the operator is disabled altogether. We do not want an implicit demotion.
    using type = fixed_size_simd<
        conditional<(std::is_integral<LScalar>::value &&std::is_integral<RScalar>::value &&
                     sizeof(LScalar) < sizeof(int) &&
                     sizeof(RScalar) < sizeof(int)),
                    conditional<(sizeof(LScalar) == sizeof(RScalar)),
                                conditional<std::is_unsigned<LScalar>::value, LScalar, RScalar>,
                                conditional<(sizeof(LScalar) > sizeof(RScalar)), LScalar, RScalar>>,
                    decltype(std::declval<LScalar>() + std::declval<RScalar>())>,
        N>;
};

}  // namespace result_vector_type_internal

// Result type of a mixed binary operator; SFINAEs out when disallowed.
template <typename L, typename R>
using result_vector_type = typename result_vector_type_internal::evaluate<L, R>::type;
1692
// Mixed-operand arithmetic/binary operators: convert both operands to the
// common result_vector_type and dispatch to the homogeneous Detail operator.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
                                                                                         \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC result_vector_type<L, R> operator op_(L &&lhs, R &&rhs)                 \
    {                                                                                    \
        using Return = result_vector_type<L, R>;                                         \
        return Vc::Detail::operator op_(                                                 \
            static_cast<const Return &>(std::forward<L>(lhs)),                           \
            static_cast<const Return &>(std::forward<R>(rhs)));                          \
    }
Vc_ALL_ARITHMETICS(Vc_BINARY_OPERATORS_);
Vc_ALL_BINARY(Vc_BINARY_OPERATORS_);
#undef Vc_BINARY_OPERATORS_
// Mixed-operand comparisons: same conversion, but the result is the mask type
// of the promoted vector type.
#define Vc_BINARY_OPERATORS_(op_)                                                        \
                                                                                         \
    template <typename L, typename R>                                                    \
    Vc_INTRINSIC typename result_vector_type<L, R>::mask_type operator op_(L &&lhs,     \
                                                                           R &&rhs)     \
    {                                                                                    \
        using Promote = result_vector_type<L, R>;                                        \
        return Promote(std::forward<L>(lhs)) op_ Promote(std::forward<R>(rhs));          \
    }
Vc_ALL_COMPARES(Vc_BINARY_OPERATORS_);
#undef Vc_BINARY_OPERATORS_
1755
// math functions {{{1
// Generates free math functions that forward to the SimdArray/fixed_size_simd
// fromOperation mechanism (which applies the named operation per storage part).
#define Vc_FORWARD_UNARY_OPERATOR(name_)                                                 \
                                                                                         \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x)                   \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    template <class T, int N>                                                            \
    fixed_size_simd<T, N> name_(const fixed_size_simd<T, N> &x)                          \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Same, for predicates that return a mask instead of a value vector.
#define Vc_FORWARD_UNARY_BOOL_OPERATOR(name_)                                            \
                                                                                         \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd_mask<T, N> name_(const SimdArray<T, N, V, M> &x)              \
    {                                                                                    \
        return fixed_size_simd_mask<T, N>::fromOperation(                                \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    template <class T, int N>                                                            \
    fixed_size_simd_mask<T, N> name_(const fixed_size_simd<T, N> &x)                     \
    {                                                                                    \
        return fixed_size_simd_mask<T, N>::fromOperation(                                \
            Common::Operations::Forward_##name_(), x);                                   \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

// Same, for two-argument math functions.
#define Vc_FORWARD_BINARY_OPERATOR(name_)                                                \
                                                                                         \
    template <typename T, std::size_t N, typename V, std::size_t M>                      \
    inline fixed_size_simd<T, N> name_(const SimdArray<T, N, V, M> &x,                   \
                                       const SimdArray<T, N, V, M> &y)                   \
    {                                                                                    \
        return fixed_size_simd<T, N>::fromOperation(                                     \
            Common::Operations::Forward_##name_(), x, y);                                \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
1799
// Component-wise math functions (see the corresponding <cmath> functions).
Vc_FORWARD_UNARY_OPERATOR(abs);
Vc_FORWARD_UNARY_OPERATOR(asin);
Vc_FORWARD_UNARY_OPERATOR(atan);
Vc_FORWARD_BINARY_OPERATOR(atan2);
Vc_FORWARD_UNARY_OPERATOR(ceil);
Vc_FORWARD_BINARY_OPERATOR(copysign);
Vc_FORWARD_UNARY_OPERATOR(cos);
Vc_FORWARD_UNARY_OPERATOR(exp);
Vc_FORWARD_UNARY_OPERATOR(exponent);
Vc_FORWARD_UNARY_OPERATOR(floor);
1816template <typename T, std::size_t N>
1818 const SimdArray<T, N> &c)
1819{
1820 return SimdArray<T, N>::fromOperation(Common::Operations::Forward_fma(), a, b, c);
1821}
// Component-wise classification predicates, returning masks.
Vc_FORWARD_UNARY_BOOL_OPERATOR(isfinite);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isinf);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isnan);
Vc_FORWARD_UNARY_BOOL_OPERATOR(isnegative);
1827template <typename T, std::size_t N>
1829{
1830 return SimdArray<T, N>::fromOperation(Common::Operations::Forward_frexp(), x, e);
1831}
1833template <typename T, std::size_t N>
1835{
1836 return SimdArray<T, N>::fromOperation(Common::Operations::Forward_ldexp(), x, e);
1837}
// More component-wise math functions.
Vc_FORWARD_UNARY_OPERATOR(log);
Vc_FORWARD_UNARY_OPERATOR(log10);
Vc_FORWARD_UNARY_OPERATOR(log2);
Vc_FORWARD_UNARY_OPERATOR(reciprocal);
Vc_FORWARD_UNARY_OPERATOR(round);
Vc_FORWARD_UNARY_OPERATOR(rsqrt);
Vc_FORWARD_UNARY_OPERATOR(sin);
1846template <typename T, std::size_t N>
1848{
1849 SimdArray<T, N>::callOperation(Common::Operations::Forward_sincos(), x, sin, cos);
1850}
// Remaining component-wise functions, then retire the generator macros.
Vc_FORWARD_UNARY_OPERATOR(sqrt);
Vc_FORWARD_UNARY_OPERATOR(trunc);
Vc_FORWARD_BINARY_OPERATOR(min);
Vc_FORWARD_BINARY_OPERATOR(max);
#undef Vc_FORWARD_UNARY_OPERATOR
#undef Vc_FORWARD_UNARY_BOOL_OPERATOR
#undef Vc_FORWARD_BINARY_OPERATOR
1859
// simd_cast {{{1
// MSVC needs distinct dummy default parameters to disambiguate overloads that
// it would otherwise consider identical; other compilers get empty expansions.
#ifdef Vc_MSVC
#define Vc_DUMMY_ARG0 , int = 0
#define Vc_DUMMY_ARG1 , long = 0
#define Vc_DUMMY_ARG2 , short = 0
#define Vc_DUMMY_ARG3 , char = '0'
#define Vc_DUMMY_ARG4 , unsigned = 0u
#define Vc_DUMMY_ARG5 , unsigned short = 0u
#else
#define Vc_DUMMY_ARG0
#define Vc_DUMMY_ARG1
#define Vc_DUMMY_ARG2
#define Vc_DUMMY_ARG3
#define Vc_DUMMY_ARG4
#define Vc_DUMMY_ARG5
#endif  // Vc_MSVC
1876
// simd_cast_impl_smaller_input {{{2
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
// Casts several N-wide inputs into one wider Return: the leading inputs are
// cast in one go, then the last input's elements are copied in scalar-wise.
template <typename Return, std::size_t N, typename T, typename... From>
Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return>
simd_cast_impl_smaller_input(const From &... xs, const T &last)
{
    Return r = simd_cast<Return>(xs...);
    for (size_t i = 0; i < N; ++i) {
        r[i + N * sizeof...(From)] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
// Single-input case: scalar-wise copy into a zero-initialized Return.
template <typename Return, std::size_t N, typename T>
Vc_INTRINSIC Vc_CONST Return simd_cast_impl_smaller_input(const T &last)
{
    Return r = Return();
    for (size_t i = 0; i < N; ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
// Inputs wider than Return: the last input only contributes the elements that
// still fit into Return.
template <typename Return, std::size_t N, typename T, typename... From>
Vc_INTRINSIC Vc_CONST enable_if<sizeof...(From) != 0, Return> simd_cast_impl_larger_input(
    const From &... xs, const T &last)
{
    Return r = simd_cast<Return>(xs...);
    for (size_t i = N * sizeof...(From); i < Return::Size; ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i - N * sizeof...(From)]);
    }
    return r;
}
// Single wider input: copy the leading Return::size() elements.
template <typename Return, std::size_t N, typename T>
Vc_INTRINSIC Vc_CONST Return simd_cast_impl_larger_input(const T &last)
{
    Return r = Return();
    for (size_t i = 0; i < Return::size(); ++i) {
        r[i] = static_cast<typename Return::EntryType>(last[i]);
    }
    return r;
}
1919
// simd_cast_without_last (declaration) {{{2
// Casts all arguments except the trailing one (defined further below).
template <typename Return, typename T, typename... From>
Vc_INTRINSIC_L Vc_CONST_L Return
    simd_cast_without_last(const From &... xs, const T &) Vc_INTRINSIC_R Vc_CONST_R;
1924
// are_all_types_equal {{{2
// Compile-time check that every type in the pack is the same type. A single
// type is trivially "all equal"; longer packs are checked pairwise by
// recursion on the tail.
template <typename... Ts> struct are_all_types_equal;
template <typename T>
struct are_all_types_equal<T> : public std::true_type
{
};
template <typename T0, typename T1, typename... Rest>
struct are_all_types_equal<T0, T1, Rest...>
    : public std::integral_constant<
          bool, std::is_same<T0, T1>::value && are_all_types_equal<T1, Rest...>::value>
{
};
1937
// simd_cast_interleaved_argument_order (declarations) {{{2
// Casts with the a/b argument packs interleaved: a0, b0, a1, b1, ...
// (defined further below).
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b);
1961
// simd_cast_with_offset (declarations and one impl) {{{2
// Casts starting at element `offset` of the concatenated inputs. The overloads
// are selected on how offset relates to the input and output widths.
// offset == 0 {{{3
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
    simd_cast_with_offset(const From &x, const Froms &... xs);
// offset > 0 && offset divisible by Return::Size {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0), Return>
    simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is non-atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 !Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x);
// offset > 0 && offset NOT divisible && Return is atomic simd(mask)array {{{3
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x);
// offset > first argument (drops first arg) {{{3
// The first input lies entirely before the offset: drop it and recurse with
// the offset reduced accordingly.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value && From::Size <= offset), Return>
    simd_cast_with_offset(const From &, const Froms &... xs)
{
    return simd_cast_with_offset<Return, offset - From::Size>(xs...);
}

// offset > first and only argument (returns Zero) {{{3
// Nothing left to read once the offset exceeds the only input.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST enable_if<(From::Size <= offset), Return> simd_cast_with_offset(
    const From &)
{
    return Return(0);
}
2009
// first_type_of {{{2
// Yields the first type of a non-empty template parameter pack.
template <typename Head, typename... Tail> struct first_type_of_impl
{
    using type = Head;
};
template <typename... Args> using first_type_of = typename first_type_of_impl<Args...>::type;
2016
// simd_cast_drop_arguments (declarations) {{{2
// Casts from a pack of inputs, dropping trailing inputs that do not contribute
// to the Return width.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x);
// All inputs still fit into Return: nothing to drop.
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x);
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
// The trailing input is past the Return width: drop it (unnamed parameter).
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From);
// Two-input base case of the dropping overload above.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From);
2039
namespace
{
#ifdef Vc_DEBUG_SIMD_CAST
// Sink whose braced-init-list argument forces left-to-right evaluation of the
// stream expressions for all remaining arguments.
void debugDoNothing(const std::initializer_list<void *> &) {}
// Debug build of the cast tracer: prints "prefix arg0, arg1, ... suffix" to
// std::cerr.
template <typename T0, typename... Ts>
inline void vc_debug_(const char *prefix, const char *suffix, const T0 &arg0,
                      const Ts &... args)
{
    std::cerr << prefix << arg0;
    debugDoNothing({&(std::cerr << ", " << args)...});
    std::cerr << suffix;
}
#else
// Release build: same signature, no-op, so call sites need no preprocessor
// guards.
template <typename T0, typename... Ts>
Vc_INTRINSIC void vc_debug_(const char *, const char *, const T0 &, const Ts &...)
{
}
#endif
} // unnamed namespace
2059
// is_less trait{{{2
// Compile-time A < B packaged as an integral_constant type, used in the
// enable_if conditions of the macro-generated simd_cast overloads below.
template <size_t A, size_t B>
struct is_less : public std::integral_constant<bool, (A < B)> {
};
2064
// is_power_of_2 trait{{{2
// Compile-time check whether N is a power of two. A power of two has exactly
// one bit set, so clearing the lowest set bit ((N - 1) & N) yields zero. The
// explicit N != 0 test excludes 0, for which the bit trick alone would
// wrongly report true (N - 1 wraps to all-ones, which ANDs with 0 to 0).
// SimdArray sizes are always >= 1, so this only hardens the trait for
// generic use without changing any existing overload selection.
template <size_t N>
struct is_power_of_2
    : public std::integral_constant<bool, (N != 0 && ((N - 1) & N) == 0)> {
};
2069
// simd_cast<T>(xs...) to SimdArray/-mask {{{2
// Generates the simd_cast overloads that convert one or more native
// Vc::Vector / Vc::Mask arguments into a SimdArray / SimdMaskArray. Four
// overloads per instantiation:
// 1. atomic Return, inputs do not overfill it: cast all inputs into the
//    single storage member.
// 2. atomic Return, more input elements than Return::Size: drop the last
//    argument and recurse.
// 3. non-atomic Return, inputs reach past the left storage half: fill both
//    halves (the right one via simd_cast_with_offset).
// 4. non-atomic Return, inputs only cover the left half: fill the left
//    storage member and zero the right one.
// fixed_size ABIs are excluded; they are handled by the fixed_size_simd
// overloads further below.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&     \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value &&                      \
         !detail::is_fixed_size_abi<A>::value),                                          \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{1}(", ")\n", x, xs...);                                     \
        return {private_init, simd_cast<typename Return::storage_type>(x, xs...)};       \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (Traits::isAtomic##SimdArrayType_<Return>::value &&                              \
         !is_less<NativeType_<T, A>::Size * sizeof...(Froms), Return::Size>::value &&    \
         are_all_types_equal<NativeType_<T, A>, Froms...>::value &&                      \
         !detail::is_fixed_size_abi<A>::value),                                          \
        Return>                                                                          \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{2}(", ")\n", x, xs...);                                     \
        return {simd_cast_without_last<Return, NativeType_<T, A>, Froms...>(x, xs...)};  \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   is_less<Common::left_size<Return::Size>(),                            \
                           NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&    \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value &&            \
                   !detail::is_fixed_size_abi<A>::value),                                \
                  Return>                                                                \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{3}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast_drop_arguments<R0, Froms...>(x, xs...),                        \
                simd_cast_with_offset<R1, R0::Size>(x, xs...)};                          \
    }                                                                                    \
    template <typename Return, typename T, typename A, typename... Froms>                \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   !is_less<Common::left_size<Return::Size>(),                           \
                            NativeType_<T, A>::Size *(1 + sizeof...(Froms))>::value &&   \
                   are_all_types_equal<NativeType_<T, A>, Froms...>::value &&            \
                   !detail::is_fixed_size_abi<A>::value),                                \
                  Return>                                                                \
    simd_cast(NativeType_<T, A> x, Froms... xs)                                          \
    {                                                                                    \
        vc_debug_("simd_cast{4}(", ")\n", x, xs...);                                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        return {simd_cast<R0>(x, xs...), R1(0)};                                         \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2134
// simd_cast<SimdArray/-mask, offset>(V) {{{2
// Generates the offset-taking simd_cast overloads converting a single native
// Vector/Mask into a SimdArray / SimdMaskArray. The offset counts in units of
// Return::Size elements of the input. Three overloads per instantiation:
// 1. atomic Return: forward offset to the storage member's cast.
// 2. non-atomic Return, input long enough for both halves: fill both storage
//    halves via simd_cast_with_offset (offsets in elements).
// 3. non-atomic Return, input too short for the right half: fill the left
//    half and zero the right one.
#define Vc_SIMDARRAY_CASTS(SimdArrayType_, NativeType_)                                  \
    /* SIMD Vector/Mask to atomic SimdArray/simdmaskarray */                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<Traits::isAtomic##SimdArrayType_<Return>::value, Return>               \
        simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG0)                                     \
    {                                                                                    \
        vc_debug_("simd_cast{offset, atomic}(", ")\n", offset, x);                       \
        return {private_init, simd_cast<typename Return::storage_type, offset>(x)};      \
    }                                                                                    \
    /* both halves of Return array are extracted from argument */                        \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() <           \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
    simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG1)                                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, split Return}(", ")\n", offset, x);                 \
        using R0 = typename Return::storage_type0;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        constexpr int entries_offset_right = entries_offset + R0::Size;                  \
        return {                                                                         \
            simd_cast_with_offset<typename Return::storage_type0, entries_offset>(x),    \
            simd_cast_with_offset<typename Return::storage_type1, entries_offset_right>( \
                x)};                                                                     \
    }                                                                                    \
    /* SIMD Vector/Mask to non-atomic SimdArray/simdmaskarray */                         \
    /* right half of Return array is zero */                                             \
    template <typename Return, int offset, typename T, typename A>                       \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(Traits::is##SimdArrayType_<Return>::value &&                          \
                   !Traits::isAtomic##SimdArrayType_<Return>::value &&                   \
                   Return::Size * offset + Common::left_size<Return::Size>() >=          \
                       NativeType_<T, A>::Size),                                         \
                  Return>                                                                \
    simd_cast(NativeType_<T, A> x Vc_DUMMY_ARG2)                                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, R1::Zero}(", ")\n", offset, x);                     \
        using R0 = typename Return::storage_type0;                                       \
        using R1 = typename Return::storage_type1;                                       \
        constexpr int entries_offset = offset * Return::Size;                            \
        return {simd_cast_with_offset<R0, entries_offset>(x), R1(0)};                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray, Vc::Vector);
Vc_SIMDARRAY_CASTS(SimdMaskArray, Vc::Mask);
#undef Vc_SIMDARRAY_CASTS
2187
// simd_cast<T>(xs...) from SimdArray/-mask {{{2
// Generates the simd_cast overloads whose *inputs* are SimdArray /
// SimdMaskArray objects. Overloads per instantiation:
// 1./2. indivisible (N == V::Size): unwrap to the native vectors; variant 2
//    additionally drops surplus trailing arguments.
// 3./4. bisectable (N a power of two, N != M): recurse on the two storage
//    halves, interleaving all low halves before all high halves so element
//    order is preserved; variant 4 drops the last argument first.
// 5./6. non-power-of-two N: delegate to the smaller-/larger-input helpers.
// 7./8. a single bisectable input wider than Return: recurse into the left
//    half only (7) or both halves (8).
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* indivisible SimdArrayType_ */                                                     \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) == 0 || N * sizeof...(From) < Return::Size) &&       \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
    simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible}(", ")\n", x0, xs...);                          \
        return simd_cast<Return>(internal_data(x0), internal_data(xs)...);               \
    }                                                                                    \
    /* indivisible SimdArrayType_ && can drop arguments from the end */                  \
    template <typename Return, typename T, std::size_t N, typename V, typename... From>  \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(are_all_types_equal<SimdArrayType_<T, N, V, N>, From...>::value &&    \
                   (sizeof...(From) > 0 && (N * sizeof...(From) >= Return::Size)) &&     \
                   !std::is_same<Return, SimdArrayType_<T, N, V, N>>::value),            \
                  Return>                                                                \
    simd_cast(const SimdArrayType_<T, N, V, N> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{indivisible2}(", ")\n", x0, xs...);                         \
        return simd_cast_without_last<Return,                                            \
                                      typename SimdArrayType_<T, N, V, N>::storage_type, \
                                      typename From::storage_type...>(                   \
            internal_data(x0), internal_data(xs)...);                                    \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && never too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !std::is_same<Return, SimdArrayType_<T, N, V, M>>::value &&                     \
         is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value),  \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable}(", ")\n", x0, xs...);                           \
        return simd_cast_interleaved_argument_order<                                     \
            Return, typename SimdArrayType_<T, N, V, M>::storage_type0,                  \
            typename From::storage_type0...>(internal_data0(x0), internal_data0(xs)...,  \
                                             internal_data1(x0), internal_data1(xs)...); \
    }                                                                                    \
    /* bisectable SimdArrayType_ (N = 2^n) && input so large that at least the last      \
     * input can be dropped */                                                           \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         !is_less<N * sizeof...(From), Return::Size>::value && is_power_of_2<N>::value), \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{bisectable2}(", ")\n", x0, xs...);                          \
        return simd_cast_without_last<Return, SimdArrayType_<T, N, V, M>, From...>(      \
            x0, xs...);                                                                  \
    }                                                                                    \
    /* remaining SimdArrayType_ input never larger (N != 2^n) */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) <= Return::Size && !is_power_of_2<N>::value),         \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{remaining}(", ")\n", x0, xs...);                            \
        return simd_cast_impl_smaller_input<Return, N, SimdArrayType_<T, N, V, M>,       \
                                            From...>(x0, xs...);                         \
    }                                                                                    \
    /* remaining SimdArrayType_ input larger (N != 2^n) */                               \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M,     \
              typename... From>                                                          \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && are_all_types_equal<SimdArrayType_<T, N, V, M>, From...>::value &&    \
         N * (1 + sizeof...(From)) > Return::Size && !is_power_of_2<N>::value),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x0, const From &... xs)                  \
    {                                                                                    \
        vc_debug_("simd_cast{remaining2}(", ")\n", x0, xs...);                           \
        return simd_cast_impl_larger_input<Return, N, SimdArrayType_<T, N, V, M>,        \
                                           From...>(x0, xs...);                          \
    }                                                                                    \
    /* a single bisectable SimdArrayType_ (N = 2^n) too large */                         \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && N >= 2 * Return::Size && is_power_of_2<N>::value), Return>  \
        simd_cast(const SimdArrayType_<T, N, V, M> &x)                                   \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable}(", ")\n", x);                            \
        return simd_cast<Return>(internal_data0(x));                                     \
    }                                                                                    \
    template <typename Return, typename T, std::size_t N, typename V, std::size_t M>     \
    Vc_INTRINSIC Vc_CONST enable_if<(N != M && N > Return::Size &&                       \
                                     N < 2 * Return::Size && is_power_of_2<N>::value),   \
                                    Return>                                              \
    simd_cast(const SimdArrayType_<T, N, V, M> &x)                                       \
    {                                                                                    \
        vc_debug_("simd_cast{single bisectable2}(", ")\n", x);                           \
        return simd_cast<Return>(internal_data0(x), internal_data1(x));                  \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON

Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
// Convert from fixed_size_simd: view the arguments as SimdArray so the
// overloads generated above apply. The enable_if guard excludes the identity
// conversion from this overload set.
template <class Return, class T, int N, class... Ts,
          class = enable_if<!std::is_same<Return, fixed_size_simd<T, N>>::value>>
Vc_INTRINSIC Return simd_cast(const fixed_size_simd<T, N> &x, const Ts &... xs)
{
    return simd_cast<Return>(static_cast<const SimdArray<T, N> &>(x),
                             static_cast<const SimdArray<T, N> &>(xs)...);
}
// Same as above for fixed_size_simd_mask: view the arguments as
// SimdMaskArray so the generated mask overloads apply.
template <class Return, class T, int N, class... Ts,
          class = enable_if<!std::is_same<Return, fixed_size_simd_mask<T, N>>::value>>
Vc_INTRINSIC Return simd_cast(const fixed_size_simd_mask<T, N> &x, const Ts &... xs)
{
    return simd_cast<Return>(static_cast<const SimdMaskArray<T, N> &>(x),
                             static_cast<const SimdMaskArray<T, N> &>(xs)...);
}
2310
// simd_cast<T, offset>(SimdArray/-mask) {{{2
// Generates the offset-taking simd_cast overloads whose input is a SimdArray /
// SimdMaskArray. The offset counts in units of Return::Size input elements.
// Cases, in order: offset 0 (plain cast); indivisible input (forward to the
// native vector); requested window entirely in the right storage half, offset
// aligned / unaligned; window entirely in the left half; window straddling
// the half boundary (element-wise scalar copy as fallback).
#define Vc_SIMDARRAY_CASTS(SimdArrayType_)                                               \
    /* offset == 0 is like without offset */                                             \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<(offset == 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG0)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset == 0}(", ")\n", offset, x);                          \
        return simd_cast<Return>(x);                                                     \
    }                                                                                    \
    /* forward to V */                                                                   \
    template <typename Return, int offset, typename T, std::size_t N, typename V>        \
    Vc_INTRINSIC Vc_CONST enable_if<(offset != 0), Return> simd_cast(                    \
        const SimdArrayType_<T, N, V, N> &x Vc_DUMMY_ARG1)                               \
    {                                                                                    \
        vc_debug_("simd_cast{offset, forward}(", ")\n", offset, x);                      \
        return simd_cast<Return, offset>(internal_data(x));                              \
    }                                                                                    \
    /* convert from right member of SimdArray */                                         \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size == 0),           \
                  Return>                                                                \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG2)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right}(", ")\n", offset, x);                        \
        return simd_cast<Return, offset - Common::left_size<N>() / Return::Size>(        \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* same as above except for odd cases where offset * Return::Size doesn't fit the    \
     * left side of the SimdArray */                                                     \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && offset * Return::Size >= Common::left_size<N>() &&          \
                   offset != 0 && Common::left_size<N>() % Return::Size != 0),           \
                  Return>                                                                \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG3)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, right, nofit}(", ")\n", offset, x);                 \
        return simd_cast_with_offset<Return,                                             \
                                     offset * Return::Size - Common::left_size<N>()>(    \
            internal_data1(x));                                                          \
    }                                                                                    \
    /* convert from left member of SimdArray */                                          \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST enable_if<                                                     \
        (N != M && /*offset * Return::Size < Common::left_size<N>() &&*/                 \
         offset != 0 && (offset + 1) * Return::Size <= Common::left_size<N>()),          \
        Return>                                                                          \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG4)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, left}(", ")\n", offset, x);                         \
        return simd_cast<Return, offset>(internal_data0(x));                             \
    }                                                                                    \
    /* fallback to copying scalars */                                                    \
    template <typename Return, int offset, typename T, std::size_t N, typename V,        \
              std::size_t M>                                                             \
    Vc_INTRINSIC Vc_CONST                                                                \
        enable_if<(N != M && (offset * Return::Size < Common::left_size<N>()) &&         \
                   offset != 0 && (offset + 1) * Return::Size > Common::left_size<N>()), \
                  Return>                                                                \
    simd_cast(const SimdArrayType_<T, N, V, M> &x Vc_DUMMY_ARG5)                         \
    {                                                                                    \
        vc_debug_("simd_cast{offset, copy scalars}(", ")\n", offset, x);                 \
        using R = typename Return::EntryType;                                            \
        Return r = Return(0);                                                            \
        for (std::size_t i = offset * Return::Size;                                     \
             i < std::min(N, (offset + 1) * Return::Size); ++i) {                        \
            r[i - offset * Return::Size] = static_cast<R>(x[i]);                         \
        }                                                                                \
        return r;                                                                        \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_SIMDARRAY_CASTS(SimdArray);
Vc_SIMDARRAY_CASTS(SimdMaskArray);
#undef Vc_SIMDARRAY_CASTS
// simd_cast_drop_arguments (definitions) {{{2
// Base case: a single input left, nothing more to drop.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST Return simd_cast_drop_arguments(From x)
{
    return simd_cast<Return>(x);
}
// The remaining inputs fit into Return: forward all of them to simd_cast.
// Note the argument order: xs precede x so that the *last* argument is the
// one peeled off by the dropping overload below.
template <typename Return, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<Froms...>::value &&
               sizeof...(Froms) * first_type_of<Froms...>::Size < Return::Size),
              Return>
    simd_cast_drop_arguments(Froms... xs, first_type_of<Froms...> x)
{
    return simd_cast<Return>(xs..., x);
}
// The following function can be implemented without the sizeof...(From) overload.
// However, ICC has a bug (Premier Issue #6000116338) which leads to an ICE. Splitting the
// function in two works around the issue.
// Too many inputs: the unnamed trailing From is dropped and the rest recurse.
template <typename Return, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST enable_if<
    (are_all_types_equal<From, Froms...>::value &&
     (1 + sizeof...(Froms)) * From::Size >= Return::Size && sizeof...(Froms) != 0),
    Return>
simd_cast_drop_arguments(Froms... xs, From x, From)
{
    return simd_cast_drop_arguments<Return, Froms...>(xs..., x);
}
// Two-argument base case of the ICC workaround: drop the trailing input and
// recurse into the single-argument base case.
template <typename Return, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From>::value && From::Size >= Return::Size), Return>
    simd_cast_drop_arguments(From x, From)
{
    return simd_cast_drop_arguments<Return>(x);
}
2426
// simd_cast_with_offset (definitions) {{{2
// Element offset divisible by Return::Size: translate it into the
// Return-sized block offset the offset-taking simd_cast expects.
 template <typename Return, std::size_t offset, typename From>
 Vc_INTRINSIC Vc_CONST
     enable_if<(From::Size > offset && offset > 0 && offset % Return::Size == 0),
               Return> simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x);
}
// Unaligned offset into a non-atomic Return: build the two storage halves
// independently, each with its own element offset.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 !Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 !Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x)
{
    using R0 = typename Return::storage_type0;
    using R1 = typename Return::storage_type1;
    return {simd_cast_with_offset<R0, offset>(x),
            simd_cast_with_offset<R1, offset + R0::Size>(x)};
}
// Unaligned offset into an atomic Return: shift the input by the remainder so
// the remaining offset becomes a whole number of Return-sized blocks.
template <typename Return, std::size_t offset, typename From>
Vc_INTRINSIC Vc_CONST
    enable_if<(From::Size > offset && offset > 0 && offset % Return::Size != 0 &&
               ((Traits::isSimdArray<Return>::value &&
                 Traits::isAtomicSimdArray<Return>::value) ||
                (Traits::isSimdMaskArray<Return>::value &&
                 Traits::isAtomicSimdMaskArray<Return>::value))),
              Return>
    simd_cast_with_offset(const From &x)
{
    return simd_cast<Return, offset / Return::Size>(x.shifted(offset % Return::Size));
}
// offset == 0 with multiple inputs: no offsetting needed, plain simd_cast.
template <typename Return, std::size_t offset, typename From, typename... Froms>
Vc_INTRINSIC Vc_CONST
    enable_if<(are_all_types_equal<From, Froms...>::value && offset == 0), Return>
    simd_cast_with_offset(const From &x, const Froms &... xs)
{
    return simd_cast<Return>(x, xs...);
}
2469
// simd_cast_without_last (definition) {{{2
// Casts all arguments except the last one (the unnamed trailing T), which is
// discarded. The explicit template arguments at the call sites place the last
// argument's type in T and the rest in From.
template <typename Return, typename T, typename... From>
Vc_INTRINSIC Vc_CONST Return simd_cast_without_last(const From &... xs, const T &)
{
    return simd_cast<Return>(xs...);
}
2476
2477// simd_cast_interleaved_argument_order (definitions) {{{2
2478
#ifdef Vc_MSVC
// MSVC doesn't see that the Ts pack below can be empty and thus complains when extract_interleaved
// is called with only 2 arguments. These overloads here are *INCORRECT standard C++*, but they make
// MSVC do the right thing.
// Two-argument base case, I == 0: select the first argument.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0, const T0 &)
{
    return a0;
}
// Two-argument base case, I == 1: select the second argument.
template <std::size_t I, typename T0>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &, const T0 &b0)
{
    return b0;
}
#endif  // Vc_MSVC
2494
// extract_interleaved<I>(a..., b...) selects element I of the interleaved
// sequence a0, b0, a1, b1, ...: even I picks from the a pack, odd I from the
// b pack. This overload handles I == 0: return the first a argument.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 0), T0> extract_interleaved(const T0 &a0,
                                                                  const Ts &...,
                                                                  const T0 &,
                                                                  const Ts &...)
{
    return a0;
}
// I == 1: return the first argument of the b pack.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I == 1), T0> extract_interleaved(const T0 &,
                                                                  const Ts &...,
                                                                  const T0 &b0,
                                                                  const Ts &...)
{
    return b0;
}
// I > 1: drop the leading element of both packs and recurse with I - 2.
template <std::size_t I, typename T0, typename... Ts>
Vc_INTRINSIC Vc_CONST enable_if<(I > 1), T0> extract_interleaved(const T0 &,
                                                                 const Ts &... a,
                                                                 const T0 &,
                                                                 const Ts &... b)
{
    return extract_interleaved<I - 2, Ts...>(a..., b...);
}
// Expands the index sequence 0 ... 2*sizeof...(Ts)-1 and feeds simd_cast the
// arguments reordered as a0, b0, a1, b1, ... via extract_interleaved.
template <typename Return, typename... Ts, std::size_t... Indexes>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order_1(index_sequence<Indexes...>, const Ts &... a,
                                           const Ts &... b)
{
    return simd_cast<Return>(extract_interleaved<Indexes, Ts...>(a..., b...)...);
}
// Calls simd_cast with the two argument packs interleaved (a0, b0, a1, b1,
// ...). Used by the bisectable simd_cast overloads, which pass all low
// storage halves followed by all high halves and need them restored to
// element order.
template <typename Return, typename... Ts>
Vc_INTRINSIC Vc_CONST Return
    simd_cast_interleaved_argument_order(const Ts &... a, const Ts &... b)
{
    using seq = make_index_sequence<sizeof...(Ts)*2>;
    return simd_cast_interleaved_argument_order_1<Return, Ts...>(seq(), a..., b...);
}
2539
2540// conditional_assign {{{1
// Generates conditional_assign overloads for the compound-assignment
// operators: the rhs is applied to lhs through the write-masked view, so only
// elements selected by mask are modified.
#define Vc_CONDITIONAL_ASSIGN(name_, op_)                                                \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M,  \
              typename U>                                                                \
    Vc_INTRINSIC enable_if<O == Operator::name_, void> conditional_assign(               \
        SimdArray<T, N, V, VN> &lhs, M &&mask, U &&rhs)                                  \
    {                                                                                    \
        lhs(mask) op_ rhs;                                                               \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(          Assign,  =);
Vc_CONDITIONAL_ASSIGN(      PlusAssign, +=);
Vc_CONDITIONAL_ASSIGN(     MinusAssign, -=);
Vc_CONDITIONAL_ASSIGN(  MultiplyAssign, *=);
Vc_CONDITIONAL_ASSIGN(    DivideAssign, /=);
Vc_CONDITIONAL_ASSIGN( RemainderAssign, %=);
Vc_CONDITIONAL_ASSIGN(       XorAssign, ^=);
Vc_CONDITIONAL_ASSIGN(       AndAssign, &=);
Vc_CONDITIONAL_ASSIGN(        OrAssign, |=);
Vc_CONDITIONAL_ASSIGN( LeftShiftAssign,<<=);
Vc_CONDITIONAL_ASSIGN(RightShiftAssign,>>=);
#undef Vc_CONDITIONAL_ASSIGN
2562
// Generates conditional_assign overloads for the increment/decrement
// operators: applied through the write-masked view and returning the
// expression's result.
#define Vc_CONDITIONAL_ASSIGN(name_, expr_)                                              \
    template <Operator O, typename T, std::size_t N, typename V, size_t VN, typename M>  \
    Vc_INTRINSIC enable_if<O == Operator::name_, SimdArray<T, N, V, VN>>                 \
    conditional_assign(SimdArray<T, N, V, VN> &lhs, M &&mask)                            \
    {                                                                                    \
        return expr_;                                                                    \
    }                                                                                    \
    Vc_NOTHING_EXPECTING_SEMICOLON
Vc_CONDITIONAL_ASSIGN(PostIncrement, lhs(mask)++);
Vc_CONDITIONAL_ASSIGN( PreIncrement, ++lhs(mask));
Vc_CONDITIONAL_ASSIGN(PostDecrement, lhs(mask)--);
Vc_CONDITIONAL_ASSIGN( PreDecrement, --lhs(mask));
#undef Vc_CONDITIONAL_ASSIGN
2576// transpose_impl {{{1
2577namespace Common
2578{
// 4x4 transpose of atomic SimdArrays (N == V::Size): unwrap inputs and
// outputs to their single native vector and defer to the native transpose.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, N> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, N>, SimdArray<T, N, V, N>,
                         SimdArray<T, N, V, N>, SimdArray<T, N, V, N>> &proxy)
{
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}
2593
2594template <typename T, typename V>
2595inline void transpose_impl(
2596 TransposeTag<2, 4>, SimdArray<T, 4, V, 1> *Vc_RESTRICT r[],
2597 const TransposeProxy<SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>,
2598 SimdArray<T, 2, V, 1>, SimdArray<T, 2, V, 1>> &proxy)
2599{
2600 auto &lo = *r[0];
2601 auto &hi = *r[1];
2602 internal_data0(internal_data0(lo)) = internal_data0(std::get<0>(proxy.in));
2603 internal_data1(internal_data0(lo)) = internal_data0(std::get<1>(proxy.in));
2604 internal_data0(internal_data1(lo)) = internal_data0(std::get<2>(proxy.in));
2605 internal_data1(internal_data1(lo)) = internal_data0(std::get<3>(proxy.in));
2606 internal_data0(internal_data0(hi)) = internal_data1(std::get<0>(proxy.in));
2607 internal_data1(internal_data0(hi)) = internal_data1(std::get<1>(proxy.in));
2608 internal_data0(internal_data1(hi)) = internal_data1(std::get<2>(proxy.in));
2609 internal_data1(internal_data1(hi)) = internal_data1(std::get<3>(proxy.in));
2610}
2611
// 4x4 transpose of single-element SimdArrays (scalar case, N == 1): unwrap to
// the native type and defer, same shape as the atomic overload above.
template <typename T, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, 1, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>,
                         SimdArray<T, 1, V, 1>, SimdArray<T, 1, V, 1>> &proxy)
{
    V *Vc_RESTRICT r2[4] = {&internal_data(*r[0]), &internal_data(*r[1]),
                            &internal_data(*r[2]), &internal_data(*r[3])};
    transpose_impl(TransposeTag<4, 4>(), &r2[0],
                   TransposeProxy<V, V, V, V>{internal_data(std::get<0>(proxy.in)),
                                              internal_data(std::get<1>(proxy.in)),
                                              internal_data(std::get<2>(proxy.in)),
                                              internal_data(std::get<3>(proxy.in))});
}
2626
// 4x4 transpose of non-atomic SimdArrays (VSize == 1): split into two 2x4
// sub-transposes — the first pair of outputs is built from the low storage
// halves of all four inputs, the second pair from the high halves.
template <typename T, size_t N, typename V>
inline void transpose_impl(
    TransposeTag<4, 4>, SimdArray<T, N, V, 1> *Vc_RESTRICT r[],
    const TransposeProxy<SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>,
                         SimdArray<T, N, V, 1>, SimdArray<T, N, V, 1>> &proxy)
{
    SimdArray<T, N, V, 1> *Vc_RESTRICT r0[4 / 2] = {r[0], r[1]};
    SimdArray<T, N, V, 1> *Vc_RESTRICT r1[4 / 2] = {r[2], r[3]};
    using H = SimdArray<T, 2>;
    transpose_impl(TransposeTag<2, 4>(), &r0[0],
                   TransposeProxy<H, H, H, H>{internal_data0(std::get<0>(proxy.in)),
                                              internal_data0(std::get<1>(proxy.in)),
                                              internal_data0(std::get<2>(proxy.in)),
                                              internal_data0(std::get<3>(proxy.in))});
    transpose_impl(TransposeTag<2, 4>(), &r1[0],
                   TransposeProxy<H, H, H, H>{internal_data1(std::get<0>(proxy.in)),
                                              internal_data1(std::get<1>(proxy.in)),
                                              internal_data1(std::get<2>(proxy.in)),
                                              internal_data1(std::get<3>(proxy.in))});
}
2647
2648/* TODO:
2649template <typename T, std::size_t N, typename V, std::size_t VSize>
2650inline enable_if<(N > VSize), void> transpose_impl(
2651 std::array<SimdArray<T, N, V, VSize> * Vc_RESTRICT, 4> & r,
2652 const TransposeProxy<SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>,
2653 SimdArray<T, N, V, VSize>, SimdArray<T, N, V, VSize>> &proxy)
2654{
2655 typedef SimdArray<T, N, V, VSize> SA;
2656 std::array<typename SA::storage_type0 * Vc_RESTRICT, 4> r0 = {
2657 {&internal_data0(*r[0]), &internal_data0(*r[1]), &internal_data0(*r[2]),
2658 &internal_data0(*r[3])}};
2659 transpose_impl(
2660 r0, TransposeProxy<typename SA::storage_type0, typename SA::storage_type0,
2661 typename SA::storage_type0, typename SA::storage_type0>{
2662 internal_data0(std::get<0>(proxy.in)),
2663 internal_data0(std::get<1>(proxy.in)),
2664 internal_data0(std::get<2>(proxy.in)),
2665 internal_data0(std::get<3>(proxy.in))});
2666
2667 std::array<typename SA::storage_type1 * Vc_RESTRICT, 4> r1 = {
2668 {&internal_data1(*r[0]), &internal_data1(*r[1]), &internal_data1(*r[2]),
2669 &internal_data1(*r[3])}};
2670 transpose_impl(
2671 r1, TransposeProxy<typename SA::storage_type1, typename SA::storage_type1,
2672 typename SA::storage_type1, typename SA::storage_type1>{
2673 internal_data1(std::get<0>(proxy.in)),
2674 internal_data1(std::get<1>(proxy.in)),
2675 internal_data1(std::get<2>(proxy.in)),
2676 internal_data1(std::get<3>(proxy.in))});
2677}
2678*/
2679} // namespace Common
2680
2681// }}}1
2682namespace Detail
2683{
2684// InterleaveImpl for SimdArrays {{{
2685// atomic {{{1
// InterleaveImpl specialization for atomic SimdArrays (N == V::Size):
// (de)interleaving is forwarded to the implementation for the single wrapped
// native vector V.
template <class T, size_t N, class V, size_t VSizeof>
struct InterleaveImpl<SimdArray<T, N, V, N>, N, VSizeof> {
    // Write the elements of vv... to data at the indexes i, interleaved.
    template <class I, class... VV>
    static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::interleave(data, i, internal_data(vv)...);
    }
    // Read interleaved elements from data at the indexes i into vv... .
    template <class I, class... VV>
    static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
    {
        InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
    }
};
2699
2700// generic (TODO) {{{1
2701/*
2702template <class T, size_t N, class V, size_t Wt, size_t VSizeof>
2703struct InterleaveImpl<SimdArray<T, N, V, Wt>, N, VSizeof> {
2704 using SA = SimdArray<T, N, V, Wt>;
2705 using SA0 = typename SA::storage_type0;
2706 using SA1 = typename SA::storage_type1;
2707
2708 template <class I, class... VV>
2709 static Vc_INTRINSIC void interleave(T *const data, const I &i, const VV &... vv)
2710 {
2711 InterleaveImpl<SA0, SA0::size(), sizeof(SA0)>::interleave(
2712 data, i, // i needs to be split
2713 internal_data0(vv)...);
2714 InterleaveImpl<SA1, SA1::size(), sizeof(SA1)>::interleave(
2715 data, // how far to advance data?
2716 i, // i needs to be split
2717 internal_data1(vv)...);
2718 }
2719 template <class I, class... VV>
2720 static Vc_INTRINSIC void deinterleave(T const *const data, const I &i, VV &... vv)
2721 {
2722 InterleaveImpl<V, N, VSizeof>::deinterleave(data, i, internal_data(vv)...);
2723 }
2724};
2725*/
2726} // namespace Detail
2727// }}}
2729
2730} // namespace Vc_VERSIONED_NAMESPACE
2731
2732// numeric_limits {{{1
2733namespace std
2734{
2735template <typename T, size_t N, typename V, size_t VN>
2736struct numeric_limits<Vc::SimdArray<T, N, V, VN>> : public numeric_limits<T> {
2737private:
2739
2740public:
2741 static Vc_ALWAYS_INLINE Vc_CONST R max() noexcept { return numeric_limits<T>::max(); }
2742 static Vc_ALWAYS_INLINE Vc_CONST R min() noexcept { return numeric_limits<T>::min(); }
2743 static Vc_ALWAYS_INLINE Vc_CONST R lowest() noexcept
2744 {
2745 return numeric_limits<T>::lowest();
2746 }
2747 static Vc_ALWAYS_INLINE Vc_CONST R epsilon() noexcept
2748 {
2749 return numeric_limits<T>::epsilon();
2750 }
2751 static Vc_ALWAYS_INLINE Vc_CONST R round_error() noexcept
2752 {
2753 return numeric_limits<T>::round_error();
2754 }
2755 static Vc_ALWAYS_INLINE Vc_CONST R infinity() noexcept
2756 {
2757 return numeric_limits<T>::infinity();
2758 }
2759 static Vc_ALWAYS_INLINE Vc_CONST R quiet_NaN() noexcept
2760 {
2761 return numeric_limits<T>::quiet_NaN();
2762 }
2763 static Vc_ALWAYS_INLINE Vc_CONST R signaling_NaN() noexcept
2764 {
2765 return numeric_limits<T>::signaling_NaN();
2766 }
2767 static Vc_ALWAYS_INLINE Vc_CONST R denorm_min() noexcept
2768 {
2769 return numeric_limits<T>::denorm_min();
2770 }
2771};
2772} // namespace std
2773//}}}1
2774
2775#endif // VC_COMMON_SIMDARRAY_H_
2776
2777// vim: foldmethod=marker
The main SIMD mask class.
Definition: mask.h:42
Data-parallel arithmetic type with user-defined number of elements.
Definition: simdarray.h:616
fixed_size_simd< T, N > sorted() const
Return a sorted copy of the vector.
Definition: simdarray.h:1360
fixed_size_simd< T, N > apply(F &&f) const
Call f on every entry of the vector and return the results as a new vector.
Definition: simdarray.h:1113
Common::WriteMaskedVector< SimdArray, mask_type > operator()(const mask_type &mask)
Writemask the vector before an assignment.
Definition: simdarray.h:1049
static fixed_size_simd< T, N > IndexesFromZero()
Returns a vector with the entries initialized to 0, 1, 2, 3, 4, 5, ...
Definition: simdarray.h:712
fixed_size_simd< T, N > partialSum() const
Returns a vector containing the sum of all entries with smaller index.
Definition: simdarray.h:1103
reference operator[](size_t i) noexcept
This operator can be used to modify scalar entries of the vector.
Definition: simdarray.h:1034
fixed_size_simd< T, N > shifted(int amount) const
Shift vector entries to the left by amount; shifting in zeros.
Definition: simdarray.h:1126
SimdArray()=default
Construct a zero-initialized vector object.
static fixed_size_simd< T, N > generate(const G &gen)
Generate a vector object from return values of gen (static variant of fill).
Definition: simdarray.h:731
static constexpr std::size_t size()
Returns N, the number of scalar components in an object of this type.
Definition: simdarray.h:674
SimdArray(value_type a)
Broadcast Constructor.
Definition: simdarray.h:754
value_type operator[](size_t index) const noexcept
This operator can be used to read scalar entries of the vector.
Definition: simdarray.h:1041
static fixed_size_simd< T, N > One()
Returns a vector with the entries initialized to one.
Definition: simdarray.h:706
fixed_size_simd< T, N > rotated(int amount) const
Rotate vector entries to the left by amount.
Definition: simdarray.h:1255
static fixed_size_simd< T, N > Random()
Returns a vector with pseudo-random entries.
Definition: simdarray.h:718
fixed_size_simd< T, N > apply(F &&f, const mask_type &k) const
As above, but skip the entries where mask is not set.
Definition: simdarray.h:1119
static fixed_size_simd< T, N > Zero()
Returns a vector with the entries initialized to zero.
Definition: simdarray.h:700
fixed_size_simd< T, N > reversed() const
Returns a vector with all components reversed.
Definition: simdarray.h:1338
value_type EntryType
The type of the elements (i.e. T)
Definition: simdarray.h:683
fixed_size_simd< T, N > operator+() const
Returns a copy of itself.
Definition: simdarray.h:959
T value_type
The type of the elements (i.e. T)
Definition: simdarray.h:656
The main vector class for expressing data parallelism.
Definition: vector.h:126
Vector sorted() const
Return a sorted copy of the vector.
Vector reversed() const
Returns a vector with all components reversed.
fixed_size_simd< T, N > asin(const SimdArray< T, N, V, M > &x)
Applies the std:: asin function component-wise and concurrently.
Definition: simdarray.h:1806
fixed_size_simd_mask< T, N > isnan(const SimdArray< T, N, V, M > &x)
Applies the std:: isnan function component-wise and concurrently.
Definition: simdarray.h:1824
fixed_size_simd< T, N > max(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: max function component-wise and concurrently.
Definition: simdarray.h:1854
fixed_size_simd< T, N > min(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: min function component-wise and concurrently.
Definition: simdarray.h:1853
fixed_size_simd< T, N > exponent(const SimdArray< T, N, V, M > &x)
Applies the Vc::exponent function (extracts the floating-point exponent) component-wise and concurrently.
Definition: simdarray.h:1813
fixed_size_simd< T, N > log10(const SimdArray< T, N, V, M > &x)
Applies the std:: log10 function component-wise and concurrently.
Definition: simdarray.h:1839
fixed_size_simd< T, N > rsqrt(const SimdArray< T, N, V, M > &x)
Applies the Vc::rsqrt function (reciprocal square root) component-wise and concurrently.
Definition: simdarray.h:1843
fixed_size_simd< T, N > sin(const SimdArray< T, N, V, M > &x)
Applies the std:: sin function component-wise and concurrently.
Definition: simdarray.h:1844
fixed_size_simd_mask< T, N > isfinite(const SimdArray< T, N, V, M > &x)
Applies the std:: isfinite function component-wise and concurrently.
Definition: simdarray.h:1822
SimdArray< T, N > fma(const SimdArray< T, N > &a, const SimdArray< T, N > &b, const SimdArray< T, N > &c)
Applies the std::fma function component-wise and concurrently.
Definition: simdarray.h:1817
fixed_size_simd_mask< T, N > isinf(const SimdArray< T, N, V, M > &x)
Applies the std:: isinf function component-wise and concurrently.
Definition: simdarray.h:1823
fixed_size_simd< T, N > log2(const SimdArray< T, N, V, M > &x)
Applies the std:: log2 function component-wise and concurrently.
Definition: simdarray.h:1840
fixed_size_simd< T, N > copysign(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: copysign function component-wise and concurrently.
Definition: simdarray.h:1810
fixed_size_simd< T, N > exp(const SimdArray< T, N, V, M > &x)
Applies the std:: exp function component-wise and concurrently.
Definition: simdarray.h:1812
fixed_size_simd< T, N > atan(const SimdArray< T, N, V, M > &x)
Applies the std:: atan function component-wise and concurrently.
Definition: simdarray.h:1807
fixed_size_simd< T, N > reciprocal(const SimdArray< T, N, V, M > &x)
Applies the Vc::reciprocal function (1/x) component-wise and concurrently.
Definition: simdarray.h:1841
fixed_size_simd< T, N > abs(const SimdArray< T, N, V, M > &x)
Applies the std:: abs function component-wise and concurrently.
Definition: simdarray.h:1805
fixed_size_simd< T, N > trunc(const SimdArray< T, N, V, M > &x)
Applies the std:: trunc function component-wise and concurrently.
Definition: simdarray.h:1852
fixed_size_simd_mask< T, N > isnegative(const SimdArray< T, N, V, M > &x)
Applies the Vc::isnegative function (sign-bit test) component-wise and concurrently.
Definition: simdarray.h:1825
fixed_size_simd< T, N > round(const SimdArray< T, N, V, M > &x)
Applies the std:: round function component-wise and concurrently.
Definition: simdarray.h:1842
fixed_size_simd< T, N > log(const SimdArray< T, N, V, M > &x)
Applies the std:: log function component-wise and concurrently.
Definition: simdarray.h:1838
fixed_size_simd< T, N > cos(const SimdArray< T, N, V, M > &x)
Applies the std:: cos function component-wise and concurrently.
Definition: simdarray.h:1811
fixed_size_simd< T, N > sqrt(const SimdArray< T, N, V, M > &x)
Applies the std:: sqrt function component-wise and concurrently.
Definition: simdarray.h:1851
result_vector_type< L, R > operator-(L &&lhs, R &&rhs)
Applies - component-wise and concurrently.
Definition: simdarray.h:1721
fixed_size_simd< T, N > floor(const SimdArray< T, N, V, M > &x)
Applies the std:: floor function component-wise and concurrently.
Definition: simdarray.h:1814
SimdArray< T, N > frexp(const SimdArray< T, N > &x, SimdArray< int, N > *e)
Applies the std::frexp function component-wise and concurrently.
Definition: simdarray.h:1828
void sincos(const SimdArray< T, N > &x, SimdArray< T, N > *sin, SimdArray< T, N > *cos)
Determines sine and cosine concurrently and component-wise on x.
Definition: simdarray.h:1847
fixed_size_simd< T, N > atan2(const SimdArray< T, N, V, M > &x, const SimdArray< T, N, V, M > &y)
Applies the std:: atan2 function component-wise and concurrently.
Definition: simdarray.h:1808
result_vector_type< L, R > operator+(L &&lhs, R &&rhs)
Applies + component-wise and concurrently.
Definition: simdarray.h:1721
fixed_size_simd< T, N > ceil(const SimdArray< T, N, V, M > &x)
Applies the std:: ceil function component-wise and concurrently.
Definition: simdarray.h:1809
SimdArray< T, N > ldexp(const SimdArray< T, N > &x, const SimdArray< int, N > &e)
Applies the std::ldexp function component-wise and concurrently.
Definition: simdarray.h:1834
Adapter< S, T, N > shifted(const Adapter< S, T, N > &a, int shift)
Returns a new vectorized object where each entry is shifted by shift.
Definition: simdize.h:1069
std::pair< V, V > interleave(const V &a, const V &b)
Interleaves the entries from a and b into two vectors of the same type.
Definition: interleave.h:55
constexpr VectorSpecialInitializerIndexesFromZero IndexesFromZero
The special object Vc::IndexesFromZero can be used to construct Vector objects initialized to values ...
Definition: types.h:91
constexpr VectorSpecialInitializerOne One
The special object Vc::One can be used to construct Vector and Mask objects initialized to one/true.
Definition: types.h:86
constexpr UnalignedTag Unaligned
Use this object for a flags parameter to request unaligned loads and stores.
constexpr AlignedTag Aligned
Use this object for a flags parameter to request aligned loads and stores.
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/fals...
Definition: types.h:81
void deinterleave(V *a, V *b, const M *memory, A align)
Definition: deinterleave.h:76
std::ostream & operator<<(std::ostream &out, const Vc::Vector< T, Abi > &v)
Prints the contents of a vector into a stream object.
Definition: IO:117
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector typ...
Definition: vector.h:215
Vector Classes Namespace.
Definition: dox.h:585
UnalignedTag DefaultLoadTag
The default load tag type uses unaligned (non-streaming) loads.
To simd_cast(From &&x, enable_if< std::is_same< To, Traits::decay< From > >::value >=nullarg)
Casts the argument x from type From to type To.
Definition: simd_cast.h:52
void assign(SimdizeDetail::Adapter< S, T, N > &a, size_t i, const S &x)
Assigns one scalar object x to a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:1221