Vc 1.4.1
SIMD Vector Classes for C++
memory.h
1/* This file is part of the Vc library. {{{
2Copyright © 2009-2015 Matthias Kretz <kretz@kde.org>
3
4Redistribution and use in source and binary forms, with or without
5modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright
7 notice, this list of conditions and the following disclaimer.
8 * Redistributions in binary form must reproduce the above copyright
9 notice, this list of conditions and the following disclaimer in the
10 documentation and/or other materials provided with the distribution.
11 * Neither the names of contributing organizations nor the
12 names of its contributors may be used to endorse or promote products
13 derived from this software without specific prior written permission.
14
15THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE FOR ANY
19DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
26}}}*/
27
28#ifndef VC_COMMON_MEMORY_H_
29#define VC_COMMON_MEMORY_H_
30
31#include "memorybase.h"
32#include <assert.h>
33#include <algorithm>
34#include <cstring>
35#include <cstddef>
36#include <initializer_list>
37#include "memoryfwd.h"
38#include "malloc.h"
39#include "macros.h"
40
41namespace Vc_VERSIONED_NAMESPACE
42{
43namespace Common
44{
/**
 * Compile-time helper that rounds a scalar entry count up to a whole number
 * of vectors.
 *
 * \tparam V    Vector type; only its \c Size member (entries per vector) is used.
 * \tparam Size Requested number of scalar entries.
 *
 * The remainder is computed with a bit mask, so the results are only
 * meaningful when \c V::Size is a power of two.
 * NOTE(review): this precondition is not enforced here - confirm it holds for
 * every vector type this is instantiated with.
 */
template <typename V, size_t Size> struct _MemorySizeCalculation
{
    enum AlignmentCalculations {
        /// Number of scalar entries in one vector.
        Alignment = V::Size,
        /// Mask selecting the part of a count that does not fill a whole vector.
        AlignmentMask = Alignment - 1,
        /// Entries of \p Size that lie beyond the last completely filled vector.
        MaskedSize = Size & AlignmentMask,
        /// Entries required to fill the last vector (equals Alignment when
        /// MaskedSize is 0, in which case it is not added to PaddedSize).
        Padding = Alignment - MaskedSize,
        /// \p Size rounded up to the next multiple of Alignment.
        PaddedSize = MaskedSize == 0 ? Size : Size + Padding
    };
};
55
66template <typename V, size_t Size1, size_t Size2, bool InitPadding>
67#ifdef Vc_RECURSIVE_MEMORY
68class Memory : public MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
69 Memory<V, Size2, 0, InitPadding>>
70#else
71class Memory : public AlignedBase<V::MemoryAlignment>,
72 public MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
73 Memory<V, Size2, 0, false>>
74#endif
75{
76public:
77 typedef typename V::EntryType EntryType;
78
79private:
80#ifdef Vc_RECURSIVE_MEMORY
82#else
84#endif
86 friend class MemoryBase<V, Memory<V, Size1, Size2, InitPadding>, 2, RowMemory>;
87 friend class MemoryDimensionBase<V, Memory<V, Size1, Size2, InitPadding>, 2,
88 RowMemory>;
89 enum : size_t {
90 Alignment = V::MemoryAlignment,
91 PaddedSize2 = _MemorySizeCalculation<V, Size2>::PaddedSize
92 };
93 alignas(static_cast<size_t>(Alignment)) // GCC complains about 'is not an
94 // integer constant' unless the
95 // static_cast is present
96#ifdef Vc_RECURSIVE_MEMORY
97 RowMemory m_mem[Size1];
98#else
99 EntryType m_mem[Size1][PaddedSize2];
100#endif
101
102 public:
103 using Base::vector;
104 enum Constants {
105 RowCount = Size1,
106 VectorsCount = PaddedSize2 / V::Size
107 };
108
109#ifdef Vc_RECURSIVE_MEMORY
110 Memory() = default;
111#else
112 Memory()
113 {
114 if (InitPadding) {
115 if (Size1 > 32)
116 for (size_t i = 0; i < Size1; ++i) {
117 V::Zero().store(&m_mem[i][PaddedSize2 - V::Size], Vc::Streaming);
118 }
119 }
120 }
121#endif
122
128 static constexpr size_t rowsCount() { return RowCount; }
137 static constexpr size_t entriesCount() { return Size1 * Size2; }
143 static constexpr size_t vectorsCount() { return VectorsCount * Size1; }
144
154 template<typename Parent, typename RM>
155 Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 2, RM> &rhs) {
156 assert(vectorsCount() == rhs.vectorsCount());
157 Detail::copyVectors(*this, rhs);
158 return *this;
159 }
160
161 Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
162 Detail::copyVectors(*this, rhs);
163 return *this;
164 }
165
173 inline Memory &operator=(const V &v) {
174 for (size_t i = 0; i < vectorsCount(); ++i) {
175 vector(i) = v;
176 }
177 return *this;
178 }
179};
180
224template <typename V, size_t Size, bool InitPadding>
225class Memory<V, Size, 0u, InitPadding> :
226#ifndef Vc_RECURSIVE_MEMORY
227 public AlignedBase<V::MemoryAlignment>,
228#endif
229 public MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>
230 {
231 public:
232 typedef typename V::EntryType EntryType;
233 private:
235 friend class MemoryBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>;
236 friend class MemoryDimensionBase<V, Memory<V, Size, 0u, InitPadding>, 1, void>;
237 enum : size_t {
238 Alignment = V::MemoryAlignment, // in Bytes
239 MaskedSize = Size & (V::Size - 1), // the fraction of Size that exceeds
240 // an integral multiple of V::Size
241 Padding = V::Size - MaskedSize,
242 PaddedSize = MaskedSize == 0 ? Size : Size + Padding
243 };
244 alignas(static_cast<size_t>(Alignment)) // GCC complains about 'is not an
245 // integer constant' unless the
246 // static_cast is present
247 EntryType m_mem[PaddedSize];
248
249 public:
250 using Base::vector;
251 enum Constants {
252 EntriesCount = Size,
253 VectorsCount = PaddedSize / V::Size
254 };
255
256 Memory()
257 {
258 if (InitPadding) {
259 Base::lastVector() = V::Zero();
260 }
261 }
262
263 Memory(std::initializer_list<EntryType> init)
264 {
265 Vc_ASSERT(init.size() <= Size);
266 Base::lastVector() = V::Zero();
267 std::copy(init.begin(), init.end(), &m_mem[0]);
268 }
269
292 static Vc_ALWAYS_INLINE Vc_CONST Memory<V, Size, 0u, false> &fromRawData(EntryType *ptr)
293 {
294 // DANGER! This placement new has to use the right address. If the compiler decides
295 // RowMemory requires padding before the actual data then the address has to be adjusted
296 // accordingly
297 char *addr = reinterpret_cast<char *>(ptr);
299 addr -= Vc_OFFSETOF(MM, m_mem);
300 return *new(addr) MM;
301 }
302
308 static constexpr size_t entriesCount() { return EntriesCount; }
309
315 static constexpr size_t vectorsCount() { return VectorsCount; }
316
317 inline Memory(const Memory &rhs)
318 {
319 Detail::copyVectors(*this, rhs);
320 }
321
322 template <size_t S> inline Memory(const Memory<V, S> &rhs)
323 {
324 assert(vectorsCount() == rhs.vectorsCount());
325 Detail::copyVectors(*this, rhs);
326 }
327
328 inline Memory &operator=(const Memory &rhs)
329 {
330 Detail::copyVectors(*this, rhs);
331 return *this;
332 }
333
334 template <size_t S> inline Memory &operator=(const Memory<V, S> &rhs)
335 {
336 assert(vectorsCount() == rhs.vectorsCount());
337 Detail::copyVectors(*this, rhs);
338 return *this;
339 }
340
341 Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
342 std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
343 return *this;
344 }
345 inline Memory &operator=(const V &v) {
346 for (size_t i = 0; i < vectorsCount(); ++i) {
347 vector(i) = v;
348 }
349 return *this;
350 }
351 };
352
393 template<typename V> class Memory<V, 0u, 0u, true> : public MemoryBase<V, Memory<V, 0u, 0u, true>, 1, void>
394 {
395 public:
396 typedef typename V::EntryType EntryType;
397 private:
398 typedef MemoryBase<V, Memory<V>, 1, void> Base;
399 friend class MemoryBase<V, Memory<V>, 1, void>;
400 friend class MemoryDimensionBase<V, Memory<V>, 1, void>;
401 enum InternalConstants {
402 Alignment = V::Size,
403 AlignmentMask = Alignment - 1
404 };
405 size_t m_entriesCount;
406 size_t m_vectorsCount;
407 EntryType *m_mem;
408 size_t calcPaddedEntriesCount(size_t x)
409 {
410 size_t masked = x & AlignmentMask;
411 return (masked == 0 ? x : x + (Alignment - masked));
412 }
413 public:
414 using Base::vector;
415
423 Vc_ALWAYS_INLINE Memory(size_t size)
424 : m_entriesCount(size),
425 m_vectorsCount(calcPaddedEntriesCount(m_entriesCount)),
426 m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount))
427 {
428 m_vectorsCount /= V::Size;
429 Base::lastVector() = V::Zero();
430 }
431
439 template<typename Parent, typename RM>
440 Vc_ALWAYS_INLINE Memory(const MemoryBase<V, Parent, 1, RM> &rhs)
441 : m_entriesCount(rhs.entriesCount()),
442 m_vectorsCount(rhs.vectorsCount()),
443 m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
444 {
445 Detail::copyVectors(*this, rhs);
446 }
447
455 Vc_ALWAYS_INLINE Memory(const Memory &rhs)
456 : m_entriesCount(rhs.entriesCount()),
457 m_vectorsCount(rhs.vectorsCount()),
458 m_mem(Vc::malloc<EntryType, Vc::AlignOnVector>(m_vectorsCount * V::Size))
459 {
460 Detail::copyVectors(*this, rhs);
461 }
462
466 Vc_ALWAYS_INLINE ~Memory()
467 {
468 Vc::free(m_mem);
469 }
470
476 inline void swap(Memory &rhs) {
477 std::swap(m_mem, rhs.m_mem);
478 std::swap(m_entriesCount, rhs.m_entriesCount);
479 std::swap(m_vectorsCount, rhs.m_vectorsCount);
480 }
481
485 Vc_ALWAYS_INLINE Vc_PURE size_t entriesCount() const { return m_entriesCount; }
486
490 Vc_ALWAYS_INLINE Vc_PURE size_t vectorsCount() const { return m_vectorsCount; }
491
501 template<typename Parent, typename RM>
502 Vc_ALWAYS_INLINE Memory &operator=(const MemoryBase<V, Parent, 1, RM> &rhs) {
503 assert(vectorsCount() == rhs.vectorsCount());
504 Detail::copyVectors(*this, rhs);
505 return *this;
506 }
507
508 Vc_ALWAYS_INLINE Memory &operator=(const Memory &rhs) {
509 assert(vectorsCount() == rhs.vectorsCount());
510 Detail::copyVectors(*this, rhs);
511 return *this;
512 }
513
523 Vc_ALWAYS_INLINE Memory &operator=(const EntryType *rhs) {
524 std::memcpy(m_mem, rhs, entriesCount() * sizeof(EntryType));
525 return *this;
526 }
527};
528
539Vc_ALWAYS_INLINE void prefetchForOneRead(const void *addr)
540{
541 Vc::Detail::prefetchForOneRead(addr, VectorAbi::Best<float>());
542}
543
556Vc_ALWAYS_INLINE void prefetchForModify(const void *addr)
557{
558 Vc::Detail::prefetchForModify(addr, VectorAbi::Best<float>());
559}
560
571Vc_ALWAYS_INLINE void prefetchClose(const void *addr)
572{
573 Vc::Detail::prefetchClose(addr, VectorAbi::Best<float>());
574}
575
586Vc_ALWAYS_INLINE void prefetchMid(const void *addr)
587{
588 Vc::Detail::prefetchMid(addr, VectorAbi::Best<float>());
589}
590
601Vc_ALWAYS_INLINE void prefetchFar(const void *addr)
602{
603 Vc::Detail::prefetchFar(addr, VectorAbi::Best<float>());
604}
605} // namespace Common
606
607using Common::Memory;
613} // namespace Vc
614
615namespace std
616{
617 template<typename V> Vc_ALWAYS_INLINE void swap(Vc::Memory<V> &a, Vc::Memory<V> &b) { a.swap(b); }
618} // namespace std
619
620#endif // VC_COMMON_MEMORY_H_
Common interface to all Memory classes, independent of allocation on the stack or heap.
Definition: memorybase.h:368
size_t vectorsCount() const
Definition: memorybase.h:397
Memory(const MemoryBase< V, Parent, 1, RM > &rhs)
Copy the memory into a new memory area.
Definition: memory.h:440
Memory(const Memory &rhs)
Overload of the above function.
Definition: memory.h:455
~Memory()
Frees the memory which was allocated in the constructor.
Definition: memory.h:466
Memory & operator=(const EntryType *rhs)
Overwrite all entries with the values stored in the memory at rhs.
Definition: memory.h:523
void swap(Memory &rhs)
Swap the contents and size information of two Memory objects.
Definition: memory.h:476
Memory(size_t size)
Allocate enough memory to access size values of type V::EntryType.
Definition: memory.h:423
Memory & operator=(const MemoryBase< V, Parent, 1, RM > &rhs)
Overwrite all entries with the values stored in rhs.
Definition: memory.h:502
static constexpr size_t entriesCount()
Definition: memory.h:308
static Memory< V, Size, 0u, false > & fromRawData(EntryType *ptr)
Wrap existing data with the Memory convenience class.
Definition: memory.h:292
static constexpr size_t vectorsCount()
Definition: memory.h:315
A helper class for fixed-size two-dimensional arrays.
Definition: memory.h:75
Memory & operator=(const V &v)
Initialize all data with the given vector.
Definition: memory.h:173
static constexpr size_t entriesCount()
Definition: memory.h:137
Memory & operator=(const MemoryBase< V, Parent, 2, RM > &rhs)
Copies the data from a different object.
Definition: memory.h:155
static constexpr size_t rowsCount()
Definition: memory.h:128
static constexpr size_t vectorsCount()
Definition: memory.h:143
Common::AdaptSubscriptOperator< std::vector< T, Allocator > > vector
An adapted std::vector container with an additional subscript operator which implements gather and scatter operations.
Definition: vector:55
void swap(Adapter< S, T, N > &a, std::size_t i, S &x)
Swaps one scalar object x with a SIMD slot at offset i in the simdized object a.
Definition: simdize.h:1108
void prefetchForModify(const void *addr)
Prefetch the cacheline containing addr for modification.
Definition: memory.h:556
T * malloc(size_t n)
Allocates memory on the Heap with alignment and padding suitable for vectorized access.
Definition: malloc.h:136
void prefetchClose(const void *addr)
Prefetch the cacheline containing addr to L1 cache.
Definition: memory.h:571
void prefetchFar(const void *addr)
Prefetch the cacheline containing addr to L3 cache.
Definition: memory.h:601
void prefetchMid(const void *addr)
Prefetch the cacheline containing addr to L2 cache.
Definition: memory.h:586
constexpr VectorSpecialInitializerZero Zero
The special object Vc::Zero can be used to construct Vector and Mask objects initialized to zero/false.
Definition: types.h:81
constexpr StreamingTag Streaming
Use this object for a flags parameter to request streaming loads and stores.
void prefetchForOneRead(const void *addr)
Prefetch the cacheline containing addr for a single read access.
Definition: memory.h:539
void free(T *p)
Frees memory that was allocated with Vc::malloc.
Definition: malloc.h:163
@ AlignOnVector
Align on boundary of vector sizes (e.g. 16 Bytes on SSE platforms) and pad to allow vector access to the end.
Definition: global.h:443
constexpr std::size_t MemoryAlignment
Specifies the most conservative memory alignment necessary for aligned loads and stores of Vector types.
Definition: vector.h:215
Vector Classes Namespace.
Definition: dox.h:585
Helper class to ensure a given alignment.
Definition: alignedbase.h:68