Halide 17.0.2
Halide compiler and libraries
HalideBuffer.h
1/** \file
2 * Defines a Buffer type that wraps a halide_buffer_t and adds
3 * functionality, and methods for more conveniently iterating over the
4 * samples in a halide_buffer_t outside of Halide code. */
5
6#ifndef HALIDE_RUNTIME_BUFFER_H
7#define HALIDE_RUNTIME_BUFFER_H
8
9#include <algorithm>
10#include <atomic>
11#include <cassert>
12#include <cstdint>
13#include <cstdlib>
14#include <cstring>
15#include <limits>
16#include <memory>
17#include <vector>
18
19#ifdef __APPLE__
20#include <AvailabilityVersions.h>
21#include <TargetConditionals.h>
22#endif
23
24#if defined(__has_feature)
25#if __has_feature(memory_sanitizer)
26#include <sanitizer/msan_interface.h>
27#endif
28#endif
29
30#include "HalideRuntime.h"
31
32#ifdef _MSC_VER
33#include <malloc.h>
34#define HALIDE_ALLOCA _alloca
35#else
36#define HALIDE_ALLOCA __builtin_alloca
37#endif
38
39// gcc 5.1 has a false positive warning on this code
40#if __GNUC__ == 5 && __GNUC_MINOR__ == 1
41#pragma GCC diagnostic ignored "-Warray-bounds"
42#endif
43
44#ifndef HALIDE_RUNTIME_BUFFER_CHECK_INDICES
45#define HALIDE_RUNTIME_BUFFER_CHECK_INDICES 0
46#endif
47
48#ifndef HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT
49// Conservatively align buffer allocations to 128 bytes by default.
50// This is enough alignment for all the platforms currently in use.
51// Redefine this in your compiler settings if you desire more/less alignment.
52#define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT 128
53#endif
54
56 "HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT must be a power of 2.");
57
58// Unfortunately, not all C++17 runtimes support aligned_alloc
59// (it may depend on the OS/SDK version); this is provided as an opt-out
60// if you are compiling on a platform that doesn't provide a (good)
61// implementation. (Note that we actually use the C11 `::aligned_alloc()`
62// rather than the C++17 `std::aligned_alloc()` because at least one platform
63// we found supports the former but not the latter.)
64#ifndef HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
65
66// clang-format off
67#ifdef _MSC_VER
68
69 // MSVC doesn't implement aligned_alloc(), even in C++17 mode, and
70 // has stated they probably never will, so, always default it off here.
71 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
72
73#elif defined(__ANDROID_API__) && __ANDROID_API__ < 28
74
75 // Android doesn't provide aligned_alloc until API 28
76 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
77
78#elif defined(__APPLE__)
79
80 #if TARGET_OS_OSX && (__MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_15)
81
82 // macOS doesn't provide aligned_alloc until 10.15
83 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
84
85 #elif TARGET_OS_IPHONE && (__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_14_0)
86
87 // iOS doesn't provide aligned_alloc until 14.0
88 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
89
90 #else
91
92 // Assume it's ok on all other Apple targets
93 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
94
95 #endif
96
97#else
98
99 #if defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)
100
101 // ARM GNU-A baremetal compiler doesn't provide aligned_alloc as of 12.2
102 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
103
104 #else
105
106 // Not Windows, Android, or Apple: just assume it's ok
107 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
108
109 #endif
110
111#endif
112// clang-format on
113
114#endif // HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
115
116namespace Halide {
117namespace Runtime {
118
119// Forward-declare our Buffer class
120template<typename T, int Dims, int InClassDimStorage>
121class Buffer;
122
123// A helper to check if a parameter pack is entirely implicitly
124// int-convertible to use with std::enable_if
125template<typename... Args>
126struct AllInts : std::false_type {};
127
128template<>
129struct AllInts<> : std::true_type {};
130
131template<typename T, typename... Args>
132struct AllInts<T, Args...> {
133 static const bool value = std::is_convertible<T, int>::value && AllInts<Args...>::value;
134};
135
136// Floats and doubles are technically implicitly int-convertible, but
137// doing so produces a warning we treat as an error, so just disallow
138// it here.
139template<typename... Args>
140struct AllInts<float, Args...> : std::false_type {};
141
142template<typename... Args>
143struct AllInts<double, Args...> : std::false_type {};
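// For example, AllInts accepts parameter packs whose members all convert
// implicitly to int, and rejects packs containing floats or pointers
// (an illustrative sketch, not part of the header itself):
//
//     static_assert(AllInts<int, short, char>::value);
//     static_assert(!AllInts<int, float>::value);
//     static_assert(!AllInts<int, const char *>::value);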
144
145// A helper to detect if there are any zeros in a container
146namespace Internal {
147template<typename Container>
148bool any_zero(const Container &c) {
149 for (int i : c) {
150 if (i == 0) {
151 return true;
152 }
153 }
154 return false;
155}
156} // namespace Internal
157
158/** A struct acting as a header for allocations owned by the Buffer
159 * class itself. */
160struct AllocationHeader {
161 void (*deallocate_fn)(void *);
162 std::atomic<int> ref_count;
163
164 // Note that ref_count always starts at 1
165 explicit AllocationHeader(void (*deallocate_fn)(void *))
166 : deallocate_fn(deallocate_fn), ref_count(1) {
167 }
168};
169
170/** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
171enum struct BufferDeviceOwnership : int {
172 Allocated, ///> halide_device_free will be called when device ref count goes to zero
173 WrappedNative, ///> halide_device_detach_native will be called when device ref count goes to zero
174 Unmanaged, ///> No free routine will be called when device ref count goes to zero
175 AllocatedDeviceAndHost, ///> Call device_and_host_free when DevRefCount goes to zero.
176 Cropped, ///> Call halide_device_release_crop when DevRefCount goes to zero.
177};
178
179/** A similar struct for managing device allocations. */
180struct DeviceRefCount {
181 // This is only ever constructed when there's something to manage,
182 // so start at one.
183 std::atomic<int> count{1};
184 BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated};
185};
186
187constexpr int AnyDims = -1;
188
189/** A templated Buffer class that wraps halide_buffer_t and adds
190 * functionality. When using Halide from C++, this is the preferred
191 * way to create input and output buffers. The overhead of using this
192 * class relative to a naked halide_buffer_t is minimal - it uses another
193 * ~16 bytes on the stack, and does no dynamic allocations when using
194 * it to represent existing memory of a known maximum dimensionality.
195 *
196 * The template parameter T is the element type. For buffers where the
197 * element type is unknown, or may vary, use void or const void.
198 *
199 * The template parameter Dims is the number of dimensions. For buffers where
200 * the dimensionality is unknown at compile time, or may vary, use AnyDims.
201 *
202 * InClassDimStorage is the maximum number of dimensions that can be represented
203 * using space inside the class itself. Set it to the maximum dimensionality
204 * you expect this buffer to have. If the actual dimensionality exceeds
205 * this, heap storage is allocated to track the shape of the buffer.
206 * InClassDimStorage defaults to 4, which should cover nearly all usage.
207 *
208 * The class optionally allocates and owns memory for the image using
209 * a shared pointer allocated with the provided allocator. If they are
210 * null, malloc and free are used. Any device-side allocation is
211 * considered as owned if and only if the host-side allocation is
212 * owned. */
213template<typename T = void,
214 int Dims = AnyDims,
215 int InClassDimStorage = (Dims == AnyDims ? 4 : std::max(Dims, 1))>
216class Buffer {
217 /** The underlying halide_buffer_t */
218 halide_buffer_t buf = {};
219
220 /** Some in-class storage for shape of the dimensions. */
221 halide_dimension_t shape[InClassDimStorage];
222
223 /** The allocation owned by this Buffer. NULL if the Buffer does not
224 * own the memory. */
225 AllocationHeader *alloc = nullptr;
226
227 /** A reference count for the device allocation owned by this
228 * buffer. */
229 mutable DeviceRefCount *dev_ref_count = nullptr;
230
231 /** True if T is of type void or const void */
232 static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
233
234 /** A type function that adds a const qualifier if T is a const type. */
235 template<typename T2>
236 using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
237
238 /** T unless T is (const) void, in which case (const)
239 * uint8_t. Useful for providing return types for operator() */
240 using not_void_T = typename std::conditional<T_is_void,
241 add_const_if_T_is_const<uint8_t>,
242 T>::type;
243
244 /** T with constness removed. Useful for return type of copy(). */
245 using not_const_T = typename std::remove_const<T>::type;
246
247 /** The type the elements are stored as. Equal to not_void_T
248 * unless T is a pointer, in which case uint64_t. Halide stores
249 * all pointer types as uint64s internally, even on 32-bit
250 * systems. */
251 using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
252
253public:
254 /** True if the Halide type is not void (or const void). */
255 static constexpr bool has_static_halide_type = !T_is_void;
256
257 /** Get the Halide type of T. Callers should not use the result if
258 * has_static_halide_type is false. */
259 static constexpr halide_type_t static_halide_type() {
260 return halide_type_of<typename std::remove_cv<not_void_T>::type>();
261 }
262
263 /** Does this Buffer own the host memory it refers to? */
264 bool owns_host_memory() const {
265 return alloc != nullptr;
266 }
267
268 static constexpr bool has_static_dimensions = (Dims != AnyDims);
269
270 /** Callers should not use the result if
271 * has_static_dimensions is false. */
272 static constexpr int static_dimensions() {
273 return Dims;
274 }
275
276 static_assert(!has_static_dimensions || static_dimensions() >= 0);
277
278private:
279 /** Increment the reference count of any owned allocation */
280 void incref() const {
281 if (owns_host_memory()) {
282 alloc->ref_count++;
283 }
284 if (buf.device) {
285 if (!dev_ref_count) {
286 // I seem to have a non-zero dev field but no
287 // reference count for it. I must have been given a
288 // device allocation by a Halide pipeline, and have
289 // never been copied from since. Take sole ownership
290 // of it.
291 dev_ref_count = new DeviceRefCount;
292 }
293 dev_ref_count->count++;
294 }
295 }
296
297 // Note that this is called "cropped" but can also encompass a slice/embed
298 // operation as well.
299 struct DevRefCountCropped : DeviceRefCount {
300 Buffer<T, Dims, InClassDimStorage> cropped_from;
301 explicit DevRefCountCropped(const Buffer<T, Dims, InClassDimStorage> &cropped_from)
302 : cropped_from(cropped_from) {
303 ownership = BufferDeviceOwnership::Cropped;
304 }
305 };
306
307 /** Setup the device ref count for a buffer to indicate it is a crop (or slice, embed, etc) of cropped_from */
308 void crop_from(const Buffer<T, Dims, InClassDimStorage> &cropped_from) {
309 assert(dev_ref_count == nullptr);
310 dev_ref_count = new DevRefCountCropped(cropped_from);
311 }
312
313 /** Decrement the reference count of any owned allocation and free host
314 * and device memory if it hits zero. Sets alloc to nullptr. */
315 void decref(bool device_only = false) {
316 if (owns_host_memory() && !device_only) {
317 int new_count = --(alloc->ref_count);
318 if (new_count == 0) {
319 void (*fn)(void *) = alloc->deallocate_fn;
320 alloc->~AllocationHeader();
321 fn(alloc);
322 }
323 buf.host = nullptr;
324 alloc = nullptr;
325 set_host_dirty(false);
326 }
327 int new_count = 0;
328 if (dev_ref_count) {
329 new_count = --(dev_ref_count->count);
330 }
331 if (new_count == 0) {
332 if (buf.device) {
333 assert(!(alloc && device_dirty()) &&
334 "Implicitly freeing a dirty device allocation while a host allocation still lives. "
335 "Call device_free explicitly if you want to drop dirty device-side data. "
336 "Call copy_to_host explicitly if you want the data copied to the host allocation "
337 "before the device allocation is freed.");
338 int result = halide_error_code_success;
339 if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
340 result = buf.device_interface->detach_native(nullptr, &buf);
341 } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
342 result = buf.device_interface->device_and_host_free(nullptr, &buf);
343 } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
344 result = buf.device_interface->device_release_crop(nullptr, &buf);
345 } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
346 result = buf.device_interface->device_free(nullptr, &buf);
347 }
348 // No reasonable way to return the error, but we can at least assert-fail in debug builds.
349 assert((result == halide_error_code_success) && "device_interface call returned a nonzero result in Buffer::decref()");
350 (void)result;
351 }
352 if (dev_ref_count) {
353 if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
354 delete (DevRefCountCropped *)dev_ref_count;
355 } else {
356 delete dev_ref_count;
357 }
358 }
359 }
360 dev_ref_count = nullptr;
361 buf.device = 0;
362 buf.device_interface = nullptr;
363 }
364
365 void free_shape_storage() {
366 if (buf.dim != shape) {
367 delete[] buf.dim;
368 buf.dim = nullptr;
369 }
370 }
371
372 template<int DimsSpecified>
373 void make_static_shape_storage() {
374 static_assert(Dims == AnyDims || Dims == DimsSpecified,
375 "Number of arguments to Buffer() does not match static dimensionality");
376 buf.dimensions = DimsSpecified;
377 if constexpr (Dims == AnyDims) {
378 if constexpr (DimsSpecified <= InClassDimStorage) {
379 buf.dim = shape;
380 } else {
381 static_assert(DimsSpecified >= 1);
382 buf.dim = new halide_dimension_t[DimsSpecified];
383 }
384 } else {
385 static_assert(InClassDimStorage >= Dims);
386 buf.dim = shape;
387 }
388 }
389
390 void make_shape_storage(const int dimensions) {
391 if (Dims != AnyDims && Dims != dimensions) {
392 assert(false && "Number of arguments to Buffer() does not match static dimensionality");
393 }
394 // This should usually be inlined, so if dimensions is statically known,
395 // we can skip the call to new
396 buf.dimensions = dimensions;
397 buf.dim = (dimensions <= InClassDimStorage) ? shape : new halide_dimension_t[dimensions];
398 }
399
400 void copy_shape_from(const halide_buffer_t &other) {
401 // All callers of this ensure that buf.dimensions == other.dimensions.
402 make_shape_storage(other.dimensions);
403 std::copy(other.dim, other.dim + other.dimensions, buf.dim);
404 }
405
406 template<typename T2, int D2, int S2>
407 void move_shape_from(Buffer<T2, D2, S2> &&other) {
408 if (other.shape == other.buf.dim) {
409 copy_shape_from(other.buf);
410 } else {
411 buf.dim = other.buf.dim;
412 other.buf.dim = nullptr;
413 }
414 }
415
416 /** Initialize the shape from a halide_buffer_t. */
417 void initialize_from_buffer(const halide_buffer_t &b,
418 BufferDeviceOwnership ownership) {
419 memcpy(&buf, &b, sizeof(halide_buffer_t));
420 copy_shape_from(b);
421 if (b.device) {
422 dev_ref_count = new DeviceRefCount;
423 dev_ref_count->ownership = ownership;
424 }
425 }
426
427 /** Initialize the shape from an array of ints */
428 void initialize_shape(const int *sizes) {
429 for (int i = 0; i < buf.dimensions; i++) {
430 buf.dim[i].min = 0;
431 buf.dim[i].extent = sizes[i];
432 if (i == 0) {
433 buf.dim[i].stride = 1;
434 } else {
435 buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
436 }
437 }
438 }
439
440 /** Initialize the shape from a vector of extents */
441 void initialize_shape(const std::vector<int> &sizes) {
442 assert(buf.dimensions == (int)sizes.size());
443 initialize_shape(sizes.data());
444 }
445
446 /** Initialize the shape from the static shape of an array */
447 template<typename Array, size_t N>
448 void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
449 buf.dim[next].min = 0;
450 buf.dim[next].extent = (int)N;
451 if (next == 0) {
452 buf.dim[next].stride = 1;
453 } else {
454 initialize_shape_from_array_shape(next - 1, vals[0]);
455 buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
456 }
457 }
458
459 /** Base case for the template recursion above. */
460 template<typename T2>
461 void initialize_shape_from_array_shape(int, const T2 &) {
462 }
463
464 /** Get the dimensionality of a multi-dimensional C array */
465 template<typename Array, size_t N>
466 static int dimensionality_of_array(Array (&vals)[N]) {
467 return dimensionality_of_array(vals[0]) + 1;
468 }
469
470 template<typename T2>
471 static int dimensionality_of_array(const T2 &) {
472 return 0;
473 }
474
475 /** Get the underlying halide_type_t of an array's element type. */
476 template<typename Array, size_t N>
477 static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
478 return scalar_type_of_array(vals[0]);
479 }
480
481 template<typename T2>
482 static halide_type_t scalar_type_of_array(const T2 &) {
483 return halide_type_of<typename std::remove_cv<T2>::type>();
484 }
485
486 /** Crop a single dimension without handling device allocation. */
487 void crop_host(int d, int min, int extent) {
488 assert(dim(d).min() <= min);
489 assert(dim(d).max() >= min + extent - 1);
490 ptrdiff_t shift = min - dim(d).min();
491 if (buf.host != nullptr) {
492 buf.host += (shift * dim(d).stride()) * type().bytes();
493 }
494 buf.dim[d].min = min;
495 buf.dim[d].extent = extent;
496 }
497
498 /** Crop as many dimensions as are in rect, without handling device allocation. */
499 void crop_host(const std::vector<std::pair<int, int>> &rect) {
500 assert(rect.size() <= static_cast<decltype(rect.size())>(std::numeric_limits<int>::max()));
501 int limit = (int)rect.size();
502 assert(limit <= dimensions());
503 for (int i = 0; i < limit; i++) {
504 crop_host(i, rect[i].first, rect[i].second);
505 }
506 }
507
508 void complete_device_crop(Buffer<T, Dims, InClassDimStorage> &result_host_cropped) const {
509 assert(buf.device_interface != nullptr);
510 if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == halide_error_code_success) {
511 const Buffer<T, Dims, InClassDimStorage> *cropped_from = this;
512 // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
513 // is it possible to get to this point without incref having run at least once since
514 // the device field was set? (I.e. in the internal logic of crop. incref might have been
515 // called.)
516 if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
517 cropped_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
518 }
519 result_host_cropped.crop_from(*cropped_from);
520 }
521 }
522
523 /** slice a single dimension without handling device allocation. */
524 void slice_host(int d, int pos) {
525 static_assert(Dims == AnyDims);
526 assert(dimensions() > 0);
527 assert(d >= 0 && d < dimensions());
528 assert(pos >= dim(d).min() && pos <= dim(d).max());
529 buf.dimensions--;
530 ptrdiff_t shift = pos - buf.dim[d].min;
531 if (buf.host != nullptr) {
532 buf.host += (shift * buf.dim[d].stride) * type().bytes();
533 }
534 for (int i = d; i < buf.dimensions; i++) {
535 buf.dim[i] = buf.dim[i + 1];
536 }
537 buf.dim[buf.dimensions] = {0, 0, 0};
538 }
539
540 void complete_device_slice(Buffer<T, AnyDims, InClassDimStorage> &result_host_sliced, int d, int pos) const {
541 assert(buf.device_interface != nullptr);
542 if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == halide_error_code_success) {
543 const Buffer<T, Dims, InClassDimStorage> *sliced_from = this;
544 // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
545 // is it possible to get to this point without incref having run at least once since
546 // the device field was set? (I.e. in the internal logic of slice. incref might have been
547 // called.)
548 if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
549 sliced_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
550 }
551 // crop_from() is correct here, despite the fact that we are slicing.
552 result_host_sliced.crop_from(*sliced_from);
553 }
554 }
555
556public:
557 typedef T ElemType;
558
559 /** Read-only access to the shape */
560 class Dimension {
561 const halide_dimension_t &d;
562
563 public:
564 /** The lowest coordinate in this dimension */
565 HALIDE_ALWAYS_INLINE int min() const {
566 return d.min;
567 }
568
569 /** The number of elements in memory you have to step over to
570 * increment this coordinate by one. */
571 HALIDE_ALWAYS_INLINE int stride() const {
572 return d.stride;
573 }
574
575 /** The extent of the image along this dimension */
576 HALIDE_ALWAYS_INLINE int extent() const {
577 return d.extent;
578 }
579
580 /** The highest coordinate in this dimension */
581 HALIDE_ALWAYS_INLINE int max() const {
582 return min() + extent() - 1;
583 }
584
585 /** An iterator class, so that you can iterate over
586 * coordinates in a dimension using a range-based for loop. */
587 struct iterator {
588 int val;
589 int operator*() const {
590 return val;
591 }
592 bool operator!=(const iterator &other) const {
593 return val != other.val;
594 }
595 iterator &operator++() {
596 val++;
597 return *this;
598 }
599 };
600
601 /** An iterator that points to the min coordinate */
602 HALIDE_ALWAYS_INLINE iterator begin() const {
603 return {min()};
604 }
605
606 /** An iterator that points to one past the max coordinate */
607 HALIDE_ALWAYS_INLINE iterator end() const {
608 return {min() + extent()};
609 }
610
611 explicit Dimension(const halide_dimension_t &dim)
612 : d(dim) {
613 }
614 };
615
616 /** Access the shape of the buffer */
617 HALIDE_ALWAYS_INLINE Dimension dim(int i) const {
618 assert(i >= 0 && i < this->dimensions());
619 return Dimension(buf.dim[i]);
620 }
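// For example, dim() together with Dimension's begin()/end() lets you visit
// every valid coordinate with range-based for loops (an illustrative sketch;
// operator() is defined further down in this file):
//
//     Halide::Runtime::Buffer<float> im(640, 480);
//     for (int y : im.dim(1)) {
//         for (int x : im.dim(0)) {
//             im(x, y) = 0.0f;
//         }
//     }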
621
622 /** Access to the mins, strides, extents. Will be deprecated. Do not use. */
623 // @{
624 int min(int i) const {
625 return dim(i).min();
626 }
627 int extent(int i) const {
628 return dim(i).extent();
629 }
630 int stride(int i) const {
631 return dim(i).stride();
632 }
633 // @}
634
635 /** The total number of elements this buffer represents. Equal to
636 * the product of the extents */
637 size_t number_of_elements() const {
638 return buf.number_of_elements();
639 }
640
641 /** Get the dimensionality of the buffer. */
642 int dimensions() const {
643 if constexpr (has_static_dimensions) {
644 return Dims;
645 } else {
646 return buf.dimensions;
647 }
648 }
649
650 /** Get the type of the elements. */
651 halide_type_t type() const {
652 return buf.type;
653 }
654
655 /** A pointer to the element with the lowest address. If all
656 * strides are positive, equal to the host pointer. */
657 T *begin() const {
658 assert(buf.host != nullptr); // Cannot call begin() on an unallocated Buffer.
659 return (T *)buf.begin();
660 }
661
662 /** A pointer to one beyond the element with the highest address. */
663 T *end() const {
664 assert(buf.host != nullptr); // Cannot call end() on an unallocated Buffer.
665 return (T *)buf.end();
666 }
667
668 /** The total number of bytes spanned by the data in memory. */
669 size_t size_in_bytes() const {
670 return buf.size_in_bytes();
671 }
672
673 /** Reset the Buffer to be equivalent to a default-constructed Buffer
674 * of the same static type (if any); Buffer<void> will have its runtime
675 * type reset to uint8. */
676 void reset() {
677 *this = Buffer();
678 }
679
680 Buffer()
681 : shape() {
682 buf.type = static_halide_type();
683 // If Dims is statically known, create storage for that many dimensions;
684 // otherwise, make a zero-dimensional buffer.
685 constexpr int buf_dimensions = (Dims == AnyDims) ? 0 : Dims;
686 make_static_shape_storage<buf_dimensions>();
687 }
688
689 /** Make a Buffer from a halide_buffer_t */
690 explicit Buffer(const halide_buffer_t &buf,
691 BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
692 assert(T_is_void || buf.type == static_halide_type());
693 initialize_from_buffer(buf, ownership);
694 }
695
696 /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
697 template<typename T2, int D2, int S2>
698 friend class Buffer;
699
700private:
701 template<typename T2, int D2, int S2>
702 static void static_assert_can_convert_from() {
703 static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
704 "Can't convert from a Buffer<const T> to a Buffer<T>");
705 static_assert(std::is_same<typename std::remove_const<T>::type,
706 typename std::remove_const<T2>::type>::value ||
708 "type mismatch constructing Buffer");
709 static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2,
710 "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality");
711 }
712
713public:
714 /** Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type.
715 * If this can be determined at compile time, fail with a static assert; otherwise
716 * return a boolean based on runtime typing. */
717 template<typename T2, int D2, int S2>
718 static bool can_convert_from(const Buffer<T2, D2, S2> &other) {
719 static_assert_can_convert_from<T2, D2, S2>();
720 if (Buffer<T2, D2, S2>::T_is_void && !T_is_void) {
721 if (other.type() != static_halide_type()) {
722 return false;
723 }
724 }
725 if (Dims != AnyDims) {
726 if (other.dimensions() != Dims) {
727 return false;
728 }
729 }
730 return true;
731 }
732
733 /** Fail an assertion at runtime or compile-time if a Buffer<T, Dims, InClassDimStorage>
734 * cannot be constructed from some other Buffer type. */
735 template<typename T2, int D2, int S2>
736 static void assert_can_convert_from(const Buffer<T2, D2, S2> &other) {
737 // Explicitly call static_assert_can_convert_from() here so
738 // that we always get compile-time checking, even if compiling with
739 // assertions disabled.
740 static_assert_can_convert_from<T2, D2, S2>();
741 assert(can_convert_from(other));
742 }
743
744 /** Copy constructor. Does not copy underlying data. */
745 Buffer(const Buffer<T, Dims, InClassDimStorage> &other)
746 : buf(other.buf),
747 alloc(other.alloc) {
748 other.incref();
749 dev_ref_count = other.dev_ref_count;
750 copy_shape_from(other.buf);
751 }
752
753 /** Construct a Buffer from a Buffer of different dimensionality
754 * and type. Asserts that the type and dimensionality matches (at runtime,
755 * if one of the types is void). Note that this constructor is
756 * implicit. This, for example, lets you pass things like
757 * Buffer<T> or Buffer<const void> to functions expecting
758 * Buffer<const T>. */
759 template<typename T2, int D2, int S2>
760 Buffer(const Buffer<T2, D2, S2> &other)
761 : buf(other.buf),
762 alloc(other.alloc) {
763 assert_can_convert_from(other);
764 other.incref();
765 dev_ref_count = other.dev_ref_count;
766 copy_shape_from(other.buf);
767 }
768
769 /** Move constructor */
770 Buffer(Buffer<T, Dims, InClassDimStorage> &&other) noexcept
771 : buf(other.buf),
772 alloc(other.alloc),
773 dev_ref_count(other.dev_ref_count) {
774 other.dev_ref_count = nullptr;
775 other.alloc = nullptr;
776 move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
777 other.buf = halide_buffer_t();
778 }
779
780 /** Move-construct a Buffer from a Buffer of different
781 * dimensionality and type. Asserts that the types match (at
782 * runtime if one of the types is void). */
783 template<typename T2, int D2, int S2>
784 Buffer(Buffer<T2, D2, S2> &&other)
785 : buf(other.buf),
786 alloc(other.alloc),
787 dev_ref_count(other.dev_ref_count) {
788 assert_can_convert_from(other);
789 other.dev_ref_count = nullptr;
790 other.alloc = nullptr;
791 move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
792 other.buf = halide_buffer_t();
793 }
794
795 /** Assign from another Buffer of possibly-different
796 * dimensionality and type. Asserts that the types match (at
797 * runtime if one of the types is void). */
798 template<typename T2, int D2, int S2>
799 Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T2, D2, S2> &other) {
800 if ((const void *)this == (const void *)&other) {
801 return *this;
802 }
803 assert_can_convert_from(other);
804 other.incref();
805 decref();
806 dev_ref_count = other.dev_ref_count;
807 alloc = other.alloc;
808 free_shape_storage();
809 buf = other.buf;
810 copy_shape_from(other.buf);
811 return *this;
812 }
813
814 /** Standard assignment operator */
815 Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T, Dims, InClassDimStorage> &other) {
816 // The cast to void* here is just to satisfy clang-tidy
817 if ((const void *)this == (const void *)&other) {
818 return *this;
819 }
820 other.incref();
821 decref();
822 dev_ref_count = other.dev_ref_count;
823 alloc = other.alloc;
824 free_shape_storage();
825 buf = other.buf;
826 copy_shape_from(other.buf);
827 return *this;
828 }
829
830 /** Move from another Buffer of possibly-different
831 * dimensionality and type. Asserts that the types match (at
832 * runtime if one of the types is void). */
833 template<typename T2, int D2, int S2>
834 Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T2, D2, S2> &&other) {
835 assert_can_convert_from(other);
836 decref();
837 alloc = other.alloc;
838 other.alloc = nullptr;
839 dev_ref_count = other.dev_ref_count;
840 other.dev_ref_count = nullptr;
841 free_shape_storage();
842 buf = other.buf;
843 move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
844 other.buf = halide_buffer_t();
845 return *this;
846 }
847
848 /** Standard move-assignment operator */
849 Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T, Dims, InClassDimStorage> &&other) noexcept {
850 decref();
851 alloc = other.alloc;
852 other.alloc = nullptr;
853 dev_ref_count = other.dev_ref_count;
854 other.dev_ref_count = nullptr;
855 free_shape_storage();
856 buf = other.buf;
857 move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
858 other.buf = halide_buffer_t();
859 return *this;
860 }
861
862 /** Check that the product of the extents fits in memory. */
863 void check_overflow() {
864 size_t size = type().bytes();
865 for (int i = 0; i < dimensions(); i++) {
866 size *= dim(i).extent();
867 }
868 // We allow 2^31 or 2^63 bytes, so drop the top bit.
869 size = (size << 1) >> 1;
870 for (int i = 0; i < dimensions(); i++) {
871 size /= dim(i).extent();
872 }
873 assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
874 }
875
876 /** Allocate memory for this Buffer. Drops the reference to any
877 * owned memory. */
878 void allocate(void *(*allocate_fn)(size_t) = nullptr,
879 void (*deallocate_fn)(void *) = nullptr) {
880 // Drop any existing allocation
881 deallocate();
882
883 // Conservatively align images to (usually) 128 bytes. This is enough
884 // alignment for all the platforms we might use. Also ensure that the allocation
885 // is such that the logical size is an integral multiple of 128 bytes (or a bit more).
886 constexpr size_t alignment = HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT;
887
888 const auto align_up = [=](size_t value) -> size_t {
889 return (value + alignment - 1) & ~(alignment - 1);
890 };
891
892 size_t size = size_in_bytes();
893
894#if HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
895 // Only use aligned_alloc() if no custom allocators are specified.
896 if (!allocate_fn && !deallocate_fn) {
897 // As a practical matter, sizeof(AllocationHeader) is going to be no more than 16 bytes
898 // on any supported platform, so we will just overallocate by 'alignment'
899 // so that the user storage also starts at an aligned point. This is a bit
900 // wasteful, but probably not a big deal.
901 static_assert(sizeof(AllocationHeader) <= alignment);
902 void *alloc_storage = ::aligned_alloc(alignment, align_up(size) + alignment);
903 assert((uintptr_t)alloc_storage == align_up((uintptr_t)alloc_storage));
904 alloc = new (alloc_storage) AllocationHeader(free);
905 buf.host = (uint8_t *)((uintptr_t)alloc_storage + alignment);
906 return;
907 }
908 // else fall thru
909#endif
910 if (!allocate_fn) {
911 allocate_fn = malloc;
912 }
913 if (!deallocate_fn) {
914 deallocate_fn = free;
915 }
916
917 static_assert(sizeof(AllocationHeader) <= alignment);
918
919 // malloc() and friends must return a pointer aligned to at least alignof(std::max_align_t);
920 // make sure this is OK for AllocationHeader, since it always goes at the start
921 static_assert(alignof(AllocationHeader) <= alignof(std::max_align_t));
922
923 const size_t requested_size = align_up(size + alignment +
924 std::max(0, (int)sizeof(AllocationHeader) -
925 (int)sizeof(std::max_align_t)));
926 void *alloc_storage = allocate_fn(requested_size);
927 alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
928 uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
929 buf.host = (uint8_t *)align_up((uintptr_t)unaligned_ptr);
930 }
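// For example, a Buffer that wraps a null host pointer can be given owned
// storage later; custom malloc/free-style functions may be supplied
// (an illustrative sketch; my_malloc/my_free are user-provided, not part of this header):
//
//     Halide::Runtime::Buffer<uint8_t> im(nullptr, 1024, 1024);  // shape only, no storage
//     im.allocate();                       // default allocator
//     // im.allocate(my_malloc, my_free);  // void *my_malloc(size_t), void my_free(void *)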
931
932 /** Drop reference to any owned host or device memory, possibly
933 * freeing it, if this buffer held the last reference to
934 * it. Retains the shape of the buffer. Does nothing if this
935 * buffer did not allocate its own memory. */
936 void deallocate() {
937 decref();
938 }
939
940 /** Drop reference to any owned device memory, possibly freeing it
941 * if this buffer held the last reference to it. Asserts that
942 * device_dirty is false. */
943 void device_deallocate() {
944 decref(true);
945 }
946
947 /** Allocate a new image of the given size with a runtime
948 * type. Only used when you do know what size you want but you
949 * don't know statically what type the elements are. Pass zeroes
950 * to make a buffer suitable for bounds query calls. */
951 template<typename... Args,
952 typename = typename std::enable_if<AllInts<Args...>::value>::type>
953 Buffer(halide_type_t t, int first, Args... rest) {
954 if (!T_is_void) {
955 assert(static_halide_type() == t);
956 }
957 int extents[] = {first, (int)rest...};
958 buf.type = t;
959 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
960 make_static_shape_storage<buf_dimensions>();
961 initialize_shape(extents);
962 if (!Internal::any_zero(extents)) {
963 check_overflow();
964 allocate();
965 }
966 }
967
968 /** Allocate a new image of the given size. Pass zeroes to make a
969 * buffer suitable for bounds query calls. */
970 // @{
971
972 // The overload with one argument is 'explicit', so that
973 // (say) int is not implicitly convertible to Buffer<int>
974 explicit Buffer(int first) {
975 static_assert(!T_is_void,
976 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
977 int extents[] = {first};
978 buf.type = static_halide_type();
979 constexpr int buf_dimensions = 1;
980 make_static_shape_storage<buf_dimensions>();
981 initialize_shape(extents);
982 if (first != 0) {
983 check_overflow();
984 allocate();
985 }
986 }
987
988 template<typename... Args,
989 typename = typename std::enable_if<AllInts<Args...>::value>::type>
990 Buffer(int first, int second, Args... rest) {
991 static_assert(!T_is_void,
992 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
993 int extents[] = {first, second, (int)rest...};
994 buf.type = static_halide_type();
995 constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
996 make_static_shape_storage<buf_dimensions>();
997 initialize_shape(extents);
998 if (!Internal::any_zero(extents)) {
999 check_overflow();
1000 allocate();
1001 }
1002 }
1003 // @}
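// For example (an illustrative sketch), these constructors allocate and own the
// host storage, while passing zeroes produces an unallocated buffer that is
// suitable for bounds queries:
//
//     Halide::Runtime::Buffer<float> weights(3, 3, 16);  // allocated, planar layout
//     Halide::Runtime::Buffer<int32_t> query(0, 0);      // no allocation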
1004
1005 /** Allocate a new image of unknown type using a vector of ints as the size. */
1006 Buffer(halide_type_t t, const std::vector<int> &sizes) {
1007 if (!T_is_void) {
1008 assert(static_halide_type() == t);
1009 }
1010 buf.type = t;
1011 // make_shape_storage() will do a runtime check that dimensionality matches.
1012 make_shape_storage((int)sizes.size());
1013 initialize_shape(sizes);
1014 if (!Internal::any_zero(sizes)) {
1015 check_overflow();
1016 allocate();
1017 }
1018 }
1019
1020 /** Allocate a new image of known type using a vector of ints as the size. */
1021 explicit Buffer(const std::vector<int> &sizes)
1022 : Buffer(static_halide_type(), sizes) {
1023 }
1024
1025private:
1026 // Create a copy of the sizes vector, ordered as specified by order.
1027 static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
1028 assert(order.size() == sizes.size());
1029 std::vector<int> ordered_sizes(sizes.size());
1030 for (size_t i = 0; i < sizes.size(); ++i) {
1031 ordered_sizes[i] = sizes.at(order[i]);
1032 }
1033 return ordered_sizes;
1034 }
1035
1036public:
1037 /** Allocate a new image of unknown type using a vector of ints as the size and
1038 * a vector of indices indicating the storage order for each dimension. The
1039 * length of the sizes vector and the storage-order vector must match. For instance,
1040 * to allocate an interleaved RGB buffer, you would pass {2, 0, 1} for storage_order. */
1041 Buffer(halide_type_t t, const std::vector<int> &sizes, const std::vector<int> &storage_order)
1042 : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
1043 transpose(storage_order);
1044 }
1045
1046 Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
1047 : Buffer(static_halide_type(), sizes, storage_order) {
1048 }
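// For example (an illustrative sketch), an interleaved 640x480 RGB image is
// allocated by making the channel dimension innermost in storage:
//
//     Halide::Runtime::Buffer<uint8_t> rgb({640, 480, 3}, {2, 0, 1});
//     // rgb.dim(2).stride() == 1 and rgb.dim(0).stride() == 3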
1049
1050 /** Make a Buffer that refers to a statically sized array. Does not
1051 * take ownership of the data, and does not set the host_dirty flag. */
1052 template<typename Array, size_t N>
1053 explicit Buffer(Array (&vals)[N]) {
1054 const int buf_dimensions = dimensionality_of_array(vals);
1055 buf.type = scalar_type_of_array(vals);
1056 buf.host = (uint8_t *)vals;
1057 make_shape_storage(buf_dimensions);
1058 initialize_shape_from_array_shape(buf.dimensions - 1, vals);
1059 }
1060
1061 /** Initialize a Buffer of runtime type from a pointer and some
1062 * sizes. Assumes dense row-major packing and a min coordinate of
1063 * zero. Does not take ownership of the data and does not set the
1064 * host_dirty flag. */
1065 template<typename... Args,
1066 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1067 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
1068 if (!T_is_void) {
1069 assert(static_halide_type() == t);
1070 }
1071 int extents[] = {first, (int)rest...};
1072 buf.type = t;
1073 buf.host = (uint8_t *)const_cast<void *>(data);
1074 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1075 make_static_shape_storage<buf_dimensions>();
1076 initialize_shape(extents);
1077 }
1078
1079 /** Initialize a Buffer from a pointer and some sizes. Assumes
1080 * dense row-major packing and a min coordinate of zero. Does not
1081 * take ownership of the data and does not set the host_dirty flag. */
1082 template<typename... Args,
1083 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1084 explicit Buffer(T *data, int first, Args &&...rest) {
1085 int extents[] = {first, (int)rest...};
1086 buf.type = static_halide_type();
1087 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1088 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1089 make_static_shape_storage<buf_dimensions>();
1090 initialize_shape(extents);
1091 }
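// For example (an illustrative sketch), wrapping memory you already own; the
// Buffer does not free it, so the vector must outlive the Buffer:
//
//     std::vector<uint8_t> pixels(640 * 480);
//     Halide::Runtime::Buffer<uint8_t> im(pixels.data(), 640, 480);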
1092
1093 /** Initialize a Buffer from a pointer and a vector of
1094 * sizes. Assumes dense row-major packing and a min coordinate of
1095 * zero. Does not take ownership of the data and does not set the
1096 * host_dirty flag. */
1097 explicit Buffer(T *data, const std::vector<int> &sizes) {
1098 buf.type = static_halide_type();
1099 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1100 make_shape_storage((int)sizes.size());
1101 initialize_shape(sizes);
1102 }
1103
1104 /** Initialize a Buffer of runtime type from a pointer and a
1105 * vector of sizes. Assumes dense row-major packing and a min
1106 * coordinate of zero. Does not take ownership of the data and
1107 * does not set the host_dirty flag. */
1108 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
1109 if (!T_is_void) {
1110 assert(static_halide_type() == t);
1111 }
1112 buf.type = t;
1113 buf.host = (uint8_t *)const_cast<void *>(data);
1114 make_shape_storage((int)sizes.size());
1115 initialize_shape(sizes);
1116 }
1117
1118 /** Initialize a Buffer from a pointer to the min coordinate and
1119 * an array describing the shape. Does not take ownership of the
1120 * data, and does not set the host_dirty flag. */
1121 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
1122 if (!T_is_void) {
1123 assert(static_halide_type() == t);
1124 }
1125 buf.type = t;
1126 buf.host = (uint8_t *)const_cast<void *>(data);
1127 make_shape_storage(d);
1128 for (int i = 0; i < d; i++) {
1129 buf.dim[i] = shape[i];
1130 }
1131 }
1132
1133 /** Initialize a Buffer from a pointer to the min coordinate and
1134 * a vector describing the shape. Does not take ownership of the
1135 * data, and does not set the host_dirty flag. */
1136 explicit inline Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
1137 const std::vector<halide_dimension_t> &shape)
1138 : Buffer(t, data, (int)shape.size(), shape.data()) {
1139 }
1140
1141 /** Initialize a Buffer from a pointer to the min coordinate and
1142 * an array describing the shape. Does not take ownership of the
1143 * data and does not set the host_dirty flag. */
1144 explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
1145 buf.type = static_halide_type();
1146 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1147 make_shape_storage(d);
1148 for (int i = 0; i < d; i++) {
1149 buf.dim[i] = shape[i];
1150 }
1151 }
1152
1153 /** Initialize a Buffer from a pointer to the min coordinate and
1154 * a vector describing the shape. Does not take ownership of the
1155 * data, and does not set the host_dirty flag. */
1156 explicit inline Buffer(T *data, const std::vector<halide_dimension_t> &shape)
1157 : Buffer(data, (int)shape.size(), shape.data()) {
1158 }
1159
1160 /** Destructor. Will release any underlying owned allocation if
1161 * this is the last reference to it. Will assert fail if there are
1162 * weak references to this Buffer outstanding. */
1163 ~Buffer() {
1164 decref();
1165 free_shape_storage();
1166 }
1167
1168 /** Get a pointer to the raw halide_buffer_t this wraps. */
1169 // @{
1170 halide_buffer_t *raw_buffer() {
1171 return &buf;
1172 }
1173
1174 const halide_buffer_t *raw_buffer() const {
1175 return &buf;
1176 }
1177 // @}
1178
1179 /** Provide a cast operator to halide_buffer_t *, so that
1180 * instances can be passed directly to Halide filters. */
1181 operator halide_buffer_t *() {
1182 return &buf;
1183 }
1184
1185 /** Return a typed reference to this Buffer. Useful for converting
1186 * a reference to a Buffer<void> to a reference to, for example, a
1187 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1188 * You can also optionally specify a new value for Dims; this is useful
1189 * mainly for removing the dimensionality constraint on a Buffer with
1190 * explicit dimensionality. Does a runtime assert if the source buffer type
1191 * is void or the new dimensionality is incompatible. */
1192 template<typename T2, int D2 = Dims>
1193 HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> &as() & {
1194 Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1195 return *((Buffer<T2, D2, InClassDimStorage> *)this);
1196 }
1197
1198 /** Return a const typed reference to this Buffer. Useful for converting
1199 * a reference to a Buffer<void> to a reference to, for example, a
1200 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1201 * You can also optionally specify a new value for Dims; this is useful
1202 * mainly for removing the dimensionality constraint on a Buffer with
1203 * explicit dimensionality. Does a runtime assert if the source buffer type
1204 * is void or the new dimensionality is incompatible. */
1205 template<typename T2, int D2 = Dims>
1206 HALIDE_ALWAYS_INLINE const Buffer<T2, D2, InClassDimStorage> &as() const & {
1207 Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1208 return *((const Buffer<T2, D2, InClassDimStorage> *)this);
1209 }
1210
1211 /** Return an rval reference to this Buffer. Useful for converting
1212 * a reference to a Buffer<void> to a reference to, for example, a
1213 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1214 * You can also optionally specify a new value for Dims; this is useful
1215 * mainly for removing the dimensionality constraint on a Buffer with
1216 * explicit dimensionality. Does a runtime assert if the source buffer type
1217 * is void or the new dimensionality is incompatible. */
1218 template<typename T2, int D2 = Dims>
1219 HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> as() && {
1220 Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1221 return *((Buffer<T2, D2, InClassDimStorage> *)this);
1222 }
1223
1224 /** as_const() is syntactic sugar for .as<const T>(), to avoid the need
1225 * to recapitulate the type argument. */
1226 // @{
1227 HALIDE_ALWAYS_INLINE
1228 Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() & {
1229 // Note that we can skip the assert_can_convert_from(), since T -> const T
1230 // conversion is always legal.
1231 return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1232 }
1233
1234 HALIDE_ALWAYS_INLINE
1235 const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() const & {
1236 return *((const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1237 }
1238
1239 HALIDE_ALWAYS_INLINE
1240 Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> as_const() && {
1241 return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1242 }
1243 // @}
1244
1245 /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<const T>& when
1246 * passing arguments */
1247 template<typename T2 = T, typename = typename std::enable_if<!std::is_const<T2>::value>::type>
1248 operator Buffer<typename std::add_const<T2>::type, Dims, InClassDimStorage> &() & {
1249 return as_const();
1250 }
1251
1252 /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<void>& when
1253 * passing arguments */
1254 template<typename TVoid,
1255 typename T2 = T,
1256 typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1257 !std::is_void<T2>::value &&
1258 !std::is_const<T2>::value>::type>
1259 operator Buffer<TVoid, Dims, InClassDimStorage> &() & {
1260 return as<TVoid, Dims>();
1261 }
1262
1263 /** Add some syntactic sugar to allow autoconversion from Buffer<const T> to Buffer<const void>& when
1264 * passing arguments */
1265 template<typename TVoid,
1266 typename T2 = T,
1267 typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1268 !std::is_void<T2>::value &&
1269 std::is_const<T2>::value>::type>
1270 operator Buffer<const TVoid, Dims, InClassDimStorage> &() & {
1271 return as<const TVoid, Dims>();
1272 }
1273
1274 /** Conventional names for the first three dimensions. */
1275 // @{
1276 int width() const {
1277 return (dimensions() > 0) ? dim(0).extent() : 1;
1278 }
1279 int height() const {
1280 return (dimensions() > 1) ? dim(1).extent() : 1;
1281 }
1282 int channels() const {
1283 return (dimensions() > 2) ? dim(2).extent() : 1;
1284 }
1285 // @}
1286
1287 /** Conventional names for the min and max value of each dimension */
1288 // @{
1289 int left() const {
1290 return dim(0).min();
1291 }
1292
1293 int right() const {
1294 return dim(0).max();
1295 }
1296
1297 int top() const {
1298 return dim(1).min();
1299 }
1300
1301 int bottom() const {
1302 return dim(1).max();
1303 }
1304 // @}
1305
1306 /** Make a new image which is a deep copy of this image. Use crop
1307 * or slice followed by copy to make a copy of only a portion of
1308 * the image. The new image uses the same memory layout as the
1309 * original, with holes compacted away. Note that the returned
1310 * Buffer is always of a non-const type T (ie:
1311 *
1312 * Buffer<const T>.copy() -> Buffer<T> rather than Buffer<const T>
1313 *
1314 * which is always safe, since we are making a deep copy. (The caller
1315 * can easily cast it back to Buffer<const T> if desired, which is
1316 * always safe and free.)
1317 */
1318 Buffer<not_const_T, Dims, InClassDimStorage> copy(void *(*allocate_fn)(size_t) = nullptr,
1319 void (*deallocate_fn)(void *) = nullptr) const {
1320 Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
1321 dst.copy_from(*this);
1322 return dst;
1323 }
1324
1325 /** Like copy(), but the copy is created in interleaved memory layout
1326 * (vs. keeping the same memory layout as the original). Requires that 'this'
1327 * has exactly 3 dimensions.
1328 */
1329 Buffer<not_const_T, Dims, InClassDimStorage> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
1330 void (*deallocate_fn)(void *) = nullptr) const {
1331 static_assert(Dims == AnyDims || Dims == 3);
1332 assert(dimensions() == 3);
1333 Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_interleaved(nullptr, width(), height(), channels());
1334 dst.set_min(min(0), min(1), min(2));
1335 dst.allocate(allocate_fn, deallocate_fn);
1336 dst.copy_from(*this);
1337 return dst;
1338 }
1339
1340 /** Like copy(), but the copy is created in planar memory layout
1341 * (vs. keeping the same memory layout as the original).
1342 */
1343 Buffer<not_const_T, Dims, InClassDimStorage> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
1344 void (*deallocate_fn)(void *) = nullptr) const {
1345 std::vector<int> mins, extents;
1346 const int dims = dimensions();
1347 mins.reserve(dims);
1348 extents.reserve(dims);
1349 for (int d = 0; d < dims; ++d) {
1350 mins.push_back(dim(d).min());
1351 extents.push_back(dim(d).extent());
1352 }
1353 Buffer<not_const_T, Dims, InClassDimStorage> dst(type(), extents);
1354 dst.set_min(mins);
1355 dst.allocate(allocate_fn, deallocate_fn);
1356 dst.copy_from(*this);
1357 return dst;
1358 }
1359
1360 /** Make a copy of the Buffer which shares the underlying host and/or device
1361 * allocations with the existing Buffer. This is purely syntactic sugar for
1362 * cases where you have a const reference to a Buffer but need a temporary
1363 * non-const copy (e.g. to make a call into AOT-generated Halide code), and want a terse
1364 * inline way to create a temporary. \code
1365 * void call_my_func(const Buffer<const uint8_t>& input) {
1366 * my_func(input.alias(), output);
1367 * }\endcode
1368 */
1369 Buffer<T, Dims, InClassDimStorage> alias() const {
1370 return *this;
1371 }
1372
1373 /** Fill a Buffer with the values at the same coordinates in
1374 * another Buffer. Restricts itself to coordinates contained
1375 * within the intersection of the two buffers. If the two Buffers
1376 * are not in the same coordinate system, you will need to
1377 * translate the argument Buffer first. E.g. if you're blitting a
1378 * sprite onto a framebuffer, you'll want to translate the sprite
1379 * to the correct location first like so: \code
1380 * framebuffer.copy_from(sprite.translated({x, y})); \endcode
1381 */
1382 template<typename T2, int D2, int S2>
1383 void copy_from(Buffer<T2, D2, S2> src) {
1384 static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
1385 assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
1386 assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");
1387
1388 Buffer<T, Dims, InClassDimStorage> dst(*this);
1389
1390 static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2);
1391 assert(src.dimensions() == dst.dimensions());
1392
1393 // Trim the copy to the region in common
1394 const int d = dimensions();
1395 for (int i = 0; i < d; i++) {
1396 int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
1397 int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
1398 if (max_coord < min_coord) {
1399 // The buffers do not overlap.
1400 return;
1401 }
1402 dst.crop(i, min_coord, max_coord - min_coord + 1);
1403 src.crop(i, min_coord, max_coord - min_coord + 1);
1404 }
1405
1406 // If T is void, we need to do runtime dispatch to an
1407 // appropriately-typed lambda. We're copying, so we only care
1408 // about the element size. (If not, this should optimize away
1409 // into a static dispatch to the right-sized copy.)
1410 if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
1411 using MemType = uint8_t;
1412 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1413 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1414 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1415 } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
1416 using MemType = uint16_t;
1417 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1418 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1419 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1420 } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
1421 using MemType = uint32_t;
1422 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1423 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1424 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1425 } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
1426 using MemType = uint64_t;
1427 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1428 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1429 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1430 } else {
1431 assert(false && "type().bytes() must be 1, 2, 4, or 8");
1432 }
1433 set_host_dirty();
1434 }
1435
1436 /** Make an image that refers to a sub-range of this image along
1437 * the given dimension. Asserts that the crop region is within
1438 * the existing bounds: you cannot "crop outwards", even if you know there
1439 * is valid Buffer storage (e.g. because you already cropped inwards). */
1440 Buffer<T, Dims, InClassDimStorage> cropped(int d, int min, int extent) const {
1441 // Make a fresh copy of the underlying buffer (but not a fresh
1442 // copy of the allocation, if there is one).
1443 Buffer<T, Dims, InClassDimStorage> im = *this;
1444
1445 // This guarantees the preexisting device ref is dropped if the
1446 // device_crop call fails and maintains the buffer in a consistent
1447 // state.
1448 im.device_deallocate();
1449
1450 im.crop_host(d, min, extent);
1451 if (buf.device_interface != nullptr) {
1452 complete_device_crop(im);
1453 }
1454 return im;
1455 }
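// For example (an illustrative sketch), a crop shares the original storage,
// while copy() makes a compact deep copy of just the cropped window:
//
//     Halide::Runtime::Buffer<float> im(1024, 1024);
//     auto window = im.cropped(0, 10, 100).cropped(1, 20, 100);
//     auto patch = window.copy();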
1456
1457 /** Crop an image in-place along the given dimension. This does
1458 * not move any data around in memory - it just changes the min
1459 * and extent of the given dimension. */
1460 void crop(int d, int min, int extent) {
1461 // An optimization for non-device buffers. For the device case,
1462 // a temp buffer is required, so reuse the not-in-place version.
1463 // TODO(zalman|abadams): Are nop crops common enough to special
1464 // case the device part of the if to do nothing?
1465 if (buf.device_interface != nullptr) {
1466 *this = cropped(d, min, extent);
1467 } else {
1468 crop_host(d, min, extent);
1469 }
1470 }
1471
1472 /** Make an image that refers to a sub-rectangle of this image along
1473 * the first N dimensions. Asserts that the crop region is within
1474 * the existing bounds. The cropped image may drop any device handle
1475 * if the device_interface cannot accomplish the crop in-place. */
1476 Buffer<T, Dims, InClassDimStorage> cropped(const std::vector<std::pair<int, int>> &rect) const {
1477 // Make a fresh copy of the underlying buffer (but not a fresh
1478 // copy of the allocation, if there is one).
1479 Buffer<T, Dims, InClassDimStorage> im = *this;
1480
1481 // This guarantees the preexisting device ref is dropped if the
1482 // device_crop call fails and maintains the buffer in a consistent
1483 // state.
1484 im.device_deallocate();
1485
1486 im.crop_host(rect);
1487 if (buf.device_interface != nullptr) {
1488 complete_device_crop(im);
1489 }
1490 return im;
1491 }
1492
1493 /** Crop an image in-place along the first N dimensions. This does
1494 * not move any data around in memory, nor does it free memory. It
1495 * just rewrites the min/extent of each dimension to refer to a
1496 * subregion of the same allocation. */
1497 void crop(const std::vector<std::pair<int, int>> &rect) {
1498 // An optimization for non-device buffers. For the device case,
1499 // a temp buffer is required, so reuse the not-in-place version.
1500 // TODO(zalman|abadams): Are nop crops common enough to special
1501 // case the device part of the if to do nothing?
1502 if (buf.device_interface != nullptr) {
1503 *this = cropped(rect);
1504 } else {
1505 crop_host(rect);
1506 }
1507 }
1508
1509 /** Make an image which refers to the same data using
1510 * translated coordinates in the given dimension. Positive values
1511 * move the image data to the right or down relative to the
1512 * coordinate system. Drops any device handle. */
1513 Buffer<T, Dims, InClassDimStorage> translated(int d, int dx) const {
1514 Buffer<T, Dims, InClassDimStorage> im = *this;
1515 im.translate(d, dx);
1516 return im;
1517 }
1518
1519 /** Translate an image in-place along one dimension by changing
1520 * how it is indexed. Does not move any data around in memory. */
1521 void translate(int d, int delta) {
1522 assert(d >= 0 && d < this->dimensions());
1523 device_deallocate();
1524 buf.dim[d].min += delta;
1525 }
1526
1527 /** Make an image which refers to the same data translated along
1528 * the first N dimensions. */
1529 Buffer<T, Dims, InClassDimStorage> translated(const std::vector<int> &delta) const {
1530 Buffer<T, Dims, InClassDimStorage> im = *this;
1531 im.translate(delta);
1532 return im;
1533 }
1534
1535 /** Translate an image along the first N dimensions by changing
1536 * how it is indexed. Does not move any data around in memory. */
1537 void translate(const std::vector<int> &delta) {
1538 device_deallocate();
1539 assert(delta.size() <= static_cast<decltype(delta.size())>(std::numeric_limits<int>::max()));
1540 int limit = (int)delta.size();
1541 assert(limit <= dimensions());
1542 for (int i = 0; i < limit; i++) {
1543 translate(i, delta[i]);
1544 }
1545 }
1546
1547 /** Set the min coordinate of an image in the first N dimensions. */
1548 // @{
1549 void set_min(const std::vector<int> &mins) {
1550 assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
1551 device_deallocate();
1552 for (size_t i = 0; i < mins.size(); i++) {
1553 buf.dim[i].min = mins[i];
1554 }
1555 }
1556
1557 template<typename... Args>
1558 void set_min(Args... args) {
1559 set_min(std::vector<int>{args...});
1560 }
1561 // @}
1562
1563 /** Test if a given coordinate is within the bounds of an image. */
1564 // @{
1565 bool contains(const std::vector<int> &coords) const {
1566 assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
1567 for (size_t i = 0; i < coords.size(); i++) {
1568 if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
1569 return false;
1570 }
1571 }
1572 return true;
1573 }
1574
1575 template<typename... Args>
1576 bool contains(Args... args) const {
1577 return contains(std::vector<int>{args...});
1578 }
1579 // @}
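 // Usage sketch (illustrative only) combining set_min() and contains():
 //
 //     Halide::Runtime::Buffer<int, 2> tile(32, 32);
 //     tile.set_min(64, 128);            // tile now covers x in [64, 95], y in [128, 159]
 //     assert(tile.contains(70, 130));
 //     assert(!tile.contains(0, 0));
 //     assert(tile.contains({95, 159})); // vector form; the max coordinate is inclusive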
1580
1581 /** Make a buffer which refers to the same data in the same layout
1582 * using a swapped indexing order for the dimensions given. So
1583 * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more
1584 * strongly that A.address_of(i, j) == B.address_of(j, i). */
1585 Buffer<T, Dims, InClassDimStorage> transposed(int d1, int d2) const {
1586 Buffer<T, Dims, InClassDimStorage> im = *this;
1587 im.transpose(d1, d2);
1588 return im;
1589 }
1590
1591 /** Transpose a buffer in-place by changing how it is indexed. For
1592 * example, transpose(0, 1) on a two-dimensional buffer means that
1593 * the value referred to by coordinates (i, j) is now reached at
1594 * the coordinates (j, i), and vice versa. This is done by
1595 * reordering the per-dimension metadata rather than by moving
1596 * data around in memory, so other views of the same memory will
1597 * not see the data as having been transposed. */
1598 void transpose(int d1, int d2) {
1599 assert(d1 >= 0 && d1 < this->dimensions());
1600 assert(d2 >= 0 && d2 < this->dimensions());
1601 std::swap(buf.dim[d1], buf.dim[d2]);
1602 }
1603
1604 /** A generalized transpose: instead of swapping two dimensions,
1605 * pass a vector that lists each dimension index exactly once, in
1606 * the desired order. This does not move any data around in memory
1607 * - it just permutes how it is indexed. */
1608 void transpose(const std::vector<int> &order) {
1609 assert((int)order.size() == dimensions());
1610 if (dimensions() < 2) {
1611 // My, that was easy
1612 return;
1613 }
1614
1615 std::vector<int> order_sorted = order;
1616 for (size_t i = 1; i < order_sorted.size(); i++) {
1617 for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
1618 std::swap(order_sorted[j], order_sorted[j - 1]);
1619 transpose(j, j - 1);
1620 }
1621 }
1622 }
1623
1624 /** Make a buffer which refers to the same data in the same
1625 * layout using a different ordering of the dimensions. */
1626 Buffer<T, Dims, InClassDimStorage> transposed(const std::vector<int> &order) const {
1627 Buffer<T, Dims, InClassDimStorage> im = *this;
1628 im.transpose(order);
1629 return im;
1630 }
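 // Usage sketch (illustrative only): transposition swaps the indexing order
 // without touching memory, so the two views alias the same elements.
 //
 //     Halide::Runtime::Buffer<float, 2> b(4, 8);
 //     auto bt = b.transposed(0, 1);
 //     assert(&bt(3, 2) == &b(2, 3));
 //     bt(3, 2) = 1.0f;                  // visible as b(2, 3) == 1.0f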
1631
1632 /** Make a lower-dimensional buffer that refers to one slice of
1633 * this buffer. */
1634 Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1635 sliced(int d, int pos) const {
1636 static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1637 assert(dimensions() > 0);
1638
1639 Buffer<T, AnyDims, InClassDimStorage> im = *this;
1640
1641 // This guarantees the preexisting device ref is dropped if the
1642 // device_slice call fails and maintains the buffer in a consistent
1643 // state.
1644 im.device_deallocate();
1645
1646 im.slice_host(d, pos);
1647 if (buf.device_interface != nullptr) {
1648 complete_device_slice(im, d, pos);
1649 }
1650 return im;
1651 }
1652
1653 /** Make a lower-dimensional buffer that refers to one slice of this
1654 * buffer at the dimension's minimum. */
1655 Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1656 sliced(int d) const {
1657 static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1658 assert(dimensions() > 0);
1659
1660 return sliced(d, dim(d).min());
1661 }
1662
1663 /** Rewrite the buffer to refer to a single lower-dimensional
1664 * slice of itself along the given dimension at the given
1665 * coordinate. Does not move any data around or free the original
1666 * memory, so other views of the same data are unaffected. Can
1667 * only be called on a Buffer with dynamic dimensionality. */
1668 void slice(int d, int pos) {
1669 static_assert(Dims == AnyDims, "Cannot call slice() on a Buffer with static dimensionality.");
1670 assert(dimensions() > 0);
1671
1672 // An optimization for non-device buffers. For the device case,
1673 // a temp buffer is required, so reuse the not-in-place version.
1674 // TODO(zalman|abadams): Are nop slices common enough to special
1675 // case the device part of the if to do nothing?
1676 if (buf.device_interface != nullptr) {
1677 *this = sliced(d, pos);
1678 } else {
1679 slice_host(d, pos);
1680 }
1681 }
1682
1683 /** Slice a buffer in-place at the dimension's minimum. */
1684 inline void slice(int d) {
1685 slice(d, dim(d).min());
1686 }
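 // Usage sketch (illustrative only): slicing produces a lower-dimensional
 // view of the same allocation.
 //
 //     Halide::Runtime::Buffer<float, 3> vol(64, 64, 16);
 //     auto plane = vol.sliced(2, 5);    // a 64x64 Buffer<float, 2> viewing z == 5
 //     plane(3, 4) = 1.0f;               // writes through to vol(3, 4, 5)
 //
 //     // In-place slice() requires dynamic dimensionality:
 //     Halide::Runtime::Buffer<float> dyn(64, 64, 16);
 //     dyn.slice(2, 5);                  // dyn is now two-dimensional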
1687
1688 /** Make a new buffer that views this buffer as a single slice in a
1689 * higher-dimensional space. The new dimension has extent one and
1690 * the given min. This operation is the opposite of slice. As an
1691 * example, the following condition is true:
1692 *
1693 \code
1694 im2 = im.embedded(1, 17);
1695 &im(x, y, c) == &im2(x, 17, y, c);
1696 \endcode
1697 */
1698 Buffer<T, (Dims == AnyDims ? AnyDims : Dims + 1)>
1699 embedded(int d, int pos = 0) const {
1700 Buffer<T, AnyDims, InClassDimStorage> im = *this;
1701 im.embed(d, pos);
1702 return im;
1703 }
1704
1705 /** Embed a buffer in-place, increasing the
1706 * dimensionality. */
1707 void embed(int d, int pos = 0) {
1708 static_assert(Dims == AnyDims, "Cannot call embed() on a Buffer with static dimensionality.");
1709 assert(d >= 0 && d <= dimensions());
1710 add_dimension();
1711 translate(dimensions() - 1, pos);
1712 for (int i = dimensions() - 1; i > d; i--) {
1713 transpose(i, i - 1);
1714 }
1715 }
1716
1717 /** Add a new dimension with a min of zero and an extent of
1718 * one. The stride is the extent of the outermost dimension times
1719 * its stride. The new dimension is the last dimension. This is a
1720 * special case of embed. */
1721 void add_dimension() {
1722 static_assert(Dims == AnyDims, "Cannot call add_dimension() on a Buffer with static dimensionality.");
1723 const int dims = buf.dimensions;
1724 buf.dimensions++;
1725 if (buf.dim != shape) {
1726 // We're already on the heap. Reallocate.
1727 halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
1728 for (int i = 0; i < dims; i++) {
1729 new_shape[i] = buf.dim[i];
1730 }
1731 delete[] buf.dim;
1732 buf.dim = new_shape;
1733 } else if (dims == InClassDimStorage) {
1734 // Transition from the in-class storage to the heap
1735 make_shape_storage(buf.dimensions);
1736 for (int i = 0; i < dims; i++) {
1737 buf.dim[i] = shape[i];
1738 }
1739 } else {
1740 // We still fit in the class
1741 }
1742 buf.dim[dims] = {0, 1, 0};
1743 if (dims == 0) {
1744 buf.dim[dims].stride = 1;
1745 } else {
1746 buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
1747 }
1748 }
1749
1750 /** Add a new dimension with a min of zero, an extent of one, and
1751 * the specified stride. The new dimension is the last
1752 * dimension. This is a special case of embed. */
1753 void add_dimension_with_stride(int s) {
1754 add_dimension();
1755 buf.dim[buf.dimensions - 1].stride = s;
1756 }
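 // Usage sketch (illustrative only; requires dynamic dimensionality):
 //
 //     Halide::Runtime::Buffer<float> im(100, 100);
 //     im.add_dimension();               // now 100x100x1
 //     assert(im.dim(2).extent() == 1 && im.dim(2).stride() == 100 * 100);
 //     im.add_dimension_with_stride(0);  // a broadcast-like dimension with stride 0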
1757
1758 /** Methods for managing any GPU allocation. */
1759 // @{
1760 // Set the host dirty flag. Called by every operator()
1761 // access. Must be inlined so it can be hoisted out of loops.
1762 HALIDE_ALWAYS_INLINE
1763 void set_host_dirty(bool v = true) {
1764 assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
1765 buf.set_host_dirty(v);
1766 }
1767
1768 // Check if the device allocation is dirty. Called by
1769 // set_host_dirty, which is called by every accessor. Must be
1770 // inlined so it can be hoisted out of loops.
1771 HALIDE_ALWAYS_INLINE
1772 bool device_dirty() const {
1773 return buf.device_dirty();
1774 }
1775
1776 bool host_dirty() const {
1777 return buf.host_dirty();
1778 }
1779
1780 void set_device_dirty(bool v = true) {
1781 assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
1782 buf.set_device_dirty(v);
1783 }
1784
1785 int copy_to_host(void *ctx = nullptr) {
1786 if (device_dirty()) {
1787 return buf.device_interface->copy_to_host(ctx, &buf);
1788 }
1789 return halide_error_code_success;
1790 }
1791
1792 int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1793 if (host_dirty()) {
1794 return device_interface->copy_to_device(ctx, &buf, device_interface);
1795 }
1796 return halide_error_code_success;
1797 }
1798
1799 int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1800 return device_interface->device_malloc(ctx, &buf, device_interface);
1801 }
1802
1803 int device_free(void *ctx = nullptr) {
1804 if (dev_ref_count) {
1805 assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
1806 "Can't call device_free on an unmanaged or wrapped native device handle. "
1807 "Free the source allocation or call device_detach_native instead.");
1808 // Multiple people may be holding onto this dev field
1809 assert(dev_ref_count->count == 1 &&
1810 "Multiple Halide::Runtime::Buffer objects share this device "
1811 "allocation. Freeing it would create dangling references. "
1812 "Don't call device_free on Halide buffers that you have copied or "
1813 "passed by value.");
1814 }
1815 int ret = halide_error_code_success;
1816 if (buf.device_interface) {
1817 ret = buf.device_interface->device_free(ctx, &buf);
1818 }
1819 if (dev_ref_count) {
1820 delete dev_ref_count;
1821 dev_ref_count = nullptr;
1822 }
1823 return ret;
1824 }
1825
1826 int device_wrap_native(const struct halide_device_interface_t *device_interface,
1827 uint64_t handle, void *ctx = nullptr) {
1828 assert(device_interface);
1829 dev_ref_count = new DeviceRefCount;
1830 dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
1831 return device_interface->wrap_native(ctx, &buf, handle, device_interface);
1832 }
1833
1834 int device_detach_native(void *ctx = nullptr) {
1835 assert(dev_ref_count &&
1837 "Only call device_detach_native on buffers wrapping a native "
1838 "device handle via device_wrap_native. This buffer was allocated "
1839 "using device_malloc, or is unmanaged. "
1840 "Call device_free or free the original allocation instead.");
1841 // Multiple people may be holding onto this dev field
1842 assert(dev_ref_count->count == 1 &&
1843 "Multiple Halide::Runtime::Buffer objects share this device "
1844 "allocation. Freeing it could create dangling references. "
1845 "Don't call device_detach_native on Halide buffers that you "
1846 "have copied or passed by value.");
1847 int ret = halide_error_code_success;
1848 if (buf.device_interface) {
1849 ret = buf.device_interface->detach_native(ctx, &buf);
1850 }
1851 delete dev_ref_count;
1852 dev_ref_count = nullptr;
1853 return ret;
1854 }
1855
1856 int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1857 return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
1858 }
1859
1860 int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1861 if (dev_ref_count) {
1863 "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1864 "Free the source allocation or call device_detach_native instead.");
1865 // Multiple people may be holding onto this dev field
1866 assert(dev_ref_count->count == 1 &&
1867 "Multiple Halide::Runtime::Buffer objects share this device "
1868 "allocation. Freeing it would create dangling references. "
1869 "Don't call device_and_host_free on Halide buffers that you have copied or "
1870 "passed by value.");
1871 }
1872 int ret = halide_error_code_success;
1873 if (buf.device_interface) {
1874 ret = buf.device_interface->device_and_host_free(ctx, &buf);
1875 }
1876 if (dev_ref_count) {
1877 delete dev_ref_count;
1878 dev_ref_count = nullptr;
1879 }
1880 return ret;
1881 }
1882
1883 int device_sync(void *ctx = nullptr) {
1884 return buf.device_sync(ctx);
1885 }
1886
1887 bool has_device_allocation() const {
1888 return buf.device != 0;
1889 }
1890
1891 /** Return the method by which the device field is managed. */
1892 BufferDeviceOwnership device_ownership() const {
1893 if (dev_ref_count == nullptr) {
1894 return BufferDeviceOwnership::Allocated;
1895 }
1896 return dev_ref_count->ownership;
1897 }
1898 // @}
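 // Usage sketch (illustrative only). `my_pipeline` stands in for a
 // hypothetical ahead-of-time-compiled Halide pipeline built for a GPU
 // target; the dirty flags let the runtime copy data lazily in each
 // direction.
 //
 //     Halide::Runtime::Buffer<float> in(1024, 1024), out(1024, 1024);
 //     in.fill(1.0f);                    // host writes set host_dirty, so the runtime uploads `in`
 //     my_pipeline(in, out);             // leaves out's device allocation dirty
 //     out.copy_to_host();               // required before reading `out` on the CPU
 //     assert(!out.device_dirty());
 //
 //     // Explicit staging is also possible given a device interface
 //     // (e.g. halide_cuda_device_interface() from HalideRuntimeCuda.h):
 //     in.copy_to_device(halide_cuda_device_interface());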
1899
1900 /** If you use the (x, y, c) indexing convention, then Halide
1901 * Buffers are stored planar by default. This function constructs
1902 * an interleaved RGB or RGBA image that can still be indexed
1903 * using (x, y, c). Passing it to a generator requires that the
1904 * generator has been compiled with support for interleaved (also
1905 * known as packed or chunky) memory layouts. */
1906 static Buffer<void, Dims, InClassDimStorage> make_interleaved(halide_type_t t, int width, int height, int channels) {
1907 static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1908 Buffer<void, Dims, InClassDimStorage> im(t, channels, width, height);
1909 // Note that this is equivalent to calling transpose({2, 0, 1}),
1910 // but slightly more efficient.
1911 im.transpose(0, 1);
1912 im.transpose(1, 2);
1913 return im;
1914 }
1915
1916 /** If you use the (x, y, c) indexing convention, then Halide
1917 * Buffers are stored planar by default. This function constructs
1918 * an interleaved RGB or RGBA image that can still be indexed
1919 * using (x, y, c). Passing it to a generator requires that the
1920 * generator has been compiled with support for interleaved (also
1921 * known as packed or chunky) memory layouts. */
1922 static Buffer<T, Dims, InClassDimStorage> make_interleaved(int width, int height, int channels) {
1923 return make_interleaved(static_halide_type(), width, height, channels);
1924 }
1925
1926 /** Wrap an existing interleaved image. */
1927 static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage>
1928 make_interleaved(halide_type_t t, T *data, int width, int height, int channels) {
1929 static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1930 Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> im(t, data, channels, width, height);
1931 im.transpose(0, 1);
1932 im.transpose(1, 2);
1933 return im;
1934 }
1935
1936 /** Wrap an existing interleaved image. */
1937 static Buffer<T, Dims, InClassDimStorage> make_interleaved(T *data, int width, int height, int channels) {
1938 return make_interleaved(static_halide_type(), data, width, height, channels);
1939 }
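 // Usage sketch (illustrative only): the result is still indexed (x, y, c),
 // but channels are adjacent in memory.
 //
 //     auto rgb = Halide::Runtime::Buffer<uint8_t>::make_interleaved(640, 480, 3);
 //     assert(rgb.dim(2).stride() == 1 && rgb.dim(0).stride() == 3);
 //     // Wrapping an existing interleaved frame (`frame_ptr` is a hypothetical uint8_t*):
 //     auto wrapped = Halide::Runtime::Buffer<uint8_t>::make_interleaved(frame_ptr, 640, 480, 3);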
1940
1941 /** Make a zero-dimensional Buffer */
1942 static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> make_scalar(halide_type_t t) {
1943 static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1944 Buffer<add_const_if_T_is_const<void>, AnyDims, InClassDimStorage> buf(t, 1);
1945 buf.slice(0, 0);
1946 return buf;
1947 }
1948
1949 /** Make a zero-dimensional Buffer */
1950 static Buffer<T, Dims, InClassDimStorage> make_scalar() {
1951 static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1952 Buffer<T, AnyDims, InClassDimStorage> buf(1);
1953 buf.slice(0, 0);
1954 return buf;
1955 }
1956
1957 /** Make a zero-dimensional Buffer that points to non-owned, existing data */
1958 static Buffer<T, Dims, InClassDimStorage> make_scalar(T *data) {
1959 static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1960 Buffer<T, AnyDims, InClassDimStorage> buf(data, 1);
1961 buf.slice(0, 0);
1962 return buf;
1963 }
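 // Usage sketch (illustrative only): a zero-dimensional Buffer holds a
 // single value and is indexed with no coordinates.
 //
 //     auto threshold = Halide::Runtime::Buffer<float>::make_scalar();
 //     threshold() = 0.5f;
 //     float existing = 42.0f;
 //     auto wrapped = Halide::Runtime::Buffer<float>::make_scalar(&existing);
 //     assert(wrapped() == 42.0f);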
1964
1965 /** Make a buffer with the same shape and memory nesting order as
1966 * another buffer. It may have a different type. */
1967 template<typename T2, int D2, int S2>
1968 static Buffer<T, Dims, InClassDimStorage> make_with_shape_of(Buffer<T2, D2, S2> src,
1969 void *(*allocate_fn)(size_t) = nullptr,
1970 void (*deallocate_fn)(void *) = nullptr) {
1971 static_assert(Dims == D2 || Dims == AnyDims);
1972 const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
1973 return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
1974 allocate_fn, deallocate_fn);
1975 }
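 // Usage sketch (illustrative only): the new buffer gets a fresh, densely
 // packed allocation with the same nesting order as the source, possibly
 // with a different element type.
 //
 //     auto rgb = Halide::Runtime::Buffer<uint8_t>::make_interleaved(640, 480, 3);
 //     auto dst = Halide::Runtime::Buffer<float>::make_with_shape_of(rgb);
 //     assert(dst.dim(2).stride() == 1);   // interleaved nesting order is preserved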
1976
1977private:
1978 static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
1979 int dimensions,
1980 halide_dimension_t *shape,
1981 void *(*allocate_fn)(size_t),
1982 void (*deallocate_fn)(void *)) {
1983 // Reorder the dimensions of src to have strides in increasing order
1984 std::vector<int> swaps;
1985 for (int i = dimensions - 1; i > 0; i--) {
1986 for (int j = i; j > 0; j--) {
1987 if (shape[j - 1].stride > shape[j].stride) {
1988 std::swap(shape[j - 1], shape[j]);
1989 swaps.push_back(j);
1990 }
1991 }
1992 }
1993
1994 // Rewrite the strides to be dense (this messes up src, which
1995 // is why we took it by value).
1996 for (int i = 0; i < dimensions; i++) {
1997 if (i == 0) {
1998 shape[i].stride = 1;
1999 } else {
2000 shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
2001 }
2002 }
2003
2004 // Undo the dimension reordering
2005 while (!swaps.empty()) {
2006 int j = swaps.back();
2007 std::swap(shape[j - 1], shape[j]);
2008 swaps.pop_back();
2009 }
2010
2011 // Use an explicit runtime type, and make dst a Buffer<void>, to allow
2012 // using this method with Buffer<void> for either src or dst.
2013 Buffer<> dst(dst_type, nullptr, dimensions, shape);
2014 dst.allocate(allocate_fn, deallocate_fn);
2015
2016 return dst;
2017 }
2018
2019 template<typename... Args>
2020 HALIDE_ALWAYS_INLINE
2021 ptrdiff_t
2022 offset_of(int d, int first, Args... rest) const {
2023#if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2024 assert(first >= this->buf.dim[d].min);
2025 assert(first < this->buf.dim[d].min + this->buf.dim[d].extent);
2026#endif
2027 return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
2028 }
2029
2030 HALIDE_ALWAYS_INLINE
2031 ptrdiff_t offset_of(int d) const {
2032 return 0;
2033 }
2034
2035 template<typename... Args>
2036 HALIDE_ALWAYS_INLINE
2037 storage_T *
2038 address_of(Args... args) const {
2039 if (T_is_void) {
2040 return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
2041 } else {
2042 return (storage_T *)(this->buf.host) + offset_of(0, args...);
2043 }
2044 }
2045
2046 HALIDE_ALWAYS_INLINE
2047 ptrdiff_t offset_of(const int *pos) const {
2048 ptrdiff_t offset = 0;
2049 for (int i = this->dimensions() - 1; i >= 0; i--) {
2050#if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2051 assert(pos[i] >= this->buf.dim[i].min);
2052 assert(pos[i] < this->buf.dim[i].min + this->buf.dim[i].extent);
2053#endif
2054 offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
2055 }
2056 return offset;
2057 }
2058
2059 HALIDE_ALWAYS_INLINE
2060 storage_T *address_of(const int *pos) const {
2061 if (T_is_void) {
2062 return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
2063 } else {
2064 return (storage_T *)this->buf.host + offset_of(pos);
2065 }
2066 }
2067
2068public:
2069 /** Get a pointer to the address of the min coordinate. */
2070 T *data() const {
2071 return (T *)(this->buf.host);
2072 }
2073
2074 /** Access elements. Use im(...) to get a reference to an element,
2075 * and use &im(...) to get the address of an element. If you pass
2076 * fewer arguments than the buffer has dimensions, the rest are
2077 * treated as their min coordinate. The non-const versions set the
2078 * host_dirty flag to true.
2079 */
2080 //@{
2081 template<typename... Args,
2082 typename = typename std::enable_if<AllInts<Args...>::value>::type>
2083 HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
2084 static_assert(!T_is_void,
2085 "Cannot use operator() on Buffer<void> types");
2086 constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2087 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2088 assert(!device_dirty());
2089 return *((const not_void_T *)(address_of(first, rest...)));
2090 }
2091
2092 HALIDE_ALWAYS_INLINE
2093 const not_void_T &
2094 operator()() const {
2095 static_assert(!T_is_void,
2096 "Cannot use operator() on Buffer<void> types");
2097 constexpr int expected_dims = 0;
2098 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2099 assert(!device_dirty());
2100 return *((const not_void_T *)(data()));
2101 }
2102
2103 HALIDE_ALWAYS_INLINE
2104 const not_void_T &
2105 operator()(const int *pos) const {
2106 static_assert(!T_is_void,
2107 "Cannot use operator() on Buffer<void> types");
2108 assert(!device_dirty());
2109 return *((const not_void_T *)(address_of(pos)));
2110 }
2111
2112 template<typename... Args,
2113 typename = typename std::enable_if<AllInts<Args...>::value>::type>
2114 HALIDE_ALWAYS_INLINE
2115 not_void_T &
2116 operator()(int first, Args... rest) {
2117 static_assert(!T_is_void,
2118 "Cannot use operator() on Buffer<void> types");
2119 constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2120 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2121 set_host_dirty();
2122 return *((not_void_T *)(address_of(first, rest...)));
2123 }
2124
2125 HALIDE_ALWAYS_INLINE
2126 not_void_T &
2127 operator()() {
2128 static_assert(!T_is_void,
2129 "Cannot use operator() on Buffer<void> types");
2130 constexpr int expected_dims = 0;
2131 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2132 set_host_dirty();
2133 return *((not_void_T *)(data()));
2134 }
2135
2136 HALIDE_ALWAYS_INLINE
2137 not_void_T &
2138 operator()(const int *pos) {
2139 static_assert(!T_is_void,
2140 "Cannot use operator() on Buffer<void> types");
2141 set_host_dirty();
2142 return *((not_void_T *)(address_of(pos)));
2143 }
2144 // @}
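 // Usage sketch (illustrative only; a dynamically-dimensioned buffer is used
 // so that the partial-coordinate form compiles, since statically-dimensioned
 // buffers require the exact number of coordinates):
 //
 //     Halide::Runtime::Buffer<float> im(100, 100, 3);
 //     im(10, 20, 0) = 0.25f;            // non-const access sets host_dirty
 //     float v = im(10, 20, 0);
 //     float *addr = &im(10, 20, 0);     // address of an element
 //     float first = im(10);             // trailing coordinates default to their mins
 //     const int pos[] = {10, 20, 0};
 //     assert(im(pos) == v);             // coordinate-array form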
2145
2146 /** Tests that all values in this buffer are equal to val. */
2147 bool all_equal(not_void_T val) const {
2148 bool all_equal = true;
2149 for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
2150 return all_equal;
2151 }
2152
2153 Buffer<T, Dims, InClassDimStorage> &fill(not_void_T val) {
2154 set_host_dirty();
2155 for_each_value([=](T &v) { v = val; });
2156 return *this;
2157 }
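 // Usage sketch (illustrative only):
 //
 //     Halide::Runtime::Buffer<float, 2> im(64, 64);
 //     im.fill(0.0f);
 //     assert(im.all_equal(0.0f));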
2158
2159private:
2160 /** Helper functions for for_each_value. */
2161 // @{
2162 template<int N>
2163 struct for_each_value_task_dim {
2164 std::ptrdiff_t extent;
2165 std::ptrdiff_t stride[N];
2166 };
2167
2168 // Given an array of strides, and a bunch of pointers to pointers
2169 // (all of different types), advance the pointers using the
2170 // strides.
2171 template<typename Ptr, typename... Ptrs>
2172 HALIDE_ALWAYS_INLINE static void advance_ptrs(const std::ptrdiff_t *stride, Ptr &ptr, Ptrs &...ptrs) {
2173 ptr += *stride;
2174 advance_ptrs(stride + 1, ptrs...);
2175 }
2176
2177 HALIDE_ALWAYS_INLINE
2178 static void advance_ptrs(const std::ptrdiff_t *) {
2179 }
2180
2181 template<typename Fn, typename Ptr, typename... Ptrs>
2182 HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
2183 const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
2184 if (d == 0) {
2185 if (innermost_strides_are_one) {
2186 Ptr end = ptr + t[0].extent;
2187 while (ptr != end) {
2188 f(*ptr++, (*ptrs++)...);
2189 }
2190 } else {
2191 for (std::ptrdiff_t i = t[0].extent; i != 0; i--) {
2192 f(*ptr, (*ptrs)...);
2193 advance_ptrs(t[0].stride, ptr, ptrs...);
2194 }
2195 }
2196 } else {
2197 for (std::ptrdiff_t i = t[d].extent; i != 0; i--) {
2198 for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
2199 advance_ptrs(t[d].stride, ptr, ptrs...);
2200 }
2201 }
2202 }
2203
2204 // Return pair is <new_dimensions, innermost_strides_are_one>
2205 template<int N>
2206 HALIDE_NEVER_INLINE static std::pair<int, bool> for_each_value_prep(for_each_value_task_dim<N> *t,
2207 const halide_buffer_t **buffers) {
2208 const int dimensions = buffers[0]->dimensions;
2209 assert(dimensions > 0);
2210
2211 // Check the buffers all have clean host allocations
2212 for (int i = 0; i < N; i++) {
2213 if (buffers[i]->device) {
2214 assert(buffers[i]->host &&
2215 "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
2216 assert(!buffers[i]->device_dirty() &&
2217 "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
2218 } else {
2219 assert(buffers[i]->host &&
2220 "Buffer passed to for_each_value has no host or device allocation");
2221 }
2222 }
2223
2224 // Extract the strides in all the dimensions
2225 for (int i = 0; i < dimensions; i++) {
2226 for (int j = 0; j < N; j++) {
2227 assert(buffers[j]->dimensions == dimensions);
2228 assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
2229 buffers[j]->dim[i].min == buffers[0]->dim[i].min);
2230 const int s = buffers[j]->dim[i].stride;
2231 t[i].stride[j] = s;
2232 }
2233 t[i].extent = buffers[0]->dim[i].extent;
2234
2235 // Order the dimensions by stride, so that the traversal is cache-coherent.
2236 // Use the last dimension for this, because this is the source in copies.
2237 // It appears to be better to optimize read order than write order.
2238 for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
2239 std::swap(t[j], t[j - 1]);
2240 }
2241 }
2242
2243 // flatten dimensions where possible to make a larger inner
2244 // loop for autovectorization.
2245 int d = dimensions;
2246 for (int i = 1; i < d; i++) {
2247 bool flat = true;
2248 for (int j = 0; j < N; j++) {
2249 flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
2250 }
2251 if (flat) {
2252 t[i - 1].extent *= t[i].extent;
2253 for (int j = i; j < d - 1; j++) {
2254 t[j] = t[j + 1];
2255 }
2256 i--;
2257 d--;
2258 }
2259 }
2260
2261 // Note that we assert() that dimensions > 0 above
2262 // (our one-and-only caller will only call us that way)
2263 // so the unchecked access to t[0] should be safe.
2264 bool innermost_strides_are_one = true;
2265 for (int i = 0; i < N; i++) {
2266 innermost_strides_are_one &= (t[0].stride[i] == 1);
2267 }
2268
2269 return {d, innermost_strides_are_one};
2270 }
2271
2272 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2273 void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
2274 if (dimensions() > 0) {
2275 const size_t alloc_size = dimensions() * sizeof(for_each_value_task_dim<N>);
2276 Buffer<>::for_each_value_task_dim<N> *t =
2277 (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA(alloc_size);
2278 // Move the preparatory code into a non-templated helper to
2279 // save code size.
2280 const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
2281 auto [new_dims, innermost_strides_are_one] = Buffer<>::for_each_value_prep(t, buffers);
2282 if (new_dims > 0) {
2283 Buffer<>::for_each_value_helper(f, new_dims - 1,
2284 innermost_strides_are_one,
2285 t,
2286 data(), (other_buffers.data())...);
2287 return;
2288 }
2289 // else fall thru
2290 }
2291
2292 // zero-dimensional case
2293 f(*data(), (*other_buffers.data())...);
2294 }
2295 // @}
2296
2297public:
2298 /** Call a function on every value in the buffer, and the
2299 * corresponding values in some number of other buffers of the
2300 * same size. The function should take a reference, const
2301 * reference, or value of the correct type for each buffer. This
2302 * effectively lifts a function of scalars to an element-wise
2303 * function of buffers. This produces code that the compiler can
2304 * autovectorize. This is slightly cheaper than for_each_element,
2305 * because it does not need to track the coordinates.
2306 *
2307 * Note that constness of Buffers is preserved: a const Buffer<T> (for either
2308 * 'this' or the other-buffers arguments) will allow mutation of the
2309 * buffer contents, while a Buffer<const T> will not. Attempting to specify
2310 * a mutable reference for the lambda argument of a Buffer<const T>
2311 * will result in a compilation error. */
2312 // @{
2313 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2314 HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_value(Fn &&f, Args &&...other_buffers) const {
2315 for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2316 return *this;
2317 }
2318
2319 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2320 HALIDE_ALWAYS_INLINE
2321 Buffer<T, Dims, InClassDimStorage> &
2322 for_each_value(Fn &&f, Args &&...other_buffers) {
2323 for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2324 return *this;
2325 }
2326 // @}
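 // Usage sketch (illustrative only): the first lambda argument binds to this
 // buffer's elements, the remaining arguments to the extra buffers, visited
 // in lockstep.
 //
 //     Halide::Runtime::Buffer<float, 2> a(128, 128), b(128, 128), sum(128, 128);
 //     a.fill(1.0f);
 //     b.fill(2.0f);
 //     sum.for_each_value([](float &s, float x, float y) { s = x + y; }, a, b);
 //     assert(sum.all_equal(3.0f));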
2327
2328private:
2329 // Helper functions for for_each_element
2330 struct for_each_element_task_dim {
2331 int min, max;
2332 };
2333
2334 /** If f is callable with this many args, call it. The first
2335 * argument is just to make the overloads distinct. Actual
2336 * overload selection is done using the enable_if. */
2337 template<typename Fn,
2338 typename... Args,
2339 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2340 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
2341 f(args...);
2342 }
2343
2344 /** If the above overload is impossible, we add an outer loop over
2345 * an additional argument and try again. */
2346 template<typename Fn,
2347 typename... Args>
2348 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
2349 for (int i = t[d].min; i <= t[d].max; i++) {
2350 for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
2351 }
2352 }
2353
2354 /** Determine the minimum number of arguments a callable can take
2355 * using the same trick. */
2356 template<typename Fn,
2357 typename... Args,
2358 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2359 HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
2360 return (int)(sizeof...(Args));
2361 }
2362
2363 /** The recursive version is only enabled up to a recursion limit
2364 * of 256. This catches callables that aren't callable with any
2365 * number of ints. */
2366 template<typename Fn,
2367 typename... Args>
2368 HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
2369 static_assert(sizeof...(args) <= 256,
2370 "Callable passed to for_each_element must accept either a const int *,"
2371 " or up to 256 ints. No such operator found. Expect infinite template recursion.");
2372 return num_args(0, std::forward<Fn>(f), 0, args...);
2373 }
2374
2375 /** A version where the callable takes a position array instead,
2376 * with compile-time recursion on the dimensionality. This
2377 * overload is preferred to the one below using the same int vs
2378 * double trick as above, but is impossible once d hits -1 using
2379 * std::enable_if. */
2380 template<int d,
2381 typename Fn,
2382 typename = typename std::enable_if<(d >= 0)>::type>
2383 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2384 for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2385 for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
2386 }
2387 }
2388
2389 /** Base case for recursion above. */
2390 template<int d,
2391 typename Fn,
2392 typename = typename std::enable_if<(d < 0)>::type>
2393 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2394 f(pos);
2395 }
2396
2397 /** A run-time-recursive version (instead of
2398 * compile-time-recursive) that requires the callable to take a
2399 * pointer to a position array instead. Dispatches to the
2400 * compile-time-recursive version once the dimensionality gets
2401 * small. */
2402 template<typename Fn>
2403 static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2404 if (d == -1) {
2405 f(pos);
2406 } else if (d == 0) {
2407 // Once the dimensionality gets small enough, dispatch to
2408 // a compile-time-recursive version for better codegen of
2409 // the inner loops.
2410 for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2411 } else if (d == 1) {
2412 for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2413 } else if (d == 2) {
2414 for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2415 } else if (d == 3) {
2416 for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2417 } else {
2418 for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2419 for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2420 }
2421 }
2422 }
2423
2424 /** We now have two overloads for for_each_element. This one
2425 * triggers if the callable takes a const int *.
2426 */
2427 template<typename Fn,
2428 typename = decltype(std::declval<Fn>()((const int *)nullptr))>
2429 static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
2430 const int size = dims * sizeof(int);
2431 int *pos = (int *)HALIDE_ALLOCA(size);
2432 // At least one version of GCC will (incorrectly) report that pos "may be used uninitialized".
2433 // Add this memset to silence it.
2434 memset(pos, 0, size);
2435 for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
2436 }
2437
2438 /** This one triggers otherwise. It treats the callable as
2439 * something that takes some number of ints. */
2440 template<typename Fn>
2441 HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
2442 int args = num_args(0, std::forward<Fn>(f));
2443 assert(dims >= args);
2444 for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
2445 }
2446
2447 template<typename Fn>
2448 void for_each_element_impl(Fn &&f) const {
2449 for_each_element_task_dim *t =
2450 (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
2451 for (int i = 0; i < dimensions(); i++) {
2452 t[i].min = dim(i).min();
2453 t[i].max = dim(i).max();
2454 }
2455 for_each_element(0, dimensions(), t, std::forward<Fn>(f));
2456 }
2457
2458public:
2459 /** Call a function at each site in a buffer. This is likely to be
2460 * much slower than using Halide code to populate a buffer, but is
2461 * convenient for tests. If the function has more arguments than the
2462 * buffer has dimensions, the remaining arguments will be zero. If it
2463 * has fewer arguments than the buffer has dimensions then the last
2464 * few dimensions of the buffer are not iterated over. For example,
2465 * the following code exploits this to set a floating point RGB image
2466 * to red:
2467
2468 \code
2469 Buffer<float, 3> im(100, 100, 3);
2470 im.for_each_element([&](int x, int y) {
2471 im(x, y, 0) = 1.0f;
2472 im(x, y, 1) = 0.0f;
2473 im(x, y, 2) = 0.0f;
2474 });
2475 \endcode
2476
2477 * The compiled code is equivalent to writing a nested for loop,
2478 * and compilers are capable of optimizing it in the same way.
2479 *
2480 * If the callable can be called with an int * as the sole argument,
2481 * that version is called instead. Each location in the buffer is
2482 * passed to it in a coordinate array. This version is higher-overhead
2483 * than the variadic version, but is useful for writing generic code
2484 * that accepts buffers of arbitrary dimensionality. For example, the
2485 * following sets the value at all sites in an arbitrary-dimensional
2486 * buffer to their first coordinate:
2487
2488 \code
2489 im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
2490 \endcode
2491
2492 * It is also possible to use for_each_element to iterate over entire
2493 * rows or columns by cropping the buffer to a single column or row
2494 * respectively and iterating over elements of the result. For example,
2495 * to set the diagonal of the image to 1 by iterating over the columns:
2496
2497 \code
2498 Buffer<float, 3> im(100, 100, 3);
2499 im.sliced(1, 0).for_each_element([&](int x, int c) {
2500 im(x, x, c) = 1.0f;
2501 });
2502 \endcode
2503
2504 * Or, assuming the memory layout is known to be dense per row, one can
2505 * memset each row of an image like so:
2506
2507 \code
2508 Buffer<float, 3> im(100, 100, 3);
2509 im.sliced(0, 0).for_each_element([&](int y, int c) {
2510 memset(&im(0, y, c), 0, sizeof(float) * im.width());
2511 });
2512 \endcode
2513
2514 */
2515 // @{
2516 template<typename Fn>
2517 HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_element(Fn &&f) const {
2518 for_each_element_impl(f);
2519 return *this;
2520 }
2521
2522 template<typename Fn>
2523 HALIDE_ALWAYS_INLINE
2524 Buffer<T, Dims, InClassDimStorage> &
2525 for_each_element(Fn &&f) {
2526 for_each_element_impl(f);
2527 return *this;
2528 }
2529 // @}
2530
2531private:
2532 template<typename Fn>
2533 struct FillHelper {
2534 Fn f;
2535 Buffer<T, Dims, InClassDimStorage> *buf;
2536
2537 template<typename... Args,
2538 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2539 void operator()(Args... args) {
2540 (*buf)(args...) = f(args...);
2541 }
2542
2543 FillHelper(Fn &&f, Buffer<T, Dims, InClassDimStorage> *buf)
2544 : f(std::forward<Fn>(f)), buf(buf) {
2545 }
2546 };
2547
2548public:
2549 /** Fill a buffer by evaluating a callable at every site. The
2550 * callable should look much like a callable passed to
2551 * for_each_element, but it should return the value that should be
2552 * stored to the coordinate corresponding to the arguments. */
2553 template<typename Fn,
2554 typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2555 Buffer<T, Dims, InClassDimStorage> &fill(Fn &&f) {
2556 // We'll go via for_each_element. We need a variadic wrapper lambda.
2557 FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
2558 return for_each_element(wrapper);
2559 }
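 // Usage sketch (illustrative only):
 //
 //     Halide::Runtime::Buffer<int, 2> ramp(8, 8);
 //     ramp.fill([](int x, int y) { return x + 8 * y; });
 //     // The coordinate-array form should also work for dimension-agnostic code:
 //     ramp.fill([](const int *pos) { return pos[0] + 8 * pos[1]; });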
2560
2561 /** Check whether an input buffer passed to an extern stage is querying
2562 * bounds. Compared to doing the host pointer check directly,
2563 * this both adds clarity to code and will facilitate moving to
2564 * another representation for bounds query arguments. */
2565 bool is_bounds_query() const {
2566 return buf.is_bounds_query();
2567 }
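 // Usage sketch (illustrative only; `my_extern_stage` is a hypothetical
 // extern stage). In a bounds query the host and device pointers are null
 // and only the requested shape should be filled in, which is done on the
 // raw halide_buffer_t:
 //
 //     extern "C" int my_extern_stage(halide_buffer_t *in, halide_buffer_t *out) {
 //         Halide::Runtime::Buffer<float> inbuf(*in);
 //         if (inbuf.is_bounds_query()) {
 //             for (int i = 0; i < out->dimensions; i++) {
 //                 in->dim[i].min = out->dim[i].min;       // request the same region
 //                 in->dim[i].extent = out->dim[i].extent; // as the output being computed
 //             }
 //             return 0;
 //         }
 //         // ... do the real work here ...
 //         return 0;
 //     }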
2568
2569 /** Convenient check to verify that all of the interesting bytes in the Buffer
2570 * are initialized under MSAN. Note that by default, we use for_each_value() here so that
2571 * we skip any unused padding that isn't part of the Buffer; this isn't efficient,
2572 * but in MSAN mode, it doesn't matter. (Pass true for the flag to force check
2573 * the entire Buffer storage.) */
2574 void msan_check_mem_is_initialized(bool entire = false) const {
2575#if defined(__has_feature)
2576#if __has_feature(memory_sanitizer)
2577 if (entire) {
2578 __msan_check_mem_is_initialized(data(), size_in_bytes());
2579 } else {
2580 for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); });
2581 }
2582#endif
2583#endif
2584 }
2585};
2586
2587} // namespace Runtime
2588} // namespace Halide
2589
2590#undef HALIDE_ALLOCA
2591
2592 #endif // HALIDE_RUNTIME_BUFFER_H