HalideBuffer.h
1 /** \file
2  * Defines a Buffer type that wraps halide_buffer_t and adds
3  * functionality, and methods for more conveniently iterating over the
4  * samples in a halide_buffer_t outside of Halide code. */
5 
6 #ifndef HALIDE_RUNTIME_BUFFER_H
7 #define HALIDE_RUNTIME_BUFFER_H
8 
9 #include <algorithm>
10 #include <atomic>
11 #include <cassert>
12 #include <cstdint>
13 #include <cstdlib>
14 #include <cstring>
15 #include <limits>
16 #include <memory>
17 #include <vector>
18 
19 #ifdef __APPLE__
20 #include <AvailabilityVersions.h>
21 #include <TargetConditionals.h>
22 #endif
23 
24 #if defined(__has_feature)
25 #if __has_feature(memory_sanitizer)
26 #include <sanitizer/msan_interface.h>
27 #endif
28 #endif
29 
30 #include "HalideRuntime.h"
31 
32 #ifdef _MSC_VER
33 #include <malloc.h>
34 #define HALIDE_ALLOCA _alloca
35 #else
36 #define HALIDE_ALLOCA __builtin_alloca
37 #endif
38 
39 // gcc 5.1 has a false positive warning on this code
40 #if __GNUC__ == 5 && __GNUC_MINOR__ == 1
41 #pragma GCC diagnostic ignored "-Warray-bounds"
42 #endif
43 
44 #ifndef HALIDE_RUNTIME_BUFFER_CHECK_INDICES
45 #define HALIDE_RUNTIME_BUFFER_CHECK_INDICES 0
46 #endif
47 
48 #ifndef HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT
49 // Conservatively align buffer allocations to 128 bytes by default.
50 // This is enough alignment for all the platforms currently in use.
51 // Redefine this in your compiler settings if you desire more/less alignment.
52 #define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT 128
53 #endif
54 
55 static_assert(((HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT & (HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT - 1)) == 0),
56  "HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT must be a power of 2.");
57 
58 // Unfortunately, not all C++17 runtimes support aligned_alloc
59 // (it may depend on OS/SDK version); this is provided as an opt-out
60 // if you are compiling on a platform that doesn't provide a (good)
61 // implementation. (Note that we actually use the C11 `::aligned_alloc()`
62 // rather than the C++17 `std::aligned_alloc()` because at least one platform
63 // we found supports the former but not the latter.)
64 #ifndef HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
65 
66 // clang-format off
67 #ifdef _MSC_VER
68 
69  // MSVC doesn't implement aligned_alloc(), even in C++17 mode, and
70  // has stated they probably never will, so, always default it off here.
71  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
72 
73 #elif defined(__ANDROID_API__) && __ANDROID_API__ < 28
74 
75  // Android doesn't provide aligned_alloc until API 28
76  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
77 
78 #elif defined(__APPLE__)
79 
80  #if TARGET_OS_OSX && (__MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_15)
81 
82  // macOS doesn't provide aligned_alloc until 10.15
83  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
84 
85  #elif TARGET_OS_IPHONE && (__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_14_0)
86 
87  // iOS doesn't provide aligned_alloc until 14.0
88  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
89 
90  #else
91 
92  // Assume it's ok on all other Apple targets
93  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
94 
95  #endif
96 
97 #else
98 
99  #if defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)
100 
101  // ARM GNU-A baremetal compiler doesn't provide aligned_alloc as of 12.2
102  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
103 
104  #else
105 
106  // Not Windows, Android, or Apple: just assume it's ok
107  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
108 
109  #endif
110 
111 #endif
112 // clang-format on
113 
114 #endif // HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
115 
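 /** Example (illustrative): a build whose platform lacks a working aligned_alloc can force
  * the plain malloc/free path by defining the opt-out macro before this header is included: \code
  * #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
  * #include "HalideBuffer.h"
  * \endcode */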
116 namespace Halide {
117 namespace Runtime {
118 
119 // Forward-declare our Buffer class
120 template<typename T, int Dims, int InClassDimStorage>
121 class Buffer;
122 
123 // A helper to check if a parameter pack is entirely implicitly
124 // int-convertible to use with std::enable_if
125 template<typename... Args>
126 struct AllInts : std::false_type {};
127 
128 template<>
129 struct AllInts<> : std::true_type {};
130 
131 template<typename T, typename... Args>
132 struct AllInts<T, Args...> {
133  static const bool value = std::is_convertible<T, int>::value && AllInts<Args...>::value;
134 };
135 
136 // Floats and doubles are technically implicitly int-convertible, but
137 // doing so produces a warning we treat as an error, so just disallow
138 // it here.
139 template<typename... Args>
140 struct AllInts<float, Args...> : std::false_type {};
141 
142 template<typename... Args>
143 struct AllInts<double, Args...> : std::false_type {};
144 
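 /** Example (illustrative): AllInts<> is true only when every type in the parameter pack is
  * implicitly int-convertible and none of them is a floating-point type. \code
  * static_assert(AllInts<int, unsigned, short>::value, "");
  * static_assert(!AllInts<int, float>::value, "");
  * \endcode */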
145 // A helper to detect if there are any zeros in a container
146 namespace Internal {
147 template<typename Container>
148 bool any_zero(const Container &c) {
149  for (int i : c) {
150  if (i == 0) {
151  return true;
152  }
153  }
154  return false;
155 }
156 } // namespace Internal
157 
158 /** A struct acting as a header for allocations owned by the Buffer
159  * class itself. */
160 struct AllocationHeader {
161  void (*deallocate_fn)(void *);
162  std::atomic<int> ref_count;
163 
164  // Note that ref_count always starts at 1
165  explicit AllocationHeader(void (*deallocate_fn)(void *))
166  : deallocate_fn(deallocate_fn), ref_count(1) {
167  }
168 };
169 
170 /** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
171 enum struct BufferDeviceOwnership : int {
172  Allocated, ///> halide_device_free will be called when device ref count goes to zero
173  WrappedNative, ///> halide_device_detach_native will be called when device ref count goes to zero
174  Unmanaged, ///> No free routine will be called when device ref count goes to zero
175  AllocatedDeviceAndHost, ///> Call device_and_host_free when DevRefCount goes to zero.
176  Cropped, ///> Call halide_device_release_crop when DevRefCount goes to zero.
177 };
178 
179 /** A similar struct for managing device allocations. */
180 struct DeviceRefCount {
181  // This is only ever constructed when there's something to manage,
182  // so start at one.
183  std::atomic<int> count{1};
184  BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated};
185 };
186 
187 constexpr int AnyDims = -1;
188 
189 /** A templated Buffer class that wraps halide_buffer_t and adds
190  * functionality. When using Halide from C++, this is the preferred
191  * way to create input and output buffers. The overhead of using this
192  * class relative to a naked halide_buffer_t is minimal - it uses another
193  * ~16 bytes on the stack, and does no dynamic allocations when using
194  * it to represent existing memory of a known maximum dimensionality.
195  *
196  * The template parameter T is the element type. For buffers where the
197  * element type is unknown, or may vary, use void or const void.
198  *
199  * The template parameter Dims is the number of dimensions. For buffers where
200  * the dimensionality is unknown at compile time, or may vary, use AnyDims.
201  *
202  * InClassDimStorage is the maximum number of dimensions that can be represented
203  * using space inside the class itself. Set it to the maximum dimensionality
204  * you expect this buffer to be. If the actual dimensionality exceeds
205  * this, heap storage is allocated to track the shape of the buffer.
206  * InClassDimStorage defaults to 4, which should cover nearly all usage.
207  *
208  * The class optionally allocates and owns memory for the image using
209  * a shared pointer allocated with the provided allocator. If they are
210  * null, malloc and free are used. Any device-side allocation is
211  * considered as owned if and only if the host-side allocation is
212  * owned. */
213 template<typename T = void,
214  int Dims = AnyDims,
215  int InClassDimStorage = (Dims == AnyDims ? 4 : std::max(Dims, 1))>
216 class Buffer {
217  /** The underlying halide_buffer_t */
218  halide_buffer_t buf = {};
219 
220  /** Some in-class storage for shape of the dimensions. */
221  halide_dimension_t shape[InClassDimStorage];
222 
223  /** The allocation owned by this Buffer. NULL if the Buffer does not
224  * own the memory. */
225  AllocationHeader *alloc = nullptr;
226 
227  /** A reference count for the device allocation owned by this
228  * buffer. */
229  mutable DeviceRefCount *dev_ref_count = nullptr;
230 
231  /** True if T is of type void or const void */
232  static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
233 
234  /** A type function that adds a const qualifier if T is a const type. */
235  template<typename T2>
236  using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
237 
238  /** T unless T is (const) void, in which case (const)
239  * uint8_t. Useful for providing return types for operator() */
240  using not_void_T = typename std::conditional<T_is_void,
241  add_const_if_T_is_const<uint8_t>,
242  T>::type;
243 
244  /** T with constness removed. Useful for return type of copy(). */
245  using not_const_T = typename std::remove_const<T>::type;
246 
247  /** The type the elements are stored as. Equal to not_void_T
248  * unless T is a pointer, in which case uint64_t. Halide stores
249  * all pointer types as uint64s internally, even on 32-bit
250  * systems. */
251  using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
252 
253 public:
254  /** True if the Halide type is not void (or const void). */
255  static constexpr bool has_static_halide_type = !T_is_void;
256 
257  /** Get the Halide type of T. Callers should not use the result if
258  * has_static_halide_type is false. */
259  static constexpr halide_type_t static_halide_type() {
260  return halide_type_of<typename std::remove_cv<not_void_T>::type>();
261  }
262 
263  /** Does this Buffer own the host memory it refers to? */
264  bool owns_host_memory() const {
265  return alloc != nullptr;
266  }
267 
268  static constexpr bool has_static_dimensions = (Dims != AnyDims);
269 
270  /** Callers should not use the result if
271  * has_static_dimensions is false. */
272  static constexpr int static_dimensions() {
273  return Dims;
274  }
275 
276  static_assert(!has_static_dimensions || static_dimensions() >= 0);
277 
278 private:
279  /** Increment the reference count of any owned allocation */
280  void incref() const {
281  if (owns_host_memory()) {
282  alloc->ref_count++;
283  }
284  if (buf.device) {
285  if (!dev_ref_count) {
286  // I seem to have a non-zero dev field but no
287  // reference count for it. I must have been given a
288  // device allocation by a Halide pipeline, and have
289  // never been copied from since. Take sole ownership
290  // of it.
291  dev_ref_count = new DeviceRefCount;
292  }
293  dev_ref_count->count++;
294  }
295  }
296 
297  // Note that this is called "cropped" but can also encompass a slice/embed
298  // operation.
299  struct DevRefCountCropped : DeviceRefCount {
300  Buffer<T, Dims, InClassDimStorage> cropped_from;
301  explicit DevRefCountCropped(const Buffer<T, Dims, InClassDimStorage> &cropped_from)
302  : cropped_from(cropped_from) {
303  ownership = BufferDeviceOwnership::Cropped;
304  }
305  };
306 
307  /** Set up the device ref count for a buffer to indicate it is a crop (or slice, embed, etc.) of cropped_from */
308  void crop_from(const Buffer<T, Dims, InClassDimStorage> &cropped_from) {
309  assert(dev_ref_count == nullptr);
310  dev_ref_count = new DevRefCountCropped(cropped_from);
311  }
312 
313  /** Decrement the reference count of any owned allocation and free host
314  * and device memory if it hits zero. Sets alloc to nullptr. */
315  void decref(bool device_only = false) {
316  if (owns_host_memory() && !device_only) {
317  int new_count = --(alloc->ref_count);
318  if (new_count == 0) {
319  void (*fn)(void *) = alloc->deallocate_fn;
320  alloc->~AllocationHeader();
321  fn(alloc);
322  }
323  buf.host = nullptr;
324  alloc = nullptr;
325  set_host_dirty(false);
326  }
327  int new_count = 0;
328  if (dev_ref_count) {
329  new_count = --(dev_ref_count->count);
330  }
331  if (new_count == 0) {
332  if (buf.device) {
333  assert(!(alloc && device_dirty()) &&
334  "Implicitly freeing a dirty device allocation while a host allocation still lives. "
335  "Call device_free explicitly if you want to drop dirty device-side data. "
336  "Call copy_to_host explicitly if you want the data copied to the host allocation "
337  "before the device allocation is freed.");
338  int result = halide_error_code_success;
339  if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
340  result = buf.device_interface->detach_native(nullptr, &buf);
341  } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
342  result = buf.device_interface->device_and_host_free(nullptr, &buf);
343  } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
344  result = buf.device_interface->device_release_crop(nullptr, &buf);
345  } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
346  result = buf.device_interface->device_free(nullptr, &buf);
347  }
348  // No reasonable way to return the error, but we can at least assert-fail in debug builds.
349  assert((result == halide_error_code_success) && "device_interface call returned a nonzero result in Buffer::decref()");
350  (void)result;
351  }
352  if (dev_ref_count) {
353  if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
354  delete (DevRefCountCropped *)dev_ref_count;
355  } else {
356  delete dev_ref_count;
357  }
358  }
359  }
360  dev_ref_count = nullptr;
361  buf.device = 0;
362  buf.device_interface = nullptr;
363  }
364 
365  void free_shape_storage() {
366  if (buf.dim != shape) {
367  delete[] buf.dim;
368  buf.dim = nullptr;
369  }
370  }
371 
372  template<int DimsSpecified>
373  void make_static_shape_storage() {
374  static_assert(Dims == AnyDims || Dims == DimsSpecified,
375  "Number of arguments to Buffer() does not match static dimensionality");
376  buf.dimensions = DimsSpecified;
377  if constexpr (Dims == AnyDims) {
378  if constexpr (DimsSpecified <= InClassDimStorage) {
379  buf.dim = shape;
380  } else {
381  static_assert(DimsSpecified >= 1);
382  buf.dim = new halide_dimension_t[DimsSpecified];
383  }
384  } else {
385  static_assert(InClassDimStorage >= Dims);
386  buf.dim = shape;
387  }
388  }
389 
390  void make_shape_storage(const int dimensions) {
391  if (Dims != AnyDims && Dims != dimensions) {
392  assert(false && "Number of arguments to Buffer() does not match static dimensionality");
393  }
394  // This should usually be inlined, so if dimensions is statically known,
395  // we can skip the call to new
396  buf.dimensions = dimensions;
397  buf.dim = (dimensions <= InClassDimStorage) ? shape : new halide_dimension_t[dimensions];
398  }
399 
400  void copy_shape_from(const halide_buffer_t &other) {
401  // All callers of this ensure that buf.dimensions == other.dimensions.
402  make_shape_storage(other.dimensions);
403  std::copy(other.dim, other.dim + other.dimensions, buf.dim);
404  }
405 
406  template<typename T2, int D2, int S2>
407  void move_shape_from(Buffer<T2, D2, S2> &&other) {
408  if (other.shape == other.buf.dim) {
409  copy_shape_from(other.buf);
410  } else {
411  buf.dim = other.buf.dim;
412  other.buf.dim = nullptr;
413  }
414  }
415 
416  /** Initialize the shape from a halide_buffer_t. */
417  void initialize_from_buffer(const halide_buffer_t &b,
418  BufferDeviceOwnership ownership) {
419  memcpy(&buf, &b, sizeof(halide_buffer_t));
420  copy_shape_from(b);
421  if (b.device) {
422  dev_ref_count = new DeviceRefCount;
423  dev_ref_count->ownership = ownership;
424  }
425  }
426 
427  /** Initialize the shape from an array of ints */
428  void initialize_shape(const int *sizes) {
429  for (int i = 0; i < buf.dimensions; i++) {
430  buf.dim[i].min = 0;
431  buf.dim[i].extent = sizes[i];
432  if (i == 0) {
433  buf.dim[i].stride = 1;
434  } else {
435  buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
436  }
437  }
438  }
439 
440  /** Initialize the shape from a vector of extents */
441  void initialize_shape(const std::vector<int> &sizes) {
442  assert(buf.dimensions == (int)sizes.size());
443  initialize_shape(sizes.data());
444  }
445 
446  /** Initialize the shape from the static shape of an array */
447  template<typename Array, size_t N>
448  void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
449  buf.dim[next].min = 0;
450  buf.dim[next].extent = (int)N;
451  if (next == 0) {
452  buf.dim[next].stride = 1;
453  } else {
454  initialize_shape_from_array_shape(next - 1, vals[0]);
455  buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
456  }
457  }
458 
459  /** Base case for the template recursion above. */
460  template<typename T2>
461  void initialize_shape_from_array_shape(int, const T2 &) {
462  }
463 
464  /** Get the dimensionality of a multi-dimensional C array */
465  template<typename Array, size_t N>
466  static int dimensionality_of_array(Array (&vals)[N]) {
467  return dimensionality_of_array(vals[0]) + 1;
468  }
469 
470  template<typename T2>
471  static int dimensionality_of_array(const T2 &) {
472  return 0;
473  }
474 
475  /** Get the underlying halide_type_t of an array's element type. */
476  template<typename Array, size_t N>
477  static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
478  return scalar_type_of_array(vals[0]);
479  }
480 
481  template<typename T2>
482  static halide_type_t scalar_type_of_array(const T2 &) {
483  return halide_type_of<typename std::remove_cv<T2>::type>();
484  }
485 
486  /** Crop a single dimension without handling device allocation. */
487  void crop_host(int d, int min, int extent) {
488  assert(dim(d).min() <= min);
489  assert(dim(d).max() >= min + extent - 1);
490  ptrdiff_t shift = min - dim(d).min();
491  if (buf.host != nullptr) {
492  buf.host += (shift * dim(d).stride()) * type().bytes();
493  }
494  buf.dim[d].min = min;
495  buf.dim[d].extent = extent;
496  }
497 
498  /** Crop as many dimensions as are in rect, without handling device allocation. */
499  void crop_host(const std::vector<std::pair<int, int>> &rect) {
500  assert(rect.size() <= static_cast<decltype(rect.size())>(std::numeric_limits<int>::max()));
501  int limit = (int)rect.size();
502  assert(limit <= dimensions());
503  for (int i = 0; i < limit; i++) {
504  crop_host(i, rect[i].first, rect[i].second);
505  }
506  }
507 
508  void complete_device_crop(Buffer<T, Dims, InClassDimStorage> &result_host_cropped) const {
509  assert(buf.device_interface != nullptr);
510  if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == halide_error_code_success) {
511  const Buffer<T, Dims, InClassDimStorage> *cropped_from = this;
512  // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
513  // is it possible to get to this point without incref having run at least once since
514  // the device field was set? (I.e. in the internal logic of crop. incref might have been
515  // called.)
516  if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
517  cropped_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
518  }
519  result_host_cropped.crop_from(*cropped_from);
520  }
521  }
522 
523  /** slice a single dimension without handling device allocation. */
524  void slice_host(int d, int pos) {
525  static_assert(Dims == AnyDims);
526  assert(dimensions() > 0);
527  assert(d >= 0 && d < dimensions());
528  assert(pos >= dim(d).min() && pos <= dim(d).max());
529  buf.dimensions--;
530  ptrdiff_t shift = pos - buf.dim[d].min;
531  if (buf.host != nullptr) {
532  buf.host += (shift * buf.dim[d].stride) * type().bytes();
533  }
534  for (int i = d; i < buf.dimensions; i++) {
535  buf.dim[i] = buf.dim[i + 1];
536  }
537  buf.dim[buf.dimensions] = {0, 0, 0};
538  }
539 
540  void complete_device_slice(Buffer<T, AnyDims, InClassDimStorage> &result_host_sliced, int d, int pos) const {
541  assert(buf.device_interface != nullptr);
542  if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == halide_error_code_success) {
543  const Buffer<T, Dims, InClassDimStorage> *sliced_from = this;
544  // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
545  // is it possible to get to this point without incref having run at least once since
546  // the device field was set? (I.e. in the internal logic of slice. incref might have been
547  // called.)
548  if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
549  sliced_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
550  }
551  // crop_from() is correct here, despite the fact that we are slicing.
552  result_host_sliced.crop_from(*sliced_from);
553  }
554  }
555 
556 public:
557  typedef T ElemType;
558 
559  /** Read-only access to the shape */
560  class Dimension {
561  const halide_dimension_t &d;
562 
563  public:
564  /** The lowest coordinate in this dimension */
565  HALIDE_ALWAYS_INLINE int min() const {
566  return d.min;
567  }
568 
569  /** The number of elements in memory you have to step over to
570  * increment this coordinate by one. */
571  HALIDE_ALWAYS_INLINE int stride() const {
572  return d.stride;
573  }
574 
575  /** The extent of the image along this dimension */
576  HALIDE_ALWAYS_INLINE int extent() const {
577  return d.extent;
578  }
579 
580  /** The highest coordinate in this dimension */
581  HALIDE_ALWAYS_INLINE int max() const {
582  return min() + extent() - 1;
583  }
584 
585  /** An iterator class, so that you can iterate over
586  * coordinates in a dimension using a range-based for loop. */
587  struct iterator {
588  int val;
589  int operator*() const {
590  return val;
591  }
592  bool operator!=(const iterator &other) const {
593  return val != other.val;
594  }
595  iterator &operator++() {
596  val++;
597  return *this;
598  }
599  };
600 
601  /** An iterator that points to the min coordinate */
602  HALIDE_ALWAYS_INLINE iterator begin() const {
603  return {min()};
604  }
605 
606  /** An iterator that points to one past the max coordinate */
607  HALIDE_ALWAYS_INLINE iterator end() const {
608  return {min() + extent()};
609  }
610 
611  explicit Dimension(const halide_dimension_t &dim)
612  : d(dim) {
613  }
614  };
615 
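 /** Example (illustrative): Dimension's begin()/end() let a range-based for loop walk the
  * valid coordinates of one dimension, from min() to max() inclusive. Assuming `im` is an
  * allocated two-dimensional Buffer: \code
  * for (int y : im.dim(1)) {
  *     for (int x : im.dim(0)) {
  *         // (x, y) is a valid coordinate of im here
  *     }
  * }
  * \endcode */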
616  /** Access the shape of the buffer */
617  HALIDE_ALWAYS_INLINE Dimension dim(int i) const {
618  assert(i >= 0 && i < this->dimensions());
619  return Dimension(buf.dim[i]);
620  }
621 
622  /** Access to the mins, strides, extents. Will be deprecated. Do not use. */
623  // @{
624  int min(int i) const {
625  return dim(i).min();
626  }
627  int extent(int i) const {
628  return dim(i).extent();
629  }
630  int stride(int i) const {
631  return dim(i).stride();
632  }
633  // @}
634 
635  /** The total number of elements this buffer represents. Equal to
636  * the product of the extents */
637  size_t number_of_elements() const {
638  return buf.number_of_elements();
639  }
640 
641  /** Get the dimensionality of the buffer. */
642  int dimensions() const {
643  if constexpr (has_static_dimensions) {
644  return Dims;
645  } else {
646  return buf.dimensions;
647  }
648  }
649 
650  /** Get the type of the elements. */
651  halide_type_t type() const {
652  return buf.type;
653  }
654 
655  /** A pointer to the element with the lowest address. If all
656  * strides are positive, equal to the host pointer. */
657  T *begin() const {
658  assert(buf.host != nullptr); // Cannot call begin() on an unallocated Buffer.
659  return (T *)buf.begin();
660  }
661 
662  /** A pointer to one beyond the element with the highest address. */
663  T *end() const {
664  assert(buf.host != nullptr); // Cannot call end() on an unallocated Buffer.
665  return (T *)buf.end();
666  }
667 
668  /** The total number of bytes spanned by the data in memory. */
669  size_t size_in_bytes() const {
670  return buf.size_in_bytes();
671  }
672 
673  /** Reset the Buffer to be equivalent to a default-constructed Buffer
674  * of the same static type (if any); Buffer<void> will have its runtime
675  * type reset to uint8. */
676  void reset() {
677  *this = Buffer();
678  }
679 
680  Buffer()
681  : shape() {
682  buf.type = static_halide_type();
683  // If Dims is statically known, create storage for that many dimensions;
684  // otherwise, make a zero-dimensional buffer.
685  constexpr int buf_dimensions = (Dims == AnyDims) ? 0 : Dims;
686  make_static_shape_storage<buf_dimensions>();
687  }
688 
689  /** Make a Buffer from a halide_buffer_t */
690  explicit Buffer(const halide_buffer_t &buf,
691  BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
692  assert(T_is_void || buf.type == static_halide_type());
693  initialize_from_buffer(buf, ownership);
694  }
695 
696  /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
697  template<typename T2, int D2, int S2>
698  friend class Buffer;
699 
700 private:
701  template<typename T2, int D2, int S2>
702  static void static_assert_can_convert_from() {
703  static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
704  "Can't convert from a Buffer<const T> to a Buffer<T>");
705  static_assert(std::is_same<typename std::remove_const<T>::type,
706  typename std::remove_const<T2>::type>::value ||
707  T_is_void || Buffer<T2, D2, S2>::T_is_void,
708  "type mismatch constructing Buffer");
709  static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2,
710  "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality");
711  }
712 
713 public:
714  /** Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type.
715  * If this can be determined at compile time, fail with a static assert; otherwise
716  * return a boolean based on runtime typing. */
717  template<typename T2, int D2, int S2>
718  static bool can_convert_from(const Buffer<T2, D2, S2> &other) {
719  static_assert_can_convert_from<T2, D2, S2>();
720  if (Buffer<T2, D2, S2>::T_is_void && !T_is_void) {
721  if (other.type() != static_halide_type()) {
722  return false;
723  }
724  }
725  if (Dims != AnyDims) {
726  if (other.dimensions() != Dims) {
727  return false;
728  }
729  }
730  return true;
731  }
732 
733  /** Fail an assertion at runtime or compile-time if a Buffer<T, Dims, InClassDimStorage>
734  * cannot be constructed from some other Buffer type. */
735  template<typename T2, int D2, int S2>
736  static void assert_can_convert_from(const Buffer<T2, D2, S2> &other) {
737  // Explicitly call static_assert_can_convert_from() here so
738  // that we always get compile-time checking, even if compiling with
739  // assertions disabled.
740  static_assert_can_convert_from<T2, D2, S2>();
741  assert(can_convert_from(other));
742  }
743 
744  /** Copy constructor. Does not copy underlying data. */
745  Buffer(const Buffer<T, Dims, InClassDimStorage> &other)
746  : buf(other.buf),
747  alloc(other.alloc) {
748  other.incref();
749  dev_ref_count = other.dev_ref_count;
750  copy_shape_from(other.buf);
751  }
752 
753  /** Construct a Buffer from a Buffer of different dimensionality
754  * and type. Asserts that the type and dimensionality matches (at runtime,
755  * if one of the types is void). Note that this constructor is
756  * implicit. This, for example, lets you pass things like
757  * Buffer<T> or Buffer<const void> to functions expecting
758  * Buffer<const T>. */
759  template<typename T2, int D2, int S2>
760  Buffer(const Buffer<T2, D2, S2> &other)
761  : buf(other.buf),
762  alloc(other.alloc) {
763  assert_can_convert_from(other);
764  other.incref();
765  dev_ref_count = other.dev_ref_count;
766  copy_shape_from(other.buf);
767  }
768 
769  /** Move constructor */
770  Buffer(Buffer<T, Dims, InClassDimStorage> &&other) noexcept
771  : buf(other.buf),
772  alloc(other.alloc),
773  dev_ref_count(other.dev_ref_count) {
774  other.dev_ref_count = nullptr;
775  other.alloc = nullptr;
776  move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
777  other.buf = halide_buffer_t();
778  }
779 
780  /** Move-construct a Buffer from a Buffer of different
781  * dimensionality and type. Asserts that the types match (at
782  * runtime if one of the types is void). */
783  template<typename T2, int D2, int S2>
784  Buffer(Buffer<T2, D2, S2> &&other)
785  : buf(other.buf),
786  alloc(other.alloc),
787  dev_ref_count(other.dev_ref_count) {
788  assert_can_convert_from(other);
789  other.dev_ref_count = nullptr;
790  other.alloc = nullptr;
791  move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
792  other.buf = halide_buffer_t();
793  }
794 
795  /** Assign from another Buffer of possibly-different
796  * dimensionality and type. Asserts that the types match (at
797  * runtime if one of the types is void). */
798  template<typename T2, int D2, int S2>
799  Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T2, D2, S2> &other) {
800  if ((const void *)this == (const void *)&other) {
801  return *this;
802  }
803  assert_can_convert_from(other);
804  other.incref();
805  decref();
806  dev_ref_count = other.dev_ref_count;
807  alloc = other.alloc;
808  free_shape_storage();
809  buf = other.buf;
810  copy_shape_from(other.buf);
811  return *this;
812  }
813 
814  /** Standard assignment operator */
815  Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T, Dims, InClassDimStorage> &other) {
816  // The cast to void* here is just to satisfy clang-tidy
817  if ((const void *)this == (const void *)&other) {
818  return *this;
819  }
820  other.incref();
821  decref();
822  dev_ref_count = other.dev_ref_count;
823  alloc = other.alloc;
824  free_shape_storage();
825  buf = other.buf;
826  copy_shape_from(other.buf);
827  return *this;
828  }
829 
830  /** Move from another Buffer of possibly-different
831  * dimensionality and type. Asserts that the types match (at
832  * runtime if one of the types is void). */
833  template<typename T2, int D2, int S2>
834  Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T2, D2, S2> &&other) {
835  assert_can_convert_from(other);
836  decref();
837  alloc = other.alloc;
838  other.alloc = nullptr;
839  dev_ref_count = other.dev_ref_count;
840  other.dev_ref_count = nullptr;
841  free_shape_storage();
842  buf = other.buf;
843  move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
844  other.buf = halide_buffer_t();
845  return *this;
846  }
847 
848  /** Standard move-assignment operator */
849  Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T, Dims, InClassDimStorage> &&other) noexcept {
850  decref();
851  alloc = other.alloc;
852  other.alloc = nullptr;
853  dev_ref_count = other.dev_ref_count;
854  other.dev_ref_count = nullptr;
855  free_shape_storage();
856  buf = other.buf;
857  move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
858  other.buf = halide_buffer_t();
859  return *this;
860  }
861 
862  /** Check that the product of the extents fits in memory. */
863  void check_overflow() {
864  size_t size = type().bytes();
865  for (int i = 0; i < dimensions(); i++) {
866  size *= dim(i).extent();
867  }
868  // We allow 2^31 or 2^63 bytes, so drop the top bit.
869  size = (size << 1) >> 1;
870  for (int i = 0; i < dimensions(); i++) {
871  size /= dim(i).extent();
872  }
873  assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
874  }
875 
876  /** Allocate memory for this Buffer. Drops the reference to any
877  * owned memory. */
878  void allocate(void *(*allocate_fn)(size_t) = nullptr,
879  void (*deallocate_fn)(void *) = nullptr) {
880  // Drop any existing allocation
881  deallocate();
882 
883  // Conservatively align images to (usually) 128 bytes. This is enough
884  // alignment for all the platforms we might use. Also ensure that the allocation
885  // is such that the logical size is an integral multiple of 128 bytes (or a bit more).
886  constexpr size_t alignment = HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT;
887 
888  const auto align_up = [=](size_t value) -> size_t {
889  return (value + alignment - 1) & ~(alignment - 1);
890  };
891 
892  size_t size = size_in_bytes();
893 
894 #if HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
895  // Only use aligned_alloc() if no custom allocators are specified.
896  if (!allocate_fn && !deallocate_fn) {
897  // As a practical matter, sizeof(AllocationHeader) is going to be no more than 16 bytes
898  // on any supported platform, so we will just overallocate by 'alignment'
899  // so that the user storage also starts at an aligned point. This is a bit
900  // wasteful, but probably not a big deal.
901  static_assert(sizeof(AllocationHeader) <= alignment);
902  void *alloc_storage = ::aligned_alloc(alignment, align_up(size) + alignment);
903  assert((uintptr_t)alloc_storage == align_up((uintptr_t)alloc_storage));
904  alloc = new (alloc_storage) AllocationHeader(free);
905  buf.host = (uint8_t *)((uintptr_t)alloc_storage + alignment);
906  return;
907  }
908  // else fall thru
909 #endif
910  if (!allocate_fn) {
911  allocate_fn = malloc;
912  }
913  if (!deallocate_fn) {
914  deallocate_fn = free;
915  }
916 
917  static_assert(sizeof(AllocationHeader) <= alignment);
918 
919  // malloc() and friends must return a pointer aligned to at least alignof(std::max_align_t);
920  // make sure this is OK for AllocationHeader, since it always goes at the start
921  static_assert(alignof(AllocationHeader) <= alignof(std::max_align_t));
922 
923  const size_t requested_size = align_up(size + alignment +
924  std::max(0, (int)sizeof(AllocationHeader) -
925  (int)sizeof(std::max_align_t)));
926  void *alloc_storage = allocate_fn(requested_size);
927  alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
928  uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
929  buf.host = (uint8_t *)align_up((uintptr_t)unaligned_ptr);
930  }
931 
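 /** Worked example (illustrative): with the default 128-byte alignment, the bitmask form of
  * align_up above rounds a size up to the next multiple of the alignment: \code
  * // (value + alignment - 1) & ~(alignment - 1), with alignment == 128:
  * //   align_up(1)   == (1   + 127) & ~127 == 128
  * //   align_up(128) == (128 + 127) & ~127 == 128
  * //   align_up(130) == (130 + 127) & ~127 == 256
  * \endcode */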
932  /** Drop reference to any owned host or device memory, possibly
933  * freeing it, if this buffer held the last reference to
934  * it. Retains the shape of the buffer. Does nothing if this
935  * buffer did not allocate its own memory. */
936  void deallocate() {
937  decref();
938  }
939 
940  /** Drop reference to any owned device memory, possibly freeing it
941  * if this buffer held the last reference to it. Asserts that
942  * device_dirty is false. */
943  void device_deallocate() {
944  decref(true);
945  }
946 
947  /** Allocate a new image of the given size with a runtime
948  * type. Only used when you do know what size you want but you
949  * don't know statically what type the elements are. Pass zeroes
950  * to make a buffer suitable for bounds query calls. */
951  template<typename... Args,
952  typename = typename std::enable_if<AllInts<Args...>::value>::type>
953  Buffer(halide_type_t t, int first, Args... rest) {
954  if (!T_is_void) {
955  assert(static_halide_type() == t);
956  }
957  int extents[] = {first, (int)rest...};
958  buf.type = t;
959  constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
960  make_static_shape_storage<buf_dimensions>();
961  initialize_shape(extents);
962  if (!Internal::any_zero(extents)) {
963  check_overflow();
964  allocate();
965  }
966  }
967 
968  /** Allocate a new image of the given size. Pass zeroes to make a
969  * buffer suitable for bounds query calls. */
970  // @{
971 
972  // The overload with one argument is 'explicit', so that
973  // (say) int is not implicitly convertible to Buffer<int>
974  explicit Buffer(int first) {
975  static_assert(!T_is_void,
976  "To construct a Buffer<void>, pass a halide_type_t as the first argument to the constructor");
977  int extents[] = {first};
978  buf.type = static_halide_type();
979  constexpr int buf_dimensions = 1;
980  make_static_shape_storage<buf_dimensions>();
981  initialize_shape(extents);
982  if (first != 0) {
983  check_overflow();
984  allocate();
985  }
986  }
987 
988  template<typename... Args,
989  typename = typename std::enable_if<AllInts<Args...>::value>::type>
990  Buffer(int first, int second, Args... rest) {
991  static_assert(!T_is_void,
992  "To construct a Buffer<void>, pass a halide_type_t as the first argument to the constructor");
993  int extents[] = {first, second, (int)rest...};
994  buf.type = static_halide_type();
995  constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
996  make_static_shape_storage<buf_dimensions>();
997  initialize_shape(extents);
998  if (!Internal::any_zero(extents)) {
999  check_overflow();
1000  allocate();
1001  }
1002  }
1003  // @}
1004 
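 /** Example (illustrative): allocating buffers with the sized constructors. \code
  * Halide::Runtime::Buffer<uint8_t> rgb(800, 600, 3);   // 800x600, 3 channels; owns its memory
  * Halide::Runtime::Buffer<float> query(0, 0);          // zero extents: bounds-query shape, no allocation
  * Halide::Runtime::Buffer<> dynamic(halide_type_of<int32_t>(), 16, 16);  // runtime-typed Buffer<void>
  * \endcode */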
1005  /** Allocate a new image of unknown type using a vector of ints as the size. */
1006  Buffer(halide_type_t t, const std::vector<int> &sizes) {
1007  if (!T_is_void) {
1008  assert(static_halide_type() == t);
1009  }
1010  buf.type = t;
1011  // make_shape_storage() will do a runtime check that dimensionality matches.
1012  make_shape_storage((int)sizes.size());
1013  initialize_shape(sizes);
1014  if (!Internal::any_zero(sizes)) {
1015  check_overflow();
1016  allocate();
1017  }
1018  }
1019 
1020  /** Allocate a new image of known type using a vector of ints as the size. */
1021  explicit Buffer(const std::vector<int> &sizes)
1022  : Buffer(static_halide_type(), sizes) {
1023  }
1024 
1025 private:
1026  // Create a copy of the sizes vector, ordered as specified by order.
1027  static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
1028  assert(order.size() == sizes.size());
1029  std::vector<int> ordered_sizes(sizes.size());
1030  for (size_t i = 0; i < sizes.size(); ++i) {
1031  ordered_sizes[i] = sizes.at(order[i]);
1032  }
1033  return ordered_sizes;
1034  }
1035 
1036 public:
1037  /** Allocate a new image of unknown type using a vector of ints as the size and
1038  * a vector of indices indicating the storage order for each dimension. The
1039  * length of the sizes vector and the storage-order vector must match. For instance,
1040  * to allocate an interleaved RGB buffer, you would pass {2, 0, 1} for storage_order. */
1041  Buffer(halide_type_t t, const std::vector<int> &sizes, const std::vector<int> &storage_order)
1042  : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
1043  transpose(storage_order);
1044  }
1045 
1046  Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
1047  : Buffer(static_halide_type(), sizes, storage_order) {
1048  }
1049 
1050  /** Make a Buffer that refers to a statically sized array. Does not
1051  * take ownership of the data, and does not set the host_dirty flag. */
1052  template<typename Array, size_t N>
1053  explicit Buffer(Array (&vals)[N]) {
1054  const int buf_dimensions = dimensionality_of_array(vals);
1055  buf.type = scalar_type_of_array(vals);
1056  buf.host = (uint8_t *)vals;
1057  make_shape_storage(buf_dimensions);
1058  initialize_shape_from_array_shape(buf.dimensions - 1, vals);
1059  }
1060 
1061  /** Initialize a Buffer of runtime type from a pointer and some
1062  * sizes. Assumes dense row-major packing and a min coordinate of
1063  * zero. Does not take ownership of the data and does not set the
1064  * host_dirty flag. */
1065  template<typename... Args,
1066  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1067  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
1068  if (!T_is_void) {
1069  assert(static_halide_type() == t);
1070  }
1071  int extents[] = {first, (int)rest...};
1072  buf.type = t;
1073  buf.host = (uint8_t *)const_cast<void *>(data);
1074  constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1075  make_static_shape_storage<buf_dimensions>();
1076  initialize_shape(extents);
1077  }
1078 
1079  /** Initialize a Buffer from a pointer and some sizes. Assumes
1080  * dense row-major packing and a min coordinate of zero. Does not
1081  * take ownership of the data and does not set the host_dirty flag. */
1082  template<typename... Args,
1083  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1084  explicit Buffer(T *data, int first, Args &&...rest) {
1085  int extents[] = {first, (int)rest...};
1086  buf.type = static_halide_type();
1087  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1088  constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1089  make_static_shape_storage<buf_dimensions>();
1090  initialize_shape(extents);
1091  }
1092 
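 /** Example (illustrative): wrapping existing dense, row-major data without copying. The
  * Buffer does not take ownership, so `pixels` must outlive the wrapper. \code
  * float pixels[640 * 480];
  * Halide::Runtime::Buffer<float> wrapped(pixels, 640, 480);  // dim 0 has stride 1
  * \endcode */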
1093  /** Initialize a Buffer from a pointer and a vector of
1094  * sizes. Assumes dense row-major packing and a min coordinate of
1095  * zero. Does not take ownership of the data and does not set the
1096  * host_dirty flag. */
1097  explicit Buffer(T *data, const std::vector<int> &sizes) {
1098  buf.type = static_halide_type();
1099  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1100  make_shape_storage((int)sizes.size());
1101  initialize_shape(sizes);
1102  }
1103 
1104  /** Initialize a Buffer of runtime type from a pointer and a
1105  * vector of sizes. Assumes dense row-major packing and a min
1106  * coordinate of zero. Does not take ownership of the data and
1107  * does not set the host_dirty flag. */
1108  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
1109  if (!T_is_void) {
1110  assert(static_halide_type() == t);
1111  }
1112  buf.type = t;
1113  buf.host = (uint8_t *)const_cast<void *>(data);
1114  make_shape_storage((int)sizes.size());
1115  initialize_shape(sizes);
1116  }
1117 
1118  /** Initialize a Buffer from a pointer to the min coordinate and
1119  * an array describing the shape. Does not take ownership of the
1120  * data, and does not set the host_dirty flag. */
1121  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
1122  if (!T_is_void) {
1123  assert(static_halide_type() == t);
1124  }
1125  buf.type = t;
1126  buf.host = (uint8_t *)const_cast<void *>(data);
1127  make_shape_storage(d);
1128  for (int i = 0; i < d; i++) {
1129  buf.dim[i] = shape[i];
1130  }
1131  }
1132 
1133  /** Initialize a Buffer from a pointer to the min coordinate and
1134  * a vector describing the shape. Does not take ownership of the
1135  * data, and does not set the host_dirty flag. */
1136  explicit inline Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
1137  const std::vector<halide_dimension_t> &shape)
1138  : Buffer(t, data, (int)shape.size(), shape.data()) {
1139  }
1140 
1141  /** Initialize a Buffer from a pointer to the min coordinate and
1142  * an array describing the shape. Does not take ownership of the
1143  * data and does not set the host_dirty flag. */
1144  explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
1145  buf.type = static_halide_type();
1146  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1147  make_shape_storage(d);
1148  for (int i = 0; i < d; i++) {
1149  buf.dim[i] = shape[i];
1150  }
1151  }
1152 
1153  /** Initialize a Buffer from a pointer to the min coordinate and
1154  * a vector describing the shape. Does not take ownership of the
1155  * data, and does not set the host_dirty flag. */
1156  explicit inline Buffer(T *data, const std::vector<halide_dimension_t> &shape)
1157  : Buffer(data, (int)shape.size(), shape.data()) {
1158  }
1159 
1160  /** Destructor. Will release any underlying owned allocation if
1161  * this is the last reference to it. Will assert fail if there are
1162  * weak references to this Buffer outstanding. */
1163  ~Buffer() {
1164  decref();
1165  free_shape_storage();
1166  }
1167 
1168  /** Get a pointer to the raw halide_buffer_t this wraps. */
1169  // @{
1170  halide_buffer_t *raw_buffer() {
1171  return &buf;
1172  }
1173 
1174  const halide_buffer_t *raw_buffer() const {
1175  return &buf;
1176  }
1177  // @}
1178 
1179  /** Provide a cast operator to halide_buffer_t *, so that
1180  * instances can be passed directly to Halide filters. */
1181  operator halide_buffer_t *() {
1182  return &buf;
1183  }
1184 
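 /** Example (illustrative): the conversion to halide_buffer_t * lets Buffers be passed
  * directly to AOT-generated pipelines. `my_filter` below is a hypothetical generated
  * entry point, not part of Halide. \code
  * extern "C" int my_filter(halide_buffer_t *in, halide_buffer_t *out);
  *
  * int run(Halide::Runtime::Buffer<const uint8_t> &in, Halide::Runtime::Buffer<uint8_t> &out) {
  *     return my_filter(in, out);  // both arguments convert implicitly
  * }
  * \endcode */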
1185  /** Return a typed reference to this Buffer. Useful for converting
1186  * a reference to a Buffer<void> to a reference to, for example, a
1187  * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1188  * You can also optionally specify a new value for Dims; this is useful
1189  * mainly for removing the dimensionality constraint on a Buffer with
1190  * explicit dimensionality. Does a runtime assert if the source buffer type
1191  * is void or the new dimensionality is incompatible. */
1192  template<typename T2, int D2 = Dims>
1193  HALIDE_ALWAYS_INLINE
1194  Buffer<T2, D2, InClassDimStorage> &as() & {
1195  return *((Buffer<T2, D2, InClassDimStorage> *)this);
1196  }
1197 
1198  /** Return a const typed reference to this Buffer. Useful for converting
1199  * a reference to a Buffer<void> to a reference to, for example, a
1200  * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1201  * You can also optionally specify a new value for Dims; this is useful
1202  * mainly for removing the dimensionality constraint on a Buffer with
1203  * explicit dimensionality. Does a runtime assert if the source buffer type
1204  * is void or the new dimensionality is incompatible. */
1205  template<typename T2, int D2 = Dims>
1206  HALIDE_ALWAYS_INLINE
1207  const Buffer<T2, D2, InClassDimStorage> &as() const & {
1208  return *((const Buffer<T2, D2, InClassDimStorage> *)this);
1209  }
1210 
1211  /** Return an rval reference to this Buffer. Useful for converting
1212  * a reference to a Buffer<void> to a reference to, for example, a
1213  * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1214  * You can also optionally specify a new value for Dims; this is useful
1215  * mainly for removing the dimensionality constraint on a Buffer with
1216  * explicit dimensionality. Does a runtime assert if the source buffer type
1217  * is void or the new dimensionality is incompatible. */
1218  template<typename T2, int D2 = Dims>
1219  HALIDE_ALWAYS_INLINE
1220  Buffer<T2, D2, InClassDimStorage> as() && {
1221  return *((Buffer<T2, D2, InClassDimStorage> *)this);
1222  }
1223 
1224  /** as_const() is syntactic sugar for .as<const T>(), to avoid the need
1225  * to recapitulate the type argument. */
1226  // @{
1227  HALIDE_ALWAYS_INLINE
1228  Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() & {
1229  // Note that we can skip the assert_can_convert_from(), since T -> const T
1230  // conversion is always legal.
1231  return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1232  }
1233 
1234  HALIDE_ALWAYS_INLINE
1235  const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() const & {
1236  return *((const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1237  }
1238 
1239  HALIDE_ALWAYS_INLINE
1240  Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> as_const() && {
1241  return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1242  }
1243  // @}
1244 
1245  /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<const T>& when
1246  * passing arguments */
1247  template<typename T2 = T, typename = typename std::enable_if<!std::is_const<T2>::value>::type>
1248  operator Buffer<typename std::add_const<T2>::type, Dims, InClassDimStorage> &() & {
1249  return as_const();
1250  }
1251 
1252  /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<void>& when
1253  * passing arguments */
1254  template<typename TVoid,
1255  typename T2 = T,
1256  typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1257  !std::is_void<T2>::value &&
1258  !std::is_const<T2>::value>::type>
1259  operator Buffer<TVoid, Dims, InClassDimStorage> &() & {
1260  return as<TVoid, Dims>();
1261  }
1262 
1263  /** Add some syntactic sugar to allow autoconversion from Buffer<const T> to Buffer<const void>& when
1264  * passing arguments */
1265  template<typename TVoid,
1266  typename T2 = T,
1267  typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1268  !std::is_void<T2>::value &&
1269  std::is_const<T2>::value>::type>
1270  operator Buffer<const TVoid, Dims, InClassDimStorage> &() & {
1271  return as<const TVoid, Dims>();
1272  }
1273 
1274  /** Conventional names for the first three dimensions. */
1275  // @{
1276  int width() const {
1277  return (dimensions() > 0) ? dim(0).extent() : 1;
1278  }
1279  int height() const {
1280  return (dimensions() > 1) ? dim(1).extent() : 1;
1281  }
1282  int channels() const {
1283  return (dimensions() > 2) ? dim(2).extent() : 1;
1284  }
1285  // @}
1286 
1287  /** Conventional names for the min and max value of each dimension */
1288  // @{
1289  int left() const {
1290  return dim(0).min();
1291  }
1292 
1293  int right() const {
1294  return dim(0).max();
1295  }
1296 
1297  int top() const {
1298  return dim(1).min();
1299  }
1300 
1301  int bottom() const {
1302  return dim(1).max();
1303  }
1304  // @}
1305 
1306  /** Make a new image which is a deep copy of this image. Use crop
1307  * or slice followed by copy to make a copy of only a portion of
1308  * the image. The new image uses the same memory layout as the
1309  * original, with holes compacted away. Note that the returned
1310  * Buffer is always of a non-const type T (ie:
1311  *
1312  * Buffer<const T>.copy() -> Buffer<T> rather than Buffer<const T>
1313  *
1314  * which is always safe, since we are making a deep copy. (The caller
1315  * can easily cast it back to Buffer<const T> if desired, which is
1316  * always safe and free.)
1317  */
1318  Buffer<not_const_T, Dims, InClassDimStorage> copy(void *(*allocate_fn)(size_t) = nullptr,
1319  void (*deallocate_fn)(void *) = nullptr) const {
1320  Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
1321  dst.copy_from(*this);
1322  return dst;
1323  }
1324 
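 /** Example (illustrative): copy() always yields a Buffer of non-const T, so a read-only view
  * can be deep-copied into an independent, mutable image. \code
  * void duplicate(const Halide::Runtime::Buffer<const float> &src) {
  *     Halide::Runtime::Buffer<float> dup = src.copy();  // fresh allocation, same shape and contents
  *     dup.for_each_value([](float &v) { v *= 2.0f; });  // does not affect src
  * }
  * \endcode */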
1325  /** Like copy(), but the copy is created in interleaved memory layout
1326  * (vs. keeping the same memory layout as the original). Requires that 'this'
1327  * has exactly 3 dimensions.
1328  */
1329  Buffer<not_const_T, Dims, InClassDimStorage> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
1330  void (*deallocate_fn)(void *) = nullptr) const {
1331  static_assert(Dims == AnyDims || Dims == 3);
1332  assert(dimensions() == 3);
1333  Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_interleaved(nullptr, width(), height(), channels());
1334  dst.set_min(min(0), min(1), min(2));
1335  dst.allocate(allocate_fn, deallocate_fn);
1336  dst.copy_from(*this);
1337  return dst;
1338  }
1339 
1340  /** Like copy(), but the copy is created in planar memory layout
1341  * (vs. keeping the same memory layout as the original).
1342  */
1343  Buffer<not_const_T, Dims, InClassDimStorage> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
1344  void (*deallocate_fn)(void *) = nullptr) const {
1345  std::vector<int> mins, extents;
1346  const int dims = dimensions();
1347  mins.reserve(dims);
1348  extents.reserve(dims);
1349  for (int d = 0; d < dims; ++d) {
1350  mins.push_back(dim(d).min());
1351  extents.push_back(dim(d).extent());
1352  }
1353  Buffer<not_const_T, Dims, InClassDimStorage> dst(type(), nullptr, extents);
1354  dst.set_min(mins);
1355  dst.allocate(allocate_fn, deallocate_fn);
1356  dst.copy_from(*this);
1357  return dst;
1358  }
1359 
1360  /** Make a copy of the Buffer which shares the underlying host and/or device
1361  * allocations with the existing Buffer. This is purely syntactic sugar for
1362  * cases where you have a const reference to a Buffer but need a temporary
1363  * non-const copy (e.g. to make a call into AOT-generated Halide code), and want a terse
1364  * inline way to create a temporary. \code
1365  * void call_my_func(const Buffer<const uint8_t>& input) {
1366  * my_func(input.alias(), output);
1367  * }\endcode
1368  */
1369  Buffer<T, Dims, InClassDimStorage> alias() const {
1370  return *this;
1371  }
1372 
1373  /** Fill a Buffer with the values at the same coordinates in
1374  * another Buffer. Restricts itself to coordinates contained
1375  * within the intersection of the two buffers. If the two Buffers
1376  * are not in the same coordinate system, you will need to
1377  * translate the argument Buffer first. E.g. if you're blitting a
1378  * sprite onto a framebuffer, you'll want to translate the sprite
1379  * to the correct location first like so: \code
1380  * framebuffer.copy_from(sprite.translated({x, y})); \endcode
1381  */
1382  template<typename T2, int D2, int S2>
1383  void copy_from(Buffer<T2, D2, S2> src) {
1384  static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
1385  assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
1386  assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");
1387 
1388  Buffer<T, Dims, InClassDimStorage> dst(*this);
1389 
1390  static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2);
1391  assert(src.dimensions() == dst.dimensions());
1392 
1393  // Trim the copy to the region in common
1394  const int d = dimensions();
1395  for (int i = 0; i < d; i++) {
1396  int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
1397  int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
1398  if (max_coord < min_coord) {
1399  // The buffers do not overlap.
1400  return;
1401  }
1402  dst.crop(i, min_coord, max_coord - min_coord + 1);
1403  src.crop(i, min_coord, max_coord - min_coord + 1);
1404  }
1405 
1406  // If T is void, we need to do runtime dispatch to an
1407  // appropriately-typed lambda. We're copying, so we only care
1408  // about the element size. (If not, this should optimize away
1409  // into a static dispatch to the right-sized copy.)
1410  if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
1411  using MemType = uint8_t;
1412  auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1413  auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1414  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1415  } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
1416  using MemType = uint16_t;
1417  auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1418  auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1419  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1420  } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
1421  using MemType = uint32_t;
1422  auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1423  auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1424  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1425  } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
1426  using MemType = uint64_t;
1427  auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1428  auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1429  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1430  } else {
1431  assert(false && "type().bytes() must be 1, 2, 4, or 8");
1432  }
1433  set_host_dirty();
1434  }
1435 
1436  /** Make an image that refers to a sub-range of this image along
1437  * the given dimension. Asserts that the crop region is within
1438  * the existing bounds: you cannot "crop outwards", even if you know there
1439  * is valid Buffer storage (e.g. because you already cropped inwards). */
1440  Buffer<T, Dims, InClassDimStorage> cropped(int d, int min, int extent) const {
1441  // Make a fresh copy of the underlying buffer (but not a fresh
1442  // copy of the allocation, if there is one).
1443  Buffer<T, Dims, InClassDimStorage> im = *this;
1444 
1445  // This guarantees the preexisting device ref is dropped if the
1446  // device_crop call fails and maintains the buffer in a consistent
1447  // state.
1448  im.device_deallocate();
1449 
1450  im.crop_host(d, min, extent);
1451  if (buf.device_interface != nullptr) {
1452  complete_device_crop(im);
1453  }
1454  return im;
1455  }
1456 
1457  /** Crop an image in-place along the given dimension. This does
1458  * not move any data around in memory - it just changes the min
1459  * and extent of the given dimension. */
1460  void crop(int d, int min, int extent) {
1461  // An optimization for non-device buffers. For the device case,
1462  // a temp buffer is required, so reuse the not-in-place version.
1463  // TODO(zalman|abadams): Are nop crops common enough to special
1464  // case the device part of the if to do nothing?
1465  if (buf.device_interface != nullptr) {
1466  *this = cropped(d, min, extent);
1467  } else {
1468  crop_host(d, min, extent);
1469  }
1470  }
1471 
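 /** Example (illustrative): cropping produces a view onto the same allocation and preserves
  * the original coordinate system (assuming `im` spans x in [10, 109]): \code
  * auto window = im.cropped(0, 10, 100);  // dim 0: min = 10, extent = 100
  * // window(10, y) and im(10, y) address the same element
  * \endcode */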
1472  /** Make an image that refers to a sub-rectangle of this image along
1473  * the first N dimensions. Asserts that the crop region is within
1474  * the existing bounds. The cropped image may drop any device handle
1475  * if the device_interface cannot accomplish the crop in-place. */
1476  Buffer<T, Dims, InClassDimStorage> cropped(const std::vector<std::pair<int, int>> &rect) const {
1477  // Make a fresh copy of the underlying buffer (but not a fresh
1478  // copy of the allocation, if there is one).
1479  Buffer<T, Dims, InClassDimStorage> im = *this;
1480 
1481  // This guarantees the preexisting device ref is dropped if the
1482  // device_crop call fails and maintains the buffer in a consistent
1483  // state.
1484  im.device_deallocate();
1485 
1486  im.crop_host(rect);
1487  if (buf.device_interface != nullptr) {
1488  complete_device_crop(im);
1489  }
1490  return im;
1491  }
1492 
1493  /** Crop an image in-place along the first N dimensions. This does
1494  * not move any data around in memory, nor does it free memory. It
1495  * just rewrites the min/extent of each dimension to refer to a
1496  * subregion of the same allocation. */
1497  void crop(const std::vector<std::pair<int, int>> &rect) {
1498  // An optimization for non-device buffers. For the device case,
1499  // a temp buffer is required, so reuse the not-in-place version.
1500  // TODO(zalman|abadams): Are nop crops common enough to special
1501  // case the device part of the if to do nothing?
1502  if (buf.device_interface != nullptr) {
1503  *this = cropped(rect);
1504  } else {
1505  crop_host(rect);
1506  }
1507  }
1508 
1509  /** Make an image which refers to the same data using
1510  * translated coordinates in the given dimension. Positive values
1511  * move the image data to the right or down relative to the
1512  * coordinate system. Drops any device handle. */
1513  Buffer<T, Dims, InClassDimStorage> translated(int d, int dx) const {
1514  Buffer<T, Dims, InClassDimStorage> im = *this;
1515  im.translate(d, dx);
1516  return im;
1517  }
1518 
1519  /** Translate an image in-place along one dimension by changing
1520  * how it is indexed. Does not move any data around in memory. */
1521  void translate(int d, int delta) {
1522  assert(d >= 0 && d < this->dimensions());
1523  device_deallocate();
1524  buf.dim[d].min += delta;
1525  }
1526 
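  /* Usage sketch (illustrative; `im` and its size are assumed for the example).
   * Translation only changes how coordinates map to memory; no data moves.
  \code
  Buffer<float, 2> im(100, 100);
  im.translate(0, 5);                          // x now spans [5, 104]
  Buffer<float, 2> t = im.translated(1, -3);   // a view with y shifted to [-3, 96]
  \endcode
  */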
1527  /** Make an image which refers to the same data translated along
1528  * the first N dimensions. */
1529  Buffer<T, Dims, InClassDimStorage> translated(const std::vector<int> &delta) const {
1530  Buffer<T, Dims, InClassDimStorage> im = *this;
1531  im.translate(delta);
1532  return im;
1533  }
1534 
1535  /** Translate an image along the first N dimensions by changing
1536  * how it is indexed. Does not move any data around in memory. */
1537  void translate(const std::vector<int> &delta) {
1538  device_deallocate();
1539  assert(delta.size() <= static_cast<decltype(delta.size())>(std::numeric_limits<int>::max()));
1540  int limit = (int)delta.size();
1541  assert(limit <= dimensions());
1542  for (int i = 0; i < limit; i++) {
1543  translate(i, delta[i]);
1544  }
1545  }
1546 
1547  /** Set the min coordinate of an image in the first N dimensions. */
1548  // @{
1549  void set_min(const std::vector<int> &mins) {
1550  assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
1551  device_deallocate();
1552  for (size_t i = 0; i < mins.size(); i++) {
1553  buf.dim[i].min = mins[i];
1554  }
1555  }
1556 
1557  template<typename... Args>
1558  void set_min(Args... args) {
1559  set_min(std::vector<int>{args...});
1560  }
1561  // @}
1562 
1563  /** Test if a given coordinate is within the bounds of an image. */
1564  // @{
1565  bool contains(const std::vector<int> &coords) const {
1566  assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
1567  for (size_t i = 0; i < coords.size(); i++) {
1568  if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
1569  return false;
1570  }
1571  }
1572  return true;
1573  }
1574 
1575  template<typename... Args>
1576  bool contains(Args... args) const {
1577  return contains(std::vector<int>{args...});
1578  }
1579  // @}
1580 
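  /* Usage sketch (illustrative; the mins and sizes are assumed for the example):
  \code
  Buffer<float, 2> im(100, 100);
  im.set_min(10, 20);
  bool inside = im.contains(50, 50);    // true
  bool outside = im.contains(5, 50);    // false: x == 5 is below the min of 10
  \endcode
  */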
1581  /** Make a buffer which refers to the same data in the same layout
1582  * using a swapped indexing order for the dimensions given. So
1583  * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more
1584  * strongly that A.address_of(i, j) == B.address_of(j, i). */
1585  Buffer<T, Dims, InClassDimStorage> transposed(int d1, int d2) const {
1586  Buffer<T, Dims, InClassDimStorage> im = *this;
1587  im.transpose(d1, d2);
1588  return im;
1589  }
1590 
1591  /** Transpose a buffer in-place by changing how it is indexed. For
1592  * example, transpose(0, 1) on a two-dimensional buffer means that
1593  * the value referred to by coordinates (i, j) is now reached at
1594  * the coordinates (j, i), and vice versa. This is done by
1595  * reordering the per-dimension metadata rather than by moving
1596  * data around in memory, so other views of the same memory will
1597  * not see the data as having been transposed. */
1598  void transpose(int d1, int d2) {
1599  assert(d1 >= 0 && d1 < this->dimensions());
1600  assert(d2 >= 0 && d2 < this->dimensions());
1601  std::swap(buf.dim[d1], buf.dim[d2]);
1602  }
1603 
1604  /** A generalized transpose: instead of swapping two dimensions,
1605  * pass a vector that lists each dimension index exactly once, in
1606  * the desired order. This does not move any data around in memory
1607  * - it just permutes how it is indexed. */
1608  void transpose(const std::vector<int> &order) {
1609  assert((int)order.size() == dimensions());
1610  if (dimensions() < 2) {
1611  // My, that was easy
1612  return;
1613  }
1614 
1615  std::vector<int> order_sorted = order;
1616  for (size_t i = 1; i < order_sorted.size(); i++) {
1617  for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
1618  std::swap(order_sorted[j], order_sorted[j - 1]);
1619  transpose(j, j - 1);
1620  }
1621  }
1622  }
1623 
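  /* Usage sketch (illustrative; the extents are assumed for the example). The
   * two-dimensional case of the generalized transpose is equivalent to
   * transpose(0, 1):
  \code
  Buffer<float, 2> im(640, 480);
  im.transpose({1, 0});
  assert(im.dim(0).extent() == 480 && im.dim(1).extent() == 640);
  \endcode
  */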
1624  /** Make a buffer which refers to the same data in the same
1625  * layout using a different ordering of the dimensions. */
1626  Buffer<T, Dims, InClassDimStorage> transposed(const std::vector<int> &order) const {
1627  Buffer<T, Dims, InClassDimStorage> im = *this;
1628  im.transpose(order);
1629  return im;
1630  }
1631 
1632  /** Make a lower-dimensional buffer that refers to one slice of
1633  * this buffer. */
1634  Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1635  sliced(int d, int pos) const {
1636  static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1637  assert(dimensions() > 0);
1638 
1639  Buffer<T, AnyDims, InClassDimStorage> im = *this;
1640 
1641  // This guarantees the preexisting device ref is dropped if the
1642  // device_slice call fails and maintains the buffer in a consistent
1643  // state.
1644  im.device_deallocate();
1645 
1646  im.slice_host(d, pos);
1647  if (buf.device_interface != nullptr) {
1648  complete_device_slice(im, d, pos);
1649  }
1650  return im;
1651  }
1652 
1653  /** Make a lower-dimensional buffer that refers to one slice of this
1654  * buffer at the dimension's minimum. */
1655  Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1656  sliced(int d) const {
1657  static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1658  assert(dimensions() > 0);
1659 
1660  return sliced(d, dim(d).min());
1661  }
1662 
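  /* Usage sketch (illustrative; `im` is assumed for the example): slicing out
   * one channel of a 3-D buffer yields a 2-D view of the same storage.
  \code
  Buffer<float, 3> im(100, 100, 3);
  Buffer<float, 2> green = im.sliced(2, 1);   // the c == 1 plane
  assert(&green(0, 0) == &im(0, 0, 1));
  \endcode
  */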
1663  /** Rewrite the buffer to refer to a single lower-dimensional
1664  * slice of itself along the given dimension at the given
1665  * coordinate. Does not move any data around or free the original
1666  * memory, so other views of the same data are unaffected. Can
1667  * only be called on a Buffer with dynamic dimensionality. */
1668  void slice(int d, int pos) {
1669  static_assert(Dims == AnyDims, "Cannot call slice() on a Buffer with static dimensionality.");
1670  assert(dimensions() > 0);
1671 
1672  // An optimization for non-device buffers. For the device case,
1673  // a temp buffer is required, so reuse the not-in-place version.
1674  // TODO(zalman|abadams): Are nop slices common enough to special
1675  // case the device part of the if to do nothing?
1676  if (buf.device_interface != nullptr) {
1677  *this = sliced(d, pos);
1678  } else {
1679  slice_host(d, pos);
1680  }
1681  }
1682 
1683  /** Slice a buffer in-place at the dimension's minimum. */
1684  inline void slice(int d) {
1685  slice(d, dim(d).min());
1686  }
1687 
1688  /** Make a new buffer that views this buffer as a single slice in a
1689  * higher-dimensional space. The new dimension has extent one and
1690  * the given min. This operation is the opposite of slice. As an
1691  * example, the following condition is true:
1692  *
1693  \code
1694  im2 = im.embedded(1, 17);
1695  &im(x, y, c) == &im2(x, 17, y, c);
1696  \endcode
1697  */
1698  Buffer<T, (Dims == AnyDims ? AnyDims : Dims + 1)>
1699  embedded(int d, int pos = 0) const {
1700  Buffer<T, AnyDims, InClassDimStorage> im(*this);
1701  im.embed(d, pos);
1702  return im;
1703  }
1704 
1705  /** Embed a buffer in-place, increasing the
1706  * dimensionality. */
1707  void embed(int d, int pos = 0) {
1708  static_assert(Dims == AnyDims, "Cannot call embed() on a Buffer with static dimensionality.");
1709  assert(d >= 0 && d <= dimensions());
1710  add_dimension();
1711  translate(dimensions() - 1, pos);
1712  for (int i = dimensions() - 1; i > d; i--) {
1713  transpose(i, i - 1);
1714  }
1715  }
1716 
1717  /** Add a new dimension with a min of zero and an extent of
1718  * one. The stride is the extent of the outermost dimension times
1719  * its stride. The new dimension is the last dimension. This is a
1720  * special case of embed. */
1721  void add_dimension() {
1722  static_assert(Dims == AnyDims, "Cannot call add_dimension() on a Buffer with static dimensionality.");
1723  const int dims = buf.dimensions;
1724  buf.dimensions++;
1725  if (buf.dim != shape) {
1726  // We're already on the heap. Reallocate.
1727  halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
1728  for (int i = 0; i < dims; i++) {
1729  new_shape[i] = buf.dim[i];
1730  }
1731  delete[] buf.dim;
1732  buf.dim = new_shape;
1733  } else if (dims == InClassDimStorage) {
1734  // Transition from the in-class storage to the heap
1735  make_shape_storage(buf.dimensions);
1736  for (int i = 0; i < dims; i++) {
1737  buf.dim[i] = shape[i];
1738  }
1739  } else {
1740  // We still fit in the class
1741  }
1742  buf.dim[dims] = {0, 1, 0};
1743  if (dims == 0) {
1744  buf.dim[dims].stride = 1;
1745  } else {
1746  buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
1747  }
1748  }
1749 
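  /* Usage sketch (illustrative; requires a Buffer with dynamic dimensionality,
   * which is assumed for the example):
  \code
  Buffer<float> im(640, 480);
  im.add_dimension();
  assert(im.dimensions() == 3 && im.dim(2).extent() == 1);
  \endcode
  */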
1750  /** Add a new dimension with a min of zero, an extent of one, and
1751  * the specified stride. The new dimension is the last
1752  * dimension. This is a special case of embed. */
1753  void add_dimension_with_stride(int s) {
1754  add_dimension();
1755  buf.dim[buf.dimensions - 1].stride = s;
1756  }
1757 
1758  /** Methods for managing any GPU allocation. */
1759  // @{
1760  // Set the host dirty flag. Called by every operator()
1761  // access. Must be inlined so it can be hoisted out of loops.
1762  HALIDE_ALWAYS_INLINE
1763  void set_host_dirty(bool v = true) {
1764  assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
1765  buf.set_host_dirty(v);
1766  }
1767 
1768  // Check if the device allocation is dirty. Called by
1769  // set_host_dirty, which is called by every accessor. Must be
1770  // inlined so it can be hoisted out of loops.
1771  HALIDE_ALWAYS_INLINE
1772  bool device_dirty() const {
1773  return buf.device_dirty();
1774  }
1775 
1776  bool host_dirty() const {
1777  return buf.host_dirty();
1778  }
1779 
1780  void set_device_dirty(bool v = true) {
1781  assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
1782  buf.set_device_dirty(v);
1783  }
1784 
1785  int copy_to_host(void *ctx = nullptr) {
1786  if (device_dirty()) {
1787  return buf.device_interface->copy_to_host(ctx, &buf);
1788  }
1789  return halide_error_code_success;
1790  }
1791 
1792  int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1793  if (host_dirty()) {
1794  return device_interface->copy_to_device(ctx, &buf, device_interface);
1795  }
1796  return halide_error_code_success;
1797  }
1798 
1799  int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1800  return device_interface->device_malloc(ctx, &buf, device_interface);
1801  }
1802 
1803  int device_free(void *ctx = nullptr) {
1804  if (dev_ref_count) {
1805  assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
1806  "Can't call device_free on an unmanaged or wrapped native device handle. "
1807  "Free the source allocation or call device_detach_native instead.");
1808  // Multiple people may be holding onto this dev field
1809  assert(dev_ref_count->count == 1 &&
1810  "Multiple Halide::Runtime::Buffer objects share this device "
1811  "allocation. Freeing it would create dangling references. "
1812  "Don't call device_free on Halide buffers that you have copied or "
1813  "passed by value.");
1814  }
1815  int ret = halide_error_code_success;
1816  if (buf.device_interface) {
1817  ret = buf.device_interface->device_free(ctx, &buf);
1818  }
1819  if (dev_ref_count) {
1820  delete dev_ref_count;
1821  dev_ref_count = nullptr;
1822  }
1823  return ret;
1824  }
1825 
1826  int device_wrap_native(const struct halide_device_interface_t *device_interface,
1827  uint64_t handle, void *ctx = nullptr) {
1828  assert(device_interface);
1829  dev_ref_count = new DeviceRefCount;
1830  dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
1831  return device_interface->wrap_native(ctx, &buf, handle, device_interface);
1832  }
1833 
1834  int device_detach_native(void *ctx = nullptr) {
1835  assert(dev_ref_count &&
1836  dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative &&
1837  "Only call device_detach_native on buffers wrapping a native "
1838  "device handle via device_wrap_native. This buffer was allocated "
1839  "using device_malloc, or is unmanaged. "
1840  "Call device_free or free the original allocation instead.");
1841  // Multiple people may be holding onto this dev field
1842  assert(dev_ref_count->count == 1 &&
1843  "Multiple Halide::Runtime::Buffer objects share this device "
1844  "allocation. Freeing it could create dangling references. "
1845  "Don't call device_detach_native on Halide buffers that you "
1846  "have copied or passed by value.");
1847  int ret = halide_error_code_success;
1848  if (buf.device_interface) {
1849  ret = buf.device_interface->detach_native(ctx, &buf);
1850  }
1851  delete dev_ref_count;
1852  dev_ref_count = nullptr;
1853  return ret;
1854  }
1855 
1856  int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1857  return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
1858  }
1859 
1860  int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1861  if (dev_ref_count) {
1862  assert(dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost &&
1863  "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1864  "Free the source allocation or call device_detach_native instead.");
1865  // Multiple people may be holding onto this dev field
1866  assert(dev_ref_count->count == 1 &&
1867  "Multiple Halide::Runtime::Buffer objects share this device "
1868  "allocation. Freeing it would create dangling references. "
1869  "Don't call device_and_host_free on Halide buffers that you have copied or "
1870  "passed by value.");
1871  }
1872  int ret = halide_error_code_success;
1873  if (buf.device_interface) {
1874  ret = buf.device_interface->device_and_host_free(ctx, &buf);
1875  }
1876  if (dev_ref_count) {
1877  delete dev_ref_count;
1878  dev_ref_count = nullptr;
1879  }
1880  return ret;
1881  }
1882 
1883  int device_sync(void *ctx = nullptr) {
1884  return buf.device_sync(ctx);
1885  }
1886 
1887  bool has_device_allocation() const {
1888  return buf.device != 0;
1889  }
1890 
1891  /** Return the method by which the device field is managed. */
1892  BufferDeviceOwnership device_ownership() const {
1893  if (dev_ref_count == nullptr) {
1894  return BufferDeviceOwnership::Allocated;
1895  }
1896  return dev_ref_count->ownership;
1897  }
1898  // @}
1899 
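  /* Usage sketch of the host/device dirty protocol (illustrative; the device
   * interface pointer is assumed to come from whichever runtime you link, e.g.
   * a CUDA or OpenCL halide_device_interface_t):
  \code
  Buffer<float, 2> im(100, 100);
  im(0, 0) = 1.0f;                        // host write: sets host_dirty()
  // im.copy_to_device(device_interface); // allocates on device if needed,
  //                                      // copies, and clears host_dirty()
  // ... run a pipeline that writes im on the device ...
  // im.copy_to_host();                   // required before reading im on the host again
  \endcode
  */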
1900  /** If you use the (x, y, c) indexing convention, then Halide
1901  * Buffers are stored planar by default. This function constructs
1902  * an interleaved RGB or RGBA image that can still be indexed
1903  * using (x, y, c). Passing it to a generator requires that the
1904  * generator has been compiled with support for interleaved (also
1905  * known as packed or chunky) memory layouts. */
1906  static Buffer<void, Dims, InClassDimStorage> make_interleaved(halide_type_t t, int width, int height, int channels) {
1907  static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1908  Buffer<void, Dims, InClassDimStorage> im(t, channels, width, height);
1909  // Note that this is equivalent to calling transpose({2, 0, 1}),
1910  // but slightly more efficient.
1911  im.transpose(0, 1);
1912  im.transpose(1, 2);
1913  return im;
1914  }
1915 
1916  /** If you use the (x, y, c) indexing convention, then Halide
1917  * Buffers are stored planar by default. This function constructs
1918  * an interleaved RGB or RGBA image that can still be indexed
1919  * using (x, y, c). Passing it to a generator requires that the
1920  * generator has been compiled with support for interleaved (also
1921  * known as packed or chunky) memory layouts. */
1922  static Buffer<T, Dims, InClassDimStorage> make_interleaved(int width, int height, int channels) {
1923  return make_interleaved(static_halide_type(), width, height, channels);
1924  }
1925 
1926  /** Wrap an existing interleaved image. */
1927  static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage>
1928  make_interleaved(halide_type_t t, T *data, int width, int height, int channels) {
1929  static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1930  Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> im(t, data, channels, width, height);
1931  im.transpose(0, 1);
1932  im.transpose(1, 2);
1933  return im;
1934  }
1935 
1936  /** Wrap an existing interleaved image. */
1937  static Buffer<T, Dims, InClassDimStorage> make_interleaved(T *data, int width, int height, int channels) {
1938  return make_interleaved(static_halide_type(), data, width, height, channels);
1939  }
1940 
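  /* Usage sketch (illustrative; the extents are assumed for the example). In
   * the interleaved layout the channel dimension has stride 1:
  \code
  auto im = Buffer<uint8_t, 3>::make_interleaved(640, 480, 3);
  assert(im.dim(2).stride() == 1 && im.dim(0).stride() == 3);
  \endcode
  */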
1941  /** Make a zero-dimensional Buffer */
1942  static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> make_scalar(halide_type_t t) {
1943  static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1944  Buffer<add_const_if_T_is_const<void>, AnyDims, InClassDimStorage> buf(t, 1);
1945  buf.slice(0, 0);
1946  return buf;
1947  }
1948 
1949  /** Make a zero-dimensional Buffer */
1950  static Buffer<T, Dims, InClassDimStorage> make_scalar() {
1951  static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1952  Buffer<T, AnyDims, InClassDimStorage> buf(1);
1953  buf.slice(0, 0);
1954  return buf;
1955  }
1956 
1957  /** Make a zero-dimensional Buffer that points to non-owned, existing data */
1958  static Buffer<T, Dims, InClassDimStorage> make_scalar(T *data) {
1959  static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1960  Buffer<T, AnyDims, InClassDimStorage> buf(data, 1);
1961  buf.slice(0, 0);
1962  return buf;
1963  }
1964 
1965  /** Make a buffer with the same shape and memory nesting order as
1966  * another buffer. It may have a different type. */
1967  template<typename T2, int D2, int S2>
1968  static Buffer<T, Dims, InClassDimStorage> make_with_shape_of(Buffer<T2, D2, S2> src,
1969  void *(*allocate_fn)(size_t) = nullptr,
1970  void (*deallocate_fn)(void *) = nullptr) {
1971  static_assert(Dims == D2 || Dims == AnyDims);
1972  const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
1973  return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
1974  allocate_fn, deallocate_fn);
1975  }
1976 
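  /* Usage sketch (illustrative; the source buffer is assumed for the example):
   * the result copies the shape and nesting order, but not the contents.
  \code
  auto rgb = Buffer<uint8_t, 3>::make_interleaved(640, 480, 3);
  Buffer<float, 3> f = Buffer<float, 3>::make_with_shape_of(rgb);
  assert(f.dim(2).stride() == 1);   // still interleaved, now with float samples
  \endcode
  */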
1977 private:
1978  static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
1979  int dimensions,
1980  halide_dimension_t *shape,
1981  void *(*allocate_fn)(size_t),
1982  void (*deallocate_fn)(void *)) {
1983  // Reorder the dimensions of src to have strides in increasing order
1984  std::vector<int> swaps;
1985  for (int i = dimensions - 1; i > 0; i--) {
1986  for (int j = i; j > 0; j--) {
1987  if (shape[j - 1].stride > shape[j].stride) {
1988  std::swap(shape[j - 1], shape[j]);
1989  swaps.push_back(j);
1990  }
1991  }
1992  }
1993 
1994  // Rewrite the strides to be dense (this messes up src, which
1995  // is why we took it by value).
1996  for (int i = 0; i < dimensions; i++) {
1997  if (i == 0) {
1998  shape[i].stride = 1;
1999  } else {
2000  shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
2001  }
2002  }
2003 
2004  // Undo the dimension reordering
2005  while (!swaps.empty()) {
2006  int j = swaps.back();
2007  std::swap(shape[j - 1], shape[j]);
2008  swaps.pop_back();
2009  }
2010 
2011  // Use an explicit runtime type, and make dst a Buffer<void>, to allow
2012  // using this method with Buffer<void> for either src or dst.
2013  Buffer<> dst(dst_type, nullptr, dimensions, shape);
2014  dst.allocate(allocate_fn, deallocate_fn);
2015 
2016  return dst;
2017  }
2018 
2019  template<typename... Args>
2020  HALIDE_ALWAYS_INLINE
2021  ptrdiff_t
2022  offset_of(int d, int first, Args... rest) const {
2023 #if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2024  assert(first >= this->buf.dim[d].min);
2025  assert(first < this->buf.dim[d].min + this->buf.dim[d].extent);
2026 #endif
2027  return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
2028  }
2029 
2030  HALIDE_ALWAYS_INLINE
2031  ptrdiff_t offset_of(int d) const {
2032  return 0;
2033  }
2034 
2035  template<typename... Args>
2036  HALIDE_ALWAYS_INLINE
2037  storage_T *
2038  address_of(Args... args) const {
2039  if (T_is_void) {
2040  return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
2041  } else {
2042  return (storage_T *)(this->buf.host) + offset_of(0, args...);
2043  }
2044  }
2045 
2046  HALIDE_ALWAYS_INLINE
2047  ptrdiff_t offset_of(const int *pos) const {
2048  ptrdiff_t offset = 0;
2049  for (int i = this->dimensions() - 1; i >= 0; i--) {
2050 #if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2051  assert(pos[i] >= this->buf.dim[i].min);
2052  assert(pos[i] < this->buf.dim[i].min + this->buf.dim[i].extent);
2053 #endif
2054  offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
2055  }
2056  return offset;
2057  }
2058 
2059  HALIDE_ALWAYS_INLINE
2060  storage_T *address_of(const int *pos) const {
2061  if (T_is_void) {
2062  return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
2063  } else {
2064  return (storage_T *)this->buf.host + offset_of(pos);
2065  }
2066  }
2067 
2068 public:
2069  /** Get a pointer to the address of the min coordinate. */
2070  T *data() const {
2071  return (T *)(this->buf.host);
2072  }
2073 
2074  /** Access elements. Use im(...) to get a reference to an element,
2075  * and use &im(...) to get the address of an element. If you pass
2076  * fewer arguments than the buffer has dimensions, the rest are
2077  * treated as their min coordinate. The non-const versions set the
2078  * host_dirty flag to true.
2079  */
2080  //@{
2081  template<typename... Args,
2082  typename = typename std::enable_if<AllInts<Args...>::value>::type>
2083  HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
2084  static_assert(!T_is_void,
2085  "Cannot use operator() on Buffer<void> types");
2086  constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2087  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2088  assert(!device_dirty());
2089  return *((const not_void_T *)(address_of(first, rest...)));
2090  }
2091 
2092  HALIDE_ALWAYS_INLINE
2093  const not_void_T &
2094  operator()() const {
2095  static_assert(!T_is_void,
2096  "Cannot use operator() on Buffer<void> types");
2097  constexpr int expected_dims = 0;
2098  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2099  assert(!device_dirty());
2100  return *((const not_void_T *)(data()));
2101  }
2102 
2103  HALIDE_ALWAYS_INLINE
2104  const not_void_T &
2105  operator()(const int *pos) const {
2106  static_assert(!T_is_void,
2107  "Cannot use operator() on Buffer<void> types");
2108  assert(!device_dirty());
2109  return *((const not_void_T *)(address_of(pos)));
2110  }
2111 
2112  template<typename... Args,
2113  typename = typename std::enable_if<AllInts<Args...>::value>::type>
2114  HALIDE_ALWAYS_INLINE
2115  not_void_T &
2116  operator()(int first, Args... rest) {
2117  static_assert(!T_is_void,
2118  "Cannot use operator() on Buffer<void> types");
2119  constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2120  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2121  set_host_dirty();
2122  return *((not_void_T *)(address_of(first, rest...)));
2123  }
2124 
2125  HALIDE_ALWAYS_INLINE
2126  not_void_T &
2127  operator()() {
2128  static_assert(!T_is_void,
2129  "Cannot use operator() on Buffer<void> types");
2130  constexpr int expected_dims = 0;
2131  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2132  set_host_dirty();
2133  return *((not_void_T *)(data()));
2134  }
2135 
2136  HALIDE_ALWAYS_INLINE
2137  not_void_T &
2138  operator()(const int *pos) {
2139  static_assert(!T_is_void,
2140  "Cannot use operator() on Buffer<void> types");
2141  set_host_dirty();
2142  return *((not_void_T *)(address_of(pos)));
2143  }
2144  // @}
2145 
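  /* Usage sketch (illustrative; the buffer below is assumed for the example).
   * Passing fewer coordinates than dimensions requires dynamic dimensionality,
   * since statically-dimensioned Buffers check the argument count at compile time:
  \code
  Buffer<float> im(100, 100, 3);   // dynamic dimensionality
  im(12, 34, 0) = 0.5f;            // non-const access sets host_dirty()
  float v = im(12, 34);            // the trailing coordinate defaults to its min
  \endcode
  */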
2146  /** Tests that all values in this buffer are equal to val. */
2147  bool all_equal(not_void_T val) const {
2148  bool all_equal = true;
2149  for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
2150  return all_equal;
2151  }
2152 
2153  Buffer<T, Dims, InClassDimStorage> &fill(not_void_T val) {
2154  set_host_dirty();
2155  for_each_value([=](T &v) { v = val; });
2156  return *this;
2157  }
2158 
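  /* Usage sketch (illustrative; the size and value are assumed for the example):
  \code
  Buffer<int, 2> im(8, 8);
  im.fill(7);
  assert(im.all_equal(7));
  \endcode
  */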
2159 private:
2160  /** Helper functions for for_each_value. */
2161  // @{
2162  template<int N>
2163  struct for_each_value_task_dim {
2164  std::ptrdiff_t extent;
2165  std::ptrdiff_t stride[N];
2166  };
2167 
2168  // Given an array of strides, and a bunch of pointers to pointers
2169  // (all of different types), advance the pointers using the
2170  // strides.
2171  template<typename Ptr, typename... Ptrs>
2172  HALIDE_ALWAYS_INLINE static void advance_ptrs(const std::ptrdiff_t *stride, Ptr &ptr, Ptrs &...ptrs) {
2173  ptr += *stride;
2174  advance_ptrs(stride + 1, ptrs...);
2175  }
2176 
2177  HALIDE_ALWAYS_INLINE
2178  static void advance_ptrs(const std::ptrdiff_t *) {
2179  }
2180 
2181  template<typename Fn, typename Ptr, typename... Ptrs>
2182  HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
2183  const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
2184  if (d == 0) {
2185  if (innermost_strides_are_one) {
2186  Ptr end = ptr + t[0].extent;
2187  while (ptr != end) {
2188  f(*ptr++, (*ptrs++)...);
2189  }
2190  } else {
2191  for (std::ptrdiff_t i = t[0].extent; i != 0; i--) {
2192  f(*ptr, (*ptrs)...);
2193  advance_ptrs(t[0].stride, ptr, ptrs...);
2194  }
2195  }
2196  } else {
2197  for (std::ptrdiff_t i = t[d].extent; i != 0; i--) {
2198  for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
2199  advance_ptrs(t[d].stride, ptr, ptrs...);
2200  }
2201  }
2202  }
2203 
2204  // Return pair is <new_dimensions, innermost_strides_are_one>
2205  template<int N>
2206  HALIDE_NEVER_INLINE static std::pair<int, bool> for_each_value_prep(for_each_value_task_dim<N> *t,
2207  const halide_buffer_t **buffers) {
2208  const int dimensions = buffers[0]->dimensions;
2209  assert(dimensions > 0);
2210 
2211  // Check the buffers all have clean host allocations
2212  for (int i = 0; i < N; i++) {
2213  if (buffers[i]->device) {
2214  assert(buffers[i]->host &&
2215  "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
2216  assert(!buffers[i]->device_dirty() &&
2217  "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
2218  } else {
2219  assert(buffers[i]->host &&
2220  "Buffer passed to for_each_value has no host or device allocation");
2221  }
2222  }
2223 
2224  // Extract the strides in all the dimensions
2225  for (int i = 0; i < dimensions; i++) {
2226  for (int j = 0; j < N; j++) {
2227  assert(buffers[j]->dimensions == dimensions);
2228  assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
2229  buffers[j]->dim[i].min == buffers[0]->dim[i].min);
2230  const int s = buffers[j]->dim[i].stride;
2231  t[i].stride[j] = s;
2232  }
2233  t[i].extent = buffers[0]->dim[i].extent;
2234 
2235  // Order the dimensions by stride, so that the traversal is cache-coherent.
2236  // Use the last buffer for this, because it is the source in copies.
2237  // It appears to be better to optimize read order than write order.
2238  for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
2239  std::swap(t[j], t[j - 1]);
2240  }
2241  }
2242 
2243  // flatten dimensions where possible to make a larger inner
2244  // loop for autovectorization.
2245  int d = dimensions;
2246  for (int i = 1; i < d; i++) {
2247  bool flat = true;
2248  for (int j = 0; j < N; j++) {
2249  flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
2250  }
2251  if (flat) {
2252  t[i - 1].extent *= t[i].extent;
2253  for (int j = i; j < d - 1; j++) {
2254  t[j] = t[j + 1];
2255  }
2256  i--;
2257  d--;
2258  }
2259  }
2260 
2261  // Note that we assert() that dimensions > 0 above
2262  // (our one-and-only caller will only call us that way)
2263  // so the unchecked access to t[0] should be safe.
2264  bool innermost_strides_are_one = true;
2265  for (int i = 0; i < N; i++) {
2266  innermost_strides_are_one &= (t[0].stride[i] == 1);
2267  }
2268 
2269  return {d, innermost_strides_are_one};
2270  }
2271 
2272  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2273  void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
2274  if (dimensions() > 0) {
2275  const size_t alloc_size = dimensions() * sizeof(for_each_value_task_dim<N>);
2276  Buffer<>::for_each_value_task_dim<N> *t =
2277  (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA(alloc_size);
2278  // Move the preparatory code into a non-templated helper to
2279  // save code size.
2280  const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
2281  auto [new_dims, innermost_strides_are_one] = Buffer<>::for_each_value_prep(t, buffers);
2282  if (new_dims > 0) {
2283  Buffer<>::for_each_value_helper(f, new_dims - 1,
2284  innermost_strides_are_one,
2285  t,
2286  data(), (other_buffers.data())...);
2287  return;
2288  }
2289  // else fall thru
2290  }
2291 
2292  // zero-dimensional case
2293  f(*data(), (*other_buffers.data())...);
2294  }
2295  // @}
2296 
2297 public:
2298  /** Call a function on every value in the buffer, and the
2299  * corresponding values in some number of other buffers of the
2300  * same size. The function should take a reference, const
2301  * reference, or value of the correct type for each buffer. This
2302  * effectively lifts a function of scalars to an element-wise
2303  * function of buffers. This produces code that the compiler can
2304  * autovectorize. This is slightly cheaper than for_each_element,
2305  * because it does not need to track the coordinates.
2306  *
2307  * Note that constness of Buffers is preserved: a const Buffer<T> (for either
2308  * 'this' or the other-buffers arguments) will allow mutation of the
2309  * buffer contents, while a Buffer<const T> will not. Attempting to specify
2310  * a mutable reference for the lambda argument of a Buffer<const T>
2311  * will result in a compilation error. */
2312  // @{
2313  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2314  HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_value(Fn &&f, Args &&...other_buffers) const {
2315  for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2316  return *this;
2317  }
2318 
2319  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2320  HALIDE_ALWAYS_INLINE
2321  Buffer<T, Dims, InClassDimStorage> &
2322  for_each_value(Fn &&f, Args &&...other_buffers) {
2323  for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2324  return *this;
2325  }
2326  // @}
2327 
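  /* Usage sketch (illustrative; the buffers are assumed for the example): an
   * element-wise a += b written as a lifted scalar function.
  \code
  Buffer<float, 2> a(100, 100), b(100, 100);
  a.fill(1.0f);
  b.fill(2.0f);
  a.for_each_value([](float &x, float y) { x += y; }, b);
  \endcode
  */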
2328 private:
2329  // Helper functions for for_each_element
2330  struct for_each_element_task_dim {
2331  int min, max;
2332  };
2333 
2334  /** If f is callable with this many args, call it. The first
2335  * argument is just to make the overloads distinct. Actual
2336  * overload selection is done using the enable_if. */
2337  template<typename Fn,
2338  typename... Args,
2339  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2340  HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
2341  f(args...);
2342  }
2343 
2344  /** If the above overload is impossible, we add an outer loop over
2345  * an additional argument and try again. */
2346  template<typename Fn,
2347  typename... Args>
2348  HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
2349  for (int i = t[d].min; i <= t[d].max; i++) {
2350  for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
2351  }
2352  }
2353 
2354  /** Determine the minimum number of arguments a callable can take
2355  * using the same trick. */
2356  template<typename Fn,
2357  typename... Args,
2358  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2359  HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
2360  return (int)(sizeof...(Args));
2361  }
2362 
2363  /** The recursive version is only enabled up to a recursion limit
2364  * of 256. This catches callables that aren't callable with any
2365  * number of ints. */
2366  template<typename Fn,
2367  typename... Args>
2368  HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
2369  static_assert(sizeof...(args) <= 256,
2370  "Callable passed to for_each_element must accept either a const int *,"
2371  " or up to 256 ints. No such operator found. Expect infinite template recursion.");
2372  return num_args(0, std::forward<Fn>(f), 0, args...);
2373  }
2374 
2375  /** A version where the callable takes a position array instead,
2376  * with compile-time recursion on the dimensionality. This
2377  * overload is preferred to the one below using the same int vs
2378  * double trick as above, but is impossible once d hits -1 using
2379  * std::enable_if. */
2380  template<int d,
2381  typename Fn,
2382  typename = typename std::enable_if<(d >= 0)>::type>
2383  HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2384  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2385  for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
2386  }
2387  }
2388 
2389  /** Base case for recursion above. */
2390  template<int d,
2391  typename Fn,
2392  typename = typename std::enable_if<(d < 0)>::type>
2393  HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2394  f(pos);
2395  }
2396 
2397  /** A run-time-recursive version (instead of
2398  * compile-time-recursive) that requires the callable to take a
2399  * pointer to a position array instead. Dispatches to the
2400  * compile-time-recursive version once the dimensionality gets
2401  * small. */
2402  template<typename Fn>
2403  static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2404  if (d == -1) {
2405  f(pos);
2406  } else if (d == 0) {
2407  // Once the dimensionality gets small enough, dispatch to
2408  // a compile-time-recursive version for better codegen of
2409  // the inner loops.
2410  for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2411  } else if (d == 1) {
2412  for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2413  } else if (d == 2) {
2414  for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2415  } else if (d == 3) {
2416  for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2417  } else {
2418  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2419  for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2420  }
2421  }
2422  }
2423 
2424  /** We now have two overloads for for_each_element. This one
2425  * triggers if the callable takes a const int *.
2426  */
2427  template<typename Fn,
2428  typename = decltype(std::declval<Fn>()((const int *)nullptr))>
2429  static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
2430  const int size = dims * sizeof(int);
2431  int *pos = (int *)HALIDE_ALLOCA(size);
2432  // At least one version of GCC will (incorrectly) report that pos "may be used uninitialized".
2433  // Add this memset to silence it.
2434  memset(pos, 0, size);
2435  for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
2436  }
2437 
2438  /** This one triggers otherwise. It treats the callable as
2439  * something that takes some number of ints. */
2440  template<typename Fn>
2441  HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
2442  int args = num_args(0, std::forward<Fn>(f));
2443  assert(dims >= args);
2444  for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
2445  }
2446 
2447  template<typename Fn>
2448  void for_each_element_impl(Fn &&f) const {
2449  for_each_element_task_dim *t =
2450  (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
2451  for (int i = 0; i < dimensions(); i++) {
2452  t[i].min = dim(i).min();
2453  t[i].max = dim(i).max();
2454  }
2455  for_each_element(0, dimensions(), t, std::forward<Fn>(f));
2456  }
2457 
2458 public:
2459  /** Call a function at each site in a buffer. This is likely to be
2460  * much slower than using Halide code to populate a buffer, but is
2461  * convenient for tests. If the function has more arguments than the
2462  * buffer has dimensions, the remaining arguments will be zero. If it
2463  * has fewer arguments than the buffer has dimensions then the last
2464  * few dimensions of the buffer are not iterated over. For example,
2465  * the following code exploits this to set a floating point RGB image
2466  * to red:
2467 
2468  \code
2469  Buffer<float, 3> im(100, 100, 3);
2470  im.for_each_element([&](int x, int y) {
2471  im(x, y, 0) = 1.0f;
2472  im(x, y, 1) = 0.0f;
2473  im(x, y, 2) = 0.0f;
2474  });
2475  \endcode
2476 
2477  * The compiled code is equivalent to writing a nested for loop,
2478  * and compilers are capable of optimizing it in the same way.
2479  *
2480  * If the callable can be called with an int * as the sole argument,
2481  * that version is called instead. Each location in the buffer is
2482  * passed to it in a coordinate array. This version is higher-overhead
2483  * than the variadic version, but is useful for writing generic code
2484  * that accepts buffers of arbitrary dimensionality. For example, the
2485  * following sets the value at all sites in an arbitrary-dimensional
2486  * buffer to their first coordinate:
2487 
2488  \code
2489  im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
2490  \endcode
2491 
2492  * It is also possible to use for_each_element to iterate over entire
2493  * rows or columns by cropping the buffer to a single column or row
2494  * respectively and iterating over elements of the result. For example,
2495  * to set the diagonal of the image to 1 by iterating over the columns:
2496 
2497  \code
2498  Buffer<float, 3> im(100, 100, 3);
2499  im.sliced(1, 0).for_each_element([&](int x, int c) {
2500  im(x, x, c) = 1.0f;
2501  });
2502  \endcode
2503 
2504  * Or, assuming the memory layout is known to be dense per row, one can
2505  * memset each row of an image like so:
2506 
2507  \code
2508  Buffer<float, 3> im(100, 100, 3);
2509  im.sliced(0, 0).for_each_element([&](int y, int c) {
2510  memset(&im(0, y, c), 0, sizeof(float) * im.width());
2511  });
2512  \endcode
2513 
2514  */
2515  // @{
2516  template<typename Fn>
2517  HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_element(Fn &&f) const {
2518  for_each_element_impl(f);
2519  return *this;
2520  }
2521 
2522  template<typename Fn>
2523  HALIDE_ALWAYS_INLINE
2524  Buffer<T, Dims, InClassDimStorage> &
2525  for_each_element(Fn &&f) {
2526  for_each_element_impl(f);
2527  return *this;
2528  }
2529  // @}
2530 
2531 private:
2532  template<typename Fn>
2533  struct FillHelper {
2534  Fn f;
2535  Buffer<T, Dims, InClassDimStorage> *buf;
2536 
2537  template<typename... Args,
2538  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2539  void operator()(Args... args) {
2540  (*buf)(args...) = f(args...);
2541  }
2542 
2543  FillHelper(Fn &&f, Buffer<T, Dims, InClassDimStorage> *buf)
2544  : f(std::forward<Fn>(f)), buf(buf) {
2545  }
2546  };
2547 
2548 public:
2549  /** Fill a buffer by evaluating a callable at every site. The
2550  * callable should look much like a callable passed to
2551  * for_each_element, but it should return the value that should be
2552  * stored to the coordinate corresponding to the arguments. */
2553  template<typename Fn,
2554  typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2555  Buffer<T, Dims, InClassDimStorage> &fill(Fn &&f) {
2556  // We'll go via for_each_element. We need a variadic wrapper lambda.
2557  FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
2558  return for_each_element(wrapper);
2559  }
2560 
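  /* Usage sketch (illustrative; the buffer and formula are assumed for the
   * example): fill a 2-D buffer with a gradient.
  \code
  Buffer<float, 2> im(100, 100);
  im.fill([](int x, int y) { return (x + y) / 198.0f; });
  \endcode
  */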
2561  /** Check if an input buffer passed to an extern stage is a bounds
2562  * query. Compared to doing the host pointer check directly,
2563  * this both adds clarity to code and will facilitate moving to
2564  * another representation for bounds query arguments. */
2565  bool is_bounds_query() const {
2566  return buf.is_bounds_query();
2567  }
2568 
2569  /** Convenient check to verify that all of the interesting bytes in the Buffer
2570  * are initialized under MSAN. Note that by default, we use for_each_value() here so that
2571  * we skip any unused padding that isn't part of the Buffer; this isn't efficient,
2572  * but in MSAN mode, it doesn't matter. (Pass true for the flag to force check
2573  * the entire Buffer storage.) */
2574  void msan_check_mem_is_initialized(bool entire = false) const {
2575 #if defined(__has_feature)
2576 #if __has_feature(memory_sanitizer)
2577  if (entire) {
2578  __msan_check_mem_is_initialized(data(), size_in_bytes());
2579  } else {
2580  for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); });
2581  }
2582 #endif
2583 #endif
2584  }
2585 };
2586 
2587 } // namespace Runtime
2588 } // namespace Halide
2589 
2590 #undef HALIDE_ALLOCA
2591 
2592 #endif // HALIDE_RUNTIME_BUFFER_H
void add_dimension_with_stride(int s)
Add a new dimension with a min of zero, an extent of one, and the specified stride.
An iterator class, so that you can iterate over coordinates in a dimensions using a range-based for l...
Definition: HalideBuffer.h:587
int left() const
Conventional names for the min and max value of each dimension.
Buffer< not_const_T, Dims, InClassDimStorage > copy(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Make a new image which is a deep copy of this image.
Buffer(Array(&vals)[N])
Make an Buffer that refers to a statically sized array.
Expr max(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
Definition: Func.h:606
Buffer< not_const_T, Dims, InClassDimStorage > copy_to_planar(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Like copy(), but the copy is created in planar memory layout (vs.
ALWAYS_INLINE T align_up(T p, size_t alignment)
void embed(int d, int pos=0)
Embed a buffer in-place, increasing the dimensionality.
bool host_dirty() const
Methods for managing any GPU allocation.
Buffer< T, Dims, InClassDimStorage > cropped(int d, int min, int extent) const
Make an image that refers to a sub-range of this image along the given dimension. ...
T * end() const
A pointer to one beyond the element with the highest address.
Definition: HalideBuffer.h:663
static Buffer< T, Dims, InClassDimStorage > make_scalar(T *data)
Make a zero-dimensional Buffer that points to non-owned, existing data.
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, int d, const halide_dimension_t *shape)
Initialize an Buffer from a pointer to the min coordinate and an array describing the shape...
Buffer< T,(Dims==AnyDims ? AnyDims :Dims+1)> embedded(int d, int pos=0) const
Make a new buffer that views this buffer as a single slice in a higher-dimensional space...
typename std::conditional< std::is_const< T >::value, const T2, T2 >::type add_const_if_T_is_const
Definition: Buffer.h:83
HALIDE_ALWAYS_INLINE not_void_T & operator()(const int *pos)
Access elements.
int(* device_and_host_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
static constexpr halide_type_t static_halide_type()
Get the Halide type of T.
Definition: HalideBuffer.h:259
Buffer< T, Dims, InClassDimStorage > transposed(const std::vector< int > &order) const
Make a buffer which refers to the same data in the same layout using a different ordering of the dime...
BufferDeviceOwnership device_ownership() const
Return the method by which the device field is managed.
int device_free(void *ctx=nullptr)
Methods for managing any GPU allocation.
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
Definition: Func.h:603
static constexpr int static_dimensions()
Callers should not use the result if has_static_dimensions is false.
Definition: HalideBuffer.h:272
void device_deallocate()
Drop reference to any owned device memory, possibly freeing it if this buffer held the last reference...
Definition: HalideBuffer.h:943
Buffer(halide_type_t t, int first, Args... rest)
Allocate a new image of the given size with a runtime type.
Definition: HalideBuffer.h:953
int(* wrap_native)(void *user_context, struct halide_buffer_t *buf, uint64_t handle, const struct halide_device_interface_t *device_interface)
Buffer(halide_type_t t, const std::vector< int > &sizes)
Allocate a new image of unknown type using a vector of ints as the size.
Buffer< T, Dims, InClassDimStorage > & fill(not_void_T val)
static bool can_convert_from(const Buffer< T2, D2, S2 > &other)
Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type...
Definition: HalideBuffer.h:718
int height() const
Conventional names for the first three dimensions.
bool contains(Args... args) const
Test if a given coordinate is within the bounds of an image.
T * begin() const
A pointer to the element with the lowest address.
Definition: HalideBuffer.h:657
Buffer(T *data, int first, Args &&...rest)
Initialize an Buffer from a pointer and some sizes.
Buffer< T, Dims, InClassDimStorage > & fill(Fn &&f)
Fill a buffer by evaluating a callable at every site.
bool all_equal(not_void_T val) const
Tests that all values in this buffer are equal to val.
halide_device_detach_native will be called when device ref count goes to zero
HALIDE_ALWAYS_INLINE int min() const
The lowest coordinate in this dimension.
Definition: HalideBuffer.h:565
Buffer(const std::vector< int > &sizes, const std::vector< int > &storage_order)
HALIDE_ALWAYS_INLINE Buffer< T, Dims, InClassDimStorage > & for_each_element(Fn &&f)
Call a function at each site in a buffer.
void * memcpy(void *s1, const void *s2, size_t n)
Buffer< T, Dims, InClassDimStorage > alias() const
Make a copy of the Buffer which shares the underlying host and/or device allocations as the existing ...
int(* device_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
void set_device_dirty(bool v=true)
Methods for managing any GPU allocation.
Buffer(int first)
Allocate a new image of the given size.
Definition: HalideBuffer.h:974
STL namespace.
No free routine will be called when device ref count goes to zero
Buffer< T,(Dims==AnyDims ? AnyDims :Dims - 1)> sliced(int d) const
Make a lower-dimensional buffer that refers to one slice of this buffer at the dimension&#39;s minimum...
Buffer< T, Dims, InClassDimStorage > translated(int d, int dx) const
Make an image which refers to the same data with using translated coordinates in the given dimension...
int top() const
Conventional names for the min and max value of each dimension.
void msan_check_mem_is_initialized(bool entire=false) const
Convenient check to verify that all of the interesting bytes in the Buffer are initialized under MSAN...
HALIDE_ALWAYS_INLINE const Buffer< T, Dims, InClassDimStorage > & for_each_value(Fn &&f, Args &&...other_buffers) const
Call a function on every value in the buffer, and the corresponding values in some number of other bu...
HALIDE_ALWAYS_INLINE int stride() const
The number of elements in memory you have to step over to increment this coordinate by one...
Definition: HalideBuffer.h:571
halide_dimension_t * dim
The shape of the buffer.
This file defines the class FunctionDAG, which is our representation of a Halide pipeline, and contains methods to using Halide&#39;s bounds tools to query properties of it.
HALIDE_ALWAYS_INLINE iterator begin() const
An iterator that points to the min coordinate.
Definition: HalideBuffer.h:602
Buffer(T *data, const std::vector< int > &sizes)
Initialize an Buffer from a pointer and a vector of sizes.
int channels() const
Conventional names for the first three dimensions.
HALIDE_ALWAYS_INLINE Buffer< T2, D2, InClassDimStorage > as() &&
Return an rval reference to this Buffer.
void crop(int d, int min, int extent)
Crop an image in-place along the given dimension.
HALIDE_ALWAYS_INLINE const Buffer< typename std::add_const< T >::type, Dims, InClassDimStorage > & as_const() const &
as_const() is syntactic sugar for .as<const T>(), to avoid the need to recapitulate the type argument...
AllocationHeader(void(*deallocate_fn)(void *))
Definition: HalideBuffer.h:165
unsigned __INT8_TYPE__ uint8_t
int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Methods for managing any GPU allocation.
int copy_to_host(void *ctx=nullptr)
Methods for managing any GPU allocation.
#define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT
Definition: HalideBuffer.h:52
int extent(int i) const
Access to the mins, strides, extents.
Definition: HalideBuffer.h:627
HALIDE_ALWAYS_INLINE const Buffer< T, Dims, InClassDimStorage > & for_each_element(Fn &&f) const
Call a function at each site in a buffer.
Buffer(T *data, int d, const halide_dimension_t *shape)
Initialize an Buffer from a pointer to the min coordinate and an array describing the shape...
Buffer(Buffer< T2, D2, S2 > &&other)
Move-construct a Buffer from a Buffer of different dimensionality and type.
Definition: HalideBuffer.h:784
__PTRDIFF_TYPE__ ptrdiff_t
int bottom() const
Conventional names for the min and max value of each dimension.
HALIDE_ALWAYS_INLINE iterator end() const
An iterator that points to one past the max coordinate.
Definition: HalideBuffer.h:607
HALIDE_ALWAYS_INLINE bool device_dirty() const
Methods for managing any GPU allocation.
void crop(const std::vector< std::pair< int, int >> &rect)
Crop an image in-place along the first N dimensions.
bool is_bounds_query() const
Check if an input buffer passed extern stage is a querying bounds.
int device_wrap_native(const struct halide_device_interface_t *device_interface, uint64_t handle, void *ctx=nullptr)
Methods for managing any GPU allocation.
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, const std::vector< halide_dimension_t > &shape)
Initialize a Buffer from a pointer to the min coordinate and a vector describing the shape...
Dimension(const halide_dimension_t &dim)
Definition: HalideBuffer.h:611
const halide_buffer_t * raw_buffer() const
Get a pointer to the raw halide_buffer_t this wraps.
int(* copy_to_device)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
bool any_zero(const Container &c)
Definition: HalideBuffer.h:148
size_t number_of_elements() const
The total number of elements this buffer represents.
Definition: HalideBuffer.h:637
int width() const
Conventional names for the first three dimensions.
Buffer(const halide_buffer_t &buf, BufferDeviceOwnership ownership=BufferDeviceOwnership::Unmanaged)
Make a Buffer from a halide_buffer_t.
Definition: HalideBuffer.h:690
struct halide_buffer_t halide_buffer_t
The raw representation of an image passed around by generated Halide code.
Buffer(const Buffer< T2, D2, S2 > &other)
Construct a Buffer from a Buffer of different dimensionality and type.
Definition: HalideBuffer.h:760
#define HALIDE_NEVER_INLINE
Definition: HalideRuntime.h:50
static Buffer< T, Dims, InClassDimStorage > make_with_shape_of(Buffer< T2, D2, S2 > src, void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr)
Make a buffer with the same shape and memory nesting order as another buffer.
This file declares the routines used by Halide internally in its runtime.
void set_min(Args... args)
Set the min coordinate of an image in the first N dimensions.
static Buffer< T, Dims, InClassDimStorage > make_interleaved(T *data, int width, int height, int channels)
Wrap an existing interleaved image.
BufferDeviceOwnership
This indicates how to deallocate the device for a Halide::Runtime::Buffer.
Definition: HalideBuffer.h:171
Each GPU API provides a halide_device_interface_t struct pointing to the code that manages device all...
A templated Buffer class that wraps halide_buffer_t and adds functionality.
Definition: HalideBuffer.h:121
Call device_and_host_free when DevRefCount goes to zero.
int right() const
Conventional names for the min and max value of each dimension.
void transpose(const std::vector< int > &order)
A generalized transpose: instead of swapping two dimensions, pass a vector that lists each dimension ...
Buffer< T,(Dims==AnyDims ? AnyDims :Dims - 1)> sliced(int d, int pos) const
Make a lower-dimensional buffer that refers to one slice of this buffer.
int32_t dimensions
The dimensionality of the buffer.
unsigned __INT32_TYPE__ uint32_t
void translate(int d, int delta)
Translate an image in-place along one dimension by changing how it is indexed.
static Buffer< T, Dims, InClassDimStorage > make_scalar()
Make a zero-dimensional Buffer.
void transpose(int d1, int d2)
Transpose a buffer in-place by changing how it is indexed.
HALIDE_ALWAYS_INLINE int max() const
The highest coordinate in this dimension.
Definition: HalideBuffer.h:581
Not visible externally, similar to &#39;static&#39; linkage in C.
bool contains(const std::vector< int > &coords) const
Test if a given coordinate is within the bounds of an image.
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, const std::vector< int > &sizes)
Initialize an Buffer of runtime type from a pointer and a vector of sizes.
constexpr int AnyDims
Definition: HalideBuffer.h:187
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, int first, Args &&...rest)
Initialize an Buffer of runtime type from a pointer and some sizes.
halide_type_t type() const
Get the type of the elements.
Definition: HalideBuffer.h:651
T * data() const
Get a pointer to the address of the min coordinate.
bool operator!=(const iterator &other) const
Definition: HalideBuffer.h:592
halide_device_free will be called when device ref count goes to zero
Buffer< T, Dims, InClassDimStorage > & operator=(Buffer< T2, D2, S2 > &&other)
Move from another Buffer of possibly-different dimensionality and type.
Definition: HalideBuffer.h:834
#define HALIDE_ALWAYS_INLINE
Definition: HalideRuntime.h:49
static void assert_can_convert_from(const Buffer< T2, D2, S2 > &other)
Fail an assertion at runtime or compile-time if an Buffer<T, Dims, InClassDimStorage> cannot be const...
Definition: HalideBuffer.h:736
Buffer(const Buffer< T, Dims, InClassDimStorage > &other)
Copy constructor.
Definition: HalideBuffer.h:745
int device_detach_native(void *ctx=nullptr)
Methods for managing any GPU allocation.
static Buffer< T, Dims, InClassDimStorage > make_interleaved(int width, int height, int channels)
If you use the (x, y, c) indexing convention, then Halide Buffers are stored planar by default...
HALIDE_ALWAYS_INLINE not_void_T & operator()(int first, Args... rest)
Access elements.
A runtime tag for a type in the halide type system.
void set_min(const std::vector< int > &mins)
Set the min coordinate of an image in the first N dimensions.
Buffer(Buffer< T, Dims, InClassDimStorage > &&other) noexcept
Move constructor.
Definition: HalideBuffer.h:770
Buffer(T *data, const std::vector< halide_dimension_t > &shape)
Initialize a Buffer from a pointer to the min coordinate and a vector describing the shape...
int dimensions() const
Get the dimensionality of the buffer.
Definition: HalideBuffer.h:642
static Buffer< add_const_if_T_is_const< void >, Dims, InClassDimStorage > make_scalar(halide_type_t t)
Make a zero-dimensional Buffer.
HALIDE_ALWAYS_INLINE not_void_T & operator()()
Access elements.
void add_dimension()
Add a new dimension with a min of zero and an extent of one.
void slice(int d)
Slice a buffer in-place at the dimension&#39;s minimum.
unsigned __INT16_TYPE__ uint16_t
int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Methods for managing any GPU allocation.
uint8_t * host
A pointer to the start of the data in main memory.
HALIDE_ALWAYS_INLINE const not_void_T & operator()(const int *pos) const
Access elements.
void * malloc(size_t)
int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Methods for managing any GPU allocation.
Buffer< T, Dims, InClassDimStorage > cropped(const std::vector< std::pair< int, int >> &rect) const
Make an image that refers to a sub-rectangle of this image along the first N dimensions.
int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Methods for managing any GPU allocation.
HALIDE_ALWAYS_INLINE Buffer< typename std::add_const< T >::type, Dims, InClassDimStorage > as_const() &&
as_const() is syntactic sugar for .as<const T>(), to avoid the need to recapitulate the type argument...
uint64_t device
A device-handle for e.g.
Buffer(const std::vector< int > &sizes)
Allocate a new image of known type using a vector of ints as the size.
HALIDE_ALWAYS_INLINE Buffer< T, Dims, InClassDimStorage > & for_each_value(Fn &&f, Args &&...other_buffers)
Call a function on every value in the buffer, and the corresponding values in some number of other buffers of the same size.
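A sketch of for_each_value() visiting two same-shaped buffers in lockstep; the lambda receives this buffer's value first, by reference:
    #include "HalideBuffer.h"

    int main() {
        Halide::Runtime::Buffer<float> src(16, 16), dst(16, 16);
        src.fill(1.5f);
        dst.for_each_value([](float &d, float s) { d = 2.0f * s; }, src);
        return dst(0, 0) == 3.0f ? 0 : 1;
    }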
HALIDE_ALWAYS_INLINE int extent() const
The extent of the image along this dimension.
Definition: HalideBuffer.h:576
halide_buffer_t * raw_buffer()
Get a pointer to the raw halide_buffer_t this wraps.
HALIDE_ALWAYS_INLINE Buffer< typename std::add_const< T >::type, Dims, InClassDimStorage > & as_const() &
as_const() is syntactic sugar for .as<const T>(), to avoid the need to recapitulate the type argument.
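A short sketch of as_const() producing a read-only view of the same allocation:
    #include "HalideBuffer.h"

    int main() {
        Halide::Runtime::Buffer<float> im(8, 8);
        im.fill(0.0f);
        // No need to restate the element type when taking a const view.
        Halide::Runtime::Buffer<const float> view = im.as_const();
        return view(0, 0) == 0.0f ? 0 : 1;
    }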
void copy_from(Buffer< T2, D2, S2 > src)
Fill a Buffer with the values at the same coordinates in another Buffer.
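A sketch of copy_from(), which copies values over the coordinates the two buffers share; the shapes here are arbitrary:
    #include "HalideBuffer.h"

    int main() {
        Halide::Runtime::Buffer<float> src(32, 32), dst(32, 32);
        src.fill(3.0f);
        dst.fill(0.0f);
        dst.copy_from(src);  // dst now holds src's values at the shared coordinates
        return dst(10, 10) == 3.0f ? 0 : 1;
    }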
void check_overflow()
Check that the product of the extents fits in memory.
Definition: HalideBuffer.h:863
HALIDE_ALWAYS_INLINE void set_host_dirty(bool v=true)
Methods for managing any GPU allocation.
#define HALIDE_ALLOCA
Definition: HalideBuffer.h:36
void * memset(void *s, int val, size_t n)
Buffer< not_const_T, Dims, InClassDimStorage > copy_to_interleaved(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Like copy(), but the copy is created in interleaved memory layout (vs. keeping the same memory layout as the original).
HALIDE_ALWAYS_INLINE const not_void_T & operator()() const
Access elements.
A similar struct for managing device allocations.
Definition: HalideBuffer.h:180
void deallocate()
Drop reference to any owned host or device memory, possibly freeing it, if this buffer held the last reference to it.
Definition: HalideBuffer.h:936
HALIDE_ALWAYS_INLINE Dimension dim(int i) const
Access the shape of the buffer.
Definition: HalideBuffer.h:617
Buffer< T, Dims, InClassDimStorage > & operator=(Buffer< T, Dims, InClassDimStorage > &&other) noexcept
Standard move-assignment operator.
Definition: HalideBuffer.h:849
Buffer< T, Dims, InClassDimStorage > transposed(int d1, int d2) const
Make a buffer which refers to the same data in the same layout using a swapped indexing order for the dimensions given.
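A sketch of transposed(); no data is copied, only the indexing order of the two dimensions is swapped:
    #include "HalideBuffer.h"

    int main() {
        Halide::Runtime::Buffer<float> im(640, 480);
        im(10, 20) = 7.0f;
        auto t = im.transposed(0, 1);
        return t(20, 10) == 7.0f ? 0 : 1;  // t(y, x) aliases im(x, y)
    }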
bool has_device_allocation() const
Methods for managing any GPU allocation.
int stride(int i) const
Access to the mins, strides, extents.
Definition: HalideBuffer.h:630
HALIDE_ALWAYS_INLINE const not_void_T & operator()(int first, Args... rest) const
Access elements.
int device_sync(void *ctx=nullptr)
Methods for managing any GPU allocation.
__UINTPTR_TYPE__ uintptr_t
The raw representation of an image passed around by generated Halide code.
A struct acting as a header for allocations owned by the Buffer class itself.
Definition: HalideBuffer.h:160
size_t size_in_bytes() const
The total number of bytes spanned by the data in memory.
Definition: HalideBuffer.h:669
Buffer< T, Dims, InClassDimStorage > & operator=(const Buffer< T, Dims, InClassDimStorage > &other)
Standard assignment operator.
Definition: HalideBuffer.h:815
unsigned __INT64_TYPE__ uint64_t
void slice(int d, int pos)
Rewrite the buffer to refer to a single lower-dimensional slice of itself along the given dimension at the given coordinate.
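A sketch of the in-place slice(); here a 3D volume is rewritten as the 2D plane at z == 4 (the sizes are arbitrary):
    #include "HalideBuffer.h"
    #include <cstdint>

    int main() {
        Halide::Runtime::Buffer<uint8_t> vol(64, 64, 16);
        vol(1, 2, 4) = 9;
        vol.slice(2, 4);  // drop dimension 2, keeping the z == 4 plane
        return vol.dimensions() == 2 && vol(1, 2) == 9 ? 0 : 1;
    }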
Buffer< T, Dims, InClassDimStorage > & operator=(const Buffer< T2, D2, S2 > &other)
Assign from another Buffer of possibly-different dimensionality and type.
Definition: HalideBuffer.h:799
int min(int i) const
Access to the mins, strides, extents.
Definition: HalideBuffer.h:624
Buffer(int first, int second, Args... rest)
Allocate a new image of the given size.
Definition: HalideBuffer.h:990
HALIDE_ALWAYS_INLINE Buffer< T2, D2, InClassDimStorage > & as() &
Return a typed reference to this Buffer.
static Buffer< void, Dims, InClassDimStorage > make_interleaved(halide_type_t t, int width, int height, int channels)
If you use the (x, y, c) indexing convention, then Halide Buffers are stored planar by default. This function constructs an interleaved RGB or RGBA image that can still be indexed using (x, y, c).
Buffer(halide_type_t t, const std::vector< int > &sizes, const std::vector< int > &storage_order)
Allocate a new image of unknown type using a vector of ints as the size and a vector of indices indicating the storage order for each dimension.
Read-only access to the shape.
Definition: HalideBuffer.h:560
There was no error.
HALIDE_ALWAYS_INLINE const Buffer< T2, D2, InClassDimStorage > & as() const &
Return a const typed reference to this Buffer.
BufferDeviceOwnership ownership
Definition: HalideBuffer.h:184
Buffer< T, Dims, InClassDimStorage > translated(const std::vector< int > &delta) const
Make an image which refers to the same data translated along the first N dimensions.
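A sketch of translated(); the view indexes the same storage from a shifted origin:
    #include "HalideBuffer.h"

    int main() {
        Halide::Runtime::Buffer<float> im(100, 100);
        im(0, 0) = 4.0f;
        auto shifted = im.translated({10, 20});
        return shifted(10, 20) == 4.0f ? 0 : 1;  // aliases im(0, 0)
    }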
void reset()
Reset the Buffer to be equivalent to a default-constructed Buffer of the same static type (if any); Buffer<void> will have its runtime type reset to uint8.
Definition: HalideBuffer.h:676
static Buffer< add_const_if_T_is_const< void >, Dims, InClassDimStorage > make_interleaved(halide_type_t t, T *data, int width, int height, int channels)
Wrap an existing interleaved image.
void allocate(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr)
Allocate memory for this Buffer.
Definition: HalideBuffer.h:878
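A hedged sketch of supplying a custom allocator: it assumes that constructing from a null data pointer and sizes only describes the shape without allocating, so allocate() can then be called with the chosen functions; the sizes and the use of plain malloc/free are illustrative only:
    #include "HalideBuffer.h"
    #include <cstdlib>

    int main() {
        // Shape only; the host pointer stays null until allocate() is called.
        Halide::Runtime::Buffer<float> im(static_cast<float *>(nullptr), 640, 480);
        im.allocate(std::malloc, std::free);  // plain malloc/free instead of the default allocation
        im.fill(0.0f);
        return 0;
    }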
void free(void *)
bool owns_host_memory() const
Does this Buffer own the host memory it refers to?
Definition: HalideBuffer.h:264
void translate(const std::vector< int > &delta)
Translate an image along the first N dimensions by changing how it is indexed.