HalideBuffer.h
1 /** \file
2  * Defines a Buffer type that wraps halide_buffer_t and adds
3  * functionality, and methods for more conveniently iterating over the
4  * samples in a halide_buffer_t outside of Halide code. */
5 
6 #ifndef HALIDE_RUNTIME_BUFFER_H
7 #define HALIDE_RUNTIME_BUFFER_H
8 
9 #include <algorithm>
10 #include <atomic>
11 #include <cassert>
12 #include <cstdint>
13 #include <cstdlib>
14 #include <cstring>
15 #include <limits>
16 #include <memory>
17 #include <vector>
18 
19 #ifdef __APPLE__
20 #include <AvailabilityVersions.h>
21 #include <TargetConditionals.h>
22 #endif
23 
24 #if defined(__has_feature)
25 #if __has_feature(memory_sanitizer)
26 #include <sanitizer/msan_interface.h>
27 #endif
28 #endif
29 
30 #include "HalideRuntime.h"
31 
32 #ifdef _MSC_VER
33 #include <malloc.h>
34 #define HALIDE_ALLOCA _alloca
35 #else
36 #define HALIDE_ALLOCA __builtin_alloca
37 #endif
38 
39 // gcc 5.1 has a false positive warning on this code
40 #if __GNUC__ == 5 && __GNUC_MINOR__ == 1
41 #pragma GCC diagnostic ignored "-Warray-bounds"
42 #endif
43 
44 #ifndef HALIDE_RUNTIME_BUFFER_CHECK_INDICES
45 #define HALIDE_RUNTIME_BUFFER_CHECK_INDICES 0
46 #endif
47 
48 #ifndef HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT
49 // Conservatively align buffer allocations to 128 bytes by default.
50 // This is enough alignment for all the platforms currently in use.
51 // Redefine this in your compiler settings if you desire more/less alignment.
52 #define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT 128
53 #endif
54 
55 static_assert(((HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT & (HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT - 1)) == 0),
56  "HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT must be a power of 2.");
57 
58 // Unfortunately, not all C++17 runtimes support aligned_alloc
59 // (it may depend on OS/SDK version); this is provided as an opt-out
60 // if you are compiling on a platform that doesn't provide a (good)
61 // implementation. (Note that we actually use the C11 `::aligned_alloc()`
62 // rather than the C++17 `std::aligned_alloc()` because at least one platform
63 // we found supports the former but not the latter.)
64 #ifndef HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
65 
66 // clang-format off
67 #ifdef _MSC_VER
68 
69  // MSVC doesn't implement aligned_alloc(), even in C++17 mode, and
70  // has stated they probably never will, so, always default it off here.
71  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
72 
73 #elif defined(__ANDROID_API__) && __ANDROID_API__ < 28
74 
75  // Android doesn't provide aligned_alloc until API 28
76  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
77 
78 #elif defined(__APPLE__)
79 
80  #if TARGET_OS_OSX && (__MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_15)
81 
82  // macOS doesn't provide aligned_alloc until 10.15
83  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
84 
85  #elif TARGET_OS_IPHONE && (__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_14_0)
86 
87  // iOS doesn't provide aligned_alloc until 14.0
88  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
89 
90  #else
91 
92  // Assume it's ok on all other Apple targets
93  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
94 
95  #endif
96 
97 #else
98 
99  #if defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)
100 
101  // ARM GNU-A baremetal compiler doesn't provide aligned_alloc as of 12.2
102  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
103 
104  #else
105 
106  // Not Windows, Android, or Apple: just assume it's ok
107  #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
108 
109  #endif
110 
111 #endif
112 // clang-format on
113 
114 #endif // HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
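
// Illustrative sketch (not part of the original header): both knobs above are meant to
// be defined before this file is included, e.g. via compiler flags or a wrapper header.
//
//     #define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT 64  // smaller alignment, if 128 is excessive
//     #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0      // opt out of ::aligned_alloc()
//     #include "HalideBuffer.h"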
115 
116 namespace Halide {
117 namespace Runtime {
118 
119 // Forward-declare our Buffer class
120 template<typename T, int Dims, int InClassDimStorage>
121 class Buffer;
122 
123 // A helper to check if a parameter pack is entirely implicitly
124 // int-convertible to use with std::enable_if
125 template<typename... Args>
126 struct AllInts : std::false_type {};
127 
128 template<>
129 struct AllInts<> : std::true_type {};
130 
131 template<typename T, typename... Args>
132 struct AllInts<T, Args...> {
133  static const bool value = std::is_convertible<T, int>::value && AllInts<Args...>::value;
134 };
135 
136 // Floats and doubles are technically implicitly int-convertible, but
137 // doing so produces a warning we treat as an error, so just disallow
138 // it here.
139 template<typename... Args>
140 struct AllInts<float, Args...> : std::false_type {};
141 
142 template<typename... Args>
143 struct AllInts<double, Args...> : std::false_type {};
144 
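// Illustrative sketch (not part of the original header): AllInts is the trait used with
// std::enable_if below so that only integer coordinate/extent packs are accepted.
//
//     static_assert(AllInts<int, long, short>::value);
//     static_assert(!AllInts<int, float>::value);  // float/double packs are rejected
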
145 namespace Internal {
146 // A helper to detect if there are any zeros in a container
147 template<typename Container>
148 bool any_zero(const Container &c) {
149  for (int i : c) {
150  if (i == 0) {
151  return true;
152  }
153  }
154  return false;
155 }
156 
157 struct DefaultAllocatorFns {
158  static inline void *(*default_allocate_fn)(size_t) = nullptr;
159  static inline void (*default_deallocate_fn)(void *) = nullptr;
160 };
161 } // namespace Internal
162 
163 /** A struct acting as a header for allocations owned by the Buffer
164  * class itself. */
165 struct AllocationHeader {
166  void (*deallocate_fn)(void *);
167  std::atomic<int> ref_count;
168 
169  // Note that ref_count always starts at 1
170  explicit AllocationHeader(void (*deallocate_fn)(void *))
171  : deallocate_fn(deallocate_fn), ref_count(1) {
172  }
173 };
174 
175 /** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
176 enum struct BufferDeviceOwnership : int {
177  Allocated, ///> halide_device_free will be called when device ref count goes to zero
178  WrappedNative, ///> halide_device_detach_native will be called when device ref count goes to zero
179  Unmanaged, ///> No free routine will be called when device ref count goes to zero
180  AllocatedDeviceAndHost, ///> Call device_and_host_free when DevRefCount goes to zero.
181  Cropped, ///> Call halide_device_release_crop when DevRefCount goes to zero.
182 };
183 
184 /** A similar struct for managing device allocations. */
185 struct DeviceRefCount {
186  // This is only ever constructed when there's something to manage,
187  // so start at one.
188  std::atomic<int> count{1};
189  BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated};
190 };
191 
192 constexpr int AnyDims = -1;
193 
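// Illustrative usage sketch (not part of the original header), assuming the Buffer API
// declared below: allocate an image, wrap caller-owned memory, and take a read-only view.
//
//     Halide::Runtime::Buffer<float> im(640, 480);               // allocates 640x480, planar
//     float *data = get_pixels();                                // hypothetical existing row-major data
//     Halide::Runtime::Buffer<float> wrapped(data, 640, 480);    // wraps, does not take ownership
//     Halide::Runtime::Buffer<const float> &view = im.as_const();  // zero-copy read-only view
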
194 /** A templated Buffer class that wraps halide_buffer_t and adds
195  * functionality. When using Halide from C++, this is the preferred
196  * way to create input and output buffers. The overhead of using this
197  * class relative to a naked halide_buffer_t is minimal - it uses another
198  * ~16 bytes on the stack, and does no dynamic allocations when using
199  * it to represent existing memory of a known maximum dimensionality.
200  *
201  * The template parameter T is the element type. For buffers where the
202  * element type is unknown, or may vary, use void or const void.
203  *
204  * The template parameter Dims is the number of dimensions. For buffers where
205  * the dimensionality is unknown, or may vary, use AnyDims.
206  *
207  * InClassDimStorage is the maximum number of dimensions that can be represented
208  * using space inside the class itself. Set it to the maximum dimensionality
209  * you expect this buffer to be. If the actual dimensionality exceeds
210  * this, heap storage is allocated to track the shape of the buffer.
211  * InClassDimStorage defaults to 4, which should cover nearly all usage.
212  *
213  * The class optionally allocates and owns memory for the image using
214  * a shared pointer allocated with the provided allocator. If they are
215  * null, malloc and free are used. Any device-side allocation is
216  * considered as owned if and only if the host-side allocation is
217  * owned. */
218 template<typename T = void,
219  int Dims = AnyDims,
220  int InClassDimStorage = (Dims == AnyDims ? 4 : std::max(Dims, 1))>
221 class Buffer {
222  /** The underlying halide_buffer_t */
223  halide_buffer_t buf = {};
224 
225  /** Some in-class storage for shape of the dimensions. */
226  halide_dimension_t shape[InClassDimStorage];
227 
228  /** The allocation owned by this Buffer. NULL if the Buffer does not
229  * own the memory. */
230  AllocationHeader *alloc = nullptr;
231 
232  /** A reference count for the device allocation owned by this
233  * buffer. */
234  mutable DeviceRefCount *dev_ref_count = nullptr;
235 
236  /** True if T is of type void or const void */
237  static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
238 
239  /** A type function that adds a const qualifier if T is a const type. */
240  template<typename T2>
241  using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
242 
243  /** T unless T is (const) void, in which case (const)
244  * uint8_t. Useful for providing return types for operator() */
245  using not_void_T = typename std::conditional<T_is_void,
246  add_const_if_T_is_const<uint8_t>,
247  T>::type;
248 
249  /** T with constness removed. Useful for return type of copy(). */
250  using not_const_T = typename std::remove_const<T>::type;
251 
252  /** The type the elements are stored as. Equal to not_void_T
253  * unless T is a pointer, in which case uint64_t. Halide stores
254  * all pointer types as uint64s internally, even on 32-bit
255  * systems. */
256  using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
257 
258 public:
259  /** True if the Halide type is not void (or const void). */
260  static constexpr bool has_static_halide_type = !T_is_void;
261 
262  /** Get the Halide type of T. Callers should not use the result if
263  * has_static_halide_type is false. */
264  static constexpr halide_type_t static_halide_type() {
265  return halide_type_of<typename std::remove_cv<not_void_T>::type>();
266  }
267 
268  /** Does this Buffer own the host memory it refers to? */
269  bool owns_host_memory() const {
270  return alloc != nullptr;
271  }
272 
273  static constexpr bool has_static_dimensions = (Dims != AnyDims);
274 
275  /** Callers should not use the result if
276  * has_static_dimensions is false. */
277  static constexpr int static_dimensions() {
278  return Dims;
279  }
280 
281  static_assert(!has_static_dimensions || static_dimensions() >= 0);
282 
283 private:
284  /** Increment the reference count of any owned allocation */
285  void incref() const {
286  if (owns_host_memory()) {
287  alloc->ref_count++;
288  }
289  if (buf.device) {
290  if (!dev_ref_count) {
291  // I seem to have a non-zero dev field but no
292  // reference count for it. I must have been given a
293  // device allocation by a Halide pipeline, and have
294  // never been copied from since. Take sole ownership
295  // of it.
296  dev_ref_count = new DeviceRefCount;
297  }
298  dev_ref_count->count++;
299  }
300  }
301 
302  // Note that this is called "cropped" but can also encompass a slice/embed
303  // operation.
304  struct DevRefCountCropped : DeviceRefCount {
305  // We will only store Buffers that have a dynamic number of dimensions.
306  // Buffers that are cropped or sliced from need to first be converted to
307  // ones with a dynamic number of dimensions. This is required because we cannot possibly
308  // know what the actual dimensionality is of the buffer this is a
309  // crop or slice from. Since cropping a sliced buffer is also possible,
310  // no optimizations can be made for cropped buffers either.
311  Buffer<T, AnyDims> cropped_from;
312  explicit DevRefCountCropped(const Buffer<T, AnyDims> &cropped_from)
313  : cropped_from(cropped_from) {
314  ownership = BufferDeviceOwnership::Cropped;
315  }
316  };
317 
318  /** Setup the device ref count for a buffer to indicate it is a crop (or slice, embed, etc) of cropped_from */
319  void crop_from(const Buffer<T, AnyDims> &cropped_from) {
320  assert(dev_ref_count == nullptr);
321  dev_ref_count = new DevRefCountCropped(cropped_from);
322  }
323 
324  /** Decrement the reference count of any owned allocation and free host
325  * and device memory if it hits zero. Sets alloc to nullptr. */
326  void decref(bool device_only = false) {
327  if (owns_host_memory() && !device_only) {
328  int new_count = --(alloc->ref_count);
329  if (new_count == 0) {
330  void (*fn)(void *) = alloc->deallocate_fn;
331  alloc->~AllocationHeader();
332  fn(alloc);
333  }
334  buf.host = nullptr;
335  alloc = nullptr;
336  set_host_dirty(false);
337  }
338  int new_count = 0;
339  if (dev_ref_count) {
340  new_count = --(dev_ref_count->count);
341  }
342  if (new_count == 0) {
343  if (buf.device) {
344  assert(!(alloc && device_dirty()) &&
345  "Implicitly freeing a dirty device allocation while a host allocation still lives. "
346  "Call device_free explicitly if you want to drop dirty device-side data. "
347  "Call copy_to_host explicitly if you want the data copied to the host allocation "
348  "before the device allocation is freed.");
349  int result = halide_error_code_success;
350  if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
351  result = buf.device_interface->detach_native(nullptr, &buf);
352  } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
353  result = buf.device_interface->device_and_host_free(nullptr, &buf);
354  } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
355  result = buf.device_interface->device_release_crop(nullptr, &buf);
356  } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
357  result = buf.device_interface->device_free(nullptr, &buf);
358  }
359  // No reasonable way to return the error, but we can at least assert-fail in debug builds.
360  assert((result == halide_error_code_success) && "device_interface call returned a nonzero result in Buffer::decref()");
361  (void)result;
362  }
363  if (dev_ref_count) {
364  if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
365  delete (DevRefCountCropped *)dev_ref_count;
366  } else {
367  delete dev_ref_count;
368  }
369  }
370  }
371  dev_ref_count = nullptr;
372  buf.device = 0;
373  buf.device_interface = nullptr;
374  }
375 
376  void free_shape_storage() {
377  if (buf.dim != shape) {
378  delete[] buf.dim;
379  buf.dim = nullptr;
380  }
381  }
382 
383  template<int DimsSpecified>
384  void make_static_shape_storage() {
385  static_assert(Dims == AnyDims || Dims == DimsSpecified,
386  "Number of arguments to Buffer() does not match static dimensionality");
387  buf.dimensions = DimsSpecified;
388  if constexpr (Dims == AnyDims) {
389  if constexpr (DimsSpecified <= InClassDimStorage) {
390  buf.dim = shape;
391  } else {
392  static_assert(DimsSpecified >= 1);
393  buf.dim = new halide_dimension_t[DimsSpecified];
394  }
395  } else {
396  static_assert(InClassDimStorage >= Dims);
397  buf.dim = shape;
398  }
399  }
400 
401  void make_shape_storage(const int dimensions) {
402  if (Dims != AnyDims && Dims != dimensions) {
403  assert(false && "Number of arguments to Buffer() does not match static dimensionality");
404  }
405  // This should usually be inlined, so if dimensions is statically known,
406  // we can skip the call to new
407  buf.dimensions = dimensions;
408  buf.dim = (dimensions <= InClassDimStorage) ? shape : new halide_dimension_t[dimensions];
409  }
410 
411  void copy_shape_from(const halide_buffer_t &other) {
412  // All callers of this ensure that buf.dimensions == other.dimensions.
413  make_shape_storage(other.dimensions);
414  std::copy(other.dim, other.dim + other.dimensions, buf.dim);
415  }
416 
417  template<typename T2, int D2, int S2>
418  void move_shape_from(Buffer<T2, D2, S2> &&other) {
419  if (other.shape == other.buf.dim) {
420  copy_shape_from(other.buf);
421  } else {
422  buf.dim = other.buf.dim;
423  other.buf.dim = nullptr;
424  }
425  }
426 
427  /** Initialize the shape from a halide_buffer_t. */
428  void initialize_from_buffer(const halide_buffer_t &b,
429  BufferDeviceOwnership ownership) {
430  memcpy(&buf, &b, sizeof(halide_buffer_t));
431  copy_shape_from(b);
432  if (b.device) {
433  dev_ref_count = new DeviceRefCount;
434  dev_ref_count->ownership = ownership;
435  }
436  }
437 
438  /** Initialize the shape from an array of ints */
439  void initialize_shape(const int *sizes) {
440  for (int i = 0; i < buf.dimensions; i++) {
441  buf.dim[i].min = 0;
442  buf.dim[i].extent = sizes[i];
443  if (i == 0) {
444  buf.dim[i].stride = 1;
445  } else {
446  buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
447  }
448  }
449  }
450 
451  /** Initialize the shape from a vector of extents */
452  void initialize_shape(const std::vector<int> &sizes) {
453  assert(buf.dimensions == (int)sizes.size());
454  initialize_shape(sizes.data());
455  }
456 
457  /** Initialize the shape from the static shape of an array */
458  template<typename Array, size_t N>
459  void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
460  buf.dim[next].min = 0;
461  buf.dim[next].extent = (int)N;
462  if (next == 0) {
463  buf.dim[next].stride = 1;
464  } else {
465  initialize_shape_from_array_shape(next - 1, vals[0]);
466  buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
467  }
468  }
469 
470  /** Base case for the template recursion above. */
471  template<typename T2>
472  void initialize_shape_from_array_shape(int, const T2 &) {
473  }
474 
475  /** Get the dimensionality of a multi-dimensional C array */
476  template<typename Array, size_t N>
477  static int dimensionality_of_array(Array (&vals)[N]) {
478  return dimensionality_of_array(vals[0]) + 1;
479  }
480 
481  template<typename T2>
482  static int dimensionality_of_array(const T2 &) {
483  return 0;
484  }
485 
486  /** Get the underlying halide_type_t of an array's element type. */
487  template<typename Array, size_t N>
488  static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
489  return scalar_type_of_array(vals[0]);
490  }
491 
492  template<typename T2>
493  static halide_type_t scalar_type_of_array(const T2 &) {
494  return halide_type_of<typename std::remove_cv<T2>::type>();
495  }
496 
497  /** Crop a single dimension without handling device allocation. */
498  void crop_host(int d, int min, int extent) {
499  assert(dim(d).min() <= min);
500  assert(dim(d).max() >= min + extent - 1);
501  ptrdiff_t shift = min - dim(d).min();
502  if (buf.host != nullptr) {
503  buf.host += (shift * dim(d).stride()) * type().bytes();
504  }
505  buf.dim[d].min = min;
506  buf.dim[d].extent = extent;
507  }
508 
509  /** Crop as many dimensions as are in rect, without handling device allocation. */
510  void crop_host(const std::vector<std::pair<int, int>> &rect) {
511  assert(rect.size() <= static_cast<decltype(rect.size())>(std::numeric_limits<int>::max()));
512  int limit = (int)rect.size();
513  assert(limit <= dimensions());
514  for (int i = 0; i < limit; i++) {
515  crop_host(i, rect[i].first, rect[i].second);
516  }
517  }
518 
519  void complete_device_crop(Buffer<T, Dims, InClassDimStorage> &result_host_cropped) const {
520  assert(buf.device_interface != nullptr);
521  if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == halide_error_code_success) {
522  // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
523  // is it possible to get to this point without incref having run at least once since
524  // the device field was set? (I.e. in the internal logic of crop. incref might have been
525  // called.)
526  if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
527  result_host_cropped.crop_from(((DevRefCountCropped *)dev_ref_count)->cropped_from);
528  } else {
529  result_host_cropped.crop_from(*this);
530  }
531  }
532  }
533 
534  /** slice a single dimension without handling device allocation. */
535  void slice_host(int d, int pos) {
536  static_assert(Dims == AnyDims);
537  assert(dimensions() > 0);
538  assert(d >= 0 && d < dimensions());
539  assert(pos >= dim(d).min() && pos <= dim(d).max());
540  buf.dimensions--;
541  ptrdiff_t shift = pos - buf.dim[d].min;
542  if (buf.host != nullptr) {
543  buf.host += (shift * buf.dim[d].stride) * type().bytes();
544  }
545  for (int i = d; i < buf.dimensions; i++) {
546  buf.dim[i] = buf.dim[i + 1];
547  }
548  buf.dim[buf.dimensions] = {0, 0, 0};
549  }
550 
551  void complete_device_slice(Buffer<T, AnyDims, InClassDimStorage> &result_host_sliced, int d, int pos) const {
552  assert(buf.device_interface != nullptr);
553  if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == halide_error_code_success) {
554  // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
555  // is it possible to get to this point without incref having run at least once since
556  // the device field was set? (I.e. in the internal logic of slice. incref might have been
557  // called.)
558  if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
559  // crop_from() is correct here, despite the fact that we are slicing.
560  result_host_sliced.crop_from(((DevRefCountCropped *)dev_ref_count)->cropped_from);
561  } else {
562  // crop_from() is correct here, despite the fact that we are slicing.
563  result_host_sliced.crop_from(*this);
564  }
565  }
566  }
567 
568 public:
569  typedef T ElemType;
570 
571  /** Read-only access to the shape */
572  class Dimension {
573  const halide_dimension_t &d;
574 
575  public:
576  /** The lowest coordinate in this dimension */
577  HALIDE_ALWAYS_INLINE int min() const {
578  return d.min;
579  }
580 
581  /** The number of elements in memory you have to step over to
582  * increment this coordinate by one. */
583  HALIDE_ALWAYS_INLINE int stride() const {
584  return d.stride;
585  }
586 
587  /** The extent of the image along this dimension */
588  HALIDE_ALWAYS_INLINE int extent() const {
589  return d.extent;
590  }
591 
592  /** The highest coordinate in this dimension */
593  HALIDE_ALWAYS_INLINE int max() const {
594  return min() + extent() - 1;
595  }
596 
597  /** An iterator class, so that you can iterate over
598  * coordinates in a dimension using a range-based for loop. */
599  struct iterator {
600  int val;
601  int operator*() const {
602  return val;
603  }
604  bool operator!=(const iterator &other) const {
605  return val != other.val;
606  }
607  iterator &operator++() {
608  val++;
609  return *this;
610  }
611  };
612 
613  /** An iterator that points to the min coordinate */
614  HALIDE_ALWAYS_INLINE iterator begin() const {
615  return {min()};
616  }
617 
618  /** An iterator that points to one past the max coordinate */
619  HALIDE_ALWAYS_INLINE iterator end() const {
620  return {min() + extent()};
621  }
622 
623  explicit Dimension(const halide_dimension_t &dim)
624  : d(dim) {
625  }
626  };
627 
628  /** Access the shape of the buffer */
629  HALIDE_ALWAYS_INLINE Dimension dim(int i) const {
630  assert(i >= 0 && i < this->dimensions());
631  return Dimension(buf.dim[i]);
632  }
633 
634  /** Access to the mins, strides, extents. Will be deprecated. Do not use. */
635  // @{
636  int min(int i) const {
637  return dim(i).min();
638  }
639  int extent(int i) const {
640  return dim(i).extent();
641  }
642  int stride(int i) const {
643  return dim(i).stride();
644  }
645  // @}
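
// Illustrative sketch (not part of the original header): dim(i) exposes per-dimension
// metadata, and each Dimension can be iterated over its coordinate range.
//
//     Halide::Runtime::Buffer<float> im(8, 6);
//     int row_stride = im.dim(1).stride();   // elements to step to the next row
//     for (int y : im.dim(1)) {              // y runs from dim(1).min() to dim(1).max()
//         for (int x : im.dim(0)) {
//             // visit coordinate (x, y)
//         }
//     }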
646 
647  /** The total number of elements this buffer represents. Equal to
648  * the product of the extents */
649  size_t number_of_elements() const {
650  return buf.number_of_elements();
651  }
652 
653  /** Get the dimensionality of the buffer. */
654  int dimensions() const {
655  if constexpr (has_static_dimensions) {
656  return Dims;
657  } else {
658  return buf.dimensions;
659  }
660  }
661 
662  /** Get the type of the elements. */
663  halide_type_t type() const {
664  return buf.type;
665  }
666 
667  /** A pointer to the element with the lowest address. If all
668  * strides are positive, equal to the host pointer. */
669  T *begin() const {
670  assert(buf.host != nullptr); // Cannot call begin() on an unallocated Buffer.
671  return (T *)buf.begin();
672  }
673 
674  /** A pointer to one beyond the element with the highest address. */
675  T *end() const {
676  assert(buf.host != nullptr); // Cannot call end() on an unallocated Buffer.
677  return (T *)buf.end();
678  }
679 
680  /** The total number of bytes spanned by the data in memory. */
681  size_t size_in_bytes() const {
682  return buf.size_in_bytes();
683  }
684 
685  /** Reset the Buffer to be equivalent to a default-constructed Buffer
686  * of the same static type (if any); Buffer<void> will have its runtime
687  * type reset to uint8. */
688  void reset() {
689  *this = Buffer();
690  }
691 
692  Buffer()
693  : shape() {
694  buf.type = static_halide_type();
695  // If Dims is statically known, we must create storage for that many dimensions;
696  // otherwise, make a zero-dimensional buffer.
697  constexpr int buf_dimensions = (Dims == AnyDims) ? 0 : Dims;
698  make_static_shape_storage<buf_dimensions>();
699  }
700 
701  /** Make a Buffer from a halide_buffer_t */
702  explicit Buffer(const halide_buffer_t &buf,
703  BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
704  assert(T_is_void || buf.type == static_halide_type());
705  initialize_from_buffer(buf, ownership);
706  }
707 
708  /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
709  template<typename T2, int D2, int S2>
710  friend class Buffer;
711 
712 private:
713  template<typename T2, int D2, int S2>
714  static void static_assert_can_convert_from() {
715  static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
716  "Can't convert from a Buffer<const T> to a Buffer<T>");
717  static_assert(std::is_same<typename std::remove_const<T>::type,
718  typename std::remove_const<T2>::type>::value ||
719  T_is_void || Buffer<T2, D2, S2>::T_is_void,
720  "type mismatch constructing Buffer");
721  static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2,
722  "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality");
723  }
724 
725 public:
726  static void set_default_allocate_fn(void *(*allocate_fn)(size_t)) {
727  Internal::DefaultAllocatorFns::default_allocate_fn = allocate_fn;
728  }
729  static void set_default_deallocate_fn(void (*deallocate_fn)(void *)) {
730  Internal::DefaultAllocatorFns::default_deallocate_fn = deallocate_fn;
731  }
732 
733  /** Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type.
734  * If this can be determined at compile time, fail with a static assert; otherwise
735  * return a boolean based on runtime typing. */
736  template<typename T2, int D2, int S2>
737  static bool can_convert_from(const Buffer<T2, D2, S2> &other) {
738  static_assert_can_convert_from<T2, D2, S2>();
739  if (Buffer<T2, D2, S2>::T_is_void && !T_is_void) {
740  if (other.type() != static_halide_type()) {
741  return false;
742  }
743  }
744  if (Dims != AnyDims) {
745  if (other.dimensions() != Dims) {
746  return false;
747  }
748  }
749  return true;
750  }
751 
752  /** Fail an assertion at runtime or compile-time if a Buffer<T, Dims, InClassDimStorage>
753  * cannot be constructed from some other Buffer type. */
754  template<typename T2, int D2, int S2>
755  static void assert_can_convert_from(const Buffer<T2, D2, S2> &other) {
756  // Explicitly call static_assert_can_convert_from() here so
757  // that we always get compile-time checking, even if compiling with
758  // assertions disabled.
759  static_assert_can_convert_from<T2, D2, S2>();
760  assert(can_convert_from(other));
761  }
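
// Illustrative sketch (not part of the original header): checking convertibility from a
// runtime-typed Buffer before constructing a statically typed view of it.
//
//     Halide::Runtime::Buffer<void> generic = make_generic();   // hypothetical runtime-typed buffer
//     if (Halide::Runtime::Buffer<float>::can_convert_from(generic)) {
//         Halide::Runtime::Buffer<float> typed(generic);        // conversion also asserts internally
//     }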
762 
763  /** Copy constructor. Does not copy underlying data. */
764  Buffer(const Buffer<T, Dims, InClassDimStorage> &other)
765  : buf(other.buf),
766  alloc(other.alloc) {
767  other.incref();
768  dev_ref_count = other.dev_ref_count;
769  copy_shape_from(other.buf);
770  }
771 
772  /** Construct a Buffer from a Buffer of different dimensionality
773  * and type. Asserts that the type and dimensionality matches (at runtime,
774  * if one of the types is void). Note that this constructor is
775  * implicit. This, for example, lets you pass things like
776  * Buffer<T> or Buffer<const void> to functions expecting
777  * Buffer<const T>. */
778  template<typename T2, int D2, int S2>
779  Buffer(const Buffer<T2, D2, S2> &other)
780  : buf(other.buf),
781  alloc(other.alloc) {
782  assert_can_convert_from(other);
783  other.incref();
784  dev_ref_count = other.dev_ref_count;
785  copy_shape_from(other.buf);
786  }
787 
788  /** Move constructor */
789  Buffer(Buffer<T, Dims, InClassDimStorage> &&other) noexcept
790  : buf(other.buf),
791  alloc(other.alloc),
792  dev_ref_count(other.dev_ref_count) {
793  other.dev_ref_count = nullptr;
794  other.alloc = nullptr;
795  move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
796  other.buf = halide_buffer_t();
797  }
798 
799  /** Move-construct a Buffer from a Buffer of different
800  * dimensionality and type. Asserts that the types match (at
801  * runtime if one of the types is void). */
802  template<typename T2, int D2, int S2>
803  Buffer(Buffer<T2, D2, S2> &&other)
804  : buf(other.buf),
805  alloc(other.alloc),
806  dev_ref_count(other.dev_ref_count) {
807  assert_can_convert_from(other);
808  other.dev_ref_count = nullptr;
809  other.alloc = nullptr;
810  move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
811  other.buf = halide_buffer_t();
812  }
813 
814  /** Assign from another Buffer of possibly-different
815  * dimensionality and type. Asserts that the types match (at
816  * runtime if one of the types is void). */
817  template<typename T2, int D2, int S2>
818  Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T2, D2, S2> &other) {
819  if ((const void *)this == (const void *)&other) {
820  return *this;
821  }
822  assert_can_convert_from(other);
823  other.incref();
824  decref();
825  dev_ref_count = other.dev_ref_count;
826  alloc = other.alloc;
827  free_shape_storage();
828  buf = other.buf;
829  copy_shape_from(other.buf);
830  return *this;
831  }
832 
833  /** Standard assignment operator */
834  Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T, Dims, InClassDimStorage> &other) {
835  // The cast to void* here is just to satisfy clang-tidy
836  if ((const void *)this == (const void *)&other) {
837  return *this;
838  }
839  other.incref();
840  decref();
841  dev_ref_count = other.dev_ref_count;
842  alloc = other.alloc;
843  free_shape_storage();
844  buf = other.buf;
845  copy_shape_from(other.buf);
846  return *this;
847  }
848 
849  /** Move from another Buffer of possibly-different
850  * dimensionality and type. Asserts that the types match (at
851  * runtime if one of the types is void). */
852  template<typename T2, int D2, int S2>
853  Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T2, D2, S2> &&other) {
854  assert_can_convert_from(other);
855  decref();
856  alloc = other.alloc;
857  other.alloc = nullptr;
858  dev_ref_count = other.dev_ref_count;
859  other.dev_ref_count = nullptr;
860  free_shape_storage();
861  buf = other.buf;
862  move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
863  other.buf = halide_buffer_t();
864  return *this;
865  }
866 
867  /** Standard move-assignment operator */
868  Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T, Dims, InClassDimStorage> &&other) noexcept {
869  decref();
870  alloc = other.alloc;
871  other.alloc = nullptr;
872  dev_ref_count = other.dev_ref_count;
873  other.dev_ref_count = nullptr;
874  free_shape_storage();
875  buf = other.buf;
876  move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
877  other.buf = halide_buffer_t();
878  return *this;
879  }
880 
881  /** Check the product of the extents fits in memory. */
882  void check_overflow() {
883  size_t size = type().bytes();
884  for (int i = 0; i < dimensions(); i++) {
885  size *= dim(i).extent();
886  }
887  // We allow 2^31 or 2^63 bytes, so drop the top bit.
888  size = (size << 1) >> 1;
889  for (int i = 0; i < dimensions(); i++) {
890  size /= dim(i).extent();
891  }
892  assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
893  }
894 
895  /** Allocate memory for this Buffer. Drops the reference to any
896  * owned memory. */
897  void allocate(void *(*allocate_fn)(size_t) = nullptr,
898  void (*deallocate_fn)(void *) = nullptr) {
899  // Drop any existing allocation
900  deallocate();
901 
902  // Conservatively align images to (usually) 128 bytes. This is enough
903  // alignment for all the platforms we might use. Also ensure that the allocation
904  // is such that the logical size is an integral multiple of 128 bytes (or a bit more).
905  constexpr size_t alignment = HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT;
906 
907  const auto align_up = [=](size_t value) -> size_t {
908  return (value + alignment - 1) & ~(alignment - 1);
909  };
910 
911  size_t size = size_in_bytes();
912 
913 #if HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
914  // Only use aligned_alloc() if no custom allocators are specified.
915  if (!allocate_fn && !deallocate_fn) {
916  // As a practical matter, sizeof(AllocationHeader) is going to be no more than 16 bytes
917  // on any supported platform, so we will just overallocate by 'alignment'
918  // so that the user storage also starts at an aligned point. This is a bit
919  // wasteful, but probably not a big deal.
920  static_assert(sizeof(AllocationHeader) <= alignment);
921  void *alloc_storage = ::aligned_alloc(alignment, align_up(size) + alignment);
922  assert((uintptr_t)alloc_storage == align_up((uintptr_t)alloc_storage));
923  alloc = new (alloc_storage) AllocationHeader(free);
924  buf.host = (uint8_t *)((uintptr_t)alloc_storage + alignment);
925  return;
926  }
927  // else fall thru
928 #endif
929  if (!allocate_fn) {
930  allocate_fn = Internal::DefaultAllocatorFns::default_allocate_fn;
931  if (!allocate_fn) {
932  allocate_fn = malloc;
933  }
934  }
935  if (!deallocate_fn) {
936  deallocate_fn = Internal::DefaultAllocatorFns::default_deallocate_fn;
937  if (!deallocate_fn) {
938  deallocate_fn = free;
939  }
940  }
941 
942  static_assert(sizeof(AllocationHeader) <= alignment);
943 
944  // malloc() and friends must return a pointer aligned to at least alignof(std::max_align_t);
945  // make sure this is OK for AllocationHeader, since it always goes at the start
946  static_assert(alignof(AllocationHeader) <= alignof(std::max_align_t));
947 
948  const size_t requested_size = align_up(size + alignment +
949  std::max(0, (int)sizeof(AllocationHeader) -
950  (int)sizeof(std::max_align_t)));
951  void *alloc_storage = allocate_fn(requested_size);
952  alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
953  uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
954  buf.host = (uint8_t *)align_up((uintptr_t)unaligned_ptr);
955  }
956 
957  /** Drop reference to any owned host or device memory, possibly
958  * freeing it, if this buffer held the last reference to
959  * it. Retains the shape of the buffer. Does nothing if this
960  * buffer did not allocate its own memory. */
961  void deallocate() {
962  decref();
963  }
964 
965  /** Drop reference to any owned device memory, possibly freeing it
966  * if this buffer held the last reference to it. Asserts that
967  * device_dirty is false. */
968  void device_deallocate() {
969  decref(true);
970  }
971 
972  /** Allocate a new image of the given size with a runtime
973  * type. Only used when you do know what size you want but you
974  * don't know statically what type the elements are. Pass zeroes
975  * to make a buffer suitable for bounds query calls. */
976  template<typename... Args,
977  typename = typename std::enable_if<AllInts<Args...>::value>::type>
978  Buffer(halide_type_t t, int first, Args... rest) {
979  if (!T_is_void) {
980  assert(static_halide_type() == t);
981  }
982  int extents[] = {first, (int)rest...};
983  buf.type = t;
984  constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
985  make_static_shape_storage<buf_dimensions>();
986  initialize_shape(extents);
987  if (!Internal::any_zero(extents)) {
988  check_overflow();
989  allocate();
990  }
991  }
992 
993  /** Allocate a new image of the given size. Pass zeroes to make a
994  * buffer suitable for bounds query calls. */
995  // @{
996 
997  // The overload with one argument is 'explicit', so that
998  // (say) int is not implicitly convertible to Buffer<int>
999  explicit Buffer(int first) {
1000  static_assert(!T_is_void,
1001  "To construct a Buffer<void>, pass a halide_type_t as the first argument to the constructor");
1002  int extents[] = {first};
1003  buf.type = static_halide_type();
1004  constexpr int buf_dimensions = 1;
1005  make_static_shape_storage<buf_dimensions>();
1006  initialize_shape(extents);
1007  if (first != 0) {
1008  check_overflow();
1009  allocate();
1010  }
1011  }
1012 
1013  template<typename... Args,
1014  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1015  Buffer(int first, int second, Args... rest) {
1016  static_assert(!T_is_void,
1017  "To construct a Buffer<void>, pass a halide_type_t as the first argument to the constructor");
1018  int extents[] = {first, second, (int)rest...};
1019  buf.type = static_halide_type();
1020  constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
1021  make_static_shape_storage<buf_dimensions>();
1022  initialize_shape(extents);
1023  if (!Internal::any_zero(extents)) {
1024  check_overflow();
1025  allocate();
1026  }
1027  }
1028  // @}
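
// Illustrative sketch (not part of the original header): a zero-sized buffer can be used
// for a bounds query against AOT-generated code, then allocated once the shape is known.
//
//     Halide::Runtime::Buffer<float> input(0, 0);   // no allocation; shape to be inferred
//     my_filter(input, output);                     // hypothetical pipeline call in bounds-query mode
//     input.allocate();                             // allocate host memory for the inferred shape
//     my_filter(input, output);                     // run for real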
1029 
1030  /** Allocate a new image of unknown type using a vector of ints as the size. */
1031  Buffer(halide_type_t t, const std::vector<int> &sizes) {
1032  if (!T_is_void) {
1033  assert(static_halide_type() == t);
1034  }
1035  buf.type = t;
1036  // make_shape_storage() will do a runtime check that dimensionality matches.
1037  make_shape_storage((int)sizes.size());
1038  initialize_shape(sizes);
1039  if (!Internal::any_zero(sizes)) {
1040  check_overflow();
1041  allocate();
1042  }
1043  }
1044 
1045  /** Allocate a new image of known type using a vector of ints as the size. */
1046  explicit Buffer(const std::vector<int> &sizes)
1047  : Buffer(static_halide_type(), sizes) {
1048  }
1049 
1050 private:
1051  // Create a copy of the sizes vector, ordered as specified by order.
1052  static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
1053  assert(order.size() == sizes.size());
1054  std::vector<int> ordered_sizes(sizes.size());
1055  for (size_t i = 0; i < sizes.size(); ++i) {
1056  ordered_sizes[i] = sizes.at(order[i]);
1057  }
1058  return ordered_sizes;
1059  }
1060 
1061 public:
1062  /** Allocate a new image of unknown type using a vector of ints as the size and
1063  * a vector of indices indicating the storage order for each dimension. The
1064  * length of the sizes vector and the storage-order vector must match. For instance,
1065  * to allocate an interleaved RGB buffer, you would pass {2, 0, 1} for storage_order. */
1066  Buffer(halide_type_t t, const std::vector<int> &sizes, const std::vector<int> &storage_order)
1067  : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
1068  transpose(storage_order);
1069  }
1070 
1071  Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
1072  : Buffer(static_halide_type(), sizes, storage_order) {
1073  }
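
// Illustrative sketch (not part of the original header): allocating an interleaved
// 3-channel image by listing sizes as {width, height, channels} and storage_order {2, 0, 1},
// which makes the channel dimension innermost in memory.
//
//     Halide::Runtime::Buffer<uint8_t> rgb({640, 480, 3}, {2, 0, 1});
//     // expected layout: rgb.dim(2).stride() == 1, rgb.dim(0).stride() == 3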
1074 
1075  /** Make a Buffer that refers to a statically sized array. Does not
1076  * take ownership of the data, and does not set the host_dirty flag. */
1077  template<typename Array, size_t N>
1078  explicit Buffer(Array (&vals)[N]) {
1079  const int buf_dimensions = dimensionality_of_array(vals);
1080  buf.type = scalar_type_of_array(vals);
1081  buf.host = (uint8_t *)vals;
1082  make_shape_storage(buf_dimensions);
1083  initialize_shape_from_array_shape(buf.dimensions - 1, vals);
1084  }
1085 
1086  /** Initialize a Buffer of runtime type from a pointer and some
1087  * sizes. Assumes dense row-major packing and a min coordinate of
1088  * zero. Does not take ownership of the data and does not set the
1089  * host_dirty flag. */
1090  template<typename... Args,
1091  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1092  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
1093  if (!T_is_void) {
1094  assert(static_halide_type() == t);
1095  }
1096  int extents[] = {first, (int)rest...};
1097  buf.type = t;
1098  buf.host = (uint8_t *)const_cast<void *>(data);
1099  constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1100  make_static_shape_storage<buf_dimensions>();
1101  initialize_shape(extents);
1102  }
1103 
1104  /** Initialize a Buffer from a pointer and some sizes. Assumes
1105  * dense row-major packing and a min coordinate of zero. Does not
1106  * take ownership of the data and does not set the host_dirty flag. */
1107  template<typename... Args,
1108  typename = typename std::enable_if<AllInts<Args...>::value>::type>
1109  explicit Buffer(T *data, int first, Args &&...rest) {
1110  int extents[] = {first, (int)rest...};
1111  buf.type = static_halide_type();
1112  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1113  constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1114  make_static_shape_storage<buf_dimensions>();
1115  initialize_shape(extents);
1116  }
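
// Illustrative sketch (not part of the original header): wrapping caller-owned, densely
// packed memory; the Buffer neither frees the data nor sets host_dirty.
//
//     std::vector<uint16_t> pixels(640 * 480);
//     Halide::Runtime::Buffer<uint16_t> wrapped(pixels.data(), 640, 480);
//     // wrapped.owns_host_memory() == false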
1117 
1118  /** Initialize a Buffer from a pointer and a vector of
1119  * sizes. Assumes dense row-major packing and a min coordinate of
1120  * zero. Does not take ownership of the data and does not set the
1121  * host_dirty flag. */
1122  explicit Buffer(T *data, const std::vector<int> &sizes) {
1123  buf.type = static_halide_type();
1124  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1125  make_shape_storage((int)sizes.size());
1126  initialize_shape(sizes);
1127  }
1128 
1129  /** Initialize a Buffer of runtime type from a pointer and a
1130  * vector of sizes. Assumes dense row-major packing and a min
1131  * coordinate of zero. Does not take ownership of the data and
1132  * does not set the host_dirty flag. */
1133  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
1134  if (!T_is_void) {
1135  assert(static_halide_type() == t);
1136  }
1137  buf.type = t;
1138  buf.host = (uint8_t *)const_cast<void *>(data);
1139  make_shape_storage((int)sizes.size());
1140  initialize_shape(sizes);
1141  }
1142 
1143  /** Initialize a Buffer from a pointer to the min coordinate and
1144  * an array describing the shape. Does not take ownership of the
1145  * data, and does not set the host_dirty flag. */
1146  explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
1147  if (!T_is_void) {
1148  assert(static_halide_type() == t);
1149  }
1150  buf.type = t;
1151  buf.host = (uint8_t *)const_cast<void *>(data);
1152  make_shape_storage(d);
1153  for (int i = 0; i < d; i++) {
1154  buf.dim[i] = shape[i];
1155  }
1156  }
1157 
1158  /** Initialize a Buffer from a pointer to the min coordinate and
1159  * a vector describing the shape. Does not take ownership of the
1160  * data, and does not set the host_dirty flag. */
1161  explicit inline Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
1162  const std::vector<halide_dimension_t> &shape)
1163  : Buffer(t, data, (int)shape.size(), shape.data()) {
1164  }
1165 
1166  /** Initialize a Buffer from a pointer to the min coordinate and
1167  * an array describing the shape. Does not take ownership of the
1168  * data and does not set the host_dirty flag. */
1169  explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
1170  buf.type = static_halide_type();
1171  buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1172  make_shape_storage(d);
1173  for (int i = 0; i < d; i++) {
1174  buf.dim[i] = shape[i];
1175  }
1176  }
1177 
1178  /** Initialize a Buffer from a pointer to the min coordinate and
1179  * a vector describing the shape. Does not take ownership of the
1180  * data, and does not set the host_dirty flag. */
1181  explicit inline Buffer(T *data, const std::vector<halide_dimension_t> &shape)
1182  : Buffer(data, (int)shape.size(), shape.data()) {
1183  }
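
// Illustrative sketch (not part of the original header): wrapping existing interleaved
// BGRA data by spelling out min/extent/stride per dimension with halide_dimension_t.
//
//     uint8_t *bgra = get_frame();  // hypothetical 640x480 image, 4 bytes per pixel, rows densely packed
//     halide_dimension_t shape[3] = {{0, 640, 4}, {0, 480, 640 * 4}, {0, 4, 1}};
//     Halide::Runtime::Buffer<uint8_t> im(bgra, 3, shape);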
1184 
1185  /** Destructor. Will release any underlying owned allocation if
1186  * this is the last reference to it. Will assert fail if there are
1187  * weak references to this Buffer outstanding. */
1188  ~Buffer() {
1189  decref();
1190  free_shape_storage();
1191  }
1192 
1193  /** Get a pointer to the raw halide_buffer_t this wraps. */
1194  // @{
1195  halide_buffer_t *raw_buffer() {
1196  return &buf;
1197  }
1198 
1199  const halide_buffer_t *raw_buffer() const {
1200  return &buf;
1201  }
1202  // @}
1203 
1204  /** Provide a cast operator to halide_buffer_t *, so that
1205  * instances can be passed directly to Halide filters. */
1206  operator halide_buffer_t *() {
1207  return &buf;
1208  }
1209 
1210  /** Return a typed reference to this Buffer. Useful for converting
1211  * a reference to a Buffer<void> to a reference to, for example, a
1212  * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1213  * You can also optionally specify a new value for Dims; this is useful
1214  * mainly for removing the dimensionality constraint on a Buffer with
1215  * explicit dimensionality. Does a runtime assert if the source buffer type
1216  * is void or the new dimensionality is incompatible. */
1217  template<typename T2, int D2 = Dims>
1218  HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> &as() & {
1219  Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1220  return *((Buffer<T2, D2, InClassDimStorage> *)this);
1221  }
1222 
1223  /** Return a const typed reference to this Buffer. Useful for converting
1224  * a reference to a Buffer<void> to a reference to, for example, a
1225  * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1226  * You can also optionally specify a new value for Dims; this is useful
1227  * mainly for removing the dimensionality constraint on a Buffer with
1228  * explicit dimensionality. Does a runtime assert if the source buffer type
1229  * is void or the new dimensionality is incompatible. */
1230  template<typename T2, int D2 = Dims>
1231  HALIDE_ALWAYS_INLINE const Buffer<T2, D2, InClassDimStorage> &as() const & {
1232  Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1233  return *((const Buffer<T2, D2, InClassDimStorage> *)this);
1234  }
1235 
1236  /** Return an rval reference to this Buffer. Useful for converting
1237  * a reference to a Buffer<void> to a reference to, for example, a
1238  * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1239  * You can also optionally specify a new value for Dims; this is useful
1240  * mainly for removing the dimensionality constraint on a Buffer with
1241  * explicit dimensionality. Does a runtime assert if the source buffer type
1242  * is void or the new dimensionality is incompatible. */
1243  template<typename T2, int D2 = Dims>
1244  HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> as() && {
1245  Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1246  return *((Buffer<T2, D2, InClassDimStorage> *)this);
1247  }
1248 
1249  /** as_const() is syntactic sugar for .as<const T>(), to avoid the need
1250  * to recapitulate the type argument. */
1251  // @{
1252  HALIDE_ALWAYS_INLINE
1253  Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() & {
1254  // Note that we can skip the assert_can_convert_from(), since T -> const T
1255  // conversion is always legal.
1256  return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1257  }
1258 
1259  HALIDE_ALWAYS_INLINE
1260  const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() const & {
1261  return *((const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1262  }
1263 
1264  HALIDE_ALWAYS_INLINE
1265  Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> as_const() && {
1266  return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1267  }
1268  // @}
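
// Illustrative sketch (not part of the original header): reinterpreting a Buffer in place,
// without copying data.
//
//     Halide::Runtime::Buffer<void> generic = make_generic();      // hypothetical runtime-typed buffer
//     auto &floats = generic.as<float>();                          // asserts the runtime type matches
//     Halide::Runtime::Buffer<const float> &ro = floats.as_const();  // read-only view of the same data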
1269 
1270  /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<const T>& when
1271  * passing arguments */
1272  template<typename T2 = T, typename = typename std::enable_if<!std::is_const<T2>::value>::type>
1273  operator Buffer<typename std::add_const<T2>::type, Dims, InClassDimStorage> &() & {
1274  return as_const();
1275  }
1276 
1277  /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<void>& when
1278  * passing arguments */
1279  template<typename TVoid,
1280  typename T2 = T,
1281  typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1282  !std::is_void<T2>::value &&
1283  !std::is_const<T2>::value>::type>
1284  operator Buffer<TVoid, Dims, InClassDimStorage> &() & {
1285  return as<TVoid, Dims>();
1286  }
1287 
1288  /** Add some syntactic sugar to allow autoconversion from Buffer<const T> to Buffer<const void>& when
1289  * passing arguments */
1290  template<typename TVoid,
1291  typename T2 = T,
1292  typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1293  !std::is_void<T2>::value &&
1294  std::is_const<T2>::value>::type>
1296  return as<const TVoid, Dims>();
1297  }
1298 
1299  /** Conventional names for the first three dimensions. */
1300  // @{
1301  int width() const {
1302  return (dimensions() > 0) ? dim(0).extent() : 1;
1303  }
1304  int height() const {
1305  return (dimensions() > 1) ? dim(1).extent() : 1;
1306  }
1307  int channels() const {
1308  return (dimensions() > 2) ? dim(2).extent() : 1;
1309  }
1310  // @}
1311 
1312  /** Conventional names for the min and max value of each dimension */
1313  // @{
1314  int left() const {
1315  return dim(0).min();
1316  }
1317 
1318  int right() const {
1319  return dim(0).max();
1320  }
1321 
1322  int top() const {
1323  return dim(1).min();
1324  }
1325 
1326  int bottom() const {
1327  return dim(1).max();
1328  }
1329  // @}
1330 
1331  /** Make a new image which is a deep copy of this image. Use crop
1332  * or slice followed by copy to make a copy of only a portion of
1333  * the image. The new image uses the same memory layout as the
1334  * original, with holes compacted away. Note that the returned
1335  * Buffer is always of a non-const type T (ie:
1336  *
1337  * Buffer<const T>.copy() -> Buffer<T> rather than Buffer<const T>
1338  *
1339  * which is always safe, since we are making a deep copy. (The caller
1340  * can easily cast it back to Buffer<const T> if desired, which is
1341  * always safe and free.)
1342  */
1343  Buffer<not_const_T, Dims, InClassDimStorage> copy(void *(*allocate_fn)(size_t) = nullptr,
1344  void (*deallocate_fn)(void *) = nullptr) const {
1345  Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
1346  dst.copy_from(*this);
1347  return dst;
1348  }
1349 
1350  /** Like copy(), but the copy is created in interleaved memory layout
1351  * (vs. keeping the same memory layout as the original). Requires that 'this'
1352  * has exactly 3 dimensions.
1353  */
1354  Buffer<not_const_T, Dims, InClassDimStorage> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
1355  void (*deallocate_fn)(void *) = nullptr) const {
1356  static_assert(Dims == AnyDims || Dims == 3);
1357  assert(dimensions() == 3);
1358  Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_interleaved(nullptr, width(), height(), channels());
1359  dst.set_min(min(0), min(1), min(2));
1360  dst.allocate(allocate_fn, deallocate_fn);
1361  dst.copy_from(*this);
1362  return dst;
1363  }
1364 
1365  /** Like copy(), but the copy is created in planar memory layout
1366  * (vs. keeping the same memory layout as the original).
1367  */
1368  Buffer<not_const_T, Dims, InClassDimStorage> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
1369  void (*deallocate_fn)(void *) = nullptr) const {
1370  std::vector<int> mins, extents;
1371  const int dims = dimensions();
1372  mins.reserve(dims);
1373  extents.reserve(dims);
1374  for (int d = 0; d < dims; ++d) {
1375  mins.push_back(dim(d).min());
1376  extents.push_back(dim(d).extent());
1377  }
1378  Buffer<not_const_T, Dims, InClassDimStorage> dst(nullptr, extents);
1379  dst.set_min(mins);
1380  dst.allocate(allocate_fn, deallocate_fn);
1381  dst.copy_from(*this);
1382  return dst;
1383  }
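
// Illustrative sketch (not part of the original header): the deep-copy variants always
// produce a non-const element type; the layout is preserved or forced as requested.
//
//     Halide::Runtime::Buffer<const uint8_t> src = load_image();   // hypothetical source
//     Halide::Runtime::Buffer<uint8_t> same_layout = src.copy();
//     Halide::Runtime::Buffer<uint8_t> interleaved = src.copy_to_interleaved();  // requires 3 dimensions
//     Halide::Runtime::Buffer<uint8_t> planar = src.copy_to_planar();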
1384 
1385  /** Make a copy of the Buffer which shares the underlying host and/or device
1386  * allocations as the existing Buffer. This is purely syntactic sugar for
1387  * cases where you have a const reference to a Buffer but need a temporary
1388  * non-const copy (e.g. to make a call into AOT-generated Halide code), and want a terse
1389  * inline way to create a temporary. \code
1390  * void call_my_func(const Buffer<const uint8_t>& input) {
1391  * my_func(input.alias(), output);
1392  * }\endcode
1393  */
1394  Buffer<T, Dims, InClassDimStorage> alias() const {
1395  return *this;
1396  }
1397 
1398  /** Fill a Buffer with the values at the same coordinates in
1399  * another Buffer. Restricts itself to coordinates contained
1400  * within the intersection of the two buffers. If the two Buffers
1401  * are not in the same coordinate system, you will need to
1402  * translate the argument Buffer first. E.g. if you're blitting a
1403  * sprite onto a framebuffer, you'll want to translate the sprite
1404  * to the correct location first like so: \code
1405  * framebuffer.copy_from(sprite.translated({x, y})); \endcode
1406  */
1407  template<typename T2, int D2, int S2>
1408  void copy_from(Buffer<T2, D2, S2> src) {
1409  static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
1410  assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
1411  assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");
1412 
1413  Buffer<T, Dims, InClassDimStorage> dst(*this);
1414 
1415  static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2);
1416  assert(src.dimensions() == dst.dimensions());
1417 
1418  // Trim the copy to the region in common
1419  const int d = dimensions();
1420  for (int i = 0; i < d; i++) {
1421  int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
1422  int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
1423  if (max_coord < min_coord) {
1424  // The buffers do not overlap.
1425  return;
1426  }
1427  dst.crop(i, min_coord, max_coord - min_coord + 1);
1428  src.crop(i, min_coord, max_coord - min_coord + 1);
1429  }
1430 
1431  // If T is void, we need to do runtime dispatch to an
1432  // appropriately-typed lambda. We're copying, so we only care
1433  // about the element size. (If not, this should optimize away
1434  // into a static dispatch to the right-sized copy.)
1435  if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
1436  using MemType = uint8_t;
1437  auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1438  auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1439  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1440  } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
1441  using MemType = uint16_t;
1442  auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1443  auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1444  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1445  } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
1446  using MemType = uint32_t;
1447  auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1448  auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1449  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1450  } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
1451  using MemType = uint64_t;
1452  auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1453  auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1454  typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1455  } else {
1456  assert(false && "type().bytes() must be 1, 2, 4, or 8");
1457  }
1458  set_host_dirty();
1459  }
1460 
1461  /** Make an image that refers to a sub-range of this image along
1462  * the given dimension. Asserts that the crop region is within
1463  * the existing bounds: you cannot "crop outwards", even if you know there
1464  * is valid Buffer storage (e.g. because you already cropped inwards). */
1465  Buffer<T, Dims, InClassDimStorage> cropped(int d, int min, int extent) const {
1466  // Make a fresh copy of the underlying buffer (but not a fresh
1467  // copy of the allocation, if there is one).
1468  Buffer<T, Dims, InClassDimStorage> im = *this;
1469 
1470  // This guarantees the preexisting device ref is dropped if the
1471  // device_crop call fails and maintains the buffer in a consistent
1472  // state.
1473  im.device_deallocate();
1474 
1475  im.crop_host(d, min, extent);
1476  if (buf.device_interface != nullptr) {
1477  complete_device_crop(im);
1478  }
1479  return im;
1480  }
1481 
1482  /** Crop an image in-place along the given dimension. This does
1483  * not move any data around in memory - it just changes the min
1484  * and extent of the given dimension. */
1485  void crop(int d, int min, int extent) {
1486  // An optimization for non-device buffers. For the device case,
1487  // a temp buffer is required, so reuse the not-in-place version.
1488  // TODO(zalman|abadams): Are nop crops common enough to special
1489  // case the device part of the if to do nothing?
1490  if (buf.device_interface != nullptr) {
1491  *this = cropped(d, min, extent);
1492  } else {
1493  crop_host(d, min, extent);
1494  }
1495  }
1496 
1497  /** Make an image that refers to a sub-rectangle of this image along
1498  * the first N dimensions. Asserts that the crop region is within
1499  * the existing bounds. The cropped image may drop any device handle
1500  * if the device_interface cannot accomplish the crop in-place. */
1501  Buffer<T, Dims, InClassDimStorage> cropped(const std::vector<std::pair<int, int>> &rect) const {
1502  // Make a fresh copy of the underlying buffer (but not a fresh
1503  // copy of the allocation, if there is one).
1504  Buffer<T, Dims, InClassDimStorage> im = *this;
1505 
1506  // This guarantees the preexisting device ref is dropped if the
1507  // device_crop call fails and maintains the buffer in a consistent
1508  // state.
1509  im.device_deallocate();
1510 
1511  im.crop_host(rect);
1512  if (buf.device_interface != nullptr) {
1513  complete_device_crop(im);
1514  }
1515  return im;
1516  }
1517 
1518  /** Crop an image in-place along the first N dimensions. This does
1519  * not move any data around in memory, nor does it free memory. It
1520  * just rewrites the min/extent of each dimension to refer to a
1521  * subregion of the same allocation. */
1522  void crop(const std::vector<std::pair<int, int>> &rect) {
1523  // An optimization for non-device buffers. For the device case,
1524  // a temp buffer is required, so reuse the not-in-place version.
1525  // TODO(zalman|abadams): Are nop crops common enough to special
1526  // case the device part of the if to do nothing?
1527  if (buf.device_interface != nullptr) {
1528  *this = cropped(rect);
1529  } else {
1530  crop_host(rect);
1531  }
1532  }
1533 
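 /* Usage sketch (illustrative; the buffer name, element type, and sizes are
    assumptions): cropped() returns a shared-storage view, crop() narrows in place.
 \code
 Buffer<float, 2> im(100, 100);
 // A 10x10 window spanning [2, 12) x [3, 13); it shares storage with im.
 Buffer<float, 2> window = im.cropped({{2, 10}, {3, 10}});
 assert(&window(2, 3) == &im(2, 3));
 im.crop(0, 2, 10);  // in-place: narrows dimension 0 only; no data moves
 \endcode
 */
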
1534  /** Make an image which refers to the same data using
1535  * translated coordinates in the given dimension. Positive values
1536  * move the image data to the right or down relative to the
1537  * coordinate system. Drops any device handle. */
1538  Buffer<T, Dims, InClassDimStorage> translated(int d, int dx) const {
1539  Buffer<T, Dims, InClassDimStorage> im = *this;
1540  im.translate(d, dx);
1541  return im;
1542  }
1543 
1544  /** Translate an image in-place along one dimension by changing
1545  * how it is indexed. Does not move any data around in memory. */
1546  void translate(int d, int delta) {
1547  assert(d >= 0 && d < this->dimensions());
1548  device_deallocate();
1549  buf.dim[d].min += delta;
1550  }
1551 
1552  /** Make an image which refers to the same data translated along
1553  * the first N dimensions. */
1554  Buffer<T, Dims, InClassDimStorage> translated(const std::vector<int> &delta) const {
1555  Buffer<T, Dims, InClassDimStorage> im = *this;
1556  im.translate(delta);
1557  return im;
1558  }
1559 
1560  /** Translate an image along the first N dimensions by changing
1561  * how it is indexed. Does not move any data around in memory. */
1562  void translate(const std::vector<int> &delta) {
1563  device_deallocate();
1564  assert(delta.size() <= static_cast<decltype(delta.size())>(std::numeric_limits<int>::max()));
1565  int limit = (int)delta.size();
1566  assert(limit <= dimensions());
1567  for (int i = 0; i < limit; i++) {
1568  translate(i, delta[i]);
1569  }
1570  }
1571 
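 /* Usage sketch (illustrative names and sizes): translation only rewrites the
    per-dimension mins; the data and the host pointer are untouched.
 \code
 Buffer<uint8_t, 2> im(64, 64);               // spans [0, 64) x [0, 64)
 Buffer<uint8_t, 2> shifted = im.translated(0, 10);
 assert(&shifted(10, 0) == &im(0, 0));        // same element, shifted coordinates
 im.translate({5, -3});                       // in-place: mins become (5, -3)
 assert(im.dim(0).min() == 5 && im.dim(1).min() == -3);
 \endcode
 */
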
1572  /** Set the min coordinate of an image in the first N dimensions. */
1573  // @{
1574  void set_min(const std::vector<int> &mins) {
1575  assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
1576  device_deallocate();
1577  for (size_t i = 0; i < mins.size(); i++) {
1578  buf.dim[i].min = mins[i];
1579  }
1580  }
1581 
1582  template<typename... Args>
1583  void set_min(Args... args) {
1584  set_min(std::vector<int>{args...});
1585  }
1586  // @}
1587 
1588  /** Test if a given coordinate is within the bounds of an image. */
1589  // @{
1590  bool contains(const std::vector<int> &coords) const {
1591  assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
1592  for (size_t i = 0; i < coords.size(); i++) {
1593  if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
1594  return false;
1595  }
1596  }
1597  return true;
1598  }
1599 
1600  template<typename... Args>
1601  bool contains(Args... args) const {
1602  return contains(std::vector<int>{args...});
1603  }
1604  // @}
1605 
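 /* Usage sketch (illustrative): set_min() repositions a tile in the global
    coordinate space, and contains() tests coordinates against the result.
 \code
 Buffer<int, 2> tile(32, 32);   // mins default to (0, 0)
 tile.set_min(128, 64);         // tile now spans [128, 160) x [64, 96)
 assert(tile.contains(130, 70));
 assert(!tile.contains(0, 0));
 \endcode
 */
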
1606  /** Make a buffer which refers to the same data in the same layout
1607  * using a swapped indexing order for the dimensions given. So
1608  * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more
1609  * strongly that A.address_of(i, j) == B.address_of(j, i). */
1610  Buffer<T, Dims, InClassDimStorage> transposed(int d1, int d2) const {
1611  Buffer<T, Dims, InClassDimStorage> im = *this;
1612  im.transpose(d1, d2);
1613  return im;
1614  }
1615 
1616  /** Transpose a buffer in-place by changing how it is indexed. For
1617  * example, transpose(0, 1) on a two-dimensional buffer means that
1618  * the value referred to by coordinates (i, j) is now reached at
1619  * the coordinates (j, i), and vice versa. This is done by
1620  * reordering the per-dimension metadata rather than by moving
1621  * data around in memory, so other views of the same memory will
1622  * not see the data as having been transposed. */
1623  void transpose(int d1, int d2) {
1624  assert(d1 >= 0 && d1 < this->dimensions());
1625  assert(d2 >= 0 && d2 < this->dimensions());
1626  std::swap(buf.dim[d1], buf.dim[d2]);
1627  }
1628 
1629  /** A generalized transpose: instead of swapping two dimensions,
1630  * pass a vector that lists each dimension index exactly once, in
1631  * the desired order. This does not move any data around in memory
1632  * - it just permutes how it is indexed. */
1633  void transpose(const std::vector<int> &order) {
1634  assert((int)order.size() == dimensions());
1635  if (dimensions() < 2) {
1636  // My, that was easy
1637  return;
1638  }
1639 
1640  std::vector<int> order_sorted = order;
1641  for (size_t i = 1; i < order_sorted.size(); i++) {
1642  for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
1643  std::swap(order_sorted[j], order_sorted[j - 1]);
1644  transpose(j, j - 1);
1645  }
1646  }
1647  }
1648 
1649  /** Make a buffer which refers to the same data in the same
1650  * layout using a different ordering of the dimensions. */
1651  Buffer<T, Dims, InClassDimStorage> transposed(const std::vector<int> &order) const {
1652  Buffer<T, Dims, InClassDimStorage> im = *this;
1653  im.transpose(order);
1654  return im;
1655  }
1656 
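 /* Usage sketch (illustrative): transposing swaps indexing order only; both
    views alias the same storage.
 \code
 Buffer<float, 2> a(3, 5);
 Buffer<float, 2> b = a.transposed(0, 1);
 assert(&a(2, 4) == &b(4, 2));
 Buffer<float, 3> im(4, 4, 3);
 im.transpose({2, 0, 1});       // in-place: dimension order becomes c, x, y
 \endcode
 */
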
1657  /** Make a lower-dimensional buffer that refers to one slice of
1658  * this buffer. */
1659  Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1660  sliced(int d, int pos) const {
1661  static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1662  assert(dimensions() > 0);
1663 
1664  Buffer<T, AnyDims> im = *this;
1665 
1666  // This guarantees the preexisting device ref is dropped if the
1667  // device_slice call fails and maintains the buffer in a consistent
1668  // state.
1669  im.device_deallocate();
1670 
1671  im.slice_host(d, pos);
1672  if (buf.device_interface != nullptr) {
1673  complete_device_slice(im, d, pos);
1674  }
1675  return im;
1676  }
1677 
1678  /** Make a lower-dimensional buffer that refers to one slice of this
1679  * buffer at the dimension's minimum. */
1680  Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1681  sliced(int d) const {
1682  static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1683  assert(dimensions() > 0);
1684 
1685  return sliced(d, dim(d).min());
1686  }
1687 
1688  /** Rewrite the buffer to refer to a single lower-dimensional
1689  * slice of itself along the given dimension at the given
1690  * coordinate. Does not move any data around or free the original
1691  * memory, so other views of the same data are unaffected. Can
1692  * only be called on a Buffer with dynamic dimensionality. */
1693  void slice(int d, int pos) {
1694  static_assert(Dims == AnyDims, "Cannot call slice() on a Buffer with static dimensionality.");
1695  assert(dimensions() > 0);
1696 
1697  // An optimization for non-device buffers. For the device case,
1698  // a temp buffer is required, so reuse the not-in-place version.
1699  // TODO(zalman|abadams): Are nop slices common enough to special
1700  // case the device part of the if to do nothing?
1701  if (buf.device_interface != nullptr) {
1702  *this = sliced(d, pos);
1703  } else {
1704  slice_host(d, pos);
1705  }
1706  }
1707 
1708  /** Slice a buffer in-place at the dimension's minimum. */
1709  inline void slice(int d) {
1710  slice(d, dim(d).min());
1711  }
1712 
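 /* Usage sketch (illustrative): sliced() yields a lower-dimensional view that
    shares storage; in-place slice() needs a dynamically-dimensioned Buffer.
 \code
 Buffer<float, 3> rgb(100, 100, 3);
 Buffer<float, 2> green = rgb.sliced(2, 1);   // channel 1 as a 2-D view
 green(10, 20) = 1.0f;
 assert(rgb(10, 20, 1) == 1.0f);
 Buffer<float> any = rgb;                     // Buffer<float, AnyDims> alias
 any.slice(2, 0);                             // any is now two-dimensional
 \endcode
 */
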
1713  /** Make a new buffer that views this buffer as a single slice in a
1714  * higher-dimensional space. The new dimension has extent one and
1715  * the given min. This operation is the opposite of slice. As an
1716  * example, the following condition is true:
1717  *
1718  \code
1719  im2 = im.embedded(1, 17);
1720  &im(x, y, c) == &im2(x, 17, y, c);
1721  \endcode
1722  */
1723  Buffer<T, (Dims == AnyDims ? AnyDims : Dims + 1)>
1724  embedded(int d, int pos = 0) const {
1725  Buffer<T, AnyDims> im = *this;
1726  im.embed(d, pos);
1727  return im;
1728  }
1729 
1730  /** Embed a buffer in-place, increasing the
1731  * dimensionality. */
1732  void embed(int d, int pos = 0) {
1733  static_assert(Dims == AnyDims, "Cannot call embed() on a Buffer with static dimensionality.");
1734  assert(d >= 0 && d <= dimensions());
1735  add_dimension();
1736  translate(dimensions() - 1, pos);
1737  for (int i = dimensions() - 1; i > d; i--) {
1738  transpose(i, i - 1);
1739  }
1740  }
1741 
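 /* Usage sketch (illustrative): embedding views a 2-D image as a one-slice
    3-D volume without touching the data.
 \code
 Buffer<float, 2> plane(640, 480);
 Buffer<float, 3> vol = plane.embedded(2);    // new dimension 2, min 0, extent 1
 assert(vol.dim(2).extent() == 1);
 assert(&vol(10, 10, 0) == &plane(10, 10));
 \endcode
 */
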
1742  /** Add a new dimension with a min of zero and an extent of
1743  * one. The stride is the extent of the outermost dimension times
1744  * its stride. The new dimension is the last dimension. This is a
1745  * special case of embed. */
1746  void add_dimension() {
1747  static_assert(Dims == AnyDims, "Cannot call add_dimension() on a Buffer with static dimensionality.");
1748  const int dims = buf.dimensions;
1749  buf.dimensions++;
1750  if (buf.dim != shape) {
1751  // We're already on the heap. Reallocate.
1752  halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
1753  for (int i = 0; i < dims; i++) {
1754  new_shape[i] = buf.dim[i];
1755  }
1756  delete[] buf.dim;
1757  buf.dim = new_shape;
1758  } else if (dims == InClassDimStorage) {
1759  // Transition from the in-class storage to the heap
1760  make_shape_storage(buf.dimensions);
1761  for (int i = 0; i < dims; i++) {
1762  buf.dim[i] = shape[i];
1763  }
1764  } else {
1765  // We still fit in the class
1766  }
1767  buf.dim[dims] = {0, 1, 0};
1768  if (dims == 0) {
1769  buf.dim[dims].stride = 1;
1770  } else {
1771  buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
1772  }
1773  }
1774 
1775  /** Add a new dimension with a min of zero, an extent of one, and
1776  * the specified stride. The new dimension is the last
1777  * dimension. This is a special case of embed. */
1778  void add_dimension_with_stride(int s) {
1779  add_dimension();
1780  buf.dim[buf.dimensions - 1].stride = s;
1781  }
1782 
1783  /** Methods for managing any GPU allocation. */
1784  // @{
1785  // Set the host dirty flag. Called by every operator()
1786  // access. Must be inlined so it can be hoisted out of loops.
1787  HALIDE_ALWAYS_INLINE
1788  void set_host_dirty(bool v = true) {
1789  assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
1790  buf.set_host_dirty(v);
1791  }
1792 
1793  // Check if the device allocation is dirty. Called by
1794  // set_host_dirty, which is called by every accessor. Must be
1795  // inlined so it can be hoisted out of loops.
1796  HALIDE_ALWAYS_INLINE
1797  bool device_dirty() const {
1798  return buf.device_dirty();
1799  }
1800 
1801  bool host_dirty() const {
1802  return buf.host_dirty();
1803  }
1804 
1805  void set_device_dirty(bool v = true) {
1806  assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
1807  buf.set_device_dirty(v);
1808  }
1809 
1810  int copy_to_host(void *ctx = nullptr) {
1811  if (device_dirty()) {
1812  return buf.device_interface->copy_to_host(ctx, &buf);
1813  }
1814  return halide_error_code_success;
1815  }
1816 
1817  int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1818  if (host_dirty()) {
1819  return device_interface->copy_to_device(ctx, &buf, device_interface);
1820  }
1821  return halide_error_code_success;
1822  }
1823 
1824  int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1825  return device_interface->device_malloc(ctx, &buf, device_interface);
1826  }
1827 
1828  int device_free(void *ctx = nullptr) {
1829  if (dev_ref_count) {
1830  assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
1831  "Can't call device_free on an unmanaged or wrapped native device handle. "
1832  "Free the source allocation or call device_detach_native instead.");
1833  // Multiple people may be holding onto this dev field
1834  assert(dev_ref_count->count == 1 &&
1835  "Multiple Halide::Runtime::Buffer objects share this device "
1836  "allocation. Freeing it would create dangling references. "
1837  "Don't call device_free on Halide buffers that you have copied or "
1838  "passed by value.");
1839  }
1840  int ret = halide_error_code_success;
1841  if (buf.device_interface) {
1842  ret = buf.device_interface->device_free(ctx, &buf);
1843  }
1844  if (dev_ref_count) {
1845  delete dev_ref_count;
1846  dev_ref_count = nullptr;
1847  }
1848  return ret;
1849  }
1850 
1851  int device_wrap_native(const struct halide_device_interface_t *device_interface,
1852  uint64_t handle, void *ctx = nullptr) {
1853  assert(device_interface);
1854  dev_ref_count = new DeviceRefCount;
1855  dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
1856  return device_interface->wrap_native(ctx, &buf, handle, device_interface);
1857  }
1858 
1859  int device_detach_native(void *ctx = nullptr) {
1860  assert(dev_ref_count &&
1861  dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative &&
1862  "Only call device_detach_native on buffers wrapping a native "
1863  "device handle via device_wrap_native. This buffer was allocated "
1864  "using device_malloc, or is unmanaged. "
1865  "Call device_free or free the original allocation instead.");
1866  // Multiple people may be holding onto this dev field
1867  assert(dev_ref_count->count == 1 &&
1868  "Multiple Halide::Runtime::Buffer objects share this device "
1869  "allocation. Freeing it could create dangling references. "
1870  "Don't call device_detach_native on Halide buffers that you "
1871  "have copied or passed by value.");
1872  int ret = halide_error_code_success;
1873  if (buf.device_interface) {
1874  ret = buf.device_interface->detach_native(ctx, &buf);
1875  }
1876  delete dev_ref_count;
1877  dev_ref_count = nullptr;
1878  return ret;
1879  }
1880 
1881  int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1882  return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
1883  }
1884 
1885  int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1886  if (dev_ref_count) {
1887  assert(dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost &&
1888  "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1889  "Free the source allocation or call device_detach_native instead.");
1890  // Multiple people may be holding onto this dev field
1891  assert(dev_ref_count->count == 1 &&
1892  "Multiple Halide::Runtime::Buffer objects share this device "
1893  "allocation. Freeing it would create dangling references. "
1894  "Don't call device_and_host_free on Halide buffers that you have copied or "
1895  "passed by value.");
1896  }
1897  int ret = halide_error_code_success;
1898  if (buf.device_interface) {
1899  ret = buf.device_interface->device_and_host_free(ctx, &buf);
1900  }
1901  if (dev_ref_count) {
1902  delete dev_ref_count;
1903  dev_ref_count = nullptr;
1904  }
1905  return ret;
1906  }
1907 
1908  int device_sync(void *ctx = nullptr) {
1909  return buf.device_sync(ctx);
1910  }
1911 
1912  bool has_device_allocation() const {
1913  return buf.device != 0;
1914  }
1915 
1916  /** Return the method by which the device field is managed. */
1917  BufferDeviceOwnership device_ownership() const {
1918  if (dev_ref_count == nullptr) {
1919  return BufferDeviceOwnership::Allocated;
1920  }
1921  return dev_ref_count->ownership;
1922  }
1923  // @}
1924 
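 /* Usage sketch (illustrative) of the dirty-flag protocol. `iface` stands for a
    halide_device_interface_t pointer obtained from a linked device runtime
    (for example halide_cuda_device_interface()); acquiring it is not shown here.
 \code
 Buffer<float, 2> im(256, 256);
 im.fill(0.0f);               // host write: sets host_dirty()
 im.copy_to_device(iface);    // copies because host_dirty() was set
 // ... run a device pipeline that writes im and sets device_dirty() ...
 im.copy_to_host();           // copies back only if device_dirty() is set
 assert(!im.device_dirty());
 \endcode
 */
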
1925  /** If you use the (x, y, c) indexing convention, then Halide
1926  * Buffers are stored planar by default. This function constructs
1927  * an interleaved RGB or RGBA image that can still be indexed
1928  * using (x, y, c). Passing it to a generator requires that the
1929  * generator has been compiled with support for interleaved (also
1930  * known as packed or chunky) memory layouts. */
1931  static Buffer<void, Dims, InClassDimStorage> make_interleaved(halide_type_t t, int width, int height, int channels) {
1932  static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1933  Buffer<void, Dims, InClassDimStorage> im(t, channels, width, height);
1934  // Note that this is equivalent to calling transpose({2, 0, 1}),
1935  // but slightly more efficient.
1936  im.transpose(0, 1);
1937  im.transpose(1, 2);
1938  return im;
1939  }
1940 
1941  /** If you use the (x, y, c) indexing convention, then Halide
1942  * Buffers are stored planar by default. This function constructs
1943  * an interleaved RGB or RGBA image that can still be indexed
1944  * using (x, y, c). Passing it to a generator requires that the
1945  * generator has been compiled with support for interleaved (also
1946  * known as packed or chunky) memory layouts. */
1947  static Buffer<T, Dims, InClassDimStorage> make_interleaved(int width, int height, int channels) {
1948  return make_interleaved(static_halide_type(), width, height, channels);
1949  }
1950 
1951  /** Wrap an existing interleaved image. */
1952  static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage>
1953  make_interleaved(halide_type_t t, T *data, int width, int height, int channels) {
1954  static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1955  Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> im(t, data, channels, width, height);
1956  im.transpose(0, 1);
1957  im.transpose(1, 2);
1958  return im;
1959  }
1960 
1961  /** Wrap an existing interleaved image. */
1962  static Buffer<T, Dims, InClassDimStorage> make_interleaved(T *data, int width, int height, int channels) {
1963  return make_interleaved(static_halide_type(), data, width, height, channels);
1964  }
1965 
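 /* Usage sketch (illustrative): an interleaved buffer is still indexed as
    (x, y, c), but the channel dimension is innermost in memory.
 \code
 Buffer<uint8_t, 3> im = Buffer<uint8_t, 3>::make_interleaved(640, 480, 3);
 assert(im.dim(2).stride() == 1);        // c: innermost
 assert(im.dim(0).stride() == 3);        // x: steps over the 3 channels
 assert(im.dim(1).stride() == 640 * 3);  // y: one full row of pixels
 \endcode
 */
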
1966  /** Make a zero-dimensional Buffer */
1967  static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> make_scalar(halide_type_t t) {
1968  static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1969  Buffer<add_const_if_T_is_const<void>, AnyDims, InClassDimStorage> buf(t, 1);
1970  buf.slice(0, 0);
1971  return buf;
1972  }
1973 
1974  /** Make a zero-dimensional Buffer */
1975  static Buffer<T, Dims, InClassDimStorage> make_scalar() {
1976  static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1977  Buffer<T, AnyDims, InClassDimStorage> buf(1);
1978  buf.slice(0, 0);
1979  return buf;
1980  }
1981 
1982  /** Make a zero-dimensional Buffer that points to non-owned, existing data */
1983  static Buffer<T, Dims, InClassDimStorage> make_scalar(T *data) {
1984  static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1985  Buffer<T, AnyDims, InClassDimStorage> buf(data, 1);
1986  buf.slice(0, 0);
1987  return buf;
1988  }
1989 
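 /* Usage sketch (illustrative): zero-dimensional buffers hold a single value
    and are accessed with an empty argument list.
 \code
 Buffer<int> s = Buffer<int>::make_scalar();
 s() = 42;
 assert(s.dimensions() == 0 && s() == 42);

 float existing = 1.5f;
 Buffer<float> wrapped = Buffer<float>::make_scalar(&existing);  // non-owning
 assert(wrapped() == 1.5f);
 \endcode
 */
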
1990  /** Make a buffer with the same shape and memory nesting order as
1991  * another buffer. It may have a different type. */
1992  template<typename T2, int D2, int S2>
1993  static Buffer<T, Dims, InClassDimStorage> make_with_shape_of(Buffer<T2, D2, S2> src,
1994  void *(*allocate_fn)(size_t) = nullptr,
1995  void (*deallocate_fn)(void *) = nullptr) {
1996  static_assert(Dims == D2 || Dims == AnyDims);
1997  const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
1998  return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
1999  allocate_fn, deallocate_fn);
2000  }
2001 
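 /* Usage sketch (illustrative): the new buffer copies the shape and memory
    nesting order of src, but owns fresh dense storage and may change the type.
 \code
 Buffer<uint8_t, 3> src = Buffer<uint8_t, 3>::make_interleaved(640, 480, 3);
 Buffer<float, 3> dst = Buffer<float, 3>::make_with_shape_of(src);
 assert(dst.dim(2).stride() == 1);   // channel dimension stays innermost
 \endcode
 */
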
2002 private:
2003  static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
2004  int dimensions,
2005  halide_dimension_t *shape,
2006  void *(*allocate_fn)(size_t),
2007  void (*deallocate_fn)(void *)) {
2008  // Reorder the dimensions of src to have strides in increasing order
2009  std::vector<int> swaps;
2010  for (int i = dimensions - 1; i > 0; i--) {
2011  for (int j = i; j > 0; j--) {
2012  if (shape[j - 1].stride > shape[j].stride) {
2013  std::swap(shape[j - 1], shape[j]);
2014  swaps.push_back(j);
2015  }
2016  }
2017  }
2018 
2019  // Rewrite the strides to be dense (this messes up src, which
2020  // is why we took it by value).
2021  for (int i = 0; i < dimensions; i++) {
2022  if (i == 0) {
2023  shape[i].stride = 1;
2024  } else {
2025  shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
2026  }
2027  }
2028 
2029  // Undo the dimension reordering
2030  while (!swaps.empty()) {
2031  int j = swaps.back();
2032  std::swap(shape[j - 1], shape[j]);
2033  swaps.pop_back();
2034  }
2035 
2036  // Use an explicit runtime type, and make dst a Buffer<void>, to allow
2037  // using this method with Buffer<void> for either src or dst.
2038  Buffer<> dst(dst_type, nullptr, dimensions, shape);
2039  dst.allocate(allocate_fn, deallocate_fn);
2040 
2041  return dst;
2042  }
2043 
2044  template<typename... Args>
2045  HALIDE_ALWAYS_INLINE
2046  ptrdiff_t
2047  offset_of(int d, int first, Args... rest) const {
2048 #if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2049  assert(first >= this->buf.dim[d].min);
2050  assert(first < this->buf.dim[d].min + this->buf.dim[d].extent);
2051 #endif
2052  return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
2053  }
2054 
2055  HALIDE_ALWAYS_INLINE
2056  ptrdiff_t offset_of(int d) const {
2057  return 0;
2058  }
2059 
2060  template<typename... Args>
2061  HALIDE_ALWAYS_INLINE
2062  storage_T *
2063  address_of(Args... args) const {
2064  if (T_is_void) {
2065  return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
2066  } else {
2067  return (storage_T *)(this->buf.host) + offset_of(0, args...);
2068  }
2069  }
2070 
2071  HALIDE_ALWAYS_INLINE
2072  ptrdiff_t offset_of(const int *pos) const {
2073  ptrdiff_t offset = 0;
2074  for (int i = this->dimensions() - 1; i >= 0; i--) {
2075 #if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2076  assert(pos[i] >= this->buf.dim[i].min);
2077  assert(pos[i] < this->buf.dim[i].min + this->buf.dim[i].extent);
2078 #endif
2079  offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
2080  }
2081  return offset;
2082  }
2083 
2084  HALIDE_ALWAYS_INLINE
2085  storage_T *address_of(const int *pos) const {
2086  if (T_is_void) {
2087  return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
2088  } else {
2089  return (storage_T *)this->buf.host + offset_of(pos);
2090  }
2091  }
2092 
2093 public:
2094  /** Get a pointer to the address of the min coordinate. */
2095  T *data() const {
2096  return (T *)(this->buf.host);
2097  }
2098 
2099  /** Access elements. Use im(...) to get a reference to an element,
2100  * and use &im(...) to get the address of an element. If you pass
2101  * fewer arguments than the buffer has dimensions, the rest are
2102  * treated as their min coordinate. The non-const versions set the
2103  * host_dirty flag to true.
2104  */
2105  //@{
2106  template<typename... Args,
2107  typename = typename std::enable_if<AllInts<Args...>::value>::type>
2108  HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
2109  static_assert(!T_is_void,
2110  "Cannot use operator() on Buffer<void> types");
2111  constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2112  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2113  assert(!device_dirty());
2114  return *((const not_void_T *)(address_of(first, rest...)));
2115  }
2116 
2117  HALIDE_ALWAYS_INLINE
2118  const not_void_T &
2119  operator()() const {
2120  static_assert(!T_is_void,
2121  "Cannot use operator() on Buffer<void> types");
2122  constexpr int expected_dims = 0;
2123  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2124  assert(!device_dirty());
2125  return *((const not_void_T *)(data()));
2126  }
2127 
2128  HALIDE_ALWAYS_INLINE
2129  const not_void_T &
2130  operator()(const int *pos) const {
2131  static_assert(!T_is_void,
2132  "Cannot use operator() on Buffer<void> types");
2133  assert(!device_dirty());
2134  return *((const not_void_T *)(address_of(pos)));
2135  }
2136 
2137  template<typename... Args,
2138  typename = typename std::enable_if<AllInts<Args...>::value>::type>
2139  HALIDE_ALWAYS_INLINE
2140  not_void_T &
2141  operator()(int first, Args... rest) {
2142  static_assert(!T_is_void,
2143  "Cannot use operator() on Buffer<void> types");
2144  constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2145  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2146  set_host_dirty();
2147  return *((not_void_T *)(address_of(first, rest...)));
2148  }
2149 
2150  HALIDE_ALWAYS_INLINE
2151  not_void_T &
2152  operator()() {
2153  static_assert(!T_is_void,
2154  "Cannot use operator() on Buffer<void> types");
2155  constexpr int expected_dims = 0;
2156  static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2157  set_host_dirty();
2158  return *((not_void_T *)(data()));
2159  }
2160 
2161  HALIDE_ALWAYS_INLINE
2162  not_void_T &
2163  operator()(const int *pos) {
2164  static_assert(!T_is_void,
2165  "Cannot use operator() on Buffer<void> types");
2166  set_host_dirty();
2167  return *((not_void_T *)(address_of(pos)));
2168  }
2169  // @}
2170 
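 /* Usage sketch (illustrative): element access, address-of, and the
    fewer-arguments form (the latter requires dynamic dimensionality).
 \code
 Buffer<float, 3> im(100, 100, 3);
 im(10, 20, 1) = 0.5f;                    // non-const access sets host_dirty()
 float *addr = &im(10, 20, 1);            // address of an element
 Buffer<float> dyn = im;                  // Buffer<float, AnyDims> alias
 assert(addr == &dyn(10, 20, 1));
 assert(&dyn(10, 20) == &dyn(10, 20, 0)); // omitted coordinates use the min
 \endcode
 */
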
2171  /** Tests that all values in this buffer are equal to val. */
2172  bool all_equal(not_void_T val) const {
2173  bool all_equal = true;
2174  for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
2175  return all_equal;
2176  }
2177 
2178  Buffer<T, Dims, InClassDimStorage> &fill(not_void_T val) {
2179  set_host_dirty();
2180  for_each_value([=](T &v) { v = val; });
2181  return *this;
2182  }
2183 
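 /* Usage sketch (illustrative): fill() writes one value everywhere and
    all_equal() verifies it.
 \code
 Buffer<uint8_t, 2> mask(16, 16);
 mask.fill(255);
 assert(mask.all_equal(255));
 \endcode
 */
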
2184 private:
2185  /** Helper functions for for_each_value. */
2186  // @{
2187  template<int N>
2188  struct for_each_value_task_dim {
2189  std::ptrdiff_t extent;
2190  std::ptrdiff_t stride[N];
2191  };
2192 
2193  // Given an array of strides, and a bunch of pointers to pointers
2194  // (all of different types), advance the pointers using the
2195  // strides.
2196  template<typename Ptr, typename... Ptrs>
2197  HALIDE_ALWAYS_INLINE static void advance_ptrs(const std::ptrdiff_t *stride, Ptr &ptr, Ptrs &...ptrs) {
2198  ptr += *stride;
2199  advance_ptrs(stride + 1, ptrs...);
2200  }
2201 
2202  HALIDE_ALWAYS_INLINE
2203  static void advance_ptrs(const std::ptrdiff_t *) {
2204  }
2205 
2206  template<typename Fn, typename Ptr, typename... Ptrs>
2207  HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
2208  const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
2209  if (d == 0) {
2210  if (innermost_strides_are_one) {
2211  Ptr end = ptr + t[0].extent;
2212  while (ptr != end) {
2213  f(*ptr++, (*ptrs++)...);
2214  }
2215  } else {
2216  for (std::ptrdiff_t i = t[0].extent; i != 0; i--) {
2217  f(*ptr, (*ptrs)...);
2218  advance_ptrs(t[0].stride, ptr, ptrs...);
2219  }
2220  }
2221  } else {
2222  for (std::ptrdiff_t i = t[d].extent; i != 0; i--) {
2223  for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
2224  advance_ptrs(t[d].stride, ptr, ptrs...);
2225  }
2226  }
2227  }
2228 
2229  // Return pair is <new_dimensions, innermost_strides_are_one>
2230  template<int N>
2231  HALIDE_NEVER_INLINE static std::pair<int, bool> for_each_value_prep(for_each_value_task_dim<N> *t,
2232  const halide_buffer_t **buffers) {
2233  const int dimensions = buffers[0]->dimensions;
2234  assert(dimensions > 0);
2235 
2236  // Check the buffers all have clean host allocations
2237  for (int i = 0; i < N; i++) {
2238  if (buffers[i]->device) {
2239  assert(buffers[i]->host &&
2240  "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
2241  assert(!buffers[i]->device_dirty() &&
2242  "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
2243  } else {
2244  assert(buffers[i]->host &&
2245  "Buffer passed to for_each_value has no host or device allocation");
2246  }
2247  }
2248 
2249  // Extract the strides in all the dimensions
2250  for (int i = 0; i < dimensions; i++) {
2251  for (int j = 0; j < N; j++) {
2252  assert(buffers[j]->dimensions == dimensions);
2253  assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
2254  buffers[j]->dim[i].min == buffers[0]->dim[i].min);
2255  const int s = buffers[j]->dim[i].stride;
2256  t[i].stride[j] = s;
2257  }
2258  t[i].extent = buffers[0]->dim[i].extent;
2259 
2260  // Order the dimensions by stride, so that the traversal is cache-coherent.
2261  // Use the strides of the last buffer for this ordering, because that buffer is the source in copies.
2262  // It appears to be better to optimize read order than write order.
2263  for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
2264  std::swap(t[j], t[j - 1]);
2265  }
2266  }
2267 
2268  // flatten dimensions where possible to make a larger inner
2269  // loop for autovectorization.
2270  int d = dimensions;
2271  for (int i = 1; i < d; i++) {
2272  bool flat = true;
2273  for (int j = 0; j < N; j++) {
2274  flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
2275  }
2276  if (flat) {
2277  t[i - 1].extent *= t[i].extent;
2278  for (int j = i; j < d - 1; j++) {
2279  t[j] = t[j + 1];
2280  }
2281  i--;
2282  d--;
2283  }
2284  }
2285 
2286  // Note that we assert() that dimensions > 0 above
2287  // (our one-and-only caller will only call us that way)
2288  // so the unchecked access to t[0] should be safe.
2289  bool innermost_strides_are_one = true;
2290  for (int i = 0; i < N; i++) {
2291  innermost_strides_are_one &= (t[0].stride[i] == 1);
2292  }
2293 
2294  return {d, innermost_strides_are_one};
2295  }
2296 
2297  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2298  void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
2299  if (dimensions() > 0) {
2300  const size_t alloc_size = dimensions() * sizeof(for_each_value_task_dim<N>);
2301  Buffer<>::for_each_value_task_dim<N> *t =
2302  (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA(alloc_size);
2303  // Move the preparatory code into a non-templated helper to
2304  // save code size.
2305  const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
2306  auto [new_dims, innermost_strides_are_one] = Buffer<>::for_each_value_prep(t, buffers);
2307  if (new_dims > 0) {
2308  Buffer<>::for_each_value_helper(f, new_dims - 1,
2309  innermost_strides_are_one,
2310  t,
2311  data(), (other_buffers.data())...);
2312  return;
2313  }
2314  // else fall thru
2315  }
2316 
2317  // zero-dimensional case
2318  f(*data(), (*other_buffers.data())...);
2319  }
2320  // @}
2321 
2322 public:
2323  /** Call a function on every value in the buffer, and the
2324  * corresponding values in some number of other buffers of the
2325  * same size. The function should take a reference, const
2326  * reference, or value of the correct type for each buffer. This
2327  * effectively lifts a function of scalars to an element-wise
2328  * function of buffers. This produces code that the compiler can
2329  * autovectorize. This is slightly cheaper than for_each_element,
2330  * because it does not need to track the coordinates.
2331  *
2332  * Note that constness of Buffers is preserved: a const Buffer<T> (for either
2333  * 'this' or the other-buffers arguments) will allow mutation of the
2334  * buffer contents, while a Buffer<const T> will not. Attempting to specify
2335  * a mutable reference for the lambda argument of a Buffer<const T>
2336  * will result in a compilation error. */
2337  // @{
2338  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2339  HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_value(Fn &&f, Args &&...other_buffers) const {
2340  for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2341  return *this;
2342  }
2343 
2344  template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2345  HALIDE_ALWAYS_INLINE
2346  Buffer<T, Dims, InClassDimStorage> &
2347  for_each_value(Fn &&f, Args &&...other_buffers) {
2348  for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2349  return *this;
2350  }
2351  // @}
2352 
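 /* Usage sketch (illustrative): lifting a scalar lambda to an element-wise
    operation over two same-shaped buffers. The first buffer may be mutated
    through a reference parameter; the second is read by value.
 \code
 Buffer<float, 2> dst(128, 128), src(128, 128);
 dst.fill(1.0f);
 src.fill(2.0f);
 dst.for_each_value([](float &d, float s) { d += s; }, src);
 assert(dst.all_equal(3.0f));
 \endcode
 */
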
2353 private:
2354  // Helper functions for for_each_element
2355  struct for_each_element_task_dim {
2356  int min, max;
2357  };
2358 
2359  /** If f is callable with this many args, call it. The first
2360  * argument is just to make the overloads distinct. Actual
2361  * overload selection is done using the enable_if. */
2362  template<typename Fn,
2363  typename... Args,
2364  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2365  HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
2366  f(args...);
2367  }
2368 
2369  /** If the above overload is impossible, we add an outer loop over
2370  * an additional argument and try again. */
2371  template<typename Fn,
2372  typename... Args>
2373  HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
2374  for (int i = t[d].min; i <= t[d].max; i++) {
2375  for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
2376  }
2377  }
2378 
2379  /** Determine the minimum number of arguments a callable can take
2380  * using the same trick. */
2381  template<typename Fn,
2382  typename... Args,
2383  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2384  HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
2385  return (int)(sizeof...(Args));
2386  }
2387 
2388  /** The recursive version is only enabled up to a recursion limit
2389  * of 256. This catches callables that aren't callable with any
2390  * number of ints. */
2391  template<typename Fn,
2392  typename... Args>
2393  HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
2394  static_assert(sizeof...(args) <= 256,
2395  "Callable passed to for_each_element must accept either a const int *,"
2396  " or up to 256 ints. No such operator found. Expect infinite template recursion.");
2397  return num_args(0, std::forward<Fn>(f), 0, args...);
2398  }
2399 
2400  /** A version where the callable takes a position array instead,
2401  * with compile-time recursion on the dimensionality. This
2402  * overload is preferred to the one below using the same int vs
2403  * double trick as above, but is impossible once d hits -1 using
2404  * std::enable_if. */
2405  template<int d,
2406  typename Fn,
2407  typename = typename std::enable_if<(d >= 0)>::type>
2408  HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2409  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2410  for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
2411  }
2412  }
2413 
2414  /** Base case for recursion above. */
2415  template<int d,
2416  typename Fn,
2417  typename = typename std::enable_if<(d < 0)>::type>
2418  HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2419  f(pos);
2420  }
2421 
2422  /** A run-time-recursive version (instead of
2423  * compile-time-recursive) that requires the callable to take a
2424  * pointer to a position array instead. Dispatches to the
2425  * compile-time-recursive version once the dimensionality gets
2426  * small. */
2427  template<typename Fn>
2428  static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2429  if (d == -1) {
2430  f(pos);
2431  } else if (d == 0) {
2432  // Once the dimensionality gets small enough, dispatch to
2433  // a compile-time-recursive version for better codegen of
2434  // the inner loops.
2435  for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2436  } else if (d == 1) {
2437  for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2438  } else if (d == 2) {
2439  for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2440  } else if (d == 3) {
2441  for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2442  } else {
2443  for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2444  for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2445  }
2446  }
2447  }
2448 
2449  /** We now have two overloads for for_each_element. This one
2450  * triggers if the callable takes a const int *.
2451  */
2452  template<typename Fn,
2453  typename = decltype(std::declval<Fn>()((const int *)nullptr))>
2454  static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
2455  const int size = dims * sizeof(int);
2456  int *pos = (int *)HALIDE_ALLOCA(size);
2457  // At least one version of GCC will (incorrectly) report that pos "may be used uninitialized".
2458  // Add this memset to silence it.
2459  memset(pos, 0, size);
2460  for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
2461  }
2462 
2463  /** This one triggers otherwise. It treats the callable as
2464  * something that takes some number of ints. */
2465  template<typename Fn>
2466  HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
2467  int args = num_args(0, std::forward<Fn>(f));
2468  assert(dims >= args);
2469  for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
2470  }
2471 
2472  template<typename Fn>
2473  void for_each_element_impl(Fn &&f) const {
2474  for_each_element_task_dim *t =
2475  (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
2476  for (int i = 0; i < dimensions(); i++) {
2477  t[i].min = dim(i).min();
2478  t[i].max = dim(i).max();
2479  }
2480  for_each_element(0, dimensions(), t, std::forward<Fn>(f));
2481  }
2482 
2483 public:
2484  /** Call a function at each site in a buffer. This is likely to be
2485  * much slower than using Halide code to populate a buffer, but is
2486  * convenient for tests. If the function has more arguments than the
2487  * buffer has dimensions, the remaining arguments will be zero. If it
2488  * has fewer arguments than the buffer has dimensions then the last
2489  * few dimensions of the buffer are not iterated over. For example,
2490  * the following code exploits this to set a floating point RGB image
2491  * to red:
2492 
2493  \code
2494  Buffer<float, 3> im(100, 100, 3);
2495  im.for_each_element([&](int x, int y) {
2496  im(x, y, 0) = 1.0f;
2497  im(x, y, 1) = 0.0f;
2498  im(x, y, 2) = 0.0f;
2499  });
2500  \endcode
2501 
2502  * The compiled code is equivalent to writing a nested for loop,
2503  * and compilers are capable of optimizing it in the same way.
2504  *
2505  * If the callable can be called with an int * as the sole argument,
2506  * that version is called instead. Each location in the buffer is
2507  * passed to it in a coordinate array. This version is higher-overhead
2508  * than the variadic version, but is useful for writing generic code
2509  * that accepts buffers of arbitrary dimensionality. For example, the
2510  * following sets the value at all sites in an arbitrary-dimensional
2511  * buffer to their first coordinate:
2512 
2513  \code
2514  im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
2515  \endcode
2516 
2517  * It is also possible to use for_each_element to iterate over entire
2518  * rows or columns by cropping the buffer to a single column or row
2519  * respectively and iterating over elements of the result. For example,
2520  * to set the diagonal of the image to 1 by iterating over the columns:
2521 
2522  \code
2523  Buffer<float, 3> im(100, 100, 3);
2524  im.sliced(1, 0).for_each_element([&](int x, int c) {
2525  im(x, x, c) = 1.0f;
2526  });
2527  \endcode
2528 
2529  * Or, assuming the memory layout is known to be dense per row, one can
2530  * memset each row of an image like so:
2531 
2532  \code
2533  Buffer<float, 3> im(100, 100, 3);
2534  im.sliced(0, 0).for_each_element([&](int y, int c) {
2535  memset(&im(0, y, c), 0, sizeof(float) * im.width());
2536  });
2537  \endcode
2538 
2539  */
2540  // @{
2541  template<typename Fn>
2542  HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_element(Fn &&f) const {
2543  for_each_element_impl(f);
2544  return *this;
2545  }
2546 
2547  template<typename Fn>
2548  HALIDE_ALWAYS_INLINE
2549  Buffer<T, Dims, InClassDimStorage> &
2550  for_each_element(Fn &&f) {
2551  for_each_element_impl(f);
2552  return *this;
2553  }
2554  // @}
2555 
2556 private:
2557  template<typename Fn>
2558  struct FillHelper {
2559  Fn f;
2560  Buffer<T, Dims, InClassDimStorage> *buf;
2561 
2562  template<typename... Args,
2563  typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2564  void operator()(Args... args) {
2565  (*buf)(args...) = f(args...);
2566  }
2567 
2568  FillHelper(Fn &&f, Buffer<T, Dims, InClassDimStorage> *buf)
2569  : f(std::forward<Fn>(f)), buf(buf) {
2570  }
2571  };
2572 
2573 public:
2574  /** Fill a buffer by evaluating a callable at every site. The
2575  * callable should look much like a callable passed to
2576  * for_each_element, but it should return the value that should be
2577  * stored to the coordinate corresponding to the arguments. */
2578  template<typename Fn,
2579  typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2580  Buffer<T, Dims, InClassDimStorage> &fill(Fn &&f) {
2581  // We'll go via for_each_element. We need a variadic wrapper lambda.
2582  FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
2583  return for_each_element(wrapper);
2584  }
2585 
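 /* Usage sketch (illustrative): filling a buffer from a coordinate-based
    callable; the callable's return value is stored at each site.
 \code
 Buffer<int, 2> ramp(8, 8);
 ramp.fill([](int x, int y) { return x + 8 * y; });
 assert(ramp(3, 2) == 19);
 \endcode
 */
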
2586  /** Check if an input buffer passed to an extern stage is a bounds
2587  * query. Compared to doing the host pointer check directly,
2588  * this both adds clarity to code and will facilitate moving to
2589  * another representation for bounds query arguments. */
2590  bool is_bounds_query() const {
2591  return buf.is_bounds_query();
2592  }
2593 
2594  /** Convenient check to verify that all of the interesting bytes in the Buffer
2595  * are initialized under MSAN. Note that by default, we use for_each_value() here so that
2596  * we skip any unused padding that isn't part of the Buffer; this isn't efficient,
2597  * but in MSAN mode, it doesn't matter. (Pass true for the flag to force check
2598  * the entire Buffer storage.) */
2599  void msan_check_mem_is_initialized(bool entire = false) const {
2600 #if defined(__has_feature)
2601 #if __has_feature(memory_sanitizer)
2602  if (entire) {
2603  __msan_check_mem_is_initialized(data(), size_in_bytes());
2604  } else {
2605  for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); });
2606  }
2607 #endif
2608 #endif
2609  }
2610 };
2611 
2612 } // namespace Runtime
2613 } // namespace Halide
2614 
2615 #undef HALIDE_ALLOCA
2616 
2617 #endif // HALIDE_RUNTIME_BUFFER_H
#define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT
Definition: HalideBuffer.h:52
#define HALIDE_ALLOCA
Definition: HalideBuffer.h:36
This file declares the routines used by Halide internally in its runtime.
#define HALIDE_NEVER_INLINE
Definition: HalideRuntime.h:50
@ halide_error_code_success
There was no error.
#define HALIDE_ALWAYS_INLINE
Definition: HalideRuntime.h:49
struct halide_buffer_t halide_buffer_t
The raw representation of an image passed around by generated Halide code.
A Halide::Buffer is a named shared reference to a Halide::Runtime::Buffer.
Definition: Buffer.h:122
Read-only access to the shape.
Definition: HalideBuffer.h:572
HALIDE_ALWAYS_INLINE int min() const
The lowest coordinate in this dimension.
Definition: HalideBuffer.h:577
Dimension(const halide_dimension_t &dim)
Definition: HalideBuffer.h:623
HALIDE_ALWAYS_INLINE int max() const
The highest coordinate in this dimension.
Definition: HalideBuffer.h:593
HALIDE_ALWAYS_INLINE iterator end() const
An iterator that points to one past the max coordinate.
Definition: HalideBuffer.h:619
HALIDE_ALWAYS_INLINE int stride() const
The number of elements in memory you have to step over to increment this coordinate by one.
Definition: HalideBuffer.h:583
HALIDE_ALWAYS_INLINE iterator begin() const
An iterator that points to the min coordinate.
Definition: HalideBuffer.h:614
HALIDE_ALWAYS_INLINE int extent() const
The extent of the image along this dimension.
Definition: HalideBuffer.h:588
A templated Buffer class that wraps halide_buffer_t and adds functionality.
Definition: HalideBuffer.h:221
Buffer< T,(Dims==AnyDims ? AnyDims :Dims+1)> embedded(int d, int pos=0) const
Make a new buffer that views this buffer as a single slice in a higher-dimensional space.
void translate(int d, int delta)
Translate an image in-place along one dimension by changing how it is indexed.
Buffer(const halide_buffer_t &buf, BufferDeviceOwnership ownership=BufferDeviceOwnership::Unmanaged)
Make a Buffer from a halide_buffer_t.
Definition: HalideBuffer.h:702
void allocate(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr)
Allocate memory for this Buffer.
Definition: HalideBuffer.h:897
void add_dimension()
Add a new dimension with a min of zero and an extent of one.
T * end() const
A pointer to one beyond the element with the highest address.
Definition: HalideBuffer.h:675
void slice(int d)
Slice a buffer in-place at the dimension's minimum.
static void set_default_allocate_fn(void *(*allocate_fn)(size_t))
Definition: HalideBuffer.h:726
bool owns_host_memory() const
Does this Buffer own the host memory it refers to?
Definition: HalideBuffer.h:269
HALIDE_ALWAYS_INLINE not_void_T & operator()(int first, Args... rest)
int width() const
Conventional names for the first three dimensions.
Buffer< T, Dims, InClassDimStorage > translated(int d, int dx) const
Make an image which refers to the same data with using translated coordinates in the given dimension.
HALIDE_ALWAYS_INLINE const Buffer< T2, D2, InClassDimStorage > & as() const &
Return a const typed reference to this Buffer.
void transpose(const std::vector< int > &order)
A generalized transpose: instead of swapping two dimensions, pass a vector that lists each dimension ...
void set_min(const std::vector< int > &mins)
Set the min coordinate of an image in the first N dimensions.
Buffer< T,(Dims==AnyDims ? AnyDims :Dims - 1)> sliced(int d, int pos) const
Make a lower-dimensional buffer that refers to one slice of this buffer.
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, const std::vector< int > &sizes)
Initialize an Buffer of runtime type from a pointer and a vector of sizes.
static Buffer< T, Dims, InClassDimStorage > make_interleaved(int width, int height, int channels)
If you use the (x, y, c) indexing convention, then Halide Buffers are stored planar by default.
int copy_to_host(void *ctx=nullptr)
Buffer(halide_type_t t, const std::vector< int > &sizes)
Allocate a new image of unknown type using a vector of ints as the size.
int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
int device_free(void *ctx=nullptr)
int extent(int i) const
Definition: HalideBuffer.h:639
bool contains(Args... args) const
HALIDE_ALWAYS_INLINE Buffer< T, Dims, InClassDimStorage > & for_each_value(Fn &&f, Args &&...other_buffers)
void set_device_dirty(bool v=true)
Buffer(T *data, int d, const halide_dimension_t *shape)
Initialize an Buffer from a pointer to the min coordinate and an array describing the shape.
Buffer(Buffer< T2, D2, S2 > &&other)
Move-construct a Buffer from a Buffer of different dimensionality and type.
Definition: HalideBuffer.h:803
void slice(int d, int pos)
Rewrite the buffer to refer to a single lower-dimensional slice of itself along the given dimension a...
HALIDE_ALWAYS_INLINE void set_host_dirty(bool v=true)
Methods for managing any GPU allocation.
void msan_check_mem_is_initialized(bool entire=false) const
Convenient check to verify that all of the interesting bytes in the Buffer are initialized under MSAN...
HALIDE_ALWAYS_INLINE Buffer< T2, D2, InClassDimStorage > as() &&
Return an rval reference to this Buffer.
Buffer< T, Dims, InClassDimStorage > & operator=(const Buffer< T2, D2, S2 > &other)
Assign from another Buffer of possibly-different dimensionality and type.
Definition: HalideBuffer.h:818
static Buffer< T, Dims, InClassDimStorage > make_scalar()
Make a zero-dimensional Buffer.
int device_detach_native(void *ctx=nullptr)
Buffer< T, Dims, InClassDimStorage > translated(const std::vector< int > &delta) const
Make an image which refers to the same data translated along the first N dimensions.
int device_wrap_native(const struct halide_device_interface_t *device_interface, uint64_t handle, void *ctx=nullptr)
HALIDE_ALWAYS_INLINE Dimension dim(int i) const
Access the shape of the buffer.
Definition: HalideBuffer.h:629
HALIDE_ALWAYS_INLINE Buffer< T, Dims, InClassDimStorage > & for_each_element(Fn &&f)
Buffer< T, Dims, InClassDimStorage > cropped(int d, int min, int extent) const
Make an image that refers to a sub-range of this image along the given dimension.
HALIDE_ALWAYS_INLINE Buffer< typename std::add_const< T >::type, Dims, InClassDimStorage > as_const() &&
Buffer(int first, int second, Args... rest)
HALIDE_ALWAYS_INLINE const Buffer< typename std::add_const< T >::type, Dims, InClassDimStorage > & as_const() const &
BufferDeviceOwnership device_ownership() const
Return the method by which the device field is managed.
void check_overflow()
Check the product of the extents fits in memory.
Definition: HalideBuffer.h:882
static bool can_convert_from(const Buffer< T2, D2, S2 > &other)
Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type.
Definition: HalideBuffer.h:737
int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
int device_sync(void *ctx=nullptr)
HALIDE_ALWAYS_INLINE const not_void_T & operator()() const
Buffer(const std::vector< int > &sizes)
Allocate a new image of known type using a vector of ints as the size.
void embed(int d, int pos=0)
Embed a buffer in-place, increasing the dimensionality.
static constexpr halide_type_t static_halide_type()
Get the Halide type of T.
Definition: HalideBuffer.h:264
Buffer(T *data, int first, Args &&...rest)
Initialize an Buffer from a pointer and some sizes.
HALIDE_ALWAYS_INLINE not_void_T & operator()(const int *pos)
int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Buffer< T, Dims, InClassDimStorage > cropped(const std::vector< std::pair< int, int >> &rect) const
Make an image that refers to a sub-rectangle of this image along the first N dimensions.
Buffer(Array(&vals)[N])
Make an Buffer that refers to a statically sized array.
static Buffer< void, Dims, InClassDimStorage > make_interleaved(halide_type_t t, int width, int height, int channels)
If you use the (x, y, c) indexing convention, then Halide Buffers are stored planar by default.
static Buffer< add_const_if_T_is_const< void >, Dims, InClassDimStorage > make_interleaved(halide_type_t t, T *data, int width, int height, int channels)
Wrap an existing interleaved image.
halide_type_t type() const
Get the type of the elements.
Definition: HalideBuffer.h:663
int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx=nullptr)
Buffer< T, Dims, InClassDimStorage > & fill(Fn &&f)
Fill a buffer by evaluating a callable at every site.
Buffer(int first)
Allocate a new image of the given size.
Definition: HalideBuffer.h:999
Buffer< not_const_T, Dims, InClassDimStorage > copy_to_interleaved(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Like copy(), but the copy is created in interleaved memory layout (vs.
HALIDE_ALWAYS_INLINE bool device_dirty() const
static Buffer< T, Dims, InClassDimStorage > make_scalar(T *data)
Make a zero-dimensional Buffer that points to non-owned, existing data.
static constexpr int static_dimensions()
Callers should not use the result if has_static_dimensions is false.
Definition: HalideBuffer.h:277
Buffer< not_const_T, Dims, InClassDimStorage > copy_to_planar(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Like copy(), but the copy is created in planar memory layout (vs.
void transpose(int d1, int d2)
Transpose a buffer in-place by changing how it is indexed.
HALIDE_ALWAYS_INLINE const not_void_T & operator()(const int *pos) const
void deallocate()
Drop reference to any owned host or device memory, possibly freeing it, if this buffer held the last ...
Definition: HalideBuffer.h:961
size_t size_in_bytes() const
The total number of bytes spanned by the data in memory.
Definition: HalideBuffer.h:681
HALIDE_ALWAYS_INLINE const Buffer< T, Dims, InClassDimStorage > & for_each_value(Fn &&f, Args &&...other_buffers) const
Call a function on every value in the buffer, and the corresponding values in some number of other bu...
bool has_device_allocation() const
halide_buffer_t * raw_buffer()
Get a pointer to the raw halide_buffer_t this wraps.
Buffer< T, Dims, InClassDimStorage > & operator=(Buffer< T, Dims, InClassDimStorage > &&other) noexcept
Standard move-assignment operator.
Definition: HalideBuffer.h:868
Buffer< T, Dims, InClassDimStorage > transposed(const std::vector< int > &order) const
Make a buffer which refers to the same data in the same layout using a different ordering of the dime...
static Buffer< T, Dims, InClassDimStorage > make_with_shape_of(Buffer< T2, D2, S2 > src, void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr)
Make a buffer with the same shape and memory nesting order as another buffer.
void reset()
Reset the Buffer to be equivalent to a default-constructed Buffer of the same static type (if any); B...
Definition: HalideBuffer.h:688
HALIDE_ALWAYS_INLINE Buffer< typename std::add_const< T >::type, Dims, InClassDimStorage > & as_const() &
as_const() is syntactic sugar for .as<const T>(), to avoid the need to recapitulate the type argument...
HALIDE_ALWAYS_INLINE const not_void_T & operator()(int first, Args... rest) const
Access elements.
Buffer(halide_type_t t, int first, Args... rest)
Allocate a new image of the given size with a runtime type.
Definition: HalideBuffer.h:978
int dimensions() const
Get the dimensionality of the buffer.
Definition: HalideBuffer.h:654
Buffer< T, Dims, InClassDimStorage > & operator=(const Buffer< T, Dims, InClassDimStorage > &other)
Standard assignment operator.
Definition: HalideBuffer.h:834
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, int d, const halide_dimension_t *shape)
Initialize an Buffer from a pointer to the min coordinate and an array describing the shape.
int min(int i) const
Access to the mins, strides, extents.
Definition: HalideBuffer.h:636
Buffer< T, Dims, InClassDimStorage > & operator=(Buffer< T2, D2, S2 > &&other)
Move from another Buffer of possibly-different dimensionality and type.
Definition: HalideBuffer.h:853
void device_deallocate()
Drop reference to any owned device memory, possibly freeing it if this buffer held the last reference...
Definition: HalideBuffer.h:968
void add_dimension_with_stride(int s)
Add a new dimension with a min of zero, an extent of one, and the specified stride.
Buffer< T, Dims, InClassDimStorage > & fill(not_void_T val)
Buffer(Buffer< T, Dims, InClassDimStorage > &&other) noexcept
Move constructor.
Definition: HalideBuffer.h:789
static Buffer< add_const_if_T_is_const< void >, Dims, InClassDimStorage > make_scalar(halide_type_t t)
Make a zero-dimensional Buffer.
HALIDE_ALWAYS_INLINE not_void_T & operator()()
void crop(const std::vector< std::pair< int, int >> &rect)
Crop an image in-place along the first N dimensions.
void crop(int d, int min, int extent)
Crop an image in-place along the given dimension.
void set_min(Args... args)
Buffer< T, Dims, InClassDimStorage > transposed(int d1, int d2) const
Make a buffer which refers to the same data in the same layout using a swapped indexing order for the...
Buffer< T,(Dims==AnyDims ? AnyDims :Dims - 1)> sliced(int d) const
Make a lower-dimensional buffer that refers to one slice of this buffer at the dimension's minimum.
size_t number_of_elements() const
The total number of elements this buffer represents.
Definition: HalideBuffer.h:649
static void assert_can_convert_from(const Buffer< T2, D2, S2 > &other)
Fail an assertion at runtime or compile-time if an Buffer<T, Dims, InClassDimStorage> cannot be const...
Definition: HalideBuffer.h:755
void translate(const std::vector< int > &delta)
Translate an image along the first N dimensions by changing how it is indexed.
Buffer(const Buffer< T, Dims, InClassDimStorage > &other)
Copy constructor.
Definition: HalideBuffer.h:764
Buffer(const std::vector< int > &sizes, const std::vector< int > &storage_order)
Buffer(halide_type_t t, const std::vector< int > &sizes, const std::vector< int > &storage_order)
Allocate a new image of unknown type using a vector of ints as the size and a vector of indices indic...
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, const std::vector< halide_dimension_t > &shape)
Initialize a Buffer from a pointer to the min coordinate and a vector describing the shape.
T * data() const
Get a pointer to the address of the min coordinate.
bool is_bounds_query() const
Check if an input buffer passed extern stage is a querying bounds.
int left() const
Conventional names for the min and max value of each dimension.
void copy_from(Buffer< T2, D2, S2 > src)
Fill a Buffer with the values at the same coordinates in another Buffer.
static Buffer< T, Dims, InClassDimStorage > make_interleaved(T *data, int width, int height, int channels)
Wrap an existing interleaved image.
Buffer< not_const_T, Dims, InClassDimStorage > copy(void *(*allocate_fn)(size_t)=nullptr, void(*deallocate_fn)(void *)=nullptr) const
Make a new image which is a deep copy of this image.
Buffer< T, Dims, InClassDimStorage > alias() const
Make a copy of the Buffer which shares the underlying host and/or device allocations with the existing Buffer.
int stride(int i) const
Definition: HalideBuffer.h:642
static void set_default_deallocate_fn(void(*deallocate_fn)(void *))
Definition: HalideBuffer.h:729
HALIDE_ALWAYS_INLINE const Buffer< T, Dims, InClassDimStorage > & for_each_element(Fn &&f) const
Call a function at each site in a buffer.
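A small sketch of visiting every coordinate (the gradient written is illustrative):
    Halide::Runtime::Buffer<int> img(4, 3);
    img.for_each_element([&](int x, int y) {
        img(x, y) = x + 10 * y;  // the callback receives one int per dimension
    });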
Buffer(const Buffer< T2, D2, S2 > &other)
Construct a Buffer from a Buffer of different dimensionality and type.
Definition: HalideBuffer.h:779
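One common use is adding const to the element type; a sketch (names are illustrative):
    Halide::Runtime::Buffer<float> mutable_buf(8, 8);
    Halide::Runtime::Buffer<const float> read_only(mutable_buf);  // shares the same allocation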
bool contains(const std::vector< int > &coords) const
Test if a given coordinate is within the bounds of an image.
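For example (coordinates are illustrative):
    Halide::Runtime::Buffer<float> img(64, 64);
    bool inside = img.contains({10, 20});   // true
    bool outside = img.contains({64, 0});   // false: valid x coordinates are 0..63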
T * begin() const
A pointer to the element with the lowest address.
Definition: HalideBuffer.h:669
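A sketch of raw pointer iteration (only meaningful when, as here, the buffer's storage is densely packed):
    Halide::Runtime::Buffer<float> b(16, 16);
    float sum = 0.0f;
    for (const float *p = b.begin(); p != b.end(); p++) {
        sum += *p;
    }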
Buffer(T *data, const std::vector< halide_dimension_t > &shape)
Initialize a Buffer from a pointer to the min coordinate and a vector describing the shape.
Buffer(T *data, const std::vector< int > &sizes)
Initialize a Buffer from a pointer and a vector of sizes.
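A brief sketch of wrapping existing memory this way (the storage must outlive the Buffer; sizes are illustrative):
    std::vector<float> storage(640 * 480);
    Halide::Runtime::Buffer<float> wrapped(storage.data(), {640, 480});  // no copy is made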
bool all_equal(not_void_T val) const
Tests that all values in this buffer are equal to val.
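For example (the fill value is illustrative):
    Halide::Runtime::Buffer<uint8_t> mask(32, 32);
    mask.fill(255);
    bool ok = mask.all_equal(255);  // true: every element was just set to 255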
Buffer(halide_type_t t, add_const_if_T_is_const< void > *data, int first, Args &&...rest)
Initialize a Buffer of runtime type from a pointer and some sizes.
const halide_buffer_t * raw_buffer() const
HALIDE_ALWAYS_INLINE Buffer< T2, D2, InClassDimStorage > & as() &
Return a typed reference to this Buffer.
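A minimal sketch of viewing a type-erased buffer through its concrete element type (the runtime type chosen is an assumption for illustration):
    Halide::Runtime::Buffer<void> raw(halide_type_of<float>(), 8, 8);
    Halide::Runtime::Buffer<float> &typed = raw.as<float>();  // asserts the element types match
    typed(0, 0) = 1.0f;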
HALIDE_ALWAYS_INLINE auto slice(Vec vec, Base base, Stride stride, Lanes lanes) noexcept -> SliceOp< decltype(pattern_arg(vec)), decltype(pattern_arg(base)), decltype(pattern_arg(stride)), decltype(pattern_arg(lanes))>
Definition: IRMatch.h:2177
ConstantInterval min(const ConstantInterval &a, const ConstantInterval &b)
ConstantInterval max(const ConstantInterval &a, const ConstantInterval &b)
bool any_zero(const Container &c)
Definition: HalideBuffer.h:148
constexpr int AnyDims
Definition: HalideBuffer.h:192
BufferDeviceOwnership
This indicates how to deallocate the device for a Halide::Runtime::Buffer.
Definition: HalideBuffer.h:176
@ AllocatedDeviceAndHost
Call device_and_host_free when DevRefCount goes to zero.
@ WrappedNative
halide_device_detach_native will be called when device ref count goes to zero
@ Unmanaged
No free routine will be called when device ref count goes to zero
@ Cropped
Call halide_device_release_crop when DevRefCount goes to zero.
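A hedged sketch of consulting this tag (device_ownership() is assumed to be the accessor; constructing a device-backed buffer is elided because it requires a concrete device interface):
    void describe(const Halide::Runtime::Buffer<float> &gpu_buf) {
        using Halide::Runtime::BufferDeviceOwnership;
        if (gpu_buf.device_ownership() == BufferDeviceOwnership::WrappedNative) {
            // the wrapped handle will be detached, not freed, when the ref count reaches zero
        }
    }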
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
Definition: Func.h:597
Expr max(const FuncRef &a, const FuncRef &b)
Definition: Func.h:600
unsigned __INT64_TYPE__ uint64_t
void * malloc(size_t)
__UINTPTR_TYPE__ uintptr_t
ALWAYS_INLINE T align_up(T p, size_t alignment)
unsigned __INT8_TYPE__ uint8_t
void * memset(void *s, int val, size_t n)
__PTRDIFF_TYPE__ ptrdiff_t
unsigned __INT16_TYPE__ uint16_t
__SIZE_TYPE__ size_t
unsigned __INT32_TYPE__ uint32_t
void * memcpy(void *s1, const void *s2, size_t n)
void free(void *)
A struct acting as a header for allocations owned by the Buffer class itself.
Definition: HalideBuffer.h:165
AllocationHeader(void(*deallocate_fn)(void *))
Definition: HalideBuffer.h:170
An iterator class, so that you can iterate over coordinates in a dimension using a range-based for loop.
Definition: HalideBuffer.h:599
bool operator!=(const iterator &other) const
Definition: HalideBuffer.h:604
A similar struct for managing device allocations.
Definition: HalideBuffer.h:185
BufferDeviceOwnership ownership
Definition: HalideBuffer.h:189
The raw representation of an image passed around by generated Halide code.
int32_t dimensions
The dimensionality of the buffer.
halide_dimension_t * dim
The shape of the buffer.
uint64_t device
A device-handle for e.g. GPU memory used by the halide_buffer_t.
uint8_t * host
A pointer to the start of the data in main memory.
struct halide_type_t type
The type of each buffer element.
const struct halide_device_interface_t * device_interface
The interface used to interpret the above handle.
Each GPU API provides a halide_device_interface_t struct pointing to the code that manages device all...
int(* device_slice)(void *user_context, const struct halide_buffer_t *src, int slice_dim, int slice_pos, struct halide_buffer_t *dst)
int(* device_and_host_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
int(* wrap_native)(void *user_context, struct halide_buffer_t *buf, uint64_t handle, const struct halide_device_interface_t *device_interface)
int(* device_release_crop)(void *user_context, struct halide_buffer_t *buf)
int(* device_crop)(void *user_context, const struct halide_buffer_t *src, struct halide_buffer_t *dst)
int(* copy_to_host)(void *user_context, struct halide_buffer_t *buf)
int(* copy_to_device)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
int(* device_free)(void *user_context, struct halide_buffer_t *buf)
int(* detach_native)(void *user_context, struct halide_buffer_t *buf)
int(* device_and_host_free)(void *user_context, struct halide_buffer_t *buf)
int(* device_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
A runtime tag for a type in the halide type system.