#ifndef HALIDE_RUNTIME_BUFFER_H
#define HALIDE_RUNTIME_BUFFER_H

#include <algorithm>
#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <new>
#include <utility>
#include <vector>

#include "HalideRuntime.h"

#ifdef __APPLE__
#include <AvailabilityVersions.h>
#include <TargetConditionals.h>
#endif

#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
#endif

#ifdef _MSC_VER
#define HALIDE_ALLOCA _alloca
#else
#define HALIDE_ALLOCA __builtin_alloca
#endif

// gcc 5.1 has a false positive warning on this code
#if __GNUC__ == 5 && __GNUC_MINOR__ == 1
#pragma GCC diagnostic ignored "-Warray-bounds"
#endif

#ifndef HALIDE_RUNTIME_BUFFER_CHECK_INDICES
#define HALIDE_RUNTIME_BUFFER_CHECK_INDICES 0
#endif

#ifndef HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT
// Conservatively align buffer allocations to 128 bytes by default.
// This is enough alignment for all the platforms currently in use.
#define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT 128
#endif

static_assert(((HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT & (HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT - 1)) == 0),
              "HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT must be a power of 2.");
#ifndef HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC

#ifdef _MSC_VER
// MSVC doesn't implement aligned_alloc(), even in C++17 mode.
#define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0

#elif defined(__ANDROID_API__) && __ANDROID_API__ < 28
// Android doesn't provide aligned_alloc until API 28.
#define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0

#elif defined(__APPLE__)

#if TARGET_OS_OSX && (__MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_15)
// macOS doesn't provide aligned_alloc before 10.15.
#define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0

#elif TARGET_OS_IPHONE && (__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_14_0)
// iOS doesn't provide aligned_alloc before 14.0.
#define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0

#else
#define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
#endif

#else

// Everything else is assumed to provide a C++17-compliant aligned_alloc(),
// except that glibc-based toolchains may lack it even in C++17 mode.
#if defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)
#define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
#else
#define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
#endif

#endif

#endif  // HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC

namespace Halide {
namespace Runtime {

// Forward-declare our Buffer class
template<typename T, int Dims, int InClassDimStorage>
class Buffer;
// A helper to check if a parameter pack is entirely implicitly
// int-convertible, for use with std::enable_if
template<typename... Args>
struct AllInts : std::false_type {};

template<>
struct AllInts<> : std::true_type {};

template<typename T, typename... Args>
struct AllInts<T, Args...> {
    static const bool value = std::is_convertible<T, int>::value &&
                              AllInts<Args...>::value;
};

// Floats and doubles are technically implicitly int-convertible, but
// doing so produces a warning we treat as an error, so just disallow
// it here.
template<typename... Args>
struct AllInts<float, Args...> : std::false_type {};

template<typename... Args>
struct AllInts<double, Args...> : std::false_type {};
// A helper to detect if there are any zeros in a container
template<typename Container>
bool any_zero(const Container &c) {
    for (int i : c) {
        if (i == 0) {
            return true;
        }
    }
    return false;
}

/** A struct acting as a header for allocations owned by the Buffer
 * class itself. */
struct AllocationHeader {
    void (*deallocate_fn)(void *);
    std::atomic<int> ref_count;

    // Note that ref_count always starts at 1
    explicit AllocationHeader(void (*deallocate_fn)(void *))
        : deallocate_fn(deallocate_fn), ref_count(1) {
    }
};

/** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
enum struct BufferDeviceOwnership : int {
    Allocated,               ///> halide_device_free will be called when device ref count goes to zero
    WrappedNative,           ///> halide_device_detach_native will be called when device ref count goes to zero
    Unmanaged,               ///> No free routine will be called when device ref count goes to zero
    AllocatedDeviceAndHost,  ///> Call device_and_host_free when device ref count goes to zero
    Cropped,                 ///> Call halide_device_release_crop when device ref count goes to zero
};

/** A similar struct for managing device allocations. */
struct DeviceRefCount {
    // This is only ever constructed when there's something to manage,
    // so start at one.
    std::atomic<int> count{1};
    BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated};
};
/** Use Dims = AnyDims to indicate a buffer of dynamic dimensionality. */
constexpr int AnyDims = -1;

/** A templated Buffer class that wraps halide_buffer_t and adds
 * functionality. When using Halide from C++, this is the preferred
 * way to create input and output buffers. */
template<typename T = void,
         int Dims = AnyDims,
         int InClassDimStorage = (Dims == AnyDims ? 4 : std::max(Dims, 1))>
class Buffer {
    /** The underlying halide_buffer_t */
    halide_buffer_t buf = {};

    /** Some in-class storage for the shape of the dimensions. */
    halide_dimension_t shape[InClassDimStorage];

    /** The allocation owned by this Buffer. Null if the Buffer does not
     * own the memory. */
    AllocationHeader *alloc = nullptr;

    /** A reference count for the device allocation owned by this buffer. */
    mutable DeviceRefCount *dev_ref_count = nullptr;

    /** True if T is of type void or const void */
    static const bool T_is_void = std::is_same<typename std::remove_const<T>::type,
                                               void>::value;

    /** A type function that adds a const qualifier if T is a const type */
    template<typename T2>
    using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;

    /** T unless T is (const) void, in which case (const) uint8_t.
     * Useful for providing return types for operator() */
    using not_void_T =
        typename std::conditional<T_is_void,
                                  add_const_if_T_is_const<uint8_t>,
                                  T>::type;

    /** T with constness removed. Useful for the return type of copy(). */
    using not_const_T =
        typename std::remove_const<T>::type;

    /** The type the elements are stored as. Equal to not_void_T unless
     * T is a pointer, in which case uint64_t. Halide stores all
     * pointer types as uint64s internally, even on 32-bit systems. */
    using storage_T =
        typename std::conditional<std::is_pointer<T>::value,
                                  uint64_t, not_void_T>::type;

public:
    /** True if the Halide type is not void (or const void). */
    static constexpr bool has_static_halide_type = !T_is_void;

    /** Get the Halide type of T. Callers should not use the result if
     * has_static_halide_type is false. */
    static constexpr halide_type_t static_halide_type() {
        return halide_type_of<typename std::remove_cv<not_void_T>::type>();
    }

    /** Does this Buffer own the host memory it refers to? */
    bool owns_host_memory() const {
        return alloc != nullptr;
    }

    /** True if the dimensionality of the Buffer is fixed at compile time. */
    static constexpr bool has_static_dimensions = (Dims != AnyDims);

    /** Callers should not use the result if has_static_dimensions is false. */
    static constexpr int static_dimensions() {
        return Dims;
    }

    static_assert(!has_static_dimensions || static_dimensions() >= 0);

private:
    /** Increment the reference count of any owned allocation */
    void incref() const {
        if (owns_host_memory()) {
            alloc->ref_count++;
        }
        if (buf.device) {
            if (!dev_ref_count) {
                // I seem to have a non-zero dev field but no reference
                // count for it. I must have been given a device
                // allocation by a Halide pipeline, and have never been
                // copied from since. Take sole ownership of it.
                dev_ref_count = new DeviceRefCount;
            }
            dev_ref_count->count++;
        }
    }

    // Note that this is called "cropped" but can also encompass a
    // slice/embed operation as well.
    struct DevRefCountCropped : DeviceRefCount {
        // Keep a reference to the cropped-from buffer, to keep it
        // alive while this one lives.
        Buffer<T, Dims, InClassDimStorage> cropped_from;
        explicit DevRefCountCropped(const Buffer<T, Dims, InClassDimStorage> &cropped_from)
            : cropped_from(cropped_from) {
            ownership = BufferDeviceOwnership::Cropped;
        }
    };

    /** Set up the device ref count for a buffer to indicate it is a crop
     * (or slice, embed, etc.) of cropped_from */
    void crop_from(const Buffer<T, Dims, InClassDimStorage> &cropped_from) {
        assert(dev_ref_count == nullptr);
        dev_ref_count = new DevRefCountCropped(cropped_from);
    }
    /** Decrement the reference count of any owned allocation and free host
     * and device memory if it hits zero. Sets alloc to nullptr. */
    void decref(bool device_only = false) {
        if (owns_host_memory() && !device_only) {
            int new_count = --(alloc->ref_count);
            if (new_count == 0) {
                void (*fn)(void *) = alloc->deallocate_fn;
                alloc->~AllocationHeader();
                fn(alloc);
            }
            buf.host = nullptr;
            alloc = nullptr;
            set_host_dirty(false);
        }
        int new_count = 0;
        if (dev_ref_count) {
            new_count = --(dev_ref_count->count);
        }
        if (new_count == 0) {
            if (buf.device) {
                assert(!(alloc && device_dirty()) &&
                       "Implicitly freeing a dirty device allocation while a host allocation still lives. "
                       "Call device_free explicitly if you want to drop dirty device-side data. "
                       "Call copy_to_host explicitly if you want the data copied to the host allocation "
                       "before the device allocation is freed.");
                int result = halide_error_code_success;
                if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
                    result = buf.device_interface->detach_native(nullptr, &buf);
                } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
                    result = buf.device_interface->device_and_host_free(nullptr, &buf);
                } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                    result = buf.device_interface->device_release_crop(nullptr, &buf);
                } else if (!dev_ref_count || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
                    result = buf.device_interface->device_free(nullptr, &buf);
                }
                // There's no reasonable way to return the error, but we
                // can at least fail an assert in debug builds.
                assert(result == halide_error_code_success &&
                       "device_interface call returned a nonzero result in Buffer::decref()");
                (void)result;
            }
            if (dev_ref_count) {
                if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                    delete (DevRefCountCropped *)dev_ref_count;
                } else {
                    delete dev_ref_count;
                }
            }
            dev_ref_count = nullptr;
            buf.device = 0;
            buf.device_interface = nullptr;
        }
    }
    void free_shape_storage() {
        if (buf.dim != shape) {
            delete[] buf.dim;
            buf.dim = nullptr;
        }
    }

    template<int DimsSpecified>
    void make_static_shape_storage() {
        static_assert(Dims == AnyDims || Dims == DimsSpecified,
                      "Number of arguments to Buffer() does not match static dimensionality");
        buf.dimensions = DimsSpecified;
        if constexpr (Dims == AnyDims) {
            if constexpr (DimsSpecified <= InClassDimStorage) {
                buf.dim = shape;
            } else {
                static_assert(DimsSpecified >= 1);
                buf.dim = new halide_dimension_t[DimsSpecified];
            }
        } else {
            static_assert(InClassDimStorage >= Dims);
            buf.dim = shape;
        }
    }

    void make_shape_storage(const int dimensions) {
        if (Dims != AnyDims && Dims != dimensions) {
            assert(false &&
                   "Number of arguments to Buffer() does not match static dimensionality");
        }
        // This should usually be inlined, so if dimensions is
        // statically known, we can skip the call to new.
        buf.dimensions = dimensions;
        buf.dim = (dimensions <= InClassDimStorage) ? shape : new halide_dimension_t[dimensions];
    }
    void copy_shape_from(const halide_buffer_t &other) {
        // All callers of this ensure that buf.dimensions == other.dimensions.
        make_shape_storage(other.dimensions);
        std::copy(other.dim, other.dim + other.dimensions, buf.dim);
    }

    template<typename T2, int D2, int S2>
    void move_shape_from(Buffer<T2, D2, S2> &&other) {
        if (other.shape == other.buf.dim) {
            copy_shape_from(other.buf);
        } else {
            buf.dim = other.buf.dim;
            other.buf.dim = nullptr;
        }
        other.buf = halide_buffer_t();
    }

    /** Initialize from a halide_buffer_t, copying its shape. */
    void initialize_from_buffer(const halide_buffer_t &b,
                                BufferDeviceOwnership ownership) {
        memcpy(&buf, &b, sizeof(halide_buffer_t));
        copy_shape_from(b);
        if (b.device) {
            dev_ref_count = new DeviceRefCount;
            dev_ref_count->ownership = ownership;
        }
    }
    /** Initialize the shape from an array of extents, with dense
     * row-major strides and mins of zero. */
    void initialize_shape(const int *sizes) {
        for (int i = 0; i < buf.dimensions; i++) {
            buf.dim[i].min = 0;
            buf.dim[i].extent = sizes[i];
            if (i == 0) {
                buf.dim[i].stride = 1;
            } else {
                buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
            }
        }
    }

    /** Initialize the shape from a vector of extents */
    void initialize_shape(const std::vector<int> &sizes) {
        assert(buf.dimensions == (int)sizes.size());
        initialize_shape(sizes.data());
    }

    /** Initialize the shape from the static shape of a C array */
    template<typename Array, size_t N>
    void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
        buf.dim[next].min = 0;
        buf.dim[next].extent = (int)N;
        if (next == 0) {
            buf.dim[next].stride = 1;
        } else {
            initialize_shape_from_array_shape(next - 1, vals[0]);
            buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
        }
    }

    /** Base case for the recursion above */
    template<typename T2>
    void initialize_shape_from_array_shape(int, const T2 &) {
    }
    /** Get the dimensionality of a multi-dimensional C array */
    template<typename Array, size_t N>
    static int dimensionality_of_array(Array (&vals)[N]) {
        return dimensionality_of_array(vals[0]) + 1;
    }

    template<typename T2>
    static int dimensionality_of_array(const T2 &) {
        return 0;
    }

    /** Get the underlying halide_type_t of a C array's element type. */
    template<typename Array, size_t N>
    static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
        return scalar_type_of_array(vals[0]);
    }

    template<typename T2>
    static halide_type_t scalar_type_of_array(const T2 &) {
        return halide_type_of<typename std::remove_cv<T2>::type>();
    }
    /** Crop a single dimension without handling device allocation. */
    void crop_host(int d, int min, int extent) {
        assert(dim(d).min() <= min);
        assert(dim(d).max() >= min + extent - 1);
        ptrdiff_t shift = min - dim(d).min();
        if (buf.host != nullptr) {
            buf.host += (shift * dim(d).stride()) * type().bytes();
        }
        buf.dim[d].min = min;
        buf.dim[d].extent = extent;
    }

    /** Crop as many dimensions as are in rect, without handling device
     * allocation. */
    void crop_host(const std::vector<std::pair<int, int>> &rect) {
        int limit = (int)rect.size();
        assert(limit <= dimensions());
        for (int i = 0; i < limit; i++) {
            crop_host(i, rect[i].first, rect[i].second);
        }
    }

    void complete_device_crop(Buffer<T, Dims, InClassDimStorage> &result_host_cropped) const {
        assert(buf.device_interface != nullptr);
        if (buf.device_interface->device_crop(nullptr, &this->buf, &result_host_cropped.buf) == halide_error_code_success) {
            const Buffer<T, Dims, InClassDimStorage> *cropped_from = this;
            // If this buffer is itself a crop, the new crop must keep
            // the original (not this intermediate view) alive.
            if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                cropped_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
            }
            result_host_cropped.crop_from(*cropped_from);
        }
    }
    /** Slice a single dimension without handling device allocation. */
    void slice_host(int d, int pos) {
        static_assert(Dims == AnyDims);
        assert(dimensions() > 0);
        assert(d >= 0 && d < dimensions());
        assert(pos >= dim(d).min() && pos <= dim(d).max());
        buf.dimensions--;
        ptrdiff_t shift = pos - buf.dim[d].min;
        if (buf.host != nullptr) {
            buf.host += (shift * buf.dim[d].stride) * type().bytes();
        }
        for (int i = d; i < buf.dimensions; i++) {
            buf.dim[i] = buf.dim[i + 1];
        }
        buf.dim[buf.dimensions] = {0, 0, 0};
    }

    void complete_device_slice(Buffer<T, AnyDims, InClassDimStorage> &result_host_sliced, int d, int pos) const {
        assert(buf.device_interface != nullptr);
        if (buf.device_interface->device_slice(nullptr, &this->buf, d, pos, &result_host_sliced.buf) == halide_error_code_success) {
            const Buffer<T, Dims, InClassDimStorage> *sliced_from = this;
            // As with cropping, a slice of a crop must keep the
            // original buffer alive.
            if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
                sliced_from = &((DevRefCountCropped *)dev_ref_count)->cropped_from;
            }
            // crop_from() is correct here, despite the fact that we are slicing.
            result_host_sliced.crop_from(*sliced_from);
        }
    }
public:
    /** Read-only access to the shape */
    class Dimension {
        const halide_dimension_t &d;

    public:
        /** The lowest coordinate in this dimension */
        HALIDE_ALWAYS_INLINE int min() const {
            return d.min;
        }

        /** The number of elements in memory you have to step over to
         * increment this coordinate by one. */
        HALIDE_ALWAYS_INLINE int stride() const {
            return d.stride;
        }

        /** The extent of the image along this dimension */
        HALIDE_ALWAYS_INLINE int extent() const {
            return d.extent;
        }

        /** The highest coordinate in this dimension */
        HALIDE_ALWAYS_INLINE int max() const {
            return min() + extent() - 1;
        }

        /** An iterator class, so that you can iterate over coordinates
         * in a dimension using a range-based for loop. */
        struct iterator {
            int val;
            int operator*() const {
                return val;
            }
            bool operator!=(const iterator &other) const {
                return val != other.val;
            }
            iterator &operator++() {
                val++;
                return *this;
            }
        };

        /** An iterator that points to the min coordinate */
        HALIDE_ALWAYS_INLINE iterator begin() const {
            return {min()};
        }

        /** An iterator that points to one past the max coordinate */
        HALIDE_ALWAYS_INLINE iterator end() const {
            return {min() + extent()};
        }

        Dimension(const halide_dimension_t &dim)
            : d(dim) {
        }
    };

    /** Access the shape of the buffer */
    HALIDE_ALWAYS_INLINE Dimension dim(int i) const {
        assert(i >= 0 && i < this->dimensions());
        return Dimension(buf.dim[i]);
    }
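    /** A minimal usage sketch (hypothetical caller code, not part of the
     * original header): the iterator pair above makes each dimension's
     * coordinate range usable in a range-based for loop.
     \code
     Halide::Runtime::Buffer<float> im(8, 8);
     for (int y : im.dim(1)) {
         for (int x : im.dim(0)) {
             im(x, y) = 0.0f;  // x, y run from each dimension's min() to max()
         }
     }
     \endcode
     */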
    /** Access to the mins, strides, and extents. Will be deprecated. Do not use. */
    // @{
    int min(int i) const {
        return dim(i).min();
    }
    int extent(int i) const {
        return dim(i).extent();
    }
    int stride(int i) const {
        return dim(i).stride();
    }
    // @}

    /** The total number of elements this buffer represents. Equal to
     * the product of the extents. */
    size_t number_of_elements() const {
        return buf.number_of_elements();
    }

    /** Get the dimensionality of the buffer. */
    int dimensions() const {
        if constexpr (has_static_dimensions) {
            return Dims;
        } else {
            return buf.dimensions;
        }
    }

    /** Get the type of the elements. */
    halide_type_t type() const {
        return buf.type;
    }

    /** A pointer to the element with the lowest address. */
    T *begin() const {
        assert(buf.host != nullptr);
        return (T *)buf.begin();
    }

    /** A pointer to one beyond the element with the highest address. */
    T *end() const {
        assert(buf.host != nullptr);
        return (T *)buf.end();
    }

    /** The total number of bytes spanned by the data in memory. */
    size_t size_in_bytes() const {
        return buf.size_in_bytes();
    }
    /** Make a buffer with the given type and dimensionality, with no
     * allocation. */
    Buffer()
        : shape() {
        buf.type = static_halide_type();
        constexpr int buf_dimensions = (Dims == AnyDims) ? 0 : Dims;
        make_static_shape_storage<buf_dimensions>();
    }

    /** Make a Buffer from a halide_buffer_t */
    explicit Buffer(const halide_buffer_t &buf,
                    BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
        assert(T_is_void || buf.type == static_halide_type());
        initialize_from_buffer(buf, ownership);
    }
    /** Give Buffers access to the members of Buffers of different
     * dimensionalities and types. */
    template<typename T2, int D2, int S2>
    friend class Buffer;

private:
    template<typename T2, int D2, int S2>
    static void static_assert_can_convert_from() {
        static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
                      "Can't convert from a Buffer<const T> to a Buffer<T>");
        static_assert(std::is_same<typename std::remove_const<T>::type,
                                   typename std::remove_const<T2>::type>::value ||
                          T_is_void || Buffer<T2, D2, S2>::T_is_void,
                      "type mismatch constructing Buffer");
        static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2,
                      "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality");
    }

public:
    /** Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed
     * from some other Buffer type. If this can be determined at compile
     * time, fail with a static assert; otherwise return a boolean based on
     * runtime typing. */
    template<typename T2, int D2, int S2>
    static bool can_convert_from(const Buffer<T2, D2, S2> &other) {
        static_assert_can_convert_from<T2, D2, S2>();
        if (Buffer<T2, D2, S2>::T_is_void && !T_is_void) {
            if (other.type() != static_halide_type()) {
                return false;
            }
        }
        if (Dims != AnyDims && Dims != other.dimensions()) {
            return false;
        }
        return true;
    }

    /** Fail an assertion at runtime or compile time if a
     * Buffer<T, Dims, InClassDimStorage> cannot be constructed from some
     * other Buffer type. */
    template<typename T2, int D2, int S2>
    static void assert_can_convert_from(const Buffer<T2, D2, S2> &other) {
        // Explicitly call static_assert_can_convert_from() here so that
        // we always get compile-time checking, even if compiling with
        // assertions disabled.
        static_assert_can_convert_from<T2, D2, S2>();
        assert(can_convert_from(other));
    }
    /** Copy constructor. Does not copy underlying data. */
    Buffer(const Buffer<T, Dims, InClassDimStorage> &other)
        : buf(other.buf),
          alloc(other.alloc) {
        other.incref();
        dev_ref_count = other.dev_ref_count;
        copy_shape_from(other.buf);
    }

    /** Construct a Buffer from a Buffer of different dimensionality and
     * type. Asserts that the type and dimensionality match (at runtime,
     * if one of the types is void). Note that this constructor is
     * implicit; it lets you, for example, pass a Buffer<T> to a function
     * expecting a Buffer<const T>. */
    template<typename T2, int D2, int S2>
    Buffer(const Buffer<T2, D2, S2> &other)
        : buf(other.buf),
          alloc(other.alloc) {
        assert_can_convert_from(other);
        other.incref();
        dev_ref_count = other.dev_ref_count;
        copy_shape_from(other.buf);
    }

    /** Move constructor */
    Buffer(Buffer<T, Dims, InClassDimStorage> &&other) noexcept
        : buf(other.buf),
          alloc(other.alloc),
          dev_ref_count(other.dev_ref_count) {
        other.dev_ref_count = nullptr;
        other.alloc = nullptr;
        move_shape_from(std::move(other));
    }

    /** Move-construct a Buffer from a Buffer of different dimensionality
     * and type. Asserts that the types match (at runtime, if one of the
     * types is void). */
    template<typename T2, int D2, int S2>
    Buffer(Buffer<T2, D2, S2> &&other)
        : buf(other.buf),
          alloc(other.alloc),
          dev_ref_count(other.dev_ref_count) {
        assert_can_convert_from(other);
        other.dev_ref_count = nullptr;
        other.alloc = nullptr;
        move_shape_from(std::move(other));
    }
    /** Assign from another Buffer of possibly-different dimensionality
     * and type. Asserts that the types match (at runtime, if one of the
     * types is void). */
    template<typename T2, int D2, int S2>
    Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T2, D2, S2> &other) {
        if ((const void *)this == (const void *)&other) {
            return *this;
        }
        assert_can_convert_from(other);
        other.incref();
        decref();
        dev_ref_count = other.dev_ref_count;
        alloc = other.alloc;
        free_shape_storage();
        buf = other.buf;
        copy_shape_from(other.buf);
        return *this;
    }

    /** Standard assignment operator */
    Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T, Dims, InClassDimStorage> &other) {
        // The cast to void* here is just to satisfy clang-tidy.
        if ((const void *)this == (const void *)&other) {
            return *this;
        }
        other.incref();
        decref();
        dev_ref_count = other.dev_ref_count;
        alloc = other.alloc;
        free_shape_storage();
        buf = other.buf;
        copy_shape_from(other.buf);
        return *this;
    }

    /** Move from another Buffer of possibly-different dimensionality
     * and type. Asserts that the types match (at runtime, if one of the
     * types is void). */
    template<typename T2, int D2, int S2>
    Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T2, D2, S2> &&other) {
        assert_can_convert_from(other);
        decref();
        alloc = other.alloc;
        other.alloc = nullptr;
        dev_ref_count = other.dev_ref_count;
        other.dev_ref_count = nullptr;
        free_shape_storage();
        buf = other.buf;
        move_shape_from(std::move(other));
        return *this;
    }

    /** Standard move-assignment operator */
    Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T, Dims, InClassDimStorage> &&other) noexcept {
        decref();
        alloc = other.alloc;
        other.alloc = nullptr;
        dev_ref_count = other.dev_ref_count;
        other.dev_ref_count = nullptr;
        free_shape_storage();
        buf = other.buf;
        move_shape_from(std::move(other));
        return *this;
    }
    /** Check the product of the extents fits in memory. */
    void check_overflow() {
        size_t size = type().bytes();
        for (int i = 0; i < dimensions(); i++) {
            size *= dim(i).extent();
        }
        // We allow 2^31 or 2^63 bytes, so drop the top bit.
        size = (size << 1) >> 1;
        for (int i = 0; i < dimensions(); i++) {
            size /= dim(i).extent();
        }
        assert(size == (size_t)type().bytes() &&
               "Error: Overflow computing total size of buffer.");
    }
    /** Allocate memory for this Buffer. Drops the reference to any
     * owned memory. */
    void allocate(void *(*allocate_fn)(size_t) = nullptr,
                  void (*deallocate_fn)(void *) = nullptr) {
        // Drop any existing allocation
        deallocate();

        // Conservatively align images to the alignment above, and ensure
        // the allocation is large enough to place the header before an
        // aligned user pointer.
        constexpr size_t alignment = HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT;
        static_assert(sizeof(AllocationHeader) <= alignment);

        const auto align_up = [=](size_t value) -> size_t {
            return (value + alignment - 1) & ~(alignment - 1);
        };

        size_t size = size_in_bytes();

#if HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
        // Only use aligned_alloc() if no custom allocators are specified.
        if (!allocate_fn && !deallocate_fn) {
            // Overallocate by 'alignment' so that the user storage also
            // starts at an aligned point, with the header in front of it.
            void *alloc_storage = ::aligned_alloc(alignment,
                                                  align_up(size) + alignment);
            alloc = new (alloc_storage) AllocationHeader(free);
            buf.host = (uint8_t *)((uintptr_t)alloc_storage + alignment);
            return;
        }
        // else fall through to the general path below
#endif
        if (!allocate_fn) {
            allocate_fn = malloc;
        }
        if (!deallocate_fn) {
            deallocate_fn = free;
        }

        const size_t requested_size =
            align_up(size + alignment +
                     std::max(0, (int)sizeof(AllocationHeader) -
                                     (int)sizeof(std::max_align_t)));
        void *alloc_storage = allocate_fn(requested_size);
        alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
        uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
        buf.host = (uint8_t *)align_up((uintptr_t)unaligned_ptr);
    }

    /** Drop reference to any owned host or device memory, possibly
     * freeing it if this buffer held the last reference to it. Retains
     * the shape of the buffer. */
    void deallocate() {
        decref();
    }

    /** Drop reference to any owned device memory, possibly freeing it
     * if this buffer held the last reference to it. */
    void device_deallocate() {
        decref(true);
    }
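    /** A usage sketch (hypothetical caller code): allocating through
     * user-supplied allocation hooks. my_alloc and my_free are
     * stand-ins for whatever allocator the caller prefers.
     \code
     void *my_alloc(size_t bytes);   // hypothetical
     void my_free(void *ptr);        // hypothetical

     Halide::Runtime::Buffer<float> im(640, 480);  // allocated via malloc/free
     im.allocate(my_alloc, my_free);  // drops the old allocation, reallocates
                                      // the same shape via the custom hooks
     \endcode
     */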
    /** Allocate a new image of the given size with a runtime type. Only
     * used when you do know what size you want but you don't know
     * statically what type the elements are. Pass zeros to make a
     * buffer suitable for bounds query calls. */
    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    Buffer(halide_type_t t, int first, Args... rest) {
        if (!T_is_void) {
            assert(static_halide_type() == t);
        }
        int extents[] = {first, (int)rest...};
        buf.type = t;
        constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
        make_static_shape_storage<buf_dimensions>();
        initialize_shape(extents);
        if (!any_zero(extents)) {
            check_overflow();
            allocate();
        }
    }

    /** Allocate a new image of the given size. Pass zeros to make a
     * buffer suitable for bounds query calls. */
    // @{

    // The overload with one argument is 'explicit', so that (say) int
    // is not implicitly convertible to Buffer<int>.
    explicit Buffer(int first) {
        static_assert(!T_is_void,
                      "To construct a Buffer<void>, pass a halide_type_t as the first argument to the constructor");
        int extents[] = {first};
        buf.type = static_halide_type();
        constexpr int buf_dimensions = 1;
        make_static_shape_storage<buf_dimensions>();
        initialize_shape(extents);
        if (first != 0) {
            check_overflow();
            allocate();
        }
    }

    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    Buffer(int first, int second, Args... rest) {
        static_assert(!T_is_void,
                      "To construct a Buffer<void>, pass a halide_type_t as the first argument to the constructor");
        int extents[] = {first, second, (int)rest...};
        buf.type = static_halide_type();
        constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
        make_static_shape_storage<buf_dimensions>();
        initialize_shape(extents);
        if (!any_zero(extents)) {
            check_overflow();
            allocate();
        }
    }
    // @}
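    /** A usage sketch (hypothetical caller code): the sized constructors
     * above in use.
     \code
     // 2D, statically typed, allocated on construction:
     Halide::Runtime::Buffer<float> im(640, 480);

     // Element type chosen at runtime instead:
     Halide::Runtime::Buffer<> dyn(halide_type_t(halide_type_float, 32), 640, 480);

     // A zero extent makes an unallocated buffer for bounds queries:
     Halide::Runtime::Buffer<float> query(0, 0);
     \endcode
     */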
    /** Allocate a new image of unknown type using a vector of ints as
     * the size. */
    Buffer(halide_type_t t, const std::vector<int> &sizes) {
        if (!T_is_void) {
            assert(static_halide_type() == t);
        }
        buf.type = t;
        make_shape_storage((int)sizes.size());
        initialize_shape(sizes);
        if (!any_zero(sizes)) {
            check_overflow();
            allocate();
        }
    }

    /** Allocate a new image of known type using a vector of ints as
     * the size. */
    explicit Buffer(const std::vector<int> &sizes)
        : Buffer(static_halide_type(), sizes) {
    }

private:
    // Create a copy of the sizes vector, ordered as specified by order.
    static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes,
                                               const std::vector<int> &order) {
        assert(order.size() == sizes.size());
        std::vector<int> ordered_sizes(sizes.size());
        for (size_t i = 0; i < sizes.size(); ++i) {
            ordered_sizes[i] = sizes.at(order[i]);
        }
        return ordered_sizes;
    }

public:
    /** Allocate a new image of unknown type using a vector of ints as
     * the size and a vector of indices indicating the storage order for
     * each dimension. The lengths of the two vectors must match. For
     * instance, to allocate an interleaved RGB buffer, pass {2, 0, 1}
     * for storage_order. */
    Buffer(halide_type_t t, const std::vector<int> &sizes,
           const std::vector<int> &storage_order)
        : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
        transpose(storage_order);
    }

    Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
        : Buffer(static_halide_type(), sizes, storage_order) {
    }
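    /** A usage sketch (hypothetical caller code): allocating an
     * interleaved RGB image via the storage-order constructor above.
     \code
     // 640x480x3, with the channel dimension innermost in memory:
     Halide::Runtime::Buffer<uint8_t> rgb({640, 480, 3}, {2, 0, 1});
     // rgb.dim(2).stride() == 1 and rgb.dim(0).stride() == 3
     \endcode
     */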
    /** Make a Buffer that refers to a statically sized array. Does not
     * take ownership of the data, and does not set the host_dirty flag. */
    template<typename Array, size_t N>
    explicit Buffer(Array (&vals)[N]) {
        const int buf_dimensions = dimensionality_of_array(vals);
        buf.type = scalar_type_of_array(vals);
        buf.host = (uint8_t *)vals;
        make_shape_storage(buf_dimensions);
        initialize_shape_from_array_shape(buf.dimensions - 1, vals);
    }

    /** Initialize a Buffer of runtime type from a pointer and some
     * sizes. Assumes dense row-major packing and a min coordinate of
     * zero. Does not take ownership of the data and does not set the
     * host_dirty flag. */
    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
        if (!T_is_void) {
            assert(static_halide_type() == t);
        }
        int extents[] = {first, (int)rest...};
        buf.type = t;
        buf.host = (uint8_t *)const_cast<void *>(data);
        constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
        make_static_shape_storage<buf_dimensions>();
        initialize_shape(extents);
    }
    /** Initialize a Buffer from a pointer and some sizes. Assumes dense
     * row-major packing and a min coordinate of zero. Does not take
     * ownership of the data and does not set the host_dirty flag. */
    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    explicit Buffer(T *data, int first, Args &&...rest) {
        int extents[] = {first, (int)rest...};
        buf.type = static_halide_type();
        buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
        constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
        make_static_shape_storage<buf_dimensions>();
        initialize_shape(extents);
    }

    /** Initialize a Buffer from a pointer and a vector of sizes.
     * Assumes dense row-major packing and a min coordinate of zero.
     * Does not take ownership of the data and does not set the
     * host_dirty flag. */
    explicit Buffer(T *data, const std::vector<int> &sizes) {
        buf.type = static_halide_type();
        buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
        make_shape_storage((int)sizes.size());
        initialize_shape(sizes);
    }

    /** Initialize a Buffer of runtime type from a pointer and a vector
     * of sizes. Assumes dense row-major packing and a min coordinate of
     * zero. Does not take ownership of the data and does not set the
     * host_dirty flag. */
    explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
                    const std::vector<int> &sizes) {
        if (!T_is_void) {
            assert(static_halide_type() == t);
        }
        buf.type = t;
        buf.host = (uint8_t *)const_cast<void *>(data);
        make_shape_storage((int)sizes.size());
        initialize_shape(sizes);
    }

    /** Initialize a Buffer from a pointer to the min coordinate and an
     * array describing the shape. Does not take ownership of the data,
     * and does not set the host_dirty flag. */
    explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
                    int d, const halide_dimension_t *shape) {
        if (!T_is_void) {
            assert(static_halide_type() == t);
        }
        buf.type = t;
        buf.host = (uint8_t *)const_cast<void *>(data);
        make_shape_storage(d);
        for (int i = 0; i < d; i++) {
            buf.dim[i] = shape[i];
        }
    }

    /** Initialize a Buffer from a pointer to the min coordinate and a
     * vector describing the shape. Does not take ownership of the data,
     * and does not set the host_dirty flag. */
    explicit inline Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
                           const std::vector<halide_dimension_t> &shape)
        : Buffer(t, data, (int)shape.size(), shape.data()) {
    }

    /** Initialize a Buffer from a pointer to the min coordinate and an
     * array describing the shape. Does not take ownership of the data
     * and does not set the host_dirty flag. */
    explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
        buf.type = static_halide_type();
        buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
        make_shape_storage(d);
        for (int i = 0; i < d; i++) {
            buf.dim[i] = shape[i];
        }
    }

    /** Initialize a Buffer from a pointer to the min coordinate and a
     * vector describing the shape. Does not take ownership of the data,
     * and does not set the host_dirty flag. */
    explicit inline Buffer(T *data, const std::vector<halide_dimension_t> &shape)
        : Buffer(data, (int)shape.size(), shape.data()) {
    }
    /** Destructor. Will release any underlying owned allocation if this
     * is the last reference to it. Will free any temporary shape
     * storage. */
    ~Buffer() {
        decref();
        free_shape_storage();
    }
    /** Return a typed reference to this Buffer. Useful for converting a
     * reference to a Buffer<void> to a reference to, for example, a
     * Buffer<const uint8_t>, or converting a Buffer<T>& to
     * Buffer<const T>&. You can also optionally specify a new value for
     * Dims; this is useful mainly for removing the dimensionality
     * constraint on a Buffer with explicit dimensionality. Does a
     * runtime assert if the source buffer type is void or the new
     * dimensionality is incompatible. */
    template<typename T2, int D2 = Dims>
    HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> &as() & {
        Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
        return *((Buffer<T2, D2, InClassDimStorage> *)this);
    }

    /** Return a const typed reference to this Buffer. */
    template<typename T2, int D2 = Dims>
    HALIDE_ALWAYS_INLINE const Buffer<T2, D2, InClassDimStorage> &as() const & {
        Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
        return *((const Buffer<T2, D2, InClassDimStorage> *)this);
    }

    /** Return an rval reference to this Buffer. */
    template<typename T2, int D2 = Dims>
    HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> as() && {
        Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
        return *((Buffer<T2, D2, InClassDimStorage> *)this);
    }

    /** as_const() is syntactic sugar for .as<const T>(), to avoid the
     * need to recapitulate the type argument. */
    template<typename T2 = T,
             typename = typename std::enable_if<!std::is_const<T2>::value>::type>
    HALIDE_ALWAYS_INLINE Buffer<typename std::add_const<T2>::type, Dims, InClassDimStorage> &as_const() & {
        // Note that we can skip the assert_can_convert_from(), since
        // T -> const T conversion is always legal.
        return *((Buffer<typename std::add_const<T2>::type, Dims, InClassDimStorage> *)this);
    }

    /** Syntactic sugar to allow autoconversion from Buffer<T> to
     * Buffer<void>& when passing arguments */
    template<typename TVoid,
             typename T2 = T,
             typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
                                                !std::is_void<T2>::value &&
                                                !std::is_const<T2>::value>::type>
    operator Buffer<TVoid, Dims, InClassDimStorage> &() & {
        return as<TVoid, Dims>();
    }

    /** Syntactic sugar to allow autoconversion from Buffer<const T> to
     * Buffer<const void>& when passing arguments */
    template<typename TVoid,
             typename T2 = T,
             typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
                                                !std::is_void<T2>::value &&
                                                std::is_const<T2>::value>::type>
    operator Buffer<const TVoid, Dims, InClassDimStorage> &() & {
        return as<const TVoid, Dims>();
    }
    /** Conventional names for the first three dimensions. */
    // @{
    int width() const {
        return (dimensions() > 0) ? dim(0).extent() : 1;
    }
    int height() const {
        return (dimensions() > 1) ? dim(1).extent() : 1;
    }
    int channels() const {
        return (dimensions() > 2) ? dim(2).extent() : 1;
    }
    // @}

    /** Conventional names for the min and max value of each dimension. */
    // @{
    int left() const {
        return dim(0).min();
    }
    int right() const {
        return dim(0).max();
    }
    int top() const {
        return dim(1).min();
    }
    int bottom() const {
        return dim(1).max();
    }
    // @}
    /** Make a new image which is a deep copy of this image. Use crop or
     * slice followed by copy to copy only a portion of the image. The
     * new image uses the same memory layout as the original, with holes
     * compacted away. Note that the returned Buffer is always of a
     * non-const type T, which is safe since we are making a deep copy. */
    Buffer<not_const_T, Dims, InClassDimStorage> copy(void *(*allocate_fn)(size_t) = nullptr,
                                                      void (*deallocate_fn)(void *) = nullptr) const {
        Buffer<not_const_T, Dims, InClassDimStorage> dst =
            Buffer<not_const_T, Dims, InClassDimStorage>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
        dst.copy_from(*this);
        return dst;
    }

    /** Like copy(), but the copy is created in interleaved memory
     * layout (vs. keeping the same memory layout as the original).
     * Requires that 'this' has exactly 3 dimensions. */
    Buffer<not_const_T, Dims, InClassDimStorage> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
                                                                     void (*deallocate_fn)(void *) = nullptr) const {
        static_assert(Dims == AnyDims || Dims == 3);
        assert(dimensions() == 3);
        Buffer<not_const_T, Dims, InClassDimStorage> dst =
            Buffer<not_const_T, Dims, InClassDimStorage>::make_interleaved(nullptr, width(), height(), channels());
        dst.set_min(min(0), min(1), min(2));
        dst.allocate(allocate_fn, deallocate_fn);
        dst.copy_from(*this);
        return dst;
    }

    /** Like copy(), but the copy is created in planar memory layout
     * (vs. keeping the same memory layout as the original). */
    Buffer<not_const_T, Dims, InClassDimStorage> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
                                                                void (*deallocate_fn)(void *) = nullptr) const {
        std::vector<int> mins, extents;
        const int dims = dimensions();
        mins.reserve(dims);
        extents.reserve(dims);
        for (int d = 0; d < dims; ++d) {
            mins.push_back(dim(d).min());
            extents.push_back(dim(d).extent());
        }
        Buffer<not_const_T, Dims, InClassDimStorage> dst(nullptr, extents);
        dst.set_min(mins);
        dst.allocate(allocate_fn, deallocate_fn);
        dst.copy_from(*this);
        return dst;
    }
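    /** A usage sketch (hypothetical caller code): converting a planar
     * image to an interleaved deep copy, then back.
     \code
     Halide::Runtime::Buffer<uint8_t> planar(640, 480, 3);
     Halide::Runtime::Buffer<uint8_t> interleaved = planar.copy_to_interleaved();
     // interleaved.dim(2).stride() == 1; the pixel data was deep-copied.
     Halide::Runtime::Buffer<uint8_t> planar_again = interleaved.copy_to_planar();
     \endcode
     */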
    /** Fill a Buffer with the values at the same coordinates in another
     * Buffer. Restricts itself to coordinates contained within the
     * intersection of the two buffers. If the two Buffers are not in
     * the same coordinate system, you will need to translate the
     * argument buffer first. E.g. if you're blitting a sprite onto a
     * framebuffer, translate the sprite to the correct location first:
     \code
     framebuffer.copy_from(sprite.translated({x, y}));
     \endcode
     */
    template<typename T2, int D2, int S2>
    void copy_from(Buffer<T2, D2, S2> src) {
        static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
        assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
        assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");

        Buffer<T, Dims, InClassDimStorage> dst(*this);

        assert(src.dimensions() == dst.dimensions());

        // Trim the copy to the region in common
        const int d = dimensions();
        for (int i = 0; i < d; i++) {
            const int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
            const int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
            if (max_coord < min_coord) {
                // The buffers do not overlap.
                return;
            }
            dst.crop(i, min_coord, max_coord - min_coord + 1);
            src.crop(i, min_coord, max_coord - min_coord + 1);
        }

        // If T is void, we need to do runtime dispatch to an
        // appropriately-typed lambda. We're copying, so we only care
        // about the element size. (If T is not void, this should
        // optimize away into a static dispatch to the right-sized copy.)
        if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
            using MemType = uint8_t;
            auto &typed_dst = reinterpret_cast<Buffer<MemType, Dims, InClassDimStorage> &>(dst);
            auto &typed_src = reinterpret_cast<Buffer<const MemType, D2, S2> &>(src);
            typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
        } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
            using MemType = uint16_t;
            auto &typed_dst = reinterpret_cast<Buffer<MemType, Dims, InClassDimStorage> &>(dst);
            auto &typed_src = reinterpret_cast<Buffer<const MemType, D2, S2> &>(src);
            typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
        } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
            using MemType = uint32_t;
            auto &typed_dst = reinterpret_cast<Buffer<MemType, Dims, InClassDimStorage> &>(dst);
            auto &typed_src = reinterpret_cast<Buffer<const MemType, D2, S2> &>(src);
            typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
        } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
            using MemType = uint64_t;
            auto &typed_dst = reinterpret_cast<Buffer<MemType, Dims, InClassDimStorage> &>(dst);
            auto &typed_src = reinterpret_cast<Buffer<const MemType, D2, S2> &>(src);
            typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
        } else {
            assert(false && "type().bytes() must be 1, 2, 4, or 8");
        }
        set_host_dirty();
    }
    /** Make an image that refers to a sub-range of this image along the
     * given dimension. Asserts that the crop region is within the
     * existing bounds: you cannot "crop outwards", even if you know
     * there is valid Buffer storage (e.g. because you already cropped
     * inwards). */
    Buffer<T, Dims, InClassDimStorage> cropped(int d, int min, int extent) const {
        // Make a fresh copy of the underlying buffer (but not a fresh
        // copy of the allocation, if there is one).
        Buffer<T, Dims, InClassDimStorage> im = *this;

        // This guarantees the preexisting device ref is dropped if the
        // device_crop call fails and maintains the buffer in a
        // consistent state.
        im.device_deallocate();

        im.crop_host(d, min, extent);
        if (buf.device_interface != nullptr) {
            complete_device_crop(im);
        }
        return im;
    }

    /** Crop an image in-place along the given dimension. This does not
     * move any data around in memory; it just changes the min and
     * extent of the given dimension. */
    void crop(int d, int min, int extent) {
        // An optimization for non-device buffers. For the device case,
        // a temp buffer is required, so use the not-in-place version.
        if (buf.device_interface != nullptr) {
            *this = cropped(d, min, extent);
        } else {
            crop_host(d, min, extent);
        }
    }

    /** Make an image that refers to a sub-rectangle of this image along
     * the first N dimensions. Asserts that the crop region is within
     * the existing bounds. */
    Buffer<T, Dims, InClassDimStorage> cropped(const std::vector<std::pair<int, int>> &rect) const {
        Buffer<T, Dims, InClassDimStorage> im = *this;
        im.device_deallocate();
        im.crop_host(rect);
        if (buf.device_interface != nullptr) {
            complete_device_crop(im);
        }
        return im;
    }

    /** Crop an image in-place along the first N dimensions. */
    void crop(const std::vector<std::pair<int, int>> &rect) {
        if (buf.device_interface != nullptr) {
            *this = cropped(rect);
        } else {
            crop_host(rect);
        }
    }
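    /** A usage sketch (hypothetical caller code): cropping a view of the
     * center of an image. The crop shares storage with the original.
     \code
     Halide::Runtime::Buffer<float> im(640, 480);
     // Each pair is {min, extent} for one dimension:
     Halide::Runtime::Buffer<float> center = im.cropped({{160, 320}, {120, 240}});
     // &center(160, 120) == &im(160, 120)
     \endcode
     */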
    /** Make an image which refers to the same data using translated
     * coordinates in the given dimension. Positive values move the
     * image data to the right or down relative to the coordinate
     * system. Drops any device handle. */
    Buffer<T, Dims, InClassDimStorage> translated(int d, int dx) const {
        Buffer<T, Dims, InClassDimStorage> im = *this;
        im.translate(d, dx);
        return im;
    }

    /** Translate an image in-place along one dimension by changing how
     * it is indexed. Does not move any data around in memory. */
    void translate(int d, int delta) {
        assert(d >= 0 && d < this->dimensions());
        device_deallocate();
        buf.dim[d].min += delta;
    }

    /** Make an image which refers to the same data translated along the
     * first N dimensions. */
    Buffer<T, Dims, InClassDimStorage> translated(const std::vector<int> &delta) const {
        Buffer<T, Dims, InClassDimStorage> im = *this;
        im.translate(delta);
        return im;
    }

    /** Translate an image along the first N dimensions by changing how
     * it is indexed. Does not move any data around in memory. */
    void translate(const std::vector<int> &delta) {
        device_deallocate();
        assert(delta.size() <= static_cast<decltype(delta.size())>(this->dimensions()));
        int limit = (int)delta.size();
        assert(limit <= dimensions());
        for (int i = 0; i < limit; i++) {
            translate(i, delta[i]);
        }
    }

    /** Set the min coordinate of an image in the first N dimensions. */
    // @{
    void set_min(const std::vector<int> &mins) {
        assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
        device_deallocate();
        for (size_t i = 0; i < mins.size(); i++) {
            buf.dim[i].min = mins[i];
        }
    }

    template<typename... Args>
    void set_min(Args... args) {
        set_min(std::vector<int>{args...});
    }
    // @}

    /** Test if a given coordinate is within the bounds of an image. */
    // @{
    bool contains(const std::vector<int> &coords) const {
        assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
        for (size_t i = 0; i < coords.size(); i++) {
            if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
                return false;
            }
        }
        return true;
    }

    template<typename... Args>
    bool contains(Args... args) const {
        return contains(std::vector<int>{args...});
    }
    // @}
    /** Make a buffer which refers to the same data in the same layout
     * using a swapped indexing order for the two dimensions given. So
     * A = B.transposed(0, 1) means that A(i, j) == B(j, i). */
    Buffer<T, Dims, InClassDimStorage> transposed(int d1, int d2) const {
        Buffer<T, Dims, InClassDimStorage> im = *this;
        im.transpose(d1, d2);
        return im;
    }

    /** Transpose a buffer in-place by changing how it is indexed. This
     * is done by reordering the per-dimension metadata rather than by
     * moving data around in memory, so other views of the same memory
     * will not see the data as having been transposed. */
    void transpose(int d1, int d2) {
        assert(d1 >= 0 && d1 < this->dimensions());
        assert(d2 >= 0 && d2 < this->dimensions());
        std::swap(buf.dim[d1], buf.dim[d2]);
    }

    /** A generalized transpose: instead of swapping two dimensions,
     * pass a vector that lists each dimension index exactly once, in
     * the desired order. This does not move any data around in memory;
     * it just permutes how it is indexed. */
    void transpose(const std::vector<int> &order) {
        assert((int)order.size() == dimensions());
        if (dimensions() < 2) {
            // My, that was easy
            return;
        }

        std::vector<int> order_sorted = order;
        for (size_t i = 1; i < order_sorted.size(); i++) {
            for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
                std::swap(order_sorted[j], order_sorted[j - 1]);
                transpose(j, j - 1);
            }
        }
    }
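    /** A usage sketch (hypothetical caller code): reinterpreting a
     * row-major matrix as column-major without touching the data.
     \code
     Halide::Runtime::Buffer<double> m(4, 3);  // m(col, row); col has stride 1
     m.transpose(0, 1);                        // now m(row, col); strides swapped
     \endcode
     */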
    /** Make a lower-dimensional buffer that refers to one slice of this
     * buffer. */
    Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
    sliced(int d, int pos) const {
        static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
        assert(dimensions() > 0);

        Buffer<T, AnyDims, InClassDimStorage> im = *this;

        // This guarantees the preexisting device ref is dropped if the
        // device_slice call fails and maintains the buffer in a
        // consistent state.
        im.device_deallocate();

        im.slice_host(d, pos);
        if (buf.device_interface != nullptr) {
            complete_device_slice(im, d, pos);
        }
        return im;
    }

    /** Make a lower-dimensional buffer that refers to one slice of this
     * buffer at the dimension's minimum. */
    Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
    sliced(int d) const {
        static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
        assert(dimensions() > 0);

        return sliced(d, dim(d).min());
    }

    /** Rewrite the buffer to refer to a single lower-dimensional slice
     * of itself along the given dimension at the given coordinate. Does
     * not move any data around or free the original memory, so other
     * views of the same data are unaffected. Can only be called on a
     * Buffer with dynamic dimensionality. */
    void slice(int d, int pos) {
        static_assert(Dims == AnyDims, "Cannot call slice() on a Buffer with static dimensionality.");
        assert(dimensions() > 0);

        // An optimization for non-device buffers. For the device case,
        // a temp buffer is required, so use the not-in-place version.
        if (buf.device_interface != nullptr) {
            *this = sliced(d, pos);
        } else {
            slice_host(d, pos);
        }
    }

    /** Slice a buffer in-place at the dimension's minimum. */
    void slice(int d) {
        slice(d, dim(d).min());
    }

    /** Embed a buffer in-place, increasing the dimensionality. The new
     * dimension has extent one and the given min. This operation is the
     * opposite of slice. */
    void embed(int d, int pos = 0) {
        static_assert(Dims == AnyDims, "Cannot call embed() on a Buffer with static dimensionality.");
        assert(d >= 0 && d <= dimensions());
        add_dimension();
        translate(dimensions() - 1, pos);
        for (int i = dimensions() - 1; i > d; i--) {
            transpose(i, i - 1);
        }
    }
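    /** A usage sketch (hypothetical caller code): slicing out one channel
     * of a 3D image as a 2D view, then embedding it back as a
     * one-channel 3D image.
     \code
     Halide::Runtime::Buffer<uint8_t> rgb(640, 480, 3);
     Halide::Runtime::Buffer<uint8_t> green = rgb.sliced(2, 1);  // 2D view of channel 1
     green.embed(2, 0);  // back to 3D: 640x480x1, sharing rgb's storage
     \endcode
     */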
    /** Add a new dimension with a min of zero and an extent of one. The
     * stride is the extent of the outermost dimension times its stride.
     * The new dimension is the last dimension. This is a special case
     * of embed. */
    void add_dimension() {
        static_assert(Dims == AnyDims, "Cannot call add_dimension() on a Buffer with static dimensionality.");
        const int dims = buf.dimensions;
        buf.dimensions++;
        if (buf.dim != shape) {
            // We're already on the heap. Reallocate.
            halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
            for (int i = 0; i < dims; i++) {
                new_shape[i] = buf.dim[i];
            }
            delete[] buf.dim;
            buf.dim = new_shape;
        } else if (dims == InClassDimStorage) {
            // Transition from the in-class storage to the heap
            make_shape_storage(buf.dimensions);
            for (int i = 0; i < dims; i++) {
                buf.dim[i] = shape[i];
            }
        }
        // Else we are still using the in-class storage, and it fits.
        buf.dim[dims] = {0, 1, 0};
        if (dims == 0) {
            buf.dim[dims].stride = 1;
        } else {
            buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
        }
    }

    /** Add a new dimension with a min of zero, an extent of one, and
     * the specified stride. The new dimension is the last dimension.
     * This is a special case of embed. */
    void add_dimension_with_stride(int s) {
        add_dimension();
        buf.dim[buf.dimensions - 1].stride = s;
    }
    /** Methods for managing any GPU allocation. */
    // @{
    // Set the host dirty flag. Called by every non-const operator()
    // access. Must be inlined so it can be hoisted out of loops.
    HALIDE_ALWAYS_INLINE
    void set_host_dirty(bool v = true) {
        assert((!v || !device_dirty()) &&
               "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
        buf.set_host_dirty(v);
    }

    // Check if the device allocation is dirty. Called by
    // set_host_dirty, which is called by every accessor. Must be
    // inlined so it can be hoisted out of loops.
    HALIDE_ALWAYS_INLINE
    bool device_dirty() const {
        return buf.device_dirty();
    }

    bool host_dirty() const {
        return buf.host_dirty();
    }

    void set_device_dirty(bool v = true) {
        assert((!v || !host_dirty()) &&
               "Cannot set device dirty when host is already dirty.");
        buf.set_device_dirty(v);
    }

    int copy_to_host(void *ctx = nullptr) {
        if (device_dirty()) {
            return buf.device_interface->copy_to_host(ctx, &buf);
        }
        return halide_error_code_success;
    }

    int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
        if (host_dirty()) {
            return device_interface->copy_to_device(ctx, &buf, device_interface);
        }
        return halide_error_code_success;
    }

    int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
        return device_interface->device_malloc(ctx, &buf, device_interface);
    }

    int device_free(void *ctx = nullptr) {
        if (dev_ref_count) {
            assert(dev_ref_count->ownership == BufferDeviceOwnership::Allocated &&
                   "Can't call device_free on an unmanaged or wrapped native device handle. "
                   "Free the source allocation or call device_detach_native instead.");
            // Multiple people may be holding onto this dev field
            assert(dev_ref_count->count == 1 &&
                   "Multiple Halide::Runtime::Buffer objects share this device "
                   "allocation. Freeing it would create dangling references. "
                   "Don't call device_free on Halide buffers that you have copied or "
                   "passed by value.");
        }
        int ret = halide_error_code_success;
        if (buf.device_interface) {
            ret = buf.device_interface->device_free(ctx, &buf);
        }
        if (dev_ref_count) {
            delete dev_ref_count;
            dev_ref_count = nullptr;
        }
        return ret;
    }

    int device_wrap_native(const struct halide_device_interface_t *device_interface,
                           uint64_t handle, void *ctx = nullptr) {
        assert(device_interface);
        dev_ref_count = new DeviceRefCount;
        dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
        return device_interface->wrap_native(ctx, &buf, handle, device_interface);
    }

    int device_detach_native(void *ctx = nullptr) {
        assert(dev_ref_count &&
               dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative &&
               "Only call device_detach_native on buffers wrapping a native "
               "device handle via device_wrap_native. This buffer was allocated "
               "using device_malloc, or is unmanaged. "
               "Call device_free or free the original allocation instead.");
        // Multiple people may be holding onto this dev field
        assert(dev_ref_count->count == 1 &&
               "Multiple Halide::Runtime::Buffer objects share this device "
               "allocation. Freeing it could create dangling references. "
               "Don't call device_detach_native on Halide buffers that you "
               "have copied or passed by value.");
        int ret = halide_error_code_success;
        if (buf.device_interface) {
            ret = buf.device_interface->detach_native(ctx, &buf);
        }
        delete dev_ref_count;
        dev_ref_count = nullptr;
        return ret;
    }

    int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
        return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
    }

    int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
        if (dev_ref_count) {
            assert(dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost &&
                   "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
                   "Free the source allocation or call device_detach_native instead.");
            // Multiple people may be holding onto this dev field
            assert(dev_ref_count->count == 1 &&
                   "Multiple Halide::Runtime::Buffer objects share this device "
                   "allocation. Freeing it would create dangling references. "
                   "Don't call device_and_host_free on Halide buffers that you have copied or "
                   "passed by value.");
        }
        int ret = halide_error_code_success;
        if (buf.device_interface) {
            ret = buf.device_interface->device_and_host_free(ctx, &buf);
        }
        if (dev_ref_count) {
            delete dev_ref_count;
            dev_ref_count = nullptr;
        }
        return ret;
    }

    int device_sync(void *ctx = nullptr) {
        return buf.device_sync(ctx);
    }

    bool has_device_allocation() const {
        return buf.device != 0;
    }

    /** Return the method by which the device field is managed. */
    BufferDeviceOwnership device_ownership() const {
        if (dev_ref_count == nullptr) {
            return BufferDeviceOwnership::Unmanaged;
        }
        return dev_ref_count->ownership;
    }
    // @}
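    /** A usage sketch (hypothetical caller code): a typical host/device
     * round trip. halide_cuda_device_interface() is one of the per-API
     * interface getters, declared in HalideRuntimeCuda.h; other backends
     * work the same way.
     \code
     Halide::Runtime::Buffer<float> im(640, 480);
     im.fill(0.0f);                              // host writes mark host_dirty
     const halide_device_interface_t *cuda = halide_cuda_device_interface();
     im.copy_to_device(cuda);                    // copies host data to the GPU
     // ... run a GPU pipeline that marks the buffer device-dirty ...
     im.copy_to_host();                          // copy back before host reads
     \endcode
     */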
    /** If you use the (x, y, c) indexing convention, then Halide
     * Buffers are stored planar by default. This function returns an
     * interleaved buffer of the given runtime type. */
    static Buffer<void, Dims, InClassDimStorage> make_interleaved(halide_type_t t, int width, int height, int channels) {
        static_assert(Dims == AnyDims || Dims == 3,
                      "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
        Buffer<void, Dims, InClassDimStorage> im(t, channels, width, height);
        // Note that this is equivalent to calling transpose({2, 0, 1}),
        // but slightly more efficient.
        im.transpose(0, 1);
        im.transpose(1, 2);
        return im;
    }

    /** As above, but with a static element type. */
    static Buffer<T, Dims, InClassDimStorage> make_interleaved(int width, int height, int channels) {
        return make_interleaved(static_halide_type(), width, height, channels);
    }

    /** Wrap an existing interleaved image of the given runtime type. */
    static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage>
    make_interleaved(halide_type_t t, T *data, int width, int height, int channels) {
        static_assert(Dims == AnyDims || Dims == 3,
                      "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
        Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> im(t, data, channels, width, height);
        im.transpose(0, 1);
        im.transpose(1, 2);
        return im;
    }

    /** Wrap an existing interleaved image. */
    static Buffer<T, Dims, InClassDimStorage> make_interleaved(T *data, int width, int height, int channels) {
        return make_interleaved(static_halide_type(), data, width, height, channels);
    }

    /** Make a zero-dimensional Buffer of the given runtime type. */
    static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> make_scalar(halide_type_t t) {
        static_assert(Dims == AnyDims || Dims == 0,
                      "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
        Buffer<add_const_if_T_is_const<void>, AnyDims, InClassDimStorage> buf(t, 1);
        buf.slice(0, 0);
        return buf;
    }

    /** Make a zero-dimensional Buffer. */
    static Buffer<T, Dims, InClassDimStorage> make_scalar() {
        static_assert(Dims == AnyDims || Dims == 0,
                      "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
        Buffer<T, AnyDims, InClassDimStorage> buf(1);
        buf.slice(0, 0);
        return buf;
    }

    /** Make a zero-dimensional Buffer that points to non-owned,
     * existing data. */
    static Buffer<T, Dims, InClassDimStorage> make_scalar(T *data) {
        static_assert(Dims == AnyDims || Dims == 0,
                      "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
        Buffer<T, AnyDims, InClassDimStorage> buf(data, 1);
        buf.slice(0, 0);
        return buf;
    }
    /** Make a buffer with the same shape and memory nesting order as
     * another buffer. It may have a different type. */
    template<typename T2, int D2, int S2>
    static Buffer<T, Dims, InClassDimStorage> make_with_shape_of(Buffer<T2, D2, S2> src,
                                                                 void *(*allocate_fn)(size_t) = nullptr,
                                                                 void (*deallocate_fn)(void *) = nullptr) {
        static_assert(Dims == D2 || Dims == AnyDims);
        const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
        return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
                                                   allocate_fn, deallocate_fn);
    }

private:
    static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
                                              int dimensions,
                                              halide_dimension_t *shape,
                                              void *(*allocate_fn)(size_t),
                                              void (*deallocate_fn)(void *)) {
        // Reorder the dimensions of src to have strides in increasing order
        std::vector<int> swaps;
        for (int i = dimensions - 1; i > 0; i--) {
            for (int j = i; j > 0; j--) {
                if (shape[j - 1].stride > shape[j].stride) {
                    std::swap(shape[j - 1], shape[j]);
                    swaps.push_back(j);
                }
            }
        }

        // Rewrite the strides to be dense (this messes up src, which is
        // why we took it by value).
        for (int i = 0; i < dimensions; i++) {
            if (i == 0) {
                shape[i].stride = 1;
            } else {
                shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
            }
        }

        // Undo the dimension reordering
        while (!swaps.empty()) {
            int j = swaps.back();
            std::swap(shape[j - 1], shape[j]);
            swaps.pop_back();
        }

        // Use an explicit runtime type, and make dst a Buffer<void>, to
        // allow using this method with Buffer<void> for either src or dst.
        Buffer<> dst(dst_type, nullptr, dimensions, shape);
        dst.allocate(allocate_fn, deallocate_fn);

        return dst;
    }
    template<typename... Args>
    HALIDE_ALWAYS_INLINE ptrdiff_t offset_of(int d, int first, Args... rest) const {
#if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
        assert(first >= this->buf.dim[d].min);
        assert(first < this->buf.dim[d].min + this->buf.dim[d].extent);
#endif
        return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
    }

    HALIDE_ALWAYS_INLINE
    ptrdiff_t offset_of(int) const {
        return 0;
    }

    template<typename... Args>
    HALIDE_ALWAYS_INLINE storage_T *address_of(Args... args) const {
        if (T_is_void) {
            return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
        } else {
            return (storage_T *)(this->buf.host) + offset_of(0, args...);
        }
    }

    HALIDE_ALWAYS_INLINE
    ptrdiff_t offset_of(const int *pos) const {
        ptrdiff_t offset = 0;
        for (int i = this->dimensions() - 1; i >= 0; i--) {
#if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
            assert(pos[i] >= this->buf.dim[i].min);
            assert(pos[i] < this->buf.dim[i].min + this->buf.dim[i].extent);
#endif
            offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
        }
        return offset;
    }

    HALIDE_ALWAYS_INLINE
    storage_T *address_of(const int *pos) const {
        if (T_is_void) {
            return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
        } else {
            return (storage_T *)this->buf.host + offset_of(pos);
        }
    }

public:
    /** Get a pointer to the address of the min coordinate. */
    T *data() const {
        return (T *)(this->buf.host);
    }
    /** Access elements. Use im(...) to get a reference to an element,
     * and use &im(...) to get the address of an element. The non-const
     * versions set the host_dirty flag to true. */
    // @{
    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
        static_assert(!T_is_void,
                      "Cannot use operator() on Buffer<void> types");
        constexpr int expected_dims = 1 + (int)(sizeof...(rest));
        static_assert(Dims == AnyDims || Dims == expected_dims,
                      "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
        assert(!device_dirty());
        return *((const not_void_T *)(address_of(first, rest...)));
    }

    HALIDE_ALWAYS_INLINE
    const not_void_T &operator()() const {
        static_assert(!T_is_void,
                      "Cannot use operator() on Buffer<void> types");
        constexpr int expected_dims = 0;
        static_assert(Dims == AnyDims || Dims == expected_dims,
                      "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
        assert(!device_dirty());
        return *((const not_void_T *)(data()));
    }

    HALIDE_ALWAYS_INLINE
    const not_void_T &operator()(const int *pos) const {
        static_assert(!T_is_void,
                      "Cannot use operator() on Buffer<void> types");
        assert(!device_dirty());
        return *((const not_void_T *)(address_of(pos)));
    }

    template<typename... Args,
             typename = typename std::enable_if<AllInts<Args...>::value>::type>
    HALIDE_ALWAYS_INLINE not_void_T &operator()(int first, Args... rest) {
        static_assert(!T_is_void,
                      "Cannot use operator() on Buffer<void> types");
        constexpr int expected_dims = 1 + (int)(sizeof...(rest));
        static_assert(Dims == AnyDims || Dims == expected_dims,
                      "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
        set_host_dirty();
        return *((not_void_T *)(address_of(first, rest...)));
    }

    HALIDE_ALWAYS_INLINE
    not_void_T &operator()() {
        static_assert(!T_is_void,
                      "Cannot use operator() on Buffer<void> types");
        constexpr int expected_dims = 0;
        static_assert(Dims == AnyDims || Dims == expected_dims,
                      "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
        set_host_dirty();
        return *((not_void_T *)(data()));
    }

    HALIDE_ALWAYS_INLINE
    not_void_T &operator()(const int *pos) {
        static_assert(!T_is_void,
                      "Cannot use operator() on Buffer<void> types");
        set_host_dirty();
        return *((not_void_T *)(address_of(pos)));
    }
    // @}
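    /** A usage sketch (hypothetical caller code): reading and writing
     * elements through the overloads above.
     \code
     Halide::Runtime::Buffer<float> im(8, 8);
     im(3, 4) = 1.5f;        // non-const access also sets host_dirty
     float v = im(3, 4);
     int pos[2] = {3, 4};
     float w = im(pos);      // same element, coordinate-array form
     \endcode
     */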
    /** Tests that all values in this buffer are equal to val. */
    bool all_equal(not_void_T val) const {
        bool all_equal = true;
        for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
        return all_equal;
    }

    /** Fill a buffer with a scalar value. */
    Buffer<T, Dims, InClassDimStorage> &fill(not_void_T val) {
        set_host_dirty();
        for_each_value([=](T &v) { v = val; });
        return *this;
    }
private:
    /** Helper functions for for_each_value. */
    // @{
    template<int N>
    struct for_each_value_task_dim {
        std::ptrdiff_t extent;
        std::ptrdiff_t stride[N];
    };

    // Given an array of strides, and a bunch of pointers (all of
    // possibly-different types), advance the pointers using the strides.
    template<typename Ptr, typename... Ptrs>
    HALIDE_ALWAYS_INLINE static void advance_ptrs(const std::ptrdiff_t *stride, Ptr &ptr, Ptrs &...ptrs) {
        ptr += *stride;
        advance_ptrs(stride + 1, ptrs...);
    }

    HALIDE_ALWAYS_INLINE
    static void advance_ptrs(const std::ptrdiff_t *) {
    }

    template<typename Fn, typename Ptr, typename... Ptrs>
    HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
                                                          const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
        if (d == 0) {
            if (innermost_strides_are_one) {
                Ptr end = ptr + t[0].extent;
                while (ptr != end) {
                    f(*ptr++, (*ptrs++)...);
                }
            } else {
                for (std::ptrdiff_t i = t[0].extent; i != 0; i--) {
                    f(*ptr, (*ptrs)...);
                    advance_ptrs(t[0].stride, ptr, ptrs...);
                }
            }
        } else {
            for (std::ptrdiff_t i = t[d].extent; i != 0; i--) {
                for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
                advance_ptrs(t[d].stride, ptr, ptrs...);
            }
        }
    }
    // The return pair is <new_dimensions, innermost_strides_are_one>.
    template<int N>
    HALIDE_NEVER_INLINE static std::pair<int, bool> for_each_value_prep(for_each_value_task_dim<N> *t,
                                                                        const halide_buffer_t **buffers) {
        const int dimensions = buffers[0]->dimensions;
        assert(dimensions > 0);

        // Check that the buffers all have clean host allocations.
        for (int i = 0; i < N; i++) {
            if (buffers[i]->device) {
                assert(buffers[i]->host &&
                       "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
                assert(!buffers[i]->device_dirty() &&
                       "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
            } else {
                assert(buffers[i]->host &&
                       "Buffer passed to for_each_value has no host or device allocation");
            }
        }

        // Extract the strides in all the dimensions.
        for (int i = 0; i < dimensions; i++) {
            for (int j = 0; j < N; j++) {
                assert(buffers[j]->dimensions == dimensions);
                assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
                       buffers[j]->dim[i].min == buffers[0]->dim[i].min);
                const int s = buffers[j]->dim[i].stride;
                t[i].stride[j] = s;
            }
            t[i].extent = buffers[0]->dim[i].extent;

            // Order the dimensions by stride, so that the traversal is
            // cache-coherent. Use the last buffer for this, because it
            // is the source in copies.
            for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
                std::swap(t[j], t[j - 1]);
            }
        }

        // Flatten dimensions where possible to make a larger inner loop
        // for autovectorization.
        int d = dimensions;
        for (int i = 1; i < d; i++) {
            bool flat = true;
            for (int j = 0; j < N; j++) {
                flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
            }
            if (flat) {
                t[i - 1].extent *= t[i].extent;
                for (int j = i; j < d - 1; j++) {
                    t[j] = t[j + 1];
                }
                i--;
                d--;
            }
        }

        // We asserted above that dimensions > 0, so the unchecked
        // access to t[0] here is safe.
        bool innermost_strides_are_one = true;
        for (int i = 0; i < N; i++) {
            innermost_strides_are_one &= (t[0].stride[i] == 1);
        }

        return {d, innermost_strides_are_one};
    }
    template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
    void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
        if (dimensions() > 0) {
            const size_t alloc_size = dimensions() * sizeof(for_each_value_task_dim<N>);
            Buffer<>::for_each_value_task_dim<N> *t =
                (Buffer<>::for_each_value_task_dim<N> *)HALIDE_ALLOCA(alloc_size);
            // Move the preparatory code into a non-templated helper to
            // save code size.
            const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
            auto [new_dims, innermost_strides_are_one] = Buffer<>::for_each_value_prep(t, buffers);
            if (new_dims > 0) {
                Buffer<>::for_each_value_helper(f, new_dims - 1,
                                                innermost_strides_are_one,
                                                t,
                                                data(), (other_buffers.data())...);
                return;
            }
            // else fall through to the zero-dimensional case below
        }
        // The zero-dimensional case
        f(*data(), (*other_buffers.data())...);
    }

public:
    /** Call a function on every value in the buffer, and the
     * corresponding values in some number of other buffers of the same
     * size. The function should take a reference, const reference, or
     * value of the correct type for each buffer. This effectively lifts
     * a function of scalars to an element-wise function of buffers, and
     * produces code the compiler can autovectorize. It is slightly
     * cheaper than for_each_element, because it does not need to track
     * the coordinates. */
    // @{
    template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
    HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_value(Fn &&f, Args &&...other_buffers) const {
        for_each_value_impl(f, std::forward<Args>(other_buffers)...);
        return *this;
    }

    template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
    HALIDE_ALWAYS_INLINE Buffer<T, Dims, InClassDimStorage> &for_each_value(Fn &&f, Args &&...other_buffers) {
        for_each_value_impl(f, std::forward<Args>(other_buffers)...);
        return *this;
    }
    // @}
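    /** A usage sketch (hypothetical caller code): an element-wise saxpy
     * over two buffers of the same shape.
     \code
     Halide::Runtime::Buffer<float> x(1024), y(1024);
     x.fill(1.0f);
     y.fill(2.0f);
     const float a = 0.5f;
     y.for_each_value([=](float &yv, float xv) { yv += a * xv; }, x);
     \endcode
     */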
private:
    // Helper functions for for_each_element
    struct for_each_element_task_dim {
        int min, max;
    };

    /** If f is callable with this many args, call it. The first
     * argument is just to make the overloads distinct. Actual overload
     * selection is done using the enable_if. */
    template<typename Fn,
             typename... Args,
             typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
    HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
        f(args...);
    }

    /** If the above overload is impossible, we add an outer loop over
     * an additional argument and try again. */
    template<typename Fn,
             typename... Args>
    HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
        for (int i = t[d].min; i <= t[d].max; i++) {
            for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
        }
    }

    /** Determine the minimum number of arguments a callable can take
     * using the same trick. */
    template<typename Fn,
             typename... Args,
             typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
    HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
        return (int)(sizeof...(Args));
    }

    /** The recursive version. We add one argument and try again. */
    template<typename Fn,
             typename... Args>
    HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
        static_assert(sizeof...(args) <= 256,
                      "Callable passed to for_each_element must accept either a const int *,"
                      " or up to 256 ints. No such operator found. Expect infinite template recursion.");
        return num_args(0, std::forward<Fn>(f), 0, args...);
    }

    /** A version where the callable takes a position array instead,
     * with compile-time recursion on the dimensionality. This overload
     * is preferred to the one below using the same int vs double trick
     * as above, but is impossible once d hits -1 using std::enable_if. */
    template<int d,
             typename Fn,
             typename = typename std::enable_if<(d >= 0)>::type>
    HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
        for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
            for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
        }
    }

    /** Base case for the compile-time recursion above. */
    template<int d,
             typename Fn,
             typename = typename std::enable_if<(d < 0)>::type>
    HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
        f(pos);
    }

    /** A run-time-recursive version (instead of
     * compile-time-recursive) that requires the callable to take a
     * pointer to a position array instead. Dispatches to the
     * compile-time-recursive version once the dimensionality gets
     * small. */
    template<typename Fn>
    static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
        if (d == -1) {
            f(pos);
        } else if (d == 0) {
            // Once the dimensionality gets small enough, dispatch to a
            // compile-time-recursive version for better codegen of the
            // inner loops.
            for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
        } else if (d == 1) {
            for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
        } else if (d == 2) {
            for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
        } else if (d == 3) {
            for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
        } else {
            for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
                for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
            }
        }
    }
    /** We now have two overloads for for_each_element. This one
     * triggers if the callable takes a const int *. */
    template<typename Fn,
             typename = decltype(std::declval<Fn>()((const int *)nullptr))>
    static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
        const int size = dims * sizeof(int);
        int *pos = (int *)HALIDE_ALLOCA(size);
        // At least one version of GCC will (incorrectly) report that
        // pos "may be used uninitialized". Silence it.
        memset(pos, 0, size);
        for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
    }

    /** This one triggers otherwise. It treats the callable as
     * something that takes some number of ints. */
    template<typename Fn>
    HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
        int args = num_args(0, std::forward<Fn>(f));
        assert(dims >= args);
        for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
    }

    template<typename Fn>
    void for_each_element_impl(Fn &&f) const {
        for_each_element_task_dim *t =
            (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
        for (int i = 0; i < dimensions(); i++) {
            t[i].min = dim(i).min();
            t[i].max = dim(i).max();
        }
        for_each_element(0, dimensions(), t, std::forward<Fn>(f));
    }
public:
    /** Call a function at each site in a buffer. The callable should
     * take either a const int * (a coordinate array), or some number of
     * ints, one per dimension to iterate. */
    // @{
    template<typename Fn>
    HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_element(Fn &&f) const {
        for_each_element_impl(f);
        return *this;
    }

    template<typename Fn>
    HALIDE_ALWAYS_INLINE Buffer<T, Dims, InClassDimStorage> &for_each_element(Fn &&f) {
        for_each_element_impl(f);
        return *this;
    }
    // @}
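    /** A usage sketch (hypothetical caller code): both callable styles
     * accepted by for_each_element.
     \code
     Halide::Runtime::Buffer<int> im(4, 4);
     // Variadic-int style:
     im.for_each_element([&](int x, int y) { im(x, y) = x + 10 * y; });
     // Coordinate-array style, useful for dimension-agnostic code:
     im.for_each_element([&](const int *pos) { im(pos) *= 2; });
     \endcode
     */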
private:
    template<typename Fn>
    struct FillHelper {
        Fn f;
        Buffer<T, Dims, InClassDimStorage> *buf;

        template<typename... Args,
                 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
        void operator()(Args... args) {
            (*buf)(args...) = f(args...);
        }

        FillHelper(Fn &&f, Buffer<T, Dims, InClassDimStorage> *buf)
            : f(std::forward<Fn>(f)), buf(buf) {
        }
    };

public:
    /** Fill a buffer by evaluating a callable at every site. The
     * callable should look much like a callable passed to
     * for_each_element, but it should return the value that should be
     * stored to the coordinate corresponding to the arguments. */
    template<typename Fn,
             typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
    Buffer<T, Dims, InClassDimStorage> &fill(Fn &&f) {
        // We'll go via for_each_element. We need a variadic wrapper.
        FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
        return for_each_element(wrapper);
    }
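    /** A usage sketch (hypothetical caller code): filling a gradient
     * image with the callable form of fill().
     \code
     Halide::Runtime::Buffer<float> im(256, 256);
     im.fill([](int x, int y) { return (x + y) / 510.0f; });
     \endcode
     */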
    /** Check if an input buffer passed to an extern stage is querying
     * bounds. Compared to doing the host pointer check directly, this
     * both adds clarity to code and will facilitate moving to another
     * representation for bounds query arguments. */
    bool is_bounds_query() const {
        return buf.is_bounds_query();
    }

    /** Convenient check to verify that all of the interesting bytes in
     * the Buffer are initialized under MSAN. Note that by default we use
     * for_each_value() here so that we skip any unused padding that
     * isn't part of the Buffer; this isn't efficient, but in MSAN mode
     * it doesn't matter. (Pass true to force-check the entire Buffer
     * storage.) */
    void msan_check_mem_is_initialized(bool entire = false) const {
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
        if (entire) {
            __msan_check_mem_is_initialized(data(), size_in_bytes());
        } else {
            for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); });
        }
#endif
#endif
    }
};
}  // namespace Runtime
}  // namespace Halide

#undef HALIDE_ALLOCA

#endif  // HALIDE_RUNTIME_BUFFER_H