1 #ifndef HALIDE_HALIDERUNTIME_H 2 #define HALIDE_HALIDERUNTIME_H 4 #ifndef COMPILING_HALIDE_RUNTIME 10 #include <string_view> 26 #define HALIDE_VERSION_MAJOR 17 27 #define HALIDE_VERSION_MINOR 0 28 #define HALIDE_VERSION_PATCH 2 44 #define HALIDE_ALWAYS_INLINE __forceinline 45 #define HALIDE_NEVER_INLINE __declspec(noinline) 49 #define HALIDE_ALWAYS_INLINE inline __attribute__((always_inline)) 50 #define HALIDE_NEVER_INLINE __attribute__((noinline)) 53 #ifndef HALIDE_MUST_USE_RESULT 54 #ifdef __has_attribute 55 #if __has_attribute(nodiscard) 57 #define HALIDE_MUST_USE_RESULT [[nodiscard]] 58 #elif __has_attribute(warn_unused_result) 60 #define HALIDE_MUST_USE_RESULT __attribute__((warn_unused_result)) 62 #define HALIDE_MUST_USE_RESULT 65 #define HALIDE_MUST_USE_RESULT 75 #ifndef HALIDE_FUNCTION_ATTRS 76 #define HALIDE_FUNCTION_ATTRS 79 #ifndef HALIDE_EXPORT_SYMBOL 81 #define HALIDE_EXPORT_SYMBOL __declspec(dllexport) 83 #define HALIDE_EXPORT_SYMBOL __attribute__((visibility("default"))) 87 #ifndef COMPILING_HALIDE_RUNTIME 94 #if defined(__clang__) && (__clang_major__ >= 16) && !defined(__EMSCRIPTEN__) 95 #if defined(__is_identifier) 96 #if !__is_identifier(_Float16) 97 #define HALIDE_CPP_COMPILER_HAS_FLOAT16 105 #if defined(__GNUC__) && (__GNUC__ >= 12) 106 #if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) 107 #define HALIDE_CPP_COMPILER_HAS_FLOAT16 111 #endif // !COMPILING_HALIDE_RUNTIME 152 extern void halide_print(
void *user_context,
const char *);
166 extern void halide_error(
void *user_context,
const char *);
248 uint8_t *closure,
void *task_parent);
326 uint8_t *closure,
void *task_parent);
367 struct halide_thread;
410 extern void halide_free(
void *user_context,
void *ptr);
413 typedef void *(*halide_malloc_t)(
void *,
size_t);
436 typedef void *(*halide_get_symbol_t)(
const char *name);
437 typedef void *(*halide_load_library_t)(
const char *name);
438 typedef void *(*halide_get_library_symbol_t)(
void *lib,
const char *name);
459 #if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L) 472 #ifndef HALIDE_ATTRIBUTE_ALIGN 474 #define HALIDE_ATTRIBUTE_ALIGN(x) __declspec(align(x)) 476 #define HALIDE_ATTRIBUTE_ALIGN(x) __attribute__((aligned(x))) 487 #if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L) 503 #if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L) 527 return as_u32() == other.as_u32();
531 return !(*
this == other);
535 return as_u32() < other.as_u32();
540 return (
bits + 7) / 8;
549 (static_cast<uint16_t>(
bits) << 8) |
550 (static_cast<uint32_t>(
lanes) << 16);
555 #if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L) 679 #if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L) 684 return (
const int *)(
this + 1);
688 return (
int *)(
this + 1);
695 return (
const void *)(coordinates() +
dimensions);
716 const char *f = func();
876 int slice_dim,
int slice_pos,
981 bool has_eviction_key,
uint64_t eviction_key);
1265 int min_bound,
int max_bound,
int min_required,
int max_required);
1271 int dimension,
int min_touched,
int max_touched,
1272 int min_valid,
int max_valid);
1280 int constrained_min,
int constrained_extent,
1281 int required_min,
int required_extent);
1283 const char *constrained_var,
int constrained_val);
1289 double val,
double min_val);
1295 double val,
double max_val);
1299 const char *filename,
int error_code);
1303 const char *loop_name);
1305 int dim,
int min,
int extent,
int valid_min,
int fold_factor);
1308 int fold_factor,
const char *loop_name,
int required_extent);
1317 int provided_size,
int required_size);
1467 #if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L) 1479 return (
min == other.
min) &&
1486 return !(*
this == other);
1536 #if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L) 1540 return (
flags & flag) != 0;
1612 return host + begin_offset() *
type.bytes();
1618 return host + end_offset() *
type.bytes();
1623 return (
size_t)(end_offset() - begin_offset()) *
type.bytes();
1632 return host + index *
type.bytes();
1660 #ifndef HALIDE_ATTRIBUTE_DEPRECATED 1661 #ifdef HALIDE_ALLOW_DEPRECATED 1662 #define HALIDE_ATTRIBUTE_DEPRECATED(x) 1665 #define HALIDE_ATTRIBUTE_DEPRECATED(x) __declspec(deprecated(x)) 1667 #define HALIDE_ATTRIBUTE_DEPRECATED(x) __attribute__((deprecated(x))) 1754 static const int32_t VERSION = 1;
1788 int (*filter_argv_call)(
void **),
1790 const char *
const *extra_key_value_pairs);
1814 uint64_t active_threads_numerator, active_threads_denominator;
1840 uint64_t active_threads_numerator, active_threads_denominator;
1846 struct halide_profiler_func_stats *funcs;
2029 #if (__cplusplus >= 201103L || _MSVC_LANG >= 201103L) 2033 template<
typename T>
2034 struct check_is_pointer {
2035 static constexpr
bool value =
false;
2038 template<
typename T>
2039 struct check_is_pointer<T *> {
2040 static constexpr
bool value =
true;
2046 template<
typename T>
2052 static_assert(check_is_pointer<T>::value,
"Expected a pointer type here");
2056 #ifdef HALIDE_CPP_COMPILER_HAS_FLOAT16 2118 #ifndef COMPILING_HALIDE_RUNTIME 2123 namespace HalideFunctionInfo {
2125 enum ArgumentKind { InputScalar = 0,
2129 struct ArgumentInfo {
2130 std::string_view name;
2138 #endif // COMPILING_HALIDE_RUNTIME 2140 #endif // (__cplusplus >= 201103L || _MSVC_LANG >= 201103L) 2142 #endif // HALIDE_HALIDERUNTIME_H auto operator<(const Other &a, const GeneratorParam< T > &b) -> decltype(a<(T) b)
Less than comparison between GeneratorParam<T> and any type that supports operator< with T...
The Halide runtime encountered an error while trying to detach a native device handle.
The halide_buffer_t * passed to a halide runtime routine is nullptr and this is not allowed...
int halide_device_sync(void *user_context, struct halide_buffer_t *buf)
Wait for current GPU operations to complete.
int(* halide_semaphore_init_t)(struct halide_semaphore_t *, int)
Enable Vulkan 64-bit integer support.
struct halide_mutex * array
int halide_error_param_too_large_u64(void *user_context, const char *param_name, uint64_t val, uint64_t max_val)
Various other error conditions.
Disable the bounds querying functionality.
int halide_error_bad_dimensions(void *user_context, const char *func_name, int32_t dimensions_given, int32_t correct_dimensions)
Various other error conditions.
Enable ARM Scalable Vector Extensions v2.
int(* halide_do_par_for_t)(void *, halide_task_t, int, int, uint8_t *)
Set a custom method for performing a parallel for loop.
void halide_error(void *user_context, const char *)
Halide calls this function on runtime errors (for example bounds checking failures).
There is a bug in the Halide compiler.
int32_t dimensions
The length of the coordinates array.
int(* release_unused)(void *user_context)
A halide_buffer_t was given with extents that multiply to a number greater than 2^31-1.
struct halide_thread * halide_spawn_thread(void(*f)(void *), void *closure)
Spawn a thread.
int halide_error_param_too_small_f64(void *user_context, const char *param_name, double val, double min_val)
Various other error conditions.
struct halide_profiler_state * halide_profiler_get_state()
Get a pointer to the global profiler state for programmatic inspection.
int32_t dimensions
The remaining fields are equivalent to those in halide_trace_event_t.
The Halide runtime encountered a host pointer that violated the alignment set for it by way of a call...
void *(* halide_get_library_symbol_t)(void *lib, const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
Cross platform condition variable.
void halide_cond_wait(struct halide_cond *cond, struct halide_mutex *mutex)
A basic set of mutex and condition variable functions, which call platform specific code for mutual e...
Enable x86 (AMD) FMA4 instruction set.
halide_trace_event_code_t
int halide_error_storage_bound_too_small(void *user_context, const char *func_name, const char *var_name, int provided_size, int required_size)
Various other error conditions.
int halide_do_parallel_tasks(void *user_context, int num_tasks, struct halide_parallel_task_t *tasks, void *task_parent)
Enqueue some number of the tasks described above and wait for them to complete.
A type traits template to provide a halide_handle_cplusplus_type value from a C++ type...
int halide_error_fold_factor_too_small(void *user_context, const char *func_name, const char *var_name, int fold_factor, const char *loop_name, int required_extent)
Various other error conditions.
A fold_storage directive was used on a dimension that is not accessed in a monotonically increasing o...
int(* device_and_host_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
int halide_device_malloc(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
Allocate device memory to back a halide_buffer_t.
int active_threads
The number of threads currently doing work.
current_func takes on this value when not inside Halide code
The Halide runtime encountered an error while trying to allocate memory on device.
struct halide_dimension_t halide_dimension_t
int halide_error_buffer_extents_negative(void *user_context, const char *buffer_name, int dimension, int extent)
Various other error conditions.
void halide_mutex_array_destroy(void *user_context, void *array)
int halide_error_out_of_memory(void *user_context)
Various other error conditions.
Enable +simd128 instructions for WebAssembly codegen.
struct halide_type_t type
If the event type is a load or a store, this is the type of the data.
void halide_print(void *user_context, const char *)
Print a message to stderr.
int(* halide_do_parallel_tasks_t)(void *, int, struct halide_parallel_task_t *, void *task_parent)
Provide an entire custom tasking runtime via function pointers.
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
The header of a packet in a binary trace.
int halide_msan_check_buffer_is_initialized(void *user_context, struct halide_buffer_t *buffer, const char *buf_name)
Verify that the data pointed to by the halide_buffer_t is initialized (but not the halide_buffer_t it...
Enable ARM Scalable Vector Extensions.
int(* detach_native)(void *user_context, struct halide_buffer_t *buf)
int first_free_id
An internal id used for bookkeeping.
int(* wrap_native)(void *user_context, struct halide_buffer_t *buf, uint64_t handle, const struct halide_device_interface_t *device_interface)
Enable hooks for MSAN support.
A fold_storage directive was used with a fold factor that was too small to store all the values of a ...
auto operator!=(const Other &a, const GeneratorParam< T > &b) -> decltype(a !=(T) b)
Inequality comparison between GeneratorParam<T> and any type that supports operator!= with T...
struct halide_type_t type
The type of each buffer element.
halide_do_task_t halide_set_custom_do_task(halide_do_task_t do_task)
If you use the default do_par_for, you can still set a custom handler to perform each individual task...
Enable Vulkan runtime support.
The Halide runtime encountered an error while trying to wrap a native device handle.
uint16_t lanes
How many elements in a vector.
Enable hooks for TSAN support.
void halide_msan_annotate_buffer_is_initialized_as_destructor(void *user_context, void *buffer)
int halide_error_param_too_small_u64(void *user_context, const char *param_name, uint64_t val, uint64_t min_val)
Various other error conditions.
int halide_error_device_interface_no_device(void *user_context)
Various other error conditions.
int(* halide_can_use_target_features_t)(int count, const uint64_t *features)
This function is called internally by Halide in some situations to determine if the current execution...
int(* device_and_host_free)(void *user_context, struct halide_buffer_t *buf)
Enable half support on OpenCL targets.
This part of the Halide runtime is unimplemented on this platform.
int halide_error_unaligned_host_ptr(void *user_context, const char *func_name, int alignment)
Various other error conditions.
Used together with Target::NoOS for the baremetal target built with semihosting library and run with ...
A specialize_fail() schedule branch was selected at runtime.
The Halide runtime encountered an error while trying to launch a GPU kernel.
int halide_error_extern_stage_failed(void *user_context, const char *extern_stage_name, int result)
A call to an extern stage failed.
int64_t const *const * buffer_estimates
int halide_semaphore_release(struct halide_semaphore_t *, int n)
An uncategorized error occurred.
int halide_default_do_parallel_tasks(void *user_context, int num_tasks, struct halide_parallel_task_t *tasks, void *task_parent)
The default versions of the parallel runtime functions.
halide_can_use_target_features_t halide_set_custom_can_use_target_features(halide_can_use_target_features_t)
This function is called internally by Halide in some situations to determine if the current execution...
Generate C++ mangled names for result function, et al.
void(* halide_error_handler_t)(void *, const char *)
Halide calls this function on runtime errors (for example bounds checking failures).
IEEE floating point numbers.
Enable the AVX512 features supported by Knight's Landing chips, such as the Xeon Phi x200...
void * halide_malloc(void *user_context, size_t x)
Halide calls these functions to allocate and free memory.
int halide_error_specialize_fail(void *user_context, const char *message)
Various other error conditions.
int halide_error_buffer_is_null(void *user_context, const char *routine)
Various other error conditions.
int(* device_malloc)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
uint8_t code
The basic type code: signed integer, unsigned integer, or floating point.
halide_free_t halide_set_custom_free(halide_free_t user_free)
Halide calls these functions to allocate and free memory.
int halide_buffer_copy(void *user_context, struct halide_buffer_t *src, const struct halide_device_interface_t *dst_device_interface, struct halide_buffer_t *dst)
Copy data from one buffer to another.
struct halide_profiler_pipeline_stats * pipelines
A linked list of stats gathered for each pipeline.
signed __INT8_TYPE__ int8_t
Obsolete version of halide_filter_argument_t; only present in code that wrote halide_filter_metadata_...
void halide_default_print(void *user_context, const char *)
Print a message to stderr.
Do not include a copy of the Halide runtime in any generated object file or assembly.
const struct halide_scalar_value_t * max
A halide_buffer_t pointer passed in was NULL.
Enable Vulkan v1.3 runtime target support.
int halide_get_trace_file(void *user_context)
Halide calls this to retrieve the file descriptor to write binary trace events to.
int halide_error_constraint_violated(void *user_context, const char *var, int val, const char *constrained_var, int constrained_val)
Various other error conditions.
Enable Hexagon v62 architecture.
void *(* halide_load_library_t)(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
void(* device_release)(void *user_context, const struct halide_device_interface_t *device_interface)
The elem_size field of a halide_buffer_t does not match the size in bytes of the type of that ImagePa...
struct halide_profiler_pipeline_stats * halide_profiler_get_pipeline_state(const char *pipeline_name)
Get a pointer to the pipeline state associated with pipeline_name.
int(* halide_semaphore_release_t)(struct halide_semaphore_t *, int)
uint64_t flags
flags with various meanings.
halide_dimension_t * dim
The shape of the buffer.
Enable HVX 128 byte mode.
halide_do_loop_task_t halide_set_custom_do_loop_task(halide_do_loop_task_t do_task)
The version of do_task called for loop tasks.
int(* halide_task_t)(void *user_context, int task_number, uint8_t *closure)
Define halide_do_par_for to replace the default thread pool implementation.
int32_t halide_debug_to_file(void *user_context, const char *filename, int32_t type_code, struct halide_buffer_t *buf)
Called when debug_to_file is used inside Halide code.
void halide_shutdown_thread_pool()
Define halide_do_par_for to replace the default thread pool implementation.
int32_t parent_id
The remaining fields are equivalent to those in halide_trace_event_t.
const struct halide_scalar_value_t * scalar_min
int halide_memoization_cache_store(void *user_context, const uint8_t *cache_key, int32_t size, struct halide_buffer_t *realized_bounds, int32_t tuple_count, struct halide_buffer_t **tuple_buffers, bool has_eviction_key, uint64_t eviction_key)
Given a cache key for a memoized result, currently constructed from the Func name and top-level Func ...
void * halide_default_get_library_symbol(void *lib, const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
The Halide runtime encountered an error while trying to copy from host to device. ...
Enable Hexagon v66 architecture.
unsigned __INT8_TYPE__ uint8_t
void(* halide_free_t)(void *, void *)
Halide calls these functions to allocate and free memory.
void halide_profiler_shutdown()
Reset all profiler state.
Failure recording trace packets for one of the halide_target_feature_trace features.
void halide_register_argv_and_metadata(int(*filter_argv_call)(void **), const struct halide_filter_metadata_t *filter_metadata, const char *const *extra_key_value_pairs)
halide_register_argv_and_metadata() is a user-defined function that must be provided in order to use ...
int(* device_slice)(void *user_context, const struct halide_buffer_t *src, int slice_dim, int slice_pos, struct halide_buffer_t *dst)
void halide_set_custom_parallel_runtime(halide_do_par_for_t, halide_do_task_t, halide_do_loop_task_t, halide_do_parallel_tasks_t, halide_semaphore_init_t, halide_semaphore_try_acquire_t, halide_semaphore_release_t)
Provide an entire custom tasking runtime via function pointers.
int halide_error_buffer_argument_is_null(void *user_context, const char *buffer_name)
Various other error conditions.
halide_filter_argument_t is essentially a plain-C-struct equivalent to Halide::Argument; most user co...
Enable CUDA compute capability 3.5 (Kepler)
int halide_copy_to_device(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
Copy image data from host memory to device memory.
int32_t(* halide_trace_t)(void *user_context, const struct halide_trace_event_t *)
int halide_do_loop_task(void *user_context, halide_loop_task_t f, int min, int extent, uint8_t *closure, void *task_parent)
The version of do_task called for loop tasks.
halide_get_symbol_t halide_set_custom_get_symbol(halide_get_symbol_t user_get_symbol)
Halide calls these functions to interact with the underlying system runtime functions.
__PTRDIFF_TYPE__ ptrdiff_t
Enable Vulkan 16-bit float support.
Enable the base AVX512 subset supported by all AVX512 architectures. The specific feature sets are AV...
int(* compute_capability)(void *user_context, int *major, int *minor)
int halide_device_release_crop(void *user_context, struct halide_buffer_t *buf)
Release any resources associated with a cropped/sliced view of another buffer.
struct halide_mutex lock
Guards access to the fields below.
struct halide_type_t type
The remaining fields are equivalent to those in halide_trace_event_t.
Enable CUDA compute capability 5.0 (Maxwell)
Enable the OpenCL runtime.
Generate code for ARMv7s. Only relevant for 32-bit ARM.
int(* device_sync)(void *user_context, struct halide_buffer_t *buf)
void halide_start_timer_chain()
For timer based profiling, this routine starts the timer chain running.
The host field on an input or output was null, the device field was not zero, and the pipeline tries ...
The Halide runtime encountered an error while trying to copy from device to host. ...
Enable RISCV "V" Vector Extension.
A halide_buffer_t was given that spans more than 2GB of memory.
int halide_device_wrap_native(void *user_context, struct halide_buffer_t *buf, uint64_t handle, const struct halide_device_interface_t *device_interface)
Wrap or detach a native device handle, setting the device field and device_interface field as appropr...
Use SSE 4.1 and earlier instructions. Only relevant on x86.
Enable CUDA compute capability 8.6 (Ampere)
void halide_mutex_lock(struct halide_mutex *mutex)
A basic set of mutex and condition variable functions, which call platform specific code for mutual e...
A buffer with the device_dirty flag set was passed to a pipeline compiled with no device backends ena...
struct halide_semaphore_acquire_t * semaphores
Enable the WebGPU runtime.
bool halide_can_reuse_device_allocations(void *user_context)
Determines whether on device_free the memory is returned immediately to the device API...
int(* copy_to_device)(void *user_context, struct halide_buffer_t *buf, const struct halide_device_interface_t *device_interface)
debug_to_file failed to open or write to the specified file.
int32_t id
The id of this packet (for the purpose of parent_id).
int(* device_release_crop)(void *user_context, struct halide_buffer_t *buf)
void halide_free(void *user_context, void *ptr)
Halide calls these functions to allocate and free memory.
auto operator==(const Other &a, const GeneratorParam< T > &b) -> decltype(a==(T) b)
Equality comparison between GeneratorParam<T> and any type that supports operator== with T...
halide_scalar_value_t is a simple union able to represent all the well-known scalar values in a filte...
void halide_memoization_cache_evict(void *user_context, uint64_t eviction_key)
Evict all cache entries that were tagged with the given eviction_key in the memoize scheduling direct...
int halide_error_requirement_failed(void *user_context, const char *condition, const char *message)
Various other error conditions.
Enable Direct3D 12 Compute runtime.
int halide_default_can_use_target_features(int count, const uint64_t *features)
This is the default implementation of halide_can_use_target_features; it is provided for convenience ...
int(* halide_do_loop_task_t)(void *, halide_loop_task_t, int, int, uint8_t *, void *)
The version of do_task called for loop tasks.
struct halide_buffer_t halide_buffer_t
The raw representation of an image passed around by generated Halide code.
double halide_float16_bits_to_double(uint16_t)
Read bits representing a half precision floating point number and return the double that represents t...
The dimensions field of a halide_buffer_t does not match the dimensions of that ImageParam.
halide_error_code_t
The error codes that may be returned by a Halide pipeline.
uint8_t bits
The number of bits of precision of a single scalar value of this type.
int halide_error_host_is_null(void *user_context, const char *func_name)
Various other error conditions.
void * halide_load_library(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
int halide_do_par_for(void *user_context, halide_task_t task, int min, int size, uint8_t *closure)
Define halide_do_par_for to replace the default thread pool implementation.
A Func was given an explicit bound via Func::bound, but this was not large enough to encompass the re...
uint32_t size
The total size of this packet in bytes.
Expr print(const std::vector< Expr > &values)
Create an Expr that prints out its value whenever it is evaluated.
void halide_profiler_report(void *user_context)
Print out timing statistics for everything run since the last reset.
struct halide_type_t type
Enable soft float ABI. This only enables the soft float ABI calling convention, which does not necess...
Enable 64-bit buffer indexing to support buffers > 2GB. Ignored if bits != 64.
struct halide_type_t type
int halide_error_bad_fold(void *user_context, const char *func_name, const char *var_name, const char *loop_name)
Various other error conditions.
Launch a sampling profiler alongside the Halide pipeline that monitors and reports the runtime used b...
Use AVX 2 instructions. Only relevant on x86.
Enable the AVX512 features supported by Skylake Xeon server processors. This adds AVX512-VL...
Each GPU API provides a halide_device_interface_t struct pointing to the code that manages device all...
halide_do_par_for_t halide_set_custom_do_par_for(halide_do_par_for_t do_par_for)
Buffer has both host and device dirty bits set, which violates a Halide invariant.
void halide_cond_signal(struct halide_cond *cond)
A basic set of mutex and condition variable functions, which call platform specific code for mutual e...
Enable SPIR-V code generation support.
opaque pointer type (void *)
Enable CUDA compute capability 7.0 (Volta)
void halide_disable_timer_interrupt()
These routines are called to temporarily disable and then reenable timer interrupts for profiling...
int halide_error_bad_extern_fold(void *user_context, const char *func_name, int dim, int min, int extent, int valid_min, int fold_factor)
Various other error conditions.
int32_t dimensions
The dimensionality of the buffer.
int halide_device_free(void *user_context, struct halide_buffer_t *buf)
Free device memory.
void * halide_default_load_library(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
unsigned __INT32_TYPE__ uint32_t
int halide_profiler_sample(struct halide_profiler_state *s, uint64_t *prev_t)
Collects profiling information.
Enable CUDA compute capability 6.1 (Pascal)
Turn off all non-IEEE floating-point optimization. Currently applies only to LLVM targets...
Generate code that will run immediately inside the calling process.
int halide_do_task(void *user_context, halide_task_t f, int idx, uint8_t *closure)
If you use the default do_par_for, you can still set a custom handler to perform each individual task...
union halide_scalar_value_t::@4 u
Enable the AVX512 features supported by Sapphire Rapids processors. This includes all of the Zen4 feat...
int halide_can_use_target_features(int count, const uint64_t *features)
This function is called internally by Halide in some situations to determine if the current execution...
halide_error_handler_t halide_set_error_handler(halide_error_handler_t handler)
Halide calls this function on runtime errors (for example bounds checking failures).
Enable 64-bit atomics operations on OpenCL targets.
Enable hooks for ASAN support.
Enable +bulk-memory instructions for WebAssembly codegen.
Applying explicit constraints on the size of an input or output buffer shrank the size of that buffer...
Buffer has a non-null device_interface but device is 0, which violates a Halide invariant.
Enable loop vectorization + unrolling in LLVM. Overrides halide_target_feature_disable_llvm_loop_opt...
An explicit storage bound provided is too small to store all the values produced by the function...
void * halide_default_get_symbol(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
void halide_memoization_cache_set_size(int64_t size)
Set the soft maximum amount of memory, in bytes, that the LRU cache will use to memoize Func results...
const struct halide_scalar_value_t * scalar_max
void halide_set_gpu_device(int n)
Selects which gpu device to use.
Enable double support on OpenCL targets.
Call(s) to a GPU backend API failed.
At least one of the buffer's extents is negative.
int halide_msan_annotate_buffer_is_initialized(void *user_context, struct halide_buffer_t *buffer)
Mark the data pointed to by the halide_buffer_t as initialized (but not the halide_buffer_t itself)...
The Halide runtime encountered an error while trying to copy from one buffer to another.
float halide_float16_bits_to_float(uint16_t)
Read bits representing a half precision floating point number and return the float that represents th...
signed __INT64_TYPE__ int64_t
Trace all stores done by the pipeline. Equivalent to calling Func::trace_stores on every non-inlined ...
int32_t * coordinates
For loads and stores, an array which contains the location being accessed.
A call to halide_malloc returned NULL.
int32_t halide_default_trace(void *user_context, const struct halide_trace_event_t *event)
Enable Vulkan 8-bit integer support.
halide_get_library_symbol_t halide_set_custom_get_library_symbol(halide_get_library_symbol_t user_get_library_symbol)
Halide calls these functions to interact with the underlying system runtime functions.
int halide_error_buffer_extents_too_large(void *user_context, const char *buffer_name, int64_t actual_size, int64_t max_size)
Various other error conditions.
Enable hooks for SanitizerCoverage support.
Disable all runtime checks, for slightly tighter code.
halide_target_feature_t
Optional features a compilation Target can have.
Enable the CUDA runtime. Defaults to compute capability 2.0 (Fermi)
int halide_error_explicit_bounds_too_small(void *user_context, const char *func_name, const char *var_name, int min_bound, int max_bound, int min_required, int max_required)
Various other error conditions.
A parallel task to be passed to halide_do_parallel_tasks.
int halide_default_semaphore_init(struct halide_semaphore_t *, int n)
The default versions of the parallel runtime functions.
Use VSX instructions. Only relevant on POWERPC.
Use the LLVM large code model to compile.
#define HALIDE_ALWAYS_INLINE
bool halide_semaphore_try_acquire(struct halide_semaphore_t *, int n)
Trace all loads done by the pipeline. Equivalent to calling Func::trace_loads on every non-inlined Fu...
Enable ARMv8.2-a half-precision floating point data processing.
void halide_set_trace_file(int fd)
Set the file descriptor that Halide should write binary trace events to.
int halide_error_debug_to_file_failed(void *user_context, const char *func, const char *filename, int error_code)
Various other error conditions.
Turn on debug info and output for runtime code.
int halide_mutex_array_lock(struct halide_mutex_array *array, int entry)
int32_t halide_trace(void *user_context, const struct halide_trace_event_t *event)
Called when Funcs are marked as trace_load, trace_store, or trace_realization.
int halide_device_crop(void *user_context, const struct halide_buffer_t *src, struct halide_buffer_t *dst)
Give the destination buffer a device allocation which is an alias for the same coordinate range in th...
int halide_default_do_par_for(void *user_context, halide_task_t task, int min, int size, uint8_t *closure)
The default versions of the parallel runtime functions.
bool halide_default_semaphore_try_acquire(struct halide_semaphore_t *, int n)
The default versions of the parallel runtime functions.
A runtime tag for a type in the halide type system.
int halide_device_sync_global(void *user_context, const struct halide_device_interface_t *device_interface)
Wait for current GPU operations to complete.
void halide_memoization_cache_release(void *user_context, void *host)
If halide_memoization_cache_lookup succeeds, halide_memoization_cache_release must be called to signa...
int halide_memoization_cache_lookup(void *user_context, const uint8_t *cache_key, int32_t size, struct halide_buffer_t *realized_bounds, int32_t tuple_count, struct halide_buffer_t **tuple_buffers)
Given a cache key for a memoized result, currently constructed from the Func name and top-level Func ...
struct halide_semaphore_t * semaphore
Enable CUDA compute capability 7.5 (Turing)
Force use of EGL support.
Enable Vulkan 64-bit float support.
A folded buffer was passed to an extern stage, but the region touched wraps around the fold boundary...
int halide_default_semaphore_release(struct halide_semaphore_t *, int n)
The default versions of the parallel runtime functions.
void * halide_get_symbol(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
const struct halide_scalar_value_t * scalar_def
int halide_reuse_device_allocations(void *user_context, bool)
Tell Halide whether or not it is permitted to hold onto device allocations to service future requests...
const struct halide_scalar_value_t * min
Enable x86 16-bit float support.
void halide_enable_timer_interrupt()
These routines are called to temporarily disable and then reenable timer interrupts for profiling...
void halide_profiler_reset()
Reset profiler state cheaply.
A sentinel. Every target is considered to have this feature, and setting this feature does nothing...
Use POWER ISA 2.07 new instructions. Only relevant on POWERPC.
Disable all extensions to WebAssembly codegen (including +sign-ext and +nontrapping-fptoint, which are on by default).
struct halide_thread * sampling_thread
Sampling thread reference to be joined at shutdown.
A constraint on a size or stride of an input or output buffer was not met by the halide_buffer_t pass...
int halide_device_detach_native(void *user_context, struct halide_buffer_t *buf)
Wrap or detach a native device handle, setting the device field and device_interface field as appropr...
void * value
If the event type is a load or a store, this points to the value being loaded or stored.
int halide_default_do_loop_task(void *user_context, halide_loop_task_t f, int min, int extent, uint8_t *closure, void *task_parent)
The default versions of the parallel runtime functions.
void halide_default_error(void *user_context, const char *)
Halide calls this function on runtime errors (for example bounds checking failures).
int halide_default_do_task(void *user_context, halide_task_t f, int idx, uint8_t *closure)
The default versions of the parallel runtime functions.
Avoid using NEON instructions. Only relevant for 32-bit ARM.
const struct halide_device_interface_t * device_interface
The interface used to interpret the above handle.
void halide_cond_broadcast(struct halide_cond *cond)
A basic set of mutex and condition variable functions, which call platform specific code for mutual e...
const char * func
The name of the Func or Pipeline that this event refers to.
unsigned __INT16_TYPE__ uint16_t
On every floating point store, set the last bit of the mantissa to zero. Pipelines for which the outp...
int halide_msan_check_memory_is_initialized(void *user_context, const void *ptr, uint64_t len, const char *name)
Verify that a given range of memory has been initialized; only used when Target::MSAN is enabled...
floating point numbers in the bfloat format
Enable ARMv8.1-a instructions.
uint8_t * host
A pointer to the start of the data in main memory.
void halide_device_release(void *user_context, const struct halide_device_interface_t *device_interface)
Release all data associated with the given device interface, in particular all resources (memory...
int halide_copy_to_host(void *user_context, struct halide_buffer_t *buf)
Copy image data from device memory to host memory.
Insert assertions for promises.
const struct halide_device_interface_impl_t * impl
Enable use of threads in WebAssembly codegen. Requires the use of a wasm runtime that provides pthrea...
Generated code takes a user_context pointer as first argument.
int halide_error_buffer_allocation_too_large(void *user_context, const char *buffer_name, uint64_t allocation_size, uint64_t max_size)
Various other error conditions.
void * padding
Pads the buffer up to a multiple of 8 bytes.
A pipeline would access memory outside of the halide_buffer_t passed in.
Enable x86 FMA instruction.
Enable Hexagon DMA buffers.
The Halide runtime encountered an error while trying to free a device allocation. ...
int halide_shutdown_trace()
If tracing is writing to a file, this call closes that file (flushing the trace).
enum halide_trace_event_code_t event
The type of event.
void *(* halide_get_symbol_t)(const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
void halide_join_thread(struct halide_thread *)
Join a thread.
Trace all realizations done by the pipeline. Equivalent to calling Func::trace_realizations on every ...
int halide_msan_annotate_memory_is_initialized(void *user_context, const void *ptr, uint64_t len)
Annotate that a given range of memory has been initialized; only used when Target::MSAN is enabled...
A runtime symbol could not be loaded.
Emulate clang -fembed-bitcode flag.
void halide_mutex_unlock(struct halide_mutex *mutex)
A basic set of mutex and condition variable functions, which call platform specific code for mutual e...
halide_type_code_t
Types in the halide type system.
int(* copy_to_host)(void *user_context, struct halide_buffer_t *buf)
uint64_t device
A device-handle for e.g. GPU memory used to back this buffer.
An operation on a buffer required an allocation on a particular device interface, but a device alloca...
Use AVX 1 instructions. Only relevant on x86.
halide_print_t halide_set_custom_print(halide_print_t print)
Print a message to stderr.
int halide_error_param_too_large_f64(void *user_context, const char *param_name, double val, double max_val)
Various other error conditions.
halide_trace_t halide_set_custom_trace(halide_trace_t trace)
int halide_error_host_and_device_dirty(void *user_context)
Various other error conditions.
int(* buffer_copy)(void *user_context, struct halide_buffer_t *src, const struct halide_device_interface_t *dst_device_interface, struct halide_buffer_t *dst)
The global state of the profiler.
int sleep_time
The amount of time the profiler thread sleeps between samples in milliseconds.
void * halide_get_library_symbol(void *lib, const char *name)
Halide calls these functions to interact with the underlying system runtime functions.
void halide_memoization_cache_cleanup()
Free all memory and resources associated with the memoization cache.
void(* get_remote_profiler_state)(int *func, int *active_workers)
Retrieve remote profiler state.
int halide_error_no_device_interface(void *user_context)
Various other error conditions.
int halide_error_constraints_make_required_region_smaller(void *user_context, const char *buffer_name, int dimension, int constrained_min, int constrained_extent, int required_min, int required_extent)
Various other error conditions.
Set current_func to this value to tell the profiling thread to halt.
Expr with_lanes(const Expr &x, int lanes)
Rewrite the expression `x` to have `lanes` lanes.
The Halide runtime encountered an error while trying to synchronize with a device.
void halide_default_free(void *user_context, void *ptr)
Halide calls these functions to allocate and free memory.
Enable CUDA compute capability 3.2 (Tegra K1)
bool(* halide_semaphore_try_acquire_t)(struct halide_semaphore_t *, int)
halide_load_library_t halide_set_custom_load_library(halide_load_library_t user_load_library)
Halide calls these functions to interact with the underlying system runtime functions.
int halide_error_param_too_small_i64(void *user_context, const char *param_name, int64_t val, int64_t min_val)
Various other error conditions.
Alternative to halide_target_feature_profile using timer interrupt for systems without threads or app...
Enable Hexagon v65 architecture.
Enable CUDA compute capability 3.0 (Kepler)
Cropping/slicing a buffer failed for some other reason.
int halide_error_device_crop_failed(void *user_context)
Various other error conditions.
User-specified require() expression was not satisfied.
int32_t value_index
The remaining fields are equivalent to those in halide_trace_event_t.
__UINTPTR_TYPE__ uintptr_t
Attempted to make cropped/sliced alias of a buffer with a device field, but the device_interface does...
The raw representation of an image passed around by generated Halide code.
int current_func
The id of the current running Func.
int halide_error_bad_type(void *user_context, const char *func_name, uint32_t type_given, uint32_t correct_type)
Various other error conditions.
int(* halide_loop_task_t)(void *user_context, int min, int extent, uint8_t *closure, void *task_parent)
A task representing a serial for loop evaluated over some range.
unsigned __INT64_TYPE__ uint64_t
int halide_mutex_array_unlock(struct halide_mutex_array *array, int entry)
Buffer has a non-zero device but no device interface, which violates a Halide invariant.
int halide_device_slice(void *user_context, const struct halide_buffer_t *src, int slice_dim, int slice_pos, struct halide_buffer_t *dst)
Give the destination buffer a device allocation which is an alias for a similar coordinate range in t...
const struct halide_scalar_value_t * def
Enable the AVX512 features supported by Zen4 processors. This includes all of the Cannonlake features...
void(* halide_print_t)(void *, const char *)
Print a message to stderr.
A scalar parameter passed in was greater than its maximum declared value.
int halide_error_bounds_inference_call_failed(void *user_context, const char *extern_stage_name, int result)
Halide calls the functions below on various error conditions.
struct halide_mutex_array * halide_mutex_array_create(int sz)
enum halide_trace_event_code_t event
The remaining fields are equivalent to those in halide_trace_event_t.
Enable OpenGL Compute runtime. NOTE: This feature is deprecated and will be removed in Halide 17...
int halide_error_param_too_large_i64(void *user_context, const char *param_name, int64_t val, int64_t max_val)
Various other error conditions.
int halide_error_access_out_of_bounds(void *user_context, const char *func_name, int dimension, int min_touched, int max_touched, int min_valid, int max_valid)
Various other error conditions.
Enable CUDA compute capability 8.0 (Ampere)
int halide_error_device_dirty_with_no_device_support(void *user_context, const char *buffer_name)
Various other error conditions.
Enable the (Apple) Metal runtime.
Enable Vulkan v1.2 runtime target support.
Enable the AVX512 features expected to be supported by future Cannonlake processors. This includes all of the Skylake features, plus AVX512-IFMA and AVX512-VBMI.
halide_malloc_t halide_set_custom_malloc(halide_malloc_t user_malloc)
Halide calls these functions to allocate and free memory.
int(* halide_do_task_t)(void *, halide_task_t, int, uint8_t *)
If you use the default do_par_for, you can still set a custom handler to perform each individual task...
A struct representing a semaphore and a number of items that must be acquired from it...
signed __INT32_TYPE__ int32_t
struct halide_device_allocation_pool * next
int halide_get_gpu_device(void *user_context)
Halide calls this to get the desired halide gpu device setting.
int32_t value_index
If this was a load or store of a Tuple-valued Func, this is which tuple element was accessed...
int(* device_crop)(void *user_context, const struct halide_buffer_t *src, struct halide_buffer_t *dst)
Enable Vulkan 16-bit integer support.
const char * trace_tag
For halide_trace_tag, this points to a read-only null-terminated string of arbitrary text...
signed __INT16_TYPE__ int16_t
const struct halide_scalar_value_t * scalar_estimate
Enable ARMv8.2-a dotprod extension (i.e. udot and sdot instructions)
#define HALIDE_ATTRIBUTE_ALIGN(x)
int halide_set_num_threads(int n)
Set the number of threads used by Halide's thread pool.
A scalar parameter passed in was smaller than its minimum declared value.
void halide_register_device_allocation_pool(struct halide_device_allocation_pool *)
Register a callback to be informed when halide_reuse_device_allocations(false) is called...
int halide_semaphore_init(struct halide_semaphore_t *, int n)
int(* device_free)(void *user_context, struct halide_buffer_t *buf)
void * halide_default_malloc(void *user_context, size_t x)
Halide calls these functions to allocate and free memory.
Enable Vulkan v1.0 runtime target support.
void *(* halide_malloc_t)(void *, size_t)
Halide calls these functions to allocate and free memory.
An opaque struct representing a semaphore.