Halide  17.0.2
Halide compiler and libraries
Target.h
Go to the documentation of this file.
1 #ifndef HALIDE_TARGET_H
2 #define HALIDE_TARGET_H
3 
4 /** \file
5  * Defines the structure that describes a Halide target.
6  */
7 
8 #include <bitset>
9 #include <cstdint>
10 #include <string>
11 
12 #include "DeviceAPI.h"
13 #include "Type.h"
14 #include "runtime/HalideRuntime.h"
15 
16 namespace Halide {
17 
18 /** A struct representing a target machine and os to generate code for. */
19 struct Target {
20  /** The operating system used by the target. Determines which
21  * system calls to generate.
22  * Corresponds to os_name_map in Target.cpp. */
23  enum OS {
24  OSUnknown = 0,
27  OSX,
29  IOS,
34  } os = OSUnknown;
35 
36  /** The architecture used by the target. Determines the
37  * instruction set to use.
38  * Corresponds to arch_name_map in Target.cpp. */
39  enum Arch {
41  X86,
42  ARM,
47  } arch = ArchUnknown;
48 
49  /** The bit-width of the target machine. Must be 0 for unknown, or 32 or 64. */
50  int bits = 0;
51 
52  /** The bit-width of a vector register for targets where this is configurable and
53  * targeting a fixed size is desired. The default of 0 indicates no assumption of
54  * fixed size is allowed. */
55  int vector_bits = 0;
56 
57  /** The specific processor to be targeted, tuned for.
58  * Corresponds to processor_name_map in Target.cpp.
59  *
60  * New entries should be added to the end. */
61  enum Processor {
62  /// Do not tune for any specific CPU. In practice, this means that halide will decide the tune CPU based on the enabled features.
64  K8, /// Tune for AMD K8 Hammer CPU (AMD Family 0Fh, launched 2003).
65  K8_SSE3, /// Tune for later versions of AMD K8 CPU, with SSE3 support.
66  AMDFam10, /// Tune for AMD K10 "Barcelona" CPU (AMD Family 10h, launched 2007).
67  BtVer1, /// Tune for AMD Bobcat CPU (AMD Family 14h, launched 2011).
68  BdVer1, /// Tune for AMD Bulldozer CPU (AMD Family 15h, launched 2011).
69  BdVer2, /// Tune for AMD Piledriver CPU (AMD Family 15h (2nd-gen), launched 2012).
70  BdVer3, /// Tune for AMD Steamroller CPU (AMD Family 15h (3rd-gen), launched 2014).
71  BdVer4, /// Tune for AMD Excavator CPU (AMD Family 15h (4th-gen), launched 2015).
72  BtVer2, /// Tune for AMD Jaguar CPU (AMD Family 16h, launched 2013).
73  ZnVer1, /// Tune for AMD Zen CPU (AMD Family 17h, launched 2017).
74  ZnVer2, /// Tune for AMD Zen 2 CPU (AMD Family 17h, launched 2019).
75  ZnVer3, /// Tune for AMD Zen 3 CPU (AMD Family 19h, launched 2020).
76  ZnVer4, /// Tune for AMD Zen 4 CPU (AMD Family 19h, launched 2022).
78 
79  /** Optional features a target can have.
80  * Corresponds to feature_name_map in Target.cpp.
81  * See definitions in HalideRuntime.h for full information.
82  */
83  enum Feature {
112  OpenGLCompute = halide_target_feature_openglcompute, // NOTE: This feature is deprecated and will be removed in Halide 17.
172  };
173  Target() = default;
/** Construct a Target from explicit components.
 *
 * \param o OS stored in `os`.
 * \param a Architecture stored in `arch`.
 * \param b Bit-width stored in `bits`.
 * \param pt Processor stored in `processor_tune`.
 * \param initial_features Features to enable; each one is passed to set_feature().
 * \param vb Vector register bit-width stored in `vector_bits` (defaults to 0).
 *
 * validate_features() is called once, after all initial features have been set. */
174  Target(OS o, Arch a, int b, Processor pt, const std::vector<Feature> &initial_features = std::vector<Feature>(),
175  int vb = 0)
176  : os(o), arch(a), bits(b), vector_bits(vb), processor_tune(pt) {
177  for (const auto &f : initial_features) {
178  set_feature(f);
179  }
180  validate_features();
181  }
182 
/** Convenience overload without a processor argument: delegates to the
 * five-argument constructor with ProcessorGeneric as the tuning target,
 * leaving that constructor's vector-bits parameter at its default of 0. */
183  Target(OS o, Arch a, int b, const std::vector<Feature> &initial_features = std::vector<Feature>())
184  : Target(o, a, b, ProcessorGeneric, initial_features) {
185  }
186 
187  /** Given a string of the form used in HL_TARGET
188  * (e.g. "x86-64-avx"), construct the Target it specifies. Note
189  * that this always starts with the result of get_host_target(),
190  * replacing only the parts found in the target string, so if you
191  * omit (say) an OS specification, the host OS will be used
192  * instead. An empty string is exactly equivalent to
193  * get_host_target().
194  *
195  * Invalid target strings will fail with a user_error.
196  */
197  // @{
198  explicit Target(const std::string &s);
199  explicit Target(const char *s);
200  // @}
201 
202  /** Check if a target string is valid. */
203  static bool validate_target_string(const std::string &s);
204 
205  /** Return true if any of the arch/bits/os fields are "unknown"/0;
206  return false otherwise. */
207  bool has_unknowns() const;
208 
209  void set_feature(Feature f, bool value = true);
210 
211  void set_features(const std::vector<Feature> &features_to_set, bool value = true);
212 
213  bool has_feature(Feature f) const;
214 
/** Overload accepting the runtime's halide_target_feature_t: casts \p f to
 * Target::Feature and forwards to has_feature(Feature). */
215  inline bool has_feature(halide_target_feature_t f) const {
216  return has_feature((Feature)f);
217  }
218 
219  bool features_any_of(const std::vector<Feature> &test_features) const;
220 
221  bool features_all_of(const std::vector<Feature> &test_features) const;
222 
223  /** Return a copy of the target with the given feature set.
224  * This is convenient when enabling certain features (e.g. NoBoundsQuery)
225  * in an initialization list, where the target to be mutated may be
226  * a const reference. */
227  Target with_feature(Feature f) const;
228 
229  /** Return a copy of the target with the given feature cleared.
230  * This is convenient when disabling certain features (e.g. NoBoundsQuery)
231  * in an initialization list, where the target to be mutated may be
232  * a const reference. */
233  Target without_feature(Feature f) const;
234 
235  /** Is a fully featured GPU compute runtime enabled? I.e. is
236  * Func::gpu_tile and similar going to work? Currently includes
237  * CUDA, OpenCL, Metal and D3D12Compute. We do not include OpenGL,
238  * because it is not capable of gpgpu, and is not scheduled via
239  * Func::gpu_tile.
240  * TODO: Should OpenGLCompute be included here? */
241  bool has_gpu_feature() const;
242 
243  /** Does this target allow using a certain type. Generally all
244  * types except 64-bit float and int/uint should be supported by
245  * all backends.
246  *
247  * It is likely better to call the version below which takes a DeviceAPI.
248  */
249  bool supports_type(const Type &t) const;
250 
251  /** Does this target allow using a certain type on a certain device.
252  * This is the preferred version of this routine.
253  */
254  bool supports_type(const Type &t, DeviceAPI device) const;
255 
256  /** Returns whether a particular device API can be used with this
257  * Target. */
258  bool supports_device_api(DeviceAPI api) const;
259 
260  /** If this Target (including all Features) requires a specific DeviceAPI,
261  * return it. If it doesn't, return DeviceAPI::None. If the Target has
262  * features with multiple (different) DeviceAPI requirements, the result
263  * will be an arbitrary DeviceAPI. */
265 
/** Two Targets compare equal when os, arch, bits, processor_tune and the
 * feature bitset all match.
 * NOTE(review): vector_bits is not part of this comparison, so Targets that
 * differ only in vector_bits compare equal -- confirm this is intended. */
266  bool operator==(const Target &other) const {
267  return os == other.os &&
268  arch == other.arch &&
269  bits == other.bits &&
270  processor_tune == other.processor_tune &&
271  features == other.features;
272  }
273 
/** Logical negation of operator==; the same set of fields is compared. */
274  bool operator!=(const Target &other) const {
275  return !(*this == other);
276  }
277 
278  /**
279  * Create a "greatest common denominator" runtime target that is compatible with
280  * both this target and \p other. Used by generators to conveniently select a suitable
281  * runtime when linking together multiple functions.
282  *
283  * @param other The other target from which we compute the gcd target.
284  * @param[out] result The gcd target if we return true, otherwise unmodified. Can be the same as *this.
285  * @return Whether it was possible to find a compatible target (true) or not.
286  */
287  bool get_runtime_compatible_target(const Target &other, Target &result);
288 
289  /** Convert the Target into a string form that can be reconstituted
290  * by merge_string(), which will always be of the form
291  *
292  * arch-bits-os-processor-feature1-feature2...featureN.
293  *
294  * Note that it is guaranteed that Target(t1.to_string()) == t1,
295  * but not that Target(s).to_string() == s (since there can be
296  * multiple strings that parse to the same Target)...
297  * *unless* t1 contains 'unknown' fields (in which case you'll get a string
298  * that can't be parsed, which is intentional).
299  */
300  std::string to_string() const;
301 
302  /** Given a data type, return an estimate of the "natural" vector size
303  * for that data type when compiling for this Target. */
304  int natural_vector_size(const Halide::Type &t) const;
305 
306  /** Template convenience form: return the estimated "natural" vector size
307  * for data_t on this Target by passing type_of<data_t>() to the Type overload. */
308  template<typename data_t>
309  int natural_vector_size() const {
310  return natural_vector_size(type_of<data_t>());
311  }
312 
313  /** Return true iff bits == 64 and has_feature(LargeBuffers); the LargeBuffers feature is ignored when bits != 64. */
314  bool has_large_buffers() const {
315  return bits == 64 && has_feature(LargeBuffers);
316  }
317 
318  /** Return the maximum buffer size in bytes supported on this
319  * Target. This is 2^31 - 1 except on 64-bit targets when the LargeBuffers
320  * feature is enabled, which expands the maximum to 2^63 - 1. */
322  if (has_large_buffers()) {
323  return (((uint64_t)1) << 63) - 1;
324  } else {
325  return (((uint64_t)1) << 31) - 1;
326  }
327  }
328 
329  /** Get the minimum cuda capability found as an integer. Returns
330  * 20 (our minimum supported cuda compute capability) if no cuda
331  * features are set. */
333 
334  /** Get the minimum Vulkan capability found as an integer. Returns
335  * 10 (our minimum supported Vulkan compute capability) if no Vulkan
336  * features are set. */
338 
339  /** Was libHalide compiled with support for this target? */
340  bool supported() const;
341 
342  /** Return a bitset of the Features set in this Target (set = 1).
343  * Note that while this happens to be the current internal representation,
344  * that might not always be the case. */
345  const std::bitset<FeatureEnd> &get_features_bitset() const {
346  return features;
347  }
348 
349  /** Return the name corresponding to a given Feature, in the form
350  * used to construct Target strings (e.g., Feature::Debug is "debug" and not "Debug"). */
351  static std::string feature_to_name(Target::Feature feature);
352 
353  /** Return the feature corresponding to a given name, in the form
354  * used to construct Target strings (e.g., Feature::Debug is "debug" and not "Debug").
355  * If the string is not a known feature name, return FeatureEnd. */
356  static Target::Feature feature_from_name(const std::string &name);
357 
358 private:
359  /** A bitmask that stores the active features. */
360  std::bitset<FeatureEnd> features;
361 
362  /** Attempt to validate that all features set are sensible for the base Target.
363  * This is *not* guaranteed to get all invalid combinations, but is intended
364  * to catch at least the most common (e.g., setting arm-specific features on x86). */
365  void validate_features() const;
366 };
367 
368 /** Return the target corresponding to the host machine. */
369 Target get_host_target();
370 
371 /** Return the target that Halide will use. If HL_TARGET is set it
372  * uses that. Otherwise calls \ref get_host_target */
374 
375 /** Return the target that Halide will use for jit-compilation. If
376  * HL_JIT_TARGET is set it uses that. Otherwise calls \ref
377  * get_host_target. Throws an error if the architecture, bit width,
378  * and OS of the target do not match the host target, so this is only
379  * useful for controlling the feature set. */
381 
382 /** Get the Target feature corresponding to a DeviceAPI. For device
383  * apis that do not correspond to any single target feature, returns
384  * Target::FeatureEnd */
386 
387 namespace Internal {
388 
389 void target_test();
390 }
391 
392 } // namespace Halide
393 
394 #endif
Enable Vulkan 64-bit integer support.
Disable the bounds querying functionality.
Enable ARM Scalable Vector Extensions v2.
Enable x86 (AMD) FMA4 instruction set.
bool supports_device_api(DeviceAPI api) const
Returns whether a particular device API can be used with this Target.
Target get_target_from_environment()
Return the target that Halide will use.
enum Halide::Target::Arch arch
static bool validate_target_string(const std::string &s)
Check if a target string is valid.
bool features_all_of(const std::vector< Feature > &test_features) const
enum Halide::Target::Processor processor_tune
Enable +simd128 instructions for WebAssembly codegen.
Enable ARM Scalable Vector Extensions.
Defines DeviceAPI.
Enable hooks for MSAN support.
Tune for AMD Zen CPU (AMD Family 17h, launched 2017).
Definition: Target.h:74
bool has_feature(Feature f) const
Tune for AMD Bulldozer CPU (AMD Family 15h, launched 2011).
Definition: Target.h:69
Enable Vulkan runtime support.
Enable hooks for TSAN support.
Target(OS o, Arch a, int b, Processor pt, const std::vector< Feature > &initial_features=std::vector< Feature >(), int vb=0)
Definition: Target.h:174
bool supported() const
Was libHalide compiled with support for this target?
Enable half support on OpenCL targets.
Used together with Target::NoOS for the baremetal target built with semihosting library and run with ...
A struct representing a target machine and os to generate code for.
Definition: Target.h:19
Generate C++ mangled names for result function, et al.
Enable the AVX512 features supported by Knight's Landing chips, such as the Xeon Phi x200...
Target::Feature target_feature_for_device_api(DeviceAPI api)
Get the Target feature corresponding to a DeviceAPI.
Arch
The architecture used by the target.
Definition: Target.h:39
int bits
The bit-width of the target machine.
Definition: Target.h:50
Do not include a copy of the Halide runtime in any generated object file or assembly.
Tune for later versions of AMD K8 CPU, with SSE3 support.
Definition: Target.h:66
Enable Vulkan v1.3 runtime target support.
Enable Hexagon v62 architecture.
static Target::Feature feature_from_name(const std::string &name)
Return the feature corresponding to a given name, in the form used to construct Target strings (e...
bool has_large_buffers() const
Return true iff 64 bits and has_feature(LargeBuffers).
Definition: Target.h:314
Processor
The specific processor to be targeted, tuned for.
Definition: Target.h:61
This file defines the class FunctionDAG, which is our representation of a Halide pipeline, and contains methods that use Halide's bounds tools to query properties of it.
Enable HVX 128 byte mode.
bool get_runtime_compatible_target(const Target &other, Target &result)
Create a "greatest common denominator" runtime target that is compatible with both this target and ot...
Enable Hexagon v66 architecture.
bool has_feature(halide_target_feature_t f) const
Definition: Target.h:215
bool has_unknowns() const
Return true if any of the arch/bits/os fields are "unknown"/0; return false otherwise.
Enable CUDA compute capability 3.5 (Kepler)
Enable Vulkan 16-bit float support.
Enable the base AVX512 subset supported by all AVX512 architectures. The specific feature sets are AV...
Enable CUDA compute capability 5.0 (Maxwell)
Enable the OpenCL runtime.
Generate code for ARMv7s. Only relevant for 32-bit ARM.
Enable RISCV "V" Vector Extension.
Use SSE 4.1 and earlier instructions. Only relevant on x86.
Enable CUDA compute capability 8.6 (Ampere)
bool operator==(const Target &other) const
Definition: Target.h:266
Enable the WebGPU runtime.
Defines halide types.
Tune for AMD Bobcat CPU (AMD Family 14h, launched 2011).
Definition: Target.h:68
Enable Direct3D 12 Compute runtime.
OS
The operating system used by the target.
Definition: Target.h:23
Enable soft float ABI. This only enables the soft float ABI calling convention, which does not necess...
Enable 64-bit buffer indexing to support buffers > 2GB. Ignored if bits != 64.
This file declares the routines used by Halide internally in its runtime.
Launch a sampling profiler alongside the Halide pipeline that monitors and reports the runtime used b...
Use AVX 2 instructions. Only relevant on x86.
Enable the AVX512 features supported by Skylake Xeon server processors. This adds AVX512-VL...
Enable SPIR-V code generation support.
Enable CUDA compute capability 7.0 (Volta)
void set_feature(Feature f, bool value=true)
Enable CUDA compute capability 6.1 (Pascal)
Turn off all non-IEEE floating-point optimization. Currently applies only to LLVM targets...
Generate code that will run immediately inside the calling process.
Enable the AVX512 features supported by Sapphire Rapids processors. This includes all of the Zen4 feat...
Enable 64-bit atomics operations on OpenCL targets.
Enable hooks for ASAN support.
Enable +bulk-memory instructions for WebAssembly codegen.
Enable loop vectorization + unrolling in LLVM. Overrides halide_target_feature_disable_llvm_loop_opt...
int get_vulkan_capability_lower_bound() const
Get the minimum Vulkan capability found as an integer.
bool has_gpu_feature() const
Is a fully featured GPU compute runtime enabled? I.e.
Enable double support on OpenCL targets.
signed __INT64_TYPE__ int64_t
Trace all stores done by the pipeline. Equivalent to calling Func::trace_stores on every non-inlined ...
bool operator!=(const Target &other) const
Definition: Target.h:274
Enable Vulkan 8-bit integer support.
Enable hooks for SanitizerCoverage support.
Disable all runtime checks, for slightly tighter code.
halide_target_feature_t
Optional features a compilation Target can have.
Target with_feature(Feature f) const
Return a copy of the target with the given feature set.
Enable the CUDA runtime. Defaults to compute capability 2.0 (Fermi)
Use VSX instructions. Only relevant on POWERPC.
Use the LLVM large code model to compile.
Trace all loads done by the pipeline. Equivalent to calling Func::trace_loads on every non-inlined Fu...
Enable ARMv8.2-a half-precision floating point data processing.
Turn on debug info and output for runtime code.
Tune for AMD Jaguar CPU (AMD Family 16h, launched 2013).
Definition: Target.h:73
bool supports_type(const Type &t) const
Does this target allow using a certain type.
Tune for AMD Steamroller CPU (AMD Family 15h (3rd-gen), launched 2014).
Definition: Target.h:71
Enable CUDA compute capability 7.5 (Turing)
Force use of EGL support.
Enable Vulkan 64-bit float support.
const std::bitset< FeatureEnd > & get_features_bitset() const
Return a bitset of the Features set in this Target (set = 1).
Definition: Target.h:345
Enable x86 16-bit float support.
A sentinel. Every target is considered to have this feature, and setting this feature does nothing...
Use POWER ISA 2.07 new instructions. Only relevant on POWERPC.
Disable all extensions to WebAssembly codegen (including +sign-ext and +nontrapping-fptoint, which are on by default).
Feature
Optional features a target can have.
Definition: Target.h:83
Avoid using NEON instructions. Only relevant for 32-bit ARM.
Do not tune for any specific CPU. In practice, this means that halide will decide the tune CPU based ...
Definition: Target.h:63
void set_features(const std::vector< Feature > &features_to_set, bool value=true)
int get_cuda_capability_lower_bound() const
Get the minimum cuda capability found as an integer.
On every floating point store, set the last bit of the mantissa to zero. Pipelines for which the outp...
Types in the halide type system.
Definition: Type.h:276
Enable ARMv8.1-a instructions.
Insert assertions for promises.
Enable use of threads in WebAssembly codegen. Requires the use of a wasm runtime that provides pthrea...
Generated code takes a user_context pointer as first argument.
Enable x86 FMA instruction.
Tune for AMD Zen 2 CPU (AMD Family 17h, launched 2019).
Definition: Target.h:75
Enable Hexagon DMA buffers.
Trace all realizations done by the pipeline. Equivalent to calling Func::trace_realizations on every ...
Emulate clang -fembed-bitcode flag.
int64_t maximum_buffer_size() const
Return the maximum buffer size in bytes supported on this Target.
Definition: Target.h:321
Tune for AMD Excavator CPU (AMD Family 15h (4th-gen), launched 2015).
Definition: Target.h:72
Use AVX 1 instructions. Only relevant on x86.
bool features_any_of(const std::vector< Feature > &test_features) const
Tune for AMD Piledriver CPU (AMD Family 15h (2nd-gen), launched 2012).
Definition: Target.h:70
Target(OS o, Arch a, int b, const std::vector< Feature > &initial_features=std::vector< Feature >())
Definition: Target.h:183
Target()=default
int vector_bits
The bit-width of a vector register for targets where this is configurable and targeting a fixed size ...
Definition: Target.h:55
Target get_jit_target_from_environment()
Return the target that Halide will use for jit-compilation.
Enable CUDA compute capability 3.2 (Tegra K1)
Alternative to halide_target_feature_profile using timer interrupt for systems without threads or app...
Enable Hexagon v65 architecture.
Enable CUDA compute capability 3.0 (Kepler)
static std::string feature_to_name(Target::Feature feature)
Return the name corresponding to a given Feature, in the form used to construct Target strings (e...
enum Halide::Target::OS os
DeviceAPI get_required_device_api() const
If this Target (including all Features) requires a specific DeviceAPI, return it. ...
unsigned __INT64_TYPE__ uint64_t
Enable the AVX512 features supported by Zen4 processors. This includes all of the Cannonlake features...
std::string to_string() const
Convert the Target into a string form that can be reconstituted by merge_string(), which will always be of the form.
Enable OpenGL Compute runtime. NOTE: This feature is deprecated and will be removed in Halide 17...
Tune for AMD K10 "Barcelona" CPU (AMD Family 10h, launched 2007).
Definition: Target.h:67
DeviceAPI
An enum describing a type of device API.
Definition: DeviceAPI.h:15
Enable CUDA compute capability 8.0 (Ampere)
Enable the (Apple) Metal runtime.
Enable Vulkan v1.2 runtime target support.
Enable the AVX512 features expected to be supported by future Cannonlake processors. This includes all of the Skylake features, plus AVX512-IFMA and AVX512-VBMI.
int natural_vector_size() const
Given a data type, return an estimate of the "natural" vector size for that data type when compiling ...
Definition: Target.h:309
Target get_host_target()
Return the target corresponding to the host machine.
Enable Vulkan 16-bit integer support.
Enable ARMv8.2-a dotprod extension (i.e. udot and sdot instructions)
Target without_feature(Feature f) const
Return a copy of the target with the given feature cleared.
Tune for AMD K8 Hammer CPU (AMD Family 0Fh, launched 2003).
Definition: Target.h:65
Tune for AMD Zen 3 CPU (AMD Family 19h, launched 2020).
Definition: Target.h:76
Enable Vulkan v1.0 runtime target support.