Halide  17.0.2
Halide compiler and libraries
CodeGen_GPU_Dev.h
Go to the documentation of this file.
1 #ifndef HALIDE_CODEGEN_GPU_DEV_H
2 #define HALIDE_CODEGEN_GPU_DEV_H
3 
4 /** \file
5  * Defines the code-generator interface for producing GPU device code
6  */
7 #include <string>
8 #include <vector>
9 
10 #include "CodeGen_C.h"
11 #include "DeviceArgument.h"
12 #include "Expr.h"
13 
14 namespace Halide {
15 namespace Internal {
16 
17 /** A code generator that emits GPU code from a given Halide stmt. */
19  virtual ~CodeGen_GPU_Dev();
20 
21  /** Compile a GPU kernel into the module. This may be called many times
22  * with different kernels, which will all be accumulated into a single
23  * source module shared by a given Halide pipeline. */
24  virtual void add_kernel(Stmt stmt,
25  const std::string &name,
26  const std::vector<DeviceArgument> &args) = 0;
27 
28  /** (Re)initialize the GPU kernel module. This is separate from compile,
29  * since a GPU device module will often have many kernels compiled into it
30  * for a single pipeline. */
31  virtual void init_module() = 0;
32 
33  virtual std::vector<char> compile_to_src() = 0;
34 
35  virtual std::string get_current_kernel_name() = 0;
36 
37  virtual void dump() = 0;
38 
39  /** This routine returns the GPU API name that is combined into
40  * runtime routine names to ensure each GPU API has a unique
41  * name.
42  */
43  virtual std::string api_unique_name() = 0;
44 
45  /** Returns the specified name transformed by the variable naming rules
46  * for the GPU language backend. Used to determine the name of a parameter
47  * during host codegen. */
48  virtual std::string print_gpu_name(const std::string &name) = 0;
49 
50  /** Allows the GPU device specific code to request halide_type_t
51  * values to be passed to the kernel_run routine rather than just
52  * argument type sizes.
53  */
54  virtual bool kernel_run_takes_types() const {
55  return false;
56  }
57 
58  static bool is_gpu_var(const std::string &name);
59  static bool is_gpu_block_var(const std::string &name);
60  static bool is_gpu_thread_var(const std::string &name);
61 
62  /** Checks if expr is block uniform, i.e. does not depend on a thread
63  * var. */
64  static bool is_block_uniform(const Expr &expr);
65  /** Checks if the buffer is a candidate for constant storage. Most
66  * GPUs (APIs) support a constant memory storage class that cannot be
67  * written to and performs well for block uniform accesses. A buffer is a
68  * candidate for constant storage if it is never written to, and loads are
69  * uniform within the workgroup. */
70  static bool is_buffer_constant(const Stmt &kernel, const std::string &buffer);
71 
72  /** Modifies predicated loads and stores to be non-predicated, since most
73  * GPU backends do not support predication. */
75 
76  /** A mask describing which type of memory fence to use for the gpu_thread_barrier()
77  * intrinsic. Not all GPU APIs support all types.
78  */
80  None = 0, // No fence required (just a sync)
81  Device = 1, // Device/global memory fence
82  Shared = 2 // Threadgroup/shared memory fence
83  };
84 };
85 
86 /** A base class for GPU backends that require C-like shader output.
87  * GPU backends derive from and specialize this class. */
88 class CodeGen_GPU_C : public CodeGen_C {
89 public:
90  /** OpenCL and WGSL use different syntax than C for immediate vectors. This
91  enum defines which style should be used by the backend. */
93  CLikeSyntax = 0,
94  OpenCLSyntax = 1,
95  WGSLSyntax = 2,
96  };
97 
98  CodeGen_GPU_C(std::ostream &s, Target t)
99  : CodeGen_C(s, t) {
100  }
101 
102 protected:
103  using CodeGen_C::visit;
104  void visit(const Shuffle *op) override;
105  void visit(const Call *op) override;
106 
108 };
109 
110 } // namespace Internal
111 } // namespace Halide
112 
113 #endif
Defines an IRPrinter that emits C++ code equivalent to a Halide stmt.
Defines helpers for passing arguments to separate devices, such as GPUs.
Base classes for Halide expressions (Halide::Expr) and statements (Halide::Internal::Stmt)
This class emits C++ code equivalent to a Halide Stmt.
Definition: CodeGen_C.h:27
void visit(const IntImm *) override
A base class for GPU backends that require C-like shader output.
VectorDeclarationStyle
OpenCL and WGSL use different syntax than C for immediate vectors.
void visit(const Call *op) override
CodeGen_GPU_C(std::ostream &s, Target t)
void visit(const Shuffle *op) override
VectorDeclarationStyle vector_declaration_style
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
A fragment of Halide syntax.
Definition: Expr.h:258
A function call.
Definition: IR.h:490
A code generator that emits GPU code from a given Halide stmt.
static bool is_gpu_thread_var(const std::string &name)
static bool is_gpu_var(const std::string &name)
virtual void init_module()=0
(Re)initialize the GPU kernel module.
static bool is_gpu_block_var(const std::string &name)
static bool is_block_uniform(const Expr &expr)
Checks if expr is block uniform, i.e. does not depend on a thread var.
MemoryFenceType
A mask describing which type of memory fence to use for the gpu_thread_barrier() intrinsic.
virtual std::vector< char > compile_to_src()=0
static Stmt scalarize_predicated_loads_stores(Stmt &s)
Modifies predicated loads and stores to be non-predicated, since most GPU backends do not support predication.
virtual std::string get_current_kernel_name()=0
virtual std::string print_gpu_name(const std::string &name)=0
Returns the specified name transformed by the variable naming rules for the GPU language backend.
virtual bool kernel_run_takes_types() const
Allows the GPU device specific code to request halide_type_t values to be passed to the kernel_run routine rather than just argument type sizes.
virtual std::string api_unique_name()=0
This routine returns the GPU API name that is combined into runtime routine names to ensure each GPU API has a unique name.
static bool is_buffer_constant(const Stmt &kernel, const std::string &buffer)
Checks if the buffer is a candidate for constant storage.
virtual void add_kernel(Stmt stmt, const std::string &name, const std::vector< DeviceArgument > &args)=0
Compile a GPU kernel into the module.
Construct a new vector by taking elements from another sequence of vectors.
Definition: IR.h:841
A reference-counted handle to a statement node.
Definition: Expr.h:419
A struct representing a target machine and os to generate code for.
Definition: Target.h:19