Halide  17.0.2
Halide compiler and libraries
Tiling.h
Go to the documentation of this file.
1 #ifndef TILING_H
2 #define TILING_H
3 
4 #include <cstdint>
5 #include <vector>
6 
7 namespace Halide {
8 namespace Internal {
9 namespace Autoscheduler {
10 
11 bool all_ones(const std::vector<int64_t> &nums);
12 
13 bool equal_to_existing_size(const std::vector<int64_t> &s,
14  const std::vector<int64_t> &nums);
15 
16 std::vector<std::vector<int64_t>> generate_serial_tilings(const std::vector<int64_t> &s,
17  int d,
18  int last_d,
19  int vectorized_index,
20  const std::vector<int> &vec_dim_serial_sizes,
21  bool filter_small_outer_extents = false,
22  bool allow_inner_ones = false);
23 
24 // Given a multi-dimensional box of dimensionality d, generate a list
25 // of candidate tile sizes for it, logarithmically spacing the sizes
26 // using the given factor. If 'allow_splits' is false, every dimension
27 // must either be one, or the full extent of the box. This function is
28 // used to generate candidate tilings when tiling for
29 // producer-consumer fusion, or tiling for parallelism.
30 // inner_sizes is an optional vector of fixed sizes to choose from for the inner loop.
31 // It is used for GPU schedules when we split a 'none' loop into a parallel loop and a serial loop.
32 std::vector<std::vector<int64_t>> generate_tilings(const std::vector<int64_t> &s,
33  int d,
34  int factor,
35  bool allow_splits,
36  const std::vector<int> &inner_sizes = std::vector<int>());
37 
38 /** Moves the vectorized dimension first and also removes dimensions with size 1
39  to reflect the actual thread dimensions when loop nests are lowered. **/
40 void lowered_dims(const std::vector<int64_t> &size,
41  int vector_loop_i,
42  std::vector<int64_t> &lowered_size);
43 
44 // Creates tilings for GPU thread loops.
45 // The innermost thread loop is always the vectorized dim and its extent is a multiple of 32.
46 // Other loop extents are sized to be powers of 2 such that the total extent is < 1024.
47 // Called either when we are creating a parallel -> (blocks, threads) loop when computing at root,
48 // or when we are creating a none -> (threads, SIMD) loop when computing at a serial loop.
49 // serial_inner is true when we're generating (thread, serial) tilings, and false when generating (block, thread) tilings.
50 // max_s holds the max gpu_thread counts of all siblings in each dimension. It is used to make sure the union of
51 // thread counts is under the 1024 threshold.
52 std::vector<std::vector<int64_t>> generate_gpu_tilings(const std::vector<std::vector<int64_t>> &stage_sizes,
53  const std::vector<std::vector<int>> &pure_dims,
54  const std::vector<int64_t> &max_s,
55  int d,
56  const std::vector<int> &vectorized_indices,
57  bool serial_inner,
58  bool is_compute_root_stage);
59 
60 } // namespace Autoscheduler
61 } // namespace Internal
62 } // namespace Halide
63 
64 #endif // TILING_H
std::vector< std::vector< int64_t > > generate_serial_tilings(const std::vector< int64_t > &s, int d, int last_d, int vectorized_index, const std::vector< int > &vec_dim_serial_sizes, bool filter_small_outer_extents=false, bool allow_inner_ones=false)
std::vector< std::vector< int64_t > > generate_gpu_tilings(const std::vector< std::vector< int64_t >> &stage_sizes, const std::vector< std::vector< int >> &pure_dims, const std::vector< int64_t > &max_s, int d, const std::vector< int > &vectorized_indices, bool serial_inner, bool is_compute_root_stage)
This file defines the class FunctionDAG, which is our representation of a Halide pipeline, and contains methods that use Halide's bounds tools to query properties of it.
Not visible externally, similar to 'static' linkage in C.
bool equal_to_existing_size(const std::vector< int64_t > &s, const std::vector< int64_t > &nums)
std::vector< std::vector< int64_t > > generate_tilings(const vector< int64_t > &s, int d, int factor, bool allow_splits)
bool all_ones(const std::vector< int64_t > &nums)
void lowered_dims(const std::vector< int64_t > &size, int vector_loop_i, std::vector< int64_t > &lowered_size)
moves vectorized dimension first and also removes dimensions with size 1 to reflect actual thread dim...