Halide  19.0.0
Halide compiler and libraries
FunctionDAG.h
Go to the documentation of this file.
1 /** This file defines the class FunctionDAG, which is our
2  * representation of a Halide pipeline, and contains methods to using
3  * Halide's bounds tools to query properties of it. */
4 
5 #ifndef FUNCTION_DAG_H
6 #define FUNCTION_DAG_H
7 
8 #include <algorithm>
9 #include <cstdint>
10 #include <map>
11 #include <string>
12 #include <utility>
13 #include <vector>
14 
15 #include "Errors.h"
16 #include "Featurization.h"
17 #include "Halide.h"
18 #include "PerfectHashMap.h"
19 
20 namespace Halide {
21 namespace Internal {
22 namespace Autoscheduler {
23 
24 using std::map;
25 using std::pair;
26 using std::string;
27 using std::unique_ptr;
28 using std::vector;
29 
30 // First we have various utility classes.
31 
32 // An optional rational type used when analyzing memory dependencies.
33 struct OptionalRational {
35 
36  bool exists() const {
37  return denominator != 0;
38  }
39 
40  OptionalRational() = default;
42  : numerator(n),
43  denominator(d) {
44  }
45 
46  void operator+=(const OptionalRational &other) {
47  if ((denominator & other.denominator) == 0) {
48  numerator = denominator = 0;
49  return;
50  }
51  if (denominator == other.denominator) {
52  numerator += other.numerator;
53  return;
54  }
55 
56  int64_t l = lcm(denominator, other.denominator);
57  numerator *= l / denominator;
58  denominator = l;
59  numerator += other.numerator * (l / other.denominator);
61  numerator /= g;
62  denominator /= g;
63  }
64 
66  if ((*this) == 0) {
67  return *this;
68  }
69  int64_t num = numerator * factor;
70  return OptionalRational{num, denominator};
71  }
72 
74  if ((*this) == 0) {
75  return *this;
76  }
77  if (other == 0) {
78  return other;
79  }
80  int64_t num = numerator * other.numerator;
81  int64_t den = denominator * other.denominator;
82  return OptionalRational{num, den};
83  }
84 
85  // Because this type is optional (exists may be false), we don't
86  // have a total ordering. These methods all return false when the
87  // operators are not comparable, so a < b is not the same as !(a
88  // >= b).
89  bool operator<(int x) const {
90  if (denominator == 0) {
91  return false;
92  } else if (denominator > 0) {
93  return numerator < x * denominator;
94  } else {
95  return numerator > x * denominator;
96  }
97  }
98 
99  bool operator<=(int x) const {
100  if (denominator == 0) {
101  return false;
102  } else if (denominator > 0) {
103  return numerator <= x * denominator;
104  } else {
105  return numerator >= x * denominator;
106  }
107  }
108 
109  bool operator>(int x) const {
110  if (!exists()) {
111  return false;
112  }
113  return !((*this) <= x);
114  }
115 
116  bool operator>=(int x) const {
117  if (!exists()) {
118  return false;
119  }
120  return !((*this) < x);
121  }
122 
123  bool operator==(int x) const {
124  return exists() && (numerator == (x * denominator));
125  }
126 
127  bool operator==(const OptionalRational &other) const {
128  return (exists() == other.exists()) && (numerator * other.denominator == denominator * other.numerator);
129  }
130 };
131 
132 // A LoadJacobian records the derivative of the coordinate accessed in
133 // some producer w.r.t the loops of the consumer.
134 class LoadJacobian {
135  std::vector<OptionalRational> coeffs;
136  int64_t c;
137  size_t rows, cols;
138 
139 public:
141  : c(count),
142  rows(producer_storage_dims),
143  cols(consumer_loop_dims) {
144  coeffs.resize(rows * cols);
145  }
146 
147  bool all_coeffs_exist() const {
148  for (const auto &coeff : coeffs) {
149  if (!coeff.exists()) {
150  return false;
151  }
152  }
153  return true;
154  }
155 
156  bool empty() const {
157  return rows == 0;
158  }
159 
160  size_t producer_storage_dims() const {
161  return rows;
162  }
163 
164  size_t consumer_loop_dims() const {
165  return cols;
166  }
167 
168  bool is_constant() const {
169  for (const auto &c : coeffs) {
170  if (!c.exists() || !(c == 0)) {
171  return false;
172  }
173  }
174 
175  return true;
176  }
177 
178  OptionalRational operator()(int producer_storage_dim, int consumer_loop_dim) const {
179  if (producer_storage_dims() == 0 || consumer_loop_dims() == 0) {
180  // The producer or consumer is scalar, so all strides are zero.
181  return {0, 1};
182  }
183  return coeffs[producer_storage_dim * cols + consumer_loop_dim];
184  }
185 
186  OptionalRational &operator()(int producer_storage_dim, int consumer_loop_dim) {
187  return coeffs[producer_storage_dim * cols + consumer_loop_dim];
188  }
189 
190  // To avoid redundantly re-recording copies of the same
191  // load Jacobian, we keep a count of how many times a
192  // load with this Jacobian occurs.
193  int64_t count() const {
194  return c;
195  }
196 
197  // Try to merge another LoadJacobian into this one, increasing the
198  // count if the coefficients match.
199  bool merge(const LoadJacobian &other) {
200  if (other.rows != rows || other.cols != cols) {
201  return false;
202  }
203  for (size_t i = 0; i < rows * cols; i++) {
204  if (!(other.coeffs[i] == coeffs[i])) {
205  return false;
206  }
207  }
208  c += other.count();
209  return true;
210  }
211 
212  // Scale the matrix coefficients by the given factors
213  LoadJacobian operator*(const std::vector<int64_t> &factors) const {
214  LoadJacobian result(rows, cols, c);
215  for (size_t i = 0; i < producer_storage_dims(); i++) {
216  for (size_t j = 0; j < consumer_loop_dims(); j++) {
217  result(i, j) = (*this)(i, j) * factors[j];
218  }
219  }
220  return result;
221  }
222 
223  // Multiply Jacobians, used to look at memory dependencies through
224  // inlined functions.
225  LoadJacobian operator*(const LoadJacobian &other) const {
226  LoadJacobian result(producer_storage_dims(), other.consumer_loop_dims(), count() * other.count());
227  for (size_t i = 0; i < producer_storage_dims(); i++) {
228  for (size_t j = 0; j < other.consumer_loop_dims(); j++) {
229  result(i, j) = OptionalRational{0, 1};
230  for (size_t k = 0; k < consumer_loop_dims(); k++) {
231  result(i, j) += (*this)(i, k) * other(k, j);
232  }
233  }
234  }
235  return result;
236  }
237 
238  void dump(const char *prefix) const;
239 };
240 
241 // Classes to represent a concrete set of bounds for a Func. A Span is
242 // single-dimensional, and a Bound is a multi-dimensional box. For
243 // each dimension we track the estimated size, and also whether or not
244 // the size is known to be constant at compile-time. For each Func we
245 // track three different types of bounds:
246 
247 // 1) The region required by consumers of the Func, which determines
248 // 2) The region actually computed, which in turn determines
249 // 3) The min and max of all loops in the loop next.
250 
251 // 3 in turn determines the region required of the inputs to a Func,
252 // which determines their region computed, and hence their loop nest,
253 // and so on back up the Function DAG from outputs back to inputs.
254 
255 class Span {
256  int64_t min_, max_;
257  bool constant_extent_;
258 
259 public:
260  int64_t min() const {
261  return min_;
262  }
263  int64_t max() const {
264  return max_;
265  }
266  int64_t extent() const {
267  return max_ - min_ + 1;
268  }
269  bool constant_extent() const {
270  return constant_extent_;
271  }
272 
273  void union_with(const Span &other) {
274  min_ = std::min(min_, other.min());
275  max_ = std::max(max_, other.max());
276  constant_extent_ = constant_extent_ && other.constant_extent();
277  }
278 
279  void set_extent(int64_t e) {
280  max_ = min_ + e - 1;
281  }
282 
283  void translate(int64_t x) {
284  min_ += x;
285  max_ += x;
286  }
287 
288  Span(int64_t a, int64_t b, bool c)
289  : min_(a),
290  max_(b),
291  constant_extent_(c) {
292  }
293  Span() = default;
294  Span(const Span &other) = default;
295  static Span empty_span() {
296  return Span(INT64_MAX, INT64_MIN, true);
297  }
298 };
299 
300 // Bounds objects are created and destroyed very frequently while
301 // exploring scheduling options, so we have a custom allocator and
302 // memory pool. Much like IR nodes, we treat them as immutable once
303 // created and wrapped in a Bound object so that they can be shared
304 // safely across scheduling alternatives.
305 struct BoundContents {
306  mutable RefCount ref_count;
307 
308  class Layout;
309  const Layout *layout = nullptr;
310 
311  Span *data() const {
312  // This struct is a header
313  return (Span *)(const_cast<BoundContents *>(this) + 1);
314  }
315 
317  return data()[i];
318  }
319 
321  return data()[i + layout->computed_offset];
322  }
323 
324  Span &loops(int i, int j) {
325  return data()[j + layout->loop_offset[i]];
326  }
327 
328  const Span &region_required(int i) const {
329  return data()[i];
330  }
331 
332  const Span &region_computed(int i) const {
333  return data()[i + layout->computed_offset];
334  }
335 
336  const Span &loops(int i, int j) const {
337  return data()[j + layout->loop_offset[i]];
338  }
339 
341  auto *b = layout->make();
342  size_t bytes = sizeof(data()[0]) * layout->total_size;
343  memcpy(b->data(), data(), bytes);
344  return b;
345  }
346 
347  void validate() const;
348 
349  // We're frequently going to need to make these concrete bounds
350  // arrays. It makes things more efficient if we figure out the
351  // memory layout of those data structures once ahead of time, and
352  // make each individual instance just use that. Note that this is
353  // not thread-safe.
354  class Layout {
355  // A memory pool of free BoundContent objects with this layout
356  mutable std::vector<BoundContents *> pool;
357 
358  // All the blocks of memory allocated
359  mutable std::vector<void *> blocks;
360 
361  mutable size_t num_live = 0;
362 
363  void allocate_some_more() const;
364 
365  public:
366  // number of Span to allocate
367  int total_size;
368 
369  // region_computed comes next at the following index
370  int computed_offset;
371 
372  // the loop for each stage starts at the following index
373  std::vector<int> loop_offset;
374 
375  Layout() = default;
377 
378  Layout(const Layout &) = delete;
379  void operator=(const Layout &) = delete;
380  Layout(Layout &&) = delete;
381  void operator=(Layout &&) = delete;
382 
383  // Make a BoundContents object with this layout
384  BoundContents *make() const;
385 
386  // Release a BoundContents object with this layout back to the pool
387  void release(const BoundContents *b) const;
388  };
389 };
390 
392 
393 // A representation of the function DAG. The nodes and edges are both
394 // in reverse realization order, so if you want to walk backwards up
395 // the DAG, just iterate the nodes or edges in-order.
396 struct FunctionDAG {
397 
398  // An edge is a producer-consumer relationship
399  struct Edge;
400 
401  struct SymbolicInterval {
404  };
405 
406  // A Node represents a single Func
407  struct Node {
408  // A pointer back to the owning DAG
409  FunctionDAG *dag;
410 
411  // The Halide Func this represents
412  Function func;
413 
414  // The number of bytes per point stored.
415  double bytes_per_point;
416 
417  // The min/max variables used to denote a symbolic region of
418  // this Func. Used in the cost above, and in the Edges below.
419  vector<SymbolicInterval> region_required;
420 
421  // A concrete region required from a bounds estimate. Only
422  // defined for outputs.
423  vector<Span> estimated_region_required;
424 
425  // The region computed of a Func, in terms of the region
426  // required. For simple Funcs this is identical to the
427  // region_required. However, in some Funcs computing one
428  // output requires computing other outputs too. You can't
429  // really ask for a single output pixel from something blurred
430  // with an IIR without computing the others, for example.
431  struct RegionComputedInfo {
432  // The min and max in their full symbolic glory. We use
433  // these in the general case.
434  Interval in;
435 
436  // Analysis used to accelerate common cases
438  int64_t c_min = 0, c_max = 0;
439  };
440  vector<RegionComputedInfo> region_computed;
442 
443  // Expand a region required into a region computed, using the
444  // symbolic intervals above.
445  void required_to_computed(const Span *required, Span *computed) const;
446 
447  // Metadata about one symbolic loop in a Func's default loop nest.
448  struct Loop {
449  string var;
450  bool pure, rvar;
451  Expr min, max;
452 
453  // Which pure dimension does this loop correspond to? Invalid if it's an rvar
454  int pure_dim;
455 
456  // Precomputed metadata to accelerate common cases:
457 
458  // If true, the loop bounds are just the region computed in the given dimension
459  bool equals_region_computed = false;
460  int region_computed_dim = 0;
461 
462  // If true, the loop bounds are a constant with the given min and max
463  bool bounds_are_constant = false;
464  int64_t c_min = 0, c_max = 0;
465 
466  // A persistent fragment of source for getting this Var
467  // from its owner Func. Used for printing source code
468  // equivalent to a computed schedule.
469  string accessor;
470  };
471 
472  // Get the loop nest shape as a function of the region computed
473  void loop_nest_for_region(int stage_idx, const Span *computed, Span *loop) const;
474 
475  // One stage of a Func
476  struct Stage {
477  // The owning Node
478  Node *node;
479 
480  // Which stage of the Func is this. 0 = pure.
481  int index;
482 
483  // The loop nest that computes this stage, from innermost out.
484  vector<Loop> loop;
485  bool loop_nest_all_common_cases = false;
486 
487  // The vectorization width that will be used for
488  // compute. Corresponds to the natural width for the
489  // narrowest type used.
490  int vector_size;
491 
492  // The featurization of the compute done
494 
495  // The actual Halide front-end stage object
497 
498  // The name for scheduling (e.g. "foo.update(3)")
499  string name;
500 
502 
503  // Ids for perfect hashing on stages.
504  int id, max_id;
505 
506  std::unique_ptr<LoadJacobian> store_jacobian;
507 
508  vector<Edge *> incoming_edges;
509 
510  vector<bool> dependencies;
511  bool downstream_of(const Node &n) const {
512  return dependencies[n.id];
513  };
514 
515  explicit Stage(Halide::Stage s)
516  : stage(std::move(s)) {
517  }
518 
519  int get_loop_index_from_var(const std::string &var) const {
520  int i = 0;
521  for (const auto &l : loop) {
522  if (l.var == var) {
523  return i;
524  }
525 
526  ++i;
527  }
528 
529  return -1;
530  }
531  };
532  vector<Stage> stages;
533 
534  vector<Edge *> outgoing_edges;
535 
536  // Max vector size across the stages
537  int vector_size;
538 
539  // A unique ID for this node, allocated consecutively starting
540  // at zero for each pipeline.
541  int id, max_id;
542 
543  // Just func->dimensions(), but we ask for it so many times
544  // that's it's worth avoiding the function call into
545  // libHalide.
546  int dimensions;
547 
548  // Is a single pointwise call to another Func
549  bool is_wrapper;
550 
551  // We represent the input buffers as node, though we do not attempt to schedule them.
552  bool is_input;
553 
554  // Is one of the pipeline outputs
555  bool is_output;
556 
557  // Only uses pointwise calls
558  bool is_pointwise;
559 
560  // Only uses pointwise calls + clamping on all indices
562 
563  std::unique_ptr<BoundContents::Layout> bounds_memory_layout;
564 
566  return bounds_memory_layout->make();
567  }
568  };
569 
570  // A representation of a producer-consumer relationship
571  struct Edge {
572  struct BoundInfo {
573  // The symbolic expression for the bound in this dimension
574  Expr expr;
575 
576  // Fields below are the results of additional analysis
577  // used to evaluate this bound more quickly.
580  bool affine, uses_max;
581 
582  BoundInfo(const Expr &e, const Node::Stage &consumer);
583  };
584 
585  // Memory footprint on producer required by consumer.
586  vector<pair<BoundInfo, BoundInfo>> bounds;
587 
590 
591  // The number of calls the consumer makes to the producer, per
592  // point in the loop nest of the consumer.
593  int calls;
594 
595  bool all_bounds_affine;
596 
597  vector<LoadJacobian> load_jacobians;
598 
600 
602 
603  // Given a loop nest of the consumer stage, expand a region
604  // required of the producer to be large enough to include all
605  // points required.
606  void expand_footprint(const Span *consumer_loop, Span *producer_required) const;
607  };
608 
609  vector<Node> nodes;
610  vector<Edge> edges;
611 
613 
614  // We're going to be querying this DAG a lot while searching for
615  // an optimal schedule, so we'll also create a variety of
616  // auxiliary data structures.
617  map<int, const Node *> stage_id_to_node_map;
618 
619  // Create the function DAG, and do all the dependency and cost
620  // analysis. This is done once up-front before the tree search.
621  FunctionDAG(const vector<Function> &outputs, const Target &target);
622 
623  void dump() const;
624  std::ostream &dump(std::ostream &os) const;
625 
626  // This class uses a lot of internal pointers, so we'll hide the copy constructor.
627  FunctionDAG(const FunctionDAG &other) = delete;
628  void operator=(const FunctionDAG &other) = delete;
629 
630 private:
631  // Compute the featurization for the entire DAG
632  void featurize();
633 
634  template<typename OS>
635  void dump_internal(OS &os) const;
636 };
637 
638 template<typename T>
640 
641 class ExprBranching : public VariadicVisitor<ExprBranching, int, int> {
643 
644 private:
645  const NodeMap<int64_t> &inlined;
646 
647 public:
648  int visit(const Reinterpret *op);
649  int visit(const IntImm *op);
650  int visit(const UIntImm *op);
651  int visit(const FloatImm *op);
652  int visit(const StringImm *op);
653  int visit(const Broadcast *op);
654  int visit(const Cast *op);
655  int visit(const Variable *op);
656  int visit(const Add *op);
657  int visit(const Sub *op);
658  int visit(const Mul *op);
659  int visit(const Div *op);
660  int visit(const Mod *op);
661  int visit(const Min *op);
662  int visit(const Max *op);
663  int visit(const EQ *op);
664  int visit(const NE *op);
665  int visit(const LT *op);
666  int visit(const LE *op);
667  int visit(const GT *op);
668  int visit(const GE *op);
669  int visit(const And *op);
670  int visit(const Or *op);
671  int visit(const Not *op);
672  int visit(const Select *op);
673  int visit(const Ramp *op);
674  int visit(const Load *op);
675  int visit(const Call *op);
676  int visit(const Shuffle *op);
677  int visit(const Let *op);
678  int visit(const VectorReduce *op);
679  int visit_binary(const Expr &a, const Expr &b);
680  int visit_nary(const std::vector<Expr> &exprs);
681 
682  explicit ExprBranching(const NodeMap<int64_t> &inlined)
683  : inlined{inlined} {
684  }
685 
686  int compute(const Function &f);
687 };
688 
689 void sanitize_names(std::string &str);
690 
691 } // namespace Autoscheduler
692 } // namespace Internal
693 } // namespace Halide
694 
695 #endif // FUNCTION_DAG_H
void release(const BoundContents *b) const
int visit_binary(const Expr &a, const Expr &b)
ExprBranching(const NodeMap< int64_t > &inlined)
Definition: FunctionDAG.h:682
int visit_nary(const std::vector< Expr > &exprs)
OptionalRational operator()(int producer_storage_dim, int consumer_loop_dim) const
Definition: FunctionDAG.h:178
LoadJacobian operator*(const LoadJacobian &other) const
Definition: FunctionDAG.h:225
LoadJacobian(size_t producer_storage_dims, size_t consumer_loop_dims, int64_t count)
Definition: FunctionDAG.h:140
OptionalRational & operator()(int producer_storage_dim, int consumer_loop_dim)
Definition: FunctionDAG.h:186
bool merge(const LoadJacobian &other)
Definition: FunctionDAG.h:199
void dump(const char *prefix) const
LoadJacobian operator*(const std::vector< int64_t > &factors) const
Definition: FunctionDAG.h:213
Span(int64_t a, int64_t b, bool c)
Definition: FunctionDAG.h:288
Span(const Span &other)=default
void union_with(const Span &other)
Definition: FunctionDAG.h:273
A reference-counted handle to Halide's internal representation of a function.
Definition: Function.h:39
A class representing a reference count to be used with IntrusivePtr.
Definition: IntrusivePtr.h:19
A visitor/mutator capable of passing arbitrary arguments to the visit methods using CRTP and returnin...
Definition: IRVisitor.h:161
A single definition of a Func.
Definition: Func.h:69
A Halide variable, to be used when defining functions.
Definition: Var.h:19
void sanitize_names(std::string &str)
int64_t gcd(int64_t, int64_t)
The greatest common divisor of two integers.
ConstantInterval min(const ConstantInterval &a, const ConstantInterval &b)
ConstantInterval max(const ConstantInterval &a, const ConstantInterval &b)
int64_t lcm(int64_t, int64_t)
The least common multiple of two integers.
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
signed __INT64_TYPE__ int64_t
signed __INT32_TYPE__ int32_t
void * memcpy(void *s1, const void *s2, size_t n)
A fragment of Halide syntax.
Definition: Expr.h:258
The sum of two expressions.
Definition: IR.h:56
Logical and - are both expressions true.
Definition: IR.h:175
const Span & loops(int i, int j) const
Definition: FunctionDAG.h:336
BoundInfo(const Expr &e, const Node::Stage &consumer)
void expand_footprint(const Span *consumer_loop, Span *producer_required) const
vector< pair< BoundInfo, BoundInfo > > bounds
Definition: FunctionDAG.h:542
int get_loop_index_from_var(const std::string &var) const
Definition: FunctionDAG.h:519
void loop_nest_for_region(int stage_idx, const Span *computed, Span *loop) const
vector< RegionComputedInfo > region_computed
Definition: FunctionDAG.h:413
std::unique_ptr< BoundContents::Layout > bounds_memory_layout
Definition: FunctionDAG.h:519
void required_to_computed(const Span *required, Span *computed) const
void operator=(const FunctionDAG &other)=delete
std::ostream & dump(std::ostream &os) const
FunctionDAG(const FunctionDAG &other)=delete
map< int, const Node * > stage_id_to_node_map
Definition: FunctionDAG.h:617
FunctionDAG(const vector< Function > &outputs, const Target &target)
void operator+=(const OptionalRational &other)
Definition: FunctionDAG.h:46
bool operator==(const OptionalRational &other) const
Definition: FunctionDAG.h:127
OptionalRational operator*(int64_t factor) const
Definition: FunctionDAG.h:65
OptionalRational operator*(const OptionalRational &other) const
Definition: FunctionDAG.h:73
A vector with 'lanes' elements, in which every element is 'value'.
Definition: IR.h:259
A function call.
Definition: IR.h:490
The actual IR nodes begin here.
Definition: IR.h:30
The ratio of two expressions.
Definition: IR.h:83
Is the first expression equal to the second.
Definition: IR.h:121
Floating point constants.
Definition: Expr.h:236
Is the first expression greater than or equal to the second.
Definition: IR.h:166
Is the first expression greater than the second.
Definition: IR.h:157
Integer constants.
Definition: Expr.h:218
A class to represent ranges of Exprs.
Definition: Interval.h:14
Intrusive shared pointers have a reference count (a RefCount object) stored in the class itself.
Definition: IntrusivePtr.h:71
Is the first expression less than or equal to the second.
Definition: IR.h:148
Is the first expression less than the second.
Definition: IR.h:139
A let expression, like you might find in a functional language.
Definition: IR.h:271
Load a value from a named symbol if predicate is true.
Definition: IR.h:217
The greater of two values.
Definition: IR.h:112
The lesser of two values.
Definition: IR.h:103
The remainder of a / b.
Definition: IR.h:94
The product of two expressions.
Definition: IR.h:74
Is the first expression not equal to the second.
Definition: IR.h:130
Logical not - true if the expression false.
Definition: IR.h:193
Logical or - is at least one of the expression true.
Definition: IR.h:184
A linear ramp vector node.
Definition: IR.h:247
Reinterpret value as another type, without affecting any of the bits (on little-endian systems).
Definition: IR.h:47
A ternary operator.
Definition: IR.h:204
Construct a new vector by taking elements from another sequence of vectors.
Definition: IR.h:848
String constants.
Definition: Expr.h:245
The difference of two expressions.
Definition: IR.h:65
Unsigned integer constants.
Definition: Expr.h:227
A named variable.
Definition: IR.h:765
Horizontally reduce a vector to a scalar or narrower vector using the given commutative and associati...
Definition: IR.h:972
A struct representing a target machine and os to generate code for.
Definition: Target.h:19