/** This file defines the class FunctionDAG, which is our
 * representation of a Halide pipeline, and contains methods for using
 * Halide's bounds tools to query properties of it. */

#ifndef FUNCTION_DAG_H
#define FUNCTION_DAG_H

#include <algorithm>
#include <cstdint>
#include <map>
#include <string>
#include <utility>
#include <vector>

#include "Errors.h"
#include "Featurization.h"
#include "Halide.h"

namespace Halide {
namespace Internal {
namespace Autoscheduler {

using std::map;
using std::pair;
using std::string;
using std::unique_ptr;
using std::vector;

struct Adams2019Params;

// First we have various utility classes.

// An optional rational type used when analyzing memory dependencies.
struct OptionalRational {
    bool exists = false;
    int64_t numerator = 0, denominator = 0;

    OptionalRational() = default;
    OptionalRational(bool e, int64_t n, int64_t d)
        : exists(e), numerator(n), denominator(d) {
    }

    void operator+=(const OptionalRational &other) {
        if (!exists || !other.exists) {
            exists = false;
            return;
        }
        if (denominator == other.denominator) {
            numerator += other.numerator;
            return;
        }

        int64_t l = lcm(denominator, other.denominator);
        numerator *= l / denominator;
        denominator = l;
        numerator += other.numerator * (l / other.denominator);
        int64_t g = gcd(numerator, denominator);
        numerator /= g;
        denominator /= g;
    }

    OptionalRational operator*(const OptionalRational &other) const {
        if ((*this) == 0) {
            return *this;
        }
        if (other == 0) {
            return other;
        }
        int64_t num = numerator * other.numerator;
        int64_t den = denominator * other.denominator;
        bool e = exists && other.exists;
        return OptionalRational{e, num, den};
    }

    // Because this type is optional (exists may be false), we don't
    // have a total ordering. These methods all return false when the
    // operands are not comparable, so a < b is not the same as
    // !(a >= b).
    bool operator<(int x) const {
        if (!exists) {
            return false;
        }
        if (denominator > 0) {
            return numerator < x * denominator;
        } else {
            return numerator > x * denominator;
        }
    }

    bool operator<=(int x) const {
        if (!exists) {
            return false;
        }
        if (denominator > 0) {
            return numerator <= x * denominator;
        } else {
            return numerator >= x * denominator;
        }
    }

    bool operator>(int x) const {
        if (!exists) {
            return false;
        }
        return !((*this) <= x);
    }

    bool operator>=(int x) const {
        if (!exists) {
            return false;
        }
        return !((*this) < x);
    }

    bool operator==(int x) const {
        return exists && (numerator == (x * denominator));
    }

    bool operator==(const OptionalRational &other) const {
        return (exists == other.exists) && (numerator * other.denominator == denominator * other.numerator);
    }
};
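
// Illustrative sketch (not part of the upstream header): OptionalRational
// behaves like a rational number that may be unknown. Adding 1/3 and 1/6
// goes through the lcm/gcd path above and yields 1/2, while any value with
// exists == false stays unknown, which is why the comparison operators all
// return false for non-comparable values:
//
//   OptionalRational a{true, 1, 3}, b{true, 1, 6};
//   a += b;                         // a is now {true, 1, 2}
//   OptionalRational unknown{false, 0, 0};
//   bool lt = unknown < 1;          // false
//   bool ge = unknown >= 1;         // also false: not a total order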

// A LoadJacobian records the derivative of the coordinate accessed in
// some producer w.r.t the loops of the consumer.
class LoadJacobian {
    vector<vector<OptionalRational>> coeffs;
    int64_t c;

public:
    explicit LoadJacobian(vector<vector<OptionalRational>> &&matrix, int64_t c = 1)
        : coeffs(matrix), c(c) {
    }

    size_t producer_storage_dims() const {
        return coeffs.size();
    }

    size_t consumer_loop_dims() const {
        if (coeffs.empty() || coeffs[0].empty()) {
            // The producer is scalar, and we don't know how
            // many consumer loops there are.
            return 0;
        }
        return coeffs[0].size();
    }

    OptionalRational operator()(int producer_storage_dim, int consumer_loop_dim) const {
        if (coeffs.empty()) {
            // The producer is scalar, so all strides are zero.
            return {true, 0, 1};
        }
        internal_assert(producer_storage_dim < (int)coeffs.size());
        const auto &p = coeffs[producer_storage_dim];
        if (p.empty()) {
            // The consumer is scalar, so all strides are zero.
            return {true, 0, 1};
        }
        internal_assert(consumer_loop_dim < (int)p.size());
        return p[consumer_loop_dim];
    }

    // To avoid redundantly re-recording copies of the same
    // load Jacobian, we keep a count of how many times a
    // load with this Jacobian occurs.
    int64_t count() const {
        return c;
    }

    // Try to merge another LoadJacobian into this one, increasing the
    // count if the coefficients match.
    bool merge(const LoadJacobian &other) {
        if (other.coeffs.size() != coeffs.size()) {
            return false;
        }
        for (size_t i = 0; i < coeffs.size(); i++) {
            if (other.coeffs[i].size() != coeffs[i].size()) {
                return false;
            }
            for (size_t j = 0; j < coeffs[i].size(); j++) {
                if (!(other.coeffs[i][j] == coeffs[i][j])) {
                    return false;
                }
            }
        }
        c += other.count();
        return true;
    }

    // Multiply Jacobians, used to look at memory dependencies through
    // inlined functions. This composes via the chain rule: the k'th
    // consumer loop of this Jacobian is assumed to correspond to the
    // k'th producer storage dimension of 'other'.
    LoadJacobian operator*(const LoadJacobian &other) const {
        vector<vector<OptionalRational>> matrix;
        matrix.resize(producer_storage_dims());
        for (size_t i = 0; i < producer_storage_dims(); i++) {
            matrix[i].resize(other.consumer_loop_dims());
            for (size_t j = 0; j < other.consumer_loop_dims(); j++) {
                matrix[i][j] = OptionalRational{true, 0, 1};
                for (size_t k = 0; k < consumer_loop_dims(); k++) {
                    matrix[i][j] += (*this)(i, k) * other(k, j);
                }
            }
        }
        LoadJacobian result(std::move(matrix), count() * other.count());
        return result;
    }

    void dump(std::ostream &os, const char *prefix) const;
};
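
// Illustrative sketch (not part of the upstream header): composing Jacobians
// follows the chain rule. Suppose an inlined stage g(x, y) loads f(x + y, y),
// so the Jacobian of f's coordinates w.r.t. g's loops is [[1, 1], [0, 1]],
// and a consumer h(x, y) loads g(2 * x, y), giving [[2, 0], [0, 1]]. Then the
// strides of f's coordinates w.r.t. h's loops are the matrix product, i.e.
// the strides of loading f(2 * x + y, y):
//
//   LoadJacobian f_wrt_g({{{true, 1, 1}, {true, 1, 1}},
//                         {{true, 0, 1}, {true, 1, 1}}});
//   LoadJacobian g_wrt_h({{{true, 2, 1}, {true, 0, 1}},
//                         {{true, 0, 1}, {true, 1, 1}}});
//   LoadJacobian f_wrt_h = f_wrt_g * g_wrt_h;  // [[2, 1], [0, 1]]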

// Classes to represent a concrete set of bounds for a Func. A Span is
// single-dimensional, and a Bound is a multi-dimensional box. For
// each dimension we track the estimated size, and also whether or not
// the size is known to be constant at compile-time. For each Func we
// track three different types of bounds:

// 1) The region required by consumers of the Func, which determines
// 2) The region actually computed, which in turn determines
// 3) The min and max of all loops in the loop nest.

// 3 in turn determines the region required of the inputs to a Func,
// which determines their region computed, and hence their loop nest,
// and so on back up the Function DAG from outputs back to inputs.
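
// As a small worked example (illustrative, not part of the upstream header):
// for a 3-tap blur g(x) = f(x - 1) + f(x) + f(x + 1) whose output is consumed
// over x in [0, 99], g's region required is [0, 99], and for such a simple
// Func its region computed and pure loop bounds are the same interval. That
// loop nest in turn requires f over [-1, 100], which becomes f's region
// required, and the chain continues upward toward the pipeline inputs.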

class Span {
    int64_t min_, max_;
    bool constant_extent_;

public:
    int64_t min() const {
        return min_;
    }
    int64_t max() const {
        return max_;
    }
    int64_t extent() const {
        return max_ - min_ + 1;
    }
    bool constant_extent() const {
        return constant_extent_;
    }

    void union_with(const Span &other) {
        min_ = std::min(min_, other.min());
        max_ = std::max(max_, other.max());
        constant_extent_ = constant_extent_ && other.constant_extent();
    }

    void set_extent(int64_t e) {
        max_ = min_ + e - 1;
    }

    void translate(int64_t x) {
        min_ += x;
        max_ += x;
    }

    Span(int64_t a, int64_t b, bool c)
        : min_(a), max_(b), constant_extent_(c) {
    }
    Span() = default;
    Span(const Span &other) = default;
    static Span empty_span() {
        return Span(INT64_MAX, INT64_MIN, true);
    }
};
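
// Illustrative sketch (not part of the upstream header): empty_span() is the
// identity for union_with, so a bounding interval can be accumulated over a
// set of spans:
//
//   Span s = Span::empty_span();
//   s.union_with(Span(4, 8, true));
//   s.union_with(Span(-2, 3, true));
//   // s.min() == -2, s.max() == 8, s.extent() == 11, s.constant_extent() == true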
269 
270 // Bounds objects are created and destroyed very frequently while
271 // exploring scheduling options, so we have a custom allocator and
272 // memory pool. Much like IR nodes, we treat them as immutable once
273 // created and wrapped in a Bound object so that they can be shared
274 // safely across scheduling alternatives.
277 
278  class Layout;
279  const Layout *layout = nullptr;
280 
281  Span *data() const {
282  // This struct is a header
283  return (Span *)(const_cast<BoundContents *>(this) + 1);
284  }
285 
287  return data()[i];
288  }
289 
291  return data()[i + layout->computed_offset];
292  }
293 
294  Span &loops(int i, int j) {
295  return data()[j + layout->loop_offset[i]];
296  }
297 
298  const Span &region_required(int i) const {
299  return data()[i];
300  }
301 
302  const Span &region_computed(int i) const {
303  return data()[i + layout->computed_offset];
304  }
305 
306  const Span &loops(int i, int j) const {
307  return data()[j + layout->loop_offset[i]];
308  }
309 
311  auto *b = layout->make();
312  size_t bytes = sizeof(data()[0]) * layout->total_size;
313  memcpy(b->data(), data(), bytes);
314  return b;
315  }
316 
317  void validate() const;

    // We're frequently going to need to make these concrete bounds
    // arrays. It makes things more efficient if we figure out the
    // memory layout of those data structures once ahead of time, and
    // make each individual instance just use that. Note that this is
    // not thread-safe.
    class Layout {
        // A memory pool of free BoundContents objects with this layout
        mutable std::vector<BoundContents *> pool;

        // All the blocks of memory allocated
        mutable std::vector<void *> blocks;

        mutable size_t num_live = 0;

        void allocate_some_more() const;

    public:
        // Number of Spans to allocate
        int total_size;

        // region_required has size func->dimensions() and comes first in the memory layout

        // region_computed comes next at the following index
        int computed_offset;

        // The loop for each stage starts at the following index
        std::vector<int> loop_offset;

        Layout() = default;
        ~Layout();

        Layout(const Layout &) = delete;
        void operator=(const Layout &) = delete;
        Layout(Layout &&) = delete;
        void operator=(Layout &&) = delete;

        // Make a BoundContents object with this layout
        BoundContents *make() const;

        // Release a BoundContents object with this layout back to the pool
        void release(const BoundContents *b) const;
    };
};
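
// Note (inferred from the accessors above, not stated in the upstream
// header): a BoundContents is allocated as a single block in which the
// struct itself acts as a header and data() points at a trailing array of
// layout->total_size Spans. Indices [0, dims) hold region_required, the
// Spans starting at layout->computed_offset hold region_computed, and the
// Spans starting at layout->loop_offset[i] hold the loop bounds of stage i.
// Layout::make() hands out such blocks from the pool and Layout::release()
// returns them, which is what keeps creating and destroying bounds cheap
// during the tree search.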

using Bound = IntrusivePtr<const BoundContents>;

// A representation of the function DAG. The nodes and edges are both
// in reverse realization order, so if you want to walk backwards up
// the DAG, just iterate the nodes or edges in-order.
struct FunctionDAG {

    // An edge is a producer-consumer relationship
    struct Edge;

    // A symbolic interval: a pair of Vars naming the min and max of
    // one dimension of a Func's required region.
    struct SymbolicInterval {
        Halide::Var min, max;
    };

    // A Node represents a single Func
    struct Node {
        // A pointer back to the owning DAG
        FunctionDAG *dag;

        // The Halide Func this represents
        Function func;

        // The number of bytes per point stored.
        double bytes_per_point;

        // The min/max variables used to denote a symbolic region of
        // this Func. Used in the cost above, and in the Edges below.
        vector<SymbolicInterval> region_required;

        // A concrete region required from a bounds estimate. Only
        // defined for outputs.
        vector<Span> estimated_region_required;

        // The region computed of a Func, in terms of the region
        // required. For simple Funcs this is identical to the
        // region_required. However, in some Funcs computing one
        // output requires computing other outputs too. You can't
        // really ask for a single output pixel from something blurred
        // with an IIR without computing the others, for example.
        struct RegionComputedInfo {
            // The min and max in their full symbolic glory. We use
            // these in the general case.
            Interval in;
            bool depends_on_estimate = false;

            // Analysis used to accelerate common cases
            int64_t c_min = 0, c_max = 0;
        };
        vector<RegionComputedInfo> region_computed;

        // Expand a region required into a region computed, using the
        // symbolic intervals above.
        void required_to_computed(const Span *required, Span *computed) const;

        // Metadata about one symbolic loop in a Func's default loop nest.
        struct Loop {
            string var;
            bool pure, rvar;
            Expr min, max;

            // Which pure dimension does this loop correspond to? Invalid if it's an rvar
            int pure_dim;

            // Precomputed metadata to accelerate common cases:

            // If true, the loop bounds are just the region computed in the given dimension
            bool equals_region_computed = false;
            int region_computed_dim = 0;

            // If true, the loop bounds are a constant with the given min and max
            bool bounds_are_constant = false;
            int64_t c_min = 0, c_max = 0;

            // A persistent fragment of source for getting this Var
            // from its owner Func. Used for printing source code
            // equivalent to a computed schedule.
            string accessor;
        };

        // Get the loop nest shape as a function of the region computed
        void loop_nest_for_region(int stage_idx, const Span *computed, Span *loop) const;

        // One stage of a Func
        struct Stage {
            // The owning Node
            Node *node;

            // Which stage of the Func is this. 0 = pure.
            int index;

            // The loop nest that computes this stage, from innermost out.
            vector<Loop> loop;

            // The vectorization width that will be used for
            // compute. Corresponds to the natural width for the
            // narrowest type used.
            int vector_size;

            // The featurization of the compute done
            PipelineFeatures features;

            // The actual Halide front-end stage object
            Halide::Stage stage;

            // The name for scheduling (e.g. "foo.update(3)")
            string name;

            // Ids for perfect hashing on stages.
            int id, max_id;

            vector<Edge *> incoming_edges;

            vector<bool> dependencies;
            bool downstream_of(const Node &n) const {
                return dependencies[n.id];
            }

            explicit Stage(Halide::Stage s)
                : stage(std::move(s)) {
            }
        };
        vector<Stage> stages;

        vector<Edge *> outgoing_edges;

        // Max vector size across the stages
        int vector_size;

        // A unique ID for this node, allocated consecutively starting
        // at zero for each pipeline.
        int id, max_id;

        // Just func->dimensions(), but we ask for it so many times
        // that it's worth avoiding the function call into
        // libHalide.
        int dimensions;

        // Is a single pointwise call to another Func
        bool is_wrapper;

        // We represent the input buffers as nodes, though we do not attempt to schedule them.
        bool is_input;

        // Is one of the pipeline outputs
        bool is_output;

        // Only uses pointwise calls
        bool is_pointwise;

        // Only uses pointwise calls + clamping on all indices
        bool is_boundary_condition;

        std::unique_ptr<BoundContents::Layout> bounds_memory_layout;

        BoundContents *make_bound() const {
            return bounds_memory_layout->make();
        }
    };

    // A representation of a producer-consumer relationship
    struct Edge {
        struct BoundInfo {
            // The symbolic expression for the bound in this dimension
            Expr expr;

            // Fields below are the results of additional analysis
            // used to evaluate this bound more quickly.
            int64_t coeff, constant;
            int64_t consumer_dim;
            bool affine, uses_max;

            BoundInfo(const Expr &e, const Node::Stage &consumer, bool dependent);
        };

        // Memory footprint on producer required by consumer.
        vector<pair<BoundInfo, BoundInfo>> bounds;

        FunctionDAG::Node *producer;
        FunctionDAG::Node::Stage *consumer;

        // The number of calls the consumer makes to the producer, per
        // point in the loop nest of the consumer.
        int calls;

        bool all_bounds_affine;

        vector<LoadJacobian> load_jacobians;

        bool all_load_jacobian_coeffs_exist() const;

        // Given a loop nest of the consumer stage, expand a region
        // required of the producer to be large enough to include all
        // points required.
        void expand_footprint(const Span *consumer_loop, Span *producer_required) const;
    };

    vector<Node> nodes;
    vector<Edge> edges;

    // Create the function DAG, and do all the dependency and cost
    // analysis. This is done once up-front before the tree search.
    FunctionDAG(const vector<Function> &outputs, const Target &target);

    void dump(std::ostream &os) const;

private:
    // Compute the featurization for the entire DAG
    void featurize();

public:
    // This class uses a lot of internal pointers, so we'll make it uncopyable/unmovable.
    FunctionDAG(const FunctionDAG &other) = delete;
    FunctionDAG &operator=(const FunctionDAG &other) = delete;
    FunctionDAG(FunctionDAG &&other) = delete;
    FunctionDAG &operator=(FunctionDAG &&other) = delete;
};
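
// Illustrative usage sketch (not part of the upstream header; assumes a
// Halide Pipeline `p` and Target `target` defined elsewhere). The DAG is
// built once from the output Funcs before the tree search and then treated
// as read-only; iterating nodes or edges in order walks the pipeline in
// reverse realization order, as noted above.
//
//   std::vector<Function> outputs;
//   for (const Func &f : p.outputs()) {
//       outputs.push_back(f.function());
//   }
//   FunctionDAG dag(outputs, target);
//   dag.dump(std::cout);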

} // namespace Autoscheduler
} // namespace Internal
} // namespace Halide

#endif // FUNCTION_DAG_H