Halide  17.0.2
Halide compiler and libraries
IROperator.h
Go to the documentation of this file.
1 #ifndef HALIDE_IR_OPERATOR_H
2 #define HALIDE_IR_OPERATOR_H
3 
4 /** \file
5  *
6  * Defines various operator overloads and utility functions that make
7  * it more pleasant to work with Halide expressions.
8  */
9 
10 #include <cmath>
11 
12 #include "Expr.h"
13 #include "Tuple.h"
14 
15 namespace Halide {
16 
17 namespace Internal {
18 /** Is the expression either an IntImm, a FloatImm, a StringImm, or a
19  * Cast of the same, or a Ramp or Broadcast of the same. Doesn't do
20  * any constant folding. */
21 bool is_const(const Expr &e);
22 
23 /** Is the expression an IntImm, FloatImm of a particular value, or a
24  * Cast, or Broadcast of the same. */
25 bool is_const(const Expr &e, int64_t v);
26 
27 /** If an expression is an IntImm or a Broadcast of an IntImm, return
28  * a pointer to its value. Otherwise returns nullptr. */
29 const int64_t *as_const_int(const Expr &e);
30 
31 /** If an expression is a UIntImm or a Broadcast of a UIntImm, return
32  * a pointer to its value. Otherwise returns nullptr. */
33 const uint64_t *as_const_uint(const Expr &e);
34 
35 /** If an expression is a FloatImm or a Broadcast of a FloatImm,
36  * return a pointer to its value. Otherwise returns nullptr. */
37 const double *as_const_float(const Expr &e);
38 
39 /** Is the expression a constant integer power of two. Also returns
40  * log base two of the expression if it is. Only returns true for
41  * integer types. */
42 bool is_const_power_of_two_integer(const Expr &e, int *bits);
43 
44 /** Is the expression a const (as defined by is_const), and also
45  * strictly greater than zero (in all lanes, if a vector expression) */
46 bool is_positive_const(const Expr &e);
47 
48 /** Is the expression a const (as defined by is_const), and also
49  * strictly less than zero (in all lanes, if a vector expression) */
50 bool is_negative_const(const Expr &e);
51 
52 /** Is the expression an undef */
53 bool is_undef(const Expr &e);
54 
55 /** Is the expression a const (as defined by is_const), and also equal
56  * to zero (in all lanes, if a vector expression) */
57 bool is_const_zero(const Expr &e);
58 
59 /** Is the expression a const (as defined by is_const), and also equal
60  * to one (in all lanes, if a vector expression) */
61 bool is_const_one(const Expr &e);
62 
63 /** Is the statement a no-op (which we represent as either an
64  * undefined Stmt, or as an Evaluate node of a constant) */
65 bool is_no_op(const Stmt &s);
66 
67 /** Does the expression
68  * 1) Take on the same value no matter where it appears in a Stmt, and
69  * 2) Evaluating it has no side-effects
70  */
71 bool is_pure(const Expr &e);
72 
73 /** Construct an immediate of the given type from any numeric C++ type. */
74 // @{
77 Expr make_const(Type t, double val);
78 inline Expr make_const(Type t, int32_t val) {
79  return make_const(t, (int64_t)val);
80 }
81 inline Expr make_const(Type t, uint32_t val) {
82  return make_const(t, (uint64_t)val);
83 }
84 inline Expr make_const(Type t, int16_t val) {
85  return make_const(t, (int64_t)val);
86 }
87 inline Expr make_const(Type t, uint16_t val) {
88  return make_const(t, (uint64_t)val);
89 }
90 inline Expr make_const(Type t, int8_t val) {
91  return make_const(t, (int64_t)val);
92 }
93 inline Expr make_const(Type t, uint8_t val) {
94  return make_const(t, (uint64_t)val);
95 }
96 inline Expr make_const(Type t, bool val) {
97  return make_const(t, (uint64_t)val);
98 }
99 inline Expr make_const(Type t, float val) {
100  return make_const(t, (double)val);
101 }
102 inline Expr make_const(Type t, float16_t val) {
103  return make_const(t, (double)val);
104 }
105 // @}
106 
107 /** Construct a unique signed_integer_overflow Expr */
109 
110 /** Check if an expression is a signed_integer_overflow */
112 
113 /** Check if a constant value can be correctly represented as the given type. */
115 
116 /** Construct a boolean constant from a C++ boolean value.
117  * May also be a vector if width is given.
118  * It is not possible to coerce a C++ boolean to Expr because
119  * if we provide such a path then char objects can ambiguously
120  * be converted to Halide Expr or to std::string. The problem
121  * is that C++ does not have a real bool type - it is in fact
122  * close enough to char that C++ does not know how to distinguish them.
123  * make_bool is the explicit coercion. */
124 Expr make_bool(bool val, int lanes = 1);
125 
126 /** Construct the representation of zero in the given type */
128 
129 /** Construct the representation of one in the given type */
131 
132 /** Construct the representation of two in the given type */
134 
135 /** Construct the constant boolean true. May also be a vector of
136  * trues, if a lanes argument is given. */
137 Expr const_true(int lanes = 1);
138 
139 /** Construct the constant boolean false. May also be a vector of
140  * falses, if a lanes argument is given. */
141 Expr const_false(int lanes = 1);
142 
143 /** Attempt to cast an expression to a smaller type while provably not
144  * losing information. If it can't be done, return an undefined
145  * Expr. */
147 
148 /** Attempt to negate x without introducing new IR and without overflow.
149  * If it can't be done, return an undefined Expr. */
151 
152 /** Coerce the two expressions to have the same type, using C-style
153  * casting rules. For the purposes of casting, a boolean type is
154  * UInt(1). We use the following procedure:
155  *
156  * If the types already match, do nothing.
157  *
158  * Then, if one type is a vector and the other is a scalar, the scalar
159  * is broadcast to match the vector width, and we continue.
160  *
161  * Then, if one type is floating-point and the other is not, the
162  * non-float is cast to the floating-point type, and we're done.
163  *
164  * Then, if both types are unsigned ints, the one with fewer bits is
165  * cast to match the one with more bits and we're done.
166  *
167  * Then, if both types are signed ints, the one with fewer bits is
168  * cast to match the one with more bits and we're done.
169  *
170  * Finally, if one type is an unsigned int and the other type is a signed
171  * int, both are cast to a signed int with the greater of the two
172  * bit-widths. For example, matching an Int(8) with a UInt(16) results
173  * in an Int(16).
174  *
175  */
176 void match_types(Expr &a, Expr &b);
177 
178 /** Asserts that both expressions are integer types and are either
179  * both signed or both unsigned. If one argument is scalar and the
180  * other a vector, the scalar is broadcasted to have the same number
181  * of lanes as the vector. If one expression is of narrower type than
182  * the other, it is widened to the bit width of the wider. */
183 void match_types_bitwise(Expr &a, Expr &b, const char *op_name);
184 
185 /** Halide's vectorizable transcendentals. */
186 // @{
187 Expr halide_log(const Expr &a);
188 Expr halide_exp(const Expr &a);
189 Expr halide_erf(const Expr &a);
190 // @}
191 
192 /** Raise an expression to an integer power by repeatedly multiplying
193  * it by itself. */
195 
196 /** Split a boolean condition into vector of ANDs. If 'cond' is undefined,
197  * return an empty vector. */
198 void split_into_ands(const Expr &cond, std::vector<Expr> &result);
199 
200 /** A builder to help create Exprs representing halide_buffer_t
201  * structs (e.g. foo.buffer) via calls to halide_buffer_init. Fill out
202  * the fields and then call build. The resulting Expr will be a call
203  * to halide_buffer_init with the struct members as arguments. If the
204  * buffer_memory field is undefined, it uses a call to alloca to make
205  * some stack memory for the buffer. If the shape_memory field is
206  * undefined, it similarly uses stack memory for the shape. If the
207  * shape_memory field is null, it uses the dim field already in the
208  * buffer. Other uninitialized fields will take on a value of zero in
209  * the constructed buffer. */
214  int dimensions = 0;
215  std::vector<Expr> mins, extents, strides;
217  Expr build() const;
218 };
219 
220 /** If e is a ramp expression with stride, default 1, return the base,
221  * otherwise undefined. */
222 Expr strided_ramp_base(const Expr &e, int stride = 1);
223 
224 /** Implementations of division and mod that are specific to Halide.
225  * Use these implementations; do not use native C division or mod to
226  * simplify Halide expressions. Halide division and modulo satisfy
227  * the Euclidean definition of division for integers a and b:
228  *
229  \code
230  when b != 0, (a/b)*b + a%b = a
231  0 <= a%b < |b|
232  \endcode
233  *
234  * Additionally, mod by zero returns zero, and div by zero returns
235  * zero. This makes mod and div total functions.
236  */
237 // @{
238 template<typename T>
239 inline T mod_imp(T a, T b) {
240  Type t = type_of<T>();
241  if (!t.is_float() && b == 0) {
242  return 0;
243  } else if (t.is_int()) {
244  int64_t ia = a;
245  int64_t ib = b;
246  int64_t a_neg = ia >> 63;
247  int64_t b_neg = ib >> 63;
248  int64_t b_zero = (ib == 0) ? -1 : 0;
249  ia -= a_neg;
250  int64_t r = ia % (ib | b_zero);
251  r += (a_neg & ((ib ^ b_neg) + ~b_neg));
252  r &= ~b_zero;
253  return r;
254  } else {
255  return a % b;
256  }
257 }
258 
259 template<typename T>
260 inline T div_imp(T a, T b) {
261  Type t = type_of<T>();
262  if (!t.is_float() && b == 0) {
263  return (T)0;
264  } else if (t.is_int()) {
265  // Do it as 64-bit
266  int64_t ia = a;
267  int64_t ib = b;
268  int64_t a_neg = ia >> 63;
269  int64_t b_neg = ib >> 63;
270  int64_t b_zero = (ib == 0) ? -1 : 0;
271  ib -= b_zero;
272  ia -= a_neg;
273  int64_t q = ia / ib;
274  q += a_neg & (~b_neg - b_neg);
275  q &= ~b_zero;
276  return (T)q;
277  } else {
278  return a / b;
279  }
280 }
281 // @}
282 
283 // Special cases for float, double.
284 template<>
285 inline float mod_imp<float>(float a, float b) {
286  float f = a - b * (floorf(a / b));
287  // The remainder has the same sign as b.
288  return f;
289 }
290 template<>
291 inline double mod_imp<double>(double a, double b) {
292  double f = a - b * (std::floor(a / b));
293  return f;
294 }
295 
296 template<>
297 inline float div_imp<float>(float a, float b) {
298  return a / b;
299 }
300 template<>
301 inline double div_imp<double>(double a, double b) {
302  return a / b;
303 }
304 
305 /** Return an Expr that is identical to the input Expr, but with
306  * all calls to likely() and likely_if_innermost() removed. */
308 
309 /** Return a Stmt that is identical to the input Stmt, but with
310  * all calls to likely() and likely_if_innermost() removed. */
312 
313 /** Return an Expr that is identical to the input Expr, but with
314  * all calls to promise_clamped() and unsafe_promise_clamped() removed. */
316 
317 /** Return a Stmt that is identical to the input Stmt, but with
318  * all calls to promise_clamped() and unsafe_promise_clamped() removed. */
320 
321 /** If the expression is a tag helper call, remove it and return
322  * the tagged expression. If not, returns the expression. */
324 
325 template<typename T>
327  static constexpr bool value = std::is_convertible<T, const char *>::value ||
328  std::is_convertible<T, Halide::Expr>::value;
329 };
330 
331 template<typename... Args>
332 struct all_are_printable_args : meta_and<is_printable_arg<Args>...> {};
333 
334 // Secondary args to print can be Exprs or const char *
335 inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args) {
336 }
337 
338 template<typename... Args>
339 inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args, const char *arg, Args &&...more_args) {
340  args.emplace_back(std::string(arg));
341  collect_print_args(args, std::forward<Args>(more_args)...);
342 }
343 
344 template<typename... Args>
345 inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args, Expr arg, Args &&...more_args) {
346  args.push_back(std::move(arg));
347  collect_print_args(args, std::forward<Args>(more_args)...);
348 }
349 
350 Expr requirement_failed_error(Expr condition, const std::vector<Expr> &args);
351 
352 Expr memoize_tag_helper(Expr result, const std::vector<Expr> &cache_key_values);
353 
354 /** Reset the counters used for random-number seeds in random_float/int/uint.
355  * (Note that the counters are incremented for each call, even if a seed is passed in.)
356  * This is used for multitarget compilation to ensure that each subtarget gets
357  * the same sequence of random numbers. */
359 
360 } // namespace Internal
361 
362 /** Cast an expression to the Halide type corresponding to the C++ type T,
 * as reported by type_of<T>(). The expression is taken by value and moved
 * into the two-argument cast call. */
363 template<typename T>
364 inline Expr cast(Expr a) {
 // type_of<T>() is passed inline so that this call stays dependent on T.
365  return cast(type_of<T>(), std::move(a));
366 }
367 
368 /** Cast an expression to a new type. */
370 
371 /** Return the sum of two expressions, doing any necessary type
372  * coercion using \ref Internal::match_types */
374 
375 /** Add an expression and a constant integer. Coerces the type of the
376  * integer to match the type of the expression. Errors if the integer
377  * cannot be represented in the type of the expression. */
378 // @{
379 Expr operator+(Expr a, int b);
380 
381 /** Add a constant integer and an expression. Coerces the type of the
382  * integer to match the type of the expression. Errors if the integer
383  * cannot be represented in the type of the expression. */
384 Expr operator+(int a, Expr b);
385 
386 /** Modify the first expression to be the sum of two expressions,
387  * without changing its type. This casts the second argument to match
388  * the type of the first. */
390 
391 /** Return the difference of two expressions, doing any necessary type
392  * coercion using \ref Internal::match_types */
394 
395 /** Subtracts a constant integer from an expression. Coerces the type of the
396  * integer to match the type of the expression. Errors if the integer
397  * cannot be represented in the type of the expression. */
398 Expr operator-(Expr a, int b);
399 
400 /** Subtracts an expression from a constant integer. Coerces the type
401  * of the integer to match the type of the expression. Errors if the
402  * integer cannot be represented in the type of the expression. */
403 Expr operator-(int a, Expr b);
404 
405 /** Return the negative of the argument. Does no type casting, so more
406  * formally: return that number which when added to the original,
407  * yields zero of the same type. For unsigned integers the negative is
408  * still an unsigned integer. E.g. in UInt(8), the negative of 56 is
409  * 200, because 56 + 200 == 0 */
411 
412 /** Modify the first expression to be the difference of two expressions,
413  * without changing its type. This casts the second argument to match
414  * the type of the first. */
416 
417 /** Return the product of two expressions, doing any necessary type
418  * coercion using \ref Internal::match_types */
420 
421 /** Multiply an expression and a constant integer. Coerces the type of the
422  * integer to match the type of the expression. Errors if the integer
423  * cannot be represented in the type of the expression. */
424 Expr operator*(Expr a, int b);
425 
426 /** Multiply a constant integer and an expression. Coerces the type of
427  * the integer to match the type of the expression. Errors if the
428  * integer cannot be represented in the type of the expression. */
429 Expr operator*(int a, Expr b);
430 
431 /** Modify the first expression to be the product of two expressions,
432  * without changing its type. This casts the second argument to match
433  * the type of the first. */
435 
436 /** Return the ratio of two expressions, doing any necessary type
437  * coercion using \ref Internal::match_types. Note that integer
438  * division in Halide is not the same as integer division in C-like
439  * languages in two ways.
440  *
441  * First, signed integer division in Halide rounds according to the
442  * sign of the denominator. This means towards minus infinity for
443  * positive denominators, and towards positive infinity for negative
444  * denominators. This is unlike C, which rounds towards zero. This
445  * decision ensures that upsampling expressions like f(x/2, y/2) don't
446  * have funny discontinuities when x and y cross zero.
447  *
448  * Second, division by zero returns zero instead of faulting. For
449  * types where overflow is defined behavior, division of the largest
450  * negative signed integer by -1 returns the largest negative signed
451  * integer for the type (i.e. it wraps). This ensures that a division
452  * operation can never have a side-effect, which is helpful in Halide
453  * because scheduling directives can expand the domain of computation
454  * of a Func, potentially introducing new zero-division.
455  */
457 
458 /** Modify the first expression to be the ratio of two expressions,
459  * without changing its type. This casts the second argument to match
460  * the type of the first. Note that signed integer division in Halide
461  * rounds according to the sign of the denominator (towards minus
462  * infinity for positive denominators), unlike C, which rounds towards zero. */
464 
465 /** Divides an expression by a constant integer. Coerces the type
466  * of the integer to match the type of the expression. Errors if the
467  * integer cannot be represented in the type of the expression. */
468 Expr operator/(Expr a, int b);
469 
470 /** Divides a constant integer by an expression. Coerces the type
471  * of the integer to match the type of the expression. Errors if the
472  * integer cannot be represented in the type of the expression. */
473 Expr operator/(int a, Expr b);
474 
475 /** Return the first argument reduced modulo the second, doing any
476  * necessary type coercion using \ref Internal::match_types. There are
477  * two key differences between C-like languages and Halide for the
478  * modulo operation, which complement the way division works.
479  *
480  * First, the result is never negative, so x % 2 is always zero or
481  * one, unlike in C-like languages. x % -2 is equivalent, and is also
482  * always zero or one. Second, mod by zero evaluates to zero (unlike
483  * in C, where it faults). This makes modulo, like division, a
484  * side-effect-free operation. */
486 
487 /** Mods an expression by a constant integer. Coerces the type
488  * of the integer to match the type of the expression. Errors if the
489  * integer cannot be represented in the type of the expression. */
490 Expr operator%(Expr a, int b);
491 
492 /** Mods a constant integer by an expression. Coerces the type
493  * of the integer to match the type of the expression. Errors if the
494  * integer cannot be represented in the type of the expression. */
495 Expr operator%(int a, Expr b);
496 
497 /** Return a boolean expression that tests whether the first argument
498  * is greater than the second, after doing any necessary type coercion
499  * using \ref Internal::match_types */
501 
502 /** Return a boolean expression that tests whether an expression is
503  * greater than a constant integer. Coerces the integer to the type of
504  * the expression. Errors if the integer is not representable in that
505  * type. */
506 Expr operator>(Expr a, int b);
507 
508 /** Return a boolean expression that tests whether a constant integer is
509  * greater than an expression. Coerces the integer to the type of
510  * the expression. Errors if the integer is not representable in that
511  * type. */
512 Expr operator>(int a, Expr b);
513 
514 /** Return a boolean expression that tests whether the first argument
515  * is less than the second, after doing any necessary type coercion
516  * using \ref Internal::match_types */
518 
519 /** Return a boolean expression that tests whether an expression is
520  * less than a constant integer. Coerces the integer to the type of
521  * the expression. Errors if the integer is not representable in that
522  * type. */
523 Expr operator<(Expr a, int b);
524 
525 /** Return a boolean expression that tests whether a constant integer is
526  * less than an expression. Coerces the integer to the type of
527  * the expression. Errors if the integer is not representable in that
528  * type. */
529 Expr operator<(int a, Expr b);
530 
531 /** Return a boolean expression that tests whether the first argument
532  * is less than or equal to the second, after doing any necessary type
533  * coercion using \ref Internal::match_types */
535 
536 /** Return a boolean expression that tests whether an expression is
537  * less than or equal to a constant integer. Coerces the integer to
538  * the type of the expression. Errors if the integer is not
539  * representable in that type. */
540 Expr operator<=(Expr a, int b);
541 
542 /** Return a boolean expression that tests whether a constant integer
543  * is less than or equal to an expression. Coerces the integer to the
544  * type of the expression. Errors if the integer is not representable
545  * in that type. */
546 Expr operator<=(int a, Expr b);
547 
548 /** Return a boolean expression that tests whether the first argument
549  * is greater than or equal to the second, after doing any necessary
550  * type coercion using \ref Internal::match_types */
552 
553 /** Return a boolean expression that tests whether an expression is
554  * greater than or equal to a constant integer. Coerces the integer to
555  * the type of the expression. Errors if the integer is not
556  * representable in that type. */
557 Expr operator>=(const Expr &a, int b);
558 
559 /** Return a boolean expression that tests whether a constant integer
560  * is greater than or equal to an expression. Coerces the integer to the
561  * type of the expression. Errors if the integer is not representable
562  * in that type. */
563 Expr operator>=(int a, const Expr &b);
564 
565 /** Return a boolean expression that tests whether the first argument
566  * is equal to the second, after doing any necessary type coercion
567  * using \ref Internal::match_types */
569 
570 /** Return a boolean expression that tests whether an expression is
571  * equal to a constant integer. Coerces the integer to the type of the
572  * expression. Errors if the integer is not representable in that
573  * type. */
574 Expr operator==(Expr a, int b);
575 
576 /** Return a boolean expression that tests whether a constant integer
577  * is equal to an expression. Coerces the integer to the type of the
578  * expression. Errors if the integer is not representable in that
579  * type. */
580 Expr operator==(int a, Expr b);
581 
582 /** Return a boolean expression that tests whether the first argument
583  * is not equal to the second, after doing any necessary type coercion
584  * using \ref Internal::match_types */
586 
587 /** Return a boolean expression that tests whether an expression is
588  * not equal to a constant integer. Coerces the integer to the type of
589  * the expression. Errors if the integer is not representable in that
590  * type. */
591 Expr operator!=(Expr a, int b);
592 
593 /** Return a boolean expression that tests whether a constant integer
594  * is not equal to an expression. Coerces the integer to the type of
595  * the expression. Errors if the integer is not representable in that
596  * type. */
597 Expr operator!=(int a, Expr b);
598 
599 /** Returns the logical and of the two arguments */
601 
602 /** Logical and of an Expr and a bool. Either returns the Expr or an
603  * Expr representing false, depending on the bool. */
604 // @{
605 Expr operator&&(Expr a, bool b);
606 Expr operator&&(bool a, Expr b);
607 // @}
608 
609 /** Returns the logical or of the two arguments */
611 
612 /** Logical or of an Expr and a bool. Either returns the Expr or an
613  * Expr representing true, depending on the bool. */
614 // @{
615 Expr operator||(Expr a, bool b);
616 Expr operator||(bool a, Expr b);
617 // @}
618 
619 /** Returns the logical not of the argument */
621 
622 /** Returns an expression representing the greater of the two
623  * arguments, after doing any necessary type coercion using
624  * \ref Internal::match_types. Vectorizes cleanly on most platforms
625  * (with the exception of integer types on x86 without SSE4). */
627 
628 /** Returns an expression representing the greater of an expression
629  * and a constant integer. The integer is coerced to the type of the
630  * expression. Errors if the integer is not representable as that
631  * type. Vectorizes cleanly on most platforms (with the exception of
632  * integer types on x86 without SSE4). */
633 Expr max(Expr a, int b);
634 
635 /** Returns an expression representing the greater of a constant
636  * integer and an expression. The integer is coerced to the type of
637  * the expression. Errors if the integer is not representable as that
638  * type. Vectorizes cleanly on most platforms (with the exception of
639  * integer types on x86 without SSE4). */
640 Expr max(int a, Expr b);
641 
642 inline Expr max(float a, Expr b) {
643  return max(Expr(a), std::move(b));
644 }
645 inline Expr max(Expr a, float b) {
646  return max(std::move(a), Expr(b));
647 }
648 
649 /** Returns an expression representing the greater of an expressions
650  * vector, after doing any necessary type coersion using
651  * \ref Internal::match_types. Vectorizes cleanly on most platforms
652  * (with the exception of integer types on x86 without SSE4).
653  * The expressions are folded from right ie. max(.., max(.., ..)).
654  * The arguments can be any mix of types but must all be convertible to Expr. */
655 template<typename A, typename B, typename C, typename... Rest,
656  typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Rest...>::value>::type * = nullptr>
657 inline Expr max(A &&a, B &&b, C &&c, Rest &&...rest) {
658  return max(std::forward<A>(a), max(std::forward<B>(b), std::forward<C>(c), std::forward<Rest>(rest)...));
659 }
660 
662 
663 /** Returns an expression representing the lesser of an expression
664  * and a constant integer. The integer is coerced to the type of the
665  * expression. Errors if the integer is not representable as that
666  * type. Vectorizes cleanly on most platforms (with the exception of
667  * integer types on x86 without SSE4). */
668 Expr min(Expr a, int b);
669 
670 /** Returns an expression representing the lesser of a constant
671  * integer and an expression. The integer is coerced to the type of
672  * the expression. Errors if the integer is not representable as that
673  * type. Vectorizes cleanly on most platforms (with the exception of
674  * integer types on x86 without SSE4). */
675 Expr min(int a, Expr b);
676 
677 inline Expr min(float a, Expr b) {
678  return min(Expr(a), std::move(b));
679 }
680 inline Expr min(Expr a, float b) {
681  return min(std::move(a), Expr(b));
682 }
683 
684 /** Returns an expression representing the lesser of an expressions
685  * vector, after doing any necessary type coersion using
686  * \ref Internal::match_types. Vectorizes cleanly on most platforms
687  * (with the exception of integer types on x86 without SSE4).
688  * The expressions are folded from right ie. min(.., min(.., ..)).
689  * The arguments can be any mix of types but must all be convertible to Expr. */
690 template<typename A, typename B, typename C, typename... Rest,
691  typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Rest...>::value>::type * = nullptr>
692 inline Expr min(A &&a, B &&b, C &&c, Rest &&...rest) {
693  return min(std::forward<A>(a), min(std::forward<B>(b), std::forward<C>(c), std::forward<Rest>(rest)...));
694 }
695 
696 /** Operators on floats treats those floats as Exprs. Making these
697  * explicit prevents implicit float->int casts that might otherwise
698  * occur. */
699 // @{
700 inline Expr operator+(Expr a, float b) {
701  return std::move(a) + Expr(b);
702 }
703 inline Expr operator+(float a, Expr b) {
704  return Expr(a) + std::move(b);
705 }
706 inline Expr operator-(Expr a, float b) {
707  return std::move(a) - Expr(b);
708 }
709 inline Expr operator-(float a, Expr b) {
710  return Expr(a) - std::move(b);
711 }
712 inline Expr operator*(Expr a, float b) {
713  return std::move(a) * Expr(b);
714 }
715 inline Expr operator*(float a, Expr b) {
716  return Expr(a) * std::move(b);
717 }
718 inline Expr operator/(Expr a, float b) {
719  return std::move(a) / Expr(b);
720 }
721 inline Expr operator/(float a, Expr b) {
722  return Expr(a) / std::move(b);
723 }
724 inline Expr operator%(Expr a, float b) {
725  return std::move(a) % Expr(b);
726 }
727 inline Expr operator%(float a, Expr b) {
728  return Expr(a) % std::move(b);
729 }
730 inline Expr operator>(Expr a, float b) {
731  return std::move(a) > Expr(b);
732 }
733 inline Expr operator>(float a, Expr b) {
734  return Expr(a) > std::move(b);
735 }
736 inline Expr operator<(Expr a, float b) {
737  return std::move(a) < Expr(b);
738 }
739 inline Expr operator<(float a, Expr b) {
740  return Expr(a) < std::move(b);
741 }
742 inline Expr operator>=(Expr a, float b) {
743  return std::move(a) >= Expr(b);
744 }
745 inline Expr operator>=(float a, Expr b) {
746  return Expr(a) >= std::move(b);
747 }
748 inline Expr operator<=(Expr a, float b) {
749  return std::move(a) <= Expr(b);
750 }
751 inline Expr operator<=(float a, Expr b) {
752  return Expr(a) <= std::move(b);
753 }
754 inline Expr operator==(Expr a, float b) {
755  return std::move(a) == Expr(b);
756 }
757 inline Expr operator==(float a, Expr b) {
758  return Expr(a) == std::move(b);
759 }
760 inline Expr operator!=(Expr a, float b) {
761  return std::move(a) != Expr(b);
762 }
763 inline Expr operator!=(float a, Expr b) {
764  return Expr(a) != std::move(b);
765 }
766 // @}
767 
768 /** Clamps an expression to lie within the given bounds. The bounds
769  * are type-cast to match the expression. Vectorizes as well as min/max. */
770 Expr clamp(Expr a, const Expr &min_val, const Expr &max_val);
771 
772 /** Returns the absolute value of a signed integer or floating-point
773  * expression. Vectorizes cleanly. Unlike in C, abs of a signed
774  * integer returns an unsigned integer of the same bit width. This
775  * means that abs of the most negative integer doesn't overflow. */
777 
778 /** Return the absolute difference between two values. Vectorizes
779  * cleanly. Returns an unsigned value of the same bit width. There are
780  * various ways to write this yourself, but they contain numerous
781  * gotchas and don't always compile to good code, so use this
782  * instead. */
784 
785 /** Returns an expression similar to the ternary operator in C, except
786  * that it always evaluates all arguments. If the first argument is
787  * true, then return the second, else return the third. Typically
788  * vectorizes cleanly, but benefits from SSE41 or newer on x86. */
789 Expr select(Expr condition, Expr true_value, Expr false_value);
790 
791 /** A multi-way variant of select similar to a switch statement in C,
792  * which can accept multiple conditions and values in pairs. Evaluates
793  * to the first value for which the condition is true. Returns the
794  * final value if all conditions are false. */
795 template<typename... Args,
796  typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Args...>::value>::type * = nullptr>
797 inline Expr select(Expr c0, Expr v0, Expr c1, Expr v1, Args &&...args) {
798  return select(std::move(c0), std::move(v0), select(std::move(c1), std::move(v1), std::forward<Args>(args)...));
799 }
800 
801 /** Equivalent of ternary select(), but taking/returning tuples. If the condition is
802  * a Tuple, it must match the size of the true and false Tuples. */
803 // @{
804 HALIDE_ATTRIBUTE_DEPRECATED("tuple_select has been deprecated. Use select instead (which now works for Tuples)")
805 Tuple tuple_select(const Tuple &condition, const Tuple &true_value, const Tuple &false_value);
806 HALIDE_ATTRIBUTE_DEPRECATED("tuple_select has been deprecated. Use select instead (which now works for Tuples)")
807 Tuple tuple_select(const Expr &condition, const Tuple &true_value, const Tuple &false_value);
808 Tuple select(const Tuple &condition, const Tuple &true_value, const Tuple &false_value);
809 Tuple select(const Expr &condition, const Tuple &true_value, const Tuple &false_value);
810 // @}
811 
812 /** Equivalent of multiway select(), but taking/returning tuples. If the condition is
813  * a Tuple, it must match the size of the true and false Tuples. */
814 // @{
815 template<typename... Args>
816 HALIDE_ATTRIBUTE_DEPRECATED("tuple_select has been deprecated. Use select instead (which now works for Tuples)")
817 inline Tuple tuple_select(const Tuple &c0, const Tuple &v0, const Tuple &c1, const Tuple &v1, Args &&...args) {
818  return tuple_select(c0, v0, tuple_select(c1, v1, std::forward<Args>(args)...));
819 }
820 template<typename... Args>
821 HALIDE_ATTRIBUTE_DEPRECATED("tuple_select has been deprecated. Use select instead (which now works for Tuples)")
822 inline Tuple tuple_select(const Expr &c0, const Tuple &v0, const Expr &c1, const Tuple &v1, Args &&...args) {
823  return tuple_select(c0, v0, tuple_select(c1, v1, std::forward<Args>(args)...));
824 }
825 template<typename... Args>
826 inline Tuple select(const Tuple &c0, const Tuple &v0, const Tuple &c1, const Tuple &v1, Args &&...args) {
827  return select(c0, v0, select(c1, v1, std::forward<Args>(args)...));
828 }
829 template<typename... Args>
830 inline Tuple select(const Expr &c0, const Tuple &v0, const Expr &c1, const Tuple &v1, Args &&...args) {
831  return select(c0, v0, select(c1, v1, std::forward<Args>(args)...));
832 }
833 // @}
834 
835 /** select applied to FuncRefs (e.g. select(x < 100, f(x), g(x))) is assumed to
836  * return an Expr. A runtime error is produced if this is applied to
837  * tuple-valued Funcs. In that case you should explicitly cast the second and
838  * third args to Tuple to remove the ambiguity. */
839 // @{
840 Expr select(const Expr &condition, const FuncRef &true_value, const FuncRef &false_value);
841 template<typename... Args>
842 inline Expr select(const Expr &c0, const FuncRef &v0, const Expr &c1, const FuncRef &v1, Args &&...args) {
843  return select(c0, v0, select(c1, v1, std::forward<Args>(args)...));
844 }
845 // @}
846 
847 /** Oftentimes we want to pack a list of expressions with the same type
848  * into a channel dimension, e.g.,
849  * img(x, y, c) = select(c == 0, 100, // Red
850  * c == 1, 50, // Green
851  * 25); // Blue
852  * This is tedious when the list is long. The following functions
853  * provide convenient syntax that allows one to write:
854  * img(x, y, c) = mux(c, {100, 50, 25});
855  *
856  * As with the select equivalent, if the first argument (the index) is
857  * out of range, the expression evaluates to the last value.
858  */
859 // @{
860 Expr mux(const Expr &id, const std::initializer_list<Expr> &values);
861 Expr mux(const Expr &id, const std::vector<Expr> &values);
862 Expr mux(const Expr &id, const Tuple &values);
863 Expr mux(const Expr &id, const std::initializer_list<FuncRef> &values);
864 Tuple mux(const Expr &id, const std::initializer_list<Tuple> &values);
865 Tuple mux(const Expr &id, const std::vector<Tuple> &values);
866 // @}
867 
868 /** Return the sine of a floating-point expression. If the argument is
869  * not floating-point, it is cast to Float(32). Does not vectorize
870  * well. */
872 
873 /** Return the arcsine of a floating-point expression. If the argument
874  * is not floating-point, it is cast to Float(32). Does not vectorize
875  * well. */
877 
878 /** Return the cosine of a floating-point expression. If the argument
879  * is not floating-point, it is cast to Float(32). Does not vectorize
880  * well. */
882 
883 /** Return the arccosine of a floating-point expression. If the
884  * argument is not floating-point, it is cast to Float(32). Does not
885  * vectorize well. */
887 
888 /** Return the tangent of a floating-point expression. If the argument
889  * is not floating-point, it is cast to Float(32). Does not vectorize
890  * well. */
892 
893 /** Return the arctangent of a floating-point expression. If the
894  * argument is not floating-point, it is cast to Float(32). Does not
895  * vectorize well. */
897 
898 /** Return the angle of a floating-point gradient. If the argument is
899  * not floating-point, it is cast to Float(32). Does not vectorize
900  * well. */
902 
903 /** Return the hyperbolic sine of a floating-point expression. If the
904  * argument is not floating-point, it is cast to Float(32). Does not
905  * vectorize well. */
907 
908 /** Return the hyperbolic arcsine of a floating-point expression. If
909  * the argument is not floating-point, it is cast to Float(32). Does
910  * not vectorize well. */
912 
913 /** Return the hyperbolic cosine of a floating-point expression. If
914  * the argument is not floating-point, it is cast to Float(32). Does
915  * not vectorize well. */
917 
918 /** Return the hyperbolic arccosine of a floating-point expression.
919  * If the argument is not floating-point, it is cast to
920  * Float(32). Does not vectorize well. */
922 
923 /** Return the hyperbolic tangent of a floating-point expression. If
924  * the argument is not floating-point, it is cast to Float(32). Does
925  * not vectorize well. */
927 
928 /** Return the hyperbolic arctangent of a floating-point expression.
929  * If the argument is not floating-point, it is cast to
930  * Float(32). Does not vectorize well. */
932 
933 /** Return the square root of a floating-point expression. If the
934  * argument is not floating-point, it is cast to Float(32). Typically
935  * vectorizes cleanly. */
937 
938 /** Return the square root of the sum of the squares of two
939  * floating-point expressions. If the argument is not floating-point,
940  * it is cast to Float(32). Vectorizes cleanly. */
941 Expr hypot(const Expr &x, const Expr &y);
942 
943 /** Return the exponential of a floating-point expression. If the
944  * argument is not floating-point, it is cast to Float(32). For
945  * Float(64) arguments, this calls the system exp function, and does
946  * not vectorize well. For Float(32) arguments, this function is
947  * vectorizable, does the right thing for extremely small or extremely
948  * large inputs, and is accurate up to the last bit of the
949  * mantissa. Vectorizes cleanly. */
951 
952 /** Return the logarithm of a floating-point expression. If the
953  * argument is not floating-point, it is cast to Float(32). For
954  * Float(64) arguments, this calls the system log function, and does
955  * not vectorize well. For Float(32) arguments, this function is
956  * vectorizable, does the right thing for inputs <= 0 (returns -inf or
957  * nan), and is accurate up to the last bit of the
958  * mantissa. Vectorizes cleanly. */
960 
961 /** Return one floating point expression raised to the power of
962  * another. The type of the result is given by the type of the first
963  * argument. If the first argument is not a floating-point type, it is
964  * cast to Float(32). For Float(32), cleanly vectorizable, and
965  * accurate up to the last few bits of the mantissa. Gets worse when
966  * approaching overflow. Vectorizes cleanly. */
968 
969 /** Evaluate the error function erf. Only available for
970  * Float(32). Accurate up to the last three bits of the
971  * mantissa. Vectorizes cleanly. */
972 Expr erf(const Expr &x);
973 
974 /** Fast vectorizable approximation to some trigonometric functions for Float(32).
975  * Absolute approximation error is less than 1e-5. */
976 // @{
977 Expr fast_sin(const Expr &x);
978 Expr fast_cos(const Expr &x);
979 // @}
980 
981 /** Fast approximate cleanly vectorizable log for Float(32). Returns
982  * nonsense for x <= 0.0f. Accurate up to the last 5 bits of the
983  * mantissa. Vectorizes cleanly. */
984 Expr fast_log(const Expr &x);
985 
986 /** Fast approximate cleanly vectorizable exp for Float(32). Returns
987  * nonsense for inputs that would overflow or underflow. Typically
988  * accurate up to the last 5 bits of the mantissa. Gets worse when
989  * approaching overflow. Vectorizes cleanly. */
990 Expr fast_exp(const Expr &x);
991 
992 /** Fast approximate cleanly vectorizable pow for Float(32). Returns
993  * nonsense for x < 0.0f. Accurate up to the last 5 bits of the
994  * mantissa for typical exponents. Gets worse when approaching
995  * overflow. Vectorizes cleanly. */
997 
998 /** Fast approximate inverse for Float(32). Corresponds to the rcpps
999  * instruction on x86, and the vrecpe instruction on ARM. Vectorizes
1000  * cleanly. Note that this can produce slightly different results
1001  * across different implementations of the same architecture (e.g. AMD vs Intel),
1002  * even when strict_float is enabled. */
1004 
1005 /** Fast approximate inverse square root for Float(32). Corresponds to
1006  * the rsqrtps instruction on x86, and the vrsqrte instruction on
1007  * ARM. Vectorizes cleanly. Note that this can produce slightly different results
1008  * across different implementations of the same architecture (e.g. AMD vs Intel),
1009  * even when strict_float is enabled. */
1011 
1012 /** Return the greatest whole number less than or equal to a
1013  * floating-point expression. If the argument is not floating-point,
1014  * it is cast to Float(32). The return value is still in floating
1015  * point, despite being a whole number. Vectorizes cleanly. */
1017 
1018 /** Return the least whole number greater than or equal to a
1019  * floating-point expression. If the argument is not floating-point,
1020  * it is cast to Float(32). The return value is still in floating
1021  * point, despite being a whole number. Vectorizes cleanly. */
1023 
1024 /** Return the whole number closest to a floating-point expression. If the
1025  * argument is not floating-point, it is cast to Float(32). The return value is
1026  * still in floating point, despite being a whole number. On ties, we round
1027  * towards the nearest even integer. Note that this is not the same as
1028  * std::round in C, which rounds away from zero. On platforms without a native
1029  * instruction for this, it is emulated, and may be more expensive than
1030  * cast<int>(x + 0.5f) or similar. */
1032 
1033 /** Return the integer part of a floating-point expression. If the argument is
1034  * not floating-point, it is cast to Float(32). The return value is still in
1035  * floating point, despite being a whole number. Vectorizes cleanly. */
1037 
1038 /** Returns true if the argument is a Not a Number (NaN). Requires a
1039  * floating point argument. Vectorizes cleanly.
1040  * Note that the Expr passed in will be evaluated in strict_float mode,
1041  * regardless of whether strict_float mode is enabled in the current Target. */
1043 
1044 /** Returns true if the argument is Inf or -Inf. Requires a
1045  * floating point argument. Vectorizes cleanly.
1046  * Note that the Expr passed in will be evaluated in strict_float mode,
1047  * regardless of whether strict_float mode is enabled in the current Target. */
1049 
1050 /** Returns true if the argument is a finite value (ie, neither NaN nor Inf).
1051  * Requires a floating point argument. Vectorizes cleanly.
1052  * Note that the Expr passed in will be evaluated in strict_float mode,
1053  * regardless of whether strict_float mode is enabled in the current Target. */
1055 
1056 /** Return the fractional part of a floating-point expression. If the argument
1057  * is not floating-point, it is cast to Float(32). The return value has the
1058  * same sign as the original expression. Vectorizes cleanly. */
1059 Expr fract(const Expr &x);
1060 
1061 /** Reinterpret the bits of one value as another type. */
1063 
1064 template<typename T>
1066  return reinterpret(type_of<T>(), std::move(e));
1067 }
1068 
1069 /** Return the bitwise and of two expressions (which need not have the
1070  * same type). The result type is the wider of the two expressions.
1071  * Only integral types are allowed and both expressions must be signed
1072  * or both must be unsigned. */
1074 
1075 /** Return the bitwise and of an expression and an integer. The type
1076  * of the result is the type of the expression argument. */
1077 // @{
1078 Expr operator&(Expr x, int y);
1079 Expr operator&(int x, Expr y);
1080 // @}
1081 
1082 /** Return the bitwise or of two expressions (which need not have the
1083  * same type). The result type is the wider of the two expressions.
1084  * Only integral types are allowed and both expressions must be signed
1085  * or both must be unsigned. */
1087 
1088 /** Return the bitwise or of an expression and an integer. The type of
1089  * the result is the type of the expression argument. */
1090 // @{
1091 Expr operator|(Expr x, int y);
1092 Expr operator|(int x, Expr y);
1093 // @}
1094 
1095 /** Return the bitwise xor of two expressions (which need not have the
1096  * same type). The result type is the wider of the two expressions.
1097  * Only integral types are allowed and both expressions must be signed
1098  * or both must be unsigned. */
1100 
1101 /** Return the bitwise xor of an expression and an integer. The type
1102  * of the result is the type of the expression argument. */
1103 // @{
1104 Expr operator^(Expr x, int y);
1105 Expr operator^(int x, Expr y);
1106 // @}
1107 
1108 /** Return the bitwise not of an expression. */
1110 
1111 /** Shift the bits of an integer value left. This is actually less
1112  * efficient than multiplying by 2^n, because Halide's optimization
1113  * passes understand multiplication, and will compile it to
1114  * shifting. This operator is only for if you really really need bit
1115  * shifting (e.g. because the exponent is a run-time parameter). The
1116  * type of the result is equal to the type of the first argument. Both
1117  * arguments must have integer type. */
1118 // @{
1121 // @}
1122 
1123 /** Shift the bits of an integer value right. Does sign extension for
1124  * signed integers. This is less efficient than dividing by a power of
1125  * two. Halide's definition of division (always round to negative
1126  * infinity) means that all divisions by powers of two get compiled to
1127  * bit-shifting, and Halide's optimization routines understand
1128  * division and can work with it. The type of the result is equal to
1129  * the type of the first argument. Both arguments must have integer
1130  * type. */
1131 // @{
1134 // @}
1135 
1136 /** Linear interpolate between the two values according to a weight.
1137  * \param zero_val The result when weight is 0
1138  * \param one_val The result when weight is 1
1139  * \param weight The interpolation amount
1140  *
1141  * Both zero_val and one_val must have the same type. All types are
1142  * supported, including bool.
1143  *
1144  * The weight is treated as its own type and must be float or an
1145  * unsigned integer type. It is scaled to the bit-size of the type of
1146  * x and y if they are integer, or converted to float if they are
1147  * float. Integer weights are converted to float via division by the
1148  * full-range value of the weight's type. Floating-point weights used
1149  * to interpolate between integer values must be between 0.0f and
1150  * 1.0f, and an error may be signaled if it is not provably so. (clamp
1151  * operators can be added to provide proof. Currently an error is only
1152  * signalled for constant weights.)
1153  *
1154  * For integer linear interpolation, out of range values cannot be
1155  * represented. In particular, weights that are conceptually less than
1156  * 0 or greater than 1.0 are not representable. As such the result is
1157  * always between x and y (inclusive of course). For lerp with
1158  * floating-point values and floating-point weight, the full range of
1159  * a float is valid, however underflow and overflow can still occur.
1160  *
1161  * Ordering is not required between zero_val and one_val:
1162  * lerp(42, 69, .5f) == lerp(69, 42, .5f) == 56
1163  *
1164  * Results for integer types are for exactly rounded arithmetic. As
1165  * such, there are cases where 16-bit and float differ because 32-bit
1166  * floating-point (float) does not have enough precision to produce
1167  * the exact result. (Likely true for 32-bit integer
1168  * vs. double-precision floating-point as well.)
1169  *
1170  * At present, double precision and 64-bit integers are not supported.
1171  *
1172  * Generally, lerp will vectorize as if it were an operation on a type
1173  * twice the bit size of the inferred type for x and y.
1174  *
1175  * Some examples:
1176  * \code
1177  *
1178  * // Since Halide does not have direct type declarations, casts
1179  * // below are used to indicate the types of the parameters.
1180  * // Such casts are not required or expected in actual code where types
1181  * // are inferred.
1182  *
1183  * lerp(cast<float>(x), cast<float>(y), cast<float>(w)) ->
1184  * x * (1.0f - w) + y * w
1185  *
1186  * lerp(cast<uint8_t>(x), cast<uint8_t>(y), cast<uint8_t>(w)) ->
1187  * cast<uint8_t>(cast<uint8_t>(x) * (1.0f - cast<uint8_t>(w) / 255.0f) +
1188  * cast<uint8_t>(y) * cast<uint8_t>(w) / 255.0f + .5f)
1189  *
1190  * // Note addition in Halide promoted uint8_t + int8_t to int16_t already,
1191  * // the outer cast is added for clarity.
1192  * lerp(cast<uint8_t>(x), cast<int8_t>(y), cast<uint8_t>(w)) ->
1193  * cast<int16_t>(cast<uint8_t>(x) * (1.0f - cast<uint8_t>(w) / 255.0f) +
1194  * cast<int8_t>(y) * cast<uint8_t>(w) / 255.0f + .5f)
1195  *
1196  * lerp(cast<int8_t>(x), cast<int8_t>(y), cast<float>(w)) ->
1197  * cast<int8_t>(cast<int8_t>(x) * (1.0f - cast<float>(w)) +
1198  * cast<int8_t>(y) * cast<float>(w))
1199  *
1200  * \endcode
1201  * */
1202 Expr lerp(Expr zero_val, Expr one_val, Expr weight);
1203 
1204 /** Count the number of set bits in an expression. */
1206 
1207 /** Count the number of leading zero bits in an expression. If the expression is
1208  * zero, the result is the number of bits in the type. */
1210 
1211 /** Count the number of trailing zero bits in an expression. If the expression is
1212  * zero, the result is the number of bits in the type. */
1214 
1215 /** Divide two integers, rounding towards zero. This is the typical
1216  * behavior of most hardware architectures, which differs from
1217  * Halide's division operator, which is Euclidean (rounds towards
1218  * -infinity). Will throw a runtime error if y is zero, or if y is -1
1219  * and x is the minimum signed integer. */
1221 
1222 /** Compute the remainder of dividing two integers, when division is
1223  * rounding toward zero. This is the typical behavior of most hardware
1224  * architectures, which differs from Halide's mod operator, which is
1225  * Euclidean (produces the remainder when division rounds towards
1226  * -infinity). Will throw a runtime error if y is zero. */
1228 
1229 /** Return a random variable representing a uniformly distributed
1230  * float in the half-open interval [0.0f, 1.0f). For random numbers of
1231  * other types, use lerp with a random float as the last parameter.
1232  *
1233  * Optionally takes a seed.
1234  *
1235  * Note that:
1236  \code
1237  Expr x = random_float();
1238  Expr y = x + x;
1239  \endcode
1240  *
1241  * is very different to
1242  *
1243  \code
1244  Expr y = random_float() + random_float();
1245  \endcode
1246  *
1247  * The first doubles a random variable, and the second adds two
1248  * independent random variables.
1249  *
1250  * A given random variable takes on a unique value that depends
1251  * deterministically on the pure variables of the function they belong
1252  * to, the identity of the function itself, and which definition of
1253  * the function it is used in. They are, however, shared across tuple
1254  * elements.
1255  *
1256  * This function vectorizes cleanly.
1257  */
1259 
1260 /** Return a random variable representing a uniformly distributed
1261  * unsigned 32-bit integer. See \ref random_float. Vectorizes cleanly. */
1263 
1264 /** Return a random variable representing a uniformly distributed
1265  * 32-bit integer. See \ref random_float. Vectorizes cleanly. */
1267 
1268 /** Create an Expr that prints out its value whenever it is
1269  * evaluated. It also prints out everything else in the arguments
1270  * list, separated by spaces. This can include string literals. */
1271 //@{
1272 Expr print(const std::vector<Expr> &values);
1273 
1274 template<typename... Args>
1275 inline HALIDE_NO_USER_CODE_INLINE Expr print(Expr a, Args &&...args) {
1276  std::vector<Expr> collected_args = {std::move(a)};
1277  Internal::collect_print_args(collected_args, std::forward<Args>(args)...);
1278  return print(collected_args);
1279 }
1280 //@}
1281 
1282 /** Create an Expr that prints whenever it is evaluated, provided that
1283  * the condition is true. */
1284 // @{
1285 Expr print_when(Expr condition, const std::vector<Expr> &values);
1286 
1287 template<typename... Args>
1288 inline HALIDE_NO_USER_CODE_INLINE Expr print_when(Expr condition, Expr a, Args &&...args) {
1289  std::vector<Expr> collected_args = {std::move(a)};
1290  Internal::collect_print_args(collected_args, std::forward<Args>(args)...);
1291  return print_when(std::move(condition), collected_args);
1292 }
1293 
1294 // @}
1295 
1296 /** Create an Expr that guarantees a precondition.
1297  * If 'condition' is true, the return value is equal to the first Expr.
1298  * If 'condition' is false, halide_error() is called, and the return value
1299  * is arbitrary. Any additional arguments after the first Expr are stringified
1300  * and passed as a user-facing message to halide_error(), similar to print().
1301  *
1302  * Note that this essentially *always* inserts a runtime check into the
1303  * generated code (except when the condition can be proven at compile time);
1304  * as such, it should be avoided inside inner loops, except for debugging
1305  * or testing purposes. Note also that it does not vectorize cleanly (vector
1306  * values will be scalarized for the check).
1307  *
1308  * However, using this to make assertions about (say) input values
1309  * can be useful, both in terms of correctness and (potentially) in terms
1310  * of code generation, e.g.
1311  \code
1312  Param<int> p;
1313  Expr y = require(p > 0, p);
1314  \endcode
1315  * will allow the optimizer to assume positive, nonzero values for y.
1316  */
1317 // @{
1318 Expr require(Expr condition, const std::vector<Expr> &values);
1319 
1320 template<typename... Args>
1321 inline HALIDE_NO_USER_CODE_INLINE Expr require(Expr condition, Expr value, Args &&...args) {
1322  std::vector<Expr> collected_args = {std::move(value)};
1323  Internal::collect_print_args(collected_args, std::forward<Args>(args)...);
1324  return require(std::move(condition), collected_args);
1325 }
1326 // @}
1327 
1328 /** Return an undef value of the given type. Halide skips stores that
1329  * depend on undef values, so you can use this to mean "do not modify
1330  * this memory location". This is an escape hatch that can be used for
1331  * several things:
1332  *
1333  * You can define a reduction with no pure step, by setting the pure
1334  * step to undef. Do this only if you're confident that the update
1335  * steps are sufficient to correctly fill in the domain.
1336  *
1337  * For a tuple-valued reduction, you can write an update step that
1338  * only updates some tuple elements.
1339  *
1340  * You can define a single-stage pipeline that only has update steps,
1341  * and depends on the values already in the output buffer.
1342  *
1343  * Use this feature with great caution, as you can use it to load from
1344  * uninitialized memory.
1345  */
1347 
1348 template<typename T>
1349 inline Expr undef() {
1350  return undef(type_of<T>());
1351 }
1352 
1353 namespace Internal {
1354 
1355 /** Return an expression that should never be evaluated. Expressions
1356  * that depend on unreachabale values are also unreachable, and
1357  * statements that execute unreachable expressions are also considered
1358  * unreachable. */
1360 
1361 template<typename T>
1362 inline Expr unreachable() {
1363  return unreachable(type_of<T>());
1364 }
1365 
1366 } // namespace Internal
1367 
1368 /** Control the values used in the memoization cache key for memoize.
1369  * Normally parameters and other external dependencies are
1370  * automatically inferred and added to the cache key. The memoize_tag
1371  * operator allows computing one expression and using either the
1372  * computed value, or one or more other expressions in the cache key
1373  * instead of the parameter dependencies of the computation. The
1374  * single argument version is completely safe in that the cache key
1375  * will use the actual computed value -- it is difficult or impossible
1376  * to produce erroneous caching this way. The more-than-one argument
1377  * version allows generating cache keys that do not uniquely identify
1378  * the computation and thus can result in caching errors.
1379  *
1380  * A potential use for the single argument version is to handle a
1381  * floating-point parameter that is quantized to a small
1382  * integer. Multiple values of the float will produce the same integer
1383  * and moving the caching to using the integer for the key is more
1384  * efficient.
1385  *
1386  * The main use for the more-than-one argument version is to provide
1387  * cache key information for Handles and ImageParams, which otherwise
1388  * are not allowed inside compute_cached operations. E.g. when passing
1389  * a group of parameters to an external array function via a Handle,
1390  * memoize_tag can be used to isolate the actual values used by that
1391  * computation. If an ImageParam is a constant image with a persistent
1392  * digest, memoize_tag can be used to key computations using that image
1393  * on the digest. */
1394 // @{
1395 template<typename... Args>
1396 inline HALIDE_NO_USER_CODE_INLINE Expr memoize_tag(Expr result, Args &&...args) {
1397  std::vector<Expr> collected_args{std::forward<Args>(args)...};
1398  return Internal::memoize_tag_helper(std::move(result), collected_args);
1399 }
1400 // @}
1401 
1402 /** Expressions tagged with this intrinsic are considered to be part
1403  * of the steady state of some loop with a nasty beginning and end
1404  * (e.g. a boundary condition). When Halide encounters likely
1405  * intrinsics, it splits the containing loop body into three, and
1406  * tries to simplify down all conditions that lead to the likely. For
1407  * example, given the expression: select(x < 1, bar, x > 10, bar,
1408  * likely(foo)), Halide will split the loop over x into portions where
1409  * x < 1, 1 <= x <= 10, and x > 10.
1410  *
1411  * You're unlikely to want to call this directly. You probably want to
1412  * use the boundary condition helpers in the BoundaryConditions
1413  * namespace instead.
1414  */
1416 
1417 /** Equivalent to likely, but only triggers a loop partitioning if
1418  * found in an innermost loop. */
1420 
1421 /** Cast an expression to the halide type corresponding to the C++
1422  * type T. As part of the cast, clamp to the minimum and maximum
1423  * values of the result type. */
1424 template<typename T>
1426  return saturating_cast(type_of<T>(), std::move(e));
1427 }
1428 
1429 /** Cast an expression to a new type, clamping to the minimum and
1430  * maximum values of the result type. */
1432 
1433 /** Makes a best effort attempt to preserve IEEE floating-point
1434  * semantics in evaluating an expression. May not be implemented for
1435  * all backends. (E.g. it is difficult to do this for C++ code
1436  * generation as it depends on the compiler flags used to compile the
1437  * generated code.) */
1439 
1440 /** Create an Expr that promises another Expr is clamped but does
1441  * not generate code to check the assertion or modify the value. No
1442  * attempt is made to prove the bound at compile time. (If it is
1443  * proved false as a result of something else, an error might be
1444  * generated, but it is also possible the compiler will crash.) The
1445  * promised bound is used in bounds inference so it will allow
1446  * satisfying bounds checks as well as possibly aiding optimization.
1447  *
1448  * unsafe_promise_clamped returns its first argument, the Expr 'value'
1449  *
1450  * This is a very easy way to make Halide generate erroneous code if
1451  * the promised bound is not kept. Use sparingly when there is no
1452  * other way to convey the information to the compiler and it is
1453  * required for a valuable optimization.
1454  *
1455  * Unsafe promises can be checked by turning on
1456  * Target::CheckUnsafePromises. This is intended for debugging only.
1457  */
1458 Expr unsafe_promise_clamped(const Expr &value, const Expr &min, const Expr &max);
1459 
1460 namespace Internal {
1461 /**
1462  * FOR INTERNAL USE ONLY.
1463  *
1464  * An entirely unchecked version of unsafe_promise_clamped, used
1465  * inside the compiler as an annotation of the known bounds of an Expr
1466  * when it has proved something is bounded and wants to record that
1467  * fact for later passes (notably bounds inference) to exploit. This
1468  * gets introduced by GuardWithIf tail strategies, because the bounds
1469  * machinery has a hard time exploiting if statement conditions.
1470  *
1471  * Unlike unsafe_promise_clamped, this expression is
1472  * context-dependent, because 'value' might be statically bounded at
1473  * some point in the IR (e.g. due to a containing if statement), but
1474  * not elsewhere.
1475  *
1476  * This intrinsic always evaluates to its first argument. If this value is
1477  * used by a side-effecting operation and it is outside the range specified
1478  * by its second and third arguments, behavior is undefined. The compiler can
1479  * therefore assume that the value is within the range given and optimize
1480  * accordingly. Note that this permits promise_clamped to evaluate to
1481  * something outside of the range, provided that this value is not used.
1482  *
1483  * Note that this produces an intrinsic that is marked as 'pure' and thus is
1484  * allowed to be hoisted, etc.; thus, extra care must be taken with its use.
1485  **/
1486 Expr promise_clamped(const Expr &value, const Expr &min, const Expr &max);
1487 } // namespace Internal
1488 
1489 /** Scatter and gather are used for update definition which must store
1490  * multiple values to distinct locations at the same time. The
1491  * multiple expressions on the right-hand-side are bundled together
1492  * into a "gather", which must match a "scatter" with the same number
1493  * of arguments on the left-hand side. For example, to store the
1494  * values 1 and 2 to the locations (x, y, 3) and (x, y, 4),
1495  * respectively:
1496  *
1497 \code
1498 f(x, y, scatter(3, 4)) = gather(1, 2);
1499 \endcode
1500  *
1501  * The result of gather or scatter can be treated as an
1502  * expression. Any containing operations on it can be assumed to
1503  * distribute over the elements. If two gather expressions are
1504  * combined with an arithmetic operator (e.g. added), they combine
1505  * element-wise. The following example stores the values 2 * x, 2 * y,
1506  * and 2 * c to the locations (x + 1, y, c), (x, y + 3, c), and (x, y,
1507  * c + 2) respectively:
1508  *
1509 \code
1510 f(x + scatter(1, 0, 0), y + scatter(0, 3, 0), c + scatter(0, 0, 2)) = 2 * gather(x, y, c);
1511 \endcode
1512 *
1513 * Repeated values in the scatter cause multiple stores to the same
1514 * location. The stores happen in order from left to right, so the
1515 * rightmost value wins. The following code is equivalent to f(x) = 5
1516 *
1517 \code
1518 f(scatter(x, x)) = gather(3, 5);
1519 \endcode
1520 *
1521 * Gathers are most useful for algorithms which require in-place
1522 * swapping or permutation of multiple elements, or other kinds of
1523 * in-place mutations that require loading multiple inputs, doing some
1524 * operations to them jointly, then storing them again. The following
1525 * update definition swaps the values of f at locations 3 and 5 if an
1526 * input parameter p is true:
1527 *
1528 \code
1529 f(scatter(3, 5)) = f(select(p, gather(5, 3), gather(3, 5)));
1530 \endcode
1531 *
1532 * For more examples of the use of scatter and gather, see
1533 * test/correctness/multiple_scatter.cpp
1534 *
1535 * It is not currently possible to use scatter and gather to write an
1536 * update definition in which the *number* of values loaded or stored
1537 * varies, as the size of the scatter/gather packet must be fixed at
1538 * compile-time. A workaround is to make the unwanted extra operations
1539 * a redundant copy of the last operation, which will be
1540 * dead-code-eliminated by the compiler. For example, the following
1541 * update definition swaps the values at locations 3 and 5 when the
1542 * parameter p is true, and rotates the values at locations 1, 2, and 3
1543 * when it is false. The load from 3 and store to 5 will be redundantly
1544 * repeated:
1545 *
1546 \code
1547 f(select(p, scatter(3, 5, 5), scatter(1, 2, 3))) = f(select(p, gather(5, 3, 3), gather(2, 3, 1)));
1548 \endcode
1549 *
1550 * Note that in the p == true case, we redundantly load from 3 and write
1551 * to 5 twice.
1552 */
1553 //@{
1554 Expr scatter(const std::vector<Expr> &args);
1555 Expr gather(const std::vector<Expr> &args);
1556 
1557 template<typename... Args>
1558 Expr scatter(const Expr &e, Args &&...args) {
1559  return scatter({e, std::forward<Args>(args)...});
1560 }
1561 
1562 template<typename... Args>
1563 Expr gather(const Expr &e, Args &&...args) {
1564  return gather({e, std::forward<Args>(args)...});
1565 }
1566 // @}
1567 
1568 /** Extract a contiguous subsequence of the bits of 'e', starting at the bit
1569  * index given by 'lsb', where zero is the least-significant bit, returning a
1570  * value of type 't'. Any out-of-range bits requested are filled with zeros.
1571  *
1572  * extract_bits is especially useful when one wants to load a small vector of a
1573  * wide type, and treat it as a larger vector of a smaller type. For example,
1574  * loading a vector of 32 uint8 values from a uint32 Func can be done as
1575  * follows:
1576 \code
1577 f8(x) = extract_bits<uint8_t>(f32(x/4), 8*(x%4));
1578 f8.align_bounds(x, 4).vectorize(x, 32);
1579 \endcode
1580  * Note that the align_bounds call is critical so that the narrow Exprs are
1581  * aligned to the wider Exprs. This makes the x%4 term collapse to a
1582  * constant. If f8 is an output Func, then constraining the min value of x to be
1583  * a known multiple of four would also be sufficient, e.g. via:
1584 \code
1585 f8.output_buffer().dim(0).set_min(0);
1586 \endcode
1587  *
1588  * See test/correctness/extract_concat_bits.cpp for a complete example. */
1589 // @{
1590 Expr extract_bits(Type t, const Expr &e, const Expr &lsb);
1591 
1592 template<typename T>
1593 Expr extract_bits(const Expr &e, const Expr &lsb) {
1594  return extract_bits(type_of<T>(), e, lsb);
1595 }
1596 // @}
1597 
1598 /** Given a number of Exprs of the same type, concatenate their bits producing a
1599  * single Expr of the same type code of the input but with more bits. The
1600  * number of arguments must be a power of two.
1601  *
1602  * concat_bits is especially useful when one wants to treat a Func containing
1603  * values of a narrow type as a Func containing fewer values of a wider
1604  * type. For example, the following code reinterprets vectors of 32 uint8 values
1605  * as a vector of 8 uint32s:
1606  *
1607 \code
1608 f32(x) = concat_bits({f8(4*x), f8(4*x + 1), f8(4*x + 2), f8(4*x + 3)});
1609 f32.vectorize(x, 8);
1610 \endcode
1611  *
1612  * See test/correctness/extract_concat_bits.cpp for a complete example.
1613  */
1614 Expr concat_bits(const std::vector<Expr> &e);
1615 
1616 /** Below is a collection of intrinsics for fixed-point programming. Most of
1617  * them can be expressed via other means, but this is more natural for some, as
1618  * it avoids ghost widened intermediates that don't (or shouldn't) actually show
1619  * up in codegen, and doesn't rely on pattern-matching inside the compiler to
1620  * succeed to get good instruction selection.
1621  *
1622  * The semantics of each call are defined in terms of a non-existent 'widen' and
1623  * 'narrow' operators, which stand in for casts that double or halve the
1624  * bit-width of a type respectively.
1625  */
1626 
1627 /** Compute a + widen(b). */
1629 
1630 /** Compute a * widen(b). */
1632 
1633 /** Compute a - widen(b). */
1635 
1636 /** Compute widen(a) + widen(b). */
1638 
1639 /** Compute widen(a) * widen(b). a and b may have different signedness, in which
1640  * case the result is signed. */
1642 
1643 /** Compute widen(a) - widen(b). The result is always signed. */
1645 
1646 /** Compute widen(a) << b. */
1647 //@{
1650 //@}
1651 
1652 /** Compute widen(a) >> b. */
1653 //@{
1656 //@}
1657 
1658 /** Compute saturating_narrow(widening_add(a, (1 >> min(b, 0)) / 2) << b).
1659  * When b is positive indicating a left shift, the rounding term is zero. */
1660 //@{
1663 //@}
1664 
1665 /** Compute saturating_narrow(widening_add(a, (1 << max(b, 0)) / 2) >> b).
1666  * When b is negative indicating a left shift, the rounding term is zero. */
1667 //@{
1670 //@}
1671 
1672 /** Compute saturating_narrow(widen(a) + widen(b)) */
1674 
1675 /** Compute saturating_narrow(widen(a) - widen(b)) */
1677 
1678 /** Compute narrow((widen(a) + widen(b)) / 2) */
1680 
1681 /** Compute narrow((widen(a) + widen(b) + 1) / 2) */
1683 
1684 /** Compute narrow((widen(a) - widen(b)) / 2) */
1686 
1687 /** Compute saturating_narrow(shift_right(widening_mul(a, b), q)) */
1688 //@{
1691 //@}
1692 
1693 /** Compute saturating_narrow(rounding_shift_right(widening_mul(a, b), q)) */
1694 //@{
1697 //@}
1698 
1699 namespace Internal {
1700 
1701 template<typename T = void>
1702 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1703 Expr widen_right_add(const Expr &a, const Expr &b, T * = nullptr) {
1704  return Halide::widen_right_add(a, b);
1705 }
1706 template<typename T = void>
1707 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1708 Expr widen_right_mul(const Expr &a, const Expr &b, T * = nullptr) {
1709  return Halide::widen_right_mul(a, b);
1710 }
1711 template<typename T = void>
1712 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1713 Expr widen_right_sub(const Expr &a, const Expr &b, T * = nullptr) {
1714  return Halide::widen_right_sub(a, b);
1715 }
1716 template<typename T = void>
1717 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1718 Expr widening_add(const Expr &a, const Expr &b, T * = nullptr) {
1719  return Halide::widening_add(a, b);
1720 }
1721 template<typename T = void>
1722 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1723 Expr widening_mul(const Expr &a, const Expr &b, T * = nullptr) {
1724  return Halide::widening_mul(a, b);
1725 }
1726 template<typename T = void>
1727 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1728 Expr widening_sub(const Expr &a, const Expr &b, T * = nullptr) {
1729  return Halide::widening_sub(a, b);
1730 }
1731 template<typename T = void>
1732 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1733 Expr widening_shift_left(const Expr &a, const Expr &b, T * = nullptr) {
1734  return Halide::widening_shift_left(a, b);
1735 }
1736 template<typename T = void>
1737 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1738 Expr widening_shift_left(const Expr &a, int b, T * = nullptr) {
1739  return Halide::widening_shift_left(a, b);
1740 }
1741 template<typename T = void>
1742 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1743 Expr widening_shift_right(const Expr &a, const Expr &b, T * = nullptr) {
1744  return Halide::widening_shift_right(a, b);
1745 }
1746 template<typename T = void>
1747 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1748 Expr widening_shift_right(const Expr &a, int b, T * = nullptr) {
1749  return Halide::widening_shift_right(a, b);
1750 }
1751 template<typename T = void>
1752 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1753 Expr rounding_shift_left(const Expr &a, const Expr &b, T * = nullptr) {
1754  return Halide::widening_shift_left(a, b);
1755 }
1756 template<typename T = void>
1757 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1758 Expr rounding_shift_left(const Expr &a, int b, T * = nullptr) {
1759  return Halide::widening_shift_left(a, b);
1760 }
1761 template<typename T = void>
1762 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1763 Expr rounding_shift_right(const Expr &a, const Expr &b, T * = nullptr) {
1764  return Halide::rounding_shift_right(a, b);
1765 }
1766 template<typename T = void>
1767 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1768 Expr rounding_shift_right(const Expr &a, int b, T * = nullptr) {
1769  return Halide::rounding_shift_right(a, b);
1770 }
1771 template<typename T = void>
1772 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1773 Expr saturating_add(const Expr &a, const Expr &b, T * = nullptr) {
1774  return Halide::saturating_add(a, b);
1775 }
1776 template<typename T = void>
1777 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1778 Expr saturating_sub(const Expr &a, const Expr &b, T * = nullptr) {
1779  return Halide::saturating_sub(a, b);
1780 }
1781 template<typename T = void>
1782 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1783 Expr halving_add(const Expr &a, const Expr &b, T * = nullptr) {
1784  return Halide::halving_add(a, b);
1785 }
1786 template<typename T = void>
1787 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1788 Expr rounding_halving_add(const Expr &a, const Expr &b, T * = nullptr) {
1789  return Halide::rounding_halving_add(a, b);
1790 }
1791 template<typename T = void>
1792 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1793 Expr halving_sub(const Expr &a, const Expr &b, T * = nullptr) {
1794  return Halide::halving_sub(a, b);
1795 }
1796 template<typename T = void>
1797 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1798 Expr mul_shift_right(const Expr &a, const Expr &b, const Expr &q, T * = nullptr) {
1799  return Halide::mul_shift_right(a, b, q);
1800 }
1801 template<typename T = void>
1802 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1803 Expr mul_shift_right(const Expr &a, const Expr &b, int q, T * = nullptr) {
1804  return Halide::mul_shift_right(a, b, q);
1805 }
1806 template<typename T = void>
1807 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1808 Expr rounding_mul_shift_right(const Expr &a, const Expr &b, const Expr &q, T * = nullptr) {
1809  return Halide::rounding_mul_shift_right(a, b, q);
1810 }
1811 template<typename T = void>
1812 HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1813 Expr rounding_mul_shift_right(const Expr &a, const Expr &b, int q, T * = nullptr) {
1814  return Halide::rounding_mul_shift_right(a, b, q);
1815 }
1816 } // namespace Internal
1817 
1818 } // namespace Halide
1819 
1820 #endif
Base classes for Halide expressions (Halide::Expr) and statements (Halide::Internal::Stmt)
#define HALIDE_ATTRIBUTE_DEPRECATED(x)
Defines Tuple - the front-end handle on small arrays of expressions.
#define HALIDE_NO_USER_CODE_INLINE
Definition: Util.h:46
A fragment of front-end syntax of the form f(x, y, z), where x, y, z are Vars or Exprs.
Definition: Func.h:497
Create a small array of Exprs for defining and calling functions with multiple outputs.
Definition: Tuple.h:18
Expr make_one(Type t)
Construct the representation of one in the given type.
T div_imp(T a, T b)
Definition: IROperator.h:260
bool is_const_zero(const Expr &e)
Is the expression a const (as defined by is_const), and also equal to zero (in all lanes,...
Expr saturating_add(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1773
Expr memoize_tag_helper(Expr result, const std::vector< Expr > &cache_key_values)
Expr widen_right_sub(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1713
Expr make_zero(Type t)
Construct the representation of zero in the given type.
bool is_negative_const(const Expr &e)
Is the expression a const (as defined by is_const), and also strictly less than zero (in all lanes,...
bool is_undef(const Expr &e)
Is the expression an undef.
Expr requirement_failed_error(Expr condition, const std::vector< Expr > &args)
Expr make_two(Type t)
Construct the representation of two in the given type.
Expr unreachable(Type t=Int(32))
Return an expression that should never be evaluated.
void check_representable(Type t, int64_t val)
Check if a constant value can be correctly represented as the given type.
Expr halide_erf(const Expr &a)
bool is_const_one(const Expr &e)
Is the expression a const (as defined by is_const), and also equal to one (in all lanes,...
Expr widening_shift_left(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1733
void match_types(Expr &a, Expr &b)
Coerce the two expressions to have the same type, using C-style casting rules.
double div_imp< double >(double a, double b)
Definition: IROperator.h:301
Expr saturating_sub(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1778
Expr halide_exp(const Expr &a)
Expr halving_sub(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1793
Expr make_const(Type t, int64_t val)
Construct an immediate of the given type from any numeric C++ type.
Expr widening_shift_right(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1743
bool is_positive_const(const Expr &e)
Is the expression a const (as defined by is_const), and also strictly greater than zero (in all lanes...
Expr const_true(int lanes=1)
Construct the constant boolean true.
const double * as_const_float(const Expr &e)
If an expression is a FloatImm or a Broadcast of a FloatImm, return a pointer to its value.
bool is_signed_integer_overflow(const Expr &expr)
Check if an expression is a signed_integer_overflow.
T mod_imp(T a, T b)
Implementations of division and mod that are specific to Halide.
Definition: IROperator.h:239
void reset_random_counters()
Reset the counters used for random-number seeds in random_float/int/uint.
Expr halide_log(const Expr &a)
Halide's vectorizable transcendentals.
bool is_pure(const Expr &e)
Does the expression 1) Take on the same value no matter where it appears in a Stmt,...
void split_into_ands(const Expr &cond, std::vector< Expr > &result)
Split a boolean condition into vector of ANDs.
Expr promise_clamped(const Expr &value, const Expr &min, const Expr &max)
FOR INTERNAL USE ONLY.
bool is_no_op(const Stmt &s)
Is the statement a no-op (which we represent as either an undefined Stmt, or as an Evaluate node of a...
const int64_t * as_const_int(const Expr &e)
If an expression is an IntImm or a Broadcast of an IntImm, return a pointer to its value.
Expr unwrap_tags(const Expr &e)
If the expression is a tag helper call, remove it and return the tagged expression.
float div_imp< float >(float a, float b)
Definition: IROperator.h:297
bool is_const_power_of_two_integer(const Expr &e, int *bits)
Is the expression a constant integer power of two.
Expr lossless_negate(const Expr &x)
Attempt to negate x without introducing new IR and without overflow.
Expr strided_ramp_base(const Expr &e, int stride=1)
If e is a ramp expression with stride, default 1, return the base, otherwise undefined.
Expr remove_promises(const Expr &e)
Return an Expr that is identical to the input Expr, but with all calls to promise_clamped() and unsaf...
Expr widening_sub(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1728
Expr rounding_mul_shift_right(const Expr &a, const Expr &b, const Expr &q, T *=nullptr)
Definition: IROperator.h:1808
Expr rounding_halving_add(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1788
Expr widening_add(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1718
Expr const_false(int lanes=1)
Construct the constant boolean false.
Expr rounding_shift_left(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1753
double mod_imp< double >(double a, double b)
Definition: IROperator.h:291
Expr widen_right_mul(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1708
Expr rounding_shift_right(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1763
Expr make_bool(bool val, int lanes=1)
Construct a boolean constant from a C++ boolean value.
Expr mul_shift_right(const Expr &a, const Expr &b, const Expr &q, T *=nullptr)
Definition: IROperator.h:1798
HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector< Expr > &args)
Definition: IROperator.h:335
void match_types_bitwise(Expr &a, Expr &b, const char *op_name)
Asserts that both expressions are integer types and are either both signed or both unsigned.
Expr halving_add(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1783
float mod_imp< float >(float a, float b)
Definition: IROperator.h:285
Expr lossless_cast(Type t, Expr e)
Attempt to cast an expression to a smaller type while provably not losing information.
const uint64_t * as_const_uint(const Expr &e)
If an expression is a UIntImm or a Broadcast of a UIntImm, return a pointer to its value.
Expr widening_mul(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1723
Expr raise_to_integer_power(Expr a, int64_t b)
Raise an expression to an integer power by repeatedly multiplying it by itself.
Expr make_signed_integer_overflow(Type type)
Construct a unique signed_integer_overflow Expr.
bool is_const(const Expr &e)
Is the expression either an IntImm, a FloatImm, a StringImm, or a Cast of the same,...
Expr widen_right_add(const Expr &a, const Expr &b, T *=nullptr)
Definition: IROperator.h:1703
Expr remove_likelies(const Expr &e)
Return an Expr that is identical to the input Expr, but with all calls to likely() and likely_if_inne...
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
auto operator>=(const Other &a, const GeneratorParam< T > &b) -> decltype(a >=(T) b)
Greater than or equal comparison between GeneratorParam<T> and any type that supports operator>= with...
Definition: Generator.h:1105
Expr log(Expr x)
Return the logarithm of a floating-point expression.
Expr operator>>(Expr x, Expr y)
Shift the bits of an integer value right.
Expr ceil(Expr x)
Return the least whole number greater than or equal to a floating-point expression.
Expr widen_right_add(Expr a, Expr b)
Below is a collection of intrinsics for fixed-point programming.
Expr rounding_shift_right(Expr a, Expr b)
Compute saturating_narrow(widening_add(a, (1 << max(b, 0)) / 2) >> b).
HALIDE_NO_USER_CODE_INLINE Expr memoize_tag(Expr result, Args &&...args)
Control the values used in the memoization cache key for memoize.
Definition: IROperator.h:1396
Expr fast_log(const Expr &x)
Fast approximate cleanly vectorizable log for Float(32).
Expr count_leading_zeros(Expr x)
Count the number of leading zero bits in an expression.
Expr reinterpret(Type t, Expr e)
Reinterpret the bits of one value as another type.
Expr saturating_add(Expr a, Expr b)
Compute saturating_narrow(widen(a) + widen(b))
auto operator==(const Other &a, const GeneratorParam< T > &b) -> decltype(a==(T) b)
Equality comparison between GeneratorParam<T> and any type that supports operator== with T.
Definition: Generator.h:1131
Expr fast_cos(const Expr &x)
Expr random_uint(Expr seed=Expr())
Return a random variable representing a uniformly distributed unsigned 32-bit integer.
@ Internal
Not visible externally, similar to 'static' linkage in C.
Expr fract(const Expr &x)
Return the fractional part of a floating-point expression.
Expr halving_add(Expr a, Expr b)
Compute narrow((widen(a) + widen(b)) / 2)
auto operator<(const Other &a, const GeneratorParam< T > &b) -> decltype(a<(T) b)
Less than comparison between GeneratorParam<T> and any type that supports operator< with T.
Definition: Generator.h:1092
Expr widening_shift_right(Expr a, Expr b)
Compute widen(a) >> b.
auto operator*(const Other &a, const GeneratorParam< T > &b) -> decltype(a *(T) b)
Multiplication between GeneratorParam<T> and any type that supports operator* with T.
Definition: Generator.h:1040
Expr trunc(Expr x)
Return the integer part of a floating-point expression.
Expr halving_sub(Expr a, Expr b)
Compute narrow((widen(a) - widen(b)) / 2)
auto operator||(const Other &a, const GeneratorParam< T > &b) -> decltype(a||(T) b)
Logical or between GeneratorParam<T> and any type that supports operator|| with T.
Definition: Generator.h:1174
Expr acosh(Expr x)
Return the hyperbolic arccosine of a floating-point expression.
Expr fast_inverse(Expr x)
Fast approximate inverse for Float(32).
Expr asin(Expr x)
Return the arcsine of a floating-point expression.
Expr rounding_shift_left(Expr a, Expr b)
Compute saturating_narrow(widening_add(a, (1 >> min(b, 0)) / 2) << b).
Expr & operator/=(Expr &a, Expr b)
Modify the first expression to be the ratio of two expressions, without changing its type.
auto operator-(const Other &a, const GeneratorParam< T > &b) -> decltype(a -(T) b)
Subtraction between GeneratorParam<T> and any type that supports operator- with T.
Definition: Generator.h:1027
Expr clamp(Expr a, const Expr &min_val, const Expr &max_val)
Clamps an expression to lie within the given bounds.
Expr hypot(const Expr &x, const Expr &y)
Return the square root of the sum of the squares of two floating-point expressions.
Expr popcount(Expr x)
Count the number of set bits in an expression.
Expr saturating_sub(Expr a, Expr b)
Compute saturating_narrow(widen(a) - widen(b))
Expr gather(const std::vector< Expr > &args)
Expr print_when(Expr condition, const std::vector< Expr > &values)
Create an Expr that prints whenever it is evaluated, provided that the condition is true.
Expr widening_shift_left(Expr a, Expr b)
Compute widen(a) << b.
Expr pow(Expr x, Expr y)
Return one floating point expression raised to the power of another.
Expr operator&(Expr x, Expr y)
Return the bitwise and of two expressions (which need not have the same type).
Expr cast(Expr a)
Cast an expression to the halide type corresponding to the C++ type T.
Definition: IROperator.h:364
auto operator!(const GeneratorParam< T > &a) -> decltype(!(T) a)
Not operator for GeneratorParam.
Definition: Generator.h:1246
Expr lerp(Expr zero_val, Expr one_val, Expr weight)
Linear interpolate between the two values according to a weight.
Expr atan2(Expr y, Expr x)
Return the angle of a floating-point gradient.
Expr saturating_cast(Expr e)
Cast an expression to the halide type corresponding to the C++ type T.
Definition: IROperator.h:1425
Expr random_float(Expr seed=Expr())
Return a random variable representing a uniformly distributed float in the half-open interval [0....
Expr sin(Expr x)
Return the sine of a floating-point expression.
Expr unsafe_promise_clamped(const Expr &value, const Expr &min, const Expr &max)
Create an Expr that promises another Expr is clamped but do not generate code to check the asser...
Expr rounding_halving_add(Expr a, Expr b)
Compute narrow((widen(a) + widen(b) + 1) / 2)
Expr extract_bits(Type t, const Expr &e, const Expr &lsb)
Extract a contiguous subsequence of the bits of 'e', starting at the bit index given by 'lsb',...
Expr concat_bits(const std::vector< Expr > &e)
Given a number of Exprs of the same type, concatenate their bits producing a single Expr of the same ...
Expr mux(const Expr &id, const std::initializer_list< Expr > &values)
Oftentimes we want to pack a list of expressions with the same type into a channel dimension,...
Expr cosh(Expr x)
Return the hyperbolic cosine of a floating-point expression.
std::ostream & operator<<(std::ostream &stream, const Expr &)
Emit an expression on an output stream (such as std::cout) in human-readable form.
Type Int(int bits, int lanes=1)
Constructing a signed integer type.
Definition: Type.h:530
Expr acos(Expr x)
Return the arccosine of a floating-point expression.
Expr fast_exp(const Expr &x)
Fast approximate cleanly vectorizable exp for Float(32).
Expr widening_add(Expr a, Expr b)
Compute widen(a) + widen(b).
Expr cos(Expr x)
Return the cosine of a floating-point expression.
auto operator+(const Other &a, const GeneratorParam< T > &b) -> decltype(a+(T) b)
Addition between GeneratorParam<T> and any type that supports operator+ with T.
Definition: Generator.h:1014
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
Definition: Func.h:603
Expr exp(Expr x)
Return the exponential of a floating-point expression.
Expr widen_right_mul(Expr a, Expr b)
Compute a * widen(b).
Expr absd(Expr a, Expr b)
Return the absolute difference between two values.
auto operator&&(const Other &a, const GeneratorParam< T > &b) -> decltype(a &&(T) b)
Logical and between GeneratorParam<T> and any type that supports operator&& with T.
Definition: Generator.h:1157
Tuple tuple_select(const Tuple &condition, const Tuple &true_value, const Tuple &false_value)
Equivalent of ternary select(), but taking/returning tuples.
Expr fast_sin(const Expr &x)
Fast vectorizable approximation to some trigonometric functions for Float(32).
Expr undef(Type t)
Return an undef value of the given type.
Expr & operator-=(Expr &a, Expr b)
Modify the first expression to be the difference of two expressions, without changing its type.
Expr fast_pow(Expr x, Expr y)
Fast approximate cleanly vectorizable pow for Float(32).
auto operator%(const Other &a, const GeneratorParam< T > &b) -> decltype(a %(T) b)
Modulo between GeneratorParam<T> and any type that supports operator% with T.
Definition: Generator.h:1066
Expr round(Expr x)
Return the whole number closest to a floating-point expression.
Expr select(Expr condition, Expr true_value, Expr false_value)
Returns an expression similar to the ternary operator in C, except that it always evaluates all argum...
Expr count_trailing_zeros(Expr x)
Count the number of trailing zero bits in an expression.
Expr scatter(const std::vector< Expr > &args)
Scatter and gather are used for update definition which must store multiple values to distinct locati...
auto operator<=(const Other &a, const GeneratorParam< T > &b) -> decltype(a<=(T) b)
Less than or equal comparison between GeneratorParam<T> and any type that supports operator<= with T.
Definition: Generator.h:1118
Expr rounding_mul_shift_right(Expr a, Expr b, Expr q)
Compute saturating_narrow(rounding_shift_right(widening_mul(a, b), q))
Expr random_int(Expr seed=Expr())
Return a random variable representing a uniformly distributed 32-bit integer.
Expr mod_round_to_zero(Expr x, Expr y)
Compute the remainder of dividing two integers, when division is rounding toward zero.
Expr strict_float(Expr e)
Makes a best effort attempt to preserve IEEE floating-point semantics in evaluating an expression.
Expr widening_mul(Expr a, Expr b)
Compute widen(a) * widen(b).
auto operator>(const Other &a, const GeneratorParam< T > &b) -> decltype(a >(T) b)
Greater than comparison between GeneratorParam<T> and any type that supports operator> with T.
Definition: Generator.h:1079
Expr is_nan(Expr x)
Returns true if the argument is a Not a Number (NaN).
Expr asinh(Expr x)
Return the hyperbolic arcsine of a floating-point expression.
Expr sqrt(Expr x)
Return the square root of a floating-point expression.
Expr sinh(Expr x)
Return the hyperbolic sine of a floating-point expression.
Expr atan(Expr x)
Return the arctangent of a floating-point expression.
Expr operator|(Expr x, Expr y)
Return the bitwise or of two expressions (which need not have the same type).
auto operator!=(const Other &a, const GeneratorParam< T > &b) -> decltype(a !=(T) b)
Inequality comparison between GeneratorParam<T> and any type that supports operator!...
Definition: Generator.h:1144
Expr & operator*=(Expr &a, Expr b)
Modify the first expression to be the product of two expressions, without changing its type.
Expr require(Expr condition, const std::vector< Expr > &values)
Create an Expr that guarantees a precondition.
Expr is_inf(Expr x)
Returns true if the argument is Inf or -Inf.
Expr is_finite(Expr x)
Returns true if the argument is a finite value (ie, neither NaN nor Inf).
Expr tanh(Expr x)
Return the hyperbolic tangent of a floating-point expression.
Expr likely_if_innermost(Expr e)
Equivalent to likely, but only triggers a loop partitioning if found in an innermost loop.
Expr & operator+=(Expr &a, Expr b)
Modify the first expression to be the sum of two expressions, without changing its type.
Expr atanh(Expr x)
Return the hyperbolic arctangent of a floating-point expression.
Expr tan(Expr x)
Return the tangent of a floating-point expression.
Expr fast_inverse_sqrt(Expr x)
Fast approximate inverse square root for Float(32).
Expr print(const std::vector< Expr > &values)
Create an Expr that prints out its value whenever it is evaluated.
Expr mul_shift_right(Expr a, Expr b, Expr q)
Compute saturating_narrow(shift_right(widening_mul(a, b), q))
auto operator/(const Other &a, const GeneratorParam< T > &b) -> decltype(a/(T) b)
Division between GeneratorParam<T> and any type that supports operator/ with T.
Definition: Generator.h:1053
Expr abs(Expr a)
Returns the absolute value of a signed integer or floating-point expression.
Expr widen_right_sub(Expr a, Expr b)
Compute a - widen(b).
Expr max(const FuncRef &a, const FuncRef &b)
Definition: Func.h:606
Expr floor(Expr x)
Return the greatest whole number less than or equal to a floating-point expression.
Expr div_round_to_zero(Expr x, Expr y)
Divide two integers, rounding towards zero.
Expr widening_sub(Expr a, Expr b)
Compute widen(a) - widen(b).
Expr likely(Expr e)
Expressions tagged with this intrinsic are considered to be part of the steady state of some loop wit...
Expr operator~(Expr x)
Return the bitwise not of an expression.
Expr erf(const Expr &x)
Evaluate the error function erf.
Expr operator^(Expr x, Expr y)
Return the bitwise xor of two expressions (which need not have the same type).
unsigned __INT64_TYPE__ uint64_t
signed __INT64_TYPE__ int64_t
signed __INT32_TYPE__ int32_t
unsigned __INT8_TYPE__ uint8_t
unsigned __INT16_TYPE__ uint16_t
unsigned __INT32_TYPE__ uint32_t
signed __INT16_TYPE__ int16_t
signed __INT8_TYPE__ int8_t
A fragment of Halide syntax.
Definition: Expr.h:258
A builder to help create Exprs representing halide_buffer_t structs (e.g.
Definition: IROperator.h:210
std::vector< Expr > strides
Definition: IROperator.h:215
std::vector< Expr > extents
Definition: IROperator.h:215
std::vector< Expr > mins
Definition: IROperator.h:215
A reference-counted handle to a statement node.
Definition: Expr.h:419
static constexpr bool value
Definition: IROperator.h:327
Types in the halide type system.
Definition: Type.h:276
HALIDE_ALWAYS_INLINE bool is_int() const
Is this type a signed integer type?
Definition: Type.h:428
HALIDE_ALWAYS_INLINE bool is_float() const
Is this type a floating point type (float or double).
Definition: Type.h:416
Class that provides a type that implements half precision floating point (IEEE754 2008 binary16) in s...
Definition: Float16.h:17