Halide 17.0.2
Halide compiler and libraries
Loading...
Searching...
No Matches
IROperator.h
Go to the documentation of this file.
1#ifndef HALIDE_IR_OPERATOR_H
2#define HALIDE_IR_OPERATOR_H
3
4/** \file
5 *
6 * Defines various operator overloads and utility functions that make
7 * it more pleasant to work with Halide expressions.
8 */
9
10#include <cmath>
11
12#include "Expr.h"
13#include "Tuple.h"
14
15namespace Halide {
16
17namespace Internal {
18/** Is the expression either an IntImm, a FloatImm, a StringImm, or a
19 * Cast of the same, or a Ramp or Broadcast of the same. Doesn't do
20 * any constant folding. */
21bool is_const(const Expr &e);
22
23/** Is the expression an IntImm, FloatImm of a particular value, or a
24 * Cast, or Broadcast of the same. */
25bool is_const(const Expr &e, int64_t v);
26
27/** If an expression is an IntImm or a Broadcast of an IntImm, return
28 * a pointer to its value. Otherwise returns nullptr. */
29const int64_t *as_const_int(const Expr &e);
30
31/** If an expression is a UIntImm or a Broadcast of a UIntImm, return
32 * a pointer to its value. Otherwise returns nullptr. */
33const uint64_t *as_const_uint(const Expr &e);
34
35/** If an expression is a FloatImm or a Broadcast of a FloatImm,
36 * return a pointer to its value. Otherwise returns nullptr. */
37const double *as_const_float(const Expr &e);
38
39/** Is the expression a constant integer power of two. Also returns
40 * log base two of the expression if it is. Only returns true for
41 * integer types. */
42bool is_const_power_of_two_integer(const Expr &e, int *bits);
43
44/** Is the expression a const (as defined by is_const), and also
45 * strictly greater than zero (in all lanes, if a vector expression) */
46bool is_positive_const(const Expr &e);
47
48/** Is the expression a const (as defined by is_const), and also
49 * strictly less than zero (in all lanes, if a vector expression) */
50bool is_negative_const(const Expr &e);
51
52/** Is the expression an undef */
53bool is_undef(const Expr &e);
54
55/** Is the expression a const (as defined by is_const), and also equal
56 * to zero (in all lanes, if a vector expression) */
57bool is_const_zero(const Expr &e);
58
59/** Is the expression a const (as defined by is_const), and also equal
60 * to one (in all lanes, if a vector expression) */
61bool is_const_one(const Expr &e);
62
63/** Is the statement a no-op (which we represent as either an
64 * undefined Stmt, or as an Evaluate node of a constant) */
65bool is_no_op(const Stmt &s);
66
67/** Does the expression
68 * 1) Take on the same value no matter where it appears in a Stmt, and
69 * 2) Evaluating it has no side-effects
70 */
71bool is_pure(const Expr &e);
72
73/** Construct an immediate of the given type from any numeric C++ type. */
74// @{
77Expr make_const(Type t, double val);
78inline Expr make_const(Type t, int32_t val) {
79 return make_const(t, (int64_t)val);
80}
81inline Expr make_const(Type t, uint32_t val) {
82 return make_const(t, (uint64_t)val);
83}
84inline Expr make_const(Type t, int16_t val) {
85 return make_const(t, (int64_t)val);
86}
87inline Expr make_const(Type t, uint16_t val) {
88 return make_const(t, (uint64_t)val);
89}
90inline Expr make_const(Type t, int8_t val) {
91 return make_const(t, (int64_t)val);
92}
93inline Expr make_const(Type t, uint8_t val) {
94 return make_const(t, (uint64_t)val);
95}
96inline Expr make_const(Type t, bool val) {
97 return make_const(t, (uint64_t)val);
98}
99inline Expr make_const(Type t, float val) {
100 return make_const(t, (double)val);
101}
103 return make_const(t, (double)val);
104}
105// @}
106
107/** Construct a unique signed_integer_overflow Expr */
109
110/** Check if an expression is a signed_integer_overflow */
112
113/** Check if a constant value can be correctly represented as the given type. */
115
116/** Construct a boolean constant from a C++ boolean value.
117 * May also be a vector if width is given.
118 * It is not possible to coerce a C++ boolean to Expr because
119 * if we provide such a path then char objects can ambiguously
120 * be converted to Halide Expr or to std::string. The problem
121 * is that C++ does not have a real bool type - it is in fact
122 * close enough to char that C++ does not know how to distinguish them.
123 * make_bool is the explicit coercion. */
124Expr make_bool(bool val, int lanes = 1);
125
126/** Construct the representation of zero in the given type */
128
129/** Construct the representation of one in the given type */
131
132/** Construct the representation of two in the given type */
134
135/** Construct the constant boolean true. May also be a vector of
136 * trues, if a lanes argument is given. */
137Expr const_true(int lanes = 1);
138
139/** Construct the constant boolean false. May also be a vector of
140 * falses, if a lanes argument is given. */
141Expr const_false(int lanes = 1);
142
143/** Attempt to cast an expression to a smaller type while provably not
144 * losing information. If it can't be done, return an undefined
145 * Expr. */
147
148/** Attempt to negate x without introducing new IR and without overflow.
149 * If it can't be done, return an undefined Expr. */
151
152/** Coerce the two expressions to have the same type, using C-style
153 * casting rules. For the purposes of casting, a boolean type is
154 * UInt(1). We use the following procedure:
155 *
156 * If the types already match, do nothing.
157 *
158 * Then, if one type is a vector and the other is a scalar, the scalar
159 * is broadcast to match the vector width, and we continue.
160 *
161 * Then, if one type is floating-point and the other is not, the
162 * non-float is cast to the floating-point type, and we're done.
163 *
164 * Then, if both types are unsigned ints, the one with fewer bits is
165 * cast to match the one with more bits and we're done.
166 *
167 * Then, if both types are signed ints, the one with fewer bits is
168 * cast to match the one with more bits and we're done.
169 *
170 * Finally, if one type is an unsigned int and the other type is a signed
171 * int, both are cast to a signed int with the greater of the two
172 * bit-widths. For example, matching an Int(8) with a UInt(16) results
173 * in an Int(16).
174 *
175 */
176void match_types(Expr &a, Expr &b);
177
178/** Asserts that both expressions are integer types and are either
179 * both signed or both unsigned. If one argument is scalar and the
180 * other a vector, the scalar is broadcasted to have the same number
181 * of lanes as the vector. If one expression is of narrower type than
182 * the other, it is widened to the bit width of the wider. */
183void match_types_bitwise(Expr &a, Expr &b, const char *op_name);
184
185/** Halide's vectorizable transcendentals. */
186// @{
190// @}
191
192/** Raise an expression to an integer power by repeatedly multiplying
193 * it by itself. */
195
196/** Split a boolean condition into vector of ANDs. If 'cond' is undefined,
197 * return an empty vector. */
198void split_into_ands(const Expr &cond, std::vector<Expr> &result);
199
200/** A builder to help create Exprs representing halide_buffer_t
201 * structs (e.g. foo.buffer) via calls to halide_buffer_init. Fill out
202 * the fields and then call build. The resulting Expr will be a call
203 * to halide_buffer_init with the struct members as arguments. If the
204 * buffer_memory field is undefined, it uses a call to alloca to make
205 * some stack memory for the buffer. If the shape_memory field is
206 * undefined, it similarly uses stack memory for the shape. If the
207 * shape_memory field is null, it uses the dim field already in the
208 * buffer. Other uninitialized fields will take on a value of zero in
209 * the constructed buffer. */
219
220/** If e is a ramp expression with stride, default 1, return the base,
221 * otherwise undefined. */
222Expr strided_ramp_base(const Expr &e, int stride = 1);
223
224/** Implementations of division and mod that are specific to Halide.
225 * Use these implementations; do not use native C division or mod to
226 * simplify Halide expressions. Halide division and modulo satisfy
227 * the Euclidean definition of division for integers a and b:
228 *
229 \code
230 when b != 0, (a/b)*b + a%b = a
231 0 <= a%b < |b|
232 \endcode
233 *
234 * Additionally, mod by zero returns zero, and div by zero returns
235 * zero. This makes mod and div total functions.
236 */
237// @{
238template<typename T>
239inline T mod_imp(T a, T b) {
240 Type t = type_of<T>();
241 if (!t.is_float() && b == 0) {
242 return 0;
243 } else if (t.is_int()) {
244 int64_t ia = a;
245 int64_t ib = b;
246 int64_t a_neg = ia >> 63;
247 int64_t b_neg = ib >> 63;
248 int64_t b_zero = (ib == 0) ? -1 : 0;
249 ia -= a_neg;
250 int64_t r = ia % (ib | b_zero);
251 r += (a_neg & ((ib ^ b_neg) + ~b_neg));
252 r &= ~b_zero;
253 return r;
254 } else {
255 return a % b;
256 }
257}
258
259template<typename T>
260inline T div_imp(T a, T b) {
261 Type t = type_of<T>();
262 if (!t.is_float() && b == 0) {
263 return (T)0;
264 } else if (t.is_int()) {
265 // Do it as 64-bit
266 int64_t ia = a;
267 int64_t ib = b;
268 int64_t a_neg = ia >> 63;
269 int64_t b_neg = ib >> 63;
270 int64_t b_zero = (ib == 0) ? -1 : 0;
271 ib -= b_zero;
272 ia -= a_neg;
273 int64_t q = ia / ib;
274 q += a_neg & (~b_neg - b_neg);
275 q &= ~b_zero;
276 return (T)q;
277 } else {
278 return a / b;
279 }
280}
281// @}
282
283// Special cases for float, double.
284template<>
285inline float mod_imp<float>(float a, float b) {
286 float f = a - b * (floorf(a / b));
287 // The remainder has the same sign as b.
288 return f;
289}
290template<>
291inline double mod_imp<double>(double a, double b) {
292 double f = a - b * (std::floor(a / b));
293 return f;
294}
295
296template<>
297inline float div_imp<float>(float a, float b) {
298 return a / b;
299}
300template<>
301inline double div_imp<double>(double a, double b) {
302 return a / b;
303}
304
305/** Return an Expr that is identical to the input Expr, but with
306 * all calls to likely() and likely_if_innermost() removed. */
308
309/** Return a Stmt that is identical to the input Stmt, but with
310 * all calls to likely() and likely_if_innermost() removed. */
312
313/** Return an Expr that is identical to the input Expr, but with
314 * all calls to promise_clamped() and unsafe_promise_clamped() removed. */
316
317/** Return a Stmt that is identical to the input Stmt, but with
318 * all calls to promise_clamped() and unsafe_promise_clamped() removed. */
320
321/** If the expression is a tag helper call, remove it and return
322 * the tagged expression. If not, returns the expression. */
324
325template<typename T>
327 static constexpr bool value = std::is_convertible<T, const char *>::value ||
328 std::is_convertible<T, Halide::Expr>::value;
329};
330
// Combines is_printable_arg<T> for every T in Args through meta_and.
// NOTE(review): meta_and is defined elsewhere; presumably it is the
// logical AND of the ::value members -- confirm.
331template<typename... Args>
332struct all_are_printable_args : meta_and<is_printable_arg<Args>...> {};
333
334// Secondary args to print can be Exprs or const char *
335inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args) {
336}
337
338template<typename... Args>
339inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args, const char *arg, Args &&...more_args) {
340 args.emplace_back(std::string(arg));
341 collect_print_args(args, std::forward<Args>(more_args)...);
342}
343
344template<typename... Args>
345inline HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector<Expr> &args, Expr arg, Args &&...more_args) {
346 args.push_back(std::move(arg));
347 collect_print_args(args, std::forward<Args>(more_args)...);
348}
349
350Expr requirement_failed_error(Expr condition, const std::vector<Expr> &args);
351
352Expr memoize_tag_helper(Expr result, const std::vector<Expr> &cache_key_values);
353
354/** Reset the counters used for random-number seeds in random_float/int/uint.
355 * (Note that the counters are incremented for each call, even if a seed is passed in.)
356 * This is used for multitarget compilation to ensure that each subtarget gets
357 * the same sequence of random numbers. */
359
360} // namespace Internal
361
362/** Cast an expression to the halide type corresponding to the C++ type T. */
363template<typename T>
364inline Expr cast(Expr a) {
365 return cast(type_of<T>(), std::move(a));
366}
367
368/** Cast an expression to a new type. */
370
371/** Return the sum of two expressions, doing any necessary type
372 * coercion using \ref Internal::match_types */
374
375/** Add an expression and a constant integer. Coerces the type of the
376 * integer to match the type of the expression. Errors if the integer
377 * cannot be represented in the type of the expression. */
378// @{
380
381/** Add a constant integer and an expression. Coerces the type of the
382 * integer to match the type of the expression. Errors if the integer
383 * cannot be represented in the type of the expression. */
385
386/** Modify the first expression to be the sum of two expressions,
387 * without changing its type. This casts the second argument to match
388 * the type of the first. */
390
391/** Return the difference of two expressions, doing any necessary type
392 * coercion using \ref Internal::match_types */
394
395/** Subtracts a constant integer from an expression. Coerces the type of the
396 * integer to match the type of the expression. Errors if the integer
397 * cannot be represented in the type of the expression. */
399
400/** Subtracts an expression from a constant integer. Coerces the type
401 * of the integer to match the type of the expression. Errors if the
402 * integer cannot be represented in the type of the expression. */
404
405/** Return the negative of the argument. Does no type casting, so more
406 * formally: return that number which when added to the original,
407 * yields zero of the same type. For unsigned integers the negative is
408 * still an unsigned integer. E.g. in UInt(8), the negative of 56 is
409 * 200, because 56 + 200 == 0 */
411
412/** Modify the first expression to be the difference of two expressions,
413 * without changing its type. This casts the second argument to match
414 * the type of the first. */
416
417/** Return the product of two expressions, doing any necessary type
418 * coercion using \ref Internal::match_types */
420
421/** Multiply an expression and a constant integer. Coerces the type of the
422 * integer to match the type of the expression. Errors if the integer
423 * cannot be represented in the type of the expression. */
425
426/** Multiply a constant integer and an expression. Coerces the type of
427 * the integer to match the type of the expression. Errors if the
428 * integer cannot be represented in the type of the expression. */
430
431/** Modify the first expression to be the product of two expressions,
432 * without changing its type. This casts the second argument to match
433 * the type of the first. */
435
436/** Return the ratio of two expressions, doing any necessary type
437 * coercion using \ref Internal::match_types. Note that integer
438 * division in Halide is not the same as integer division in C-like
439 * languages in two ways.
440 *
441 * First, signed integer division in Halide rounds according to the
442 * sign of the denominator. This means towards minus infinity for
443 * positive denominators, and towards positive infinity for negative
444 * denominators. This is unlike C, which rounds towards zero. This
445 * decision ensures that upsampling expressions like f(x/2, y/2) don't
446 * have funny discontinuities when x and y cross zero.
447 *
448 * Second, division by zero returns zero instead of faulting. For
449 * types where overflow is defined behavior, division of the largest
450 * negative signed integer by -1 returns the largest negative signed
451 * integer for the type (i.e. it wraps). This ensures that a division
452 * operation can never have a side-effect, which is helpful in Halide
453 * because scheduling directives can expand the domain of computation
454 * of a Func, potentially introducing new zero-division.
455 */
457
458/** Modify the first expression to be the ratio of two expressions,
459 * without changing its type. This casts the second argument to match
460 * the type of the first. Note that signed integer division in Halide
461 * rounds towards minus infinity, unlike C, which rounds towards
462 * zero. */
464
465/** Divides an expression by a constant integer. Coerces the type
466 * of the integer to match the type of the expression. Errors if the
467 * integer cannot be represented in the type of the expression. */
469
470/** Divides a constant integer by an expression. Coerces the type
471 * of the integer to match the type of the expression. Errors if the
472 * integer cannot be represented in the type of the expression. */
474
475/** Return the first argument reduced modulo the second, doing any
476 * necessary type coercion using \ref Internal::match_types. There are
477 * two key differences between C-like languages and Halide for the
478 * modulo operation, which complement the way division works.
479 *
480 * First, the result is never negative, so x % 2 is always zero or
481 * one, unlike in C-like languages. x % -2 is equivalent, and is also
482 * always zero or one. Second, mod by zero evaluates to zero (unlike
483 * in C, where it faults). This makes modulo, like division, a
484 * side-effect-free operation. */
486
487/** Mods an expression by a constant integer. Coerces the type
488 * of the integer to match the type of the expression. Errors if the
489 * integer cannot be represented in the type of the expression. */
491
492/** Mods a constant integer by an expression. Coerces the type
493 * of the integer to match the type of the expression. Errors if the
494 * integer cannot be represented in the type of the expression. */
496
497/** Return a boolean expression that tests whether the first argument
498 * is greater than the second, after doing any necessary type coercion
499 * using \ref Internal::match_types */
501
502/** Return a boolean expression that tests whether an expression is
503 * greater than a constant integer. Coerces the integer to the type of
504 * the expression. Errors if the integer is not representable in that
505 * type. */
507
508/** Return a boolean expression that tests whether a constant integer is
509 * greater than an expression. Coerces the integer to the type of
510 * the expression. Errors if the integer is not representable in that
511 * type. */
513
514/** Return a boolean expression that tests whether the first argument
515 * is less than the second, after doing any necessary type coercion
516 * using \ref Internal::match_types */
518
519/** Return a boolean expression that tests whether an expression is
520 * less than a constant integer. Coerces the integer to the type of
521 * the expression. Errors if the integer is not representable in that
522 * type. */
524
525/** Return a boolean expression that tests whether a constant integer is
526 * less than an expression. Coerces the integer to the type of
527 * the expression. Errors if the integer is not representable in that
528 * type. */
530
531/** Return a boolean expression that tests whether the first argument
532 * is less than or equal to the second, after doing any necessary type
533 * coercion using \ref Internal::match_types */
535
536/** Return a boolean expression that tests whether an expression is
537 * less than or equal to a constant integer. Coerces the integer to
538 * the type of the expression. Errors if the integer is not
539 * representable in that type. */
541
542/** Return a boolean expression that tests whether a constant integer
543 * is less than or equal to an expression. Coerces the integer to the
544 * type of the expression. Errors if the integer is not representable
545 * in that type. */
547
548/** Return a boolean expression that tests whether the first argument
549 * is greater than or equal to the second, after doing any necessary
550 * type coercion using \ref Internal::match_types */
552
553/** Return a boolean expression that tests whether an expression is
554 * greater than or equal to a constant integer. Coerces the integer to
555 * the type of the expression. Errors if the integer is not
556 * representable in that type. */
557Expr operator>=(const Expr &a, int b);
558
559/** Return a boolean expression that tests whether a constant integer
560 * is greater than or equal to an expression. Coerces the integer to the
561 * type of the expression. Errors if the integer is not representable
562 * in that type. */
563Expr operator>=(int a, const Expr &b);
564
565/** Return a boolean expression that tests whether the first argument
566 * is equal to the second, after doing any necessary type coercion
567 * using \ref Internal::match_types */
569
570/** Return a boolean expression that tests whether an expression is
571 * equal to a constant integer. Coerces the integer to the type of the
572 * expression. Errors if the integer is not representable in that
573 * type. */
575
576/** Return a boolean expression that tests whether a constant integer
577 * is equal to an expression. Coerces the integer to the type of the
578 * expression. Errors if the integer is not representable in that
579 * type. */
581
582/** Return a boolean expression that tests whether the first argument
583 * is not equal to the second, after doing any necessary type coercion
584 * using \ref Internal::match_types */
586
587/** Return a boolean expression that tests whether an expression is
588 * not equal to a constant integer. Coerces the integer to the type of
589 * the expression. Errors if the integer is not representable in that
590 * type. */
592
593/** Return a boolean expression that tests whether a constant integer
594 * is not equal to an expression. Coerces the integer to the type of
595 * the expression. Errors if the integer is not representable in that
596 * type. */
598
599/** Returns the logical and of the two arguments */
601
602/** Logical and of an Expr and a bool. Either returns the Expr or an
603 * Expr representing false, depending on the bool. */
604// @{
607// @}
608
609/** Returns the logical or of the two arguments */
611
612/** Logical or of an Expr and a bool. Either returns the Expr or an
613 * Expr representing true, depending on the bool. */
614// @{
617// @}
618
619/** Returns the logical not of the argument */
621
622/** Returns an expression representing the greater of the two
623 * arguments, after doing any necessary type coercion using
624 * \ref Internal::match_types. Vectorizes cleanly on most platforms
625 * (with the exception of integer types on x86 without SSE4). */
627
628/** Returns an expression representing the greater of an expression
629 * and a constant integer. The integer is coerced to the type of the
630 * expression. Errors if the integer is not representable as that
631 * type. Vectorizes cleanly on most platforms (with the exception of
632 * integer types on x86 without SSE4). */
633Expr max(Expr a, int b);
634
635/** Returns an expression representing the greater of a constant
636 * integer and an expression. The integer is coerced to the type of
637 * the expression. Errors if the integer is not representable as that
638 * type. Vectorizes cleanly on most platforms (with the exception of
639 * integer types on x86 without SSE4). */
640Expr max(int a, Expr b);
641
642inline Expr max(float a, Expr b) {
643 return max(Expr(a), std::move(b));
644}
645inline Expr max(Expr a, float b) {
646 return max(std::move(a), Expr(b));
647}
648
649/** Returns an expression representing the greater of an expressions
650 * vector, after doing any necessary type coercion using
651 * \ref Internal::match_types. Vectorizes cleanly on most platforms
652 * (with the exception of integer types on x86 without SSE4).
653 * The expressions are folded from right ie. max(.., max(.., ..)).
654 * The arguments can be any mix of types but must all be convertible to Expr. */
655template<typename A, typename B, typename C, typename... Rest,
656 typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Rest...>::value>::type * = nullptr>
657inline Expr max(A &&a, B &&b, C &&c, Rest &&...rest) {
658 return max(std::forward<A>(a), max(std::forward<B>(b), std::forward<C>(c), std::forward<Rest>(rest)...));
659}
660
662
663/** Returns an expression representing the lesser of an expression
664 * and a constant integer. The integer is coerced to the type of the
665 * expression. Errors if the integer is not representable as that
666 * type. Vectorizes cleanly on most platforms (with the exception of
667 * integer types on x86 without SSE4). */
668Expr min(Expr a, int b);
669
670/** Returns an expression representing the lesser of a constant
671 * integer and an expression. The integer is coerced to the type of
672 * the expression. Errors if the integer is not representable as that
673 * type. Vectorizes cleanly on most platforms (with the exception of
674 * integer types on x86 without SSE4). */
675Expr min(int a, Expr b);
676
677inline Expr min(float a, Expr b) {
678 return min(Expr(a), std::move(b));
679}
680inline Expr min(Expr a, float b) {
681 return min(std::move(a), Expr(b));
682}
683
684/** Returns an expression representing the lesser of an expressions
685 * vector, after doing any necessary type coercion using
686 * \ref Internal::match_types. Vectorizes cleanly on most platforms
687 * (with the exception of integer types on x86 without SSE4).
688 * The expressions are folded from right ie. min(.., min(.., ..)).
689 * The arguments can be any mix of types but must all be convertible to Expr. */
690template<typename A, typename B, typename C, typename... Rest,
691 typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Rest...>::value>::type * = nullptr>
692inline Expr min(A &&a, B &&b, C &&c, Rest &&...rest) {
693 return min(std::forward<A>(a), min(std::forward<B>(b), std::forward<C>(c), std::forward<Rest>(rest)...));
694}
695
696/** Operators on floats treats those floats as Exprs. Making these
697 * explicit prevents implicit float->int casts that might otherwise
698 * occur. */
699// @{
700inline Expr operator+(Expr a, float b) {
701 return std::move(a) + Expr(b);
702}
703inline Expr operator+(float a, Expr b) {
704 return Expr(a) + std::move(b);
705}
706inline Expr operator-(Expr a, float b) {
707 return std::move(a) - Expr(b);
708}
709inline Expr operator-(float a, Expr b) {
710 return Expr(a) - std::move(b);
711}
712inline Expr operator*(Expr a, float b) {
713 return std::move(a) * Expr(b);
714}
715inline Expr operator*(float a, Expr b) {
716 return Expr(a) * std::move(b);
717}
718inline Expr operator/(Expr a, float b) {
719 return std::move(a) / Expr(b);
720}
721inline Expr operator/(float a, Expr b) {
722 return Expr(a) / std::move(b);
723}
724inline Expr operator%(Expr a, float b) {
725 return std::move(a) % Expr(b);
726}
727inline Expr operator%(float a, Expr b) {
728 return Expr(a) % std::move(b);
729}
730inline Expr operator>(Expr a, float b) {
731 return std::move(a) > Expr(b);
732}
733inline Expr operator>(float a, Expr b) {
734 return Expr(a) > std::move(b);
735}
736inline Expr operator<(Expr a, float b) {
737 return std::move(a) < Expr(b);
738}
739inline Expr operator<(float a, Expr b) {
740 return Expr(a) < std::move(b);
741}
742inline Expr operator>=(Expr a, float b) {
743 return std::move(a) >= Expr(b);
744}
745inline Expr operator>=(float a, Expr b) {
746 return Expr(a) >= std::move(b);
747}
748inline Expr operator<=(Expr a, float b) {
749 return std::move(a) <= Expr(b);
750}
751inline Expr operator<=(float a, Expr b) {
752 return Expr(a) <= std::move(b);
753}
754inline Expr operator==(Expr a, float b) {
755 return std::move(a) == Expr(b);
756}
757inline Expr operator==(float a, Expr b) {
758 return Expr(a) == std::move(b);
759}
760inline Expr operator!=(Expr a, float b) {
761 return std::move(a) != Expr(b);
762}
763inline Expr operator!=(float a, Expr b) {
764 return Expr(a) != std::move(b);
765}
766// @}
767
768/** Clamps an expression to lie within the given bounds. The bounds
769 * are type-cast to match the expression. Vectorizes as well as min/max. */
770Expr clamp(Expr a, const Expr &min_val, const Expr &max_val);
771
772/** Returns the absolute value of a signed integer or floating-point
773 * expression. Vectorizes cleanly. Unlike in C, abs of a signed
774 * integer returns an unsigned integer of the same bit width. This
775 * means that abs of the most negative integer doesn't overflow. */
777
778/** Return the absolute difference between two values. Vectorizes
779 * cleanly. Returns an unsigned value of the same bit width. There are
780 * various ways to write this yourself, but they contain numerous
781 * gotchas and don't always compile to good code, so use this
782 * instead. */
784
785/** Returns an expression similar to the ternary operator in C, except
786 * that it always evaluates all arguments. If the first argument is
787 * true, then return the second, else return the third. Typically
788 * vectorizes cleanly, but benefits from SSE41 or newer on x86. */
789Expr select(Expr condition, Expr true_value, Expr false_value);
790
791/** A multi-way variant of select similar to a switch statement in C,
792 * which can accept multiple conditions and values in pairs. Evaluates
793 * to the first value for which the condition is true. Returns the
794 * final value if all conditions are false. */
795template<typename... Args,
796 typename std::enable_if<Halide::Internal::all_are_convertible<Expr, Args...>::value>::type * = nullptr>
797inline Expr select(Expr c0, Expr v0, Expr c1, Expr v1, Args &&...args) {
798 return select(std::move(c0), std::move(v0), select(std::move(c1), std::move(v1), std::forward<Args>(args)...));
799}
800
801/** Equivalent of ternary select(), but taking/returning tuples. If the condition is
802 * a Tuple, it must match the size of the true and false Tuples. */
803// @{
804HALIDE_ATTRIBUTE_DEPRECATED("tuple_select has been deprecated. Use select instead (which now works for Tuples)")
805Tuple tuple_select(const Tuple &condition, const Tuple &true_value, const Tuple &false_value);
806HALIDE_ATTRIBUTE_DEPRECATED("tuple_select has been deprecated. Use select instead (which now works for Tuples)")
807Tuple tuple_select(const Expr &condition, const Tuple &true_value, const Tuple &false_value);
808Tuple select(const Tuple &condition, const Tuple &true_value, const Tuple &false_value);
809Tuple select(const Expr &condition, const Tuple &true_value, const Tuple &false_value);
810// @}
811
812/** Equivalent of multiway select(), but taking/returning tuples. If the condition is
813 * a Tuple, it must match the size of the true and false Tuples. */
814// @{
815template<typename... Args>
816HALIDE_ATTRIBUTE_DEPRECATED("tuple_select has been deprecated. Use select instead (which now works for Tuples)")
817inline Tuple tuple_select(const Tuple &c0, const Tuple &v0, const Tuple &c1, const Tuple &v1, Args &&...args) {
818 return tuple_select(c0, v0, tuple_select(c1, v1, std::forward<Args>(args)...));
819}
820template<typename... Args>
821HALIDE_ATTRIBUTE_DEPRECATED("tuple_select has been deprecated. Use select instead (which now works for Tuples)")
822inline Tuple tuple_select(const Expr &c0, const Tuple &v0, const Expr &c1, const Tuple &v1, Args &&...args) {
823 return tuple_select(c0, v0, tuple_select(c1, v1, std::forward<Args>(args)...));
824}
825template<typename... Args>
826inline Tuple select(const Tuple &c0, const Tuple &v0, const Tuple &c1, const Tuple &v1, Args &&...args) {
827 return select(c0, v0, select(c1, v1, std::forward<Args>(args)...));
828}
829template<typename... Args>
830inline Tuple select(const Expr &c0, const Tuple &v0, const Expr &c1, const Tuple &v1, Args &&...args) {
831 return select(c0, v0, select(c1, v1, std::forward<Args>(args)...));
832}
833// @}
834
835/** select applied to FuncRefs (e.g. select(x < 100, f(x), g(x))) is assumed to
836 * return an Expr. A runtime error is produced if this is applied to
837 * tuple-valued Funcs. In that case you should explicitly cast the second and
838 * third args to Tuple to remove the ambiguity. */
839// @{
840Expr select(const Expr &condition, const FuncRef &true_value, const FuncRef &false_value);
841template<typename... Args>
842inline Expr select(const Expr &c0, const FuncRef &v0, const Expr &c1, const FuncRef &v1, Args &&...args) {
843 return select(c0, v0, select(c1, v1, std::forward<Args>(args)...));
844}
845// @}
846
847/** Oftentimes we want to pack a list of expressions with the same type
848 * into a channel dimension, e.g.,
849 * img(x, y, c) = select(c == 0, 100, // Red
850 * c == 1, 50, // Green
851 * 25); // Blue
852 * This is tedious when the list is long. The following functions
853 * provide convenient syntax that allows one to write:
854 * img(x, y, c) = mux(c, {100, 50, 25});
855 *
856 * As with the select equivalent, if the first argument (the index) is
857 * out of range, the expression evaluates to the last value.
858 */
859// @{
860Expr mux(const Expr &id, const std::initializer_list<Expr> &values);
861Expr mux(const Expr &id, const std::vector<Expr> &values);
862Expr mux(const Expr &id, const Tuple &values);
863Expr mux(const Expr &id, const std::initializer_list<FuncRef> &values);
864Tuple mux(const Expr &id, const std::initializer_list<Tuple> &values);
865Tuple mux(const Expr &id, const std::vector<Tuple> &values);
866// @}
867
868/** Return the sine of a floating-point expression. If the argument is
869 * not floating-point, it is cast to Float(32). Does not vectorize
870 * well. */
872
873/** Return the arcsine of a floating-point expression. If the argument
874 * is not floating-point, it is cast to Float(32). Does not vectorize
875 * well. */
877
878/** Return the cosine of a floating-point expression. If the argument
879 * is not floating-point, it is cast to Float(32). Does not vectorize
880 * well. */
882
883/** Return the arccosine of a floating-point expression. If the
884 * argument is not floating-point, it is cast to Float(32). Does not
885 * vectorize well. */
887
888/** Return the tangent of a floating-point expression. If the argument
889 * is not floating-point, it is cast to Float(32). Does not vectorize
890 * well. */
892
893/** Return the arctangent of a floating-point expression. If the
894 * argument is not floating-point, it is cast to Float(32). Does not
895 * vectorize well. */
897
898/** Return the angle of a floating-point gradient. If the argument is
899 * not floating-point, it is cast to Float(32). Does not vectorize
900 * well. */
902
903/** Return the hyperbolic sine of a floating-point expression. If the
904 * argument is not floating-point, it is cast to Float(32). Does not
905 * vectorize well. */
907
908/** Return the hyperbolic arcsine of a floating-point expression. If
909 * the argument is not floating-point, it is cast to Float(32). Does
910 * not vectorize well. */
912
913/** Return the hyperbolic cosine of a floating-point expression. If
914 * the argument is not floating-point, it is cast to Float(32). Does
915 * not vectorize well. */
917
918/** Return the hyperbolic arccosine of a floating-point expression.
919 * If the argument is not floating-point, it is cast to
920 * Float(32). Does not vectorize well. */
922
923/** Return the hyperbolic tangent of a floating-point expression. If
924 * the argument is not floating-point, it is cast to Float(32). Does
925 * not vectorize well. */
927
928/** Return the hyperbolic arctangent of a floating-point expression.
929 * If the argument is not floating-point, it is cast to
930 * Float(32). Does not vectorize well. */
932
933/** Return the square root of a floating-point expression. If the
934 * argument is not floating-point, it is cast to Float(32). Typically
935 * vectorizes cleanly. */
937
938/** Return the square root of the sum of the squares of two
939 * floating-point expressions. If the argument is not floating-point,
940 * it is cast to Float(32). Vectorizes cleanly. */
941Expr hypot(const Expr &x, const Expr &y);
942
943/** Return the exponential of a floating-point expression. If the
944 * argument is not floating-point, it is cast to Float(32). For
945 * Float(64) arguments, this calls the system exp function, and does
946 * not vectorize well. For Float(32) arguments, this function is
947 * vectorizable, does the right thing for extremely small or extremely
948 * large inputs, and is accurate up to the last bit of the
949 * mantissa. Vectorizes cleanly. */
951
952/** Return the logarithm of a floating-point expression. If the
953 * argument is not floating-point, it is cast to Float(32). For
954 * Float(64) arguments, this calls the system log function, and does
955 * not vectorize well. For Float(32) arguments, this function is
956 * vectorizable, does the right thing for inputs <= 0 (returns -inf or
957 * nan), and is accurate up to the last bit of the
958 * mantissa. Vectorizes cleanly. */
960
961/** Return one floating point expression raised to the power of
962 * another. The type of the result is given by the type of the first
963 * argument. If the first argument is not a floating-point type, it is
964 * cast to Float(32). For Float(32), cleanly vectorizable, and
965 * accurate up to the last few bits of the mantissa. Gets worse when
966 * approaching overflow. Vectorizes cleanly. */
968
969/** Evaluate the error function erf. Only available for
970 * Float(32). Accurate up to the last three bits of the
971 * mantissa. Vectorizes cleanly. */
972Expr erf(const Expr &x);
973
974/** Fast vectorizable approximation to some trigonometric functions for Float(32).
975 * Absolute approximation error is less than 1e-5. */
976// @{
979// @}
980
981/** Fast approximate cleanly vectorizable log for Float(32). Returns
982 * nonsense for x <= 0.0f. Accurate up to the last 5 bits of the
983 * mantissa. Vectorizes cleanly. */
985
986/** Fast approximate cleanly vectorizable exp for Float(32). Returns
987 * nonsense for inputs that would overflow or underflow. Typically
988 * accurate up to the last 5 bits of the mantissa. Gets worse when
989 * approaching overflow. Vectorizes cleanly. */
991
992/** Fast approximate cleanly vectorizable pow for Float(32). Returns
993 * nonsense for x < 0.0f. Accurate up to the last 5 bits of the
994 * mantissa for typical exponents. Gets worse when approaching
995 * overflow. Vectorizes cleanly. */
997
998/** Fast approximate inverse for Float(32). Corresponds to the rcpps
999 * instruction on x86, and the vrecpe instruction on ARM. Vectorizes
1000 * cleanly. Note that this can produce slightly different results
1001 * across different implementations of the same architecture (e.g. AMD vs Intel),
1002 * even when strict_float is enabled. */
1004
1005/** Fast approximate inverse square root for Float(32). Corresponds to
1006 * the rsqrtps instruction on x86, and the vrsqrte instruction on
1007 * ARM. Vectorizes cleanly. Note that this can produce slightly different results
1008 * across different implementations of the same architecture (e.g. AMD vs Intel),
1009 * even when strict_float is enabled. */
1011
1012/** Return the greatest whole number less than or equal to a
1013 * floating-point expression. If the argument is not floating-point,
1014 * it is cast to Float(32). The return value is still in floating
1015 * point, despite being a whole number. Vectorizes cleanly. */
1017
1018/** Return the least whole number greater than or equal to a
1019 * floating-point expression. If the argument is not floating-point,
1020 * it is cast to Float(32). The return value is still in floating
1021 * point, despite being a whole number. Vectorizes cleanly. */
1023
1024/** Return the whole number closest to a floating-point expression. If the
1025 * argument is not floating-point, it is cast to Float(32). The return value is
1026 * still in floating point, despite being a whole number. On ties, we round
1027 * towards the nearest even integer. Note that this is not the same as
1028 * std::round in C, which rounds away from zero. On platforms without a native
1029 * instruction for this, it is emulated, and may be more expensive than
1030 * cast<int>(x + 0.5f) or similar. */
1032
1033/** Return the integer part of a floating-point expression. If the argument is
1034 * not floating-point, it is cast to Float(32). The return value is still in
1035 * floating point, despite being a whole number. Vectorizes cleanly. */
1037
1038/** Returns true if the argument is a Not a Number (NaN). Requires a
1039 * floating point argument. Vectorizes cleanly.
1040 * Note that the Expr passed in will be evaluated in strict_float mode,
1041 * regardless of whether strict_float mode is enabled in the current Target. */
1043
1044/** Returns true if the argument is Inf or -Inf. Requires a
1045 * floating point argument. Vectorizes cleanly.
1046 * Note that the Expr passed in will be evaluated in strict_float mode,
1047 * regardless of whether strict_float mode is enabled in the current Target. */
1049
1050/** Returns true if the argument is a finite value (ie, neither NaN nor Inf).
1051 * Requires a floating point argument. Vectorizes cleanly.
1052 * Note that the Expr passed in will be evaluated in strict_float mode,
1053 * regardless of whether strict_float mode is enabled in the current Target. */
1055
1056/** Return the fractional part of a floating-point expression. If the argument
1057 * is not floating-point, it is cast to Float(32). The return value has the
1058 * same sign as the original expression. Vectorizes cleanly. */
1059Expr fract(const Expr &x);
1060
1061/** Reinterpret the bits of one value as another type. */
1063
1064template<typename T>
1066 return reinterpret(type_of<T>(), std::move(e));
1067}
1068
1069/** Return the bitwise and of two expressions (which need not have the
1070 * same type). The result type is the wider of the two expressions.
1071 * Only integral types are allowed and both expressions must be signed
1072 * or both must be unsigned. */
1074
1075/** Return the bitwise and of an expression and an integer. The type
1076 * of the result is the type of the expression argument. */
1077// @{
1080// @}
1081
1082/** Return the bitwise or of two expressions (which need not have the
1083 * same type). The result type is the wider of the two expressions.
1084 * Only integral types are allowed and both expressions must be signed
1085 * or both must be unsigned. */
1087
1088/** Return the bitwise or of an expression and an integer. The type of
1089 * the result is the type of the expression argument. */
1090// @{
1093// @}
1094
1095/** Return the bitwise xor of two expressions (which need not have the
1096 * same type). The result type is the wider of the two expressions.
1097 * Only integral types are allowed and both expressions must be signed
1098 * or both must be unsigned. */
1100
1101/** Return the bitwise xor of an expression and an integer. The type
1102 * of the result is the type of the expression argument. */
1103// @{
1106// @}
1107
1108/** Return the bitwise not of an expression. */
1110
1111/** Shift the bits of an integer value left. This is actually less
1112 * efficient than multiplying by 2^n, because Halide's optimization
1113 * passes understand multiplication, and will compile it to
1114 * shifting. This operator is only for if you really really need bit
1115 * shifting (e.g. because the exponent is a run-time parameter). The
1116 * type of the result is equal to the type of the first argument. Both
1117 * arguments must have integer type. */
1118// @{
1121// @}
1122
1123/** Shift the bits of an integer value right. Does sign extension for
1124 * signed integers. This is less efficient than dividing by a power of
1125 * two. Halide's definition of division (always round to negative
1126 * infinity) means that all divisions by powers of two get compiled to
1127 * bit-shifting, and Halide's optimization routines understand
1128 * division and can work with it. The type of the result is equal to
1129 * the type of the first argument. Both arguments must have integer
1130 * type. */
1131// @{
1134// @}
1135
1136/** Linear interpolate between the two values according to a weight.
1137 * \param zero_val The result when weight is 0
1138 * \param one_val The result when weight is 1
1139 * \param weight The interpolation amount
1140 *
1141 * Both zero_val and one_val must have the same type. All types are
1142 * supported, including bool.
1143 *
1144 * The weight is treated as its own type and must be float or an
1145 * unsigned integer type. It is scaled to the bit-size of the type of
1146 * x and y if they are integer, or converted to float if they are
1147 * float. Integer weights are converted to float via division by the
1148 * full-range value of the weight's type. Floating-point weights used
1149 * to interpolate between integer values must be between 0.0f and
1150 * 1.0f, and an error may be signaled if it is not provably so. (clamp
1151 * operators can be added to provide proof. Currently an error is only
1152 * signalled for constant weights.)
1153 *
1154 * For integer linear interpolation, out of range values cannot be
1155 * represented. In particular, weights that are conceptually less than
1156 * 0 or greater than 1.0 are not representable. As such the result is
1157 * always between x and y (inclusive of course). For lerp with
1158 * floating-point values and floating-point weight, the full range of
1159 * a float is valid, however underflow and overflow can still occur.
1160 *
1161 * Ordering is not required between zero_val and one_val:
1162 * lerp(42, 69, .5f) == lerp(69, 42, .5f) == 56
1163 *
1164 * Results for integer types are for exactly rounded arithmetic. As
1165 * such, there are cases where 16-bit and float differ because 32-bit
1166 * floating-point (float) does not have enough precision to produce
1167 * the exact result. (Likely true for 32-bit integer
1168 * vs. double-precision floating-point as well.)
1169 *
1170 * At present, double precision and 64-bit integers are not supported.
1171 *
1172 * Generally, lerp will vectorize as if it were an operation on a type
1173 * twice the bit size of the inferred type for x and y.
1174 *
1175 * Some examples:
1176 * \code
1177 *
1178 * // Since Halide does not have direct type declarations, casts
1179 * // below are used to indicate the types of the parameters.
1180 * // Such casts are not required or expected in actual code where types
1181 * // are inferred.
1182 *
1183 * lerp(cast<float>(x), cast<float>(y), cast<float>(w)) ->
1184 * x * (1.0f - w) + y * w
1185 *
1186 * lerp(cast<uint8_t>(x), cast<uint8_t>(y), cast<uint8_t>(w)) ->
1187 * cast<uint8_t>(cast<uint8_t>(x) * (1.0f - cast<uint8_t>(w) / 255.0f) +
1188 * cast<uint8_t>(y) * cast<uint8_t>(w) / 255.0f + .5f)
1189 *
1190 * // Note addition in Halide promoted uint8_t + int8_t to int16_t already,
1191 * // the outer cast is added for clarity.
1192 * lerp(cast<uint8_t>(x), cast<int8_t>(y), cast<uint8_t>(w)) ->
1193 * cast<int16_t>(cast<uint8_t>(x) * (1.0f - cast<uint8_t>(w) / 255.0f) +
1194 * cast<int8_t>(y) * cast<uint8_t>(w) / 255.0f + .5f)
1195 *
1196 * lerp(cast<int8_t>(x), cast<int8_t>(y), cast<float>(w)) ->
1197 * cast<int8_t>(cast<int8_t>(x) * (1.0f - cast<float>(w)) +
1198 * cast<int8_t>(y) * cast<float>(w))
1199 *
1200 * \endcode
1201 * */
1202Expr lerp(Expr zero_val, Expr one_val, Expr weight);
1203
1204/** Count the number of set bits in an expression. */
1206
1207/** Count the number of leading zero bits in an expression. If the expression is
1208 * zero, the result is the number of bits in the type. */
1210
1211/** Count the number of trailing zero bits in an expression. If the expression is
1212 * zero, the result is the number of bits in the type. */
1214
1215/** Divide two integers, rounding towards zero. This is the typical
1216 * behavior of most hardware architectures, which differs from
1217 * Halide's division operator, which is Euclidean (rounds towards
1218 * -infinity). Will throw a runtime error if y is zero, or if y is -1
1219 * and x is the minimum signed integer. */
1221
1222/** Compute the remainder of dividing two integers, when division is
1223 * rounding toward zero. This is the typical behavior of most hardware
1224 * architectures, which differs from Halide's mod operator, which is
1225 * Euclidean (produces the remainder when division rounds towards
1226 * -infinity). Will throw a runtime error if y is zero. */
1228
1229/** Return a random variable representing a uniformly distributed
1230 * float in the half-open interval [0.0f, 1.0f). For random numbers of
1231 * other types, use lerp with a random float as the last parameter.
1232 *
1233 * Optionally takes a seed.
1234 *
1235 * Note that:
1236 \code
1237 Expr x = random_float();
1238 Expr y = x + x;
1239 \endcode
1240 *
1241 * is very different to
1242 *
1243 \code
1244 Expr y = random_float() + random_float();
1245 \endcode
1246 *
1247 * The first doubles a random variable, and the second adds two
1248 * independent random variables.
1249 *
1250 * A given random variable takes on a unique value that depends
1251 * deterministically on the pure variables of the function they belong
1252 * to, the identity of the function itself, and which definition of
1253 * the function it is used in. They are, however, shared across tuple
1254 * elements.
1255 *
1256 * This function vectorizes cleanly.
1257 */
1259
1260/** Return a random variable representing a uniformly distributed
1261 * unsigned 32-bit integer. See \ref random_float. Vectorizes cleanly. */
1263
1264/** Return a random variable representing a uniformly distributed
1265 * 32-bit integer. See \ref random_float. Vectorizes cleanly. */
1267
1268/** Create an Expr that prints out its value whenever it is
1269 * evaluated. It also prints out everything else in the arguments
1270 * list, separated by spaces. This can include string literals. */
1271//@{
1272Expr print(const std::vector<Expr> &values);
1273
1274template<typename... Args>
1275inline HALIDE_NO_USER_CODE_INLINE Expr print(Expr a, Args &&...args) {
1276 std::vector<Expr> collected_args = {std::move(a)};
1277 Internal::collect_print_args(collected_args, std::forward<Args>(args)...);
1278 return print(collected_args);
1279}
1280//@}
1281
1282/** Create an Expr that prints whenever it is evaluated, provided that
1283 * the condition is true. */
1284// @{
1285Expr print_when(Expr condition, const std::vector<Expr> &values);
1286
1287template<typename... Args>
1288inline HALIDE_NO_USER_CODE_INLINE Expr print_when(Expr condition, Expr a, Args &&...args) {
1289 std::vector<Expr> collected_args = {std::move(a)};
1290 Internal::collect_print_args(collected_args, std::forward<Args>(args)...);
1291 return print_when(std::move(condition), collected_args);
1292}
1293
1294// @}
1295
1296/** Create an Expr that guarantees a precondition.
1297 * If 'condition' is true, the return value is equal to the first Expr.
1298 * If 'condition' is false, halide_error() is called, and the return value
1299 * is arbitrary. Any additional arguments after the first Expr are stringified
1300 * and passed as a user-facing message to halide_error(), similar to print().
1301 *
1302 * Note that this essentially *always* inserts a runtime check into the
1303 * generated code (except when the condition can be proven at compile time);
1304 * as such, it should be avoided inside inner loops, except for debugging
1305 * or testing purposes. Note also that it does not vectorize cleanly (vector
1306 * values will be scalarized for the check).
1307 *
1308 * However, using this to make assertions about (say) input values
1309 * can be useful, both in terms of correctness and (potentially) in terms
1310 * of code generation, e.g.
1311 \code
1312 Param<int> p;
1313 Expr y = require(p > 0, p);
1314 \endcode
1315 * will allow the optimizer to assume positive, nonzero values for y.
1316 */
1317// @{
1318Expr require(Expr condition, const std::vector<Expr> &values);
1319
1320template<typename... Args>
1321inline HALIDE_NO_USER_CODE_INLINE Expr require(Expr condition, Expr value, Args &&...args) {
1322 std::vector<Expr> collected_args = {std::move(value)};
1323 Internal::collect_print_args(collected_args, std::forward<Args>(args)...);
1324 return require(std::move(condition), collected_args);
1325}
1326// @}
1327
1328/** Return an undef value of the given type. Halide skips stores that
1329 * depend on undef values, so you can use this to mean "do not modify
1330 * this memory location". This is an escape hatch that can be used for
1331 * several things:
1332 *
1333 * You can define a reduction with no pure step, by setting the pure
1334 * step to undef. Do this only if you're confident that the update
1335 * steps are sufficient to correctly fill in the domain.
1336 *
1337 * For a tuple-valued reduction, you can write an update step that
1338 * only updates some tuple elements.
1339 *
1340 * You can define single-stage pipeline that only has update steps,
1341 * and depends on the values already in the output buffer.
1342 *
1343 * Use this feature with great caution, as you can use it to load from
1344 * uninitialized memory.
1345 */
1347
1348template<typename T>
1349inline Expr undef() {
1350 return undef(type_of<T>());
1351}
1352
1353namespace Internal {
1354
1355/** Return an expression that should never be evaluated. Expressions
1356 * that depend on unreachabale values are also unreachable, and
1357 * statements that execute unreachable expressions are also considered
1358 * unreachable. */
1360
1361template<typename T>
1363 return unreachable(type_of<T>());
1364}
1365
1366} // namespace Internal
1367
1368/** Control the values used in the memoization cache key for memoize.
1369 * Normally parameters and other external dependencies are
1370 * automatically inferred and added to the cache key. The memoize_tag
1371 * operator allows computing one expression and using either the
1372 * computed value, or one or more other expressions in the cache key
1373 * instead of the parameter dependencies of the computation. The
1374 * single argument version is completely safe in that the cache key
1375 * will use the actual computed value -- it is difficult or impossible
1376 * to produce erroneous caching this way. The more-than-one argument
1377 * version allows generating cache keys that do not uniquely identify
1378 * the computation and thus can result in caching errors.
1379 *
1380 * A potential use for the single argument version is to handle a
1381 * floating-point parameter that is quantized to a small
1382 * integer. Multiple values of the float will produce the same integer
1383 * and moving the caching to using the integer for the key is more
1384 * efficient.
1385 *
1386 * The main use for the more-than-one argument version is to provide
1387 * cache key information for Handles and ImageParams, which otherwise
1388 * are not allowed inside compute_cached operations. E.g. when passing
1389 * a group of parameters to an external array function via a Handle,
1390 * memoize_tag can be used to isolate the actual values used by that
1391 * computation. If an ImageParam is a constant image with a persistent
1392 * digest, memoize_tag can be used to key computations using that image
1393 * on the digest. */
1394// @{
1395template<typename... Args>
1396inline HALIDE_NO_USER_CODE_INLINE Expr memoize_tag(Expr result, Args &&...args) {
1397 std::vector<Expr> collected_args{std::forward<Args>(args)...};
1398 return Internal::memoize_tag_helper(std::move(result), collected_args);
1399}
1400// @}
1401
1402/** Expressions tagged with this intrinsic are considered to be part
1403 * of the steady state of some loop with a nasty beginning and end
1404 * (e.g. a boundary condition). When Halide encounters likely
1405 * intrinsics, it splits the containing loop body into three, and
1406 * tries to simplify down all conditions that lead to the likely. For
1407 * example, given the expression: select(x < 1, bar, x > 10, bar,
1408 * likely(foo)), Halide will split the loop over x into portions where
1409 * x < 1, 1 <= x <= 10, and x > 10.
1410 *
1411 * You're unlikely to want to call this directly. You probably want to
1412 * use the boundary condition helpers in the BoundaryConditions
1413 * namespace instead.
1414 */
1416
1417/** Equivalent to likely, but only triggers a loop partitioning if
1418 * found in an innermost loop. */
1420
1421/** Cast an expression to the halide type corresponding to the C++
1422 * type T. As part of the cast, clamp to the minimum and maximum
1423 * values of the result type. */
1424template<typename T>
1426 return saturating_cast(type_of<T>(), std::move(e));
1427}
1428
1429/** Cast an expression to a new type, clamping to the minimum and
1430 * maximum values of the result type. */
1432
1433/** Makes a best effort attempt to preserve IEEE floating-point
1434 * semantics in evaluating an expression. May not be implemented for
1435 * all backends. (E.g. it is difficult to do this for C++ code
1436 * generation as it depends on the compiler flags used to compile the
1437 * generated code.) */
1439
1440/** Create an Expr that promises another Expr is clamped but does
1441 * not generate code to check the assertion or modify the value. No
1442 * attempt is made to prove the bound at compile time. (If it is
1443 * proved false as a result of something else, an error might be
1444 * generated, but it is also possible the compiler will crash.) The
1445 * promised bound is used in bounds inference so it will allow
1446 * satisfying bounds checks as well as possibly aiding optimization.
1447 *
1448 * unsafe_promise_clamped returns its first argument, the Expr 'value'
1449 *
1450 * This is a very easy way to make Halide generate erroneous code if
1451 * the promised bound is not kept. Use sparingly when there is no
1452 * other way to convey the information to the compiler and it is
1453 * required for a valuable optimization.
1454 *
1455 * Unsafe promises can be checked by turning on
1456 * Target::CheckUnsafePromises. This is intended for debugging only.
1457 */
1458Expr unsafe_promise_clamped(const Expr &value, const Expr &min, const Expr &max);
1459
1460namespace Internal {
1461/**
1462 * FOR INTERNAL USE ONLY.
1463 *
1464 * An entirely unchecked version of unsafe_promise_clamped, used
1465 * inside the compiler as an annotation of the known bounds of an Expr
1466 * when it has proved something is bounded and wants to record that
1467 * fact for later passes (notably bounds inference) to exploit. This
1468 * gets introduced by GuardWithIf tail strategies, because the bounds
1469 * machinery has a hard time exploiting if statement conditions.
1470 *
1471 * Unlike unsafe_promise_clamped, this expression is
1472 * context-dependent, because 'value' might be statically bounded at
1473 * some point in the IR (e.g. due to a containing if statement), but
1474 * not elsewhere.
1475 *
1476 * This intrinsic always evaluates to its first argument. If this value is
1477 * used by a side-effecting operation and it is outside the range specified
1478 * by its second and third arguments, behavior is undefined. The compiler can
1479 * therefore assume that the value is within the range given and optimize
1480 * accordingly. Note that this permits promise_clamped to evaluate to
1481 * something outside of the range, provided that this value is not used.
1482 *
1483 * Note that this produces an intrinsic that is marked as 'pure' and thus is
1484 * allowed to be hoisted, etc.; thus, extra care must be taken with its use.
1485 **/
1486Expr promise_clamped(const Expr &value, const Expr &min, const Expr &max);
1487} // namespace Internal
1488
1489/** Scatter and gather are used for update definition which must store
1490 * multiple values to distinct locations at the same time. The
1491 * multiple expressions on the right-hand-side are bundled together
1492 * into a "gather", which must match a "scatter" with the same number
1493 * of arguments on the left-hand-side. For example, to store the
1494 * values 1 and 2 to the locations (x, y, 3) and (x, y, 4),
1495 * respectively:
1496 *
1497\code
1498f(x, y, scatter(3, 4)) = gather(1, 2);
1499\endcode
1500 *
1501 * The result of gather or scatter can be treated as an
1502 * expression. Any containing operations on it can be assumed to
1503 * distribute over the elements. If two gather expressions are
1504 * combined with an arithmetic operator (e.g. added), they combine
1505 * element-wise. The following example stores the values 2 * x, 2 * y,
1506 * and 2 * c to the locations (x + 1, y, c), (x, y + 3, c), and (x, y,
1507 * c + 2) respectively:
1508 *
1509\code
1510f(x + scatter(1, 0, 0), y + scatter(0, 3, 0), c + scatter(0, 0, 2)) = 2 * gather(x, y, c);
1511\endcode
1512*
1513* Repeated values in the scatter cause multiple stores to the same
1514* location. The stores happen in order from left to right, so the
1515* rightmost value wins. The following code is equivalent to f(x) = 5
1516*
1517\code
1518f(scatter(x, x)) = gather(3, 5);
1519\endcode
1520*
1521* Gathers are most useful for algorithms which require in-place
1522* swapping or permutation of multiple elements, or other kinds of
1523* in-place mutations that require loading multiple inputs, doing some
1524* operations to them jointly, then storing them again. The following
1525* update definition swaps the values of f at locations 3 and 5 if an
1526* input parameter p is true:
1527*
1528\code
1529f(scatter(3, 5)) = f(select(p, gather(5, 3), gather(3, 5)));
1530\endcode
1531*
1532* For more examples of the use of scatter and gather, see
1533* test/correctness/multiple_scatter.cpp
1534*
1535* It is not currently possible to use scatter and gather to write an
1536* update definition in which the *number* of values loaded or stored
1537* varies, as the size of the scatter/gather packet must be fixed at
1538* compile-time. A workaround is to make the unwanted extra operations
1539* a redundant copy of the last operation, which will be
1540* dead-code-eliminated by the compiler. For example, the following
1541* update definition swaps the values at locations 3 and 5 when the
1542* parameter p is true, and rotates the values at locations 1, 2, and 3
1543* when it is false. The load from 3 and store to 5 will be redundantly
1544* repeated:
1545*
1546\code
1547f(select(p, scatter(3, 5, 5), scatter(1, 2, 3))) = f(select(p, gather(5, 3, 3), gather(2, 3, 1)));
1548\endcode
1549*
1550* Note that in the p == true case, we redundantly load from 3 and write
1551* to 5 twice.
1552*/
1553//@{
1554Expr scatter(const std::vector<Expr> &args);
1555Expr gather(const std::vector<Expr> &args);
1556
1557template<typename... Args>
1558Expr scatter(const Expr &e, Args &&...args) {
1559 return scatter({e, std::forward<Args>(args)...});
1560}
1561
1562template<typename... Args>
1563Expr gather(const Expr &e, Args &&...args) {
1564 return gather({e, std::forward<Args>(args)...});
1565}
1566// @}
1567
1568/** Extract a contiguous subsequence of the bits of 'e', starting at the bit
1569 * index given by 'lsb', where zero is the least-significant bit, returning a
1570 * value of type 't'. Any out-of-range bits requested are filled with zeros.
1571 *
1572 * extract_bits is especially useful when one wants to load a small vector of a
1573 * wide type, and treat it as a larger vector of a smaller type. For example,
1574 * loading a vector of 32 uint8 values from a uint32 Func can be done as
1575 * follows:
1576\code
1577f8(x) = extract_bits<uint8_t>(f32(x/4), 8*(x%4));
1578f8.align_bounds(x, 4).vectorize(x, 32);
1579\endcode
1580 * Note that the align_bounds call is critical so that the narrow Exprs are
1581 * aligned to the wider Exprs. This makes the x%4 term collapse to a
1582 * constant. If f8 is an output Func, then constraining the min value of x to be
1583 * a known multiple of four would also be sufficient, e.g. via:
1584\code
1585f8.output_buffer().dim(0).set_min(0);
1586\endcode
1587 *
1588 * See test/correctness/extract_concat_bits.cpp for a complete example. */
1589// @{
1590Expr extract_bits(Type t, const Expr &e, const Expr &lsb);
1591
1592template<typename T>
1593Expr extract_bits(const Expr &e, const Expr &lsb) {
1594 return extract_bits(type_of<T>(), e, lsb);
1595}
1596// @}
1597
1598/** Given a number of Exprs of the same type, concatenate their bits producing a
1599 * single Expr of the same type code of the input but with more bits. The
1600 * number of arguments must be a power of two.
1601 *
1602 * concat_bits is especially useful when one wants to treat a Func containing
1603 * values of a narrow type as a Func containing fewer values of a wider
1604 * type. For example, the following code reinterprets vectors of 32 uint8 values
1605 * as a vector of 8 uint32s:
1606 *
1607\code
1608f32(x) = concat_bits({f8(4*x), f8(4*x + 1), f8(4*x + 2), f8(4*x + 3)});
1609f32.vectorize(x, 8);
1610\endcode
1611 *
1612 * See test/correctness/extract_concat_bits.cpp for a complete example.
1613 */
1614Expr concat_bits(const std::vector<Expr> &e);
1615
1616/** Below is a collection of intrinsics for fixed-point programming. Most of
1617 * them can be expressed via other means, but this is more natural for some, as
1618 * it avoids ghost widened intermediates that don't (or shouldn't) actually show
1619 * up in codegen, and doesn't rely on pattern-matching inside the compiler to
1620 * succeed to get good instruction selection.
1621 *
1622 * The semantics of each call are defined in terms of a non-existent 'widen' and
1623 * 'narrow' operators, which stand in for casts that double or halve the
1624 * bit-width of a type respectively.
1625 */
1626
1627/** Compute a + widen(b). */
1629
1630/** Compute a * widen(b). */
1632
1633/** Compute a - widen(b). */
1635
1636/** Compute widen(a) + widen(b). */
1638
1639/** Compute widen(a) * widen(b). a and b may have different signedness, in which
1640 * case the result is signed. */
1642
1643/** Compute widen(a) - widen(b). The result is always signed. */
1645
1646/** Compute widen(a) << b. */
1647//@{
1650//@}
1651
1652/** Compute widen(a) >> b. */
1653//@{
1656//@}
1657
1658/** Compute saturating_narrow(widening_add(a, (1 >> min(b, 0)) / 2) << b).
1659 * When b is positive indicating a left shift, the rounding term is zero. */
1660//@{
1663//@}
1664
1665/** Compute saturating_narrow(widening_add(a, (1 << max(b, 0)) / 2) >> b).
1666 * When b is negative indicating a left shift, the rounding term is zero. */
1667//@{
1670//@}
1671
1672/** Compute saturating_narrow(widen(a) + widen(b)) */
1674
1675/** Compute saturating_narrow(widen(a) - widen(b)) */
1677
1678/** Compute narrow((widen(a) + widen(b)) / 2) */
1680
1681/** Compute narrow((widen(a) + widen(b) + 1) / 2) */
1683
1684/** Compute narrow((widen(a) - widen(b)) / 2) */
1686
1687/** Compute saturating_narrow(shift_right(widening_mul(a, b), q)) */
1688//@{
1691//@}
1692
1693/** Compute saturating_narrow(rounding_shift_right(widening_mul(a, b), q)) */
1694//@{
1697//@}
1698
1699namespace Internal {
1700
1701template<typename T = void>
1702HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1703Expr widen_right_add(const Expr &a, const Expr &b, T * = nullptr) {
1704 return Halide::widen_right_add(a, b);
1705}
1706template<typename T = void>
1707HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1708Expr widen_right_mul(const Expr &a, const Expr &b, T * = nullptr) {
1709 return Halide::widen_right_mul(a, b);
1710}
1711template<typename T = void>
1712HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1713Expr widen_right_sub(const Expr &a, const Expr &b, T * = nullptr) {
1714 return Halide::widen_right_sub(a, b);
1715}
1716template<typename T = void>
1717HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1718Expr widening_add(const Expr &a, const Expr &b, T * = nullptr) {
1719 return Halide::widening_add(a, b);
1720}
1721template<typename T = void>
1722HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1723Expr widening_mul(const Expr &a, const Expr &b, T * = nullptr) {
1724 return Halide::widening_mul(a, b);
1725}
1726template<typename T = void>
1727HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1728Expr widening_sub(const Expr &a, const Expr &b, T * = nullptr) {
1729 return Halide::widening_sub(a, b);
1730}
1731template<typename T = void>
1732HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1733Expr widening_shift_left(const Expr &a, const Expr &b, T * = nullptr) {
1734 return Halide::widening_shift_left(a, b);
1735}
1736template<typename T = void>
1737HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1738Expr widening_shift_left(const Expr &a, int b, T * = nullptr) {
1739 return Halide::widening_shift_left(a, b);
1740}
1741template<typename T = void>
1742HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1743Expr widening_shift_right(const Expr &a, const Expr &b, T * = nullptr) {
1744 return Halide::widening_shift_right(a, b);
1745}
1746template<typename T = void>
1747HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1748Expr widening_shift_right(const Expr &a, int b, T * = nullptr) {
1749 return Halide::widening_shift_right(a, b);
1750}
1751template<typename T = void>
1752HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1753Expr rounding_shift_left(const Expr &a, const Expr &b, T * = nullptr) {
1754 return Halide::widening_shift_left(a, b);
1755}
1756template<typename T = void>
1757HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1758Expr rounding_shift_left(const Expr &a, int b, T * = nullptr) {
1759 return Halide::widening_shift_left(a, b);
1760}
1761template<typename T = void>
1762HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1763Expr rounding_shift_right(const Expr &a, const Expr &b, T * = nullptr) {
1764 return Halide::rounding_shift_right(a, b);
1765}
1766template<typename T = void>
1767HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1768Expr rounding_shift_right(const Expr &a, int b, T * = nullptr) {
1769 return Halide::rounding_shift_right(a, b);
1770}
1771template<typename T = void>
1772HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1773Expr saturating_add(const Expr &a, const Expr &b, T * = nullptr) {
1774 return Halide::saturating_add(a, b);
1775}
1776template<typename T = void>
1777HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1778Expr saturating_sub(const Expr &a, const Expr &b, T * = nullptr) {
1779 return Halide::saturating_sub(a, b);
1780}
1781template<typename T = void>
1782HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1783Expr halving_add(const Expr &a, const Expr &b, T * = nullptr) {
1784 return Halide::halving_add(a, b);
1785}
1786template<typename T = void>
1787HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1788Expr rounding_halving_add(const Expr &a, const Expr &b, T * = nullptr) {
1789 return Halide::rounding_halving_add(a, b);
1790}
1791template<typename T = void>
1792HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1793Expr halving_sub(const Expr &a, const Expr &b, T * = nullptr) {
1794 return Halide::halving_sub(a, b);
1795}
1796template<typename T = void>
1797HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1798Expr mul_shift_right(const Expr &a, const Expr &b, const Expr &q, T * = nullptr) {
1799 return Halide::mul_shift_right(a, b, q);
1800}
1801template<typename T = void>
1802HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1803Expr mul_shift_right(const Expr &a, const Expr &b, int q, T * = nullptr) {
1804 return Halide::mul_shift_right(a, b, q);
1805}
1806template<typename T = void>
1807HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1808Expr rounding_mul_shift_right(const Expr &a, const Expr &b, const Expr &q, T * = nullptr) {
1809 return Halide::rounding_mul_shift_right(a, b, q);
1810}
1811template<typename T = void>
1812HALIDE_ATTRIBUTE_DEPRECATED("This function has been moved out of the Halide::Internal:: namespace into Halide::")
1813Expr rounding_mul_shift_right(const Expr &a, const Expr &b, int q, T * = nullptr) {
1814 return Halide::rounding_mul_shift_right(a, b, q);
1815}
1816} // namespace Internal
1817
1818} // namespace Halide
1819
1820#endif
Base classes for Halide expressions (Halide::Expr) and statements (Halide::Internal::Stmt)
#define HALIDE_ATTRIBUTE_DEPRECATED(x)
Defines Tuple - the front-end handle on small arrays of expressions.
#define HALIDE_NO_USER_CODE_INLINE
Definition Util.h:46
A fragment of front-end syntax of the form f(x, y, z), where x, y, z are Vars or Exprs.
Definition Func.h:497
Create a small array of Exprs for defining and calling functions with multiple outputs.
Definition Tuple.h:18
Expr make_one(Type t)
Construct the representation of one in the given type.
T div_imp(T a, T b)
Definition IROperator.h:260
bool is_const_zero(const Expr &e)
Is the expression a const (as defined by is_const), and also equal to zero (in all lanes,...
Expr saturating_add(const Expr &a, const Expr &b, T *=nullptr)
Expr memoize_tag_helper(Expr result, const std::vector< Expr > &cache_key_values)
const double * as_const_float(const Expr &e)
If an expression is a FloatImm or a Broadcast of a FloatImm, return a pointer to its value.
Expr widen_right_sub(const Expr &a, const Expr &b, T *=nullptr)
Expr make_zero(Type t)
Construct the representation of zero in the given type.
bool is_negative_const(const Expr &e)
Is the expression a const (as defined by is_const), and also strictly less than zero (in all lanes,...
bool is_undef(const Expr &e)
Is the expression an undef.
Expr requirement_failed_error(Expr condition, const std::vector< Expr > &args)
Expr make_two(Type t)
Construct the representation of two in the given type.
void check_representable(Type t, int64_t val)
Check if a constant value can be correctly represented as the given type.
Expr halide_erf(const Expr &a)
bool is_const_one(const Expr &e)
Is the expression a const (as defined by is_const), and also equal to one (in all lanes,...
Expr widening_shift_left(const Expr &a, const Expr &b, T *=nullptr)
void match_types(Expr &a, Expr &b)
Coerce the two expressions to have the same type, using C-style casting rules.
double div_imp< double >(double a, double b)
Definition IROperator.h:301
Expr saturating_sub(const Expr &a, const Expr &b, T *=nullptr)
Expr halide_exp(const Expr &a)
Expr halving_sub(const Expr &a, const Expr &b, T *=nullptr)
Expr make_const(Type t, int64_t val)
Construct an immediate of the given type from any numeric C++ type.
Expr widening_shift_right(const Expr &a, const Expr &b, T *=nullptr)
const int64_t * as_const_int(const Expr &e)
If an expression is an IntImm or a Broadcast of an IntImm, return a pointer to its value.
bool is_positive_const(const Expr &e)
Is the expression a const (as defined by is_const), and also strictly greater than zero (in all lanes...
Expr const_true(int lanes=1)
Construct the constant boolean true.
bool is_signed_integer_overflow(const Expr &expr)
Check if an expression is a signed_integer_overflow.
T mod_imp(T a, T b)
Implementations of division and mod that are specific to Halide.
Definition IROperator.h:239
void reset_random_counters()
Reset the counters used for random-number seeds in random_float/int/uint.
Expr halide_log(const Expr &a)
Halide's vectorizable transcendentals.
bool is_pure(const Expr &e)
Does the expression 1) Take on the same value no matter where it appears in a Stmt,...
void split_into_ands(const Expr &cond, std::vector< Expr > &result)
Split a boolean condition into vector of ANDs.
Expr promise_clamped(const Expr &value, const Expr &min, const Expr &max)
FOR INTERNAL USE ONLY.
bool is_no_op(const Stmt &s)
Is the statement a no-op (which we represent as either an undefined Stmt, or as an Evaluate node of a...
Expr unwrap_tags(const Expr &e)
If the expression is a tag helper call, remove it and return the tagged expression.
float div_imp< float >(float a, float b)
Definition IROperator.h:297
bool is_const_power_of_two_integer(const Expr &e, int *bits)
Is the expression a constant integer power of two.
Expr lossless_negate(const Expr &x)
Attempt to negate x without introducing new IR and without overflow.
const uint64_t * as_const_uint(const Expr &e)
If an expression is a UIntImm or a Broadcast of a UIntImm, return a pointer to its value.
Expr strided_ramp_base(const Expr &e, int stride=1)
If e is a ramp expression with stride, default 1, return the base, otherwise undefined.
Expr remove_promises(const Expr &e)
Return an Expr that is identical to the input Expr, but with all calls to promise_clamped() and unsaf...
Expr widening_sub(const Expr &a, const Expr &b, T *=nullptr)
Expr rounding_mul_shift_right(const Expr &a, const Expr &b, const Expr &q, T *=nullptr)
Expr rounding_halving_add(const Expr &a, const Expr &b, T *=nullptr)
Expr widening_add(const Expr &a, const Expr &b, T *=nullptr)
Expr const_false(int lanes=1)
Construct the constant boolean false.
Expr rounding_shift_left(const Expr &a, const Expr &b, T *=nullptr)
double mod_imp< double >(double a, double b)
Definition IROperator.h:291
Expr widen_right_mul(const Expr &a, const Expr &b, T *=nullptr)
Expr rounding_shift_right(const Expr &a, const Expr &b, T *=nullptr)
Expr make_bool(bool val, int lanes=1)
Construct a boolean constant from a C++ boolean value.
Expr mul_shift_right(const Expr &a, const Expr &b, const Expr &q, T *=nullptr)
HALIDE_NO_USER_CODE_INLINE void collect_print_args(std::vector< Expr > &args)
Definition IROperator.h:335
void match_types_bitwise(Expr &a, Expr &b, const char *op_name)
Asserts that both expressions are integer types and are either both signed or both unsigned.
Expr halving_add(const Expr &a, const Expr &b, T *=nullptr)
float mod_imp< float >(float a, float b)
Definition IROperator.h:285
Expr lossless_cast(Type t, Expr e)
Attempt to cast an expression to a smaller type while provably not losing information.
Expr widening_mul(const Expr &a, const Expr &b, T *=nullptr)
Expr raise_to_integer_power(Expr a, int64_t b)
Raise an expression to an integer power by repeatedly multiplying it by itself.
Expr make_signed_integer_overflow(Type type)
Construct a unique signed_integer_overflow Expr.
bool is_const(const Expr &e)
Is the expression either an IntImm, a FloatImm, a StringImm, or a Cast of the same,...
Expr widen_right_add(const Expr &a, const Expr &b, T *=nullptr)
Expr remove_likelies(const Expr &e)
Return an Expr that is identical to the input Expr, but with all calls to likely() and likely_if_inne...
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
auto operator>=(const Other &a, const GeneratorParam< T > &b) -> decltype(a >=(T) b)
Greater than or equal comparison between GeneratorParam<T> and any type that supports operator>= with...
Definition Generator.h:1105
Expr log(Expr x)
Return the logarithm of a floating-point expression.
Expr operator>>(Expr x, Expr y)
Shift the bits of an integer value right.
Expr ceil(Expr x)
Return the least whole number greater than or equal to a floating-point expression.
Expr widen_right_add(Expr a, Expr b)
Below is a collection of intrinsics for fixed-point programming.
Expr rounding_shift_right(Expr a, Expr b)
Compute saturating_narrow(widening_add(a, (1 << max(b, 0)) / 2) >> b).
HALIDE_NO_USER_CODE_INLINE Expr memoize_tag(Expr result, Args &&...args)
Control the values used in the memoization cache key for memoize.
Expr fast_log(const Expr &x)
Fast approximate cleanly vectorizable log for Float(32).
Expr count_leading_zeros(Expr x)
Count the number of leading zero bits in an expression.
Expr reinterpret(Type t, Expr e)
Reinterpret the bits of one value as another type.
Expr saturating_add(Expr a, Expr b)
Compute saturating_narrow(widen(a) + widen(b))
auto operator==(const Other &a, const GeneratorParam< T > &b) -> decltype(a==(T) b)
Equality comparison between GeneratorParam<T> and any type that supports operator== with T.
Definition Generator.h:1131
Expr fast_cos(const Expr &x)
Expr & operator*=(Expr &a, Expr b)
Modify the first expression to be the product of two expressions, without changing its type.
Expr random_uint(Expr seed=Expr())
Return a random variable representing a uniformly distributed unsigned 32-bit integer.
@ Internal
Not visible externally, similar to 'static' linkage in C.
Expr fract(const Expr &x)
Return the fractional part of a floating-point expression.
Expr halving_add(Expr a, Expr b)
Compute narrow((widen(a) + widen(b)) / 2)
Expr & operator-=(Expr &a, Expr b)
Modify the first expression to be the difference of two expressions, without changing its type.
auto operator<(const Other &a, const GeneratorParam< T > &b) -> decltype(a<(T) b)
Less than comparison between GeneratorParam<T> and any type that supports operator< with T.
Definition Generator.h:1092
Expr widening_shift_right(Expr a, Expr b)
Compute widen(a) >> b.
Type type_of()
Construct the halide equivalent of a C type.
Definition Type.h:561
auto operator*(const Other &a, const GeneratorParam< T > &b) -> decltype(a *(T) b)
Multiplication between GeneratorParam<T> and any type that supports operator* with T.
Definition Generator.h:1040
Expr trunc(Expr x)
Return the integer part of a floating-point expression.
Expr halving_sub(Expr a, Expr b)
Compute narrow((widen(a) - widen(b)) / 2)
auto operator||(const Other &a, const GeneratorParam< T > &b) -> decltype(a||(T) b)
Logical or between GeneratorParam<T> and any type that supports operator|| with T.
Definition Generator.h:1174
Expr acosh(Expr x)
Return the hyperbolic arccosine of a floating-point expression.
Expr fast_inverse(Expr x)
Fast approximate inverse for Float(32).
Expr asin(Expr x)
Return the arcsine of a floating-point expression.
Expr rounding_shift_left(Expr a, Expr b)
Compute saturating_narrow(widening_add(a, (1 >> min(b, 0)) / 2) << b).
auto operator-(const Other &a, const GeneratorParam< T > &b) -> decltype(a -(T) b)
Subtraction between GeneratorParam<T> and any type that supports operator- with T.
Definition Generator.h:1027
Expr clamp(Expr a, const Expr &min_val, const Expr &max_val)
Clamps an expression to lie within the given bounds.
Expr hypot(const Expr &x, const Expr &y)
Return the square root of the sum of the squares of two floating-point expressions.
Expr popcount(Expr x)
Count the number of set bits in an expression.
Expr saturating_sub(Expr a, Expr b)
Compute saturating_narrow(widen(a) - widen(b))
Expr gather(const std::vector< Expr > &args)
Expr print_when(Expr condition, const std::vector< Expr > &values)
Create an Expr that prints whenever it is evaluated, provided that the condition is true.
Expr widening_shift_left(Expr a, Expr b)
Compute widen(a) << b.
Expr pow(Expr x, Expr y)
Return one floating point expression raised to the power of another.
Expr operator&(Expr x, Expr y)
Return the bitwise and of two expressions (which need not have the same type).
Expr undef()
Expr cast(Expr a)
Cast an expression to the halide type corresponding to the C++ type T.
Definition IROperator.h:364
auto operator!(const GeneratorParam< T > &a) -> decltype(!(T) a)
Not operator for GeneratorParam.
Definition Generator.h:1246
Expr lerp(Expr zero_val, Expr one_val, Expr weight)
Linear interpolate between the two values according to a weight.
Expr atan2(Expr y, Expr x)
Return the angle of a floating-point gradient.
Expr saturating_cast(Expr e)
Cast an expression to the halide type corresponding to the C++ type T.
Expr random_float(Expr seed=Expr())
Return a random variable representing a uniformly distributed float in the half-open interval [0....
Expr sin(Expr x)
Return the sine of a floating-point expression.
Expr unsafe_promise_clamped(const Expr &value, const Expr &min, const Expr &max)
Create an Expr that promises another Expr is clamped but do not generate code to check the asser...
Expr rounding_halving_add(Expr a, Expr b)
Compute narrow((widen(a) + widen(b) + 1) / 2)
Expr extract_bits(Type t, const Expr &e, const Expr &lsb)
Extract a contiguous subsequence of the bits of 'e', starting at the bit index given by 'lsb',...
Expr concat_bits(const std::vector< Expr > &e)
Given a number of Exprs of the same type, concatenate their bits producing a single Expr of the same ...
Expr mux(const Expr &id, const std::initializer_list< Expr > &values)
Oftentimes we want to pack a list of expressions with the same type into a channel dimension,...
Expr cosh(Expr x)
Return the hyperbolic cosine of a floating-point expression.
std::ostream & operator<<(std::ostream &stream, const Expr &)
Emit an expression on an output stream (such as std::cout) in human-readable form.
Type Int(int bits, int lanes=1)
Constructing a signed integer type.
Definition Type.h:530
Expr acos(Expr x)
Return the arccosine of a floating-point expression.
Expr fast_exp(const Expr &x)
Fast approximate cleanly vectorizable exp for Float(32).
Expr widening_add(Expr a, Expr b)
Compute widen(a) + widen(b).
Expr cos(Expr x)
Return the cosine of a floating-point expression.
auto operator+(const Other &a, const GeneratorParam< T > &b) -> decltype(a+(T) b)
Addition between GeneratorParam<T> and any type that supports operator+ with T.
Definition Generator.h:1014
Expr min(const FuncRef &a, const FuncRef &b)
Explicit overloads of min and max for FuncRef.
Definition Func.h:603
Expr exp(Expr x)
Return the exponential of a floating-point expression.
Expr widen_right_mul(Expr a, Expr b)
Compute a * widen(b).
Expr absd(Expr a, Expr b)
Return the absolute difference between two values.
auto operator&&(const Other &a, const GeneratorParam< T > &b) -> decltype(a &&(T) b)
Logical and between GeneratorParam<T> and any type that supports operator&& with T.
Definition Generator.h:1157
Tuple tuple_select(const Tuple &condition, const Tuple &true_value, const Tuple &false_value)
Equivalent of ternary select(), but taking/returning tuples.
Expr fast_sin(const Expr &x)
Fast vectorizable approximation to some trigonometric functions for Float(32).
Expr fast_pow(Expr x, Expr y)
Fast approximate cleanly vectorizable pow for Float(32).
auto operator%(const Other &a, const GeneratorParam< T > &b) -> decltype(a %(T) b)
Modulo between GeneratorParam<T> and any type that supports operator% with T.
Definition Generator.h:1066
@ C
No name mangling.
Expr round(Expr x)
Return the whole number closest to a floating-point expression.
Expr select(Expr condition, Expr true_value, Expr false_value)
Returns an expression similar to the ternary operator in C, except that it always evaluates all argum...
Expr count_trailing_zeros(Expr x)
Count the number of trailing zero bits in an expression.
Expr scatter(const std::vector< Expr > &args)
Scatter and gather are used for update definition which must store multiple values to distinct locati...
auto operator<=(const Other &a, const GeneratorParam< T > &b) -> decltype(a<=(T) b)
Less than or equal comparison between GeneratorParam<T> and any type that supports operator<= with T.
Definition Generator.h:1118
Expr rounding_mul_shift_right(Expr a, Expr b, Expr q)
Compute saturating_narrow(rounding_shift_right(widening_mul(a, b), q))
Expr random_int(Expr seed=Expr())
Return a random variable representing a uniformly distributed 32-bit integer.
Expr mod_round_to_zero(Expr x, Expr y)
Compute the remainder of dividing two integers, when division is rounding toward zero.
Expr strict_float(Expr e)
Makes a best effort attempt to preserve IEEE floating-point semantics in evaluating an expression.
Expr & operator/=(Expr &a, Expr b)
Modify the first expression to be the ratio of two expressions, without changing its type.
Expr widening_mul(Expr a, Expr b)
Compute widen(a) * widen(b).
auto operator>(const Other &a, const GeneratorParam< T > &b) -> decltype(a >(T) b)
Greater than comparison between GeneratorParam<T> and any type that supports operator> with T.
Definition Generator.h:1079
Expr is_nan(Expr x)
Returns true if the argument is a Not a Number (NaN).
Expr asinh(Expr x)
Return the hyperbolic arcsine of a floating-point expression.
Expr sqrt(Expr x)
Return the square root of a floating-point expression.
Expr sinh(Expr x)
Return the hyperbolic sine of a floating-point expression.
Expr atan(Expr x)
Return the arctangent of a floating-point expression.
Expr operator|(Expr x, Expr y)
Return the bitwise or of two expressions (which need not have the same type).
auto operator!=(const Other &a, const GeneratorParam< T > &b) -> decltype(a !=(T) b)
Inequality comparison between GeneratorParam<T> and any type that supports operator!...
Definition Generator.h:1144
Expr require(Expr condition, const std::vector< Expr > &values)
Create an Expr that guarantees a precondition.
Expr is_inf(Expr x)
Returns true if the argument is Inf or -Inf.
Expr is_finite(Expr x)
Returns true if the argument is a finite value (ie, neither NaN nor Inf).
Expr tanh(Expr x)
Return the hyperbolic tangent of a floating-point expression.
Expr likely_if_innermost(Expr e)
Equivalent to likely, but only triggers a loop partitioning if found in an innermost loop.
Expr atanh(Expr x)
Return the hyperbolic arctangent of a floating-point expression.
Expr tan(Expr x)
Return the tangent of a floating-point expression.
Expr fast_inverse_sqrt(Expr x)
Fast approximate inverse square root for Float(32).
Expr print(const std::vector< Expr > &values)
Create an Expr that prints out its value whenever it is evaluated.
Expr mul_shift_right(Expr a, Expr b, Expr q)
Compute saturating_narrow(shift_right(widening_mul(a, b), q))
auto operator/(const Other &a, const GeneratorParam< T > &b) -> decltype(a/(T) b)
Division between GeneratorParam<T> and any type that supports operator/ with T.
Definition Generator.h:1053
Expr & operator+=(Expr &a, Expr b)
Modify the first expression to be the sum of two expressions, without changing its type.
Expr abs(Expr a)
Returns the absolute value of a signed integer or floating-point expression.
Expr widen_right_sub(Expr a, Expr b)
Compute a - widen(b).
Expr max(const FuncRef &a, const FuncRef &b)
Definition Func.h:606
Expr floor(Expr x)
Return the greatest whole number less than or equal to a floating-point expression.
Expr div_round_to_zero(Expr x, Expr y)
Divide two integers, rounding towards zero.
Expr widening_sub(Expr a, Expr b)
Compute widen(a) - widen(b).
Expr likely(Expr e)
Expressions tagged with this intrinsic are considered to be part of the steady state of some loop wit...
Expr operator~(Expr x)
Return the bitwise not of an expression.
Expr erf(const Expr &x)
Evaluate the error function erf.
Expr operator^(Expr x, Expr y)
Return the bitwise xor of two expressions (which need not have the same type).
unsigned __INT64_TYPE__ uint64_t
signed __INT64_TYPE__ int64_t
signed __INT32_TYPE__ int32_t
unsigned __INT8_TYPE__ uint8_t
unsigned __INT16_TYPE__ uint16_t
unsigned __INT32_TYPE__ uint32_t
signed __INT16_TYPE__ int16_t
signed __INT8_TYPE__ int8_t
A fragment of Halide syntax.
Definition Expr.h:258
A builder to help create Exprs representing halide_buffer_t structs (e.g.
Definition IROperator.h:210
std::vector< Expr > strides
Definition IROperator.h:215
std::vector< Expr > extents
Definition IROperator.h:215
A reference-counted handle to a statement node.
Definition Expr.h:419
static constexpr bool value
Definition IROperator.h:327
Types in the halide type system.
Definition Type.h:276
HALIDE_ALWAYS_INLINE bool is_int() const
Is this type a signed integer type?
Definition Type.h:428
HALIDE_ALWAYS_INLINE bool is_float() const
Is this type a floating point type (float or double).
Definition Type.h:416
Class that provides a type that implements half precision floating point (IEEE754 2008 binary16) in s...
Definition Float16.h:17