runtime_atomics.h
#ifndef HALIDE_RUNTIME_RUNTIME_ATOMICS_H
#define HALIDE_RUNTIME_RUNTIME_ATOMICS_H

// This file provides an abstraction layer over the __sync/__atomic builtins
// in Clang; for various reasons, we use __sync for 32-bit targets, and
// __atomic for 64-bit. At some point it may be desirable/necessary to
// migrate 32-bit to __atomic as well, at which time this file can
// likely go away. See https://github.com/halide/Halide/issues/7431 for
// a discussion of the history and issues as to why we work this way.
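//
// A minimal usage sketch (illustrative only, not part of this header): a
// test-and-set spinlock built on the CAS and store wrappers declared below.
// The `spinlock_example`, `lock`, and `unlock` names are hypothetical, not
// Halide runtime API.
//
//   struct spinlock_example {
//       uintptr_t state = 0;  // 0 = unlocked, 1 = held
//   };
//
//   void lock(spinlock_example *l) {
//       uintptr_t expected = 0, desired = 1;
//       // On failure, the CAS wrappers write the observed value back into
//       // `expected`, so it must be reset before retrying.
//       while (!atomic_cas_weak_acquire_relaxed(&l->state, &expected, &desired)) {
//           expected = 0;
//       }
//   }
//
//   void unlock(spinlock_example *l) {
//       uintptr_t unlocked = 0;
//       atomic_store_release(&l->state, &unlocked);
//   }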

#include "HalideRuntime.h"

namespace Halide {
namespace Runtime {
namespace Internal {
namespace Synchronization {

namespace {

// TODO: most of these wrappers should do the remove_volatile for secondary arguments;
// I've only put it in place for the locations necessary at this time.
template<class T>
struct remove_volatile {
    typedef T type;
};
template<class T>
struct remove_volatile<volatile T> {
    typedef T type;
};
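
// For illustration (mirroring std::remove_volatile, which is unavailable in
// the runtime):
//   remove_volatile<volatile int>::type is int
//   remove_volatile<int>::type is also int
// This lets a wrapper taking a `volatile T *addr` accept plain, non-volatile
// value arguments via TV = typename remove_volatile<T>::type.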

#ifdef BITS_32
ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
    return __sync_and_and_fetch(addr, val);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
    return __sync_fetch_and_add(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE T atomic_fetch_add_sequentially_consistent(T *addr, TV val) {
    return __sync_fetch_and_add(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE T atomic_fetch_sub_sequentially_consistent(T *addr, TV val) {
    return __sync_fetch_and_sub(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE T atomic_fetch_or_sequentially_consistent(T *addr, TV val) {
    return __sync_fetch_and_or(addr, val);
}

template<typename T>
ALWAYS_INLINE T atomic_add_fetch_sequentially_consistent(T *addr, T val) {
    return __sync_add_and_fetch(addr, val);
}

template<typename T>
ALWAYS_INLINE T atomic_sub_fetch_sequentially_consistent(T *addr, T val) {
    return __sync_sub_and_fetch(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE bool cas_strong_sequentially_consistent_helper(T *addr, TV *expected, TV *desired) {
    TV oldval = *expected;
    TV gotval = __sync_val_compare_and_swap(addr, oldval, *desired);
    *expected = gotval;
    return oldval == gotval;
}
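
// The helper above follows the __atomic_compare_exchange contract: it returns
// true if the swap happened, and on failure it stores the value actually seen
// at *addr back into *expected. A typical retry loop looks like this (a
// sketch; the `counter` variable is hypothetical):
//
//   uintptr_t expected;
//   atomic_load_relaxed(&counter, &expected);
//   uintptr_t desired = expected + 1;
//   while (!atomic_cas_weak_relaxed_relaxed(&counter, &expected, &desired)) {
//       desired = expected + 1;  // expected now holds the freshly observed value
//   }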

ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE bool atomic_cas_strong_sequentially_consistent(T *addr, TV *expected, TV *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

template<typename T>
ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return cas_strong_sequentially_consistent_helper(addr, expected, desired);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_and_release(T *addr, T val) {
    return __sync_fetch_and_and(addr, val);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE T atomic_fetch_and_sequentially_consistent(T *addr, TV val) {
    return __sync_fetch_and_and(addr, val);
}

template<typename T>
ALWAYS_INLINE void atomic_load_relaxed(T *addr, T *val) {
    *val = *addr;
}

template<typename T>
ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
    __sync_synchronize();
    *val = *addr;
}

template<typename T>
ALWAYS_INLINE T atomic_exchange_acquire(T *addr, T val) {
    // Despite its name, __sync_lock_test_and_set is really just an exchange
    // operation with acquire ordering.
    return __sync_lock_test_and_set(addr, val);
}

ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
    return __sync_or_and_fetch(addr, val);
}

ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
    *addr = *val;
}

template<typename T>
ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
    *addr = *val;
    __sync_synchronize();
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE void atomic_store_sequentially_consistent(T *addr, TV *val) {
    *addr = *val;
    __sync_synchronize();
}

ALWAYS_INLINE void atomic_thread_fence_acquire() {
    __sync_synchronize();
}

ALWAYS_INLINE void atomic_thread_fence_sequentially_consistent() {
    __sync_synchronize();
}

#else

ALWAYS_INLINE uintptr_t atomic_and_fetch_release(uintptr_t *addr, uintptr_t val) {
    return __atomic_and_fetch(addr, val, __ATOMIC_RELEASE);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_add_acquire_release(T *addr, T val) {
    return __atomic_fetch_add(addr, val, __ATOMIC_ACQ_REL);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE T atomic_fetch_add_sequentially_consistent(T *addr, TV val) {
    return __atomic_fetch_add(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE T atomic_fetch_sub_sequentially_consistent(T *addr, TV val) {
    return __atomic_fetch_sub(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE T atomic_fetch_or_sequentially_consistent(T *addr, TV val) {
    return __atomic_fetch_or(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T>
ALWAYS_INLINE T atomic_add_fetch_sequentially_consistent(T *addr, T val) {
    return __atomic_add_fetch(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T>
ALWAYS_INLINE T atomic_sub_fetch_sequentially_consistent(T *addr, T val) {
    return __atomic_sub_fetch(addr, val, __ATOMIC_SEQ_CST);
}

ALWAYS_INLINE bool atomic_cas_strong_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE bool atomic_cas_strong_sequentially_consistent(T *addr, TV *expected, TV *desired) {
    return __atomic_compare_exchange(addr, expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

template<typename T>
ALWAYS_INLINE bool atomic_cas_weak_relacq_relaxed(T *addr, T *expected, T *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQ_REL, __ATOMIC_RELAXED);
}

ALWAYS_INLINE bool atomic_cas_weak_release_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELEASE, __ATOMIC_RELAXED);
}

ALWAYS_INLINE bool atomic_cas_weak_relaxed_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}

ALWAYS_INLINE bool atomic_cas_weak_acquire_relaxed(uintptr_t *addr, uintptr_t *expected, uintptr_t *desired) {
    return __atomic_compare_exchange(addr, expected, desired, true, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE T atomic_fetch_and_release(T *addr, T val) {
    return __atomic_fetch_and(addr, val, __ATOMIC_RELEASE);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE T atomic_fetch_and_sequentially_consistent(T *addr, TV val) {
    return __atomic_fetch_and(addr, val, __ATOMIC_SEQ_CST);
}

template<typename T>
ALWAYS_INLINE void atomic_load_relaxed(T *addr, T *val) {
    __atomic_load(addr, val, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE void atomic_load_acquire(T *addr, T *val) {
    __atomic_load(addr, val, __ATOMIC_ACQUIRE);
}

template<typename T>
ALWAYS_INLINE T atomic_exchange_acquire(T *addr, T val) {
    T result;
    __atomic_exchange(addr, &val, &result, __ATOMIC_ACQUIRE);
    return result;
}

ALWAYS_INLINE uintptr_t atomic_or_fetch_relaxed(uintptr_t *addr, uintptr_t val) {
    return __atomic_or_fetch(addr, val, __ATOMIC_RELAXED);
}

ALWAYS_INLINE void atomic_store_relaxed(uintptr_t *addr, uintptr_t *val) {
    __atomic_store(addr, val, __ATOMIC_RELAXED);
}

template<typename T>
ALWAYS_INLINE void atomic_store_release(T *addr, T *val) {
    __atomic_store(addr, val, __ATOMIC_RELEASE);
}

template<typename T, typename TV = typename remove_volatile<T>::type>
ALWAYS_INLINE void atomic_store_sequentially_consistent(T *addr, TV *val) {
    __atomic_store(addr, val, __ATOMIC_SEQ_CST);
}

ALWAYS_INLINE void atomic_thread_fence_acquire() {
    __atomic_thread_fence(__ATOMIC_ACQUIRE);
}

ALWAYS_INLINE void atomic_thread_fence_sequentially_consistent() {
    __atomic_thread_fence(__ATOMIC_SEQ_CST);
}

#endif
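
// Example of pairing the acquire/release operations above for message passing
// (a sketch; `payload`, `ready`, and `use` are hypothetical):
//
//   // Producer:
//   payload = 42;                        // ordinary write
//   uintptr_t one = 1;
//   atomic_store_release(&ready, &one);  // publish
//
//   // Consumer:
//   uintptr_t flag;
//   atomic_load_acquire(&ready, &flag);
//   if (flag) {
//       // With the acquire/release pairing, the payload write is intended
//       // to be visible here.
//       use(payload);
//   }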

}  // namespace

}  // namespace Synchronization
}  // namespace Internal
}  // namespace Runtime
}  // namespace Halide

#endif  // HALIDE_RUNTIME_RUNTIME_ATOMICS_H