/**
 * Unsafe (hardware, non-CAS) atomic add for float.
 *
 * "Unsafe" means the hardware fadd instruction is used where available; it
 * does not page-fault gracefully on fine-grained system memory. Returns the
 * value at *addr before the add. Memory order is relaxed, agent scope.
 */
__device__
inline float unsafeAtomicAdd(float* addr, float value) {
#if defined(__gfx90a__) && \
    __has_builtin(__builtin_amdgcn_is_shared) && \
    __has_builtin(__builtin_amdgcn_is_private) && \
    __has_builtin(__builtin_amdgcn_ds_atomic_fadd_f32) && \
    __has_builtin(__builtin_amdgcn_global_atomic_fadd_f32)
  // gfx90a: dispatch on the address space of the generic pointer, because the
  // global fadd instruction must not be used on LDS or scratch addresses.
  if (__builtin_amdgcn_is_shared(
          (const __attribute__((address_space(0))) void*)addr))
    return __builtin_amdgcn_ds_atomic_fadd_f32(addr, value);
  else if (__builtin_amdgcn_is_private(
               (const __attribute__((address_space(0))) void*)addr)) {
    // Private (scratch) memory is thread-local, so a plain read-modify-write
    // is already atomic from this thread's point of view.
    float temp = *addr;
    *addr = temp + value;
    return temp;
  } else {
    return __builtin_amdgcn_global_atomic_fadd_f32(addr, value);
  }
#elif __has_builtin(__hip_atomic_fetch_add)
  return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#else
  return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED);
#endif
}
/**
 * Atomic max for float via a compare-and-swap loop.
 *
 * Only attempts the exchange while val would actually raise the stored value,
 * so already-maximal locations cost a single relaxed load. Returns the value
 * observed at *addr before this thread's (possible) update.
 */
__device__
inline float unsafeAtomicMax(float* addr, float val) {
#if __has_builtin(__hip_atomic_load) && \
    __has_builtin(__hip_atomic_compare_exchange_strong)
  float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  // A failed CAS refreshes `value` with the current contents of *addr, so the
  // loop re-checks the comparison against the up-to-date value each iteration.
  while (!done && value < val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
#else
  // Fallback: CAS on the raw 32-bit pattern via unsigned int.
  unsigned int *uaddr = (unsigned int *)addr;
  unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __uint_as_float(value) < val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val),
               false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __uint_as_float(value);
#endif
}
/**
 * Atomic min for float via a compare-and-swap loop.
 *
 * Mirror image of unsafeAtomicMax: exchanges only while val would lower the
 * stored value. Returns the value observed at *addr before this thread's
 * (possible) update. Relaxed ordering, agent scope.
 */
__device__
inline float unsafeAtomicMin(float* addr, float val) {
#if __has_builtin(__hip_atomic_load) && \
    __has_builtin(__hip_atomic_compare_exchange_strong)
  float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  // Failed CAS reloads `value` from *addr, keeping the comparison current.
  while (!done && value > val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
#else
  // Fallback: CAS on the raw 32-bit pattern via unsigned int.
  unsigned int *uaddr = (unsigned int *)addr;
  unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __uint_as_float(value) > val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val),
               false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __uint_as_float(value);
#endif
}
/**
 * Unsafe (hardware, non-CAS) atomic add for double.
 *
 * On gfx90a uses the flat f64 fadd instruction directly; otherwise falls back
 * to the compiler atomic builtins. Returns the prior value of *addr.
 */
__device__
inline double unsafeAtomicAdd(double* addr, double value) {
#if defined(__gfx90a__) && __has_builtin(__builtin_amdgcn_flat_atomic_fadd_f64)
  return __builtin_amdgcn_flat_atomic_fadd_f64(addr, value);
#elif __has_builtin(__hip_atomic_fetch_add)
  // FIX: this branch previously tested `defined(__hip_atomic_fetch_add)`.
  // `defined` only tests preprocessor macros, and __hip_atomic_fetch_add is a
  // compiler builtin, so the branch could never be taken; `__has_builtin`
  // matches the float overload above.
  return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#else
  return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED);
#endif
}
/**
 * Atomic max for double.
 *
 * On CDNA parts with the flat f64 fmax instruction (gfx90a/940/941/942) a
 * single hardware atomic is issued; otherwise a CAS loop is used. Returns the
 * value observed at *addr before this thread's (possible) update.
 */
__device__
inline double unsafeAtomicMax(double* addr, double val) {
#if (defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) && \
    __has_builtin(__builtin_amdgcn_flat_atomic_fmax_f64)
  return __builtin_amdgcn_flat_atomic_fmax_f64(addr, val);
#else
#if __has_builtin(__hip_atomic_load) && \
    __has_builtin(__hip_atomic_compare_exchange_strong)
  double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  // Failed CAS reloads `value` from *addr, keeping the comparison current.
  while (!done && value < val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
#else
  // Fallback: CAS on the raw 64-bit pattern via unsigned long long.
  unsigned long long *uaddr = (unsigned long long *)addr;
  unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __longlong_as_double(value) < val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val),
               false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __longlong_as_double(value);
#endif
#endif
}
/**
 * Atomic min for double.
 *
 * On CDNA parts with the flat f64 fmin instruction (gfx90a/940/941/942) a
 * single hardware atomic is issued; otherwise a CAS loop is used. Returns the
 * value observed at *addr before this thread's (possible) update.
 */
__device__
inline double unsafeAtomicMin(double* addr, double val) {
#if (defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)) && \
    __has_builtin(__builtin_amdgcn_flat_atomic_fmin_f64)
  return __builtin_amdgcn_flat_atomic_fmin_f64(addr, val);
#else
#if __has_builtin(__hip_atomic_load) && \
    __has_builtin(__hip_atomic_compare_exchange_strong)
  double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  // Failed CAS reloads `value` from *addr, keeping the comparison current.
  while (!done && value > val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
#else
  // Fallback: CAS on the raw 64-bit pattern via unsigned long long.
  unsigned long long *uaddr = (unsigned long long *)addr;
  unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __longlong_as_double(value) > val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val),
               false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __longlong_as_double(value);
#endif
#endif
}
/**
 * Safe atomic add for float: never relies on the unchecked hardware fadd, so
 * it behaves correctly on fine-grained / system memory at the cost of a CAS
 * loop on targets whose hardware fadd is unsafe. Returns the prior value.
 */
__device__
inline float safeAtomicAdd(float* addr, float value) {
#if defined(__gfx908__) || defined(__gfx941__) \
    || ((defined(__gfx90a__) || defined(__gfx940__) || defined(__gfx942__)) \
        && !__has_builtin(__hip_atomic_fetch_add))
  // Targets where the hardware float add is not safe: emulate with CAS.
  float old_val;
#if __has_builtin(__hip_atomic_load)
  old_val = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#else
  old_val = __uint_as_float(__atomic_load_n(reinterpret_cast<unsigned int*>(addr),
                                            __ATOMIC_RELAXED));
#endif
  float expected, temp;
  do {
    temp = expected = old_val;
#if __has_builtin(__hip_atomic_compare_exchange_strong)
    __hip_atomic_compare_exchange_strong(addr, &expected, old_val + value, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#else
    __atomic_compare_exchange_n(addr, &expected, old_val + value,
                                false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
#endif
    // On failure `expected` holds the current contents of *addr; retry the
    // add against that. Compare bit patterns so NaN payloads terminate too.
    old_val = expected;
  } while (__float_as_uint(temp) != __float_as_uint(old_val));
  return old_val;
#elif defined(__gfx90a__)
  // gfx90a with __hip_atomic_fetch_add available: system scope forces the
  // compiler to emit a safe sequence for fine-grained memory.
  return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#elif __has_builtin(__hip_atomic_fetch_add)
  return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#else
  return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED);
#endif
}
/**
 * Safe atomic max for float via a compare-and-swap loop.
 *
 * CAS-based, so it is correct on any memory the atomics can address. Returns
 * the value observed at *addr before this thread's (possible) update.
 */
__device__
inline float safeAtomicMax(float* addr, float val) {
#if __has_builtin(__hip_atomic_load) && \
    __has_builtin(__hip_atomic_compare_exchange_strong)
  float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  // Failed CAS reloads `value` from *addr, keeping the comparison current.
  while (!done && value < val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
#else
  // Fallback: CAS on the raw 32-bit pattern via unsigned int.
  unsigned int *uaddr = (unsigned int *)addr;
  unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __uint_as_float(value) < val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val),
               false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __uint_as_float(value);
#endif
}
/**
 * Safe atomic min for float via a compare-and-swap loop.
 *
 * CAS-based, so it is correct on any memory the atomics can address. Returns
 * the value observed at *addr before this thread's (possible) update.
 */
__device__
inline float safeAtomicMin(float* addr, float val) {
#if __has_builtin(__hip_atomic_load) && \
    __has_builtin(__hip_atomic_compare_exchange_strong)
  float value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  bool done = false;
  // Failed CAS reloads `value` from *addr, keeping the comparison current.
  while (!done && value > val) {
    done = __hip_atomic_compare_exchange_strong(addr, &value, val,
               __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
  }
  return value;
#else
  // Fallback: CAS on the raw 32-bit pattern via unsigned int.
  unsigned int *uaddr = (unsigned int *)addr;
  unsigned int value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
  bool done = false;
  while (!done && __uint_as_float(value) > val) {
    done = __atomic_compare_exchange_n(uaddr, &value, __float_as_uint(val),
               false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
  }
  return __uint_as_float(value);
#endif
}
/**
 * Safe atomic add for double: avoids the unchecked hardware f64 fadd on
 * gfx90a so fine-grained / system memory is handled correctly. Returns the
 * prior value of *addr.
 */
__device__
inline double safeAtomicAdd(double* addr, double value) {
#if defined(__gfx90a__) && __has_builtin(__hip_atomic_fetch_add)
  // System scope forces the compiler to emit a safe sequence rather than the
  // unchecked flat fadd instruction.
  return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
#elif defined(__gfx90a__)
  // gfx90a without __hip_atomic_fetch_add: emulate with a CAS loop.
  double old_val;
#if __has_builtin(__hip_atomic_load)
  old_val = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#else
  old_val = __longlong_as_double(__atomic_load_n(reinterpret_cast<unsigned long long*>(addr),
                                                 __ATOMIC_RELAXED));
#endif
  double expected, temp;
  do {
    temp = expected = old_val;
#if __has_builtin(__hip_atomic_compare_exchange_strong)
    __hip_atomic_compare_exchange_strong(addr, &expected, old_val + value, __ATOMIC_RELAXED,
                                         __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#else
    __atomic_compare_exchange_n(addr, &expected, old_val + value,
                                false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
#endif
    // On failure `expected` holds the current contents of *addr; retry the
    // add against that. Compare bit patterns so NaN payloads terminate too.
    old_val = expected;
  } while (__double_as_longlong(temp) != __double_as_longlong(old_val));
  return old_val;
#else
#if __has_builtin(__hip_atomic_fetch_add)
  return __hip_atomic_fetch_add(addr, value, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
#else
  return __atomic_fetch_add(addr, value, __ATOMIC_RELAXED);
#endif
#endif
}
/**
 * Safe atomic max for double.
 *
 * Private (scratch) addresses are thread-local and handled with a plain
 * read-modify-write; all other address spaces go through a CAS loop. Returns
 * the value observed at *addr before this thread's (possible) update.
 */
__device__
inline double safeAtomicMax(double* addr, double val) {
#if __has_builtin(__builtin_amdgcn_is_private)
  if (__builtin_amdgcn_is_private(
          (const __attribute__((address_space(0))) void*)addr)) {
    // Thread-local scratch: no atomicity required.
    double old = *addr;
    *addr = __builtin_fmax(old, val);
    return old;
  } else {
#endif
#if __has_builtin(__hip_atomic_load) && \
    __has_builtin(__hip_atomic_compare_exchange_strong)
    double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    bool done = false;
    // Failed CAS reloads `value` from *addr, keeping the comparison current.
    while (!done && value < val) {
      done = __hip_atomic_compare_exchange_strong(addr, &value, val,
                 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    }
    return value;
#else
    // Fallback: CAS on the raw 64-bit pattern via unsigned long long.
    unsigned long long *uaddr = (unsigned long long *)addr;
    unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
    bool done = false;
    while (!done && __longlong_as_double(value) < val) {
      done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val),
                 false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
    }
    return __longlong_as_double(value);
#endif
#if __has_builtin(__builtin_amdgcn_is_private)
  }
#endif
}
/**
 * Safe atomic min for double.
 *
 * Private (scratch) addresses are thread-local and handled with a plain
 * read-modify-write; all other address spaces go through a CAS loop. Returns
 * the value observed at *addr before this thread's (possible) update.
 */
__device__
inline double safeAtomicMin(double* addr, double val) {
#if __has_builtin(__builtin_amdgcn_is_private)
  if (__builtin_amdgcn_is_private(
          (const __attribute__((address_space(0))) void*)addr)) {
    // Thread-local scratch: no atomicity required.
    double old = *addr;
    *addr = __builtin_fmin(old, val);
    return old;
  } else {
#endif
#if __has_builtin(__hip_atomic_load) && \
    __has_builtin(__hip_atomic_compare_exchange_strong)
    double value = __hip_atomic_load(addr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    bool done = false;
    // Failed CAS reloads `value` from *addr, keeping the comparison current.
    while (!done && value > val) {
      done = __hip_atomic_compare_exchange_strong(addr, &value, val,
                 __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
    }
    return value;
#else
    // Fallback: CAS on the raw 64-bit pattern via unsigned long long.
    unsigned long long *uaddr = (unsigned long long *)addr;
    unsigned long long value = __atomic_load_n(uaddr, __ATOMIC_RELAXED);
    bool done = false;
    while (!done && __longlong_as_double(value) > val) {
      done = __atomic_compare_exchange_n(uaddr, &value, __double_as_longlong(val),
                 false, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
    }
    return __longlong_as_double(value);
#endif
#if __has_builtin(__builtin_amdgcn_is_private)
  }
#endif
}
_Float16 __2f16 __attribute__((ext_vector_type(2)))
Definition hip_fp16_math_fwd.h:57