HIP: Heterogenous-computing Interface for Portability
Loading...
Searching...
No Matches
functional_grid_launch.hpp
1/*
2Copyright (c) 2015 - 2021 Advanced Micro Devices, Inc. All rights reserved.
3
4Permission is hereby granted, free of charge, to any person obtaining a copy
5of this software and associated documentation files (the "Software"), to deal
6in the Software without restriction, including without limitation the rights
7to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8copies of the Software, and to permit persons to whom the Software is
9furnished to do so, subject to the following conditions:
10
11The above copyright notice and this permission notice shall be included in
12all copies or substantial portions of the Software.
13
14THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20THE SOFTWARE.
21*/
22
23#pragma once
24
25#include "concepts.hpp"
26#include "helpers.hpp"
27#include "program_state.hpp"
28#include "hip_runtime_api.h"
29
30#include <cstdint>
31#include <cstring>
32#include <stdexcept>
33#include <tuple>
34#include <type_traits>
35#include <utility>
36
37hipError_t ihipExtLaunchMultiKernelMultiDevice(hipLaunchParams* launchParamsList, int numDevices,
38 unsigned int flags, hip_impl::program_state& ps);
39
40hipError_t hipLaunchCooperativeKernel(const void* f, dim3 gridDim,
41 dim3 blockDim, void** args,
42 size_t sharedMem, hipStream_t stream,
44
45hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
46 int numDevices,
47 unsigned int flags,
49
50#pragma GCC visibility push(hidden)
51
52namespace hip_impl {
53template <typename T, typename std::enable_if<std::is_integral<T>{}>::type* = nullptr>
54inline T round_up_to_next_multiple_nonnegative(T x, T y) {
55 T tmp = x + y - 1;
56 return tmp - tmp % y;
57}
58
59template <
60 std::size_t n,
61 typename... Ts,
62 typename std::enable_if<n == sizeof...(Ts)>::type* = nullptr>
63inline hip_impl::kernarg make_kernarg(
64 const std::tuple<Ts...>&,
65 const kernargs_size_align&,
66 hip_impl::kernarg kernarg) {
67 return kernarg;
68}
69
70template <
71 std::size_t n,
72 typename... Ts,
73 typename std::enable_if<n != sizeof...(Ts)>::type* = nullptr>
74inline hip_impl::kernarg make_kernarg(
75 const std::tuple<Ts...>& formals,
76 const kernargs_size_align& size_align,
77 hip_impl::kernarg kernarg) {
78 using T = typename std::tuple_element<n, std::tuple<Ts...>>::type;
79
80 static_assert(
81 !std::is_reference<T>{},
82 "A __global__ function cannot have a reference as one of its "
83 "arguments.");
84 #if defined(HIP_STRICT)
85 static_assert(
86 std::is_trivially_copyable<T>{},
87 "Only TriviallyCopyable types can be arguments to a __global__ "
88 "function");
89 #endif
90
91 kernarg.resize(round_up_to_next_multiple_nonnegative(
92 kernarg.size(), size_align.alignment(n)) + size_align.size(n));
93
94 std::memcpy(
95 kernarg.data() + kernarg.size() - size_align.size(n),
96 &std::get<n>(formals),
97 size_align.size(n));
98 return make_kernarg<n + 1>(formals, size_align, std::move(kernarg));
99}
100
101template <typename... Formals, typename... Actuals>
102inline hip_impl::kernarg make_kernarg(
103 void (*kernel)(Formals...), std::tuple<Actuals...> actuals) {
104 static_assert(sizeof...(Formals) == sizeof...(Actuals),
105 "The count of formal arguments must match the count of actuals.");
106
107 if (sizeof...(Formals) == 0) return {};
108
109 std::tuple<Formals...> to_formals{std::move(actuals)};
110 hip_impl::kernarg kernarg;
111 kernarg.reserve(sizeof(to_formals));
112
113 auto& ps = hip_impl::get_program_state();
114 return make_kernarg<0>(to_formals,
115 ps.get_kernargs_size_align(
116 reinterpret_cast<std::uintptr_t>(kernel)),
117 std::move(kernarg));
118}
119
120
121HIP_INTERNAL_EXPORTED_API hsa_agent_t target_agent(hipStream_t stream);
122
123inline
124__attribute__((visibility("hidden")))
125void hipLaunchKernelGGLImpl(
126 std::uintptr_t function_address,
127 const dim3& numBlocks,
128 const dim3& dimBlocks,
129 std::uint32_t sharedMemBytes,
130 hipStream_t stream,
131 void** kernarg) {
132
133 const auto& kd = hip_impl::get_program_state().kernel_descriptor(function_address,
134 target_agent(stream));
135
136 hipModuleLaunchKernel(kd, numBlocks.x, numBlocks.y, numBlocks.z,
137 dimBlocks.x, dimBlocks.y, dimBlocks.z, sharedMemBytes,
138 stream, nullptr, kernarg);
139}
140} // Namespace hip_impl.
141
142
143template <class T>
144inline
145hipError_t hipOccupancyMaxPotentialBlockSize(int* gridSize, int* blockSize,
146 T kernel, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0) {
147
148 using namespace hip_impl;
149
150 hip_impl::hip_init();
151 auto f = get_program_state().kernel_descriptor(reinterpret_cast<std::uintptr_t>(kernel),
152 target_agent(0));
153
154 return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f,
155 dynSharedMemPerBlk, blockSizeLimit);
156}
157
158template <class T>
159inline
160hipError_t hipOccupancyMaxPotentialBlockSizeWithFlags(int* gridSize, int* blockSize,
161 T kernel, size_t dynSharedMemPerBlk = 0, int blockSizeLimit = 0, unsigned int flags = 0 ) {
162
163 using namespace hip_impl;
164
165 hip_impl::hip_init();
166 if(flags != hipOccupancyDefault) return hipErrorNotSupported;
167 auto f = get_program_state().kernel_descriptor(reinterpret_cast<std::uintptr_t>(kernel),
168 target_agent(0));
169
170 return hipModuleOccupancyMaxPotentialBlockSize(gridSize, blockSize, f,
171 dynSharedMemPerBlk, blockSizeLimit);
172}
173
174template <typename... Args, typename F = void (*)(Args...)>
175inline
176void hipLaunchKernelGGL(F kernel, const dim3& numBlocks, const dim3& dimBlocks,
177 std::uint32_t sharedMemBytes, hipStream_t stream,
178 Args... args) {
179 hip_impl::hip_init();
180 auto kernarg = hip_impl::make_kernarg(kernel, std::tuple<Args...>{std::move(args)...});
181 std::size_t kernarg_size = kernarg.size();
182
183 void* config[]{
184 HIP_LAUNCH_PARAM_BUFFER_POINTER,
185 kernarg.data(),
186 HIP_LAUNCH_PARAM_BUFFER_SIZE,
187 &kernarg_size,
188 HIP_LAUNCH_PARAM_END};
189
190 hip_impl::hipLaunchKernelGGLImpl(reinterpret_cast<std::uintptr_t>(kernel),
191 numBlocks, dimBlocks, sharedMemBytes,
192 stream, &config[0]);
193}
194
195template <typename F>
196inline
197__attribute__((visibility("hidden")))
198hipError_t hipLaunchCooperativeKernel(F f, dim3 gridDim, dim3 blockDim,
199 void** args, size_t sharedMem,
200 hipStream_t stream) {
201 hip_impl::hip_init();
202 auto& ps = hip_impl::get_program_state();
203 return hipLaunchCooperativeKernel(reinterpret_cast<void*>(f), gridDim,
204 blockDim, args, sharedMem, stream, ps);
205}
206
207inline
208__attribute__((visibility("hidden")))
209hipError_t hipLaunchCooperativeKernelMultiDevice(hipLaunchParams* launchParamsList,
210 int numDevices,
211 unsigned int flags) {
212
213 hip_impl::hip_init();
214 auto& ps = hip_impl::get_program_state();
215 return hipLaunchCooperativeKernelMultiDevice(launchParamsList, numDevices, flags, ps);
216}
217
218#pragma GCC visibility pop
Definition program_state.hpp:48
Definition program_state.hpp:63