## START: Set by rpmautospec
## (rpmautospec version 0.7.2)
## RPMAUTOSPEC: autorelease, autochangelog
%define autorelease(e:s:pb:n) %{?-p:0.}%{lua:
    release_number = 2;
    base_release_number = tonumber(rpm.expand("%{?-b*}%{!?-b:1}"));
    print(release_number + base_release_number - 1);
}%{?-e:.%{-e*}}%{?-s:.%{-s*}}%{!?-n:%{?dist}}
## END: Set by rpmautospec

# Upstream project name, used only to name the downloaded source tarball.
%global upstreamname RCCL
# The package version is assembled from the ROCm release and patch level.
%global rocm_release 6.2
%global rocm_patch 0
%global rocm_version %{rocm_release}.%{rocm_patch}

%global toolchain rocm
# hipcc does not support some clang flags
# The sed below prefixes the two hardening flags with -Xarch_host.
%global build_cxxflags %(echo %{optflags} | sed -e 's/-fstack-protector-strong/-Xarch_host -fstack-protector-strong/' -e 's/-fcf-protection/-Xarch_host -fcf-protection/')

# $gpu will be evaluated in the loops below
# (the shell variable is left unexpanded here so that every GPU target
# gets its own out-of-tree build directory)
%global _vpath_builddir %{_vendor}-%{_target_os}-build-${gpu}

# It is necessary to use this with a local build
# export QA_RPATHS=0xff

# Tests are off by default; build with "--with test" to enable them.
%bcond_with test

# rccl is not supported on gfx1103
# On 6.1.1
# lld: error: ld-temp.o :1:25: specified hardware register is not supported on this GPU
# s_getreg_b32 s1, hwreg(HW_REG_HW_ID)
#
# On 6.2
# Problems reported with gfx10, removing gfx10 and default (gfx10 and gfx11) from build list
#
# Handle with a custom gpu list
# gfx9 and gfx11 land in the main package; gfx90a, gfx942 and gfx1100
# each get their own subpackage.
%global rccl_gpu_list gfx9 gfx11 gfx90a gfx942 gfx1100

Name:           rccl
Version:        %{rocm_version}
Release:        %autorelease
Summary:        ROCm Communication Collectives Library

Url:            https://github.com/ROCm/rccl
License:        BSD-3-Clause AND MIT AND Apache-2.0
# From License.txt the main license is BSD 3
# Modifications from Microsoft are MIT
# The NVIDIA based header files below are Apache-2.0
# src/include/nvtx3/nv*.h and similar
# The URL for NVIDIA in the License.txt https://github.com/NVIDIA/NVTX is Apache-2.0

Source0:        %{url}/archive/rocm-%{rocm_version}.tar.gz#/%{upstreamname}-%{rocm_version}.tar.gz
# Patch0:         0001-prepare-rccl-cmake-for-fedora.patch

BuildRequires:  cmake
BuildRequires:  hipify
BuildRequires:  ninja-build
BuildRequires:  rocm-cmake
BuildRequires:  rocm-comgr-devel
BuildRequires:  rocm-hip-devel
BuildRequires:  rocm-runtime-devel
BuildRequires:  rocm-rpm-macros
BuildRequires:  rocm-rpm-macros-modules
BuildRequires:  rocm-smi-devel

Requires:       rocm-rpm-macros-modules
# The msccl algorithm files live in the noarch data subpackage.
Requires:       %{name}-data = %{version}-%{release}

# Only x86_64 works right now:
ExclusiveArch:  x86_64

%description
RCCL (pronounced "Rickle") is a stand-alone library of standard
collective communication routines for GPUs, implementing all-reduce,
all-gather, reduce, broadcast, reduce-scatter, gather, scatter, and
all-to-all. There is also initial support for direct GPU-to-GPU send
and receive operations. It has been optimized to achieve high
bandwidth on platforms using PCIe, xGMI as well as networking using
InfiniBand Verbs or TCP/IP sockets. RCCL supports an arbitrary number
of GPUs installed in a single node or multiple nodes, and can be used
in either single- or multi-process (e.g., MPI) applications.

The collective operations are implemented using ring and tree
algorithms and have been optimized for throughput and latency. For
best performance, small operations can be either batched into larger
operations or aggregated through the API.

%package devel
# Development files; pulls in the runtime library for every packaged
# GPU family so the unversioned .so links shipped here always resolve.
Summary:        Headers and libraries for %{name}
Requires:       %{name}%{?_isa} = %{version}-%{release}
Requires:       %{name}-gfx90a%{?_isa} = %{version}-%{release}
Requires:       %{name}-gfx942%{?_isa} = %{version}-%{release}
Requires:       %{name}-gfx1100%{?_isa} = %{version}-%{release}

%description devel
Headers and libraries for %{name}

%package data
Summary:        Data for %{name}
BuildArch:      noarch

%description data
Data for %{name}

%if %{with test}
%package test
Summary:        Tests for %{name}
Requires:       %{name}%{?_isa} = %{version}-%{release}

%description test
%{summary}
%endif

%package gfx90a
Summary:        %{name} for MI200

%description gfx90a
%{summary}

%package gfx942
Summary:        %{name} for MI300

%description gfx942
%{summary}

%package gfx1100
Summary:        %{name} for W7900

%description gfx1100
%{summary}

%prep
%autosetup -p1 -n %{name}-rocm-%{version}

# Allow user to set AMDGPU_TARGETS
sed -i -e '/AMD GPU targets to compile for/d' CMakeLists.txt

# No parallel-jobs flag
sed -i -e '/parallel-jobs/d' CMakeLists.txt

# No /opt/rocm/.info/version
sed -i -e 's@cat ${ROCM_PATH}/.info/version@echo %{rocm_version}@' CMakeLists.txt

%build
# One configure and build pass per GPU target. The build directory name
# contains ${gpu}, so the passes do not overwrite each other.
for gpu in %{rccl_gpu_list}
do
    # ROCM_GPUS, ROCM_LIB and ROCM_BIN are presumably exported by this
    # module - confirm against rocm-rpm-macros-modules.
    module load rocm/$gpu

    # CMAKE_INSTALL_LIBDIR is passed exactly once, using the per-GPU
    # library directory. An earlier duplicate pointing at the system
    # libdir was always overridden by this one and has been dropped.
    %cmake -G Ninja \
           -DBUILD_FILE_REORG_BACKWARD_COMPATIBILITY=OFF \
           -DROCM_SYMLINK_LIBS=OFF \
           -DAMDGPU_TARGETS=${ROCM_GPUS} \
           -DCMAKE_INSTALL_LIBDIR=$ROCM_LIB \
           -DCMAKE_INSTALL_BINDIR=$ROCM_BIN \
%if %{with test}
           -DBUILD_TESTS=ON \
%endif
           -DHIP_PLATFORM=amd

    %cmake_build
    module purge
done

%install
# Install from each per-GPU build directory in turn.
for gpu in %{rccl_gpu_list}
do
    %cmake_install
done

%files
%license LICENSE.txt
%{_libdir}/rocm/gfx{9,11}/lib/lib%{name}.so.*
# The license is already shipped through the license directive above.
%exclude %{_docdir}/%{name}/LICENSE.txt

%files gfx90a
%{_libdir}/rocm/gfx90a/lib/lib%{name}.so.*

%files gfx942
%{_libdir}/rocm/gfx942/lib/lib%{name}.so.*

%files gfx1100
%{_libdir}/rocm/gfx1100/lib/lib%{name}.so.*

%files data
%dir %{_datadir}/%{name}
%dir %{_datadir}/%{name}/msccl-algorithms
# Own this directory as well, otherwise it is left behind unowned.
%dir %{_datadir}/%{name}/msccl-unit-test-algorithms
%{_datadir}/%{name}/msccl-algorithms/*.xml
%{_datadir}/%{name}/msccl-unit-test-algorithms/*.xml

%files devel
%doc README.md
%dir %{_libdir}/rocm/gfx*/lib/cmake/%{name}
%dir %{_includedir}/%{name}
%{_includedir}/%{name}/*.h
%{_libdir}/rocm/gfx*/lib/lib%{name}.so
%{_libdir}/rocm/gfx*/lib/cmake/%{name}/*.cmake

%if %{with test}
%files test
%{_bindir}/%{name}*
%{_libdir}/rocm/gfx*/bin/%{name}*
%endif

%changelog
## START: Generated by rpmautospec
* Tue Aug 27 2024 Tom Rix - 6.2.0-2
- Workaround gfx10* not building

* Tue Aug 13 2024 Tom Rix - 6.2.0-1
- Update to ROCm 6.2

* Fri Jul 19 2024 Fedora Release Engineering - 6.1.2-3
- Rebuilt for https://fedoraproject.org/wiki/Fedora_41_Mass_Rebuild

* Sat Jul 13 2024 Tom Rix - 6.1.2-2
- Remove second rccl from Source0:

* Fri Jun 07 2024 Jeremy Newton - 6.1.2-1
- Update to 6.1.2

* Tue May 21 2024 Tom Rix - 6.1.1-1
- Update to 6.1.1

* Wed Apr 17 2024 Tom Rix - 6.0.2-1
- Initial package.

## END: Generated by rpmautospec