# RPM spec for NVIDIA CUTLASS, built from a pinned git snapshot against a
# versioned CUDA toolkit installed under /usr/local/cuda-<major>.<minor>.

# Snapshot identification: package revision, snapshot date, pinned commit,
# branch and upstream repository used by the prep stage.
%global pkgvers 0
%global scdate0 20241009
%global schash0 cc3c29a81a140f7b97045718fb88eb0664c37bd7
%global branch0 main
%global source0 https://github.com/NVIDIA/cutlass.git

# CUDA toolkit major/minor version used in dependency names and tool paths.
%global vcu_maj 12
%global vcu_min 6

# First 8 characters of the pinned commit hash, used in the Release tag.
%global sshort0 %{expand:%%{lua:print(('%{schash0}'):sub(1,8))}}

Name:           cutlass
Version:        3.6.0
Release:        %{scdate0}.%{pkgvers}.git%{sshort0}.cu%{vcu_maj}_%{vcu_min}%{?dist}
Summary:        Collection of CUDA C++ template
License:        BSD
URL:            https://github.com/NVIDIA/cutlass

Patch0:         cutlass-fp16.patch

BuildRequires:  cmake gcc-c++ git
BuildRequires:  doxygen graphviz python3-setuptools python3-devel

# Build switches and the list of GPU architectures passed to CUTLASS_NVCC_ARCHS.
%define have_cuda 1
%define have_cuda_gcc 1
%define gpu_target_arch "52;61;75;86;89;90"

# CUDA is enabled by default; building "--without cuda" clears have_cuda.
%bcond_without cuda

%if %{without cuda}
%define have_cuda 0
%endif

%if %{have_cuda}
%if %{have_cuda_gcc}
%if (0%{?fedora} > 34) || (0%{?rhel} > 8)
BuildRequires:  cuda-gcc-c++ < 12
%endif
%endif
BuildRequires:  cuda-nvcc-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-nvtx-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-cudart-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-nvml-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-nvrtc-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  cuda-driver-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  libcurand-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  libcublas-devel-%{vcu_maj}-%{vcu_min}
BuildRequires:  libcudnn9-devel-cuda-%{vcu_maj}
Requires:       cuda-nvcc-%{vcu_maj}-%{vcu_min}
Requires:       cuda-nvtx-%{vcu_maj}-%{vcu_min}
Requires:       cuda-cudart-%{vcu_maj}-%{vcu_min}
Requires:       cuda-nvrtc-%{vcu_maj}-%{vcu_min}
Requires:       libcublas-%{vcu_maj}-%{vcu_min}
Requires:       libcurand-%{vcu_maj}-%{vcu_min}
Requires:       libcudnn9-cuda-%{vcu_maj}
%endif

# Build policy overrides: no LTO flags, no debuginfo subpackage, in-source
# cmake build, and a very permissive patch fuzz factor.
%global _lto_cflags %{nil}
%global debug_package %{nil}
%global __cmake_in_source_build 1
%undefine _hardened_build
%undefine _annotated_build
%undefine _find_debuginfo_dwz_opts
%undefine _missing_build_ids_terminate_build
%global _default_patch_fuzz 100

%description
CUDA C++ template abstractions for implementing
high-performance matrix-multiplication (GEMM) and related computations at all levels and scales within CUDA.

%package devel
Summary:        Development files for %{name}
Requires:       %{name} = %{version}-%{release}

%description devel
This package contains development files for %{name}.

%package static
Summary:        Static libraries for %{name}
Requires:       %{name}-devel = %{version}-%{release}

%description static
This package contains static files for %{name}.


%prep
%setup -T -c -n %{name}
# Shallow clone without checkout, then fetch and hard-reset to the pinned
# commit so the build does not depend on the current branch head.
git clone --depth 1 -n -b %{branch0} %{source0} .
git fetch --depth 1 origin %{schash0}
git reset --hard %{schash0}
git --no-pager log --format=fuller
%patch -P 0 -p0 -b .fp16~

# no rpath
sed -i '/-rpath/d' CMakeLists.txt


%build
mkdir -p build
pushd build

# Rebuild optflags: append -fPIC, then drop the first "-g" match.
# The echo argument must be a fully closed double-quoted string, otherwise
# the shell expansion fails and optflags is not rebuilt.
%global optflags %(echo "%{optflags} -fPIC" | sed 's|-g||')
export LD_LIBRARY_PATH="/usr/local/cuda-%{vcu_maj}.%{vcu_min}/%{_lib}/"

%cmake .. \
       -DCMAKE_SKIP_RPATH=ON \
       -DCMAKE_VERBOSE_MAKEFILE=OFF \
       -DCMAKE_BUILD_TYPE=Release \
       -DCMAKE_EXE_LINKER_FLAGS="%{_libdir}/libstdc++.so.6" \
       -DBUILD_TESTING=OFF \
%if 0%{?fedora}
%ifarch x86_64
       -DCUTLASS_ENABLE_F16C=ON \
%endif
%endif
       -DCUTLASS_ENABLE_TESTS=OFF \
       -DCUTLASS_ENABLE_PROFILER=ON \
       -DCUTLASS_ENABLE_EXAMPLES=OFF \
       -DCUDA_PROPAGATE_HOST_FLAGS=OFF \
%if %{have_cuda_gcc}
%if (0%{?fedora} > 34) || (0%{?rhel} > 8)
       -DCMAKE_CUDA_HOST_COMPILER=%{_bindir}/cuda-c++ \
%endif
%endif
       -DCUTLASS_NVCC_EMBED_PTX=ON \
       -DCUTLASS_NVCC_EMBED_CUBIN=ON \
       -DCUTLASS_NVCC_ARCHS=%{gpu_target_arch} \
       -DCMAKE_CUDA_FLAGS="-Wl,--no-relax -Xfatbin=-compress-all --compiler-options -fPIC -Wno-deprecated-gpu-targets -allow-unsupported-compiler -D_SERIALIZE_H_INCLUDED" \
       -DCMAKE_CUDA_COMPILER=/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc

# ppc64le is limited to two parallel jobs; other arches use the default.
%ifarch ppc64le
make -j2
%else
make %{?_smp_mflags}
%endif

popd


%install
rm -rf %{buildroot}

pushd build
%cmake_install
popd

# clean spurious
rm -rf %{buildroot}/usr/test
rm -rf %{buildroot}/usr/share/info

# strip elf
# Tracing is switched off while every installed file that file(1) reports
# as an unstripped ELF object is stripped, then switched back on.
set +x
find %{buildroot} -type f -print | LC_ALL=C sort |
  file -N -f - |
  sed -n -e 's/^\(.*\):[ \t]*.*ELF.*, not stripped.*/\1/p' |
  xargs --no-run-if-empty stat -c '%h %D_%i %n' |
  while read -r nlinks inum f; do
    # nlinks and inum only consume the first two stat fields; f is the path.
    echo "Stripping: $f"
    strip -s "$f"
  done
set -x


%files
%doc README.md docs
%license LICENSE.txt
%{_bindir}/*
%{_libdir}/*.so

%files devel
%{_includedir}/*
%{_libdir}/cmake/*

%files static
%{_libdir}/*.a


%changelog
* Tue Nov 09 2021 Balint Cristian
- github release updates