mirror of
https://github.com/gentoo-mirror/gentoo.git
synced 2025-12-10 00:10:19 +03:00
Signed-off-by: Paul Zander <negril.nx+gentoo@gmail.com>
Part-of: https://github.com/gentoo/gentoo/pull/44489
Closes: https://github.com/gentoo/gentoo/pull/44489
Signed-off-by: Alfredo Tupone <tupone@gentoo.org>
162 lines
3.2 KiB
Bash
162 lines
3.2 KiB
Bash
# Copyright 2023-2025 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

PYTHON_COMPAT=( python3_{11..14} )

inherit cuda cmake python-any-r1 flag-o-matic toolchain-funcs

DESCRIPTION="CUDA Templates for Linear Algebra Subroutines"
HOMEPAGE="https://github.com/NVIDIA/cutlass"

# Live (9999) versions are fetched from git and carry no KEYWORDS;
# everything else uses the tagged release tarball.
if [[ "${PV}" == *9999* ]]; then
	inherit git-r3
	EGIT_REPO_URI="https://github.com/NVIDIA/${PN}"
else
	SRC_URI="
		https://github.com/NVIDIA/${PN}/archive/refs/tags/v${PV}.tar.gz -> ${P}.tar.gz
	"
	KEYWORDS="~amd64"
fi

LICENSE="BSD"
SLOT="0"

# Entries are "<flag>:<feature>". CPU_FEATURES prepends cpu_flags_x86_ to
# each entry, and IUSE below strips the ":<feature>" suffix again.
X86_CPU_FEATURES=(
	f16c:f16c
)
CPU_FEATURES=( "${X86_CPU_FEATURES[@]/#/cpu_flags_x86_}" )

IUSE="clang-cuda cublas cudnn doc dot examples +headers-only jumbo-build performance profiler test tools ${CPU_FEATURES[*]%:*}"

# headers-only excludes everything that has to be compiled;
# the tests additionally require the tools to be enabled.
REQUIRED_USE="
	headers-only? (
		!examples
		!profiler
		!test
	)
	test? (
		tools
	)
"

RESTRICT="!test? ( test )"

RDEPEND="
	dev-util/nvidia-cuda-toolkit:=
"
# NOTE(review): dev-libs/cudnn is only pulled in for USE="test cudnn",
# while src_configure passes CUTLASS_ENABLE_CUDNN for USE=cudnn alone.
# Confirm whether the dependency should also apply without USE=test.
DEPEND="${RDEPEND}
	test? (
		${PYTHON_DEPS}
		dev-cpp/gtest
		cudnn? (
			dev-libs/cudnn:=
		)
	)
	tools? (
		${PYTHON_DEPS}
	)
"

# Run the python-any-r1 setup phase, but only for configurations that
# list ${PYTHON_DEPS} in DEPEND (USE=test or USE=tools).
pkg_setup() {
	if use test || use tools; then
		python-any-r1_pkg_setup
	fi
}

# Configure the CMake build from the enabled USE flags.
#
# Picks the host compiler (clang for USE=clang-cuda, otherwise the active
# toolchain), maps USE flags onto the CUTLASS_* CMake options and passes
# the collected mycmakeargs on to cmake_src_configure.
src_configure() {
	# we can use clang as default
	if use clang-cuda && ! tc-is-clang; then
		export CC="${CHOST}-clang"
		export CXX="${CHOST}-clang++"
	else
		tc-export CXX CC
	fi

	cuda_add_sandbox
	addpredict "/dev/char/"

	local mycmakeargs=(
		-DCMAKE_POLICY_DEFAULT_CMP0156="OLD" # cutlass_add_library

		-DCMAKE_DISABLE_FIND_PACKAGE_Doxygen="$(usex !doc)"

		-DCUTLASS_REVISION="${PVR}"
		-DCUTLASS_ENABLE_CUBLAS="$(usex cublas)"
		-DCUTLASS_ENABLE_CUDNN="$(usex cudnn)"
		-DCUTLASS_ENABLE_EXAMPLES="$(usex examples)"
		-DCUTLASS_ENABLE_F16C="$(usex cpu_flags_x86_f16c)"
		-DCUTLASS_ENABLE_GTEST_UNIT_TESTS="$(usex test)"
		-DCUTLASS_ENABLE_HEADERS_ONLY="$(usex headers-only)"
		-DCUTLASS_ENABLE_LIBRARY="$(usex !headers-only)"
		-DCUTLASS_ENABLE_PERFORMANCE="$(usex performance)"
		-DCUTLASS_ENABLE_PROFILER="$(usex profiler)"
		# Only enabled when both USE=test and USE=profiler are set.
		-DCUTLASS_ENABLE_PROFILER_UNIT_TESTS="$(usex test "$(usex profiler)")"
		-DCUTLASS_ENABLE_TESTS="$(usex test)"
		-DCUTLASS_ENABLE_TOOLS="$(usex tools)"
		-DCUTLASS_INSTALL_TESTS="no"
		# Honour a user-supplied CUDAARCHS, falling back to all-major.
		-DCUTLASS_NVCC_ARCHS="${CUDAARCHS:-all-major}"
		-DCUTLASS_UNITY_BUILD_ENABLED="$(usex jumbo-build)"
		-DCUTLASS_USE_SYSTEM_GOOGLETEST="yes"
		-DIMPLICIT_CMAKE_CXX_STANDARD="yes"
	)

	# clang-cuda needs to filter mfpmath
	if use clang-cuda; then
		filter-mfpmath sse
		filter-mfpmath i386

		mycmakeargs+=(
			-DCMAKE_CUDA_HOST_COMPILER="${CHOST}-clang++"
		)
	else
		mycmakeargs+=(
			-DCMAKE_CUDA_HOST_COMPILER="$(cuda_gccdir)"
		)
	fi

	# Point CMake at cuDNN; CUDNN_PATH overrides the ${ESYSROOT}/opt/cuda default.
	if use cudnn; then
		mycmakeargs+=(
			-DCUDNN_INCLUDE_DIR="${CUDNN_PATH:-${ESYSROOT}/opt/cuda}/linux/include"
			-DCUDNN_LIBRARY="${CUDNN_PATH:-${ESYSROOT}/opt/cuda}/$(get_libdir)/libcudnn.so"
		)
	fi

	if use doc; then
		mycmakeargs+=(
			-DCUTLASS_ENABLE_DOXYGEN_DOT="$(usex dot)"
		)
	fi

	if use test; then
		mycmakeargs+=(
			-DCUTLASS_TEST_LEVEL="0"
		)

		append-cxxflags -DNDEBUG
	fi

	cmake_src_configure
}

# Run the test suite serially: first the ctest run via cmake_src_test
# (minus the entries in CMAKE_SKIP_TESTS), then the test_unit build target.
src_test() {
	cuda_add_sandbox -w

	# Intentionally empty; also forwarded to the test_unit invocation below.
	local myctestargs=(
	)

	local CMAKE_SKIP_TESTS=(
		"ctest_examples_41_fmha_backward_python$"
	)

	cmake_src_test -j1
	cmake_build test_unit "${myctestargs[@]}" -j1
}

# Install through CMake, then remove the usr/test tree from the image.
# The removal is deliberately unguarded: die if the directory is missing,
# so a change in the installed layout is noticed.
src_install() {
	cmake_src_install

	rm -r "${ED}/usr/test" || die
}