mirror of
https://github.com/gentoo-mirror/gentoo.git
synced 2025-12-08 00:05:33 +03:00
* Don't use blis - flexiblas replaces it * Since we use monorepo, unpack only part of it * Do not install tests * Pass ld.lld without overriding other cxxflags * Handle removed/renamed CMake options * Fix bad interface linking with hipblas-common Closes: https://bugs.gentoo.org/965873 Signed-off-by: Sv. Lockal <lockalsash@gmail.com> Part-of: https://github.com/gentoo/gentoo/pull/44555 Closes: https://github.com/gentoo/gentoo/pull/44555 Signed-off-by: Sam James <sam@gentoo.org>
193 lines
6.0 KiB
Bash
193 lines
6.0 KiB
Bash
# Copyright 1999-2025 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2

EAPI=8

ROCM_SKIP_GLOBALS=1
PYTHON_COMPAT=( python3_{10..14} )

LLVM_COMPAT=( 20 )

inherit cmake flag-o-matic multiprocessing llvm-r1 python-any-r1 rocm

DESCRIPTION="General matrix-matrix operations library for AMD Instinct accelerators"
HOMEPAGE="https://github.com/ROCm/rocm-libraries/tree/develop/projects/hipblaslt"
# hipBLASLt is released as part of the rocm-libraries monorepo tarball
SRC_URI="https://github.com/ROCm/rocm-libraries/archive/refs/tags/rocm-${PV}.tar.gz -> rocm-libraries-${PV}.tar.gz"

# The trailing slash is relied upon by src_compile, which derives the
# build directory as "${S}_build"
S="${WORKDIR}/rocm-libraries-rocm-${PV}/projects/hipblaslt/"

LICENSE="MIT"
SLOT="0/$(ver_cut 1-2)"
KEYWORDS="~amd64"

# One amdgpu_targets_* flag per supported GPU, all enabled by default
SUPPORTED_GPUS=( gfx908 gfx90a gfx942 gfx950 gfx1100 gfx1101 gfx1103 gfx1150 gfx1151 gfx1200 gfx1201 )
IUSE_TARGETS=( "${SUPPORTED_GPUS[@]/#/amdgpu_targets_}" )
IUSE="${IUSE_TARGETS[*]/#/+} benchmark roctracer test"
RESTRICT="!test? ( test )"

RDEPEND="
	dev-util/hip:${SLOT}
	dev-util/rocm-smi:${SLOT}
	roctracer? ( dev-util/roctracer:${SLOT} )
	benchmark? (
		sci-libs/flexiblas
	)
"

DEPEND="
	${RDEPEND}
	dev-cpp/msgpack-cxx
	sci-libs/hipBLAS-common:${SLOT}
	llvm-runtimes/openmp
"

# The Python package list below must stay in sync with python_check_deps()
BDEPEND="
	${PYTHON_DEPS}
	dev-build/rocm-cmake:${SLOT}
	dev-util/hipcc:${SLOT}
	$(python_gen_any_dep "
		dev-python/msgpack[\${PYTHON_USEDEP}]
		dev-python/pyyaml[\${PYTHON_USEDEP}]
		dev-python/joblib[\${PYTHON_USEDEP}]
		dev-python/nanobind[\${PYTHON_USEDEP}]
		dev-python/setuptools[\${PYTHON_USEDEP}]
	")
	$(llvm_gen_dep "llvm-core/clang:\${LLVM_SLOT}")
	test? (
		dev-cpp/gtest
		sci-libs/flexiblas
	)
"

# In 7.1.0 to build tests one needs to build benchmarks (which will be installed)
# TODO: make build of tests independent of benchmarks
REQUIRED_USE="test? ( benchmark )"

# Patches applied by cmake_src_prepare
PATCHES=(
	"${FILESDIR}"/${PN}-7.1.0-no-git.patch
	"${FILESDIR}"/${PN}-7.1.0-rocisa-nanobind.patch
)

# Succeeds only if every Python build-time module is installed for the
# candidate interpreter; stops at (and returns the status of) the first
# missing one. Keep the list in sync with python_gen_any_dep in BDEPEND.
python_check_deps() {
	local pkg
	for pkg in msgpack pyyaml joblib nanobind setuptools; do
		python_has_version "dev-python/${pkg}[${PYTHON_USEDEP}]" || return
	done
}

# Sets QA_FLAGS_IGNORED for everything under the hipblaslt library
# directory, then runs the python-any-r1 setup.
pkg_setup() {
	QA_FLAGS_IGNORED="usr/$(get_libdir)/hipblaslt/library/.*"
	python-any-r1_pkg_setup
}

# Warns when no AMDGPU_TARGETS value is selected: the package still
# builds, but only as a non-functional "dummy" library.
pkg_pretend() {
	[[ -z ${AMDGPU_TARGETS[*]} ]] || return 0

	ewarn "hipBLASLt supports only some GPUs: ${SUPPORTED_GPUS[*]},"
	ewarn "but none of them were defined in AMDGPU_TARGETS USE_EXPAND variable."
	ewarn
	ewarn "Library will continue to be built in \"dummy\" mode,"
	ewarn "serving as a non-functional placeholder for end-user applications."
}

# Unpacks only the hipBLASLt project and the shared origami sources from
# the rocm-libraries monorepo tarball into WORKDIR.
src_unpack() {
	local root="rocm-libraries-rocm-${PV}"

	# -C has to come before the member names: GNU tar applies it only to
	# the operands that follow it on the command line.
	tar -xzf "${DISTDIR}/${A}" -C "${WORKDIR}" \
		"${root}/projects/hipblaslt" "${root}/shared/origami" || die
}

# Adapts the upstream tree to the Gentoo toolchain: fixes Python shebangs,
# replaces hard-coded amdclang references with the selected clang, keeps
# the tests component out of the install and repairs the exported CMake
# dependency list, then hands over to cmake_src_prepare.
src_prepare() {
	# Read one path per line so names with whitespace or glob characters
	# survive intact; an empty result is fatal.
	local -a shebangs
	mapfile -t shebangs < <(grep -rl "#!/usr/bin/env python3" tensilelite/Tensile)
	(( ${#shebangs[@]} )) || die "no python3 shebangs found in tensilelite/Tensile"
	python_fix_shebang -q "${shebangs[@]}"

	rocm_use_clang

	sed -e "s:\$(ROCM_PATH)/bin/amdclang++:$(get_llvm_prefix)/bin/clang++:g" \
		-i tensilelite/Makefile || die

	# Fix compiler validation (just a validation)
	sed -e "s/amdclang/$(basename "${CC}")/g" \
		-i tensilelite/Tensile/Toolchain/Validators.py \
		-i tensilelite/Tensile/Tests/unit/test_MatrixInstructionConversion.py || die

	# Do not install tests
	sed -e "s/COMPONENT tests/COMPONENT tests EXCLUDE_FROM_ALL/" -i CMakeLists.txt || die

	# usage: DEPENDS PACKAGE A PACKAGE B, not DEPENDS PACKAGE A DEPENDS PACKAGE B
	# https://rocm.docs.amd.com/projects/ROCmCMakeBuildTools/en/docs-7.1.0/reference/ROCMInstallTargets.html#command:rocm_export_targets
	# Bug: https://bugs.gentoo.org/965873
	# Upstream bug: https://github.com/ROCm/rocm-libraries/issues/2556
	sed -e "s/DEPENDS PACKAGE \${hipblas_target}/PACKAGE \${hipblas_target}/" -i CMakeLists.txt || die

	cmake_src_prepare
}

# Configures the CMake build. The device code is built only when at least
# one AMDGPU_TARGETS value is selected; the client (benchmarks/tests) is
# built when USE=benchmark or USE=test is enabled and then links against
# FlexiBLAS.
src_configure() {
	rocm_use_clang

	# too many warnings
	append-cxxflags -Wno-explicit-specialization-storage-class

	# Tensile guesses weirdly how to compile things, ld.bfd won't work, so force lld
	append-cxxflags -fuse-ld=lld

	# targets has a trailing semicolon, this trips up Tensile's input parser,
	# so carefully prune; "%;" (unlike "::-1") is also safe when the list is
	# empty, i.e. in "dummy" mode
	local targets
	targets="$(get_amdgpu_flags)"
	targets="${targets%;}"

	local enable_device=ON
	if [[ -z ${AMDGPU_TARGETS[*]} ]]; then
		enable_device=OFF
	fi

	local mycmakeargs=(
		-DGPU_TARGETS="${targets}"
		-DHIPBLASLT_ENABLE_CLIENT="$(usex benchmark ON "$(usex test ON OFF)")"
		-DHIPBLASLT_ENABLE_SAMPLES=OFF
		-DHIPBLASLT_ENABLE_DEVICE="${enable_device}"
		-DHIPBLASLT_ENABLE_MARKER="$(usex roctracer ON OFF)"
		-DHIPBLASLT_ENABLE_ROCROLLER=OFF
		-DHIPBLASLT_ENABLE_FETCH=OFF
		-DHIPBLASLT_BUNDLE_PYTHON_DEPS=ON
		-Dnanobind_DIR="$(python_get_sitedir)/nanobind/cmake"
		-DPython_EXECUTABLE="${PYTHON}"
		-DROCM_SYMLINK_LIBS=OFF
		-DTENSILELITE_BUILD_PARALLEL_LEVEL="$(makeopts_jobs)"
		-DHIPBLASLT_BUILD_TESTING="$(usex test ON OFF)"
		-Wno-dev
	)

	if use test || use benchmark; then
		# HIPBLASLT_ENABLE_CLIENT=ON branch
		mycmakeargs+=(
			-DBLA_PKGCONFIG_BLAS=ON
			-DBLA_VENDOR=FlexiBLAS
			-DHIPBLASLT_ENABLE_BLIS=OFF
		)
	fi

	cmake_src_configure
}

# Exports the environment the Tensile tooling reads during the build and
# runs the CMake build. All variables are local to this phase.
src_compile() {
	# Both consumers below expect the same clang++ binary
	local clangxx
	clangxx="$(get_llvm_prefix)/bin/clang++"

	local -x ROCM_PATH="${EPREFIX}/usr"
	# set PYTHONPATH to load Tensile from virtualenv, not the system-wide one
	local -x PYTHONPATH="${S}_build/virtualenv/lib/${EPYTHON}/site-packages"
	local -x TENSILE_ROCM_ASSEMBLER_PATH="${clangxx}"
	# TensileCreateLibrary reads CMAKE_CXX_COMPILER again
	local -x CMAKE_CXX_COMPILER="${clangxx}"
	cmake_src_compile
}

# Installs via CMake and excludes the hipblaslt library directory from
# stripping.
src_install() {
	cmake_src_install

	# Stop llvm-strip from removing .strtab section from *.hsaco files,
	# otherwise rocclr/elf/elf.cpp complains with "failed: null sections(STRTAB)" and crashes
	dostrip -x "/usr/$(get_libdir)/hipblaslt/library/"
}

# Runs the test suite with only HIP device 0 visible, after check_amdgpu.
src_test() {
	check_amdgpu

	# Expected time for 7900 XTX: 340s (full) or 5s with GTEST_FILTER='*quick*'
	# Fails in `MatrixTransformTest.MultipleDevices` in dGPU+iGPU combination
	HIP_VISIBLE_DEVICES=0 cmake_src_test
}