sci-ml/sentencepiece: new package, add 0.2.0

Signed-off-by: Alfredo Tupone <tupone@gentoo.org>
This commit is contained in:
Alfredo Tupone
2025-03-19 18:54:10 +01:00
parent b985ae8087
commit fcea39b3c8
3 changed files with 65 additions and 0 deletions

View File

@@ -0,0 +1 @@
DIST sentencepiece-0.2.0.tar.gz 11980811 BLAKE2B adf28a66de3f6995d31c3b9be6a324614b95f20fe07ea33dd914bcd9d33d123dfee69f80ef7b2a70c3c23700534916caf57ac877e55e5c9d0d671d37372e0aed SHA512 b4214f5bfbe2a0757794c792e87e7c53fda7e65b2511b37fc757f280bf9287ba59b5d630801e17de6058f8292a3c6433211917324cb3446a212a51735402e614

View File

@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE pkgmetadata SYSTEM "https://www.gentoo.org/dtd/metadata.dtd">
<pkgmetadata>
<maintainer type="person">
<email>tupone@gentoo.org</email>
<name>Tupone Alfredo</name>
</maintainer>
<upstream>
<remote-id type="github">google/sentencepiece</remote-id>
</upstream>
</pkgmetadata>

View File

@@ -0,0 +1,53 @@
# Copyright 2025 Gentoo Authors
# Distributed under the terms of the GNU General Public License v2
EAPI=8

inherit cmake

DESCRIPTION="Text tokenizer for Neural Network-based text generation"
HOMEPAGE="https://github.com/google/sentencepiece"
# The GitHub tag archive is renamed on download so the distfile carries the
# package name and version instead of a bare "v${PV}.tar.gz".
SRC_URI="https://github.com/google/${PN}/archive/refs/tags/v${PV}.tar.gz
	-> ${P}.tar.gz"

LICENSE="Apache-2.0"
SLOT="0"
KEYWORDS="~amd64"

# abseil-cpp and protobuf are requested from the system through the
# SPM_*_PROVIDER=package options in src_configure. The := slot operator
# records the subslot this package was built against, so the package
# manager schedules a rebuild when either library changes its subslot.
RDEPEND="
	dev-cpp/abseil-cpp:=
	dev-libs/protobuf:=
	dev-util/google-perftools
"
# darts is listed for the build only: src_prepare rewrites the sources to
# include its darts.h instead of third_party/darts_clone/darts.h.
DEPEND="${RDEPEND}
	dev-libs/darts
"

# Upstream documentation files to install through DOCS.
DOCS=(
	README.md
	doc/api.md
	doc/experiments.md
	doc/normalization.md
	doc/options.md
	doc/special_symbols.md
)
src_prepare() {
	# Sources that include the darts header through its in-tree path.
	local -a darts_users=(
		src/model_interface.h
		src/normalizer.h
		src/normalizer.cc
		src/unigram_model.h
		src/builder.cc
	)
	# Rewrite that path to a plain "darts.h" so the header is looked up on
	# the normal include path (dev-libs/darts is in DEPEND).
	local darts_fixup="s:third_party/darts_clone/darts.h:darts.h:"

	sed -i -e "${darts_fixup}" "${darts_users[@]}" || die

	# Let the eclass apply its own preparation steps afterwards.
	cmake_src_prepare
}
src_configure() {
	# Select the "package" provider for abseil and protobuf.
	# NOTE(review): presumably this makes upstream's CMake use the system
	# libraries listed in RDEPEND rather than bundled copies -- confirm
	# against upstream CMakeLists.txt.
	local -a mycmakeargs=(
		-DSPM_ABSL_PROVIDER=package
		-DSPM_PROTOBUF_PROVIDER=package
	)

	# cmake.eclass picks up the mycmakeargs array declared above.
	cmake_src_configure
}