# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4

# Portfile for the Python bindings of SentencePiece.
#
# NOTE: every directive must sit on its own line. A Tcl comment extends to
# the end of the physical line, so anything that follows the modeline above
# on the same line would be silently ignored.

PortSystem          1.0
PortGroup           github 1.0
PortGroup           python 1.0

# github.setup arguments: author, project, version, tag prefix.
github.setup        google sentencepiece 0.2.0 v
revision            0

checksums           rmd160  9940d5c995a5ee0895854fdd0ca3f8ae4fed6d95 \
                    sha256  9970f0a0afee1648890293321665e5b2efa04eaec9f1671fcf8048f456f5bb86 \
                    size    11980811

# github.setup derives the port name from the project; override it so the
# port follows the py-* naming used for Python modules.
name                py-${github.project}
categories-append   textproc
license             Apache-2
maintainers         nomaintainer

description         Python wrapper for ${github.project}
long_description    SentencePiece is an unsupervised text tokenizer \
                    and detokenizer mainly for Neural Network-based \
                    text generation systems where the vocabulary size \
                    is predetermined prior to the neural model \
                    training. SentencePiece implements subword units \
                    (e.g., byte-pair-encoding (BPE) \[Sennrich et al.\]) \
                    and unigram language model \[Kudo.\]) with the \
                    extension of direct training from raw \
                    sentences. SentencePiece allows us to make a \
                    purely end-to-end system that does not depend on \
                    language-specific pre/postprocessing.

github.tarball_from archive
# Keep distfiles under the project name rather than the py-* port name.
# NOTE(review): presumably so the tarball is shared with the sentencepiece
# port listed in depends_lib below -- confirm.
dist_subdir         ${github.project}

python.versions     39 310 311 312

# Everything below applies only to the versioned subports, not to the
# top-level py-sentencepiece port itself.
if {${name} ne ${subport}} {
    # The source code deliberately breaks macOS builds by hardcoding
    # the flag. Fix this.
    # NOTE(review): the patch presumably replaces the hardcoded value in
    # setup.py with the @MAC_OS_V@ placeholder that post-patch fills in
    # below -- confirm against patch-fix-macos-flag.diff.
    patchfiles-append \
                    patch-fix-macos-flag.diff

    post-patch {
        # Substitute the placeholder with the deployment target of this build.
        reinplace "s|@MAC_OS_V@|${macosx_deployment_target}|" ${worksrcpath}/setup.py
    }

    depends_build-append \
                    port:pkgconfig \
                    port:py${python.version}-setuptools

    depends_lib-append \
                    port:protobuf3-cpp \
                    port:sentencepiece

    # Build from the python/ subdirectory of the extracted source tree.
    worksrcdir      ${distname}/python

    compiler.cxx_standard 2017

    test.run        yes

    livecheck.type  none
}