# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4
PortSystem          1.0

# --- Identity ---------------------------------------------------------------
name                jdepp
version             2015-02-08
categories          textproc japanese
maintainers         nomaintainer

# --- Descriptions shown by "port info" ---------------------------------------
description         C++ implementation of Japanese Dependency Parsers
long_description    J.DepP is a C++ implementation of Japanese dependency \
                    parsing algorithms. The parser takes a raw sentence as input \
                    and performs word segmentation, POS tagging (thanks to MeCab), \
                    bunsetsu chunking and dependency parsing. J.DepP is meant \
                    for those who want to parse massive texts (e.g., entire blog \
                    feeds) efficiently with state-of-the-art parsing accuracy.

homepage            http://www.tkl.iis.u-tokyo.ac.jp/~ynaga/jdepp/
platforms           darwin
license             {GPL-2 LGPL-2.1}

# --- Fetch --------------------------------------------------------------------
# The source tarball is downloaded from the project homepage itself.
master_sites        ${homepage}
checksums           ${distname}${extract.suffix} \
                    rmd160  845584ae822b4ddb0bb439765be7714a97be0ed8 \
                    sha256  55a1a99b6645c4e01126f931fd72bf308dde96df8319f09e1c7cb9cd1ffdae7f

# --- Dependencies ---------------------------------------------------------------
# MeCab plus the JUMAN dictionary by default; the dictionary variants
# (e.g. +ipadic) replace the dictionary entry.
depends_lib         port:mecab-base \
                    port:mecab-jumandic-utf8
# Patch applied during the patch phase.
patchfiles          patch-configure.diff

# After patching, rewrite the KNBC_CORPUS_DIR default in the configure script
# so that it refers to the KNBC directory under the MacPorts prefix instead of
# a KNBC_v1.0_090925 directory below the current working directory.
post-patch {
    reinplace "s|KNBC_CORPUS_DIR=\${PWD}/KNBC_v1.0_090925|KNBC_CORPUS_DIR=${prefix}/share/KNBC|" ${worksrcpath}/configure
}

# Baseline configure flag; variants append their own flags to this list.
configure.args      --enable-standalone
# Variant knbc: train the parser model on the KNBC corpus.
# Mutually exclusive with the other corpus variants (kyoto, kyoto_partial).
# The corpus is read from the share/KNBC directory under the MacPorts prefix
# (presumably installed there by port:KNBC -- confirm against that port).
variant knbc conflicts kyoto kyoto_partial description {Train a parser with KNBC (Kyoto-University and NTT Blog Corpus)} {
    # The corpus is declared as a build-time dependency only.
    depends_build-append    port:KNBC
    configure.args-append   --with-corpus=knbc
    # Build the "model" target and tell make where the corpus lives.
    build.target            model
    build.args-append       CORPUS_DIR=${prefix}/share/KNBC
}
# Variant kyoto_partial: download the Kyoto corpus archive as an additional
# distfile and train the parser model on it.
# Mutually exclusive with the other corpus variants (kyoto, knbc).
variant kyoto_partial conflicts kyoto knbc description {Train a parser with Kyoto-University Text Corpus annotation without Mainichi news articles} {
    set kyoto_corpus    KyotoCorpus4.0
    set corpus_distfile ${kyoto_corpus}${extract.suffix}

    # The corpus archive is fetched from its own mirror, bound to the
    # distfile through the "corpus" tag.
    master_sites-append     http://nlp.ist.i.kyoto-u.ac.jp/nl-resource/corpus/:corpus
    distfiles-append        ${corpus_distfile}:corpus
    checksums-append        ${corpus_distfile} \
                            rmd160  9c8212d939b7112a79aff30ab0880f94c3ee9b48 \
                            sha256  6fe4f7bac19e192f7545c4de0c765f690a524c7611191e2af9be031d4923871f

    configure.args-append   --with-corpus=kyoto-partial --enable-autopos-train=no

    # Build the "model" target against the corpus directory in the work path.
    build.target            model
    build.args-append       CORPUS_DIR=${workpath}/${kyoto_corpus}
}
# The +kyoto variant does not download anything: to train on the complete
# Kyoto corpus, users must place their copy of the data in the directory below
# before installing.
set kyoto_dir /tmp/KyotoCorpus4.0

# Variant kyoto: train the parser model on the corpus found in kyoto_dir.
# Mutually exclusive with the other corpus variants (kyoto_partial, knbc).
variant kyoto conflicts kyoto_partial knbc description {Train a parser with Kyoto-University Text Corpus} {
    if {![file exists ${kyoto_dir}]} {
        # Corpus directory is missing: register a pre-fetch hook that
        # reports the problem and aborts.
        pre-fetch {
            ui_error "To train a parser with Kyoto Corpus, please put the files in ${kyoto_dir}"
            return -code error "Kyoto Corpus is not available at ${kyoto_dir}"
        }
    } else {
        # Corpus directory is present: configure for it and build the model.
        configure.args-append   --with-corpus=kyoto
        build.target            model
        build.args-append       CORPUS_DIR=${kyoto_dir}
    }
}
# Variant ipadic: use the MeCab IPA dictionary in place of the default
# JUMAN dictionary. Mutually exclusive with the other dictionary variants
# (naistjdic, unidic).
variant ipadic conflicts naistjdic unidic description {Build a parser with auto POSs given by MeCab/ipadic} {
    configure.args-append   --with-mecab-dict=IPA

    # Swap the dictionary dependency: drop the default, add the IPA one.
    depends_lib-delete      port:mecab-jumandic-utf8
    depends_lib-append      port:mecab-ipadic-utf8
}
variant naistjdic conflicts ipadic unidic description {Build a parser with auto POSs given by MeCab/NAIST-jdic} {
depends_lib-delete port:mecab-jumandic-utf8
depends_lib-append port:mecab-naist-jdic-utf8