#! /bin/bash

# Write the usage text for finnish-postag to standard output, then terminate
# the shell with status 0 (this function never returns to its caller).
print_help() {
    # One argument per output line; single quotes keep every character literal.
    printf '%s\n' \
        'Lemmatizes and morphologically labels running Finnish text on standard input.' \
        '' \
        'This package is based on the statistical (CRF-based) tagger FinnPos,' \
        'the Finnish morphology package OmorFi and the FinnTreeBank corpus of' \
        'labeled text.' \
        '' \
        'Process entire files with redirection, eg.' \
        '  $ finnish-postag < mytext.txt > mytext_tagged.txt' \
        'or type into the terminal and terminate with EOF (usually ctrl-D on' \
        'your keyboard), or directly input a line of text with <<<. Example:' \
        '' \
        '$ finnish-postag <<< "Voitteko ojentaa voita?"' \
        ''

    # Sample result rows: three tab-separated columns per row. The format is
    # reused for every group of three arguments.
    printf '%s\t%s\t%s\n' \
        'Voitteko' 'voida'   '[POS=VERB]|[VOICE=ACT]|[MOOD=INDV]|[TENSE=PRESENT]|[PERS=PL2]|[CLIT=KO]' \
        'ojentaa'  'ojentaa' '[POS=VERB]|[VOICE=ACT]|[MOOD=INDV]|[TENSE=PRESENT]|[PERS=SG3]' \
        'voita'    'voi'     '[POS=NOUN]|[NUM=SG]|[CASE=PAR]' \
        '?'        '?'       '[POS=PUNCTUATION]'

    exit 0
}

# Write the version string and release date to standard output, one per line,
# then terminate the shell with status 0 (never returns to its caller).
print_version() {
    printf '%s\n' 'finnish-tagtools version 1.1' '2018-05-16'
    exit 0
}

# Directory prefix for the tagger binaries, helper scripts and model files
# referenced below.
# NOTE(review): the value assigned here looks like a placeholder that is
# substituted at install time -- confirm against the build scripts before
# reformatting this line.
TAG=DATADIR
# Tokenizer command stored as an array so that every argument stays a single
# word even if the directory path contains whitespace or glob characters.
TOKENIZE=("$TAG/hfst-tokenize" -x "$TAG/omorfi_tokenize.pmatch")

# Only the first command-line argument is inspected:
#   (none)          -> fall through and run the tagging pipeline on stdin
#   --version / -v  -> print version information and exit
#   anything else   -> print the help text and exit
case "${1:-}" in
    "")
        ;;
    "--version" | "-v")
        print_version ;;
    *)
        print_help ;;
esac

# Tagging pipeline: standard input is tokenized, passed through the conversion,
# feature and labelling stages, and finally reduced to fields 1, 3 and 4 of the
# tab-separated result. Diagnostics written to stderr by finnpos-label are
# discarded.
"${TOKENIZE[@]}" |
    python3 "$TAG/omorfi2finnpos.py" ftb |
    python3 "$TAG/finnpos-ratna-feats.py" "$TAG/freq_words" |
    "$TAG/finnpos-label" "$TAG/ftb.omorfi.model" 2>/dev/null |
    python3 "$TAG/finnpos-restore-lemma.py" |
    cut -f1,3,4
