#!/bin/bash
# Base directory for the tagger's executables, scripts and models.
# NOTE(review): the value on the next line looks like a placeholder that is
# presumably rewritten at install time -- confirm before editing that line.
TAG=DATADIR
# Pattern matcher executable.
PMATCH="${TAG}/hfst-pmatch"
# Tokenizer command together with its arguments. This is deliberately a plain
# string: it is expanded unquoted where it is run so that it splits into words.
TOKENIZE="${TAG}/hfst-tokenize -x ${TAG}/omorfi_tokenize.pmatch"

# Helper executable run after each pattern matcher pass.
MOVETAGS="${TAG}/move_tags"

# Write the usage text and a worked example to standard output, then
# terminate the script with exit status 0.
print_help() {
    # Descriptive part: every argument is printed on its own line; an empty
    # argument produces a blank line.
    printf '%s\n' \
        'Adds named entity tags to running Finnish text on standard input.' \
        '' \
        'The output is returned one token per line. Where named entities are' \
        'identified, the token in question is followed by a tab character and' \
        'the entity identifier. Entities that span multiple tokens are given' \
        'opening and closing XML-style tags, and single-token entities only a' \
        'closing tag.' \
        '' \
        'This package is based on the statistical (CRF-based) tagger FinnPos,' \
        'the Finnish morphology package OmorFi, the FinnTreeBank corpus of' \
        'labeled text and the FinnPos rule-based named entity tagger.' \
        '' \
        'Process entire files with redirection, eg.' \
        '  $ finnish-nertag < mytext.txt > mytext_tagged.txt' \
        'or type into the terminal and terminate with EOF (usually ctrl-D on' \
        'your keyboard), or directly input a line of text with <<<. Example:' \
        '' \
        '$ finnish-nertag <<< "Pernoossa asuva Heikki Anttonen on ostanut Outokummun osakkeita."'

    # Sample output rows: the format is reused for each (token, tag) pair, so
    # every row is "token<TAB>tag"; an empty tag leaves just the trailing tab.
    printf '%s\t%s\n' \
        'Pernoossa' '<EnamexLocXxx/>' \
        'asuva' '' \
        'Heikki' '<EnamexPrsHum>' \
        'Anttonen' '</EnamexPrsHum>' \
        'on' '' \
        'ostanut' '' \
        'Outokummun' '<EnamexOrgCrp/>' \
        'osakkeita' '' \
        '.' ''

    exit 0
}

# Write the version line and the date line to standard output, then
# terminate the script with exit status 0.
print_version() {
    printf '%s\n' 'finnish-tagtools version 1.1' '2018-05-16'
    exit 0
}

# Inspect the first command-line argument only; any further arguments are
# not looked at here.
case "$1" in
    "")
        # No argument given: continue with the rest of the script.
        ;;
    --version | -v)
        print_version
        ;;
    *)
        # Every other argument ends up here and gets the help text.
        print_help
        ;;
esac

# Tagging pipeline: standard input is fed to the tokenizer, passed through
# each stage in order, and the final `cut` keeps fields 1 and 5 of the
# tab-separated result on standard output.
# NOTE(review): per the help text these are presumably the token and its
# entity tag -- confirm against the tools' output format.
#
# All path expansions are quoted so that an installation directory containing
# whitespace or glob characters is passed to each command as a single word.
# Standard error of finnpos-label is discarded (kept from the original;
# NOTE(review): presumably to hide its diagnostic chatter -- confirm).
# The exit status of the script is that of the last stage only.
#
# TOKENIZE holds a command plus its arguments and must be word-split, so it
# is the one expansion that stays unquoted.
# shellcheck disable=SC2086
$TOKENIZE |
python3 "$TAG/omorfi2finnpos.py" ftb |
python3 "$TAG/finnpos-ratna-feats.py" "$TAG/freq_words" |
"$TAG/finnpos-label" "$TAG/ftb.omorfi.model" 2>/dev/null |
python3 "$TAG/finnpos-restore-lemma.py" ner |
cut -f1,3,4,5 |
"$TAG/prefilt_tags" |
"$TAG/prefilt_lemmas" |
"$PMATCH" "$TAG/proper_tagger_ph1.pmatch" | "$MOVETAGS" |
"$PMATCH" "$TAG/proper_tagger_ph2.pmatch" | "$MOVETAGS" |
"$TAG/remove_exc" |
cut -f 1,5
