#! /bin/bash

# Directory prefix used below to locate omorfi_tokenize.pmatch and the
# fallback hfst-tokenize binary.
TAG=tag
# Extra hfst-tokenize flag; set to "--newline" by the --newline / -n option,
# otherwise left empty.
NEWLINE_SEP=""
# Extra hfst-tokenize flag; set to "-i" by the --space-separated option,
# otherwise left empty.
SPACE_SEP=""

# Print the usage text (description, option list and a worked example) to
# standard output, then terminate the script with exit status 0.
# Takes no arguments.
print_help() {
    # Each argument is emitted as one output line; single quotes keep the
    # '$' prompt characters and the double quotes in the example literal.
    printf '%s\n' \
        'Tokenizes running Finnish text on standard input.' \
        '' \
        'Options:' \
        '  --version, -v          Print version information' \
        '  --newline, -n          Newline as input separator (default is blank line)' \
        '  --space-separated      Print space-separated tokens, one sentence per line' \
        '                         (default is token per line)' \
        '' \
        'Process entire files with redirection, eg.' \
        '  $ finnish-tokenize < mytext.txt > mytext_tokenized.txt' \
        'or type into the terminal and terminate with EOF (usually ctrl-D on' \
        'your keyboard), or directly input a line of text with <<<. Example:' \
        '' \
        '$ finnish-tokenize <<< "Sähköpostiosoitteeni on (muuten) seppo.teppo@gmail.com."' \
        '' \
        'Sähköpostiosoitteeni' \
        'on' \
        '(' \
        'muuten' \
        ')' \
        'seppo.teppo@gmail.com' \
        '.'

    exit 0
}

# Print the version string and release date (one per line) to standard
# output, then terminate the script with exit status 0.
# Takes no arguments.
print_version() {
    printf '%s\n' 'finnish-tagtools version 1.6.0' '2020-12-09'
    exit 0
}

# Scan every command-line argument in order. Recognised flags either print
# the version (and exit) or record an extra flag for hfst-tokenize; empty
# arguments are skipped; anything else prints the help text (and exits).
for arg in "$@"; do
    case "$arg" in
        "")
            ;;
        --version | -v)
            print_version
            ;;
        --newline | -n)
            NEWLINE_SEP="--newline"
            ;;
        --space-separated)
            SPACE_SEP="-i"
            ;;
        *)
            print_help
            ;;
    esac
done

# Populate the global array TOKENIZE with the tokenizer command line.
#
# Globals read:    TAG, NEWLINE_SEP, SPACE_SEP
# Globals written: TOKENIZE (array: executable, optional flags, pmatch file)
#
# Uses hfst-tokenize from PATH when available and otherwise falls back to
# the copy under $TAG. The option flags are passed in BOTH cases; previously
# the fallback dropped them, so --newline / --space-separated were silently
# ignored when hfst-tokenize was not on PATH.
build_tokenize_command() {
    local tokenizer="hfst-tokenize"
    if ! command -v hfst-tokenize > /dev/null 2>&1; then
        tokenizer="$TAG/hfst-tokenize"
    fi

    # Build the command as an array so each element stays one word even if
    # $TAG contains whitespace; unset option flags are left out entirely.
    TOKENIZE=("$tokenizer")
    if [[ -n "${NEWLINE_SEP:-}" ]]; then
        TOKENIZE+=("$NEWLINE_SEP")
    fi
    if [[ -n "${SPACE_SEP:-}" ]]; then
        TOKENIZE+=("$SPACE_SEP")
    fi
    TOKENIZE+=("$TAG/omorfi_tokenize.pmatch")
}

build_tokenize_command

# Run the tokenizer on standard input; its exit status becomes the script's.
"${TOKENIZE[@]}"
