#!/bin/sh

# Filter: reads text on stdin, writes the transformed text to stdout.
#
#   1. Every line that is empty or consists only of spaces and/or tabs is
#      replaced by the sentence boundary marker ".#.".
#   2. Every line that starts with an opening, closing or self-closing
#      <text>, <body>, <head> or <html> tag (attributes allowed) gets one
#      empty line inserted before it and one after it.
#
# Rule 1 is applied before rule 2, so the empty lines inserted by rule 2
# stay empty and are not turned into ".#." markers.

# A literal tab character.  "\t" inside a bracket expression is a GNU sed
# extension; POSIX treats the backslash there as an ordinary character, so
# the tab is interpolated literally instead.
tab=$(printf '\t')

# -E is the portable spelling of GNU's -r (extended regular expressions).
# Both patterns are anchored, so at most one match per line is possible and
# no "g" flag is needed.  In the second expression "&" is the whole matched
# line (the pattern ends in ".*") and each backslash-newline in the
# replacement emits a newline.
sed -E \
  -e "s/^[ ${tab}]*\$/.#./" \
  -e 's#^</?(text|body|head|html)( [^>]+)?/?>.*#\
&\
#'

