/*
	File: LocGeo.ll
<ENAMEX TYPE="LOC" SBT="AST|STR|GPL|PPL|FNC"></ENAMEX><MTH="PTR"/>
	Author: Dimitrios Kokkinakis
	version: 101129
	Finds a variety of GPL-Locations using pattern matching
*/
%{
/*
 * Definitions section.
 *
 * U (defined just below) is the character class the rules use for the first
 * character of a capitalised word.  In octal, \101-\132 is ASCII 'A'-'Z' and
 * \300-\335 is the byte range 0xC0-0xDD; a literal '&' is accepted as well.
 * NOTE(review): 0xC0-0xDD are accented capital letters only if the input is
 * ISO-8859-1 (Latin-1) encoded -- confirm the expected input encoding.
 */
%}
U	[\101-\132\300-\335\&]
%{
  /*
   * Prototypes for the tagging helpers defined in the user-code section at
   * the end of this file.  The rules in this file call only printGPL and
   * printGPLx; printFNC, printSTR and printPPL are declared here but not
   * called by any rule in this file.
   */
  int printFNC (char  *a,int x);
  int printGPL (char  *a,int x);
  int printGPLx  (char  *a);
  int printSTR (char  *a,int x);
  int printPPL (char  *a,int x);
%}
%%
	/*
	 * EXCEPTIONS
	 * Elements that already carry markup are copied through unchanged, so
	 * that none of the tagging rules below can fire inside them.  Each rule
	 * matches one complete element on a single line: opening tag with its
	 * TYPE and SBT attributes, text without '<' or newline, closing tag.
	 *
	 * The TIMEX rule accepts </TIMEX> as its closing tag.  </NUMEX> is
	 * still accepted too, because that is the only form this rule matched
	 * before and some input may rely on it.
	 */
"\<ENAMEX TYPE\="\"(TRM|EVN|LOC|MSR|OBJ|ORG|PRS|WRK|TME)"\" SBT\=\""(ANM|ARL|AST|ATH|ATL|CLC|CLT|CLU|CMP|CRP|DAT|DEN|EDU|FIN|FNC|FWP|GPL|HPL|HUM|MDC|MDO|MDD|MDA|MDI|MDS|MTH|PER|PLT|PPL|PRD|PRJ|PSS|PRZ|RLG|RTV|STR|TVR|TRM|VHA|VHG|VHW|WAA|WAO|WMD|WMO|WTH)\"\>[^\n\<]+\<\/ENAMEX\>	{ECHO;}
"\<NUMEX TYPE\="\"MSR"\" SBT\=\""(DSG|DGR|VLM|TMP|INX|DST|CUR|DEN|PRC|CMU|WMU|XXX|TEL|LST|AGE|WEB|CVU|CRD|ELU)\"\>[^\n\<]+\<\/NUMEX\>	{ECHO;}
"\<TIMEX TYPE\="\"TME"\" SBT\=\""DAT\"\>[^\n\<]+\<\/(TIMEX|NUMEX)\>	{ECHO;}
	/*
	 * Lexical exceptions.  Each phrase below contains a word that the
	 * tagging rules further down react to (Bay, bergen, havet, viken,
	 * River, ...), but its action is ECHO, so the text is copied to the
	 * output without any tag.  They take effect because the scanner picks
	 * the longest match and, between matches of equal length, the rule
	 * listed first.
	 *
	 * In classes written [^\n^\t ] only the first ^ negates; the second one
	 * is a literal caret, so these classes exclude '^' as well as newline,
	 * tab and space.
	 *
	 * NOTE(review): several words here look truncated (e.g. "Frn", "lven",
	 * "hjd"), which suggests non-ASCII letters were lost from this copy of
	 * the file -- confirm against the original before editing the patterns.
	 */
"Bay Networks"	{ECHO;}
([^\n^\t ]+paketet|Opera|Till|HIF)" Bay"	{ECHO;}
(Frn|Mellan|Till|Alla)" "bergen	{ECHO;}
{U}[^\n^\t ]+" Mountain News"	{ECHO;}
(Ocks|Frn|Stiger|ver|ven|Till|Hela|Hade|ka)" "[a-z]*ha(lvn|vet)	{ECHO;}
[^\n^\t ]*[Ii]nnehavet	{ECHO;}
(Utkn|"Destination Nordsjn"|Svansjn)	{ECHO;}
"Colorado\-Tampa Bay"	{ECHO;}
"Sandviken "(Big|[0-9]+\-[0-9]+)	{ECHO;}
(sl|mot|rubba|drmmer|blekt|mstarna)" "Sandviken	{ECHO;}
(Besviken|Sandviken\-{U})	{ECHO;}
[Ff]reningen" "Torsviken	{ECHO;}
(Dr|Vid)" "lven	{ECHO;}
{U}[^\n^\t ]+" River Band"	{ECHO;}
{U}[^\n^\t ]+" "Rivers	{ECHO;}
Mallorca\-{U}[^\n^\t ]+	{ECHO;}
[Rr]"ster om "{U}[^\n ]+	{ECHO;}
[A-Z][^\n ]+" "besviken	{ECHO;}
[Ii]" hjd med "Th[0-9]*" "	{ECHO;}
Med" "havet	{ECHO;}
TV\-(span|serie)" "Bay	{ECHO;}
EU\-nivn	{ECHO;}
	/*
	 * Direction / position phrases followed by one or more capitalised
	 * words.  The second argument of printGPL is the number of leading
	 * space-separated words that stay outside the tag:
	 *   2  "<direction> till|om|mot" stays outside
	 *   1  the single direction word stays outside
	 *   3  "<adjective> sidan|delen av" stays outside
	 * Everything after those words is wrapped in the GPL tag.
	 *
	 * NOTE(review): the empty "[]" in the first pattern and truncated
	 * alternatives such as "vst" and "sd" suggest non-ASCII letters were
	 * lost from this copy -- confirm against the original file.
	 */
[^\n ]*(ordost|ydost|vst|st|[]ster|ster|[Nn]orr|der|orrut)" "(till|om|mot)(" "{U}[^\n ]+)+	{printGPL(yytext,2);}
[^\n ]*(st|sd|nor|vst)ra(" "{U}[^\n ]+)+	{printGPL(yytext,1);}
[^\n ]*(gaml|slimsk|kuperad|ligast|str|dr|orr)[ae]" "(sida|dele)n" av"(" "{U}[^\n ]+)+	{printGPL(yytext,3);}
[^\n ]*(yd|st|orr|ord)ligaste(" "{U}[^\n ]+)+	{printGPL(yytext,1);}
	/*
	 * MULTI AREAS - GPL
	 * Context word, then a capitalised word ending in "s", then a word
	 * ending in one of the listed surface/tip/coast/shore suffixes.  The
	 * context word is printed as it is and the remainder is wrapped in
	 * <ENAMEX TYPE="LOC" SBT="GPL">, which is what printGPL does when its
	 * second argument is 1.
	 */
([Uu]nder|[]ver|i|med|om|[Ss]jn|till|[]n|nra|sig|ndde|n|ngs|kring|[Rr]unt|[Ff]rn|ver|[Vv]id|utmed|tanfr|p|mot)" "{U}[^\n^\t ]+s" "[a-z]*(yta|tspets|dspets|kust|kusttrakter|strnder)	{printGPL(yytext,1);}
" n "{U}[^\n^\t ]+	{int i=4; 
	printf(" n <ENAMEX TYPE=\"LOC\" SBT=\"GPL\">"); 
	for(;i<yyleng;putchar(yytext[i++])); printf("</ENAMEX>");}
	/*
	 * Multi-word geographic expressions that are tagged as a whole: every
	 * rule in this group passes the complete match to printGPLx, which
	 * wraps it in <ENAMEX TYPE="LOC" SBT="GPL"> ... </ENAMEX>.
	 *
	 * The last two rules both end in "Bay"; for the same text and length
	 * the one listed first is chosen, and either way the action is the
	 * same.
	 *
	 * NOTE(review): the empty "[]" and truncated words (e.g. "sdra",
	 * "strnder", "skrgrd") suggest non-ASCII letters were lost from this
	 * copy -- confirm against the original file before editing.
	 */
("Nordre lv"|"Finska "viken|"Vanda ")s" "[^\n^\t ]*mynning(en|ar)?	{printGPLx(yytext);}
{U}[^\n^\t ]+s" "(nor|sd|vst|vnst|st)ra" "strand	{printGPLx(yytext);}
{U}[^\n^\t ]+(bukten|fjorden|sundet|bcken|n|floden|sjn|viken|havet|sund|nden|lven)s" "[^\n^\t ]*(mynning|djup|botten|strand|inre|sida|delta|strnder)(en)?	{printGPLx(yytext);}
(Afrika|"Miami Beach")"s "[^\n^\t ]+spets	{printGPLx(yytext);}
(Andarnas|Pyrenernas|Heliga|"Tora Bora"|Klippiga)(" "|"\-")[Bb]erg(en|et)	{printGPLx(yytext);}
"Archipilago de "{U}[^\n^\t ]+	{printGPLx(yytext);}
(Vttern|Rhen|Nilen|Jenisej|Themsen|Bosporen|Mlaren|Gange|Tibern|Copacabana|Vnern|Donau|Volga|"Rda "[Hh]avet)s" "([^\n^\t ]*mynning|strand|[^\n^\t ]+sida)(en)?	{printGPLx(yytext);}
"Stilla "[Hh]avs(\-)?(strnder|omrdet)	{printGPLx(yytext);}
(Stora|Eriks|Sl|Vejle)" "[^\n^\t ]*[Ff]jord(en)?	{printGPLx(yytext);}
(Nam|Chao|St)" "[^\n^\t ]+\-floden	{printGPLx(yytext);}
(Trosa|Lule|"S:t Annas"|Karlstads|Blekinge|{U}[^\n^\t ]+tlands|{U}[^\n^\t ]+holms|{U}[^\n^\t ]+borgs|{U}[^\n^\t ]+bottens|Pellinge|Bohuslns)(" sdra"|" norra")?" "[^\n^\t ]*skrgrd	{printGPLx(yytext);}
[^\n^\t ]+nska" "([Rr]iviera|[Bb]ukte)n	{printGPLx(yytext);}
(Anderna|Everest|Olympo|Fuji|{U}[^\n^\t ]+berget|{U}[^\n^\t ]+fjll|Himalaya|"Sierra Grande"|Pyrenerna|Merapi)s" "([^\n^\t ]+" ")?(fot|topp|fauna|sluttning|massiv)[a-z]*	{printGPLx(yytext);}
"Costa "(Blanca|Brava|Verde|[Dd]el" "Sud|[Dd]el" "Sol)(\-kust[a-z]*)?	{printGPLx(yytext);}
{U}[^\n^\t ]+s" "[^\n^\t ]*(iga|norra|sdra|stra|vstra|ligaste)" kust"(bygd)?	{printGPLx(yytext);}
(Spanien|Florida|Alaska|"Norra Amerika"|USA\:)"s "[^\n^\t ]+stkust	{printGPLx(yytext);}
[^\n^\t ]+(ets|ska)" "(Guld|Medelhavs|Atlant)kust(en)?	{printGPLx(yytext);}
({U}[^\n^\t ]+sk|Svart|Still|[Ss]dr|[]str|[Nn]orr|[Vv]str)a" "[^\n^\t ]*kust(en)?	{printGPLx(yytext);}
"Playa "d(" \&#x2019;"|e" "las|el)" "{U}[^\n^\t ]+	{printGPLx(yytext);}
{U}[^\n^\t ]+s" "[^\n^\t ]*(norra|sdra|stra|vstra|ligaste)" udde"	{printGPLx(yytext);}
Cap[a-z]*" "(Aguhlas|Aghulas|Cod|Yakatage|Verde|Canaveral)	{printGPLx(yytext);}
"Costa Ricas regnskog"(ar|en)?	{printGPLx(yytext);}
{U}[^\n^\t ]+ska" "[Ss]j(n)?	{printGPLx(yytext);}
Arboga\-(n)?	{printGPLx(yytext);}
{U}[^\n^\t ]+vikens" "[Ll]agun	{printGPLx(yytext);}
({U}[^\n^\t ]+bergs|{U}[^\n^\t ]+viks)" "[Kk]ile	{printGPLx(yytext);}
("Pico de Teide"|"Fig Tree Bay")s" "sdra" "[a-z]+	{printGPLx(yytext);}
{U}[^\n^\t ]+" Francisco "Bay	{printGPLx(yytext);}
{U}[^\n^\t ]+" "Bay	{printGPLx(yytext);}
	/*
	 * ARABIC
	 * The first rule handles names introduced by the "al-" prefix: a word
	 * ending in "alvn" or "loden", a space, then "al-" plus the name.  The
	 * first word stays outside the tag (printGPL with 1).
	 *
	 * The remaining rules in this group:
	 *   - a capitalised word of at least four characters followed by a word
	 *     ending in a listed feature suffix (bergen, Mountain, havet, River,
	 *     ...); the whole match is tagged (printGPL with 0).  '<' is
	 *     excluded from the words so the match cannot run into markup.
	 *   - context phrases before a capitalised name; printGPL's second
	 *     argument is the number of leading words left outside the tag.
	 *   - the bare names Amazonas and Rhen, tagged as a whole.
	 */
[^\n^\t ]*(alv|lode)n" "al\-[^\n^\t ]+	{printGPL(yytext,1);}
{U}[^\n^\t< ][^\n^\t< ][^\n^\t< ]+" "[^\n^\t< ]*(bergen|Mountain|havet|lvdal|lvstrand|vdalhalvn|viken|trsk|River|Havsbad)	{printGPL(yytext,0);}
[Ll]"ngs med "{U}[^\n^\t ]+" "([^\n^\t ]*kust|[^\n^\t ]*strnder|{U}[^\n^\t ]+)	{printGPL(yytext,2);}
[Uu]t(med|anfr)" "{U}[^\n^\t ]+s" "[^\n^\t ]*(kust|strnder|kusten)	{printGPL(yytext,1);}
([Ff]rn|[Nn]ra|[Pp]|[Vv]id|[Ii]|[Rr]unt)" "{U}[^\n^\t ]+" "([Bb]runn|[Rr]eef|[Ff]jord)	{printGPL(yytext,1);}
(Amazonas|Rhen)	{printGPL(yytext,0);}
	/*
	 * Example for the first rule: "floderna Mississippi, Missouri och
	 * Illinois" (Swedish for "the rivers Mississippi, Missouri and
	 * Illinois").
	 *
	 * The rules in this group either tag the whole match (printGPL with 0:
	 * names ending in a listed geographic suffix, "Mont ..."/"Mount ..."
	 * names, and a name followed by a height such as "( 123 m )"), or leave
	 * 1, 2 or 3 leading context words outside the tag (printGPL with 1, 2
	 * or 3) and tag the rest of the match.
	 *
	 * NOTE(review): empty alternatives ("||"), empty "[]" pairs and
	 * truncated words in these patterns suggest non-ASCII letters were lost
	 * from this copy -- confirm against the original file before editing.
	 */
[Ff]loderna(" "{U}[^\n^\t ]+)+(" \,"{U}[^\n^\t ]+)?" och"(" "{U}[^\n^\t ]+)+	{printGPL(yytext,1);}
([Ll]ngs|[Pp]|ntill|runt|[Uu]tmed|fr|mot|vrtillgngliga)" "{U}[^\n^\t ]+(kusten|kust|leden|sudden|sj|n|bcke|sltte|dalen)(n)?	{printGPL(yytext,1);}
[^\n^\t ]*(alv|lse|ago|avs|ist|rie|dis|vud|nal|ann|rty|mar|nds|oms|alv||[]gruppe|krgrde||[Ff]lode|[Ss]j|[Ss]trande|[Ss]koge)n(" "{U}[^\n^\t\# ]+)+	{printGPL(yytext,1);}
([Pp]|[Ff]rn|[Tt]ill|runt|[Vv]id|[^\n^\t ]+ska[Uu]tanfr)" "n(" "{U}[^\n^\t ]+)+	{printGPL(yytext,2);}
([Pp]|[Ff]rn|[Tt]ill|runt|[Vv]id)" "{U}[^\n^\t ]+[^\n jv]n	{printGPL(yytext,1);}
"ar i gruppen"(" "{U}[^\n^\t ]+)+	{printGPL(yytext,3);}
"mellan "{U}[^\n^\t ]+"\- och "{U}[^\n^\t ]+arna	{printGPL(yytext,1);}
([Pp]|[Ff]rn|[Tt]ill|runt|[Vv]id)" Isla "{U}[^\n^\t ]+	{printGPL(yytext,1);}
(fver|ver|frn)" "Sundet	{printGPL(yytext,1);}
([Vv]ulkan|[Gg]lacir|[Rr]avin|landskap|[]rik)(en|et)?(" "{U}[^\n^\t ]*)+	{printGPL(yytext,1);}
{U}[^\n^\t ]+(arna|bachfallen|httefallen|holmsfallen|sufallen|havet|bukten|sforsen|sudde|fjorden|atollen|lvsdalen|lvdalen|brandsdalen|rske.|acfloden|alfloden|anfloden|asfloden|aufloden|enfloden|gofloden|iafloden|iofloden|isfloden|nefloden|ngfloden|onfloden|pifloden|pofloden|refloden|rafloden|safloden|sefloden|tofloden|unfloden|vafloden|zifloden|\-floden|\-dalen|fjll|lven|arkipelag|viken|sjn|"anska bukten"|plat|strnder|glacir|kn)(en|et)?	{printGPL(yytext,0);}
Mo(nti|nt|unt|nte)" "{U}[^\n^\t ]+" fot"	{printGPL(yytext,0);}
Mo(nti|nt|unt)" "{U}[^\n^\t ]+	{printGPL(yytext,0);}
Mont\-{U}[^\n^\t ]+	{printGPL(yytext,0);}
[Bb]erg(somrdet|stoppen|et|skedjan|hjden)(" "{U}[^\n^\t\< ]+)+	{printGPL(yytext,1);}
[Bb]ergskedjan" heter"(" "{U}[^\n^\t ]+)+	{printGPL(yytext,2);}
"toppen av "Monks" "{U}[^\n^\t ]*	{printGPL(yytext,2);}
[^\n^\t ]*([]a|[Bb]ergskedjo)rna(" "{U}[^\n^\t ]+" \,")+" "{U}[^\n^\t ]+" "(och|eller|samt)" "{U}[^\n^\t ]+	{printGPL(yytext,1);}
(vid|ligger|om|[Nn]ra|[Ii]|[]ver|runt|till)" "{U}[^\n^\t ]+" Canyon"	{printGPL(yytext,1);}
"hgsta topp \, "{U}[^\n^\t ]+" \,"	{printGPL(yytext,3);}
({U}[^\n^\t ]+" ")+"\( "[0-9][0-9][0-9]+" m \)"	{printGPL(yytext,0);}
	/*
	 * The first rule is an exception (ECHO, no tag) for "...ster om" plus
	 * a capitalised word.  The four rules after it are direction/position
	 * rules (printGPL with 2, 1, 3, 1 leading words left untagged), written
	 * with the [^\n^\t ] word class and a few extra alternatives ("bten",
	 * "bortom", "[]knen").  The rest are context phrases before a
	 * capitalised name; again the second argument of printGPL is the number
	 * of leading words kept outside the tag.
	 *
	 * NOTE(review): very similar patterns appear earlier in the rules
	 * section.  For equal-length matches the earlier rule is chosen, so
	 * some of these may never fire -- check flex's "rule cannot be matched"
	 * warnings.  "[Pp][Vv]id" below also looks like it is missing a '|'
	 * between two alternatives -- confirm against the original file.
	 */
[Rr]"ster om "{U}[^\n^\t ]+	{ECHO;}
[^\n^\t ]*(ordost|ydost|vst|st|[]ster|ster|[Nn]orr|der|orrut|bten)" "(till|om|mot)(" "{U}[^\n^\t ]+)+	{printGPL(yytext,2);}
[^\n^\t ]*(stra|sdra|norra|vstra|bortom|[]knen)(" "{U}[^\n^\t ]+)+	{printGPL(yytext,1);}
[^\n^\t ]*(gaml|slimsk|kuperad|ligast|str|dr|orr)[ae]" "(sida|dele)n" av"(" "{U}[^\n^\t ]+)+	{printGPL(yytext,3);}
[^\n^\t ]*(yd|st|orr|ord)ligaste(" "{U}[^\n^\t ]+)+	{printGPL(yytext,1);}
([Ii]|[Vv]id)(" "{U}[^\n^\t ]+)+" "Grove	{printGPL(yytext,1);}
grden" vid "{U}[^\n^\t ]+	{printGPL(yytext,2);}
drunknade" i "{U}[^\n^\t ]+	{printGPL(yytext,2);}
omrdet" "(kring|genom)" "{U}[^\n^\t ]+	{printGPL(yytext,2);}
bron" "(ver|fver)" "{U}[^\n^\t ]+	{printGPL(yytext,2);}
([Bb]ortom|[Uu]ppom)" "{U}[^\n^\t ]+vik	{printGPL(yytext,1);}
([Pp][Vv]id|[Nn]ra|runt)" "{U}[^\n^\t ]+"s nordliga nda"	{printGPL(yytext,1);}
[Ii]" trakten "(av|af)" "{U}[^\n^\t ]+	{printGPL(yytext,3);}
[Ii]" "{U}[^\n^\t ]+regionen	{printGPL(yytext,1);}
[Pp]" "(Java|{U}[^\n^\t ]+berget)	{printGPL(yytext,1);}
[^\n^\t ]*rovinsen" "{U}[^\n^\t ]+	{printGPL(yytext,1);}
[Ii]" "{U}[^\n^\t ]+berge[tn]	{printGPL(yytext,1);}
	/*
	 * Fallback: a newline, or any single character that no rule above
	 * matched, is copied to the output unchanged.
	 */
\n	{ECHO;}
.	{ECHO;}
%%
/*
 * printSTR - write a matched text to stdout with its location part wrapped
 * in <ENAMEX TYPE="LOC" SBT="STR"> ... </ENAMEX>.
 *
 *   a  NUL-terminated text to print (normally the scanner's yytext).
 *   x  number of leading space-separated context words kept outside the tag:
 *        0     tag the whole text
 *        1, 2  print the first x words as they are, then tag the remainder
 *      Any other value prints nothing.
 *
 * The split point is the x-th space character of a.  The function works on
 * its argument rather than on the global yytext/yyleng, so it does what its
 * signature says, and the search for the split point stops at the end of the
 * text.  If a has fewer than x spaces there is nothing to split, and the text
 * is printed unchanged without a tag.  A NULL a prints nothing.
 *
 * Always returns 1.
 */
int printSTR(char *a, int x)
{
    static const char open_tag[] = "<ENAMEX TYPE=\"LOC\" SBT=\"STR\">";
    const char *split;
    int spaces_left;

    if (a == NULL)
        return 1;

    if (x == 0) {
        printf("%s%s</ENAMEX>", open_tag, a);
        return 1;
    }
    if (x < 1 || x > 2)
        return 1;               /* unsupported mode: no output */

    /* Find the x-th space without ever reading past the terminator. */
    spaces_left = x;
    for (split = a; *split != '\0'; split++) {
        if (*split == ' ' && --spaces_left == 0)
            break;
    }
    if (*split == '\0') {
        fputs(a, stdout);       /* too few words: emit untagged */
        return 1;
    }

    /* The separating space is kept, but goes in front of the opening tag. */
    printf("%.*s %s%s</ENAMEX>", (int)(split - a), a, open_tag, split + 1);
    return 1;
}
/*
 * printGPL - write a matched text to stdout with its location part wrapped
 * in <ENAMEX TYPE="LOC" SBT="GPL"> ... </ENAMEX>.
 *
 *   a  NUL-terminated text to print (the rules in this file pass yytext).
 *   x  number of leading space-separated context words kept outside the tag:
 *        0        tag the whole text
 *        1, 2, 3  print the first x words as they are, then tag the remainder
 *      Any other value prints nothing.
 *
 * The split point is the x-th space character of a.  The function works on
 * its argument rather than on the global yytext/yyleng, so it does what its
 * signature says, and the search for the split point stops at the end of the
 * text.  If a has fewer than x spaces there is nothing to split, and the text
 * is printed unchanged without a tag.  A NULL a prints nothing.
 *
 * Always returns 1.
 */
int printGPL(char *a, int x)
{
    static const char open_tag[] = "<ENAMEX TYPE=\"LOC\" SBT=\"GPL\">";
    const char *split;
    int spaces_left;

    if (a == NULL)
        return 1;

    if (x == 0) {
        printf("%s%s</ENAMEX>", open_tag, a);
        return 1;
    }
    if (x < 1 || x > 3)
        return 1;               /* unsupported mode: no output */

    /* Find the x-th space without ever reading past the terminator. */
    spaces_left = x;
    for (split = a; *split != '\0'; split++) {
        if (*split == ' ' && --spaces_left == 0)
            break;
    }
    if (*split == '\0') {
        fputs(a, stdout);       /* too few words: emit untagged */
        return 1;
    }

    /* The separating space is kept, but goes in front of the opening tag. */
    printf("%.*s %s%s</ENAMEX>", (int)(split - a), a, open_tag, split + 1);
    return 1;
}
/*
 * printPPL - write a matched text to stdout with its location part wrapped
 * in <ENAMEX TYPE="LOC" SBT="PPL"> ... </ENAMEX>.
 *
 *   a  NUL-terminated text to print (normally the scanner's yytext).
 *   x  selects which part of the text is tagged:
 *        0        tag the whole text
 *        1, 2, 3  print the first x space-separated words as they are, then
 *                 tag the remainder
 *        30, 103  tag everything before the third space counted from the
 *                 end; that space and what follows it are printed untagged
 *      Any other value prints nothing.
 *
 * The function works on its argument rather than on the global
 * yytext/yyleng, so it does what its signature says, and both searches for
 * the split point stay inside the text.  If a has too few spaces for the
 * requested mode, the text is printed unchanged without a tag.  A NULL a
 * prints nothing.
 *
 * Always returns 1.
 */
int printPPL(char *a, int x)
{
    static const char open_tag[] = "<ENAMEX TYPE=\"LOC\" SBT=\"PPL\">";
    const char *split;
    int spaces_left;

    if (a == NULL)
        return 1;

    if (x == 0) {
        printf("%s%s</ENAMEX>", open_tag, a);
        return 1;
    }

    if (x >= 1 && x <= 3) {
        /* Leading context: split at the x-th space from the start. */
        spaces_left = x;
        for (split = a; *split != '\0'; split++) {
            if (*split == ' ' && --spaces_left == 0)
                break;
        }
        if (*split == '\0') {
            fputs(a, stdout);   /* too few words: emit untagged */
            return 1;
        }
        /* The separating space goes in front of the opening tag. */
        printf("%.*s %s%s</ENAMEX>", (int)(split - a), a, open_tag, split + 1);
        return 1;
    }

    if (x == 30 || x == 103) {
        /* Trailing context: split at the third space from the end. */
        for (split = a; *split != '\0'; split++)
            ;
        spaces_left = 3;
        while (split > a) {
            split--;
            if (*split == ' ' && --spaces_left == 0)
                break;
        }
        if (spaces_left != 0) {
            fputs(a, stdout);   /* too few words: emit untagged */
            return 1;
        }
        /* The space at the split point stays with the untagged tail. */
        printf("%s%.*s</ENAMEX>%s", open_tag, (int)(split - a), a, split);
        return 1;
    }

    return 1;                   /* unsupported mode: no output */
}
/*
 * printFNC - write a matched text to stdout with its location part wrapped
 * in <ENAMEX TYPE="LOC" SBT="FNC"> ... </ENAMEX>.
 *
 *   a  NUL-terminated text to print (normally the scanner's yytext).
 *   x  number of leading space-separated context words kept outside the tag:
 *        0        tag the whole text
 *        1, 2, 3  print the first x words as they are, then tag the remainder
 *      Any other value prints nothing.
 *
 * The split point is the x-th space character of a.  The function works on
 * its argument rather than on the global yytext/yyleng, so it does what its
 * signature says, and the search for the split point stops at the end of the
 * text.  If a has fewer than x spaces there is nothing to split, and the text
 * is printed unchanged without a tag.  A NULL a prints nothing.
 *
 * Always returns 1.
 */
int printFNC(char *a, int x)
{
    static const char open_tag[] = "<ENAMEX TYPE=\"LOC\" SBT=\"FNC\">";
    const char *split;
    int spaces_left;

    if (a == NULL)
        return 1;

    if (x == 0) {
        printf("%s%s</ENAMEX>", open_tag, a);
        return 1;
    }
    if (x < 1 || x > 3)
        return 1;               /* unsupported mode: no output */

    /* Find the x-th space without ever reading past the terminator. */
    spaces_left = x;
    for (split = a; *split != '\0'; split++) {
        if (*split == ' ' && --spaces_left == 0)
            break;
    }
    if (*split == '\0') {
        fputs(a, stdout);       /* too few words: emit untagged */
        return 1;
    }

    /* The separating space is kept, but goes in front of the opening tag. */
    printf("%.*s %s%s</ENAMEX>", (int)(split - a), a, open_tag, split + 1);
    return 1;
}
/*
 * printFNC_CRP - write a matched text to stdout with part of it wrapped in
 * <ENAMEX TYPE="LOC/ORG" SBT="FNC/CRP"> ... </ENAMEX>.
 *
 *   a  NUL-terminated text to print (normally the scanner's yytext).
 *   x  number of leading space-separated context words kept outside the tag:
 *        0     tag the whole text
 *        1, 2  print the first x words as they are, then tag the remainder
 *      Any other value prints nothing.
 *
 * The split point is the x-th space character of a.  The function works on
 * its argument rather than on the global yytext/yyleng, so it does what its
 * signature says, and the search for the split point stops at the end of the
 * text.  If a has fewer than x spaces there is nothing to split, and the text
 * is printed unchanged without a tag.  A NULL a prints nothing.
 *
 * Always returns 1.
 */
int printFNC_CRP(char *a, int x)
{
    static const char open_tag[] = "<ENAMEX TYPE=\"LOC/ORG\" SBT=\"FNC/CRP\">";
    const char *split;
    int spaces_left;

    if (a == NULL)
        return 1;

    if (x == 0) {
        printf("%s%s</ENAMEX>", open_tag, a);
        return 1;
    }
    if (x < 1 || x > 2)
        return 1;               /* unsupported mode: no output */

    /* Find the x-th space without ever reading past the terminator. */
    spaces_left = x;
    for (split = a; *split != '\0'; split++) {
        if (*split == ' ' && --spaces_left == 0)
            break;
    }
    if (*split == '\0') {
        fputs(a, stdout);       /* too few words: emit untagged */
        return 1;
    }

    /* The separating space is kept, but goes in front of the opening tag. */
    printf("%.*s %s%s</ENAMEX>", (int)(split - a), a, open_tag, split + 1);
    return 1;
}
/*
 * printGPLx - print the text a to stdout, wrapped as a whole in
 * <ENAMEX TYPE="LOC" SBT="GPL"> ... </ENAMEX>.  Always returns 1.
 */
int printGPLx (char *a)
{
    static const char open_tag[] = "<ENAMEX TYPE=\"LOC\" SBT=\"GPL\">";
    static const char close_tag[] = "</ENAMEX>";

    printf("%s%s%s", open_tag, a, close_tag);
    return 1;
}
/*
 * main - run the scanner over the file named by the first command-line
 * argument, or over standard input when no argument is given.
 *
 * Returns 0 after the input has been scanned, or 1 if the named file cannot
 * be opened.  The open is checked explicitly: per the flex manual a NULL
 * yyin makes yylex() read standard input, so without the check a mistyped
 * file name would leave the program silently waiting on stdin.
 */
int main(int argc, char **argv)
{
    if (argc > 1) {
        yyin = fopen(argv[1], "r");
        if (yyin == NULL) {
            perror(argv[1]);
            return 1;
        }
    } else {
        yyin = stdin;
    }

    yylex();
    return 0;
}
