/*
	File: LocStr.ll				
	Author: Dimitrios Kokkinakis			
	version: 100127				
	Finds a variety of STR-Locations using pattern matching	
*/
%{
%}
/* U: one "capital letter" byte: octal 101-132 (A-Z), octal 300-335
   (presumably the ISO-8859-1 accented capitals -- confirm the input
   encoding), or a literal '&'. */
U	[\101-\132\300-\335\&]
%{
  /* Forward declaration of the output helper defined in the user-code
     section after the rules.  It prints the current match with a
     <ENAMEX TYPE="LOC" SBT="STR"> element around it; x (0, 1 or 2 in the
     rules) is the number of leading blank-separated words that are
     printed outside the element.  Always returns 1. */
  int printSTR (char *a,int x);
%}
%%
	/* EXCEPTIONS */
	/* Text that is already wrapped in an ENAMEX, NUMEX or TIMEX element
	   is echoed unchanged, so the street rules below never tag inside
	   an existing annotation. */
"\<ENAMEX TYPE\="\"(TRM|EVN|LOC|MSR|OBJ|ORG|PRS|WRK|TME)"\" SBT\=\""(ANM|ARL|AST|ATH|ATL|CLC|CLT|CLU|CMP|CRP|DAT|DEN|EDU|FIN|FNC|FWP|GPL|HPL|HUM|MDC|MDO|MDD|MDA|MDI|MDS|MTH|PER|PLT|PPL|PRD|PRJ|PSS|PRZ|RLG|RTV|STR|TVR|TRM|VHA|VHG|VHW|WAA|WAO|WMD|WMO|WTH)\"\>[^\n\<]+\<\/ENAMEX\>				{ECHO;}
"\<NUMEX TYPE\="\"MSR"\" SBT\=\""(DGR|VLM|TMP|INX|DST|CUR|DEN|PRC|CMU|WMU|XXX|TEL|LST|AGE|WEB|CVU|CRD|ELU)\"\>[^\n\<]+\<\/NUMEX\>						{ECHO;}
	/* A TIMEX element is closed by </TIMEX>.  The rule used to require
	   </NUMEX> here, so well-formed TIMEX elements were not protected;
	   the old closing tag is still accepted for backward compatibility. */
"\<TIMEX TYPE\="\"TME"\" SBT\=\""DAT\"\>[^\n\<]+\<\/(TIMEX|NUMEX)\>		{ECHO;}

	/* STREETS */
	/* Phrases that are echoed unchanged instead of being tagged.  The
	   scanner takes the longest match and, for equal lengths, the rule
	   listed first, so these win over the tagging rules further down
	   whenever they match at least as much text. */
"Manic Street Preachers"					{ECHO;}
	/* MOD (Jyrki Niemi): Corrected "E Steet Band" */
"E Street Band"					{ECHO;}
"Back Street Boys"					{ECHO;}
"Wall Street Journal"					{ECHO;}
True(" "{U}[^\n ]+)+					{ECHO;}
\-Sprvgen" "[0-9]+					{ECHO;}
Sprvgen						{ECHO;}
"stjrnan Picabo Street"					{ECHO;}
"Swedish Street Rod Nationals"				{ECHO;}
"Hill Street Blues"					{ECHO;}
"Spanarna p Hill Street"					{ECHO;}
"p Hill Street"					{ECHO;}
"Spridningsvgen"					{ECHO;}
Vintergatan						{ECHO;}
vervg						{ECHO;}

	/* MULTI STREETS */
	/* Tagging rules.  Each action hands the match to printSTR; the second
	   argument says how much of the match stays outside the ENAMEX tag:
	     0  the whole match is tagged,
	     1  the first blank-separated word is printed as-is, the rest tagged,
	     2  the first two words are printed as-is, the rest tagged.
	   NOTE(review): several alternatives read as if non-ASCII letters
	   were dropped (for instance the class in []ver, or vgarna, grnd);
	   check this file against its original encoding before editing. */
Gare(" "de[^\n ]*)?(" "{U}[^\n ]+)+			{printSTR(yytext,0);}
(Via|Maria)" "(della|del)" "{U}[^\n ]+			{printSTR(yytext,0);}
"Via "("Appia Antica"|Appia|Felicati|Roma|Marmorata|Dolorosa|Monte[^\n ]+)						{printSTR(yytext,0);}
[Bb]oulevard" "(d[eui]" ")?{U}[^\n ]+(" "{U}[^\n ]+)*		{printSTR(yytext,0);}
([^\n ]*[Rr]iks|[Ll]ands|uropa|[Ll]ns)vgarna" "[A-Z0-9]+(\/|" och ")[A-Z0-9]+						{printSTR(yytext,0);}
([^\n ]*[Rr]iksvg|Interstate|Route|[Ll]andsvg|Europavg|[Ll]nsvg|[Hh]uvudvg)" "[A-Z0-9]+					{printSTR(yytext,0);}
[0-9\-]+" "{U}[^\n ]+" "Street			{printSTR(yytext,0);}
([0-9]+" ")?(Park|Pennsylvania|Hart|Florence|Madison|Lenox|Julian|Olive|Third|Fifth|Gasoline|Lexington|Beeches|Chicago|Addison|Colorado|Rockingham|Auburn|Beach|Upper|Downing|Bourbon|"Ban Co"|Carnaby|Fleet|Oldham|Pusher|South|Broad|Princes|Filbert|Bond|Market|"Kensington High"|{U}[^\n ]+" "House|[^\n ]+th|[^\n ]+nd)" "(Avenue|Street)	{printSTR(yytext,0);}
Aven[a-z]+" "[Dd][esai](" "{U}[^\n ]+)+			{printSTR(yytext,0);}
St" "[^\n ]+" Road"				{printSTR(yytext,0);}
(Sankt|S:t|Vstra|Norra|stra|Sdra|Yttre|Danska)" "([^\n\< ]+gata|[^\n\< ]*[Vv]ge)n(" "[0-9]+" \- "[0-9]+|" "[0-9]+)?				{printSTR(yytext,0);}
([Dd]rottning|[Pp]rinsessan)" "[A-Z][^\n ]+s" vg"		{printSTR(yytext,0);}
([0-9]+" ")?([Aa]venue|[Bb]oulevard|[Rr]ue)(" de"|" de la"|" "des|" "d\&#x2019;|" du")?(" "{U}[^\n ]+)+					{printSTR(yytext,0);}
([Aa]venue|[Bb]oulevard|[Rr]ue)(" de"|" de la"|" "des|" "d\&#x2019;|" du")" "[0-9]+						{printSTR(yytext,0);}
([Ll]illa|[Ss]tora)" "{U}[^\n ]+grnd			{printSTR(yytext,0);}

	/* The next two rules start with a context word followed by a blank;
	   mode 1 keeps that word outside the tag. */
(t|[Bb]ortre|[Ff]rn|[Gg]enom|[Ii]|[Ii]genom|[Kk]ring|[Ll]ngs|[Mm]ot|[Nn]ra|[Pp]|[Rr]unt|[Uu]ppfr|[Uu]tmed|[Vv]ia|[Vv]id|[]ver|bermda|korsar|till)" "([0-9]+th" "|[0-9]+" ")?[^\n ]*(Strasse|Road|Avenue|Boulevard|Street|Row)		{printSTR(yytext,1);}

(t|[Bb]ortre|[Ff]rn|[Gg]enom|[Ii]genom|[Kk]ring|[Ll]ngs|[Mm]ot|[Nn]ra|[Pp]|[Rr]unt|[Uu]ppfr|[Uu]tmed|[Vv]ia|[Vv]id|[]ver|bermda|korsar|till)" "([A-Z][^\n\< ]+" ")?[^\n ]*([Ss]trasse|[Rr]oad|[Aa]venue|[Bb]oulevard|[Ss]treet)	{printSTR(yytext,1);}
[Pp]" "({U}[^\n ]+" ")+Road				{printSTR(yytext,1);}
([Ll]ngs|[Pp]|afikerade)" vg "[A-Z0-9]+		{printSTR(yytext,1);}
[^\n ]+(gata|gatan|strket)(" la")?(" "{U}[^\n ]+)+		{printSTR(yytext,1);}
[0-9]+"\:e gatan"				{printSTR(yytext,0);}
{U}[^\n ]*gatan\/{U}[^\n ]*vgen			{printSTR(yytext,0);}
{U}[^\n ]*sgata(n)?\-({U}[^\n ]*" ")?{U}[^\n ]*gata(n)?	{printSTR(yytext,0);}
({U}[^\n ]+" ")?{U}[^\n ]*(avenyn|gracht|grasse|[Ss]trasse|gata|[Gg]atan|[Vv]gen|vg)(" "[0-9\-]+)?(" "[A-Z])?				{printSTR(yytext,0);}
({U}[^\n ]*" ")?{U}[^\n ]+"s vg "[0-9]+(\-[0-9]+)?(" "[A-Z])?	{printSTR(yytext,0);}
({U}[^\n ]+" ")?{U}[^\n ]+"ns Gata "[0-9]+(\-[0-9]+)?(" "[A-Z])?	{printSTR(yytext,0);}
[^\n ]+rvgen(" "{U}[^\n ]*)+			{printSTR(yytext,1);}
[Aa]"dressen "[0-9]+(" "{U}[^\n ]*)+			{printSTR(yytext,1);}
	/* Mode 2: the two words before the road identifier stay untagged. */
[Ll]ngs" med "[A-Z0-9]+				{printSTR(yytext,2);}
([Ll]ngs|utmed|p|av)" "E(" ")?[1-9][0-9]?(\:an)?		{printSTR(yytext,1);}
([Ii]ntill|[Ii])" "{U}[^\n ]+" "{U}[^\n ]+grnd		{printSTR(yytext,1);}
[Pp]" "("E 18"|"E 6"|E18|E6|E6:an)			{printSTR(yytext,1);}
"E 4\/E 20"					{printSTR(yytext,0);}
[0-9]" "[A-Z][^\n ]*" "{U}[^\n ]+street			{printSTR(yytext,0);}
({U}[^\n ][^\n ][^\n ]+" ")?{U}[^\n ]+s" "(all|vg)" "[0-9]+[A-Z]?	{printSTR(yytext,0);}
	/* Anything no rule above matched is copied through one character
	   (or newline) at a time. */
\n	{ECHO;}
.	{ECHO;}
%%

/*
 * printSTR - print a matched string with its street name wrapped in
 * <ENAMEX TYPE="LOC" SBT="STR"> ... </ENAMEX>.
 *
 *   a  NUL-terminated text to print (the rules pass yytext).
 *   x  number of leading blank-separated words that are context rather
 *      than part of the street name.  Each such word and the blank after
 *      it are printed unchanged in front of the opening tag.  A value of
 *      0 (or less) tags the whole string.
 *
 * The text is read through the parameter, not the scanner globals, and
 * every scan stops at the terminating NUL, so a string with fewer blanks
 * than x is printed in full and never read past its end.  If the leading
 * words use up the whole string there is nothing to tag and no element
 * is emitted.
 *
 * Returns 1 in all cases.
 */
int printSTR(char *a, int x)
{
  int skipped;

  for (skipped = 0; skipped < x; skipped++) {
    /* Copy one context word ... */
    while (*a != '\0' && *a != ' ') {
      putchar(*a++);
    }
    /* ... and the single blank that separates it from what follows. */
    if (*a == ' ') {
      putchar(*a++);
    }
  }

  if (x <= 0 || *a != '\0') {
    printf("<ENAMEX TYPE=\"LOC\" SBT=\"STR\">%s</ENAMEX>", a);
  }
  return 1;
}

/*
 * Entry point.  Scans the file named by the first command-line argument,
 * or standard input when no argument is given, by calling yylex() once.
 *
 * Returns 0 once yylex() returns, 1 if the named file cannot be opened.
 */
int main(int argc, char **argv)
{
  if (argc > 1) {
    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
      /* Do not start the scanner with a NULL input stream: report the
         failure instead of scanning something the user did not ask for. */
      perror(argv[1]);
      return 1;
    }
  } else {
    yyin = stdin;
  }

  yylex();
  return 0;
}
