//parser.h
//22.maj 2202
//Datamatiker, Systemprogrammering C++
//F. Bierlich, N. Grove-Rasmussen, M. Munksgaard, B. Nielsen og N. Nugent

#ifndef PARSER
#define PARSER

#include <unistd.h>//_open()
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <iostream.h>
#include <fstream.h>
#include <stdio.h>
#include <ctype.h>
#include "lexer.h"
#include "symbolTable.h"
// Parser for HTML pages. It pulls tokens from the lexer (lexObj), writes them
// to a temporary file through streamToFile and inserts "http://", the host URL
// and the IP prefix at the places the member functions below decide on.
class parser
{
private:
	// Read stream that parse() opens on the temporary file.
	FILE* streamInput;
	// The token most recently read from the lexer.
	int lookahead;
	// Web address of the web provider; installed with setUrl().
	char* Url;
	// File descriptor of the temporary file opened by makeFile().
	int fh;

	// Lengths handed to the constructor; used by the getTotalBytes*() helpers.
	int lengthURL;
	int lengthHostNameIp;

	// The times* counters hold how often the corresponding text was inserted.
	int timesURL;
	int timesHost;
	int timesHttp;

	// Result of the latest returnNewBufferSize() computation.
	int NEW_PAGEBUFFERSIZE;

public:
	parser(int _lenURL, int _lenHostNameIp);
	~parser();

	// Write stream attached to the temporary file by makeFile().
	FILE* streamToFile;

	char* makeFile();
	int returnNewBufferSize();
	// Meant to tear down the stream pointer and the file descriptor.
	void closeFile();

	void setUrl(char* hostURL) { Url = hostURL; }
	int get_NewBufferSize() { return NEW_PAGEBUFFERSIZE; }
	int getTotalBytesURL() { return timesURL * lengthURL; }
	int getTotalBytesHost() { return timesHost * lengthHostNameIp; }

	// The length of "http://" is fixed (7 characters).
	int getTotalBytesHttp() { return timesHttp * 7; }

	// Entry point, token matching and output.
	char* parse(char* fil_n);
	bool match(int t);
	void emit(int t, int tval);

	// Document and link level.
	bool htmldokument();
	bool isHyperlink();
	bool isJavascriptLink();
	bool isA();
	bool isLoc();
	bool isHref();
	bool isBg();
	bool isSrc();
	bool isImg();

	// URL structure.
	bool isProtokol();
	bool isHTTP();
	bool isFTP();
	bool isGOPHER();
	bool isabsURL();
	bool isrelURL();
	bool isServer();
	bool isNetwork();
	bool isDomain();
	bool isPath();
	bool isResource();

	// Text inserted into the output.
	bool addServer();
	bool addProtokol();
	bool insert_IP();
	bool remove_IP();

	// Single tokens and plain text.
	bool isEqual();
	bool isQuote();
	bool isPeriod();
	bool isColon();
	bool isSpace();
	bool isSlash();
	bool isLesser();
	bool isTekst();
	bool isChar();
};
//====================================================================
//									
//IMPLEMENTERING										
//
//====================================================================
// Constructs a parser with all insertion counters at zero.
//   _lenURL         length used by getTotalBytesURL() for each inserted URL
//   _lenHostNameIp  length used by getTotalBytesHost() for each inserted host
parser::parser(int _lenURL, int _lenHostNameIp)
{
  // Put every handle into a well-defined "nothing open yet" state, so that
  // destroying a parser on which makeFile()/setUrl()/parse() were never
  // called does not operate on indeterminate pointers or descriptors.
  streamInput = NULL;
  streamToFile = NULL;
  Url = NULL;
  fh = -1;

  lookahead = 0;            // overwritten by parse() before it is examined
  NEW_PAGEBUFFERSIZE = 0;   // get_NewBufferSize() may be called before parse()

  timesURL = 0;
  timesHost = 0;
  timesHttp = 0;

  lengthURL = _lenURL;
  lengthHostNameIp = _lenHostNameIp;
}
//====================================================================

// Releases the output stream and the URL installed with setUrl().
parser::~parser()
{
  // streamToFile was created with fdopen(fh, ...), so fclose() also closes
  // the underlying descriptor fh. Calling close(fh) afterwards would be a
  // double close and could hit an unrelated descriptor that reused the number.
  if (streamToFile != NULL)
  {
    fclose(streamToFile);
    streamToFile = NULL;
    fh = -1;
  }

  // NOTE(review): Url is whatever pointer setUrl() received. Plain `delete`
  // is only valid if the caller allocated a single object with `new`;
  // storage from new[] would need delete[] - confirm against the caller.
  delete Url;
  Url = NULL;

  cout<<"\nParserObjekt nedlagt\n"; // control printout
}

// Writes one token to the output file.
//   t     the token: either a literal character or one of the keyword tokens
//   tval  for keyword tokens, the symbol-table index whose lexeme is written
void parser::emit(int t,int tval)
{
	switch(t)
	{
	// Single characters are written as they are.
	case ':': case '/': case '"': case '\'':case ',':
	case '=': case '.': case ';': case '>':
	case '[': case ']': case '(': case ')': case '{': case '}':
	case '_': case '!': case '@': case '*':
	case '§': case '#': case '`': case '´': case '?': case '£':
	case '|':
	case '~': case '^': case '¨': case '+': case '%': case '&':
	case '$': case '¤': case '-': case '<':
	case ' ': case '\n':case '\t':case '\\':case '\r': case '½':
		fprintf(streamToFile,"%c",t);	// written to the file
		break;

	// Keyword and identifier tokens: write the lexeme stored in the symbol table.
	case HTTP:	case FTP:	case GOPHER:
	case A:		case HREF:	case LOC:
	case IP:	case IPID:	case ID:
	case SRC: 	case IMG:  	case BG:
		fprintf(streamToFile,"%s",symObj.symtable[tval].get_lex());
		break;

	default:
		// tval is an int; the former "%s" conversion made fprintf treat it as
		// a char pointer, which is undefined behaviour.
		fprintf(streamToFile,"token %d, token %d",t,tval);
		break;
	}
}
//====================================================================
// Creates the temporary output file and attaches streamToFile to it.
// Returns the file name, stored in a buffer allocated here with new[]; this
// function never frees it, so releasing it is left to the caller.
// Terminates the program with exit(1) if any step fails.
char* parser::makeFile()
{
	char* nameBuf = new char[L_tmpnam];

	char* tempFileName = tmpnam(nameBuf);
	if (tempFileName == NULL)
	{
		printf("Der kunne ikke oprettes en fil!");
		exit(1);
	}

	// O_EXCL makes open() fail instead of reusing (or following a symlink at)
	// a path that appeared between tmpnam() and open(). A temporary-file flag
	// is deliberately not used, so the files stay readable afterwards.
	fh = open(tempFileName, O_CREAT | O_EXCL | O_RDWR, 0666);
	if (fh == -1)
	{
		perror("open");
		exit(1);
	}

	// The file descriptor now hands the work over to the stream pointer;
	// "a+" appends at the end of the file.
	streamToFile = fdopen(fh, "a+");
	if (streamToFile == NULL)
	{
		perror("fdopen");
		exit(1);
	}

	cout<<"\nFile2\n";
	return tempFileName;
}
//====================================================================
// Computes, stores and returns the size for the rewritten page: the bytes
// counted so far plus everything the parser inserted (URLs, host prefixes,
// "http://") plus one extra byte per inserted host prefix.
int parser::returnNewBufferSize()
{
  NEW_PAGEBUFFERSIZE = bytesCounted +
                       getTotalBytesURL() + getTotalBytesHost() +
                       getTotalBytesHttp() + timesHost;

  // Control printout of the computed size.
  cout<<"\nNEW_PAGEBUFFERSIZE: "<<NEW_PAGEBUFFERSIZE<<endl;

  return NEW_PAGEBUFFERSIZE;
}

//====================================================================
//parser() starter hele parsningen
//lookahead initialiseres ved at kalde lexan() i lexer
//og fortsætter indtil End Of File læses og returneres
//fra lexan()
char*  parser::parse(char* fil_n)
{
      cout<<"\nCharFile i parse(): "<<fil_n<<endl; //Kontroludskrift. Kan indkommenteres for at se filnavn
  //cout<<"\nRequest sendt til parse(): "<<req<<endl;
  //FILE* streamInput;
long int i;

	lookahead = lexObj.lexan();
	while(lookahead!= DONE)
	{
		if(htmldokument()==false)	
	       {printf("Slut");
		//indlæser der afsluttende tegn (DONE)
		if(!match(lookahead));}		
	}
	//	cout<<"\nParsning er slut!\n";
	returnNewBufferSize();
	//der knyttes en stream-pointer til filen med det
	//parsest indhold i 'readmode'
   	streamInput = fopen(fil_n,"r");

	//lseek(fh,0,SEEK_SET);

   	int NEW_PAGEBUFFERSIZE =get_NewBufferSize();
   	char* newPageBuf;
	cout<<"\nPagebuffer: "<<NEW_PAGEBUFFERSIZE;
   	newPageBuf = new char[NEW_PAGEBUFFERSIZE];
	//	newPageBuf[0]='\0';
   	// lseek(fh, 0, SEEK_SET);

		cout << "NEW_PAGEBUFFERSIZE:  " << NEW_PAGEBUFFERSIZE;  //Kontroludskrift
		i=0;
		int ch;


		// her laeses den parsede fil, karakter for karakter ind i en char*
		// Desvaerre sker der en fejl, der er skyld i at smaa sider ikke kan laeses ind, og at store sider faar oedelagt deres bund
		// Fejlen er gennemtestet og ligger ikke i:
		// tempfilen
		// NEW_PAGEBUFFERSIZE
		// Hukommelsesallokering
		// Manglende ASCII-nul terminering
		// saa vi er rimelige mystificeret
		while((ch = getc(streamInput))!=EOF)
		{
                 newPageBuf[i]=ch;
		 i++;
		}
		// slut paa fejl

		//Kontroludskrift af den parsede fil		
                // cout<<"\n-------newpage i parser():--------\n"<<newPageBuf;
      		//newPageBuf[NEW_PAGEBUFFERSIZE] = '\0'; //ASCII-nul terminering

	//lukker for det varme vand! :)
	//file descriptors og stream-pointers lukkes

		//Kontroludskrift
	//cout<<"\n--------Udskriver parsed pagebuffer--------\n"<<endl<<newPageBuf<<"\n----------EOFCHUNK------\n";
	
	return newPageBuf;
}
//====================================================================
// Currently a no-op: both calls that would close the output stream and the
// file descriptor are commented out, so calling this releases nothing.
void parser::closeFile()
{
	//fclose( streamToFile );
	//close(fh);
}
//====================================================================
bool parser::htmldokument()
{//Så længe bare én af funktionerne returnerer true køre løkken


  // Her er fejlen, og grunden til vi ikke kommer videre.
  // while loekken bliver meget enkelt aldrig afsluttet, og derfor kommer der ike noget data til fil2 og dermed brugeren.

	while(true)					
	{
	if(!isHyperlink());
	if(!isJavascriptLink());
	if(!isTekst())return false;//isTekst() skal stå sidst og
	}			//som den eneste returnere false
	return false;	//fordi vi, i vores idelle verden,formoder
		//at et html-dokument afsluttes med '>' som er tekst
}
//====================================================================
// A hyperlink must start with '<' and continue with either an A or an IMG
// construct; anything else is not a hyperlink.
bool parser::isHyperlink()
{
	return isLesser() && (isA() || isImg());
}

//====================================================================
// Tries to consume a JavaScript style link: an optional "location." prefix
// followed by an href construct. The result is that of isHref().
// NOTE(review): the original comment says a link without a leading 'location'
// cannot be a JavaScript link, but the outcome of isLoc() is not checked.
bool parser::isJavascriptLink()
{
	isLoc();
	return isHref();
}
//====================================================================
// Consumes and emits a LOC token; succeeds only if a '.' follows it.
bool parser::isLoc()
{
	if (lookahead == LOC)
	{
		emit(lookahead, token);
		match(lookahead);
		return isPeriod();
	}
	return false;
}
//====================================================================
// Consumes an A token, any spaces after it and then an href construct.
bool parser::isA()
{
	if (lookahead != A)
		return false;

	emit(lookahead, token);
	match(lookahead);

	// skip (and emit) every space between the tag name and the attribute
	while (isSpace())
	{
	}

	return isHref();
}
//====================================================================
// Consumes HREF, '=', an opening quote and an absolute URL, in that order.
// Stops at, and reports, the first part that is missing.
bool parser::isHref()
{
	if (lookahead != HREF)
		return false;

	emit(lookahead, token);
	return match(lookahead) && isEqual() && isQuote() && isabsURL();
}
//=================================================================
//=================================================================
// Consumes an IMG token, then an optional newline, any spaces and another
// optional newline, and finally requires a src construct.
bool parser::isImg()
{
	if (lookahead != IMG)
		return false;

	emit(lookahead, token);
	match(lookahead);

	// a newline may come directly after the tag name ...
	if (lookahead == '\n')
	{
		emit(lookahead, token);
		match(lookahead);
	}

	while (isSpace())
	{
	}

	// ... or after the run of spaces
	if (lookahead == '\n')
	{
		emit(lookahead, token);
		match(lookahead);
	}

	return isSrc();
}
//=================================================================
// Consumes SRC, '=' and an opening quote, then inserts "http://" and the host
// URL in front of the attribute value before consuming text.
bool parser::isSrc()
{
	if (lookahead != SRC)
		return false;

	emit(lookahead, token);
	match(lookahead);

	if (!(isEqual() && isQuote()))
		return false;

	addProtokol();
	addServer();
	return isTekst();
}
//==================================================================
// Consumes BG, '=' and an opening quote, then inserts "http://" and the host
// URL in front of the attribute value before consuming text.
bool parser::isBg()
{
	if (lookahead != BG)
		return false;

	emit(lookahead, token);
	match(lookahead);

	if (!(isEqual() && isQuote()))
		return false;

	addProtokol();
	addServer();
	return isTekst();
}
//===================================================================

// Handles the URL after the opening quote. ftp and gopher links are rejected
// (after their token has been emitted); if no "http://" prefix is present
// one is inserted; the remainder is consumed as a relative URL.
bool parser::isabsURL()
{
	if (isFTP() || isGOPHER())
		return false;

	if (!isProtokol())
		addProtokol();

	return isrelURL();
}
//====================================================================
// Walks through server.network.domain/path/resource and a closing quote.
// Every part is optional: each call consumes its part when present and its
// result is ignored, so this function always returns true.
bool parser::isrelURL()
{
	isServer();
	isPeriod();
	isNetwork();
	isPeriod();
	isDomain();
	isSlash();
	isPath();
	isSlash();
	isResource();
	isQuote();
	return true;
}
//====================================================================
// Consumes and emits an HTTP token if that is the current token.
bool parser::isHTTP()
{
	if (lookahead == HTTP)
	{
		emit(lookahead, token);
		match(lookahead);
		return true;
	}
	return false;
}
//====================================================================
// Consumes and emits an FTP token if that is the current token.
bool parser::isFTP()
{
	if (lookahead == FTP)
	{
		emit(lookahead, token);
		match(lookahead);
		return true;
	}
	return false;
}
//====================================================================

// Consumes and emits a GOPHER token if that is the current token.
bool parser::isGOPHER()
{
	if (lookahead == GOPHER)
	{
		emit(lookahead, token);
		match(lookahead);
		return true;
	}
	return false;
}
//====================================================================

// Recognises the protocol prefix: HTTP, ':' and at least one '/'. Any further
// slashes are consumed as well.
bool parser::isProtokol()
{
	if (!(isHTTP() && isColon() && isSlash()))
		return false;

	while (isSlash())
	{
	}
	return true;
}
//====================================================================

// Handles the server part of a URL: an IP token has its prefix removed,
// every other token gets the IP prefix inserted in front of it.
bool parser::isServer()
{
	switch (lookahead)
	{
	case ID:
	case IPID:
		insert_IP();
		return true;

	case IP:
		remove_IP();
		return true;

	default:
		// only on this path does the outcome of insert_IP() decide the result
		return insert_IP();
	}
}
//====================================================================

// Inserts the host URL into the output (per the original note, the URL that
// is fetched from socketapp; it is installed with setUrl()).
// Returns false, writing and counting nothing, when no URL has been set.
bool parser::addServer()
{
	// Passing a null pointer to "%s" is undefined behaviour, so skip the
	// insertion entirely; the counter then still matches what was written.
	if (Url == NULL)
		return false;

	timesURL++;	// counts how many times the URL has been inserted
	fprintf(streamToFile,"%s",Url);
	return true;
}
//====================================================================

// Inserts the protocol prefix "http://" into the output and counts the
// insertion. Always returns true.
bool parser::addProtokol()
{
	++timesHttp;	// number of times "http://" has been inserted
	fputs("http://", streamToFile);
	return true;
}

//====================================================================

// Inserts the default address (the IP lexeme from symbol-table entry 0)
// followed by '/', then emits the current token and reads the next one.
// Always returns true.
bool parser::insert_IP()
{
	++timesHost;	// number of times "IP + /" has been inserted
	emit(IP, 0);
	emit('/', NONE);

	// A '/' as the next token means an internal link, so
	// server.network.domain has to be inserted before it.
	if (lookahead == '/')
		addServer();

	// Either way the current token (the '/' or the server type) is written
	// out and the next token is fetched.
	emit(lookahead, token);
	match(lookahead);
	return true;
}
//====================================================================
// Drops two tokens without emitting them (the IP and the '/' after it), then
// emits the token that follows and reads the next one. Always returns true.
bool parser::remove_IP()
{
	for (int dropped = 0; dropped < 2; ++dropped)
		match(lookahead);

	emit(lookahead, token);
	match(lookahead);
	return true;
}
//====================================================================
// The network part of a URL is a single ID token; consume and emit it.
bool parser::isNetwork()
{
	if (lookahead != ID)
		return false;

	emit(lookahead, token);
	match(lookahead);
	return true;
}
//====================================================================
// The domain part of a URL is a single ID token; consume and emit it.
bool parser::isDomain()
{
	if (lookahead != ID)
		return false;

	emit(lookahead, token);
	match(lookahead);
	return true;
}
//====================================================================
// Consumes a path. Returns false when isChar() reports that no more text
// follows; otherwise consumes either the long form
// (subdir/subdir/filename.filetype) or the short form (filename.filetype).
bool parser::isPath()
{
	if (!isChar())
		return false;

	// Long path: keep going as long as characters are followed by a slash.
	if (isSlash())
	{
		do
		{
			isChar();
		} while (isSlash());
	}

	// Both forms end with an optional ".filetype".
	isPeriod();
	isChar();
	return true;
}
//====================================================================
// Consumes a resource. Returns false when isChar() reports that no more text
// follows; otherwise consumes runs of characters separated by slashes.
bool parser::isResource()
{
	if (!isChar())
		return false;

	if (isSlash())
	{
		do
		{
			isChar();
		} while (isSlash());
	}

	isChar();
	return true;
}
//====================================================================
// Consumes and emits a '<' if that is the current token.
bool parser::isLesser()
{
	if (lookahead == '<')
	{
		emit(lookahead, token);
		match(lookahead);
		return true;
	}
	return false;
}
//====================================================================
// Consumes and emits a '.' if that is the current token.
bool parser::isPeriod()
{
	if (lookahead == '.')
	{
		emit(lookahead, token);
		match(lookahead);
		return true;
	}
	return false;
}
//====================================================================
// Consumes and emits a ':' if that is the current token.
bool parser::isColon()
{
	if (lookahead == ':')
	{
		emit(lookahead, token);
		match(lookahead);
		return true;
	}
	return false;
}
//====================================================================
// Consumes and emits a '=' if that is the current token.
bool parser::isEqual()
{
	if (lookahead == '=')
	{
		emit(lookahead, token);
		match(lookahead);
		return true;
	}
	return false;
}
//====================================================================
// Consumes and emits a double or a single quote if one is the current token.
bool parser::isQuote()
{
	const bool atQuote = (lookahead == '"') || (lookahead == '\'');
	if (!atQuote)
		return false;

	emit(lookahead, token);
	match(lookahead);
	return true;
}
//====================================================================
// Consumes and emits a '/' if that is the current token.
bool parser::isSlash()
{
	if (lookahead == '/')
	{
		emit(lookahead, token);
		match(lookahead);
		return true;
	}
	return false;
}
//====================================================================
// Consumes and emits a single space if that is the current token.
bool parser::isSpace()
{
	if (lookahead == ' ')
	{
		emit(lookahead, token);
		match(lookahead);
		return true;
	}
	return false;
}
//====================================================================
// Consumes one piece of plain text. A bg attribute is tried first (its result
// is ignored); after that a keyword token or one of the reserved characters
// is emitted as it is, and anything else is handed to isChar().
bool parser::isTekst()
{
	isBg();

	switch (lookahead)
	{
	// keyword tokens appearing as ordinary text
	case HTTP: case FTP: case GOPHER: case LOC:
	case A:    case HREF: case IP:    case IPID:
	case SRC:  case IMG:  case BG:
	// reserved characters that must not occur in paths and resources; they
	// are matched here directly instead of through isSlash(), isQuote() etc.
	// (see the grammar)
	case '/': case '\'': case '"':
	case ' ': case '<':  case '=': case ':':
	case '.':
		emit(lookahead, token);
		match(lookahead);
		return true;

	default:
		return isChar();
	}
}
//====================================================================
// Consumes and emits a run of ordinary text tokens (ID tokens and the
// characters listed below).
// Returns true if at least one token was consumed and false if the current
// token is not ordinary text. Callers such as isPath(), isResource() and
// isTekst() test for false to detect that no more text follows; the previous
// version returned true even when nothing was consumed, so that test could
// never succeed and a token this function does not recognise (DONE included)
// kept htmldokument() looping forever.
bool parser::isChar()
{
	bool consumed = false;

	// A loop replaces the former self-recursion, so a long run of text tokens
	// no longer costs one stack frame per token.
	for (;;)
	{
		switch(lookahead)
		{
		case '{': case '}': case '[': case ']': case '(': case ')':
		case ';': case '`': case '´': case '-': case '_': case '>':
		case ',': case '!': case '?':
		case '§': case '#': case '£': case '@': case '|':
		case '~': case '^': case '¨': case '+': case '%':
		case'&' :case '½':
		case '$':  case '¤': case '*':
		case '\t':case '\\':case '\r':case '\n':
		case ID:
			emit(lookahead, token);
			match(lookahead);
			consumed = true;
			continue;	// next round of the for loop: try the following token

		default:
			return consumed;
		}
	}
}
//====================================================================
//sammenligner  lookahead med t
//(som den selv lige har hentet ind)
//og henter det næste token ind

bool  parser::match(int t)	
{	if(lookahead==t){
	lookahead=lexObj.lexan();return true;}
		
	else {
	lexObj.error("match(),syntax error");return false;}
};
//====================================================================
#endif //PARSER