#define MiX_SAXPARSER_CPP_

#include "SAX_Parser.h"

#include <queue>
#include <cstring>
#include <sstream>

#include "misc.h"
#include "ParsingException.h"

namespace MiX{
  // Runs the parser state machine.
  //
  // Clears the "XML declaration seen" flag, starts in text mode and keeps
  // dispatching to the parse routine that matches the current state; every
  // routine returns the state to enter next. The machine stops once it
  // reaches STATE_Complete or STATE_Exception.
  //
  // Returns false when it stopped in STATE_Exception, true otherwise.
  template <class charT,class char_traits,class xml_traits>
  bool SAX_Parser<charT,char_traits,xml_traits>::parsingLoop(){
    decrared_ = false;
    state_ = STATE_Text;
    while(!(state_==STATE_Complete || state_==STATE_Exception)){
      switch(state_){
      case STATE_Text:
        state_ = parseText();
        break;
      case STATE_Tag:
        state_ = parseTag();
        break;
      case STATE_Comment:
        state_ = parseComment();
        break;
      case STATE_Declaration:
        state_ = parseDeclaration();
        break;
      case STATE_XMLDeclaration:
        state_ = parseXMLDeclaration();
        break;
      case STATE_CData:
        state_ = parseCData();
        break;
      default:
        // Any other state has no parse routine; leave it untouched.
        break;
      }
    }
    return state_ != STATE_Exception;
  }

  // Parses an XML document held in memory.
  //
  // text: the document; its c_str() is handed to the tokenizer.
  // Returns the result of parsingLoop() (true on completion, false when
  // parsing stopped in STATE_Exception).
  // Throws ParsingException (index -1, code InvalidHandler) when no handler
  // has been set.
  template <class charT,class char_traits,class xml_traits>
  bool SAX_Parser<charT,char_traits,xml_traits>::parse(const string_type& text){
    if(handler_==NULL){
      // No std::ends: on an ostringstream it appends a literal '\0'
      // character to the message rather than terminating it.
      std::ostringstream ss;
      ss << "ParsingException" << std::endl
         << __FILE__ << ':' << __LINE__ << std::endl
         << "Set handler before parse." << std::endl;
      throw ParsingException(-1,InvalidHandler,ss.str());
    }
    tokenizer_.injectString(text.c_str());
    return parsingLoop();
  }

#ifdef MiX_HAS_BASIC_STREAM
  // Parses an XML document read from an input stream.
  //
  // is: the stream handed to the tokenizer.
  // Returns the result of parsingLoop() (true on completion, false when
  // parsing stopped in STATE_Exception).
  // Throws ParsingException (index -1, code InvalidHandler) when no handler
  // has been set.
  template <class charT,class char_traits,class xml_traits>
  bool SAX_Parser<charT,char_traits,xml_traits>::parse(std::basic_istream<charT,char_traits>& is) {
    if(handler_==NULL){
      // No std::ends: on an ostringstream it appends a literal '\0'
      // character to the message rather than terminating it.
      std::ostringstream ss;
      ss << "ParsingException" << std::endl
         << __FILE__ << ':' << __LINE__ << std::endl
         << "Set handler before parse." << std::endl;
      throw ParsingException(-1,InvalidHandler,ss.str());
    }
    tokenizer_.injectStream(is);
    return parsingLoop();
  }
#endif  
  
  // Discards consecutive whitespace tokens (CR, LF, space, tab).
  //
  // Tokens are pulled from the tokenizer until one that is not whitespace
  // shows up; that token is handed back so the caller reads it next.
  template <class charT,class char_traits,class xml_traits>
  void SAX_Parser<charT,char_traits,xml_traits>::skipSpaceTokens(){
    token_type current;
    for(;;){
      tokenizer_.ejectToken(current);
      const bool blank = current.getType()==Token_cr ||
                         current.getType()==Token_lf ||
                         current.getType()==Token_space ||
                         current.getType()==Token_tab;
      if(!blank) break;
    }
    // Return the first non-whitespace token to the tokenizer.
    tokenizer_.pushToken(current);
  }
  
  // Collects character data up to the next '<' (or the end of the input)
  // and reports it through handler_->onText().
  //
  // '&' starts a reference that is resolved by parseReference(); every other
  // token is appended verbatim. When ignore_space_ is set, leading whitespace
  // tokens are skipped and trailing whitespace is trimmed. Empty text is not
  // reported.
  //
  // Returns STATE_Tag when a '<' was read, STATE_Complete when the tokenizer
  // ran out of tokens, and STATE_Exception when parseReference() recorded a
  // handler-requested abort.
  template <class charT,class char_traits,class xml_traits>
  typename SAX_Parser<charT,char_traits,xml_traits>::State SAX_Parser<charT,char_traits,xml_traits>::parseText(){
    token_type tok;
    string_type s;
    State ret = STATE_Complete;

    if( ignore_space_ ) skipSpaceTokens();

    while(tokenizer_.ejectToken(tok)){
      if(tok.getType()==Token_lt){
        // '<' switches to tag parsing.
        ret = STATE_Tag;
        break;
      }else if(tok.getType()==Token_amp){
        // '&' introduces a reference.
        s += parseReference();
        // parseReference() signals an abort by setting state_. parsingLoop()
        // overwrites state_ with our return value, so the abort has to be
        // propagated here or it would be lost.
        if(state_==STATE_Exception) return STATE_Exception;
      }else{
        // Any other token has no special meaning in text; keep it as is.
        s += tok.getData();
      }
    }
    if( ignore_space_ ) trimRight(s);
    if( s.length()!=0 ) handler_->onText(s);
    return ret;
  }
  
  // Parses one tag; the opening '<' has already been consumed.
  //
  // The first token after '<' decides what follows:
  //   text -> start tag (or empty-element tag when it ends in "/>")
  //   '/'  -> end tag
  //   '!'  -> returns STATE_Declaration
  //   '?'  -> returns STATE_XMLDeclaration
  // Start tags call handler_->onStart() and push the name on validator_;
  // end tags are checked against the top of validator_ and call
  // handler_->onEnd(). Empty-element tags call onStart() then onEnd().
  //
  // Errors are passed to handler_->onException(); a true return aborts
  // parsing (STATE_Exception), a false return lets parsing continue.
  //
  // Returns STATE_Complete once no element is open any more, STATE_Text
  // otherwise.
  template <class charT,class char_traits,class xml_traits>
  typename SAX_Parser<charT,char_traits,xml_traits>::State SAX_Parser<charT,char_traits,xml_traits>::parseTag(){
    XMLToken<charT,char_traits,xml_traits> tok;
    bool endtag = false;

    skipSpaceTokens();
    tokenizer_.ejectToken(tok);
    if(tok.getType()==Token_text){
      // Start tag: hand the name back, it is read again below.
      tokenizer_.pushToken(tok);
    }else if(tok.getType()==Token_slash){
      // A leading slash marks an end tag.
      endtag = true;
    }else if(tok.getType()==Token_exclamation){
      // '!' starts a declaration (comment, CDATA, ...).
      return STATE_Declaration;
    }else if(tok.getType()==Token_question){
      // '?' starts an XML declaration.
      return STATE_XMLDeclaration;
    }else{
      // Nothing else may start a tag.
      // (No std::ends in the messages below: on an ostringstream it would
      // append a literal '\0' character to the message.)
      std::ostringstream ss;
      ss << "ParsingException" << std::endl
         << __FILE__ << ':' << __LINE__ << std::endl
         << "Unexpected token in " << tok.getIndex() << std::endl
         << "(expected !,?,/ or text)"<< std::endl;
      if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
        return STATE_Exception;
      }
    }

    tokenizer_.ejectToken(tok);
    if(tok.getType()!=Token_text){
      // The tag name must be a text token.
      std::ostringstream ss;
      ss << "ParsingException" << std::endl
         << __FILE__ << ':' << __LINE__ << std::endl
         << "Unexpected token in " << tok.getIndex() << std::endl
         << "(expected text)"<< std::endl;
      if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
        return STATE_Exception;
      }
    }
    XMLString<charT,char_traits,xml_traits> name = tok.getData();

    if(endtag){
      skipSpaceTokens();
      tokenizer_.ejectToken(tok);
      if(tok.getType()!=Token_gt){
        // An end tag must be closed by '>'.
        std::ostringstream ss;
        ss << "ParsingException" << std::endl
           << __FILE__ << ':' << __LINE__ << std::endl
           << "Unexpected token in " << tok.getIndex() << std::endl
           << "(expected \'>\')" << std::endl;
        if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
          return STATE_Exception;
        }
      }
      // An end tag with no open element must not touch validator_.top() or
      // pop(): both are undefined on an empty stack. Report it as a
      // mismatch instead.
      const bool has_open = !validator_.empty();
      if(!has_open || validator_.top()!=name){
        std::ostringstream ss;
        ss << "ParsingException" << std::endl;
        if(has_open){
          ss << "End tag mismatch with start tag(" << validator_.top()
             << ") in " << tok.getIndex() << std::endl;
        }else{
          ss << "End tag without start tag in " << tok.getIndex() << std::endl;
        }
        if(handler_->onException(ParsingException(tok.getIndex(),MismatchTag,ss.str()))){
          return STATE_Exception;
        }
      }
      if(has_open) validator_.pop();
      handler_->onEnd(name);
      if(validator_.empty()){
        skipSpaceTokens();
        tokenizer_.ejectToken(tok);
#ifdef MiX_STRICT
        if(tok.getType()!=Token_null){
          // Something follows the end of the document root.
          std::ostringstream ss;
          ss << "ParsingException" << std::endl
             << __FILE__ << ':' << __LINE__ << std::endl
             << "Unexpected token in " << tok.getIndex() << std::endl
             << "(Expected \'(null)\')" << std::endl;
          if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
            return STATE_Exception;
          }
        }
#endif
        return STATE_Complete;
      }
    } else {
      AttrMap<charT,char_traits,xml_traits> atts = parseAttributes();
      // parseAttributes() signals an abort by setting state_. parsingLoop()
      // overwrites state_ with our return value, so propagate it here.
      if(state_==STATE_Exception) return STATE_Exception;
      skipSpaceTokens();
      tokenizer_.ejectToken(tok);
      if(tok.getType()==Token_slash){
        // "/>" : empty-element tag, the end tag is implied.
        endtag = true;
        skipSpaceTokens();
        tokenizer_.ejectToken(tok);
      }
      if(tok.getType()!=Token_gt){
        // A start tag must be closed by '>'.
        std::ostringstream ss;
        ss << "ParsingException" << std::endl
           << __FILE__ << ':' << __LINE__ << std::endl
           << "Unexpected token in " << tok.getIndex() << std::endl
           << "(Expected \'>\')" << std::endl;
        if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
          return STATE_Exception;
        }
      }
      handler_->onStart(name,atts);
      if(endtag) handler_->onEnd(name);
      else validator_.push(name);

      if(validator_.empty()){
        skipSpaceTokens();
        tokenizer_.ejectToken(tok);
#ifdef MiX_STRICT
        if(tok.getType()!=Token_null){
          // Something follows the end of the document root.
          std::ostringstream ss;
          ss << "ParsingException" << std::endl
             << __FILE__ << ':' << __LINE__ << std::endl
             << "Unexpected token in " <<tok.getIndex()<< std::endl
             << "(Expected \'(null)\')" << std::endl;

          if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
            return STATE_Exception;
          }
        }
#endif
        return STATE_Complete;
      }
    }
    return STATE_Text;

  }
  
  
  // Parses a run of `name = "value"` / `name = 'value'` attributes.
  //
  // Whitespace between the parts is skipped. Reading stops at the first
  // token that cannot start an attribute name; that token is handed back to
  // the tokenizer. Value tokens are concatenated verbatim up to the token
  // that matches the opening quote.
  //
  // Errors are passed to handler_->onException(). When the handler asks to
  // abort (returns true), state_ is set to STATE_Exception and an empty map
  // is returned, so callers must check state_ after the call.
  //
  // Returns the attributes collected so far.
  template <class charT,class char_traits,class xml_traits>
  AttrMap<charT,char_traits,xml_traits> SAX_Parser<charT,char_traits,xml_traits>::parseAttributes(){
    token_type tok;
    attrmap_type ret;

    skipSpaceTokens();
    tokenizer_.ejectToken(tok);
    while(tok.getType()==Token_text){
      string_type name(tok.getData());
      skipSpaceTokens();
      tokenizer_.ejectToken(tok);
      if(tok.getType()!=Token_eq) {
        // (No std::ends in the messages below: on an ostringstream it would
        // append a literal '\0' character to the message.)
        std::ostringstream ss;
        ss << "ParsingException" << std::endl
           << __FILE__ << ':' << __LINE__ << std::endl
           << "Unexpected token in " << tok.getIndex() << std::endl
           << "(Expected \'=\')" << std::endl;

        if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
          state_ = STATE_Exception;
          return AttrMap<charT,char_traits,xml_traits>();
        }
        // Handler wants to go on: re-examine the offending token as a
        // possible attribute name.
        continue;
      }
      skipSpaceTokens();
      tokenizer_.ejectToken(tok);
      TokenType quote = tok.getType();
      if(quote!=Token_dblquote && quote!=Token_quote){
        std::ostringstream ss;
        ss << "ParsingException" << std::endl
           << __FILE__ << ':' << __LINE__ << std::endl
           << "Unexpected token in " << tok.getIndex() << std::endl
           << "(Expected \' or \" )" << std::endl;

        if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
          state_ = STATE_Exception;
          return AttrMap<charT,char_traits,xml_traits>();
        }
      }
      string_type val;
      bool terminated = true;
      tokenizer_.ejectToken(tok);
      while(tok.getType()!=quote){
        if(tok.getType()==Token_null){
          // Input ended before the closing quote. Without this check the
          // loop would never terminate.
          terminated = false;
          break;
        }
        val += tok.getData();
        tokenizer_.ejectToken(tok);
      }
      if(!terminated){
        std::ostringstream ss;
        ss << "ParsingException" << std::endl
           << __FILE__ << ':' << __LINE__ << std::endl
           << "Unexpected end of document" << std::endl;

        if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedEOD,ss.str()))){
          state_ = STATE_Exception;
          return AttrMap<charT,char_traits,xml_traits>();
        }
        // Handler wants to go on: drop the unterminated attribute and stop.
        break;
      }
      ret.insert(std::pair<string_type,string_type>(name,val));
      skipSpaceTokens();
      tokenizer_.ejectToken(tok);
    }
    // Hand back the token that ended the attribute list.
    tokenizer_.pushToken(tok);
    return ret;
  }
  
  // Handles what follows "<!".
  //
  //   "--"        -> returns STATE_Comment
  //   "[CDATA["   -> returns STATE_CData (name compared with ci_compare)
  //   anything else is skipped up to and including the next '>' and
  //   STATE_Text is returned.
  //
  // If the input ends before a '>' is found, UnexpectedEOD is reported to
  // handler_->onException(); STATE_Exception is returned when the handler
  // asks to abort, STATE_Complete otherwise.
  template <class charT,class char_traits,class xml_traits>
  typename SAX_Parser<charT,char_traits,xml_traits>::State SAX_Parser<charT,char_traits,xml_traits>::parseDeclaration(){
    token_type tok;
    tokenizer_.ejectToken(tok);
    if(tok.getType()==Token_hyphen){
      tokenizer_.ejectToken(tok);
      if(tok.getType()==Token_hyphen) return STATE_Comment;
      // Not a comment: hand the token back so the skip loop below sees it.
      // Otherwise "<!->" would lose its '>' and swallow the following text.
      tokenizer_.pushToken(tok);
    } else if( tok.getType()==Token_lsb ) { // Start of a CDATA section "<![CDATA["
      token_type cdata, lsb;

      tokenizer_.ejectToken( cdata );
      tokenizer_.ejectToken( lsb );
      if( xml_traits::ci_compare( cdata.getData().c_str(), xml_traits::cdata().c_str(), 6 )==0 &&
          lsb.getType()==Token_lsb )
        return STATE_CData;

      // Not CDATA: hand both look-ahead tokens back, last read first.
      tokenizer_.pushToken( lsb );
      tokenizer_.pushToken( cdata );
    }
    for(;;){
      tokenizer_.ejectToken(tok);
      if(tok.getType()==Token_gt) return STATE_Text;
      if(tok.getType()==Token_null){
        // (No std::ends: on an ostringstream it would append a literal
        // '\0' character to the message.)
        std::ostringstream ss;
        ss << "ParsingException" << std::endl
           << __FILE__ << ':' << __LINE__ << std::endl
           << "Unexpected end of document" << std::endl;

        if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedEOD,ss.str()))){
          return STATE_Exception;
        }
        // No more input will arrive, so looping on would never end.
        return STATE_Complete;
      }
    }
  }
  
  // Handles what follows "<?".
  //
  // First occurrence: expects the name "xml" (compared with ci_compare),
  // parses the attributes, reports them through
  // handler_->onXMLDeclaration(), then expects "?" and ">".
  // Later occurrences are skipped up to the closing "?>" without calling
  // the handler.
  //
  // Errors are passed to handler_->onException(); a true return aborts
  // parsing (STATE_Exception), a false return lets parsing continue.
  //
  // Returns STATE_Text on success, STATE_Exception on abort, and
  // STATE_Complete when the input ends inside a skipped declaration and the
  // handler chose to continue.
  template <class charT,class char_traits,class xml_traits>
  typename SAX_Parser<charT,char_traits,xml_traits>::State SAX_Parser<charT,char_traits,xml_traits>::parseXMLDeclaration(){
    skipSpaceTokens();
    token_type tok;

    if(decrared_) {
      // A second XML declaration is ignored.
      for(;;) {
        tokenizer_.ejectToken(tok);
        if(tok.getType()==Token_question){
          tokenizer_.ejectToken(tok);
          if(tok.getType()==Token_gt) return STATE_Text;
        }
        if(tok.getType()==Token_null){
          // Input ended before "?>". Without this exit the loop would
          // never terminate.
          std::ostringstream ss;
          ss << "ParsingException" << std::endl
             << __FILE__ << ':' << __LINE__ << std::endl
             << "Unexpected end of document" << std::endl;
          if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedEOD,ss.str()))){
            return STATE_Exception;
          }
          return STATE_Complete;
        }
      }
    }

    tokenizer_.ejectToken(tok);
    if(0!=xml_traits::ci_compare(tok.getData().c_str(),
                                 xml_traits::xml().c_str(),
                                 /*strlen("xml")+1*/4)){
      // (No std::ends in the messages below: on an ostringstream it would
      // append a literal '\0' character to the message.)
      std::ostringstream ss;
      ss << "ParsingException" << std::endl
         << __FILE__ << ':' << __LINE__ << std::endl
         << "Unexpected token in " << tok.getIndex() << std::endl
         << "(Expected \"xml\")" << std::endl;

      if(handler_->onException(ParsingException(tok.getIndex(),InvalidDeclaration,ss.str()))){
        return STATE_Exception;
      }
    }
    attrmap_type atts=parseAttributes();
    // parseAttributes() signals an abort by setting state_. parsingLoop()
    // overwrites state_ with our return value, so propagate it here.
    if(state_==STATE_Exception) return STATE_Exception;
    handler_->onXMLDeclaration(atts);
    decrared_ = true;
    skipSpaceTokens();
    tokenizer_.ejectToken(tok);
    if(tok.getType()!=Token_question){
      std::ostringstream ss;
      ss << "ParsingException" << std::endl
         << __FILE__ << ':' << __LINE__ << std::endl
         << "Unexpected token in " << tok.getIndex() << std::endl
         << "(Expected \'?\' )" << std::endl;
      if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
        return STATE_Exception;
      }
      // Handler wants to go on: let the '>' check below see this token.
      tokenizer_.pushToken(tok);
    }
    skipSpaceTokens();
    tokenizer_.ejectToken(tok);
    if(tok.getType()!=Token_gt){
      std::ostringstream ss;
      ss << "ParsingException" << std::endl
         << __FILE__ << ':' << __LINE__ << std::endl
         << "Unexpected token in " << tok.getIndex() << std::endl
         << "(Expected \'>\' [" << tok.getType() << "] )"
         << std::endl;

      if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
        return STATE_Exception;
      }
    }


    return STATE_Text;
  }
  
  // Resolves a reference; the leading '&' has already been consumed.
  //
  // Recognised names (compared with ci_compare): lt, gt, amp, quot, apos.
  // A name starting with '#' is converted by xml_traits::str2char(). The
  // reference must be followed by ';'.
  //
  // Without MiX_STRICT, anything that is not a recognised reference is
  // treated as literal text: the token read after '&' is handed back to the
  // tokenizer and a plain '&' is returned.
  // With MiX_STRICT, such input is reported to handler_->onException().
  //
  // When the handler asks to abort (returns true), state_ is set to
  // STATE_Exception and an empty string is returned, so callers must check
  // state_ after the call.
  //
  // Returns the replacement text for the reference.
  //
  // NOTE(review): whitespace right after '&' is skipped, so lenient mode
  // turns "a & b" into "a &b" -- confirm whether that is intended.
  template <class charT,class char_traits,class xml_traits>
  XMLString<charT,char_traits,xml_traits> SAX_Parser<charT,char_traits,xml_traits>::parseReference(){
    token_type tok;
    string_type ret;
    skipSpaceTokens();
    tokenizer_.ejectToken(tok);
    if(tok.getType()!=Token_text){
#ifdef	MiX_STRICT
      // Only text may follow '&'.
      // (No std::ends in the messages below: on an ostringstream it would
      // append a literal '\0' character to the message.)
      std::ostringstream ss;
      ss << "ParsingException" << std::endl
         << __FILE__ << ':' << __LINE__ << std::endl
         << "Unexpected token in " << tok.getIndex() << std::endl
         << "(Expected text)" << std::endl;
      if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
        state_ = STATE_Exception;
        return string_type();
      }
#endif
      // Treat the '&' as literal text. This also covers a strict-mode
      // handler that chose to continue: falling through would inspect the
      // data of a non-text token, and at(0) throws when that data is empty.
      tokenizer_.pushToken( tok );
      ret += xml_traits::amp();
      return ret;
    }

    if(0==xml_traits::ci_compare(tok.getData().c_str(),
                                xml_traits::str_lt().c_str(),
                                /*strlen("lt")+1*/3)){
      ret+=xml_traits::lt();
    }else if(0==xml_traits::ci_compare(tok.getData().c_str(),
                                      xml_traits::str_gt().c_str(),
                                      /*strlen("gt")+1*/3)){
      ret+=xml_traits::gt();
    }else if(0==xml_traits::ci_compare(tok.getData().c_str(),
                                      xml_traits::str_amp().c_str(),
                                      /*strlen("amp")+1*/4)){
      ret+=xml_traits::amp();
    }else if(0==xml_traits::ci_compare(tok.getData().c_str(),
                                      xml_traits::str_quot().c_str(),
                                      /*strlen("quot")+1*/5)){
      ret+=xml_traits::dblquote();
    } else if( 0==xml_traits::ci_compare(tok.getData().c_str(),
                                      xml_traits::str_apos().c_str(),
                                      /*strlen("apos")+1*/5)){
      ret+=xml_traits::quote();
    } else if( tok.getData().at(0)==xml_traits::sharp() ) {
      // Character reference: convert the part after '#'.
      charT ch;
      if( xml_traits::str2char( (tok.getData().c_str())+1, ch ) ) {
        ret += ch;
      } else {
#ifdef	MiX_STRICT
        std::ostringstream ss;
        ss << "ParsingException" << std::endl
           << __FILE__ << ':' << __LINE__ << std::endl
           << "Unexpected token in " << tok.getIndex() << std::endl
           << "(invalid character reference)" << std::endl;
        if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
          state_ = STATE_Exception;
          return string_type();
        }
        // Handler wants to go on: the reference contributes no character
        // (ch was never set, so it must not be appended).
#else
        // The token handed back still starts with '#', so only the '&' is
        // returned here; adding '#' as well would yield "&##...".
        tokenizer_.pushToken( tok );
        ret += xml_traits::amp();
        return ret;
#endif
      }
    } else {
#ifdef MiX_STRICT
      std::ostringstream ss;
      ss << "ParsingException" << std::endl
         << __FILE__ << ':' << __LINE__ << std::endl
         << "Unexpected token in " << tok.getIndex() << std::endl
         << "(not supported refference)" << std::endl;
      if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
        state_ = STATE_Exception;
        return string_type();
      }
#else
      tokenizer_.pushToken( tok );
      ret += xml_traits::amp();
      return ret;
#endif
    }

    tokenizer_.ejectToken(tok);
    if(tok.getType()!=Token_semicolon){
      // "&name" must always be followed by ';'.
      std::ostringstream ss;
      ss << "ParsingException" << std::endl
         << __FILE__ << ':' << __LINE__ << std::endl
         << "Unexpected token in " << tok.getIndex() << std::endl
         << "(expected \';\')" << std::endl;
      if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
        state_=STATE_Exception;
        return string_type();
      }
    }
    return ret;
  }

  // Reads the body of a comment; "<!--" has already been consumed.
  //
  // Token data is accumulated until the sequence '-', '-', '>' is found;
  // the accumulated text is then passed to handler_->onComment() and
  // STATE_Text is returned.
  //
  // If the input ends first, STATE_Complete is returned. With MiX_STRICT
  // the situation is reported to handler_->onException() beforehand, and
  // STATE_Exception is returned when the handler asks to abort.
  template <class charT,class char_traits,class xml_traits>
  typename SAX_Parser<charT,char_traits,xml_traits>::State SAX_Parser<charT,char_traits,xml_traits>::parseComment(){
    string_type body;
    token_type cur;

    for(tokenizer_.ejectToken(cur);
        cur.getType()!=Token_null;
        tokenizer_.ejectToken(cur)){
      if(cur.getType()==Token_hyphen){
        // Look two tokens ahead for the rest of the "-->" terminator.
        token_type second,closer;
        tokenizer_.ejectToken(second);
        tokenizer_.ejectToken(closer);
        const bool terminated =
          second.getType()==Token_hyphen && closer.getType()==Token_gt;
        if(terminated){
          handler_->onComment(body);
          return STATE_Text;
        }
        // Not the terminator: hand the look-ahead back, last read first.
        tokenizer_.pushToken(closer);
        tokenizer_.pushToken(second);
      }
      body+=cur.getData();
    }
#ifdef MiX_STRICT
    std::ostringstream ss;
    ss << "ParsingException" << std::endl
       << __FILE__ << ':' << __LINE__ << std::endl
       << "Unexpected token(null) in " << cur.getIndex() << "(comment)" 
       << std::endl;
    if(handler_->onException(ParsingException(cur.getIndex(),UnexpectedToken,ss.str()))){
      return STATE_Exception;
    }    
#endif
    return STATE_Complete;
  }

  // read CDATA Section   by Shin Adachi			2003/07/16
  //   * 2003/07/19 modified by KUBO,Yotaro
  // Reads the body of a CDATA section; "<![CDATA[" has already been
  // consumed.
  //
  // Token data is accumulated until the sequence ']', ']', '>' is found;
  // the accumulated text is then passed to handler_->onText() and
  // STATE_Text is returned.
  //
  // If the input ends first, STATE_Complete is returned. With MiX_STRICT
  // the situation is reported to handler_->onException() beforehand, and
  // STATE_Exception is returned when the handler asks to abort.
  template <class charT,class char_traits,class xml_traits>
  typename SAX_Parser<charT,char_traits,xml_traits>::State SAX_Parser<charT,char_traits,xml_traits>::parseCData()
  {
    // Accumulated section text.
    string_type	text;
    token_type	tok;
    
    tokenizer_.ejectToken( tok );
    while( tok.getType()!=Token_null ) {
      // "]]>" ends the section.
      if( tok.getType()==Token_rsb ) {
        token_type	rsb, gt;
        
        tokenizer_.ejectToken( rsb );
        tokenizer_.ejectToken( gt );
        if( rsb.getType()==Token_rsb && gt.getType()==Token_gt ) {
          handler_->onText( text );
          return STATE_Text;
        }
        else {
          // Not the terminator: hand the look-ahead back, last read first,
          // the same order parseComment() and parseDeclaration() use.
          // Pushing in read order would replay the two tokens swapped.
          tokenizer_.pushToken( gt );
          tokenizer_.pushToken( rsb );
        }
      }
      
      // Ordinary content.
      text += tok.getData();
      tokenizer_.ejectToken( tok );
    }		
    
    // Input ended inside the section.
#ifdef MiX_STRICT
    std::ostringstream ss;
    ss << "ParsingException" << std::endl
       << __FILE__ << ':' << __LINE__ << std::endl
       << "Unexpected token(null) in " << tok.getIndex() << "(CDATA Section)" 
       << std::endl;
    if(handler_->onException(ParsingException(tok.getIndex(),UnexpectedToken,ss.str()))){
      return STATE_Exception;
    }    

#endif
    return STATE_Complete;
  }
  
  
  // Removes trailing CR, LF, space and tab characters from str in place.
  // A string made up only of those characters becomes empty.
  template <class charT,class char_traits,class xml_traits>
  void SAX_Parser<charT,char_traits,xml_traits>::trimRight(string_type& str){
    // Null-terminated set of characters to strip.
    charT blanks[] = {
      xml_traits::cr(),
      xml_traits::lf(),
      xml_traits::sp(),
      xml_traits::tab(),
      xml_traits::null()
    };
    const size_t last_kept = str.find_last_not_of(blanks);
    // When nothing is kept, find_last_not_of yields npos and npos+1 wraps
    // to 0, so the whole string is erased.
    str.erase(last_kept+1);
  }
}
