# -*- coding: ascii -*-
#
#  bottleparser.py - a Sakura Script parser for SSTP Bottle
#  Copyright (C) 2001, 2002 by Tamito KAJIYAMA
#  Copyright (C) 2004 by Shyouzou Sugitani <shy@debian.or.jp>
#  Copyright (C) 2004 by Atzm WATANABE <sitosito@p.chan.ne.jp>
#
#  This program is free software; you can redistribute it and/or modify it
#  under the terms of the GNU General Public License (version 2) as
#  published by the Free Software Foundation.  It is distributed in the
#  hope that it will be useful, but WITHOUT ANY WARRANTY; without even the
#  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#  PURPOSE.  See the GNU General Public License for more details.
#
# $Id: bottleparser.py,v 1.15 2004/09/01 02:35:10 atzm Exp $
#

from script import *

# Marker that BottleParser puts in the first slot of a parsed tuple (where
# SCRIPT_TEXT, SCRIPT_TAG, TEXT_STRING or TEXT_META would normally go) when
# it meets malformed input and is not running in "strict" mode.
# NOTE(review): the value 9 is presumably chosen to stay clear of the
# constants exported by the script module -- confirm against script.py.
PARSE_ERROR = 9

class BottleParser(Parser):
	"""Sakura Script parser for SSTP Bottle that can tolerate broken scripts.

	When self.error is "strict" every problem raises ParserError.  With any
	other value (the default is "loose") the offending piece of the script
	is kept in the result, tagged with PARSE_ERROR instead of the regular
	SCRIPT_* / TEXT_* marker, and parsing goes on.

	Tokenizing (self.tokenize) and the storage of the error mode in
	self.error are expected to be provided by the Parser base class.
	"""

	def __init__(self, error="loose"):
		"""Create a parser; error is handed unchanged to Parser.__init__."""
		Parser.__init__(self, error=error)

	def _lexeme_width(self, lexeme):
		"""Return the size of lexeme in Shift_JIS bytes.

		Falls back to len(lexeme) when the lexeme cannot be converted, so
		that column bookkeeping never aborts the parse.
		"""
		try:
			return len(lexeme.encode('sjis'))
		except UnicodeError:
			return len(lexeme)

	def _unread_token(self, token, lexeme, position):
		"""Push (token, lexeme) back to the front of the token queue.

		position is the (column, length) pair saved just before the
		next_token() call that produced the token; restoring it makes the
		push-back an exact undo, so re-reading the token yields the same
		column again.
		"""
		self.tokens.insert(0, (token, lexeme))
		self.column, self.length = position

	def next_token(self):
		"""Pop and return the next (token, lexeme) pair.

		Returns ('', '') when the queue is empty.  Otherwise self.column is
		advanced past the previously returned token and self.length is set
		to the width of the token being returned.
		"""
		if not self.tokens:
			return '', ''
		token, lexeme = self.tokens.pop(0)
		self.column += self.length
		self.length = self._lexeme_width(lexeme)
		return token, lexeme

	def parse(self, s):
		"""Parse the script string s and return a list of tuples.

		Each element is one of:
		  (SCRIPT_TEXT, text)            text is a tuple of text items
		  (SCRIPT_TAG, lexeme[, arg...]) a recognized tag and its arguments
		  (PARSE_ERROR, ...)             same layout, for a broken piece

		A text item is (TEXT_STRING, str), (TEXT_META, lexeme[, arg]) or the
		PARSE_ERROR flavour of either.  An empty or false s gives [].

		Raises ParserError in strict mode on the first problem found.
		"""
		if not s:
			return []
		# tokenize the script
		self.tokens = self.tokenize(s)
		self.column = 0
		self.length = 0
		self.errorflag = False
		# parse the sequence of tokens
		script = []
		text = []
		string_chunks = []
		# 0 after \0 or \h, 1 after \1 or \u; used by the short \sN form
		scope = 0
		while self.tokens:
			token, lexeme = self.next_token()
			if token == TOKEN_STRING and lexeme == "\\":
				# a lone backslash is what is left of an unrecognized tag
				if self.error == "strict":
					raise ParserError("unknown tag", self.column+3)
				self.errorflag = True
			elif token == TOKEN_STRING and lexeme == "%":
				# outside strict mode a lone percent sign is kept as text
				if self.error == "strict":
					raise ParserError("unknown meta string", self.column)
			if token in (TOKEN_NUMBER, TOKEN_OPENED_SBRA,
					TOKEN_STRING, TOKEN_CLOSED_SBRA):
				# plain text; brackets outside a tag argument are text too
				string_chunks.append(lexeme)
				continue
			if string_chunks:
				# a non-text token ends the current run of text
				if self.errorflag:
					self.errorflag = False
					text.append((PARSE_ERROR, ''.join(string_chunks)))
				else:
					text.append((TEXT_STRING, ''.join(string_chunks)))
				string_chunks = []
			if token == TOKEN_META:
				if lexeme == "%j":
					# NOTE(review): argument is the whole (status, value)
					# pair from read_sbra_id() and errorflag is not
					# cleared here -- confirm this is what consumers expect.
					argument = self.read_sbra_id()
					if self.errorflag:
						text.append((PARSE_ERROR, lexeme, argument))
					else:
						text.append((TEXT_META, lexeme, argument))
				else:
					if self.errorflag:
						text.append((PARSE_ERROR, lexeme))
					else:
						text.append((TEXT_META, lexeme))
				continue
			if text:
				# anything that is neither text nor meta closes the text run
				script.append((SCRIPT_TEXT, tuple(text)))
				text = []
			if lexeme in ["\\c", "\\e", "\\t", "\\_s", "\\_q"]:
				script.append((SCRIPT_TAG, lexeme))
			elif lexeme in ["\\0", "\\h"]:
				script.append((SCRIPT_TAG, lexeme))
				scope = 0
			elif lexeme in ["\\1", "\\u"]:
				script.append((SCRIPT_TAG, lexeme))
				scope = 1
			elif lexeme in ["\\s"]:
				# long form: \s[ID]
				[status, argument] = self.read_sbra_id()
				try:
					# validation only; the argument is stored as a string
					int(argument)
				except ValueError:
					if self.error == 'strict':
						raise ParserError(
							"invalid argument (%s[%s])" % (lexeme, argument),
							self.column, self.length)
					else:
						status = PARSE_ERROR
				if status == PARSE_ERROR:
					self.errorflag = False
					script.append((PARSE_ERROR, lexeme, argument))
				else:
					script.append((SCRIPT_TAG, lexeme, argument))
			elif lexeme[:2] in ["\\w", "\\s"]:
				# short form with the number glued to the tag (\w8, \s5);
				# assumes the tokenizer always supplies one digit here
				num = lexeme[2]
				if lexeme[:2] == "\\s" and scope == 1:
					# in scope 1 the short surface number is shifted by 10
					num = str(int(num) + 10)
				script.append((SCRIPT_TAG, lexeme[:2], num))
			elif lexeme in ["\\n"]:
				# the bracketed argument of \n is optional
				if self.tokens and self.tokens[0][0] == TOKEN_OPENED_SBRA:
					argument = self.read_sbra_text()
					script.append((SCRIPT_TAG, lexeme, argument))
				else:
					script.append((SCRIPT_TAG, lexeme))
			elif lexeme in ["\\URL"]:
				# one mandatory argument, then bracketed arguments in pairs
				buffer = [self.read_sbra_text()]
				while self.tokens and self.tokens[0][0] == TOKEN_OPENED_SBRA:
					buffer.append(self.read_sbra_text())
					buffer.append(self.read_sbra_text())
				if self.errorflag:
					self.errorflag = False
					script.append((PARSE_ERROR, lexeme) + tuple(buffer))
				else:
					script.append((SCRIPT_TAG, lexeme) + tuple(buffer))
			else:
				if self.error == 'strict':
					raise ParserError("unknown tag (%s)" % lexeme,
							self.column+3, self.length)
				else:
					# keep the unknown tag as text, flagged as an error
					self.errorflag = True
					string_chunks.append(lexeme)
		if string_chunks:
			if self.errorflag:
				self.errorflag = False
				text.append((PARSE_ERROR, ''.join(string_chunks)))
			else:
				text.append((TEXT_STRING, ''.join(string_chunks)))
		if text:
			if self.errorflag:
				self.errorflag = False
				script.append((PARSE_ERROR, tuple(text)))
			else:
				script.append((SCRIPT_TEXT, tuple(text)))
		return script

	def read_sbra_id(self):
		"""Read a bracketed argument that should hold exactly one item.

		Returns the (kind, value) pair of the first item read by
		read_sbra_text(), or (PARSE_ERROR, '') when nothing was read.

		Raises ParserError in strict mode when the item count is not one.
		"""
		text = self.read_sbra_text()
		if len(text) != 1:
			if self.error == 'strict':
				raise ParserError("syntax error (expected a single ID)",
						self.column, self.length)
			else:
				self.errorflag = True
		if text:
			return (text[0][0], text[0][1])
		else:
			# the PARSE_ERROR status already carries the failure
			self.errorflag = False
			return (PARSE_ERROR, '')

	def read_sbra_text(self):
		"""Read a square-bracketed argument and return its items as a tuple.

		Items are (TEXT_STRING, str) and (TEXT_META, lexeme), or their
		PARSE_ERROR flavour once self.errorflag has been raised.  Outside
		strict mode:
		  - no opening bracket: the token is pushed back and () is returned;
		  - script ends before the closing bracket: the text read so far,
		    minus its last token, comes back as (PARSE_ERROR, '[...') and
		    that last token is pushed back for the caller;
		  - nothing between the brackets: ((PARSE_ERROR, '[]'),) or
		    ((PARSE_ERROR, '['),).
		In every such case self.errorflag is left set unless only the
		"nothing between the brackets" rule applied.

		Raises ParserError in strict mode for any of the above problems
		except the empty "[]" argument.
		"""
		position = (self.column, self.length)
		token, lexeme = self.next_token()
		if token != TOKEN_OPENED_SBRA:
			if self.error == 'strict':
				raise ParserError("syntax error (expected a square bracket)",
						self.column, self.length)
			else:
				self.errorflag = True
				# NOTE(review): at end of script this re-queues the
				# ('', '') sentinel from next_token(), which parse() then
				# handles as an unknown tag.
				self._unread_token(token, lexeme, position)
				return ()
		text = []
		string_chunks = []

		# peek at the first token inside the brackets to spot "[]"
		position = (self.column, self.length)
		token, lexeme = self.next_token()
		closed_flag = (token == TOKEN_CLOSED_SBRA)
		self._unread_token(token, lexeme, position)

		while self.tokens:
			# remembered so the last token can be pushed back on early EOF
			position = (self.column, self.length)
			token, lexeme = self.next_token()
			if token in (TOKEN_NUMBER, TOKEN_STRING, TOKEN_OPENED_SBRA,
					TOKEN_TAG):
				# inside an argument, tags and "[" count as plain text
				string_chunks.append(lexeme)
				continue
			if string_chunks:
				# unlike parse(), errorflag is left for the caller to clear
				if self.errorflag:
					text.append((PARSE_ERROR, ''.join(string_chunks)))
				else:
					text.append((TEXT_STRING, ''.join(string_chunks)))
				string_chunks = []
			if token == TOKEN_CLOSED_SBRA:
				break
			elif token == TOKEN_META:
				if self.errorflag:
					text.append((PARSE_ERROR, lexeme))
				else:
					text.append((TEXT_META, lexeme))
			else:
				if self.error == 'strict':
					raise ParserError("syntax error (wrong type of argument)",
							self.column, self.length)
				else:
					self.errorflag = True
		else:
			# the token queue ran dry before a closing bracket was seen
			if self.error == 'strict':
				raise ParserError("unexpected end of script",
						self.column + self.length)
			else:
				self.errorflag = True
				if string_chunks:
					arg = u'['
					if closed_flag:
						arg += ']'
					# the last chunk is the token pushed back below
					arg += ''.join(string_chunks[:-1])
					text.append((PARSE_ERROR, arg))
					self._unread_token(token, lexeme, position)

		if not text:
			if closed_flag:
				text.append((PARSE_ERROR, u'[]'))
			else:
				text.append((PARSE_ERROR, u'['))
		return tuple(text)
