#
#       read_xml.py
#       simple xml parser to read a parameter file.
#
#       2004.3.29
#
#   Copyright (C) Hidetoshi Nakano
#
#   Please use this program at your own risk.
#   Without any warranty.
# 
#
##############################
import os
import string

# Markers that open and close an XML comment.
comment_start = "<!--"
comment_end = "-->"

def set_xml_start(title):
    """Return the opening tag ``<title>``; whitespace around *title* is dropped."""
    name = title.strip()
    return '<' + name + '>'

def set_xml_end(title):
    """Return the closing tag ``</title>``; whitespace around *title* is dropped."""
    name = title.strip()
    return '</' + name + '>'

def get_xml_title(dat):
    """Extract the tag name from a string that begins with an XML tag.

    Leading/trailing whitespace of *dat* is ignored.  Both start tags
    (``<name>``) and end tags (``</name>``) are accepted; only the first
    tag in the string is examined.

    Returns the text between the opening ``<`` (or ``</``) and the first
    following ``>``.  Returns None when *dat* does not start with a tag,
    or (after printing a "syntax error" message) when the tag is empty or
    has no closing ``>``.
    """
    # str methods instead of the deprecated string-module functions, and a
    # parenthesised single-argument print, so this runs on Python 2 and 3.
    data = dat.strip()
    if data.startswith('</'):
        start = 2
    elif data.startswith('<'):
        start = 1
    else:
        return None
    ret = data.find('>', start)
    if ret > start:
        return data[start:ret]
    print("syntax error. (%s)" % dat)
    return None

def check_xml_comment(dat):
    """Remove XML comments from one line of input.

    *dat* is expected to be already stripped by the caller.

    Returns:
        * *dat* unchanged when it contains no comment start marker;
        * the remaining (stripped) text when every comment on the line is
          closed on the same line;
        * None when nothing is left once the comments are removed;
        * ``comment_end`` as a sentinel when a comment is opened but not
          closed on this line (any text before it is discarded).
    """
    start = dat.find(comment_start)
    while start >= 0:
        # Look for the terminator only after the opening marker.
        end = dat.find(comment_end, start + len(comment_start))
        if end < 0:
            if not dat.endswith(comment_end):
                # Comment continues on the following lines.
                return comment_end
            # Degenerate "<!-->" at the end of the line: the terminator
            # overlaps the opener; keep treating it as a closed comment.
            end = len(dat) - len(comment_end)
        # Cut the comment out and keep whatever surrounds it, so that text
        # following a closed comment is no longer mistaken for the start of
        # a multi-line comment.
        dat = (dat[:start] + dat[end + len(comment_end):]).strip()
        if not dat:
            return None
        start = dat.find(comment_start)
    return dat

class XMLdata:
    """One XML element: its tag name, start/end tag strings, text and children."""

    def __init__(self,title):
        self.title = title
        # tag strings are precomputed so they can be compared against input
        self.start_tag, self.end_tag = set_xml_start(title), set_xml_end(title)
        # accumulated text content / mapping of child tag name -> value
        self.data, self.child = '', {}

class XMLtree:
    """Builds the element tree of one top-level document using a stack.

    ``self.stack[0]`` is the root element (named ``title``); every further
    entry is an element that has been opened but not closed yet.  When an
    element is popped, its child dict (if it has children) or otherwise its
    text is stored in the parent's ``child`` dict under the element's name.

    Several methods return the end tag of the element currently on top of
    the stack as a hint of what is expected next, or -1 on a syntax error.
    """
    # operation codes understood by set_stack()
    stack_init = 0
    stack_push = 1
    stack_pop  = 2

    def __init__(self,title):
        self.title = title
        self.subtitle = None
        self.set_stack(self.stack_init,title)

    def setup_data(self):
        """Return the finished root XMLdata, or -1 on error.

        It is an error when elements are still open (stack length != 1) or
        when the root has neither text nor children; a message is printed
        in both cases.
        """
        if len(self.stack) != 1:
            print("stack set error. stack len(%d)" % len(self.stack))
            # Previously -1 was assigned but never returned, so this error
            # was silently reported as None.
            return -1
        if self.stack[0].data or len(self.stack[0].child) >= 1:
            return self.stack[0]
        print("stack has no data.")
        return -1

    def set_stack(self,flag,name):
        """Apply one stack operation selected by *flag*.

        stack_init -- reset the stack to a single root element *name*.
        stack_push -- open element *name* (the root title is never pushed
                      again).
        stack_pop  -- close the top element and attach it to its parent;
                      the root itself is never popped.

        Returns None, except for a pop on an empty stack, which prints a
        message and returns the root end tag.
        """
        if flag == self.stack_init:
            self.stack = []
            self.stack.append(XMLdata(name))

        elif flag == self.stack_push:
            if name != self.title:
                self.stack.append(XMLdata(name))

        elif flag == self.stack_pop:
            stack_len = len(self.stack)
            if stack_len  == 0:
                print("stack is None.(%s)" % (name))
                return  set_xml_end(self.title)
            elif stack_len == 1:
               return None

            pre_stack = self.stack.pop()
            # children take precedence over text content
            if len(pre_stack.child) >= 1:
                self.stack[-1].child[pre_stack.title] = pre_stack.child
            else:
                self.stack[-1].child[pre_stack.title] = pre_stack.data
        return None

    def set_end(self,dat):
        """Close the current element; *dat* is the tag name from the end tag."""
        return self.set_stack(self.stack_pop,dat)

    def set_includes(self,subline):
        """Consume a sequence of tags and text that sits on a single line.

        Start tags are pushed, end tags popped, and text up to the end tag
        of the current element is appended to that element's data.

        Returns None on success and -1 on a syntax error (malformed tag, or
        text that is not followed by the current element's end tag).
        """
        while len(subline) :
            if subline.startswith('<'):
                tag = get_xml_title(subline)
                if tag is None:
                    return -1
                if subline.startswith('</'):
                    self.set_stack(self.stack_pop,tag)
                    subline = subline[len(set_xml_end(tag)):].lstrip()
                else:
                    self.set_stack(self.stack_push,tag)
                    subline = subline[len(set_xml_start(tag)):].lstrip()
            else:
                line = subline.find(self.stack[-1].end_tag)
                if line < 0:
                    # Without this check the slices below never shrink
                    # subline and the loop would not terminate.
                    return -1
                self.stack[-1].data += subline[:line]
                subline = subline[line:]
        return None

    def set_start(self,tag,dat):
        """Handle a line *dat* that begins with the start tag of *tag*.

        Returns the end tag of the element on top of the stack after the
        line has been processed, or -1 on a syntax error.
        """
        self.set_stack(self.stack_push,tag)

        if dat == self.stack[-1].start_tag:
            return  self.stack[-1].end_tag # I expect next tag

        subline = dat[len(self.stack[-1].start_tag):]
        if subline == self.stack[-1].end_tag:
            # empty element: <tag></tag>
            self.set_stack(self.stack_pop,tag)
            return self.stack[-1].end_tag

        subdata = subline.lstrip()
        if subdata.startswith('<'):
            # another tag follows on the same line
            tag = get_xml_title(subdata)
            if tag is None:
                return -1
            if subdata.endswith(self.stack[-1].end_tag):
                ret = self.set_includes(subdata)
                if ret == -1:
                    return -1
                return self.stack[-1].end_tag
            else:
                return self.set_start(tag,subdata)
        else:
            if subline.endswith(self.stack[-1].end_tag):
                # <tag>text</tag> on one line
                self.stack[-1].data = subline[:- len(self.stack[-1].end_tag)]
                self.set_stack(self.stack_pop,self.stack[-1].title)
                # same return convention as the other "closed" paths
                return self.stack[-1].end_tag
            else:
                self.stack[-1].data = subline
                return  self.stack[-1].end_tag

    def set_line(self,dat,line):
        """Append a content line to the current element.

        *dat* is the stripped line and *line* the raw one.  When *dat* ends
        with the current end tag, the text before it is appended and the
        element is closed; otherwise the raw line (left-stripped only, so
        its line ending is kept) is appended.

        Returns the end tag of the element on top of the stack afterwards.
        """
        if dat.endswith(self.stack[-1].end_tag):
            self.stack[-1].data += dat[:- len(self.stack[-1].end_tag)]
            self.set_stack(self.stack_pop,self.stack[-1].title)
            return self.stack[-1].end_tag
        else:
            self.stack[-1].data += line.lstrip()
            return self.stack[-1].end_tag

class Set_XML:
    """Reads an XML parameter file and collects its top-level documents.

    After a successful get_syntax() call, ``self.xmltree`` holds one
    XMLdata object per document found.  On failure the methods return None
    and a description is available from get_error().
    """

    def __init__(self,filename):
        """Remember *filename* if it names an existing file, else set an error."""
        if os.path.isfile(filename):
            self.filename = filename
            self.error = ''
        else:
            self.filename = None
            self.error = "(%s) can't be found." % filename
        self.xmltree = []

    def get_error(self):
        """Return the most recent error message ('' if none was recorded)."""
        return self.error

    def print_xml_data(self):
        """Print the children (or, lacking children, the text) of each document."""
        if self.xmltree is None:
            print(self.get_error())
            return None
        for xml in self.xmltree:
            if len(xml.child) >= 1:
                for key in xml.child:
                   print("%s %s" % (key, xml.child[key]))
            else:
                print("dat %s" % (xml.data,))
            print("")

    def get_xml_count(self):
        """Return the number of documents collected so far."""
        return len(self.xmltree)

    ## read file
    def read_file(self,filename):
        """Return the lines of *filename* as a list, or None on a read error.

        On failure the error message is stored for get_error().
        """
        try:
            fp = open(filename,'r')
            try:
                data = fp.readlines()
            finally:
                # close the file even when reading fails
                fp.close()
        except (IOError, OSError, UnicodeError):
            data = None
            self.error = "I can't read %s." % filename
        return data

    ## read XML file
    def get_syntax(self,title):
        """Parse the file and collect every ``<title>`` ... ``</title>`` document.

        The file is processed line by line.  The line carrying the opening
        ``<title>`` tag only starts a document; anything else on that line
        is ignored.  Comments are skipped.

        Returns ``self.xmltree`` (a list of XMLdata) when at least one
        document was read.  Returns None when *title* or the file name is
        missing, the file cannot be read, a syntax error occurs, the file
        ends inside a document, or no document is found; get_error() then
        describes the problem.
        """
        if title is None or self.filename is None:
            return None

        data = self.read_file(self.filename)
        if data is None:
            return None

        start_tag = set_xml_start(title)
        # in_data == start_tag means "outside a document, waiting for one"
        in_data = start_tag
        in_comment = 0
        xmldata = None

        for line in data:
            dat = line.strip()
            # comment
            if in_comment:
                # inside a multi-line comment: skip until a line closes it
                if dat and (dat.startswith(comment_end) or dat.endswith(comment_end)):
                    in_comment = 0
                continue
            if dat:
                dat = check_xml_comment(dat)
                if dat is None:
                    continue
                elif dat == comment_end:
                    in_comment = 1
                    continue

            # start
            if dat and in_data == start_tag:
                if dat.startswith(in_data):
                    # xmldata is always None while waiting for a start tag,
                    # so a fresh tree is all that is needed here.
                    xmldata = XMLtree(title)
                    in_data = None
                    continue
            if xmldata is None:
                continue

            if dat and dat.startswith('<'):
                tag = get_xml_title(dat)
                if tag is None:
                    self.error = "syntax error. (%s)" % dat
                    return None

                if dat.startswith('</'):  # sub end
                    if tag == title:
                        ret = xmldata.setup_data()
                        if ret == -1:
                            # record why parsing stopped instead of leaving
                            # a stale or empty error message
                            self.error = "syntax error. (%s)" % dat
                            return None
                        elif isinstance(ret,XMLdata):
                            self.xmltree.append(ret)
                        xmldata = None
                        in_data = start_tag
                    else:
                        in_data = xmldata.set_end(tag)
                else: # sub start
                    in_data = xmldata.set_start(tag,dat)

            else: # content
                in_data = xmldata.set_line(dat,line)

            if in_data == -1:
                self.error = "syntax error. (%s)" % dat
                return None

        if xmldata:
            # file ended while a document was still open
            self.error = "illegal end. (%s)" % (in_data)
            return None

        if len(self.xmltree) >= 1:
            return self.xmltree
        else:
            self.error = "no xml data."
            return None

if __name__ == '__main__':
    import sys

    # the XML file to read is the first command-line argument
    if len(sys.argv) <= 1:
        print("usage:python read_xml.py xml-file")
        sys.exit(1)

    xmlparse = Set_XML(sys.argv[1])
    dat = xmlparse.get_syntax("replace") # top document
    if dat is not None:
        xmlparse.print_xml_data()
    else:
        print(xmlparse.get_error())

