# 反切の抽出
# TODO: 反切下字のリスト(韻別)
# TODO: 反切上字のリストと系聯
# GOAL: 反切による小韻の検索、声母韻目の表示

# require 'yaml'
require 'rexml/parsers/sax2parser'
require 'rexml/sax2listener'

# All paths are built relative to the directory that contains this script.
script_dir = File.dirname(__FILE__)

# Sibling "data" directory holding the XML input.
SOURCE_DIR = File.join(script_dir, '..', 'data')
# Sibling "tables" directory for output files.
TARGET_DIR = File.join(script_dir, '..', 'tables')
# Read-only handle on the sbgy.xml input document.
SBGY = File.open(File.join(SOURCE_DIR, 'sbgy.xml'), 'r')

# SAX2 listener that extracts fanqie spellings from a stream of XML events.
#
# Output format (one line per +voice_part+ that contains a +fanqie+):
#
#   <rhyme id>:
#    - <text of first word_head>: <text of fanqie>
#
# Only the first +word_head+ of each +voice_part+ is printed, and +fanqie+
# elements are only honoured while inside a +voice_part+.
#
# Earlier revisions opened tables/gyfanqie.txt for writing in the constructor
# but never wrote to it or closed it, which truncated that file and leaked the
# handle while the real output went to standard output. The sink is now an
# explicit, optional argument. To write the table to disk, pass an open file:
#
#   File.open(File.join(TARGET_DIR, 'gyfanqie.txt'), 'w') do |f|
#     parser.listen(FanqieCollector.new(f))
#     parser.parse
#   end
class FanqieCollector
  include REXML::SAX2Listener

  # @param out [#write] object receiving the generated text; defaults to the
  #   value of $stdout at construction time.
  def initialize(out = $stdout)
    @out = out
    @in_voice = false   # currently inside a <voice_part>
    @in_word = false    # next text chunk is the first <word_head> of the part
    @in_fanqie = false  # currently inside a <fanqie> within a <voice_part>
    @word_count = 0     # number of <word_head> seen in the current part
  end

  # Tracks element openings and emits the rhyme header line.
  #
  # @param uri [String, nil] namespace URI (unused)
  # @param localname [String] local element name (unused)
  # @param qname [String] qualified element name used for dispatch
  # @param attributes [Hash] attribute name => value; 'id' is read for rhymes
  def start_element(uri, localname, qname, attributes)
    case qname
    when 'rhyme'
      @out.write("#{attributes['id']}:\n")
    when 'voice_part'
      @in_voice = true
    when 'word_head'
      @word_count += 1
      # Only the first head word of a voice_part labels the entry.
      @in_word = true if @word_count == 1
    when 'fanqie'
      @in_fanqie = true if @in_voice
    end
  end

  # Emits text for the pending head word and/or the current fanqie.
  #
  # @param text [String] character data reported by the parser
  def characters(text)
    if @in_word
      @out.write(" - #{text}: ")
      # Consume the flag so later text chunks are not printed as head words.
      @in_word = false
    end
    @out.write("#{text}\n") if @in_fanqie
  end

  # Resets per-element state when +voice_part+ or +fanqie+ closes.
  #
  # @param uri [String, nil] namespace URI (unused)
  # @param localname [String] local element name (unused)
  # @param qname [String] qualified element name used for dispatch
  def end_element(uri, localname, qname)
    case qname
    when 'voice_part'
      @in_voice = false
      @word_count = 0
    when 'fanqie'
      @in_fanqie = false
    end
  end
end

# Stream the input document through a FanqieCollector. The input handle is
# released once parsing finishes or fails, instead of staying open until the
# interpreter exits.
parser = REXML::Parsers::SAX2Parser.new(SBGY)
listener = FanqieCollector.new
parser.listen(listener)
begin
  parser.parse
ensure
  SBGY.close unless SBGY.closed?
end
