# -*- coding: utf-8 -*-
# YAMLからWXR(WordPress eXtended RSS)ファイルを生成する
#    template: sw2wxr.rxml

require 'yaml'
require 'erb'
require 'uri'
#require File.join(File.dirname(__FILE__), 'lib', 'sw_seals')
require File.join(File.dirname(__FILE__), 'lib', 'unicode')

# Directory containing the input YAML files (relative to this script).
SOURCE_DIR = File.join(File.dirname(__FILE__), '..', 'yml', 'swjz')
# Directory that receives the generated .xml (WXR) files in batch mode.
TARGET_DIR = File.join(File.dirname(__FILE__), '..', 'wxr')
# ERB template compiled into WXRConverter#render.
TEMPLATE = File.join(File.dirname(__FILE__), 'lib', 'sw2wxr.rxml')
# Prefix of every chapter / radical / word link generated by the converter.
URI_BASE = 'http://dianzhuhuiws.wordpress.com'
# Unihan lookup URL; the hexadecimal code point of a character is appended.
UNIHAN_QUERY = 'http://www.unicode.org/cgi-bin/GetUnihanData.pl?codepoint='

# Parses one YAML source file and renders it as WXR (WordPress eXtended RSS)
# through the ERB template compiled into #render.
#
# State built while parsing:
#   @title               - [chapter_num, title, href] of the chapter
#   @contents            - one entry per radical:
#                            [radical, words, wordnum, associations]
#                          radical = [word, href]
#                          words   = [[word, wordid, href, info_string], ...]
#   @volume_wordnum      - text built from the 'volume' document
#   @radicals_in_chapter - HTML <ol> linking every radical in @contents
#
# NOTE(review): these instance variables (and @@post_id) are presumably read
# by the template, which is compiled into this class; confirm before renaming
# or removing any of them.
class WXRConverter
  # Kept as a class variable on purpose: the ERB template is evaluated in the
  # scope of this class and may refer to it directly (see note above).
  @@post_id = 100

  # Returns the next post id. The counter starts at 100, so the first id
  # handed out is 101.
  def self.get_post_id
    @@post_id += 1
  end

  extend ERB::DefMethod
  def_erb_method('render()', TEMPLATE)

  # source_path:: path of the YAML file to convert.
  #
  # Parses the whole file immediately; the source file is closed again before
  # the constructor returns.
  def initialize(source_path)
    @unicode = UnicodeUtility.new
    @title = ''
    @volume_wordnum = ''
    # contents = radical_info+
    # radical_info = radical, word_info+, part
    # word_info = word, variants*
    @contents = []
    @radical = nil
    @words = nil
    @part = nil
    # Block form so the file handle is released even if parsing raises.
    File.open(source_path) { |file| parse(file) }
    @radicals_in_chapter = get_radical_associations(@contents)
  end

  #--- Debugging helpers (not used by the conversion itself)

  # Prints every radical followed by its words and word ids to stdout.
  def test
    @contents.each do |radical_info|
      radical, words = radical_info
      rword, = radical
      printf("\n[%s]\n", rword)
      words.each do |word|
        char, wordid = word
        printf("  %s(%s)\n", char, wordid)
      end
    end
    printf("\n")
  end

  # Prints the 'ex' / 'dn' lines of a raw info list (array of hashes).
  def print_info(info)
    info.each do |line|
      if line.has_key?('ex')
        printf("    ex: %s\n", line['ex'])
      elsif line.has_key?('dn')
        printf("    dn: %s\n", line['dn'])
      end
    end
  end

  # Reads every YAML document from +file+ and feeds it to the three
  # document handlers (chapter, content, word count).
  #
  # NOTE(review): YAML::parse_documents and Node#transform look like the Syck
  # YAML API of older Rubies; confirm the target Ruby version before porting.
  def parse(file)
    YAML::parse_documents(file) do |node|
      doc = node.transform
      parse_chapter(doc)
      parse_content(doc)
      parse_wordnum(doc)
    end
    _, title = @title
    # Special case: for this chapter the last radical is flushed here with an
    # empty word-count text. Presumably the file ends without a 'part'
    # document for it - TODO confirm against the data.
    push_radical_wordnum([]) if title == '說文解字第十一篇上一'
  end

  # Handles a document carrying a 'chapter' key: derives the chapter number
  # (the text following "說文解字第" / "說文解字弟") and stores @title.
  #
  # Raises ArgumentError when the title does not contain that prefix.
  def parse_chapter(doc)
    title = doc['chapter']
    return unless title

    match = /說文解字[第弟]/u.match(title)
    unless match
      raise ArgumentError, "unexpected chapter title: #{title.inspect}"
    end
    chapter_num = match.post_match
    @title = [chapter_num, title, get_chapter_uri(chapter_num)]
    # Exception: this chapter gets the radical 水 pushed up front instead of
    # relying on a radical word id in its content.
    push_radical('水') if title == '說文解字第十一篇上二'
  end

  # Handles a document carrying a 'content' key: registers each entry as a
  # word, starting a new radical first when the entry's id marks one.
  def parse_content(doc)
    entries = doc['content']
    return unless entries

    entries.each do |entry|
      word = entry['word']
      wordid = entry['id'].delete('w') # ids look like "w4672491"
      push_radical(word) if radicalp(wordid)
      push_word(word, wordid, entry['position'], entry['content'])
    end
  end

  # True when +wordid+ contains three digits followed by "0010", which is how
  # the entry that opens a radical is recognised.
  def radicalp(wordid)
    !(wordid =~ /\d{3}0010/).nil?
  end

  # Makes +word+ the current radical and starts a fresh word list for it.
  # Also prints a progress line to stdout.
  def push_radical(word)
    chapter_num, = @title
    @radical = [word, get_radical_uri(chapter_num, word)]
    @words = []
    printf(">> %s\n", word)
  end

  # Appends [word, wordid, href, info_string] to the current radical's words.
  # When the entry has no 'content', an empty array takes the place of the
  # info string (e.g. w4672491, the last entry of the 糸 radical in
  # chapter 13, first half).
  def push_word(word, wordid, position, info)
    chapter_num, = @title
    radical, = @radical
    href = get_word_uri(chapter_num, radical, word, wordid)
    info_string = info ? get_word_info_string(word, wordid, position, info) : []
    @words.push([word, wordid, href, info_string])
  end

  # Handles 'part' (closes the current radical) and 'volume' documents;
  # 'part' wins when both keys are present.
  def parse_wordnum(doc)
    if doc.has_key?('part')
      push_radical_wordnum(doc['part'])
    elsif doc.has_key?('volume')
      push_volume_wordnum(doc['volume'])
    end
  end

  # Closes the current radical: appends
  # [radical, words, wordnum text, word links] to @contents.
  def push_radical_wordnum(doc)
    wordnum = get_info_string(doc, "\n\n")
    associations = get_associations(@words)
    @contents.push([@radical, @words, wordnum, associations])
  end

  # Stores the text built from the 'volume' document.
  def push_volume_wordnum(doc)
    @volume_wordnum = get_info_string(doc, "\n\n")
  end

  # Returns a <p> element holding one link per word in +words+.
  def get_associations(words)
    links = words.map do |word, _wordid, href, _info|
      sprintf("<a href='%s'>%s</a> ", href, word)
    end
    "<p>\n" + links.join + "</p>\n"
  end

  # Returns an ordered HTML list linking every radical in +contents+.
  # Prints one progress line per radical to stdout.
  def get_radical_associations(contents)
    chapter_num, = @title
    items = contents.map do |radical_info|
      rword, href = radical_info.first
      printf(">>> %s %s\n", chapter_num, rword)
      sprintf("<li><a href='%s'>%s</a></li>", href, rword)
    end
    " 部首<ol>\n" + items.join + "</ol>\n"
  end

  # Builds the text block of one word: a header (word, Unihan link, id,
  # position) followed by its 'ex' / 'dn' lines.
  def get_word_info_string(word, wordid, position, info)
    ucs = @unicode.char2ucs(word)
    # Strips the 'U' and '+' characters (presumably a "U+XXXX" prefix) so
    # only the code point is appended to the query URL.
    query = UNIHAN_QUERY + ucs.delete('U+')
    # The anchor must be closed with </a>; the previous "<a>" left the link
    # open and nested a second anchor in the output.
    link = sprintf("<a href='%s' target='_blank'>%s</a>", query, ucs)
    head = sprintf("word: %s\nunihan: %s\nwordid: %s\nposition: %s\n\n",
                   word, link, wordid, position)
    get_info_string(info, head)
  end

  # Returns +info_string+ followed by one "ex: ..." or "dn: ..." line for
  # each hash in +info+ that has such a key ('ex' takes precedence). Hashes
  # with neither key contribute nothing. +info_string+ is not modified.
  def get_info_string(info, info_string)
    lines = info.map do |line|
      if line.has_key?('ex')
        sprintf("ex: %s\n", line['ex'])
      elsif line.has_key?('dn')
        sprintf("dn: %s\n", line['dn'])
      end
    end
    info_string + lines.compact.join
  end

  # NOTE(review): URI.encode is obsolete and no longer available in Ruby 3.0+;
  # the three helpers below need a replacement before this script can run
  # on a current Ruby.

  # URI of a chapter page: URI_BASE/<chapter>/
  def get_chapter_uri(chapter_num)
    URI.encode(URI_BASE + '/' + chapter_num + '/')
  end

  # URI of a radical page: URI_BASE/<chapter>/<radical>/
  def get_radical_uri(chapter_num, radical)
    URI.encode(URI_BASE + '/' + chapter_num + '/' + radical + '/')
  end

  # URI of a word page: URI_BASE/<chapter>/<radical>/<wordid>:<word>
  def get_word_uri(chapter_num, radical, word, wordid)
    URI.encode(URI_BASE + '/' + chapter_num + '/' + radical + '/' + wordid + ':' + word)
  end
end

# Command-line entry point.
#
# With a file name argument: converts SOURCE_DIR/<file> and prints the
# rendered WXR to stdout.
# Without arguments: converts every v*.yml in SOURCE_DIR (in sorted order,
# skipping v29.yml and v30.yml) and writes each result to
# TARGET_DIR/<name>.xml.
if ARGV.empty?
  Dir.entries(SOURCE_DIR).sort.each do |filename|
    next unless filename =~ /^v.*\.yml$/
    # NOTE(review): v29/v30 are deliberately excluded; the reason is not
    # visible in this file.
    next if filename == 'v29.yml' || filename == 'v30.yml'

    printf("\n> %s\n", filename)
    converter = WXRConverter.new(File.join(SOURCE_DIR, filename))
    outfile = File.basename(filename, 'yml') + 'xml'
    # Block form closes (and therefore flushes) each output file as soon as
    # it has been written instead of leaving the handle open.
    File.open(File.join(TARGET_DIR, outfile), "w") do |out|
      out.print converter.render()
    end
  end
else
  filename = ARGV[0]
  printf("\n%% %s\n", filename)
  converter = WXRConverter.new(File.join(SOURCE_DIR, filename))
  print converter.render()
end
