# Unicodeを扱うライブラリ
#  コード形式: UTF-8

# Textual notation of a single code point: "U+" followed by one or more
# hexadecimal digits (either case).
# Anchored with \A and \z so that the WHOLE string has to match. The per-line
# anchors ^ and $ would accept any multi-line string that merely contains one
# matching line (e.g. "junk\nU+4E00").
UCS_FORMAT = /\AU\+[0-9a-fA-F]+\z/

# Code point ranges, each an inclusive [first, last] pair.
# The commented-out alternatives end on a later code point than the active
# values; presumably they are the full block boundaries while the active
# values stop at the last code point assigned when this file was written --
# confirm against the Unicode version in use.
CJK_UNIFIED = [0x4E00, 0x9FFF].freeze
#CJK_UNIFIED_EXT_A = [0x3400, 0x4DBF]
CJK_UNIFIED_EXT_A = [0x3400, 0x4DB5].freeze
#CJK_UNIFIED_EXT_B = [0x20000, 0x2A6DF]
CJK_UNIFIED_EXT_B = [0x20000, 0x2A6D6].freeze
#CJK_UNIFIED_EXT_C = [0x2A700, 0x2B73F]
CJK_UNIFIED_EXT_C = [0x2A700, 0x2B734].freeze
#CJK_UNIFIED_EXT_D = [0x2B740, 0x2B81F]
CJK_UNIFIED_EXT_D = [0x2B740, 0x2B81D].freeze

# Matches (and captures) one character from the Extension B, C or D ranges
# above. The disabled variant below additionally started at U+3400; it is kept
# for reference only (note that its parentheses are unbalanced).
#EXT_REGEXP = /[㐀-䷿𠀀-𪛖𪜀-𫜴𫝀-𫠝])/u
EXT_REGEXP = /([𠀀-𪛖𪜀-𫜴𫝀-𫠝])/u

# Ranges that UnicodeUtility#nextCodepoint walks through.
# Requirement: entries must be listed in ascending code point order, because
# the ranges are scanned front to back.
# NOTE(review): CJK_UNIFIED_EXT_D is not listed here although EXTENSION
# contains it -- confirm whether that is intentional.
CODESPACE = [CJK_UNIFIED_EXT_A,
             CJK_UNIFIED,
             CJK_UNIFIED_EXT_B,
             CJK_UNIFIED_EXT_C,
            ].freeze

# Ranges that UnicodeUtility#extension? treats as "extension" characters
# (Extension A is deliberately not part of this list).
EXTENSION = [CJK_UNIFIED_EXT_B,
             CJK_UNIFIED_EXT_C,
             CJK_UNIFIED_EXT_D,
            ].freeze

# Helpers for converting between characters, integer code points and "U+XXXX"
# notation, and for stepping through / classifying CJK ideograph ranges.
#
# Instances carry no state. The methods rely on the file-level constants
# UCS_FORMAT, CODESPACE, EXTENSION and EXT_REGEXP.
class UnicodeUtility
  # Highest code point of the Unicode codespace (0..0x10FFFF).
  MAX_CODEPOINT = 0x10FFFF

  # There is intentionally no initializer: an earlier version rebuilt the
  # extension regexp per instance via Regexp.new(EXT_REGEXP, nil, 'UTF-8'),
  # which only produced a "flags and encoding ignored" warning. The shared
  # constant is returned by #extension_regexp instead.

  # Returns the code point that follows +codepoint+ within CODESPACE.
  #
  # * Below or between ranges: the first code point of the next range.
  # * Inside a range (but not on its last code point): codepoint + 1.
  # * On the last code point of a range: the first code point of the
  #   following range.
  # * On or past the end of the last range: nil.
  #
  # Depends on CODESPACE being sorted in ascending order.
  def nextCodepoint(codepoint)
    CODESPACE.each do |scode, ecode|
      return scode if codepoint < scode
      # Here scode <= codepoint is already known, so only the upper bound
      # needs checking. When codepoint == ecode we simply fall through and the
      # next range's first guard hands back its start.
      return codepoint + 1 if codepoint < ecode
    end
    nil
  end

  # Converts "U+XXXX" notation to a one-character UTF-8 string.
  # Returns nil when +ucs+ does not match UCS_FORMAT or when the value lies
  # beyond the Unicode codespace (packing such a value would yield an invalid
  # string or raise RangeError).
  def ucs2char(ucs)
    return nil unless ucs =~ UCS_FORMAT

    codepoint = ucs2codepoint(ucs)
    return nil if codepoint > MAX_CODEPOINT

    [codepoint].pack("U*")
  end

  # Returns the "U+XXXX" notation (at least four upper-case hex digits) of
  # the first character of +char+.
  # Raises TypeError for an empty string, because there is no code point to
  # format.
  def char2ucs(char)
    # TODO: Check char in codespace
    # Unicode codespace is a range of integers from 0 to 10FFFF_hex
    codepoint2ucs(char2codepoint(char))
  end

  # Formats an integer code point as "U+XXXX" (zero-padded to four digits).
  def codepoint2ucs(codepoint)
    format("U+%04X", codepoint)
  end

  # Parses the hex digits that follow the two-character "U+" prefix and
  # returns them as an Integer.
  # The input is NOT validated against UCS_FORMAT (that check was disabled by
  # the original author); the first two characters are skipped blindly and
  # String#hex yields 0 when no hex digits follow.
  def ucs2codepoint(ucs)
    ucs.slice(2, ucs.size).hex
  end

  # Returns the integer code point of the first character of +char+, or nil
  # for an empty string.
  def char2codepoint(char)
    char.unpack("U")[0]
  end

  # Returns the shared regexp matching a single extension character.
  def extension_regexp
    EXT_REGEXP
  end

  # Is the first character of +char+ inside one of the CJK Unified Ideographs
  # extension ranges listed in EXTENSION (Extension A is excluded)?
  # Returns false for an empty string.
  def extension?(char)
    codepoint = char2codepoint(char)
    return false if codepoint.nil?

    EXTENSION.any? { |(first, last)| codepoint.between?(first, last) }
  end
end
