# Copyright (C) 2005  Network Applied Communication Laboratory Co., Ltd.
#
# This file is part of Rast.
# See the file COPYING for redistribution information.
#

# -*- mode: Ruby; coding: euc-japan; -*-

require "test/unit"

require "rast_test"

module Rast
  class Encoding
    class MeCabEucJpTest < Test::Unit::TestCase
      # Runs before every test: looks up the encoding object under test by
      # name and keeps it in @encoding for the test methods to use.
      def setup
        encoding_name = "mecab_euc_jp"
        @encoding = Encoding[encoding_name]
      end

      # Checks that register_tokenize yields one (ngram, pos, complete)
      # triple per token, in input order, by collecting every yielded triple
      # and comparing it with the expected sequence.
      def test_register_tokenize
        result = []
        @encoding.register_tokenize("ruby is great.") do |ngram, pos, complete|
          result.push([ngram, pos, complete])
        end
        # ASCII input: each word, each separating space and the final period
        # is expected as its own token, with pos equal to the token's
        # 0-based offset in the input string.
        assert_equal(["ruby", 0, true], result[0])
        assert_equal([" ", 4, true], result[1])
        assert_equal(["is", 5, true], result[2])
        assert_equal([" ", 7, true], result[3])
        assert_equal(["great", 8, true], result[4])
        assert_equal([".", 13, true], result[5])
        # No extra tokens may be yielded beyond the six checked above.
        assert_equal(6, result.length)

        # NOTE(review): the literals in this second case presumably hold
        # EUC-JP text (see the coding line at the top of the file) that may
        # not display in every editor; the expected positions 0 and 2 look
        # like byte offsets of two 2-byte characters -- confirm against the
        # raw file before editing these lines.
        result = []
        @encoding.register_tokenize("") do
          |ngram, pos, complete|
          result.push([ngram, pos, complete])
        end
        assert_equal(["", 0, true], result[0])
        assert_equal(["", 2, true], result[1])
        assert_equal(2, result.length)
      end

      # Checks the output of normalize_text for whitespace handling and for a
      # series of non-ASCII inputs.
      def test_normalize_text
        # Runs of whitespace (spaces, tabs, newlines, carriage returns) are
        # expected to collapse into a single space; leading and trailing
        # whitespace is collapsed as well, not stripped.
        assert_equal(" abc ", @encoding.normalize_text("  abc  "))
        assert_equal(" abc abc", @encoding.normalize_text(" abc\nabc"))
        assert_equal("a b c d e ",
                     @encoding.normalize_text("a\n \t b\nc\r\rd \ne "))

        # NOTE(review): the literals below are presumably EUC-JP encoded text
        # (see the coding line at the top of the file) and may not display
        # correctly in a non-EUC-JP editor. Judging by the expected value
        # "012ABC", the first case looks like a full-width to half-width
        # conversion check, and the remaining cases presumably cover kana
        # normalization -- confirm against the raw file. Do not re-save this
        # file in another encoding, as that would change the bytes under test.
        s = @encoding.normalize_text("£")
        assert_equal("012ABC", s)
        s = @encoding.normalize_text("")
        assert_equal("", s)
        s = @encoding.normalize_text("")
        assert_equal("", s)
        s = @encoding.normalize_text("Î")
        assert_equal("ĥƥ", s)
        s = @encoding.normalize_text("Î")
        assert_equal("ĥƥ", s)
        s = @encoding.normalize_text("ގގގގ")
        assert_equal("", s)
        s = @encoding.normalize_text("ގގގÎގĎ")
        assert_equal("¥ťǥ", s)
        s = @encoding.normalize_text("ʎߎˎߎ̎ߎ͎ߎΎ")
        assert_equal("ѥԥץڥ", s)
        s = @encoding.normalize_text("")
        assert_equal("", s)
      end

      # normalize_chars is expected to turn upper-case ASCII letters into
      # their lower-case equivalents.
      def test_normalize_chars
        assert_equal("abc", @encoding.normalize_chars("ABC"))
      end
    end
  end
end
