# prime/engine/engine-basic.rb
# $Id: engine-basic.rb,v 1.1.1.1.2.12 2004/01/28 08:34:56 komatsu Exp $
#
# Copyright (C) 2002 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.
#

require 'prime/taiyaki'
require 'sary'
require 'prime/engine/engine'
require 'prime/prime-dict-config.rb'

# Name of the engine class defined in this file, published as a global.
# NOTE(review): presumably read by whatever loads this file in order to
# find the class to instantiate -- confirm against the loader.
$engine_class_name = 'PrimeEngineBasic'

# PRIME engine that answers queries from the 'prime-dict' dictionary files
# found under PRIME_DICT_DIR, searched through Sary::Searcher.
#
# Dictionary lines are tab separated:
#   pron <TAB> pos <TAB> literal <TAB> score [<TAB> key=value ...]
class PrimeEngineBasic < PrimeEngine
  # Sets the engine metadata and opens the POS dictionary, the main
  # dictionary and a second searcher over the main dictionary.
  def initialize
    super

    @name = "Basic engine"
    @id   = "basic"
    @description = "Basic engine"

    initialize_posdict
    @dict_file = File::join2(PRIME_DICT_DIR, 'prime-dict')
    @dict = Sary::Searcher.new(@dict_file)

    # Same text file opened with the "_literal.ary" file as second argument
    # (presumably an index keyed on the literal column -- confirm).
    @dict_literal = Sary::Searcher.new(@dict_file, @dict_file + "_literal.ary")
    # NOTE(review): not referenced anywhere else in this class.
    @max_candidates = 15
  end

  # Locates and opens the POS dictionary, storing the path in @partdict_file
  # and the searcher in @partdict.
  #
  # 'prime-dict-pos' is preferred; the obsolete name 'prime-dict-part' is used
  # as a fallback.  A warning is printed when both exist.  When neither
  # exists an error is printed and the process exits with a failure status.
  def initialize_posdict
    file_posdict  = File::join2(PRIME_DICT_DIR, 'prime-dict-pos')
    file_partdict = File::join2(PRIME_DICT_DIR, 'prime-dict-part')

    if File::exist?(file_posdict)
      if File::exist?(file_partdict)
        $stderr.puts("PRIME Warning:")
        $stderr.puts("    Both prime-dict-pos and prime-dict-part exist")
        $stderr.puts("    under '#{PRIME_DICT_DIR}'.")
        $stderr.puts("    The file name 'prime-dict-part' is obsolete.")
      end
      @partdict_file = file_posdict
    elsif File::exist?(file_partdict)
      @partdict_file = file_partdict
    else
      $stderr.puts("PRIME ERROR:")
      $stderr.puts("    The POS dictionary #{file_posdict} is not found.")
      $stderr.puts("    Please install the latest prime-dict package.")
      # A bare exit() reports success; this is a fatal error, so report failure.
      Kernel::exit(1)
    end
    @partdict = Sary::Searcher.new(@partdict_file)
  end

  # Looks up the words matching +query+ and returns them as a PrimeWordList.
  #
  # query.input is a list of key strings and query.method selects the lookup:
  #   :prefix          keys are prefixes of the pronunciation, at most 10 hits
  #   :exact           keys are whole pronunciations, optionally narrowed
  #                    by query.pos
  #   :literal_prefix  like :prefix, but searched through @dict_literal
  #   :literal_exact   like :exact without the POS, through @dict_literal
  # Any other method, or an input of [""], yields an empty list.
  def search(query)
    return PrimeWordList.new() if query.input == [""]

    case query.method
    when :prefix
      word_lines = lookup_dict(query.input, 10)
    when :exact
      # The trailing tab pins the end of the field, making the match exact.
      # Plain interpolation is used so that a '%' inside the POS is not
      # mistaken for a format directive.
      suffix = (query.pos ? "\t#{query.pos}\t" : "\t")
      keys = query.input.map { |input| "#{input}#{suffix}" }
      word_lines = lookup_dict(keys)
    when :literal_prefix
      word_lines = lookup_dict(query.input, 10, @dict_literal)
    when :literal_exact
      keys = query.input.map { |input| "#{input}\t" }
      word_lines = lookup_dict(keys, nil, @dict_literal)
    else
      return PrimeWordList.new()
    end

    word_list = PrimeWordList.new()
    word_lines.each { |word_line| word_list << parse_word(word_line) }

    # An empty POS is what get_pos_data assigns to numeric strings; for such
    # queries each input is also offered verbatim as a candidate.
    if query.pos == ''
      query.input.each { |input|
        word_list.push(PrimeWord.new(input, input, query.pos, 10000))
      }
    end
    return word_list
  end

  # Returns true when the main dictionary contains the tab-joined key
  # "pron<TAB>pos<TAB>literal", false otherwise.
  #
  # NOTE(review): the key has no trailing tab, so an entry whose literal
  # merely starts with +literal+ presumably matches as well -- confirm
  # whether that is intended.
  def check_existence(pron, literal, pos)
    query = [pron, pos, literal].join("\t")
    return(@dict.search(query) ? true : false)
  end

  # Builds a hash mapping pronunciations to their POS lists.
  #
  # Every string yielded by string.increase is looked up in the POS
  # dictionary; on a hit the remaining tab separated fields of the first
  # matching line become its POS list.  When +string+ starts with a run of
  # digits or ".,+-", that run is additionally registered with the empty
  # POS ''.
  def get_pos_data(string)
    pos_data = {}
    string.increase { |pron|
      if @partdict.search(pron + "\t")
        (_pron, *pos_list) = @partdict.get_next_context_line.chomp.split(/\t/)
        pos_data[pron] = pos_list
      end
    }
    if string =~ /^[0-9.,+-]+/
      matched_string = $&
      pos_data.list_push(matched_string, '')
    end
    return pos_data
  end

  private

  # Converts one dictionary line into a PrimeWord.
  #
  # The first four fields are pron, pos, literal and score; every further
  # field is a "key=value" item.  The score is offset by 10000, and the
  # 'category' and 'annotation' items are passed on together with the raw
  # item list.
  def parse_word(line)
    (pron, pos, literal, score, *data_list) = line.split(/\t/)
    data = {}
    data_list.each { |item|
      # Split on the first '=' only, so values may themselves contain '='.
      (key, value) = item.split('=', 2)
      # An unlimited split dropped empty trailing fields, so "key=" used to
      # give nil; keep that result.
      value = nil if value == ''
      data[key] = value
    }
    return PrimeWord.new(pron, literal, pos, score.to_i + 10000,
                         data['category'], data['annotation'], data_list)
  end

  # Runs a multi-key search and returns the matching lines without their
  # line terminators.
  #
  # query_lines:: list of search keys; an empty list yields no results.
  # max::         maximum number of lines to return, or nil for no limit;
  #               zero or a negative value yields no results.
  # dict::        searcher to use, @dict by default.
  def lookup_dict(query_lines, max = nil, dict = @dict)
    results = []
    return results if max && max <= 0

    if !query_lines.empty? && dict.multi_search(query_lines)
      dict.sort_occurrences
      while (line = dict.get_next_context_line)
        # Stop once +max+ lines are collected; the former '>' test let one
        # extra line through.
        break if max && results.length >= max
        results << line.chomp
      end
    end
    return results
  end
end

