# dictformat-wordnet.rb: Converter module for cntlist by WordNet.
# See also: http://www.itri.bton.ac.uk/~Adam.Kilgarriff/bnc-readme.html
# $Id: dictformat-wordnet.rb,v 1.1.2.1 2005/03/07 08:52:00 komatsu Exp $
#
# Copyright (C) 2003 Hiroyuki Komatsu <komatsu@taiyaki.org>
#     All rights reserved.
#     This is free software with ABSOLUTELY NO WARRANTY.
#
# You can redistribute it and/or modify it under the terms of 
# the GNU General Public License version 2.

## This module is designed for the file wordnet-2.0g/dict/lexfiles/cntlist.

require 'prime/prime-config'
require 'prime/makedict/dictformat'

# Converts lines of the WordNet cntlist file into dictionary entries.
#
# Each input line is expected to look like
#   "<frequency> <sense_key> <sense_number>"
# where the sense key is split on '%' and ':' into
#   literal, pos_id, lex_filenum, lex_id, head_word, head_id
# Only the literal and the part-of-speech id are used here.
class DictFormatWordNet < DictFormat

  # is_interactive:: passed through unchanged to DictFormat#initialize.
  def initialize(is_interactive = true)
    super(is_interactive)
    # Maps the part-of-speech id embedded in a sense key to its name.
    @pos_table = {
      "1" => "noun",
      "2" => "verb",
      "3" => "adjective",
      "4" => "adverb",
      "5" => "adjective satellite",
    }
  end

  # Parses one line and returns an array of entries, each of the form
  # [literal, pos, literal, score].
  #
  # Returns an empty array when the line is blank or has no usable sense
  # key, and when the sense is an adjective satellite.  A part-of-speech id
  # missing from @pos_table still produces an entry, with pos set to nil.
  #
  # The line passed in is not modified.
  def parse(line)
    # Non-destructive chomp: the caller's string (possibly frozen) is
    # left untouched.
    frequency, sense_key, _sense_number = line.chomp.split(/ /)
    return [] if sense_key.nil?

    literal, pos_id, _lex_filenum, _lex_id, _head_word, _head_id =
      sense_key.split(/[%:]/)
    return [] if literal.nil?

    pos = @pos_table[pos_id]
    return [] if pos == "adjective satellite"

    # Multi-word literals use "_" as the word separator in the sense key.
    literal = literal.tr("_", " ")
    # Integer division; NOTE(review): the divisor 5 is an unexplained
    # scaling factor -- confirm against the other dictformat converters.
    score = frequency.to_i / 5
    [[literal, pos, literal, score]]
  end
end
