#!/usr/bin/perl --
#
#	Anthy: mkucdic.pl : Converter, from a shrinked list to an expanded list.
#		Sat,31 Oct,2009, Tue,03 Oct,2009 - Wed,04 Oct,2009
#		Wed,11 Nov,2009
#		Fri,04 Dec,2009 by vagus (#KY*)
#		Fri,29 Jan,2010
#		Thu,20 Oct,2011
#		Copyright(C)2009-2011 G-HAL, vagus
#

# DO NOT set an encoding, because we do not know source data encoding in here.
#use encoding "EUC-JP";
use strict;
use POSIX;


# Indices of the fields in a token list.
use constant TOKENS_FLAG            => 0;
use constant TOKENS_PRIO            => 1;
use constant TOKENS_YOMI_SEG1_INDEP => 2;
use constant TOKENS_YOMI_SEG1_DEP   => 3;
use constant TOKENS_YOMI_SEG2_INDEP => 4;
use constant TOKENS_YOMI_SEG2_DEP   => 5;
use constant TOKENS_CAND_SEG1_INDEP => 6;
use constant TOKENS_CAND_SEG1_DEP   => 7;
use constant TOKENS_CAND_SEG2_INDEP => 8;
use constant TOKENS_CAND_SEG2_DEP   => 9;
use constant TOKENS_WT_SEG1_INDEP   => 10;
use constant TOKENS_WT_SEG1_DEP     => 11;
use constant TOKENS_WT_SEG2_INDEP   => 12;
use constant TOKENS_WT_SEG2_DEP     => 13;
use constant TOKENS_OPTIONS         => 14;

# Debugging context: current input line number and text, used in diagnostics.
my $line_number = 0;
my $line_str = "";



# Part-of-speech predicates.
# is_noun( $wt )
#   Returns 1 when $wt matches "#" followed by one of: T, T plus two digits,
#   CN, JN, JNM, JNS or KK.  Returns 0 otherwise, including when no argument
#   (or an undefined one) is passed.
sub is_noun {
	my ($wt) = @_;
	return 0 unless defined $wt;
	return ($wt =~ /^#(T|T[0-9]{2}|CN|JN|JNM|JNS|KK)$/) ? 1 : 0;
}
# is_verb( $wt )
#   Returns 1 when $wt matches "#" followed by one of:
#     - "kx" or "sx" plus one or more lowercase letters,
#     - one letter of BCGKLMNRSTUW, then "5", then an optional "r",
#     - "KS" with an optional "r",
#     - "SX", "ZX" or "aru".
#   Returns 0 otherwise, including when no argument (or an undefined one)
#   is passed.
sub is_verb {
	my ($wt) = @_;
	return 0 unless defined $wt;
	return ($wt =~ /^#(kx[a-z]+|sx[a-z]+|[BCGKLMNRSTUW]5[r]{0,1}|KS[r]{0,1}|SX|ZX|aru)$/) ? 1 : 0;
}
# is_adjective( $wt )
#   Returns 1 when $wt matches "#KY", optionally followed by one of the
#   suffixes E, I, T, U, ii, me, mi, mime, n, na, s or y.  Returns 0
#   otherwise, including when no argument (or an undefined one) is passed.
sub is_adjective {
	my ($wt) = @_;
	return 0 unless defined $wt;
	return ($wt =~ /^#(KY|KY(E|I|T|U|ii|me|mi|mime|n|na|s|y))$/) ? 1 : 0;
}



# Swap segment 1 and segment 2 of a token list.
# make_reverse( @tokens )
#   Returns a copy of the token list in which every SEG1 field (YOMI, CAND
#   and WT, both INDEP and DEP) has been exchanged with the corresponding
#   SEG2 field.  All other fields (flag, priority, options) are copied
#   unchanged.  The argument list itself is not modified.
sub make_reverse {
	my @tokens = @_;

	# Field indices of each segment, listed in the same order so that
	# position N of @seg1 pairs with position N of @seg2.
	my @seg1 = (
		TOKENS_YOMI_SEG1_INDEP, TOKENS_YOMI_SEG1_DEP,
		TOKENS_CAND_SEG1_INDEP, TOKENS_CAND_SEG1_DEP,
		TOKENS_WT_SEG1_INDEP,   TOKENS_WT_SEG1_DEP,
	);
	my @seg2 = (
		TOKENS_YOMI_SEG2_INDEP, TOKENS_YOMI_SEG2_DEP,
		TOKENS_CAND_SEG2_INDEP, TOKENS_CAND_SEG2_DEP,
		TOKENS_WT_SEG2_INDEP,   TOKENS_WT_SEG2_DEP,
	);

	# Read from the untouched argument list, write into the copy.
	@tokens[ @seg1, @seg2 ] = @_[ @seg2, @seg1 ];
	return @tokens;
}


# Emit a token list in reversed segment order.
# output_reverse( @tokens )
#   Swaps the two segments of the token list with make_reverse() and hands
#   the result to parse_line().  Returns nothing.
sub output_reverse {
	parse_line( make_reverse( @_ ) );
	return;
}


# Command: emit forward order, then reversed order at half priority.
# cmd_make_reverse( @tokens )
#   Handler for the \make_reverse command.  Emits the entry as given, then
#   emits the segment-swapped entry with the priority halved (rounded down
#   with floor).  Returns nothing.
sub cmd_make_reverse {
	my @tokens = @_;
	parse_line( @tokens );
	$tokens[TOKENS_PRIO] = floor( $tokens[TOKENS_PRIO] / 2 );
	output_reverse( @tokens );
	return;
}


# Command: emit forward and reversed order at the same priority.
# cmd_make_reverse_strong( @tokens )
#   Handler for the \make_reverse_strong command.  Emits the entry as given
#   and then the segment-swapped entry, both with the unchanged priority.
#   Returns nothing.
sub cmd_make_reverse_strong {
	my @fields = @_;
	parse_line( @fields );                    # forward order
	parse_line( make_reverse( @fields ) );    # reversed order
	return;
}


# Command: emit reversed order at full priority, then forward order at half priority.
# cmd_make_inverse( @tokens )
#   Handler for the \make_inverse command.  Emits the segment-swapped entry
#   with the original priority first, then emits the entry in its given
#   order with the priority halved (rounded down with floor).
#   Returns nothing.
sub cmd_make_inverse {
	my @tokens = @_;
	output_reverse( @tokens );
	$tokens[TOKENS_PRIO] = floor( $tokens[TOKENS_PRIO] / 2 );
	parse_line( @tokens );
	return;
}


# Command: reversed-order generation with adjective/noun adjustment.
# cmd_make_reverse_a_n_adjustment( @tokens )
#   Handler for the \make_reverse_a_n_adjustment command.
#
#   Requires one segment classified by is_adjective() and the other by
#   is_noun() (judged on the WT_SEGn_INDEP fields).  The entry is first
#   normalised so that the adjective is segment 1, then five entries are
#   emitted through parse_line(), each with its own pair of WT_SEGn_DEP
#   values:
#     adjective/noun order : ("HaC*Se", "HnC_S*")
#     noun/adjective order : ("HnC_Sk", "HaC*Sk"), ("HnC_Sy", "HaC*Sk"),
#                            ("HnC_Sy", "HaC*Se"),
#                            and, at half priority, ("HnC_Sk", "HaC*Se")
#
#   When the pair is not adjective+noun, a diagnostic goes to STDERR and
#   nothing is emitted.  Returns nothing.
sub cmd_make_reverse_a_n_adjustment {
	my @tokens = @_;
	my $wt1 = $tokens[TOKENS_WT_SEG1_INDEP];
	my $wt2 = $tokens[TOKENS_WT_SEG2_INDEP];

	# $err: 0 = usable pair,
	#       1 = segment 1 is neither an adjective nor a noun,
	#       2 = segment 1 is an adjective but segment 2 is not a noun,
	#       3 = segment 1 is a noun but segment 2 is not an adjective.
	my $err = 1;
	if (is_adjective( $wt1 )) {
		$err = is_noun( $wt2 ) ? 0 : 2;
	} elsif (is_noun( $wt1 )) {
		$err = 3;
		if (is_adjective( $wt2 )) {
			$err = 0;
			# Normalise to adjective-first order.
			@tokens = make_reverse( @_ );
		}
	}
	if ($err) {
		# The diagnostic shows the classifications in their original order.
		print STDERR "Line:$line_number, Unknown classification(s). ($err) '" . $wt1 . "', '" . $wt2 . "'\n";
		return;
	}

	# Set both dependent-word classifications and emit the current entry.
	my $emit = sub {
		my ($seg1_dep, $seg2_dep) = @_;
		@tokens[ TOKENS_WT_SEG1_DEP, TOKENS_WT_SEG2_DEP ] = ( $seg1_dep, $seg2_dep );
		parse_line( @tokens );
		return;
	};

	# Adjective first.
	$emit->( "HaC*Se", "HnC_S*" );

	# Noun first.
	@tokens = make_reverse( @tokens );
	$emit->( "HnC_Sk", "HaC*Sk" );
	$emit->( "HnC_Sy", "HaC*Sk" );
	$emit->( "HnC_Sy", "HaC*Se" );

	# Last variant is emitted at half priority.
	$tokens[TOKENS_PRIO] = floor( $tokens[TOKENS_PRIO] / 2 );
	$emit->( "HnC_Sk", "HaC*Se" );

	return;
}


# Command: reversed-order generation with noun/verb adjustment.
# cmd_make_reverse_n_v_adjustment( @tokens )
#   Handler for the \make_reverse_n_v_adjustment command.
#
#   Requires one segment classified by is_verb() and the other by is_noun()
#   (judged on the WT_SEGn_INDEP fields).  The entry is first normalised so
#   that the verb is segment 1, then five entries are emitted through
#   parse_line(), each with its own pair of WT_SEGn_DEP values:
#     verb/noun order : ("HvC*Se", "HnC_S*")
#     noun/verb order : ("HnC_Sk", "HvC*Sk"), ("HnC_Sy", "HvC*Sk"),
#                       at half priority    ("HnC_Sk", "HvC*Se"),
#                       halved once more    ("HnC_Sy", "HvC*Se")
#
#   When the pair is not verb+noun, a diagnostic goes to STDERR and nothing
#   is emitted.  Returns nothing.
sub cmd_make_reverse_n_v_adjustment {
	my @tokens = @_;
	my $wt1 = $tokens[TOKENS_WT_SEG1_INDEP];
	my $wt2 = $tokens[TOKENS_WT_SEG2_INDEP];

	# $err: 0 = usable pair,
	#       1 = segment 1 is neither a verb nor a noun,
	#       2 = segment 1 is a verb but segment 2 is not a noun,
	#       3 = segment 1 is a noun but segment 2 is not a verb.
	my $err = 1;
	if (is_verb( $wt1 )) {
		$err = is_noun( $wt2 ) ? 0 : 2;
	} elsif (is_noun( $wt1 )) {
		$err = 3;
		if (is_verb( $wt2 )) {
			$err = 0;
			# Normalise to verb-first order.
			@tokens = make_reverse( @_ );
		}
	}
	if ($err) {
		# The diagnostic shows the classifications in their original order.
		print STDERR "Line:$line_number, Unknown classification(s). ($err) '" . $wt1 . "', '" . $wt2 . "'\n";
		return;
	}

	# Set both dependent-word classifications and emit the current entry.
	my $emit = sub {
		my ($seg1_dep, $seg2_dep) = @_;
		@tokens[ TOKENS_WT_SEG1_DEP, TOKENS_WT_SEG2_DEP ] = ( $seg1_dep, $seg2_dep );
		parse_line( @tokens );
		return;
	};

	# Verb first.
	$emit->( "HvC*Se", "HnC_S*" );

	# Noun first.
	@tokens = make_reverse( @tokens );
	$emit->( "HnC_Sk", "HvC*Sk" );
	$emit->( "HnC_Sy", "HvC*Sk" );

	# The priority is halved before each of the last two variants, so the
	# final one is emitted at a quarter (floored twice) of the original.
	$tokens[TOKENS_PRIO] = floor( $tokens[TOKENS_PRIO] / 2 );
	$emit->( "HnC_Sk", "HvC*Se" );

	$tokens[TOKENS_PRIO] = floor( $tokens[TOKENS_PRIO] / 2 );
	$emit->( "HnC_Sy", "HvC*Se" );

	return;
}



# Expand a \compound line into two-segment entries.
# parse_compound( @fields )
#   Expands one compound line into a series of two-segment entries.
#
#   @fields is the whitespace-split input line:
#     [0] flag (ignored here), [1] priority, [2] reading string,
#     [3] candidate string, [4] word-type string, [5..] options.
#   The three strings are split on "|" and "+"; all three must contain the
#   same number of separators, and at least five.  Element 0 of each split
#   list is never used.
#   NOTE(review): this looks like it expects the form
#   "|indep+dep|indep+dep|...|" -- confirm against the dictionary sources.
#
#   A "# reading<TAB>candidate<TAB>word-type" comment line is printed first,
#   then each adjacent pair of (indep, dep) phrases is emitted through
#   parse_line() with flag "n" and the given priority.
#
#   Problems are reported on STDERR and the line is dropped.
#   Returns nothing.
sub parse_compound {
	my @tokens_base = @_;
	if (!defined $tokens_base[4] || $tokens_base[4] =~ /^$/) {
		print STDERR "Line:$line_number, Illigal compound line:'" . $line_str . "'.\n";
		return;
	}

	my ( $prio, $yomi_str, $cand_str, $wt_str ) = @tokens_base[ 1 .. 4 ];

	# The consumed fields are blanked rather than removed, so the options
	# string keeps the leading separators it has always had.
	@tokens_base[ 0 .. 4 ] = ("") x 5;

	my @tokens;
	$tokens[TOKENS_FLAG]    = "n";
	$tokens[TOKENS_PRIO]    = $prio;
	$tokens[TOKENS_OPTIONS] = join( " ", @tokens_base );

	# Count the "|" and "+" separators in each string.
	my $yomi_count = ( $yomi_str =~ tr/|+// );
	my $cand_count = ( $cand_str =~ tr/|+// );
	my $wt_count   = ( $wt_str   =~ tr/|+// );
	if (($yomi_count != $cand_count)
		|| ($yomi_count != $wt_count)
	) {
		print STDERR "Line:$line_number, A count of phrases was mismatch($yomi_count,$cand_count,$wt_count), at compound line:'" . $line_str . "'.\n";
		return;
	}
	if ($yomi_count < 5) {
		print STDERR "Line:$line_number, A count of phrases was too few($yomi_count), at compound line:'" . $line_str . "'.\n";
		return;
	}
	my @yomi_lst = split( /[|+]/, $yomi_str );
	my @cand_lst = split( /[|+]/, $cand_str );
	my @wt_lst   = split( /[|+]/, $wt_str );

	print "# ". $yomi_str ."\t". $cand_str ."\t". $wt_str ."\n";

	# Load the (indep, dep) phrase starting at list position $pos into the
	# segment-2 fields.
	my $load_seg2 = sub {
		my ($pos) = @_;
		@tokens[ TOKENS_YOMI_SEG2_INDEP, TOKENS_YOMI_SEG2_DEP ] = @yomi_lst[ $pos, $pos + 1 ];
		@tokens[ TOKENS_CAND_SEG2_INDEP, TOKENS_CAND_SEG2_DEP ] = @cand_lst[ $pos, $pos + 1 ];
		@tokens[ TOKENS_WT_SEG2_INDEP,   TOKENS_WT_SEG2_DEP   ] = @wt_lst[ $pos, $pos + 1 ];
		return;
	};

	# Slide a two-phrase window along the lists: the previous segment 2
	# becomes segment 1 (via make_reverse) and the next phrase is loaded
	# into segment 2.
	my $ptr = 1;
	$load_seg2->( $ptr );
	my $count = $yomi_count - 1;
	while (4 <= $count) {
		@tokens = make_reverse( @tokens );
		$ptr   += 2;
		$count -= 2;
		$load_seg2->( $ptr );

		parse_line( @tokens );
	}
	return;
}



# Line parser: run embedded commands or print the formatted entry.
# parse_line( @tokens )
#   Processes one 15-field entry (see the TOKENS_* indices).
#
#   If the options field contains a backslash command (a backslash followed
#   by word characters), the rightmost such command is removed from the
#   options and its handler is called with the remaining tokens.  Handlers
#   call back into parse_line(), so any further commands are processed in
#   turn.  An unrecognised command is reported on STDERR and the entry is
#   dropped.
#
#   Otherwise the entry is printed to STDOUT as tab-separated columns:
#   "flag prio", the four reading fields, the four candidate fields, the
#   four word-type fields and the options, with extra tabs added after
#   short columns.
#
#   Returns nothing.
sub parse_line {
	my @tokens = @_;

	# The greedy leading (.*) makes this find the LAST command in the
	# options string.
	if ($tokens[TOKENS_OPTIONS] =~ /^(.*)(\\[\w]+)(.*?)$/) {
		my ( $before, $cmd, $after ) = ( $1, $2, $3 );
		$tokens[TOKENS_OPTIONS] = $before . $after;

		# Dispatch the command.
		my %handler_for = (
			'\\make_reverse'                => \&cmd_make_reverse,
			'\\make_reverse_strong'         => \&cmd_make_reverse_strong,
			'\\make_inverse'                => \&cmd_make_inverse,
			'\\make_reverse_a_n_adjustment' => \&cmd_make_reverse_a_n_adjustment,
			'\\make_reverse_n_v_adjustment' => \&cmd_make_reverse_n_v_adjustment,
		);
		my $handler = $handler_for{$cmd};
		if ($handler) {
			$handler->( @tokens );
		} else {
			print STDERR "Line:$line_number, Unknown command:'" . $cmd . "'.\n";
		}
		return;
	}

	# Tabs to append after $text so that the column reaches $width
	# characters, counting 8 per tab stop; never negative.
	my $padding = sub {
		my ($text, $width) = @_;
		my $tabs = ($width - (int((8 + length($text)) / 8) * 8)) / 8;
		$tabs = 0 if $tabs < 0;
		return "\t" x $tabs;
	};

	my $head_str      = join( " ", @tokens[ TOKENS_FLAG, TOKENS_PRIO ] );
	my $lefthalf_str  = join( " ", @tokens[ TOKENS_YOMI_SEG1_INDEP, TOKENS_YOMI_SEG1_DEP, TOKENS_YOMI_SEG2_INDEP, TOKENS_YOMI_SEG2_DEP ] );
	my $righthalf_str = join( " ", @tokens[ TOKENS_CAND_SEG1_INDEP, TOKENS_CAND_SEG1_DEP, TOKENS_CAND_SEG2_INDEP, TOKENS_CAND_SEG2_DEP ] );
	my $wt_str        = join( " ", @tokens[ TOKENS_WT_SEG1_INDEP, TOKENS_WT_SEG1_DEP, TOKENS_WT_SEG2_INDEP, TOKENS_WT_SEG2_DEP ] );
	my $tail_str      = join( " ", @tokens[ TOKENS_OPTIONS ] );

	print $head_str
		. "\t" . $lefthalf_str  . $padding->( $lefthalf_str,  40 )
		. "\t" . $righthalf_str . $padding->( $righthalf_str, 40 )
		. "\t" . $wt_str        . $padding->( $wt_str,        24 )
		. "\t" . $tail_str . "\n";
	return;
}

# Main program: filter STDIN to STDOUT, one line at a time.
#   - Lines starting with "#" are copied through unchanged.
#   - Lines with 14 whitespace-separated fields followed by a non-empty
#     options part are handed to parse_line().
#   - Any other line whose first field contains "\compound" is handed to
#     parse_compound().
#   - Everything else is copied through unchanged.
# $line_number and $line_str are kept up to date for use in diagnostics.
{
	# TOKENS_OPTIONS non-blank fields separated by whitespace, then the rest
	# of the line as the options field.  Built once, outside the loop.
	my $entry_pattern = '^' . join( '\s+', ('(\S+)') x TOKENS_OPTIONS, '(.*)' ) . '$';
	my $entry_re      = qr/$entry_pattern/;

	while (defined( my $input = <STDIN> )) {
		chomp( $input );
		++$line_number;
		$line_str = $input;

		if ($input =~ /^#/) {
			print $input ."\n";
			next;
		}

		# A failed match leaves @tokens empty.
		my @tokens = $input =~ $entry_re;
		if (@tokens && $tokens[TOKENS_OPTIONS] ne "") {
			parse_line( @tokens );
			next;
		}

		# Not a full entry with options: either a compound line or a line
		# that is passed through as-is.
		@tokens = split( /\s+/, $input );
		if (defined $tokens[TOKENS_FLAG] && $tokens[TOKENS_FLAG] =~ /\\compound/) {
			parse_compound( @tokens );
		} else {
			print $input ."\n";
		}
	}

	exit 0;
}

__END__
# [ End of File ]
