/*
 * LibSKK, a tiny Library to emulate SKK (Simple Kana Kanji Conversion)
 * 
 * Copyright (C) 2002 Motonobu Ichimura <famao@kondara.org>
 *
 * All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, and/or sell copies of the Software, and to permit persons
 * to whom the Software is furnished to do so, provided that the above
 * copyright notice(s) and this permission notice appear in all copies of
 * the Software and that both the above copyright notice(s) and this
 * permission notice appear in supporting documentation.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
 * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
 * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Except as contained in this notice, the name of a copyright holder
 * shall not be used in advertising or otherwise to promote the sale, use
 * or other dealings in this Software without prior written authorization
 * of the copyright holder.
 *
 */

/* $Id: skkutils.c,v 1.3.2.9 2003/02/12 11:30:15 famao Exp $ */

/* vi:set ts=4 sw=4: */


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <glib.h>
#include <iconv.h>

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "skkutils.h"

/**
 * skk_utils_last_charbytes:
 * @str: NUL-terminated EUC-JP string, may be %NULL
 *
 * Work out how many bytes the final character of @str occupies.
 * Locale based wide character routines cannot be used here, so the
 * width is derived directly from the EUC-JP byte layout: a final byte
 * below 0x80 is a single-byte character, a 0x8f byte two positions
 * before the final byte marks a three-byte character, and everything
 * else is treated as a two-byte character.
 *
 * Return value: byte width of the last character. 0 for %NULL or an
 * empty string, 1 for a string that is one byte long.
 **/
gint
skk_utils_last_charbytes (const gchar *str)
{
	gint total;

	if (str == NULL)
		return 0;

	total = strlen (str);
	if (total < 2)
		return total;

	/* a 7-bit tail byte is always a complete single-byte character */
	if ((str[total - 1] & 0xff) < 0x80)
		return 1;

	/* 0x8f introduces a three-byte sequence */
	if (total >= 3 && (str[total - 3] & 0xff) == 0x8f)
		return 3;

	return 2;
}

/**
 * skk_utils_last_charbytes_nth_all:
 * @str: NUL-terminated EUC-JP string, may be %NULL
 * @nth: number of characters to measure, counted from the end of @str
 *
 * calculate total bytes of the last @nth characters.
 * When @nth is larger than the number of characters in @str,
 * the whole string is measured.
 *
 * Return value: amount of bytes. 0 when @str is %NULL or empty.
 **/
gint
skk_utils_last_charbytes_nth_all (const gchar *str, guint nth)
{
	gint charlen;
	gint ret = 0;
	guint remaining;

	if (!str)
		return 0;
	charlen = skk_utils_strlen (str);
	if (charlen <= 0)
		return 0;
	/*
	 * Never walk below the first character: a negative index would be
	 * converted to a huge unsigned value by skk_utils_charbytes_nth
	 * and make it loop for a very long time.
	 */
	remaining = (nth < (guint) charlen) ? nth : (guint) charlen;
	for (; remaining > 0; remaining--, charlen--) {
		/* charlen is the 1-based index of the character being measured */
		ret += skk_utils_charbytes_nth (str, charlen);
	}
	return ret;
}

/**
 * skk_utils_charbytes:
 * @str: NUL-terminated EUC-JP string, may be %NULL
 *
 * calculate first character's bytes.
 * we can't use wide char.
 * so prepare EUC-JP specific function.
 *
 * A lead byte below 0x80 is a single-byte character, 0x8f starts a
 * three-byte character and any other lead byte starts a two-byte
 * character. If the string ends before the sequence is complete,
 * only the bytes actually present are counted, so a caller that
 * advances by the returned value never steps over the terminator.
 *
 * Return value: first character's bytes. 0 for %NULL or an empty string.
 **/
gint
skk_utils_charbytes (const gchar *str)
{
	gint expected;
	gint avail;

	if (!str)
		return 0;
	if ((str[0] & 0xff) < 0x80) {
		/* terminator (empty string) or a plain 7-bit character */
		return str[0] ? 1 : 0;
	}
	expected = ((str[0] & 0xff) == 0x8f) ? 3 : 2;
	/* count trailing bytes, but stop at the terminator of a truncated sequence */
	for (avail = 1; avail < expected && str[avail]; avail++)
		;
	return avail;
}

/**
 * skk_utils_charbytes_nth_all:
 * @str: NUL-terminated EUC-JP string, may be %NULL
 * @nth: number of characters to measure, counted from the start of @str
 *
 * calculate total bytes of the first @nth characters.
 * When @str holds fewer than @nth characters, the walk stops at the
 * end of the string.
 *
 * Return value: amount of bytes. 0 when @str is %NULL.
 **/
gint
skk_utils_charbytes_nth_all (const gchar *str, guint nth)
{
	const gchar *copy;
	const gchar *end;
	guint i;
	gint len;
	gint ret = 0;

	if (!str)
		return 0;
	copy = str;
	end = str + strlen (str);
	for (i = 0; i < nth && copy < end; i++) {
		len = skk_utils_charbytes (copy);
		if (len <= 0)
			break;
		/* never count bytes that lie beyond the terminator */
		if (len > (gint) (end - copy))
			len = (gint) (end - copy);
		copy += len;
		ret += len;
	}
	return ret;
}

/**
 * skk_utils_charbytes_nth:
 * @str: NUL-terminated EUC-JP string, may be %NULL
 * @nth: 1-based index of the character to measure
 *
 * calculate nth character's bytes.
 *
 * Return value: nth character's bytes. 0 when @str is %NULL, when
 * @nth is 0, or when @str holds fewer than @nth characters.
 **/
gint
skk_utils_charbytes_nth (const gchar *str, guint nth)
{
	const gchar *copy;
	const gchar *end;
	gint len = 0;
	guint i;

	if (!str)
		return 0;
	copy = str;
	end = str + strlen (str);
	for (i = 0; i < nth; i++) {
		/* ran out of characters before reaching the requested one */
		if (copy >= end)
			return 0;
		len = skk_utils_charbytes (copy);
		if (len <= 0)
			return 0;
		/* never step beyond the terminator */
		if (len > (gint) (end - copy))
			len = (gint) (end - copy);
		copy += len;
	}
	return len;
}

/**
 * skk_utils_is_hiragana:
 * @str: NUL-terminated EUC-JP string, may be %NULL
 *
 * Test whether the first character of @str is HIRAGANA, i.e. a
 * two-byte character whose lead byte is 0xa4.
 *
 * Return value: TRUE if the first character is hiragana, FALSE
 * otherwise (including when @str is %NULL).
 **/
gboolean
skk_utils_is_hiragana (const gchar *str)
{
	if (str == NULL || skk_utils_charbytes (str) != 2)
		return FALSE;

	return ((str[0] & 0xff) == 0xa4) ? TRUE : FALSE;
}

/**
 * skk_utils_is_katakana:
 * @str: NUL-terminated EUC-JP string, may be %NULL
 *
 * Test whether the first character of @str is KATAKANA, i.e. a
 * two-byte character whose lead byte is 0xa5.
 *
 * Return value: TRUE if the first character is katakana, FALSE
 * otherwise (including when @str is %NULL).
 **/
gboolean
skk_utils_is_katakana (const gchar *str)
{
	if (str == NULL || skk_utils_charbytes (str) != 2)
		return FALSE;

	return ((str[0] & 0xff) == 0xa5) ? TRUE : FALSE;
}

/**
 * skk_utils_is_halfwidth_katakana:
 * @str: NUL-terminated EUC-JP string, may be %NULL
 *
 * Test whether the first character of @str is HALFWIDTH KATAKANA,
 * i.e. a two-byte character whose lead byte is 0x8e.
 *
 * Return value: TRUE if the first character is halfwidth katakana,
 * FALSE otherwise (including when @str is %NULL).
 **/
gboolean
skk_utils_is_halfwidth_katakana (const gchar *str)
{
	if (str == NULL || skk_utils_charbytes (str) != 2)
		return FALSE;

	return ((str[0] & 0xff) == 0x8e) ? TRUE : FALSE;
}

/**
 * skk_utils_is_kana:
 * @str: NUL-terminated EUC-JP string, may be %NULL
 *
 * Test whether the first character of @str is KANA.
 *
 * NOTICE: KANA means any of
 *          HIRAGANA            (lead byte 0xa4)
 *          KATAKANA            (lead byte 0xa5)
 *          HALFWIDTH KATAKANA  (lead byte 0x8e)
 *
 * Return value: TRUE if the first character is kana, FALSE otherwise
 * (including when @str is %NULL).
 **/
gboolean
skk_utils_is_kana (const gchar *str)
{
	if (str == NULL || skk_utils_charbytes (str) != 2)
		return FALSE;

	switch (str[0] & 0xff) {
	case 0xa4:
	case 0xa5:
	case 0x8e:
		return TRUE;
	default:
		return FALSE;
	}
}

/**
 * skk_utils_get_char:
 * @str: NUL-terminated EUC-JP string, may be %NULL
 * @with_dakuon: TRUE to include a following dakuon mark in the result
 *
 * Extract the first character of @str.
 * When @with_dakuon is set and the first character is kana, the
 * second character is inspected as well; if it is a dakuon mark it
 * is returned together with the first character.
 *
 * The marks recognised are the bytes 0xa1 0xab and 0xa1 0xac after
 * hiragana or katakana, and 0x8e 0xde and 0x8e 0xdf after halfwidth
 * katakana.
 *
 *  ex) kana followed by such a mark
 *     @with_dakuon set     -> kana and mark (4 bytes)
 *     @with_dakuon not set -> kana only (2 bytes)
 *
 * Return value: newly allocated copy of the first character, or %NULL
 * when @str is %NULL. The caller must free it after use.
 **/
gchar *
skk_utils_get_char (const gchar *str, gboolean with_dakuon)
{
	const gchar *mark;
	gint first_len;
	gint lead;
	gint trail;
	gint take = 2;

	if (str == NULL)
		return NULL;

	first_len = skk_utils_charbytes (str);
	/* non-kana characters never carry a mark */
	if (!skk_utils_is_kana (str) || !with_dakuon)
		return g_strndup (str, first_len);

	mark = str + first_len;
	if (skk_utils_charbytes (mark) == 2) {
		lead = mark[0] & 0xff;
		trail = mark[1] & 0xff;
		if (skk_utils_is_hiragana (str) || skk_utils_is_katakana (str)) {
			if (lead == 0xa1 && (trail == 0xab || trail == 0xac))
				take = 4;
		} else if (skk_utils_is_halfwidth_katakana (str)) {
			if (lead == 0x8e && (trail == 0xde || trail == 0xdf))
				take = 4;
		}
	}
	return g_strndup (str, take);
}

/**
 * skk_utils_remove_char_nth:
 * @str: NUL-terminated EUC-JP string to remove from, may be %NULL
 * @nth: 0-based index of the character to remove
 *
 * Build a copy of @str with its @nth character left out.
 * When @str holds @nth characters or fewer, nothing is removed and
 * a plain copy of @str is returned.
 *
 * Return value: newly allocated string, or %NULL when @str is %NULL.
 * The caller must free it after use.
 **/
gchar *
skk_utils_remove_char_nth (const gchar *str, guint nth)
{
	gchar *ret;
	size_t total;
	size_t off = 0;
	size_t skip = 0;
	guint i;
	gint step;

	if (!str)
		return NULL;
	total = strlen (str);

	/* locate the byte offset where the nth character starts */
	for (i = 0; i < nth && off < total; i++) {
		step = skk_utils_charbytes (str + off);
		if (step <= 0)
			break;
		off += step;
		if (off > total)
			off = total;
	}

	/* measure the character to drop, if the string reaches that far */
	if (i == nth && off < total) {
		step = skk_utils_charbytes (str + off);
		if (step > 0)
			skip = step;
		if (skip > total - off)
			skip = total - off;
	}

	/* g_new0 zero-fills the buffer, so the result is always terminated */
	ret = g_new0 (gchar, total - skip + 1);
	memcpy (ret, str, off);
	memcpy (ret + off, str + off + skip, total - off - skip);
	return ret;
}

/**
 * skk_utils_escape_char:
 * @str: NUL-terminated EUC-JP string, may be %NULL
 *
 * Wrap @str in a "(concat ...)" expression, replacing the characters
 * that need escaping:
 *    ';'  -> \073
 *    '/'  -> \057
 *    '\n' -> \n  (backslash followed by 'n')
 *    '\r' -> \r  (backslash followed by 'r')
 * Every other character, single-byte or multibyte, is copied as is.
 *
 * NOTE(review): the text after "(concat " is emitted without
 * surrounding double quotes, exactly as before; confirm against the
 * consumer of this string whether quotes are expected.
 *
 * Return value: newly allocated escaped string, or %NULL when @str is
 * %NULL. The caller must free it after use.
 **/
gchar *
skk_utils_escape_char (const gchar *str)
{
	gchar *ret;
	gchar *out;
	const gchar *copy;
	const gchar *esc;
	gint len;
	gint k;

	if (!str)
		return NULL;
	/* the longest replacement is four bytes per input byte */
	ret = g_new0 (gchar, (strlen (str) * 4) + strlen ("(concat )") + 1);
	strcpy (ret, "(concat ");
	out = ret + strlen (ret);
	copy = str;
	while (*copy) {
		len = skk_utils_charbytes (copy);
		if (len <= 0)
			break;
		esc = NULL;
		if (len == 1) {
			switch (*copy) {
			case ';':
				esc = "\\073";
				break;
			case '\n':
				esc = "\\n";
				break;
			case '\r':
				esc = "\\r";
				break;
			case '/':
				esc = "\\057";
				break;
			default:
				break;
			}
		}
		if (esc) {
			strcpy (out, esc);
			out += strlen (esc);
			copy++;
			continue;
		}
		/* ordinary character: copy its bytes, never past the terminator */
		for (k = 0; k < len && copy[k]; k++)
			*out++ = copy[k];
		copy += k;
	}
	strcpy (out, ")");
	return ret;
}

/**
 * skk_utils_strlen:
 * @str: NUL-terminated EUC-JP string, may be %NULL
 *
 * calculate characters count, not bytes.
 * The walk is bounded by the byte length of @str, so a multibyte
 * sequence that is cut short by the terminator cannot carry the scan
 * beyond the end of the string.
 *
 * Return value: characters count. 0 when @str is %NULL or empty.
 **/
gint
skk_utils_strlen (const gchar *str)
{
	const gchar *copy;
	const gchar *end;
	gint step;
	gint ret = 0;

	if (!str)
		return 0;
	end = str + strlen (str);
	for (copy = str; copy < end; copy += step) {
		step = skk_utils_charbytes (copy);
		if (step <= 0)
			break;
		ret++;
	}
	return ret;
}

/*
 * Default per-element callback for the list free helpers:
 * releases the element's data with g_free(). @user_data is unused.
 */
static void
list_free_func (gpointer data, gpointer user_data)
{
	/* g_free() ignores NULL, so no guard is required */
	g_free (data);
}

/*
 * Free a GSList. When @need_free is set, every element is first
 * handed to @func (or to the default g_free() based callback when
 * @func is NULL) together with @user_data; afterwards the list
 * nodes themselves are released. A NULL @list is a no-op.
 */
void
skk_utils_slist_free (GSList *list, gboolean need_free, GFunc func, gpointer user_data)
{
	if (list == NULL)
		return;

	if (need_free)
		g_slist_foreach (list, func ? func : list_free_func, user_data);

	g_slist_free (list);
}

/*
 * Free a GList. When @need_free is set, every element is first
 * handed to @func (or to the default g_free() based callback when
 * @func is NULL) together with @user_data; afterwards the list
 * nodes themselves are released. A NULL @list is a no-op.
 */
void
skk_utils_list_free (GList *list, gboolean need_free, GFunc func, gpointer user_data)
{
	if (list == NULL)
		return;

	if (need_free)
		g_list_foreach (list, func ? func : list_free_func, user_data);

	g_list_free (list);
}

/*
 * GCompareFunc used with g_list_find_custom(): yields 0 when both
 * pointers are non-NULL and refer to equal strings, 1 in every
 * other case.
 */
static gint
list_find_by_strcmp (gconstpointer a, gconstpointer b)
{
	const char *lhs = (const char *) a;
	const char *rhs = (const char *) b;

	if (lhs && rhs && strcmp (lhs, rhs) == 0)
		return 0;
	return 1;
}

/*
 * Merge two lists of strings. For each non-NULL string in @list1,
 * the first node of @list2 holding an equal string is unlinked and
 * its node freed; the string itself is freed too when @need_free is
 * set. What remains of @list2 is then appended to @list1.
 *
 * Returns the head of the concatenated list.
 */
GList *
skk_utils_list_merge_string (GList *list1, GList *list2, gboolean need_free)
{
	GList *node = list1;
	GList *dup;

	while (node != NULL) {
		dup = NULL;
		if (node->data)
			dup = g_list_find_custom (list2, node->data, list_find_by_strcmp);
		if (dup != NULL) {
			if (need_free && dup->data)
				g_free (dup->data);
			list2 = g_list_remove_link (list2, dup);
			g_list_free_1 (dup);
		}
		node = g_list_next (node);
	}
	return g_list_concat (list1, list2);
}

/*
 * Join every non-NULL string of @list into one newly allocated
 * string. With a @delim the result starts with @delim and each item
 * is followed by @delim; without one the items are simply
 * concatenated.
 *
 * Returns NULL when @list is NULL, or when @delim is NULL and the
 * list holds no non-NULL item. The caller frees the result.
 */
gchar*
skk_utils_list_to_string_all (GList *list, gchar *delim)
{
	GList *node;
	gchar *ret = NULL;
	gchar *joined;
	gchar *item;

	if (list == NULL)
		return NULL;

	if (delim)
		ret = g_strdup (delim);

	for (node = list; node != NULL; node = node->next) {
		item = (gchar *) node->data;
		if (item == NULL)
			continue;
		if (ret == NULL) {
			/* only reachable for the first item when there is no delimiter */
			ret = g_strdup (item);
			continue;
		}
		if (delim)
			joined = g_strconcat (ret, item, delim, NULL);
		else
			joined = g_strconcat (ret, item, NULL);
		g_free (ret);
		ret = joined;
	}
	return ret;
}

#ifndef USE_GLIB2
/*
 * Convert a UTF-8 string to EUC-JP with iconv.
 *
 * Returns a newly allocated string that the caller frees, or NULL
 * when @str is NULL. If the converter cannot be opened, a copy of
 * the original string is returned. A conversion that stops early
 * yields whatever was converted up to that point.
 */
gchar *
skk_utils_utf8_to_eucjp (const gchar *str)
{
	iconv_t cd;
	/* iconv() takes size_t pointers; narrower types corrupt memory on LP64 */
	size_t in_left;
	size_t out_left;
	size_t capacity;
	char *ob, *dst;
	ICONV_CONST char *ib;
	if (!str)
		return NULL;
	if ((cd = iconv_open ("EUC-JP", "UTF-8")) == (iconv_t) -1) {
		/* 
		 * failed to open iconv,
		 * anyway, return original string
		 */
		return g_strdup (str);
	}
	/*
	 * EUC-JP output is usually no longer than its UTF-8 input, but a
	 * two-byte UTF-8 sequence may become a three-byte (0x8f prefixed)
	 * EUC-JP sequence, so reserve twice the input length plus the
	 * terminator.
	 */
	in_left = strlen (str);
	capacity = in_left * 2 + 1;
	dst = g_new (gchar, capacity);
	ib = (ICONV_CONST char*)str;
	ob = dst;
	/* keep one byte back so the terminator always fits */
	out_left = capacity - 1;
	/* best effort: on failure keep what has been converted so far */
	(void) iconv (cd, &ib, &in_left, &ob, &out_left);
	iconv_close (cd);
	*ob = '\0';

	return dst;
}
#else
/*
 * Convert a UTF-8 string to EUC-JP with g_convert().
 *
 * Returns the newly allocated result of g_convert(), or NULL when
 * @str is NULL. No GError is requested, so failure details are not
 * available to the caller.
 */
gchar *
skk_utils_utf8_to_eucjp (const gchar *str)
{
	/* the byte counts are required by the call but not used afterwards */
	gsize consumed;
	gsize produced;

	if (str == NULL)
		return NULL;

	/* -1 tells g_convert() that the input is NUL-terminated */
	return g_convert (str, -1, "EUC-JP", "UTF-8",
			&consumed, &produced, NULL);
}
#endif

#ifdef SKKUTILS_DEBUG
/* Print a separator line to stdout (debug output helper). */
void
put_sep (void)
{
	fputs ("-------------------------------------------\n", stdout);
}

/*
 * Debug driver, built only when SKKUTILS_DEBUG is defined.
 * Calls the string helpers with fixed inputs and prints the results
 * next to the values announced in each message, for manual
 * inspection; nothing is checked automatically.
 *
 * NOTE(review): the "expected" figures in the messages do not match
 * the ASCII literal "aiueo", and several str2 entries are empty or
 * hold stray characters. Presumably the literals originally held
 * EUC-JP text that was lost in a re-encoding -- confirm against the
 * upstream file.
 */
int
main (void)
{
	const gchar *str = "aiueo";
	const gchar *str2[] = {
		"",
		"ʎ",
		"ʎ",
		"",
		"",
	};
	int i;
	put_sep ();
	printf ("skk_utils_strlen\n");
	put_sep ();
	printf ("check string %s\n",str);
	printf ("exepected length 10 result (%d)\n",skk_utils_strlen (str));
	put_sep ();
	printf ("skk_utils_charbytes_nth\n");
	put_sep ();
	printf ("check string %s\n",str);
	/* deliberately asks for indexes 0..14 regardless of the string's length */
	for ( i = 0; i < 15 ; i++) {
		printf ("%dth string has %d byte\n", i, skk_utils_charbytes_nth (str, i));
	}
	put_sep ();
	printf ("skk_utils_charbytes_nth_all\n");
	put_sep ();
	printf ("check string %s\n", str);
	printf ("expected 7 %d bytes \n", skk_utils_charbytes_nth_all (str, 5));
	put_sep ();
	printf ("skk_utils_last_charbytes_nth_all\n");
	put_sep ();
	printf ("check string %s\n", str);
	printf ("expected 5 result %d\n", skk_utils_last_charbytes_nth_all (str, 3));
	put_sep ();
	printf ("skk_utils_last_charbytes\n");
	put_sep ();
	printf ("check string %s\n",str);
	printf ("exepected length 2 result (%d)\n",skk_utils_last_charbytes (str));
	put_sep ();
	printf ("skk_utils_get_char\n");
	put_sep ();
	/* only the first four of the five str2 entries are exercised */
	for ( i = 0; i < 4; i++) {
		/* the strings returned by skk_utils_get_char are not freed here */
		printf ("result (%s)\n", skk_utils_get_char (str2[i], TRUE));
		printf ("result (%s)\n", skk_utils_get_char (str2[i], FALSE));
	}
	put_sep ();
	/* the string returned by skk_utils_escape_char is not freed here either */
	printf ("result %s\n", skk_utils_escape_char ("test\n;/\n"));
	return 0;
}
#endif
