/*
 * LibSKK, a tiny Library to emulate SKK (Simple Kana Kanji Conversion)
 * 
 * Copyright (C) 2002 Motonobu Ichimura <famao@kondara.org>
 *
 * All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, and/or sell copies of the Software, and to permit persons
 * to whom the Software is furnished to do so, provided that the above
 * copyright notice(s) and this permission notice appear in all copies of
 * the Software and that both the above copyright notice(s) and this
 * permission notice appear in supporting documentation.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
 * OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
 * HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
 * INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Except as contained in this notice, the name of a copyright holder
 * shall not be used in advertising or otherwise to promote the sale, use
 * or other dealings in this Software without prior written authorization
 * of the copyright holder.
 *
 */

/* $Id: skkutils.c,v 1.3 2002/04/11 12:44:44 famao Exp $ */

/* vi:set ts=4 sw=4: */


#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <glib.h>

#include "skkutils.h"

/* calculate last character's bytes.
 * we can't use wide char. (yes, we can't use locale model)
 * so prepare EUC-JP specific function.
 */
gint
skk_utils_last_charbytes (const gchar *str)
{
	gint len;
	if (!str)
		return 0;
	len = strlen (str);
	if (len < 2)
		return len;
	if ((str[len - 1] & 0xff) < 0x80) {
		return 1;
	} else {
		if (len > 2) {
			if ((str[len - 3] & 0xff) == 0x8f) {
				return 3; /* 3 bytes */
			}
		}
	}
	return 2;
}

/* calculate first character's bytes.
 * we can't use wide char. 
 * so prepare EUC-JP specific function.
 */
gint
skk_utils_charbytes (const gchar *str)
{
	gint len;
	if (!str)
		return 0;
	len = strlen (str);
	if (len < 2)
		return len;
	if ((str[0] & 0xff) < 0x80) {
		return 1;
	} else if ((str[0] & 0xff) == 0x8f) {
		return 3;
	} else {
		return 2;
	}
	g_assert (0);
}

/*
 * Return the number of bytes occupied by the nth character of str.
 *
 * nth is zero-based: 0 asks about the first character.  The string is
 * walked character by character using skk_utils_charbytes().
 *
 * Returns 0 when str is NULL or when the string ends before the nth
 * character is reached.
 */
gint
skk_utils_charbytes_nth (const gchar *str, guint nth)
{
	const gchar *cursor;
	gint width;
	gint step;
	guint skipped;	/* unsigned like nth, so the comparison is well defined */

	if (!str)
		return 0;

	cursor = str;
	width = skk_utils_charbytes (cursor);
	/* A width of 0 means the end of the string: nothing left to skip. */
	for (skipped = 0; skipped < nth && width > 0; skipped++) {
		/*
		 * Advance byte by byte and stop at the terminator, so a
		 * truncated multi-byte sequence at the end of the string can
		 * never move the cursor out of bounds.
		 */
		for (step = 0; step < width && *cursor != '\0'; step++)
			cursor++;
		width = skk_utils_charbytes (cursor);
	}
	return width;
}

gboolean
skk_utils_is_hiragana (const gchar *str)
{
	gint len;
	if (!str)
		return FALSE;
	len = skk_utils_charbytes (str);
	if (len != 2)
		return FALSE;
	if (((str[0] & 0xff) == 0xa4)) {
		return TRUE;
	}
	return FALSE;
}

gboolean
skk_utils_is_katakana (const gchar *str)
{
	gint len;
	if (!str)
		return FALSE;
	len = skk_utils_charbytes (str);
	if (len != 2)
		return FALSE;
	if (((str[0] & 0xff) == 0xa5)) {
		return TRUE;
	}
	return FALSE;
}

gboolean
skk_utils_is_halfwidth_katakana (const gchar *str)
{
	gint len;
	if (!str)
		return FALSE;
	len = skk_utils_charbytes (str);
	if (len != 2)
		return FALSE;
	if (((str[0] & 0xff) == 0x8e)) {
		return TRUE;
	}
	return FALSE;
}

gboolean
skk_utils_is_kana (const gchar *str)
{
	gint len;
	if (!str)
		return FALSE;
	len = skk_utils_charbytes (str);
	if (len != 2)
		return FALSE;
	if (((str[0] & 0xff) == 0xa4) ||
			((str[0] & 0xff) == 0xa5) ||
			((str[0] & 0xff) == 0x8e)) {
		return TRUE;
	}
	return FALSE;
}

/*
 * Return a newly allocated copy (made with g_strndup()) of the first
 * character of str.
 *
 * When with_dakuon is TRUE and the first character is kana, a directly
 * following sound mark is included in the copy:
 *   - after hiragana/katakana: the two-byte character 0xa1 0xab or
 *     0xa1 0xac;
 *   - after half-width katakana: the two-byte character 0x8e 0xde or
 *     0x8e 0xdf.
 * In that case four bytes are copied instead of two.
 *
 * Returns NULL when str is NULL.
 */
gchar *
skk_utils_get_char (const gchar *str, gboolean with_dakuon)
{
	const gchar *next;
	gint head_bytes;
	gint mark_lead;
	gint mark_first;
	gint mark_second;

	if (str == NULL)
		return NULL;

	head_bytes = skk_utils_charbytes (str);

	/* Non-kana, or no mark handling requested: just the character itself. */
	if (!skk_utils_is_kana (str) || !with_dakuon)
		return g_strndup (str, head_bytes);

	/* Kana is either half-width katakana or hiragana/katakana. */
	if (skk_utils_is_halfwidth_katakana (str)) {
		mark_lead = 0x8e;
		mark_first = 0xde;
		mark_second = 0xdf;
	} else {
		mark_lead = 0xa1;
		mark_first = 0xab;
		mark_second = 0xac;
	}

	next = str + head_bytes;
	if (skk_utils_charbytes (next) == 2 &&
			(next[0] & 0xff) == mark_lead &&
			((next[1] & 0xff) == mark_first ||
			 (next[1] & 0xff) == mark_second)) {
		/* kana (2 bytes) + sound mark (2 bytes) */
		return g_strndup (str, 4);
	}

	return g_strndup (str, 2);
}

/*
 * Count the characters (not the bytes) in str.
 *
 * The width of each character is obtained from skk_utils_charbytes().
 * NULL and the empty string both yield 0.
 */
gint
skk_utils_strlen (const gchar *str)
{
	const gchar *cursor;
	gint count = 0;

	if (!str)
		return 0;

	cursor = str;
	while (*cursor != '\0') {
		gint width = skk_utils_charbytes (cursor);
		gint step = 0;

		/*
		 * Consume the character one byte at a time, stopping at the
		 * terminator: a truncated multi-byte sequence at the end of
		 * the string must not carry the cursor past the NUL.  At
		 * least one byte is always consumed, so the loop terminates.
		 */
		do {
			cursor++;
			step++;
		} while (step < width && *cursor != '\0');

		count++;
	}
	return count;
}

/*
 * GFunc callback that releases one list element with g_free().
 * user_data is not used.
 */
static void
list_free_func (gpointer data, gpointer user_data)
{
	(void) user_data;
	/* g_free() ignores NULL, so no separate check is needed. */
	g_free (data);
}

/*
 * Free a GList and, optionally, the data held by its elements.
 *
 * list      - list to free; NULL is accepted and ignored.
 * need_free - when TRUE, every element's data is released first.
 * func      - callback used to release each element's data; when NULL,
 *             the data is released with g_free().
 * user_data - passed through to the release callback.
 */
void
skk_utils_list_free (GList *list, gboolean need_free, GFunc func, gpointer user_data)
{
	if (list == NULL)
		return;

	if (need_free)
		g_list_foreach (list, func ? func : list_free_func, user_data);

	g_list_free (list);
}

/*
 * GCompareFunc used with g_list_find_custom(): returns 0 when both
 * arguments are non-NULL strings with identical contents, 1 otherwise.
 * The result carries no ordering information.
 */
static gint
list_find_by_strcmp (gconstpointer a, gconstpointer b)
{
	const char *lhs = (const char *) a;
	const char *rhs = (const char *) b;

	if (lhs == NULL || rhs == NULL)
		return 1;

	return (strcmp (lhs, rhs) == 0) ? 0 : 1;
}

/*
 * Merge two lists of strings into one.
 *
 * For every element of list1 that carries data, the first element of
 * list2 holding an equal string is unlinked from list2 and its node is
 * freed.  What is left of list2 is then appended to list1 and the
 * combined list is returned.  Both input lists are consumed.
 *
 * NOTE(review): only the node of a removed list2 element is freed, not
 * the string it points to -- confirm who owns that string.
 * NOTE(review): at most one list2 element is removed per list1 element,
 * so repeated strings inside list2 can survive -- confirm this is
 * intended.
 */
GList *
skk_utils_list_merge_string (GList *list1, GList *list2)
{
	GList *cursor = list1;

	while (cursor != NULL) {
		if (cursor->data != NULL) {
			GList *match = g_list_find_custom (list2, cursor->data, list_find_by_strcmp);

			if (match != NULL) {
				list2 = g_list_remove_link (list2, match);
				g_list_free_1 (match);
			}
		}
		cursor = g_list_next (cursor);
	}

	return g_list_concat (list1, list2);
}

/*
 * Join every string held by list into one newly allocated string.
 *
 * With a delimiter, the result starts with the delimiter and every
 * element is followed by it (e.g. delim "/" gives "/a/b/").  Without a
 * delimiter the elements are simply concatenated.  Elements whose data
 * is NULL are skipped.
 *
 * Returns NULL when list is NULL, or when delim is NULL and no element
 * carries data.
 */
gchar*
skk_utils_list_to_string_all (GList *list, gchar *delim)
{
	GList *node;
	gchar *joined;

	if (list == NULL)
		return NULL;

	/* With a delimiter the result always begins with it. */
	joined = delim ? g_strdup (delim) : NULL;

	for (node = list; node != NULL; node = node->next) {
		gchar *grown;

		if (node->data == NULL)
			continue;

		if (delim) {
			/* joined is never NULL here: it was seeded with delim. */
			grown = g_strconcat (joined, (gchar *) node->data, delim, NULL);
		} else if (joined) {
			grown = g_strconcat (joined, (gchar *) node->data, NULL);
		} else {
			grown = g_strdup ((gchar *) node->data);
		}
		g_free (joined);
		joined = grown;
	}

	return joined;
}

#ifdef SKKUTILS_DEBUG
/* Print a horizontal separator line on standard output (debug helper). */
void
put_sep (void)
{
	fputs ("-------------------------------------------\n", stdout);
}

int
main (void)
{
	const gchar *str = "aiueo";
	const gchar *str2[] = {
		"",
		"ʎ",
		"ʎ",
		"",
		"",
	};
	int i;
	put_sep ();
	printf ("skk_utils_strlen\n");
	put_sep ();
	printf ("check string %s\n",str);
	printf ("exepected length 10 result (%d)\n",skk_utils_strlen (str));
	put_sep ();
	printf ("skk_utils_charbytes_nth\n");
	put_sep ();
	printf ("check string %s\n",str);
	for ( i = 0; i < 15 ; i++) {
		printf ("%dth string has %d byte\n", i, skk_utils_charbytes_nth (str, i));
	}
	put_sep ();
	printf ("skk_utils_last_charbytes\n");
	put_sep ();
	printf ("check string %s\n",str);
	printf ("exepected length 2 result (%d)\n",skk_utils_last_charbytes (str));
	put_sep ();
	printf ("skk_utils_get_char\n");
	put_sep ();
	for ( i = 0; i < 4; i++) {
		printf ("result (%s)\n", skk_utils_get_char (str2[i], TRUE));
		printf ("result (%s)\n", skk_utils_get_char (str2[i], FALSE));
	}
	return 0;
}
#endif
