/**********************************************************************
 
	Copyright (C) 2003 Hirohisa MORI <joshua@nichibun.ac.jp>
 
	This program is free software; you can redistribute it 
	and/or modify it under the terms of the GLOBALBASE 
	Library General Public License (G-LGPL) as published by 

	http://www.globalbase.org/
 
	This program is distributed in the hope that it will be 
	useful, but WITHOUT ANY WARRANTY; without even the 
	implied warranty of MERCHANTABILITY or FITNESS FOR A 
	PARTICULAR PURPOSE.

**********************************************************************/


#include	<stdlib.h>
#include	"memory_debug.h"
#include	"lc_encode.h"

/*
 * Decoder state kept between calls to utf82int so that one multi-byte
 * sequence can be assembled from bytes delivered one at a time.
 */
typedef struct utf8_work {
	unsigned char	buf[6];		/* bytes of the sequence collected so far */
	char		byte;		/* total sequence length implied by the lead byte; 0 when idle */
	char		len;		/* number of bytes currently stored in buf */
} UTF8_WORK;

/* Forward declarations of the codec entry points stored in utf8_cm. */
void * open_utf8();				/* allocate decoder state */
int close_utf8(unsigned char *,void *);		/* release decoder state */
int utf82int(L_CHAR *,void*,unsigned char);	/* decode: one input byte per call */
int int2utf8(unsigned char *,void*,L_CHAR);	/* encode: one character per call */


/*
 * Code-set table referenced by the utf8_cm descriptor.  The entry layout
 * is given by the LCZ_SET declaration in a project header.
 * NOTE(review): the {LCC_ERROR,0} entry is presumably the list terminator
 * -- confirm against the LCZ_SET declaration.
 */
static LCZ_SET main_code[] = {
	{0,LCZM_4B_TYPE},
	{LCC_ERROR,0}
};


/*
 * Codec descriptor for UTF-8.  It names the encoding, references the
 * main_code table and supplies the open/close/decode/encode entry points
 * defined in this file.  Field meanings are given by the CODE_METHOD
 * declaration in a project header.
 * NOTE(review): the purely numeric fields (4, 0, {0,0} and the trailing 0)
 * are not explained by anything in this file -- confirm them against the
 * CODE_METHOD declaration.
 */
CODE_METHOD utf8_cm = {
	"UTF-8",
	LCC_UTF8,
	main_code,
	4,
	0,
	{0,0},
	open_utf8,	/* allocate decoder state */
	close_utf8,	/* release decoder state */
	utf82int,	/* byte stream -> L_CHAR */
	int2utf8,	/* L_CHAR -> byte sequence */
	0
};


/*
 * Create the decoder state for one UTF-8 stream.
 *
 * Obtains a UTF8_WORK from d_alloc, clears the byte buffer and both
 * counters, and returns it as an opaque pointer.  The same pointer is
 * later handed to utf82int and released by close_utf8.
 *
 * NOTE(review): the d_alloc result is used without a NULL check --
 * confirm that d_alloc cannot return NULL.
 */
void *
open_utf8()
{
UTF8_WORK * work;
int n;
	work = d_alloc(sizeof(*work));
	/* clear the sequence buffer, last slot first */
	n = (int)sizeof(work->buf);
	while ( n > 0 ) {
		n --;
		work->buf[n] = 0;
	}
	/* no sequence in progress */
	work->byte = 0;
	work->len = 0;
	return work;
}

/*
 * Release the decoder state created by open_utf8.
 *
 *	ret	output buffer supplied by the caller; not used here
 *	work	state pointer to release through d_f_ree
 *
 * Always returns 0, i.e. no bytes are written to ret.
 */
int
close_utf8(unsigned char * ret,void * work)
{
	(void)ret;	/* nothing is emitted when the stream is closed */
	d_f_ree(work);
	return 0;
}

/*
 * Feed one byte of a UTF-8 stream to the decoder.
 *
 *	ret	receives the decoded value when a character completes
 *	w	decoder state (a UTF8_WORK as set up by open_utf8)
 *	ch	next input byte
 *
 * Returns 1 when *ret has been written with a complete character and
 * 0 when more bytes are needed or ch was rejected as a lead byte.
 *
 * Sequences of 1 to 6 bytes are accepted (the 31-bit form of UTF-8).
 * The payload bits taken from the lead byte are 5, 4, 3, 2 and 1 for
 * sequences of 2, 3, 4, 5 and 6 bytes; every trailing byte adds 6 bits.
 *
 * NOTE(review): trailing bytes are not checked for the 10xxxxxx pattern
 * and overlong encodings are not rejected.
 */
int
utf82int(L_CHAR * ret,void * w,unsigned char ch)
{
UTF8_WORK * _work;
unsigned long code;
int lead_mask;
int i;

	_work = w;
	_work->buf[(int)(_work->len)] = ch;
	_work->len ++;

	if ( _work->len == 1 ) {
		/* first byte: either a complete ASCII character or a lead
		   byte announcing the length of the sequence */
		if ( (ch & 0x80) == 0 ) {
			*ret = ch&0x000000ff;
			_work->byte = _work->len = 0;
			return 1;
		}
		else if ( (ch&0xe0) == 0xc0 )
			_work->byte = 2;
		else if ( (ch&0xf0) == 0xe0 )
			_work->byte = 3;
		else if ( (ch&0xf8) == 0xf0 )
			_work->byte = 4;
		else if ( (ch&0xfc) == 0xf8 )
			_work->byte = 5;
		else if ( (ch&0xfe) == 0xfc )
			_work->byte = 6;
		else	/* stray trailing byte, 0xfe or 0xff: drop it */
			_work->len = _work->byte = 0;
		return 0;
	}

	/* sequence still incomplete */
	if ( _work->byte != _work->len )
		return 0;

	/* mask selecting the payload bits of the lead byte */
	switch ( _work->byte ) {
	case 2:
		lead_mask = 0x1f;
		break;
	case 3:
		lead_mask = 0x0f;
		break;
	case 4:
		lead_mask = 0x07;	/* 11110xxx: three payload bits */
		break;
	case 5:
		lead_mask = 0x03;	/* 111110xx: two payload bits */
		break;
	case 6:
		lead_mask = 0x01;
		break;
	default:
		/* cannot be reached while the state is only modified by
		   this function; discard the state rather than decode */
		_work->len = _work->byte = 0;
		return 0;
	}

	/* lead-byte payload first, then six bits from each trailing byte */
	code = (unsigned long)(_work->buf[0] & lead_mask);
	for ( i = 1 ; i < _work->byte ; i ++ )
		code = (code << 6) | (unsigned long)(_work->buf[i] & 0x3f);

	*ret = code;
	_work->len = _work->byte = 0;
	return 1;
}

/*
 * Encode one character as a UTF-8 byte sequence.
 *
 *	ret	output buffer; up to 6 bytes are written
 *	_work	codec state; not used by the encoder
 *	ch	character to encode
 *
 * Returns the number of bytes stored in ret (1 to 6), or 0 when ch
 * belongs to a character set this codec does not emit.
 *
 * Characters tagged with one of the two-byte Unicode sets listed below
 * are first reduced to their code part with LCZM_2B_CODE.  A character
 * is then emitted as a single byte if it is in the LCZ_1BC_ASCII set,
 * or as a 1- to 6-byte sequence chosen by its magnitude if it passes
 * the LCZ_4BYTE test.
 */
int
int2utf8(unsigned char * ret,void * _work,L_CHAR ch)
{
	(void)_work;

	switch ( ch & LCZM_2B_TYPE ) {
	case LCZ_2BC_UNICODE_v1_1_JP:
	case LCZ_2BC_UNICODE_v1_1_TW:
	case LCZ_2BC_UNICODE_v1_1_CN:
	case LCZ_2BC_UNICODE_v1_1_KR:

	case LCZ_2BC_UNICODE_v2_0_JP:
	case LCZ_2BC_UNICODE_v2_0_TW:
	case LCZ_2BC_UNICODE_v2_0_CN:
	case LCZ_2BC_UNICODE_v2_0_KR:

	case LCZ_2BC_UNICODE_v3_0_JP:
	case LCZ_2BC_UNICODE_v3_0_TW:
	case LCZ_2BC_UNICODE_v3_0_CN:
	case LCZ_2BC_UNICODE_v3_0_KR:

		/* strip the character-set tag, keep the code part */
		ch = ch&LCZM_2B_CODE;
		break;
	default:
		break;
	}

	if ( (ch&LCZM_1B_TYPE) == LCZ_1BC_ASCII ) {
		ret[0] = ch&0xff;
		return 1;
	}

	if ( (ch&LCZM_4BYTE) == LCZ_4BYTE ) {
		if ( (ch&0xffffff80) == 0 ) {
			/* up to 7 bits: 0xxxxxxx */
			ret[0] = ch;
			return 1;
		}
		else if ( (ch&0xfffff800) == 0 ) {
			/* up to 11 bits: 110xxxxx 10xxxxxx */
			ret[0] = 0xc0|((ch>>6)&0x1f);
			/* the trailing byte carries the low six bits */
			ret[1] = 0x80|(ch&0x3f);
			return 2;
		}
		else if ( (ch&0xffff0000) == 0 ) {
			/* up to 16 bits: 1110xxxx + 2 trailing bytes */
			ret[0] = 0xe0|((ch>>12)&0x0f);
			ret[1] = 0x80|((ch>>6)&0x3f);
			ret[2] = 0x80|(ch&0x3f);
			return 3;
		}
		else if ( (ch&0xffe00000) == 0 ) {
			/* up to 21 bits: 11110xxx + 3 trailing bytes */
			ret[0] = 0xf0|((ch>>18)&0x07);
			ret[1] = 0x80|((ch>>12)&0x3f);
			ret[2] = 0x80|((ch>>6)&0x3f);
			ret[3] = 0x80|(ch&0x3f);
			return 4;
		}
		else if ( (ch&0xfc000000) == 0 ) {
			/* up to 26 bits: 111110xx + 4 trailing bytes */
			ret[0] = ((ch>>24)&0x03)|0xf8;
			ret[1] = ((ch>>18)&0x3f)|0x80;
			ret[2] = ((ch>>12)&0x3f)|0x80;
			ret[3] = ((ch>>6)&0x3f)|0x80;
			ret[4] = (ch&0x3f)|0x80;
			return 5;
		}
		else {
			/* anything larger: 1111110x + 5 trailing bytes */
			ret[0] = ((ch>>30)&0x01)|0xfc;
			ret[1] = ((ch>>24)&0x3f)|0x80;
			ret[2] = ((ch>>18)&0x3f)|0x80;
			ret[3] = ((ch>>12)&0x3f)|0x80;
			ret[4] = ((ch>>6)&0x3f)|0x80;
			ret[5] = (ch&0x3f)|0x80;
			return 6;
		}
	}
	return 0;
}
