// Armenian.cpp
// (c) 2004 exeal

#include "StdAfx.h"
#include "Encoder.h"
#include <algorithm>	// std::binary_search

using namespace Ascension::Encodings;
using namespace std;


// Registers the three ARMSCII encoder classes and the Armenian auto-detector.
// The macros are not defined in this file (presumably they come from Encoder.h).
// NOTE(review): the trailing "2, 1" arguments look like per-character size limits
// (a ligature is encoded as two bytes further down) -- confirm against the macro definition.
BEGIN_ENCODER_DEFINITION()
	DEFINE_ENCODER_CLASS(CPEX_ARMENIAN_ARMSCII7, Armenian_Armscii7, 2, 1)
	DEFINE_ENCODER_CLASS(CPEX_ARMENIAN_ARMSCII8, Armenian_Armscii8, 2, 1)
	DEFINE_ENCODER_CLASS(CPEX_ARMENIAN_ARMSCII8A, Armenian_Armscii8a, 2, 1)
END_ENCODER_DEFINITION()
DEFINE_DETECTOR(CPEX_ARMENIAN_AUTODETECT, Armenian);


// The conversion tables below were written with reference to Hovik Melikyan's material
// (http://www.freenet.am/armscii/)
namespace {
	// ARMSCII-7 -> Unicode for bytes 0x20..0x7F, indexed by (byte - 0x20).
	// The ARMSCII-8 decoder reuses this table for bytes 0xA1..0xFF by indexing
	// with (byte - 0x80 - 0x20). __RPCH marks a byte the decoders report as illegal.
	// Row 0x50 must be the exact inverse of UCStoARMSCII78_0530 (U+0540 -> 0x50,
	// U+0541 -> 0x52, U+0542 -> 0x54, U+0543 -> 0x56), so it starts at U+0540 and
	// contains CHEH (U+0543/U+0573) rather than repeating KEN (U+053F/U+056F).
	const wchar_t	ARMSCII78toUCS_20[] = {
	/* 0x20 */	0x0020, __RPCH, 0x00A7, 0x0589, 0x0029, 0x0028, 0x00BB, 0x00AB,
				0x2014, 0x002E, 0x055D, 0x002C, 0x002D, 0x058A, 0x2026, 0x055C,
	/* 0x30 */	0x055B, 0x055E, 0x0531, 0x0561, 0x0532, 0x0562, 0x0533, 0x0563,
				0x0534, 0x0564, 0x0535, 0x0565, 0x0536, 0x0566, 0x0537, 0x0567,
	/* 0x40 */	0x0538, 0x0568, 0x0539, 0x0569, 0x053A, 0x056A, 0x053B, 0x056B,
				0x053C, 0x056C, 0x053D, 0x056D, 0x053E, 0x056E, 0x053F, 0x056F,
	/* 0x50 */	0x0540, 0x0570, 0x0541, 0x0571, 0x0542, 0x0572, 0x0543, 0x0573,
				0x0544, 0x0574, 0x0545, 0x0575, 0x0546, 0x0576, 0x0547, 0x0577,
	/* 0x60 */	0x0548, 0x0578, 0x0549, 0x0579, 0x054A, 0x057A, 0x054B, 0x057B,
				0x054C, 0x057C, 0x054D, 0x057D, 0x054E, 0x057E, 0x054F, 0x057F,
	/* 0x70 */	0x0550, 0x0580, 0x0551, 0x0581, 0x0552, 0x0582, 0x0553, 0x0583,
				0x0554, 0x0584, 0x0555, 0x0585, 0x0556, 0x0586, 0x055A, 0x007F
	};
	// Unicode -> ARMSCII-7 for U+0028..U+002F, indexed by (code point - 0x0028).
	// __NA marks a character the encoder treats as not representable.
	const uchar		UCStoARMSCII7_0028[] = {
					0x25, 0x24, __NA, __NA, 0x2B, 0x2C, 0x29, __NA
	};
	// Unicode -> ARMSCII-8 for U+0028..U+002F, indexed by (code point - 0x0028).
	// Unlike the shared "78" tables, the punctuation entries here already carry
	// the high bit (0xA5 == 0x25 + 0x80, etc.); '*', '+' and '/' map to themselves.
	const uchar		UCStoARMSCII8_0028[] = {
					0xA5, 0xA4, 0x2A, 0x2B, 0xAB, 0xAC, 0xA9, 0x2F
	};
	// Unicode -> ARMSCII-7 code for U+00A0..U+00BB, indexed by (code point - 0x00A0).
	// Shared by the ARMSCII-7 and ARMSCII-8 encoders. __NA marks a character the
	// encoders treat as not representable.
	const uchar		UCStoARMSCII78_00A0[] = {
	/* U+00A0 */	0x20, __NA, __NA, __NA, __NA, __NA, __NA, 0x22,
					__NA, __NA, __NA, 0x27, __NA, __NA, __NA, __NA,
	/* U+00B0 */	__NA, __NA, __NA, __NA, __NA, __NA, __NA, __NA,
					__NA, __NA, __NA, 0x26
	};
	// Unicode -> ARMSCII-7 code for the Armenian block U+0530..U+058A, indexed by
	// (code point - 0x0530). Shared by the ARMSCII-7 and ARMSCII-8 encoders and
	// also used to encode the two halves of a decomposed ligature.
	// Upper-case letters take even codes from 0x32, lower-case letters the odd
	// codes from 0x33. __NA marks a character the encoders treat as not representable.
	const uchar		UCStoARMSCII78_0530[] = {
	/* U+0530 */	__NA, 0x32, 0x34, 0x36, 0x38, 0x3A, 0x3C, 0x3E,
					0x40, 0x42, 0x44, 0x46, 0x48, 0x4A, 0x4C, 0x4E,
	/* U+0540 */	0x50, 0x52, 0x54, 0x56, 0x58, 0x5A, 0x5C, 0x5E,
					0x60, 0x62, 0x64, 0x66, 0x68, 0x6A, 0x6C, 0x6E,
	/* U+0550 */	0x70, 0x72, 0x74, 0x76, 0x78, 0x7A, 0x7C, __NA,
					__NA, __NA, 0x7E, 0x30, 0x2F, 0x2A, 0x31, __NA,
	/* U+0560 */	__NA, 0x33, 0x35, 0x37, 0x39, 0x3B, 0x3D, 0x3F,
					0x41, 0x43, 0x45, 0x47, 0x49, 0x4B, 0x4D, 0x4F,
	/* U+0570 */	0x51, 0x53, 0x55, 0x57, 0x59, 0x5B, 0x5D, 0x5F,
					0x61, 0x63, 0x65, 0x67, 0x69, 0x6B, 0x6D, 0x6F,
	/* U+0580 */	0x71, 0x73, 0x75, 0x77, 0x79, 0x7B, 0x7D, __NA,
					__NA, 0x23, 0x2D
	};
	// Unicode -> ARMSCII-7 code for U+2010..U+2026, indexed by (code point - 0x2010).
	// Only U+2014 (-> 0x28) and U+2026 (-> 0x2E) have entries. Shared by the
	// ARMSCII-7 and ARMSCII-8 encoders.
	const uchar		UCStoARMSCII78_2010[] = {
	/* U+2010 */	__NA, __NA, __NA, __NA, 0x28, __NA, __NA, __NA,
					__NA, __NA, __NA, __NA, __NA, __NA, __NA, __NA,
	/* U+2020 */	__NA, __NA, __NA, __NA, __NA, __NA, 0x2E
	};
	// ARMSCII-8A -> Unicode for bytes 0x20..0xAF, indexed by (byte - 0x20).
	// The lower half is ASCII except for the bytes this table reassigns to
	// Armenian punctuation (0x27, 0x2D, 0x3A, 0x5F, 0x60, 0x7E); bytes 0x80..0xAD
	// hold the first part of the alphabet and 0xAE/0xAF the guillemets.
	// Bytes 0x2A, 0x2B and 0x2F decode to '*', '+' and '/' (U+002A, U+002B,
	// U+002F); they must not collide with the digits at 0x30..0x32.
	const wchar_t	ARMSCII8AtoUCS_20[] = {
	/* 0x20 */	0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x055B,
				0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x2014, 0x002E, 0x002F,
	/* 0x30 */	0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
				0x0038, 0x0039, 0x0589, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
	/* 0x40 */	0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
				0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
	/* 0x50 */	0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
				0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x002D,
	/* 0x60 */	0x055D, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
				0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
	/* 0x70 */	0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
				0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x055C, 0x007F,
	/* 0x80 */	0x0531, 0x0561, 0x0532, 0x0562, 0x0533, 0x0563, 0x0534, 0x0564,
				0x0535, 0x0565, 0x0536, 0x0566, 0x0537, 0x0567, 0x0538, 0x0568,
	/* 0x90 */	0x0539, 0x0569, 0x053A, 0x056A, 0x053B, 0x056B, 0x053C, 0x056C,
				0x053D, 0x056D, 0x053E, 0x056E, 0x053F, 0x056F, 0x0540, 0x0570,
	/* 0xA0 */	0x0541, 0x0571, 0x0542, 0x0572, 0x0543, 0x0573, 0x0544, 0x0574,
				0x0545, 0x0575, 0x0546, 0x0576, 0x0547, 0x0577, 0x00AB, 0x00BB
	};
	// ARMSCII-8A -> Unicode for bytes 0xD8..0xFF, indexed by (byte - 0xD8).
	// Holds the second part of the alphabet plus a few punctuation marks.
	// __RPCH entries (0xD8..0xDC and 0xFF) presumably mark unassigned bytes; the
	// decoder compares against __REPLACEMENT_CHARACTER -- TODO confirm both
	// macros expand to the same value.
	const wchar_t	ARMSCII8AtoUCS_D8[] = {
				__RPCH, __RPCH, __RPCH, __RPCH, __RPCH, 0x058A, 0x2026, 0x055E,
	/* 0xE0 */	0x0548, 0x0578, 0x0549, 0x0579, 0x054A, 0x057A, 0x054B, 0x057B,
				0x054C, 0x057C, 0x054D, 0x057D, 0x054E, 0x057E, 0x054F, 0x057F,
	/* 0xF0 */	0x0550, 0x0580, 0x0551, 0x0581, 0x0552, 0x0582, 0x0553, 0x0583,
				0x0554, 0x0584, 0x0555, 0x0585, 0x0556, 0x0586, 0x055A, __RPCH
	};

	// Unicode -> ARMSCII-8A for U+00A8..U+00BB, indexed by (code point - 0x00A8).
	// Only the guillemets have entries: U+00AB -> 0xAE and U+00BB -> 0xAF, the
	// inverse of what ARMSCII8AtoUCS_20 decodes for those two bytes.
	// __NA marks a character the encoder treats as not representable.
	const uchar		UCStoARMSCII8A_00A8[] = {
	/* U+00A8 */	__NA, __NA, __NA, 0xAE, __NA, __NA, __NA, __NA,
	/* U+00B0 */	__NA, __NA, __NA, __NA, __NA, __NA, __NA, __NA,
					__NA, __NA, __NA, 0xAF
	};
	// Unicode -> ARMSCII-8A for the Armenian block U+0530..U+058A, indexed by
	// (code point - 0x0530). Entries are final 8-bit codes (no offset is added).
	// Letters AYB..SHA use 0x80..0xAD and VO..FEH use 0xE0..0xFD.
	// The Armenian full stop U+0589 maps to 0x3A, the inverse of the decoder entry
	// for 0x3A; U+0587 (ligature) and U+0588 have no direct code.
	// __NA marks a character the encoder treats as not representable.
	const uchar		UCStoARMSCII8A_0530[] = {
	/* U+0530 */	__NA, 0x80, 0x82, 0x84, 0x86, 0x88, 0x8A, 0x8C,
					0x8E, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9A, 0x9C,
	/* U+0540 */	0x9E, 0xA0, 0xA2, 0xA4, 0xA6, 0xA8, 0xAA, 0xAC,
					0xE0, 0xE2, 0xE4, 0xE6, 0xE8, 0xEA, 0xEC, 0xEE,
	/* U+0550 */	0xF0, 0xF2, 0xF4, 0xF6, 0xF8, 0xFA, 0xFC, __NA,
					__NA, __NA, 0xFE, 0x27, 0x7E, 0x60, 0xDF, __NA,
	/* U+0560 */	__NA, 0x81, 0x83, 0x85, 0x87, 0x89, 0x8B, 0x8D,
					0x8F, 0x91, 0x93, 0x95, 0x97, 0x99, 0x9B, 0x9D,
	/* U+0570 */	0x9F, 0xA1, 0xA3, 0xA5, 0xA7, 0xA9, 0xAB, 0xAD,
					0xE1, 0xE3, 0xE5, 0xE7, 0xE9, 0xEB, 0xED, 0xEF,
	/* U+0580 */	0xF1, 0xF3, 0xF5, 0xF7, 0xF9, 0xFB, 0xFD, __NA,
					__NA, 0x3A, 0xDD
	};
	// Unicode -> ARMSCII-8A for U+2010..U+2026, indexed by (code point - 0x2010).
	// Only U+2014 (-> 0x2D) and U+2026 (-> 0xDE) have entries.
	const uchar		UCStoARMSCII8A_2010[] = {
	/* U+2010 */	__NA, __NA, __NA, __NA, 0x2D, __NA, __NA, __NA,
					__NA, __NA, __NA, __NA, __NA, __NA, __NA, __NA,
	/* U+2020 */	__NA, __NA, __NA, __NA, __NA, __NA, 0xDE
	};

	// Splits an Armenian ligature into its two component letters.
	// Returns a pointer to a NUL-terminated two-letter string for U+0587 and
	// U+FB13..U+FB17, or a null pointer for any other character.
	inline const wchar_t* DecomposeArmenianLigature(wchar_t ch) {
		// The only ligature inside the Armenian block itself.
		if(ch == 0x0587)
			return L"\x0565\x0582";	// Ech Yiwn

		// Everything else lives in the contiguous presentation-form range.
		if(ch < 0xFB13 || ch > 0xFB17)
			return 0;

		static const wchar_t* const	presentationForms[] = {
			L"\x0574\x0576",	// U+FB13: Men Now
			L"\x0574\x0565",	// U+FB14: Men Ech
			L"\x0574\x056B",	// U+FB15: Men Ini
			L"\x057E\x0576",	// U+FB16: Vew Now
			L"\x0574\x056D"		// U+FB17: Men Xeh
		};
		return presentationForms[ch - 0xFB13];
	}

	// Guesses which Armenian code page a byte sequence uses.
	//   psz            - bytes to inspect
	//   cch            - number of bytes in psz
	//   cpResult       - receives the detected code page
	//   cchConvertable - receives the number of bytes considered convertible
	// If a Unicode detector is available and it accepts the whole input, its
	// result is returned unchanged. Otherwise the result is one of the ARMSCII
	// code pages (ARMSCII-8 unless the byte values rule it out) and
	// cchConvertable is set to cch.
	void DetectCodePage_Armenian(const uchar* psz, size_t cch, CodePage& cpResult, size_t& cchConvertable) {
		// Check for Unicode first.
		if(CEncoderFactory::CodePageDetector unicodeDetector = CEncoderFactory::GetInstance().GetUnicodeDetector()) {
			unicodeDetector(psz, cch, cpResult, cchConvertable);
			if(cch == cchConvertable)
				return;
		}

		cpResult = CPEX_ARMENIAN_ARMSCII8;	// preferred code page
		cchConvertable = cch;

		bool	b[3] = {true, true, true};	// still possible? 0:-7, 1:-8, 2:-8A
		for(size_t i = 0; i < cch; ++i) {
			const uchar	ch = psz[i];
			if(ch >= 0x80)				b[0] = false;	// ARMSCII-7 consists of only 7-bits
			if(ch >= 0x80 && ch < 0xA0)	b[1] = false;	// 8bit controls (but ARMSCII-8 may contain these)
			// ARMSCII-8A leaves 0xB0..0xDC unassigned: the decoder tables cover
			// 0x20..0xAF and 0xD8..0xFF, and 0xD8..0xDC are __RPCH in the latter.
			if(ch >= 0xB0 && ch < 0xDD)	b[2] = false;

			if(!b[0] && !b[2])
				return;	// ARMSCII-8
		}
		if(!b[0] && !b[1])		cpResult = CPEX_ARMENIAN_ARMSCII8A;
		// NOTE(review): this branch cannot be taken. b[2] only becomes false for a
		// byte >= 0xB0, which also clears b[0], and that combination returns from
		// inside the loop. As written ARMSCII-7 is never reported -- confirm intent.
		else if(!b[2] && !b[1])	cpResult = CPEX_ARMENIAN_ARMSCII7;
	}
} // namespace `anonymous'


// Armenian (ARMSCII-7) /////////////////////////////////////////////////

// Encodes UTF-16 text as ARMSCII-7.
// The parameters are declared by CFU_ARGLIST (defined elsewhere); this body
// reads pwszSrc/cchSrc and writes pszDest/cchDest.
// Conversion stops when the source is exhausted, the destination is full, or a
// ligature needs two bytes and only one is left. Characters without a code are
// handed to CONFIRM_ILLEGAL_CHAR (defined elsewhere; presumably it either
// substitutes a default byte or aborts the conversion).
// Returns the number of bytes written to pszDest.
size_t CEncoder_Armenian_Armscii7::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	size_t	iSrc = 0, iDest = 0;

	while(iSrc < cchSrc && iDest < cchDest) {
		const wchar_t	ch = pwszSrc[iSrc];

		if(ch < 0x0021) {
			// Only the C0 controls and SPACE are shared with ASCII. Bytes
			// 0x21..0x27 decode to other characters (or to nothing) in
			// ARMSCII78toUCS_20, so U+0021..U+0027 must not pass through.
			pszDest[iDest++] = static_cast<uchar>(ch);
			++iSrc;
			continue;
		} else if(ch >= 0x0028 && ch < 0x0028 + _countof(UCStoARMSCII7_0028))
			pszDest[iDest] = UCStoARMSCII7_0028[ch - 0x0028];
		else if(ch >= 0x00A0 && ch < 0x00A0 + _countof(UCStoARMSCII78_00A0))
			pszDest[iDest] = UCStoARMSCII78_00A0[ch - 0x00A0];
		else if(ch >= 0x0530 && ch < 0x0530 + _countof(UCStoARMSCII78_0530))
			pszDest[iDest] = UCStoARMSCII78_0530[ch - 0x0530];
		else if(ch >= 0x2010 && ch < 0x2010 + _countof(UCStoARMSCII78_2010))
			pszDest[iDest] = UCStoARMSCII78_2010[ch - 0x2010];
		else
			pszDest[iDest] = __NA;

		if(pszDest[iDest] == __NA) {
			// Ligatures are tried here, after the table lookup, because U+0587
			// lies inside the 0x0530 table range (as __NA) and would otherwise
			// never be decomposed.
			if(const wchar_t* pwszDecomposed = DecomposeArmenianLigature(ch)) {
				if(cchDest - iDest < 2)
					return iDest;	// no room for both letters
				const uchar	first = UCStoARMSCII78_0530[pwszDecomposed[0] - 0x0530];
				const uchar	second = UCStoARMSCII78_0530[pwszDecomposed[1] - 0x0530];
				assert(first != __NA && second != __NA);
				pszDest[iDest++] = first;
				pszDest[iDest++] = second;
				++iSrc;
				continue;
			}
			// Pass a side-effect-free lvalue: the index is advanced exactly once below.
			CONFIRM_ILLEGAL_CHAR(pszDest[iDest]);
		}
		++iSrc;
		++iDest;
	}
	return iDest;
}

// Decodes ARMSCII-7 bytes into UTF-16, one output character per input byte.
// The parameters are declared by CTU_ARGLIST (defined elsewhere); this body
// reads pszSrc/cchSrc and writes pwszDest/cchDest.
// Returns the number of characters processed: the smaller of cchSrc and cchDest.
size_t CEncoder_Armenian_Armscii7::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	const size_t	cchConverted = min(cchSrc, cchDest);
	for(size_t iChar = 0; iChar < cchConverted; ++iChar) {
		if(pszSrc[iChar] >= 0x20) {
			// Table range is 0x20..0x7F; anything above it, or a __RPCH slot, is illegal.
			if(pszSrc[iChar] < 0x20 + _countof(ARMSCII78toUCS_20) && ARMSCII78toUCS_20[pszSrc[iChar] - 0x20] != __RPCH)
				pwszDest[iChar] = ARMSCII78toUCS_20[pszSrc[iChar] - 0x20];
			else
				CONFIRM_ILLEGAL_CHAR(pwszDest[iChar]);
		} else {
			// C0 controls are copied through unchanged.
			pwszDest[iChar] = pszSrc[iChar];
		}
	}
	return cchConverted;
}


// Armenian (ARMSCII-8) /////////////////////////////////////////////////

// Encodes UTF-16 text as ARMSCII-8.
// The parameters are declared by CFU_ARGLIST (defined elsewhere); this body
// reads pwszSrc/cchSrc and writes pszDest/cchDest.
// ASCII passes through unchanged except for U+0028..U+002F, which use
// UCStoARMSCII8_0028. Everything else is looked up in the shared ARMSCII-7
// tables and moved into the upper half by adding 0x80.
// Conversion stops when the source is exhausted, the destination is full, or a
// ligature needs two bytes and only one is left. Characters without a code are
// handed to CONFIRM_ILLEGAL_CHAR (defined elsewhere; presumably it either
// substitutes a default byte or aborts the conversion).
// Returns the number of bytes written to pszDest.
size_t CEncoder_Armenian_Armscii8::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	size_t	iSrc = 0, iDest = 0;

	while(iSrc < cchSrc && iDest < cchDest) {
		const wchar_t	ch = pwszSrc[iSrc];
		bool	bSevenBitCode = true;	// looked-up value still needs the 0x80 offset

		if(ch >= 0x0028 && ch < 0x0028 + _countof(UCStoARMSCII8_0028)) {
			// This table already holds final ARMSCII-8 bytes (0xA5, 0x2A, ...).
			pszDest[iDest] = UCStoARMSCII8_0028[ch - 0x0028];
			bSevenBitCode = false;
		} else if(ch < 0x0080) {
			// The remaining ASCII range is identical in ARMSCII-8; the decoder
			// maps every byte below 0xA1 to the same code point.
			pszDest[iDest++] = static_cast<uchar>(ch);
			++iSrc;
			continue;
		} else if(ch >= 0x00A0 && ch < 0x00A0 + _countof(UCStoARMSCII78_00A0))
			pszDest[iDest] = UCStoARMSCII78_00A0[ch - 0x00A0];
		else if(ch >= 0x0530 && ch < 0x0530 + _countof(UCStoARMSCII78_0530))
			pszDest[iDest] = UCStoARMSCII78_0530[ch - 0x0530];
		else if(ch >= 0x2010 && ch < 0x2010 + _countof(UCStoARMSCII78_2010))
			pszDest[iDest] = UCStoARMSCII78_2010[ch - 0x2010];
		else
			pszDest[iDest] = __NA;

		if(pszDest[iDest] == __NA) {
			// Ligatures are tried here, after the table lookup, because U+0587
			// lies inside the 0x0530 table range (as __NA) and would otherwise
			// never be decomposed.
			if(const wchar_t* pwszDecomposed = DecomposeArmenianLigature(ch)) {
				if(cchDest - iDest < 2)
					return iDest;	// no room for both letters
				const uchar	first = UCStoARMSCII78_0530[pwszDecomposed[0] - 0x0530];
				const uchar	second = UCStoARMSCII78_0530[pwszDecomposed[1] - 0x0530];
				assert(first != __NA && second != __NA);
				pszDest[iDest++] = static_cast<uchar>(first + 0x80);
				pszDest[iDest++] = static_cast<uchar>(second + 0x80);
				++iSrc;
				continue;
			}
			// The substituted byte is stored as is: no 0x80 offset, and the
			// index is advanced exactly once below.
			CONFIRM_ILLEGAL_CHAR(pszDest[iDest]);
		} else if(bSevenBitCode)
			pszDest[iDest] += 0x80;
		++iSrc;
		++iDest;
	}
	return iDest;
}

// Decodes ARMSCII-8 bytes into UTF-16, one output character per input byte.
// The parameters are declared by CTU_ARGLIST (defined elsewhere); this body
// reads pszSrc/cchSrc and writes pwszDest/cchDest.
// Returns the number of characters processed: the smaller of cchSrc and cchDest.
size_t CEncoder_Armenian_Armscii8::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	const size_t	cchConverted = min(cchSrc, cchDest);
	for(size_t iChar = 0; iChar < cchConverted; ++iChar) {
		if(pszSrc[iChar] >= 0xA1) {
			// Upper half: reuse the ARMSCII-7 table, shifted down by 0x80.
			if(ARMSCII78toUCS_20[pszSrc[iChar] - 0x20 - 0x80] != __RPCH)
				pwszDest[iChar] = ARMSCII78toUCS_20[pszSrc[iChar] - 0x20 - 0x80];
			else
				CONFIRM_ILLEGAL_CHAR(pwszDest[iChar]);
		} else {
			// Bytes up to and including 0xA0 map to the same code point.
			pwszDest[iChar] = pszSrc[iChar];
		}
	}
	return cchConverted;
}


// Armenian (ARMSCII-8A) ////////////////////////////////////////////////

// Encodes UTF-16 text as ARMSCII-8A.
// The parameters are declared by CFU_ARGLIST (defined elsewhere); this body
// reads pwszSrc/cchSrc and writes pszDest/cchDest.
// ASCII passes through unchanged except for the five characters whose byte
// values ARMSCII-8A reassigns; those are reported as illegal. All other
// characters use the UCStoARMSCII8A_* tables, whose entries are final bytes.
// Conversion stops when the source is exhausted, the destination is full, or a
// ligature needs two bytes and only one is left. Characters without a code are
// handed to CONFIRM_ILLEGAL_CHAR (defined elsewhere; presumably it either
// substitutes a default byte or aborts the conversion).
// Returns the number of bytes written to pszDest.
size_t CEncoder_Armenian_Armscii8a::ConvertFromUnicode(CFU_ARGLIST) {
	CFU_CHECKARGS();

	size_t	iSrc = 0, iDest = 0;

	while(iSrc < cchSrc && iDest < cchDest) {
		const wchar_t	ch = pwszSrc[iSrc];

		if(ch < 0x0080) {
			// ASCII characters whose byte value means something else in ARMSCII-8A
			// (must stay sorted for binary_search).
			static const wchar_t	wszInvChars[] = {0x0027, 0x003A, 0x005F, 0x0060, 0x007E};
			if(!binary_search(wszInvChars, wszInvChars + _countof(wszInvChars), ch)) {
				// NOTE(review): U+002D passes through as 0x2D although the decoder
				// maps 0x2D to U+2014 and 0x5F to U+002D -- confirm the intended mapping.
				pszDest[iDest++] = static_cast<uchar>(ch);
				++iSrc;
				continue;
			}
			pszDest[iDest] = __NA;
		} else if(ch >= 0x00A8 && ch < 0x00A8 + _countof(UCStoARMSCII8A_00A8))
			pszDest[iDest] = UCStoARMSCII8A_00A8[ch - 0x00A8];
		else if(ch >= 0x0530 && ch < 0x0530 + _countof(UCStoARMSCII8A_0530))
			pszDest[iDest] = UCStoARMSCII8A_0530[ch - 0x0530];
		else if(ch >= 0x2010 && ch < 0x2010 + _countof(UCStoARMSCII8A_2010))
			pszDest[iDest] = UCStoARMSCII8A_2010[ch - 0x2010];
		else
			pszDest[iDest] = __NA;

		if(pszDest[iDest] == __NA) {
			// Ligatures are tried after the table lookup so that a ligature lying
			// inside a table range (U+0587) can still be decomposed.
			if(const wchar_t* pwszDecomposed = DecomposeArmenianLigature(ch)) {
				if(cchDest - iDest < 2)
					return iDest;	// no room for both letters
				// The 8A table already yields final bytes, so no 0x80 offset here.
				const uchar	first = UCStoARMSCII8A_0530[pwszDecomposed[0] - 0x0530];
				const uchar	second = UCStoARMSCII8A_0530[pwszDecomposed[1] - 0x0530];
				assert(first != __NA && second != __NA);
				pszDest[iDest++] = first;
				pszDest[iDest++] = second;
				++iSrc;
				continue;
			}
			// Pass a side-effect-free lvalue: the index is advanced exactly once below.
			CONFIRM_ILLEGAL_CHAR(pszDest[iDest]);
		}
		++iSrc;
		++iDest;
	}
	return iDest;
}

// Decodes ARMSCII-8A bytes into UTF-16, one output character per input byte.
// The parameters are declared by CTU_ARGLIST (defined elsewhere); this body
// reads pszSrc/cchSrc and writes pwszDest/cchDest.
// Bytes 0x20..0xAF and 0xD8..0xFF are looked up in the two decoder tables;
// the gap 0xB0..0xD7 has no table and is reported as illegal.
// Returns the number of characters processed: the smaller of cchSrc and cchDest.
size_t CEncoder_Armenian_Armscii8a::ConvertToUnicode(CTU_ARGLIST) {
	CTU_CHECKARGS();

	const size_t	cch = min(cchSrc, cchDest);
	for(size_t i = 0; i < cch; ++i) {
		if(pszSrc[i] < 0x20)
			pwszDest[i] = pszSrc[i];
		else if(pszSrc[i] < 0x20 + _countof(ARMSCII8AtoUCS_20))
			pwszDest[i] = ARMSCII8AtoUCS_20[pszSrc[i] - 0x20];
		else if(pszSrc[i] >= 0xD8)
			pwszDest[i] = ARMSCII8AtoUCS_D8[pszSrc[i] - 0xD8];
		else
			// 0xB0..0xD7: indexing the 0xD8 table here would read before its start.
			pwszDest[i] = __REPLACEMENT_CHARACTER;
		if(pwszDest[i] == __REPLACEMENT_CHARACTER)
			CONFIRM_ILLEGAL_CHAR(pwszDest[i]);
	}
	return cch;
}

/* [EOF] */