#include <stdio.h>
#include <getopt.h> 
#include <unistd.h>
#include <string.h>
#include <unicode/utf.h>
#include <unicode/uchar.h>
#include <unicode/unorm.h>
#include <unicode/ustring.h>

/* Exclusive upper bound of the code point loops in this file
   (the loops run while ch < MAX_UNICODE, i.e. up to 0x10FFFF). */
#define MAX_UNICODE 0x110000
/* Capacity passed to the ICU conversion calls in normalize(): the number of
   UChar units in its scratch buffers and the number of bytes in its output. */
#define BUF_SIZE 0x100

/*
 * Encode the value `i` as a NUL-terminated UTF-8 style byte sequence.
 *
 * The original (pre-RFC 3629) scheme is used, so values up to 0x7fffffff
 * are accepted and may take up to six bytes.
 *
 *   i    value to encode
 *   buf  output buffer; must have room for 7 bytes (6 data bytes + NUL)
 *
 * Returns the number of bytes written, not counting the terminating NUL.
 * Values of 0x80000000 and above cannot be represented: for those an empty
 * string is stored in `buf` and 0 is returned (previously four stray
 * continuation bytes were emitted without any lead byte).
 */
static int
ucs2utf(unsigned int i, unsigned char *buf)
{
  unsigned char *p = buf;
  unsigned int lead;  /* marker bits OR-ed into the first byte */
  int trail;          /* number of continuation bytes that follow it */

  if (i < 0x80) {
    lead = 0x00; trail = 0;
  } else if (i < 0x800) {
    lead = 0xc0; trail = 1;
  } else if (i < 0x00010000) {
    lead = 0xe0; trail = 2;
  } else if (i < 0x00200000) {
    lead = 0xf0; trail = 3;
  } else if (i < 0x04000000) {
    lead = 0xf8; trail = 4;
  } else if (i < 0x80000000) {
    lead = 0xfc; trail = 5;
  } else {
    /* Not encodable: report it instead of writing a headless sequence. */
    *p = '\0';
    return 0;
  }

  /* Lead byte carries the most significant payload bits. */
  *p++ = (unsigned char)((i >> (6 * trail)) | lead);

  /* Each continuation byte carries the next 6 bits, high to low. */
  while (trail-- > 0) {
    *p++ = (unsigned char)(((i >> (6 * trail)) & 0x3f) | 0x80);
  }

  *p = '\0';
  return (int)(p - buf);
}

/*
 * Print a table of Unicode block boundaries to stdout.
 *
 * Walks the code points 1 .. MAX_UNICODE-1, skipping those rejected by
 * U_IS_UNICODE_CHAR.  Whenever ublock_getCode() yields a value different
 * from the one seen for the previously visited code point, one line is
 * printed:
 *
 *   <UTF-8 bytes in hex, each followed by ':'> TAB <code point, hex>
 *   TAB <block code, decimal>
 *
 * Code point 0 is not visited (presumably because its encoding would be
 * indistinguishable from the string terminator used when printing bytes).
 */
void
blockcode(void)
{
  unsigned char utf8[7];
  UBlockCode prev = -1;
  UChar32 cp;

  for (cp = 1; cp < MAX_UNICODE; cp++) {
    UBlockCode cur;
    const unsigned char *byte;

    if (!U_IS_UNICODE_CHAR(cp)) {
      continue;
    }

    cur = ublock_getCode(cp);
    if (cur == prev) {
      continue;  /* still inside the same block */
    }
    prev = cur;

    /* First code point of a new block: show its encoding and block code. */
    ucs2utf(cp, utf8);
    byte = utf8;
    while (*byte) {
      printf("%x:", *byte);
      byte++;
    }
    printf("\t%04x\t%d\n", cp, cur);
  }
}

/*
 * Normalize the UTF-8 string `str` with the given ICU normalization mode
 * and store the UTF-8 result in `res`.
 *
 *   str   NUL-terminated UTF-8 input
 *   res   output buffer; BUF_SIZE is passed to ICU as its capacity, so it
 *         must provide at least BUF_SIZE bytes
 *   mode  normalization mode handed to unorm_normalize()
 *
 * The string is converted UTF-8 -> UTF-16, normalized, and converted back.
 * Returns 0 on success and -1 as soon as any step reports a status other
 * than U_ZERO_ERROR.  ICU warnings therefore count as failures too, not
 * only hard errors.  The content of `res` after a failure should not be
 * relied upon.
 */
int
normalize(const char *str, char *res, UNormalizationMode mode)
{
  UChar utf16_in[BUF_SIZE];
  UChar utf16_out[BUF_SIZE];
  int32_t in_len;
  int32_t out_len;
  UErrorCode status = U_ZERO_ERROR;

  /* Step 1: UTF-8 -> UTF-16. */
  u_strFromUTF8(utf16_in, BUF_SIZE, &in_len, str, -1, &status);

  /* Step 2: normalize; only reached with a clean status. */
  if (status == U_ZERO_ERROR) {
    out_len = unorm_normalize(utf16_in, in_len, mode, 0,
                              utf16_out, BUF_SIZE, &status);
  }

  /* Step 3: UTF-16 -> UTF-8; only reached with a clean status. */
  if (status == U_ZERO_ERROR) {
    u_strToUTF8(res, BUF_SIZE, NULL, utf16_out, out_len, &status);
  }

  return (status == U_ZERO_ERROR) ? 0 : -1;
}

/*
 * Print every code point whose normalization under `mode` differs from
 * the code point itself.
 *
 * Walks the code points 1 .. MAX_UNICODE-1, skipping those rejected by
 * U_IS_UNICODE_CHAR.  Each one is encoded as UTF-8 and passed through
 * normalize().  When the result differs from the input, a line
 *
 *   <code point, hex> TAB <original UTF-8> TAB <normalized UTF-8>
 *
 * is written to stdout.  If normalize() fails, a diagnostic line is
 * written to stdout instead and the code point is skipped.
 */
void
dump(UNormalizationMode mode)
{
  UChar32 ch;
  char str[7], norm[BUF_SIZE];

  for (ch = 1; ch < MAX_UNICODE; ch++) {
    if (!U_IS_UNICODE_CHAR(ch)) { continue; }

    /* ucs2utf() works on unsigned bytes while the string functions below
       take plain char; convert explicitly instead of relying on an
       implicit (and diagnosed) pointer-signedness conversion. */
    ucs2utf((unsigned int)ch, (unsigned char *)str);

    if (normalize(str, norm, mode)) {
      /* %x expects an unsigned argument; ch is never negative here. */
      printf("ch=%04x error occure\n", (unsigned int)ch);
      continue;
    }
    if (strcmp(norm, str) != 0) {
      printf("%04x\t%s\t%s\n", (unsigned int)ch, str, norm);
    }
  }
}

/*
 * Print every code point whose NFD and NFC forms differ from each other.
 *
 * Walks the code points 1 .. MAX_UNICODE-1, skipping those rejected by
 * U_IS_UNICODE_CHAR.  Each one is encoded as UTF-8 and normalized twice,
 * once with UNORM_NFD and once with UNORM_NFC.  When the two results
 * differ, a line
 *
 *   <code point, hex> TAB <NFD form> TAB <NFC form>
 *
 * is written to stdout.  If either normalization fails, a diagnostic line
 * is written to stdout instead and the code point is skipped.
 */
void
ccdump(void)
{
  UChar32 ch;
  char str[7], nfd[BUF_SIZE], nfc[BUF_SIZE];

  for (ch = 1; ch < MAX_UNICODE; ch++) {
    if (!U_IS_UNICODE_CHAR(ch)) { continue; }

    /* ucs2utf() works on unsigned bytes while the string functions below
       take plain char; convert explicitly instead of relying on an
       implicit (and diagnosed) pointer-signedness conversion. */
    ucs2utf((unsigned int)ch, (unsigned char *)str);

    if (normalize(str, nfd, UNORM_NFD) || normalize(str, nfc, UNORM_NFC)) {
      /* %x expects an unsigned argument; ch is never negative here. */
      printf("ch=%04x error occure\n", (unsigned int)ch);
      continue;
    }
    if (strcmp(nfd, nfc) != 0) {
      printf("%04x\t%s\t%s\n", (unsigned int)ch, nfd, nfc);
    }
  }
}

/*
 * Long options accepted by main(); each one maps to the short option
 * character given in the last field and takes no argument.
 *
 * getopt_long() requires the array to end with an element that is all
 * zeros.  Without that terminator an unrecognized long option makes it
 * read past the end of the array.
 */
struct option options[] = {
  {"bc",   no_argument, NULL, 'b'},
  {"nfd",  no_argument, NULL, 'd'},
  {"nfkd", no_argument, NULL, 'D'},
  {"nfc",  no_argument, NULL, 'c'},
  {"nfkc", no_argument, NULL, 'C'},
  {"cc",   no_argument, NULL, 'o'},
  {NULL,   0,           NULL, 0},   /* terminator required by getopt_long */
};

/*
 * Entry point: run the dump selected by the first command-line option.
 *
 *   -b / --bc    block code table        (blockcode)
 *   -d / --nfd   NFD differences         (dump)
 *   -D / --nfkd  NFKD differences        (dump)
 *   -c / --nfc   NFC differences         (dump)
 *   -C / --nfkc  NFKC differences        (dump)
 *   -o / --cc    NFD vs. NFC differences (ccdump)
 *
 * Only the first option is examined.  Returns 0 after running a dump.
 * If no option or an unrecognized option is given, the usage text is
 * written to stderr and 1 is returned so that callers can detect the
 * failed invocation (previously 0 was returned in this case as well).
 */
int
main(int argc, char **argv)
{
  int status = 0;

  switch (getopt_long(argc, argv, "bdDcCo", options, NULL)) {
  case 'b' :
    blockcode();
    break;
  case 'd' :
    dump(UNORM_NFD);
    break;
  case 'D' :
    dump(UNORM_NFKD);
    break;
  case 'c' :
    dump(UNORM_NFC);
    break;
  case 'C' :
    dump(UNORM_NFKC);
    break;
  case 'o' :
    ccdump();
    break;
  default :
    fputs("usage: icudump --[bc|nfd|nfkd|nfc|nfkc|cc]\n", stderr);
    status = 1;
    break;
  }
  return status;
}
