/* Copyright(C) 2004 Brazil

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#include "senna_in.h"
#include <stdio.h>
#include <string.h>
#include "str.h"
#include "set.h"

/* Lookup sets filled by prefix_init() / suffix_init(); NULL until built. */
static sen_set *prefix = NULL;
static sen_set *suffix = NULL;

/* Maximum number of entries read from the prefix / suffix files.
   With N_SUFFIX at 0 the read loop in suffix_init() never executes. */
#define N_PREFIX 2048
#define N_SUFFIX 0

/* Paths of the prefix / suffix list files, located under SENNA_HOME. */
#define PREFIX_PATH SENNA_HOME PATH_SEPARATOR "prefix"
#define SUFFIX_PATH SENNA_HOME PATH_SEPARATOR "suffix"

/*
 * Build the global `prefix` set from the file at PREFIX_PATH.
 *
 * Up to N_PREFIX records are read with fgets(); the record read at
 * position i (0-based) is stored as a key whose int value is i.
 * A missing file is not an error: the set is simply left empty.
 * On sen_set_open() failure `prefix` stays NULL and an alert is logged.
 */
inline static void
prefix_init(void)
{
  int i;
  FILE *fp;
  char buffer[4];
  prefix = sen_set_open(2, sizeof(int), 0);
  if (!prefix) { SEN_LOG(sen_log_alert, "sen_set_open on prefix_init failed !"); return; }
  if ((fp = fopen(PREFIX_PATH, "r"))) {
    for (i = 0; i < N_PREFIX; i++) {
      /* Start from NULL so a failed insertion is detectable below. */
      int *ip = NULL;
      if (!fgets(buffer, 4, fp)) { break; }
      sen_set_get(prefix, buffer, (void **)&ip);
      if (!ip) {
        /* No value slot was handed back; do not write through it. */
        SEN_LOG(sen_log_alert, "sen_set_get on prefix_init failed !");
        break;
      }
      *ip = i;
    }
    fclose(fp);
  }
}

/*
 * Build the global `suffix` set from the file at SUFFIX_PATH.
 *
 * At most N_SUFFIX records are read; each one is registered as a key
 * with no associated value.  A missing file leaves the set empty.
 * On sen_set_open() failure `suffix` stays NULL and an alert is logged.
 */
inline static void
suffix_init(void)
{
  char line[4];
  int remaining = N_SUFFIX;
  FILE *in;

  suffix = sen_set_open(2, 0, 0);
  if (suffix == NULL) {
    SEN_LOG(sen_log_alert, "sen_set_open on suffix_init failed !");
    return;
  }
  in = fopen(SUFFIX_PATH, "r");
  if (in == NULL) { return; }
  /* Stop at the record limit or at the first failed read. */
  while (remaining != 0 && fgets(line, 4, in) != NULL) {
    sen_set_get(suffix, line, NULL);
    remaining--;
  }
  fclose(in);
}

/*
 * Return the byte length of the UTF-8 character starting at `str`.
 *
 * `str` does not need to be NUL-terminated, but `end` must point one
 * past the last readable byte.
 *
 * Returns 0 when str >= end, when the first byte is NUL, when the lead
 * byte is a stray continuation byte (10xxxxxx), or when a required
 * continuation byte is missing, NUL, or not of the form 10xxxxxx.
 * The malformed cases are logged as warnings.
 */
inline size_t
sen_str_charlen_utf8(const unsigned char *str, const unsigned char *end)
{
  const unsigned char *p = str;
  /* Check the bound BEFORE dereferencing: callers pass str == end to
     detect the end of a buffer that is not NUL-terminated. */
  if (p >= end || !*p) { return 0; }
  if (*p & 0x80) {
    int b, w;
    size_t size;
    /* Count the 1-bits following the top bit of the lead byte; that is
       the number of continuation bytes expected. */
    for (b = 0x40, w = 0; b && (*p & b); b >>= 1, w++);
    if (!w) {
      SEN_LOG(sen_log_warning, "invalid utf8 string(1) on sen_str_charlen_utf8");
      return 0;
    }
    for (size = 1; w--; size++) {
      if (++p >= end || !*p || (*p & 0xc0) != 0x80) {
        SEN_LOG(sen_log_warning, "invalid utf8 string(2) on sen_str_charlen_utf8");
        return 0;
      }
    }
    return size;
  }
  /* 7-bit byte: a single-byte character. */
  return 1;
}

/*
 * Return the byte length of the first character of the NUL-terminated
 * string `str` interpreted in `encoding`.
 *
 * Returns 0 for an empty string, or (with a warning logged) when a
 * multi-byte character is cut short or malformed.  Encodings other
 * than EUC-JP, UTF-8 and Shift_JIS are treated as single-byte.
 */
size_t
sen_str_charlen(const char *str, sen_encoding encoding)
{
  const unsigned char *cur = (const unsigned char *) str;
  const unsigned char lead = *cur;

  if (lead == '\0') { return 0; }
  /* A 7-bit byte is one character in every encoding handled here. */
  if (!(lead & 0x80)) { return 1; }

  if (encoding == sen_enc_euc_jp) {
    if (cur[1] == '\0') {
      /* lead byte with nothing after it */
      SEN_LOG(sen_log_warning, "invalid euc-jp string end on sen_str_charlen");
      return 0;
    }
    return 2;
  }

  if (encoding == sen_enc_utf8) {
    size_t nbytes = 1;
    int mask = 0x40;
    int trail = 0;
    /* number of 1-bits after the top bit == continuation bytes needed */
    while (mask && (lead & mask)) {
      mask >>= 1;
      trail++;
    }
    if (trail == 0) {
      SEN_LOG(sen_log_warning, "invalid utf8 string(1) on sen_str_charlen");
      return 0;
    }
    while (trail-- > 0) {
      cur++;
      if (*cur == '\0' || (*cur & 0xc0) != 0x80) {
        SEN_LOG(sen_log_warning, "invalid utf8 string(2) on sen_str_charlen");
        return 0;
      }
      nbytes++;
    }
    return nbytes;
  }

  if (encoding == sen_enc_sjis) {
    /* hankaku-kana occupies a single byte */
    if (0xa1 <= lead && lead <= 0xdf) { return 1; }
    if (cur[1] == '\0') {
      SEN_LOG(sen_log_warning, "invalid sjis string end on sen_str_charlen");
      return 0;
    }
    return 2;
  }

  return 1;
}

/*
 * Return the byte length of the character at `str`, which need not be
 * NUL-terminated; `end` points one past the last valid byte.
 *
 * Returns 0 when str >= end or (with a warning logged) when a two-byte
 * EUC-JP / Shift_JIS character is cut off by `end`.  UTF-8 is delegated
 * to sen_str_charlen_utf8().  Other encodings are single-byte.
 */
size_t
sen_str_charlen_nonnull(const char *str, const char *end, sen_encoding encoding)
{
  const unsigned char *head = (const unsigned char *) str;
  const unsigned char *tail = (const unsigned char *) end;

  if (head >= tail) { return 0; }
  if (encoding == sen_enc_utf8) {
    return sen_str_charlen_utf8(head, tail);
  }
  /* 7-bit bytes are single characters in all remaining encodings. */
  if (!(*head & 0x80)) { return 1; }

  switch (encoding) {
  case sen_enc_euc_jp :
    if (head + 1 < tail) { return 2; }
    /* lead byte is the last byte of the buffer */
    SEN_LOG(sen_log_warning, "invalid euc-jp string end on sen_str_charlen_nonnull");
    return 0;
  case sen_enc_sjis :
    /* hankaku-kana occupies a single byte */
    if (0xa1 <= *head && *head <= 0xdf) { return 1; }
    if (head + 1 >= tail) {
      SEN_LOG(sen_log_warning, "invalid sjis string end on sen_str_charlen_nonnull");
      return 0;
    }
    return 2;
  default :
    return 1;
  }
}

/*
 * Release the prefix and suffix sets, if they were built.
 *
 * The globals are reset to NULL after closing so that a later call to
 * sen_str_get_prefix_order() rebuilds the set instead of using a closed
 * one, and so that calling sen_str_fin() twice is harmless.
 *
 * Always returns sen_success.
 */
sen_rc
sen_str_fin(void)
{
  if (prefix) {
    sen_set_close(prefix);
    prefix = NULL;
  }
  if (suffix) {
    sen_set_close(suffix);
    suffix = NULL;
  }
  return sen_success;
}

/*
 * Look up `str` in the prefix set and return the int stored for it
 * (the position at which it was read from the prefix file).
 *
 * The prefix set is built on first use.  Returns -1 when `str` is not
 * in the set or when the set could not be created.
 */
int
sen_str_get_prefix_order(const char *str)
{
  int *ip = NULL;
  if (!prefix) { prefix_init(); }
  /* prefix_init() leaves prefix NULL when sen_set_open() fails. */
  if (!prefix) { return -1; }
  if (sen_set_at(prefix, str, (void **)&ip) && ip) {
    return *ip;
  }
  return -1;
}

#include <stdarg.h>
#include <time.h>

/*
static void
default_logger_func(int level, const char *time,
                    const char *title, const char *msg, const char *location)
{
  fprintf(stderr, "%s|%s %s %s\n", time, location, msg, note);
}
*/

/*
 * Built-in log sink: appends one line per message to SENNA_LOG_PATH.
 *
 * The line format is "<time>|<level char>|<title> <msg> <location>".
 * The file is opened lazily on the first call and kept open; if it
 * cannot be opened the message is dropped and the open is retried on
 * the next call.  `func_arg` is unused.
 */
static void
default_logger_func(int level, const char *time, const char *title,
                    const char *msg, const char *location, void *func_arg)
{
  static FILE *fp = NULL;
  /* One marker character per log level, indexed by the level value. */
  static const char slev[] = " EACewnid-";
  /* Levels outside the table get '?' instead of an out-of-bounds read. */
  char mark = '?';
  if (level >= 0 && (size_t)level < sizeof(slev) - 1) {
    mark = slev[level];
  }
  if (!fp) {
    // mutex_lock
    fp = fopen(SENNA_LOG_PATH, "a");
    // mutex_unlock
  }
  if (fp) {
    fprintf(fp, "%s|%c|%s %s %s\n", time, mark, title, msg, location);
    fflush(fp);
  }
}

/* Logger configuration used until sen_logger_info_set() installs another:
   default level, time + message fields, written by default_logger_func().
   Members not listed here are zero-initialized. */
static sen_logger_info default_logger = {
  SEN_LOG_DEFAULT_LEVEL,
  SEN_LOG_TIME|SEN_LOG_MESSAGE,
  default_logger_func
};

/* Currently active logger configuration; never NULL. */
static const sen_logger_info *sen_logger = &default_logger;

/*
 * Install `info` as the active logger configuration.
 * Passing NULL restores the built-in default logger.
 * The pointer is stored, not copied.  Always returns sen_success.
 */
sen_rc
sen_logger_info_set(const sen_logger_info *info)
{
  sen_logger = info ? info : &default_logger;
  return sen_success;
}

/*
 * Return non-zero when a message of `level` would be emitted, i.e. when
 * `level` does not exceed the active logger's max_level.
 */
int
sen_logger_pass(sen_log_level level)
{
  const sen_logger_info *active = sen_logger;
  return !(level > active->max_level);
}

/* Sizes of the stack buffers used by sen_logger_put() for the
   timestamp, the formatted message and the location string. */
#define TBUFSIZE 0x20
#define MBUFSIZE 0x1000
#define LBUFSIZE 0x400

/*
 * Format a log record and hand it to the active logger callback.
 *
 * Nothing happens when `level` exceeds the active logger's max_level.
 * Otherwise up to three strings are built, depending on the logger's
 * flags, and an empty string is passed for each part that is disabled:
 *   SEN_LOG_TIME      "MM/DD:hh:mm:ss.frac" from the local clock
 *   SEN_LOG_MESSAGE   `fmt` expanded printf-style with the variadic
 *                     arguments (truncated to MBUFSIZE - 1 bytes)
 *   SEN_LOG_LOCATION  "<pid hex> file:line func()"
 * If the logger has no callback, default_logger_func() is used.
 */
void
sen_logger_put(sen_log_level level,
               const char *file, int line, const char *func, char *fmt, ...)
{
  if (level <= sen_logger->max_level) {
    char tbuf[TBUFSIZE];
    char mbuf[MBUFSIZE];
    char lbuf[LBUFSIZE];
    if (sen_logger->flags & SEN_LOG_TIME) {
#ifdef WIN32
      struct tm *ltm;
      time_t t;
      struct _timeb tb;
      time(&t);
      ltm = localtime(&t);
      _ftime(&tb);
      snprintf(tbuf, TBUFSIZE - 1, "%02d/%02d:%02d:%02d:%02d.%03d",
               ltm->tm_mon + 1, ltm->tm_mday,
               ltm->tm_hour, ltm->tm_min, ltm->tm_sec, tb.millitm);
      tbuf[TBUFSIZE - 1] = '\0';
#else /* WIN32 */
      struct timeval tv;
      struct tm tm;
      time_t sec;
      gettimeofday(&tv, NULL);
      /* Copy into a real time_t: tv_sec is not guaranteed to have the
         same type or width as time_t, so casting its address is unsafe. */
      sec = (time_t)tv.tv_sec;
      localtime_r(&sec, &tm);
      snprintf(tbuf, TBUFSIZE - 1, "%02d/%02d:%02d:%02d:%02d.%06d",
               tm.tm_mon + 1, tm.tm_mday,
               tm.tm_hour, tm.tm_min, tm.tm_sec, (int) tv.tv_usec);
      tbuf[TBUFSIZE - 1] = '\0';
#endif /* WIN32 */
    } else {
      tbuf[0] = '\0';
    }
    if (sen_logger->flags & SEN_LOG_MESSAGE) {
      va_list argp;
      va_start(argp, fmt);
      vsnprintf(mbuf, MBUFSIZE - 1, fmt, argp);
      va_end(argp);
      /* explicit terminator in case the formatter did not write one */
      mbuf[MBUFSIZE - 1] = '\0';
    } else {
      mbuf[0] = '\0';
    }
    if (sen_logger->flags & SEN_LOG_LOCATION) {
      snprintf(lbuf, LBUFSIZE - 1, "%04x %s:%d %s()",
               getpid(), file, line, func);
      lbuf[LBUFSIZE - 1] = '\0';
    } else {
      lbuf[0] = '\0';
    }
    if (sen_logger->func) {
      sen_logger->func(level, tbuf, "", mbuf, lbuf, sen_logger->func_arg);
    } else {
      default_logger_func(level, tbuf, "", mbuf, lbuf, sen_logger->func_arg);
    }
  }
}

// static int alloc_size = 0;
/* Running counter maintained by the sen_malloc()/sen_free() family and
   printed in their failure messages.  Updated without any locking. */
static int alloc_count = 0;

/*
 * malloc() wrapper that keeps alloc_count up to date.
 *
 * `file` and `line` identify the call site and are only used in the
 * alert that is logged when the allocation fails.
 * Returns the new block, or NULL on failure.
 */
void *
sen_malloc(size_t size, const char* file, int line)
{
  void *res = malloc(size);
  if (res) {
    alloc_count++;
  } else {
    /* size_t must not be printed with %d; widen it explicitly. */
    SEN_LOG(sen_log_alert, "malloc fail (%lu)=%p (%s:%d) <%d>",
            (unsigned long)size, res, file, line, alloc_count);
  }
  return res;
}

/*
 * Zero-filled allocation of `size` bytes that keeps alloc_count up to
 * date.
 *
 * `file` and `line` identify the call site and are only used in the
 * alert that is logged when the allocation fails.
 * Returns the new block, or NULL on failure.
 */
void *
sen_calloc(size_t size, const char* file, int line)
{
  void *res = calloc(size, 1);
  if (res) {
    alloc_count++;
  } else {
    /* size_t must not be printed with %d; widen it explicitly. */
    SEN_LOG(sen_log_alert, "calloc fail (%lu)=%p (%s:%d) <%d>",
            (unsigned long)size, res, file, line, alloc_count);
  }
  return res;
}

/*
 * free() wrapper that keeps alloc_count up to date.
 *
 * Freeing NULL is harmless but is reported as an alert, tagged with the
 * call site given by `file` and `line`.
 */
void
sen_free(void *ptr, const char* file, int line)
{
  /* Inspect ptr before releasing it: its value is indeterminate once
     free() has returned. */
  if (ptr) {
    alloc_count--;
  } else {
    SEN_LOG(sen_log_alert, "free fail (%p) (%s:%d) <%d>", ptr, file, line, alloc_count);
  }
  free(ptr);
}

/*
 * realloc() wrapper that keeps alloc_count up to date.
 *
 * alloc_count is incremented only when a new block comes into existence
 * (ptr == NULL); resizing an existing block leaves the number of live
 * blocks unchanged.  `file` and `line` identify the call site and are
 * only used in the alert that is logged on failure.
 * Returns the resized block, or NULL on failure.
 */
void *
sen_realloc(void *ptr, size_t size, const char* file, int line)
{
  /* Remember this before the call; ptr may be released by realloc(). */
  const int is_new_block = (ptr == NULL);
  void *res = realloc(ptr, size);
  if (res) {
    if (is_new_block) { alloc_count++; }
  } else {
    /* size_t must not be printed with %d; widen it explicitly. */
    SEN_LOG(sen_log_alert, "realloc fail (%p,%lu)=%p (%s:%d) <%d>",
            ptr, (unsigned long)size, res, file, line, alloc_count);
  }
  return res;
}

/*
 * strdup() wrapper that keeps alloc_count up to date.
 *
 * Returns the duplicated string, or NULL on failure, in which case an
 * alert tagged with the call site (`file`, `line`) is logged.
 */
char *
sen_strdup(const char *s, const char* file, int line)
{
  char *copy = strdup(s);
  if (copy != NULL) {
    alloc_count++;
    return copy;
  }
  SEN_LOG(sen_log_alert, "strdup(%p)=%p (%s:%d) <%d>", s, copy, file, line, alloc_count);
  return copy;
}

/*
 * Soft assertion: when `cond` is zero, log an error naming the function
 * and source position.  Execution continues either way.
 */
void
sen_assert(int cond, const char* file, int line, const char* func)
{
  if (cond) { return; }
  SEN_LOG(sen_log_error, "ASSERT fail on %s %s:%d", func, file, line);
}

/* Replacement table for two-byte symbols, shared by normalize_euc() and
   normalize_sjis().  Each entry is the ASCII character to emit instead
   of the two-byte form; 0 means "no replacement, keep the original".
   normalize_euc() indexes it with (second byte - 0xa4) for lead byte
   0xa1; normalize_sjis() with (second byte - 0x43) or (second byte -
   0x44) for lead byte 0x81.  The table holds 92 entries. */
static unsigned char symbol[] = {
  ',', '.', 0, ':', ';', '?', '!', 0, 0, 0, '`', 0, '^', '~', '_', 0, 0, 0,
  0, 0, 0, 0, 0, 0, 0, '-', '-', '/', '\\', 0, 0, '|', 0, 0, 0, '\'', 0,
  '"', '(', ')', 0, 0, '[', ']', '{', '}', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  '+', '-', 0, 0, 0, '=', 0, '<', '>', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  '$', 0, 0, '%', '#', '&', '*', '@', 0, 0, 0, 0, 0, 0, 0, 0
};

/*
 * Normalize the EUC-JP text in nstr->orig (nstr->orig_blen bytes).
 *
 * Output written into nstr:
 *   norm       newly allocated, NUL-terminated normalized text
 *   norm_blen  byte length of norm
 *   length     number of normalized characters
 *   checks     (only with SEN_STR_WITH_CHECKS) one entry per byte of
 *              norm: on the first byte of a character, the number of
 *              source bytes consumed up to and including that
 *              character; 0 on the following bytes
 *   ctypes     (only with SEN_STR_WITH_CTYPES) one entry per character,
 *              terminated by sen_str_null
 *
 * Transformations performed:
 *   - 0x8e-prefixed half-width kana are replaced through hankana[];
 *     a (han)dakuten that follows a katakana which can carry it is
 *     merged into that katakana
 *   - lead byte 0xa1: 0xa1a1 becomes ' ' (or is dropped when
 *     SEN_STR_REMOVEBLANK is set); other symbols are replaced by their
 *     ASCII form when symbol[] has one
 *   - lead byte 0xa3: letters and digits become ASCII, letters lowercase
 *   - ASCII upper case is lower-cased; bytes below 0x20 are dropped
 *   - a high byte not followed by another high byte is skipped
 *
 * Returns sen_success, or sen_memory_exhausted if an allocation fails.
 */
inline sen_rc
normalize_euc(sen_nstr *nstr)
{
  /* half-width kana (second byte 0xa0..0xdf) -> two-byte replacement */
  static uint16_t hankana[] = {
    0xa1a1, 0xa1a3, 0xa1d6, 0xa1d7, 0xa1a2, 0xa1a6, 0xa5f2, 0xa5a1, 0xa5a3,
    0xa5a5, 0xa5a7, 0xa5a9, 0xa5e3, 0xa5e5, 0xa5e7, 0xa5c3, 0xa1bc, 0xa5a2,
    0xa5a4, 0xa5a6, 0xa5a8, 0xa5aa, 0xa5ab, 0xa5ad, 0xa5af, 0xa5b1, 0xa5b3,
    0xa5b5, 0xa5b7, 0xa5b9, 0xa5bb, 0xa5bd, 0xa5bf, 0xa5c1, 0xa5c4, 0xa5c6,
    0xa5c8, 0xa5ca, 0xa5cb, 0xa5cc, 0xa5cd, 0xa5ce, 0xa5cf, 0xa5d2, 0xa5d5,
    0xa5d8, 0xa5db, 0xa5de, 0xa5df, 0xa5e0, 0xa5e1, 0xa5e2, 0xa5e4, 0xa5e6,
    0xa5e8, 0xa5e9, 0xa5ea, 0xa5eb, 0xa5ec, 0xa5ed, 0xa5ef, 0xa5f3, 0xa1ab,
    0xa1eb
  };
  /* second byte of the voiced form, indexed by (katakana byte - 0xa6);
     0 means the katakana cannot take a dakuten */
  static unsigned char dakuten[] = {
    0xf4, 0, 0, 0, 0, 0xac, 0, 0xae, 0, 0xb0, 0, 0xb2, 0, 0xb4, 0, 0xb6, 0,
    0xb8, 0, 0xba, 0, 0xbc, 0, 0xbe, 0, 0xc0, 0, 0xc2, 0, 0, 0xc5, 0, 0xc7,
    0, 0xc9, 0, 0, 0, 0, 0, 0, 0xd0, 0, 0, 0xd3, 0, 0, 0xd6, 0, 0, 0xd9, 0,
    0, 0xdc
  };
  /* same for the handakuten, indexed by (katakana byte - 0xcf) */
  static unsigned char handaku[] = {
    0xd1, 0, 0, 0xd4, 0, 0, 0xd7, 0, 0, 0xda, 0, 0, 0xdd
  };
  int_least8_t *ch;
  const unsigned char *s, *s_, *e;
  unsigned char *d, *d0, *d_, b;
  uint_least8_t *cp, *ctypes, ctype;
  size_t size = nstr->orig_blen, length = 0;
  int removeblankp = nstr->flags & SEN_STR_REMOVEBLANK;
  if (!(nstr->norm = SEN_MALLOC(size * 2 + 1))) {
    return sen_memory_exhausted;
  }
  d0 = (unsigned char *) nstr->norm;
  if (nstr->flags & SEN_STR_WITH_CHECKS) {
    if (!(nstr->checks = SEN_MALLOC(size * 2 + 1))) {
      SEN_FREE(nstr->norm);
      return sen_memory_exhausted;
    }
  }
  ch = nstr->checks;
  if (nstr->flags & SEN_STR_WITH_CTYPES) {
    if (!(nstr->ctypes = SEN_MALLOC(size + 1))) {
      SEN_FREE(nstr->checks);
      SEN_FREE(nstr->norm);
      return sen_memory_exhausted;
    }
  }
  cp = ctypes = nstr->ctypes;
  e = (unsigned char *)nstr->orig + size;
  /* s  : current source byte        s_ : source position already
     d  : current output byte             accounted for in checks
     d_ : output position already covered by checks entries */
  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
    if ((*s & 0x80)) {
      if (((s + 1) < e) && (*(s + 1) & 0x80)) {
        unsigned char c1 = *s++, c2 = *s, c3 = 0;
        switch (c1 >> 4) {
        case 0x08 :
          if (c1 == 0x8e && 0xa0 <= c2 && c2 <= 0xdf) {
            uint16_t c = hankana[c2 - 0xa0];
            switch (c) {
            case 0xa1ab :
              /* dakuten: merge into the preceding katakana if possible */
              if (d > d0 + 1 && d[-2] == 0xa5
                  && 0xa6 <= d[-1] && d[-1] <= 0xdb && (b = dakuten[d[-1] - 0xa6])) {
                *(d - 1) = b;
                /* The preceding character occupies two output bytes, so
                   its source-length entry is ch[-2]; ch[-1] is the 0
                   written for its second byte and must stay 0. */
                if (ch) { ch[-2] += 2; s_ += 2; }
                continue;
              } else {
                *d++ = c >> 8; *d = c & 0xff;
              }
              break;
            case 0xa1eb :
              /* handakuten: same treatment */
              if (d > d0 + 1 && d[-2] == 0xa5
                  && 0xcf <= d[-1] && d[-1] <= 0xdb && (b = handaku[d[-1] - 0xcf])) {
                *(d - 1) = b;
                if (ch) { ch[-2] += 2; s_ += 2; }
                continue;
              } else {
                *d++ = c >> 8; *d = c & 0xff;
              }
              break;
            default :
              *d++ = c >> 8; *d = c & 0xff;
              break;
            }
            ctype = sen_str_katakana;
          } else {
            *d++ = c1; *d = c2;
            ctype = sen_str_others;
          }
          break;
        case 0x09 :
          *d++ = c1; *d = c2;
          ctype = sen_str_others;
          break;
        case 0x0a :
          switch (c1 & 0x0f) {
          case 1 :
            switch (c2) {
            case 0xbc :
              *d++ = c1; *d = c2;
              ctype = sen_str_katakana;
              break;
            case 0xb9 :
              *d++ = c1; *d = c2;
              ctype = sen_str_kanji;
              break;
            case 0xa1 :
              /* two-byte blank */
              if (removeblankp) {
                if (cp > ctypes) { *(cp - 1) |= SEN_NSTR_BLANK; }
                continue;
              } else {
                *d = ' ';
                ctype = SEN_NSTR_BLANK|sen_str_symbol;
              }
              break;
            default :
              if (c2 >= 0xa4 && (c3 = symbol[c2 - 0xa4])) {
                *d = c3;
                ctype = sen_str_symbol;
              } else {
                *d++ = c1; *d = c2;
                ctype = sen_str_others;
              }
              break;
            }
            break;
          case 2 :
            *d++ = c1; *d = c2;
            ctype = sen_str_symbol;
            break;
          case 3 :
            /* two-byte alphanumerics: second byte is ASCII + 0x80 */
            c3 = c2 - 0x80;
            if ('a' <= c3 && c3 <= 'z') {
              ctype = sen_str_alpha;
              *d = c3;
            } else if ('A' <= c3 && c3 <= 'Z') {
              ctype = sen_str_alpha;
              *d = c3 + 0x20;
            } else if ('0' <= c3 && c3 <= '9') {
              ctype = sen_str_digit;
              *d = c3;
            } else {
              ctype = sen_str_others;
              *d++ = c1; *d = c2;
            }
            break;
          case 4 :
            *d++ = c1; *d = c2;
            ctype = sen_str_hiragana;
            break;
          case 5 :
            *d++ = c1; *d = c2;
            ctype = sen_str_katakana;
            break;
          case 6 :
          case 7 :
          case 8 :
            *d++ = c1; *d = c2;
            ctype = sen_str_symbol;
            break;
          default :
            *d++ = c1; *d = c2;
            ctype = sen_str_others;
            break;
          }
          break;
        default :
          *d++ = c1; *d = c2;
          ctype = sen_str_kanji;
          break;
        }
      } else {
        /* skip invalid character */
        continue;
      }
    } else {
      unsigned char c = *s;
      switch (c >> 4) {
      case 0 :
      case 1 :
        /* skip unprintable ascii */
        if (cp > ctypes) { *(cp - 1) |= SEN_NSTR_BLANK; }
        continue;
      case 2 :
        if (c == 0x20) {
          if (removeblankp) {
            if (cp > ctypes) { *(cp - 1) |= SEN_NSTR_BLANK; }
            continue;
          } else {
            *d = ' ';
            ctype = SEN_NSTR_BLANK|sen_str_symbol;
          }
        } else {
          *d = c;
          ctype = sen_str_symbol;
        }
        break;
      case 3 :
        *d = c;
        ctype = (c <= 0x39) ? sen_str_digit : sen_str_symbol;
        break;
      case 4 :
        *d = ('A' <= c) ? c + 0x20 : c;
        ctype = (c == 0x40) ? sen_str_symbol : sen_str_alpha;
        break;
      case 5 :
        *d = (c <= 'Z') ? c + 0x20 : c;
        ctype = (c <= 0x5a) ? sen_str_alpha : sen_str_symbol;
        break;
      case 6 :
        *d = c;
        ctype = (c == 0x60) ? sen_str_symbol : sen_str_alpha;
        break;
      case 7 :
        *d = c;
        ctype = (c <= 0x7a) ? sen_str_alpha : (c == 0x7f ? sen_str_others : sen_str_symbol);
        break;
      default :
        *d = c;
        ctype = sen_str_others;
        break;
      }
    }
    /* one normalized character has been emitted; d points at its last byte */
    d++;
    length++;
    if (cp) { *cp++ = ctype; }
    if (ch) {
      /* source bytes consumed since the previous character, then 0 for
         every additional output byte of this character */
      *ch++ = (int_least8_t)(s + 1 - s_);
      s_ = s + 1;
      while (++d_ < d) { *ch++ = 0; }
    }
  }
  if (cp) { *cp = sen_str_null; }
  *d = '\0';
  nstr->length = length;
  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
  return sen_success;
}

#ifndef NO_NFKC
/* NFKC helpers defined outside this file; used by normalize_utf8(). */
/* Character type of the UTF-8 character starting at str. */
uint_least8_t sen_nfkc_ctype(const unsigned char *str);
/* Replacement string for the character at str; the caller treats a NULL
   result as "no replacement" and a non-NULL result as NUL-terminated. */
const char *sen_nfkc_map1(const unsigned char *str);
/* Replacement string for the pair (prefix, suffix); the caller treats a
   NULL result as "the pair does not combine". */
const char *sen_nfkc_map2(const unsigned char *prefix, const unsigned char *suffix);

/*
 * Normalize the UTF-8 text in nstr->orig (nstr->orig_blen bytes) using
 * the sen_nfkc_* tables.
 *
 * Fills nstr->norm, nstr->norm_blen and nstr->length, plus nstr->checks
 * and nstr->ctypes when SEN_STR_WITH_CHECKS / SEN_STR_WITH_CTYPES are
 * set.  In checks, the first output character produced from a source
 * character carries the number of source bytes consumed, further
 * characters produced from the same source character carry -1, and
 * continuation bytes carry 0.
 *
 * Processing stops at the first position where sen_str_charlen_utf8()
 * returns 0 (end of input, a NUL byte, or a malformed sequence).
 *
 * Returns sen_success, or sen_memory_exhausted if an allocation fails.
 *
 * NOTE(review): the output buffers are sized as fixed multiples of the
 * input length (see the "todo" remarks below) and nothing in the loop
 * checks them; whether that is always sufficient depends on the longest
 * replacement sen_nfkc_map1()/sen_nfkc_map2() can return -- confirm.
 */
inline static sen_rc
normalize_utf8(sen_nstr *nstr)
{
  int_least8_t *ch;
  const unsigned char *s, *s_, *p, *p2, *pe, *e;
  unsigned char *d, *d0, *d_;
  uint_least8_t *cp, *ctypes;
  size_t length = 0, ls, ls_ = 0, lp, size = nstr->orig_blen;
  int removeblankp = nstr->flags & SEN_STR_REMOVEBLANK;
  if (!(nstr->norm = SEN_MALLOC(size * 5 + 1))) { /* todo: realloc unless enough */
    return sen_memory_exhausted;
  }
  d0 = (unsigned char *) nstr->norm;
  if (nstr->flags & SEN_STR_WITH_CHECKS) {
    if (!(nstr->checks = SEN_MALLOC(size * 5 + 1))) { /* todo: realloc unless enough */
      SEN_FREE(nstr->norm);
      return sen_memory_exhausted;
    }
  }
  ch = nstr->checks;
  if (nstr->flags & SEN_STR_WITH_CTYPES) {
    if (!(nstr->ctypes = SEN_MALLOC(size * 3 + 1))) { /* todo: realloc unless enough */
      SEN_FREE(nstr->checks);
      SEN_FREE(nstr->norm);
      return sen_memory_exhausted;
    }
  }
  cp = ctypes = nstr->ctypes;
  e = (unsigned char *)nstr->orig + size;
  /* s  : current source character (ls bytes long)
     s_ : source position already accounted for in checks
     d  : next output byte
     d_ : start of the most recently emitted output character, or NULL
          while nothing has been emitted */
  for (s = s_ = (unsigned char *) nstr->orig, d = d0, d_ = NULL; ; s += ls) {
    if (!(ls = sen_str_charlen_utf8(s, e))) {
      break;
    }
    /* [p, pe) is the text to emit for this source character: its
       replacement if there is one, otherwise the character itself. */
    if ((p = (unsigned char *)sen_nfkc_map1(s))) {
      pe = p + strlen((char *)p);
    } else {
      p = s;
      pe = p + ls;
    }
    /* If the previously emitted character combines with the start of p,
       undo that character (output, ctype, checks, count) and emit the
       combined form in its place. */
    if (d_ && (p2 = (unsigned char *)sen_nfkc_map2(d_, p))) {
      p = p2;
      pe = p + strlen((char *)p);
      if (cp) { cp--; }
      if (ch) {
        ch -= (d - d_);
        s_ -= ls_;
      }
      d = d_;
      length--;
    }
    /* Copy [p, pe) to the output one character (lp bytes) at a time. */
    for (; ; p += lp) {
      if (!(lp = sen_str_charlen_utf8(p, pe))) {
        break;
      }
      if ((*p == ' ' && removeblankp)
          || *p < 0x20  /* skip unprintable ascii */ ) {
        /* dropped character: flag the previous one as followed by a blank */
        if (cp > ctypes) { *(cp - 1) |= SEN_NSTR_BLANK; }
      } else {
        memcpy(d, p, lp);
        d_ = d;
        d += lp;
        length++;
        if (cp) { *cp++ = sen_nfkc_ctype(p); }
        if (ch) {
          size_t i;
          if (s_ == s + ls) {
            /* this source character has already been accounted for */
            *ch++ = -1;
          } else {
            *ch++ = (int_least8_t)(s + ls - s_);
            s_ = s + ls;
          }
          /* continuation bytes of the emitted character */
          for (i = lp; i > 1; i--) { *ch++ = 0; }
        }
      }
    }
    /* remembered so that a later combine step can rewind s_ */
    ls_ = ls;
  }
  if (cp) { *cp = sen_str_null; }
  *d = '\0';
  nstr->length = length;
  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
  return sen_success;
}
#endif /* NO_NFKC */

/*
 * Normalize the Shift_JIS text in nstr->orig (nstr->orig_blen bytes).
 *
 * Output written into nstr:
 *   norm       newly allocated, NUL-terminated normalized text
 *   norm_blen  byte length of norm
 *   length     number of normalized characters
 *   checks     (only with SEN_STR_WITH_CHECKS) one entry per byte of
 *              norm: on the first byte of a character, the number of
 *              source bytes consumed up to and including that
 *              character; 0 on the following bytes
 *   ctypes     (only with SEN_STR_WITH_CTYPES) one entry per character,
 *              terminated by sen_str_null
 *
 * Transformations performed:
 *   - single-byte kana (0xa0..0xdf) are replaced through hankana[];
 *     a (han)dakuten that follows a katakana which can carry it is
 *     merged into that katakana
 *   - lead byte 0x81: 0x8140 becomes ' ' (or is dropped when
 *     SEN_STR_REMOVEBLANK is set); other symbols are replaced by their
 *     ASCII form when symbol[] has one
 *   - lead byte 0x82: two-byte letters and digits become ASCII, letters
 *     lowercase
 *   - ASCII upper case is lower-cased; bytes below 0x20 are dropped
 *   - a lead byte without a valid second byte (0x40..0xfc) is skipped
 *
 * Returns sen_success, or sen_memory_exhausted if an allocation fails.
 */
inline static sen_rc
normalize_sjis(sen_nstr *nstr)
{
  /* single-byte kana (0xa0..0xdf) -> two-byte replacement */
  static uint16_t hankana[] = {
    0x8140, 0x8142, 0x8175, 0x8176, 0x8141, 0x8145, 0x8392, 0x8340, 0x8342,
    0x8344, 0x8346, 0x8348, 0x8383, 0x8385, 0x8387, 0x8362, 0x815b, 0x8341,
    0x8343, 0x8345, 0x8347, 0x8349, 0x834a, 0x834c, 0x834e, 0x8350, 0x8352,
    0x8354, 0x8356, 0x8358, 0x835a, 0x835c, 0x835e, 0x8360, 0x8363, 0x8365,
    0x8367, 0x8369, 0x836a, 0x836b, 0x836c, 0x836d, 0x836e, 0x8371, 0x8374,
    0x8377, 0x837a, 0x837d, 0x837e, 0x8380, 0x8381, 0x8382, 0x8384, 0x8386,
    0x8388, 0x8389, 0x838a, 0x838b, 0x838c, 0x838d, 0x838f, 0x8393, 0x814a,
    0x814b
  };
  /* second byte of the voiced form, indexed by (katakana byte - 0x45);
     0 means the katakana cannot take a dakuten */
  static unsigned char dakuten[] = {
    0x94, 0, 0, 0, 0, 0x4b, 0, 0x4d, 0, 0x4f, 0, 0x51, 0, 0x53, 0, 0x55, 0,
    0x57, 0, 0x59, 0, 0x5b, 0, 0x5d, 0, 0x5f, 0, 0x61, 0, 0, 0x64, 0, 0x66,
    0, 0x68, 0, 0, 0, 0, 0, 0, 0x6f, 0, 0, 0x72, 0, 0, 0x75, 0, 0, 0x78, 0,
    0, 0x7b
  };
  /* same for the handakuten, indexed by (katakana byte - 0x6e) */
  static unsigned char handaku[] = {
    0x70, 0, 0, 0x73, 0, 0, 0x76, 0, 0, 0x79, 0, 0, 0x7c
  };
  int_least8_t *ch;
  const unsigned char *s, *s_;
  unsigned char *d, *d0, *d_, b, *e;
  uint_least8_t *cp, *ctypes, ctype;
  size_t size = nstr->orig_blen, length = 0;
  int removeblankp = nstr->flags & SEN_STR_REMOVEBLANK;
  if (!(nstr->norm = SEN_MALLOC(size * 2 + 1))) {
    return sen_memory_exhausted;
  }
  d0 = (unsigned char *) nstr->norm;
  if (nstr->flags & SEN_STR_WITH_CHECKS) {
    if (!(nstr->checks = SEN_MALLOC(size * 2 + 1))) {
      SEN_FREE(nstr->norm);
      return sen_memory_exhausted;
    }
  }
  ch = nstr->checks;
  if (nstr->flags & SEN_STR_WITH_CTYPES) {
    if (!(nstr->ctypes = SEN_MALLOC(size + 1))) {
      SEN_FREE(nstr->checks);
      SEN_FREE(nstr->norm);
      return sen_memory_exhausted;
    }
  }
  cp = ctypes = nstr->ctypes;
  e = (unsigned char *)nstr->orig + size;
  /* s  : current source byte        s_ : source position already
     d  : current output byte             accounted for in checks
     d_ : output position already covered by checks entries */
  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
    if ((*s & 0x80)) {
      if (0xa0 <= *s && *s <= 0xdf) {
        uint16_t c = hankana[*s - 0xa0];
        switch (c) {
        case 0x814a :
          /* dakuten: merge into the preceding katakana if possible */
          if (d > d0 + 1 && d[-2] == 0x83
              && 0x45 <= d[-1] && d[-1] <= 0x7a && (b = dakuten[d[-1] - 0x45])) {
            *(d - 1) = b;
            /* The preceding katakana occupies two output bytes, so its
               source-length entry is ch[-2]; ch[-1] is the 0 written for
               its second byte and must stay 0. */
            if (ch) { ch[-2]++; s_++; }
            continue;
          } else {
            *d++ = c >> 8; *d = c & 0xff;
          }
          break;
        case 0x814b :
          /* handakuten: same treatment */
          if (d > d0 + 1 && d[-2] == 0x83
              && 0x6e <= d[-1] && d[-1] <= 0x7a && (b = handaku[d[-1] - 0x6e])) {
            *(d - 1) = b;
            if (ch) { ch[-2]++; s_++; }
            continue;
          } else {
            *d++ = c >> 8; *d = c & 0xff;
          }
          break;
        default :
          *d++ = c >> 8; *d = c & 0xff;
          break;
        }
        ctype = sen_str_katakana;
      } else {
        if ((s + 1) < e && 0x40 <= *(s + 1) && *(s + 1) <= 0xfc) {
          unsigned char c1 = *s++, c2 = *s, c3 = 0;
          if (0x81 <= c1 && c1 <= 0x87) {
            switch (c1 & 0x0f) {
            case 1 :
              switch (c2) {
              case 0x5b :
                *d++ = c1; *d = c2;
                ctype = sen_str_katakana;
                break;
              case 0x58 :
                *d++ = c1; *d = c2;
                ctype = sen_str_kanji;
                break;
              case 0x40 :
                /* two-byte blank */
                if (removeblankp) {
                  if (cp > ctypes) { *(cp - 1) |= SEN_NSTR_BLANK; }
                  continue;
                } else {
                  *d = ' ';
                  ctype = SEN_NSTR_BLANK|sen_str_symbol;
                }
                break;
              default :
                /* the second byte skips 0x7f, hence the two offsets */
                if (0x43 <= c2 && c2 <= 0x7e && (c3 = symbol[c2 - 0x43])) {
                  *d = c3;
                  ctype = sen_str_symbol;
                } else if (0x7f <= c2 && c2 <= 0x97 && (c3 = symbol[c2 - 0x44])) {
                  *d = c3;
                  ctype = sen_str_symbol;
                } else {
                  *d++ = c1; *d = c2;
                  ctype = sen_str_others;
                }
                break;
              }
              break;
            case 2 :
              if (0x4f <= c2 && c2 <= 0x58) {
                /* two-byte digit -> '0'..'9' */
                ctype = sen_str_digit;
                *d = c2 - 0x1f;
              } else if (0x60 <= c2 && c2 <= 0x79) {
                /* two-byte upper-case letter -> 'a'..'z' */
                ctype = sen_str_alpha;
                *d = c2 + 0x01;
              } else if (0x81 <= c2 && c2 <= 0x9a) {
                /* two-byte lower-case letter -> 'a'..'z' */
                ctype = sen_str_alpha;
                *d = c2 - 0x20;
              } else if (0x9f <= c2 && c2 <= 0xf1) {
                *d++ = c1; *d = c2;
                ctype = sen_str_hiragana;
              } else {
                *d++ = c1; *d = c2;
                ctype = sen_str_others;
              }
              break;
            case 3 :
              if (0x40 <= c2 && c2 <= 0x96) {
                *d++ = c1; *d = c2;
                ctype = sen_str_katakana;
              } else {
                *d++ = c1; *d = c2;
                ctype = sen_str_symbol;
              }
              break;
            case 4 :
            case 7 :
              *d++ = c1; *d = c2;
              ctype = sen_str_symbol;
              break;
            default :
              *d++ = c1; *d = c2;
              ctype = sen_str_others;
              break;
            }
          } else {
            *d++ = c1; *d = c2;
            ctype = sen_str_kanji;
          }
        } else {
          /* skip invalid character */
          continue;
        }
      }
    } else {
      unsigned char c = *s;
      switch (c >> 4) {
      case 0 :
      case 1 :
        /* skip unprintable ascii */
        if (cp > ctypes) { *(cp - 1) |= SEN_NSTR_BLANK; }
        continue;
      case 2 :
        if (c == 0x20) {
          if (removeblankp) {
            if (cp > ctypes) { *(cp - 1) |= SEN_NSTR_BLANK; }
            continue;
          } else {
            *d = ' ';
            ctype = SEN_NSTR_BLANK|sen_str_symbol;
          }
        } else {
          *d = c;
          ctype = sen_str_symbol;
        }
        break;
      case 3 :
        *d = c;
        ctype = (c <= 0x39) ? sen_str_digit : sen_str_symbol;
        break;
      case 4 :
        *d = ('A' <= c) ? c + 0x20 : c;
        ctype = (c == 0x40) ? sen_str_symbol : sen_str_alpha;
        break;
      case 5 :
        *d = (c <= 'Z') ? c + 0x20 : c;
        ctype = (c <= 0x5a) ? sen_str_alpha : sen_str_symbol;
        break;
      case 6 :
        *d = c;
        ctype = (c == 0x60) ? sen_str_symbol : sen_str_alpha;
        break;
      case 7 :
        *d = c;
        ctype = (c <= 0x7a) ? sen_str_alpha : (c == 0x7f ? sen_str_others : sen_str_symbol);
        break;
      default :
        *d = c;
        ctype = sen_str_others;
        break;
      }
    }
    /* one normalized character has been emitted; d points at its last byte */
    d++;
    length++;
    if (cp) { *cp++ = ctype; }
    if (ch) {
      /* source bytes consumed since the previous character, then 0 for
         every additional output byte of this character */
      *ch++ = (int_least8_t)(s + 1 - s_);
      s_ = s + 1;
      while (++d_ < d) { *ch++ = 0; }
    }
  }
  if (cp) { *cp = sen_str_null; }
  *d = '\0';
  nstr->length = length;
  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
  return sen_success;
}

/*
 * Encoding-agnostic normalization of nstr->orig (nstr->orig_blen bytes).
 *
 * Every source byte is treated as one character: bytes below 0x20 are
 * dropped, a space is dropped when SEN_STR_REMOVEBLANK is set, 'A'..'Z'
 * are lower-cased, everything else is copied unchanged.
 *
 * Fills nstr->norm, nstr->norm_blen and nstr->length, plus nstr->checks
 * (source bytes consumed per output byte) and nstr->ctypes (one type per
 * character, sen_str_null terminated) when the matching flag is set.
 *
 * Returns sen_success, or sen_memory_exhausted if an allocation fails.
 */
inline static sen_rc
normalize_none(sen_nstr *nstr)
{
  const size_t nbytes = nstr->orig_blen;
  const int drop_blank = nstr->flags & SEN_STR_REMOVEBLANK;
  const unsigned char *src, *mark, *src_end;
  unsigned char *dst;
  int_least8_t *chk;
  uint_least8_t *ct, *ct_head, kind;
  size_t nchars = 0;

  if (!(nstr->norm = SEN_MALLOC(nbytes + 1))) {
    return sen_memory_exhausted;
  }
  if (nstr->flags & SEN_STR_WITH_CHECKS) {
    if (!(nstr->checks = SEN_MALLOC(nbytes + 1))) {
      SEN_FREE(nstr->norm);
      return sen_memory_exhausted;
    }
  }
  if (nstr->flags & SEN_STR_WITH_CTYPES) {
    if (!(nstr->ctypes = SEN_MALLOC(nbytes + 1))) {
      SEN_FREE(nstr->checks);
      SEN_FREE(nstr->norm);
      return sen_memory_exhausted;
    }
  }

  dst = (unsigned char *) nstr->norm;
  chk = nstr->checks;
  ct = ct_head = nstr->ctypes;
  src = mark = (const unsigned char *) nstr->orig;
  src_end = src + nbytes;

  for (; src < src_end; src++) {
    const unsigned char c = *src;
    unsigned char out = c;

    /* Dropped bytes only leave a BLANK flag on the previous character. */
    if (c < 0x20 || (c == 0x20 && drop_blank)) {
      if (ct > ct_head) { ct[-1] |= SEN_NSTR_BLANK; }
      continue;
    }

    if (c == 0x20) {
      kind = SEN_NSTR_BLANK|sen_str_symbol;
    } else if ('0' <= c && c <= '9') {
      kind = sen_str_digit;
    } else if ('A' <= c && c <= 'Z') {
      out = c + 0x20;
      kind = sen_str_alpha;
    } else if ('a' <= c && c <= 'z') {
      kind = sen_str_alpha;
    } else if (c >= 0x7f) {
      /* DEL and all 8-bit bytes */
      kind = sen_str_others;
    } else {
      /* remaining printable ASCII punctuation */
      kind = sen_str_symbol;
    }

    *dst++ = out;
    nchars++;
    if (ct) { *ct++ = kind; }
    if (chk) {
      /* Every character is one output byte, so no 0 entries follow. */
      *chk++ = (int_least8_t)(src + 1 - mark);
      mark = src + 1;
    }
  }

  if (ct) { *ct = sen_str_null; }
  *dst = '\0';
  nstr->length = nchars;
  nstr->norm_blen = (size_t)(dst - (unsigned char *)nstr->norm);
  return sen_success;
}

/* use cp1252 as latin1 */
/*
 * Normalize the single-byte (cp1252) text in nstr->orig.
 *
 * The input length is taken from nstr->orig_blen, as in the other
 * normalizers, so the text does not have to be NUL-terminated.
 *
 * Bytes below 0x20 are dropped, a space is dropped when
 * SEN_STR_REMOVEBLANK is set, and upper-case letters (ASCII and the
 * accented ones in 0x8a/0x8c/0x8e/0x9f and 0xc0..0xde) are replaced by
 * their lower-case byte.  0xd7 and 0xf7 are classified as symbols.
 *
 * Fills nstr->norm, nstr->norm_blen and nstr->length, plus nstr->checks
 * (source bytes consumed per output byte) and nstr->ctypes (one type per
 * character, sen_str_null terminated) when the matching flag is set.
 *
 * Returns sen_success, or sen_memory_exhausted if an allocation fails.
 */
inline static sen_rc
normalize_latin1(sen_nstr *nstr)
{
  int_least8_t *ch;
  const unsigned char *s, *s_, *e;
  unsigned char *d, *d0, *d_;
  uint_least8_t *cp, *ctypes, ctype;
  /* orig_blen rather than strlen(): orig is a counted buffer. */
  size_t size = nstr->orig_blen, length = 0;
  int removeblankp = nstr->flags & SEN_STR_REMOVEBLANK;
  if (!(nstr->norm = SEN_MALLOC(size + 1))) {
    return sen_memory_exhausted;
  }
  d0 = (unsigned char *) nstr->norm;
  if (nstr->flags & SEN_STR_WITH_CHECKS) {
    if (!(nstr->checks = SEN_MALLOC(size + 1))) {
      SEN_FREE(nstr->norm);
      return sen_memory_exhausted;
    }
  }
  ch = nstr->checks;
  if (nstr->flags & SEN_STR_WITH_CTYPES) {
    if (!(nstr->ctypes = SEN_MALLOC(size + 1))) {
      SEN_FREE(nstr->checks);
      SEN_FREE(nstr->norm);
      return sen_memory_exhausted;
    }
  }
  cp = ctypes = nstr->ctypes;
  e = (unsigned char *)nstr->orig + size;
  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
    unsigned char c = *s;
    /* dispatch on the high nibble */
    switch (c >> 4) {
    case 0 :
    case 1 :
      /* skip unprintable ascii */
      if (cp > ctypes) { *(cp - 1) |= SEN_NSTR_BLANK; }
      continue;
    case 2 :
      if (c == 0x20) {
        if (removeblankp) {
          if (cp > ctypes) { *(cp - 1) |= SEN_NSTR_BLANK; }
          continue;
        } else {
          *d = ' ';
          ctype = SEN_NSTR_BLANK|sen_str_symbol;
        }
      } else {
        *d = c;
        ctype = sen_str_symbol;
      }
      break;
    case 3 :
      *d = c;
      ctype = (c <= 0x39) ? sen_str_digit : sen_str_symbol;
      break;
    case 4 :
      *d = ('A' <= c) ? c + 0x20 : c;
      ctype = (c == 0x40) ? sen_str_symbol : sen_str_alpha;
      break;
    case 5 :
      *d = (c <= 'Z') ? c + 0x20 : c;
      ctype = (c <= 0x5a) ? sen_str_alpha : sen_str_symbol;
      break;
    case 6 :
      *d = c;
      ctype = (c == 0x60) ? sen_str_symbol : sen_str_alpha;
      break;
    case 7 :
      *d = c;
      ctype = (c <= 0x7a) ? sen_str_alpha : (c == 0x7f ? sen_str_others : sen_str_symbol);
      break;
    case 8 :
      /* 0x8a, 0x8c, 0x8e have their lower-case form 0x10 above */
      if (c == 0x8a || c == 0x8c || c == 0x8e) {
        *d = c + 0x10;
        ctype = sen_str_alpha;
      } else {
        *d = c;
        ctype = sen_str_symbol;
      }
      break;
    case 9 :
      /* 0x9f is lower-cased to 0xff; 0x9a, 0x9c, 0x9e are kept as-is */
      if (c == 0x9a || c == 0x9c || c == 0x9e || c == 0x9f) {
        *d = (c == 0x9f) ? c + 0x60 : c;
        ctype = sen_str_alpha;
      } else {
        *d = c;
        ctype = sen_str_symbol;
      }
      break;
    case 0x0c :
      *d = c + 0x20;
      ctype = sen_str_alpha;
      break;
    case 0x0d :
      /* 0xd7 and 0xdf are copied unchanged; the rest is lower-cased */
      *d = (c == 0xd7 || c == 0xdf) ? c : c + 0x20;
      ctype = (c == 0xd7) ? sen_str_symbol : sen_str_alpha;
      break;
    case 0x0e :
      *d = c;
      ctype = sen_str_alpha;
      break;
    case 0x0f :
      *d = c;
      ctype = (c == 0xf7) ? sen_str_symbol : sen_str_alpha;
      break;
    default :
      /* 0xa0..0xbf */
      *d = c;
      ctype = sen_str_others;
      break;
    }
    d++;
    length++;
    if (cp) { *cp++ = ctype; }
    if (ch) {
      *ch++ = (int_least8_t)(s + 1 - s_);
      s_ = s + 1;
      while (++d_ < d) { *ch++ = 0; }
    }
  }
  if (cp) { *cp = sen_str_null; }
  *d = '\0';
  nstr->length = length;
  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
  return sen_success;
}

/*
 * Build the normalized form of nstr->orig, treating it as single-byte text
 * (the byte mapping below is the one selected for sen_enc_koi8r).
 *
 * On success fills in nstr->norm (NUL terminated), nstr->norm_blen and
 * nstr->length, plus nstr->checks / nstr->ctypes when SEN_STR_WITH_CHECKS /
 * SEN_STR_WITH_CTYPES are set in nstr->flags, and returns sen_success.
 *
 * Byte handling:
 *   - 0x00-0x1f are dropped; the previous ctype (if any) gets SEN_NSTR_BLANK.
 *   - 0x20 is dropped the same way when SEN_STR_REMOVEBLANK is set, otherwise
 *     kept and flagged SEN_NSTR_BLANK.
 *   - ASCII 'A'-'Z' are folded to 'a'-'z'.
 *   - 0xb3 is folded to 0xa3 and 0xe0-0xff to 0xc0-0xdf (presumably the
 *     upper-case Cyrillic letters mapped onto their lower-case forms).
 *   - every other byte is copied unchanged.
 *
 * On allocation failure everything allocated so far is released, the
 * corresponding nstr fields are reset to NULL so that a later
 * sen_nstr_close() cannot free them a second time, and
 * sen_memory_exhausted is returned.
 */
inline static sen_rc
normalize_koi8r(sen_nstr *nstr)
{
  int_least8_t *ch;
  const unsigned char *s, *s_, *e;
  unsigned char *d, *d0, *d_;
  uint_least8_t *cp, *ctypes, ctype;
  size_t size = strlen(nstr->orig), length = 0;
  int removeblankp = nstr->flags & SEN_STR_REMOVEBLANK;
  if (!(nstr->norm = SEN_MALLOC(size + 1))) {
    return sen_memory_exhausted;
  }
  d0 = (unsigned char *) nstr->norm;
  if (nstr->flags & SEN_STR_WITH_CHECKS) {
    if (!(nstr->checks = SEN_MALLOC(size + 1))) {
      SEN_FREE(nstr->norm);
      nstr->norm = NULL;
      return sen_memory_exhausted;
    }
  }
  ch = nstr->checks;
  if (nstr->flags & SEN_STR_WITH_CTYPES) {
    if (!(nstr->ctypes = SEN_MALLOC(size + 1))) {
      if (nstr->checks) {
        SEN_FREE(nstr->checks);
        nstr->checks = NULL;
      }
      SEN_FREE(nstr->norm);
      nstr->norm = NULL;
      return sen_memory_exhausted;
    }
  }
  cp = ctypes = nstr->ctypes;
  e = (unsigned char *)nstr->orig + size;
  /* s_ marks the first source byte not yet accounted for in checks;
     d_ trails d so that extra output bytes could be given a 0 check. */
  for (s = s_ = (unsigned char *) nstr->orig, d = d_ = d0; s < e; s++) {
    unsigned char c = *s;
    /* dispatch on the high nibble of the byte */
    switch (c >> 4) {
    case 0 :
    case 1 :
      /* skip unprintable ascii */
      if (cp > ctypes) { *(cp - 1) |= SEN_NSTR_BLANK; }
      continue;
    case 2 :
      if (c == 0x20) {
        if (removeblankp) {
          if (cp > ctypes) { *(cp - 1) |= SEN_NSTR_BLANK; }
          continue;
        } else {
          *d = ' ';
          ctype = SEN_NSTR_BLANK|sen_str_symbol;
        }
      } else {
        *d = c;
        ctype = sen_str_symbol;
      }
      break;
    case 3 :
      /* '0'-'9' are digits, ':' .. '?' are symbols */
      *d = c;
      ctype = (c <= 0x39) ? sen_str_digit : sen_str_symbol;
      break;
    case 4 :
      /* '@' stays as a symbol, 'A'-'O' are lower-cased */
      *d = ('A' <= c) ? c + 0x20 : c;
      ctype = (c == 0x40) ? sen_str_symbol : sen_str_alpha;
      break;
    case 5 :
      /* 'P'-'Z' are lower-cased, '[' .. '_' are symbols */
      *d = (c <= 'Z') ? c + 0x20 : c;
      ctype = (c <= 0x5a) ? sen_str_alpha : sen_str_symbol;
      break;
    case 6 :
      *d = c;
      ctype = (c == 0x60) ? sen_str_symbol : sen_str_alpha;
      break;
    case 7 :
      *d = c;
      ctype = (c <= 0x7a) ? sen_str_alpha : (c == 0x7f ? sen_str_others : sen_str_symbol);
      break;
    case 0x0a :
      *d = c;
      ctype = (c == 0xa3) ? sen_str_alpha : sen_str_others;
      break;
    case 0x0b :
      if (c == 0xb3) {
        /* fold 0xb3 onto 0xa3 */
        *d = c - 0x10;
        ctype = sen_str_alpha;
      } else {
        *d = c;
        ctype = sen_str_others;
      }
      break;
    case 0x0c :
    case 0x0d :
      *d = c;
      ctype = sen_str_alpha;
      break;
    case 0x0e :
    case 0x0f :
      /* fold 0xe0-0xff onto 0xc0-0xdf */
      *d = c - 0x20;
      ctype = sen_str_alpha;
      break;
    default :
      *d = c;
      ctype = sen_str_others;
      break;
    }
    d++;
    length++;
    if (cp) { *cp++ = ctype; }
    if (ch) {
      /* number of source bytes consumed since the previous output byte,
         including any skipped ones */
      *ch++ = (int_least8_t)(s + 1 - s_);
      s_ = s + 1;
      while (++d_ < d) { *ch++ = 0; }
    }
  }
  if (cp) { *cp = sen_str_null; }
  *d = '\0';
  nstr->length = length;
  nstr->norm_blen = (size_t)(d - (unsigned char *)nstr->norm);
  return sen_success;
}

/*
 * Allocate a sen_nstr for str and run the normalizer that matches encoding.
 *
 * Returns NULL when str is NULL or empty, when the sen_nstr itself cannot be
 * allocated, or when the normalizer reports an error (in which case the
 * partially built object is released with sen_nstr_close()).
 * The string is referenced, not copied: nstr->orig points at str.
 */
sen_nstr *
sen_nstr_open(const char *str, size_t str_len, sen_encoding encoding, int flags)
{
  sen_nstr *result;
  sen_rc status;

  if (!str || !*str) { return NULL; }
  result = SEN_MALLOC(sizeof(sen_nstr));
  if (!result) { return NULL; }

  /* start from a clean object so that sen_nstr_close() is always safe */
  result->norm = NULL;
  result->checks = NULL;
  result->ctypes = NULL;
  result->norm_blen = 0;
  result->orig = str;
  result->orig_blen = str_len;
  result->flags = flags;

  if (encoding == sen_enc_euc_jp) {
    status = normalize_euc(result);
  } else if (encoding == sen_enc_utf8) {
#ifdef NO_NFKC
    status = normalize_none(result);
#else /* NO_NFKC */
    status = normalize_utf8(result);
#endif /* NO_NFKC */
  } else if (encoding == sen_enc_sjis) {
    status = normalize_sjis(result);
  } else if (encoding == sen_enc_latin1) {
    status = normalize_latin1(result);
  } else if (encoding == sen_enc_koi8r) {
    status = normalize_koi8r(result);
  } else {
    status = normalize_none(result);
  }

  if (!status) { return result; }
  sen_nstr_close(result);
  return NULL;
}

/*
 * Byte length of the character introduced by lead byte c, as assumed by
 * sen_fakenstr_open() for the given encoding.  Encodings other than
 * euc_jp / sjis / utf8 are treated as one byte per character.
 */
inline static int_least8_t
fakenstr_char_blen(sen_encoding encoding, unsigned char c)
{
  switch (encoding) {
  case sen_enc_euc_jp :
    return ((c >= 0xa1U && c <= 0xfeU) || c == 0x8eU) ? 2 : (c == 0x8fU ? 3 : 1);
  case sen_enc_sjis :
    return (c >= 0x81U && ((c <= 0x9fU) || (c >= 0xe0U && c <= 0xfcU))) ? 2 : 1;
  case sen_enc_utf8 :
    return (c & 0x80U) ? ((c & 0x20U) ? ((c & 0x10U) ? 4 : 3) : 2) : 1;
  default :
    return 1;
  }
}

/*
 * Create a sen_nstr whose "normalized" text is a plain copy of str
 * (no normalization is performed).
 *
 * str      : source bytes; referenced by nstr->orig, copied into nstr->norm.
 * str_len  : number of bytes in str.
 * encoding : used only to find character boundaries.
 * flags    : when SEN_STR_WITH_CHECKS is set, nstr->checks is filled so that
 *            the first byte of each character holds the character's byte
 *            length and every following byte of that character holds 0.
 *
 * Every field of the returned object is initialized: flags is stored as
 * given, ctypes is always NULL, and length is the number of characters
 * found (a character truncated by str_len counts as one).
 * Returns NULL on allocation failure.
 */
sen_nstr *
sen_fakenstr_open(const char *str, size_t str_len, sen_encoding encoding, int flags)
{
  /* TODO: support SEN_STR_REMOVEBLANK flag and ctypes */
  sen_nstr *nstr;
  size_t i, length = 0;

  if (!(nstr = SEN_MALLOC(sizeof(sen_nstr)))) {
    return NULL;
  }
  if (!(nstr->norm = SEN_MALLOC(str_len + 1))) {
    SEN_LOG(sen_log_alert, "memory allocation for keyword on sen_snip_add_cond failed !");
    SEN_FREE(nstr);
    return NULL;
  }
  nstr->orig = str;
  nstr->orig_blen = str_len;
  memcpy(nstr->norm, str, str_len);
  nstr->norm[str_len] = '\0';
  nstr->norm_blen = str_len;
  nstr->ctypes = NULL;
  nstr->checks = NULL;
  nstr->flags = flags;

  if (flags & SEN_STR_WITH_CHECKS) {
    /* + 1 keeps the request non-zero for an empty string, where a NULL
       result from the allocator would be mistaken for exhaustion */
    if (!(nstr->checks = (int_least8_t *) SEN_MALLOC(sizeof(int_least8_t) * (str_len + 1)))) {
      SEN_FREE(nstr->norm);
      SEN_FREE(nstr);
      return NULL;
    }
  }

  /* walk the string one character at a time */
  for (i = 0; i < str_len; length++) {
    int_least8_t blen = fakenstr_char_blen(encoding, (unsigned char) str[i]);
    /* clamp so that a truncated trailing character cannot run past str_len */
    size_t next = ((size_t) blen < str_len - i) ? i + (size_t) blen : str_len;
    if (nstr->checks) {
      nstr->checks[i] = blen;
      while (++i < next) { nstr->checks[i] = 0; }
    } else {
      i = next;
    }
  }
  nstr->length = length;
  return nstr;
}

/*
 * Release a sen_nstr together with the buffers it owns (checks, ctypes and
 * norm).  nstr->orig is not touched.
 * Returns sen_invalid_argument for a NULL nstr, sen_success otherwise.
 */
sen_rc
sen_nstr_close(sen_nstr *nstr)
{
  if (nstr == NULL) {
    return sen_invalid_argument;
  }
  if (nstr->checks != NULL) {
    SEN_FREE(nstr->checks);
  }
  if (nstr->ctypes != NULL) {
    SEN_FREE(nstr->ctypes);
  }
  if (nstr->norm != NULL) {
    SEN_FREE(nstr->norm);
  }
  SEN_FREE(nstr);
  return sen_success;
}

/*
 * Encoding names accepted by sen_strtoenc().  The array index of a name is
 * the value sen_strtoenc() casts to sen_encoding, so the order here
 * presumably has to mirror the sen_encoding enum -- confirm against its
 * declaration before reordering or inserting entries.
 */
const char *sen_enc_string[] = {
  "default",
  "none",
  "euc_jp",
  "utf8",
  "sjis",
  "latin1",
  "koi8r"
};

sen_encoding
sen_strtoenc(const char *str)
{
  sen_encoding e = sen_enc_euc_jp;
  int i = sizeof(sen_enc_string) / sizeof(sen_enc_string[0]);
  while (i--) {
    if (!strcmp(str, sen_enc_string[i])) {
      e = (sen_encoding)i;
    }
  }
  return e;
}

/*
 * Count the characters in str by stepping with sen_str_charlen() until it
 * reports a length of 0.
 * If last is not NULL, *last receives the position at which the scan
 * stopped.  Returns the number of characters stepped over.
 */
size_t
sen_str_len(const char *str, sen_encoding encoding, const char **last)
{
  size_t count = 0, step;
  while ((step = sen_str_charlen(str, encoding)) != 0) {
    str += step;
    count++;
  }
  if (last) { *last = str; }
  return count;
}

/*
 * Tell whether the character starting at str is white space.
 *
 * Returns 1 for the ASCII bytes ' ', '\f', '\n', '\r', '\t', '\v' in any
 * encoding, and for the multi-byte sequences 0x81 0x40 (sjis),
 * 0xA1 0xA1 (euc_jp) and 0xE3 0x80 0x80 (utf8).  Returns 0 otherwise,
 * including when str is NULL.
 */
int
sen_isspace(const char *str, sen_encoding encoding)
{
  const unsigned char *bytes = (const unsigned char *) str;
  unsigned char lead;

  if (!bytes) { return 0; }
  lead = bytes[0];

  if (lead == ' ' || lead == '\f' || lead == '\n' ||
      lead == '\r' || lead == '\t' || lead == '\v') {
    return 1;
  }
  if (lead == 0x81) {
    return (encoding == sen_enc_sjis && bytes[1] == 0x40) ? 1 : 0;
  }
  if (lead == 0xA1) {
    return (encoding == sen_enc_euc_jp && bytes[1] == 0xA1) ? 1 : 0;
  }
  if (lead == 0xE3) {
    return (encoding == sen_enc_utf8 && bytes[1] == 0x80 && bytes[2] == 0x80) ? 1 : 0;
  }
  return 0;
}

/*
 * Parse an optionally '-'-prefixed run of decimal digits from [nptr, end).
 *
 * rest : if not NULL, receives the position of the first byte that was not
 *        consumed (only on the non-overflow path).
 * Returns the parsed value, or 0 when no digits are present.  When the
 * magnitude does not fit in an int the function returns 0 immediately and
 * leaves *rest untouched.
 *
 * The range check is done before each accumulation step, so the arithmetic
 * never overflows.
 */
int
sen_atoi(const char *nptr, const char *end, const char **rest)
{
  /* FIXME: INT_MIN is not supported */
  /* largest int magnitude; assumes unsigned int has one more value bit than int */
  const unsigned int limit = ~0U >> 1;
  unsigned int v = 0;
  int n = 0;
  if (nptr < end && *nptr == '-') {
    nptr++;
    n = 1;
  }
  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    unsigned int digit = (unsigned int)(*nptr++ - '0');
    /* would v * 10 + digit exceed limit? */
    if (v > (limit - digit) / 10) { return 0; }
    v = v * 10 + digit;
  }
  if (rest) { *rest = nptr; }
  return n ? -(int)v : (int)v;
}

/*
 * Parse a run of decimal digits from [nptr, end) as an unsigned int.
 *
 * rest : if not NULL, receives the position of the first byte that was not
 *        consumed (only on the non-overflow path).
 * Returns the parsed value, or 0 when no digits are present.  When the
 * value does not fit in an unsigned int the function returns 0 immediately
 * and leaves *rest untouched.
 *
 * The range check is done before each accumulation step; comparing the
 * wrapped result with the previous value does not catch every overflow.
 */
unsigned int
sen_atoui(const char *nptr, const char *end, const char **rest)
{
  const unsigned int limit = ~0U; /* largest unsigned int */
  unsigned int v = 0;
  while (nptr < end && *nptr >= '0' && *nptr <= '9') {
    unsigned int digit = (unsigned int)(*nptr++ - '0');
    /* would v * 10 + digit exceed limit? */
    if (v > (limit - digit) / 10) { return 0; }
    v = v * 10 + digit;
  }
  if (rest) { *rest = nptr; }
  return v;
}

/*
 * Write the low len hexadecimal digits of i (upper case, zero padded,
 * most significant first) into p, followed by a terminating NUL.
 * p must have room for len + 1 bytes.
 */
void
sen_str_itoh(unsigned int i, char *p, unsigned int len)
{
  static const char digits[] = "0123456789ABCDEF";
  unsigned int pos = len;
  p[pos] = '\0';
  /* fill from the right: the lowest nibble goes into the last slot */
  while (pos > 0) {
    p[--pos] = digits[i % 16];
    i /= 16;
  }
}

/*
 * Write the decimal representation of i into [p, end) without a
 * terminating NUL.
 *
 * rest : if not NULL, receives the position just past the last byte
 *        written (only on success).
 * Returns sen_success, or sen_invalid_argument when the buffer is too
 * small; in that case part of the buffer may already have been written.
 *
 * The magnitude is taken in unsigned arithmetic, so the most negative int
 * is converted correctly instead of overflowing on negation.
 */
sen_rc
sen_str_itoa(int i, char *p, char *end, char **rest)
{
  char *q;
  unsigned int u;
  if (p >= end) { return sen_invalid_argument; }
  if (i < 0) {
    *p++ = '-';
    u = 0U - (unsigned int) i;
  } else {
    u = (unsigned int) i;
  }
  q = p;
  /* emit the digits least significant first ... */
  do {
    if (p >= end) { return sen_invalid_argument; }
    *p++ = (char)(u % 10 + '0');
  } while ((u /= 10) > 0);
  if (rest) { *rest = p; }
  /* ... then reverse them in place */
  for (p--; q < p; q++, p--) {
    char t = *q;
    *q = *p;
    *p = t;
  }
  return sen_success;
}

/*
 * Split str in place into at most buf_size tokens.
 *
 * Every byte of str that occurs in delim ends the current token and is
 * overwritten with NUL; adjacent delimiters therefore produce empty tokens.
 * The start of each token is stored in tokbuf[].
 *
 * rest : if not NULL, receives the position where scanning stopped: the
 *        terminating NUL of str, or the first byte after the last
 *        delimiter consumed when tokbuf filled up.
 * Returns the number of tokens stored.
 */
int
sen_str_tok(char *str, const char *delim, char **tokbuf, int buf_size, char **rest)
{
  int count = 0;
  while (count < buf_size) {
    tokbuf[count++] = str;
    /* advance to the next delimiter or to the end of the string */
    str += strcspn(str, delim);
    if (*str == '\0') { break; }
    *str++ = '\0';
  }
  if (rest) { *rest = str; }
  return count;
}

#define UNIT_SIZE 4096

/*
 * Prepare buf for use.
 *
 * size == 0 : no memory is allocated; head, curr and tail are all NULL.
 * size <  0 : UNIT_SIZE bytes are allocated.
 * size >  0 : size bytes are allocated.
 * Returns sen_memory_exhausted when the allocation fails, sen_success
 * otherwise.
 */
sen_rc
sen_rbuf_init(sen_rbuf *buf, int size)
{
  if (size == 0) {
    buf->head = NULL;
    buf->curr = NULL;
    buf->tail = NULL;
    return sen_success;
  }
  if (size < 0) {
    size = UNIT_SIZE;
  }
  buf->head = SEN_REALLOC(NULL, size);
  if (!buf->head) {
    return sen_memory_exhausted;
  }
  buf->curr = buf->head;
  buf->tail = buf->head + size;
  return sen_success;
}

/*
 * Grow the buffer by size bytes, preserving the write position.
 *
 * Returns sen_memory_exhausted (leaving buf unchanged) when the
 * reallocation fails, sen_success otherwise.
 *
 * The write offset is captured before reallocating: once the block may have
 * moved, the old head pointer must no longer be used, not even for pointer
 * arithmetic.
 */
sen_rc
sen_rbuf_expand(sen_rbuf *buf, size_t size)
{
  size_t used = (size_t)(buf->curr - buf->head);
  size_t newsize = (buf->tail - buf->head) + size;
  char *head = SEN_REALLOC(buf->head, newsize);
  if (!head) { return sen_memory_exhausted; }
  buf->head = head;
  buf->curr = head + used;
  buf->tail = head + newsize;
  return sen_success;
}

/*
 * Append len bytes from str at the write position of buf.
 *
 * When the remaining room is too small the buffer is first grown by the
 * shortfall rounded up to a multiple of UNIT_SIZE.  Returns the error from
 * sen_rbuf_expand() if growing fails (nothing is written in that case).
 */
sen_rc
sen_rbuf_write(sen_rbuf *buf, const char *str, size_t len)
{
  sen_rc rc = sen_success;
  if (buf->curr + len > buf->tail) {
    /* round the missing byte count up to the next UNIT_SIZE boundary */
    size_t inc = (buf->curr + len - buf->tail + UNIT_SIZE - 1) & ~(UNIT_SIZE - 1);
    rc = sen_rbuf_expand(buf, inc);
    if (rc) {
      return rc;
    }
  }
  memcpy(buf->curr, str, len);
  buf->curr += len;
  return rc;
}

/*
 * Append the decimal representation of i to buf.
 *
 * sen_str_itoa() is retried, growing the buffer by UNIT_SIZE after every
 * failed attempt, until it succeeds; on success it advances buf->curr.
 * Returns the error from sen_rbuf_expand() if growing fails.
 */
sen_rc
sen_rbuf_itoa(sen_rbuf *buf, int i)
{
  for (;;) {
    sen_rc rc;
    if (!sen_str_itoa(i, buf->curr, buf->tail, &buf->curr)) {
      return sen_success;
    }
    rc = sen_rbuf_expand(buf, UNIT_SIZE);
    if (rc) {
      return rc;
    }
  }
}

/*
 * Release the memory held by buf by reallocating it to size 0.
 * The fields of buf are left as they are.  Always returns sen_success.
 */
sen_rc
sen_rbuf_fin(sen_rbuf *buf)
{
  if (!buf->head) {
    return sen_success;
  }
  SEN_REALLOC(buf->head, 0);
  return sen_success;
}

/*
 * One element of the singly linked list managed by sen_lbuf_init(),
 * sen_lbuf_add() and sen_lbuf_fin().
 */
struct _sen_lbuf_node {
  sen_lbuf_node *next;  /* following node, NULL for the last one */
  size_t size;          /* payload size requested from sen_lbuf_add() */
  char val[1];          /* first byte of the payload; sen_lbuf_add() allocates
                           `size` bytes starting at this member's offset */
};

/*
 * Reset buf to an empty list: no head node, and tail referring to the head
 * slot so that the first sen_lbuf_add() links its node there.
 * Always returns sen_success.
 */
sen_rc
sen_lbuf_init(sen_lbuf *buf)
{
  buf->tail = &buf->head;
  buf->head = NULL;
  return sen_success;
}

/*
 * Append a node with a payload of size bytes to the end of the list.
 *
 * Returns a pointer to the (uninitialized) payload, or NULL when the
 * allocation fails, in which case the list is unchanged.
 */
void *
sen_lbuf_add(sen_lbuf *buf, size_t size)
{
  /* bytes occupied by the node fields that precede the payload */
  const size_t header = (size_t)(&((sen_lbuf_node *)0)->val);
  sen_lbuf_node *node = SEN_MALLOC(header + size);
  if (node == NULL) {
    return NULL;
  }
  node->size = size;
  node->next = NULL;
  /* hook the node into the slot tail refers to, then move tail onto the
     new node's next slot */
  *buf->tail = node;
  buf->tail = &node->next;
  return node->val;
}

/*
 * Free every node of the list.  head and tail of buf are left as they are.
 * Always returns sen_success.
 */
sen_rc
sen_lbuf_fin(sen_lbuf *buf)
{
  sen_lbuf_node *node = buf->head;
  while (node) {
    /* fetch the successor before the node is released */
    sen_lbuf_node *following = node->next;
    SEN_FREE(node);
    node = following;
  }
  return sen_success;
}
