/* Copyright(C) 2004 Brazil

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#include "senna_in.h"
#include <string.h>
#include <limits.h>
#include "ctx.h"
#include "set.h"

#define INITIAL_N_ENTRIES 256U
#define INITIAL_INDEX_SIZE 256U

#define GARBAGE ((entry *) 1)

#define STEP(x) (((x) >> 2) | 0x1010101)

typedef struct _sen_set_element entry;
typedef struct _sen_set_element_str entry_str;

#define SEN_SET_STRHASH(e) (((entry_str *)(e))->key)

inline static entry *
entry_new(sen_set *set)
{
  entry *e;
  if (set->garbages) {
    e = set->garbages;
    set->garbages = *((entry **)e);
    memset(e, 0, set->entry_size);
  } else {
    SEN_ARRAY_NEXT(&set->a, e);
  }
  return e;
}

sen_rc
sen_set_init(sen_ctx *ctx, sen_set *set,
             uint32_t key_size, uint32_t value_size, uint32_t init_size)
{
  uint32_t entry_size, n, mod;
  for (n = INITIAL_INDEX_SIZE; n < init_size; n *= 2);
  switch (key_size) {
  case 0 :
    entry_size = (intptr_t)(&((entry_str *)0)->dummy) + value_size;
    break;
  case sizeof(uint32_t) :
    entry_size = (intptr_t)(&((entry *)0)->dummy) + value_size;
    break;
  default :
    entry_size = (intptr_t)(&((entry *)0)->dummy) + key_size + value_size;
    break;
  }
  if ((mod = entry_size % sizeof(intptr_t))) {
    entry_size += sizeof(intptr_t) - mod;
  }
  memset(set, 0, sizeof(sen_set));
  set->ctx = ctx;
  set->key_size = key_size;
  set->value_size = value_size;
  set->entry_size = entry_size;
  set->max_offset = n - 1;

  sen_array_init(&set->a, ctx, entry_size, SEN_ARRAY_CLEAR);

  return (set->index = SEN_CALLOC(n * sizeof(entry *))) ? sen_success : sen_memory_exhausted;
}

sen_rc
sen_set_fin(sen_set * set)
{
  uint32_t i;
  sen_ctx *ctx;
  if (!set || !set->index) { return sen_invalid_argument; }
  ctx = set->ctx;
  if (!set->key_size) {
    entry *e, **sp;
    for (i = set->max_offset + 1, sp = set->index; i; i--, sp++) {
      e = *sp;
      if (!e || (e == GARBAGE)) { continue; }
      if (SEN_SET_STRKEY(e)) { SEN_FREE(SEN_SET_STRKEY(e)); }
    }
  }
  /*
  for (i = 0; i <= SEN_SET_MAX_CHUNK; i++) {
    if (set->chunks[i]) { SEN_FREE(set->chunks[i]); }
  }
  */
  sen_array_fin(&set->a);
  SEN_FREE(set->index);
  return sen_success;
}

/* for compatibility */
sen_set *
sen_set_open(uint32_t key_size, uint32_t value_size, uint32_t init_size)
{
  sen_set *set;
  sen_ctx *ctx = &sen_gctx; /* todo : replace it with the local ctx */
  if (!(set = SEN_MALLOC(sizeof(sen_set)))) { return NULL; }
  if (sen_set_init(ctx, set, key_size, value_size, init_size)) {
    SEN_FREE(set);
    return NULL;
  }
  return set;
}

sen_rc
sen_set_close(sen_set *set)
{
  sen_rc rc;
  if (set) {
    sen_ctx *ctx = set->ctx;
    rc = sen_set_fin(set);
    SEN_FREE(set);
  } else {
    rc = sen_invalid_argument;
  }
  return rc;
}

sen_rc
sen_set_reset(sen_set * set, uint32_t ne)
{
  uint32_t i, j, m, n, s;
  entry **index, *e, **sp, **dp;
  sen_ctx *ctx = set->ctx;
  if (!ne) { ne = set->n_entries * 2; }
  if (ne > INT_MAX) { return sen_memory_exhausted; }
  for (n = INITIAL_INDEX_SIZE; n <= ne; n *= 2);
  if (!(index = SEN_CALLOC(n * sizeof(entry *)))) { return sen_memory_exhausted; }
  m = n - 1;
  if (set->key_size) {
    for (j = set->max_offset + 1, sp = set->index; j; j--, sp++) {
      e = *sp;
      if (!e || (e == GARBAGE)) { continue; }
      for (i = e->key, s = STEP(i); ; i += s) {
        dp = index + (i & m);
        if (!*dp) { break; }
      }
      *dp = e;
    }
  } else {
    for (j = set->max_offset + 1, sp = set->index; j; j--, sp++) {
      e = *sp;
      if (!e || (e == GARBAGE)) { continue; }
      for (i = SEN_SET_STRHASH(e), s = STEP(i); ; i += s) {
        dp = index + (i & m);
        if (!*dp) { break; }
      }
      *dp = e;
    }
  }
  {
    entry **i0 = set->index;
    set->index = index;
    set->max_offset = m;
    set->n_garbages = 0;
    SEN_FREE(i0);
  }
  return sen_success;
}

sen_rc
sen_set_info(sen_set *set, unsigned *key_size, unsigned *value_size,
             unsigned *n_entries)
{
  if (!set) { return sen_invalid_argument; }
  if (key_size) { *key_size = set->key_size; }
  if (value_size) { *value_size = set->value_size; }
  if (n_entries) { *n_entries = set->n_entries; }
  return sen_success;
}

inline static uint32_t
str_hash(const unsigned char *p)
{
  uint32_t r;
  for (r = 0; *p; p++) { r = (r * 1021) + *p; }
  return r;
}

inline static uint32_t
bin_hash(const uint8_t *p, uint32_t length)
{
  uint32_t r;
  for (r = 0; length--; p++) { r = (r * 1021) + *p; }
  return r;
}

sen_set_eh *
sen_set_int_at(sen_set *set, const uint32_t *key, void **value)
{
  entry *e, **ep, **index = set->index;
  uint32_t h = *key, i, m = set->max_offset, s = STEP(h);
  for (i = h; ep = index + (i & m), (e = *ep); i += s) {
    if (e == GARBAGE) { continue; }
    if (e->key == h) {
      if (value) { *value = e->dummy; }
      return ep;
    }
  }
  return NULL;
}

sen_set_eh *
sen_set_str_at(sen_set *set, const char *key, void **value)
{
  entry *e, **ep, **index = set->index;
  uint32_t h = str_hash((unsigned char *)key), i, m = set->max_offset, s = STEP(h);
  for (i = h; ep = index + (i & m), (e = *ep); i += s) {
    if (e == GARBAGE) { continue; }
    if (SEN_SET_STRHASH(e) == h && !strcmp(key, SEN_SET_STRKEY(e))) {
      if (value) { *value = SEN_SET_STRVAL(e); }
      return (sen_set_eh *) ep;
    }
  }
  return NULL;
}

sen_set_eh *
sen_set_bin_at(sen_set *set, const void *key, void **value)
{
  entry *e, **ep, **index = set->index;
  uint32_t h = bin_hash(key, set->key_size), i, m = set->max_offset, s = STEP(h);
  for (i = h; ep = index + (i & m), (e = *ep); i += s) {
    if (e == GARBAGE) { continue; }
    if (e->key == h && !memcmp(key, e->dummy, set->key_size)) {
      if (value) { *value = SEN_SET_BINVAL(e, set); }
      return ep;
    }
  }
  return NULL;
}

sen_set_eh *
sen_set_at(sen_set *set, const void *key, void **value)
{
  if (set->arrayp) {
    if (sen_set_reset(set, 0)) { return NULL; }
    // set->curr_entry = 0;
    set->arrayp = 0;
  }
  switch (set->key_size) {
  case 0 :
    return sen_set_str_at(set, key, value);
  case sizeof(uint32_t) :
    return sen_set_int_at(set, key, value);
  default :
    return sen_set_bin_at(set, key, value);
  }
}

inline static sen_set_eh *
sen_set_int_get(sen_set *set, const uint32_t *key, void **value)
{
  static int _ncalls = 0, _ncolls = 0;
  entry *e, **ep, **np = NULL, **index = set->index;
  uint32_t h = *key, i, m = set->max_offset, s = STEP(h);
  _ncalls++;
  for (i = h; ep = index + (i & m), e = *ep; i += s) {
    if (e == GARBAGE) {
      if (!np) { np = ep; }
    } else {
      if (e->key == h) { goto exit; }
    }
    if (!(++_ncolls % 1000000) && (_ncolls > _ncalls)) {
      if (_ncolls < 0 || _ncalls < 0) {
        _ncolls = 0; _ncalls = 0;
      } else {
        SEN_LOG(sen_log_notice, "total:%d collisions:%d nentries:%d maxoffset:%d",
                _ncalls, _ncolls, set->n_entries, set->max_offset);
      }
    }
  }
  if (np) {
    set->n_garbages--;
    ep = np;
  }
  if (!(e = entry_new(set))) { return NULL; }
  e->key = h;
  *ep = e;
  set->n_entries++;
exit :
  if (value) { *value = e->dummy; }
  return ep;
}

inline static sen_set_eh *
sen_set_str_get(sen_set *set, const char *key, void **value)
{
  sen_ctx *ctx = set->ctx;
  entry *e, **ep, **np = NULL, **index = set->index;
  uint32_t h = str_hash((unsigned char *)key), i, m = set->max_offset, s = STEP(h);
  for (i = h; ep = index + (i & m), e = *ep; i += s) {
    if (e == GARBAGE) {
      if (!np) { np = ep; }
    } else {
      if (SEN_SET_STRHASH(e) == h && !strcmp(key, SEN_SET_STRKEY(e))) { goto exit; }
    }
  }
  {
    char *keybuf = SEN_STRDUP(key);
    if (!keybuf) { return NULL; }
    if (!(e = entry_new(set))) {
      SEN_FREE(keybuf);
      return NULL;
    }
    SEN_SET_STRHASH(e) = h;
    SEN_SET_STRKEY(e) = keybuf;
    if (np) {
      set->n_garbages--;
      ep = np;
    }
    *ep = e;
    set->n_entries++;
  }
exit :
  if (value) { *value = SEN_SET_STRVAL(e); }
  return ep;
}

inline static sen_set_eh *
sen_set_bin_get(sen_set *set, const void *key, void **value)
{
  entry *e, **ep, **np = NULL, **index = set->index;
  uint32_t h = bin_hash(key, set->key_size), i, m = set->max_offset, s = STEP(h);
  for (i = h; ep = index + (i & m), e = *ep; i += s) {
    if (e == GARBAGE) {
      if (!np) { np = ep; }
    } else {
      if (e->key == h && !memcmp(key, e->dummy, set->key_size)) {
        goto exit;
      }
    }
  }
  if (np) {
    set->n_garbages--;
    ep = np;
  }
  if (!(e = entry_new(set))) { return NULL; }
  e->key = h;
  memcpy(e->dummy, key, set->key_size);
  *ep = e;
  set->n_entries++;
exit :
  if (value) { *value = &e->dummy[set->key_size]; }
  return ep;
}

sen_set_eh *
sen_set_get(sen_set *set, const void *key, void **value)
{
  if (!set) { return NULL; }
  if (set->arrayp) {
    if (sen_set_reset(set, 0)) { return NULL; }
    // set->curr_entry = 0;
    set->arrayp = 0;
  } else if ((set->n_entries + set->n_garbages) * 2 > set->max_offset) {
    sen_set_reset(set, 0);
  }
  switch (set->key_size) {
  case 0 :
    return sen_set_str_get(set, key, value);
  case sizeof(uint32_t) :
    return sen_set_int_get(set, key, value);
  default :
    return sen_set_bin_get(set, key, value);
  }
}

sen_rc
sen_set_del(sen_set *set, sen_set_eh *ep)
{
  entry *e;
  sen_ctx *ctx = set->ctx;
  if (!set || !ep || !*ep) { return sen_invalid_argument; }
  e = *ep;
  *ep = GARBAGE;
  if (!set->key_size) { SEN_FREE(SEN_SET_STRKEY(e)); }
  *((entry **)e) = set->garbages;
  set->garbages = e;
  set->n_entries--;
  set->n_garbages++;
  return sen_success;
}

sen_set_cursor *
sen_set_cursor_open(sen_set *set)
{
  sen_set_cursor *c;
  sen_ctx *ctx = set->ctx;
  if (!set) { return NULL; }
  if (!(c = SEN_MALLOC(sizeof(sen_set_cursor)))) { return NULL; }
  c->set = set;
  c->index = set->index;
  c->curr = set->index;
  c->rest = set->max_offset + 1;
  return c;
}

sen_set_eh *
sen_set_cursor_next(sen_set_cursor *c, void **key, void **value)
{
  uint32_t i;
  entry *e, **ep;
  if (!c || !c->rest) { return NULL; }
  if (c->index != c->set->index) {
    SEN_LOG(sen_log_alert, "sen_reset invoked during sen_set_cursor is opened!");
    return NULL;
  }
  for (i = c->rest, ep = c->curr;;i--, ep++) {
    if (!i) {
      c->rest = 0;
      return NULL;
    }
    e = *ep;
    if (e && e != GARBAGE) { break; }
  }
  switch (c->set->key_size) {
  case 0 :
    if (key) { *key = SEN_SET_STRKEY(e); }
    if (value) { *value = SEN_SET_STRVAL(e); }
    break;
  case sizeof(uint32_t) :
    if (key) { *key = SEN_SET_INTKEY(e); }
    if (value) { *value = SEN_SET_INTVAL(e); }
    break;
  default :
    if (key) { *key = SEN_SET_BINKEY(e); }
    if (value) { *value = SEN_SET_BINVAL(e, c->set); }
    break;
  }
  c->curr = ep + 1;
  c->rest = i - 1;
  return ep;
}

sen_rc
sen_set_cursor_close(sen_set_cursor *cursor)
{
  sen_ctx *ctx = cursor->set->ctx;
  SEN_FREE(cursor);
  return sen_success;
}

inline static void
swap(entry **a, entry **b)
{
  entry *c = *a;
  *a = *b;
  *b = c;
}

#define INT_OFFSET_VAL(x) (((int32_t *)(x))[offset] * dir)

inline static entry **
pack_int(sen_set *set, entry **res, int offset, int dir)
{
  uint32_t i, n, m = set->max_offset;
  int32_t ck;
  entry **head, **tail, *e, *c;
  for (i = m >> 1;; i = (i + 1) & m) {
    if ((c = set->index[i]) && (c != GARBAGE)) { break; }
  }
  head = res;
  n = set->n_entries - 1;
  tail = res + n;
  ck = INT_OFFSET_VAL(c);
  while (n--) {
    for (;;) {
      i = (i + 1) & m;
      if ((e = set->index[i]) && (e != GARBAGE)) { break; }
    }
    if (INT_OFFSET_VAL(e) < ck) {
      *head++ = e;
    } else {
      *tail-- = e;
    }
  }
  *head = c;
  return set->n_entries > 2 ? head : NULL;
}

inline static entry **
part_int(entry **b, entry **e, int offset, int dir)
{
  int32_t ck;
  intptr_t d = e - b;
  entry **c;
  if (INT_OFFSET_VAL(*b) > INT_OFFSET_VAL(*e)) { swap(b, e); }
  if (d < 2) { return NULL; }
  c = b + (d >> 1);
  if (INT_OFFSET_VAL(*b) > INT_OFFSET_VAL(*c)) {
    swap(b, c);
  } else {
    if (INT_OFFSET_VAL(*c) > INT_OFFSET_VAL(*e)) { swap(c, e); }
  }
  if (d < 3) { return NULL; }
  b++;
  ck = INT_OFFSET_VAL(*c);
  swap(b, c);
  c = b;
  for (;;) {
    while (INT_OFFSET_VAL(*++b) < ck) ;
    while (INT_OFFSET_VAL(*--e) > ck) ;
    if (b >= e) { break; }
    swap(b, e);
  }
  swap(c, e);
  return e;
}

static void
_sort_int(entry **head, entry **tail, int limit, int offset, int dir)
{
  intptr_t rest;
  entry **c;
  if (head < tail && (c = part_int(head, tail, offset, dir))) {
    rest = limit - 1 - (c - head);
    _sort_int(head, c - 1, limit, offset, dir);
    if (rest > 0) { _sort_int(c + 1, tail, (int)rest, offset, dir); }
  }
}

inline static void
sort_int(sen_set *set, entry **res, int limit, int offset, int dir)
{
  entry **c = pack_int(set, res, offset, dir);
  if (c) {
    intptr_t rest = limit - 1 - (c - res);
    _sort_int(res, c - 1, limit, offset, dir);
    if (rest > 0 ) { _sort_int(c + 1, res + set->n_entries - 1, (int)rest, offset, dir); }
  }
}

inline static entry **
pack_func(sen_set *set, entry **res,
          int(*func)(sen_set *, entry **, sen_set *, entry **, void *),
          void *arg, int dir)
{
  uint32_t i, n, m = set->max_offset;
  entry **head, **tail, *e, *c;
  for (i = m >> 1;; i = (i + 1) & m) {
    if ((c = set->index[i]) && (c != GARBAGE)) { break; }
  }
  head = res;
  n = set->n_entries - 1;
  tail = res + n;
  while (n--) {
    for (;;) {
      i = (i + 1) & m;
      if ((e = set->index[i]) && (e != GARBAGE)) { break; }
    }
    if (func(set, &e, set, &c, arg) * dir < 0) {
      *head++ = e;
    } else {
      *tail-- = e;
    }
  }
  *head = c;
  return set->n_entries > 2 ? head : NULL;
}

inline static entry **
part_func(entry **b, entry **e,
          int(*func)(sen_set *, entry **, sen_set *, entry **, void *),
          void *arg, sen_set *set, int dir)
{
  intptr_t d = e - b;
  entry **c;
  if (func(set, b, set, e, arg) * dir > 0) { swap(b, e); }
  if (d < 2) { return NULL; }
  c = b + (d >> 1);
  if (func(set, b, set, c, arg) * dir > 0) {
    swap(b, c);
  } else {
    if (func(set, c, set, e, arg) * dir > 0) { swap(c, e); }
  }
  if (d < 3) { return NULL; }
  b++;
  swap(b, c);
  c = b;
  for (;;) {
    while (func(set, ++b, set, c, arg) * dir < 0) ;
    while (func(set, --e, set, c, arg) * dir > 0) ;
    if (b >= e) { break; }
    swap(b, e);
  }
  swap(c, e);
  return e;
}

static void
_sort_func(entry **head, entry **tail, int limit,
           int(*func)(sen_set *, entry **, sen_set *, entry **, void *),
           void *arg, sen_set *set, int dir)
{
  entry **c;
  if (head < tail && (c = part_func(head, tail, func, arg, set, dir))) {
    intptr_t rest = limit - 1 - (c - head);
    _sort_func(head, c - 1, limit, func, arg, set, dir);
    if (rest > 0) { _sort_func(c + 1, tail, (int)rest, func, arg, set, dir); }
  }
}

inline static void
sort_func(sen_set *set, entry **res, int limit,
          int(*func)(sen_set *, entry **, sen_set *, entry **, void *), void *arg, int dir)
{
  entry **c = pack_func(set, res, func, arg, dir);
  if (c) {
    intptr_t rest = limit - 1 - (c - res);
    _sort_func(res, c - 1, limit, func, arg, set, dir);
    if (rest > 0 ) {
      _sort_func(c + 1, res + set->n_entries - 1, (int)rest, func, arg, set, dir);
    }
  }
}

inline static int
func_str(sen_set *sa, entry **a, sen_set *sb, entry **b, void *arg)
{
  return strcmp(SEN_SET_STRKEY(*a), SEN_SET_STRKEY(*b));
}

inline static int
func_bin(sen_set *sa, entry **a, sen_set *sb, entry **b, void *arg)
{
  return memcmp(SEN_SET_BINKEY(*a), SEN_SET_BINKEY(*b), (uintptr_t)arg);
}

sen_set_eh *
sen_set_sort(sen_set *set, int limit, sen_set_sort_optarg *optarg)
{
  sen_ctx *ctx = set->ctx;
  entry **res;
  int dir = 1;
  if (!set) {
    SEN_LOG(sen_log_warning, "sen_set_sort: invalid argument !");
    return NULL;
  }
  if (!set->n_entries) { return NULL; }
  if (!(res = SEN_MALLOC(sizeof(entry *) * set->n_entries))) {
    SEN_LOG(sen_log_alert, "allocation of entries failed on sen_set_sort !");
    return NULL;
  }
  if (limit <= 0) {
    limit += set->n_entries;
    if (limit <= 0) {
      SEN_LOG(sen_log_alert, "limit is too small in sen_set_sort !");
      return NULL;
    }
  }
  if (limit > set->n_entries) { limit = set->n_entries; }
  set->limit = limit;
  if (optarg) {
    dir = (optarg->mode == sen_sort_ascending) ? 1 : -1;
    if (optarg->compar) {
      sort_func(set, res, limit, optarg->compar, optarg->compar_arg, dir);
      goto exit;
    } else if (optarg->compar_arg) {
      sort_int(set, res, limit, ((intptr_t)optarg->compar_arg) / sizeof(int32_t), dir);
      goto exit;
    }
  }
  switch (set->key_size) {
  case 0 :
    sort_func(set, res, limit, func_str, NULL, dir);
    break;
  case sizeof(uint32_t) :
    sort_int(set, res, limit, 0, dir);
    break;
  default :
    sort_func(set, res, limit, func_bin, (void *)(intptr_t)set->key_size, dir);
    break;
  }
exit :
  return res;
}

sen_rc
sen_set_element_info(sen_set *set, const sen_set_eh *e, void **key, void **value)
{
  if (!set || !e) { return sen_invalid_argument; }
  switch (set->key_size) {
  case 0 :
    if (key) { *key = SEN_SET_STRKEY(*e); }
    if (value) { *value = SEN_SET_STRVAL(*e); }
    break;
  case sizeof(uint32_t) :
    if (key) { *key = SEN_SET_INTKEY(*e); }
    if (value) { *value = SEN_SET_INTVAL(*e); }
    break;
  default :
    if (key) { *key = SEN_SET_BINKEY(*e); }
    if (value) { *value = SEN_SET_BINVAL(*e, set); }
    break;
  }
  return sen_success;
}

sen_set *
sen_set_union(sen_set *a, sen_set *b)
{
  void *key, *va, *vb;
  entry *e, **ep;
  uint32_t i, key_size = a->key_size, value_size = a->value_size;
  if (key_size != b->key_size || value_size != b->value_size) { return NULL; }
  for (i = b->n_entries, ep = b->index; i; ep++) {
    if ((e = *ep) && e != GARBAGE) {
      switch (key_size) {
      case 0 :
        key = SEN_SET_STRKEY(e);
        vb = SEN_SET_STRVAL(e);
        break;
      case sizeof(uint32_t) :
        key = SEN_SET_INTKEY(e);
        vb = SEN_SET_INTVAL(e);
        break;
      default :
        key = SEN_SET_BINKEY(e);
        vb = &e->dummy[key_size];
        break;
      }
      if (sen_set_at(a, key, &va)) {
        /* do copy of merge? */
      } else {
        if (sen_set_get(a, key, &va)) { memcpy(va, vb, value_size); }
      }
      i--;
    }
  }
  return a;
}

sen_set *
sen_set_subtract(sen_set *a, sen_set *b)
{
  void *key;
  entry *e, **ep, **dp;
  uint32_t i, key_size = a->key_size;
  if (key_size != b->key_size) { return NULL; }
  for (i = b->n_entries, ep = b->index; i; ep++) {
    if ((e = *ep) && e != GARBAGE) {
      switch (key_size) {
      case 0 :
        key = SEN_SET_STRKEY(e);
        break;
      case sizeof(uint32_t) :
        key = SEN_SET_INTKEY(e);
        break;
      default :
        key = SEN_SET_BINKEY(e);
        break;
      }
      if ((dp = sen_set_at(a, key, NULL))) { sen_set_del(a, dp); }
      i--;
    }
  }
  return a;
}

sen_set *
sen_set_intersect(sen_set *a, sen_set *b)
{
  void *key;
  entry *e, **ep;
  uint32_t i, key_size = a->key_size;
  if (key_size != b->key_size) { return NULL; }
  for (i = a->n_entries, ep = a->index; i; ep++) {
    if ((e = *ep) && e != GARBAGE) {
      switch (key_size) {
      case 0 :
        key = SEN_SET_STRKEY(e);
        break;
      case sizeof(uint32_t) :
        key = SEN_SET_INTKEY(e);
        break;
      default :
        key = SEN_SET_BINKEY(e);
        break;
      }
      if (!sen_set_at(b, key, NULL)) { sen_set_del(a, ep); }
      i--;
    }
  }
  return a;
}

int
sen_set_difference(sen_set *a, sen_set *b)
{
  void *key;
  entry *e, **ep, **dp;
  uint32_t count = 0, i, key_size = a->key_size;
  if (key_size != b->key_size) { return -1; }
  for (i = a->n_entries, ep = a->index; i; ep++) {
    if ((e = *ep) && e != GARBAGE) {
      switch (key_size) {
      case 0 :
        key = SEN_SET_STRKEY(e);
        break;
      case sizeof(uint32_t) :
        key = SEN_SET_INTKEY(e);
        break;
      default :
        key = SEN_SET_BINKEY(e);
        break;
      }
      if ((dp = sen_set_at(b, key, NULL))) {
        sen_set_del(b, dp);
        sen_set_del(a, ep);
        count++;
      }
      i--;
    }
  }
  return count;
}

sen_rc
sen_set_array_init(sen_set *set, uint32_t size)
{
  //  sen_ctx *ctx = set->ctx;
  SEN_ASSERT(!set->n_entries);
  SEN_ASSERT(!set->garbages);
  set->arrayp = 1;
  /*
  set->curr_entry = 0;
  if (set->chunks[SEN_SET_MAX_CHUNK]) {
    SEN_FREE(set->chunks[SEN_SET_MAX_CHUNK]);
  }
  if (!(set->chunks[SEN_SET_MAX_CHUNK] = SEN_CALLOC(set->entry_size * size))) {
    return sen_memory_exhausted;
  }
  */
  return sen_set_reset(set, size);
}

/* rset : sets with subrecs */

inline static int
rec_unit_size(sen_rec_unit unit, int rec_size)
{
  switch (unit) {
  case sen_rec_document :
    return sizeof(sen_id);
  case sen_rec_section :
    return sizeof(sen_id) + sizeof(int);
  case sen_rec_position :
    return sizeof(sen_id) + sizeof(int) + sizeof(int);
  default :
    return rec_size;
  }
}

sen_rc
sen_rset_init(sen_ctx *ctx, sen_set *set, sen_rec_unit record_unit, int record_size,
              sen_rec_unit subrec_unit, int subrec_size, unsigned int max_n_subrecs)
{
  sen_rc rc;
  record_size = rec_unit_size(record_unit, record_size);
  subrec_size = rec_unit_size(subrec_unit, subrec_size);
  if (record_unit != sen_rec_userdef && subrec_unit != sen_rec_userdef) {
    subrec_size -= record_size;
  }
  if (!set) { return sen_invalid_argument; }
  if (record_size < 0) { return sen_invalid_argument; }
  if ((rc = sen_set_init(ctx, set, record_size,
                         SEN_RSET_SCORE_SIZE + sizeof(int) +
                         max_n_subrecs * (SEN_RSET_SCORE_SIZE + subrec_size), 0))) {
    return rc;
  }
  set->record_unit = record_unit;
  set->subrec_unit = subrec_unit;
  set->record_size = record_size;
  set->subrec_size = subrec_size;
  set->max_n_subrecs = max_n_subrecs;
  return rc;
}

inline static void
subrecs_push(byte *subrecs, int size, int n_subrecs, int score, void *body, int dir)
{
  byte *v;
  int *c2;
  int n = n_subrecs - 1, n2;
  while (n) {
    n2 = (n - 1) >> 1;
    c2 = SEN_RSET_SUBRECS_NTH(subrecs,size,n2);
    if (SEN_RSET_SUBRECS_CMP(score, *c2, dir)) { break; }
    SEN_RSET_SUBRECS_COPY(subrecs,size,n,c2);
    n = n2;
  }
  v = subrecs + n * (size + SEN_RSET_SCORE_SIZE);
  *((int *)v) = score;
  memcpy(v + SEN_RSET_SCORE_SIZE, body, size);
}

inline static void
subrecs_replace_min(byte *subrecs, int size, int n_subrecs, int score, void *body, int dir)
{
  byte *v;
  int n = 0, n1, n2, *c1, *c2;
  for (;;) {
    n1 = n * 2 + 1;
    n2 = n1 + 1;
    c1 = n1 < n_subrecs ? SEN_RSET_SUBRECS_NTH(subrecs,size,n1) : NULL;
    c2 = n2 < n_subrecs ? SEN_RSET_SUBRECS_NTH(subrecs,size,n2) : NULL;
    if (c1 && SEN_RSET_SUBRECS_CMP(score, *c1, dir)) {
      if (c2 &&
          SEN_RSET_SUBRECS_CMP(score, *c2, dir) &&
          SEN_RSET_SUBRECS_CMP(*c1, *c2, dir)) {
        SEN_RSET_SUBRECS_COPY(subrecs,size,n,c2);
        n = n2;
      } else {
        SEN_RSET_SUBRECS_COPY(subrecs,size,n,c1);
        n = n1;
      }
    } else {
      if (c2 && SEN_RSET_SUBRECS_CMP(score, *c2, dir)) {
        SEN_RSET_SUBRECS_COPY(subrecs,size,n,c2);
        n = n2;
      } else {
        break;
      }
    }
  }
  v = subrecs + n * (size + SEN_RSET_SCORE_SIZE);
  memcpy(v, &score, SEN_RSET_SCORE_SIZE);
  memcpy(v + SEN_RSET_SCORE_SIZE, body, size);
}

void
sen_rset_add_subrec(sen_set *s, sen_rset_recinfo *ri, int score, void *body, int dir)
{
  int limit = s->max_n_subrecs;
  ri->score += score;
  ri->n_subrecs += 1;
  if (limit) {
    int ssize = s->subrec_size;
    int n_subrecs = SEN_RSET_N_SUBRECS(ri);
    if (limit < n_subrecs) {
      if (SEN_RSET_SUBRECS_CMP(score, *ri->subrecs, dir)) {
        subrecs_replace_min(ri->subrecs, ssize, limit, score, body, dir);
      }
    } else {
      subrecs_push(ri->subrecs, ssize, n_subrecs, score, body, dir);
    }
  }
}

sen_set *
sen_rset_group(sen_set *s, int limit, sen_group_optarg *optarg)
{
  sen_ctx *ctx = s->ctx;
  sen_set *g;
  sen_rset_recinfo *ri;
  sen_rec_unit unit;
  sen_set_cursor *c;
  const sen_recordh *rh;
  byte *key, *ekey, *gkey = NULL;
  int funcp, dir;
  unsigned int rsize;
  if (!s) { return NULL; }
  if (optarg) {
    unit = sen_rec_userdef;
    rsize = optarg->key_size;
    funcp = optarg->func ? 1 : 0;
    dir = (optarg->mode == sen_sort_ascending) ? -1 : 1;
  } else {
    unit = sen_rec_document;
    rsize = rec_unit_size(unit, sizeof(sen_id));
    funcp = 0;
    dir = 1;
  }
  if (funcp) {
    gkey = SEN_MALLOC(rsize ? rsize : 8192);
    if (!gkey) {
      SEN_LOG(sen_log_alert, "allocation for gkey failed !");
      return NULL;
    }
  } else {
    if (s->record_size <= rsize) { return NULL; }
  }
  if (!(c = sen_set_cursor_open(s))) {
    SEN_LOG(sen_log_alert, "sen_set_cursor_open on sen_set_group failed !");
    if (gkey) { SEN_FREE(gkey); }
    return NULL;
  }
  if (!(g = SEN_MALLOC(sizeof(sen_set)))) {
    sen_set_cursor_close(c);
    if (gkey) { SEN_FREE(gkey); }
    return NULL;
  }
  if (sen_rset_init(ctx, g, unit, rsize, s->record_unit, s->record_size, limit)) {
    SEN_LOG(sen_log_alert, "sen_rset_init in sen_set_group failed !");
    sen_set_cursor_close(c);
    SEN_FREE(g);
    if (gkey) { SEN_FREE(gkey); }
    return NULL;
  }
  while ((rh = sen_set_cursor_next(c, (void **)&key, (void **)&ri))) {
    if (funcp) {
      if (optarg->func(s, rh, gkey, optarg->func_arg)) { continue; }
      ekey = key;
    } else {
      gkey = key;
      ekey = key + rsize;
    }
    {
      sen_rset_recinfo *gri;
      if (sen_set_get(g, gkey, (void **)&gri)) {
        sen_rset_add_subrec(g, gri, ri->score, ekey, dir);
      }
    }
  }
  sen_set_cursor_close(c);
  if (funcp) { SEN_FREE(gkey); }
  return g;
}

sen_rc
sen_rset_subrec_info(sen_set *s, const sen_recordh *rh, int index,
                     sen_id *rid, int *section, int *pos, int *score, void **subrec)
{
  sen_rc rc;
  sen_rset_posinfo *pi;
  sen_rset_recinfo *ri;
  int *p, unit_size = SEN_RSET_SCORE_SIZE + s->subrec_size;
  if (!s || !rh || index < 0) { return sen_invalid_argument; }
  if ((unsigned int)index >= s->max_n_subrecs) { return sen_invalid_argument; }
  rc = sen_set_element_info(s, rh, (void **)&pi, (void **)&ri);
  if (rc) { return rc; }
  if (index >= ri->n_subrecs) { return sen_invalid_argument; }
  p = (int *)(ri->subrecs + index * unit_size);
  if (score) { *score = p[0]; }
  if (subrec) { *subrec = &p[1]; }
  switch (s->record_unit) {
  case sen_rec_document :
    if (rid) { *rid = pi->rid; }
    if (section) { *section = (s->subrec_unit != sen_rec_userdef) ? p[1] : 0; }
    if (pos) { *pos = (s->subrec_unit == sen_rec_position) ? p[2] : 0; }
    break;
  case sen_rec_section :
    if (rid) { *rid = pi->rid; }
    if (section) { *section = pi->sid; }
    if (pos) { *pos = (s->subrec_unit == sen_rec_position) ? p[1] : 0; }
    break;
  default :
    pi = (sen_rset_posinfo *)&p[1];
    switch (s->subrec_unit) {
    case sen_rec_document :
      if (rid) { *rid = pi->rid; }
      if (section) { *section = 0; }
      if (pos) { *pos = 0; }
      break;
    case sen_rec_section :
      if (rid) { *rid = pi->rid; }
      if (section) { *section = pi->sid; }
      if (pos) { *pos = 0; }
      break;
    case sen_rec_position :
      if (rid) { *rid = pi->rid; }
      if (section) { *section = pi->sid; }
      if (pos) { *pos = pi->pos; }
      break;
    default :
      if (rid) { *rid = 0; }
      if (section) { *section = 0; }
      if (pos) { *pos = 0; }
      break;
    }
    break;
  }
  return sen_success;
}

/* sen_records : sets with keys or/and cursor */

sen_records *
sen_records_open(sen_rec_unit record_unit,
                 sen_rec_unit subrec_unit, unsigned int max_n_subrecs)
{
  sen_records *r;
  if (!(r = SEN_GMALLOC(sizeof(sen_records)))) { return NULL; }
  if (sen_rset_init(&sen_gctx, r, record_unit, 0, subrec_unit, 0, max_n_subrecs)) {
    SEN_GFREE(r);
    return NULL;
  }
  return r;
}

void
sen_records_cursor_clear(sen_records *r)
{
  sen_ctx *ctx = r->ctx;
  if (r->sorted) {
    SEN_FREE(r->sorted);
    r->sorted = NULL;
  }
  if (r->cursor) {
    sen_set_cursor_close(r->cursor);
    r->cursor = NULL;
  }
  r->curr_rec = NULL;
}

sen_rc
sen_records_close(sen_records *r)
{
  sen_ctx *ctx;
  if (!r) { return sen_invalid_argument; }
  ctx = r->ctx;
  if (r->curr_rec) {
    sen_id *rid;
    sen_rset_recinfo *ri;
    if (!sen_set_element_info(r, r->curr_rec, (void **)&rid, (void **)&ri)) {
      SEN_LOG(sen_log_debug, "curr_rec: %d:%d", *rid, ri->score);
      // sen_log("sen_record_next=%d,%d", *rid, *(_sen_sym_key(r->keys, *rid)));
    }
  }
  sen_records_cursor_clear(r);
  return sen_set_close(r);
}

sen_rc
sen_records_reopen(sen_records *r, sen_rec_unit record_unit,
                   sen_rec_unit subrec_unit, unsigned int max_n_subrecs)
{
  sen_ctx *ctx;
  int record_size = rec_unit_size(record_unit, 0);
  int subrec_size = rec_unit_size(subrec_unit, 0);
  if (!r || record_size < 0 || (max_n_subrecs && subrec_size <= record_size)) {
    return sen_invalid_argument;
  }
  ctx = r->ctx;
  sen_records_cursor_clear(r);
  sen_set_fin(r);
  return sen_rset_init(ctx, r, record_unit, 0, subrec_unit, 0, max_n_subrecs);
}

sen_rc
sen_records_sort(sen_records *r, int limit, sen_sort_optarg *optarg)
{
  if (!r) { return sen_invalid_argument; }
  sen_records_cursor_clear(r);
  if (optarg) {
    r->sorted = sen_set_sort(r, limit, optarg);
  } else {
    sen_set_sort_optarg arg;
    arg.mode = sen_sort_descending;
    arg.compar = NULL;
    arg.compar_arg = (void *)(intptr_t)r->key_size;
    r->sorted = sen_set_sort(r, limit, &arg);
  }
  return r->sorted ? sen_success : sen_internal_error;
}

sen_rc
sen_records_rewind(sen_records *r)
{
  if (!r) { return sen_invalid_argument; }
  if (r->sorted) {
    r->curr_rec = NULL;
    return sen_success;
  } else {
    sen_records_cursor_clear(r);
    r->cursor = sen_set_cursor_open(r);
    return r->cursor ? sen_success : sen_internal_error;
  }
}

int
sen_records_next(sen_records *r, void *keybuf, int bufsize, int *score)
{
  if (!r) { return 0; }
  if (r->sorted) {
    if (r->curr_rec) {
      if (r->curr_rec - r->sorted >= r->limit - 1) { return 0; }
      r->curr_rec++;
    } else {
      if (r->limit > 0) { r->curr_rec = r->sorted; }
    }
  } else {
    if (!r->cursor && sen_records_rewind(r)) { return 0; }
    r->curr_rec = sen_set_cursor_next(r->cursor, NULL, NULL);
  }
  if (r->curr_rec) {
    sen_id *rid;
    sen_rset_recinfo *ri;
    if (!sen_set_element_info(r, r->curr_rec,  (void **)&rid, (void **)&ri)) {
      if (score) { *score = ri->score; }
      if (r->record_unit != sen_rec_userdef) {
        return r->keys ? sen_sym_key(r->keys, *rid, keybuf, bufsize) : r->record_size;
      } else {
        if (r->record_size <= bufsize) { memcpy(keybuf, rid, r->record_size); }
        return r->record_size;
      }
    }
  }
  return 0;
}

int
sen_records_find(sen_records *r, const void *key)
{
  sen_id rid;
  sen_rset_recinfo *ri;
  if (!r || !r->keys || r->record_unit != sen_rec_document) { return 0; }
  if (!(rid = sen_sym_at(r->keys, key))) { return 0; }
  if (!sen_set_at(r, &rid, (void **) &ri)) { return 0; }
  return ri->score;
}

int
sen_records_nhits(sen_records *r)
{
  if (!r) { return 0; }
  return r->n_entries;
}

int
sen_records_curr_score(sen_records *r)
{
  sen_rset_recinfo *ri;
  if (!r || !r->curr_rec) { return 0; }
  if (sen_set_element_info(r, r->curr_rec, NULL, (void **)&ri)) { return 0; }
  return ri->score;
}

int
sen_records_curr_key(sen_records *r, void *keybuf, int bufsize)
{
  sen_id *rid;
  if (!r || !r->curr_rec) { return 0; }
  if (sen_set_element_info(r, r->curr_rec, (void **)&rid, NULL)) {
    return 0;
  }
  if (r->record_unit != sen_rec_userdef) {
    return sen_sym_key(r->keys, *rid, keybuf, bufsize);
  } else {
    if (r->record_size <= bufsize) { memcpy(keybuf, rid, r->record_size); }
    return r->record_size;
  }
}

const sen_recordh *
sen_records_curr_rec(sen_records *r)
{
  if (!r) { return NULL; }
  return r->curr_rec;
}

sen_records *
sen_records_union(sen_records *a, sen_records *b)
{
  if (!a || !b) { return NULL; }
  if (a->keys != b->keys) { return NULL; }
  if (!sen_set_union(a, b)) { return NULL; }
  sen_records_close(b);
  sen_records_cursor_clear(a);
  return a;
}

sen_records *
sen_records_subtract(sen_records *a, sen_records *b)
{
  if (!a || !b) { return NULL; }
  if (a->keys != b->keys) { return NULL; }
  if (!sen_set_subtract(a, b)) { return NULL; }
  sen_records_close(b);
  sen_records_cursor_clear(a);
  return a;
}

sen_records *
sen_records_intersect(sen_records *a, sen_records *b)
{
  if (!a || !b) { return NULL; }
  if (a->keys != b->keys) { return NULL; }
  if (a->n_entries > b->n_entries) {
    sen_set c;
    memcpy(&c, a, sizeof(sen_records));
    memcpy(a, b, sizeof(sen_records));
    memcpy(b, &c, sizeof(sen_records));
  }
  if (!sen_set_intersect(a, b)) { return NULL; }
  sen_records_close(b);
  sen_records_cursor_clear(a);
  return a;
}

int
sen_records_difference(sen_records *a, sen_records *b)
{
  int count;
  if (!a || !b) { return -1; }
  if (a->keys != b->keys) { return -1; }
  if ((count = sen_set_difference(a, b)) >= 0) {
    sen_records_cursor_clear(a);
    sen_records_cursor_clear(b);
  }
  return count;
}

#define SUBRECS_CMP(a,b,dir) (((a) - (b))*(dir) > 0)
#define SUBRECS_NTH(subrecs,size,n) ((int *)(subrecs + n * (size + SEN_RSET_SCORE_SIZE)))
#define SUBRECS_COPY(subrecs,size,n,src) \
  (memcpy(subrecs + n * (size + SEN_RSET_SCORE_SIZE), src, size + SEN_RSET_SCORE_SIZE))

sen_rc
sen_records_group(sen_records *r, int limit, sen_group_optarg *optarg)
{
  sen_ctx *ctx;
  sen_set *g;
  if (!(g = sen_rset_group(r, limit, optarg))) {
    return sen_internal_error;
  }
  ctx = r->ctx;
  sen_records_cursor_clear(r);
  sen_set_fin(r);
  memcpy(r, g, sizeof(sen_set));
  SEN_FREE(g);
  return sen_success;
}

const sen_recordh *
sen_records_at(sen_records *r, const void *key, unsigned section, unsigned pos,
               int *score, int *n_subrecs)
{
  sen_rset_recinfo *ri;
  const sen_recordh *res;
  if (!r || !r->keys) { return NULL; }
  if (r->record_unit == sen_rec_userdef) {
    res = r->curr_rec = sen_set_at(r, key, (void **) &ri);
  } else {
    sen_rset_posinfo pi;
    if (!(pi.rid = sen_sym_at(r->keys, key))) { return NULL; }
    pi.sid = section;
    pi.pos = pos;
    res = r->curr_rec = sen_set_at(r, &pi, (void **) &ri);
  }
  if (res) {
    if (score) { *score = ri->score; }
    if (n_subrecs) { *n_subrecs = ri->n_subrecs; }
  }
  return res;
}

sen_rc
sen_record_info(sen_records *r, const sen_recordh *rh,
                void *keybuf, int bufsize, int *keysize,
                int *section, int *pos, int *score, int *n_subrecs)
{
  sen_rc rc;
  sen_rset_posinfo *pi;
  sen_rset_recinfo *ri;
  if (!r || !rh) { return sen_invalid_argument; }
  rc = sen_set_element_info(r, rh, (void **)&pi, (void **)&ri);
  if (rc) { return rc; }
  switch (r->record_unit) {
  case sen_rec_document :
    if ((keybuf && bufsize) || keysize) {
      int len = sen_sym_key(r->keys, pi->rid, keybuf, bufsize);
      if (keysize) { *keysize = len; }
    }
    if (section) { *section = 0; }
    if (pos) { *pos = 0; }
    break;
  case sen_rec_section :
    if ((keybuf && bufsize) || keysize) {
      int len = sen_sym_key(r->keys, pi->rid, keybuf, bufsize);
      if (keysize) { *keysize = len; }
    }
    if (section) { *section = pi->sid; }
    if (pos) { *pos = 0; }
    break;
  case sen_rec_position :
    if ((keybuf && bufsize) || keysize) {
      int len = sen_sym_key(r->keys, pi->rid, keybuf, bufsize);
      if (keysize) { *keysize = len; }
    }
    if (section) { *section = pi->sid; }
    if (pos) { *pos = pi->pos; }
    break;
  case sen_rec_userdef :
    if ((keybuf && bufsize) || keysize) {
      unsigned int len = r->record_size;
      if (!len) { len = (unsigned int)strlen((const char *)pi) + 1; }
      if ((unsigned int)bufsize >= len) { memcpy(keybuf, pi, len); }
      if (keysize) { *keysize = len; }
    }
    if (section) { *section = 0; }
    if (pos) { *pos = 0; }
    break;
  default :
    return sen_invalid_format;
    break;
  }
  if (score) { *score = ri->score; }
  if (n_subrecs) { *n_subrecs = ri->n_subrecs; }
  return sen_success;
}

sen_rc
sen_record_subrec_info(sen_records *r, const sen_recordh *rh, int index,
                       void *keybuf, int bufsize, int *keysize,
                       int *section, int *pos, int *score)
{
  sen_rc rc;
  sen_id rid;
  void *subrec;
  if ((rc = sen_rset_subrec_info(r, rh, index, &rid, section, pos, score, &subrec))) {
    return rc;
  }
  if ((keybuf && bufsize) || keysize) {
    if (rid) {
      int len = sen_sym_key(r->keys, rid, keybuf, bufsize);
      if (keysize) { *keysize = len; }
    } else if (r->record_unit == sen_rec_userdef && r->subrec_unit == sen_rec_userdef) {
      int len = (int) r->subrec_size;
      if (!len) { len = (unsigned int)strlen((char *)subrec) + 1; }
      if ((unsigned int)bufsize >= len) { memcpy(keybuf, subrec, len); }
      if (keysize) { *keysize = len; }
    }
  }
  return sen_success;
}
