/* Copyright(C) 2004,2005,2006 Brazil

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.
  
  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.
  
  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#include "senna_in.h"
#include "str.h"
#include "inv.h"
#include "store.h"
#include <string.h>

/* fixed sized elements */

/* Magic string identifying a fixed-size-element file. With its terminating
   NUL it is exactly 16 bytes, the size of sen_fse_header.idstr, and it is
   copied/compared as a 16-byte block. */
#define SEN_FSE_IDSTR "SENNA:FSE:01.00"
/* Segment size in bytes (1 << 22) passed to sen_io_create; also the upper
   bound on the element size accepted by sen_fse_create. */
#define SEN_FSE_SEGMENT_SIZE (1 << 22)

/* Header record of a fixed-size-element file, obtained through
   sen_io_header().
   NOTE(review): presumably persisted as-is by sen_io, so field order and
   sizes should not be changed -- confirm. */
struct sen_fse_header {
  char idstr[16];         /* file-type magic, compared against SEN_FSE_IDSTR */
  unsigned element_size;  /* bytes per element; sen_fse_create stores a power of two */
  sen_id curr_max;        /* largest id requested through sen_fse_get so far */
  uint32_t reserved[10];  /* not referenced anywhere in this file */
};

/*
 * Create a new fixed-size-element store at `path`.
 *
 * element_size is rounded up to the next power of two (minimum 1) and that
 * rounded size is what gets recorded in the file header.
 *
 * Returns a newly allocated handle, or NULL when element_size exceeds
 * SEN_FSE_SEGMENT_SIZE, when sen_io_create fails, or when the handle cannot
 * be allocated.
 */
sen_fse *
sen_fse_create(const char *path, unsigned int element_size)
{
  sen_io *io;
  int max_segments, n_elm, w_elm;
  sen_fse *fse = NULL;
  struct sen_fse_header *header;
  unsigned actual_size;
  if (element_size > SEN_FSE_SEGMENT_SIZE) {
    /* element_size is unsigned; %u keeps very large values from being
       reported as negative numbers. */
    SEN_LOG(sen_log_error, "element_size too large (%u)", element_size);
    return NULL;
  }
  /* round up to a power of two so that ids split into segment/offset by
     shift and mask */
  for (actual_size = 1; actual_size < element_size; actual_size *= 2) ;
  max_segments = ((SEN_ID_MAX + 1) / SEN_FSE_SEGMENT_SIZE) * actual_size;
  io = sen_io_create(path, sizeof(struct sen_fse_header),
                     SEN_FSE_SEGMENT_SIZE, max_segments, sen_io_auto, max_segments);
  if (!io) { return NULL; }
  header = sen_io_header(io);
  memcpy(header->idstr, SEN_FSE_IDSTR, 16);
  header->element_size = actual_size;
  header->curr_max = 0;
  if (!(fse = SEN_MALLOC(sizeof(sen_fse)))) {
    sen_io_close(io);
    return NULL;
  }
  /* n_elm: elements per segment; w_elm: log2(n_elm) */
  n_elm = SEN_FSE_SEGMENT_SIZE / header->element_size;
  for (w_elm = 22; (1 << w_elm) > n_elm; w_elm--);
  fse->io = io;
  fse->header = header;
  fse->element_mask =  n_elm - 1;
  fse->element_width = w_elm;
  return fse;
}

/*
 * Open an existing fixed-size-element store at `path`.
 *
 * The header magic and the recorded element size are validated before the
 * handle is built. Returns a newly allocated handle, or NULL when the file
 * cannot be opened, the header is not a valid FSE header, or the handle
 * cannot be allocated.
 */
sen_fse *
sen_fse_open(const char *path)
{
  sen_io *io;
  int n_elm, w_elm;
  sen_fse *fse = NULL;
  struct sen_fse_header *header;
  io = sen_io_open(path, sen_io_auto, UINT_MAX);
  if (!io) { return NULL; }
  header = sen_io_header(io);
  if (memcmp(header->idstr, SEN_FSE_IDSTR, 16)) {
    /* idstr is a fixed 16-byte field and need not be NUL-terminated when the
       file is not one of ours, so bound the read. */
    SEN_LOG(sen_log_error, "fse_idstr (%.16s)", header->idstr);
    sen_io_close(io);
    return NULL;
  }
  /* A zero element size would divide by zero below, and one larger than a
     segment would make n_elm 0 and drive the width loop to a negative shift. */
  if (!header->element_size || header->element_size > SEN_FSE_SEGMENT_SIZE) {
    SEN_LOG(sen_log_error, "fse element_size (%u)", header->element_size);
    sen_io_close(io);
    return NULL;
  }
  if (!(fse = SEN_MALLOC(sizeof(sen_fse)))) {
    sen_io_close(io);
    return NULL;
  }
  /* n_elm: elements per segment; w_elm: log2(n_elm) */
  n_elm = SEN_FSE_SEGMENT_SIZE / header->element_size;
  for (w_elm = 22; (1 << w_elm) > n_elm; w_elm--);
  fse->io = io;
  fse->header = header;
  fse->element_mask =  n_elm - 1;
  fse->element_width = w_elm;
  return fse;
}

/*
 * Report header values of a fixed-size-element store.
 *
 * Each output pointer is optional; a NULL pointer skips that value.
 * Returns sen_invalid_argument when fse is NULL, sen_success otherwise.
 */
sen_rc
sen_fse_info(sen_fse *fse, unsigned int *element_size, sen_id *curr_max)
{
  if (fse == NULL) {
    return sen_invalid_argument;
  }
  if (element_size != NULL) {
    *element_size = fse->header->element_size;
  }
  if (curr_max != NULL) {
    *curr_max = fse->header->curr_max;
  }
  return sen_success;
}

/*
 * Close a fixed-size-element store and release its handle.
 *
 * Returns sen_invalid_argument when fse is NULL; otherwise the result of
 * closing the underlying sen_io. The handle is freed in either case.
 */
sen_rc
sen_fse_close(sen_fse *fse)
{
  sen_rc io_rc;
  if (fse == NULL) {
    return sen_invalid_argument;
  }
  io_rc = sen_io_close(fse->io);
  SEN_FREE(fse);
  return io_rc;
}

/*
 * Remove the fixed-size-element store stored at `path`.
 *
 * Returns sen_invalid_argument when path is NULL; otherwise the result of
 * sen_io_remove, so a failed removal is no longer reported as success.
 */
sen_rc
sen_fse_remove(const char *path)
{
  if (!path) { return sen_invalid_argument; }
  return sen_io_remove(path);
}

/*
 * Return the address of the element slot for `id`, raising the recorded
 * curr_max in the header when `id` exceeds it.
 *
 * Returns NULL when fse is NULL, when id is greater than SEN_ID_MAX, or
 * when SEN_IO_SEG_MAP leaves the segment address NULL.
 */
void *
sen_fse_get(sen_fse *fse, sen_id id)
{
  void *p;
  /* NOTE(review): seg is only 16 bits wide; confirm that
     id >> element_width cannot exceed 65535 for large element sizes. */
  uint16_t seg;
  if (!fse || id > SEN_ID_MAX) { return NULL; }
  /* high bits of id select the segment, low bits the slot inside it */
  seg = id >> fse->element_width;
  SEN_IO_SEG_MAP(fse->io, seg, p);
  if (!p) { return NULL; }
  if (id > fse->header->curr_max) { fse->header->curr_max = id; }
  return (void *)(((byte *)p) + ((id & fse->element_mask) * fse->header->element_size));
}

/*
 * Return the address of the element slot for `id` without changing the
 * recorded curr_max.
 *
 * Returns NULL when fse is NULL, when id is greater than the header's
 * curr_max, or when SEN_IO_SEG_MAP leaves the segment address NULL.
 */
void *
sen_fse_at(sen_fse *fse, sen_id id)
{
  void *p;
  /* NOTE(review): seg is only 16 bits wide; confirm that
     id >> element_width cannot exceed 65535 for large element sizes. */
  uint16_t seg;
  if (!fse || id > fse->header->curr_max) { return NULL; }
  /* high bits of id select the segment, low bits the slot inside it */
  seg = id >> fse->element_width;
  SEN_IO_SEG_MAP(fse->io, seg, p);
  if (!p) { return NULL; }
  return (void *)(((byte *)p) + ((id & fse->element_mask) * fse->header->element_size));
}

/**** variable sized elements ****/

/* Magic string identifying a variable-size-element file. With its
   terminating NUL it is exactly 16 bytes, the size of sen_vse_header.idstr. */
#define SEN_VSE_IDSTR "SENNA:VSE:01.00"
/* Segment size in bytes (1 << 22) passed to sen_io_create; also the upper
   bound on max_element_size accepted by sen_vse_create. */
#define SEN_VSE_SEGMENT_SIZE (1 << 22)
/* Cap applied by sen_vse_create to the number of segments it requests. */
#define SEN_VSE_MAX_SEGMENTS (1 << 16)

/* Header record of a variable-size-element file, obtained through
   sen_io_header().
   NOTE(review): presumably persisted as-is by sen_io, so field order and
   sizes should not be changed -- confirm. */
struct sen_vse_header {
  char idstr[16];             /* file-type magic, compared against SEN_VSE_IDSTR */
  unsigned max_element_size;  /* value that was passed to sen_vse_create */
  unsigned max_segments;      /* segment count that was given to sen_io_create */
};

/* Per-element descriptor of the variable-size-element store, exposed as
   three overlapping views of the same storage. No function in this file
   reads or writes it yet. */
struct _sen_vse_einfo {
  union {
    uint64_t ll;        /* whole descriptor as a single 64-bit value */
    struct {
      uint32_t pos;     /* NOTE(review): presumably the element's position in the store -- confirm */
      uint8_t size[4];  /* packed size bytes, decoded by the ESIZE / ELEMENT_SIZE macros */
    } s;
    uint8_t bytes[8];   /* raw byte view */
  } u;
};

/* Threshold that selects which ESIZE variant below is compiled. */
#define SEN_VSE_MAX_ELEMENT_SIZE (1 << 24)

/* 24-bit little-endian value assembled from size[0..2]. `e` must point to an
   object with an `s` member shaped like the one in _sen_vse_einfo's union
   (presumably &einfo->u -- confirm against callers). */
#define _ESIZE(e) ((e)->s.size[0] + ((e)->s.size[1] << 8) + ((e)->s.size[2] << 16))

/* With the current SEN_VSE_MAX_ELEMENT_SIZE the condition is false, so ESIZE
   is simply _ESIZE.
   NOTE(review): the disabled variant folds bits 6-7 of size[3] in starting
   at bit 25, leaving bit 24 unreachable -- confirm the shift before ever
   enabling it. */
#if SEN_VSE_MAX_ELEMENT_SIZE > (1 << 24)
# define ESIZE(e) (_ESIZE(e) + (((e)->s.size[3] & 0xc0) << 19))
#else
# define ESIZE _ESIZE
#endif

/* Low five bits of size[3]. */
#define ELEMENT_LOG_SIZE(e) ((e)->s.size[3] & 0x1f)
/* When the low five bits of size[3] are non-zero the size is ESIZE(e);
   otherwise it is the top three bits of size[3] (a value from 0 to 7). */
#define ELEMENT_SIZE(e) (ELEMENT_LOG_SIZE(e) ? ESIZE(e) : (((e)->s.size[3]) >> 5))

/*
 * Create a new variable-size-element store at `path`.
 *
 * The number of segments requested from sen_io_create is
 * max_element_size * 128, capped at SEN_VSE_MAX_SEGMENTS.
 *
 * Returns a newly allocated handle, or NULL when max_element_size exceeds
 * SEN_VSE_SEGMENT_SIZE, when sen_io_create fails, or when the handle cannot
 * be allocated.
 */
sen_vse *
sen_vse_create(const char *path, unsigned int max_element_size)
{
  sen_io *io;
  int max_segments;
  sen_vse *vse = NULL;
  struct sen_vse_header *header;
  if (max_element_size > SEN_VSE_SEGMENT_SIZE) {
    /* max_element_size is unsigned; %u keeps very large values from being
       reported as negative numbers. */
    SEN_LOG(sen_log_error, "max_element_size too large (%u)", max_element_size);
    return NULL;
  }
  max_segments = max_element_size * 128;
  if (max_segments > SEN_VSE_MAX_SEGMENTS) { max_segments = SEN_VSE_MAX_SEGMENTS; }
  io = sen_io_create(path, sizeof(struct sen_vse_header),
                     SEN_VSE_SEGMENT_SIZE, max_segments, sen_io_auto, max_segments);
  if (!io) { return NULL; }
  header = sen_io_header(io);
  memcpy(header->idstr, SEN_VSE_IDSTR, 16);
  header->max_element_size = max_element_size;
  header->max_segments = max_segments;
  if (!(vse = SEN_MALLOC(sizeof(sen_vse)))) {
    sen_io_close(io);
    return NULL;
  }
  vse->io = io;
  vse->header = header;
  return vse;
}

/*
 * Open an existing variable-size-element store at `path`.
 *
 * Returns a newly allocated handle, or NULL when the file cannot be opened,
 * its header magic does not match SEN_VSE_IDSTR, or the handle cannot be
 * allocated.
 */
sen_vse *
sen_vse_open(const char *path)
{
  sen_io *io;
  sen_vse *vse = NULL;
  struct sen_vse_header *header;
  io = sen_io_open(path, sen_io_auto, UINT_MAX);
  if (!io) { return NULL; }
  header = sen_io_header(io);
  if (memcmp(header->idstr, SEN_VSE_IDSTR, 16)) {
    /* idstr is a fixed 16-byte field and need not be NUL-terminated when the
       file is not one of ours, so bound the read. */
    SEN_LOG(sen_log_error, "vse_idstr (%.16s)", header->idstr);
    sen_io_close(io);
    return NULL;
  }
  if (!(vse = SEN_MALLOC(sizeof(sen_vse)))) {
    sen_io_close(io);
    return NULL;
  }
  vse->io = io;
  vse->header = header;
  return vse;
}

/*
 * Report header values of a variable-size-element store.
 *
 * max_element_size is optional; when non-NULL it receives the value recorded
 * in the header at creation time. The header keeps no current-maximum id, so
 * *curr_max is never written.
 *
 * Returns sen_invalid_argument when vse is NULL, sen_success otherwise.
 */
sen_rc
sen_vse_info(sen_vse *vse, unsigned int *max_element_size, sen_id *curr_max)
{
  if (!vse) { return sen_invalid_argument; }
  if (max_element_size) { *max_element_size = vse->header->max_element_size; }
  (void)curr_max;  /* nothing to report: no such field in sen_vse_header */
  return sen_success;
}

/*
 * Close a variable-size-element store and release its handle.
 *
 * Returns sen_invalid_argument when vse is NULL; otherwise the result of
 * closing the underlying sen_io. The handle is freed in either case.
 */
sen_rc
sen_vse_close(sen_vse *vse)
{
  sen_rc io_rc;
  if (vse == NULL) {
    return sen_invalid_argument;
  }
  io_rc = sen_io_close(vse->io);
  SEN_FREE(vse);
  return io_rc;
}

/*
 * Remove the variable-size-element store stored at `path`.
 *
 * Returns sen_invalid_argument when path is NULL; otherwise the result of
 * sen_io_remove, so a failed removal is no longer reported as success.
 */
sen_rc
sen_vse_remove(const char *path)
{
  if (!path) { return sen_invalid_argument; }
  return sen_io_remove(path);
}

/*
 * Placeholder: stores nothing. Every argument is ignored and sen_success is
 * always returned.
 */
sen_rc
sen_vse_put(sen_vse *vse, sen_id id, const void *value, int value_len, int flags)
{
  (void)vse;
  (void)id;
  (void)value;
  (void)value_len;
  (void)flags;
  return sen_success;
}

/*
 * Placeholder: copies nothing into valbuf. Every argument is ignored and 0
 * is always returned.
 */
int
sen_vse_at(sen_vse *vse, sen_id id, void *valbuf, int buf_size)
{
  (void)vse;
  (void)id;
  (void)valbuf;
  (void)buf_size;
  return 0;
}

/*
 * Placeholder: every argument is ignored, *value_len is not written, and
 * NULL is always returned.
 */
const void *
sen_vse_ref(sen_vse *vse, sen_id id, int *value_len)
{
  (void)vse;
  (void)id;
  (void)value_len;
  return NULL;
}

/*
 * Placeholder: does nothing. Both arguments are ignored and sen_success is
 * always returned.
 */
sen_rc
sen_vse_unref(sen_vse *vse, sen_id id)
{
  (void)vse;
  (void)id;
  return sen_success;
}

/*
 * Placeholder: both arguments are ignored and 0 is always returned.
 */
int
sen_vse_size(sen_vse *vse, sen_id id)
{
  (void)vse;
  (void)id;
  return 0;
}

/*
 * Placeholder: allocates nothing. Every argument is ignored, *value is not
 * written, and NULL is always returned.
 */
sen_vse_einfo *
sen_vse_alloc(sen_vse *vse, int element_size, void **value)
{
  (void)vse;
  (void)element_size;
  (void)value;
  return NULL;
}

/*
 * Placeholder: replaces nothing. Every argument is ignored and sen_success
 * is always returned.
 */
sen_rc
sen_vse_replace(sen_vse *vse, sen_id id, sen_vse_einfo *ei)
{
  (void)vse;
  (void)id;
  (void)ei;
  return sen_success;
}

/**** vgram ****/

/* Process-wide statistics, logged by sen_store_close. Objects with static
   storage duration start at zero. */
static int len_sum;     /* sum of buffer capacities (b->len) seen by sen_store_update */
static int img_sum;     /* sum of encoded image sizes in bytes produced by sen_store_update */
static int simple_sum;  /* sum of bytes SEN_B_ENC emits per id in sen_store_buf_add */
static int skip_sum;    /* number of zeroed id slots skipped by sen_store_update */

/*
 * Create a store whose vgram symbol table lives at `path`. The symbol table
 * is created with a key size of two sen_id values.
 *
 * Returns the new store, or NULL when either the handle or the symbol table
 * cannot be created.
 */
sen_store *
sen_store_create(const char *path)
{
  sen_store *store = SEN_MALLOC(sizeof(sen_store));
  if (!store) {
    return NULL;
  }
  store->vgram = sen_sym_create(path, sizeof(sen_id) * 2, 0, sen_enc_none);
  if (store->vgram) {
    return store;
  }
  SEN_FREE(store);
  return NULL;
}

sen_store *
sen_store_open(const char *path)
{
  sen_store *s;
  if (!(s = SEN_MALLOC(sizeof(sen_store)))) { return NULL; }
  s->vgram = sen_sym_open(path);
  if (!s->vgram) {
    SEN_FREE(s);
    return NULL;
  }
  return s;
}

/*
 * Allocate a store buffer with room for `len` term ids and a pool of vnodes.
 *
 * Returns the new buffer, or NULL when any of the three allocations fails
 * (earlier allocations are released first).
 */
sen_store_buf *
sen_store_buf_open(size_t len)
{
  sen_store_buf *buf = SEN_MALLOC(sizeof(sen_store_buf));
  if (!buf) {
    return NULL;
  }
  buf->len = len;

  /* term id array: tvs = start, tvp = write cursor, tve = end */
  buf->tvp = SEN_MALLOC(sizeof(sen_id) * len);
  buf->tvs = buf->tvp;
  if (!buf->tvp) {
    SEN_FREE(buf);
    return NULL;
  }
  buf->tve = buf->tvs + len;

  /* vnode pool: vps = start, vpp = next free node, vpe = end.
     NOTE(review): the pool is allocated for len * 2 nodes but vpe is set
     len nodes past the start -- confirm which of the two is intended. */
  buf->vpp = SEN_MALLOC(sizeof(sen_store_vnode) * len * 2);
  buf->vps = buf->vpp;
  if (!buf->vpp) {
    SEN_FREE(buf->tvp);
    SEN_FREE(buf);
    return NULL;
  }
  buf->vpe = buf->vps + len;
  return buf;
}

/*
 * Append term id `tid` to the buffer if there is room left; when the buffer
 * is full the id is dropped. Independently of that, the number of bytes
 * SEN_B_ENC needs for `tid` is added to the simple_sum statistic.
 *
 * Always returns sen_success.
 */
sen_rc
sen_store_buf_add(sen_store_buf *b, sen_id tid)
{
  uint8_t scratch[8];
  uint8_t *cursor = scratch;
  if (b->tvp < b->tve) {
    *b->tvp = tid;
    b->tvp++;
  }
  /* encode into a throw-away buffer just to measure the encoded length */
  SEN_B_ENC(tid, cursor);
  simple_sum += cursor - scratch;
  return sen_success;
}

/* Key looked up in the vgram symbol table by sen_store_update. Two sen_id
   fields, matching the key size (sizeof(sen_id) * 2) that sen_store_create
   passes to sen_sym_create. */
typedef struct {
  sen_id vid;  /* vgram id of the prefix (0 when there is none) */
  sen_id tid;  /* term id that extends the prefix */
} vgram_key;

/*
 * Process the term ids collected in `b` for one record.
 *
 *  1. For every position, extend the vnode tree hanging off the term's
 *     update spec (looked up in `terms`) with the terms that follow it, and
 *     count how often each sequence occurs.
 *  2. Rewrite the id array in place: a position that starts a sufficiently
 *     frequent sequence becomes `vid * 2 + 1` (the vgram id is obtained from
 *     store->vgram) and the covered positions are zeroed; any other position
 *     becomes `tid * 2`. Occurrences of each resulting code are counted.
 *  3. Encode the code table and the rewritten sequence with SEN_B_ENC into a
 *     temporary buffer. Only its size is kept (added to the file-level
 *     statistics); the buffer itself is discarded.
 *
 * `rid` is currently unused. Nothing is done when `b` is NULL or empty.
 *
 * Returns sen_success, or sen_memory_exhausted when a temporary set, its
 * entries, the sort result, or the encode buffer cannot be allocated.
 */
sen_rc
sen_store_update(sen_store *store, sen_id rid, sen_store_buf *b, sen_set *terms)
{
  sen_inv_updspec **u;
  if (b && b->tvs < b->tvp) {
    sen_id *t0, *tn;
    /* pass 1: grow the vnode trees and count sequence frequencies */
    for (t0 = b->tvs; t0 < b->tvp - 1; t0++) {
      sen_store_vnode *v, **vp;
      /* NOTE(review): the lookup result is not checked; every id in the
         buffer is assumed to be present in `terms` -- confirm with callers. */
      sen_set_at(terms, t0, (void **) &u);
      vp = &(*u)->vnodes;
      for (tn = t0 + 1; tn < b->tvp; tn++) {
        /* search the sibling list (linked through cdr) for the next term */
        for (v = *vp; v && v->tid != *tn; v = v->cdr) ;
        if (!v) {
          if (b->vpp < b->vpe) {
            v = b->vpp++;
          } else {
            /* TODO: vnode pool exhausted; the sequence is not recorded */
            break;
          }
          /* push a fresh node at the head of the sibling list */
          v->car = NULL;
          v->cdr = *vp;
          *vp = v;
          v->tid = *tn;
          v->vid = 0;
          v->freq = 0;
          v->len = tn - t0;
        }
        v->freq++;
        /* only descend through nodes that already own a vgram id */
        if (v->vid) {
          vp = &v->car;
        } else {
          break;
        }
      }
    }
    {
      /* th: code -> occurrence count, later reused as code -> rank */
      sen_set *th = sen_set_open(sizeof(sen_id), sizeof(int), 0);
      if (!th) { return sen_memory_exhausted; }
      if (t0 == b->tvp) { SEN_LOG(sen_log_debug, "t0 == tvp"); }
      /* pass 2: rewrite the id array into vgram / plain-term codes */
      for (t0 = b->tvs; t0 < b->tvp; t0++) {
        sen_id vid, vid0 = *t0, vid1 = 0;
        sen_store_vnode *v, *v2 = NULL, **vp;
        sen_set_at(terms, t0, (void **) &u);
        vp = &(*u)->vnodes;
        for (tn = t0 + 1; tn < b->tvp; tn++) {
          for (v = *vp; v; v = v->cdr) {
            /* unlink unregistered nodes that are too rare to be worth it */
            if (!v->vid && (v->freq < 2 || v->freq * v->len < 4)) {
              *vp = v->cdr;
              v->freq = 0;
            }
            if (v->tid == *tn) { break; }
            vp = &v->cdr;
          }
          if (v) {
            if (v->freq) {
              /* remember the deepest usable node and the vid of its prefix */
              v2 = v;
              vid1 = vid0;
              vid0 = v->vid;
            }
            if (v->vid) {
              vp = &v->car;
              continue;
            }
          }
          break;
        }
        if (v2) {
          if (!v2->vid) {
            vgram_key key;
            key.vid = vid1;
            key.tid = v2->tid;
            v2->vid = sen_sym_get(store->vgram, (char *)&key);
          }
          /* odd code = vgram; the positions it covers are zeroed and skipped */
          vid = *t0 = v2->vid * 2 + 1;
          memset(t0 + 1, 0, sizeof(sen_id) * v2->len);
          t0 += v2->len;
        } else {
          /* even code = plain term */
          vid = *t0 *= 2;
        }
        {
          int *tf = NULL;
          sen_set_get(th, &vid, (void **) &tf);
          if (!tf) {
            /* no slot was produced: fail cleanly instead of dereferencing
               an unset pointer */
            sen_set_close(th);
            return sen_memory_exhausted;
          }
          (*tf)++;
        }
      }
      if (!th->n_entries) { SEN_LOG(sen_log_debug, "th->n_entries == 0"); }
      {
        int j = 0;
        int skip = 0;
        sen_set_eh *ehs, *ehp, *ehe;
        sen_set_sort_optarg arg;
        /* Up to 1 + n_entries + (number of ids) values are encoded below.
           Reserve 8 bytes for each, the same per-value bound that
           sen_store_buf_add uses for a single SEN_B_ENC; the former
           b->len * 2 bytes could be overrun by multi-byte encodings. */
        size_t n_values = 1 + (size_t)th->n_entries + (size_t)(b->tvp - b->tvs);
        uint8_t *ps = SEN_MALLOC(n_values * 8), *pp;
        if (!ps) {
          sen_set_close(th);
          return sen_memory_exhausted;
        }
        pp = ps;
        arg.mode = sen_sort_descending;
        arg.compar = NULL;
        arg.compar_arg = (void *)(intptr_t)sizeof(sen_id);
        arg.compar_arg0 = NULL;
        ehs = sen_set_sort(th, 0, &arg);
        if (!ehs) {
          SEN_FREE(ps);
          sen_set_close(th);
          return sen_memory_exhausted;
        }
        /* code table: entry count, then each code in sorted order; the
           entry's value is overwritten with its rank j */
        SEN_B_ENC(th->n_entries, pp);
        for (ehp = ehs, ehe = ehs + th->n_entries; ehp < ehe; ehp++, j++) {
          int *id = (int *)SEN_SET_INTVAL(*ehp);
          SEN_B_ENC(*SEN_SET_INTKEY(*ehp), pp);
          *id = j;
        }
        /* body: the rank of every non-zero code, in sequence order */
        for (t0 = b->tvs; t0 < b->tvp; t0++) {
          if (*t0) {
            int *id;
            if (!sen_set_at(th, t0, (void **) &id)) {
              SEN_LOG(sen_log_error, "lookup error (%d)", *t0);
              /* id was not set by the failed lookup; do not dereference it */
              continue;
            }
            SEN_B_ENC(*id, pp);
          } else {
            skip++;
          }
        }
        len_sum += b->len;
        img_sum += pp - ps;
        skip_sum += skip;
        SEN_FREE(ehs);
        SEN_FREE(ps);
      }
      sen_set_close(th);
    }
  }
  return sen_success;
}

/*
 * Release a store buffer together with its term id array and vnode pool.
 *
 * Returns sen_invalid_argument when b is NULL, sen_success otherwise.
 */
sen_rc
sen_store_buf_close(sen_store_buf *b)
{
  if (b == NULL) {
    return sen_invalid_argument;
  }
  if (b->tvs != NULL) {
    SEN_FREE(b->tvs);
  }
  if (b->vps != NULL) {
    SEN_FREE(b->vps);
  }
  SEN_FREE(b);
  return sen_success;
}

/*
 * Log the accumulated statistics at debug level, close the vgram symbol
 * table, and release the store handle.
 *
 * Returns sen_invalid_argument when store is NULL, sen_success otherwise.
 */
sen_rc
sen_store_close(sen_store *store)
{
  if (store == NULL) {
    return sen_invalid_argument;
  }
  SEN_LOG(sen_log_debug, "len=%d img=%d skip=%d simple=%d", len_sum, img_sum, skip_sum, simple_sum);
  sen_sym_close(store->vgram);
  SEN_FREE(store);
  return sen_success;
}
