/* Copyright(C) 2004 Brazil

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.
  
  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.
  
  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#include "inv.h"
#include "sym.h"
#include "str.h"

#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <getopt.h>

/* Address of the first byte immediately after the object that p points to,
   as a byte pointer. */
#define NEXT_ADDR(p) (((byte *)(p)) + sizeof *(p))

/* Fixed-size header that precedes each variable-length record in a buffer.
   inv_inspect() follows `step` as the position of the next record and stops
   when it is 0; `jump` is not read anywhere in this file. */
typedef struct {
  uint16_t step;
  uint16_t jump;
} buffer_rec;

/* Treat b as an array of buffer_rec and return a pointer to element pos. */
#define BUFFER_REC_AT(b,pos) ((buffer_rec *)(b) + (pos))

/* copied from inv.c */

/*
void
sen_inv_describe(sen_inv *inv)
{
  int i, j, n = 0;
  puts("chunks");
  for (i = 0; i < 128; i++) {
    for (j = 0; j < 64; j++) {
      printf("%d", inv->header->chunks[i * 64 + j]);
      n += inv->header->chunks[i * 64 + j];
    }
    puts(":");
  }
  printf("n=%d\n", n);
  puts("segments");
  {
    int a = 0, b = 0;
  for (i = 0; i < 128; i++) {
    for (j = 0; j < 64; j++) {
      if (inv->header->segments[i * 64 + j] & SEGMENT_ARRAY) {
        a++;
      }
      if (inv->header->segments[i * 64 + j] & SEGMENT_BUFFER) {
        b++;
      }
      printf("%x ", inv->header->segments[i * 64 + j]);
    }
    puts(":");
  }
  printf("a=%d, b=%d\n", a, b);
  }
}

void
checkdb(sen_index *i)
{
  int j;
  buffer *b;
  sen_inv *inv = i->inv;
  sen_io_seginfo *si;
  unsigned int size = 0;
  for (j = 0; j < 1024; j++) {
    si = &inv->binfo[j];
    if (sen_io_seg_bind(inv->seg, si)) { continue; }
    b = si->addr;
    if (b->header.chunk != CHUNK_NOT_ASSIGNED) {
      printf("j=%d c=%d s=%d f=%d n=%d\n",
             j,
             b->header.chunk,
             b->header.chunk_size,
             b->header.buffer_free,
             b->header.nterms);
    }
    size += b->header.chunk_size;
  }
  printf("total size = %u\n", size);
}

*/

/* Non-zero when -v was given on the command line (set in main());
   makes inv_inspect() print one line of statistics per term. */
int verbose;

/* current format
 *
 * Returns the number of bytes SEN_B_ENC needs to encode the document part
 * of one posting:
 *   rid - lrid, sid - lsid, then
 *     tf * 2 + 1 followed by score   when score is non-zero,
 *     tf * 2                         otherwise.
 * psize is accepted for signature parity with calc_dsize1/2 but is not
 * encoded in this format.
 * Returns 0 (after reporting to stderr) if the scratch buffer cannot be
 * allocated.
 */
int
calc_dsize0(int lrid, int rid, int lsid, int sid, int tf, int score, int psize)
{
  char *buffer, *p;
  int size;
  /* Size arithmetic is done in size_t so a large tf cannot overflow int. */
  p = buffer = malloc((size_t)tf * 4 + 16);
  if (!buffer) {
    fputs("calc_dsize0: malloc failed\n", stderr);
    return 0;
  }
  SEN_B_ENC(rid - lrid, p);
  SEN_B_ENC(sid - lsid, p);
  if (score) {
    SEN_B_ENC(tf * 2 + 1, p);
    SEN_B_ENC(score, p);
  } else {
    SEN_B_ENC(tf * 2, p);
  }
  (void)psize;
  /* Take the length before free(): the pointer values are indeterminate
     once the buffer has been released. */
  size = (int)(p - buffer);
  free(buffer);
  return size;
}

/* format1: with psize
 *
 * Returns the number of bytes SEN_B_ENC needs to encode the document part
 * of one posting in a candidate format where the low bit of each value says
 * whether more fields follow (rgap = rid - lrid, sgap = sid - lsid,
 * pgap = psize - tf):
 *   rgap*2                                  if sgap==1, tf==1, score==0, pgap==0
 *   rgap*2+1, pgap*2                        if sgap==1, tf==1, score==0
 *   rgap*2+1, pgap*2+1, tf*2                if sgap==1, score==0
 *   rgap*2+1, pgap*2+1, tf*2+1, sgap, score otherwise
 * Returns 0 (after reporting to stderr) if the scratch buffer cannot be
 * allocated.
 */
int
calc_dsize1(int lrid, int rid, int lsid, int sid, int tf, int score, int psize)
{
  char *buffer, *p;
  int size;
  int rgap = rid - lrid;
  int sgap = sid - lsid;
  int pgap = psize - tf;
  /* Size arithmetic is done in size_t so a large tf cannot overflow int. */
  p = buffer = malloc((size_t)tf * 4 + 16);
  if (!buffer) {
    fputs("calc_dsize1: malloc failed\n", stderr);
    return 0;
  }
  if (sgap == 1 && tf == 1 && score == 0 && pgap == 0) {
    SEN_B_ENC(rgap * 2, p);
  } else {
    SEN_B_ENC(rgap * 2 + 1, p);
    if (sgap == 1 && tf == 1 && score == 0) {
      SEN_B_ENC(pgap * 2, p);
    } else {
      SEN_B_ENC(pgap * 2 + 1, p);
      if (sgap == 1 && score == 0) {
        SEN_B_ENC(tf * 2, p);
      } else {
        SEN_B_ENC(tf * 2 + 1, p);
        SEN_B_ENC(sgap, p);
        SEN_B_ENC(score, p);
      }
    }
  }
  /* Take the length before free(): the pointer values are indeterminate
     once the buffer has been released. */
  size = (int)(p - buffer);
  free(buffer);
  return size;
}

/* format2: without psize
 *
 * Returns the number of bytes SEN_B_ENC needs to encode the document part
 * of one posting in a candidate format where the low bit of each value says
 * whether more fields follow (rgap = rid - lrid, sgap = sid - lsid):
 *   rgap*2                        if sgap==1, tf==1, score==0
 *   rgap*2+1, tf*2                if sgap==1, score==0
 *   rgap*2+1, tf*2+1, sgap, score otherwise
 * psize is accepted for signature parity with calc_dsize1 but is not
 * encoded in this format.
 * Returns 0 (after reporting to stderr) if the scratch buffer cannot be
 * allocated.
 */
int
calc_dsize2(int lrid, int rid, int lsid, int sid, int tf, int score, int psize)
{
  char *buffer, *p;
  int size;
  int rgap = rid - lrid;
  int sgap = sid - lsid;
  /* Size arithmetic is done in size_t so a large tf cannot overflow int. */
  p = buffer = malloc((size_t)tf * 4 + 16);
  if (!buffer) {
    fputs("calc_dsize2: malloc failed\n", stderr);
    return 0;
  }
  if (sgap == 1 && tf == 1 && score == 0) {
    SEN_B_ENC(rgap * 2, p);
  } else {
    SEN_B_ENC(rgap * 2 + 1, p);
    if (sgap == 1 && score == 0) {
      SEN_B_ENC(tf * 2, p);
    } else {
      SEN_B_ENC(tf * 2 + 1, p);
      SEN_B_ENC(sgap, p);
      SEN_B_ENC(score, p);
    }
  }
  (void)psize;
  /* Take the length before free(): the pointer values are indeterminate
     once the buffer has been released. */
  size = (int)(p - buffer);
  free(buffer);
  return size;
}

/*
 * Walk every term id of inv's lexicon (1 .. header->nrecords), open an
 * inverted-index cursor for it and accumulate per-term and overall figures:
 *   df      number of posting records seen
 *   nposts  sum of the per-record position counts (tf >> 1)
 *   size    bytes consumed by the records (buffer part includes the
 *           buffer_rec header; chunk part is cpe - cp)
 *   dsize   bytes of the document part (rid, sid, tf and optional score)
 *   psize   bytes of the position part
 *   dsize1/dsize2  document-part bytes as they would be under
 *           calc_dsize1() / calc_dsize2()
 * With `verbose` set, one line per term is printed; the totals are always
 * printed at the end.
 *
 * NOTE(review): all totals are plain int and could overflow on a large
 * index - confirm expected index sizes.
 * NOTE(review): several unsigned values (uint32_t lrid/lsid, and presumably
 * tid and the cursor's rid/sid) are printed with %d - confirm the types.
 */
static inline void
inv_inspect(sen_inv *inv)
{
  int n_terms = 0;
  int n_terms_with_hits = 0;
  int total_df = 0;
  int total_size = 0;
  int total_dsize = 0;
  int total_psize = 0;
  int total_dsize1 = 0;
  int total_dsize2 = 0;
  int total_nposts = 0;
  sen_id tid;
  sen_inv_cursor *c;
  sen_sym *lexicon = inv->lexicon;
  if (verbose) { puts("     tid,      df,    nposts,      size   |  term"); }
  for (tid = 1; tid <= lexicon->header->nrecords; tid++) {
    int ps, df = 0, nposts = 0, size = 0, dsize = 0, psize = 0, dsize1 = 0, dsize2 = 0;
    /* Every term id is counted, even when no cursor can be opened for it. */
    n_terms++;
    if (!(c = sen_inv_cursor_open(inv, tid))) { continue; }
    n_terms_with_hits++;
    if (c->buf) {
      uint8_t *pp;
      uint32_t tf, gap;
      uint32_t lrid, lsid;
      /* Buffer part: follow the record chain until the next position is 0. */
      while (c->nextb) {
        lrid = c->pb.rid, lsid = c->pb.sid; /* for check */
        buffer_rec *br = BUFFER_REC_AT(c->buf, c->nextb);
        /* The encoded payload starts right after the buffer_rec header. */
        pp = c->bp = NEXT_ADDR(br);
        SEN_B_DEC(c->pb.rid, c->bp);
        SEN_B_DEC(c->pb.sid, c->bp);
        /* (rid, sid) must be strictly greater than the previous record's. */
        if (lrid > c->pb.rid || (lrid == c->pb.rid && lsid >= c->pb.sid)) {
          printf("corrupt entry!! (%d:%d) -> (%d:%d)", lrid, lsid, c->pb.rid, c->pb.sid);
        }
        c->nextb = br->step;
        /* Low bit of the decoded tf value flags a following score;
           the remaining bits are the position count. */
        SEN_B_DEC(tf, c->bp);
        if (tf & 1) { SEN_B_DEC(c->pb.score, c->bp); } else { c->pb.score = 0; }
        dsize += c->bp - pp;
        pp = c->bp;
        c->pb.rest = c->pb.tf = tf >> 1;
        c->pb.pos = 0;
        nposts += c->pb.tf;
        /* Decode (and discard) every position just to measure its size. */
        while (c->pb.rest--) { SEN_B_DEC(gap, c->bp); }
        ps = c->bp - pp;
        psize += ps;
        size += c->bp - (uint8_t *)br;
        /* in buffer part, rid and sid are stored as is (not gapped) */
        dsize1 += calc_dsize1(0, c->pb.rid, 0, c->pb.sid, c->pb.tf, c->pb.score, ps);
        dsize2 += calc_dsize2(0, c->pb.rid, 0, c->pb.sid, c->pb.tf, c->pb.score, ps);
        df++;
      }
      /* Chunk part: the whole remaining range [cp, cpe) is counted as size. */
      size += c->cpe - c->cp;
      lrid = 0; lsid = 0;
      while (c->cp < c->cpe) {
        pp = c->cp;
        /* rid is stored as a gap; a non-zero gap restarts sid from 0. */
        SEN_B_DEC(gap, c->cp);
        c->pc.rid += gap;
        if (gap) { c->pc.sid = 0; }
        SEN_B_DEC(gap, c->cp);
        c->pc.sid += gap;
        /* Same tf/score layout as in the buffer part. */
        SEN_B_DEC(tf, c->cp);
        if (tf & 1) { SEN_B_DEC(c->pc.score, c->cp); } else { c->pc.score = 0; }
        dsize += c->cp - pp;
        pp = c->cp;
        c->pc.rest = c->pc.tf = tf >> 1;
        c->pc.pos = 0;
        // while (c->cp < c->cpe && c->pc.rest--) { SEN_B_SKIP(c->cp); }
        nposts += c->pc.tf;
        /* Decode positions, but never read past the end of the chunk. */
        while (c->cp < c->cpe && c->pc.rest--) { SEN_B_DEC(gap, c->cp); }
        ps = c->cp - pp;
        psize += ps;
        /* For a new rid the sid gap handed to calc_dsize* is relative to 0. */
        if (lrid != c->pc.rid) { lsid = 0; }
        dsize1 += calc_dsize1(lrid, c->pc.rid, lsid, c->pc.sid, c->pc.tf, c->pc.score, ps);
        dsize2 += calc_dsize2(lrid, c->pc.rid, lsid, c->pc.sid, c->pc.tf, c->pc.score, ps);
        df++;
        lrid = c->pc.rid;
        lsid = c->pc.sid;
      }
    } else {
      /* A cursor without a buffer is counted as one document with one
         posting and contributes nothing to the size figures. */
      df++;
      nposts++;
    }
    if (verbose) {
      printf("%8d,%8d,%10d,%10d   |  %s\n",
             tid, df, nposts, size, _sen_sym_key(lexicon, tid));
    }
    total_df += df;
    total_nposts += nposts;
    total_size += size;
    total_dsize += dsize;
    total_psize += psize;
    total_dsize1 += dsize1;
    total_dsize2 += dsize2;
    sen_inv_cursor_close(c);
  }
  printf("total amount of doc freq.      %12d\n", total_df);
  printf("total number of postings       %12d\n", total_nposts);
  printf("total size of inv entries      %12d\n", total_size);
  printf("total size of pos part in inv  %12d\n", total_psize);
  printf("total size of doc part in inv  %12d\n", total_dsize);
  printf("total size of doc part (1)     %12d\n", total_dsize1);
  printf("total size of doc part (2)     %12d\n", total_dsize2);
  printf("total number of terms          %12d\n", n_terms);
  printf("total number of terms w hits   %12d\n", n_terms_with_hits);
}

/*
 * Print the index-level parameters reported by sen_index_info() for i,
 * one per line, then the inverted-index statistics from inv_inspect().
 * If sen_index_info() returns non-zero, the code is reported on stderr and
 * nothing else is printed.
 */
static inline void
index_inspect(sen_index *i)
{
  sen_rc rc;
  sen_encoding encoding;
  int key_size, flags, initial_n_segments;
  unsigned nrecords_keys, file_size_keys, nrecords_lexicon, file_size_lexicon, inv_seg_size, inv_chunk_size;
  rc = sen_index_info(i, &key_size, &flags, &initial_n_segments, &encoding,
                      &nrecords_keys, &file_size_keys,
                      &nrecords_lexicon, &file_size_lexicon,
                      &inv_seg_size, &inv_chunk_size);
  if (rc) {
    /* Terminate the diagnostic with a newline like the other messages. */
    fprintf(stderr, "sen_index_info failed (%d)\n", rc);
    return;
  }
  printf("key_size           %24d\n", key_size);
  printf("flags              %24d\n", flags);
  printf("initial_n_segments %24d\n", initial_n_segments);
  /* NOTE(review): encoding is used as an index without a range check -
     confirm sen_enc_string covers every value sen_index_info can return. */
  printf("encoding           %24s\n", sen_enc_string[encoding]);
  printf("nrecords_keys      %24u\n", nrecords_keys);
  printf("file_size_keys     %24u\n", file_size_keys);
  printf("nrecords_lexicon   %24u\n", nrecords_lexicon);
  printf("file_size_lexicon  %24u\n", file_size_lexicon);
  printf("inv_segment_size   %24u\n", inv_seg_size);
  printf("inv_chunk_size     %24u\n", inv_chunk_size);
  inv_inspect(i->inv);
}

/*
 * Usage: itest [-v] indexfile
 *
 * Opens the index named by the first non-option argument and prints its
 * parameters and inverted-index statistics. -v enables per-term output.
 * Returns 0 on success, -1 when no index file is given or it cannot be
 * opened.
 */
int
main(int argc, char **argv)
{
  int c;
  sen_index *i;
  const char *path;
  verbose = 0;

  while ((c = getopt(argc, argv, "v")) != -1) {
    if (c == 'v') { verbose = 1; }
  }

  if (optind >= argc) {
    fputs("Usage: itest [-v] indexfile\n", stderr);
    return -1;
  }
  /* The file name is the first non-option argument, not necessarily
     argv[1] (which is "-v" when that option is given). */
  path = argv[optind];
  if (!(i = sen_index_open(path))) {
    fprintf(stderr, "index open failed(%s)\n", path);
    return -1;
  }
  // sen_inv_describe(i->inv);
  // checkdb(i);
  index_inspect(i);
  return 0;
}
