
#ifndef _XTCLASSIFY_H
#define _XTCLASSIFY_H

/*############################################################################*/
/* xtbon.h                                                                    */
/*############################################################################*/
#include <regex.h>
#include <stdio.h>

#include <mssBase.h>
#include <mssHeader.h>

/* Compile-time size limits.
 * NOTE(review): the original Japanese comments were mis-encoded; the English
 * text is reconstructed from them and from how each constant is used in this
 * header. */
#define MaxPatLen 1024 /* presumably the maximum pattern length in characters (not used in this header) - confirm */
#define MaxClsLen 10   /* maximum number of classes; sizes every per-class array in this header */
#define MaxRegLen 10   /* maximum length of a match string; Regexp.str holds MaxRegLen+1 elements */
#define RegCanCnt 200  /* number of candidate patterns; sizes Pattern.att[] (original comment refers to setRegTbl) */

/* Element type of the usint string/index arrays used throughout this header
 * (e.g. Regexp.str, Map.alp, Map.idx, StrTbl.top). */
typedef unsigned short int usint;

/*============================================================================*/
/* Structures                                                                 */
/*============================================================================*/

/*------------------------------------------------------------------*/
/* Misclassification cost table                                     */
/*              predict                                             */
/* \            True False  Total                                   */
/* actual True     0     1      1                                   */
/*        False  0.5     0    0.5                                   */
/*                tbl[][]    actTtl[]                               */
/* In the example, rows are the actual class, columns the predicted */
/* class, and actTtl[] is the row total.                            */
/* NOTE(review): reconstructed from a mis-encoded comment - the     */
/* class numbers are presumably the BonCls indices, which are not   */
/* necessarily in file order; confirm against readCost().           */
/*------------------------------------------------------------------*/
struct Cost {
  double tbl[MaxClsLen][MaxClsLen]; /* cost cells, tbl[actual][predicted] per the table above */
  double actTtl[MaxClsLen];         /* per-actual-class total (row sum in the table above) */
  struct BonCls *cls;               /* class table the indices refer to */
};

/*------------------------------------------------------------*/
/* Counts and shares per condition (match/unmatch) and class. */
/* NOTE(review): English text in this block is reconstructed  */
/* from mis-encoded Japanese comments - confirm the wording.  */
/*------------------------------------------------------------*/
/* +---------+-------+-------+---+-------+------+*/
/* |condition| cls0  | cls1  |...| clsN  | Total|*/
/* +---------+-------+-------+---+-------+------+*/
/* |    match|mCnt[0]|mCnt[1]|   |mCnt[N]| mtCnt|*/
/* |         |mShr[0]|mShr[1]|   |mShr[N]| mtShr|*/
/* +---------+-------+-------+---+-------+------+*/
/* |  unmatch|uCnt[0]|uCnt[1]|   |uCnt[N]| utCnt|*/
/* |         |uShr[0]|uShr[1]|   |uShr[N]| utShr|*/
/* +---------+-------+-------+---+-------+------+*/
/* |    Total|tCnt[0]|tCnt[1]|   |tCnt[N]| total|*/
/* |         |tShr[0]|tShr[1]|   |tShr[N]|      |*/
/* +---------+-------+-------+---+-------+------+*/
/* mShr[i]=mCnt[i]/mtCnt                         */
/* uShr[i]=uCnt[i]/utCnt                         */
/* mtShr=mtCnt/total                             */
/* utShr=utCnt/total                             */
struct CndCnt {
  int    uCnt[MaxClsLen]; /* unmatch: count per class */
  int    mCnt[MaxClsLen]; /* match: count per class */
  int    tCnt[MaxClsLen]; /* total: count per class */
  int    utCnt;           /* unmatch total count */
  double utShr;           /* unmatch total share (utCnt/total) */
  int    mtCnt;           /* match total count */
  double mtShr;           /* match total share (mtCnt/total) */
  int    total;           /* grand total */
  int    splitType;       /* splitting criterion (0:Gini,1:Entropy,2:GeoMean) */
  double splitBefor;      /* value of the splitting criterion before splitting */
  double splitAfter;      /* value of the splitting criterion after  splitting */


  /* Cost table (presumably the user-specified cost file - confirm) */
  struct Cost *cost;

  /*---------------------------------------------------*/
  /* Per-class shares for the match / unmatch / total  */
  /* rows, kept in four variants.                      */
  /*                                                   */
  /* Laplace estimate:                                 */
  /*   p(i)=(Ni+1)/(k+N)                               */
  /*   k : number of classes (ClassSize)               */
  /*   Ni: count of class i = cnt->xCnt[]              */
  /*   N : presumably the row total - confirm          */
  /*                                                   */
  /* Cost adjustment:                                  */
  /*   xShrC : cost-weighted share of each class       */
  /*   xShrC=C(j)*xShr(j)/SUMi(C(i)*xShr(i))           */
  /*   where C(j)=SUMi(cost(j,i))                      */
  /*   cost(j,i) is presumably the cost of predicting  */
  /*   class j as class i - confirm                    */
  /*   Pazzani et al., Reducing Misclassification Costs*/
  /*---------------------------------------------------*/

  /* Raw share, no cost adjustment (suffix RN; reading of "R" is a guess) */
  double uShrRN[MaxClsLen];/* unmatch: share per class */
  double mShrRN[MaxClsLen];/* match: share per class */
  double tShrRN[MaxClsLen];/* total: share per class */

  /* Raw share, cost-adjusted (suffix RC) */
  double uShrRC[MaxClsLen]; /* unmatch: share per class */
  double mShrRC[MaxClsLen]; /* match: share per class */
  double tShrRC[MaxClsLen]; /* total: share per class */

  /* Laplace estimate, no cost adjustment (suffix LN) */
  double uShrLN[MaxClsLen]; /* unmatch: share per class */
  double mShrLN[MaxClsLen]; /* match: share per class */
  double tShrLN[MaxClsLen]; /* total: share per class */

  /* Laplace estimate, cost-adjusted (suffix LC) */
  double uShrLC[MaxClsLen]; /* unmatch: share per class */
  double mShrLC[MaxClsLen]; /* match: share per class */
  double tShrLC[MaxClsLen]; /* total: share per class */

  /*--------------------------------------------------------------*/
  /* Pointers to the share arrays used when evaluating the        */
  /* splitting rule (optPrb is not declared in this header):      */
  /*  optPrb==0 -> xShrRN                                         */
  /*  optPrb==1 -> xShrRC                                         */
  /*  optPrb==2 -> xShrLN                                         */
  /*  optPrb==3 -> xShrLC                                         */
  /* These pointers are set up in iniCndCnt().                    */
  /*--------------------------------------------------------------*/
  double *uShr;           /* unmatch: selected share-per-class array */
  double *mShr;           /* match: selected share-per-class array */
  double *tShr;           /* total: selected share-per-class array */
};

/*--------------------------------------------------------------------*/
/* One candidate match expression.                                    */
/* NOTE(review): comments reconstructed from mis-encoded Japanese     */
/* text - confirm bgnRng/endRng semantics against regCmp().           */
/*--------------------------------------------------------------------*/
struct Regexp {
  usint str[MaxRegLen+1]; /* string to match; one extra element beyond MaxRegLen (presumably the 0 terminator) */
  int   type;    /* 0:substring 1:subsequence 9:regular expression(for future)*/
  int   bgnRng;  /* match range at the head of the string; 0 presumably means no head anchoring */
  int   endRng;  /* match range at the tail of the string; 0 presumably means no tail anchoring */
//  usint *regStr;  /* (disabled) regular-expression text built from str,type,bgnRng,endRng */
//  regex_t regex; /* (disabled) compiled form of regStr */
  double objVal; /* objective-function value of the pattern (Gini,Entropy,GeoMean) */
};

/* Table of candidate match expressions. */
struct RegTbl{
  struct Regexp *reg; /* array of expressions (presumably cnt entries - confirm) */
  int            cnt; /* number of entries */
};

/*
struct RegExp{
  struct RegTbl *tbl;
  int            cnt;
};
*/

/*----------------------------------------------------------------*/
/* Alphabet <-> index mapping                                     */
/* (reconstructed from a mis-encoded comment)                     */
/* Map->alp = "abcde\0"                                           */
/* Map->idx = "11011\0"                                           */
/* Map->alpSiz=Map->idxSiz=5                                      */
/* NOTE(review): the example is written as char strings, but alp  */
/* and idx are declared as usint arrays.                          */
/*----------------------------------------------------------------*/
struct Map {
  struct mssHash *alpOrg; /* NOTE(review): presumably a hash of the original alphabet symbols - confirm */
  char  **numAlp;         /* NOTE(review): purpose not visible here; possibly labels for numeric patterns */
  usint *alp;             /* alphabet symbols, as in the example above */
  usint *idx;             /* index value for each alphabet symbol, as in the example above */
  int  alpSiz;            /* number of elements in alp */
  int  idxSiz;            /* number of elements in idx */
};

/*------------------------------------------------------------------------*/
/* Set of indexes to be searched                                          */
/* (reconstructed from a mis-encoded comment; the comment still describes */
/*  the old char-based layout, the live type is usint)                    */
/*     *(Idxs->idx+i)       : i-th index variant                          */
/*   *(*(Idxs->idx+i)+j)    : index string for the j-th pattern item      */
/* *(*(*(Idxs->idx+i)+j)+k) : k-th index value of that string             */
/*                                                                        */
/* ex)  *(*(Idxs->idx+i)+j) = "11010011\0"                                */
/*------------------------------------------------------------------------*/
struct Idxs {
  //char ***idx;   (earlier char-based declaration, kept for reference)
  usint ***idx;  /* idx[i][j][k], see the layout above */
  int cnt;       /* presumably the number of index variants (first dimension) - confirm */
  int patCnt;    /* presumably the number of pattern items (second dimension) - confirm */
};

/*-----------------------------------------------------*/
/* Class (target attribute) table                      */
/* (reconstructed from a mis-encoded comment)          */
/* str[0]="Positive", str[1]="Negative"                */
/* chr = "0011111010101011010010000...."               */
/*-----------------------------------------------------*/
struct BonCls {
  char *str[MaxClsLen]; /* class label strings as they appear in the data */
  int   cnt;            /* number of classes (ClassSize) */
  char *chr;            /* per-record class data, stored as an index into str[] (see example above) */
};

/*----------------------------------------------------------------------------*/
/* Table that stores usint lists as lines in one contiguous buffer.           */
/* The n-th registered line can be reached through a pointer into the buffer. */
/* (Reconstructed from a mis-encoded comment.)                                */
/* Each line is a run of elements ended by a 0 terminator:                    */
/* 1,2,3,0                                                                    */
/* 3,5,0                                                                      */
/* 6,1,2,3,0                                                                  */
/* top=address of the first element of the first line                         */
/* pointer={0,4,7}                                                            */
/* lineCnt=3                                                                  */
/* size=9                                                                     */
/* NOTE(review): pointer={0,4,7} counts the terminators, yet size=9 matches   */
/* only the non-zero elements (12 with terminators) - confirm against         */
/* putStrTbl() which of the two the code maintains.                           */
/*----------------------------------------------------------------------------*/
struct StrTbl {
  usint *top;     /* start of the element buffer */
  int   *pointer; /* per-line offset from top, in elements */
  int    lineCnt; /* number of registered lines */
  int    size;    /* number of registered elements (see the note above) */
};

/*----------------------------------------------------*/
/* One pattern item (one pattern-type attribute).     */
/* (Reconstructed from a mis-encoded comment.)        */
/*----------------------------------------------------*/
struct Pattern {
  struct Map    *map;    /* alphabet-index mapping */
  struct StrTbl *patAlp; /* patterns expressed in alphabet symbols */
  struct StrTbl *patIdx; /* patterns expressed in index values */
  struct RegTbl *regTbl; /* list of candidate match expressions */
  char  *att[RegCanCnt]; /* per-expression data for the list above: 1:match,0:unmatch */
  int    attCnt;         /* number of att[] attributes (expressions) in use */
  int    numPat;         /* 0:character pattern,1:numeric pattern */
};

/*------------------------------------------------------*/
/* One category item (one categorical attribute).       */
/* NOTE(review): reconstructed from a mis-encoded       */
/* comment - confirm field meanings against             */
/* bonGetCatHash().                                     */
/*------------------------------------------------------*/
struct Category {
  struct mssHash *name; /* hash table, presumably attribute value -> numeric code */
  char  **valName;   /* presumably numeric code -> attribute value string */
  int    cnt;        /* number of distinct attribute values */
  int   *val;        /* presumably each row's value converted to its numeric code */
};

/*------------------------------------------------*/
/* Data set container.                            */
/* (Reconstructed from a mis-encoded comment.)    */
/*------------------------------------------------*/
struct Data {
  struct Pattern  *pat;    /* pattern attributes */
  int              patCnt; /* number of pattern attributes */
  double         **num;    /* numeric attributes (presumably num[attribute][row] - confirm) */
  int              numCnt; /* number of numeric attributes */
  struct Category *cat;    /* category attributes */
  int              catCnt; /* number of category attributes */
  struct BonCls   *cls;    /* class (target) attribute */
  int              cnt;    /* number of data records */
  struct mssFPR      *fpr;    /* input file read pointer */
};


/*------------------------------------------------------*/
/* Classification result table (confusion matrix).      */
/* (Title reconstructed from a mis-encoded comment.)    */
/* +--------+------------------------------------------+*/
/* |        |  Predicted as                            +*/
/* |        +---------+---------+---+---------+--------+*/
/* |        |  cls0   |  cls1   |...|  clsN   | Total  |*/
/* +---+----+---------+---------+---+---------+--------+*/
/* |   |    | cnt[0,0]| cnt[0,1]|   | cnt[0,N]|atCnt[0]|*/
/* | A |cls0|aShr[0,0]|aShr[0,1]|   |aShr[0,N]|atShr[0]|*/
/* | c |    |pShr[0,0]|pShr[0,1]|   |pShr[0,N]|        |*/
/* | t |    | cst[0,0]| cst[0,1]|   | cst[0,N]|atCst[0]|*/
/* | u +----+---------+---------+---+---------+--------+*/
/* | a |    | cnt[1,0]| cnt[1,1]|   | cnt[1,N]|atCnt[1]|*/
/* | l |cls1|aShr[1,0]|aShr[1,1]|   |aShr[1,N]|atShr[1]|*/
/* |   |    |pShr[1,0]|pShr[1,1]|   |pShr[1,N]|        |*/
/* |   |    | cst[1,0]| cst[1,1]|   | cst[1,N]|atCst[1]|*/
/* |   +----+---------+---------+---+---------+--------+*/
/* |   |    |         |         | : |         |        |*/
/* |   +----+---------+---------+---+---------+--------+*/
/* |   |    | cnt[N,0]| cnt[N,1]|   | cnt[N,N]|atCnt[N]|*/
/* |   |clsN|aShr[N,0]|aShr[N,1]|   |aShr[N,N]|atShr[N]|*/
/* |   |    |pShr[N,0]|pShr[N,1]|   |pShr[N,N]|        |*/
/* |   |    | cst[N,0]| cst[N,1]|   | cst[N,N]|atCst[N]|*/
/* |   +----+---------+---------+---+---------+--------+*/
/* |   |    | ptCnt[0]| ptCnt[1]|   | ptCnt[N]|   tCnt |*/
/* |   |Ttl | ptShr[0]| ptShr[1]|   | ptShr[N]|        |*/
/* |   |    | ptCst[0]| ptCst[1]|   | ptCst[N]|   tCst |*/
/* +---+----+---------+---------+---+---------+--------+*/
/* aShr[x,y]=cnt[x,y]/atCnt[x] */
/* pShr[x,y]=cnt[x,y]/ptCnt[y] */
/* atShr[x]=atCnt[x]/tCnt      */
/* ptShr[x]=ptCnt[x]/tCnt      */

struct ClsTbl {
  int      cnt[MaxClsLen][MaxClsLen]; /* count per cell, [actual][predicted] */
  double  aShr[MaxClsLen][MaxClsLen]; /* cell share of its actual (row) total */
  double  pShr[MaxClsLen][MaxClsLen]; /* cell share of its predicted (column) total */
  double   cst[MaxClsLen][MaxClsLen]; /* cost per cell */
  int    atCnt[MaxClsLen];            /* actual-total count per class */
  double atShr[MaxClsLen];            /* actual-total share of the grand total */
  double atCst[MaxClsLen];            /* actual-total cost per class */
  int    ptCnt[MaxClsLen];            /* predicted-total count per class */
  double ptShr[MaxClsLen];            /* predicted-total share of the grand total */
  double ptCst[MaxClsLen];            /* predicted-total cost per class */
  int     tCnt;                       /* grand total count */
  double  tCst;                       /* grand total cost */

  int    accCnt;                     /* presumably the number of correct predictions - confirm */
  int    errCnt;                     /* number of errors */
  double accRate;                    /* accuracy rate accCnt/tCnt */
  double errRate;                    /* error rate errCnt/tCnt */
  double geoMean;                    /* presumably the geometric mean of per-class accuracy on the actual side - confirm */
};

/*============================================================================*/
/* Function prototypes                                                        */
/*============================================================================*/
/* Data loading, indexing and match-expression API.
 * NOTE(review): only declarations are visible in this header. Parameter names
 * trn/tst presumably denote training and test inputs, and each free* function
 * presumably releases what the matching get/init/set function allocated -
 * confirm against the implementations. */
int bonGetCnt(struct mssFPR *fpr);
struct BonCls *bonGetCls(struct mssFPR *trn, struct mssFPR *tst, struct mssHeader *hd);
void bonFreeCls(struct BonCls *cls);
struct Cost *readCost(char *fname,struct BonCls *cls);
void showCost(struct Cost *cost);
struct Map **bonGetAlp(struct mssFPR *trn,struct mssFPR *tst, struct mssHeader *hd);
void freeMap(struct Map **map);
struct Category *bonGetCatHash(struct mssFPR *trn,struct mssFPR *tst,struct mssHeader *hd);
struct Data *bonSetDat( struct mssFPR *trn,struct mssHeader *hd,struct BonCls *cls,int cnt, struct Map **map, struct Category *cat);
void freeDat(struct Data *dat);
void bonPrnDat( struct Data *dat);
struct Idxs *initIdxs(struct Data *dat);
struct Idxs *initIdxs0(struct Data *dat);
void printIdxs(struct Idxs *idxs);
void freeIdxs(struct Idxs *idxs);
void cpyIdx(struct Data *dat, usint **idx);
void datIndexing( struct Data *dat );
void setRegTbl( struct Data *dat, struct Cost *cost);
void freeRegTbl(struct Data *dat);
void mkAtt( struct Data *dat );
void freeAtt( struct Data *dat );
double Log2(double x);
/* NOTE(review): returns char although struct Map stores idx as usint - confirm no truncation is intended. */
char alp2idx(struct Map *map, usint alpha);
void alpStr2idxStr(usint index[],usint *alpha, struct Map *map);
int regCmp(usint *str,struct Regexp *reg);
struct Idxs *setNextIdxs(struct Data *dat);

/* Display helpers for struct Map and struct Data.
 * NOTE(review): output destination and format are not visible from this header. */
void showMap(struct Map *map);
void showAlphabet(struct Data *dat);
void showIndex(struct Data *dat);

/* struct Regexp helpers.
 * NOTE(review): presumably cpReg copies *from into *to; what freeReg releases
 * is not visible here - confirm. */
void cpReg(struct Regexp *to, struct Regexp *from);
void freeReg(struct Regexp *reg);

/* struct CndCnt API (initialise, print, clear, copy, compute).
 * NOTE(review): behaviour is inferred from names only - confirm against the
 * implementations. */
void iniCndCnt( struct CndCnt *cnt, struct Cost *cost);
void prnCndCnt( struct CndCnt *cnt);
void clearCndCntCel(struct CndCnt *cnt);
void cpyCndCnt(struct CndCnt *cntTo, struct CndCnt *cntFrom);
void calCndCntSplit( struct CndCnt *cnt);
void calCndCnt( struct CndCnt *cnt);
int  dominantClass(struct CndCnt *cnt);
/* NOTE(review): the parameter name `class` is a C++ keyword, so this header
 * cannot be included from C++ as written. */
double calCndCntCost(struct CndCnt *cnt, struct Cost *cost, int class);

/* struct ClsTbl API (initialise, compute derived values, display).
 * NOTE(review): behaviour is inferred from names only - confirm against the
 * implementations. */
void iniClsTbl(struct ClsTbl *clsTbl);
void calClsTbl(struct ClsTbl *ct, struct Cost *cost);
void showClsTbl(struct ClsTbl *ct,struct Data *dat);

/* usint-string utilities and struct StrTbl API.
 * NOTE(review): behaviour is inferred from names only; StrCpy/StrLen/StrCmp/
 * StrnCmp presumably mirror their <string.h> namesakes for 0-terminated usint
 * arrays - confirm against the implementations. */
void prnStr(FILE *fp, usint *str);
void StrCpy(usint *to, usint *from);
int  StrLen(usint *dat);
int StrnCmp(usint *s1,usint *s2,int n);
int StrCmp(usint *s1,usint *s2);
/* Declared with (void) so this is a real prototype: an empty parameter list
 * would leave the arguments unchecked before C23. */
struct StrTbl *initStrTbl(void);
void freeStrTbl(struct StrTbl *st);
void putStrTbl(struct StrTbl *st, usint *str);
usint *getStrTbl(struct StrTbl *st,int n);


#endif
