#include <musashi.h>
#include <xtclassify.h>
#include <tree.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>

extern MssOptFLD optPAT;
extern MssOptFLD optNUM;
extern MssOptFLD optCAT;
extern MssOptFLD optCLS;
extern MssOptINF optTRN;
extern MssOptINF optTST;
extern MssOptSTR optDLM;
extern MssOptFLG optTXT;
extern MssOptINF optCST;
extern MssOptSEL optMDL;
extern MssOptFLG optSEQ;
extern MssOptINT optEND;
extern MssOptINT optBGN;
extern MssOptINT optSIZ;
extern MssOptINT optSED;
extern MssOptINT optCAN;
extern MssOptINT optTRY;
extern MssOptSEL optSPL;
extern MssOptSEL optPRB;
extern MssOptSEL optPMD;
extern MssOptINF optPRN;
extern MssOptDBL optCNF;

extern struct mssFields *fpat;
extern struct mssFields *fnum;
extern struct mssFields *fcat;
extern struct mssFields *fcls;

extern int   ClassSize;
extern char  catCombi[1024][10];
extern int   usedRandSeed;

extern struct mssGlobalVariables mssGV;

/* Record list used while growing the tree: identifies the subset of Data
   records that reached a given node. */
struct Pointer {
  int  recCnt; /* number of entries stored in recNo */
  int *recNo;  /* array of record numbers (used as indices into the Data arrays) */
};


/*############################################################################*/
/*# Basic helper functions                                                   #*/
/*############################################################################*/
/*----------------------------------------------------------------------------*/
/* Run calCndCnt() on one counter of every node reachable from node.          */
/*   flg=='s' : the test counter (tstCnt) is recalculated                     */
/*   otherwise: the pruning counter (prnCnt) is recalculated                  */
/* Children are visited only when the node is not a leaf (nodeType!=0).       */
/*----------------------------------------------------------------------------*/
static void calCndCntAllNode(struct Node *node, char flg){

  struct CndCnt *target = (flg=='s') ? &node->tstCnt : &node->prnCnt;

  calCndCnt(target);

  if(node->nodeType!=0){
    calCndCntAllNode(node->mNode,flg);
    calCndCntAllNode(node->uNode,flg);
  }
}

/*----------------------------------------------------------------------------*/
/* Drop one record down the tree and count it on every node it passes.        */
/*   dat  : data set the record belongs to                                    */
/*   rec  : record number inside dat                                          */
/*   node : node currently being visited                                      */
/*   tree : tree being evaluated (its own copy of map/regTbl is used for      */
/*          pattern attributes)                                               */
/*   cls  : class index of the record                                         */
/*   flg  : counter to update - 't':trnCnt, 's':tstCnt, 'p':prnCnt            */
/* On a leaf the record is counted in mCnt[cls]; on an inner node it is       */
/* counted in mCnt/uCnt depending on the node's test and the recursion        */
/* continues into mNode/uNode respectively.                                   */
/*----------------------------------------------------------------------------*/
static void setDatOnTreeRec(
  struct Data   *dat,
  int            rec,
  struct Node   *node,
  struct Tree   *tree,
  int            cls,
  char           flg){

  usint index[MaxPatLen];
  struct CndCnt *cnt=NULL;
  struct Node   *next;
  int            matched;

  /* pick the counter selected by flg */
  if     (flg=='t') cnt=&node->trnCnt;
  else if(flg=='s') cnt=&node->tstCnt;
  else if(flg=='p') cnt=&node->prnCnt;

  /* a leaf only counts the record (on the match side) */
  if(node->nodeType==0){
    cnt->mCnt[cls]++;
    return;
  }

  /* evaluate the node's test for this record */
  switch(node->attType){
  case 0: /* pattern attribute: translate the string to indexes, then match */
    alpStr2idxStr(index,
                getStrTbl(dat->pat[node->attFldNo].patAlp,rec),
                tree->dat->pat[node->attFldNo].map);
    matched = regCmp(index,
      tree->dat->pat[node->attFldNo].regTbl->reg+node->attPatNo) ? 1 : 0;
    break;

  case 1: /* numeric attribute: match when value <= threshold */
    matched = ( dat->num[node->attFldNo][rec] <= node->numTH );
    break;

  case 2: /* categorical attribute: match when the value is flagged 'M' */
    matched = ( node->catList[ dat->cat[node->attFldNo].val[rec] ]=='M' );
    break;

  default: /* unknown attribute type: nothing is counted */
    return;
  }

  if(matched){
    cnt->mCnt[cls]++;
    next=node->mNode;
  }else{
    cnt->uCnt[cls]++;
    next=node->uNode;
  }
  setDatOnTreeRec(dat,rec,next,tree,cls,flg);
}

/*----------------------------------------------------------------------------*/
/* Apply a whole data set (test or pruning data) to the tree.                 */
/*   flg : 's' = test data, 'p' = pruning data                                */
/* Every record is routed with setDatOnTreeRec(); afterwards the per-node     */
/* counters are recalculated with calCndCntAllNode().                         */
/*----------------------------------------------------------------------------*/
static void setDatOnTree(
  struct Tree *tree,
  struct Data *dat,
  char         flg){
  int rec=0;

  /* count every record on the nodes it passes through */
  while(rec<dat->cnt){
    setDatOnTreeRec(dat,rec, tree->topNode, tree, dat->cls->chr[rec],flg);
    rec++;
  }

  /* derive the per-node figures from the raw counts */
  calCndCntAllNode(tree->topNode, flg);
}

/*----------------------------------------------------------------------------*/
/* Walk the tree and record level information:                                */
/*  1. store the depth of each visited node in node->level                    */
/*  2. keep the maximum depth seen in tree->deepest                           */
/*  3. count terminal nodes (leaves or pruned nodes) in tree->leafCnt         */
/* Nodes below a pruned node are not visited.                                 */
/*----------------------------------------------------------------------------*/
static void setLevel(struct Node *node,struct Tree *tree, int level){

  int terminal = (node->nodeType==0) || node->pruned;

  node->level=level;

  if(tree->deepest<level){
    tree->deepest=level;
  }

  if(terminal){
    tree->leafCnt++;
    return;
  }

  setLevel(node->uNode,tree,level+1);
  setLevel(node->mNode,tree,level+1);
}

/*----------------------------------------------------------------------------*/
/* Recursive helper of freeTree().                                            */
/* Frees the category list of node (when it has one) and appends the two      */
/* children of every inner node to nodeTbl so the caller can free them later. */
/*   node    : node being visited                                             */
/*   nodeTbl : growing table of child-node pointers (may start as NULL)       */
/*   nodeCnt : in/out, number of entries currently held in nodeTbl            */
/* Returns the (possibly reallocated) table.                                  */
/* The node structures themselves are NOT freed here.                         */
/*----------------------------------------------------------------------------*/
static struct Node **freeTreeTbl(
  struct Node  *node,
  struct Node **nodeTbl,
  int          *nodeCnt){

  int cnt;

  /* a categorical split owns its match/unmatch list */
  if(node->attType==2){
    mssFree( node->catList );
    node->catList=NULL; /* do not leave a dangling pointer behind */
  }

  /* leaves have no children to register */
  if(node->nodeType==0){
    return(nodeTbl);
  }

  /* grow the table by two slots; the element type is struct Node *,
     so size the allocation from the table itself */
  cnt=(*nodeCnt)+2;
  nodeTbl=mssRealloc(nodeTbl,sizeof(*nodeTbl)*cnt,"freeTree");
  nodeTbl[cnt-2] = node->uNode;
  nodeTbl[cnt-1] = node->mNode;
  *nodeCnt=cnt;

  nodeTbl=freeTreeTbl(node->uNode,nodeTbl,nodeCnt);
  nodeTbl=freeTreeTbl(node->mNode,nodeTbl,nodeCnt);
  return(nodeTbl);
}

/*----------------------------------------------------------------------------*/
/* ץ빽¤ΤΥԡ(toγƼΰݤޤ)                           */
/*----------------------------------------------------------------------------*/
static struct Data *cpyDatMapReg(struct Data *from){

  int i,j;
  struct Data *to;

  to         = mssCalloc(sizeof(struct Data),"cpySmp");

  /*==============================================*/
  /*ڤˤäѤѿϥݥ󥿤Ȥƥԡ*/
  /*==============================================*/
  to->numCnt = from->numCnt;
  to->num    = from->num;
  to->catCnt = from->catCnt;
  to->cat    = from->cat;
  to->cls    = from->cls;



  /*====================================================*/
  /*ѥܤϷڤˤäѤΤǡΤ򥳥ԡ*/
  /*====================================================*/
  to->patCnt = from->patCnt;
  to->pat    = mssCalloc( to->patCnt * sizeof(struct Pattern) ,"cpyDat" );

  for(i=0; i<to->patCnt; i++){

    (to->pat+i)->numPat=(from->pat+i)->numPat;

    /*MapΥԡ*/
    (to->pat+i)->map=mssCalloc(sizeof(struct Map), "cpySmp");
    (to->pat+i)->map->alpOrg = (from->pat+i)->map->alpOrg;
    (to->pat+i)->map->numAlp = (from->pat+i)->map->numAlp;
    (to->pat+i)->map->alp
      =mssCalloc(sizeof(usint)*((from->pat+i)->map->alpSiz+1), "cpySmp");
    (to->pat+i)->map->idx
      =mssCalloc(sizeof(usint)*((from->pat+i)->map->alpSiz+1), "cpySmp");
    (to->pat+i)->map->alpSiz=(from->pat+i)->map->alpSiz;
    (to->pat+i)->map->idxSiz=(from->pat+i)->map->idxSiz;
    StrCpy((to->pat+i)->map->alp,(from->pat+i)->map->alp);
    StrCpy((to->pat+i)->map->idx,(from->pat+i)->map->idx);

    /*regTblΥԡ*/
    (to->pat+i)->regTbl=mssMalloc(sizeof(struct RegTbl),"spySmp");
    (to->pat+i)->regTbl->reg =
      mssCalloc(sizeof(struct Regexp)*(from->pat+i)->regTbl->cnt,"scr");
    (to->pat+i)->regTbl->cnt= (from->pat+i)->regTbl->cnt;
    for(j=0; j<(to->pat+i)->regTbl->cnt; j++){
      cpReg( (to  ->pat+i)->regTbl->reg+j,
             (from->pat+i)->regTbl->reg+j);

    }
  }
  return(to);
}

/*----------------------------------------------------------------------------*/
/* cpyDatMapRegǥԡ줿ΰβ                                       */
/*----------------------------------------------------------------------------*/
static void freeDatMapReg(struct Data *dat){

  int i;

  for(i=0; i<dat->patCnt; i++){
    /*Mapfree*/
    mssFree( (dat->pat+i)->map->alp );
    mssFree( (dat->pat+i)->map->idx );
    mssFree( (dat->pat+i)->map );

    /*regTblfree*/
    mssFree( (dat->pat+i)->regTbl->reg );
    mssFree( (dat->pat+i)->regTbl );
  }
  mssFree(dat->pat);
  mssFree(dat);
}

/*----------------------------------------------------------------------------*/
/* ڤClassificationTable˷򥻥åȤ                            */
/*----------------------------------------------------------------------------*/
static void setClsTbl(
  int cnt[MaxClsLen][MaxClsLen],
  struct Node *node,
  char flg){ /*t:Training s:Test p:Pruning*/

  int i;
  if(node->nodeType==0 || node->pruned){ /*leafʤ*/
    for(i=0; i<ClassSize; i++){
      switch(flg){
        case 't': cnt[i][node->cls] += node->trnCnt.tCnt[i]; break;
        case 's': cnt[i][node->cls] += node->tstCnt.tCnt[i]; break;
        case 'p': cnt[i][node->cls] += node->prnCnt.tCnt[i]; break;
      }
    }
    return;
  }
  setClsTbl(cnt, node->uNode,flg);
  setClsTbl(cnt, node->mNode,flg);
}

/*############################################################################*/
/* Decision tree construction                                                 */
/*############################################################################*/
/*----------------------------------------------------------------------------*/
/* Pattern attribute: count records per condition (match/unmatch) and class.  */
/*   cnt   : counter updated in place (mCnt/mtCnt or uCnt/utCnt)              */
/*   pat   : pattern field; pat->att[attNo][rec] is non-zero when record rec  */
/*           matches attribute attNo                                          */
/*   cls   : class table; cls->chr[rec] is the class index of record rec      */
/*   pnt   : records to count                                                 */
/*   attNo : attribute (pattern) number inside the field                      */
/*----------------------------------------------------------------------------*/
static void setCndCntPat(
  struct CndCnt  *cnt,
  struct Pattern *pat,
  struct BonCls  *cls,
  struct Pointer *pnt,
  int             attNo){

  int k;
  int rec;
  int c;

  for(k=0; k<pnt->recCnt; k++){
    rec = pnt->recNo[k];
    c   = (int)cls->chr[rec];

    if( pat->att[attNo][rec] != 0 ){
      cnt->mCnt[c]++;
      cnt->mtCnt++;
    }else{
      cnt->uCnt[c]++;
      cnt->utCnt++;
    }
  }
}

/*----------------------------------------------------------------------------*/
/* Numeric attribute: (value, class) pair and its qsort comparator            */
/*----------------------------------------------------------------------------*/
struct NumCls {
  double num; /* attribute value of one record */
  char   cls; /* class index of the same record */
};

/* qsort comparator ordering struct NumCls elements by ascending num.
   Returns a negative, zero or positive value as qsort requires; the previous
   version returned only 0 or 1, which is not a consistent ordering. */
static int qscmp3(const void *a, const void *b){
  double x = ((const struct NumCls *)a)->num;
  double y = ((const struct NumCls *)b)->num;

  return( (x > y) - (x < y) );
}

/* Numeric attribute: search the threshold giving the smallest splitAfter.
 *   cnt  : receives a copy of the counters of the best split found
 *          (left untouched when no candidate is evaluated)
 *   num  : attribute values indexed by record number
 *   cls  : class table; cls->chr[rec] is the class index of record rec
 *   pnt  : records to examine
 *   cost : passed to iniCndCnt() for the working counter
 * Returns the chosen threshold; records with value <= threshold are on the
 * match side. Returns 0 when no candidate was evaluated.
 */
static double setCndCntNum(
  struct CndCnt  *cnt,
  double         *num,
  struct BonCls  *cls,
  struct Pointer *pnt,
  struct Cost    *cost){

  struct CndCnt  tmpCnt;
  struct NumCls *tbl;
  double bestTh;
  double th;
  double next;
  double bestSv;
  int isLast;
  int i;

  /* working table of (value, class) pairs */
  tbl=mssMalloc(pnt->recCnt * sizeof(struct NumCls),"setCndCntNum");

  /* fill the table; every record starts on the unmatch side */
  iniCndCnt(&tmpCnt,cost);
  for(i=0; i<pnt->recCnt; i++){
    (tbl+i)->num=*(num+*(pnt->recNo+i));
    (tbl+i)->cls=*(cls->chr+*(pnt->recNo+i));
    tmpCnt.uCnt[(int)((tbl+i)->cls)]++;
    tmpCnt.utCnt++;
    tmpCnt.total++;
  }

  /* sort by attribute value */
  qsort(tbl,pnt->recCnt,sizeof(struct NumCls),
                       (int (*)(const void *, const void *))qscmp3);

  bestTh=0;
  /* DBL_MAX (not a magic 99999) so that any finite split value can win,
     consistent with the initial splitAfter used by makeTree() */
  bestSv=DBL_MAX;
  for(i=0; i<pnt->recCnt; i++){
    th=(tbl+i)->num;
    isLast=( i==pnt->recCnt-1 );

    /* move record i from the unmatch side to the match side */
    tmpCnt.uCnt[(int)(tbl+i)->cls]--;
    tmpCnt.mCnt[(int)(tbl+i)->cls]++;
    tmpCnt.utCnt--;
    tmpCnt.mtCnt++;

    /* equal values cannot be separated: evaluate only after the last one */
    if( !isLast && th == (tbl+i+1)->num ){
      continue;
    }

    calCndCnt(&tmpCnt);
    calCndCntSplit(&tmpCnt);

    /* keep the best split seen so far */
    if( tmpCnt.splitAfter < bestSv){
      cpyCndCnt(cnt,&tmpCnt);
      bestSv=tmpCnt.splitAfter;
      if( isLast ){
        bestTh=th;
      }else{
        next=(tbl+i+1)->num;
        bestTh=(th+next)/(double)2;
        /* the midpoint may round up to next (adjacent doubles) or overflow;
           "value <= threshold" must still reproduce the counted split, so
           fall back to th whenever the midpoint is not inside [th,next) */
        if( !(bestTh >= th && bestTh < next) ) bestTh=th;
      }
    }
  }

  mssFree(tbl);
  return(bestTh);
}


/*----------------------------------------------------------------------------*/
/* ƥ꡼°ξ̥饹̷                                         */
/* ƥ꡼Match,Unmatchɽ֤                                        */
/*   ex) MMUUUMUM                                                             */
/*----------------------------------------------------------------------------*/
#define ValClsCnt(x,y) (*(valClsCnt+(x)*cls->cnt+(y)))

static char *setCndCntCat(
  struct CndCnt   *cnt,
  struct Category *cat,
  struct BonCls   *cls,
  struct Pointer  *pnt,
  struct Cost     *cost){

  int end[]={2,4,8,16,32,64,128,256,512,1024};
  struct CndCnt  tmpCnt;
  double bestSv=99999;
  int    bestCombi=0;
  char  *catList=NULL;
  int    improve;
  int    moveCat=-1;
  int i,j,k;

  int *valClsCnt;
  int catVal,clsVal;

  /* ƥ꡼classɽκ */
  /* valClsCnt[val][class] ΰ*/
  valClsCnt=mssCalloc(cat->cnt*cls->cnt*sizeof(int),"setCndCntCat1");
  for(i=0; i<pnt->recCnt; i++){
    catVal=     *(cat->val+*(pnt->recNo+i));
    clsVal=(int)*(cls->chr+*(pnt->recNo+i));
    ValClsCnt(catVal,clsVal)++;
  }

  /*  ɽɽ */
  /*
  for(i=0; i<cat->cnt; i++){
    printf(" val=%d : ",i);
    for(j=0; j<cls->cnt; j++){
      printf("cls[%d]=%d ",j,ValClsCnt(i,j));
    }
    printf("\n");
  }
  */

  /*tmpCnt¤Τ*/
  iniCndCnt(&tmpCnt,cost);

  /*=======================================================================*/
  /*ƥ꡼°ͤμʲʤСMatch,Unmatchȹ礻*/
  /*׻Ŭʬ롣                                 */
  /* catCombi[i][val]={M,U};                                               */
  /* i 0 1 2 .... */
  /* 0 M M M .... */
  /* 1 U M M .... */
  /* 2 M U M .... */
  /* 3 U U M .... */
  /* 4 M M U .... */
  /* 5 U M U .... */
  /* 6 M U U .... */
  /* 7 U U U .... */
  /*   ºݤĴ٤Τi=1to72ӡʤ1,3,5Ĵ٤ */
  /*   2ӤˤΤϡ07,16,25,34Ʊ̣ʤΤ*/ 
  if(cat->cnt <= 10){
    for(i=0; i<end[cat->cnt-1]-1; i=i+2){
      clearCndCntCel(&tmpCnt);
      for(j=0; j<cat->cnt; j++){
        for(k=0; k<cls->cnt; k++){
          if(catCombi[i][j]=='M') tmpCnt.mCnt[k]+=ValClsCnt(j,k);
          else                    tmpCnt.uCnt[k]+=ValClsCnt(j,k);
        }
      }

      /*̥饹֥ȥԡη׻*/
      calCndCnt(&tmpCnt);
      calCndCntSplit(&tmpCnt);
      /*prnCndCnt(&tmpCnt);*/

      /* °ι */ 
      if( tmpCnt.splitAfter < bestSv){
        cpyCndCnt(cnt,&tmpCnt);
        bestSv=tmpCnt.splitAfter;
        bestCombi=i;
      } 
    }
    catList=mssCalloc(sizeof(char)*cat->cnt,"setCndCntCat2");
    for(j=0; j<cat->cnt; j++){
      *(catList+j)=catCombi[bestCombi][j];
    }

  /*=======================================================================*/
  /*ƥ꡼°ͤμۤʤСgreedy           */
  /*ɤʬ                                                       */
  }else{
    /*ͤȤơMatchȤư*/
    catList=mssCalloc(sizeof(char)*cat->cnt,"setCndCntCat3");
    for(j=0; j<cat->cnt; j++){
      *(catList+j)='M';
      for(k=0; k<cls->cnt; k++) tmpCnt.mCnt[k]+=ValClsCnt(j,k);
    }
    calCndCnt(&tmpCnt);
    calCndCntSplit(&tmpCnt);
    cpyCndCnt(cnt,&tmpCnt);
    bestSv=tmpCnt.splitAfter;

    /*¤ꡢMatchUnmatch˥ƥ꡼ͤưƤ*/
    improve=1;
    while(improve) {
      improve=0;
      /*MatchĤİưɤʤ뤫ǧ*/
      for(j=0; j<cat->cnt; j++){
        if(*(catList+j)=='M'){
          /*MatchUnmatch˰ư*/
          for(k=0; k<cls->cnt; k++){
            tmpCnt.mCnt[k]-=ValClsCnt(j,k);
            tmpCnt.uCnt[k]+=ValClsCnt(j,k);
          }
          calCndCnt(&tmpCnt);
          calCndCntSplit(&tmpCnt);
          if(tmpCnt.splitAfter < bestSv){
            bestSv=tmpCnt.splitAfter;
            moveCat=j;
            improve=1;
          }
          /*MatchUnmatch˰ưΤ򸵤᤹*/
          for(k=0; k<cls->cnt; k++){
            tmpCnt.mCnt[k]+=ValClsCnt(j,k);
            tmpCnt.uCnt[k]-=ValClsCnt(j,k);
          }
        } /*if*/
      }

      /*СmoveCatUnmatch˰ư*/
      if(improve){
        *(catList+moveCat)='U';
        for(k=0; k<cls->cnt; k++){
          tmpCnt.mCnt[k]-=ValClsCnt(moveCat,k);
          tmpCnt.uCnt[k]+=ValClsCnt(moveCat,k);
        }
      }
    } /*while*/
    calCndCnt(&tmpCnt);
    calCndCntSplit(&tmpCnt);
    cpyCndCnt(cnt,&tmpCnt);

  }

  mssFree(valClsCnt);
  return(catList);
}

/*----------------------------------------------------------------------------*/
/* TreeΥΡɺƵؿ                                                   */
/*----------------------------------------------------------------------------*/
static struct Node *makeTree(
  struct Data     *dat,
  struct Cost     *cost,
  struct Pointer  *pointer,
  struct Node     *parent){

  struct Node  *this;
  struct CndCnt cnt;

  struct Pointer uPnt;
  struct Pointer mPnt;

  char *catList;

  double tmpTH;
  int i,j,jm,ju;

  /*ȥΡΰγݤȽ*/
  this = mssCalloc(sizeof(struct Node), "Node");
  this->parent = parent;

  /* this->xxxCntν */
  iniCndCnt(&this->trnCnt,cost);
  iniCndCnt(&this->tstCnt,cost);
  iniCndCnt(&this->prnCnt,cost);

  /*splitting ruleͤν*/
  this->trnCnt.splitAfter=DBL_MAX;

  /*----------------------------------------------*/
  /*ѥܤˤǾȤ°դ*/
  /*----------------------------------------------*/
  for(i=0; i<dat->patCnt; i++){
    for(j=0; j<(dat->pat+i)->attCnt; j++){
      iniCndCnt(&cnt,cost);
      setCndCntPat(&cnt,dat->pat+i,dat->cls,pointer,j);
      calCndCnt(&cnt);
      calCndCntSplit(&cnt);
      /*prnCndCnt(&cnt);*/

      /* ǾȤ°򹹿*/
      if(this->trnCnt.splitAfter > cnt.splitAfter){
        cpyCndCnt(&this->trnCnt,&cnt);
        this->attType =0;
        this->attFldNo=i;
        this->attPatNo=j;
      }
    }
  }

  /*----------------------------------------------*/
  /*͹ܤˤǾȤ°դ    */
  /*----------------------------------------------*/
  for(i=0; i<dat->numCnt; i++){
    clearCndCntCel(&cnt);
    tmpTH=setCndCntNum(&cnt,*(dat->num+i),dat->cls,pointer,cost);
    calCndCnt(&cnt);
    calCndCntSplit(&cnt);

    if(this->trnCnt.splitAfter > cnt.splitAfter){
      cpyCndCnt(&this->trnCnt,&cnt);
      this->attType =1;
      this->attFldNo=i;
      this->numTH = tmpTH;
    }
  }

  /*------------------------------------------------*/
  /*ƥ꡼ܤˤǾȤ°դ*/
  /*------------------------------------------------*/
  for(i=0; i<dat->catCnt; i++){
    clearCndCntCel(&cnt);
    catList=setCndCntCat(&cnt,dat->cat+i,dat->cls,pointer,cost);
    calCndCnt(&cnt);
    calCndCntSplit(&cnt);

    if(this->trnCnt.splitAfter > cnt.splitAfter){
      cpyCndCnt(&this->trnCnt,&cnt);
      this->attType =2;
      this->attFldNo=i;
      this->catList = catList;
    }else{
      mssFree(catList);
    }
  }

  /*ΡɤΥ饹(꡼հʳǤ⥯饹ꤹΤϻ޴Τ)*/
  this->cls=dominantClass(&this->trnCnt);

  /*ʤLeaf&return*/
  if(this->trnCnt.splitAfter >= this->trnCnt.splitBefor){
    this->nodeType=0;
    return(this);
  }

  /*줫0ξ,Leaf&return*/
  if(this->trnCnt.utCnt == 0 || this->trnCnt.mtCnt == 0){
    this->nodeType=0;
    return(this);
  }

  /*ξƤϤޤʤȤȤϥΡɤǤ*/
  this->nodeType = 1;

  uPnt.recCnt = this->trnCnt.utCnt;
  mPnt.recCnt = this->trnCnt.mtCnt;
  uPnt.recNo  = mssCalloc(uPnt.recCnt*sizeof(int),"uIdx");
  mPnt.recNo  = mssCalloc(mPnt.recCnt*sizeof(int),"mIdx");
  jm=0;
  ju=0;

  switch(this->attType){

  /*ѥܤξ*/
  case 0:
    for(i=0; i<pointer->recCnt; i++){
      if( (int)*((dat->pat+(this->attFldNo))->att[this->attPatNo]
                                           +*(pointer->recNo+i)) ){
        *(mPnt.recNo+jm++)=*(pointer->recNo+i);
      }else{
        *(uPnt.recNo+ju++)=*(pointer->recNo+i);
      }
    }
    break;

  /*͹ܤξ*/
  /*matchͰʲ, unmatchͤ礭*/
  case 1:
    for(i=0; i<pointer->recCnt; i++){
      if( *(*(dat->num+this->attFldNo)+*(pointer->recNo+i)) <= this->numTH ){
        *(mPnt.recNo+jm++)=*(pointer->recNo+i);
      }else{
        *(uPnt.recNo+ju++)=*(pointer->recNo+i);
      }
    }
    break;

  /*ƥ꡼ܤξ*/
  /*matchcatListˤ, unmatchcatListˤʤ*/
  case 2:
    for(i=0; i<pointer->recCnt; i++){
      if( this->catList[*((dat->cat+this->attFldNo)->val+*(pointer->recNo+i))]=='M'){
        *(mPnt.recNo+jm++)=*(pointer->recNo+i);
      }else{
        *(uPnt.recNo+ju++)=*(pointer->recNo+i);
      }
    }
    break;

  }

  /*Ρɤ򿭤Ф(ƵƤӽФ)*/
  this->mNode = makeTree(dat,cost,&mPnt,this);
  this->uNode = makeTree(dat,cost,&uPnt,this);

  /*ݥΰγ*/
  mssFree(uPnt.recNo);
  mssFree(mPnt.recNo);

  return(this);
}

/*############################################################################*/
/* Pruning                                                                    */
/*  - Error-Based Pruning   (selected when *optPMD.str=='1')                  */
/*  - Reduced Error Pruning (selected when *optPMD.str=='2')                  */
/*############################################################################*/
/*============================================================================*/
/*Error-Based Pruning                                                         */
/*============================================================================*/
/*----------------------------------------------------------------------------*/
/*޴ ͽ¬ʬ׻(fromC4.5)                                           */
/*----------------------------------------------------------------------------*/
static double CF;

double Val[] = {  0,  0.001, 0.005, 0.01, 0.05, 0.10, 0.20, 0.40, 1.00};
double Dev[] = {4.0,  3.09,  2.58,  2.33, 1.65, 1.28, 0.84, 0.25, 0.00};

static double calEBPerr(double NN, double e){

  double coeff=0;
  double val0;
  int i;

  i=0;
  while(CF>Val[i])i++;
  coeff=Dev[i-1]+(Dev[i]-Dev[i-1]) * (CF-Val[i-1]) / (Val[i]-Val[i-1]);
  coeff=coeff * coeff;

  if(e<1E-6){
    return( NN * (1-exp(log(CF)/NN)) );
  }else if(e < 0.9999 ){
    val0=NN * (1-exp(log(CF)/NN));
    return(val0+e*(calEBPerr(NN,1.0)-val0) );
  }else if(e+0.5 >=NN){
    return( 0.67 * (NN-e) );
  }else{
    return( NN * ((e+0.5+coeff/2+sqrt(coeff*((e+0.5)*(1-(e+0.5)/NN)+coeff/4)))
                 /(NN+coeff)) -e  );
  }
}

/*----------------------------------------------------------------------------*/
/* Error-Based Pruning: set estErrNode on node and on every node below it.    */
/* The error count of a node is total * tShr[i] summed over all classes i     */
/* other than the node's own class; estErrNode is that count plus the         */
/* estimate returned by calEBPerr().                                          */
/*----------------------------------------------------------------------------*/
static void setEBPNodeErr(struct Node *node){
  double errCnt=0;
  int c;

  /* records on this node that do not belong to the node's class */
  for(c=0; c<ClassSize; c++){
    if(c==node->cls) continue;
    errCnt += (double)node->trnCnt.total * node->trnCnt.tShr[c];
  }
  node->estErrNode = calEBPerr((double)node->trnCnt.total,errCnt)+errCnt;

  /* continue below inner nodes only */
  if( node->nodeType!=0 ){
    setEBPNodeErr(node->uNode);
    setEBPNodeErr(node->mNode);
  }
}

/*----------------------------------------------------------------------------*/
/* Error-Based Pruning: sum the estimated errors of the leaves below node.    */
/* For an inner node the sum is stored in estErrLeaf and returned; for a leaf */
/* its own estErrNode is returned (estErrLeaf is not written).                */
/*----------------------------------------------------------------------------*/
static double setEBPLeafErr(struct Node *node){
  double uErr;
  double mErr;

  if( node->nodeType==0 ){
    return(node->estErrNode);
  }

  uErr=setEBPLeafErr(node->uNode);
  mErr=setEBPLeafErr(node->mNode);
  node->estErrLeaf= uErr+mErr;
  return(node->estErrLeaf);
}

/*----------------------------------------------------------------------------*/
/* Error-Based Pruning: mark nodes as pruned, top-down.                       */
/* An inner node is pruned when the estimated error of the leaves below it    */
/* (estErrLeaf) exceeds its own estimate (estErrNode); nodes below a pruned   */
/* node are not examined.                                                     */
/*----------------------------------------------------------------------------*/
static void pruneEBP(struct Node *node){

  if( node->nodeType==0 ){
    return;
  }

  if( node->estErrNode < node->estErrLeaf ){
    node->pruned=1;
  }else{
    pruneEBP(node->uNode);
    pruneEBP(node->mNode);
  }
}

/*============================================================================*/
/*Reduced Error Pruning                                                       */
/*============================================================================*/
/*----------------------------------------------------------------------------*/
/* Reduced Error Pruning: set costNode on node and on every node below it.    */
/* costNode is calCndCntCost() applied to the pruning counter (prnCnt) and    */
/* the node's class.                                                          */
/*----------------------------------------------------------------------------*/
static void setREPNodeCost(struct Node *node, struct Cost *cost){

  node->costNode=calCndCntCost(&node->prnCnt,cost,node->cls);

  /* continue below inner nodes only */
  if( node->nodeType!=0 ){
    setREPNodeCost(node->uNode,cost);
    setREPNodeCost(node->mNode,cost);
  }
}

/*----------------------------------------------------------------------------*/
/* Reduced Error Pruning: sum the costs of the leaves below node.             */
/* For an inner node the sum is stored in costLeaf and returned; for a leaf   */
/* its own costNode is returned (costLeaf is not written).                    */
/*----------------------------------------------------------------------------*/
static double setREPLeafCost(struct Node *node){
  double uCost;
  double mCost;

  if( node->nodeType==0 ){
    return(node->costNode);
  }

  uCost=setREPLeafCost(node->uNode);
  mCost=setREPLeafCost(node->mNode);
  node->costLeaf= uCost+mCost;
  return(node->costLeaf);
}

/*----------------------------------------------------------------------------*/
/* Reduced Error Pruning: mark nodes as pruned, top-down.                     */
/* An inner node is pruned when the cost of the leaves below it (costLeaf)    */
/* exceeds its own cost (costNode); nodes below a pruned node are not         */
/* examined.                                                                  */
/*----------------------------------------------------------------------------*/
static void pruneREP(struct Node *node){

  if( node->nodeType==0 ){
    return;
  }

  if( node->costNode < node->costLeaf ){
    node->pruned=1;
  }else{
    pruneREP(node->uNode);
    pruneREP(node->mNode);
  }
}

/*----------------------------------------------------------------------------*/
/* Pruning: collapse inner nodes whose two children predict the same class.   */
/* Children that are unpruned inner nodes are processed first (bottom-up);    */
/* the node is then pruned when both children are terminal (leaf or pruned)   */
/* and carry the same class.                                                  */
/*----------------------------------------------------------------------------*/
static void pruneSameClass(struct Node *node){

  struct Node *u;
  struct Node *m;
  int uTerminal;
  int mTerminal;

  if( node->nodeType==0 ) return;

  u=node->uNode;
  m=node->mNode;

  if( u->nodeType==1 && !u->pruned ) pruneSameClass(u);
  if( m->nodeType==1 && !m->pruned ) pruneSameClass(m);

  /* evaluated after the recursion, which may have pruned the children */
  uTerminal = ( u->nodeType==0 || u->pruned );
  mTerminal = ( m->nodeType==0 || m->pruned );

  if( uTerminal && mTerminal && u->cls == m->cls ){
    node->pruned=1;
  }
}

/*============================================================================*/
/* Pruning entry point                                                        */
/* *optPMD.str=='1' : Error-Based Pruning (same as C4.5), confidence level    */
/*                    taken from optCNF.val (percent)                         */
/* *optPMD.str=='2' : Reduced Error Pruning using the pruning data set prn    */
/* any other value  : neither of the two methods is run                       */
/* In every case nodes whose children predict the same class are pruned       */
/* afterwards.                                                                */
/*============================================================================*/
static void pruneTree(struct Tree *tree,struct Data *prn,struct Cost *cost){

  int method = *optPMD.str;

  if(method=='1'){
    CF=optCNF.val/(double)100;
    setEBPNodeErr(tree->topNode);
    setEBPLeafErr(tree->topNode);
    pruneEBP(tree->topNode);
  }else if(method=='2'){
    setDatOnTree(tree,prn,'p');
    setREPNodeCost(tree->topNode,cost);
    setREPLeafCost(tree->topNode);
    pruneREP(tree->topNode);
  }

  /* prune nodes whose leaves all predict the same class */
  pruneSameClass(tree->topNode);
}

/*############################################################################*/
/* Display / output functions                                                 */
/*############################################################################*/
/*--------------------------------------------------------------*/
/* Print one pattern (regular expression) to stdout:            */
/* "(str)" or "(seq)" by type, an optional "^[n]" begin range,  */
/* the pattern string, and an optional "$[n]" end range.        */
/*--------------------------------------------------------------*/
static void prnRegexp(struct Regexp *reg){

  if(reg->type==0){
    printf("(str)");
  }else{
    printf("(seq)");
  }

  if(reg->bgnRng!=0){
    printf("^[%d]",reg->bgnRng);
  }

  prnStr(stdout,reg->str);

  if(reg->endRng!=0){
    printf("$[%d]",reg->endRng);
  }
}

/*---------------------------------*/
/* ڤΥΡɤɽƵؿ*/
/*---------------------------------*/
static void showNode(
  struct Tree *tree,
  struct Node *node,
  char trl,          /* match or unmatch */
  char flg){         /* 't':training, 's':test, 'p':pruned */

  char *clsNam;
  char *clsVal;
  int   hitCnt=0;
  int   supCnt=0;
  struct Category *catAdd;
  int i;

  for(i=0; i<node->level; i++){
    printf("  ");
  }
  switch(trl){
  case 'm': /*match*/
    printf("then ");
    break;
  case 'u': /*unmatch*/
    printf("else ");
    break;
  case 't': /*top Node*/
    break;
  }

  /*LeafʤХ饹̷ɽ & */
  if(node->nodeType==0 || node->pruned==1){
    clsNam = MssFlds2name(fcls,0);
    clsVal = tree->dat->cls->str[node->cls];
    switch(flg){
    case 't':
      hitCnt = node->trnCnt.tCnt[node->cls],
      supCnt = node->trnCnt.total;
      break;
    case 's':
      hitCnt = node->tstCnt.tCnt[node->cls],
      supCnt = node->tstCnt.total;
      break;
    case 'p':
      hitCnt = node->prnCnt.tCnt[node->cls],
      supCnt = node->prnCnt.total;
      break;
    }
    printf("$%s=\"%s\" (hit/sup)=(%d/%d)\n",clsNam,clsVal,hitCnt,supCnt);
    return;
  }

  /*Ρɤʤ*/
  if(node->nodeType==1){
    switch(node->attType){
    case 0: /*pat*/
      printf("if($%s=\"", MssFlds2name(fpat,node->attFldNo) );
      prnRegexp((tree->dat->pat+node->attFldNo)->regTbl->reg+node->attPatNo);
      printf("\")\n");
      break;
    case 1: /*num*/
      printf("if($%s<=%g)\n",MssFlds2name(fnum,node->attFldNo), node->numTH);
      break;
    case 2: /*category*/
      printf("if($%s is in [ ",MssFlds2name(fcat,node->attFldNo) );
      catAdd=tree->dat->cat+node->attFldNo;
      for(i=0; i<catAdd->cnt; i++){
        if(node->catList[i]=='M'){
          printf("%s ",*(catAdd->valName+i) );
        }
      }
      printf("])\n");
      break;
    }
  }
  showNode(tree,node->mNode,'m',flg);
  showNode(tree,node->uNode,'u',flg);
}

/*---------------------------------*/
/* ĥ꡼ɽᥤ              */
/* 1.Command Line                  */
/* 2.Cost Matrix                   */
/* 3.Alphabet-Index                */
/* 4.Decision Tree                 */
/* 5.Classification Table          */
/* 6.Summary                       */
/*---------------------------------*/
void showTree(struct Tree *tree,struct Cost *cost){
  int i,j,k;
  struct Map *map;

  j=0;
  printf("=================================\n");
  printf("RESULT\n");
  printf("=================================\n");
  printf("[Command Line]\n");

  for(i=0; i<mssGV.argc-1; i++){
    printf("%s ",*(mssGV.argv+i));
  }
  printf("%s\n\n",*(mssGV.argv+i));

  printf("[Random Seed]\n");
  printf("%d\n",usedRandSeed);

  showCost(cost);
  
  printf("\n[Alphabet-Index]\n");
  for(i=0; i<tree->dat->patCnt; i++){
    map=(tree->dat->pat+i)->map;
    printf("## fieldName=$%s\n",MssFlds2name(fpat,i) );

    printf("alphabet original string\n");
    for(j=0;j<map->alpSiz;j++){
      printf("%8d %s\n",j+1,*(map->numAlp+j));
    }
    printf("\n");

    printf("alphabet : ");
    prnStr(stdout,map->alp );
    printf("\n");
    printf("index    : ");
    prnStr(stdout,map->idx );
    printf("\n\n");

    for(j=0;j<map->idxSiz;j++){
      printf("-- Index(%d) has following original strings---\n",j+1);
      for(k=0; k<map->alpSiz; k++){
        if(*(map->idx+k) == (usint)(j+1) ){
          printf("%s,",*(map->numAlp+k));
        }
      }
      printf("\n");
    }
    /*
    for(j=0; j<(tree->dat->pat+i)->regTbl->cnt; j++){
      printf("#%02d : ",j);
      prnRegexp((tree->dat->pat+i)->regTbl->reg+j );
      printf("(%f)\n", ((tree->dat->pat+i)->regTbl->reg+j)->objVal );
    }
    */
  }    

  printf("\n---------------------------------\n");
  printf("          Training Data\n");
  printf("---------------------------------\n");
  printf("[Decision Tree]\n");
  showNode(tree,tree->topNode,'t','t');
  showClsTbl(&tree->trnRsl,tree->dat);
  printf("numberOfLeaves=%d\n",tree->leafCnt);
  printf("deepestLevel=%d\n",tree->deepest);
  printf("\n---------------------------------\n");
  printf("            Test Data\n");
  printf("---------------------------------\n");
  printf("[DECISION TREE]\n");
  showNode(tree,tree->topNode,'t','s');
  printf("---------------------------------\n");
  showClsTbl(&tree->tstRsl,tree->dat);
  printf("numberOfLeaves=%d\n",tree->leafCnt);
  printf("deepestLevel=%d\n",tree->deepest);
  printf("=================================\n");
}

/* Print a one-line summary of the tree to stderr (no trailing newline):
   test-data cost, test-data geoMean and the leaf count. */
void showTreeSum(struct Tree *tree){
  fprintf(stderr,
          "cost=%f geoMean=%f leafCnt=%d",
          tree->tstRsl.tCst,
          tree->tstRsl.geoMean,
          tree->leafCnt);
}


/*############################################################################*/
/* Public API                                                                 */
/*   1.decisionTree                                                           */
/*   2.updateTree                                                             */
/*   3.freeTree                                                               */
/*############################################################################*/
/*----------------------------------------------------------------------------*/
/* ᥤ롼                                                   */
/*----------------------------------------------------------------------------*/
struct Tree *decisionTree(
  struct Data *trn,   /*ȥ졼˥󥰥ǡ*/
  struct Data *tst,   /*ƥȥǡ*/
  struct Data *prn,   /*޴ǡ*/
  struct Cost *cost){ /*ɽ*/

  struct Tree   *tree;
  struct Pointer pointer; /*ĥ꡼ѤattTblΥǥå*/
  int i;

  tree = mssCalloc(sizeof(struct Tree),"tree");

  /*pointerĤ*/
  pointer.recCnt=trn->cnt;
  pointer.recNo =mssCalloc(trn->cnt * sizeof(int),"decTreePointer");
  for(i=0; i<trn->cnt; i++) *(pointer.recNo+i)=i;

  tree->topNode   = makeTree(trn,cost,&pointer,NULL);
  mssFree(pointer.recNo);

  /*ץǡ¤ΤΥԡ(ºݤmapHash,mapTbl,cat->catListΤߥԡ)*/
  tree->dat=cpyDatMapReg(trn);

  /*޴*/
  pruneTree(tree,prn,cost);

  /*ĥ꡼¤ΤγƼѿΥå*/
  setLevel(tree->topNode,tree,0);

  /*ɽκ(ȥ졼˥)*/
  iniClsTbl(&tree->trnRsl);
  setClsTbl(tree->trnRsl.cnt, tree->topNode, 't');
  calClsTbl(&tree->trnRsl,cost); /*Ƽ͡ɸ׻*/

  /*ɽκ(ƥ)*/
  iniClsTbl(&tree->tstRsl);
  setDatOnTree(tree,tst,'s');
  setClsTbl(tree->tstRsl.cnt, tree->topNode, 's');
  calClsTbl(&tree->tstRsl,cost); /*Ƽ͡ɸ׻*/

  return(tree);
}

/*----------------------------------------------------------------------------*/
/* Release a tree created by decisionTree(). A NULL tree is ignored.          */
/* freeTreeTbl() frees the category lists and collects every child node;      */
/* the collected nodes, the top node, the copied data and the Tree itself     */
/* are then freed.                                                            */
/*----------------------------------------------------------------------------*/
void freeTree(struct Tree *tree){
  struct Node **nodeTbl=NULL;
  int           nodeCnt=0;
  int           n=0;

  if(tree==NULL){
    return;
  }

  /* gather all nodes below the top node */
  nodeTbl=freeTreeTbl(tree->topNode,nodeTbl,&nodeCnt);

  while(n<nodeCnt){
    mssFree(nodeTbl[n]);
    n++;
  }
  mssFree(nodeTbl);

  mssFree(tree->topNode);
  freeDatMapReg(tree->dat);
  mssFree(tree);
}

/*----------------------------------------------------------------------------*/
/* Keep the better of two trees and free the other one.                       */
/*  - if either argument is NULL the other one is returned, nothing is freed  */
/*  - otherwise the tree with the smaller training cost (trnRsl.tCst) wins    */
/*  - on equal cost, tree wins only when it has strictly fewer leaves;        */
/*    otherwise betterTree wins                                               */
/* Returns the surviving tree.                                                */
/*----------------------------------------------------------------------------*/
struct Tree *updateTree(
  struct Tree *tree,
  struct Tree *betterTree){

  struct Tree *keep;
  struct Tree *drop;
  int keepFirst;

  /* nothing to compare */
  if(tree==NULL)       return(betterTree);
  if(betterTree==NULL) return(tree);

  if(tree->trnRsl.tCst < betterTree->trnRsl.tCst){
    keepFirst=1;
  }else if(tree->trnRsl.tCst > betterTree->trnRsl.tCst){
    keepFirst=0;
  }else{
    /* same cost: decide by the number of leaves */
    keepFirst=(tree->leafCnt < betterTree->leafCnt);
  }

  if(keepFirst){
    keep=tree;
    drop=betterTree;
  }else{
    keep=betterTree;
    drop=tree;
  }

  freeTree(drop);
  return(keep);
}

