
/*******

v8.0 on pic:

AMORTIZE : 8.100 bpb , 23k byps
NO AMORT : 0.795 bpb ,  2k byps

I once got as good as 0.758 (v7.3) - what happened?
	config settings are the same, it must be something
	algorithmic.  possibly the run-transform ?

********/

/*
 * Compile-time switches for this file.
 *
 * STATS      : when defined, <stdio.h> is included, the Stats_* counters are
 *              added to struct PPMdetInfo, and PPMdet_CleanUp prints a report.
 *              Currently disabled.
 */
/* #define STATS */

/* PERMIT_LRU : compiles in the PPMdet_CheckLRU call at the top of
 *              PPMdet_EncodeC / PPMdet_DecodeC (still gated at run time by
 *              PPMI->DoLRU). */
#define PERMIT_LRU

/* AMORTIZE   : limits each context search to AMORTIZE_MAX fully-compared
 *              candidates. */
#define AMORTIZE
/* DO_MTF     : when defined, matched index and context nodes are moved to the
 *              front of their lists.  Currently disabled, so CONTEXT_MTF is
 *              a no-op. */
/* #define DO_MTF */

#include <crbinc/inc.h>
#include <crbinc/memutil.h>
#include <crbinc/mempool.h>
#include <crbinc/arithc.h>

#include "exclude.h"

#ifdef STATS
#include <stdio.h>
#endif /* STATS */

/* config settings ; shared with PPMZ */

#include "ppmz_cfg.h"

/* Context count at which PPMdet_CheckLRU starts evicting (original note: 128 k). */
#define LRU_MaxNumContexts LRU_MaxNumNodes

/* Number of bits used for the "match count" part of the escape-table index. */
#define DCT_BITS       4

/* With AMORTIZE defined: max candidates fully compared per context search. */
#define AMORTIZE_MAX  16

/* log2 of the number of buckets in PPMdetInfo.ContextIndeces[] */
#define PPMHASHBITS 14

/*
 * Escape-table ("DT") indexing.  DTSX_BITS comes from ppmz_cfg.h and `min`
 * is expected from the included project headers.
 *
 * Every macro parameter is parenthesized so that an expression argument
 * (e.g. PPMHASH(a|b)) is hashed as a whole instead of being torn apart by
 * operator precedence.
 *
 *  DSXHASH(x)  : low DTSX_BITS bits of x
 *  DCTHASH(x)  : (x-1) clamped to at most (1<<DCT_BITS)-1 ; callers pass x >= 1
 *  DTHASH(c,x) : DCTHASH(c) in the high bits, DSXHASH(x) in the low bits
 */
#define DSXHASH(x)    ( (x) & ((1<<DTSX_BITS)-1) )
#define DCTHASH(x)    ( min(((x)-1),((1<<DCT_BITS)-1)) )

#define DT_SIZE       (1<<(DCT_BITS+DTSX_BITS))
#define DTHASH(c,x)   ( (DCTHASH(c)<<DTSX_BITS) + DSXHASH(x) )

#define DT_NUMCOUNTS  (1<<DCT_BITS)
#define NUM_O1_CHARS  (1<<DTSX_BITS)

/* Bucket hash for a context word: fold x>>9 into x, keep PPMHASHBITS bits. */
#define PPMHASH_SIZE  (1<<PPMHASHBITS)
#define PPMHASH(x)    ( (((x)>>9)^(x)) & (PPMHASH_SIZE-1) )



/** context list operations macros **/

/*
 * The per-index context list is doubly linked.  The list head lives in
 * PPMdet_ContextIndex.ContextList, and the first node's Prev points at the
 * index itself, cast to a PPMdet_ContextInfo pointer.  That only works
 * because ContextList is the first member of the index and Next is the first
 * member of the info node, so "Prev->Next" on the first node writes the
 * index's ContextList.  Do not reorder those members.
 *
 * All macros are wrapped in do { } while (0) so that they behave as a single
 * statement (safe under an unbraced if/else) and their parameters are
 * parenthesized.  Arguments are still evaluated more than once: pass only
 * side-effect-free expressions.
 */

/* Insert CurContextInfo at the head of CurIndex's context list. */
#define CONTEXT_ADDHEAD(CurContextInfo,CurIndex)	do {											\
(CurContextInfo)->Next = (CurIndex)->ContextList;												\
(CurContextInfo)->Prev = (struct PPMdet_ContextInfo *)(CurIndex);								\
if ( (CurContextInfo)->Next ) (CurContextInfo)->Next->Prev = (CurContextInfo);					\
(CurContextInfo)->Prev->Next = (CurContextInfo);												\
} while (0)

/* Unlink CurContextInfo from its context list (its own Next/Prev are left as they were). */
#define CONTEXT_CUT(CurContextInfo)		do {													\
(CurContextInfo)->Prev->Next = (CurContextInfo)->Next;											\
if ( (CurContextInfo)->Next ) (CurContextInfo)->Next->Prev = (CurContextInfo)->Prev;			\
} while (0)

#ifdef DO_MTF

/* Move CurContextInfo to the head of CurIndex's list unless it is already there. */
#define CONTEXT_MTF(CurContextInfo,CurIndex)	do {											\
if ( (CurContextInfo) != (CurIndex)->ContextList )												\
 { CONTEXT_CUT(CurContextInfo); CONTEXT_ADDHEAD(CurContextInfo,CurIndex); }						\
} while (0)

#else

/* Move-to-front disabled: expands to an empty statement. */
#define CONTEXT_MTF(CurContextInfo,CurIndex)	do { } while (0)

#endif /* DO_MTF */

/* */

/*
 * One node per distinct Context value seen; nodes that hash to the same
 * bucket of PPMdetInfo.ContextIndeces[] are chained through Next.
 *
 * ContextList MUST stay the first member: CONTEXT_ADDHEAD / CONTEXT_CUT cast
 * a pointer to this struct to (struct PPMdet_ContextInfo *) and write through
 * its ->Next, which has to land on ContextList.
 */
struct PPMdet_ContextIndex
  {
  struct PPMdet_ContextInfo * ContextList; /* head of this context's info list; same place as Info->Next */
  struct PPMdet_ContextIndex * Next;       /* next index node in the same hash bucket */
  
  ulong Context;                           /* the context word this node was created for */
  };

/*
 * One candidate high-order context together with the single symbol it
 * predicts.  Each node sits on two lists at once: the per-index context list
 * (Next/Prev) and the global LRU ring anchored at PPMdetInfo.LRU_Base
 * (LRU_Next/LRU_Prev).
 *
 * Next MUST stay the first member (see the cast in CONTEXT_ADDHEAD).
 */
struct PPMdet_ContextInfo
  {
  struct PPMdet_ContextInfo *Next;  /* next node in the index's context list */
	struct PPMdet_ContextInfo *Prev;  /* previous node, or the owning index cast to this type */
	struct PPMdet_ContextInfo *LRU_Next,*LRU_Prev; /* links in the LRU ring */

  ubyte * HOContextPtr; /* caller-supplied pointer; bytes are compared walking backwards from it */
  uword HOminLen;       /* a search only accepts this node when its match length exceeds this */
  uword Count;          /* starts at 1, incremented each time Char turns out to be the symbol */
  uword Char;           /* the symbol this context predicts */
  uword Pad; /* unused; presumably keeps the struct size aligned -- TODO confirm */
  };

/*
 * Complete state of one deterministic-context model instance.
 * Created by PPMdet_Init, released by PPMdet_CleanUp.
 */
struct PPMdetInfo
  {
  long Error; /* 0 while healthy; this file sets it to 9 when a pool allocation or FreePoolHunk fails */

  /* copied in from users */
  struct FAI * FAI;                 /* arithmetic coder state used by the encode/decode calls */
  long MinOrder,MaxOrder,MinCount;  /* match-length bounds and minimum Count required before a context is coded */
	bool DoLRU;                       /* run-time enable for PPMdet_CheckLRU (needs PERMIT_LRU at compile time) */

  /* state save between DecodeC and DecodeGotC */
  uword * PPMdet_WriteC_Ptr;                        /* where to store the symbol of a freshly created context, or NULL */
  ubyte * PPMdet_WriteC_CntxPtr;                    /* HOContextPtr seen by DecodeC */
  struct PPMdet_ContextInfo * PPMdet_WriteC_GotCI;  /* context matched by DecodeC that still needs its update, or NULL */
  long PPMdet_WriteC_GotML;                         /* match length of that context (after the +4 adjustment) */
  struct PPMdet_ContextIndex * PPMdet_WriteC_Index; /* index node the context list belongs to */

  MemPool * ContextPool;  /* allocator for struct PPMdet_ContextInfo */
  MemPool * IndexPool;    /* allocator for struct PPMdet_ContextIndex */
	struct PPMdet_ContextInfo * LRU_Base; /* sentinel of the LRU ring; taken from ContextPool in PPMdet_Init */

  uword * DT_EscCounts;   /* DT_SIZE escape counts, indexed by DTHASH(Count,Context) */
  uword * DT_TotCounts;   /* DT_SIZE total counts, same indexing */

#ifdef STATS
  long Stats_NumSeen;       /* calls to PPMdet_EncodeC */
  long Stats_NumPos;        /* symbols coded as a hit */
  long Stats_NumNeg;        /* escapes coded */
  long Stats_NumMaxL;       /* matches that reached the maximum length */
  long Stats_NumAmortized;  /* searches stopped by the AMORTIZE_MAX limit */
#endif /* STATS */

  struct PPMdet_ContextIndex * ContextIndeces[PPMHASH_SIZE]; /* hash buckets, indexed by PPMHASH(Context) */
  };

/* forward declaration: PPMdet_Init calls this on its failure paths; defined at the end of the file */
void PPMdet_CleanUp(struct PPMdetInfo * PPMI);

/*
 * Allocate and set up a model instance.
 *
 *  FAI       : arithmetic coder state, stored and used by the encode/decode calls
 *  MinOrder, MaxOrder, MinCount, DoLRU : copied into the instance unchanged
 *
 * Returns the new instance, or NULL if any allocation fails (everything
 * allocated so far is released through PPMdet_CleanUp).
 */
struct PPMdetInfo * PPMdet_Init(struct FAI * FAI,long MinOrder,long MaxOrder,long MinCount,bool DoLRU)
{
struct PPMdetInfo * Model;
int Slot;

Model = AllocMem(sizeof(struct PPMdetInfo),MEMF_ANY|MEMF_CLEAR);
if ( Model == NULL )
  return(NULL);

/* node allocators */
Model->ContextPool = AllocPool(sizeof(struct PPMdet_ContextInfo),1024,1024);
if ( Model->ContextPool == NULL )
  goto Init_Failed;

Model->IndexPool = AllocPool(sizeof(struct PPMdet_ContextIndex),1024,1024);
if ( Model->IndexPool == NULL )
  goto Init_Failed;

/* escape / total count tables */
Model->DT_EscCounts = malloc(sizeof(uword)*DT_SIZE);
if ( Model->DT_EscCounts == NULL )
  goto Init_Failed;

Model->DT_TotCounts = malloc(sizeof(uword)*DT_SIZE);
if ( Model->DT_TotCounts == NULL )
  goto Init_Failed;

/* every slot starts with escape count 1; slots of the lowest count bucket
   start with total 7, all others with total 100 */
for(Slot=0;Slot<DT_SIZE;Slot++)
  {
  Model->DT_EscCounts[Slot] = 1;
  Model->DT_TotCounts[Slot] = ( (Slot>>DTSX_BITS) == 0 ) ? 7 : 100;
  }

/* caller parameters */
Model->FAI      = FAI;
Model->MinOrder = MinOrder;
Model->MaxOrder = MaxOrder;
Model->MinCount = MinCount;
Model->DoLRU    = DoLRU;
Model->Error    = 0;

/* the LRU sentinel is an ordinary context node; an empty ring points at itself */
Model->LRU_Base = GetPoolHunk(Model->ContextPool,0);
if ( Model->LRU_Base == NULL )
  goto Init_Failed;

Model->LRU_Base->LRU_Next = Model->LRU_Base;
Model->LRU_Base->LRU_Prev = Model->LRU_Base;

return(Model);

Init_Failed:
PPMdet_CleanUp(Model);
return(NULL);
}

/*
 * LRU ring helpers.  The ring is doubly linked through LRU_Next / LRU_Prev
 * and anchored at PPMI->LRU_Base; LRU_Base->LRU_Next is the most recently
 * used node and LRU_Base->LRU_Prev the least recently used one.
 *
 * Both macros are single statements (do { } while (0)) with parenthesized
 * parameters; arguments are evaluated more than once, so pass only
 * side-effect-free expressions.
 */

/* unlink CI from the ring (a node that is not linked is left untouched) and clear its links */
#define PPMdet_CutLRU(CI) do { \
if ( (CI)->LRU_Next ) (CI)->LRU_Next->LRU_Prev = (CI)->LRU_Prev; \
if ( (CI)->LRU_Prev ) (CI)->LRU_Prev->LRU_Next = (CI)->LRU_Next; \
(CI)->LRU_Next = NULL; (CI)->LRU_Prev = NULL; } while (0)

/* move CI to LRU_Head */
#define PPMdet_UpdateLRU(PPMI,CI) do { PPMdet_CutLRU(CI); \
(CI)->LRU_Next = (PPMI)->LRU_Base->LRU_Next; (CI)->LRU_Prev = (PPMI)->LRU_Base; \
(CI)->LRU_Next->LRU_Prev = (CI); (CI)->LRU_Prev->LRU_Next = (CI); } while (0)

/*
 * Evict at most one context: once the context pool holds LRU_MaxNumContexts
 * or more active items, the node at the tail of the LRU ring is unlinked
 * from both of its lists and handed back to the pool.
 * Sets PPMI->Error to 9 if the pool refuses the node.
 */
void PPMdet_CheckLRU(struct PPMdetInfo * PPMI)
{
struct PPMdet_ContextInfo * Victim;

if ( PPMI->ContextPool->NumItemsActive >= LRU_MaxNumContexts )
  {
  Victim = PPMI->LRU_Base->LRU_Prev;

  /* the ring holding only its sentinel means there is nothing to evict */
  if ( Victim != PPMI->LRU_Base )
    {
    CONTEXT_CUT(Victim);
    PPMdet_CutLRU(Victim);

    if ( ! FreePoolHunk(PPMI->ContextPool,Victim) )
      {
      PPMI->Error = 9;
      }
    }
  }
}

/*
 * Try to code Symbol with the longest matching deterministic context.
 *
 *  PPMI         : model state
 *  Symbol       : the symbol to code
 *  Context      : context word used to select the index node (hashed with PPMHASH)
 *  HOContextPtr : pointer into the caller's data; bytes at and before it are
 *                 compared against stored contexts (no lower bound is checked here)
 *  Exclusion    : exclusion set; a context whose Char is excluded is not coded,
 *                 and an escaped Char is added to it
 *
 * return-value indicates whether char was written with
 *  current order or not.  If not, you MUST write it with
 *  some other model.
 *
 * Returns 0 as well when a pool allocation fails; PPMI->Error is then 9.
 *
 * Every model update made here is mirrored by PPMdet_DecodeC plus
 * PPMdet_DecodeGotC; the two sides must be changed together.
 */
bool PPMdet_EncodeC(struct PPMdetInfo * PPMI,long Symbol,ulong Context,ubyte * HOContextPtr,exclusion * Exclusion)
{
struct PPMdet_ContextIndex *CurIndex,*PrevIndex;
struct PPMdet_ContextInfo *CurContextInfo,*GotCI;
/* match lengths are counted relative to HOContextPtr; 4 is added back once a match is chosen */
long Hash,ml,GotML,MaxML = PPMI->MaxOrder-4;
ubyte *ap,*bp;
#ifdef AMORTIZE
int Amortizer;
#endif

#ifdef PERMIT_LRU
/* make room first: may evict the least recently used context */
if ( PPMI->DoLRU ) PPMdet_CheckLRU(PPMI);
#endif

#ifdef STATS
PPMI->Stats_NumSeen++;
#endif /* STATS */

Hash = PPMHASH(Context);

/* look for the index node of this exact Context in its hash bucket */
PrevIndex = NULL;
CurIndex = PPMI->ContextIndeces[Hash];
while(CurIndex)
  {
  if ( CurIndex->Context == Context )
    {
#ifdef DO_MTF
    if ( PrevIndex )
      {
      /* move the index node to the front of its bucket */
      PrevIndex->Next = CurIndex->Next;
      CurIndex->Next = PPMI->ContextIndeces[Hash];
      PPMI->ContextIndeces[Hash] = CurIndex;
      }
#endif
    goto PPMdet_Encode_FoundCurIndex;
    }
  PrevIndex = CurIndex;
  CurIndex = CurIndex->Next;
  }

/* index not found : create an empty one at the head of the bucket */
if ( (CurIndex = GetPoolHunk(PPMI->IndexPool,1)) == NULL )
  { PPMI->Error = 9; return(0); }
CurIndex->Context = Context;
CurIndex->Next = PPMI->ContextIndeces[Hash];
PPMI->ContextIndeces[Hash] = CurIndex;
CurIndex->ContextList = NULL;

/***/
  PPMdet_Encode_FoundCurIndex:
/***/

/* search the index's context list for the longest acceptable match;
   GotML starts one below the shortest length that may be accepted */
CurContextInfo = CurIndex->ContextList;
GotML = PPMI->MinOrder - 5;
GotCI = NULL;
#ifdef AMORTIZE
Amortizer=0;
while(CurContextInfo && GotML < MaxML && Amortizer < AMORTIZE_MAX )
#else
while(CurContextInfo && GotML < MaxML)
#endif /* AMORTIZE */
  {
  /* cheap pre-test: a candidate can only beat GotML if the byte GotML
     positions back also matches.
     NOTE(review): when MinOrder < 5 the initial GotML is negative and this
     reads at a positive offset from both pointers -- confirm callers' MinOrder */
  if ( *(CurContextInfo->HOContextPtr - GotML) == *(HOContextPtr - GotML) )
    {
    /* count matching bytes walking backwards, capped at MaxML */
    ap = CurContextInfo->HOContextPtr;
    bp = HOContextPtr;
    ml = 0;
    while( *ap-- == *bp-- && ml < MaxML ) ml++;

    /* NOTE(review): HOminLen is stored below from GotML *after* the +4
       adjustment, while ml here does not include it -- confirm intended */
    if ( ml > GotML && ml > CurContextInfo->HOminLen )
      {
      GotML = ml;
      GotCI = CurContextInfo;
      }

#ifdef AMORTIZE
    /* only candidates that passed the pre-test count against the limit */
    Amortizer++;
#endif
    }

  CurContextInfo = CurContextInfo->Next;
  }
#ifdef AMORTIZE
#ifdef STATS
if ( Amortizer == AMORTIZE_MAX ) PPMI->Stats_NumAmortized ++;
#endif
#endif

if ( GotCI )
  {
  /* switch both lengths to their +4 form */
  GotML += 4; MaxML += 4;

	CONTEXT_MTF(GotCI,CurIndex);
	PPMdet_UpdateLRU(PPMI,GotCI);

  /* got context : code hit/escape only if its symbol is not excluded and
     it has been seen at least MinCount times */

  if ( !isExcluded(Exclusion,GotCI->Char) && GotCI->Count >= PPMI->MinCount )
    {
    long I,EscP,TotP;

    /* escape statistics slot, selected by the context's Count and by Context */
    I = DTHASH(GotCI->Count,Context);
  
    /* halve the counts once the total exceeds the coder's safe maximum */
    if ( PPMI->DT_TotCounts[I] > PPMI->FAI->FastArithCumProbMaxSafe )
      { PPMI->DT_EscCounts[I] >>= 1; PPMI->DT_TotCounts[I] >>= 1; }
  
    /* keep 0 < Esc < Tot so both outcomes have a non-empty range */
    if ( PPMI->DT_EscCounts[I] == 0 || PPMI->DT_TotCounts[I] <= PPMI->DT_EscCounts[I] )
      { PPMI->DT_EscCounts[I] ++; PPMI->DT_TotCounts[I] += 2; }
  
    EscP = PPMI->DT_EscCounts[I];
    TotP = PPMI->DT_TotCounts[I];
  
    if ( GotCI->Char == Symbol )
      {
#ifdef STATS
PPMI->Stats_NumPos++;
#endif /* STATS */

      /* hit : coded with (EscP,TotP,TotP) -- presumably (low,high,total) */
      FastArithEncodeRange(PPMI->FAI,EscP,TotP,TotP);
      PPMI->DT_TotCounts[I] += DT_INC_T;
      /* NOTE(review): Count is a 16-bit uword and is not saturated */
      GotCI->Count++;
      return(1);
      }
    else
      {
#ifdef STATS
PPMI->Stats_NumNeg++;
#endif /* STATS */

      /* escape : coded with (0,EscP,TotP) */
      FastArithEncodeRange(PPMI->FAI,0,EscP,TotP);
      PPMI->DT_EscCounts[I] += DT_INC_E;
      PPMI->DT_TotCounts[I] += DT_INC_T;

			/* the fallback model need not consider this symbol */
			setExcluded(Exclusion,GotCI->Char);
      }
    }

  /* model update for the not-coded / escaped cases; skipped entirely when
     the match already has the maximum length */
  if ( GotML != MaxML )
    {
    if ( GotCI->Char == Symbol )
      {
      GotCI->Count++;
      }
    else
      {
      /* wrong prediction: raise the matched context's minimum length ... */
      GotCI->HOminLen = GotML + 1;
  
      /* ... and add a new context at the current position, with the same
         minimum length, predicting the symbol that actually occurred */
      if ( (CurContextInfo = GetPoolHunk(PPMI->ContextPool,1)) == NULL )
        { PPMI->Error = 9; return(0); }
      
      CurContextInfo->Char = Symbol;
      CurContextInfo->Count = 1;
      CurContextInfo->HOContextPtr = HOContextPtr;
      CurContextInfo->HOminLen = GotML + 1;

			CurContextInfo->Next=CurContextInfo->Prev=CurContextInfo->LRU_Next=CurContextInfo->LRU_Prev=NULL;

			CONTEXT_ADDHEAD(CurContextInfo,CurIndex);
			PPMdet_UpdateLRU(PPMI,CurContextInfo);
      }
    }
  else
    {
#ifdef STATS
PPMI->Stats_NumMaxL++;
#endif /* STATS */
    } 

  return(0);
  }

/* no acceptable context at all : record one for this position, nothing is coded */
if ( (CurContextInfo = GetPoolHunk(PPMI->ContextPool,1)) == NULL )
  { PPMI->Error = 9; return(0); }

CurContextInfo->Char = Symbol;
CurContextInfo->Count = 1;
CurContextInfo->HOContextPtr = HOContextPtr;
CurContextInfo->HOminLen = 0;

CurContextInfo->Next=CurContextInfo->Prev=CurContextInfo->LRU_Next=CurContextInfo->LRU_Prev=NULL;

CONTEXT_ADDHEAD(CurContextInfo,CurIndex);
PPMdet_UpdateLRU(PPMI,CurContextInfo);

return(0);
}

/*
 * Decoder counterpart of PPMdet_EncodeC: repeats the same context search and
 * either decodes the symbol from the matched context or reports that the
 * caller has to decode it with another model.
 *
 *  PPMI         : model state
 *  SymbolPtr    : receives the symbol, written only when 1 is returned
 *  Context, HOContextPtr, Exclusion : as for PPMdet_EncodeC
 *
 * Returns 1 when the symbol was decoded here and stored in *SymbolPtr.
 * Returns 0 otherwise; the caller MUST then read the symbol with some other
 *  model and pass it to PPMdet_DecodeGotC, which applies the model update
 *  saved in the PPMdet_WriteC_* fields.
 * After a return of 1, don't call DecodeGotC.
 *
 * Returns 0 as well when a pool allocation fails; PPMI->Error is then 9.
 */
bool PPMdet_DecodeC(struct PPMdetInfo * PPMI,long * SymbolPtr,ulong Context,ubyte * HOContextPtr,exclusion * Exclusion)
{
struct PPMdet_ContextIndex *CurIndex,*PrevIndex;
struct PPMdet_ContextInfo *CurContextInfo,*GotCI;
/* match lengths are counted relative to HOContextPtr; 4 is added back once a match is chosen */
long Hash,ml,GotML,MaxML = PPMI->MaxOrder-4;
ubyte *ap,*bp;
#ifdef AMORTIZE
int Amortizer;
#endif

#ifdef PERMIT_LRU
/* same eviction step as the encoder, so both sides hold the same contexts */
if ( PPMI->DoLRU ) PPMdet_CheckLRU(PPMI);
#endif

Hash = PPMHASH(Context);

/* look for the index node of this exact Context in its hash bucket */
PrevIndex = NULL;
CurIndex = PPMI->ContextIndeces[Hash];
while(CurIndex)
  {
  if ( CurIndex->Context == Context )
    {
#ifdef DO_MTF
    if ( PrevIndex )
      {
      /* move the index node to the front of its bucket */
      PrevIndex->Next = CurIndex->Next;
      CurIndex->Next = PPMI->ContextIndeces[Hash];
      PPMI->ContextIndeces[Hash] = CurIndex;
      }
#endif
    goto PPMdet_Decode_FoundCurIndex;
    }
  PrevIndex = CurIndex;
  CurIndex = CurIndex->Next;
  }

/* index not found : create an empty one at the head of the bucket */
if ( (CurIndex = GetPoolHunk(PPMI->IndexPool,1)) == NULL )
  { PPMI->Error = 9; return(0); }
CurIndex->Context = Context;
CurIndex->Next = PPMI->ContextIndeces[Hash];
PPMI->ContextIndeces[Hash] = CurIndex;
CurIndex->ContextList = NULL;

/***/
  PPMdet_Decode_FoundCurIndex:
/***/


/* search the index's context list for the longest acceptable match
   (identical to the search in PPMdet_EncodeC) */
CurContextInfo = CurIndex->ContextList;
GotML = PPMI->MinOrder - 5;
GotCI = NULL;
#ifdef AMORTIZE
Amortizer=0;
while(CurContextInfo && GotML < MaxML && Amortizer < AMORTIZE_MAX )
#else
while(CurContextInfo && GotML < MaxML)
#endif /* AMORTIZE */
  {
  /* cheap pre-test: the byte GotML positions back has to match too */
  if ( *(CurContextInfo->HOContextPtr - GotML) == *(HOContextPtr - GotML) )
    {
    /* count matching bytes walking backwards, capped at MaxML */
    ap = CurContextInfo->HOContextPtr;
    bp = HOContextPtr;
    ml = 0;
    while( *ap-- == *bp-- && ml < MaxML ) ml++;

    /* NOTE(review): HOminLen is stored (in PPMdet_DecodeGotC) from the
       +4-adjusted GotML, while ml here does not include it -- confirm intended */
    if ( ml > GotML && ml > CurContextInfo->HOminLen )
      {
      GotML = ml;
      GotCI = CurContextInfo;
      }

#ifdef AMORTIZE
    /* only candidates that passed the pre-test count against the limit */
    Amortizer++;
#endif
    }

  CurContextInfo = CurContextInfo->Next;
  }
#ifdef AMORTIZE
#ifdef STATS
if ( Amortizer == AMORTIZE_MAX ) PPMI->Stats_NumAmortized ++;
#endif
#endif

if ( GotCI )
  {
  /* switch both lengths to their +4 form */
  GotML += 4; MaxML += 4;

	CONTEXT_MTF(GotCI,CurIndex);
	PPMdet_UpdateLRU(PPMI,GotCI);

  /* got context : decode hit/escape only if its symbol is not excluded and
     it has been seen at least MinCount times */

  if ( !isExcluded(Exclusion,GotCI->Char) && GotCI->Count >= PPMI->MinCount )
    {
    long Target,EscP,TotP,I;
    
    /* escape statistics slot, selected by the context's Count and by Context */
    I = DTHASH(GotCI->Count,Context);
    
    /* halve the counts once the total exceeds the coder's safe maximum */
    if ( PPMI->DT_TotCounts[I] > PPMI->FAI->FastArithCumProbMaxSafe )
      { PPMI->DT_EscCounts[I] >>= 1; PPMI->DT_TotCounts[I] >>= 1; }
    
    /* keep 0 < Esc < Tot so both outcomes have a non-empty range */
    if ( PPMI->DT_EscCounts[I] == 0 || PPMI->DT_TotCounts[I] <= PPMI->DT_EscCounts[I] )
      { PPMI->DT_EscCounts[I] ++; PPMI->DT_TotCounts[I] += 2; }
    
    EscP = PPMI->DT_EscCounts[I];
    TotP = PPMI->DT_TotCounts[I];
    
    FastArithDecodeRange(PPMI->FAI,&Target,TotP);
    
    /* targets below EscP mean escape, the rest mean hit */
    if ( Target >= EscP )
      {
      FastArithDecodeRangeRemove(PPMI->FAI,EscP,TotP,TotP);
      PPMI->DT_TotCounts[I] += DT_INC_T;
      GotCI->Count++;
      *SymbolPtr = GotCI->Char;
      return(1);
      }
    else
      {
      FastArithDecodeRangeRemove(PPMI->FAI,0,EscP,TotP);
      PPMI->DT_EscCounts[I] += DT_INC_E;
      PPMI->DT_TotCounts[I] += DT_INC_T;
			
			/* the fallback model need not consider this symbol */
			setExcluded(Exclusion,GotCI->Char);
      }

    }

  /* the symbol is not known yet, so the model update is deferred to
     PPMdet_DecodeGotC; a maximum-length match needs no update at all */
  if ( GotML == MaxML )
    {
    PPMI->PPMdet_WriteC_Ptr = NULL;
    PPMI->PPMdet_WriteC_GotCI = NULL;
    }
  else
    {
    PPMI->PPMdet_WriteC_Ptr = NULL;
    PPMI->PPMdet_WriteC_GotCI = GotCI;
    PPMI->PPMdet_WriteC_GotML = GotML;
    PPMI->PPMdet_WriteC_CntxPtr = HOContextPtr;
    PPMI->PPMdet_WriteC_Index = CurIndex;
    }

  return(0);
  }

/* no acceptable context at all : create one for this position now; its
   Char is filled in by PPMdet_DecodeGotC through PPMdet_WriteC_Ptr */
if ( (CurContextInfo = GetPoolHunk(PPMI->ContextPool,1)) == NULL )
  { PPMI->Error = 9; return(0); }

PPMI->PPMdet_WriteC_GotCI = NULL;
PPMI->PPMdet_WriteC_Ptr = & ( CurContextInfo->Char ) ;
CurContextInfo->Count = 1;
CurContextInfo->HOContextPtr = HOContextPtr;
CurContextInfo->HOminLen = 0;

CurContextInfo->Next=CurContextInfo->Prev=CurContextInfo->LRU_Next=CurContextInfo->LRU_Prev=NULL;

CONTEXT_ADDHEAD(CurContextInfo,CurIndex);
PPMdet_UpdateLRU(PPMI,CurContextInfo);

return(0);
}

/*
 * Finish the model update that PPMdet_DecodeC had to postpone because the
 * symbol was not yet known.
 *
 *  PPMI : model state, holding the PPMdet_WriteC_* fields left by PPMdet_DecodeC
 *  GotC : the symbol the caller decoded with another model
 *
 * The saved state is cleared on the way out, except when allocating the
 * new context fails: then PPMI->Error is set to 9 and the function returns
 * at once.
 */
void PPMdet_DecodeGotC(struct PPMdetInfo * PPMI,uword GotC)
{
struct PPMdet_ContextInfo * Matched = PPMI->PPMdet_WriteC_GotCI;

if ( Matched == NULL )
  {
  /* DecodeC created a brand new context: just fill in its symbol */
  if ( PPMI->PPMdet_WriteC_Ptr != NULL )
    *(PPMI->PPMdet_WriteC_Ptr) = GotC;
  }
else if ( Matched->Char == GotC )
  {
  /* the matched context predicted correctly */
  Matched->Count += 1;
  }
else
  {
  struct PPMdet_ContextInfo * Fresh;

  /* wrong prediction: raise the matched context's minimum length ... */
  Matched->HOminLen = PPMI->PPMdet_WriteC_GotML + 1;

  /* ... and add a context for the decode position that predicts GotC */
  Fresh = GetPoolHunk(PPMI->ContextPool,1);
  if ( Fresh == NULL )
    {
    PPMI->Error = 9;
    return;
    }

  Fresh->Next = NULL;
  Fresh->Prev = NULL;
  Fresh->LRU_Next = NULL;
  Fresh->LRU_Prev = NULL;

  Fresh->HOContextPtr = PPMI->PPMdet_WriteC_CntxPtr;
  Fresh->HOminLen = PPMI->PPMdet_WriteC_GotML + 1;
  Fresh->Count = 1;
  Fresh->Char  = GotC;

  CONTEXT_ADDHEAD(Fresh,PPMI->PPMdet_WriteC_Index);
  PPMdet_UpdateLRU(PPMI,Fresh);
  }

PPMI->PPMdet_WriteC_Ptr = NULL;
PPMI->PPMdet_WriteC_GotCI = NULL;
}

/*
 * Release a model instance created by PPMdet_Init.  Accepts NULL and
 * partially initialised instances (PPMdet_Init calls it on failure).
 * With STATS defined, a usage report is printed first.
 *
 * Changes from the previous version:
 *  - the STATS report runs before the count tables are freed (it used to
 *    read DT_TotCounts / DT_EscCounts after they had been released)
 *  - DetAvg values are long and are printed with %ld
 *  - LRU_Base comes from ContextPool (GetPoolHunk in PPMdet_Init), so it is
 *    handed back with FreePoolHunk instead of being passed to smartfree
 */
void PPMdet_CleanUp(struct PPMdetInfo * PPMI)
{
if (! PPMI ) return;

#ifdef STATS
/* Stats_NumSeen is only non-zero after encoding, i.e. after a successful Init */
if ( PPMI->Stats_NumSeen )
  {
  printf("--------------------------\n");
  printf("N:Seen:%6ld Got:%6ld Esc:%6ld MaxL:%ld",
    PPMI->Stats_NumSeen,PPMI->Stats_NumPos,PPMI->Stats_NumNeg,PPMI->Stats_NumMaxL);
#ifdef AMORTIZE
  printf(" Amortized:%6ld",PPMI->Stats_NumAmortized);
#endif
  printf("\n");
  
    {
    /* per count bucket: average of Tot/Esc over all low-context slots
       (accumulated doubled, hence the *2 and the final /2) */
    long DetAvg[DT_NUMCOUNTS];
    int i,j,k;
    
    for(i=0;i<DT_NUMCOUNTS;i++)
      DetAvg[i] = 0;
    
    for(j=0;j<NUM_O1_CHARS;j++)
      {
      for(i=0;i<DT_NUMCOUNTS;i++)
        {
        k = (i<<DTSX_BITS) + j;
        DetAvg[i] += PPMI->DT_TotCounts[k] * 2 / PPMI->DT_EscCounts[k] ;
        }
      }
    
    printf(" DetVals: ");
    for(i=0;i<DT_NUMCOUNTS;i++)
      {
      DetAvg[i] /= NUM_O1_CHARS*2;
      printf("%ld,",DetAvg[i]);
      }
    printf("\n");
    
    }
  }
#endif /* STATS */

smartfree(PPMI->DT_EscCounts);
smartfree(PPMI->DT_TotCounts);

if ( PPMI->ContextPool )
  {
  /* the LRU sentinel is a pool hunk: return it to its pool before the pool goes away */
  if ( PPMI->LRU_Base )
    (void) FreePoolHunk(PPMI->ContextPool,PPMI->LRU_Base);
  PPMI->LRU_Base = NULL;

  FreePool(PPMI->ContextPool);
  }

if ( PPMI->IndexPool ) FreePool(PPMI->IndexPool);

FreeMem(PPMI,sizeof(struct PPMdetInfo));
}
