'Modified LZ78 Encoder In PDS/QB4.5 By Rich Geldreich
'max. table size/mimimum code size: 4096/3
'
'Warning: Do NOT press CTRL+Break and then continue this program while
'you are running it in the environment! At best, the resulting compressed
'file will be invalid. At worst, your machine will lock up.
DEFINT A-Z
DECLARE SUB PutData (A, C)
DECLARE SUB PutByte (A)
DECLARE SUB PutCode (A)
DECLARE SUB Rebuild.Table (New.Entries)
DECLARE FUNCTION GetByte ()
DECLARE SUB Hash (Prefix, Suffix, Index, Found)

CONST True = -1, False = 0
CONST HashSize = 6577

DIM SHARED Prefix(HashSize - 1), Suffix(HashSize - 1), Code(HashSize - 1)
DIM SHARED Used(4352)

DIM SHARED InBuffer$, IAddress, IEndAddress, Iseg
DIM SHARED OutBuffer$, OStartAddress, OAddress, OEndAddress, Oseg

DIM SHARED CodeSize, CurrentBit, Char&
DIM SHARED Shift(7) AS LONG
DIM SHARED MaxCodes(12)

FOR A = 0 TO 7: READ Shift(A): NEXT
DATA 1,2,4,8,16,32,64,128

DATA 264,272,288,320,384,512,768,1280,2304,4352
FOR A = 3 TO 12: READ MaxCodes(A): NEXT

LOCATE , , 1
IF POS(0) <> 1 THEN PRINT

PRINT "C2.BAS - Modified LZ78 Encoder in QuickBASIC 4.5"
PRINT "By Rich Geldreich 1992"

'Open input file
File$ = COMMAND$
IF File$ = "" THEN
    INPUT "File to compress"; File$
    File$ = UCASE$(File$)
    IF File$ = "" THEN END
END IF
IF INSTR(File$, "OUTPUT.LZ1") THEN
    PRINT "Cannot compress output file."
    END
END IF

InBuffer$ = STRING$(4000, 0)   'input buffer
OutBuffer$ = STRING$(4000, 0)  'output buffer

A& = SADD(OutBuffer$)
A& = A& - 65536 * (A& < 0)
Oseg = SSEG(OutBuffer$) + (A& \ 16)
OAddress = A& AND 15
OEndAddress = OAddress + 4000
OStartAddress = OAddress

OPEN File$ FOR BINARY AS #1
FileLength& = LOF(1)
'Is it there?
IF FileLength& = 0 THEN
    CLOSE #1
    KILL COMMAND$
    PRINT COMMAND$; " not found or null."
    END
END IF
'Open output file
OPEN "output.lz1" FOR BINARY AS #2
'Is it already there?
IF LOF(2) <> 0 THEN
    'Kill output file and reopen it
    CLOSE #2
    KILL "output.lz1"
    OPEN "output.lz1" FOR BINARY AS #2
END IF
'CurrentLoc& = position in input file
currentloc& = 2

'Compression codes:
'Code 256 = end of file
'Code 257 = increase code size
'Code 258 = rebuild table
'Code 259 = first code available for strings

StartCode = 259
NextCode = 259
MaxCode = 264
CodeSize = 3
CurrentBit = 0
Char& = 0

GOSUB ClearTable

Prefix = GetByte

PRINT "Compressing "; File$
PRINT : PRINT : PRINT

Y = CSRLIN - 3
'Main compression loop
DO
    DO 'get new string loop
     
        IF currentloc& > FileLength& THEN
            PutCode Prefix  'send last char
            PutCode 256     'send EOF code
            PutCode 0: PutCode 0  'flush the bit buffer
            OutBuffer$ = LEFT$(OutBuffer$, OAddress - OStartAddress)

            LOCATE Y, 1
            currentloc& = currentloc& - 1
            PRINT "Bytes In:"; currentloc&; (100& * currentloc&) \ FileLength&; "%"

            BytesOut& = LOF(2) + (OAddress - OStartAddress)
            PRINT "Bytes Out:"; BytesOut&
            PRINT "Total Compression:"; 100 - ((100& * BytesOut&) \ currentloc&); "%                         ";

            PUT #2, , OutBuffer$
            CLOSE
            END
        ELSE
            
            Suffix = GetByte
            currentloc& = currentloc& + 1

            Hash Prefix, Suffix, Index, Found
            IF Found THEN
                Prefix = Code(Index)
                Used(Prefix) = Used(Prefix) + 1
            END IF
        END IF
    LOOP WHILE Found

    PutCode Prefix
    
    Prefix(Index) = Prefix
    Suffix(Index) = Suffix
    Code(Index) = NextCode
    
    Prefix = Suffix
 
    NextCode = NextCode + 1
    
    'time to rebuild table?
    IF NextCode > 4352 THEN
        PutCode 258
        Rebuild.Table New.Entries
        NextCode = New.Entries + StartCode

        'table still full? very unlikely but possible
        IF NextCode > 4352 THEN
            GOSUB ClearTable
            NextCode = StartCode
        END IF
        CodeSize = 3
        MaxCode = 264
    
    END IF

    PrintCounter = PrintCounter + 1     'see if time to update the
    IF PrintCounter = 512 THEN          'screen
        LOCATE Y, 1
        PRINT "Bytes In:"; currentloc&; (100& * currentloc&) \ FileLength&; "%"
        BytesOut& = LOF(2) + (OAddress - OStartAddress)
        PRINT "Bytes Out:"; BytesOut&
        PRINT "Compression:"; 100 - ((100& * BytesOut&) \ currentloc&); "%  "; "CodeSize:"; CodeSize; "NextCode:"; NextCode; "   ";
        PrintCounter = 0
    END IF
LOOP
'clears the hash table
ClearTable:
    FOR A = 0 TO HashSize - 1
        Prefix(A) = -1
        Suffix(A) = -1
        Code(A) = -1
    NEXT
RETURN

FUNCTION GetByte STATIC
    IF IAddress = IEndAddress THEN
        GET #1, , InBuffer$
        A& = SADD(InBuffer$)
        A& = A& - 65536 * (A& < 0)
        Iseg = SSEG(InBuffer$) + (A& \ 16)
        IAddress = A& AND 15
        IEndAddress = IAddress + 4000
    END IF
    DEF SEG = Iseg
    GetByte = PEEK(IAddress)
    IAddress = IAddress + 1
END FUNCTION

SUB Hash (Prefix, Suffix, Index, Found)
    
    Index = (Prefix * 256& XOR Suffix) MOD HashSize
    IF Index = 0 THEN
        Offset = 1
    ELSE
        Offset = HashSize - Index
    END IF
    DO 'until we find a match or don't
        IF Code(Index) = -1 THEN      'is there nothing here?
            Found = False             'yup, not found
            EXIT SUB
        'is this entry what we're looking for?
        ELSEIF Prefix(Index) = Prefix AND Suffix(Index) = Suffix THEN
            Found = True              'yup, found
            EXIT SUB
        ELSE 'retry until we find what were looking for or we find a blank
             'entry
            Index = Index - Offset
            IF Index < 0 THEN 'is index too far down?
                Index = Index + HashSize 'yup, bring it up then
            END IF
        END IF
    LOOP
END SUB

SUB PutByte (A) STATIC
    IF OAddress = OEndAddress THEN
        PUT #2, , OutBuffer$
        OAddress = OStartAddress
    END IF
    DEF SEG = Oseg
    POKE OAddress, A
    OAddress = OAddress + 1
END SUB

SUB PutCode (A) STATIC
    SHARED MaxCode

    IF A < 256 THEN
        PutData A * 2, 9 'write out an unencoded character
    ELSE
        'send a code
        DO WHILE A >= MaxCode AND CodeSize < 12
            PutCode 257
            CodeSize = CodeSize + 1
            MaxCode = MaxCodes(CodeSize)
        LOOP

        PutData (A - 256) * 2 OR 1, CodeSize + 1
    END IF
END SUB

SUB PutData (A, C) 'writes out one multibit code to the output file
    Char& = Char& + A * Shift(CurrentBit)
    CurrentBit = CurrentBit + C
    DO WHILE CurrentBit > 7
        PutByte Char& AND 255
        Char& = Char& \ 256
        CurrentBit = CurrentBit - 8
    LOOP
END SUB

SUB Rebuild.Table (New.Entries)
    DIM P(4096), S(4096), U(4096) AS LONG, Pn(4096), Location(4096)
    DIM C(4352)

    SHARED StartCode

    'Collect all used codes.
    Num.Entries = 0
    FOR A = 0 TO HashSize - 1
        C = Code(A)
        IF C <> -1 THEN 'valid code?
            IF Used(C) > 0 THEN 'was it used at all?
                Used(C) = 0
                P = Prefix(A): S = Suffix(A)
                P(Num.Entries) = P          'put it into a temporary table
                S(Num.Entries) = S
                U(Num.Entries) = P * 4353& + S
                C(C) = Num.Entries
                Num.Entries = Num.Entries + 1
            END IF
        END IF
    NEXT
    
    'Build pointers to all used codes.
    Num.Entries = Num.Entries - 1
    FOR A = 0 TO Num.Entries
        Pn(A) = A
    NEXT

    'Sort the used codes based on the value of Prefix*4353+Suffix
    '(larger codes go farther down the table).
    Mid = Num.Entries \ 2
    DO
        FOR A = 0 TO Num.Entries - Mid
            CompareLow = A
            CompareHigh = A + Mid
            DO WHILE U(Pn(CompareLow)) > U(Pn(CompareHigh))
                SWAP Pn(CompareLow), Pn(CompareHigh)
                CompareHigh = CompareLow
                CompareLow = CompareLow - Mid
                IF CompareLow < 0 THEN EXIT DO
            LOOP
        NEXT
        Mid = Mid \ 2
    LOOP WHILE Mid > 0
    
    FOR A = 0 TO Num.Entries
        Location(Pn(A)) = A
    NEXT

    'clear the old hash table
    FOR A = 0 TO HashSize - 1
        Prefix(A) = -1
        Suffix(A) = -1
        Code(A) = -1
    NEXT
    
    'Rebuild the encoding table- modify the Prefix if it points to any
    'strings.
    FOR A1 = 0 TO Num.Entries
        A = Pn(A1)
       
        P = P(A)
        S = S(A)
        IF P >= StartCode THEN             'is it pointing twards a string?
            P = StartCode + Location(C(P)) 'yup; update the pointer
        END IF

        'where does this prefix:suffix go?
        Hash P, S, Index, 0
        'put it there
        Prefix(Index) = P
        Suffix(Index) = S
        Code(Index) = A1 + StartCode
        
    NEXT
    '# of entries in the hash table now
    New.Entries = Num.Entries + 1
END SUB

