'HUFFMAN4.BAS - A static Huffman compression program for PDS/QB4.5
'By Rich Geldreich 1992
'Replaces HUFFMAN2.BAS
'August 14th, 1992
'QuickBASIC users: you must change all of the "SSEG" string to "VARSEG"
'in this program with search and replace!

DEFINT A-Z
DECLARE SUB RGEncode (InputFile$, OutputFile$)

DECLARE SUB InitTree ()
DECLARE SUB MakeSortTable ()
DECLARE SUB CombineTree ()
DECLARE SUB CleanUpTree ()
DECLARE SUB WriteTree ()

DECLARE SUB SortDistribution2 ()
DECLARE SUB SortDistribution ()
DECLARE SUB GetDistribution ()
DECLARE SUB RecurseTree (Node)

DECLARE SUB FillBuffer ()

CONST True = -1, False = 0, Null = -2, BufferLength = 10000

CLEAR , , 4096

DIM SHARED Father(255) AS LONG
DIM SHARED LeftSon(255), RightSon(255)
DIM SHARED Index(255), RealIndex

DIM SHARED Pointer(255), HighestEntry
DIM SHARED Code(255, 40), CodeLength(255)'max. of 41 bits isn't really needed
DIM SHARED CurrentLength, CurrentCode(40)

DIM SHARED Buffer$, Address, EndAddress, Bits(8), CurrentByte, CurrentBit
DIM SHARED BufferSeg

DIM SHARED FileLength AS LONG, In.File, Out.File

RGEncode COMMAND$, "output.huf"

SUB CleanUpTree
    RealIndex = 256
    FOR A = 0 TO 255
        IF Father(A) > 0 THEN
            Index(A) = RealIndex
            RealIndex = RealIndex + 1
        END IF
    NEXT
    RealIndex = RealIndex - 257
    FOR A = 0 TO 255
        IF Father(A) > 0 THEN
            L = LeftSon(A): R = RightSon(A)
            IF L > 255 THEN LeftSon(A) = Index(L - 256)
            IF R > 255 THEN RightSon(A) = Index(R - 256)
        END IF
    NEXT
END SUB

'Combines the tree until there is only one node at the top.
SUB CombineTree
   
    DO UNTIL HighestEntry = 0

        SortDistribution2

        Lowest = Pointer(HighestEntry)
        NextLowest = Pointer(HighestEntry - 1)

        NewFrequency& = Father(Lowest) + Father(NextLowest)

        SELECT CASE RightSon(Lowest)
        CASE Null
            SELECT CASE RightSon(NextLowest)
            CASE Null
             Father(NextLowest) = NewFrequency&
             RightSon(NextLowest) = LeftSon(Lowest)
             Father(Lowest) = Null
            CASE ELSE
             Father(Lowest) = NewFrequency&
             RightSon(Lowest) = NextLowest + 256
             Pointer(HighestEntry - 1) = Lowest
           END SELECT
        CASE ELSE
            SELECT CASE RightSon(NextLowest)
            CASE Null
             Father(NextLowest) = NewFrequency&
             RightSon(NextLowest) = Lowest + 256
            CASE ELSE
             FOR A = 255 TO 0 STEP -1
                IF Father(A) = Null THEN EXIT FOR
             NEXT
             Father(A) = NewFrequency&
             LeftSon(A) = Lowest + 256
             RightSon(A) = NextLowest + 256
             Pointer(HighestEntry - 1) = A
            END SELECT
        END SELECT

        HighestEntry = HighestEntry - 1

    'loop until there is only one node at the top
    LOOP
END SUB

SUB FillBuffer
    GET In.File, , Buffer$

    A& = SADD(Buffer$)
    A& = A& - 65536 * (A& < 0)
    BufferSeg = SSEG(Buffer$) + (A& \ 16)
    Address = A& AND 15
    EndAddress = Address + BufferLength
    DEF SEG = BufferSeg
END SUB

SUB GetDistribution
    FOR A& = 1 TO FileLength
        Address = Address + 1
        IF Address = EndAddress THEN FillBuffer
        Father(PEEK(Address)) = Father(PEEK(Address)) + 1
    NEXT
END SUB

'Initializes the tree.
SUB InitTree
    FOR A = 0 TO 255
        Father(A) = 0
        LeftSon(A) = A
        RightSon(A) = Null
    NEXT
END SUB

'Enters all the nodes which have a frequency>0 into a table for quick access.
SUB MakeSortTable
    HighestEntry = 0
    FOR A = 0 TO 255
        IF Father(A) > 0 THEN
            Pointer(HighestEntry) = A
            HighestEntry = HighestEntry + 1
        END IF
    NEXT
    HighestEntry = HighestEntry - 1
END SUB

SUB RecurseTree (Node)
    
    IF Node < 256 THEN
        FOR A = 0 TO CurrentLength - 1
            Code(Node, A) = CurrentCode(A)
        NEXT
        CodeLength(Node) = CurrentLength - 1
        EXIT SUB
    END IF
    
    L = LeftSon(Node - 256): R = RightSon(Node - 256)

    IF L <> Null THEN
        CurrentCode(CurrentLength) = 1
        CurrentLength = CurrentLength + 1
        RecurseTree L
        CurrentLength = CurrentLength - 1
    END IF
    
    IF R <> Null THEN
        CurrentCode(CurrentLength) = 0
        CurrentLength = CurrentLength + 1
        RecurseTree R
        CurrentLength = CurrentLength - 1
    END IF
END SUB

SUB RGEncode (InputFile$, OutputFile$)

    Bits(0) = 1
    Bits(1) = 2
    Bits(2) = 4
    Bits(3) = 8
    Bits(4) = 16
    Bits(5) = 32
    Bits(6) = 64
    Bits(7) = 128
    Bits(8) = 256

    InitTree

    Buffer$ = SPACE$(BufferLength)
    EndAddress = 1: Address = 0

    In.File = FREEFILE: Out.File = In.File + 1
    OPEN InputFile$ FOR INPUT AS In.File: CLOSE In.File
    OPEN InputFile$ FOR BINARY AS In.File
    FileLength = LOF(In.File)
    
    GetDistribution
    MakeSortTable
    SortDistribution
    CombineTree

    TopOfTree = Pointer(0) + 256
    CurrentLength = 0
    RecurseTree TopOfTree

    CleanUpTree

    CurrentByte = 0: CurrentBit = 0
    
    OPEN OutputFile$ FOR OUTPUT AS Out.File: CLOSE Out.File
    OPEN OutputFile$ FOR BINARY AS Out.File
    
    PUT Out.File, , FileLength
    Top = Index(TopOfTree - 256) - 256
    PUT Out.File, , Top
    PUT Out.File, , RealIndex

    WriteTree                     'writes the tree to the output file

    SEEK In.File, 1
    EndAddress = 1: Address = 0

    A$ = SPACE$(BufferLength)
    A& = SADD(A$)
    A& = A& - 65536 * (A& < 0)
    OBufferSeg = SSEG(A$) + (A& \ 16)
    OAddress = A& AND 15
    OEndAddress = OAddress + BufferLength
    OStart = OAddress

    FOR A& = 1 TO FileLength

        Address = Address + 1
        IF Address = EndAddress THEN FillBuffer
        B = PEEK(Address)

        'there is a much faster way of doing this, but this works ok:
        FOR C = 0 TO CodeLength(B)

            CurrentByte = CurrentByte * 2 OR Code(B, C)

            CurrentBit = CurrentBit + 1
            IF CurrentBit = 8 THEN
                DEF SEG = OBufferSeg
                POKE OAddress, CurrentByte
                OAddress = OAddress + 1
                IF OAddress = OEndAddress THEN
                    PUT Out.File, , A$
                    OAddress = OStart
                END IF
                CurrentByte = 0: CurrentBit = 0
                DEF SEG = BufferSeg
            END IF
        NEXT
    NEXT
    DO UNTIL CurrentBit = 8
        CurrentByte = CurrentByte * 2
        CurrentBit = CurrentBit + 1
    LOOP

    DEF SEG = OBufferSeg
    POKE OAddress, CurrentByte
    A$ = LEFT$(A$, OAddress + 1 - OStart)
    PUT Out.File, , A$

    CLOSE In.File, Out.File

END SUB

'A REAL Shell sort follows. It is much faster than the well-known one.
'Sorts the nodes according to the sorting table.
SUB SortDistribution
    Offset = HighestEntry \ 2
    DO
        FOR I = 0 TO HighestEntry - Offset
            CompareLow = I
            CompareHigh = I + Offset
            DO WHILE Father(Pointer(CompareLow)) < Father(Pointer(CompareHigh))
                SWAP Pointer(CompareLow), Pointer(CompareHigh)
                CompareHigh = CompareLow
                CompareLow = CompareLow - Offset
                IF CompareLow < 0 THEN EXIT DO
            LOOP
        NEXT
        Offset = Offset \ 2
    LOOP WHILE Offset > 0
    

END SUB

'A simple bubble sort... used while combining the tree.
SUB SortDistribution2
    
    '"bubble" all of the entries with higher frequencies to the beginning
    'of the list
    DO
        SwapFlag = False
        FOR A = HighestEntry - 1 TO 0 STEP -1
            IF Father(Pointer(A + 1)) > Father(Pointer(A)) THEN
                SWAP Pointer(A + 1), Pointer(A)
                SwapFlag = True
            END IF
        NEXT
    LOOP WHILE SwapFlag
    
END SUB

'Writes the tree to disk.
SUB WriteTree
    FOR A = 0 TO 255
        IF Father(A) > 0 THEN
            Son = LeftSon(A)
            FOR C = 0 TO 8
                IF (Son AND Bits(C)) > 0 THEN
                    GOSUB SendOne
                ELSE
                    GOSUB SendZero
                END IF
            NEXT
            
            Son = RightSon(A)
            FOR C = 0 TO 8
                IF (Son AND Bits(C)) > 0 THEN
                    GOSUB SendOne
                ELSE
                    GOSUB SendZero
                END IF
            NEXT
        END IF

    NEXT

EXIT SUB

SendZero:
    CurrentByte = CurrentByte * 2
    CurrentBit = CurrentBit + 1
    IF CurrentBit = 8 THEN
        A$ = CHR$(CurrentByte)
        PUT Out.File, , A$
        CurrentByte = 0: CurrentBit = 0
    END IF
RETURN

SendOne:
   
    CurrentByte = CurrentByte * 2 OR 1
    CurrentBit = CurrentBit + 1
    IF CurrentBit = 8 THEN
        A$ = CHR$(CurrentByte)
        PUT Out.File, , A$
        CurrentByte = 0: CurrentBit = 0
    END IF
RETURN

END SUB

