/*
** general purpose file packer, Copyright (C) Randy Nevin 1989, 1990.
**
** you may give this software to anyone, make as many copies as you like, and
** post it on public computer bulletin boards and file servers. you may not
** sell it or charge any fee for distribution (except for media and postage),
** remove this comment or the copyright notice from the code, or claim that
** you wrote this code or anything derived from it. you may modify the code as
** much as you want (please document clearly with comments, and maintain the
** coding style), but programs which are derived from this one are subject to
** the conditions stated here. i am providing this code so that people can
** learn from it, so if you distribute it, please include source code, not
** just executables. contact me to report bugs or suggest enhancements; i do
** not guarantee support, but i will make an effort to help you, and i want to
** act as a central clearing house for future versions. you should contact me
** before undertaking a significant development effort, to avoid reinventing
** the wheel. if you come up with an enhancement you consider particularly
** useful, i would appreciate being informed so that it can be incorporated in
** future versions. my address is: Randy Nevin, 1731 211th PL NE, Redmond,
** WA 98053, USA. this code is available directly from the author; just send a
** 360k floppy and a self-addressed floppy mailer with sufficient postage.
**
** HISTORY
** (name		date		description)
** ----------------------------------------------------
** randy nevin		7/16/89		initial version
** randy nevin		8/15/89		released version 1.00
*/

/* BUGBUG: doesn't handle files with only one byte value */

/*
** use huffman encoding to find the most frequently used bytes. do this by
** scanning the input file once, counting the number of times each byte
** occurs. then build a frequency-weighted binary tree. tree traversal will
** assign a bit sequence to each byte. for files in which a few bytes occur
** many times, those bytes are near the top of the tree, and thus have a short
** encoding scheme. then scan the input file again, writing out the binary
** tree and the encoded input to the output file.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX 32	/* use a maximum of 32 bits per byte */

struct node { /* tree node for huffman encoding */
	long count;
	int byteval;
	char buf[MAX+1];
	char pad;
	struct node *left, *right, *nxtnode;
	} *head = NULL, *tail = NULL;

long table[0x100]; /* frequency table */
struct node *ptab[0x100]; /* pointers to the corresponding nodes */

void main( int, char *[] );
struct node *getmin( void );
void walk0( struct node *, char [], int );
void walk1( struct node *, FILE * );
void walk2( struct node *, FILE * );
void walk3( struct node * );
void initbit( void );
void flushbit( FILE * );
void outbit( int, FILE * );

void main ( argc, argv )
	int argc;
	char *argv[];
	{
	char *self, *t;
	int c, i;
	FILE *fin, *fout;
	struct node *p, *q, *r;
	char buf[MAX+1];
	long total;

	printf( "Copyright (C) Randy Nevin, 1989, 1990. Version 1.00\n" );
	printf( "See source code for rights granted.\n\n" );
	self = argv[0];
	/* get rid of initial part of path */
	if ((t = strrchr( self, '\\' )) || (t = strrchr( self, ':' )))
		self = ++t;
	/* get rid of extension */
	if ((t = strrchr( self, '.' )) && !stricmp( t, ".EXE" ))
		*t = 0;
	if (argc != 3) { /* need infile and outfile */
		fprintf( stderr, "usage: %s infile outfile\n", self );
		exit( -1 );
		}
	if (!(fin = fopen( argv[1], "rb" ))) {
		fprintf( stderr, "can't open %s\n", argv[1] );
		exit( -1 );
		}
	if (!(fout = fopen( argv[2], "wb" ))) {
		fprintf( stderr, "can't open %s\n", argv[2] );
		exit( -1 );
		}
	/* initialize the byte count table */
	for (i = 0; i < 0x100; i++)
		table[i] = 0;
	/* count the bytes */
	for (total = 0, c = getc( fin ); c != EOF; total++, c = getc( fin ))
		table[c] += 1L;
	putc( 1, fout ); /* encoding type (1=huffman) */
	putc( (char)total, fout );
	putc( (char)(total >> 8), fout );
	putc( (char)(total >> 16), fout );
	putc( (char)(total >> 24), fout );
	fseek( fin, 0L, SEEK_SET ); /* rewind input file for second pass */
	/* build a node for each byte value, and put them in a list */
	for (i = 0; i < 0x100; i++)
		if (table[i]) {
			if (!(p = (struct node *)malloc(
					sizeof(struct node) ))) {
				fprintf( stderr, "out of memory\n" );
				exit( -1 );
				}
			p->count = table[i];
			p->byteval = i;
			p->left = p->right = p->nxtnode = NULL;
			if (tail)
				tail->nxtnode = p;
			else
				head = p;
			tail = p;
			}
	/* collapse forest to a single tree */
	while (head && head->nxtnode) {
		p = getmin();
		q = getmin();
		if (!(r = (struct node *)malloc( sizeof(struct node) ))) {
			fprintf( stderr, "out of memory\n" );
			exit( -1 );
			}
		r->count = p->count + q->count;
		r->left = p;
		r->right = q;
		r->nxtnode = NULL;
		if (tail)
			tail->nxtnode = r;
		else
			head = r;
		tail = r;
		}
	buf[0] = 0;
	walk0( head, buf, 0 ); /* assign encoded bit strings */
	initbit();
	walk1( head, fout ); /* write out tree skeleton */
	flushbit( fout );
	walk2( head, fout ); /* write out byte values */
	walk3( head ); /* set pointers to corresponding nodes */
	initbit();
	while ((c = getc( fin )) != EOF) { /* encode input file */
		for (t = ptab[c]->buf; *t; t++)
			outbit( (*t == '0') ? 0 : 1, fout );
		}
	flushbit( fout );
	if (ferror( fout ))
		fprintf( stderr, "output error; disk might be full\n" );
	fclose( fout );
	exit( 0 );
	}

struct node *getmin () { /* fetch & remove smallest-weighted element */
	struct node *p, *q;

	for (p = head, q = p->nxtnode; q; q = q->nxtnode)
		if (q->count < p->count) /* find smallest */
			p = q;
	if (p == head) {
		if (p == tail)
			head = tail = NULL;
		else {
			head = p->nxtnode;
			p->nxtnode = NULL;
			}
		}
	else {
		for (q = head; q && q->nxtnode != p; q = q->nxtnode)
			;
		if (!q) {
			fprintf( stderr, "internal error\n" );
			exit( -1 );
			}
		q->nxtnode = p->nxtnode;
		p->nxtnode = NULL;
		if (p == tail)
			tail = q;
		}
	return( p );
	}

void walk0 ( p, buf, level ) /* assign encoded bit strings */
	struct node *p;
	char buf[];
	int level;
	{
	if (!(p->left) && !(p->right)) {
		strcpy( p->buf, buf );
		/* printf( "str[0x%02X] = %s\n", p->byteval, buf ); */
		}
	else if (level < MAX) {
		buf[level] = '0';
		buf[level+1] = 0;
		walk0( p->left, buf, level+1 );
		buf[level] = '1';
		buf[level+1] = 0;
		walk0( p->right, buf, level+1 );
		buf[level] = 0;
		}
	else {
		fprintf( stderr, "error: skewed tree\n" );
		exit( -1 );
		}
	}

void walk1 ( p, fout ) /* output skeletal tree structure */
	struct node *p;
	FILE *fout;
	{
	/* a 1 bit indicates a node has children; 0 means no children */
	if (p->left) {
		outbit( 1, fout );
		walk1( p->left, fout );
		walk1( p->right, fout );
		}
	else
		outbit( 0, fout );
	}

void walk2 ( p, fout ) /* write out byte values from the tree */
	struct node *p;
	FILE *fout;
	{
	if (p->left) {
		walk2( p->left, fout );
		walk2( p->right, fout );
		}
	else
		putc( (char)(p->byteval), fout );
	}

void walk3 ( p ) /* set pointers to corresponding nodes */
	struct node *p;
	{
	if (p->left) {
		walk3( p->left );
		walk3( p->right );
		}
	else
		ptab[p->byteval] = p;
	}

static int shift; /* how far to shift next bit */
static char byte; /* the byte buffer */

void initbit () { /* initialize bit output buffer */
	byte = 0;
	shift = 0;
	}

void flushbit ( fp ) /* flush bit output buffer */
	FILE *fp;
	{
	if (shift) /* buffer non-empty? */
		putc( byte, fp ); /* yes, output partial byte */
	}

void outbit( bit, fp ) /* output a bit using byte buffering */
	int bit;
	FILE *fp;
	{
	byte |= ((char)bit << shift);
	if (shift == 7) {
		putc( byte, fp );
		byte = 0;
		shift = 0;
		}
	else
		shift++;
	}
