// DUPES.CPP								 1		  1    6666
// Dave Harris								11		 11   6
// Compiled using Borland C++ ver 3.1	   1 1		1 1   6666
// 03-03-94 								 1	 ..   1   6   6
//										   11111 .. 11111  666
////////////////////////////////////////////////////////////////////////

#include "au.hpp"

#define PROGRAM "DUPES"  // Module name; passed to ReadGlobalCFGInfo() and au_invalid_option()

/*
 * One record of the data file.  Records are written verbatim with
 * write(), so the field order and sizes define the on-disk format.
 * Record numbers are 1-based; 0 is used as the "no record" link value.
 */
typedef struct
{
	char arcFile[80];			/* path built from the source directory and the file name */
	char filename[FILE_SIZE];	/* member name inside an archive; "" when the record is the file itself */
	unsigned long crc;			/* CRC of the file or archive member */
	long size;					/* size in bytes (unpacked size for archive members) */
	long next;					/* record number of the next entry in the same hash chain, 0 = end */
	unsigned short used;		/* compared with DUPES_INFO.used to tell whether this record was already reported */
} FILE_CONT;

/*
 * One slot of the hash table: the record numbers of the first and the
 * last entry of the chain for that slot.  first == 0 means the slot is
 * empty.
 */
typedef struct
{
	long first;
	long last;
} HASH;

/* Number of slots in the table; a record's slot is crc % HASH_SLOTS. */
#define HASH_SLOTS 1024

/*********************************************************************/

/*
 * Per-run state of the DUPES module, hung off AU.info.
 */
typedef struct
{
	char do_arcs;			  // look inside archives: OFF, ON or ONLY (default ON)
	HASH *hash;				  // in-memory copy of the hash table (HASH_SLOTS entries)
	long last_record;		  // number of the most recently appended record
	long number_inside_processed;	// count of archive member records seen
	long size_thresh;		  // files smaller than this are not recorded
	char data_file_name[FLENGTH];	// name of the data file
	HANDLE data_file;		  // handle of the data file, -1 while not open
	char log_file_name[FLENGTH];	// name of the log file, "" when logging is off
	HANDLE log_file;		  // handle of the log file, -1 until first opened
	BYTE keep_data_file;	  // set by -D: reuse an existing data file and do not delete it at exit
	BYTE use_32;			  // TRUE: crc32() for whole files, FALSE: crc16()
	BYTE verbose;			  // set by -V: print each processed file
	unsigned short used;	  // Used number to use: marker value written into reported records
} DUPES_INFO;

/* Size in bytes of the whole hash table, which is stored at offset 0 of the data file. */
#define HASH_TABLE_SIZE (sizeof(HASH) * HASH_SLOTS)

/**/
/*
 * Append a printf-style message to the duplicates log file.
 *
 * The log file is opened on first use and positioned at its end.  When
 * no log file name was configured the call does nothing; the message is
 * not even formatted.
 *
 * au      - module context; au->info must point at a DUPES_INFO
 * format  - printf-style format string followed by its arguments
 *
 * The formatted message is built with vsprintf() in a 200 byte buffer,
 * so callers must keep the expanded text below that size.
 */
static void dupes_log(AU *au, char *format, ...)
{
	DUPES_INFO *in = (DUPES_INFO *)au->info;
	va_list plist;
	char string[200];

	if (in->log_file == -1)
	{
		if (in->log_file_name[0] == '\0')
			return;				/* logging was not requested */

		in->log_file = au_open(au, in->log_file_name, O_CREAT|O_WRONLY|O_TEXT);
		/* NOTE(review): assumes au_open() reports failure as -1 - confirm */
		if (in->log_file == -1)
			return;				/* nothing to write to */
		lseek(in->log_file, 0L, SEEK_END);
	}

	va_start(plist, format);
	vsprintf(string, format, plist);
	va_end(plist);

	write(in->log_file, string, strlen(string));
}

/**/
/*
 * Load the header of an existing data file: the hash table followed by
 * the 2-byte "used" marker.  The marker is bumped by one after loading,
 * and last_record is derived from the length of the file.
 *
 * Prints "Read Error" and exits with status 1 if either part cannot be
 * read completely.
 */
static void read_hash_table(AU *au)
{
	DUPES_INFO *info = (DUPES_INFO *)au->info;
	long file_length;

	lseek(info->data_file, 0L, SEEK_SET);
	if (read(info->data_file, info->hash, HASH_TABLE_SIZE) != HASH_TABLE_SIZE ||
		read(info->data_file, &info->used, 2) != 2)
	{
		au_printf_error(au, "\nRead Error");
		exit(1);
	}
	info->used++;

	/* Everything after the header is an array of FILE_CONT records. */
	lseek(info->data_file, 0L, SEEK_END);
	file_length = tell(info->data_file);
	info->last_record = (file_length-HASH_TABLE_SIZE-2)/sizeof(FILE_CONT);
}
/**/
/*
 * Store the header of the data file: the in-memory hash table followed
 * by the 2-byte "used" marker, both written from offset 0.
 *
 * Prints "Write Error" and exits with status 1 if either part cannot be
 * written completely.
 */
static void write_hash_table(AU *au)
{
	DUPES_INFO *in = (DUPES_INFO *)au->info;

	lseek(in->data_file, 0L, SEEK_SET);
	if (write(in->data_file, in->hash, HASH_TABLE_SIZE) != HASH_TABLE_SIZE ||
		write(in->data_file, &in->used, 2) != 2)
	{
		au_printf_error(au, "\nWrite Error");
		exit(1);
	}
}
/**/
/*
 * Read one record from the data file.
 *
 * au        - module context; au->info must point at a DUPES_INFO
 * contents  - receives the record
 * record    - 1-based record number
 *
 * Prints "Read Error" and exits with status 1 when the record cannot be
 * read completely; otherwise callers would follow the "next" link of an
 * undefined record.
 */
static void read_data_struct(AU *au, FILE_CONT *contents,long record)
{
	DUPES_INFO *in = (DUPES_INFO *)au->info;

	/* Records start right after the hash table and the 2-byte used marker. */
	lseek(in->data_file, HASH_TABLE_SIZE + 2 + sizeof(FILE_CONT)*(record-1), SEEK_SET);
	if (read(in->data_file, contents, sizeof(FILE_CONT)) != sizeof(FILE_CONT))
	{
		au_printf_error(au, "\nRead Error");
		exit(1);
	}
}
/**/
/*
 * Write one record to the data file at the position that belongs to the
 * given 1-based record number.
 *
 * Prints "Write Error" and exits with status 1 when the record cannot
 * be written completely.
 */
static void write_data_struct(AU *au, FILE_CONT *contents,long record)
{
	DUPES_INFO *info = (DUPES_INFO *)au->info;
	/* Records follow the hash table and the 2-byte used marker. */
	long offset = HASH_TABLE_SIZE + 2 + sizeof(FILE_CONT)*(record-1);

	lseek(info->data_file, offset, SEEK_SET);
	if (write(info->data_file, contents, sizeof(FILE_CONT)) == sizeof(FILE_CONT))
		return;

	au_printf_error(au, "\nWrite Error");
	exit(1);
}
/**/
/*
 * Append a record for one file (or archive member) to the data file and
 * link it onto the end of the hash chain selected by its CRC.
 *
 * au        - module context; au->info must point at a DUPES_INFO
 * arcFile   - path of the file or of the containing archive
 * filename  - member name inside the archive, "" for a plain file
 * crc       - CRC of the contents
 * size      - size in bytes
 *
 * Returns FALSE without recording anything when size is below the
 * configured threshold, TRUE otherwise.
 *
 * Both names are truncated to fit their fields; the caller's buffers
 * are not modified.
 */
static BYTE add_to_list(AU *au, char *arcFile, char *filename, unsigned long crc, long size)
{
	FILE_CONT temp;
	int  hash_code;
	DUPES_INFO *in = (DUPES_INFO *)au->info;

	if (size < in->size_thresh)
		return FALSE;

	/*
	 * Zero the whole record first: this terminates the truncated names,
	 * clears "next", and gives "used" a defined value so that
	 * print_results() never compares against stack garbage.
	 */
	memset(&temp, 0, sizeof(temp));
	strncpy(temp.arcFile, arcFile, sizeof(temp.arcFile) - 1);
	strncpy(temp.filename, filename, sizeof(temp.filename) - 1);
	temp.crc = crc;
	temp.size = size;

	in->last_record++;
	hash_code = (int)(crc % HASH_SLOTS);
	if (in->hash[hash_code].first == 0)
		in->hash[hash_code].first = in->hash[hash_code].last = in->last_record;
	else				   /* thread the previous */
	{
		FILE_CONT last;
		read_data_struct(au, &last, in->hash[hash_code].last);
		last.next = in->last_record;
		write_data_struct(au, &last, in->hash[hash_code].last);
		in->hash[hash_code].last = in->last_record;
	}
	write_data_struct(au, &temp, in->last_record);
	return TRUE;
}
/**/
/*
 * Record every member of one archive.
 *
 * Members are fetched with get_record() until it returns EOF.  A return
 * of -2 puts the archive on the bad list and abandons it; a return of
 * -3 abandons it without listing it.  In both cases FALSE is returned
 * even if earlier members were recorded.
 *
 * Returns TRUE when at least one member was added to the list.
 */
static BYTE dupes_one(AU *au, char *file_name, ARC_FILE *arcFile)
{
	DUPES_INFO *info = (DUPES_INFO *)au->info;
	ARC_RECORD member;
	char archive_path[FLENGTH];
	BYTE any_added = FALSE;
	int status;

	while ((status = get_record(au, arcFile, &member)) != EOF)
	{
		if (status == -2)
		{
			add_to_bad_list(au, au->source_directory, file_name);
			return FALSE;
		}
		if (status == -3)
			return FALSE;

		build_fname(archive_path, au->source_directory, file_name);
		if (add_to_list(au, archive_path, member.name, member.crc, member.unpacked_size))
			any_added = TRUE;
		info->number_inside_processed++;
	}
	return any_added;
}
/**/
/*
 * Per-file callback handed to process_files().
 *
 * Depending on do_arcs the members of an archive are recorded, the file
 * itself is recorded, or both.  The file itself is recorded with an
 * empty member name and a CRC-32 or CRC-16 of its contents.
 *
 * The file size comes from findfirst(); if that lookup fails the file
 * itself is not recorded, because its size would be undefined.
 *
 * Always returns 0.
 */
static int dupes(AU *au, char *file_name)
{
	ARC_FILE arcFile;
	char string[FLENGTH];
	struct ffblk ffblk;
	DUPES_INFO *in = (DUPES_INFO *)au->info;
	BYTE processed = FALSE;

	check_for_key();

	if (in->do_arcs != OFF)
	{
		arc_file_init(au, &arcFile, file_name);
		/* Only look inside archive types whose package entry has a non-zero crc field. */
		if (arcFile.type > 0 && au->package[arcFile.type].crc != 0)
		{
			processed = dupes_one(au, file_name, &arcFile);
		}
		arc_file_deinit(au, &arcFile);
	}

	if (in->do_arcs != ONLY)
	{
		/* Place the archive itself in the list */
		build_fname(string, au->source_directory, file_name);

		if (findfirst(file_name, &ffblk, 0) == 0)
		{
			unsigned long crc;

			if (in->use_32)
				crc = crc32(au, file_name);
			else
				crc = crc16(au, file_name);
			processed |= add_to_list(au, string, "", crc, ffblk.ff_fsize);
		}
	}
	if (processed)
	{
		au->number_processed++;
		if (in->verbose)
			au_printf(au, "@?6Processed @?1%s@?H\n", file_name);
	}

	return 0;
}
/**/
/*
 * Option callback handed to generic_parse_comm_line().
 *
 * au        - module context; au->info must point at a DUPES_INFO
 * option    - option letter being parsed
 * cur_argv  - text of the option's argument (used by parameter options)
 * type      - PARSE_SINGLE_OPTION or PARSE_PARAM_OPTION
 *
 * Returns TRUE when the option was handled here, FALSE otherwise.
 * '?' prints the help text and exits with status 0.
 *
 * The -D and -L names are truncated to fit their FLENGTH buffers
 * instead of overflowing them.
 */
static BYTE parse_comm_line(AU *au, char option, char *cur_argv,
							PARSE_TYPE type)
{
	DUPES_INFO *in = (DUPES_INFO *)au->info;

	switch (type)
	{
	case PARSE_SINGLE_OPTION:
		/* '3' and '1' presumably are the first characters of -32 and -16 - confirm against the generic parser */
		if (option == '3')
		{
			in->use_32 = TRUE;
			return TRUE;
		}
		else if (option == '1')
		{
			in->use_32 = FALSE;
			return TRUE;
		}
		else if (option == 'V')
		{
			in->verbose = TRUE;
			return TRUE;
		}
		return FALSE;
	case PARSE_PARAM_OPTION:
		switch (option)
		{
		case 'W':
			/* NOTE(review): the size of au->dest_directory is not visible here, so this copy stays unbounded */
			strcpy(au->dest_directory, cur_argv);
			break;
		case 'A':                 /* process archives: on/off/only */
			in->do_arcs = get_value(au, OFF | ON | ONLY);
			break;
		case 'D':
			strncpy(in->data_file_name, cur_argv, sizeof(in->data_file_name) - 1);
			in->data_file_name[sizeof(in->data_file_name) - 1] = '\0';
			in->keep_data_file = TRUE;
			break;
		case 'L':
			strncpy(in->log_file_name, cur_argv, sizeof(in->log_file_name) - 1);
			in->log_file_name[sizeof(in->log_file_name) - 1] = '\0';
			break;
		case 'T':
			in->size_thresh = atol(cur_argv);
			break;
		case '?':
			au_standard_opt_header(au, "DUpes",
				"@?3-16@?H             use 16 bit CRCs for non-arc files\n"
				"@?3-32@?H             use 32 bit CRCs for non-arc files (default)\n"
				"@?3-A@?Hon|off|only   process Archive files\n"
				"@?3-D<file>@?H        Data file.  If unspecified, a temp file is used and deleted\n"
				"@?3-L<file>@?H        Log file.  Contains listing of duplicates\n"
				"@?3-T@?Hn             size Threshold\n"
				"@?3-V@?H              Verbose listing of files as processed\n"
				"@?3-W@?H<path>        Work directory\n");
			exit(0);
		default:
			au_invalid_option(au, PROGRAM, option);
		}
		return TRUE;
	}
	return FALSE;
}
/**/
/*
 * Report every group of duplicates to the screen and to the log file.
 *
 * Each hash chain is walked from its first record.  For every record
 * that has not been reported yet, all later records of the chain are
 * compared with it; those with the same CRC and size are listed under
 * it and marked with the current "used" value, so that they are neither
 * listed again nor used as the start of a group of their own.
 */
static void print_results(AU *au)
{
	DUPES_INFO *info = (DUPES_INFO *)au->info;
	FILE_CONT head, cand;
	long cand_rec;
	int need_header;
	int slot;

	au_printf_c(au, 15, "\nDuplicate Files:\n\n");

	for (slot = 0; slot < HASH_SLOTS; slot++)
	{
		if (info->hash[slot].first == 0)
			continue;					/* empty chain */

		read_data_struct(au, &head, info->hash[slot].first);
		while (head.next != 0)
		{
			if (head.used != info->used)
			{
				/* The group header is printed once, before the first match. */
				need_header = TRUE;
				cand_rec = head.next;
				do
				{
					read_data_struct(au, &cand, cand_rec);
					if (cand.used != info->used &&
						head.crc == cand.crc && head.size == cand.size)
					{
						if (need_header)
						{
							need_header = FALSE;
							if (head.filename[0] == '\0')
							{
								au_printf(au, "@?1%s@?H", head.arcFile);
								dupes_log(au, "%s", head.arcFile);
							}
							else
							{
								au_printf(au, "(@?B%s@?H inside @?1%s@?H)", head.filename, head.arcFile);
								dupes_log(au, "(%s inside %s)", head.filename, head.arcFile);
							}
							au_printf(au, "\n");
							dupes_log(au, "\n");
						}
						au_printf(au, "     ");
						dupes_log(au, "     ");

						if (cand.filename[0] == '\0')
						{
							au_printf(au, "@?2%s@?H", cand.arcFile);
							dupes_log(au, "%s", cand.arcFile);
						}
						else
						{
							au_printf(au, "(@?C%s@?H inside @?2%s@?H)", cand.filename, cand.arcFile);
							dupes_log(au, "(%s inside %s)", cand.filename, cand.arcFile);
						}
						au_printf(au, "\n");
						dupes_log(au, "\n");

						/* Mark the match on disk so it is skipped from now on. */
						cand.used = info->used;
						write_data_struct(au, &cand, cand_rec);
					}
					cand_rec = cand.next;
				} while (cand_rec != 0);
			}
			/* Re-read from disk: the next record may have been marked above. */
			read_data_struct(au, &head, head.next);
		}
	}
}
/**/
static void end_program(void)
{
	DUPES_INFO *in = (DUPES_INFO *)glob_au->info;

	if (in->data_file != -1)
	{
		write_hash_table(glob_au);
		close(in->data_file);
		if (!in->keep_data_file)
			unlink(in->data_file_name);
	}
	if (in->log_file != -1)
	{
		close(in->log_file);
	}
	if (in->hash != NULL)
		free(in->hash);
	return;
}
/**/
/*
 * Entry point of the DUPES module.
 *
 * Sets up the module state with its defaults, reads the configuration
 * and the command line, opens or creates the data file, processes all
 * files, prints the duplicates and finally a short summary.
 *
 * An existing data file is reused only when -D was given; in every
 * other case a new one is created.  If it cannot be created the program
 * prints "Create Error" and exits with status 1 instead of running on
 * an invalid handle.
 *
 * Cleanup is done by end_program(), registered with atexit().
 *
 * Returns 0.
 */
int main_dupes(AU *au, int argc, char *argv[])
{
	DUPES_INFO *in;
	long space_required;

	in = (DUPES_INFO *)au_malloc(au, sizeof(DUPES_INFO));
	memset(in, '\0', sizeof(DUPES_INFO));
	au->info = in;
	in->do_arcs=ON;
	in->data_file = -1;
	in->log_file = -1;
	in->size_thresh=1;
	in->use_32 = TRUE;
	in->used = 1;

	ReadGlobalCFGInfo(au, au->cfg_file, PROGRAM, NULL);
	generic_parse_comm_line(au, argc, argv, parse_comm_line);

	atexit(end_program);

	in->hash = (HASH *)au_calloc(au, sizeof(HASH), HASH_SLOTS);

	if (in->data_file_name[0] == '\0')
		build_fname(in->data_file_name, au->dest_directory, "dupes94.dat");

	if (in->keep_data_file && access(in->data_file_name, 0x00) == 0)
	{
		/* Continue with the records of an earlier run. */
		in->data_file = au_open(au, in->data_file_name, O_BINARY | O_RDWR);
		read_hash_table(au);
	}
	else
	{
		in->data_file = _creat(in->data_file_name, 0);
		if (in->data_file == -1)
		{
			au_printf_error(au, "\nCreate Error");
			exit(1);
		}
	}

	process_files(au, dupes);
	print_results(au);
	lseek(in->data_file, 0L, SEEK_END);
	space_required = tell(in->data_file);

	if (!au->no_extra)
	{
		au_printf_c(au, 15, "\nFiles Processed = %d\n", au->number_processed);
		au_printf_c(au, 15, "Files Inside Archives Processed = %ld\n",
			   in->number_inside_processed);
		au_printf_c(au, 15, "Disk space required to hold results = %ld bytes\n", space_required);
	}

	return 0;
}

