#
# AWK script to create a header file from an assembly language
# source (Intel x86), listing labels and variables by segment.
# Morten Elling, April 1995 + May 1997.
#
# Usage
#   awk -f ash.awk [NEARS=1] [TYPES=0] [IDEAL=1] file.asm [>output.fil]
#   NEARS=1 : INclude non-local near code labels (default = 0)
#   TYPES=0 : EXclude struc, record, enum types  (default = 1)
#   IDEAL=1 : Assume Ideal syntax  (auto-detect = default = 0)
#
# Notes
#   Despite several limitations, this script can be a time saver
#   by providing a list of labels, procedures, and variables per
#   segment in the form of global (externdef) declarations.
#
#   Doesn't understand conditional blocks, includes, or line
#   continuation. Ignores comments, macros, and repeat blocks.
#   Understands generic or simplified segmentation, or mixed,
#   but does not properly handle:
#     - [generic] nested segments
#     - [simplified] named far segments (e.g. .code fharcode)
#     - segment ordering (due to AWK's associative arrays)
#   Understands MASM and Ideal mode syntax, strucs and records.
#   Treats 'sym: db 1' as a byte label if NEARS = 0.


##### Make set array from string
function zsplit(str, arr     ,i, j ,temp) {	# i,j,temp = locals
	# Build a "set" in arr: every whitespace-separated word of str
	# becomes a key of arr with the value 1.  Existing keys of arr
	# are left in place; nothing is returned.
	j = split(str, temp);			# j = number of words found
	i = 0;
	while (++i <= j)
		arr[temp[i]] = 1;
}


BEGIN	{ # One-time setup.  AWK zero-inits every variable not set here.

	# Defaults.  TYPES can still be changed by a "TYPES=0" operand on
	# the command line (see Usage at the top of the file).
	NOSEG = "unknown";			# Name used before any segment is seen
	TYPES = 1;

	# Progress note, then the first line of the generated header.
	# NOTE(review): "CON" is presumably the DOS console device; where
	# no such device exists this redirect writes a file named CON.
	print "Just a minute, Admiral" >"CON"
	printf ("\n; %s\n", tolower(ARGV[ARGC-1]) );	# Last cmd-line argument


	# Each directive list below is turned into a lookup set (an array
	# keyed by directive name) right after it is defined.

	# Simplified segment directives
	segdir	 = ".code .data .const .data? .stack .fardata .fardata?" \
		   " codeseg dataseg const udataseg stack fardata ufardata";
	zsplit(segdir, cannedsegset);

	# Code/data label directives
	labeldir = "proc label ";
	zsplit(labeldir, labelset);

	# Type definition directives
	typdir	 = "struc record union enum typedef ";
	zsplit(typdir  , typeset);

	# Data allocation directives
	data2dir = "db dw dd df dp dq dt byte word dword" \
		   " sbyte sword sdword real4 real8 real10";
	zsplit(data2dir, data2set);

	# First tokens that switch the main rule to Ideal syntax
	idealdir = "proc label ideal ";
	zsplit(idealdir, idealset);

	# Distance specifiers accepted after "proc"
	distspec = "near far near16 near32 far16 far32";
	zsplit(distspec, distset);


	# Start in the placeholder segment.  Its counter starts at 1, so
	# the first symbol recorded for it gets index 2.
	curseg = NOSEG;
	symcount[curseg] = 1;
}

{ ##### Main loop begins #######################
  #
  # Runs once per input line.  Classifies the line by its first two
  # fields and records what it finds:
  #   symcount[seg]	 counter per segment; 1 means "segment seen, no
  #			 symbols", so symbols are stored at indexes 2..n
  #   syms[seg,i]	 name of the i'th symbol in segment seg
  #   syms[seg,i,typ]	 its distance/type.  typ is never assigned, so
  #			 this subscript ends in an empty string; the END
  #			 rule reads it back through the same variable.
  #   typs[1..typecount] lower-cased names of struc/record/... types

  if ($1 ~ /^;/)   next;			# Skip comment lines
  if ($1 ~ /^%$/) {				# Strip immed. macro
	$1 = "";
	$0 = $0;				# Re-split so the remaining
  }						#   fields shift left by one

  tok1 = tolower($1);				# Lowercase field no. 1
  tok2 = tolower($2);				# Lowercase field no. 2


  ##### Ideal/MASM mode change
  if (!IDEAL)
	IDEAL = (tok1 in idealset);		# Auto-detect
  if (IDEAL)
	{ tokk = tok1;  toki = $2; }		# Keyword before identifier
  else
	{ tokk = tok2;  toki = $1; }		# Identifier before keyword


  ##### Ignore comment
  if (!IDEAL && tok1 == "comment") {
	tmp = substr($2,1,1);			# Delimiter character
	crest = $0;				# Copy of line, minus the
	sub(/^[ \t]*[^ \t]+[ \t]+/, "", crest);	#   keyword: starts at delimiter
	if (tmp != "" && index(substr(crest, 2), tmp))
	  next;					# Closed on this same line
	while (getline == 1)			# Read until
	  if (index($0, tmp)) break;		#   end of comment
	next;					# Skip to next line
  }

  ##### Ignore macro/rept/irp/irpc
  if (tokk == "macro" || tok1 == "rept" ||
      tok1 == "irp"   || tok1 == "irpc") {
	while (getline == 1)			# Read until "endm"
	  if ($1 ~ /^(E|e)(N|n)(D|d)(M|m)$/) break;
	next;					# Skip to next line
  }


  ##### Segment directive
  if (tokk == "segment" || tok1 in cannedsegset) {
	if (tokk == "segment")
	  curseg = toki;			# Named (generic) segment
	else
	  curseg = toupper(tok1);		# Simplified directive
	if (symcount[curseg] == 0)
	  symcount[curseg]++;			# Need this for void segs
	next;					# Skip to next line
  }


  ##### Proc/label declaration
  if (tokk in labelset) {
	i = ++symcount[curseg]; 		# Step symbol counter
	syms[curseg,i] = toki;			# Add symbol name to array
	gtyp = (($3 != "") ? $3 : "unknown");	# Get distance/type, if any
	if ((tokk == "proc") &&
	   !(tolower(gtyp) in distset))		# Default to model-
	      gtyp = "proc";                    #  dependent size ("proc")
	syms[curseg,i,typ] = gtyp;		# Set type
	next;					# Skip to next line
  }


  if (NEARS) {
  ##### Non-local near code label
  if ($1 ~ /^[A-Za-z_$?][A-Za-z_$?0-9@]*:$/ ) {
	i = ++symcount[curseg]; 		# Step symbol counter
	syms[curseg,i] = \
	    substr($1, 1, index($1,":")-1);	# Add symbol name to array
	syms[curseg,i,typ] = "Near";		# Distance is near
	next;					# Skip to next line
  }
  } # endif (NEARS)


  ##### Data allocation (Ideal and MASM syntax identical)
  #	(Avoid false match on "mov dword ptr memvar, eax")
  if ((tok2 in data2set) && ($3 !~ /^(P|p)(T|t)(R|r)$/)) {
	i = ++symcount[curseg]; 		# Step symbol counter
	nam = $1;
	sub(/:$/, "", nam);			# "sym: db 1" -> name is "sym"
	syms[curseg,i] = nam;			# Add symbol name to array
	if	(tok2 == "db")	gtyp = "byte"
	else if (tok2 == "dw")	gtyp = "word"
	else if (tok2 == "dd")	gtyp = "dword"
	else if (tok2 == "df")	gtyp = "fword"
	else if (tok2 == "dp")	gtyp = "pword"
	else if (tok2 == "dq")	gtyp = "qword"
	else if (tok2 == "dt")	gtyp = "tbyte"
	else if (tok2 == "byte")   gtyp = "BYTE"
	else if (tok2 == "word")   gtyp = "WORD"
	else if (tok2 == "dword")  gtyp = "DWORD"
	else if (tok2 == "sbyte")  gtyp = "SBYTE"
	else if (tok2 == "sword")  gtyp = "SWORD"
	else if (tok2 == "sdword") gtyp = "SDWORD"
	else if (tok2 == "real4")  gtyp = "REAL4"
	else if (tok2 == "real8")  gtyp = "REAL8"
	else if (tok2 == "real10") gtyp = "REAL10"
	else gtyp = "unknown";
	syms[curseg,i,typ] = gtyp;		# Set data type
	next;					# Skip to next line
  }


  if (TYPES) {
  ##### Struc/record/enum definition
  if (tokk in typeset) {
	typs[++typecount] = tolower(toki);	# Add type name to array
	if (tokk=="struc" || tokk=="union") {
	   while (getline == 1) 		# Read until "ends"
	      if ($2 ~ /^(E|e)(N|n)(D|d)(S|s)$/ ||
		  $1 ~ /^(E|e)(N|n)(D|d)(S|s)$/) break;
	}
	next;					# Skip to next line
  }

  ##### Struc/record/enum allocation
  for (j = 1; j <= typecount; j++)		# See if token is
    if (tok2 == typs[j]) {			#  a struc, record etc.
	i = ++symcount[curseg]; 		# Step counter
	nam = $1;
	sub(/:$/, "", nam);			# Drop a trailing label colon
	syms[curseg,i] = nam;			# Add symbol name to array
	syms[curseg,i,typ] = $2;		# Set data type
	break;					# One entry per line, even if
    }						#  the type was recorded twice
  } # endif (TYPES)


} ##### Main loop ends #########################
END {
	##### Report.  One group is printed per segment, shaped like:
	#	segname [SEGMENT]
	#	global	sym_name	:sym_type
	#	[segname ENDS]
	# Simplified ("canned") segment directives are printed bare,
	# without the SEGMENT/ENDS bracket.  Segments come out in
	# whatever order AWK's "for (x in array)" yields them.

	# First line names the syntax the declarations are written for.
	printf(IDEAL ? "; ideal\n" : "; global equ externdef\n");

	for (curseg in symcount) {
	   # The placeholder segment is only shown if it holds symbols.
	   if (symcount[curseg] == 1 && curseg == NOSEG)
	      continue;

	   pad = (length(curseg) < 8 ? "\t" : " ");
	   canned = (tolower(curseg) in cannedsegset);

	   # Opening line
	   if (canned)
	      printf ("\n\t%s\n", curseg);
	   else if (IDEAL)
	      printf ("\n\tSEGMENT\t%s\n", curseg);
	   else
	      printf ("\n\t%s%sSEGMENT\n", curseg, pad);

	   # Symbols are stored from index 2 up; index 1 is void.
	   i = 2;
	   while (i <= symcount[curseg]) {
	      printf "\tglobal\t%-24s%s\n",
		     syms[curseg,i], ":" syms[curseg,i,typ];
	      i++;
	   }

	   # Closing line, for named segments only
	   if (!canned) {
	      if (IDEAL)
		 printf ("\tENDS\t%s\n", curseg);
	      else
		 printf ("\t%s%sENDS\n", curseg, pad);
	   }
	} # endfor
} # eof
