/* ports//devel/cmunge/work/cmunger/src/ctran.c */

/*{{{  ctran.c*/
/*----------------------------------------------------------------
Usage:
  ctran [-D id-dictionary-file]  [-f output-file-rootname]
        [-l min-output-linelen]  [-p id-prefix]  [-v version-string]
        filename ...
----------------------------------------------------------------*/
/*{{{  global decls*/
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define  BUFLEN			1000	/* max len of ID or keyword in i/p */
#define  PROTECT_LINE_CHAR	'@'	/* marks a 'protected' line (e.g. some "#include"s) */

/*----------------------------------------------------------------------*/
/* Output helpers.  Both expand in the caller's scope and refer to	*/
/* variables named 'op_file' and 'op_linelen' (and, for the second,	*/
/* 'ip_file') there.  'write_and_get_next_char' needs an lvalue.	*/
/*----------------------------------------------------------------------*/
#define  writechar(c)			(op_linelen++, putc (c, op_file))
#define  write_and_get_next_char(c)	(op_linelen++, putc (c, op_file), c = getc (ip_file))

/*----------------------------------------------------------------------*/
/* True if 'c' may appear in an identifier.  The argument is evaluated	*/
/* twice, so it must have no side effects.				*/
/*----------------------------------------------------------------------*/
#define  isidchar(c)			(isalnum (c) || (c) == '_')

/*--------------------------------------*/
/* Token-type flags (one bit each).	*/
/*--------------------------------------*/
#define  TK_WORD_OR_CONST	0x1
#define  TK_OPER		0x2
#define  TK_PUNCT		0x4
#define  TK_SPACE		0x8

#define  LEAVE_NAME_CHAR	'-'	/* in the dictionary: "don't translate this word" */

/*----------------------------------------------------------------------*/
/* Defaults for the command line options.  N.B. none of these may end	*/
/* in ';' -- they are used as expressions.				*/
/*----------------------------------------------------------------------*/
#define  DEFAULT_DICT_FILENAME	"Id.dict"	/* 'ID dictionary' filename */
#define  DEFAULT_OP_ROOTNAME	"f"		/* root of o/p filenames */
#define  DEFAULT_ID_PREFIX	"l"		/* prefix of translated IDs */
#define  DEFAULT_ID_PREFIX_LEN	 1
#define  DEFAULT_MIN_OP_LINELEN	 90		/* min length of o/p lines */

typedef  int  bool;

#define  FALSE	0
#define  TRUE	1

/*----------------------------------------------------------------------*/
/* An entry in one of the hash tables below.				*/
/*	word:	the name itself						*/
/*	val:	in the dictionary code: 0 = leave the word untranslated,*/
/*		-1 = translation value still to be assigned, otherwise	*/
/*		the word's translation value				*/
/*	next:	next entry in the same hash chain			*/
/*----------------------------------------------------------------------*/
typedef struct Symbol {
	char  *word;
	int    val;
	struct Symbol  *next;
} Symbol;

#define  HASHSIZE  101		/* number of chains in each hash table */

char  buf [BUFLEN];		/* shared line / word buffer */

/*----------------------------------------------------------------------*/
/* Hash tables:  C keywords,  'standard' names,  and the words of the	*/
/* 'ID dictionary'.							*/
/*----------------------------------------------------------------------*/
Symbol  *keytab [HASHSIZE],  *std_nametab [HASHSIZE],  *symtab [HASHSIZE];


/*----------------------------------------------------------------------*/
/* 'next_val' is read from / written to the 1st line of the dictionary.	*/
/* 'n_dict_errs' is tested (non-zero => quit) after the dictionary is	*/
/* read;  presumably it is counted up by 'errmsg ()'.			*/
/*----------------------------------------------------------------------*/
int    id_prefix_len,  min_op_linelen,  next_val,  n_dict_errs;

char  *id_prefix,  *dict_filename,  *version_str;

/*{{{  Symbol  keywords []  &  std_names []*/
Symbol  keywords [] = {
	/*{{{  C keywords*/
	/*------------*/
	/* C keywords */
	/*------------*/
	"auto",		0,	0,
	"break",	0,	0,
	"case",		0,	0,
	"char",		0,	0,
	"const",	0,	0,
	"continue",	0,	0,
	"default",	0,	0,
	"do",		0,	0,
	"double",	0,	0,
	"else",		0,	0,
	"enum",		0,	0,
	"extern",	0,	0,
	"float",	0,	0,
	"for",		0,	0,
	"goto",		0,	0,
	"if",		0,	0,
	"int",		0,	0,
	"long",		0,	0,
	"register",	0,	0,
	"return",	0,	0,
	"short",	0,	0,
	"signed",	0,	0,
	"sizeof",	0,	0,
	"static",	0,	0,
	"struct",	0,	0,
	"switch",	0,	0,
	"typedef",	0,	0,
	"union",	0,	0,
	"unsigned",	0,	0,
	"void",		0,	0,
	"volatile",	0,	0,
	"while",	0,	0,
	0,		0,	0
	/*}}}*/
};


/*----------------------------------------------------------------------*/
/* Table of 'standard' names, one { word, val, next } entry each.  The	*/
/* entries are linked into the 'std_nametab' hash table at start-up;	*/
/* presumably a name found there is never translated.  The table ends	*/
/* with an entry whose 'word' is null.					*/
/*----------------------------------------------------------------------*/
Symbol  std_names [] = {
	/*------------------------------*/
	/* 'significant names' in C.	*/
	/*------------------------------*/
	"NULL",		0,	0,
	"main",		0,	0,

	/*------------------------------------------------------*/
	/* Standard library constants and function names.	*/
	/* The section numbers (Bx.x) are from K & R 'ANSI C'.	*/
	/*------------------------------------------------------*/
	/*{{{  B1. <stdio.h>*/
	/*----------------------*/
	/* B1. <stdio.h>	*/
	/*----------------------*/
	/*-----------*/
	/* constants */
	/*-----------*/
	"stdin",	0,	0,
	"stdout",	0,	0,
	"stderr",	0,	0,
	"size_t",	0,	0,
	
	"FILE",		0,	0,
	"EOF",		0,	0,
	"FILENAME_MAX",	0,	0,
	"FOPEN_MAX",	0,	0,
	"TMP_MAX",	0,	0,
	"_IOFBF",	0,	0,
	"_IOLBF",	0,	0,
	"_IONBF",	0,	0,
	"BUFSIZ",	0,	0,
	"SEEK_SET",	0,	0,
	"SEEK_CUR",	0,	0,
	"SEEK_END",	0,	0,
	"L_tmpnam",	0,	0,
	
	/*------------------------------*/
	/* B1.1  File operations	*/
	/*------------------------------*/
	"fopen",	0,	0,
	"freopen",	0,	0,
	"fflush",	0,	0,
	"fclose",	0,	0,
	"remove",	0,	0,
	"rename",	0,	0,
	"tmpfile",	0,	0,
	"tmpnam",	0,	0,
	"setvbuf",	0,	0,
	"setbuf",	0,	0,
	
	/*------------------------------*/
	/* B1.2  Formatted output	*/
	/*------------------------------*/
	"fprintf",	0,	0,
	"printf",	0,	0,
	"sprintf",	0,	0,
	"vfprintf",	0,	0,
	"vprintf",	0,	0,
	"vsprintf",	0,	0,
	
	/*------------------------------*/
	/* B1.3  Formatted input	*/
	/*------------------------------*/
	"fscanf",	0,	0,
	"scanf",	0,	0,
	"sscanf",	0,	0,
	
	/*----------------------*/
	/* B1.4  Character I/O	*/
	/*----------------------*/
	"fgetc",	0,	0,
	"fgets",	0,	0,
	"fputc",	0,	0,
	"fputs",	0,	0,
	"getc",		0,	0,
	"getchar",	0,	0,
	"gets",		0,	0,
	"putc",		0,	0,
	"putchar",	0,	0,
	"puts",		0,	0,
	"ungetc",	0,	0,
	
	/*----------------------*/
	/* B1.5  Direct I/O	*/
	/*----------------------*/
	"fread",	0,	0,
	"fwrite",	0,	0,
	
	/*--------------------------------------*/
	/* B1.6  File positioning functions	*/
	/*--------------------------------------*/
	"fseek",	0,	0,
	"ftell",	0,	0,
	"rewind",	0,	0,
	"fgetpos",	0,	0,
	"fsetpos",	0,	0,
	
	/*------------------------------*/
	/* B1.7  Error functions	*/
	/*------------------------------*/
	"clearerr",	0,	0,
	"feof",		0,	0,
	"ferror",	0,	0,
	"perror",	0,	0,
	/*}}}*/
	
	/*{{{  B2. <ctype.h>*/
	/*----------------------*/
	/* B2. <ctype.h>	*/
	/*----------------------*/
	"isalnum",	0,	0,
	"isalpha",	0,	0,
	"iscntrl",	0,	0,
	"isdigit",	0,	0,
	"isgraph",	0,	0,
	"islower",	0,	0,
	"isprint",	0,	0,
	"ispunct",	0,	0,
	"isspace",	0,	0,
	"isupper",	0,	0,
	"isxdigit",	0,	0,
	"tolower",	0,	0,
	"toupper",	0,	0,
	/*}}}*/
	
	/*{{{  B3. <string.h>*/
	/*----------------------*/
	/* B3. <string.h>	*/
	/*----------------------*/
	"strcpy",	0,	0,
	"strncpy",	0,	0,
	"strcat",	0,	0,
	"strncat",	0,	0,
	"strcmp",	0,	0,
	"strncmp",	0,	0,
	"strchr",	0,	0,
	"strrchr",	0,	0,
	"strspn",	0,	0,
	"strcspn",	0,	0,
	"strpbrk",	0,	0,
	"strstr",	0,	0,
	"strlen",	0,	0,
	"strerror",	0,	0,
	"strtok",	0,	0,
	"memcpy",	0,	0,
	"memmove",	0,	0,
	"memcmp",	0,	0,
	"memchr",	0,	0,
	"memset",	0,	0,
	/*}}}*/
	
	/*{{{  B4. <math.h>  and  <errno.h>*/
	/*------------------------------*/
	/* B4. <math.h>  and  <errno.h>	*/
	/*------------------------------*/
	"EDOM",		0,	0,
	"ERANGE",	0,	0,
	"HUGE_VAL",	0,	0,
	
	"errno",	0,	0,
	
	"sin",		0,	0,
	"cos",		0,	0,
	"tan",		0,	0,
	"asin",		0,	0,
	"acos",		0,	0,
	"atan",		0,	0,
	"atan2",	0,	0,
	"sinh",		0,	0,
	"cosh",		0,	0,
	"tanh",		0,	0,
	"exp",		0,	0,
	"log",		0,	0,
	"log10",	0,	0,
	"pow",		0,	0,
	"sqrt",		0,	0,
	"ceil",		0,	0,
	"floor",	0,	0,
	"fabs",		0,	0,
	"ldexp",	0,	0,
	"frexp",	0,	0,
	"modf",		0,	0,
	"fmod",		0,	0,
	
	/*}}}*/
	
	/*{{{  B5. <stdlib.h>*/
	/*----------------------*/
	/* B5. <stdlib.h>	*/
	/*----------------------*/
	/*------------------------------------------------------*/
	/* LONG_MAX, LONG_MIN and ULONG_MAX are listed once,	*/
	/* under B11 <limits.h> below.				*/
	/*------------------------------------------------------*/
	"RAND_MAX",	0,	0,
	"EXIT_SUCCESS",	0,	0,
	"EXIT_FAILURE",	0,	0,
	
	"div_t",	0,	0,
	"ldiv_t",	0,	0,
	
	"atof",		0,	0,
	"atoi",		0,	0,
	"atol",		0,	0,
	"strtod",	0,	0,
	"strtol",	0,	0,
	"strtoul",	0,	0,
	"rand",		0,	0,
	"srand",	0,	0,
	"calloc",	0,	0,
	"malloc",	0,	0,
	"realloc",	0,	0,
	"free",		0,	0,
	"abort",	0,	0,
	"exit",		0,	0,
	"atexit",	0,	0,
	"system",	0,	0,
	"getenv",	0,	0,
	"bsearch",	0,	0,
	"qsort",	0,	0,
	"abs",		0,	0,
	"labs",		0,	0,
	"div",		0,	0,
	"ldiv",		0,	0,
	
	/*}}}*/
	
	/*{{{  B6. <assert.h>*/
	/*----------------------*/
	/* B6. <assert.h>	*/
	/*----------------------*/
	"NDEBUG",	0,	0,
	
	"assert",	0,	0,
	
	/*}}}*/
	
	/*{{{  B7. <stdarg.h>*/
	/*----------------------*/
	/* B7. <stdarg.h>	*/
	/*----------------------*/
	"va_alist",	0,	0,	/* omitted from K&R C book */
	"va_dcl",	0,	0,	/*    "      "     "    "  */
	"va_list",	0,	0,
	"va_start",	0,	0,
	"va_arg",	0,	0,
	"va_end",	0,	0,
	
	/*}}}*/
	
	/*{{{  B8. <setjmp.h>*/
	/*----------------------*/
	/* B8. <setjmp.h>	*/
	/*----------------------*/
	"jmp_buf",	0,	0,
	
	"setjmp",	0,	0,
	"longjmp",	0,	0,
	
	/*}}}*/
	
	/*{{{  B9. <signal.h>*/
	/*----------------------*/
	/* B9. <signal.h>	*/
	/*----------------------*/
	"SIG_DFL",	0,	0,
	"SIG_IGN",	0,	0,
	"SIGABRT",	0,	0,
	"SIGFPE",	0,	0,
	"SIGILL",	0,	0,
	"SIGINT",	0,	0,
	"SIGSEGV",	0,	0,
	"SIGTERM",	0,	0,
	
	"signal",	0,	0,
	"raise",	0,	0,
	
	/*}}}*/
	
	/*{{{  B10. <time.h>*/
	/*----------------------*/
	/* B10. <time.h>	*/
	/*----------------------*/
	"CLK_TCK",	0,	0,
	"clock_t",	0,	0,
	"time_t",	0,	0,
	"tm",		0,	0,
	"tm_sec",	0,	0,
	"tm_min",	0,	0,
	"tm_hour",	0,	0,
	"tm_mday",	0,	0,
	"tm_mon",	0,	0,
	"tm_year",	0,	0,
	"tm_wday",	0,	0,
	"tm_yday",	0,	0,
	"tm_isdst",	0,	0,
	
	
	"clock",	0,	0,
	"time",		0,	0,
	"difftime",	0,	0,
	"mktime",	0,	0,
	"asctime",	0,	0,
	"ctime",	0,	0,
	"gmtime",	0,	0,
	"localtime",	0,	0,
	"strftime",	0,	0,
	
	/*}}}*/
	
	/*{{{  B11. <limits.h>  and  <float.h>*/
	/*--------------------------------------*/
	/* B11. <limits.h>  and  <float.h>	*/
	/*--------------------------------------*/
	/*--------------*/
	/* <limits.h>	*/
	/*--------------*/
	"CHAR_BIT",	0,	0,
	"CHAR_MAX",	0,	0,
	"CHAR_MIN",	0,	0,
	"INT_MAX",	0,	0,
	"INT_MIN",	0,	0,
	"LONG_MAX",	0,	0,
	"LONG_MIN",	0,	0,
	"SCHAR_MAX",	0,	0,
	"SCHAR_MIN",	0,	0,
	"SHRT_MAX",	0,	0,
	"SHRT_MIN",	0,	0,
	"UCHAR_MAX",	0,	0,
	"UINT_MAX",	0,	0,
	"ULONG_MAX",	0,	0,
	"USHRT_MAX",	0,	0,
	
	/*--------------*/
	/* <float.h>	*/
	/*--------------*/
	"FLT_RADIX",	0,	0,
	"FLT_ROUNDS",	0,	0,
	"FLT_DIG",	0,	0,
	"FLT_EPSILON",	0,	0,
	"FLT_MANT_DIG",	0,	0,
	"FLT_MAX",	0,	0,
	"FLT_MAX_EXP",	0,	0,
	"FLT_MIN",	0,	0,
	"FLT_MIN_EXP",	0,	0,
	
	"DBL_DIG",	0,	0,
	"DBL_EPSILON",	0,	0,
	"DBL_MANT_DIG",	0,	0,
	"DBL_MAX",	0,	0,
	"DBL_MAX_EXP",	0,	0,
	"DBL_MIN",	0,	0,
	"DBL_MIN_EXP",	0,	0,
	
	/*}}}*/

	0,	0,	0	/* end-of-table sentinel (null 'word') */
};
/*}}}*/

/*----------------------------------------------------------------------*/
/* Forward declarations (old-style, without parameter lists) of the	*/
/* functions defined later in the file that don't return plain 'int':	*/
/*	file_exists (filename)	  -- used to test for an existing o/p	*/
/*				     file before creating it		*/
/*	install_word (word, val)  -- a false result is reported as	*/
/*				     "multiple entries for name"	*/
/*	hash (word)		  -- its result indexes the hash tables	*/
/*----------------------------------------------------------------------*/
bool   file_exists (),  install_word ();
unsigned   hash ();
/*}}}*/

/*{{{  main (argc, argv)*/
main (argc, argv)
/*-------------*/
	int  argc;
	char  *argv [];
/*----------------------------------------------------------------
Usage:
  ctran [-D id-dictionary-file]  [-f output-file-rootname]
        [-l min-output-linelen]  [-p id-prefix]  [-v version-string]
        filename ...

Parses the options, reads the old 'ID dictionary', then for each input
file in turn: scans it, assigns word values, and writes its translation
to a newly created output file ("<rootname>.c" for a single input file,
otherwise "<rootname><n>.c" with n = 1, 2, ...).  Finally the new
dictionary is written.

Never returns:  exits with status 0 on success, 1 on any error.
----------------------------------------------------------------*/
{
	char  *op_rootname,  *progname = argv [0];
	FILE  *ip_file,  *op_file;
	int    file_no,  nfiles;

	/*{{{  defaults for above options*/
	/*--------------------------------------------------------------*/
	/* Defaults.  (They're defined in the 'global decls' section.)	*/
	/*--------------------------------------------------------------*/
	dict_filename	= DEFAULT_DICT_FILENAME;
	op_rootname	= DEFAULT_OP_ROOTNAME;
	id_prefix	= DEFAULT_ID_PREFIX;
	id_prefix_len	= DEFAULT_ID_PREFIX_LEN;
	version_str	= (char *) NULL;
	min_op_linelen	= DEFAULT_MIN_OP_LINELEN;
	/*}}}*/
	/*{{{  process command line options (starting '-')*/
	/*----------------------------------------------------------------------
	Options -- notes:
	-----------------
	
	(i)  Since every option is followed by an argument word, every option
	letter must be preceded by a '-' and separated from the last
	argument by whitespace (otherwise we couldn't tell whether a '-'
	in an argument is part of that argument or the next option letter!).
		For simplicity, we don't allow '-' to be repeated (e.g. "--"),
	and require that every '-' is followed by a letter.
	
	(ii)  An option's argument may follow the option letter immediately
	("-fout") or as the next word ("-f out").  In the latter form the
	argument may not start with '-'.
	
	(iii)  We don't check for or complain about repetition of option
	letters;  the last occurrence of an option wins.
	----------------------------------------------------------------------*/
	while (--argc && *(*++argv) == '-') {
		int    c = *++(*argv);	/* the option letter ('\0' if none)	*/
		char  *expected;	/* describes this option's argument	*/
		char  *arg;		/* the option's argument		*/
		
		/*{{{  check the option letter*/
		switch (c) {
		case 'D' :  expected = "a filename";			break;
		case 'f' :  expected = "(the root of) a filename";	break;
		case 'l' :  expected = "a number";			break;
		case 'p' :  expected = "a letter or identifier";	break;
		case 'v' :  expected = "a string";			break;
		
		case '\0' :
			fputs ("Expected an option letter after \'-\'.\n", stderr);
			goto  usage_error;
		
		default :
			fprintf (stderr, "Illegal option \'-%c\'.\n", c);
			goto  usage_error;
		}
		/*}}}*/
		/*{{{  find the option's argument*/
		/*--------------------------------------------------------------*/
		/* The argument either follows the option letter immediately,	*/
		/* or is the next command line word.				*/
		/*--------------------------------------------------------------*/
		if (*++(*argv))
			;	/* argument follows the letter immediately */
		
		else if ( ! --argc || **++argv == '-') {
			fprintf (stderr, "Expect %s after the \'-%c\' option.\n",
			                 expected, c);
			goto  usage_error;
		}
		
		arg = *argv;
		/*}}}*/
		/*{{{  store the option's argument*/
		switch (c) {
		case 'D' :	/* 'ID dictionary' filename */
			dict_filename = arg;
			break;
		
		case 'f' :	/* rootname for output file(s) */
			op_rootname = arg;
			break;
		
		case 'l' : {	/* minimum length of output lines */
			char  *p = arg;
			
			/*------------------------------------------------------*/
			/* The number must start with a digit in BOTH argument	*/
			/* forms ("-l90" and "-l 90").				*/
			/*------------------------------------------------------*/
			if ( ! isdigit ((unsigned char) *p)) {
				fputs ("Expect a number after the \'-l\' option.\n", stderr);
				goto  usage_error;
			}
			
			min_op_linelen = 0;
			while (isdigit ((unsigned char) *p)) {
				min_op_linelen = 10 * min_op_linelen + (*p - '0');
				p++;
			}
			
			if (*p) {
				fprintf (stderr, "Illegal character(s) in number following the \'-l\' option: \"%s\"\n",
				                  p);
				goto  usage_error;
			}
			break;
		}
		
		case 'p' :	/* prefix for translated IDs */
			id_prefix     = arg;
			id_prefix_len = strlen (id_prefix);
			break;
		
		case 'v' :	/* version string: 1st line of each o/p file */
			version_str = arg;
			break;
		
		default :	/* can't happen: letter was checked above */
			break;
		}
		/*}}}*/
	}
	/*}}}*/
	
	nfiles = argc;		/* total number of i/p files */
	if (! nfiles) {
		fputs ("No input file specified.\n", stderr);
		goto  usage_error;
	}

	init_keytab_and_std_nametab ();
	read_old_dict ();

	for (file_no = 1;   argc;   argc--, argv++, file_no++) {
		char  *op_filename;
		int    op_filename_len;

		/*{{{  read & open 'ip_filename', and create the output file*/
		/*--------------------------------------------------------------*/
		/* Open the next C input file, create the output file, and if	*/
		/* 'version_str' isn't NULL, write it, enclosed in C comment	*/
		/* delimiters, as the 1st line of the output file.		*/
		/*--------------------------------------------------------------*/
		if ( !(ip_file = fopen (*argv, "r")) ) {
			fprintf (stderr, "Cannot open file \"%s\".\n", *argv);
			exit (1);
		}
		
		/*------------------------------*/
		/* Compose the 'op_filename'.	*/
		/*------------------------------*/
		op_filename_len = strlen (op_rootname) + 3;	/* +3 for ".c" and '\0' */
		if (nfiles > 1) {
			int  i = file_no;
		
			do
				op_filename_len++;	/* +1 for each digit of 'file_no' */
			while (i /= 10);
		}
		
		op_filename = malloc (op_filename_len);
		if ( ! op_filename) {
			fputs ("*** Error: out of memory.\n", stderr);
			exit (1);
		}
		
		if (nfiles == 1)
			sprintf (op_filename, "%s.c", op_rootname);
		else
			sprintf (op_filename, "%s%d.c", op_rootname, file_no);
		
		/*--------------------------------------------------------------*/
		/* Create the output file.  If it already exists, give an	*/
		/* error message and quit, as we don't want to over-write a	*/
		/* pre-existing 'C' file!					*/
		/*--------------------------------------------------------------*/
		if (file_exists (op_filename)) {
			fprintf (stderr, "*** Error: output file \"%s\" already exists.\n",
			                 op_filename);
			exit (1);
		}
		
		if ( !(op_file = fopen (op_filename, "w")) ) {
			fprintf (stderr, "*** Error: cannot create output file \"%s\"\n",
			                 op_filename);
			exit (1);
		}
		
		/*--------------------------------------------------------------*/
		/* If 'version_str' isn't NULL, write it, enclosed in C comment	*/
		/* delimiters, as the 1st line of the output file.		*/
		/*--------------------------------------------------------------*/
		if (version_str)
			fprintf (op_file, "/* %s */\n", version_str);
		/*}}}*/

		/*------------------------------------------------------*/
		/* Two passes over the input:  scan it,  then rewind	*/
		/* and translate it into the output file.		*/
		/*------------------------------------------------------*/
		scan_file (ip_file);
		assign_wordvals ();

		rewind (ip_file);
		translate_file (ip_file, op_file);

		fclose (ip_file);
		
		/*------------------------------------------------------*/
		/* 'fclose' flushes buffered output, so a failure here	*/
		/* means the output file is incomplete.			*/
		/*------------------------------------------------------*/
		if (fclose (op_file) == EOF) {
			fprintf (stderr, "*** Error: cannot write output file \"%s\"\n",
			                 op_filename);
			exit (1);
		}
		
		free (op_filename);
	}

	write_new_dict ();
	exit (0);	/* successful execution! */
	
usage_error :
	fprintf (stderr, "Usage: %s [-D id-dictionary-file] [-f output-file-rootname]\n",
	                 progname);
	fputs ("        [-l min-output-linelen] [-p id-prefix] [-v version-string]\n", stderr);
	fputs ("        filename ...\n", stderr);
	exit (1);
}
/*}}}*/

/*{{{  init_keytab_and_std_nametab ()*/
init_keytab_and_std_nametab ()
/*--------------------------*/
/*--------------------------------------------------------------*/
/*    Build the two fixed hash tables:  every entry of		*/
/* 'keywords []' is pushed onto the front of its chain in	*/
/* 'keytab',  and every entry of 'std_names []' onto the front	*/
/* of its chain in 'std_nametab'.  Each array is walked up to	*/
/* (not including) its sentinel entry, whose 'word' is null.	*/
/*--------------------------------------------------------------*/
{
	unsigned  slot;
	int       i;

	/*----------------------*/
	/* C keywords		*/
	/*----------------------*/
	i = 0;
	while (keywords [i].word) {
		slot = hash (keywords [i].word);
		keywords [i].next = keytab [slot];
		keytab [slot] = &keywords [i];
		i++;
	}

	/*----------------------*/
	/* 'standard' names	*/
	/*----------------------*/
	i = 0;
	while (std_names [i].word) {
		slot = hash (std_names [i].word);
		std_names [i].next = std_nametab [slot];
		std_nametab [slot] = &std_names [i];
		i++;
	}
}
/*}}}*/

/*{{{  << 'ID dictionary' I/O >>*/
/*{{{  read_old_dict ()*/
read_old_dict ()
/*------------*/
/*----------------------------------------------------------------------*/
/*	Read the 'ID dictionary' file named by 'dict_filename' and	*/
/* install each (word, val) pair it declares in the symbol table.	*/
/* A missing or empty dictionary is not an error: nothing is read.	*/
/*									*/
/* File format, one entry per line:					*/
/*	<digits> <tab> <name>	name has that translation value		*/
/*	'-'      <tab> <name>	name is not to be translated (val 0)	*/
/*	         <tab> <name>	name gets a value later (val -1)	*/
/*	'#'...  or blank	comment / blank line, ignored		*/
/* An optional first line holding only a number gives 'next_val'.	*/
/* The last line need not end in a newline.				*/
/*									*/
/*	Syntax errors are reported through 'errmsg ()';  if		*/
/* 'n_dict_errs' is non-zero once the file has been read, the program	*/
/* exits with status 1.							*/
/*----------------------------------------------------------------------*/
{
	register  char  *p;
	char  *word;
	FILE  *dict_file;
	int    lineno = 0,  max_val = 0,  val;

	if ( !(dict_file = fopen (dict_filename, "r")) )
		return;

	if ( ! fgets (buf, BUFLEN, dict_file) ) {
		fclose (dict_file);
		return;
	}

	/*{{{  1st line may give 'next_val' (otherwise its a normal line)*/
	/*--------------------------------------------------------------*/
	/*	The first line of the 'ID dictionary' may be special:	*/
	/* it may give 'next_val' (i.e. the starting value for 'val's	*/
	/* of identifiers that have no declared translation value).	*/
	/* If so, its format (expressed as a 'printf' format) should be:*/
	/*								*/
	/*	("%d\n", next_val)					*/
	/*								*/
	/*	Otherwise the 1st line is treated as a normal line,	*/
	/* giving a (word,val) pair.					*/
	/*								*/
	/* N.B. If 'next_val' isn't specified, its default is the	*/
	/* maximum 'val' of all the declared (word, val) pairs, or 0	*/
	/* if there are none.						*/
	/*--------------------------------------------------------------*/
	
	p = buf;
	
	/* (The <ctype.h> functions need an 'unsigned char' value.) */
	while (isdigit ((unsigned char) *p)) p++;
	
	if ((p > buf)  &&  (*p != '\t')) {
		/*----------------------------------------------*/
		/* Assume this line specifies the 'next_val'.	*/
		/*----------------------------------------------*/
		lineno++;
		
		sscanf (buf, "%d", &next_val);
		
		/* '\0' here means the file ended without a final newline */
		if (*p != '\n'  &&  *p != '\0')
			errmsg (lineno, "unexpected characters after \'next_val\'", (char *) NULL);
	
		if ( ! fgets (buf, BUFLEN, dict_file))
			goto  end_of_dict;
	}
	/*}}}*/

	do {
		/*{{{  read (word, val) pair from 'buf' & install in 'symtab'*/
		/*--------------------------------------------------------------*/
		/* Read the (word, val) pair given by the line in 'buf []',	*/
		/* and install it in the symbol table.  The line's syntax is	*/
		/* checked.  ('continue' skips to the next line, via the	*/
		/* 'fgets' in the loop condition.)				*/
		/*--------------------------------------------------------------*/
		lineno++;
		p = buf;
		
		/*{{{  read 'val'*/
		/*------------*/
		/* Read 'val' */
		/*------------*/
		switch (*p) {
		case '0' :  case '1' :  case '2' :  case '3' :  case '4' :
		case '5' :  case '6' :  case '7' :  case '8' :  case '9' :
			/*--------------------------------------*/
			/* translation value given for word	*/
			/*--------------------------------------*/
			val = *p - '0';
			p++;
			while (isdigit ((unsigned char) *p)) {
				val = 10 * val + (*p - '0');
				p++;
			}
			if (val > max_val)
				max_val = val;
			break;
		
		case  LEAVE_NAME_CHAR :
			/*----------------------*/
			/* don't translate word	*/
			/*----------------------*/
			val = 0;
			p++;
			break;
		
		case  '#' :  case  '\n' :
			/*------------------------------------------------------*/
			/* a blank or comment line in the 'dictionary' file	*/
			/*------------------------------------------------------*/
			continue;	/* skip line */
		
		case  '\t' :
			/*------------------------------------------------------*/
			/* a new symbol to be given a translation value.  Set	*/
			/* its value to -1 for now -- it will be assigned a	*/
			/* proper value after the input file is scanned.	*/
			/*------------------------------------------------------*/
			val = -1;
			break;
		
		default :
			/*------------------------------------------------------*/
			/* a syntax error in the 'dictionary' file -- skip line	*/
			/*------------------------------------------------------*/
			errmsg (lineno, "syntax error at start of line", (char *) NULL);
			continue;	/* illegal line -- skip it! */
		}
		
		if (*p != '\t') {
			errmsg (lineno, "missing tab before identifier name", (char *) NULL);
			continue;	/* illegal line -- skip it! */
		}
		/*}}}*/
		
		/*{{{  read 'word'*/
		/*-------------*/
		/* Read 'word' */
		/*-------------*/
		word = ++p;		/* skip over '\t' to start of 'word' */
		if ( ! (isalpha ((unsigned char) *word) || *word == '_')) {
			errmsg (lineno, "illegal character where identifier name expected", (char *) NULL);
			continue;	/* illegal line -- skip it! */
		}
		
		p++;
		while (isidchar ((unsigned char) *p))
			p++;
		
		/*--------------------------------------------------------------*/
		/* Check that we're at the end-of-line;  skip the line if not.	*/
		/* The name may end at '\n', or at '\0' when the file's last	*/
		/* line has no newline.  A '\0' at the very end of a full	*/
		/* 'buf' means the line was too long and the name was cut	*/
		/* short, so that is still rejected.				*/
		/*--------------------------------------------------------------*/
		if (*p != '\n'  &&  !(*p == '\0'  &&  p - buf < BUFLEN - 1)) {
			errmsg (lineno, "unexpected characters after name", word);
			continue;	/* illegal line -- skip it! */
		}
		/*}}}*/
		
		/*{{{  install (word, val) pair in 'symtab'*/
		/*--------------------------------------------------------------*/
		/* Install the (word, val) pair in the symbol table.		*/
		/*--------------------------------------------------------------*/
		*p = '\0';	/* terminate 'word' */
		
		if ( ! install_word (word, val))
			errmsg (lineno, "multiple entries for name", word);
		/*}}}*/
		/*}}}*/

	} while (fgets (buf, BUFLEN, dict_file) );

end_of_dict :

	fclose (dict_file);

	if (n_dict_errs) {
		fprintf (stderr, "quitting because of error(s) in \"%s\"\n\n",
		                  dict_filename);
		exit (1);
	}
	if (next_val < max_val) {
		if (next_val)	/* one was specified! */
			fprintf (stderr, "Value specified for \'next_val\' (%d) must not be less than the maximum declared word value (%d).\n\'next_val\' is reset to %d !\n\n",
			                  next_val,  max_val,  max_val);
		next_val = max_val;
	}

	return;
}
/*}}}*/
/*{{{  write_new_dict ()*/
write_new_dict ()
/*-------------*/
/*----------------------------------------------------------------------*/
/*	(Re)write the 'ID dictionary' file named by 'dict_filename'	*/
/* from 'next_val' and the contents of 'symtab':			*/
/*									*/
/*	next_val, then a blank line					*/
/*	"-<tab>name"   for every word whose 'val' is 0			*/
/*	a blank line							*/
/*	"val<tab>name" for every word whose 'val' is non-zero		*/
/*									*/
/*	If the file can't be opened or written, an error message is	*/
/* given and the program exits with status 1.				*/
/*----------------------------------------------------------------------*/
{
	int     i;
	FILE   *dict_file;
	Symbol *sp;

	if ( ! (dict_file = fopen (dict_filename, "w")) ) {
		fprintf (stderr, "*** Error: can\'t open \"%s\" for writing.\n\n",
		                 dict_filename);
		exit (1);	/* nothing can be written to a NULL stream */
	}

	/*------------------------------------------------------*/	
	/* first, output 'next_val'...				*/
	/*------------------------------------------------------*/	
	fprintf (dict_file, "%d\n\n", next_val);

	/*----------------------------------------------*/	
	/* then output all **untranslated** words...	*/
	/*----------------------------------------------*/	
	for (i = 0;  i < HASHSIZE;  i++)
		for (sp = symtab [i];  sp;  sp = sp->next)
			if ( ! sp->val)
				fprintf (dict_file, "%c\t%s\n",
				               LEAVE_NAME_CHAR, sp->word);


	/*----------------------*/	
	/* then a blank line...	*/
	/*----------------------*/	
	fputc ('\n', dict_file);

	/*------------------------------------------------------*/	
	/* then output all **translated** words with their	*/
	/* translation values.					*/
	/*------------------------------------------------------*/	
	for (i = 0;  i < HASHSIZE;  i++)
		for (sp = symtab [i];  sp;  sp = sp->next)
			if (sp->val)
				fprintf (dict_file, "%d\t%s\n",
				                sp->val, sp->word);

	/*------------------------------------------------------*/	
	/* 'fclose' flushes the buffered output, so a failure	*/
	/* here means the dictionary is incomplete.		*/
	/*------------------------------------------------------*/	
	if (fclose (dict_file) == EOF) {
		fprintf (stderr, "*** Error: failed to write \"%s\".\n\n",
		                 dict_filename);
		exit (1);
	}
}
/*}}}*/
/*}}}*/

/*{{{  scan_file (ip_file)*/
scan_file (ip_file)
/*---------------*/
	FILE  *ip_file;
/*----------------------------------------------------------------
We assume:
-- The input file has already been passed through the C preprocessor
(e.g. '\\'-terminated lines have been spliced, and no lines start '#').
-- No extended char consts or strings (i.e. preceded by 'L').
-- No trigraph sequences ("??x"), as this function may split such
sequences by '\n'.  They should be removed by the C preprocessor anyway.

N.B.:  There may be problems if a 'word' is a macro that expands to a
'bare operator', e.g.:
	#define  add  +
as this function removes any whitespace between 'word' and an
adjacent operator, which may be illegal in this circumstance.  E.g.:
	p++ add ++q	==>	p++add++q
which expands to:
	p+++++q
However, only C library macros remain, and *** I assume that none of
them expand to bare operators! ***
----------------------------------------------------------------*/
{
	register  int  c;
	
	c = getc (ip_file);
	while (c != EOF) {
		if (isalpha (c) || c == '_') {
			/*{{{  an identifier or keyword:  translate the former*/
			/*------------------------------------------------------*/
			/* An identifier or keyword.  Translate if the former.	*/
			/*------------------------------------------------------*/
			register  char  *p;
			
			p = buf;
			do {
				*p++ = c;
				c = getc (ip_file);
			} while (isidchar (c));
			
			*p = '\0';			/* terminate the 'word' in 'buf' */
			
			lookup_or_install_word (buf);
			/*}}}*/
		}
		else if (isdigit (c) || c == '.') {
			if (c == '.') {
				/*{{{  check whether member selection op or number*/
				/*--------------------------------------------------------------*/
				/* Check whether 'c' is a member selection operator (i.e. a '.'	*/
				/* not followed by a digit).  If so, treat it like an operator.	*/
				/*--------------------------------------------------------------*/
				c = getc (ip_file);
				if (isdigit (c)) {
					/*-----------------*/
					/* 'c' is a number */
					/*-----------------*/
					ungetc (c, ip_file);	/* restore state so the number	*/
					c = '.';		/* is handled as normal.	*/
				}
				else
					/*---------------------------------------*/
					/* 'c' *is* a member selection operator! */
					/*---------------------------------------*/
					continue;	/* do next iter of outer 'while' loop */
				/*}}}*/
			}
			/*{{{  scan numeric constant*/
			/*----------------------------------------------------------------------*/
			/* A numeric constant!  It should have the following syntax:		*/
			/*									*/
			/* integer:	[0 [x|X]] digits [u|U] [l|L]				*/
			/*									*/
			/* floating:	[digits] ['.'] [digits] [(e|E) [+|-] digits] [f|F|l|L]	*/
			/*									*/
			/* where 'digits' means one or more digits, and for a floating constant:*/
			/*	-- at least one of the '.' and [(e|E)...] must be present	*/
			/*	-- at least one of the first two [digits] must be present	*/
			/*									*/
			/*	The following copies out all valid numerical constants but	*/
			/* doesn't check syntax  (e.g. it doesn't check either of the above	*/
			/* rules, or that "0x" precedes an integer rather than a floating const,*/
			/* or that an 'f' follows a floating rather than an integer const).	*/
			/*----------------------------------------------------------------------*/
			/*------------------------------------------------------*/
			/* optional integer part:    [0 [x|X]] digit  digit...	*/
			/*------------------------------------------------------*/
			if (c == '0') {
				c = getc (ip_file);
				if (c == 'x' || c == 'X')
					do  c = getc (ip_file);  while (isxdigit (c));
			}
			while (isdigit (c))  c = getc (ip_file);
			
			/*----------------------------------------------*/
			/* optional fraction part:    '.'  digit...	*/
			/*----------------------------------------------*/
			if (c == '.')
				do  c = getc (ip_file);  while (isdigit (c));
			
			/*------------------------------------------------------*/
			/* optional exponent:    (e|E) [+|-] digit digit...	*/
			/*------------------------------------------------------*/
			if (c == 'e' || c == 'E') {
				c = getc (ip_file);
				if (c == '+' || c == '-')  c = getc (ip_file);
				while (isdigit (c))  c = getc (ip_file);
			}
			
			/*--------------------------------------------------------------*/
			/* optional suffix letter(s)  (for an integer:  u and/or l;	*/
			/* for a floating constant:  one of f or l;  either case in	*/
			/* either case).						*/
			/*--------------------------------------------------------------*/
			if (c == 'f' || c == 'F' || c == 'l' || c == 'L' || c == 'u' || c == 'U') {
				c = getc (ip_file);
				if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
					c = getc (ip_file);
			}
			/*}}}*/
		}
		else if (c == '\'' || c == '\"') {
			/*{{{  scan a char const or string*/
			/*---------------------------------*/
			/* A character constant or string. */
			/*---------------------------------*/
			int  delim = c;
			
			while ((c = getc (ip_file)) != delim) {
				switch (c) {
				case  EOF :
					return;
			
				case  '\\' :
					/*---------------------------------------*/
					/* copy this & next char unconditionally */
					/*---------------------------------------*/
					c = getc (ip_file);
					if (c == EOF)
						return;
					break;
				}
			}
			c = getc (ip_file);
			
			/*}}}*/
		}
		else if (c == PROTECT_LINE_CHAR) {
			/*{{{  'c' 'protects' the line from translation; scan it*/
			/*--------------------------------------------------------------*/
			/* We assume that this char (normally '@') starts a line that	*/
			/* is to be 'protected' (i.e. output unchanged).  (E.g. the	*/
			/* filter 'proc-incl' introduces this char at the start of	*/
			/* '#include' lines that are to be protected both from 'cpp'	*/
			/* and from 'ctran').  Therefore, scan this whole line, up to	*/
			/* and including the '\n'.					*/
			/*								*/
			/* N.B. *** The validity of this relies on the assumption that	*/
			/* char '@' can't occur in a legal C program except in strings	*/
			/* and character constants.  I hope that's correct!! ***	*/
			/*--------------------------------------------------------------*/
			while ((c = getc (ip_file)) != '\n'  &&  c != EOF)
				;
			
			if (c != EOF)
				c = getc (ip_file);		/* get 1st char of next line */
			/*}}}*/
		}
		else
			c = getc (ip_file);
	}
}

/*}}}*/
/*{{{  translate_file (ip_file, op_file)*/
translate_file (ip_file, op_file)
/*-----------------------------*/
	FILE  *ip_file,  *op_file;
/*----------------------------------------------------------------
Copy the C source read from 'ip_file' to 'op_file' in compacted,
translated form:
-- every identifier for which 'wordval ()' returns non-zero is replaced
   by 'id_prefix' followed by that value in decimal;
-- keywords, untranslated names, numeric constants, strings and char
   constants are copied out unchanged;
-- whitespace is dropped, except for a single ' ' where two words /
   constants, or two operator characters, would otherwise be joined;
-- a '\n' is written once 'op_linelen' has reached 'min_op_linelen'
   (but never inside what might be a multi-character operator);
-- lines starting with PROTECT_LINE_CHAR are copied out on a line of
   their own, without the PROTECT_LINE_CHAR itself.

Local state:
	c		the current (not yet processed) input character;
	last_token_type	TK_* type of the last token written, with TK_SPACE
			or'ed in if whitespace has been seen in the input
			since that token;
	op_linelen	number of chars written on the current o/p line.

The macros 'writechar ()' and 'write_and_get_next_char ()' refer to the
locals 'op_linelen', 'op_file' and 'ip_file' by name, and increment
'op_linelen' for every character they write.

We assume:
-- The input file has already been passed through the C preprocessor
(e.g. '\\'-terminated lines have been spliced, and no lines start '#').
-- No extended char consts or strings (i.e. preceded by 'L').
-- No trigraph sequences ("??x"), as this function may split such
sequences by '\n'.  They should be removed by the C preprocessor anyway.

N.B.:  There may be problems if a 'word' is a macro that expands to a
'bare operator', e.g.:
	#define  add  +
as this function removes any whitespace between 'word' and an
adjacent operator, which may be illegal in this circumstance.  E.g.:
	p++ add ++q	==>	p++add++q
which expands to:
	p+++++q
However, only C library macros remain, and *** I assume that none of
them expand to bare operators! ***

N.B.:  If EOF is met inside a string or character constant, or while
skipping whitespace after an inserted '\n', the function returns at
once; in the former case the last output line is left without a
terminating '\n'.
----------------------------------------------------------------*/
{
	register  int  c;
	int  last_token_type,  op_linelen;
	
	last_token_type = op_linelen = 0;
	c = getc (ip_file);
	while (c != EOF) {
		if (op_linelen >= min_op_linelen) {
			/*{{{  write '\n', unset TK_SPACE & skip whitespace in i/p*/
			/*--------------------------------------------------------------*/
			/* Output a '\n', provided it doesn't split a multi-character	*/
			/* operator, i.e. one of:					*/
			/*								*/
			/*	++,  --,  ->,  >>,  <<,  &&,  ||,  ...,			*/
			/*	{<, >, =, !, +, -, *, /, %, <<, >>, &, ^, |}=		*/
			/*								*/
			/* N.B. The following test for multi-character operators isn't	*/
			/* very rigorous!  It prevents newlines being inserted in some	*/
			/* places where they're OK (e.g. after the '&' in "&++p").	*/
			/* However, we forgo exactness for the sake of simplicity.	*/
			/*								*/
			/* The test is:  the last token written was an operator char	*/
			/* and 'c' could be the continuation of an operator.		*/
			/*--------------------------------------------------------------*/
			bool  multi_char_oper = (last_token_type & TK_OPER)  &&
			                        (c == '+' || c == '-' || c == '>' || c == '<' ||
			                         c == '&' || c == '|' || c == '.' || c == '=');
			
			if ( ! multi_char_oper) {
				/* written with 'putc ()', not 'writechar ()', so the	*/
				/* '\n' itself isn't counted in 'op_linelen'		*/
				putc ('\n', op_file);
			
				/* the '\n' just written already separates tokens, so	*/
				/* any whitespace at this point of the input is dropped	*/
				while (c == ' ' || c == '\t' || c == '\n')
					c = getc (ip_file);
			
				if (c == EOF)
					return;
			
				last_token_type  &=  ~TK_SPACE;		/* unset TK_SPACE, if set */
				op_linelen = 0;
			}
			/*}}}*/
		}
		if (isalpha (c) || c == '_') {
			/*{{{  an identifier or keyword:  translate the former*/
			/*------------------------------------------------------*/
			/* An identifier or keyword.  Translate if the former.	*/
			/*------------------------------------------------------*/
			register  char  *p;
			int  val;
			
			
			/* two adjacent words/constants that were separated by	*/
			/* whitespace in the input must stay separated		*/
			if (last_token_type == (TK_WORD_OR_CONST | TK_SPACE))
				writechar (' ');
				
			last_token_type = TK_WORD_OR_CONST;
			
			/*------------------------------------------------------*/
			/* Collect the whole word into the global 'buf'.	*/
			/* NOTE(review): there is no check against the size of	*/
			/* 'buf' (BUFLEN chars), so a word of BUFLEN or more	*/
			/* characters would be written past its end.		*/
			/*------------------------------------------------------*/
			p = buf;
			do {
				*p++ = c;
				c = getc (ip_file);
			} while (isidchar (c));
			
			*p = '\0';			/* terminate the 'word' in 'buf' */
			
			if (val = wordval (buf)) {
				/*------------------------------------------------------*/
				/* 'translate' the identifier.  'fprintf ()' bypasses	*/
				/* 'writechar ()', so 'op_linelen' is updated by hand:	*/
				/* the prefix length plus one per decimal digit of	*/
				/* 'val' (the loop below destroys 'val').		*/
				/*------------------------------------------------------*/
				fprintf (op_file, "%s%d", id_prefix, val);
				op_linelen += id_prefix_len;	/* for the prefix of the o/p word */
				do
					op_linelen++;	/* count digits */
				while (val /= 10);
			}
			else {
				/*------------------------------------------------------*/
				/* a keyword or identifier that mustn't be changed	*/
				/* (e.g. one from a standard library);  *don't*		*/
				/* 'translate' it!					*/
				/*------------------------------------------------------*/
				fputs (buf, op_file);
				op_linelen += strlen (buf);
			}
			/*}}}*/
		}
		else if (isdigit (c) || c == '.') {
			if (c == '.') {
				/*{{{  process it if it's a member selection oper*/
				/*--------------------------------------------------------------*/
				/* Check whether 'c' is a member selection operator (i.e. a '.'	*/
				/* not followed by a digit), or part of an ANSI-C ellipsis	*/
				/* ("...").  If so, treat it like an operator.			*/
				/*--------------------------------------------------------------*/
				c = getc (ip_file);
				if (isdigit (c)) {
					/*------------------------------------------------------*/
					/* the '.' starts a number (e.g. ".5"):  push the digit	*/
					/* back and carry on as if only the '.' had been read	*/
					/*------------------------------------------------------*/
					ungetc (c, ip_file);	/* restore state so the number	*/
					c = '.';		/* is handled as normal.	*/
				}
				else {
					/*------------------------------------------------------*/
					/* the '.' *is* a member selection operator or part of	*/
					/* an ellipsis!  'c' already holds the character after	*/
					/* it, which the next iteration will process.		*/
					/*------------------------------------------------------*/
					if (last_token_type == (TK_OPER | TK_SPACE))
						writechar (' ');
					last_token_type = TK_OPER;
					writechar ('.');
					continue;	/* do next iter of outer 'while' loop */
				}
				/*}}}*/
			}
			/*{{{  a numeric constant:  copy out*/
			/*----------------------------------------------------------------------
			A numeric constant!  It should have the following syntax:
			
			integer:	[0 [x|X]] digits [u|U] [l|L]
			
			floating:	[digits] ['.'] [digits] [(e|E) [+|-] digits] [f|F|l|L]
			
			where 'digits' means one or more digits (including 'a'-'f' & 'A'-'F'
			for a number starting "0x"), and for a floating constant:
				-- at least one of the '.' and [(e|E)...] must be present
				-- at least one of the first two [digits] must be present
			
			The following copies out all valid numerical constants but doesn't
			check syntax  (e.g. it doesn't check either of the above rules,
			or that "0x" precedes an integer rather than a floating const,
			or that an 'f' follows a floating rather than an integer const).
			
			It assumes that a numeric constant can't be followed by:
			-- another numeric constant without intervening whitespace
				(e.g. "0x100.2E-04" will be parsed as a single constant);
			-- a word without intervening whitespace (e.g. for "0x100else", the
				portion "0x100el" will be parsed as a numeric constant --
				with 'e' an exponent letter & 'l' a suffix!).
			----------------------------------------------------------------------*/
			if (last_token_type == (TK_WORD_OR_CONST | TK_SPACE))
				writechar (' ');
			last_token_type = TK_WORD_OR_CONST;
			
			
			/*{{{  optional integer part:    [0 [x|X]] digit  digit...*/
			/*------------------------------------------------------*/
			/* optional integer part:    [0 [x|X]] digit  digit...	*/
			/* (after "0x" the hex digits are copied by the 'do'	*/
			/* loop, whose first pass writes the 'x' itself)	*/
			/*------------------------------------------------------*/
			if (c == '0') {
				write_and_get_next_char (c);
				if (c == 'x' || c == 'X')
					do  write_and_get_next_char (c);  while (isxdigit (c));
			}
			while (isdigit (c))  write_and_get_next_char (c);
			
			/*}}}*/
			
			/*{{{  optional fraction part:    '.'  digit...*/
			/*----------------------------------------------*/
			/* optional fraction part:    '.'  digit...	*/
			/* (the first pass of the loop writes the '.')	*/
			/*----------------------------------------------*/
			if (c == '.')
				do  write_and_get_next_char (c);  while (isdigit (c));
			
			/*}}}*/
			
			/*{{{  optional exponent:    (e|E) [+|-] digit digit...*/
			/*------------------------------------------------------*/
			/* optional exponent:    (e|E) [+|-] digit digit...	*/
			/*------------------------------------------------------*/
			if (c == 'e' || c == 'E') {
				write_and_get_next_char (c);
				if (c == '+' || c == '-')  write_and_get_next_char (c);
				while (isdigit (c))  write_and_get_next_char (c);
			}
			
			/*}}}*/
			
			/*{{{  optional suffix letter(s)*/
			/*--------------------------------------------------------------*/
			/* optional suffix letter(s)  (for an integer:  u and/or l;	*/
			/* for a floating constant:  one of f or l;  either case in	*/
			/* either case).  At most two suffix letters are copied.	*/
			/*--------------------------------------------------------------*/
			if (c == 'f' || c == 'F' || c == 'l' || c == 'L' || c == 'u' || c == 'U') {
				write_and_get_next_char (c);
				if (c == 'l' || c == 'L' || c == 'u' || c == 'U')
					write_and_get_next_char (c);
			}
			/*}}}*/
			/*}}}*/
		}
		else if (c == '\'' || c == '\"') {
			/*{{{  a char const or string:  copy out*/
			/*--------------------------------------------------------------*/
			/* A character constant or string.  Copy it straight to output,	*/
			/* up to and including the matching closing delimiter.		*/
			/*--------------------------------------------------------------*/
			int  delim = c;
			
			if (last_token_type == (TK_WORD_OR_CONST | TK_SPACE))
				writechar (' ');
			last_token_type = TK_WORD_OR_CONST;
			
			writechar (delim);
			while ((c = getc (ip_file)) != delim) {
				switch (c) {
				case  EOF :
					/* unterminated constant:  give up at once	*/
					/* (no final '\n' is written in this case)	*/
					return;
			
				case  '\\' :
					/*---------------------------------------*/
					/* copy this & next char unconditionally */
					/* (so an escaped delimiter doesn't end	 */
					/* the constant)			 */
					/*---------------------------------------*/
					write_and_get_next_char (c);
					if (c == EOF)
						return;
					writechar (c);
					break;
			
				default :
					writechar (c);
					break;
				}
			}
			writechar (delim);
			c = getc (ip_file);
			/*}}}*/
		}
		else if (c == ' ' || c == '\t' || c == '\n') {
			/*{{{  whitespace:  record it, but output nothing yet*/
			/*--------------------------------------------------------------*/
			/* One or more whitespace chars (' ', '\t' or '\n').		*/
			/*	Nothing is output here.  We just record, by setting	*/
			/* TK_SPACE in 'last_token_type', that whitespace followed the	*/
			/* last token, and scan to the next non-white char, which is	*/
			/* left in 'c' to be processed by the next iteration.  The	*/
			/* branch handling that token decides whether a single ' ' is	*/
			/* actually necessary, and outputs it if so.			*/
			/*--------------------------------------------------------------*/
			last_token_type |= TK_SPACE;
			
			do
				c = getc (ip_file);
			while (c == ' ' || c == '\t' || c == '\n');
			/*}}}*/
		}
		else if (c == PROTECT_LINE_CHAR) {
			/*{{{  start a new line, and o/p rest of line unchanged*/
			/*--------------------------------------------------------------*/
			/* We assume that this char (normally '@') starts a line that	*/
			/* is to be 'protected' (i.e. output unchanged).  (E.g. the	*/
			/* filter 'proc-incl' introduces this char at the start of	*/
			/* '#include' lines that are to be protected both from 'cpp'	*/
			/* and from 'ctran').  Therefore we skip this character, start	*/
			/* a new line, and then copy out the rest of the line unchanged,*/
			/* up to and including the '\n'.				*/
			/*								*/
			/* N.B. *** The validity of this relies on the assumption that	*/
			/* char '@' can't occur in a legal C program except in strings	*/
			/* and character constants.  I hope that's correct!! ***	*/
			/*--------------------------------------------------------------*/
			if (op_linelen)
				putc ('\n', op_file);	/* finish the partly-written o/p line */
			
			while ((c = getc (ip_file)) != '\n'  &&  c != EOF)
				putc (c, op_file);
			
			/* a '\n' is written even if the line was ended by EOF */
			putc ('\n', op_file);
			
			if (c != EOF)
				c = getc (ip_file);		/* get 1st char of next line */
			
			/* we're at the start of a fresh o/p line with no previous token */
			last_token_type = op_linelen = 0;
			/*}}}*/
		}
		else {
			/*{{{  output 'c' & classify it as punctuation or operator*/
			/*--------------------------------------------------------------*/
			/* 'c' is punctuation or (part of?) an operator.  Output it.	*/
			/*	Punctuation is never preceded by a ' '.  An operator	*/
			/* char is preceded by a ' ' only if the previous token was	*/
			/* also an operator char and whitespace separated the two in	*/
			/* the input (so that, e.g., "a + ++b" doesn't become "a+++b").	*/
			/*	Operators are handled one char at a time;  the chars of	*/
			/* a multi-character operator are simply written by successive	*/
			/* iterations.							*/
			/*--------------------------------------------------------------*/
			
			switch (c) {
			case  '('  :  case  ')'  :  case  '{'  :  case  '}'  :  case  '['  :
			case  ']'  :  case  ';'  :  case  '\?' :  case  ':'  :  case  ','  :
			
				last_token_type = TK_PUNCT;
				break;
			
			default :
				if (last_token_type == (TK_OPER | TK_SPACE))
					writechar (' ');
				last_token_type = TK_OPER;
				break;
			}
			
			writechar (c);
			c = getc (ip_file);
			/*}}}*/
		}
	}
	/* terminate the last o/p line, unless nothing has been written on it */
	if (op_linelen)  putc ('\n', op_file);
}
/*}}}*/

/*{{{  << 'symtab' handling functions >>*/
/*{{{  unsigned  hash (char *word)*/
unsigned  hash (word)
/*-----------------*/
	register char  *word;
/*--------------------------------------------------------------*/
/*	Returns the hash-table bucket index for the string	*/
/* 'word', i.e. a number in the range 0 .. HASHSIZE-1.		*/
/*	The characters are folded together, left to right, as	*/
/* the digits of a base-31 number (in unsigned arithmetic),	*/
/* and the result is reduced modulo HASHSIZE.			*/
/*	The empty string hashes to 0.				*/
/*--------------------------------------------------------------*/
{
	register unsigned  sum = 0;

	while (*word) {
		sum = 31 * sum + *word;
		word++;
	}

	return  sum % HASHSIZE;
}
/*}}}*/

/*{{{  bool  install_word (word, val)*/
bool  install_word (word, val)
/*--------------------------*/
	char *word;
	int   val;	/* >= 0 */
/*--------------------------------------------------------------*/
/*	Adds the pair (word, val) to 'symtab', unless 'word' is	*/
/* already there.  (The existing comments say that this is	*/
/* called by 'read_old_dict ()' for each entry of the 'ID	*/
/* dictionary' file.)						*/
/*								*/
/*	'val' comes from the 1st field of the dictionary entry:	*/
/*								*/
/* 1st field	val		meaning				*/
/* ---------	---		-------				*/
/*     -	0		'word' is left untranslated;	*/
/* number (>0)	this number	'word' is translated to the id	*/
/*				prefix followed by this number.	*/
/*								*/
/*	Returns TRUE if a new entry was made, or FALSE if	*/
/* 'word' was already present (i.e. the dictionary gives it a	*/
/* value more than once);  in the latter case 'symtab' is left	*/
/* unchanged.							*/
/*								*/
/* N.B. The results of 'malloc ()' are *not* checked.		*/
/*--------------------------------------------------------------*/
{
	unsigned  bucket = hash (word);
	register Symbol  *entry;

	/*----------------------------------------------*/
	/* Reject a word that's already in the table.	*/
	/*----------------------------------------------*/
	entry = symtab [bucket];
	while (entry) {
		if (strcmp (entry->word, word) == 0)
			return  FALSE;		/* duplicate entry for 'word' */
		entry = entry->next;
	}

	/*------------------------------------------------------*/
	/* Build the new entry.  It owns a private copy of	*/
	/* 'word', so the caller is free to reuse its buffer.	*/
	/*------------------------------------------------------*/
	entry = (Symbol *) malloc (sizeof (Symbol));
	entry->word = (char *) malloc (strlen (word) + 1);	/* +1 for '\0' */
	strcpy (entry->word, word);
	entry->val = val;

	/*----------------------------------------------*/
	/* Push it on the front of the bucket's chain.	*/
	/*----------------------------------------------*/
	entry->next = symtab [bucket];
	symtab [bucket] = entry;

	return  TRUE;
}
/*}}}*/
/*{{{  lookup_or_install_word (word)*/
lookup_or_install_word (word)
/*-------------------------*/
	char  *word;
/*--------------------------------------------------------------*/
/*	Makes sure that 'word' is known.  (The existing comments*/
/* say that 'scan_file ()' calls this for every word it reads.)	*/
/*	Nothing is done if 'word' is a keyword (i.e. is in	*/
/* 'keytab') or is already in 'symtab'.  Otherwise a new entry	*/
/* is added to 'symtab', with the value:			*/
/*								*/
/*	 0	if 'word' is in 'std_nametab' (no translation);	*/
/*	-1	otherwise (to be translated;  the real value is	*/
/*		given later by 'assign_wordvals ()').		*/
/*								*/
/* N.B. The results of 'malloc ()' are *not* checked.		*/
/*--------------------------------------------------------------*/
{
	register Symbol  *scan,  *fresh;
	unsigned  bucket = hash (word);

	/*----------------------*/
	/* Is it a keyword?	*/
	/*----------------------*/
	for (scan = keytab [bucket];  scan;  scan = scan->next)
		if (strcmp (scan->word, word) == 0)
			return;

	/*------------------------------*/
	/* Has it been seen before?	*/
	/*------------------------------*/
	for (scan = symtab [bucket];  scan;  scan = scan->next)
		if (strcmp (scan->word, word) == 0)
			return;

	/*------------------------------------------------------*/
	/* A new word.  Look for it among the standard names;	*/
	/* afterwards 'scan' points to the matching entry, or	*/
	/* is null if there isn't one.				*/
	/*------------------------------------------------------*/
	for (scan = std_nametab [bucket];  scan;  scan = scan->next)
		if (strcmp (scan->word, word) == 0)
			break;

	fresh = (Symbol *) malloc (sizeof (Symbol));
	if (scan) {
		/*----------------------------------------------*/
		/* a standard name:  not translated.  Share the	*/
		/* string held by the 'std_nametab' entry.	*/
		/*----------------------------------------------*/
		fresh->val = 0;
		fresh->word = scan->word;
	}
	else {
		/*----------------------------------------------*/
		/* to be translated:  keep a private copy of	*/
		/* the word.					*/
		/*----------------------------------------------*/
		fresh->val = -1;
		fresh->word = (char *) malloc (strlen (word) + 1);	/* +1 for '\0' */
		strcpy (fresh->word, word);
	}

	/*----------------------------------------------*/
	/* Push it on the front of the bucket's chain.	*/
	/*----------------------------------------------*/
	fresh->next = symtab [bucket];
	symtab [bucket] = fresh;

	return;
}

/*}}}*/
/*{{{  assign_wordvals ()*/
assign_wordvals ()
/*--------------*/
/*--------------------------------------------------------------*/
/*	Walks the whole of 'symtab' and gives every entry whose	*/
/* 'val' is still -1 (i.e. "to be translated, value not yet	*/
/* chosen") the next unused value, by pre-incrementing the	*/
/* global 'next_val'.  All other entries are left alone.	*/
/*	Values are handed out in table order:  bucket 0 first,	*/
/* and within a bucket from the head of its chain.		*/
/*--------------------------------------------------------------*/
{
	register  Symbol *entry;
	register  int  bucket = 0;

	while (bucket < HASHSIZE) {
		entry = symtab [bucket++];
		while (entry) {
			if (entry->val == -1)
				entry->val = ++next_val;
			entry = entry->next;
		}
	}
}
/*}}}*/
/*{{{  int  wordval (word)*/
int  wordval (word)
/*---------------*/
	char  *word;
/*--------------------------------------------------------------*/
/*	Called by 'translate_file ()' to return the 'value' of	*/
/* a word.							*/
/*	A return value of 0 means that 'word' isn't translated:	*/
/* it's a keyword, or its 'symtab' entry has the value 0, or	*/
/* (unexpectedly) it's in neither table.			*/
/*	Otherwise the return value is the 'val' field of the	*/
/* word's 'symtab' entry, which means that 'word' is translated	*/
/* to, e.g., "lxxx", where "xxx" is the return value.		*/
/*--------------------------------------------------------------*/
{
	register Symbol  *sp;
	unsigned  hashval = hash (word);

	/*------------------------------------------------------*/
	/* Search for 'word' in the keyword table, 'keytab'.	*/
	/*------------------------------------------------------*/
	for (sp = keytab [hashval];  sp;  sp = sp->next)
		if (! strcmp (sp->word, word))
			return  0;	/* keywords aren't translated */

	/*------------------------------------------------------*/
	/* Now search for it in the symbol table, 'symtab'.	*/
	/* It should be there, as it should have been installed	*/
	/* when encountered by 'scan_file ()' in the first pass	*/
	/* over the file.					*/
	/*------------------------------------------------------*/
	for (sp = symtab [hashval];  sp;  sp = sp->next)
		if (! strcmp (sp->word, word))
			return  sp->val;

	/*------------------------------------------------------*/
	/* Not found in either table.  This used to fall off	*/
	/* the end of the function, handing the caller an	*/
	/* undefined value.  Return 0 instead, so that the word	*/
	/* is copied out unchanged rather than translated to an	*/
	/* arbitrary name.					*/
	/*------------------------------------------------------*/
	return  0;
}
/*}}}*/

/*}}}*/

/*{{{  errmsg (lineno, msg, word) -- ok*/
errmsg (lineno, msg, word)
/*----------------------*/
	int  lineno;		/* number of the offending line */
	char  *msg;		/* text of the error message */
	char  *word;		/* word involved, or a null pointer if none */
/*----------------------------------------------------------------
	Reports, on 'stderr', an illegal line in the 'ID dictionary'
file, and counts it in 'n_dict_errs'.  The report has the form:

"<dict_filename>" line <lineno>:  <msg> '<word>'; line skipped:
"<contents of buf>"

followed by an empty line.  The " '<word>'" part is left out when
'word' is a null pointer.

We assume that the illegal line is in 'buf []'.
----------------------------------------------------------------*/
{
	++n_dict_errs;

	/* where the error is, and what it is */
	fprintf (stderr, "\"%s\" line %d:  %s", dict_filename, lineno, msg);

	/* the word concerned, if there is one */
	if (word != NULL)
		fprintf (stderr, " \'%s\'", word);

	/* finally, the offending line itself */
	fprintf (stderr, "; line skipped:\n\"%s\"\n\n", buf);
}
/*}}}*/

/*{{{  bool  file_exists (filename)*/
bool  file_exists (filename)
/*------------------------*/
	char  *filename;
/*--------------------------------------------------------------*/
/*	Returns 1 if 'access (filename, 0)' succeeds (i.e.	*/
/* returns 0), and 0 otherwise.					*/
/*								*/
/* N.B. In the Unix implementation, success means that the file	*/
/* exists *and* that the directories leading to it can be	*/
/* searched (i.e. have 'x' permission).				*/
/*	Therefore, if any directory leading to the file *cannot**/
/* be searched, the function returns 0 even if the file exists.	*/
/* However, in that case 'filename' cannot be opened, and so	*/
/* cannot be overwritten either.				*/
/*	Any other implementation of this function must keep the	*/
/* following property:  if the function returns 0, an attempt	*/
/* to open 'filename' is guaranteed not to *delete* or		*/
/* *over-write* a pre-existing file of that name!		*/
/*								*/
/* N.B. System-dependent features:				*/
/* -------------------------------				*/
/* -- Uses the Unix system call 'access()'.			*/
/*--------------------------------------------------------------*/
{
	int  status = access (filename, 0);	/* mode 0 (F_OK on Unix):  existence test */

	if (status != 0)
		return  0;

	return  1;
}
/*}}}*/
/*}}}*/
/* syntax highlighted by Code2HTML, v. 0.9.1 */