# ports//devel/cmunge/work/cmunger/cmunge

#!/bin/csh -f

#-----------------------------------------------------------------------
# cmunge: a tool for encrypting and compacting C source code.
# 
# Usage:  cmunge [options] files...
#
# Options:
#   -I include-dir	-- Search directory `include-dir' for #include files.
#   -d output-dir	-- Write the output file(s) to directory
#			     `output-dir'.  (Default: current directory.)
#   -f output-file-rootname
#			-- Output files have names starting with the
#			     string `output-file-rootname' followed by ".c"
#			     if there is one output file, or "1.c", "2.c",
#			     etc, if there is more than one.  E.g. `-f f'
# 			     causes the output files to be named "f.c"
# 			     or "f1.c", "f2.c", etc.  (Default: f).
#   -l min-output-linelen
#			-- Make lines of the output file(s) at least
#			     `min-output-linelen' characters long.
#			     (Default: 90.)
#   -p prefix-letter	-- Give the translated C identifiers names
#			     consisting of the letter `prefix-letter'
#			     followed by a number.  E.g. `-p l' causes
#			     the translated identifiers to be called
#			     l1, l2, etc.  (Default: l).
#   -v version-string	-- Insert string `version-string' as a C comment
#			     in the first line of each output file.
#			     (Default: no comment is written.)
#   -<anything else>	-- Other arguments are passed directly to the
#			     C pre-processor, `cpp'.  E.g. argument
#                            `-Dname=def' is passed directly to `cpp'.
#   files...		-- The input C source file(s) to be `munged'.
#
# Spaces are optional between option letters and their accompanying arguments.
#
# 'cmunge' calls 2 programs, 'proc-incl' and 'ctran', whose arguments are:
#
# proc-incl  [-I include-dir]... [-D include-dict-file] [file]
# ctran  [-D id-dict-file] [-f output-file-rootname]
#        [-l min-output-linelen] [-p prefix-letter] [-v version-string]
#        file...
#
# with an intervening call to 'cpp', the C-preprocessor.
#
#
# Author:  John Merlin, Dept of Electronics & Comp Sci, Univ of Southampton.
# Developed:  Dec 1993.
# Released on WWW:  Aug 1998.
# This software can be downloaded from URL:
#	http://www.vcpc.univie.ac.at/~jhm/cmunge/.
# Present contact details:
#	John Merlin, VCPC, University of Vienna.
#	Email:  jhm@vcpc.univie.ac.at
#
#-----------------------------------------------------------------------
# ** STILL TO DO:
# -- get script to check exit status of 'proc-incl' & 'ctran', and 
#	exit & tidy-up if != 0.
# -- in o/p info (at end) only o/p stuff about user & system incl files
# if there are any; otherwise say, e.g. "No user-defined include files.".
# -- try to limit number of vars -- unset as soon as poss.
# -- abbreviate the info!
# -- get it to work with dir names, rootnames, etc, that contain blanks.
#-----------------------------------------------------------------------

#===============================================================#
# Substitute the absolute pathname of `cmunge's `bin' directory	#
# (containing the programs `ctran' and `proc-incl')  here...	#
#								#
# N.B.  If things haven't been moved around, this is		#
# ${cmunger}/bin, where ${cmunger} is the absolute pathname	#
# of the `cmunger' directory.					#
#===============================================================#
set cmunger_bin_dir = "/usr/local/bin"

#---------------------------------------------------------------
# Pathname of the C pre-processor, 'cpp'.  Edit this to suit the
# local installation.
#---------------------------------------------------------------
set cpp = "/usr/bin/cpp"

#---------------------------------------------------------------
# The two helper programs that 'cmunge' drives; both are looked
# up in ${cmunger_bin_dir}.
#---------------------------------------------------------------
set ctran     = "${cmunger_bin_dir}/ctran"
set proc_incl = "${cmunger_bin_dir}/proc-incl"

#---------------------------------------------------------------
# 'nonomatch' lets the pattern 'tmp_*' be used later on to find
# out whether any "tmp_" files exist.
#---------------------------------------------------------------
set nonomatch

#---------------------------------------------------------------
# Defaults: output file rootname 'f', no output directory prefix.
#---------------------------------------------------------------
set outroot = "f"
set outdir  = ""

#---------------------------------------------------------------
# Argument lists that the option-processing code fills in for
# 'ctran', 'cpp' and 'proc-incl' respectively.
#---------------------------------------------------------------
set ctran_args     = ()
set cpp_args       = ()
set proc_incl_args = ()

#---------------------------------------------------------------#
# Consume the leading '-' options from 'argv' one at a time.	#
# Each pass of the loop looks at '$argv[1]', handles it (with	#
# an extra 'shift' when the option's value is a separate word)	#
# and then shifts it away.  The loop ends at the first word	#
# that does not start with '-'; what is left in 'argv' is the	#
# list of input files.  Running out of words at any point is	#
# an error (no input files, or a missing option value).	#
#---------------------------------------------------------------#
if (! $#argv)  goto  no_input_files
while ("$argv[1]" =~ -*)
	switch ($argv[1])
	case  -I*:
		#------------------------------------------------#
		# -I include-dir	-- passed to 'proc-incl' #
		# The option word itself is appended first; for	 #
		# a bare '-I' the directory name that follows	 #
		# is appended as a second word.			 #
		#------------------------------------------------#
		set proc_incl_args = ($proc_incl_args  $argv[1])
		if ("$argv[1]" == "-I") then
			shift
			if (! $#argv) then
				echo "cmunge: expected directory name after '-I'"
				goto usage
			endif
			set proc_incl_args = ($proc_incl_args  "$argv[1]")
		endif
		breaksw

	case  -d*:
		#-------------------------------------------------------#
		# -d output-dir	-- used here  ($outdir), and appended	#
		# to the front of '$outroot' (after all the options	#
		# have been read).					#
		# In the attached form ('-ddir') the leading '-d' is	#
		# stripped with 'sed', fed through a here-document.	#
		#-------------------------------------------------------#
		if ("$argv[1]" == "-d") then
			shift
			if (! $#argv) then
				echo "cmunge: expected directory name after '-d'"
				goto usage
			endif
			set outdir = "$argv[1]"
		else
			set outdir = `sed 's/^-d//' <<ARG\
$argv[1]\
ARG`
		endif
		# Only existence is tested ('-e'), not that it is a directory.
		if ( ! (-e "$outdir")) then
			echo "cmunge: output directory '$outdir' does not exist"
			exit (1)
		endif
		# From here on '$outdir' carries a trailing '/' so that it
		# can be used directly as a filename prefix.
		set  outdir = "${outdir}/"
		breaksw

	case  -f*:
		#-------------------------------------------------------#
		# -f output-file-rootname  -- used here ($outroot) and	#
		# passed to 'ctran'.					#
		# N.B. It's added to 'ctran_args' only after all the	#
		# options have been read, in case '$outdir', which must	#
		# be appended to '$outroot', is also specified.		#
		#-------------------------------------------------------#
		if ("$argv[1]" == "-f") then
			shift
			if (! $#argv) then
				echo "cmunge: expected output file rootname after '-f'"
				goto usage
			endif
			set outroot = "$argv[1]"
		else
			set outroot = `sed 's/^-f//' <<ARG\
$argv[1]\
ARG`
		endif
		breaksw

	case  -l*:
	case  -p*:
		#-----------------------------------------------#
		# -l min-output-linelen	-- passed to 'ctran'	#
		# -p prefix-letter	--    "    "    "	#
		# The pattern '-?' matches the bare two-	#
		# character option, i.e. the value is the next	#
		# word and must be forwarded as well.		#
		#-----------------------------------------------#
		set arg = $argv[1]
		set ctran_args = ($ctran_args  $arg)
		if ("$arg" =~ -?) then
			shift
			if (! $#argv) then
				switch ("$arg")
				case  "-l" :
					echo "cmunge: expected a number after '-l'"
					breaksw
				default :
					echo "cmunge: expected a letter after '-p'"
					breaksw
				endsw
				goto usage
			endif
			set ctran_args = ($ctran_args  $argv[1])
		endif
		breaksw

	case  -v*:
		#-----------------------------------------------#
		# -v version-string	-- passed to 'ctran'	#
		# A separate value is kept in 'version_str'	#
		# (and handed to 'ctran' when it is run); the	#
		# attached form '-vstring' goes straight into	#
		# 'ctran_args'.					#
		#-----------------------------------------------#
		set arg = $argv[1]
		if ("$arg" =~ -v) then
			shift
			if (! $#argv) then
				echo "cmunge: expected a string after '-v'"
				goto usage
			endif
			set version_str = "$argv[1]"
		else
			set ctran_args = ($ctran_args  $arg)
		endif
		breaksw

	default:
		#------------------------------------------------#
		# Pass option directly as an argument to 'cpp'.  #
		# (E.g. this could be something like '-DADAPT'). #
		#------------------------------------------------#
		set cpp_args = ($cpp_args  $argv[1])
		breaksw
	endsw
	# Discard the word just handled (the option, or its separate value).
	shift
	# Options used up every word: there are no input files left.
	if (! $#argv)  goto  no_input_files
end

#---------------------------------------------------------------#
# 'munged_files' is the human-readable description of the	#
# output file name(s) (without any directory prefix): a single	#
# "<root>.c", or the range "<root>1.c - <root>N.c" when there	#
# are N > 1 input files.					#
#---------------------------------------------------------------#
set  munged_files = "${outroot}.c"
if ($#argv > 1)  set  munged_files = "${outroot}1.c - ${outroot}${#argv}.c"

#---------------------------------------------------------------#
# Prefix the rootname with the output directory.  '-f' is	#
# always handed to 'ctran', because the rootname may have	#
# gained a directory prefix even if the user gave no '-f'.	#
#---------------------------------------------------------------#
set  outroot = "${outdir}${outroot}"
set  ctran_args = ($ctran_args -f "$outroot")

#-----------------------------------------------------------------#
# Check that there are no 'tmp_' files that might be overwritten! #
#								  #
# The glob is assigned inside parentheses so that every matching  #
# file becomes a word of 'tmp_files'.  (Without them 'set' treats #
# the second and later matches as further variable names.)	  #
# Because 'nonomatch' is set, an unmatched pattern is left as the #
# literal string "<outdir>tmp_*", so "no such files" means:	  #
# exactly one word, equal to the pattern itself.		  #
#-----------------------------------------------------------------#
set  tmp_files = ("${outdir}"tmp_*)
if ( $#tmp_files > 1 || "$tmp_files" != "${outdir}tmp_*" ) then
	# The message is assembled from up to three pieces; each piece
	# that is followed by another ends in a blank.
	echo -n  "cmunge:  please delete or rename the files "
	if ("$outdir" != "") echo -n  "in '${outdir}' "
	echo  "whose names start 'tmp_'"
	exit (1)
endif
unset  tmp_files

#---------------------------------------------------------------#
# Names of the dictionary files written during a run.  Copies	#
# left over from a previous run of 'cmunge' are removed, and	#
# each removal is echoed.					#
#---------------------------------------------------------------#
set  val2id    = "${outdir}Val2id"
set  id2val    = "${outdir}Id2val"
set  file_dict = "${outdir}File.dict"

foreach stale_file ("$file_dict" "$id2val" "$val2id")
	if (-e "$stale_file") then
		echo  rm  $stale_file
		rm  "$stale_file"
	endif
end
unset  stale_file

#---------------------------------------------------------------#
# Temporary files: the 'include' dictionary and a general	#
# purpose scratch file.  'proc-incl' is told where the include	#
# dictionary lives only when an output directory was given.	#
#---------------------------------------------------------------#
set  incl_dict	= "${outdir}tmp_incl_dict"
set  tmp_file	= "${outdir}tmp_file"
if ("$outdir" != "") then
	set  proc_incl_args = ($proc_incl_args -D "$incl_dict")
endif


#-------------------------------------------------------------------
#	Pass the 'input files' through filter 'proc-incl', which:
# -- replaces each comment by ' ' and splices '\'-terminated lines;
# -- in:	#include  <...>
#		#include  token-sequence
#	 	#include  "filename"	-- 'filename' in "/usr/include"
# replaces "#include" by "@"  (to protect the 'include' file from
# replacement by 'cpp' later on);
# -- in:	#include "filename"	-- 'filename' *not* in "/usr/include"
# replaces 'filename' by a new temporary filename ("tmp_h..").  These
# files (assumed to be 'user-defined') will be 'expanded' by 'cpp' later.
# -- does some simple textual reduction (e.g. replacing each sequence
# of whitespace by a single ' ').
#
#	The output is written to temporary files called "tmp_c..".
#	File 'tmp_incl_dict' is created to store the replacement filenames
# for 'include' files (or rather their numeric suffixes, since they
# all start "tmp_h..").
#
# N.B. We use a 'while' rather than a 'foreach' loop in case any of
# the filenames in '$argv' contain embedded whitespace!
#-------------------------------------------------------------------
echo  ""
echo  "Processing '#include' directives in source files..."

set  final_files = ()
set  tmp_c_files = ()

# 'i' indexes 'argv'; 'num' is the numeric suffix of the files that
# belong to the current input file.
set  i = 1
set  num = 0
while ($i <= $#argv)
	set  c_file = "$argv[$i]"
	@ i++
	@ num++
	set  out_file    = "${outdir}tmp_c$num"
	set  tmp_c_files = ($tmp_c_files  "$out_file")

	# Name of the final output file for this input: numbered only
	# when there is more than one input file.
	if ($#argv > 1) then
		set  final_name = "${outroot}${num}.c"
	else
		set  final_name = "${outroot}.c"
	endif
	set  final_files = ($final_files  "$final_name")

	# Record "<new name> TAB <original name>" in the file dictionary.
	cat  >> "$file_dict" <<FILE_ENTRY
${final_name}	$c_file
FILE_ENTRY

	echo "	'$c_file'	-- output to '$out_file'"
	$proc_incl  $proc_incl_args   "$c_file" > "$out_file"
end

# Give the user the chance to stop here; only the answer 'n' exits.
echo -n "Continue (y/n) ?: "
switch ($<)
case  n:
	exit (0);
endsw

#-------------------------------------------------------------------
#	Now file 'tmp_incl_dict' contains a list of user-defined
# files that are included by the C files.  These files are processed
# as above for the C files.
#	This may generate further new 'include' files, which are listed
# in 'tmp_incl_dict', so this procedure is iterated until no more new
# 'include' files are found.

# N.B. The following 'set  incl_files' command should be robust even
# for 'include' filenames containing whitespace!!  (Aren't I clever!!)
#-------------------------------------------------------------------
echo  ""
echo  "Processing '#include' directives in user-defined 'include' files..."

# Collect the filenames from the '$incl_dict' records that start with
# 'u' followed by a tab.  The backquotes sit inside double quotes so
# that each output line becomes one word.
set  incl_files = "`grep '^u' $incl_dict | sed 's/^u	//'`"
# 'num' is set once, outside the outer loop, so the "tmp_h<num>" names
# keep counting up across all passes.
set  num = 0
while ($#incl_files)
	#-------------------------------------------------------#
	# change 'u' -> 'p' in the records of '$incl_dict', as	#
	# the files therein are now being 'processed'...	#
	# (The edit goes via '$tmp_file', which then replaces	#
	# the dictionary.)					#
	#-------------------------------------------------------#
	sed 's/^u/p/' "$incl_dict" > "$tmp_file";  mv "$tmp_file" "$incl_dict"

	#-------------------------------------------------------#
	# use 'while' rather than 'foreach' in case a filename	#
	# in 'incl_files' contain embedded whitespace!...	#
	#-------------------------------------------------------#
	set i = 1
	while ($i <= $#incl_files)
		set incl_file = "$incl_files[$i]";	@ i++
		@ num++
		set  out_file = "${outdir}tmp_h$num"

		echo "	'$incl_file'	-- output to '$out_file'"
		$proc_incl  $proc_incl_args   "$incl_file" > "$out_file"
	end

	# Re-read the dictionary for 'u' records; the outer loop repeats
	# while any are found.
	# NOTE(review): presumably 'proc-incl' adds these records for newly
	# met 'include' files -- confirm against the 'proc-incl' source.
	set  incl_files = "`grep '^u' $incl_dict | sed 's/^u	//'`"

# This prompt is inside the outer loop, so it is shown after every pass.
# Only the answer 'n' exits.
echo -n "Continue (y/n) ?: "
switch ($<)
case  n:
	exit (0);
endsw

end

#---------------------------------------------------------------#
# Pass input files through C preprocessor			#
#								#
# N.B. As before, we use a 'while' rather than a 'foreach' loop	#
# in case any of the filenames in '$tmp_c_files' contain	#
# embedded whitespace!						#
#---------------------------------------------------------------#
echo  ""
echo  "Passing 'tmp_c..' files through C preprocessor (which 'expands' above 'include' files):"

# Each 'tmp_c..' file is run through 'cpp -P' into the scratch file,
# which then replaces the original.  Every command is echoed first.
set  i = 1
while ($i <= $#tmp_c_files)
	set c_file = "$tmp_c_files[$i]";	@ i++
	# Test the word count of 'cpp_args': comparing the list itself
	# with "" is an expression syntax error once it holds more than
	# one word.
	if ($#cpp_args == 0) then
		echo "	$cpp -P  $c_file > $tmp_file;   mv  $tmp_file  $c_file"
		$cpp -P  "$c_file" > "$tmp_file";   mv  "$tmp_file"  "$c_file"
	else
		echo "	$cpp -P  $cpp_args  $c_file > $tmp_file;   mv  $tmp_file  $c_file"
		# ':q' quotes each word of the list separately, so 'cpp'
		# receives every option as its own argument.  (Plain
		# double quotes would glue them all into one argument.)
		$cpp -P  $cpp_args:q  "$c_file" > "$tmp_file";   mv  "$tmp_file"  "$c_file"
	endif
end

# Give the user the chance to stop here; only the answer 'n' exits.
echo -n "Continue (y/n) ?: "
switch ($<)
case  n:
	exit (0);
endsw

#---------------------------------------------------------------#
# Run 'ctran' over all the 'tmp_c..' files in one call.		#
# The identifier dictionary is named explicitly ('-D') only	#
# when an output directory was given.				#
#---------------------------------------------------------------#
set  id_dict = "${outdir}Id.dict"
if ("$outdir" != "") then
	set  ctran_args = ($ctran_args -D "$id_dict")
endif

echo ""
echo "'Munging' 'tmp_c..' files..."

# 'version_str' exists only if '-v' was given with a separate value;
# in that case it is passed on as a single '-v' argument.
if (! $?version_str) then
	echo "	ctran"  $ctran_args  $tmp_c_files
	$ctran  $ctran_args  $tmp_c_files
else
	echo "	ctran"  $ctran_args -v \""$version_str"\" $tmp_c_files
	$ctran  $ctran_args -v "$version_str" $tmp_c_files
endif

# Give the user the chance to stop here; only the answer 'n' exits.
echo -n "Continue (y/n) ?: "
switch ($<)
case  n:
	exit (0);
endsw

#-------------------------------------------------------#
# Make sorted dictionary files, 'Id2val' and 'Val2id'	#
#							#
# Both are made from '$id_dict' with its first line and	#
# any empty lines removed:				#
#   Id2val -- sorted on the text from the 2nd field on,	#
#	      in dictionary order, ignoring case;	#
#   Val2id -- sorted numerically.			#
# The sort key is given as '-k 2', which replaces the	#
# obsolete equivalent '+1'.				#
#-------------------------------------------------------#
echo  ""
echo  "Munging complete."

echo  ""
echo  "Making sorted ID dictionaries '$id2val' and '$val2id'."
sed 1d  "$id_dict" | sed '/^$/d' | sort -df -k 2 -o "$id2val"
sed 1d  "$id_dict" | sed '/^$/d' | sort -n -o "$val2id"

#-------------------------------------------------------------------#
# Write general information to file 'Cmunge.readme'.		    #
#								    #
# The here-document delimiter is unquoted, so '$munged_files' and   #
# the two backquoted pipelines below are expanded while the file    #
# is written:							    #
#   -- the 'p' records of '$incl_dict' give the expanded files;	    #
#   -- the '#include' lines that remain in the final output files   #
#      give the unexpanded ones.				    #
#-------------------------------------------------------------------#
cat > "${outdir}Cmunge.readme" <<INFO
'cmunge' information
====================
The following files are generated:
$munged_files
	-- The 'munged' C output.
File.dict
	-- A dictionary of new filenames versus original ones.
Id.dict	-- A list (in no particular order) of every identifier with
	   its translation value (a number) or '-' if not renamed.
		If this file exists when 'cmunge' is run it controls
	   the renaming of identifiers, so the translation can be
	   modified by editing this file.  E.g. to prevent an identifier
	   from being renamed, replace its translation value by '-',
	   and to force the renaming of an identifier that by default
	   isn't (e.g. 'printf') replace its '-' by a (unique!) number,
	   and then re-run 'cmunge'.
		The first line of the file contains a number, N say.
	   If the 'Id.dict' file is used in a subsequent 'cmunge' run,
	   then the starting value for translation of new identifiers
	   (i.e. ones not listed in 'Id.dict') is (N+1).  Therefore
	   N must be greater than or equal to the maximum listed
	   translation value.  On output from a 'cmunge' run, this
	   number is set to the maximum listed translation value.
Id2val	-- As above, listed in alphabetical order of the old names.
Val2id	-- As above, listed in order of translation value.


'Include file' information
==========================
The following 'include' files:

	Expanded include files:
	-----------------------
`sed 's/^p	\(.*\)/	"\1"/' $incl_dict | sort`

are expanded in the 'munged' C source code.

Check that all of these files are *user-defined*.  'Standard library'
or 'system' include files shouldn't be expanded, as this may cause
inconsistent renaming and/or make the resulting C code non-portable.
If any of the above *are* 'standard library' or 'system' files,
take the following actions:

-- To prevent the relevant files being expanded, either:
	-- change the filename delimiters of the relevant files
	     in '#include' statements from  "..."  to  <...>; or
	-- put an '@' character at the start of the relevant
	     '#include' lines, which will make 'cmunge' copy them
	     out unchanged  (as described in the final section).
-- Delete the file 'Id.dict' (generated by 'cmunge') and the
	'munged' output files.
-- Re-run 'cmunge'.

The following 'include' files:

	Unexpanded include files:
	-------------------------
`grep -h '^#include' $final_files | sed 's/^#include[ 	]*/	/' | sort -u`

*aren't* expanded in the 'munged' C source code, i.e. their '#include'
directives are preserved.

'cmunge' contains a dictionary of the 'standard library' identifiers
defined in Appendix B of Kernighan & Ritchie's 'ANSI C' -- basically,
those in the following files:

	Standard library include files:
	-------------------------------
	<assert.h>	<limits.h>	<stdarg.h>	<time.h>
	<ctype.h>	<math.h>	<stdio.h>
	<errno.h>	<setjmp.h>	<stdlib.h>
	<float.h>	<signal.h>	<string.h>

These identifiers aren't renamed in the 'munged' output and hence
remain consistent with the 'standard library'.  If all of the
above 'unexpanded include files' are in the list of 'standard library
include files', and no use is made of external functions from 'system'
libraries that are not part of the 'standard library', then all should
be OK, so skip the rest of this message.

By default, all other identifiers in the C source code are renamed
in the 'munged' output.  Therefore, if any of the 'unexpanded include
files' are *not* in the 'standard library' list, or any external
functions are used from a 'system' library that is not part of the
'standard library', 'cmunge' will rename the corresponding identifiers
in the source code, thus making them inconsistent with the names in
the 'system' files or libraries.  Two possible remedies are:

(i) If any of the 'unexpanded include files' are user-defined rather
than 'system' files, then:

-- Change their filename delimiters in the '#include' statements
	from  <...>  to  "...",  or remove the initial '@' from
	their '#include' lines  (if '@' was added to protect the
	lines from being 'munged' as indicated earlier).
-- Delete the 'munged' output files.
-- Re-run 'cmunge'.

This will cause those 'include' files to be expanded in the 'munged'
source code.

-- OR --

(ii) Prevent renaming of the identifiers concerned as follows:

-- Find the improperly renamed identifiers.  (Perhaps the easiest
	way to do this is to compile the 'munged' output to find
	which identifiers aren't declared or defined, using file
	'Val2id' to translate from new to old names).
-- In file 'Id.dict', find the entry for each identifier concerned,
	and replace its first field (a number) by '-'.
-- Delete the 'munged' output files.
-- Re-run 'cmunge'.

N.B. When 'cmunge' is re-run ignore this warning  (as names already
marked with '-' in 'Id.dict' aren't renamed).


How to protect lines of the original source code from being 'munged'
===================================================================
It is possible to protect any line of the source code  (including
blank lines, comments and pre-processor directives) from being
'munged' by prefixing an '@' character as the first character on
the line.  This will cause 'cmunge' to copy out the line unchanged,
apart from removing its initial '@' character.

One possible use of this is to preserve conditional compilation
directives in the 'munged' output, e.g. lines like:

#ifdef __STDC__
...
#else
...
#endif

If 'protected' lines contain identifiers that also appear in
'unprotected' lines (i.e. lines that will be 'munged'), then it is
necessary to prevent those identifiers from being renamed in the
'munged' lines.  This can be done by modifying their entries in the
file 'Id.dict' as described in (ii) above, and re-running 'cmunge'.
INFO

#---------------------------------------------------------------#
# Cleanup and exit!						#
# N.B. We know that there actually *are* some 'tmp_.." files	#
# (e.g. 'tmp_file', 'tmp_incl_dict' and some 'tmp_c' files).	#
# If there weren't, 'tmp_*' would just expand to itself and	#
# 'rm' would give an errmsg!					#
#								#
# The directory part of the pattern is quoted, so that a	#
# directory name is never split into several words before	#
# 'rm' sees it; only the 'tmp_*' part is left open for		#
# filename expansion.						#
#---------------------------------------------------------------#
echo  ""
echo  "rm  ${outdir}tmp_*"
rm  "${outdir}"tmp_*

echo  ""
echo "*****************************************************************************"
echo "*  SEE FILE '${outdir}Cmunge.readme' FOR IMPORTANT INFORMATION ABOUT THIS RUN"
echo "*****************************************************************************"

exit (0)

#---------------------------------------------------------------#
# Error exits.							#
#   no_input_files -- reports the missing input files, then	#
#		      runs on into 'usage'.			#
#   usage	   -- prints the option summary and exits with	#
#		      status 1.					#
# The summary is one here-document; its delimiter is quoted so	#
# that the text is copied out exactly as written.		#
#---------------------------------------------------------------#
no_input_files:
	echo 'cmunge: no input files specified'

usage:
	cat <<'USAGE_TEXT'

Usage:  cmunge [options] files...

Options:
  -I include-dir        -- Search directory 'include-dir' for #include files.
  -d output-dir         -- Write the output file(s) to directory
                             'output-dir'.  (Default: current directory.)
  -f output-file-rootname -- Output files have names starting with the
                             string 'output-file-rootname' followed by '.c'
                             if there is one output file, or '1.c', '2.c',
                             etc, if there is more than one.  (Default: f).
  -l min-output-linelen -- Make lines of the output file(s) at least
                             'min-output-linelen' characters long.
                             (Default: 90.)
  -p prefix-letter      -- Give the translated C identifiers names
                             consisting of the letter 'prefix-letter'
                             followed by a number.  (Default: l).
  -v version-string     -- Insert string 'version-string' as a C comment
                             in the first line of each output file.
                             (Default: no comment is written.)
  -<anything else>      -- Other arguments are passed directly to the
                             C pre-processor, 'cpp'.
  files...              -- The input C source file(s) to be 'munged'.

Spaces are optional between option letters and their accompanying arguments.

USAGE_TEXT

	exit (1)
syntax highlighted by Code2HTML, v. 0.9.1