diff options
| author | Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> | 2013-11-19 12:43:49 +0100 | 
|---|---|---|
| committer | Bernhard Reutner-Fischer <rep.dot.nop@gmail.com> | 2013-11-19 12:43:49 +0100 | 
| commit | 2a021ae81c36f4281883a3195f7ce81504edf978 (patch) | |
| tree | b851240ca72301ebb42a85fed6e2bfd640f1e21a | |
| parent | 32ef09dd30bb2b8e1878afff755fc5b7000f24c7 (diff) | |
buildsys: update unifdef
sync up to 55501a61dd257e941d53f175350ee52fc6ff2866
(2.9 + refactor keyword and symbol matching)
Signed-off-by: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>
| -rw-r--r-- | Makefile.in | 8 | ||||
| -rwxr-xr-x | extra/scripts/install_headers.sh | 4 | ||||
| -rw-r--r-- | extra/scripts/unifdef.c | 1235 | ||||
| -rw-r--r-- | extra/scripts/unifdef.h | 56 | ||||
| -rw-r--r-- | extra/scripts/version.h | 2 | 
5 files changed, 934 insertions, 371 deletions
| diff --git a/Makefile.in b/Makefile.in index 500b8f37d..946e14e9f 100644 --- a/Makefile.in +++ b/Makefile.in @@ -67,6 +67,10 @@ MAKEFLAGS += -L  $(top_builddir)include/config/linuxthreads/old.h $(top_builddir)include/config/linuxthreads/new.h:  	@true +$(top_builddir)include/generated/unifdef_config.h: $(top_builddir)include/bits/uClibc_config.h| $(top_builddir)include/generated +	@$(disp_gen) +	$(Q)$(SED) -e '1,3d' $^ > $@ +  # For the moment, we have to keep re-running this target  # because the fix includes scripts rely on pre-processers  # in order to generate the headers correctly :(.  That @@ -331,7 +335,9 @@ $(PREFIX)$(RUNTIME_PREFIX)$(MULTILIB_DIR):  	$(do_mkdir)  endif  endif -install_headers: headers $(top_builddir)extra/scripts/unifdef | $(PREFIX)$(DEVEL_PREFIX)include + + +install_headers: headers $(top_builddir)extra/scripts/unifdef $(top_builddir)include/generated/unifdef_config.h | $(PREFIX)$(DEVEL_PREFIX)include  	@$(call disp_install,"include -> $(PREFIX)$(DEVEL_PREFIX)include")  	$(Q)top_builddir=$(top_builddir) \  	$(top_srcdir)extra/scripts/install_headers.sh \ diff --git a/extra/scripts/install_headers.sh b/extra/scripts/install_headers.sh index 14d64dc9d..5a966066d 100755 --- a/extra/scripts/install_headers.sh +++ b/extra/scripts/install_headers.sh @@ -32,7 +32,6 @@ if ! test -x "$top_builddir/extra/scripts/unifdef"; then  	exit 1  fi -  # Sanitize and copy uclibc headers  (  # We must cd, or else we'll prepend "${srcdir}" to filenames! @@ -56,6 +55,9 @@ while read -r filename; do  	# Do not abort the script if unifdef "fails"!  	# NB2: careful with sed command arguments, they contain tab character  	"$top_builddir/extra/scripts/unifdef" \ +		-B \ +		-t \ +		-f "$top_builddir/include/generated/unifdef_config.h" \  		-U_LIBC \  		-U__UCLIBC_GEN_LOCALE \  		-U__NO_CTYPE \ diff --git a/extra/scripts/unifdef.c b/extra/scripts/unifdef.c index abc39966c..b159df0a6 100644 --- a/extra/scripts/unifdef.c +++ b/extra/scripts/unifdef.c @@ -1,13 +1,5 @@  /* - * Copyright (c) 2002 - 2005 Tony Finch <dot@dotat.at>.  All rights reserved. - * - * This code is derived from software contributed to Berkeley by Dave Yost. - * It was rewritten to support ANSI C by Tony Finch. The original version of - * unifdef carried the following copyright notice. None of its code remains - * in this version (though some of the names remain). - * - * Copyright (c) 1985, 1993 - *	The Regents of the University of California.  All rights reserved. + * Copyright (c) 2002 - 2013 Tony Finch <dot@dotat.at>   *   * Redistribution and use in source and binary forms, with or without   * modification, are permitted provided that the following conditions @@ -31,27 +23,15 @@   * SUCH DAMAGE.   */ -#include <sys/cdefs.h> - -#ifndef lint -#if 0 -static const char copyright[] = -"@(#) Copyright (c) 1985, 1993\n\ -	The Regents of the University of California.  All rights reserved.\n"; -#endif -#ifdef __IDSTRING -__IDSTRING(Berkeley, "@(#)unifdef.c	8.1 (Berkeley) 6/6/93"); -__IDSTRING(NetBSD, "$NetBSD: unifdef.c,v 1.8 2000/07/03 02:51:36 matt Exp $"); -__IDSTRING(dotat, "$dotat: things/unifdef.c,v 1.171 2005/03/08 12:38:48 fanf2 Exp $"); -#endif -#endif /* not lint */ -#ifdef __FBSDID -__FBSDID("$FreeBSD: /repoman/r/ncvs/src/usr.bin/unifdef/unifdef.c,v 1.20 2005/05/21 09:55:09 ru Exp $"); -#endif -  /*   * unifdef - remove ifdef'ed lines   * + * This code was derived from software contributed to Berkeley by Dave Yost. + * It was rewritten to support ANSI C by Tony Finch. The original version + * of unifdef carried the 4-clause BSD copyright licence. None of its code + * remains in this version (though some of the names remain) so it now + * carries a more liberal licence. + *   *  Wishlist:   *      provide an option which will append the name of the   *        appropriate symbol after #else's and #endif's @@ -59,26 +39,17 @@ __FBSDID("$FreeBSD: /repoman/r/ncvs/src/usr.bin/unifdef/unifdef.c,v 1.20 2005/05   *        #else's and #endif's to see that they match their   *        corresponding #ifdef or #ifndef   * - *   The first two items above require better buffer handling, which would - *     also make it possible to handle all "dodgy" directives correctly. + *   These require better buffer handling, which would also make + *   it possible to handle all "dodgy" directives correctly.   */ -#include <errno.h> -#include <ctype.h> -#include <stdarg.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -/* Avoid err.h since uClibc can disable these things */ -#define vwarnx(fmt, args)   ({ fprintf(stderr, "unifdef: "); vfprintf(stderr, fmt, args); fprintf(stderr, "\n"); }) -#define warnx(fmt, args...) fprintf(stderr, "unifdef: " fmt "\n", ## args) -#define errx(exit_code, fmt, args...) ({ warnx(fmt, ## args); exit(exit_code); }) -#define err(exit_code, fmt, args...)  errx(exit_code, fmt ": %s", ## args, strerror(errno)) +#include "unifdef.h" -size_t strlcpy(char *dst, const char *src, size_t siz); +static const char copyright[] = +    #include "version.h" +    "@(#) $Author: Tony Finch (dot@dotat.at) $\n" +    "@(#) $URL: http://dotat.at/prog/unifdef $\n" +;  /* types of input lines: */  typedef enum { @@ -96,6 +67,7 @@ typedef enum {  	LT_DODGY_LAST = LT_DODGY + LT_ENDIF,  	LT_PLAIN,		/* ordinary line */  	LT_EOF,			/* end of file */ +	LT_ERROR,		/* unevaluable #if */  	LT_COUNT  } Linetype; @@ -106,9 +78,12 @@ static char const * const linetype_name[] = {  	"DODGY IF", "DODGY TRUE", "DODGY FALSE",  	"DODGY ELIF", "DODGY ELTRUE", "DODGY ELFALSE",  	"DODGY ELSE", "DODGY ENDIF", -	"PLAIN", "EOF" +	"PLAIN", "EOF", "ERROR"  }; +#define linetype_if2elif(lt) ((Linetype)(lt - LT_IF + LT_ELIF)) +#define linetype_2dodgy(lt) ((Linetype)(lt + LT_DODGY)) +  /* state of #if processing */  typedef enum {  	IS_OUTSIDE, @@ -162,7 +137,7 @@ static char const * const linestate_name[] = {   */  #define	MAXDEPTH        64			/* maximum #if nesting */  #define	MAXLINE         4096			/* maximum length of line */ -#define	MAXSYMS         4096			/* maximum number of symbols */ +#define	MAXSYMS         16384			/* maximum number of symbols */  /*   * Sometimes when editing a keyword the replacement text is longer, so @@ -174,13 +149,17 @@ static char const * const linestate_name[] = {   * Globals.   */ +static bool             compblank;		/* -B: compress blank lines */ +static bool             lnblank;		/* -b: blank deleted lines */  static bool             complement;		/* -c: do the complement */  static bool             debugging;		/* -d: debugging reports */ +static bool             inplace;		/* -m: modify in place */  static bool             iocccok;		/* -e: fewer IOCCC errors */ +static bool             strictlogic;		/* -K: keep ambiguous #ifs */  static bool             killconsts;		/* -k: eval constant #ifs */ -static bool             lnblank;		/* -l: blank deleted lines */  static bool             lnnum;			/* -n: add #line directives */  static bool             symlist;		/* -s: output symbol list */ +static bool             symdepth;		/* -S: output symbol depth */  static bool             text;			/* -t: this is a text file */  static const char      *symname[MAXSYMS];	/* symbol name */ @@ -191,10 +170,28 @@ static int              nsyms;			/* number of symbols */  static FILE            *input;			/* input file pointer */  static const char      *filename;		/* input file name */  static int              linenum;		/* current line number */ +static const char      *linefile;		/* file name for #line */ +static FILE            *output;			/* output file pointer */ +static const char      *ofilename;		/* output file name */ +static const char      *backext;		/* backup extension */ +static char            *tempname;		/* avoid splatting input */  static char             tline[MAXLINE+EDITSLOP];/* input buffer plus space */  static char            *keyword;		/* used for editing #elif's */ +/* + * When processing a file, the output's newline style will match the + * input's, and unifdef correctly handles CRLF or LF endings whatever + * the platform's native style. The stdio streams are opened in binary + * mode to accommodate platforms whose native newline style is CRLF. + * When the output isn't a processed input file (when it is error / + * debug / diagnostic messages) then unifdef uses native line endings. + */ + +static const char      *newline;		/* input file format */ +static const char       newline_unix[] = "\n"; +static const char       newline_crlf[] = "\r\n"; +  static Comment_state    incomment;		/* comment parser state */  static Line_state       linestate;		/* #if line parser state */  static Ifstate          ifstate[MAXDEPTH];	/* #if processor state */ @@ -202,31 +199,52 @@ static bool             ignoring[MAXDEPTH];	/* ignore comments state */  static int              stifline[MAXDEPTH];	/* start of current #if */  static int              depth;			/* current #if nesting */  static int              delcount;		/* count of deleted lines */ -static bool             keepthis;		/* don't delete constant #if */ +static unsigned         blankcount;		/* count of blank lines */ +static unsigned         blankmax;		/* maximum recent blankcount */ +static bool             constexpr;		/* constant #if expression */ +static bool             zerosyms;		/* to format symdepth output */ +static bool             firstsym;		/* ditto */ +static int              exitmode;		/* exit status mode */  static int              exitstat;		/* program exit status */ -static void             addsym(bool, bool, char *); +static void             addsym1(bool, bool, char *); +static void             addsym2(bool, const char *, const char *); +static char            *astrcat(const char *, const char *); +static void             cleantemp(void); +static void             closeio(void);  static void             debug(const char *, ...); +static void             debugsym(const char *, int); +static bool             defundef(void); +static void             defundefile(const char *);  static void             done(void);  static void             error(const char *); -static int              findsym(const char *); +static int              findsym(const char **);  static void             flushline(bool); -static Linetype         get_line(void); +static void             hashline(void); +static void             help(void);  static Linetype         ifeval(const char **);  static void             ignoreoff(void);  static void             ignoreon(void); +static void             indirectsym(void);  static void             keywordedit(const char *); +static const char      *matchsym(const char *, const char *);  static void             nest(void); +static Linetype         parseline(void);  static void             process(void); +static void             processinout(const char *, const char *); +static const char      *skipargs(const char *);  static const char      *skipcomment(const char *); +static const char      *skiphash(void); +static const char      *skipline(const char *);  static const char      *skipsym(const char *);  static void             state(Ifstate); -static int              strlcmp(const char *, const char *, size_t);  static void             unnest(void);  static void             usage(void); +static void             version(void); +static const char      *xstrdup(const char *, const char *); -#define endsym(c) (!isalpha((unsigned char)c) && !isdigit((unsigned char)c) && c != '_') +#define endsym(c) (!isalnum((unsigned char)c) && c != '_')  /*   * The main program. @@ -236,7 +254,7 @@ main(int argc, char *argv[])  {  	int opt; -	while ((opt = getopt(argc, argv, "i:D:U:I:cdeklnst")) != -1) +	while ((opt = getopt(argc, argv, "i:D:U:f:I:M:o:x:bBcdehKklmnsStV")) != -1)  		switch (opt) {  		case 'i': /* treat stuff controlled by these symbols as text */  			/* @@ -246,20 +264,26 @@ main(int argc, char *argv[])  			 */  			opt = *optarg++;  			if (opt == 'D') -				addsym(true, true, optarg); +				addsym1(true, true, optarg);  			else if (opt == 'U') -				addsym(true, false, optarg); +				addsym1(true, false, optarg);  			else  				usage();  			break;  		case 'D': /* define a symbol */ -			addsym(false, true, optarg); +			addsym1(false, true, optarg);  			break;  		case 'U': /* undef a symbol */ -			addsym(false, false, optarg); +			addsym1(false, false, optarg);  			break; -		case 'I': -			/* no-op for compatibility with cpp */ +		case 'I': /* no-op for compatibility with cpp */ +			break; +		case 'b': /* blank deleted lines instead of omitting them */ +		case 'l': /* backwards compatibility */ +			lnblank = true; +			break; +		case 'B': /* compress blank lines around removed section */ +			compblank = true;  			break;  		case 'c': /* treat -D as -U and vice versa */  			complement = true; @@ -270,50 +294,210 @@ main(int argc, char *argv[])  		case 'e': /* fewer errors from dodgy lines */  			iocccok = true;  			break; +		case 'f': /* definitions file */ +			defundefile(optarg); +			break; +		case 'h': +			help(); +			break; +		case 'K': /* keep ambiguous #ifs */ +			strictlogic = true; +			break;  		case 'k': /* process constant #ifs */  			killconsts = true;  			break; -		case 'l': /* blank deleted lines instead of omitting them */ -			lnblank = true; +		case 'm': /* modify in place */ +			inplace = true; +			break; +		case 'M': /* modify in place and keep backup */ +			inplace = true; +			backext = optarg;  			break;  		case 'n': /* add #line directive after deleted lines */  			lnnum = true;  			break; +		case 'o': /* output to a file */ +			ofilename = optarg; +			break;  		case 's': /* only output list of symbols that control #ifs */  			symlist = true;  			break; +		case 'S': /* list symbols with their nesting depth */ +			symlist = symdepth = true; +			break;  		case 't': /* don't parse C comments */  			text = true;  			break; +		case 'V': +			version(); +			break; +		case 'x': +			exitmode = atoi(optarg); +			if(exitmode < 0 || exitmode > 2) +				usage(); +			break;  		default:  			usage();  		}  	argc -= optind;  	argv += optind; -	if (argc > 1) { -		errx(2, "can only do one file"); -	} else if (argc == 1 && strcmp(*argv, "-") != 0) { -		filename = *argv; -		input = fopen(filename, "r"); -		if (input == NULL) -			err(2, "can't open %s", filename); -	} else { +	if (compblank && lnblank) +		errx(2, "-B and -b are mutually exclusive"); +	if (symlist && (ofilename != NULL || inplace || argc > 1)) +		errx(2, "-s only works with one input file"); +	if (argc > 1 && ofilename != NULL) +		errx(2, "-o cannot be used with multiple input files"); +	if (argc > 1 && !inplace) +		errx(2, "multiple input files require -m or -M"); +	if (argc == 0) +		argc = 1; +	if (argc == 1 && !inplace && ofilename == NULL) +		ofilename = "-"; +	indirectsym(); + +	atexit(cleantemp); +	if (ofilename != NULL) +		processinout(*argv, ofilename); +	else while (argc-- > 0) { +		processinout(*argv, *argv); +		argv++; +	} +	switch(exitmode) { +	case(0): exit(exitstat); +	case(1): exit(!exitstat); +	case(2): exit(0); +	default: abort(); /* bug */ +	} +} + +/* + * File logistics. + */ +static void +processinout(const char *ifn, const char *ofn) +{ +	struct stat st; + +	if (ifn == NULL || strcmp(ifn, "-") == 0) {  		filename = "[stdin]"; -		input = stdin; +		linefile = NULL; +		input = fbinmode(stdin); +	} else { +		filename = ifn; +		linefile = ifn; +		input = fopen(ifn, "rb"); +		if (input == NULL) +			err(2, "can't open %s", ifn); +	} +	if (strcmp(ofn, "-") == 0) { +		output = fbinmode(stdout); +		process(); +		return; +	} +	if (stat(ofn, &st) < 0) { +		output = fopen(ofn, "wb"); +		if (output == NULL) +			err(2, "can't create %s", ofn); +		process(); +		return;  	} + +	tempname = astrcat(ofn, ".XXXXXX"); +	output = mktempmode(tempname, st.st_mode); +	if (output == NULL) +		err(2, "can't create %s", tempname); +  	process(); -	debug("bug at line %d", __LINE__); -	abort(); /* bug */ + +	if (backext != NULL) { +		char *backname = astrcat(ofn, backext); +		if (rename(ofn, backname) < 0) +			err(2, "can't rename \"%s\" to \"%s\"", ofn, backname); +		free(backname); +	} +	if (replace(tempname, ofn) < 0) +		err(2, "can't rename \"%s\" to \"%s\"", tempname, ofn); +	free(tempname); +	tempname = NULL; +} + +/* + * For cleaning up if there is an error. + */ +static void +cleantemp(void) +{ +	if (tempname != NULL) +		remove(tempname); +} + +/* + * Self-identification functions. + */ + +static void +version(void) +{ +	const char *c = copyright; +	for (;;) { +		while (*++c != '$') +			if (*c == '\0') +				exit(0); +		while (*++c != '$') +			putc(*c, stderr); +		putc('\n', stderr); +	} +} + +static void +synopsis(FILE *fp) +{ +	fprintf(fp, +	    "usage:	unifdef [-bBcdehKkmnsStV] [-x{012}] [-Mext] [-opath] \\\n" +	    "		[-[i]Dsym[=val]] [-[i]Usym] [-fpath] ... [file] ...\n");  }  static void  usage(void)  { -	fprintf(stderr, "usage: unifdef [-cdeklnst] [-Ipath]" -	    " [-Dsym[=val]] [-Usym] [-iDsym[=val]] [-iUsym] ... [file]\n"); +	synopsis(stderr);  	exit(2);  } +static void +help(void) +{ +	synopsis(stdout); +	printf( +	    "	-Dsym=val  define preprocessor symbol with given value\n" +	    "	-Dsym      define preprocessor symbol with value 1\n" +	    "	-Usym	   preprocessor symbol is undefined\n" +	    "	-iDsym=val \\  ignore C strings and comments\n" +	    "	-iDsym      ) in sections controlled by these\n" +	    "	-iUsym	   /  preprocessor symbols\n" +	    "	-fpath	file containing #define and #undef directives\n" +	    "	-b	blank lines instead of deleting them\n" +	    "	-B	compress blank lines around deleted section\n" +	    "	-c	complement (invert) keep vs. delete\n" +	    "	-d	debugging mode\n" +	    "	-e	ignore multiline preprocessor directives\n" +	    "	-h	print help\n" +	    "	-Ipath	extra include file path (ignored)\n" +	    "	-K	disable && and || short-circuiting\n" +	    "	-k	process constant #if expressions\n" +	    "	-Mext	modify in place and keep backups\n" +	    "	-m	modify input files in place\n" +	    "	-n	add #line directives to output\n" +	    "	-opath	output file name\n" +	    "	-S	list #if control symbols with nesting\n" +	    "	-s	list #if control symbols\n" +	    "	-t	ignore C strings and comments\n" +	    "	-V	print version\n" +	    "	-x{012}	exit status mode\n" +	); +	exit(0); +} +  /*   * A state transition function alters the global #if processing state   * in a particular way. The table below is indexed by the current @@ -327,7 +511,8 @@ usage(void)   * When we have processed a group that starts off with a known-false   * #if/#elif sequence (which has therefore been deleted) followed by a   * #elif that we don't understand and therefore must keep, we edit the - * latter into a #if to keep the nesting correct. + * latter into a #if to keep the nesting correct. We use memcpy() to + * overwrite the 4 byte token "elif" with "if  " without a '\0' byte.   *   * When we find a true #elif in a group, the following block will   * always be kept and the rest of the sequence after the next #elif or @@ -380,75 +565,66 @@ static void Oelif (void) { if (!iocccok) Eioccc(); Pelif(); }  static void Idrop (void) { Fdrop();  ignoreon(); }  static void Itrue (void) { Ftrue();  ignoreon(); }  static void Ifalse(void) { Ffalse(); ignoreon(); } -/* edit this line */ -static void Mpass (void) { strncpy(keyword, "if  ", 4); Pelif(); } -static void Mtrue (void) { keywordedit("else\n");  state(IS_TRUE_MIDDLE); } -static void Melif (void) { keywordedit("endif\n"); state(IS_FALSE_TRAILER); } -static void Melse (void) { keywordedit("endif\n"); state(IS_FALSE_ELSE); } +/* modify this line */ +static void Mpass (void) { memcpy(keyword, "if  ", 4); Pelif(); } +static void Mtrue (void) { keywordedit("else");  state(IS_TRUE_MIDDLE); } +static void Melif (void) { keywordedit("endif"); state(IS_FALSE_TRAILER); } +static void Melse (void) { keywordedit("endif"); state(IS_FALSE_ELSE); }  static state_fn * const trans_table[IS_COUNT][LT_COUNT] = {  /* IS_OUTSIDE */  { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Eendif,    Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eendif, -  print, done }, +  print, done,  abort },  /* IS_FALSE_PREFIX */  { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Strue, Sfalse,Selse, Dendif,    Idrop, Idrop, Fdrop, Fdrop, Fdrop, Mpass, Eioccc,Eioccc,Eioccc,Eioccc, -  drop,  Eeof }, +  drop,  Eeof,  abort },  /* IS_TRUE_PREFIX */  { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Dfalse,Dfalse,Dfalse,Delse, Dendif,    Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, -  print, Eeof }, +  print, Eeof,  abort },  /* IS_PASS_MIDDLE */  { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Pelif, Mtrue, Delif, Pelse, Pendif,    Oiffy, Oiffy, Fpass, Oif,   Oif,   Pelif, Oelif, Oelif, Pelse, Pendif, -  print, Eeof }, +  print, Eeof,  abort },  /* IS_FALSE_MIDDLE */  { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Pelif, Mtrue, Delif, Pelse, Pendif,    Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eioccc,Eioccc,Eioccc,Eioccc,Eioccc, -  drop,  Eeof }, +  drop,  Eeof,  abort },  /* IS_TRUE_MIDDLE */  { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Melif, Melif, Melif, Melse, Pendif,    Oiffy, Oiffy, Fpass, Oif,   Oif,   Eioccc,Eioccc,Eioccc,Eioccc,Pendif, -  print, Eeof }, +  print, Eeof,  abort },  /* IS_PASS_ELSE */  { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Pendif,    Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Pendif, -  print, Eeof }, +  print, Eeof,  abort },  /* IS_FALSE_ELSE */  { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Dendif,    Idrop, Idrop, Fdrop, Fdrop, Fdrop, Eelif, Eelif, Eelif, Eelse, Eioccc, -  drop,  Eeof }, +  drop,  Eeof,  abort },  /* IS_TRUE_ELSE */  { Itrue, Ifalse,Fpass, Ftrue, Ffalse,Eelif, Eelif, Eelif, Eelse, Dendif,    Oiffy, Oiffy, Fpass, Oif,   Oif,   Eelif, Eelif, Eelif, Eelse, Eioccc, -  print, Eeof }, +  print, Eeof,  abort },  /* IS_FALSE_TRAILER */  { Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Dendif,    Idrop, Idrop, Fdrop, Fdrop, Fdrop, Dfalse,Dfalse,Dfalse,Delse, Eioccc, -  drop,  Eeof } +  drop,  Eeof,  abort }  /*TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF    TRUEI  FALSEI IF     TRUE   FALSE  ELIF   ELTRUE ELFALSE ELSE  ENDIF (DODGY) -  PLAIN  EOF */ +  PLAIN  EOF    ERROR */  };  /*   * State machine utility functions   */  static void -done(void) -{ -	if (incomment) -		error("EOF in comment"); -	exit(exitstat); -} -static void  ignoreoff(void)  { -	if (depth == 0) { -		debug("bug at line %d", __LINE__); +	if (depth == 0)  		abort(); /* bug */ -	}  	ignoring[depth] = ignoring[depth-1];  }  static void @@ -459,31 +635,25 @@ ignoreon(void)  static void  keywordedit(const char *replacement)  { -	size_t size = tline + sizeof(tline) - keyword; -	char *dst = keyword; -	const char *src = replacement; -	if (size != 0) { -		while ((--size != 0) && (*src != '\0')) -			*dst++ = *src++; -		*dst = '\0'; -	} +	snprintf(keyword, tline + sizeof(tline) - keyword, +	    "%s%s", replacement, newline);  	print();  }  static void  nest(void)  { -	depth += 1; -	if (depth >= MAXDEPTH) +	if (depth > MAXDEPTH-1) +		abort(); /* bug */ +	if (depth == MAXDEPTH-1)  		error("Too many levels of nesting"); +	depth += 1;  	stifline[depth] = linenum;  }  static void  unnest(void)  { -	if (depth == 0) { -		debug("bug at line %d", __LINE__); +	if (depth == 0)  		abort(); /* bug */ -	}  	depth -= 1;  }  static void @@ -493,7 +663,20 @@ state(Ifstate is)  }  /* + * The last state transition function. When this is called, + * lineval == LT_EOF, so the process() loop will terminate. + */ +static void +done(void) +{ +	if (incomment) +		error("EOF in comment"); +	closeio(); +} + +/*   * Write a line to the output or not, according to command line options. + * If writing fails, closeio() will print the error and exit.   */  static void  flushline(bool keep) @@ -501,16 +684,58 @@ flushline(bool keep)  	if (symlist)  		return;  	if (keep ^ complement) { -		if (lnnum && delcount > 0) -			printf("#line %d\n", linenum); -		fputs(tline, stdout); -		delcount = 0; +		bool blankline = tline[strspn(tline, " \t\r\n")] == '\0'; +		if (blankline && compblank && blankcount != blankmax) { +			delcount += 1; +			blankcount += 1; +		} else { +			if (lnnum && delcount > 0) +				hashline(); +			if (fputs(tline, output) == EOF) +				closeio(); +			delcount = 0; +			blankmax = blankcount = blankline ? blankcount + 1 : 0; +		}  	} else { -		if (lnblank) -			putc('\n', stdout); +		if (lnblank && fputs(newline, output) == EOF) +			closeio();  		exitstat = 1;  		delcount += 1; +		blankcount = 0;  	} +	if (debugging && fflush(output) == EOF) +		closeio(); +} + +/* + * Format of #line directives depends on whether we know the input filename. + */ +static void +hashline(void) +{ +	int e; + +	if (linefile == NULL) +		e = fprintf(output, "#line %d%s", linenum, newline); +	else +		e = fprintf(output, "#line %d \"%s\"%s", +		    linenum, linefile, newline); +	if (e < 0) +		closeio(); +} + +/* + * Flush the output and handle errors. + */ +static void +closeio(void) +{ +	/* Tidy up after findsym(). */ +	if (symdepth && !zerosyms) +		printf("\n"); +	if (output != NULL && (ferror(output) || fclose(output) == EOF)) +			err(2, "%s: can't write to output", filename); +	fclose(input);  }  /* @@ -519,14 +744,18 @@ flushline(bool keep)  static void  process(void)  { -	Linetype lineval; - -	for (;;) { -		linenum++; -		lineval = get_line(); +	Linetype lineval = LT_PLAIN; +	/* When compressing blank lines, act as if the file +	   is preceded by a large number of blank lines. */ +	blankmax = blankcount = 1000; +	zerosyms = true; +	newline = NULL; +	linenum = 0; +	while (lineval != LT_EOF) { +		lineval = parseline();  		trans_table[ifstate[depth]][lineval](); -		debug("process %s -> %s depth %d", -		    linetype_name[lineval], +		debug("process line %d %s -> %s depth %d", +		    linenum, linetype_name[lineval],  		    ifstate_name[ifstate[depth]], depth);  	}  } @@ -537,104 +766,131 @@ process(void)   * help from skipcomment().   */  static Linetype -get_line(void) +parseline(void)  {  	const char *cp;  	int cursym; -	int kwlen;  	Linetype retval;  	Comment_state wascomment; -	if (fgets(tline, MAXLINE, input) == NULL) -		return (LT_EOF); -	retval = LT_PLAIN;  	wascomment = incomment; -	cp = skipcomment(tline); -	if (linestate == LS_START) { -		if (*cp == '#') { -			linestate = LS_HASH; -			cp = skipcomment(cp + 1); -		} else if (*cp != '\0') -			linestate = LS_DIRTY; +	cp = skiphash(); +	if (cp == NULL) +		return (LT_EOF); +	if (newline == NULL) { +		if (strrchr(tline, '\n') == strrchr(tline, '\r') + 1) +			newline = newline_crlf; +		else +			newline = newline_unix;  	} -	if (!incomment && linestate == LS_HASH) { -		keyword = tline + (cp - tline); -		cp = skipsym(cp); -		kwlen = cp - keyword; -		/* no way can we deal with a continuation inside a keyword */ -		if (strncmp(cp, "\\\n", 2) == 0) -			Eioccc(); -		if (strlcmp("ifdef", keyword, kwlen) == 0 || -		    strlcmp("ifndef", keyword, kwlen) == 0) { -			cp = skipcomment(cp); -			if ((cursym = findsym(cp)) < 0) -				retval = LT_IF; -			else { -				retval = (keyword[2] == 'n') -				    ? LT_FALSE : LT_TRUE; -				if (value[cursym] == NULL) -					retval = (retval == LT_TRUE) -					    ? LT_FALSE : LT_TRUE; -				if (ignore[cursym]) -					retval = (retval == LT_TRUE) -					    ? LT_TRUEI : LT_FALSEI; -			} -			cp = skipsym(cp); -		} else if (strlcmp("if", keyword, kwlen) == 0) -			retval = ifeval(&cp); -		else if (strlcmp("elif", keyword, kwlen) == 0) -			retval = ifeval(&cp) - LT_IF + LT_ELIF; -		else if (strlcmp("else", keyword, kwlen) == 0) -			retval = LT_ELSE; -		else if (strlcmp("endif", keyword, kwlen) == 0) -			retval = LT_ENDIF; +	if (*cp == '\0') { +		retval = LT_PLAIN; +		goto done; +	} +	keyword = tline + (cp - tline); +	if ((cp = matchsym("ifdef", keyword)) != NULL || +	    (cp = matchsym("ifndef", keyword)) != NULL) { +		cp = skipcomment(cp); +		if ((cursym = findsym(&cp)) < 0) +			retval = LT_IF;  		else { -			linestate = LS_DIRTY; -			retval = LT_PLAIN; +			retval = (keyword[2] == 'n') +			    ? LT_FALSE : LT_TRUE; +			if (value[cursym] == NULL) +				retval = (retval == LT_TRUE) +				    ? LT_FALSE : LT_TRUE; +			if (ignore[cursym]) +				retval = (retval == LT_TRUE) +				    ? LT_TRUEI : LT_FALSEI;  		} -		cp = skipcomment(cp); -		if (*cp != '\0') { +	} else if ((cp = matchsym("if", keyword)) != NULL) +		retval = ifeval(&cp); +	else if ((cp = matchsym("elif", keyword)) != NULL) +		retval = linetype_if2elif(ifeval(&cp)); +	else if ((cp = matchsym("else", keyword)) != NULL) +		retval = LT_ELSE; +	else if ((cp = matchsym("endif", keyword)) != NULL) +		retval = LT_ENDIF; +	else { +		cp = skipsym(keyword); +		/* no way can we deal with a continuation inside a keyword */ +		if (strncmp(cp, "\\\r\n", 3) == 0 || +		    strncmp(cp, "\\\n", 2) == 0) +			Eioccc(); +		cp = skipline(cp); +		retval = LT_PLAIN; +		goto done; +	} +	cp = skipcomment(cp); +	if (*cp != '\0') { +		cp = skipline(cp); +		if (retval == LT_TRUE || retval == LT_FALSE || +		    retval == LT_TRUEI || retval == LT_FALSEI) +			retval = LT_IF; +		if (retval == LT_ELTRUE || retval == LT_ELFALSE) +			retval = LT_ELIF; +	} +	/* the following can happen if the last line of the file lacks a +	   newline or if there is too much whitespace in a directive */ +	if (linestate == LS_HASH) { +		long len = cp - tline; +		if (fgets(tline + len, MAXLINE - len, input) == NULL) { +			if (ferror(input)) +				err(2, "can't read %s", filename); +			/* append the missing newline at eof */ +			strcpy(tline + len, newline); +			cp += strlen(newline); +			linestate = LS_START; +		} else {  			linestate = LS_DIRTY; -			if (retval == LT_TRUE || retval == LT_FALSE || -			    retval == LT_TRUEI || retval == LT_FALSEI) -				retval = LT_IF; -			if (retval == LT_ELTRUE || retval == LT_ELFALSE) -				retval = LT_ELIF; -		} -		if (retval != LT_PLAIN && (wascomment || incomment)) { -			retval += LT_DODGY; -			if (incomment) -				linestate = LS_DIRTY;  		} -		/* skipcomment should have changed the state */ -// Hmm hppens sometimes on valid files -//		if (linestate == LS_HASH) { -//			debug("bug at line %d", __LINE__); -//			abort(); /* bug */ -//		}  	} -	if (linestate == LS_DIRTY) { -		while (*cp != '\0') -			cp = skipcomment(cp + 1); +	if (retval != LT_PLAIN && (wascomment || linestate != LS_START)) { +		retval = linetype_2dodgy(retval); +		linestate = LS_DIRTY;  	} -	debug("parser %s comment %s line", +done: +	debug("parser line %d state %s comment %s line", linenum,  	    comment_name[incomment], linestate_name[linestate]);  	return (retval);  }  /*   * These are the binary operators that are supported by the expression - * evaluator. Note that if support for division is added then we also - * need short-circuiting booleans because of divide-by-zero. + * evaluator.   */ -static int op_lt(int a, int b) { return (a < b); } -static int op_gt(int a, int b) { return (a > b); } -static int op_le(int a, int b) { return (a <= b); } -static int op_ge(int a, int b) { return (a >= b); } -static int op_eq(int a, int b) { return (a == b); } -static int op_ne(int a, int b) { return (a != b); } -static int op_or(int a, int b) { return (a || b); } -static int op_and(int a, int b) { return (a && b); } +static Linetype op_strict(long *p, long v, Linetype at, Linetype bt) { +	if(at == LT_IF || bt == LT_IF) return (LT_IF); +	return (*p = v, v ? LT_TRUE : LT_FALSE); +} +static Linetype op_lt(long *p, Linetype at, long a, Linetype bt, long b) { +	return op_strict(p, a < b, at, bt); +} +static Linetype op_gt(long *p, Linetype at, long a, Linetype bt, long b) { +	return op_strict(p, a > b, at, bt); +} +static Linetype op_le(long *p, Linetype at, long a, Linetype bt, long b) { +	return op_strict(p, a <= b, at, bt); +} +static Linetype op_ge(long *p, Linetype at, long a, Linetype bt, long b) { +	return op_strict(p, a >= b, at, bt); +} +static Linetype op_eq(long *p, Linetype at, long a, Linetype bt, long b) { +	return op_strict(p, a == b, at, bt); +} +static Linetype op_ne(long *p, Linetype at, long a, Linetype bt, long b) { +	return op_strict(p, a != b, at, bt); +} +static Linetype op_or(long *p, Linetype at, long a, Linetype bt, long b) { +	if (!strictlogic && (at == LT_TRUE || bt == LT_TRUE)) +		return (*p = 1, LT_TRUE); +	return op_strict(p, a || b, at, bt); +} +static Linetype op_and(long *p, Linetype at, long a, Linetype bt, long b) { +	if (!strictlogic && (at == LT_FALSE || bt == LT_FALSE)) +		return (*p = 0, LT_FALSE); +	return op_strict(p, a && b, at, bt); +}  /*   * An evaluation function takes three arguments, as follows: (1) a pointer to @@ -643,12 +899,12 @@ static int op_and(int a, int b) { return (a && b); }   * value of the expression; and (3) a pointer to a char* that points to the   * expression to be evaluated and that is updated to the end of the expression   * when evaluation is complete. The function returns LT_FALSE if the value of - * the expression is zero, LT_TRUE if it is non-zero, or LT_IF if the - * expression could not be evaluated. + * the expression is zero, LT_TRUE if it is non-zero, LT_IF if the expression + * depends on an unknown symbol, or LT_ERROR if there is a parse failure.   */  struct ops; -typedef Linetype eval_fn(const struct ops *, int *, const char **); +typedef Linetype eval_fn(const struct ops *, long *, const char **);  static eval_fn eval_table, eval_unary; @@ -659,139 +915,139 @@ static eval_fn eval_table, eval_unary;   * element of the table. Innermost expressions have special non-table-driven   * handling.   */ -static const struct ops { +struct op { +	const char *str; +	Linetype (*fn)(long *, Linetype, long, Linetype, long); +}; +struct ops {  	eval_fn *inner; -	struct op { -		const char *str; -		int short_circuit_val; -		int (*fn)(int, int); -	} op[5]; -} eval_ops[] = { -	{ eval_table, { { "||", 1, op_or } } }, -	{ eval_table, { { "&&", 0, op_and } } }, -	{ eval_table, { { "==", -1, op_eq }, -			{ "!=", -1, op_ne } } }, -	{ eval_unary, { { "<=", -1, op_le }, -			{ ">=", -1, op_ge }, -			{ "<", -1, op_lt }, -			{ ">", -1, op_gt } } } +	struct op op[5];  }; +static const struct ops eval_ops[] = { +	{ eval_table, { { "||", op_or } } }, +	{ eval_table, { { "&&", op_and } } }, +	{ eval_table, { { "==", op_eq }, +			{ "!=", op_ne } } }, +	{ eval_unary, { { "<=", op_le }, +			{ ">=", op_ge }, +			{ "<", op_lt }, +			{ ">", op_gt } } } +}; + +/* Current operator precedence level */ +static long prec(const struct ops *ops) +{ +	return (ops - eval_ops); +}  /* - * Function for evaluating the innermost parts of expressions, viz. - * "!expr", "(expr)", "defined(symbol)", "defined symbol", "symbol", "number". - * We reset the keepthis flag when we find a non-constant subexpression. + * Function for evaluating the innermost parts of expressions, + * viz. !expr (expr) number defined(symbol) symbol + * We reset the constexpr flag in the last two cases.   */ -// TODO: we use LT_IF both as "I don't know whether it's false or true" -// (example: "#if defined FOO") and when we see syntax error -// (example: "#if (1 || 2" - no closing paren!), but this is wrong. -// Binary && and || need to distinguish these cases in order to handle this: -// "#if defined KNOWN_UNDEFINED && FOO" - discard -// "#if defined KNOWN_UNDEFINED && (syntax_error_here" - do not discard!  static Linetype -eval_unary(const struct ops *ops, int *valp, const char **cpp) +eval_unary(const struct ops *ops, long *valp, const char **cpp)  {  	const char *cp;  	char *ep;  	int sym; +	bool defparen; +	Linetype lt;  	cp = skipcomment(*cpp);  	if (*cp == '!') { -		debug("eval%d !", ops - eval_ops); +		debug("eval%d !", prec(ops));  		cp++; -		if (eval_unary(ops, valp, &cp) == LT_IF) { -			*cpp = cp; -			return (LT_IF); +		lt = eval_unary(ops, valp, &cp); +		if (lt == LT_ERROR) +			return (LT_ERROR); +		if (lt != LT_IF) { +			*valp = !*valp; +			lt = *valp ? LT_TRUE : LT_FALSE;  		} -		*valp = !*valp; -  	} else if (*cp == '(') { -		Linetype expr_res; -  		cp++; -		debug("eval%d (%s", ops - eval_ops, cp); -		expr_res = eval_table(eval_ops, valp, &cp); +		debug("eval%d (", prec(ops)); +		lt = eval_table(eval_ops, valp, &cp); +		if (lt == LT_ERROR) +			return (LT_ERROR);  		cp = skipcomment(cp); -		*cpp = cp;  		if (*cp++ != ')') -			return (LT_IF); -		*cpp = cp; -		if (expr_res == LT_IF) -			return (LT_IF); - +			return (LT_ERROR);  	} else if (isdigit((unsigned char)*cp)) { -		debug("eval%d number", ops - eval_ops); +		debug("eval%d number", prec(ops));  		*valp = strtol(cp, &ep, 0); -		cp = skipsym(cp); - -	} else if (strncmp(cp, "defined", 7) == 0 && endsym(cp[7])) { -		bool parens; - +		if (ep == cp) +			return (LT_ERROR); +		lt = *valp ? LT_TRUE : LT_FALSE; +		cp = ep; +	} else if (matchsym("defined", cp) != NULL) {  		cp = skipcomment(cp+7); -		debug("eval%d defined '%s'", ops - eval_ops, cp); -		parens = (*cp == '('); -		if (parens) +		if (*cp == '(') {  			cp = skipcomment(cp+1); -		sym = findsym(cp); -		cp = skipsym(cp); +			defparen = true; +		} else { +			defparen = false; +		} +		sym = findsym(&cp);  		cp = skipcomment(cp); -		if (parens) { -			if (*cp != ')') -				return (LT_IF); -			cp = skipcomment(cp+1); +		if (defparen && *cp++ != ')') { +			debug("eval%d defined missing ')'", prec(ops)); +			return (LT_ERROR);  		} -		*cpp = cp;  		if (sym < 0) { -			debug("sym not found, returning LT_IF"); -			return (LT_IF); +			debug("eval%d defined unknown", prec(ops)); +			lt = LT_IF; +		} else { +			debug("eval%d defined %s", prec(ops), symname[sym]); +			*valp = (value[sym] != NULL); +			lt = *valp ? LT_TRUE : LT_FALSE;  		} -		*valp = (value[sym] != NULL); -		keepthis = false; - +		constexpr = false;  	} else if (!endsym(*cp)) { -		debug("eval%d symbol", ops - eval_ops); -		sym = findsym(cp); -		cp = skipsym(cp); -		*cpp = cp; -		if (sym < 0) -			return (LT_IF); -		if (value[sym] == NULL) +		debug("eval%d symbol", prec(ops)); +		sym = findsym(&cp); +		if (sym < 0) { +			lt = LT_IF; +			cp = skipargs(cp); +		} else if (value[sym] == NULL) {  			*valp = 0; -		else { +			lt = LT_FALSE; +		} else {  			*valp = strtol(value[sym], &ep, 0);  			if (*ep != '\0' || ep == value[sym]) -				return (LT_IF); +				return (LT_ERROR); +			lt = *valp ? LT_TRUE : LT_FALSE; +			cp = skipargs(cp);  		} -		keepthis = false; - +		constexpr = false;  	} else { -		debug("eval%d bad expr", ops - eval_ops); -		return (LT_IF); +		debug("eval%d bad expr", prec(ops)); +		return (LT_ERROR);  	}  	*cpp = cp; -	debug("eval%d = %d", ops - eval_ops, *valp); -	return (*valp ? LT_TRUE : LT_FALSE); +	debug("eval%d = %d", prec(ops), *valp); +	return (lt);  }  /*   * Table-driven evaluation of binary operators.   */  static Linetype -eval_table(const struct ops *ops, int *valp, const char **cpp) +eval_table(const struct ops *ops, long *valp, const char **cpp)  { -	Linetype left_side;  	const struct op *op;  	const char *cp; -	int val; +	long val; +	Linetype lt, rt; -	debug("eval%d '%s'", ops - eval_ops, *cpp); -	left_side = ops->inner(ops+1, valp, cpp); +	debug("eval%d", prec(ops));  	cp = *cpp; - +	lt = ops->inner(ops+1, valp, &cp); +	if (lt == LT_ERROR) +		return (LT_ERROR);  	for (;;) { -		Linetype right_side; -  		cp = skipcomment(cp);  		for (op = ops->op; op->str != NULL; op++)  			if (strncmp(cp, op->str, strlen(op->str)) == 0) @@ -799,38 +1055,17 @@ eval_table(const struct ops *ops, int *valp, const char **cpp)  		if (op->str == NULL)  			break;  		cp += strlen(op->str); -		debug("eval%d '%s'", ops - eval_ops, op->str); -		right_side = ops->inner(ops+1, &val, &cp); -		*cpp = cp; - -		/* If short_circuit_val is 0 or 1, we can ignore -		 * right side if left size is known, and its value -		 * (i.e., *valp) is 0 or !0, respectively */ -		if (left_side != LT_IF && op->short_circuit_val == !!*valp) { -			debug("op->short_circuit_val:%d *valp:%d cp:'%s'", -					op->short_circuit_val, *valp, cp); -			*valp = !!*valp; -			break; -		} -		/* Same for the right side */ -		if (right_side != LT_IF && op->short_circuit_val == !!val) { -			debug("op->short_circuit_val:%d val:%d cp:'%s'", -					op->short_circuit_val, val, cp); -			left_side = right_side; -			*valp = !!val; -			break; -		} - -		if (left_side == LT_IF || right_side == LT_IF) -			return (LT_IF); -		*valp = op->fn(*valp, val); -		left_side = right_side; +		debug("eval%d %s", prec(ops), op->str); +		rt = ops->inner(ops+1, &val, &cp); +		if (rt == LT_ERROR) +			return (LT_ERROR); +		lt = op->fn(valp, lt, *valp, rt, val);  	} -	debug("eval%d = %d LT_IF:%d", ops - eval_ops, *valp, (left_side == LT_IF)); -	if (left_side == LT_IF) -		return (LT_IF); -	return (*valp ? LT_TRUE : LT_FALSE); +	*cpp = cp; +	debug("eval%d = %d", prec(ops), *valp); +	debug("eval%d lt = %s", prec(ops), linetype_name[lt]); +	return (lt);  }  /* @@ -841,14 +1076,56 @@ eval_table(const struct ops *ops, int *valp, const char **cpp)  static Linetype  ifeval(const char **cpp)  { -	int ret; -	int val; +	Linetype ret; +	long val = 0;  	debug("eval %s", *cpp); -	keepthis = killconsts ? false : true; +	constexpr = killconsts ? false : true;  	ret = eval_table(eval_ops, &val, cpp); -	debug("val:%d ret:%d keepthis:%d", val, ret, keepthis); -	return (keepthis ? LT_IF : ret); +	debug("eval = %d", val); +	return (constexpr ? LT_IF : ret == LT_ERROR ? LT_IF : ret); +} + +/* + * Read a line and examine its initial part to determine if it is a + * preprocessor directive. Returns NULL on EOF, or a pointer to a + * preprocessor directive name, or a pointer to the zero byte at the + * end of the line. + */ +static const char * +skiphash(void) +{ +	const char *cp; + +	linenum++; +	if (fgets(tline, MAXLINE, input) == NULL) { +		if (ferror(input)) +			err(2, "can't read %s", filename); +		else +			return (NULL); +	} +	cp = skipcomment(tline); +	if (linestate == LS_START && *cp == '#') { +		linestate = LS_HASH; +		return (skipcomment(cp + 1)); +	} else if (*cp == '\0') { +		return (cp); +	} else { +		return (skipline(cp)); +	} +} + +/* + * Mark a line dirty and consume the rest of it, keeping track of the + * lexical state. + */ +static const char * +skipline(const char *cp) +{ +	linestate = LS_DIRTY; +	while (*cp != '\0') +		cp = skipcomment(cp + 1); +	return (cp);  }  /* @@ -869,11 +1146,16 @@ skipcomment(const char *cp)  	}  	while (*cp != '\0')  		/* don't reset to LS_START after a line continuation */ -		if (strncmp(cp, "\\\n", 2) == 0) +		if (strncmp(cp, "\\\r\n", 3) == 0) +			cp += 3; +		else if (strncmp(cp, "\\\n", 2) == 0)  			cp += 2;  		else switch (incomment) {  		case NO_COMMENT: -			if (strncmp(cp, "/\\\n", 3) == 0) { +			if (strncmp(cp, "/\\\r\n", 4) == 0) { +				incomment = STARTING_COMMENT; +				cp += 4; +			} else if (strncmp(cp, "/\\\n", 3) == 0) {  				incomment = STARTING_COMMENT;  				cp += 3;  			} else if (strncmp(cp, "/*", 2) == 0) { @@ -893,7 +1175,7 @@ skipcomment(const char *cp)  			} else if (strncmp(cp, "\n", 1) == 0) {  				linestate = LS_START;  				cp += 1; -			} else if (strchr(" \t", *cp) != NULL) { +			} else if (strchr(" \r\t", *cp) != NULL) {  				cp += 1;  			} else  				return (cp); @@ -925,7 +1207,10 @@ skipcomment(const char *cp)  				cp += 1;  			continue;  		case C_COMMENT: -			if (strncmp(cp, "*\\\n", 3) == 0) { +			if (strncmp(cp, "*\\\r\n", 4) == 0) { +				incomment = FINISHING_COMMENT; +				cp += 4; +			} else if (strncmp(cp, "*\\\n", 3) == 0) {  				incomment = FINISHING_COMMENT;  				cp += 3;  			} else if (strncmp(cp, "*/", 2) == 0) { @@ -954,13 +1239,37 @@ skipcomment(const char *cp)  				incomment = C_COMMENT;  			continue;  		default: -			debug("bug at line %d", __LINE__);  			abort(); /* bug */  		}  	return (cp);  }  /* + * Skip macro arguments. + */ +static const char * +skipargs(const char *cp) +{ +	const char *ocp = cp; +	int level = 0; +	cp = skipcomment(cp); +	if (*cp != '(') +		return (cp); +	do { +		if (*cp == '(') +			level++; +		if (*cp == ')') +			level--; +		cp = skipcomment(cp+1); +	} while (level != 0 && *cp != '\0'); +	if (level == 0) +		return (cp); +	else +	/* Rewind and re-detect the syntax error later. */ +		return (ocp); +} + +/*   * Skip over an identifier.   */  static const char * @@ -972,27 +1281,69 @@ skipsym(const char *cp)  }  /* - * Look for the symbol in the symbol table. If is is found, we return + * Skip whitespace and take a copy of any following identifier. + */ +static const char * +getsym(const char **cpp) +{ +	const char *cp = *cpp, *sym; + +	cp = skipcomment(cp); +	cp = skipsym(sym = cp); +	if (cp == sym) +		return NULL; +	*cpp = cp; +	return (xstrdup(sym, cp)); +} + +/* + * Check that s (a symbol) matches the start of t, and that the + * following character in t is not a symbol character. Returns a + * pointer to the following character in t if there is a match, + * otherwise NULL. + */ +static const char * +matchsym(const char *s, const char *t) +{ +	while (*s != '\0' && *t != '\0') +		if (*s != *t) +			return (NULL); +		else +			++s, ++t; +	if (*s == '\0' && endsym(*t)) +		return(t); +	else +		return(NULL); +} + +/* + * Look for the symbol in the symbol table. If it is found, we return   * the symbol table index, else we return -1.   */  static int -findsym(const char *str) +findsym(const char **strp)  { -	const char *cp; +	const char *str;  	int symind; -	cp = skipsym(str); -	if (cp == str) -		return (-1); +	str = *strp; +	*strp = skipsym(str);  	if (symlist) { -		printf("%.*s\n", (int)(cp-str), str); +		if (*strp == str) +			return (-1); +		if (symdepth && firstsym) +			printf("%s%3d", zerosyms ? "" : "\n", depth); +		firstsym = zerosyms = false; +		printf("%s%.*s%s", +		       symdepth ? " " : "", +		       (int)(*strp-str), str, +		       symdepth ? "" : "\n");  		/* we don't care about the value of the symbol */  		return (0);  	}  	for (symind = 0; symind < nsyms; ++symind) { -		if (strlcmp(symname[symind], str, cp-str) == 0) { -			debug("findsym %s %s", symname[symind], -			    value[symind] ? value[symind] : ""); +		if (matchsym(symname[symind], str) != NULL) { +			debugsym("findsym", symind);  			return (symind);  		}  	} @@ -1000,51 +1351,196 @@ findsym(const char *str)  }  /* + * Resolve indirect symbol values to their final definitions. + */ +static void +indirectsym(void) +{ +	const char *cp; +	int changed, sym, ind; + +	do { +		changed = 0; +		for (sym = 0; sym < nsyms; ++sym) { +			if (value[sym] == NULL) +				continue; +			cp = value[sym]; +			ind = findsym(&cp); +			if (ind == -1 || ind == sym || +			    *cp != '\0' || +			    value[ind] == NULL || +			    value[ind] == value[sym]) +				continue; +			debugsym("indir...", sym); +			value[sym] = value[ind]; +			debugsym("...ectsym", sym); +			changed++; +		} +	} while (changed); +} + +/* + * Add a symbol to the symbol table, specified with the format sym=val + */ +static void +addsym1(bool ignorethis, bool definethis, char *symval) +{ +	const char *sym, *val; + +	sym = symval; +	val = skipsym(sym); +	if (definethis && *val == '=') { +		symval[val - sym] = '\0'; +		val = val + 1; +	} else if (*val == '\0') { +		val = definethis ? "1" : NULL; +	} else { +		usage(); +	} +	addsym2(ignorethis, sym, val); +} + +/*   * Add a symbol to the symbol table.   */  static void -addsym(bool ignorethis, bool definethis, char *sym) +addsym2(bool ignorethis, const char *sym, const char *val)  { +	const char *cp = sym;  	int symind; -	char *val; -	symind = findsym(sym); +	symind = findsym(&cp);  	if (symind < 0) {  		if (nsyms >= MAXSYMS)  			errx(2, "too many symbols");  		symind = nsyms++;  	} -	symname[symind] = sym;  	ignore[symind] = ignorethis; -	val = sym + (skipsym(sym) - sym); -	if (definethis) { -		if (*val == '=') { -			value[symind] = val+1; -			*val = '\0'; -		} else if (*val == '\0') -			value[symind] = ""; -		else -			usage(); +	symname[symind] = sym; +	value[symind] = val; +	debugsym("addsym", symind); +} + +static void +debugsym(const char *why, int symind) +{ +	debug("%s %s%c%s", why, symname[symind], +	    value[symind] ? '=' : ' ', +	    value[symind] ? value[symind] : "undef"); +} + +/* + * Add symbols to the symbol table from a file containing + * #define and #undef preprocessor directives. + */ +static void +defundefile(const char *fn) +{ +	filename = fn; +	input = fopen(fn, "rb"); +	if (input == NULL) +		err(2, "can't open %s", fn); +	linenum = 0; +	while (defundef()) +		; +	if (ferror(input)) +		err(2, "can't read %s", filename); +	else +		fclose(input); +	if (incomment) +		error("EOF in comment"); +} + +/* + * Read and process one #define or #undef directive + */ +static bool +defundef(void) +{ +	const char *cp, *kw, *sym, *val, *end; +	Comment_state wascomment; + +	wascomment = incomment; +	cp = skiphash(); +	if (cp == NULL) +		return (false); +	if (*cp == '\0') +		goto done; +	/* strip trailing whitespace, and do a fairly rough check to +	   avoid unsupported multi-line preprocessor directives */ +	end = cp + strlen(cp); +	while (end > tline && strchr(" \t\n\r", end[-1]) != NULL) +		--end; +	if (end > tline && end[-1] == '\\') +		Eioccc(); + +	kw = cp; +	if ((cp = matchsym("define", kw)) != NULL) { +		sym = getsym(&cp); +		if (sym == NULL) +			error("missing macro name in #define"); +		if (*cp == '(') { +			val = "1"; +		} else { +			cp = skipcomment(cp); +			val = (cp < end) ? xstrdup(cp, end) : ""; +		} +		debug("#define"); +		addsym2(false, sym, val); +	} else if ((cp = matchsym("undef", kw)) != NULL) { +		sym = getsym(&cp); +		if (sym == NULL) +			error("missing macro name in #undef"); +		cp = skipcomment(cp); +		debug("#undef"); +		addsym2(false, sym, NULL);  	} else { -		if (*val != '\0') -			usage(); -		value[symind] = NULL; +		error("unrecognized preprocessor directive");  	} +	skipline(cp); +done: +	debug("parser line %d state %s comment %s line", linenum, +	    comment_name[incomment], linestate_name[linestate]); +	return (true);  }  /* - * Compare s with n characters of t. - * The same as strncmp() except that it checks that s[n] == '\0'. + * Concatenate two strings into new memory, checking for failure.   */ -static int -strlcmp(const char *s, const char *t, size_t n) +static char * +astrcat(const char *s1, const char *s2)  { -	while (n-- && *t != '\0') -		if (*s != *t) -			return ((unsigned char)*s - (unsigned char)*t); -		else -			++s, ++t; -	return ((unsigned char)*s); +	char *s; +	int len; +	size_t size; + +	len = snprintf(NULL, 0, "%s%s", s1, s2); +	if (len < 0) +		err(2, "snprintf"); +	size = (size_t)len + 1; +	s = (char *)malloc(size); +	if (s == NULL) +		err(2, "malloc"); +	snprintf(s, size, "%s%s", s1, s2); +	return (s); +} + +/* + * Duplicate a segment of a string, checking for failure. + */ +static const char * +xstrdup(const char *start, const char *end) +{ +	size_t n; +	char *s; + +	if (end < start) abort(); /* bug */ +	n = (size_t)(end - start) + 1; +	s = malloc(n); +	if (s == NULL) +		err(2, "malloc"); +	snprintf(s, n, "%s", start); +	return (s);  }  /* @@ -1070,5 +1566,6 @@ error(const char *msg)  	else  		warnx("%s: %d: %s (#if line %d depth %d)",  		    filename, linenum, msg, stifline[depth], depth); +	closeio();  	errx(2, "output may be truncated");  } diff --git a/extra/scripts/unifdef.h b/extra/scripts/unifdef.h new file mode 100644 index 000000000..d57a8660b --- /dev/null +++ b/extra/scripts/unifdef.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2012 - 2013 Tony Finch <dot@dotat.at> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *    notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *    notice, this list of conditions and the following disclaimer in the + *    documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/stat.h> + +#include <ctype.h> +#include <errno.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/* Avoid err.h since this are non-standard BSD extensions */ +#define vwarnx(fmt, args)   ({ fprintf(stderr, "unifdef: "); vfprintf(stderr, fmt, args); fprintf(stderr, "\n"); }) +#define warnx(fmt, args...) fprintf(stderr, "unifdef: " fmt "\n", ## args) +#define errx(exit_code, fmt, args...) ({ warnx(fmt, ## args); exit(exit_code); }) +#define err(exit_code, fmt, args...)  errx(exit_code, fmt ": %s", ## args, strerror(errno)) + +/* portability stubs */ + +#define fbinmode(fp) (fp) + +#define replace(old,new) rename(old,new) + +static FILE * +mktempmode(char *tmp, int mode) +{ +	int fd = mkstemp(tmp); +	if (fd < 0) return (NULL); +	fchmod(fd, mode & (S_IRWXU|S_IRWXG|S_IRWXO)); +	return (fdopen(fd, "wb")); +} diff --git a/extra/scripts/version.h b/extra/scripts/version.h new file mode 100644 index 000000000..ed3e7c0fd --- /dev/null +++ b/extra/scripts/version.h @@ -0,0 +1,2 @@ +"@(#) $Version: unifdef-2.9.5.55501a6 $\n" +"@(#) $Date: 2013-06-12 15:50:39 +0100 $\n" | 
