diff options
Diffstat (limited to 'libc/misc/regex')
| -rw-r--r-- | libc/misc/regex/Makefile | 2 | ||||
| -rw-r--r-- | libc/misc/regex/regex.c | 5725 | ||||
| -rw-r--r-- | libc/misc/regex/rx.c | 7273 | 
3 files changed, 5726 insertions, 7274 deletions
diff --git a/libc/misc/regex/Makefile b/libc/misc/regex/Makefile index c4c13f6cf..38b7e98bf 100644 --- a/libc/misc/regex/Makefile +++ b/libc/misc/regex/Makefile @@ -24,7 +24,7 @@ TOPDIR=../../  include $(TOPDIR)Rules.mak  LIBC=$(TOPDIR)libc.a -CSRC=rx.c +CSRC=regex.c  COBJS=$(patsubst %.c,%.o, $(CSRC))  OBJS=$(COBJS) diff --git a/libc/misc/regex/regex.c b/libc/misc/regex/regex.c new file mode 100644 index 000000000..64e754ee0 --- /dev/null +++ b/libc/misc/regex/regex.c @@ -0,0 +1,5725 @@ +/* Extended regular expression matching and search library, +   version 0.12. +   (Implements POSIX draft P1003.2/D11.2, except for some of the +   internationalization features.) +   Copyright (C) 1993-1999, 2000 Free Software Foundation, Inc. + +   The GNU C Library is free software; you can redistribute it and/or +   modify it under the terms of the GNU Library General Public License as +   published by the Free Software Foundation; either version 2 of the +   License, or (at your option) any later version. + +   The GNU C Library is distributed in the hope that it will be useful, +   but WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Library General Public License for more details. + +   You should have received a copy of the GNU Library General Public +   License along with the GNU C Library; see the file COPYING.LIB.  If not, +   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +   Boston, MA 02111-1307, USA.  */ + +/* AIX requires this to be the first thing in the file. */ +#if defined _AIX && !defined REGEX_MALLOC +#pragma alloca +#endif + +#undef	_GNU_SOURCE +#define _GNU_SOURCE +#define STDC_HEADERS + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#ifndef PARAMS +# if defined __GNUC__ || (defined __STDC__ && __STDC__) +#  define PARAMS(args) args +# else +#  define PARAMS(args) () +# endif							/* GCC.  */ +#endif							/* Not PARAMS.  
*/ + +#if defined STDC_HEADERS && !defined emacs +# include <stddef.h> +#else +/* We need this for `regex.h', and perhaps for the Emacs include files.  */ +# include <sys/types.h> +#endif + +#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) + +/* For platform which support the ISO C amendement 1 functionality we +   support user defined character classes.  */ +#if defined _LIBC || WIDE_CHAR_SUPPORT +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */ +# include <wchar.h> +# include <wctype.h> +#endif + +#ifdef _LIBC +/* We have to keep the namespace clean.  */ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ +	__regerror(errcode, preg, errbuf, errbuf_size) +# define re_set_registers(bu, re, nu, st, en) \ +	__re_set_registers (bu, re, nu, st, en) +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ +	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) +# define re_match(bufp, string, size, pos, regs) \ +	__re_match (bufp, string, size, pos, regs) +# define re_search(bufp, string, size, startpos, range, regs) \ +	__re_search (bufp, string, size, startpos, range, regs) +# define re_compile_pattern(pattern, length, bufp) \ +	__re_compile_pattern (pattern, length, bufp) +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ +	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) + +#define btowc __btowc +#endif + +/* This is for other GNU distributions with internationalized messages.  
*/ +#if HAVE_LIBINTL_H || defined _LIBC +# include <libintl.h> +#else +# define gettext(msgid) (msgid) +#endif + +#ifndef gettext_noop +/* This define is so xgettext can find the internationalizable +   strings.  */ +# define gettext_noop(String) String +#endif + +/* The `emacs' switch turns on certain matching commands +   that make sense only in Emacs. */ +#ifdef emacs + +# include "lisp.h" +# include "buffer.h" +# include "syntax.h" + +#else							/* not emacs */ + +/* If we are not linking with Emacs proper, +   we can't use the relocating allocator +   even if config.h says that we can.  */ +# undef REL_ALLOC + +# if defined STDC_HEADERS || defined _LIBC +#  include <stdlib.h> +# else +char *malloc(); +char *realloc(); +# endif + +/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. +   If nothing else has been done, use the method below.  */ +# ifdef INHIBIT_STRING_HEADER +#  if !(defined HAVE_BZERO && defined HAVE_BCOPY) +#   if !defined bzero && !defined bcopy +#    undef INHIBIT_STRING_HEADER +#   endif +#  endif +# endif + +/* This is the normal way of making sure we have a bcopy and a bzero. +   This is used in most programs--a few other programs avoid this +   by defining INHIBIT_STRING_HEADER.  */ +# ifndef INHIBIT_STRING_HEADER +#  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC +#   include <string.h> +#   ifndef bzero +#    ifndef _LIBC +#     define bzero(s, n)	(memset (s, '\0', n), (s)) +#    else +#     define bzero(s, n)	__bzero (s, n) +#    endif +#   endif +#  else +#   include <strings.h> +#   ifndef memcmp +#    define memcmp(s1, s2, n)	bcmp (s1, s2, n) +#   endif +#   ifndef memcpy +#    define memcpy(d, s, n)	(bcopy (s, d, n), (d)) +#   endif +#  endif +# endif + +/* Define the syntax stuff for \<, \>, etc.  */ + +/* This must be nonzero for the wordchar and notwordchar pattern +   commands in re_match_2.  
*/ +# ifndef Sword +#  define Sword 1 +# endif + +# ifdef SWITCH_ENUM_BUG +#  define SWITCH_ENUM_CAST(x) ((int)(x)) +# else +#  define SWITCH_ENUM_CAST(x) (x) +# endif + +#endif							/* not emacs */ + +/* Get the interface, including the syntax bits.  */ +#include <regex.h> + +/* isalpha etc. are used for the character classes.  */ +#include <ctype.h> + +/* Jim Meyering writes: + +   "... Some ctype macros are valid only for character codes that +   isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when +   using /bin/cc or gcc but without giving an ansi option).  So, all +   ctype uses should be through macros like ISPRINT...  If +   STDC_HEADERS is defined, then autoconf has verified that the ctype +   macros don't need to be guarded with references to isascii. ... +   Defining isascii to 1 should let any compiler worth its salt +   eliminate the && through constant folding." +   Solaris defines some of these symbols so we must undefine them first.  */ + +#undef ISASCII +#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) +# define ISASCII(c) 1 +#else +# define ISASCII(c) isascii(c) +#endif + +#ifdef isblank +# define ISBLANK(c) (ISASCII (c) && isblank (c)) +#else +# define ISBLANK(c) ((c) == ' ' || (c) == '\t') +#endif +#ifdef isgraph +# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) +#else +# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) +#endif + +#undef ISPRINT +#define ISPRINT(c) (ISASCII (c) && isprint (c)) +#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) +#define ISALNUM(c) (ISASCII (c) && isalnum (c)) +#define ISALPHA(c) (ISASCII (c) && isalpha (c)) +#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) +#define ISLOWER(c) (ISASCII (c) && islower (c)) +#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) +#define ISSPACE(c) (ISASCII (c) && isspace (c)) +#define ISUPPER(c) (ISASCII (c) && isupper (c)) +#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) + +#ifdef _tolower +# define TOLOWER(c) _tolower(c) +#else +# 
define TOLOWER(c) tolower(c) +#endif + +#ifndef NULL +# define NULL (void *)0 +#endif + +/* We remove any previous definition of `SIGN_EXTEND_CHAR', +   since ours (we hope) works properly with all combinations of +   machines, compilers, `char' and `unsigned char' argument types. +   (Per Bothner suggested the basic approach.)  */ +#undef SIGN_EXTEND_CHAR +#if __STDC__ +# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) +#else							/* not __STDC__ */ +/* As in Harbison and Steele.  */ +# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) +#endif + +#ifndef emacs +/* How many characters in the character set.  */ +# define CHAR_SET_SIZE 256 + +# ifdef SYNTAX_TABLE + +extern char *re_syntax_table; + +# else							/* not SYNTAX_TABLE */ + +static char re_syntax_table[CHAR_SET_SIZE]; + +/* Build `re_syntax_table' the first time this is called: every +   character accepted by ISALNUM, plus `_', is marked Sword and all +   other entries are left zero.  Later calls do nothing.  */ +static void init_syntax_once() +{ +	static int initialized = 0; +	register int ch = 0; + +	/* One-shot guard: the table is only ever filled once.  */ +	if (!initialized) { +		bzero(re_syntax_table, sizeof re_syntax_table); + +		/* Alphanumerics are word constituents...  */ +		while (ch < CHAR_SET_SIZE) { +			if (ISALNUM(ch)) +				re_syntax_table[ch] = Sword; +			++ch; +		} + +		/* ...and so is the underscore.  */ +		re_syntax_table['_'] = Sword; + +		initialized = 1; +	} +} + +# endif							/* not SYNTAX_TABLE */ + +# define SYNTAX(c) re_syntax_table[((c) & 0xFF)] + +#endif							/* emacs */ + +/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we +   use `alloca' instead of `malloc'.  This is because using malloc in +   re_search* or re_match* could cause memory leaks when C-g is used in +   Emacs; also, malloc is slower and causes storage fragmentation.  On +   the other hand, malloc is more portable, and easier to debug. + +   Because we sometimes use alloca, some routines have to be macros, +   not functions -- `alloca'-allocated space disappears at the end of the +   function it is called in.  */ + +#ifdef REGEX_MALLOC + +# define REGEX_ALLOCATE malloc +# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) +# define REGEX_FREE free + +#else							/* not REGEX_MALLOC  */ + +/* Emacs already defines alloca, sometimes.  
*/ +# ifndef alloca + +/* Make alloca work the best possible way.  */ +#  ifdef __GNUC__ +#   define alloca __builtin_alloca +#  else							/* not __GNUC__ */ +#   if HAVE_ALLOCA_H +#    include <alloca.h> +#   endif						/* HAVE_ALLOCA_H */ +#  endif						/* not __GNUC__ */ + +# endif							/* not alloca */ + +# define REGEX_ALLOCATE alloca + +/* Assumes a `char *destination' variable.  */ +# define REGEX_REALLOCATE(source, osize, nsize)				\ +  (destination = (char *) alloca (nsize),				\ +   memcpy (destination, source, osize)) + +/* No need to do anything to free, after alloca.  */ +# define REGEX_FREE(arg) ((void)0)	/* Do nothing!  But inhibit gcc warning.  */ + +#endif							/* not REGEX_MALLOC */ + +/* Define how to allocate the failure stack.  */ + +#if defined REL_ALLOC && defined REGEX_MALLOC + +# define REGEX_ALLOCATE_STACK(size)				\ +  r_alloc (&failure_stack_ptr, (size)) +# define REGEX_REALLOCATE_STACK(source, osize, nsize)		\ +  r_re_alloc (&failure_stack_ptr, (nsize)) +# define REGEX_FREE_STACK(ptr)					\ +  r_alloc_free (&failure_stack_ptr) + +#else							/* not using relocating allocator */ + +# ifdef REGEX_MALLOC + +#  define REGEX_ALLOCATE_STACK malloc +#  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) +#  define REGEX_FREE_STACK free + +# else							/* not REGEX_MALLOC */ + +#  define REGEX_ALLOCATE_STACK alloca + +#  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\ +   REGEX_REALLOCATE (source, osize, nsize) +/* No need to explicitly free anything.  */ +#  define REGEX_FREE_STACK(arg) + +# endif							/* not REGEX_MALLOC */ +#endif							/* not using relocating allocator */ + + +/* True if `size1' is non-NULL and PTR is pointing anywhere inside +   `string1' or just past its end.  This works if PTR is NULL, which is +   a good thing.  */ +#define FIRST_STRING_P(ptr) 					\ +  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) + +/* (Re)Allocate N items of type T using malloc, or fail.  
*/ +#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) +#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) +#define RETALLOC_IF(addr, n, t) \ +  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) +#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) + +#define BYTEWIDTH 8				/* In bits.  */ + +#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) + +#undef MAX +#undef MIN +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + +typedef char boolean; + +#define false 0 +#define true 1 + +static int re_match_2_internal PARAMS((struct re_pattern_buffer * bufp, +									   const char *string1, int size1, +									   const char *string2, int size2, +									   int pos, +									   struct re_registers * regs, + +									   int stop)); + +/* These are the command codes that appear in compiled regular +   expressions.  Some opcodes are followed by argument bytes.  A +   command code can specify any interpretation whatsoever for its +   arguments.  Zero bytes may appear in the compiled regular expression.  */ + +typedef enum { +	no_op = 0, + +	/* Succeed right away--no more backtracking.  */ +	succeed, + +	/* Followed by one byte giving n, then by n literal bytes.  */ +	exactn, + +	/* Matches any (more or less) character.  */ +	anychar, + +	/* Matches any one char belonging to specified set.  First +	   following byte is number of bitmap bytes.  Then come bytes +	   for a bitmap saying which chars are in.  Bits in each byte +	   are ordered low-bit-first.  A character is in the set if its +	   bit is 1.  A character too large to have a bit in the map is +	   automatically not in the set.  */ +	charset, + +	/* Same parameters as charset, but match any character that is +	   not one of those specified.  */ +	charset_not, + +	/* Start remembering the text that is matched, for storing in a +	   register.  
Followed by one byte with the register number, in +	   the range 0 to one less than the pattern buffer's re_nsub +	   field.  Then followed by one byte with the number of groups +	   inner to this one.  (This last has to be part of the +	   start_memory only because we need it in the on_failure_jump +	   of re_match_2.)  */ +	start_memory, + +	/* Stop remembering the text that is matched and store it in a +	   memory register.  Followed by one byte with the register +	   number, in the range 0 to one less than `re_nsub' in the +	   pattern buffer, and one byte with the number of inner groups, +	   just like `start_memory'.  (We need the number of inner +	   groups here because we don't have any easy way of finding the +	   corresponding start_memory when we're at a stop_memory.)  */ +	stop_memory, + +	/* Match a duplicate of something remembered. Followed by one +	   byte containing the register number.  */ +	duplicate, + +	/* Fail unless at beginning of line.  */ +	begline, + +	/* Fail unless at end of line.  */ +	endline, + +	/* Succeeds if at beginning of buffer (if emacs) or at beginning +	   of string to be matched (if not).  */ +	begbuf, + +	/* Analogously, for end of buffer/string.  */ +	endbuf, + +	/* Followed by two byte relative address to which to jump.  */ +	jump, + +	/* Same as jump, but marks the end of an alternative.  */ +	jump_past_alt, + +	/* Followed by two-byte relative address of place to resume at +	   in case of failure.  */ +	on_failure_jump, + +	/* Like on_failure_jump, but pushes a placeholder instead of the +	   current string position when executed.  */ +	on_failure_keep_string_jump, + +	/* Throw away latest failure point and then jump to following +	   two-byte relative address.  */ +	pop_failure_jump, + +	/* Change to pop_failure_jump if know won't have to backtrack to +	   match; otherwise change to jump.  This is used to jump +	   back to the beginning of a repeat.  
If what follows this jump +	   clearly won't match what the repeat does, such that we can be +	   sure that there is no use backtracking out of repetitions +	   already matched, then we change it to a pop_failure_jump. +	   Followed by two-byte address.  */ +	maybe_pop_jump, + +	/* Jump to following two-byte address, and push a dummy failure +	   point. This failure point will be thrown away if an attempt +	   is made to use it for a failure.  A `+' construct makes this +	   before the first repeat.  Also used as an intermediary kind +	   of jump when compiling an alternative.  */ +	dummy_failure_jump, + +	/* Push a dummy failure point and continue.  Used at the end of +	   alternatives.  */ +	push_dummy_failure, + +	/* Followed by two-byte relative address and two-byte number n. +	   After matching N times, jump to the address upon failure.  */ +	succeed_n, + +	/* Followed by two-byte relative address, and two-byte number n. +	   Jump to the address N times, then fail.  */ +	jump_n, + +	/* Set the following two-byte relative address to the +	   subsequent two-byte number.  The address *includes* the two +	   bytes of number.  */ +	set_number_at, + +	wordchar,					/* Matches any word-constituent character.  */ +	notwordchar,				/* Matches any char that is not a word-constituent.  */ + +	wordbeg,					/* Succeeds if at word beginning.  */ +	wordend,					/* Succeeds if at word end.  */ + +	wordbound,					/* Succeeds if at a word boundary.  */ +	notwordbound				/* Succeeds if not at a word boundary.  */ +#ifdef emacs +		, before_dot,			/* Succeeds if before point.  */ +	at_dot,						/* Succeeds if at point.  */ +	after_dot,					/* Succeeds if after point.  */ + +	/* Matches any character whose syntax is specified.  Followed by +	   a byte which contains a syntax code, e.g., Sword.  */ +	syntaxspec, + +	/* Matches any character whose syntax is not that specified.  
*/ +	notsyntaxspec +#endif							/* emacs */ +} re_opcode_t; + +/* Common operations on the compiled pattern.  */ + +/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */ + +#define STORE_NUMBER(destination, number)				\ +  do {									\ +    (destination)[0] = (number) & 0377;					\ +    (destination)[1] = (number) >> 8;					\ +  } while (0) + +/* Same as STORE_NUMBER, except increment DESTINATION to +   the byte after where the number is stored.  Therefore, DESTINATION +   must be an lvalue.  */ + +#define STORE_NUMBER_AND_INCR(destination, number)			\ +  do {									\ +    STORE_NUMBER (destination, number);					\ +    (destination) += 2;							\ +  } while (0) + +/* Put into DESTINATION a number stored in two contiguous bytes starting +   at SOURCE.  */ + +#define EXTRACT_NUMBER(destination, source)				\ +  do {									\ +    (destination) = *(source) & 0377;					\ +    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\ +  } while (0) + +#ifdef DEBUG +static void extract_number _RE_ARGS((int *dest, unsigned char *source)); +static void extract_number(dest, source) +int *dest; +unsigned char *source; +{ +	int temp = SIGN_EXTEND_CHAR(*(source + 1)); + +	*dest = *source & 0377; +	*dest += temp << 8; +} + +# ifndef EXTRACT_MACROS			/* To debug the macros.  */ +#  undef EXTRACT_NUMBER +#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) +# endif							/* not EXTRACT_MACROS */ + +#endif							/* DEBUG */ + +/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. +   SOURCE must be an lvalue.  
*/ + +#define EXTRACT_NUMBER_AND_INCR(destination, source)			\ +  do {									\ +    EXTRACT_NUMBER (destination, source);				\ +    (source) += 2; 							\ +  } while (0) + +#ifdef DEBUG +/* Function form of EXTRACT_NUMBER_AND_INCR, built only under DEBUG: +   read the two-byte number at *SOURCE into *DESTINATION, then move +   *SOURCE past the two bytes just read.  */ +static void extract_number_and_incr _RE_ARGS((int *destination, +											  unsigned char **source)); +static void extract_number_and_incr(destination, source) +int *destination; +unsigned char **source; +{ +	/* Decode first, then step over the number.  */ +	extract_number(destination, *source); +	*source = *source + 2; +} + +# ifndef EXTRACT_MACROS +#  undef EXTRACT_NUMBER_AND_INCR +#  define EXTRACT_NUMBER_AND_INCR(dest, src) \ +  extract_number_and_incr (&dest, &src) +# endif							/* not EXTRACT_MACROS */ + +#endif							/* DEBUG */ + +/* If DEBUG is defined, Regex prints many voluminous messages about what +   it is doing (if the variable `debug' is nonzero).  If linked with the +   main program in `iregex.c', you can enter patterns and strings +   interactively.  And if linked with the main program in `main.c' and +   the other test files, you can run the already-written tests.  */ + +#ifdef DEBUG + +/* We use standard I/O for debugging.  */ +# include <stdio.h> + +/* It is useful to test things that ``must'' be true when debugging.  */ +# include <assert.h> + +static int debug; + +# define DEBUG_STATEMENT(e) e +# define DEBUG_PRINT1(x) if (debug) printf (x) +# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) +# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) +# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) +# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\ +  if (debug) print_partial_compiled_pattern (s, e) +# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\ +  if (debug) print_double_string (w, s1, sz1, s2, sz2) + + +/* Print the fastmap in human-readable form.  
*/ + +void print_fastmap(fastmap) +char *fastmap; +{ +	unsigned was_a_range = 0; +	unsigned i = 0; + +	while (i < (1 << BYTEWIDTH)) { +		if (fastmap[i++]) { +			was_a_range = 0; +			putchar(i - 1); +			while (i < (1 << BYTEWIDTH) && fastmap[i]) { +				was_a_range = 1; +				i++; +			} +			if (was_a_range) { +				printf("-"); +				putchar(i - 1); +			} +		} +	} +	putchar('\n'); +} + + +/* Print a compiled pattern string in human-readable form, starting at +   the START pointer into it and ending just before the pointer END. +   Each command goes on its own line, prefixed by its byte offset from +   START.  Offsets are pointer differences, so they are cast to +   `long int' and printed with %ld instead of being handed to %d. +   Prints "(null)" and returns if START is NULL.  */ + +void print_partial_compiled_pattern(start, end) +unsigned char *start; +unsigned char *end; +{ +	int mcnt, mcnt2; +	unsigned char *p1; +	unsigned char *p = start; +	unsigned char *pend = end; + +	if (start == NULL) { +		printf("(null)\n"); +		return; +	} + +	/* Loop over pattern commands.  */ +	while (p < pend) { +		printf("%ld:\t", (long int) (p - start)); + +		switch ((re_opcode_t) * p++) { +		case no_op: +			printf("/no_op"); +			break; + +		case exactn: +			mcnt = *p++; +			printf("/exactn/%d", mcnt); +			do { +				putchar('/'); +				putchar(*p++); +			} +			while (--mcnt); +			break; + +		case start_memory: +			mcnt = *p++; +			printf("/start_memory/%d/%d", mcnt, *p++); +			break; + +		case stop_memory: +			mcnt = *p++; +			printf("/stop_memory/%d/%d", mcnt, *p++); +			break; + +		case duplicate: +			printf("/duplicate/%d", *p++); +			break; + +		case anychar: +			printf("/anychar"); +			break; + +		case charset: +		case charset_not: +		{ +			register int c, last = -100; +			register int in_range = 0; + +			printf("/charset [%s", +				   (re_opcode_t) * (p - 1) == charset_not ? "^" : ""); + +			assert(p + *p < pend); + +			for (c = 0; c < 256; c++) +				if (c / 8 < *p && (p[1 + (c / 8)] & (1 << (c % 8)))) { +					/* Are we starting a range?  */ +					if (last + 1 == c && !in_range) { +						putchar('-'); +						in_range = 1; +					} +					/* Have we broken a range?  
*/ +					else if (last + 1 != c && in_range) { +						putchar(last); +						in_range = 0; +					} + +					if (!in_range) +						putchar(c); + +					last = c; +				} + +			if (in_range) +				putchar(last); + +			putchar(']'); + +			p += 1 + *p; +		} +			break; + +		case begline: +			printf("/begline"); +			break; + +		case endline: +			printf("/endline"); +			break; + +		case on_failure_jump: +			extract_number_and_incr(&mcnt, &p); +			printf("/on_failure_jump to %ld", (long int) (p + mcnt - start)); +			break; + +		case on_failure_keep_string_jump: +			extract_number_and_incr(&mcnt, &p); +			printf("/on_failure_keep_string_jump to %ld", +				   (long int) (p + mcnt - start)); +			break; + +		case dummy_failure_jump: +			extract_number_and_incr(&mcnt, &p); +			printf("/dummy_failure_jump to %ld", (long int) (p + mcnt - start)); +			break; + +		case push_dummy_failure: +			printf("/push_dummy_failure"); +			break; + +		case maybe_pop_jump: +			extract_number_and_incr(&mcnt, &p); +			printf("/maybe_pop_jump to %ld", (long int) (p + mcnt - start)); +			break; + +		case pop_failure_jump: +			extract_number_and_incr(&mcnt, &p); +			printf("/pop_failure_jump to %ld", (long int) (p + mcnt - start)); +			break; + +		case jump_past_alt: +			extract_number_and_incr(&mcnt, &p); +			printf("/jump_past_alt to %ld", (long int) (p + mcnt - start)); +			break; + +		case jump: +			extract_number_and_incr(&mcnt, &p); +			printf("/jump to %ld", (long int) (p + mcnt - start)); +			break; + +		case succeed_n: +			extract_number_and_incr(&mcnt, &p); +			p1 = p + mcnt; +			extract_number_and_incr(&mcnt2, &p); +			printf("/succeed_n to %ld, %d times", (long int) (p1 - start), mcnt2); +			break; + +		case jump_n: +			extract_number_and_incr(&mcnt, &p); +			p1 = p + mcnt; +			extract_number_and_incr(&mcnt2, &p); +			printf("/jump_n to %ld, %d times", (long int) (p1 - start), mcnt2); +			break; + +		case set_number_at: +			extract_number_and_incr(&mcnt, &p); +			p1 = p + mcnt; +			extract_number_and_incr(&mcnt2, &p); +			printf("/set_number_at location %ld to %d", +				   (long int) (p1 - start), mcnt2); +			break; + +		case wordbound: +			
printf("/wordbound"); +			break; + +		case notwordbound: +			printf("/notwordbound"); +			break; + +		case wordbeg: +			printf("/wordbeg"); +			break; + +		case wordend: +			printf("/wordend"); +			/* Must not fall through: the next label is before_dot under +			   emacs and wordchar otherwise.  */ +			break; + +# ifdef emacs +		case before_dot: +			printf("/before_dot"); +			break; + +		case at_dot: +			printf("/at_dot"); +			break; + +		case after_dot: +			printf("/after_dot"); +			break; + +		case syntaxspec: +			printf("/syntaxspec"); +			mcnt = *p++; +			printf("/%d", mcnt); +			break; + +		case notsyntaxspec: +			printf("/notsyntaxspec"); +			mcnt = *p++; +			printf("/%d", mcnt); +			break; +# endif							/* emacs */ + +		case wordchar: +			printf("/wordchar"); +			break; + +		case notwordchar: +			printf("/notwordchar"); +			break; + +		case begbuf: +			printf("/begbuf"); +			break; + +		case endbuf: +			printf("/endbuf"); +			break; + +		default: +			printf("?%d", *(p - 1)); +		} + +		putchar('\n'); +	} + +	printf("%ld:\tend of pattern.\n", (long int) (p - start)); +} + + +/* Print the whole compiled pattern held in BUFP, then its used and +   allocated sizes, its fastmap (only if it is marked accurate and is +   present), and its flag fields.  The size fields and re_nsub are cast +   to `unsigned long' so the arguments always match the %lu conversions.  */ +void print_compiled_pattern(bufp) +struct re_pattern_buffer *bufp; +{ +	unsigned char *buffer = bufp->buffer; + +	print_partial_compiled_pattern(buffer, buffer + bufp->used); +	printf("%lu bytes used/%lu bytes allocated.\n", +		   (unsigned long) bufp->used, (unsigned long) bufp->allocated); + +	if (bufp->fastmap_accurate && bufp->fastmap) { +		printf("fastmap: "); +		print_fastmap(bufp->fastmap); +	} + +	printf("re_nsub: %lu\t", (unsigned long) bufp->re_nsub); +	printf("regs_alloc: %d\t", bufp->regs_allocated); +	printf("can_be_null: %d\t", bufp->can_be_null); +	printf("newline_anchor: %d\n", bufp->newline_anchor); +	printf("no_sub: %d\t", bufp->no_sub); +	printf("not_bol: %d\t", bufp->not_bol); +	printf("not_eol: %d\t", bufp->not_eol); +	printf("syntax: %lx\n", bufp->syntax); +	/* Perhaps we should print the translate table?  
*/ +} + + +/* Print the subject text from WHERE to its end.  If WHERE lies inside +   STRING1 (as decided by FIRST_STRING_P), print the rest of STRING1 and +   then all of STRING2; otherwise print STRING2 from WHERE onwards. +   Prints "(null)" if WHERE is NULL.  */ +void print_double_string(where, string1, size1, string2, size2) +const char *where; +const char *string1; +const char *string2; +int size1; +int size2; +{ +	int this_char; + +	if (where == NULL) +		printf("(null)"); +	else { +		if (FIRST_STRING_P(where)) { +			for (this_char = where - string1; this_char < size1; +				 this_char++) +				putchar(string1[this_char]); + +			/* Continue from the start of the second string.  */ +			where = string2; +		} + +		for (this_char = where - string2; this_char < size2; this_char++) +			putchar(string2[this_char]); +	} +} + +/* Write the character C to stderr.  */ +void printchar(c) +int c; +{ +	putc(c, stderr); +} + +#else							/* not DEBUG */ + +# undef assert +# define assert(e) + +# define DEBUG_STATEMENT(e) +# define DEBUG_PRINT1(x) +# define DEBUG_PRINT2(x1, x2) +# define DEBUG_PRINT3(x1, x2, x3) +# define DEBUG_PRINT4(x1, x2, x3, x4) +# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) +# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) + +#endif							/* not DEBUG */ + +/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can +   also be assigned to arbitrarily: each pattern buffer stores its own +   syntax, so it can be changed between regex compilations.  */ +/* This has no initializer because initialized variables in Emacs +   become read-only after dumping.  */ +reg_syntax_t re_syntax_options; + + +/* Specify the precise syntax of regexps for compilation.  This provides +   for compatibility for various utilities which historically have +   different, incompatible syntaxes. + +   The argument SYNTAX is a bit mask comprised of the various bits +   defined in regex.h.  We return the old syntax.  
*/ + +reg_syntax_t re_set_syntax(syntax) +reg_syntax_t syntax; +{ +	reg_syntax_t ret = re_syntax_options; + +	re_syntax_options = syntax; +#ifdef DEBUG +	if (syntax & RE_DEBUG) +		debug = 1; +	else if (debug)				/* was on but now is not */ +		debug = 0; +#endif							/* DEBUG */ +	return ret; +} + +#ifdef _LIBC +weak_alias(__re_set_syntax, re_set_syntax) +#endif +/* This table gives an error message for each of the error codes listed +   in regex.h.  Obviously the order here has to be same as there. +   POSIX doesn't require that we do anything for REG_NOERROR, +   but why not be nice?  */ +static const char re_error_msgid[] = { +#define REG_NOERROR_IDX	0 +	gettext_noop("Success")		/* REG_NOERROR */ +		"\0" +#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") +		gettext_noop("No match")	/* REG_NOMATCH */ +		"\0" +#define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match") +		gettext_noop("Invalid regular expression")	/* REG_BADPAT */ +		"\0" +#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") +		gettext_noop("Invalid collation character")	/* REG_ECOLLATE */ +		"\0" +#define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character") +		gettext_noop("Invalid character class name")	/* REG_ECTYPE */ +		"\0" +#define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name") +		gettext_noop("Trailing backslash")	/* REG_EESCAPE */ +		"\0" +#define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash") +		gettext_noop("Invalid back reference")	/* REG_ESUBREG */ +		"\0" +#define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference") +		gettext_noop("Unmatched [ or [^")	/* REG_EBRACK */ +		"\0" +#define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^") +		gettext_noop("Unmatched ( or \\(")	/* REG_EPAREN */ +		"\0" +#define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") +		gettext_noop("Unmatched \\{")	/* REG_EBRACE */ +		"\0" +#define REG_BADBR_IDX	
(REG_EBRACE_IDX + sizeof "Unmatched \\{") +		gettext_noop("Invalid content of \\{\\}")	/* REG_BADBR */ +		"\0" +#define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") +		gettext_noop("Invalid range end")	/* REG_ERANGE */ +		"\0" +#define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end") +		gettext_noop("Memory exhausted")	/* REG_ESPACE */ +		"\0" +#define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted") +		gettext_noop("Invalid preceding regular expression")	/* REG_BADRPT */ +		"\0" +#define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") +		gettext_noop("Premature end of regular expression")	/* REG_EEND */ +		"\0" +#define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression") +		gettext_noop("Regular expression too big")	/* REG_ESIZE */ +		"\0" +#define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big") +		gettext_noop("Unmatched ) or \\)")	/* REG_ERPAREN */ +}; + +static const size_t re_error_msgid_idx[] = { +	REG_NOERROR_IDX, +	REG_NOMATCH_IDX, +	REG_BADPAT_IDX, +	REG_ECOLLATE_IDX, +	REG_ECTYPE_IDX, +	REG_EESCAPE_IDX, +	REG_ESUBREG_IDX, +	REG_EBRACK_IDX, +	REG_EPAREN_IDX, +	REG_EBRACE_IDX, +	REG_BADBR_IDX, +	REG_ERANGE_IDX, +	REG_ESPACE_IDX, +	REG_BADRPT_IDX, +	REG_EEND_IDX, +	REG_ESIZE_IDX, +	REG_ERPAREN_IDX +}; + +/* Avoiding alloca during matching, to placate r_alloc.  */ + +/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the +   searching and matching functions should not call alloca.  On some +   systems, alloca is implemented in terms of malloc, and if we're +   using the relocating allocator routines, then malloc could cause a +   relocation, which might (if the strings being searched are in the +   ralloc heap) shift the data out from underneath the regexp +   routines. 
+ +   Here's another reason to avoid allocation: Emacs +   processes input from X in a signal handler; processing X input may +   call malloc; if input arrives while a matching routine is calling +   malloc, then we're scrod.  But Emacs can't just block input while +   calling matching routines; then we don't notice interrupts when +   they come in.  So, Emacs blocks input around all regexp calls +   except the matching calls, which it leaves unprotected, in the +   faith that they will not malloc.  */ + +/* Normally, this is fine.  */ +#define MATCH_MAY_ALLOCATE + +/* When using GNU C, we are not REALLY using the C alloca, no matter +   what config.h may say.  So don't take precautions for it.  */ +#ifdef __GNUC__ +# undef C_ALLOCA +#endif + +/* The match routines may not allocate if (1) they would do it with malloc +   and (2) it's not safe for them to use malloc. +   Note that if REL_ALLOC is defined, matching would not use malloc for the +   failure stack, but we would still use it for the register vectors; +   so REL_ALLOC should not affect this.  */ +#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs +# undef MATCH_MAY_ALLOCATE +#endif + + +/* Failure stack declarations and macros; both re_compile_fastmap and +   re_match_2 use a failure stack.  These have to be macros because of +   REGEX_ALLOCATE_STACK.  */ + + +/* Number of failure points for which to initially allocate space +   when matching.  If this number is exceeded, we allocate more +   space, so it is not a hard limit.  */ +#ifndef INIT_FAILURE_ALLOC +# define INIT_FAILURE_ALLOC 5 +#endif + +/* Roughly the maximum number of failure points on the stack.  Would be +   exactly that if always used MAX_FAILURE_ITEMS items each time we failed. +   This is a variable only so users of regex can assign to it; we never +   change it ourselves.  */ + +#ifdef INT_IS_16BIT + +# if defined MATCH_MAY_ALLOCATE +/* 4400 was enough to cause a crash on Alpha OSF/1, +   whose default stack limit is 2mb.  
*/ +long int re_max_failures = 4000; +# else +long int re_max_failures = 2000; +# endif + +union fail_stack_elt { +	unsigned char *pointer; +	long int integer; +}; + +typedef union fail_stack_elt fail_stack_elt_t; + +typedef struct { +	fail_stack_elt_t *stack; +	unsigned long int size; +	unsigned long int avail;	/* Offset of next open position.  */ +} fail_stack_type; + +#else							/* not INT_IS_16BIT */ + +# if defined MATCH_MAY_ALLOCATE +/* 4400 was enough to cause a crash on Alpha OSF/1, +   whose default stack limit is 2mb.  */ +int re_max_failures = 20000; +# else +int re_max_failures = 2000; +# endif + +union fail_stack_elt { +	unsigned char *pointer; +	int integer; +}; + +typedef union fail_stack_elt fail_stack_elt_t; + +typedef struct { +	fail_stack_elt_t *stack; +	unsigned size; +	unsigned avail;				/* Offset of next open position.  */ +} fail_stack_type; + +#endif							/* INT_IS_16BIT */ + +#define FAIL_STACK_EMPTY()     (fail_stack.avail == 0) +#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) +#define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size) + + +/* Define macros to initialize and free the failure stack. +   Do `return -2' if the alloc fails.  */ + +#ifdef MATCH_MAY_ALLOCATE +# define INIT_FAIL_STACK()						\ +  do {									\ +    fail_stack.stack = (fail_stack_elt_t *)				\ +      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ +									\ +    if (fail_stack.stack == NULL)					\ +      return -2;							\ +									\ +    fail_stack.size = INIT_FAILURE_ALLOC;				\ +    fail_stack.avail = 0;						\ +  } while (0) + +# define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack) +#else +# define INIT_FAIL_STACK()						\ +  do {									\ +    fail_stack.avail = 0;						\ +  } while (0) + +# define RESET_FAIL_STACK() +#endif + + +/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. 
+ +   Return 1 if succeeds, and 0 if either ran out of memory +   allocating space for it or it was already too large. + +   REGEX_REALLOCATE_STACK requires `destination' be declared.   */ + +#define DOUBLE_FAIL_STACK(fail_stack)					\ +  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\ +   ? 0									\ +   : ((fail_stack).stack = (fail_stack_elt_t *)				\ +        REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\ +          (fail_stack).size * sizeof (fail_stack_elt_t),		\ +          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\ +									\ +      (fail_stack).stack == NULL					\ +      ? 0								\ +      : ((fail_stack).size <<= 1, 					\ +         1))) + + +/* Push pointer POINTER on FAIL_STACK. +   Return 1 if was able to do so and 0 if ran out of memory allocating +   space to do so.  */ +#define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\ +  ((FAIL_STACK_FULL ()							\ +    && !DOUBLE_FAIL_STACK (FAIL_STACK))					\ +   ? 0									\ +   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\ +      1)) + +/* Push a pointer value onto the failure stack. +   Assumes the variable `fail_stack'.  Probably should only +   be called from within `PUSH_FAILURE_POINT'.  */ +#define PUSH_FAILURE_POINTER(item)					\ +  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) + +/* This pushes an integer-valued item onto the failure stack. +   Assumes the variable `fail_stack'.  Probably should only +   be called from within `PUSH_FAILURE_POINT'.  */ +#define PUSH_FAILURE_INT(item)					\ +  fail_stack.stack[fail_stack.avail++].integer = (item) + +/* Push a fail_stack_elt_t value onto the failure stack. +   Assumes the variable `fail_stack'.  Probably should only +   be called from within `PUSH_FAILURE_POINT'.  */ +#define PUSH_FAILURE_ELT(item)					\ +  fail_stack.stack[fail_stack.avail++] =  (item) + +/* These three POP... operations complement the three PUSH... operations. 
+   All assume that `fail_stack' is nonempty.  */ +#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer +#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer +#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] + +/* Used to omit pushing failure point id's when we're not debugging.  */ +#ifdef DEBUG +# define DEBUG_PUSH PUSH_FAILURE_INT +# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () +#else +# define DEBUG_PUSH(item) +# define DEBUG_POP(item_addr) +#endif + + +/* Push the information about the state we will need +   if we ever fail back to it. + +   Requires variables fail_stack, regstart, regend, reg_info, and +   num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination' +   be declared. + +   Does `return FAILURE_CODE' if runs out of memory.  */ + +#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\ +  do {									\ +    char *destination;							\ +    /* Must be int, so when we don't save any registers, the arithmetic	\ +       of 0 + -1 isn't done as unsigned.  */				\ +    /* Can't be int, since there is not a shred of a guarantee that int	\ +       is wide enough to hold a value of something to which pointer can	\ +       be assigned */							\ +    active_reg_t this_reg;						\ +    									\ +    DEBUG_STATEMENT (failure_id++);					\ +    DEBUG_STATEMENT (nfailure_points_pushed++);				\ +    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\ +    DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\ +    DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\ +									\ +    DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\ +    DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\ +									\ +    /* Ensure we have enough space allocated for what we will push.  
*/	\ +    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\ +      {									\ +        if (!DOUBLE_FAIL_STACK (fail_stack))				\ +          return failure_code;						\ +									\ +        DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\ +		       (fail_stack).size);				\ +        DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\ +      }									\ +									\ +    /* Push the info, starting with the registers.  */			\ +    DEBUG_PRINT1 ("\n");						\ +									\ +    if (1)								\ +      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ +	   this_reg++)							\ +	{								\ +	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\ +	  DEBUG_STATEMENT (num_regs_pushed++);				\ +									\ +	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\ +	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\ +									\ +	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\ +	  PUSH_FAILURE_POINTER (regend[this_reg]);			\ +									\ +	  DEBUG_PRINT2 ("    info: %p\n      ",				\ +			reg_info[this_reg].word.pointer);		\ +	  DEBUG_PRINT2 (" match_null=%d",				\ +			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\ +	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\ +	  DEBUG_PRINT2 (" matched_something=%d",			\ +			MATCHED_SOMETHING (reg_info[this_reg]));	\ +	  DEBUG_PRINT2 (" ever_matched=%d",				\ +			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\ +	  DEBUG_PRINT1 ("\n");						\ +	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\ +	}								\ +									\ +    DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\ +    PUSH_FAILURE_INT (lowest_active_reg);				\ +									\ +    DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\ +    PUSH_FAILURE_INT (highest_active_reg);				\ +									\ +    DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\ +    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\ +    PUSH_FAILURE_POINTER (pattern_place);				\ +									\ +  
  DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\ +    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \ +				 size2);				\ +    DEBUG_PRINT1 ("'\n");						\ +    PUSH_FAILURE_POINTER (string_place);				\ +									\ +    DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\ +    DEBUG_PUSH (failure_id);						\ +  } while (0) + +/* This is the number of items that are pushed and popped on the stack +   for each register.  */ +#define NUM_REG_ITEMS  3 + +/* Individual items aside from the registers.  */ +#ifdef DEBUG +# define NUM_NONREG_ITEMS 5		/* Includes failure point id.  */ +#else +# define NUM_NONREG_ITEMS 4 +#endif + +/* We push at most this many items on the stack.  */ +/* We used to use (num_regs - 1), which is the number of registers +   this regexp will save; but that was changed to 5 +   to avoid stack overflow for a regexp with lots of parens.  */ +#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) + +/* We actually push this many items.  */ +#define NUM_FAILURE_ITEMS				\ +  (((0							\ +     ? 0 : highest_active_reg - lowest_active_reg + 1)	\ +    * NUM_REG_ITEMS)					\ +   + NUM_NONREG_ITEMS) + +/* How many items can still be added to the stack without overflowing it.  */ +#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) + + +/* Pops what PUSH_FAIL_STACK pushes. + +   We restore into the parameters, all of which should be lvalues: +     STR -- the saved data position. +     PAT -- the saved pattern position. +     LOW_REG, HIGH_REG -- the highest and lowest active registers. +     REGSTART, REGEND -- arrays of string positions. +     REG_INFO -- array of information about each subexpression. + +   Also assumes the variables `fail_stack' and (if debugging), `bufp', +   `pend', `string1', `size1', `string2', and `size2'.  
*/ + +#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ +{									\ +  DEBUG_STATEMENT (unsigned failure_id;)				\ +  active_reg_t this_reg;						\ +  const unsigned char *string_temp;					\ +									\ +  assert (!FAIL_STACK_EMPTY ());					\ +									\ +  /* Remove failure points and point to how many regs pushed.  */	\ +  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\ +  DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\ +  DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\ +									\ +  assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\ +									\ +  DEBUG_POP (&failure_id);						\ +  DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\ +									\ +  /* If the saved string location is NULL, it came from an		\ +     on_failure_keep_string_jump opcode, and we want to throw away the	\ +     saved NULL, thus retaining our current position in the string.  */	\ +  string_temp = POP_FAILURE_POINTER ();					\ +  if (string_temp != NULL)						\ +    str = (const char *) string_temp;					\ +									\ +  DEBUG_PRINT2 ("  Popping string %p: `", str);				\ +  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\ +  DEBUG_PRINT1 ("'\n");							\ +									\ +  pat = (unsigned char *) POP_FAILURE_POINTER ();			\ +  DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\ +  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\ +									\ +  /* Restore register info.  
*/						\ +  high_reg = (active_reg_t) POP_FAILURE_INT ();				\ +  DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\ +									\ +  low_reg = (active_reg_t) POP_FAILURE_INT ();				\ +  DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\ +									\ +  if (1)								\ +    for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\ +      {									\ +	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\ +									\ +	reg_info[this_reg].word = POP_FAILURE_ELT ();			\ +	DEBUG_PRINT2 ("      info: %p\n",				\ +		      reg_info[this_reg].word.pointer);			\ +									\ +	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\ +	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\ +									\ +	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\ +	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\ +      }									\ +  else									\ +    {									\ +      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ +	{								\ +	  reg_info[this_reg].word.integer = 0;				\ +	  regend[this_reg] = 0;						\ +	  regstart[this_reg] = 0;					\ +	}								\ +      highest_active_reg = high_reg;					\ +    }									\ +									\ +  set_regs_matched_done = 0;						\ +  DEBUG_STATEMENT (nfailure_points_popped++);				\ +}								/* POP_FAILURE_POINT */ + + + +/* Structure for per-register (a.k.a. per-group) information. +   Other register information, such as the +   starting and ending positions (which are addresses), and the list of +   inner groups (which is a bits list) are maintained in separate +   variables. + +   We are making a (strictly speaking) nonportable assumption here: that +   the compiler will pack our bit fields into something that fits into +   the type of `word', i.e., is something that fits into one item on the +   failure stack.  */ + + +/* Declarations and macros for re_match_2.  
*/ + +typedef union { +	fail_stack_elt_t word; +	struct { +		/* This field is one if this group can match the empty string, +		   zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */ +#define MATCH_NULL_UNSET_VALUE 3 +		unsigned match_null_string_p:2; +		unsigned is_active:1; +		unsigned matched_something:1; +		unsigned ever_matched_something:1; +	} bits; +} register_info_type; + +#define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p) +#define IS_ACTIVE(R)  ((R).bits.is_active) +#define MATCHED_SOMETHING(R)  ((R).bits.matched_something) +#define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something) + + +/* Call this when have matched a real character; it sets `matched' flags +   for the subexpressions which we are currently inside.  Also records +   that those subexprs have matched.  */ +#define SET_REGS_MATCHED()						\ +  do									\ +    {									\ +      if (!set_regs_matched_done)					\ +	{								\ +	  active_reg_t r;						\ +	  set_regs_matched_done = 1;					\ +	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\ +	    {								\ +	      MATCHED_SOMETHING (reg_info[r])				\ +		= EVER_MATCHED_SOMETHING (reg_info[r])			\ +		= 1;							\ +	    }								\ +	}								\ +    }									\ +  while (0) + +/* Registers are set to a sentinel when they haven't yet matched.  */ +static char reg_unset_dummy; + +#define REG_UNSET_VALUE (®_unset_dummy) +#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) + +/* Subroutine declarations and macros for regex_compile.  
*/ + +static reg_errcode_t regex_compile +_RE_ARGS( +		 (const char *pattern, size_t size, reg_syntax_t syntax, +		  struct re_pattern_buffer * bufp)); +static void store_op1 + +_RE_ARGS((re_opcode_t op, unsigned char *loc, int arg)); +static void store_op2 +_RE_ARGS((re_opcode_t op, unsigned char *loc, int arg1, int arg2)); +static void insert_op1 +_RE_ARGS( + +		 (re_opcode_t op, unsigned char *loc, int arg, +		  unsigned char *end)); +static void insert_op2 +_RE_ARGS( +		 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, + +		  unsigned char *end)); +static boolean at_begline_loc_p +_RE_ARGS((const char *pattern, const char *p, reg_syntax_t syntax)); +static boolean at_endline_loc_p +_RE_ARGS((const char *p, const char *pend, reg_syntax_t syntax)); +static reg_errcode_t compile_range +_RE_ARGS( +		 (const char **p_ptr, const char *pend, char *translate, +		  reg_syntax_t syntax, unsigned char *b)); + +/* Fetch the next character in the uncompiled pattern---translating it +   if necessary.  Also cast from a signed character in the constant +   string passed to us by the user to an unsigned char that we can use +   as an array index (in, e.g., `translate').  */ +#ifndef PATFETCH +# define PATFETCH(c)							\ +  do {if (p == pend) return REG_EEND;					\ +    c = (unsigned char) *p++;						\ +    if (translate) c = (unsigned char) translate[c];			\ +  } while (0) +#endif + +/* Fetch the next character in the uncompiled pattern, with no +   translation.  */ +#define PATFETCH_RAW(c)							\ +  do {if (p == pend) return REG_EEND;					\ +    c = (unsigned char) *p++; 						\ +  } while (0) + +/* Go backwards one character in the pattern.  */ +#define PATUNFETCH p-- + + +/* If `translate' is non-null, return translate[D], else just D.  We +   cast the subscript to translate because some data is declared as +   `char *', to avoid warnings when a string constant is passed.  But +   when we use a character as a subscript we must make it unsigned.  
*/ +#ifndef TRANSLATE +# define TRANSLATE(d) \ +  (translate ? (char) translate[(unsigned char) (d)] : (d)) +#endif + + +/* Macros for outputting the compiled pattern into `buffer'.  */ + +/* If the buffer isn't allocated when it comes in, use this.  */ +#define INIT_BUF_SIZE  32 + +/* Make sure we have at least N more bytes of space in buffer.  */ +#define GET_BUFFER_SPACE(n)						\ +    while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\ +      EXTEND_BUFFER () + +/* Make sure we have one more byte of buffer space and then add C to it.  */ +#define BUF_PUSH(c)							\ +  do {									\ +    GET_BUFFER_SPACE (1);						\ +    *b++ = (unsigned char) (c);						\ +  } while (0) + + +/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */ +#define BUF_PUSH_2(c1, c2)						\ +  do {									\ +    GET_BUFFER_SPACE (2);						\ +    *b++ = (unsigned char) (c1);					\ +    *b++ = (unsigned char) (c2);					\ +  } while (0) + + +/* As with BUF_PUSH_2, except for three bytes.  */ +#define BUF_PUSH_3(c1, c2, c3)						\ +  do {									\ +    GET_BUFFER_SPACE (3);						\ +    *b++ = (unsigned char) (c1);					\ +    *b++ = (unsigned char) (c2);					\ +    *b++ = (unsigned char) (c3);					\ +  } while (0) + + +/* Store a jump with opcode OP at LOC to location TO.  We store a +   relative address offset by the three bytes the jump itself occupies.  */ +#define STORE_JUMP(op, loc, to) \ +  store_op1 (op, loc, (int) ((to) - (loc) - 3)) + +/* Likewise, for a two-argument jump.  */ +#define STORE_JUMP2(op, loc, to, arg) \ +  store_op2 (op, loc, (int) ((to) - (loc) - 3), arg) + +/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */ +#define INSERT_JUMP(op, loc, to) \ +  insert_op1 (op, loc, (int) ((to) - (loc) - 3), b) + +/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  
*/ +#define INSERT_JUMP2(op, loc, to, arg) \ +  insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b) + + +/* This is not an arbitrary limit: the arguments which represent offsets +   into the pattern are two bytes long.  So if 2^16 bytes turns out to +   be too small, many things would have to change.  */ +/* Any other compiler which, like MSC, has allocation limit below 2^16 +   bytes will have to use approach similar to what was done below for +   MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up +   reallocating to 0 bytes.  Such thing is not going to work too well. +   You have been warned!!  */ +#if defined _MSC_VER  && !defined WIN32 +/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. +   The REALLOC define eliminates a flurry of conversion warnings, +   but is not required. */ +# define MAX_BUF_SIZE  65500L +# define REALLOC(p,s) realloc ((p), (size_t) (s)) +#else +# define MAX_BUF_SIZE (1L << 16) +# define REALLOC(p,s) realloc ((p), (s)) +#endif + +/* Extend the buffer by twice its current size via realloc and +   reset the pointers that pointed into the old block to point to the +   correct places in the new one.  If extending the buffer results in it +   being larger than MAX_BUF_SIZE, then flag memory exhausted.  */ +#define EXTEND_BUFFER()							\ +  do { 									\ +    unsigned char *old_buffer = bufp->buffer;				\ +    if (bufp->allocated == MAX_BUF_SIZE) 				\ +      return REG_ESIZE;							\ +    bufp->allocated <<= 1;						\ +    if (bufp->allocated > MAX_BUF_SIZE)					\ +      bufp->allocated = MAX_BUF_SIZE; 					\ +    bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\ +    if (bufp->buffer == NULL)						\ +      return REG_ESPACE;						\ +    /* If the buffer moved, move all the pointers into it.  
*/		\ +    if (old_buffer != bufp->buffer)					\ +      {									\ +        b = (b - old_buffer) + bufp->buffer;				\ +        begalt = (begalt - old_buffer) + bufp->buffer;			\ +        if (fixup_alt_jump)						\ +          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ +        if (laststart)							\ +          laststart = (laststart - old_buffer) + bufp->buffer;		\ +        if (pending_exact)						\ +          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\ +      }									\ +  } while (0) + + +/* Since we have one byte reserved for the register number argument to +   {start,stop}_memory, the maximum number of groups we can report +   things about is what fits in that byte.  */ +#define MAX_REGNUM 255 + +/* But patterns can have more than `MAX_REGNUM' registers.  We just +   ignore the excess.  */ +typedef unsigned regnum_t; + + +/* Macros for the compile stack.  */ + +/* Since offsets can go either forwards or backwards, this type needs to +   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */ +/* int may be not enough when sizeof(int) == 2.  */ +typedef long pattern_offset_t; + +typedef struct { +	pattern_offset_t begalt_offset; +	pattern_offset_t fixup_alt_jump; +	pattern_offset_t inner_group_offset; +	pattern_offset_t laststart_offset; +	regnum_t regnum; +} compile_stack_elt_t; + + +typedef struct { +	compile_stack_elt_t *stack; +	unsigned size; +	unsigned avail;				/* Offset of next open position.  */ +} compile_stack_type; + + +#define INIT_COMPILE_STACK_SIZE 32 + +#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0) +#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size) + +/* The next available element.  */ +#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) + + +/* Set the bit for character C in a list.  
*/ +#define SET_LIST_BIT(c)                               \ +  (b[((unsigned char) (c)) / BYTEWIDTH]               \ +   |= 1 << (((unsigned char) c) % BYTEWIDTH)) + + +/* Get the next unsigned number in the uncompiled pattern.  */ +#define GET_UNSIGNED_NUMBER(num) 					\ +  { if (p != pend)							\ +     {									\ +       PATFETCH (c); 							\ +       while ('0' <= c && c <= '9')					\ +         { 								\ +           if (num < 0)							\ +              num = 0;							\ +           num = num * 10 + c - '0'; 					\ +           if (p == pend) 						\ +              break; 							\ +           PATFETCH (c);						\ +         } 								\ +       } 								\ +    } + +#if defined _LIBC || WIDE_CHAR_SUPPORT +/* The GNU C library provides support for user-defined character classes +   and the functions from ISO C amendement 1.  */ +# ifdef CHARCLASS_NAME_MAX +#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX +# else +/* This shouldn't happen but some implementation might still have this +   problem.  Use a reasonable default value.  */ +#  define CHAR_CLASS_MAX_LENGTH 256 +# endif + +# ifdef _LIBC +#  define IS_CHAR_CLASS(string) __wctype (string) +# else +#  define IS_CHAR_CLASS(string) wctype (string) +# endif +#else +# define CHAR_CLASS_MAX_LENGTH  6	/* Namely, `xdigit'.  */ + +# define IS_CHAR_CLASS(string)						\ +   (STREQ (string, "alpha") || STREQ (string, "upper")			\ +    || STREQ (string, "lower") || STREQ (string, "digit")		\ +    || STREQ (string, "alnum") || STREQ (string, "xdigit")		\ +    || STREQ (string, "space") || STREQ (string, "print")		\ +    || STREQ (string, "punct") || STREQ (string, "graph")		\ +    || STREQ (string, "cntrl") || STREQ (string, "blank")) +#endif + +#ifndef MATCH_MAY_ALLOCATE + +/* If we cannot allocate large objects within re_match_2_internal, +   we make the fail stack and register vectors global. +   The fail stack, we grow to the maximum size when a regexp +   is compiled. 
+   The register vectors, we adjust in size each time we +   compile a regexp, according to the number of registers it needs.  */ + +static fail_stack_type fail_stack; + +/* Size with which the following vectors are currently allocated. +   That is so we can make them bigger as needed, +   but never make them smaller.  */ +static int regs_allocated_size; + +static const char **regstart, **regend; +static const char **old_regstart, **old_regend; +static const char **best_regstart, **best_regend; +static register_info_type *reg_info; +static const char **reg_dummy; +static register_info_type *reg_info_dummy; + +/* Make the register vectors big enough for NUM_REGS registers, +   but don't make them smaller.  */ + +static regex_grow_registers(num_regs) +int num_regs; +{ +	if (num_regs > regs_allocated_size) { +		RETALLOC_IF(regstart, num_regs, const char *); +		RETALLOC_IF(regend, num_regs, const char *); +		RETALLOC_IF(old_regstart, num_regs, const char *); +		RETALLOC_IF(old_regend, num_regs, const char *); +		RETALLOC_IF(best_regstart, num_regs, const char *); +		RETALLOC_IF(best_regend, num_regs, const char *); + +		RETALLOC_IF(reg_info, num_regs, register_info_type); +		RETALLOC_IF(reg_dummy, num_regs, const char *); + +		RETALLOC_IF(reg_info_dummy, num_regs, register_info_type); + +		regs_allocated_size = num_regs; +	} +} + +#endif							/* not MATCH_MAY_ALLOCATE */ + +static boolean group_in_compile_stack _RE_ARGS((compile_stack_type +												compile_stack, + +												regnum_t regnum)); + +/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. +   Returns one of error codes defined in `regex.h', or zero for success. + +   Assumes the `allocated' (and perhaps `buffer') and `translate' +   fields are set in BUFP on entry. 
+ +   If it succeeds, results are put in BUFP (if it returns an error, the +   contents of BUFP are undefined): +     `buffer' is the compiled pattern; +     `syntax' is set to SYNTAX; +     `used' is set to the length of the compiled pattern; +     `fastmap_accurate' is zero; +     `re_nsub' is the number of subexpressions in PATTERN; +     `not_bol' and `not_eol' are zero; + +   The `fastmap' and `newline_anchor' fields are neither +   examined nor set.  */ + +/* Return, freeing storage we allocated.  */ +#define FREE_STACK_RETURN(value)		\ +  return (free (compile_stack.stack), value) + +static reg_errcode_t regex_compile(pattern, size, syntax, bufp) +const char *pattern; +size_t size; +reg_syntax_t syntax; +struct re_pattern_buffer *bufp; +{ +	/* We fetch characters from PATTERN here.  Even though PATTERN is +	   `char *' (i.e., signed), we declare these variables as unsigned, so +	   they can be reliably used as array indices.  */ +	register unsigned char c, c1; + +	/* A random temporary spot in PATTERN.  */ +	const char *p1; + +	/* Points to the end of the buffer, where we should append.  */ +	register unsigned char *b; + +	/* Keeps track of unclosed groups.  */ +	compile_stack_type compile_stack; + +	/* Points to the current (ending) position in the pattern.  */ +	const char *p = pattern; +	const char *pend = pattern + size; + +	/* How to translate the characters in the pattern.  */ +	RE_TRANSLATE_TYPE translate = bufp->translate; + +	/* Address of the count-byte of the most recently inserted `exactn' +	   command.  This makes it possible to tell if a new exact-match +	   character can be added to that command or if the character requires +	   a new `exactn' command.  */ +	unsigned char *pending_exact = 0; + +	/* Address of start of the most recently finished expression. +	   This tells, e.g., postfix * where to find the start of its +	   operand.  Reset at the beginning of groups and alternatives.  
*/ +	unsigned char *laststart = 0; + +	/* Address of beginning of regexp, or inside of last group.  */ +	unsigned char *begalt; + +	/* Place in the uncompiled pattern (i.e., the {) to +	   which to go back if the interval is invalid.  */ +	const char *beg_interval; + +	/* Address of the place where a forward jump should go to the end of +	   the containing expression.  Each alternative of an `or' -- except the +	   last -- ends with a forward jump of this sort.  */ +	unsigned char *fixup_alt_jump = 0; + +	/* Counts open-groups as they are encountered.  Remembered for the +	   matching close-group on the compile stack, so the same register +	   number is put in the stop_memory as the start_memory.  */ +	regnum_t regnum = 0; + +#ifdef DEBUG +	DEBUG_PRINT1("\nCompiling pattern: "); +	if (debug) { +		unsigned debug_count; + +		for (debug_count = 0; debug_count < size; debug_count++) +			putchar(pattern[debug_count]); +		putchar('\n'); +	} +#endif							/* DEBUG */ + +	/* Initialize the compile stack.  */ +	compile_stack.stack = +		TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); +	if (compile_stack.stack == NULL) +		return REG_ESPACE; + +	compile_stack.size = INIT_COMPILE_STACK_SIZE; +	compile_stack.avail = 0; + +	/* Initialize the pattern buffer.  */ +	bufp->syntax = syntax; +	bufp->fastmap_accurate = 0; +	bufp->not_bol = bufp->not_eol = 0; + +	/* Set `used' to zero, so that if we return an error, the pattern +	   printer (for debugging) will think there's no pattern.  We reset it +	   at the end.  */ +	bufp->used = 0; + +	/* Always count groups, whether or not bufp->no_sub is set.  */ +	bufp->re_nsub = 0; + +#if !defined emacs && !defined SYNTAX_TABLE +	/* Initialize the syntax table.  */ +	init_syntax_once(); +#endif + +	if (bufp->allocated == 0) { +		if (bufp->buffer) {		/* If zero allocated, but buffer is non-null, try to realloc +								   enough space.  This loses if buffer's address is bogus, but +								   that is the user's responsibility.  
*/ +			RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char); +		} else {				/* Caller did not allocate a buffer.  Do it for them.  */ +			bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char); +		} +		if (!bufp->buffer) +			FREE_STACK_RETURN(REG_ESPACE); + +		bufp->allocated = INIT_BUF_SIZE; +	} + +	begalt = b = bufp->buffer; + +	/* Loop through the uncompiled pattern until we're at the end.  */ +	while (p != pend) { +		PATFETCH(c); + +		switch (c) { +		case '^': +		{ +			if (				/* If at start of pattern, it's an operator.  */ +				   p == pattern + 1 +				   /* If context independent, it's an operator.  */ +				   || syntax & RE_CONTEXT_INDEP_ANCHORS +				   /* Otherwise, depends on what's come before.  */ +				   || at_begline_loc_p(pattern, p, syntax)) +				BUF_PUSH(begline); +			else +				goto normal_char; +		} +			break; + + +		case '$': +		{ +			if (				/* If at end of pattern, it's an operator.  */ +				   p == pend +				   /* If context independent, it's an operator.  */ +				   || syntax & RE_CONTEXT_INDEP_ANCHORS +				   /* Otherwise, depends on what's next.  */ +				   || at_endline_loc_p(p, pend, syntax)) +				BUF_PUSH(endline); +			else +				goto normal_char; +		} +			break; + + +		case '+': +		case '?': +			if ((syntax & RE_BK_PLUS_QM) +				|| (syntax & RE_LIMITED_OPS)) +				goto normal_char; +		  handle_plus: +		case '*': +			/* If there is no previous pattern... */ +			if (!laststart) { +				if (syntax & RE_CONTEXT_INVALID_OPS) +					FREE_STACK_RETURN(REG_BADRPT); +				else if (!(syntax & RE_CONTEXT_INDEP_OPS)) +					goto normal_char; +			} + +			{ +				/* Are we optimizing this jump?  */ +				boolean keep_string_p = false; + +				/* 1 means zero (many) matches is allowed.  */ +				char zero_times_ok = 0, many_times_ok = 0; + +				/* If there is a sequence of repetition chars, collapse it +				   down to just one (the right one).  
We can't combine +				   interval operators with these because of, e.g., `a{2}*', +				   which should only match an even number of `a's.  */ + +				for (;;) { +					zero_times_ok |= c != '+'; +					many_times_ok |= c != '?'; + +					if (p == pend) +						break; + +					PATFETCH(c); + +					if (c == '*' +						|| (!(syntax & RE_BK_PLUS_QM) +							&& (c == '+' || c == '?'))); + +					else if (syntax & RE_BK_PLUS_QM && c == '\\') { +						if (p == pend) +							FREE_STACK_RETURN(REG_EESCAPE); + +						PATFETCH(c1); +						if (!(c1 == '+' || c1 == '?')) { +							PATUNFETCH; +							PATUNFETCH; +							break; +						} + +						c = c1; +					} else { +						PATUNFETCH; +						break; +					} + +					/* If we get here, we found another repeat character.  */ +				} + +				/* Star, etc. applied to an empty pattern is equivalent +				   to an empty pattern.  */ +				if (!laststart) +					break; + +				/* Now we know whether or not zero matches is allowed +				   and also whether or not two or more matches is allowed.  */ +				if (many_times_ok) {	/* More than one repetition is allowed, so put in at the +										   end a backward relative jump from `b' to before the next +										   jump we're going to put in below (which jumps from +										   laststart to after this jump). + +										   But if we are at the `*' in the exact sequence `.*\n', +										   insert an unconditional jump backwards to the ., +										   instead of the beginning of the loop.  This way we only +										   push a failure point once, instead of every time +										   through the loop.  */ +					assert(p - 1 > pattern); + +					/* Allocate the space for the jump.  */ +					GET_BUFFER_SPACE(3); + +					/* We know we are not at the first character of the pattern, +					   because laststart was nonzero.  And we've already +					   incremented `p', by the way, to be the character after +					   the `*'.  
Do we have to do something analogous here +					   for null bytes, because of RE_DOT_NOT_NULL?  */ +					if (TRANSLATE(*(p - 2)) == TRANSLATE('.') +						&& zero_times_ok +						&& p < pend && TRANSLATE(*p) == TRANSLATE('\n') +						&& !(syntax & RE_DOT_NEWLINE)) {	/* We have .*\n.  */ +						STORE_JUMP(jump, b, laststart); +						keep_string_p = true; +					} else +						/* Anything else.  */ +						STORE_JUMP(maybe_pop_jump, b, laststart - 3); + +					/* We've added more stuff to the buffer.  */ +					b += 3; +				} + +				/* On failure, jump from laststart to b + 3, which will be the +				   end of the buffer after this jump is inserted.  */ +				GET_BUFFER_SPACE(3); +				INSERT_JUMP(keep_string_p ? on_failure_keep_string_jump +							: on_failure_jump, laststart, b + 3); +				pending_exact = 0; +				b += 3; + +				if (!zero_times_ok) { +					/* At least one repetition is required, so insert a +					   `dummy_failure_jump' before the initial +					   `on_failure_jump' instruction of the loop. This +					   effects a skip over that instruction the first time +					   we hit that loop.  */ +					GET_BUFFER_SPACE(3); +					INSERT_JUMP(dummy_failure_jump, laststart, +								laststart + 6); +					b += 3; +				} +			} +			break; + + +		case '.': +			laststart = b; +			BUF_PUSH(anychar); +			break; + + +		case '[': +		{ +			boolean had_char_class = false; + +			if (p == pend) +				FREE_STACK_RETURN(REG_EBRACK); + +			/* Ensure that we have enough space to push a charset: the +			   opcode, the length count, and the bitset; 34 bytes in all.  */ +			GET_BUFFER_SPACE(34); + +			laststart = b; + +			/* We test `*p == '^' twice, instead of using an if +			   statement, so we only need one BUF_PUSH.  */ +			BUF_PUSH(*p == '^' ? charset_not : charset); +			if (*p == '^') +				p++; + +			/* Remember the first position in the bracket expression.  */ +			p1 = p; + +			/* Push the number of bytes in the bitmap.  
*/ +			BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH); + +			/* Clear the whole map.  */ +			bzero(b, (1 << BYTEWIDTH) / BYTEWIDTH); + +			/* charset_not matches newline according to a syntax bit.  */ +			if ((re_opcode_t) b[-2] == charset_not +				&& (syntax & RE_HAT_LISTS_NOT_NEWLINE)) SET_LIST_BIT('\n'); + +			/* Read in characters and ranges, setting map bits.  */ +			for (;;) { +				if (p == pend) +					FREE_STACK_RETURN(REG_EBRACK); + +				PATFETCH(c); + +				/* \ might escape characters inside [...] and [^...].  */ +				if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') { +					if (p == pend) +						FREE_STACK_RETURN(REG_EESCAPE); + +					PATFETCH(c1); +					SET_LIST_BIT(c1); +					continue; +				} + +				/* Could be the end of the bracket expression.  If it's +				   not (i.e., when the bracket expression is `[]' so +				   far), the ']' character bit gets set way below.  */ +				if (c == ']' && p != p1 + 1) +					break; + +				/* Look ahead to see if it's a range when the last thing +				   was a character class.  */ +				if (had_char_class && c == '-' && *p != ']') +					FREE_STACK_RETURN(REG_ERANGE); + +				/* Look ahead to see if it's a range when the last thing +				   was a character: if this is a hyphen not at the +				   beginning or the end of a list, then it's the range +				   operator.  */ +				if (c == '-' && !(p - 2 >= pattern && p[-2] == '[') +					&& !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') +					&& *p != ']') { +					reg_errcode_t ret +						= compile_range(&p, pend, translate, syntax, b); + +					if (ret != REG_NOERROR) +						FREE_STACK_RETURN(ret); +				} + +				else if (p[0] == '-' && p[1] != ']') {	/* This handles ranges made up of characters only.  */ +					reg_errcode_t ret; + +					/* Move past the `-'.  
*/ +					PATFETCH(c1); + +					ret = compile_range(&p, pend, translate, syntax, b); +					if (ret != REG_NOERROR) +						FREE_STACK_RETURN(ret); +				} + +				/* See if we're at the beginning of a possible character +				   class.  */ + +				else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') {	/* Leave room for the null.  */ +					char str[CHAR_CLASS_MAX_LENGTH + 1]; + +					PATFETCH(c); +					c1 = 0; + +					/* If pattern is `[[:'.  */ +					if (p == pend) +						FREE_STACK_RETURN(REG_EBRACK); + +					for (;;) { +						PATFETCH(c); +						if ((c == ':' && *p == ']') || p == pend) +							break; +						if (c1 < CHAR_CLASS_MAX_LENGTH) +							str[c1++] = c; +						else +							/* This is in any case an invalid class name.  */ +							str[0] = '\0'; +					} +					str[c1] = '\0'; + +					/* If isn't a word bracketed by `[:' and `:]': +					   undo the ending character, the letters, and leave +					   the leading `:' and `[' (but set bits for them).  */ +					if (c == ':' && *p == ']') { +#if defined _LIBC || WIDE_CHAR_SUPPORT +						boolean is_lower = STREQ(str, "lower"); +						boolean is_upper = STREQ(str, "upper"); +						wctype_t wt; +						int ch; + +						wt = IS_CHAR_CLASS(str); +						if (wt == 0) +							FREE_STACK_RETURN(REG_ECTYPE); + +						/* Throw away the ] at the end of the character +						   class.  
*/ +						PATFETCH(c); + +						if (p == pend) +							FREE_STACK_RETURN(REG_EBRACK); + +						for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) { +# ifdef _LIBC +							if (__iswctype(__btowc(ch), wt)) +								SET_LIST_BIT(ch); +# else +							if (iswctype(btowc(ch), wt)) +								SET_LIST_BIT(ch); +# endif + +							if (translate && (is_upper || is_lower) +								&& (ISUPPER(ch) || ISLOWER(ch))) +								SET_LIST_BIT(ch); +						} + +						had_char_class = true; +#else +						int ch; +						boolean is_alnum = STREQ(str, "alnum"); +						boolean is_alpha = STREQ(str, "alpha"); +						boolean is_blank = STREQ(str, "blank"); +						boolean is_cntrl = STREQ(str, "cntrl"); +						boolean is_digit = STREQ(str, "digit"); +						boolean is_graph = STREQ(str, "graph"); +						boolean is_lower = STREQ(str, "lower"); +						boolean is_print = STREQ(str, "print"); +						boolean is_punct = STREQ(str, "punct"); +						boolean is_space = STREQ(str, "space"); +						boolean is_upper = STREQ(str, "upper"); +						boolean is_xdigit = STREQ(str, "xdigit"); + +						if (!IS_CHAR_CLASS(str)) +							FREE_STACK_RETURN(REG_ECTYPE); + +						/* Throw away the ] at the end of the character +						   class.  */ +						PATFETCH(c); + +						if (p == pend) +							FREE_STACK_RETURN(REG_EBRACK); + +						for (ch = 0; ch < 1 << BYTEWIDTH; ch++) { +							/* This was split into 3 if's to +							   avoid an arbitrary limit in some compiler.  
*/ +							if ((is_alnum && ISALNUM(ch)) +								|| (is_alpha && ISALPHA(ch)) +								|| (is_blank && ISBLANK(ch)) +								|| (is_cntrl && ISCNTRL(ch))) +								SET_LIST_BIT(ch); +							if ((is_digit && ISDIGIT(ch)) +								|| (is_graph && ISGRAPH(ch)) +								|| (is_lower && ISLOWER(ch)) +								|| (is_print && ISPRINT(ch))) +								SET_LIST_BIT(ch); +							if ((is_punct && ISPUNCT(ch)) +								|| (is_space && ISSPACE(ch)) +								|| (is_upper && ISUPPER(ch)) +								|| (is_xdigit && ISXDIGIT(ch))) +								SET_LIST_BIT(ch); +							if (translate && (is_upper || is_lower) +								&& (ISUPPER(ch) || ISLOWER(ch))) +								SET_LIST_BIT(ch); +						} +						had_char_class = true; +#endif							/* libc || wctype.h */ +					} else { +						c1++; +						while (c1--) +							PATUNFETCH; +						SET_LIST_BIT('['); +						SET_LIST_BIT(':'); +						had_char_class = false; +					} +				} else { +					had_char_class = false; +					SET_LIST_BIT(c); +				} +			} + +			/* Discard any (non)matching list bytes that are all 0 at the +			   end of the map.  Decrease the map-length byte too.  */ +			while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) +				b[-1]--; +			b += b[-1]; +		} +			break; + + +		case '(': +			if (syntax & RE_NO_BK_PARENS) +				goto handle_open; +			else +				goto normal_char; + + +		case ')': +			if (syntax & RE_NO_BK_PARENS) +				goto handle_close; +			else +				goto normal_char; + + +		case '\n': +			if (syntax & RE_NEWLINE_ALT) +				goto handle_alt; +			else +				goto normal_char; + + +		case '|': +			if (syntax & RE_NO_BK_VBAR) +				goto handle_alt; +			else +				goto normal_char; + + +		case '{': +			if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) +				goto handle_interval; +			else +				goto normal_char; + + +		case '\\': +			if (p == pend) +				FREE_STACK_RETURN(REG_EESCAPE); + +			/* Do not translate the character after the \, so that we can +			   distinguish, e.g., \B from \b, even if we normally would +			   translate, e.g., B to b.  
*/ +			PATFETCH_RAW(c); + +			switch (c) { +			case '(': +				if (syntax & RE_NO_BK_PARENS) +					goto normal_backslash; + +			  handle_open: +				bufp->re_nsub++; +				regnum++; + +				if (COMPILE_STACK_FULL) { +					RETALLOC(compile_stack.stack, compile_stack.size << 1, +							 compile_stack_elt_t); +					if (compile_stack.stack == NULL) +						return REG_ESPACE; + +					compile_stack.size <<= 1; +				} + +				/* These are the values to restore when we hit end of this +				   group.  They are all relative offsets, so that if the +				   whole pattern moves because of realloc, they will still +				   be valid.  */ +				COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; +				COMPILE_STACK_TOP.fixup_alt_jump +					= +					fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; +				COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; +				COMPILE_STACK_TOP.regnum = regnum; + +				/* We will eventually replace the 0 with the number of +				   groups inner to this one.  But do not push a +				   start_memory for groups beyond the last one we can +				   represent in the compiled pattern.  */ +				if (regnum <= MAX_REGNUM) { +					COMPILE_STACK_TOP.inner_group_offset = +						b - bufp->buffer + 2; +					BUF_PUSH_3(start_memory, regnum, 0); +				} + +				compile_stack.avail++; + +				fixup_alt_jump = 0; +				laststart = 0; +				begalt = b; +				/* If we've reached MAX_REGNUM groups, then this open +				   won't actually generate any code, so we'll have to +				   clear pending_exact explicitly.  
*/ +				pending_exact = 0; +				break; + + +			case ')': +				if (syntax & RE_NO_BK_PARENS) +					goto normal_backslash; + +				if (COMPILE_STACK_EMPTY) { +					if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) +						goto normal_backslash; +					else +						FREE_STACK_RETURN(REG_ERPAREN); +				} + +			  handle_close: +				if (fixup_alt_jump) {	/* Push a dummy failure point at the end of the +										   alternative for a possible future +										   `pop_failure_jump' to pop.  See comments at +										   `push_dummy_failure' in `re_match_2'.  */ +					BUF_PUSH(push_dummy_failure); + +					/* We allocated space for this jump when we assigned +					   to `fixup_alt_jump', in the `handle_alt' case below.  */ +					STORE_JUMP(jump_past_alt, fixup_alt_jump, b - 1); +				} + +				/* See similar code for backslashed left paren above.  */ +				if (COMPILE_STACK_EMPTY) { +					if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) +						goto normal_char; +					else +						FREE_STACK_RETURN(REG_ERPAREN); +				} + +				/* Since we just checked for an empty stack above, this +				   ``can't happen''.  */ +				assert(compile_stack.avail != 0); +				{ +					/* We don't just want to restore into `regnum', because +					   later groups should continue to be numbered higher, +					   as in `(ab)c(de)' -- the second group is #2.  */ +					regnum_t this_group_regnum; + +					compile_stack.avail--; +					begalt = +						bufp->buffer + COMPILE_STACK_TOP.begalt_offset; +					fixup_alt_jump = +						COMPILE_STACK_TOP.fixup_alt_jump ? bufp->buffer + +						COMPILE_STACK_TOP.fixup_alt_jump - 1 : 0; +					laststart = +						bufp->buffer + COMPILE_STACK_TOP.laststart_offset; +					this_group_regnum = COMPILE_STACK_TOP.regnum; +					/* If we've reached MAX_REGNUM groups, then this open +					   won't actually generate any code, so we'll have to +					   clear pending_exact explicitly.  
*/ +					pending_exact = 0; + +					/* We're at the end of the group, so now we know how many +					   groups were inside this one.  */ +					if (this_group_regnum <= MAX_REGNUM) { +						unsigned char *inner_group_loc + +							= +							bufp->buffer + +							COMPILE_STACK_TOP.inner_group_offset; + +						*inner_group_loc = regnum - this_group_regnum; +						BUF_PUSH_3(stop_memory, this_group_regnum, +								   regnum - this_group_regnum); +					} +				} +				break; + + +			case '|':			/* `\|'.  */ +				if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) +					goto normal_backslash; +			  handle_alt: +				if (syntax & RE_LIMITED_OPS) +					goto normal_char; + +				/* Insert before the previous alternative a jump which +				   jumps to this alternative if the former fails.  */ +				GET_BUFFER_SPACE(3); +				INSERT_JUMP(on_failure_jump, begalt, b + 6); +				pending_exact = 0; +				b += 3; + +				/* The alternative before this one has a jump after it +				   which gets executed if it gets matched.  Adjust that +				   jump so it will jump to this alternative's analogous +				   jump (put in below, which in turn will jump to the next +				   (if any) alternative's such jump, etc.).  The last such +				   jump jumps to the correct final destination.  A picture: +				   _____ _____ +				   |   | |   | +				   |   v |   v +				   a | b   | c + +				   If we are at `b', then fixup_alt_jump right now points to a +				   three-byte space after `a'.  We'll put in the jump, set +				   fixup_alt_jump to right after `b', and leave behind three +				   bytes which we'll fill in when we get to after `c'.  */ + +				if (fixup_alt_jump) +					STORE_JUMP(jump_past_alt, fixup_alt_jump, b); + +				/* Mark and leave space for a jump after this alternative, +				   to be filled in later either by next alternative or +				   when know we're at the end of a series of alternatives.  
*/ +				fixup_alt_jump = b; +				GET_BUFFER_SPACE(3); +				b += 3; + +				laststart = 0; +				begalt = b; +				break; + + +			case '{': +				/* If \{ is a literal.  */ +				if (!(syntax & RE_INTERVALS) +					/* If we're at `\{' and it's not the open-interval +					   operator.  */ +					|| ((syntax & RE_INTERVALS) +						&& (syntax & RE_NO_BK_BRACES)) || (p - 2 == pattern +														   && p == pend)) +					goto normal_backslash; + +			  handle_interval: +				{ +					/* If got here, then the syntax allows intervals.  */ + +					/* At least (most) this many matches must be made.  */ +					int lower_bound = -1, upper_bound = -1; + +					beg_interval = p - 1; + +					if (p == pend) { +						if (!(syntax & RE_INTERVALS) +							&& (syntax & RE_NO_BK_BRACES)) goto +								unfetch_interval; +						else +							FREE_STACK_RETURN(REG_EBRACE); +					} + +					GET_UNSIGNED_NUMBER(lower_bound); + +					if (c == ',') { +						GET_UNSIGNED_NUMBER(upper_bound); +						if ((!(syntax & RE_NO_BK_BRACES) && c != '\\') +							|| ((syntax & RE_NO_BK_BRACES) && c != '}')) +							FREE_STACK_RETURN(REG_BADBR); + +						if (upper_bound < 0) +							upper_bound = RE_DUP_MAX; +					} else +						/* Interval such as `{1}' => match exactly once. */ +						upper_bound = lower_bound; + +					if (lower_bound < 0 || upper_bound > RE_DUP_MAX +						|| lower_bound > upper_bound) { +						if (!(syntax & RE_INTERVALS) +							&& (syntax & RE_NO_BK_BRACES)) goto +								unfetch_interval; +						else +							FREE_STACK_RETURN(REG_BADBR); +					} + +					if (!(syntax & RE_NO_BK_BRACES)) { +						if (c != '\\') +							FREE_STACK_RETURN(REG_EBRACE); + +						PATFETCH(c); +					} + +					if (c != '}') { +						if (!(syntax & RE_INTERVALS) +							&& (syntax & RE_NO_BK_BRACES)) goto +								unfetch_interval; +						else +							FREE_STACK_RETURN(REG_BADBR); +					} + +					/* We just parsed a valid interval.  */ + +					/* If it's invalid to have no preceding re.  
*/ +					if (!laststart) { +						if (syntax & RE_CONTEXT_INVALID_OPS) +							FREE_STACK_RETURN(REG_BADRPT); +						else if (syntax & RE_CONTEXT_INDEP_OPS) +							laststart = b; +						else +							goto unfetch_interval; +					} + +					/* If the upper bound is zero, don't want to succeed at +					   all; jump from `laststart' to `b + 3', which will be +					   the end of the buffer after we insert the jump.  */ +					if (upper_bound == 0) { +						GET_BUFFER_SPACE(3); +						INSERT_JUMP(jump, laststart, b + 3); +						b += 3; +					} + +					/* Otherwise, we have a nontrivial interval.  When +					   we're all done, the pattern will look like: +					   set_number_at <jump count> <upper bound> +					   set_number_at <succeed_n count> <lower bound> +					   succeed_n <after jump addr> <succeed_n count> +					   <body of loop> +					   jump_n <succeed_n addr> <jump count> +					   (The upper bound and `jump_n' are omitted if +					   `upper_bound' is 1, though.)  */ +					else {		/* If the upper bound is > 1, we need to insert +								   more at the end of the loop.  */ +						unsigned nbytes = 10 + (upper_bound > 1) * 10; + +						GET_BUFFER_SPACE(nbytes); + +						/* Initialize lower bound of the `succeed_n', even +						   though it will be set during matching by its +						   attendant `set_number_at' (inserted next), +						   because `re_compile_fastmap' needs to know. +						   Jump to the `jump_n' we might insert below.  */ +						INSERT_JUMP2(succeed_n, laststart, +									 b + 5 + (upper_bound > 1) * 5, +									 lower_bound); +						b += 5; + +						/* Code to initialize the lower bound.  Insert +						   before the `succeed_n'.  The `5' is the last two +						   bytes of this `set_number_at', plus 3 bytes of +						   the following `succeed_n'.  
*/ +						insert_op2(set_number_at, laststart, 5, +								   lower_bound, b); +						b += 5; + +						if (upper_bound > 1) {	/* More than one repetition is allowed, so +												   append a backward jump to the `succeed_n' +												   that starts this interval. + +												   When we've reached this during matching, +												   we'll have matched the interval once, so +												   jump back only `upper_bound - 1' times.  */ +							STORE_JUMP2(jump_n, b, laststart + 5, +										upper_bound - 1); +							b += 5; + +							/* The location we want to set is the second +							   parameter of the `jump_n'; that is `b-2' as +							   an absolute address.  `laststart' will be +							   the `set_number_at' we're about to insert; +							   `laststart+3' the number to set, the source +							   for the relative address.  But we are +							   inserting into the middle of the pattern -- +							   so everything is getting moved up by 5. +							   Conclusion: (b - 2) - (laststart + 3) + 5, +							   i.e., b - laststart. + +							   We insert this at the beginning of the loop +							   so that if we fail during matching, we'll +							   reinitialize the bounds.  */ +							insert_op2(set_number_at, laststart, +									   b - laststart, upper_bound - 1, b); +							b += 5; +						} +					} +					pending_exact = 0; +					beg_interval = NULL; +				} +				break; + +			  unfetch_interval: +				/* If an invalid interval, match the characters as literals.  */ +				assert(beg_interval); +				p = beg_interval; +				beg_interval = NULL; + +				/* normal_char and normal_backslash need `c'.  */ +				PATFETCH(c); + +				if (!(syntax & RE_NO_BK_BRACES)) { +					if (p > pattern && p[-1] == '\\') +						goto normal_backslash; +				} +				goto normal_char; + +#ifdef emacs +				/* There is no way to specify the before_dot and after_dot +				   operators.  rms says this is ok.  
--karl  */ +			case '=': +				BUF_PUSH(at_dot); +				break; + +			case 's': +				laststart = b; +				PATFETCH(c); +				BUF_PUSH_2(syntaxspec, syntax_spec_code[c]); +				break; + +			case 'S': +				laststart = b; +				PATFETCH(c); +				BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]); +				break; +#endif							/* emacs */ + + +			case 'w': +				if (syntax & RE_NO_GNU_OPS) +					goto normal_char; +				laststart = b; +				BUF_PUSH(wordchar); +				break; + + +			case 'W': +				if (syntax & RE_NO_GNU_OPS) +					goto normal_char; +				laststart = b; +				BUF_PUSH(notwordchar); +				break; + + +			case '<': +				if (syntax & RE_NO_GNU_OPS) +					goto normal_char; +				BUF_PUSH(wordbeg); +				break; + +			case '>': +				if (syntax & RE_NO_GNU_OPS) +					goto normal_char; +				BUF_PUSH(wordend); +				break; + +			case 'b': +				if (syntax & RE_NO_GNU_OPS) +					goto normal_char; +				BUF_PUSH(wordbound); +				break; + +			case 'B': +				if (syntax & RE_NO_GNU_OPS) +					goto normal_char; +				BUF_PUSH(notwordbound); +				break; + +			case '`': +				if (syntax & RE_NO_GNU_OPS) +					goto normal_char; +				BUF_PUSH(begbuf); +				break; + +			case '\'': +				if (syntax & RE_NO_GNU_OPS) +					goto normal_char; +				BUF_PUSH(endbuf); +				break; + +			case '1': +			case '2': +			case '3': +			case '4': +			case '5': +			case '6': +			case '7': +			case '8': +			case '9': +				if (syntax & RE_NO_BK_REFS) +					goto normal_char; + +				c1 = c - '0'; + +				if (c1 > regnum) +					FREE_STACK_RETURN(REG_ESUBREG); + +				/* Can't back reference to a subexpression if inside of it.  
*/ +				if (group_in_compile_stack(compile_stack, (regnum_t) c1)) +					goto normal_char; + +				laststart = b; +				BUF_PUSH_2(duplicate, c1); +				break; + + +			case '+': +			case '?': +				if (syntax & RE_BK_PLUS_QM) +					goto handle_plus; +				else +					goto normal_backslash; + +			default: +			  normal_backslash: +				/* You might think it would be useful for \ to mean +				   not to translate; but if we don't translate it +				   it will never match anything.  */ +				c = TRANSLATE(c); +				goto normal_char; +			} +			break; + + +		default: +			/* Expects the character in `c'.  */ +		  normal_char: +			/* If no exactn currently being built.  */ +			if (!pending_exact +				/* If last exactn not at current position.  */ +				|| pending_exact + *pending_exact + 1 != b +				/* We have only one byte following the exactn for the count.  */ +				|| *pending_exact == (1 << BYTEWIDTH) - 1 +				/* If followed by a repetition operator.  */ +				|| *p == '*' || *p == '^' || ((syntax & RE_BK_PLUS_QM) +											  ? *p == '\\' && (p[1] == '+' +															   || p[1] == +															   '?') : (*p +																	   == +																	   '+' +																	   || +																	   *p +																	   == +																	   '?')) +				|| ((syntax & RE_INTERVALS) +					&& ((syntax & RE_NO_BK_BRACES) +						? *p == '{' : (p[0] == '\\' && p[1] == '{')))) { +				/* Start building a new exactn.  */ + +				laststart = b; + +				BUF_PUSH_2(exactn, 0); +				pending_exact = b - 1; +			} + +			BUF_PUSH(c); +			(*pending_exact)++; +			break; +		}						/* switch (c) */ +	}							/* while p != pend */ + + +	/* Through the pattern now.  */ + +	if (fixup_alt_jump) +		STORE_JUMP(jump_past_alt, fixup_alt_jump, b); + +	if (!COMPILE_STACK_EMPTY) +		FREE_STACK_RETURN(REG_EPAREN); + +	/* If we don't want backtracking, force success +	   the first time we reach the end of the compiled pattern.  
*/ +	if (syntax & RE_NO_POSIX_BACKTRACKING) +		BUF_PUSH(succeed); + +	free(compile_stack.stack); + +	/* We have succeeded; set the length of the buffer.  */ +	bufp->used = b - bufp->buffer; + +#ifdef DEBUG +	if (debug) { +		DEBUG_PRINT1("\nCompiled pattern: \n"); +		print_compiled_pattern(bufp); +	} +#endif							/* DEBUG */ + +#ifndef MATCH_MAY_ALLOCATE +	/* Initialize the failure stack to the largest possible stack.  This +	   isn't necessary unless we're trying to avoid calling alloca in +	   the search and match routines.  */ +	{ +		int num_regs = bufp->re_nsub + 1; + +		/* Since DOUBLE_FAIL_STACK refuses to double only if the current size +		   is strictly greater than re_max_failures, the largest possible stack +		   is 2 * re_max_failures failure points.  */ +		if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) { +			fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); + +# ifdef emacs +			if (!fail_stack.stack) +				fail_stack.stack +					= (fail_stack_elt_t *) xmalloc(fail_stack.size +												   * +												   sizeof +												   (fail_stack_elt_t)); +			else +				fail_stack.stack = +					(fail_stack_elt_t *) xrealloc(fail_stack.stack, +												  (fail_stack.size * +												   sizeof +												   (fail_stack_elt_t))); +# else							/* not emacs */ +			if (!fail_stack.stack) +				fail_stack.stack +					= (fail_stack_elt_t *) malloc(fail_stack.size +												  * +												  sizeof +												  (fail_stack_elt_t)); +			else +				fail_stack.stack = +					(fail_stack_elt_t *) realloc(fail_stack.stack, +												 (fail_stack.size * +												  sizeof +												  (fail_stack_elt_t))); +# endif							/* not emacs */ +		} + +		regex_grow_registers(num_regs); +	} +#endif							/* not MATCH_MAY_ALLOCATE */ + +	return REG_NOERROR; +}								/* regex_compile */ + +/* Subroutines for `regex_compile'.  */ + +/* Store OP at LOC followed by two-byte integer parameter ARG.  
*/ + +static void store_op1(op, loc, arg) +re_opcode_t op; +unsigned char *loc; +int arg; +{ +	*loc = (unsigned char) op; +	STORE_NUMBER(loc + 1, arg); +} + + +/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */ + +static void store_op2(op, loc, arg1, arg2) +re_opcode_t op; +unsigned char *loc; +int arg1, arg2; +{ +	*loc = (unsigned char) op; +	STORE_NUMBER(loc + 1, arg1); +	STORE_NUMBER(loc + 3, arg2); +} + + +/* Copy the bytes from LOC to END to open up three bytes of space at LOC +   for OP followed by two-byte integer parameter ARG.  */ + +static void insert_op1(op, loc, arg, end) +re_opcode_t op; +unsigned char *loc; +int arg; +unsigned char *end; +{ +	register unsigned char *pfrom = end; +	register unsigned char *pto = end + 3; + +	while (pfrom != loc) +		*--pto = *--pfrom; + +	store_op1(op, loc, arg); +} + + +/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */ + +static void insert_op2(op, loc, arg1, arg2, end) +re_opcode_t op; +unsigned char *loc; +int arg1, arg2; +unsigned char *end; +{ +	register unsigned char *pfrom = end; +	register unsigned char *pto = end + 5; + +	while (pfrom != loc) +		*--pto = *--pfrom; + +	store_op2(op, loc, arg1, arg2); +} + + +/* P points to just after a ^ in PATTERN.  Return true if that ^ comes +   after an alternative or a begin-subexpression.  We assume there is at +   least one character before the ^.  */ + +static boolean at_begline_loc_p(pattern, p, syntax) +const char *pattern, *p; +reg_syntax_t syntax; +{ +	const char *prev = p - 2; +	boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; + +	return +		/* After a subexpression?  */ +		(*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) +		/* After an alternative?  */ +		|| (*prev == '|' +			&& (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); +} + + +/* The dual of at_begline_loc_p.  This one is for $.  We assume there is +   at least one character after the $, i.e., `P < PEND'.  
*/ +/* P is the unparsed remainder of the pattern and PEND its end.  NEXT_NEXT is null when no character follows *P, so a backslash-escaped `)' or `|' is recognised only when both of its characters lie before PEND.  */ +static boolean at_endline_loc_p(p, pend, syntax) +const char *p, *pend; +reg_syntax_t syntax; +{ +	const char *next = p; +	boolean next_backslash = *next == '\\'; +	const char *next_next = p + 1 < pend ? p + 1 : 0; + +	return +		/* Before the close of a subexpression?  */ +		(syntax & RE_NO_BK_PARENS ? *next == ')' +		 : next_backslash && next_next && *next_next == ')') +		/* Before an alternative?  */ +		|| (syntax & RE_NO_BK_VBAR ? *next == '|' +			: next_backslash && next_next && *next_next == '|'); +} + + +/* Returns true if REGNUM is in one of COMPILE_STACK's elements and +   false if it's not.  The stack is scanned from its newest entry +   (index `avail - 1') down to index 0.  */ + +static boolean group_in_compile_stack(compile_stack, regnum) +compile_stack_type compile_stack; +regnum_t regnum; +{ +	int this_element; + +	for (this_element = compile_stack.avail - 1; +		 this_element >= 0; this_element--) +		if (compile_stack.stack[this_element].regnum == regnum) +			return true; + +	return false; +} + + +/* Read the ending character of a range (in a bracket expression) from the +   uncompiled pattern *P_PTR (which ends at PEND).  We assume the +   starting character is in `P[-2]'.  (`P[-1]' is the character `-'.) +   Then we set the translation of all bits between the starting and +   ending characters (inclusive) in the compiled pattern B. + +   Return an error code: REG_ERANGE when the pattern ends before the +   range's ending character, or when the range turns out to be empty and +   SYNTAX has RE_NO_EMPTY_RANGES set; REG_NOERROR otherwise. + +   We use these short variable names so we can use the same macros as +   `regex_compile' itself.  */ + +static reg_errcode_t compile_range(p_ptr, pend, translate, syntax, b) +const char **p_ptr, *pend; +RE_TRANSLATE_TYPE translate; +reg_syntax_t syntax; +unsigned char *b; +{ +	unsigned this_char; + +	const char *p = *p_ptr; +	reg_errcode_t ret; +	char range_start[2]; +	char range_end[2]; +	char ch[2]; + +	if (p == pend) +		return REG_ERANGE; + +	/* Fetch the endpoints without translating them; the +	   appropriate translation is done in the bit-setting loop below.  
*/ +	range_start[0] = p[-2]; +	range_start[1] = '\0'; +	range_end[0] = p[0]; +	range_end[1] = '\0'; + +	/* Have to increment the pointer into the pattern string, so the +	   caller isn't still at the ending character.  */ +	(*p_ptr)++; + +	/* Report an error if the range is empty and the syntax prohibits this. +	   RET keeps that verdict until the loop below finds a character +	   inside the range.  */ +	ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; + +	/* Here we see why `this_char' has to be larger than an `unsigned +	   char' -- we would otherwise go into an infinite loop, since all +	   characters <= 0xff.  Membership is decided by strcoll on +	   one-character strings, i.e. by the locale's collating order.  */ +	ch[1] = '\0'; +	for (this_char = 0; this_char <= (unsigned char) -1; ++this_char) { +		ch[0] = this_char; +		if (strcoll(range_start, ch) <= 0 && strcoll(ch, range_end) <= 0) { +			SET_LIST_BIT(TRANSLATE(this_char)); +			ret = REG_NOERROR; +		} +	} + +	return ret; +} + +/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in +   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible +   characters can start a string that matches the pattern.  This fastmap +   is used by re_search to skip quickly over impossible starting points. + +   The caller must supply the address of a (1 << BYTEWIDTH)-byte data +   area as BUFP->fastmap. + +   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in +   the pattern buffer. + +   Returns 0 if we succeed, -2 if an internal error.   */ + +int re_compile_fastmap(bufp) +struct re_pattern_buffer *bufp; +{ +	int j, k; + +#ifdef MATCH_MAY_ALLOCATE +	fail_stack_type fail_stack; +#endif +#ifndef REGEX_MALLOC +	char *destination; +#endif + +	register char *fastmap = bufp->fastmap; +	unsigned char *pattern = bufp->buffer; +	unsigned char *p = pattern; +	register unsigned char *pend = pattern + bufp->used; + +#ifdef REL_ALLOC +	/* This holds the pointer to the failure stack, when +	   it is allocated relocatably.  */ +	fail_stack_elt_t *failure_stack_ptr; +#endif + +	/* Assume that each path through the pattern can be null until +	   proven otherwise.  
We set this false at the bottom of the switch +	   statement, to which we get only if a particular path doesn't +	   match the empty string.  */ +	boolean path_can_be_null = true; + +	/* We aren't doing a `succeed_n' to begin with.  */ +	boolean succeed_n_p = false; + +	assert(fastmap != NULL && p != NULL); + +	INIT_FAIL_STACK(); +	bzero(fastmap, 1 << BYTEWIDTH);	/* Assume nothing's valid.  */ +	bufp->fastmap_accurate = 1;	/* It will be when we're done.  */ +	bufp->can_be_null = 0; + +	while (1) { +		if (p == pend || *p == succeed) { +			/* We have reached the (effective) end of pattern.  */ +			if (!FAIL_STACK_EMPTY()) { +				bufp->can_be_null |= path_can_be_null; + +				/* Reset for next path.  */ +				path_can_be_null = true; + +				p = fail_stack.stack[--fail_stack.avail].pointer;	/* Pop the most recently saved alternative.  */ + +				continue; +			} else +				break; +		} + +		/* We should never be about to go beyond the end of the pattern.  */ +		assert(p < pend); + +		switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) { + +			/* I guess the idea here is to simply not bother with a fastmap +			   if a backreference is used, since it's too hard to figure out +			   the fastmap for the corresponding group.  Setting +			   `can_be_null' stops `re_search_2' from using the fastmap, so +			   that is all we do.  */ +		case duplicate: +			bufp->can_be_null = 1; +			goto done; + + +			/* Following are the cases which match a character.  These end +			   with `break'.  */ + +		case exactn: +			fastmap[p[1]] = 1;	/* p[0] is the count byte; p[1] is the first literal character.  */ +			break; + +			/* *p is the bitmap length in bytes; bit J of the bitmap that follows stands for character J.  */ +		case charset: +			for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) +				if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) +					fastmap[j] = 1; +			break; + + +		case charset_not: +			/* Chars beyond end of map must be allowed.  
*/ +			for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) +				fastmap[j] = 1; + +			for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) +				if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) +					fastmap[j] = 1; +			break; + + +		case wordchar: +			for (j = 0; j < (1 << BYTEWIDTH); j++) +				if (SYNTAX(j) == Sword) +					fastmap[j] = 1; +			break; + + +		case notwordchar: +			for (j = 0; j < (1 << BYTEWIDTH); j++) +				if (SYNTAX(j) != Sword) +					fastmap[j] = 1; +			break; + + +		case anychar: +		{ +			int fastmap_newline = fastmap['\n']; + +			/* `.' matches anything ...  */ +			for (j = 0; j < (1 << BYTEWIDTH); j++) +				fastmap[j] = 1; + +			/* ... except perhaps newline.  */ +			if (!(bufp->syntax & RE_DOT_NEWLINE)) +				fastmap['\n'] = fastmap_newline; + +			/* Return if we have already set `can_be_null'; if we have, +			   then the fastmap is irrelevant.  Something's wrong here. +			   NOTE(review): this test hangs off the `else' of the +			   RE_DOT_NEWLINE check, so it only runs when `.' may match +			   newline -- confirm that this is intended.  */ +			else if (bufp->can_be_null) +				goto done; + +			/* Otherwise, have to check alternative paths.  */ +			break; +		} + +#ifdef emacs +		case syntaxspec: +			k = *p++; +			for (j = 0; j < (1 << BYTEWIDTH); j++) +				if (SYNTAX(j) == (enum syntaxcode) k) +					fastmap[j] = 1; +			break; + + +		case notsyntaxspec: +			k = *p++; +			for (j = 0; j < (1 << BYTEWIDTH); j++) +				if (SYNTAX(j) != (enum syntaxcode) k) +					fastmap[j] = 1; +			break; + + +			/* All cases after this match the empty string.  These end with +			   `continue'.  
*/ + + +		case before_dot: +		case at_dot: +		case after_dot: +			continue; +#endif							/* emacs */ + + +		case no_op: +		case begline: +		case endline: +		case begbuf: +		case endbuf: +		case wordbound: +		case notwordbound: +		case wordbeg: +		case wordend: +		case push_dummy_failure: +			continue; + + +		case jump_n: +		case pop_failure_jump: +		case maybe_pop_jump: +		case jump: +		case jump_past_alt: +		case dummy_failure_jump: +			EXTRACT_NUMBER_AND_INCR(j, p); +			p += j; +			if (j > 0)	/* Forward jump: keep scanning from the target.  */ +				continue; + +			/* Jump backward implies we just went through the body of a +			   loop and matched nothing.  Opcode jumped to should be +			   `on_failure_jump' or `succeed_n'.  Just treat it like an +			   ordinary jump.  For a * loop, it has pushed its failure +			   point already; if so, discard that as redundant.  */ +			if ((re_opcode_t) * p != on_failure_jump +				&& (re_opcode_t) * p != succeed_n) +				continue; + +			p++; +			EXTRACT_NUMBER_AND_INCR(j, p); +			p += j; + +			/* If what's on the stack is where we are now, pop it.  */ +			if (!FAIL_STACK_EMPTY() +				&& fail_stack.stack[fail_stack.avail - 1].pointer == p) +				fail_stack.avail--; + +			continue; + + +		case on_failure_jump: +		case on_failure_keep_string_jump: +		  handle_on_failure_jump: +			EXTRACT_NUMBER_AND_INCR(j, p); + +			/* For some patterns, e.g., `(a?)?', `p+j' here points to the +			   end of the pattern.  We don't want to push such a point, +			   since when we restore it above, entering the switch will +			   increment `p' past the end of the pattern.  We don't need +			   to push such a point since we obviously won't find any more +			   fastmap entries beyond `pend'.  Such a pattern can match +			   the null string, though.  */ +			if (p + j < pend) { +				if (!PUSH_PATTERN_OP(p + j, fail_stack)) { +					RESET_FAIL_STACK(); +					return -2; +				} +			} else +				bufp->can_be_null = 1; + +			if (succeed_n_p) { +				EXTRACT_NUMBER_AND_INCR(k, p);	/* Skip the n.  
*/ +				succeed_n_p = false; +			} + +			continue; + + +		case succeed_n: +			/* Get to the number of times to succeed.  */ +			p += 2; + +			/* Increment p past the n for when k != 0.  */ +			EXTRACT_NUMBER_AND_INCR(k, p); +			if (k == 0) { +				p -= 4;	/* Rewind to the jump offset: the 2 bytes skipped above plus, presumably, the 2 bytes EXTRACT_NUMBER_AND_INCR consumed.  */ +				succeed_n_p = true;	/* Spaghetti code alert.  */ +				goto handle_on_failure_jump; +			} +			continue; + + +		case set_number_at: +			p += 4;	/* Skip the 4 operand bytes (presumably an offset and a count of 2 bytes each -- confirm against the compiler).  */ +			continue; + + +		case start_memory: +		case stop_memory: +			p += 2;	/* Skip the two operand bytes.  */ +			continue; + + +		default: +			abort();			/* We have listed all the cases.  */ +		}						/* switch *p++ */ + +		/* Getting here means we have found the possible starting +		   characters for one path of the pattern -- and that the empty +		   string does not match.  We need not follow this path further. +		   Instead, look at the next alternative (remembered on the +		   stack), or quit if no more.  The test at the top of the loop +		   does these things.  */ +		path_can_be_null = false; +		p = pend; +	}							/* while p */ + +	/* Set `can_be_null' for the last path (also the first path, if the +	   pattern is empty).  */ +	bufp->can_be_null |= path_can_be_null; + +  done: +	RESET_FAIL_STACK(); +	return 0; +}								/* re_compile_fastmap */ + +#ifdef _LIBC +weak_alias(__re_compile_fastmap, re_compile_fastmap) +#endif +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and +   ENDS.  Subsequent matches using BUFP and REGS will use +   this memory for recording register information.  STARTS and ENDS +   must be allocated using the malloc library routine, and must each +   be at least NUM_REGS * sizeof (regoff_t) bytes long. + +   If NUM_REGS == 0, then subsequent matches should allocate their own +   register data. + +   Unless this function is called, the first search or match using +   BUFP will allocate its own register data, without +   freeing the old data.  
*/ +void re_set_registers(bufp, regs, num_regs, starts, ends) +struct re_pattern_buffer *bufp; +struct re_registers *regs; +unsigned num_regs; +regoff_t *starts, *ends; +{ +	if (num_regs) { +		bufp->regs_allocated = REGS_REALLOCATE; +		regs->num_regs = num_regs; +		regs->start = starts; +		regs->end = ends; +	} else { +		bufp->regs_allocated = REGS_UNALLOCATED; +		regs->num_regs = 0; +		regs->start = regs->end = (regoff_t *) 0; +	} +} + +#ifdef _LIBC +weak_alias(__re_set_registers, re_set_registers) +#endif +/* Searching routines.  */ +/* Like re_search_2, below, but only one string is specified, and +   doesn't let you say where to stop matching. */ +int re_search(bufp, string, size, startpos, range, regs) +struct re_pattern_buffer *bufp; +const char *string; +int size, startpos, range; +struct re_registers *regs; +{ +	return re_search_2(bufp, NULL, 0, string, size, startpos, range, +					   regs, size); +} + +#ifdef _LIBC +weak_alias(__re_search, re_search) +#endif +/* Using the compiled pattern in BUFP->buffer, first tries to match the +   virtual concatenation of STRING1 and STRING2, starting first at index +   STARTPOS, then at STARTPOS + 1, and so on. + +   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. + +   RANGE is how far to scan while trying to match.  RANGE = 0 means try +   only at STARTPOS; in general, the last start tried is STARTPOS + +   RANGE. + +   In REGS, return the indices of the virtual concatenation of STRING1 +   and STRING2 that matched the entire BUFP->buffer and its contained +   subexpressions. + +   Do not consider matching one past the index STOP in the virtual +   concatenation of STRING1 and STRING2. + +   We return either the position in the strings at which the match was +   found, -1 if no match, or -2 if error (such as failure +   stack overflow).  
*/ +int +re_search_2(bufp, string1, size1, string2, size2, startpos, range, regs, +			stop) +struct re_pattern_buffer *bufp; +const char *string1, *string2; +int size1, size2; +int startpos; +int range; +struct re_registers *regs; +int stop; +{ +	int val; +	register char *fastmap = bufp->fastmap; +	register RE_TRANSLATE_TYPE translate = bufp->translate; +	int total_size = size1 + size2; +	int endpos = startpos + range;	/* Last start position requested; may lie outside the strings.  */ + +	/* Check for out-of-range STARTPOS.  */ +	if (startpos < 0 || startpos > total_size) +		return -1; + +	/* Fix up RANGE if it might eventually take us outside +	   the virtual concatenation of STRING1 and STRING2. +	   Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */ +	if (endpos < 0) +		range = 0 - startpos; +	else if (endpos > total_size) +		range = total_size - startpos; + +	/* If the search isn't to be a backwards one, don't waste time in a +	   search for a pattern that must be anchored.  */ +	if (bufp->used > 0 && range > 0 +		&& ((re_opcode_t) bufp->buffer[0] == begbuf +			/* `begline' is like `begbuf' if it cannot match at newlines.  */ +			|| ((re_opcode_t) bufp->buffer[0] == begline +				&& !bufp->newline_anchor))) { +		if (startpos > 0) +			return -1; +		else +			range = 1;	/* NOTE(review): 0 would seem to suffice, since an anchored pattern can only match at position 0 -- confirm before changing.  */ +	} +#ifdef emacs +	/* In a forward search for something that starts with \=, +	   don't keep searching past point.  */ +	if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot +		&& range > 0) { +		range = PT - startpos; +		if (range <= 0) +			return -1; +	} +#endif							/* emacs */ + +	/* Update the fastmap now if not correct already.  */ +	if (fastmap && !bufp->fastmap_accurate) +		if (re_compile_fastmap(bufp) == -2) +			return -2; + +	/* Loop through the string, looking for a place to start matching.  */ +	for (;;) { +		/* If a fastmap is supplied, skip quickly over characters that +		   cannot be the start of a match.  
If the pattern can match the +		   null string, however, we don't need to skip characters; we want +		   the first null string.  */ +		if (fastmap && startpos < total_size && !bufp->can_be_null) { +			if (range > 0) {	/* Searching forwards.  */ +				register const char *d; +				register int lim = 0;	/* Portion of RANGE lying beyond string1; the skip loop stops there because D cannot cross into string2.  */ +				int irange = range; + +				if (startpos < size1 && startpos + range >= size1) +					lim = range - (size1 - startpos); + +				/* `string2 - size1' biases the base so that adding the +				   virtual index STARTPOS lands inside string2.  */ +				d = +					(startpos >= +					 size1 ? string2 - size1 : string1) + startpos; + +				/* Written out as an if-else to avoid testing `translate' +				   inside the loop.  */ +				if (translate) +					while (range > lim && !fastmap[(unsigned char) +												   translate[ +															 (unsigned +															  char) *d++]]) +						range--; +				else +					while (range > lim && !fastmap[(unsigned char) *d++]) +						range--; + +				startpos += irange - range; +			} else {			/* Searching backwards.  */ + +				register char c = (size1 == 0 || startpos >= size1 +								   ? string2[startpos - size1] +								   : string1[startpos]); + +				if (!fastmap[(unsigned char) TRANSLATE(c)]) +					goto advance; +			} +		} + +		/* If can't match the null string, and that's all we have left, fail.  */ +		if (range >= 0 && startpos == total_size && fastmap +			&& !bufp->can_be_null) return -1; + +		val = re_match_2_internal(bufp, string1, size1, string2, size2, +								  startpos, regs, stop); +#ifndef REGEX_MALLOC +# ifdef C_ALLOCA +		alloca(0); +# endif +#endif + +		if (val >= 0) +			return startpos; + +		if (val == -2) +			return -2; + +	  advance: +		if (!range) +			break; +		else if (range > 0) { +			range--; +			startpos++; +		} else { +			range++; +			startpos--; +		} +	} +	return -1; +}								/* re_search_2 */ + +#ifdef _LIBC +weak_alias(__re_search_2, re_search_2) +#endif +/* This converts PTR, a pointer into one of the search strings `string1' +   and `string2', into an offset into their virtual concatenation +   (pointers into `string2' are biased by `size1').  
*/ +#define POINTER_TO_OFFSET(ptr)			\ +  (FIRST_STRING_P (ptr)				\ +   ? ((regoff_t) ((ptr) - string1))		\ +   : ((regoff_t) ((ptr) - string2 + size1))) +/* Macros for dealing with the split strings in re_match_2.  They refer +   directly to the enclosing function's d, dend, end_match_1, +   end_match_2, string1, string2, size1, size2, end1 and end2.  */ +#define MATCHING_IN_FIRST_STRING  (dend == end_match_1) +/* Call before fetching a character with *d.  This switches over to +   string2 if necessary, and jumps to the enclosing function's `fail' +   label when `dend' is already `end_match_2'.  */ +#define PREFETCH()							\ +  while (d == dend)						    	\ +    {									\ +      /* End of string2 => fail.  */					\ +      if (dend == end_match_2) 						\ +        goto fail;							\ +      /* End of string1 => advance to string2.  */ 			\ +      d = string2;						        \ +      dend = end_match_2;						\ +    } +/* Test if at very beginning or at very end of the virtual concatenation +   of `string1' and `string2'.  If only one string, it's `string2'. +   AT_STRINGS_BEG is also true whenever `size2' is zero.  */ +#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) +#define AT_STRINGS_END(d) ((d) == end2) +/* Test if D points to a character which is word-constituent.  We have +   two special cases to check for: if past the end of string1, look at +   the first character in string2; and if before the beginning of +   string2, look at the last character in string1.  */ +#define WORDCHAR_P(d)							\ +  (SYNTAX ((d) == end1 ? *string2					\ +           : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\ +   == Sword) +/* Disabled due to a compiler bug -- see comment at case wordbound */ +#if 0 +/* Test if the character before D and the one at D differ with respect +   to being word-constituent.  */ +#define AT_WORD_BOUNDARY(d)						\ +  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\ +   || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) +#endif +/* Free everything we malloc.  
*/ +#ifdef MATCH_MAY_ALLOCATE +# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL +# define FREE_VARIABLES()						\ +  do {									\ +    REGEX_FREE_STACK (fail_stack.stack);				\ +    FREE_VAR (regstart);						\ +    FREE_VAR (regend);							\ +    FREE_VAR (old_regstart);						\ +    FREE_VAR (old_regend);						\ +    FREE_VAR (best_regstart);						\ +    FREE_VAR (best_regend);						\ +    FREE_VAR (reg_info);						\ +    FREE_VAR (reg_dummy);						\ +    FREE_VAR (reg_info_dummy);						\ +  } while (0) +#else +# define FREE_VARIABLES() ((void)0)	/* Do nothing!  But inhibit gcc warning. */ +#endif							/* not MATCH_MAY_ALLOCATE */ +/* These values must meet several constraints.  They must not be valid +   register values; since we have a limit of 255 registers (because +   we use only one byte in the pattern for the register number), we can +   use numbers larger than 255.  They must differ by 1, because of +   NUM_FAILURE_ITEMS above.  And the value for the lowest register must +   be larger than the value for the highest register, so we do not try +   to actually save any registers when none are active.  */ +#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) +#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) +/* Matching routines.  */ +#ifndef emacs					/* Emacs never uses this.  */ +/* re_match is like re_match_2 except it takes only a single string.  
*/ +int re_match(bufp, string, size, pos, regs) +struct re_pattern_buffer *bufp; +const char *string; +int size, pos; +struct re_registers *regs; +{ +	int result = re_match_2_internal(bufp, NULL, 0, string, size, +									 pos, regs, size); + +# ifndef REGEX_MALLOC +#  ifdef C_ALLOCA +	alloca(0); +#  endif +# endif +	return result; +} + +# ifdef _LIBC +weak_alias(__re_match, re_match) +# endif +#endif							/* not emacs */ +static boolean group_match_null_string_p _RE_ARGS((unsigned char **p, +												   unsigned char *end, +												   register_info_type * + +												   reg_info)); +static boolean alt_match_null_string_p +_RE_ARGS( + +		 (unsigned char *p, unsigned char *end, +		  register_info_type * reg_info)); +static boolean common_op_match_null_string_p +_RE_ARGS( + +		 (unsigned char **p, unsigned char *end, +		  register_info_type * reg_info)); +static int bcmp_translate +_RE_ARGS((const char *s1, const char *s2, int len, char *translate)); + +/* re_match_2 matches the compiled pattern in BUFP against the +   the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 +   and SIZE2, respectively).  We start matching at POS, and stop +   matching at STOP. + +   If REGS is non-null and the `no_sub' field of BUFP is nonzero, we +   store offsets for the substring each group matched in REGS.  See the +   documentation for exactly how many groups we fill. + +   We return -1 if no match, -2 if an internal error (such as the +   failure stack overflowing).  Otherwise, we return the length of the +   matched substring.  
*/ +/* Thin public wrapper: all of the matching is done by re_match_2_internal.  */ +int re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) +struct re_pattern_buffer *bufp; +const char *string1, *string2; +int size1, size2; +int pos; +struct re_registers *regs; +int stop; +{ +	int result = re_match_2_internal(bufp, string1, size1, string2, size2, +									 pos, regs, stop); + +#ifndef REGEX_MALLOC +# ifdef C_ALLOCA +	alloca(0);	/* Force the alloca cleanup that motivated splitting out re_match_2_internal.  */ +# endif +#endif +	return result; +} + +#ifdef _LIBC +weak_alias(__re_match_2, re_match_2) +#endif +/* This is a separate function so that we can force an alloca cleanup +   afterwards.  */ +static int +re_match_2_internal(bufp, string1, size1, string2, size2, pos, regs, stop) +struct re_pattern_buffer *bufp; +const char *string1, *string2; +int size1, size2; +int pos; +struct re_registers *regs; +int stop; +{ +	/* General temporaries.  */ +	int mcnt; +	unsigned char *p1; + +	/* Just past the end of the corresponding string.  */ +	const char *end1, *end2; + +	/* Pointers into string1 and string2, just past the last characters in +	   each to consider matching.  */ +	const char *end_match_1, *end_match_2; + +	/* Where we are in the data, and the end of the current string.  */ +	const char *d, *dend; + +	/* Where we are in the pattern, and the end of the pattern.  */ +	unsigned char *p = bufp->buffer; +	register unsigned char *pend = p + bufp->used; + +	/* Mark the opcode just after a start_memory, so we can test for an +	   empty subpattern when we get to the stop_memory.  */ +	unsigned char *just_past_start_mem = 0; + +	/* We use this to map every character in the string.  */ +	RE_TRANSLATE_TYPE translate = bufp->translate; + +	/* Failure point stack.  Each place that can handle a failure further +	   down the line pushes a failure point on this stack.  It consists of +	   restart, regend, and reg_info for all registers corresponding to +	   the subexpressions we're currently inside, plus the number of such +	   registers, and, finally, two char *'s.  
The first char * is where +	   to resume scanning the pattern; the second one is where to resume +	   scanning the strings.  If the latter is zero, the failure point is +	   a ``dummy''; if a failure happens and the failure point is a dummy, +	   it gets discarded and the next next one is tried.  */ +#ifdef MATCH_MAY_ALLOCATE		/* otherwise, this is global.  */ +	fail_stack_type fail_stack; +#endif +#ifdef DEBUG +	static unsigned failure_id; +	unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; +#endif + +#ifdef REL_ALLOC +	/* This holds the pointer to the failure stack, when +	   it is allocated relocatably.  */ +	fail_stack_elt_t *failure_stack_ptr; +#endif + +	/* We fill all the registers internally, independent of what we +	   return, for use in backreferences.  The number here includes +	   an element for register zero.  */ +	size_t num_regs = bufp->re_nsub + 1; + +	/* The currently active registers.  */ +	active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; +	active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; + +	/* Information on the contents of registers. These are pointers into +	   the input strings; they record just what was matched (on this +	   attempt) by a subexpression part of the pattern, that is, the +	   regnum-th regstart pointer points to where in the pattern we began +	   matching and the regnum-th regend points to right after where we +	   stopped matching the regnum-th subexpression.  (The zeroth register +	   keeps track of what the whole pattern matches.)  */ +#ifdef MATCH_MAY_ALLOCATE		/* otherwise, these are global.  */ +	const char **regstart, **regend; +#endif + +	/* If a group that's operated upon by a repetition operator fails to +	   match anything, then the register for its start will need to be +	   restored because it will have been set to wherever in the string we +	   are when we last see its open-group operator.  Similarly for a +	   register's end.  
*/ +#ifdef MATCH_MAY_ALLOCATE		/* otherwise, these are global.  */ +	const char **old_regstart, **old_regend; +#endif + +	/* The is_active field of reg_info helps us keep track of which (possibly +	   nested) subexpressions we are currently in. The matched_something +	   field of reg_info[reg_num] helps us tell whether or not we have +	   matched any of the pattern so far this time through the reg_num-th +	   subexpression.  These two fields get reset each time through any +	   loop their register is in.  */ +#ifdef MATCH_MAY_ALLOCATE		/* otherwise, this is global.  */ +	register_info_type *reg_info; +#endif + +	/* The following record the register info as found in the above +	   variables when we find a match better than any we've seen before. +	   This happens as we backtrack through the failure points, which in +	   turn happens only if we have not yet matched the entire string. */ +	unsigned best_regs_set = false; + +#ifdef MATCH_MAY_ALLOCATE		/* otherwise, these are global.  */ +	const char **best_regstart, **best_regend; +#endif + +	/* Logically, this is `best_regend[0]'.  But we don't want to have to +	   allocate space for that if we're not allocating space for anything +	   else (see below).  Also, we never need info about register 0 for +	   any of the other register vectors, and it seems rather a kludge to +	   treat `best_regend' differently than the rest.  So we keep track of +	   the end of the best match so far in a separate variable.  We +	   initialize this to NULL so that when we backtrack the first time +	   and need to test it, it's not garbage.  */ +	const char *match_end = NULL; + +	/* This helps SET_REGS_MATCHED avoid doing redundant work.  */ +	int set_regs_matched_done = 0; + +	/* Used when we pop values we don't care about.  */ +#ifdef MATCH_MAY_ALLOCATE		/* otherwise, these are global.  */ +	const char **reg_dummy; +	register_info_type *reg_info_dummy; +#endif + +#ifdef DEBUG +	/* Counts the total number of registers pushed.  
*/ +	unsigned num_regs_pushed = 0; +#endif + +	DEBUG_PRINT1("\n\nEntering re_match_2.\n"); + +	INIT_FAIL_STACK(); + +#ifdef MATCH_MAY_ALLOCATE +	/* Do not bother to initialize all the register variables if there are +	   no groups in the pattern, as it takes a fair amount of time.  If +	   there are groups, we include space for register 0 (the whole +	   pattern), even though we never use it, since it simplifies the +	   array indexing.  We should fix this.  */ +	if (bufp->re_nsub) { +		regstart = REGEX_TALLOC(num_regs, const char *); +		regend = REGEX_TALLOC(num_regs, const char *); +		old_regstart = REGEX_TALLOC(num_regs, const char *); +		old_regend = REGEX_TALLOC(num_regs, const char *); +		best_regstart = REGEX_TALLOC(num_regs, const char *); +		best_regend = REGEX_TALLOC(num_regs, const char *); + +		reg_info = REGEX_TALLOC(num_regs, register_info_type); +		reg_dummy = REGEX_TALLOC(num_regs, const char *); + +		reg_info_dummy = REGEX_TALLOC(num_regs, register_info_type); + +		if (!(regstart && regend && old_regstart && old_regend && reg_info +			  && best_regstart && best_regend && reg_dummy +			  && reg_info_dummy)) { +			FREE_VARIABLES(); +			return -2; +		} +	} else { +		/* We must initialize all our variables to NULL, so that +		   `FREE_VARIABLES' doesn't try to free them.  */ +		regstart = regend = old_regstart = old_regend = best_regstart +			= best_regend = reg_dummy = NULL; +		reg_info = reg_info_dummy = (register_info_type *) NULL; +	} +#endif							/* MATCH_MAY_ALLOCATE */ + +	/* The starting position is bogus.  */ +	if (pos < 0 || pos > size1 + size2) { +		FREE_VARIABLES(); +		return -1; +	} + +	/* Initialize subexpression text positions to -1 to mark ones that no +	   start_memory/stop_memory has been seen for. Also initialize the +	   register information struct.  
*/ +	for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) { +		regstart[mcnt] = regend[mcnt] +			= old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; + +		REG_MATCH_NULL_STRING_P(reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; +		IS_ACTIVE(reg_info[mcnt]) = 0; +		MATCHED_SOMETHING(reg_info[mcnt]) = 0; +		EVER_MATCHED_SOMETHING(reg_info[mcnt]) = 0; +	} + +	/* We move `string1' into `string2' if the latter's empty -- but not if +	   `string1' is null.  */ +	if (size2 == 0 && string1 != NULL) { +		string2 = string1; +		size2 = size1; +		string1 = 0; +		size1 = 0; +	} +	end1 = string1 + size1; +	end2 = string2 + size2; + +	/* Compute where to stop matching, within the two strings.  */ +	if (stop <= size1) { +		end_match_1 = string1 + stop; +		end_match_2 = string2; +	} else { +		end_match_1 = end1; +		end_match_2 = string2 + stop - size1; +	} + +	/* `p' scans through the pattern as `d' scans through the data. +	   `dend' is the end of the input string that `d' points within.  `d' +	   is advanced into the following input string whenever necessary, but +	   this happens before fetching; therefore, at the beginning of the +	   loop, `d' can be pointing at the end of a string, but it cannot +	   equal `string2'.  */ +	if (size1 > 0 && pos <= size1) { +		d = string1 + pos; +		dend = end_match_1; +	} else { +		d = string2 + pos - size1; +		dend = end_match_2; +	} + +	DEBUG_PRINT1("The compiled pattern is:\n"); +	DEBUG_PRINT_COMPILED_PATTERN(bufp, p, pend); +	DEBUG_PRINT1("The string to match is: `"); +	DEBUG_PRINT_DOUBLE_STRING(d, string1, size1, string2, size2); +	DEBUG_PRINT1("'\n"); + +	/* This loops over pattern commands.  It exits by returning from the +	   function if the match is complete, or it drops through if the match +	   fails at this starting point in the input data.  */ +	for (;;) { +#ifdef _LIBC +		DEBUG_PRINT2("\n%p: ", p); +#else +		DEBUG_PRINT2("\n0x%x: ", p); +#endif + +		if (p == pend) {		/* End of pattern means we might have succeeded.  
*/ +			DEBUG_PRINT1("end of pattern ... "); + +			/* If we haven't matched the entire string, and we want the +			   longest match, try backtracking.  */ +			if (d != end_match_2) { +				/* 1 if this match ends in the same string (string1 or string2) +				   as the best previous match.  */ +				boolean same_str_p = (FIRST_STRING_P(match_end) +									  == MATCHING_IN_FIRST_STRING); + +				/* 1 if this match is the best seen so far.  */ +				boolean best_match_p; + +				/* AIX compiler got confused when this was combined +				   with the previous declaration.  */ +				if (same_str_p) +					best_match_p = d > match_end; +				else +					best_match_p = !MATCHING_IN_FIRST_STRING; + +				DEBUG_PRINT1("backtracking.\n"); + +				if (!FAIL_STACK_EMPTY()) {	/* More failure points to try.  */ + +					/* If exceeds best match so far, save it.  */ +					if (!best_regs_set || best_match_p) { +						best_regs_set = true; +						match_end = d; + +						DEBUG_PRINT1("\nSAVING match as best so far.\n"); + +						for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) { +							best_regstart[mcnt] = regstart[mcnt]; +							best_regend[mcnt] = regend[mcnt]; +						} +					} +					goto fail; +				} + +				/* If no failure points, don't restore garbage.  And if +				   last match is real best match, don't restore second +				   best one. */ +				else if (best_regs_set && !best_match_p) { +				  restore_best_regs: +					/* Restore best match.  It may happen that `dend == +					   end_match_1' while the restored d is in string2. +					   For example, the pattern `x.*y.*z' against the +					   strings `x-' and `y-z-', if the two strings are +					   not consecutive in memory.  */ +					DEBUG_PRINT1("Restoring best registers.\n"); + +					d = match_end; +					dend = ((d >= string1 && d <= end1) +							? 
end_match_1 : end_match_2); + +					for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) { +						regstart[mcnt] = best_regstart[mcnt]; +						regend[mcnt] = best_regend[mcnt]; +					} +				} +			} +			/* d != end_match_2 */ +		  succeed_label: +			DEBUG_PRINT1("Accepting match.\n"); + +			/* If caller wants register contents data back, do it.  */ +			if (regs && !bufp->no_sub) { +				/* Have the register data arrays been allocated?  */ +				if (bufp->regs_allocated == REGS_UNALLOCATED) {	/* No.  So allocate them with malloc.  We need one +																   extra element beyond `num_regs' for the `-1' marker +																   GNU code uses.  */ +					regs->num_regs = MAX(RE_NREGS, num_regs + 1); +					regs->start = TALLOC(regs->num_regs, regoff_t); +					regs->end = TALLOC(regs->num_regs, regoff_t); +					if (regs->start == NULL || regs->end == NULL) { +						FREE_VARIABLES(); +						return -2; +					} +					bufp->regs_allocated = REGS_REALLOCATE; +				} else if (bufp->regs_allocated == REGS_REALLOCATE) {	/* Yes.  If we need more elements than were already +																		   allocated, reallocate them.  If we need fewer, just +																		   leave it alone.  */ +					if (regs->num_regs < num_regs + 1) { +						regs->num_regs = num_regs + 1; +						RETALLOC(regs->start, regs->num_regs, regoff_t); +						RETALLOC(regs->end, regs->num_regs, regoff_t); +						if (regs->start == NULL || regs->end == NULL) { +							FREE_VARIABLES(); +							return -2; +						} +					} +				} else { +					/* These braces fend off a "empty body in an else-statement" +					   warning under GCC when assert expands to nothing.  */ +					assert(bufp->regs_allocated == REGS_FIXED); +				} + +				/* Convert the pointer data in `regstart' and `regend' to +				   indices.  Register zero has to be set differently, +				   since we haven't kept track of any info for it.  
*/ +				if (regs->num_regs > 0) { +					regs->start[0] = pos; +					regs->end[0] = (MATCHING_IN_FIRST_STRING +									? ((regoff_t) (d - string1)) +									: ((regoff_t) (d - string2 + size1))); +				} + +				/* Go through the first `min (num_regs, regs->num_regs)' +				   registers, since that is all we initialized.  */ +				for (mcnt = 1; +					 (unsigned) mcnt < MIN(num_regs, regs->num_regs); +					 mcnt++) { +					if (REG_UNSET(regstart[mcnt]) +						|| REG_UNSET(regend[mcnt])) regs->start[mcnt] = +							regs->end[mcnt] = -1; +					else { +						regs->start[mcnt] +							= (regoff_t) POINTER_TO_OFFSET(regstart[mcnt]); +						regs->end[mcnt] +							= (regoff_t) POINTER_TO_OFFSET(regend[mcnt]); +					} +				} + +				/* If the regs structure we return has more elements than +				   were in the pattern, set the extra elements to -1.  If +				   we (re)allocated the registers, this is the case, +				   because we always allocate enough to have at least one +				   -1 at the end.  */ +				for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; +					 mcnt++) +					regs->start[mcnt] = regs->end[mcnt] = -1; +			} +			/* regs && !bufp->no_sub */ +			DEBUG_PRINT4 +				("%u failure points pushed, %u popped (%u remain).\n", +				 nfailure_points_pushed, nfailure_points_popped, +				 nfailure_points_pushed - nfailure_points_popped); +			DEBUG_PRINT2("%u registers pushed.\n", num_regs_pushed); + +			mcnt = d - pos - (MATCHING_IN_FIRST_STRING +							  ? string1 : string2 - size1); + +			DEBUG_PRINT2("Returning %d from re_match_2.\n", mcnt); + +			FREE_VARIABLES(); +			return mcnt; +		} + +		/* Otherwise match next pattern command.  */ +		switch (SWITCH_ENUM_CAST((re_opcode_t) * p++)) { +			/* Ignore these.  Used to ignore the n of succeed_n's which +			   currently have n == 0.  
*/ +		case no_op: +			DEBUG_PRINT1("EXECUTING no_op.\n"); +			break; + +		case succeed: +			DEBUG_PRINT1("EXECUTING succeed.\n"); +			goto succeed_label; + +			/* Match the next n pattern characters exactly.  The following +			   byte in the pattern defines n, and the n bytes after that +			   are the characters to match.  */ +		case exactn: +			mcnt = *p++; +			DEBUG_PRINT2("EXECUTING exactn %d.\n", mcnt); + +			/* This is written out as an if-else so we don't waste time +			   testing `translate' inside the loop.  */ +			if (translate) { +				do { +					PREFETCH(); +					if ((unsigned char) translate[(unsigned char) *d++] +						!= (unsigned char) *p++) +						goto fail; +				} +				while (--mcnt); +			} else { +				do { +					PREFETCH(); +					if (*d++ != (char) *p++) +						goto fail; +				} +				while (--mcnt); +			} +			SET_REGS_MATCHED(); +			break; + + +			/* Match any character except possibly a newline or a null.  */ +		case anychar: +			DEBUG_PRINT1("EXECUTING anychar.\n"); + +			PREFETCH(); + +			if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE(*d) == '\n') +				|| (bufp->syntax & RE_DOT_NOT_NULL +					&& TRANSLATE(*d) == '\000')) goto fail; + +			SET_REGS_MATCHED(); +			DEBUG_PRINT2("  Matched `%d'.\n", *d); +			d++; +			break; + + +		case charset: +		case charset_not: +		{ +			register unsigned char c; +			boolean not = (re_opcode_t) * (p - 1) == charset_not; + +			DEBUG_PRINT2("EXECUTING charset%s.\n", not ? "_not" : ""); + +			PREFETCH(); +			c = TRANSLATE(*d);	/* The character to match.  */ + +			/* Cast to `unsigned' instead of `unsigned char' in case the +			   bit list is a full 32 bytes long.  */ +			if (c < (unsigned) (*p * BYTEWIDTH) +				&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) +				not = !not; + +			p += 1 + *p; + +			if (!not) +				goto fail; + +			SET_REGS_MATCHED(); +			d++; +			break; +		} + + +			/* The beginning of a group is represented by start_memory. 
+			   The arguments are the register number in the next byte, and the +			   number of groups inner to this one in the next.  The text +			   matched within the group is recorded (in the internal +			   registers data structure) under the register number.  */ +		case start_memory: +			DEBUG_PRINT3("EXECUTING start_memory %d (%d):\n", *p, p[1]); + +			/* Find out if this group can match the empty string.  */ +			p1 = p;				/* To send to group_match_null_string_p.  */ + +			if (REG_MATCH_NULL_STRING_P(reg_info[*p]) == +				MATCH_NULL_UNSET_VALUE) +					REG_MATCH_NULL_STRING_P(reg_info[*p]) = +					group_match_null_string_p(&p1, pend, reg_info); + +			/* Save the position in the string where we were the last time +			   we were at this open-group operator in case the group is +			   operated upon by a repetition operator, e.g., with `(a*)*b' +			   against `ab'; then we want to ignore where we are now in +			   the string in case this attempt to match fails.  */ +			old_regstart[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p]) +				? REG_UNSET(regstart[*p]) ? d : regstart[*p] +				: regstart[*p]; +			DEBUG_PRINT2("  old_regstart: %d\n", +						 POINTER_TO_OFFSET(old_regstart[*p])); + +			regstart[*p] = d; +			DEBUG_PRINT2("  regstart: %d\n", +						 POINTER_TO_OFFSET(regstart[*p])); + +			IS_ACTIVE(reg_info[*p]) = 1; +			MATCHED_SOMETHING(reg_info[*p]) = 0; + +			/* Clear this whenever we change the register activity status.  */ +			set_regs_matched_done = 0; + +			/* This is the new highest active register.  */ +			highest_active_reg = *p; + +			/* If nothing was active before, this is the new lowest active +			   register.  */ +			if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) +				lowest_active_reg = *p; + +			/* Move past the register number and inner group count.  */ +			p += 2; +			just_past_start_mem = p; + +			break; + + +			/* The stop_memory opcode represents the end of a group.  
Its +			   arguments are the same as start_memory's: the register +			   number, and the number of inner groups.  */ +		case stop_memory: +			DEBUG_PRINT3("EXECUTING stop_memory %d (%d):\n", *p, p[1]); + +			/* We need to save the string position the last time we were at +			   this close-group operator in case the group is operated +			   upon by a repetition operator, e.g., with `((a*)*(b*)*)*' +			   against `aba'; then we want to ignore where we are now in +			   the string in case this attempt to match fails.  */ +			old_regend[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p]) +				? REG_UNSET(regend[*p]) ? d : regend[*p] +				: regend[*p]; +			DEBUG_PRINT2("      old_regend: %d\n", +						 POINTER_TO_OFFSET(old_regend[*p])); + +			regend[*p] = d; +			DEBUG_PRINT2("      regend: %d\n", +						 POINTER_TO_OFFSET(regend[*p])); + +			/* This register isn't active anymore.  */ +			IS_ACTIVE(reg_info[*p]) = 0; + +			/* Clear this whenever we change the register activity status.  */ +			set_regs_matched_done = 0; + +			/* If this was the only register active, nothing is active +			   anymore.  */ +			if (lowest_active_reg == highest_active_reg) { +				lowest_active_reg = NO_LOWEST_ACTIVE_REG; +				highest_active_reg = NO_HIGHEST_ACTIVE_REG; +			} else {			/* We must scan for the new highest active register, since +								   it isn't necessarily one less than now: consider +								   (a(b)c(d(e)f)g).  When group 3 ends, after the f), the +								   new highest active register is 1.  */ +				unsigned char r = *p - 1; + +				while (r > 0 && !IS_ACTIVE(reg_info[r])) +					r--; + +				/* If we end up at register zero, that means that we saved +				   the registers as the result of an `on_failure_jump', not +				   a `start_memory', and we jumped to past the innermost +				   `stop_memory'.  For example, in ((.)*) we save +				   registers 1 and 2 as a result of the *, but when we pop +				   back to the second ), we are at the stop_memory 1. +				   Thus, nothing is active. 
 */ +				if (r == 0) { +					lowest_active_reg = NO_LOWEST_ACTIVE_REG; +					highest_active_reg = NO_HIGHEST_ACTIVE_REG; +				} else +					highest_active_reg = r; +			} + +			/* If just failed to match something this time around with a +			   group that's operated on by a repetition operator, try to +			   force exit from the ``loop'', and restore the register +			   information for this group that we had before trying this +			   last match.  */ +			if ((!MATCHED_SOMETHING(reg_info[*p]) +				 || just_past_start_mem == p - 1) +				&& (p + 2) < pend) { +				boolean is_a_jump_n = false; + +				p1 = p + 2; +				mcnt = 0; +				switch ((re_opcode_t) * p1++) { +				case jump_n: +					is_a_jump_n = true; +				case pop_failure_jump: +				case maybe_pop_jump: +				case jump: +				case dummy_failure_jump: +					EXTRACT_NUMBER_AND_INCR(mcnt, p1); +					if (is_a_jump_n) +						p1 += 2; +					break; + +				default: +					/* do nothing */ ; +				} +				p1 += mcnt; + +				/* If the next operation is a jump backwards in the pattern +				   to an on_failure_jump right before the start_memory +				   corresponding to this stop_memory, exit from the loop +				   by forcing a failure after pushing on the stack the +				   on_failure_jump's jump in the pattern, and d.  */ +				if (mcnt < 0 && (re_opcode_t) * p1 == on_failure_jump +					&& (re_opcode_t) p1[3] == start_memory && p1[4] == *p) { +					/* If this group ever matched anything, then restore +					   what its registers were before trying this last +					   failed match, e.g., with `(a*)*b' against `ab' for +					   regstart[1], and, e.g., with `((a*)*(b*)*)*' +					   against `aba' for regend[3]. + +					   Also restore the registers for inner groups for, +					   e.g., `((a*)(b*))*' against `aba' (register 3 would +					   otherwise get trashed).  
*/ + +					if (EVER_MATCHED_SOMETHING(reg_info[*p])) { +						unsigned r; + +						EVER_MATCHED_SOMETHING(reg_info[*p]) = 0; + +						/* Restore this and inner groups' (if any) registers.  */ +						for (r = *p; +							 r < (unsigned) *p + (unsigned) *(p + 1); r++) { +							regstart[r] = old_regstart[r]; + +							/* xx why this test?  */ +							if (old_regend[r] >= regstart[r]) +								regend[r] = old_regend[r]; +						} +					} +					p1++; +					EXTRACT_NUMBER_AND_INCR(mcnt, p1); +					PUSH_FAILURE_POINT(p1 + mcnt, d, -2); + +					goto fail; +				} +			} + +			/* Move past the register number and the inner group count.  */ +			p += 2; +			break; + + +			/* \<digit> has been turned into a `duplicate' command which is +			   followed by the numeric value of <digit> as the register number.  */ +		case duplicate: +		{ +			register const char *d2, *dend2; +			int regno = *p++;	/* Get which register to match against.  */ + +			DEBUG_PRINT2("EXECUTING duplicate %d.\n", regno); + +			/* Can't back reference a group which we've never matched.  */ +			if (REG_UNSET(regstart[regno]) || REG_UNSET(regend[regno])) +				goto fail; + +			/* Where in input to try to start matching.  */ +			d2 = regstart[regno]; + +			/* Where to stop matching; if both the place to start and +			   the place to stop matching are in the same string, then +			   set to the place to stop, otherwise, for now have to use +			   the end of the first string.  */ + +			dend2 = ((FIRST_STRING_P(regstart[regno]) +					  == FIRST_STRING_P(regend[regno])) +					 ? regend[regno] : end_match_1); +			for (;;) { +				/* If necessary, advance to next segment in register +				   contents.  */ +				while (d2 == dend2) { +					if (dend2 == end_match_2) +						break; +					if (dend2 == regend[regno]) +						break; + +					/* End of string1 => advance to string2. 
*/ +					d2 = string2; +					dend2 = regend[regno]; +				} +				/* At end of register contents => success */ +				if (d2 == dend2) +					break; + +				/* If necessary, advance to next segment in data.  */ +				PREFETCH(); + +				/* How many characters left in this segment to match.  */ +				mcnt = dend - d; + +				/* Want how many consecutive characters we can match in +				   one shot, so, if necessary, adjust the count.  */ +				if (mcnt > dend2 - d2) +					mcnt = dend2 - d2; + +				/* Compare that many; failure if mismatch, else move +				   past them.  */ +				if (translate ? bcmp_translate(d, d2, mcnt, translate) +					: memcmp(d, d2, mcnt)) +					goto fail; +				d += mcnt, d2 += mcnt; + +				/* Do this because we've match some characters.  */ +				SET_REGS_MATCHED(); +			} +		} +			break; + + +			/* begline matches the empty string at the beginning of the string +			   (unless `not_bol' is set in `bufp'), and, if +			   `newline_anchor' is set, after newlines.  */ +		case begline: +			DEBUG_PRINT1("EXECUTING begline.\n"); + +			if (AT_STRINGS_BEG(d)) { +				if (!bufp->not_bol) +					break; +			} else if (d[-1] == '\n' && bufp->newline_anchor) { +				break; +			} +			/* In all other cases, we fail.  */ +			goto fail; + + +			/* endline is the dual of begline.  */ +		case endline: +			DEBUG_PRINT1("EXECUTING endline.\n"); + +			if (AT_STRINGS_END(d)) { +				if (!bufp->not_eol) +					break; +			} + +			/* We have to ``prefetch'' the next character.  */ +			else if ((d == end1 ? *string2 : *d) == '\n' +					 && bufp->newline_anchor) { +				break; +			} +			goto fail; + + +			/* Match at the very beginning of the data.  */ +		case begbuf: +			DEBUG_PRINT1("EXECUTING begbuf.\n"); +			if (AT_STRINGS_BEG(d)) +				break; +			goto fail; + + +			/* Match at the very end of the data.  */ +		case endbuf: +			DEBUG_PRINT1("EXECUTING endbuf.\n"); +			if (AT_STRINGS_END(d)) +				break; +			goto fail; + + +			/* on_failure_keep_string_jump is used to optimize `.*\n'.  
It +			   pushes NULL as the value for the string on the stack.  Then +			   `pop_failure_point' will keep the current value for the +			   string, instead of restoring it.  To see why, consider +			   matching `foo\nbar' against `.*\n'.  The .* matches the foo; +			   then the . fails against the \n.  But the next thing we want +			   to do is match the \n against the \n; if we restored the +			   string value, we would be back at the foo. + +			   Because this is used only in specific cases, we don't need to +			   check all the things that `on_failure_jump' does, to make +			   sure the right things get saved on the stack.  Hence we don't +			   share its code.  The only reason to push anything on the +			   stack at all is that otherwise we would have to change +			   `anychar's code to do something besides goto fail in this +			   case; that seems worse than this.  */ +		case on_failure_keep_string_jump: +			DEBUG_PRINT1("EXECUTING on_failure_keep_string_jump"); + +			EXTRACT_NUMBER_AND_INCR(mcnt, p); +#ifdef _LIBC +			DEBUG_PRINT3(" %d (to %p):\n", mcnt, p + mcnt); +#else +			DEBUG_PRINT3(" %d (to 0x%x):\n", mcnt, p + mcnt); +#endif + +			PUSH_FAILURE_POINT(p + mcnt, NULL, -2); +			break; + + +			/* Uses of on_failure_jump: + +			   Each alternative starts with an on_failure_jump that points +			   to the beginning of the next alternative.  Each alternative +			   except the last ends with a jump that in effect jumps past +			   the rest of the alternatives.  (They really jump to the +			   ending jump of the following alternative, because tensioning +			   these jumps is a hassle.) + +			   Repeats start with an on_failure_jump that points past both +			   the repetition text and either the following jump or +			   pop_failure_jump back to this on_failure_jump.  
*/ +		case on_failure_jump: +		  on_failure: +			DEBUG_PRINT1("EXECUTING on_failure_jump"); + +			EXTRACT_NUMBER_AND_INCR(mcnt, p); +#ifdef _LIBC +			DEBUG_PRINT3(" %d (to %p)", mcnt, p + mcnt); +#else +			DEBUG_PRINT3(" %d (to 0x%x)", mcnt, p + mcnt); +#endif + +			/* If this on_failure_jump comes right before a group (i.e., +			   the original * applied to a group), save the information +			   for that group and all inner ones, so that if we fail back +			   to this point, the group's information will be correct. +			   For example, in \(a*\)*\1, we need the preceding group, +			   and in \(zz\(a*\)b*\)\2, we need the inner group.  */ + +			/* We can't use `p' to check ahead because we push +			   a failure point to `p + mcnt' after we do this.  */ +			p1 = p; + +			/* We need to skip no_op's before we look for the +			   start_memory in case this on_failure_jump is happening as +			   the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 +			   against aba.  */ +			while (p1 < pend && (re_opcode_t) * p1 == no_op) +				p1++; + +			if (p1 < pend && (re_opcode_t) * p1 == start_memory) { +				/* We have a new highest active register now.  This will +				   get reset at the start_memory we are about to get to, +				   but we will have saved all the registers relevant to +				   this repetition op, as described above.  */ +				highest_active_reg = *(p1 + 1) + *(p1 + 2); +				if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) +					lowest_active_reg = *(p1 + 1); +			} + +			DEBUG_PRINT1(":\n"); +			PUSH_FAILURE_POINT(p + mcnt, d, -2); +			break; + + +			/* A smart repeat ends with `maybe_pop_jump'. +			   We change it to either `pop_failure_jump' or `jump'.  */ +		case maybe_pop_jump: +			EXTRACT_NUMBER_AND_INCR(mcnt, p); +			DEBUG_PRINT2("EXECUTING maybe_pop_jump %d.\n", mcnt); +			{ +				register unsigned char *p2 = p; + +				/* Compare the beginning of the repeat with what in the +				   pattern follows its end. 
If we can establish that there +				   is nothing that they would both match, i.e., that we +				   would have to backtrack because of (as in, e.g., `a*a') +				   then we can change to pop_failure_jump, because we'll +				   never have to backtrack. + +				   This is not true in the case of alternatives: in +				   `(a|ab)*' we do need to backtrack to the `ab' alternative +				   (e.g., if the string was `ab').  But instead of trying to +				   detect that here, the alternative has put on a dummy +				   failure point which is what we will end up popping.  */ + +				/* Skip over open/close-group commands. +				   If what follows this loop is a ...+ construct, +				   look at what begins its body, since we will have to +				   match at least one of that.  */ +				while (1) { +					if (p2 + 2 < pend +						&& ((re_opcode_t) * p2 == stop_memory +							|| (re_opcode_t) * p2 == start_memory)) +						p2 += 3; +					else if (p2 + 6 < pend +							 && (re_opcode_t) * p2 == dummy_failure_jump) +							p2 += 6; +					else +						break; +				} + +				p1 = p + mcnt; +				/* p1[0] ... p1[2] are the `on_failure_jump' corresponding +				   to the `maybe_finalize_jump' of this case.  Examine what +				   follows.  */ + +				/* If we're at the end of the pattern, we can change.  */ +				if (p2 == pend) { +					/* Consider what happens when matching ":\(.*\)" +					   against ":/".  I don't really understand this code +					   yet.  */ +					p[-3] = (unsigned char) pop_failure_jump; +					DEBUG_PRINT1 +						("  End of pattern: change to `pop_failure_jump'.\n"); +				} + +				else if ((re_opcode_t) * p2 == exactn +						 || (bufp->newline_anchor +							 && (re_opcode_t) * p2 == endline)) { +					register unsigned char c = +						*p2 == (unsigned char) endline ? 
'\n' : p2[2]; + +					if ((re_opcode_t) p1[3] == exactn && p1[5] != c) { +						p[-3] = (unsigned char) pop_failure_jump; +						DEBUG_PRINT3("  %c != %c => pop_failure_jump.\n", +									 c, p1[5]); +					} + +					else if ((re_opcode_t) p1[3] == charset +							 || (re_opcode_t) p1[3] == charset_not) { +						int not = (re_opcode_t) p1[3] == charset_not; + +						if (c < (unsigned char) (p1[4] * BYTEWIDTH) +							&& p1[5 + +								  c / BYTEWIDTH] & (1 << (c % +														  BYTEWIDTH))) not +								= !not; + +						/* `not' is equal to 1 if c would match, which means +						   that we can't change to pop_failure_jump.  */ +						if (!not) { +							p[-3] = (unsigned char) pop_failure_jump; +							DEBUG_PRINT1 +								("  No match => pop_failure_jump.\n"); +						} +					} +				} else if ((re_opcode_t) * p2 == charset) { +					/* We win if the first character of the loop is not part +					   of the charset.  */ +					if ((re_opcode_t) p1[3] == exactn +						&& !((int) p2[1] * BYTEWIDTH > (int) p1[5] +							 && (p2[2 + p1[5] / BYTEWIDTH] +								 & (1 << (p1[5] % BYTEWIDTH))))) { +						p[-3] = (unsigned char) pop_failure_jump; +						DEBUG_PRINT1("  No match => pop_failure_jump.\n"); +					} + +					else if ((re_opcode_t) p1[3] == charset_not) { +						int idx; + +						/* We win if the charset_not inside the loop +						   lists every character listed in the charset after.  
*/ +						for (idx = 0; idx < (int) p2[1]; idx++) +							if (!(p2[2 + idx] == 0 || (idx < (int) p1[4] +													   && +													   ((p2 +														 [2 + +														  idx] & ~p1[5 + +																	 idx]) +														== 0)))) +								break; + +						if (idx == p2[1]) { +							p[-3] = (unsigned char) pop_failure_jump; +							DEBUG_PRINT1 +								("  No match => pop_failure_jump.\n"); +						} +					} else if ((re_opcode_t) p1[3] == charset) { +						int idx; + +						/* We win if the charset inside the loop +						   has no overlap with the one after the loop.  */ +						for (idx = 0; +							 idx < (int) p2[1] && idx < (int) p1[4]; idx++) +							if ((p2[2 + idx] & p1[5 + idx]) != 0) +								break; + +						if (idx == p2[1] || idx == p1[4]) { +							p[-3] = (unsigned char) pop_failure_jump; +							DEBUG_PRINT1 +								("  No match => pop_failure_jump.\n"); +						} +					} +				} +			} +			p -= 2;				/* Point at relative address again.  */ +			if ((re_opcode_t) p[-1] != pop_failure_jump) { +				p[-1] = (unsigned char) jump; +				DEBUG_PRINT1("  Match => jump.\n"); +				goto unconditional_jump; +			} +			/* Note fall through.  */ + + +			/* The end of a simple repeat has a pop_failure_jump back to +			   its matching on_failure_jump, where the latter will push a +			   failure point.  The pop_failure_jump takes off failure +			   points put on by this pop_failure_jump's matching +			   on_failure_jump; we got through the pattern to here from the +			   matching on_failure_jump, so didn't fail.  */ +		case pop_failure_jump: +		{ +			/* We need to pass separate storage for the lowest and +			   highest registers, even though we don't care about the +			   actual values.  Otherwise, we will restore only one +			   register from the stack, since lowest will == highest in +			   `pop_failure_point'.  
*/ +			active_reg_t dummy_low_reg, dummy_high_reg; +			unsigned char *pdummy; +			const char *sdummy; + +			DEBUG_PRINT1("EXECUTING pop_failure_jump.\n"); +			POP_FAILURE_POINT(sdummy, pdummy, +							  dummy_low_reg, dummy_high_reg, +							  reg_dummy, reg_dummy, reg_info_dummy); +		} +			/* Note fall through.  */ + +		  unconditional_jump: +#ifdef _LIBC +			DEBUG_PRINT2("\n%p: ", p); +#else +			DEBUG_PRINT2("\n0x%x: ", p); +#endif +			/* Note fall through.  */ + +			/* Unconditionally jump (without popping any failure points).  */ +		case jump: +			EXTRACT_NUMBER_AND_INCR(mcnt, p);	/* Get the amount to jump.  */ +			DEBUG_PRINT2("EXECUTING jump %d ", mcnt); +			p += mcnt;			/* Do the jump.  */ +#ifdef _LIBC +			DEBUG_PRINT2("(to %p).\n", p); +#else +			DEBUG_PRINT2("(to 0x%x).\n", p); +#endif +			break; + + +			/* We need this opcode so we can detect where alternatives end +			   in `group_match_null_string_p' et al.  */ +		case jump_past_alt: +			DEBUG_PRINT1("EXECUTING jump_past_alt.\n"); +			goto unconditional_jump; + + +			/* Normally, the on_failure_jump pushes a failure point, which +			   then gets popped at pop_failure_jump.  We will end up at +			   pop_failure_jump, also, and with a pattern of, say, `a+', we +			   are skipping over the on_failure_jump, so we have to push +			   something meaningless for pop_failure_jump to pop.  */ +		case dummy_failure_jump: +			DEBUG_PRINT1("EXECUTING dummy_failure_jump.\n"); +			/* It doesn't matter what we push for the string here.  What +			   the code at `fail' tests is the value for the pattern.  */ +			PUSH_FAILURE_POINT(NULL, NULL, -2); +			goto unconditional_jump; + + +			/* At the end of an alternative, we need to push a dummy failure +			   point in case we are followed by a `pop_failure_jump', because +			   we don't want the failure point for the alternative to be +			   popped.  For example, matching `(a|ab)*' against `aab' +			   requires that we match the `ab' alternative.  
*/ +		case push_dummy_failure: +			DEBUG_PRINT1("EXECUTING push_dummy_failure.\n"); +			/* See comments just above at `dummy_failure_jump' about the +			   two zeroes.  */ +			PUSH_FAILURE_POINT(NULL, NULL, -2); +			break; + +			/* Have to succeed matching what follows at least n times. +			   After that, handle like `on_failure_jump'.  */ +		case succeed_n: +			EXTRACT_NUMBER(mcnt, p + 2); +			DEBUG_PRINT2("EXECUTING succeed_n %d.\n", mcnt); + +			assert(mcnt >= 0); +			/* Originally, this is how many times we HAVE to succeed.  */ +			if (mcnt > 0) { +				mcnt--; +				p += 2; +				STORE_NUMBER_AND_INCR(p, mcnt); +#ifdef _LIBC +				DEBUG_PRINT3("  Setting %p to %d.\n", p - 2, mcnt); +#else +				DEBUG_PRINT3("  Setting 0x%x to %d.\n", p - 2, mcnt); +#endif +			} else if (mcnt == 0) { +#ifdef _LIBC +				DEBUG_PRINT2("  Setting two bytes from %p to no_op.\n", +							 p + 2); +#else +				DEBUG_PRINT2("  Setting two bytes from 0x%x to no_op.\n", +							 p + 2); +#endif +				p[2] = (unsigned char) no_op; +				p[3] = (unsigned char) no_op; +				goto on_failure; +			} +			break; + +		case jump_n: +			EXTRACT_NUMBER(mcnt, p + 2); +			DEBUG_PRINT2("EXECUTING jump_n %d.\n", mcnt); + +			/* Originally, this is how many times we CAN jump.  */ +			if (mcnt) { +				mcnt--; +				STORE_NUMBER(p + 2, mcnt); +#ifdef _LIBC +				DEBUG_PRINT3("  Setting %p to %d.\n", p + 2, mcnt); +#else +				DEBUG_PRINT3("  Setting 0x%x to %d.\n", p + 2, mcnt); +#endif +				goto unconditional_jump; +			} +			/* If don't have to jump any more, skip over the rest of command.  
*/ +			else +				p += 4; +			break; + +		case set_number_at: +		{ +			DEBUG_PRINT1("EXECUTING set_number_at.\n"); + +			EXTRACT_NUMBER_AND_INCR(mcnt, p); +			p1 = p + mcnt; +			EXTRACT_NUMBER_AND_INCR(mcnt, p); +#ifdef _LIBC +			DEBUG_PRINT3("  Setting %p to %d.\n", p1, mcnt); +#else +			DEBUG_PRINT3("  Setting 0x%x to %d.\n", p1, mcnt); +#endif +			STORE_NUMBER(p1, mcnt); +			break; +		} + +#if 0 +			/* The DEC Alpha C compiler 3.x generates incorrect code for the +			   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of +			   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the +			   macro and introducing temporary variables works around the bug.  */ + +		case wordbound: +			DEBUG_PRINT1("EXECUTING wordbound.\n"); +			if (AT_WORD_BOUNDARY(d)) +				break; +			goto fail; + +		case notwordbound: +			DEBUG_PRINT1("EXECUTING notwordbound.\n"); +			if (AT_WORD_BOUNDARY(d)) +				goto fail; +			break; +#else +		case wordbound: +		{ +			boolean prevchar, thischar; + +			DEBUG_PRINT1("EXECUTING wordbound.\n"); +			if (AT_STRINGS_BEG(d) || AT_STRINGS_END(d)) +				break; + +			prevchar = WORDCHAR_P(d - 1); +			thischar = WORDCHAR_P(d); +			if (prevchar != thischar) +				break; +			goto fail; +		} + +		case notwordbound: +		{ +			boolean prevchar, thischar; + +			DEBUG_PRINT1("EXECUTING notwordbound.\n"); +			if (AT_STRINGS_BEG(d) || AT_STRINGS_END(d)) +				goto fail; + +			prevchar = WORDCHAR_P(d - 1); +			thischar = WORDCHAR_P(d); +			if (prevchar != thischar) +				goto fail; +			break; +		} +#endif + +		case wordbeg: +			DEBUG_PRINT1("EXECUTING wordbeg.\n"); +			if (WORDCHAR_P(d) && (AT_STRINGS_BEG(d) || !WORDCHAR_P(d - 1))) +				break; +			goto fail; + +		case wordend: +			DEBUG_PRINT1("EXECUTING wordend.\n"); +			if (!AT_STRINGS_BEG(d) && WORDCHAR_P(d - 1) +				&& (!WORDCHAR_P(d) || AT_STRINGS_END(d))) +				break; +			goto fail; + +#ifdef emacs +		case before_dot: +			DEBUG_PRINT1("EXECUTING before_dot.\n"); +			if (PTR_CHAR_POS((unsigned char *) d) 
>= point) +				goto fail; +			break; + +		case at_dot: +			DEBUG_PRINT1("EXECUTING at_dot.\n"); +			if (PTR_CHAR_POS((unsigned char *) d) != point) +				goto fail; +			break; + +		case after_dot: +			DEBUG_PRINT1("EXECUTING after_dot.\n"); +			if (PTR_CHAR_POS((unsigned char *) d) <= point) +				goto fail; +			break; + +		case syntaxspec: +			DEBUG_PRINT2("EXECUTING syntaxspec %d.\n", mcnt); +			mcnt = *p++; +			goto matchsyntax; + +		case wordchar: +			DEBUG_PRINT1("EXECUTING Emacs wordchar.\n"); +			mcnt = (int) Sword; +		  matchsyntax: +			PREFETCH(); +			/* Can't use *d++ here; SYNTAX may be an unsafe macro.  */ +			d++; +			if (SYNTAX(d[-1]) != (enum syntaxcode) mcnt) +				goto fail; +			SET_REGS_MATCHED(); +			break; + +		case notsyntaxspec: +			DEBUG_PRINT2("EXECUTING notsyntaxspec %d.\n", mcnt); +			mcnt = *p++; +			goto matchnotsyntax; + +		case notwordchar: +			DEBUG_PRINT1("EXECUTING Emacs notwordchar.\n"); +			mcnt = (int) Sword; +		  matchnotsyntax: +			PREFETCH(); +			/* Can't use *d++ here; SYNTAX may be an unsafe macro.  */ +			d++; +			if (SYNTAX(d[-1]) == (enum syntaxcode) mcnt) +				goto fail; +			SET_REGS_MATCHED(); +			break; + +#else							/* not emacs */ +		case wordchar: +			DEBUG_PRINT1("EXECUTING non-Emacs wordchar.\n"); +			PREFETCH(); +			if (!WORDCHAR_P(d)) +				goto fail; +			SET_REGS_MATCHED(); +			d++; +			break; + +		case notwordchar: +			DEBUG_PRINT1("EXECUTING non-Emacs notwordchar.\n"); +			PREFETCH(); +			if (WORDCHAR_P(d)) +				goto fail; +			SET_REGS_MATCHED(); +			d++; +			break; +#endif							/* not emacs */ + +		default: +			abort(); +		} +		continue;				/* Successfully executed one pattern command; keep going.  */ + + +		/* We goto here if a matching operation fails. */ +	  fail: +		if (!FAIL_STACK_EMPTY()) {	/* A restart point is known.  Restore to that state.  
*/ +			DEBUG_PRINT1("\nFAIL:\n"); +			POP_FAILURE_POINT(d, p, +							  lowest_active_reg, highest_active_reg, +							  regstart, regend, reg_info); + +			/* If this failure point is a dummy, try the next one.  */ +			if (!p) +				goto fail; + +			/* If we failed to the end of the pattern, don't examine *p.  */ +			assert(p <= pend); +			if (p < pend) { +				boolean is_a_jump_n = false; + +				/* If failed to a backwards jump that's part of a repetition +				   loop, need to pop this failure point and use the next one.  */ +				switch ((re_opcode_t) * p) { +				case jump_n: +					is_a_jump_n = true; +				case maybe_pop_jump: +				case pop_failure_jump: +				case jump: +					p1 = p + 1; +					EXTRACT_NUMBER_AND_INCR(mcnt, p1); +					p1 += mcnt; + +					if ((is_a_jump_n && (re_opcode_t) * p1 == succeed_n) +						|| (!is_a_jump_n +							&& (re_opcode_t) * p1 == on_failure_jump)) +							goto fail; +					break; +				default: +					/* do nothing */ ; +				} +			} + +			if (d >= string1 && d <= end1) +				dend = end_match_1; +		} else +			break;				/* Matching at this starting point really fails.  */ +	}							/* for (;;) */ + +	if (best_regs_set) +		goto restore_best_regs; + +	FREE_VARIABLES(); + +	return -1;					/* Failure to match.  */ +}								/* re_match_2 */ + +/* Subroutine definitions for re_match_2.  */ + + +/* We are passed P pointing to a register number after a start_memory. + +   Return true if the pattern up to the corresponding stop_memory can +   match the empty string, and false otherwise. + +   If we find the matching stop_memory, sets P to point to one past its number. +   Otherwise, sets P to an undefined byte less than or equal to END. + +   We don't handle duplicates properly (yet).  */ + +static boolean group_match_null_string_p(p, end, reg_info) +unsigned char **p, *end; +register_info_type *reg_info; +{ +	int mcnt; + +	/* Point to after the args to the start_memory.  
*/ +	unsigned char *p1 = *p + 2; + +	while (p1 < end) { +		/* Skip over opcodes that can match nothing, and return true or +		   false, as appropriate, when we get to one that can't, or to the +		   matching stop_memory.  */ + +		switch ((re_opcode_t) * p1) { +			/* Could be either a loop or a series of alternatives.  */ +		case on_failure_jump: +			p1++; +			EXTRACT_NUMBER_AND_INCR(mcnt, p1); + +			/* If the next operation is not a jump backwards in the +			   pattern.  */ + +			if (mcnt >= 0) { +				/* Go through the on_failure_jumps of the alternatives, +				   seeing if any of the alternatives cannot match nothing. +				   The last alternative starts with only a jump, +				   whereas the rest start with on_failure_jump and end +				   with a jump, e.g., here is the pattern for `a|b|c': + +				   /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 +				   /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 +				   /exactn/1/c + +				   So, we have to first go through the first (n-1) +				   alternatives and then deal with the last one separately.  */ + + +				/* Deal with the first (n-1) alternatives, which start +				   with an on_failure_jump (see above) that jumps to right +				   past a jump_past_alt.  */ + +				while ((re_opcode_t) p1[mcnt - 3] == jump_past_alt) { +					/* `mcnt' holds how many bytes long the alternative +					   is, including the ending `jump_past_alt' and +					   its number.  */ + +					if (!alt_match_null_string_p(p1, p1 + mcnt - 3, +												 reg_info)) return false; + +					/* Move to right after this alternative, including the +					   jump_past_alt.  */ +					p1 += mcnt; + +					/* Break if it's the beginning of an n-th alternative +					   that doesn't begin with an on_failure_jump.  */ +					if ((re_opcode_t) * p1 != on_failure_jump) +						break; + +					/* Still have to check that it's not an n-th +					   alternative that starts with an on_failure_jump.  
*/ +					p1++; +					EXTRACT_NUMBER_AND_INCR(mcnt, p1); +					if ((re_opcode_t) p1[mcnt - 3] != jump_past_alt) { +						/* Get to the beginning of the n-th alternative.  */ +						p1 -= 3; +						break; +					} +				} + +				/* Deal with the last alternative: go back and get number +				   of the `jump_past_alt' just before it.  `mcnt' contains +				   the length of the alternative.  */ +				EXTRACT_NUMBER(mcnt, p1 - 2); + +				if (!alt_match_null_string_p(p1, p1 + mcnt, reg_info)) +					return false; + +				p1 += mcnt;		/* Get past the n-th alternative.  */ +			}					/* if mcnt > 0 */ +			break; + + +		case stop_memory: +			assert(p1[1] == **p); +			*p = p1 + 2; +			return true; + + +		default: +			if (!common_op_match_null_string_p(&p1, end, reg_info)) +				return false; +		} +	}							/* while p1 < end */ + +	return false; +}								/* group_match_null_string_p */ + + +/* Similar to group_match_null_string_p, but doesn't deal with alternatives: +   It expects P to be the first byte of a single alternative and END one +   byte past the last. The alternative can contain groups.  */ + +static boolean alt_match_null_string_p(p, end, reg_info) +unsigned char *p, *end; +register_info_type *reg_info; +{ +	int mcnt; +	unsigned char *p1 = p; + +	while (p1 < end) { +		/* Skip over opcodes that can match nothing, and break when we get +		   to one that can't.  */ + +		switch ((re_opcode_t) * p1) { +			/* It's a loop.  */ +		case on_failure_jump: +			p1++; +			EXTRACT_NUMBER_AND_INCR(mcnt, p1); +			p1 += mcnt; +			break; + +		default: +			if (!common_op_match_null_string_p(&p1, end, reg_info)) +				return false; +		} +	}							/* while p1 < end */ + +	return true; +}								/* alt_match_null_string_p */ + + +/* Deals with the ops common to group_match_null_string_p and +   alt_match_null_string_p. + +   Sets P to one after the op and its arguments, if any.  
*/ + +static boolean common_op_match_null_string_p(p, end, reg_info) +unsigned char **p, *end; +register_info_type *reg_info; +{ +	int mcnt; +	boolean ret; +	int reg_no; +	unsigned char *p1 = *p; + +	switch ((re_opcode_t) * p1++) { +	case no_op: +	case begline: +	case endline: +	case begbuf: +	case endbuf: +	case wordbeg: +	case wordend: +	case wordbound: +	case notwordbound: +#ifdef emacs +	case before_dot: +	case at_dot: +	case after_dot: +#endif +		break; + +	case start_memory: +		reg_no = *p1; +		assert(reg_no > 0 && reg_no <= MAX_REGNUM); +		ret = group_match_null_string_p(&p1, end, reg_info); + +		/* Have to set this here in case we're checking a group which +		   contains a group and a back reference to it.  */ + +		if (REG_MATCH_NULL_STRING_P(reg_info[reg_no]) == +			MATCH_NULL_UNSET_VALUE) +				REG_MATCH_NULL_STRING_P(reg_info[reg_no]) = ret; + +		if (!ret) +			return false; +		break; + +		/* If this is an optimized succeed_n for zero times, make the jump.  */ +	case jump: +		EXTRACT_NUMBER_AND_INCR(mcnt, p1); +		if (mcnt >= 0) +			p1 += mcnt; +		else +			return false; +		break; + +	case succeed_n: +		/* Get to the number of times to succeed.  */ +		p1 += 2; +		EXTRACT_NUMBER_AND_INCR(mcnt, p1); + +		if (mcnt == 0) { +			p1 -= 4; +			EXTRACT_NUMBER_AND_INCR(mcnt, p1); +			p1 += mcnt; +		} else +			return false; +		break; + +	case duplicate: +		if (!REG_MATCH_NULL_STRING_P(reg_info[*p1])) +			return false; +		break; + +	case set_number_at: +		p1 += 4; + +	default: +		/* All other opcodes mean we cannot match the empty string.  */ +		return false; +	} + +	*p = p1; +	return true; +}								/* common_op_match_null_string_p */ + + +/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN +   bytes; nonzero otherwise.  
*/ + +static int bcmp_translate(s1, s2, len, translate) +const char *s1, *s2; +register int len; +RE_TRANSLATE_TYPE translate; +{ +	register const unsigned char *p1 = (const unsigned char *) s1; +	register const unsigned char *p2 = (const unsigned char *) s2; + +	while (len) { +		if (translate[*p1++] != translate[*p2++]) +			return 1; +		len--; +	} +	return 0; +} + +/* Entry points for GNU code.  */ + +/* re_compile_pattern is the GNU regular expression compiler: it +   compiles PATTERN (of length SIZE) and puts the result in BUFP. +   Returns 0 if the pattern was valid, otherwise an error string. + +   Assumes the `allocated' (and perhaps `buffer') and `translate' fields +   are set in BUFP on entry. + +   We call regex_compile to do the actual compilation.  */ + +const char *re_compile_pattern(pattern, length, bufp) +const char *pattern; +size_t length; +struct re_pattern_buffer *bufp; +{ +	reg_errcode_t ret; + +	/* GNU code is written to assume at least RE_NREGS registers will be set +	   (and at least one extra will be -1).  */ +	bufp->regs_allocated = REGS_UNALLOCATED; + +	/* And GNU code determines whether or not to get register information +	   by passing null for the REGS argument to re_match, etc., not by +	   setting no_sub.  */ +	bufp->no_sub = 0; + +	/* Match anchors at newline.  */ +	bufp->newline_anchor = 1; + +	ret = regex_compile(pattern, length, re_syntax_options, bufp); + +	if (!ret) +		return NULL; +	return gettext(re_error_msgid + re_error_msgid_idx[(int) ret]); +} + +#ifdef _LIBC +weak_alias(__re_compile_pattern, re_compile_pattern) +#endif +/* Entry points compatible with 4.2 BSD regex library.  We don't define +   them unless specifically requested.  */ +#if defined _REGEX_RE_COMP || defined _LIBC +/* BSD has one and only one pattern buffer.  
*/ +static struct re_pattern_buffer re_comp_buf; + +char * +#ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine +   these names if they don't use our functions, and still use +   regcomp/regexec below without link errors.  */ weak_function +#endif +re_comp(s) +const char *s; +{ +	reg_errcode_t ret; + +	if (!s) { +		if (!re_comp_buf.buffer) +			return gettext("No previous regular expression"); +		return 0; +	} + +	if (!re_comp_buf.buffer) { +		re_comp_buf.buffer = (unsigned char *) malloc(200); +		if (re_comp_buf.buffer == NULL) +			return (char *) gettext(re_error_msgid +									+ +									re_error_msgid_idx[(int) REG_ESPACE]); +		re_comp_buf.allocated = 200; + +		re_comp_buf.fastmap = (char *) malloc(1 << BYTEWIDTH); +		if (re_comp_buf.fastmap == NULL) +			return (char *) gettext(re_error_msgid +									+ +									re_error_msgid_idx[(int) REG_ESPACE]); +	} + +	/* Since `re_exec' always passes NULL for the `regs' argument, we +	   don't need to initialize the pattern buffer fields which affect it.  */ + +	/* Match anchors at newlines.  */ +	re_comp_buf.newline_anchor = 1; + +	ret = regex_compile(s, strlen(s), re_syntax_options, &re_comp_buf); + +	if (!ret) +		return NULL; + +	/* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */ +	return (char *) gettext(re_error_msgid + +							re_error_msgid_idx[(int) ret]); +} + + +int +#ifdef _LIBC + weak_function +#endif +re_exec(s) +const char *s; +{ +	const int len = strlen(s); + +	return +		0 <= re_search(&re_comp_buf, s, len, 0, len, +					   (struct re_registers *) 0); +} + +#endif							/* _REGEX_RE_COMP */ + +/* POSIX.2 functions.  Don't define these for Emacs.  */ + +#ifndef emacs + +/* regcomp takes a regular expression as a string and compiles it. + +   PREG is a regex_t *.  We do not expect any fields to be initialized, +   since POSIX says we shouldn't.  
Thus, we set + +     `buffer' to the compiled pattern; +     `used' to the length of the compiled pattern; +     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the +       REG_EXTENDED bit in CFLAGS is set; otherwise, to +       RE_SYNTAX_POSIX_BASIC; +     `newline_anchor' to REG_NEWLINE being set in CFLAGS; +     `fastmap' to an allocated space for the fastmap; +     `fastmap_accurate' to zero; +     `re_nsub' to the number of subexpressions in PATTERN. + +   PATTERN is the address of the pattern string. + +   CFLAGS is a series of bits which affect compilation. + +     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we +     use POSIX basic syntax. + +     If REG_NEWLINE is set, then . and [^...] don't match newline. +     Also, regexec will try a match beginning after every newline. + +     If REG_ICASE is set, then we considers upper- and lowercase +     versions of letters to be equivalent when matching. + +     If REG_NOSUB is set, then when PREG is passed to regexec, that +     routine will report only success or failure, and nothing about the +     registers. + +   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for +   the return codes and their meanings.)  */ + +int regcomp(preg, pattern, cflags) +regex_t *preg; +const char *pattern; +int cflags; +{ +	reg_errcode_t ret; +	reg_syntax_t syntax +		= (cflags & REG_EXTENDED) ? + +		RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; + +	/* regex_compile will allocate the space for the compiled pattern.  */ +	preg->buffer = 0; +	preg->allocated = 0; +	preg->used = 0; + +	/* Try to allocate space for the fastmap.  */ +	preg->fastmap = (char *) malloc(1 << BYTEWIDTH); + +	if (cflags & REG_ICASE) { +		unsigned i; + +		preg->translate +			= (RE_TRANSLATE_TYPE) malloc(CHAR_SET_SIZE +										 * sizeof(*(RE_TRANSLATE_TYPE) 0)); +		if (preg->translate == NULL) +			return (int) REG_ESPACE; + +		/* Map uppercase characters to corresponding lowercase ones.  
*/ +		for (i = 0; i < CHAR_SET_SIZE; i++) +			preg->translate[i] = ISUPPER(i) ? TOLOWER(i) : i; +	} else +		preg->translate = NULL; + +	/* If REG_NEWLINE is set, newlines are treated differently.  */ +	if (cflags & REG_NEWLINE) {	/* REG_NEWLINE implies neither . nor [^...] match newline.  */ +		syntax &= ~RE_DOT_NEWLINE; +		syntax |= RE_HAT_LISTS_NOT_NEWLINE; +		/* It also changes the matching behavior.  */ +		preg->newline_anchor = 1; +	} else +		preg->newline_anchor = 0; + +	preg->no_sub = !!(cflags & REG_NOSUB); + +	/* POSIX says a null character in the pattern terminates it, so we +	   can use strlen here in compiling the pattern.  */ +	ret = regex_compile(pattern, strlen(pattern), syntax, preg); + +	/* POSIX doesn't distinguish between an unmatched open-group and an +	   unmatched close-group: both are REG_EPAREN.  */ +	if (ret == REG_ERPAREN) +		ret = REG_EPAREN; + +	if (ret == REG_NOERROR && preg->fastmap) { +		/* Compute the fastmap now, since regexec cannot modify the pattern +		   buffer.  */ +		if (re_compile_fastmap(preg) == -2) { +			/* Some error occurred while computing the fastmap, just forget +			   about it.  */ +			free(preg->fastmap); +			preg->fastmap = NULL; +		} +	} + +	return (int) ret; +} + +#ifdef _LIBC +weak_alias(__regcomp, regcomp) +#endif +/* regexec searches for a given pattern, specified by PREG, in the +   string STRING. + +   If NMATCH is zero or REG_NOSUB was set in the cflags argument to +   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at +   least NMATCH elements, and we set them to the offsets of the +   corresponding matched substrings. + +   EFLAGS specifies `execution flags' which affect matching: if +   REG_NOTBOL is set, then ^ does not match at the beginning of the +   string; if REG_NOTEOL is set, then $ does not match at the end. + +   We return 0 if we find a match and REG_NOMATCH if not.  
*/ +int regexec(preg, string, nmatch, pmatch, eflags) +const regex_t *preg; +const char *string; +size_t nmatch; +regmatch_t pmatch[]; +int eflags; +{ +	int ret; +	struct re_registers regs; +	regex_t private_preg; +	int len = strlen(string); +	boolean want_reg_info = !preg->no_sub && nmatch > 0; + +	private_preg = *preg; + +	private_preg.not_bol = !!(eflags & REG_NOTBOL); +	private_preg.not_eol = !!(eflags & REG_NOTEOL); + +	/* The user has told us exactly how many registers to return +	   information about, via `nmatch'.  We have to pass that on to the +	   matching routines.  */ +	private_preg.regs_allocated = REGS_FIXED; + +	if (want_reg_info) { +		regs.num_regs = nmatch; +		regs.start = TALLOC(nmatch * 2, regoff_t); +		if (regs.start == NULL) +			return (int) REG_NOMATCH; +		regs.end = regs.start + nmatch; +	} + +	/* Perform the searching operation.  */ +	ret = re_search(&private_preg, string, len, +					/* start: */ 0, /* range: */ len, +					want_reg_info ? ®s : (struct re_registers *) 0); + +	/* Copy the register information to the POSIX structure.  */ +	if (want_reg_info) { +		if (ret >= 0) { +			unsigned r; + +			for (r = 0; r < nmatch; r++) { +				pmatch[r].rm_so = regs.start[r]; +				pmatch[r].rm_eo = regs.end[r]; +			} +		} + +		/* If we needed the temporary register info, free the space now.  */ +		free(regs.start); +	} + +	/* We want zero return to mean success, unlike `re_search'.  */ +	return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; +} + +#ifdef _LIBC +weak_alias(__regexec, regexec) +#endif +/* Returns a message corresponding to an error code, ERRCODE, returned +   from either regcomp or regexec.   We don't use PREG here.  
*/ +	size_t regerror(errcode, preg, errbuf, errbuf_size) +int errcode; +const regex_t *preg; +char *errbuf; +size_t errbuf_size; +{ +	const char *msg; +	size_t msg_size; + +	if (errcode < 0 || errcode >= (int) (sizeof(re_error_msgid_idx) +										 / sizeof(re_error_msgid_idx[0]))) +		/* Only error codes returned by the rest of the code should be passed +		   to this routine.  If we are given anything else, or if other regex +		   code generates an invalid error code, then the program has a bug. +		   Dump core so we can fix it.  */ +		abort(); + +	msg = gettext(re_error_msgid + re_error_msgid_idx[errcode]); + +	msg_size = strlen(msg) + 1;	/* Includes the null.  */ + +	if (errbuf_size != 0) { +		if (msg_size > errbuf_size) { +#if defined HAVE_MEMPCPY || defined _LIBC +			*((char *) __mempcpy(errbuf, msg, errbuf_size - 1)) = '\0'; +#else +			memcpy(errbuf, msg, errbuf_size - 1); +			errbuf[errbuf_size - 1] = 0; +#endif +		} else +			memcpy(errbuf, msg, msg_size); +	} + +	return msg_size; +} + +#ifdef _LIBC +weak_alias(__regerror, regerror) +#endif +/* Free dynamically allocated space used by PREG.  */ +void regfree(preg) +regex_t *preg; +{ +	if (preg->buffer != NULL) +		free(preg->buffer); +	preg->buffer = NULL; + +	preg->allocated = 0; +	preg->used = 0; + +	if (preg->fastmap != NULL) +		free(preg->fastmap); +	preg->fastmap = NULL; +	preg->fastmap_accurate = 0; + +	if (preg->translate != NULL) +		free(preg->translate); +	preg->translate = NULL; +} + +#ifdef _LIBC +weak_alias(__regfree, regfree) +#endif +#endif							/* not emacs  */ diff --git a/libc/misc/regex/rx.c b/libc/misc/regex/rx.c deleted file mode 100644 index 39f77adb6..000000000 --- a/libc/misc/regex/rx.c +++ /dev/null @@ -1,7273 +0,0 @@ -/*	Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc. - -This file is part of the librx library. 
- -Librx is free software; you can redistribute it and/or modify it under -the terms of the GNU Library General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -Librx is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License -for more details. - -You should have received a copy of the GNU Library General Public -License along with this software; see the file COPYING.LIB.  If not, -write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA -02139, USA.  */ - -/* NOTE!!!  AIX is so losing it requires this to be the first thing in the  - * file.  - * Do not put ANYTHING before it!   - */ -#if !defined (__GNUC__) && defined (_AIX) -#pragma alloca -#endif - -/* To make linux happy? */ -#ifndef	_GNU_SOURCE -#define	_GNU_SOURCE -#endif - - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> -#include <ctype.h> -#ifndef isgraph -#define isgraph(c) (isprint (c) && !isspace (c)) -#endif -#ifndef isblank -#define isblank(c) ((c) == ' ' || (c) == '\t') -#endif - -#include <sys/types.h> - -#undef MAX -#undef MIN -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -typedef char boolean; - -#define false 0 -#define true 1 - -#ifndef __GCC__ -#undef __inline__ -#define __inline__ -#endif - -/* Emacs already defines alloca, sometimes.  */ -#ifndef alloca - -/* Make alloca work the best possible way.  */ -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else							/* not __GNUC__ */ -#if HAVE_ALLOCA_H -#include <alloca.h> -#else							/* not __GNUC__ or HAVE_ALLOCA_H */ -#ifndef _AIX					/* Already did AIX, up at the top.  
*/ -char *alloca(); -#endif							/* not _AIX */ -#endif							/* not HAVE_ALLOCA_H */ -#endif							/* not __GNUC__ */ - -#endif							/* not alloca */ - -/* Memory management and stuff for emacs. */ - -#define CHARBITS 8 -#define remalloc(M, S) (M ? realloc (M, S) : malloc (S)) - - -/* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we - * use `alloca' instead of `malloc' for the backtracking stack. - * - * Emacs will die miserably if we don't do this. - */ - -#ifdef REGEX_MALLOC -#define REGEX_ALLOCATE malloc -#else							/* not REGEX_MALLOC  */ -#define REGEX_ALLOCATE alloca -#endif							/* not REGEX_MALLOC */ - - -#ifdef RX_WANT_RX_DEFS -#define RX_DECL extern -#define RX_DEF_QUAL -#else -#define RX_WANT_RX_DEFS -#define RX_DECL static -#define RX_DEF_QUAL static -#endif - -#include <regex.h> -#undef RX_DECL -#define RX_DECL RX_DEF_QUAL - - -/* - *  Prototypes. - */ -#ifdef __STDC__ -RX_DECL struct rx_hash_item -*rx_hash_find(struct rx_hash *, unsigned long, - -			  void *, struct rx_hash_rules *); -RX_DECL struct rx_hash_item -*rx_hash_find(struct rx_hash *, unsigned long, - -			  void *, struct rx_hash_rules *); -RX_DECL struct rx_hash_item -*rx_hash_store(struct rx_hash *, unsigned long, - -			   void *, struct rx_hash_rules *); -RX_DECL void rx_hash_free(struct rx_hash_item *, struct rx_hash_rules *); -RX_DECL void rx_free_hash_table(struct rx_hash *, rx_hash_freefn, - -								struct rx_hash_rules *); -RX_DECL rx_Bitset rx_cset(struct rx *); -RX_DECL rx_Bitset rx_copy_cset(struct rx *, rx_Bitset); -RX_DECL void rx_free_cset(struct rx *, rx_Bitset); -static struct rx_hash_item -*compiler_hash_item_alloc(struct rx_hash_rules *, void *); -static struct rx_hash -*compiler_hash_alloc(struct rx_hash_rules *); -static void compiler_free_hash(struct rx_hash *, struct rx_hash_rules *); -static void compiler_free_hash_item(struct rx_hash_item *, - -									struct rx_hash_rules *); -RX_DECL struct rexp_node -*rexp_node(struct rx *, enum 
rexp_node_type); -RX_DECL struct rexp_node -*rx_mk_r_cset(struct rx *, rx_Bitset); -RX_DECL struct rexp_node -*rx_mk_r_concat(struct rx *, struct rexp_node *, struct rexp_node *); -RX_DECL struct rexp_node -*rx_mk_r_alternate(struct rx *, struct rexp_node *, struct rexp_node *); -RX_DECL struct rexp_node -*rx_mk_r_alternate(struct rx *, struct rexp_node *, struct rexp_node *); -RX_DECL struct rexp_node -*rx_mk_r_opt(struct rx *, struct rexp_node *); -RX_DECL struct rexp_node -*rx_mk_r_star(struct rx *, struct rexp_node *); -RX_DECL struct rexp_node -*rx_mk_r_2phase_star(struct rx *, struct rexp_node *, struct rexp_node *); -RX_DECL struct rexp_node -*rx_mk_r_side_effect(struct rx *, rx_side_effect); - -//RX_DECL struct rexp_node -//            *rx_mk_r_data              (struct rx *, void *); -RX_DECL void rx_free_rexp(struct rx *, struct rexp_node *); -RX_DECL struct rexp_node -*rx_copy_rexp(struct rx *, struct rexp_node *); -RX_DECL struct rx_nfa_state -*rx_nfa_state(struct rx *); -RX_DECL void rx_free_nfa_state(struct rx_nfa_state *); -RX_DECL struct rx_nfa_state -*rx_id_to_nfa_state(struct rx *, int); -RX_DECL struct rx_nfa_edge -*rx_nfa_edge(struct rx *, enum rx_nfa_etype, - -			 struct rx_nfa_state *, struct rx_nfa_state *); -RX_DECL void rx_free_nfa_edge(struct rx_nfa_edge *); -static struct rx_possible_future -*rx_possible_future(struct rx *, struct rx_se_list *); -static void rx_free_possible_future(struct rx_possible_future *); -RX_DECL void rx_free_nfa(struct rx *); -RX_DECL int rx_build_nfa(struct rx *, struct rexp_node *, -						 struct rx_nfa_state **, struct rx_nfa_state **); -RX_DECL void rx_name_nfa_states(struct rx *); -static int se_list_cmp(void *, void *); -static int se_list_equal(void *, void *); -static struct rx_se_list -*hash_cons_se_prog(struct rx *, struct rx_hash *, - -				   void *, struct rx_se_list *); -static struct rx_se_list -*hash_se_prog(struct rx *, struct rx_hash *, struct rx_se_list *); -static int nfa_set_cmp(void *, void 
*); -static int nfa_set_equal(void *, void *); -static struct rx_nfa_state_set -*nfa_set_cons(struct rx *, struct rx_hash *, - -			  struct rx_nfa_state *, struct rx_nfa_state_set *); -static struct rx_nfa_state_set -*nfa_set_enjoin(struct rx *, struct rx_hash *, - -				struct rx_nfa_state *, struct rx_nfa_state_set *); -#endif - -#ifndef emacs - -#ifdef SYNTAX_TABLE -extern char *re_syntax_table; -#else							/* not SYNTAX_TABLE */ - -#ifndef RX_WANT_RX_DEFS -RX_DECL char re_syntax_table[CHAR_SET_SIZE]; -#endif - -#ifdef __STDC__ -static void init_syntax_once(void) -#else -static void init_syntax_once() -#endif -{ -	register int c; -	static int done = 0; - -	if (done) -		return; - -	bzero(re_syntax_table, sizeof re_syntax_table); - -	for (c = 'a'; c <= 'z'; c++) -		re_syntax_table[c] = Sword; - -	for (c = 'A'; c <= 'Z'; c++) -		re_syntax_table[c] = Sword; - -	for (c = '0'; c <= '9'; c++) -		re_syntax_table[c] = Sword; - -	re_syntax_table['_'] = Sword; - -	done = 1; -} -#endif							/* not SYNTAX_TABLE */ -#endif							/* not emacs */ - -/* Compile with `-DRX_DEBUG' and use the following flags. 
- * - * Debugging flags: - *   	rx_debug - print information as a regexp is compiled - * 	rx_debug_trace - print information as a regexp is executed - */ - -#ifdef RX_DEBUG - -int rx_debug_compile = 0; -int rx_debug_trace = 0; -static struct re_pattern_buffer *dbug_rxb = 0; - - -/* - * More Prototypes - */ -#ifdef __STDC__ -typedef void (*side_effect_printer) (struct rx *, void *, FILE *); -static void print_cset(struct rx *, rx_Bitset, FILE *); -static void print_rexp(struct rx *, struct rexp_node *, int, -					   side_effect_printer, FILE *); -static void print_nfa(struct rx *, struct rx_nfa_state *, -					  side_effect_printer, FILE *); -static void re_seprint(struct rx *, void *, FILE *); -void print_compiled_pattern(struct re_pattern_buffer *); -void print_fastmap(char *); -#else -typedef void (*side_effect_printer) (); -static void print_cset(); -#endif - -#ifdef __STDC__ -static void -print_rexp(struct rx *rx, -		   struct rexp_node *node, int depth, -		   side_effect_printer seprint, FILE * fp) -#else -static void print_rexp(rx, node, depth, seprint, fp) -struct rx *rx; -struct rexp_node *node; -int depth; -side_effect_printer seprint; -FILE *fp; -#endif -{ -	if (!node) -		return; -	else { -		switch (node->type) { -		case r_cset: -		{ -			fprintf(fp, "%*s", depth, ""); -			print_cset(rx, node->params.cset, fp); -			fputc('\n', fp); -			break; -		} - -		case r_opt: -		case r_star: -			fprintf(fp, "%*s%s\n", depth, "", -					node->type == r_opt ? "opt" : "star"); -			print_rexp(rx, node->params.pair.left, depth + 3, seprint, fp); -			break; - -		case r_2phase_star: -			fprintf(fp, "%*s2phase star\n", depth, ""); -			print_rexp(rx, node->params.pair.right, depth + 3, seprint, -					   fp); -			print_rexp(rx, node->params.pair.left, depth + 3, seprint, fp); -			break; - - -		case r_alternate: -		case r_concat: -			fprintf(fp, "%*s%s\n", depth, "", -					node->type == r_alternate ? 
"alt" : "concat"); -			print_rexp(rx, node->params.pair.left, depth + 3, seprint, fp); -			print_rexp(rx, node->params.pair.right, depth + 3, seprint, -					   fp); -			break; -		case r_side_effect: -			fprintf(fp, "%*sSide effect: ", depth, ""); -			seprint(rx, node->params.side_effect, fp); -			fputc('\n', fp); -		} -	} -} - -#ifdef __STDC__ -static void -print_nfa(struct rx *rx, -		  struct rx_nfa_state *n, side_effect_printer seprint, FILE * fp) -#else -static void print_nfa(rx, n, seprint, fp) -struct rx *rx; -struct rx_nfa_state *n; -side_effect_printer seprint; -FILE *fp; -#endif -{ -	while (n) { -		struct rx_nfa_edge *e = n->edges; -		struct rx_possible_future *ec = n->futures; - -		fprintf(fp, "node %d %s\n", n->id, -				n->is_final ? "final" : (n->is_start ? "start" : "")); -		while (e) { -			fprintf(fp, "   edge to %d, ", e->dest->id); -			switch (e->type) { -			case ne_epsilon: -				fprintf(fp, "epsilon\n"); -				break; -			case ne_side_effect: -				fprintf(fp, "side effect "); -				seprint(rx, e->params.side_effect, fp); -				fputc('\n', fp); -				break; -			case ne_cset: -				fprintf(fp, "cset "); -				print_cset(rx, e->params.cset, fp); -				fputc('\n', fp); -				break; -			} -			e = e->next; -		} - -		while (ec) { -			int x; -			struct rx_nfa_state_set *s; -			struct rx_se_list *l; - -			fprintf(fp, "   eclosure to {"); -			for (s = ec->destset; s; s = s->cdr) -				fprintf(fp, "%d ", s->car->id); -			fprintf(fp, "} ("); -			for (l = ec->effects; l; l = l->cdr) { -				seprint(rx, l->car, fp); -				fputc(' ', fp); -			} -			fprintf(fp, ")\n"); -			ec = ec->next; -		} -		n = n->next; -	} -} - -static char *efnames[] = { -	"bogon", -	"re_se_try", -	"re_se_pushback", -	"re_se_push0", -	"re_se_pushpos", -	"re_se_chkpos", -	"re_se_poppos", -	"re_se_at_dot", -	"re_se_syntax", -	"re_se_not_syntax", -	"re_se_begbuf", -	"re_se_hat", -	"re_se_wordbeg", -	"re_se_wordbound", -	"re_se_notwordbound", -	"re_se_wordend", -	"re_se_endbuf", -	"re_se_dollar", -	
"re_se_fail", -}; - -static char *efnames2[] = { -	"re_se_win", -	"re_se_lparen", -	"re_se_rparen", -	"re_se_backref", -	"re_se_iter", -	"re_se_end_iter", -	"re_se_tv" -}; - -static char *inx_names[] = { -	"rx_backtrack_point", -	"rx_do_side_effects", -	"rx_cache_miss", -	"rx_next_char", -	"rx_backtrack", -	"rx_error_inx", -	"rx_num_instructions" -}; - - -#ifdef __STDC__ -static void re_seprint(struct rx *rx, void *effect, FILE * fp) -#else -static void re_seprint(rx, effect, fp) -struct rx *rx; -void *effect; -FILE *fp; -#endif -{ -	if ((int) effect < 0) -		fputs(efnames[-(int) effect], fp); -	else if (dbug_rxb) { -		struct re_se_params *p = &dbug_rxb->se_params[(int) effect]; - -		fprintf(fp, "%s(%d,%d)", efnames2[p->se], p->op1, p->op2); -	} else -		fprintf(fp, "[complex op # %d]", (int) effect); -} - -/* These are so the regex.c regression tests will compile. */ -void print_compiled_pattern(rxb) -struct re_pattern_buffer *rxb; -{ -} - -void print_fastmap(fm) -char *fm; -{ -} - -#endif							/* RX_DEBUG */ - - - -/* This page: Bitsets.  Completely unintersting. 
*/ - -//RX_DECL int   rx_bitset_is_equal        (int, rx_Bitset, rx_Bitset); -RX_DECL int rx_bitset_is_subset(int, rx_Bitset, rx_Bitset); - -//RX_DECL int   rx_bitset_empty           (int, rx_Bitset); -RX_DECL void rx_bitset_null(int, rx_Bitset); -RX_DECL void rx_bitset_complement(int, rx_Bitset); -RX_DECL void rx_bitset_complement(int, rx_Bitset); -RX_DECL void rx_bitset_assign(int, rx_Bitset, rx_Bitset); -RX_DECL void rx_bitset_union(int, rx_Bitset, rx_Bitset); -RX_DECL void rx_bitset_intersection(int, rx_Bitset, rx_Bitset); -RX_DECL void rx_bitset_difference(int, rx_Bitset, rx_Bitset); - -//RX_DECL void  rx_bitset_revdifference   (int, rx_Bitset, rx_Bitset); -#ifdef emacs -RX_DECL void rx_bitset_xor(int, rx_Bitset, rx_Bitset); -#endif -RX_DECL unsigned long rx_bitset_hash(int, rx_Bitset); - -#if 0 -#ifdef __STDC__ -RX_DECL int rx_bitset_is_equal(int size, rx_Bitset a, rx_Bitset b) -#else -RX_DECL int rx_bitset_is_equal(size, a, b) -int size; -rx_Bitset a; -rx_Bitset b; -#endif -{ -	int x; -	RX_subset s = b[0]; - -	b[0] = ~a[0]; - -	for (x = rx_bitset_numb_subsets(size) - 1; a[x] == b[x]; --x); - -	b[0] = s; -	return !x && s == a[0]; -} -#endif - -#ifdef __STDC__ -RX_DECL int rx_bitset_is_subset(int size, rx_Bitset a, rx_Bitset b) -#else -RX_DECL int rx_bitset_is_subset(size, a, b) -int size; -rx_Bitset a; -rx_Bitset b; -#endif -{ -	int x = rx_bitset_numb_subsets(size) - 1; - -	while (x-- && (a[x] & b[x]) == a[x]); -	return x == -1; -} - -#if 0 -#ifdef __STDC__ -RX_DECL int rx_bitset_empty(int size, rx_Bitset set) -#else -RX_DECL int rx_bitset_empty(size, set) -int size; -rx_Bitset set; -#endif -{ -	int x; -	RX_subset s = set[0]; - -	set[0] = 1; -	for (x = rx_bitset_numb_subsets(size) - 1; !set[x]; --x); -	set[0] = s; -	return !s; -} -#endif - -#ifdef __STDC__ -RX_DECL void rx_bitset_null(int size, rx_Bitset b) -#else -RX_DECL void rx_bitset_null(size, b) -int size; -rx_Bitset b; -#endif -{ -	bzero(b, rx_sizeof_bitset(size)); -} - - -#ifdef __STDC__ -RX_DECL void 
rx_bitset_universe(int size, rx_Bitset b) -#else -RX_DECL void rx_bitset_universe(size, b) -int size; -rx_Bitset b; -#endif -{ -	int x = rx_bitset_numb_subsets(size); - -	while (x--) -		*b++ = ~(RX_subset) 0; -} - - -#ifdef __STDC__ -RX_DECL void rx_bitset_complement(int size, rx_Bitset b) -#else -RX_DECL void rx_bitset_complement(size, b) -int size; -rx_Bitset b; -#endif -{ -	int x = rx_bitset_numb_subsets(size); - -	while (x--) { -		*b = ~*b; -		++b; -	} -} - - -#ifdef __STDC__ -RX_DECL void rx_bitset_assign(int size, rx_Bitset a, rx_Bitset b) -#else -RX_DECL void rx_bitset_assign(size, a, b) -int size; -rx_Bitset a; -rx_Bitset b; -#endif -{ -	int x; - -	for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x) -		a[x] = b[x]; -} - -#ifdef __STDC__ -RX_DECL void rx_bitset_union(int size, rx_Bitset a, rx_Bitset b) -#else -RX_DECL void rx_bitset_union(size, a, b) -int size; -rx_Bitset a; -rx_Bitset b; -#endif -{ -	int x; - -	for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x) -		a[x] |= b[x]; -} - - -#ifdef __STDC__ -RX_DECL void rx_bitset_intersection(int size, rx_Bitset a, rx_Bitset b) -#else -RX_DECL void rx_bitset_intersection(size, a, b) -int size; -rx_Bitset a; -rx_Bitset b; -#endif -{ -	int x; - -	for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x) -		a[x] &= b[x]; -} - - -#ifdef __STDC__ -RX_DECL void rx_bitset_difference(int size, rx_Bitset a, rx_Bitset b) -#else -RX_DECL void rx_bitset_difference(size, a, b) -int size; -rx_Bitset a; -rx_Bitset b; -#endif -{ -	int x; - -	for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x) -		a[x] &= ~b[x]; -} - - -#if 0 -#ifdef __STDC__ -RX_DECL void rx_bitset_revdifference(int size, rx_Bitset a, rx_Bitset b) -#else -RX_DECL void rx_bitset_revdifference(size, a, b) -int size; -rx_Bitset a; -rx_Bitset b; -#endif -{ -	int x; - -	for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x) -		a[x] = ~a[x] & b[x]; -} -#endif - - -#ifdef emacs -#ifdef __STDC__ -RX_DECL void rx_bitset_xor(int size, rx_Bitset a, rx_Bitset b) 
-#else -RX_DECL void rx_bitset_xor(size, a, b) -int size; -rx_Bitset a; -rx_Bitset b; -#endif -{ -	int x; - -	for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x) -		a[x] ^= b[x]; -} -#endif - - -#ifdef __STDC__ -RX_DECL unsigned long rx_bitset_hash(int size, rx_Bitset b) -#else -RX_DECL unsigned long rx_bitset_hash(size, b) -int size; -rx_Bitset b; -#endif -{ -	int x; -	unsigned long hash = (unsigned long) rx_bitset_hash; - -	for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x) -		hash ^= rx_bitset_subset_val(b, x); - -	return hash; -} - -RX_DECL RX_subset rx_subset_singletons[RX_subset_bits] = { -	0x1, -	0x2, -	0x4, -	0x8, -	0x10, -	0x20, -	0x40, -	0x80, -	0x100, -	0x200, -	0x400, -	0x800, -	0x1000, -	0x2000, -	0x4000, -	0x8000, -	0x10000, -	0x20000, -	0x40000, -	0x80000, -	0x100000, -	0x200000, -	0x400000, -	0x800000, -	0x1000000, -	0x2000000, -	0x4000000, -	0x8000000, -	0x10000000, -	0x20000000, -	0x40000000, -	0x80000000 -}; - -#ifdef RX_DEBUG - -#ifdef __STDC__ -static void print_cset(struct rx *rx, rx_Bitset cset, FILE * fp) -#else -static void print_cset(rx, cset, fp) -struct rx *rx; -rx_Bitset cset; -FILE *fp; -#endif -{ -	int x; - -	fputc('[', fp); -	for (x = 0; x < rx->local_cset_size; ++x) -		if (RX_bitset_member(cset, x)) { -			if (isprint(x)) -				fputc(x, fp); -			else -				fprintf(fp, "\\0%o ", x); -		} -	fputc(']', fp); -} - -#endif							/*  RX_DEBUG */ - - - -static unsigned long rx_hash_masks[4] = { -	0x12488421, -	0x96699669, -	0xbe7dd7eb, -	0xffffffff -}; - - -/* Hash tables */ -#ifdef __STDC__ -RX_DECL struct rx_hash_item *rx_hash_find(struct rx_hash *table, -										  unsigned long hash, -										  void *value, -										  struct rx_hash_rules *rules) -#else -RX_DECL struct rx_hash_item *rx_hash_find(table, hash, value, rules) -struct rx_hash *table; -unsigned long hash; -void *value; -struct rx_hash_rules *rules; -#endif -{ -	rx_hash_eq eq = rules->eq; -	int maskc = 0; -	long mask = rx_hash_masks[0]; -	int bucket = (hash & mask) 
% 13; - -	while (table->children[bucket]) { -		table = table->children[bucket]; -		++maskc; -		mask = rx_hash_masks[maskc]; -		bucket = (hash & mask) % 13; -	} - -	{ -		struct rx_hash_item *it = table->buckets[bucket]; - -		while (it) -			if (eq(it->data, value)) -				return it; -			else -				it = it->next_same_hash; -	} - -	return 0; -} - -#ifdef __STDC__ -RX_DECL struct rx_hash_item *rx_hash_store(struct rx_hash *table, -										   unsigned long hash, -										   void *value, -										   struct rx_hash_rules *rules) -#else -RX_DECL struct rx_hash_item *rx_hash_store(table, hash, value, rules) -struct rx_hash *table; -unsigned long hash; -void *value; -struct rx_hash_rules *rules; -#endif -{ -	rx_hash_eq eq = rules->eq; -	int maskc = 0; -	long mask = rx_hash_masks[0]; -	int bucket = (hash & mask) % 13; -	int depth = 0; - -	while (table->children[bucket]) { -		table = table->children[bucket]; -		++maskc; -		mask = rx_hash_masks[maskc]; -		bucket = (hash & mask) % 13; -		++depth; -	} - -	{ -		struct rx_hash_item *it = table->buckets[bucket]; - -		while (it) -			if (eq(it->data, value)) -				return it; -			else -				it = it->next_same_hash; -	} - -	{ -		if ((depth < 3) -			&& (table->bucket_size[bucket] >= 4)) { -			struct rx_hash *newtab = ((struct rx_hash *) -									   rules->hash_alloc(rules)); - -			if (!newtab) -				goto add_to_bucket; -			bzero(newtab, sizeof(*newtab)); -			newtab->parent = table; -			{ -				struct rx_hash_item *them = table->buckets[bucket]; -				unsigned long newmask = rx_hash_masks[maskc + 1]; - -				while (them) { -					struct rx_hash_item *save = them->next_same_hash; -					int new_buck = (them->hash & newmask) % 13; - -					them->next_same_hash = newtab->buckets[new_buck]; -					newtab->buckets[new_buck] = them; -					them->table = newtab; -					them = save; -					++newtab->bucket_size[new_buck]; -					++newtab->refs; -				} -				table->refs = -					(table->refs - table->bucket_size[bucket] + 1); -				table->bucket_size[bucket] = 
0; -				table->buckets[bucket] = 0; -				table->children[bucket] = newtab; -				table = newtab; -				bucket = (hash & newmask) % 13; -			} -		} -	} -  add_to_bucket: -	{ -		struct rx_hash_item *it = ((struct rx_hash_item *) -								   rules->hash_item_alloc(rules, value)); - -		if (!it) -			return 0; -		it->hash = hash; -		it->table = table; -		/* DATA and BINDING are to be set in hash_item_alloc */ -		it->next_same_hash = table->buckets[bucket]; -		table->buckets[bucket] = it; -		++table->bucket_size[bucket]; -		++table->refs; -		return it; -	} -} - - -#ifdef __STDC__ -RX_DECL void -rx_hash_free(struct rx_hash_item *it, struct rx_hash_rules *rules) -#else -RX_DECL void rx_hash_free(it, rules) -struct rx_hash_item *it; -struct rx_hash_rules *rules; -#endif -{ -	if (it) { -		struct rx_hash *table = it->table; -		unsigned long hash = it->hash; -		int depth = (table->parent -					 ? (table->parent->parent -						? (table->parent->parent->parent ? 3 : 2) -						: 1) -					 : 0); -		int bucket = (hash & rx_hash_masks[depth]) % 13; -		struct rx_hash_item **pos = &table->buckets[bucket]; - -		while (*pos != it) -			pos = &(*pos)->next_same_hash; -		*pos = it->next_same_hash; -		rules->free_hash_item(it, rules); -		--table->bucket_size[bucket]; -		--table->refs; -		while (!table->refs && depth) { -			struct rx_hash *save = table; - -			table = table->parent; -			--depth; -			bucket = (hash & rx_hash_masks[depth]) % 13; -			--table->refs; -			table->children[bucket] = 0; -			rules->free_hash(save, rules); -		} -	} -} - -#ifdef __STDC__ -RX_DECL void -rx_free_hash_table(struct rx_hash *tab, rx_hash_freefn freefn, -				   struct rx_hash_rules *rules) -#else -RX_DECL void rx_free_hash_table(tab, freefn, rules) -struct rx_hash *tab; -rx_hash_freefn freefn; -struct rx_hash_rules *rules; -#endif -{ -	int x; - -	for (x = 0; x < 13; ++x) -		if (tab->children[x]) { -			rx_free_hash_table(tab->children[x], freefn, rules); -			rules->free_hash(tab->children[x], rules); -		} else { -		
	struct rx_hash_item *them = tab->buckets[x]; - -			while (them) { -				struct rx_hash_item *that = them; - -				them = that->next_same_hash; -				freefn(that); -				rules->free_hash_item(that, rules); -			} -		} -} - - - -/* Utilities for manipulating bitset represntations of characters sets. */ - -#ifdef __STDC__ -RX_DECL rx_Bitset rx_cset(struct rx *rx) -#else -RX_DECL rx_Bitset rx_cset(rx) -struct rx *rx; -#endif -{ -	rx_Bitset b = - -		(rx_Bitset) malloc(rx_sizeof_bitset(rx->local_cset_size)); -	if (b) -		rx_bitset_null(rx->local_cset_size, b); -	return b; -} - - -#ifdef __STDC__ -RX_DECL rx_Bitset rx_copy_cset(struct rx * rx, rx_Bitset a) -#else -RX_DECL rx_Bitset rx_copy_cset(rx, a) -struct rx *rx; -rx_Bitset a; -#endif -{ -	rx_Bitset cs = rx_cset(rx); - -	if (cs) -		rx_bitset_union(rx->local_cset_size, cs, a); - -	return cs; -} - - -#ifdef __STDC__ -RX_DECL void rx_free_cset(struct rx *rx, rx_Bitset c) -#else -RX_DECL void rx_free_cset(rx, c) -struct rx *rx; -rx_Bitset c; -#endif -{ -	if (c) -		free((char *) c); -} - - -/* Hash table memory allocation policy for the regexp compiler */ - -#ifdef __STDC__ -static struct rx_hash *compiler_hash_alloc(struct rx_hash_rules *rules) -#else -static struct rx_hash *compiler_hash_alloc(rules) -struct rx_hash_rules *rules; -#endif -{ -	return (struct rx_hash *) malloc(sizeof(struct rx_hash)); -} - - -#ifdef __STDC__ -static struct rx_hash_item *compiler_hash_item_alloc(struct rx_hash_rules -													 *rules, void *value) -#else -static struct rx_hash_item *compiler_hash_item_alloc(rules, value) -struct rx_hash_rules *rules; -void *value; -#endif -{ -	struct rx_hash_item *it; - -	it = (struct rx_hash_item *) malloc(sizeof(*it)); -	if (it) { -		it->data = value; -		it->binding = 0; -	} -	return it; -} - -#ifdef __STDC__ -static void -compiler_free_hash(struct rx_hash *tab, struct rx_hash_rules *rules) -#else -static void compiler_free_hash(tab, rules) -struct rx_hash *tab; -struct rx_hash_rules *rules; -#endif -{ -	
free((char *) tab); -} - - -#ifdef __STDC__ -static void -compiler_free_hash_item(struct rx_hash_item *item, -						struct rx_hash_rules *rules) -#else -static void compiler_free_hash_item(item, rules) -struct rx_hash_item *item; -struct rx_hash_rules *rules; -#endif -{ -	free((char *) item); -} - - -/* This page: REXP_NODE (expression tree) structures. */ - -#ifdef __STDC__ -RX_DECL struct rexp_node *rexp_node(struct rx *rx, -									enum rexp_node_type type) -#else -RX_DECL struct rexp_node *rexp_node(rx, type) -struct rx *rx; -enum rexp_node_type type; -#endif -{ -	struct rexp_node *n; - -	n = (struct rexp_node *) malloc(sizeof(*n)); -	if (n) { -		bzero(n, sizeof(*n)); -		n->type = type; -	} -	return n; -} - - -/* free_rexp_node assumes that the bitset passed to rx_mk_r_cset - * can be freed using rx_free_cset. - */ -#ifdef __STDC__ -RX_DECL struct rexp_node *rx_mk_r_cset(struct rx *rx, rx_Bitset b) -#else -RX_DECL struct rexp_node *rx_mk_r_cset(rx, b) -struct rx *rx; -rx_Bitset b; -#endif -{ -	struct rexp_node *n = rexp_node(rx, r_cset); - -	if (n) -		n->params.cset = b; -	return n; -} - -#ifdef __STDC__ -RX_DECL struct rexp_node *rx_mk_r_concat(struct rx *rx, -										 struct rexp_node *a, -										 struct rexp_node *b) -#else -RX_DECL struct rexp_node *rx_mk_r_concat(rx, a, b) -struct rx *rx; -struct rexp_node *a; -struct rexp_node *b; -#endif -{ -	struct rexp_node *n = rexp_node(rx, r_concat); - -	if (n) { -		n->params.pair.left = a; -		n->params.pair.right = b; -	} -	return n; -} - - -#ifdef __STDC__ -RX_DECL struct rexp_node *rx_mk_r_alternate(struct rx *rx, -											struct rexp_node *a, -											struct rexp_node *b) -#else -RX_DECL struct rexp_node *rx_mk_r_alternate(rx, a, b) -struct rx *rx; -struct rexp_node *a; -struct rexp_node *b; -#endif -{ -	struct rexp_node *n = rexp_node(rx, r_alternate); - -	if (n) { -		n->params.pair.left = a; -		n->params.pair.right = b; -	} -	return n; -} - - -#ifdef __STDC__ -RX_DECL struct rexp_node 
*rx_mk_r_opt(struct rx *rx, struct rexp_node *a) -#else -RX_DECL struct rexp_node *rx_mk_r_opt(rx, a) -struct rx *rx; -struct rexp_node *a; -#endif -{ -	struct rexp_node *n = rexp_node(rx, r_opt); - -	if (n) { -		n->params.pair.left = a; -		n->params.pair.right = 0; -	} -	return n; -} - -#ifdef __STDC__ -RX_DECL struct rexp_node *rx_mk_r_star(struct rx *rx, struct rexp_node *a) -#else -RX_DECL struct rexp_node *rx_mk_r_star(rx, a) -struct rx *rx; -struct rexp_node *a; -#endif -{ -	struct rexp_node *n = rexp_node(rx, r_star); - -	if (n) { -		n->params.pair.left = a; -		n->params.pair.right = 0; -	} -	return n; -} - - -#ifdef __STDC__ -RX_DECL struct rexp_node *rx_mk_r_2phase_star(struct rx *rx, -											  struct rexp_node *a, -											  struct rexp_node *b) -#else -RX_DECL struct rexp_node *rx_mk_r_2phase_star(rx, a, b) -struct rx *rx; -struct rexp_node *a; -struct rexp_node *b; -#endif -{ -	struct rexp_node *n = rexp_node(rx, r_2phase_star); - -	if (n) { -		n->params.pair.left = a; -		n->params.pair.right = b; -	} -	return n; -} - -#ifdef __STDC__ -RX_DECL struct rexp_node *rx_mk_r_side_effect(struct rx *rx, -											  rx_side_effect a) -#else -RX_DECL struct rexp_node *rx_mk_r_side_effect(rx, a) -struct rx *rx; -rx_side_effect a; -#endif -{ -	struct rexp_node *n = rexp_node(rx, r_side_effect); - -	if (n) { -		n->params.side_effect = a; -		n->params.pair.right = 0; -	} -	return n; -} - - -#if 0 -#ifdef __STDC__ -RX_DECL struct rexp_node *rx_mk_r_data(struct rx *rx, void *a) -#else -RX_DECL struct rexp_node *rx_mk_r_data(rx, a) -struct rx *rx; -void *a; -#endif -{ -	struct rexp_node *n = rexp_node(rx, r_data); - -	if (n) { -		n->params.pair.left = a; -		n->params.pair.right = 0; -	} -	return n; -} -#endif - -#ifdef __STDC__ -RX_DECL void rx_free_rexp(struct rx *rx, struct rexp_node *node) -#else -RX_DECL void rx_free_rexp(rx, node) -struct rx *rx; -struct rexp_node *node; -#endif -{ -	if (node) { -		switch (node->type) { -		case r_cset: -			if 
(node->params.cset) -				rx_free_cset(rx, node->params.cset); - -		case r_side_effect: -			break; - -		case r_concat: -		case r_alternate: -		case r_2phase_star: -		case r_opt: -		case r_star: -			rx_free_rexp(rx, node->params.pair.left); -			rx_free_rexp(rx, node->params.pair.right); -			break; - -		case r_data: -			/* This shouldn't occur. */ -			break; -		} -		free((char *) node); -	} -} - -#ifdef __STDC__ -RX_DECL struct rexp_node *rx_copy_rexp(struct rx *rx, -									   struct rexp_node *node) -#else -RX_DECL struct rexp_node *rx_copy_rexp(rx, node) -struct rx *rx; -struct rexp_node *node; -#endif -{ -	if (!node) -		return 0; -	else { -		struct rexp_node *n = rexp_node(rx, node->type); - -		if (!n) -			return 0; -		switch (node->type) { -		case r_cset: -			n->params.cset = rx_copy_cset(rx, node->params.cset); -			if (!n->params.cset) { -				rx_free_rexp(rx, n); -				return 0; -			} -			break; - -		case r_side_effect: -			n->params.side_effect = node->params.side_effect; -			break; - -		case r_concat: -		case r_alternate: -		case r_opt: -		case r_2phase_star: -		case r_star: -			n->params.pair.left = rx_copy_rexp(rx, node->params.pair.left); -			n->params.pair.right = -				rx_copy_rexp(rx, node->params.pair.right); -			if ((node->params.pair.left && !n->params.pair.left) -				|| (node->params.pair.right && !n->params.pair.right)) { -				rx_free_rexp(rx, n); -				return 0; -			} -			break; -		case r_data: -			/* shouldn't happen */ -			break; -		} -		return n; -	} -} - - - -/* This page: functions to build and destroy graphs that describe nfa's */ - -/* Constructs a new nfa node. 
*/ -#ifdef __STDC__ -RX_DECL struct rx_nfa_state *rx_nfa_state(struct rx *rx) -#else -RX_DECL struct rx_nfa_state *rx_nfa_state(rx) -struct rx *rx; -#endif -{ -	struct rx_nfa_state *n = (struct rx_nfa_state *) malloc(sizeof(*n)); - -	if (!n) -		return 0; -	bzero(n, sizeof(*n)); -	n->next = rx->nfa_states; -	rx->nfa_states = n; -	return n; -} - - -#ifdef __STDC__ -RX_DECL void rx_free_nfa_state(struct rx_nfa_state *n) -#else -RX_DECL void rx_free_nfa_state(n) -struct rx_nfa_state *n; -#endif -{ -	free((char *) n); -} - - -/* This looks up an nfa node, given a numeric id.  Numeric id's are - * assigned after the nfa has been built. - */ -#ifdef __STDC__ -RX_DECL struct rx_nfa_state *rx_id_to_nfa_state(struct rx *rx, int id) -#else -RX_DECL struct rx_nfa_state *rx_id_to_nfa_state(rx, id) -struct rx *rx; -int id; -#endif -{ -	struct rx_nfa_state *n; - -	for (n = rx->nfa_states; n; n = n->next) -		if (n->id == id) -			return n; -	return 0; -} - - -/* This adds an edge between two nodes, but doesn't initialize the  - * edge label. - */ - -#ifdef __STDC__ -RX_DECL struct rx_nfa_edge *rx_nfa_edge(struct rx *rx, -										enum rx_nfa_etype type, -										struct rx_nfa_state *start, -										struct rx_nfa_state *dest) -#else -RX_DECL struct rx_nfa_edge *rx_nfa_edge(rx, type, start, dest) -struct rx *rx; -enum rx_nfa_etype type; -struct rx_nfa_state *start; -struct rx_nfa_state *dest; -#endif -{ -	struct rx_nfa_edge *e; - -	e = (struct rx_nfa_edge *) malloc(sizeof(*e)); -	if (!e) -		return 0; -	e->next = start->edges; -	start->edges = e; -	e->type = type; -	e->dest = dest; -	return e; -} - - -#ifdef __STDC__ -RX_DECL void rx_free_nfa_edge(struct rx_nfa_edge *e) -#else -RX_DECL void rx_free_nfa_edge(e) -struct rx_nfa_edge *e; -#endif -{ -	free((char *) e); -} - - -/* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure - * of an NFA.  These are added to an nfa automaticly by eclose_nfa. 
- */ - -#ifdef __STDC__ -static struct rx_possible_future *rx_possible_future(struct rx *rx, struct rx_se_list -													 *effects) -#else -static struct rx_possible_future *rx_possible_future(rx, effects) -struct rx *rx; -struct rx_se_list *effects; -#endif -{ -	struct rx_possible_future *ec; - -	ec = (struct rx_possible_future *) malloc(sizeof(*ec)); -	if (!ec) -		return 0; -	ec->destset = 0; -	ec->next = 0; -	ec->effects = effects; -	return ec; -} - - -#ifdef __STDC__ -static void rx_free_possible_future(struct rx_possible_future *pf) -#else -static void rx_free_possible_future(pf) -struct rx_possible_future *pf; -#endif -{ -	free((char *) pf); -} - - -#ifdef __STDC__ -RX_DECL void rx_free_nfa(struct rx *rx) -#else -RX_DECL void rx_free_nfa(rx) -struct rx *rx; -#endif -{ -	while (rx->nfa_states) { -		while (rx->nfa_states->edges) { -			switch (rx->nfa_states->edges->type) { -			case ne_cset: -				rx_free_cset(rx, rx->nfa_states->edges->params.cset); -				break; -			default: -				break; -			} -			{ -				struct rx_nfa_edge *e; - -				e = rx->nfa_states->edges; -				rx->nfa_states->edges = rx->nfa_states->edges->next; -				rx_free_nfa_edge(e); -			} -		}						/* while (rx->nfa_states->edges) */ -		{ -			/* Iterate over the partial epsilon closures of rx->nfa_states */ -			struct rx_possible_future *pf = rx->nfa_states->futures; - -			while (pf) { -				struct rx_possible_future *pft = pf; - -				pf = pf->next; -				rx_free_possible_future(pft); -			} -		} -		{ -			struct rx_nfa_state *n; - -			n = rx->nfa_states; -			rx->nfa_states = rx->nfa_states->next; -			rx_free_nfa_state(n); -		} -	} -} - - - -/* This page: translating a pattern expression into an nfa and doing the  - * static part of the nfa->super-nfa translation. - */ - -/* This is the thompson regexp->nfa algorithm.  - * It is modified to allow for `side-effect epsilons.'  
Those are - * edges that are taken whenever a similar epsilon edge would be, - * but which imply that some side effect occurs when the edge  - * is taken. - * - * Side effects are used to model parts of the pattern langauge  - * that are not regular (in the formal sense). - */ - -#ifdef __STDC__ -RX_DECL int -rx_build_nfa(struct rx *rx, -			 struct rexp_node *rexp, -			 struct rx_nfa_state **start, struct rx_nfa_state **end) -#else -RX_DECL int rx_build_nfa(rx, rexp, start, end) -struct rx *rx; -struct rexp_node *rexp; -struct rx_nfa_state **start; -struct rx_nfa_state **end; -#endif -{ -	struct rx_nfa_edge *edge; - -	/* Start & end nodes may have been allocated by the caller. */ -	*start = *start ? *start : rx_nfa_state(rx); - -	if (!*start) -		return 0; - -	if (!rexp) { -		*end = *start; -		return 1; -	} - -	*end = *end ? *end : rx_nfa_state(rx); - -	if (!*end) { -		rx_free_nfa_state(*start); -		return 0; -	} - -	switch (rexp->type) { -	case r_data: -		return 0; - -	case r_cset: -		edge = rx_nfa_edge(rx, ne_cset, *start, *end); -		if (!edge) -			return 0; -		edge->params.cset = rx_copy_cset(rx, rexp->params.cset); -		if (!edge->params.cset) { -			rx_free_nfa_edge(edge); -			return 0; -		} -		return 1; - -	case r_opt: -		return (rx_build_nfa(rx, rexp->params.pair.left, start, end) -				&& rx_nfa_edge(rx, ne_epsilon, *start, *end)); - -	case r_star: -	{ -		struct rx_nfa_state *star_start = 0; -		struct rx_nfa_state *star_end = 0; - -		return (rx_build_nfa(rx, rexp->params.pair.left, -							 &star_start, &star_end) -				&& star_start -				&& star_end -				&& rx_nfa_edge(rx, ne_epsilon, star_start, star_end) -				&& rx_nfa_edge(rx, ne_epsilon, *start, star_start) -				&& rx_nfa_edge(rx, ne_epsilon, star_end, *end) - -				&& rx_nfa_edge(rx, ne_epsilon, star_end, star_start)); -	} - -	case r_2phase_star: -	{ -		struct rx_nfa_state *star_start = 0; -		struct rx_nfa_state *star_end = 0; -		struct rx_nfa_state *loop_exp_start = 0; -		struct rx_nfa_state *loop_exp_end = 
0; - -		return (rx_build_nfa(rx, rexp->params.pair.left, -							 &star_start, &star_end) -				&& rx_build_nfa(rx, rexp->params.pair.right, -								&loop_exp_start, &loop_exp_end) -				&& star_start -				&& star_end -				&& loop_exp_end -				&& loop_exp_start -				&& rx_nfa_edge(rx, ne_epsilon, star_start, *end) -				&& rx_nfa_edge(rx, ne_epsilon, *start, star_start) -				&& rx_nfa_edge(rx, ne_epsilon, star_end, *end) - -				&& rx_nfa_edge(rx, ne_epsilon, star_end, loop_exp_start) -				&& rx_nfa_edge(rx, ne_epsilon, loop_exp_end, star_start)); -	} - - -	case r_concat: -	{ -		struct rx_nfa_state *shared = 0; - -		return (rx_build_nfa(rx, rexp->params.pair.left, start, &shared) -				&& rx_build_nfa(rx, rexp->params.pair.right, &shared, -								end)); -	} - -	case r_alternate: -	{ -		struct rx_nfa_state *ls = 0; -		struct rx_nfa_state *le = 0; -		struct rx_nfa_state *rs = 0; -		struct rx_nfa_state *re = 0; - -		return (rx_build_nfa(rx, rexp->params.pair.left, &ls, &le) -				&& rx_build_nfa(rx, rexp->params.pair.right, &rs, &re) -				&& rx_nfa_edge(rx, ne_epsilon, *start, ls) -				&& rx_nfa_edge(rx, ne_epsilon, *start, rs) -				&& rx_nfa_edge(rx, ne_epsilon, le, *end) -				&& rx_nfa_edge(rx, ne_epsilon, re, *end)); -	} - -	case r_side_effect: -		edge = rx_nfa_edge(rx, ne_side_effect, *start, *end); -		if (!edge) -			return 0; -		edge->params.side_effect = rexp->params.side_effect; -		return 1; -	} - -	/* this should never happen */ -	return 0; -} - - -/* RX_NAME_NFA_STATES identifies all nodes with outgoing non-epsilon - * transitions.  Only these nodes can occur in super-states.   - * All nodes are given an integer id.  - * The id is non-negative if the node has non-epsilon out-transitions, negative - * otherwise (this is because we want the non-negative ids to be used as  - * array indexes in a few places). 
- */ - -#ifdef __STDC__ -RX_DECL void rx_name_nfa_states(struct rx *rx) -#else -RX_DECL void rx_name_nfa_states(rx) -struct rx *rx; -#endif -{ -	struct rx_nfa_state *n = rx->nfa_states; - -	rx->nodec = 0; -	rx->epsnodec = -1; - -	while (n) { -		struct rx_nfa_edge *e = n->edges; - -		if (n->is_start) -			n->eclosure_needed = 1; - -		while (e) { -			switch (e->type) { -			case ne_epsilon: -			case ne_side_effect: -				break; - -			case ne_cset: -				n->id = rx->nodec++; -				{ -					struct rx_nfa_edge *from_n = n->edges; - -					while (from_n) { -						from_n->dest->eclosure_needed = 1; -						from_n = from_n->next; -					} -				} -				goto cont; -			} -			e = e->next; -		} -		n->id = rx->epsnodec--; -	  cont: -		n = n->next; -	} -	rx->epsnodec = -rx->epsnodec; -} - - -/* This page: data structures for the static part of the nfa->supernfa - * translation. - * - * There are side effect lists -- lists of side effects occuring - * along an uninterrupted, acyclic path of side-effect epsilon edges. - * Such paths are collapsed to single edges in the course of computing - * epsilon closures.  Such single edges are labled with a list of all - * the side effects entailed in crossing them.  Like lists of side - * effects are made == by the constructors below. - * - * There are also nfa state sets.  These are used to hold a list of all - * states reachable from a starting state for a given type of transition - * and side effect list.   These are also hash-consed. - */ - -/* The next several functions compare, construct, etc. lists of side - * effects.  See ECLOSE_NFA (below) for details. - */ - -/* Ordering of rx_se_list - * (-1, 0, 1 return value convention). - */ - -#ifdef __STDC__ -static int se_list_cmp(void *va, void *vb) -#else -static int se_list_cmp(va, vb) -void *va; -void *vb; -#endif -{ -	struct rx_se_list *a = (struct rx_se_list *) va; -	struct rx_se_list *b = (struct rx_se_list *) vb; - -	return ((va == vb) -			? 0 -			: (!va -			   ? -1 -			   : (!vb -				  ? 
1 -				  : ((long) a->car < (long) b->car -					 ? 1 -					 : ((long) a->car > (long) b->car -						? -1 -						: se_list_cmp((void *) a->cdr, -									  (void *) b->cdr)))))); -} - - -#ifdef __STDC__ -static int se_list_equal(void *va, void *vb) -#else -static int se_list_equal(va, vb) -void *va; -void *vb; -#endif -{ -	return !(se_list_cmp(va, vb)); -} - -static struct rx_hash_rules se_list_hash_rules = { -	se_list_equal, -	compiler_hash_alloc, -	compiler_free_hash, -	compiler_hash_item_alloc, -	compiler_free_hash_item -}; - - -#ifdef __STDC__ -static struct rx_se_list *side_effect_cons(struct rx *rx, -										   void *se, -										   struct rx_se_list *list) -#else -static struct rx_se_list *side_effect_cons(rx, se, list) -struct rx *rx; -void *se; -struct rx_se_list *list; -#endif -{ -	struct rx_se_list *l; - -	l = ((struct rx_se_list *) malloc(sizeof(*l))); -	if (!l) -		return 0; -	l->car = se; -	l->cdr = list; -	return l; -} - - -#ifdef __STDC__ -static struct rx_se_list *hash_cons_se_prog(struct rx *rx, -											struct rx_hash *memo, -											void *car, -											struct rx_se_list *cdr) -#else -static struct rx_se_list *hash_cons_se_prog(rx, memo, car, cdr) -struct rx *rx; -struct rx_hash *memo; -void *car; -struct rx_se_list *cdr; -#endif -{ -	long hash = (long) car ^ (long) cdr; -	struct rx_se_list template; - -	template.car = car; -	template.cdr = cdr; -	{ -		struct rx_hash_item *it = rx_hash_store(memo, hash, -												(void *) &template, -												&se_list_hash_rules); - -		if (!it) -			return 0; -		if (it->data == (void *) &template) { -			struct rx_se_list *consed; - -			consed = (struct rx_se_list *) malloc(sizeof(*consed)); -			if (!consed) { -				free((char *) it); -				return 0; -			} -			*consed = template; -			it->data = (void *) consed; -		} -		return (struct rx_se_list *) it->data; -	} -} - - -#ifdef __STDC__ -static struct rx_se_list *hash_se_prog(struct rx *rx, struct rx_hash *memo, -									   struct rx_se_list *prog) 
-#else -static struct rx_se_list *hash_se_prog(rx, memo, prog) -struct rx *rx; -struct rx_hash *memo; -struct rx_se_list *prog; -#endif -{ -	struct rx_se_list *answer = 0; - -	while (prog) { -		answer = hash_cons_se_prog(rx, memo, prog->car, answer); -		if (!answer) -			return 0; -		prog = prog->cdr; -	} -	return answer; -} - -#ifdef __STDC__ -static int nfa_set_cmp(void *va, void *vb) -#else -static int nfa_set_cmp(va, vb) -void *va; -void *vb; -#endif -{ -	struct rx_nfa_state_set *a = (struct rx_nfa_state_set *) va; -	struct rx_nfa_state_set *b = (struct rx_nfa_state_set *) vb; - -	return ((va == vb) -			? 0 -			: (!va -			   ? -1 -			   : (!vb -				  ? 1 -				  : (a->car->id < b->car->id -					 ? 1 -					 : (a->car->id > b->car->id -						? -1 -						: nfa_set_cmp((void *) a->cdr, -									  (void *) b->cdr)))))); -} - -#ifdef __STDC__ -static int nfa_set_equal(void *va, void *vb) -#else -static int nfa_set_equal(va, vb) -void *va; -void *vb; -#endif -{ -	return !nfa_set_cmp(va, vb); -} - -static struct rx_hash_rules nfa_set_hash_rules = { -	nfa_set_equal, -	compiler_hash_alloc, -	compiler_free_hash, -	compiler_hash_item_alloc, -	compiler_free_hash_item -}; - - -#ifdef __STDC__ -static struct rx_nfa_state_set *nfa_set_cons(struct rx *rx, -											 struct rx_hash *memo, -											 struct rx_nfa_state *state, -											 struct rx_nfa_state_set *set) -#else -static struct rx_nfa_state_set *nfa_set_cons(rx, memo, state, set) -struct rx *rx; -struct rx_hash *memo; -struct rx_nfa_state *state; -struct rx_nfa_state_set *set; -#endif -{ -	struct rx_nfa_state_set template; -	struct rx_hash_item *node; - -	template.car = state; -	template.cdr = set; -	node = rx_hash_store(memo, -						 (((long) state) >> 8) ^ (long) set, -						 &template, &nfa_set_hash_rules); -	if (!node) -		return 0; -	if (node->data == &template) { -		struct rx_nfa_state_set *l; - -		l = (struct rx_nfa_state_set *) malloc(sizeof(*l)); -		node->data = (void *) l; -		if (!l) -			return 0; -		*l 
= template; -	} -	return (struct rx_nfa_state_set *) node->data; -} - -#ifdef __STDC__ -static struct rx_nfa_state_set *nfa_set_enjoin(struct rx *rx, -											   struct rx_hash *memo, -											   struct rx_nfa_state *state, -											   struct rx_nfa_state_set -											   *set) -#else -static struct rx_nfa_state_set *nfa_set_enjoin(rx, memo, state, set) -struct rx *rx; -struct rx_hash *memo; -struct rx_nfa_state *state; -struct rx_nfa_state_set *set; -#endif -{ -	if (!set || state->id < set->car->id) -		return nfa_set_cons(rx, memo, state, set); -	if (state->id == set->car->id) -		return set; -	else { -		struct rx_nfa_state_set *newcdr - -			= nfa_set_enjoin(rx, memo, state, set->cdr); -		if (newcdr != set->cdr) -			set = nfa_set_cons(rx, memo, set->car, newcdr); -		return set; -	} -} - - - -/* This page: computing epsilon closures.  The closures aren't total. - * Each node's closures are partitioned according to the side effects entailed - * along the epsilon edges.  Return true on success. 
- */ - -struct eclose_frame { -	struct rx_se_list *prog_backwards; -}; -static int eclose_node(struct rx *, struct rx_nfa_state *, -					   struct rx_nfa_state *, struct eclose_frame *); -RX_DECL int rx_eclose_nfa(struct rx *); -RX_DECL void rx_delete_epsilon_transitions(struct rx *); -static int nfacmp(void *, void *); -static int count_hash_nodes(struct rx_hash *); -static void nfa_set_freer(struct rx_hash_item *); -RX_DECL int rx_compactify_nfa(struct rx *, void **, unsigned long *); -static char *rx_cache_malloc(struct rx_cache *, int); -static void rx_cache_free(struct rx_cache *, - -						  struct rx_freelist **, char *); -static void install_transition(struct rx_superstate *, - -							   struct rx_inx *, rx_Bitset); -static int qlen(struct rx_superstate *); -static void check_cache(struct rx_cache *); -static void semifree_superstate(struct rx_cache *); -static void refresh_semifree_superstate - -	(struct rx_cache *, struct rx_superstate *); -static void rx_refresh_this_superstate - -	(struct rx_cache *, struct rx_superstate *); -static void release_superset_low(struct rx_cache *, struct rx_superset *); -RX_DECL void rx_release_superset(struct rx *, struct rx_superset *); -static int rx_really_free_superstate(struct rx_cache *); -static char *rx_cache_get(struct rx_cache *, struct rx_freelist **); -static char *rx_cache_malloc_or_get(struct rx_cache *, -									struct rx_freelist **, int); -static char *rx_cache_get_superstate(struct rx_cache *); -static int supersetcmp(void *, void *); -static struct rx_hash_item -*superset_allocator(struct rx_hash_rules *, void *); -static struct rx_hash -*super_hash_allocator(struct rx_hash_rules *); -static void super_hash_liberator(struct rx_hash *, struct rx_hash_rules *); -static void superset_hash_item_liberator - -	(struct rx_hash_item *, struct rx_hash_rules *); -static int bytes_for_cache_size(int, int); -static void rx_morecore(struct rx_cache *); -RX_DECL struct rx_superset -*rx_superset_cons(struct rx *, 
struct rx_nfa_state *, - -				  struct rx_superset *); -RX_DECL struct rx_superset -*rx_superstate_eclosure_union - -	(struct rx *, struct rx_superset *, struct rx_nfa_state_set *); -static struct rx_distinct_future -*include_futures(struct rx *, -				 struct rx_distinct_future *, - -				 struct rx_nfa_state *, struct rx_superstate *); -RX_DECL struct rx_superstate -*rx_superstate(struct rx *, struct rx_superset *); -static int solve_destination(struct rx *, struct rx_distinct_future *); -static int compute_super_edge(struct rx *, -							  struct rx_distinct_future **, - -							  rx_Bitset, struct rx_superstate *, -							  unsigned char); -static struct rx_super_edge -*rx_super_edge(struct rx *, struct rx_superstate *, - -			   rx_Bitset, struct rx_distinct_future *); -static void install_partial_transition -	(struct rx_superstate *, struct rx_inx *, RX_subset, int); -RX_DECL struct rx_inx -*rx_handle_cache_miss(struct rx *, struct rx_superstate *, - -					  unsigned char, void *); -static boolean - -at_begline_loc_p(__const__ char *, __const__ char *, reg_syntax_t); -static boolean at_endline_loc_p(__const__ char *, __const__ char *, int); -static rx_Bitset -inverse_translation(struct re_pattern_buffer *, char *, -					rx_Bitset, unsigned char *, int); - - -#ifdef __STDC__ -static int -eclose_node(struct rx *rx, struct rx_nfa_state *outnode, -			struct rx_nfa_state *node, struct eclose_frame *frame) -#else -static int eclose_node(rx, outnode, node, frame) -struct rx *rx; -struct rx_nfa_state *outnode; -struct rx_nfa_state *node; -struct eclose_frame *frame; -#endif -{ -	struct rx_nfa_edge *e = node->edges; - -	/* For each node, we follow all epsilon paths to build the closure. -	 * The closure omits nodes that have only epsilon edges. -	 * The closure is split into partial closures -- all the states in -	 * a partial closure are reached by crossing the same list of -	 * of side effects (though not necessarily the same path). 
-	 */ -	if (node->mark) -		return 1; -	node->mark = 1; - -	if (node->id >= 0 || node->is_final) { -		struct rx_possible_future **ec; -		struct rx_se_list *prog_in_order -			= ((struct rx_se_list *) hash_se_prog(rx, -												  &rx->se_list_memo, -												  frame->prog_backwards)); -		int cmp; - -		ec = &outnode->futures; - -		while (*ec) { -			cmp = -				se_list_cmp((void *) (*ec)->effects, -							(void *) prog_in_order); -			if (cmp <= 0) -				break; -			ec = &(*ec)->next; -		} -		if (!*ec || (cmp < 0)) { -			struct rx_possible_future *saved = *ec; - -			*ec = rx_possible_future(rx, prog_in_order); -			(*ec)->next = saved; -			if (!*ec) -				return 0; -		} -		if (node->id >= 0) { -			(*ec)->destset = nfa_set_enjoin(rx, &rx->set_list_memo, -											node, (*ec)->destset); -			if (!(*ec)->destset) -				return 0; -		} -	} - -	while (e) { -		switch (e->type) { -		case ne_epsilon: -			if (!eclose_node(rx, outnode, e->dest, frame)) -				return 0; -			break; -		case ne_side_effect: -		{ -			frame->prog_backwards = side_effect_cons(rx, -													 e->params.side_effect, -													 frame->prog_backwards); -			if (!frame->prog_backwards) -				return 0; -			if (!eclose_node(rx, outnode, e->dest, frame)) -				return 0; -			{ -				struct rx_se_list *dying = frame->prog_backwards; - -				frame->prog_backwards = frame->prog_backwards->cdr; -				free((char *) dying); -			} -			break; -		} -		default: -			break; -		} -		e = e->next; -	} -	node->mark = 0; -	return 1; -} - -#ifdef __STDC__ -RX_DECL int rx_eclose_nfa(struct rx *rx) -#else -RX_DECL int rx_eclose_nfa(rx) -struct rx *rx; -#endif -{ -	struct rx_nfa_state *n = rx->nfa_states; -	struct eclose_frame frame; -	static int rx_id = 0; - -	frame.prog_backwards = 0; -	rx->rx_id = rx_id++; -	bzero(&rx->se_list_memo, sizeof(rx->se_list_memo)); -	bzero(&rx->set_list_memo, sizeof(rx->set_list_memo)); -	while (n) { -		n->futures = 0; -		if (n->eclosure_needed && !eclose_node(rx, n, n, &frame)) -			return 0; -		/* 
clear_marks (rx); */ -		n = n->next; -	} -	return 1; -} - - -/* This deletes epsilon edges from an NFA.  After running eclose_node, - * we have no more need for these edges.  They are removed to simplify - * further operations on the NFA. - */ - -#ifdef __STDC__ -RX_DECL void rx_delete_epsilon_transitions(struct rx *rx) -#else -RX_DECL void rx_delete_epsilon_transitions(rx) -struct rx *rx; -#endif -{ -	struct rx_nfa_state *n = rx->nfa_states; -	struct rx_nfa_edge **e; - -	while (n) { -		e = &n->edges; -		while (*e) { -			struct rx_nfa_edge *t; - -			switch ((*e)->type) { -			case ne_epsilon: -			case ne_side_effect: -				t = *e; -				*e = t->next; -				rx_free_nfa_edge(t); -				break; - -			default: -				e = &(*e)->next; -				break; -			} -		} -		n = n->next; -	} -} - - -/* This page: storing the nfa in a contiguous region of memory for - * subsequent conversion to a super-nfa. - */ - -/* This is for qsort on an array of nfa_states. The order - * is based on state ids and goes  - *		[0...MAX][MIN..-1] where (MAX>=0) and (MIN<0) - * This way, positive ids double as array indices. - */ - -#ifdef __STDC__ -static int nfacmp(void *va, void *vb) -#else -static int nfacmp(va, vb) -void *va; -void *vb; -#endif -{ -	struct rx_nfa_state **a = (struct rx_nfa_state **) va; -	struct rx_nfa_state **b = (struct rx_nfa_state **) vb; - -	return (*a == *b			/* &&&& 3.18 */ -			? 0 : (((*a)->id < 0) == ((*b)->id < 0) -				   ? (((*a)->id < (*b)->id) ? -1 : 1) -				   : (((*a)->id < 0) -					  ? 1 : -1))); -} - -#ifdef __STDC__ -static int count_hash_nodes(struct rx_hash *st) -#else -static int count_hash_nodes(st) -struct rx_hash *st; -#endif -{ -	int x; -	int count = 0; - -	for (x = 0; x < 13; ++x) -		count += ((st->children[x]) -				  ? 
count_hash_nodes(st->children[x]) -				  : st->bucket_size[x]); - -	return count; -} - - -#ifdef __STDC__ -static void se_memo_freer(struct rx_hash_item *node) -#else -static void se_memo_freer(node) -struct rx_hash_item *node; -#endif -{ -	free((char *) node->data); -} - - -#ifdef __STDC__ -static void nfa_set_freer(struct rx_hash_item *node) -#else -static void nfa_set_freer(node) -struct rx_hash_item *node; -#endif -{ -	free((char *) node->data); -} - - -/* This copies an entire NFA into a single malloced block of memory. - * Mostly this is for compatability with regex.c, though it is convenient - * to have the nfa nodes in an array. - */ - -#ifdef __STDC__ -RX_DECL int -rx_compactify_nfa(struct rx *rx, void **mem, unsigned long *size) -#else -RX_DECL int rx_compactify_nfa(rx, mem, size) -struct rx *rx; -void **mem; -unsigned long *size; -#endif -{ -	int total_nodec; -	struct rx_nfa_state *n; -	int edgec = 0; -	int eclosec = 0; -	int se_list_consc = count_hash_nodes(&rx->se_list_memo); -	int nfa_setc = count_hash_nodes(&rx->set_list_memo); -	unsigned long total_size; - -	/* This takes place in two stages.   First, the total size of the -	 * nfa is computed, then structures are copied.   
-	 */ -	n = rx->nfa_states; -	total_nodec = 0; -	while (n) { -		struct rx_nfa_edge *e = n->edges; -		struct rx_possible_future *ec = n->futures; - -		++total_nodec; -		while (e) { -			++edgec; -			e = e->next; -		} -		while (ec) { -			++eclosec; -			ec = ec->next; -		} -		n = n->next; -	} - -	total_size = (total_nodec * sizeof(struct rx_nfa_state) -				  + edgec * rx_sizeof_bitset(rx->local_cset_size) -				  + edgec * sizeof(struct rx_nfa_edge) -				  + nfa_setc * sizeof(struct rx_nfa_state_set) -				  + eclosec * sizeof(struct rx_possible_future) -				  + se_list_consc * sizeof(struct rx_se_list) -				  + rx->reserved); - -	if (total_size > *size) { -		*mem = remalloc(*mem, total_size); -		if (*mem) -			*size = total_size; -		else -			return 0; -	} -	/* Now we've allocated the memory; this copies the NFA. */ -	{ -		static struct rx_nfa_state **scratch = 0; -		static int scratch_alloc = 0; -		struct rx_nfa_state *state_base = (struct rx_nfa_state *) *mem; -		struct rx_nfa_state *new_state = state_base; -		struct rx_nfa_edge *new_edge = (struct rx_nfa_edge *) -			((char *) state_base + total_nodec * sizeof(struct rx_nfa_state)); -		struct rx_se_list *new_se_list = (struct rx_se_list *) -			((char *) new_edge + edgec * sizeof(struct rx_nfa_edge)); -		struct rx_possible_future *new_close = -			((struct rx_possible_future *) -			 ((char *) new_se_list - -			  + se_list_consc * sizeof(struct rx_se_list))); -		struct rx_nfa_state_set *new_nfa_set = ((struct rx_nfa_state_set *) - -												((char *) new_close + -												 eclosec * - -												 sizeof(struct -														rx_possible_future))); -		char *new_bitset = - -			((char *) new_nfa_set + -			 nfa_setc * sizeof(struct rx_nfa_state_set)); -		int x; -		struct rx_nfa_state *n; - -		if (scratch_alloc < total_nodec) { -			scratch = ((struct rx_nfa_state **) -					   remalloc(scratch, total_nodec * sizeof(*scratch))); -			if (scratch) -				scratch_alloc = total_nodec; -			else { -				scratch_alloc = 0; -				
return 0; -			} -		} - -		for (x = 0, n = rx->nfa_states; n; n = n->next) -			scratch[x++] = n; - -		qsort(scratch, total_nodec, sizeof(struct rx_nfa_state *), -			  (__compar_fn_t) nfacmp); - -		for (x = 0; x < total_nodec; ++x) { -			struct rx_possible_future *eclose = scratch[x]->futures; -			struct rx_nfa_edge *edge = scratch[x]->edges; -			struct rx_nfa_state *cn = new_state++; - -			cn->futures = 0; -			cn->edges = 0; -			cn->next = (x == total_nodec - 1) ? 0 : (cn + 1); -			cn->id = scratch[x]->id; -			cn->is_final = scratch[x]->is_final; -			cn->is_start = scratch[x]->is_start; -			cn->mark = 0; -			while (edge) { -				int indx = (edge->dest->id < 0 -							? (total_nodec + edge->dest->id) - -							: edge->dest->id); -				struct rx_nfa_edge *e = new_edge++; -				rx_Bitset cset = (rx_Bitset) new_bitset; - -				new_bitset += rx_sizeof_bitset(rx->local_cset_size); -				rx_bitset_null(rx->local_cset_size, cset); -				rx_bitset_union(rx->local_cset_size, cset, -								edge->params.cset); -				e->next = cn->edges; -				cn->edges = e; -				e->type = edge->type; -				e->dest = state_base + indx; -				e->params.cset = cset; -				edge = edge->next; -			} -			while (eclose) { -				struct rx_possible_future *ec = new_close++; -				struct rx_hash_item *sp; -				struct rx_se_list **sepos; -				struct rx_se_list *sesrc; -				struct rx_nfa_state_set *destlst; -				struct rx_nfa_state_set **destpos; - -				ec->next = cn->futures; -				cn->futures = ec; -				for (sepos = &ec->effects, sesrc = eclose->effects; -					 sesrc; sesrc = sesrc->cdr, sepos = &(*sepos)->cdr) { -					sp = rx_hash_find(&rx->se_list_memo, -									  (long) sesrc-> -									  car ^ (long) sesrc->cdr, sesrc, -									  &se_list_hash_rules); -					if (sp->binding) { -						sesrc = (struct rx_se_list *) sp->binding; -						break; -					} -					*new_se_list = *sesrc; -					sp->binding = (void *) new_se_list; -					*sepos = new_se_list; -					++new_se_list; -				} -				*sepos = sesrc; -				for (destpos = 
&ec->destset, destlst = eclose->destset; -					 destlst; -					 destpos = &(*destpos)->cdr, destlst = destlst->cdr) { -					sp = rx_hash_find(&rx->set_list_memo, -									  ((((long) destlst->car) >> 8) -									   ^ (long) destlst->cdr), -									  destlst, &nfa_set_hash_rules); -					if (sp->binding) { -						destlst = (struct rx_nfa_state_set *) sp->binding; -						break; -					} -					*new_nfa_set = *destlst; -					new_nfa_set->car = state_base + destlst->car->id; -					sp->binding = (void *) new_nfa_set; -					*destpos = new_nfa_set; -					++new_nfa_set; -				} -				*destpos = destlst; -				eclose = eclose->next; -			} -		} -	} -	rx_free_hash_table(&rx->se_list_memo, se_memo_freer, -					   &se_list_hash_rules); -	bzero(&rx->se_list_memo, sizeof(rx->se_list_memo)); -	rx_free_hash_table(&rx->set_list_memo, nfa_set_freer, -					   &nfa_set_hash_rules); -	bzero(&rx->set_list_memo, sizeof(rx->set_list_memo)); - -	rx_free_nfa(rx); -	rx->nfa_states = (struct rx_nfa_state *) *mem; -	return 1; -} - - -/* The functions in the next several pages define the lazy-NFA-conversion used - * by matchers.  The input to this construction is an NFA such as  - * is built by compactify_nfa (rx.c).  The output is the superNFA. - */ - -/* Match engines can use arbitrary values for opcodes.  So, the parse tree  - * is built using instructions names (enum rx_opcode), but the superstate - * nfa is populated with mystery opcodes (void *). - * - * For convenience, here is an id table.  The opcodes are == to their inxs - * - * The lables in re_search_2 would make good values for instructions. - */ - -void *rx_id_instruction_table[rx_num_instructions] = { -	(void *) rx_backtrack_point, -	(void *) rx_do_side_effects, -	(void *) rx_cache_miss, -	(void *) rx_next_char, -	(void *) rx_backtrack, -	(void *) rx_error_inx -}; - - - -/* Memory mgt. for superstate graphs. 
*/ - -#ifdef __STDC__ -static char *rx_cache_malloc(struct rx_cache *cache, int bytes) -#else -static char *rx_cache_malloc(cache, bytes) -struct rx_cache *cache; -int bytes; -#endif -{ -	while (cache->bytes_left < bytes) { -		if (cache->memory_pos) -			cache->memory_pos = cache->memory_pos->next; -		if (!cache->memory_pos) { -			cache->morecore(cache); -			if (!cache->memory_pos) -				return 0; -		} -		cache->bytes_left = cache->memory_pos->bytes; -		cache->memory_addr = ((char *) cache->memory_pos - -							  + sizeof(struct rx_blocklist)); -	} -	cache->bytes_left -= bytes; -	{ -		char *addr = cache->memory_addr; - -		cache->memory_addr += bytes; -		return addr; -	} -} - -#ifdef __STDC__ -static void -rx_cache_free(struct rx_cache *cache, -			  struct rx_freelist **freelist, char *mem) -#else -static void rx_cache_free(cache, freelist, mem) -struct rx_cache *cache; -struct rx_freelist **freelist; -char *mem; -#endif -{ -	struct rx_freelist *it = (struct rx_freelist *) mem; - -	it->next = *freelist; -	*freelist = it; -} - -/* The partially instantiated superstate graph has a transition  - * table at every node.  There is one entry for every character. - * This fills in the transition for a set. 
- */ -#ifdef __STDC__ -static void -install_transition(struct rx_superstate *super, -				   struct rx_inx *answer, rx_Bitset trcset) -#else -static void install_transition(super, answer, trcset) -struct rx_superstate *super; -struct rx_inx *answer; -rx_Bitset trcset; -#endif -{ -	struct rx_inx *transitions = super->transitions; -	int chr; - -	for (chr = 0; chr < 256;) -		if (!*trcset) { -			++trcset; -			chr += 32; -		} else { -			RX_subset sub = *trcset; -			RX_subset mask = 1; -			int bound = chr + 32; - -			while (chr < bound) { -				if (sub & mask) -					transitions[chr] = *answer; -				++chr; -				mask <<= 1; -			} -			++trcset; -		} -} - -#ifdef __STDC__ -static int qlen(struct rx_superstate *q) -#else -static int qlen(q) -struct rx_superstate *q; -#endif -{ -	int count = 1; -	struct rx_superstate *it; - -	if (!q) -		return 0; -	for (it = q->next_recyclable; it != q; it = it->next_recyclable) -		++count; -	return count; -} - -#ifdef __STDC__ -static void check_cache(struct rx_cache *cache) -#else -static void check_cache(cache) -struct rx_cache *cache; -#endif -{ -	struct rx_cache *you_fucked_up = 0; -	int total = cache->superstates; -	int semi = cache->semifree_superstates; - -	if (semi != qlen(cache->semifree_superstate)) -		check_cache(you_fucked_up); -	if ((total - semi) != qlen(cache->lru_superstate)) -		check_cache(you_fucked_up); -} - -/* When a superstate is old and neglected, it can enter a  - * semi-free state.  A semi-free state is slated to die. - * Incoming transitions to a semi-free state are re-written - * to cause an (interpreted) fault when they are taken. - * The fault handler revives the semi-free state, patches - * incoming transitions back to normal, and continues. - * - * The idea is basicly to free in two stages, aborting  - * between the two if the state turns out to be useful again. - * When a free is aborted, the rescued superstate is placed - * in the most-favored slot to maximize the time until it - * is next semi-freed. 
- */ - -#ifdef __STDC__ -static void semifree_superstate(struct rx_cache *cache) -#else -static void semifree_superstate(cache) -struct rx_cache *cache; -#endif -{ -	int disqualified = cache->semifree_superstates; - -	if (disqualified == cache->superstates) -		return; -	while (cache->lru_superstate->locks) { -		cache->lru_superstate = cache->lru_superstate->next_recyclable; -		++disqualified; -		if (disqualified == cache->superstates) -			return; -	} -	{ -		struct rx_superstate *it = cache->lru_superstate; - -		it->next_recyclable->prev_recyclable = it->prev_recyclable; -		it->prev_recyclable->next_recyclable = it->next_recyclable; -		cache->lru_superstate = (it == it->next_recyclable -								 ? 0 : it->next_recyclable); -		if (!cache->semifree_superstate) { -			cache->semifree_superstate = it; -			it->next_recyclable = it; -			it->prev_recyclable = it; -		} else { -			it->prev_recyclable = -				cache->semifree_superstate->prev_recyclable; -			it->next_recyclable = cache->semifree_superstate; -			it->prev_recyclable->next_recyclable = it; -			it->next_recyclable->prev_recyclable = it; -		} -		{ -			struct rx_distinct_future *df; - -			it->is_semifree = 1; -			++cache->semifree_superstates; -			df = it->transition_refs; -			if (df) { -				df->prev_same_dest->next_same_dest = 0; -				for (df = it->transition_refs; df; df = df->next_same_dest) { -					df->future_frame.inx = -						cache->instruction_table[rx_cache_miss]; -					df->future_frame.data = 0; -					df->future_frame.data_2 = (void *) df; -					/* If there are any NEXT-CHAR instruction frames that -					 * refer to this state, we convert them to CACHE-MISS frames. 
-					 */ -					if (!df->effects -						&& (df->edge->options->next_same_super_edge[0] -							== df->edge->options)) -						install_transition(df->present, &df->future_frame, -										   df->edge->cset); -				} -				df = it->transition_refs; -				df->prev_same_dest->next_same_dest = df; -			} -		} -	} -} - -#ifdef __STDC__ -static void -refresh_semifree_superstate(struct rx_cache *cache, -							struct rx_superstate *super) -#else -static void refresh_semifree_superstate(cache, super) -struct rx_cache *cache; -struct rx_superstate *super; -#endif -{ -	struct rx_distinct_future *df; - -	if (super->transition_refs) { -		super->transition_refs->prev_same_dest->next_same_dest = 0; -		for (df = super->transition_refs; df; df = df->next_same_dest) { -			df->future_frame.inx = cache->instruction_table[rx_next_char]; -			df->future_frame.data = (void *) super->transitions; -			/* CACHE-MISS instruction frames that refer to this state, -			 * must be converted to NEXT-CHAR frames. -			 */ -			if (!df->effects && (df->edge->options->next_same_super_edge[0] -								 == df->edge->options)) -				install_transition(df->present, &df->future_frame, -								   df->edge->cset); -		} -		super->transition_refs->prev_same_dest->next_same_dest -			= super->transition_refs; -	} -	if (cache->semifree_superstate == super) -		cache->semifree_superstate = (super->prev_recyclable == super -									  ? 
0 : super->prev_recyclable); -	super->next_recyclable->prev_recyclable = super->prev_recyclable; -	super->prev_recyclable->next_recyclable = super->next_recyclable; - -	if (!cache->lru_superstate) -		(cache->lru_superstate -		 = super->next_recyclable = super->prev_recyclable = super); -	else { -		super->next_recyclable = cache->lru_superstate; -		super->prev_recyclable = cache->lru_superstate->prev_recyclable; -		super->next_recyclable->prev_recyclable = super; -		super->prev_recyclable->next_recyclable = super; -	} -	super->is_semifree = 0; -	--cache->semifree_superstates; -} - -#ifdef __STDC__ -static void -rx_refresh_this_superstate(struct rx_cache *cache, -						   struct rx_superstate *superstate) -#else -static void rx_refresh_this_superstate(cache, superstate) -struct rx_cache *cache; -struct rx_superstate *superstate; -#endif -{ -	if (superstate->is_semifree) -		refresh_semifree_superstate(cache, superstate); -	else if (cache->lru_superstate == superstate) -		cache->lru_superstate = superstate->next_recyclable; -	else if (superstate != cache->lru_superstate->prev_recyclable) { -		superstate->next_recyclable->prev_recyclable -			= superstate->prev_recyclable; -		superstate->prev_recyclable->next_recyclable -			= superstate->next_recyclable; -		superstate->next_recyclable = cache->lru_superstate; -		superstate->prev_recyclable = -			cache->lru_superstate->prev_recyclable; -		superstate->next_recyclable->prev_recyclable = superstate; -		superstate->prev_recyclable->next_recyclable = superstate; -	} -} - -#ifdef __STDC__ -static void -release_superset_low(struct rx_cache *cache, struct rx_superset *set) -#else -static void release_superset_low(cache, set) -struct rx_cache *cache; -struct rx_superset *set; -#endif -{ -	if (!--set->refs) { -		if (set->cdr) -			release_superset_low(cache, set->cdr); - -		set->starts_for = 0; - -		rx_hash_free -			(rx_hash_find -			 (&cache->superset_table, -			  (unsigned long) set->car ^ set-> -			  id ^ (unsigned long) 
set->cdr, (void *) set, -			  &cache->superset_hash_rules), &cache->superset_hash_rules); -		rx_cache_free(cache, &cache->free_supersets, (char *) set); -	} -} - -#ifdef __STDC__ -RX_DECL void rx_release_superset(struct rx *rx, struct rx_superset *set) -#else -RX_DECL void rx_release_superset(rx, set) -struct rx *rx; -struct rx_superset *set; -#endif -{ -	release_superset_low(rx->cache, set); -} - -/* This tries to add a new superstate to the superstate freelist. - * It might, as a result, free some edge pieces or hash tables. - * If nothing can be freed because too many locks are being held, fail. - */ - -#ifdef __STDC__ -static int rx_really_free_superstate(struct rx_cache *cache) -#else -static int rx_really_free_superstate(cache) -struct rx_cache *cache; -#endif -{ -	int locked_superstates = 0; -	struct rx_superstate *it; - -	if (!cache->superstates) -		return 0; - -	{ -		/* This is a total guess.  The idea is that we should expect as -		 * many misses as we've recently experienced.  I.e., cache->misses -		 * should be the same as cache->semifree_superstates. -		 */ -		while ((cache->hits + cache->misses) > cache->superstates_allowed) { -			cache->hits >>= 1; -			cache->misses >>= 1; -		} -		if (((cache->hits + cache->misses) * cache->semifree_superstates) -			< (cache->superstates * cache->misses)) { -			semifree_superstate(cache); -			semifree_superstate(cache); -		} -	} - -	while (cache->semifree_superstate && cache->semifree_superstate->locks) { -		refresh_semifree_superstate(cache, cache->semifree_superstate); -		++locked_superstates; -		if (locked_superstates == cache->superstates) -			return 0; -	} - -	if (cache->semifree_superstate) { -		it = cache->semifree_superstate; -		it->next_recyclable->prev_recyclable = it->prev_recyclable; -		it->prev_recyclable->next_recyclable = it->next_recyclable; -		cache->semifree_superstate = ((it == it->next_recyclable) -									  ? 
0 : it->next_recyclable); -		--cache->semifree_superstates; -	} else { -		while (cache->lru_superstate->locks) { -			cache->lru_superstate = cache->lru_superstate->next_recyclable; -			++locked_superstates; -			if (locked_superstates == cache->superstates) -				return 0; -		} -		it = cache->lru_superstate; -		it->next_recyclable->prev_recyclable = it->prev_recyclable; -		it->prev_recyclable->next_recyclable = it->next_recyclable; -		cache->lru_superstate = ((it == it->next_recyclable) -								 ? 0 : it->next_recyclable); -	} - -	if (it->transition_refs) { -		struct rx_distinct_future *df; - -		for (df = it->transition_refs, -			 df->prev_same_dest->next_same_dest = 0; -			 df; df = df->next_same_dest) { -			df->future_frame.inx = cache->instruction_table[rx_cache_miss]; -			df->future_frame.data = 0; -			df->future_frame.data_2 = (void *) df; -			df->future = 0; -		} -		it->transition_refs->prev_same_dest->next_same_dest = -			it->transition_refs; -	} -	{ -		struct rx_super_edge *tc = it->edges; - -		while (tc) { -			struct rx_distinct_future *df; -			struct rx_super_edge *tct = tc->next; - -			df = tc->options; -			df->next_same_super_edge[1]->next_same_super_edge[0] = 0; -			while (df) { -				struct rx_distinct_future *dft = df; - -				df = df->next_same_super_edge[0]; - - -				if (dft->future && dft->future->transition_refs == dft) { -					dft->future->transition_refs = dft->next_same_dest; -					if (dft->future->transition_refs == dft) -						dft->future->transition_refs = 0; -				} -				dft->next_same_dest->prev_same_dest = dft->prev_same_dest; -				dft->prev_same_dest->next_same_dest = dft->next_same_dest; -				rx_cache_free(cache, &cache->free_discernable_futures, -							  (char *) dft); -			} -			rx_cache_free(cache, &cache->free_transition_classes, -						  (char *) tc); -			tc = tct; -		} -	} - -	if (it->contents->superstate == it) -		it->contents->superstate = 0; -	release_superset_low(cache, it->contents); -	rx_cache_free(cache, 
&cache->free_superstates, (char *) it); -	--cache->superstates; -	return 1; -} - -#ifdef __STDC__ -static char *rx_cache_get(struct rx_cache *cache, -						  struct rx_freelist **freelist) -#else -static char *rx_cache_get(cache, freelist) -struct rx_cache *cache; -struct rx_freelist **freelist; -#endif -{ -	while (!*freelist && rx_really_free_superstate(cache)); -	if (!*freelist) -		return 0; -	{ -		struct rx_freelist *it = *freelist; - -		*freelist = it->next; -		return (char *) it; -	} -} - -#ifdef __STDC__ -static char *rx_cache_malloc_or_get(struct rx_cache *cache, -									struct rx_freelist **freelist, -									int bytes) -#else -static char *rx_cache_malloc_or_get(cache, freelist, bytes) -struct rx_cache *cache; -struct rx_freelist **freelist; -int bytes; -#endif -{ -	if (!*freelist) { -		char *answer = rx_cache_malloc(cache, bytes); - -		if (answer) -			return answer; -	} - -	return rx_cache_get(cache, freelist); -} - -#ifdef __STDC__ -static char *rx_cache_get_superstate(struct rx_cache *cache) -#else -static char *rx_cache_get_superstate(cache) -struct rx_cache *cache; -#endif -{ -	char *answer; -	int bytes = (sizeof(struct rx_superstate) -				 + cache->local_cset_size * sizeof(struct rx_inx)); - -	if (!cache->free_superstates -		&& (cache->superstates < cache->superstates_allowed)) { -		answer = rx_cache_malloc(cache, bytes); -		if (answer) { -			++cache->superstates; -			return answer; -		} -	} -	answer = rx_cache_get(cache, &cache->free_superstates); -	if (!answer) { -		answer = rx_cache_malloc(cache, bytes); -		if (answer) -			++cache->superstates_allowed; -	} -	++cache->superstates; -	return answer; -} - - - -#ifdef __STDC__ -static int supersetcmp(void *va, void *vb) -#else -static int supersetcmp(va, vb) -void *va; -void *vb; -#endif -{ -	struct rx_superset *a = (struct rx_superset *) va; -	struct rx_superset *b = (struct rx_superset *) vb; - -	return ((a == b) -			|| (a && b && (a->car == b->car) && (a->cdr == b->cdr))); -} - -#ifdef __STDC__ 
-static struct rx_hash_item *superset_allocator(struct rx_hash_rules *rules, -											   void *val) -#else -static struct rx_hash_item *superset_allocator(rules, val) -struct rx_hash_rules *rules; -void *val; -#endif -{ -	struct rx_cache *cache = ((struct rx_cache *) -							  ((char *) rules -							   - - -							   (unsigned -								long) (&((struct rx_cache *) -										 0)->superset_hash_rules))); -	struct rx_superset *template = (struct rx_superset *) val; -	struct rx_superset *newset -		= ((struct rx_superset *) rx_cache_malloc_or_get(cache, -														 &cache->free_supersets, -														 sizeof - -														 (*template))); -	if (!newset) -		return 0; -	newset->refs = 0; -	newset->car = template->car; -	newset->id = template->car->id; -	newset->cdr = template->cdr; -	newset->superstate = 0; -	rx_protect_superset(rx, template->cdr); -	newset->hash_item.data = (void *) newset; -	newset->hash_item.binding = 0; -	return &newset->hash_item; -} - -#ifdef __STDC__ -static struct rx_hash *super_hash_allocator(struct rx_hash_rules *rules) -#else -static struct rx_hash *super_hash_allocator(rules) -struct rx_hash_rules *rules; -#endif -{ -	struct rx_cache *cache = ((struct rx_cache *) -							  ((char *) rules -							   - - -							   (unsigned -								long) (&((struct rx_cache *) -										 0)->superset_hash_rules))); -	return ((struct rx_hash *) -			rx_cache_malloc_or_get(cache, &cache->free_hash, - -								   sizeof(struct rx_hash))); -} - - -#ifdef __STDC__ -static void -super_hash_liberator(struct rx_hash *hash, struct rx_hash_rules *rules) -#else -static void super_hash_liberator(hash, rules) -struct rx_hash *hash; -struct rx_hash_rules *rules; -#endif -{ -	struct rx_cache *cache = ((struct rx_cache *) - -							  (char *) rules - -							  (long) (& - -									  ((struct rx_cache *) -									   0)->superset_hash_rules)); -	rx_cache_free(cache, &cache->free_hash, (char *) hash); -} - -#ifdef __STDC__ -static void 
-superset_hash_item_liberator(struct rx_hash_item *it, -							 struct rx_hash_rules *rules) -#else -static void superset_hash_item_liberator(it, rules)	/* Well, it does ya know. */ -struct rx_hash_item *it; -struct rx_hash_rules *rules; -#endif -{ -} - -int rx_cache_bound = 128; -static int rx_default_cache_got = 0; - -#ifdef __STDC__ -static int bytes_for_cache_size(int supers, int cset_size) -#else -static int bytes_for_cache_size(supers, cset_size) -int supers; -int cset_size; -#endif -{ -	/* What the hell is this? !!! */ -	return (int) -		((float) supers * ((1.03 * (float) (rx_sizeof_bitset(cset_size) -											+ -											sizeof(struct rx_super_edge))) -						   + -						   (1.80 * -							(float) sizeof(struct rx_possible_future)) + -						   (float) (sizeof(struct rx_superstate) -									+ cset_size * sizeof(struct rx_inx)))); -} - -#ifdef __STDC__ -static void rx_morecore(struct rx_cache *cache) -#else -static void rx_morecore(cache) -struct rx_cache *cache; -#endif -{ -	if (rx_default_cache_got >= rx_cache_bound) -		return; - -	rx_default_cache_got += 16; -	cache->superstates_allowed = rx_cache_bound; -	{ -		struct rx_blocklist **pos = &cache->memory; -		int size = bytes_for_cache_size(16, cache->local_cset_size); - -		while (*pos) -			pos = &(*pos)->next; -		*pos = ((struct rx_blocklist *) -				malloc(size + sizeof(struct rx_blocklist))); - -		if (!*pos) -			return; - -		(*pos)->next = 0; -		(*pos)->bytes = size; -		cache->memory_pos = *pos; -		cache->memory_addr = (char *) *pos + sizeof(**pos); -		cache->bytes_left = size; -	} -} - -static struct rx_cache default_cache = { -	{ -	 supersetcmp, -	 super_hash_allocator, -	 super_hash_liberator, -	 superset_allocator, -	 superset_hash_item_liberator, -	 }, -	0, -	0, -	0, -	0, -	rx_morecore, - -	0, -	0, -	0, -	0, -	0, - -	0, -	0, - -	0, - -	0, -	0, -	0, -	0, -	128, - -	256, -	rx_id_instruction_table, - -	{ -	 0, -	 0, -	 {0}, -	 {0}, -	 {0} -	 } -}; - -/* This adds an element to a superstate set.  
These sets are lists, such - * that lists with == elements are ==.  The empty set is returned by - * superset_cons (rx, 0, 0) and is NOT equivelent to  - * (struct rx_superset)0. - */ - -#ifdef __STDC__ -RX_DECL struct rx_superset *rx_superset_cons(struct rx *rx, -											 struct rx_nfa_state *car, -											 struct rx_superset *cdr) -#else -RX_DECL struct rx_superset *rx_superset_cons(rx, car, cdr) -struct rx *rx; -struct rx_nfa_state *car; -struct rx_superset *cdr; -#endif -{ -	struct rx_cache *cache = rx->cache; - -	if (!car && !cdr) { -		if (!cache->empty_superset) { -			cache->empty_superset = ((struct rx_superset *) -									 rx_cache_malloc_or_get(cache, -															&cache->free_supersets, - -															sizeof(struct -																   rx_superset))); -			if (!cache->empty_superset) -				return 0; -			bzero(cache->empty_superset, sizeof(struct rx_superset)); - -			cache->empty_superset->refs = 1000; -		} -		return cache->empty_superset; -	} -	{ -		struct rx_superset template; -		struct rx_hash_item *hit; - -		template.car = car; -		template.cdr = cdr; -		template.id = car->id; -		/* While hash_store will protect cdr itself it might first allocate hash -		   tables and stuff which might cause it to be garbage collected before -		   it's protected -- [gsstark:19961026.2155EST] */ -		rx_protect_superset(rx, cdr); -		hit = rx_hash_store(&cache->superset_table, -							(unsigned long) car ^ car->id ^ (unsigned long) -							cdr, (void *) &template, -							&cache->superset_hash_rules); -		rx_release_superset(rx, cdr); -		return (hit ? (struct rx_superset *) hit->data : 0); -	} -} - -/* This computes a union of two NFA state sets.  The sets do not have the - * same representation though.  One is a RX_SUPERSET structure (part - * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA). 
- */ - -#ifdef __STDC__ -RX_DECL struct rx_superset *rx_superstate_eclosure_union -	(struct rx *rx, struct rx_superset *set, struct rx_nfa_state_set *ecl) -#else -RX_DECL struct rx_superset *rx_superstate_eclosure_union(rx, set, ecl) -struct rx *rx; -struct rx_superset *set; -struct rx_nfa_state_set *ecl; -#endif -{ -	if (!ecl) -		return set; - -	if (!set->car) -		return rx_superset_cons(rx, ecl->car, -								rx_superstate_eclosure_union(rx, set, -															 ecl->cdr)); -	if (set->car == ecl->car) -		return rx_superstate_eclosure_union(rx, set, ecl->cdr); - -	{ -		struct rx_superset *tail; -		struct rx_nfa_state *first; - -		if (set->car > ecl->car) { -			tail = rx_superstate_eclosure_union(rx, set->cdr, ecl); -			first = set->car; -		} else { -			tail = rx_superstate_eclosure_union(rx, set, ecl->cdr); -			first = ecl->car; -		} -		if (!tail) -			return 0; -		else { -			struct rx_superset *answer; - -			answer = rx_superset_cons(rx, first, tail); -			if (!answer) { -				rx_protect_superset(rx, tail); -				rx_release_superset(rx, tail); -				return 0; -			} else -				return answer; -		} -	} -} - - - - -/* - * This makes sure that a list of rx_distinct_futures contains - * a future for each possible set of side effects in the eclosure - * of a given state.  This is some of the work of filling in a - * superstate transition.  
- */ - -#ifdef __STDC__ -static struct rx_distinct_future *include_futures(struct rx *rx, struct rx_distinct_future -												  *df, struct rx_nfa_state -												  *state, struct rx_superstate -												  *superstate) -#else -static struct rx_distinct_future *include_futures(rx, df, state, -												  superstate) -struct rx *rx; -struct rx_distinct_future *df; -struct rx_nfa_state *state; -struct rx_superstate *superstate; -#endif -{ -	struct rx_possible_future *future; -	struct rx_cache *cache = rx->cache; - -	for (future = state->futures; future; future = future->next) { -		struct rx_distinct_future *dfp; -		struct rx_distinct_future *insert_before = 0; - -		if (df) -			df->next_same_super_edge[1]->next_same_super_edge[0] = 0; -		for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0]) -			if (dfp->effects == future->effects) -				break; -			else { -				int order = - -					rx->se_list_cmp(rx, dfp->effects, future->effects); -				if (order > 0) { -					insert_before = dfp; -					dfp = 0; -					break; -				} -			} -		if (df) -			df->next_same_super_edge[1]->next_same_super_edge[0] = df; -		if (!dfp) { -			dfp = ((struct rx_distinct_future *) -				   rx_cache_malloc_or_get(cache, -										  &cache->free_discernable_futures, - -										  sizeof(struct -												 rx_distinct_future))); -			if (!dfp) -				return 0; -			if (!df) { -				df = insert_before = dfp; -				df->next_same_super_edge[0] = df->next_same_super_edge[1] = -					df; -			} else if (!insert_before) -				insert_before = df; -			else if (insert_before == df) -				df = dfp; - -			dfp->next_same_super_edge[0] = insert_before; -			dfp->next_same_super_edge[1] -				= insert_before->next_same_super_edge[1]; -			dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp; -			dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp; -			dfp->next_same_dest = dfp->prev_same_dest = dfp; -			dfp->future = 0; -			dfp->present = superstate; -			dfp->future_frame.inx = 
rx->instruction_table[rx_cache_miss]; -			dfp->future_frame.data = 0; -			dfp->future_frame.data_2 = (void *) dfp; -			dfp->side_effects_frame.inx -				= rx->instruction_table[rx_do_side_effects]; -			dfp->side_effects_frame.data = 0; -			dfp->side_effects_frame.data_2 = (void *) dfp; -			dfp->effects = future->effects; -		} -	} -	return df; -} - - -/* This constructs a new superstate from its state set.  The only  - * complexity here is memory management. - */ -#ifdef __STDC__ -RX_DECL struct rx_superstate *rx_superstate(struct rx *rx, -											struct rx_superset *set) -#else -RX_DECL struct rx_superstate *rx_superstate(rx, set) -struct rx *rx; -struct rx_superset *set; -#endif -{ -	struct rx_cache *cache = rx->cache; -	struct rx_superstate *superstate = 0; - -	/* Does the superstate already exist in the cache? */ -	if (set->superstate) { -		if (set->superstate->rx_id != rx->rx_id) { -			/* Aha.  It is in the cache, but belongs to a superstate -			 * that refers to an NFA that no longer exists. -			 * (We know it no longer exists because it was evidently -			 *  stored in the same region of memory as the current nfa -			 *  yet it has a different id.) 
-			 */ -			superstate = set->superstate; -			if (!superstate->is_semifree) { -				if (cache->lru_superstate == superstate) { -					cache->lru_superstate = superstate->next_recyclable; -					if (cache->lru_superstate == superstate) -						cache->lru_superstate = 0; -				} -				{ -					superstate->next_recyclable->prev_recyclable -						= superstate->prev_recyclable; -					superstate->prev_recyclable->next_recyclable -						= superstate->next_recyclable; -					if (!cache->semifree_superstate) { -						(cache->semifree_superstate -						 = superstate->next_recyclable -						 = superstate->prev_recyclable = superstate); -					} else { -						superstate->next_recyclable = -							cache->semifree_superstate; -						superstate->prev_recyclable = -							cache->semifree_superstate->prev_recyclable; -						superstate->next_recyclable->prev_recyclable = -							superstate; -						superstate->prev_recyclable->next_recyclable = -							superstate; -						cache->semifree_superstate = superstate; -					} -					++cache->semifree_superstates; -				} -			} -			set->superstate = 0; -			goto handle_cache_miss; -		} -		++cache->hits; -		superstate = set->superstate; - -		rx_refresh_this_superstate(cache, superstate); -		return superstate; -	} - -  handle_cache_miss: - -	/* This point reached only for cache misses. 
*/ -	++cache->misses; -#if RX_DEBUG -	if (rx_debug_trace > 1) { -		struct rx_superset *setp = set; - -		fprintf(stderr, "Building a superstet %d(%d): ", rx->rx_id, set); -		while (setp) { -			fprintf(stderr, "%d ", setp->id); -			setp = setp->cdr; -		} -		fprintf(stderr, "(%d)\n", set); -	} -#endif -	superstate = (struct rx_superstate *) rx_cache_get_superstate(cache); -	if (!superstate) -		return 0; - -	if (!cache->lru_superstate) -		(cache->lru_superstate -		 = superstate->next_recyclable -		 = superstate->prev_recyclable = superstate); -	else { -		superstate->next_recyclable = cache->lru_superstate; -		superstate->prev_recyclable = -			cache->lru_superstate->prev_recyclable; -		(superstate->prev_recyclable->next_recyclable = -		 superstate->next_recyclable->prev_recyclable = superstate); -	} -	superstate->rx_id = rx->rx_id; -	superstate->transition_refs = 0; -	superstate->locks = 0; -	superstate->is_semifree = 0; -	set->superstate = superstate; -	superstate->contents = set; -	rx_protect_superset(rx, set); -	superstate->edges = 0; -	{ -		int x; - -		/* None of the transitions from this superstate are known yet. */ -		for (x = 0; x < rx->local_cset_size; ++x) {	/* &&&&& 3.8 % */ -			struct rx_inx *ifr = &superstate->transitions[x]; - -			ifr->inx = rx->instruction_table[rx_cache_miss]; -			ifr->data = ifr->data_2 = 0; -		} -	} -	return superstate; -} - - -/* This computes the destination set of one edge of the superstate NFA. - * Note that a RX_DISTINCT_FUTURE is a superstate edge. - * Returns 0 on an allocation failure. 
- */ - -#ifdef __STDC__ -static int solve_destination(struct rx *rx, struct rx_distinct_future *df) -#else -static int solve_destination(rx, df) -struct rx *rx; -struct rx_distinct_future *df; -#endif -{ -	struct rx_super_edge *tc = df->edge; -	struct rx_superset *nfa_state; -	struct rx_superset *nil_set = rx_superset_cons(rx, 0, 0); -	struct rx_superset *solution = nil_set; -	struct rx_superstate *dest; - -	rx_protect_superset(rx, solution); -	/* Iterate over all NFA states in the state set of this superstate. */ -	for (nfa_state = df->present->contents; -		 nfa_state->car; nfa_state = nfa_state->cdr) { -		struct rx_nfa_edge *e; - -		/* Iterate over all edges of each NFA state. */ -		for (e = nfa_state->car->edges; e; e = e->next) -			/* If we find an edge that is labeled with  -			   * the characters we are solving for..... -			 */ -			if (rx_bitset_is_subset(rx->local_cset_size, -									tc->cset, e->params.cset)) { -				struct rx_nfa_state *n = e->dest; -				struct rx_possible_future *pf; - -				/* ....search the partial epsilon closures of the destination -				 * of that edge for a path that involves the same set of -				 * side effects we are solving for. -				 * If we find such a RX_POSSIBLE_FUTURE, we add members to the -				 * stateset we are computing. -				 */ -				for (pf = n->futures; pf; pf = pf->next) -					if (pf->effects == df->effects) { -						struct rx_superset *old_sol; - -						old_sol = solution; -						solution = -							rx_superstate_eclosure_union(rx, solution, -														 pf->destset); -						if (!solution) -							return 0; -						rx_protect_superset(rx, solution); -						rx_release_superset(rx, old_sol); -					} -			} -	} -	/* It is possible that the RX_DISTINCT_FUTURE we are working on has  -	 * the empty set of NFA states as its definition.  In that case, this -	 * is a failure point. 
-	 */ -	if (solution == nil_set) { -		df->future_frame.inx = (void *) rx_backtrack; -		df->future_frame.data = 0; -		df->future_frame.data_2 = 0; -		return 1; -	} -	dest = rx_superstate(rx, solution); -	rx_release_superset(rx, solution); -	if (!dest) -		return 0; - -	{ -		struct rx_distinct_future *dft; - -		dft = df; -		df->prev_same_dest->next_same_dest = 0; -		while (dft) { -			dft->future = dest; -			dft->future_frame.inx = rx->instruction_table[rx_next_char]; -			dft->future_frame.data = (void *) dest->transitions; -			dft = dft->next_same_dest; -		} -		df->prev_same_dest->next_same_dest = df; -	} -	if (!dest->transition_refs) -		dest->transition_refs = df; -	else { -		struct rx_distinct_future *dft = - -			dest->transition_refs->next_same_dest; -		dest->transition_refs->next_same_dest = df->next_same_dest; -		df->next_same_dest->prev_same_dest = dest->transition_refs; -		df->next_same_dest = dft; -		dft->prev_same_dest = df; -	} -	return 1; -} - - -/* This takes a superstate and a character, and computes some edges - * from the superstate NFA.  In particular, this computes all edges - * that lead from SUPERSTATE given CHR.   This function also  - * computes the set of characters that share this edge set. - * This returns 0 on allocation error. - * The character set and list of edges are returned through  - * the paramters CSETOUT and DFOUT. -} */ - -#ifdef __STDC__ -static int -compute_super_edge(struct rx *rx, struct rx_distinct_future **dfout, -				   rx_Bitset csetout, struct rx_superstate *superstate, -				   unsigned char chr) -#else -static int compute_super_edge(rx, dfout, csetout, superstate, chr) -struct rx *rx; -struct rx_distinct_future **dfout; -rx_Bitset csetout; -struct rx_superstate *superstate; -unsigned char chr; -#endif -{ -	struct rx_superset *stateset = superstate->contents; - -	/* To compute the set of characters that share edges with CHR,  -	 * we start with the full character set, and subtract. 
-	 */ -	rx_bitset_universe(rx->local_cset_size, csetout); -	*dfout = 0; - -	/* Iterate over the NFA states in the superstate state-set. */ -	while (stateset->car) { -		struct rx_nfa_edge *e; - -		for (e = stateset->car->edges; e; e = e->next) -			if (RX_bitset_member(e->params.cset, chr)) { -				/* If we find an NFA edge that applies, we make sure there -				 * are corresponding edges in the superstate NFA. -				 */ -				{ -					struct rx_distinct_future *saved; - -					saved = *dfout; -					*dfout = -						include_futures(rx, *dfout, e->dest, superstate); -					if (!*dfout) { -						struct rx_distinct_future *df; - -						df = saved; -						if (df) -							df-> -								next_same_super_edge -								[1]->next_same_super_edge[0] = 0; -						while (df) { -							struct rx_distinct_future *dft; - -							dft = df; -							df = df->next_same_super_edge[0]; - -							if (dft->future -								&& dft->future->transition_refs == dft) { -								dft->future->transition_refs = -									dft->next_same_dest; -								if (dft->future->transition_refs == dft) -									dft->future->transition_refs = 0; -							} -							dft->next_same_dest->prev_same_dest = -								dft->prev_same_dest; -							dft->prev_same_dest->next_same_dest = -								dft->next_same_dest; -							rx_cache_free(rx->cache, -										  &rx-> -										  cache->free_discernable_futures, -										  (char *) dft); -						} -						return 0; -					} -				} -				/* We also trim the character set a bit. */ -				rx_bitset_intersection(rx->local_cset_size, -									   csetout, e->params.cset); -			} else -				/* An edge that doesn't apply at least tells us some characters -				 * that don't share the same edge set as CHR. -				 */ -				rx_bitset_difference(rx->local_cset_size, csetout, -									 e->params.cset); -		stateset = stateset->cdr; -	} -	return 1; -} - - -/* This is a constructor for RX_SUPER_EDGE structures.  These are - * wrappers for lists of superstate NFA edges that share character sets labels. 
- * If a transition class contains more than one rx_distinct_future (superstate - * edge), then it represents a non-determinism in the superstate NFA. - */ - - -#ifdef __STDC__ -static struct rx_super_edge *rx_super_edge(struct rx *rx, -										   struct rx_superstate *super, -										   rx_Bitset cset, -										   struct rx_distinct_future *df) -#else -static struct rx_super_edge *rx_super_edge(rx, super, cset, df) -struct rx *rx; -struct rx_superstate *super; -rx_Bitset cset; -struct rx_distinct_future *df; -#endif -{ -	struct rx_super_edge *tc = -		(struct rx_super_edge *) rx_cache_malloc_or_get -		(rx->cache, &rx->cache->free_transition_classes, -		 sizeof(struct rx_super_edge) + - -		 rx_sizeof_bitset(rx->local_cset_size)); - -	if (!tc) -		return 0; -	tc->next = super->edges; -	super->edges = tc; -	tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point]; -	tc->rx_backtrack_frame.data = 0; -	tc->rx_backtrack_frame.data_2 = (void *) tc; -	tc->options = df; -	tc->cset = (rx_Bitset) ((char *) tc + sizeof(*tc)); -	rx_bitset_assign(rx->local_cset_size, tc->cset, cset); -	if (df) { -		struct rx_distinct_future *dfp = df; - -		df->next_same_super_edge[1]->next_same_super_edge[0] = 0; -		while (dfp) { -			dfp->edge = tc; -			dfp = dfp->next_same_super_edge[0]; -		} -		df->next_same_super_edge[1]->next_same_super_edge[0] = df; -	} -	return tc; -} - - -/* There are three kinds of cache miss.  The first occurs when a - * transition is taken that has never been computed during the - * lifetime of the source superstate.  That cache miss is handled by - * calling COMPUTE_SUPER_EDGE.  The second kind of cache miss - * occurs when the destination superstate of a transition doesn't - * exist.  SOLVE_DESTINATION is used to construct the destination superstate. - * Finally, the third kind of cache miss occurs when the destination - * superstate of a transition is in a `semi-free state'.  That case is - * handled by UNFREE_SUPERSTATE. 
- * - * The function of HANDLE_CACHE_MISS is to figure out which of these - * cases applies. - */ - - -#ifdef __STDC__ -static void -install_partial_transition(struct rx_superstate *super, -						   struct rx_inx *answer, -						   RX_subset set, int offset) -#else -static void install_partial_transition(super, answer, set, offset) -struct rx_superstate *super; -struct rx_inx *answer; -RX_subset set; -int offset; -#endif -{ -	int start = offset; -	int end = start + 32; -	RX_subset pos = 1; -	struct rx_inx *transitions = super->transitions; - -	while (start < end) { -		if (set & pos) -			transitions[start] = *answer; -		pos <<= 1; -		++start; -	} -} - -#ifdef __STDC__ -RX_DECL struct rx_inx *rx_handle_cache_miss -	(struct rx *rx, struct rx_superstate *super, unsigned char chr, -	 void *data) -#else -RX_DECL struct rx_inx *rx_handle_cache_miss(rx, super, chr, data) -struct rx *rx; -struct rx_superstate *super; -unsigned char chr; -void *data; -#endif -{ -	int offset = chr / RX_subset_bits; -	struct rx_distinct_future *df = data; - -	if (!df) {					/* must be the shared_cache_miss_frame */ -		/* Perhaps this is just a transition waiting to be filled. */ -		struct rx_super_edge *tc; -		RX_subset mask = rx_subset_singletons[chr % RX_subset_bits]; - -		for (tc = super->edges; tc; tc = tc->next) -			if (tc->cset[offset] & mask) { -				struct rx_inx *answer; - -				df = tc->options; -				answer = -					((tc->options->next_same_super_edge[0] != -					  tc->options) ? &tc-> -					 rx_backtrack_frame : (df->effects ? -										   &df->side_effects_frame : -										   &df->future_frame)); -				install_partial_transition(super, answer, tc->cset[offset], -										   offset * 32); -				return answer; -			} -		/* Otherwise, it's a flushed or  newly encountered edge. 
*/ -		{ -			char cset_space[1024];	/* this limit is far from unreasonable */ -			rx_Bitset trcset; -			struct rx_inx *answer; - -			if (rx_sizeof_bitset(rx->local_cset_size) > sizeof(cset_space)) -				return 0;		/* If the arbitrary limit is hit, always fail */ -			/* cleanly. */ -			trcset = (rx_Bitset) cset_space; -			rx_lock_superstate(rx, super); -			if (!compute_super_edge(rx, &df, trcset, super, chr)) { -				rx_unlock_superstate(rx, super); -				return 0; -			} -			if (!df) {			/* We just computed the fail transition. */ -				static struct rx_inx -					shared_fail_frame = { 0, 0, (void *) rx_backtrack, 0 }; - -				answer = &shared_fail_frame; -			} else { -				tc = rx_super_edge(rx, super, trcset, df); -				if (!tc) { -					rx_unlock_superstate(rx, super); -					return 0; -				} -				answer = -					((tc->options->next_same_super_edge[0] != -					  tc->options) ? &tc-> -					 rx_backtrack_frame : (df->effects ? -										   &df->side_effects_frame : -										   &df->future_frame)); -			} -			install_partial_transition(super, answer, -									   trcset[offset], offset * 32); -			rx_unlock_superstate(rx, super); -			return answer; -		} -	} else if (df->future) {	/* A cache miss on an edge with a future? Must be -								   * a semi-free destination. 
*/ -		if (df->future->is_semifree) -			refresh_semifree_superstate(rx->cache, df->future); -		return &df->future_frame; -	} else -		/* no future superstate on an existing edge */ -	{ -		rx_lock_superstate(rx, super); -		if (!solve_destination(rx, df)) { -			rx_unlock_superstate(rx, super); -			return 0; -		} -		if (!df->effects -			&& (df->edge->options->next_same_super_edge[0] == -				df->edge->options)) install_partial_transition(super, -															   &df->future_frame, -															   df-> -															   edge->cset -															   [offset], -															   offset * -															   32); -		rx_unlock_superstate(rx, super); -		return &df->future_frame; -	} -} - - - - -/* The rest of the code provides a regex.c compatable interface. */ - - -__const__ char *re_error_msg[] = { -	0,							/* REG_NOUT */ -	"No match",					/* REG_NOMATCH */ -	"Invalid regular expression",	/* REG_BADPAT */ -	"Invalid collation character",	/* REG_ECOLLATE */ -	"Invalid character class name",	/* REG_ECTYPE */ -	"Trailing backslash",		/* REG_EESCAPE */ -	"Invalid back reference",	/* REG_ESUBREG */ -	"Unmatched [ or [^",		/* REG_EBRACK */ -	"Unmatched ( or \\(",		/* REG_EPAREN */ -	"Unmatched \\{",			/* REG_EBRACE */ -	"Invalid content of \\{\\}",	/* REG_BADBR */ -	"Invalid range end",		/* REG_ERANGE */ -	"Memory exhausted",			/* REG_ESPACE */ -	"Invalid preceding regular expression",	/* REG_BADRPT */ -	"Premature end of regular expression",	/* REG_EEND */ -	"Regular expression too big",	/* REG_ESIZE */ -	"Unmatched ) or \\)",		/* REG_ERPAREN */ -}; - - - -/*  - * Macros used while compiling patterns. - * - * By convention, PEND points just past the end of the uncompiled pattern, - * P points to the read position in the pattern.  `translate' is the name - * of the translation table (`TRANSLATE' is the name of a macro that looks - * things up in `translate'). - */ - - -/* - * Fetch the next character in the uncompiled pattern---translating it  - * if necessary. 
*Also cast from a signed character in the constant - * string passed to us by the user to an unsigned char that we can use - * as an array index (in, e.g., `translate'). - */ -#define PATFETCH(c)							\ - do {if (p == pend) return REG_EEND;					\ -    c = (unsigned char) *p++;						\ -    c = translate[c];		 					\ - } while (0) - -/*  - * Fetch the next character in the uncompiled pattern, with no - * translation. - */ -#define PATFETCH_RAW(c)							\ -  do {if (p == pend) return REG_EEND;					\ -    c = (unsigned char) *p++; 						\ -  } while (0) - -/* Go backwards one character in the pattern.  */ -#define PATUNFETCH p-- - - -#define TRANSLATE(d) translate[(unsigned char) (d)] - -typedef unsigned regnum_t; - -/* Since offsets can go either forwards or backwards, this type needs to - * be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. - */ -typedef int pattern_offset_t; - -typedef struct { -	struct rexp_node **top_expression;	/* was begalt */ -	struct rexp_node **last_expression;	/* was laststart */ -	pattern_offset_t inner_group_offset; -	regnum_t regnum; -} compile_stack_elt_t; -typedef struct { -	compile_stack_elt_t *stack; -	unsigned size; -	unsigned avail;				/* Offset of next open position.  
*/ -} compile_stack_type; - -static boolean group_in_compile_stack(compile_stack_type, regnum_t); -static reg_errcode_t -compile_range(struct re_pattern_buffer *, rx_Bitset, -			  __const__ char **, __const__ char *, -			  unsigned char *, reg_syntax_t, rx_Bitset, char *); -static void find_backrefs(char *, struct rexp_node *, - -						  struct re_se_params *); -static int compute_fastset(struct re_pattern_buffer *, struct rexp_node *); -static int is_anchored(struct rexp_node *, rx_side_effect); -static struct rexp_node -*remove_unecessary_side_effects - -	(struct rx *, char *, struct rexp_node *, struct re_se_params *); -static int pointless_if_repeated(struct rexp_node *, - -								 struct re_se_params *); -static int registers_on_stack(struct re_pattern_buffer *, -							  struct rexp_node *, - -							  int, struct re_se_params *); -static int has_any_se(struct rx *, struct rexp_node *); -static int has_non_idempotent_epsilon_path - -	(struct rx *, struct rexp_node *, struct re_se_params *); -static int begins_with_complex_se(struct rx *, struct rexp_node *); -static void speed_up_alt(struct rx *, struct rexp_node *, int); -RX_DECL reg_errcode_t - -rx_compile(__const__ char *, int, reg_syntax_t, -		   struct re_pattern_buffer *); -RX_DECL void rx_blow_up_fastmap(struct re_pattern_buffer *); -static __inline__ enum rx_get_burst_return -re_search_2_get_burst(struct rx_string_position *, void *, int); -static __inline__ enum rx_back_check_return -re_search_2_back_check(struct rx_string_position *, int, -					   int, unsigned char *, void *, int); -static __inline__ int -re_search_2_fetch_char(struct rx_string_position *, int, void *, int); - - -#define INIT_COMPILE_STACK_SIZE 32 - -#define COMPILE_STACK_EMPTY  (compile_stack.avail == 0) -#define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size) - -/* The next available element.  */ -#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) - - -/* Set the bit for character C in a list.  
*/ -#define SET_LIST_BIT(c)                               \ -  (b[((unsigned char) (c)) / CHARBITS]               \ -   |= 1 << (((unsigned char) c) % CHARBITS)) - -/* Get the next unsigned number in the uncompiled pattern.  */ -#define GET_UNSIGNED_NUMBER(num) 					\ -  { if (p != pend)							\ -     {									\ -       PATFETCH (c); 							\ -       while (isdigit (c)) 						\ -         { 								\ -           if (num < 0)							\ -              num = 0;							\ -           num = num * 10 + c - '0'; 					\ -           if (p == pend) 						\ -              break; 							\ -           PATFETCH (c);						\ -         } 								\ -       } 								\ -    } - -#define CHAR_CLASS_MAX_LENGTH  6	/* Namely, `xdigit'.  */ - -#define IS_CHAR_CLASS(string)						\ -   (!strcmp (string, "alpha") || !strcmp (string, "upper")		\ -    || !strcmp (string, "lower") || !strcmp (string, "digit")		\ -    || !strcmp (string, "alnum") || !strcmp (string, "xdigit")		\ -    || !strcmp (string, "space") || !strcmp (string, "print")		\ -    || !strcmp (string, "punct") || !strcmp (string, "graph")		\ -    || !strcmp (string, "cntrl") || !strcmp (string, "blank")) - - -/* These predicates are used in regex_compile. */ - -/* P points to just after a ^ in PATTERN.  Return true if that ^ comes - * after an alternative or a begin-subexpression.  We assume there is at - * least one character before the ^.   - */ - -#ifdef __STDC__ -static boolean -at_begline_loc_p(__const__ char *pattern, __const__ char *p, -				 reg_syntax_t syntax) -#else -static boolean at_begline_loc_p(pattern, p, syntax) -__const__ char *pattern; -__const__ char *p; -reg_syntax_t syntax; -#endif -{ -	__const__ char *prev = p - 2; -	boolean prev_prev_backslash = ((prev > pattern) && (prev[-1] == '\\')); - -	return (					/* After a subexpression?  */ -			   ((*prev == '(') && ((syntax & RE_NO_BK_PARENS) || prev_prev_backslash)) -			   || -			   /* After an alternative?  
*/ -			   ((*prev == '|') && ((syntax & RE_NO_BK_VBAR) || prev_prev_backslash)) -		); -} - -/* The dual of at_begline_loc_p.  This one is for $.  We assume there is - * at least one character after the $, i.e., `P < PEND'. - */ - -#ifdef __STDC__ -static boolean -at_endline_loc_p(__const__ char *p, __const__ char *pend, int syntax) -#else -static boolean at_endline_loc_p(p, pend, syntax) -__const__ char *p; -__const__ char *pend; -int syntax; -#endif -{ -	__const__ char *next = p; -	boolean next_backslash = (*next == '\\'); -	__const__ char *next_next = (p + 1 < pend) ? (p + 1) : 0; - -	return ( -			   /* Before a subexpression?  */ -			   ((syntax & RE_NO_BK_PARENS) -				? (*next == ')') -				: (next_backslash && next_next && (*next_next == ')'))) -			   || -			   /* Before an alternative?  */ -			   ((syntax & RE_NO_BK_VBAR) -				? (*next == '|') -				: (next_backslash && next_next && (*next_next == '|'))) -		); -} - - -unsigned char rx_id_translation[256] = { -	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -	10, 11, 12, 13, 14, 15, 16, 17, 18, 19, -	20, 21, 22, 23, 24, 25, 26, 27, 28, 29, -	30, 31, 32, 33, 34, 35, 36, 37, 38, 39, -	40, 41, 42, 43, 44, 45, 46, 47, 48, 49, -	50, 51, 52, 53, 54, 55, 56, 57, 58, 59, -	60, 61, 62, 63, 64, 65, 66, 67, 68, 69, -	70, 71, 72, 73, 74, 75, 76, 77, 78, 79, -	80, 81, 82, 83, 84, 85, 86, 87, 88, 89, -	90, 91, 92, 93, 94, 95, 96, 97, 98, 99, - -	100, 101, 102, 103, 104, 105, 106, 107, 108, 109, -	110, 111, 112, 113, 114, 115, 116, 117, 118, 119, -	120, 121, 122, 123, 124, 125, 126, 127, 128, 129, -	130, 131, 132, 133, 134, 135, 136, 137, 138, 139, -	140, 141, 142, 143, 144, 145, 146, 147, 148, 149, -	150, 151, 152, 153, 154, 155, 156, 157, 158, 159, -	160, 161, 162, 163, 164, 165, 166, 167, 168, 169, -	170, 171, 172, 173, 174, 175, 176, 177, 178, 179, -	180, 181, 182, 183, 184, 185, 186, 187, 188, 189, -	190, 191, 192, 193, 194, 195, 196, 197, 198, 199, - -	200, 201, 202, 203, 204, 205, 206, 207, 208, 209, -	210, 211, 212, 213, 214, 215, 216, 
217, 218, 219, -	220, 221, 222, 223, 224, 225, 226, 227, 228, 229, -	230, 231, 232, 233, 234, 235, 236, 237, 238, 239, -	240, 241, 242, 243, 244, 245, 246, 247, 248, 249, -	250, 251, 252, 253, 254, 255 -}; - -/* The compiler keeps an inverted translation table. - * This looks up/inititalize elements. - * VALID is an array of booleans that validate CACHE. - */ - -#ifdef __STDC__ -static rx_Bitset -inverse_translation(struct re_pattern_buffer *rxb, -					char *valid, rx_Bitset cache, -					unsigned char *translate, int c) -#else -static rx_Bitset inverse_translation(rxb, valid, cache, translate, c) -struct re_pattern_buffer *rxb; -char *valid; -rx_Bitset cache; -unsigned char *translate; -int c; -#endif -{ -	rx_Bitset cs - -		= cache + c * rx_bitset_numb_subsets(rxb->rx.local_cset_size); - -	if (!valid[c]) { -		int x; -		int c_tr = TRANSLATE(c); - -		rx_bitset_null(rxb->rx.local_cset_size, cs); -		for (x = 0; x < 256; ++x)	/* &&&& 13.37 */ -			if (TRANSLATE(x) == c_tr) -				RX_bitset_enjoin(cs, x); -		valid[c] = 1; -	} -	return cs; -} - - - - -/* More subroutine declarations and macros for regex_compile.  */ - -/* Returns true if REGNUM is in one of COMPILE_STACK's elements and  -   false if it's not.  */ - -#ifdef __STDC__ -static boolean -group_in_compile_stack(compile_stack_type compile_stack, regnum_t regnum) -#else -static boolean group_in_compile_stack(compile_stack, regnum) -compile_stack_type compile_stack; -regnum_t regnum; -#endif -{ -	int this_element; - -	for (this_element = compile_stack.avail - 1; -		 this_element >= 0; this_element--) -		if (compile_stack.stack[this_element].regnum == regnum) -			return true; - -	return false; -} - - -/* - * Read the ending character of a range (in a bracket expression) from the - * uncompiled pattern *P_PTR (which ends at PEND).  We assume the - * starting character is in `P[-2]'.  (`P[-1]' is the character `-'.) 
- * Then we set the translation of all bits between the starting and - * ending characters (inclusive) in the compiled pattern B. - *  - * Return an error code. - *  - * We use these short variable names so we can use the same macros as - * `regex_compile' itself.   - */ - -#ifdef __STDC__ -static reg_errcode_t -compile_range(struct re_pattern_buffer *rxb, rx_Bitset cs, -			  __const__ char **p_ptr, __const__ char *pend, -			  unsigned char *translate, reg_syntax_t syntax, -			  rx_Bitset inv_tr, char *valid_inv_tr) -#else -static reg_errcode_t -compile_range(rxb, cs, p_ptr, pend, translate, syntax, inv_tr, -			  valid_inv_tr) -struct re_pattern_buffer *rxb; -rx_Bitset cs; -__const__ char **p_ptr; -__const__ char *pend; -unsigned char *translate; -reg_syntax_t syntax; -rx_Bitset inv_tr; -char *valid_inv_tr; -#endif -{ -	unsigned this_char; - -	__const__ char *p = *p_ptr; - -	unsigned char range_end; -	unsigned char range_start = TRANSLATE(p[-2]); - -	if (p == pend) -		return REG_ERANGE; - -	PATFETCH(range_end); - -	(*p_ptr)++; - -	if (range_start > range_end) -		return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; - -	for (this_char = range_start; this_char <= range_end; this_char++) { -		rx_Bitset it = -			inverse_translation(rxb, valid_inv_tr, inv_tr, translate, - -								this_char); - -		rx_bitset_union(rxb->rx.local_cset_size, cs, it); -	} - -	return REG_NOERROR; -} - - -/* This searches a regexp for backreference side effects. - * It fills in the array OUT with 1 at the index of every register pair - * referenced by a backreference. - * - * This is used to help optimize patterns for searching.  The information is - * useful because, if the caller doesn't want register values, backreferenced - * registers are the only registers for which we need rx_backtrack. 
- */ - -#ifdef __STDC__ -static void -find_backrefs(char *out, struct rexp_node *rexp, -			  struct re_se_params *params) -#else -static void find_backrefs(out, rexp, params) -char *out; -struct rexp_node *rexp; -struct re_se_params *params; -#endif -{ -	if (rexp) -		switch (rexp->type) { -		case r_cset: -		case r_data: -			return; -		case r_alternate: -		case r_concat: -		case r_opt: -		case r_star: -		case r_2phase_star: -			find_backrefs(out, rexp->params.pair.left, params); -			find_backrefs(out, rexp->params.pair.right, params); -			return; -		case r_side_effect: -			if (((long) rexp->params.side_effect >= 0) -				&& (params[(long) rexp->params.side_effect].se == -					re_se_backref)) -					out[params[(long) rexp->params.side_effect].op1] = 1; -			return; -		} -} - - - -/* Returns 0 unless the pattern can match the empty string. */ - -#ifdef __STDC__ -static int -compute_fastset(struct re_pattern_buffer *rxb, struct rexp_node *rexp) -#else -static int compute_fastset(rxb, rexp) -struct re_pattern_buffer *rxb; -struct rexp_node *rexp; -#endif -{ -	if (!rexp) -		return 1; -	switch (rexp->type) { -	case r_data: -		return 1; -	case r_cset: -	{ -		rx_bitset_union(rxb->rx.local_cset_size, -						rxb->fastset, rexp->params.cset); -	} -		return 0; -	case r_concat: -		return (compute_fastset(rxb, rexp->params.pair.left) -				&& compute_fastset(rxb, rexp->params.pair.right)); -	case r_2phase_star: -		compute_fastset(rxb, rexp->params.pair.left); -		/* compute_fastset (rxb, rexp->params.pair.right);  nope... */ -		return 1; -	case r_alternate: -		return !!(compute_fastset(rxb, rexp->params.pair.left) -				  + compute_fastset(rxb, rexp->params.pair.right)); -	case r_opt: -	case r_star: -		compute_fastset(rxb, rexp->params.pair.left); -		return 1; -	case r_side_effect: -		return 1; -	} - -	/* this should never happen */ -	return 0; -} - - -/* returns - *  1 -- yes, definately anchored by the given side effect. - *  2 -- maybe anchored, maybe the empty string. 
- *  0 -- definately not anchored - *  There is simply no other possibility. - */ - -#ifdef __STDC__ -static int is_anchored(struct rexp_node *rexp, rx_side_effect se) -#else -static int is_anchored(rexp, se) -struct rexp_node *rexp; -rx_side_effect se; -#endif -{ -	if (!rexp) -		return 2; -	switch (rexp->type) { -	case r_cset: -	case r_data: -		return 0; -	case r_concat: -	case r_2phase_star: -	{ -		int l = is_anchored(rexp->params.pair.left, se); - -		return (l == 2 ? is_anchored(rexp->params.pair.right, se) : l); -	} -	case r_alternate: -	{ -		int l = is_anchored(rexp->params.pair.left, se); -		int r = l ? is_anchored(rexp->params.pair.right, se) : 0; - -		if (l == r) -			return l; -		else if ((l == 0) || (r == 0)) -			return 0; -		else -			return 2; -	} -	case r_opt: -	case r_star: -		return is_anchored(rexp->params.pair.left, se) ? 2 : 0; - -	case r_side_effect: -		return ((rexp->params.side_effect == se) -				? 1 : 2); -	} - -	/* this should never happen */ -	return 0; -} - - -/* This removes register assignments that aren't required by backreferencing. - * This can speed up explore_future, especially if it eliminates - * non-determinism in the superstate NFA. - *  - * NEEDED is an array of characters, presumably filled in by FIND_BACKREFS. - * The non-zero elements of the array indicate which register assignments - * can NOT be removed from the expression. 
- */ - -#ifdef __STDC__ -static struct rexp_node *remove_unecessary_side_effects(struct rx *rx, -														char *needed, -														struct rexp_node -														*rexp, -														struct re_se_params -														*params) -#else -static struct rexp_node *remove_unecessary_side_effects(rx, needed, rexp, -														params) -struct rx *rx; -char *needed; -struct rexp_node *rexp; -struct re_se_params *params; -#endif -{ -	struct rexp_node *l; -	struct rexp_node *r; - -	if (!rexp) -		return 0; -	else -		switch (rexp->type) { -		case r_cset: -		case r_data: -			return rexp; -		case r_alternate: -		case r_concat: -		case r_2phase_star: -			l = remove_unecessary_side_effects(rx, needed, -											   rexp->params.pair.left, -											   params); -			r = -				remove_unecessary_side_effects(rx, needed, -											   rexp->params.pair.right, -											   params); -			if ((l && r) || (rexp->type != r_concat)) { -				rexp->params.pair.left = l; -				rexp->params.pair.right = r; -				return rexp; -			} else { -				rexp->params.pair.left = rexp->params.pair.right = 0; -				rx_free_rexp(rx, rexp); -				return l ? 
l : r; -			} -		case r_opt: -		case r_star: -			l = remove_unecessary_side_effects(rx, needed, -											   rexp->params.pair.left, -											   params); -			if (l) { -				rexp->params.pair.left = l; -				return rexp; -			} else { -				rexp->params.pair.left = 0; -				rx_free_rexp(rx, rexp); -				return 0; -			} -		case r_side_effect: -		{ -			int se = (long) rexp->params.side_effect; - -			if ((se >= 0) -				&& (((enum re_side_effects) params[se].se == re_se_lparen) -					|| ((enum re_side_effects) params[se].se == -						re_se_rparen)) && (params[se].op1 > 0) -				&& (!needed[params[se].op1])) { -				rx_free_rexp(rx, rexp); -				return 0; -			} else -				return rexp; -		} -		} - -	/* this should never happen */ -	return 0; -} - - - -#ifdef __STDC__ -static int -pointless_if_repeated(struct rexp_node *node, struct re_se_params *params) -#else -static int pointless_if_repeated(node, params) -struct rexp_node *node; -struct re_se_params *params; -#endif -{ -	if (!node) -		return 1; -	switch (node->type) { -	case r_cset: -		return 0; -	case r_alternate: -	case r_concat: -	case r_2phase_star: -		return (pointless_if_repeated(node->params.pair.left, params) -				&& pointless_if_repeated(node->params.pair.right, params)); -	case r_opt: -	case r_star: -		return pointless_if_repeated(node->params.pair.left, params); -	case r_side_effect: -		switch (((long) node->params.side_effect < 0) -				? 
(enum re_side_effects) node->params.side_effect -				: (enum re_side_effects) params[(long) node-> -												params.side_effect].se) { -		case re_se_try: -		case re_se_at_dot: -		case re_se_begbuf: -		case re_se_hat: -		case re_se_wordbeg: -		case re_se_wordbound: -		case re_se_notwordbound: -		case re_se_wordend: -		case re_se_endbuf: -		case re_se_dollar: -		case re_se_fail: -		case re_se_win: -			return 1; -		case re_se_lparen: -		case re_se_rparen: -		case re_se_iter: -		case re_se_end_iter: -		case re_se_syntax: -		case re_se_not_syntax: -		case re_se_backref: -			return 0; -		} -	case r_data: -	default: -		return 0; -	} -} - - - -#ifdef __STDC__ -static int -registers_on_stack(struct re_pattern_buffer *rxb, -				   struct rexp_node *rexp, int in_danger, -				   struct re_se_params *params) -#else -static int registers_on_stack(rxb, rexp, in_danger, params) -struct re_pattern_buffer *rxb; -struct rexp_node *rexp; -int in_danger; -struct re_se_params *params; -#endif -{ -	if (!rexp) -		return 0; -	else -		switch (rexp->type) { -		case r_cset: -		case r_data: -			return 0; -		case r_alternate: -		case r_concat: -			return (registers_on_stack(rxb, rexp->params.pair.left, -									   in_danger, params) -					|| (registers_on_stack -						(rxb, rexp->params.pair.right, -						 in_danger, params))); -		case r_opt: -			return registers_on_stack(rxb, rexp->params.pair.left, 0, -									  params); -		case r_star: -			return registers_on_stack(rxb, rexp->params.pair.left, 1, -									  params); -		case r_2phase_star: -			return -				(registers_on_stack(rxb, rexp->params.pair.left, 1, params) -				 || registers_on_stack(rxb, rexp->params.pair.right, 1, -									   params)); -		case r_side_effect: -		{ -			int se = (long) rexp->params.side_effect; - -			if (in_danger && (se >= 0) -				&& (params[se].op1 > 0) -				&& (((enum re_side_effects) params[se].se == re_se_lparen) -					|| ((enum re_side_effects) params[se].se == -						re_se_rparen))) return 1; -			else -			
	return 0; -		} -		} - -	/* this should never happen */ -	return 0; -} - - - -static char idempotent_complex_se[] = { -#define RX_WANT_SE_DEFS 1 -#undef RX_DEF_SE -#undef RX_DEF_CPLX_SE -#define RX_DEF_SE(IDEM, NAME, VALUE) -#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE)     IDEM, -#include <regex.h> -#undef RX_DEF_SE -#undef RX_DEF_CPLX_SE -#undef RX_WANT_SE_DEFS -	23 -}; - -static char idempotent_se[] = { -	13, -#define RX_WANT_SE_DEFS 1 -#undef RX_DEF_SE -#undef RX_DEF_CPLX_SE -#define RX_DEF_SE(IDEM, NAME, VALUE)	      IDEM, -#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE) -#include <regex.h> -#undef RX_DEF_SE -#undef RX_DEF_CPLX_SE -#undef RX_WANT_SE_DEFS -	42 -}; - - - -#ifdef __STDC__ -static int has_any_se(struct rx *rx, struct rexp_node *rexp) -#else -static int has_any_se(rx, rexp) -struct rx *rx; -struct rexp_node *rexp; -#endif -{ -	if (!rexp) -		return 0; - -	switch (rexp->type) { -	case r_cset: -	case r_data: -		return 0; - -	case r_side_effect: -		return 1; - -	case r_2phase_star: -	case r_concat: -	case r_alternate: -		return (has_any_se(rx, rexp->params.pair.left) -				|| has_any_se(rx, rexp->params.pair.right)); - -	case r_opt: -	case r_star: -		return has_any_se(rx, rexp->params.pair.left); -	} - -	/* this should never happen */ -	return 0; -} - - - -/* This must be called AFTER `convert_hard_loops' for a given REXP. */ -#ifdef __STDC__ -static int -has_non_idempotent_epsilon_path(struct rx *rx, -								struct rexp_node *rexp, -								struct re_se_params *params) -#else -static int has_non_idempotent_epsilon_path(rx, rexp, params) -struct rx *rx; -struct rexp_node *rexp; -struct re_se_params *params; -#endif -{ -	if (!rexp) -		return 0; - -	switch (rexp->type) { -	case r_cset: -	case r_data: -	case r_star: -		return 0; - -	case r_side_effect: -		return -			!((long) rexp->params.side_effect > 0 -			  ? -			  idempotent_complex_se[params -									[(long) rexp->params. 
-									 side_effect].se] : -			  idempotent_se[-(long) rexp->params.side_effect]); - -	case r_alternate: -		return -			(has_non_idempotent_epsilon_path(rx, -											 rexp->params.pair.left, -											 params) -			 || has_non_idempotent_epsilon_path(rx, -												rexp->params.pair.right, -												params)); - -	case r_2phase_star: -	case r_concat: -		return -			(has_non_idempotent_epsilon_path(rx, -											 rexp->params.pair.left, -											 params) -			 && has_non_idempotent_epsilon_path(rx, -												rexp->params.pair.right, -												params)); - -	case r_opt: -		return has_non_idempotent_epsilon_path(rx, -											   rexp->params.pair.left, -											   params); -	} - -	/* this should never happen */ -	return 0; -} - - - -/* This computes rougly what it's name suggests.   It can (and does) go wrong  - * in the direction of returning spurious 0 without causing disasters. - */ -#ifdef __STDC__ -static int begins_with_complex_se(struct rx *rx, struct rexp_node *rexp) -#else -static int begins_with_complex_se(rx, rexp) -struct rx *rx; -struct rexp_node *rexp; -#endif -{ -	if (!rexp) -		return 0; - -	switch (rexp->type) { -	case r_cset: -	case r_data: -		return 0; - -	case r_side_effect: -		return ((long) rexp->params.side_effect >= 0); - -	case r_alternate: -		return (begins_with_complex_se(rx, rexp->params.pair.left) -				&& begins_with_complex_se(rx, rexp->params.pair.right)); - - -	case r_concat: -		return has_any_se(rx, rexp->params.pair.left); -	case r_opt: -	case r_star: -	case r_2phase_star: -		return 0; -	} - -	/* this should never happen */ -	return 0; -} - - -/* This destructively removes some of the re_se_tv side effects from  - * a rexp tree.  In particular, during parsing re_se_tv was inserted on the - * right half of every | to guarantee that posix path preference could be  - * honored.  This function removes some which it can be determined aren't  - * needed.   
- */ - -#ifdef __STDC__ -static void -speed_up_alt(struct rx *rx, struct rexp_node *rexp, int unposix) -#else -static void speed_up_alt(rx, rexp, unposix) -struct rx *rx; -struct rexp_node *rexp; -int unposix; -#endif -{ -	if (!rexp) -		return; - -	switch (rexp->type) { -	case r_cset: -	case r_data: -	case r_side_effect: -		return; - -	case r_opt: -	case r_star: -		speed_up_alt(rx, rexp->params.pair.left, unposix); -		return; - -	case r_2phase_star: -	case r_concat: -		speed_up_alt(rx, rexp->params.pair.left, unposix); -		speed_up_alt(rx, rexp->params.pair.right, unposix); -		return; - -	case r_alternate: -		/* the right child is guaranteed to be (concat re_se_tv <subexp>) */ - -		speed_up_alt(rx, rexp->params.pair.left, unposix); -		speed_up_alt(rx, rexp->params.pair.right->params.pair.right, -					 unposix); - -		if (unposix -			|| (begins_with_complex_se -				(rx, rexp->params.pair.right->params.pair.right)) -			|| !(has_any_se(rx, rexp->params.pair.right->params.pair.right) -				 || has_any_se(rx, rexp->params.pair.left))) { -			struct rexp_node *conc = rexp->params.pair.right; - -			rexp->params.pair.right = conc->params.pair.right; -			conc->params.pair.right = 0; -			rx_free_rexp(rx, conc); -		} -	} -} - - - - - -/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. -   Returns one of error codes defined in `regex.h', or zero for success. - -   Assumes the `allocated' (and perhaps `buffer') and `translate' -   fields are set in BUFP on entry. - -   If it succeeds, results are put in BUFP (if it returns an error, the -   contents of BUFP are undefined): -     `buffer' is the compiled pattern; -     `syntax' is set to SYNTAX; -     `used' is set to the length of the compiled pattern; -     `fastmap_accurate' is set to zero; -     `re_nsub' is set to the number of groups in PATTERN; -     `not_bol' and `not_eol' are set to zero. -    -   The `fastmap' and `newline_anchor' fields are neither -   examined nor set.  
*/ - - -#ifdef __STDC__ -RX_DECL reg_errcode_t -rx_compile(__const__ char *pattern, int size, -		   reg_syntax_t syntax, struct re_pattern_buffer *rxb) -#else -RX_DECL reg_errcode_t rx_compile(pattern, size, syntax, rxb) -__const__ char *pattern; -int size; -reg_syntax_t syntax; -struct re_pattern_buffer *rxb; -#endif -{ -	RX_subset -		inverse_translate[CHAR_SET_SIZE * -						  rx_bitset_numb_subsets(CHAR_SET_SIZE)]; -	char validate_inv_tr[CHAR_SET_SIZE * - -						 rx_bitset_numb_subsets(CHAR_SET_SIZE)]; - -	/* We fetch characters from PATTERN here.  Even though PATTERN is -	   `char *' (i.e., signed), we declare these variables as unsigned, so -	   they can be reliably used as array indices.  */ -	register unsigned char c, c1; - -	/* A random tempory spot in PATTERN.  */ -	__const__ char *p1; - -	/* Keeps track of unclosed groups.  */ -	compile_stack_type compile_stack; - -	/* Points to the current (ending) position in the pattern.  */ -	__const__ char *p = pattern; -	__const__ char *pend = pattern + size; - -	/* How to translate the characters in the pattern.  */ -	unsigned char *translate = (rxb->translate - -								? rxb->translate : rx_id_translation); - -	/* When parsing is done, this will hold the expression tree. */ -	struct rexp_node *rexp = 0; - -	/* In the midst of compilation, this holds onto the regexp  -	 * first parst while rexp goes on to aquire additional constructs. -	 */ -	struct rexp_node *orig_rexp = 0; -	struct rexp_node *fewer_side_effects = 0; - -	/* This and top_expression are saved on the compile stack. */ -	struct rexp_node **top_expression = &rexp; -	struct rexp_node **last_expression = top_expression; - -	/* Parameter to `goto append_node' */ -	struct rexp_node *append; - -	/* Counts open-groups as they are encountered.  This is the index of the -	 * innermost group being compiled. -	 */ -	regnum_t regnum = 0; - -	/* Place in the uncompiled pattern (i.e., the {) to -	 * which to go back if the interval is invalid.   
-	 */ -	__const__ char *beg_interval; - -	struct re_se_params *params = 0; -	int paramc = 0;				/* How many complex side effects so far? */ - -	rx_side_effect side;		/* param to `goto add_side_effect' */ - -	bzero(validate_inv_tr, sizeof(validate_inv_tr)); - -	rxb->rx.instruction_table = rx_id_instruction_table; - - -	/* Initialize the compile stack.  */ -	compile_stack.stack = ((compile_stack_elt_t *) -						   malloc((INIT_COMPILE_STACK_SIZE) * -								  sizeof(compile_stack_elt_t))); -	if (compile_stack.stack == 0) -		return REG_ESPACE; - -	compile_stack.size = INIT_COMPILE_STACK_SIZE; -	compile_stack.avail = 0; - -	/* Initialize the pattern buffer.  */ -	rxb->rx.cache = &default_cache; -	rxb->syntax = syntax; -	rxb->fastmap_accurate = 0; -	rxb->not_bol = rxb->not_eol = 0; -	rxb->least_subs = 0; - -	/* Always count groups, whether or not rxb->no_sub is set.   -	 * The whole pattern is implicitly group 0, so counting begins -	 * with 1. -	 */ -	rxb->re_nsub = 0; - -#if !defined (emacs) && !defined (SYNTAX_TABLE) -	/* Initialize the syntax table.  */ -	init_syntax_once(); -#endif - -	/* Loop through the uncompiled pattern until we're at the end.  */ -	while (p != pend) { -		PATFETCH(c); - -		switch (c) { -		case '^': -		{ -			if (				/* If at start of pattern, it's an operator.  */ -				   p == pattern + 1 -				   /* If context independent, it's an operator.  */ -				   || syntax & RE_CONTEXT_INDEP_ANCHORS -				   /* Otherwise, depends on what's come before.  */ -				   || at_begline_loc_p(pattern, p, syntax)) { -				struct rexp_node *n = rx_mk_r_side_effect(&rxb->rx, -														  (rx_side_effect) re_se_hat); - -				if (!n) -					return REG_ESPACE; -				append = n; -				goto append_node; -			} else -				goto normal_char; -		} -			break; - - -		case '$': -		{ -			if (				/* If at end of pattern, it's an operator.  */ -				   p == pend -				   /* If context independent, it's an operator.  
*/ -				   || syntax & RE_CONTEXT_INDEP_ANCHORS -				   /* Otherwise, depends on what's next.  */ -				   || at_endline_loc_p(p, pend, syntax)) { -				struct rexp_node *n = rx_mk_r_side_effect(&rxb->rx, -														  (rx_side_effect) re_se_dollar); - -				if (!n) -					return REG_ESPACE; -				append = n; -				goto append_node; -			} else -				goto normal_char; -		} -			break; - - -		case '+': -		case '?': -			if ((syntax & RE_BK_PLUS_QM) -				|| (syntax & RE_LIMITED_OPS)) -				goto normal_char; - -		  handle_plus: -		case '*': -			/* If there is no previous pattern... */ -			if (pointless_if_repeated(*last_expression, params)) { -				if (syntax & RE_CONTEXT_INVALID_OPS) -					return REG_BADRPT; -				else if (!(syntax & RE_CONTEXT_INDEP_OPS)) -					goto normal_char; -			} - -			{ -				/* 1 means zero (many) matches is allowed.  */ -				char zero_times_ok = 0, many_times_ok = 0; - -				/* If there is a sequence of repetition chars, collapse it -				   down to just one (the right one).  We can't combine -				   interval operators with these because of, e.g., `a{2}*', -				   which should only match an even number of `a's.  */ - -				for (;;) { -					zero_times_ok |= c != '+'; -					many_times_ok |= c != '?'; - -					if (p == pend) -						break; - -					PATFETCH(c); - -					if (c == '*' || (!(syntax & RE_BK_PLUS_QM) -									 && (c == '+' || c == '?'))); - -					else if (syntax & RE_BK_PLUS_QM && c == '\\') { -						if (p == pend) -							return REG_EESCAPE; - -						PATFETCH(c1); -						if (!(c1 == '+' || c1 == '?')) { -							PATUNFETCH; -							PATUNFETCH; -							break; -						} - -						c = c1; -					} else { -						PATUNFETCH; -						break; -					} - -					/* If we get here, we found another repeat character.  */ -				} - -				/* Star, etc. applied to an empty pattern is equivalent -				   to an empty pattern.  
*/ -				if (!last_expression) -					break; - -				/* Now we know whether or not zero matches is allowed -				 * and also whether or not two or more matches is allowed. -				 */ - -				{ -					struct rexp_node *inner_exp = *last_expression; -					int need_sync = 0; - -					if (many_times_ok -						&& has_non_idempotent_epsilon_path(&rxb->rx, -														   inner_exp, -														   params)) { -						struct rexp_node *pusher = -							rx_mk_r_side_effect(&rxb->rx, -												(rx_side_effect) re_se_pushpos); -						struct rexp_node *checker -							= rx_mk_r_side_effect(&rxb->rx, -												  (rx_side_effect) re_se_chkpos); -						struct rexp_node *pushback -							= rx_mk_r_side_effect(&rxb->rx, -												  (rx_side_effect) re_se_pushback); -						rx_Bitset cs = rx_cset(&rxb->rx); -						struct rexp_node *lit_t; -						struct rexp_node *fake_state; -						struct rexp_node *phase2; -						struct rexp_node *popper; -						struct rexp_node *star; -						struct rexp_node *a; -						struct rexp_node *whole_thing; - -						if (!cs) -							return REG_ESPACE; -						lit_t = rx_mk_r_cset(&rxb->rx, cs); -						fake_state = -							rx_mk_r_concat(&rxb->rx, pushback, lit_t); -						phase2 = -							rx_mk_r_concat(&rxb->rx, checker, fake_state); -						popper = -							rx_mk_r_side_effect(&rxb->rx, -												(rx_side_effect) -												re_se_poppos); -						star = -							rx_mk_r_2phase_star(&rxb->rx, inner_exp, -												phase2); -						a = rx_mk_r_concat(&rxb->rx, pusher, star); -						whole_thing = rx_mk_r_concat(&rxb->rx, a, popper); - -						if (! -							(pusher && star && pushback && lit_t -							 && fake_state && lit_t && phase2 && checker -							 && popper && a && whole_thing)) -							return REG_ESPACE; -						RX_bitset_enjoin(cs, 't'); -						*last_expression = whole_thing; -					} else { -						struct rexp_node *star = -							(many_times_ok ? 
rx_mk_r_star : rx_mk_r_opt) -							(&rxb->rx, *last_expression); - -						if (!star) -							return REG_ESPACE; -						*last_expression = star; -						need_sync = has_any_se(&rxb->rx, *last_expression); -					} -					if (!zero_times_ok) { -						struct rexp_node *concat -							= rx_mk_r_concat(&rxb->rx, inner_exp, -											 rx_copy_rexp(&rxb->rx, -														  *last_expression)); - -						if (!concat) -							return REG_ESPACE; -						*last_expression = concat; -					} -					if (need_sync) { -						int sync_se = paramc; - -						params = (params ? ((struct re_se_params *) -											realloc(params, -													sizeof(*params) * (1 + -																	   paramc))) -								  : ((struct re_se_params *) -									 malloc(sizeof(*params)))); -						if (!params) -							return REG_ESPACE; -						++paramc; -						params[sync_se].se = re_se_tv; -						side = (rx_side_effect) sync_se; -						goto add_side_effect; -					} -				} -				/* The old regex.c used to optimize `.*\n'.   -				 * Maybe rx should too? -				 */ -			} -			break; - - -		case '.': -		{ -			rx_Bitset cs = rx_cset(&rxb->rx); -			struct rexp_node *n = rx_mk_r_cset(&rxb->rx, cs); - -			if (!(cs && n)) -				return REG_ESPACE; - -			rx_bitset_universe(rxb->rx.local_cset_size, cs); -			if (!(rxb->syntax & RE_DOT_NEWLINE)) -				RX_bitset_remove(cs, '\n'); -			if (!(rxb->syntax & RE_DOT_NOT_NULL)) -				RX_bitset_remove(cs, 0); - -			append = n; -			goto append_node; -			break; -		} - - -		case '[': -			if (p == pend) -				return REG_EBRACK; -			{ -				boolean had_char_class = false; -				rx_Bitset cs = rx_cset(&rxb->rx); -				struct rexp_node *node = rx_mk_r_cset(&rxb->rx, cs); -				int is_inverted = *p == '^'; - -				if (!(node && cs)) -					return REG_ESPACE; - -				/* This branch of the switch is normally exited with -				 *`goto append_node' -				 */ -				append = node; - -				if (is_inverted) -					p++; - -				/* Remember the first position in the bracket expression.  
*/ -				p1 = p; - -				/* Read in characters and ranges, setting map bits.  */ -				for (;;) { -					if (p == pend) -						return REG_EBRACK; - -					PATFETCH(c); - -					/* \ might escape characters inside [...] and [^...].  */ -					if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) -						&& c == '\\') { -						if (p == pend) -							return REG_EESCAPE; - -						PATFETCH(c1); -						{ -							rx_Bitset it = inverse_translation(rxb, -															   validate_inv_tr, -															   inverse_translate, -															   translate, -															   c1); - -							rx_bitset_union(rxb->rx.local_cset_size, cs, -											it); -						} -						continue; -					} - -					/* Could be the end of the bracket expression.  If it's -					   not (i.e., when the bracket expression is `[]' so -					   far), the ']' character bit gets set way below.  */ -					if (c == ']' && p != p1 + 1) -						goto finalize_class_and_append; - -					/* Look ahead to see if it's a range when the last thing -					   was a character class.  */ -					if (had_char_class && c == '-' && *p != ']') -						return REG_ERANGE; - -					/* Look ahead to see if it's a range when the last thing -					   was a character: if this is a hyphen not at the -					   beginning or the end of a list, then it's the range -					   operator.  */ -					if (c == '-' && !(p - 2 >= pattern && p[-2] == '[') -						&& !(p - 3 >= pattern && p[-3] == '[' -							 && p[-2] == '^') && *p != ']') { -						reg_errcode_t ret = -							compile_range(rxb, cs, &p, pend, translate, -										  syntax, -										  inverse_translate, - -										  validate_inv_tr); - -						if (ret != REG_NOERROR) -							return ret; -					} - -					else if (p[0] == '-' && p[1] != ']') {	/* This handles ranges made up of characters only.  */ -						reg_errcode_t ret; - -						/* Move past the `-'.  
*/ -						PATFETCH(c1); - -						ret = -							compile_range(rxb, cs, &p, pend, translate, -										  syntax, inverse_translate, -										  validate_inv_tr); -						if (ret != REG_NOERROR) -							return ret; -					} - -					/* See if we're at the beginning of a possible character -					   class.  */ - -					else if ((syntax & RE_CHAR_CLASSES) -							 && (c == '[') && (*p == ':')) { -						char str[CHAR_CLASS_MAX_LENGTH + 1]; - -						PATFETCH(c); -						c1 = 0; - -						/* If pattern is `[[:'.  */ -						if (p == pend) -							return REG_EBRACK; - -						for (;;) { -							PATFETCH(c); -							if (c == ':' || c == ']' || p == pend -								|| c1 == CHAR_CLASS_MAX_LENGTH) break; -							str[c1++] = c; -						} -						str[c1] = '\0'; - -						/* If isn't a word bracketed by `[:' and:`]': -						   undo the ending character, the letters, and leave  -						   the leading `:' and `[' (but set bits for them).  */ -						if (c == ':' && *p == ']') { -							int ch; -							boolean is_alnum = !strcmp(str, "alnum"); -							boolean is_alpha = !strcmp(str, "alpha"); -							boolean is_blank = !strcmp(str, "blank"); -							boolean is_cntrl = !strcmp(str, "cntrl"); -							boolean is_digit = !strcmp(str, "digit"); -							boolean is_graph = !strcmp(str, "graph"); -							boolean is_lower = !strcmp(str, "lower"); -							boolean is_print = !strcmp(str, "print"); -							boolean is_punct = !strcmp(str, "punct"); -							boolean is_space = !strcmp(str, "space"); -							boolean is_upper = !strcmp(str, "upper"); -							boolean is_xdigit = !strcmp(str, "xdigit"); - -							if (!IS_CHAR_CLASS(str)) -								return REG_ECTYPE; - -							/* Throw away the ] at the end of the character -							   class.  
*/ -							PATFETCH(c); - -							if (p == pend) -								return REG_EBRACK; - -							for (ch = 0; ch < 1 << CHARBITS; ch++) { -								if ((is_alnum && isalnum(ch)) -									|| (is_alpha && isalpha(ch)) -									|| (is_blank && isblank(ch)) -									|| (is_cntrl && iscntrl(ch)) -									|| (is_digit && isdigit(ch)) -									|| (is_graph && isgraph(ch)) -									|| (is_lower && islower(ch)) -									|| (is_print && isprint(ch)) -									|| (is_punct && ispunct(ch)) -									|| (is_space && isspace(ch)) -									|| (is_upper && isupper(ch)) -									|| (is_xdigit && isxdigit(ch))) { -									rx_Bitset it = inverse_translation(rxb, -																	   validate_inv_tr, -																	   inverse_translate, -																	   translate, -																	   ch); - -									rx_bitset_union(rxb-> -													rx.local_cset_size, cs, -													it); -								} -							} -							had_char_class = true; -						} else { -							c1++; -							while (c1--) -								PATUNFETCH; -							{ -								rx_Bitset it = inverse_translation(rxb, -																   validate_inv_tr, -																   inverse_translate, -																   translate, -																   '['); - -								rx_bitset_union(rxb->rx.local_cset_size, -												cs, it); -							} -							{ -								rx_Bitset it = inverse_translation(rxb, -																   validate_inv_tr, -																   inverse_translate, -																   translate, -																   ':'); - -								rx_bitset_union(rxb->rx.local_cset_size, -												cs, it); -							} -							had_char_class = false; -						} -					} else { -						had_char_class = false; -						{ -							rx_Bitset it = inverse_translation(rxb, -															   validate_inv_tr, -															   inverse_translate, -															   translate, -															   c); - -							rx_bitset_union(rxb->rx.local_cset_size, cs, -											it); -						} -					} -				} - -			  finalize_class_and_append: -				if (is_inverted) { -					
rx_bitset_complement(rxb->rx.local_cset_size, cs); -					if (syntax & RE_HAT_LISTS_NOT_NEWLINE) -						RX_bitset_remove(cs, '\n'); -				} -				goto append_node; -			} -			break; - - -		case '(': -			if (syntax & RE_NO_BK_PARENS) -				goto handle_open; -			else -				goto normal_char; - - -		case ')': -			if (syntax & RE_NO_BK_PARENS) -				goto handle_close; -			else -				goto normal_char; - - -		case '\n': -			if (syntax & RE_NEWLINE_ALT) -				goto handle_alt; -			else -				goto normal_char; - - -		case '|': -			if (syntax & RE_NO_BK_VBAR) -				goto handle_alt; -			else -				goto normal_char; - - -		case '{': -			if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) -				goto handle_interval; -			else -				goto normal_char; - - -		case '\\': -			if (p == pend) -				return REG_EESCAPE; - -			/* Do not translate the character after the \, so that we can -			   distinguish, e.g., \B from \b, even if we normally would -			   translate, e.g., B to b.  */ -			PATFETCH_RAW(c); - -			switch (c) { -			case '(': -				if (syntax & RE_NO_BK_PARENS) -					goto normal_backslash; - -			  handle_open: -				rxb->re_nsub++; -				regnum++; -				if (COMPILE_STACK_FULL) { -					((compile_stack.stack) = -					 (compile_stack_elt_t *) realloc(compile_stack.stack, -													 (compile_stack.size << -													  1) * -													 sizeof -													 (compile_stack_elt_t))); -					if (compile_stack.stack == 0) -						return REG_ESPACE; - -					compile_stack.size <<= 1; -				} - -				if (*last_expression) { -					struct rexp_node *concat -						= rx_mk_r_concat(&rxb->rx, *last_expression, 0); - -					if (!concat) -						return REG_ESPACE; -					*last_expression = concat; -					last_expression = &concat->params.pair.right; -				} - -				/* -				   * These are the values to restore when we hit end of this -				   * group.   
-				 */ -				COMPILE_STACK_TOP.top_expression = top_expression; -				COMPILE_STACK_TOP.last_expression = last_expression; -				COMPILE_STACK_TOP.regnum = regnum; - -				compile_stack.avail++; - -				top_expression = last_expression; -				break; - - -			case ')': -				if (syntax & RE_NO_BK_PARENS) -					goto normal_backslash; - -			  handle_close: -				/* See similar code for backslashed left paren above.  */ -				if (COMPILE_STACK_EMPTY) { -					if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) { -						goto normal_char; -					} else { -						return REG_ERPAREN; -					} -				} - -				/* Since we just checked for an empty stack above, this -				   ``can't happen''.  */ - -				{ -					/* We don't just want to restore into `regnum', because -					   later groups should continue to be numbered higher, -					   as in `(ab)c(de)' -- the second group is #2.  */ -					regnum_t this_group_regnum; -					struct rexp_node **inner = top_expression; - -					compile_stack.avail--; -					top_expression = COMPILE_STACK_TOP.top_expression; -					last_expression = COMPILE_STACK_TOP.last_expression; -					this_group_regnum = COMPILE_STACK_TOP.regnum; -					{ -						int left_se = paramc; -						int right_se = paramc + 1; - -						params = (params ? ((struct re_se_params *) -											realloc(params, -													(paramc + -													 2) * -													sizeof(params[0]))) -								  : ((struct re_se_params *) -									 malloc(2 * sizeof(params[0])))); -						if (!params) -							return REG_ESPACE; -						paramc += 2; - -						params[left_se].se = re_se_lparen; -						params[left_se].op1 = this_group_regnum; -						params[right_se].se = re_se_rparen; -						params[right_se].op1 = this_group_regnum; -						{ -							struct rexp_node *left -								= rx_mk_r_side_effect(&rxb->rx, -													  (rx_side_effect) left_se); -							struct rexp_node *right -								= rx_mk_r_side_effect(&rxb->rx, -													  (rx_side_effect) right_se); -							struct rexp_node *c1 -								= (*inner ? 
rx_mk_r_concat(&rxb->rx, left, -														   *inner) : left); -							struct rexp_node *c2 = -								rx_mk_r_concat(&rxb->rx, c1, right); - -							if (!(left && right && c1 && c2)) -								return REG_ESPACE; -							*inner = c2; -						} -					} -					break; -				} - -			case '|':			/* `\|'.  */ -				if ((syntax & RE_LIMITED_OPS) || (syntax & RE_NO_BK_VBAR)) -					goto normal_backslash; -			  handle_alt: -				if (syntax & RE_LIMITED_OPS) -					goto normal_char; - -				{ -					struct rexp_node *alt -						= rx_mk_r_alternate(&rxb->rx, *top_expression, 0); - -					if (!alt) -						return REG_ESPACE; -					*top_expression = alt; -					last_expression = &alt->params.pair.right; -					{ -						int sync_se = paramc; - -						params = (params ? ((struct re_se_params *) -											realloc(params, -													(paramc + -													 1) * -													sizeof(params[0]))) -								  : ((struct re_se_params *) -									 malloc(sizeof(params[0])))); -						if (!params) -							return REG_ESPACE; -						++paramc; - -						params[sync_se].se = re_se_tv; -						{ -							struct rexp_node *sync -								= rx_mk_r_side_effect(&rxb->rx, -													  (rx_side_effect) sync_se); -							struct rexp_node *conc -								= rx_mk_r_concat(&rxb->rx, sync, 0); - -							if (!sync || !conc) -								return REG_ESPACE; - -							*last_expression = conc; -							last_expression = &conc->params.pair.right; -						} -					} -				} -				break; - - -			case '{': -				/* If \{ is a literal.  */ -				if (!(syntax & RE_INTERVALS) -					/* If we're at `\{' and it's not the open-interval  -					   operator.  */ -					|| ((syntax & RE_INTERVALS) -						&& (syntax & RE_NO_BK_BRACES)) || (p - 2 == pattern -														   && p == pend)) -					goto normal_backslash; - -			  handle_interval: -				{ -					/* If got here, then the syntax allows intervals.  */ - -					/* At least (most) this many matches must be made.  
*/ -					int lower_bound = -1, upper_bound = -1; - -					beg_interval = p - 1; - -					if (p == pend) { -						if (syntax & RE_NO_BK_BRACES) -							goto unfetch_interval; -						else -							return REG_EBRACE; -					} - -					GET_UNSIGNED_NUMBER(lower_bound); - -					if (c == ',') { -						GET_UNSIGNED_NUMBER(upper_bound); -						if (upper_bound < 0) -							upper_bound = RE_DUP_MAX; -					} else -						/* Interval such as `{1}' => match exactly once. */ -						upper_bound = lower_bound; - -					if (lower_bound < 0 || upper_bound > RE_DUP_MAX -						|| lower_bound > upper_bound) { -						if (syntax & RE_NO_BK_BRACES) -							goto unfetch_interval; -						else -							return REG_BADBR; -					} - -					if (!(syntax & RE_NO_BK_BRACES)) { -						if (c != '\\') -							return REG_EBRACE; -						PATFETCH(c); -					} - -					if (c != '}') { -						if (syntax & RE_NO_BK_BRACES) -							goto unfetch_interval; -						else -							return REG_BADBR; -					} - -					/* We just parsed a valid interval.  */ - -					/* If it's invalid to have no preceding re.  */ -					if (pointless_if_repeated(*last_expression, params)) { -						if (syntax & RE_CONTEXT_INVALID_OPS) -							return REG_BADRPT; -						else if (!(syntax & RE_CONTEXT_INDEP_OPS)) -							goto unfetch_interval; -						/* was: else laststart = b; */ -					} - -					/* If the upper bound is zero, don't want to iterate -					 * at all. -					 */ -					if (upper_bound == 0) { -						if (*last_expression) { -							rx_free_rexp(&rxb->rx, *last_expression); -							*last_expression = 0; -						} -					} else -						/* Otherwise, we have a nontrivial interval. */ -					{ -						int iter_se = paramc; -						int end_se = paramc + 1; - -						params = (params ? 
((struct re_se_params *) -											realloc(params, -													sizeof(*params) * (2 + -																	   paramc))) -								  : ((struct re_se_params *) -									 malloc(2 * sizeof(*params)))); -						if (!params) -							return REG_ESPACE; -						paramc += 2; -						params[iter_se].se = re_se_iter; -						params[iter_se].op1 = lower_bound; -						params[iter_se].op2 = upper_bound; - -						params[end_se].se = re_se_end_iter; -						params[end_se].op1 = lower_bound; -						params[end_se].op2 = upper_bound; -						{ -							struct rexp_node *push0 -								= rx_mk_r_side_effect(&rxb->rx, -													  (rx_side_effect) re_se_push0); -							struct rexp_node *start_one_iter -								= rx_mk_r_side_effect(&rxb->rx, -													  (rx_side_effect) iter_se); -							struct rexp_node *phase1 -								= rx_mk_r_concat(&rxb->rx, start_one_iter, -												 *last_expression); -							struct rexp_node *pushback -								= rx_mk_r_side_effect(&rxb->rx, -													  (rx_side_effect) re_se_pushback); -							rx_Bitset cs = rx_cset(&rxb->rx); -							struct rexp_node *lit_t; -							struct rexp_node *phase2; -							struct rexp_node *loop; -							struct rexp_node *push_n_loop; -							struct rexp_node *final_test; -							struct rexp_node *full_exp; - -							if (!cs) -								return REG_ESPACE; -							lit_t = rx_mk_r_cset(&rxb->rx, cs); -							phase2 = -								rx_mk_r_concat(&rxb->rx, pushback, lit_t); -							loop = -								rx_mk_r_2phase_star(&rxb->rx, phase1, -													phase2); -							push_n_loop = -								rx_mk_r_concat(&rxb->rx, push0, loop); -							final_test = -								rx_mk_r_side_effect(&rxb->rx, -													(rx_side_effect) -													end_se); -							full_exp = -								rx_mk_r_concat(&rxb->rx, push_n_loop, -											   final_test); - -							if (!(push0 && start_one_iter && phase1 -								  && pushback && lit_t && phase2 -								  && loop && push_n_loop && final_test -								  && full_exp)) return REG_ESPACE; - -							
RX_bitset_enjoin(cs, 't'); - -							*last_expression = full_exp; -						} -					} -					beg_interval = 0; -				} -				break; - -			  unfetch_interval: -				/* If an invalid interval, match the characters as literals.  */ -				p = beg_interval; -				beg_interval = 0; - -				/* normal_char and normal_backslash need `c'.  */ -				PATFETCH(c); - -				if (!(syntax & RE_NO_BK_BRACES)) { -					if (p > pattern && p[-1] == '\\') -						goto normal_backslash; -				} -				goto normal_char; - -#ifdef emacs -				/* There is no way to specify the before_dot and after_dot -				   operators.  rms says this is ok.  --karl  */ -			case '=': -				side = (rx_side_effect) rx_se_at_dot; -				goto add_side_effect; -				break; - -			case 's': -			case 'S': -			{ -				rx_Bitset cs = rx_cset(&rxb->rx); -				struct rexp_node *set = rx_mk_r_cset(&rxb->rx, cs); - -				if (!(cs && set)) -					return REG_ESPACE; -				if (c == 'S') -					rx_bitset_universe(rxb->rx.local_cset_size, cs); - -				PATFETCH(c); -				{ -					int x; -					enum syntaxcode code = syntax_spec_code[c]; - -					for (x = 0; x < 256; ++x) { - -						if (SYNTAX(x) == code) { -							rx_Bitset it = -								inverse_translation(rxb, validate_inv_tr, -													inverse_translate, -													translate, x); - -							rx_bitset_xor(rxb->rx.local_cset_size, cs, it); -						} -					} -				} -				append = set; -				goto append_node; -			} -				break; -#endif							/* emacs */ - - -			case 'w': -			case 'W': -				if (syntax & RE_NO_GNU_OPS) -					goto normal_char; -				{ -					rx_Bitset cs = rx_cset(&rxb->rx); -					struct rexp_node *n = -						(cs ? 
rx_mk_r_cset(&rxb->rx, cs) : 0); - -					if (!(cs && n)) -						return REG_ESPACE; -					if (c == 'W') -						rx_bitset_universe(rxb->rx.local_cset_size, cs); -					{ -						int x; - -						for (x = rxb->rx.local_cset_size - 1; x > 0; --x) -							if (SYNTAX(x) & Sword) -								RX_bitset_toggle(cs, x); -					} -					append = n; -					goto append_node; -				} -				break; - -/* With a little extra work, some of these side effects could be optimized - * away (basicly by looking at what we already know about the surrounding - * chars).   - */ -			case '<': -				if (syntax & RE_NO_GNU_OPS) -					goto normal_char; -				side = (rx_side_effect) re_se_wordbeg; -				goto add_side_effect; -				break; - -			case '>': -				if (syntax & RE_NO_GNU_OPS) -					goto normal_char; -				side = (rx_side_effect) re_se_wordend; -				goto add_side_effect; -				break; - -			case 'b': -				if (syntax & RE_NO_GNU_OPS) -					goto normal_char; -				side = (rx_side_effect) re_se_wordbound; -				goto add_side_effect; -				break; - -			case 'B': -				if (syntax & RE_NO_GNU_OPS) -					goto normal_char; -				side = (rx_side_effect) re_se_notwordbound; -				goto add_side_effect; -				break; - -			case '`': -				if (syntax & RE_NO_GNU_OPS) -					goto normal_char; -				side = (rx_side_effect) re_se_begbuf; -				goto add_side_effect; -				break; - -			case '\'': -				if (syntax & RE_NO_GNU_OPS) -					goto normal_char; -				side = (rx_side_effect) re_se_endbuf; -				goto add_side_effect; -				break; - -			  add_side_effect: -				{ -					struct rexp_node *se - -						= rx_mk_r_side_effect(&rxb->rx, side); -					if (!se) -						return REG_ESPACE; -					append = se; -					goto append_node; -				} -				break; - -			case '1': -			case '2': -			case '3': -			case '4': -			case '5': -			case '6': -			case '7': -			case '8': -			case '9': -				if (syntax & RE_NO_BK_REFS) -					goto normal_char; - -				c1 = c - '0'; - -				if (c1 > regnum) -					return REG_ESUBREG; - -				/* Can't back reference to a 
subexpression if inside of it.  */ -				if (group_in_compile_stack(compile_stack, c1)) -					return REG_ESUBREG; - -				{ -					int backref_se = paramc; - -					params = (params ? ((struct re_se_params *) -										realloc(params, -												sizeof(*params) * (1 + -																   paramc))) -							  : ((struct re_se_params *) -								 malloc(sizeof(*params)))); -					if (!params) -						return REG_ESPACE; -					++paramc; -					params[backref_se].se = re_se_backref; -					params[backref_se].op1 = c1; -					side = (rx_side_effect) backref_se; -					goto add_side_effect; -				} -				break; - -			case '+': -			case '?': -				if (syntax & RE_BK_PLUS_QM) -					goto handle_plus; -				else -					goto normal_backslash; - -			default: -			  normal_backslash: -				/* You might think it would be useful for \ to mean -				   not to translate; but if we don't translate it -				   it will never match anything.  */ -				c = TRANSLATE(c); -				goto normal_char; -			} -			break; - - -		default: -			/* Expects the character in `c'.  */ -		  normal_char: -		{ -			rx_Bitset cs = rx_cset(&rxb->rx); -			struct rexp_node *match = rx_mk_r_cset(&rxb->rx, cs); -			rx_Bitset it; - -			if (!(cs && match)) -				return REG_ESPACE; -			it = inverse_translation(rxb, validate_inv_tr, -									 inverse_translate, translate, c); -			rx_bitset_union(CHAR_SET_SIZE, cs, it); -			append = match; - -		  append_node: -			/* This genericly appends the rexp APPEND to *LAST_EXPRESSION -			 * and then parses the next character normally. -			 */ -			if (*last_expression) { -				struct rexp_node *concat -					= rx_mk_r_concat(&rxb->rx, *last_expression, append); - -				if (!concat) -					return REG_ESPACE; -				*last_expression = concat; -				last_expression = &concat->params.pair.right; -			} else -				*last_expression = append; -		} -		}						/* switch (c) */ -	}							/* while p != pend */ - - -	{ -		int win_se = paramc; - -		params = (params ? 
((struct re_se_params *) -							realloc(params, -									sizeof(*params) * (1 + paramc))) -				  : ((struct re_se_params *) -					 malloc(sizeof(*params)))); -		if (!params) -			return REG_ESPACE; -		++paramc; -		params[win_se].se = re_se_win; -		{ -			struct rexp_node *se -				= rx_mk_r_side_effect(&rxb->rx, (rx_side_effect) win_se); -			struct rexp_node *concat = rx_mk_r_concat(&rxb->rx, rexp, se); - -			if (!(se && concat)) -				return REG_ESPACE; -			rexp = concat; -		} -	} - - -	/* Through the pattern now.  */ - -	if (!COMPILE_STACK_EMPTY) -		return REG_EPAREN; - -	free(compile_stack.stack); - -	orig_rexp = rexp; -#ifdef RX_DEBUG -	if (rx_debug_compile) { -		dbug_rxb = rxb; -		fputs("\n\nCompiling ", stdout); -		fwrite(pattern, 1, size, stdout); -		fputs(":\n", stdout); -		rxb->se_params = params; -		print_rexp(&rxb->rx, orig_rexp, 2, re_seprint, stdout); -	} -#endif -	{ -		rx_Bitset cs = rx_cset(&rxb->rx); -		rx_Bitset cs2 = rx_cset(&rxb->rx); -		char *se_map = (char *) alloca(paramc); -		struct rexp_node *new_rexp = 0; - - -		bzero(se_map, paramc); -		find_backrefs(se_map, rexp, params); -		fewer_side_effects = -			remove_unecessary_side_effects(&rxb->rx, se_map, -										   rx_copy_rexp(&rxb->rx, rexp), -										   params); - -		speed_up_alt(&rxb->rx, rexp, 0); -		speed_up_alt(&rxb->rx, fewer_side_effects, 1); - -		{ -			char *syntax_parens = rxb->syntax_parens; - -			if (syntax_parens == (char *) 0x1) -				rexp = remove_unecessary_side_effects -					(&rxb->rx, se_map, rexp, params); -			else if (syntax_parens) { -				int x; - -				for (x = 0; x < paramc; ++x) -					if (((params[x].se == re_se_lparen) -						 || (params[x].se == re_se_rparen)) -						&& (!syntax_parens[params[x].op1])) -						se_map[x] = 1; -				rexp = remove_unecessary_side_effects -					(&rxb->rx, se_map, rexp, params); -			} -		} - -		/* At least one more optimization would be nice to have here but i ran out  -		 * of time.  The idea would be to delay side effects.   
-		 * For examle, `(abc)' is the same thing as `abc()' except that the -		 * left paren is offset by 3 (which we know at compile time). -		 * (In this comment, write that second pattern `abc(:3:)'  -		 * where `(:3:' is a syntactic unit.) -		 * -		 * Trickier:  `(abc|defg)'  is the same as `(abc(:3:|defg(:4:))' -		 * (The paren nesting may be hard to follow -- that's an alternation -		 *  of `abc(:3:' and `defg(:4:' inside (purely syntactic) parens -		 *  followed by the closing paren from the original expression.) -		 * -		 * Neither the expression tree representation nor the the nfa make -		 * this very easy to write. :( -		 */ - -		/* What we compile is different than what the parser returns. -		 * Suppose the parser returns expression R. -		 * Let R' be R with unnecessary register assignments removed  -		 * (see REMOVE_UNECESSARY_SIDE_EFFECTS, above). -		 * -		 * What we will compile is the expression: -		 * -		 *    m{try}R{win}\|s{try}R'{win} -		 * -		 * {try} and {win} denote side effect epsilons (see EXPLORE_FUTURE). -		 *  -		 * When trying a match, we insert an `m' at the beginning of the  -		 * string if the user wants registers to be filled, `s' if not. -		 */ -		new_rexp = -			rx_mk_r_alternate -			(&rxb->rx, -			 rx_mk_r_concat(&rxb->rx, rx_mk_r_cset(&rxb->rx, cs2), rexp), -			 rx_mk_r_concat(&rxb->rx, -							rx_mk_r_cset(&rxb->rx, cs), -							fewer_side_effects)); - -		if (!(new_rexp && cs && cs2)) -			return REG_ESPACE; -		RX_bitset_enjoin(cs2, '\0');	/* prefixed to the rexp used for matching. */ -		RX_bitset_enjoin(cs, '\1');	/* prefixed to the rexp used for searching. 
*/ -		rexp = new_rexp; -	} - -#ifdef RX_DEBUG -	if (rx_debug_compile) { -		fputs("\n...which is compiled as:\n", stdout); -		print_rexp(&rxb->rx, rexp, 2, re_seprint, stdout); -	} -#endif -	{ -		struct rx_nfa_state *start = 0; -		struct rx_nfa_state *end = 0; - -		if (!rx_build_nfa(&rxb->rx, rexp, &start, &end)) -			return REG_ESPACE;	/*  */ -		else { -			void *mem = (void *) rxb->buffer; -			unsigned long size = rxb->allocated; -			int start_id; -			char *perm_mem; -			int iterator_size = paramc * sizeof(params[0]); - -			end->is_final = 1; -			start->is_start = 1; -			rx_name_nfa_states(&rxb->rx); -			start_id = start->id; -#ifdef RX_DEBUG -			if (rx_debug_compile) { -				fputs("...giving the NFA: \n", stdout); -				dbug_rxb = rxb; -				print_nfa(&rxb->rx, rxb->rx.nfa_states, re_seprint, -						  stdout); -			} -#endif -			if (!rx_eclose_nfa(&rxb->rx)) -				return REG_ESPACE; -			else { -				rx_delete_epsilon_transitions(&rxb->rx); - -				/* For compatability reasons, we need to shove the -				 * compiled nfa into one chunk of malloced memory. 
-				 */ -				rxb->rx.reserved = (sizeof(params[0]) * paramc -									+ -									rx_sizeof_bitset(rxb-> -													 rx.local_cset_size)); -#ifdef RX_DEBUG -				if (rx_debug_compile) { -					dbug_rxb = rxb; -					fputs("...which cooks down (uncompactified) to: \n", -						  stdout); -					print_nfa(&rxb->rx, rxb->rx.nfa_states, re_seprint, -							  stdout); -				} -#endif -				if (!rx_compactify_nfa(&rxb->rx, &mem, &size)) -					return REG_ESPACE; -				rxb->buffer = mem; -				rxb->allocated = size; -				rxb->rx.buffer = mem; -				rxb->rx.allocated = size; -				perm_mem = ((char *) rxb->rx.buffer -							+ rxb->rx.allocated - rxb->rx.reserved); -				rxb->se_params = ((struct re_se_params *) perm_mem); -				bcopy(params, rxb->se_params, iterator_size); -				perm_mem += iterator_size; -				rxb->fastset = (rx_Bitset) perm_mem; -				rxb->start = rx_id_to_nfa_state(&rxb->rx, start_id); -			} -			rx_bitset_null(rxb->rx.local_cset_size, rxb->fastset); -			rxb->can_match_empty = compute_fastset(rxb, orig_rexp); -			rxb->match_regs_on_stack = -				registers_on_stack(rxb, orig_rexp, 0, params); -			rxb->search_regs_on_stack = -				registers_on_stack(rxb, fewer_side_effects, 0, params); -			if (rxb->can_match_empty) -				rx_bitset_universe(rxb->rx.local_cset_size, rxb->fastset); -			rxb->is_anchored = -				is_anchored(orig_rexp, (rx_side_effect) re_se_hat); -			rxb->begbuf_only = -				is_anchored(orig_rexp, (rx_side_effect) re_se_begbuf); -		} -		rx_free_rexp(&rxb->rx, rexp); -		if (params) -			free(params); -#ifdef RX_DEBUG -		if (rx_debug_compile) { -			dbug_rxb = rxb; -			fputs("...which cooks down to: \n", stdout); -			print_nfa(&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout); -		} -#endif -	} -	return REG_NOERROR; -} - - - -/* This table gives an error message for each of the error codes listed -   in regex.h.  Obviously the order here has to be same as there.  
*/ - -__const__ char *rx_error_msg[] = { 0,	/* REG_NOERROR */ -	"No match",					/* REG_NOMATCH */ -	"Invalid regular expression",	/* REG_BADPAT */ -	"Invalid collation character",	/* REG_ECOLLATE */ -	"Invalid character class name",	/* REG_ECTYPE */ -	"Trailing backslash",		/* REG_EESCAPE */ -	"Invalid back reference",	/* REG_ESUBREG */ -	"Unmatched [ or [^",		/* REG_EBRACK */ -	"Unmatched ( or \\(",		/* REG_EPAREN */ -	"Unmatched \\{",			/* REG_EBRACE */ -	"Invalid content of \\{\\}",	/* REG_BADBR */ -	"Invalid range end",		/* REG_ERANGE */ -	"Memory exhausted",			/* REG_ESPACE */ -	"Invalid preceding regular expression",	/* REG_BADRPT */ -	"Premature end of regular expression",	/* REG_EEND */ -	"Regular expression too big",	/* REG_ESIZE */ -	"Unmatched ) or \\)",		/* REG_ERPAREN */ -}; - - - - -char rx_slowmap[256] = { -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -#ifdef __STDC__ -RX_DECL void rx_blow_up_fastmap(struct re_pattern_buffer *rxb) -#else -RX_DECL void rx_blow_up_fastmap(rxb) -struct re_pattern_buffer *rxb; -#endif -{ -	int x; - -	for (x = 0; x < 256; ++x)	/* &&&& 3.6 % */ -		rxb->fastmap[x] = !!RX_bitset_member(rxb->fastset, x); -	rxb->fastmap_accurate = 1; -} - - - - -#if !defined(REGEX_MALLOC) && 
!defined(__GNUC__) -#define RE_SEARCH_2_FN	inner_re_search_2 -#define RE_S2_QUAL static -#else -#define RE_SEARCH_2_FN	re_search_2 -#define RE_S2_QUAL -#endif - -struct re_search_2_closure { -	__const__ char *string1; -	int size1; -	__const__ char *string2; -	int size2; -}; - -RE_S2_QUAL int -RE_SEARCH_2_FN(struct re_pattern_buffer *, -			   __const__ char *, -			   int, __const__ char *, int, int, - -			   int, struct re_registers *, int); -int re_rx_search(struct re_pattern_buffer *, int, -				 int, int, int, rx_get_burst_fn, -				 rx_back_check_fn, rx_fetch_char_fn, -				 void *, struct re_registers *, - -				 struct rx_search_state *, struct rx_search_state *); -#if !defined(REGEX_MALLOC) && !defined(__GNUC__) -int re_search_2(struct re_pattern_buffer *, -				__const__ char *, int, -				__const__ char *, int, - -				int, int, struct re_registers *, int); -#endif -int re_search(struct re_pattern_buffer *, - -			  __const__ char *, int, int, int, struct re_registers *); -int re_match_2(struct re_pattern_buffer *, -			   __const__ char *, int, -			   __const__ char *, int, int, struct re_registers *, int); -int re_match(struct re_pattern_buffer *, - -			 __const__ char *, int, int, struct re_registers *); -reg_syntax_t re_set_syntax(reg_syntax_t); -void re_set_registers(struct re_pattern_buffer *, -					  struct re_registers *, unsigned, -					  regoff_t *, regoff_t *); -static int cplx_se_sublist_len(struct rx_se_list *); -static int posix_se_list_order(struct rx *, struct rx_se_list *, - -							   struct rx_se_list *); -__const__ char -*re_compile_pattern(__const__ char *, int, struct re_pattern_buffer *); -int re_compile_fastmap(struct re_pattern_buffer *); -char *re_comp(__const__ char *); -int re_exec(__const__ char *); -int regcomp(regex_t *, __const__ char *, int); -int regexec(__const__ regex_t *, -			__const__ char *, size_t, regmatch_t pmatch[], int); -size_t regerror(int, __const__ regex_t *, char *, size_t); - -#ifdef __STDC__ -static __inline__ enum 
rx_get_burst_return -re_search_2_get_burst(struct rx_string_position *pos, -					  void *vclosure, int stop) -#else -static __inline__ enum rx_get_burst_return -re_search_2_get_burst(pos, vclosure, stop) -struct rx_string_position *pos; -void *vclosure; -int stop; -#endif -{ -	struct re_search_2_closure *closure; - -	closure = (struct re_search_2_closure *) vclosure; -	if (!closure->string2) { -		int inset; - -		inset = pos->pos - pos->string; -		if ((inset < -1) || (inset > closure->size1)) -			return rx_get_burst_no_more; -		else { -			pos->pos = -				(__const__ unsigned char *) closure->string1 + inset; -			pos->string = (__const__ unsigned char *) closure->string1; -			pos->size = closure->size1; -			pos->end = ((__const__ unsigned char *) -						MIN(closure->string1 + closure->size1, -							closure->string1 + stop)); -			pos->offset = 0; -			return ((pos->pos < pos->end) -					? rx_get_burst_ok : rx_get_burst_no_more); -		} -	} else if (!closure->string1) { -		int inset; - -		inset = pos->pos - pos->string; -		pos->pos = (__const__ unsigned char *) closure->string2 + inset; -		pos->string = (__const__ unsigned char *) closure->string2; -		pos->size = closure->size2; -		pos->end = ((__const__ unsigned char *) -					MIN(closure->string2 + closure->size2, -						closure->string2 + stop)); -		pos->offset = 0; -		return ((pos->pos < pos->end) -				? 
rx_get_burst_ok : rx_get_burst_no_more); -	} else { -		int inset; - -		inset = pos->pos - pos->string + pos->offset; -		if (inset < closure->size1) { -			pos->pos = -				(__const__ unsigned char *) closure->string1 + inset; -			pos->string = (__const__ unsigned char *) closure->string1; -			pos->size = closure->size1; -			pos->end = ((__const__ unsigned char *) -						MIN(closure->string1 + closure->size1, -							closure->string1 + stop)); -			pos->offset = 0; -			return rx_get_burst_ok; -		} else { -			pos->pos = ((__const__ unsigned char *) -						closure->string2 + inset - closure->size1); -			pos->string = (__const__ unsigned char *) closure->string2; -			pos->size = closure->size2; -			pos->end = ((__const__ unsigned char *) -						MIN(closure->string2 + closure->size2, -							closure->string2 + stop - closure->size1)); -			pos->offset = closure->size1; -			return ((pos->pos < pos->end) -					? rx_get_burst_ok : rx_get_burst_no_more); -		} -	} -} - - -#ifdef __STDC__ -static __inline__ enum rx_back_check_return -re_search_2_back_check(struct rx_string_position *pos, -					   int lparen, int rparen, unsigned char *translate, -					   void *vclosure, int stop) -#else -static __inline__ enum rx_back_check_return -re_search_2_back_check(pos, lparen, rparen, translate, vclosure, stop) -struct rx_string_position *pos; -int lparen; -int rparen; -unsigned char *translate; -void *vclosure; -int stop; -#endif -{ -	struct rx_string_position there; -	struct rx_string_position past; - -	there = *pos; -	there.pos = there.string + lparen - there.offset; -	re_search_2_get_burst(&there, vclosure, stop); - -	past = *pos; -	past.pos = past.string + rparen - there.offset; -	re_search_2_get_burst(&past, vclosure, stop); - -	++pos->pos; -	re_search_2_get_burst(pos, vclosure, stop); - -	while ((there.pos != past.pos) -		   && (pos->pos != pos->end)) -		if (TRANSLATE(*there.pos) != TRANSLATE(*pos->pos)) -			return rx_back_check_fail; -		else { -			++there.pos; -			++pos->pos; -			
if (there.pos == there.end) -				re_search_2_get_burst(&there, vclosure, stop); -			if (pos->pos == pos->end) -				re_search_2_get_burst(pos, vclosure, stop); -		} - -	if (there.pos != past.pos) -		return rx_back_check_fail; -	--pos->pos; -	re_search_2_get_burst(pos, vclosure, stop); -	return rx_back_check_pass; -} - -#ifdef __STDC__ -static __inline__ int -re_search_2_fetch_char(struct rx_string_position *pos, int offset, -					   void *app_closure, int stop) -#else -static __inline__ int -re_search_2_fetch_char(pos, offset, app_closure, stop) -struct rx_string_position *pos; -int offset; -void *app_closure; -int stop; -#endif -{ -	struct re_search_2_closure *closure; - -	closure = (struct re_search_2_closure *) app_closure; -	if (offset == 0) { -		if (pos->pos >= pos->string) -			return *pos->pos; -		else { -			if ( -				(pos->string == -				 (__const__ unsigned char *) closure->string2) -				&& (closure->string1) && (closure->size1)) -				return closure->string1[closure->size1 - 1]; -			else -				return 0;		/* sure, why not. 
*/ -		} -	} -	if (pos->pos == pos->end) -		return *closure->string2; -	else -#if 0 -		return pos->pos[1]; -#else -		return pos->pos[offset];	/* FIXME */ -#endif -} - -#ifdef __STDC__ -RE_S2_QUAL int -RE_SEARCH_2_FN(struct re_pattern_buffer *rxb, -			   __const__ char *string1, int size1, -			   __const__ char *string2, int size2, -			   int startpos, int range, -			   struct re_registers *regs, int stop) -#else -RE_S2_QUAL int -RE_SEARCH_2_FN(rxb, -			   string1, size1, string2, size2, startpos, range, regs, stop) -struct re_pattern_buffer *rxb; -__const__ char *string1; -int size1; -__const__ char *string2; -int size2; -int startpos; -int range; -struct re_registers *regs; -int stop; -#endif -{ -	int answer; -	struct re_search_2_closure closure; - -	closure.string1 = string1; -	closure.size1 = size1; -	closure.string2 = string2; -	closure.size2 = size2; -	answer = rx_search(rxb, startpos, range, stop, size1 + size2, -					   re_search_2_get_burst, -					   re_search_2_back_check, -					   re_search_2_fetch_char, -					   (void *) &closure, regs, 0, 0); -	switch (answer) { -	case rx_search_continuation: -		abort(); -	case rx_search_error: -		return -2; -	case rx_search_soft_fail: -	case rx_search_fail: -		return -1; -	default: -		return answer; -	} -} - -/* Export rx_search to callers outside this file.  
*/ - -#ifdef __STDC__ -int -re_rx_search(struct re_pattern_buffer *rxb, int startpos, int range, -			 int stop, int total_size, rx_get_burst_fn get_burst, -			 rx_back_check_fn back_check, rx_fetch_char_fn fetch_char, -			 void *app_closure, struct re_registers *regs, -			 struct rx_search_state *resume_state, -			 struct rx_search_state *save_state) -#else -int -re_rx_search(rxb, startpos, range, stop, total_size, -			 get_burst, back_check, fetch_char, -			 app_closure, regs, resume_state, save_state) -struct re_pattern_buffer *rxb; -int startpos; -int range; -int stop; -int total_size; -rx_get_burst_fn get_burst; -rx_back_check_fn back_check; -rx_fetch_char_fn fetch_char; -void *app_closure; -struct re_registers *regs; -struct rx_search_state *resume_state; -struct rx_search_state *save_state; -#endif -{ -	return rx_search(rxb, startpos, range, stop, total_size, -					 get_burst, back_check, fetch_char, app_closure, -					 regs, resume_state, save_state); -} - -#if !defined(REGEX_MALLOC) && !defined(__GNUC__) -#ifdef __STDC__ -int -re_search_2(struct re_pattern_buffer *rxb, -			__const__ char *string1, int size1, -			__const__ char *string2, int size2, -			int startpos, int range, struct re_registers *regs, int stop) -#else -int -re_search_2(rxb, string1, size1, string2, size2, startpos, range, regs, -			stop) -struct re_pattern_buffer *rxb; -__const__ char *string1; -int size1; -__const__ char *string2; -int size2; -int startpos; -int range; -struct re_registers *regs; -int stop; -#endif -{ -	int ret; - -	ret = inner_re_search_2(rxb, string1, size1, string2, size2, startpos, -							range, regs, stop); -	alloca(0); -	return ret; -} -#endif - - -/* Like re_search_2, above, but only one string is specified, and - * doesn't let you say where to stop matching. 
- */ - -#ifdef __STDC__ -int -re_search(struct re_pattern_buffer *rxb, __const__ char *string, -		  int size, int startpos, int range, struct re_registers *regs) -#else -int re_search(rxb, string, size, startpos, range, regs) -struct re_pattern_buffer *rxb; -__const__ char *string; -int size; -int startpos; -int range; -struct re_registers *regs; -#endif -{ -	return re_search_2(rxb, 0, 0, string, size, startpos, range, regs, -					   size); -} - -#ifdef __STDC__ -int -re_match_2(struct re_pattern_buffer *rxb, -		   __const__ char *string1, int size1, -		   __const__ char *string2, int size2, -		   int pos, struct re_registers *regs, int stop) -#else -int re_match_2(rxb, string1, size1, string2, size2, pos, regs, stop) -struct re_pattern_buffer *rxb; -__const__ char *string1; -int size1; -__const__ char *string2; -int size2; -int pos; -struct re_registers *regs; -int stop; -#endif -{ -	struct re_registers some_regs; -	regoff_t start; -	regoff_t end; -	int srch; -	int save = rxb->regs_allocated; -	struct re_registers *regs_to_pass = regs; -	char *old_fastmap = rxb->fastmap; - -	if (!regs) { -		some_regs.start = &start; -		some_regs.end = &end; -		some_regs.num_regs = 1; -		regs_to_pass = &some_regs; -		rxb->regs_allocated = REGS_FIXED; -	} - -	rxb->fastmap = NULL; -	srch = re_search_2(rxb, string1, size1, string2, size2, -					   pos, 1, regs_to_pass, stop); -	rxb->fastmap = old_fastmap; -	if (regs_to_pass != regs) -		rxb->regs_allocated = save; -	if (srch < 0) -		return srch; -	return regs_to_pass->end[0] - regs_to_pass->start[0]; -} - -/* re_match is like re_match_2 except it takes only a single string.  
*/ - -#ifdef __STDC__ -int -re_match(struct re_pattern_buffer *rxb, -		 __const__ char *string, -		 int size, int pos, struct re_registers *regs) -#else -int re_match(rxb, string, size, pos, regs) -struct re_pattern_buffer *rxb; -__const__ char *string; -int size; -int pos; -struct re_registers *regs; -#endif -{ -	return re_match_2(rxb, string, size, 0, 0, pos, regs, size); -} - - - -/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can -   also be assigned to arbitrarily: each pattern buffer stores its own -   syntax, so it can be changed between regex compilations.  */ -reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS; - - -/* Specify the precise syntax of regexps for compilation.  This provides -   for compatibility for various utilities which historically have -   different, incompatible syntaxes. - -   The argument SYNTAX is a bit mask comprised of the various bits -   defined in regex.h.  We return the old syntax.  */ - -#ifdef __STDC__ -reg_syntax_t re_set_syntax(reg_syntax_t syntax) -#else -reg_syntax_t re_set_syntax(syntax) -reg_syntax_t syntax; -#endif -{ -	reg_syntax_t ret = re_syntax_options; - -	re_syntax_options = syntax; -	return ret; -} - - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and -   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use -   this memory for recording register information.  STARTS and ENDS -   must be allocated using the malloc library routine, and must each -   be at least NUM_REGS * sizeof (regoff_t) bytes long. - -   If NUM_REGS == 0, then subsequent matches should allocate their own -   register data. - -   Unless this function is called, the first search or match using -   PATTERN_BUFFER will allocate its own register data, without -   freeing the old data.  
*/ - -#ifdef __STDC__ -void -re_set_registers(struct re_pattern_buffer *bufp, -				 struct re_registers *regs, -				 unsigned num_regs, regoff_t * starts, regoff_t * ends) -#else -void re_set_registers(bufp, regs, num_regs, starts, ends) -struct re_pattern_buffer *bufp; -struct re_registers *regs; -unsigned num_regs; -regoff_t *starts; -regoff_t *ends; -#endif -{ -	if (num_regs) { -		bufp->regs_allocated = REGS_REALLOCATE; -		regs->num_regs = num_regs; -		regs->start = starts; -		regs->end = ends; -	} else { -		bufp->regs_allocated = REGS_UNALLOCATED; -		regs->num_regs = 0; -		regs->start = regs->end = (regoff_t) 0; -	} -} - - - - -#ifdef __STDC__ -static int cplx_se_sublist_len(struct rx_se_list *list) -#else -static int cplx_se_sublist_len(list) -struct rx_se_list *list; -#endif -{ -	int x = 0; - -	while (list) { -		if ((long) list->car >= 0) -			++x; -		list = list->cdr; -	} -	return x; -} - - -/* For rx->se_list_cmp */ - -#ifdef __STDC__ -static int -posix_se_list_order(struct rx *rx, -					struct rx_se_list *a, struct rx_se_list *b) -#else -static int posix_se_list_order(rx, a, b) -struct rx *rx; -struct rx_se_list *a; -struct rx_se_list *b; -#endif -{ -	int al = cplx_se_sublist_len(a); -	int bl = cplx_se_sublist_len(b); - -	if (!al && !bl) -		return ((a == b) -				? 0 : ((a < b) ? 
-1 : 1)); - -	else if (!al) -		return -1; - -	else if (!bl) -		return 1; - -	else { -		rx_side_effect *av = ((rx_side_effect *) -							  alloca(sizeof(rx_side_effect) * (al + 1))); -		rx_side_effect *bv = ((rx_side_effect *) -							  alloca(sizeof(rx_side_effect) * (bl + 1))); -		struct rx_se_list *ap = a; -		struct rx_se_list *bp = b; -		int ai, bi; - -		for (ai = al - 1; ai >= 0; --ai) { -			while ((long) ap->car < 0) -				ap = ap->cdr; -			av[ai] = ap->car; -			ap = ap->cdr; -		} -		av[al] = (rx_side_effect) - 2; -		for (bi = bl - 1; bi >= 0; --bi) { -			while ((long) bp->car < 0) -				bp = bp->cdr; -			bv[bi] = bp->car; -			bp = bp->cdr; -		} -		bv[bl] = (rx_side_effect) - 1; - -		{ -			int ret; -			int x = 0; - -			while (av[x] == bv[x]) -				++x; -			ret = (((unsigned *) (av[x]) < (unsigned *) (bv[x])) ? -1 : 1); -			return ret; -		} -	} -} - - - - -/* re_compile_pattern is the GNU regular expression compiler: it -   compiles PATTERN (of length SIZE) and puts the result in RXB. -   Returns 0 if the pattern was valid, otherwise an error string. - -   Assumes the `allocated' (and perhaps `buffer') and `translate' fields -   are set in RXB on entry. - -   We call rx_compile to do the actual compilation.  */ - -#ifdef __STDC__ -__const__ char *re_compile_pattern(__const__ char *pattern, -								   int length, -								   struct re_pattern_buffer *rxb) -#else -__const__ char *re_compile_pattern(pattern, length, rxb) -__const__ char *pattern; -int length; -struct re_pattern_buffer *rxb; -#endif -{ -	reg_errcode_t ret; - -	/* GNU code is written to assume at least RE_NREGS registers will be set -	   (and at least one extra will be -1).  */ -	rxb->regs_allocated = REGS_UNALLOCATED; - -	/* And GNU code determines whether or not to get register information -	   by passing null for the REGS argument to re_match, etc., not by -	   setting no_sub.  */ -	rxb->no_sub = 0; - -	rxb->rx.local_cset_size = 256; - -	/* Match anchors at newline.  
*/ -	rxb->newline_anchor = 1; - -	rxb->re_nsub = 0; -	rxb->start = 0; -	rxb->se_params = 0; -	rxb->rx.nodec = 0; -	rxb->rx.epsnodec = 0; -	rxb->rx.instruction_table = 0; -	rxb->rx.nfa_states = 0; -	rxb->rx.se_list_cmp = posix_se_list_order; -	rxb->rx.start_set = 0; - -	ret = rx_compile(pattern, length, re_syntax_options, rxb); -	alloca(0); -	return rx_error_msg[(int) ret]; -} - - -#ifdef __STDC__ -int re_compile_fastmap(struct re_pattern_buffer *rxb) -#else -int re_compile_fastmap(rxb) -struct re_pattern_buffer *rxb; -#endif -{ -	rx_blow_up_fastmap(rxb); -	return 0; -} - - - - -/* Entry points compatible with 4.2 BSD regex library.  We don't define -   them if this is an Emacs or POSIX compilation.  */ - -#if (!defined (emacs) && !defined (_POSIX_SOURCE)) || defined(USE_BSD_REGEX) - -/* BSD has one and only one pattern buffer.  */ -static struct re_pattern_buffer rx_comp_buf; - -#ifdef __STDC__ -char *re_comp(__const__ char *s) -#else -char *re_comp(s) -__const__ char *s; -#endif -{ -	reg_errcode_t ret; - -	if (!s || (*s == '\0')) { -		if (!rx_comp_buf.buffer) -			return "No previous regular expression"; -		return 0; -	} - -	if (!rx_comp_buf.fastmap) { -		rx_comp_buf.fastmap = (char *) malloc(1 << CHARBITS); -		if (!rx_comp_buf.fastmap) -			return "Memory exhausted"; -	} - -	/* Since `rx_exec' always passes NULL for the `regs' argument, we -	   don't need to initialize the pattern buffer fields which affect it.  */ - -	/* Match anchors at newlines.  
*/ -	rx_comp_buf.newline_anchor = 1; - -	rx_comp_buf.re_nsub = 0; -	rx_comp_buf.start = 0; -	rx_comp_buf.se_params = 0; -	rx_comp_buf.rx.nodec = 0; -	rx_comp_buf.rx.epsnodec = 0; -	rx_comp_buf.rx.instruction_table = 0; -	rx_comp_buf.rx.nfa_states = 0; -	rx_comp_buf.rx.start = 0; -	rx_comp_buf.rx.se_list_cmp = posix_se_list_order; -	rx_comp_buf.rx.start_set = 0; -	rx_comp_buf.rx.local_cset_size = 256; - -	ret = rx_compile(s, strlen(s), re_syntax_options, &rx_comp_buf); -	alloca(0); - -	/* Yes, we're discarding `__const__' here.  */ -	return (char *) rx_error_msg[(int) ret]; -} - - -#ifdef __STDC__ -int re_exec(__const__ char *s) -#else -int re_exec(s) -__const__ char *s; -#endif -{ -	__const__ int len = strlen(s); - -	return -		0 <= re_search(&rx_comp_buf, s, len, 0, len, -					   (struct re_registers *) 0); -} -#endif							/* not emacs and not _POSIX_SOURCE */ - - - -/* POSIX.2 functions.  Don't define these for Emacs.  */ - -#if !defined(emacs) - -/* regcomp takes a regular expression as a string and compiles it. - -   PREG is a regex_t *.  We do not expect any fields to be initialized, -   since POSIX says we shouldn't.  Thus, we set - -     `buffer' to the compiled pattern; -     `used' to the length of the compiled pattern; -     `syntax' to RE_SYNTAX_POSIX_EXTENDED if the -       REG_EXTENDED bit in CFLAGS is set; otherwise, to -       RE_SYNTAX_POSIX_BASIC; -     `newline_anchor' to REG_NEWLINE being set in CFLAGS; -     `fastmap' and `fastmap_accurate' to zero; -     `re_nsub' to the number of subexpressions in PATTERN. - -   PATTERN is the address of the pattern string. - -   CFLAGS is a series of bits which affect compilation. - -     If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we -     use POSIX basic syntax. - -     If REG_NEWLINE is set, then . and [^...] don't match newline. -     Also, regexec will try a match beginning after every newline. 
- -     If REG_ICASE is set, then we considers upper- and lowercase -     versions of letters to be equivalent when matching. - -     If REG_NOSUB is set, then when PREG is passed to regexec, that -     routine will report only success or failure, and nothing about the -     registers. - -   It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for -   the return codes and their meanings.)  */ - - -#ifdef __STDC__ -int regcomp(regex_t * preg, __const__ char *pattern, int cflags) -#else -int regcomp(preg, pattern, cflags) -regex_t *preg; -__const__ char *pattern; -int cflags; -#endif -{ -	reg_errcode_t ret; -	unsigned syntax - -		= -		cflags & REG_EXTENDED ? RE_SYNTAX_POSIX_EXTENDED : -		RE_SYNTAX_POSIX_BASIC; - -	/* regex_compile will allocate the space for the compiled pattern.  */ -	preg->buffer = 0; -	preg->allocated = 0; -	preg->fastmap = malloc(256); -	if (!preg->fastmap) -		return REG_ESPACE; -	preg->fastmap_accurate = 0; - -	if (cflags & REG_ICASE) { -		unsigned i; - -		preg->translate = (unsigned char *) malloc(256); -		if (!preg->translate) -			return (int) REG_ESPACE; - -		/* Map uppercase characters to corresponding lowercase ones.  */ -		for (i = 0; i < CHAR_SET_SIZE; i++) -			preg->translate[i] = isupper(i) ? tolower(i) : i; -	} else -		preg->translate = 0; - -	/* If REG_NEWLINE is set, newlines are treated differently.  */ -	if (cflags & REG_NEWLINE) {	/* REG_NEWLINE implies neither . nor [^...] match newline.  */ -		syntax &= ~RE_DOT_NEWLINE; -		syntax |= RE_HAT_LISTS_NOT_NEWLINE; -		/* It also changes the matching behavior.  */ -		preg->newline_anchor = 1; -	} else -		preg->newline_anchor = 0; - -	preg->no_sub = !!(cflags & REG_NOSUB); - -	/* POSIX says a null character in the pattern terminates it, so we -	   can use strlen here in compiling the pattern.  
*/ -	preg->re_nsub = 0; -	preg->start = 0; -	preg->se_params = 0; -	preg->syntax_parens = 0; -	preg->rx.nodec = 0; -	preg->rx.epsnodec = 0; -	preg->rx.instruction_table = 0; -	preg->rx.nfa_states = 0; -	preg->rx.local_cset_size = 256; -	preg->rx.start = 0; -	preg->rx.se_list_cmp = posix_se_list_order; -	preg->rx.start_set = 0; -	ret = rx_compile(pattern, strlen(pattern), syntax, preg); -	alloca(0); - -	/* POSIX doesn't distinguish between an unmatched open-group and an -	   unmatched close-group: both are REG_EPAREN.  */ -	if (ret == REG_ERPAREN) -		ret = REG_EPAREN; - -	return (int) ret; -} - - -/* regexec searches for a given pattern, specified by PREG, in the -   string STRING. - -   If NMATCH is zero or REG_NOSUB was set in the cflags argument to -   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at -   least NMATCH elements, and we set them to the offsets of the -   corresponding matched substrings. - -   EFLAGS specifies `execution flags' which affect matching: if -   REG_NOTBOL is set, then ^ does not match at the beginning of the -   string; if REG_NOTEOL is set, then $ does not match at the end. - -   We return 0 if we find a match and REG_NOMATCH if not.  */ - -#ifdef __STDC__ -int -regexec(__const__ regex_t * preg, __const__ char *string, -		size_t nmatch, regmatch_t pmatch[], int eflags) -#else -int regexec(preg, string, nmatch, pmatch, eflags) -__const__ regex_t *preg; -__const__ char *string; -size_t nmatch; -regmatch_t pmatch[]; -int eflags; -#endif -{ -	int ret; -	struct re_registers regs; -	regex_t private_preg; -	int len = strlen(string); -	boolean want_reg_info = !preg->no_sub && nmatch > 0; - -	private_preg = *preg; - -	private_preg.not_bol = !!(eflags & REG_NOTBOL); -	private_preg.not_eol = !!(eflags & REG_NOTEOL); - -	/* The user has told us exactly how many registers to return -	 * information about, via `nmatch'.  We have to pass that on to the -	 * matching routines. 
-	 */ -	private_preg.regs_allocated = REGS_FIXED; - -	if (want_reg_info) { -		regs.num_regs = nmatch; -		regs.start = ((regoff_t *) malloc((nmatch) * sizeof(regoff_t))); -		regs.end = ((regoff_t *) malloc((nmatch) * sizeof(regoff_t))); -		if (regs.start == 0 || regs.end == 0) -			return (int) REG_NOMATCH; -	} - -	/* Perform the searching operation.  */ -	ret = re_search(&private_preg, string, len, -					/* start: */ 0, -					/* range: */ len, -					want_reg_info ? &regs : (struct re_registers *) 0); - -	/* Copy the register information to the POSIX structure.  */ -	if (want_reg_info) { -		if (ret >= 0) { -			unsigned r; - -			for (r = 0; r < nmatch; r++) { -				pmatch[r].rm_so = regs.start[r]; -				pmatch[r].rm_eo = regs.end[r]; -			} -		} - -		/* If we needed the temporary register info, free the space now.  */ -		free(regs.start); -		free(regs.end); -	} - -	/* We want zero return to mean success, unlike `re_search'.  */ -	return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; -} - - -/* Returns a message corresponding to an error code, ERRCODE, returned -   from either regcomp or regexec.   */ - -#ifdef __STDC__ -size_t -regerror(int errcode, __const__ regex_t * preg, -		 char *errbuf, size_t errbuf_size) -#else -size_t regerror(errcode, preg, errbuf, errbuf_size) -int errcode; -__const__ regex_t *preg; -char *errbuf; -size_t errbuf_size; -#endif -{ -	__const__ char *msg -		= rx_error_msg[errcode] == 0 ? "Success" : rx_error_msg[errcode]; -	size_t msg_size = strlen(msg) + 1;	/* Includes the 0.  */ - -	if (errbuf_size != 0) { -		if (msg_size > errbuf_size) { -			strncpy(errbuf, msg, errbuf_size - 1); -			errbuf[errbuf_size - 1] = 0; -		} else -			strcpy(errbuf, msg); -	} - -	return msg_size; -} - - -/* Free dynamically allocated space used by PREG.  
*/ - -#ifdef __STDC__ -void regfree(regex_t * preg) -#else -void regfree(preg) -regex_t *preg; -#endif -{ -	if (preg->buffer != 0) -		free(preg->buffer); -	preg->buffer = 0; -	preg->allocated = 0; - -	if (preg->fastmap != 0) -		free(preg->fastmap); -	preg->fastmap = 0; -	preg->fastmap_accurate = 0; - -	if (preg->translate != 0) -		free(preg->translate); -	preg->translate = 0; -} - -#endif							/* not emacs  */  | 
