From 2733ce87f0ffd9a6b950ec49f1d502da7a7e973b Mon Sep 17 00:00:00 2001 From: Waldemar Brodkorb Date: Mon, 17 Apr 2017 17:19:35 +0200 Subject: remove old regex implementation --- extra/Configs/Config.in | 15 - libc/misc/regex/Makefile.in | 3 +- libc/misc/regex/regex_old.c | 8265 ------------------------------------------- 3 files changed, 1 insertion(+), 8282 deletions(-) delete mode 100644 libc/misc/regex/regex_old.c diff --git a/extra/Configs/Config.in b/extra/Configs/Config.in index 974fc7a1a..80100a7f8 100644 --- a/extra/Configs/Config.in +++ b/extra/Configs/Config.in @@ -1835,21 +1835,6 @@ config UCLIBC_HAS_REGEX Of course, if you only statically link, leave this on, since it will only be included in your apps if you use regular expressions. -config UCLIBC_HAS_REGEX_OLD - bool "Use the older (stable) regular expression code" - depends on UCLIBC_HAS_REGEX - default y - help - There are two versions of regex. The older (stable) version has - been in uClibc for quite a long time but hasn't seen too many - updates. It also has some known issues when dealing with uncommon - corner cases and multibyte/unicode strings. However, it is quite - a bit smaller than the newer version. - - If the older version has worked for you and you don't need unicode - support, then stick with the old version (and say Y here). - Otherwise, you should use the new version (and say N here). - config UCLIBC_HAS_FNMATCH bool "fnmatch Support" default y diff --git a/libc/misc/regex/Makefile.in b/libc/misc/regex/Makefile.in index f95a9f7a3..f0c349466 100644 --- a/libc/misc/regex/Makefile.in +++ b/libc/misc/regex/Makefile.in @@ -7,8 +7,7 @@ subdirs += libc/misc/regex -VARIANT := $(if $(UCLIBC_HAS_REGEX_OLD),_old) -CSRC-y := regex$(VARIANT).c +CSRC-y := regex.c MISC_REGEX_DIR := $(top_srcdir)libc/misc/regex MISC_REGEX_OUT := $(top_builddir)libc/misc/regex diff --git a/libc/misc/regex/regex_old.c b/libc/misc/regex/regex_old.c deleted file mode 100644 index 9d8182ead..000000000 --- a/libc/misc/regex/regex_old.c +++ /dev/null @@ -1,8265 +0,0 @@ -/* Extended regular expression matching and search library, - version 0.12. - (Implements POSIX draft P1003.2/D11.2, except for some of the - internationalization features.) - Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - . */ - -/* To exclude some unwanted junk.... */ -#undef emacs -#include -/* unistd.h must be included with _LIBC defined: we need smallint */ -#include -#include -#ifdef __UCLIBC__ -# undef _LIBC -# define _REGEX_RE_COMP -# define STDC_HEADERS -# define __RE_TRANSLATE_TYPE char * -# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE -#endif -#include -#include -#include - -/* AIX requires this to be the first thing in the file. */ -#if defined _AIX && !defined REGEX_MALLOC -# pragma alloca -#endif - -#ifdef HAVE_CONFIG_H -# include -#endif - -#ifndef INSIDE_RECURSION - -# if defined STDC_HEADERS && !defined emacs -# include -# else -/* We need this for `regex.h', and perhaps for the Emacs include files. */ -# include -# endif - - -/* For platform which support the ISO C amendement 1 functionality we - support user defined character classes. */ -# if defined __UCLIBC_HAS_WCHAR__ -# define WIDE_CHAR_SUPPORT 1 -/* Solaris 2.5 has a bug: must be included before . */ -# include -# include -# endif - -# ifdef _LIBC -/* We have to keep the namespace clean. */ - -# define btowc __btowc - -/* We are also using some library internals. */ -# include -# include -# include -# include -# endif - -# ifndef gettext -# define gettext(msgid) (msgid) -# endif - -# ifndef gettext_noop -/* This define is so xgettext can find the internationalizable - strings. */ -# define gettext_noop(String) String -# endif - -/* The `emacs' switch turns on certain matching commands - that make sense only in Emacs. */ -# ifdef emacs - -# include "lisp.h" -# include "buffer.h" -# include "syntax.h" - -# else /* not emacs */ - -/* If we are not linking with Emacs proper, - we can't use the relocating allocator - even if config.h says that we can. */ -# undef REL_ALLOC - -# if defined STDC_HEADERS || defined _LIBC -# include -# else -char *malloc (); -char *realloc (); -# endif - -/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. - If nothing else has been done, use the method below. */ -# ifdef INHIBIT_STRING_HEADER -# if !(defined HAVE_BZERO && defined HAVE_BCOPY) -# if !defined bzero && !defined bcopy -# undef INHIBIT_STRING_HEADER -# endif -# endif -# endif - -/* This is the normal way of making sure we have a bcopy and a bzero. - This is used in most programs--a few other programs avoid this - by defining INHIBIT_STRING_HEADER. */ -# ifndef INHIBIT_STRING_HEADER -# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC -# include -# ifndef bzero -# ifndef _LIBC -# define bzero(s, n) (memset (s, '\0', n), (s)) -# else -# define bzero(s, n) __bzero (s, n) -# endif -# endif -# else -# include -# ifndef memcmp -# define memcmp(s1, s2, n) bcmp (s1, s2, n) -# endif -# ifndef memcpy -# define memcpy(d, s, n) (bcopy (s, d, n), (d)) -# endif -# endif -# endif - -/* Define the syntax stuff for \<, \>, etc. */ - -/* This must be nonzero for the wordchar and notwordchar pattern - commands in re_match_2. */ -# ifndef Sword -# define Sword 1 -# endif - -# ifdef SWITCH_ENUM_BUG -# define SWITCH_ENUM_CAST(x) ((int)(x)) -# else -# define SWITCH_ENUM_CAST(x) (x) -# endif - -# endif /* not emacs */ - -# if defined _LIBC || defined HAVE_LIMITS_H -# include -# endif - -# ifndef MB_LEN_MAX -# define MB_LEN_MAX 1 -# endif - -/* Get the interface, including the syntax bits. */ -# include -# define translate __REPB_PREFIX(translate) - -/* isalpha etc. are used for the character classes. */ -# include - -/* Jim Meyering writes: - - "... Some ctype macros are valid only for character codes that - isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when - using /bin/cc or gcc but without giving an ansi option). So, all - ctype uses should be through macros like ISPRINT... If - STDC_HEADERS is defined, then autoconf has verified that the ctype - macros don't need to be guarded with references to isascii. ... - Defining isascii to 1 should let any compiler worth its salt - eliminate the && through constant folding." - Solaris defines some of these symbols so we must undefine them first. */ - -# undef ISASCII -# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) -# define ISASCII(c) 1 -# else -# define ISASCII(c) isascii(c) -# endif - -# ifdef isblank -# define ISBLANK(c) (ISASCII (c) && isblank (c)) -# else -# define ISBLANK(c) ((c) == ' ' || (c) == '\t') -# endif -# ifdef isgraph -# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) -# else -# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) -# endif - -# undef ISPRINT -# define ISPRINT(c) (ISASCII (c) && isprint (c)) -# define ISDIGIT(c) (ISASCII (c) && isdigit (c)) -# define ISALNUM(c) (ISASCII (c) && isalnum (c)) -# define ISALPHA(c) (ISASCII (c) && isalpha (c)) -# define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) -# define ISLOWER(c) (ISASCII (c) && islower (c)) -# define ISPUNCT(c) (ISASCII (c) && ispunct (c)) -# define ISSPACE(c) (ISASCII (c) && isspace (c)) -# define ISUPPER(c) (ISASCII (c) && isupper (c)) -# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) - -# ifdef _tolower -# define TOLOWER(c) _tolower(c) -# else -# define TOLOWER(c) tolower(c) -# endif - -# ifndef NULL -# define NULL (void *)0 -# endif - -/* We remove any previous definition of `SIGN_EXTEND_CHAR', - since ours (we hope) works properly with all combinations of - machines, compilers, `char' and `unsigned char' argument types. - (Per Bothner suggested the basic approach.) */ -# undef SIGN_EXTEND_CHAR -# if __STDC__ -# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) -# else /* not __STDC__ */ -/* As in Harbison and Steele. */ -# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) -# endif - -# ifndef emacs -/* How many characters in the character set. */ -# define CHAR_SET_SIZE 256 - -# ifdef SYNTAX_TABLE - -extern char *re_syntax_table; - -# else /* not SYNTAX_TABLE */ - -static char re_syntax_table[CHAR_SET_SIZE]; - -static void init_syntax_once (void); - -static void -init_syntax_once (void) -{ - register int c; - static smallint done = 0; - - if (done) - return; - bzero (re_syntax_table, sizeof re_syntax_table); - - for (c = 0; c < CHAR_SET_SIZE; ++c) - if (ISALNUM (c)) - re_syntax_table[c] = Sword; - - re_syntax_table['_'] = Sword; - - done = 1; -} - -# endif /* not SYNTAX_TABLE */ - -# define SYNTAX(c) re_syntax_table[(unsigned char) (c)] - -# endif /* emacs */ - -/* Integer type for pointers. */ -# if !defined _LIBC && !defined __intptr_t_defined -typedef unsigned long int uintptr_t; -# endif - -/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we - use `alloca' instead of `malloc'. This is because using malloc in - re_search* or re_match* could cause memory leaks when C-g is used in - Emacs; also, malloc is slower and causes storage fragmentation. On - the other hand, malloc is more portable, and easier to debug. - - Because we sometimes use alloca, some routines have to be macros, - not functions -- `alloca'-allocated space disappears at the end of the - function it is called in. */ - -# ifdef REGEX_MALLOC - -# define REGEX_ALLOCATE malloc -# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) -# define REGEX_FREE free - -# else /* not REGEX_MALLOC */ - -/* Emacs already defines alloca, sometimes. */ -# ifndef alloca - -/* Make alloca work the best possible way. */ -# ifdef __GNUC__ -# define alloca __builtin_alloca -# else /* not __GNUC__ */ -# if HAVE_ALLOCA_H -# include -# endif /* HAVE_ALLOCA_H */ -# endif /* not __GNUC__ */ - -# endif /* not alloca */ - -# define REGEX_ALLOCATE alloca - -/* Assumes a `char *destination' variable. */ -# define REGEX_REALLOCATE(source, osize, nsize) \ - (destination = (char *) alloca (nsize), \ - memcpy (destination, source, osize)) - -/* No need to do anything to free, after alloca. */ -# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ - -# endif /* not REGEX_MALLOC */ - -/* Define how to allocate the failure stack. */ - -# if defined REL_ALLOC && defined REGEX_MALLOC - -# define REGEX_ALLOCATE_STACK(size) \ - r_alloc (&failure_stack_ptr, (size)) -# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ - r_re_alloc (&failure_stack_ptr, (nsize)) -# define REGEX_FREE_STACK(ptr) \ - r_alloc_free (&failure_stack_ptr) - -# else /* not using relocating allocator */ - -# ifdef REGEX_MALLOC - -# define REGEX_ALLOCATE_STACK malloc -# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) -# define REGEX_FREE_STACK free - -# else /* not REGEX_MALLOC */ - -# define REGEX_ALLOCATE_STACK alloca - -# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ - REGEX_REALLOCATE (source, osize, nsize) -/* No need to explicitly free anything. */ -# define REGEX_FREE_STACK(arg) - -# endif /* not REGEX_MALLOC */ -# endif /* not using relocating allocator */ - - -/* True if `size1' is non-NULL and PTR is pointing anywhere inside - `string1' or just past its end. This works if PTR is NULL, which is - a good thing. */ -# define FIRST_STRING_P(ptr) \ - (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) - -/* (Re)Allocate N items of type T using malloc, or fail. */ -# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) -# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) -# define RETALLOC_IF(addr, n, t) \ - if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) -# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) - -# define BYTEWIDTH 8 /* In bits. */ - -# define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) - -# undef MAX -# undef MIN -# define MAX(a, b) ((a) > (b) ? (a) : (b)) -# define MIN(a, b) ((a) < (b) ? (a) : (b)) - -typedef char boolean; -# define false 0 -# define true 1 - -static reg_errcode_t byte_regex_compile (const char *pattern, size_t size, - reg_syntax_t syntax, - struct re_pattern_buffer *bufp); - -static int byte_re_match_2_internal (struct re_pattern_buffer *bufp, - const char *string1, int size1, - const char *string2, int size2, - int pos, - struct re_registers *regs, - int stop); -static int byte_re_search_2 (struct re_pattern_buffer *bufp, - const char *string1, int size1, - const char *string2, int size2, - int startpos, int range, - struct re_registers *regs, int stop); -static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp); - -#ifdef MBS_SUPPORT -static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size, - reg_syntax_t syntax, - struct re_pattern_buffer *bufp); - - -static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp, - const char *cstring1, int csize1, - const char *cstring2, int csize2, - int pos, - struct re_registers *regs, - int stop, - wchar_t *string1, int size1, - wchar_t *string2, int size2, - int *mbs_offset1, int *mbs_offset2); -static int wcs_re_search_2 (struct re_pattern_buffer *bufp, - const char *string1, int size1, - const char *string2, int size2, - int startpos, int range, - struct re_registers *regs, int stop); -static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp); -#endif - -/* These are the command codes that appear in compiled regular - expressions. Some opcodes are followed by argument bytes. A - command code can specify any interpretation whatsoever for its - arguments. Zero bytes may appear in the compiled regular expression. */ - -typedef enum -{ - no_op = 0, - - /* Succeed right away--no more backtracking. */ - succeed, - - /* Followed by one byte giving n, then by n literal bytes. */ - exactn, - -# ifdef MBS_SUPPORT - /* Same as exactn, but contains binary data. */ - exactn_bin, -# endif - - /* Matches any (more or less) character. */ - anychar, - - /* Matches any one char belonging to specified set. First - following byte is number of bitmap bytes. Then come bytes - for a bitmap saying which chars are in. Bits in each byte - are ordered low-bit-first. A character is in the set if its - bit is 1. A character too large to have a bit in the map is - automatically not in the set. */ - /* ifdef MBS_SUPPORT, following element is length of character - classes, length of collating symbols, length of equivalence - classes, length of character ranges, and length of characters. - Next, character class element, collating symbols elements, - equivalence class elements, range elements, and character - elements follow. - See regex_compile function. */ - charset, - - /* Same parameters as charset, but match any character that is - not one of those specified. */ - charset_not, - - /* Start remembering the text that is matched, for storing in a - register. Followed by one byte with the register number, in - the range 0 to one less than the pattern buffer's re_nsub - field. Then followed by one byte with the number of groups - inner to this one. (This last has to be part of the - start_memory only because we need it in the on_failure_jump - of re_match_2.) */ - start_memory, - - /* Stop remembering the text that is matched and store it in a - memory register. Followed by one byte with the register - number, in the range 0 to one less than `re_nsub' in the - pattern buffer, and one byte with the number of inner groups, - just like `start_memory'. (We need the number of inner - groups here because we don't have any easy way of finding the - corresponding start_memory when we're at a stop_memory.) */ - stop_memory, - - /* Match a duplicate of something remembered. Followed by one - byte containing the register number. */ - duplicate, - - /* Fail unless at beginning of line. */ - begline, - - /* Fail unless at end of line. */ - endline, - - /* Succeeds if at beginning of buffer (if emacs) or at beginning - of string to be matched (if not). */ - begbuf, - - /* Analogously, for end of buffer/string. */ - endbuf, - - /* Followed by two byte relative address to which to jump. */ - jump, - - /* Same as jump, but marks the end of an alternative. */ - jump_past_alt, - - /* Followed by two-byte relative address of place to resume at - in case of failure. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - on_failure_jump, - - /* Like on_failure_jump, but pushes a placeholder instead of the - current string position when executed. */ - on_failure_keep_string_jump, - - /* Throw away latest failure point and then jump to following - two-byte relative address. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - pop_failure_jump, - - /* Change to pop_failure_jump if know won't have to backtrack to - match; otherwise change to jump. This is used to jump - back to the beginning of a repeat. If what follows this jump - clearly won't match what the repeat does, such that we can be - sure that there is no use backtracking out of repetitions - already matched, then we change it to a pop_failure_jump. - Followed by two-byte address. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - maybe_pop_jump, - - /* Jump to following two-byte address, and push a dummy failure - point. This failure point will be thrown away if an attempt - is made to use it for a failure. A `+' construct makes this - before the first repeat. Also used as an intermediary kind - of jump when compiling an alternative. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - dummy_failure_jump, - - /* Push a dummy failure point and continue. Used at the end of - alternatives. */ - push_dummy_failure, - - /* Followed by two-byte relative address and two-byte number n. - After matching N times, jump to the address upon failure. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - succeed_n, - - /* Followed by two-byte relative address, and two-byte number n. - Jump to the address N times, then fail. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - jump_n, - - /* Set the following two-byte relative address to the - subsequent two-byte number. The address *includes* the two - bytes of number. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - set_number_at, - - wordchar, /* Matches any word-constituent character. */ - notwordchar, /* Matches any char that is not a word-constituent. */ - - wordbeg, /* Succeeds if at word beginning. */ - wordend, /* Succeeds if at word end. */ - - wordbound, /* Succeeds if at a word boundary. */ - notwordbound /* Succeeds if not at a word boundary. */ - -# ifdef emacs - ,before_dot, /* Succeeds if before point. */ - at_dot, /* Succeeds if at point. */ - after_dot, /* Succeeds if after point. */ - - /* Matches any character whose syntax is specified. Followed by - a byte which contains a syntax code, e.g., Sword. */ - syntaxspec, - - /* Matches any character whose syntax is not that specified. */ - notsyntaxspec -# endif /* emacs */ -} re_opcode_t; -#endif /* not INSIDE_RECURSION */ - - -#ifdef BYTE -# define CHAR_T char -# define UCHAR_T unsigned char -# define COMPILED_BUFFER_VAR bufp->buffer -# define OFFSET_ADDRESS_SIZE 2 -# define PREFIX(name) byte_##name -# define ARG_PREFIX(name) name -# define PUT_CHAR(c) putchar (c) -#else -# ifdef WCHAR -# define CHAR_T wchar_t -# define UCHAR_T wchar_t -# define COMPILED_BUFFER_VAR wc_buffer -# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */ -# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1) -# define PREFIX(name) wcs_##name -# define ARG_PREFIX(name) c##name -/* Should we use wide stream?? */ -# define PUT_CHAR(c) printf ("%C", c); -# define TRUE 1 -# define FALSE 0 -# else -# ifdef MBS_SUPPORT -# define WCHAR -# define INSIDE_RECURSION -# include "regex_old.c" -# undef INSIDE_RECURSION -# endif -# define BYTE -# define INSIDE_RECURSION -# include "regex_old.c" -# undef INSIDE_RECURSION -# endif -#endif - -#ifdef INSIDE_RECURSION -/* Common operations on the compiled pattern. */ - -/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ -/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ - -# ifdef WCHAR -# define STORE_NUMBER(destination, number) \ - do { \ - *(destination) = (UCHAR_T)(number); \ - } while (0) -# else /* BYTE */ -# define STORE_NUMBER(destination, number) \ - do { \ - (destination)[0] = (number) & 0377; \ - (destination)[1] = (number) >> 8; \ - } while (0) -# endif /* WCHAR */ - -/* Same as STORE_NUMBER, except increment DESTINATION to - the byte after where the number is stored. Therefore, DESTINATION - must be an lvalue. */ -/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ - -# define STORE_NUMBER_AND_INCR(destination, number) \ - do { \ - STORE_NUMBER (destination, number); \ - (destination) += OFFSET_ADDRESS_SIZE; \ - } while (0) - -/* Put into DESTINATION a number stored in two contiguous bytes starting - at SOURCE. */ -/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ - -# ifdef WCHAR -# define EXTRACT_NUMBER(destination, source) \ - do { \ - (destination) = *(source); \ - } while (0) -# else /* BYTE */ -# define EXTRACT_NUMBER(destination, source) \ - do { \ - (destination) = *(source) & 0377; \ - (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ - } while (0) -# endif - -# ifdef DEBUG -static void PREFIX(extract_number) (int *dest, UCHAR_T *source) -{ -# ifdef WCHAR - *dest = *source; -# else /* BYTE */ - int temp = SIGN_EXTEND_CHAR (*(source + 1)); - *dest = *source & 0377; - *dest += temp << 8; -# endif -} - -# ifndef EXTRACT_MACROS /* To debug the macros. */ -# undef EXTRACT_NUMBER -# define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src) -# endif /* not EXTRACT_MACROS */ - -# endif /* DEBUG */ - -/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. - SOURCE must be an lvalue. */ - -# define EXTRACT_NUMBER_AND_INCR(destination, source) \ - do { \ - EXTRACT_NUMBER (destination, source); \ - (source) += OFFSET_ADDRESS_SIZE; \ - } while (0) - -# ifdef DEBUG -static void PREFIX(extract_number_and_incr) (int *destination, - UCHAR_T **source) -{ - PREFIX(extract_number) (destination, *source); - *source += OFFSET_ADDRESS_SIZE; -} - -# ifndef EXTRACT_MACROS -# undef EXTRACT_NUMBER_AND_INCR -# define EXTRACT_NUMBER_AND_INCR(dest, src) \ - PREFIX(extract_number_and_incr) (&dest, &src) -# endif /* not EXTRACT_MACROS */ - -# endif /* DEBUG */ - - - -/* If DEBUG is defined, Regex prints many voluminous messages about what - it is doing (if the variable `debug' is nonzero). If linked with the - main program in `iregex.c', you can enter patterns and strings - interactively. And if linked with the main program in `main.c' and - the other test files, you can run the already-written tests. */ - -# ifdef DEBUG - -# ifndef DEFINED_ONCE - -/* We use standard I/O for debugging. */ -# include - -/* It is useful to test things that ``must'' be true when debugging. */ -# include - -static smallint debug; - -# define DEBUG_STATEMENT(e) e -# define DEBUG_PRINT1(x) if (debug) printf (x) -# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) -# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) -# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) -# endif /* not DEFINED_ONCE */ - -# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ - if (debug) PREFIX(print_partial_compiled_pattern) (s, e) -# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ - if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2) - - -/* Print the fastmap in human-readable form. */ - -# ifndef DEFINED_ONCE -static void -print_fastmap (char *fastmap) -{ - unsigned was_a_range = 0; - unsigned i = 0; - - while (i < (1 << BYTEWIDTH)) - { - if (fastmap[i++]) - { - was_a_range = 0; - putchar (i - 1); - while (i < (1 << BYTEWIDTH) && fastmap[i]) - { - was_a_range = 1; - i++; - } - if (was_a_range) - { - printf ("-"); - putchar (i - 1); - } - } - } - putchar ('\n'); -} -# endif /* not DEFINED_ONCE */ - - -/* Print a compiled pattern string in human-readable form, starting at - the START pointer into it and ending just before the pointer END. */ - -static void -PREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end) -{ - int mcnt, mcnt2; - UCHAR_T *p1; - UCHAR_T *p = start; - UCHAR_T *pend = end; - - if (start == NULL) - { - printf ("(null)\n"); - return; - } - - /* Loop over pattern commands. */ - while (p < pend) - { -# ifdef _LIBC - printf ("%td:\t", p - start); -# else - printf ("%ld:\t", (long int) (p - start)); -# endif - - switch ((re_opcode_t) *p++) - { - case no_op: - printf ("/no_op"); - break; - - case exactn: - mcnt = *p++; - printf ("/exactn/%d", mcnt); - do - { - putchar ('/'); - PUT_CHAR (*p++); - } - while (--mcnt); - break; - -# ifdef MBS_SUPPORT - case exactn_bin: - mcnt = *p++; - printf ("/exactn_bin/%d", mcnt); - do - { - printf("/%lx", (long int) *p++); - } - while (--mcnt); - break; -# endif /* MBS_SUPPORT */ - - case start_memory: - mcnt = *p++; - printf ("/start_memory/%d/%ld", mcnt, (long int) *p++); - break; - - case stop_memory: - mcnt = *p++; - printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++); - break; - - case duplicate: - printf ("/duplicate/%ld", (long int) *p++); - break; - - case anychar: - printf ("/anychar"); - break; - - case charset: - case charset_not: - { -# ifdef WCHAR - int i, length; - wchar_t *workp = p; - printf ("/charset [%s", - (re_opcode_t) *(workp - 1) == charset_not ? "^" : ""); - p += 5; - length = *workp++; /* the length of char_classes */ - for (i=0 ; ibuffer; - - PREFIX(print_partial_compiled_pattern) (buffer, buffer - + bufp->used / sizeof(UCHAR_T)); - printf ("%ld bytes used/%ld bytes allocated.\n", - bufp->used, bufp->allocated); - - if (bufp->fastmap_accurate && bufp->fastmap) - { - printf ("fastmap: "); - print_fastmap (bufp->fastmap); - } - -# ifdef _LIBC - printf ("re_nsub: %Zd\t", bufp->re_nsub); -# else - printf ("re_nsub: %ld\t", (long int) bufp->re_nsub); -# endif - printf ("regs_alloc: %d\t", bufp->regs_allocated); - printf ("can_be_null: %d\t", bufp->can_be_null); - printf ("newline_anchor: %d\n", bufp->newline_anchor); - printf ("no_sub: %d\t", bufp->no_sub); - printf ("not_bol: %d\t", bufp->not_bol); - printf ("not_eol: %d\t", bufp->not_eol); - printf ("syntax: %lx\n", bufp->syntax); - /* Perhaps we should print the translate table? */ -} - - -static void -PREFIX(print_double_string) ( - const CHAR_T *where, - const CHAR_T *string1, - int size1, - const CHAR_T *string2, - int size2) -{ - int this_char; - - if (where == NULL) - printf ("(null)"); - else - { - int cnt; - - if (FIRST_STRING_P (where)) - { - for (this_char = where - string1; this_char < size1; this_char++) - PUT_CHAR (string1[this_char]); - - where = string2; - } - - cnt = 0; - for (this_char = where - string2; this_char < size2; this_char++) - { - PUT_CHAR (string2[this_char]); - if (++cnt > 100) - { - fputs ("...", stdout); - break; - } - } - } -} - -# if 0 /* ndef DEFINED_ONCE */ -void -printchar (int c) -{ - putc (c, stderr); -} -# endif - -# else /* not DEBUG */ - -# ifndef DEFINED_ONCE -# undef assert -# define assert(e) - -# define DEBUG_STATEMENT(e) -# define DEBUG_PRINT1(x) -# define DEBUG_PRINT2(x1, x2) -# define DEBUG_PRINT3(x1, x2, x3) -# define DEBUG_PRINT4(x1, x2, x3, x4) -# endif /* not DEFINED_ONCE */ -# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) -# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) - -# endif /* not DEBUG */ - - - -# ifdef WCHAR -/* This convert a multibyte string to a wide character string. - And write their correspondances to offset_buffer(see below) - and write whether each wchar_t is binary data to is_binary. - This assume invalid multibyte sequences as binary data. - We assume offset_buffer and is_binary is already allocated - enough space. */ - -static size_t -convert_mbs_to_wcs ( - CHAR_T *dest, - const unsigned char* src, - size_t len, /* the length of multibyte string. */ - - /* It hold correspondances between src(char string) and - dest(wchar_t string) for optimization. - e.g. src = "xxxyzz" - dest = {'X', 'Y', 'Z'} - (each "xxx", "y" and "zz" represent one multibyte character - corresponding to 'X', 'Y' and 'Z'.) - offset_buffer = {0, 0+3("xxx"), 0+3+1("y"), 0+3+1+2("zz")} - = {0, 3, 4, 6} - */ - int *offset_buffer, - char *is_binary) -{ - wchar_t *pdest = dest; - const unsigned char *psrc = src; - size_t wc_count = 0; - - mbstate_t mbs; - int i, consumed; - size_t mb_remain = len; - size_t mb_count = 0; - - /* Initialize the conversion state. */ - memset (&mbs, 0, sizeof (mbstate_t)); - - offset_buffer[0] = 0; - for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed, - psrc += consumed) - { -#ifdef _LIBC - consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs); -#else - consumed = mbrtowc (pdest, psrc, mb_remain, &mbs); -#endif - - if (consumed <= 0) - /* failed to convert. maybe src contains binary data. - So we consume 1 byte manualy. */ - { - *pdest = *psrc; - consumed = 1; - is_binary[wc_count] = TRUE; - } - else - is_binary[wc_count] = FALSE; - /* In sjis encoding, we use yen sign as escape character in - place of reverse solidus. So we convert 0x5c(yen sign in - sjis) to not 0xa5(yen sign in UCS2) but 0x5c(reverse - solidus in UCS2). */ - if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5) - *pdest = (wchar_t) *psrc; - - offset_buffer[wc_count + 1] = mb_count += consumed; - } - - /* Fill remain of the buffer with sentinel. */ - for (i = wc_count + 1 ; i <= len ; i++) - offset_buffer[i] = mb_count + 1; - - return wc_count; -} - -# endif /* WCHAR */ - -#else /* not INSIDE_RECURSION */ - -/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can - also be assigned to arbitrarily: each pattern buffer stores its own - syntax, so it can be changed between regex compilations. */ -/* This has no initializer because initialized variables in Emacs - become read-only after dumping. */ -reg_syntax_t re_syntax_options; - - -/* Specify the precise syntax of regexps for compilation. This provides - for compatibility for various utilities which historically have - different, incompatible syntaxes. - - The argument SYNTAX is a bit mask comprised of the various bits - defined in regex.h. We return the old syntax. */ - -reg_syntax_t -re_set_syntax (reg_syntax_t syntax) -{ - reg_syntax_t ret = re_syntax_options; - - re_syntax_options = syntax; -# ifdef DEBUG - if (syntax & RE_DEBUG) - debug = 1; - else if (debug) /* was on but now is not */ - debug = 0; -# endif /* DEBUG */ - return ret; -} - -/* This table gives an error message for each of the error codes listed - in regex.h. Obviously the order here has to be same as there. - POSIX doesn't require that we do anything for REG_NOERROR, - but why not be nice? */ - -static const char re_error_msgid[] = - { -# define REG_NOERROR_IDX 0 - gettext_noop ("Success") /* REG_NOERROR */ - "\0" -# define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") - gettext_noop ("No match") /* REG_NOMATCH */ - "\0" -# define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") - gettext_noop ("Invalid regular expression") /* REG_BADPAT */ - "\0" -# define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") - gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ - "\0" -# define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") - gettext_noop ("Invalid character class name") /* REG_ECTYPE */ - "\0" -# define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") - gettext_noop ("Trailing backslash") /* REG_EESCAPE */ - "\0" -# define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") - gettext_noop ("Invalid back reference") /* REG_ESUBREG */ - "\0" -# define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") - gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ - "\0" -# define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") - gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ - "\0" -# define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") - gettext_noop ("Unmatched \\{") /* REG_EBRACE */ - "\0" -# define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") - gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ - "\0" -# define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") - gettext_noop ("Invalid range end") /* REG_ERANGE */ - "\0" -# define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") - gettext_noop ("Memory exhausted") /* REG_ESPACE */ - "\0" -# define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") - gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ - "\0" -# define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") - gettext_noop ("Premature end of regular expression") /* REG_EEND */ - "\0" -# define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") - gettext_noop ("Regular expression too big") /* REG_ESIZE */ - "\0" -# define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") - gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ - }; - -static const uint16_t re_error_msgid_idx[] = - { - REG_NOERROR_IDX, - REG_NOMATCH_IDX, - REG_BADPAT_IDX, - REG_ECOLLATE_IDX, - REG_ECTYPE_IDX, - REG_EESCAPE_IDX, - REG_ESUBREG_IDX, - REG_EBRACK_IDX, - REG_EPAREN_IDX, - REG_EBRACE_IDX, - REG_BADBR_IDX, - REG_ERANGE_IDX, - REG_ESPACE_IDX, - REG_BADRPT_IDX, - REG_EEND_IDX, - REG_ESIZE_IDX, - REG_ERPAREN_IDX - }; - -#endif /* INSIDE_RECURSION */ - -#ifndef DEFINED_ONCE -/* Avoiding alloca during matching, to placate r_alloc. */ - -/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the - searching and matching functions should not call alloca. On some - systems, alloca is implemented in terms of malloc, and if we're - using the relocating allocator routines, then malloc could cause a - relocation, which might (if the strings being searched are in the - ralloc heap) shift the data out from underneath the regexp - routines. - - Here's another reason to avoid allocation: Emacs - processes input from X in a signal handler; processing X input may - call malloc; if input arrives while a matching routine is calling - malloc, then we're scrod. But Emacs can't just block input while - calling matching routines; then we don't notice interrupts when - they come in. So, Emacs blocks input around all regexp calls - except the matching calls, which it leaves unprotected, in the - faith that they will not malloc. */ - -/* Normally, this is fine. */ -# define MATCH_MAY_ALLOCATE - -/* When using GNU C, we are not REALLY using the C alloca, no matter - what config.h may say. So don't take precautions for it. */ -# ifdef __GNUC__ -# undef C_ALLOCA -# endif - -/* The match routines may not allocate if (1) they would do it with malloc - and (2) it's not safe for them to use malloc. - Note that if REL_ALLOC is defined, matching would not use malloc for the - failure stack, but we would still use it for the register vectors; - so REL_ALLOC should not affect this. */ -# if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs -# undef MATCH_MAY_ALLOCATE -# endif -#endif /* not DEFINED_ONCE */ - -#ifdef INSIDE_RECURSION -/* Failure stack declarations and macros; both re_compile_fastmap and - re_match_2 use a failure stack. These have to be macros because of - REGEX_ALLOCATE_STACK. */ - - -/* Number of failure points for which to initially allocate space - when matching. If this number is exceeded, we allocate more - space, so it is not a hard limit. */ -# ifndef INIT_FAILURE_ALLOC -# define INIT_FAILURE_ALLOC 5 -# endif - -/* Roughly the maximum number of failure points on the stack. Would be - exactly that if always used MAX_FAILURE_ITEMS items each time we failed. - This is a variable only so users of regex can assign to it; we never - change it ourselves. */ - -# ifdef INT_IS_16BIT - -# ifndef DEFINED_ONCE -# if defined MATCH_MAY_ALLOCATE -/* 4400 was enough to cause a crash on Alpha OSF/1, - whose default stack limit is 2mb. */ -long int re_max_failures = 4000; -# else -long int re_max_failures = 2000; -# endif -# endif - -union PREFIX(fail_stack_elt) -{ - UCHAR_T *pointer; - long int integer; -}; - -typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t); - -typedef struct -{ - PREFIX(fail_stack_elt_t) *stack; - unsigned long int size; - unsigned long int avail; /* Offset of next open position. */ -} PREFIX(fail_stack_type); - -# else /* not INT_IS_16BIT */ - -# ifndef DEFINED_ONCE -# if defined MATCH_MAY_ALLOCATE -/* 4400 was enough to cause a crash on Alpha OSF/1, - whose default stack limit is 2mb. */ -int re_max_failures = 4000; -# else -int re_max_failures = 2000; -# endif -# endif - -union PREFIX(fail_stack_elt) -{ - UCHAR_T *pointer; - int integer; -}; - -typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t); - -typedef struct -{ - PREFIX(fail_stack_elt_t) *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} PREFIX(fail_stack_type); - -# endif /* INT_IS_16BIT */ - -# ifndef DEFINED_ONCE -# define FAIL_STACK_EMPTY() (fail_stack.avail == 0) -# define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) -# define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) -# endif - - -/* Define macros to initialize and free the failure stack. - Do `return -2' if the alloc fails. */ - -# ifdef MATCH_MAY_ALLOCATE -# define INIT_FAIL_STACK() \ - do { \ - fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \ - REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \ - \ - if (fail_stack.stack == NULL) \ - return -2; \ - \ - fail_stack.size = INIT_FAILURE_ALLOC; \ - fail_stack.avail = 0; \ - } while (0) - -# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) -# else -# define INIT_FAIL_STACK() \ - do { \ - fail_stack.avail = 0; \ - } while (0) - -# define RESET_FAIL_STACK() -# endif - - -/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. - - Return 1 if succeeds, and 0 if either ran out of memory - allocating space for it or it was already too large. - - REGEX_REALLOCATE_STACK requires `destination' be declared. */ - -# define DOUBLE_FAIL_STACK(fail_stack) \ - ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ - ? 0 \ - : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *) \ - REGEX_REALLOCATE_STACK ((fail_stack).stack, \ - (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)), \ - ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\ - \ - (fail_stack).stack == NULL \ - ? 0 \ - : ((fail_stack).size <<= 1, \ - 1))) - - -/* Push pointer POINTER on FAIL_STACK. - Return 1 if was able to do so and 0 if ran out of memory allocating - space to do so. */ -# define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ - ((FAIL_STACK_FULL () \ - && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ - ? 0 \ - : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ - 1)) - -/* Push a pointer value onto the failure stack. - Assumes the variable `fail_stack'. Probably should only - be called from within `PUSH_FAILURE_POINT'. */ -# define PUSH_FAILURE_POINTER(item) \ - fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item) - -/* This pushes an integer-valued item onto the failure stack. - Assumes the variable `fail_stack'. Probably should only - be called from within `PUSH_FAILURE_POINT'. */ -# define PUSH_FAILURE_INT(item) \ - fail_stack.stack[fail_stack.avail++].integer = (item) - -/* Push a fail_stack_elt_t value onto the failure stack. - Assumes the variable `fail_stack'. Probably should only - be called from within `PUSH_FAILURE_POINT'. */ -# define PUSH_FAILURE_ELT(item) \ - fail_stack.stack[fail_stack.avail++] = (item) - -/* These three POP... operations complement the three PUSH... operations. - All assume that `fail_stack' is nonempty. */ -# define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer -# define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer -# define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] - -/* Used to omit pushing failure point id's when we're not debugging. */ -# ifdef DEBUG -# define DEBUG_PUSH PUSH_FAILURE_INT -# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () -# else -# define DEBUG_PUSH(item) -# define DEBUG_POP(item_addr) -# endif - - -/* Push the information about the state we will need - if we ever fail back to it. - - Requires variables fail_stack, regstart, regend, reg_info, and - num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination' - be declared. - - Does `return FAILURE_CODE' if runs out of memory. */ - -# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ - do { \ - char *destination; \ - /* Must be int, so when we don't save any registers, the arithmetic \ - of 0 + -1 isn't done as unsigned. */ \ - /* Can't be int, since there is not a shred of a guarantee that int \ - is wide enough to hold a value of something to which pointer can \ - be assigned */ \ - active_reg_t this_reg; \ - \ - DEBUG_STATEMENT (failure_id++); \ - DEBUG_STATEMENT (nfailure_points_pushed++); \ - DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ - DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ - DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ - \ - DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \ - DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ - \ - /* Ensure we have enough space allocated for what we will push. */ \ - while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ - { \ - if (!DOUBLE_FAIL_STACK (fail_stack)) \ - return failure_code; \ - \ - DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ - (fail_stack).size); \ - DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ - } \ - \ - /* Push the info, starting with the registers. */ \ - DEBUG_PRINT1 ("\n"); \ - \ - if (1) \ - for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ - this_reg++) \ - { \ - DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \ - DEBUG_STATEMENT (num_regs_pushed++); \ - \ - DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ - PUSH_FAILURE_POINTER (regstart[this_reg]); \ - \ - DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ - PUSH_FAILURE_POINTER (regend[this_reg]); \ - \ - DEBUG_PRINT2 (" info: %p\n ", \ - reg_info[this_reg].word.pointer); \ - DEBUG_PRINT2 (" match_null=%d", \ - REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ - DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ - DEBUG_PRINT2 (" matched_something=%d", \ - MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT2 (" ever_matched=%d", \ - EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT1 ("\n"); \ - PUSH_FAILURE_ELT (reg_info[this_reg].word); \ - } \ - \ - DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\ - PUSH_FAILURE_INT (lowest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\ - PUSH_FAILURE_INT (highest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ - PUSH_FAILURE_POINTER (pattern_place); \ - \ - DEBUG_PRINT2 (" Pushing string %p: `", string_place); \ - DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ - size2); \ - DEBUG_PRINT1 ("'\n"); \ - PUSH_FAILURE_POINTER (string_place); \ - \ - DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ - DEBUG_PUSH (failure_id); \ - } while (0) - -# ifndef DEFINED_ONCE -/* This is the number of items that are pushed and popped on the stack - for each register. */ -# define NUM_REG_ITEMS 3 - -/* Individual items aside from the registers. */ -# ifdef DEBUG -# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ -# else -# define NUM_NONREG_ITEMS 4 -# endif - -/* We push at most this many items on the stack. */ -/* We used to use (num_regs - 1), which is the number of registers - this regexp will save; but that was changed to 5 - to avoid stack overflow for a regexp with lots of parens. */ -# define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) - -/* We actually push this many items. */ -# define NUM_FAILURE_ITEMS \ - (((0 \ - ? 0 : highest_active_reg - lowest_active_reg + 1) \ - * NUM_REG_ITEMS) \ - + NUM_NONREG_ITEMS) - -/* How many items can still be added to the stack without overflowing it. */ -# define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) -# endif /* not DEFINED_ONCE */ - - -/* Pops what PUSH_FAIL_STACK pushes. - - We restore into the parameters, all of which should be lvalues: - STR -- the saved data position. - PAT -- the saved pattern position. - LOW_REG, HIGH_REG -- the highest and lowest active registers. - REGSTART, REGEND -- arrays of string positions. - REG_INFO -- array of information about each subexpression. - - Also assumes the variables `fail_stack' and (if debugging), `bufp', - `pend', `string1', `size1', `string2', and `size2'. */ -# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ -{ \ - DEBUG_STATEMENT (unsigned failure_id;) \ - active_reg_t this_reg; \ - const UCHAR_T *string_temp; \ - \ - assert (!FAIL_STACK_EMPTY ()); \ - \ - /* Remove failure points and point to how many regs pushed. */ \ - DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ - DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ - DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ - \ - assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ - \ - DEBUG_POP (&failure_id); \ - DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ - \ - /* If the saved string location is NULL, it came from an \ - on_failure_keep_string_jump opcode, and we want to throw away the \ - saved NULL, thus retaining our current position in the string. */ \ - string_temp = POP_FAILURE_POINTER (); \ - if (string_temp != NULL) \ - str = (const CHAR_T *) string_temp; \ - \ - DEBUG_PRINT2 (" Popping string %p: `", str); \ - DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ - DEBUG_PRINT1 ("'\n"); \ - \ - pat = (UCHAR_T *) POP_FAILURE_POINTER (); \ - DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ - \ - /* Restore register info. */ \ - high_reg = (active_reg_t) POP_FAILURE_INT (); \ - DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \ - \ - low_reg = (active_reg_t) POP_FAILURE_INT (); \ - DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \ - \ - if (1) \ - for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ - { \ - DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \ - \ - reg_info[this_reg].word = POP_FAILURE_ELT (); \ - DEBUG_PRINT2 (" info: %p\n", \ - reg_info[this_reg].word.pointer); \ - \ - regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \ - DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ - \ - regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \ - DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ - } \ - else \ - { \ - for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ - { \ - reg_info[this_reg].word.integer = 0; \ - regend[this_reg] = 0; \ - regstart[this_reg] = 0; \ - } \ - highest_active_reg = high_reg; \ - } \ - \ - set_regs_matched_done = 0; \ - DEBUG_STATEMENT (nfailure_points_popped++); \ -} /* POP_FAILURE_POINT */ - -/* Structure for per-register (a.k.a. per-group) information. - Other register information, such as the - starting and ending positions (which are addresses), and the list of - inner groups (which is a bits list) are maintained in separate - variables. - - We are making a (strictly speaking) nonportable assumption here: that - the compiler will pack our bit fields into something that fits into - the type of `word', i.e., is something that fits into one item on the - failure stack. */ - - -/* Declarations and macros for re_match_2. */ - -typedef union -{ - PREFIX(fail_stack_elt_t) word; - struct - { - /* This field is one if this group can match the empty string, - zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ -# define MATCH_NULL_UNSET_VALUE 3 - unsigned match_null_string_p : 2; - unsigned is_active : 1; - unsigned matched_something : 1; - unsigned ever_matched_something : 1; - } bits; -} PREFIX(register_info_type); - -# ifndef DEFINED_ONCE -# define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) -# define IS_ACTIVE(R) ((R).bits.is_active) -# define MATCHED_SOMETHING(R) ((R).bits.matched_something) -# define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) - - -/* Call this when have matched a real character; it sets `matched' flags - for the subexpressions which we are currently inside. Also records - that those subexprs have matched. */ -# define SET_REGS_MATCHED() \ - do \ - { \ - if (!set_regs_matched_done) \ - { \ - active_reg_t r; \ - set_regs_matched_done = 1; \ - for (r = lowest_active_reg; r <= highest_active_reg; r++) \ - { \ - MATCHED_SOMETHING (reg_info[r]) \ - = EVER_MATCHED_SOMETHING (reg_info[r]) \ - = 1; \ - } \ - } \ - } \ - while (0) -# endif /* not DEFINED_ONCE */ - -/* Registers are set to a sentinel when they haven't yet matched. */ -static CHAR_T PREFIX(reg_unset_dummy); -# define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy)) -# define REG_UNSET(e) ((e) == REG_UNSET_VALUE) - -/* Subroutine declarations and macros for regex_compile. */ -static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg); -static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, - int arg1, int arg2); -static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, - int arg, UCHAR_T *end); -static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, - int arg1, int arg2, UCHAR_T *end); -static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern, - const CHAR_T *p, - reg_syntax_t syntax); -static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p, - const CHAR_T *pend, - reg_syntax_t syntax); -# ifdef WCHAR -static reg_errcode_t wcs_compile_range (CHAR_T range_start, - const CHAR_T **p_ptr, - const CHAR_T *pend, - __RE_TRANSLATE_TYPE translate, - reg_syntax_t syntax, - UCHAR_T *b, - CHAR_T *char_set); -static void insert_space (int num, CHAR_T *loc, CHAR_T *end); -# else /* BYTE */ -static reg_errcode_t byte_compile_range (unsigned int range_start, - const char **p_ptr, - const char *pend, - __RE_TRANSLATE_TYPE translate, - reg_syntax_t syntax, - unsigned char *b); -# endif /* WCHAR */ - -/* Fetch the next character in the uncompiled pattern---translating it - if necessary. Also cast from a signed character in the constant - string passed to us by the user to an unsigned char that we can use - as an array index (in, e.g., `translate'). */ -/* ifdef MBS_SUPPORT, we translate only if character <= 0xff, - because it is impossible to allocate 4GB array for some encodings - which have 4 byte character_set like UCS4. */ -# ifndef PATFETCH -# ifdef WCHAR -# define PATFETCH(c) \ - do {if (p == pend) return REG_EEND; \ - c = (UCHAR_T) *p++; \ - if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c]; \ - } while (0) -# else /* BYTE */ -# define PATFETCH(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - if (translate) c = (unsigned char) translate[c]; \ - } while (0) -# endif /* WCHAR */ -# endif - -/* Fetch the next character in the uncompiled pattern, with no - translation. */ -# define PATFETCH_RAW(c) \ - do {if (p == pend) return REG_EEND; \ - c = (UCHAR_T) *p++; \ - } while (0) - -/* Go backwards one character in the pattern. */ -# define PATUNFETCH p-- - - -/* If `translate' is non-null, return translate[D], else just D. We - cast the subscript to translate because some data is declared as - `char *', to avoid warnings when a string constant is passed. But - when we use a character as a subscript we must make it unsigned. */ -/* ifdef MBS_SUPPORT, we translate only if character <= 0xff, - because it is impossible to allocate 4GB array for some encodings - which have 4 byte character_set like UCS4. */ - -# ifndef TRANSLATE -# ifdef WCHAR -# define TRANSLATE(d) \ - ((translate && ((UCHAR_T) (d)) <= 0xff) \ - ? (char) translate[(unsigned char) (d)] : (d)) -# else /* BYTE */ -# define TRANSLATE(d) \ - (translate ? (char) translate[(unsigned char) (d)] : (d)) -# endif /* WCHAR */ -# endif - - -/* Macros for outputting the compiled pattern into `buffer'. */ - -/* If the buffer isn't allocated when it comes in, use this. */ -# define INIT_BUF_SIZE (32 * sizeof(UCHAR_T)) - -/* Make sure we have at least N more bytes of space in buffer. */ -# ifdef WCHAR -# define GET_BUFFER_SPACE(n) \ - while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \ - + (n)*sizeof(CHAR_T)) > bufp->allocated) \ - EXTEND_BUFFER () -# else /* BYTE */ -# define GET_BUFFER_SPACE(n) \ - while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ - EXTEND_BUFFER () -# endif /* WCHAR */ - -/* Make sure we have one more byte of buffer space and then add C to it. */ -# define BUF_PUSH(c) \ - do { \ - GET_BUFFER_SPACE (1); \ - *b++ = (UCHAR_T) (c); \ - } while (0) - - -/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ -# define BUF_PUSH_2(c1, c2) \ - do { \ - GET_BUFFER_SPACE (2); \ - *b++ = (UCHAR_T) (c1); \ - *b++ = (UCHAR_T) (c2); \ - } while (0) - - -/* As with BUF_PUSH_2, except for three bytes. */ -# define BUF_PUSH_3(c1, c2, c3) \ - do { \ - GET_BUFFER_SPACE (3); \ - *b++ = (UCHAR_T) (c1); \ - *b++ = (UCHAR_T) (c2); \ - *b++ = (UCHAR_T) (c3); \ - } while (0) - -/* St