diff options
author | Eric Andersen <andersen@codepoet.org> | 2001-11-24 13:20:18 +0000 |
---|---|---|
committer | Eric Andersen <andersen@codepoet.org> | 2001-11-24 13:20:18 +0000 |
commit | 322e234bd5ae2b05566491a6f1481bee8b1731c9 (patch) | |
tree | 17adf256c552bd3b43b28ca0e1b16d59ef52bee7 /libc/misc | |
parent | dfb5fe2dee1b64c57c3df7fc4c0ecb7ad0450730 (diff) |
Sync regex with glibc 2.2.4. I need to add an option to select
a minamalist replacement. Coming soon...
-Erik
Diffstat (limited to 'libc/misc')
-rw-r--r-- | libc/misc/regex/regex.c | 11242 |
1 files changed, 6942 insertions, 4300 deletions
diff --git a/libc/misc/regex/regex.c b/libc/misc/regex/regex.c index 350535fa1..81298314b 100644 --- a/libc/misc/regex/regex.c +++ b/libc/misc/regex/regex.c @@ -2,35 +2,41 @@ version 0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the internationalization features.) - Copyright (C) 1993-1999, 2000 Free Software Foundation, Inc. + Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ /* To exclude some unwanted junk.... */ #undef _LIBC +#undef emacs #define _REGEX_RE_COMP +#include <features.h> +#include <stdlib.h> +#include <string.h> +#define STDC_HEADERS /* AIX requires this to be the first thing in the file. */ #if defined _AIX && !defined REGEX_MALLOC -#pragma alloca + #pragma alloca #endif #undef _GNU_SOURCE #define _GNU_SOURCE -#define STDC_HEADERS #ifdef HAVE_CONFIG_H # include <config.h> @@ -41,141 +47,161 @@ # define PARAMS(args) args # else # define PARAMS(args) () -# endif /* GCC. */ -#endif /* Not PARAMS. */ +# endif /* GCC. */ +#endif /* Not PARAMS. */ -#if defined STDC_HEADERS && !defined emacs -# include <stddef.h> -#else +#ifndef INSIDE_RECURSION + +# if defined STDC_HEADERS && !defined emacs +# include <stddef.h> +# else /* We need this for `regex.h', and perhaps for the Emacs include files. */ -# include <sys/types.h> -#endif +# include <sys/types.h> +# endif -#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) +# define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) /* For platform which support the ISO C amendement 1 functionality we support user defined character classes. */ -#if defined _LIBC || WIDE_CHAR_SUPPORT +# if defined _LIBC || WIDE_CHAR_SUPPORT /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ -# include <wchar.h> -# include <wctype.h> -#endif +# include <wchar.h> +# include <wctype.h> +# endif -#ifdef _LIBC +# ifdef _LIBC /* We have to keep the namespace clean. */ -# define regfree(preg) __regfree (preg) -# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) -# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) -# define regerror(errcode, preg, errbuf, errbuf_size) \ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ __regerror(errcode, preg, errbuf, errbuf_size) -# define re_set_registers(bu, re, nu, st, en) \ +# define re_set_registers(bu, re, nu, st, en) \ __re_set_registers (bu, re, nu, st, en) -# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) -# define re_match(bufp, string, size, pos, regs) \ +# define re_match(bufp, string, size, pos, regs) \ __re_match (bufp, string, size, pos, regs) -# define re_search(bufp, string, size, startpos, range, regs) \ +# define re_search(bufp, string, size, startpos, range, regs) \ __re_search (bufp, string, size, startpos, range, regs) -# define re_compile_pattern(pattern, length, bufp) \ +# define re_compile_pattern(pattern, length, bufp) \ __re_compile_pattern (pattern, length, bufp) -# define re_set_syntax(syntax) __re_set_syntax (syntax) -# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) -# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) -#define btowc __btowc -#endif +# define btowc __btowc + +/* We are also using some library internals. */ +# include <locale/localeinfo.h> +# include <locale/elem-hash.h> +# include <langinfo.h> +# include <locale/coll-lookup.h> +# endif /* This is for other GNU distributions with internationalized messages. */ -#if HAVE_LIBINTL_H || defined _LIBC -# include <libintl.h> -#else -# define gettext(msgid) (msgid) -#endif +# if HAVE_LIBINTL_H || defined _LIBC +# include <libintl.h> +# ifdef _LIBC +# undef gettext +# define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES) +# endif +# else +# define gettext(msgid) (msgid) +# endif -#ifndef gettext_noop +# ifndef gettext_noop /* This define is so xgettext can find the internationalizable strings. */ -# define gettext_noop(String) String -#endif +# define gettext_noop(String) String +# endif /* The `emacs' switch turns on certain matching commands that make sense only in Emacs. */ -#ifdef emacs +# ifdef emacs -# include "lisp.h" -# include "buffer.h" -# include "syntax.h" +# include "lisp.h" +# include "buffer.h" +# include "syntax.h" -#else /* not emacs */ +# else /* not emacs */ /* If we are not linking with Emacs proper, we can't use the relocating allocator even if config.h says that we can. */ -# undef REL_ALLOC +# undef REL_ALLOC -# if defined STDC_HEADERS || defined _LIBC -# include <stdlib.h> -# else -char *malloc(); -char *realloc(); -# endif +# if defined STDC_HEADERS || defined _LIBC +# include <stdlib.h> +# else +char *malloc (); +char *realloc (); +# endif /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. If nothing else has been done, use the method below. */ -# ifdef INHIBIT_STRING_HEADER -# if !(defined HAVE_BZERO && defined HAVE_BCOPY) -# if !defined bzero && !defined bcopy -# undef INHIBIT_STRING_HEADER +# ifdef INHIBIT_STRING_HEADER +# if !(defined HAVE_BZERO && defined HAVE_BCOPY) +# if !defined bzero && !defined bcopy +# undef INHIBIT_STRING_HEADER +# endif # endif # endif -# endif /* This is the normal way of making sure we have a bcopy and a bzero. This is used in most programs--a few other programs avoid this by defining INHIBIT_STRING_HEADER. */ -# ifndef INHIBIT_STRING_HEADER -# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC -# include <string.h> -# ifndef bzero -# ifndef _LIBC -# define bzero(s, n) (memset (s, '\0', n), (s)) -# else -# define bzero(s, n) __bzero (s, n) +# ifndef INHIBIT_STRING_HEADER +# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC +# include <string.h> +# ifndef bzero +# ifndef _LIBC +# define bzero(s, n) (memset (s, '\0', n), (s)) +# else +# define bzero(s, n) __bzero (s, n) +# endif +# endif +# else +# include <strings.h> +# ifndef memcmp +# define memcmp(s1, s2, n) bcmp (s1, s2, n) +# endif +# ifndef memcpy +# define memcpy(d, s, n) (bcopy (s, d, n), (d)) # endif -# endif -# else -# include <strings.h> -# ifndef memcmp -# define memcmp(s1, s2, n) bcmp (s1, s2, n) -# endif -# ifndef memcpy -# define memcpy(d, s, n) (bcopy (s, d, n), (d)) # endif # endif -# endif /* Define the syntax stuff for \<, \>, etc. */ /* This must be nonzero for the wordchar and notwordchar pattern commands in re_match_2. */ -# ifndef Sword -# define Sword 1 -# endif +# ifndef Sword +# define Sword 1 +# endif -# ifdef SWITCH_ENUM_BUG -# define SWITCH_ENUM_CAST(x) ((int)(x)) -# else -# define SWITCH_ENUM_CAST(x) (x) +# ifdef SWITCH_ENUM_BUG +# define SWITCH_ENUM_CAST(x) ((int)(x)) +# else +# define SWITCH_ENUM_CAST(x) (x) +# endif + +# endif /* not emacs */ + +# if defined _LIBC || HAVE_LIMITS_H +# include <limits.h> # endif -#endif /* not emacs */ +# ifndef MB_LEN_MAX +# define MB_LEN_MAX 1 +# endif /* Get the interface, including the syntax bits. */ -#include <regex.h> +# include <regex.h> /* isalpha etc. are used for the character classes. */ -#include <ctype.h> +# include <ctype.h> /* Jim Meyering writes: @@ -189,94 +215,102 @@ char *realloc(); eliminate the && through constant folding." Solaris defines some of these symbols so we must undefine them first. */ -#undef ISASCII -#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) -# define ISASCII(c) 1 -#else -# define ISASCII(c) isascii(c) -#endif +# undef ISASCII +# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) +# define ISASCII(c) 1 +# else +# define ISASCII(c) isascii(c) +# endif -#ifdef isblank -# define ISBLANK(c) (ISASCII (c) && isblank (c)) -#else -# define ISBLANK(c) ((c) == ' ' || (c) == '\t') -#endif -#ifdef isgraph -# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) -#else -# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) -#endif +# ifdef isblank +# define ISBLANK(c) (ISASCII (c) && isblank (c)) +# else +# define ISBLANK(c) ((c) == ' ' || (c) == '\t') +# endif +# ifdef isgraph +# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) +# else +# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) +# endif -#undef ISPRINT -#define ISPRINT(c) (ISASCII (c) && isprint (c)) -#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) -#define ISALNUM(c) (ISASCII (c) && isalnum (c)) -#define ISALPHA(c) (ISASCII (c) && isalpha (c)) -#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) -#define ISLOWER(c) (ISASCII (c) && islower (c)) -#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) -#define ISSPACE(c) (ISASCII (c) && isspace (c)) -#define ISUPPER(c) (ISASCII (c) && isupper (c)) -#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) - -#ifdef _tolower -# define TOLOWER(c) _tolower(c) -#else -# define TOLOWER(c) tolower(c) -#endif +# undef ISPRINT +# define ISPRINT(c) (ISASCII (c) && isprint (c)) +# define ISDIGIT(c) (ISASCII (c) && isdigit (c)) +# define ISALNUM(c) (ISASCII (c) && isalnum (c)) +# define ISALPHA(c) (ISASCII (c) && isalpha (c)) +# define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) +# define ISLOWER(c) (ISASCII (c) && islower (c)) +# define ISPUNCT(c) (ISASCII (c) && ispunct (c)) +# define ISSPACE(c) (ISASCII (c) && isspace (c)) +# define ISUPPER(c) (ISASCII (c) && isupper (c)) +# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) + +# ifdef _tolower +# define TOLOWER(c) _tolower(c) +# else +# define TOLOWER(c) tolower(c) +# endif -#ifndef NULL -# define NULL (void *)0 -#endif +# ifndef NULL +# define NULL (void *)0 +# endif /* We remove any previous definition of `SIGN_EXTEND_CHAR', since ours (we hope) works properly with all combinations of machines, compilers, `char' and `unsigned char' argument types. (Per Bothner suggested the basic approach.) */ -#undef SIGN_EXTEND_CHAR -#if __STDC__ -# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) -#else /* not __STDC__ */ +# undef SIGN_EXTEND_CHAR +# if __STDC__ +# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) +# else /* not __STDC__ */ /* As in Harbison and Steele. */ -# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) -#endif +# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) +# endif -#ifndef emacs +# ifndef emacs /* How many characters in the character set. */ -# define CHAR_SET_SIZE 256 +# define CHAR_SET_SIZE 256 -# ifdef SYNTAX_TABLE +# ifdef SYNTAX_TABLE extern char *re_syntax_table; -# else /* not SYNTAX_TABLE */ +# else /* not SYNTAX_TABLE */ static char re_syntax_table[CHAR_SET_SIZE]; -static void init_syntax_once() +static void init_syntax_once PARAMS ((void)); + +static void +init_syntax_once () { - register int c; - static int done = 0; + register int c; + static int done = 0; - if (done) - return; - bzero(re_syntax_table, sizeof re_syntax_table); + if (done) + return; + bzero (re_syntax_table, sizeof re_syntax_table); - for (c = 0; c < CHAR_SET_SIZE; ++c) - if (ISALNUM(c)) - re_syntax_table[c] = Sword; + for (c = 0; c < CHAR_SET_SIZE; ++c) + if (ISALNUM (c)) + re_syntax_table[c] = Sword; - re_syntax_table['_'] = Sword; + re_syntax_table['_'] = Sword; - done = 1; + done = 1; } -# endif /* not SYNTAX_TABLE */ +# endif /* not SYNTAX_TABLE */ -# define SYNTAX(c) re_syntax_table[((c) & 0xFF)] +# define SYNTAX(c) re_syntax_table[(unsigned char) (c)] -#endif /* emacs */ +# endif /* emacs */ +/* Integer type for pointers. */ +# if !defined _LIBC +typedef unsigned long int uintptr_t; +# endif + /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we use `alloca' instead of `malloc'. This is because using malloc in re_search* or re_match* could cause memory leaks when C-g is used in @@ -287,674 +321,1016 @@ static void init_syntax_once() not functions -- `alloca'-allocated space disappears at the end of the function it is called in. */ -#ifdef REGEX_MALLOC +# ifdef REGEX_MALLOC -# define REGEX_ALLOCATE malloc -# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) -# define REGEX_FREE free +# define REGEX_ALLOCATE malloc +# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) +# define REGEX_FREE free -#else /* not REGEX_MALLOC */ +# else /* not REGEX_MALLOC */ /* Emacs already defines alloca, sometimes. */ -# ifndef alloca +# ifndef alloca /* Make alloca work the best possible way. */ -# ifdef __GNUC__ -# define alloca __builtin_alloca -# else /* not __GNUC__ */ -# if HAVE_ALLOCA_H -# include <alloca.h> -# endif /* HAVE_ALLOCA_H */ -# endif /* not __GNUC__ */ +# ifdef __GNUC__ +# define alloca __builtin_alloca +# else /* not __GNUC__ */ +# if HAVE_ALLOCA_H +# include <alloca.h> +# endif /* HAVE_ALLOCA_H */ +# endif /* not __GNUC__ */ -# endif /* not alloca */ +# endif /* not alloca */ -# define REGEX_ALLOCATE alloca +# define REGEX_ALLOCATE alloca /* Assumes a `char *destination' variable. */ -# define REGEX_REALLOCATE(source, osize, nsize) \ +# define REGEX_REALLOCATE(source, osize, nsize) \ (destination = (char *) alloca (nsize), \ memcpy (destination, source, osize)) /* No need to do anything to free, after alloca. */ -# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ +# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ -#endif /* not REGEX_MALLOC */ +# endif /* not REGEX_MALLOC */ /* Define how to allocate the failure stack. */ -#if defined REL_ALLOC && defined REGEX_MALLOC +# if defined REL_ALLOC && defined REGEX_MALLOC -# define REGEX_ALLOCATE_STACK(size) \ +# define REGEX_ALLOCATE_STACK(size) \ r_alloc (&failure_stack_ptr, (size)) -# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ +# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ r_re_alloc (&failure_stack_ptr, (nsize)) -# define REGEX_FREE_STACK(ptr) \ +# define REGEX_FREE_STACK(ptr) \ r_alloc_free (&failure_stack_ptr) -#else /* not using relocating allocator */ +# else /* not using relocating allocator */ -# ifdef REGEX_MALLOC +# ifdef REGEX_MALLOC -# define REGEX_ALLOCATE_STACK malloc -# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) -# define REGEX_FREE_STACK free +# define REGEX_ALLOCATE_STACK malloc +# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) +# define REGEX_FREE_STACK free -# else /* not REGEX_MALLOC */ +# else /* not REGEX_MALLOC */ -# define REGEX_ALLOCATE_STACK alloca +# define REGEX_ALLOCATE_STACK alloca -# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ +# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ REGEX_REALLOCATE (source, osize, nsize) /* No need to explicitly free anything. */ -# define REGEX_FREE_STACK(arg) +# define REGEX_FREE_STACK(arg) -# endif /* not REGEX_MALLOC */ -#endif /* not using relocating allocator */ +# endif /* not REGEX_MALLOC */ +# endif /* not using relocating allocator */ /* True if `size1' is non-NULL and PTR is pointing anywhere inside `string1' or just past its end. This works if PTR is NULL, which is a good thing. */ -#define FIRST_STRING_P(ptr) \ +# define FIRST_STRING_P(ptr) \ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) /* (Re)Allocate N items of type T using malloc, or fail. */ -#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) -#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) -#define RETALLOC_IF(addr, n, t) \ +# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) +# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) +# define RETALLOC_IF(addr, n, t) \ if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) -#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) +# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) -#define BYTEWIDTH 8 /* In bits. */ +# define BYTEWIDTH 8 /* In bits. */ -#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) +# define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) -#undef MAX -#undef MIN -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define MIN(a, b) ((a) < (b) ? (a) : (b)) +# undef MAX +# undef MIN +# define MAX(a, b) ((a) > (b) ? (a) : (b)) +# define MIN(a, b) ((a) < (b) ? (a) : (b)) typedef char boolean; - -#define false 0 -#define true 1 - -static int re_match_2_internal PARAMS((struct re_pattern_buffer * bufp, - const char *string1, int size1, - const char *string2, int size2, - int pos, - struct re_registers * regs, - - int stop)); +# define false 0 +# define true 1 + +static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size, + reg_syntax_t syntax, + struct re_pattern_buffer *bufp)); + +static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp, + const char *string1, int size1, + const char *string2, int size2, + int pos, + struct re_registers *regs, + int stop)); +static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp, + const char *string1, int size1, + const char *string2, int size2, + int startpos, int range, + struct re_registers *regs, int stop)); +static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp)); + +#ifdef MBS_SUPPORT +static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size, + reg_syntax_t syntax, + struct re_pattern_buffer *bufp)); + + +static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp, + const char *cstring1, int csize1, + const char *cstring2, int csize2, + int pos, + struct re_registers *regs, + int stop, + wchar_t *string1, int size1, + wchar_t *string2, int size2, + int *mbs_offset1, int *mbs_offset2)); +static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp, + const char *string1, int size1, + const char *string2, int size2, + int startpos, int range, + struct re_registers *regs, int stop)); +static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp)); +#endif /* These are the command codes that appear in compiled regular expressions. Some opcodes are followed by argument bytes. A command code can specify any interpretation whatsoever for its arguments. Zero bytes may appear in the compiled regular expression. */ -typedef enum { - no_op = 0, - - /* Succeed right away--no more backtracking. */ - succeed, - - /* Followed by one byte giving n, then by n literal bytes. */ - exactn, - - /* Matches any (more or less) character. */ - anychar, - - /* Matches any one char belonging to specified set. First - following byte is number of bitmap bytes. Then come bytes - for a bitmap saying which chars are in. Bits in each byte - are ordered low-bit-first. A character is in the set if its - bit is 1. A character too large to have a bit in the map is - automatically not in the set. */ - charset, - - /* Same parameters as charset, but match any character that is - not one of those specified. */ - charset_not, - - /* Start remembering the text that is matched, for storing in a - register. Followed by one byte with the register number, in - the range 0 to one less than the pattern buffer's re_nsub - field. Then followed by one byte with the number of groups - inner to this one. (This last has to be part of the - start_memory only because we need it in the on_failure_jump - of re_match_2.) */ - start_memory, - - /* Stop remembering the text that is matched and store it in a - memory register. Followed by one byte with the register - number, in the range 0 to one less than `re_nsub' in the - pattern buffer, and one byte with the number of inner groups, - just like `start_memory'. (We need the number of inner - groups here because we don't have any easy way of finding the - corresponding start_memory when we're at a stop_memory.) */ - stop_memory, - - /* Match a duplicate of something remembered. Followed by one - byte containing the register number. */ - duplicate, - - /* Fail unless at beginning of line. */ - begline, +typedef enum +{ + no_op = 0, - /* Fail unless at end of line. */ - endline, + /* Succeed right away--no more backtracking. */ + succeed, - /* Succeeds if at beginning of buffer (if emacs) or at beginning - of string to be matched (if not). */ - begbuf, + /* Followed by one byte giving n, then by n literal bytes. */ + exactn, - /* Analogously, for end of buffer/string. */ - endbuf, +# ifdef MBS_SUPPORT + /* Same as exactn, but contains binary data. */ + exactn_bin, +# endif - /* Followed by two byte relative address to which to jump. */ - jump, + /* Matches any (more or less) character. */ + anychar, + + /* Matches any one char belonging to specified set. First + following byte is number of bitmap bytes. Then come bytes + for a bitmap saying which chars are in. Bits in each byte + are ordered low-bit-first. A character is in the set if its + bit is 1. A character too large to have a bit in the map is + automatically not in the set. */ + /* ifdef MBS_SUPPORT, following element is length of character + classes, length of collating symbols, length of equivalence + classes, length of character ranges, and length of characters. + Next, character class element, collating symbols elements, + equivalence class elements, range elements, and character + elements follow. + See regex_compile function. */ + charset, + + /* Same parameters as charset, but match any character that is + not one of those specified. */ + charset_not, + + /* Start remembering the text that is matched, for storing in a + register. Followed by one byte with the register number, in + the range 0 to one less than the pattern buffer's re_nsub + field. Then followed by one byte with the number of groups + inner to this one. (This last has to be part of the + start_memory only because we need it in the on_failure_jump + of re_match_2.) */ + start_memory, + + /* Stop remembering the text that is matched and store it in a + memory register. Followed by one byte with the register + number, in the range 0 to one less than `re_nsub' in the + pattern buffer, and one byte with the number of inner groups, + just like `start_memory'. (We need the number of inner + groups here because we don't have any easy way of finding the + corresponding start_memory when we're at a stop_memory.) */ + stop_memory, + + /* Match a duplicate of something remembered. Followed by one + byte containing the register number. */ + duplicate, + + /* Fail unless at beginning of line. */ + begline, + + /* Fail unless at end of line. */ + endline, + + /* Succeeds if at beginning of buffer (if emacs) or at beginning + of string to be matched (if not). */ + begbuf, + + /* Analogously, for end of buffer/string. */ + endbuf, + + /* Followed by two byte relative address to which to jump. */ + jump, /* Same as jump, but marks the end of an alternative. */ - jump_past_alt, - - /* Followed by two-byte relative address of place to resume at - in case of failure. */ - on_failure_jump, - - /* Like on_failure_jump, but pushes a placeholder instead of the - current string position when executed. */ - on_failure_keep_string_jump, - - /* Throw away latest failure point and then jump to following - two-byte relative address. */ - pop_failure_jump, - - /* Change to pop_failure_jump if know won't have to backtrack to - match; otherwise change to jump. This is used to jump - back to the beginning of a repeat. If what follows this jump - clearly won't match what the repeat does, such that we can be - sure that there is no use backtracking out of repetitions - already matched, then we change it to a pop_failure_jump. - Followed by two-byte address. */ - maybe_pop_jump, - - /* Jump to following two-byte address, and push a dummy failure - point. This failure point will be thrown away if an attempt - is made to use it for a failure. A `+' construct makes this - before the first repeat. Also used as an intermediary kind - of jump when compiling an alternative. */ - dummy_failure_jump, + jump_past_alt, + + /* Followed by two-byte relative address of place to resume at + in case of failure. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + on_failure_jump, + + /* Like on_failure_jump, but pushes a placeholder instead of the + current string position when executed. */ + on_failure_keep_string_jump, + + /* Throw away latest failure point and then jump to following + two-byte relative address. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + pop_failure_jump, + + /* Change to pop_failure_jump if know won't have to backtrack to + match; otherwise change to jump. This is used to jump + back to the beginning of a repeat. If what follows this jump + clearly won't match what the repeat does, such that we can be + sure that there is no use backtracking out of repetitions + already matched, then we change it to a pop_failure_jump. + Followed by two-byte address. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + maybe_pop_jump, + + /* Jump to following two-byte address, and push a dummy failure + point. This failure point will be thrown away if an attempt + is made to use it for a failure. A `+' construct makes this + before the first repeat. Also used as an intermediary kind + of jump when compiling an alternative. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + dummy_failure_jump, /* Push a dummy failure point and continue. Used at the end of alternatives. */ - push_dummy_failure, + push_dummy_failure, - /* Followed by two-byte relative address and two-byte number n. - After matching N times, jump to the address upon failure. */ - succeed_n, + /* Followed by two-byte relative address and two-byte number n. + After matching N times, jump to the address upon failure. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + succeed_n, - /* Followed by two-byte relative address, and two-byte number n. - Jump to the address N times, then fail. */ - jump_n, + /* Followed by two-byte relative address, and two-byte number n. + Jump to the address N times, then fail. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + jump_n, - /* Set the following two-byte relative address to the - subsequent two-byte number. The address *includes* the two - bytes of number. */ - set_number_at, + /* Set the following two-byte relative address to the + subsequent two-byte number. The address *includes* the two + bytes of number. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + set_number_at, - wordchar, /* Matches any word-constituent character. */ - notwordchar, /* Matches any char that is not a word-constituent. */ + wordchar, /* Matches any word-constituent character. */ + notwordchar, /* Matches any char that is not a word-constituent. */ - wordbeg, /* Succeeds if at word beginning. */ - wordend, /* Succeeds if at word end. */ + wordbeg, /* Succeeds if at word beginning. */ + wordend, /* Succeeds if at word end. */ - wordbound, /* Succeeds if at a word boundary. */ - notwordbound /* Succeeds if not at a word boundary. */ -#ifdef emacs - , before_dot, /* Succeeds if before point. */ - at_dot, /* Succeeds if at point. */ - after_dot, /* Succeeds if after point. */ + wordbound, /* Succeeds if at a word boundary. */ + notwordbound /* Succeeds if not at a word boundary. */ + +# ifdef emacs + ,before_dot, /* Succeeds if before point. */ + at_dot, /* Succeeds if at point. */ + after_dot, /* Succeeds if after point. */ /* Matches any character whose syntax is specified. Followed by - a byte which contains a syntax code, e.g., Sword. */ - syntaxspec, + a byte which contains a syntax code, e.g., Sword. */ + syntaxspec, /* Matches any character whose syntax is not that specified. */ - notsyntaxspec -#endif /* emacs */ + notsyntaxspec +# endif /* emacs */ } re_opcode_t; +#endif /* not INSIDE_RECURSION */ + +#ifdef BYTE +# define CHAR_T char +# define UCHAR_T unsigned char +# define COMPILED_BUFFER_VAR bufp->buffer +# define OFFSET_ADDRESS_SIZE 2 +# define PREFIX(name) byte_##name +# define ARG_PREFIX(name) name +# define PUT_CHAR(c) putchar (c) +#else +# ifdef WCHAR +# define CHAR_T wchar_t +# define UCHAR_T wchar_t +# define COMPILED_BUFFER_VAR wc_buffer +# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */ +# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1) +# define PREFIX(name) wcs_##name +# define ARG_PREFIX(name) c##name +/* Should we use wide stream?? */ +# define PUT_CHAR(c) printf ("%C", c); +# define TRUE 1 +# define FALSE 0 +# else +# ifdef MBS_SUPPORT +# define WCHAR +# define INSIDE_RECURSION +# include "regex.c" +# undef INSIDE_RECURSION +# endif +# define BYTE +# define INSIDE_RECURSION +# include "regex.c" +# undef INSIDE_RECURSION +# endif +#endif + +#ifdef INSIDE_RECURSION /* Common operations on the compiled pattern. */ /* Store NUMBER in two contiguous bytes starting at DESTINATION. */ +/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ -#define STORE_NUMBER(destination, number) \ +# ifdef WCHAR +# define STORE_NUMBER(destination, number) \ + do { \ + *(destination) = (UCHAR_T)(number); \ + } while (0) +# else /* BYTE */ +# define STORE_NUMBER(destination, number) \ do { \ (destination)[0] = (number) & 0377; \ (destination)[1] = (number) >> 8; \ } while (0) +# endif /* WCHAR */ /* Same as STORE_NUMBER, except increment DESTINATION to the byte after where the number is stored. Therefore, DESTINATION must be an lvalue. */ +/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ -#define STORE_NUMBER_AND_INCR(destination, number) \ +# define STORE_NUMBER_AND_INCR(destination, number) \ do { \ STORE_NUMBER (destination, number); \ - (destination) += 2; \ + (destination) += OFFSET_ADDRESS_SIZE; \ } while (0) /* Put into DESTINATION a number stored in two contiguous bytes starting at SOURCE. */ +/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ -#define EXTRACT_NUMBER(destination, source) \ +# ifdef WCHAR +# define EXTRACT_NUMBER(destination, source) \ + do { \ + (destination) = *(source); \ + } while (0) +# else /* BYTE */ +# define EXTRACT_NUMBER(destination, source) \ do { \ (destination) = *(source) & 0377; \ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ } while (0) +# endif -#ifdef DEBUG -static void extract_number _RE_ARGS((int *dest, unsigned char *source)); -static void extract_number(dest, source) -int *dest; -unsigned char *source; +# ifdef DEBUG +static void PREFIX(extract_number) _RE_ARGS ((int *dest, UCHAR_T *source)); +static void +PREFIX(extract_number) (dest, source) + int *dest; + UCHAR_T *source; { - int temp = SIGN_EXTEND_CHAR(*(source + 1)); - - *dest = *source & 0377; - *dest += temp << 8; +# ifdef WCHAR + *dest = *source; +# else /* BYTE */ + int temp = SIGN_EXTEND_CHAR (*(source + 1)); + *dest = *source & 0377; + *dest += temp << 8; +# endif } -# ifndef EXTRACT_MACROS /* To debug the macros. */ -# undef EXTRACT_NUMBER -# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) -# endif /* not EXTRACT_MACROS */ +# ifndef EXTRACT_MACROS /* To debug the macros. */ +# undef EXTRACT_NUMBER +# define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src) +# endif /* not EXTRACT_MACROS */ |