diff options
author | Peter S. Mazinger <ps.m@gmx.net> | 2005-11-28 16:39:11 +0000 |
---|---|---|
committer | Peter S. Mazinger <ps.m@gmx.net> | 2005-11-28 16:39:11 +0000 |
commit | c4869de2c791985539b642ec22606a3cebc03778 (patch) | |
tree | 62bc650f32e54ac31fe59966cd53b90ff3e15f5a /libc/misc/regex/regex_old.c | |
parent | 47e2330706f095aecc98fb7998c60bd1c6c045d3 (diff) |
Rename regex.c to regex_old.c
Diffstat (limited to 'libc/misc/regex/regex_old.c')
-rw-r--r-- | libc/misc/regex/regex_old.c | 8383 |
1 files changed, 8383 insertions, 0 deletions
diff --git a/libc/misc/regex/regex_old.c b/libc/misc/regex/regex_old.c new file mode 100644 index 000000000..3e9d2eb30 --- /dev/null +++ b/libc/misc/regex/regex_old.c @@ -0,0 +1,8383 @@ +/* Extended regular expression matching and search library, + version 0.12. + (Implements POSIX draft P1003.2/D11.2, except for some of the + internationalization features.) + Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. */ + +#define HAVE_MEMPCPY +#define __mempcpy __libc_mempcpy +#define memset __memset +#define memcmp __memcmp +#define strcmp __strcmp +#define strlen __strlen +#define wcslen __wcslen +/* for some reason this does not work */ +#define memcpy __memcpy + +/* To exclude some unwanted junk.... */ +#undef _LIBC +#undef emacs +#define _REGEX_RE_COMP +#include <features.h> +#include <stdlib.h> +#include <string.h> +#define STDC_HEADERS +#define RE_TRANSLATE_TYPE char * + +extern void *__libc_mempcpy (void *__restrict __dest, + __const void *__restrict __src, size_t __n) /*attribute_hidden*/; + +/* AIX requires this to be the first thing in the file. */ +#if defined _AIX && !defined REGEX_MALLOC + #pragma alloca +#endif + +#undef _GNU_SOURCE +#define _GNU_SOURCE + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#ifndef PARAMS +# if defined __GNUC__ || (defined __STDC__ && __STDC__) +# define PARAMS(args) args +# else +# define PARAMS(args) () +# endif /* GCC. */ +#endif /* Not PARAMS. */ + +#ifndef INSIDE_RECURSION + +# if defined STDC_HEADERS && !defined emacs +# include <stddef.h> +# else +/* We need this for `regex.h', and perhaps for the Emacs include files. */ +# include <sys/types.h> +# endif + + +/* For platform which support the ISO C amendement 1 functionality we + support user defined character classes. */ +#if defined __UCLIBC_HAS_WCHAR__ +# define WIDE_CHAR_SUPPORT 1 +/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ +# include <wchar.h> +# include <wctype.h> +# endif + +# ifdef _LIBC +/* We have to keep the namespace clean. */ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ + __regerror(errcode, preg, errbuf, errbuf_size) +# define re_set_registers(bu, re, nu, st, en) \ + __re_set_registers (bu, re, nu, st, en) +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ + __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) +# define re_match(bufp, string, size, pos, regs) \ + __re_match (bufp, string, size, pos, regs) +# define re_search(bufp, string, size, startpos, range, regs) \ + __re_search (bufp, string, size, startpos, range, regs) +# define re_compile_pattern(pattern, length, bufp) \ + __re_compile_pattern (pattern, length, bufp) +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ + __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) + +# define btowc __btowc + +/* We are also using some library internals. */ +# include <locale/localeinfo.h> +# include <locale/elem-hash.h> +# include <langinfo.h> +# include <locale/coll-lookup.h> +# endif + +/* This is for other GNU distributions with internationalized messages. */ +# if HAVE_LIBINTL_H || defined _LIBC +# include <libintl.h> +# ifdef _LIBC +# undef gettext +# define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES) +# endif +# else +# define gettext(msgid) (msgid) +# endif + +# ifndef gettext_noop +/* This define is so xgettext can find the internationalizable + strings. */ +# define gettext_noop(String) String +# endif + +/* The `emacs' switch turns on certain matching commands + that make sense only in Emacs. */ +# ifdef emacs + +# include "lisp.h" +# include "buffer.h" +# include "syntax.h" + +# else /* not emacs */ + +/* If we are not linking with Emacs proper, + we can't use the relocating allocator + even if config.h says that we can. */ +# undef REL_ALLOC + +# if defined STDC_HEADERS || defined _LIBC +# include <stdlib.h> +# else +char *malloc (); +char *realloc (); +# endif + +/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. + If nothing else has been done, use the method below. */ +# ifdef INHIBIT_STRING_HEADER +# if !(defined HAVE_BZERO && defined HAVE_BCOPY) +# if !defined bzero && !defined bcopy +# undef INHIBIT_STRING_HEADER +# endif +# endif +# endif + +/* This is the normal way of making sure we have a bcopy and a bzero. + This is used in most programs--a few other programs avoid this + by defining INHIBIT_STRING_HEADER. */ +# ifndef INHIBIT_STRING_HEADER +# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC +# include <string.h> +# ifndef bzero +# ifndef _LIBC +# define bzero(s, n) (memset (s, '\0', n), (s)) +# else +# define bzero(s, n) __bzero (s, n) +# endif +# endif +# else +# include <strings.h> +# ifndef memcmp +# define memcmp(s1, s2, n) bcmp (s1, s2, n) +# endif +# ifndef memcpy +# define memcpy(d, s, n) (bcopy (s, d, n), (d)) +# endif +# endif +# endif + +/* Define the syntax stuff for \<, \>, etc. */ + +/* This must be nonzero for the wordchar and notwordchar pattern + commands in re_match_2. */ +# ifndef Sword +# define Sword 1 +# endif + +# ifdef SWITCH_ENUM_BUG +# define SWITCH_ENUM_CAST(x) ((int)(x)) +# else +# define SWITCH_ENUM_CAST(x) (x) +# endif + +# endif /* not emacs */ + +# if defined _LIBC || HAVE_LIMITS_H +# include <limits.h> +# endif + +# ifndef MB_LEN_MAX +# define MB_LEN_MAX 1 +# endif + +/* Get the interface, including the syntax bits. */ +# include <regex.h> + +/* isalpha etc. are used for the character classes. */ +# include <ctype.h> + +/* Jim Meyering writes: + + "... Some ctype macros are valid only for character codes that + isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when + using /bin/cc or gcc but without giving an ansi option). So, all + ctype uses should be through macros like ISPRINT... If + STDC_HEADERS is defined, then autoconf has verified that the ctype + macros don't need to be guarded with references to isascii. ... + Defining isascii to 1 should let any compiler worth its salt + eliminate the && through constant folding." + Solaris defines some of these symbols so we must undefine them first. */ + +# undef ISASCII +# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) +# define ISASCII(c) 1 +# else +# define ISASCII(c) isascii(c) +# endif + +# ifdef isblank +# define ISBLANK(c) (ISASCII (c) && isblank (c)) +# else +# define ISBLANK(c) ((c) == ' ' || (c) == '\t') +# endif +# ifdef isgraph +# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) +# else +# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) +# endif + +# undef ISPRINT +# define ISPRINT(c) (ISASCII (c) && isprint (c)) +# define ISDIGIT(c) (ISASCII (c) && isdigit (c)) +# define ISALNUM(c) (ISASCII (c) && isalnum (c)) +# define ISALPHA(c) (ISASCII (c) && isalpha (c)) +# define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) +# define ISLOWER(c) (ISASCII (c) && islower (c)) +# define ISPUNCT(c) (ISASCII (c) && ispunct (c)) +# define ISSPACE(c) (ISASCII (c) && isspace (c)) +# define ISUPPER(c) (ISASCII (c) && isupper (c)) +# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) + +# ifdef _tolower +# define TOLOWER(c) _tolower(c) +# else +# define TOLOWER(c) tolower(c) +# endif + +# ifndef NULL +# define NULL (void *)0 +# endif + +/* We remove any previous definition of `SIGN_EXTEND_CHAR', + since ours (we hope) works properly with all combinations of + machines, compilers, `char' and `unsigned char' argument types. + (Per Bothner suggested the basic approach.) */ +# undef SIGN_EXTEND_CHAR +# if __STDC__ +# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) +# else /* not __STDC__ */ +/* As in Harbison and Steele. */ +# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) +# endif + +# ifndef emacs +/* How many characters in the character set. */ +# define CHAR_SET_SIZE 256 + +# ifdef SYNTAX_TABLE + +extern char *re_syntax_table; + +# else /* not SYNTAX_TABLE */ + +static char re_syntax_table[CHAR_SET_SIZE]; + +static void init_syntax_once PARAMS ((void)); + +static void +init_syntax_once () +{ + register int c; + static int done = 0; + + if (done) + return; + bzero (re_syntax_table, sizeof re_syntax_table); + + for (c = 0; c < CHAR_SET_SIZE; ++c) + if (ISALNUM (c)) + re_syntax_table[c] = Sword; + + re_syntax_table['_'] = Sword; + + done = 1; +} + +# endif /* not SYNTAX_TABLE */ + +# define SYNTAX(c) re_syntax_table[(unsigned char) (c)] + +# endif /* emacs */ + +/* Integer type for pointers. */ +# if !defined _LIBC +typedef unsigned long int uintptr_t; +# endif + +/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we + use `alloca' instead of `malloc'. This is because using malloc in + re_search* or re_match* could cause memory leaks when C-g is used in + Emacs; also, malloc is slower and causes storage fragmentation. On + the other hand, malloc is more portable, and easier to debug. + + Because we sometimes use alloca, some routines have to be macros, + not functions -- `alloca'-allocated space disappears at the end of the + function it is called in. */ + +# ifdef REGEX_MALLOC + +# define REGEX_ALLOCATE malloc +# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) +# define REGEX_FREE free + +# else /* not REGEX_MALLOC */ + +/* Emacs already defines alloca, sometimes. */ +# ifndef alloca + +/* Make alloca work the best possible way. */ +# ifdef __GNUC__ +# define alloca __builtin_alloca +# else /* not __GNUC__ */ +# if HAVE_ALLOCA_H +# include <alloca.h> +# endif /* HAVE_ALLOCA_H */ +# endif /* not __GNUC__ */ + +# endif /* not alloca */ + +# define REGEX_ALLOCATE alloca + +/* Assumes a `char *destination' variable. */ +# define REGEX_REALLOCATE(source, osize, nsize) \ + (destination = (char *) alloca (nsize), \ + memcpy (destination, source, osize)) + +/* No need to do anything to free, after alloca. */ +# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ + +# endif /* not REGEX_MALLOC */ + +/* Define how to allocate the failure stack. */ + +# if defined REL_ALLOC && defined REGEX_MALLOC + +# define REGEX_ALLOCATE_STACK(size) \ + r_alloc (&failure_stack_ptr, (size)) +# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ + r_re_alloc (&failure_stack_ptr, (nsize)) +# define REGEX_FREE_STACK(ptr) \ + r_alloc_free (&failure_stack_ptr) + +# else /* not using relocating allocator */ + +# ifdef REGEX_MALLOC + +# define REGEX_ALLOCATE_STACK malloc +# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) +# define REGEX_FREE_STACK free + +# else /* not REGEX_MALLOC */ + +# define REGEX_ALLOCATE_STACK alloca + +# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ + REGEX_REALLOCATE (source, osize, nsize) +/* No need to explicitly free anything. */ +# define REGEX_FREE_STACK(arg) + +# endif /* not REGEX_MALLOC */ +# endif /* not using relocating allocator */ + + +/* True if `size1' is non-NULL and PTR is pointing anywhere inside + `string1' or just past its end. This works if PTR is NULL, which is + a good thing. */ +# define FIRST_STRING_P(ptr) \ + (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) + +/* (Re)Allocate N items of type T using malloc, or fail. */ +# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) +# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) +# define RETALLOC_IF(addr, n, t) \ + if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) +# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) + +# define BYTEWIDTH 8 /* In bits. */ + +# define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) + +# undef MAX +# undef MIN +# define MAX(a, b) ((a) > (b) ? (a) : (b)) +# define MIN(a, b) ((a) < (b) ? (a) : (b)) + +typedef char boolean; +# define false 0 +# define true 1 + +static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size, + reg_syntax_t syntax, + struct re_pattern_buffer *bufp)); + +static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp, + const char *string1, int size1, + const char *string2, int size2, + int pos, + struct re_registers *regs, + int stop)); +static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp, + const char *string1, int size1, + const char *string2, int size2, + int startpos, int range, + struct re_registers *regs, int stop)); +static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp)); + +#ifdef MBS_SUPPORT +static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size, + reg_syntax_t syntax, + struct re_pattern_buffer *bufp)); + + +static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp, + const char *cstring1, int csize1, + const char *cstring2, int csize2, + int pos, + struct re_registers *regs, + int stop, + wchar_t *string1, int size1, + wchar_t *string2, int size2, + int *mbs_offset1, int *mbs_offset2)); +static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp, + const char *string1, int size1, + const char *string2, int size2, + int startpos, int range, + struct re_registers *regs, int stop)); +static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp)); +#endif + +/* These are the command codes that appear in compiled regular + expressions. Some opcodes are followed by argument bytes. A + command code can specify any interpretation whatsoever for its + arguments. Zero bytes may appear in the compiled regular expression. */ + +typedef enum +{ + no_op = 0, + + /* Succeed right away--no more backtracking. */ + succeed, + + /* Followed by one byte giving n, then by n literal bytes. */ + exactn, + +# ifdef MBS_SUPPORT + /* Same as exactn, but contains binary data. */ + exactn_bin, +# endif + + /* Matches any (more or less) character. */ + anychar, + + /* Matches any one char belonging to specified set. First + following byte is number of bitmap bytes. Then come bytes + for a bitmap saying which chars are in. Bits in each byte + are ordered low-bit-first. A character is in the set if its + bit is 1. A character too large to have a bit in the map is + automatically not in the set. */ + /* ifdef MBS_SUPPORT, following element is length of character + classes, length of collating symbols, length of equivalence + classes, length of character ranges, and length of characters. + Next, character class element, collating symbols elements, + equivalence class elements, range elements, and character + elements follow. + See regex_compile function. */ + charset, + + /* Same parameters as charset, but match any character that is + not one of those specified. */ + charset_not, + + /* Start remembering the text that is matched, for storing in a + register. Followed by one byte with the register number, in + the range 0 to one less than the pattern buffer's re_nsub + field. Then followed by one byte with the number of groups + inner to this one. (This last has to be part of the + start_memory only because we need it in the on_failure_jump + of re_match_2.) */ + start_memory, + + /* Stop remembering the text that is matched and store it in a + memory register. Followed by one byte with the register + number, in the range 0 to one less than `re_nsub' in the + pattern buffer, and one byte with the number of inner groups, + just like `start_memory'. (We need the number of inner + groups here because we don't have any easy way of finding the + corresponding start_memory when we're at a stop_memory.) */ + stop_memory, + + /* Match a duplicate of something remembered. Followed by one + byte containing the register number. */ + duplicate, + + /* Fail unless at beginning of line. */ + begline, + + /* Fail unless at end of line. */ + endline, + + /* Succeeds if at beginning of buffer (if emacs) or at beginning + of string to be matched (if not). */ + begbuf, + + /* Analogously, for end of buffer/string. */ + endbuf, + + /* Followed by two byte relative address to which to jump. */ + jump, + + /* Same as jump, but marks the end of an alternative. */ + jump_past_alt, + + /* Followed by two-byte relative address of place to resume at + in case of failure. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + on_failure_jump, + + /* Like on_failure_jump, but pushes a placeholder instead of the + current string position when executed. */ + on_failure_keep_string_jump, + + /* Throw away latest failure point and then jump to following + two-byte relative address. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + pop_failure_jump, + + /* Change to pop_failure_jump if know won't have to backtrack to + match; otherwise change to jump. This is used to jump + back to the beginning of a repeat. If what follows this jump + clearly won't match what the repeat does, such that we can be + sure that there is no use backtracking out of repetitions + already matched, then we change it to a pop_failure_jump. + Followed by two-byte address. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + maybe_pop_jump, + + /* Jump to following two-byte address, and push a dummy failure + point. This failure point will be thrown away if an attempt + is made to use it for a failure. A `+' construct makes this + before the first repeat. Also used as an intermediary kind + of jump when compiling an alternative. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + dummy_failure_jump, + + /* Push a dummy failure point and continue. Used at the end of + alternatives. */ + push_dummy_failure, + + /* Followed by two-byte relative address and two-byte number n. + After matching N times, jump to the address upon failure. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + succeed_n, + + /* Followed by two-byte relative address, and two-byte number n. + Jump to the address N times, then fail. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + jump_n, + + /* Set the following two-byte relative address to the + subsequent two-byte number. The address *includes* the two + bytes of number. */ + /* ifdef MBS_SUPPORT, the size of address is 1. */ + set_number_at, + + wordchar, /* Matches any word-constituent character. */ + notwordchar, /* Matches any char that is not a word-constituent. */ + + wordbeg, /* Succeeds if at word beginning. */ + wordend, /* Succeeds if at word end. */ + + wordbound, /* Succeeds if at a word boundary. */ + notwordbound /* Succeeds if not at a word boundary. */ + +# ifdef emacs + ,before_dot, /* Succeeds if before point. */ + at_dot, /* Succeeds if at point. */ + after_dot, /* Succeeds if after point. */ + + /* Matches any character whose syntax is specified. Followed by + a byte which contains a syntax code, e.g., Sword. */ + syntaxspec, + + /* Matches any character whose syntax is not that specified. */ + notsyntaxspec +# endif /* emacs */ +} re_opcode_t; +#endif /* not INSIDE_RECURSION */ + + +#ifdef BYTE +# define CHAR_T char +# define UCHAR_T unsigned char +# define COMPILED_BUFFER_VAR bufp->buffer +# define OFFSET_ADDRESS_SIZE 2 +# define PREFIX(name) byte_##name +# define ARG_PREFIX(name) name +# define PUT_CHAR(c) putchar (c) +#else +# ifdef WCHAR +# define CHAR_T wchar_t +# define UCHAR_T wchar_t +# define COMPILED_BUFFER_VAR wc_buffer +# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */ +# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1) +# define PREFIX(name) wcs_##name +# define ARG_PREFIX(name) c##name +/* Should we use wide stream?? */ +# define PUT_CHAR(c) printf ("%C", c); +# define TRUE 1 +# define FALSE 0 +# else +# ifdef MBS_SUPPORT +# define WCHAR +# define INSIDE_RECURSION +# include "regex_old.c" +# undef INSIDE_RECURSION +# endif +# define BYTE +# define INSIDE_RECURSION +# include "regex_old.c" +# undef INSIDE_RECURSION +# endif +#endif + +#ifdef INSIDE_RECURSION +/* Common operations on the compiled pattern. */ + +/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ +/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ + +# ifdef WCHAR +# define STORE_NUMBER(destination, number) \ + do { \ + *(destination) = (UCHAR_T)(number); \ + } while (0) +# else /* BYTE */ +# define STORE_NUMBER(destination, number) \ + do { \ + (destination)[0] = (number) & 0377; \ + (destination)[1] = (number) >> 8; \ + } while (0) +# endif /* WCHAR */ + +/* Same as STORE_NUMBER, except increment DESTINATION to + the byte after where the number is stored. Therefore, DESTINATION + must be an lvalue. */ +/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ + +# define STORE_NUMBER_AND_INCR(destination, number) \ + do { \ + STORE_NUMBER (destination, number); \ + (destination) += OFFSET_ADDRESS_SIZE; \ + } while (0) + +/* Put into DESTINATION a number stored in two contiguous bytes starting + at SOURCE. */ +/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ + +# ifdef WCHAR +# define EXTRACT_NUMBER(destination, source) \ + do { \ + (destination) = *(source); \ + } while (0) +# else /* BYTE */ +# define EXTRACT_NUMBER(destination, source) \ + do { \ + (destination) = *(source) & 0377; \ + (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ + } while (0) +# endif + +# ifdef DEBUG +static void PREFIX(extract_number) _RE_ARGS ((int *dest, UCHAR_T *source)); +static void +PREFIX(extract_number) (dest, source) + int *dest; + UCHAR_T *source; +{ +# ifdef WCHAR + *dest = *source; +# else /* BYTE */ + int temp = SIGN_EXTEND_CHAR (*(source + 1)); + *dest = *source & 0377; + *dest += temp << 8; +# endif +} + +# ifndef EXTRACT_MACROS /* To debug the macros. */ +# undef EXTRACT_NUMBER +# define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src) +# endif /* not EXTRACT_MACROS */ + +# endif /* DEBUG */ + +/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. + SOURCE must be an lvalue. */ + +# define EXTRACT_NUMBER_AND_INCR(destination, source) \ + do { \ + EXTRACT_NUMBER (destination, source); \ + (source) += OFFSET_ADDRESS_SIZE; \ + } while (0) + +# ifdef DEBUG +static void PREFIX(extract_number_and_incr) _RE_ARGS ((int *destination, + UCHAR_T **source)); +static void +PREFIX(extract_number_and_incr) (destination, source) + int *destination; + UCHAR_T **source; +{ + PREFIX(extract_number) (destination, *source); + *source += OFFSET_ADDRESS_SIZE; +} + +# ifndef EXTRACT_MACROS +# undef EXTRACT_NUMBER_AND_INCR +# define EXTRACT_NUMBER_AND_INCR(dest, src) \ + PREFIX(extract_number_and_incr) (&dest, &src) +# endif /* not EXTRACT_MACROS */ + +# endif /* DEBUG */ + + + +/* If DEBUG is defined, Regex prints many voluminous messages about what + it is doing (if the variable `debug' is nonzero). If linked with the + main program in `iregex.c', you can enter patterns and strings + interactively. And if linked with the main program in `main.c' and + the other test files, you can run the already-written tests. */ + +# ifdef DEBUG + +# ifndef DEFINED_ONCE + +/* We use standard I/O for debugging. */ +# include <stdio.h> + +/* It is useful to test things that ``must'' be true when debugging. */ +# include <assert.h> + +static int debug; + +# define DEBUG_STATEMENT(e) e +# define DEBUG_PRINT1(x) if (debug) printf (x) +# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) +# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) +# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) +# endif /* not DEFINED_ONCE */ + +# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ + if (debug) PREFIX(print_partial_compiled_pattern) (s, e) +# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ + if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2) + + +/* Print the fastmap in human-readable form. */ + +# ifndef DEFINED_ONCE +void +print_fastmap (fastmap) + char *fastmap; +{ + unsigned was_a_range = 0; + unsigned i = 0; + + while (i < (1 << BYTEWIDTH)) + { + if (fastmap[i++]) + { + was_a_range = 0; + putchar (i - 1); + while (i < (1 << BYTEWIDTH) && fastmap[i]) + { + was_a_range = 1; + i++; + } + if (was_a_range) + { + printf ("-"); + putchar (i - 1); + } + } + } + putchar ('\n'); +} +# endif /* not DEFINED_ONCE */ + + +/* Print a compiled pattern string in human-readable form, starting at + the START pointer into it and ending just before the pointer END. */ + +void +PREFIX(print_partial_compiled_pattern) (start, end) + UCHAR_T *start; + UCHAR_T *end; +{ + int mcnt, mcnt2; + UCHAR_T *p1; + UCHAR_T *p = start; + UCHAR_T *pend = end; + + if (start == NULL) + { + printf ("(null)\n"); + return; + } + + /* Loop over pattern commands. */ + while (p < pend) + { +# ifdef _LIBC + printf ("%td:\t", p - start); +# else + printf ("%ld:\t", (long int) (p - start)); +# endif + + switch ((re_opcode_t) *p++) + { + case no_op: + printf ("/no_op"); + break; + + case exactn: + mcnt = *p++; + printf ("/exactn/%d", mcnt); + do + { + putchar ('/'); + PUT_CHAR (*p++); + } + while (--mcnt); + break; + +# ifdef MBS_SUPPORT + case exactn_bin: + mcnt = *p++; + printf ("/exactn_bin/%d", mcnt); + do + { + printf("/%lx", (long int) *p++); + } + while (--mcnt); + break; +# endif /* MBS_SUPPORT */ + + case start_memory: + mcnt = *p++; + printf ("/start_memory/%d/%ld", mcnt, (long int) *p++); + break; + + case stop_memory: + mcnt = *p++; + printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++); + break; + + case duplicate: + printf ("/duplicate/%ld", (long int) *p++); + break; + + case anychar: + printf ("/anychar"); + break; + + case charset: + case charset_not: + { +# ifdef WCHAR + int i, length; + wchar_t *workp = p; + printf ("/charset [%s", + (re_opcode_t) *(workp - 1) == charset_not ? "^" : ""); + p += 5; + length = *workp++; /* the length of char_classes */ + for (i=0 ; i<length ; i++) + printf("[:%lx:]", (long int) *p++); + length = *workp++; /* the length of collating_symbol */ + for (i=0 ; i<length ;) + { + printf("[."); + while(*p != 0) + PUT_CHAR((i++,*p++)); + i++,p++; + printf(".]"); + } + length = *workp++; /* the length of equivalence_class */ + for (i=0 ; i<length ;) + { + printf("[="); + while(*p != 0) + PUT_CHAR((i++,*p++)); + i++,p++; + printf("=]"); + } + length = *workp++; /* the length of char_range */ + for (i=0 ; i<length ; i++) + { + wchar_t range_start = *p++; + wchar_t range_end = *p++; + printf("%C-%C", range_start, range_end); + } + length = *workp++; /* the length of char */ + for (i=0 ; i<length ; i++) + printf("%C", *p++); + putchar (']'); +# else + register int c, last = -100; + register int in_range = 0; + + printf ("/charset [%s", + (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); + + assert (p + *p < pend); + + for (c = 0; c < 256; c++) + if (c / 8 < *p + && (p[1 + (c/8)] & (1 << (c % 8)))) + { + /* Are we starting a range? */ + if (last + 1 == c && ! in_range) + { + putchar ('-'); + in_range = 1; + } + /* Have we broken a range? */ + else if (last + 1 != c && in_range) + { + putchar (last); + in_range = 0; + } + + if (! in_range) + putchar (c); + + last = c; + } + + if (in_range) + putchar (last); + + putchar (']'); + + p += 1 + *p; +# endif /* WCHAR */ + } + break; + + case begline: + printf ("/begline"); + break; + + case endline: + printf ("/endline"); + break; + + case on_failure_jump: + PREFIX(extract_number_and_incr) (&mcnt, &p); +# ifdef _LIBC + printf ("/on_failure_jump to %td", p + mcnt - start); +# else + printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start)); +# endif + break; + + case on_failure_keep_string_jump: + PREFIX(extract_number_and_incr) (&mcnt, &p); +# ifdef _LIBC + printf ("/on_failure_keep_string_jump to %td", p + mcnt - start); +# else + printf ("/on_failure_keep_string_jump to %ld", + (long int) (p + mcnt - start)); +# endif + break; + + case dummy_failure_jump: + PREFIX(extract_number_and_incr) (&mcnt, &p); +# ifdef _LIBC + printf ("/dummy_failure_jump to %td", p + mcnt - start); +# else + printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start)); +# endif + break; + + case push_dummy_failure: + printf ("/push_dummy_failure"); + break; + + case maybe_pop_jump: + PREFIX(extract_number_and_incr) (&mcnt, &p); +# ifdef _LIBC + printf ("/maybe_pop_jump to %td", p + mcnt - start); +# else + printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start)); +# endif + break; + + case pop_failure_jump: + PREFIX(extract_number_and_incr) (&mcnt, &p); +# ifdef _LIBC + printf ("/pop_failure_jump to %td", p + mcnt - start); +# else + printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start)); +# endif + break; + + case jump_past_alt: + PREFIX(extract_number_and_incr) (&mcnt, &p); +# ifdef _LIBC + printf ("/jump_past_alt to %td", p + mcnt - start); +# else + printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start)); +# endif + break; + + case jump: + PREFIX(extract_number_and_incr) (&mcnt, &p); +# ifdef _LIBC + printf ("/jump to %td", p + mcnt - start); +# else + printf ("/jump to %ld", (long int) (p + mcnt - start)); +# endif + break; + + case succeed_n: + PREFIX(extract_number_and_incr) (&mcnt, &p); + p1 = p + mcnt; + PREFIX(extract_number_and_incr) (&mcnt2, &p); +# ifdef _LIBC + printf ("/succeed_n to %td, %d times", p1 - start, mcnt2); +# else + printf ("/succeed_n to %ld, %d times", + (long int) (p1 - start), mcnt2); +# endif + break; + + case jump_n: + PREFIX(extract_number_and_incr) (&mcnt, &p); + p1 = p + mcnt; + PREFIX(extract_number_and_incr) (&mcnt2, &p); + printf ("/jump_n to %d, %d times", p1 - start, mcnt2); + break; + + case set_number_at: + PREFIX(extract_number_and_incr) (&mcnt, &p); + p1 = p + mcnt; + PREFIX(extract_number_and_incr) (&mcnt2, &p); +# ifdef _LIBC + printf ("/set_number_at location %td to %d", p1 - start, mcnt2); +# else + printf ("/set_number_at location %ld to %d", + (long int) (p1 - start), mcnt2); +# endif + break; + + case wordbound: + printf ("/wordbound"); |