diff options
author | Waldemar Brodkorb <wbx@openadk.org> | 2017-04-17 17:19:35 +0200 |
---|---|---|
committer | Waldemar Brodkorb <wbx@openadk.org> | 2017-04-17 17:51:12 +0200 |
commit | 2733ce87f0ffd9a6b950ec49f1d502da7a7e973b (patch) | |
tree | c67cbc0eff152ad247908bcc498a7a9eba8b759c /libc/misc | |
parent | 43da5e839f0a287206b5aafd8784e8390481fa73 (diff) |
remove old regex implementation
Diffstat (limited to 'libc/misc')
-rw-r--r-- | libc/misc/regex/Makefile.in | 3 | ||||
-rw-r--r-- | libc/misc/regex/regex_old.c | 8265 |
2 files changed, 1 insertions, 8267 deletions
diff --git a/libc/misc/regex/Makefile.in b/libc/misc/regex/Makefile.in index f95a9f7a3..f0c349466 100644 --- a/libc/misc/regex/Makefile.in +++ b/libc/misc/regex/Makefile.in @@ -7,8 +7,7 @@ subdirs += libc/misc/regex -VARIANT := $(if $(UCLIBC_HAS_REGEX_OLD),_old) -CSRC-y := regex$(VARIANT).c +CSRC-y := regex.c MISC_REGEX_DIR := $(top_srcdir)libc/misc/regex MISC_REGEX_OUT := $(top_builddir)libc/misc/regex diff --git a/libc/misc/regex/regex_old.c b/libc/misc/regex/regex_old.c deleted file mode 100644 index 9d8182ead..000000000 --- a/libc/misc/regex/regex_old.c +++ /dev/null @@ -1,8265 +0,0 @@ -/* Extended regular expression matching and search library, - version 0.12. - (Implements POSIX draft P1003.2/D11.2, except for some of the - internationalization features.) - Copyright (C) 1993-1999, 2000, 2001 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, see - <http://www.gnu.org/licenses/>. */ - -/* To exclude some unwanted junk.... */ -#undef emacs -#include <features.h> -/* unistd.h must be included with _LIBC defined: we need smallint */ -#include <unistd.h> -#include <stdio.h> -#ifdef __UCLIBC__ -# undef _LIBC -# define _REGEX_RE_COMP -# define STDC_HEADERS -# define __RE_TRANSLATE_TYPE char * -# define RE_TRANSLATE_TYPE __RE_TRANSLATE_TYPE -#endif -#include <stdlib.h> -#include <stdint.h> -#include <string.h> - -/* AIX requires this to be the first thing in the file. */ -#if defined _AIX && !defined REGEX_MALLOC -# pragma alloca -#endif - -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif - -#ifndef INSIDE_RECURSION - -# if defined STDC_HEADERS && !defined emacs -# include <stddef.h> -# else -/* We need this for `regex.h', and perhaps for the Emacs include files. */ -# include <sys/types.h> -# endif - - -/* For platform which support the ISO C amendement 1 functionality we - support user defined character classes. */ -# if defined __UCLIBC_HAS_WCHAR__ -# define WIDE_CHAR_SUPPORT 1 -/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ -# include <wchar.h> -# include <wctype.h> -# endif - -# ifdef _LIBC -/* We have to keep the namespace clean. */ - -# define btowc __btowc - -/* We are also using some library internals. */ -# include <locale/localeinfo.h> -# include <locale/elem-hash.h> -# include <langinfo.h> -# include <locale/coll-lookup.h> -# endif - -# ifndef gettext -# define gettext(msgid) (msgid) -# endif - -# ifndef gettext_noop -/* This define is so xgettext can find the internationalizable - strings. */ -# define gettext_noop(String) String -# endif - -/* The `emacs' switch turns on certain matching commands - that make sense only in Emacs. */ -# ifdef emacs - -# include "lisp.h" -# include "buffer.h" -# include "syntax.h" - -# else /* not emacs */ - -/* If we are not linking with Emacs proper, - we can't use the relocating allocator - even if config.h says that we can. */ -# undef REL_ALLOC - -# if defined STDC_HEADERS || defined _LIBC -# include <stdlib.h> -# else -char *malloc (); -char *realloc (); -# endif - -/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. - If nothing else has been done, use the method below. */ -# ifdef INHIBIT_STRING_HEADER -# if !(defined HAVE_BZERO && defined HAVE_BCOPY) -# if !defined bzero && !defined bcopy -# undef INHIBIT_STRING_HEADER -# endif -# endif -# endif - -/* This is the normal way of making sure we have a bcopy and a bzero. - This is used in most programs--a few other programs avoid this - by defining INHIBIT_STRING_HEADER. */ -# ifndef INHIBIT_STRING_HEADER -# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC -# include <string.h> -# ifndef bzero -# ifndef _LIBC -# define bzero(s, n) (memset (s, '\0', n), (s)) -# else -# define bzero(s, n) __bzero (s, n) -# endif -# endif -# else -# include <strings.h> -# ifndef memcmp -# define memcmp(s1, s2, n) bcmp (s1, s2, n) -# endif -# ifndef memcpy -# define memcpy(d, s, n) (bcopy (s, d, n), (d)) -# endif -# endif -# endif - -/* Define the syntax stuff for \<, \>, etc. */ - -/* This must be nonzero for the wordchar and notwordchar pattern - commands in re_match_2. */ -# ifndef Sword -# define Sword 1 -# endif - -# ifdef SWITCH_ENUM_BUG -# define SWITCH_ENUM_CAST(x) ((int)(x)) -# else -# define SWITCH_ENUM_CAST(x) (x) -# endif - -# endif /* not emacs */ - -# if defined _LIBC || defined HAVE_LIMITS_H -# include <limits.h> -# endif - -# ifndef MB_LEN_MAX -# define MB_LEN_MAX 1 -# endif - -/* Get the interface, including the syntax bits. */ -# include <regex.h> -# define translate __REPB_PREFIX(translate) - -/* isalpha etc. are used for the character classes. */ -# include <ctype.h> - -/* Jim Meyering writes: - - "... Some ctype macros are valid only for character codes that - isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when - using /bin/cc or gcc but without giving an ansi option). So, all - ctype uses should be through macros like ISPRINT... If - STDC_HEADERS is defined, then autoconf has verified that the ctype - macros don't need to be guarded with references to isascii. ... - Defining isascii to 1 should let any compiler worth its salt - eliminate the && through constant folding." - Solaris defines some of these symbols so we must undefine them first. */ - -# undef ISASCII -# if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) -# define ISASCII(c) 1 -# else -# define ISASCII(c) isascii(c) -# endif - -# ifdef isblank -# define ISBLANK(c) (ISASCII (c) && isblank (c)) -# else -# define ISBLANK(c) ((c) == ' ' || (c) == '\t') -# endif -# ifdef isgraph -# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) -# else -# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) -# endif - -# undef ISPRINT -# define ISPRINT(c) (ISASCII (c) && isprint (c)) -# define ISDIGIT(c) (ISASCII (c) && isdigit (c)) -# define ISALNUM(c) (ISASCII (c) && isalnum (c)) -# define ISALPHA(c) (ISASCII (c) && isalpha (c)) -# define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) -# define ISLOWER(c) (ISASCII (c) && islower (c)) -# define ISPUNCT(c) (ISASCII (c) && ispunct (c)) -# define ISSPACE(c) (ISASCII (c) && isspace (c)) -# define ISUPPER(c) (ISASCII (c) && isupper (c)) -# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) - -# ifdef _tolower -# define TOLOWER(c) _tolower(c) -# else -# define TOLOWER(c) tolower(c) -# endif - -# ifndef NULL -# define NULL (void *)0 -# endif - -/* We remove any previous definition of `SIGN_EXTEND_CHAR', - since ours (we hope) works properly with all combinations of - machines, compilers, `char' and `unsigned char' argument types. - (Per Bothner suggested the basic approach.) */ -# undef SIGN_EXTEND_CHAR -# if __STDC__ -# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) -# else /* not __STDC__ */ -/* As in Harbison and Steele. */ -# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) -# endif - -# ifndef emacs -/* How many characters in the character set. */ -# define CHAR_SET_SIZE 256 - -# ifdef SYNTAX_TABLE - -extern char *re_syntax_table; - -# else /* not SYNTAX_TABLE */ - -static char re_syntax_table[CHAR_SET_SIZE]; - -static void init_syntax_once (void); - -static void -init_syntax_once (void) -{ - register int c; - static smallint done = 0; - - if (done) - return; - bzero (re_syntax_table, sizeof re_syntax_table); - - for (c = 0; c < CHAR_SET_SIZE; ++c) - if (ISALNUM (c)) - re_syntax_table[c] = Sword; - - re_syntax_table['_'] = Sword; - - done = 1; -} - -# endif /* not SYNTAX_TABLE */ - -# define SYNTAX(c) re_syntax_table[(unsigned char) (c)] - -# endif /* emacs */ - -/* Integer type for pointers. */ -# if !defined _LIBC && !defined __intptr_t_defined -typedef unsigned long int uintptr_t; -# endif - -/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we - use `alloca' instead of `malloc'. This is because using malloc in - re_search* or re_match* could cause memory leaks when C-g is used in - Emacs; also, malloc is slower and causes storage fragmentation. On - the other hand, malloc is more portable, and easier to debug. - - Because we sometimes use alloca, some routines have to be macros, - not functions -- `alloca'-allocated space disappears at the end of the - function it is called in. */ - -# ifdef REGEX_MALLOC - -# define REGEX_ALLOCATE malloc -# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) -# define REGEX_FREE free - -# else /* not REGEX_MALLOC */ - -/* Emacs already defines alloca, sometimes. */ -# ifndef alloca - -/* Make alloca work the best possible way. */ -# ifdef __GNUC__ -# define alloca __builtin_alloca -# else /* not __GNUC__ */ -# if HAVE_ALLOCA_H -# include <alloca.h> -# endif /* HAVE_ALLOCA_H */ -# endif /* not __GNUC__ */ - -# endif /* not alloca */ - -# define REGEX_ALLOCATE alloca - -/* Assumes a `char *destination' variable. */ -# define REGEX_REALLOCATE(source, osize, nsize) \ - (destination = (char *) alloca (nsize), \ - memcpy (destination, source, osize)) - -/* No need to do anything to free, after alloca. */ -# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ - -# endif /* not REGEX_MALLOC */ - -/* Define how to allocate the failure stack. */ - -# if defined REL_ALLOC && defined REGEX_MALLOC - -# define REGEX_ALLOCATE_STACK(size) \ - r_alloc (&failure_stack_ptr, (size)) -# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ - r_re_alloc (&failure_stack_ptr, (nsize)) -# define REGEX_FREE_STACK(ptr) \ - r_alloc_free (&failure_stack_ptr) - -# else /* not using relocating allocator */ - -# ifdef REGEX_MALLOC - -# define REGEX_ALLOCATE_STACK malloc -# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) -# define REGEX_FREE_STACK free - -# else /* not REGEX_MALLOC */ - -# define REGEX_ALLOCATE_STACK alloca - -# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ - REGEX_REALLOCATE (source, osize, nsize) -/* No need to explicitly free anything. */ -# define REGEX_FREE_STACK(arg) - -# endif /* not REGEX_MALLOC */ -# endif /* not using relocating allocator */ - - -/* True if `size1' is non-NULL and PTR is pointing anywhere inside - `string1' or just past its end. This works if PTR is NULL, which is - a good thing. */ -# define FIRST_STRING_P(ptr) \ - (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) - -/* (Re)Allocate N items of type T using malloc, or fail. */ -# define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) -# define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) -# define RETALLOC_IF(addr, n, t) \ - if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) -# define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) - -# define BYTEWIDTH 8 /* In bits. */ - -# define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) - -# undef MAX -# undef MIN -# define MAX(a, b) ((a) > (b) ? (a) : (b)) -# define MIN(a, b) ((a) < (b) ? (a) : (b)) - -typedef char boolean; -# define false 0 -# define true 1 - -static reg_errcode_t byte_regex_compile (const char *pattern, size_t size, - reg_syntax_t syntax, - struct re_pattern_buffer *bufp); - -static int byte_re_match_2_internal (struct re_pattern_buffer *bufp, - const char *string1, int size1, - const char *string2, int size2, - int pos, - struct re_registers *regs, - int stop); -static int byte_re_search_2 (struct re_pattern_buffer *bufp, - const char *string1, int size1, - const char *string2, int size2, - int startpos, int range, - struct re_registers *regs, int stop); -static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp); - -#ifdef MBS_SUPPORT -static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size, - reg_syntax_t syntax, - struct re_pattern_buffer *bufp); - - -static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp, - const char *cstring1, int csize1, - const char *cstring2, int csize2, - int pos, - struct re_registers *regs, - int stop, - wchar_t *string1, int size1, - wchar_t *string2, int size2, - int *mbs_offset1, int *mbs_offset2); -static int wcs_re_search_2 (struct re_pattern_buffer *bufp, - const char *string1, int size1, - const char *string2, int size2, - int startpos, int range, - struct re_registers *regs, int stop); -static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp); -#endif - -/* These are the command codes that appear in compiled regular - expressions. Some opcodes are followed by argument bytes. A - command code can specify any interpretation whatsoever for its - arguments. Zero bytes may appear in the compiled regular expression. */ - -typedef enum -{ - no_op = 0, - - /* Succeed right away--no more backtracking. */ - succeed, - - /* Followed by one byte giving n, then by n literal bytes. */ - exactn, - -# ifdef MBS_SUPPORT - /* Same as exactn, but contains binary data. */ - exactn_bin, -# endif - - /* Matches any (more or less) character. */ - anychar, - - /* Matches any one char belonging to specified set. First - following byte is number of bitmap bytes. Then come bytes - for a bitmap saying which chars are in. Bits in each byte - are ordered low-bit-first. A character is in the set if its - bit is 1. A character too large to have a bit in the map is - automatically not in the set. */ - /* ifdef MBS_SUPPORT, following element is length of character - classes, length of collating symbols, length of equivalence - classes, length of character ranges, and length of characters. - Next, character class element, collating symbols elements, - equivalence class elements, range elements, and character - elements follow. - See regex_compile function. */ - charset, - - /* Same parameters as charset, but match any character that is - not one of those specified. */ - charset_not, - - /* Start remembering the text that is matched, for storing in a - register. Followed by one byte with the register number, in - the range 0 to one less than the pattern buffer's re_nsub - field. Then followed by one byte with the number of groups - inner to this one. (This last has to be part of the - start_memory only because we need it in the on_failure_jump - of re_match_2.) */ - start_memory, - - /* Stop remembering the text that is matched and store it in a - memory register. Followed by one byte with the register - number, in the range 0 to one less than `re_nsub' in the - pattern buffer, and one byte with the number of inner groups, - just like `start_memory'. (We need the number of inner - groups here because we don't have any easy way of finding the - corresponding start_memory when we're at a stop_memory.) */ - stop_memory, - - /* Match a duplicate of something remembered. Followed by one - byte containing the register number. */ - duplicate, - - /* Fail unless at beginning of line. */ - begline, - - /* Fail unless at end of line. */ - endline, - - /* Succeeds if at beginning of buffer (if emacs) or at beginning - of string to be matched (if not). */ - begbuf, - - /* Analogously, for end of buffer/string. */ - endbuf, - - /* Followed by two byte relative address to which to jump. */ - jump, - - /* Same as jump, but marks the end of an alternative. */ - jump_past_alt, - - /* Followed by two-byte relative address of place to resume at - in case of failure. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - on_failure_jump, - - /* Like on_failure_jump, but pushes a placeholder instead of the - current string position when executed. */ - on_failure_keep_string_jump, - - /* Throw away latest failure point and then jump to following - two-byte relative address. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - pop_failure_jump, - - /* Change to pop_failure_jump if know won't have to backtrack to - match; otherwise change to jump. This is used to jump - back to the beginning of a repeat. If what follows this jump - clearly won't match what the repeat does, such that we can be - sure that there is no use backtracking out of repetitions - already matched, then we change it to a pop_failure_jump. - Followed by two-byte address. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - maybe_pop_jump, - - /* Jump to following two-byte address, and push a dummy failure - point. This failure point will be thrown away if an attempt - is made to use it for a failure. A `+' construct makes this - before the first repeat. Also used as an intermediary kind - of jump when compiling an alternative. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - dummy_failure_jump, - - /* Push a dummy failure point and continue. Used at the end of - alternatives. */ - push_dummy_failure, - - /* Followed by two-byte relative address and two-byte number n. - After matching N times, jump to the address upon failure. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - succeed_n, - - /* Followed by two-byte relative address, and two-byte number n. - Jump to the address N times, then fail. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - jump_n, - - /* Set the following two-byte relative address to the - subsequent two-byte number. The address *includes* the two - bytes of number. */ - /* ifdef MBS_SUPPORT, the size of address is 1. */ - set_number_at, - - wordchar, /* Matches any word-constituent character. */ - notwordchar, /* Matches any char that is not a word-constituent. */ - - wordbeg, /* Succeeds if at word beginning. */ - wordend, /* Succeeds if at word end. */ - - wordbound, /* Succeeds if at a word boundary. */ - notwordbound /* Succeeds if not at a word boundary. */ - -# ifdef emacs - ,before_dot, /* Succeeds if before point. */ - at_dot, /* Succeeds if at point. */ - after_dot, /* Succeeds if after point. */ - - /* Matches any character whose syntax is specified. Followed by - a byte which contains a syntax code, e.g., Sword. */ - syntaxspec, - - /* Matches any character whose syntax is not that specified. */ - notsyntaxspec -# endif /* emacs */ -} re_opcode_t; -#endif /* not INSIDE_RECURSION */ - - -#ifdef BYTE -# define CHAR_T char -# define UCHAR_T unsigned char -# define COMPILED_BUFFER_VAR bufp->buffer -# define OFFSET_ADDRESS_SIZE 2 -# define PREFIX(name) byte_##name -# define ARG_PREFIX(name) name -# define PUT_CHAR(c) putchar (c) -#else -# ifdef WCHAR -# define CHAR_T wchar_t -# define UCHAR_T wchar_t -# define COMPILED_BUFFER_VAR wc_buffer -# define OFFSET_ADDRESS_SIZE 1 /* the size which STORE_NUMBER macro use */ -# define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1) -# define PREFIX(name) wcs_##name -# define ARG_PREFIX(name) c##name -/* Should we use wide stream?? */ -# define PUT_CHAR(c) printf ("%C", c); -# define TRUE 1 -# define FALSE 0 -# else -# ifdef MBS_SUPPORT -# define WCHAR -# define INSIDE_RECURSION -# include "regex_old.c" -# undef INSIDE_RECURSION -# endif -# define BYTE -# define INSIDE_RECURSION -# include "regex_old.c" -# undef INSIDE_RECURSION -# endif -#endif - -#ifdef INSIDE_RECURSION -/* Common operations on the compiled pattern. */ - -/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ -/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ - -# ifdef WCHAR -# define STORE_NUMBER(destination, number) \ - do { \ - *(destination) = (UCHAR_T)(number); \ - } while (0) -# else /* BYTE */ -# define STORE_NUMBER(destination, number) \ - do { \ - (destination)[0] = (number) & 0377; \ - (destination)[1] = (number) >> 8; \ - } while (0) -# endif /* WCHAR */ - -/* Same as STORE_NUMBER, except increment DESTINATION to - the byte after where the number is stored. Therefore, DESTINATION - must be an lvalue. */ -/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ - -# define STORE_NUMBER_AND_INCR(destination, number) \ - do { \ - STORE_NUMBER (destination, number); \ - (destination) += OFFSET_ADDRESS_SIZE; \ - } while (0) - -/* Put into DESTINATION a number stored in two contiguous bytes starting - at SOURCE. */ -/* ifdef MBS_SUPPORT, we store NUMBER in 1 element. */ - -# ifdef WCHAR -# define EXTRACT_NUMBER(destination, source) \ - do { \ - (destination) = *(source); \ - } while (0) -# else /* BYTE */ -# define EXTRACT_NUMBER(destination, source) \ - do { \ - (destination) = *(source) & 0377; \ - (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ - } while (0) -# endif - -# ifdef DEBUG -static void PREFIX(extract_number) (int *dest, UCHAR_T *source) -{ -# ifdef WCHAR - *dest = *source; -# else /* BYTE */ - int temp = SIGN_EXTEND_CHAR (*(source + 1)); - *dest = *source & 0377; - *dest += temp << 8; -# endif -} - -# ifndef EXTRACT_MACROS /* To debug the macros. */ -# undef EXTRACT_NUMBER -# define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src) -# endif /* not EXTRACT_MACROS */ - -# endif /* DEBUG */ - -/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. - SOURCE must be an lvalue. */ - -# define EXTRACT_NUMBER_AND_INCR(destination, source) \ - do { \ - EXTRACT_NUMBER (destination, source); \ - (source) += OFFSET_ADDRESS_SIZE; \ - } while (0) - -# ifdef DEBUG -static void PREFIX(extract_number_and_incr) (int *destination, - UCHAR_T **source) -{ - PREFIX(extract_number) (destination, *source); - *source += OFFSET_ADDRESS_SIZE; -} - -# ifndef EXTRACT_MACROS -# undef EXTRACT_NUMBER_AND_INCR -# define EXTRACT_NUMBER_AND_INCR(dest, src) \ - PREFIX(extract_number_and_incr) (&dest, &src) -# endif /* not EXTRACT_MACROS */ - -# endif /* DEBUG */ - - - -/* If DEBUG is defined, Regex prints many voluminous messages about what - it is doing (if the variable `debug' is nonzero). If linked with the - main program in `iregex.c', you can enter patterns and strings - interactively. And if linked with the main program in `main.c' and - the other test files, you can run the already-written tests. */ - -# ifdef DEBUG - -# ifndef DEFINED_ONCE - -/* We use standard I/O for debugging. */ -# include <stdio.h> - -/* It is useful to test things that ``must'' be true when debugging. */ -# include <assert.h> - -static smallint debug; - -# define DEBUG_STATEMENT(e) e -# define DEBUG_PRINT1(x) if (debug) printf (x) -# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) -# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) -# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) -# endif /* not DEFINED_ONCE */ - -# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ - if (debug) PREFIX(print_partial_compiled_pattern) (s, e) -# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ - if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2) - - -/* Print the fastmap in human-readable form. */ - -# ifndef DEFINED_ONCE -static void -print_fastmap (char *fastmap) -{ - unsigned was_a_range = 0; - unsigned i = 0; - - while (i < (1 << BYTEWIDTH)) - { - if (fastmap[i++]) - { - was_a_range = 0; - putchar (i - 1); - while (i < (1 << BYTEWIDTH) && fastmap[i]) - { - was_a_range = 1; - i++; - } - if (was_a_range) - { - printf ("-"); - putchar (i - 1); - } - } - } - putchar ('\n'); -} -# endif /* not DEFINED_ONCE */ - - -/* Print a compiled pattern string in human-readable form, starting at - the START pointer into it and ending just before the pointer END. */ - -static void -PREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end) -{ - int mcnt, mcnt2; - UCHAR_T *p1; - UCHAR_T *p = start; - UCHAR_T *pend = end; - - if (start == NULL) - { - printf ("(null)\n"); - return; - } - - /* Loop over pattern commands. */ - while (p < pend) - { -# ifdef _LIBC - printf ("%td:\t", p - start); -# else - printf ("%ld:\t", (long int) (p - start)); -# endif - - switch ((re_opcode_t) *p++) - { - case no_op: - printf ("/no_op"); - break; - - case exactn: - mcnt = *p++; - printf ("/exactn/%d", mcnt); - do - { - putchar ('/'); - PUT_CHAR (*p++); - } - while (--mcnt); - break; - -# ifdef MBS_SUPPORT - case exactn_bin: - mcnt = *p++; - printf ("/exactn_bin/%d", mcnt); - do - { - printf("/%lx", (long int) *p++); - } - while (--mcnt); - break; -# endif /* MBS_SUPPORT */ - - case start_memory: - mcnt = *p++; - printf ("/start_memory/%d/%ld", mcnt, (long int) *p++); - break; - - case stop_memory: - mcnt = *p++; - printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++); - break; - - case duplicate: - printf ("/duplicate/%ld", (long int) *p++); - break; - - case anychar: - printf ("/anychar"); - break; - - case charset: - case charset_not: - { -# ifdef WCHAR - int i, length; - wchar_t *workp = p; - printf ("/charset [%s", - (re_opcode_t) *(workp - 1) == charset_not ? "^" : ""); - p += 5; - length = *workp++; /* the length of char_classes */ - for (i=0 ; i<length ; i++) - printf("[:%lx:]", (long int) *p++); - length = *workp++; /* the length of collating_symbol */ - for (i=0 ; i<length ;) - { - printf("[."); - while(*p != 0) - PUT_CHAR((i++,*p++)); - i++,p++; - printf(".]"); - } - length = *workp++; /* the length of equivalence_class */ - for (i=0 ; i<length ;) - { - printf("[="); - while(*p != 0) - PUT_CHAR((i++,*p++)); - i++,p++; - printf("=]"); - } - length = *workp++; /* the length of char_range */ - for (i=0 ; i<length ; i++) - { - wchar_t range_start = *p++; - wchar_t range_end = *p++; - printf("%C-%C", range_start, range_end); - } - length = *workp++; /* the length of char */ - for (i=0 ; i<length ; i++) - printf("%C", *p++); - putchar (']'); -# else - register int c, last = -100; - register int in_range = 0; - - printf ("/charset [%s", - (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); - - assert (p + *p < pend); - - for (c = 0; c < 256; c++) - if (c / 8 < *p - && (p[1 + (c/8)] & (1 << (c % 8)))) - { - /* Are we starting a range? */ - if (last + 1 == c && ! in_range) - { - putchar ('-'); - in_range = 1; - } - /* Have we broken a range? */ - else if (last + 1 != c && in_range) - { - putchar (last); - in_range = 0; - } - - if (! in_range) - putchar (c); - - last = c; - } - - if (in_range) - putchar (last); - - putchar (']'); - - p += 1 + *p; -# endif /* WCHAR */ - } - break; - - case begline: - printf ("/begline"); - break; - - case endline: - printf ("/endline"); - break; - - case on_failure_jump: - PREFIX(extract_number_and_incr) (&mcnt, &p); -# ifdef _LIBC - printf ("/on_failure_jump to %td", p + mcnt - start); -# else - printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start)); -# endif - break; - - case on_failure_keep_string_jump: - PREFIX(extract_number_and_incr) (&mcnt, &p); -# ifdef _LIBC - printf ("/on_failure_keep_string_jump to %td", p + mcnt - start); -# else - printf ("/on_failure_keep_string_jump to %ld", - (long int) (p + mcnt - start)); -# endif - break; - - case dummy_failure_jump: - PREFIX(extract_number_and_incr) (&mcnt, &p); -# ifdef _LIBC - printf ("/dummy_failure_jump to %td", p + mcnt - start); -# else - printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start)); -# endif - break; - - case push_dummy_failure: - printf ("/push_dummy_failure"); - break; - - case maybe_pop_jump: - PREFIX(extract_number_and_incr) (&mcnt, &p); -# ifdef _LIBC - printf ("/maybe_pop_jump to %td", p + mcnt - start); -# else - printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start)); -# endif - break; - - case pop_failure_jump: - PREFIX(extract_number_and_incr) (&mcnt, &p); -# ifdef _LIBC - printf ("/pop_failure_jump to %td", p + mcnt - start); -# else - printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start)); -# endif - break; - - case jump_past_alt: - PREFIX(extract_number_and_incr) (&mcnt, &p); -# ifdef _LIBC - printf ("/jump_past_alt to %td", p + mcnt - start); -# else - printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start)); -# endif - break; - - case jump: - PREFIX(extract_number_and_incr) (&mcnt, &p); -# ifdef _LIBC - printf ("/jump to %td", p + mcnt - start); -# else - printf ("/jump to %ld", (long int) (p + mcnt - start)); -# endif - break; - - case succeed_n: - PREFIX(extract_number_and_incr) (&mcnt, &p); - p1 = p + mcnt; - PREFIX(extract_number_and_incr) (&mcnt2, &p); -# ifdef _LIBC - printf ("/succeed_n to %td, %d times", p1 - start, mcnt2); -# else - printf ("/succeed_n to %ld, %d times", - (long int) (p1 - start), mcnt2); -# endif - break; - - case jump_n: - PREFIX(extract_number_and_incr) (&mcnt, &p); - p1 = p + mcnt; - PREFIX(extract_number_and_incr) (&mcnt2, &p); - printf ("/jump_n to %d, %d times", p1 - start, mcnt2); - break; - - case set_number_at: - PREFIX(extract_number_and_incr) (&mcnt, &p); - p1 = p + mcnt; - PREFIX(extract_number_and_incr) (&mcnt2, &p); -# ifdef _LIBC - printf ("/set_number_at location %td to %d", p1 - start, mcnt2); -# else - printf ("/set_number_at location %ld to %d", - (long int) (p1 - start), mcnt2); -# endif - break; - - case wordbound: - printf ("/wordbound"); - break; - - case notwordbound: - printf ("/notwordbound"); - break; - - case wordbeg: - printf ("/wordbeg"); - break; - - case wordend: - printf ("/wordend"); - break; - -# ifdef emacs - case before_dot: - printf ("/before_dot"); - break; - - case at_dot: - printf ("/at_dot"); - break; - - case after_dot: - printf ("/after_dot"); - break; - - case syntaxspec: - printf ("/syntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; - - case notsyntaxspec: - printf ("/notsyntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; -# endif /* emacs */ - - case wordchar: - printf ("/wordchar"); - break; - - case notwordchar: - printf ("/notwordchar"); - break; - - case begbuf: - printf ("/begbuf"); - break; - - case endbuf: - printf ("/endbuf"); - break; - - default: - printf ("?%ld", (long int) *(p-1)); - } - - putchar ('\n'); - } - |