path: root/libc/misc
diff options
authorDavid McCullough <>2001-06-07 12:08:54 +0000
committerDavid McCullough <>2001-06-07 12:08:54 +0000
commitfde510315baf84e33f689f12c5d21297b5321470 (patch)
tree05c7e76f4913568f7d4e53b849c5204cfa98904e /libc/misc
parente9499a8a60154078b834828fe18437dadf0cdf74 (diff)
The m68k-elf compiler chokes on this code when compiling for PIC, because
regex_compile is one big function (it needs relative function calls that reach further than cpu32 can do). The solution was to re-order the code a little to reduce the size of these relative calls. So the total sum of the changes is:
* Move regex_compile to the end of the file
* Make store_op1 an inline
Unfortunately CVS diff doesn't show this and makes it look like the whole file has been severely hacked. It hasn't.
Diffstat (limited to 'libc/misc')
1 files changed, 1128 insertions, 1129 deletions
diff --git a/libc/misc/regex/regex.c b/libc/misc/regex/regex.c
index d14595dfd..350535fa1 100644
--- a/libc/misc/regex/regex.c
+++ b/libc/misc/regex/regex.c
@@ -1832,1139 +1832,11 @@ int num_regs;
#endif /* not MATCH_MAY_ALLOCATE */
-static boolean group_in_compile_stack _RE_ARGS((compile_stack_type
- compile_stack,
- regnum_t regnum));
-/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
- Returns one of error codes defined in `regex.h', or zero for success.
- Assumes the `allocated' (and perhaps `buffer') and `translate'
- fields are set in BUFP on entry.
- If it succeeds, results are put in BUFP (if it returns an error, the
- contents of BUFP are undefined):
- `buffer' is the compiled pattern;
- `syntax' is set to SYNTAX;
- `used' is set to the length of the compiled pattern;
- `fastmap_accurate' is zero;
- `re_nsub' is the number of subexpressions in PATTERN;
- `not_bol' and `not_eol' are zero;
- The `fastmap' and `newline_anchor' fields are neither
- examined nor set. */
-/* Return, freeing storage we allocated. */
-#define FREE_STACK_RETURN(value) \
- return (free (compile_stack.stack), value)
-static reg_errcode_t regex_compile(pattern, size, syntax, bufp)
-const char *pattern;
-size_t size;
-reg_syntax_t syntax;
-struct re_pattern_buffer *bufp;
- /* We fetch characters from PATTERN here. Even though PATTERN is
- `char *' (i.e., signed), we declare these variables as unsigned, so
- they can be reliably used as array indices. */
- register unsigned char c, c1;
- /* A random temporary spot in PATTERN. */
- const char *p1;
- /* Points to the end of the buffer, where we should append. */
- register unsigned char *b;
- /* Keeps track of unclosed groups. */
- compile_stack_type compile_stack;
- /* Points to the current (ending) position in the pattern. */
- const char *p = pattern;
- const char *pend = pattern + size;
- /* How to translate the characters in the pattern. */
- RE_TRANSLATE_TYPE translate = bufp->translate;
- /* Address of the count-byte of the most recently inserted `exactn'
- command. This makes it possible to tell if a new exact-match
- character can be added to that command or if the character requires
- a new `exactn' command. */
- unsigned char *pending_exact = 0;
- /* Address of start of the most recently finished expression.
- This tells, e.g., postfix * where to find the start of its
- operand. Reset at the beginning of groups and alternatives. */
- unsigned char *laststart = 0;
- /* Address of beginning of regexp, or inside of last group. */
- unsigned char *begalt;
- /* Place in the uncompiled pattern (i.e., the {) to
- which to go back if the interval is invalid. */
- const char *beg_interval;
- /* Address of the place where a forward jump should go to the end of
- the containing expression. Each alternative of an `or' -- except the
- last -- ends with a forward jump of this sort. */
- unsigned char *fixup_alt_jump = 0;
- /* Counts open-groups as they are encountered. Remembered for the
- matching close-group on the compile stack, so the same register
- number is put in the stop_memory as the start_memory. */
- regnum_t regnum = 0;
-#ifdef DEBUG
- DEBUG_PRINT1("\nCompiling pattern: ");
- if (debug) {
- unsigned debug_count;
- for (debug_count = 0; debug_count < size; debug_count++)
- putchar(pattern[debug_count]);
- putchar('\n');
- }
-#endif /* DEBUG */
- /* Initialize the compile stack. */
- compile_stack.stack =
- TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
- if (compile_stack.stack == NULL)
- return REG_ESPACE;
- compile_stack.size = INIT_COMPILE_STACK_SIZE;
- compile_stack.avail = 0;
- /* Initialize the pattern buffer. */
- bufp->syntax = syntax;
- bufp->fastmap_accurate = 0;
- bufp->not_bol = bufp->not_eol = 0;
- /* Set `used' to zero, so that if we return an error, the pattern
- printer (for debugging) will think there's no pattern. We reset it
- at the end. */
- bufp->used = 0;
- /* Always count groups, whether or not bufp->no_sub is set. */
- bufp->re_nsub = 0;
-#if !defined emacs && !defined SYNTAX_TABLE
- /* Initialize the syntax table. */
- init_syntax_once();
- if (bufp->allocated == 0) {
- if (bufp->buffer) { /* If zero allocated, but buffer is non-null, try to realloc
- enough space. This loses if buffer's address is bogus, but
- that is the user's responsibility. */
- RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char);
- } else { /* Caller did not allocate a buffer. Do it for them. */
- bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char);
- }
- if (!bufp->buffer)
- bufp->allocated = INIT_BUF_SIZE;
- }
- begalt = b = bufp->buffer;
- /* Loop through the uncompiled pattern until we're at the end. */
- while (p != pend) {
- switch (c) {
- case '^':
- {
- if ( /* If at start of pattern, it's an operator. */
- p == pattern + 1
- /* If context independent, it's an operator. */
- /* Otherwise, depends on what's come before. */
- || at_begline_loc_p(pattern, p, syntax))
- BUF_PUSH(begline);
- else
- goto normal_char;
- }
- break;
- case '$':
- {
- if ( /* If at end of pattern, it's an operator. */
- p == pend
- /* If context independent, it's an operator. */
- /* Otherwise, depends on what's next. */
- || at_endline_loc_p(p, pend, syntax))
- BUF_PUSH(endline);
- else
- goto normal_char;
- }
- break;
- case '+':
- case '?':
- if ((syntax & RE_BK_PLUS_QM)
- || (syntax & RE_LIMITED_OPS))
- goto normal_char;
- handle_plus:
- case '*':
- /* If there is no previous pattern... */
- if (!laststart) {
- if (syntax & RE_CONTEXT_INVALID_OPS)
- else if (!(syntax & RE_CONTEXT_INDEP_OPS))
- goto normal_char;
- }
- {
- /* Are we optimizing this jump? */
- boolean keep_string_p = false;
- /* 1 means zero (many) matches is allowed. */
- char zero_times_ok = 0, many_times_ok = 0;
- /* If there is a sequence of repetition chars, collapse it
- down to just one (the right one). We can't combine
- interval operators with these because of, e.g., `a{2}*',
- which should only match an even number of `a's. */
- for (;;) {
- zero_times_ok |= c != '+';
- many_times_ok |= c != '?';
- if (p == pend)
- break;
- if (c == '*'
- || (!(syntax & RE_BK_PLUS_QM)
- && (c == '+' || c == '?')));
- else if (syntax & RE_BK_PLUS_QM && c == '\\') {
- if (p == pend)
- if (!(c1 == '+' || c1 == '?')) {
- break;
- }
- c = c1;
- } else {
- break;
- }
- /* If we get here, we found another repeat character. */
- }
- /* Star, etc. applied to an empty pattern is equivalent
- to an empty pattern. */
- if (!laststart)
- break;
- /* Now we know whether or not zero matches is allowed
- and also whether or not two or more matches is allowed. */
- if (many_times_ok) { /* More than one repetition is allowed, so put in at the
- end a backward relative jump from `b' to before the next
- jump we're going to put in below (which jumps from
- laststart to after this jump).
- But if we are at the `*' in the exact sequence `.*\n',
- insert an unconditional jump backwards to the .,
- instead of the beginning of the loop. This way we only
- push a failure point once, instead of every time
- through the loop. */
- assert(p - 1 > pattern);
- /* Allocate the space for the jump. */
- /* We know we are not at the first character of the pattern,
- because laststart was nonzero. And we've already
- incremented `p', by the way, to be the character after
- the `*'. Do we have to do something analogous here
- for null bytes, because of RE_DOT_NOT_NULL? */
- if (TRANSLATE(*(p - 2)) == TRANSLATE('.')
- && zero_times_ok
- && p < pend && TRANSLATE(*p) == TRANSLATE('\n')
- && !(syntax & RE_DOT_NEWLINE)) { /* We have .*\n. */
- STORE_JUMP(jump, b, laststart);
- keep_string_p = true;
- } else
- /* Anything else. */
- STORE_JUMP(maybe_pop_jump, b, laststart - 3);
- /* We've added more stuff to the buffer. */
- b += 3;
- }
- /* On failure, jump from laststart to b + 3, which will be the
- end of the buffer after this jump is inserted. */
- INSERT_JUMP(keep_string_p ? on_failure_keep_string_jump
- : on_failure_jump, laststart, b + 3);
- pending_exact = 0;
- b += 3;
- if (!zero_times_ok) {
- /* At least one repetition is required, so insert a
- `dummy_failure_jump' before the initial
- `on_failure_jump' instruction of the loop. This
- effects a skip over that instruction the first time
- we hit that loop. */
- INSERT_JUMP(dummy_failure_jump, laststart,
- laststart + 6);
- b += 3;
- }
- }
- break;
- case '.':
- laststart = b;
- BUF_PUSH(anychar);
- break;
- case '[':
- {
- boolean had_char_class = false;
- if (p == pend)
- /* Ensure that we have enough space to push a charset: the
- opcode, the length count, and the bitset; 34 bytes in all. */
- laststart = b;
- /* We test `*p == '^' twice, instead of using an if
- statement, so we only need one BUF_PUSH. */
- BUF_PUSH(*p == '^' ? charset_not : charset);
- if (*p == '^')
- p++;
- /* Remember the first position in the bracket expression. */
- p1 = p;
- /* Push the number of bytes in the bitmap. */
- /* Clear the whole map. */
- bzero(b, (1 << BYTEWIDTH) / BYTEWIDTH);
- /* charset_not matches newline according to a syntax bit. */
- if ((re_opcode_t) b[-2] == charset_not
- && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) SET_LIST_BIT('\n');
- /* Read in characters and ranges, setting map bits. */
- for (;;) {
- if (p == pend)
- /* \ might escape characters inside [...] and [^...]. */
- if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') {
- if (p == pend)
- continue;
- }
- /* Could be the end of the bracket expression. If it's
- not (i.e., when the bracket expression is `[]' so
- far), the ']' character bit gets set way below. */
- if (c == ']' && p != p1 + 1)
- break;
- /* Look ahead to see if it's a range when the last thing
- was a character class. */
- if (had_char_class && c == '-' && *p != ']')
- /* Look ahead to see if it's a range when the last thing
- was a character: if this is a hyphen not at the
- beginning or the end of a list, then it's the range
- operator. */
- if (c == '-' && !(p - 2 >= pattern && p[-2] == '[')
- && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
- && *p != ']') {
- reg_errcode_t ret
- = compile_range(&p, pend, translate, syntax, b);
- if (ret != REG_NOERROR)
- }
- else if (p[0] == '-' && p[1] != ']') { /* This handles ranges made up of characters only. */
- reg_errcode_t ret;
- /* Move past the `-'. */
- ret = compile_range(&p, pend, translate, syntax, b);
- if (ret != REG_NOERROR)
- }
- /* See if we're at the beginning of a possible character
- class. */
- else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') { /* Leave room for the null. */
- char str[CHAR_CLASS_MAX_LENGTH + 1];
- c1 = 0;
- /* If pattern is `[[:'. */
- if (p == pend)
- for (;;) {
- if ((c == ':' && *p == ']') || p == pend)
- break;
- str[c1++] = c;
- else
- /* This is in any case an invalid class name. */
- str[0] = '\0';
- }
- str[c1] = '\0';
- /* If isn't a word bracketed by `[:' and `:]':
- undo the ending character, the letters, and leave
- the leading `:' and `[' (but set bits for them). */
- if (c == ':' && *p == ']') {
-#if defined _LIBC || WIDE_CHAR_SUPPORT
- boolean is_lower = STREQ(str, "lower");
- boolean is_upper = STREQ(str, "upper");
- wctype_t wt;
- int ch;
- wt = IS_CHAR_CLASS(str);
- if (wt == 0)
- /* Throw away the ] at the end of the character
- class. */
- if (p == pend)
- for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) {
-# ifdef _LIBC
- if (__iswctype(__btowc(ch), wt))
-# else
- if (iswctype(btowc(ch), wt))
-# endif
- if (translate && (is_upper || is_lower)
- && (ISUPPER(ch) || ISLOWER(ch)))
- }
- had_char_class = true;
- int ch;
- boolean is_alnum = STREQ(str, "alnum");
- boolean is_alpha = STREQ(str, "alpha");
- boolean is_blank = STREQ(str, "blank");
- boolean is_cntrl = STREQ(str, "cntrl");
- boolean is_digit = STREQ(str, "digit");
- boolean is_graph = STREQ(str, "graph");
- boolean is_lower = STREQ(str, "lower");
- boolean is_print = STREQ(str, "print");
- boolean is_punct = STREQ(str, "punct");
- boolean is_space = STREQ(str, "space");
- boolean is_upper = STREQ(str, "upper");
- boolean is_xdigit = STREQ(str, "xdigit");
- if (!IS_CHAR_CLASS(str))
- /* Throw away the ] at the end of the character
- class. */
- if (p == pend)
- for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {
- /* This was split into 3 if's to
- avoid an arbitrary limit in some compiler. */
- if ((is_alnum && ISALNUM(ch))
- || (is_alpha && ISALPHA(ch))
- || (is_blank && ISBLANK(ch))
- || (is_cntrl && ISCNTRL(ch)))
- if ((is_digit && ISDIGIT(ch))
- || (is_graph && ISGRAPH(ch))
- || (is_lower && ISLOWER(ch))
- || (is_print && ISPRINT(ch)))
- if ((is_punct && ISPUNCT(ch))
- || (is_space && ISSPACE(ch))
- || (is_upper && ISUPPER(ch))
- || (is_xdigit && ISXDIGIT(ch)))
- if (translate && (is_upper || is_lower)
- && (ISUPPER(ch) || ISLOWER(ch)))
- }
- had_char_class = true;
-#endif /* libc || wctype.h */
- } else {
- c1++;
- while (c1--)
- SET_LIST_BIT('[');
- SET_LIST_BIT(':');
- had_char_class = false;
- }
- } else {
- had_char_class = false;
- }
- }
- /* Discard any (non)matching list bytes that are all 0 at the
- end of the map. Decrease the map-length byte too. */
- while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
- b[-1]--;
- b += b[-1];
- }
- break;
- case '(':
- if (syntax & RE_NO_BK_PARENS)
- goto handle_open;
- else
- goto normal_char;
- case ')':
- if (syntax & RE_NO_BK_PARENS)
- goto handle_close;
- else
- goto normal_char;
- case '\n':
- if (syntax & RE_NEWLINE_ALT)
- goto handle_alt;
- else
- goto normal_char;
- case '|':
- if (syntax & RE_NO_BK_VBAR)
- goto handle_alt;
- else
- goto normal_char;
- case '{':
- if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
- goto handle_interval;
- else
- goto normal_char;
- case '\\':
- if (p == pend)
- /* Do not translate the character after the \, so that we can
- distinguish, e.g., \B from \b, even if we normally would
- translate, e.g., B to b. */
- switch (c) {
- case '(':
- if (syntax & RE_NO_BK_PARENS)
- goto normal_backslash;
- handle_open:
- bufp->re_nsub++;
- regnum++;
- RETALLOC(compile_stack.stack, compile_stack.size << 1,
- compile_stack_elt_t);
- if (compile_stack.stack == NULL)
- return REG_ESPACE;
- compile_stack.size <<= 1;
- }
- /* These are the values to restore when we hit end of this
- group. They are all relative offsets, so that if the
- whole pattern moves because of realloc, they will still
- be valid. */
- COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
- COMPILE_STACK_TOP.fixup_alt_jump
- =
- fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
- COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
- COMPILE_STACK_TOP.regnum = regnum;
- /* We will eventually replace the 0 with the number of
- groups inner to this one. But do not push a
- start_memory for groups beyond the last one we can
- represent in the compiled pattern. */
- if (regnum <= MAX_REGNUM) {
- COMPILE_STACK_TOP.inner_group_offset =
- b - bufp->buffer + 2;
- BUF_PUSH_3(start_memory, regnum, 0);
- }
- compile_stack.avail++;
- fixup_alt_jump = 0;
- laststart = 0;
- begalt = b;
- /* If we've reached MAX_REGNUM groups, then this open
- won't actually generate any code, so we'll have to
- clear pending_exact explicitly. */
- pending_exact = 0;
- break;
- case ')':
- if (syntax & RE_NO_BK_PARENS)
- goto normal_backslash;
- goto normal_backslash;
- else
- }
- handle_close:
- if (fixup_alt_jump) { /* Push a dummy failure point at the end of the
- alternative for a possible future
- `pop_failure_jump' to pop. See comments at
- `push_dummy_failure' in `re_match_2'. */
- BUF_PUSH(push_dummy_failure);
- /* We allocated space for this jump when we assigned
- to `fixup_alt_jump', in the `handle_alt' case below. */
- STORE_JUMP(jump_past_alt, fixup_alt_jump, b - 1);
- }
- /* See similar code for backslashed left paren above. */
- goto normal_char;
- else
- }
- /* Since we just checked for an empty stack above, this
- ``can't happen''. */
- assert(compile_stack.avail != 0);
- {
- /* We don't just want to restore into `regnum', because
- later groups should continue to be numbered higher,
- as in `(ab)c(de)' -- the second group is #2. */
- regnum_t this_group_regnum;
- compile_stack.avail--;
- begalt =
- bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
- fixup_alt_jump =
- COMPILE_STACK_TOP.fixup_alt_jump ? bufp->buffer +
- COMPILE_STACK_TOP.fixup_alt_jump - 1 : 0;
- laststart =
- bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
- this_group_regnum = COMPILE_STACK_TOP.regnum;
- /* If we've reached MAX_REGNUM groups, then this open
- won't actually generate any code, so we'll have to
- clear pending_exact explicitly. */
- pending_exact = 0;
- /* We're at the end of the group, so now we know how many
- groups were inside this one. */
- if (this_group_regnum <= MAX_REGNUM) {
- unsigned char *inner_group_loc
- =
- bufp->buffer +
- COMPILE_STACK_TOP.inner_group_offset;
- *inner_group_loc = regnum - this_group_regnum;
- BUF_PUSH_3(stop_memory, this_group_regnum,
- regnum - this_group_regnum);
- }
- }
- break;
- case '|': /* `\|'. */
- if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
- goto normal_backslash;
- handle_alt:
- if (syntax & RE_LIMITED_OPS)
- goto normal_char;
- /* Insert before the previous alternative a jump which
- jumps to this alternative if the former fails. */
- INSERT_JUMP(on_failure_jump, begalt, b + 6);
- pending_exact = 0;
- b += 3;
- /* The alternative before this one has a jump after it
- which gets executed if it gets matched. Adjust that
- jump so it will jump to this alternative's analogous
- jump (put in below, which in turn will jump to the next
- (if any) alternative's such jump, etc.). The last such
- jump jumps to the correct final destination. A picture:
- _____ _____
- | | | |
- | v | v
- a | b | c
- If we are at `b', then fixup_alt_jump right now points to a
- three-byte space after `a'. We'll put in the jump, set
- fixup_alt_jump to right after `b', and leave behind three
- bytes which we'll fill in when we get to after `c'. */
- if (fixup_alt_jump)
- STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
- /* Mark and leave space for a jump after this alternative,
- to be filled in later either by next alternative or
- when know we're at the end of a series of alternatives. */
- fixup_alt_jump = b;
- b += 3;
- laststart = 0;
- begalt = b;
- break;
- case '{':
- /* If \{ is a literal. */
- if (!(syntax & RE_INTERVALS)
- /* If we're at `\{' and it's not the open-interval
- operator. */
- || ((syntax & RE_INTERVALS)
- && (syntax & RE_NO_BK_BRACES)) || (p - 2 == pattern
- && p == pend))
- goto normal_backslash;
- handle_interval:
- {
- /* If got here, then the syntax allows intervals. */
- /* At least (most) this many matches must be made. */
- int lower_bound = -1, upper_bound = -1;
- beg_interval = p - 1;
- if (p == pend) {
- if (!(syntax & RE_INTERVALS)
- && (syntax & RE_NO_BK_BRACES)) goto
- unfetch_interval;
- else
- }
- GET_UNSIGNED_NUMBER(lower_bound);
- if (c == ',') {
- GET_UNSIGNED_NUMBER(upper_bound);
- if ((!(syntax & RE_NO_BK_BRACES) && c != '\\')
- || ((syntax & RE_NO_BK_BRACES) && c != '}'))
- if (upper_bound < 0)
- upper_bound = RE_DUP_MAX;
- } else
- /* Interval such as `{1}' => match exactly once. */
- upper_bound = lower_bound;
- if (lower_bound < 0 || upper_bound > RE_DUP_MAX
- || lower_bound > upper_bound) {
- if (!(syntax & RE_INTERVALS)
- && (syntax & RE_NO_BK_BRACES)) goto
- unfetch_interval;
- else
- }
- if (!(syntax & RE_NO_BK_BRACES)) {
- if (c != '\\')
- }
- if (c != '}') {
- if (!(syntax & RE_INTERVALS)
- && (syntax & RE_NO_BK_BRACES)) goto
- unfetch_interval;
- else
- }
- /* We just parsed a valid interval. */
- /* If it's invalid to have no preceding re. */
- if (!laststart) {
- if (syntax & RE_CONTEXT_INVALID_OPS)
- else if (syntax & RE_CONTEXT_INDEP_OPS)
- laststart = b;
- else
- goto unfetch_interval;
- }
- /* If the upper bound is zero, don't want to succeed at
- all; jump from `laststart' to `b + 3', which will be
- the end of the buffer after we insert the jump. */
- if (upper_bound == 0) {
- INSERT_JUMP(jump, laststart, b + 3);
- b += 3;
- }
- /* Otherwise, we have a nontrivial interval. When
- we're all done, the pattern will look like:
- set_number_at <jump count> <upper bound>
- set_number_at <succeed_n count> <lower bound>
- succeed_n <after jump addr> <succeed_n count>
- <body of loop>
- jump_n <succeed_n addr> <jump count>
- (The upper bound and `jump_n' are omitted if
- `upper_bound' is 1, though.) */
- else { /* If the upper bound is > 1, we need to insert
- more at the end of the loop. */
- unsigned nbytes = 10 + (upper_bound > 1) * 10;
- /* Initialize lower bound of the `succeed_n', even
- though it will be set during matching by its
- attendant `set_number_at' (inserted next),
- because `re_compile_fastmap' needs to know.
- Jump to the `jump_n' we might insert below. */
- INSERT_JUMP2(succeed_n, laststart,
- b + 5 + (upper_bound > 1) * 5,
- lower_bound);
- b += 5;
- /* Code to initialize the lower bound. Insert
- before the `succeed_n'. The `5' is the last two
- bytes of this `set_number_at', plus 3 bytes of
- the following `succeed_n'. */
- insert_op2(set_number_at, laststart, 5,
- lower_bound, b);
- b += 5;
- if (upper_bound > 1) { /* More than one repetition is allowed, so
- append a backward jump to the `succeed_n'
- that starts this interval.
- When we've reached this during matching,
- we'll have matched the interval once, so
- jump back only `upper_bound - 1' times. */
- STORE_JUMP2(jump_n, b, laststart + 5,
- upper_bound - 1);
- b += 5;
- /* The location we want to set is the second
- parameter of the `jump_n'; that is `b-2' as
- an absolute address. `laststart' will be
- the `set_number_at' we're about to insert;
- `laststart+3' the number to set, the source
- for the relative address. But we are
- inserting into the middle of the pattern --
- so everything is getting moved up by 5.
- Conclusion: (b - 2) - (laststart + 3) + 5,
- i.e., b - laststart.
- We insert this at the beginning of the loop
- so that if we fail during matching, we'll
- reinitialize the bounds. */
- insert_op2(set_number_at, laststart,
- b - laststart, upper_bound - 1, b);
- b += 5;
- }
- }
- pending_exact = 0;
- beg_interval = NULL;
- }
- break;
- unfetch_interval:
- /* If an invalid interval, match the characters as literals. */
- assert(beg_interval);
- p = beg_interval;
- beg_interval = NULL;
- /* normal_char and normal_backslash need `c'. */
- if (!(syntax & RE_NO_BK_BRACES)) {
- if (p > pattern && p[-1] == '\\')
- goto normal_backslash;
- }
- goto normal_char;
-#ifdef emacs
- /* There is no way to specify the before_dot and after_dot
- operators. rms says this is ok. --karl */
- case '=':
- BUF_PUSH(at_dot);
- break;
- case 's':
- laststart = b;
- BUF_PUSH_2(syntaxspec, syntax_spec_code[c]);
- break;
- case 'S':
- laststart = b;
- BUF_PUSH_2(notsyntaxspec, syntax_spec_code[c]);
- break;
-#endif /* emacs */
- case 'w':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- laststart = b;
- BUF_PUSH(wordchar);
- break;
- case 'W':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- laststart = b;
- BUF_PUSH(notwordchar);
- break;
- case '<':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- BUF_PUSH(wordbeg);
- break;
- case '>':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- BUF_PUSH(wordend);
- break;
- case 'b':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- BUF_PUSH(wordbound);
- break;
- case 'B':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- BUF_PUSH(notwordbound);
- break;
- case '`':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- BUF_PUSH(begbuf);
- break;
- case '\'':
- if (syntax & RE_NO_GNU_OPS)
- goto normal_char;
- BUF_PUSH(endbuf);
- break;
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
- if (syntax & RE_NO_BK_REFS)
- goto normal_char;
- c1 = c - '0';
- if (c1 > regnum)
- /* Can't back reference to a subexpression if inside of it. */
- if (group_in_compile_stack(compile_stack, (regnum_t) c1))
- goto normal_char;
- laststart = b;
- BUF_PUSH_2(duplicate, c1);
- break;
- case '+':
- case '?':
- if (syntax & RE_BK_PLUS_QM)
- goto handle_plus;
- else
- goto normal_backslash;
- default:
- normal_backslash:
- /* You might think it would be useful for \ to mean
- not to translate; but if we don't translate it
- it will never match anything. */
- c = TRANSLATE(c);
- goto normal_char;
- }
- break;
- default:
- /* Expects the character in `c'. */
- normal_char:
- /* If no exactn currently being built. */
- if (!pending_exact
- /* If last exactn not at current position. */
- || pending_exact + *pending_exact + 1 != b
- /* We have only one byte following the exactn for the count. */
- || *pending_exact == (1 << BYTEWIDTH) - 1
- /* If followed by a repetition operator. */
- || *p == '*' || *p == '^' || ((syntax & RE_BK_PLUS_QM)
- ? *p == '\\' && (p[1] == '+'
- || p[1] ==
- '?') : (*p
- ==
- '+'
- ||
- *p
- ==
- '?'))
- || ((syntax & RE_INTERVALS)
- && ((syntax & RE_NO_BK_BRACES)
- ? *p == '{' : (p[0] == '\\' && p[1] == '{')))) {
- /* Start building a new exactn. */
- laststart = b;
- BUF_PUSH_2(exactn, 0);
- pending_exact = b - 1;
- }
- BUF_PUSH(c);
- (*pending_exact)++;
- break;
- } /* switch (c) */
- } /* while p != pend */
- /* Through the pattern now. */
- if (fixup_alt_jump)
- STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
- /* If we don't want backtracking, force success
- the first time we reach the end of the compiled pattern. */
- BUF_PUSH(succeed);
- free(compile_stack.stack);
- /* We have succeeded; set the length of the buffer. */
- bufp->used = b - bufp->buffer;
-#ifdef DEBUG
- if (debug) {
- DEBUG_PRINT1("\nCompiled pattern: \n");
- print_compiled_pattern(bufp);
- }
-#endif /* DEBUG */
- /* Initialize the failure stack to the largest possible stack. This
- isn't necessary unless we're trying to avoid calling alloca in
- the search and match routines. */
- {
- int num_regs = bufp->re_nsub + 1;
- /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
- is strictly greater than re_max_failures, the largest possible stack
- is 2 * re_max_failures failure points. */
- if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) {
- fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
-# ifdef emacs
- if (!fail_stack.stack)
- fail_stack.stack
- = (fail_stack_elt_t *) xmalloc(fail_stack.size
- *
- sizeof
- (fail_stack_elt_t));
- else
- fail_stack.stack =
- (fail_stack_elt_t *) xrealloc(fail_stack.stack,
- (fail_stack.size *
- sizeof
- (fail_stack_elt_t)));
-# else /* not emacs */
- if (!fail_stack.stack)
- fail_stack.stack
- = (fail_stack_elt_t *) malloc(fail_stack.size
- *
- sizeof
- (fail_stack_elt_t));
- el