summaryrefslogtreecommitdiff
path: root/libc/misc/regex/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'libc/misc/regex/regexec.c')
-rw-r--r--libc/misc/regex/regexec.c4335
1 files changed, 4335 insertions, 0 deletions
diff --git a/libc/misc/regex/regexec.c b/libc/misc/regex/regexec.c
new file mode 100644
index 000000000..a985d9e7e
--- /dev/null
+++ b/libc/misc/regex/regexec.c
@@ -0,0 +1,4335 @@
+/* Extended regular expression matching and search library.
+ Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
+ int n) internal_function;
+static void match_ctx_clean (re_match_context_t *mctx) internal_function;
+static void match_ctx_free (re_match_context_t *cache) internal_function;
+static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
+ int str_idx, int from, int to)
+ internal_function;
+static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)
+ internal_function;
+static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
+ int str_idx) internal_function;
+static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
+ int node, int str_idx)
+ internal_function;
+static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
+ re_dfastate_t **limited_sts, int last_node,
+ int last_str_idx)
+ internal_function;
+static reg_errcode_t re_search_internal (const regex_t *preg,
+ const char *string, int length,
+ int start, int range, int stop,
+ size_t nmatch, regmatch_t pmatch[],
+ int eflags) internal_function;
+static int re_search_2_stub (struct re_pattern_buffer *bufp,
+ const char *string1, int length1,
+ const char *string2, int length2,
+ int start, int range, struct re_registers *regs,
+ int stop, int ret_len) internal_function;
+static int re_search_stub (struct re_pattern_buffer *bufp,
+ const char *string, int length, int start,
+ int range, int stop, struct re_registers *regs,
+ int ret_len) internal_function;
+static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
+ int nregs, int regs_allocated) internal_function;
+static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
+ internal_function;
+static int check_matching (re_match_context_t *mctx, int fl_longest_match,
+ int *p_match_first) internal_function;
+static int check_halt_state_context (const re_match_context_t *mctx,
+ const re_dfastate_t *state, int idx)
+ internal_function;
+static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
+ regmatch_t *prev_idx_match, int cur_node,
+ int cur_idx, int nmatch) internal_function;
+static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
+ int str_idx, int dest_node, int nregs,
+ regmatch_t *regs,
+ re_node_set *eps_via_nodes)
+ internal_function;
+static reg_errcode_t set_regs (const regex_t *preg,
+ const re_match_context_t *mctx,
+ size_t nmatch, regmatch_t *pmatch,
+ int fl_backtrack) internal_function;
+static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
+ internal_function;
+
+#ifdef RE_ENABLE_I18N
+static int sift_states_iter_mb (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int node_idx, int str_idx, int max_str_idx)
+ internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
+ re_sift_context_t *sctx)
+ internal_function;
+static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
+ re_sift_context_t *sctx, int str_idx,
+ re_node_set *cur_dest)
+ internal_function;
+static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int str_idx,
+ re_node_set *dest_nodes)
+ internal_function;
+static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates)
+ internal_function;
+static int check_dst_limits (const re_match_context_t *mctx,
+ re_node_set *limits,
+ int dst_node, int dst_idx, int src_node,
+ int src_idx) internal_function;
+static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
+ int boundaries, int subexp_idx,
+ int from_node, int bkref_idx)
+ internal_function;
+static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
+ int limit, int subexp_idx,
+ int node, int str_idx,
+ int bkref_idx) internal_function;
+static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
+ re_node_set *dest_nodes,
+ const re_node_set *candidates,
+ re_node_set *limits,
+ struct re_backref_cache_entry *bkref_ents,
+ int str_idx) internal_function;
+static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
+ re_sift_context_t *sctx,
+ int str_idx, const re_node_set *candidates)
+ internal_function;
+static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
+ re_dfastate_t **dst,
+ re_dfastate_t **src, int num)
+ internal_function;
+static re_dfastate_t *find_recover_state (reg_errcode_t *err,
+ re_match_context_t *mctx) internal_function;
+static re_dfastate_t *transit_state (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *state) internal_function;
+static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *next_state)
+ internal_function;
+static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
+ re_node_set *cur_nodes,
+ int str_idx) internal_function;
+#if 0
+static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
+ re_match_context_t *mctx,
+ re_dfastate_t *pstate)
+ internal_function;
+#endif
+#ifdef RE_ENABLE_I18N
+static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
+ re_dfastate_t *pstate)
+ internal_function;
+#endif /* RE_ENABLE_I18N */
+static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
+ const re_node_set *nodes)
+ internal_function;
+static reg_errcode_t get_subexp (re_match_context_t *mctx,
+ int bkref_node, int bkref_str_idx)
+ internal_function;
+static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
+ const re_sub_match_top_t *sub_top,
+ re_sub_match_last_t *sub_last,
+ int bkref_node, int bkref_str)
+ internal_function;
+static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
+ int subexp_idx, int type) internal_function;
+static reg_errcode_t check_arrival (re_match_context_t *mctx,
+ state_array_t *path, int top_node,
+ int top_str, int last_node, int last_str,
+ int type) internal_function;
+static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
+ int str_idx,
+ re_node_set *cur_nodes,
+ re_node_set *next_nodes)
+ internal_function;
+static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
+ re_node_set *cur_nodes,
+ int ex_subexp, int type)
+ internal_function;
+static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
+ re_node_set *dst_nodes,
+ int target, int ex_subexp,
+ int type) internal_function;
+static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
+ re_node_set *cur_nodes, int cur_str,
+ int subexp_num, int type)
+ internal_function;
+static int build_trtable (const re_dfa_t *dfa,
+ re_dfastate_t *state) internal_function;
+#ifdef RE_ENABLE_I18N
+static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,
+ const re_string_t *input, int idx)
+ internal_function;
+# ifdef _LIBC
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
+ size_t name_len)
+ internal_function;
+# endif /* _LIBC */
+#endif /* RE_ENABLE_I18N */
+static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
+ const re_dfastate_t *state,
+ re_node_set *states_node,
+ bitset_t *states_ch) internal_function;
+static int check_node_accept (const re_match_context_t *mctx,
+ const re_token_t *node, int idx)
+ internal_function;
+static reg_errcode_t extend_buffers (re_match_context_t *mctx)
+ internal_function;
+
+/* Entry point for POSIX code. */
+
+/* regexec searches for a given pattern, specified by PREG, in the
+ string STRING.
+
+ If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+ `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
+ least NMATCH elements, and we set them to the offsets of the
+ corresponding matched substrings.
+
+ EFLAGS specifies `execution flags' which affect matching: if
+ REG_NOTBOL is set, then ^ does not match at the beginning of the
+ string; if REG_NOTEOL is set, then $ does not match at the end.
+
+ We return 0 if we find a match and REG_NOMATCH if not. */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+ const regex_t *__restrict preg;
+ const char *__restrict string;
+ size_t nmatch;
+ regmatch_t pmatch[];
+ int eflags;
+{
+ reg_errcode_t err;
+ int start, length;
+#ifndef __UCLIBC__ /* libc_lock_lock does not exist */
+ re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+#endif
+
+ if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
+ return REG_BADPAT;
+
+ if (eflags & REG_STARTEND)
+ {
+ start = pmatch[0].rm_so;
+ length = pmatch[0].rm_eo;
+ }
+ else
+ {
+ start = 0;
+ length = strlen (string);
+ }
+
+ __libc_lock_lock (dfa->lock);
+ if (preg->no_sub)
+ err = re_search_internal (preg, string, length, start, length - start,
+ length, 0, NULL, eflags);
+ else
+ err = re_search_internal (preg, string, length, start, length - start,
+ length, nmatch, pmatch, eflags);
+ __libc_lock_unlock (dfa->lock);
+ return err != REG_NOERROR;
+}
+
+#ifdef _LIBC
+# include <shlib-compat.h>
+versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
+
+# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
+__typeof__ (__regexec) __compat_regexec;
+
+int
+attribute_compat_text_section
+__compat_regexec (const regex_t *__restrict preg,
+ const char *__restrict string, size_t nmatch,
+ regmatch_t pmatch[], int eflags)
+{
+ return regexec (preg, string, nmatch, pmatch,
+ eflags & (REG_NOTBOL | REG_NOTEOL));
+}
+compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
+# endif
+#endif
+
+/* Entry points for GNU code. */
+
+/* re_match, re_search, re_match_2, re_search_2
+
+ The former two functions operate on STRING with length LENGTH,
+ while the later two operate on concatenation of STRING1 and STRING2
+ with lengths LENGTH1 and LENGTH2, respectively.
+
+ re_match() matches the compiled pattern in BUFP against the string,
+ starting at index START.
+
+ re_search() first tries matching at index START, then it tries to match
+ starting from index START + 1, and so on. The last start position tried
+ is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
+ way as re_match().)
+
+ The parameter STOP of re_{match,search}_2 specifies that no match exceeding
+ the first STOP characters of the concatenation of the strings should be
+ concerned.
+
+ If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
+ and all groups is stroed in REGS. (For the "_2" variants, the offsets are
+ computed relative to the concatenation, not relative to the individual
+ strings.)
+
+ On success, re_match* functions return the length of the match, re_search*
+ return the position of the start of the match. Return value -1 means no
+ match was found and -2 indicates an internal error. */
+
+int
+re_match (bufp, string, length, start, regs)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ int length, start;
+ struct re_registers *regs;
+{
+ return re_search_stub (bufp, string, length, start, 0, length, regs, 1);
+}
+#if defined _LIBC || defined __UCLIBC__
+weak_alias (__re_match, re_match)
+#endif
+
+int
+re_search (bufp, string, length, start, range, regs)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ int length, start, range;
+ struct re_registers *regs;
+{
+ return re_search_stub (bufp, string, length, start, range, length, regs, 0);
+}
+#if defined _LIBC || defined __UCLIBC__
+weak_alias (__re_search, re_search)
+#endif
+
+int
+re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int length1, length2, start, stop;
+ struct re_registers *regs;
+{
+ return re_search_2_stub (bufp, string1, length1, string2, length2,
+ start, 0, regs, stop, 1);
+}
+#if defined _LIBC || defined __UCLIBC__
+weak_alias (__re_match_2, re_match_2)
+#endif
+
+int
+re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int length1, length2, start, range, stop;
+ struct re_registers *regs;
+{
+ return re_search_2_stub (bufp, string1, length1, string2, length2,
+ start, range, regs, stop, 0);
+}
+#if defined _LIBC || defined __UCLIBC__
+weak_alias (__re_search_2, re_search_2)
+#endif
+
+static int
+re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,
+ stop, ret_len)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int length1, length2, start, range, stop, ret_len;
+ struct re_registers *regs;
+{
+ const char *str;
+ int rval;
+ int len = length1 + length2;
+ int free_str = 0;
+
+ if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))
+ return -2;
+
+ /* Concatenate the strings. */
+ if (length2 > 0)
+ if (length1 > 0)
+ {
+ char *s = re_malloc (char, len);
+
+ if (BE (s == NULL, 0))
+ return -2;
+#ifdef _LIBC
+ memcpy (__mempcpy (s, string1, length1), string2, length2);
+#else
+ memcpy (s, string1, length1);
+ memcpy (s + length1, string2, length2);
+#endif
+ str = s;
+ free_str = 1;
+ }
+ else
+ str = string2;
+ else
+ str = string1;
+
+ rval = re_search_stub (bufp, str, len, start, range, stop, regs,
+ ret_len);
+ if (free_str)
+ re_free ((char *) str);
+ return rval;
+}
+
+/* The parameters have the same meaning as those of re_search.
+ Additional parameters:
+ If RET_LEN is nonzero the length of the match is returned (re_match style);
+ otherwise the position of the match is returned. */
+
+static int
+re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)
+ struct re_pattern_buffer *bufp;
+ const char *string;
+ int length, start, range, stop, ret_len;
+ struct re_registers *regs;
+{
+ reg_errcode_t result;
+ regmatch_t *pmatch;
+ int nregs, rval;
+ int eflags = 0;
+#ifndef __UCLIBC__ /* libc_lock_lock does not exist */
+ re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+#endif
+
+ /* Check for out-of-range. */
+ if (BE (start < 0 || start > length, 0))
+ return -1;
+ if (BE (start + range > length, 0))
+ range = length - start;
+ else if (BE (start + range < 0, 0))
+ range = -start;
+
+ __libc_lock_lock (dfa->lock);
+
+ eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
+ eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
+
+ /* Compile fastmap if we haven't yet. */
+ if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)
+ re_compile_fastmap (bufp);
+
+ if (BE (bufp->no_sub, 0))
+ regs = NULL;
+
+ /* We need at least 1 register. */
+ if (regs == NULL)
+ nregs = 1;
+ else if (BE (bufp->regs_allocated == REGS_FIXED &&
+ regs->num_regs < bufp->re_nsub + 1, 0))
+ {
+ nregs = regs->num_regs;
+ if (BE (nregs < 1, 0))
+ {
+ /* Nothing can be copied to regs. */
+ regs = NULL;
+ nregs = 1;
+ }
+ }
+ else
+ nregs = bufp->re_nsub + 1;
+ pmatch = re_malloc (regmatch_t, nregs);
+ if (BE (pmatch == NULL, 0))
+ {
+ rval = -2;
+ goto out;
+ }
+
+ result = re_search_internal (bufp, string, length, start, range, stop,
+ nregs, pmatch, eflags);
+
+ rval = 0;
+
+ /* I hope we needn't fill ther regs with -1's when no match was found. */
+ if (result != REG_NOERROR)
+ rval = -1;
+ else if (regs != NULL)
+ {
+ /* If caller wants register contents data back, copy them. */
+ bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
+ bufp->regs_allocated);
+ if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
+ rval = -2;
+ }
+
+ if (BE (rval == 0, 1))
+ {
+ if (ret_len)
+ {
+ assert (pmatch[0].rm_so == start);
+ rval = pmatch[0].rm_eo - start;
+ }
+ else
+ rval = pmatch[0].rm_so;
+ }
+ re_free (pmatch);
+ out:
+ __libc_lock_unlock (dfa->lock);
+ return rval;
+}
+
+static unsigned
+re_copy_regs (regs, pmatch, nregs, regs_allocated)
+ struct re_registers *regs;
+ regmatch_t *pmatch;
+ int nregs, regs_allocated;
+{
+ int rval = REGS_REALLOCATE;
+ int i;
+ int need_regs = nregs + 1;
+ /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
+ uses. */
+
+ /* Have the register data arrays been allocated? */
+ if (regs_allocated == REGS_UNALLOCATED)
+ { /* No. So allocate them with malloc. */
+ regs->start = re_malloc (regoff_t, need_regs);
+ regs->end = re_malloc (regoff_t, need_regs);
+ if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0))
+ return REGS_UNALLOCATED;
+ regs->num_regs = need_regs;
+ }
+ else if (regs_allocated == REGS_REALLOCATE)
+ { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (BE (need_regs > regs->num_regs, 0))
+ {
+ regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
+ regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs);
+ if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0))
+ return REGS_UNALLOCATED;
+ regs->start = new_start;
+ regs->end = new_end;
+ regs->num_regs = need_regs;
+ }
+ }
+ else
+ {
+ assert (regs_allocated == REGS_FIXED);
+ /* This function may not be called with REGS_FIXED and nregs too big. */
+ assert (regs->num_regs >= nregs);
+ rval = REGS_FIXED;
+ }
+
+ /* Copy the regs. */
+ for (i = 0; i < nregs; ++i)
+ {
+ regs->start[i] = pmatch[i].rm_so;
+ regs->end[i] = pmatch[i].rm_eo;
+ }
+ for ( ; i < regs->num_regs; ++i)
+ regs->start[i] = regs->end[i] = -1;
+
+ return rval;
+}
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
+ this memory for recording register information. STARTS and ENDS
+ must be allocated using the malloc library routine, and must each
+ be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+ struct re_pattern_buffer *bufp;
+ struct re_registers *regs;
+ unsigned num_regs;
+ regoff_t *starts, *ends;
+{
+ if (num_regs)
+ {
+ bufp->regs_allocated = REGS_REALLOCATE;
+ regs->num_regs = num_regs;
+ regs->start = starts;
+ regs->end = ends;
+ }
+ else
+ {
+ bufp->regs_allocated = REGS_UNALLOCATED;
+ regs->num_regs = 0;
+ regs->start = regs->end = (regoff_t *) 0;
+ }
+}
+#if defined _LIBC || defined __UCLIBC__
+weak_alias (__re_set_registers, re_set_registers)
+#endif
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them unless specifically requested. */
+
+#if defined _REGEX_RE_COMP || defined _LIBC || defined __UCLIBC__
+int
+# if defined _LIBC || defined __UCLIBC__
+weak_function
+# endif
+re_exec (s)
+ const char *s;
+{
+ return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
+}
+#endif /* _REGEX_RE_COMP */
+
+/* Internal entry point. */
+
+/* Searches for a compiled pattern PREG in the string STRING, whose
+ length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
+ mingings with regexec. START, and RANGE have the same meanings
+ with re_search.
+ Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
+ otherwise return the error code.
+ Note: We assume front end functions already check ranges.
+ (START + RANGE >= 0 && START + RANGE <= LENGTH) */
+
+static reg_errcode_t
+re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
+ eflags)
+ const regex_t *preg;
+ const char *string;
+ int length, start, range, stop, eflags;
+ size_t nmatch;
+ regmatch_t pmatch[];
+{
+ reg_errcode_t err;
+ const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
+ int left_lim, right_lim, incr;
+ int fl_longest_match, match_first, match_kind, match_last = -1;
+ int extra_nmatch;
+ int sb, ch;
+#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
+ re_match_context_t mctx = { .dfa = dfa };
+#else
+ re_match_context_t mctx;
+#endif
+ char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate
+ && range && !preg->can_be_null) ? preg->fastmap : NULL;
+ RE_TRANSLATE_TYPE t = preg->translate;
+
+#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
+ memset (&mctx, '\0', sizeof (re_match_context_t));
+ mctx.dfa = dfa;
+#endif
+
+ extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
+ nmatch -= extra_nmatch;
+
+ /* Check if the DFA haven't been compiled. */
+ if (BE (preg->used == 0 || dfa->init_state == NULL
+ || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+ || dfa->init_state_begbuf == NULL, 0))
+ return REG_NOMATCH;
+
+#ifdef DEBUG
+ /* We assume front-end functions already check them. */
+ assert (start + range >= 0 && start + range <= length);
+#endif
+
+ /* If initial states with non-begbuf contexts have no elements,
+ the regex must be anchored. If preg->newline_anchor is set,
+ we'll never use init_state_nl, so do not check it. */
+ if (dfa->init_state->nodes.nelem == 0
+ && dfa->init_state_word->nodes.nelem == 0
+ && (dfa->init_state_nl->nodes.nelem == 0
+ || !preg->newline_anchor))
+ {
+ if (start != 0 && start + range != 0)
+ return REG_NOMATCH;
+ start = range = 0;
+ }
+
+ /* We must check the longest matching, if nmatch > 0. */
+ fl_longest_match = (nmatch != 0 || dfa->nbackref);
+
+ err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
+ preg->translate, preg->syntax & RE_ICASE, dfa);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ mctx.input.stop = stop;
+ mctx.input.raw_stop = stop;
+ mctx.input.newline_anchor = preg->newline_anchor;
+
+ err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ /* We will log all the DFA states through which the dfa pass,
+ if nmatch > 1, or this dfa has "multibyte node", which is a
+ back-reference or a node which can accept multibyte character or
+ multi character collating element. */
+ if (nmatch > 1 || dfa->has_mb_node)
+ {
+ mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
+ if (BE (mctx.state_log == NULL, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ }
+ else
+ mctx.state_log = NULL;
+
+ match_first = start;
+ mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
+ : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
+
+ /* Check incrementally whether of not the input string match. */
+ incr = (range < 0) ? -1 : 1;
+ left_lim = (range < 0) ? start + range : start;
+ right_lim = (range < 0) ? start : start + range;
+ sb = dfa->mb_cur_max == 1;
+ match_kind =
+ (fastmap
+ ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
+ | (range >= 0 ? 2 : 0)
+ | (t != NULL ? 1 : 0))
+ : 8);
+
+ for (;; match_first += incr)
+ {
+ err = REG_NOMATCH;
+ if (match_first < left_lim || right_lim < match_first)
+ goto free_return;
+
+ /* Advance as rapidly as possible through the string, until we
+ find a plausible place to start matching. This may be done
+ with varying efficiency, so there are various possibilities:
+ only the most common of them are specialized, in order to
+ save on code size. We use a switch statement for speed. */
+ switch (match_kind)
+ {
+ case 8:
+ /* No fastmap. */
+ break;
+
+ case 7:
+ /* Fastmap with single-byte translation, match forward. */
+ while (BE (match_first < right_lim, 1)
+ && !fastmap[t[(unsigned char) string[match_first]]])
+ ++match_first;
+ goto forward_match_found_start_or_reached_end;
+
+ case 6:
+ /* Fastmap without translation, match forward. */
+ while (BE (match_first < right_lim, 1)
+ && !fastmap[(unsigned char) string[match_first]])
+ ++match_first;
+
+ forward_match_found_start_or_reached_end:
+ if (BE (match_first == right_lim, 0))
+ {
+ ch = match_first >= length
+ ? 0 : (unsigned char) string[match_first];
+ if (!fastmap[t ? t[ch] : ch])
+ goto free_return;
+ }
+ break;
+
+ case 4:
+ case 5:
+ /* Fastmap without multi-byte translation, match backwards. */
+ while (match_first >= left_lim)
+ {
+ ch = match_first >= length
+ ? 0 : (unsigned char) string[match_first];
+ if (fastmap[t ? t[ch] : ch])
+ break;
+ --match_first;
+ }
+ if (match_first < left_lim)
+ goto free_return;
+ break;
+
+ default:
+ /* In this case, we can't determine easily the current byte,
+ since it might be a component byte of a multibyte
+ character. Then we use the constructed buffer instead. */
+ for (;;)
+ {
+ /* If MATCH_FIRST is out of the valid range, reconstruct the
+ buffers. */
+ unsigned int offset = match_first - mctx.input.raw_mbs_idx;
+ if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))
+ {
+ err = re_string_reconstruct (&mctx.input, match_first,
+ eflags);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+ offset = match_first - mctx.input.raw_mbs_idx;
+ }
+ /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
+ Note that MATCH_FIRST must not be smaller than 0. */
+ ch = (match_first >= length
+ ? 0 : re_string_byte_at (&mctx.input, offset));
+ if (fastmap[ch])
+ break;
+ match_first += incr;
+ if (match_first < left_lim || match_first > right_lim)
+ {
+ err = REG_NOMATCH;
+ goto free_return;
+ }
+ }
+ break;
+ }
+
+ /* Reconstruct the buffers so that the matcher can assume that
+ the matching starts from the beginning of the buffer. */
+ err = re_string_reconstruct (&mctx.input, match_first, eflags);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+
+#ifdef RE_ENABLE_I18N
+ /* Don't consider this char as a possible match start if it part,
+ yet isn't the head, of a multibyte character. */
+ if (!sb && !re_string_first_byte (&mctx.input, 0))
+ continue;
+#endif
+
+ /* It seems to be appropriate one, then use the matcher. */
+ /* We assume that the matching starts from 0. */
+ mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
+ match_last = check_matching (&mctx, fl_longest_match,
+ range >= 0 ? &match_first : NULL);
+ if (match_last != -1)
+ {
+ if (BE (match_last == -2, 0))
+ {
+ err = REG_ESPACE;
+ goto free_return;
+ }
+ else
+ {
+ mctx.match_last = match_last;
+ if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
+ {
+ re_dfastate_t *pstate = mctx.state_log[match_last];
+ mctx.last_node = check_halt_state_context (&mctx, pstate,
+ match_last);
+ }
+ if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
+ || dfa->nbackref)
+ {
+ err = prune_impossible_nodes (&mctx);
+ if (err == REG_NOERROR)
+ break;
+ if (BE (err != REG_NOMATCH, 0))
+ goto free_return;
+ match_last = -1;
+ }
+ else
+ break; /* We found a match. */
+ }
+ }
+
+ match_ctx_clean (&mctx);
+ }
+
+#ifdef DEBUG
+ assert (match_last != -1);
+ assert (err == REG_NOERROR);
+#endif
+
+ /* Set pmatch[] if we need. */
+ if (nmatch > 0)
+ {
+ int reg_idx;
+
+ /* Initialize registers. */
+ for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
+ pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
+
+ /* Set the points where matching start/end. */
+ pmatch[0].rm_so = 0;
+ pmatch[0].rm_eo = mctx.match_last;
+
+ if (!preg->no_sub && nmatch > 1)
+ {
+ err = set_regs (preg, &mctx, nmatch, pmatch,
+ dfa->has_plural_match && dfa->nbackref > 0);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+
+ /* At last, add the offset to the each registers, since we slided
+ the buffers so that we could assume that the matching starts
+ from 0. */
+ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
+ if (pmatch[reg_idx].rm_so != -1)
+ {
+#ifdef RE_ENABLE_I18N
+ if (BE (mctx.input.offsets_needed != 0, 0))
+ {
+ pmatch[reg_idx].rm_so =
+ (pmatch[reg_idx].rm_so == mctx.input.valid_len
+ ? mctx.input.valid_raw_len
+ : mctx.input.offsets[pmatch[reg_idx].rm_so]);
+ pmatch[reg_idx].rm_eo =
+ (pmatch[reg_idx].rm_eo == mctx.input.valid_len
+ ? mctx.input.valid_raw_len
+ : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
+ }
+#else
+ assert (mctx.input.offsets_needed == 0);
+#endif
+ pmatch[reg_idx].rm_so += match_first;
+ pmatch[reg_idx].rm_eo += match_first;
+ }
+ for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
+ {
+ pmatch[nmatch + reg_idx].rm_so = -1;
+ pmatch[nmatch + reg_idx].rm_eo = -1;
+ }
+
+ if (dfa->subexp_map)
+ for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+ if (dfa->subexp_map[reg_idx] != reg_idx)
+ {
+ pmatch[reg_idx + 1].rm_so
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
+ pmatch[reg_idx + 1].rm_eo
+ = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
+ }
+ }
+
+ free_return:
+ re_free (mctx.state_log);
+ if (dfa->nbackref)
+ match_ctx_free (&mctx);
+ re_string_destruct (&mctx.input);
+ return err;
+}
+
+static reg_errcode_t
+prune_impossible_nodes (mctx)
+ re_match_context_t *mctx;
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ int halt_node, match_last;
+ reg_errcode_t ret;
+ re_dfastate_t **sifted_states;
+ re_dfastate_t **lim_states = NULL;
+ re_sift_context_t sctx;
+#ifdef DEBUG
+ assert (mctx->state_log != NULL);
+#endif
+ match_last = mctx->match_last;
+ halt_node = mctx->last_node;
+ sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
+ if (BE (sifted_states == NULL, 0))
+ {
+ ret = REG_ESPACE;
+ goto free_return;
+ }
+ if (dfa->nbackref)
+ {
+ lim_states = re_malloc (re_dfastate_t *, match_last + 1);
+ if (BE (lim_states == NULL, 0))
+ {
+ ret = REG_ESPACE;
+ goto free_return;
+ }
+ while (1)
+ {
+ memset (lim_states, '\0',
+ sizeof (re_dfastate_t *) * (match_last + 1));
+ sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
+ match_last);
+ ret = sift_states_backward (mctx, &sctx);
+ re_node_set_free (&sctx.limits);
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ if (sifted_states[0] != NULL || lim_states[0] != NULL)
+ break;
+ do
+ {
+ --match_last;
+ if (match_last < 0)
+ {
+ ret = REG_NOMATCH;
+ goto free_return;
+ }
+ } while (mctx->state_log[match_last] == NULL
+ || !mctx->state_log[match_last]->halt);
+ halt_node = check_halt_state_context (mctx,
+ mctx->state_log[match_last],
+ match_last);
+ }
+ ret = merge_state_array (dfa, sifted_states, lim_states,
+ match_last + 1);
+ re_free (lim_states);
+ lim_states = NULL;
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ }
+ else
+ {
+ sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
+ ret = sift_states_backward (mctx, &sctx);
+ re_node_set_free (&sctx.limits);
+ if (BE (ret != REG_NOERROR, 0))
+ goto free_return;
+ }
+ re_free (mctx->state_log);
+ mctx->state_log = sifted_states;
+ sifted_states = NULL;
+ mctx->last_node = halt_node;
+ mctx->match_last = match_last;
+ ret = REG_NOERROR;
+ free_return:
+ re_free (sifted_states);
+ re_free (lim_states);
+ return ret;
+}
+
+/* Acquire an initial state and return it.
+ We must select appropriate initial state depending on the context,
+ since initial states may have constraints like "\<", "^", etc.. */
+
+static inline re_dfastate_t *
+__attribute ((always_inline)) internal_function
+acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
+ int idx)
+{
+ const re_dfa_t *const dfa = mctx->dfa;
+ if (dfa->init_state->has_constraint)
+ {
+ unsigned int context;
+ context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
+ if (IS_WORD_CONTEXT (context))
+ return dfa->init_state_word;
+ else if (IS_ORDINARY_CONTEXT (context))
+ return dfa->init_state;
+ else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
+ return dfa->init_state_begbuf;
+ else if (IS_NEWLINE_CONTEXT (context))
+ return dfa->init_state_nl;
+ else if (IS_BEGBUF_CONTEXT (context))
+ {
+ /* It is relatively rare case, then calculate on demand. */
+ return re_acquire_state_context (err, dfa,
+ dfa->init_state->entrance_nodes,
+ context);
+ }
+ else
+ /* Must not happen? */
+ return dfa->init_state;
+ }
+ else
+ return