diff options
Diffstat (limited to 'package/toolbox/src/grep')
-rw-r--r-- | package/toolbox/src/grep/Makefile | 4 | ||||
-rw-r--r-- | package/toolbox/src/grep/fastgrep.c | 336 | ||||
-rw-r--r-- | package/toolbox/src/grep/file.c | 215 | ||||
-rw-r--r-- | package/toolbox/src/grep/grep.c | 695 | ||||
-rw-r--r-- | package/toolbox/src/grep/grep.h | 149 | ||||
-rw-r--r-- | package/toolbox/src/grep/queue.c | 116 | ||||
-rw-r--r-- | package/toolbox/src/grep/util.c | 499 |
7 files changed, 2014 insertions, 0 deletions
diff --git a/package/toolbox/src/grep/Makefile b/package/toolbox/src/grep/Makefile new file mode 100644 index 000000000..30336ccbe --- /dev/null +++ b/package/toolbox/src/grep/Makefile @@ -0,0 +1,4 @@ +PROG= grep +SRCS= fastgrep.c file.c grep.c queue.c util.c + +include ../tool.mk diff --git a/package/toolbox/src/grep/fastgrep.c b/package/toolbox/src/grep/fastgrep.c new file mode 100644 index 000000000..2fcd864e2 --- /dev/null +++ b/package/toolbox/src/grep/fastgrep.c @@ -0,0 +1,336 @@ +/* $OpenBSD: util.c,v 1.36 2007/10/02 17:59:18 otto Exp $ */ +/* $FreeBSD: head/usr.bin/grep/fastgrep.c 211496 2010-08-19 09:28:59Z des $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * Copyright (C) 2008 Gabor Kovesdan <gabor@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * XXX: This file is a speed up for grep to cover the defects of the + * regex library. These optimizations should practically be implemented + * there keeping this code clean. This is a future TODO, but for the + * meantime, we need to use this workaround. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include <sys/cdefs.h> +__RCSID("$NetBSD: fastgrep.c,v 1.5 2011/04/18 03:27:40 joerg Exp $"); + +#include <limits.h> +#include <stdbool.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <wctype.h> + +#include "grep.h" + +static inline int grep_cmp(const unsigned char *, const unsigned char *, size_t); +static inline void grep_revstr(unsigned char *, int); + +void +fgrepcomp(fastgrep_t *fg, const char *pat) +{ + unsigned int i; + + /* Initialize. */ + fg->len = strlen(pat); + fg->bol = false; + fg->eol = false; + fg->reversed = false; + + fg->pattern = (unsigned char *)grep_strdup(pat); + + /* Preprocess pattern. */ + for (i = 0; i <= UCHAR_MAX; i++) + fg->qsBc[i] = fg->len; + for (i = 1; i < fg->len; i++) + fg->qsBc[fg->pattern[i]] = fg->len - i; +} + +/* + * Returns: -1 on failure, 0 on success + */ +int +fastcomp(fastgrep_t *fg, const char *pat) +{ + unsigned int i; + int firstHalfDot = -1; + int firstLastHalfDot = -1; + int hasDot = 0; + int lastHalfDot = 0; + int shiftPatternLen; + + /* Initialize. */ + fg->len = strlen(pat); + fg->bol = false; + fg->eol = false; + fg->reversed = false; + fg->word = wflag; + + /* Remove end-of-line character ('$'). */ + if (fg->len > 0 && pat[fg->len - 1] == '$') { + fg->eol = true; + fg->len--; + } + + /* Remove beginning-of-line character ('^'). */ + if (pat[0] == '^') { + fg->bol = true; + fg->len--; + pat++; + } + + if (fg->len >= 14 && + memcmp(pat, "[[:<:]]", 7) == 0 && + memcmp(pat + fg->len - 7, "[[:>:]]", 7) == 0) { + fg->len -= 14; + pat += 7; + /* Word boundary is handled separately in util.c */ + fg->word = true; + } + + /* + * pat has been adjusted earlier to not include '^', '$' or + * the word match character classes at the beginning and ending + * of the string respectively. + */ + fg->pattern = grep_malloc(fg->len + 1); + memcpy(fg->pattern, pat, fg->len); + fg->pattern[fg->len] = '\0'; + + /* Look for ways to cheat...er...avoid the full regex engine. */ + for (i = 0; i < fg->len; i++) { + /* Can still cheat? */ + if (fg->pattern[i] == '.') { + hasDot = i; + if (i < fg->len / 2) { + if (firstHalfDot < 0) + /* Closest dot to the beginning */ + firstHalfDot = i; + } else { + /* Closest dot to the end of the pattern. */ + lastHalfDot = i; + if (firstLastHalfDot < 0) + firstLastHalfDot = i; + } + } else { + /* Free memory and let others know this is empty. */ + free(fg->pattern); + fg->pattern = NULL; + return (-1); + } + } + + /* + * Determine if a reverse search would be faster based on the placement + * of the dots. + */ + if ((!(lflag || cflag)) && ((!(fg->bol || fg->eol)) && + ((lastHalfDot) && ((firstHalfDot < 0) || + ((fg->len - (lastHalfDot + 1)) < (size_t)firstHalfDot)))) && + !oflag && !color) { + fg->reversed = true; + hasDot = fg->len - (firstHalfDot < 0 ? + firstLastHalfDot : firstHalfDot) - 1; + grep_revstr(fg->pattern, fg->len); + } + + /* + * Normal Quick Search would require a shift based on the position the + * next character after the comparison is within the pattern. With + * wildcards, the position of the last dot effects the maximum shift + * distance. + * The closer to the end the wild card is the slower the search. A + * reverse version of this algorithm would be useful for wildcards near + * the end of the string. + * + * Examples: + * Pattern Max shift + * ------- --------- + * this 5 + * .his 4 + * t.is 3 + * th.s 2 + * thi. 1 + */ + + /* Adjust the shift based on location of the last dot ('.'). */ + shiftPatternLen = fg->len - hasDot; + + /* Preprocess pattern. */ + for (i = 0; i <= (signed)UCHAR_MAX; i++) + fg->qsBc[i] = shiftPatternLen; + for (i = hasDot + 1; i < fg->len; i++) { + fg->qsBc[fg->pattern[i]] = fg->len - i; + } + + /* + * Put pattern back to normal after pre-processing to allow for easy + * comparisons later. + */ + if (fg->reversed) + grep_revstr(fg->pattern, fg->len); + + return (0); +} + +int +grep_search(fastgrep_t *fg, const unsigned char *data, size_t len, regmatch_t *pmatch) +{ + unsigned int j; + int ret = REG_NOMATCH; + + if (pmatch->rm_so == (ssize_t)len) + return (ret); + + if (fg->bol && pmatch->rm_so != 0) { + pmatch->rm_so = len; + pmatch->rm_eo = len; + return (ret); + } + + /* No point in going farther if we do not have enough data. */ + if (len < fg->len) + return (ret); + + /* Only try once at the beginning or ending of the line. */ + if (fg->bol || fg->eol) { + /* Simple text comparison. */ + /* Verify data is >= pattern length before searching on it. */ + if (len >= fg->len) { + /* Determine where in data to start search at. */ + j = fg->eol ? len - fg->len : 0; + if (!((fg->bol && fg->eol) && (len != fg->len))) + if (grep_cmp(fg->pattern, data + j, + fg->len) == -1) { + pmatch->rm_so = j; + pmatch->rm_eo = j + fg->len; + ret = 0; + } + } + } else if (fg->reversed) { + /* Quick Search algorithm. */ + j = len; + do { + if (grep_cmp(fg->pattern, data + j - fg->len, + fg->len) == -1) { + pmatch->rm_so = j - fg->len; + pmatch->rm_eo = j; + ret = 0; + break; + } + /* Shift if within bounds, otherwise, we are done. */ + if (j == fg->len) + break; + j -= fg->qsBc[data[j - fg->len - 1]]; + } while (j >= fg->len); + } else { + /* Quick Search algorithm. */ + j = pmatch->rm_so; + do { + if (grep_cmp(fg->pattern, data + j, fg->len) == -1) { + pmatch->rm_so = j; + pmatch->rm_eo = j + fg->len; + ret = 0; + break; + } + + /* Shift if within bounds, otherwise, we are done. */ + if (j + fg->len == len) + break; + else + j += fg->qsBc[data[j + fg->len]]; + } while (j <= (len - fg->len)); + } + + return (ret); +} + +/* + * Returns: i >= 0 on failure (position that it failed) + * -1 on success + */ +static inline int +grep_cmp(const unsigned char *pat, const unsigned char *data, size_t len) +{ + size_t size; + wchar_t *wdata, *wpat; + unsigned int i; + + if (iflag) { + if ((size = mbstowcs(NULL, (const char *)data, 0)) == + ((size_t) - 1)) + return (-1); + + wdata = grep_malloc(size * sizeof(wint_t)); + + if (mbstowcs(wdata, (const char *)data, size) == + ((size_t) - 1)) + return (-1); + + if ((size = mbstowcs(NULL, (const char *)pat, 0)) == + ((size_t) - 1)) + return (-1); + + wpat = grep_malloc(size * sizeof(wint_t)); + + if (mbstowcs(wpat, (const char *)pat, size) == ((size_t) - 1)) + return (-1); + for (i = 0; i < len; i++) { + if ((towlower(wpat[i]) == towlower(wdata[i])) || + ((grepbehave != GREP_FIXED) && wpat[i] == L'.')) + continue; + free(wpat); + free(wdata); + return (i); + } + } else { + for (i = 0; i < len; i++) { + if ((pat[i] == data[i]) || ((grepbehave != GREP_FIXED) && + pat[i] == '.')) + continue; + return (i); + } + } + return (-1); +} + +static inline void +grep_revstr(unsigned char *str, int len) +{ + int i; + char c; + + for (i = 0; i < len / 2; i++) { + c = str[i]; + str[i] = str[len - i - 1]; + str[len - i - 1] = c; + } +} diff --git a/package/toolbox/src/grep/file.c b/package/toolbox/src/grep/file.c new file mode 100644 index 000000000..4cbb2a736 --- /dev/null +++ b/package/toolbox/src/grep/file.c @@ -0,0 +1,215 @@ +/* $NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $ */ +/* $FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $ */ +/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> + * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include <sys/cdefs.h> +__RCSID("$NetBSD: file.c,v 1.7 2011/04/18 22:46:48 joerg Exp $"); + +#include <sys/param.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <wchar.h> +#include <wctype.h> + +#include "grep.h" + +#define MAXBUFSIZ (32 * 1024) +#define LNBUFBUMP 80 + +static unsigned char buffer[MAXBUFSIZ]; +static unsigned char *bufpos; +static size_t bufrem; + +static unsigned char *lnbuf; +static size_t lnbuflen; + +static inline int +grep_refill(struct file *f) +{ + ssize_t nr; + + bufpos = buffer; + bufrem = 0; + + nr = read(f->fd, buffer, MAXBUFSIZ); + + if (nr < 0) + return (-1); + + bufrem = nr; + return (0); +} + +static inline int +grep_lnbufgrow(size_t newlen) +{ + + if (lnbuflen < newlen) { + lnbuf = grep_realloc(lnbuf, newlen); + lnbuflen = newlen; + } + + return (0); +} + +char * +grep_fgetln(struct file *f, size_t *lenp) +{ + unsigned char *p; + char *ret; + size_t len; + size_t off; + ptrdiff_t diff; + + /* Fill the buffer, if necessary */ + if (bufrem == 0 && grep_refill(f) != 0) + goto error; + + if (bufrem == 0) { + /* Return zero length to indicate EOF */ + *lenp = 0; + return ((char *)bufpos); + } + + /* Look for a newline in the remaining part of the buffer */ + if ((p = memchr(bufpos, line_sep, bufrem)) != NULL) { + ++p; /* advance over newline */ + ret = (char *)bufpos; + len = p - bufpos; + bufrem -= len; + bufpos = p; + *lenp = len; + return (ret); + } + + /* We have to copy the current buffered data to the line buffer */ + for (len = bufrem, off = 0; ; len += bufrem) { + /* Make sure there is room for more data */ + if (grep_lnbufgrow(len + LNBUFBUMP)) + goto error; + memcpy(lnbuf + off, bufpos, len - off); + off = len; + if (grep_refill(f) != 0) + goto error; + if (bufrem == 0) + /* EOF: return partial line */ + break; + if ((p = memchr(bufpos, line_sep, bufrem)) == NULL) + continue; + /* got it: finish up the line (like code above) */ + ++p; + diff = p - bufpos; + len += diff; + if (grep_lnbufgrow(len)) + goto error; + memcpy(lnbuf + off, bufpos, diff); + bufrem -= diff; + bufpos = p; + break; + } + *lenp = len; + return ((char *)lnbuf); + +error: + *lenp = 0; + return (NULL); +} + +static inline struct file * +grep_file_init(struct file *f) +{ + /* Fill read buffer, also catches errors early */ + if (grep_refill(f) != 0) + goto error; + + /* Check for binary stuff, if necessary */ + if (!nulldataflag && binbehave != BINFILE_TEXT && + memchr(bufpos, '\0', bufrem) != NULL) + f->binary = true; + + return (f); +error: + close(f->fd); + free(f); + return (NULL); +} + +/* + * Opens a file for processing. + */ +struct file * +grep_open(const char *path) +{ + struct file *f; + + f = grep_malloc(sizeof *f); + memset(f, 0, sizeof *f); + if (path == NULL) { + /* Processing stdin implies --line-buffered. */ + lbflag = true; + f->fd = STDIN_FILENO; + } else if ((f->fd = open(path, O_RDONLY)) == -1) { + free(f); + return (NULL); + } + + return (grep_file_init(f)); +} + +/* + * Closes a file. + */ +void +grep_close(struct file *f) +{ + + close(f->fd); + + /* Reset read buffer and line buffer */ + bufpos = buffer; + bufrem = 0; + + free(lnbuf); + lnbuf = NULL; + lnbuflen = 0; +} diff --git a/package/toolbox/src/grep/grep.c b/package/toolbox/src/grep/grep.c new file mode 100644 index 000000000..6d70f8252 --- /dev/null +++ b/package/toolbox/src/grep/grep.c @@ -0,0 +1,695 @@ +/* $NetBSD: grep.c,v 1.11 2012/05/06 22:27:00 joerg Exp $ */ +/* $FreeBSD: head/usr.bin/grep/grep.c 211519 2010-08-19 22:55:17Z delphij $ */ +/* $OpenBSD: grep.c,v 1.42 2010/07/02 22:18:03 tedu Exp $ */ + +/*- + * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav + * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#endif + +#include <sys/cdefs.h> +__RCSID("$NetBSD: grep.c,v 1.11 2012/05/06 22:27:00 joerg Exp $"); + +#include <sys/stat.h> +#include <sys/types.h> + +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <getopt.h> +#include <limits.h> +#include <libgen.h> +#include <locale.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "grep.h" + +/* + * Default messags to use when NLS is disabled or no catalogue + * is found. + */ +const char *errstr[] = { + "", +/* 1*/ "(standard input)", +/* 2*/ "cannot read bzip2 compressed file", +/* 3*/ "unknown %s option", +/* 4*/ "usage: %s [-abcDEFGHhIiJLlmnOoPqRSsUVvwxZz] [-A num] [-B num] [-C[num]]\n", +/* 5*/ "\t[-e pattern] [-f file] [--binary-files=value] [--color=when]\n", +/* 6*/ "\t[--context[=num]] [--directories=action] [--label] [--line-buffered]\n", +/* 7*/ "\t[pattern] [file ...]\n", +/* 8*/ "Binary file %s matches\n", +/* 9*/ "%s (BSD grep) %s\n", +}; + +/* Flags passed to regcomp() and regexec() */ +int cflags = 0; +int eflags = REG_STARTEND; + +/* Searching patterns */ +unsigned int patterns, pattern_sz; +char **pattern; +regex_t *r_pattern; +fastgrep_t *fg_pattern; + +/* Filename exclusion/inclusion patterns */ +unsigned int fpatterns, fpattern_sz; +unsigned int dpatterns, dpattern_sz; +struct epat *dpattern, *fpattern; + +/* For regex errors */ +char re_error[RE_ERROR_BUF + 1]; + +/* Command-line flags */ +unsigned long long Aflag; /* -A x: print x lines trailing each match */ +unsigned long long Bflag; /* -B x: print x lines leading each match */ +bool Hflag; /* -H: always print file name */ +bool Lflag; /* -L: only show names of files with no matches */ +bool bflag; /* -b: show block numbers for each match */ +bool cflag; /* -c: only show a count of matching lines */ +bool hflag; /* -h: don't print filename headers */ +bool iflag; /* -i: ignore case */ +bool lflag; /* -l: only show names of files with matches */ +bool mflag; /* -m x: stop reading the files after x matches */ +unsigned long long mcount; /* count for -m */ +bool nflag; /* -n: show line numbers in front of matching lines */ +bool oflag; /* -o: print only matching part */ +bool qflag; /* -q: quiet mode (don't output anything) */ +bool sflag; /* -s: silent mode (ignore errors) */ +bool vflag; /* -v: only show non-matching lines */ +bool wflag; /* -w: pattern must start and end on word boundaries */ +bool xflag; /* -x: pattern must match entire line */ +bool lbflag; /* --line-buffered */ +bool nullflag; /* --null */ +bool nulldataflag; /* --null-data */ +unsigned char line_sep = '\n'; /* 0 for --null-data */ +char *label; /* --label */ +const char *color; /* --color */ +int grepbehave = GREP_BASIC; /* -EFGP: type of the regex */ +int binbehave = BINFILE_BIN; /* -aIU: handling of binary files */ +int filebehave = FILE_STDIO; /* -JZ: normal, gzip or bzip2 file */ +int devbehave = DEV_READ; /* -D: handling of devices */ +int dirbehave = DIR_READ; /* -dRr: handling of directories */ +int linkbehave = LINK_READ; /* -OpS: handling of symlinks */ + +bool dexclude, dinclude; /* --exclude-dir and --include-dir */ +bool fexclude, finclude; /* --exclude and --include */ + +enum { + BIN_OPT = CHAR_MAX + 1, + COLOR_OPT, + DECOMPRESS_OPT, + HELP_OPT, + MMAP_OPT, + LINEBUF_OPT, + LABEL_OPT, + R_EXCLUDE_OPT, + R_INCLUDE_OPT, + R_DEXCLUDE_OPT, + R_DINCLUDE_OPT +}; + +static inline const char *init_color(const char *); + +/* Housekeeping */ +int tail; /* lines left to print */ +bool notfound; /* file not found */ + +extern char *__progname; + +/* + * Prints usage information and returns 2. + */ +__dead static void +usage(void) +{ + fprintf(stderr, getstr(4), __progname); + fprintf(stderr, "%s", getstr(5)); + fprintf(stderr, "%s", getstr(5)); + fprintf(stderr, "%s", getstr(6)); + fprintf(stderr, "%s", getstr(7)); + exit(2); +} + +static const char optstr[] = + "0123456789A:B:C:D:EFGHIJLOPSRUVZabcd:e:f:hilm:nopqrsuvwxyz"; + +struct option long_options[] = +{ + {"binary-files", required_argument, NULL, BIN_OPT}, + {"decompress", no_argument, NULL, DECOMPRESS_OPT}, + {"help", no_argument, NULL, HELP_OPT}, + {"mmap", no_argument, NULL, MMAP_OPT}, + {"line-buffered", no_argument, NULL, LINEBUF_OPT}, + {"label", required_argument, NULL, LABEL_OPT}, + {"color", optional_argument, NULL, COLOR_OPT}, + {"colour", optional_argument, NULL, COLOR_OPT}, + {"exclude", required_argument, NULL, R_EXCLUDE_OPT}, + {"include", required_argument, NULL, R_INCLUDE_OPT}, + {"exclude-dir", required_argument, NULL, R_DEXCLUDE_OPT}, + {"include-dir", required_argument, NULL, R_DINCLUDE_OPT}, + {"after-context", required_argument, NULL, 'A'}, + {"text", no_argument, NULL, 'a'}, + {"before-context", required_argument, NULL, 'B'}, + {"byte-offset", no_argument, NULL, 'b'}, + {"context", optional_argument, NULL, 'C'}, + {"count", no_argument, NULL, 'c'}, + {"devices", required_argument, NULL, 'D'}, + {"directories", required_argument, NULL, 'd'}, + {"extended-regexp", no_argument, NULL, 'E'}, + {"regexp", required_argument, NULL, 'e'}, + {"fixed-strings", no_argument, NULL, 'F'}, + {"file", required_argument, NULL, 'f'}, + {"basic-regexp", no_argument, NULL, 'G'}, + {"no-filename", no_argument, NULL, 'h'}, + {"with-filename", no_argument, NULL, 'H'}, + {"ignore-case", no_argument, NULL, 'i'}, + {"bz2decompress", no_argument, NULL, 'J'}, + {"files-with-matches", no_argument, NULL, 'l'}, + {"files-without-match", no_argument, NULL, 'L'}, + {"max-count", required_argument, NULL, 'm'}, + {"line-number", no_argument, NULL, 'n'}, + {"only-matching", no_argument, NULL, 'o'}, + {"quiet", no_argument, NULL, 'q'}, + {"silent", no_argument, NULL, 'q'}, + {"recursive", no_argument, NULL, 'r'}, + {"no-messages", no_argument, NULL, 's'}, + {"binary", no_argument, NULL, 'U'}, + {"unix-byte-offsets", no_argument, NULL, 'u'}, + {"invert-match", no_argument, NULL, 'v'}, + {"version", no_argument, NULL, 'V'}, + {"word-regexp", no_argument, NULL, 'w'}, + {"line-regexp", no_argument, NULL, 'x'}, + {"null", no_argument, NULL, 'Z'}, + {"null-data", no_argument, NULL, 'z'}, + {NULL, no_argument, NULL, 0} +}; + +/* + * Adds a searching pattern to the internal array. + */ +static void +add_pattern(char *pat, size_t len) +{ + + /* TODO: Check for empty patterns and shortcut */ + + /* Increase size if necessary */ + if (patterns == pattern_sz) { + pattern_sz *= 2; + pattern = grep_realloc(pattern, ++pattern_sz * + sizeof(*pattern)); + } + if (len > 0 && pat[len - 1] == '\n') + --len; + /* pat may not be NUL-terminated */ + pattern[patterns] = grep_malloc(len + 1); + memcpy(pattern[patterns], pat, len); + pattern[patterns][len] = '\0'; + ++patterns; +} + +/* + * Adds a file include/exclude pattern to the internal array. + */ +static void +add_fpattern(const char *pat, int mode) +{ + + /* Increase size if necessary */ + if (fpatterns == fpattern_sz) { + fpattern_sz *= 2; + fpattern = grep_realloc(fpattern, ++fpattern_sz * + sizeof(struct epat)); + } + fpattern[fpatterns].pat = grep_strdup(pat); + fpattern[fpatterns].mode = mode; + ++fpatterns; +} + +/* + * Adds a directory include/exclude pattern to the internal array. + */ +static void +add_dpattern(const char *pat, int mode) +{ + + /* Increase size if necessary */ + if (dpatterns == dpattern_sz) { + dpattern_sz *= 2; + dpattern = grep_realloc(dpattern, ++dpattern_sz * + sizeof(struct epat)); + } + dpattern[dpatterns].pat = grep_strdup(pat); + dpattern[dpatterns].mode = mode; + ++dpatterns; +} + +/* + * Reads searching patterns from a file and adds them with add_pattern(). + */ +static void +read_patterns(const char *fn) +{ + FILE *f; + char *line; + size_t len; + ssize_t rlen; + + if ((f = fopen(fn, "r")) == NULL) + err(2, "%s", fn); + line = NULL; + len = 0; +#ifndef ANDROID + while ((rlen = getline(&line, &len, f)) != -1) + add_pattern(line, *line == '\n' ? 0 : (size_t)rlen); +#endif + free(line); + if (ferror(f)) + err(2, "%s", fn); + fclose(f); +} + +static inline const char * +init_color(const char *d) +{ + char *c; + + c = getenv("GREP_COLOR"); + return (c != NULL ? c : d); +} + +int +main(int argc, char *argv[]) +{ + char **aargv, **eargv, *eopts; + char *ep; + unsigned long long l; + unsigned int aargc, eargc, i, j; + int c, lastc, needpattern, newarg, prevoptind; + + /* Check what is the program name of the binary. In this + way we can have all the funcionalities in one binary + without the need of scripting and using ugly hacks. */ + switch (__progname[0]) { + case 'e': + grepbehave = GREP_EXTENDED; + break; + case 'f': + grepbehave = GREP_FIXED; + break; + case 'g': + grepbehave = GREP_BASIC; + break; + case 'z': + filebehave = FILE_GZIP; + switch(__progname[1]) { + case 'e': + grepbehave = GREP_EXTENDED; + break; + case 'f': + grepbehave = GREP_FIXED; + break; + case 'g': + grepbehave = GREP_BASIC; + break; + } + break; + } + + lastc = '\0'; + newarg = 1; + prevoptind = 1; + needpattern = 1; + + eopts = getenv("GREP_OPTIONS"); + + /* support for extra arguments in GREP_OPTIONS */ + eargc = 0; + if (eopts != NULL) { + char *str; + + /* make an estimation of how many extra arguments we have */ + for (j = 0; j < strlen(eopts); j++) + if (eopts[j] == ' ') + eargc++; + + eargv = (char **)grep_malloc(sizeof(char *) * (eargc + 1)); + + eargc = 0; + /* parse extra arguments */ + while ((str = strsep(&eopts, " ")) != NULL) + eargv[eargc++] = grep_strdup(str); + + aargv = (char **)grep_calloc(eargc + argc + 1, + sizeof(char *)); + + aargv[0] = argv[0]; + for (i = 0; i < eargc; i++) + aargv[i + 1] = eargv[i]; + for (j = 1; j < (unsigned int)argc; j++, i++) + aargv[i + 1] = argv[j]; + + aargc = eargc + argc; + } else { + aargv = argv; + aargc = argc; + } + + while (((c = getopt_long(aargc, aargv, optstr, long_options, NULL)) != + -1)) { + switch (c) { + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + if (newarg || !isdigit(lastc)) + Aflag = 0; + else if (Aflag > LLONG_MAX / 10) { + errno = ERANGE; + err(2, NULL); + } + Aflag = Bflag = (Aflag * 10) + (c - '0'); + break; + case 'C': + if (optarg == NULL) { + Aflag = Bflag = 2; + break; + } + /* FALLTHROUGH */ + case 'A': + /* FALLTHROUGH */ + case 'B': + errno = 0; + l = strtoull(optarg, &ep, 10); + if (((errno == ERANGE) && (l == ULLONG_MAX)) || + ((errno == EINVAL) && (l == 0))) + err(2, NULL); + else if (ep[0] != '\0') { + errno = EINVAL; + err(2, NULL); + } + if (c == 'A') + Aflag = l; + else if (c == 'B') + Bflag = l; + else + Aflag = Bflag = l; + break; + case 'a': + binbehave = BINFILE_TEXT; + break; + case 'b': + bflag = true; + break; + case 'c': + cflag = true; + break; + case 'D': + if (strcasecmp(optarg, "skip") == 0) + devbehave = DEV_SKIP; + else if (strcasecmp(optarg, "read") == 0) + devbehave = DEV_READ; + else + errx(2, getstr(3), "--devices"); + break; + case 'd': + if (strcasecmp("recurse", optarg) == 0) { + Hflag = true; + dirbehave = DIR_RECURSE; + } else if (strcasecmp("skip", optarg) == 0) + dirbehave = DIR_SKIP; + else if (strcasecmp("read", optarg) == 0) + dirbehave = DIR_READ; + else + errx(2, getstr(3), "--directories"); + break; + case 'E': + grepbehave = GREP_EXTENDED; + break; + case 'e': + add_pattern(optarg, strlen(optarg)); + needpattern = 0; + break; + case 'F': + grepbehave = GREP_FIXED; + break; + case 'f': + read_patterns(optarg); + needpattern = 0; + break; + case 'G': + grepbehave = GREP_BASIC; + break; + case 'H': + Hflag = true; + break; + case 'h': + Hflag = false; + hflag = true; + break; + case 'I': + binbehave = BINFILE_SKIP; + break; + case 'i': + case 'y': + iflag = true; + cflags |= REG_ICASE; + break; + case 'J': + filebehave = FILE_BZIP; + break; + case 'L': + lflag = false; + Lflag = true; + break; + case 'l': + Lflag = false; + lflag = true; + break; + case 'm': + mflag = true; + errno = 0; + mcount = strtoull(optarg, &ep, 10); + if (((errno == ERANGE) && (mcount == ULLONG_MAX)) || + ((errno == EINVAL) && (mcount == 0))) + err(2, NULL); + else if (ep[0] != '\0') { + errno = EINVAL; + err(2, NULL); + } + break; + case 'n': + nflag = true; + break; + case 'O': + linkbehave = LINK_EXPLICIT; + break; + case 'o': + oflag = true; + break; + case 'p': + linkbehave = LINK_SKIP; + break; + case 'q': + qflag = true; + break; + case 'S': + linkbehave = LINK_READ; + break; + case 'R': + case 'r': + dirbehave = DIR_RECURSE; + Hflag = true; + break; + case 's': + sflag = true; + break; + case 'U': + binbehave = BINFILE_BIN; + break; + case 'u': + case MMAP_OPT: + /* noop, compatibility */ + break; + case 'V': + printf(getstr(9), __progname, VERSION); + exit(0); + case 'v': + vflag = true; + break; + case 'w': + wflag = true; + break; + case 'x': + xflag = true; + break; + case 'Z': + nullflag = true; + break; + case 'z': + nulldataflag = true; + line_sep = '\0'; + break; + case BIN_OPT: + if (strcasecmp("binary", optarg) == 0) + binbehave = BINFILE_BIN; + else if (strcasecmp("without-match", optarg) == 0) + binbehave = BINFILE_SKIP; + else if (strcasecmp("text", optarg) == 0) + binbeha |