summaryrefslogtreecommitdiff
path: root/include/regexp.h
blob: 174e10b75393a947e3b24b77cb0a8397b0c7d07c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
/*
 * regexp.h -- old-style regexp compile and step (emulated with POSIX regex)
 * Copyright (C) 1993 Rick Sladkey <jrs@world.std.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Library Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library Public License for more details.
 */

/*
 * Think really hard before you intentionally include this file.
 * You should really be using the POSIX regex interface instead.
 * This emulation file is intended solely for compiling old code.
 *
 * A program that uses this file must define six macros: INIT,
 * GETC, PEEKC, UNGETC, RETURN, and ERROR.  This interface is
 * so arcane that VMS hackers point at it in ridicule.
 */

#ifndef _REGEXP_H
#define _REGEXP_H

#include <sys/types.h>			/* regex.h needs size_t */
#include <regex.h>			/* POSIX.2 regexp routines */
#include <stdlib.h>			/* for malloc, realloc and free */

/*
 * Advertised external state shared by compile, step and advance.
 * The old interface exposes these as plain globals, so callers read
 * them directly after a call.
 *
 * NOTE(review): these are definitions, not extern declarations, so
 * every translation unit that includes this header defines them --
 * confirm this is acceptable for the toolchain in use.
 */
char *loc1;				/* set by step: start of the last successful match */
char *loc2;				/* set by step: end of the last successful match */
int circf;				/* set by compile when the pattern begins with '^'; read by step */

/*
 * These are the other variables mentioned in the regexp.h manpage.
 * Since we don't emulate them (whatever they do), we want errors if
 * they are referenced.  Therefore they are commented out here.
 */
#if 0
char *locs;
int sed;
int nbra;
#endif

/*
 * We need to stuff a regex_t into an arbitrary caller-supplied buffer,
 * so the buffer address must be rounded up to a suitable alignment.
 * GCC makes this easy with __alignof__.  For other compilers we have
 * to guess, and 8 is the guess.
 */
#ifdef __GNUC__
#define __REGEX_T_ALIGN (__alignof__(regex_t))
#else /* !__GNUC__ */
#define __REGEX_T_ALIGN 8
#endif /* !__GNUC__ */

/*
 * Round the address p up to the next multiple of __REGEX_T_ALIGN and
 * yield it as a regex_t pointer.  The result may lie up to
 * __REGEX_T_ALIGN - 1 bytes beyond p.  The argument is parenthesized
 * so that the cast applies to the whole expression passed in.
 */
#define __regex_t_align(p)						\
	((regex_t *) ((((unsigned long) (p)) + __REGEX_T_ALIGN - 1)	\
		/ __REGEX_T_ALIGN * __REGEX_T_ALIGN))

/*
 * compile -- read a pattern via the caller's macros and compile it with
 * POSIX regcomp() into the caller-supplied buffer.
 *
 *   instring  not referenced directly here; it is in scope for the
 *             caller-defined INIT/GETC/UNGETC macros.
 *   expbuf    start of the buffer that receives the compiled regex_t.
 *   endbuf    end of that buffer.
 *   eof       character value that terminates the pattern.
 *
 * The whole pattern is slurped into a temporary heap string and then
 * compiled `normally'; the PEEKC macro is never used.  Reading stops at
 * `eof' (consumed) or at NUL/newline (pushed back with UNGETC).  A
 * leading '^' is stripped from the pattern and recorded in circf.
 *
 * Failures are reported through ERROR() with hard-coded numbers (these
 * appear to follow the historical regexp.h error codes; e.g. 41 is used
 * for an empty pattern and 50 for running out of space).  If ERROR()
 * does not itself return, a null pointer is returned.  On success the
 * result is handed to the caller's RETURN() macro.
 */
char *
compile(char *instring, char *expbuf, char *endbuf, int eof)
{
	int __c;
	size_t __len;
	char *__buf;
	char *__newbuf;
	size_t __buflen;
	size_t __pad;
	int __error;
	regex_t *__preg;
	INIT;

	__buflen = 128;
	__buf = malloc(__buflen);
	if (!__buf) {
		ERROR(50);
		return NULL;
	}
	__len = 0;
	circf = 0;
	for (;;) {
		__c = GETC();
		if (__c == eof)
			break;
		if (__c == '\0' || __c == '\n') {
			UNGETC(__c);
			break;
		}
		/* keep room for this character plus the terminating NUL */
		if (__len + 2 > __buflen) {
			__buflen *= 2;
			/* use a temporary so the old block is not leaked on failure */
			__newbuf = realloc(__buf, __buflen);
			if (!__newbuf) {
				free(__buf);
				ERROR(50);
				return NULL;
			}
			__buf = __newbuf;
		}
		/* only the very first '^' is the anchor; later ones are literal */
		if (__len == 0 && !circf && __c == '^')
			circf = 1;
		else
			__buf[__len++] = __c;
	}
	if (__len == 0 && !circf) {
		free(__buf);
		ERROR(41);
		return NULL;
	}
	__buf[__len] = '\0';

	/*
	 * The regex_t lives at the first aligned address at or after
	 * expbuf, so the alignment padding counts against the space
	 * available between expbuf and endbuf.
	 */
	__preg = __regex_t_align(expbuf);
	__pad = (size_t) ((char *) __preg - expbuf);
	if (endbuf <= expbuf ||
	    (size_t) (endbuf - expbuf) <= __pad + sizeof(regex_t)) {
		free(__buf);
		ERROR(50);
		return NULL;
	}
	__preg->buffer = (char *) (__preg + 1);
	__preg->allocated = endbuf - (char *) __preg->buffer;
	__error = regcomp(__preg, __buf, REG_NEWLINE);
	free(__buf);

	/* translate POSIX error symbols into the old hard-coded numbers */
	switch (__error) {
	case 0:
		break;
	case REG_BADBR:
		__error = 16;
		break;
	case REG_EPAREN:
		__error = 42;
		break;
	case REG_EBRACE:
		__error = 44; /* poor fit */
		break;
	case REG_EBRACK:
		__error = 49;
		break;
	case REG_ESIZE:
	case REG_ESPACE:
		__error = 50;
		break;
	case REG_EESCAPE:
		__error = 36;
		break;
	case REG_BADRPT:
	case REG_ERANGE:
	case REG_ECTYPE:
	case REG_ESUBREG:
	case REG_EEND:
	case REG_BADPAT:
		__error = 36; /* poor fit */
		break;
	default:
		__error = 36; /* as good as any */
		break;
	}
	if (__error) {
		ERROR(__error);
		return NULL;
	}
#ifdef _RX_H
	RETURN((__preg->buffer + __preg->rx.allocated - __preg->rx.reserved));
#else
	RETURN((__preg->buffer + __preg->used));
#endif
}

/*
 * step -- search `string' for the pattern previously stored in `expbuf'
 * by compile.
 *
 * Returns 1 on a match and 0 otherwise.  On a match, loc1 and loc2 are
 * set to the start and end of the matched text; on failure they are
 * left untouched.
 *
 * This carefully emulates the gross `circf' hack: when circf is set,
 * a match is accepted only if it begins at offset 0 of `string'.  The
 * match offsets are examined only after regexec() reports success,
 * because they are not filled in on failure.
 */
int
step(char *string, char *expbuf)
{
	regmatch_t __pmatch[1];

	if (regexec(__regex_t_align(expbuf), string, 1, __pmatch, 0) != 0)
		return 0;
	/* anchored pattern: the match must start at the very beginning */
	if (circf && __pmatch[0].rm_so != 0)
		return 0;
	loc1 = string + __pmatch[0].rm_so;
	loc2 = string + __pmatch[0].rm_eo;
	return 1;
}

/*
 * advance -- like step, but the pattern may only match at the very
 * beginning of `string'.  (You have to read the man page really
 * carefully to find this one.)
 *
 * Implemented kludge-for-kludge: circf is forced on for the duration
 * of the step call and then put back to whatever it was, so the
 * caller's anchoring state is not disturbed.  Returns step's result.
 */
int
advance(char *string, char *expbuf)
{
	const int __saved_circf = circf;
	int __matched;

	circf = 1;			/* pretend the pattern was anchored */
	__matched = step(string, expbuf);
	circf = __saved_circf;		/* restore the real anchoring flag */

	return __matched;
}

#endif /* _REGEXP_H */