summaryrefslogtreecommitdiff
path: root/adk/tools/sortfile.c
blob: 1e9fc9623f8032fb739ea83206ec2efe620e6782 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
/*-
 * Copyright (c) 2010
 *      Thorsten Glaser <tg@mirbsd.org>
 *
 * Provided that these terms and disclaimer and all copyright notices
 * are retained or reproduced in an accompanying document, permission
 * is granted to deal in this work without restriction, including un-
 * limited rights to use, publicly perform, distribute, sell, modify,
 * merge, give away, or sublicence.
 *
 * This work is provided "AS IS" and WITHOUT WARRANTY of any kind, to
 * the utmost extent permitted by applicable law, neither express nor
 * implied; without malicious intent or gross negligence. In no event
 * may a licensor, author or contributor be held liable for indirect,
 * direct, other damage, loss, or other issues arising in any way out
 * of dealing in the work, even if advised of the possibility of such
 * damage or existence of a defect, except proven that it results out
 * of said person's immediate fault when using the work as intended.
 */

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <err.h>
#include <fcntl.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

struct ptrsize {
	const char *ptr;
	size_t size;
};

static void *xrecalloc(void *, size_t, size_t);
static int cmpfn(const void *, const void *);

#define MUL_NO_OVERFLOW	(1UL << (sizeof (size_t) * 8 / 2))

#ifndef SIZE_MAX
#ifdef SIZE_T_MAX
#define SIZE_MAX	SIZE_T_MAX
#else
#define SIZE_MAX	((size_t)-1)
#endif
#endif

#if !defined(MAP_FAILED)
/* XXX imake style */
#  if defined(__linux)
#define MAP_FAILED	((void *)-1)
#  elif defined(__bsdi__) || defined(__osf__) || defined(__ultrix)
#define MAP_FAILED	((caddr_t)-1)
#  endif
#endif

static void *
xrecalloc(void *ptr, size_t nmemb, size_t size)
{
	void *rv;

	if ((nmemb >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) &&
	    nmemb > 0 && SIZE_MAX / nmemb < size)
		errx(1, "attempted integer overflow: %zu * %zu", nmemb, size);
	size *= nmemb;
	if ((rv = realloc(ptr, size)) == NULL)
		err(1, "cannot allocate %zu bytes", size);
	return (rv);
}

int
sortfile(char *infile, char *outfile)
{
	int fd, fdout;
	size_t fsz, asz, anents;
	char *cp, *thefile, *endfile;
	struct ptrsize *thearray;

	if ((fd = open(infile, O_RDONLY)) < 0)
		err(1, "open: %s", infile);
	else {
		struct stat sb;

		/* reasonable maximum size: 3/4 of SIZE_MAX */
		fsz = (SIZE_MAX / 2) + (SIZE_MAX / 4);

		if (fstat(fd, &sb))
			err(1, "stat: %s", infile);
		if (sb.st_size > fsz)
			errx(1, "file %s too big, %llu > %zu", infile,
			    (unsigned long long)sb.st_size, fsz);
		fsz = (size_t)sb.st_size;
	}

	if ((thefile = mmap(NULL, fsz, PROT_READ, MAP_FILE | MAP_PRIVATE,
	    fd, (off_t)0)) == MAP_FAILED)
		err(1, "mmap %zu bytes from %s", fsz, infile);
	/* last valid byte in the file, must be newline anyway */
	endfile = thefile + fsz - 1;

	thearray = xrecalloc(NULL, (asz = 8), sizeof(thearray[0]));
	thearray[(anents = 0)].ptr = cp = thefile;

	while ((cp = memchr(cp, '\n', endfile - cp)) != NULL) {
		/* byte after the \n */
		if (++cp > endfile)
			/* end of file */
			break;
		thearray[anents].size = cp - thearray[anents].ptr;
		if (++anents == asz)
			/* resize array */
			thearray = xrecalloc(thearray, (asz <<= 1),
			    sizeof(thearray[0]));
		thearray[anents].ptr = cp;
	}
	thearray[anents].size = endfile - thearray[anents].ptr + 1;

	qsort(thearray, ++anents, sizeof(thearray[0]), cmpfn);

	if ((fdout = open(outfile, O_WRONLY | O_CREAT, S_IRWXU)) < 0)
		err(1, "open: %s", outfile);
	else {
		for (asz = 0; asz < anents; ++asz)
			if ((size_t)write(fdout, thearray[asz].ptr,
			    thearray[asz].size) != thearray[asz].size)
				err(1, "write %zu bytes", thearray[asz].size);
	}

	if (munmap(thefile, fsz))
		warn("munmap");

	free(thearray);
	close(fd);

	return (0);
}

static int
cmpfn(const void *p1, const void *p2)
{
	int rv;
	const struct ptrsize *a1 = (const struct ptrsize *)p1;
	const struct ptrsize *a2 = (const struct ptrsize *)p2;

	if ((rv = memcmp(a1->ptr, a2->ptr, (a1->size > a2->size ?
	    a2->size : a1->size) - /* '\n' */ 1)) != 0)
		/* unequal in the common part */
		return (rv);

	/* shorter string is smaller */
	return (a1->size > a2->size ? 1 : a1->size == a2->size ? 0 : -1);
}