summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarmelo Amoroso <carmelo.amoroso@st.com>2008-07-09 15:05:36 +0000
committerCarmelo Amoroso <carmelo.amoroso@st.com>2008-07-09 15:05:36 +0000
commita691312d8794d5516402bb6bb0d3e90c40ba188b (patch)
treedcac242fcad7d24a4f452722de26c56cfaf8c98a
parent56df95fe5d0778352abe09225d6587b88643d135 (diff)
Added several tests for locale support (8 bit and multibyte UTF-8)
Basically all tests have been taken from glibc. For testing multibyte encodings, the EUC_JP parts have been commented out and a new section has been added for UTF-8, which is the only multibyte codeset currently supported on uClibc. Some tests are still failing due to unsupported/missing features; others have been fixed. Signed-off-by: Filippo Arcidiacono <filippo.arcidiacono@st.com> Signed-off-by: Carmelo Amoroso <carmelo.amoroso@st.com>
-rw-r--r--test/locale/Makefile31
-rw-r--r--test/locale/bug-iconv-trans.c68
-rw-r--r--test/locale/bug-usesetlocale.c38
-rw-r--r--test/locale/collate-test.c133
-rw-r--r--test/locale/dump-ctype.c164
-rw-r--r--test/locale/gen-unicode-ctype.c785
-rw-r--r--test/locale/show-ucs-data.c62
-rw-r--r--test/locale/tst-C-locale.c498
-rw-r--r--test/locale/tst-ctype-de_DE.ISO-8859-1.in56
-rw-r--r--test/locale/tst-ctype.c447
-rw-r--r--test/locale/tst-digits.c249
-rw-r--r--test/locale/tst-fmon.c68
-rw-r--r--test/locale/tst-langinfo.c284
-rw-r--r--test/locale/tst-langinfo.input303
-rw-r--r--test/locale/tst-leaks.c18
-rw-r--r--test/locale/tst-mbswcs1.c63
-rw-r--r--test/locale/tst-mbswcs2.c65
-rw-r--r--test/locale/tst-mbswcs3.c76
-rw-r--r--test/locale/tst-mbswcs4.c63
-rw-r--r--test/locale/tst-mbswcs5.c75
-rw-r--r--test/locale/tst-mbswcs6.c74
-rw-r--r--test/locale/tst-numeric.c74
-rw-r--r--test/locale/tst-rpmatch.c37
-rw-r--r--test/locale/tst-setlocale.c25
-rw-r--r--test/locale/tst-sscanf.c56
-rw-r--r--test/locale/tst-strfmon1.c42
-rw-r--r--test/locale/tst-trans.c71
-rw-r--r--test/locale/tst-wctype.c144
-rw-r--r--test/locale/tst-xlocale1.c75
-rw-r--r--test/locale/tst-xlocale2.c64
-rw-r--r--test/locale/tst_nl_langinfo.c296
-rw-r--r--test/locale/xfrm-test.c144
32 files changed, 4648 insertions, 0 deletions
diff --git a/test/locale/Makefile b/test/locale/Makefile
new file mode 100644
index 000000000..0ab07f7aa
--- /dev/null
+++ b/test/locale/Makefile
@@ -0,0 +1,31 @@
+# uClibc locale tests
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+# Not listed in TESTS below: tst_mbtowc tst_strcoll tst_strfmon tst_strxfrm
+
+# Test programs of this directory (presumably consumed by ../Test.mak).
+TESTS := bug-iconv-trans bug-usesetlocale collate-test dump-ctype \
+ gen-unicode-ctype show-ucs-data tst-ctype \
+ tst-digits tst-fmon tst-langinfo tst-leaks tst-mbswcs1 \
+ tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 tst-mbswcs6 \
+ tst_nl_langinfo tst-numeric tst-rpmatch tst-setlocale \
+ tst-sscanf tst-strfmon1 tst-trans tst-wctype tst-xlocale1 \
+ tst-xlocale2 xfrm-test
+
+
+# NOTE: For now, some tests that are known not to build are disabled.
+TESTS_DISABLED := tst-ctype tst-fmon tst-leaks tst-rpmatch tst-strfmon1
+
+# These tests are additionally disabled when UCLIBC_HAS_XLOCALE is not "y".
+# NOTE(review): tst-C-locale is disabled here but never listed in TESTS -- confirm.
+ifneq ($(UCLIBC_HAS_XLOCALE),y)
+TESTS_DISABLED += bug-usesetlocale tst-xlocale1 tst-xlocale2 xfrm-test tst-C-locale
+endif
+
+include ../Test.mak
+
+# NOTE(review): no test named "rint" exists in TESTS; this looks like a
+# leftover from another test directory -- confirm before removing.
+DODIFF_rint := 1
+
+EXTRA_CFLAGS := -D__USE_GNU -fno-builtin
+
+# Per-test options: dump-ctype gets the locale name "C"; tst-ctype and
+# tst-langinfo get their input files redirected to stdin.
+OPTS_dump-ctype = C
+OPTS_tst-ctype = < tst-ctype-de_DE.ISO-8859-1.in
+OPTS_tst-langinfo = < tst-langinfo.input
+
+# dump-ctype creates a directory named after its locale argument ("C").
+EXTRA_CLEAN := C
diff --git a/test/locale/bug-iconv-trans.c b/test/locale/bug-iconv-trans.c
new file mode 100644
index 000000000..3886247c3
--- /dev/null
+++ b/test/locale/bug-iconv-trans.c
@@ -0,0 +1,68 @@
+#include <iconv.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+
+/* Convert a string of seven German special characters from ISO-8859-1 to
+   ASCII with transliteration and verify the iconv() return value, the
+   amount of input consumed and the bytes produced.
+
+   NOTE(review): the expected count of 7 and the input-pointer check only
+   hold if STR is stored as one byte per character, i.e. if this source
+   file itself is encoded in ISO-8859-1 -- confirm the file encoding.
+
+   Returns 0 when every check passes, 1 otherwise.  */
+int
+main (void)
+{
+  iconv_t cd;
+  const char str[] = "ÄäÖöÜüß";
+  const char expected[] = "AEaeOEoeUEuess";
+  char *inptr = (char *) str;
+  size_t inlen = strlen (str) + 1;
+  char outbuf[500];
+  char *outptr = outbuf;
+  size_t outlen = sizeof (outbuf);
+  int result = 0;
+  size_t n;
+
+  if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
+    {
+      puts ("setlocale failed");
+      return 1;
+    }
+
+  cd = iconv_open ("ANSI_X3.4-1968//TRANSLIT", "ISO-8859-1");
+  if (cd == (iconv_t) -1)
+    {
+      puts ("iconv_open failed");
+      return 1;
+    }
+
+  /* The terminating NUL is converted too (inlen includes it).  */
+  n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+  if (n != 7)
+    {
+      if (n == (size_t) -1)
+        printf ("iconv() returned error: %m\n");
+      else
+        /* size_t is unsigned: use the standard %zu conversion.  */
+        printf ("iconv() returned %zu, expected 7\n", n);
+      result = 1;
+    }
+
+  if (inlen != 0)
+    {
+      puts ("not all input consumed");
+      result = 1;
+    }
+  else if ((size_t) (inptr - str) != strlen (str) + 1)
+    {
+      printf ("inptr wrong, advanced by %td\n", inptr - str);
+      result = 1;
+    }
+
+  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
+    {
+      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
+              (int) (sizeof (outbuf) - outlen), outbuf, expected);
+      result = 1;
+    }
+  else if (outlen != sizeof (outbuf) - sizeof (expected))
+    {
+      /* Report the same value the condition above compares against.  */
+      printf ("outlen wrong: %zu, expected %zu\n", outlen,
+              sizeof (outbuf) - sizeof (expected));
+      result = 1;
+    }
+  else
+    printf ("output is \"%s\" which is OK\n", outbuf);
+
+  /* Release the conversion descriptor obtained from iconv_open.  */
+  if (iconv_close (cd) != 0)
+    {
+      puts ("iconv_close failed");
+      result = 1;
+    }
+
+  return result;
+}
diff --git a/test/locale/bug-usesetlocale.c b/test/locale/bug-usesetlocale.c
new file mode 100644
index 000000000..0637067de
--- /dev/null
+++ b/test/locale/bug-usesetlocale.c
@@ -0,0 +1,38 @@
+/* Test case for setlocale vs uselocale (LC_GLOBAL_LOCALE) bug. */
+
+#define _GNU_SOURCE 1
+#include <locale.h>
+#include <stdio.h>
+#include <ctype.h>
+
+/* Sample isalpha (0xE4) at four points: at program start, after
+   setlocale (LC_ALL, "de_DE"), while a freshly created "C" locale is
+   installed with uselocale, and after the previous locale has been
+   restored.  The test passes (returns 0) when the "C" locale result
+   matches the start-up result and the restored result matches the one
+   seen right after setlocale.  Returns nonzero otherwise, or when the
+   "C" locale object cannot be created.  */
+static int
+do_test (void)
+{
+  __locale_t loc_new, loc_old;
+
+  int first = !!isalpha(0xE4);
+
+  setlocale (LC_ALL, "de_DE");
+
+  int global_de = !!isalpha(0xE4);
+
+  loc_new = newlocale (1 << LC_ALL, "C", 0);
+  if (loc_new == (__locale_t) 0)
+    {
+      /* Without a locale object the uselocale/freelocale calls below
+         would operate on a null handle.  */
+      puts ("newlocale failed");
+      return 1;
+    }
+  loc_old = uselocale (loc_new);
+
+  int used_c = !!isalpha(0xE4);
+
+  /* Switch back to whatever was installed before loc_new.  */
+  uselocale (loc_old);
+
+  int used_global = !!isalpha(0xE4);
+
+  printf ("started %d, after setlocale %d\n", first, global_de);
+  printf ("after uselocale %d, after LC_GLOBAL_LOCALE %d\n",
+          used_c, used_global);
+
+  freelocale (loc_new);
+  return !(used_c == first && used_global == global_de);
+}
+
+
+#define TEST_FUNCTION do_test ()
+#include "test-skeleton.c"
diff --git a/test/locale/collate-test.c b/test/locale/collate-test.c
new file mode 100644
index 000000000..e8f43218f
--- /dev/null
+++ b/test/locale/collate-test.c
@@ -0,0 +1,133 @@
+/* Test collation function using real data.
+ Copyright (C) 1997, 1999, 2000, 2003 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <ctype.h>
+#include <error.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* One input line plus the key it is collated by.  Both members are
+   separately allocated strings.  */
+struct lines
+{
+ char *key; /* Leading part of LINE used for strcoll comparisons.  */
+ char *line; /* Copy of the complete input line.  */
+};
+
+static int xstrcoll (const void *, const void *);
+
+/* Usage: collate-test <random seed> < input
+
+   Reads all lines from stdin, derives a key from each line (the text up to
+   the first ':', '(' or ';' with trailing whitespace removed), shuffles the
+   lines using the given seed while checking that strcoll is antisymmetric
+   on the swapped pairs, then sorts the lines by key with strcoll and
+   prints them to stdout.
+
+   Returns 0; exits with status 1 on a usage or allocation error.  */
+int
+main (int argc, char *argv[])
+{
+  size_t nstrings = 0;
+  size_t nstrings_max = 100;
+  struct lines *strings;
+  char *line = NULL;
+  size_t len = 0;
+  size_t n;
+
+  if (argc < 2)
+    error (1, 0, "usage: %s <random seed>", argv[0]);
+
+  setlocale (LC_ALL, "");
+
+  strings = malloc (nstrings_max * sizeof (*strings));
+  if (strings == NULL)
+    {
+      perror (argv[0]);
+      exit (1);
+    }
+
+  while (getline (&line, &len, stdin) >= 0)
+    {
+      size_t keylen;
+
+      if (nstrings == nstrings_max)
+        {
+          /* Grow through a temporary so the old array is not lost if
+             realloc fails.  */
+          struct lines *grown
+            = realloc (strings, 2 * nstrings_max * sizeof (*strings));
+          if (grown == NULL)
+            {
+              perror (argv[0]);
+              exit (1);
+            }
+          strings = grown;
+          nstrings_max *= 2;
+        }
+
+      keylen = strcspn (line, ":(;");
+      /* <ctype.h> functions require an unsigned char value; locale data
+         routinely contains bytes >= 0x80.  */
+      while (keylen > 0 && isspace ((unsigned char) line[keylen - 1]))
+        --keylen;
+
+      strings[nstrings].line = strdup (line);
+      strings[nstrings].key = strndup (line, keylen);
+      if (strings[nstrings].line == NULL || strings[nstrings].key == NULL)
+        {
+          perror (argv[0]);
+          exit (1);
+        }
+      ++nstrings;
+    }
+  free (line);
+
+  /* First shuffle.  The loop body never runs when no line was read, so
+     the modulo below cannot divide by zero.  */
+  srandom (atoi (argv[1]));
+  for (n = 0; n < 10 * nstrings; ++n)
+    {
+      int r1, r2, r;
+      size_t idx1 = random () % nstrings;
+      size_t idx2 = random () % nstrings;
+      struct lines tmp = strings[idx1];
+      strings[idx1] = strings[idx2];
+      strings[idx2] = tmp;
+
+      /* While we are at it a first little test: comparing in both
+         directions must give opposite signs, or zero both times.  */
+      r1 = strcoll (strings[idx1].key, strings[idx2].key);
+      r2 = strcoll (strings[idx2].key, strings[idx1].key);
+      r = r1 * r2;
+
+      if (r > 0 || (r == 0 && (r1 != 0 || r2 != 0)))
+        printf ("`%s' and `%s' collate wrong: %d vs. %d\n",
+                strings[idx1].key, strings[idx2].key, r1, r2);
+    }
+
+  /* Now sort.  */
+  qsort (strings, nstrings, sizeof (*strings), xstrcoll);
+
+  /* Print the result and release each entry as we go.  */
+  for (n = 0; n < nstrings; ++n)
+    {
+      fputs (strings[n].line, stdout);
+      free (strings[n].line);
+      free (strings[n].key);
+    }
+  free (strings);
+
+  return 0;
+}
+
+
+/* qsort comparison callback: PTR1 and PTR2 point to `struct lines'
+   elements, which are ordered by applying strcoll to their keys.  */
+static int
+xstrcoll (const void *ptr1, const void *ptr2)
+{
+  const struct lines *l1 = ptr1;
+  const struct lines *l2 = ptr2;
+
+  return strcoll (l1->key, l2->key);
+}
diff --git a/test/locale/dump-ctype.c b/test/locale/dump-ctype.c
new file mode 100644
index 000000000..a1f24c656
--- /dev/null
+++ b/test/locale/dump-ctype.c
@@ -0,0 +1,164 @@
+/* Dump the character classes and character maps of a locale to a bunch
+ of individual files which can be processed with diff, sed etc.
+ Copyright (C) 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Usage example:
+ $ dump-ctype de_DE.UTF-8
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <wctype.h>
+#include <locale.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+
+/* Prefix used in every diagnostic written to stderr.  */
+static const char *program_name = "dump-ctype";
+/* Name of the locale being dumped; assigned from argv[1] in main.  */
+static const char *locale;
+
+/* Character class names passed to dump_class, one output file each.  */
+static const char *class_names[] =
+ {
+ "alnum", "alpha", "blank", "cntrl", "digit", "graph", "lower",
+ "print", "punct", "space", "upper", "xdigit"
+ };
+
+/* Character mapping names passed to dump_map, one output file each.  */
+static const char *map_names[] =
+ {
+ "tolower", "toupper", "totitle"
+ };
+
+/* Write every value in 0x0000..0xFFFF that iswctype reports as a member
+   of CLASS_NAME to a file called CLASS_NAME in the current directory, one
+   "0x%04X" entry per line.  A class unknown to wctype is reported on
+   stderr and skipped; failure to open or write the file ends the program
+   with status 1.  */
+static void dump_class (const char *class_name)
+{
+  const wctype_t desc = wctype (class_name);
+  FILE *out;
+  unsigned int wc;
+
+  if (desc == (wctype_t) 0)
+    {
+      fprintf (stderr, "%s %s: noexistent class %s\n", program_name,
+               locale, class_name);
+      return;
+    }
+
+  out = fopen (class_name, "w");
+  if (out == NULL)
+    {
+      fprintf (stderr, "%s %s: cannot open file %s/%s\n", program_name,
+               locale, locale, class_name);
+      exit (1);
+    }
+
+  wc = 0;
+  while (wc < 0x10000)
+    {
+      if (iswctype (wc, desc))
+        fprintf (out, "0x%04X\n", wc);
+      ++wc;
+    }
+
+  /* fclose is only attempted when no earlier write error was recorded.  */
+  if (ferror (out) || fclose (out))
+    {
+      fprintf (stderr, "%s %s: I/O error on file %s/%s\n", program_name,
+               locale, locale, class_name);
+      exit (1);
+    }
+}
+
+/* Write every value in 0x0000..0xFFFF that the mapping MAP_NAME changes
+   to a file called MAP_NAME in the current directory, as
+   "0x%04X<TAB>0x%04X" (source, target) lines.  A mapping unknown to
+   wctrans is reported on stderr and skipped; failure to open or write the
+   file ends the program with status 1.  */
+static void dump_map (const char *map_name)
+{
+  const wctrans_t desc = wctrans (map_name);
+  FILE *out;
+  unsigned int wc;
+
+  if (desc == (wctrans_t) 0)
+    {
+      fprintf (stderr, "%s %s: noexistent map %s\n", program_name,
+               locale, map_name);
+      return;
+    }
+
+  out = fopen (map_name, "w");
+  if (out == NULL)
+    {
+      fprintf (stderr, "%s %s: cannot open file %s/%s\n", program_name,
+               locale, locale, map_name);
+      exit (1);
+    }
+
+  for (wc = 0; wc <= 0xFFFF; ++wc)
+    {
+      const wint_t mapped = towctrans (wc, desc);
+
+      if (mapped == wc)
+        continue;
+      fprintf (out, "0x%04X\t0x%04X\n", wc, mapped);
+    }
+
+  /* fclose is only attempted when no earlier write error was recorded.  */
+  if (ferror (out) || fclose (out))
+    {
+      fprintf (stderr, "%s %s: I/O error on file %s/%s\n", program_name,
+               locale, locale, map_name);
+      exit (1);
+    }
+}
+
+/* Usage: dump-ctype LOCALE
+
+   Switches to LOCALE, creates a directory named LOCALE below the current
+   directory, changes into it and writes one file per entry of class_names
+   and map_names.  Returns 0 on success; every error is reported on stderr
+   and ends the program with status 1.  */
+int
+main (int argc, char *argv[])
+{
+  size_t i;
+
+  if (argc != 2)
+    {
+      fprintf (stderr, "Usage: dump-ctype locale\n");
+      exit (1);
+    }
+  locale = argv[1];
+
+  if (setlocale (LC_ALL, locale) == NULL)
+    {
+      fprintf (stderr, "%s: setlocale cannot switch to locale %s\n",
+               program_name, locale);
+      exit (1);
+    }
+
+  if (mkdir (locale, 0777) < 0)
+    {
+      char buf[100];
+      int save_errno = errno;
+
+      /* LOCALE comes from the command line and may be longer than BUF:
+         bound the write.  A truncated prefix is acceptable for a
+         diagnostic; overrunning the stack buffer is not.  */
+      snprintf (buf, sizeof (buf), "%s: cannot create directory %s",
+                program_name, locale);
+      /* Formatting may clobber errno; restore it for perror.  */
+      errno = save_errno;
+      perror (buf);
+      exit (1);
+    }
+
+  if (chdir (locale) < 0)
+    {
+      char buf[100];
+      int save_errno = errno;
+
+      snprintf (buf, sizeof (buf), "%s: cannot chdir to %s",
+                program_name, locale);
+      errno = save_errno;
+      perror (buf);
+      exit (1);
+    }
+
+  for (i = 0; i < sizeof (class_names) / sizeof (class_names[0]); i++)
+    dump_class (class_names[i]);
+
+  for (i = 0; i < sizeof (map_names) / sizeof (map_names[0]); i++)
+    dump_map (map_names[i]);
+
+  return 0;
+}
diff --git a/test/locale/gen-unicode-ctype.c b/test/locale/gen-unicode-ctype.c
new file mode 100644
index 000000000..849f272ed
--- /dev/null
+++ b/test/locale/gen-unicode-ctype.c
@@ -0,0 +1,785 @@
+/* Generate a Unicode conforming LC_CTYPE category from a UnicodeData file.
+ Copyright (C) 2000-2001 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/* Usage example:
+ $ gen-unicode /usr/local/share/Unidata/UnicodeData.txt 3.1
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+
+/* This structure represents one line in the UnicodeData.txt file.
+   An entry whose NAME is NULL has not been filled in.  */
+struct unicode_attribute
+{
+ const char *name; /* Character name */
+ const char *category; /* General category */
+ const char *combining; /* Canonical combining classes */
+ const char *bidi; /* Bidirectional category */
+ const char *decomposition; /* Character decomposition mapping */
+ const char *decdigit; /* Decimal digit value */
+ const char *digit; /* Digit value */
+ const char *numeric; /* Numeric value */
+ int mirrored; /* mirrored */
+ const char *oldname; /* Old Unicode 1.0 name */
+ const char *comment; /* Comment */
+ unsigned int upper; /* Uppercase mapping */
+ unsigned int lower; /* Lowercase mapping */
+ unsigned int title; /* Titlecase mapping */
+};
+
+/* Missing fields are represented with "" for strings, and NONE for
+ characters (the upper/lower/title mappings). */
+#define NONE (~(unsigned int)0)
+
+/* The entire contents of the UnicodeData.txt file, indexed by code point
+ (0 .. 0x10FFFF). */
+struct unicode_attribute unicode_attributes [0x110000];
+
+/* Return a heap copy of S.  Running out of memory is fatal: a NULL name
+   would otherwise make the entry look like an undefined character.  */
+static char *
+dup_field_or_die (const char *s)
+{
+  char *copy = strdup (s);
+
+  if (copy == NULL)
+    {
+      fprintf (stderr, "out of memory\n");
+      exit (1);
+    }
+  return copy;
+}
+
+/* Return the shared literal "" for an empty FIELD, a heap copy otherwise.  */
+static const char *
+dup_optional_field (const char *field)
+{
+  return field[0] == '\0' ? "" : dup_field_or_die (field);
+}
+
+/* Return NONE for an empty FIELD, otherwise FIELD parsed as a hexadecimal
+   code point.  */
+static unsigned int
+parse_mapping_field (const char *field)
+{
+  return field[0] == '\0' ? NONE : strtoul (field, NULL, 16);
+}
+
+/* Stores in unicode_attributes[i] the values from the given fields.
+   FIELD1..FIELD14 are the UnicodeData.txt columns following the code
+   point.  An index of 0x110000 or more is a fatal error.  Entries of
+   category "Cs" (surrogates) are ignored and stay undefined.  */
+static void
+fill_attribute (unsigned int i,
+                const char *field1, const char *field2,
+                const char *field3, const char *field4,
+                const char *field5, const char *field6,
+                const char *field7, const char *field8,
+                const char *field9, const char *field10,
+                const char *field11, const char *field12,
+                const char *field13, const char *field14)
+{
+  struct unicode_attribute *uni;
+
+  if (i >= 0x110000)
+    {
+      fprintf (stderr, "index too large\n");
+      exit (1);
+    }
+  if (strcmp (field2, "Cs") == 0)
+    /* Surrogates are UTF-16 artefacts, not real characters.  Ignore them.  */
+    return;
+
+  uni = &unicode_attributes[i];
+
+  /* The name is always copied, even when empty: a non-NULL name is what
+     marks the entry as defined.  */
+  uni->name = dup_field_or_die (field1);
+  uni->category = dup_optional_field (field2);
+  uni->combining = dup_optional_field (field3);
+  uni->bidi = dup_optional_field (field4);
+  uni->decomposition = dup_optional_field (field5);
+  uni->decdigit = dup_optional_field (field6);
+  uni->digit = dup_optional_field (field7);
+  uni->numeric = dup_optional_field (field8);
+  uni->mirrored = (field9[0] == 'Y');
+  uni->oldname = dup_optional_field (field10);
+  uni->comment = dup_optional_field (field11);
+  uni->upper = parse_mapping_field (field12);
+  uni->lower = parse_mapping_field (field13);
+  uni->title = parse_mapping_field (field14);
+}
+
+/* Maximum length of a field in the UnicodeData.txt file. */
+#define FIELDLEN 120
+
+/* Reads the next field from STREAM into BUFFER, which has size FIELDLEN.
+   Characters are consumed up to and including DELIM; DELIM itself is not
+   stored and carriage returns are dropped.
+   Returns 1 when DELIM was found (BUFFER is then NUL-terminated), and 0
+   when end of file came first (BUFFER is then left unterminated).
+   A field that does not fit terminates the program.  */
+static int
+getfield (FILE *stream, char *buffer, int delim)
+{
+  char *out = buffer;
+  int stored = 0;
+
+  for (;;)
+    {
+      int ch = getc (stream);
+
+      if (ch == EOF)
+        return 0;
+      if (ch == delim)
+        break;
+
+      /* The original unicode.org UnicodeData.txt file happens to have
+         CR/LF line terminators.  Silently convert to LF.  */
+      if (ch == '\r')
+        continue;
+
+      stored++;
+      if (stored >= FIELDLEN - 1)
+        {
+          fprintf (stderr, "field too long\n");
+          exit (1);
+        }
+      *out++ = ch;
+    }
+
+  *out = '\0';
+  return 1;
+}
+
+/* Stores in unicode_attributes[] the entire contents of the UnicodeData.txt
+ file named UNICODEDATA_FILENAME. Every entry is first marked undefined
+ (name == NULL). Each line must consist of 15 ';'-separated fields; a
+ pair of "<..., First>" / "<..., Last>" lines describes a whole range of
+ code points sharing the same attributes. Any open, read or format error
+ is reported on stderr and terminates the program. */
+static void
+fill_attributes (const char *unicodedata_filename)
+{
+ unsigned int i, j;
+ FILE *stream;
+ char field0[FIELDLEN];
+ char field1[FIELDLEN];
+ char field2[FIELDLEN];
+ char field3[FIELDLEN];
+ char field4[FIELDLEN];
+ char field5[FIELDLEN];
+ char field6[FIELDLEN];
+ char field7[FIELDLEN];
+ char field8[FIELDLEN];
+ char field9[FIELDLEN];
+ char field10[FIELDLEN];
+ char field11[FIELDLEN];
+ char field12[FIELDLEN];
+ char field13[FIELDLEN];
+ char field14[FIELDLEN];
+ int lineno = 0;
+
+ /* A NULL name marks a code point as not (yet) defined. */
+ for (i = 0; i < 0x110000; i++)
+ unicode_attributes[i].name = NULL;
+
+ stream = fopen (unicodedata_filename, "r");
+ if (stream == NULL)
+ {
+ fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
+ exit (1);
+ }
+
+ for (;;)
+ {
+ int n;
+
+ /* n counts how many of the 15 fields of this line could be read. */
+ lineno++;
+ n = getfield (stream, field0, ';');
+ n += getfield (stream, field1, ';');
+ n += getfield (stream, field2, ';');
+ n += getfield (stream, field3, ';');
+ n += getfield (stream, field4, ';');
+ n += getfield (stream, field5, ';');
+ n += getfield (stream, field6, ';');
+ n += getfield (stream, field7, ';');
+ n += getfield (stream, field8, ';');
+ n += getfield (stream, field9, ';');
+ n += getfield (stream, field10, ';');
+ n += getfield (stream, field11, ';');
+ n += getfield (stream, field12, ';');
+ n += getfield (stream, field13, ';');
+ n += getfield (stream, field14, '\n');
+ /* No field at all: end of file reached at a line boundary. */
+ if (n == 0)
+ break;
+ if (n != 15)
+ {
+ fprintf (stderr, "short line in'%s':%d\n",
+ unicodedata_filename, lineno);
+ exit (1);
+ }
+ /* field0 is the code point in hexadecimal. */
+ i = strtoul (field0, NULL, 16);
+ if (field1[0] == '<'
+ && strlen (field1) >= 9
+ && !strcmp (field1 + strlen(field1) - 8, ", First>"))
+ {
+ /* Deal with a range: the next line must be the matching
+ "<..., Last>" entry, whose fields overwrite the ones just read. */
+ lineno++;
+ n = getfield (stream, field0, ';');
+ n += getfield (stream, field1, ';');
+ n += getfield (stream, field2, ';');
+ n += getfield (stream, field3, ';');
+ n += getfield (stream, field4, ';');
+ n += getfield (stream, field5, ';');
+ n += getfield (stream, field6, ';');
+ n += getfield (stream, field7, ';');
+ n += getfield (stream, field8, ';');
+ n += getfield (stream, field9, ';');
+ n += getfield (stream, field10, ';');
+ n += getfield (stream, field11, ';');
+ n += getfield (stream, field12, ';');
+ n += getfield (stream, field13, ';');
+ n += getfield (stream, field14, '\n');
+ if (n != 15)
+ {
+ fprintf (stderr, "missing end range in '%s':%d\n",
+ unicodedata_filename, lineno);
+ exit (1);
+ }
+ if (!(field1[0] == '<'
+ && strlen (field1) >= 8
+ && !strcmp (field1 + strlen (field1) - 7, ", Last>")))
+ {
+ fprintf (stderr, "missing end range in '%s':%d\n",
+ unicodedata_filename, lineno);
+ exit (1);
+ }
+ /* Cut off ", Last>"; the leading '<' is skipped via field1+1 below. */
+ field1[strlen (field1) - 7] = '\0';
+ j = strtoul (field0, NULL, 16);
+ /* Fill every code point from the First value up to and including
+ the Last value. */
+ for (; i <= j; i++)
+ fill_attribute (i, field1+1, field2, field3, field4, field5,
+ field6, field7, field8, field9, field10,
+ field11, field12, field13, field14);
+ }
+ else
+ {
+ /* Single character line */
+ fill_attribute (i, field1, field2, field3, field4, field5,
+ field6, field7, field8, field9, field10,
+ field11, field12, field13, field14);
+ }
+ }
+ if (ferror (stream) || fclose (stream))
+ {
+ fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
+ exit (1);
+ }
+}
+
+/* Character mappings. */
+
+/* Return the uppercase mapping recorded for CH, or CH itself when the
+   character is undefined or has no such mapping.  */
+static unsigned int
+to_upper (unsigned int ch)
+{
+  const struct unicode_attribute *attr = &unicode_attributes[ch];
+
+  if (attr->name == NULL || attr->upper == NONE)
+    return ch;
+  return attr->upper;
+}
+
+/* Return the lowercase mapping recorded for CH, or CH itself when the
+   character is undefined or has no such mapping.  */
+static unsigned int
+to_lower (unsigned int ch)
+{
+  const struct unicode_attribute *attr = &unicode_attributes[ch];
+
+  if (attr->name == NULL || attr->lower == NONE)
+    return ch;
+  return attr->lower;
+}
+
+/* Return the titlecase mapping recorded for CH, or CH itself when the
+   character is undefined or has no such mapping.  */
+static unsigned int
+to_title (unsigned int ch)
+{
+  const struct unicode_attribute *attr = &unicode_attributes[ch];
+
+  if (attr->name == NULL || attr->title == NONE)
+    return ch;
+  return attr->title;
+}
+
+/* Character class properties. */
+
+/* A character counts as uppercase when lowercasing it yields a different
+   character.  */
+static bool
+is_upper (unsigned int ch)
+{
+  const unsigned int lowered = to_lower (ch);
+
+  return lowered != ch;
+}
+
+/* A character counts as lowercase when uppercasing it yields a different
+   character.  */
+static bool
+is_lower (unsigned int ch)
+{
+  /* <U00DF> is lowercase, but without simple to_upper mapping.  */
+  if (ch == 0x00DF)
+    return true;
+
+  return to_upper (ch) != ch;
+}
+
+/* Decide whether CH is alphabetic.  Undefined characters never are;
+   otherwise any one of the rules below is sufficient.  */
+static bool
+is_alpha (unsigned int ch)
+{
+  const struct unicode_attribute *attr = &unicode_attributes[ch];
+  const char *cat;
+
+  if (attr->name == NULL)
+    return false;
+  cat = attr->category;
+
+  /* Letters.  Theppitak Karoonboonyanan <thep@links.nectec.or.th> says
+     <U0E2F>, <U0E46> should belong to is_punct.  */
+  if (cat[0] == 'L' && ch != 0x0E2F && ch != 0x0E46)
+    return true;
+
+  /* Theppitak Karoonboonyanan <thep@links.nectec.or.th> says
+     <U0E31>, <U0E34>..<U0E3A>, <U0E47>..<U0E4E> are is_alpha.  */
+  if (ch == 0x0E31)
+    return true;
+  if (ch >= 0x0E34 && ch <= 0x0E3A)
+    return true;
+  if (ch >= 0x0E47 && ch <= 0x0E4E)
+    return true;
+
+  /* Avoid warning for <U0345>.  */
+  if (ch == 0x0345)
+    return true;
+
+  /* Avoid warnings for <U2160>..<U217F>.  */
+  if (cat[0] == 'N' && cat[1] == 'l')
+    return true;
+
+  /* Avoid warnings for <U24B6>..<U24E9>.  */
+  if (cat[0] == 'S' && cat[1] == 'o'
+      && strstr (attr->name, " LETTER ") != NULL)
+    return true;
+
+  /* Consider all the non-ASCII digits as alphabetic.
+     ISO C 99 forbids us to have them in category "digit",
+     but we want iswalnum to return true on them.  */
+  if (cat[0] == 'N' && cat[1] == 'd')
+    return ch < 0x0030 || ch > 0x0039;
+
+  return false;
+}
+
+/* Only the ten ASCII digits are in class "digit".
+
+   SUSV2 gives us some freedom for the "digit" category, but ISO C 99
+   takes it away:
+   7.25.2.1.5:
+      The iswdigit function tests for any wide character that corresponds
+      to a decimal-digit character (as defined in 5.2.1).
+   5.2.1:
+      the 10 decimal digits 0 1 2 3 4 5 6 7 8 9
+
+   The alternative that was disabled here accepted every defined
+   character of category "Nd"; with it, U+0BE7..U+0BEF and U+1369..U+1371
+   (digit systems without a zero) would need a <0> added by hand.  */
+static bool
+is_digit (unsigned int ch)
+{
+  return !(ch < 0x0030 || ch > 0x0039);
+}
+
+/* True exactly for the code points 0x0030..0x0039.  */
+static bool
+is_outdigit (unsigned int ch)
+{
+  return !(ch < 0x0030 || ch > 0x0039);
+}
+
+/* CH is blank when it is a tab, or a defined character of category Zs
+   whose decomposition does not mention "<noBreak>".  */
+static bool
+is_blank (unsigned int ch)
+{
+  const struct unicode_attribute *attr;
+
+  if (ch == 0x0009) /* '\t' */
+    return true;
+
+  attr = &unicode_attributes[ch];
+  if (attr->name == NULL)
+    return false;
+
+  /* Category Zs without mention of "<noBreak>" */
+  return (attr->category[0] == 'Z'
+          && attr->category[1] == 's'
+          && strstr (attr->decomposition, "<noBreak>") == NULL);
+}
+
+/* CH is a space when it is one of the six ASCII whitespace characters, or
+   a defined character of category Zl, Zp, or Zs without "<noBreak>" in
+   its decomposition.
+   Don't make U+00A0 a space.  Non-breaking space means that all programs
+   should treat it like a punctuation character, not like a space.  */
+static bool
+is_space (unsigned int ch)
+{
+  const struct unicode_attribute *attr;
+
+  switch (ch)
+    {
+    case 0x0020: /* ' ' */
+    case 0x000C: /* '\f' */
+    case 0x000A: /* '\n' */
+    case 0x000D: /* '\r' */
+    case 0x0009: /* '\t' */
+    case 0x000B: /* '\v' */
+      return true;
+    default:
+      break;
+    }
+
+  attr = &unicode_attributes[ch];
+  if (attr->name == NULL || attr->category[0] != 'Z')
+    return false;
+
+  /* Categories Zl, Zp, and Zs without mention of "<noBreak>" */
+  switch (attr->category[1])
+    {
+    case 'l':
+    case 'p':
+      return true;
+    case 's':
+      return strstr (attr->decomposition, "<noBreak>") == NULL;
+    default:
+      return false;
+    }
+}
+
+static bool
+is_cntrl (unsigned int ch)
+{
+ return (unicode_attributes[ch].name != NULL
+ && (!strcmp (unicode_attributes[ch].name, "<control>")
+ /* Categories Zl and Zp */
+ || (unicode_attribute