Added several tests for locale support (8 bit and multibyte UTF-8)

Basically all tests have been taken from glibc. For testing multibyte encoding EUC_JP parts have been commented out and added new section for UTF-8 that is the only multibyte codeset currently supported on uCLibc. Some tests are still failing due to unsupported/missing features, other have been fixed. Signed-off-by: Filippo Arcidiacono <filippo.arcidiacono@st.com> Signed-off-by: Carmelo Amoroso <carmelo.amoroso@st.com>
author: Carmelo Amoroso <carmelo.amoroso@st.com> 2008-07-09 15:05:36 +0000
committer: Carmelo Amoroso <carmelo.amoroso@st.com> 2008-07-09 15:05:36 +0000
commit: a691312d8794d5516402bb6bb0d3e90c40ba188b (patch)
tree: dcac242fcad7d24a4f452722de26c56cfaf8c98a
parent: 56df95fe5d0778352abe09225d6587b88643d135 (diff)
32 files changed, 4648 insertions, 0 deletions
diff --git a/test/locale/Makefile b/test/locale/Makefile
new file mode 100644
index 000000000..0ab07f7aa
--- /dev/null
+++ b/test/locale/Makefile
@@ -0,0 +1,31 @@
+# uClibc locale tests
+# Licensed under the LGPL v2.1, see the file COPYING.LIB in this tarball.
+#	 tst_mbtowc tst_strcoll tst_strfmon tst_strxfrm    \
+
+TESTS := bug-iconv-trans bug-usesetlocale collate-test dump-ctype \
+	 gen-unicode-ctype show-ucs-data tst-ctype \
+	 tst-digits tst-fmon tst-langinfo tst-leaks tst-mbswcs1 \
+	 tst-mbswcs2 tst-mbswcs3 tst-mbswcs4 tst-mbswcs5 tst-mbswcs6 \
+	 tst_nl_langinfo tst-numeric tst-rpmatch tst-setlocale \
+	 tst-sscanf tst-strfmon1 tst-trans tst-wctype tst-xlocale1 \
+	 tst-xlocale2 xfrm-test
+
+
+# NOTE: For now disabled some tests that are known not build
+TESTS_DISABLED := tst-ctype tst-fmon tst-leaks tst-rpmatch tst-strfmon1
+
+ifneq ($(UCLIBC_HAS_XLOCALE),y)
+TESTS_DISABLED += bug-usesetlocale tst-xlocale1 tst-xlocale2 xfrm-test tst-C-locale
+endif
+
+include ../Test.mak
+
+DODIFF_rint     := 1
+
+EXTRA_CFLAGS    := -D__USE_GNU -fno-builtin
+
+OPTS_dump-ctype = C
+OPTS_tst-ctype = < tst-ctype-de_DE.ISO-8859-1.in
+OPTS_tst-langinfo = < tst-langinfo.input
+
+EXTRA_CLEAN := C
diff --git a/test/locale/bug-iconv-trans.c b/test/locale/bug-iconv-trans.c
new file mode 100644
index 000000000..3886247c3
--- /dev/null
+++ b/test/locale/bug-iconv-trans.c
@@ -0,0 +1,68 @@
+#include <iconv.h>
+#include <locale.h>
+#include <stdio.h>
+#include <string.h>
+
+int
+main (void)
+{
+  iconv_t cd;
+  const char str[] = "�������";
+  const char expected[] = "AEaeOEoeUEuess";
+  char *inptr = (char *) str;
+  size_t inlen = strlen (str) + 1;
+  char outbuf[500];
+  char *outptr = outbuf;
+  size_t outlen = sizeof (outbuf);
+  int result = 0;
+  size_t n;
+
+  if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
+    {
+      puts ("setlocale failed");
+      return 1;
+    }
+
+  cd = iconv_open ("ANSI_X3.4-1968//TRANSLIT", "ISO-8859-1");
+  if (cd == (iconv_t) -1)
+    {
+      puts ("iconv_open failed");
+      return 1;
+    }
+
+  n = iconv (cd, &inptr, &inlen, &outptr, &outlen);
+  if (n != 7)
+    {
+      if (n == (size_t) -1)
+	printf ("iconv() returned error: %m\n");
+      else
+	printf ("iconv() returned %Zd, expected 7\n", n);
+      result = 1;
+    }
+  if (inlen != 0)
+    {
+      puts ("not all input consumed");
+      result = 1;
+    }
+  else if (inptr - str != strlen (str) + 1)
+    {
+      printf ("inptr wrong, advanced by %td\n", inptr - str);
+      result = 1;
+    }
+  if (memcmp (outbuf, expected, sizeof (expected)) != 0)
+    {
+      printf ("result wrong: \"%.*s\", expected: \"%s\"\n",
+	      (int) (sizeof (outbuf) - outlen), outbuf, expected);
+      result = 1;
+    }
+  else if (outlen != sizeof (outbuf) - sizeof (expected))
+    {
+      printf ("outlen wrong: %Zd, expected %Zd\n", outlen,
+	      sizeof (outbuf) - 15);
+      result = 1;
+    }
+  else
+    printf ("output is \"%s\" which is OK\n", outbuf);
+
+  return result;
+}
diff --git a/test/locale/bug-usesetlocale.c b/test/locale/bug-usesetlocale.c
new file mode 100644
index 000000000..0637067de
--- /dev/null
+++ b/test/locale/bug-usesetlocale.c
@@ -0,0 +1,38 @@
+/* Test case for setlocale vs uselocale (LC_GLOBAL_LOCALE) bug.  */
+
+#define _GNU_SOURCE 1
+#include <locale.h>
+#include <stdio.h>
+#include <ctype.h>
+
+static int
+do_test (void)
+{
+  __locale_t loc_new, loc_old;
+
+  int first = !!isalpha(0xE4);
+
+  setlocale (LC_ALL, "de_DE");
+
+  int global_de = !!isalpha(0xE4);
+
+  loc_new = newlocale (1 << LC_ALL, "C", 0);
+  loc_old = uselocale (loc_new);
+
+  int used_c = !!isalpha(0xE4);
+
+  uselocale (loc_old);
+
+  int used_global = !!isalpha(0xE4);
+
+  printf ("started %d, after setlocale %d\n", first, global_de);
+  printf ("after uselocale %d, after LC_GLOBAL_LOCALE %d\n",
+	  used_c, used_global);
+
+  freelocale (loc_new);
+  return !(used_c == first && used_global == global_de);
+}
+
+
+#define TEST_FUNCTION do_test ()
+#include "test-skeleton.c"
diff --git a/test/locale/collate-test.c b/test/locale/collate-test.c
new file mode 100644
index 000000000..e8f43218f
--- /dev/null
+++ b/test/locale/collate-test.c
@@ -0,0 +1,133 @@
+/* Test collation function using real data.
+   Copyright (C) 1997, 1999, 2000, 2003 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <ctype.h>
+#include <error.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+struct lines
+{
+  char *key;
+  char *line;
+};
+
+static int xstrcoll (const void *, const void *);
+
+int
+main (int argc, char *argv[])
+{
+  int result = 0;
+  size_t nstrings, nstrings_max;
+  struct lines *strings;
+  char *line = NULL;
+  size_t len = 0;
+  size_t n;
+
+  if (argc < 2)
+    error (1, 0, "usage: %s <random seed>", argv[0]);
+
+  setlocale (LC_ALL, "");
+
+  nstrings_max = 100;
+  nstrings = 0;
+  strings = (struct lines *) malloc (nstrings_max * sizeof (struct lines));
+  if (strings == NULL)
+    {
+      perror (argv[0]);
+      exit (1);
+    }
+
+  while (1)
+    {
+      int l;
+      if (getline (&line, &len, stdin) < 0)
+	break;
+
+      if (nstrings == nstrings_max)
+	{
+	  strings = (struct lines *) realloc (strings,
+					      (nstrings_max *= 2)
+					       * sizeof (*strings));
+	  if (strings == NULL)
+	    {
+	      perror (argv[0]);
+	      exit (1);
+	    }
+	}
+      strings[nstrings].line = strdup (line);
+      l = strcspn (line, ":(;");
+      while (l > 0 && isspace (line[l - 1]))
+	--l;
+      strings[nstrings].key = strndup (line, l);
+      ++nstrings;
+    }
+  free (line);
+
+  /* First shuffle.  */
+  srandom (atoi (argv[1]));
+  for (n = 0; n < 10 * nstrings; ++n)
+    {
+      int r1, r2, r;
+      size_t idx1 = random () % nstrings;
+      size_t idx2 = random () % nstrings;
+      struct lines tmp = strings[idx1];
+      strings[idx1] = strings[idx2];
+      strings[idx2] = tmp;
+
+      /* While we are at it a first little test.  */
+      r1 = strcoll (strings[idx1].key, strings[idx2].key);
+      r2 = strcoll (strings[idx2].key, strings[idx1].key);
+      r = r1 * r2;
+
+      if (r > 0 || (r == 0 && r1 != 0) || (r == 0 && r2 != 0))
+	printf ("`%s' and `%s' collate wrong: %d vs. %d\n",
+		strings[idx1].key, strings[idx2].key, r1, r2);
+    }
+
+  /* Now sort.  */
+  qsort (strings, nstrings, sizeof (struct lines), xstrcoll);
+
+  /* Print the result.  */
+  for (n = 0; n < nstrings; ++n)
+    {
+      fputs (strings[n].line, stdout);
+      free (strings[n].line);
+      free (strings[n].key);
+    }
+  free (strings);
+
+  return result;
+}
+
+
+static int
+xstrcoll (ptr1, ptr2)
+     const void *ptr1;
+     const void *ptr2;
+{
+  const struct lines *l1 = (const struct lines *) ptr1;
+  const struct lines *l2 = (const struct lines *) ptr2;
+
+  return strcoll (l1->key, l2->key);
+}
diff --git a/test/locale/dump-ctype.c b/test/locale/dump-ctype.c
new file mode 100644
index 000000000..a1f24c656
--- /dev/null
+++ b/test/locale/dump-ctype.c
@@ -0,0 +1,164 @@
+/* Dump the character classes and character maps of a locale to a bunch
+   of individual files which can be processed with diff, sed etc.
+   Copyright (C) 2000 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* Usage example:
+     $ dump-ctype de_DE.UTF-8
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <wctype.h>
+#include <locale.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+
+static const char *program_name = "dump-ctype";
+static const char *locale;
+
+static const char *class_names[] =
+  {
+    "alnum", "alpha", "blank", "cntrl", "digit", "graph", "lower",
+    "print", "punct", "space", "upper", "xdigit"
+  };
+
+static const char *map_names[] =
+  {
+    "tolower", "toupper", "totitle"
+  };
+
+static void dump_class (const char *class_name)
+{
+  wctype_t class;
+  FILE *f;
+  unsigned int ch;
+
+  class = wctype (class_name);
+  if (class == (wctype_t) 0)
+    {
+      fprintf (stderr, "%s %s: noexistent class %s\n", program_name,
+	       locale, class_name);
+      return;
+    }
+
+  f = fopen (class_name, "w");
+  if (f == NULL)
+    {
+      fprintf (stderr, "%s %s: cannot open file %s/%s\n", program_name,
+	       locale, locale, class_name);
+      exit (1);
+    }
+
+  for (ch = 0; ch < 0x10000; ch++)
+    if (iswctype (ch, class))
+      fprintf (f, "0x%04X\n", ch);
+
+  if (ferror (f) || fclose (f))
+    {
+      fprintf (stderr, "%s %s: I/O error on file %s/%s\n", program_name,
+	       locale, locale, class_name);
+      exit (1);
+    }
+}
+
+static void dump_map (const char *map_name)
+{
+  wctrans_t map;
+  FILE *f;
+  unsigned int ch;
+
+  map = wctrans (map_name);
+  if (map == (wctrans_t) 0)
+    {
+      fprintf (stderr, "%s %s: noexistent map %s\n", program_name,
+	       locale, map_name);
+      return;
+    }
+
+  f = fopen (map_name, "w");
+  if (f == NULL)
+    {
+      fprintf (stderr, "%s %s: cannot open file %s/%s\n", program_name,
+	       locale, locale, map_name);
+      exit (1);
+    }
+
+  for (ch = 0; ch < 0x10000; ch++)
+    if (towctrans (ch, map) != ch)
+      fprintf (f, "0x%04X\t0x%04X\n", ch, towctrans (ch, map));
+
+  if (ferror (f) || fclose (f))
+    {
+      fprintf (stderr, "%s %s: I/O error on file %s/%s\n", program_name,
+	       locale, locale, map_name);
+      exit (1);
+    }
+}
+
+int
+main (int argc, char *argv[])
+{
+  size_t i;
+
+  if (argc != 2)
+    {
+      fprintf (stderr, "Usage: dump-ctype locale\n");
+      exit (1);
+    }
+  locale = argv[1];
+
+  if (setlocale (LC_ALL, locale) == NULL)
+    {
+      fprintf (stderr, "%s: setlocale cannot switch to locale %s\n",
+	       program_name, locale);
+      exit (1);
+    }
+
+  if (mkdir (locale, 0777) < 0)
+    {
+      char buf[100];
+      int save_errno = errno;
+
+      sprintf (buf, "%s: cannot create directory %s", program_name, locale);
+      errno = save_errno;
+      perror (buf);
+      exit (1);
+    }
+
+  if (chdir (locale) < 0)
+    {
+      char buf[100];
+      int save_errno = errno;
+
+      sprintf (buf, "%s: cannot chdir to %s", program_name, locale);
+      errno = save_errno;
+      perror (buf);
+      exit (1);
+    }
+
+  for (i = 0; i < sizeof (class_names) / sizeof (class_names[0]); i++)
+    dump_class (class_names[i]);
+
+  for (i = 0; i < sizeof (map_names) / sizeof (map_names[0]); i++)
+    dump_map (map_names[i]);
+
+  return 0;
+}
diff --git a/test/locale/gen-unicode-ctype.c b/test/locale/gen-unicode-ctype.c
new file mode 100644
index 000000000..849f272ed
--- /dev/null
+++ b/test/locale/gen-unicode-ctype.c
@@ -0,0 +1,785 @@
+/* Generate a Unicode conforming LC_CTYPE category from a UnicodeData file.
+   Copyright (C) 2000-2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Bruno Haible <haible@clisp.cons.org>, 2000.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* Usage example:
+     $ gen-unicode /usr/local/share/Unidata/UnicodeData.txt 3.1
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+
+/* This structure represents one line in the UnicodeData.txt file.  */
+struct unicode_attribute
+{
+  const char *name;           /* Character name */
+  const char *category;       /* General category */
+  const char *combining;      /* Canonical combining classes */
+  const char *bidi;           /* Bidirectional category */
+  const char *decomposition;  /* Character decomposition mapping */
+  const char *decdigit;       /* Decimal digit value */
+  const char *digit;          /* Digit value */
+  const char *numeric;        /* Numeric value */
+  int mirrored;               /* mirrored */
+  const char *oldname;        /* Old Unicode 1.0 name */
+  const char *comment;        /* Comment */
+  unsigned int upper;         /* Uppercase mapping */
+  unsigned int lower;         /* Lowercase mapping */
+  unsigned int title;         /* Titlecase mapping */
+};
+
+/* Missing fields are represented with "" for strings, and NONE for
+   characters.  */
+#define NONE (~(unsigned int)0)
+
+/* The entire contents of the UnicodeData.txt file.  */
+struct unicode_attribute unicode_attributes [0x110000];
+
+/* Stores in unicode_attributes[i] the values from the given fields.  */
+static void
+fill_attribute (unsigned int i,
+		const char *field1, const char *field2,
+		const char *field3, const char *field4,
+		const char *field5, const char *field6,
+		const char *field7, const char *field8,
+		const char *field9, const char *field10,
+		const char *field11, const char *field12,
+		const char *field13, const char *field14)
+{
+  struct unicode_attribute * uni;
+
+  if (i >= 0x110000)
+    {
+      fprintf (stderr, "index too large\n");
+      exit (1);
+    }
+  if (strcmp (field2, "Cs") == 0)
+    /* Surrogates are UTF-16 artefacts, not real characters. Ignore them.  */
+    return;
+  uni = &unicode_attributes[i];
+  /* Copy the strings.  */
+  uni->name          = strdup (field1);
+  uni->category      = (field2[0] == '\0' ? "" : strdup (field2));
+  uni->combining     = (field3[0] == '\0' ? "" : strdup (field3));
+  uni->bidi          = (field4[0] == '\0' ? "" : strdup (field4));
+  uni->decomposition = (field5[0] == '\0' ? "" : strdup (field5));
+  uni->decdigit      = (field6[0] == '\0' ? "" : strdup (field6));
+  uni->digit         = (field7[0] == '\0' ? "" : strdup (field7));
+  uni->numeric       = (field8[0] == '\0' ? "" : strdup (field8));
+  uni->mirrored      = (field9[0] == 'Y');
+  uni->oldname       = (field10[0] == '\0' ? "" : strdup (field10));
+  uni->comment       = (field11[0] == '\0' ? "" : strdup (field11));
+  uni->upper = (field12[0] =='\0' ? NONE : strtoul (field12, NULL, 16));
+  uni->lower = (field13[0] =='\0' ? NONE : strtoul (field13, NULL, 16));
+  uni->title = (field14[0] =='\0' ? NONE : strtoul (field14, NULL, 16));
+}
+
+/* Maximum length of a field in the UnicodeData.txt file.  */
+#define FIELDLEN 120
+
+/* Reads the next field from STREAM.  The buffer BUFFER has size FIELDLEN.
+   Reads up to (but excluding) DELIM.
+   Returns 1 when a field was successfully read, otherwise 0.  */
+static int
+getfield (FILE *stream, char *buffer, int delim)
+{
+  int count = 0;
+  int c;
+
+  for (; (c = getc (stream)), (c != EOF && c != delim); )
+    {
+      /* The original unicode.org UnicodeData.txt file happens to have
+	 CR/LF line terminators.  Silently convert to LF.  */
+      if (c == '\r')
+	continue;
+
+      /* Put c into the buffer.  */
+      if (++count >= FIELDLEN - 1)
+	{
+	  fprintf (stderr, "field too long\n");
+	  exit (1);
+	}
+      *buffer++ = c;
+    }
+
+  if (c == EOF)
+    return 0;
+
+  *buffer = '\0';
+  return 1;
+}
+
+/* Stores in unicode_attributes[] the entire contents of the UnicodeData.txt
+   file.  */
+static void
+fill_attributes (const char *unicodedata_filename)
+{
+  unsigned int i, j;
+  FILE *stream;
+  char field0[FIELDLEN];
+  char field1[FIELDLEN];
+  char field2[FIELDLEN];
+  char field3[FIELDLEN];
+  char field4[FIELDLEN];
+  char field5[FIELDLEN];
+  char field6[FIELDLEN];
+  char field7[FIELDLEN];
+  char field8[FIELDLEN];
+  char field9[FIELDLEN];
+  char field10[FIELDLEN];
+  char field11[FIELDLEN];
+  char field12[FIELDLEN];
+  char field13[FIELDLEN];
+  char field14[FIELDLEN];
+  int lineno = 0;
+
+  for (i = 0; i < 0x110000; i++)
+    unicode_attributes[i].name = NULL;
+
+  stream = fopen (unicodedata_filename, "r");
+  if (stream == NULL)
+    {
+      fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
+      exit (1);
+    }
+
+  for (;;)
+    {
+      int n;
+
+      lineno++;
+      n = getfield (stream, field0, ';');
+      n += getfield (stream, field1, ';');
+      n += getfield (stream, field2, ';');
+      n += getfield (stream, field3, ';');
+      n += getfield (stream, field4, ';');
+      n += getfield (stream, field5, ';');
+      n += getfield (stream, field6, ';');
+      n += getfield (stream, field7, ';');
+      n += getfield (stream, field8, ';');
+      n += getfield (stream, field9, ';');
+      n += getfield (stream, field10, ';');
+      n += getfield (stream, field11, ';');
+      n += getfield (stream, field12, ';');
+      n += getfield (stream, field13, ';');
+      n += getfield (stream, field14, '\n');
+      if (n == 0)
+	break;
+      if (n != 15)
+	{
+	  fprintf (stderr, "short line in'%s':%d\n",
+		   unicodedata_filename, lineno);
+	  exit (1);
+	}
+      i = strtoul (field0, NULL, 16);
+      if (field1[0] == '<'
+	  && strlen (field1) >= 9
+	  && !strcmp (field1 + strlen(field1) - 8, ", First>"))
+	{
+	  /* Deal with a range. */
+	  lineno++;
+	  n = getfield (stream, field0, ';');
+	  n += getfield (stream, field1, ';');
+	  n += getfield (stream, field2, ';');
+	  n += getfield (stream, field3, ';');
+	  n += getfield (stream, field4, ';');
+	  n += getfield (stream, field5, ';');
+	  n += getfield (stream, field6, ';');
+	  n += getfield (stream, field7, ';');
+	  n += getfield (stream, field8, ';');
+	  n += getfield (stream, field9, ';');
+	  n += getfield (stream, field10, ';');
+	  n += getfield (stream, field11, ';');
+	  n += getfield (stream, field12, ';');
+	  n += getfield (stream, field13, ';');
+	  n += getfield (stream, field14, '\n');
+	  if (n != 15)
+	    {
+	      fprintf (stderr, "missing end range in '%s':%d\n",
+		       unicodedata_filename, lineno);
+	      exit (1);
+	    }
+	  if (!(field1[0] == '<'
+		&& strlen (field1) >= 8
+		&& !strcmp (field1 + strlen (field1) - 7, ", Last>")))
+	    {
+	      fprintf (stderr, "missing end range in '%s':%d\n",
+		       unicodedata_filename, lineno);
+	      exit (1);
+	    }
+	  field1[strlen (field1) - 7] = '\0';
+	  j = strtoul (field0, NULL, 16);
+	  for (; i <= j; i++)
+	    fill_attribute (i, field1+1, field2, field3, field4, field5,
+			       field6, field7, field8, field9, field10,
+			       field11, field12, field13, field14);
+	}
+      else
+	{
+	  /* Single character line */
+	  fill_attribute (i, field1, field2, field3, field4, field5,
+			     field6, field7, field8, field9, field10,
+			     field11, field12, field13, field14);
+	}
+    }
+  if (ferror (stream) || fclose (stream))
+    {
+      fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
+      exit (1);
+    }
+}
+
+/* Character mappings.  */
+
+static unsigned int
+to_upper (unsigned int ch)
+{
+  if (unicode_attributes[ch].name != NULL
+      && unicode_attributes[ch].upper != NONE)
+    return unicode_attributes[ch].upper;
+  else
+    return ch;
+}
+
+static unsigned int
+to_lower (unsigned int ch)
+{
+  if (unicode_attributes[ch].name != NULL
+      && unicode_attributes[ch].lower != NONE)
+    return unicode_attributes[ch].lower;
+  else
+    return ch;
+}
+
+static unsigned int
+to_title (unsigned int ch)
+{
+  if (unicode_attributes[ch].name != NULL
+      && unicode_attributes[ch].title != NONE)
+    return unicode_attributes[ch].title;
+  else
+    return ch;
+}
+
+/* Character class properties.  */
+
+static bool
+is_upper (unsigned int ch)
+{
+  return (to_lower (ch) != ch);
+}
+
+static bool
+is_lower (unsigned int ch)
+{
+  return (to_upper (ch) != ch)
+	 /* <U00DF> is lowercase, but without simple to_upper mapping.  */
+	 || (ch == 0x00DF);
+}
+
+static bool
+is_alpha (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+	  && ((unicode_attributes[ch].category[0] == 'L'
+	       /* Theppitak Karoonboonyanan <thep@links.nectec.or.th> says
+		  <U0E2F>, <U0E46> should belong to is_punct.  */
+	       && (ch != 0x0E2F) && (ch != 0x0E46))
+	      /* Theppitak Karoonboonyanan <thep@links.nectec.or.th> says
+		 <U0E31>, <U0E34>..<U0E3A>, <U0E47>..<U0E4E> are is_alpha.  */
+	      || (ch == 0x0E31)
+	      || (ch >= 0x0E34 && ch <= 0x0E3A)
+	      || (ch >= 0x0E47 && ch <= 0x0E4E)
+	      /* Avoid warning for <U0345>.  */
+	      || (ch == 0x0345)
+	      /* Avoid warnings for <U2160>..<U217F>.  */
+	      || (unicode_attributes[ch].category[0] == 'N'
+		  && unicode_attributes[ch].category[1] == 'l')
+	      /* Avoid warnings for <U24B6>..<U24E9>.  */
+	      || (unicode_attributes[ch].category[0] == 'S'
+		  && unicode_attributes[ch].category[1] == 'o'
+		  && strstr (unicode_attributes[ch].name, " LETTER ")
+		     != NULL)
+	      /* Consider all the non-ASCII digits as alphabetic.
+		 ISO C 99 forbids us to have them in category "digit",
+		 but we want iswalnum to return true on them.  */
+	      || (unicode_attributes[ch].category[0] == 'N'
+		  && unicode_attributes[ch].category[1] == 'd'
+		  && !(ch >= 0x0030 && ch <= 0x0039))));
+}
+
+static bool
+is_digit (unsigned int ch)
+{
+#if 0
+  return (unicode_attributes[ch].name != NULL
+	  && unicode_attributes[ch].category[0] == 'N'
+	  && unicode_attributes[ch].category[1] == 'd');
+  /* Note: U+0BE7..U+0BEF and U+1369..U+1371 are digit systems without
+     a zero.  Must add <0> in front of them by hand.  */
+#else
+  /* SUSV2 gives us some freedom for the "digit" category, but ISO C 99
+     takes it away:
+     7.25.2.1.5:
+        The iswdigit function tests for any wide character that corresponds
+        to a decimal-digit character (as defined in 5.2.1).
+     5.2.1:
+        the 10 decimal digits 0 1 2 3 4 5 6 7 8 9
+   */
+  return (ch >= 0x0030 && ch <= 0x0039);
+#endif
+}
+
+static bool
+is_outdigit (unsigned int ch)
+{
+  return (ch >= 0x0030 && ch <= 0x0039);
+}
+
+static bool
+is_blank (unsigned int ch)
+{
+  return (ch == 0x0009 /* '\t' */
+	  /* Category Zs without mention of "<noBreak>" */
+	  || (unicode_attributes[ch].name != NULL
+	      && unicode_attributes[ch].category[0] == 'Z'
+	      && unicode_attributes[ch].category[1] == 's'
+	      && !strstr (unicode_attributes[ch].decomposition, "<noBreak>")));
+}
+
+static bool
+is_space (unsigned int ch)
+{
+  /* Don't make U+00A0 a space. Non-breaking space means that all programs
+     should treat it like a punctuation character, not like a space. */
+  return (ch == 0x0020 /* ' ' */
+	  || ch == 0x000C /* '\f' */
+	  || ch == 0x000A /* '\n' */
+	  || ch == 0x000D /* '\r' */
+	  || ch == 0x0009 /* '\t' */
+	  || ch == 0x000B /* '\v' */
+	  /* Categories Zl, Zp, and Zs without mention of "<noBreak>" */
+	  || (unicode_attributes[ch].name != NULL
+	      && unicode_attributes[ch].category[0] == 'Z'
+	      && (unicode_attributes[ch].category[1] == 'l'
+		  || unicode_attributes[ch].category[1] == 'p'
+		  || (unicode_attributes[ch].category[1] == 's'
+		      && !strstr (unicode_attributes[ch].decomposition,
+				  "<noBreak>")))));
+}
+
+static bool
+is_cntrl (unsigned int ch)
+{
+  return (unicode_attributes[ch].name != NULL
+	  && (!strcmp (unicode_attributes[ch].name, "<control>")
+	      /* Categories Zl and Zp */
+	      || (unicode_attribute
author	Carmelo Amoroso <carmelo.amoroso@st.com>	2008-07-09 15:05:36 +0000
committer	Carmelo Amoroso <carmelo.amoroso@st.com>	2008-07-09 15:05:36 +0000
commit	a691312d8794d5516402bb6bb0d3e90c40ba188b (patch)
tree	dcac242fcad7d24a4f452722de26c56cfaf8c98a
parent	56df95fe5d0778352abe09225d6587b88643d135 (diff)