summaryrefslogtreecommitdiff
path: root/include/ctype.h
diff options
context:
space:
mode:
authorManuel Novoa III <mjn3@codepoet.org>2003-08-01 20:08:59 +0000
committerManuel Novoa III <mjn3@codepoet.org>2003-08-01 20:08:59 +0000
commit1217289737588e65b088b3535428b27c7287d699 (patch)
tree6a292ac767d219702e26a6a2111737f84a96900c /include/ctype.h
parent32b76c5ec3c257b7287913d0d1a96e0cbb2e9c6a (diff)
Add a new *scanf implementation, including the *wscanf functions.
Should be standards compliant and with several optional features, including support for hexadecimal float notation, locale awareness, glibc-like locale-specific digit grouping with the `'' flag, and positional arg support. I tested it pretty well (finding several bugs in glibc's scanf in the process), but it is brand new so be aware. The *wprintf functions now support floating point output. Also, a couple of bugs were squashed. Finally, %a/%A conversions are now implemented. Implement the glibc xlocale interface for thread-specific locale support. Also add the various *_l(args, locale_t loc_arg) funcs. NOTE!!! setlocale() is NOT threadsafe! NOTE!!! The strto{floating point} conversion functions are now locale aware. They also now support hexadecimal floating point notation. Add the wcsto{floating point} conversion functions. Fix a bug in mktime() related to dst. Note that unlike glibc's mktime, uClibc's version always normalizes the struct tm before attempting to determine the correct dst setting if tm_isdst == -1 on entry. Add a stub version of the libintl functions. (untested) Fixed a known memory leak in setlocale() related to the collation data. Add lots of new config options (which Erik agreed to sort out :-), including finally exposing some of the stripped down stdio configs. Be careful with those though, as they haven't been tested in a long time. (temporary) GOTCHAs... The ctype functions are currently incorrect for 8-bit locales. They will be fixed shortly. The ctype functions are now table-based, resulting in larger statically linked binaries. I'll be adding an option to use the old approach in the stub locale configuration.
Diffstat (limited to 'include/ctype.h')
-rw-r--r--include/ctype.h443
1 files changed, 348 insertions, 95 deletions
diff --git a/include/ctype.h b/include/ctype.h
index c6faf3d9b..23ff199e4 100644
--- a/include/ctype.h
+++ b/include/ctype.h
@@ -1,129 +1,382 @@
-/* Copyright (C) 2002 Manuel Novoa III
+/* Copyright (C) 1991,92,93,95,96,97,98,99,2001,02
+ Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+/*
+ * ISO C99 Standard 7.4: Character handling <ctype.h>
+ */
+
+#ifndef _CTYPE_H
+#define _CTYPE_H 1
+
+#include <features.h>
+#include <bits/types.h>
+
+__BEGIN_DECLS
+
+#ifndef _ISbit
+/* These are all the characteristics of characters.
+ If there get to be more than 16 distinct characteristics,
+ many things must be changed that use `__uint16_t's.
+
+ _ISbit (N) builds the single-bit mask for bit N; the enumerators
+ below assign bits 0 through 11, one per characteristic. */
+
+# define _ISbit(bit) (1 << (bit))
+
+enum
+{
+ _ISupper = _ISbit (0), /* UPPERCASE. */
+ _ISlower = _ISbit (1), /* lowercase. */
+ _ISalpha = _ISbit (2), /* Alphabetic. */
+ _ISdigit = _ISbit (3), /* Numeric. */
+ _ISxdigit = _ISbit (4), /* Hexadecimal numeric. */
+ _ISspace = _ISbit (5), /* Whitespace. */
+ _ISprint = _ISbit (6), /* Printing. */
+ _ISgraph = _ISbit (7), /* Graphical. */
+ _ISblank = _ISbit (8), /* Blank (usually SPC and TAB). */
+ _IScntrl = _ISbit (9), /* Control character. */
+ _ISpunct = _ISbit (10), /* Punctuation. */
+ _ISalnum = _ISbit (11) /* Alphanumeric. */
+};
+#else /* _ISbit was already defined before this point: stop the build. */
+#error _ISbit already defined!
+#endif /* ! _ISbit */
+
+#include <bits/uClibc_touplow.h>
+
+#ifdef __UCLIBC_HAS_CTYPE_SIGNED__ /* Domain test applied before indexing
+   the toupper/tolower tables.  Biasing by 128 and comparing as unsigned
+   checks both bounds at once: true for -128 <= c <= 255. */
+# define __UCLIBC_CTYPE_IN_TO_DOMAIN(c) (((unsigned int)((c) + 128)) < 384)
+
+#else /* ! __UCLIBC_HAS_CTYPE_SIGNED__: true only for 0 <= c <= 255. */
+# define __UCLIBC_CTYPE_IN_TO_DOMAIN(c) (((unsigned int)(c)) < 256)
+
+#endif /* __UCLIBC_HAS_CTYPE_SIGNED__ */
+
+/* In the thread-specific locale model (see `uselocale' in <locale.h>)
+ we cannot use global variables for these as was done in the past.
+ Instead, the following accessor functions return the address of
+ each variable, which is local to the current thread if multithreaded.
+
+ These point into arrays of 384, so they can be indexed by any `unsigned
+ char' value [0,255]; by EOF (-1); or by any `signed char' value
+ [-128,-1). ISO C requires that the ctype functions work for `unsigned
+ char' values and for EOF; we also support negative `signed char' values
+ for broken old programs. The case conversion arrays are of `int's
+ rather than `unsigned char's because tolower (EOF) must be EOF, which
+ doesn't fit into an `unsigned char'. But today more important is that
+ the arrays are also used for multi-byte character sets. */
+
+/* uClibc differences:
+ *
+ * When __UCLIBC_HAS_CTYPE_SIGNED__ is defined,
*
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * The upper and lower mapping arrays are type int16_t, so that
+ * they may store all char values plus EOF. The glibc reasoning
+ * given above for these being type int is questionable, as the
+ * ctype mapping functions map from the set of (unsigned) char
+ * and EOF back into the set. They have no awareness of multi-byte
+ * or wide characters.
*
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
+ * Otherwise,
*
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the Free
- * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * The ctype array is defined for -1..255.
+ * The upper and lower mapping arrays are defined for 0..255.
+ * The upper and lower mapping arrays are type unsigned char.
*/
-/* NOTE: It is assumed here and throughout the library that the underlying
- * char encoding for the portable C character set is ASCII (host & target). */
+/* Pointers to the default C-locale data. */
+extern const __uint16_t *__C_ctype_b;
+extern const __ctype_touplow_t *__C_ctype_toupper;
+extern const __ctype_touplow_t *__C_ctype_tolower;
-#ifndef _CTYPE_H
-#define _CTYPE_H
+#ifdef __UCLIBC_HAS_XLOCALE__
-#include <features.h>
-#include <bits/uClibc_ctype.h>
+extern __const __uint16_t **__ctype_b_loc (void)
+ __attribute__ ((__const));
+extern __const __ctype_touplow_t **__ctype_tolower_loc (void)
+ __attribute__ ((__const));
+extern __const __ctype_touplow_t **__ctype_toupper_loc (void)
+ __attribute__ ((__const));
-__BEGIN_DECLS
+#define __UCLIBC_CTYPE_B (*__ctype_b_loc())
+#define __UCLIBC_CTYPE_TOLOWER (*__ctype_tolower_loc())
+#define __UCLIBC_CTYPE_TOUPPER (*__ctype_toupper_loc())
-extern int isalnum(int c) __THROW;
-extern int isalpha(int c) __THROW;
-#ifdef __USE_ISOC99
-extern int isblank(int c) __THROW;
-#endif
-extern int iscntrl(int c) __THROW;
-extern int isdigit(int c) __THROW;
-extern int isgraph(int c) __THROW;
-extern int islower(int c) __THROW;
-extern int isprint(int c) __THROW;
-extern int ispunct(int c) __THROW;
-extern int isspace(int c) __THROW;
-extern int isupper(int c) __THROW;
-extern int isxdigit(int c) __THROW;
-
-extern int tolower(int c) __THROW;
-extern int toupper(int c) __THROW;
+#else /* __UCLIBC_HAS_XLOCALE__ */
-#if defined __USE_SVID || defined __USE_MISC || defined __USE_XOPEN
-extern int isascii(int c) __THROW;
-extern int toascii(int c) __THROW;
-#endif
+/* Pointers to the current global locale data in use. */
+extern const __uint16_t *__ctype_b;
+extern const __ctype_touplow_t *__ctype_toupper;
+extern const __ctype_touplow_t *__ctype_tolower;
+
+#define __UCLIBC_CTYPE_B (__ctype_b)
+#define __UCLIBC_CTYPE_TOLOWER (__ctype_tolower)
+#define __UCLIBC_CTYPE_TOUPPER (__ctype_toupper)
+
+#endif /* __UCLIBC_HAS_XLOCALE__ */
+
+#define __isctype(c, type) \
+ ((__UCLIBC_CTYPE_B)[(int) (c)] & (__uint16_t) type) /* Nonzero iff the
+   class-table entry for C has a bit of TYPE set.  C is used as an index
+   with no range check.  NOTE(review): TYPE follows the cast without
+   parentheses, so pass a single _IS* constant, not an expression such
+   as (_ISupper | _ISlower). */
+
+#define __isascii(c) (((c) & ~0x7f) == 0) /* If C is a 7 bit value. */
+#define __toascii(c) ((c) & 0x7f) /* Mask off high bits. */
-/* The following are included for compatibility with older versions of
- * uClibc; but now they're only visible if MISC funcctionality is requested.
- * However, as they are locale-independent, the hidden macro versions are
- * always present. */
#ifdef __USE_MISC
-extern int isxlower(int c) __THROW; /* uClibc-specific. */
-extern int isxupper(int c) __THROW; /* uClibc-specific. */
+
+/* The following are included for compatibility with older versions of
+ * uClibc; but now they're only visible if MISC functionality is requested. */
+extern int isxlower(int c) __THROW;
+extern int isxupper(int c) __THROW;
+
+/* isdigit() is really locale-invariant, so provide some small fast macros.
+ * These are uClibc-specific.
+ *
+ * Both subtract '0' and compare the result as an unsigned value against 9,
+ * so a single comparison rejects characters below '0' as well as above '9'.
+ * __isdigit_char narrows the difference to unsigned char before comparing,
+ * so it is meant for arguments already in char range; __isdigit_int
+ * compares the full unsigned int difference. */
+#define __isdigit_char(C) (((unsigned char)((C) - '0')) <= 9)
+#define __isdigit_int(C) (((unsigned int)((C) - '0')) <= 9)
+
#endif
-/* Next, some ctype macros which are valid for all supported locales. */
-/* WARNING: isspace and isblank need to be reverified if more 8-bit codesets
- * are added!!! But isdigit and isxdigit are always valid. */
+#define __exctype(name) extern int name (int) __THROW
-#define __isspace(c) __C_isspace(c)
-#define __isblank(c) __C_isblank(c)
+__BEGIN_NAMESPACE_STD
-#define __isdigit(c) __C_isdigit(c)
-#define __isxdigit(c) __C_isxdigit(c)
+/* The following names are all functions:
+ int isCHARACTERISTIC(int c);
+ which return nonzero iff C has CHARACTERISTIC.
+ For the meaning of the characteristic names, see the `enum' above. */
+__exctype (isalnum);
+__exctype (isalpha);
+__exctype (iscntrl);
+__exctype (isdigit);
+__exctype (islower);
+__exctype (isgraph);
+__exctype (isprint);
+__exctype (ispunct);
+__exctype (isspace);
+__exctype (isupper);
+__exctype (isxdigit);
-/* Now some non-ansi/iso c99 macros. */
-#define __isascii(c) (((c) & ~0x7f) == 0)
-#define __toascii(c) ((c) & 0x7f)
-#define _toupper(c) ((c) ^ 0x20)
-#define _tolower(c) ((c) | 0x20)
+/* Return the lowercase version of C. */
+extern int tolower (int __c) __THROW;
+/* Return the uppercase version of C. */
+extern int toupper (int __c) __THROW;
-/* For compatibility with older versions of uClibc. Are these ever used? */
-#define __isxlower(c) __C_isxlower(c) /* uClibc-specific. */
-#define __isxupper(c) __C_isxupper(c) /* uClibc-specific. */
+__END_NAMESPACE_STD
-/* Apparently, glibc implements things as macros if __NO_CTYPE isn't defined.
- * If we don't have locale support, we'll do the same. Otherwise, we'll
- * only use macros for the supported-locale-invariant cases. */
-#if 0
-/* Currently broken, since masking macros, other than getc and putc, must
- * evaluate their args exactly once. Will be fixed by the next release. mjn3 */
-/* #ifndef __NO_CTYPE */
-#define isdigit(c) __isdigit(c)
-#define isxdigit(c) __isxdigit(c)
-#define isspace(c) __isspace(c)
-#ifdef __USE_ISOC99
-#define isblank(c) __isblank(c)
-#endif
+/* ISO C99 introduced one new function. */
+#ifdef __USE_ISOC99
+__BEGIN_NAMESPACE_C99
-#if defined __USE_SVID || defined __USE_MISC || defined __USE_XOPEN
-#define isascii(c) __isascii(c)
-#define toascii(c) __toascii(c)
+__exctype (isblank);
+
+__END_NAMESPACE_C99
#endif
-#ifdef __USE_MISC
-#define isxlower(c) __C_isxlower(c) /* uClibc-specific. */
-#define isxupper(c) __C_isxupper(c) /* uClibc-specific. */
+#ifdef __USE_GNU
+/* Test C for a set of character classes according to MASK. */
+extern int isctype (int __c, int __mask) __THROW;
#endif
-/* TODO - Should test for 8-bit codesets instead, but currently impossible. */
-#ifndef __UCLIBC_HAS_LOCALE__
+#if defined __USE_SVID || defined __USE_MISC || defined __USE_XOPEN
+
+/* Return nonzero iff C is in the ASCII set
+ (i.e., is no more than 7 bits wide). */
+extern int isascii (int __c) __THROW;
+
+/* Return the part of C that is in the ASCII set
+ (i.e., the low-order 7 bits of C). */
+extern int toascii (int __c) __THROW;
+
+/* These are the same as `toupper' and `tolower' except that they do not
+ check the argument for being in the range of a `char'. */
+__exctype (_toupper);
+__exctype (_tolower);
+#endif /* Use SVID or use misc. */
+
+/* This code is needed for the optimized mapping functions.
+
+   __tobody (c, f, a, args) is a GCC statement expression yielding an int:
+     - C wider than one byte and a compile-time constant: look C up in
+       table A when __UCLIBC_CTYPE_IN_TO_DOMAIN accepts it, otherwise
+       yield C unchanged;
+     - C wider than one byte and not a constant: call F with the
+       parenthesized argument list ARGS;
+     - C exactly one byte wide: index table A directly, with no domain
+       check. */
+#define __tobody(c, f, a, args) \
+ (__extension__ \
+ ({ int __res; \
+ if (sizeof (c) > 1) \
+ { \
+ if (__builtin_constant_p (c)) \
+ { \
+ int __c = (c); \
+ __res = __UCLIBC_CTYPE_IN_TO_DOMAIN(__c) ? (a)[__c] : __c; \
+ } \
+ else \
+ __res = f args; \
+ } \
+ else \
+ __res = (a)[(int) (c)]; \
+ __res; }))
+
+#if !defined __NO_CTYPE && !defined __cplusplus
+# define isalnum(c) __isctype((c), _ISalnum) /* Macro forms of the
+   classification functions.  Each one tests a single class bit through
+   __isctype, which uses its character argument exactly once and does no
+   range check on it. */
+# define isalpha(c) __isctype((c), _ISalpha)
+# define iscntrl(c) __isctype((c), _IScntrl)
+# define isdigit(c) __isctype((c), _ISdigit)
+# define islower(c) __isctype((c), _ISlower)
+# define isgraph(c) __isctype((c), _ISgraph)
+# define isprint(c) __isctype((c), _ISprint)
+# define ispunct(c) __isctype((c), _ISpunct)
+# define isspace(c) __isctype((c), _ISspace)
+# define isupper(c) __isctype((c), _ISupper)
+# define isxdigit(c) __isctype((c), _ISxdigit)
+
+# ifdef __USE_ISOC99
+# define isblank(c) __isctype((c), _ISblank)
+# endif
+
+# ifdef __USE_EXTERN_INLINES /* Inline tolower/toupper.  A value accepted
+   by __UCLIBC_CTYPE_IN_TO_DOMAIN is mapped through
+   __UCLIBC_CTYPE_TOLOWER / __UCLIBC_CTYPE_TOUPPER; any other value is
+   returned unchanged. */
+extern __inline int
+tolower (int __c) __THROW
+{
+ return __UCLIBC_CTYPE_IN_TO_DOMAIN(__c) ? (__UCLIBC_CTYPE_TOLOWER)[__c] : __c;
+}
+
+extern __inline int
+toupper (int __c) __THROW
+{
+ return __UCLIBC_CTYPE_IN_TO_DOMAIN(__c) ? (__UCLIBC_CTYPE_TOUPPER)[__c] : __c;
+}
+# endif /* __USE_EXTERN_INLINES */
+
+# if __GNUC__ >= 2 && defined __OPTIMIZE__ && !defined __cplusplus
+# define tolower(c) __tobody (c, tolower, __UCLIBC_CTYPE_TOLOWER, (c))
+# define toupper(c) __tobody (c, toupper, __UCLIBC_CTYPE_TOUPPER, (c))
+# endif /* Optimizing gcc: tolower/toupper expand to __tobody, which does
+   the table lookup inline for constant or char-sized arguments and calls
+   the function of the same name otherwise. */
+
+# if defined __USE_SVID || defined __USE_MISC || defined __USE_XOPEN
+# define isascii(c) __isascii (c)
+# define toascii(c) __toascii (c)
+
+# define _tolower(c) ((int) (__UCLIBC_CTYPE_TOLOWER)[(int) (c)]) /* Unlike
+   tolower/toupper, _tolower and _toupper index the mapping table with no
+   __UCLIBC_CTYPE_IN_TO_DOMAIN check on C. */
+# define _toupper(c) ((int) (__UCLIBC_CTYPE_TOUPPER)[(int) (c)])
+# endif
+
+#endif /* Not __NO_CTYPE. */
+
+
+#if defined(__USE_GNU) && defined(__UCLIBC_HAS_XLOCALE__)
+/* The concept of one static locale per category is not very well
+ thought out. Many applications will need to process its data using
+ information from several different locales. Another application is
+ the implementation of the internationalization handling in the
+ upcoming ISO C++ standard library. To support this another set of
+ the functions using locale data exist which have an additional
+ argument.
+
+ Attention: all these functions are *not* standardized in any form.
+ This is a proof-of-concept implementation. */
+
+/* Structure for reentrant locale using functions. This is an
+ (almost) opaque type for the user level programs. */
+# include <xlocale.h>
+
+/* These definitions are similar to the ones above but all functions
+ take as an argument a handle for the locale which shall be used.
+
+ __isctype_l reads the class table through LOCALE's __ctype_b member;
+ as with __isctype, C is not range-checked and TYPE follows the cast
+ without parentheses, so pass a single _IS* constant.
+ __exctype_l (name) declares `extern int name (int, __locale_t)'. */
+# define __isctype_l(c, type, locale) \
+ ((locale)->__ctype_b[(int) (c)] & (__uint16_t) type)
+
+# define __exctype_l(name) \
+ extern int name (int, __locale_t) __THROW
+
+/* The following names are all functions:
+ int isCHARACTERISTIC_l(int c, __locale_t locale);
+ which return nonzero iff C has CHARACTERISTIC.
+ For the meaning of the characteristic names, see the `enum' above. */
+__exctype_l (isalnum_l);
+__exctype_l (isalpha_l);
+__exctype_l (iscntrl_l);
+__exctype_l (isdigit_l);
+__exctype_l (islower_l);
+__exctype_l (isgraph_l);
+__exctype_l (isprint_l);
+__exctype_l (ispunct_l);
+__exctype_l (isspace_l);
+__exctype_l (isupper_l);
+__exctype_l (isxdigit_l);
+
+__exctype_l (isblank_l);
+
+
+/* Return the lowercase version of C in locale L. */
+extern int __tolower_l (int __c, __locale_t __l) __THROW;
+extern int tolower_l (int __c, __locale_t __l) __THROW;
+
+/* Return the uppercase version of C in locale L. */
+extern int __toupper_l (int __c, __locale_t __l) __THROW;
+extern int toupper_l (int __c, __locale_t __l) __THROW;
+
+# if __GNUC__ >= 2 && defined __OPTIMIZE__ && !defined __cplusplus
+# define __tolower_l(c, locale) \
+ __tobody (c, __tolower_l, (locale)->__ctype_tolower, (c, locale))
+# define __toupper_l(c, locale) \
+ __tobody (c, __toupper_l, (locale)->__ctype_toupper, (c, locale))
+# define tolower_l(c, locale) __tolower_l ((c), (locale))
+# define toupper_l(c, locale) __toupper_l ((c), (locale))
+# endif /* Optimizing gcc: the _l mappers expand to __tobody using the
+   __ctype_tolower / __ctype_toupper tables reached through LOCALE, and
+   pass (c, locale) when __tobody falls back to the function call. */
+
+
+# ifndef __NO_CTYPE
+# define __isalnum_l(c,l) __isctype_l((c), _ISalnum, (l))
+# define __isalpha_l(c,l) __isctype_l((c), _ISalpha, (l))
+# define __iscntrl_l(c,l) __isctype_l((c), _IScntrl, (l))
+# define __isdigit_l(c,l) __isctype_l((c), _ISdigit, (l))
+# define __islower_l(c,l) __isctype_l((c), _ISlower, (l))
+# define __isgraph_l(c,l) __isctype_l((c), _ISgraph, (l))
+# define __isprint_l(c,l) __isctype_l((c), _ISprint, (l))
+# define __ispunct_l(c,l) __isctype_l((c), _ISpunct, (l))
+# define __isspace_l(c,l) __isctype_l((c), _ISspace, (l))
+# define __isupper_l(c,l) __isctype_l((c), _ISupper, (l))
+# define __isxdigit_l(c,l) __isctype_l((c), _ISxdigit, (l))
+
+# define __isblank_l(c,l) __isctype_l((c), _ISblank, (l))
+
+# if defined __USE_SVID || defined __USE_MISC || defined __USE_XOPEN
+# define __isascii_l(c,l) ((l), __isascii (c))
+# define __toascii_l(c,l) ((l), __toascii (c))
+# endif /* The comma expressions above evaluate L and discard it; the
+   value comes from __isascii / __toascii alone, so it does not depend
+   on the locale argument. */
+
+# define isalnum_l(c,l) __isalnum_l ((c), (l))
+# define isalpha_l(c,l) __isalpha_l ((c), (l))
+# define iscntrl_l(c,l) __iscntrl_l ((c), (l))
+# define isdigit_l(c,l) __isdigit_l ((c), (l))
+# define islower_l(c,l) __islower_l ((c), (l))
+# define isgraph_l(c,l) __isgraph_l ((c), (l))
+# define isprint_l(c,l) __isprint_l ((c), (l))
+# define ispunct_l(c,l) __ispunct_l ((c), (l))
+# define isspace_l(c,l) __isspace_l ((c), (l))
+# define isupper_l(c,l) __isupper_l ((c), (l))
+# define isxdigit_l(c,l) __isxdigit_l ((c), (l))
-#define isalnum(c) __C_isalnum(c)
-#define isalpha(c) __C_isalpha(c)
-#define iscntrl(c) __C_iscntrl(c)
-#define isgraph(c) __C_isgraph(c)
-#define islower(c) __C_islower(c)
-#define isprint(c) __C_isprint(c)
-#define ispunct(c) __C_ispunct(c)
-#define isupper(c) __C_isupper(c)
+# define isblank_l(c,l) __isblank_l ((c), (l))
-#define tolower(c) __C_tolower(c)
-#define toupper(c) __C_toupper(c)
+# if defined __USE_SVID || defined __USE_MISC || defined __USE_XOPEN
+# define isascii_l(c,l) __isascii_l ((c), (l))
+# define toascii_l(c,l) __toascii_l ((c), (l))
+# endif
-#endif /* __UCLIBC_HAS_LOCALE__ */
+# endif /* Not __NO_CTYPE. */
-#endif /* __NO_CTYPE */
+#endif /* Use GNU. */
__END_DECLS
-#endif /* _CTYPE_H */
+#endif /* ctype.h */