diff options
Diffstat (limited to 'libc/misc/ctype/ctype.c')
-rw-r--r-- | libc/misc/ctype/ctype.c | 486 |
1 files changed, 237 insertions, 249 deletions
diff --git a/libc/misc/ctype/ctype.c b/libc/misc/ctype/ctype.c index a3d3d4354..dedd5c00a 100644 --- a/libc/misc/ctype/ctype.c +++ b/libc/misc/ctype/ctype.c @@ -1,341 +1,329 @@ -/* ctype.c - * Character classification and conversion - * Copyright (C) 2000 Lineo, Inc. - * Written by Erik Andersen - * This file is part of the uClibc C library and is distributed - * under the GNU Library General Public License. +/* Copyright (C) 2002 Manuel Novoa III * - * not C-locale only code - * written by Vladimir Oleynik (c) vodz@usa.net - * and Manuel Novoa III <mnovoa3@bellsouth.net> - * used ideas is part of the GNU C Library. + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the Free + * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#define __USE_CTYPE_MACROS +/* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! + * + * Besides uClibc, I'm using this code in my libc for elks, which is + * a 16-bit environment with a fairly limited compiler. It would make + * things much easier for me if this file isn't modified unnecessarily. + * In particular, please put any new or replacement functions somewhere + * else, and modify the makefile to use your version instead. + * Thanks. Manuel + * + * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! 
*/ + +#define _GNU_SOURCE +#define __NO_CTYPE + #include <ctype.h> +#include <stdio.h> +#include <limits.h> +#include <assert.h> +#include <locale.h> -#ifdef L_isascii -#undef isascii -int -isascii( int c ) -{ - return (c > 0 && c <= 0x7f); -} -#endif +/**********************************************************************/ -#ifdef L_isdigit -#undef isdigit -int -isdigit( int c ) -{ - return (c >= '0' && c <= '9'); -} -#endif +extern int __isctype_loc(int c, int ct); -#ifdef L_toascii -#undef toascii -int -toascii( int c ) -{ - return (c & 0x7f); -} -#endif +/* Some macros used throughout the file. */ +#define U ((unsigned char)c) +/* #define LCT (__cur_locale->ctype) */ +#define LCT (&__global_locale) -#ifdef L_isblank -#undef isblank -int -isblank( int c ) -{ - return ((c == ' ') || (c == '\t')); -} +/**********************************************************************/ + +#ifndef __PASTE +#define __PASTE(X,Y) X ## Y #endif -/* locale depended */ -#ifndef __UCLIBC_HAS_LOCALE__ +#define C_MACRO(X) __PASTE(__C_,X)(c) -#ifdef L_isalpha -#undef isalpha -int -isalpha( int c ) -{ - return (isupper(c) || islower(c)); -} -#endif +#define CT_MACRO(X) __PASTE(__ctype_,X)(c) -#ifdef L_isalnum -#undef isalnum -int -isalnum( int c ) -{ - return (isalpha(c) || isdigit(c)); -} -#endif +/**********************************************************************/ -#ifdef L_iscntrl -#undef iscntrl -int -iscntrl( int c ) -{ - return ((c >= 0) && ((c <= 0x1f) || (c == 0x7f))); -} -#endif +#ifndef __CTYPE_HAS_8_BIT_LOCALES -#ifdef L_isgraph -#undef isgraph -int -isgraph( int c ) -{ - return (c > ' ' && isprint(c)); +#define IS_FUNC_BODY(NAME) \ +int NAME (int c) \ +{ \ + return C_MACRO(NAME); \ } -#endif -#ifdef L_islower -#undef islower -int -islower( int c ) -{ - return (c >= 'a' && c <= 'z'); -} -#endif +#else -#ifdef L_isprint -#undef isprint -int -isprint( int c ) -{ - return (c >= ' ' && c <= '~'); -} -#endif +/* It may be worth defining __isctype_loc over the whole range of 
char. */ +/* #define IS_FUNC_BODY(NAME) \ */ +/* int NAME (int c) \ */ +/* { \ */ +/* return __isctype_loc(c, __PASTE(_CTYPE_,NAME)); \ */ +/* } */ -#ifdef L_ispunct -#undef ispunct -int -ispunct( int c ) -{ - return ((c > ' ' && c <= '~') && !isalnum(c)); +#define IS_FUNC_BODY(NAME) \ +int NAME (int c) \ +{ \ + if (((unsigned int) c) <= 0x7f) { \ + return C_MACRO(NAME); \ + } \ + return __isctype_loc(c, __PASTE(_CTYPE_,NAME)); \ } -#endif -#ifdef L_isspace -#undef isspace -int -isspace( int c ) -{ - return (c == ' ' || c == '\f' || c == '\n' || c == '\r' || - c == '\t' || c == '\v'); -} -#endif +#endif /* __CTYPE_HAS_8_BIT_LOCALES */ -#ifdef L_isupper -#undef isupper -int -isupper( int c ) -{ - return (c >= 'A' && c <= 'Z'); -} -#endif +/**********************************************************************/ +#ifdef L_isalnum + +IS_FUNC_BODY(isalnum); -#ifdef L_isxdigit -#undef isxdigit -int -isxdigit( int c ) -{ - return (isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')); -} #endif +/**********************************************************************/ +#ifdef L_isalpha + +IS_FUNC_BODY(isalpha); -#ifdef L_isxlower -#undef isxlower -int -isxlower( int c ) -{ - return (isdigit(c) || (c >= 'a' && c <= 'f')); -} #endif +/**********************************************************************/ +#ifdef L_isblank -#ifdef L_isxupper -#undef isxupper -int -isxupper( int c ) +/* Warning!!! This is correct for all the currently supported 8-bit locales. + * If any are added though, this will need to be verified. */ + +int isblank(int c) { - return (isdigit(c) || (c >= 'A' && c <= 'F')); + return __isblank(c); } + #endif +/**********************************************************************/ +#ifdef L_iscntrl + +IS_FUNC_BODY(iscntrl); -#ifdef L_tolower -#undef tolower -int -tolower( int c ) -{ - return (isupper(c) ? 
(c - 'A' + 'a') : (c)); -} #endif +/**********************************************************************/ +#ifdef L_isdigit -#ifdef L_toupper -#undef toupper -int -toupper( int c ) +int isdigit(int c) { - return (islower(c) ? (c - 'a' + 'A') : (c)); + return __isdigit(c); } + #endif +/**********************************************************************/ +#ifdef L_isgraph -#else /* __UCLIBC_HAS_LOCALE__ */ +IS_FUNC_BODY(isgraph); -#include <limits.h> -#include "../locale/_locale.h" +#endif +/**********************************************************************/ +#ifdef L_islower -#define _UC_ISCTYPE(c, type) \ -((c != -1) && ((_uc_ctype_b[(int)((unsigned char)c)] & type) != 0)) +IS_FUNC_BODY(islower); -#define _UC_ISCTYPE2(c, type, type2) \ -((c != -1) && ((_uc_ctype_b[(int)((unsigned char)c)] & type) == type2)) +#endif +/**********************************************************************/ +#ifdef L_isprint +IS_FUNC_BODY(isprint); -#ifdef L_ctype_C +#endif +/**********************************************************************/ +#ifdef L_ispunct -/* startup setlocale(LC_TYPE, "C"); */ -#include "ctype_C.c" +IS_FUNC_BODY(ispunct); -const unsigned char *_uc_ctype_b = _uc_ctype_b_C; -const unsigned char *_uc_ctype_trans = _uc_ctype_b_C+LOCALE_BUF_SIZE/2; +#endif +/**********************************************************************/ +#ifdef L_isspace -#endif /* L_ctype_C */ +/* Warning!!! This is correct for all the currently supported 8-bit locales. + * If any are added though, this will need to be verified. 
*/ -#ifdef L_isalpha -#undef isalpha -int -isalpha( int c ) +int isspace(int c) { - return _UC_ISCTYPE(c, ISalpha); + return __isspace(c); } -#endif -#ifdef L_isalnum -#undef isalnum -int -isalnum( int c ) -{ - return _UC_ISCTYPE(c, (ISalpha|ISxdigit)); -} #endif +/**********************************************************************/ +#ifdef L_isupper + +IS_FUNC_BODY(isupper); -#ifdef L_iscntrl -#undef iscntrl -int -iscntrl( int c ) -{ - return _UC_ISCTYPE(c, IScntrl); -} #endif +/**********************************************************************/ +#ifdef L_isxdigit -#ifdef L_isgraph -#undef isgraph -int -isgraph( int c ) +int isxdigit(int c) { - return _UC_ISCTYPE2(c, (ISprint|ISspace), ISprint); + return __isxdigit(c); } + #endif +/**********************************************************************/ +#ifdef L_tolower -#ifdef L_islower -#undef islower -int -islower( int c ) +#ifdef __CTYPE_HAS_8_BIT_LOCALES + +int tolower(int c) { - return _UC_ISCTYPE(c, ISlower); + return ((((unsigned int) c) <= 0x7f) + || (LCT->encoding != __ctype_encoding_8_bit)) + ? __C_tolower(c) + : ( __isctype_loc(c, _CTYPE_isupper) + ? (unsigned char) + ( U - LCT->tbl8uplow[ ((int) + (LCT->idx8uplow[(U & 0x7f) + >> Cuplow_IDX_SHIFT]) + << Cuplow_IDX_SHIFT) + + (U & ((1 << Cuplow_IDX_SHIFT) - 1)) ]) + : c ); } -#endif -#ifdef L_isprint -#undef isprint -int -isprint( int c ) +#else /* __CTYPE_HAS_8_BIT_LOCALES */ + +int tolower(int c) { - return _UC_ISCTYPE(c, ISprint); + return __C_tolower(c); } + +#endif /* __CTYPE_HAS_8_BIT_LOCALES */ + #endif +/**********************************************************************/ +#ifdef L_toupper -#ifdef L_ispunct -#undef ispunct -int -ispunct( int c ) +#ifdef __CTYPE_HAS_8_BIT_LOCALES + +int toupper(int c) { - return _UC_ISCTYPE(c, ISpunct); + return ((((unsigned int) c) <= 0x7f) + || (LCT->encoding != __ctype_encoding_8_bit)) + ? __C_toupper(c) + : ( __isctype_loc(c, _CTYPE_islower) + ? 
? (unsigned char) + ( U + LCT->tbl8uplow[ ((int) + (LCT->idx8uplow[(U & 0x7f) + >> Cuplow_IDX_SHIFT]) + << Cuplow_IDX_SHIFT) + + (U & ((1 << Cuplow_IDX_SHIFT) - 1)) ]) + : c ); } -#endif -#ifdef L_isspace -#undef isspace -int -isspace( int c ) +#else /* __CTYPE_HAS_8_BIT_LOCALES */ + +int toupper(int c) { - return _UC_ISCTYPE(c, ISspace); + return __C_toupper(c); } + +#endif /* __CTYPE_HAS_8_BIT_LOCALES */ + #endif +/**********************************************************************/ +#ifdef L_isascii -#ifdef L_isupper -#undef isupper -int -isupper( int c ) +int isascii(int c) { - return _UC_ISCTYPE(c, ISupper); + return __isascii(c); } + #endif +/**********************************************************************/ +#ifdef L_toascii -#ifdef L_isxdigit -#undef isxdigit -int -isxdigit( int c ) +int toascii(int c) { - return _UC_ISCTYPE(c, ISxdigit); + return __toascii(c); } -#endif +#endif +/**********************************************************************/ #ifdef L_isxlower -#undef isxlower -int -isxlower( int c ) + +int isxlower(int c) { - return _UC_ISCTYPE2(c, (ISxdigit|ISupper), ISxdigit); + return __isxlower(c); } -#endif +#endif +/**********************************************************************/ #ifdef L_isxupper -#undef isxupper -int -isxupper( int c ) + +int isxupper(int c) { - return _UC_ISCTYPE2(c, (ISxdigit|ISlower), ISxdigit); + return __isxupper(c); } + #endif +/**********************************************************************/ +#ifdef L___isctype_loc +#ifdef __CTYPE_HAS_8_BIT_LOCALES -#ifdef L_tolower -#undef tolower -int -tolower( int c ) +/* This internal routine is similar to iswctype(), but it doesn't + * work for any non-standard types, it doesn't work for "xdigit"s, + * and it doesn't work for chars between 0 and 0x7f (although that + * may change). 
*/ + +static const char ctype_range[] = { + __CTYPE_RANGES +}; + +int __isctype_loc(int c, int ct) { - if((c < CHAR_MIN) || (c > UCHAR_MAX)) - return c; - if(isupper(c)) - return _uc_ctype_trans[(int)((unsigned char)c)]; - else - return c; -} + unsigned char d; + + assert(((unsigned int)ct) < _CTYPE_isxdigit); + assert(((unsigned int)c) > 0x7f); + +#if (CHAR_MIN == 0) /* We don't have signed chars... */ + if ((LCT->encoding != __ctype_encoding_8_bit) + || (((unsigned int) c) > UCHAR_MAX) + ) { + return 0; + } +#else + /* Allow non-EOF negative char values for glibc compatibility. */ + if ((LCT->encoding != __ctype_encoding_8_bit) || (c == EOF) + || ( ((unsigned int)(c - CHAR_MIN)) > (UCHAR_MAX - CHAR_MIN)) + ) { + return 0; + } #endif -#ifdef L_toupper -#undef toupper -int -toupper( int c ) -{ - if((c < CHAR_MIN) || (c > UCHAR_MAX)) - return c; - if(islower(c)) - return _uc_ctype_trans[(int)((unsigned char)c)]; - else - return c; -} + /* TODO - test assumptions??? 8-bit chars -- or ensure in generator. */ + +#define Cctype_TBL_MASK ((1 << Cctype_IDX_SHIFT) - 1) +#define Cctype_IDX_OFFSET (128 >> Cctype_IDX_SHIFT) + + c &= 0x7f; +#ifdef Cctype_PACKED + d = LCT->tbl8ctype[ ((int)(LCT->idx8ctype[(U >> Cctype_IDX_SHIFT) ]) + << (Cctype_IDX_SHIFT - 1)) + + ((U & Cctype_TBL_MASK) >> 1)]; + d = (U & 1) ? (d >> 4) : (d & 0xf); +#else + d = LCT->tbl8ctype[ ((int)(LCT->idx8ctype[(U >> Cctype_IDX_SHIFT) ]) + << Cctype_IDX_SHIFT) + + (U & Cctype_TBL_MASK) ]; #endif + return ( ((unsigned char)(d - ctype_range[2*ct])) <= ctype_range[2*ct+1] ); +} +#endif /* __CTYPE_HAS_8_BIT_LOCALES */ #endif +/**********************************************************************/ |