diff options
author | Manuel Novoa III <mjn3@codepoet.org> | 2003-08-01 20:08:59 +0000 |
---|---|---|
committer | Manuel Novoa III <mjn3@codepoet.org> | 2003-08-01 20:08:59 +0000 |
commit | 1217289737588e65b088b3535428b27c7287d699 (patch) | |
tree | 6a292ac767d219702e26a6a2111737f84a96900c /libc/misc/wchar/wchar.c | |
parent | 32b76c5ec3c257b7287913d0d1a96e0cbb2e9c6a (diff) |
Add a new *scanf implementation, including the *wscanf functions.
It should be standards compliant, with several optional features,
including support for hexadecimal float notation, locale awareness,
glibc-like locale-specific digit grouping with the `'' flag, and
positional arg support. I tested it pretty well (finding several
bugs in glibc's scanf in the process), but it is brand new so be
aware.
The *wprintf functions now support floating point output. Also, a
couple of bugs were squashed. Finally, %a/%A conversions are
now implemented.
Implement the glibc xlocale interface for thread-specific locale
support. Also add the various *_l(args, locale_t loc_arg) funcs.
NOTE!!! setlocale() is NOT threadsafe! NOTE!!!
The strto{floating point} conversion functions are now locale aware.
They also now support hexadecimal floating point notation.
Add the wcsto{floating point} conversion functions.
Fix a bug in mktime() related to dst. Note that unlike glibc's mktime,
uClibc's version always normalizes the struct tm before attempting
to determine the correct dst setting if tm_isdst == -1 on entry.
Add a stub version of the libintl functions. (untested)
Fixed a known memory leak in setlocale() related to the collation data.
Add lots of new config options (which Erik agreed to sort out :-),
including finally exposing some of the stripped down stdio configs.
Be careful with those though, as they haven't been tested in a
long time.
(temporary) GOTCHAs...
The ctype functions are currently incorrect for 8-bit locales. They
will be fixed shortly.
The ctype functions are now table-based, resulting in larger statically
linked binaries. I'll be adding an option to use the old approach
in the stub locale configuration.
Diffstat (limited to 'libc/misc/wchar/wchar.c')
-rw-r--r-- | libc/misc/wchar/wchar.c | 111 |
1 files changed, 76 insertions, 35 deletions
diff --git a/libc/misc/wchar/wchar.c b/libc/misc/wchar/wchar.c index ff3e42a84..aa0c2735a 100644 --- a/libc/misc/wchar/wchar.c +++ b/libc/misc/wchar/wchar.c @@ -86,6 +86,9 @@ * Add a couple of ugly hacks to support *wprintf. * Add a mini iconv() and iconv implementation (requires locale support). * + * Aug 1, 2003 + * Bug fix for mbrtowc. + * * Manuel */ @@ -101,13 +104,39 @@ #include <assert.h> #include <locale.h> #include <wchar.h> +#include <bits/uClibc_uwchar.h> +/**********************************************************************/ #ifdef __UCLIBC_HAS_LOCALE__ -#define ENCODING (__global_locale.encoding) +#ifdef __UCLIBC_MJN3_ONLY__ +#ifdef L_iswspace +/* generates one warning */ +#warning TODO: Fix Cc2wc* and Cwc2c* defines! +#endif +#endif /* __UCLIBC_MJN3_ONLY__ */ + +#define ENCODING ((__UCLIBC_CURLOCALE_DATA).encoding) + +#define Cc2wc_IDX_SHIFT __LOCALE_DATA_Cc2wc_IDX_SHIFT +#define Cc2wc_ROW_LEN __LOCALE_DATA_Cc2wc_ROW_LEN +#define Cwc2c_DOMAIN_MAX __LOCALE_DATA_Cwc2c_DOMAIN_MAX +#define Cwc2c_TI_SHIFT __LOCALE_DATA_Cwc2c_TI_SHIFT +#define Cwc2c_TT_SHIFT __LOCALE_DATA_Cwc2c_TT_SHIFT +#define Cwc2c_TI_LEN __LOCALE_DATA_Cwc2c_TI_LEN + #ifndef __CTYPE_HAS_UTF_8_LOCALES #warning __CTYPE_HAS_UTF_8_LOCALES not set! #endif -#else + +#else /* __UCLIBC_HAS_LOCALE__ */ + +#ifdef __UCLIBC_MJN3_ONLY__ +#ifdef L_btowc +/* emit only once */ +#warning fix preprocessor logic testing locale settings +#endif +#endif + #define ENCODING (__ctype_encoding_7_bit) #ifdef __CTYPE_HAS_8_BIT_LOCALES #error __CTYPE_HAS_8_BIT_LOCALES is defined! @@ -117,7 +146,9 @@ #endif #undef L__wchar_utf8sntowcs #undef L__wchar_wcsntoutf8s -#endif + +#endif /* __UCLIBC_HAS_LOCALE__ */ +/**********************************************************************/ #if WCHAR_MAX > 0xffffUL #define UTF_8_MAX_LEN 6 @@ -266,11 +297,18 @@ size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s, #ifdef __CTYPE_HAS_UTF_8_LOCALES /* Need to do this here since mbsrtowcs doesn't allow incompletes. 
*/ if (ENCODING == __ctype_encoding_utf8) { + if (!pwc) { + pwc = wcbuf; + } r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1); return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */ } #endif +#ifdef __UCLIBC_MJN3_ONLY__ +#warning TODO: This adds a trailing nul! +#endif /* __UCLIBC_MJN3_ONLY__ */ + r = __mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps); if (((ssize_t) r) >= 0) { @@ -291,7 +329,10 @@ size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s, size_t wcrtomb(register char *__restrict s, wchar_t wc, mbstate_t *__restrict ps) { - wchar_t wcbuf[2]; +#ifdef __UCLIBC_MJN3_ONLY__ +#warning TODO: Should wcsnrtombs nul-terminate unconditionally? Check glibc. +#endif /* __UCLIBC_MJN3_ONLY__ */ + wchar_t wcbuf[1]; const wchar_t *pwc; size_t r; char buf[MB_LEN_MAX]; @@ -303,9 +344,8 @@ size_t wcrtomb(register char *__restrict s, wchar_t wc, pwc = wcbuf; wcbuf[0] = wc; - wcbuf[1] = 0; - r = __wcsnrtombs(s, &pwc, SIZE_MAX, MB_LEN_MAX, ps); + r = __wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps); return (r != 0) ? r : 1; } @@ -418,7 +458,7 @@ size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn, if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */ mask = 0x40; #ifdef __UCLIBC_MJN3_ONLY__ -#warning fix range for 16 bit wides +#warning TODO: Fix range for 16 bit wchar_t case. #endif if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) { goto START; @@ -495,7 +535,6 @@ size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn, COMPLETE: *pwc = wc; pwc += incr; - } #ifdef DECODER while (--count); @@ -684,8 +723,8 @@ size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, while (count) { if ((wc = ((unsigned char)(*s))) >= 0x80) { /* Non-ASCII... 
*/ wc -= 0x80; - wc = __global_locale.tbl8c2wc[ - (__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT] + wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[ + (__UCLIBC_CURLOCALE_DATA.idx8c2wc[wc >> Cc2wc_IDX_SHIFT] << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))]; if (!wc) { goto BAD; @@ -797,12 +836,12 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src, } else { u = 0; if (wc <= Cwc2c_DOMAIN_MAX) { - u = __global_locale.idx8wc2c[wc >> (Cwc2c_TI_SHIFT + u = __UCLIBC_CURLOCALE_DATA.idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)]; - u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT) + u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT) + ((wc >> Cwc2c_TT_SHIFT) & ((1 << Cwc2c_TI_SHIFT)-1))]; - u = __global_locale.tbl8wc2c[Cwc2c_TI_LEN + u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN + (u << Cwc2c_TT_SHIFT) + (wc & ((1 << Cwc2c_TT_SHIFT)-1))]; } @@ -859,7 +898,8 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src, #ifdef L_wcswidth #ifdef __UCLIBC_MJN3_ONLY__ -#warning if we start doing translit, wcwidth and wcswidth will need updating. +#warning REMINDER: If we start doing translit, wcwidth and wcswidth will need updating. +#warning TODO: Update wcwidth to match latest by Kuhn. #endif #if defined(__UCLIBC_HAS_LOCALE__) && \ @@ -1163,7 +1203,7 @@ enum { * */ -const unsigned char codesets[] = +const unsigned char __iconv_codesets[] = "\x0a\xe0""WCHAR_T\x00" /* superset of UCS-4 but platform-endian */ #if __BYTE_ORDER == __BIG_ENDIAN "\x08\xec""UCS-4\x00" /* always BE */ @@ -1201,7 +1241,7 @@ static int find_codeset(const char *name) const unsigned char *s; int codeset; - for (s = codesets ; *s ; s += *s) { + for (s = __iconv_codesets ; *s ; s += *s) { if (!strcasecmp(s+2, name)) { return s[1]; } @@ -1212,10 +1252,10 @@ static int find_codeset(const char *name) /* TODO: maybe CODESET_LIST + *s ??? 
*/ /* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */ codeset = 2; - s = CODESET_LIST; + s = __LOCALE_DATA_CODESET_LIST; do { ++codeset; /* Increment codeset first. */ - if (!strcasecmp(CODESET_LIST+*s, name)) { + if (!strcasecmp(__LOCALE_DATA_CODESET_LIST+*s, name)) { return codeset; } } while (*++s); @@ -1223,7 +1263,7 @@ static int find_codeset(const char *name) return 0; /* No matching codeset! */ } -iconv_t iconv_open(const char *tocode, const char *fromcode) +iconv_t weak_function iconv_open(const char *tocode, const char *fromcode) { register _UC_iconv_t *px; int tocodeset, fromcodeset; @@ -1244,16 +1284,17 @@ iconv_t iconv_open(const char *tocode, const char *fromcode) return (iconv_t)(-1); } -int iconv_close(iconv_t cd) +int weak_function iconv_close(iconv_t cd) { free(cd); return 0; } -size_t iconv(iconv_t cd, char **__restrict inbuf, - size_t *__restrict inbytesleft, - char **__restrict outbuf, size_t *__restrict outbytesleft) +size_t weak_function iconv(iconv_t cd, char **__restrict inbuf, + size_t *__restrict inbytesleft, + char **__restrict outbuf, + size_t *__restrict outbytesleft) { _UC_iconv_t *px = (_UC_iconv_t *) cd; size_t nrcount, r; @@ -1362,9 +1403,9 @@ size_t iconv(iconv_t cd, char **__restrict inbuf, return (size_t)(-1); } #ifdef __UCLIBC_MJN3_ONLY__ -#warning optimize this +#warning TODO: optimize this. 
#endif - if (p != NULL) { /* incomplet char case */ + if (p != NULL) { /* incomplete char case */ goto INVALID; } p = *inbuf + 1; /* nul */ @@ -1374,10 +1415,10 @@ size_t iconv(iconv_t cd, char **__restrict inbuf, if (px->fromcodeset == IC_ASCII) { /* US-ASCII codeset */ goto ILLEGAL; } else { /* some other 8-bit ascii-extension codeset */ - const codeset_8_bit_t *c8b + const __codeset_8_bit_t *c8b = __locale_mmap->codeset_8_bit + px->fromcodeset - 3; wc -= 0x80; - wc = __global_locale.tbl8c2wc[ + wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[ (c8b->idx8c2wc[wc >> Cc2wc_IDX_SHIFT] << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))]; if (!wc) { @@ -1439,7 +1480,7 @@ size_t iconv(iconv_t cd, char **__restrict inbuf, r = _wchar_wcsntoutf8s(*outbuf, *outbytesleft, &pw, 1); if (r != (size_t)(-1)) { #ifdef __UCLIBC_MJN3_ONLY__ -#warning what happens for a nul? +#warning TODO: What happens for a nul? #endif if (r == 0) { if (wc != 0) { @@ -1458,14 +1499,14 @@ size_t iconv(iconv_t cd, char **__restrict inbuf, **outbuf = wc; } else { if ((px->tocodeset != 0x01) && (wc <= Cwc2c_DOMAIN_MAX)) { - const codeset_8_bit_t *c8b + const __codeset_8_bit_t *c8b = __locale_mmap->codeset_8_bit + px->tocodeset - 3; __uwchar_t u; u = c8b->idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)]; - u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT) + u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT) + ((wc >> Cwc2c_TT_SHIFT) & ((1 << Cwc2c_TI_SHIFT)-1))]; - wc = __global_locale.tbl8wc2c[Cwc2c_TI_LEN + wc = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN + (u << Cwc2c_TT_SHIFT) + (wc & ((1 << Cwc2c_TT_SHIFT)-1))]; if (wc) { @@ -1497,7 +1538,7 @@ size_t iconv(iconv_t cd, char **__restrict inbuf, #include <stdarg.h> #include <libgen.h> -extern const unsigned char codesets[]; +extern const unsigned char __iconv_codesets[]; #define IBUF BUFSIZ #define OBUF BUFSIZ @@ -1572,12 +1613,12 @@ int main(int argc, char **argv) if (opts[5]) { /* -l */ fprintf(stderr, "Recognized codesets:\n"); - for (s = codesets 
; *s ; s += *s) { + for (s = __iconv_codesets ; *s ; s += *s) { fprintf(stderr," %s\n", s+2); } - s = CODESET_LIST; + s = __LOCALE_DATA_CODESET_LIST; do { - fprintf(stderr," %s\n", CODESET_LIST+ (unsigned char)(*s)); + fprintf(stderr," %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s)); } while (*++s); return EXIT_SUCCESS; |