From a79016198c859a3388584ac7782d760f349e2d67 Mon Sep 17 00:00:00 2001 From: Carmelo Amoroso Date: Tue, 9 Sep 2008 13:01:58 +0000 Subject: Fix some locale multibyte test failures as below: libc/stdlib/_strtod.c -> tst_wcstod; libc/stdlib/stdlib.c -> tst_mblen, tst_mbtowc, tst_wctomb; libc/stdio/_scanf.c -> tst_swscanf; libc/string/strncmp.c -> tst_wcsncmp; libc/misc/wchar/wchar.c -> tst_mbrlen, tst_mbrtowc, tst_wcswidth. Signed-off-by: Filippo Arcidiacono Signed-off-by: Carmelo Amoroso --- libc/misc/wchar/wchar.c | 10 ++++++-- libc/stdio/_scanf.c | 3 --- libc/stdlib/_strtod.c | 2 +- libc/stdlib/stdlib.c | 64 +++++++++++++++++++++++++++++++++++++------------ libc/string/strncmp.c | 2 +- 5 files changed, 59 insertions(+), 22 deletions(-) (limited to 'libc') diff --git a/libc/misc/wchar/wchar.c b/libc/misc/wchar/wchar.c index 1a6586e91..567be8585 100644 --- a/libc/misc/wchar/wchar.c +++ b/libc/misc/wchar/wchar.c @@ -293,10 +293,17 @@ size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s, empty_string[0] = 0; /* Init the empty string when necessary. */ s = empty_string; n = 1; + } else if (*s == '\0') { + /* According to the ISO C 89 standard this is the expected behaviour. */ + return 0; } else if (!n) { /* TODO: change error code? */ +#if 0 return (ps->__mask && (ps->__wc == 0xffffU)) ? ((size_t) -1) : ((size_t) -2); +#else + return 0; +#endif } p = s; @@ -865,7 +872,6 @@ size_t wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src, + (wc & ((1 << Cwc2c_TT_SHIFT)-1))]; } -#define __WCHAR_REPLACEMENT_CHAR '?' #ifdef __WCHAR_REPLACEMENT_CHAR *dst = (unsigned char) ( u ? 
u : __WCHAR_REPLACEMENT_CHAR ); #else /* __WCHAR_REPLACEMENT_CHAR */ @@ -1045,7 +1051,7 @@ int wcswidth(const wchar_t *pwcs, size_t n) size_t i; for (i = 0 ; (i < n) && pwcs[i] ; i++) { - if (pwcs[i] != ((unsigned char)(pwcs[i]))) { + if (pwcs[i] != (pwcs[i] & 0x7f)) { return -1; } } diff --git a/libc/stdio/_scanf.c b/libc/stdio/_scanf.c index bef1ce0b5..3b004d5f0 100644 --- a/libc/stdio/_scanf.c +++ b/libc/stdio/_scanf.c @@ -1068,9 +1068,6 @@ static int sc_getc(register struct scan_cookie *sc) wc = '.'; } else #endif /* __UCLIBC_HAS_FLOATS__ */ - if (!__isascii(wc)) { - wc = '?'; - } sc->wc = sc->ungot_char = wc; return (int) wc; diff --git a/libc/stdlib/_strtod.c b/libc/stdlib/_strtod.c index 73a7ed7c1..1b2adc986 100644 --- a/libc/stdlib/_strtod.c +++ b/libc/stdlib/_strtod.c @@ -234,7 +234,7 @@ __fpmax_t attribute_hidden __XL_NPP(__strtofpmax)(const Wchar *str, Wchar **endp #endif #ifdef __UCLIBC_HAS_LOCALE__ #if defined(L___wcstofpmax) || defined(L___wcstofpmax_l) - wchar_t decpt_wc = __LOCALE_PTR->decimal_point; + wchar_t decpt_wc = __LOCALE_PTR->decimal_point_wc; #else const char *decpt = __LOCALE_PTR->decimal_point; int decpt_len = __LOCALE_PTR->decimal_point_len; diff --git a/libc/stdlib/stdlib.c b/libc/stdlib/stdlib.c index 15292ceed..68796656a 100644 --- a/libc/stdlib/stdlib.c +++ b/libc/stdlib/stdlib.c @@ -929,6 +929,30 @@ size_t _stdlib_mb_cur_max(void) libc_hidden_def(_stdlib_mb_cur_max) #endif + +#ifdef __UCLIBC_HAS_LOCALE__ +/* + * The following function return 1 if the encoding is stateful, 0 if stateless. + * To note, until now all the supported encoding are stateless. 
+ */ + +static inline int is_stateful(unsigned char encoding) +{ + switch (encoding) + { + case __ctype_encoding_7_bit: + case __ctype_encoding_utf8: + case __ctype_encoding_8_bit: + return 0; + default: + assert(0); + return -1; + } +} +#else +#define is_stateful(encoding) 0 +#endif + /**********************************************************************/ #ifdef L_mblen @@ -941,13 +965,17 @@ int mblen(register const char *s, size_t n) if (!s) { state.__mask = 0; -#ifdef __CTYPE_HAS_UTF_8_LOCALES - return ENCODING == __ctype_encoding_utf8; -#else - return 0; -#endif + /* + In this case we have to return 0 because the only multibyte supported encoding + is utf-8, that is a stateless encoding. See mblen() documentation. + */ + return is_stateful(ENCODING); } + if (*s == '\0') + /* According to the ISO C 89 standard this is the expected behaviour. */ + return 0; + if ((r = mbrlen(s, n, &state)) == (size_t) -2) { /* TODO: Should we set an error state? */ state.__wc = 0xffffU; /* Make sure we're in an error state. */ @@ -969,13 +997,18 @@ int mbtowc(wchar_t *__restrict pwc, register const char *__restrict s, size_t n) if (!s) { state.__mask = 0; -#ifdef __CTYPE_HAS_UTF_8_LOCALES - return ENCODING == __ctype_encoding_utf8; -#else - return 0; -#endif + /* + In this case we have to return 0 because the only multibyte supported encoding + is utf-8, that is a stateless encoding. See mbtowc() documentation. + */ + + return is_stateful(ENCODING); } + if (*s == '\0') + /* According to the ISO C 89 standard this is the expected behaviour. */ + return 0; + if ((r = mbrtowc(pwc, s, n, &state)) == (size_t) -2) { /* TODO: Should we set an error state? */ state.__wc = 0xffffU; /* Make sure we're in an error state. */ @@ -996,11 +1029,12 @@ int wctomb(register char *__restrict s, wchar_t swc) { return (!s) ? -#ifdef __CTYPE_HAS_UTF_8_LOCALES - (ENCODING == __ctype_encoding_utf8) -#else - 0 /* Encoding is stateless. 
*/ -#endif + /* + In this case we have to return 0 because the only multibyte supported encoding + is utf-8, that is a stateless encoding. See wctomb() documentation. + */ + + is_stateful(ENCODING) : ((ssize_t) wcrtomb(s, swc, NULL)); } diff --git a/libc/string/strncmp.c b/libc/string/strncmp.c index 84a2bd512..59e4a2c22 100644 --- a/libc/string/strncmp.c +++ b/libc/string/strncmp.c @@ -25,7 +25,7 @@ int Wstrncmp(register const Wchar *s1, register const Wchar *s2, size_t n) --n; } - return (n == 0) ? 0 : ((*((Wuchar *)s1) < *((Wuchar *)s2)) ? -1 : 1); + return (n == 0) ? 0 : (*((Wuchar *)s1) - *((Wuchar *)s2)); #else int r = 0; -- cgit v1.2.3