From 599ad608ee3297bc1e2bb11656a97335e303877a Mon Sep 17 00:00:00 2001 From: Manuel Novoa III Date: Wed, 3 Jul 2002 17:24:17 +0000 Subject: Enable WCHAR support for C/POSIX stub locales. Implemented unformatted wide i/o functions. (ungetwc still needs testing) Fix a few bugs in wchar.c. Modifications for bcc/elks support. --- libc/misc/assert/__assert.c | 2 +- libc/misc/locale/locale.c | 19 +- libc/misc/time/time.c | 5 +- libc/misc/wchar/Makefile | 29 ++- libc/misc/wchar/wchar.c | 76 +++++-- libc/misc/wchar/wstdio.c | 542 ++++++++++++++++++++++++++++++++++++++++++++ libc/misc/wctype/wctype.c | 123 +++++++++- 7 files changed, 756 insertions(+), 40 deletions(-) create mode 100644 libc/misc/wchar/wstdio.c (limited to 'libc/misc') diff --git a/libc/misc/assert/__assert.c b/libc/misc/assert/__assert.c index fba9dc719..db5f9a7a9 100644 --- a/libc/misc/assert/__assert.c +++ b/libc/misc/assert/__assert.c @@ -28,7 +28,7 @@ #undef assert void __assert(const char *assertion, const char * filename, - int linenumber, const char * function) + int linenumber, register const char * function) { char buf[__BUFLEN_INT10TOSTR]; diff --git a/libc/misc/locale/locale.c b/libc/misc/locale/locale.c index fd52782c1..689257b41 100644 --- a/libc/misc/locale/locale.c +++ b/libc/misc/locale/locale.c @@ -30,13 +30,7 @@ #include #include -#ifdef __LOCALE_C_ONLY - -#ifdef __WCHAR_ENABLED -#error wide char support requires full locale support -#endif - -#else /* __LOCALE_C_ONLY */ +#ifndef __LOCALE_C_ONLY #define CUR_LOCALE_SPEC (__global_locale.cur_locale) #undef CODESET_LIST @@ -496,14 +490,15 @@ void _locale_set(const unsigned char *p) * ctype, numeric, monetary, time, collate, messages, all */ +#define C_LC_ALL 6 /* Combine the data to avoid size penalty for seperate char arrays when * compiler aligns objects. The original code is left in as documentation. 
*/ #define cat_start nl_data -#define C_locale_data nl_data + LC_ALL + 1 + 78 +#define C_locale_data (nl_data + C_LC_ALL + 1 + 78) -static const unsigned char nl_data[LC_ALL + 1 + 78 + 300] = { -/* static const unsigned char cat_start[LC_ALL + 1] = { */ +static const unsigned char nl_data[C_LC_ALL + 1 + 78 + 300] = { +/* static const unsigned char cat_start[C_LC_ALL + 1] = { */ '\x00', '\x01', '\x04', '\x1a', '\x4c', '\x4c', '\x4e', /* }; */ /* static const unsigned char item_offset[78] = { */ @@ -564,10 +559,10 @@ char *nl_langinfo(nl_item item) unsigned int c; unsigned int i; - if ((c = _NL_ITEM_CATEGORY(item)) < LC_ALL) { + if ((c = _NL_ITEM_CATEGORY(item)) < C_LC_ALL) { if ((i = cat_start[c] + _NL_ITEM_INDEX(item)) < cat_start[c+1]) { /* return (char *) C_locale_data + item_offset[i] + (i & 64); */ - return (char *) C_locale_data + nl_data[LC_ALL+1+i] + (i & 64); + return (char *) C_locale_data + nl_data[C_LC_ALL+1+i] + (i & 64); } } return (char *) cat_start; /* Conveniently, this is the empty string. */ diff --git a/libc/misc/time/time.c b/libc/misc/time/time.c index 409be3247..e7e3b8414 100644 --- a/libc/misc/time/time.c +++ b/libc/misc/time/time.c @@ -94,14 +94,12 @@ #endif /* TODO - This stuff belongs in some include/bits/ file. */ -#ifndef __BCC__ #undef CLK_TCK #if (TARGET_ARCH == alpha) || (TARGET_ARCH == ia64) #define CLK_TCK 1024 #else #define CLK_TCK 100 #endif -#endif /* The era code is currently unfinished. 
*/ /* #define ENABLE_ERA_CODE */ @@ -1855,9 +1853,8 @@ time_t _time_mktime(struct tm *timeptr, int store_on_success) } #ifdef __BCC__ - /* TODO - check */ d = p[5] - 1; - days = -719163L + d*365 + ((d/4) - (d/100) + (d/400) + p[3] + p[7]); + days = -719163L + ((long)d)*365 + ((d/4) - (d/100) + (d/400) + p[3] + p[7]); secs = p[0] + 60*( p[1] + 60*((long)(p[2])) ) + _time_tzinfo[timeptr->tm_isdst > 0].gmt_offset; if (secs < 0) { diff --git a/libc/misc/wchar/Makefile b/libc/misc/wchar/Makefile index 23a1e9bba..ddd701a6d 100644 --- a/libc/misc/wchar/Makefile +++ b/libc/misc/wchar/Makefile @@ -24,12 +24,27 @@ TOPDIR=../../../ include $(TOPDIR)Rules.mak -MSRC= wchar.c -MOBJ= btowc.o wctob.o mbsinit.o mbrlen.o mbrtowc.o wcrtomb.o mbsrtowcs.o \ - wcsrtombs.o _wchar_utf8sntowcs.o _wchar_wcstoutf8s.o \ +MSRC1= wchar.c +MOBJ1= btowc.o wctob.o mbsinit.o mbrlen.o mbrtowc.o wcrtomb.o mbsrtowcs.o \ + wcsrtombs.o _wchar_utf8sntowcs.o _wchar_wcsntoutf8s.o \ __mbsnrtowcs.o __wcsnrtombs.o -OBJS=$(MOBJ) +MSRC2= wstdio.c +MOBJ2= fwide.o \ + fgetwc.o getwchar.o fgetws.o \ + fputwc.o putwchar.o fputws.o \ + ungetwc.o +# getwc (fgetwc alias) getwc_unlocked (fgetwc_unlocked alias) +# putwc (fputwc alias) putwc_unlocked (fputwc_unlocked alias) + +# wcwidth wcswidth +# wcstod wcstof wcstold +# wcstol wcstoul wcstoq wcstouq wcstoll wcstoull +# fwprintf wprintf swprintf vfwprintf vwprintf vswprintf +# fwscanf wscanf swscanf vfwscanf vwscanf vswscanf +# wcsftime + +OBJS=$(MOBJ1) $(MOBJ2) all: $(OBJS) $(LIBC) @@ -38,7 +53,11 @@ $(LIBC): ar-target ar-target: $(OBJS) $(AR) $(ARFLAGS) $(LIBC) $(OBJS) -$(MOBJ): $(MSRC) +$(MOBJ1): $(MSRC1) + $(CC) $(CFLAGS) -DL_$* $< -c -o $*.o + $(STRIPTOOL) -x -R .note -R .comment $*.o + +$(MOBJ2): $(MSRC2) $(CC) $(CFLAGS) -DL_$* $< -c -o $*.o $(STRIPTOOL) -x -R .note -R .comment $*.o diff --git a/libc/misc/wchar/wchar.c b/libc/misc/wchar/wchar.c index f2d9f4a7d..9f08f3312 100644 --- a/libc/misc/wchar/wchar.c +++ b/libc/misc/wchar/wchar.c @@ -50,6 +50,14 @@ * an 
issue for uClibc, but may be for ELKS. I'm currently not sure * if I'll use 16-bit, 32-bit, or configureable wchars in ELKS. * + * July 1, 2002 + * + * Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case. + * Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit + * locales. + * Enabled building of a C/POSIX-locale-only version, so full locale support + * no longer needs to be enabled. + * * Manuel */ @@ -66,7 +74,17 @@ #include #include +#ifdef __UCLIBC_HAS_LOCALE__ #define ENCODING (__global_locale.encoding) +#warning implement __CTYPE_HAS_UTF_8_LOCALES! +#define __CTYPE_HAS_UTF_8_LOCALES +#else +#define ENCODING (__ctype_encoding_7_bit) +#undef __CTYPE_HAS_8_BIT_LOCALES +#undef __CTYPE_HAS_UTF_8_LOCALES +#undef L__wchar_utf8sntowcs +#undef L__wchar_wcsntoutf8s +#endif #if WCHAR_MAX > 0xffffU #define UTF_8_MAX_LEN 6 @@ -76,9 +94,6 @@ /* #define KUHN */ -#warning implement __CTYPE_HAS_UTF_8_LOCALES! -#define __CTYPE_HAS_UTF_8_LOCALES - /* Implementation-specific work functions. */ extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn, @@ -103,6 +118,8 @@ extern size_t __wcsnrtombs(char *__restrict dst, wint_t btowc(int c) { +#ifdef __CTYPE_HAS_8_BIT_LOCALES + wchar_t wc; unsigned char buf[1]; mbstate_t mbstate; @@ -110,11 +127,19 @@ wint_t btowc(int c) if (c != EOF) { *buf = (unsigned char) c; mbstate.mask = 0; /* Initialize the mbstate. */ - if (mbrtowc(&wc, buf, 1, &mbstate) == 1) { + if (mbrtowc(&wc, buf, 1, &mbstate) <= 1) { return wc; } } return WEOF; + +#else /* __CTYPE_HAS_8_BIT_LOCALES */ + + /* If we don't have 8-bit locale support, then this is trivial since + * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */ + return (((unsigned int)c) < 0x80) ? c : WEOF; + +#endif /* __CTYPE_HAS_8_BIT_LOCALES */ } #endif @@ -125,9 +150,22 @@ wint_t btowc(int c) int wctob(wint_t c) { +#ifdef __CTYPE_HAS_8_BIT_LOCALES + unsigned char buf[MB_LEN_MAX]; return (wcrtomb(buf, c, NULL) == 1) ? 
*buf : EOF; + +#else /* __CTYPE_HAS_8_BIT_LOCALES */ + + /* If we don't have 8-bit locale support, then this is trivial since + * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */ + + /* TODO: need unsigned version of wint_t... */ +/* return (((unsigned int)c) < 0x80) ? c : WEOF; */ + return ((c >= 0) && (c < 0x80)) ? c : EOF; + +#endif /* __CTYPE_HAS_8_BIT_LOCALES */ } #endif @@ -144,6 +182,9 @@ int mbsinit(const mbstate_t *ps) #ifdef L_mbrlen size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps) + __attribute__ ((__weak__, __alias__("__mbrlen"))); + +size_t __mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps) { static mbstate_t mbstate; /* Rely on bss 0-init. */ @@ -183,7 +224,7 @@ size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s, /* Need to do this here since mbsrtowcs doesn't allow incompletes. */ if (ENCODING == __ctype_encoding_utf8) { r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1); - return (r == 1) ? (p-s) : r; + return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */ } #endif @@ -289,10 +330,13 @@ size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn, wn = SIZE_MAX; incr = 0; } -#warning fix _wchar_utf8sntowcs to allow wn == 0! - assert(wn > 0); /* TODO: fix this!! */ - count = wn; + /* This is really here only to support the glibc extension function + * __mbsnrtowcs which apparently returns 0 if wn == 0 without any + * check on the validity of the mbstate. */ + if (!(count = wn)) { + return 0; + } if ((mask = (__uwchar_t) ps->mask) != 0) { /* A continuation... 
*/ #ifdef DECODER @@ -420,7 +464,7 @@ size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn, #endif /**********************************************************************/ -#ifdef L__wchar_wcstoutf8s +#ifdef L__wchar_wcsntoutf8s size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n, const wchar_t **__restrict src, size_t wn) @@ -568,13 +612,13 @@ size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src, if (!wc) { goto BAD; } - } else if (!wc) { + } + if (!(*dst = wc)) { s = NULL; break; } - ++s; - *dst = wc; dst += incr; + ++s; --count; } if (dst != wcbuf) { @@ -633,7 +677,7 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src, if (ENCODING == __ctype_encoding_utf8) { return _wchar_wcsntoutf8s(dst, len, src, NWC); } -#endif +#endif /* __CTYPE_HAS_UTF_8_LOCALES */ incr = 1; if (!dst) { @@ -676,12 +720,12 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src, /* #define __WCHAR_REPLACEMENT_CHAR '?' */ #ifdef __WCHAR_REPLACEMENT_CHAR *dst = (unsigned char) ( u ? 
u : __WCHAR_REPLACEMENT_CHAR ); -#else +#else /* __WCHAR_REPLACEMENT_CHAR */ if (!u) { goto BAD; } *dst = (unsigned char) u; -#endif +#endif /* __WCHAR_REPLACEMENT_CHAR */ } ++s; dst += incr; @@ -692,7 +736,7 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src, } return len - count; } -#endif +#endif /* __CTYPE_HAS_8_BIT_LOCALES */ assert(ENCODING == __ctype_encoding_7_bit); diff --git a/libc/misc/wchar/wstdio.c b/libc/misc/wchar/wstdio.c new file mode 100644 index 000000000..79cf1dfa1 --- /dev/null +++ b/libc/misc/wchar/wstdio.c @@ -0,0 +1,542 @@ + +/* + * ANSI/ISO C99 says + + 9 Although both text and binary wide-oriented streams are conceptually sequences of wide + characters, the external file associated with a wide-oriented stream is a sequence of + multibyte characters, generalized as follows: + --- Multibyte encodings within files may contain embedded null bytes (unlike multibyte + encodings valid for use internal to the program). + --- A file need not begin nor end in the initial shift state. (footnote 225) + + * How do we deal with this? + + * Should auto_wr_transition init the mbstate object? 
+*/ + + +#define _GNU_SOURCE +#include +#include +#include +#include +#include + +#ifndef __STDIO_THREADSAFE + +#ifdef __BCC__ +#define UNLOCKED_STREAM(RETURNTYPE,NAME,PARAMS,ARGS,STREAM) \ +asm(".text\nexport _" "NAME" "_unlocked\n_" "NAME" "_unlocked = _" "NAME"); \ +RETURNTYPE NAME PARAMS +#else +#define UNLOCKED_STREAM(RETURNTYPE,NAME,PARAMS,ARGS,STREAM) \ +strong_alias(NAME,NAME##_unlocked) \ +RETURNTYPE NAME PARAMS +#endif + +#define UNLOCKED(RETURNTYPE,NAME,PARAMS,ARGS) \ + UNLOCKED_STREAM(RETURNTYPE,NAME,PARAMS,ARGS,stream) + +#ifdef __BCC__ +#define UNLOCKED_VOID_RETURN(NAME,PARAMS,ARGS) \ +asm(".text\nexport _" "NAME" "_unlocked\n_" "NAME" "_unlocked = _" "NAME"); \ +void NAME PARAMS +#else +#define UNLOCKED_VOID_RETURN(NAME,PARAMS,ARGS) \ +strong_alias(NAME,NAME##_unlocked) \ +void NAME PARAMS +#endif + +#define __STDIO_THREADLOCK_OPENLIST +#define __STDIO_THREADUNLOCK_OPENLIST + +#else /* __STDIO_THREADSAFE */ + +#include + +#define UNLOCKED_STREAM(RETURNTYPE,NAME,PARAMS,ARGS,STREAM) \ +RETURNTYPE NAME PARAMS \ +{ \ + RETURNTYPE retval; \ + __STDIO_THREADLOCK(STREAM); \ + retval = NAME##_unlocked ARGS ; \ + __STDIO_THREADUNLOCK(STREAM); \ + return retval; \ +} \ +RETURNTYPE NAME##_unlocked PARAMS + +#define UNLOCKED(RETURNTYPE,NAME,PARAMS,ARGS) \ + UNLOCKED_STREAM(RETURNTYPE,NAME,PARAMS,ARGS,stream) + +#define UNLOCKED_VOID_RETURN(NAME,PARAMS,ARGS) \ +void NAME PARAMS \ +{ \ + __STDIO_THREADLOCK(stream); \ + NAME##_unlocked ARGS ; \ + __STDIO_THREADUNLOCK(stream); \ +} \ +void NAME##_unlocked PARAMS + +#define __STDIO_THREADLOCK_OPENLIST \ + pthread_mutex_lock(&_stdio_openlist_lock) + +#define __STDIO_THREADUNLOCK_OPENLIST \ + pthread_mutex_unlock(&_stdio_openlist_lock) + +#define __STDIO_THREADTRYLOCK_OPENLIST \ + pthread_mutex_trylock(&_stdio_openlist_lock) + +#endif /* __STDIO_THREADSAFE */ + +#ifndef __STDIO_BUFFERS +#error stdio buffers are currently required for wide i/o +#endif + 
+/**********************************************************************/ +#ifdef L_fwide + +/* TODO: According to SUSv3 should return EBADF if invalid stream. */ + +int fwide(register FILE *stream, int mode) +{ + __STDIO_THREADLOCK(stream); + + if (mode && !(stream->modeflags & (__FLAG_WIDE|__FLAG_NARROW))) { + stream->modeflags |= ((mode > 0) ? __FLAG_WIDE : __FLAG_NARROW); + } + + mode = (stream->modeflags & __FLAG_WIDE) + - (stream->modeflags & __FLAG_NARROW); + + __STDIO_THREADUNLOCK(stream); + + return mode; +} + +#endif +/**********************************************************************/ +#ifdef L_fgetwc + +static void munge_stream(register FILE *stream, unsigned char *buf) +{ +#ifdef __STDIO_GETC_MACRO + stream->bufgetc = +#endif +#ifdef __STDIO_PUTC_MACRO + stream->bufputc = +#endif + stream->bufpos = stream->bufread = stream->bufend = stream->bufstart = buf; +} + +UNLOCKED(wint_t,fgetwc,(register FILE *stream),(stream)) +{ + wint_t wi; + wchar_t wc[1]; + int n; + size_t r; + unsigned char c[1]; + unsigned char sbuf[1]; + unsigned char ungot_width; /* Support ftell after wscanf ungetwc. */ + + wi = WEOF; /* Prepare for failure. */ + + if (stream->modeflags & __FLAG_NARROW) { + stream->modeflags |= __FLAG_ERROR; + __set_errno(EBADF); + goto DONE; + } + stream->modeflags |= __FLAG_WIDE; + + if (stream->modeflags & __MASK_UNGOT) {/* Any ungetwc()s? */ + assert( (stream->modeflags & (__FLAG_READING|__FLAG_ERROR)) + == __FLAG_READING); + wi = stream->ungot[(--stream->modeflags) & __MASK_UNGOT]; + stream->ungot[1] = 0; + goto DONE; + } + + if (!stream->bufstart) { /* Ugh... stream isn't buffered! */ + /* Munge the stream temporarily to use a 1-byte buffer. */ + munge_stream(stream, sbuf); + ++stream->bufend; + } + + ungot_width = 0; + + LOOP: + if ((n = stream->bufread - stream->bufpos) == 0) { + goto FILL_BUFFER; + } + + r = mbrtowc(wc, stream->bufpos, n, &stream->state); + if (((ssize_t) r) >= 0) { /* Single byte... */ + if (r == 0) { /* Nul wide char... 
means 0 byte for us so */ + ++r; /* increment r and handle below as single. */ + } + stream->bufpos += r; + stream->ungot_width[0] = ungot_width + r; + wi = *wc; + goto DONE; + } + + if (r == ((size_t) -2)) { + /* Potentially valid but incomplete and no more buffered. */ + stream->bufpos += n; /* Update bufpos for stream. */ + ungot_width += n; + FILL_BUFFER: + if (_stdio_fread(c, (size_t) 1, stream) > 0) { + assert(stream->bufpos == stream->bufstart + 1); + *--stream->bufpos = *c; /* Insert byte into buffer. */ + goto LOOP; + } + if (!__FERROR(stream)) { /* EOF with no error. */ + if (!stream->state.mask) { /* No partially complete wchar. */ + goto DONE; + } + /* EOF but partially complete wchar. */ + /* TODO: should EILSEQ be set? */ + __set_errno(EILSEQ); + } + } + + /* If we reach here, either r == ((size_t)-1) and mbrtowc set errno + * to EILSEQ, or r == ((size_t)-2) and stream is in an error state + * or at EOF with a partially complete wchar. Make sure stream's + * error indicator is set. */ + stream->modeflags |= __FLAG_ERROR; + + DONE: + if (stream->bufstart == sbuf) { /* Need to un-munge the stream. */ + munge_stream(stream, NULL); + } + + return wi; +} + +strong_alias(fgetwc_unlocked,getwc_unlocked); +strong_alias(fgetwc,getwc); + +#endif +/**********************************************************************/ +#ifdef L_getwchar + +UNLOCKED_STREAM(wint_t,getwchar,(void),(),stdin) +{ + register FILE *stream = stdin; /* This helps bcc optimize. */ + + return fgetwc_unlocked(stream); +} + +#endif +/**********************************************************************/ +#ifdef L_fgetws + +UNLOCKED(wchar_t *,fgetws,(wchar_t *__restrict ws, int n, + FILE *__restrict stream),(ws, n, stream)) +{ + register wchar_t *p = ws; + wint_t wi; + + while ((n > 1) + && ((wi = fgetwc_unlocked(stream)) != WEOF) + && ((*p++ = wi) != '\n') + ) { + --n; + } + if (p == ws) { + /* TODO -- should we set errno? 
*/ +/* if (n <= 0) { */ +/* errno = EINVAL; */ +/* } */ + return NULL; + } + *p = 0; + return ws; +} + +#endif +/**********************************************************************/ +#ifdef L_fputwc + +UNLOCKED(wint_t,fputwc,(wchar_t wc, FILE *stream),(wc, stream)) +{ +#if 0 + size_t r; + char buf[MB_LEN_MAX]; + + if (stream->modeflags & __FLAG_NARROW) { + stream->modeflags |= __FLAG_ERROR; + __set_errno(EBADF); + return WEOF; + } + stream->modeflags |= __FLAG_WIDE; + + /* TODO: + * If stream is in reading state with bad mbstate object, what to do? + * Should we check the state first? Should we check error indicator? + * Should we check reading or even read-only? + */ + /* It looks like the only ANSI/ISO C99 - blessed way of manipulating + * the stream's mbstate object is through fgetpos/fsetpos. */ + r = wcrtomb(buf, wc, &stream->state); + + return (r != ((size_t) -1) && (r == _stdio_fwrite(buf, r, stream))) + ? wc : WEOF; + +#elif 0 + + /* this is broken if wc == 0 !!! */ + wchar_t wbuf[2]; + + wbuf[0] = wc; + wbuf[1] = 0; + + return (fputws_unlocked(wbuf, stream) > 0) ? wc : WEOF; + +#else + + size_t n; + char buf[MB_LEN_MAX]; + + if (stream->modeflags & __FLAG_NARROW) { + stream->modeflags |= __FLAG_ERROR; + __set_errno(EBADF); + return WEOF; + } + stream->modeflags |= __FLAG_WIDE; + + return (((n = wcrtomb(buf, wc, &stream->state)) != ((size_t)-1)) /* EILSEQ */ + && (_stdio_fwrite(buf, n, stream) != n))/* Didn't write everything. */ + ? wc : WEOF; + +#endif +} + +strong_alias(fputwc_unlocked,putwc_unlocked); +strong_alias(fputwc,putwc); + +#endif +/**********************************************************************/ +#ifdef L_putwchar + +UNLOCKED_STREAM(wint_t,putwchar,(wchar_t wc),(wc),stdout) +{ + register FILE *stream = stdout; /* This helps bcc optimize. 
*/ + + return fputwc_unlocked(wc, stream); +} + +#endif +/**********************************************************************/ +#ifdef L_fputws + +UNLOCKED(int,fputws,(const wchar_t *__restrict ws, + register FILE *__restrict stream),(ws, stream)) +{ +#if 1 + size_t n; + char buf[64]; + + if (stream->modeflags & __FLAG_NARROW) { + stream->modeflags |= __FLAG_ERROR; + __set_errno(EBADF); + return -1; + } + stream->modeflags |= __FLAG_WIDE; + + while ((n = wcsrtombs(buf, &ws, sizeof(buf), &stream->state)) != 0) { + /* Wasn't an empty wide string. */ + if ((n == ((size_t) -1))/* Encoding error! */ + || (_stdio_fwrite(buf, n, stream) != n)/* Didn't write everything. */ + ) { + return -1; + } + if (!ws) { /* Done? */ + break; + } + } + + return 1; + + + + +#elif 1 + int result; + size_t n; + size_t len; + register char *s; + unsigned char *bufend; + char sbuf[MB_LEN_MAX]; + + if (stream->modeflags & __FLAG_NARROW) { + RETURN_BADF: + stream->modeflags |= __FLAG_ERROR; + __set_errno(EBADF); + return -1; + } + stream->modeflags |= __FLAG_WIDE; + + /* Note: What follows is setup grabbed from _stdio_fwrite and modified + * slightly. Since this is a wide stream, we can ignore bufgetc and + * bufputc if present. They always == bufstart. + * It is unfortunate that we need to duplicate so much code here, but + * we need to do the stream setup before starting the wc->mb conversion. */ + + if ((stream->modeflags & __FLAG_READONLY) +#ifndef __STDIO_AUTO_RW_TRANSITION + /* ANSI/ISO requires either at EOF or currently not reading. */ + || ((stream->modeflags & (__FLAG_READING|__FLAG_EOF)) + == __FLAG_READING) +#endif /* __STDIO_AUTO_RW_TRANSITION */ + ) { + /* TODO: This is for posix behavior if readonly. To save space, we + * use this errno for write attempt while reading, as no errno is + * specified by posix for this case, even though the restriction is + * mentioned in fopen(). 
*/ + goto RETURN_BADF; + } + +#ifdef __STDIO_AUTO_RW_TRANSITION + /* If reading, deal with ungots and read-buffered chars. */ + if (stream->modeflags & __FLAG_READING) { + if (((stream->bufpos < stream->bufread) + || (stream->modeflags & __MASK_UNGOT)) + /* If appending, we might as well seek to end to save a seek. */ + /* TODO: set EOF in fseek when appropriate? */ + && fseek(stream, 0L, + ((stream->modeflags & __FLAG_APPEND) + ? SEEK_END : SEEK_CUR)) + ) { + /* Note: This differs from glibc's apparent behavior of + not setting the error flag and discarding the buffered + read data. */ + stream->modeflags |= __FLAG_ERROR; /* fseek may not set this. */ + return -1; /* Fail if we need to fseek but can't. */ + } + /* Always reset even if fseek called (saves a test). */ + stream->bufpos = stream->bufread = stream->bufstart; + stream->modeflags &= ~__FLAG_READING; + } +#endif + + /* Ok, the boilerplate from _stdio_fwrite is done. */ + + if (stream->bufpos > stream->bufstart) { /* Pending writes.. */ + /* This is a performance penalty, but it simplifies the code below. + * If this is removed, the buffer sharing and while loop condition + * need to be modified below (at least). We at least save a little + * on the overhead by calling _stdio_fwrite directly instead of + * fflush_unlocked. */ + if (_stdio_fwrite(NULL, 0, stream) > 0) {/* fflush incomplete! */ + return -1; + } + } + + stream->modeflags |= __FLAG_WRITING; /* Ensure Writing flag is set. */ + + /* Next, we "steal" the stream's buffer and do the wc->mb conversion + * straight into it. This will cause the equivalent of an fflush + * for each string write. :-( */ + + bufend = NULL; + s = stream->bufstart; + + if ((len = stream->bufend - stream->bufstart) < MB_LEN_MAX) { + /* Stream is unbuffered or buffer is too small, so deactivate. */ + bufend = stream->bufend; + stream->bufend = stream->bufstart; + s = sbuf; + len = MB_LEN_MAX; + } + + result = 1; /* Assume success. 
*/ + while (ws && (n = wcsrtombs(s, &ws, len, &stream->state)) != 0) { + if ((n == ((size_t) -1)) /* Encoding error! */ + /* TODO - maybe call write directly? but what about custom streams? */ + || (_stdio_fwrite(s, n, stream) != n)/* Didn't write everything. */ + ) { + result = -1; + break; + } + } + + if (bufend) { /* If deactivated stream buffer, renable it. */ + stream->bufend = bufend; + } + + return result; + +#else /* slow, dumb version */ + while (*ws) { + if (fputwc_unlocked(*ws, stream) == WEOF) { + return -1; + } + ++ws; + } + return 1; +#endif +} + +#endif +/**********************************************************************/ +#ifdef L_ungetwc +/* + * Note: This is the application-callable ungetwc. If wscanf calls this, it + * should also set stream->ungot[1] to 0 if this is the only ungot. + */ + +/* Reentrant. */ + +wint_t ungetwc(wint_t c, register FILE *stream) +{ + __STDIO_THREADLOCK(stream); + + __stdio_validate_FILE(stream); /* debugging only */ + + if (stream->modeflags & __FLAG_NARROW) { + stream->modeflags |= __FLAG_ERROR; + c = WEOF; + goto DONE; + } + stream->modeflags |= __FLAG_WIDE; + + /* If can't read or there's been an error, or c == EOF, or ungot slots + * already filled, then return EOF */ + if ((stream->modeflags + & (__MASK_UNGOT2|__FLAG_WRITEONLY +#ifndef __STDIO_AUTO_RW_TRANSITION + |__FLAG_WRITING /* Note: technically no, but yes in spirit */ +#endif /* __STDIO_AUTO_RW_TRANSITION */ + )) + || ((stream->modeflags & __MASK_UNGOT1) && (stream->ungot[1])) + || (c == WEOF) ) { + c = WEOF; + goto DONE;; + } + +/* ungot_width */ + +#ifdef __STDIO_BUFFERS + /* TODO: shouldn't allow writing??? */ + if (stream->modeflags & __FLAG_WRITING) { + fflush_unlocked(stream); /* Commit any write-buffered chars. 
*/ + } +#endif /* __STDIO_BUFFERS */ + + /* Clear EOF and WRITING flags, and set READING FLAG */ + stream->modeflags &= ~(__FLAG_EOF|__FLAG_WRITING); + stream->modeflags |= __FLAG_READING; + stream->ungot[1] = 1; /* Flag as app ungetc call; wscanf fixes up. */ + stream->ungot[(stream->modeflags++) & __MASK_UNGOT] = c; + + __stdio_validate_FILE(stream); /* debugging only */ + + DONE: + __STDIO_THREADUNLOCK(stream); + + return c; +} + +#endif +/**********************************************************************/ diff --git a/libc/misc/wctype/wctype.c b/libc/misc/wctype/wctype.c index 39ed2cfd5..802e979f1 100644 --- a/libc/misc/wctype/wctype.c +++ b/libc/misc/wctype/wctype.c @@ -34,6 +34,7 @@ #include #include #include +#include /* We know wide char support is enabled. We wouldn't be here otherwise. */ @@ -41,7 +42,9 @@ * towctrans function. */ /* #define SMALL_UPLOW */ +#ifndef __LOCALE_C_ONLY #define __WCTYPE_WITH_LOCALE +#endif /**********************************************************************/ @@ -292,6 +295,10 @@ wctype_t wctype(const char *property) /**********************************************************************/ #ifdef L_iswctype +#warning duh... replace the range-based classification with table lookup! 
+ +#ifdef __WCTYPE_WITH_LOCALE + #warning TODO: need to fix locale ctype table lookup stuff #if 0 extern const char ctype_range[]; @@ -346,6 +353,102 @@ int iswctype(wint_t wc, wctype_t desc) return 0; } +#else + +static const unsigned char WCctype[] = { + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_space_blank << 4), + __CTYPE_cntrl_space_nonblank | (__CTYPE_cntrl_space_nonblank << 4), + __CTYPE_cntrl_space_nonblank | (__CTYPE_cntrl_space_nonblank << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_cntrl_nonspace | (__CTYPE_cntrl_nonspace << 4), + __CTYPE_print_space_blank | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_digit | (__CTYPE_digit << 4), + __CTYPE_digit | (__CTYPE_digit << 4), + __CTYPE_digit | (__CTYPE_digit << 4), + __CTYPE_digit | (__CTYPE_digit << 4), + __CTYPE_digit | (__CTYPE_digit << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | 
(__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | (__CTYPE_alpha_upper << 4), + __CTYPE_alpha_upper | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_alpha_lower << 4), + __CTYPE_alpha_lower | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_punct << 4), + __CTYPE_punct | (__CTYPE_cntrl_nonspace << 4), +}; + +static const char ctype_range[] = { + __CTYPE_RANGES +}; + +int iswctype(wint_t wc, wctype_t desc) +{ + unsigned char d = __CTYPE_unclassified; + + if (((__uwchar_t) wc) <= 0x7f) { + if (desc < _CTYPE_iswxdigit) { + d = WCctype[wc >> 1]; + d = (wc & 1) ? 
(d >> 4) : (d & 0xf); + + return ( ((unsigned char)(d - ctype_range[2*desc])) + <= ctype_range[2*desc + 1] ) + && ((desc != _CTYPE_iswblank) || (d & 1)); + } + + if (desc == _CTYPE_iswxdigit) { + return __C_isxdigit(((char) wc)); + } + } + return 0; +} + +#endif + #endif /**********************************************************************/ #ifdef L_towctrans @@ -446,10 +549,26 @@ wint_t towctrans(wint_t wc, wctrans_t desc) #endif -#else +#else /* __WCTYPE_WITH_LOCALE */ +/* Minimal support for C/POSIX locale. */ -#endif +wint_t towctrans(wint_t wc, wctrans_t desc) +{ + if (((unsigned int)(desc - _CTYPE_tolower)) + <= (_CTYPE_toupper - _CTYPE_tolower) + ) { + /* Transliteration is either tolower or toupper. */ + if (((__uwchar_t) wc) <= 0x7f) { + return (desc == _CTYPE_tolower) ? _tolower(wc) : _toupper(wc); + } + } else { + __set_errno(EINVAL); /* Invalid transliteration. */ + } + return wc; +} + +#endif /* __WCTYPE_WITH_LOCALE */ #endif /**********************************************************************/ -- cgit v1.2.3