summaryrefslogtreecommitdiff
path: root/libc/misc/wchar
diff options
context:
space:
mode:
authorManuel Novoa III <mjn3@codepoet.org>2003-08-01 20:08:59 +0000
committerManuel Novoa III <mjn3@codepoet.org>2003-08-01 20:08:59 +0000
commit1217289737588e65b088b3535428b27c7287d699 (patch)
tree6a292ac767d219702e26a6a2111737f84a96900c /libc/misc/wchar
parent32b76c5ec3c257b7287913d0d1a96e0cbb2e9c6a (diff)
Add a new *scanf implementation, including the *wscanf functions.
Should be standards compliant and with several optional features, including support for hexadecimal float notation, locale awareness, glibc-like locale-specific digit grouping with the `'' flag, and positional arg support. I tested it pretty well (finding several bugs in glibc's scanf in the process), but it is brand new so be aware. The *wprintf functions now support floating point output. Also, a couple of bugs were squashed. Finally, %a/%A conversions are now implemented. Implement the glibc xlocale interface for thread-specific locale support. Also add the various *_l(args, locale_t loc_arg) funcs. NOTE!!! setlocale() is NOT threadsafe! NOTE!!! The strto{floating point} conversion functions are now locale aware. They also now support hexadecimal floating point notation. Add the wcsto{floating point} conversion functions. Fix a bug in mktime() related to dst. Note that unlike glibc's mktime, uClibc's version always normalizes the struct tm before attempting to determine the correct dst setting if tm_isdst == -1 on entry. Add a stub version of the libintl functions. (untested) Fixed a known memory leak in setlocale() related to the collation data. Add lots of new config options (which Erik agreed to sort out :-), including finally exposing some of the stripped down stdio configs. Be careful with those though, as they haven't been tested in a long time. (temporary) GOTCHAs... The ctype functions are currently incorrect for 8-bit locales. They will be fixed shortly. The ctype functions are now table-based, resulting in larger statically linked binaries. I'll be adding an option to use the old approach in the stub locale configuration.
Diffstat (limited to 'libc/misc/wchar')
-rw-r--r--libc/misc/wchar/Makefile1
-rw-r--r--libc/misc/wchar/wchar.c111
-rw-r--r--libc/misc/wchar/wstdio.c37
3 files changed, 102 insertions, 47 deletions
diff --git a/libc/misc/wchar/Makefile b/libc/misc/wchar/Makefile
index acc852195..4a3e2fc7a 100644
--- a/libc/misc/wchar/Makefile
+++ b/libc/misc/wchar/Makefile
@@ -42,7 +42,6 @@ MOBJ2= fwide.o \
# getwc (fgetwc alias) getwc_unlocked (fgetwc_unlocked alias)
# putwc (fputwc alias) putwc_unlocked (fputwc_unlocked alias)
-# fwscanf wscanf swscanf vfwscanf vwscanf vswscanf
# wcsftime
OBJS=$(MOBJ1) $(MOBJ2)
diff --git a/libc/misc/wchar/wchar.c b/libc/misc/wchar/wchar.c
index ff3e42a84..aa0c2735a 100644
--- a/libc/misc/wchar/wchar.c
+++ b/libc/misc/wchar/wchar.c
@@ -86,6 +86,9 @@
* Add a couple of ugly hacks to support *wprintf.
* Add a mini iconv() and iconv implementation (requires locale support).
*
+ * Aug 1, 2003
+ * Bug fix for mbrtowc.
+ *
* Manuel
*/
@@ -101,13 +104,39 @@
#include <assert.h>
#include <locale.h>
#include <wchar.h>
+#include <bits/uClibc_uwchar.h>
+/**********************************************************************/
#ifdef __UCLIBC_HAS_LOCALE__
-#define ENCODING (__global_locale.encoding)
+#ifdef __UCLIBC_MJN3_ONLY__
+#ifdef L_iswspace
+/* generates one warning */
+#warning TODO: Fix Cc2wc* and Cwc2c* defines!
+#endif
+#endif /* __UCLIBC_MJN3_ONLY__ */
+
+#define ENCODING ((__UCLIBC_CURLOCALE_DATA).encoding)
+
+#define Cc2wc_IDX_SHIFT __LOCALE_DATA_Cc2wc_IDX_SHIFT
+#define Cc2wc_ROW_LEN __LOCALE_DATA_Cc2wc_ROW_LEN
+#define Cwc2c_DOMAIN_MAX __LOCALE_DATA_Cwc2c_DOMAIN_MAX
+#define Cwc2c_TI_SHIFT __LOCALE_DATA_Cwc2c_TI_SHIFT
+#define Cwc2c_TT_SHIFT __LOCALE_DATA_Cwc2c_TT_SHIFT
+#define Cwc2c_TI_LEN __LOCALE_DATA_Cwc2c_TI_LEN
+
#ifndef __CTYPE_HAS_UTF_8_LOCALES
#warning __CTYPE_HAS_UTF_8_LOCALES not set!
#endif
-#else
+
+#else /* __UCLIBC_HAS_LOCALE__ */
+
+#ifdef __UCLIBC_MJN3_ONLY__
+#ifdef L_btowc
+/* emit only once */
+#warning fix preprocessor logic testing locale settings
+#endif
+#endif
+
#define ENCODING (__ctype_encoding_7_bit)
#ifdef __CTYPE_HAS_8_BIT_LOCALES
#error __CTYPE_HAS_8_BIT_LOCALES is defined!
@@ -117,7 +146,9 @@
#endif
#undef L__wchar_utf8sntowcs
#undef L__wchar_wcsntoutf8s
-#endif
+
+#endif /* __UCLIBC_HAS_LOCALE__ */
+/**********************************************************************/
#if WCHAR_MAX > 0xffffUL
#define UTF_8_MAX_LEN 6
@@ -266,11 +297,18 @@ size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
#ifdef __CTYPE_HAS_UTF_8_LOCALES
/* Need to do this here since mbsrtowcs doesn't allow incompletes. */
if (ENCODING == __ctype_encoding_utf8) {
+ if (!pwc) {
+ pwc = wcbuf;
+ }
r = _wchar_utf8sntowcs(pwc, 1, &p, n, ps, 1);
return (r == 1) ? (p-s) : r; /* Need to return 0 if nul char. */
}
#endif
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning TODO: This adds a trailing nul!
+#endif /* __UCLIBC_MJN3_ONLY__ */
+
r = __mbsnrtowcs(wcbuf, &p, SIZE_MAX, 1, ps);
if (((ssize_t) r) >= 0) {
@@ -291,7 +329,10 @@ size_t mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
size_t wcrtomb(register char *__restrict s, wchar_t wc,
mbstate_t *__restrict ps)
{
- wchar_t wcbuf[2];
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning TODO: Should wcsnrtombs nul-terminate unconditionally? Check glibc.
+#endif /* __UCLIBC_MJN3_ONLY__ */
+ wchar_t wcbuf[1];
const wchar_t *pwc;
size_t r;
char buf[MB_LEN_MAX];
@@ -303,9 +344,8 @@ size_t wcrtomb(register char *__restrict s, wchar_t wc,
pwc = wcbuf;
wcbuf[0] = wc;
- wcbuf[1] = 0;
- r = __wcsnrtombs(s, &pwc, SIZE_MAX, MB_LEN_MAX, ps);
+ r = __wcsnrtombs(s, &pwc, 1, MB_LEN_MAX, ps);
return (r != 0) ? r : 1;
}
@@ -418,7 +458,7 @@ size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
if ((wc = ((unsigned char) *s++)) >= 0x80) { /* Not ASCII... */
mask = 0x40;
#ifdef __UCLIBC_MJN3_ONLY__
-#warning fix range for 16 bit wides
+#warning TODO: Fix range for 16 bit wchar_t case.
#endif
if ( ((unsigned char)(s[-1] - 0xc0)) < (0xfe - 0xc0) ) {
goto START;
@@ -495,7 +535,6 @@ size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
COMPLETE:
*pwc = wc;
pwc += incr;
-
}
#ifdef DECODER
while (--count);
@@ -684,8 +723,8 @@ size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
while (count) {
if ((wc = ((unsigned char)(*s))) >= 0x80) { /* Non-ASCII... */
wc -= 0x80;
- wc = __global_locale.tbl8c2wc[
- (__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
+ wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
+ (__UCLIBC_CURLOCALE_DATA.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
<< Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
if (!wc) {
goto BAD;
@@ -797,12 +836,12 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
} else {
u = 0;
if (wc <= Cwc2c_DOMAIN_MAX) {
- u = __global_locale.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
+ u = __UCLIBC_CURLOCALE_DATA.idx8wc2c[wc >> (Cwc2c_TI_SHIFT
+ Cwc2c_TT_SHIFT)];
- u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
+ u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
+ ((wc >> Cwc2c_TT_SHIFT)
& ((1 << Cwc2c_TI_SHIFT)-1))];
- u = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
+ u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
+ (u << Cwc2c_TT_SHIFT)
+ (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
}
@@ -859,7 +898,8 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
#ifdef L_wcswidth
#ifdef __UCLIBC_MJN3_ONLY__
-#warning if we start doing translit, wcwidth and wcswidth will need updating.
+#warning REMINDER: If we start doing translit, wcwidth and wcswidth will need updating.
+#warning TODO: Update wcwidth to match latest by Kuhn.
#endif
#if defined(__UCLIBC_HAS_LOCALE__) && \
@@ -1163,7 +1203,7 @@ enum {
*
*/
-const unsigned char codesets[] =
+const unsigned char __iconv_codesets[] =
"\x0a\xe0""WCHAR_T\x00" /* superset of UCS-4 but platform-endian */
#if __BYTE_ORDER == __BIG_ENDIAN
"\x08\xec""UCS-4\x00" /* always BE */
@@ -1201,7 +1241,7 @@ static int find_codeset(const char *name)
const unsigned char *s;
int codeset;
- for (s = codesets ; *s ; s += *s) {
+ for (s = __iconv_codesets ; *s ; s += *s) {
if (!strcasecmp(s+2, name)) {
return s[1];
}
@@ -1212,10 +1252,10 @@ static int find_codeset(const char *name)
/* TODO: maybe CODESET_LIST + *s ??? */
/* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
codeset = 2;
- s = CODESET_LIST;
+ s = __LOCALE_DATA_CODESET_LIST;
do {
++codeset; /* Increment codeset first. */
- if (!strcasecmp(CODESET_LIST+*s, name)) {
+ if (!strcasecmp(__LOCALE_DATA_CODESET_LIST+*s, name)) {
return codeset;
}
} while (*++s);
@@ -1223,7 +1263,7 @@ static int find_codeset(const char *name)
return 0; /* No matching codeset! */
}
-iconv_t iconv_open(const char *tocode, const char *fromcode)
+iconv_t weak_function iconv_open(const char *tocode, const char *fromcode)
{
register _UC_iconv_t *px;
int tocodeset, fromcodeset;
@@ -1244,16 +1284,17 @@ iconv_t iconv_open(const char *tocode, const char *fromcode)
return (iconv_t)(-1);
}
-int iconv_close(iconv_t cd)
+int weak_function iconv_close(iconv_t cd)
{
free(cd);
return 0;
}
-size_t iconv(iconv_t cd, char **__restrict inbuf,
- size_t *__restrict inbytesleft,
- char **__restrict outbuf, size_t *__restrict outbytesleft)
+size_t weak_function iconv(iconv_t cd, char **__restrict inbuf,
+ size_t *__restrict inbytesleft,
+ char **__restrict outbuf,
+ size_t *__restrict outbytesleft)
{
_UC_iconv_t *px = (_UC_iconv_t *) cd;
size_t nrcount, r;
@@ -1362,9 +1403,9 @@ size_t iconv(iconv_t cd, char **__restrict inbuf,
return (size_t)(-1);
}
#ifdef __UCLIBC_MJN3_ONLY__
-#warning optimize this
+#warning TODO: optimize this.
#endif
- if (p != NULL) { /* incomplet char case */
+ if (p != NULL) { /* incomplete char case */
goto INVALID;
}
p = *inbuf + 1; /* nul */
@@ -1374,10 +1415,10 @@ size_t iconv(iconv_t cd, char **__restrict inbuf,
if (px->fromcodeset == IC_ASCII) { /* US-ASCII codeset */
goto ILLEGAL;
} else { /* some other 8-bit ascii-extension codeset */
- const codeset_8_bit_t *c8b
+ const __codeset_8_bit_t *c8b
= __locale_mmap->codeset_8_bit + px->fromcodeset - 3;
wc -= 0x80;
- wc = __global_locale.tbl8c2wc[
+ wc = __UCLIBC_CURLOCALE_DATA.tbl8c2wc[
(c8b->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
<< Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
if (!wc) {
@@ -1439,7 +1480,7 @@ size_t iconv(iconv_t cd, char **__restrict inbuf,
r = _wchar_wcsntoutf8s(*outbuf, *outbytesleft, &pw, 1);
if (r != (size_t)(-1)) {
#ifdef __UCLIBC_MJN3_ONLY__
-#warning what happens for a nul?
+#warning TODO: What happens for a nul?
#endif
if (r == 0) {
if (wc != 0) {
@@ -1458,14 +1499,14 @@ size_t iconv(iconv_t cd, char **__restrict inbuf,
**outbuf = wc;
} else {
if ((px->tocodeset != 0x01) && (wc <= Cwc2c_DOMAIN_MAX)) {
- const codeset_8_bit_t *c8b
+ const __codeset_8_bit_t *c8b
= __locale_mmap->codeset_8_bit + px->tocodeset - 3;
__uwchar_t u;
u = c8b->idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)];
- u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
+ u = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
+ ((wc >> Cwc2c_TT_SHIFT)
& ((1 << Cwc2c_TI_SHIFT)-1))];
- wc = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
+ wc = __UCLIBC_CURLOCALE_DATA.tbl8wc2c[Cwc2c_TI_LEN
+ (u << Cwc2c_TT_SHIFT)
+ (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
if (wc) {
@@ -1497,7 +1538,7 @@ size_t iconv(iconv_t cd, char **__restrict inbuf,
#include <stdarg.h>
#include <libgen.h>
-extern const unsigned char codesets[];
+extern const unsigned char __iconv_codesets[];
#define IBUF BUFSIZ
#define OBUF BUFSIZ
@@ -1572,12 +1613,12 @@ int main(int argc, char **argv)
if (opts[5]) { /* -l */
fprintf(stderr, "Recognized codesets:\n");
- for (s = codesets ; *s ; s += *s) {
+ for (s = __iconv_codesets ; *s ; s += *s) {
fprintf(stderr," %s\n", s+2);
}
- s = CODESET_LIST;
+ s = __LOCALE_DATA_CODESET_LIST;
do {
- fprintf(stderr," %s\n", CODESET_LIST+ (unsigned char)(*s));
+ fprintf(stderr," %s\n", __LOCALE_DATA_CODESET_LIST+ (unsigned char)(*s));
} while (*++s);
return EXIT_SUCCESS;
diff --git a/libc/misc/wchar/wstdio.c b/libc/misc/wchar/wstdio.c
index dfeb35c30..1069ee938 100644
--- a/libc/misc/wchar/wstdio.c
+++ b/libc/misc/wchar/wstdio.c
@@ -171,7 +171,6 @@ UNLOCKED(wint_t,fgetwc,(register FILE *stream),(stream))
size_t r;
unsigned char c[1];
unsigned char sbuf[1];
- unsigned char ungot_width; /* Support ftell after wscanf ungetwc. */
wi = WEOF; /* Prepare for failure. */
@@ -183,8 +182,18 @@ UNLOCKED(wint_t,fgetwc,(register FILE *stream),(stream))
stream->modeflags |= __FLAG_WIDE;
if (stream->modeflags & __MASK_UNGOT) {/* Any ungetwc()s? */
- assert( (stream->modeflags & (__FLAG_READING|__FLAG_ERROR))
- == __FLAG_READING);
+
+ assert(stream->modeflags & __FLAG_READING);
+
+/* assert( (stream->modeflags & (__FLAG_READING|__FLAG_ERROR)) */
+/* == __FLAG_READING); */
+
+ if ((((stream->modeflags & __MASK_UNGOT) > 1) || stream->ungot[1])) {
+ stream->ungot_width[0] = 0; /* Application ungot... */
+ } else {
+ stream->ungot_width[0] = stream->ungot_width[1]; /* scanf ungot */
+ }
+
wi = stream->ungot[(--stream->modeflags) & __MASK_UNGOT];
stream->ungot[1] = 0;
goto DONE;
@@ -196,7 +205,9 @@ UNLOCKED(wint_t,fgetwc,(register FILE *stream),(stream))
++stream->bufend;
}
- ungot_width = 0;
+ if (stream->state.mask == 0) { /* If last was a complete char */
+ stream->ungot_width[0] = 0; /* then reset the width. */
+ }
LOOP:
if ((n = stream->bufread - stream->bufpos) == 0) {
@@ -204,12 +215,12 @@ UNLOCKED(wint_t,fgetwc,(register FILE *stream),(stream))
}
r = mbrtowc(wc, stream->bufpos, n, &stream->state);
- if (((ssize_t) r) >= 0) { /* Single byte... */
+ if (((ssize_t) r) >= 0) { /* Success... */
if (r == 0) { /* Nul wide char... means 0 byte for us so */
++r; /* increment r and handle below as single. */
}
stream->bufpos += r;
- stream->ungot_width[0] = ungot_width + r;
+ stream->ungot_width[0] += r;
wi = *wc;
goto DONE;
}
@@ -217,7 +228,7 @@ UNLOCKED(wint_t,fgetwc,(register FILE *stream),(stream))
if (r == ((size_t) -2)) {
/* Potentially valid but incomplete and no more buffered. */
stream->bufpos += n; /* Update bufpos for stream. */
- ungot_width += n;
+ stream->ungot_width[0] += n;
FILL_BUFFER:
if (_stdio_fread(c, (size_t) 1, stream) > 0) {
assert(stream->bufpos == stream->bufstart + 1);
@@ -371,7 +382,8 @@ UNLOCKED(int,fputws,(const wchar_t *__restrict ws,
#ifdef L_ungetwc
/*
* Note: This is the application-callable ungetwc. If wscanf calls this, it
- * should also set stream->ungot[1] to 0 if this is the only ungot.
+ * should also set stream->ungot[1] to 0 if this is the only ungot, as well
+ * as reset stream->ungot_width[1] for use by _stdio_adjpos().
*/
/* Reentrant. */
@@ -389,8 +401,7 @@ wint_t ungetwc(wint_t c, register FILE *stream)
}
stream->modeflags |= __FLAG_WIDE;
- /* If can't read or there's been an error, or c == EOF, or ungot slots
- * already filled, then return EOF */
+ /* If can't read or c == WEOF or ungot slots already filled, then fail. */
if ((stream->modeflags
& (__MASK_UNGOT2|__FLAG_WRITEONLY
#ifndef __STDIO_AUTO_RW_TRANSITION
@@ -406,14 +417,18 @@ wint_t ungetwc(wint_t c, register FILE *stream)
/* ungot_width */
#ifdef __STDIO_BUFFERS
- /* TODO: shouldn't allow writing??? */
+#ifdef __STDIO_AUTO_RW_TRANSITION
if (stream->modeflags & __FLAG_WRITING) {
fflush_unlocked(stream); /* Commit any write-buffered chars. */
}
+#endif /* __STDIO_AUTO_RW_TRANSITION */
#endif /* __STDIO_BUFFERS */
/* Clear EOF and WRITING flags, and set READING FLAG */
stream->modeflags &= ~(__FLAG_EOF|__FLAG_WRITING);
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning CONSIDER: Is setting the reading flag after an ungetwc necessary?
+#endif /* __UCLIBC_MJN3_ONLY__ */
stream->modeflags |= __FLAG_READING;
stream->ungot[1] = 1; /* Flag as app ungetc call; wscanf fixes up. */
stream->ungot[(stream->modeflags++) & __MASK_UNGOT] = c;