summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorManuel Novoa III <mjn3@codepoet.org>2002-11-04 21:27:46 +0000
committerManuel Novoa III <mjn3@codepoet.org>2002-11-04 21:27:46 +0000
commita854cf512abbcf96e0950ff776f11a0ce3829840 (patch)
treed3c2c43d8e590aeb542a2f4e96f952d3e80034ac
parent44835a918f93196eb994d3c0d92bfa4aa2d4aef6 (diff)
Add printf wchar support for %lc (%C) and %ls (%S).
Require printf format strings to be valid multibyte strings beginning and ending in their initial shift state, as per the stds. Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL. Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in order to support %ls in printf. See comments below for details. Change behaviour of wc<->mb functions when in the C locale. Now they do a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility and consistency with the stds requirements that a printf format string by a valid multibyte string beginning and ending in it's initial shift state.
-rw-r--r--libc/misc/wchar/wchar.c56
-rw-r--r--libc/stdio/printf.c113
2 files changed, 140 insertions, 29 deletions
diff --git a/libc/misc/wchar/wchar.c b/libc/misc/wchar/wchar.c
index cb24f069e..6bdc7c068 100644
--- a/libc/misc/wchar/wchar.c
+++ b/libc/misc/wchar/wchar.c
@@ -58,6 +58,16 @@
* Enabled building of a C/POSIX-locale-only version, so full locale support
* no longer needs to be enabled.
*
+ * Nov 4, 2002
+ *
+ * Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
+ * Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
+ * order to support %ls in printf. See comments below for details.
+ * Change behaviour of wc<->mb functions when in the C locale. Now they do
+ * a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
+ * and consistency with the stds requirements that a printf format string by
+ * a valid multibyte string beginning and ending in it's initial shift state.
+ *
* Manuel
*/
@@ -481,9 +491,19 @@ size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
char m;
store = 1;
- if (!s) {
- s = buf;
- n = SIZE_MAX;
+ /* NOTE: The following is an AWFUL HACK! In order to support %ls in
+ * printf, we need to be able to compute the number of bytes needed
+ * for the mbs conversion, not to exceed the precision specified.
+ * But if dst is NULL, the return value is the length assuming a
+ * sufficiently sized buffer. So, we allow passing of (char *) src
+ * as dst in order to flag that we really want the length, subject
+ * to the restricted buffer size and no partial conversions.
+ * See wcsnrtombs() as well. */
+ if (!s || (s == ((char *) src))) {
+ if (!s) {
+ n = SIZE_MAX;
+ }
+ s = buf;
store = 0;
}
@@ -553,7 +573,9 @@ size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
}
}
- *src = (const wchar_t *) swc;
+ if (store) {
+ *src = (const wchar_t *) swc;
+ }
return n - t;
}
@@ -614,7 +636,8 @@ size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
(__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
<< Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
if (!wc) {
- goto BAD;
+ __set_errno(EILSEQ);
+ return (size_t) -1;
}
}
if (!(*dst = wc)) {
@@ -641,13 +664,6 @@ size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
s = NULL;
break;
}
- if (*dst >= 0x80) {
-#ifdef __CTYPE_HAS_8_BIT_LOCALES
- BAD:
-#endif
- __set_errno(EILSEQ);
- return (size_t) -1;
- }
++s;
dst += incr;
--count;
@@ -686,9 +702,19 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
#endif /* __CTYPE_HAS_UTF_8_LOCALES */
incr = 1;
- if (!dst) {
+ /* NOTE: The following is an AWFUL HACK! In order to support %ls in
+ * printf, we need to be able to compute the number of bytes needed
+ * for the mbs conversion, not to exceed the precision specified.
+ * But if dst is NULL, the return value is the length assuming a
+ * sufficiently sized buffer. So, we allow passing of (char *) src
+ * as dst in order to flag that we really want the length, subject
+ * to the restricted buffer size and no partial conversions.
+ * See _wchar_wcsntoutf8s() as well. */
+ if (!dst || (dst == ((char *) src))) {
+ if (!dst) {
+ len = SIZE_MAX;
+ }
dst = buf;
- len = SIZE_MAX;
incr = 0;
}
@@ -749,7 +775,7 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
#endif
while (count) {
- if (*s >= 0x80) {
+ if (*s > UCHAR_MAX) {
#if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
BAD:
#endif
diff --git a/libc/stdio/printf.c b/libc/stdio/printf.c
index 88f248a75..ddf282e7e 100644
--- a/libc/stdio/printf.c
+++ b/libc/stdio/printf.c
@@ -31,25 +31,45 @@
*
* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
-/* 4-01-2002
+
+/* April 1, 2002
* Initialize thread locks for fake files in vsnprintf and vdprintf.
* reported by Erik Andersen (andersen@codepoet.com)
* Fix an arg promotion handling bug in _do_one_spec for %c.
* reported by Ilguiz Latypov <ilatypov@superbt.com>
*
- * 5-10-2002
+ * May 10, 2002
* Remove __isdigit and use new ctype.h version.
* Add conditional setting of QUAL_CHARS for size_t and ptrdiff_t.
*
- * 8-16-2002
+ * Aug 16, 2002
* Fix two problems that showed up with the python 2.2.1 tests; one
* involving %o and one involving %f.
*
- * 10-28-2002
+ * Oct 28, 2002
* Fix a problem in vasprintf (reported by vodz a while back) when built
* without custom stream support. In that case, it is necessary to do
* a va_copy.
* Make sure each va_copy has a matching va_end, as required by C99.
+ *
+ * Nov 4, 2002
+ * Add locale-specific grouping support for integer decimal conversion.
+ * Add locale-specific decimal point support for floating point conversion.
+ * Note: grouping will have to wait for _dtostr() rewrite.
+ * Add printf wchar support for %lc (%C) and %ls (%S).
+ * Require printf format strings to be valid multibyte strings beginning and
+ * ending in their initial shift state, as per the stds.
+ */
+
+/* TODO:
+ *
+ * Should we validate that *printf format strings are valid multibyte
+ * strings in the current locale? ANSI/ISO C99 seems to imply this
+ * and Plauger's printf implementation in his Standard C Library book
+ * treats this as an error.
+ *
+ * Implement %a, %A, and locale-specific grouping for the printf floating
+ * point conversions. To be done in the rewrite of _dtostr().
*/
@@ -75,6 +95,10 @@
#include <pthread.h>
#endif /* __STDIO_THREADSAFE */
+#ifdef __UCLIBC_HAS_WCHAR__
+#include <wchar.h>
+#endif /* __UCLIBC_HAS_WCHAR__ */
+
/**********************************************************************/
/* These provide some control over printf's feature set */
@@ -335,7 +359,7 @@ typedef struct {
extern size_t _dtostr(FILE * fp, long double x, struct printf_info *info);
#endif
-#define _outnstr(stream, string, len) _stdio_fwrite(s, len, stream) /* TODO */
+#define _outnstr(stream, string, len) _stdio_fwrite(string, len, stream) /* TODO */
extern int _do_one_spec(FILE * __restrict stream, ppfs_t *ppfs, int *count);
@@ -431,7 +455,7 @@ int vfprintf(FILE * __restrict stream, register const char * __restrict format,
s = format;
if (_ppfs_init(&ppfs, format) < 0) { /* Bad format string. */
- _outnstr(stream, format, strlen(format));
+ _outnstr(stream, ppfs.fmtpos, strlen(ppfs.fmtpos));
count = -1;
} else {
_ppfs_prepargs(&ppfs, arg); /* This did a va_copy!!! */
@@ -481,11 +505,29 @@ int vfprintf(FILE * __restrict stream, register const char * __restrict format,
int _ppfs_init(register ppfs_t *ppfs, const char *fmt0)
{
+#if defined(__UCLIBC_HAS_WCHAR__) && defined(__UCLIBC_HAS_LOCALE__)
+ static const char invalid_mbs[] = "Invalid multibyte format string.";
+#endif /* defined(__UCLIBC_HAS_WCHAR__) && defined(__UCLIBC_HAS_LOCALE__) */
int r;
/* First, zero out everything... argnumber[], argtype[], argptr[] */
memset(ppfs, 0, sizeof(ppfs_t)); /* TODO: nonportable???? */
--ppfs->maxposarg; /* set to -1 */
+ ppfs->fmtpos = fmt0;
+#if defined(__UCLIBC_HAS_WCHAR__) && defined(__UCLIBC_HAS_LOCALE__)
+ /* Note: We don't need to check if we don't have wide chars or we only
+ * support the C locale. */
+ {
+ mbstate_t mbstate;
+ const char *p;
+ mbstate.mask = 0; /* Initialize the mbstate. */
+ p = fmt0;
+ if (mbsrtowcs(NULL, &p, SIZE_MAX, &mbstate) == ((size_t)(-1))) {
+ ppfs->fmtpos = invalid_mbs;
+ return -1;
+ }
+ }
+#endif /* defined(__UCLIBC_HAS_WCHAR__) && defined(__UCLIBC_HAS_LOCALE__) */
/* now set all argtypes to no-arg */
{
#if 1
@@ -1098,6 +1140,10 @@ int _do_one_spec(FILE * __restrict stream, register ppfs_t *ppfs, int *count)
const void * argptr[MAX_ARGS_PER_SPEC];
#endif
int *argtype;
+#ifdef __UCLIBC_HAS_WCHAR__
+ const wchar_t *ws = NULL;
+ mbstate_t mbstate;
+#endif /* __UCLIBC_HAS_WCHAR__ */
size_t slen;
int base;
int numpad;
@@ -1223,18 +1269,39 @@ int _do_one_spec(FILE * __restrict stream, register ppfs_t *ppfs, int *count)
&ppfs->info);
return 0;
#else /* __STDIO_PRINTF_FLOAT */
- return -1; /* TODO -- try ton continue? */
+ return -1; /* TODO -- try to continue? */
#endif /* __STDIO_PRINTF_FLOAT */
} else if (ppfs->conv_num <= CONV_S) { /* wide char or string */
-#if 1
- return -1; /* TODO -- wide */
-#else
+#ifdef __UCLIBC_HAS_WCHAR__
+ mbstate.mask = 0; /* Initialize the mbstate. */
if (ppfs->conv_num == CONV_S) { /* wide string */
-
+ if (!(ws = *((const wchar_t **) *argptr))) {
+ goto NULL_STRING;
+ }
+ /* We use an awful uClibc-specific hack here, passing
+ * (char*) &ws as the conversion destination. This signals
+ * uClibc's wcsrtombs that we want a "restricted" length
+ * such that the mbs fits in a buffer of the specified
+ * size with no partial conversions. */
+ if ((slen = wcsrtombs((char *) &ws, &ws, /* Use awful hack! */
+ ((ppfs->info.prec >= 0)
+ ? ppfs->info.prec
+ : SIZE_MAX), &mbstate))
+ == ((size_t)-1)
+ ) {
+ return -1; /* EILSEQ */
+ }
} else { /* wide char */
-
+ s = buf;
+ slen = wcrtomb(s, (*((const wchar_t *) *argptr)), &mbstate);
+ if (slen == ((size_t)-1)) {
+ return -1; /* EILSEQ */
+ }
+ s[slen] = 0; /* TODO - Is this necessary? */
}
-#endif
+#else /* __UCLIBC_HAS_WCHAR__ */
+ return -1;
+#endif /* __UCLIBC_HAS_WCHAR__ */
} else if (ppfs->conv_num <= CONV_s) { /* char or string */
if (ppfs->conv_num == CONV_s) { /* string */
s = *((char **) (*argptr));
@@ -1243,11 +1310,12 @@ int _do_one_spec(FILE * __restrict stream, register ppfs_t *ppfs, int *count)
slen = strnlen(s, ((ppfs->info.prec >= 0)
? ppfs->info.prec : SIZE_MAX));
} else {
+ NULL_STRING:
s = "(null)";
slen = 6;
}
} else { /* char */
- s = (char *) buf;
+ s = buf;
*s = (unsigned char)(*((const int *) *argptr));
s[1] = 0;
slen = 1;
@@ -1301,7 +1369,24 @@ int _do_one_spec(FILE * __restrict stream, register ppfs_t *ppfs, int *count)
}
output(stream, prefix + prefix_num);
_charpad(stream, '0', numfill);
+#ifdef __UCLIBC_HAS_WCHAR__
+ if (!ws) {
+ _outnstr(stream, s, slen);
+ } else { /* wide string */
+ size_t t;
+ mbstate.mask = 0; /* Initialize the mbstate. */
+ while (slen) {
+ t = (slen <= sizeof(buf)) ? slen : sizeof(buf);
+ t = wcsrtombs(buf, &ws, t, &mbstate);
+ assert (t != ((size_t)(-1)));
+ _outnstr(stream, buf, t);
+ slen -= t;
+ }
+ ws = NULL; /* Reset ws. */
+ }
+#else /* __UCLIBC_HAS_WCHAR__ */
_outnstr(stream, s, slen);
+#endif /* __UCLIBC_HAS_WCHAR__ */
_charpad(stream, ' ', numpad);
}