/* Copyright (C) 2002, 2003, 2004 Manuel Novoa III
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION!
*
* Besides uClibc, I'm using this code in my libc for elks, which is
* a 16-bit environment with a fairly limited compiler. It would make
* things much easier for me if this file isn't modified unnecessarily.
* In particular, please put any new or replacement functions somewhere
* else, and modify the makefile to use your version instead.
* Thanks. Manuel
*
* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
/* May 23, 2002 Initial Notes:
*
* I'm still tweaking this stuff, but it passes the tests I've thrown
* at it, and Erik needs it for the gcc port. The glibc extension
* __wcsnrtombs() hasn't been tested, as I didn't find a test for it
* in the glibc source. I also need to fix the behavior of
* _wchar_utf8sntowcs() if the max number of wchars to convert is 0.
*
* UTF-8 -> wchar -> UTF-8 conversion tests on Markus Kuhn's UTF-8-demo.txt
* file on my platform (x86) show about 5-10% faster conversion speed than
* glibc with mbsrtowcs()/wcsrtombs() and almost twice as fast as glibc with
* individual mbrtowc()/wcrtomb() calls.
*
* If 'DECODER' is defined, then _wchar_utf8sntowcs() will be compiled
* as a fail-safe UTF-8 decoder appropriate for a terminal, etc. which
* needs to deal gracefully with whatever is sent to it. In that mode,
* it passes Markus Kuhn's UTF-8-test.txt stress test. I plan to add
* an arg to force that behavior, so the interface will be changing.
*
* I need to fix the error checking for 16-bit wide chars. This isn't
* an issue for uClibc, but may be for ELKS. I'm currently not sure
* if I'll use 16-bit, 32-bit, or configurable wchars in ELKS.
*
* July 1, 2002
*
* Fixed _wchar_utf8sntowcs() for the max number of wchars == 0 case.
* Fixed nul-char bug in btowc(), and another in __mbsnrtowcs() for 8-bit
* locales.
* Enabled building of a C/POSIX-locale-only version, so full locale support
* no longer needs to be enabled.
*
* Nov 4, 2002
*
* Fixed a bug in _wchar_wcsntoutf8s(). Don't store wcs position if dst is NULL.
* Also, introduce an awful hack into _wchar_wcsntoutf8s() and wcsrtombs() in
* order to support %ls in printf. See comments below for details.
* Change behaviour of wc<->mb functions when in the C locale. Now they do
* a 1-1 map for the range 0x80-UCHAR_MAX. This is for backwards compatibility
* and consistency with the standard's requirement that a printf format string be
* a valid multibyte string beginning and ending in its initial shift state.
*
* Nov 5, 2002
*
* Forgot to change btowc and wctob when I changed the wc<->mb functions yesterday.
*
* Nov 7, 2002
*
* Add wcwidth and wcswidth, based on Markus Kuhn's wcwidth of 2002-05-08.
* Added some size/speed optimizations and integrated it into my locale
* framework. Minimally tested at the moment, but the stub C-locale
* version (which most people would probably be using) should be fine.
*
* Nov 21, 2002
*
* Revert the wc<->mb changes from earlier this month involving the C-locale.
* Add a couple of ugly hacks to support *wprintf.
* Add a mini iconv() and iconv implementation (requires locale support).
*
* Aug 1, 2003
* Bug fix for mbrtowc.
*
* Aug 18, 2003
* Bug fix: _wchar_utf8sntowcs and _wchar_wcsntoutf8s now set errno if EILSEQ.
*
* Feb 11, 2004
* Bug fix: Fix size check for remaining output space in iconv().
*
* Manuel
*/
#ifdef _LIBC
#include <errno.h>
#include <stddef.h>
#include <limits.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include <locale.h>
#include <wchar.h>
#include <bits/uClibc_uwchar.h>
/**********************************************************************/
/* Locale-dependent configuration.
 *
 * With __UCLIBC_HAS_LOCALE__ the ENCODING macro reads the encoding field
 * of the current locale and the __LOCALE_DATA_* table constants get short
 * local aliases. Without it, ENCODING is the fixed 7-bit value and any
 * 8-bit or UTF-8 locale macro being defined is a build error. */
#ifdef __UCLIBC_HAS_LOCALE__
#ifdef __UCLIBC_MJN3_ONLY__
#ifdef L_iswspace
/* Only emitted when L_iswspace is defined, so the warning is generated
 * once. NOTE(review): presumably because this file is compiled several
 * times with different L_* selectors -- confirm against the makefile. */
#warning TODO: Fix Cc2wc* and Cwc2c* defines!
#endif
#endif /* __UCLIBC_MJN3_ONLY__ */
/* Encoding identifier taken from the current locale object. */
#define ENCODING (__UCLIBC_CURLOCALE->encoding)
/* Short aliases for the corresponding __LOCALE_DATA_* constants.
 * NOTE(review): the names suggest char->wchar (Cc2wc) and wchar->char
 * (Cwc2c) lookup-table parameters; their users are not visible here. */
#define Cc2wc_IDX_SHIFT __LOCALE_DATA_Cc2wc_IDX_SHIFT
#define Cc2wc_ROW_LEN __LOCALE_DATA_Cc2wc_ROW_LEN
#define Cwc2c_DOMAIN_MAX __LOCALE_DATA_Cwc2c_DOMAIN_MAX
#define Cwc2c_TI_SHIFT __LOCALE_DATA_Cwc2c_TI_SHIFT
#define Cwc2c_TT_SHIFT __LOCALE_DATA_Cwc2c_TT_SHIFT
#define Cwc2c_TI_LEN __LOCALE_DATA_Cwc2c_TI_LEN
/* Locale support without UTF-8 locales only draws a warning, not an error. */
#ifndef __CTYPE_HAS_UTF_8_LOCALES
#warning __CTYPE_HAS_UTF_8_LOCALES not set!
#endif
#else /* __UCLIBC_HAS_LOCALE__ */
#ifdef __UCLIBC_MJN3_ONLY__
#ifdef L_btowc
/* Only emitted when L_btowc is defined, so the warning appears once. */
#warning fix preprocessor logic testing locale settings
#endif
#endif
/* No locale support: the encoding is always the 7-bit one. */
#define ENCODING (__ctype_encoding_7_bit)
/* Without locale support neither 8-bit nor UTF-8 locales may be enabled;
 * either macro being defined is a configuration error. */
#ifdef __CTYPE_HAS_8_BIT_LOCALES
#error __CTYPE_HAS_8_BIT_LOCALES is defined!
#endif
#ifdef __CTYPE_HAS_UTF_8_LOCALES
#error __CTYPE_HAS_UTF_8_LOCALES is defined!
#endif
/* Drop the selectors for the UTF-8 work functions in this configuration.
 * NOTE(review): presumably this keeps their definitions from being
 * compiled; the guarded definitions are not visible here. */
#undef L__wchar_utf8sntowcs
#undef L__wchar_wcsntoutf8s
#endif /* __UCLIBC_HAS_LOCALE__ */
/**********************************************************************/
/* UTF_8_MAX_LEN is 6 when WCHAR_MAX exceeds 0xffff and 3 otherwise.
 * NOTE(review): presumably the longest UTF-8 byte sequence needed for one
 * wchar_t of this width -- confirm against the conversion routines. */
#if WCHAR_MAX > 0xffffUL
#define UTF_8_MAX_LEN 6
#else
#define UTF_8_MAX_LEN 3
#endif
/* NOTE(review): no use of KUHN is visible in this part of the file; it
 * presumably relates to the Markus Kuhn UTF-8 material mentioned in the
 * notes at the top of the file -- confirm where it is tested. */
#define KUHN 1
/* Implementation-specific work functions.
 *
 * Only the declarations appear here; both are marked attribute_hidden.
 * According to the notes at the top of this file, _wchar_utf8sntowcs()
 * converts UTF-8 to wide characters and _wchar_wcsntoutf8s() converts
 * wide characters to UTF-8.
 *
 * NOTE(review): parameter meanings are inferred from the names and should
 * be confirmed against the definitions: pwc/s look like the destination
 * buffers, wn/n like limits on wide characters/bytes, src like a pointer
 * to the source position, ps like the conversion state, and
 * allow_continuation like a flag controlling resumption of a partial
 * sequence. */
extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
const char **__restrict src, size_t n,
mbstate_t *ps, int allow_continuation) attribute_hidden;
extern size_t _wchar_wcsntoutf8s(char *__restrict s, size_t n,
const wchar_t **__restrict src, size_t wn) attribute_hidden;
#endif /* _LIBC */
/**********************************************************************/
#ifdef L_btowc
/* btowc -- convert a single byte to a wide character.
 *
 * c is the byte value to convert, or EOF.
 *
 * Returns the wide character for c, or WEOF when c is EOF or the byte
 * cannot be converted on its own.
 *
 * With 8-bit locale support the byte is handed to mbrtowc() together with
 * a freshly reset conversion state. Otherwise only values 0..0x7f are
 * accepted and map to themselves. */
wint_t btowc(int c)
{
#ifdef __CTYPE_HAS_8_BIT_LOCALES
	mbstate_t state;
	wchar_t wide;
	unsigned char byte;

	if (c == EOF) {
		return WEOF;
	}

	byte = (unsigned char) c;
	state.__mask = 0;	/* Start from a reset conversion state. */

	/* mbrtowc() yields 0 or 1 for a complete single-byte character; any
	 * larger (size_t) result means the byte did not convert by itself. */
	if (mbrtowc(&wide, (char *) &byte, 1, &state) > 1) {
		return WEOF;
	}
	return wide;
#else /* !__CTYPE_HAS_8_BIT_LOCALES */
#ifdef __UCLIBC_HAS_LOCALE__
	assert((ENCODING == __ctype_encoding_7_bit)
|| (ENCODING == __ctype_encoding_utf8));
#endif
	/* No 8-bit locales: every value outside 0-0x7f is illegal in both the
	 * C/POSIX and UTF-8 locales. The unsigned comparison also rejects
	 * negative inputs such as EOF. */
	if (((unsigned int) c) >= 0x80) {
		return WEOF;
	}
	return c;
#endif /* !__CTYPE_HAS_8_BIT_LOCALES */
}
libc_hidden_def(btowc)
#endif
/**********************************************************************/
#ifdef L_wctob
/* Note: We completely ignore ps in all currently supported conversions. */
int wctob(wint_t c)
{
#ifdef __CTYPE_HAS_8_BIT_LOCALES
unsigned char buf[MB_LEN_MAX];
return (wcrtomb((char*) buf, c, NULL) == 1) ? *buf : EOF;
#else /* __CTYPE
|