summaryrefslogtreecommitdiff
path: root/libc/misc/wchar/wchar.c
diff options
context:
space:
mode:
Diffstat (limited to 'libc/misc/wchar/wchar.c')
-rw-r--r--libc/misc/wchar/wchar.c658
1 files changed, 624 insertions, 34 deletions
diff --git a/libc/misc/wchar/wchar.c b/libc/misc/wchar/wchar.c
index 7da3b1a50..a6e5fc616 100644
--- a/libc/misc/wchar/wchar.c
+++ b/libc/misc/wchar/wchar.c
@@ -1,3 +1,4 @@
+
/* Copyright (C) 2002 Manuel Novoa III
*
* This library is free software; you can redistribute it and/or
@@ -79,6 +80,12 @@
* framework. Minimally tested at the moment, but the stub C-locale
* version (which most people would probably be using) should be fine.
*
+ * Nov 21, 2002
+ *
+ * Revert the wc<->mb changes from earlier this month involving the C-locale.
+ * Add a couple of ugly hacks to support *wprintf.
+ * Add a mini iconv() and iconv implementation (requires locale support).
+ *
* Manuel
*/
@@ -97,13 +104,17 @@
#ifdef __UCLIBC_HAS_LOCALE__
#define ENCODING (__global_locale.encoding)
-#ifdef __UCLIBC_MJN3_ONLY__
-#warning implement __CTYPE_HAS_UTF_8_LOCALES!
+#ifndef __CTYPE_HAS_UTF_8_LOCALES
+#warning __CTYPE_HAS_UTF_8_LOCALES not set!
#endif
-#define __CTYPE_HAS_UTF_8_LOCALES
#else
-#undef __CTYPE_HAS_8_BIT_LOCALES
-#undef __CTYPE_HAS_UTF_8_LOCALES
+#define ENCODING (__ctype_encoding_7_bit)
+#ifdef __CTYPE_HAS_8_BIT_LOCALES
+#error __CTYPE_HAS_8_BIT_LOCALES is defined!
+#endif
+#ifdef __CTYPE_HAS_UTF_8_LOCALES
+#error __CTYPE_HAS_UTF_8_LOCALES is defined!
+#endif
#undef L__wchar_utf8sntowcs
#undef L__wchar_wcsntoutf8s
#endif
@@ -114,7 +125,7 @@
#define UTF_8_MAX_LEN 3
#endif
-/* #define KUHN */
+#define KUHN 1
/* Implementation-specific work functions. */
@@ -157,17 +168,14 @@ wint_t btowc(int c)
#else /* __CTYPE_HAS_8_BIT_LOCALES */
-#ifdef __CTYPE_HAS_UTF_8_LOCALES
- if (ENCODING == __ctype_encoding_utf8) {
- return (((unsigned int)c) < 0x80) ? c : WEOF;
- }
-#endif /* __CTYPE_HAS_UTF_8_LOCALES */
-
#ifdef __UCLIBC_HAS_LOCALE__
- assert(ENCODING == __ctype_encoding_7_bit);
-#endif
+ assert((ENCODING == __ctype_encoding_7_bit)
+ || (ENCODING == __ctype_encoding_utf8));
+#endif /* __UCLIBC_HAS_LOCALE__ */
- return (((unsigned int)c) <= UCHAR_MAX) ? c : WEOF;
+ /* If we don't have 8-bit locale support, then this is trivial since
+ * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */
+ return (((unsigned int)c) < 0x80) ? c : WEOF;
#endif /* __CTYPE_HAS_8_BIT_LOCALES */
}
@@ -188,17 +196,18 @@ int wctob(wint_t c)
#else /* __CTYPE_HAS_8_BIT_LOCALES */
-#ifdef __CTYPE_HAS_UTF_8_LOCALES
- if (ENCODING == __ctype_encoding_utf8) {
- return ((c >= 0) && (c < 0x80)) ? c : EOF;
- }
-#endif /* __CTYPE_HAS_UTF_8_LOCALES */
-
#ifdef __UCLIBC_HAS_LOCALE__
- assert(ENCODING == __ctype_encoding_7_bit);
-#endif
+ assert((ENCODING == __ctype_encoding_7_bit)
+ || (ENCODING == __ctype_encoding_utf8));
+#endif /* __UCLIBC_HAS_LOCALE__ */
+
+ /* If we don't have 8-bit locale support, then this is trivial since
+ * anything outside of 0-0x7f is illegal in C/POSIX and UTF-8 locales. */
+
+ /* TODO: need unsigned version of wint_t... */
+/* return (((unsigned int)c) < 0x80) ? c : WEOF; */
+ return ((c >= 0) && (c < 0x80)) ? c : EOF;
- return ((c >= 0) && (c <= UCHAR_MAX)) ? c : EOF;
#endif /* __CTYPE_HAS_8_BIT_LOCALES */
}
@@ -359,9 +368,19 @@ size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
assert(ps != NULL);
incr = 1;
- if (!pwc) {
+ /* NOTE: The following is an AWFUL HACK! In order to support %s in
+ * wprintf, we need to be able to compute the number of wchars needed
+ * for the mbs conversion, not to exceed the precision specified.
+ * But if dst is NULL, the return value is the length assuming a
+ * sufficiently sized buffer. So, we allow passing of (wchar_t *) ps
+ * as pwc in order to flag that we really want the length, subject
+ * to the restricted buffer size and no partial conversions.
+ * See mbsnrtowcs() as well. */
+ if (!pwc || (pwc == ((wchar_t *)ps))) {
+ if (!pwc) {
+ wn = SIZE_MAX;
+ }
pwc = wcbuf;
- wn = SIZE_MAX;
incr = 0;
}
@@ -406,7 +425,7 @@ size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn,
}
BAD:
#ifdef DECODER
- wc = 0xfffd;
+ wc = 0xfffdU;
goto COMPLETE;
#else
ps->mask = mask;
@@ -635,9 +654,19 @@ size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
}
#endif
incr = 1;
- if (!dst) {
+ /* NOTE: The following is an AWFUL HACK! In order to support %s in
+ * wprintf, we need to be able to compute the number of wchars needed
+ * for the mbs conversion, not to exceed the precision specified.
+ * But if dst is NULL, the return value is the length assuming a
+ * sufficiently sized buffer. So, we allow passing of ((wchar_t *)ps)
+ * as dst in order to flag that we really want the length, subject
+ * to the restricted buffer size and no partial conversions.
+ * See _wchar_utf8sntowcs() as well. */
+ if (!dst || (dst == ((wchar_t *)ps))) {
+ if (!dst) {
+ len = SIZE_MAX;
+ }
dst = wcbuf;
- len = SIZE_MAX;
incr = 0;
}
@@ -659,8 +688,7 @@ size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
(__global_locale.idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
<< Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
if (!wc) {
- __set_errno(EILSEQ);
- return (size_t) -1;
+ goto BAD;
}
}
if (!(*dst = wc)) {
@@ -687,6 +715,13 @@ size_t __mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
s = NULL;
break;
}
+ if (*dst >= 0x80) {
+#ifdef __CTYPE_HAS_8_BIT_LOCALES
+ BAD:
+#endif
+ __set_errno(EILSEQ);
+ return (size_t) -1;
+ }
++s;
dst += incr;
--count;
@@ -772,7 +807,7 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
+ (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
}
-/* #define __WCHAR_REPLACEMENT_CHAR '?' */
+#define __WCHAR_REPLACEMENT_CHAR '?'
#ifdef __WCHAR_REPLACEMENT_CHAR
*dst = (unsigned char) ( u ? u : __WCHAR_REPLACEMENT_CHAR );
#else /* __WCHAR_REPLACEMENT_CHAR */
@@ -798,13 +833,12 @@ size_t __wcsnrtombs(char *__restrict dst, const wchar_t **__restrict src,
#endif
while (count) {
- if (*s > UCHAR_MAX) {
+ if (*s >= 0x80) {
#if defined(__CTYPE_HAS_8_BIT_LOCALES) && !defined(__WCHAR_REPLACEMENT_CHAR)
BAD:
#endif
__set_errno(EILSEQ);
return (size_t) -1;
-
}
if ((*dst = (unsigned char) *s) == 0) {
s = NULL;
@@ -1068,3 +1102,559 @@ int wcwidth(wchar_t wc)
#endif
/**********************************************************************/
+
+
+typedef struct {
+ mbstate_t tostate;
+ mbstate_t fromstate;
+ int tocodeset;
+ int fromcodeset;
+ int frombom;
+ int tobom;
+ int fromcodeset0;
+ int frombom0;
+ int tobom0;
+ int skip_invalid_input; /* To support iconv -c option. */
+} _UC_iconv_t;
+
+
+
+#ifdef L_iconv
+
+#include <iconv.h>
+#include <string.h>
+#include <endian.h>
+#include <byteswap.h>
+
+#if (__BYTE_ORDER != __BIG_ENDIAN) && (__BYTE_ORDER != __LITTLE_ENDIAN)
+#error unsupported endianness for iconv
+#endif
+
+#ifndef __CTYPE_HAS_8_BIT_LOCALES
+#error currently iconv requires 8 bit locales
+#endif
+#ifndef __CTYPE_HAS_UTF_8_LOCALES
+#error currently iconv requires UTF-8 locales
+#endif
+
+
+enum {
+ IC_WCHAR_T = 0xe0,
+ IC_MULTIBYTE = 0xe0,
+#if __BYTE_ORDER == __BIG_ENDIAN
+ IC_UCS_4 = 0xec,
+ IC_UTF_32 = 0xe4,
+ IC_UCS_2 = 0xe2,
+ IC_UTF_16 = 0xea,
+#else
+ IC_UCS_4 = 0xed,
+ IC_UTF_32 = 0xe5,
+ IC_UCS_2 = 0xe3,
+ IC_UTF_16 = 0xeb,
+#endif
+ IC_UTF_8 = 2,
+ IC_ASCII = 1
+};
+
+/* For the multibyte
+ * bit 0 means swap endian
+ * bit 1 means 2 byte
+ * bit 2 means 4 byte
+ *
+ */
+
+const unsigned char codesets[] =
+ "\x0a\xe0""WCHAR_T\x00" /* superset of UCS-4 but platform-endian */
+#if __BYTE_ORDER == __BIG_ENDIAN
+ "\x08\xec""UCS-4\x00" /* always BE */
+ "\x0a\xec""UCS-4BE\x00"
+ "\x0a\xed""UCS-4LE\x00"
+ "\x09\fe4""UTF-32\x00" /* platform endian with BOM */
+ "\x0b\xe4""UTF-32BE\x00"
+ "\x0b\xe5""UTF-32LE\x00"
+ "\x08\xe2""UCS-2\x00" /* always BE */
+ "\x0a\xe2""UCS-2BE\x00"
+ "\x0a\xe3""UCS-2LE\x00"
+ "\x09\xea""UTF-16\x00" /* platform endian with BOM */
+ "\x0b\xea""UTF-16BE\x00"
+ "\x0b\xeb""UTF-16LE\x00"
+#elif __BYTE_ORDER == __LITTLE_ENDIAN
+ "\x08\xed""UCS-4\x00" /* always BE */
+ "\x0a\xed""UCS-4BE\x00"
+ "\x0a\xec""UCS-4LE\x00"
+ "\x09\xf4""UTF-32\x00" /* platform endian with BOM */
+ "\x0b\xe5""UTF-32BE\x00"
+ "\x0b\xe4""UTF-32LE\x00"
+ "\x08\xe3""UCS-2\x00" /* always BE */
+ "\x0a\xe3""UCS-2BE\x00"
+ "\x0a\xe2""UCS-2LE\x00"
+ "\x09\xfa""UTF-16\x00" /* platform endian with BOM */
+ "\x0b\xeb""UTF-16BE\x00"
+ "\x0b\xea""UTF-16LE\x00"
+#endif
+ "\x08\x02""UTF-8\x00"
+ "\x0b\x01""US-ASCII\x00"
+ "\x07\x01""ASCII"; /* Must be last! (special case to save a nul) */
+
+static int find_codeset(const char *name)
+{
+ const unsigned char *s;
+ int codeset;
+
+ for (s = codesets ; *s ; s += *s) {
+ if (!strcasecmp(s+2, name)) {
+ return s[1];
+ }
+ }
+
+ /* The following is ripped from find_locale in locale.c. */
+
+ /* TODO: maybe CODESET_LIST + *s ??? */
+ /* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */
+ codeset = 2;
+ s = CODESET_LIST;
+ do {
+ ++codeset; /* Increment codeset first. */
+ if (!strcasecmp(CODESET_LIST+*s, name)) {
+ return codeset;
+ }
+ } while (*++s);
+
+ return 0; /* No matching codeset! */
+}
+
+iconv_t iconv_open(const char *tocode, const char *fromcode)
+{
+ register _UC_iconv_t *px;
+ int tocodeset, fromcodeset;
+
+ if (((tocodeset = find_codeset(tocode)) != 0)
+ && ((fromcodeset = find_codeset(fromcode)) != 0)) {
+ if ((px = malloc(sizeof(_UC_iconv_t))) != NULL) {
+ px->tocodeset = tocodeset;
+ px->tobom0 = px->tobom = (tocodeset & 0x10) >> 4;
+ px->fromcodeset0 = px->fromcodeset = fromcodeset;
+ px->frombom0 = px->frombom = (fromcodeset & 0x10) >> 4;
+ px->skip_invalid_input = px->tostate.mask = px->fromstate.mask = 0;
+ return (iconv_t) px;
+ }
+ } else {
+ __set_errno(EINVAL);
+ }
+ return (iconv_t)(-1);
+}
+
+int iconv_close(iconv_t cd)
+{
+ free(cd);
+
+ return 0;
+}
+
+size_t iconv(iconv_t cd, char **__restrict inbuf,
+ size_t *__restrict inbytesleft,
+ char **__restrict outbuf, size_t *__restrict outbytesleft)
+{
+ _UC_iconv_t *px = (_UC_iconv_t *) cd;
+ size_t nrcount, r;
+ wchar_t wc, wc2;
+ int inci, inco;
+
+ assert(px != (_UC_iconv_t *)(-1));
+ assert(sizeof(wchar_t) == 4);
+
+ if (!inbuf || !*inbuf) { /* Need to reinitialze conversion state. */
+ /* Note: For shift-state encodings we possibly need to output the
+ * shift sequence to return to initial state! */
+ if ((px->fromcodeset & 0xf0) == 0xe0) {
+ }
+ px->tostate.mask = px->fromstate.mask = 0;
+ px->fromcodeset = px->fromcodeset0;
+ px->tobom = px->tobom0;
+ px->frombom = px->frombom0;
+ return 0;
+ }
+
+ nrcount = 0;
+ while (*inbytesleft) {
+ if (!*outbytesleft) {
+ TOO_BIG:
+ __set_errno(E2BIG);
+ return (size_t) -1;
+ }
+
+ inci = inco = 1;
+ if (px->fromcodeset >= IC_MULTIBYTE) {
+ inci = (px->fromcodeset == IC_WCHAR_T) ? 4: (px->fromcodeset & 6);
+ if (*inbytesleft < inci) goto INVALID;
+ wc = (((unsigned int)((unsigned char)((*inbuf)[0]))) << 8)
+ + ((unsigned char)((*inbuf)[1]));
+ if (inci == 4) {
+ wc = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
+ + ((unsigned char)((*inbuf)[3])) + (wc << 16);
+ if (!(px->fromcodeset & 1)) wc = bswap_32(wc);
+ } else {
+ if (!(px->fromcodeset & 1)) wc = bswap_16(wc);
+ if (((px->fromcodeset & IC_UTF_16) == IC_UTF_16)
+ && (((__uwchar_t)(wc - 0xd800U)) < (0xdc00U - 0xd800U))
+ ) { /* surrogate */
+ wc =- 0xd800U;
+ if (*inbytesleft < 4) goto INVALID;
+ wc2 = (((unsigned int)((unsigned char)((*inbuf)[2]))) << 8)
+ + ((unsigned char)((*inbuf)[3]));
+ if (!(px->fromcodeset & 1)) wc = bswap_16(wc2);
+ if (((__uwchar_t)(wc2 -= 0xdc00U)) < (0xe0000U - 0xdc00U)) {
+ goto ILLEGAL;
+ }
+ inci = 4; /* Change inci here in case skipping illegals. */
+ wc = 0x10000UL + (wc << 10) + wc2;
+ }
+ }
+
+ if (px->frombom) {
+ px->frombom = 0;
+ if ((wc == 0xfeffU)
+ || (wc == ((inci == 4)
+ ? (((wchar_t) 0xfffe0000UL))
+ : ((wchar_t)(0xfffeUL))))
+ ) {
+ if (wc != 0xfeffU) {
+ px->fromcodeset ^= 1; /* toggle endianness */
+ wc = 0xfeffU;
+ }
+ if (!px->frombom) {
+ goto BOM_SKIP_OUTPUT;
+ }
+ goto GOT_BOM;
+ }
+ }
+
+ if (px->fromcodeset != IC_WCHAR_T) {
+ if (((__uwchar_t) wc) > (((px->fromcodeset & IC_UCS_4) == IC_UCS_4)
+ ? 0x7fffffffUL : 0x10ffffUL)
+#ifdef KUHN
+ || (((__uwchar_t)(wc - 0xfffeU)) < 2)
+ || (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
+#endif
+ ) {
+ goto ILLEGAL;
+ }
+ }
+ } else if (px->fromcodeset == IC_UTF_8) {
+ const char *p = *inbuf;
+ r = _wchar_utf8sntowcs(&wc, 1, &p, *inbytesleft, &px->fromstate, 0);
+ if (((ssize_t) r) <= 0) { /* either EILSEQ or incomplete or nul */
+ if (((ssize_t) r) < 0) { /* either EILSEQ or incomplete or nul */
+ assert((r == (size_t)(-1)) || (r == (size_t)(-2)));
+ if (r == (size_t)(-2)) {
+ INVALID:
+ __set_errno(EINVAL);
+ } else {
+ px->fromstate.mask = 0;
+ inci = 1;
+ ILLEGAL:
+ if (px->skip_invalid_input) {
+ px->skip_invalid_input = 2; /* flag for iconv utility */
+ goto BOM_SKIP_OUTPUT;
+ }
+ __set_errno(EILSEQ);
+ }
+ return (size_t)(-1);
+ }
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning optimize this
+#endif
+ if (p != NULL) { /* incomplet char case */
+ goto INVALID;
+ }
+ p = *inbuf + 1; /* nul */
+ }
+ inci = p - *inbuf;
+ } else if ((wc = ((unsigned char)(**inbuf))) >= 0x80) { /* Non-ASCII... */
+ if (px->fromcodeset == IC_ASCII) { /* US-ASCII codeset */
+ goto ILLEGAL;
+ } else { /* some other 8-bit ascii-extension codeset */
+ const codeset_8_bit_t *c8b
+ = __locale_mmap->codeset_8_bit + px->fromcodeset - 3;
+ wc -= 0x80;
+ wc = __global_locale.tbl8c2wc[
+ (c8b->idx8c2wc[wc >> Cc2wc_IDX_SHIFT]
+ << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))];
+ if (!wc) {
+ goto ILLEGAL;
+ }
+ }
+ } else {
+ fprintf(stderr, "ARG!! %d\n", px->fromcodeset);
+ }
+
+
+ if (px->tobom) {
+ inci = 0;
+ wc = 0xfeffU;
+ GOT_BOM:
+ px->tobom = 0;
+ }
+
+ if (px->tocodeset >= IC_MULTIBYTE) {
+ inco = (px->tocodeset == IC_WCHAR_T) ? 4: (px->tocodeset & 6);
+ if (*outbytesleft < inci) goto TOO_BIG;
+ if (px->tocodeset != IC_WCHAR_T) {
+ if (((__uwchar_t) wc) > (((px->tocodeset & IC_UCS_4) == IC_UCS_4)
+ ? 0x7fffffffUL : 0x10ffffUL)
+#ifdef KUHN
+ || (((__uwchar_t)(wc - 0xfffeU)) < 2)
+ || (((__uwchar_t)(wc - 0xd800U)) < (0xe000U - 0xd800U))
+#endif
+ ) {
+ REPLACE_32:
+ wc = 0xfffd;
+ ++nrcount;
+ }
+ }
+ if (inco == 4) {
+ if (px->tocodeset & 1) wc = bswap_32(wc);
+ } else {
+ if (((__uwchar_t)wc ) > 0xffffU) {
+ if ((px->tocodeset & IC_UTF_16) != IC_UTF_16) {
+ goto REPLACE_32;
+ }
+ if (*outbytesleft < (inco = 4)) goto TOO_BIG;
+ wc2 = 0xdc00U + (wc & 0x3ff);
+ wc = 0xd800U + ((wc >> 10) & 0x3ff);
+ if (px->tocodeset & 1) {
+ wc = bswap_16(wc);
+ wc2 = bswap_16(wc2);
+ }
+ wc += (wc2 << 16);
+ } else if (px->tocodeset & 1) wc = bswap_16(wc);
+ }
+ (*outbuf)[0] = (char)((unsigned char)(wc));
+ (*outbuf)[1] = (char)((unsigned char)(wc >> 8));
+ if (inco == 4) {
+ (*outbuf)[2] = (char)((unsigned char)(wc >> 16));
+ (*outbuf)[3] = (char)((unsigned char)(wc >> 24));
+ }
+ } else if (px->tocodeset == IC_UTF_8) {
+ const wchar_t *pw = &wc;
+ do {
+ r = _wchar_wcsntoutf8s(*outbuf, *outbytesleft, &pw, 1);
+ if (r != (size_t)(-1)) {
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning what happens for a nul?
+#endif
+ if (r == 0) {
+ if (wc != 0) {
+ goto TOO_BIG;
+ }
+ ++r;
+ }
+ break;
+ }
+ wc = 0xfffdU;
+ ++nrcount;
+ } while (1);
+ inco = r;
+ } else if (((__uwchar_t)(wc)) < 0x80) {
+ CHAR_GOOD:
+ **outbuf = wc;
+ } else {
+ if ((px->tocodeset != 0x01) && (wc <= Cwc2c_DOMAIN_MAX)) {
+ const codeset_8_bit_t *c8b
+ = __locale_mmap->codeset_8_bit + px->tocodeset - 3;
+ __uwchar_t u;
+ u = c8b->idx8wc2c[wc >> (Cwc2c_TI_SHIFT + Cwc2c_TT_SHIFT)];
+ u = __global_locale.tbl8wc2c[(u << Cwc2c_TI_SHIFT)
+ + ((wc >> Cwc2c_TT_SHIFT)
+ & ((1 << Cwc2c_TI_SHIFT)-1))];
+ wc = __global_locale.tbl8wc2c[Cwc2c_TI_LEN
+ + (u << Cwc2c_TT_SHIFT)
+ + (wc & ((1 << Cwc2c_TT_SHIFT)-1))];
+ if (wc) {
+ goto CHAR_GOOD;
+ }
+ }
+ **outbuf = '?';
+ ++nrcount;
+ }
+
+ *outbuf += inco;
+ *outbytesleft -= inco;
+ BOM_SKIP_OUTPUT:
+ *inbuf += inci;
+ *inbytesleft -= inci;
+ }
+ return nrcount;
+}
+
+#endif
+/**********************************************************************/
+#ifdef L_iconv_main
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <iconv.h>
+#include <stdarg.h>
+#include <libgen.h>
+
+extern const unsigned char codesets[];
+
+#define IBUF BUFSIZ
+#define OBUF BUFSIZ
+
+char *progname;
+int hide_errors;
+
+static void error_msg(const char *fmt, ...)
+ __attribute__ ((noreturn, format (printf, 1, 2)));
+
+static void error_msg(const char *fmt, ...)
+{
+ va_list arg;
+
+ if (!hide_errors) {
+ fprintf(stderr, "%s: ", progname);
+ va_start(arg, fmt);
+ vfprintf(stderr, fmt, arg);
+ va_end(arg);
+ }
+
+ exit(EXIT_FAILURE);
+}
+
+int main(int argc, char **argv)
+{
+ FILE *ifile;
+ FILE *ofile = stdout;
+ const char *p;
+ const char *s;
+ static const char opt_chars[] = "tfocsl";
+ /* 012345 */
+ const char *opts[sizeof(opt_chars)]; /* last is infile name */
+ iconv_t ic;
+ char ibuf[IBUF];
+ char obuf[OBUF];
+ char *pi;
+ char *po;
+ size_t ni, no, r, pos;
+
+ hide_errors = 0;
+
+ for (s = opt_chars ; *s ; s++) {
+ opts[ s - opt_chars ] = NULL;
+ }
+
+ progname = *argv;
+ while (--argc) {
+ p = *++argv;
+ if ((*p != '-') || (*++p == 0)) {
+ break;
+ }
+ do {
+ if ((s = strchr(opt_chars,*p)) == NULL) {
+ USAGE:
+ s = basename(progname);
+ fprintf(stderr,
+ "%s [-cs] -f fromcode -t tocode [-o outputfile] [inputfile ...]\n"
+ " or\n%s -l\n", s, s);
+ return EXIT_FAILURE;
+ }
+ if ((s - opt_chars) < 3) {
+ if ((--argc == 0) || opts[s - opt_chars]) {
+ goto USAGE;
+ }
+ opts[s - opt_chars] = *++argv;
+ } else {
+ opts[s - opt_chars] = p;
+ }
+ } while (*++p);
+ }
+
+ if (opts[5]) { /* -l */
+ fprintf(stderr, "Recognized codesets:\n");
+ for (s = codesets ; *s ; s += *s) {
+ fprintf(stderr," %s\n", s+2);
+ }
+ s = CODESET_LIST;
+ do {
+ fprintf(stderr," %s\n", CODESET_LIST+ (unsigned char)(*s));
+ } while (*++s);
+
+ return EXIT_SUCCESS;
+ }
+
+ if (opts[4]) {
+ hide_errors = 1;
+ }
+
+ if (!opts[0] || !opts[1]) {
+ goto USAGE;
+ }
+ if ((ic = iconv_open(opts[0],opts[1])) == ((iconv_t)(-1))) {
+ error_msg( "unsupported codeset in %s -> %s conversion\n", opts[0], opts[1]);
+ }
+ if (opts[3]) { /* -c */
+ ((_UC_iconv_t *) ic)->skip_invalid_input = 1;
+ }
+
+ if ((s = opts[2]) != NULL) {
+ if (!(ofile = fopen(s, "w"))) {
+ error_msg( "couldn't open %s for writing\n", s);
+ }
+ }
+
+ pos = ni = 0;
+ do {
+ if (!argc || ((**argv == '-') && !((*argv)[1]))) {
+ ifile = stdin; /* we don't check for duplicates */
+ } else if (!(ifile = fopen(*argv, "r"))) {
+ error_msg( "couldn't open %s for reading\n", *argv);
+ }
+
+ while ((r = fread(ibuf + ni, 1, IBUF - ni, ifile)) > 0) {
+ pos += r;
+ ni += r;
+ no = OBUF;
+ pi = ibuf;
+ po = obuf;
+ if ((r = iconv(ic, &pi, &ni, &po, &no)) == ((size_t)(-1))) {
+ if ((errno != EINVAL) && (errno != E2BIG)) {
+ error_msg( "iconv failed at pos %lu : %m\n", (unsigned long) (pos - ni));
+ }
+ }
+ if ((r = OBUF - no) > 0) {
+ if (fwrite(obuf, 1, OBUF - no, ofile) < r) {
+ error_msg( "write error\n");
+ }
+ }
+ if (ni) { /* still bytes in buffer! */
+ memmove(ibuf, pi, ni);
+ }
+ }
+
+ if (ferror(ifile)) {
+ error_msg( "read error\n");
+ }
+
+ ++argv;
+
+ if (ifile != stdin) {
+ fclose(ifile);
+ }
+
+ } while (--argc > 0);
+
+ iconv_close(ic);
+
+ if (ni) {
+ error_msg( "incomplete sequence\n");
+ }
+
+ return (((_UC_iconv_t *) ic)->skip_invalid_input < 2)
+ ? EXIT_SUCCESS : EXIT_FAILURE;
+}
+
+#endif
+/**********************************************************************/