summaryrefslogtreecommitdiff
path: root/libc
diff options
context:
space:
mode:
Diffstat (limited to 'libc')
-rw-r--r--libc/misc/locale/locale.c356
-rw-r--r--libc/stdio/printf.c19
-rw-r--r--libc/stdio/stdio.c37
-rw-r--r--libc/string/Makefile5
-rw-r--r--libc/string/wstring.c684
-rw-r--r--libc/sysdeps/linux/common/bits/uClibc_locale.h63
6 files changed, 1037 insertions, 127 deletions
diff --git a/libc/misc/locale/locale.c b/libc/misc/locale/locale.c
index 0875a4e5b..9c162a980 100644
--- a/libc/misc/locale/locale.c
+++ b/libc/misc/locale/locale.c
@@ -22,6 +22,10 @@
* query locale settings should now work... at the cost of almost
* doubling the size of the setlocale object code.
* Fixed a bug in the internal fixed-size-string locale specifier code.
+ *
+ * Dec 20, 2002
+ *
+ * Added in collation support and updated stub nl_langinfo.
*/
@@ -415,6 +419,189 @@ struct lconv *localeconv(void)
__locale_t __global_locale;
+typedef struct { /* Header found at the very start of the collate data; init_cur_collate() casts the table pointer to this type. All counts are in uint16_t units unless noted. */
+ uint16_t num_base; /* number of coldata_base_t records that follow this header */
+ uint16_t num_der; /* number of coldata_der_t records that follow the base records */
+ uint16_t MAX_WEIGHTS; /* copied into the current collate state; used as the row stride when indexing ruletable */
+ uint16_t num_index2weight; /* length of the index2weight section */
+#define num_index2ruleidx num_index2weight /* the index2ruleidx section has the same length, so it shares this field */
+ uint16_t num_weightstr; /* length of the weightstr section */
+ uint16_t num_multistart; /* length of the multistart section */
+ uint16_t num_override; /* length of the overrides section */
+ uint16_t num_ruletable; /* length of the ruletable section */
+} coldata_header_t;
+
+typedef struct { /* One base collation record. init_cur_collate() memcpy()s every field before index2weight_offset into the current collate state, so field order matters. */
+ uint16_t num_weights; /* 0 is what init_cur_collate() stores for the C locale; next_weight() special-cases 1 */
+ uint16_t num_starters;
+ uint16_t ii_shift; /* bit width of the middle slice of a wide char in lookup() */
+ uint16_t ti_shift; /* bit width of the low slice of a wide char in lookup() */
+ uint16_t ii_len; /* length of the first level of the wcs2colidt table (skipped to reach the second level) */
+ uint16_t ti_len; /* length of the second level of the wcs2colidt table (skipped to reach the third level) */
+ uint16_t max_weight;
+ uint16_t num_col_base; /* number of entries copied from the index2weight/index2ruleidx tables */
+ uint16_t max_col_index; /* sizes the working arrays; lookup() results above this are multi-char sequence starters */
+ uint16_t undefined_idx; /* replaced by the derived record's undefined_idx after the copy */
+ uint16_t range_low; /* first wide char of the collating range tested in next_weight() */
+ uint16_t range_count; /* a char is in the range when (wc - range_low) <= range_count */
+ uint16_t range_base_weight; /* weight given to range_low; later chars add their distance from range_low */
+ uint16_t range_rule_offset; /* ruletable row used for chars in the range */
+
+ uint16_t index2weight_offset; /* offset of this base's data inside the index2weight section */
+ uint16_t index2ruleidx_offset; /* offset of this base's data inside the index2ruleidx section */
+ uint16_t multistart_offset; /* NOTE(review): not read by init_cur_collate(), which uses the derived record's multistart_offset */
+ uint16_t wcs2colidt_offset_low; /* low 16 bits of the offset into the wcs2colidt section */
+ uint16_t wcs2colidt_offset_hi; /* high 16 bits of the offset into the wcs2colidt section */
+} coldata_base_t;
+
+typedef struct { /* One derived collation record: names a base record and supplies per-locale adjustments to it. */
+ uint16_t base_idx; /* index of the coldata_base_t record this one builds on */
+ uint16_t undefined_idx; /* stored as the current collate state's undefined_idx */
+ uint16_t overrides_offset; /* offset of this locale's list inside the overrides section */
+ uint16_t multistart_offset; /* offset of this locale's table inside the multistart section */
+} coldata_der_t;
+
+static int init_cur_collate(int der_num)
+{ /* Populate __global_locale.collate from the collate data in __locale_mmap.
+ *
+ * der_num: 0 selects the C locale (only num_weights is reset to 0);
+ *   otherwise it is the 1-based index of a coldata_der_t record.
+ * Returns 1 on success, 0 if der_num is past the last derived record or
+ *   the calloc() of the working weight/rule arrays fails.
+ *
+ * NOTE(review): on calloc failure cur_collate has already been partly
+ * rewritten, and an array allocated by an earlier call is not freed here
+ * (see the #warning lines below).
+ */
+ __collate_t *cur_collate = &__global_locale.collate;
+ const uint16_t *__locale_collate_tbl = __locale_mmap->collate_data;
+ coldata_header_t *cdh;
+ coldata_base_t *cdb;
+ coldata_der_t *cdd;
+ const uint16_t *p;
+ size_t n;
+ uint16_t i, w;
+
+ assert(sizeof(coldata_base_t) == 19*2); /* the records are read by casting into the raw table, so no padding is tolerated */
+ assert(sizeof(coldata_der_t) == 4*2);
+ assert(sizeof(coldata_header_t) == 8*2);
+
+ if (!der_num) { /* C locale... special */
+ cur_collate->num_weights = 0;
+ return 1;
+ }
+
+ --der_num; /* convert the 1-based selector to a 0-based record index */
+
+ cdh = (coldata_header_t *) __locale_collate_tbl;
+
+ if (der_num >= cdh->num_der) {
+ return 0;
+ }
+
+ cdd = (coldata_der_t *)(__locale_collate_tbl /* derived records sit after the header and all base records; byte sizes are halved because the table pointer is uint16_t * */
+ + (sizeof(coldata_header_t)
+ + cdh->num_base * sizeof(coldata_base_t)
+ + der_num * sizeof(coldata_der_t)
+ )/2 );
+
+ cdb = (coldata_base_t *)(__locale_collate_tbl /* base record named by the derived record */
+ + (sizeof(coldata_header_t)
+ + cdd->base_idx * sizeof(coldata_base_t)
+ )/2 );
+
+ memcpy(cur_collate, cdb, offsetof(coldata_base_t,index2weight_offset)); /* assumes __collate_t starts with the same fields as coldata_base_t - TODO confirm against uClibc_locale.h */
+ cur_collate->undefined_idx = cdd->undefined_idx;
+
+ cur_collate->ti_mask = (1 << cur_collate->ti_shift)-1;
+ cur_collate->ii_mask = (1 << cur_collate->ii_shift)-1;
+
+/* printf("base=%d num_col_base: %d %d\n", cdd->base_idx ,cur_collate->num_col_base, cdb->num_col_base); */
+
+ n = (sizeof(coldata_header_t) + cdh->num_base * sizeof(coldata_base_t) /* n = uint16_t index of the first data section, just past all records */
+ + cdh->num_der * sizeof(coldata_der_t))/2;
+
+/* printf("n = %d\n", n); */
+ cur_collate->index2weight_tbl = __locale_collate_tbl + n + cdb->index2weight_offset;
+/* printf("i2w = %d\n", n + cdb->index2weight_offset); */
+ n += cdh->num_index2weight; /* each n += below steps over one section, in the order the sections are laid out */
+ cur_collate->index2ruleidx_tbl = __locale_collate_tbl + n + cdb->index2ruleidx_offset;
+/* printf("i2r = %d\n", n + cdb->index2ruleidx_offset); */
+ n += cdh->num_index2ruleidx;
+ cur_collate->multistart_tbl = __locale_collate_tbl + n + cdd->multistart_offset;
+/* printf("mts = %d\n", n + cdb->multistart_offset); */
+ n += cdh->num_multistart;
+ cur_collate->overrides_tbl = __locale_collate_tbl + n + cdd->overrides_offset;
+/* printf("ovr = %d\n", n + cdd->overrides_offset); */
+ n += cdh->num_override;
+ cur_collate->ruletable = __locale_collate_tbl + n;
+/* printf("rtb = %d\n", n); */
+ n += cdh->num_ruletable;
+ cur_collate->weightstr = __locale_collate_tbl + n;
+/* printf("wts = %d\n", n); */
+ n += cdh->num_weightstr;
+ cur_collate->wcs2colidt_tbl = __locale_collate_tbl + n /* this offset is split across two 16-bit fields */
+ + (((unsigned long)(cdb->wcs2colidt_offset_hi)) << 16)
+ + cdb->wcs2colidt_offset_low;
+/* printf("wcs = %lu\n", n + (((unsigned long)(cdb->wcs2colidt_offset_hi)) << 16) */
+/* + cdb->wcs2colidt_offset_low); */
+
+ cur_collate->MAX_WEIGHTS = cdh->MAX_WEIGHTS;
+
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning if calloc fails, this is WRONG. there is also a memory leak here at the moment
+#warning fix the +1 by increasing max_col_index?
+#endif
+ cur_collate->index2weight = calloc(2*cur_collate->max_col_index+2, sizeof(uint16_t)); /* one allocation holding two arrays of max_col_index+1 entries each */
+ if (!cur_collate->index2weight) {
+ return 0;
+ }
+ cur_collate->index2ruleidx = cur_collate->index2weight + cur_collate->max_col_index + 1; /* second half of the same allocation */
+
+ memcpy(cur_collate->index2weight, cur_collate->index2weight_tbl, /* start from the base locale's tables */
+ cur_collate->num_col_base * sizeof(uint16_t));
+ memcpy(cur_collate->index2ruleidx, cur_collate->index2ruleidx_tbl,
+ cur_collate->num_col_base * sizeof(uint16_t));
+
+ /* now do the overrides: first a list of runs, each laid out as
+ * { count, first weight, count x (index, rule idx) } and assigned
+ * consecutive weights; the list ends at a word <= 1.
+ * Collation indices are 1-based, hence the i-1 below. */
+ p = cur_collate->overrides_tbl;
+ while (*p > 1) {
+/* fprintf(stderr, "processing override -- count = %d\n", *p); */
+ n = *p++;
+ w = *p++;
+ do {
+ i = *p++;
+/* fprintf(stderr, " i=%d w=%d *p=%d\n", i, w, *p); */
+ cur_collate->index2weight[i-1] = w++;
+ cur_collate->index2ruleidx[i-1] = *p++;
+ } while (--n);
+ }
+ while (*++p) { /* then individual (index, weight, rule idx) triples, ended by a 0 word */
+ i = *p;
+ cur_collate->index2weight[i-1] = *++p;
+ cur_collate->index2ruleidx[i-1] = *++p;
+ }
+
+
+ for (i=0 ; i < cur_collate->multistart_tbl[0] ; i++) { /* NOTE(review): this loop only walks each multistart list and stores nothing; with the fprintf calls commented out it looks like leftover debugging */
+ p = cur_collate->multistart_tbl;
+/* fprintf(stderr, "%2d of %2d: %d ", i, cur_collate->multistart_tbl[0], p[i]); */
+ p += p[i];
+
+ do {
+ n = *p++;
+ do {
+ if (!*p) { /* found it */
+/* fprintf(stderr, "found: n=%d (%#lx) |%.*ls|\n", n, (int) *cs->s, n, cs->s); */
+/* fprintf(stderr, ": %d - single\n", n); */
+ goto FOUND;
+ }
+ /* the lookup check here is safe since we're assured that *p is a valid colidx */
+/* fprintf(stderr, "lookup(%lc)==%d *p==%d\n", cs->s[n], lookup(cs->s[n]), (int) *p); */
+/* fprintf(stderr, ": %d - ", n); */
+ do {
+/* fprintf(stderr, "%d|", *p); */
+ } while (*p++);
+ break;
+ } while (1);
+ } while (1);
+ FOUND:
+ continue;
+ }
+
+ return 1;
+}
+
void _locale_init(void)
{
/* TODO: mmap the locale file */
@@ -427,7 +614,8 @@ void _locale_init(void)
__locale_mmap->lc_common_item_offsets_LEN,
LC_ALL);
- __global_locale.category_offsets[0] = offsetof(__locale_t, codeset);
+ ++__global_locale.category_item_count[0]; /* Increment for codeset entry. */
+ __global_locale.category_offsets[0] = offsetof(__locale_t, outdigit0_mb);
__global_locale.category_offsets[1] = offsetof(__locale_t, decimal_point);
__global_locale.category_offsets[2] = offsetof(__locale_t, int_curr_symbol);
__global_locale.category_offsets[3] = offsetof(__locale_t, abday_1);
@@ -489,6 +677,22 @@ void _locale_set(const unsigned char *p)
*s = *p;
s[1] = p[1];
+ if ((i != LC_COLLATE)
+ && ((len = __locale_mmap->lc_common_item_offsets_LEN[i]) != 0)
+ ) {
+ crow = __locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ]
+ * len;
+ x = (const char **)(((char *) &__global_locale)
+ + __global_locale.category_offsets[i]);
+ stp = __locale_mmap->lc_common_tbl_offsets + 4*i;
+ r = (const unsigned char *)( ((char *)__locale_mmap) + *stp );
+ io = (const uint16_t *)( ((char *)__locale_mmap) + *++stp );
+ ii = (const uint16_t *)( ((char *)__locale_mmap) + *++stp );
+ d = (const unsigned char *)( ((char *)__locale_mmap) + *++stp );
+ for (c=0 ; c < len ; c++) {
+ *(x + c) = d + ii[ r[crow + c] + io[c] ];
+ }
+ }
if (i == LC_CTYPE) {
c = __locale_mmap->locales[ WIDTH_LOCALES * row + 2 ]; /* codeset */
if (c <= 2) {
@@ -524,22 +728,18 @@ void _locale_set(const unsigned char *p)
#endif /* __WCHAR_ENABLED */
#endif /* __CTYPE_HAS_8_BIT_LOCALES */
}
-
- } else if ((len = __locale_mmap->lc_common_item_offsets_LEN[i]) != 0) {
- crow = __locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ]
- * len;
- x = (const char **)(((char *) &__global_locale)
- + __global_locale.category_offsets[i]);
- stp = __locale_mmap->lc_common_tbl_offsets + 4*i;
- r = (const unsigned char *)( ((char *)__locale_mmap) + *stp );
- io = (const uint16_t *)( ((char *)__locale_mmap) + *++stp );
- ii = (const uint16_t *)( ((char *)__locale_mmap) + *++stp );
- d = (const unsigned char *)( ((char *)__locale_mmap) + *++stp );
- for (c=0 ; c < len ; c++) {
- *(x + c) = d + ii[ r[crow + c] + io[c] ];
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning might want to just put this in the locale_mmap object
+#endif
+ d = __global_locale.outdigit_length;
+ x = &__global_locale.outdigit0_mb;
+ for (c = 0 ; c < 10 ; c++) {
+ ((unsigned char *)d)[c] = strlen(x[c]);
+ assert(d[c] > 0);
}
+ } else if (i == LC_COLLATE) {
+ init_cur_collate(__locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ]);
}
-
}
++i;
p += 2;
@@ -558,10 +758,10 @@ void _locale_set(const unsigned char *p)
#ifdef __LOCALE_C_ONLY
-/* We need to index 300 bytes of data, so you might initially think we
+/* We need to index 320 bytes of data, so you might initially think we
* need to store the offsets in shorts. But since the offset of the
- * 64th item is 231, we'll store "offset - 64" for all items >= 64
- * and always calculate the data offset as "offset[i] + (i & 64)".
+ * 64th item is 182, we'll store "offset - 2*64" for all items >= 64
+ * and always calculate the data offset as "offset[i] + 2*(i & 64)".
* This allows us to pack the data offsets in an unsigned char while
* also avoiding an "if".
*
@@ -574,63 +774,67 @@ void _locale_set(const unsigned char *p)
/* Combine the data to avoid size penalty for seperate char arrays when
* compiler aligns objects. The original code is left in as documentation. */
#define cat_start nl_data
-#define C_locale_data (nl_data + C_LC_ALL + 1 + 78)
-
-static const unsigned char nl_data[C_LC_ALL + 1 + 78 + 300] = {
-/* static const unsigned char cat_start[C_LC_ALL + 1] = { */
- '\x00', '\x01', '\x04', '\x1a', '\x4c', '\x4c', '\x4e',
-/* }; */
-/* static const unsigned char item_offset[78] = { */
- '\x00', '\x06', '\x07', '\x07', '\x07', '\x07', '\x07', '\x07',
- '\x07', '\x07', '\x07', '\x08', '\x08', '\x08', '\x08', '\x08',
- '\x08', '\x08', '\x08', '\x08', '\x08', '\x08', '\x08', '\x08',
- '\x08', '\x0a', '\x0c', '\x10', '\x14', '\x18', '\x1c', '\x20',
- '\x24', '\x28', '\x2f', '\x36', '\x3e', '\x48', '\x51', '\x58',
- '\x61', '\x65', '\x69', '\x6d', '\x71', '\x75', '\x79', '\x7d',
- '\x81', '\x85', '\x89', '\x8d', '\x91', '\x99', '\xa2', '\xa8',
- '\xae', '\xb2', '\xb7', '\xbc', '\xc3', '\xcd', '\xd5', '\xde',
- '\xa7', '\xaa', '\xad', '\xc2', '\xcb', '\xd4', '\xdf', '\xdf',
- '\xdf', '\xdf', '\xdf', '\xdf', '\xe0', '\xe6',
-/* }; */
-/* static const unsigned char C_locale_data[300] = { */
- 'A', 'S', 'C', 'I', 'I', '\x00', '.', '\x00',
- '\x7f', '\x00', '-', '\x00', 'S', 'u', 'n', '\x00',
- 'M', 'o', 'n', '\x00', 'T', 'u', 'e', '\x00',
- 'W', 'e', 'd', '\x00', 'T', 'h', 'u', '\x00',
- 'F', 'r', 'i', '\x00', 'S', 'a', 't', '\x00',
- 'S', 'u', 'n', 'd', 'a', 'y', '\x00', 'M',
- 'o', 'n', 'd', 'a', 'y', '\x00', 'T', 'u',
- 'e', 's', 'd', 'a', 'y', '\x00', 'W', 'e',
- 'd', 'n', 'e', 's', 'd', 'a', 'y', '\x00',
- 'T', 'h', 'u', 'r', 's', 'd', 'a', 'y',
- '\x00', 'F', 'r', 'i', 'd', 'a', 'y', '\x00',
- 'S', 'a', 't', 'u', 'r', 'd', 'a', 'y',
- '\x00', 'J', 'a', 'n', '\x00', 'F', 'e', 'b',
- '\x00', 'M', 'a', 'r', '\x00', 'A', 'p', 'r',
- '\x00', 'M', 'a', 'y', '\x00', 'J', 'u', 'n',
- '\x00', 'J', 'u', 'l', '\x00', 'A', 'u', 'g',
- '\x00', 'S', 'e', 'p', '\x00', 'O', 'c', 't',
- '\x00', 'N', 'o', 'v', '\x00', 'D', 'e', 'c',
- '\x00', 'J', 'a', 'n', 'u', 'a', 'r', 'y',
- '\x00', 'F', 'e', 'b', 'r', 'u', 'a', 'r',
- 'y', '\x00', 'M', 'a', 'r', 'c', 'h', '\x00',
- 'A', 'p', 'r', 'i', 'l', '\x00', 'M', 'a',
- 'y', '\x00', 'J', 'u', 'n', 'e', '\x00', 'J',
- 'u', 'l', 'y', '\x00', 'A', 'u', 'g', 'u',
- 's', 't', '\x00', 'S', 'e', 'p', 't', 'e',
- 'm', 'b', 'e', 'r', '\x00', 'O', 'c', 't',
- 'o', 'b', 'e', 'r', '\x00', 'N', 'o', 'v',
- 'e', 'm', 'b', 'e', 'r', '\x00', 'D', 'e',
- 'c', 'e', 'm', 'b', 'e', 'r', '\x00', 'A',
- 'M', '\x00', 'P', 'M', '\x00', '%', 'a', ' ',
- '%', 'b', ' ', '%', 'e', ' ', '%', 'H',
- ':', '%', 'M', ':', '%', 'S', ' ', '%',
- 'Y', '\x00', '%', 'm', '/', '%', 'd', '/',
- '%', 'y', '\x00', '%', 'H', ':', '%', 'M',
- ':', '%', 'S', '\x00', '%', 'I', ':', '%',
- 'M', ':', '%', 'S', ' ', '%', 'p', '\x00',
- '^', '[', 'y', 'Y', ']', '\x00', '^', '[',
- 'n', 'N', ']', '\x00',
+#define C_locale_data (nl_data + C_LC_ALL + 1 + 90)
+
+static const unsigned char nl_data[C_LC_ALL + 1 + 90 + 320] = {
+/* static const char cat_start[LC_ALL + 1] = { */
+ '\x00', '\x0b', '\x0e', '\x24', '\x56', '\x56', '\x5a',
+/* }; */
+/* static const char item_offset[90] = { */
+ '\x00', '\x02', '\x04', '\x06', '\x08', '\x0a', '\x0c', '\x0e',
+ '\x10', '\x12', '\x14', '\x1a', '\x1b', '\x1b', '\x1b', '\x1b',
+ '\x1b', '\x1b', '\x1b', '\x1b', '\x1b', '\x1c', '\x1c', '\x1c',
+ '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c',
+ '\x1c', '\x1c', '\x1c', '\x1e', '\x20', '\x24', '\x28', '\x2c',
+ '\x30', '\x34', '\x38', '\x3c', '\x43', '\x4a', '\x52', '\x5c',
+ '\x65', '\x6c', '\x75', '\x79', '\x7d', '\x81', '\x85', '\x89',
+ '\x8d', '\x91', '\x95', '\x99', '\x9d', '\xa1', '\xa5', '\xad',
+ '\x36', '\x3c', '\x42', '\x46', '\x4b', '\x50', '\x57', '\x61',
+ '\x69', '\x72', '\x7b', '\x7e', '\x81', '\x96', '\x9f', '\xa8',
+ '\xb3', '\xb3', '\xb3', '\xb3', '\xb3', '\xb3', '\xb4', '\xba',
+ '\xbf', '\xbf',
+/* }; */
+/* static const char C_locale_data[320] = { */
+ '0', '\x00', '1', '\x00', '2', '\x00', '3', '\x00',
+ '4', '\x00', '5', '\x00', '6', '\x00', '7', '\x00',
+ '8', '\x00', '9', '\x00', 'A', 'S', 'C', 'I',
+ 'I', '\x00', '.', '\x00', '\x7f', '\x00', '-', '\x00',
+ 'S', 'u', 'n', '\x00', 'M', 'o', 'n', '\x00',
+ 'T', 'u', 'e', '\x00', 'W', 'e', 'd', '\x00',
+ 'T', 'h', 'u', '\x00', 'F', 'r', 'i', '\x00',
+ 'S', 'a', 't', '\x00', 'S', 'u', 'n', 'd',
+ 'a', 'y', '\x00', 'M', 'o', 'n', 'd', 'a',
+ 'y', '\x00', 'T', 'u', 'e', 's', 'd', 'a',
+ 'y', '\x00', 'W', 'e', 'd', 'n', 'e', 's',
+ 'd', 'a', 'y', '\x00', 'T', 'h', 'u', 'r',
+ 's', 'd', 'a', 'y', '\x00', 'F', 'r', 'i',
+ 'd', 'a', 'y', '\x00', 'S', 'a', 't', 'u',
+ 'r', 'd', 'a', 'y', '\x00', 'J', 'a', 'n',
+ '\x00', 'F', 'e', 'b', '\x00', 'M', 'a', 'r',
+ '\x00', 'A', 'p', 'r', '\x00', 'M', 'a', 'y',
+ '\x00', 'J', 'u', 'n', '\x00', 'J', 'u', 'l',
+ '\x00', 'A', 'u', 'g', '\x00', 'S', 'e', 'p',
+ '\x00', 'O', 'c', 't', '\x00', 'N', 'o', 'v',
+ '\x00', 'D', 'e', 'c', '\x00', 'J', 'a', 'n',
+ 'u', 'a', 'r', 'y', '\x00', 'F', 'e', 'b',
+ 'r', 'u', 'a', 'r', 'y', '\x00', 'M', 'a',
+ 'r', 'c', 'h', '\x00', 'A', 'p', 'r', 'i',
+ 'l', '\x00', 'M', 'a', 'y', '\x00', 'J', 'u',
+ 'n', 'e', '\x00', 'J', 'u', 'l', 'y', '\x00',
+ 'A', 'u', 'g', 'u', 's', 't', '\x00', 'S',
+ 'e', 'p', 't', 'e', 'm', 'b', 'e', 'r',
+ '\x00', 'O', 'c', 't', 'o', 'b', 'e', 'r',
+ '\x00', 'N', 'o', 'v', 'e', 'm', 'b', 'e',
+ 'r', '\x00', 'D', 'e', 'c', 'e', 'm', 'b',
+ 'e', 'r', '\x00', 'A', 'M', '\x00', 'P', 'M',
+ '\x00', '%', 'a', ' ', '%', 'b', ' ', '%',
+ 'e', ' ', '%', 'H', ':', '%', 'M', ':',
+ '%', 'S', ' ', '%', 'Y', '\x00', '%', 'm',
+ '/', '%', 'd', '/', '%', 'y', '\x00', '%',
+ 'H', ':', '%', 'M', ':', '%', 'S', '\x00',
+ '%', 'I', ':', '%', 'M', ':', '%', 'S',
+ ' ', '%', 'p', '\x00', '^', '[', 'y', 'Y',
+ ']', '\x00', '^', '[', 'n', 'N', ']', '\x00',
};
char *nl_langinfo(nl_item item)
@@ -641,7 +845,7 @@ char *nl_langinfo(nl_item item)
if ((c = _NL_ITEM_CATEGORY(item)) < C_LC_ALL) {
if ((i = cat_start[c] + _NL_ITEM_INDEX(item)) < cat_start[c+1]) {
/* return (char *) C_locale_data + item_offset[i] + (i & 64); */
- return (char *) C_locale_data + nl_data[C_LC_ALL+1+i] + (i & 64);
+ return (char *) C_locale_data + nl_data[C_LC_ALL+1+i] + 2*(i & 64);
}
}
return (char *) cat_start; /* Conveniently, this is the empty string. */
diff --git a/libc/stdio/printf.c b/libc/stdio/printf.c
index 273bf3621..945d3c38d 100644
--- a/libc/stdio/printf.c
+++ b/libc/stdio/printf.c
@@ -2331,11 +2331,16 @@ static int _do_one_spec(FILE * __restrict stream,
}
if (ppfs->conv_num <= CONV_i) { /* pointer or (un)signed int */
alphacase = __UIM_LOWER;
- if (((base = spec_base[(int)(ppfs->conv_num - CONV_p)]) == 10)
- && (PRINT_INFO_FLAG_VAL(&(ppfs->info),group))
- ) {
- alphacase = __UIM_GROUP;
+#ifndef __LOCALE_C_ONLY
+ if ((base = spec_base[(int)(ppfs->conv_num - CONV_p)]) == 10) {
+ if (PRINT_INFO_FLAG_VAL(&(ppfs->info),group)) {
+ alphacase = __UIM_GROUP;
+ }
+ if (PRINT_INFO_FLAG_VAL(&(ppfs->info),i18n)) {
+ alphacase |= 0x80;
+ }
}
+#endif /* __LOCALE_C_ONLY */
if (ppfs->conv_num <= CONV_u) { /* pointer or unsigned int */
if (ppfs->conv_num == CONV_X) {
alphacase = __UIM_UPPER;
@@ -2350,6 +2355,9 @@ static int _do_one_spec(FILE * __restrict stream,
if (ppfs->info.prec < 0) { /* Ignore '0' flag if prec specified. */
padchar = ppfs->info.pad;
}
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning if using outdigits and/or grouping, how should we interpret precision?
+#endif
s = _uintmaxtostr(buf + sizeof(buf) - 1,
(uintmax_t)
_load_inttype(*argtype & __PA_INTMASK,
@@ -2557,6 +2565,9 @@ static int _do_one_spec(FILE * __restrict stream,
return -1;
}
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning if using outdigits and/or grouping, how should we pad?
+#endif
{
size_t t;
diff --git a/libc/stdio/stdio.c b/libc/stdio/stdio.c
index cf72a5ccc..e39cf5205 100644
--- a/libc/stdio/stdio.c
+++ b/libc/stdio/stdio.c
@@ -319,11 +319,11 @@ int putw(int w, FILE *stream)
UNLOCKED(int,fileno,(register FILE *stream),(stream))
{
#ifdef __STDIO_GLIBC_CUSTOM_STREAMS
- return ( ((stream->cookie == &(stream->filedes)) && (stream->filedes >= 0))
+ return ( (stream && (stream->cookie == &(stream->filedes)) && (stream->filedes >= 0))
? stream->filedes
: (__set_errno(EBADF), -1) );
#else /* __STDIO_GLIBC_CUSTOM_STREAMS */
- return (stream->filedes >= 0) ? stream->filedes : (__set_errno(EBADF), -1);
+ return ((stream && stream->filedes >= 0)) ? stream->filedes : (__set_errno(EBADF), -1);
#endif /* __STDIO_GLIBC_CUSTOM_STREAMS */
}
@@ -3331,7 +3331,7 @@ char *_uintmaxtostr(register char * __restrict bufend, uintmax_t uval,
unsigned int H, L, high, low, rh;
#endif
#ifndef __LOCALE_C_ONLY
- int grouping;
+ int grouping, outdigit;
size_t gslen; /* This does not need to be initialized. */
const char *g; /* This does not need to be initialized. */
#endif /* __LOCALE_C_ONLY */
@@ -3350,6 +3350,11 @@ char *_uintmaxtostr(register char * __restrict bufend, uintmax_t uval,
#ifndef __LOCALE_C_ONLY
grouping = -1;
+ outdigit = 0x80 & alphacase;
+ alphacase ^= outdigit;
+#ifdef __UCLIBC_MJN3_ONLY__ /* maintainer-only reminder; guard spelled like every other use in this change */
+#warning implement outdigit... need digit lengths! (put it in locale struct)
+#endif /* __UCLIBC_MJN3_ONLY__ */
if (alphacase == __UIM_GROUP) {
assert(base == 10);
if (*(g = CUR_LOCALE.grouping)
@@ -3391,7 +3396,18 @@ char *_uintmaxtostr(register char * __restrict bufend, uintmax_t uval,
digit = uval % base;
uval /= base;
- *--bufend = ( (digit < 10) ? digit + '0' : digit + alphacase );
+#ifndef __LOCALE_C_ONLY
+ if (outdigit) {
+ outdigit = CUR_LOCALE.outdigit_length[digit];
+ do {
+ *--bufend = (&CUR_LOCALE.outdigit0_mb)[digit][--outdigit];
+ } while (outdigit);
+ outdigit = 1;
+ } else
+#endif
+ {
+ *--bufend = ( (digit < 10) ? digit + '0' : digit + alphacase );
+ }
} while (uval);
#else /* ************************************************** */
@@ -3437,7 +3453,18 @@ char *_uintmaxtostr(register char * __restrict bufend, uintmax_t uval,
low = (low / base) + (H * rh) + (digit / base);
digit %= base;
- *--bufend = ( (digit < 10) ? digit + '0' : digit + alphacase );
+#ifndef __LOCALE_C_ONLY
+ if (outdigit) {
+ outdigit = CUR_LOCALE.outdigit_length[digit];
+ do {
+ *--bufend = (&CUR_LOCALE.outdigit0_mb)[digit][--outdigit];
+ } while (outdigit);
+ outdigit = 1;
+ } else
+#endif
+ {
+ *--bufend = ( (digit < 10) ? digit + '0' : digit + alphacase );
+ }
} while (low | high);
#endif /******************************************************/
diff --git a/libc/string/Makefile b/libc/string/Makefile
index 088058ebe..fe9b64382 100644
--- a/libc/string/Makefile
+++ b/libc/string/Makefile
@@ -33,13 +33,14 @@ MOBJW= basename.o bcopy.o bzero.o dirname.o ffs.o memccpy.o memchr.o memcmp.o \
strspn.o strstr.o strtok.o strtok_r.o strerror.o _susv3_strerror_r.o \
_string_syserrmsgs.o _glibc_strerror_r.o \
_string_syssigmsgs.o sys_siglist.o strsignal.o psignal.o \
- __xpg_basename.o strlcat.o strlcpy.o sys_errlist.o # strcoll.o
+ __xpg_basename.o strlcat.o strlcpy.o sys_errlist.o
MOBJW2= wcscasecmp.o wcscat.o wcschrnul.o wcschr.o wcscmp.o wcscpy.o wcscspn.o \
wcsdup.o wcslen.o wcsncasecmp.o wcsncat.o wcsncmp.o wcsncpy.o \
wcsnlen.o wcspbrk.o wcsrchr.o wcsspn.o wcsstr.o wcstok.o wmemchr.o \
wmemcmp.o wmemcpy.o wmemmove.o wmempcpy.o wmemset.o wcpcpy.o wcpncpy.o \
- wcsxfrm.o # wcscoll
+ __wcslcpy.o \
+ wcsxfrm.o strxfrm.o # wcscoll strcoll.o
OBJS=$(MOBJ) $(MOBJ1) $(MOBJ2) $(MOBJW)
diff --git a/libc/string/wstring.c b/libc/string/wstring.c
index 531b1c9fd..c1ead6e00 100644
--- a/libc/string/wstring.c
+++ b/libc/string/wstring.c
@@ -26,6 +26,13 @@
*
* ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */
+/* Dec 20, 2002
+ *
+ * Initial test implementation of strcoll, strxfrm, wcscoll, and wcsxfrm.
+ * The code needs to be cleaned up a good bit, but I'd like to see people
+ * test it out.
+ */
+
#define _STDIO_UTILITY
#define _GNU_SOURCE
#include <string.h>
@@ -36,11 +43,12 @@
#include <stdlib.h>
#include <errno.h>
#include <signal.h>
+#include <assert.h>
+#include <locale.h>
#ifdef WANT_WIDE
#include <wchar.h>
#include <wctype.h>
-#include <locale.h>
#define Wvoid wchar_t
#define Wchar wchar_t
@@ -627,17 +635,14 @@ int Wmemcmp(const Wvoid *s1, const Wvoid *s2, size_t n)
#ifdef L_strcmp
+#ifdef __LOCALE_C_ONLY
+#warning c only
#ifdef L_wcscmp
-#ifdef __UCLIBC_MJN3_ONLY__
-#warning implement wcscoll and remove weak alias (or enable for C locale only)
-#endif
weak_alias(wcscmp,wcscoll);
#else /* L_wcscmp */
-#ifdef __UCLIBC_MJN3_ONLY__
-#warning implement strcoll and remove weak alias (or enable for C locale only)
-#endif
weak_alias(strcmp,strcoll);
#endif /* L_wcscmp */
+#endif /* __LOCALE_C_ONLY */
int Wstrcmp(register const Wchar *s1, register const Wchar *s2)
{
@@ -661,23 +666,6 @@ int Wstrcmp(register const Wchar *s1, register const Wchar *s2)
}
#endif
/**********************************************************************/
-#ifdef L_strcoll
-#error implement strcoll and remove weak_alias!!
-
-#if 0
-extern unsigned char *_ctype_collate;
-int strcoll(register const char *s1, const char *s2)
-{
- int r;
-
- while (!(r = (_ctype_collate[(int)(*s1++)]-_ctype_collate[(int)(*s2++)])));
-
- return r;
-}
-#endif
-
-#endif
-/**********************************************************************/
#ifdef L_wcsncmp
#define L_strncmp
#define Wstrncmp wcsncmp
@@ -713,11 +701,6 @@ int Wstrncmp(register const Wchar *s1, register const Wchar *s2, size_t n)
#endif
/**********************************************************************/
-#ifdef L_strxfrm
-#error implement strxfrm
-/* size_t strxfrm(char *dst, const char *src, size_t len); */
-#endif
-/**********************************************************************/
#ifdef L_wmemchr
#define L_memchr
#define Wmemchr wmemchr
@@ -1923,28 +1906,37 @@ size_t strlcat(register char *__restrict dst,
#endif
/**********************************************************************/
-#ifdef L_wcsxfrm
+#ifdef WANT_WIDE
+extern size_t __wcslcpy(wchar_t *__restrict dst,
+ const wchar_t *__restrict src,
+ size_t n);
+#endif
+
+
+#ifdef L___wcslcpy
#define L_strlcpy
-#define Wstrlcpy wcsxfrm
+#define Wstrlcpy __wcslcpy
+#ifdef __LOCALE_C_ONLY
+weak_alias(__wcslcpy,wcsxfrm);
+#endif
#endif
#ifdef L_strlcpy
-#ifndef L_wcsxfrm
+#ifndef L___wcslcpy
#define Wstrlcpy strlcpy
-#ifdef __UCLIBC_MJN3_ONLY__
-#warning implement wcscoll and remove weak alias (or enable for C locale only)
-#endif
+#ifdef __LOCALE_C_ONLY
weak_alias(strlcpy,strxfrm);
#endif
+#endif
/* OpenBSD function:
* Copy at most n-1 chars from src to dst and nul-terminate dst.
* Returns strlen(src), so truncation occurred if the return value is >= n. */
size_t Wstrlcpy(register Wchar *__restrict dst,
- register const Wchar *__restrict src,
- size_t n)
+ register const Wchar *__restrict src,
+ size_t n)
{
const Wchar *src0 = src;
Wchar dummy[1];
@@ -2145,3 +2137,621 @@ void psignal(int signum, register const char *message)
#endif
/**********************************************************************/
+#ifndef __LOCALE_C_ONLY
+
+#ifdef L_strxfrm /* build strxfrm/strcoll from the wcsxfrm/wcscoll source below by retargeting it to plain char */
+#ifndef WANT_WIDE
+#error WANT_WIDE should be defined for L_strxfrm
+#endif
+#ifdef L_wcsxfrm
+#error L_wcsxfrm already defined for L_strxfrm
+#endif
+
+#define wcscoll strcoll /* the function named wcscoll below is emitted as strcoll */
+#define L_wcsxfrm /* enables the L_wcsxfrm section that follows */
+#undef WANT_WIDE /* selects the multibyte (mbrtowc) paths in next_weight() */
+
+#undef Wvoid
+#undef Wchar
+#undef Wuchar
+#undef Wint
+
+#define Wchar char /* the string element type becomes char */
+
+#endif /* L_strxfrm */
+
+
+
+#ifdef L_wcsxfrm
+
+#define CUR_COLLATE (&__global_locale.collate)
+
+#define MAX_PENDING 8
+
+typedef struct { /* Per-string cursor used by next_weight() while producing collation weights; set up by init_col_state(). */
+ const Wchar *s; /* current position in the input string */
+ const Wchar *eob; /* end of backward */
+
+ __uwchar_t weight; /* set to 0 by next_weight() at end of string and on its error paths */
+ __uwchar_t ui_weight; /* undefined or invalid */
+ int colitem; /* collation item index from lookup(), or a matched multi-char item */
+ int weightidx; /* weight index extracted with WEIGHT_MASK */
+ int rule;
+ size_t position; /* incremented for each item taken while moving forward */
+ /* should be wchar_t. if wchar < 0 do EILSEQ? */
+ __uwchar_t *cip; /* cursor into pending items; NULL when nothing is pending */
+ __uwchar_t ci_pending[MAX_PENDING]; /* nul-terminated */
+
+ char *back_buf; /* backward-mode stack: starts as ibb, replaced by a heap buffer when it fills */
+ char *bbe; /* end of back_buf (actual last... not 1 past end) */
+ char *bp; /* ptr into backbuf, NULL if not in backward mode. NOTE(review): init_col_state() sets this to ibb and next_weight() tests *bp, so NULL does not appear to be the idle marker in practice */
+ char ibb[128]; /* initial in-struct storage for back_buf */
+ size_t bb_size; /* current capacity of back_buf; grown in steps of 128 */
+
+ int ru_pushed; /* rule word saved for the next call; 0 when none */
+} col_state_t;
+
+
+#define WEIGHT_MASK 0x3fffU /* low 14 bits of a rule word: the weight index */
+#define RULE_MASK 0xc000U /* top 2 bits of a rule word: direction/position flags; both clear means backward */
+
+#define RULE_FORWARD (1 << 14)
+#define RULE_POSITION (1 << 15)
+
+#define UI_IDX (WEIGHT_MASK-6) /* the *_IDX values are reserved weight indices just below WEIGHT_MASK */
+#define POSIT_IDX (WEIGHT_MASK-5)
+#define RANGE_IDX (WEIGHT_MASK-4) /* the weight has already been stored in cs->weight instead of coming from a table */
+#define UNDEF_IDX (WEIGHT_MASK-3)
+#define INVAL_IDX (WEIGHT_MASK-2)
+#define DITTO_IDX (WEIGHT_MASK-1) /* for undefined chars, next_weight() substitutes undefined_idx as the collation item */
+
+
+#undef TRACE
+#if 0
+#define TRACE(X) printf##X
+#else
+#define TRACE(X) ((void)0)
+#endif
+
+/* Map a wide character to its collation item index through the three-level
+ * wcs2colidt table of the current collate data.
+ *
+ * The character is split into high / middle / low bit slices (widths given
+ * by ii_shift and ti_shift). The high slice indexes the first level, whose
+ * entry selects a block in the second level (which starts ii_len words in);
+ * that entry in turn selects a block in the third level (which starts a
+ * further ti_len words in).
+ *
+ * Returns 0 for any value above 0xffff without consulting the table.
+ */
+static int lookup(wchar_t wc)
+{
+    unsigned int code, low, mid, blk;
+
+    if (((__uwchar_t) wc) > 0xffffU) {
+        return 0;
+    }
+
+    /* wc is known to be in 0..0xffff here, so unsigned arithmetic is exact. */
+    code = (unsigned int) wc;
+
+    low = code & CUR_COLLATE->ti_mask;
+    code >>= CUR_COLLATE->ti_shift;
+    mid = code & CUR_COLLATE->ii_mask;
+    code >>= CUR_COLLATE->ii_shift;
+
+    /* level 1 -> block number in level 2 */
+    blk = CUR_COLLATE->wcs2colidt_tbl[code];
+    /* level 2 -> block number in level 3 */
+    blk = CUR_COLLATE->wcs2colidt_tbl[CUR_COLLATE->ii_len
+                                      + (blk << CUR_COLLATE->ii_shift) + mid];
+    /* level 3 -> collation item index */
+    return CUR_COLLATE->wcs2colidt_tbl[CUR_COLLATE->ii_len + CUR_COLLATE->ti_len
+                                       + (blk << CUR_COLLATE->ti_shift) + low];
+}
+
+/* Reset *cs and point it at the start of wcs.
+ * Everything is zeroed first; the backward-mode stack then uses the 128-byte
+ * in-struct buffer ibb, with bp at its first byte and bbe at its last byte.
+ */
+static void init_col_state(col_state_t *cs, const Wchar *wcs)
+{
+    memset(cs, 0, sizeof *cs);
+
+    cs->bb_size = sizeof cs->ibb;               /* 128 */
+    cs->back_buf = cs->ibb;
+    cs->bp = cs->back_buf;
+    cs->bbe = cs->back_buf + (cs->bb_size - 1); /* last valid byte, not one past */
+
+    cs->s = wcs;
+}
+
+static void next_weight(col_state_t *cs, int pass)
+{
+ int r, w, ru, ri, popping_backup_stack;
+ ssize_t n;
+ const uint16_t *p;
+#ifdef WANT_WIDE
+#define WC (*cs->s)
+#define N (1)
+#else /* WANT_WIDE */
+ mbstate_t mbstate;
+ wchar_t WC;
+ size_t n0, nx;
+#define N n0
+
+ mbstate.mask = 0;
+#endif /* WANT_WIDE */
+
+ do {
+
+ if (cs->ru_pushed) {
+ ru = cs->ru_pushed;
+ TRACE(("ru_pushed = %d\n", ru));
+ cs->ru_pushed = 0;
+ goto POSITION_SKIP;
+ }
+
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning should we walk pendings backwards?
+#endif
+ if (cs->cip) { /* possible pending weight */
+ if ((r = *(cs->cip++)) == 0) {
+ cs->cip = NULL;
+ continue;
+ }
+ cs->weightidx = r & WEIGHT_MASK;
+ assert(cs->weightidx);
+/* assert(cs->weightidx != WEIGHT_MASK); */
+ } else { /* get the next collation item from the string */
+ TRACE(("clearing popping flag\n"));
+ popping_backup_stack = 0;
+
+ IGNORE_LOOP:
+ /* keep first pos as 0 for a sentinel */
+ if (*cs->bp) { /* pending backward chars */
+ POP_BACKUP:
+ popping_backup_stack = 1;
+ TRACE(("setting popping flag\n"));
+ n = 0;
+ if (*cs->bp > 0) { /* singles pending */
+ cs->s -= 1;
+ if ((*cs->bp -= 1) == 0) {
+ cs->bp -= 1;
+ }
+ } else { /* last was a multi */
+ cs->s += *cs->bp;
+ cs->bp -= 1;
+ }
+ } else if (!*cs->s) { /* not in backward mode and end of string */
+ cs->weight = 0;
+ return;
+ } else {
+ cs->position += 1;
+ }
+
+ BACK_LOOP:
+#ifdef WANT_WIDE
+ n = 1;
+ cs->colitem = r = lookup(*cs->s);
+#else /* WANT_WIDE */
+ n = n0 = mbrtowc(&WC, cs->s, SIZE_MAX, &mbstate);
+ if (n < 0) {
+ __set_errno(EILSEQ);
+ cs->weight = 0;
+ return;
+ }
+ cs->colitem = r = lookup(WC);
+#endif /* WANT_WIDE */
+
+ TRACE((" r=%d WC=%#lx\n", r, (unsigned long)(WC)));
+
+ if (r > CUR_COLLATE->max_col_index) { /* starting char for one or more sequences */
+ p = CUR_COLLATE->multistart_tbl;
+ p += p[r-CUR_COLLATE->max_col_index -1];
+ do {
+ n = N;
+ r = *p++;
+ do {
+ if (!*p) { /* found it */
+ cs->colitem = r;
+ TRACE((" found multi %d\n", n));
+ goto FOUND;
+ }
+#ifdef WANT_WIDE
+ /* the lookup check here is safe since we're assured that *p is a valid colidx */
+ if (!cs->s[n] || (lookup(cs->s[n]) != *p)) {
+ do {} while (*p++);
+ break;
+ }
+ ++p;
+ ++n;
+#else /* WANT_WIDE */
+ if (cs->s[n]) {
+ nx = mbrtowc(&WC, cs->s + n, SIZE_MAX, &mbstate);
+ if (nx < 0) {
+ __set_errno(EILSEQ);
+ cs->weight = 0;
+ return;
+ }
+ }
+ if (!cs->s[n] || (lookup(WC) != *p)) {
+ do {} while (*p++);
+ break;
+ }
+ ++p;
+ n += nx;
+#endif /* WANT_WIDE */
+ } while (1);
+ } while (1);
+ } else if (r == 0) { /* illegal, undefined, or part of a range */
+ if ((CUR_COLLATE->range_count)
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning .. need to introduce range as a collating item?
+#endif
+ && (((__uwchar_t)(WC - CUR_COLLATE->range_low)) <= CUR_COLLATE->range_count)
+ ) { /* part of a range */
+ /* Note: cs->colitem = 0 already. */
+ TRACE((" found range\n"));
+ ru = CUR_COLLATE->ruletable[CUR_COLLATE->range_rule_offset*CUR_COLLATE->MAX_WEIGHTS + pass];
+ assert((ru & WEIGHT_MASK) != DITTO_IDX);
+ if ((ru & WEIGHT_MASK) == WEIGHT_MASK) {
+ ru = (ru & RULE_MASK) | RANGE_IDX;
+ cs->weight = CUR_COLLATE->range_base_weight + (WC - CUR_COLLATE->range_low);
+ }
+ goto RANGE_SKIP_TO;
+ } else if (((__uwchar_t)(WC)) <= 0x7fffffffUL) { /* legal but undefined */
+ UNDEFINED:
+ /* Note: cs->colitem = 0 already. */
+ ri = CUR_COLLATE->undefined_idx;
+ assert(ri != 0); /* implicit undefined isn't supported */
+
+ TRACE((" found explicit UNDEFINED\n"));
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning right now single weight locales do not support ..
+#endif
+ if (CUR_COLLATE->num_weights == 1) {
+ TRACE((" single weight UNDEFINED\n"));
+ cs->weightidx = RANGE_IDX;
+ cs->weight = ri;
+ cs->s += n;
+ goto PROCESS_WEIGHT;
+ }
+
+ ri = CUR_COLLATE->index2ruleidx[ri - 1];
+ ru = CUR_COLLATE->ruletable[ri * CUR_COLLATE->MAX_WEIGHTS + pass];
+ assert((ru & WEIGHT_MASK) != WEIGHT_MASK); /* TODO: handle ".." */
+ if ((ru & WEIGHT_MASK) == DITTO_IDX) {
+ cs->colitem = CUR_COLLATE->undefined_idx;
+ }
+ goto RANGE_SKIP_TO;
+ } else { /* illegal */
+ TRACE((" found illegal\n"));
+ __set_errno(EINVAL);
+ /* We put all illegals in the same equiv class with maximal weight,
+ * and ignore them after the first pass. */
+ if (pass > 0) {
+ cs->s += n;
+ goto IGNORE_LOOP;
+ }
+ ru = (RULE_FORWARD | RANGE_IDX);
+ cs->weight = 0xffffU;
+ goto RANGE_SKIP_TO;
+ }
+ } else if (CUR_COLLATE->num_weights == 1) {
+ TRACE((" single weight\n"));
+ cs->weightidx = RANGE_IDX;
+ cs->weight = cs->colitem;
+ cs->s += n;
+ goto PROCESS_WEIGHT;
+ } else {
+ TRACE((" normal\n"));
+ }
+
+ /* if we get here, it is a normal char either singly weighted, undefined, or in a range */
+ FOUND:
+ ri = CUR_COLLATE->index2ruleidx[cs->colitem - 1];
+ TRACE((" ri=%d ", ri));
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning make sure this is correct
+#endif
+ if (!ri) {
+ TRACE(("NOT IN THIS LOCALE\n"));
+ goto UNDEFINED;
+ }
+ ru = CUR_COLLATE->ruletable[ri * CUR_COLLATE->MAX_WEIGHTS + pass];
+
+ RANGE_SKIP_TO:
+
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning ignoreables probably should not interrupt backwards processing, but this is wrong
+#endif
+/* if (!(ru & WEIGHT_MASK)) { */
+/* TRACE(("IGNORE\n")); */
+/* cs->s += n; */
+/* continue; */
+/* } */
+
+
+ TRACE((" rule = %#x weight = %#x popping = %d s = %p eob = %p\n",
+ ru & RULE_MASK, ru & WEIGHT_MASK, popping_backup_stack,
+ cs->s, cs->eob));
+ /* now we need to check if we're going backwards... */
+
+ if (!popping_backup_stack) {
+ if (!(ru & RULE_MASK)) { /* backward */
+ TRACE(("backwards\n"));
+ assert(cs->bp <= cs->bbe);
+ if (cs->bp == cs->bbe) {
+ if (cs->back_buf == cs->ibb) { /* was using internal buffer */
+ cs->bp = malloc(cs->bb_size + 128);
+ if (!cs->bp) {
+ __set_errno(ENOMEM);
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning what to do here?
+#endif
+ cs->weight = 0;
+ return;
+ }
+ memcpy(cs->bp, cs->back_buf, cs->bb_size);
+
+ } else {
+ cs->bp = realloc(cs->back_buf, cs->bb_size + 128);
+ if (!cs->bp) {
+ __set_errno(ENOMEM);
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning what to do here?
+#endif
+ cs->weight = 0;
+ return;
+ }
+ }
+ cs->bb_size += 128;
+ cs->bbe = cs->bp + (cs->bbe - cs->back_buf);
+ cs->back_buf = cs->bp;
+ cs->bp = cs->bbe;
+
+ }
+ if (n==1) { /* single char */
+ if (*cs->bp && (((unsigned char)(*cs->bp)) < CHAR_MAX)) {
+ *cs->bp += 1; /* increment last single's count */
+ } else { /* last was a multi, or just starting */
+ if (!cs->bp) {
+ cs->bp = cs->back_buf;
+ } else {
+ assert(cs->bp < cs->bbe);
+ ++cs->bp;
+ }
+ *cs->bp = 1;
+ }
+ } else { /* multichar */
+ assert(n>1);
+ assert(cs->bp < cs->bbe);
+ *++cs->bp = -n;
+ }
+ cs->s += n;
+ if (*cs->s) {
+ goto BACK_LOOP;
+ }
+ /* end-of-string so start popping */
+ cs->eob = cs->s;
+ TRACE(("popping\n"));
+ goto POP_BACKUP;
+ } else if (*cs->bp) { /* was going backward but this element isn't */
+ /* discard current and use previous backward element */
+ assert(!cs->cip);
+ cs->eob = cs->s;
+ TRACE(("popping\n"));
+ goto POP_BACKUP;
+ } else { /* was and still going forward */
+ TRACE(("forwards\n"));
+ if ((ru & (RULE_POSITION|WEIGHT_MASK)) > RULE_POSITION) {
+ assert(ru & WEIGHT_MASK);
+ cs->ru_pushed = ru;
+ cs->weight = cs->position;
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning devel code
+#endif
+ cs->position = 0; /* reset to reduce size for strcoll? */
+ cs->s += n;
+ cs->weightidx = RANGE_IDX;
+ goto PROCESS_WEIGHT;
+ }
+ }
+ } else { /* popping backwards stack */
+ TRACE(("popping (continued)\n"));
+ if (!*cs->bp) {
+ cs->s = cs->eob;
+ }
+ cs->s -= n;
+ }
+
+ cs->s += n;
+ POSITION_SKIP:
+ cs->weightidx = ru & WEIGHT_MASK;
+ cs->rule = ru & RULE_MASK;
+ }
+
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning for pending we only want the weight... _not_ the rule
+#endif
+ if (!cs->weightidx) { /* ignore */
+ continue;
+ }
+
+ PROCESS_WEIGHT:
+ assert(cs->weightidx);
+
+
+ if (((unsigned int)(cs->weightidx - UI_IDX)) <= (INVAL_IDX-UI_IDX)) {
+ if (cs->weightidx == UI_IDX) {
+ cs->weight = cs->ui_weight;
+ }
+ return;
+ }
+
+ assert(cs->weightidx != WEIGHT_MASK);
+ if (cs->weightidx == DITTO_IDX) { /* want the weight of the current collating item */
+ TRACE(("doing ditto\n"));
+ w = CUR_COLLATE->index2weight[cs->colitem -1];
+ } else if (cs->weightidx <= CUR_COLLATE->max_col_index) { /* normal */
+ TRACE(("doing normal\n"));
+ w = CUR_COLLATE->index2weight[cs->weightidx -1];
+ } else { /* a string */
+ TRACE(("doing string\n"));
+ assert(!(cs->weightidx & RULE_MASK));
+ /* note: ISO 14651 allows a null string here */
+ p = CUR_COLLATE->weightstr + (cs->weightidx - (CUR_COLLATE->max_col_index + 2));
+ if (*p & WEIGHT_MASK) {
+ r = 0;
+ do {
+ assert(r < MAX_PENDING);
+ cs->ci_pending[r++] = *p++;
+ } while (*p & WEIGHT_MASK);
+ cs->cip = cs->ci_pending;
+ }
+ continue;
+ }
+
+ cs->weight = w;
+ return;
+ } while (1);
+}
+
+/*
+ * Compare s0 and s1 according to the collation data of the current locale.
+ *
+ * When the locale has no weight levels (the C locale) the comparison is
+ * delegated to wcscmp() or strcmp(), depending on WANT_WIDE.  Otherwise the
+ * strings are walked once per weight level: on every step the next weight
+ * of each string is fetched and the first pair that differs decides the
+ * result.  A zero weight on the first string (with both weights equal) ends
+ * the current level.
+ *
+ * Returns the difference of the first differing pair of weights, or 0 when
+ * every level compared equal.
+ */
+int wcscoll (const Wchar *s0, const Wchar *s1)
+{
+	col_state_t state[2];
+	col_state_t *const lhs = &state[0];
+	col_state_t *const rhs = &state[1];
+	int level = 0;
+
+	if (CUR_COLLATE->num_weights == 0) { /* C locale */
+#ifdef WANT_WIDE
+		return wcscmp(s0, s1);
+#else /* WANT_WIDE */
+		return strcmp(s0, s1);
+#endif /* WANT_WIDE */
+	}
+
+	do { /* one iteration per weight level */
+		/* every level restarts from the beginning of both strings */
+		init_col_state(lhs, s0);
+		init_col_state(rhs, s1);
+
+		for (;;) { /* step through both strings in lock-step */
+			next_weight(lhs, level);
+			next_weight(rhs, level);
+			TRACE(("w0=%lu w1=%lu\n",
+				   (unsigned long) lhs->weight,
+				   (unsigned long) rhs->weight));
+
+			if (lhs->weight != rhs->weight) {
+				return lhs->weight - rhs->weight;
+			}
+			if (!lhs->weight) { /* both exhausted at this level */
+				break;
+			}
+		}
+
+		++level;
+	} while (level < CUR_COLLATE->num_weights);
+
+	return 0;
+}
+
+#ifdef WANT_WIDE
+
+/*
+ * Transform ws2 into a sequence of collation weights stored in ws1.
+ *
+ * At most n elements of ws1 are written.  In the C locale (no weight
+ * levels) this is a plain __wcslcpy().  Otherwise, for every weight level,
+ * each weight produced by next_weight() is stored as weight + 1, so the
+ * end-of-level weight 0 is stored as 1 and serves as the level separator;
+ * the separator of the final level is turned back into the terminating 0.
+ *
+ * Returns the number of elements the complete transformation needs, not
+ * counting the terminator, regardless of how much actually fit in ws1.
+ */
+size_t wcsxfrm(wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t n)
+{
+	col_state_t state;
+	size_t used;
+	int level;
+
+	if (CUR_COLLATE->num_weights == 0) { /* C locale */
+		return __wcslcpy(ws1, ws2, n);
+	}
+
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning handle empty string as a special case
+#endif
+
+	used = 0;
+	level = 0;
+	do { /* one iteration per weight level */
+		init_col_state(&state, ws2);
+
+		for (;;) { /* one iteration per weight of the string */
+			next_weight(&state, level);
+			TRACE(("weight=%lu (%#lx)\n", (unsigned long) state.weight, (unsigned long) state.weight));
+			if (used < n) {
+				ws1[used] = state.weight +1;
+			}
+			++used;
+			TRACE(("--------------------------------------------\n"));
+			if (!state.weight) { /* end of this level */
+				break;
+			}
+		}
+
+		/* the slot just written holds the end-of-level marker */
+		if (used <= n) {
+			ws1[used-1] = 1;
+		}
+		TRACE(("-------------------- pass %d --------------------\n", level));
+
+		++level;
+	} while (level < CUR_COLLATE->num_weights);
+
+	/* the last separator becomes the terminating nul */
+	if (used <= n) {
+		ws1[used-1] = 0;
+	}
+	return used-1;
+}
+
+#else /* WANT_WIDE */
+
+/*
+ * bound[i] is the smallest value that does NOT fit in an (i + 1)-byte
+ * sequence; values at or above the last entry use the longest (6 byte) form.
+ */
+static const unsigned long bound[] = {
+	1UL << 7,
+	1UL << 11,
+	1UL << 16,
+	1UL << 21,
+	1UL << 26,
+};
+
+/*
+ * first[i] is the marker placed in the lead byte of an (i + 1)-byte
+ * sequence.  The table is only ever read, so it is const.
+ */
+static const unsigned char first[] = {
+	0x0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
+};
+
+/* Use an extension of UTF-8 to store a 32 bit val in max 6 bytes. */
+
+/*
+ * Encode weight at s[count...] provided the whole sequence fits within the
+ * first n bytes of s; nothing is written otherwise.
+ *
+ *   s      - start of the destination buffer
+ *   count  - offset in s at which the sequence begins
+ *   n      - total size of the destination buffer in bytes
+ *   weight - value to encode
+ *
+ * Returns the number of bytes the encoded weight occupies (1 to 6), whether
+ * or not it was actually written.
+ */
+static size_t store(unsigned char *s, size_t count, size_t n, __uwchar_t weight)
+{
+	size_t i;	/* number of trailing bytes after the lead byte */
+	size_t len;
+
+	/* Find the shortest form able to hold the value. */
+	for (i = 0; i < sizeof(bound)/sizeof(bound[0]); i++) {
+		if (weight < bound[i]) {
+			break;
+		}
+	}
+
+	len = i + 1;
+	if (i + count < n) {	/* last byte of the sequence is inside the buffer */
+		s += count;
+		s[0] = first[i];
+		/* Fill the trailing bytes from last to first, 6 bits at a time. */
+		while (i) {
+			s[i] = 0x80 | (weight & 0x3f);
+			weight >>= 6;
+			--i;
+		}
+		/* Whatever bits remain belong in the lead byte. */
+		s[0] |= (unsigned char) weight;
+	}
+
+	return len;
+}
+
+/*
+ * Transform ws2 into a byte string of encoded collation weights in ws1.
+ *
+ * At most n bytes of ws1 are written.  In the C locale (no weight levels)
+ * this is a plain strlcpy().  Otherwise, for every weight level, each
+ * weight produced by next_weight() is passed to store() as weight + 1, so
+ * the end-of-level weight 0 becomes the single byte 1 and serves as the
+ * level separator; the separator of the final level is turned back into
+ * the terminating nul.
+ *
+ * Returns the number of bytes the complete transformation needs, not
+ * counting the terminator, regardless of how much actually fit in ws1.
+ */
+size_t strxfrm(char *__restrict ws1, const char *__restrict ws2, size_t n)
+{
+	col_state_t state;
+	size_t used, step;
+	int level;
+
+	if (CUR_COLLATE->num_weights == 0) { /* C locale */
+		return strlcpy(ws1, ws2, n);
+	}
+
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning handle empty string as a special case
+#endif
+
+	step = 0;
+	used = 0;
+	level = 0;
+	do { /* one iteration per weight level */
+		init_col_state(&state, ws2);
+
+		for (;;) { /* one iteration per weight of the string */
+			next_weight(&state, level);
+			TRACE(("weight=%lu (%#lx)\n", (unsigned long) state.weight, (unsigned long) state.weight));
+			step = store((unsigned char *)ws1, used, n, state.weight + 1);
+			used += step;
+			TRACE(("--------------------------------------------\n"));
+			if (!state.weight) { /* end of this level */
+				break;
+			}
+		}
+
+		/* the end-of-level marker (value 1) always encodes to one byte */
+		assert(step == 1);
+		if (used <= n) {
+			ws1[used-1] = 1;
+		}
+		TRACE(("-------------------- pass %d --------------------\n", level));
+
+		++level;
+	} while (level < CUR_COLLATE->num_weights);
+
+	/* the last separator becomes the terminating nul */
+	if (used <= n) {
+		ws1[used-1] = 0;
+	}
+	return used-1;
+}
+
+
+#endif /* WANT_WIDE */
+
+#endif /* wcscoll */
+
+#endif /* __LOCALE_C_ONLY */
+/**********************************************************************/
+
diff --git a/libc/sysdeps/linux/common/bits/uClibc_locale.h b/libc/sysdeps/linux/common/bits/uClibc_locale.h
index 8025005ab..4e89188b8 100644
--- a/libc/sysdeps/linux/common/bits/uClibc_locale.h
+++ b/libc/sysdeps/linux/common/bits/uClibc_locale.h
@@ -120,6 +120,46 @@ enum {
* In particular, C/POSIX locale is '#' + "\x80\x01"}*LC_ALL + nul.
*/
+/*
+ * Collation state of the current locale, consulted by the collation
+ * routines (wcscoll/wcsxfrm/strxfrm and their weight iterator).
+ *
+ * The first fourteen members have the same names and order as the leading
+ * members of coldata_base_t in locale.c.
+ * NOTE(review): presumably they are copied from the locale data by
+ * init_cur_collate() -- confirm there.
+ */
+typedef struct {
+ /* Number of weight levels (comparison passes); 0 means the C locale. */
+ uint16_t num_weights;
+ uint16_t num_starters;
+ uint16_t ii_shift;
+ uint16_t ti_shift;
+ uint16_t ii_len;
+ uint16_t ti_len;
+ uint16_t max_weight;
+ uint16_t num_col_base;
+ /* Weight indices up to this value are looked up in index2weight;
+  * larger ones are treated as offsets into weightstr. */
+ uint16_t max_col_index;
+ /* Collation index used for legal characters that have no entry of their
+  * own; the weight iterator asserts that it is nonzero. */
+ uint16_t undefined_idx;
+ /* Characters in the range get weight range_base_weight + (wc - range_low). */
+ uint16_t range_low;
+ uint16_t range_count;
+ uint16_t range_base_weight;
+ uint16_t range_rule_offset; /* change name to index? */
+
+ uint16_t ii_mask;
+ uint16_t ti_mask;
+
+ /* NOTE(review): the *_tbl members look like the read-only tables of the
+  * locale data, as opposed to the writable index2weight/index2ruleidx
+  * arrays below -- confirm against init_cur_collate(). */
+ const uint16_t *index2weight_tbl;
+ const uint16_t *index2ruleidx_tbl;
+ const uint16_t *multistart_tbl;
+ /* uint16_t wcs2colidt_offset_low; */
+ /* uint16_t wcs2colidt_offset_hi; */
+ const uint16_t *wcs2colidt_tbl;
+
+ /* uint16_t undefined_idx; */
+ const uint16_t *overrides_tbl;
+ /* uint16_t *multistart_tbl; */
+
+ /* Weight strings, addressed as weightstr + (weightidx - (max_col_index + 2)). */
+ const uint16_t *weightstr;
+ /* Rule entries, addressed as ruletable[ruleidx * MAX_WEIGHTS + pass]. */
+ const uint16_t *ruletable;
+
+
+ /* Both arrays are indexed by (collation index - 1). */
+ uint16_t *index2weight;
+ uint16_t *index2ruleidx;
+
+ /* Row stride of ruletable (entries per rule index). */
+ uint16_t MAX_WEIGHTS;
+} __collate_t;
+
/* static unsigned char cur_locale[LOCALE_STRING_SIZE]; */
@@ -138,8 +178,7 @@ typedef struct {
/* ctype */
unsigned char encoding; /* C/POSIX, 8-bit, UTF-8 */
unsigned char mb_cur_max; /* determined by encoding _AND_ translit!!! */
-
- const char *codeset;
+ const unsigned char outdigit_length[10];
#ifdef __CTYPE_HAS_8_BIT_LOCALES
const unsigned char *idx8ctype;
@@ -162,6 +201,19 @@ typedef struct {
/* width?? */
#endif /* __WCHAR_ENABLED */
+ /* ctype */
+ const char *outdigit0_mb;
+ const char *outdigit1_mb;
+ const char *outdigit2_mb;
+ const char *outdigit3_mb;
+ const char *outdigit4_mb;
+ const char *outdigit5_mb;
+ const char *outdigit6_mb;
+ const char *outdigit7_mb;
+ const char *outdigit8_mb;
+ const char *outdigit9_mb;
+ const char *codeset; /* MUST BE LAST!!! */
+
/* numeric */
const char *decimal_point;
const char *thousands_sep;
@@ -250,11 +302,16 @@ typedef struct {
const char *era_d_t_fmt;
const char *era_t_fmt;
- /* collate */
+ /* collate is at the end */
/* messages */
const char *yesexpr;
const char *noexpr;
+ const char *yesstr;
+ const char *nostr;
+
+ /* collate is at the end */
+ __collate_t collate;
} __locale_t;