From c9d66e44af5c93a1ea5487fd9bff78274be65850 Mon Sep 17 00:00:00 2001 From: Manuel Novoa III Date: Fri, 20 Dec 2002 19:26:35 +0000 Subject: The big thing is locale dependent collation support. Also added outdigit support and (legacy) YESSTR/NOSTR support. --- libc/misc/locale/locale.c | 356 ++++++++++--- libc/stdio/printf.c | 19 +- libc/stdio/stdio.c | 37 +- libc/string/Makefile | 5 +- libc/string/wstring.c | 684 +++++++++++++++++++++++-- libc/sysdeps/linux/common/bits/uClibc_locale.h | 63 ++- 6 files changed, 1037 insertions(+), 127 deletions(-) (limited to 'libc') diff --git a/libc/misc/locale/locale.c b/libc/misc/locale/locale.c index 0875a4e5b..9c162a980 100644 --- a/libc/misc/locale/locale.c +++ b/libc/misc/locale/locale.c @@ -22,6 +22,10 @@ * query locale settings should now work... at the cost of almost * doubling the size of the setlocale object code. * Fixed a bug in the internal fixed-size-string locale specifier code. + * + * Dec 20, 2002 + * + * Added in collation support and updated stub nl_langinfo. 
*/ @@ -415,6 +419,189 @@ struct lconv *localeconv(void) __locale_t __global_locale; +typedef struct { + uint16_t num_base; + uint16_t num_der; + uint16_t MAX_WEIGHTS; + uint16_t num_index2weight; +#define num_index2ruleidx num_index2weight + uint16_t num_weightstr; + uint16_t num_multistart; + uint16_t num_override; + uint16_t num_ruletable; +} coldata_header_t; + +typedef struct { + uint16_t num_weights; + uint16_t num_starters; + uint16_t ii_shift; + uint16_t ti_shift; + uint16_t ii_len; + uint16_t ti_len; + uint16_t max_weight; + uint16_t num_col_base; + uint16_t max_col_index; + uint16_t undefined_idx; + uint16_t range_low; + uint16_t range_count; + uint16_t range_base_weight; + uint16_t range_rule_offset; + + uint16_t index2weight_offset; + uint16_t index2ruleidx_offset; + uint16_t multistart_offset; + uint16_t wcs2colidt_offset_low; + uint16_t wcs2colidt_offset_hi; +} coldata_base_t; + +typedef struct { + uint16_t base_idx; + uint16_t undefined_idx; + uint16_t overrides_offset; + uint16_t multistart_offset; +} coldata_der_t; + +static int init_cur_collate(int der_num) +{ + __collate_t *cur_collate = &__global_locale.collate; + const uint16_t *__locale_collate_tbl = __locale_mmap->collate_data; + coldata_header_t *cdh; + coldata_base_t *cdb; + coldata_der_t *cdd; + const uint16_t *p; + size_t n; + uint16_t i, w; + + assert(sizeof(coldata_base_t) == 19*2); + assert(sizeof(coldata_der_t) == 4*2); + assert(sizeof(coldata_header_t) == 8*2); + + if (!der_num) { /* C locale... 
special */ + cur_collate->num_weights = 0; + return 1; + } + + --der_num; + + cdh = (coldata_header_t *) __locale_collate_tbl; + + if (der_num >= cdh->num_der) { + return 0; + } + + cdd = (coldata_der_t *)(__locale_collate_tbl + + (sizeof(coldata_header_t) + + cdh->num_base * sizeof(coldata_base_t) + + der_num * sizeof(coldata_der_t) + )/2 ); + + cdb = (coldata_base_t *)(__locale_collate_tbl + + (sizeof(coldata_header_t) + + cdd->base_idx * sizeof(coldata_base_t) + )/2 ); + + memcpy(cur_collate, cdb, offsetof(coldata_base_t,index2weight_offset)); + cur_collate->undefined_idx = cdd->undefined_idx; + + cur_collate->ti_mask = (1 << cur_collate->ti_shift)-1; + cur_collate->ii_mask = (1 << cur_collate->ii_shift)-1; + +/* printf("base=%d num_col_base: %d %d\n", cdd->base_idx ,cur_collate->num_col_base, cdb->num_col_base); */ + + n = (sizeof(coldata_header_t) + cdh->num_base * sizeof(coldata_base_t) + + cdh->num_der * sizeof(coldata_der_t))/2; + +/* printf("n = %d\n", n); */ + cur_collate->index2weight_tbl = __locale_collate_tbl + n + cdb->index2weight_offset; +/* printf("i2w = %d\n", n + cdb->index2weight_offset); */ + n += cdh->num_index2weight; + cur_collate->index2ruleidx_tbl = __locale_collate_tbl + n + cdb->index2ruleidx_offset; +/* printf("i2r = %d\n", n + cdb->index2ruleidx_offset); */ + n += cdh->num_index2ruleidx; + cur_collate->multistart_tbl = __locale_collate_tbl + n + cdd->multistart_offset; +/* printf("mts = %d\n", n + cdb->multistart_offset); */ + n += cdh->num_multistart; + cur_collate->overrides_tbl = __locale_collate_tbl + n + cdd->overrides_offset; +/* printf("ovr = %d\n", n + cdd->overrides_offset); */ + n += cdh->num_override; + cur_collate->ruletable = __locale_collate_tbl + n; +/* printf("rtb = %d\n", n); */ + n += cdh->num_ruletable; + cur_collate->weightstr = __locale_collate_tbl + n; +/* printf("wts = %d\n", n); */ + n += cdh->num_weightstr; + cur_collate->wcs2colidt_tbl = __locale_collate_tbl + n + + (((unsigned 
long)(cdb->wcs2colidt_offset_hi)) << 16) + + cdb->wcs2colidt_offset_low; +/* printf("wcs = %lu\n", n + (((unsigned long)(cdb->wcs2colidt_offset_hi)) << 16) */ +/* + cdb->wcs2colidt_offset_low); */ + + cur_collate->MAX_WEIGHTS = cdh->MAX_WEIGHTS; + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning if calloc fails, this is WRONG. there is also a memory leak here at the moment +#warning fix the +1 by increasing max_col_index? +#endif + cur_collate->index2weight = calloc(2*cur_collate->max_col_index+2, sizeof(uint16_t)); + if (!cur_collate->index2weight) { + return 0; + } + cur_collate->index2ruleidx = cur_collate->index2weight + cur_collate->max_col_index + 1; + + memcpy(cur_collate->index2weight, cur_collate->index2weight_tbl, + cur_collate->num_col_base * sizeof(uint16_t)); + memcpy(cur_collate->index2ruleidx, cur_collate->index2ruleidx_tbl, + cur_collate->num_col_base * sizeof(uint16_t)); + + /* now do the overrides */ + p = cur_collate->overrides_tbl; + while (*p > 1) { +/* fprintf(stderr, "processing override -- count = %d\n", *p); */ + n = *p++; + w = *p++; + do { + i = *p++; +/* fprintf(stderr, " i=%d w=%d *p=%d\n", i, w, *p); */ + cur_collate->index2weight[i-1] = w++; + cur_collate->index2ruleidx[i-1] = *p++; + } while (--n); + } + while (*++p) { + i = *p; + cur_collate->index2weight[i-1] = *++p; + cur_collate->index2ruleidx[i-1] = *++p; + } + + + for (i=0 ; i < cur_collate->multistart_tbl[0] ; i++) { + p = cur_collate->multistart_tbl; +/* fprintf(stderr, "%2d of %2d: %d ", i, cur_collate->multistart_tbl[0], p[i]); */ + p += p[i]; + + do { + n = *p++; + do { + if (!*p) { /* found it */ +/* fprintf(stderr, "found: n=%d (%#lx) |%.*ls|\n", n, (int) *cs->s, n, cs->s); */ +/* fprintf(stderr, ": %d - single\n", n); */ + goto FOUND; + } + /* the lookup check here is safe since we're assured that *p is a valid colidex */ +/* fprintf(stderr, "lookup(%lc)==%d *p==%d\n", cs->s[n], lookup(cs->s[n]), (int) *p); */ +/* fprintf(stderr, ": %d - ", n); */ + do { +/* fprintf(stderr, 
"%d|", *p); */ + } while (*p++); + break; + } while (1); + } while (1); + FOUND: + continue; + } + + return 1; +} + void _locale_init(void) { /* TODO: mmap the locale file */ @@ -427,7 +614,8 @@ void _locale_init(void) __locale_mmap->lc_common_item_offsets_LEN, LC_ALL); - __global_locale.category_offsets[0] = offsetof(__locale_t, codeset); + ++__global_locale.category_item_count[0]; /* Increment for codeset entry. */ + __global_locale.category_offsets[0] = offsetof(__locale_t, outdigit0_mb); __global_locale.category_offsets[1] = offsetof(__locale_t, decimal_point); __global_locale.category_offsets[2] = offsetof(__locale_t, int_curr_symbol); __global_locale.category_offsets[3] = offsetof(__locale_t, abday_1); @@ -489,6 +677,22 @@ void _locale_set(const unsigned char *p) *s = *p; s[1] = p[1]; + if ((i != LC_COLLATE) + && ((len = __locale_mmap->lc_common_item_offsets_LEN[i]) != 0) + ) { + crow = __locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ] + * len; + x = (const char **)(((char *) &__global_locale) + + __global_locale.category_offsets[i]); + stp = __locale_mmap->lc_common_tbl_offsets + 4*i; + r = (const unsigned char *)( ((char *)__locale_mmap) + *stp ); + io = (const uint16_t *)( ((char *)__locale_mmap) + *++stp ); + ii = (const uint16_t *)( ((char *)__locale_mmap) + *++stp ); + d = (const unsigned char *)( ((char *)__locale_mmap) + *++stp ); + for (c=0 ; c < len ; c++) { + *(x + c) = d + ii[ r[crow + c] + io[c] ]; + } + } if (i == LC_CTYPE) { c = __locale_mmap->locales[ WIDTH_LOCALES * row + 2 ]; /* codeset */ if (c <= 2) { @@ -524,22 +728,18 @@ void _locale_set(const unsigned char *p) #endif /* __WCHAR_ENABLED */ #endif /* __CTYPE_HAS_8_BIT_LOCALES */ } - - } else if ((len = __locale_mmap->lc_common_item_offsets_LEN[i]) != 0) { - crow = __locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ] - * len; - x = (const char **)(((char *) &__global_locale) - + __global_locale.category_offsets[i]); - stp = __locale_mmap->lc_common_tbl_offsets + 4*i; - r = (const 
unsigned char *)( ((char *)__locale_mmap) + *stp ); - io = (const uint16_t *)( ((char *)__locale_mmap) + *++stp ); - ii = (const uint16_t *)( ((char *)__locale_mmap) + *++stp ); - d = (const unsigned char *)( ((char *)__locale_mmap) + *++stp ); - for (c=0 ; c < len ; c++) { - *(x + c) = d + ii[ r[crow + c] + io[c] ]; +#ifdef __UCLIBC_MJN3_ONLY__ +#warning might want to just put this in the locale_mmap object +#endif + d = __global_locale.outdigit_length; + x = &__global_locale.outdigit0_mb; + for (c = 0 ; c < 10 ; c++) { + ((unsigned char *)d)[c] = strlen(x[c]); + assert(d[c] > 0); } + } else if (i == LC_COLLATE) { + init_cur_collate(__locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ]); } - } ++i; p += 2; @@ -558,10 +758,10 @@ void _locale_set(const unsigned char *p) #ifdef __LOCALE_C_ONLY -/* We need to index 300 bytes of data, so you might initially think we +/* We need to index 320 bytes of data, so you might initially think we * need to store the offsets in shorts. But since the offset of the - * 64th item is 231, we'll store "offset - 64" for all items >= 64 - * and always calculate the data offset as "offset[i] + (i & 64)". + * 64th item is 182, we'll store "offset - 2*64" for all items >= 64 + * and always calculate the data offset as "offset[i] + 2*(i & 64)". * This allows us to pack the data offsets in an unsigned char while * also avoiding an "if". * @@ -574,63 +774,67 @@ void _locale_set(const unsigned char *p) /* Combine the data to avoid size penalty for seperate char arrays when * compiler aligns objects. The original code is left in as documentation. 
*/ #define cat_start nl_data -#define C_locale_data (nl_data + C_LC_ALL + 1 + 78) - -static const unsigned char nl_data[C_LC_ALL + 1 + 78 + 300] = { -/* static const unsigned char cat_start[C_LC_ALL + 1] = { */ - '\x00', '\x01', '\x04', '\x1a', '\x4c', '\x4c', '\x4e', -/* }; */ -/* static const unsigned char item_offset[78] = { */ - '\x00', '\x06', '\x07', '\x07', '\x07', '\x07', '\x07', '\x07', - '\x07', '\x07', '\x07', '\x08', '\x08', '\x08', '\x08', '\x08', - '\x08', '\x08', '\x08', '\x08', '\x08', '\x08', '\x08', '\x08', - '\x08', '\x0a', '\x0c', '\x10', '\x14', '\x18', '\x1c', '\x20', - '\x24', '\x28', '\x2f', '\x36', '\x3e', '\x48', '\x51', '\x58', - '\x61', '\x65', '\x69', '\x6d', '\x71', '\x75', '\x79', '\x7d', - '\x81', '\x85', '\x89', '\x8d', '\x91', '\x99', '\xa2', '\xa8', - '\xae', '\xb2', '\xb7', '\xbc', '\xc3', '\xcd', '\xd5', '\xde', - '\xa7', '\xaa', '\xad', '\xc2', '\xcb', '\xd4', '\xdf', '\xdf', - '\xdf', '\xdf', '\xdf', '\xdf', '\xe0', '\xe6', -/* }; */ -/* static const unsigned char C_locale_data[300] = { */ - 'A', 'S', 'C', 'I', 'I', '\x00', '.', '\x00', - '\x7f', '\x00', '-', '\x00', 'S', 'u', 'n', '\x00', - 'M', 'o', 'n', '\x00', 'T', 'u', 'e', '\x00', - 'W', 'e', 'd', '\x00', 'T', 'h', 'u', '\x00', - 'F', 'r', 'i', '\x00', 'S', 'a', 't', '\x00', - 'S', 'u', 'n', 'd', 'a', 'y', '\x00', 'M', - 'o', 'n', 'd', 'a', 'y', '\x00', 'T', 'u', - 'e', 's', 'd', 'a', 'y', '\x00', 'W', 'e', - 'd', 'n', 'e', 's', 'd', 'a', 'y', '\x00', - 'T', 'h', 'u', 'r', 's', 'd', 'a', 'y', - '\x00', 'F', 'r', 'i', 'd', 'a', 'y', '\x00', - 'S', 'a', 't', 'u', 'r', 'd', 'a', 'y', - '\x00', 'J', 'a', 'n', '\x00', 'F', 'e', 'b', - '\x00', 'M', 'a', 'r', '\x00', 'A', 'p', 'r', - '\x00', 'M', 'a', 'y', '\x00', 'J', 'u', 'n', - '\x00', 'J', 'u', 'l', '\x00', 'A', 'u', 'g', - '\x00', 'S', 'e', 'p', '\x00', 'O', 'c', 't', - '\x00', 'N', 'o', 'v', '\x00', 'D', 'e', 'c', - '\x00', 'J', 'a', 'n', 'u', 'a', 'r', 'y', - '\x00', 'F', 'e', 'b', 'r', 'u', 'a', 'r', - 'y', '\x00', 'M', 
'a', 'r', 'c', 'h', '\x00', - 'A', 'p', 'r', 'i', 'l', '\x00', 'M', 'a', - 'y', '\x00', 'J', 'u', 'n', 'e', '\x00', 'J', - 'u', 'l', 'y', '\x00', 'A', 'u', 'g', 'u', - 's', 't', '\x00', 'S', 'e', 'p', 't', 'e', - 'm', 'b', 'e', 'r', '\x00', 'O', 'c', 't', - 'o', 'b', 'e', 'r', '\x00', 'N', 'o', 'v', - 'e', 'm', 'b', 'e', 'r', '\x00', 'D', 'e', - 'c', 'e', 'm', 'b', 'e', 'r', '\x00', 'A', - 'M', '\x00', 'P', 'M', '\x00', '%', 'a', ' ', - '%', 'b', ' ', '%', 'e', ' ', '%', 'H', - ':', '%', 'M', ':', '%', 'S', ' ', '%', - 'Y', '\x00', '%', 'm', '/', '%', 'd', '/', - '%', 'y', '\x00', '%', 'H', ':', '%', 'M', - ':', '%', 'S', '\x00', '%', 'I', ':', '%', - 'M', ':', '%', 'S', ' ', '%', 'p', '\x00', - '^', '[', 'y', 'Y', ']', '\x00', '^', '[', - 'n', 'N', ']', '\x00', +#define C_locale_data (nl_data + C_LC_ALL + 1 + 90) + +static const unsigned char nl_data[C_LC_ALL + 1 + 90 + 320] = { +/* static const char cat_start[LC_ALL + 1] = { */ + '\x00', '\x0b', '\x0e', '\x24', '\x56', '\x56', '\x5a', +/* }; */ +/* static const char item_offset[90] = { */ + '\x00', '\x02', '\x04', '\x06', '\x08', '\x0a', '\x0c', '\x0e', + '\x10', '\x12', '\x14', '\x1a', '\x1b', '\x1b', '\x1b', '\x1b', + '\x1b', '\x1b', '\x1b', '\x1b', '\x1b', '\x1c', '\x1c', '\x1c', + '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', + '\x1c', '\x1c', '\x1c', '\x1e', '\x20', '\x24', '\x28', '\x2c', + '\x30', '\x34', '\x38', '\x3c', '\x43', '\x4a', '\x52', '\x5c', + '\x65', '\x6c', '\x75', '\x79', '\x7d', '\x81', '\x85', '\x89', + '\x8d', '\x91', '\x95', '\x99', '\x9d', '\xa1', '\xa5', '\xad', + '\x36', '\x3c', '\x42', '\x46', '\x4b', '\x50', '\x57', '\x61', + '\x69', '\x72', '\x7b', '\x7e', '\x81', '\x96', '\x9f', '\xa8', + '\xb3', '\xb3', '\xb3', '\xb3', '\xb3', '\xb3', '\xb4', '\xba', + '\xbf', '\xbf', +/* }; */ +/* static const char C_locale_data[320] = { */ + '0', '\x00', '1', '\x00', '2', '\x00', '3', '\x00', + '4', '\x00', '5', '\x00', '6', '\x00', '7', '\x00', + '8', '\x00', '9', '\x00', 'A', 
'S', 'C', 'I', + 'I', '\x00', '.', '\x00', '\x7f', '\x00', '-', '\x00', + 'S', 'u', 'n', '\x00', 'M', 'o', 'n', '\x00', + 'T', 'u', 'e', '\x00', 'W', 'e', 'd', '\x00', + 'T', 'h', 'u', '\x00', 'F', 'r', 'i', '\x00', + 'S', 'a', 't', '\x00', 'S', 'u', 'n', 'd', + 'a', 'y', '\x00', 'M', 'o', 'n', 'd', 'a', + 'y', '\x00', 'T', 'u', 'e', 's', 'd', 'a', + 'y', '\x00', 'W', 'e', 'd', 'n', 'e', 's', + 'd', 'a', 'y', '\x00', 'T', 'h', 'u', 'r', + 's', 'd', 'a', 'y', '\x00', 'F', 'r', 'i', + 'd', 'a', 'y', '\x00', 'S', 'a', 't', 'u', + 'r', 'd', 'a', 'y', '\x00', 'J', 'a', 'n', + '\x00', 'F', 'e', 'b', '\x00', 'M', 'a', 'r', + '\x00', 'A', 'p', 'r', '\x00', 'M', 'a', 'y', + '\x00', 'J', 'u', 'n', '\x00', 'J', 'u', 'l', + '\x00', 'A', 'u', 'g', '\x00', 'S', 'e', 'p', + '\x00', 'O', 'c', 't', '\x00', 'N', 'o', 'v', + '\x00', 'D', 'e', 'c', '\x00', 'J', 'a', 'n', + 'u', 'a', 'r', 'y', '\x00', 'F', 'e', 'b', + 'r', 'u', 'a', 'r', 'y', '\x00', 'M', 'a', + 'r', 'c', 'h', '\x00', 'A', 'p', 'r', 'i', + 'l', '\x00', 'M', 'a', 'y', '\x00', 'J', 'u', + 'n', 'e', '\x00', 'J', 'u', 'l', 'y', '\x00', + 'A', 'u', 'g', 'u', 's', 't', '\x00', 'S', + 'e', 'p', 't', 'e', 'm', 'b', 'e', 'r', + '\x00', 'O', 'c', 't', 'o', 'b', 'e', 'r', + '\x00', 'N', 'o', 'v', 'e', 'm', 'b', 'e', + 'r', '\x00', 'D', 'e', 'c', 'e', 'm', 'b', + 'e', 'r', '\x00', 'A', 'M', '\x00', 'P', 'M', + '\x00', '%', 'a', ' ', '%', 'b', ' ', '%', + 'e', ' ', '%', 'H', ':', '%', 'M', ':', + '%', 'S', ' ', '%', 'Y', '\x00', '%', 'm', + '/', '%', 'd', '/', '%', 'y', '\x00', '%', + 'H', ':', '%', 'M', ':', '%', 'S', '\x00', + '%', 'I', ':', '%', 'M', ':', '%', 'S', + ' ', '%', 'p', '\x00', '^', '[', 'y', 'Y', + ']', '\x00', '^', '[', 'n', 'N', ']', '\x00', }; char *nl_langinfo(nl_item item) @@ -641,7 +845,7 @@ char *nl_langinfo(nl_item item) if ((c = _NL_ITEM_CATEGORY(item)) < C_LC_ALL) { if ((i = cat_start[c] + _NL_ITEM_INDEX(item)) < cat_start[c+1]) { /* return (char *) C_locale_data + item_offset[i] + (i & 64); */ - return 
(char *) C_locale_data + nl_data[C_LC_ALL+1+i] + (i & 64); + return (char *) C_locale_data + nl_data[C_LC_ALL+1+i] + 2*(i & 64); } } return (char *) cat_start; /* Conveniently, this is the empty string. */ diff --git a/libc/stdio/printf.c b/libc/stdio/printf.c index 273bf3621..945d3c38d 100644 --- a/libc/stdio/printf.c +++ b/libc/stdio/printf.c @@ -2331,11 +2331,16 @@ static int _do_one_spec(FILE * __restrict stream, } if (ppfs->conv_num <= CONV_i) { /* pointer or (un)signed int */ alphacase = __UIM_LOWER; - if (((base = spec_base[(int)(ppfs->conv_num - CONV_p)]) == 10) - && (PRINT_INFO_FLAG_VAL(&(ppfs->info),group)) - ) { - alphacase = __UIM_GROUP; +#ifndef __LOCALE_C_ONLY + if ((base = spec_base[(int)(ppfs->conv_num - CONV_p)]) == 10) { + if (PRINT_INFO_FLAG_VAL(&(ppfs->info),group)) { + alphacase = __UIM_GROUP; + } + if (PRINT_INFO_FLAG_VAL(&(ppfs->info),i18n)) { + alphacase |= 0x80; + } } +#endif /* __LOCALE_C_ONLY */ if (ppfs->conv_num <= CONV_u) { /* pointer or unsigned int */ if (ppfs->conv_num == CONV_X) { alphacase = __UIM_UPPER; @@ -2350,6 +2355,9 @@ static int _do_one_spec(FILE * __restrict stream, if (ppfs->info.prec < 0) { /* Ignore '0' flag if prec specified. */ padchar = ppfs->info.pad; } +#ifdef __UCLIBC_MJN3_ONLY__ +#warning if using outdigits and/or grouping, how should we interpret precision? +#endif s = _uintmaxtostr(buf + sizeof(buf) - 1, (uintmax_t) _load_inttype(*argtype & __PA_INTMASK, @@ -2557,6 +2565,9 @@ static int _do_one_spec(FILE * __restrict stream, return -1; } +#ifdef __UCLIBC_MJN3_ONLY__ +#warning if using outdigits and/or grouping, how should we pad? 
+#endif { size_t t; diff --git a/libc/stdio/stdio.c b/libc/stdio/stdio.c index cf72a5ccc..e39cf5205 100644 --- a/libc/stdio/stdio.c +++ b/libc/stdio/stdio.c @@ -319,11 +319,11 @@ int putw(int w, FILE *stream) UNLOCKED(int,fileno,(register FILE *stream),(stream)) { #ifdef __STDIO_GLIBC_CUSTOM_STREAMS - return ( ((stream->cookie == &(stream->filedes)) && (stream->filedes >= 0)) + return ( (stream && (stream->cookie == &(stream->filedes)) && (stream->filedes >= 0)) ? stream->filedes : (__set_errno(EBADF), -1) ); #else /* __STDIO_GLIBC_CUSTOM_STREAMS */ - return (stream->filedes >= 0) ? stream->filedes : (__set_errno(EBADF), -1); + return ((stream && stream->filedes >= 0)) ? stream->filedes : (__set_errno(EBADF), -1); #endif /* __STDIO_GLIBC_CUSTOM_STREAMS */ } @@ -3331,7 +3331,7 @@ char *_uintmaxtostr(register char * __restrict bufend, uintmax_t uval, unsigned int H, L, high, low, rh; #endif #ifndef __LOCALE_C_ONLY - int grouping; + int grouping, outdigit; size_t gslen; /* This does not need to be initialized. */ const char *g; /* This does not need to be initialized. */ #endif /* __LOCALE_C_ONLY */ @@ -3350,6 +3350,11 @@ char *_uintmaxtostr(register char * __restrict bufend, uintmax_t uval, #ifndef __LOCALE_C_ONLY grouping = -1; + outdigit = 0x80 & alphacase; + alphacase ^= outdigit; +#ifdef __UCLIBC_MJN3_ONLY_ +#warning implement outdigit... need digit lengths! (put it in locale struct) +#endif if (alphacase == __UIM_GROUP) { assert(base == 10); if (*(g = CUR_LOCALE.grouping) @@ -3391,7 +3396,18 @@ char *_uintmaxtostr(register char * __restrict bufend, uintmax_t uval, digit = uval % base; uval /= base; - *--bufend = ( (digit < 10) ? digit + '0' : digit + alphacase ); +#ifndef __LOCALE_C_ONLY + if (outdigit) { + outdigit = CUR_LOCALE.outdigit_length[digit]; + do { + *--bufend = (&CUR_LOCALE.outdigit0_mb)[digit][--outdigit]; + } while (outdigit); + outdigit = 1; + } else +#endif + { + *--bufend = ( (digit < 10) ? 
digit + '0' : digit + alphacase ); + } } while (uval); #else /* ************************************************** */ @@ -3437,7 +3453,18 @@ char *_uintmaxtostr(register char * __restrict bufend, uintmax_t uval, low = (low / base) + (H * rh) + (digit / base); digit %= base; - *--bufend = ( (digit < 10) ? digit + '0' : digit + alphacase ); +#ifndef __LOCALE_C_ONLY + if (outdigit) { + outdigit = CUR_LOCALE.outdigit_length[digit]; + do { + *--bufend = (&CUR_LOCALE.outdigit0_mb)[digit][--outdigit]; + } while (outdigit); + outdigit = 1; + } else +#endif + { + *--bufend = ( (digit < 10) ? digit + '0' : digit + alphacase ); + } } while (low | high); #endif /******************************************************/ diff --git a/libc/string/Makefile b/libc/string/Makefile index 088058ebe..fe9b64382 100644 --- a/libc/string/Makefile +++ b/libc/string/Makefile @@ -33,13 +33,14 @@ MOBJW= basename.o bcopy.o bzero.o dirname.o ffs.o memccpy.o memchr.o memcmp.o \ strspn.o strstr.o strtok.o strtok_r.o strerror.o _susv3_strerror_r.o \ _string_syserrmsgs.o _glibc_strerror_r.o \ _string_syssigmsgs.o sys_siglist.o strsignal.o psignal.o \ - __xpg_basename.o strlcat.o strlcpy.o sys_errlist.o # strcoll.o + __xpg_basename.o strlcat.o strlcpy.o sys_errlist.o MOBJW2= wcscasecmp.o wcscat.o wcschrnul.o wcschr.o wcscmp.o wcscpy.o wcscspn.o \ wcsdup.o wcslen.o wcsncasecmp.o wcsncat.o wcsncmp.o wcsncpy.o \ wcsnlen.o wcspbrk.o wcsrchr.o wcsspn.o wcsstr.o wcstok.o wmemchr.o \ wmemcmp.o wmemcpy.o wmemmove.o wmempcpy.o wmemset.o wcpcpy.o wcpncpy.o \ - wcsxfrm.o # wcscoll + __wcslcpy.o \ + wcsxfrm.o strxfrm.o # wcscoll strcoll.o OBJS=$(MOBJ) $(MOBJ1) $(MOBJ2) $(MOBJW) diff --git a/libc/string/wstring.c b/libc/string/wstring.c index 531b1c9fd..c1ead6e00 100644 --- a/libc/string/wstring.c +++ b/libc/string/wstring.c @@ -26,6 +26,13 @@ * * ATTENTION! ATTENTION! ATTENTION! ATTENTION! ATTENTION! */ +/* Dec 20, 2002 + * + * Initial test implementation of strcoll, strxfrm, wcscoll, and wcsxfrm. 
+ * The code needs to be cleaned up a good bit, but I'd like to see people + * test it out. + */ + #define _STDIO_UTILITY #define _GNU_SOURCE #include @@ -36,11 +43,12 @@ #include #include #include +#include +#include #ifdef WANT_WIDE #include #include -#include #define Wvoid wchar_t #define Wchar wchar_t @@ -627,17 +635,14 @@ int Wmemcmp(const Wvoid *s1, const Wvoid *s2, size_t n) #ifdef L_strcmp +#ifdef __LOCALE_C_ONLY +#warning c only #ifdef L_wcscmp -#ifdef __UCLIBC_MJN3_ONLY__ -#warning implement wcscoll and remove weak alias (or enable for C locale only) -#endif weak_alias(wcscmp,wcscoll); #else /* L_wcscmp */ -#ifdef __UCLIBC_MJN3_ONLY__ -#warning implement strcoll and remove weak alias (or enable for C locale only) -#endif weak_alias(strcmp,strcoll); #endif /* L_wcscmp */ +#endif /* __LOCALE_C_ONLY */ int Wstrcmp(register const Wchar *s1, register const Wchar *s2) { @@ -659,23 +664,6 @@ int Wstrcmp(register const Wchar *s1, register const Wchar *s2) return r; #endif } -#endif -/**********************************************************************/ -#ifdef L_strcoll -#error implement strcoll and remove weak_alias!! 
- -#if 0 -extern unsigned char *_ctype_collate; -int strcoll(register const char *s1, const char *s2) -{ - int r; - - while (!(r = (_ctype_collate[(int)(*s1++)]-_ctype_collate[(int)(*s2++)]))); - - return r; -} -#endif - #endif /**********************************************************************/ #ifdef L_wcsncmp @@ -713,11 +701,6 @@ int Wstrncmp(register const Wchar *s1, register const Wchar *s2, size_t n) #endif /**********************************************************************/ -#ifdef L_strxfrm -#error implement strxfrm -/* size_t strxfrm(char *dst, const char *src, size_t len); */ -#endif -/**********************************************************************/ #ifdef L_wmemchr #define L_memchr #define Wmemchr wmemchr @@ -1923,28 +1906,37 @@ size_t strlcat(register char *__restrict dst, #endif /**********************************************************************/ -#ifdef L_wcsxfrm +#ifdef WANT_WIDE +extern size_t __wcslcpy(wchar_t *__restrict dst, + const wchar_t *__restrict src, + size_t n); +#endif + + +#ifdef L___wcslcpy #define L_strlcpy -#define Wstrlcpy wcsxfrm +#define Wstrlcpy __wcslcpy +#ifdef __LOCALE_C_ONLY +weak_alias(__wcslcpy,wcsxfrm); +#endif #endif #ifdef L_strlcpy -#ifndef L_wcsxfrm +#ifndef L___wcslcpy #define Wstrlcpy strlcpy -#ifdef __UCLIBC_MJN3_ONLY__ -#warning implement wcscoll and remove weak alias (or enable for C locale only) -#endif +#ifdef __LOCALE_C_ONLY weak_alias(strlcpy,strxfrm); #endif +#endif /* OpenBSD function: * Copy at most n-1 chars from src to dst and nul-terminate dst. * Returns strlen(src), so truncation occurred if the return value is >= n. 
*/ size_t Wstrlcpy(register Wchar *__restrict dst, - register const Wchar *__restrict src, - size_t n) + register const Wchar *__restrict src, + size_t n) { const Wchar *src0 = src; Wchar dummy[1]; @@ -2145,3 +2137,621 @@ void psignal(int signum, register const char *message) #endif /**********************************************************************/ +#ifndef __LOCALE_C_ONLY + +#ifdef L_strxfrm +#ifndef WANT_WIDE +#error WANT_WIDE should be defined for L_strxfrm +#endif +#ifdef L_wcsxfrm +#error L_wcsxfrm already defined for L_strxfrm +#endif + +#define wcscoll strcoll +#define L_wcsxfrm +#undef WANT_WIDE + +#undef Wvoid +#undef Wchar +#undef Wuchar +#undef Wint + +#define Wchar char + +#endif /* L_strxfrm */ + + + +#ifdef L_wcsxfrm + +#define CUR_COLLATE (&__global_locale.collate) + +#define MAX_PENDING 8 + +typedef struct { + const Wchar *s; + const Wchar *eob; /* end of backward */ + + __uwchar_t weight; + __uwchar_t ui_weight; /* undefined or invalid */ + int colitem; + int weightidx; + int rule; + size_t position; + /* should be wchar_t. if wchar < 0 do EILSEQ? */ + __uwchar_t *cip; + __uwchar_t ci_pending[MAX_PENDING]; /* nul-terminated */ + + char *back_buf; + char *bbe; /* end of back_buf (actual last... 
not 1 past end) */ + char *bp; /* ptr into backbuf, NULL if not in backward mode */ + char ibb[128]; + size_t bb_size; + + int ru_pushed; +} col_state_t; + + +#define WEIGHT_MASK 0x3fffU +#define RULE_MASK 0xc000U + +#define RULE_FORWARD (1 << 14) +#define RULE_POSITION (1 << 15) + +#define UI_IDX (WEIGHT_MASK-6) +#define POSIT_IDX (WEIGHT_MASK-5) +#define RANGE_IDX (WEIGHT_MASK-4) +#define UNDEF_IDX (WEIGHT_MASK-3) +#define INVAL_IDX (WEIGHT_MASK-2) +#define DITTO_IDX (WEIGHT_MASK-1) + + +#undef TRACE +#if 0 +#define TRACE(X) printf##X +#else +#define TRACE(X) ((void)0) +#endif + +static int lookup(wchar_t wc) +{ + unsigned int sc, n, i0, i1; + + if (((__uwchar_t) wc) > 0xffffU) { + return 0; + } + + sc = wc & CUR_COLLATE->ti_mask; + wc >>= CUR_COLLATE->ti_shift; + n = wc & CUR_COLLATE->ii_mask; + wc >>= CUR_COLLATE->ii_shift; + + i0 = CUR_COLLATE->wcs2colidt_tbl[wc]; + i0 <<= CUR_COLLATE->ii_shift; + i1 = CUR_COLLATE->wcs2colidt_tbl[CUR_COLLATE->ii_len + i0 + n]; + i1 <<= CUR_COLLATE->ti_shift; + return CUR_COLLATE->wcs2colidt_tbl[CUR_COLLATE->ii_len + CUR_COLLATE->ti_len + i1 + sc]; + +} + +static void init_col_state(col_state_t *cs, const Wchar *wcs) +{ + memset(cs, 0, sizeof(col_state_t)); + cs->s = wcs; + cs->bp = cs->back_buf = cs->ibb; + cs->bb_size = 128; + cs->bbe = cs->back_buf + (cs->bb_size -1); +} + +static void next_weight(col_state_t *cs, int pass) +{ + int r, w, ru, ri, popping_backup_stack; + ssize_t n; + const uint16_t *p; +#ifdef WANT_WIDE +#define WC (*cs->s) +#define N (1) +#else /* WANT_WIDE */ + mbstate_t mbstate; + wchar_t WC; + size_t n0, nx; +#define N n0 + + mbstate.mask = 0; +#endif /* WANT_WIDE */ + + do { + + if (cs->ru_pushed) { + ru = cs->ru_pushed; + TRACE(("ru_pushed = %d\n", ru)); + cs->ru_pushed = 0; + goto POSITION_SKIP; + } + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning should we walk pendings backwards? 
+#endif + if (cs->cip) { /* possible pending weight */ + if ((r = *(cs->cip++)) == 0) { + cs->cip = NULL; + continue; + } + cs->weightidx = r & WEIGHT_MASK; + assert(cs->weightidx); +/* assert(cs->weightidx != WEIGHT_MASK); */ + } else { /* get the next collation item from the string */ + TRACE(("clearing popping flag\n")); + popping_backup_stack = 0; + + IGNORE_LOOP: + /* keep first pos as 0 for a sentinal */ + if (*cs->bp) { /* pending backward chars */ + POP_BACKUP: + popping_backup_stack = 1; + TRACE(("setting popping flag\n")); + n = 0; + if (*cs->bp > 0) { /* singles pending */ + cs->s -= 1; + if ((*cs->bp -= 1) == 0) { + cs->bp -= 1; + } + } else { /* last was a multi */ + cs->s += *cs->bp; + cs->bp -= 1; + } + } else if (!*cs->s) { /* not in backward mode and end of string */ + cs->weight = 0; + return; + } else { + cs->position += 1; + } + + BACK_LOOP: +#ifdef WANT_WIDE + n = 1; + cs->colitem = r = lookup(*cs->s); +#else /* WANT_WIDE */ + n = n0 = mbrtowc(&WC, cs->s, SIZE_MAX, &mbstate); + if (n < 0) { + __set_errno(EILSEQ); + cs->weight = 0; + return; + } + cs->colitem = r = lookup(WC); +#endif /* WANT_WIDE */ + + TRACE((" r=%d WC=%#lx\n", r, (unsigned long)(WC))); + + if (r > CUR_COLLATE->max_col_index) { /* starting char for one or more sequences */ + p = CUR_COLLATE->multistart_tbl; + p += p[r-CUR_COLLATE->max_col_index -1]; + do { + n = N; + r = *p++; + do { + if (!*p) { /* found it */ + cs->colitem = r; + TRACE((" found multi %d\n", n)); + goto FOUND; + } +#ifdef WANT_WIDE + /* the lookup check here is safe since we're assured that *p is a valid colidx */ + if (!cs->s[n] || (lookup(cs->s[n]) != *p)) { + do {} while (*p++); + break; + } + ++p; + ++n; +#else /* WANT_WIDE */ + if (cs->s[n]) { + nx = mbrtowc(&WC, cs->s + n, SIZE_MAX, &mbstate); + if (nx < 0) { + __set_errno(EILSEQ); + cs->weight = 0; + return; + } + } + if (!cs->s[n] || (lookup(WC) != *p)) { + do {} while (*p++); + break; + } + ++p; + n += nx; +#endif /* WANT_WIDE */ + } while (1); + } 
while (1); + } else if (r == 0) { /* illegal, undefined, or part of a range */ + if ((CUR_COLLATE->range_count) +#ifdef __UCLIBC_MJN3_ONLY__ +#warning .. need to introduce range as a collating item? +#endif + && (((__uwchar_t)(WC - CUR_COLLATE->range_low)) <= CUR_COLLATE->range_count) + ) { /* part of a range */ + /* Note: cs->colitem = 0 already. */ + TRACE((" found range\n")); + ru = CUR_COLLATE->ruletable[CUR_COLLATE->range_rule_offset*CUR_COLLATE->MAX_WEIGHTS + pass]; + assert((ru & WEIGHT_MASK) != DITTO_IDX); + if ((ru & WEIGHT_MASK) == WEIGHT_MASK) { + ru = (ru & RULE_MASK) | RANGE_IDX; + cs->weight = CUR_COLLATE->range_base_weight + (WC - CUR_COLLATE->range_low); + } + goto RANGE_SKIP_TO; + } else if (((__uwchar_t)(WC)) <= 0x7fffffffUL) { /* legal but undefined */ + UNDEFINED: + /* Note: cs->colitem = 0 already. */ + ri = CUR_COLLATE->undefined_idx; + assert(ri != 0); /* implicit undefined isn't supported */ + + TRACE((" found explicit UNDEFINED\n")); +#ifdef __UCLIBC_MJN3_ONLY__ +#warning right now single weight locales do not support .. +#endif + if (CUR_COLLATE->num_weights == 1) { + TRACE((" single weight UNDEFINED\n")); + cs->weightidx = RANGE_IDX; + cs->weight = ri; + cs->s += n; + goto PROCESS_WEIGHT; + } + + ri = CUR_COLLATE->index2ruleidx[ri - 1]; + ru = CUR_COLLATE->ruletable[ri * CUR_COLLATE->MAX_WEIGHTS + pass]; + assert((ru & WEIGHT_MASK) != WEIGHT_MASK); /* TODO: handle ".." */ + if ((ru & WEIGHT_MASK) == DITTO_IDX) { + cs->colitem = CUR_COLLATE->undefined_idx; + } + goto RANGE_SKIP_TO; + } else { /* illegal */ + TRACE((" found illegal\n")); + __set_errno(EINVAL); + /* We put all illegals in the same equiv class with maximal weight, + * and ignore them after the first pass. 
*/ + if (pass > 0) { + cs->s += n; + goto IGNORE_LOOP; + } + ru = (RULE_FORWARD | RANGE_IDX); + cs->weight = 0xffffU; + goto RANGE_SKIP_TO; + } + } else if (CUR_COLLATE->num_weights == 1) { + TRACE((" single weight\n")); + cs->weightidx = RANGE_IDX; + cs->weight = cs->colitem; + cs->s += n; + goto PROCESS_WEIGHT; + } else { + TRACE((" normal\n")); + } + + /* if we get here, it is a normal char either singlely weighted, undefined, or in a range */ + FOUND: + ri = CUR_COLLATE->index2ruleidx[cs->colitem - 1]; + TRACE((" ri=%d ", ri)); +#ifdef __UCLIBC_MJN3_ONLY__ +#warning make sure this is correct +#endif + if (!ri) { + TRACE(("NOT IN THIS LOCALE\n")); + goto UNDEFINED; + } + ru = CUR_COLLATE->ruletable[ri * CUR_COLLATE->MAX_WEIGHTS + pass]; + + RANGE_SKIP_TO: + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning ignoreables probably should not interrupt backwards processing, but this is wrong +#endif +/* if (!(ru & WEIGHT_MASK)) { */ +/* TRACE(("IGNORE\n")); */ +/* cs->s += n; */ +/* continue; */ +/* } */ + + + TRACE((" rule = %#x weight = %#x popping = %d s = %p eob = %p\n", + ru & RULE_MASK, ru & WEIGHT_MASK, popping_backup_stack, + cs->s, cs->eob)); + /* now we need to check if we're going backwards... */ + + if (!popping_backup_stack) { + if (!(ru & RULE_MASK)) { /* backward */ + TRACE(("backwards\n")); + assert(cs->bp <= cs->bbe); + if (cs->bp == cs->bbe) { + if (cs->back_buf == cs->ibb) { /* was using internal buffer */ + cs->bp = malloc(cs->bb_size + 128); + if (!cs->bp) { + __set_errno(ENOMEM); +#ifdef __UCLIBC_MJN3_ONLY__ +#warning what to do here? +#endif + cs->weight = 0; + return; + } + memcpy(cs->bp, cs->back_buf, cs->bb_size); + + } else { + cs->bp = realloc(cs->back_buf, cs->bb_size + 128); + if (!cs->bp) { + __set_errno(ENOMEM); +#ifdef __UCLIBC_MJN3_ONLY__ +#warning what to do here? 
+#endif + cs->weight = 0; + return; + } + } + cs->bb_size += 128; + cs->bbe = cs->bp + (cs->bbe - cs->back_buf); + cs->back_buf = cs->bp; + cs->bp = cs->bbe; + + } + if (n==1) { /* single char */ + if (*cs->bp && (((unsigned char)(*cs->bp)) < CHAR_MAX)) { + *cs->bp += 1; /* increment last single's count */ + } else { /* last was a multi, or just starting */ + if (!cs->bp) { + cs->bp = cs->back_buf; + } else { + assert(cs->bp < cs->bbe); + ++cs->bp; + } + *cs->bp = 1; + } + } else { /* multichar */ + assert(n>1); + assert(cs->bp < cs->bbe); + *++cs->bp = -n; + } + cs->s += n; + if (*cs->s) { + goto BACK_LOOP; + } + /* end-of-string so start popping */ + cs->eob = cs->s; + TRACE(("popping\n")); + goto POP_BACKUP; + } else if (*cs->bp) { /* was going backward but this element isn't */ + /* discard current and use previous backward element */ + assert(!cs->cip); + cs->eob = cs->s; + TRACE(("popping\n")); + goto POP_BACKUP; + } else { /* was and still going forward */ + TRACE(("forwards\n")); + if ((ru & (RULE_POSITION|WEIGHT_MASK)) > RULE_POSITION) { + assert(ru & WEIGHT_MASK); + cs->ru_pushed = ru; + cs->weight = cs->position; +#ifdef __UCLIBC_MJN3_ONLY__ +#warning devel code +#endif + cs->position = 0; /* reset to reduce size for strcoll? */ + cs->s += n; + cs->weightidx = RANGE_IDX; + goto PROCESS_WEIGHT; + } + } + } else { /* popping backwards stack */ + TRACE(("popping (continued)\n")); + if (!*cs->bp) { + cs->s = cs->eob; + } + cs->s -= n; + } + + cs->s += n; + POSITION_SKIP: + cs->weightidx = ru & WEIGHT_MASK; + cs->rule = ru & RULE_MASK; + } + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning for pending we only want the weight... 
_not_ the rule +#endif + if (!cs->weightidx) { /* ignore */ + continue; + } + + PROCESS_WEIGHT: + assert(cs->weightidx); + + + if (((unsigned int)(cs->weightidx - UI_IDX)) <= (INVAL_IDX-UI_IDX)) { + if (cs->weightidx == UI_IDX) { + cs->weight = cs->ui_weight; + } + return; + } + + assert(cs->weightidx != WEIGHT_MASK); + if (cs->weightidx == DITTO_IDX) { /* want the weight of the current collating item */ + TRACE(("doing ditto\n")); + w = CUR_COLLATE->index2weight[cs->colitem -1]; + } else if (cs->weightidx <= CUR_COLLATE->max_col_index) { /* normal */ + TRACE(("doing normal\n")); + w = CUR_COLLATE->index2weight[cs->weightidx -1]; + } else { /* a string */ + TRACE(("doing string\n")); + assert(!(cs->weightidx & RULE_MASK)); + /* note: iso14561 allows null string here */ + p = CUR_COLLATE->weightstr + (cs->weightidx - (CUR_COLLATE->max_col_index + 2)); + if (*p & WEIGHT_MASK) { + r = 0; + do { + assert(r < MAX_PENDING); + cs->ci_pending[r++] = *p++; + } while (*p & WEIGHT_MASK); + cs->cip = cs->ci_pending; + } + continue; + } + + cs->weight = w; + return; + } while (1); +}
+
+/*
+ * Compare s0 and s1 using the collation data of the current locale.
+ *
+ * When the locale defines no weight levels (the C locale) the strings are
+ * compared with wcscmp()/strcmp().  Otherwise the two strings are walked in
+ * lock-step once per weight level: the first pair of weights that differs
+ * decides the result, and a level is finished once the (equal) weights
+ * reach 0, the end-of-pass marker.
+ *
+ * Returns the difference of the first differing pair of weights, or 0 if
+ * no level tells the strings apart.
+ */
+int wcscoll (const Wchar *s0, const Wchar *s1)
+{
+	col_state_t st[2];		/* st[0] walks s0, st[1] walks s1 */
+	int level;
+
+#ifdef WANT_WIDE
+	if (!CUR_COLLATE->num_weights) { /* C locale */
+		return wcscmp(s0, s1);
+	}
+#else  /* WANT_WIDE */
+	if (!CUR_COLLATE->num_weights) { /* C locale */
+		return strcmp(s0, s1);
+	}
+#endif /* WANT_WIDE */
+
+	/* num_weights is known to be non-zero here, so level 0 always runs */
+	for (level = 0; level < CUR_COLLATE->num_weights; ++level) {
+		/* every level restarts both strings from the beginning */
+		init_col_state(st, s0);
+		init_col_state(st + 1, s1);
+		for (;;) {
+			/* pull the next weight of this level from each string */
+			next_weight(st, level);
+			next_weight(st + 1, level);
+			TRACE(("w0=%lu w1=%lu\n",
+				   (unsigned long) st[0].weight,
+				   (unsigned long) st[1].weight));
+
+			if (st[0].weight != st[1].weight) {
+				return st[0].weight - st[1].weight;
+			}
+			if (!st[0].weight) {	/* equal and 0: level exhausted */
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
+
+#ifdef WANT_WIDE
+
+/*
+ * Write the collation weights of ws2 into ws1, using at most n elements.
+ *
+ * In the C locale this is just __wcslcpy().  Otherwise, for every weight
+ * level, each weight w produced by next_weight() is stored as w + 1 (so a
+ * stored element is never 0), the slot of the level's terminating weight
+ * holds 1, and the very last slot is finally set to 0.  Elements that do
+ * not fit into n are counted but not stored, and the terminating 0 is only
+ * written when the whole result fits.
+ *
+ * Returns the number of elements of the complete result, not counting the
+ * terminating 0.
+ */
+size_t wcsxfrm(wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t n)
+{
+	col_state_t state;
+	size_t used;			/* elements produced so far, stored or not */
+	int level;
+
+	if (!CUR_COLLATE->num_weights) { /* C locale */
+		return __wcslcpy(ws1, ws2, n);
+	}
+
+#ifdef __UCLIBC_MJN3_ONLY__
+#warning handle empty string as a special case
+#endif
+
+	used = 0;
+	/* num_weights is known to be non-zero here, so level 0 always runs */
+	for (level = 0; level < CUR_COLLATE->num_weights; ++level) {
+		init_col_state(&state, ws2);
+		do {				/* one iteration per weight of this level */
+			next_weight(&state, level);
+			TRACE(("weight=%lu (%#lx)\n", (unsigned long) state.weight, (unsigned long) state.weight));
+			if (used < n) {
+				ws1[used] = state.weight +1;
+			}
+			++used;
+			TRACE(("--------------------------------------------\n"));
+		} while (state.weight);
+		if (used <= n) {		/* slot of the end-of-pass marker */
+			ws1[used-1] = 1;
+		}
+		TRACE(("-------------------- pass %d --------------------\n", level));
+	}
+	if (used <= n) {			/* the last marker becomes the terminator */
+		ws1[used-1] = 0;
+	}
+	return used-1;
+}
+
+#else  /* WANT_WIDE */
+
+/* A value smaller than bound[i] is stored by store() in i+1 bytes. */
+static const unsigned long bound[] = {
+	1UL << 7,		/* 1 byte  */
+	1UL << 11,		/* 2 bytes */
+	1UL << 16,		/* 3 bytes */
+	1UL << 21,		/* 4 bytes */
+	1UL << 26,		/* 5 bytes; anything larger takes 6 */
+};
+
+/* first[i] is the marker or'ed into the lead byte of an (i+1)-byte sequence. */
+static unsigned char first[] = {
+	0x0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
+};
+
+/* Use an extension of UTF-8 to store a 32 bit val in max 6 bytes.
*/ + +static size_t store(unsigned char *s, size_t count, size_t n, __uwchar_t weight) +{ + int i, r; + + i = 0; + do { + if (weight < bound[i]) { + break; + } + } while (++i < sizeof(bound)/sizeof(bound[0])); + + r = i+1; + if (i + count < n) { + s += count; + s[0] = first[i]; + while (i) { + s[i] = 0x80 | (weight & 0x3f); + weight >>= 6; + --i; + } + s[0] |= weight; + } + + return r; +} + +size_t strxfrm(char *__restrict ws1, const char *__restrict ws2, size_t n) +{ + col_state_t cs; + size_t count, inc; + int pass; + + if (!CUR_COLLATE->num_weights) { /* C locale */ + return strlcpy(ws1, ws2, n); + } + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning handle empty string as a special case +#endif + + inc = count = pass = 0; + do { /* loop through the weights levels */ + init_col_state(&cs, ws2); + do { /* loop through the string */ + next_weight(&cs, pass); + TRACE(("weight=%lu (%#lx)\n", (unsigned long) cs.weight, (unsigned long) cs.weight)); + inc = store((unsigned char *)ws1, count, n, cs.weight + 1); + count += inc; + TRACE(("--------------------------------------------\n")); + } while (cs.weight); + /* overwrite the trailing 0 end-of-pass marker */ + assert(inc == 1); + if (count <= n) { + ws1[count-1] = 1; + } + TRACE(("-------------------- pass %d --------------------\n", pass)); + } while (++pass < CUR_COLLATE->num_weights); + if (count <= n) { /* oops... change it back */ + ws1[count-1] = 0; + } + return count-1; +} + + +#endif /* WANT_WIDE */ + +#endif /* wcscoll */ + +#endif /* __LOCALE_C_ONLY */ +/**********************************************************************/ + diff --git a/libc/sysdeps/linux/common/bits/uClibc_locale.h b/libc/sysdeps/linux/common/bits/uClibc_locale.h index 8025005ab..4e89188b8 100644 --- a/libc/sysdeps/linux/common/bits/uClibc_locale.h +++ b/libc/sysdeps/linux/common/bits/uClibc_locale.h @@ -120,6 +120,46 @@ enum { * In particular, C/POSIX locale is '#' + "\x80\x01"}*LC_ALL + nul. 
*/
+typedef struct {	/* collation data of a locale; presumably what CUR_COLLATE points at in the collation code -- confirm */
+	uint16_t num_weights;	/* number of weight levels (passes); 0 makes wcscoll/wcsxfrm/strxfrm take their C-locale path, 1 takes the single-weight shortcuts in next_weight */
+	uint16_t num_starters;
+	uint16_t ii_shift;
+	uint16_t ti_shift;
+	uint16_t ii_len;
+	uint16_t ti_len;
+	uint16_t max_weight;
+	uint16_t num_col_base;
+	uint16_t max_col_index;	/* weight indices <= this are looked up in index2weight; larger ones select an entry of weightstr */
+	uint16_t undefined_idx;	/* collating-item index used for legal but undefined chars; next_weight asserts it is non-zero */
+	uint16_t range_low;	/* first wide char of the range: WC is in the range when (WC - range_low) <= range_count, compared unsigned */
+	uint16_t range_count;	/* 0 disables the range test in next_weight */
+	uint16_t range_base_weight;	/* a range member may get the weight range_base_weight + (WC - range_low) */
+	uint16_t range_rule_offset; /* change name to index? */	/* used as the ruletable row (times MAX_WEIGHTS) for range members */
+
+	uint16_t ii_mask;
+	uint16_t ti_mask;
+
+	const uint16_t *index2weight_tbl;
+	const uint16_t *index2ruleidx_tbl;
+	const uint16_t *multistart_tbl;
+	/* uint16_t wcs2colidt_offset_low; */
+	/* uint16_t wcs2colidt_offset_hi; */
+	const uint16_t *wcs2colidt_tbl;
+
+	/* uint16_t undefined_idx; */
+	const uint16_t *overrides_tbl;
+	/* uint16_t *multistart_tbl; */
+
+	const uint16_t *weightstr;	/* weight strings; next_weight starts reading at weightstr + (weightidx - (max_col_index + 2)) and stops at an entry with no WEIGHT_MASK bits */
+	const uint16_t *ruletable;	/* rules, read as ruletable[row * MAX_WEIGHTS + pass] */
+
+
+	uint16_t *index2weight;	/* weight of a collating item, indexed by (item index - 1) */
+	uint16_t *index2ruleidx;	/* ruletable row of a collating item, indexed by (item index - 1); 0 sends next_weight to its UNDEFINED handling */
+
+	uint16_t MAX_WEIGHTS;	/* number of entries per ruletable row */
+} __collate_t;
+
/* static unsigned char cur_locale[LOCALE_STRING_SIZE]; */ @@ -138,8 +178,7 @@ typedef struct { /* ctype */ unsigned char encoding; /* C/POSIX, 8-bit, UTF-8 */ unsigned char mb_cur_max; /* determined by encoding _AND_ translit!!! */ - - const char *codeset; + const unsigned char outdigit_length[10]; #ifdef __CTYPE_HAS_8_BIT_LOCALES const unsigned char *idx8ctype; @@ -162,6 +201,19 @@ typedef struct { /* width?? */ #endif /* __WCHAR_ENABLED */ + /* ctype */ + const char *outdigit0_mb; + const char *outdigit1_mb; + const char *outdigit2_mb; + const char *outdigit3_mb; + const char *outdigit4_mb; + const char *outdigit5_mb; + const char *outdigit6_mb; + const char *outdigit7_mb; + const char *outdigit8_mb; + const char *outdigit9_mb; + const char *codeset; /* MUST BE LAST!!!
*/ + /* numeric */ const char *decimal_point; const char *thousands_sep; @@ -250,11 +302,16 @@ typedef struct { const char *era_d_t_fmt; const char *era_t_fmt; - /* collate */ + /* collate is at the end */ /* messages */ const char *yesexpr; const char *noexpr; + const char *yesstr; + const char *nostr; + + /* collate is at the end */ + __collate_t collate; } __locale_t; -- cgit v1.2.3