diff options
Diffstat (limited to 'libc')
| -rw-r--r-- | libc/misc/locale/locale.c | 356 | ||||
| -rw-r--r-- | libc/stdio/printf.c | 19 | ||||
| -rw-r--r-- | libc/stdio/stdio.c | 37 | ||||
| -rw-r--r-- | libc/string/Makefile | 5 | ||||
| -rw-r--r-- | libc/string/wstring.c | 684 | ||||
| -rw-r--r-- | libc/sysdeps/linux/common/bits/uClibc_locale.h | 63 | 
6 files changed, 1037 insertions, 127 deletions
diff --git a/libc/misc/locale/locale.c b/libc/misc/locale/locale.c index 0875a4e5b..9c162a980 100644 --- a/libc/misc/locale/locale.c +++ b/libc/misc/locale/locale.c @@ -22,6 +22,10 @@   *   query locale settings should now work... at the cost of almost   *   doubling the size of the setlocale object code.   * Fixed a bug in the internal fixed-size-string locale specifier code. + * + * Dec 20, 2002 + * + * Added in collation support and updated stub nl_langinfo.   */ @@ -415,6 +419,189 @@ struct lconv *localeconv(void)  __locale_t __global_locale; +typedef struct { +	uint16_t num_base; +	uint16_t num_der; +	uint16_t MAX_WEIGHTS; +	uint16_t num_index2weight; +#define num_index2ruleidx num_index2weight +	uint16_t num_weightstr; +	uint16_t num_multistart; +	uint16_t num_override; +	uint16_t num_ruletable; +} coldata_header_t; + +typedef struct { +	uint16_t num_weights; +	uint16_t num_starters; +	uint16_t ii_shift; +	uint16_t ti_shift; +	uint16_t ii_len; +	uint16_t ti_len; +	uint16_t max_weight; +	uint16_t num_col_base; +	uint16_t max_col_index; +	uint16_t undefined_idx; +	uint16_t range_low; +	uint16_t range_count; +	uint16_t range_base_weight; +	uint16_t range_rule_offset; + +	uint16_t index2weight_offset; +	uint16_t index2ruleidx_offset; +	uint16_t multistart_offset; +	uint16_t wcs2colidt_offset_low; +	uint16_t wcs2colidt_offset_hi; +} coldata_base_t; + +typedef struct { +	uint16_t base_idx; +	uint16_t undefined_idx; +	uint16_t overrides_offset; +	uint16_t multistart_offset; +} coldata_der_t; + +static int init_cur_collate(int der_num) +{ +	__collate_t *cur_collate = &__global_locale.collate; +	const uint16_t *__locale_collate_tbl = __locale_mmap->collate_data; +	coldata_header_t *cdh; +	coldata_base_t *cdb; +	coldata_der_t *cdd; +	const uint16_t *p; +	size_t n; +	uint16_t i, w; + +	assert(sizeof(coldata_base_t) == 19*2); +	assert(sizeof(coldata_der_t) == 4*2); +	assert(sizeof(coldata_header_t) == 8*2); + +	if (!der_num) { 			/* C locale... 
special */ +		cur_collate->num_weights = 0; +		return 1; +	} + +	--der_num; + +	cdh = (coldata_header_t *) __locale_collate_tbl; + +	if (der_num >= cdh->num_der) { +		return 0; +	} + +	cdd = (coldata_der_t *)(__locale_collate_tbl +							+ (sizeof(coldata_header_t) +							   + cdh->num_base * sizeof(coldata_base_t) +							   + der_num * sizeof(coldata_der_t) +							   )/2 ); + +	cdb = (coldata_base_t *)(__locale_collate_tbl +							 + (sizeof(coldata_header_t) +								+ cdd->base_idx * sizeof(coldata_base_t) +								)/2 ); + +	memcpy(cur_collate, cdb, offsetof(coldata_base_t,index2weight_offset)); +	cur_collate->undefined_idx = cdd->undefined_idx; + +	cur_collate->ti_mask = (1 << cur_collate->ti_shift)-1; +	cur_collate->ii_mask = (1 << cur_collate->ii_shift)-1; + +/*	 printf("base=%d  num_col_base: %d  %d\n", cdd->base_idx ,cur_collate->num_col_base, cdb->num_col_base); */ + +	n = (sizeof(coldata_header_t) + cdh->num_base * sizeof(coldata_base_t) +		 + cdh->num_der * sizeof(coldata_der_t))/2; + +/*	 printf("n   = %d\n", n); */ +	cur_collate->index2weight_tbl = __locale_collate_tbl + n + cdb->index2weight_offset; +/*	 printf("i2w = %d\n", n + cdb->index2weight_offset); */ +	n += cdh->num_index2weight; +	cur_collate->index2ruleidx_tbl = __locale_collate_tbl + n + cdb->index2ruleidx_offset; +/*	 printf("i2r = %d\n", n + cdb->index2ruleidx_offset); */ +	n += cdh->num_index2ruleidx; +	cur_collate->multistart_tbl = __locale_collate_tbl + n + cdd->multistart_offset; +/*	 printf("mts = %d\n", n + cdb->multistart_offset); */ +	n += cdh->num_multistart; +	cur_collate->overrides_tbl = __locale_collate_tbl + n + cdd->overrides_offset; +/*	 printf("ovr = %d\n", n + cdd->overrides_offset); */ +	n += cdh->num_override; +	cur_collate->ruletable = __locale_collate_tbl + n; +/*	 printf("rtb = %d\n", n); */ +	n += cdh->num_ruletable; +	cur_collate->weightstr = __locale_collate_tbl + n; +/*	 printf("wts = %d\n", n); */ +	n += cdh->num_weightstr; +	cur_collate->wcs2colidt_tbl = 
__locale_collate_tbl + n +		+ (((unsigned long)(cdb->wcs2colidt_offset_hi)) << 16) +		+ cdb->wcs2colidt_offset_low; +/*	 printf("wcs = %lu\n", n	+ (((unsigned long)(cdb->wcs2colidt_offset_hi)) << 16) */ +/* 		   + cdb->wcs2colidt_offset_low); */ + +	cur_collate->MAX_WEIGHTS = cdh->MAX_WEIGHTS; + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning if calloc fails, this is WRONG.  there is also a memory leak here at the moment +#warning fix the +1 by increasing max_col_index? +#endif +	cur_collate->index2weight = calloc(2*cur_collate->max_col_index+2, sizeof(uint16_t)); +	if (!cur_collate->index2weight) { +		return 0; +	} +	cur_collate->index2ruleidx = cur_collate->index2weight + cur_collate->max_col_index + 1; + +	memcpy(cur_collate->index2weight, cur_collate->index2weight_tbl, +		   cur_collate->num_col_base * sizeof(uint16_t)); +	memcpy(cur_collate->index2ruleidx, cur_collate->index2ruleidx_tbl, +		   cur_collate->num_col_base * sizeof(uint16_t)); + +	/* now do the overrides */ +	p = cur_collate->overrides_tbl; +	while (*p > 1) { +/* 		fprintf(stderr, "processing override -- count = %d\n", *p); */ +		n = *p++; +		w = *p++; +		do { +			i = *p++; +/* 			fprintf(stderr, "	i=%d w=%d *p=%d\n", i, w, *p); */ +			cur_collate->index2weight[i-1] = w++; +			cur_collate->index2ruleidx[i-1] = *p++; +		} while (--n); +	} +	while (*++p) { +		i = *p; +		cur_collate->index2weight[i-1] = *++p; +		cur_collate->index2ruleidx[i-1] = *++p; +	} + + +	for (i=0 ; i < cur_collate->multistart_tbl[0] ; i++) { +		p = cur_collate->multistart_tbl; +/* 		fprintf(stderr, "%2d of %2d: %d ", i,  cur_collate->multistart_tbl[0], p[i]); */ +		p += p[i]; + +		do { +			n = *p++; +			do { +				if (!*p) {		/* found it */ +/* 					fprintf(stderr, "found: n=%d (%#lx) |%.*ls|\n", n, (int) *cs->s, n, cs->s); */ +/* 					fprintf(stderr, ": %d - single\n", n); */ +					goto FOUND; + 				} +				/* the lookup check here is safe since we're assured that *p is a valid colidex */ +/* 				fprintf(stderr, "lookup(%lc)==%d  
*p==%d\n", cs->s[n], lookup(cs->s[n]), (int) *p); */ +/* 				fprintf(stderr, ": %d - ", n); */ +				do { +/* 					fprintf(stderr, "%d|",  *p); */ +				} while (*p++); +				break; +			} while (1); +		} while (1); +	FOUND: +		continue; +	} + +	return 1; +} +  void _locale_init(void)  {  	/* TODO: mmap the locale file  */ @@ -427,7 +614,8 @@ void _locale_init(void)  		   __locale_mmap->lc_common_item_offsets_LEN,  		   LC_ALL); -	__global_locale.category_offsets[0] = offsetof(__locale_t, codeset); +	++__global_locale.category_item_count[0]; /* Increment for codeset entry. */ +	__global_locale.category_offsets[0] = offsetof(__locale_t, outdigit0_mb);  	__global_locale.category_offsets[1] = offsetof(__locale_t, decimal_point);  	__global_locale.category_offsets[2] = offsetof(__locale_t, int_curr_symbol);  	__global_locale.category_offsets[3] = offsetof(__locale_t, abday_1); @@ -489,6 +677,22 @@ void _locale_set(const unsigned char *p)  			*s = *p;  			s[1] = p[1]; +			if ((i != LC_COLLATE) +				&& ((len = __locale_mmap->lc_common_item_offsets_LEN[i]) != 0) +				) { +				crow = __locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ] +					* len; +				x = (const char **)(((char *) &__global_locale) +									+ __global_locale.category_offsets[i]); +				stp = __locale_mmap->lc_common_tbl_offsets + 4*i; +				r = (const unsigned char *)( ((char *)__locale_mmap) + *stp ); +				io = (const uint16_t *)( ((char *)__locale_mmap) + *++stp ); +				ii = (const uint16_t *)( ((char *)__locale_mmap) + *++stp ); +				d = (const unsigned char *)( ((char *)__locale_mmap) + *++stp ); +				for (c=0 ; c < len ; c++) { +					*(x + c) = d + ii[ r[crow + c] + io[c] ]; +				} +			}  			if (i == LC_CTYPE) {  				c = __locale_mmap->locales[ WIDTH_LOCALES * row + 2 ]; /* codeset */  				if (c <= 2) { @@ -524,22 +728,18 @@ void _locale_set(const unsigned char *p)  #endif /* __WCHAR_ENABLED */  #endif /* __CTYPE_HAS_8_BIT_LOCALES */  				} - -			} else if ((len = 
__locale_mmap->lc_common_item_offsets_LEN[i]) != 0) { -				crow = __locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ] -					* len; -				x = (const char **)(((char *) &__global_locale) -									+ __global_locale.category_offsets[i]); -				stp = __locale_mmap->lc_common_tbl_offsets + 4*i; -				r = (const unsigned char *)( ((char *)__locale_mmap) + *stp ); -				io = (const uint16_t *)( ((char *)__locale_mmap) + *++stp ); -				ii = (const uint16_t *)( ((char *)__locale_mmap) + *++stp ); -				d = (const unsigned char *)( ((char *)__locale_mmap) + *++stp ); -				for (c=0 ; c < len ; c++) { -					*(x + c) = d + ii[ r[crow + c] + io[c] ]; +#ifdef __UCLIBC_MJN3_ONLY__ +#warning might want to just put this in the locale_mmap object +#endif +				d = __global_locale.outdigit_length; +				x = &__global_locale.outdigit0_mb; +				for (c = 0 ; c < 10 ; c++) { +					((unsigned char *)d)[c] = strlen(x[c]); +					assert(d[c] > 0);  				} +			} else if (i == LC_COLLATE) { +				init_cur_collate(__locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ]);  			} -  		}  		++i;  		p += 2; @@ -558,10 +758,10 @@ void _locale_set(const unsigned char *p)  #ifdef __LOCALE_C_ONLY -/* We need to index 300 bytes of data, so you might initially think we +/* We need to index 320 bytes of data, so you might initially think we   * need to store the offsets in shorts.  But since the offset of the - * 64th item is 231, we'll store "offset - 64" for all items >= 64 - * and always calculate the data offset as "offset[i] + (i & 64)". + * 64th item is 182, we'll store "offset - 2*64" for all items >= 64 + * and always calculate the data offset as "offset[i] + 2*(i & 64)".   * This allows us to pack the data offsets in an unsigned char while   * also avoiding an "if".   * @@ -574,63 +774,67 @@ void _locale_set(const unsigned char *p)  /* Combine the data to avoid size penalty for seperate char arrays when   * compiler aligns objects.  The original code is left in as documentation. 
*/  #define cat_start nl_data -#define C_locale_data (nl_data + C_LC_ALL + 1 + 78) - -static const unsigned char nl_data[C_LC_ALL + 1 + 78 + 300] = { -/*  static const unsigned char cat_start[C_LC_ALL + 1] = { */ -	'\x00', '\x01', '\x04', '\x1a', '\x4c', '\x4c', '\x4e',  -/*  }; */ -/*  static const unsigned char item_offset[78] = { */ -	'\x00', '\x06', '\x07', '\x07', '\x07', '\x07', '\x07', '\x07',  -	'\x07', '\x07', '\x07', '\x08', '\x08', '\x08', '\x08', '\x08',  -	'\x08', '\x08', '\x08', '\x08', '\x08', '\x08', '\x08', '\x08',  -	'\x08', '\x0a', '\x0c', '\x10', '\x14', '\x18', '\x1c', '\x20',  -	'\x24', '\x28', '\x2f', '\x36', '\x3e', '\x48', '\x51', '\x58',  -	'\x61', '\x65', '\x69', '\x6d', '\x71', '\x75', '\x79', '\x7d',  -	'\x81', '\x85', '\x89', '\x8d', '\x91', '\x99', '\xa2', '\xa8',  -	'\xae', '\xb2', '\xb7', '\xbc', '\xc3', '\xcd', '\xd5', '\xde',  -	'\xa7', '\xaa', '\xad', '\xc2', '\xcb', '\xd4', '\xdf', '\xdf',  -	'\xdf', '\xdf', '\xdf', '\xdf', '\xe0', '\xe6',  -/*  }; */ -/*  static const unsigned char C_locale_data[300] = { */ -	   'A',    'S',    'C',    'I',    'I', '\x00',    '.', '\x00',  -	'\x7f', '\x00',    '-', '\x00',    'S',    'u',    'n', '\x00',  -	   'M',    'o',    'n', '\x00',    'T',    'u',    'e', '\x00',  -	   'W',    'e',    'd', '\x00',    'T',    'h',    'u', '\x00',  -	   'F',    'r',    'i', '\x00',    'S',    'a',    't', '\x00',  -	   'S',    'u',    'n',    'd',    'a',    'y', '\x00',    'M',  -	   'o',    'n',    'd',    'a',    'y', '\x00',    'T',    'u',  -	   'e',    's',    'd',    'a',    'y', '\x00',    'W',    'e',  -	   'd',    'n',    'e',    's',    'd',    'a',    'y', '\x00',  -	   'T',    'h',    'u',    'r',    's',    'd',    'a',    'y',  -	'\x00',    'F',    'r',    'i',    'd',    'a',    'y', '\x00',  -	   'S',    'a',    't',    'u',    'r',    'd',    'a',    'y',  -	'\x00',    'J',    'a',    'n', '\x00',    'F',    'e',    'b',  -	'\x00',    'M',    'a',    'r', '\x00',    'A',    'p',    'r',  
-	'\x00',    'M',    'a',    'y', '\x00',    'J',    'u',    'n',  -	'\x00',    'J',    'u',    'l', '\x00',    'A',    'u',    'g',  -	'\x00',    'S',    'e',    'p', '\x00',    'O',    'c',    't',  -	'\x00',    'N',    'o',    'v', '\x00',    'D',    'e',    'c',  -	'\x00',    'J',    'a',    'n',    'u',    'a',    'r',    'y',  -	'\x00',    'F',    'e',    'b',    'r',    'u',    'a',    'r',  -	   'y', '\x00',    'M',    'a',    'r',    'c',    'h', '\x00',  -	   'A',    'p',    'r',    'i',    'l', '\x00',    'M',    'a',  -	   'y', '\x00',    'J',    'u',    'n',    'e', '\x00',    'J',  -	   'u',    'l',    'y', '\x00',    'A',    'u',    'g',    'u',  -	   's',    't', '\x00',    'S',    'e',    'p',    't',    'e',  -	   'm',    'b',    'e',    'r', '\x00',    'O',    'c',    't',  -	   'o',    'b',    'e',    'r', '\x00',    'N',    'o',    'v',  -	   'e',    'm',    'b',    'e',    'r', '\x00',    'D',    'e',  -	   'c',    'e',    'm',    'b',    'e',    'r', '\x00',    'A',  -	   'M', '\x00',    'P',    'M', '\x00',    '%',    'a',    ' ',  -	   '%',    'b',    ' ',    '%',    'e',    ' ',    '%',    'H',  -	   ':',    '%',    'M',    ':',    '%',    'S',    ' ',    '%',  -	   'Y', '\x00',    '%',    'm',    '/',    '%',    'd',    '/',  -	   '%',    'y', '\x00',    '%',    'H',    ':',    '%',    'M',  -	   ':',    '%',    'S', '\x00',    '%',    'I',    ':',    '%',  -	   'M',    ':',    '%',    'S',    ' ',    '%',    'p', '\x00',  -	   '^',    '[',    'y',    'Y',    ']', '\x00',    '^',    '[',  -	   'n',    'N',    ']', '\x00',  +#define C_locale_data (nl_data + C_LC_ALL + 1 + 90) + +static const unsigned char nl_data[C_LC_ALL + 1 + 90 + 320] = { +/* static const char cat_start[LC_ALL + 1] = { */ +        '\x00', '\x0b', '\x0e', '\x24', '\x56', '\x56', '\x5a',  +/* }; */ +/* static const char item_offset[90] = { */ +	'\x00', '\x02', '\x04', '\x06', '\x08', '\x0a', '\x0c', '\x0e',  +	'\x10', '\x12', '\x14', '\x1a', '\x1b', '\x1b', '\x1b', 
'\x1b',  +	'\x1b', '\x1b', '\x1b', '\x1b', '\x1b', '\x1c', '\x1c', '\x1c',  +	'\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c', '\x1c',  +	'\x1c', '\x1c', '\x1c', '\x1e', '\x20', '\x24', '\x28', '\x2c',  +	'\x30', '\x34', '\x38', '\x3c', '\x43', '\x4a', '\x52', '\x5c',  +	'\x65', '\x6c', '\x75', '\x79', '\x7d', '\x81', '\x85', '\x89',  +	'\x8d', '\x91', '\x95', '\x99', '\x9d', '\xa1', '\xa5', '\xad',  +	'\x36', '\x3c', '\x42', '\x46', '\x4b', '\x50', '\x57', '\x61',  +	'\x69', '\x72', '\x7b', '\x7e', '\x81', '\x96', '\x9f', '\xa8',  +	'\xb3', '\xb3', '\xb3', '\xb3', '\xb3', '\xb3', '\xb4', '\xba',  +	'\xbf', '\xbf',  +/* }; */ +/* static const char C_locale_data[320] = { */ +	   '0', '\x00',    '1', '\x00',    '2', '\x00',    '3', '\x00',  +	   '4', '\x00',    '5', '\x00',    '6', '\x00',    '7', '\x00',  +	   '8', '\x00',    '9', '\x00',    'A',    'S',    'C',    'I',  +	   'I', '\x00',    '.', '\x00', '\x7f', '\x00',    '-', '\x00',  +	   'S',    'u',    'n', '\x00',    'M',    'o',    'n', '\x00',  +	   'T',    'u',    'e', '\x00',    'W',    'e',    'd', '\x00',  +	   'T',    'h',    'u', '\x00',    'F',    'r',    'i', '\x00',  +	   'S',    'a',    't', '\x00',    'S',    'u',    'n',    'd',  +	   'a',    'y', '\x00',    'M',    'o',    'n',    'd',    'a',  +	   'y', '\x00',    'T',    'u',    'e',    's',    'd',    'a',  +	   'y', '\x00',    'W',    'e',    'd',    'n',    'e',    's',  +	   'd',    'a',    'y', '\x00',    'T',    'h',    'u',    'r',  +	   's',    'd',    'a',    'y', '\x00',    'F',    'r',    'i',  +	   'd',    'a',    'y', '\x00',    'S',    'a',    't',    'u',  +	   'r',    'd',    'a',    'y', '\x00',    'J',    'a',    'n',  +	'\x00',    'F',    'e',    'b', '\x00',    'M',    'a',    'r',  +	'\x00',    'A',    'p',    'r', '\x00',    'M',    'a',    'y',  +	'\x00',    'J',    'u',    'n', '\x00',    'J',    'u',    'l',  +	'\x00',    'A',    'u',    'g', '\x00',    'S',    'e',    'p',  +	'\x00',    'O',    'c',    't', 
'\x00',    'N',    'o',    'v',  +	'\x00',    'D',    'e',    'c', '\x00',    'J',    'a',    'n',  +	   'u',    'a',    'r',    'y', '\x00',    'F',    'e',    'b',  +	   'r',    'u',    'a',    'r',    'y', '\x00',    'M',    'a',  +	   'r',    'c',    'h', '\x00',    'A',    'p',    'r',    'i',  +	   'l', '\x00',    'M',    'a',    'y', '\x00',    'J',    'u',  +	   'n',    'e', '\x00',    'J',    'u',    'l',    'y', '\x00',  +	   'A',    'u',    'g',    'u',    's',    't', '\x00',    'S',  +	   'e',    'p',    't',    'e',    'm',    'b',    'e',    'r',  +	'\x00',    'O',    'c',    't',    'o',    'b',    'e',    'r',  +	'\x00',    'N',    'o',    'v',    'e',    'm',    'b',    'e',  +	   'r', '\x00',    'D',    'e',    'c',    'e',    'm',    'b',  +	   'e',    'r', '\x00',    'A',    'M', '\x00',    'P',    'M',  +	'\x00',    '%',    'a',    ' ',    '%',    'b',    ' ',    '%',  +	   'e',    ' ',    '%',    'H',    ':',    '%',    'M',    ':',  +	   '%',    'S',    ' ',    '%',    'Y', '\x00',    '%',    'm',  +	   '/',    '%',    'd',    '/',    '%',    'y', '\x00',    '%',  +	   'H',    ':',    '%',    'M',    ':',    '%',    'S', '\x00',  +	   '%',    'I',    ':',    '%',    'M',    ':',    '%',    'S',  +	   ' ',    '%',    'p', '\x00',    '^',    '[',    'y',    'Y',  +	   ']', '\x00',    '^',    '[',    'n',    'N',    ']', '\x00',   };  char *nl_langinfo(nl_item item) @@ -641,7 +845,7 @@ char *nl_langinfo(nl_item item)  	if ((c = _NL_ITEM_CATEGORY(item)) < C_LC_ALL) {  		if ((i = cat_start[c] + _NL_ITEM_INDEX(item)) < cat_start[c+1]) {  /*  			return (char *) C_locale_data + item_offset[i] + (i & 64); */ -			return (char *) C_locale_data + nl_data[C_LC_ALL+1+i] + (i & 64); +			return (char *) C_locale_data + nl_data[C_LC_ALL+1+i] + 2*(i & 64);  		}  	}  	return (char *) cat_start;	/* Conveniently, this is the empty string. 
*/ diff --git a/libc/stdio/printf.c b/libc/stdio/printf.c index 273bf3621..945d3c38d 100644 --- a/libc/stdio/printf.c +++ b/libc/stdio/printf.c @@ -2331,11 +2331,16 @@ static int _do_one_spec(FILE * __restrict stream,  		}  		if (ppfs->conv_num <= CONV_i) {	/* pointer or (un)signed int */  			alphacase = __UIM_LOWER; -			if (((base = spec_base[(int)(ppfs->conv_num - CONV_p)]) == 10) -				&& (PRINT_INFO_FLAG_VAL(&(ppfs->info),group)) -				) { -				alphacase = __UIM_GROUP; +#ifndef __LOCALE_C_ONLY +			if ((base = spec_base[(int)(ppfs->conv_num - CONV_p)]) == 10) { +				if (PRINT_INFO_FLAG_VAL(&(ppfs->info),group)) { +					alphacase = __UIM_GROUP; +				} +				if (PRINT_INFO_FLAG_VAL(&(ppfs->info),i18n)) { +					alphacase |= 0x80; +				}  			} +#endif /* __LOCALE_C_ONLY */  			if (ppfs->conv_num <= CONV_u) { /* pointer or unsigned int */  				if (ppfs->conv_num == CONV_X) {  					alphacase = __UIM_UPPER; @@ -2350,6 +2355,9 @@ static int _do_one_spec(FILE * __restrict stream,  			if (ppfs->info.prec < 0) { /* Ignore '0' flag if prec specified. */  				padchar = ppfs->info.pad;  			} +#ifdef __UCLIBC_MJN3_ONLY__ +#warning if using outdigits and/or grouping, how should we interpret precision? +#endif  			s = _uintmaxtostr(buf + sizeof(buf) - 1,  							  (uintmax_t)  							  _load_inttype(*argtype & __PA_INTMASK, @@ -2557,6 +2565,9 @@ static int _do_one_spec(FILE * __restrict stream,  			return -1;  		} +#ifdef __UCLIBC_MJN3_ONLY__ +#warning if using outdigits and/or grouping, how should we pad? +#endif  		{  			size_t t; diff --git a/libc/stdio/stdio.c b/libc/stdio/stdio.c index cf72a5ccc..e39cf5205 100644 --- a/libc/stdio/stdio.c +++ b/libc/stdio/stdio.c @@ -319,11 +319,11 @@ int putw(int w, FILE *stream)  UNLOCKED(int,fileno,(register FILE *stream),(stream))  {  #ifdef __STDIO_GLIBC_CUSTOM_STREAMS -	return ( ((stream->cookie == &(stream->filedes)) && (stream->filedes >= 0)) +	return ( (stream && (stream->cookie == &(stream->filedes)) && (stream->filedes >= 0))  			 ? 
stream->filedes  			 : (__set_errno(EBADF), -1) );  #else  /* __STDIO_GLIBC_CUSTOM_STREAMS */ -	return (stream->filedes >= 0) ? stream->filedes : (__set_errno(EBADF), -1); +	return ((stream && stream->filedes >= 0)) ? stream->filedes : (__set_errno(EBADF), -1);  #endif /* __STDIO_GLIBC_CUSTOM_STREAMS */  } @@ -3331,7 +3331,7 @@ char *_uintmaxtostr(register char * __restrict bufend, uintmax_t uval,  	unsigned int H, L, high, low, rh;  #endif  #ifndef __LOCALE_C_ONLY -	int grouping; +	int grouping, outdigit;  	size_t gslen;		   /* This does not need to be initialized. */  	const char *g;		   /* This does not need to be initialized. */  #endif /* __LOCALE_C_ONLY */ @@ -3350,6 +3350,11 @@ char *_uintmaxtostr(register char * __restrict bufend, uintmax_t uval,  #ifndef __LOCALE_C_ONLY  	grouping = -1; +	outdigit = 0x80 & alphacase; +	alphacase ^= outdigit; +#ifdef __UCLIBC_MJN3_ONLY_ +#warning implement outdigit... need digit lengths!  (put it in locale struct) +#endif  	if (alphacase == __UIM_GROUP) {  		assert(base == 10);  		if (*(g = CUR_LOCALE.grouping) @@ -3391,7 +3396,18 @@ char *_uintmaxtostr(register char * __restrict bufend, uintmax_t uval,  		digit = uval % base;  		uval /= base; -		*--bufend = ( (digit < 10) ? digit + '0' : digit + alphacase ); +#ifndef __LOCALE_C_ONLY +		if (outdigit) { +			outdigit = CUR_LOCALE.outdigit_length[digit]; +			do { +				*--bufend = (&CUR_LOCALE.outdigit0_mb)[digit][--outdigit]; +			} while (outdigit); +			outdigit = 1; +		} else +#endif +		{ +			*--bufend = ( (digit < 10) ? digit + '0' : digit + alphacase ); +		}      } while (uval);  #else  /* ************************************************** */ @@ -3437,7 +3453,18 @@ char *_uintmaxtostr(register char * __restrict bufend, uintmax_t uval,  		low = (low / base) + (H * rh) + (digit / base);  		digit %= base; -		*--bufend = ( (digit < 10) ? 
digit + '0' : digit + alphacase ); +#ifndef __LOCALE_C_ONLY +		if (outdigit) { +			outdigit = CUR_LOCALE.outdigit_length[digit]; +			do { +				*--bufend = (&CUR_LOCALE.outdigit0_mb)[digit][--outdigit]; +			} while (outdigit); +			outdigit = 1; +		} else +#endif +		{ +			*--bufend = ( (digit < 10) ? digit + '0' : digit + alphacase ); +		}      } while (low | high);  #endif /******************************************************/ diff --git a/libc/string/Makefile b/libc/string/Makefile index 088058ebe..fe9b64382 100644 --- a/libc/string/Makefile +++ b/libc/string/Makefile @@ -33,13 +33,14 @@ MOBJW=  basename.o bcopy.o bzero.o dirname.o ffs.o memccpy.o memchr.o memcmp.o \  	strspn.o strstr.o strtok.o strtok_r.o strerror.o _susv3_strerror_r.o \  	_string_syserrmsgs.o _glibc_strerror_r.o \  	_string_syssigmsgs.o sys_siglist.o strsignal.o psignal.o \ -	 __xpg_basename.o strlcat.o strlcpy.o sys_errlist.o # strcoll.o +	 __xpg_basename.o strlcat.o strlcpy.o sys_errlist.o  MOBJW2= wcscasecmp.o wcscat.o wcschrnul.o wcschr.o wcscmp.o wcscpy.o wcscspn.o \  	wcsdup.o wcslen.o wcsncasecmp.o wcsncat.o wcsncmp.o wcsncpy.o \  	wcsnlen.o wcspbrk.o wcsrchr.o wcsspn.o wcsstr.o wcstok.o wmemchr.o \  	wmemcmp.o wmemcpy.o wmemmove.o wmempcpy.o wmemset.o wcpcpy.o wcpncpy.o \ -	wcsxfrm.o # wcscoll +	__wcslcpy.o \ +	wcsxfrm.o strxfrm.o # wcscoll strcoll.o  OBJS=$(MOBJ) $(MOBJ1) $(MOBJ2) $(MOBJW) diff --git a/libc/string/wstring.c b/libc/string/wstring.c index 531b1c9fd..c1ead6e00 100644 --- a/libc/string/wstring.c +++ b/libc/string/wstring.c @@ -26,6 +26,13 @@   *   *  ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION!   ATTENTION! */ +/*  Dec 20, 2002 + * + *  Initial test implementation of strcoll, strxfrm, wcscoll, and wcsxfrm. + *  The code needs to be cleaned up a good bit, but I'd like to see people + *  test it out. 
+ */ +  #define _STDIO_UTILITY  #define _GNU_SOURCE  #include <string.h> @@ -36,11 +43,12 @@  #include <stdlib.h>  #include <errno.h>  #include <signal.h> +#include <assert.h> +#include <locale.h>  #ifdef WANT_WIDE  #include <wchar.h>  #include <wctype.h> -#include <locale.h>  #define Wvoid			wchar_t  #define Wchar			wchar_t @@ -627,17 +635,14 @@ int Wmemcmp(const Wvoid *s1, const Wvoid *s2, size_t n)  #ifdef L_strcmp +#ifdef __LOCALE_C_ONLY +#warning c only  #ifdef L_wcscmp -#ifdef __UCLIBC_MJN3_ONLY__ -#warning implement wcscoll and remove weak alias (or enable for C locale only) -#endif  weak_alias(wcscmp,wcscoll);  #else  /* L_wcscmp */ -#ifdef __UCLIBC_MJN3_ONLY__ -#warning implement strcoll and remove weak alias (or enable for C locale only) -#endif  weak_alias(strcmp,strcoll);  #endif /* L_wcscmp */ +#endif /* __LOCALE_C_ONLY */  int Wstrcmp(register const Wchar *s1, register const Wchar *s2)  { @@ -661,23 +666,6 @@ int Wstrcmp(register const Wchar *s1, register const Wchar *s2)  }  #endif  /**********************************************************************/ -#ifdef L_strcoll -#error implement strcoll and remove weak_alias!! 
- -#if 0 -extern unsigned char *_ctype_collate; -int strcoll(register const char *s1, const char *s2) -{ -    int r; - -    while (!(r = (_ctype_collate[(int)(*s1++)]-_ctype_collate[(int)(*s2++)]))); - -    return r; -} -#endif - -#endif -/**********************************************************************/  #ifdef L_wcsncmp  #define L_strncmp  #define Wstrncmp wcsncmp @@ -713,11 +701,6 @@ int Wstrncmp(register const Wchar *s1, register const Wchar *s2, size_t n)  #endif  /**********************************************************************/ -#ifdef L_strxfrm -#error implement strxfrm -/* size_t strxfrm(char *dst, const char *src, size_t len); */ -#endif	 -/**********************************************************************/  #ifdef L_wmemchr  #define L_memchr  #define Wmemchr wmemchr @@ -1923,28 +1906,37 @@ size_t strlcat(register char *__restrict dst,  #endif  /**********************************************************************/ -#ifdef L_wcsxfrm +#ifdef WANT_WIDE +extern size_t __wcslcpy(wchar_t *__restrict dst, +						const wchar_t *__restrict src, +						size_t n); +#endif + + +#ifdef L___wcslcpy  #define L_strlcpy -#define Wstrlcpy wcsxfrm +#define Wstrlcpy __wcslcpy +#ifdef __LOCALE_C_ONLY +weak_alias(__wcslcpy,wcsxfrm); +#endif  #endif  #ifdef L_strlcpy -#ifndef L_wcsxfrm +#ifndef L___wcslcpy  #define Wstrlcpy strlcpy -#ifdef __UCLIBC_MJN3_ONLY__ -#warning implement wcscoll and remove weak alias (or enable for C locale only) -#endif +#ifdef __LOCALE_C_ONLY  weak_alias(strlcpy,strxfrm);  #endif +#endif  /* OpenBSD function:   * Copy at most n-1 chars from src to dst and nul-terminate dst.   * Returns strlen(src), so truncation occurred if the return value is >= n. 
*/  size_t Wstrlcpy(register Wchar *__restrict dst, -				register const Wchar *__restrict src, -				size_t n) +				  register const Wchar *__restrict src, +				  size_t n)  {  	const Wchar *src0 = src;  	Wchar dummy[1]; @@ -2145,3 +2137,621 @@ void psignal(int signum, register const char *message)  #endif  /**********************************************************************/ +#ifndef __LOCALE_C_ONLY + +#ifdef L_strxfrm +#ifndef WANT_WIDE +#error WANT_WIDE should be defined for L_strxfrm +#endif +#ifdef L_wcsxfrm +#error L_wcsxfrm already defined for L_strxfrm +#endif + +#define wcscoll strcoll +#define L_wcsxfrm +#undef WANT_WIDE + +#undef Wvoid +#undef Wchar +#undef Wuchar +#undef Wint + +#define Wchar char + +#endif /* L_strxfrm */ + + + +#ifdef L_wcsxfrm + +#define CUR_COLLATE (&__global_locale.collate) + +#define MAX_PENDING 8 + +typedef struct { +	const Wchar *s; +	const Wchar *eob;			/* end of backward */ + +	__uwchar_t weight; +	__uwchar_t ui_weight;		/* undefined or invalid */ +	int colitem; +	int weightidx; +	int rule; +	size_t position; +	/* should be wchar_t.  if wchar < 0 do EILSEQ? */ +	__uwchar_t *cip; +	__uwchar_t ci_pending[MAX_PENDING];	/* nul-terminated */ + +	char *back_buf; +	char *bbe;					/* end of back_buf (actual last... 
not 1 past end) */ +	char *bp;					/* ptr into backbuf, NULL if not in backward mode */ +	char ibb[128]; +	size_t bb_size; + +	int ru_pushed; +} col_state_t; + + +#define WEIGHT_MASK	0x3fffU +#define RULE_MASK	0xc000U + +#define RULE_FORWARD  (1 << 14) +#define RULE_POSITION (1 << 15) + +#define UI_IDX		(WEIGHT_MASK-6) +#define POSIT_IDX	(WEIGHT_MASK-5) +#define RANGE_IDX	(WEIGHT_MASK-4) +#define UNDEF_IDX	(WEIGHT_MASK-3) +#define INVAL_IDX	(WEIGHT_MASK-2) +#define DITTO_IDX   (WEIGHT_MASK-1) + + +#undef TRACE +#if 0 +#define TRACE(X)	printf##X +#else +#define TRACE(X)	((void)0) +#endif + +static int lookup(wchar_t wc) +{ +	unsigned int sc, n, i0, i1; + +	if (((__uwchar_t) wc) > 0xffffU) { +		return 0; +	} + +	sc = wc & CUR_COLLATE->ti_mask; +	wc >>= CUR_COLLATE->ti_shift; +	n = wc & CUR_COLLATE->ii_mask; +	wc >>= CUR_COLLATE->ii_shift; + +	i0 = CUR_COLLATE->wcs2colidt_tbl[wc]; +	i0 <<= CUR_COLLATE->ii_shift; +	i1 = CUR_COLLATE->wcs2colidt_tbl[CUR_COLLATE->ii_len + i0 + n]; +	i1 <<= CUR_COLLATE->ti_shift; +	return CUR_COLLATE->wcs2colidt_tbl[CUR_COLLATE->ii_len + CUR_COLLATE->ti_len + i1 + sc]; + +} + +static void init_col_state(col_state_t *cs, const Wchar *wcs) +{ +	memset(cs, 0, sizeof(col_state_t)); +	cs->s = wcs; +	cs->bp = cs->back_buf = cs->ibb; +	cs->bb_size = 128; +	cs->bbe = cs->back_buf + (cs->bb_size -1); +} + +static void next_weight(col_state_t *cs, int pass) +{ +	int r, w, ru, ri, popping_backup_stack; +	ssize_t n; +	const uint16_t *p; +#ifdef WANT_WIDE +#define WC (*cs->s) +#define N (1) +#else  /* WANT_WIDE */ +	mbstate_t mbstate; +	wchar_t WC; +	size_t n0, nx; +#define N n0 + +	mbstate.mask = 0; +#endif /* WANT_WIDE */ + +	do { + +		if (cs->ru_pushed) { +			ru = cs->ru_pushed; +			TRACE(("ru_pushed = %d\n", ru)); +			cs->ru_pushed = 0; +			goto POSITION_SKIP; +		} + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning should we walk pendings backwards? 
+#endif +		if (cs->cip) {			/* possible pending weight */ +			if ((r = *(cs->cip++)) == 0) { +				cs->cip = NULL; +				continue; +			} +			cs->weightidx = r & WEIGHT_MASK; +			assert(cs->weightidx); +/* 			assert(cs->weightidx != WEIGHT_MASK); */ +		} else {				/* get the next collation item from the string */ +			TRACE(("clearing popping flag\n")); +			popping_backup_stack = 0; + +		IGNORE_LOOP: +			/* keep first pos as 0 for a sentinal */ +			if (*cs->bp) {				/* pending backward chars */ +			POP_BACKUP: +				popping_backup_stack = 1; +				TRACE(("setting popping flag\n")); +				n = 0; +				if (*cs->bp > 0) {		/* singles pending */ +					cs->s -= 1; +					if ((*cs->bp -= 1) == 0) { +						cs->bp -= 1; +					} +				} else {				/* last was a multi */ +					cs->s += *cs->bp; +					cs->bp -= 1; +				} +			} else if (!*cs->s) { /* not in backward mode and end of string */ +				cs->weight = 0; +				return; +			} else { +				cs->position += 1; +			} + +		BACK_LOOP: +#ifdef WANT_WIDE +			n = 1; +			cs->colitem = r = lookup(*cs->s); +#else  /* WANT_WIDE */ +			n = n0 = mbrtowc(&WC, cs->s, SIZE_MAX, &mbstate); +			if (n < 0) { +				__set_errno(EILSEQ); +				cs->weight = 0; +				return; +			} +			cs->colitem = r = lookup(WC); +#endif /* WANT_WIDE */ + +			TRACE((" r=%d WC=%#lx\n", r, (unsigned long)(WC))); + +			if (r > CUR_COLLATE->max_col_index) { /* starting char for one or more sequences */ +				p = CUR_COLLATE->multistart_tbl; +				p += p[r-CUR_COLLATE->max_col_index -1]; +				do { +					n = N; +					r = *p++; +					do { +						if (!*p) {		/* found it */ +							cs->colitem = r; +							TRACE(("    found multi %d\n", n)); +							goto FOUND; +						} +#ifdef WANT_WIDE +						/* the lookup check here is safe since we're assured that *p is a valid colidx */ +						if (!cs->s[n] || (lookup(cs->s[n]) != *p)) { +							do {} while (*p++); +							break; +						} +						++p; +						++n; +#else  /* WANT_WIDE */ +						if (cs->s[n]) { +							nx = mbrtowc(&WC, cs->s + n, 
SIZE_MAX, &mbstate); +							if (nx < 0) { +								__set_errno(EILSEQ); +								cs->weight = 0; +								return; +							} +						} +						if (!cs->s[n] || (lookup(WC) != *p)) { +							do {} while (*p++); +							break; +						} +						++p; +						n += nx; +#endif /* WANT_WIDE */ +					} while (1); +				} while (1); +			} else if (r == 0) {		/* illegal, undefined, or part of a range */ +				if ((CUR_COLLATE->range_count) +#ifdef __UCLIBC_MJN3_ONLY__ +#warning .. need to introduce range as a collating item? +#endif +					&& (((__uwchar_t)(WC - CUR_COLLATE->range_low)) <= CUR_COLLATE->range_count) +					) {					/* part of a range */ +					/* Note: cs->colitem = 0 already. */ +					TRACE(("    found range\n")); +					ru = CUR_COLLATE->ruletable[CUR_COLLATE->range_rule_offset*CUR_COLLATE->MAX_WEIGHTS + pass]; +					assert((ru & WEIGHT_MASK) != DITTO_IDX); +					if ((ru & WEIGHT_MASK) == WEIGHT_MASK) { +						ru = (ru & RULE_MASK) | RANGE_IDX; +						cs->weight = CUR_COLLATE->range_base_weight + (WC - CUR_COLLATE->range_low); +					} +					goto RANGE_SKIP_TO; +				} else if (((__uwchar_t)(WC)) <= 0x7fffffffUL) { /* legal but undefined */ +				UNDEFINED: +					/* Note: cs->colitem = 0 already. */ +					ri = CUR_COLLATE->undefined_idx; +					assert(ri != 0); /* implicit undefined isn't supported */ + +					TRACE(("    found explicit UNDEFINED\n")); +#ifdef __UCLIBC_MJN3_ONLY__ +#warning right now single weight locales do not support .. +#endif +					if (CUR_COLLATE->num_weights == 1) { +						TRACE(("    single weight UNDEFINED\n")); +						cs->weightidx = RANGE_IDX; +						cs->weight = ri; +						cs->s += n; +						goto PROCESS_WEIGHT; +					} + +					ri = CUR_COLLATE->index2ruleidx[ri - 1]; +					ru = CUR_COLLATE->ruletable[ri * CUR_COLLATE->MAX_WEIGHTS + pass]; +					assert((ru & WEIGHT_MASK) != WEIGHT_MASK); /* TODO: handle ".." 
*/ +					if ((ru & WEIGHT_MASK) == DITTO_IDX) { +						cs->colitem = CUR_COLLATE->undefined_idx; +					} +					goto RANGE_SKIP_TO; +				} else {		/* illegal */ +					TRACE(("    found illegal\n")); +					__set_errno(EINVAL); +					/* We put all illegals in the same equiv class with maximal weight, +					 * and ignore them after the first pass. */ +					if (pass > 0) { +						cs->s += n; +						goto IGNORE_LOOP; +					} +					ru = (RULE_FORWARD | RANGE_IDX); +					cs->weight = 0xffffU; +					goto RANGE_SKIP_TO; +				} +			} else if (CUR_COLLATE->num_weights == 1) { +				TRACE(("    single weight\n")); +				cs->weightidx = RANGE_IDX; +				cs->weight = cs->colitem; +				cs->s += n; +				goto PROCESS_WEIGHT; +			} else { +				TRACE(("    normal\n")); +			} + +			/* if we get here, it is a normal char either singlely weighted, undefined, or in a range */ +		FOUND: +			ri = CUR_COLLATE->index2ruleidx[cs->colitem - 1]; +			TRACE((" ri=%d ", ri)); +#ifdef __UCLIBC_MJN3_ONLY__ +#warning make sure this is correct +#endif +			if (!ri) { +				TRACE(("NOT IN THIS LOCALE\n")); +				goto UNDEFINED; +			} +			ru = CUR_COLLATE->ruletable[ri * CUR_COLLATE->MAX_WEIGHTS + pass]; + +		RANGE_SKIP_TO: + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning ignoreables probably should not interrupt backwards processing, but this is wrong +#endif +/* 			if (!(ru & WEIGHT_MASK)) { */ +/* 				TRACE(("IGNORE\n")); */ +/* 				cs->s += n; */ +/* 				continue; */ +/* 			} */ + + +			TRACE((" rule = %#x  weight = %#x  popping = %d  s = %p  eob = %p\n", +				   ru & RULE_MASK, ru & WEIGHT_MASK, popping_backup_stack, +				   cs->s, cs->eob)); +			/* now we need to check if we're going backwards... 
*/ + +			if (!popping_backup_stack) { +				if (!(ru & RULE_MASK)) { /* backward */ +					TRACE(("backwards\n")); +					assert(cs->bp <= cs->bbe); +					if (cs->bp == cs->bbe) { +						if (cs->back_buf == cs->ibb) { /* was using internal buffer */ +							cs->bp = malloc(cs->bb_size + 128); +							if (!cs->bp) { +								__set_errno(ENOMEM); +#ifdef __UCLIBC_MJN3_ONLY__ +#warning what to do here? +#endif +								cs->weight = 0; +								return; +							} +							memcpy(cs->bp, cs->back_buf, cs->bb_size); + +						} else { +							cs->bp = realloc(cs->back_buf, cs->bb_size + 128); +							if (!cs->bp) { +								__set_errno(ENOMEM); +#ifdef __UCLIBC_MJN3_ONLY__ +#warning what to do here? +#endif +								cs->weight = 0; +								return; +							} +						} +						cs->bb_size += 128; +						cs->bbe = cs->bp + (cs->bbe - cs->back_buf); +						cs->back_buf = cs->bp; +						cs->bp = cs->bbe; + +					} +					if (n==1) {			/* single char */ +						if (*cs->bp && (((unsigned char)(*cs->bp)) < CHAR_MAX)) { +							*cs->bp += 1; /* increment last single's count */ +						} else {	  /* last was a multi, or just starting */ +							if (!cs->bp) { +								cs->bp = cs->back_buf; +							} else { +								assert(cs->bp < cs->bbe); +								++cs->bp; +							} +							*cs->bp = 1; +						} +					} else {			/* multichar */ +						assert(n>1); +						assert(cs->bp < cs->bbe); +						*++cs->bp = -n; +					} +					cs->s += n; +					if (*cs->s) { +						goto BACK_LOOP; +					} +					/* end-of-string so start popping */ +					cs->eob = cs->s; +					TRACE(("popping\n")); +					goto POP_BACKUP; +				} else if (*cs->bp) { /* was going backward but this element isn't */ +					/* discard current and use previous backward element */ +					assert(!cs->cip); +					cs->eob = cs->s; +					TRACE(("popping\n")); +					goto POP_BACKUP; +				} else {				/* was and still going forward */ +					TRACE(("forwards\n")); +					if ((ru & (RULE_POSITION|WEIGHT_MASK)) > RULE_POSITION) { +						assert(ru & 
WEIGHT_MASK); +						cs->ru_pushed = ru; +						cs->weight = cs->position; +#ifdef __UCLIBC_MJN3_ONLY__ +#warning devel code +#endif +						cs->position = 0;	/* reset to reduce size for strcoll? */ +						cs->s += n; +						cs->weightidx = RANGE_IDX; +						goto PROCESS_WEIGHT; +					} +				} +			} else {					/* popping backwards stack */ +				TRACE(("popping (continued)\n")); +				if (!*cs->bp) { +					cs->s = cs->eob; +				} +				cs->s -= n; +			} + +			cs->s += n; +		POSITION_SKIP: +			cs->weightidx = ru & WEIGHT_MASK; +			cs->rule = ru & RULE_MASK; +		} + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning for pending we only want the weight... _not_ the rule +#endif +		if (!cs->weightidx) {	/* ignore */ +			continue; +		} + +	PROCESS_WEIGHT: +		assert(cs->weightidx); + + +		if (((unsigned int)(cs->weightidx - UI_IDX)) <= (INVAL_IDX-UI_IDX)) { +			if (cs->weightidx == UI_IDX) { +				cs->weight = cs->ui_weight; +			} +			return; +		} + +		assert(cs->weightidx != WEIGHT_MASK); +		if (cs->weightidx == DITTO_IDX) { /* want the weight of the current collating item */ +			TRACE(("doing ditto\n")); +			w = CUR_COLLATE->index2weight[cs->colitem -1]; +		} else if (cs->weightidx <= CUR_COLLATE->max_col_index) { /* normal */ +			TRACE(("doing normal\n")); +			w = CUR_COLLATE->index2weight[cs->weightidx -1]; +		} else {				/* a string */ +			TRACE(("doing string\n")); +			assert(!(cs->weightidx & RULE_MASK)); +			/* note: iso14561 allows null string here */ +			p = CUR_COLLATE->weightstr + (cs->weightidx - (CUR_COLLATE->max_col_index + 2)); +			if (*p & WEIGHT_MASK) { +				r = 0; +				do { +					assert(r < MAX_PENDING); +					cs->ci_pending[r++] = *p++; +				} while (*p & WEIGHT_MASK); +				cs->cip = cs->ci_pending; +			} +			continue; +		} + +		cs->weight = w; +		return; +	} while (1); +} + +int wcscoll (const Wchar *s0, const Wchar *s1) +{ +	col_state_t ws[2]; +	int pass; + +	if (!CUR_COLLATE->num_weights) { /* C locale */ +#ifdef WANT_WIDE +		return wcscmp(s0, s1); +#else  /* 
WANT_WIDE */ +		return strcmp(s0, s1); +#endif /* WANT_WIDE */ +	} + +	pass = 0; +	do {						/* loop through the weights levels */ +		init_col_state(ws, s0); +		init_col_state(ws+1, s1); +		do {					/* loop through the strings */ +			/* for each string, get the next weight */ +			next_weight(ws, pass); +			next_weight(ws+1, pass); +			TRACE(("w0=%lu  w1=%lu\n", +				   (unsigned long) ws[0].weight, +				   (unsigned long) ws[1].weight)); + +			if (ws[0].weight != ws[1].weight) { +				return ws[0].weight - ws[1].weight; +			} +		} while (ws[0].weight); +	} while (++pass < CUR_COLLATE->num_weights); + +	return 0; +} + +#ifdef WANT_WIDE + +size_t wcsxfrm(wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t n) +{ +	col_state_t cs; +	size_t count; +	int pass; + +	if (!CUR_COLLATE->num_weights) { /* C locale */ +		return __wcslcpy(ws1, ws2, n); +	} + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning handle empty string as a special case +#endif + +	count = pass = 0; +	do {						/* loop through the weights levels */ +		init_col_state(&cs, ws2); +		do {					/* loop through the string */ +			next_weight(&cs, pass); +			TRACE(("weight=%lu (%#lx)\n", (unsigned long) cs.weight, (unsigned long) cs.weight)); +			if (count < n) { +				ws1[count] = cs.weight +1; +			} +			++count; +			TRACE(("--------------------------------------------\n")); +		} while (cs.weight); +		if (count <= n) {		/* overwrite the trailing 0 end-of-pass marker */ +			ws1[count-1] = 1; +		} +		TRACE(("--------------------  pass %d  --------------------\n", pass)); +	} while (++pass < CUR_COLLATE->num_weights); +	if (count <= n) {			/* oops... change it back */ +		ws1[count-1] = 0; +	} +	return count-1; +} + +#else  /* WANT_WIDE */ + +static const unsigned long bound[] = { +	1UL << 7, +	1UL << 11, +	1UL << 16, +	1UL << 21, +	1UL << 26, +}; + +static unsigned char first[] = { +	0x0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc +}; + +/* Use an extension of UTF-8 to store a 32 bit val in max 6 bytes. 
*/ + +static size_t store(unsigned char *s, size_t count, size_t n, __uwchar_t weight) +{ +	int i, r; + +	i = 0; +	do { +		if (weight < bound[i]) { +			break; +		} +	} while (++i < sizeof(bound)/sizeof(bound[0])); + +	r = i+1; +	if (i + count < n) { +		s += count; +		s[0] = first[i]; +		while (i) { +			s[i] = 0x80 | (weight & 0x3f); +			weight >>= 6; +			--i; +		} +		s[0] |= weight; +	} + +	return r; +} + +size_t strxfrm(char *__restrict ws1, const char *__restrict ws2, size_t n) +{ +	col_state_t cs; +	size_t count, inc; +	int pass; + +	if (!CUR_COLLATE->num_weights) { /* C locale */ +		return strlcpy(ws1, ws2, n); +	} + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning handle empty string as a special case +#endif + +	inc = count = pass = 0; +	do {						/* loop through the weights levels */ +		init_col_state(&cs, ws2); +		do {					/* loop through the string */ +			next_weight(&cs, pass); +			TRACE(("weight=%lu (%#lx)\n", (unsigned long) cs.weight, (unsigned long) cs.weight)); +			inc = store((unsigned char *)ws1, count, n, cs.weight + 1); +			count += inc; +			TRACE(("--------------------------------------------\n")); +		} while (cs.weight); +		/* overwrite the trailing 0 end-of-pass marker */ +		assert(inc == 1); +		if (count <= n) { +			ws1[count-1] = 1; +		} +		TRACE(("--------------------  pass %d  --------------------\n", pass)); +	} while (++pass < CUR_COLLATE->num_weights); +	if (count <= n) {			/* oops... change it back */ +		ws1[count-1] = 0; +	} +	return count-1; +} + + +#endif /* WANT_WIDE */ + +#endif /* wcscoll */ + +#endif /* __LOCALE_C_ONLY */ +/**********************************************************************/ + diff --git a/libc/sysdeps/linux/common/bits/uClibc_locale.h b/libc/sysdeps/linux/common/bits/uClibc_locale.h index 8025005ab..4e89188b8 100644 --- a/libc/sysdeps/linux/common/bits/uClibc_locale.h +++ b/libc/sysdeps/linux/common/bits/uClibc_locale.h @@ -120,6 +120,46 @@ enum {    * In particular, C/POSIX locale is '#' + "\x80\x01"}*LC_ALL + nul.   
 */
+typedef struct {	/* collation tables of the active locale; the collation routines read these fields through CUR_COLLATE */
+	uint16_t num_weights;	/* number of weight levels (passes); 0 makes wcscoll/wcsxfrm/strxfrm take the plain C-locale path */
+	uint16_t num_starters;
+	uint16_t ii_shift;	/* NOTE(review): the ii_ and ti_ fields are not referenced by the visible collation code - presumably used by the wide-char lookup; confirm */
+	uint16_t ti_shift;
+	uint16_t ii_len;
+	uint16_t ti_len;
+	uint16_t max_weight;
+	uint16_t num_col_base;
+	uint16_t max_col_index;	/* lookup results above this start multi-char sequences; weight indices above it select a weight string */
+	uint16_t undefined_idx;	/* index used for legal but undefined characters; next_weight() asserts it is nonzero */
+	uint16_t range_low;	/* a char is "in range" when (WC - range_low) <= range_count */
+	uint16_t range_count;	/* 0 disables the range check */
+	uint16_t range_base_weight;	/* weight of an in-range char is range_base_weight + (WC - range_low) */
+	uint16_t range_rule_offset; /* ruletable row used for in-range chars; change name to index? */
+
+	uint16_t ii_mask;
+	uint16_t ti_mask;
+
+	const uint16_t *index2weight_tbl;
+	const uint16_t *index2ruleidx_tbl;
+	const uint16_t *multistart_tbl;	/* entry [colidx - max_col_index - 1] is an offset into this same table where the candidate sequences start */
+	/*	 uint16_t wcs2colidt_offset_low; */
+	/*	 uint16_t wcs2colidt_offset_hi; */
+	const uint16_t *wcs2colidt_tbl;
+
+	/*	 uint16_t undefined_idx; */
+	const uint16_t *overrides_tbl;
+	/*	 uint16_t *multistart_tbl; */
+
+	const uint16_t *weightstr;	/* weight strings; next_weight() copies entries while they have WEIGHT_MASK bits set */
+	const uint16_t *ruletable;	/* rules, indexed as [rule index * MAX_WEIGHTS + pass] */
+
+
+	uint16_t *index2weight;	/* weight of collation index i is stored at [i - 1] */
+	uint16_t *index2ruleidx;	/* rule index of collation index i is stored at [i - 1]; 0 is treated as UNDEFINED */
+
+	uint16_t MAX_WEIGHTS;	/* row stride of ruletable */
+} __collate_t;
+  /*  static unsigned char cur_locale[LOCALE_STRING_SIZE]; */ @@ -138,8 +178,7 @@ typedef struct {  	/* ctype */  	unsigned char encoding;		/* C/POSIX, 8-bit, UTF-8 */  	unsigned char mb_cur_max;	/* determined by encoding _AND_ translit!!! */ - -	const char *codeset; +	const unsigned char outdigit_length[10];  #ifdef __CTYPE_HAS_8_BIT_LOCALES  	const unsigned char *idx8ctype; @@ -162,6 +201,19 @@ typedef struct {  	/* width?? */  #endif /* __WCHAR_ENABLED */ +	/* ctype */ +	const char *outdigit0_mb; +	const char *outdigit1_mb; +	const char *outdigit2_mb; +	const char *outdigit3_mb; +	const char *outdigit4_mb; +	const char *outdigit5_mb; +	const char *outdigit6_mb; +	const char *outdigit7_mb; +	const char *outdigit8_mb; +	const char *outdigit9_mb; +	const char *codeset;		/* MUST BE LAST!!! 
*/ +  	/* numeric */  	const char *decimal_point;  	const char *thousands_sep; @@ -250,11 +302,16 @@ typedef struct {  	const char *era_d_t_fmt;  	const char *era_t_fmt; -	/* collate */ +	/* collate is at the end */  	/* messages */  	const char *yesexpr;  	const char *noexpr; +	const char *yesstr; +	const char *nostr; + +	/* collate is at the end */ +	__collate_t collate;  } __locale_t;  | 
