diff options
Diffstat (limited to 'libc/misc/locale/locale.c')
-rw-r--r-- | libc/misc/locale/locale.c | 1103 |
1 files changed, 764 insertions, 339 deletions
diff --git a/libc/misc/locale/locale.c b/libc/misc/locale/locale.c index 9c162a980..071a8df71 100644 --- a/libc/misc/locale/locale.c +++ b/libc/misc/locale/locale.c @@ -36,28 +36,94 @@ */ #define _GNU_SOURCE -#include <locale.h> + +#define __CTYPE_HAS_8_BIT_LOCALES 1 + + #include <string.h> #include <stdlib.h> #include <stddef.h> #include <limits.h> #include <stdint.h> #include <assert.h> +#include <errno.h> +#include <ctype.h> + +#undef __LOCALE_C_ONLY +#ifndef __UCLIBC_HAS_LOCALE__ +#define __LOCALE_C_ONLY +#endif /* __UCLIBC_HAS_LOCALE__ */ + + +#ifdef __LOCALE_C_ONLY + +#include <locale.h> + +#else /* __LOCALE_C_ONLY */ + +#ifdef __UCLIBC_MJN3_ONLY__ +#ifdef L_setlocale +#warning TODO: Fix the __CTYPE_HAS_8_BIT_LOCALES define at the top of the file. +#warning TODO: Fix __WCHAR_ENABLED. +#endif +#endif -#ifndef __LOCALE_C_ONLY +/* Need to include this before locale.h and xlocale.h! */ +#include <bits/uClibc_locale.h> -#define CUR_LOCALE_SPEC (__global_locale.cur_locale) #undef CODESET_LIST #define CODESET_LIST (__locale_mmap->codeset_list) +#ifdef __UCLIBC_HAS_XLOCALE__ +#include <xlocale.h> +#include <locale.h> +#else /* __UCLIBC_HAS_XLOCALE__ */ +/* We need this internally... */ +#define __UCLIBC_HAS_XLOCALE__ 1 +#include <xlocale.h> +#include <locale.h> +#undef __UCLIBC_HAS_XLOCALE__ +#endif /* __UCLIBC_HAS_XLOCALE__ */ + +#include <wchar.h> + +#define LOCALE_NAMES (__locale_mmap->locale_names5) +#define LOCALES (__locale_mmap->locales) +#define LOCALE_AT_MODIFIERS (__locale_mmap->locale_at_modifiers) +#define CATEGORY_NAMES (__locale_mmap->lc_names) + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning REMINDER: redo the MAX_LOCALE_STR stuff... +#endif +#define MAX_LOCALE_STR 256 /* TODO: Only sufficient for current case. */ +#define MAX_LOCALE_CATEGORY_STR 32 /* TODO: Only sufficient for current case. */ +/* Note: Best if MAX_LOCALE_CATEGORY_STR is a power of 2. */ + +extern int _locale_set_l(const unsigned char *p, __locale_t base); +extern void _locale_init_l(__locale_t base); + #endif /* __LOCALE_C_ONLY */ +#undef LOCALE_STRING_SIZE +#define LOCALE_SELECTOR_SIZE (2 * __LC_ALL + 2) + +#ifdef __UCLIBC_MJN3_ONLY__ +#ifdef L_setlocale +#warning TODO: Create a C locale selector string. +#endif +#endif +#define C_LOCALE_SELECTOR "\x23\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80" + + +#include <langinfo.h> +#include <nl_types.h> + /**********************************************************************/ #ifdef L_setlocale #ifdef __LOCALE_C_ONLY -link_warning(setlocale,"the 'setlocale' function supports only C|POSIX locales") +link_warning(setlocale,"REMINDER: The 'setlocale' function supports only C|POSIX locales.") static const char C_string[] = "C"; @@ -74,273 +140,125 @@ char *setlocale(int category, register const char *locale) #else /* ---------------------------------------------- __LOCALE_C_ONLY */ -#if !defined(NUM_LOCALES) || (NUM_LOCALES <= 1) -#error locales enabled, but not data other than for C locale! +#ifdef __UCLIBC_HAS_THREADS__ +link_warning(setlocale,"REMINDER: The 'setlocale' function is _not_ threadsafe except for simple queries.") #endif -#define LOCALE_NAMES (__locale_mmap->locale_names5) -#define LOCALES (__locale_mmap->locales) -#define LOCALE_AT_MODIFIERS (__locale_mmap->locale_at_modifiers) -#define CATEGORY_NAMES (__locale_mmap->lc_names) +#if !defined(__LOCALE_DATA_NUM_LOCALES) || (__LOCALE_DATA_NUM_LOCALES <= 1) +#error locales enabled, but not data other than for C locale! +#endif +#ifdef __UCLIBC_MJN3_ONLY__ +#warning TODO: Move posix and utf8 strings. +#endif static const char posix[] = "POSIX"; static const char utf8[] = "UTF-8"; #ifdef __UCLIBC_MJN3_ONLY__ -#warning REMINDER: redo the MAX_LOCALE_STR stuff... +#warning TODO: Fix dimensions of hr_locale. #endif -#define MAX_LOCALE_STR 256 /* TODO: Only sufficient for current case. */ - -static char hr_locale[MAX_LOCALE_STR]; +/* Individual category strings start at hr_locale + category * MAX_LOCALE_CATEGORY. + * This holds for LC_ALL as well. + */ +static char hr_locale[(MAX_LOCALE_CATEGORY_STR * LC_ALL) + MAX_LOCALE_STR]; -static __inline char *human_readable_locale(int category, const unsigned char *s) +static void update_hr_locale(const unsigned char *spec) { const unsigned char *loc; + const unsigned char *s; char *n; - int i; - - ++s; - - if (category == LC_ALL) { - for (i = 0 ; i < LC_ALL-1 ; i += 2) { - if ((s[i] != s[i+2]) || (s[i+1] != s[i+3])) { - goto SKIP; - } - } - /* All categories the same, so simplify string by using a single - * category. */ - category = LC_CTYPE; - } - - SKIP: - i = (category == LC_ALL) ? 0 : category; - s += 2*i; - n = hr_locale; + int i, category, done; + done = category = 0; do { - if ((*s != 0xff) || (s[1] != 0xff)) { - loc = LOCALES + WIDTH_LOCALES * ((((int)(*s & 0x7f)) << 7) + (s[1] & 0x7f)); - if (category == LC_ALL) { - n = stpcpy(n, CATEGORY_NAMES + (int) CATEGORY_NAMES[i]); - *n++ = '='; - } - if (*loc == 0) { - *n++ = 'C'; - *n = 0; - } else { - char at = 0; - memcpy(n, LOCALE_NAMES + 5*((*loc)-1), 5); - if (n[2] != '_') { - at = n[2]; - n[2] = '_'; - } - n += 5; - *n++ = '.'; - if (loc[2] == 2) { - n = stpcpy(n, utf8); - } else if (loc[2] >= 3) { - n = stpcpy(n, CODESET_LIST + (int)(CODESET_LIST[loc[2] - 3])); - } - if (at) { - const char *q; - *n++ = '@'; - q = LOCALE_AT_MODIFIERS; - do { - if (q[1] == at) { - n = stpcpy(n, q+2); - break; - } - q += 2 + *q; - } while (*q); + s = spec + 1; + n = hr_locale + category * MAX_LOCALE_CATEGORY_STR; + + if (category == LC_ALL) { + done = 1; + for (i = 0 ; i < LC_ALL-1 ; i += 2) { + if ((s[i] != s[i+2]) || (s[i+1] != s[i+3])) { + goto SKIP; } } - *n++ = ';'; + /* All categories the same, so simplify string by using a single + * category. */ + category = LC_CTYPE; } - s += 2; - } while (++i < category); - *--n = 0; /* Remove trailing ';' and nul-terminate. */ - assert(n-hr_locale < MAX_LOCALE_STR); - return hr_locale; -} - -static int find_locale(int category, const char *p, unsigned char *new_locale) -{ - int i; - const unsigned char *s; - uint16_t n; - unsigned char lang_cult, codeset; - -#if defined(LOCALE_AT_MODIFIERS_LENGTH) && 1 - /* Support standard locale handling for @-modifiers. */ + SKIP: + i = (category == LC_ALL) ? 0 : category; + s += 2*i; -#ifdef __UCLIBC_MJN3_ONLY__ -#warning REMINDER: fix buf size in find_locale -#endif - char buf[18]; /* TODO: 7+{max codeset name length} */ - const char *q; - - if ((q = strchr(p,'@')) != NULL) { - if ((((size_t)((q-p)-5)) > (sizeof(buf) - 5)) || (p[2] != '_')) { - return 0; - } - /* locale name at least 5 chars long and 3rd char is '_' */ - s = LOCALE_AT_MODIFIERS; do { - if (!strcmp(s+2, q+1)) { - break; - } - s += 2 + *s; /* TODO - fix this throughout */ - } while (*s); - if (!*s) { - return 0; - } - assert(q - p < sizeof(buf)); - memcpy(buf, p, q-p); - buf[q-p] = 0; - buf[2] = s[1]; - p = buf; - } -#endif - - lang_cult = codeset = 0; /* Assume C and default codeset. */ - if (((*p == 'C') && !p[1]) || !strcmp(p, posix)) { - goto FIND_LOCALE; - } - - if ((strlen(p) > 5) && (p[5] == '.')) { /* Codeset in locale name? */ - /* TODO: maybe CODESET_LIST + *s ??? */ - /* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */ - codeset = 2; - if (strcmp(utf8,p+6) != 0) {/* TODO - fix! */ - s = CODESET_LIST; - do { - ++codeset; /* Increment codeset first. */ - if (!strcmp(CODESET_LIST+*s, p+6)) { - goto FIND_LANG_CULT; + if ((*s != 0xff) || (s[1] != 0xff)) { + loc = LOCALES + + __LOCALE_DATA_WIDTH_LOCALES * ((((int)(*s & 0x7f)) << 7) + + (s[1] & 0x7f)); + if (category == LC_ALL) { + n = stpcpy(n, CATEGORY_NAMES + (int) CATEGORY_NAMES[i]); + *n++ = '='; } - } while (*++s); - return 0; /* No matching codeset! */ - } - } - - FIND_LANG_CULT: /* Find language_culture number. */ - s = LOCALE_NAMES; - do { /* TODO -- do a binary search? */ - /* TODO -- fix gen_mmap!*/ - ++lang_cult; /* Increment first since C/POSIX is 0. */ - if (!strncmp(s,p,5)) { /* Found a matching locale name; */ - goto FIND_LOCALE; - } - s += 5; - } while (lang_cult < NUM_LOCALE_NAMES); - return 0; /* No matching language_culture! */ - - FIND_LOCALE: /* Find locale row matching name and codeset */ - s = LOCALES; - n = 0; - do { /* TODO -- do a binary search? */ - if ((lang_cult == *s) && ((codeset == s[1]) || (codeset == s[2]))) { - i = ((category == LC_ALL) ? 0 : category); - s = new_locale + 2*i; - do { - /* Encode current locale row number. */ - *((unsigned char *) ++s) = (n >> 7) | 0x80; - *((unsigned char *) ++s) = (n & 0x7f) | 0x80; - } while (++i < category); - - return i; /* Return non-zero */ - } - s += WIDTH_LOCALES; - ++n; - } while (n <= NUM_LOCALES); /* We started at 1!!! */ - - return 0; /* Unsupported locale. */ -} - -static unsigned char *composite_locale(int category, const char *locale, unsigned char *new_locale) -{ - char buf[MAX_LOCALE_STR]; - char *t; - char *e; - int c; - - if (!strchr(locale,'=')) { - if (!find_locale(category, locale, new_locale)) { - return NULL; - } - return new_locale; - } - - if (strlen(locale) >= sizeof(buf)) { - return NULL; - } - stpcpy(buf, locale); - - t = strtok_r(buf, "=", &e); /* This can't fail because of strchr test above. */ - do { - for (c = 0 ; c < LC_ALL ; c++) { /* Find the category... */ - if (!strcmp(CATEGORY_NAMES + (int) CATEGORY_NAMES[c], t)) { - break; - } - } - t = strtok_r(NULL, ";", &e); - if ((category == LC_ALL) || (c == category)) { - if (!t || !find_locale(c, t, new_locale)) { - return NULL; + if (*loc == 0) { + *n++ = 'C'; + *n = 0; + } else { + char at = 0; + memcpy(n, LOCALE_NAMES + 5*((*loc)-1), 5); + if (n[2] != '_') { + at = n[2]; + n[2] = '_'; + } + n += 5; + *n++ = '.'; + if (loc[2] == 2) { + n = stpcpy(n, utf8); + } else if (loc[2] >= 3) { + n = stpcpy(n, CODESET_LIST + (int)(CODESET_LIST[loc[2] - 3])); + } + if (at) { + const char *q; + *n++ = '@'; + q = LOCALE_AT_MODIFIERS; + do { + if (q[1] == at) { + n = stpcpy(n, q+2); + break; + } + q += 2 + *q; + } while (*q); + } + } + *n++ = ';'; } - } - } while ((t = strtok_r(NULL, "=", &e)) != NULL); + s += 2; + } while (++i < category); + *--n = 0; /* Remove trailing ';' and nul-terminate. */ - return new_locale; + ++category; + } while (!done); } char *setlocale(int category, const char *locale) { - const unsigned char *p; - int i; - unsigned char new_locale[LOCALE_STRING_SIZE]; - if (((unsigned int)(category)) > LC_ALL) { - /* TODO - set errno? SUSv3 doesn't say too. */ +#if 0 + __set_errno(EINVAL); /* glibc sets errno -- SUSv3 doesn't say. */ +#endif return NULL; /* Illegal/unsupported category. */ } - if (locale != NULL) { /* Not just a query... */ - stpcpy(new_locale, CUR_LOCALE_SPEC); /* Start with current. */ - - if (!*locale) { /* locale == "", so check environment. */ - i = ((category == LC_ALL) ? 0 : category); - do { - /* Note: SUSv3 doesn't define a fallback mechanism here. So, - * if LC_ALL is invalid, we do _not_ continue trying the other - * environment vars. */ - if (!(p = getenv("LC_ALL"))) { - if (!(p = getenv(CATEGORY_NAMES + CATEGORY_NAMES[i]))) { - if (!(p = getenv("LANG"))) { - p = posix; - } - } - } - - /* The user set something... is it valid? */ - /* Note: Since we don't support user-supplied locales and - * alternate paths, we don't need to worry about special - * handling for suid/sgid apps. */ - if (!find_locale(i, p, new_locale)) { - return NULL; - } - } while (++i < category); - } else if (!composite_locale(category, locale, new_locale)) { + if (locale != NULL) { /* Not just a query... */ + if (!__newlocale((category == LC_ALL) ? LC_ALL_MASK : (1 << category), + locale, __global_locale) + ) { /* Failed! */ return NULL; } - - /* TODO: Ok, everything checks out, so install the new locale. */ - _locale_set(new_locale); + update_hr_locale(__global_locale->cur_locale); } /* Either a query or a successful set, so return current locale string. */ - return human_readable_locale(category, CUR_LOCALE_SPEC); + return hr_locale + (category * MAX_LOCALE_CATEGORY_STR); } #endif /* __LOCALE_C_ONLY */ @@ -355,7 +273,7 @@ char *setlocale(int category, const char *locale) #ifdef __LOCALE_C_ONLY -link_warning(localeconv,"the 'localeconv' function is hardwired for C/POSIX locale only") +link_warning(localeconv,"REMINDER: The 'localeconv' function is hardwired for C/POSIX locale only.") static struct lconv the_lconv; @@ -387,7 +305,7 @@ static struct lconv the_lconv; struct lconv *localeconv(void) { register char *p = (char *) &the_lconv; - register char **q = (char **) &__global_locale.decimal_point; + register char **q = (char **) &(__UCLIBC_CURLOCALE_DATA).decimal_point; do { *((char **)p) = *q; @@ -408,16 +326,22 @@ struct lconv *localeconv(void) #endif /**********************************************************************/ -#ifdef L__locale_init +#if defined(L__locale_init) && !defined(__LOCALE_C_ONLY) -#ifndef __LOCALE_C_ONLY +static __uclibc_locale_t __global_locale_data; -#define C_LOCALE_SELECTOR "\x23\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80" -#define LOCALE_INIT_FAILED "locale init failed!\n" +__locale_t __global_locale = &__global_locale_data; -#define CUR_LOCALE_SPEC (__global_locale.cur_locale) +#ifdef __UCLIBC_HAS_XLOCALE__ +__locale_t __curlocale_var = &__global_locale_data; +#endif -__locale_t __global_locale; +/*----------------------------------------------------------------------*/ +#ifdef __UCLIBC_MJN3_ONLY__ +#warning TODO: Move utf8 and ascii strings. +#endif +static const char utf8[] = "UTF-8"; +static const char ascii[] = "ASCII"; typedef struct { uint16_t num_base; @@ -461,9 +385,8 @@ typedef struct { uint16_t multistart_offset; } coldata_der_t; -static int init_cur_collate(int der_num) +static int init_cur_collate(int der_num, __collate_t *cur_collate) { - __collate_t *cur_collate = &__global_locale.collate; const uint16_t *__locale_collate_tbl = __locale_mmap->collate_data; coldata_header_t *cdh; coldata_base_t *cdb; @@ -485,9 +408,16 @@ static int init_cur_collate(int der_num) cdh = (coldata_header_t *) __locale_collate_tbl; +#ifdef __UCLIBC_MJN3_ONLY__ +#warning CONSIDER: Should we assert here? +#endif +#if 0 if (der_num >= cdh->num_der) { return 0; } +#else + assert((der_num < cdh->num_der)); +#endif cdd = (coldata_der_t *)(__locale_collate_tbl + (sizeof(coldata_header_t) @@ -539,14 +469,17 @@ static int init_cur_collate(int der_num) cur_collate->MAX_WEIGHTS = cdh->MAX_WEIGHTS; #ifdef __UCLIBC_MJN3_ONLY__ -#warning if calloc fails, this is WRONG. there is also a memory leak here at the moment -#warning fix the +1 by increasing max_col_index? +#warning CONSIDER: Fix the +1 by increasing max_col_index? +#warning CONSIDER: Since this collate info is dependent only on LC_COLLATE ll_cc and not on codeset, we could just globally allocate this for each in a table #endif - cur_collate->index2weight = calloc(2*cur_collate->max_col_index+2, sizeof(uint16_t)); + + cur_collate->index2weight = calloc(2*cur_collate->max_col_index+2, + sizeof(uint16_t)); if (!cur_collate->index2weight) { return 0; } - cur_collate->index2ruleidx = cur_collate->index2weight + cur_collate->max_col_index + 1; + cur_collate->index2ruleidx = cur_collate->index2weight + + cur_collate->max_col_index + 1; memcpy(cur_collate->index2weight, cur_collate->index2weight_tbl, cur_collate->num_col_base * sizeof(uint16_t)); @@ -602,61 +535,10 @@ static int init_cur_collate(int der_num) return 1; } -void _locale_init(void) -{ - /* TODO: mmap the locale file */ - - /* TODO - ??? */ - memset(CUR_LOCALE_SPEC, 0, LOCALE_STRING_SIZE); - CUR_LOCALE_SPEC[0] = '#'; - - memcpy(__global_locale.category_item_count, - __locale_mmap->lc_common_item_offsets_LEN, - LC_ALL); - - ++__global_locale.category_item_count[0]; /* Increment for codeset entry. */ - __global_locale.category_offsets[0] = offsetof(__locale_t, outdigit0_mb); - __global_locale.category_offsets[1] = offsetof(__locale_t, decimal_point); - __global_locale.category_offsets[2] = offsetof(__locale_t, int_curr_symbol); - __global_locale.category_offsets[3] = offsetof(__locale_t, abday_1); -/* __global_locale.category_offsets[4] = offsetof(__locale_t, collate???); */ - __global_locale.category_offsets[5] = offsetof(__locale_t, yesexpr); - -#ifdef __CTYPE_HAS_8_BIT_LOCALES - __global_locale.tbl8ctype - = (const unsigned char *) &__locale_mmap->tbl8ctype; - __global_locale.tbl8uplow - = (const unsigned char *) &__locale_mmap->tbl8uplow; -#ifdef __WCHAR_ENABLED - __global_locale.tbl8c2wc - = (const uint16_t *) &__locale_mmap->tbl8c2wc; - __global_locale.tbl8wc2c - = (const unsigned char *) &__locale_mmap->tbl8wc2c; - /* translit */ -#endif /* __WCHAR_ENABLED */ -#endif /* __CTYPE_HAS_8_BIT_LOCALES */ -#ifdef __WCHAR_ENABLED - __global_locale.tblwctype - = (const unsigned char *) &__locale_mmap->tblwctype; - __global_locale.tblwuplow - = (const unsigned char *) &__locale_mmap->tblwuplow; - __global_locale.tblwuplow_diff - = (const uint16_t *) &__locale_mmap->tblwuplow_diff; -/* __global_locale.tblwcomb */ -/* = (const unsigned char *) &__locale_mmap->tblwcomb; */ - /* width?? */ -#endif /* __WCHAR_ENABLED */ - - _locale_set(C_LOCALE_SELECTOR); -} - -static const char ascii[] = "ASCII"; -static const char utf8[] = "UTF-8"; - -void _locale_set(const unsigned char *p) +int _locale_set_l(const unsigned char *p, __locale_t base) { const char **x; - unsigned char *s = CUR_LOCALE_SPEC + 1; + unsigned char *s = base->cur_locale + 1; const size_t *stp; const unsigned char *r; const uint16_t *io; @@ -667,12 +549,30 @@ void _locale_set(const unsigned char *p) int len; int c; int i = 0; + __collate_t newcol; ++p; + + newcol.index2weight = NULL; + if ((p[2*LC_COLLATE] != s[2*LC_COLLATE]) + || (p[2*LC_COLLATE + 1] != s[2*LC_COLLATE + 1]) + ) { + row = (((int)(*p & 0x7f)) << 7) + (p[1] & 0x7f); + assert(row < __LOCALE_DATA_NUM_LOCALES); + if (!init_cur_collate(__locale_mmap->locales[ __LOCALE_DATA_WIDTH_LOCALES + * row + 3 + i ], + &newcol) + ) { + return 0; /* calloc failed. */ + } + free(base->collate.index2weight); + memcpy(&base->collate, &newcol, sizeof(__collate_t)); + } + do { if ((*p != *s) || (p[1] != s[1])) { row = (((int)(*p & 0x7f)) << 7) + (p[1] & 0x7f); - assert(row < NUM_LOCALES); + assert(row < __LOCALE_DATA_NUM_LOCALES); *s = *p; s[1] = p[1]; @@ -680,10 +580,13 @@ void _locale_set(const unsigned char *p) if ((i != LC_COLLATE) && ((len = __locale_mmap->lc_common_item_offsets_LEN[i]) != 0) ) { - crow = __locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ] + crow = __locale_mmap->locales[ __LOCALE_DATA_WIDTH_LOCALES * row + + 3 + i ] * len; - x = (const char **)(((char *) &__global_locale) - + __global_locale.category_offsets[i]); + + x = (const char **)(((char *) base) + + base->category_offsets[i]); + stp = __locale_mmap->lc_common_tbl_offsets + 4*i; r = (const unsigned char *)( ((char *)__locale_mmap) + *stp ); io = (const uint16_t *)( ((char *)__locale_mmap) + *++stp ); @@ -694,67 +597,176 @@ void _locale_set(const unsigned char *p) } } if (i == LC_CTYPE) { - c = __locale_mmap->locales[ WIDTH_LOCALES * row + 2 ]; /* codeset */ + c = __locale_mmap->locales[ __LOCALE_DATA_WIDTH_LOCALES * row + + 2 ]; /* codeset */ if (c <= 2) { if (c == 2) { - __global_locale.codeset = utf8; - __global_locale.encoding = __ctype_encoding_utf8; + base->codeset = utf8; + base->encoding = __ctype_encoding_utf8; /* TODO - fix for bcc */ - __global_locale.mb_cur_max = 6; + base->mb_cur_max = 6; } else { assert(c==1); - __global_locale.codeset = ascii; - __global_locale.encoding = __ctype_encoding_7_bit; - __global_locale.mb_cur_max = 1; + base->codeset = ascii; + base->encoding = __ctype_encoding_7_bit; + base->mb_cur_max = 1; } } else { - const codeset_8_bit_t *c8b; + const __codeset_8_bit_t *c8b; r = CODESET_LIST; - __global_locale.codeset = r + r[c -= 3]; - __global_locale.encoding = __ctype_encoding_8_bit; + base->codeset = r + r[c -= 3]; + base->encoding = __ctype_encoding_8_bit; #ifdef __UCLIBC_MJN3_ONLY__ -#warning REMINDER: update 8 bit mb_cur_max when trasnlit implemented! +#warning REMINDER: update 8 bit mb_cur_max when translit implemented! #endif /* TODO - update when translit implemented! */ - __global_locale.mb_cur_max = 1; + base->mb_cur_max = 1; c8b = __locale_mmap->codeset_8_bit + c; #ifdef __CTYPE_HAS_8_BIT_LOCALES - __global_locale.idx8ctype = c8b->idx8ctype; - __global_locale.idx8uplow = c8b->idx8uplow; -#ifdef __WCHAR_ENABLED - __global_locale.idx8c2wc = c8b->idx8c2wc; - __global_locale.idx8wc2c = c8b->idx8wc2c; + base->idx8ctype = c8b->idx8ctype; + base->idx8uplow = c8b->idx8uplow; +#ifdef __UCLIBC_HAS_WCHAR__ + base->idx8c2wc = c8b->idx8c2wc; + base->idx8wc2c = c8b->idx8wc2c; /* translit */ -#endif /* __WCHAR_ENABLED */ +#endif /* __UCLIBC_HAS_WCHAR__ */ #endif /* __CTYPE_HAS_8_BIT_LOCALES */ } #ifdef __UCLIBC_MJN3_ONLY__ -#warning might want to just put this in the locale_mmap object +#warning TODO: Put the outdigit string length in the locale_mmap object. #endif - d = __global_locale.outdigit_length; - x = &__global_locale.outdigit0_mb; + d = base->outdigit_length; + x = &base->outdigit0_mb; for (c = 0 ; c < 10 ; c++) { ((unsigned char *)d)[c] = strlen(x[c]); assert(d[c] > 0); } - } else if (i == LC_COLLATE) { - init_cur_collate(__locale_mmap->locales[ WIDTH_LOCALES * row + 3 + i ]); + } else if (i == LC_NUMERIC) { + assert(LC_NUMERIC > LC_CTYPE); /* Need ctype initialized. */ + + base->decimal_point_len + = __locale_mbrtowc_l(&base->decimal_point_wc, + base->decimal_point, base); + assert(base->decimal_point_len > 0); + assert(base->decimal_point[base->decimal_point_len] == 0); + + if (*base->grouping) { + base->thousands_sep_len + = __locale_mbrtowc_l(&base->thousands_sep_wc, + base->thousands_sep, base); + assert(base->thousands_sep_len > 0); + assert(base->thousands_sep[base->thousands_sep_len] == 0); + } + +/* } else if (i == LC_COLLATE) { */ +/* init_cur_collate(__locale_mmap->locales[ __LOCALE_DATA_WIDTH_LOCALES */ +/* * row + 3 + i ], */ +/* &base->collate); */ } } ++i; p += 2; s += 2; } while (i < LC_ALL); + + return 1; } -#endif /* __LOCALE_C_ONLY */ +static const uint16_t __code2flag[16] = { + 0, /* unclassified = 0 */ + _ISprint|_ISgraph|_ISalnum|_ISalpha, /* alpha_nonupper_nonlower */ + _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower, /* alpha_lower */ + _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISlower|_ISupper, /* alpha_upper_lower */ + _ISprint|_ISgraph|_ISalnum|_ISalpha|_ISupper, /* alpha_upper */ + _ISprint|_ISgraph|_ISalnum|_ISdigit, /* digit */ + _ISprint|_ISgraph|_ISpunct, /* punct */ + _ISprint|_ISgraph, /* graph */ + _ISprint|_ISspace, /* print_space_nonblank */ + _ISprint|_ISspace|_ISblank, /* print_space_blank */ + _ISspace, /* space_nonblank_noncntrl */ + _ISspace|_ISblank, /* space_blank_noncntrl */ + _IScntrl|_ISspace, /* cntrl_space_nonblank */ + _IScntrl|_ISspace|_ISblank, /* cntrl_space_blank */ + _IScntrl /* cntrl_nonspace */ +}; + +void _locale_init_l(__locale_t base) +{ + memset(base->cur_locale, 0, LOCALE_SELECTOR_SIZE); + base->cur_locale[0] = '#'; + + memcpy(base->category_item_count, + __locale_mmap->lc_common_item_offsets_LEN, + LC_ALL); + + ++base->category_item_count[0]; /* Increment for codeset entry. */ + base->category_offsets[0] = offsetof(__uclibc_locale_t, outdigit0_mb); + base->category_offsets[1] = offsetof(__uclibc_locale_t, decimal_point); + base->category_offsets[2] = offsetof(__uclibc_locale_t, int_curr_symbol); + base->category_offsets[3] = offsetof(__uclibc_locale_t, abday_1); +/* base->category_offsets[4] = offsetof(__uclibc_locale_t, collate???); */ + base->category_offsets[5] = offsetof(__uclibc_locale_t, yesexpr); +#ifdef __CTYPE_HAS_8_BIT_LOCALES + base->tbl8ctype + = (const unsigned char *) &__locale_mmap->tbl8ctype; + base->tbl8uplow + = (const unsigned char *) &__locale_mmap->tbl8uplow; +#ifdef __UCLIBC_HAS_WCHAR__ + base->tbl8c2wc + = (const uint16_t *) &__locale_mmap->tbl8c2wc; + base->tbl8wc2c + = (const unsigned char *) &__locale_mmap->tbl8wc2c; + /* translit */ +#endif /* __UCLIBC_HAS_WCHAR__ */ +#endif /* __CTYPE_HAS_8_BIT_LOCALES */ +#ifdef __UCLIBC_HAS_WCHAR__ + base->tblwctype + = (const unsigned char *) &__locale_mmap->tblwctype; + base->tblwuplow + = (const unsigned char *) &__locale_mmap->tblwuplow; + base->tblwuplow_diff + = (const uint16_t *) &__locale_mmap->tblwuplow_diff; +/* base->tblwcomb */ +/* = (const unsigned char *) &__locale_mmap->tblwcomb; */ + /* width?? */ +#endif /* __UCLIBC_HAS_WCHAR__ */ + + + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning wrong for now, but always set ctype arrays to global C version #endif -/**********************************************************************/ -#ifdef L_nl_langinfo +#ifdef __UCLIBC_HAS_XLOCALE__ + base->__ctype_b = __C_ctype_b; + base->__ctype_tolower = __C_ctype_tolower; + base->__ctype_toupper = __C_ctype_toupper; +#else /* __UCLIBC_HAS_XLOCALE__ */ + __ctype_b = __C_ctype_b; + __ctype_tolower = __C_ctype_tolower; + __ctype_toupper = __C_ctype_toupper; +#endif /* __UCLIBC_HAS_XLOCALE__ */ -#include <langinfo.h> -#include <nl_types.h> +#ifdef __UCLIBC_MJN3_ONLY__ +#warning TODO: Initialize code2flag correctly based on locale_mmap. +#endif + base->code2flag = __code2flag; + + + _locale_set_l(C_LOCALE_SELECTOR, base); +} + +void _locale_init(void) +{ + /* TODO: mmap the locale file */ + + /* TODO - ??? */ + _locale_init_l(__global_locale); +} + +#endif +/**********************************************************************/ +#if defined(L_nl_langinfo) || defined(L_nl_langinfo_l) #ifdef __LOCALE_C_ONLY @@ -853,22 +865,435 @@ char *nl_langinfo(nl_item item) #else /* __LOCALE_C_ONLY */ -static const char empty[] = ""; +#if defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) char *nl_langinfo(nl_item item) { + return nl_langinfo_l(item, __UCLIBC_CURLOCALE); +} + +#else /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */ + +static const char empty[] = ""; + +char *__XL(nl_langinfo)(nl_item item __LOCALE_PARAM ) +{ unsigned int c = _NL_ITEM_CATEGORY(item); unsigned int i = _NL_ITEM_INDEX(item); - if ((c < LC_ALL) && (i < __global_locale.category_item_count[c])) { - return ((char **)(((char *) &__global_locale) - + __global_locale.category_offsets[c]))[i]; - + if ((c < LC_ALL) && (i < __LOCALE_PTR->category_item_count[c])) { + return ((char **)(((char *) __LOCALE_PTR) + + __LOCALE_PTR->category_offsets[c]))[i]; } + return (char *) empty; } +#endif /* defined(__UCLIBC_HAS_XLOCALE__) && !defined(__UCLIBC_DO_XLOCALE) */ + #endif /* __LOCALE_C_ONLY */ #endif /**********************************************************************/ +#ifdef L_newlocale + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning TODO: Move posix and utf8 strings. +#endif +static const char posix[] = "POSIX"; +static const char utf8[] = "UTF-8"; + +static int find_locale(int category_mask, const char *p, + unsigned char *new_locale) +{ + int i; + const unsigned char *s; + uint16_t n; + unsigned char lang_cult, codeset; + +#if defined(__LOCALE_DATA_AT_MODIFIERS_LENGTH) && 1 + /* Support standard locale handling for @-modifiers. */ + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning REMINDER: Fix buf size in find_locale. +#endif + char buf[18]; /* TODO: 7+{max codeset name length} */ + const char *q; + + if ((q = strchr(p,'@')) != NULL) { + if ((((size_t)((q-p)-5)) > (sizeof(buf) - 5)) || (p[2] != '_')) { + return 0; + } + /* locale name at least 5 chars long and 3rd char is '_' */ + s = LOCALE_AT_MODIFIERS; + do { + if (!strcmp(s+2, q+1)) { + break; + } + s += 2 + *s; /* TODO - fix this throughout */ + } while (*s); + if (!*s) { + return 0; + } + assert(q - p < sizeof(buf)); + memcpy(buf, p, q-p); + buf[q-p] = 0; + buf[2] = s[1]; + p = buf; + } +#endif + + lang_cult = codeset = 0; /* Assume C and default codeset. */ + if (((*p == 'C') && !p[1]) || !strcmp(p, posix)) { + goto FIND_LOCALE; + } + + if ((strlen(p) > 5) && (p[5] == '.')) { /* Codeset in locale name? */ + /* TODO: maybe CODESET_LIST + *s ??? */ + /* 7bit is 1, UTF-8 is 2, 8-bit is >= 3 */ + codeset = 2; + if (strcmp(utf8,p+6) != 0) {/* TODO - fix! */ + s = CODESET_LIST; + do { + ++codeset; /* Increment codeset first. */ + if (!strcmp(CODESET_LIST+*s, p+6)) { + goto FIND_LANG_CULT; + } + } while (*++s); + return 0; /* No matching codeset! */ + } + } + + FIND_LANG_CULT: /* Find language_culture number. */ + s = LOCALE_NAMES; + do { /* TODO -- do a binary search? */ + /* TODO -- fix gen_mmap!*/ + ++lang_cult; /* Increment first since C/POSIX is 0. */ + if (!strncmp(s,p,5)) { /* Found a matching locale name; */ + goto FIND_LOCALE; + } + s += 5; + } while (lang_cult < __LOCALE_DATA_NUM_LOCALE_NAMES); + return 0; /* No matching language_culture! */ + + FIND_LOCALE: /* Find locale row matching name and codeset */ + s = LOCALES; + n = 0; + do { /* TODO -- do a binary search? */ + if ((lang_cult == *s) && ((codeset == s[1]) || (codeset == s[2]))) { + i = 1; + s = new_locale + 1; + do { + if (category_mask & i) { + /* Encode current locale row number. */ + ((unsigned char *) s)[0] = (n >> 7) | 0x80; + ((unsigned char *) s)[1] = (n & 0x7f) | 0x80; + } + s += 2; + i += i; + } while (i < (1 << LC_ALL)); + + return i; /* Return non-zero */ + } + s += __LOCALE_DATA_WIDTH_LOCALES; + ++n; + } while (n <= __LOCALE_DATA_NUM_LOCALES); /* We started at 1!!! */ + + return 0; /* Unsupported locale. */ +} + +static unsigned char *composite_locale(int category_mask, const char *locale, + unsigned char *new_locale) +{ + char buf[MAX_LOCALE_STR]; + char *t; + char *e; + int c; + int component_mask; + + if (!strchr(locale,'=')) { + if (!find_locale(category_mask, locale, new_locale)) { + return NULL; + } + return new_locale; + } + + if (strlen(locale) >= sizeof(buf)) { + return NULL; + } + stpcpy(buf, locale); + + component_mask = 0; + t = strtok_r(buf, "=", &e); /* This can't fail because of strchr test above. */ + do { + c = 0; + while (strcmp(CATEGORY_NAMES + (int) CATEGORY_NAMES[c], t)) { + if (++c == LC_ALL) { /* Unknown category name! */ + return NULL; + } + } + t = strtok_r(NULL, ";", &e); + c = (1 << c); + if (component_mask & c) { /* Multiple components for one category. */ + return NULL; + } + component_mask |= c; + if ((category_mask & c) && (!t || !find_locale(c, t, new_locale))) { + return NULL; + } + } while ((t = strtok_r(NULL, "=", &e)) != NULL); + + if (category_mask & ~component_mask) { /* Category component(s) missing. */ + return NULL; + } + + return new_locale; +} + +__locale_t __newlocale(int category_mask, const char *locale, __locale_t base) +{ + const unsigned char *p; + int i, j, k; + unsigned char new_selector[LOCALE_SELECTOR_SIZE]; + + if (!locale || (((unsigned int)(category_mask)) > LC_ALL_MASK)) { + INVALID: + __set_errno(EINVAL); + return NULL; /* No locale or illegal/unsupported category. */ + } + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning TODO: Rename cur_locale to locale_selector. +#endif + strcpy((char *) new_selector, + (base ? (char *) base->cur_locale : C_LOCALE_SELECTOR)); + + if (!*locale) { /* locale == "", so check environment. */ +#ifndef __UCLIBC_HAS_THREADS__ + static /* If no threads, then envstr can be static. */ +#endif /* __UCLIBC_HAS_THREADS__ */ + const char *envstr[4] = { "LC_ALL", NULL, "LANG", posix }; + + i = 1; + k = 0; + do { + if (category_mask & i) { + /* Note: SUSv3 doesn't define a fallback mechanism here. + * So, if LC_ALL is invalid, we do _not_ continue trying + * the other environment vars. */ + envstr[1] = CATEGORY_NAMES + CATEGORY_NAMES[k]; + j = 0; + do { + p = envstr[j]; + } while ((++j < 4) && (!(p = getenv(p)) || !*p)); + + + /* The user set something... is it valid? */ + /* Note: Since we don't support user-supplied locales and + * alternate paths, we don't need to worry about special + * handling for suid/sgid apps. */ + if (!find_locale(i, p, new_selector)) { + goto INVALID; + } + } + i += i; + } while (++k < LC_ALL); + } else if (!composite_locale(category_mask, locale, new_selector)) { + goto INVALID; + } + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning TODO: Do a compatible codeset check! +#endif + + /* If we get here, the new selector corresponds to a valid locale. */ + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning CONSIDER: Probably want a _locale_new func to allow for caching of locales. +#endif +#if 0 + if (base) { + _locale_set_l(new_selector, base); + } else { + base = _locale_new(new_selector); + } +#else + if (!base) { + if ((base = malloc(sizeof(__uclibc_locale_t))) == NULL) { + return base; + } + _locale_init_l(base); + } + + _locale_set_l(new_selector, base); +#endif + + return base; +} + +weak_alias(__newlocale, newlocale) + +#endif +/**********************************************************************/ +#ifdef L_duplocale + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning REMINDER: When we allocate ctype tables, remember to dup them. +#endif + +__locale_t duplocale(__locale_t dataset) +{ + __locale_t r; + uint16_t * i2w; + + assert(dataset != LC_GLOBAL_LOCALE); + + if ((r = malloc(sizeof(__uclibc_locale_t))) != NULL) { + if ((i2w = calloc(2*dataset->collate.max_col_index+2, + sizeof(uint16_t))) + != NULL + ) { + memcpy(r, dataset, sizeof(__uclibc_locale_t)); + r->collate.index2weight = i2w; + } else { + free(r); + r = NULL; + } + } + return r; +} + +#endif +/**********************************************************************/ +#ifdef L_freelocale + +#ifdef __UCLIBC_MJN3_ONLY__ +#warning REMINDER: When we allocate ctype tables, remember to free them. +#endif + +void freelocale(__locale_t dataset) +{ + assert(dataset != __global_locale); + assert(dataset != LC_GLOBAL_LOCALE); + + free(dataset->collate.index2weight); /* Free collation data. */ + free(dataset); /* Free locale */ +} + +#endif +/**********************************************************************/ +#ifdef L_uselocale + +__locale_t uselocale(__locale_t dataset) +{ + __locale_t old; + + if (!dataset) { + old = __UCLIBC_CURLOCALE; + } else { + if (dataset == LC_GLOBAL_LOCALE) { + dataset = __global_locale; + } +#ifdef __UCLIBC_HAS_THREADS__ + old = __curlocale_set(dataset); +#else + old = __curlocale_var; + __curlocale_var = dataset; +#endif + } + + if (old == __global_locale) { + return LC_GLOBAL_LOCALE; + } + return old; +} + +#endif +/**********************************************************************/ +#ifdef L___curlocale + +#ifdef __UCLIBC_HAS_THREADS__ + +__locale_t weak_const_function __curlocale(void) +{ + return __curlocale_var; /* This is overriden by the thread version. */ +} + +__locale_t weak_function __curlocale_set(__locale_t newloc) +{ + assert(newloc != LC_GLOBAL_LOCALE); + + __locale_t oldloc = __curlocale_var; + __curlocale_var = newloc; + return oldloc; +} + +#endif + +#endif +/**********************************************************************/ +#ifdef L___locale_mbrtowc_l + +/* NOTE: This returns an int... not size_t. Also, it is not a general + * routine. It is actually a very stripped-down version of mbrtowc + * that takes a __locale_t arg. This is used by strcoll and strxfrm. + * It is also used above to generate wchar_t versions of the decimal point + * and thousands seperator. */ + + +#ifndef __CTYPE_HAS_UTF_8_LOCALES +#warning __CTYPE_HAS_UTF_8_LOCALES not set! +#endif +#ifndef __CTYPE_HAS_8_BIT_LOCALES +#warning __CTYPE_HAS_8_BIT_LOCALES not set! +#endif + +#define Cc2wc_IDX_SHIFT __LOCALE_DATA_Cc2wc_IDX_SHIFT +#define Cc2wc_ROW_LEN __LOCALE_DATA_Cc2wc_ROW_LEN + +extern size_t _wchar_utf8sntowcs(wchar_t *__restrict pwc, size_t wn, + const char **__restrict src, size_t n, + mbstate_t *ps, int allow_continuation); + +int __locale_mbrtowc_l(wchar_t *__restrict dst, + const char *__restrict src, + __locale_t loc ) +{ +#ifdef __CTYPE_HAS_UTF_8_LOCALES + if (loc->encoding == __ctype_encoding_utf8) { + mbstate_t ps; + const char *p = src; + size_t r; + ps.mask = 0; + r = _wchar_utf8sntowcs(dst, 1, &p, SIZE_MAX, &ps, 1); + return (r == 1) ? (p-src) : r; /* Need to return 0 if nul char. */ + } +#endif + +#ifdef __CTYPE_HAS_8_BIT_LOCALES + assert((loc->encoding == __ctype_encoding_7_bit) || (loc->encoding == __ctype_encoding_8_bit)); +#else + assert(loc->encoding == __ctype_encoding_7_bit); +#endif + + if ((*dst = ((unsigned char)(*src))) < 0x80) { /* ASCII... */ + return (*src != 0); + } + +#ifdef __CTYPE_HAS_8_BIT_LOCALES + if (loc->encoding == __ctype_encoding_8_bit) { + wchar_t wc = *dst - 0x80; + *dst = __LOCALE_PTR->tbl8c2wc[ + (__LOCALE_PTR->idx8c2wc[wc >> Cc2wc_IDX_SHIFT] + << Cc2wc_IDX_SHIFT) + (wc & (Cc2wc_ROW_LEN - 1))]; + if (*dst) { + return 1; + } + } +#endif + + return -1; +} + +#endif +/**********************************************************************/ |