From d85536af73dc5d327075d983abfa69d70e129d20 Mon Sep 17 00:00:00 2001 From: Manuel Novoa III Date: Sat, 2 Jun 2001 21:46:42 +0000 Subject: Add locale-enabled strcoll function from vodz, plus supporting tool. --- extra/locale/README | 10 +- extra/locale/gen_collate_from_glibc.c | 207 ++++++++++++++++++++++ extra/locale/gen_ctype_from_glibc.c | 5 +- libc/misc/ctype/ctype.c | 2 +- libc/misc/ctype/ctype.h | 21 --- libc/misc/locale/_locale.h | 22 +++ libc/misc/locale/locale.c | 321 ++++++++++++++++++++++++++-------- libc/string/Makefile | 4 + libc/string/string.c | 31 ++++ 9 files changed, 520 insertions(+), 103 deletions(-) create mode 100644 extra/locale/gen_collate_from_glibc.c delete mode 100644 libc/misc/ctype/ctype.h create mode 100644 libc/misc/locale/_locale.h diff --git a/extra/locale/README b/extra/locale/README index f97bac36f..6ac487c9a 100644 --- a/extra/locale/README +++ b/extra/locale/README @@ -1,6 +1,8 @@ -The program gen_ctype_from_glibc.c will generate data files which can be -used by uClibc ctype functions to support locales. From the comments: +The programs gen_ctype_from_glibc.c and gen_collate_from_glibc.c +will generate data files which can be +used by uClibc ctype, strcoll and setlocale functions to support locales. +From the comments: /* * Generator locale ctype tables @@ -10,18 +12,18 @@ used by uClibc ctype functions to support locales. From the comments: * ./LOCALE/LC_CTYPE files for system with uclibc * * Written by Vladimir Oleynik 2001 - * Base on ideas Nickolay Saukh - * */ Sample usage to dump all the data files in a tmp directory: gcc gen_ctype_from_glibc.c -o gen_ctype_from_glibc +gcc gen_collate_from_glibc.c -o gen_collate_from_glibc mkdir tmp cd tmp ../gen_ctype_from_glibc -d /usr/share/locale -c +../gen_collate_from_glibc Then just move the directory or directories you need (not the .c files) to the uClibc locale file directory you set in Config. 
diff --git a/extra/locale/gen_collate_from_glibc.c b/extra/locale/gen_collate_from_glibc.c new file mode 100644 index 000000000..f6d12ba9c --- /dev/null +++ b/extra/locale/gen_collate_from_glibc.c @@ -0,0 +1,207 @@ +/* + * Generator collate table from glibc special for Uclibc. + * Author Vladimir Oleynik. vodz@usa.net (c) 2001 + * + * Require setuped work non-C LC_COLLATE + * This programm created ./LOCALE/LC_COLLATE file for Uclibc + * setlocale() and strcoll(). + * Without argument this programm used setlocale(LC_COLLATE, "") - + * equivalent result setlocale(LC_COLLATE, getenv("LC_XXX")) + * + * Also, this programm have russian koi8 collate for test + * working Uclibc ;-) + * + */ + +#include +#include +#include +#include +#include +#include +#include /* mkdir() */ +#include + + +/* For strong test russian locale LC_COLLATE="ru_RU.KOI8-R" */ +static const unsigned char koi8_weights[256] = { + 0, 99, 100, 101, 102, 103, 104, 105, +106, 2, 5, 3, 6, 4, 107, 108, +109, 110, 111, 112, 113, 114, 115, 116, +117, 118, 119, 120, 121, 122, 123, 124, + 1, 12, 21, 34, 30, 35, 33, 20, + 22, 23, 31, 36, 9, 8, 15, 14, +127, 128, 129, 131, 132, 133, 134, 135, +136, 137, 11, 10, 38, 40, 42, 13, + 29, 138, 140, 142, 144, 146, 148, 150, +152, 154, 156, 158, 160, 162, 164, 166, +168, 170, 172, 174, 176, 178, 180, 182, +184, 186, 188, 24, 32, 25, 17, 7, + 16, 139, 141, 143, 145, 147, 149, 151, +153, 155, 157, 159, 161, 163, 165, 167, +169, 171, 173, 175, 177, 179, 181, 183, +185, 187, 189, 26, 43, 27, 18, 125, + 50, 52, 54, 58, 62, 66, 70, 74, + 78, 82, 86, 90, 91, 92, 93, 94, + 95, 96, 97, 48, 98, 45, 46, 47, + 39, 41, 126, 49, 44, 130, 19, 37, + 51, 53, 55, 203, 56, 57, 59, 60, + 61, 63, 64, 65, 67, 68, 69, 71, + 72, 73, 75, 202, 76, 77, 79, 80, + 81, 83, 84, 85, 87, 88, 89, 28, +253, 191, 193, 237, 199, 201, 233, 197, +235, 209, 211, 213, 215, 217, 219, 221, +223, 255, 225, 227, 229, 231, 205, 195, +249, 247, 207, 241, 251, 243, 239, 245, +252, 190, 192, 236, 198, 200, 232, 196, 
+234, 208, 210, 212, 214, 216, 218, 220, +222, 254, 224, 226, 228, 230, 204, 194, +248, 246, 206, 240, 250, 242, 238, 244 +}; + +int gen_weights(const char *collate) +{ + int weights[256]; + int i,j; + char probe_str1[2]; + char probe_str2[2]; + char print_buf[16]; + int retcode = 0; + unsigned char out_weights[256]; + FILE *out; + + memset(weights, 0, sizeof(weights)); + probe_str1[1]=probe_str2[1]=0; + + for(i=0; i<256; i++) { + probe_str1[0] = i; + for(j=0; j<256; j++) { + probe_str2[0] = j; + if(strcoll(probe_str1, probe_str2)>0) { + weights[i]++; + if(i==j) { + fprintf(stderr, "\ +\nWarning! c1=%d == c2, but strcoll returned greater zero\n", i); + retcode++; + } + } + } + } + for(i=0; i<256; ) { + if(isprint(i)) + sprintf(print_buf, " '%c'", i); + else { + if(i=='\0') + strcpy(print_buf, "'\\0'"); + else if(i=='\a') + strcpy(print_buf, "'\\a'"); + else if(i=='\b') + strcpy(print_buf, "'\\b'"); + else if(i=='\f') + strcpy(print_buf, "'\\f'"); + else if(i=='\r') + strcpy(print_buf, "'\\r'"); + else if(i=='\t') + strcpy(print_buf, "'\\t'"); + else sprintf(print_buf, " x%02X", i); + } + printf("weights[%s] = %3d ", print_buf, weights[i]); + i++; + if( (i%4) == 0) + printf("\n"); + } + + for(i=0; i<256; i++) { + if(weights[i]<0 || weights[i]>=256) { + fprintf(stderr, "Hmm, weights[%d]=%d\n", i, weights[i]); + retcode++; + } + for(j=0; j<256; j++) { + if(i==j) + continue; + if(weights[i]==weights[j]) { + fprintf(stderr, "\ +Warning! 
c1=%d c2=%d and strcoll returned equivalent weight\n", i, j); + retcode++; + } + } + } + if(retcode) + return 1; + + if(strcasecmp(collate, "ru_RU.KOI8-R")==0 || + strcmp(collate, "ru_RU")==0 || + strcmp(collate, "koi8-r")==0) { + for(i=0; i<256; i++) + if(weights[i]!=koi8_weights[i]) { + fprintf(stderr, "\ +Error koi8-r collate compare, glibc weights[%d]=%d but current generation %d\n", + i, koi8_weights[i], weights[i]); + retcode++; + } + if(retcode) + return 5; + } + for(i=0; i<256; i++) + out_weights[i] = weights[i]; + out = fopen("LC_COLLATE", "w"); + if(out == NULL) { + fprintf(stderr, "Can`t create ./%s/LC_COLLATE file\n", collate); + return 10; + } + if(fwrite(out_weights, 1, 256, out)!=256) { + fprintf(stderr, "IO error in process write ./%s/LC_COLLATE file\n", collate); + return 11; + } + return 0; +} + +int main(int argc, char **argv) +{ + char *locale; + char *slr; + char *collate; + + if(argc<1 || argc>2) { + fprintf(stderr, "Usage: %s [locale]\n", argv[0]); + } + locale = argc==1 ? 
"" : argv[1]; + + collate = setlocale(LC_COLLATE, locale); + fprintf(stderr, "setlocale(LC_COLLATE, \"%s\") returned %s\n", locale, collate); + if(collate==0) { + fprintf(stderr, "Can`t set LC_COLLATE\n"); + return 2; + } + if(strcmp(collate, "C")==0) { + fprintf(stderr, "\ +LC_COLLATE=\"C\" is trivial and not interesting for this programm\n"); + return 3; + } + slr = setlocale(LC_CTYPE, locale); + fprintf(stderr, "setlocale(LC_CTYPE, \"%s\") returned %s\n", locale, slr); + if(slr==0) { + slr = setlocale(LC_CTYPE, "POSIX"); + if(slr==0) { + fprintf(stderr, "Hmm, can`t set setlocale(LC_CTYPE, \"POSIX\")\n"); + return 4; + } + } + if(mkdir(collate, 0755)!=0 && errno!=EEXIST) { + fprintf(stderr, "Can`t make directory %s\n", collate); + return 6; + } + if(chdir(collate)) { + fprintf(stderr, "Hmm, can`t change directory to %s\n", collate); + return 7; + } + if(gen_weights(collate)) { + if(chdir("..")) { + fprintf(stderr, "Hmm, can`t change to current directory\n"); + return 7; + } + rmdir(collate); + return 1; + } + return 0; +} diff --git a/extra/locale/gen_ctype_from_glibc.c b/extra/locale/gen_ctype_from_glibc.c index 5cbceb052..0488048cd 100644 --- a/extra/locale/gen_ctype_from_glibc.c +++ b/extra/locale/gen_ctype_from_glibc.c @@ -19,8 +19,9 @@ #include #include #include +#include -#include "../../misc/ctype/ctype.h" +#include "../../libc/misc/locale/_locale.h" #define DEFAULT_LOCALE_DIR "/usr/share/locale/" @@ -229,7 +230,7 @@ Defaults:\n\ printf("setlocale(LC_CTYPE, %s) returned %s\n", ln, t); if(t==0) continue; - if(mkdir(ln, 0755)) { + if(mkdir(ln, 0755)!=0 && errno!=EEXIST) { fprintf(stderr, "Can`t create directory `%s'\n", ln); continue; } diff --git a/libc/misc/ctype/ctype.c b/libc/misc/ctype/ctype.c index 8d6a1dba7..18ffed4a5 100644 --- a/libc/misc/ctype/ctype.c +++ b/libc/misc/ctype/ctype.c @@ -157,7 +157,7 @@ toupper( int c ) #else /* __UCLIBC_HAS_LOCALE__ */ #include -#include "./ctype.h" +#include "../locale/_locale.h" #define _UC_ISCTYPE(c, type) \ ((c 
!= -1) && ((_uc_ctype_b[(int)((unsigned char)c)] & type) != 0)) diff --git a/libc/misc/ctype/ctype.h b/libc/misc/ctype/ctype.h deleted file mode 100644 index f9a34cb18..000000000 --- a/libc/misc/ctype/ctype.h +++ /dev/null @@ -1,21 +0,0 @@ -extern const unsigned char *_uc_ctype_b; -extern const unsigned char *_uc_ctype_trans; - -extern const unsigned char _uc_ctype_b_C[256+256]; - -#define LOCALE_BUF_SIZE (sizeof(_uc_ctype_b_C)) - -#define ISbit(bit) (1 << bit) - -enum -{ - ISprint = ISbit (0), /* 1 Printable. */ - ISupper = ISbit (1), /* 2 UPPERCASE. */ - ISlower = ISbit (2), /* 4 lowercase. */ - IScntrl = ISbit (3), /* 8 Control character. */ - ISspace = ISbit (4), /* 16 Whitespace. */ - ISpunct = ISbit (5), /* 32 Punctuation. */ - ISalpha = ISbit (6), /* 64 Alphabetic. */ - ISxdigit = ISbit (7), /* 128 Hexnumeric. */ -}; - diff --git a/libc/misc/locale/_locale.h b/libc/misc/locale/_locale.h new file mode 100644 index 000000000..139a862f9 --- /dev/null +++ b/libc/misc/locale/_locale.h @@ -0,0 +1,22 @@ +extern const unsigned char *_uc_ctype_b; +extern const unsigned char *_uc_ctype_trans; + +extern const unsigned char _uc_ctype_b_C[256+256]; + +#define LOCALE_BUF_SIZE (sizeof(_uc_ctype_b_C)) + +#define ISbit(bit) (1 << bit) + +enum +{ + ISprint = ISbit (0), /* 1 Printable. */ + ISupper = ISbit (1), /* 2 UPPERCASE. */ + ISlower = ISbit (2), /* 4 lowercase. */ + IScntrl = ISbit (3), /* 8 Control character. */ + ISspace = ISbit (4), /* 16 Whitespace. */ + ISpunct = ISbit (5), /* 32 Punctuation. */ + ISalpha = ISbit (6), /* 64 Alphabetic. */ + ISxdigit = ISbit (7), /* 128 Hexnumeric. 
*/ +}; + +extern const unsigned char *_uc_collate_b; diff --git a/libc/misc/locale/locale.c b/libc/misc/locale/locale.c index 2abdde34e..d978ae37c 100644 --- a/libc/misc/locale/locale.c +++ b/libc/misc/locale/locale.c @@ -1,5 +1,5 @@ /* setlocale.c - * Load LC_CTYPE locale only special for uclibc + * Load LC_CTYPE and LC_COLLATE locale only special for uclibc * * Written by Vladimir Oleynik (c) vodz@usa.net * @@ -8,103 +8,162 @@ * used ideas is part of the GNU C Library. */ -/* - * No-locale-support setlocale() added. - */ - #include #include /* NULL, fopen */ #include /* malloc */ #include #include /* PATH_MAX */ +#include /* EINVAL */ +#include /* get(e)[u|g]id */ -#include "../ctype/ctype.h" - -#undef TEST_LOCALE +#include "_locale.h" +static char C_LOCALE_NAME []="C"; +static char POSIX_LOCALE_NAME[]="POSIX"; +static char composite_name_C []= +"LC_CTYPE=C;LC_NUMERIC=C;LC_TIME=C;LC_COLLATE=C;LC_MONETARY=C;LC_MESSAGES=C"; #ifdef __UCLIBC_HAS_LOCALE__ -static char C_LOCALE_NAME[]="C"; - #ifdef TEST_LOCALE static const char PATH_LOCALE[]="./"; #else static const char PATH_LOCALE[]=__UCLIBC_LOCALE_DIR; #endif -static const char LC_CTYPE_STR[]="/LC_CTYPE"; - struct SAV_LOADED_LOCALE { + int category; char *locale; const unsigned char *buf; struct SAV_LOADED_LOCALE *next; }; +static struct SAV_LOADED_LOCALE sll_C_LC_MESSAGES = { + LC_MESSAGES, C_LOCALE_NAME, 0, 0 +}; -static struct SAV_LOADED_LOCALE sav_loaded_locale [1] = { - { C_LOCALE_NAME, _uc_ctype_b_C, 0 } +static struct SAV_LOADED_LOCALE sll_C_LC_MONETARY = { + LC_MONETARY, C_LOCALE_NAME, 0, &sll_C_LC_MESSAGES }; -static struct SAV_LOADED_LOCALE * old_locale = sav_loaded_locale; +static struct SAV_LOADED_LOCALE sll_C_LC_COLLATE = { + LC_COLLATE, C_LOCALE_NAME, 0, &sll_C_LC_MONETARY +}; -static char *set_new_locale(struct SAV_LOADED_LOCALE * s_locale) -{ - _uc_ctype_b = s_locale->buf; - _uc_ctype_trans = s_locale->buf+LOCALE_BUF_SIZE/2; - old_locale = s_locale; - return s_locale->locale; -} +static struct 
SAV_LOADED_LOCALE sll_C_LC_TIME = { + LC_TIME, C_LOCALE_NAME, 0, &sll_C_LC_COLLATE +}; -/* Current support only LC_CTYPE or LC_ALL category */ +static struct SAV_LOADED_LOCALE sll_C_LC_NUMERIC = { + LC_NUMERIC, C_LOCALE_NAME, 0, &sll_C_LC_TIME +}; -char *setlocale(int category, const char *locale) +static struct SAV_LOADED_LOCALE sll_C_LC_CTYPE = { + LC_CTYPE, C_LOCALE_NAME, _uc_ctype_b_C, &sll_C_LC_NUMERIC +}; + +static struct SAV_LOADED_LOCALE *sll = &sll_C_LC_CTYPE; + + +#endif /* __UCLIBC_HAS_LOCALE__ */ + + +static char *nl_current[LC_ALL+1] = { + C_LOCALE_NAME, C_LOCALE_NAME, C_LOCALE_NAME, + C_LOCALE_NAME, C_LOCALE_NAME, C_LOCALE_NAME, + composite_name_C +}; + +static const char * const LC_strs[LC_ALL+1] = { + "/LC_CTYPE", + "/LC_NUMERIC", + "/LC_TIME", + "/LC_COLLATE", + "/LC_MONETARY", + "/LC_MESSAGES", + "/LC_ALL" +}; + +static char *find_locale(int c, const char **plocale) { - FILE * fl; +#ifdef __UCLIBC_HAS_LOCALE__ struct SAV_LOADED_LOCALE *cur; - struct SAV_LOADED_LOCALE *bottom; - char full_path[PATH_MAX]; - char * buf = 0; - int l; +#endif + const char *name = *plocale; + + if (name[0] == '\0') { + /* The user decides which locale to use by setting environment + variables. */ + name = getenv (&LC_strs[LC_ALL][1]); + if (name == NULL || name[0] == '\0') + name = getenv (&LC_strs[c][1]); + if (name == NULL || name[0] == '\0') + name = getenv ("LANG"); + if (name == NULL || name[0] == '\0') + name = C_LOCALE_NAME; + } - if(category!=LC_CTYPE && category!=LC_ALL) - return NULL; + if (strcmp (name, C_LOCALE_NAME) == 0 || + strcmp (name, POSIX_LOCALE_NAME) == 0 || + /* TODO! 
*/ (c!=LC_CTYPE && c!=LC_COLLATE)) + name = C_LOCALE_NAME; - if(locale==0) - return set_new_locale(old_locale); + *plocale = name; - if(strcmp(locale, "POSIX")==0) - return set_new_locale(sav_loaded_locale); - else if(*locale == '\0') { +#ifdef __UCLIBC_HAS_LOCALE__ + for(cur = sll; cur; cur = cur->next) + if(cur->category == c && strcmp(cur->locale, name)==0) + return cur->locale; +#else + if(name == C_LOCALE_NAME) + return C_LOCALE_NAME; +#endif + return NULL; +} - locale = getenv(LC_CTYPE_STR+1); - if(locale == 0 || *locale == 0) - locale = getenv("LANG"); - if(locale == 0 || *locale == '\0') - return set_new_locale(old_locale); - if(strcmp(locale, "POSIX")==0) - return set_new_locale(sav_loaded_locale); - } - for(cur = sav_loaded_locale; cur; cur = cur->next) - if(strcmp(cur->locale, locale)==0) - return set_new_locale(cur); +#ifdef __UCLIBC_HAS_LOCALE__ +static char *load_locale(int category, const char *locale) +{ + FILE * fl; + char full_path[PATH_MAX]; + char * buf = 0; + struct SAV_LOADED_LOCALE *cur; + struct SAV_LOADED_LOCALE *bottom; + int bufsize; + int l = strlen(locale); - l = strlen(locale); - if((l+sizeof(PATH_LOCALE)+sizeof(LC_CTYPE_STR))>=PATH_MAX) + if((l+sizeof(PATH_LOCALE)+strlen(LC_strs[category]))>=PATH_MAX) return NULL; + /* Not allow acces suid/sgid binaries to outside PATH_LOCALE */ + if((geteuid()!=getuid() || getegid()!=getgid()) && + strchr(locale, '/')!=NULL) + return NULL; + strcpy(full_path, PATH_LOCALE); strcat(full_path, locale); - strcat(full_path, LC_CTYPE_STR); + strcat(full_path, LC_strs[category]); fl = fopen(full_path, "r"); if(fl==0) return NULL; - cur = malloc(sizeof(struct SAV_LOADED_LOCALE)+LOCALE_BUF_SIZE+l); + switch(category) { + case LC_CTYPE: + bufsize = LOCALE_BUF_SIZE; + break; + case LC_COLLATE: + bufsize = 256; + break; + default: /* TODO */ + bufsize = 0; + break; + } + + cur = malloc(sizeof(struct SAV_LOADED_LOCALE)+bufsize+l+2); if(cur) { buf = (char *)(cur+1); - if(fread(buf, 1, LOCALE_BUF_SIZE+1, 
fl)!=(LOCALE_BUF_SIZE)) { + if(bufsize!=0 && fread(buf, 1, bufsize+1, fl)!=(bufsize)) { /* broken locale file */ free(cur); buf = 0; @@ -117,45 +176,157 @@ char *setlocale(int category, const char *locale) fclose(fl); if(cur==0) /* not enough memory */ return NULL; - if(buf==0) /* broken locale file, set to "C" */ - return set_new_locale(sav_loaded_locale); + if(buf==0) { /* broken locale file, set to "C" */ + return C_LOCALE_NAME; + } - cur->next = 0; - strcpy(buf+LOCALE_BUF_SIZE, locale); + cur->next = 0; + cur->buf = buf; + cur->category = category; + cur->locale = buf+bufsize; + strcpy(cur->locale, locale); - bottom = sav_loaded_locale; + bottom = sll; while(bottom->next!=0) bottom = bottom->next; bottom->next = cur; - /* next two line only pedantic */ - cur->buf = buf; - cur->locale = buf+LOCALE_BUF_SIZE; + return cur->locale; +} + +static char *set_composite(int category, char *locale) +{ + int i, l; + char *old_composite_name = nl_current[LC_ALL]; + char *new_composite_name; + struct SAV_LOADED_LOCALE *cur; - return set_new_locale(cur); + for(l=i=0; i LC_ALL)) { /* Illegal category! */ + if (category < 0 || category > LC_ALL) { +#ifdef __UCLIBC_HAS_LOCALE__ +einval: +#endif + errno = EINVAL; return NULL; } - return "C"; -} + if(locale==NULL) + return nl_current[category]; + if(category!=LC_ALL) { + tl = find_locale(category, &locale); +#ifdef __UCLIBC_HAS_LOCALE__ + if(tl==NULL) + tl = load_locale(category, locale); + if(tl) { + if(nl_current[category] != tl) + tl = set_composite(category, tl); + } #endif + return tl; + } + /* LC_ALL */ +#ifdef __UCLIBC_HAS_LOCALE__ + /* The user wants to set all categories. The desired locales + for the individual categories can be selected by using a + composite locale name. This is a semi-colon separated list + of entries of the form `CATEGORY=VALUE'. */ + tl = strchr(locale, ';'); + if(tl==NULL) { + /* This is not a composite name. Load the data for each category. 
*/ + for(i=0; istrcmp */ + +int strcoll(const char *s1, const char *s2) +{ + unsigned char c1, c2; + + while(1) { + c1 = (unsigned char) *s1; + c2 = (unsigned char) *s2; + if(_uc_collate_b) { /* setuped non-C locale? */ + c1 = _uc_collate_b[c1]; + c2 = _uc_collate_b[c2]; + } + if (*s1 == '\0' || c1 != c2) + return c1 - c2; + s1++; + s2++; + } +} +#endif /* __UCLIBC_HAS_LOCALE__ */ #endif /********************** Function strncat ************************************/ -- cgit v1.2.3