diff options
author | Manuel Novoa III <mjn3@codepoet.org> | 2002-05-06 07:37:32 +0000 |
---|---|---|
committer | Manuel Novoa III <mjn3@codepoet.org> | 2002-05-06 07:37:32 +0000 |
commit | d07fdf8b9ece2c4339b325921add50792077bf97 (patch) | |
tree | b0886656bdd854728f2d1c05597368c4739ecc1b /extra/locale/gen_locale.c | |
parent | 7f09a14cabbec158d683542e53f53ccfe75031fa (diff) |
New locale support (in development). Supports LC_CTYPE, LC_NUMERIC,
LC_TIME, LC_MONETARY, and LC_MESSAGES for the SUSv3 items. Also,
nl_langinfo() when real locale support is enabled.
New implementation of ctype.h.
New implementation of wctype.h.
New implementation of most of the string functions (smaller).
New implementation of the wcs/wmem functions. These are untested, but
they're also just preprocessor-modified versions of the corresponding
str/mem functions.
Tweaked qsort and new bsearch.
Stuff still pending:
stdlib.h and wchar.h mb<->wc functions. I actually have working
versions of the stdlib ones, but the reentrant versions from
wchar.h require some reworking.
Basic replacement and translit support for wc->mb conversions.
(groundwork laid).
Simple-minded collate support such as was provided by the previous
locale implementation. (mostly done -- 8-bit codesets only)
Shared mmapping of the locale data and strerror message text.
Diffstat (limited to 'extra/locale/gen_locale.c')
-rw-r--r-- | extra/locale/gen_locale.c | 1254 |
1 files changed, 1254 insertions, 0 deletions
diff --git a/extra/locale/gen_locale.c b/extra/locale/gen_locale.c new file mode 100644 index 000000000..9b2576e50 --- /dev/null +++ b/extra/locale/gen_locale.c @@ -0,0 +1,1254 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <assert.h> +#include <locale.h> +#include <langinfo.h> +#include <nl_types.h> +#include <stdint.h> + +#define __CTYPE_HAS_8_BIT_LOCALES +#include "c8tables.h" + + +#define CATEGORIES 6 + +/* must agree with ordering of gen_mmap! */ +static const unsigned char *lc_names[] = { + "LC_CTYPE", + "LC_NUMERIC", + "LC_MONETARY", + "LC_TIME", + "LC_COLLATE", + "LC_MESSAGES", +#if CATEGORIES == 12 + "LC_PAPER", + "LC_NAME", + "LC_ADDRESS", + "LC_TELEPHONE", + "LC_MEASUREMENT", + "LC_IDENTIFICATION", +#elif CATEGORIES != 6 +#error unsupported CATEGORIES value! +#endif +}; + + + +typedef struct { + char *glibc_name; + char name[5]; + char dot_cs; /* 0 if no codeset specified */ + char cs; + unsigned char idx_name; + unsigned char lc_time_row; + unsigned char lc_numeric_row; + unsigned char lc_monetary_row; + unsigned char lc_messages_row; +#if CATEGORIES != 6 +#error unsupported CATEGORIES value +#endif +} locale_entry; + +static void read_at_mappings(void); +static void read_enable_disable(void); +static void read_locale_list(void); + +static int find_codeset_num(const char *cs); +static int find_at_string_num(const char *as); +static int le_cmp(const void *, const void *); +static void dump_table8(const char *name, const char *tbl, int len); +static void dump_table8c(const char *name, const char *tbl, int len); +static void dump_table16(const char *name, const int *tbl, int len); + +static void do_lc_time(void); +static void do_lc_numeric(void); +static void do_lc_monetary(void); + +static void do_lc_messages(void); + + +static FILE *fp; +static FILE *ofp; +static char line_buf[80]; +static char at_mappings[256]; +static char at_mapto[256]; +static char at_strings[1024]; +static char 
*at_strings_end; +static locale_entry locales[700]; +static char glibc_locale_names[60000]; + +static int num_locales; + +static int default_utf8; +static int default_8bit; + +static int total_size; +static int null_count; + +static void do_locale_names(void) +{ + /* "C" locale name is handled specially by the setlocale code. */ + int uniq = 0; + int i; + + if (num_locales <= 1) { +/* printf("error - only C locale?\n"); */ +/* exit(EXIT_FAILURE); */ + fprintf(ofp, "static const unsigned char __locales[%d];\n", (3 + CATEGORIES)); + fprintf(ofp, "static const unsigned char __locale_names5[5];\n"); + } else { + fprintf(ofp, "#define CATEGORIES\t\t\t%d\n", CATEGORIES); + fprintf(ofp, "#define WIDTH_LOCALES\t\t\t%d\n", 3+CATEGORIES); + fprintf(ofp, "#define NUM_LOCALES\t\t\t%d\n", num_locales); + fprintf(ofp, "static const unsigned char __locales[%d] = {\n", + (num_locales) * (3 + CATEGORIES)); + for (i=0 ; i < num_locales ; i++) { + if (memcmp(locales[i].name, locales[i-1].name, 5) != 0) { + locales[i].idx_name = uniq; + ++uniq; + } else { + locales[i].idx_name = uniq - 1; + } + fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].idx_name)); + fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].dot_cs)); + fprintf(ofp, "\t%#4x, ", (int)((unsigned char) locales[i].cs)); + /* lc_ctype would store translit flags and turkish up/low flag. 
*/ + fprintf(ofp, "%#4x, ", 0); /* place holder for lc_ctype */ + fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_numeric_row)); + fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_monetary_row)); + fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_time_row)); + fprintf(ofp, "%#4x, ", 0); /* place holder for lc_collate */ + fprintf(ofp, "%#4x, ", (int)((unsigned char) locales[i].lc_messages_row)); + fprintf(ofp, "\t/* %s */\n", locales[i].glibc_name); + } + fprintf(ofp, "};\n\n"); + + fprintf(ofp, "#define NUM_LOCALE_NAMES\t\t%d\n", uniq ); + fprintf(ofp, "static const unsigned char __locale_names5[%d] = \n\t", uniq * 5); + uniq = 0; + for (i=1 ; i < num_locales ; i++) { + if (memcmp(locales[i].name, locales[i-1].name, 5) != 0) { + fprintf(ofp, "\"%5.5s\" ", locales[i].name); + ++uniq; + if ((uniq % 8) == 0) { + fprintf(ofp, "\n\t"); + } + } + } + fprintf(ofp,";\n\n"); + + if (at_strings_end > at_strings) { + int i, j; + char *p; + i = 0; + p = at_strings; + while (*p) { + ++i; + p += 1 + (unsigned char) *p; + } + /* len, char, string\0 */ + fprintf(ofp, "#define LOCALE_AT_MODIFIERS_LENGTH\t\t%d\n", + i + (at_strings_end - at_strings)); + fprintf(ofp, "static const unsigned char __locale_at_modifiers[%d] = {", + i + (at_strings_end - at_strings)); + i = 0; + p = at_strings; + while (*p) { + fprintf(ofp, "\n\t%4d, '%c',", + (unsigned char) *p, /* len of string\0 */ + at_mapto[i]); + for (j=1 ; j < ((unsigned char) *p) ; j++) { + fprintf(ofp, " '%c',", p[j]); + } + fprintf(ofp, " 0,"); + ++i; + p += 1 + (unsigned char) *p; + } + fprintf(ofp, "\n};\n\n"); + } + + { + int pos[CATEGORIES]; + pos[0] = CATEGORIES; + for (i=0 ; i < CATEGORIES ; i++) { + fprintf(ofp, "#define __%s\t\t%d\n", lc_names[i], i); + if (i + 1 < CATEGORIES) { + pos[i+1] = 1 + strlen(lc_names[i]) + pos[i]; + } + } + if (pos[CATEGORIES-1] > 255) { + printf("error - lc_names is too big (%d)\n", pos[CATEGORIES-1]); + exit(EXIT_FAILURE); + } + fprintf(ofp, "#define 
__LC_ALL\t\t%d\n\n", i); + + fprintf(ofp, "#define lc_names_LEN\t\t%d\n", + pos[CATEGORIES-1] + strlen(lc_names[CATEGORIES-1]) + 1); + total_size += pos[CATEGORIES-1] + strlen(lc_names[CATEGORIES-1]) + 1; + + fprintf(ofp, "static unsigned const char lc_names[%d] =\n", + pos[CATEGORIES-1] + strlen(lc_names[CATEGORIES-1]) + 1); + fprintf(ofp, "\t\""); + for (i=0 ; i < CATEGORIES ; i++) { + fprintf(ofp, "\\x%02x", (unsigned char) pos[i]); + } + fprintf(ofp, "\""); + for (i=0 ; i < CATEGORIES ; i++) { + fprintf(ofp, "\n\t\"%s\\0\"", lc_names[i]); + } + fprintf(ofp, ";\n\n"); + } + + printf("locale data = %d name data = %d for %d uniq\n", + num_locales * (3 + CATEGORIES), uniq * 5, uniq); + + total_size += num_locales * (3 + CATEGORIES) + uniq * 5; + } + +} + +int main(int argc, char **argv) +{ + if ((argc != 2) || (!(fp = fopen(*++argv, "r")))) { + printf("error: missing filename or file!\n"); + return EXIT_FAILURE; + } + + at_strings_end = at_strings; + + read_at_mappings(); + read_enable_disable(); + read_locale_list(); + + fclose(fp); + + /* handle C locale specially */ + qsort(locales+1, num_locales-1, sizeof(locale_entry), le_cmp); + +#if 0 + for (i=0 ; i < num_locales ; i++) { + printf("locale: %5.5s %2d %2d %s\n", + locales[i].name, + locales[i].cs, + locales[i].dot_cs, + locales[i].glibc_name + ); + } +#endif + + if (!(ofp = fopen("locale_tables.h", "w"))) { + printf("error: can not open locale_tables.h for writing!\n"); + return EXIT_FAILURE; + } + + do_lc_time(); + do_lc_numeric(); + do_lc_monetary(); + do_lc_messages(); + + do_locale_names(); + + fclose(ofp); + + printf("total data size = %d\n", total_size); + printf("null count = %d\n", null_count); + + return EXIT_SUCCESS; +} + +static char *idx[10000]; +static char buf[100000]; +static char *last; +static int uniq; + +static int addstring(const char *s) +{ + int j; + size_t l; + + if (!s) { + ++null_count; + return 0; + } + + for (j=0 ; j < uniq ; j++) { + if (!strcmp(s, idx[j])) { + return idx[j] - buf; 
+ } + } + if (uniq >= sizeof(idx)) { + printf("too many uniq strings!\n"); + exit(EXIT_FAILURE); + } + l = strlen(s) + 1; + if (last + l >= buf + sizeof(buf)) { + printf("need to increase size of buf!\n"); + exit(EXIT_FAILURE); + } + + idx[uniq] = last; + ++uniq; + strcpy(last, s); + last += l; + return idx[uniq - 1] - buf; +} + +static int buf16[50*256]; + + +static int lc_time_item[50][256]; +static int lc_time_count[50]; +static unsigned char lc_time_uniq_50[700][50]; +static int lc_time_uniq; + +#define DO_NL_S(X) lc_time_S(X, k++) + +static void lc_time_S(int X, int k) +{ + int j, m; + j = addstring(nl_langinfo(X)); + for (m=0 ; m < lc_time_count[k] ; m++) { + if (lc_time_item[k][m] == j) { + break; + } + } + if (m == lc_time_count[k]) { /* new for this nl_item */ + if (m > 255) { + printf("too many nl_item %d entries in lc_time\n", k); + exit(EXIT_FAILURE); + } + lc_time_item[k][m] = j; + ++lc_time_count[k]; + } +/* printf("\\x%02x", m); */ + lc_time_uniq_50[lc_time_uniq][k] = m; +} + +static void do_lc_time(void) +{ + int i, k, m; + + last = buf+1; + uniq = 1; + *buf = 0; + *idx = buf; + +/* printf("processing lc_time..."); */ + for (i=0 ; i < num_locales ; i++) { + k = 0; + +/* printf(" %d", i); fflush(stdout); */ + if (!setlocale(LC_ALL, locales[i].glibc_name)) { + printf("setlocale(LC_ALL,%s) failed!\n", + locales[i].glibc_name); + } + + DO_NL_S(ABDAY_1); + DO_NL_S(ABDAY_2); + DO_NL_S(ABDAY_3); + DO_NL_S(ABDAY_4); + DO_NL_S(ABDAY_5); + DO_NL_S(ABDAY_6); + DO_NL_S(ABDAY_7); + + DO_NL_S(DAY_1); + DO_NL_S(DAY_2); + DO_NL_S(DAY_3); + DO_NL_S(DAY_4); + DO_NL_S(DAY_5); + DO_NL_S(DAY_6); + DO_NL_S(DAY_7); + + DO_NL_S(ABMON_1); + DO_NL_S(ABMON_2); + DO_NL_S(ABMON_3); + DO_NL_S(ABMON_4); + DO_NL_S(ABMON_5); + DO_NL_S(ABMON_6); + DO_NL_S(ABMON_7); + DO_NL_S(ABMON_8); + DO_NL_S(ABMON_9); + DO_NL_S(ABMON_10); + DO_NL_S(ABMON_11); + DO_NL_S(ABMON_12); + + DO_NL_S(MON_1); + DO_NL_S(MON_2); + DO_NL_S(MON_3); + DO_NL_S(MON_4); + DO_NL_S(MON_5); + DO_NL_S(MON_6); + 
DO_NL_S(MON_7); + DO_NL_S(MON_8); + DO_NL_S(MON_9); + DO_NL_S(MON_10); + DO_NL_S(MON_11); + DO_NL_S(MON_12); + + DO_NL_S(AM_STR); + DO_NL_S(PM_STR); + + DO_NL_S(D_T_FMT); + DO_NL_S(D_FMT); + DO_NL_S(T_FMT); + DO_NL_S(T_FMT_AMPM); + DO_NL_S(ERA); + + DO_NL_S(ERA_YEAR); /* non SuSv3 */ + DO_NL_S(ERA_D_FMT); + DO_NL_S(ALT_DIGITS); + DO_NL_S(ERA_D_T_FMT); + DO_NL_S(ERA_T_FMT); + + if (k > 50) { + printf("error -- lc_time nl_item count > 50!\n"); + exit(EXIT_FAILURE); + } + + { + int r; + for (r=0 ; r < lc_time_uniq ; r++) { + if (!memcmp(lc_time_uniq_50[lc_time_uniq], + lc_time_uniq_50[r], 50)) { + break; + } + } + if (r == lc_time_uniq) { /* new locale row */ + ++lc_time_uniq; + if (lc_time_uniq > 255) { + printf("too many unique lc_time rows!\n"); + exit(EXIT_FAILURE); + } + } + locales[i].lc_time_row = r; + } +/* printf("\n"); */ + } +/* printf(" done\n"); */ + + m = k = 0; + for (i=0 ; i < 50 ; i++) { + k += lc_time_count[i]; + if (m < lc_time_count[i]) { + m = lc_time_count[i]; + } + } + printf("buf-size=%d uniq=%d item_offsets=%d max=%d rows=%d\n", + (int)(last - buf), uniq, k, m, lc_time_uniq); +/* printf("total = %d * 50 + 2 * (50 + %d) + %d = %d\n", */ +/* num_locales, k, (int)(last - buf), */ +/* num_locales*50 + 2*(50 + k) + (int)(last - buf)); */ + printf("total = %d + %d * 50 + 2 * (50 + %d) + %d = %d\n", + num_locales, lc_time_uniq, k, (int)(last - buf), + i = num_locales + lc_time_uniq*50 + 2*(50 + k) + (int)(last - buf)); + total_size += i; + + dump_table8c("__lc_time_data", buf, (int)(last - buf)); + + for (i=0 ; i < lc_time_uniq ; i++) { + m = locales[i].lc_time_row; + for (k=0 ; k < 50 ; k++) { + buf[50*i + k] = (char)((unsigned char) lc_time_uniq_50[i][k]); + } + } + dump_table8("__lc_time_rows", buf, lc_time_uniq * 50); + + buf16[0] =0; + for (i=0 ; i < 50 - 1 ; i++) { + buf16[i+1] = buf16[i] + lc_time_count[i]; + } + dump_table16("__lc_time_item_offsets", buf16, 50); + + m = 0; + for (k=0 ; k < 50 ; k++) { + for (i=0 ; i < lc_time_count[k] ; i++) 
{ + buf16[m] = lc_time_item[k][i]; + ++m; + } + } + dump_table16("__lc_time_item_idx", buf16, m); +} + +static void dump_table8(const char *name, const char *tbl, int len) +{ + int i; + + fprintf(ofp, "#define %s_LEN\t\t%d\n", name, len); + fprintf(ofp, "static const unsigned char %s[%d] = {", name, len); + for (i=0 ; i < len ; i++) { + if ((i % 12) == 0) { + fprintf(ofp, "\n\t"); + } + fprintf(ofp, "%#4x, ", (int)((unsigned char) tbl[i])); + } + fprintf(ofp, "\n};\n\n"); +} + +#define __C_isdigit(c) \ + ((sizeof(c) == sizeof(char)) \ + ? (((unsigned char)((c) - '0')) < 10) \ + : (((unsigned int)((c) - '0')) < 10)) +#define __C_isalpha(c) \ + ((sizeof(c) == sizeof(char)) \ + ? (((unsigned char)(((c) | 0x20) - 'a')) < 26) \ + : (((unsigned int)(((c) | 0x20) - 'a')) < 26)) +#define __C_isalnum(c) (__C_isalpha(c) || __C_isdigit(c)) + +static void dump_table8c(const char *name, const char *tbl, int len) +{ + int i; + + fprintf(ofp, "#define %s_LEN\t\t%d\n", name, len); + fprintf(ofp, "static const unsigned char %s[%d] = {", name, len); + for (i=0 ; i < len ; i++) { + if ((i % 12) == 0) { + fprintf(ofp, "\n\t"); + } + if (__C_isalnum(tbl[i]) || (tbl[i] == ' ')) { + fprintf(ofp, " '%c', ", (int)((unsigned char) tbl[i])); + } else { + fprintf(ofp, "%#4x, ", (int)((unsigned char) tbl[i])); + } + } + fprintf(ofp, "\n};\n\n"); +} + +static void dump_table16(const char *name, const int *tbl, int len) +{ + int i; + + fprintf(ofp, "#define %s_LEN\t\t%d\n", name, len); + fprintf(ofp, "static const uint16_t %s[%d] = {", name, len); + for (i=0 ; i < len ; i++) { + if ((i % 8) == 0) { + fprintf(ofp, "\n\t"); + } + if (tbl[i] != (uint16_t) tbl[i]) { + printf("error - falls outside uint16 range!\n"); + exit(EXIT_FAILURE); + } + fprintf(ofp, "%#6x, ", tbl[i]); + } + fprintf(ofp, "\n};\n\n"); +} + +#undef DO_NL_S + +static int lc_numeric_item[3][256]; +static int lc_numeric_count[3]; +static unsigned char lc_numeric_uniq_3[700][3]; +static int lc_numeric_uniq; + +#define DO_NL_S(X) 
lc_numeric_S(X, k++) + +static void lc_numeric_S(int X, int k) +{ + int j, m; + j = addstring(nl_langinfo(X)); + for (m=0 ; m < lc_numeric_count[k] ; m++) { + if (lc_numeric_item[k][m] == j) { + break; + } + } + if (m == lc_numeric_count[k]) { /* new for this nl_item */ + if (m > 255) { + printf("too many nl_item %d entries in lc_numeric\n", k); + exit(EXIT_FAILURE); + } + lc_numeric_item[k][m] = j; + ++lc_numeric_count[k]; + } +/* printf("\\x%02x", m); */ + lc_numeric_uniq_3[lc_numeric_uniq][k] = m; +} + +static void do_lc_numeric(void) +{ + int i, k, m; + + last = buf+1; + uniq = 1; + *buf = 0; + *idx = buf; + + for (i=0 ; i < num_locales ; i++) { + k = 0; + + if (!setlocale(LC_ALL, locales[i].glibc_name)) { + printf("setlocale(LC_ALL,%s) failed!\n", + locales[i].glibc_name); + } + + DO_NL_S(RADIXCHAR); /* DECIMAL_POINT */ + DO_NL_S(THOUSEP); /* THOUSANDS_SEP */ + DO_NL_S(GROUPING); + + if (k > 3) { + printf("error -- lc_numeric nl_item count > 3!\n"); + exit(EXIT_FAILURE); + } + + { + int r; + for (r=0 ; r < lc_numeric_uniq ; r++) { + if (!memcmp(lc_numeric_uniq_3[lc_numeric_uniq], + lc_numeric_uniq_3[r], 3)) { + break; + } + } + if (r == lc_numeric_uniq) { /* new locale row */ + ++lc_numeric_uniq; + if (lc_numeric_uniq > 255) { + printf("too many unique lc_numeric rows!\n"); + exit(EXIT_FAILURE); + } + } + locales[i].lc_numeric_row = r; + } + } + + printf("buf-size=%d uniq=%d rows=%d\n", + (int)(last - buf), uniq, lc_numeric_uniq); + printf("total = %d + %d * 3 + %d = %d\n", + num_locales, lc_numeric_uniq, (int)(last - buf), + i = num_locales + lc_numeric_uniq*3 + (int)(last - buf)); + total_size += i; + +/* printf("buf-size=%d uniq=%d\n", (int)(last - buf), uniq); */ + + dump_table8c("__lc_numeric_data", buf, (int)(last - buf)); + + + for (i=0 ; i < lc_numeric_uniq ; i++) { + m = locales[i].lc_numeric_row; + for (k=0 ; k < 3 ; k++) { + buf[3*i + k] = (char)((unsigned char) lc_numeric_uniq_3[i][k]); + } + } + dump_table8("__lc_numeric_rows", buf, 
lc_numeric_uniq * 3); + + buf16[0] =0; + for (i=0 ; i < 3 - 1 ; i++) { + buf16[i+1] = buf16[i] + lc_numeric_count[i]; + } + dump_table16("__lc_numeric_item_offsets", buf16, 3); + + m = 0; + for (k=0 ; k < 3 ; k++) { + for (i=0 ; i < lc_numeric_count[k] ; i++) { + buf16[m] = lc_numeric_item[k][i]; + ++m; + } + } + dump_table16("__lc_numeric_item_idx", buf16, m); +} + +#undef DO_NL_S + +/* #define NUM_NL_MONETARY 7 */ +#define NUM_NL_MONETARY (7+14+1) + +static int lc_monetary_item[NUM_NL_MONETARY][256]; +static int lc_monetary_count[NUM_NL_MONETARY]; +static unsigned char lc_monetary_uniq_X[700][NUM_NL_MONETARY]; +static int lc_monetary_uniq; + +#define DO_NL_S(X) lc_monetary_S(X, k++) + +/* #define DO_NL_C(X) printf("%#02x", (int)(unsigned char)(*nl_langinfo(X))); */ +#define DO_NL_C(X) lc_monetary_C(X, k++) + +static void lc_monetary_C(int X, int k) +{ + int j, m; + char c_buf[2]; + +#warning fix the char entries for monetary... target signedness of char may be different! + + c_buf[1] = 0; + c_buf[0] = *nl_langinfo(X); + j = addstring(c_buf); + for (m=0 ; m < lc_monetary_count[k] ; m++) { + if (lc_monetary_item[k][m] == j) { + break; + } + } + if (m == lc_monetary_count[k]) { /* new for this nl_item */ + if (m > 255) { + printf("too many nl_item %d entries in lc_monetary\n", k); + exit(EXIT_FAILURE); + } + lc_monetary_item[k][m] = j; + ++lc_monetary_count[k]; + } +/* printf("\\x%02x", m); */ + lc_monetary_uniq_X[lc_monetary_uniq][k] = m; +} + + +static void lc_monetary_S(int X, int k) +{ + int j, m; + j = addstring(nl_langinfo(X)); + for (m=0 ; m < lc_monetary_count[k] ; m++) { + if (lc_monetary_item[k][m] == j) { + break; + } + } + if (m == lc_monetary_count[k]) { /* new for this nl_item */ + if (m > 255) { + printf("too many nl_item %d entries in lc_monetary\n", k); + exit(EXIT_FAILURE); + } + lc_monetary_item[k][m] = j; + ++lc_monetary_count[k]; + } +/* printf("\\x%02x", m); */ + lc_monetary_uniq_X[lc_monetary_uniq][k] = m; +} + +static void 
do_lc_monetary(void) +{ + int i, k, m; + + last = buf+1; + uniq = 1; + *buf = 0; + *idx = buf; + + for (i=0 ; i < num_locales ; i++) { + k = 0; + + if (!setlocale(LC_ALL, locales[i].glibc_name)) { + printf("setlocale(LC_ALL,%s) failed!\n", + locales[i].glibc_name); + } + + + /* non SUSv3 */ + DO_NL_S(INT_CURR_SYMBOL); + DO_NL_S(CURRENCY_SYMBOL); + DO_NL_S(MON_DECIMAL_POINT); + DO_NL_S(MON_THOUSANDS_SEP); + DO_NL_S(MON_GROUPING); + DO_NL_S(POSITIVE_SIGN); + DO_NL_S(NEGATIVE_SIGN); + DO_NL_C(INT_FRAC_DIGITS); + DO_NL_C(FRAC_DIGITS); + DO_NL_C(P_CS_PRECEDES); + DO_NL_C(P_SEP_BY_SPACE); + DO_NL_C(N_CS_PRECEDES); + DO_NL_C(N_SEP_BY_SPACE); + DO_NL_C(P_SIGN_POSN); + DO_NL_C(N_SIGN_POSN); + DO_NL_C(INT_P_CS_PRECEDES); + DO_NL_C(INT_P_SEP_BY_SPACE); + DO_NL_C(INT_N_CS_PRECEDES); + DO_NL_C(INT_N_SEP_BY_SPACE); + DO_NL_C(INT_P_SIGN_POSN); + DO_NL_C(INT_N_SIGN_POSN); + + DO_NL_S(CRNCYSTR); /* CURRENCY_SYMBOL */ + +/* printf("\n"); */ + + if (k > NUM_NL_MONETARY) { + printf("error -- lc_monetary nl_item count > %d!\n", NUM_NL_MONETARY); + exit(EXIT_FAILURE); + } + + { + int r; + for (r=0 ; r < lc_monetary_uniq ; r++) { + if (!memcmp(lc_monetary_uniq_X[lc_monetary_uniq], + lc_monetary_uniq_X[r], NUM_NL_MONETARY)) { + break; + } + } + if (r == lc_monetary_uniq) { /* new locale row */ + ++lc_monetary_uniq; + if (lc_monetary_uniq > 255) { + printf("too many unique lc_monetary rows!\n"); + exit(EXIT_FAILURE); + } + } + locales[i].lc_monetary_row = r; + } + } + + printf("buf-size=%d uniq=%d rows=%d\n", + (int)(last - buf), uniq, lc_monetary_uniq); + printf("total = %d + %d * %d + %d = %d\n", + num_locales, lc_monetary_uniq, NUM_NL_MONETARY, (int)(last - buf), + i = num_locales + lc_monetary_uniq*NUM_NL_MONETARY + (int)(last - buf)); + total_size += i; + + dump_table8c("__lc_monetary_data", buf, (int)(last - buf)); + + for (i=0 ; i < lc_monetary_uniq ; i++) { + m = locales[i].lc_monetary_row; + for (k=0 ; k < NUM_NL_MONETARY ; k++) { + buf[NUM_NL_MONETARY*i + k] = (char)((unsigned 
char) lc_monetary_uniq_X[i][k]); + } + } + dump_table8("__lc_monetary_rows", buf, lc_monetary_uniq * NUM_NL_MONETARY); + + buf16[0] =0; + for (i=0 ; i < NUM_NL_MONETARY - 1 ; i++) { + buf16[i+1] = buf16[i] + lc_monetary_count[i]; + } + dump_table16("__lc_monetary_item_offsets", buf16, NUM_NL_MONETARY); + + m = 0; + for (k=0 ; k < NUM_NL_MONETARY ; k++) { + for (i=0 ; i < lc_monetary_count[k] ; i++) { + buf16[m] = lc_monetary_item[k][i]; + ++m; + } + } + dump_table16("__lc_monetary_item_idx", buf16, m); +} + + +#undef DO_NL_S + +static int lc_messages_item[2][256]; +static int lc_messages_count[2]; +static unsigned char lc_messages_uniq_2[700][2]; +static int lc_messages_uniq; + +#define DO_NL_S(X) lc_messages_S(X, k++) + +static void lc_messages_S(int X, int k) +{ + int j, m; + j = addstring(nl_langinfo(X)); + for (m=0 ; m < lc_messages_count[k] ; m++) { + if (lc_messages_item[k][m] == j) { + break; + } + } + if (m == lc_messages_count[k]) { /* new for this nl_item */ + if (m > 255) { + printf("too many nl_item %d entries in lc_messages\n", k); + exit(EXIT_FAILURE); + } + lc_messages_item[k][m] = j; + ++lc_messages_count[k]; + } +/* printf("\\x%02x", m); */ + lc_messages_uniq_2[lc_messages_uniq][k] = m; +} + +static void do_lc_messages(void) +{ + int i, k, m; + + last = buf+1; + uniq = 1; + *buf = 0; + *idx = buf; + + for (i=0 ; i < num_locales ; i++) { + k = 0; + + if (!setlocale(LC_ALL, locales[i].glibc_name)) { + printf("setlocale(LC_ALL,%s) failed!\n", + locales[i].glibc_name); + } + + DO_NL_S(YESEXPR); + DO_NL_S(NOEXPR); + + if (k > 2) { + printf("error -- lc_messages nl_item count > 2!\n"); + exit(EXIT_FAILURE); + } + + { + int r; + for (r=0 ; r < lc_messages_uniq ; r++) { + if (!memcmp(lc_messages_uniq_2[lc_messages_uniq], + lc_messages_uniq_2[r], 2)) { + break; + } + } + if (r == lc_messages_uniq) { /* new locale row */ + ++lc_messages_uniq; + if (lc_messages_uniq > 255) { + printf("too many unique lc_messages rows!\n"); + exit(EXIT_FAILURE); + } + } + 
locales[i].lc_messages_row = r; + } + } + + printf("buf-size=%d uniq=%d rows=%d\n", + (int)(last - buf), uniq, lc_messages_uniq); + printf("total = %d + %d * 2 + %d = %d\n", + num_locales, lc_messages_uniq, (int)(last - buf), + i = num_locales + lc_messages_uniq*2 + (int)(last - buf)); + total_size += i; + +/* printf("buf-size=%d uniq=%d\n", (int)(last - buf), uniq); */ + + dump_table8c("__lc_messages_data", buf, (int)(last - buf)); + + for (i=0 ; i < lc_messages_uniq ; i++) { + m = locales[i].lc_messages_row; + for (k=0 ; k < 2 ; k++) { + buf[2*i + k] = (char)((unsigned char) lc_messages_uniq_2[i][k]); + } + } + dump_table8("__lc_messages_rows", buf, lc_messages_uniq * 2); + + buf16[0] =0; + for (i=0 ; i < 2 - 1 ; i++) { + buf16[i+1] = buf16[i] + lc_messages_count[i]; + } + dump_table16("__lc_messages_item_offsets", buf16, 2); + + m = 0; + for (k=0 ; k < 2 ; k++) { + for (i=0 ; i < lc_messages_count[k] ; i++) { + buf16[m] = lc_messages_item[k][i]; + ++m; + } + } + dump_table16("__lc_messages_item_idx", buf16, m); +} + + + + + +static void read_at_mappings(void) +{ + char *p; + char *m; + int mc = 0; + + do { + if (!(p = strtok(line_buf, " \t\n")) || (*p == '#')) { + if (!fgets(line_buf, sizeof(line_buf), fp)) { + if (ferror(fp)) { + printf("error reading file\n"); + exit(EXIT_FAILURE); + } + return; /* EOF */ + } + if ((*line_buf == '#') && (line_buf[1] == '-')) { + break; + } + continue; + } + if (*p == '@') { + if (p[1] == 0) { + printf("error: missing @modifier name\n"); + exit(EXIT_FAILURE); + } + m = p; /* save the modifier name */ + if (!(p = strtok(NULL, " \t\n")) || p[1] || (((unsigned char) *p) > 0x7f)) { + printf("error: missing or illegal @modifier mapping char\n"); + exit(EXIT_FAILURE); + } + if (at_mappings[(int)((unsigned char) *p)]) { + printf("error: reused @modifier mapping char\n"); + exit(EXIT_FAILURE); + } + at_mappings[(int)((unsigned char) *p)] = 1; + at_mapto[mc] = *p; + ++mc; + *at_strings_end = (char)( (unsigned char) (strlen(m)) ); + 
strcpy(++at_strings_end, m+1); + at_strings_end += (unsigned char) at_strings_end[-1]; + + printf("@mapping: \"%s\" to '%c'\n", m, *p); + + if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) { + printf("ignoring trailing text: %s...\n", p); + } + *line_buf = 0; + continue; + } + break; + } while (1); + +#if 0 + { + p = at_strings; + + if (!*p) { + printf("no @ strings\n"); + return; + } + + do { + printf("%s\n", p+1); + p += 1 + (unsigned char) *p; + } while (*p); + } +#endif +} + +static void read_enable_disable(void) +{ + char *p; + + do { + if (!(p = strtok(line_buf, " =\t\n")) || (*p == '#')) { + if (!fgets(line_buf, sizeof(line_buf), fp)) { + if (ferror(fp)) { + printf("error reading file\n"); + exit(EXIT_FAILURE); + } + return; /* EOF */ + } + if ((*line_buf == '#') && (line_buf[1] == '-')) { + break; + } + continue; + } + if (!strcmp(p, "UTF-8")) { + if (!(p = strtok(NULL, " =\t\n")) + || ((toupper(*p) != 'Y') && (toupper(*p) != 'N'))) { + printf("error: missing or illegal UTF-8 setting\n"); + exit(EXIT_FAILURE); + } + default_utf8 = (toupper(*p) == 'Y'); + printf("UTF-8 locales are %sabled\n", "dis\0en"+ (default_utf8 << 2)); + } else if (!strcmp(p, "8-BIT")) { + if (!(p = strtok(NULL, " =\t\n")) + || ((toupper(*p) != 'Y') && (toupper(*p) != 'N'))) { + printf("error: missing or illegal 8-BIT setting\n"); + exit(EXIT_FAILURE); + } + default_8bit = (toupper(*p) == 'Y'); + printf("8-BIT locales are %sabled\n", "dis\0en" + (default_8bit << 2)); + } else { + break; + } + + if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) { + printf("ignoring trailing text: %s...\n", p); + } + *line_buf = 0; + continue; + + } while (1); +} + +#ifdef CODESET_LIST + +static int find_codeset_num(const char *cs) +{ + int r = 2; + char *s = CODESET_LIST; + + /* 7-bit is 1, UTF-8 is 2, 8-bits are > 2 */ + + if (strcmp(cs, "UTF-8") != 0) { + ++r; + while (*s && strcmp(CODESET_LIST+ ((unsigned char) *s), cs)) { +/* printf("tried %s\n", CODESET_LIST + ((unsigned char) 
*s)); */ + ++r; + ++s; + } + if (!*s) { + printf("error: unsupported codeset %s\n", cs); + exit(EXIT_FAILURE); + } + } + return r; +} + +#else + +static int find_codeset_num(const char *cs) +{ + int r = 2; + + /* 7-bit is 1, UTF-8 is 2, 8-bits are > 2 */ + + if (strcmp(cs, "UTF-8") != 0) { + printf("error: unsupported codeset %s\n", cs); + exit(EXIT_FAILURE); + } + return r; +} + +#endif + +static int find_at_string_num(const char *as) +{ + int i = 0; + char *p = at_strings; + + while (*p) { + if (!strcmp(p+1, as)) { + return i; + } + ++i; + p += 1 + (unsigned char) *p; + } + + printf("error: unmapped @string %s\n", as); + exit(EXIT_FAILURE); +} + +static void read_locale_list(void) +{ + char *p; + char *s; + char *ln; /* locale name */ + char *ls; /* locale name ll_CC */ + char *as; /* at string */ + char *ds; /* dot string */ + char *cs; /* codeset */ + int i; + + typedef struct { + char *glibc_name; + char name[5]; + char dot_cs; /* 0 if no codeset specified */ + char cs; + } locale_entry; + + /* First the C locale. */ + locales[0].glibc_name = locales[0].name; + strncpy(locales[0].name,"C",5); + locales[0].dot_cs = 0; + locales[0].cs = 1; /* 7-bit encoding */ + ++num_locales; + + do { + if (!(p = strtok(line_buf, " \t\n")) || (*p == '#')) { + if (!fgets(line_buf, sizeof(line_buf), fp)) { + if (ferror(fp)) { + printf("error reading file\n"); + exit(EXIT_FAILURE); + } + return; /* EOF */ + } + if ((*line_buf == '#') && (line_buf[1] == '-')) { + break; + } + continue; + } + + s = glibc_locale_names; + for (i=0 ; i < num_locales ; i++) { + if (!strcmp(s+1, p)) { + break; + } + s += 1 + ((unsigned char) *s); + } + if (i < num_locales) { + printf("ignoring dulplicate locale name: %s", p); + *line_buf = 0; + continue; + } + + /* New locale, but don't increment num until codeset verified! 
*/ + *s = (char)((unsigned char) (strlen(p) + 1)); + strcpy(s+1, p); + locales[num_locales].glibc_name = s+1; + ln = p; /* save locale name */ + + if (!(p = strtok(NULL, " \t\n"))) { + printf("error: missing codeset for locale %s\n", ln); + exit(EXIT_FAILURE); + } + cs = p; + i = find_codeset_num(p); + if ((i == 2) && !default_utf8) { + printf("ignoring UTF-8 locale %s\n", ln); + *line_buf = 0; + continue; + } else if ((i > 2) && !default_8bit) { + printf("ignoring 8-bit codeset locale %s\n", ln); + *line_buf = 0; + continue; + } + locales[num_locales].cs = (char)((unsigned char) i); + + if (((p = strtok(NULL, " \t\n")) != NULL) && (*p != '#')) { + printf("ignoring trailing text: %s...\n", p); + } + + /* Now go back to locale string for .codeset and @modifier */ + as = strtok(ln, "@"); + if (as) { + as = strtok(NULL, "@"); + } + ds = strtok(ln, "."); + if (ds) { + ds = strtok(NULL, "."); + } + ls = ln; + + if ((strlen(ls) != 5) || (ls[2] != '_')) { + printf("error: illegal locale name %s\n", ls); + exit(EXIT_FAILURE); + } + + i = 0; /* value for unspecified codeset */ + if (ds) { + i = find_codeset_num(ds); + if ((i == 2) && !default_utf8) { + printf("ignoring UTF-8 locale %s\n", ln); + *line_buf = 0; + continue; + } else if ((i > 2) && !default_8bit) { + printf("ignoring 8-bit codeset locale %s\n", ln); + *line_buf = 0; + continue; + } + } + locales[num_locales].dot_cs = (char)((unsigned char) i); + + if (as) { + i = find_at_string_num(as); + ls[2] = at_mapto[i]; + } + memcpy(locales[num_locales].name, ls, 5); +/* printf("locale: %5.5s %2d %2d %s\n", */ +/* locales[num_locales].name, */ +/* locales[num_locales].cs, */ +/* locales[num_locales].dot_cs, */ +/* locales[num_locales].glibc_name */ +/* ); */ + ++num_locales; + *line_buf = 0; + } while (1); +} + +static int le_cmp(const void *a, const void *b) +{ + const locale_entry *p; + const locale_entry *q; + int r; + + p = (const locale_entry *) a; + q = (const locale_entry *) b; + + if (!(r = p->name[0] - 
q->name[0]) + && !(r = p->name[1] - q->name[1]) + && !(r = p->name[3] - q->name[3]) + && !(r = p->name[4] - q->name[4]) + && !(r = p->name[2] - q->name[2]) + && !(r = -(p->cs - q->cs)) + ) { + r = -(p->dot_cs - q->dot_cs); + /* Reverse the ordering of the codesets so UTF-8 comes last. + * Work-around (hopefully) for glibc bug affecting at least + * the euro currency symbol. */ + } + + return r; +} + |