summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Manuel Novoa III <mjn3@codepoet.org> 2003-08-28 17:16:53 +0000
committer: Manuel Novoa III <mjn3@codepoet.org> 2003-08-28 17:16:53 +0000
commit: 9c6f2391ed8dd72d9e13db459d149de7bc707567 (patch)
tree: 1c84d71700579e531e8c763b0271be3fbad6d776
parent: e317054726be481c98a9ffe712a39331d0044396 (diff)
Create a typedef for the ctype bitmask table entries.
Hack a fix for ctype support of 8-bit codeset locales. Note: toupper/tolower mappings do not handle the special cases for the tr_TR and az_AZ locales, since the wide versions currently do not handle them either. That will be addressed when I rewrite the data generation tools and the libc locale code.
-rw-r--r--  include/ctype.h  12
-rw-r--r--  libc/misc/ctype/ctype.c  36
-rw-r--r--  libc/misc/locale/locale.c  124
-rw-r--r--  libc/sysdeps/linux/common/bits/uClibc_locale.h  7
-rw-r--r--  libc/sysdeps/linux/common/bits/uClibc_touplow.h  10
5 files changed, 148 insertions, 41 deletions
diff --git a/include/ctype.h b/include/ctype.h
index 23ff199e4..361f160f8 100644
--- a/include/ctype.h
+++ b/include/ctype.h
@@ -32,7 +32,7 @@ __BEGIN_DECLS
#ifndef _ISbit
/* These are all the characteristics of characters.
If there get to be more than 16 distinct characteristics,
- many things must be changed that use `__uint16_t's. */
+ __ctype_mask_t will need to be adjusted. */
# define _ISbit(bit) (1 << (bit))
@@ -98,13 +98,13 @@ enum
*/
/* Pointers to the default C-locale data. */
-extern const __uint16_t *__C_ctype_b;
+extern const __ctype_mask_t *__C_ctype_b;
extern const __ctype_touplow_t *__C_ctype_toupper;
extern const __ctype_touplow_t *__C_ctype_tolower;
#ifdef __UCLIBC_HAS_XLOCALE__
-extern __const __uint16_t **__ctype_b_loc (void)
+extern __const __ctype_mask_t **__ctype_b_loc (void)
__attribute__ ((__const));
extern __const __ctype_touplow_t **__ctype_tolower_loc (void)
__attribute__ ((__const));
@@ -118,7 +118,7 @@ extern __const __ctype_touplow_t **__ctype_toupper_loc (void)
#else /* __UCLIBC_HAS_XLOCALE__ */
/* Pointers to the current global locale data in use. */
-extern const __uint16_t *__ctype_b;
+extern const __ctype_mask_t *__ctype_b;
extern const __ctype_touplow_t *__ctype_toupper;
extern const __ctype_touplow_t *__ctype_tolower;
@@ -129,7 +129,7 @@ extern const __ctype_touplow_t *__ctype_tolower;
#endif /* __UCLIBC_HAS_XLOCALE__ */
#define __isctype(c, type) \
- ((__UCLIBC_CTYPE_B)[(int) (c)] & (__uint16_t) type)
+ ((__UCLIBC_CTYPE_B)[(int) (c)] & (__ctype_mask_t) type)
#define __isascii(c) (((c) & ~0x7f) == 0) /* If C is a 7 bit value. */
#define __toascii(c) ((c) & 0x7f) /* Mask off high bits. */
@@ -292,7 +292,7 @@ toupper (int __c) __THROW
/* These definitions are similar to the ones above but all functions
take as an argument a handle for the locale which shall be used. */
# define __isctype_l(c, type, locale) \
- ((locale)->__ctype_b[(int) (c)] & (__uint16_t) type)
+ ((locale)->__ctype_b[(int) (c)] & (__ctype_mask_t) type)
# define __exctype_l(name) \
extern int name (int, __locale_t) __THROW
diff --git a/libc/misc/ctype/ctype.c b/libc/misc/ctype/ctype.c
index a89e1e75c..13095015d 100644
--- a/libc/misc/ctype/ctype.c
+++ b/libc/misc/ctype/ctype.c
@@ -378,7 +378,7 @@ int isctype(int c, int mask)
#ifdef __UCLIBC_HAS_XLOCALE__
-const uint16_t **__ctype_b_loc(void)
+const __ctype_mask_t **__ctype_b_loc(void)
{
return &(__UCLIBC_CURLOCALE_DATA).__ctype_b;
}
@@ -415,7 +415,7 @@ const __ctype_touplow_t **__ctype_toupper_loc(void)
/**********************************************************************/
#ifdef L___C_ctype_b
-const uint16_t __C_ctype_b_data[] = {
+const __ctype_mask_t __C_ctype_b_data[] = {
#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
/* -128 M-^@ */ 0,
/* -127 M-^A */ 0,
@@ -804,19 +804,11 @@ const uint16_t __C_ctype_b_data[] = {
/* 255 M-^? */ 0
};
-const uint16_t *__C_ctype_b = __C_ctype_b_data + 1
-#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
- + 127
-#endif
- ;
+const __ctype_mask_t *__C_ctype_b = __C_ctype_b_data + __UCLIBC_CTYPE_B_TBL_OFFSET;
#ifndef __UCLIBC_HAS_XLOCALE__
-const uint16_t *__ctype_b = __C_ctype_b_data + 1
-#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
- + 127
-#endif
- ;
+const __ctype_mask_t *__ctype_b = __C_ctype_b_data + __UCLIBC_CTYPE_B_TBL_OFFSET;
#endif
@@ -926,18 +918,12 @@ const __ctype_touplow_t __C_ctype_tolower_data[] = {
};
const __ctype_touplow_t *__C_ctype_tolower = __C_ctype_tolower_data
-#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
- + 128
-#endif
- ;
+ + __UCLIBC_CTYPE_TO_TBL_OFFSET;
#ifndef __UCLIBC_HAS_XLOCALE__
const __ctype_touplow_t *__ctype_tolower = __C_ctype_tolower_data
-#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
- + 128
-#endif
- ;
+ + __UCLIBC_CTYPE_TO_TBL_OFFSET;
#endif
@@ -1047,18 +1033,12 @@ const __ctype_touplow_t __C_ctype_toupper_data[] = {
};
const __ctype_touplow_t *__C_ctype_toupper = __C_ctype_toupper_data
-#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
- + 128
-#endif
- ;
+ + __UCLIBC_CTYPE_TO_TBL_OFFSET;
#ifndef __UCLIBC_HAS_XLOCALE__
const __ctype_touplow_t *__ctype_toupper = __C_ctype_toupper_data
-#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
- + 128
-#endif
- ;
+ + __UCLIBC_CTYPE_TO_TBL_OFFSET;
#endif
diff --git a/libc/misc/locale/locale.c b/libc/misc/locale/locale.c
index 8f0cbd415..cbed01146 100644
--- a/libc/misc/locale/locale.c
+++ b/libc/misc/locale/locale.c
@@ -54,7 +54,6 @@
#include <assert.h>
#include <errno.h>
#include <ctype.h>
-#warning devel code
#include <stdio.h>
#undef __LOCALE_C_ONLY
@@ -639,6 +638,122 @@ int _locale_set_l(const unsigned char *p, __locale_t base)
base->idx8wc2c = c8b->idx8wc2c;
/* translit */
#endif /* __UCLIBC_HAS_WCHAR__ */
+
+ /* What follows is fairly bloated, but it is just a hack
+ * to get the 8-bit codeset ctype stuff functioning.
+ * All of this will be replaced in the next generation
+ * of locale support anyway... */
+
+ memcpy(base->__ctype_b_data,
+ __C_ctype_b - __UCLIBC_CTYPE_B_TBL_OFFSET,
+ (256 + __UCLIBC_CTYPE_B_TBL_OFFSET)
+ * sizeof(__ctype_mask_t));
+ memcpy(base->__ctype_tolower_data,
+ __C_ctype_tolower - __UCLIBC_CTYPE_TO_TBL_OFFSET,
+ (256 + __UCLIBC_CTYPE_TO_TBL_OFFSET)
+ * sizeof(__ctype_touplow_t));
+ memcpy(base->__ctype_toupper_data,
+ __C_ctype_toupper - __UCLIBC_CTYPE_TO_TBL_OFFSET,
+ (256 + __UCLIBC_CTYPE_TO_TBL_OFFSET)
+ * sizeof(__ctype_touplow_t));
+
+#define Cctype_TBL_MASK ((1 << __LOCALE_DATA_Cctype_IDX_SHIFT) - 1)
+#define Cctype_IDX_OFFSET (128 >> __LOCALE_DATA_Cctype_IDX_SHIFT)
+
+ {
+ int u;
+ __ctype_mask_t m;
+
+ for (u=0 ; u < 128 ; u++) {
+#ifdef __LOCALE_DATA_Cctype_PACKED
+ c = base->tbl8ctype
+ [ ((int)(c8b->idx8ctype
+ [(u >> __LOCALE_DATA_Cctype_IDX_SHIFT) ])
+ << (__LOCALE_DATA_Cctype_IDX_SHIFT - 1))
+ + ((u & Cctype_TBL_MASK) >> 1)];
+ c = (u & 1) ? (c >> 4) : (c & 0xf);
+#else
+ c = base->tbl8ctype
+ [ ((int)(c8b->idx8ctype
+ [(u >> __LOCALE_DATA_Cctype_IDX_SHIFT) ])
+ << __LOCALE_DATA_Cctype_IDX_SHIFT)
+ + (u & Cctype_TBL_MASK) ];
+#endif
+
+ m = base->code2flag[c];
+
+ base->__ctype_b_data
+ [128 + __UCLIBC_CTYPE_B_TBL_OFFSET + u]
+ = m;
+
+#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
+ if (((signed char)(128 + u)) != -1) {
+ base->__ctype_b_data[__UCLIBC_CTYPE_B_TBL_OFFSET
+ + ((signed char)(128 + u))]
+ = m;
+ }
+#endif
+
+ base->__ctype_tolower_data
+ [128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
+ = 128 + u;
+ base->__ctype_toupper_data
+ [128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
+ = 128 + u;
+
+ if (m & (_ISlower|_ISupper)) {
+ c = base->tbl8uplow
+ [ ((int)(c8b->idx8uplow
+ [u >> __LOCALE_DATA_Cuplow_IDX_SHIFT])
+ << __LOCALE_DATA_Cuplow_IDX_SHIFT)
+ + ((128 + u)
+ & ((1 << __LOCALE_DATA_Cuplow_IDX_SHIFT)
+ - 1)) ];
+ if (m & _ISlower) {
+ base->__ctype_toupper_data
+ [128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
+ = (unsigned char)(128 + u + c);
+#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
+ if (((signed char)(128 + u)) != -1) {
+ base->__ctype_toupper_data
+ [__UCLIBC_CTYPE_TO_TBL_OFFSET
+ + ((signed char)(128 + u))]
+ = (unsigned char)(128 + u + c);
+ }
+#endif
+ } else {
+ base->__ctype_tolower_data
+ [128 + __UCLIBC_CTYPE_TO_TBL_OFFSET + u]
+ = (unsigned char)(128 + u - c);
+#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
+ if (((signed char)(128 + u)) != -1) {
+ base->__ctype_tolower_data
+ [__UCLIBC_CTYPE_TO_TBL_OFFSET
+ + ((signed char)(128 + u))]
+ = (unsigned char)(128 + u - c);
+ }
+#endif
+ }
+ }
+ }
+ }
+
+#ifdef __UCLIBC_HAS_XLOCALE__
+ base->__ctype_b = base->__ctype_b_data
+ + __UCLIBC_CTYPE_B_TBL_OFFSET;
+ base->__ctype_tolower = base->__ctype_tolower_data
+ + __UCLIBC_CTYPE_TO_TBL_OFFSET;
+ base->__ctype_toupper = base->__ctype_toupper_data
+ + __UCLIBC_CTYPE_TO_TBL_OFFSET;
+#else /* __UCLIBC_HAS_XLOCALE__ */
+ __ctype_b = base->__ctype_b_data
+ + __UCLIBC_CTYPE_B_TBL_OFFSET;
+ __ctype_tolower = base->__ctype_tolower_data
+ + __UCLIBC_CTYPE_TO_TBL_OFFSET;
+ __ctype_toupper = base->__ctype_toupper_data
+ + __UCLIBC_CTYPE_TO_TBL_OFFSET;
+#endif /* __UCLIBC_HAS_XLOCALE__ */
+
#endif /* __CTYPE_HAS_8_BIT_LOCALES */
}
#ifdef __UCLIBC_MJN3_ONLY__
@@ -741,11 +856,8 @@ void _locale_init_l(__locale_t base)
/* width?? */
#endif /* __UCLIBC_HAS_WCHAR__ */
-
-
-#ifdef __UCLIBC_MJN3_ONLY__
-#warning wrong for now, but always set ctype arrays to global C version
-#endif
+ /* Initially, set things up to use the global C ctype tables.
+ * This is correct for C (ASCII) and UTF-8 based locales (except tr_TR). */
#ifdef __UCLIBC_HAS_XLOCALE__
base->__ctype_b = __C_ctype_b;
base->__ctype_tolower = __C_ctype_tolower;
diff --git a/libc/sysdeps/linux/common/bits/uClibc_locale.h b/libc/sysdeps/linux/common/bits/uClibc_locale.h
index bf642b511..88226c638 100644
--- a/libc/sysdeps/linux/common/bits/uClibc_locale.h
+++ b/libc/sysdeps/linux/common/bits/uClibc_locale.h
@@ -141,11 +141,16 @@ typedef struct {
typedef struct {
#ifdef __UCLIBC_HAS_XLOCALE__
- const __uint16_t *__ctype_b;
+ const __ctype_mask_t *__ctype_b;
const __ctype_touplow_t *__ctype_tolower;
const __ctype_touplow_t *__ctype_toupper;
#endif
+ /* For now, just embed this in the structure. */
+ __ctype_mask_t __ctype_b_data[256 + __UCLIBC_CTYPE_B_TBL_OFFSET];
+ __ctype_touplow_t __ctype_tolower_data[256 + __UCLIBC_CTYPE_TO_TBL_OFFSET];
+ __ctype_touplow_t __ctype_toupper_data[256 + __UCLIBC_CTYPE_TO_TBL_OFFSET];
+
/* int tables_loaded; */
/* unsigned char lctypes[LOCALE_STRING_SIZE]; */
unsigned char cur_locale[LOCALE_STRING_SIZE];
diff --git a/libc/sysdeps/linux/common/bits/uClibc_touplow.h b/libc/sysdeps/linux/common/bits/uClibc_touplow.h
index 75d508546..d79d3cde7 100644
--- a/libc/sysdeps/linux/common/bits/uClibc_touplow.h
+++ b/libc/sysdeps/linux/common/bits/uClibc_touplow.h
@@ -34,10 +34,20 @@
/* glibc uses the equivalent of - typedef __int32_t __ctype_touplow_t; */
+typedef __uint16_t __ctype_mask_t;
+
#ifdef __UCLIBC_HAS_CTYPE_SIGNED__
+
typedef __int16_t __ctype_touplow_t;
+#define __UCLIBC_CTYPE_B_TBL_OFFSET 128
+#define __UCLIBC_CTYPE_TO_TBL_OFFSET 128
+
#else /* __UCLIBC_HAS_CTYPE_SIGNED__ */
+
typedef unsigned char __ctype_touplow_t;
+#define __UCLIBC_CTYPE_B_TBL_OFFSET 1
+#define __UCLIBC_CTYPE_TO_TBL_OFFSET 0
+
#endif /* __UCLIBC_HAS_CTYPE_SIGNED__ */
#endif /* _UCLIBC_TOUPLOW_H */