From 748b5e3d2d70e47ab72ed44a371bd0db9a3a0c20 Mon Sep 17 00:00:00 2001 From: Waldemar Brodkorb Date: Sat, 17 Feb 2018 18:17:08 +0000 Subject: add gettext-tiny tools For libintl stub users tiny gettext tools might be useful. Tested with x86_64 on Gentoo/uClibc-ng system. --- utils/Makefile.in | 17 +- utils/StringEscape.c | 114 ++++++++++ utils/StringEscape.h | 7 + utils/msgfmt.c | 612 +++++++++++++++++++++++++++++++++++++++++++++++++++ utils/msgmerge.c | 268 ++++++++++++++++++++++ utils/poparser.c | 218 ++++++++++++++++++ utils/poparser.h | 41 ++++ utils/xgettext | 108 +++++++++ 8 files changed, 1382 insertions(+), 3 deletions(-) create mode 100644 utils/StringEscape.c create mode 100644 utils/StringEscape.h create mode 100644 utils/msgfmt.c create mode 100644 utils/msgmerge.c create mode 100644 utils/poparser.c create mode 100644 utils/poparser.h create mode 100755 utils/xgettext diff --git a/utils/Makefile.in b/utils/Makefile.in index 461e953c3..15e349950 100644 --- a/utils/Makefile.in +++ b/utils/Makefile.in @@ -82,6 +82,8 @@ utils_OUT := $(top_builddir)utils DEPS-ldconfig := $(utils_DIR)/chroot_realpath.c DEPS-ldconfig.host := $(DEPS-ldconfig) +DEPS-msgfmt := $(utils_DIR)/poparser.c $(utils_DIR)/StringEscape.c +DEPS-msgmerge := $(utils_DIR)/poparser.c $(utils_DIR)/StringEscape.c utils_OBJ := getconf ifeq ($(HAVE_SHARED),y) @@ -94,6 +96,10 @@ utils_OBJ += iconv utils_LOCALE_OBJ += $(utils_OUT)/locale endif +ifeq ($(UCLIBC_HAS_LIBINTL),y) +utils_OBJ += msgfmt msgmerge +endif + utils_OBJ := $(patsubst %,$(utils_OUT)/%,$(utils_OBJ)) hostutils_OBJ := $(patsubst %,%.host,$(utils_OBJ)) @@ -117,10 +123,10 @@ $(hostutils_OBJ): $(utils_OUT)/%.host : $(utils_DIR)/%.c install-y += utils_install # This installs both utils and hostutils, so doesn't depend on either. 
-$(PREFIX)$(DEVEL_PREFIX)bin $(PREFIX)$(RUNTIME_PREFIX)sbin: +$(PREFIX)$(DEVEL_PREFIX)bin $(PREFIX)$(DEVEL_PREFIX)usr/bin $(PREFIX)$(RUNTIME_PREFIX)sbin: $(do_mkdir) -utils_install: $(PREFIX)$(DEVEL_PREFIX)bin +utils_install: $(PREFIX)$(DEVEL_PREFIX)bin $(PREFIX)$(DEVEL_PREFIX)usr/bin ifeq ($(HAVE_SHARED),y) utils_install: $(PREFIX)$(RUNTIME_PREFIX)sbin endif @@ -135,11 +141,16 @@ ifeq ($(UCLIBC_HAS_LOCALE),y) $(Q)$(INSTALL) -m 755 $(utils_OUT)/iconv$(DOTHOST) $(PREFIX)$(DEVEL_PREFIX)bin/iconv $(Q)$(INSTALL) -m 755 $(utils_OUT)/locale$(DOTHOST) $(PREFIX)$(DEVEL_PREFIX)bin/locale endif +ifeq ($(UCLIBC_HAS_LIBINTL),y) + $(Q)$(INSTALL) -m 755 $(utils_OUT)/msgmerge$(DOTHOST) $(PREFIX)$(DEVEL_PREFIX)usr/bin/msgmerge + $(Q)$(INSTALL) -m 755 $(utils_OUT)/msgfmt$(DOTHOST) $(PREFIX)$(DEVEL_PREFIX)usr/bin/msgfmt + $(Q)$(INSTALL) -m 755 $(utils_DIR)/xgettext $(PREFIX)$(DEVEL_PREFIX)usr/bin +endif objclean-y += CLEAN_utils CLEAN_utils: - $(do_rm) $(addprefix $(utils_OUT)/, getconf iconv ldconfig ldd locale *.host) + $(do_rm) $(addprefix $(utils_OUT)/, getconf iconv ldconfig ldd locale msgfmt msgmerge *.host) $(Q)# This is a hack.. 
$(Q)$(RM) $(utils_OUT)/.*.dep diff --git a/utils/StringEscape.c b/utils/StringEscape.c new file mode 100644 index 000000000..be5ec55c3 --- /dev/null +++ b/utils/StringEscape.c @@ -0,0 +1,114 @@ +#include <stddef.h> + +//FIXME out gets silently truncated if outsize is too small + +size_t escape(char* in, char* out, size_t outsize) { /* writes the backslash-escaped form of in to out; returns the bytes written, not counting the NUL */ + size_t l = 0; + while(*in && l + 3 < outsize) { /* keep room for a two-byte escape plus the terminating NUL */ + switch(*in) { + case '\n': + *out++ = '\\'; + l++; + *out = 'n'; + break; + case '\r': + *out++ = '\\'; + l++; + *out = 'r'; + break; + case '\t': + *out++ = '\\'; + l++; + *out = 't'; + break; + case '\\': + *out++ = '\\'; + l++; + *out = '\\'; + break; + case '"': + *out++ = '\\'; + l++; + *out = '"'; + break; + case '\v': + *out++ = '\\'; + l++; + *out = 'v'; /* the escape letter, not the raw VT byte, so unescape() can read it back */ + break; + case '\?': + *out++ = '\\'; + l++; + *out = '\?'; + break; + case '\f': + *out++ = '\\'; + l++; + *out = 'f'; /* the escape letter, not the raw FF byte */ + break; + default: + *out = *in; + } + in++; + out++; + l++; + } + *out = 0; + return l; +} +#include <assert.h> +#include <stdlib.h> +size_t unescape(char* in, char *out, size_t outsize) { /* inverse of escape(); aborts on an escape sequence it does not know */ + size_t l = 0; + while(*in && l + 2 < outsize) { + switch (*in) { + case '\\': + ++in; + assert(*in); + switch(*in) { + case 'n': + *out='\n'; + break; + case 'r': + *out='\r'; + break; + case 't': + *out='\t'; + break; + case '\\': + *out='\\'; + break; + case '"': + *out='"'; + break; + case 'v': + *out='\v'; + break; + case '\?': + *out = '\?'; + break; + case 'f': + *out = '\f'; + break; + case '\'': + *out = '\''; + break; + case 'b': + *out = '\b'; + break; + // FIXME add handling of hex and octal + default: + abort(); + } + break; + default: + *out=*in; + } + in++; + out++; + l++; + } + *out = 0; + return l; +} + diff --git a/utils/StringEscape.h b/utils/StringEscape.h new file mode 100644 index 000000000..fc764821b --- /dev/null +++ b/utils/StringEscape.h @@ -0,0 +1,7 @@ +#ifndef STRINGESCAPE_H +#define STRINGESCAPE_H +#include <stddef.h> +size_t escape(char* in, char *out, size_t outsize); +size_t unescape(char* in, char *out, size_t outsize); +//RcB: 
DEP "StringEscape.c" +#endif diff --git a/utils/msgfmt.c b/utils/msgfmt.c new file mode 100644 index 000000000..6256eed27 --- /dev/null +++ b/utils/msgfmt.c @@ -0,0 +1,612 @@ +/* msgfmt utility (C) 2012 rofl0r + * released under the MIT license, see LICENSE for details */ +#include +#include +#include +#include +#include +#include "poparser.h" + +// in DO_NOTHING mode, we simply write the msgid twice, once for msgid, once for msgstr. +// TODO: maybe make it write "" instead of echoing the msgid. +//#define DO_NOTHING + +__attribute__((noreturn)) +static void syntax(void) { + fprintf(stdout, + "Usage: msgfmt [OPTION] filename.po ...\n"); + exit(1); +} + +__attribute__((noreturn)) +static void version(void) { + fprintf(stdout, + "these are not (GNU gettext-tools) 99.9999.9999\n"); + exit(0); +} + +#define streq(A, B) (!strcmp(A, B)) +#define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1))) + +struct mo_hdr { + unsigned magic; + int rev; + unsigned numstring; + unsigned off_tbl_org; + unsigned off_tbl_trans; + unsigned hash_tbl_size; + unsigned off_tbl_hash; +}; + +/* file layout: + header + strtable (lenghts/offsets) + transtable (lenghts/offsets) + [hashtable] + strings section + translations section */ + +const struct mo_hdr def_hdr = { + 0x950412de, + 0, + 0, + sizeof(struct mo_hdr), + 0, + 0, + 0, +}; + + +// pass 0: collect numbers of strings, calculate size and offsets for tables +// print header +// pass 1: create in-memory string tables +enum passes { + pass_first = 0, + pass_collect_sizes = pass_first, + pass_second, + pass_max, +}; + +struct strtbl { + unsigned len, off; +}; + +struct strmap { + struct strtbl str, *trans; +}; + +struct callbackdata { + enum passes pass; + unsigned off; + FILE* out; + unsigned msgidbuf1_len; + unsigned msgidbuf2_len; + unsigned pluralbuf1_len; + unsigned pluralbuf2_len; + unsigned ctxtbuf_len; + unsigned msgstr1_len; + unsigned msgstr2_len; + unsigned pluralstr_count; + unsigned string_maxlen; + 
char* msgidbuf1; + char* msgidbuf2; + char* pluralbuf1; + char* pluralbuf2; + char* msgctxtbuf; + char* msgstrbuf1; + char* msgstrbuf2; + unsigned priv_type; + unsigned priv_len; + unsigned num[pe_maxstr]; + unsigned len[pe_maxstr]; + struct strmap *strlist; + struct strtbl *translist; + char *strbuffer[pe_maxstr]; + unsigned stroff[pe_maxstr]; + unsigned curr[pe_maxstr]; +}; + +static struct callbackdata *cb_for_qsort; +int strmap_comp(const void *a_, const void *b_) { + const struct strmap *a = a_, *b = b_; + return strcmp(cb_for_qsort->strbuffer[0] + a->str.off, cb_for_qsort->strbuffer[0] + b->str.off); +} + +enum sysdep_types { + st_priu32 = 0, + st_priu64, + st_priumax, + st_max +}; + +static const char sysdep_str[][10]={ + [st_priu32] = "\x08", + [st_priu64] = "\x08", + [st_priumax] = "\x09", +}; +static const char sysdep_repl[][8]={ + [st_priu32] = "\x02lu\0u", + [st_priu64] = "\x02lu\0llu", + [st_priumax] = "\x01ju" +}; +static const char *get_repl(enum sysdep_types type, unsigned nr) { + assert(nr < (unsigned)sysdep_repl[type][0]); + const char* p = sysdep_repl[type]+1; + while(nr--) p+=strlen(p)+1; + return p; +} +static void replace(char* text, unsigned textlen, const char* what, const char * with) { + char*p = text; + size_t la = strlen(what), li=strlen(with); + assert(la >= li); + for(p=text;textlen >= la;) { + if(!memcmp(p,what,la)) { + memcpy(p, with, li); + textlen -= la; + memmove(p+li,p+la,textlen+1); + p+=li; + } else { + p++; + textlen--; + } + } +} +static unsigned get_form(enum sysdep_types type, unsigned no, unsigned occurences[st_max]) { + unsigned i,divisor = 1; + for(i=type+1;i=(unsigned)sysdep_str[i][0] && !memcmp(p,sysdep_str[i]+1,sysdep_str[i][0])) { + occurences[i]++; + f=1; + p+=sysdep_str[i][0]; + l-=sysdep_str[i][0]; + break; + } + if(!f) p++,l--; + } + *count = 1; + for(i=0;imsgidbuf1_len != 0) { + if(!d->strlist[d->curr[pe_msgid]].str.off) + d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid]; + + if(d->ctxtbuf_len != 0) { + 
memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len); + d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len; + d->stroff[pe_msgid]+=d->ctxtbuf_len; + } + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf1, d->msgidbuf1_len); + d->stroff[pe_msgid]+=d->msgidbuf1_len; + d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf1_len-1; + if(d->pluralbuf1_len != 0) { + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf1, d->pluralbuf1_len); + d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf1_len; + d->stroff[pe_msgid]+=d->pluralbuf1_len; + } + d->curr[pe_msgid]++; + } + if(d->msgidbuf2_len != 0) { + if(!d->strlist[d->curr[pe_msgid]].str.off) + d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid]; + + if(d->ctxtbuf_len != 0) { + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len); + d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len; + d->stroff[pe_msgid]+=d->ctxtbuf_len; + } + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf2, d->msgidbuf2_len); + d->stroff[pe_msgid]+=d->msgidbuf2_len; + d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf2_len-1; + if(d->pluralbuf2_len != 0) { + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf2, d->pluralbuf2_len); + d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf2_len; + d->stroff[pe_msgid]+=d->pluralbuf2_len; + } + d->curr[pe_msgid]++; + } + + d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=0; +} + +static inline void writestr(struct callbackdata *d, struct po_info *info) { + // msgid xx; msgstr ""; is widely happened, it's invalid + + // https://github.com/sabotage-linux/gettext-tiny/issues/1 + // no invalid, when empty, check d->num[pe_msgid] + if(!d->pluralstr_count && d->num[pe_msgid] > 0) { + d->len[pe_msgid]-=d->msgidbuf1_len; + d->len[pe_msgid]-=d->msgidbuf2_len; + d->len[pe_plural]-=d->pluralbuf1_len; + d->len[pe_plural]-=d->pluralbuf2_len; + 
d->len[pe_ctxt]-=d->ctxtbuf_len; + d->len[pe_msgstr]--; + d->num[pe_msgid]--; + d->num[pe_msgstr]--; + d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0; + return; + } + + if(d->pluralstr_count && d->pluralstr_count <= info->nplurals) { + writemsg(d); + // plural <= nplurals is allowed + d->translist[d->curr[pe_msgstr]].len=d->msgstr1_len-1; + d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr]; + d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]]; + + memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf1, d->msgstr1_len); + d->stroff[pe_msgstr]+=d->msgstr1_len; + d->curr[pe_msgstr]++; + + if(d->msgstr2_len) { + d->translist[d->curr[pe_msgstr]].len=d->msgstr2_len-1; + d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr]; + d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]]; + + memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf2, d->msgstr2_len); + d->stroff[pe_msgstr]+=d->msgstr2_len; + d->curr[pe_msgstr]++; + } + + d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0; + } +} + +int process_line_callback(struct po_info* info, void* user) { + struct callbackdata *d = (struct callbackdata *) user; + assert(info->type == pe_msgid || info->type == pe_ctxt || info->type == pe_msgstr || info->type == pe_plural); + char **sysdeps; + unsigned len, count, i, l; + switch(d->pass) { + case pass_collect_sizes: + sysdep_transform(info->text, info->textlen, &len, &count, 1); + d->num[info->type] += count; + if(info->type == pe_msgid && count == 2 && d->priv_type == pe_ctxt) { + // ctxt meets msgid with sysdeps, multiply num and len to suit it + d->len[pe_ctxt] += d->priv_len +1; + d->num[pe_ctxt]++; + } + if(count != 1 && info->type == pe_ctxt) { + // except msgid, str, plural, all other types should not have sysdeps + abort(); + } + + d->priv_type = info->type; + d->priv_len = len; + d->len[info->type] += len 
+1; + + if(len+1 > d->string_maxlen) + d->string_maxlen = len+1; + break; + case pass_second: + sysdeps = sysdep_transform(info->text, info->textlen, &len, &count, 0); + for(i=0;istring_maxlen); + if(info->type == pe_msgid) { + if(i==0 && d->msgidbuf1_len) + writestr(d, info); + + // just copy, it's written down when writemsg() + if(i==0) { + memcpy(d->msgidbuf1, sysdeps[i], l+1); + d->msgidbuf1_len = l+1; + } else { + memcpy(d->msgidbuf2, sysdeps[i], l+1); + d->msgidbuf2_len = l+1; + } + } else if(info->type == pe_plural) { + if(i==0) { + memcpy(d->pluralbuf1, sysdeps[i], l+1); + d->pluralbuf1_len = l+1; + } else { + memcpy(d->pluralbuf2, sysdeps[i], l+1); + d->pluralbuf2_len = l+1; + } + } else if(info->type == pe_ctxt) { + writestr(d, info); + d->ctxtbuf_len = l+1; + memcpy(d->msgctxtbuf, sysdeps[i], l); + d->msgctxtbuf[l] = 0x4;//EOT + } else { + // just copy, it's written down when writestr() + if(l) { + if(i==0) { + memcpy(&d->msgstrbuf1[d->msgstr1_len], sysdeps[i], l+1); + d->msgstr1_len += l+1; + d->pluralstr_count++; + } else { + // sysdeps exist + memcpy(&d->msgstrbuf2[d->msgstr2_len], sysdeps[i], l+1); + d->msgstr2_len += l+1; + } + } + } + } + free(sysdeps); + break; + default: + abort(); + } + return 0; +} + +int process(FILE *in, FILE *out) { + struct mo_hdr mohdr = def_hdr; + char line[4096]; char *lp; + char convbuf[16384]; + + struct callbackdata d = { + .num = { + [pe_msgid] = 0, + [pe_msgstr] = 0, + [pe_plural] = 0, + [pe_ctxt] = 0, + }, + .len = { + [pe_msgid] = 0, + [pe_msgstr] = 0, + [pe_plural] = 0, + [pe_ctxt] = 0, + }, + .off = 0, + .out = out, + .pass = pass_first, + .ctxtbuf_len = 0, + .pluralbuf1_len = 0, + .pluralbuf2_len = 0, + .msgidbuf1_len = 0, + .msgidbuf2_len = 0, + .msgstr1_len = 0, + .msgstr2_len = 0, + .pluralstr_count = 0, + .string_maxlen = 0, + }; + + struct po_parser pb, *p = &pb; + + mohdr.off_tbl_trans = mohdr.off_tbl_org; + for(d.pass = pass_first; d.pass <= pass_second; d.pass++) { + if(d.pass == pass_second) { + // 
start of second pass: + // ensure we dont output when there's no strings at all + if(d.num[pe_msgid] == 0) { + return 1; + } + + // check that data gathered in first pass is consistent + if((d.num[pe_msgstr] < d.num[pe_msgid]) || (d.num[pe_msgstr] > (d.num[pe_msgid] + d.num[pe_plural] * (p->info.nplurals - 1)))) { + // one should actually abort here, + // but gnu gettext simply writes an empty .mo and returns success. + //abort(); + fprintf(stderr, "warning: mismatch of msgid/msgstr count, writing empty .mo file\n"); + d.num[pe_msgid] = 0; + return 0; + } + + d.msgidbuf1 = calloc(d.string_maxlen*5+2*d.string_maxlen*p->info.nplurals, 1); + d.msgidbuf2 = d.msgidbuf1 + d.string_maxlen; + d.pluralbuf1 = d.msgidbuf2 + d.string_maxlen; + d.pluralbuf2 = d.pluralbuf1 + d.string_maxlen; + d.msgctxtbuf = d.pluralbuf2 + d.string_maxlen; + d.msgstrbuf1 = d.msgctxtbuf + d.string_maxlen; + d.msgstrbuf2 = d.msgstrbuf1 + d.string_maxlen*p->info.nplurals; + + d.strlist = calloc(d.num[pe_msgid] * sizeof(struct strmap), 1); + d.translist = calloc(d.num[pe_msgstr] * sizeof(struct strtbl), 1); + d.strbuffer[pe_msgid] = calloc(d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1); + d.strbuffer[pe_msgstr] = calloc(d.len[pe_msgstr], 1); + d.stroff[pe_msgid] = d.stroff[pe_msgstr] = 0; + assert(d.msgidbuf1 && d.strlist && d.translist && d.strbuffer[pe_msgid] && d.strbuffer[pe_msgstr]); + } + + poparser_init(p, convbuf, sizeof(convbuf), process_line_callback, &d); + + while((lp = fgets(line, sizeof(line), in))) { + poparser_feed_line(p, lp, sizeof(line)); + } + poparser_finish(p); + if(d.pass == pass_second) + writestr(&d, &p->info); + + if(d.pass == pass_second) { + // calculate header fields from len and num arrays + mohdr.numstring = d.num[pe_msgid]; + mohdr.off_tbl_org = sizeof(struct mo_hdr); + mohdr.off_tbl_trans = mohdr.off_tbl_org + d.num[pe_msgid] * (sizeof(unsigned)*2); + // set offset startvalue + d.off = mohdr.off_tbl_trans + d.num[pe_msgid] * (sizeof(unsigned)*2); + } + fseek(in, 
0, SEEK_SET); + } + + cb_for_qsort = &d; + qsort(d.strlist, d.num[pe_msgid], sizeof (struct strmap), strmap_comp); + unsigned i; + + // print header + fwrite(&mohdr, sizeof(mohdr), 1, out); + for(i = 0; i < d.num[pe_msgid]; i++) { + d.strlist[i].str.off += d.off; + fwrite(&d.strlist[i].str, sizeof(struct strtbl), 1, d.out); + } + for(i = 0; i < d.num[pe_msgid]; i++) { + d.strlist[i].trans->off += d.off + d.len[pe_msgid] + d.len[pe_plural] + d.len[pe_ctxt]; + fwrite(d.strlist[i].trans, sizeof(struct strtbl), 1, d.out); + } + fwrite(d.strbuffer[pe_msgid], d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1, d.out); + fwrite(d.strbuffer[pe_msgstr], d.len[pe_msgstr], 1, d.out); + + return 0; +} + + +void set_file(int out, char* fn, FILE** dest) { + if(streq(fn, "-")) { + if(out) { + *dest = stdout; + } else { + char b[4096]; + size_t n=0; + FILE* tmpf = tmpfile(); + if(!tmpf) + perror("tmpfile"); + + while((n=fread(b, sizeof(*b), sizeof(b), stdin)) > 0) + fwrite(b, sizeof(*b), n, tmpf); + + fseek(tmpf, 0, SEEK_SET); + *dest = tmpf; + } + } else { + *dest = fopen(fn, out ? 
"w" : "r"); + } + if(!*dest) { + perror("fopen"); + exit(1); + } +} + +int main(int argc, char**argv) { + if(argc == 1) syntax(); + int arg = 1; + FILE *out = NULL; + FILE *in = NULL; + int expect_in_fn = 1; + char* locale = NULL; + char* dest = NULL; +#define A argv[arg] + for(; arg < argc; arg++) { + if(A[0] == '-') { + if(A[1] == '-') { + if( + streq(A+2, "java") || + streq(A+2, "java2") || + streq(A+2, "csharp") || + streq(A+2, "csharp-resources") || + streq(A+2, "tcl") || + streq(A+2, "qt") || + streq(A+2, "strict") || + streq(A+2, "properties-input") || + streq(A+2, "stringtable-input") || + streq(A+2, "use-fuzzy") || + strstarts(A+2, "alignment=") || + streq(A+2, "check") || + streq(A+2, "check-format") || + streq(A+2, "check-header") || + streq(A+2, "check-domain") || + streq(A+2, "check-compatibility") || + streq(A+2, "check-accelerators") || + streq(A+2, "no-hash") || + streq(A+2, "verbose") || + streq(A+2, "statistics") || + strstarts(A+2, "check-accelerators=") || + strstarts(A+2, "resource=") + ) { + } else if((dest = strstarts(A+2, "locale="))) { + locale = dest; + } else if((dest = strstarts(A+2, "output-file="))) { + set_file(1, dest, &out); + } else if(streq(A+2, "version")) { + version(); + } else if(streq(A+2, "help")) { + syntax(); + } else if (expect_in_fn) { + set_file(0, A, &in); + expect_in_fn = 0; + } + } else if(streq(A + 1, "o")) { + arg++; + dest = A; + set_file(1, A, &out); + } else if( + streq(A+1, "j") || + streq(A+1, "r") || + streq(A+1, "P") || + streq(A+1, "f") || + streq(A+1, "a") || + streq(A+1, "c") || + streq(A+1, "v") || + streq(A+1, "C") + ) { + } else if (streq(A+1, "V")) { + version(); + } else if (streq(A+1, "h")) { + syntax(); + } else if (streq(A+1, "l")) { + arg++; + locale = A; + } else if (streq(A+1, "d")) { + arg++; + dest = A; + } else if (expect_in_fn) { + set_file(0, A, &in); + expect_in_fn = 0; + } + } else if (expect_in_fn) { + set_file(0, A, &in); + expect_in_fn = 0; + } + } + + if (locale != NULL && dest != 
NULL) { + int sz = snprintf(NULL, 0, "%s/%s.msg", dest, locale); + char msg[sz+1]; + snprintf(msg, sizeof(msg), "%s/%s.msg", dest, locale); + FILE *fp = fopen(msg, "w"); + if (fp) { + fclose(fp); + return 0; + } else return 1; + } + + if(out == NULL) { + dest = "messages.mo"; + set_file(1, "messages.mo", &out); + } + + if(in == NULL || out == NULL) { + return 1; + } + int ret = process(in, out); + fflush(in); fflush(out); + if(in != stdin) fclose(in); + if(out != stdout) fclose(out); + + if (ret == 1) { + return remove(dest); + } + return ret; +} diff --git a/utils/msgmerge.c b/utils/msgmerge.c new file mode 100644 index 000000000..2a5e041ff --- /dev/null +++ b/utils/msgmerge.c @@ -0,0 +1,268 @@ +/* msgfmt utility (C) 2012 rofl0r + * released under the MIT license, see LICENSE for details */ +#include +#include +#include +#include +#include +#include "poparser.h" +#include "StringEscape.h" + +__attribute__((noreturn)) +static void syntax(void) { + fprintf(stdout, + "Usage: msgmerge [OPTION] def.po ref.pot\n"); + exit(1); +} + +__attribute__((noreturn)) +static void version(void) { + fprintf(stdout, + "these are not (GNU gettext-tools) 99.9999.9999\n"); + exit(0); +} + +#define streq(A, B) (!strcmp(A, B)) +#define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1))) + +struct fiLes { + FILE *out; + /* we can haz 3 different input files: + * the .pot, which is the file containing only the ripped out strings from the program + * (and no translations) + * a .po, which contains translations and strings made from a previous .pot from that same source file, + * a compendium, which is basically a huge po file containing all sorts of strings (msgid's) and translations (msgstr's) + */ + FILE *po; + FILE *pot; + FILE *compend; + int plural_count; + char convbuf[16384]; + enum po_entry prev_type; +}; + +/* currently we only output input strings as output strings + * i.e. 
there is no translation lookup at all */ +int process_line_callback(struct po_info* info, void* user) { + struct fiLes* file = (struct fiLes*) user; + + // escape what is unescaped automatically by lib + escape(info->text, file->convbuf, sizeof(file->convbuf)); + switch (info->type) { + case pe_msgid: + file->plural_count = 1; + fprintf(file->out, "\nmsgid \"%s\"\n", file->convbuf); + file->prev_type = info->type; + break; + case pe_ctxt: + fprintf(file->out, "msgctxt \"%s\"\n", file->convbuf); + break; + case pe_plural: + fprintf(file->out, "msgid_plural \"%s\"\n", file->convbuf); + file->prev_type = info->type; + break; + case pe_msgstr: + if (file->prev_type == pe_plural) { + fprintf(file->out, "msgstr[%d] \"%s\"\n", file->plural_count++, file->convbuf); + } else { + fprintf(file->out, "msgstr \"%s\"\n", file->convbuf); + } + break; + } + return 0; +} + +int process(struct fiLes *files, int update, int backup) { + (void) update; (void) backup; + struct po_parser pb, *p = &pb; + char line[4096], conv[8192], *lb; + poparser_init(p, conv, sizeof(conv), process_line_callback, files); + while((lb = fgets(line, sizeof(line), files->po))) { + poparser_feed_line(p, lb, sizeof(line)); + } + poparser_finish(p); + return 0; +} + +void set_file(int out, char* fn, FILE** dest) { + if(streq(fn, "-")) { + *dest = out ? stdout : stdin; + } else { + *dest = fopen(fn, out ? 
"w" : "r"); + } + if(!*dest) { + perror("fopen"); + exit(1); + } +} + +int getbackuptype(char* str) { + if(!str || !*str || streq(str, "none") || streq(str, "off")) + return 0; + else if(streq(str, "t") || streq(str, "numbered")) + return 1; + else if(streq(str, "nil") || streq(str, "existing")) + return 2; + else if(streq(str, "simple") || streq(str, "never")) + return 3; + else syntax(); +} + +int main(int argc, char**argv) { + if(argc == 1) syntax(); + int arg = 1; + struct expect { + int out; + int po; + int pot; + int compend; + } expect_fn = { + .out = 0, + .po = 1, + .pot = 0, + .compend = 0, + }; + struct fiLes files = {0,0,0,0,1,0}; + char* backup_suffix = getenv("SIMPLE_BACKUP_SUFFIX"); + if(!backup_suffix) backup_suffix = "~"; + int update = 0; + int backup = getbackuptype(getenv("VERSION_CONTROL")); + char* dest; + set_file(1, "-", &files.out); +#define A argv[arg] + for(; arg < argc; arg++) { + if(A[0] == '-') { + if(A[1] == '-') { + if( + streq(A+2, "strict") || + streq(A+2, "properties-input") || + streq(A+2, "properties-output") || + streq(A+2, "stringtable-input") || + streq(A+2, "stringtable-output") || + streq(A+2, "no-fuzzy-matching") || + streq(A+2, "multi-domain") || + streq(A+2, "previous") || + streq(A+2, "escape") || + streq(A+2, "no-escape") || + streq(A+2, "force-po") || + streq(A+2, "indent") || + streq(A+2, "add-location") || + streq(A+2, "no-location") || + streq(A+2, "no-wrap") || + streq(A+2, "sort-output") || + streq(A+2, "sort-by-file") || + + strstarts(A+2, "lang=") || + strstarts(A+2, "color") || // can be --color or --color=xxx + strstarts(A+2, "style=") || + strstarts(A+2, "width=") || + + streq(A+2, "verbose") || + streq(A+2, "quiet") || + streq(A+2, "silent") ) { + } else if(streq(A+2, "version")) { + version(); + } else if((dest = strstarts(A+2, "output-file="))) { + set_file(1, dest, &files.out); + } else if((dest = strstarts(A+2, "compendium="))) { + set_file(1, dest, &files.compend); + } else if((dest = strstarts(A+2, 
"suffix="))) { + backup_suffix = dest; + } else if((dest = strstarts(A+2, "directory="))) { + goto nodir; + } else if((dest = strstarts(A+2, "backup"))) { + if (*dest == '=') + backup = getbackuptype(dest + 1); + else + backup = 0; + } else if(streq(A+2, "update")) { + set_update: + update = 1; + } else if(streq(A+2, "help")) syntax(); + + } else if(streq(A + 1, "o")) { + expect_fn.out = 1; + } else if(streq(A + 1, "C")) { + expect_fn.compend = 1; + } else if(streq(A + 1, "U")) { + goto set_update; + } else if( + streq(A+1, "m") || + streq(A+1, "N") || + streq(A+1, "P") || + streq(A+1, "e") || + streq(A+1, "E") || + streq(A+1, "i") || + streq(A+1, "p") || + streq(A+1, "w") || + streq(A+1, "s") || + streq(A+1, "F") || + streq(A+1, "V") || + streq(A+1, "q") + ) { + + } else if (streq(A+1, "v")) { + version(); + } else if (streq(A+1, "D")) { + // no support for -D at this time + nodir: + fprintf(stderr, "EINVAL\n"); + exit(1); + } else if (streq(A+1, "h")) { + syntax(); + } else if(expect_fn.out) { + if(update && streq(A, "/dev/null")) return 0; + set_file(1, A, &files.out); + expect_fn.out = 0; + } else if(expect_fn.compend) { + set_file(1, A, &files.compend); + expect_fn.compend = 0; + } else if(expect_fn.po) { + if(update && streq(A, "/dev/null")) return 0; + set_file(0, A, &files.po); + expect_fn.po = 0; + expect_fn.pot = 1; + } else if(expect_fn.pot) { + if(update && streq(A, "/dev/null")) return 0; + set_file(0, A, &files.pot); + expect_fn.pot = 0; + } + + } else if(expect_fn.out) { + if(update && streq(A, "/dev/null")) return 0; + set_file(1, A, &files.out); + expect_fn.out = 0; + } else if(expect_fn.compend) { + set_file(1, A, &files.compend); + expect_fn.compend = 0; + } else if(expect_fn.po) { + if(update && streq(A, "/dev/null")) return 0; + set_file(0, A, &files.po); + expect_fn.po = 0; + expect_fn.pot = 1; + } else if(expect_fn.pot) { + if(update && streq(A, "/dev/null")) return 0; + set_file(0, A, &files.pot); + expect_fn.pot = 0; + } + } + if(update) { 
+ fprintf(stdout, "warning: update functionality unimplemented\n"); + return 0; + } + if(!files.out || !files.po || !files.pot) syntax(); + int ret = process(&files, update, backup); + FILE** filearr = (FILE**) &files; + unsigned i; + for (i = 0; i < 4; i++) { + if(filearr[i] != NULL) fflush(filearr[i]); + } + for (i = 0; i < 4; i++) { + if( + filearr[i] != NULL && + filearr[i] != stdout && + filearr[i] != stdin + ) fclose(filearr[i]); + } + return ret; +} diff --git a/utils/poparser.c b/utils/poparser.c new file mode 100644 index 000000000..236a1d995 --- /dev/null +++ b/utils/poparser.c @@ -0,0 +1,218 @@ +#include +#include +#include +#include +#include +#include "poparser.h" +#include "StringEscape.h" + +#define streq(A, B) (!strcmp(A, B)) +#define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1))) + +static unsigned fuzzymark = 0; +static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* end, size_t *stringstart) { + enum po_entry result_type; + char *x, *y; + size_t start = (size_t) lp; + while(isspace(*lp) && lp < end) lp++; + if(lp[0] == '#') { + char *s; + if((s = strstr(lp, ", fuzzy"))) { + if(fuzzymark != 0) fuzzymark++; + else fuzzymark=2; + } + inv: + *stringstart = 0; + return pe_invalid; + } else if((y = strstarts(lp, "msg"))) { + if((x = strstarts(y, "id")) && isspace(*x)) + result_type = pe_msgid; + else if ((x = strstarts(y, "id_plural")) && isspace(*x)) + result_type = pe_plural; + else if ((x = strstarts(y, "ctxt")) && isspace(*x)) + result_type = pe_ctxt; + else if ((x = strstarts(y, "str")) && (isspace(*x) || + (x[0] == '[' && (x[1]-'0') < info->nplurals && x[2] == ']' && (x += 3) && isspace(*x)))) + result_type = pe_msgstr; + else + goto inv; + while(isspace(*x) && x < end) x++; + if(*x != '"') abort(); + conv: + *stringstart = ((size_t) x - start) + 1; + } else if(lp[0] == '"') { + if(!(*info->charset)) { + if((x = strstr(lp, "charset="))) { + // charset=xxx\\n + int len = strlen(x+=8) - 4; + 
assert(len >= 0 && len <= 11); /* a header shorter than the expected tail must not reach memcpy with a negative length */ + if(strncmp(x, "UTF-8", 5) && strncmp(x, "utf-8", 5)) { + memcpy(info->charset, x, len); + info->charset[len] = 0; + } + } + } + if((x = strstr(lp, "nplurals="))) + if(*(x+9) - '0') + info->nplurals = *(x+9) - '0'; + result_type = pe_str; + x = lp; + goto conv; + } else { + goto inv; + } + return result_type; +} + +/* expects a pointer to the first char after an opening " in a string, + * converts the string into convbuf, and returns the length of that string */ +static size_t get_length_and_convert(struct po_info *info, char* x, char* end, char* convbuf, size_t convbuflen) { + size_t result = 0; + char* e = x + strlen(x); + assert(e > x && e < end && *e == 0); + e--; + while(isspace(*e)) e--; + if(*e != '"') abort(); + *e = 0; + char *s; + if(*info->charset) { + iconv_t ret = iconv_open("UTF-8", info->charset); + if(ret != (iconv_t)-1) { + size_t a=end-x, b=a*4; + char mid[b+1], *midp=mid; /* one spare byte so the result can always be terminated */ + iconv(ret, &x, &a, &midp, &b); *midp = 0; iconv_close(ret); /* reuse the descriptor opened above instead of opening a second one, and release it */ + if((s = strstr(mid, "charset="))) + memcpy(s+8, "UTF-8\\n\0", 8); + result = unescape(mid, convbuf, convbuflen); + // iconv doesnt recognize the encoding + } else result = unescape(x, convbuf, convbuflen); + } else result = unescape(x, convbuf, convbuflen); + return result; +} + + +void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_callback cb, void* cbdata) { + p->buf = workbuf; + p->bufsize = bufsize; + p->cb = cb; + p->prev_type = pe_invalid; + p->prev_rtype = pe_invalid; + p->curr_len = 0; + p->cbdata = cbdata; + *(p->info.charset) = 0; + // nplurals = 2 by default + p->info.nplurals = 2; + fuzzymark = 0; +} + +enum lineactions { + la_incr, + la_proc, + la_abort, + la_nop, + la_max, +}; + +/* return 0 on success */ +int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) { + char *convbuf = p->buf; + size_t convbuflen = p->bufsize; + size_t strstart; + + static const enum lineactions action_tbl[pe_max][pe_max] = { + // pe_str will never be set as 
curr_type + [pe_str] = { + [pe_str] = la_abort, + [pe_msgid] = la_abort, + [pe_ctxt] = la_abort, + [pe_plural] = la_abort, + [pe_msgstr] = la_abort, + [pe_invalid] = la_abort, + }, + [pe_msgid] = { + [pe_str] = la_incr, + [pe_msgid] = la_abort, + [pe_ctxt] = la_abort, + [pe_plural] = la_proc, + [pe_msgstr] = la_proc, + [pe_invalid] = la_proc, + }, + [pe_ctxt] = { + [pe_str] = la_incr, + [pe_msgid] = la_proc, + [pe_ctxt] = la_abort, + [pe_plural] = la_abort, + [pe_msgstr] = la_abort, + [pe_invalid] = la_proc, + }, + [pe_plural] = { + [pe_str] = la_incr, + [pe_msgid] = la_abort, + [pe_ctxt] = la_abort, + [pe_plural] = la_abort, + [pe_msgstr] = la_proc, + [pe_invalid] = la_proc, + }, + [pe_msgstr] = { + [pe_str] = la_incr, + [pe_msgid] = la_proc, + [pe_ctxt] = la_proc, + [pe_plural] = la_abort, + [pe_msgstr] = la_proc, + [pe_invalid] = la_proc, + }, + [pe_invalid] = { + [pe_str] = la_nop, + [pe_msgid] = la_incr, + [pe_ctxt] = la_incr, + [pe_plural] = la_nop, + [pe_msgstr] = la_nop, + [pe_invalid] = la_nop, + }, + }; + + enum po_entry type; + + type = get_type_and_start(&p->info, line, line + buflen, &strstart); + if(p->prev_rtype != pe_invalid && action_tbl[p->prev_rtype][type] == la_abort) + abort(); + if(type != pe_invalid && type != pe_str) + p->prev_rtype = type; + if(fuzzymark) { + if(type == pe_ctxt && fuzzymark == 1) fuzzymark--; + if(type == pe_msgid) fuzzymark--; + if(fuzzymark > 0) return 0; + } + switch(action_tbl[p->prev_type][type]) { + case la_incr: + assert(type == pe_msgid || type == pe_msgstr || type == pe_str || type == pe_plural || type == pe_ctxt); /* a bare "pe_ctxt" operand is a non-zero constant and made this check always true */ + p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len); + break; + case la_proc: + assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr || p->prev_type == pe_plural || p->prev_type == pe_ctxt); + p->info.text = convbuf; + p->info.textlen = p->curr_len; + p->info.type = p->prev_type; + p->cb(&p->info, p->cbdata); + if(type != 
pe_invalid) + p->curr_len = get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf, convbuflen); + else + p->curr_len = 0; + break; + case la_nop: + break; + case la_abort: + default: + abort(); + // todo : return error code + } + if(type != pe_str) { + p->prev_type = type; + } + return 0; +} + +int poparser_finish(struct po_parser *p) { + char empty[4] = ""; + return poparser_feed_line(p, empty, sizeof(empty)); +} diff --git a/utils/poparser.h b/utils/poparser.h new file mode 100644 index 000000000..29b7b16e0 --- /dev/null +++ b/utils/poparser.h @@ -0,0 +1,41 @@ +#ifndef POPARSER_H +#define POPARSER_H +#include + +enum po_entry { + pe_msgid = 0, + pe_plural, + pe_ctxt, + pe_msgstr, + pe_maxstr, + pe_str = pe_maxstr, + pe_invalid, + pe_max, +}; + +struct po_info { + enum po_entry type; + char *text; + char charset[12]; + unsigned int nplurals; + size_t textlen; +}; + +typedef int (*poparser_callback)(struct po_info* info, void* user); + +struct po_parser { + struct po_info info; + char *buf; + size_t bufsize; + enum po_entry prev_type; + enum po_entry prev_rtype; + unsigned curr_len; + poparser_callback cb; + void *cbdata; +}; + +void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_callback cb, void* cbdata); +int poparser_feed_line(struct po_parser *p, char* line, size_t buflen); +int poparser_finish(struct po_parser *p); + +#endif diff --git a/utils/xgettext b/utils/xgettext new file mode 100755 index 000000000..ebed620f0 --- /dev/null +++ b/utils/xgettext @@ -0,0 +1,108 @@ +#!/bin/sh +outputfile= +outputdir= +domain=messages + +spliteq() { + arg=$1 + echo "${arg#*=}" + #alternatives echo "$arg" | cut -d= -f2- + # or echo "$arg" | sed 's/[^=]*=//' +} + +syntax() { + printf "%s\n" "Usage: xgettext [OPTION] [INPUTFILE]..." 
+	exit 1
+}
+# Prints the version banner on a line of its own and exits successfully.
+show_version() {
+	printf "%s\n" "these are not (GNU gettext-tools) 99.9999.9999"
+	exit 0
+}
+# Consume the accepted options; only -d/-o/-p and their long forms have an effect. Stops at the first other argument.
+while true ; do
+	case $1 in
+	#--files-from=*) readfile `spliteq "$1"`;;
+	#-f) expectfilefrom=1;;
+	--version) show_version;;
+	-V) show_version;;
+	--default-domain=*) domain=`spliteq "$1"` ;;
+	-d) shift ; domain="$1" ;;
+	--files-from=*) : ;;
+	-f) shift ;;
+	--directory=*) : ;;
+	-D) shift ;;
+	-o) shift ; outputfile="$1" ;;
+	--output=*) outputfile=`spliteq "$1"` ;;
+	--output-dir=*) outputdir=`spliteq "$1"` ;;
+	-p) shift ; outputdir="$1" ;; # separate argument: nothing to split at '='
+	--language=*) : ;;
+	-L) shift ;;
+	-C|--C) : ;;
+	--c++) : ;;
+	--from-code=*) : ;;
+	--join-existing) : ;;
+	-j) : ;;
+	--exclude-file=*) : ;;
+	-x) shift;;
+	--add-comments=*) : ;;
+	-c?*) : ;; # the tag is attached to -c, so there is no extra argument to skip
+	--add-comments) : ;;
+	-c) : ;;
+	--extract-all) : ;;
+	-a) : ;;
+	--keyword=*) : ;;
+	-k*) : ;;
+	--keyword) : ;;
+	-k) : ;;
+	--flag=*) : ;;
+	--trigraphs) : ;;
+	-T) : ;;
+	--qt) : ;;
+	--kde) : ;;
+	--boost) : ;;
+	--debug) : ;;
+	--color) : ;;
+	--color=*) : ;;
+	--style=*) : ;;
+	--no-escape) : ;;
+	-e) : ;;
+	--escape) : ;;
+	-E) : ;;
+	--force-po) force=1 ;;
+	--indent) : ;;
+	-i) : ;;
+	--no-location) : ;;
+	--add-location) : ;;
+	-n) : ;;
+	--strict) : ;;
+	--properties-output) : ;;
+	--stringtable-output) : ;;
+	--width=*) : ;;
+	-w) shift ;; # -w takes the width as a separate argument
+	--no-wrap) : ;;
+	--sort-output) : ;;
+	-s) : ;;
+	--sort-by-file) : ;;
+	-F) : ;;
+	--omit-header) : ;;
+	--copyright-holder=*) : ;;
+	--foreign-user) : ;;
+	--package-name=*) : ;;
+	--package-version=*) : ;;
+	--msgid-bugs-address=*) : ;;
+	--msgstr-prefix*) : ;;
+	-m*) : ;;
+	--msgstr-suffix*) : ;;
+	-M*) : ;;
+	--help) syntax ;;
+	-h) syntax ;;
+	*) break ;;
+	esac
+	shift
+done
+# Nothing is extracted: unless "-" (stdout) was requested, only make sure the output file exists.
+[ "$outputfile" = "-" ] && exit 0
+[ -z "$outputfile" ] && outputfile=${domain}.po
+case $outputfile in /*) ;; *) outputfile=${outputdir:-.}/$outputfile ;; esac # relative names go below the output dir (default: .)
+touch "$outputfile"
-- 
cgit v1.2.3