diff options
Diffstat (limited to 'utils/msgfmt.c')
-rw-r--r-- | utils/msgfmt.c | 612 |
1 files changed, 612 insertions, 0 deletions
diff --git a/utils/msgfmt.c b/utils/msgfmt.c new file mode 100644 index 000000000..6256eed27 --- /dev/null +++ b/utils/msgfmt.c @@ -0,0 +1,612 @@ +/* msgfmt utility (C) 2012 rofl0r + * released under the MIT license, see LICENSE for details */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <assert.h> +#include "poparser.h" + +// in DO_NOTHING mode, we simply write the msgid twice, once for msgid, once for msgstr. +// TODO: maybe make it write "" instead of echoing the msgid. +//#define DO_NOTHING + +__attribute__((noreturn)) +static void syntax(void) { + fprintf(stdout, + "Usage: msgfmt [OPTION] filename.po ...\n"); + exit(1); +} + +__attribute__((noreturn)) +static void version(void) { + fprintf(stdout, + "these are not (GNU gettext-tools) 99.9999.9999\n"); + exit(0); +} + +#define streq(A, B) (!strcmp(A, B)) +#define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1))) + +struct mo_hdr { + unsigned magic; + int rev; + unsigned numstring; + unsigned off_tbl_org; + unsigned off_tbl_trans; + unsigned hash_tbl_size; + unsigned off_tbl_hash; +}; + +/* file layout: + header + strtable (lenghts/offsets) + transtable (lenghts/offsets) + [hashtable] + strings section + translations section */ + +const struct mo_hdr def_hdr = { + 0x950412de, + 0, + 0, + sizeof(struct mo_hdr), + 0, + 0, + 0, +}; + + +// pass 0: collect numbers of strings, calculate size and offsets for tables +// print header +// pass 1: create in-memory string tables +enum passes { + pass_first = 0, + pass_collect_sizes = pass_first, + pass_second, + pass_max, +}; + +struct strtbl { + unsigned len, off; +}; + +struct strmap { + struct strtbl str, *trans; +}; + +struct callbackdata { + enum passes pass; + unsigned off; + FILE* out; + unsigned msgidbuf1_len; + unsigned msgidbuf2_len; + unsigned pluralbuf1_len; + unsigned pluralbuf2_len; + unsigned ctxtbuf_len; + unsigned msgstr1_len; + unsigned msgstr2_len; + unsigned pluralstr_count; + unsigned string_maxlen; + char* msgidbuf1; + char* msgidbuf2; + char* pluralbuf1; + char* pluralbuf2; + char* msgctxtbuf; + char* msgstrbuf1; + char* msgstrbuf2; + unsigned priv_type; + unsigned priv_len; + unsigned num[pe_maxstr]; + unsigned len[pe_maxstr]; + struct strmap *strlist; + struct strtbl *translist; + char *strbuffer[pe_maxstr]; + unsigned stroff[pe_maxstr]; + unsigned curr[pe_maxstr]; +}; + +static struct callbackdata *cb_for_qsort; +int strmap_comp(const void *a_, const void *b_) { + const struct strmap *a = a_, *b = b_; + return strcmp(cb_for_qsort->strbuffer[0] + a->str.off, cb_for_qsort->strbuffer[0] + b->str.off); +} + +enum sysdep_types { + st_priu32 = 0, + st_priu64, + st_priumax, + st_max +}; + +static const char sysdep_str[][10]={ + [st_priu32] = "\x08<PRIu32>", + [st_priu64] = "\x08<PRIu64>", + [st_priumax] = "\x09<PRIuMAX>", +}; +static const char sysdep_repl[][8]={ + [st_priu32] = "\x02lu\0u", + [st_priu64] = "\x02lu\0llu", + [st_priumax] = "\x01ju" +}; +static const char *get_repl(enum sysdep_types type, unsigned nr) { + assert(nr < (unsigned)sysdep_repl[type][0]); + const char* p = sysdep_repl[type]+1; + while(nr--) p+=strlen(p)+1; + return p; +} +static void replace(char* text, unsigned textlen, const char* what, const char * with) { + char*p = text; + size_t la = strlen(what), li=strlen(with); + assert(la >= li); + for(p=text;textlen >= la;) { + if(!memcmp(p,what,la)) { + memcpy(p, with, li); + textlen -= la; + memmove(p+li,p+la,textlen+1); + p+=li; + } else { + p++; + textlen--; + } + } +} +static unsigned get_form(enum sysdep_types type, unsigned no, unsigned occurences[st_max]) { + unsigned i,divisor = 1; + for(i=type+1;i<st_max;i++) if(occurences[i]) divisor *= sysdep_repl[i][0]; + return (no/divisor)%sysdep_repl[type][0]; +} +static char** sysdep_transform(const char* text, unsigned textlen, unsigned *len, unsigned *count, int simulate) { + unsigned occurences[st_max] = {0}; + const char *p=text,*o; + unsigned i,j, l = textlen; + while(l && (o=strchr(p, '<'))) { + l-=o-p;p=o; + unsigned f = 0; + for(i=0;i<st_max;i++) + if(l>=(unsigned)sysdep_str[i][0] && !memcmp(p,sysdep_str[i]+1,sysdep_str[i][0])) { + occurences[i]++; + f=1; + p+=sysdep_str[i][0]; + l-=sysdep_str[i][0]; + break; + } + if(!f) p++,l--; + } + *count = 1; + for(i=0;i<st_max;i++) if(occurences[i]) *count *= sysdep_repl[i][0]; + l = textlen * *count; + for(i=0;i<*count;i++) for(j=0;j<st_max;j++) + if(occurences[j]) l-= occurences[j] * (sysdep_str[j][0] - strlen(get_repl(j, get_form(j, i, occurences)))); + *len = l+*count-1; + + char **out = 0; + if(!simulate) { + out = malloc((sizeof(char*)+textlen+1) * *count); + assert(out); + char *p = (void*)(out+*count); + for(i=0;i<*count;i++) { + out[i]=p; + memcpy(p, text, textlen+1); + p+=textlen+1; + } + for(i=0;i<*count;i++) for(j=0;j<st_max;j++) + if(occurences[j]) + replace(out[i], textlen, sysdep_str[j]+1, get_repl(j, get_form(j, i, occurences))); + } + + return out; +} + +static inline void writemsg(struct callbackdata *d) { + if(d->msgidbuf1_len != 0) { + if(!d->strlist[d->curr[pe_msgid]].str.off) + d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid]; + + if(d->ctxtbuf_len != 0) { + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len); + d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len; + d->stroff[pe_msgid]+=d->ctxtbuf_len; + } + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf1, d->msgidbuf1_len); + d->stroff[pe_msgid]+=d->msgidbuf1_len; + d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf1_len-1; + if(d->pluralbuf1_len != 0) { + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf1, d->pluralbuf1_len); + d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf1_len; + d->stroff[pe_msgid]+=d->pluralbuf1_len; + } + d->curr[pe_msgid]++; + } + if(d->msgidbuf2_len != 0) { + if(!d->strlist[d->curr[pe_msgid]].str.off) + d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid]; + + if(d->ctxtbuf_len != 0) { + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len); + d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len; + d->stroff[pe_msgid]+=d->ctxtbuf_len; + } + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf2, d->msgidbuf2_len); + d->stroff[pe_msgid]+=d->msgidbuf2_len; + d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf2_len-1; + if(d->pluralbuf2_len != 0) { + memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf2, d->pluralbuf2_len); + d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf2_len; + d->stroff[pe_msgid]+=d->pluralbuf2_len; + } + d->curr[pe_msgid]++; + } + + d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=0; +} + +static inline void writestr(struct callbackdata *d, struct po_info *info) { + // msgid xx; msgstr ""; is widely happened, it's invalid + + // https://github.com/sabotage-linux/gettext-tiny/issues/1 + // no invalid, when empty, check d->num[pe_msgid] + if(!d->pluralstr_count && d->num[pe_msgid] > 0) { + d->len[pe_msgid]-=d->msgidbuf1_len; + d->len[pe_msgid]-=d->msgidbuf2_len; + d->len[pe_plural]-=d->pluralbuf1_len; + d->len[pe_plural]-=d->pluralbuf2_len; + d->len[pe_ctxt]-=d->ctxtbuf_len; + d->len[pe_msgstr]--; + d->num[pe_msgid]--; + d->num[pe_msgstr]--; + d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0; + return; + } + + if(d->pluralstr_count && d->pluralstr_count <= info->nplurals) { + writemsg(d); + // plural <= nplurals is allowed + d->translist[d->curr[pe_msgstr]].len=d->msgstr1_len-1; + d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr]; + d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]]; + + memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf1, d->msgstr1_len); + d->stroff[pe_msgstr]+=d->msgstr1_len; + d->curr[pe_msgstr]++; + + if(d->msgstr2_len) { + d->translist[d->curr[pe_msgstr]].len=d->msgstr2_len-1; + d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr]; + d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]]; + + memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf2, d->msgstr2_len); + d->stroff[pe_msgstr]+=d->msgstr2_len; + d->curr[pe_msgstr]++; + } + + d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0; + } +} + +int process_line_callback(struct po_info* info, void* user) { + struct callbackdata *d = (struct callbackdata *) user; + assert(info->type == pe_msgid || info->type == pe_ctxt || info->type == pe_msgstr || info->type == pe_plural); + char **sysdeps; + unsigned len, count, i, l; + switch(d->pass) { + case pass_collect_sizes: + sysdep_transform(info->text, info->textlen, &len, &count, 1); + d->num[info->type] += count; + if(info->type == pe_msgid && count == 2 && d->priv_type == pe_ctxt) { + // ctxt meets msgid with sysdeps, multiply num and len to suit it + d->len[pe_ctxt] += d->priv_len +1; + d->num[pe_ctxt]++; + } + if(count != 1 && info->type == pe_ctxt) { + // except msgid, str, plural, all other types should not have sysdeps + abort(); + } + + d->priv_type = info->type; + d->priv_len = len; + d->len[info->type] += len +1; + + if(len+1 > d->string_maxlen) + d->string_maxlen = len+1; + break; + case pass_second: + sysdeps = sysdep_transform(info->text, info->textlen, &len, &count, 0); + for(i=0;i<count;i++) { + l = strlen(sysdeps[i]); + assert(l+1 <= d->string_maxlen); + if(info->type == pe_msgid) { + if(i==0 && d->msgidbuf1_len) + writestr(d, info); + + // just copy, it's written down when writemsg() + if(i==0) { + memcpy(d->msgidbuf1, sysdeps[i], l+1); + d->msgidbuf1_len = l+1; + } else { + memcpy(d->msgidbuf2, sysdeps[i], l+1); + d->msgidbuf2_len = l+1; + } + } else if(info->type == pe_plural) { + if(i==0) { + memcpy(d->pluralbuf1, sysdeps[i], l+1); + d->pluralbuf1_len = l+1; + } else { + memcpy(d->pluralbuf2, sysdeps[i], l+1); + d->pluralbuf2_len = l+1; + } + } else if(info->type == pe_ctxt) { + writestr(d, info); + d->ctxtbuf_len = l+1; + memcpy(d->msgctxtbuf, sysdeps[i], l); + d->msgctxtbuf[l] = 0x4;//EOT + } else { + // just copy, it's written down when writestr() + if(l) { + if(i==0) { + memcpy(&d->msgstrbuf1[d->msgstr1_len], sysdeps[i], l+1); + d->msgstr1_len += l+1; + d->pluralstr_count++; + } else { + // sysdeps exist + memcpy(&d->msgstrbuf2[d->msgstr2_len], sysdeps[i], l+1); + d->msgstr2_len += l+1; + } + } + } + } + free(sysdeps); + break; + default: + abort(); + } + return 0; +} + +int process(FILE *in, FILE *out) { + struct mo_hdr mohdr = def_hdr; + char line[4096]; char *lp; + char convbuf[16384]; + + struct callbackdata d = { + .num = { + [pe_msgid] = 0, + [pe_msgstr] = 0, + [pe_plural] = 0, + [pe_ctxt] = 0, + }, + .len = { + [pe_msgid] = 0, + [pe_msgstr] = 0, + [pe_plural] = 0, + [pe_ctxt] = 0, + }, + .off = 0, + .out = out, + .pass = pass_first, + .ctxtbuf_len = 0, + .pluralbuf1_len = 0, + .pluralbuf2_len = 0, + .msgidbuf1_len = 0, + .msgidbuf2_len = 0, + .msgstr1_len = 0, + .msgstr2_len = 0, + .pluralstr_count = 0, + .string_maxlen = 0, + }; + + struct po_parser pb, *p = &pb; + + mohdr.off_tbl_trans = mohdr.off_tbl_org; + for(d.pass = pass_first; d.pass <= pass_second; d.pass++) { + if(d.pass == pass_second) { + // start of second pass: + // ensure we dont output when there's no strings at all + if(d.num[pe_msgid] == 0) { + return 1; + } + + // check that data gathered in first pass is consistent + if((d.num[pe_msgstr] < d.num[pe_msgid]) || (d.num[pe_msgstr] > (d.num[pe_msgid] + d.num[pe_plural] * (p->info.nplurals - 1)))) { + // one should actually abort here, + // but gnu gettext simply writes an empty .mo and returns success. + //abort(); + fprintf(stderr, "warning: mismatch of msgid/msgstr count, writing empty .mo file\n"); + d.num[pe_msgid] = 0; + return 0; + } + + d.msgidbuf1 = calloc(d.string_maxlen*5+2*d.string_maxlen*p->info.nplurals, 1); + d.msgidbuf2 = d.msgidbuf1 + d.string_maxlen; + d.pluralbuf1 = d.msgidbuf2 + d.string_maxlen; + d.pluralbuf2 = d.pluralbuf1 + d.string_maxlen; + d.msgctxtbuf = d.pluralbuf2 + d.string_maxlen; + d.msgstrbuf1 = d.msgctxtbuf + d.string_maxlen; + d.msgstrbuf2 = d.msgstrbuf1 + d.string_maxlen*p->info.nplurals; + + d.strlist = calloc(d.num[pe_msgid] * sizeof(struct strmap), 1); + d.translist = calloc(d.num[pe_msgstr] * sizeof(struct strtbl), 1); + d.strbuffer[pe_msgid] = calloc(d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1); + d.strbuffer[pe_msgstr] = calloc(d.len[pe_msgstr], 1); + d.stroff[pe_msgid] = d.stroff[pe_msgstr] = 0; + assert(d.msgidbuf1 && d.strlist && d.translist && d.strbuffer[pe_msgid] && d.strbuffer[pe_msgstr]); + } + + poparser_init(p, convbuf, sizeof(convbuf), process_line_callback, &d); + + while((lp = fgets(line, sizeof(line), in))) { + poparser_feed_line(p, lp, sizeof(line)); + } + poparser_finish(p); + if(d.pass == pass_second) + writestr(&d, &p->info); + + if(d.pass == pass_second) { + // calculate header fields from len and num arrays + mohdr.numstring = d.num[pe_msgid]; + mohdr.off_tbl_org = sizeof(struct mo_hdr); + mohdr.off_tbl_trans = mohdr.off_tbl_org + d.num[pe_msgid] * (sizeof(unsigned)*2); + // set offset startvalue + d.off = mohdr.off_tbl_trans + d.num[pe_msgid] * (sizeof(unsigned)*2); + } + fseek(in, 0, SEEK_SET); + } + + cb_for_qsort = &d; + qsort(d.strlist, d.num[pe_msgid], sizeof (struct strmap), strmap_comp); + unsigned i; + + // print header + fwrite(&mohdr, sizeof(mohdr), 1, out); + for(i = 0; i < d.num[pe_msgid]; i++) { + d.strlist[i].str.off += d.off; + fwrite(&d.strlist[i].str, sizeof(struct strtbl), 1, d.out); + } + for(i = 0; i < d.num[pe_msgid]; i++) { + d.strlist[i].trans->off += d.off + d.len[pe_msgid] + d.len[pe_plural] + d.len[pe_ctxt]; + fwrite(d.strlist[i].trans, sizeof(struct strtbl), 1, d.out); + } + fwrite(d.strbuffer[pe_msgid], d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1, d.out); + fwrite(d.strbuffer[pe_msgstr], d.len[pe_msgstr], 1, d.out); + + return 0; +} + + +void set_file(int out, char* fn, FILE** dest) { + if(streq(fn, "-")) { + if(out) { + *dest = stdout; + } else { + char b[4096]; + size_t n=0; + FILE* tmpf = tmpfile(); + if(!tmpf) + perror("tmpfile"); + + while((n=fread(b, sizeof(*b), sizeof(b), stdin)) > 0) + fwrite(b, sizeof(*b), n, tmpf); + + fseek(tmpf, 0, SEEK_SET); + *dest = tmpf; + } + } else { + *dest = fopen(fn, out ? "w" : "r"); + } + if(!*dest) { + perror("fopen"); + exit(1); + } +} + +int main(int argc, char**argv) { + if(argc == 1) syntax(); + int arg = 1; + FILE *out = NULL; + FILE *in = NULL; + int expect_in_fn = 1; + char* locale = NULL; + char* dest = NULL; +#define A argv[arg] + for(; arg < argc; arg++) { + if(A[0] == '-') { + if(A[1] == '-') { + if( + streq(A+2, "java") || + streq(A+2, "java2") || + streq(A+2, "csharp") || + streq(A+2, "csharp-resources") || + streq(A+2, "tcl") || + streq(A+2, "qt") || + streq(A+2, "strict") || + streq(A+2, "properties-input") || + streq(A+2, "stringtable-input") || + streq(A+2, "use-fuzzy") || + strstarts(A+2, "alignment=") || + streq(A+2, "check") || + streq(A+2, "check-format") || + streq(A+2, "check-header") || + streq(A+2, "check-domain") || + streq(A+2, "check-compatibility") || + streq(A+2, "check-accelerators") || + streq(A+2, "no-hash") || + streq(A+2, "verbose") || + streq(A+2, "statistics") || + strstarts(A+2, "check-accelerators=") || + strstarts(A+2, "resource=") + ) { + } else if((dest = strstarts(A+2, "locale="))) { + locale = dest; + } else if((dest = strstarts(A+2, "output-file="))) { + set_file(1, dest, &out); + } else if(streq(A+2, "version")) { + version(); + } else if(streq(A+2, "help")) { + syntax(); + } else if (expect_in_fn) { + set_file(0, A, &in); + expect_in_fn = 0; + } + } else if(streq(A + 1, "o")) { + arg++; + dest = A; + set_file(1, A, &out); + } else if( + streq(A+1, "j") || + streq(A+1, "r") || + streq(A+1, "P") || + streq(A+1, "f") || + streq(A+1, "a") || + streq(A+1, "c") || + streq(A+1, "v") || + streq(A+1, "C") + ) { + } else if (streq(A+1, "V")) { + version(); + } else if (streq(A+1, "h")) { + syntax(); + } else if (streq(A+1, "l")) { + arg++; + locale = A; + } else if (streq(A+1, "d")) { + arg++; + dest = A; + } else if (expect_in_fn) { + set_file(0, A, &in); + expect_in_fn = 0; + } + } else if (expect_in_fn) { + set_file(0, A, &in); + expect_in_fn = 0; + } + } + + if (locale != NULL && dest != NULL) { + int sz = snprintf(NULL, 0, "%s/%s.msg", dest, locale); + char msg[sz+1]; + snprintf(msg, sizeof(msg), "%s/%s.msg", dest, locale); + FILE *fp = fopen(msg, "w"); + if (fp) { + fclose(fp); + return 0; + } else return 1; + } + + if(out == NULL) { + dest = "messages.mo"; + set_file(1, "messages.mo", &out); + } + + if(in == NULL || out == NULL) { + return 1; + } + int ret = process(in, out); + fflush(in); fflush(out); + if(in != stdin) fclose(in); + if(out != stdout) fclose(out); + + if (ret == 1) { + return remove(dest); + } + return ret; +} |