/* msgfmt utility (C) 2012 rofl0r * released under the MIT license, see LICENSE for details */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <ctype.h> #include <assert.h> #include "poparser.h" // in DO_NOTHING mode, we simply write the msgid twice, once for msgid, once for msgstr. // TODO: maybe make it write "" instead of echoing the msgid. //#define DO_NOTHING __attribute__((noreturn)) static void syntax(void) { fprintf(stdout, "Usage: msgfmt [OPTION] filename.po ...\n"); exit(1); } __attribute__((noreturn)) static void version(void) { fprintf(stdout, "these are not (GNU gettext-tools) 99.9999.9999\n"); exit(0); } #define streq(A, B) (!strcmp(A, B)) #define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1))) struct mo_hdr { unsigned magic; int rev; unsigned numstring; unsigned off_tbl_org; unsigned off_tbl_trans; unsigned hash_tbl_size; unsigned off_tbl_hash; }; /* file layout: header strtable (lenghts/offsets) transtable (lenghts/offsets) [hashtable] strings section translations section */ const struct mo_hdr def_hdr = { 0x950412de, 0, 0, sizeof(struct mo_hdr), 0, 0, 0, }; // pass 0: collect numbers of strings, calculate size and offsets for tables // print header // pass 1: create in-memory string tables enum passes { pass_first = 0, pass_collect_sizes = pass_first, pass_second, pass_max, }; struct strtbl { unsigned len, off; }; struct strmap { struct strtbl str, *trans; }; struct callbackdata { enum passes pass; unsigned off; FILE* out; unsigned msgidbuf1_len; unsigned msgidbuf2_len; unsigned pluralbuf1_len; unsigned pluralbuf2_len; unsigned ctxtbuf_len; unsigned msgstr1_len; unsigned msgstr2_len; unsigned pluralstr_count; unsigned string_maxlen; char* msgidbuf1; char* msgidbuf2; char* pluralbuf1; char* pluralbuf2; char* msgctxtbuf; char* msgstrbuf1; char* msgstrbuf2; unsigned priv_type; unsigned priv_len; unsigned num[pe_maxstr]; unsigned len[pe_maxstr]; struct strmap *strlist; struct strtbl *translist; char *strbuffer[pe_maxstr]; unsigned stroff[pe_maxstr]; unsigned curr[pe_maxstr]; }; static struct callbackdata *cb_for_qsort; int strmap_comp(const void *a_, const void *b_) { const struct strmap *a = a_, *b = b_; return strcmp(cb_for_qsort->strbuffer[0] + a->str.off, cb_for_qsort->strbuffer[0] + b->str.off); } enum sysdep_types { st_priu32 = 0, st_priu64, st_priumax, st_max }; static const char sysdep_str[][10]={ [st_priu32] = "\x08<PRIu32>", [st_priu64] = "\x08<PRIu64>", [st_priumax] = "\x09<PRIuMAX>", }; static const char sysdep_repl[][8]={ [st_priu32] = "\x02lu\0u", [st_priu64] = "\x02lu\0llu", [st_priumax] = "\x01ju" }; static const char *get_repl(enum sysdep_types type, unsigned nr) { assert(nr < (unsigned)sysdep_repl[type][0]); const char* p = sysdep_repl[type]+1; while(nr--) p+=strlen(p)+1; return p; } static void replace(char* text, unsigned textlen, const char* what, const char * with) { char*p = text; size_t la = strlen(what), li=strlen(with); assert(la >= li); for(p=text;textlen >= la;) { if(!memcmp(p,what,la)) { memcpy(p, with, li); textlen -= la; memmove(p+li,p+la,textlen+1); p+=li; } else { p++; textlen--; } } } static unsigned get_form(enum sysdep_types type, unsigned no, unsigned occurences[st_max]) { unsigned i,divisor = 1; for(i=type+1;i<st_max;i++) if(occurences[i]) divisor *= sysdep_repl[i][0]; return (no/divisor)%sysdep_repl[type][0]; } static char** sysdep_transform(const char* text, unsigned textlen, unsigned *len, unsigned *count, int simulate) { unsigned occurences[st_max] = {0}; const char *p=text,*o; unsigned i,j, l = textlen; while(l && (o=strchr(p, '<'))) { l-=o-p;p=o; unsigned f = 0; for(i=0;i<st_max;i++) if(l>=(unsigned)sysdep_str[i][0] && !memcmp(p,sysdep_str[i]+1,sysdep_str[i][0])) { occurences[i]++; f=1; p+=sysdep_str[i][0]; l-=sysdep_str[i][0]; break; } if(!f) p++,l--; } *count = 1; for(i=0;i<st_max;i++) if(occurences[i]) *count *= sysdep_repl[i][0]; l = textlen * *count; for(i=0;i<*count;i++) for(j=0;j<st_max;j++) if(occurences[j]) l-= occurences[j] * (sysdep_str[j][0] - strlen(get_repl(j, get_form(j, i, occurences)))); *len = l+*count-1; char **out = 0; if(!simulate) { out = malloc((sizeof(char*)+textlen+1) * *count); assert(out); char *p = (void*)(out+*count); for(i=0;i<*count;i++) { out[i]=p; memcpy(p, text, textlen+1); p+=textlen+1; } for(i=0;i<*count;i++) for(j=0;j<st_max;j++) if(occurences[j]) replace(out[i], textlen, sysdep_str[j]+1, get_repl(j, get_form(j, i, occurences))); } return out; } static inline void writemsg(struct callbackdata *d) { if(d->msgidbuf1_len != 0) { if(!d->strlist[d->curr[pe_msgid]].str.off) d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid]; if(d->ctxtbuf_len != 0) { memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len); d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len; d->stroff[pe_msgid]+=d->ctxtbuf_len; } memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf1, d->msgidbuf1_len); d->stroff[pe_msgid]+=d->msgidbuf1_len; d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf1_len-1; if(d->pluralbuf1_len != 0) { memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf1, d->pluralbuf1_len); d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf1_len; d->stroff[pe_msgid]+=d->pluralbuf1_len; } d->curr[pe_msgid]++; } if(d->msgidbuf2_len != 0) { if(!d->strlist[d->curr[pe_msgid]].str.off) d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid]; if(d->ctxtbuf_len != 0) { memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len); d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len; d->stroff[pe_msgid]+=d->ctxtbuf_len; } memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf2, d->msgidbuf2_len); d->stroff[pe_msgid]+=d->msgidbuf2_len; d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf2_len-1; if(d->pluralbuf2_len != 0) { memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf2, d->pluralbuf2_len); d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf2_len; d->stroff[pe_msgid]+=d->pluralbuf2_len; } d->curr[pe_msgid]++; } d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=0; } static inline void writestr(struct callbackdata *d, struct po_info *info) { // msgid xx; msgstr ""; is widely happened, it's invalid // https://github.com/sabotage-linux/gettext-tiny/issues/1 // no invalid, when empty, check d->num[pe_msgid] if(!d->pluralstr_count && d->num[pe_msgid] > 0) { d->len[pe_msgid]-=d->msgidbuf1_len; d->len[pe_msgid]-=d->msgidbuf2_len; d->len[pe_plural]-=d->pluralbuf1_len; d->len[pe_plural]-=d->pluralbuf2_len; d->len[pe_ctxt]-=d->ctxtbuf_len; d->len[pe_msgstr]--; d->num[pe_msgid]--; d->num[pe_msgstr]--; d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0; return; } if(d->pluralstr_count && d->pluralstr_count <= info->nplurals) { writemsg(d); // plural <= nplurals is allowed d->translist[d->curr[pe_msgstr]].len=d->msgstr1_len-1; d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr]; d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]]; memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf1, d->msgstr1_len); d->stroff[pe_msgstr]+=d->msgstr1_len; d->curr[pe_msgstr]++; if(d->msgstr2_len) { d->translist[d->curr[pe_msgstr]].len=d->msgstr2_len-1; d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr]; d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]]; memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf2, d->msgstr2_len); d->stroff[pe_msgstr]+=d->msgstr2_len; d->curr[pe_msgstr]++; } d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0; } } int process_line_callback(struct po_info* info, void* user) { struct callbackdata *d = (struct callbackdata *) user; assert(info->type == pe_msgid || info->type == pe_ctxt || info->type == pe_msgstr || info->type == pe_plural); char **sysdeps; unsigned len, count, i, l; switch(d->pass) { case pass_collect_sizes: sysdep_transform(info->text, info->textlen, &len, &count, 1); d->num[info->type] += count; if(info->type == pe_msgid && count == 2 && d->priv_type == pe_ctxt) { // ctxt meets msgid with sysdeps, multiply num and len to suit it d->len[pe_ctxt] += d->priv_len +1; d->num[pe_ctxt]++; } if(count != 1 && info->type == pe_ctxt) { // except msgid, str, plural, all other types should not have sysdeps abort(); } d->priv_type = info->type; d->priv_len = len; d->len[info->type] += len +1; if(len+1 > d->string_maxlen) d->string_maxlen = len+1; break; case pass_second: sysdeps = sysdep_transform(info->text, info->textlen, &len, &count, 0); for(i=0;i<count;i++) { l = strlen(sysdeps[i]); assert(l+1 <= d->string_maxlen); if(info->type == pe_msgid) { if(i==0 && d->msgidbuf1_len) writestr(d, info); // just copy, it's written down when writemsg() if(i==0) { memcpy(d->msgidbuf1, sysdeps[i], l+1); d->msgidbuf1_len = l+1; } else { memcpy(d->msgidbuf2, sysdeps[i], l+1); d->msgidbuf2_len = l+1; } } else if(info->type == pe_plural) { if(i==0) { memcpy(d->pluralbuf1, sysdeps[i], l+1); d->pluralbuf1_len = l+1; } else { memcpy(d->pluralbuf2, sysdeps[i], l+1); d->pluralbuf2_len = l+1; } } else if(info->type == pe_ctxt) { writestr(d, info); d->ctxtbuf_len = l+1; memcpy(d->msgctxtbuf, sysdeps[i], l); d->msgctxtbuf[l] = 0x4;//EOT } else { // just copy, it's written down when writestr() if(l) { if(i==0) { memcpy(&d->msgstrbuf1[d->msgstr1_len], sysdeps[i], l+1); d->msgstr1_len += l+1; d->pluralstr_count++; } else { // sysdeps exist memcpy(&d->msgstrbuf2[d->msgstr2_len], sysdeps[i], l+1); d->msgstr2_len += l+1; } } } } free(sysdeps); break; default: abort(); } return 0; } int process(FILE *in, FILE *out) { struct mo_hdr mohdr = def_hdr; char line[4096]; char *lp; char convbuf[16384]; struct callbackdata d = { .num = { [pe_msgid] = 0, [pe_msgstr] = 0, [pe_plural] = 0, [pe_ctxt] = 0, }, .len = { [pe_msgid] = 0, [pe_msgstr] = 0, [pe_plural] = 0, [pe_ctxt] = 0, }, .off = 0, .out = out, .pass = pass_first, .ctxtbuf_len = 0, .pluralbuf1_len = 0, .pluralbuf2_len = 0, .msgidbuf1_len = 0, .msgidbuf2_len = 0, .msgstr1_len = 0, .msgstr2_len = 0, .pluralstr_count = 0, .string_maxlen = 0, }; struct po_parser pb, *p = &pb; mohdr.off_tbl_trans = mohdr.off_tbl_org; for(d.pass = pass_first; d.pass <= pass_second; d.pass++) { if(d.pass == pass_second) { // start of second pass: // ensure we dont output when there's no strings at all if(d.num[pe_msgid] == 0) { return 1; } // check that data gathered in first pass is consistent if((d.num[pe_msgstr] < d.num[pe_msgid]) || (d.num[pe_msgstr] > (d.num[pe_msgid] + d.num[pe_plural] * (p->info.nplurals - 1)))) { // one should actually abort here, // but gnu gettext simply writes an empty .mo and returns success. //abort(); fprintf(stderr, "warning: mismatch of msgid/msgstr count, writing empty .mo file\n"); d.num[pe_msgid] = 0; return 0; } d.msgidbuf1 = calloc(d.string_maxlen*5+2*d.string_maxlen*p->info.nplurals, 1); d.msgidbuf2 = d.msgidbuf1 + d.string_maxlen; d.pluralbuf1 = d.msgidbuf2 + d.string_maxlen; d.pluralbuf2 = d.pluralbuf1 + d.string_maxlen; d.msgctxtbuf = d.pluralbuf2 + d.string_maxlen; d.msgstrbuf1 = d.msgctxtbuf + d.string_maxlen; d.msgstrbuf2 = d.msgstrbuf1 + d.string_maxlen*p->info.nplurals; d.strlist = calloc(d.num[pe_msgid] * sizeof(struct strmap), 1); d.translist = calloc(d.num[pe_msgstr] * sizeof(struct strtbl), 1); d.strbuffer[pe_msgid] = calloc(d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1); d.strbuffer[pe_msgstr] = calloc(d.len[pe_msgstr], 1); d.stroff[pe_msgid] = d.stroff[pe_msgstr] = 0; assert(d.msgidbuf1 && d.strlist && d.translist && d.strbuffer[pe_msgid] && d.strbuffer[pe_msgstr]); } poparser_init(p, convbuf, sizeof(convbuf), process_line_callback, &d); while((lp = fgets(line, sizeof(line), in))) { poparser_feed_line(p, lp, sizeof(line)); } poparser_finish(p); if(d.pass == pass_second) writestr(&d, &p->info); if(d.pass == pass_second) { // calculate header fields from len and num arrays mohdr.numstring = d.num[pe_msgid]; mohdr.off_tbl_org = sizeof(struct mo_hdr); mohdr.off_tbl_trans = mohdr.off_tbl_org + d.num[pe_msgid] * (sizeof(unsigned)*2); // set offset startvalue d.off = mohdr.off_tbl_trans + d.num[pe_msgid] * (sizeof(unsigned)*2); } fseek(in, 0, SEEK_SET); } cb_for_qsort = &d; qsort(d.strlist, d.num[pe_msgid], sizeof (struct strmap), strmap_comp); unsigned i; // print header fwrite(&mohdr, sizeof(mohdr), 1, out); for(i = 0; i < d.num[pe_msgid]; i++) { d.strlist[i].str.off += d.off; fwrite(&d.strlist[i].str, sizeof(struct strtbl), 1, d.out); } for(i = 0; i < d.num[pe_msgid]; i++) { d.strlist[i].trans->off += d.off + d.len[pe_msgid] + d.len[pe_plural] + d.len[pe_ctxt]; fwrite(d.strlist[i].trans, sizeof(struct strtbl), 1, d.out); } fwrite(d.strbuffer[pe_msgid], d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1, d.out); fwrite(d.strbuffer[pe_msgstr], d.len[pe_msgstr], 1, d.out); return 0; } void set_file(int out, char* fn, FILE** dest) { if(streq(fn, "-")) { if(out) { *dest = stdout; } else { char b[4096]; size_t n=0; FILE* tmpf = tmpfile(); if(!tmpf) perror("tmpfile"); while((n=fread(b, sizeof(*b), sizeof(b), stdin)) > 0) fwrite(b, sizeof(*b), n, tmpf); fseek(tmpf, 0, SEEK_SET); *dest = tmpf; } } else { *dest = fopen(fn, out ? "w" : "r"); } if(!*dest) { perror("fopen"); exit(1); } } int main(int argc, char**argv) { if(argc == 1) syntax(); int arg = 1; FILE *out = NULL; FILE *in = NULL; int expect_in_fn = 1; char* locale = NULL; char* dest = NULL; #define A argv[arg] for(; arg < argc; arg++) { if(A[0] == '-') { if(A[1] == '-') { if( streq(A+2, "java") || streq(A+2, "java2") || streq(A+2, "csharp") || streq(A+2, "csharp-resources") || streq(A+2, "tcl") || streq(A+2, "qt") || streq(A+2, "strict") || streq(A+2, "properties-input") || streq(A+2, "stringtable-input") || streq(A+2, "use-fuzzy") || strstarts(A+2, "alignment=") || streq(A+2, "check") || streq(A+2, "check-format") || streq(A+2, "check-header") || streq(A+2, "check-domain") || streq(A+2, "check-compatibility") || streq(A+2, "check-accelerators") || streq(A+2, "no-hash") || streq(A+2, "verbose") || streq(A+2, "statistics") || strstarts(A+2, "check-accelerators=") || strstarts(A+2, "resource=") ) { } else if((dest = strstarts(A+2, "locale="))) { locale = dest; } else if((dest = strstarts(A+2, "output-file="))) { set_file(1, dest, &out); } else if(streq(A+2, "version")) { version(); } else if(streq(A+2, "help")) { syntax(); } else if (expect_in_fn) { set_file(0, A, &in); expect_in_fn = 0; } } else if(streq(A + 1, "o")) { arg++; dest = A; set_file(1, A, &out); } else if( streq(A+1, "j") || streq(A+1, "r") || streq(A+1, "P") || streq(A+1, "f") || streq(A+1, "a") || streq(A+1, "c") || streq(A+1, "v") || streq(A+1, "C") ) { } else if (streq(A+1, "V")) { version(); } else if (streq(A+1, "h")) { syntax(); } else if (streq(A+1, "l")) { arg++; locale = A; } else if (streq(A+1, "d")) { arg++; dest = A; } else if (expect_in_fn) { set_file(0, A, &in); expect_in_fn = 0; } } else if (expect_in_fn) { set_file(0, A, &in); expect_in_fn = 0; } } if (locale != NULL && dest != NULL) { int sz = snprintf(NULL, 0, "%s/%s.msg", dest, locale); char msg[sz+1]; snprintf(msg, sizeof(msg), "%s/%s.msg", dest, locale); FILE *fp = fopen(msg, "w"); if (fp) { fclose(fp); return 0; } else return 1; } if(out == NULL) { dest = "messages.mo"; set_file(1, "messages.mo", &out); } if(in == NULL || out == NULL) { return 1; } int ret = process(in, out); fflush(in); fflush(out); if(in != stdin) fclose(in); if(out != stdout) fclose(out); if (ret == 1) { return remove(dest); } return ret; }