diff options
author | Jacob Moody <moody@posixcafe.org> | 2022-07-17 14:52:11 +0000 |
---|---|---|
committer | Jacob Moody <moody@posixcafe.org> | 2022-07-17 14:52:11 +0000 |
commit | c147614656b9c1b4338099a69c35e42c1f4313a5 (patch) | |
tree | 7a74217a2dfc87b1896a7256d4a969227cd9f138 /sys/src/cmd/ktrans/main.c | |
parent | ccbabf1c16caa4917c97b8d8f5cadfb8759610b3 (diff) |
ktrans: 你好
This consolidates jisho and map lookups
to use the same structure and removes
the old jisho code.
Diffstat (limited to 'sys/src/cmd/ktrans/main.c')
-rw-r--r-- | sys/src/cmd/ktrans/main.c | 265 |
1 files changed, 180 insertions, 85 deletions
diff --git a/sys/src/cmd/ktrans/main.c b/sys/src/cmd/ktrans/main.c index e6c6be754..3757096ee 100644 --- a/sys/src/cmd/ktrans/main.c +++ b/sys/src/cmd/ktrans/main.c @@ -6,16 +6,26 @@ * okamoto@granite.cias.osakafu-u.ac.jp */ +/* + * A glossary on some of the Japanese vocabulary used: + * kana: syllabic letting, either hiragana(ひらがな) or katakana(カタカナ) + * kanji(漢字): borrowed characters, 楽 in 楽しい + * Okurigana(送り仮名): kana tail to kanji, しい in 楽しい + * Joshi(助詞): particle, は in 私は + * Jisho(辞書): dictionary + * kouho(候補): candidate + */ + #include <u.h> #include <libc.h> #include <bio.h> +#include "hash.h" #include "ktrans.h" -#include "jisho.h" #define LSIZE 256 Rune lbuf[LSIZE]; /* hiragana buffer for key input written by send() */ -Map *table = hira; /* default language conversion table */ +Hmap *table; uchar okurigana[LSIZE]; /* buffer for okurigana */ char okuri = 0; /* buffer/flag for capital input char */ int in, out; @@ -23,16 +33,10 @@ int llen, olen, joshi = 0; int natural = 1; /* not Japanese but English mode */ int changelang(int); -int dotrans(Dictionary*); +int dotrans(Hmap*); int nrune(char *); void send(uchar *, int); -Map *match(uchar *p, int *nc, Map *table); - -extern Dictionary *openQDIC(char *); -extern KouhoList *getKouhoHash(Dictionary*, char *); -extern KouhoList *getKouhoFile(DicList*, char *); -extern void freeQDIC(Dictionary*); -extern void selectKouho(KouhoList **, KouhoList*); +Hmap* opendict(Hmap *, char *); void kbdopen(void) @@ -89,6 +93,50 @@ kbdopen(void) exits(nil); } +Map signalmore = { + "_", nil, 1, +}; + +Hmap* +initmap(Map *m, int n) +{ + int i, j; + char buf[16]; + char *s; + Map prev; + Hmap *h; + + h = hmapalloc(n, sizeof(Map)); + for(i = 0; i < n; i++){ + if(m[i].roma == nil || m[i].roma[0] == '\0') + continue; + + //We mark all partial strings so we know when + //we have partial match when ingesting. + j = 2; + for(s = m[i].roma; *s && j <= sizeof buf; s++){ + snprint(buf, j, "%s", m[i].roma); + prev = m[i]; + if(hmapget(h, buf, &prev) == 0){ + if(prev.leadstomore == 1 && s[1] == '\0'){ + //confict; partial & valid input + prev = m[i]; + prev.leadstomore = 1; + free(hmapkey(h, buf)); + } + } + + if(s[1] == '\0'){ + hmapset(&h, strdup(buf), &prev, nil); + } else { + hmapset(&h, strdup(buf), &signalmore, nil); + } + j++; + } + } + return h; +} + void usage(void) { @@ -101,11 +149,11 @@ main(int argc, char *argv[]) { uchar *bp, *ep, buf[128]; - Map *mp; - int nchar, wantmore; + Map lkup, last; + int wantmore; int n, c; - char *dictname; - Dictionary *jisho; + char *jishoname, *zidianname; + Hmap *jisho, *zidian; ARGBEGIN{ default: usage(); @@ -113,9 +161,20 @@ main(int argc, char *argv[]) if(argc != 0) usage(); - if((dictname = getenv("jisho")) == nil) - dictname = "/lib/kanji.jisho"; - jisho = openQDIC(dictname); + if((jishoname = getenv("jisho")) == nil) + jishoname = "/lib/kanji.jisho"; + jisho = opendict(nil, jishoname); + + if((zidianname = getenv("zidian")) == nil) + zidianname = "/lib/hanzi.zidian"; + zidian = opendict(nil, zidianname); + + hira = table = initmap(mhira, nelem(mhira)); + kata = initmap(mkata, nelem(mkata)); + greek = initmap(mgreek, nelem(mgreek)); + cyril = initmap(mcyril, nelem(mcyril)); + hangul = initmap(mhangul, nelem(mhangul)); + last = (Map){nil, nil, -1}; kbdopen(); if(fork()) @@ -147,8 +206,8 @@ main(int argc, char *argv[]) wantmore = 0; if (*bp=='') { /* ^x read ktrans-jisho once more */ - freeQDIC(jisho); - jisho = openQDIC(dictname); + jisho = opendict(jisho, jishoname); + zidian = opendict(zidian, zidianname); llen = 0; olen = okuri = joshi = 0; wantmore=0; @@ -156,7 +215,10 @@ main(int argc, char *argv[]) continue; } if (*bp=='') { /* ^\ (start translation command) */ - c = dotrans(jisho); + if (table == hanzi) + c = dotrans(zidian); + else + c = dotrans(jisho); if (c) *bp = c; /* pointer to translated rune */ else @@ -167,11 +229,13 @@ main(int argc, char *argv[]) bp++; llen = 0; olen = okuri = joshi = 0; + last.kana = nil; continue; } if (changelang(*bp)) { /* change language mode OK */ bp++; olen = okuri = joshi = 0; + last.kana = nil; continue; } if (natural || *bp<=' ' || *bp>='{') { /* English mode but not ascii */ @@ -179,6 +243,7 @@ main(int argc, char *argv[]) int rlen = chartorune(&r, (char *)bp); send(bp, rlen); /* write bp to /dev/cons */ bp += rlen; + last.kana = nil; continue; } if (table == hira && (*bp >= 'A' && *bp <= 'Z') && (*(bp+1) < 'A' @@ -192,27 +257,33 @@ main(int argc, char *argv[]) joshi = 1; olen = 0; } - mp = match(bp, &nchar, table); - if (mp == 0) { - if (nchar>0) { /* match, longer possible */ - wantmore++; - break; - } - send(bp++, 1); /* alphabet in kana mode */ - } else { - send((uchar*)mp->kana, strlen(mp->kana)); - bp += nchar; + if(hmapget(table, (char*)bp, &lkup) < 0){ + if(last.kana != nil){ + send((uchar*)last.kana, strlen(last.kana)); + bp += strlen(last.roma); + } else + send(bp++, 1); + last.kana = nil; + break; + } + /* concatinations; only advance a single character */ + if(lkup.kana != nil && strstr("ッっ", lkup.kana)) + lkup.roma = "_"; + /* partial match */ + if(lkup.kana == nil || lkup.leadstomore == 1){ + if(lkup.kana != nil) + last = lkup; + + wantmore = 1; + break; } + last.kana = nil; + send((uchar*)lkup.kana, strlen(lkup.kana)); + bp += strlen(lkup.roma); } } } -int -min(int a, int b) -{ - return a<b? a: b; -} - /* * send UTF string (p) with length (n) to stdout * and write rune (r) in global lbuf[] buffer @@ -232,7 +303,9 @@ send(uchar *p, int n) llen -= 64; } - if (table!=hira || natural) + if(table != hira && table != hanzi) + return; + if(natural && table != hanzi) return; ep = p+n; @@ -253,49 +326,13 @@ send(uchar *p, int n) } } -/* - * Romaji to Hiragana/Katakana conversion - * romaji shoud be input as small letter - * returns the matched address in table, hira, kata, etc. - * nc: number of character (return value) - */ -Map * -match(uchar *p, int *nc, Map *table) -{ - register Map *longp = 0, *kp; - static char last; - int longest = 0; - - *nc = -1; - for (kp=table; kp->roma; kp++) { - if (*p == *kp->roma) { - int lr = strlen(kp->roma); - int len = min(lr, strlen((char *)p)); - if (strncmp(kp->roma, (char *)p, len)==0) { - if (len<lr) { - *nc = 1; - return 0; - } - if (len>longest) { - longest = len; - longp = kp; - } - } - } - } - if (longp) { - last = longp->roma[longest-1]; - *nc = longp->advance; - } - return longp; -} - int changelang(int c) { switch(c){ case '': /* ^t (English mode) */ natural = 1; + table = hira; llen = 0; return 1; break; @@ -334,23 +371,80 @@ changelang(int c) llen = 0; return 1; break; + + case '': /* ^c (Chinese mode) */ + natural = 1; + table = hanzi; + llen = 0; + return 1; + break; } return 0; } +Hmap* +opendict(Hmap *h, char *name) +{ + Biobuf *b; + char *p; + char *dot, *rest; + char *kouho[16]; + int i; + + b = Bopen(name, OREAD); + if(b == nil) + return nil; + + if(h == nil) + h = hmapalloc(8192, sizeof(kouho)); + else + hmapreset(h, 1); + while(p = Brdstr(b, '\n', 1)){ + if(p[0] == '\0' || p[0] == ';'){ + Err: + free(p); + continue; + } + dot = utfrune(p, '\t'); + if(dot == nil) + goto Err; + + *dot = '\0'; + rest = dot+1; + if(*rest == '\0') + goto Err; + + memset(kouho, 0, sizeof kouho); + i = 0; + while(i < nelem(kouho)-1 && (dot = utfrune(rest, ' '))){ + *dot = '\0'; + kouho[i++] = rest; + rest = dot+1; + } + if(i < nelem(kouho)-1) + kouho[i] = rest; + + /* key is the base pointer; overwrites clean up for us */ + hmapset(&h, p, kouho, nil); + } + Bterm(b); + return h; +} + /* * write translated kanji runes to stdout and return last character * if it's not ctl-\. if the last is ctl-\, proceed with * translation of the next kouho */ int -dotrans(Dictionary *dic) +dotrans(Hmap *dic) { Rune *res, r[1]; char v[1024], *p, tbuf[64], hirabuf[64]; int j, lastlen, nokouho = 0; char ch; - KouhoList *fstkouho, *currentkouho; + int i; + char *kouho[16]; if (llen==0) return 0; /* don't use kanji transform function */ @@ -375,15 +469,13 @@ dotrans(Dictionary *dic) if (okuri && joshi != 1) /* verb mode */ hirabuf[strlen(hirabuf) - 1] = '\0'; - if(!(fstkouho = getKouhoHash(dic, v))) { /* not found */ + if(hmapget(dic, v, kouho) < 0){ llen = olen = okuri = joshi = 0; okurigana[0] = 0; return 0; } - - currentkouho = fstkouho; - for(;;) { - p = currentkouho->kouhotop; /* p to the head of kanji kouho array */ + for(i = 0; i < nelem(kouho) && kouho[i] != nil; i++) { + p = kouho[i]; lastlen = nrune(tbuf); /* number of rune chars */ if (okuri && joshi != 1) /* verb mode */ @@ -407,10 +499,9 @@ dotrans(Dictionary *dic) exits(nil); if (ch == '') { /* if next input is ^\, once again */ - if(currentkouho->nextkouho != 0) { /* have next kouho */ + if(i+1 < nelem(kouho) && kouho[i+1] != nil) { /* have next kouho */ nokouho = 0; strcpy(tbuf, p); - currentkouho = currentkouho->nextkouho; if (okuri && joshi != 1) /* verb mode */ for (j=0; j<nrune(tbuf); j++) @@ -442,8 +533,12 @@ dotrans(Dictionary *dic) break; } } else { - if(!nokouho) /* learn the previous use of the kouho */ - selectKouho(&(fstkouho->dicitem->kouho), currentkouho); + if(!nokouho && i != 0){ /* learn the previous use of the kouho */ + p = kouho[0]; + kouho[0] = kouho[i]; + kouho[i] = p; + hmapset(&dic, hmapkey(dic, v), kouho, nil); + } olen = okuri = joshi = 0; okurigana[0] = 0; |