summaryrefslogtreecommitdiff
path: root/sys/src/cmd/ktrans/main.c
diff options
context:
space:
mode:
authorJacob Moody <moody@posixcafe.org>2022-07-17 14:52:11 +0000
committerJacob Moody <moody@posixcafe.org>2022-07-17 14:52:11 +0000
commitc147614656b9c1b4338099a69c35e42c1f4313a5 (patch)
tree7a74217a2dfc87b1896a7256d4a969227cd9f138 /sys/src/cmd/ktrans/main.c
parentccbabf1c16caa4917c97b8d8f5cadfb8759610b3 (diff)
ktrans: 你好
This consolidates jisho and map lookups to use the same structure and removes the old jisho code.
Diffstat (limited to 'sys/src/cmd/ktrans/main.c')
-rw-r--r--sys/src/cmd/ktrans/main.c265
1 files changed, 180 insertions, 85 deletions
diff --git a/sys/src/cmd/ktrans/main.c b/sys/src/cmd/ktrans/main.c
index e6c6be754..3757096ee 100644
--- a/sys/src/cmd/ktrans/main.c
+++ b/sys/src/cmd/ktrans/main.c
@@ -6,16 +6,26 @@
* okamoto@granite.cias.osakafu-u.ac.jp
*/
+/*
+ * A glossary on some of the Japanese vocabulary used:
+ * kana: syllabic letting, either hiragana(ひらがな) or katakana(カタカナ)
+ * kanji(漢字): borrowed characters, 楽 in 楽しい
+ * Okurigana(送り仮名): kana tail to kanji, しい in 楽しい
+ * Joshi(助詞): particle, は in 私は
+ * Jisho(辞書): dictionary
+ * kouho(候補): candidate
+ */
+
#include <u.h>
#include <libc.h>
#include <bio.h>
+#include "hash.h"
#include "ktrans.h"
-#include "jisho.h"
#define LSIZE 256
Rune lbuf[LSIZE]; /* hiragana buffer for key input written by send() */
-Map *table = hira; /* default language conversion table */
+Hmap *table;
uchar okurigana[LSIZE]; /* buffer for okurigana */
char okuri = 0; /* buffer/flag for capital input char */
int in, out;
@@ -23,16 +33,10 @@ int llen, olen, joshi = 0;
int natural = 1; /* not Japanese but English mode */
int changelang(int);
-int dotrans(Dictionary*);
+int dotrans(Hmap*);
int nrune(char *);
void send(uchar *, int);
-Map *match(uchar *p, int *nc, Map *table);
-
-extern Dictionary *openQDIC(char *);
-extern KouhoList *getKouhoHash(Dictionary*, char *);
-extern KouhoList *getKouhoFile(DicList*, char *);
-extern void freeQDIC(Dictionary*);
-extern void selectKouho(KouhoList **, KouhoList*);
+Hmap* opendict(Hmap *, char *);
void
kbdopen(void)
@@ -89,6 +93,50 @@ kbdopen(void)
exits(nil);
}
+Map signalmore = {
+ "_", nil, 1,
+};
+
+Hmap*
+initmap(Map *m, int n)
+{
+ int i, j;
+ char buf[16];
+ char *s;
+ Map prev;
+ Hmap *h;
+
+ h = hmapalloc(n, sizeof(Map));
+ for(i = 0; i < n; i++){
+ if(m[i].roma == nil || m[i].roma[0] == '\0')
+ continue;
+
+ //We mark all partial strings so we know when
+ //we have partial match when ingesting.
+ j = 2;
+ for(s = m[i].roma; *s && j <= sizeof buf; s++){
+ snprint(buf, j, "%s", m[i].roma);
+ prev = m[i];
+ if(hmapget(h, buf, &prev) == 0){
+ if(prev.leadstomore == 1 && s[1] == '\0'){
+ //confict; partial & valid input
+ prev = m[i];
+ prev.leadstomore = 1;
+ free(hmapkey(h, buf));
+ }
+ }
+
+ if(s[1] == '\0'){
+ hmapset(&h, strdup(buf), &prev, nil);
+ } else {
+ hmapset(&h, strdup(buf), &signalmore, nil);
+ }
+ j++;
+ }
+ }
+ return h;
+}
+
void
usage(void)
{
@@ -101,11 +149,11 @@ main(int argc, char *argv[])
{
uchar *bp, *ep, buf[128];
- Map *mp;
- int nchar, wantmore;
+ Map lkup, last;
+ int wantmore;
int n, c;
- char *dictname;
- Dictionary *jisho;
+ char *jishoname, *zidianname;
+ Hmap *jisho, *zidian;
ARGBEGIN{
default: usage();
@@ -113,9 +161,20 @@ main(int argc, char *argv[])
if(argc != 0)
usage();
- if((dictname = getenv("jisho")) == nil)
- dictname = "/lib/kanji.jisho";
- jisho = openQDIC(dictname);
+ if((jishoname = getenv("jisho")) == nil)
+ jishoname = "/lib/kanji.jisho";
+ jisho = opendict(nil, jishoname);
+
+ if((zidianname = getenv("zidian")) == nil)
+ zidianname = "/lib/hanzi.zidian";
+ zidian = opendict(nil, zidianname);
+
+ hira = table = initmap(mhira, nelem(mhira));
+ kata = initmap(mkata, nelem(mkata));
+ greek = initmap(mgreek, nelem(mgreek));
+ cyril = initmap(mcyril, nelem(mcyril));
+ hangul = initmap(mhangul, nelem(mhangul));
+ last = (Map){nil, nil, -1};
kbdopen();
if(fork())
@@ -147,8 +206,8 @@ main(int argc, char *argv[])
wantmore = 0;
if (*bp=='') { /* ^x read ktrans-jisho once more */
- freeQDIC(jisho);
- jisho = openQDIC(dictname);
+ jisho = opendict(jisho, jishoname);
+ zidian = opendict(zidian, zidianname);
llen = 0;
olen = okuri = joshi = 0;
wantmore=0;
@@ -156,7 +215,10 @@ main(int argc, char *argv[])
continue;
}
if (*bp=='') { /* ^\ (start translation command) */
- c = dotrans(jisho);
+ if (table == hanzi)
+ c = dotrans(zidian);
+ else
+ c = dotrans(jisho);
if (c)
*bp = c; /* pointer to translated rune */
else
@@ -167,11 +229,13 @@ main(int argc, char *argv[])
bp++;
llen = 0;
olen = okuri = joshi = 0;
+ last.kana = nil;
continue;
}
if (changelang(*bp)) { /* change language mode OK */
bp++;
olen = okuri = joshi = 0;
+ last.kana = nil;
continue;
}
if (natural || *bp<=' ' || *bp>='{') { /* English mode but not ascii */
@@ -179,6 +243,7 @@ main(int argc, char *argv[])
int rlen = chartorune(&r, (char *)bp);
send(bp, rlen); /* write bp to /dev/cons */
bp += rlen;
+ last.kana = nil;
continue;
}
if (table == hira && (*bp >= 'A' && *bp <= 'Z') && (*(bp+1) < 'A'
@@ -192,27 +257,33 @@ main(int argc, char *argv[])
joshi = 1;
olen = 0;
}
- mp = match(bp, &nchar, table);
- if (mp == 0) {
- if (nchar>0) { /* match, longer possible */
- wantmore++;
- break;
- }
- send(bp++, 1); /* alphabet in kana mode */
- } else {
- send((uchar*)mp->kana, strlen(mp->kana));
- bp += nchar;
+ if(hmapget(table, (char*)bp, &lkup) < 0){
+ if(last.kana != nil){
+ send((uchar*)last.kana, strlen(last.kana));
+ bp += strlen(last.roma);
+ } else
+ send(bp++, 1);
+ last.kana = nil;
+ break;
+ }
+ /* concatinations; only advance a single character */
+ if(lkup.kana != nil && strstr("ッっ", lkup.kana))
+ lkup.roma = "_";
+ /* partial match */
+ if(lkup.kana == nil || lkup.leadstomore == 1){
+ if(lkup.kana != nil)
+ last = lkup;
+
+ wantmore = 1;
+ break;
}
+ last.kana = nil;
+ send((uchar*)lkup.kana, strlen(lkup.kana));
+ bp += strlen(lkup.roma);
}
}
}
-int
-min(int a, int b)
-{
- return a<b? a: b;
-}
-
/*
* send UTF string (p) with length (n) to stdout
* and write rune (r) in global lbuf[] buffer
@@ -232,7 +303,9 @@ send(uchar *p, int n)
llen -= 64;
}
- if (table!=hira || natural)
+ if(table != hira && table != hanzi)
+ return;
+ if(natural && table != hanzi)
return;
ep = p+n;
@@ -253,49 +326,13 @@ send(uchar *p, int n)
}
}
-/*
- * Romaji to Hiragana/Katakana conversion
- * romaji shoud be input as small letter
- * returns the matched address in table, hira, kata, etc.
- * nc: number of character (return value)
- */
-Map *
-match(uchar *p, int *nc, Map *table)
-{
- register Map *longp = 0, *kp;
- static char last;
- int longest = 0;
-
- *nc = -1;
- for (kp=table; kp->roma; kp++) {
- if (*p == *kp->roma) {
- int lr = strlen(kp->roma);
- int len = min(lr, strlen((char *)p));
- if (strncmp(kp->roma, (char *)p, len)==0) {
- if (len<lr) {
- *nc = 1;
- return 0;
- }
- if (len>longest) {
- longest = len;
- longp = kp;
- }
- }
- }
- }
- if (longp) {
- last = longp->roma[longest-1];
- *nc = longp->advance;
- }
- return longp;
-}
-
int
changelang(int c)
{
switch(c){
case '': /* ^t (English mode) */
natural = 1;
+ table = hira;
llen = 0;
return 1;
break;
@@ -334,23 +371,80 @@ changelang(int c)
llen = 0;
return 1;
break;
+
+ case '': /* ^c (Chinese mode) */
+ natural = 1;
+ table = hanzi;
+ llen = 0;
+ return 1;
+ break;
}
return 0;
}
+Hmap*
+opendict(Hmap *h, char *name)
+{
+ Biobuf *b;
+ char *p;
+ char *dot, *rest;
+ char *kouho[16];
+ int i;
+
+ b = Bopen(name, OREAD);
+ if(b == nil)
+ return nil;
+
+ if(h == nil)
+ h = hmapalloc(8192, sizeof(kouho));
+ else
+ hmapreset(h, 1);
+ while(p = Brdstr(b, '\n', 1)){
+ if(p[0] == '\0' || p[0] == ';'){
+ Err:
+ free(p);
+ continue;
+ }
+ dot = utfrune(p, '\t');
+ if(dot == nil)
+ goto Err;
+
+ *dot = '\0';
+ rest = dot+1;
+ if(*rest == '\0')
+ goto Err;
+
+ memset(kouho, 0, sizeof kouho);
+ i = 0;
+ while(i < nelem(kouho)-1 && (dot = utfrune(rest, ' '))){
+ *dot = '\0';
+ kouho[i++] = rest;
+ rest = dot+1;
+ }
+ if(i < nelem(kouho)-1)
+ kouho[i] = rest;
+
+ /* key is the base pointer; overwrites clean up for us */
+ hmapset(&h, p, kouho, nil);
+ }
+ Bterm(b);
+ return h;
+}
+
/*
* write translated kanji runes to stdout and return last character
* if it's not ctl-\. if the last is ctl-\, proceed with
* translation of the next kouho
*/
int
-dotrans(Dictionary *dic)
+dotrans(Hmap *dic)
{
Rune *res, r[1];
char v[1024], *p, tbuf[64], hirabuf[64];
int j, lastlen, nokouho = 0;
char ch;
- KouhoList *fstkouho, *currentkouho;
+ int i;
+ char *kouho[16];
if (llen==0)
return 0; /* don't use kanji transform function */
@@ -375,15 +469,13 @@ dotrans(Dictionary *dic)
if (okuri && joshi != 1) /* verb mode */
hirabuf[strlen(hirabuf) - 1] = '\0';
- if(!(fstkouho = getKouhoHash(dic, v))) { /* not found */
+ if(hmapget(dic, v, kouho) < 0){
llen = olen = okuri = joshi = 0;
okurigana[0] = 0;
return 0;
}
-
- currentkouho = fstkouho;
- for(;;) {
- p = currentkouho->kouhotop; /* p to the head of kanji kouho array */
+ for(i = 0; i < nelem(kouho) && kouho[i] != nil; i++) {
+ p = kouho[i];
lastlen = nrune(tbuf); /* number of rune chars */
if (okuri && joshi != 1) /* verb mode */
@@ -407,10 +499,9 @@ dotrans(Dictionary *dic)
exits(nil);
if (ch == '') { /* if next input is ^\, once again */
- if(currentkouho->nextkouho != 0) { /* have next kouho */
+ if(i+1 < nelem(kouho) && kouho[i+1] != nil) { /* have next kouho */
nokouho = 0;
strcpy(tbuf, p);
- currentkouho = currentkouho->nextkouho;
if (okuri && joshi != 1) /* verb mode */
for (j=0; j<nrune(tbuf); j++)
@@ -442,8 +533,12 @@ dotrans(Dictionary *dic)
break;
}
} else {
- if(!nokouho) /* learn the previous use of the kouho */
- selectKouho(&(fstkouho->dicitem->kouho), currentkouho);
+ if(!nokouho && i != 0){ /* learn the previous use of the kouho */
+ p = kouho[0];
+ kouho[0] = kouho[i];
+ kouho[i] = p;
+ hmapset(&dic, hmapkey(dic, v), kouho, nil);
+ }
olen = okuri = joshi = 0;
okurigana[0] = 0;