summary | refs | log | tree | commit | diff
path: root/sys/src/cmd/dict/mkindex.c
diff options
context:
space:
mode:
author: Taru Karttunen <taruti@taruti.net> 2011-03-30 15:46:40 +0300
committer: Taru Karttunen <taruti@taruti.net> 2011-03-30 15:46:40 +0300
commit: e5888a1ffdae813d7575f5fb02275c6bb07e5199 (patch)
tree: d8d51eac403f07814b9e936eed0c9a79195e2450 /sys/src/cmd/dict/mkindex.c
Import sources from 2011-03-30 iso image
Diffstat (limited to 'sys/src/cmd/dict/mkindex.c')
-rwxr-xr-x sys/src/cmd/dict/mkindex.c 106
1 files changed, 106 insertions, 0 deletions
diff --git a/sys/src/cmd/dict/mkindex.c b/sys/src/cmd/dict/mkindex.c
new file mode 100755
index 000000000..230247927
--- /dev/null
+++ b/sys/src/cmd/dict/mkindex.c
@@ -0,0 +1,106 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include "dict.h"
+
+/*
+ * Use this to start making an index for a new dictionary.
+ * Get the dictionary-specific nextoff and printentry(_,'h')
+ * routines working, add a record to the dicts[] array
+ * (declared outside this file), and run this program to
+ * get a list of offset,headword pairs.
+ */
+Biobuf boutbuf; /* output buffer; main binds it to fd 1 for writing */
+Biobuf *bdict; /* dictionary file; main opens dict->path read-only */
+Biobuf *bout = &boutbuf; /* handle main prints the offset column through */
+int linelen; /* approximate length of the current output line; main resets it per entry */
+int breaklen = 2000; /* not referenced in this file; presumably a line-break width for shared output code -- confirm */
+int outinhibit; /* not referenced in this file; presumably read by shared output code -- confirm */
+int debug; /* incremented once for each -D flag */
+
+Dict *dict; /* current dictionary */
+
+Entry getentry(long); /* defined after main */
+
+/*
+ * Walk the selected dictionary from offset 0 to its end and, for each
+ * entry, print the entry's byte offset and a tab on standard output,
+ * then hand the entry to the dictionary's printentry routine in 'h'
+ * mode (the headword, per the comment at the top of this file).
+ *
+ * Options:
+ *	-d name	use the dicts[] record whose name matches; the default
+ *		is dicts[0].  An unknown or missing name is fatal.
+ *	-D	increment the debug level.
+ *
+ * Exits with status "nodict" if the dictionary is unknown or cannot be
+ * opened, and with a null status on success.
+ */
+void
+main(int argc, char **argv)
+{
+	int i;
+	long a, ae;
+	char *p;
+	Entry e;
+
+	Binit(&boutbuf, 1, OWRITE);
+	dict = &dicts[0];
+	ARGBEGIN {
+	case 'd':
+		p = ARGF();
+		dict = 0;
+		if(p) {
+			/* the dicts[] table ends at the first record with no name */
+			for(i=0; dicts[i].name; i++)
+				if(strcmp(p, dicts[i].name)==0) {
+					dict = &dicts[i];
+					break;
+				}
+		}
+		if(!dict) {
+			err("unknown dictionary: %s", p);
+			exits("nodict");
+		}
+		break;
+	case 'D':
+		debug++;
+		break;
+	ARGEND }
+	USED(argc,argv);
+	bdict = Bopen(dict->path, OREAD);
+	/*
+	 * Check the open before using bdict: seeking on a nil Biobuf
+	 * would fault instead of reporting the failure.
+	 */
+	if(!bdict) {
+		err("can't open dictionary %s", dict->path);
+		exits("nodict");
+	}
+	/* seek type 2 is relative to end of file, so ae is the file size */
+	ae = Bseek(bdict, 0, 2);
+	/*
+	 * nextoff(a+1) yields the start of the entry after the one at a.
+	 * Stop on a negative result as well as at end of file, so a
+	 * failed lookup cannot restart the scan from offset -1.
+	 */
+	for(a = 0; a >= 0 && a < ae; a = (*dict->nextoff)(a+1)) {
+		linelen = 0;
+		e = getentry(a);
+		Bprint(bout, "%ld\t", a);
+		linelen = 4;	/* only has to be approx right */
+		(*dict->printentry)(e, 'h');
+	}
+	exits(0);
+}
+
+/*
+ * Read the raw bytes of the dictionary entry that starts at offset b.
+ *
+ * The entry runs from b up to the offset returned by the dictionary's
+ * nextoff(b+1); if nextoff reports failure (a negative value) and b
+ * is still inside the file, the entry runs to end of file.
+ *
+ * Returns an Entry with doff set to b and start/end delimiting the
+ * bytes actually read.  The bytes live in a single static buffer that
+ * is reused (and may be moved by realloc) on every call, so the
+ * result is only valid until the next call.  If b is at or past end
+ * of file, an error is reported and start and end are both 0.  If
+ * the entry is empty or the read fails, end equals start.
+ *
+ * Exits with status "nomem" if the buffer cannot be grown.
+ */
+Entry
+getentry(long b)
+{
+	long e, n, dtop;
+	static Entry ans;
+	static int anslen = 0;
+
+	e = (*dict->nextoff)(b+1);
+	ans.doff = b;
+	if(e < 0) {
+		dtop = Bseek(bdict, 0L, 2);
+		if(b < dtop) {
+			e = dtop;
+		} else {
+			err("couldn't seek to entry");
+			/*
+			 * Drop the buffer and its recorded size together and
+			 * return now: falling through would compute a negative
+			 * length from e and read through a nil pointer, and a
+			 * stale anslen would let later calls skip the realloc.
+			 */
+			free(ans.start);
+			ans.start = 0;
+			ans.end = 0;
+			anslen = 0;
+			return ans;
+		}
+	}
+	n = e-b;
+	if(n <= 0) {
+		/* empty entry: do not hand back the previous entry's bytes */
+		ans.end = ans.start;
+		return ans;
+	}
+	if(n > anslen) {
+		/* the old pointer may be overwritten: failure is fatal here */
+		ans.start = realloc(ans.start, n);
+		if(!ans.start) {
+			err("out of memory");
+			exits("nomem");
+		}
+		anslen = n;
+	}
+	Bseek(bdict, b, 0);
+	n = Bread(bdict, ans.start, n);
+	if(n < 0) {
+		/* a failed read must not put end before start */
+		err("couldn't read entry");
+		n = 0;
+	}
+	ans.end = ans.start + n;
+	return ans;
+}