add ktrans. こんにちは!
This commit is contained in:
parent
6972974f8d
commit
566c512106
9 changed files with 11524 additions and 0 deletions
7586
lib/kanji.jisho
Normal file
7586
lib/kanji.jisho
Normal file
File diff suppressed because it is too large
Load diff
106
sys/man/1/ktrans
Normal file
106
sys/man/1/ktrans
Normal file
|
@ -0,0 +1,106 @@
|
|||
.TH KTRANS 1
|
||||
.SH NAME
|
||||
ktrans \- language transliterator
|
||||
.SH SYNOPSIS
|
||||
.B ktrans
|
||||
.SH DESCRIPTION
|
||||
The
|
||||
.I ktrans
|
||||
program works with
|
||||
.IR kbdfs (8)
|
||||
to transliterate typed letter sequences into characters for languages
|
||||
that do not use the Latin character set, and pipes the result to
|
||||
.BR /dev/cons .
|
||||
The language is selected by typing a control character:
|
||||
.TP
|
||||
.B ctl-t
|
||||
return to default English mode (no transliteration).
|
||||
.TP
|
||||
.B ctl-n
|
||||
Japanese hiragana: interpret lower-case letters as a Hepburn
|
||||
representation of hiragana. In this mode, typing ctl-\\ looks up the
|
||||
last `word' in a hiragana-kanji dictionary and replaces it.
|
||||
Subsequent ctl-\\ characters cycle through the possibilities. A word
|
||||
is the longest immediately preceding unbroken string of hiragana
|
||||
characters.
|
||||
.TP
|
||||
.B ctl-k
|
||||
Japanese katakana.
|
||||
.TP
|
||||
.B ctl-l
|
||||
Leave the hiragana typed so far as it is, without kanji conversion.
|
||||
.TP
|
||||
.B ctl-x
|
||||
Re-read the kana-kanji conversion dictionary and rebuild it. This
|
||||
is convenient when you have updated the kana-kanji conversion dictionary
|
||||
in an editor such as acme. By default, the kana-kanji conversion dictionary
|
||||
is read once at startup to build a hash table, which will be
|
||||
arranged so that the last selected candidate will be the first
|
||||
candidate for later search.
|
||||
.TP
|
||||
.B ctl-r
|
||||
Russian: interpret letters as Cyrillic; the transliteration is mostly
|
||||
phonetic, with
|
||||
.B '
|
||||
for
|
||||
.IR myagkij-znak
|
||||
(ь),
|
||||
.B ''
|
||||
for
|
||||
.I tverdyj-znak
|
||||
(ъ),
|
||||
.I yo
|
||||
for ё,
|
||||
.B j
|
||||
for
|
||||
.IR i-kratkaya
|
||||
(й).
|
||||
.TP
|
||||
.B ctl-o
|
||||
Greek.
|
||||
.TP
|
||||
.B ctl-s
|
||||
Korean.
|
||||
.PP
|
||||
To use it, you must run it before starting rio. You can start it from your $home/lib/profile, for example:
|
||||
.EX
|
||||
...
|
||||
ktrans
|
||||
rio -i riostart
|
||||
...
|
||||
.EE
|
||||
or run it with a sub-rio on a window like:
|
||||
.EX
|
||||
% @{ktrans; rio}
|
||||
.EE
|
||||
.PP
|
||||
.SH SOURCE
|
||||
.B /sys/src/cmd/ktrans
|
||||
.SH SEE ALSO
|
||||
.IR rio (1)
|
||||
.IR kbdfs (8)
|
||||
.br
|
||||
.IR /sys/src/cmd/ktrans/README.kenji
|
||||
.br
|
||||
.IR /sys/src/cmd/ktrans/READMEJ.kenji
|
||||
.SH EXAMPLES
|
||||
If you want to make the Japanese text as below:
|
||||
|
||||
私は毎日35分以上歩いて、 更に10分電車に乗って学校に通います。
|
||||
健康の維持にも役だっていますが、 なかなかたのしいものです。
|
||||
|
||||
your keyboard typing stream should be:
|
||||
|
||||
[^l]watashiHA[^\\]mainichi[^\\]35[^l]fun[^\\]ijou[^\\]aruIte,
|
||||
[^\\]saraNI[^\\]10[^l]fun[^\\]denshaNI[^\\]noTte[^\\]gakkouNI
|
||||
[^\\]kayoImasu.[^\\]kenkouNO[^\\]ijiNImo[^\\]yakuDAtteimasuga,
|
||||
[^\\]nakanakatanoshiImonodesu.
|
||||
|
||||
where [^\\] and [^l] indicate 'ctl-\\' and 'ctl-l',
|
||||
respectively. See README.kenji for the details of this Japanese input
|
||||
method.
|
||||
.SH BUGS
|
||||
.PP
|
||||
There is no way to generate the control characters literally. At the
|
||||
beginning of a new line, you have to begin with ctl-l for successful
|
||||
kana-kanji conversion.
|
108
sys/src/cmd/ktrans/README.kenji
Normal file
108
sys/src/cmd/ktrans/README.kenji
Normal file
|
@ -0,0 +1,108 @@
|
|||
This version of ktrans works with pipefile by Rob Pike, which replaces /dev/kbd of
|
||||
2nd Edition Plan 9 by Dennis Ritchie. By this new idea, we can input Japanese
|
||||
anytime onto any windows. I included his post to 9fans mailing-list here (pipefile
|
||||
file).
|
||||
|
||||
This is a distribution as a derived work license of Plan 9, and I included the
|
||||
Plan 9 License term here. Please refer to it on the details of this license.
|
||||
|
||||
|
||||
Main features are as follows:
|
||||
|
||||
1) There is a "local" dictionary file for translation from kana to kanji, which can
|
||||
easily be edited by the user. The default file name is $home/lib/ktrans-jisho.
|
||||
If you want to use another dictionary file, set the KTJISHO environment
|
||||
variable to point to that file. This dictionary comprises many lines of
|
||||
editable text strings.
|
||||
|
||||
2) Capital romaji input is used for words such as verbs or adjectives with okurigana,
|
||||
which follows the idea of the SKK system by Masahiko Sato of Kyoto Univ.
|
||||
(masahiko@kuis.kyoto-u.ac.jp). If you want to get the kanji string (runes)
|
||||
"動かす", which is a verb, you may input "ugoKasu" from the keyboard.
|
||||
Note here the Kasu's k is a capital (important). You will see hiragana
|
||||
runes "うごかす", and then the kanji runes of "動かす", when you type
|
||||
"Shift+Space".
|
||||
If you are satisfied by that translation, continue to input the next word.
|
||||
If you are not pleased with that candidate, hit 'Shift+Space' once more
|
||||
to see more candidates for that hiragana input. When there are no more candidates
|
||||
registered in your dictionary, you will see the initial hiragana input.
|
||||
|
||||
3) For Japanese "joshi", a post-positioned short word after a noun, you can use another
|
||||
method that I developed for this work. If you want the kanji string (runes) "私は",
|
||||
then type "watashiHA" from the keyboard. Note that the sound of "wa(ha)" is expressed
|
||||
as capitalized "HA". You will see a hiragana string of "わたしは", and then "私は"
|
||||
after `Shift+Space'.
|
||||
|
||||
4) A control sequence of 'ctl-l' is introduced to leave input hiragana runes unchanged.
|
||||
This is occasionally necessary.
|
||||
|
||||
5) A simple learning mechanism has been implemented for the in-memory hashing
|
||||
dictionary, in which the most recently used kanji runes (candidate) moves to the top
|
||||
of the list of candidates. This is valid only during the session you called ktrans.
|
||||
It is done this way intentionally, because the present learning method is ..well...
|
||||
naive. ^_^ I know this; however, I believe you can solve it by making a good
|
||||
dictionary best fitted to your purposes on your own.
|
||||
|
||||
6) 'ctl-x' re-reads the new kana-kanji translation dictionary when you have edited your
|
||||
kana-kanji translation, and want to incorporate it into your current in-memory translation
|
||||
dictionary. The kana-kanji translation dictionary is usually read only once, at the
|
||||
beginning of the ktrans session. I believe this simplification is not a serious constraint,
|
||||
because you can edit the dictionary anytime by co-working with acme and using this ctl-x
|
||||
mechanism.
|
||||
|
||||
7) A mode change to kana-input is triggered by 'ctl-n', ctl-k for katakana mode,
|
||||
ctl-g for Greek mode, and ctl-r for Russian mode.
|
||||
|
||||
8) As the starting $home/lib/ktrans-jisho, you may re-format the SKK-JISYO.S (66.9KB) of the
|
||||
SKK system, which can be retrieved from ftp.kuis.kyoto-u.ac.jp. The next three lines
|
||||
show the short sed filter to transform from an SKK type dictionary to Plan 9. Before
|
||||
this, you should change the kanji code of the SKK dictionary from ujis(euc) to UTF-8 by
|
||||
the tcs utility, of course.
|
||||
s/\// /g
|
||||
s/ / /g
|
||||
s/ $//g
|
||||
The header items are sorted in a strange order in the original SKK dictionary.
|
||||
This implementation does not care about the order, therefore, you can change it on
|
||||
your own.
|
||||
|
||||
9) SKK jisho, such as SKK-JISYO.S, is composed of two parts, okuri-ari and okuri-nashi
|
||||
entries. This greatly depends on the Japanese grammar, and okuri-ari may represent
|
||||
verb/adjective etc., i.e., not noun. These two parts work differently in the original
|
||||
SKK system, however, I did not employ that method; rather, I took a simple approach
|
||||
as described in (2) and (3). Here, we make no difference between these two parts,
|
||||
and the reason why I left the two-part structure is just to make it easier to read for
|
||||
editing. Of course, you can change it without any side-effects.
|
||||
|
||||
10) The essence of this Japanese input method is to convert every one word by one key
|
||||
triggering. This may cause some cumbersome feelings for Nihongo users who are accustomed
|
||||
to, say, Windows. I know this. However, I intended to keep the codes as compact as
|
||||
possible as a first step towards developing a Nihongo input system on Plan 9.
|
||||
Furthermore, I've never seen the latter work perfectly. I think the conversion failed
|
||||
essentially when we see more than, say, five/six candidates for one set of hiragana runes.
|
||||
Finally, I'd like to add that I have no problem writing long Japanese documents with
|
||||
this version.
|
||||
|
||||
11) The translation trigger key has been changed from ^t to Shift+Space, because
|
||||
we sometimes experienced clashes of the trigger key sequence. For this reason,
|
||||
you have to edit the key-binding table, such as kbtabshift[0x39] in /sys/src/9/pc/kbd.c
|
||||
or keymapshift[0x79] in /sys/src/9/ss/screen.c to assign Shift+Space to '^\'.
|
||||
I use here '^\' as the trigger key. Therefore, you can trigger translation
|
||||
from kana to kanji by Shift+Space or just '^\'.
|
||||
|
||||
12) A usage example: If you want to make the Japanese text as below:
|
||||
|
||||
私は毎日35分以上歩いて、 更に10分電車に乗って学校に通います。
|
||||
健康の維持にも役だっていますが、 なかなかたのしいものです。
|
||||
|
||||
your keyboard typing stream should be:
|
||||
|
||||
watashiHA[^t]mainichi[^t]35[^l]fun[^t]ijou[^t]aruIte, [^t]saraNI[^t]
|
||||
10[^l]fun[^t]denshaNI[^t]noTte[^t]gakkouNI[^t]kayoImasu.[^t]
|
||||
kenkouNO[^t]ijiNImo[^t]yakuDAtteimasuga, [^t]nakanaka[^l]tanoshiI[^t]
|
||||
monodesu.[^l]
|
||||
|
||||
where [^t] and [^l] indicate 'Shift+Space' and 'ctl-l', respectively.
|
||||
|
||||
|
||||
Kenji Okamoto August 14, 2000
|
||||
|
126
sys/src/cmd/ktrans/READMEJ.kenji
Normal file
126
sys/src/cmd/ktrans/READMEJ.kenji
Normal file
|
@ -0,0 +1,126 @@
|
|||
Plan 9がRelease 3になってソースが公開されました。このため、更に多くのユーザがPlan 9を
|
||||
使い始める事になると思います。このバージョンになって、Windowシステムが8½からrioに
|
||||
変更になり、それにともなって、release 2に含まれていたktransが無くなりました。
|
||||
|
||||
そこで、Rob Pikeさんにお願いしたら、pipefileというすばらしいアイデアを考えて戴け
|
||||
ました。これはrelease 2でDennis Ritchieさんが提供した/dev/kbdを置き替えるもので、
|
||||
このいきさつは、Pikeさんの9fansのmailing-listへの投稿を同封してありますので、
|
||||
そちら(pipefile)を御覧下さい。この方法は何時でもどのウィンドへも日本語を入力出来る
|
||||
ので、以前のバージョンの様にウィンド毎にktransを起動する必要がなくなりました。
|
||||
pipefileはrioより前に起動される必要がありますので、ここに同封したKanjiという
|
||||
スクリプトを各自のlib/profileにrioを起動する替わりに、このKanjiスクリプトを
|
||||
起動する様にして下さい。
|
||||
|
||||
|
||||
Main features are as follows:
|
||||
|
||||
1) There is a "local" dictionary file for translation from kana to kanji, which can
|
||||
easily be edited by the user. The default file name is $home/lib/ktrans-jisho.
|
||||
If you want to use another dictionary file, set the KTJISHO environment
|
||||
variable to point to that file. This dictionary comprises many lines of
|
||||
editable text strings.
|
||||
|
||||
2) Capital romaji input is used for words such as verbs or adjectives with okurigana,
|
||||
which follows the idea of the SKK system by Masahiko Sato of Kyoto Univ.
|
||||
(masahiko@kuis.kyoto-u.ac.jp). If you want to get the kanji string (runes)
|
||||
"動かす", which is a verb, you may input "ugoKasu" from the keyboard.
|
||||
Note here the Kasu's k is a capital (important). You will see hiragana
|
||||
runes "うごかす", and then the kanji runes of "動かす", when you type
|
||||
"Shift+Space".
|
||||
If you are satisfied by that translation, continue to input the next word.
|
||||
If you are not pleased with that candidate, hit 'Shift+Space' once more
|
||||
to see more candidates for that hiragana input. When there are no more candidates
|
||||
registered in your dictionary, you will see the initial hiragana input.
|
||||
|
||||
3) For Japanese "joshi", a post-positioned short word after a noun, you can use another
|
||||
method that I developed for this work. If you want the kanji string (runes) "私は",
|
||||
then type "watashiHA" from the keyboard. Note that the sound of "wa(ha)" is expressed
|
||||
as capitalized "HA". You will see a hiragana string of "わたしは", and then "私は"
|
||||
after `Shift+Space'.
|
||||
|
||||
4) A control sequence of 'ctl-l' is introduced to leave input hiragana runes unchanged.
|
||||
This is occasionally necessary.
|
||||
|
||||
5) A simple learning mechanism has been implemented for the in-memory hashing
|
||||
dictionary, in which the most recently used kanji runes (candidate) moves to the top
|
||||
of the list of candidates. This is valid only during the session you called ktrans.
|
||||
It is done this way intentionally, because the present learning method is ..well...
|
||||
naive. ^_^ I know this; however, I believe you can solve it by making a good
|
||||
dictionary best fitted to your purposes on your own.
|
||||
|
||||
6) 'ctl-x' re-reads the new kana-kanji translation dictionary when you have edited your
|
||||
kana-kanji translation, and want to incorporate it into your current in-memory translation
|
||||
dictionary. The kana-kanji translation dictionary is usually read only once, at the
|
||||
beginning of the ktrans session. I believe this simplification is not a serious constraint,
|
||||
because you can edit the dictionary anytime by co-working with acme and using this ctl-x
|
||||
mechanism.
|
||||
|
||||
7) A mode change to kana-input is triggered by 'ctl-n', ctl-k for katakana mode,
|
||||
ctl-g for Greek mode, and ctl-r for Russian mode.
|
||||
|
||||
8) As the starting $home/lib/ktrans-jisho, you may re-format the SKK-JISYO.S (66.9KB) of the
|
||||
SKK system, which can be retrieved from ftp.kuis.kyoto-u.ac.jp. The next three lines
|
||||
show the short sed filter to transform from an SKK type dictionary to Plan 9. Before
|
||||
this, you should change the kanji code of the SKK dictionary from ujis(euc) to UTF-8 by
|
||||
the tcs utility, of course.
|
||||
s/\// /g
|
||||
s/ / /g
|
||||
s/ $//g
|
||||
The header items are sorted in a strange order in the original SKK dictionary.
|
||||
This implementation does not care about the order, therefore, you can change it on
|
||||
your own.
|
||||
|
||||
9) SKK jisho, such as SKK-JISYO.S, is composed of two parts, okuri-ari and okuri-nashi
|
||||
entries. This greatly depends on the Japanese grammar, and okuri-ari may represent
|
||||
verb/adjective etc., i.e., not noun. These two parts work differently in the original
|
||||
SKK system, however, I did not employ that method; rather, I took a simple approach
|
||||
as described in (2) and (3). Here, we make no difference between these two parts,
|
||||
and the reason why I left the two-part structure is just to make it easier to read for
|
||||
editing. Of course, you can change it without any side-effects.
|
||||
|
||||
10) The essence of this Japanese input method is to convert every one word by one key
|
||||
triggering. This may cause some cumbersome feelings for Nihongo users who are accustomed
|
||||
to, say, Windows. I know this. However, I intended to keep the codes as compact as
|
||||
possible as a first step towards developing a Nihongo input system on Plan 9.
|
||||
Furthermore, I've never seen the latter work perfectly. I think the conversion failed
|
||||
essentially when we see more than, say, five/six candidates for one set of hiragana runes.
|
||||
Finally, I'd like to add that I have no problem writing long Japanese documents with
|
||||
this version.
|
||||
|
||||
11) The translation trigger key has been changed from ^t to Shift+Space, because
|
||||
we sometimes experienced clashes of the trigger key sequence. For this reason,
|
||||
you have to edit the key-binding table, such as kbtabshift[0x39] in /sys/src/9/pc/kbd.c
|
||||
or keymapshift[0x79] in /sys/src/9/ss/screen.c to assign Shift+Space to '^\'.
|
||||
I use here '^\' as the trigger key. Therefore, you can trigger translation
|
||||
from kana to kanji by Shift+Space or just '^\'.
|
||||
|
||||
12) A usage example: If you want to make the Japanese text as below:
|
||||
|
||||
私は毎日35分以上歩いて、 更に10分電車に乗って学校に通います。
|
||||
健康の維持にも役だっていますが、 なかなかたのしいものです。
|
||||
|
||||
your keyboard typing stream should be:
|
||||
|
||||
watashiHA[^t]mainichi[^t]35[^l]fun[^t]ijou[^t]aruIte, [^t]saraNI[^t]
|
||||
10[^l]fun[^t]denshaNI[^t]noTte[^t]gakkouNI[^t]kayoImasu.[^t]
|
||||
kenkouNO[^t]ijiNImo[^t]yakuDAtteimasuga, [^t]nakanaka[^l]tanoshiI[^t]
|
||||
monodesu.[^l]
|
||||
|
||||
where [^t] and [^l] indicate 'Shift+Space' and 'ctl-l', respectively.
|
||||
|
||||
|
||||
Kenji Okamoto August 14, 2000
|
||||
|
||||
|
||||
これはRelease 2のPlan 9に含まれていたDennis Ritchieさんのktransを機能強化したもので、
|
||||
大阪府立大学の岡本健二が作成いたしました。 疑問、提案等がございましたら、
|
||||
okamoto@granite.cias.osakafu-u.ac.jpまで御連絡ください。
|
||||
なお, Plan 9そのものにつきましては、
|
||||
http://basalt.cias.osakafu-u.ac.jp/plan9/p9index.html に95年度より
|
||||
ささやかなWeb Pageをもっていますので、そちらも参照くださいませ。
|
||||
|
||||
なおこのプログラムはPlan 9のderived workとしてのライセンスを持った配布となります。
|
||||
Plan 9のLicense fileを同封してありますので、ここに書かれている事を守ってお使い
|
||||
下さいませ。
|
||||
|
||||
大阪府立大学総合科学部 岡本健二 August 14, 2000
|
211
sys/src/cmd/ktrans/jisho.c
Normal file
211
sys/src/cmd/ktrans/jisho.c
Normal file
|
@ -0,0 +1,211 @@
|
|||
/*
|
||||
* open jisho file, and set the size of this jisho etc
|
||||
*
|
||||
* Kenji Okamoto August 4, 2000
|
||||
* Osaka Prefecture Univ.
|
||||
* okamoto@granite.cias.osakafu-u.ac.jp
|
||||
*/
|
||||
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include "jisho.h"
|
||||
|
||||
Dictionary *openQDIC(char *);
|
||||
void freeQDIC(Dictionary*);
|
||||
KouhoList *getKouhoHash(Dictionary*, char *);
|
||||
KouhoList *getKouhoFile(DicList*, char *);
|
||||
void selectKouho(KouhoList **, KouhoList*);
|
||||
int hashVal(char *);
|
||||
void addHash(Hash **, DicList*);
|
||||
|
||||
|
||||
|
||||
/*
|
||||
* Open QuickDIC (hashed personal dictionary)
|
||||
* open skk styled ktrans dictionary file, and make its hash table
|
||||
* based on individual header kana strings
|
||||
*
|
||||
* KouhoList
|
||||
* |---------|
|
||||
* Hash |---->kouho---->kouhotop
|
||||
* |-------| |
|
||||
* dic---->dhash---->dicindex---->kanahead
|
||||
* |--------| |--------|
|
||||
* Dictionary DicList
|
||||
*
|
||||
*/
|
||||
Dictionary *
|
||||
openQDIC(char *dicname)
|
||||
{
|
||||
Biobuf *f;
|
||||
void *Bbuf;
|
||||
Dictionary *dic;
|
||||
DicList *dicitem; /* for a future extension */
|
||||
char buf[1024], *startstr, *endstr;
|
||||
int i;
|
||||
|
||||
SET(dicitem); /* yes, I know I'm wrong, but... */
|
||||
|
||||
dic = (Dictionary*)malloc(sizeof(Dictionary));
|
||||
/* make room for pointer array (size=HASHSIZE) of hash table */
|
||||
for(i=0; i< HASHSIZE; i++) dic->dhash[i] = 0;
|
||||
dic->dlist = 0; /* for a future extension (more than one dics ^_^ */
|
||||
|
||||
if ((f = Bopen(dicname, OREAD)) == 0)
|
||||
return dic;
|
||||
|
||||
/* make hash table by the dic's header word */
|
||||
|
||||
while(Bbuf = Brdline(f, '\n')) {
|
||||
strncpy(buf, (char *)Bbuf, Blinelen(f));
|
||||
|
||||
if (buf[0] == ';') /* comment line */
|
||||
continue;
|
||||
else {
|
||||
/* get header word from jisho */
|
||||
startstr = buf;
|
||||
if(!(endstr = utfutf(startstr, "\t"))) break;
|
||||
*endstr = '\0';
|
||||
/* dicitem includes each header word from the jisho */
|
||||
|
||||
dicitem = (DicList*)malloc(sizeof(DicList)+(endstr-startstr+1));
|
||||
dicitem->nextitem = 0; /* for a future extension */
|
||||
strcpy(dicitem->kanahead, startstr);
|
||||
|
||||
dicitem->kouho = getKouhoFile(dicitem, endstr); /* read kouho from jisho */
|
||||
addHash(dic->dhash, dicitem);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
dic->dlist = dicitem;
|
||||
Bterm(f);
|
||||
return dic;
|
||||
}
|
||||
|
||||
/*
|
||||
* free dynamically allocated memory
|
||||
*/
|
||||
void
|
||||
freeQDIC(Dictionary *dic)
|
||||
{
|
||||
Hash *hash1, *hash2;
|
||||
DicList *dlist, *dlist2;
|
||||
int l;
|
||||
|
||||
for (dlist = dic->dlist;
|
||||
dlist != 0;
|
||||
dlist2 = dlist, dlist = dlist->nextitem, free((void *)dlist2));
|
||||
for (l = 0; l < HASHSIZE; l++) {
|
||||
for (hash1 = dic->dhash[l]; hash1; hash1 = hash2) {
|
||||
if (hash1->next !=0) {
|
||||
hash2 = hash1->next;
|
||||
free((void *)hash1);
|
||||
}else
|
||||
break;
|
||||
}
|
||||
}
|
||||
free((void *)dic);
|
||||
}
|
||||
|
||||
int
|
||||
hashVal(char *s)
|
||||
{
|
||||
uint h;
|
||||
|
||||
h = 0x811c9dc5;
|
||||
while(*s != 0)
|
||||
h = (h^(uchar)*s++) * 0x1000193;
|
||||
return h % HASHSIZE;
|
||||
}
|
||||
|
||||
void
|
||||
addHash(Hash **hash, DicList *ditem)
|
||||
{
|
||||
Hash *h;
|
||||
int v;
|
||||
|
||||
v = hashVal(ditem->kanahead);
|
||||
h = (Hash*)malloc(sizeof(Hash));
|
||||
h->dicindex = ditem;
|
||||
h->length = strlen(ditem->kanahead);
|
||||
h->next = hash[v];
|
||||
hash[v] = h;
|
||||
}
|
||||
|
||||
/*
|
||||
* read Kouho list from the jisho file defined by Biobuf descriptor f
|
||||
*
|
||||
* revised for Plan 9 by K.Okamoto
|
||||
*/
|
||||
KouhoList *
|
||||
getKouhoFile(DicList *dicitem, char * endstr)
|
||||
{
|
||||
char *kouhostart, *kouhoend;
|
||||
KouhoList *kouhoitem, *currntkouhoitem=0, *prevkouhoitem;
|
||||
|
||||
prevkouhoitem = 0;
|
||||
kouhostart = endstr + 1;
|
||||
while((kouhoend = utfutf(kouhostart, " ")) ||
|
||||
(kouhoend = utfutf(kouhostart, "\n"))) {
|
||||
*kouhoend = '\0';
|
||||
|
||||
kouhoitem = (KouhoList*)malloc(sizeof(KouhoList)+(kouhoend-kouhostart+1));
|
||||
kouhoitem->nextkouho = 0;
|
||||
kouhoitem->prevkouho = prevkouhoitem;
|
||||
kouhoitem->dicitem = dicitem;
|
||||
strcpy(kouhoitem->kouhotop, kouhostart);
|
||||
if (prevkouhoitem)
|
||||
prevkouhoitem->nextkouho = kouhoitem;
|
||||
else
|
||||
currntkouhoitem = kouhoitem;
|
||||
prevkouhoitem = kouhoitem;
|
||||
kouhostart = kouhoend + 1;
|
||||
}
|
||||
return currntkouhoitem;
|
||||
}
|
||||
|
||||
/*
|
||||
* get matched kouho from the hash table of header word of the dict
|
||||
* if found, returns pointer to the first candidate in the hash table.
|
||||
* if not found, returns 0.
|
||||
*
|
||||
* from getCand() in skklib.c by Akinori Ito et al.,(aito@ei5sun.yz.yamagata-u.ac.jp)
|
||||
*/
|
||||
KouhoList *
|
||||
getKouhoHash(Dictionary *dic, char *s)
|
||||
{
|
||||
int l, v;
|
||||
Hash *h;
|
||||
|
||||
l = strlen(s);
|
||||
v = hashVal(s);
|
||||
for (h = dic->dhash[v]; h != 0; h = h->next) {
|
||||
if (h->length != l ||
|
||||
strcmp(h->dicindex->kanahead, s)) continue;
|
||||
return h->dicindex->kouho; /* return matched kouho */
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* from skklib.c by Akinori Ito et al.,(aito@ei5sun.yz.yamagata-u.ac.jp)
|
||||
* just modified to read easier for current purpose
|
||||
*/
|
||||
void
|
||||
selectKouho(KouhoList **first, KouhoList *current)
|
||||
{
|
||||
/* take off currentkouho from the kouholist table */
|
||||
if (current->prevkouho) {
|
||||
current->prevkouho->nextkouho = current->nextkouho;
|
||||
if (current->nextkouho)
|
||||
current->nextkouho->prevkouho = current->prevkouho;
|
||||
current->prevkouho = 0;
|
||||
}
|
||||
/* take place of firstkouho by currentkouho */
|
||||
if (*first != current) {
|
||||
(*first)->prevkouho = current;
|
||||
current->nextkouho = *first;
|
||||
*first = current;
|
||||
}
|
||||
}
|
41
sys/src/cmd/ktrans/jisho.h
Normal file
41
sys/src/cmd/ktrans/jisho.h
Normal file
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Kenji Okamoto August 4, 2000
|
||||
* Osaka Prefecture Univ.
|
||||
* okamoto@granite.cias.osakafu-u.ac.jp
|
||||
*/
|
||||
|
||||
#define HASHSIZE 257	/* number of buckets in Dictionary.dhash */

typedef struct DicList DicList;
typedef struct KouhoList KouhoList;
typedef struct Hash Hash;
typedef struct Dictionary Dictionary;

/*
 * One dictionary entry: a header word (in hiragana) and its
 * list of candidates.
 */
struct DicList {
	KouhoList *kouho;	/* candidates for this header word */
	DicList *nextitem;	/* for a future extension */
	char kanahead[1];	/* header word; allocated longer than declared */
};

/*
 * One candidate (kouho) of a header word, doubly linked
 * with the other candidates of the same entry.
 */
struct KouhoList {
	KouhoList *nextkouho;
	KouhoList *prevkouho;
	DicList *dicitem;	/* entry this candidate belongs to */
	char kouhotop[1];	/* candidate text; allocated longer than declared */
};

/*
 * Node of a hash bucket chain.
 */
struct Hash {
	DicList *dicindex;	/* entry holding the KouhoList, kanahead etc */
	short length;		/* strlen of dicindex->kanahead */
	Hash *next;		/* next node in the same bucket */
};

/*
 * A loaded dictionary.
 */
struct Dictionary {
	DicList *dlist;		/* for a future extension, having more than one dictionaries */
	Hash *dhash[HASHSIZE];	/* bucket heads, indexed by hash value */
};
|
2865
sys/src/cmd/ktrans/ktrans.h
Normal file
2865
sys/src/cmd/ktrans/ktrans.h
Normal file
File diff suppressed because it is too large
Load diff
471
sys/src/cmd/ktrans/main.c
Normal file
471
sys/src/cmd/ktrans/main.c
Normal file
|
@ -0,0 +1,471 @@
|
|||
/*
|
||||
* Mostly based on the original source codes of Plan 9 release 2
|
||||
* distribution.
|
||||
* by Kenji Okamoto, August 4 2000
|
||||
* Osaka Prefecture Univ.
|
||||
* okamoto@granite.cias.osakafu-u.ac.jp
|
||||
*/
|
||||
|
||||
#include <u.h>
|
||||
#include <libc.h>
|
||||
#include <bio.h>
|
||||
#include "ktrans.h"
|
||||
#include "jisho.h"
|
||||
|
||||
#define LSIZE 256
|
||||
|
||||
Rune lbuf[LSIZE]; /* hiragana buffer for key input written by send() */
|
||||
Map *table = hira; /* default language conversion table */
|
||||
uchar okurigana[LSIZE]; /* buffer for okurigana */
|
||||
char okuri = 0; /* buffer/flag for capital input char */
|
||||
int in, out;
|
||||
int llen, olen, joshi = 0;
|
||||
int natural = 1; /* not Japanese but English mode */
|
||||
|
||||
int changelang(int);
|
||||
int dotrans(Dictionary*);
|
||||
int nrune(char *);
|
||||
void send(uchar *, int);
|
||||
Map *match(uchar *p, int *nc, Map *table);
|
||||
|
||||
extern Dictionary *openQDIC(char *);
|
||||
extern KouhoList *getKouhoHash(Dictionary*, char *);
|
||||
extern KouhoList *getKouhoFile(DicList*, char *);
|
||||
extern void freeQDIC(Dictionary*);
|
||||
extern void selectKouho(KouhoList **, KouhoList*);
|
||||
|
||||
void
|
||||
kbdopen(void)
|
||||
{
|
||||
int n, kinfd, koutfd, fd[2];
|
||||
char buf[128];
|
||||
int kbd;
|
||||
|
||||
kbd = 1;
|
||||
if((kinfd = open("/dev/kbd", OREAD)) < 0){
|
||||
kbd = 0;
|
||||
if((kinfd = open("/dev/cons", OREAD)) < 0)
|
||||
sysfatal("open kbd: %r");
|
||||
}
|
||||
if(bind("#|", "/n/temp", MREPL) < 0)
|
||||
sysfatal("bind /n/temp: %r");
|
||||
if((koutfd = open("/n/temp/data1", OWRITE)) < 0)
|
||||
sysfatal("open kbd pipe: %r");
|
||||
if(bind("/n/temp/data", kbd? "/dev/kbd": "/dev/cons", MREPL) < 0)
|
||||
sysfatal("bind kbd pipe: %r");
|
||||
unmount(nil, "/n/temp");
|
||||
if(!kbd){
|
||||
in = kinfd;
|
||||
out = koutfd;
|
||||
return;
|
||||
}
|
||||
if(pipe(fd) < 0)
|
||||
sysfatal("pipe: %r");
|
||||
if(fork()){
|
||||
in = out = fd[0];
|
||||
close(fd[1]);
|
||||
close(kinfd);
|
||||
close(koutfd);
|
||||
return;
|
||||
}
|
||||
close(fd[0]);
|
||||
if(fork()){
|
||||
Biobuf b;
|
||||
long r;
|
||||
|
||||
Binit(&b, fd[1], OREAD);
|
||||
while((r = Bgetrune(&b)) >= 0){
|
||||
n = snprint(buf, sizeof(buf), "c%C", (Rune)r)+1;
|
||||
write(koutfd, buf, n); /* pass on result */
|
||||
}
|
||||
} else
|
||||
while((n = read(kinfd, buf, sizeof(buf))) > 0){
|
||||
buf[n-1] = 0;
|
||||
if(n < 2 || buf[0] != 'c')
|
||||
write(koutfd, buf, n); /* pass on */
|
||||
else
|
||||
write(fd[1], buf+1, n-2); /* to translator */
|
||||
}
|
||||
exits(nil);
|
||||
}
|
||||
|
||||
void
|
||||
usage(void)
|
||||
{
|
||||
fprint(2, "usage: %s\n", argv0);
|
||||
exits("usage");
|
||||
}
|
||||
|
||||
void
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
uchar *bp, *ep, buf[128];
|
||||
Map *mp;
|
||||
int nchar, wantmore;
|
||||
int n, c;
|
||||
char *dictname;
|
||||
Dictionary *jisho;
|
||||
|
||||
ARGBEGIN{
|
||||
default: usage();
|
||||
}ARGEND;
|
||||
if(argc != 0)
|
||||
usage();
|
||||
|
||||
if((dictname = getenv("jisho")) == nil)
|
||||
dictname = "/lib/kanji.jisho";
|
||||
jisho = openQDIC(dictname);
|
||||
|
||||
kbdopen();
|
||||
if(fork())
|
||||
exits(nil); /* parent process will exit */
|
||||
|
||||
bp = ep = buf;
|
||||
wantmore = 0;
|
||||
for (;;) { /* key board input loop */
|
||||
getmore:
|
||||
if (bp>=ep || wantmore) {
|
||||
if (wantmore==0)
|
||||
bp = ep = buf; /* clear all */
|
||||
n = read(in, ep, &buf[sizeof(buf)]-ep);
|
||||
if (n<=0)
|
||||
exits("");
|
||||
ep += n;
|
||||
*ep = '\0';
|
||||
}
|
||||
while (bp<ep) { /* there are input data */
|
||||
if (table == hira && natural != 1 && (*bp>'A' && *bp<='Z') && ep-bp<2
|
||||
&& !strchr("EIOU", *bp)) {
|
||||
wantmore = 1;
|
||||
goto getmore;
|
||||
}
|
||||
if (!fullrune((char *)bp, ep-bp)) { /* not enough length of input */
|
||||
wantmore = 1;
|
||||
goto getmore;
|
||||
}
|
||||
wantmore = 0;
|
||||
|
||||
if (*bp=='') { /* ^x read ktrans-jisho once more */
|
||||
freeQDIC(jisho);
|
||||
jisho = openQDIC(dictname);
|
||||
llen = 0;
|
||||
olen = okuri = joshi = 0;
|
||||
wantmore=0;
|
||||
bp=ep=buf;
|
||||
continue;
|
||||
}
|
||||
if (*bp=='') { /* ^\ (start translation command) */
|
||||
c = dotrans(jisho);
|
||||
if (c)
|
||||
*bp = c; /* pointer to translated rune */
|
||||
else
|
||||
bp++;
|
||||
continue;
|
||||
}
|
||||
if (*bp=='') { /* ^l (no translate command) */
|
||||
bp++;
|
||||
llen = 0;
|
||||
olen = okuri = joshi = 0;
|
||||
continue;
|
||||
}
|
||||
if (changelang(*bp)) { /* change language mode OK */
|
||||
bp++;
|
||||
olen = okuri = joshi = 0;
|
||||
continue;
|
||||
}
|
||||
if (natural || *bp<=' ' || *bp>='{') { /* English mode but not ascii */
|
||||
Rune r;
|
||||
int rlen = chartorune(&r, (char *)bp);
|
||||
send(bp, rlen); /* write bp to /dev/cons */
|
||||
bp += rlen;
|
||||
continue;
|
||||
}
|
||||
if (table == hira && (*bp >= 'A' && *bp <= 'Z') && (*(bp+1) < 'A'
|
||||
|| *(bp+1) > 'Z')) {
|
||||
*bp = okuri = tolower(*bp);
|
||||
joshi = olen = 0;
|
||||
} else if (table == hira && (*bp >= 'A' && *bp <= 'Z') &&
|
||||
(*(bp+1) >= 'A' && *(bp+1) <= 'Z')) {
|
||||
*bp = okuri = tolower(*bp);
|
||||
*(bp+1) = tolower(*(bp+1));
|
||||
joshi = 1;
|
||||
olen = 0;
|
||||
}
|
||||
mp = match(bp, &nchar, table);
|
||||
if (mp == 0) {
|
||||
if (nchar>0) { /* match, longer possible */
|
||||
wantmore++;
|
||||
break;
|
||||
}
|
||||
send(bp++, 1); /* alphabet in kana mode */
|
||||
} else {
|
||||
send((uchar*)mp->kana, strlen(mp->kana));
|
||||
bp += nchar;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Return the smaller of a and b.
 */
int
min(int a, int b)
{
	if(b <= a)
		return b;
	return a;
}
|
||||
|
||||
/*
|
||||
* send UTF string (p) with length (n) to stdout
|
||||
* and write rune (r) in global lbuf[] buffer
|
||||
* or okurigana[] buffer if okuri (verb or joshi) mode
|
||||
*/
|
||||
void
|
||||
send(uchar *p, int n)
|
||||
{
|
||||
Rune r;
|
||||
uchar *ep;
|
||||
|
||||
if (write(out, (char*)p, n) != n)
|
||||
sysfatal("write: %r");
|
||||
|
||||
if (llen>LSIZE-64) {
|
||||
memmove((char*)lbuf, (char*)lbuf+64, 64*sizeof(Rune));
|
||||
llen -= 64;
|
||||
}
|
||||
|
||||
if (table!=hira || natural)
|
||||
return;
|
||||
|
||||
ep = p+n;
|
||||
if(okuri)
|
||||
while (olen<LSIZE && p<ep)
|
||||
okurigana[olen++] = *p++;
|
||||
else
|
||||
while (llen<LSIZE && p<ep) {
|
||||
p += chartorune(&r, (char*)p);
|
||||
if (r=='\b') {
|
||||
if (llen>0)
|
||||
llen--;
|
||||
continue;
|
||||
}
|
||||
if (r==0x80) /* ignore view key */
|
||||
continue;
|
||||
lbuf[llen++] = r;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Romaji to Hiragana/Katakana conversion
|
||||
* romaji shoud be input as small letter
|
||||
* returns the matched address in table, hira, kata, etc.
|
||||
* nc: number of character (return value)
|
||||
*/
|
||||
Map *
|
||||
match(uchar *p, int *nc, Map *table)
|
||||
{
|
||||
register Map *longp = 0, *kp;
|
||||
static char last;
|
||||
int longest = 0;
|
||||
|
||||
*nc = -1;
|
||||
for (kp=table; kp->roma; kp++) {
|
||||
if (*p == *kp->roma) {
|
||||
int lr = strlen(kp->roma);
|
||||
int len = min(lr, strlen((char *)p));
|
||||
if (strncmp(kp->roma, (char *)p, len)==0) {
|
||||
if (len<lr) {
|
||||
*nc = 1;
|
||||
return 0;
|
||||
}
|
||||
if (len>longest) {
|
||||
longest = len;
|
||||
longp = kp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (longp) {
|
||||
last = longp->roma[longest-1];
|
||||
*nc = longp->advance;
|
||||
}
|
||||
return longp;
|
||||
}
|
||||
|
||||
int
|
||||
changelang(int c)
|
||||
{
|
||||
switch(c){
|
||||
case '': /* ^t (English mode) */
|
||||
natural = 1;
|
||||
llen = 0;
|
||||
return 1;
|
||||
break;
|
||||
|
||||
case '': /* ^n (Japanese hiragana mode ) */
|
||||
natural = 0;
|
||||
table = hira;
|
||||
llen = 0;
|
||||
return 1;
|
||||
break;
|
||||
|
||||
case '': /* ^k (Japanese katakana mode) */
|
||||
natural = 0;
|
||||
table = kata;
|
||||
llen = 0;
|
||||
return 1;
|
||||
break;
|
||||
|
||||
case '': /* ^r (Russian mode) */
|
||||
natural = 0;
|
||||
table = cyril;
|
||||
llen = 0;
|
||||
return 1;
|
||||
break;
|
||||
|
||||
case '': /* ^o (Greek mode) */
|
||||
natural = 0;
|
||||
table = greek;
|
||||
llen = 0;
|
||||
return 1;
|
||||
break;
|
||||
|
||||
case '': /* ^s (Korean mode) */
|
||||
natural = 0;
|
||||
table = hangul;
|
||||
llen = 0;
|
||||
return 1;
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* write translated kanji runes to stdout and return last character
|
||||
* if it's not ctl-\. if the last is ctl-\, proceed with
|
||||
* translation of the next kouho
|
||||
*/
|
||||
int
|
||||
dotrans(Dictionary *dic)
|
||||
{
|
||||
Rune *res, r[1];
|
||||
char v[1024], *p, tbuf[64], hirabuf[64];
|
||||
int j, lastlen, nokouho = 0;
|
||||
char ch;
|
||||
KouhoList *fstkouho, *currentkouho;
|
||||
|
||||
if (llen==0)
|
||||
return 0; /* don't use kanji transform function */
|
||||
if (okuri && joshi != 1) {
|
||||
lbuf[llen++] = (Rune)okuri;
|
||||
lbuf[llen] = 0;
|
||||
}else
|
||||
lbuf[llen] = 0;
|
||||
okurigana[olen] = 0;
|
||||
|
||||
/*
|
||||
* search the matched index for the key word in the dict hash table, and
|
||||
* return a pointer to the matched kouho, 0 otherwise.
|
||||
*/
|
||||
res = lbuf;
|
||||
for (j=0; *res != L'\0'; j += runetochar(v+j, res++))
|
||||
;
|
||||
v[j] = '\0';
|
||||
strcpy(tbuf, v);
|
||||
strcpy(hirabuf, v); /* to remember the initial hiragana input */
|
||||
|
||||
if (okuri && joshi != 1) /* verb mode */
|
||||
hirabuf[strlen(hirabuf) - 1] = '\0';
|
||||
|
||||
if(!(fstkouho = getKouhoHash(dic, v))) { /* not found */
|
||||
llen = olen = okuri = joshi = 0;
|
||||
okurigana[0] = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
currentkouho = fstkouho;
|
||||
for(;;) {
|
||||
p = currentkouho->kouhotop; /* p to the head of kanji kouho array */
|
||||
lastlen = nrune(tbuf); /* number of rune chars */
|
||||
|
||||
if (okuri && joshi != 1) /* verb mode */
|
||||
for (j=0; j<lastlen-1; j++)
|
||||
write(out, "\b", 1); /* clear hiragana input */
|
||||
else
|
||||
for (j=0; j<lastlen; j++)
|
||||
write(out, "\b", 1); /* clear hiragana input */
|
||||
|
||||
if (okuri) {
|
||||
lastlen = nrune((char *)okurigana);
|
||||
for (j=0; j<lastlen; j++)
|
||||
write(out, "\b", 1);
|
||||
}
|
||||
|
||||
write(out, p, strlen(p)); /* write kanji to stdout */
|
||||
if (okuri)
|
||||
write(out, (char *)okurigana, olen);
|
||||
|
||||
if (read(in, &ch, 1)<=0) /* read from stdin */
|
||||
exits(nil);
|
||||
|
||||
if (ch == '') { /* if next input is ^\, once again */
|
||||
if(currentkouho->nextkouho != 0) { /* have next kouho */
|
||||
nokouho = 0;
|
||||
strcpy(tbuf, p);
|
||||
currentkouho = currentkouho->nextkouho;
|
||||
|
||||
if (okuri && joshi != 1) /* verb mode */
|
||||
for (j=0; j<nrune(tbuf); j++)
|
||||
write(out, "\b", 1);
|
||||
continue;
|
||||
} else { /* the last kouho */
|
||||
if (okuri) {
|
||||
lastlen = nrune((char *)okurigana);
|
||||
for (j=0; j<lastlen; j++)
|
||||
write(out, "\b", 1);
|
||||
}
|
||||
|
||||
for (lastlen=0; *p != 0; p += j) {
|
||||
j = chartorune(r, p);
|
||||
lastlen++;
|
||||
}
|
||||
|
||||
for (j=0; j<lastlen; j++)
|
||||
write(out, "\b", 1);
|
||||
|
||||
if(hirabuf[0])
|
||||
write(out, hirabuf, strlen(hirabuf));
|
||||
|
||||
if(okurigana[0])
|
||||
write(out, (char *)okurigana, olen);
|
||||
|
||||
olen = okuri = joshi = 0;
|
||||
okurigana[0] = 0;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
if(!nokouho) /* learn the previous use of the kouho */
|
||||
selectKouho(&(fstkouho->dicitem->kouho), currentkouho);
|
||||
|
||||
olen = okuri = joshi = 0;
|
||||
okurigana[0] = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
llen = 0;
|
||||
return ch;
|
||||
}
|
||||
|
||||
/*
|
||||
* returns the number of characters in the pointed Rune
|
||||
*/
|
||||
int
|
||||
nrune(char *p)
|
||||
{
|
||||
int n = 0;
|
||||
Rune r;
|
||||
|
||||
while (*p) {
|
||||
p += chartorune(&r, p);
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
10
sys/src/cmd/ktrans/mkfile
Normal file
10
sys/src/cmd/ktrans/mkfile
Normal file
|
@ -0,0 +1,10 @@
|
|||
# mkfile for ktrans: one program built from main.c and jisho.c
</$objtype/mkfile

BIN=/$objtype/bin
TARG=ktrans
HFILES=jisho.h ktrans.h
OFILES=main.$O jisho.$O

# shared rules for building a single target (presumably the stock mkone)
</sys/src/cmd/mkone
|
Loading…
Reference in a new issue