Merge branch 'front' of git://git.9front.org/plan9front/plan9front into front
This commit is contained in:
commit
33ba68d23d
11 changed files with 11559 additions and 5 deletions
7586
lib/kanji.jisho
Normal file
7586
lib/kanji.jisho
Normal file
File diff suppressed because it is too large
Load diff
106
sys/man/1/ktrans
Normal file
106
sys/man/1/ktrans
Normal file
|
@ -0,0 +1,106 @@
|
||||||
|
.TH KTRANS 1
|
||||||
|
.SH NAME
|
||||||
|
ktrans \- language transliterator
|
||||||
|
.SH SYNOPSIS
|
||||||
|
.B ktrans
|
||||||
|
.SH DESCRIPTION
|
||||||
|
The
|
||||||
|
.I ktrans
|
||||||
|
program works with
|
||||||
|
.IR kbdfs (8)
|
||||||
|
to transliterate typed letter sequences into characters for languages
|
||||||
|
that do not use the Latin character set, and pipes the result to
|
||||||
|
.BR /dev/cons .
|
||||||
|
The language is selected by typing a control character:
|
||||||
|
.TP
|
||||||
|
.B ctl-t
|
||||||
|
return to default English mode (no transliteration).
|
||||||
|
.TP
|
||||||
|
.B ctl-n
|
||||||
|
Japanese hiragana: interpret lower-case letters as a Hepburn
|
||||||
|
representation of hiragana. In this mode, typing ctl-\\ looks up the
|
||||||
|
last `word' in a hiragana-kanji dictionary and replaces it.
|
||||||
|
Subsequent ctl-\\ characters cycle through the possibilities. A word
|
||||||
|
is the longest immediately preceding unbroken string of hiragana
|
||||||
|
characters.
|
||||||
|
.TP
|
||||||
|
.B ctl-k
|
||||||
|
Japanese katakana.
|
||||||
|
.TP
|
||||||
|
.B ctl-l
|
||||||
|
Type this to leave the preceding hiragana unconverted.
|
||||||
|
.TP
|
||||||
|
.B ctl-x
|
||||||
|
Reread the kana-kanji conversion dictionary and rebuild the in-memory table. This
|
||||||
|
is convenient after you have updated the kana-kanji conversion dictionary
|
||||||
|
in an editor such as acme. By default, the kana-kanji conversion dictionary
|
||||||
|
is read once at startup and made into a hash table, which is
|
||||||
|
arranged so that the last selected candidate will be the first
|
||||||
|
candidate for later search.
|
||||||
|
.TP
|
||||||
|
.B ctl-r
|
||||||
|
Russian: interpret letters as Cyrillic; the transliteration is mostly
|
||||||
|
phonetic, with
|
||||||
|
.B '
|
||||||
|
for
|
||||||
|
.IR myagkij-znak
|
||||||
|
(ь),
|
||||||
|
.B ''
|
||||||
|
for
|
||||||
|
.I tverdyj-znak
|
||||||
|
(ъ),
|
||||||
|
.I yo
|
||||||
|
for ё,
|
||||||
|
.B j
|
||||||
|
for
|
||||||
|
.IR i-kratkaya
|
||||||
|
(й).
|
||||||
|
.TP
|
||||||
|
.B ctl-o
|
||||||
|
Greek.
|
||||||
|
.TP
|
||||||
|
.B ctl-s
|
||||||
|
Korean.
|
||||||
|
.PP
|
||||||
|
To use it you have to run it before a rio session. You can put it in your $home/lib/profile like:
|
||||||
|
.EX
|
||||||
|
...
|
||||||
|
ktrans
|
||||||
|
rio -i riostart
|
||||||
|
...
|
||||||
|
.EE
|
||||||
|
or run it with a sub-rio on a window like:
|
||||||
|
.EX
|
||||||
|
% @{ktrans; rio}
|
||||||
|
.EE
|
||||||
|
.PP
|
||||||
|
.SH SOURCE
|
||||||
|
.B /sys/src/cmd/ktrans
|
||||||
|
.SH SEE ALSO
|
||||||
|
.IR rio (1)
|
||||||
|
.IR kbdfs (8)
|
||||||
|
.br
|
||||||
|
.IR /sys/src/cmd/ktrans/README.kenji
|
||||||
|
.br
|
||||||
|
.IR /sys/src/cmd/ktrans/READMEJ.kenji
|
||||||
|
.SH EXAMPLES
|
||||||
|
If you want to make the Japanese text as below:
|
||||||
|
|
||||||
|
私は毎日35分以上歩いて、 更に10分電車に乗って学校に通います。
|
||||||
|
健康の維持にも役だっていますが、 なかなかたのしいものです。
|
||||||
|
|
||||||
|
your keyboard typing stream should be:
|
||||||
|
|
||||||
|
[^l]watashiHA[^\\]mainichi[^\\]35[^l]fun[^\\]ijou[^\\]aruIte,
|
||||||
|
[^\\]saraNI[^\\]10[^l]fun[^\\]denshaNI[^\\]noTte[^\\]gakkouNI
|
||||||
|
[^\\]kayoImasu.[^\\]kenkouNO[^\\]ijiNImo[^\\]yakuDAtteimasuga,
|
||||||
|
[^\\]nakanakatanoshiImonodesu.
|
||||||
|
|
||||||
|
where [^\\] and [^l] indicate 'ctl-\\' and 'ctl-l',
|
||||||
|
respectively. See README.kenji for the details of this Japanese input
|
||||||
|
method.
|
||||||
|
.SH BUGS
|
||||||
|
.PP
|
||||||
|
There is no way to generate the control characters literally. At the
|
||||||
|
beginning of a new line, you have to begin with ctl-l for successful
|
||||||
|
kana-kanji conversion.
|
|
@ -822,31 +822,31 @@ static Clock clocks[] = {
|
||||||
{ "sai2.ipg_clk_s", AUDIO_IPG_CLK_ROOT, 52 },
|
{ "sai2.ipg_clk_s", AUDIO_IPG_CLK_ROOT, 52 },
|
||||||
{ "sai2.ipg_clk_sai_mclk_1", SAI2_CLK_ROOT, 52 },
|
{ "sai2.ipg_clk_sai_mclk_1", SAI2_CLK_ROOT, 52 },
|
||||||
{ "sai2.ipt_clk_sai_bclk", SAI2_CLK_ROOT, 52 },
|
{ "sai2.ipt_clk_sai_bclk", SAI2_CLK_ROOT, 52 },
|
||||||
{ "sai2.ipt_clk_sai_bclk_b", SAI1_CLK_ROOT, 52 },
|
{ "sai2.ipt_clk_sai_bclk_b", SAI2_CLK_ROOT, 52 },
|
||||||
|
|
||||||
{ "sai3.ipg_clk", AUDIO_IPG_CLK_ROOT, 53 },
|
{ "sai3.ipg_clk", AUDIO_IPG_CLK_ROOT, 53 },
|
||||||
{ "sai3.ipg_clk_s", AUDIO_IPG_CLK_ROOT, 53 },
|
{ "sai3.ipg_clk_s", AUDIO_IPG_CLK_ROOT, 53 },
|
||||||
{ "sai3.ipg_clk_sai_mclk_1", SAI3_CLK_ROOT, 53 },
|
{ "sai3.ipg_clk_sai_mclk_1", SAI3_CLK_ROOT, 53 },
|
||||||
{ "sai3.ipt_clk_sai_bclk", SAI3_CLK_ROOT, 53 },
|
{ "sai3.ipt_clk_sai_bclk", SAI3_CLK_ROOT, 53 },
|
||||||
{ "sai3.ipt_clk_sai_bclk_b", SAI1_CLK_ROOT, 53 },
|
{ "sai3.ipt_clk_sai_bclk_b", SAI3_CLK_ROOT, 53 },
|
||||||
|
|
||||||
{ "sai4.ipg_clk", AUDIO_IPG_CLK_ROOT, 54 },
|
{ "sai4.ipg_clk", AUDIO_IPG_CLK_ROOT, 54 },
|
||||||
{ "sai4.ipg_clk_s", AUDIO_IPG_CLK_ROOT, 54 },
|
{ "sai4.ipg_clk_s", AUDIO_IPG_CLK_ROOT, 54 },
|
||||||
{ "sai4.ipg_clk_sai_mclk_1", SAI4_CLK_ROOT, 54 },
|
{ "sai4.ipg_clk_sai_mclk_1", SAI4_CLK_ROOT, 54 },
|
||||||
{ "sai4.ipt_clk_sai_bclk", SAI4_CLK_ROOT, 54 },
|
{ "sai4.ipt_clk_sai_bclk", SAI4_CLK_ROOT, 54 },
|
||||||
{ "sai4.ipt_clk_sai_bclk_b", SAI1_CLK_ROOT, 54 },
|
{ "sai4.ipt_clk_sai_bclk_b", SAI4_CLK_ROOT, 54 },
|
||||||
|
|
||||||
{ "sai5.ipg_clk", AUDIO_IPG_CLK_ROOT, 55 },
|
{ "sai5.ipg_clk", AUDIO_IPG_CLK_ROOT, 55 },
|
||||||
{ "sai5.ipg_clk_s", AUDIO_IPG_CLK_ROOT, 55 },
|
{ "sai5.ipg_clk_s", AUDIO_IPG_CLK_ROOT, 55 },
|
||||||
{ "sai5.ipg_clk_sai_mclk_1", SAI5_CLK_ROOT, 55 },
|
{ "sai5.ipg_clk_sai_mclk_1", SAI5_CLK_ROOT, 55 },
|
||||||
{ "sai5.ipt_clk_sai_bclk", SAI5_CLK_ROOT, 55 },
|
{ "sai5.ipt_clk_sai_bclk", SAI5_CLK_ROOT, 55 },
|
||||||
{ "sai5.ipt_clk_sai_bclk_b", SAI1_CLK_ROOT, 55 },
|
{ "sai5.ipt_clk_sai_bclk_b", SAI5_CLK_ROOT, 55 },
|
||||||
|
|
||||||
{ "sai6.ipg_clk", AUDIO_IPG_CLK_ROOT, 56 },
|
{ "sai6.ipg_clk", AUDIO_IPG_CLK_ROOT, 56 },
|
||||||
{ "sai6.ipg_clk_s", AUDIO_IPG_CLK_ROOT, 56 },
|
{ "sai6.ipg_clk_s", AUDIO_IPG_CLK_ROOT, 56 },
|
||||||
{ "sai6.ipg_clk_sai_mclk_1", SAI6_CLK_ROOT, 56 },
|
{ "sai6.ipg_clk_sai_mclk_1", SAI6_CLK_ROOT, 56 },
|
||||||
{ "sai6.ipt_clk_sai_bclk", SAI6_CLK_ROOT, 56 },
|
{ "sai6.ipt_clk_sai_bclk", SAI6_CLK_ROOT, 56 },
|
||||||
{ "sai6.ipt_clk_sai_bclk_b", SAI1_CLK_ROOT, 56 },
|
{ "sai6.ipt_clk_sai_bclk_b", SAI6_CLK_ROOT, 56 },
|
||||||
|
|
||||||
{ "sctr.ipg_clk", IPG_CLK_ROOT, 57 },
|
{ "sctr.ipg_clk", IPG_CLK_ROOT, 57 },
|
||||||
{ "sctr.ipg_clk_s", IPG_CLK_ROOT, 57 },
|
{ "sctr.ipg_clk_s", IPG_CLK_ROOT, 57 },
|
||||||
|
|
|
@ -410,6 +410,34 @@ pcicfginit(void)
|
||||||
rootinit(&ctlrs[1]);
|
rootinit(&ctlrs[1]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* undocumented magic to avoid interference between lcdif and pcie */
|
||||||
|
static void
|
||||||
|
qosmagic(void)
|
||||||
|
{
|
||||||
|
static u32int *qosc = (u32int*)(VIRTIO + 0x7f0000);
|
||||||
|
|
||||||
|
/* unlock */
|
||||||
|
qosc[0x0000/4] = 0x0;
|
||||||
|
qosc[0x0000/4] = 0x1;
|
||||||
|
qosc[0x0060/4] = 0x0;
|
||||||
|
|
||||||
|
/* pci1 */
|
||||||
|
qosc[0x1000/4] = 0x0;
|
||||||
|
qosc[0x1000/4] = 0x1;
|
||||||
|
qosc[0x1050/4] = 0x01010100;
|
||||||
|
qosc[0x1060/4] = 0x01010100;
|
||||||
|
qosc[0x1070/4] = 0x01010100;
|
||||||
|
qosc[0x1000/4] = 0x1;
|
||||||
|
|
||||||
|
/* pcie2 */
|
||||||
|
qosc[0x2000/4] = 0x0;
|
||||||
|
qosc[0x2000/4] = 0x1;
|
||||||
|
qosc[0x2050/4] = 0x01010100;
|
||||||
|
qosc[0x2060/4] = 0x01010100;
|
||||||
|
qosc[0x2070/4] = 0x01010100;
|
||||||
|
qosc[0x2000/4] = 0x1;
|
||||||
|
}
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
SRC_PCIEPHY_RCR = 0x2C/4,
|
SRC_PCIEPHY_RCR = 0x2C/4,
|
||||||
SRC_PCIE2_RCR = 0x48/4,
|
SRC_PCIE2_RCR = 0x48/4,
|
||||||
|
@ -494,4 +522,6 @@ pciimxlink(void)
|
||||||
resetc[SRC_PCIE2_RCR] &= ~(PCIE_BTN | PCIE_G_RST);
|
resetc[SRC_PCIE2_RCR] &= ~(PCIE_BTN | PCIE_G_RST);
|
||||||
|
|
||||||
pcicfginit();
|
pcicfginit();
|
||||||
|
|
||||||
|
qosmagic();
|
||||||
}
|
}
|
||||||
|
|
108
sys/src/cmd/ktrans/README.kenji
Normal file
108
sys/src/cmd/ktrans/README.kenji
Normal file
|
@ -0,0 +1,108 @@
|
||||||
|
This version of ktrans works with pipefile by Rob Pike, which replaces /dev/kbd of
|
||||||
|
2nd Edition Plan 9 by Dennis Ritchie. By this new idea, we can input Japanese
|
||||||
|
anytime onto any windows. I included his post to 9fans mailing-list here (pipefile
|
||||||
|
file).
|
||||||
|
|
||||||
|
This is a distribution as a derived work license of Plan 9, and I included the
|
||||||
|
Plan 9 License term here. Please refer to it on the details of this license.
|
||||||
|
|
||||||
|
|
||||||
|
Main features are as follows:
|
||||||
|
|
||||||
|
1) There is a "local" dictionary file for translation from kana to kanji, which can
|
||||||
|
easily be edited by the user. The default file name is $home/lib/ktrans-jisho.
|
||||||
|
If you want to use another dictionary file, set the KTJISHO environment
|
||||||
|
variable to point to that file. This dictionary comprises many lines of
|
||||||
|
editable text strings.
|
||||||
|
|
||||||
|
2) Capital romaji input is used for words such as verbs or adjectives with okurigana,
|
||||||
|
which follows the idea of the SKK system by Masahiko Sato of Kyoto Univ.
|
||||||
|
(masahiko@kuis.kyoto-u.ac.jp). If you want to get the kanji string (runes)
|
||||||
|
"動かす", which is a verb, you may input "ugoKasu" from the keyboard.
|
||||||
|
Note here the Kasu's k is a capital (important). You will see hiragana
|
||||||
|
runes "うごかす", and then the kanji runes of "動かす", when you type
|
||||||
|
"Shift+Space".
|
||||||
|
If you are satisfied by that translation, continue to input the next word.
|
||||||
|
If you are not pleased with that candidate, hit 'Shift+Space' once more
|
||||||
|
to see more candidates for that hiragana input. When there are no more candidates
|
||||||
|
registered in your dictionary, you will see the initial hiragana input.
|
||||||
|
|
||||||
|
3) For Japanese "joshi", a post-positioned short word after a noun, you can use another
|
||||||
|
method that I developed for this work. If you want the kanji string (runes) "私は",
|
||||||
|
then type "watashiHA" from the keyboard. Note that the sound of "wa(ha)" is expressed
|
||||||
|
as capitalized "HA". You will see a hiragana string of "わたしは", and then "私は"
|
||||||
|
after `Shift+Space'.
|
||||||
|
|
||||||
|
4) A control sequence of 'ctl-l' is introduced to leave input hiragana runes unchanged.
|
||||||
|
This is occasionally necessary.
|
||||||
|
|
||||||
|
5) A simple learning mechanism has been implemented for the in-memory hashing
|
||||||
|
dictionary, in which the most recently used kanji runes (candidate) move to the top
|
||||||
|
of the list of candidates. This is valid only during the session you called ktrans.
|
||||||
|
It is done this way intentionally, because the present learning method is ..well...
|
||||||
|
naive. ^_^ I know this; however, I believe you can solve it by making a good
|
||||||
|
dictionary best fitted to your purposes on your own.
|
||||||
|
|
||||||
|
6) 'ctl-x' re-reads the new kana-kanji translation dictionary when you have edited your
|
||||||
|
kana-kanji translation, and want to incorporate it into your current in-memory translation
|
||||||
|
dictionary. The kana-kanji translation dictionary is usually read only once, at the
|
||||||
|
beginning of the ktrans session. I believe this simplification is not a serious constraint,
|
||||||
|
because you can edit the dictionary anytime by co-working with acme and using this ctl-x
|
||||||
|
mechanism.
|
||||||
|
|
||||||
|
7) A mode change to kana-input is triggered by 'ctl-n', ctl-k for katakana mode,
|
||||||
|
ctl-g for Greek mode, and ctl-r for Russian mode.
|
||||||
|
|
||||||
|
8) As the starting $home/lib/ktrans-jisho, you may re-format the SKK-JISYO.S (66.9KB) of the
|
||||||
|
SKK system, which can be retrieved from ftp.kuis.kyoto-u.ac.jp. The next three lines
|
||||||
|
show the short sed filter to transform from an SKK type dictionary to Plan 9. Before
|
||||||
|
this, you should change the kanji code of the SKK dictionary from ujis(euc) to UTF-8 by
|
||||||
|
the tcs utility, of course.
|
||||||
|
s/\// /g
|
||||||
|
s/ / /g
|
||||||
|
s/ $//g
|
||||||
|
The header items are sorted in a strange order in the original SKK dictionary.
|
||||||
|
This implementation does not care about the order, therefore, you can change it on
|
||||||
|
your own.
|
||||||
|
|
||||||
|
9) SKK jisho, such as SKK-JISYO.S, is composed of two parts, okuri-ari and okuri-nashi
|
||||||
|
entries. This greatly depends on the Japanese grammar, and okuri-ari may represent
|
||||||
|
verb/adjective etc., i.e., not noun. These two parts work differently in the original
|
||||||
|
SKK system, however, I did not employ that method; rather, I took a simple approach
|
||||||
|
as described in (2) and (3). Here, we make no difference between these two parts,
|
||||||
|
and the reason why I left the two-part structure is just to make it easier to read for
|
||||||
|
editing. Of course, you can change it without any side-effects.
|
||||||
|
|
||||||
|
10) The essence of this Japanese input method is to convert every one word by one key
|
||||||
|
triggering. This may cause some cumbersome feelings for Nihongo users who are accustomed
|
||||||
|
to, say, Windows. I know this. However, I intended to keep the codes as compact as
|
||||||
|
possible as a first step towards developing a Nihongo input system on Plan 9.
|
||||||
|
Furthermore, I've never seen the latter work perfectly. I think the conversion failed
|
||||||
|
essentially when we see more than, say, five/six candidates for one set of hiragana runes.
|
||||||
|
Finally, I'd like to add that I have no problem writing long Japanese documents with
|
||||||
|
this version.
|
||||||
|
|
||||||
|
11) The translation trigger key has been changed from ^t to Shift+Space, because
|
||||||
|
we experienced butting of the trigger key sequence sometime. For this reason,
|
||||||
|
you have to edit the key-binding table, such as kbtabshift[0x39] in /sys/src/9/pc/kbd.c
|
||||||
|
or keymapshift[0x79] in /sys/src/9/ss/screen.c to assign Shift+Space to '^\'.
|
||||||
|
I use here '^\' as the trigger key. Therefore, you can trigger translation
|
||||||
|
from kana to kanji by Shift+Space or just '^\'.
|
||||||
|
|
||||||
|
12) A usage example: If you want to make the Japanese text as below:
|
||||||
|
|
||||||
|
私は毎日35分以上歩いて、 更に10分電車に乗って学校に通います。
|
||||||
|
健康の維持にも役だっていますが、 なかなかたのしいものです。
|
||||||
|
|
||||||
|
your keyboard typing stream should be:
|
||||||
|
|
||||||
|
watashiHA[^t]mainichi[^t]35[^l]fun[^t]ijou[^t]aruIte, [^t]saraNI[^t]
|
||||||
|
10[^l]fun[^t]denshaNI[^t]noTte[^t]gakkouNI[^t]kayoImasu.[^t]
|
||||||
|
kenkouNO[^t]ijiNImo[^t]yakuDAtteimasuga, [^t]nakanaka[^l]tanoshiI[^t]
|
||||||
|
monodesu.[^l]
|
||||||
|
|
||||||
|
where [^t] and [^l] indicate 'Shift+Space' and 'ctl-l', respectively.
|
||||||
|
|
||||||
|
|
||||||
|
Kenji Okamoto August 14, 2000
|
||||||
|
|
126
sys/src/cmd/ktrans/READMEJ.kenji
Normal file
126
sys/src/cmd/ktrans/READMEJ.kenji
Normal file
|
@ -0,0 +1,126 @@
|
||||||
|
Plan 9がRelease 3になってソースが公開されました。このため、更に多くのユーザがPlan 9を
|
||||||
|
使い始める事になると思います。このバージョンになって、Windowシステムが8½からrioに
|
||||||
|
変更になり、それにともなって、release 2に含まれていたktransが無くなりました。
|
||||||
|
|
||||||
|
そこで、Rob Pikeさんにお願いしたら、pipefileというすばらしいアイデアを考えて戴け
|
||||||
|
ました。これはrelease 2でDennis Ritchieさんが提供した/dev/kbdを置き替えるもので、
|
||||||
|
このいきさつは、Pikeさんの9fansのmailing-listへの投稿を同封してありますので、
|
||||||
|
そちら(pipefile)を御覧下さい。この方法は何時でもどのウィンドへも日本語を入力出来る
|
||||||
|
ので、以前のバージョンの様にウィンド毎にktransを起動する必要がなくなりました。
|
||||||
|
pipefileはrioより前に起動される必要がありますので、ここに同封したKanjiという
|
||||||
|
スクリプトを各自のlib/profileにrioを起動する替わりに、このKanjiスクリプトを
|
||||||
|
起動する様にして下さい。
|
||||||
|
|
||||||
|
|
||||||
|
Main features are as follows:
|
||||||
|
|
||||||
|
1) There is a "local" dictionary file for translation from kana to kanji, which can
|
||||||
|
easily be edited by the user. The default file name is $home/lib/ktrans-jisho.
|
||||||
|
If you want to use another dictionary file, set the KTJISHO environment
|
||||||
|
variable to point to that file. This dictionary comprises many lines of
|
||||||
|
editable text strings.
|
||||||
|
|
||||||
|
2) Capital romaji input is used for words such as verbs or adjectives with okurigana,
|
||||||
|
which follows the idea of the SKK system by Masahiko Sato of Kyoto Univ.
|
||||||
|
(masahiko@kuis.kyoto-u.ac.jp). If you want to get the kanji string (runes)
|
||||||
|
"動かす", which is a verb, you may input "ugoKasu" from the keyboard.
|
||||||
|
Note here the Kasu's k is a capital (important). You will see hiragana
|
||||||
|
runes "うごかす", and then the kanji runes of "動かす", when you type
|
||||||
|
"Shift+Space".
|
||||||
|
If you are satisfied by that translation, continue to input the next word.
|
||||||
|
If you are not pleased with that candidate, hit 'Shift+Space' once more
|
||||||
|
to see more candidates for that hiragana input. When there are no more candidates
|
||||||
|
registered in your dictionary, you will see the initial hiragana input.
|
||||||
|
|
||||||
|
3) For Japanese "joshi", a post-positioned short word after a noun, you can use another
|
||||||
|
method that I developed for this work. If you want the kanji string (runes) "私は",
|
||||||
|
then type "watashiHA" from the keyboard. Note that the sound of "wa(ha)" is expressed
|
||||||
|
as capitalized "HA". You will see a hiragana string of "わたしは", and then "私は"
|
||||||
|
after `Shift+Space'.
|
||||||
|
|
||||||
|
4) A control sequence of 'ctl-l' is introduced to leave input hiragana runes unchanged.
|
||||||
|
This is occasionally necessary.
|
||||||
|
|
||||||
|
5) A simple learning mechanism has been implemented for the in-memory hashing
|
||||||
|
dictionary, in which the most recently used kanji runes (candidate) move to the top
|
||||||
|
of the list of candidates. This is valid only during the session you called ktrans.
|
||||||
|
It is done this way intentionally, because the present learning method is ..well...
|
||||||
|
naive. ^_^ I know this; however, I believe you can solve it by making a good
|
||||||
|
dictionary best fitted to your purposes on your own.
|
||||||
|
|
||||||
|
6) 'ctl-x' re-reads the new kana-kanji translation dictionary when you have edited your
|
||||||
|
kana-kanji translation, and want to incorporate it into your current in-memory translation
|
||||||
|
dictionary. The kana-kanji translation dictionary is usually read only once, at the
|
||||||
|
beginning of the ktrans session. I believe this simplification is not a serious constraint,
|
||||||
|
because you can edit the dictionary anytime by co-working with acme and using this ctl-x
|
||||||
|
mechanism.
|
||||||
|
|
||||||
|
7) A mode change to kana-input is triggered by 'ctl-n', ctl-k for katakana mode,
|
||||||
|
ctl-g for Greek mode, and ctl-r for Russian mode.
|
||||||
|
|
||||||
|
8) As the starting $home/lib/ktrans-jisho, you may re-format the SKK-JISYO.S (66.9KB) of the
|
||||||
|
SKK system, which can be retrieved from ftp.kuis.kyoto-u.ac.jp. The next three lines
|
||||||
|
show the short sed filter to transform from an SKK type dictionary to Plan 9. Before
|
||||||
|
this, you should change the kanji code of the SKK dictionary from ujis(euc) to UTF-8 by
|
||||||
|
the tcs utility, of course.
|
||||||
|
s/\// /g
|
||||||
|
s/ / /g
|
||||||
|
s/ $//g
|
||||||
|
The header items are sorted in a strange order in the original SKK dictionary.
|
||||||
|
This implementation does not care about the order, therefore, you can change it on
|
||||||
|
your own.
|
||||||
|
|
||||||
|
9) SKK jisho, such as SKK-JISYO.S, is composed of two parts, okuri-ari and okuri-nashi
|
||||||
|
entries. This greatly depends on the Japanese grammar, and okuri-ari may represent
|
||||||
|
verb/adjective etc., i.e., not noun. These two parts work differently in the original
|
||||||
|
SKK system, however, I did not employ that method; rather, I took a simple approach
|
||||||
|
as described in (2) and (3). Here, we make no difference between these two parts,
|
||||||
|
and the reason why I left the two-part structure is just to make it easier to read for
|
||||||
|
editing. Of course, you can change it without any side-effects.
|
||||||
|
|
||||||
|
10) The essence of this Japanese input method is to convert every one word by one key
|
||||||
|
triggering. This may cause some cumbersome feelings for Nihongo users who are accustomed
|
||||||
|
to, say, Windows. I know this. However, I intended to keep the codes as compact as
|
||||||
|
possible as a first step towards developing a Nihongo input system on Plan 9.
|
||||||
|
Furthermore, I've never seen the latter work perfectly. I think the conversion failed
|
||||||
|
essentially when we see more than, say, five/six candidates for one set of hiragana runes.
|
||||||
|
Finally, I'd like to add that I have no problem writing long Japanese documents with
|
||||||
|
this version.
|
||||||
|
|
||||||
|
11) The translation trigger key has been changed from ^t to Shift+Space, because
|
||||||
|
we experienced butting of the trigger key sequence sometime. For this reason,
|
||||||
|
you have to edit the key-binding table, such as kbtabshift[0x39] in /sys/src/9/pc/kbd.c
|
||||||
|
or keymapshift[0x79] in /sys/src/9/ss/screen.c to assign Shift+Space to '^\'.
|
||||||
|
I use here '^\' as the trigger key. Therefore, you can trigger translation
|
||||||
|
from kana to kanji by Shift+Space or just '^\'.
|
||||||
|
|
||||||
|
12) A usage example: If you want to make the Japanese text as below:
|
||||||
|
|
||||||
|
私は毎日35分以上歩いて、 更に10分電車に乗って学校に通います。
|
||||||
|
健康の維持にも役だっていますが、 なかなかたのしいものです。
|
||||||
|
|
||||||
|
your keyboard typing stream should be:
|
||||||
|
|
||||||
|
watashiHA[^t]mainichi[^t]35[^l]fun[^t]ijou[^t]aruIte, [^t]saraNI[^t]
|
||||||
|
10[^l]fun[^t]denshaNI[^t]noTte[^t]gakkouNI[^t]kayoImasu.[^t]
|
||||||
|
kenkouNO[^t]ijiNImo[^t]yakuDAtteimasuga, [^t]nakanaka[^l]tanoshiI[^t]
|
||||||
|
monodesu.[^l]
|
||||||
|
|
||||||
|
where [^t] and [^l] indicate 'Shift+Space' and 'ctl-l', respectively.
|
||||||
|
|
||||||
|
|
||||||
|
Kenji Okamoto August 14, 2000
|
||||||
|
|
||||||
|
|
||||||
|
これはRelease 2のPlan 9に含まれていたDennis Ritchieさんのktransを機能強化したもので、
|
||||||
|
大阪府立大学の岡本健二が作成いたしました。 疑問、提案等がございましたら、
|
||||||
|
okamoto@granite.cias.osakafu-u.ac.jpまで御連絡ください。
|
||||||
|
なお, Plan 9そのものにつきましては、
|
||||||
|
http://basalt.cias.osakafu-u.ac.jp/plan9/p9index.html に95年度より
|
||||||
|
ささやかなWeb Pageをもっていますので、そちらも参照くださいませ。
|
||||||
|
|
||||||
|
なおこのプログラムはPlan 9のderived workとしてのライセンスを持った配布となります。
|
||||||
|
Plan 9のLicense fileを同封してありますので、ここに書かれている事を守ってお使い
|
||||||
|
下さいませ。
|
||||||
|
|
||||||
|
大阪府立大学総合科学部 岡本健二 August 14, 2000
|
211
sys/src/cmd/ktrans/jisho.c
Normal file
211
sys/src/cmd/ktrans/jisho.c
Normal file
|
@ -0,0 +1,211 @@
|
||||||
|
/*
|
||||||
|
* open jisho file, and set the size of this jisho etc
|
||||||
|
*
|
||||||
|
* Kenji Okamoto August 4, 2000
|
||||||
|
* Osaka Prefecture Univ.
|
||||||
|
* okamoto@granite.cias.osakafu-u.ac.jp
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <u.h>
|
||||||
|
#include <libc.h>
|
||||||
|
#include <bio.h>
|
||||||
|
#include "jisho.h"
|
||||||
|
|
||||||
|
Dictionary *openQDIC(char *);
|
||||||
|
void freeQDIC(Dictionary*);
|
||||||
|
KouhoList *getKouhoHash(Dictionary*, char *);
|
||||||
|
KouhoList *getKouhoFile(DicList*, char *);
|
||||||
|
void selectKouho(KouhoList **, KouhoList*);
|
||||||
|
int hashVal(char *);
|
||||||
|
void addHash(Hash **, DicList*);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Open QuickDIC (hashed personal dictionary)
|
||||||
|
* open skk styled ktrans dictionary file, and make its hash table
|
||||||
|
* based on individual header kana strings
|
||||||
|
*
|
||||||
|
* KouhoList
|
||||||
|
* |---------|
|
||||||
|
* Hash |---->kouho---->kouhotop
|
||||||
|
* |-------| |
|
||||||
|
* dic---->dhash---->dicindex---->kanahead
|
||||||
|
* |--------| |--------|
|
||||||
|
* Dictionary DicList
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
Dictionary *
|
||||||
|
openQDIC(char *dicname)
|
||||||
|
{
|
||||||
|
Biobuf *f;
|
||||||
|
void *Bbuf;
|
||||||
|
Dictionary *dic;
|
||||||
|
DicList *dicitem; /* for a future extension */
|
||||||
|
char buf[1024], *startstr, *endstr;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
SET(dicitem); /* yes, I know I'm wrong, but... */
|
||||||
|
|
||||||
|
dic = (Dictionary*)malloc(sizeof(Dictionary));
|
||||||
|
/* make room for pointer array (size=HASHSIZE) of hash table */
|
||||||
|
for(i=0; i< HASHSIZE; i++) dic->dhash[i] = 0;
|
||||||
|
dic->dlist = 0; /* for a future extension (more than one dics ^_^ */
|
||||||
|
|
||||||
|
if ((f = Bopen(dicname, OREAD)) == 0)
|
||||||
|
return dic;
|
||||||
|
|
||||||
|
/* make hash table by the dic's header word */
|
||||||
|
|
||||||
|
while(Bbuf = Brdline(f, '\n')) {
|
||||||
|
strncpy(buf, (char *)Bbuf, Blinelen(f));
|
||||||
|
|
||||||
|
if (buf[0] == ';') /* comment line */
|
||||||
|
continue;
|
||||||
|
else {
|
||||||
|
/* get header word from jisho */
|
||||||
|
startstr = buf;
|
||||||
|
if(!(endstr = utfutf(startstr, "\t"))) break;
|
||||||
|
*endstr = '\0';
|
||||||
|
/* dicitem includes each header word from the jisho */
|
||||||
|
|
||||||
|
dicitem = (DicList*)malloc(sizeof(DicList)+(endstr-startstr+1));
|
||||||
|
dicitem->nextitem = 0; /* for a future extension */
|
||||||
|
strcpy(dicitem->kanahead, startstr);
|
||||||
|
|
||||||
|
dicitem->kouho = getKouhoFile(dicitem, endstr); /* read kouho from jisho */
|
||||||
|
addHash(dic->dhash, dicitem);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
dic->dlist = dicitem;
|
||||||
|
Bterm(f);
|
||||||
|
return dic;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* free dynamically allocated memory
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
freeQDIC(Dictionary *dic)
|
||||||
|
{
|
||||||
|
Hash *hash1, *hash2;
|
||||||
|
DicList *dlist, *dlist2;
|
||||||
|
int l;
|
||||||
|
|
||||||
|
for (dlist = dic->dlist;
|
||||||
|
dlist != 0;
|
||||||
|
dlist2 = dlist, dlist = dlist->nextitem, free((void *)dlist2));
|
||||||
|
for (l = 0; l < HASHSIZE; l++) {
|
||||||
|
for (hash1 = dic->dhash[l]; hash1; hash1 = hash2) {
|
||||||
|
if (hash1->next !=0) {
|
||||||
|
hash2 = hash1->next;
|
||||||
|
free((void *)hash1);
|
||||||
|
}else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
free((void *)dic);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
hashVal(char *s)
|
||||||
|
{
|
||||||
|
uint h;
|
||||||
|
|
||||||
|
h = 0x811c9dc5;
|
||||||
|
while(*s != 0)
|
||||||
|
h = (h^(uchar)*s++) * 0x1000193;
|
||||||
|
return h % HASHSIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
addHash(Hash **hash, DicList *ditem)
|
||||||
|
{
|
||||||
|
Hash *h;
|
||||||
|
int v;
|
||||||
|
|
||||||
|
v = hashVal(ditem->kanahead);
|
||||||
|
h = (Hash*)malloc(sizeof(Hash));
|
||||||
|
h->dicindex = ditem;
|
||||||
|
h->length = strlen(ditem->kanahead);
|
||||||
|
h->next = hash[v];
|
||||||
|
hash[v] = h;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* read Kouho list from the jisho file defined by Biobuf descriptor f
|
||||||
|
*
|
||||||
|
* revised for Plan 9 by K.Okamoto
|
||||||
|
*/
|
||||||
|
KouhoList *
|
||||||
|
getKouhoFile(DicList *dicitem, char * endstr)
|
||||||
|
{
|
||||||
|
char *kouhostart, *kouhoend;
|
||||||
|
KouhoList *kouhoitem, *currntkouhoitem=0, *prevkouhoitem;
|
||||||
|
|
||||||
|
prevkouhoitem = 0;
|
||||||
|
kouhostart = endstr + 1;
|
||||||
|
while((kouhoend = utfutf(kouhostart, " ")) ||
|
||||||
|
(kouhoend = utfutf(kouhostart, "\n"))) {
|
||||||
|
*kouhoend = '\0';
|
||||||
|
|
||||||
|
kouhoitem = (KouhoList*)malloc(sizeof(KouhoList)+(kouhoend-kouhostart+1));
|
||||||
|
kouhoitem->nextkouho = 0;
|
||||||
|
kouhoitem->prevkouho = prevkouhoitem;
|
||||||
|
kouhoitem->dicitem = dicitem;
|
||||||
|
strcpy(kouhoitem->kouhotop, kouhostart);
|
||||||
|
if (prevkouhoitem)
|
||||||
|
prevkouhoitem->nextkouho = kouhoitem;
|
||||||
|
else
|
||||||
|
currntkouhoitem = kouhoitem;
|
||||||
|
prevkouhoitem = kouhoitem;
|
||||||
|
kouhostart = kouhoend + 1;
|
||||||
|
}
|
||||||
|
return currntkouhoitem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* get matched kouho from the hash table of header word of the dict
|
||||||
|
* if found, returns pointer to the first candidate in the hash table.
|
||||||
|
* if not found, returns 0.
|
||||||
|
*
|
||||||
|
* from getCand() in skklib.c by Akinori Ito et al.,(aito@ei5sun.yz.yamagata-u.ac.jp)
|
||||||
|
*/
|
||||||
|
KouhoList *
|
||||||
|
getKouhoHash(Dictionary *dic, char *s)
|
||||||
|
{
|
||||||
|
int l, v;
|
||||||
|
Hash *h;
|
||||||
|
|
||||||
|
l = strlen(s);
|
||||||
|
v = hashVal(s);
|
||||||
|
for (h = dic->dhash[v]; h != 0; h = h->next) {
|
||||||
|
if (h->length != l ||
|
||||||
|
strcmp(h->dicindex->kanahead, s)) continue;
|
||||||
|
return h->dicindex->kouho; /* return matched kouho */
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* from skklib.c by Akinori Ito et al.,(aito@ei5sun.yz.yamagata-u.ac.jp)
|
||||||
|
* just modified to read easier for current purpose
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
selectKouho(KouhoList **first, KouhoList *current)
|
||||||
|
{
|
||||||
|
/* take off currentkouho from the kouholist table */
|
||||||
|
if (current->prevkouho) {
|
||||||
|
current->prevkouho->nextkouho = current->nextkouho;
|
||||||
|
if (current->nextkouho)
|
||||||
|
current->nextkouho->prevkouho = current->prevkouho;
|
||||||
|
current->prevkouho = 0;
|
||||||
|
}
|
||||||
|
/* take place of firstkouho by currentkouho */
|
||||||
|
if (*first != current) {
|
||||||
|
(*first)->prevkouho = current;
|
||||||
|
current->nextkouho = *first;
|
||||||
|
*first = current;
|
||||||
|
}
|
||||||
|
}
|
41
sys/src/cmd/ktrans/jisho.h
Normal file
41
sys/src/cmd/ktrans/jisho.h
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
* Kenji Okamoto August 4, 2000
|
||||||
|
* Osaka Prefecture Univ.
|
||||||
|
* okamoto@granite.cias.osakafu-u.ac.jp
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define HASHSIZE 257
|
||||||
|
|
||||||
|
/*
 * One dictionary entry: a header word (in Hiragana)
 * together with its list of candidates.
 */
typedef struct DicList DicList;
struct DicList {
	struct KouhoList *kouho;	/* candidates for this header word */
	struct DicList *nextitem;	/* for a future extension */
	char kanahead[1];		/* header word; the string starts here */
};
|
||||||
|
|
||||||
|
/*
 * One Kouho (candidate) of an index word in the dictionary;
 * candidates of the same word form a doubly linked list.
 */
typedef struct KouhoList KouhoList;
struct KouhoList {
	struct KouhoList *nextkouho;	/* following candidate, 0 at the end */
	struct KouhoList *prevkouho;	/* preceding candidate, 0 at the front */
	struct DicList *dicitem;	/* entry this candidate belongs to */
	char kouhotop[1];		/* top of the kouhos */
};
|
||||||
|
|
||||||
|
typedef struct Hash Hash;
|
||||||
|
struct Hash {
|
||||||
|
DicList *dicindex; /* pointer to a KouhoList and kanahead etc */
|
||||||
|
short length;
|
||||||
|
struct Hash *next;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef struct Dictionary Dictionary;
|
||||||
|
struct Dictionary {
|
||||||
|
DicList *dlist; /* for a future extension, having more than one dictionaries */
|
||||||
|
Hash *dhash[HASHSIZE];
|
||||||
|
};
|
2865
sys/src/cmd/ktrans/ktrans.h
Normal file
2865
sys/src/cmd/ktrans/ktrans.h
Normal file
File diff suppressed because it is too large
Load diff
471
sys/src/cmd/ktrans/main.c
Normal file
471
sys/src/cmd/ktrans/main.c
Normal file
|
@ -0,0 +1,471 @@
|
||||||
|
/*
|
||||||
|
* Mostly based on the original source codes of Plan 9 release 2
|
||||||
|
* distribution.
|
||||||
|
* by Kenji Okamoto, August 4 2000
|
||||||
|
* Osaka Prefecture Univ.
|
||||||
|
* okamoto@granite.cias.osakafu-u.ac.jp
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <u.h>
|
||||||
|
#include <libc.h>
|
||||||
|
#include <bio.h>
|
||||||
|
#include "ktrans.h"
|
||||||
|
#include "jisho.h"
|
||||||
|
|
||||||
|
#define LSIZE 256
|
||||||
|
|
||||||
|
Rune lbuf[LSIZE]; /* hiragana buffer for key input written by send() */
|
||||||
|
Map *table = hira; /* default language conversion table */
|
||||||
|
uchar okurigana[LSIZE]; /* buffer for okurigana */
|
||||||
|
char okuri = 0; /* buffer/flag for capital input char */
|
||||||
|
int in, out;
|
||||||
|
int llen, olen, joshi = 0;
|
||||||
|
int natural = 1; /* not Japanese but English mode */
|
||||||
|
|
||||||
|
int changelang(int);
|
||||||
|
int dotrans(Dictionary*);
|
||||||
|
int nrune(char *);
|
||||||
|
void send(uchar *, int);
|
||||||
|
Map *match(uchar *p, int *nc, Map *table);
|
||||||
|
|
||||||
|
extern Dictionary *openQDIC(char *);
|
||||||
|
extern KouhoList *getKouhoHash(Dictionary*, char *);
|
||||||
|
extern KouhoList *getKouhoFile(DicList*, char *);
|
||||||
|
extern void freeQDIC(Dictionary*);
|
||||||
|
extern void selectKouho(KouhoList **, KouhoList*);
|
||||||
|
|
||||||
|
void
|
||||||
|
kbdopen(void)
|
||||||
|
{
|
||||||
|
int n, kinfd, koutfd, fd[2];
|
||||||
|
char buf[128];
|
||||||
|
int kbd;
|
||||||
|
|
||||||
|
kbd = 1;
|
||||||
|
if((kinfd = open("/dev/kbd", OREAD)) < 0){
|
||||||
|
kbd = 0;
|
||||||
|
if((kinfd = open("/dev/cons", OREAD)) < 0)
|
||||||
|
sysfatal("open kbd: %r");
|
||||||
|
}
|
||||||
|
if(bind("#|", "/n/temp", MREPL) < 0)
|
||||||
|
sysfatal("bind /n/temp: %r");
|
||||||
|
if((koutfd = open("/n/temp/data1", OWRITE)) < 0)
|
||||||
|
sysfatal("open kbd pipe: %r");
|
||||||
|
if(bind("/n/temp/data", kbd? "/dev/kbd": "/dev/cons", MREPL) < 0)
|
||||||
|
sysfatal("bind kbd pipe: %r");
|
||||||
|
unmount(nil, "/n/temp");
|
||||||
|
if(!kbd){
|
||||||
|
in = kinfd;
|
||||||
|
out = koutfd;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if(pipe(fd) < 0)
|
||||||
|
sysfatal("pipe: %r");
|
||||||
|
if(fork()){
|
||||||
|
in = out = fd[0];
|
||||||
|
close(fd[1]);
|
||||||
|
close(kinfd);
|
||||||
|
close(koutfd);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
close(fd[0]);
|
||||||
|
if(fork()){
|
||||||
|
Biobuf b;
|
||||||
|
long r;
|
||||||
|
|
||||||
|
Binit(&b, fd[1], OREAD);
|
||||||
|
while((r = Bgetrune(&b)) >= 0){
|
||||||
|
n = snprint(buf, sizeof(buf), "c%C", (Rune)r)+1;
|
||||||
|
write(koutfd, buf, n); /* pass on result */
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
while((n = read(kinfd, buf, sizeof(buf))) > 0){
|
||||||
|
buf[n-1] = 0;
|
||||||
|
if(n < 2 || buf[0] != 'c')
|
||||||
|
write(koutfd, buf, n); /* pass on */
|
||||||
|
else
|
||||||
|
write(fd[1], buf+1, n-2); /* to translator */
|
||||||
|
}
|
||||||
|
exits(nil);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
usage(void)
|
||||||
|
{
|
||||||
|
fprint(2, "usage: %s\n", argv0);
|
||||||
|
exits("usage");
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
|
||||||
|
uchar *bp, *ep, buf[128];
|
||||||
|
Map *mp;
|
||||||
|
int nchar, wantmore;
|
||||||
|
int n, c;
|
||||||
|
char *dictname;
|
||||||
|
Dictionary *jisho;
|
||||||
|
|
||||||
|
ARGBEGIN{
|
||||||
|
default: usage();
|
||||||
|
}ARGEND;
|
||||||
|
if(argc != 0)
|
||||||
|
usage();
|
||||||
|
|
||||||
|
if((dictname = getenv("jisho")) == nil)
|
||||||
|
dictname = "/lib/kanji.jisho";
|
||||||
|
jisho = openQDIC(dictname);
|
||||||
|
|
||||||
|
kbdopen();
|
||||||
|
if(fork())
|
||||||
|
exits(nil); /* parent process will exit */
|
||||||
|
|
||||||
|
bp = ep = buf;
|
||||||
|
wantmore = 0;
|
||||||
|
for (;;) { /* key board input loop */
|
||||||
|
getmore:
|
||||||
|
if (bp>=ep || wantmore) {
|
||||||
|
if (wantmore==0)
|
||||||
|
bp = ep = buf; /* clear all */
|
||||||
|
n = read(in, ep, &buf[sizeof(buf)]-ep);
|
||||||
|
if (n<=0)
|
||||||
|
exits("");
|
||||||
|
ep += n;
|
||||||
|
*ep = '\0';
|
||||||
|
}
|
||||||
|
while (bp<ep) { /* there are input data */
|
||||||
|
if (table == hira && natural != 1 && (*bp>'A' && *bp<='Z') && ep-bp<2
|
||||||
|
&& !strchr("EIOU", *bp)) {
|
||||||
|
wantmore = 1;
|
||||||
|
goto getmore;
|
||||||
|
}
|
||||||
|
if (!fullrune((char *)bp, ep-bp)) { /* not enough length of input */
|
||||||
|
wantmore = 1;
|
||||||
|
goto getmore;
|
||||||
|
}
|
||||||
|
wantmore = 0;
|
||||||
|
|
||||||
|
if (*bp=='') { /* ^x read ktrans-jisho once more */
|
||||||
|
freeQDIC(jisho);
|
||||||
|
jisho = openQDIC(dictname);
|
||||||
|
llen = 0;
|
||||||
|
olen = okuri = joshi = 0;
|
||||||
|
wantmore=0;
|
||||||
|
bp=ep=buf;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (*bp=='') { /* ^\ (start translation command) */
|
||||||
|
c = dotrans(jisho);
|
||||||
|
if (c)
|
||||||
|
*bp = c; /* pointer to translated rune */
|
||||||
|
else
|
||||||
|
bp++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (*bp=='') { /* ^l (no translate command) */
|
||||||
|
bp++;
|
||||||
|
llen = 0;
|
||||||
|
olen = okuri = joshi = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (changelang(*bp)) { /* change language mode OK */
|
||||||
|
bp++;
|
||||||
|
olen = okuri = joshi = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (natural || *bp<=' ' || *bp>='{') { /* English mode but not ascii */
|
||||||
|
Rune r;
|
||||||
|
int rlen = chartorune(&r, (char *)bp);
|
||||||
|
send(bp, rlen); /* write bp to /dev/cons */
|
||||||
|
bp += rlen;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (table == hira && (*bp >= 'A' && *bp <= 'Z') && (*(bp+1) < 'A'
|
||||||
|
|| *(bp+1) > 'Z')) {
|
||||||
|
*bp = okuri = tolower(*bp);
|
||||||
|
joshi = olen = 0;
|
||||||
|
} else if (table == hira && (*bp >= 'A' && *bp <= 'Z') &&
|
||||||
|
(*(bp+1) >= 'A' && *(bp+1) <= 'Z')) {
|
||||||
|
*bp = okuri = tolower(*bp);
|
||||||
|
*(bp+1) = tolower(*(bp+1));
|
||||||
|
joshi = 1;
|
||||||
|
olen = 0;
|
||||||
|
}
|
||||||
|
mp = match(bp, &nchar, table);
|
||||||
|
if (mp == 0) {
|
||||||
|
if (nchar>0) { /* match, longer possible */
|
||||||
|
wantmore++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
send(bp++, 1); /* alphabet in kana mode */
|
||||||
|
} else {
|
||||||
|
send((uchar*)mp->kana, strlen(mp->kana));
|
||||||
|
bp += nchar;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Return the smaller of two ints.
 */
int
min(int a, int b)
{
	if(b < a)
		return b;
	return a;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* send UTF string (p) with length (n) to stdout
|
||||||
|
* and write rune (r) in global lbuf[] buffer
|
||||||
|
* or okurigana[] buffer if okuri (verb or joshi) mode
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
send(uchar *p, int n)
|
||||||
|
{
|
||||||
|
Rune r;
|
||||||
|
uchar *ep;
|
||||||
|
|
||||||
|
if (write(out, (char*)p, n) != n)
|
||||||
|
sysfatal("write: %r");
|
||||||
|
|
||||||
|
if (llen>LSIZE-64) {
|
||||||
|
memmove((char*)lbuf, (char*)lbuf+64, 64*sizeof(Rune));
|
||||||
|
llen -= 64;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (table!=hira || natural)
|
||||||
|
return;
|
||||||
|
|
||||||
|
ep = p+n;
|
||||||
|
if(okuri)
|
||||||
|
while (olen<LSIZE && p<ep)
|
||||||
|
okurigana[olen++] = *p++;
|
||||||
|
else
|
||||||
|
while (llen<LSIZE && p<ep) {
|
||||||
|
p += chartorune(&r, (char*)p);
|
||||||
|
if (r=='\b') {
|
||||||
|
if (llen>0)
|
||||||
|
llen--;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (r==0x80) /* ignore view key */
|
||||||
|
continue;
|
||||||
|
lbuf[llen++] = r;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Romaji to Hiragana/Katakana conversion
|
||||||
|
* romaji shoud be input as small letter
|
||||||
|
* returns the matched address in table, hira, kata, etc.
|
||||||
|
* nc: number of character (return value)
|
||||||
|
*/
|
||||||
|
Map *
|
||||||
|
match(uchar *p, int *nc, Map *table)
|
||||||
|
{
|
||||||
|
register Map *longp = 0, *kp;
|
||||||
|
static char last;
|
||||||
|
int longest = 0;
|
||||||
|
|
||||||
|
*nc = -1;
|
||||||
|
for (kp=table; kp->roma; kp++) {
|
||||||
|
if (*p == *kp->roma) {
|
||||||
|
int lr = strlen(kp->roma);
|
||||||
|
int len = min(lr, strlen((char *)p));
|
||||||
|
if (strncmp(kp->roma, (char *)p, len)==0) {
|
||||||
|
if (len<lr) {
|
||||||
|
*nc = 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (len>longest) {
|
||||||
|
longest = len;
|
||||||
|
longp = kp;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (longp) {
|
||||||
|
last = longp->roma[longest-1];
|
||||||
|
*nc = longp->advance;
|
||||||
|
}
|
||||||
|
return longp;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
changelang(int c)
|
||||||
|
{
|
||||||
|
switch(c){
|
||||||
|
case '': /* ^t (English mode) */
|
||||||
|
natural = 1;
|
||||||
|
llen = 0;
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case '': /* ^n (Japanese hiragana mode ) */
|
||||||
|
natural = 0;
|
||||||
|
table = hira;
|
||||||
|
llen = 0;
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case '': /* ^k (Japanese katakana mode) */
|
||||||
|
natural = 0;
|
||||||
|
table = kata;
|
||||||
|
llen = 0;
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case '': /* ^r (Russian mode) */
|
||||||
|
natural = 0;
|
||||||
|
table = cyril;
|
||||||
|
llen = 0;
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case '': /* ^o (Greek mode) */
|
||||||
|
natural = 0;
|
||||||
|
table = greek;
|
||||||
|
llen = 0;
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case '': /* ^s (Korean mode) */
|
||||||
|
natural = 0;
|
||||||
|
table = hangul;
|
||||||
|
llen = 0;
|
||||||
|
return 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* write translated kanji runes to stdout and return last character
|
||||||
|
* if it's not ctl-\. if the last is ctl-\, proceed with
|
||||||
|
* translation of the next kouho
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
dotrans(Dictionary *dic)
|
||||||
|
{
|
||||||
|
Rune *res, r[1];
|
||||||
|
char v[1024], *p, tbuf[64], hirabuf[64];
|
||||||
|
int j, lastlen, nokouho = 0;
|
||||||
|
char ch;
|
||||||
|
KouhoList *fstkouho, *currentkouho;
|
||||||
|
|
||||||
|
if (llen==0)
|
||||||
|
return 0; /* don't use kanji transform function */
|
||||||
|
if (okuri && joshi != 1) {
|
||||||
|
lbuf[llen++] = (Rune)okuri;
|
||||||
|
lbuf[llen] = 0;
|
||||||
|
}else
|
||||||
|
lbuf[llen] = 0;
|
||||||
|
okurigana[olen] = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* search the matched index for the key word in the dict hash table, and
|
||||||
|
* return a pointer to the matched kouho, 0 otherwise.
|
||||||
|
*/
|
||||||
|
res = lbuf;
|
||||||
|
for (j=0; *res != L'\0'; j += runetochar(v+j, res++))
|
||||||
|
;
|
||||||
|
v[j] = '\0';
|
||||||
|
strcpy(tbuf, v);
|
||||||
|
strcpy(hirabuf, v); /* to remember the initial hiragana input */
|
||||||
|
|
||||||
|
if (okuri && joshi != 1) /* verb mode */
|
||||||
|
hirabuf[strlen(hirabuf) - 1] = '\0';
|
||||||
|
|
||||||
|
if(!(fstkouho = getKouhoHash(dic, v))) { /* not found */
|
||||||
|
llen = olen = okuri = joshi = 0;
|
||||||
|
okurigana[0] = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
currentkouho = fstkouho;
|
||||||
|
for(;;) {
|
||||||
|
p = currentkouho->kouhotop; /* p to the head of kanji kouho array */
|
||||||
|
lastlen = nrune(tbuf); /* number of rune chars */
|
||||||
|
|
||||||
|
if (okuri && joshi != 1) /* verb mode */
|
||||||
|
for (j=0; j<lastlen-1; j++)
|
||||||
|
write(out, "\b", 1); /* clear hiragana input */
|
||||||
|
else
|
||||||
|
for (j=0; j<lastlen; j++)
|
||||||
|
write(out, "\b", 1); /* clear hiragana input */
|
||||||
|
|
||||||
|
if (okuri) {
|
||||||
|
lastlen = nrune((char *)okurigana);
|
||||||
|
for (j=0; j<lastlen; j++)
|
||||||
|
write(out, "\b", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
write(out, p, strlen(p)); /* write kanji to stdout */
|
||||||
|
if (okuri)
|
||||||
|
write(out, (char *)okurigana, olen);
|
||||||
|
|
||||||
|
if (read(in, &ch, 1)<=0) /* read from stdin */
|
||||||
|
exits(nil);
|
||||||
|
|
||||||
|
if (ch == '') { /* if next input is ^\, once again */
|
||||||
|
if(currentkouho->nextkouho != 0) { /* have next kouho */
|
||||||
|
nokouho = 0;
|
||||||
|
strcpy(tbuf, p);
|
||||||
|
currentkouho = currentkouho->nextkouho;
|
||||||
|
|
||||||
|
if (okuri && joshi != 1) /* verb mode */
|
||||||
|
for (j=0; j<nrune(tbuf); j++)
|
||||||
|
write(out, "\b", 1);
|
||||||
|
continue;
|
||||||
|
} else { /* the last kouho */
|
||||||
|
if (okuri) {
|
||||||
|
lastlen = nrune((char *)okurigana);
|
||||||
|
for (j=0; j<lastlen; j++)
|
||||||
|
write(out, "\b", 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (lastlen=0; *p != 0; p += j) {
|
||||||
|
j = chartorune(r, p);
|
||||||
|
lastlen++;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j=0; j<lastlen; j++)
|
||||||
|
write(out, "\b", 1);
|
||||||
|
|
||||||
|
if(hirabuf[0])
|
||||||
|
write(out, hirabuf, strlen(hirabuf));
|
||||||
|
|
||||||
|
if(okurigana[0])
|
||||||
|
write(out, (char *)okurigana, olen);
|
||||||
|
|
||||||
|
olen = okuri = joshi = 0;
|
||||||
|
okurigana[0] = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if(!nokouho) /* learn the previous use of the kouho */
|
||||||
|
selectKouho(&(fstkouho->dicitem->kouho), currentkouho);
|
||||||
|
|
||||||
|
olen = okuri = joshi = 0;
|
||||||
|
okurigana[0] = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
llen = 0;
|
||||||
|
return ch;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* returns the number of characters in the pointed Rune
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
nrune(char *p)
|
||||||
|
{
|
||||||
|
int n = 0;
|
||||||
|
Rune r;
|
||||||
|
|
||||||
|
while (*p) {
|
||||||
|
p += chartorune(&r, p);
|
||||||
|
n++;
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
10
sys/src/cmd/ktrans/mkfile
Normal file
10
sys/src/cmd/ktrans/mkfile
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
</$objtype/mkfile
|
||||||
|
|
||||||
|
# install directory and name of the program
BIN=/$objtype/bin
TARG=ktrans

# headers and objects that make up the program
HFILES=jisho.h ktrans.h
OFILES=main.$O jisho.$O
|
||||||
|
|
||||||
|
</sys/src/cmd/mkone
|
Loading…
Reference in a new issue