null+****@clear*****
null+****@clear*****
2011年 5月 6日 (金) 15:46:03 JST
Kouhei Sutou 2011-05-06 06:46:03 +0000 (Fri, 06 May 2011)
New Revision: e8ed74bcb318c2e3b3f5cf2df480e342a77fe842
Log:
[example][dictionary] support reading extraction for edict.
Modified files:
examples/dictionary/edict/edict2grn.rb
Modified: examples/dictionary/edict/edict2grn.rb (+12 -2)
===================================================================
--- examples/dictionary/edict/edict2grn.rb 2011-05-06 06:27:42 +0000 (79e0713)
+++ examples/dictionary/edict/edict2grn.rb 2011-05-06 06:46:03 +0000 (3fe23f6)
@@ -3,6 +3,7 @@
$KCODE = 'u'
+require 'English'
require 'kconv'
class String
@@ -35,12 +36,21 @@ puts <<END
column_create item_dictionary edict_desc COLUMN_SCALAR ShortText
column_create bigram item_dictionary_edict_desc COLUMN_INDEX|WITH_POSITION item_dictionary edict_desc
load --table item_dictionary
-[["_key","edict_desc"],
+[["_key","edict_desc","kana"],
END
while !STDIN.eof?
line = Kconv.toutf8(gets.strip)
key, body = line.split('/', 2)
- puts [key, body].to_json
+ key = key.strip
+ if /\s*\[(.+)\]\z/ =~ key
+ key = $PREMATCH
+ reading = $1
+ body = "[#{reading}] #{body}"
+ kana = NKF.nkf("-Ww --katakana", reading)
+ else
+ kana = NKF.nkf("-Ww --katakana", key)
+ end
+ puts [key, body, kana].to_json
end
puts ']'