null+****@clear*****
null+****@clear*****
2012年 5月 30日 (水) 10:02:53 JST
Susumu Yata 2012-05-30 10:02:53 +0900 (Wed, 30 May 2012)
New Revision: 0a6b50d9e250f4829a18faa0f8061f290d6574df
Log:
Update TokenKytea for KyTea version 0.4.2.
Modified files:
plugins/tokenizers/kytea.cpp
Modified: plugins/tokenizers/kytea.cpp (+4 -2)
===================================================================
--- plugins/tokenizers/kytea.cpp 2012-05-30 00:00:35 +0900 (58efb58)
+++ plugins/tokenizers/kytea.cpp 2012-05-30 10:02:53 +0900 (2e12f9b)
@@ -187,7 +187,9 @@ grn_obj *grn_kytea_init(grn_ctx *ctx, int num_args, grn_obj **args,
grn_plugin_mutex_lock(ctx, kytea_mutex);
try {
const std::string str(query->ptr, query->length);
- tokenizer->sentence = kytea::KyteaSentence(kytea_util->mapString(str));
+ const kytea::KyteaString &surface_str = kytea_util->mapString(str);
+ const kytea::KyteaString &normalized_str = kytea_util->normalize(surface_str);
+ tokenizer->sentence = kytea::KyteaSentence(surface_str, normalized_str);
kytea_tagger->calculateWS(tokenizer->sentence);
} catch (...) {
grn_plugin_mutex_unlock(ctx, kytea_mutex);
@@ -200,7 +202,7 @@ grn_obj *grn_kytea_init(grn_ctx *ctx, int num_args, grn_obj **args,
try {
for (std::size_t i = 0; i < tokenizer->sentence.words.size(); ++i) {
const std::string &token =
- kytea_util->showString(tokenizer->sentence.words[i].surf);
+ kytea_util->showString(tokenizer->sentence.words[i].surface);
const char *ptr = token.c_str();
unsigned int left = static_cast<unsigned int>(token.length());
while (left > 0) {