Kouhei Sutou
null+****@clear*****
Thu Feb 6 15:46:41 JST 2014
Kouhei Sutou 2014-02-06 15:46:41 +0900 (Thu, 06 Feb 2014) New Revision: a681fc81be2958e5ca75c469d44ce35489be1dd1 https://github.com/droonga/fluent-plugin-droonga/commit/a681fc81be2958e5ca75c469d44ce35489be1dd1 Message: Add experimental MeCab filter It can be enabled by running fluentd with "DROONGA_ENABLE_SEARCH_MECAB_FILTER=yes" environment variable. Added files: lib/droonga/searcher/mecab_filter.rb Modified files: lib/droonga/searcher.rb Modified: lib/droonga/searcher.rb (+4 -0) =================================================================== --- lib/droonga/searcher.rb 2014-02-06 15:44:38 +0900 (15fa084) +++ lib/droonga/searcher.rb 2014-02-06 15:46:41 +0900 (b5aaddd) @@ -641,3 +641,7 @@ module Droonga end end end + +if ENV["DROONGA_ENABLE_SEARCH_MECAB_FILTER"] == "yes" + require "droonga/searcher/mecab_filter" +end Added: lib/droonga/searcher/mecab_filter.rb (+67 -0) 100644 =================================================================== --- /dev/null +++ lib/droonga/searcher/mecab_filter.rb 2014-02-06 15:46:41 +0900 (b10fd66) @@ -0,0 +1,67 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2014 Droonga Project +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License version 2.1 as published by the Free Software Foundation. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +require "MeCab" + +module Droonga + class Searcher + class QuerySearcher + class MeCabTokenizer + def initialize + @mecab = MeCab::Tagger.new("-Owakati") + end + + def tokenize(text) + tokens = @mecab.parse(text).force_encoding("utf-8").split(/\s+/) + tokens.reject do |token| + token.empty? + end + end + end + + def apply_mecab_filter(condition) + return unless condition.is_a?(Hash) + return unless condition["useMeCabFilter"] + query = condition["query"] + return if query.nil? + match_columns = condition["matchTo"] + return unless match_columns.is_a?(Array) + return if match_columns.size != 1 + match_column = match_columns.first + + tokenizer = MeCabTokenizer.new + + @records.open_cursor do |cursor| + count = 0 + cursor.each do |record| + match_target = record[match_column] + body_terms = tokenizer.tokenize(match_target) + unless body_terms.include?(query) + record.delete + end + end + end + end + + alias_method :original_apply_condition!, :apply_condition! + def apply_condition!(condition) + original_apply_condition!(condition) + apply_mecab_filter(condition) + end + end + end +end -------------- next part -------------- HTML����������������������������...Download