Kouhei Sutou
null+****@clear*****
Fri Apr 4 18:55:23 JST 2014
Kouhei Sutou 2014-04-04 18:55:23 +0900 (Fri, 04 Apr 2014) New Revision: be6801e28e6e8724bb1b8e8160fafb5aa07f0312 https://github.com/droonga/wikipedia-search/commit/be6801e28e6e8724bb1b8e8160fafb5aa07f0312 Message: Add schema and indexes for Groonga and use it Added files: config/groonga/indexes.grn config/groonga/schema.grn Modified files: lib/wikipedia-search/task.rb Added: config/groonga/indexes.grn (+7 -0) 100644 =================================================================== --- /dev/null +++ config/groonga/indexes.grn 2014-04-04 18:55:23 +0900 (8f87259) @@ -0,0 +1,7 @@ +column_create Categories pages_categories COLUMN_INDEX Pages categories + +table_create Terms TABLE_PAT_KEY ShortText \ + --default_tokenizer TokenBigram \ + --normalizer NormalizerAuto +column_create Terms pages COLUMN_INDEX|WITH_SECTION|WITH_POSITION \ + Pages title,text Added: config/groonga/schema.grn (+6 -0) 100644 =================================================================== --- /dev/null +++ config/groonga/schema.grn 2014-04-04 18:55:23 +0900 (9440707) @@ -0,0 +1,6 @@ +table_create Categories TABLE_HASH_KEY ShortText + +table_create Pages TABLE_HASH_KEY UInt64 +column_create Pages title COLUMN_SCALAR ShortText +column_create Pages text COLUMN_SCALAR Text +column_create Pages categories COLUMN_VECTOR Categories Modified: lib/wikipedia-search/task.rb (+48 -0) =================================================================== --- lib/wikipedia-search/task.rb 2014-04-04 18:39:54 +0900 (d0e053f) +++ lib/wikipedia-search/task.rb 2014-04-04 18:55:23 +0900 (3b10bde) @@ -14,6 +14,7 @@ module WikipediaSearch def define define_data_tasks + define_groonga_tasks end private @@ -93,6 +94,33 @@ module WikipediaSearch end end + def define_groonga_tasks + namespace :groonga do + desc "Load data." 
+ task :load do + rm_rf(groonga_database_dir_path.to_s) + mkdir_p(groonga_database_dir_path.to_s) + groonga_run(groonga_schema_path.to_s) + groonga_run(ja_groonga_pages_path.to_s.to_s) + groonga_run(groonga_indexes_path.to_s) + end + end + end + + def groonga_run(input) + command_line = [ + "groonga", + "--log-path", (groonga_database_dir_path + "groonga.log").to_s, + "--query-log-path", (groonga_database_dir_path + "query.log").to_s, + "--file", input, + ] + unless groonga_database_path.exist? + command_line << "-n" + end + command_line << groonga_database_path.to_s + sh(*command_line) + end + def download_base_url(language) "http://dumps.wikimedia.org/#{language}wiki/latest" end @@ -128,5 +156,25 @@ module WikipediaSearch def ja_titles_base_name "jawiki-latest-all-titles.gz" end + + def config_dir + Pathname.new("config") + end + + def groonga_schema_path + config_dir + "groonga" + "schema.grn" + end + + def groonga_indexes_path + config_dir + "groonga" + "indexes.grn" + end + + def groonga_database_dir_path + data_dir_path + "groonga" + end + + def groonga_database_path + groonga_database_dir_path + "db" + end end end -------------- next part -------------- HTMLの添付ファイルを保管しました... Download