[Groonga-commit] groonga/wikipedia-search at afb6384 [master] Add InnoDB FTS + MeCab

Back to archive index

Kouhei Sutou null+****@clear*****
Tue Feb 9 00:29:03 JST 2016


Kouhei Sutou	2016-02-09 00:29:03 +0900 (Tue, 09 Feb 2016)

  New Revision: afb6384e3a60bbf37616246594d53ab2f85ab59d
  https://github.com/groonga/wikipedia-search/commit/afb6384e3a60bbf37616246594d53ab2f85ab59d

  Message:
    Add InnoDB FTS + MeCab

  Copied files:
    config/sql/indexes.innodb.mecab.sql
      (from config/sql/indexes.innodb.sql)
  Modified files:
    benchmark/centos7/mysql.sh
  Renamed files:
    config/sql/indexes.innodb.ngram.sql
      (from config/sql/indexes.innodb.sql)

  Modified: benchmark/centos7/mysql.sh (+95 -24)
===================================================================
--- benchmark/centos7/mysql.sh    2016-02-08 19:39:03 +0900 (18545c9)
+++ benchmark/centos7/mysql.sh    2016-02-09 00:29:03 +0900 (a89da9e)
@@ -15,7 +15,8 @@ data_dir="${base_dir}/data/csv"
 benchmark_dir="${base_dir}/benchmark"
 
 mroonga_db="benchmark_mroonga"
-innodb_db="benchmark_innodb"
+innodb_ngram_db="benchmark_innodb_ngram"
+innodb_mecab_db="benchmark_innodb_mecab"
 
 run()
 {
@@ -79,16 +80,27 @@ install_groonga_tokenizer_mecab()
 install_mroonga()
 {
   run sudo yum install -y mysql57-community-mroonga
+}
+
+setup_mysql()
+{
   echo "log-bin" | run sudo tee --append /etc/my.cnf
   echo "server-id=1" | run sudo tee --append /etc/my.cnf
   echo "character-set-server=utf8mb4" | run sudo tee --append /etc/my.cnf
   echo "validate-password=off" | run sudo tee --append /etc/my.cnf
+  echo "loose-mecab-rc-file=/usr/lib64/mysql/mecab/etc/mecabrc" | \
+    run sudo tee --append /etc/my.cnf
+  echo "innodb-ft-min-token-size=1" | \
+    run sudo tee --append /etc/my.cnf
+  echo "dicdir = /usr/lib64/mysql/mecab/dic/ipadic_utf-8" | \
+    run sudo tee /usr/lib64/mysql/mecab/etc/mecabrc
   run sudo systemctl start mysqld
   tmp_password=$(sudo grep 'A temporary password' /var/log/mysqld.log | \
                     sed -e 's/^.*: //' | tail -1)
   run sudo mysql -u root "-p${tmp_password}" \
       --connect-expired-password \
       -e "ALTER USER user() IDENTIFIED BY ''; CREATE USER root@'%'; GRANT ALL ON *.* TO root@'%' WITH GRANT OPTION"
+  run sudo mysql -u root -e "INSTALL PLUGIN mecab SONAME 'libpluginmecab.so'"
 }
 
 setup_benchmark_db_mroonga()
@@ -97,16 +109,23 @@ setup_benchmark_db_mroonga()
   run mysql -u root -e "CREATE DATABASE ${mroonga_db}"
 }
 
-setup_benchmark_db_innodb()
+setup_benchmark_db_innodb_ngram()
+{
+  run mysql -u root -e "DROP DATABASE IF EXISTS ${innodb_ngram_db}"
+  run mysql -u root -e "CREATE DATABASE ${innodb_ngram_db}"
+}
+
+setup_benchmark_db_innodb_mecab()
 {
-  run mysql -u root -e "DROP DATABASE IF EXISTS ${innodb_db}"
-  run mysql -u root -e "CREATE DATABASE ${innodb_db}"
+  run mysql -u root -e "DROP DATABASE IF EXISTS ${innodb_mecab_db}"
+  run mysql -u root -e "CREATE DATABASE ${innodb_mecab_db}"
 }
 
 setup_benchmark_db()
 {
   setup_benchmark_db_mroonga
-  setup_benchmark_db_innodb
+  setup_benchmark_db_innodb_ngram
+  setup_benchmark_db_innodb_mecab
 }
 
 load_data_mroonga()
@@ -123,24 +142,39 @@ load_data_mroonga()
       sh -c "du -hsc /var/lib/mysql/${mroonga_db}.mrn*"
 }
 
-load_data_innodb()
+load_data_innodb_ngram()
 {
   run sudo -H systemctl restart mysqld
 
-  echo "InnoDB: data: load:"
-  run mysql -u root ${innodb_db} < \
+  echo "InnoDB: ngram: data: load:"
+  run mysql -u root ${innodb_ngram_db} < \
       "${config_dir}/schema.innodb.sql"
-  time mysql -u root ${innodb_db} \
+  time mysql -u root ${innodb_ngram_db} \
        -e "LOAD DATA LOCAL INFILE '${data_dir}/ja-all-pages.csv' INTO TABLE wikipedia FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"'"
-  echo "InnoDB: data: load: size:"
+  echo "InnoDB: ngram: data: load: size:"
   run sudo -u mysql -H \
-      sh -c "du -hsc /var/lib/mysql/${innodb_db}/*"
+      sh -c "du -hsc /var/lib/mysql/${innodb_ngram_db}/*"
+}
+
+load_data_innodb_mecab()
+{
+  run sudo -H systemctl restart mysqld
+
+  echo "InnoDB: mecab: data: load:"
+  run mysql -u root ${innodb_mecab_db} < \
+      "${config_dir}/schema.innodb.sql"
+  time mysql -u root ${innodb_mecab_db} \
+       -e "LOAD DATA LOCAL INFILE '${data_dir}/ja-all-pages.csv' INTO TABLE wikipedia FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"'"
+  echo "InnoDB: mecab: data: load: size:"
+  run sudo -u mysql -H \
+      sh -c "du -hsc /var/lib/mysql/${innodb_mecab_db}/*"
 }
 
 load_data()
 {
   load_data_mroonga
-  load_data_innodb
+  load_data_innodb_ngram
+  load_data_innodb_mecab
 }
 
 benchmark_create_index_mroonga()
@@ -161,20 +195,38 @@ benchmark_create_index_mroonga()
   done
 }
 
-benchmark_create_index_innodb()
+benchmark_create_index_innodb_ngram()
+{
+  run sudo -H systemctl restart mysqld
+
+  for i in $(seq ${n_create_index_tries}); do
+    echo "InnoDB: ngram: create index: ${i}:"
+    mysql -u root ${innodb_ngram_db} \
+          -e "ALTER TABLE DROP INDEX fulltext_index"
+    time mysql -u root ${innodb_ngram_db} < \
+         "${config_dir}/indexes.innodb.ngram.sql"
+    if [ ${i} -eq 1 ]; then
+      echo "InnoDB: ngram: create index: size:"
+      run sudo -u mysql -H \
+          sh -c "du -hsc /var/lib/mysql/${innodb_ngram_db}/*"
+    fi
+  done
+}
+
+benchmark_create_index_innodb_mecab()
 {
   run sudo -H systemctl restart mysqld
 
   for i in $(seq ${n_create_index_tries}); do
-    echo "InnoDB: create index: ${i}:"
-    mysql -u root ${innodb_db} \
+    echo "InnoDB: mecab: create index: ${i}:"
+    mysql -u root ${innodb_mecab_db} \
           -e "ALTER TABLE DROP INDEX fulltext_index"
-    time mysql -u root ${innodb_db} < \
-         "${config_dir}/indexes.innodb.sql"
+    time mysql -u root ${innodb_mecab_db} < \
+         "${config_dir}/indexes.innodb.mecab.sql"
     if [ ${i} -eq 1 ]; then
-      echo "InnoDB: create index: size:"
+      echo "InnoDB: mecab: create index: size:"
       run sudo -u mysql -H \
-          sh -c "du -hsc /var/lib/mysql/${innodb_db}/*"
+          sh -c "du -hsc /var/lib/mysql/${innodb_mecab_db}/*"
     fi
   done
 }
@@ -182,7 +234,8 @@ benchmark_create_index_innodb()
 benchmark_create_index()
 {
   benchmark_create_index_mroonga
-  benchmark_create_index_innodb
+  benchmark_create_index_innodb_ngram
+  benchmark_create_index_innodb_mecab
 }
 
 benchmark_search_mroonga()
@@ -200,7 +253,23 @@ benchmark_search_mroonga()
   done
 }
 
-benchmark_search_innodb()
+benchmark_search_innodb_ngram()
+{
+  run sudo -H systemctl restart mysqld
+
+  cat "${benchmark_dir}/search-words.list" | while read search_word; do
+    for i in $(seq ${n_search_tries}); do
+      query=$(echo ${search_word} | sed -e "s/ OR / /g")
+      where="MATCH(title, text) AGAINST('${query}' IN BOOLEAN MODE)"
+      echo "InnoDB: ngram: search: ${where}: ${i}:"
+      time mysql --default-character-set=utf8mb4 \
+           -u root ${innodb_ngram_db} \
+           -e "SELECT SQL_NO_CACHE COUNT(*) FROM wikipedia WHERE ${where}"
+    done
+  done
+}
+
+benchmark_search_innodb_mecab()
 {
   run sudo -H systemctl restart mysqld
 
@@ -208,9 +277,9 @@ benchmark_search_innodb()
     for i in $(seq ${n_search_tries}); do
       query=$(echo ${search_word} | sed -e "s/ OR / /g")
       where="MATCH(title, text) AGAINST('${query}' IN BOOLEAN MODE)"
-      echo "InnoDB: search: ${where}: ${i}:"
+      echo "InnoDB: mecab: search: ${where}: ${i}:"
       time mysql --default-character-set=utf8mb4 \
-           -u root ${innodb_db} \
+           -u root ${innodb_mecab_db} \
            -e "SELECT SQL_NO_CACHE COUNT(*) FROM wikipedia WHERE ${where}"
     done
   done
@@ -219,7 +288,8 @@ benchmark_search_innodb()
 benchmark_search()
 {
   benchmark_search_mroonga
-  benchmark_search_innodb
+  benchmark_search_innodb_ngram
+  benchmark_search_innodb_mecab
 }
 
 show_environment
@@ -229,6 +299,7 @@ ensure_data
 setup_mysql_repository
 setup_groonga_repository
 install_mroonga
+setup_mysql
 
 setup_benchmark_db
 

  Copied: config/sql/indexes.innodb.mecab.sql (+1 -1) 76%
===================================================================
--- config/sql/indexes.innodb.sql    2016-02-08 19:39:03 +0900 (3ea725e)
+++ config/sql/indexes.innodb.mecab.sql    2016-02-09 00:29:03 +0900 (bd2c913)
@@ -1,2 +1,2 @@
 ALTER TABLE wikipedia ADD FULLTEXT INDEX fulltext_index (title, text)
-  WITH PARSER ngram;
+  WITH PARSER MeCab;

  Renamed: config/sql/indexes.innodb.ngram.sql (+0 -0) 100%
===================================================================
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index