Kouhei Sutou
null+****@clear*****
Tue Feb 9 00:29:03 JST 2016
Kouhei Sutou 2016-02-09 00:29:03 +0900 (Tue, 09 Feb 2016) New Revision: afb6384e3a60bbf37616246594d53ab2f85ab59d https://github.com/groonga/wikipedia-search/commit/afb6384e3a60bbf37616246594d53ab2f85ab59d Message: Add InnoDB FTS + MeCab Copied files: config/sql/indexes.innodb.mecab.sql (from config/sql/indexes.innodb.sql) Modified files: benchmark/centos7/mysql.sh Renamed files: config/sql/indexes.innodb.ngram.sql (from config/sql/indexes.innodb.sql) Modified: benchmark/centos7/mysql.sh (+95 -24) =================================================================== --- benchmark/centos7/mysql.sh 2016-02-08 19:39:03 +0900 (18545c9) +++ benchmark/centos7/mysql.sh 2016-02-09 00:29:03 +0900 (a89da9e) @@ -15,7 +15,8 @@ data_dir="${base_dir}/data/csv" benchmark_dir="${base_dir}/benchmark" mroonga_db="benchmark_mroonga" -innodb_db="benchmark_innodb" +innodb_ngram_db="benchmark_innodb_ngram" +innodb_mecab_db="benchmark_innodb_mecab" run() { @@ -79,16 +80,27 @@ install_groonga_tokenizer_mecab() install_mroonga() { run sudo yum install -y mysql57-community-mroonga +} + +setup_mysql() +{ echo "log-bin" | run sudo tee --append /etc/my.cnf echo "server-id=1" | run sudo tee --append /etc/my.cnf echo "character-set-server=utf8mb4" | run sudo tee --append /etc/my.cnf echo "validate-password=off" | run sudo tee --append /etc/my.cnf + echo "loose-mecab-rc-file=/usr/lib64/mysql/mecab/etc/mecabrc" | \ + run sudo tee --append /etc/my.cnf + echo "innodb-ft-min-token-size=1" | \ + run sudo tee --append /etc/my.cnf + echo "dicdir = /usr/lib64/mysql/mecab/dic/ipadic_utf-8" | \ + run sudo tee /usr/lib64/mysql/mecab/etc/mecabrc run sudo systemctl start mysqld tmp_password=$(sudo grep 'A temporary password' /var/log/mysqld.log | \ sed -e 's/^.*: //' | tail -1) run sudo mysql -u root "-p${tmp_password}" \ --connect-expired-password \ -e "ALTER USER user() IDENTIFIED BY ''; CREATE USER root@'%'; GRANT ALL ON *.* TO root@'%' WITH GRANT OPTION" + run sudo mysql -u root -e "INSTALL PLUGIN mecab 
SONAME 'libpluginmecab.so'" } setup_benchmark_db_mroonga() @@ -97,16 +109,23 @@ setup_benchmark_db_mroonga() run mysql -u root -e "CREATE DATABASE ${mroonga_db}" } -setup_benchmark_db_innodb() +setup_benchmark_db_innodb_ngram() +{ + run mysql -u root -e "DROP DATABASE IF EXISTS ${innodb_ngram_db}" + run mysql -u root -e "CREATE DATABASE ${innodb_ngram_db}" +} + +setup_benchmark_db_innodb_mecab() { - run mysql -u root -e "DROP DATABASE IF EXISTS ${innodb_db}" - run mysql -u root -e "CREATE DATABASE ${innodb_db}" + run mysql -u root -e "DROP DATABASE IF EXISTS ${innodb_mecab_db}" + run mysql -u root -e "CREATE DATABASE ${innodb_mecab_db}" } setup_benchmark_db() { setup_benchmark_db_mroonga - setup_benchmark_db_innodb + setup_benchmark_db_innodb_ngram + setup_benchmark_db_innodb_mecab } load_data_mroonga() @@ -123,24 +142,39 @@ load_data_mroonga() sh -c "du -hsc /var/lib/mysql/${mroonga_db}.mrn*" } -load_data_innodb() +load_data_innodb_ngram() { run sudo -H systemctl restart mysqld - echo "InnoDB: data: load:" - run mysql -u root ${innodb_db} < \ + echo "InnoDB: ngram: data: load:" + run mysql -u root ${innodb_ngram_db} < \ "${config_dir}/schema.innodb.sql" - time mysql -u root ${innodb_db} \ + time mysql -u root ${innodb_ngram_db} \ -e "LOAD DATA LOCAL INFILE '${data_dir}/ja-all-pages.csv' INTO TABLE wikipedia FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"'" - echo "InnoDB: data: load: size:" + echo "InnoDB: ngram: data: load: size:" run sudo -u mysql -H \ - sh -c "du -hsc /var/lib/mysql/${innodb_db}/*" + sh -c "du -hsc /var/lib/mysql/${innodb_ngram_db}/*" +} + +load_data_innodb_mecab() +{ + run sudo -H systemctl restart mysqld + + echo "InnoDB: mecab: data: load:" + run mysql -u root ${innodb_mecab_db} < \ + "${config_dir}/schema.innodb.sql" + time mysql -u root ${innodb_mecab_db} \ + -e "LOAD DATA LOCAL INFILE '${data_dir}/ja-all-pages.csv' INTO TABLE wikipedia FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '\"'" + echo "InnoDB: mecab: data: load: size:" + 
run sudo -u mysql -H \ + sh -c "du -hsc /var/lib/mysql/${innodb_mecab_db}/*" } load_data() { load_data_mroonga - load_data_innodb + load_data_innodb_ngram + load_data_innodb_mecab } benchmark_create_index_mroonga() @@ -161,20 +195,38 @@ benchmark_create_index_mroonga() done } -benchmark_create_index_innodb() +benchmark_create_index_innodb_ngram() +{ + run sudo -H systemctl restart mysqld + + for i in $(seq ${n_create_index_tries}); do + echo "InnoDB: ngram: create index: ${i}:" + mysql -u root ${innodb_ngram_db} \ + -e "ALTER TABLE DROP INDEX fulltext_index" + time mysql -u root ${innodb_ngram_db} < \ + "${config_dir}/indexes.innodb.ngram.sql" + if [ ${i} -eq 1 ]; then + echo "InnoDB: ngram: create index: size:" + run sudo -u mysql -H \ + sh -c "du -hsc /var/lib/mysql/${innodb_ngram_db}/*" + fi + done +} + +benchmark_create_index_innodb_mecab() { run sudo -H systemctl restart mysqld for i in $(seq ${n_create_index_tries}); do - echo "InnoDB: create index: ${i}:" - mysql -u root ${innodb_db} \ + echo "InnoDB: mecab: create index: ${i}:" + mysql -u root ${innodb_mecab_db} \ -e "ALTER TABLE DROP INDEX fulltext_index" - time mysql -u root ${innodb_db} < \ - "${config_dir}/indexes.innodb.sql" + time mysql -u root ${innodb_mecab_db} < \ + "${config_dir}/indexes.innodb.mecab.sql" if [ ${i} -eq 1 ]; then - echo "InnoDB: create index: size:" + echo "InnoDB: mecab: create index: size:" run sudo -u mysql -H \ - sh -c "du -hsc /var/lib/mysql/${innodb_db}/*" + sh -c "du -hsc /var/lib/mysql/${innodb_mecab_db}/*" fi done } @@ -182,7 +234,8 @@ benchmark_create_index_innodb() benchmark_create_index() { benchmark_create_index_mroonga - benchmark_create_index_innodb + benchmark_create_index_innodb_ngram + benchmark_create_index_innodb_mecab } benchmark_search_mroonga() @@ -200,7 +253,23 @@ benchmark_search_mroonga() done } -benchmark_search_innodb() +benchmark_search_innodb_ngram() +{ + run sudo -H systemctl restart mysqld + + cat "${benchmark_dir}/search-words.list" | while read 
search_word; do + for i in $(seq ${n_search_tries}); do + query=$(echo ${search_word} | sed -e "s/ OR / /g") + where="MATCH(title, text) AGAINST('${query}' IN BOOLEAN MODE)" + echo "InnoDB: ngram: search: ${where}: ${i}:" + time mysql --default-character-set=utf8mb4 \ + -u root ${innodb_ngram_db} \ + -e "SELECT SQL_NO_CACHE COUNT(*) FROM wikipedia WHERE ${where}" + done + done +} + +benchmark_search_innodb_mecab() { run sudo -H systemctl restart mysqld @@ -208,9 +277,9 @@ benchmark_search_innodb() for i in $(seq ${n_search_tries}); do query=$(echo ${search_word} | sed -e "s/ OR / /g") where="MATCH(title, text) AGAINST('${query}' IN BOOLEAN MODE)" - echo "InnoDB: search: ${where}: ${i}:" + echo "InnoDB: mecab: search: ${where}: ${i}:" time mysql --default-character-set=utf8mb4 \ - -u root ${innodb_db} \ + -u root ${innodb_mecab_db} \ -e "SELECT SQL_NO_CACHE COUNT(*) FROM wikipedia WHERE ${where}" done done @@ -219,7 +288,8 @@ benchmark_search_innodb() benchmark_search() { benchmark_search_mroonga - benchmark_search_innodb + benchmark_search_innodb_ngram + benchmark_search_innodb_mecab } show_environment @@ -229,6 +299,7 @@ ensure_data setup_mysql_repository setup_groonga_repository install_mroonga +setup_mysql setup_benchmark_db Copied: config/sql/indexes.innodb.mecab.sql (+1 -1) 76% =================================================================== --- config/sql/indexes.innodb.sql 2016-02-08 19:39:03 +0900 (3ea725e) +++ config/sql/indexes.innodb.mecab.sql 2016-02-09 00:29:03 +0900 (bd2c913) @@ -1,2 +1,2 @@ ALTER TABLE wikipedia ADD FULLTEXT INDEX fulltext_index (title, text) - WITH PARSER ngram; + WITH PARSER MeCab; Renamed: config/sql/indexes.innodb.ngram.sql (+0 -0) 100% =================================================================== -------------- next part -------------- HTMLの添付ファイルを保管しました...Download