Kouhei Sutou
null+****@clear*****
Sun Feb 7 17:19:50 JST 2016
Kouhei Sutou 2016-02-07 17:19:50 +0900 (Sun, 07 Feb 2016) New Revision: d92ff5da63bddca42ba4a2614024e14dc3ada70e https://github.com/groonga/wikipedia-search/commit/d92ff5da63bddca42ba4a2614024e14dc3ada70e Message: Add benchmark script for Mroonga, MySQL InnoDB, PGroonga and pg_bigm Added files: benchmark/centos7/mysql-postgresql.sh config/sql/indexes.innodb.sql config/sql/schema.innodb.sql Modified files: config/sql/indexes.mroonga.sql Added: benchmark/centos7/mysql-postgresql.sh (+389 -0) 100755 =================================================================== --- /dev/null +++ benchmark/centos7/mysql-postgresql.sh 2016-02-07 17:19:50 +0900 (1f356be) @@ -0,0 +1,389 @@ +#!/bin/bash + +set -u + +LANG=C + +n_load_tries=1 +n_create_index_tries=1 +n_search_tries=10 + +script_dir=$(cd "$(dirname $0)"; pwd) +base_dir="${script_dir}/../.." +config_dir="${base_dir}/config/sql" +data_dir="${base_dir}/data/csv" +benchmark_dir="${base_dir}/benchmark" + +mroonga_db="benchmark_mroonga" +innodb_db="benchmark_innodb" + +pgroonga_db="benchmark_pgroonga" +pg_bigm_db="benchmark_pg_bigm" + +run() +{ + "$@" + if test $? -ne 0; then + echo "Failed $@" + exit 1 + fi +} + +show_environment() +{ + echo "CPU:" + cat /proc/cpuinfo + + echo "Memory:" + free +} + +ensure_data() +{ + if [ -f "${data_dir}/ja-all-pages.csv" ]; then + return + fi + +# if which rake > /dev/null 2>&1; then +# run rake data/sql/ja-all-pages.csv +# return +# fi + + run sudo -H yum install -y epel-release + run sudo -H yum install -y wget xz + run mkdir -p "${data_dir}" + cd "${data_dir}" + run wget --no-verbose http://packages.groonga.org/tmp/ja-all-pages.csv.xz + run unxz ja-all-pages.csv.xz + cd - +} + +setup_mysql_repository() +{ + os_version=$(run rpm -qf --queryformat="%{VERSION}" /etc/redhat-release) + run sudo yum install -y \ + http://repo.mysql.com/mysql-community-release-el${os_version}-7.noarch.rpm + run sudo yum install -y yum-utils + run sudo yum-config-manager --disable mysql56-community + run sudo yum-config-manager --enable mysql57-community +} + +setup_postgresql_repository() +{ + os_version=$(run rpm -qf --queryformat="%{VERSION}" /etc/redhat-release) + os_arch=$(run rpm -qf --queryformat="%{ARCH}" /etc/redhat-release) + run sudo yum install -y \ + http://yum.postgresql.org/9.5/redhat/rhel-${os_version}-${os_arch}/pgdg-centos95-9.5-2.noarch.rpm +} + +setup_groonga_repository() +{ + run sudo yum install -y \ + http://packages.groonga.org/centos/groonga-release-1.1.0-1.noarch.rpm +} + +install_groonga_tokenizer_mecab() +{ + run sudo yum install -y groonga-tokenizer-mecab +} + +install_mroonga() +{ + run sudo yum install -y mysql57-community-mroonga + echo "character-set-server=utf8mb4" | run sudo tee --append /etc/my.cnf + echo "validate-password=off" | run sudo tee --append /etc/my.cnf + run sudo systemctl start mysqld + tmp_password=$(sudo grep 'A temporary password' /var/log/mysqld.log | \ + sed -e 's/^.*: //' | tail -1) + run sudo mysql -u root "-p${tmp_password}" \ + --connect-expired-password \ + -e "ALTER USER user() IDENTIFIED BY ''; CREATE USER root@'%'; GRANT ALL ON *.* TO root@'%' WITH GRANT OPTION" +} + +install_pgroonga() +{ + run sudo yum install -y postgresql95-pgroonga +} + +install_pg_bigm() +{ + run sudo yum install -y postgresql95-devel gcc + run wget http://osdn.dl.osdn.jp/pgbigm/63792/pg_bigm-1.1-20150910.tar.gz + run tar xvf pg_bigm-1.1-20150910.tar.gz + run cd pg_bigm-1.1-20150910 + run PATH=/usr/pgsql-9.5/bin:$PATH make USE_PGXS=1 + run sudo env PATH=/usr/pgsql-9.5/bin:$PATH make USE_PGXS=1 install + run cd .. +} + +setup_postgresql() +{ + run sudo -H /usr/pgsql-9.5/bin/postgresql95-setup initdb + run sudo -H systemctl enable postgresql-9.5 + run sudo -H systemctl start postgresql-9.5 +} + +setup_benchmark_db_mroonga() +{ + run mysql -u root -e "DROP DATABASE IF EXISTS ${mroonga_db}" + run mysql -u root -e "CREATE DATABASE ${mroonga_db}" +} + +setup_benchmark_db_innodb() +{ + run mysql -u root -e "DROP DATABASE IF EXISTS ${innodb_db}" + run mysql -u root -e "CREATE DATABASE ${innodb_db}" +} + +setup_benchmark_db_pgroonga() +{ + run sudo -u postgres -H psql \ + --command "DROP DATABASE IF EXISTS ${pgroonga_db}" + run sudo -u postgres -H psql \ + --command "CREATE DATABASE ${pgroonga_db}" + run sudo -u postgres -H psql -d ${pgroonga_db} \ + --command "CREATE EXTENSION pgroonga" +} + +setup_benchmark_db_pg_bigm() +{ + run sudo -u postgres -H psql \ + --command "DROP DATABASE IF EXISTS ${pg_bigm_db}" + run sudo -u postgres -H psql \ + --command "CREATE DATABASE ${pg_bigm_db}" + run sudo -u postgres -H psql -d ${pg_bigm_db} \ + --command "CREATE EXTENSION pg_bigm" +} + +setup_benchmark_db() +{ + setup_benchmark_db_mroonga + setup_benchmark_db_innodb + setup_benchmark_db_pgroonga + setup_benchmark_db_pg_bigm +} + +database_oid() +{ + sudo -u postgres -H psql \ + --command "SELECT datid FROM pg_stat_database WHERE datname = '$1'" | \ + head -3 | \ + tail -1 | \ + sed -e 's/ *//g' +} + +load_data_mroonga() +{ + echo "Mroonga: data: load:" + run mysql -u root ${mroonga_db} < \ + "${config_dir}/schema.mroonga.sql" + time mysql -u root ${mroonga_db} \ + -e "LOAD DATA LOCAL INFILE '${data_dir}/ja-all-pages.csv' INTO TABLE wikipedia FIELDS TERMINATED BY ',' OPTINALLY ENCLOSED BY '\"'" + echo "Mroonga: data: load: size:" + run sudo -u mysql -H \ + sh -c "du -hsc /var/lib/mysql/data/${mroonga_db}.mrn*" +} + +load_data_innodb() +{ + echo "InnoDB: data: load:" + run mysql -u root ${innodb_db} < \ + "${config_dir}/schema.innodb.sql" + time mysql -u root ${innodb_db} \ + -e "LOAD DATA LOCAL INFILE '${data_dir}/ja-all-pages.csv' INTO TABLE wikipedia FIELDS TERMINATED BY ',' OPTINALLY ENCLOSED BY '\"'" + echo "InnoDB: data: load: size:" + run sudo -u mysql -H \ + sh -c "du -hsc /var/lib/mysql/data/${innodb_db}/*" +} + +load_data_pgroonga() +{ + echo "PGroonga: data: load:" + run sudo -u postgres -H psql -d ${pgroonga_db} < \ + "${config_dir}/schema.postgresql.sql" + time run sudo -u postgres -H psql -d ${pgroonga_db} \ + --command "COPY wikipedia FROM '${data_dir}/ja-all-pages.csv' WITH CSV ENCODING 'utf8'" + echo "PGroonga: data: load: size:" + run sudo -u postgres -H \ + sh -c "du -hsc /var/lib/pgsql/9.5/data/base/$(database_oid ${pgroonga_db})/*" +} + +load_data_pg_bigm() +{ + echo "pg_bigm: data: load:" + run sudo -u postgres -H psql -d ${pg_bigm_db} < \ + "${config_dir}/schema.postgresql.sql" + time run sudo -u postgres -H psql -d ${pg_bigm_db} \ + --command "COPY wikipedia FROM '${data_dir}/ja-all-pages.csv' WITH CSV ENCODING 'utf8'" + echo "pg_biggm: data: load: size:" + run sudo -u postgres -H \ + sh -c "du -hsc /var/lib/pgsql/9.5/data/base/$(database_oid ${pg_bigm_db})/*" +} + +load_data() +{ + load_data_mroonga + load_data_innodb + load_data_pgroonga + load_data_pg_bigm +} + +benchmark_create_index_mroonga() +{ + for i in $(seq ${n_create_index_tries}); do + echo "Mroonga: create index: ${i}:" + mysql -u root ${mroonga_db} \ + -e "ALTER TABLE DROP INDEX fulltext_index" + time mysql -u root ${mroonga_db} < \ + "${config_dir}/indexes.mroonga.sql" + if [ ${i} -eq 1 ]; then + echo "Mroonga: create index: size:" + run sudo -u mysql -H \ + sh -c "du -hsc /var/lib/mysql/data/${mroonga_db}.mrn*" + fi + done +} + +benchmark_create_index_mroonga() +{ + for i in $(seq ${n_create_index_tries}); do + echo "InnoDB: create index: ${i}:" + mysql -u root ${innodb_db} \ + -e "ALTER TABLE DROP INDEX fulltext_index" + time mysql -u root ${innodb_db} < \ + "${config_dir}/indexes.innodb.sql" + if [ ${i} -eq 1 ]; then + echo "InnoDB: create index: size:" + run sudo -u mysql -H \ + sh -c "du -hsc /var/lib/mysql/data/${innodb_db}/*" + fi + done +} + +benchmark_create_index_pgroonga() +{ + run sudo -H systemctl restart postgresql-9.5 + + for i in $(seq ${n_create_index_tries}); do + echo "PGroonga: create index: ${i}:" + run sudo -u postgres -H psql -d ${pgroonga_db} \ + --command "DROP INDEX IF EXISTS wikipedia_index_pgroonga" + time run sudo -u postgres -H psql -d ${pgroonga_db} < \ + "${config_dir}/indexes.pgroonga.sql" + if [ ${i} -eq 1 ]; then + echo "PGroonga: create index: size:" + run sudo -u postgres -H \ + sh -c "du -hsc /var/lib/pgsql/9.5/data/base/$(database_oid ${pgroonga_db})/pgrn*" + fi + done +} + +benchmark_create_index_pg_bigm() +{ + run sudo -H systemctl restart postgresql-9.5 + + for i in $(seq ${n_create_index_tries}); do + echo "pg_bigm: create index: ${i}:" + run sudo -u postgres -H psql -d ${pg_bigm_db} \ + --command "DROP INDEX IF EXISTS wikipedia_index_pg_bigm" + time run sudo -u postgres -H psql -d ${pg_bigm_db} < \ + "${config_dir}/indexes.pg_bigm.sql" + if [ ${i} -eq 1 ]; then + echo "pg_bigm: create index: size:" + pg_bigm_data_path=$(sudo -u postgres -H psql -d ${pg_bigm_db} \ + --command "SELECT pg_relation_filepath(oid) FROM pg_class WHERE relname = 'wikipedia_index_pg_bigm'" | \ + head -3 | \ + tail -1 | \ + sed -e 's/ *//g') + run sudo -u postgres -H \ + sh -c "du -hsc /var/lib/pgsql/9.5/data/${pg_bigm_data_path}*" + fi + done +} + +benchmark_create_index() +{ + benchmark_create_index_mroonga + benchmark_create_index_innodb + benchmark_create_index_pgroonga + benchmark_create_index_pg_bigm +} + +benchmark_search_mroonga() +{ + cat "${benchmark_dir}/search-words.list" | while read search_word; do + for i in $(seq ${n_search_tries}); do + where="MATCH(title, text) AGAINST('*D+ ${search_word}' IN BOOLEAN MODE)" + echo "Mroonga: search: ${where}: ${i}:" + time run mysql -u root ${mroonga_db} \ + -e "SELECT COUNT(*) FROM wikipedia WHERE ${where}" + done + done +} + +benchmark_search_innodb() +{ + cat "${benchmark_dir}/search-words.list" | while read search_word; do + for i in $(seq ${n_search_tries}); do + where="MATCH(title, text) AGAINST('${search_word}' IN BOOLEAN MODE)" + echo "InnoDB: search: ${where}: ${i}:" + time run mysql -u root ${innodb_db} \ + -e "SELECT COUNT(*) FROM wikipedia WHERE ${where}" + done + done +} + +benchmark_search_pgroonga() +{ + work_mem_size='10MB' + work_mem="SET work_mem = '${work_mem_size}';" + cat "${benchmark_dir}/search-words.list" | while read search_word; do + for i in $(seq ${n_search_tries}); do + where="text @@ '${search_word}'" + echo "PGroonga: search: ${where}: ${i}:" + time run sudo -u postgres -H psql -d ${pgroonga_db} \ + --command "${work_mem} SELECT COUNT(*) FROM wikipedia WHERE ${where}" + done + done +} + +benchmark_search_pg_bigm() +{ + work_mem_size='10MB' + work_mem="SET work_mem = '${work_mem_size}';" + cat "${benchmark_dir}/search-words.list" | while read search_word; do + for i in $(seq ${n_search_tries}); do + where="text LIKE '%${search_word}%'" + where=$(echo $where | sed -e "s/ OR /%' OR text LIKE '%/g") + echo "pg_bigm: search: ${where}: ${i}:" + time run sudo -u postgres -H psql -d ${pg_bigm_db} \ + --command "${work_mem} SELECT COUNT(*) FROM wikipedia WHERE ${where}" + done + done +} + +benchmark_search() +{ + benchmark_search_mroonga + benchmark_search_innodb + benchmark_search_pgroonga + benchmark_search_pg_bigm +} + +show_environment + +ensure_data + +setup_mysql_repository +setup_postgresql_repository +setup_groonga_repository +install_mroonga +install_pgroonga +install_pg_bigm + +setup_postgresql +setup_benchmark_db + +load_data +benchmark_create_index +benchmark_search Added: config/sql/indexes.innodb.sql (+2 -0) 100644 =================================================================== --- /dev/null +++ config/sql/indexes.innodb.sql 2016-02-07 17:19:50 +0900 (3ea725e) @@ -0,0 +1,2 @@ +ALTER TABLE wikipedia ADD FULLTEXT INDEX fulltext_index (title, text) + WITH PARSER ngram; Modified: config/sql/indexes.mroonga.sql (+1 -1) =================================================================== --- config/sql/indexes.mroonga.sql 2016-02-07 15:39:33 +0900 (4cc6840) +++ config/sql/indexes.mroonga.sql 2016-02-07 17:19:50 +0900 (8d17ac6) @@ -1 +1 @@ -ALTER TABLE wikipedia ADD FULLTEXT INDEX (title, text); +ALTER TABLE wikipedia ADD FULLTEXT INDEX fulltext_index (title, text); Added: config/sql/schema.innodb.sql (+6 -0) 100644 =================================================================== --- /dev/null +++ config/sql/schema.innodb.sql 2016-02-07 17:19:50 +0900 (0ea0a8e) @@ -0,0 +1,6 @@ +DROP TABLE IF EXISTS wikipedia; +CREATE TABLE wikipedia ( + id integer PRIMARY KEY, + title text, + text text +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4; -------------- next part -------------- HTML����������������������������...Download