require "benchmark"
require "rabbit/task/slide"
# Edit ./config.yaml to customize metadata
# Define the Rabbit slide tasks and keep a handle on the slide's spec so the
# tasks below can read its version.
# NOTE(review): this relies on Rabbit::Task::Slide.new yielding to the block
# right away, so that `spec` is set before it is read further down — confirm.
spec = nil
Rabbit::Task::Slide.new do |slide_task|
  spec = slide_task.spec
  # Package every file under images/ (recursively) whose name contains a dot.
  spec.files += Dir.glob("images/**/*.*")
  # spec.files -= Dir.glob("private/**/*.*")
  spec.add_runtime_dependency("groonga", ">= 1.0.2")
end
# Create an annotated git tag named after the slide's version, then push all
# tags. The description is built when the file is loaded, so `spec` must
# already be populated at that point.
desc "Tag #{spec.version}"
task :tag do
  version = spec.version.to_s
  sh("git", "tag", "-a", version, "-m", "Publish #{version}")
  sh("git", "push", "--tags")
end
# Benchmark tasks: download the data archive, load several differently sized
# `ratings_*` tables into the `full_text_search` MySQL database, and time
# keyword searches against each of them.
namespace :benchmark do
  benchmark_dir = "benchmark"
  # NOTE(review): tmp_dir and data_dir resolve to the same path, so the
  # prepared time stamp lives next to the downloaded data — confirm whether a
  # separate "#{benchmark_dir}/tmp" directory was intended.
  tmp_dir = "#{benchmark_dir}/data"
  data_dir = "#{benchmark_dir}/data"
  directory tmp_dir
  directory data_dir

  # Fetch the archive into data_dir.
  data_download_base_url = "https://github.com/livedoor/datasets/raw/master"
  data_tgz = "#{data_dir}/ldgourmet.tar.gz"
  file data_tgz => data_dir do
    sh("wget",
       "--output-document", data_tgz,
       "#{data_download_base_url}/#{File.basename(data_tgz)}")
  end

  # Extract the archive inside data_dir.
  ratings_csv = "#{data_dir}/ratings.csv"
  file ratings_csv => data_tgz do
    cd(data_dir) do
      sh("tar", "xvf", File.basename(data_tgz))
    end
    # Presumably tar restores the archived mtime; touching the file makes it
    # newer than the archive so this task is not considered out of date again.
    touch(ratings_csv) if File.exist?(ratings_csv)
  end

  # Each entry names one `ratings_<entry>` table. Integers limit how many
  # records are loaded; symbols load the whole file.
  n_records_list = [
    1000,
    5000,
    10000,
    100000,
    :all,
    :all_index,
    :all_index_id,
  ]

  prepared_time_stamp = "#{tmp_dir}/prepared.time_stamp"
  initialize_database_sql = "#{benchmark_dir}/initialize-database.sql"
  convert_ratings_to_sql = "#{benchmark_dir}/convert-ratings-to-sql.rb"
  prepare_files = [
    tmp_dir,
    ratings_csv,
    initialize_database_sql,
    convert_ratings_to_sql,
  ]
  # (Re)initialize the database and load every table, then record completion
  # in the time stamp file so the work is skipped until an input changes.
  file prepared_time_stamp => prepare_files do
    sh("mysql -u root < #{initialize_database_sql}")
    n_records_list.each do |n_records|
      if n_records.is_a?(Symbol)
        read_command = "cat"
      else
        # One extra line is read — presumably for a CSV header row.
        read_command = "head -#{n_records + 1}"
      end
      sh("#{read_command} #{ratings_csv} | " +
         "#{convert_ratings_to_sql} ratings_#{n_records} | " +
         "mysql -u root full_text_search")
    end
    touch(prepared_time_stamp)
  end

  desc "Run benchmark"
  task :run => prepared_time_stamp do
    # Each query is [operator, keyword, ...].
    queries = [
      [:and, "ラーメン"],
      [:and, "ラーメン", "焼き肉"],
      [:or, "ラーメン", "焼き肉"],
    ]
    queries.each do |query|
      operator, *keywords = query
      n_records_list.each do |n_records|
        if n_records == :all_index || n_records == :all_index_id
          # These tables are searched with MATCH ... AGAINST in boolean mode;
          # a "+" prefix marks a keyword as required (AND semantics).
          keywords_in_boolean_mode = keywords.map do |keyword|
            operator == :and ? "+#{keyword}" : keyword
          end
          boolean_mode_query = keywords_in_boolean_mode.join(" ")
          condition = "MATCH (comment) " +
                      "AGAINST (\"#{boolean_mode_query}\" IN BOOLEAN MODE)"
        else
          # Other tables are searched with LIKE on the `comment` column.
          # `comment` is a literal SQL column name, not a Ruby expression.
          conditions = keywords.map do |keyword|
            "comment LIKE \"%#{keyword}%\""
          end
          condition = conditions.join(" #{operator.to_s.upcase} ")
        end
        table_name = "ratings_#{n_records}"

        # Report comment length statistics for the table (not timed).
        sql = <<-SQL
          SELECT AVG(CHAR_LENGTH(comment)) AS average,
                 MIN(CHAR_LENGTH(comment)) as min,
                 MAX(CHAR_LENGTH(comment)) as max
            FROM #{table_name};
        SQL
        sh("mysql -u root full_text_search -e '#{sql}'")

        # Timed search with the query cache disabled for the session.
        # NOTE(review): query_cache_type no longer exists in MySQL 8.0 —
        # confirm the target server version.
        sql = <<-SQL
          SET SESSION query_cache_type = OFF;
          SELECT COUNT(*) FROM #{table_name} WHERE #{condition};
        SQL
        # The measurement covers the whole mysql client invocation.
        elapsed = Benchmark.measure do
          sh("mysql -u root full_text_search -e '#{sql}'")
        end
        puts("#{n_records}: #{elapsed}")
        $stdout.flush
      end
    end
  end
end