Kouhei Sutou
null+****@clear*****
Fri Apr 4 11:42:20 JST 2014
Kouhei Sutou 2014-04-04 11:42:20 +0900 (Fri, 04 Apr 2014) New Revision: 0dbc3f3b2e995b8fc4fe9068fda20f9e990d577d https://github.com/droonga/wikipedia-search/commit/0dbc3f3b2e995b8fc4fe9068fda20f9e990d577d Message: Extract downloader Added files: lib/wikipedia-search/downloader.rb lib/wikipedia-search/task/download.rb Removed files: lib/task/download.rb Modified files: Rakefile Modified: Rakefile (+1 -1) =================================================================== --- Rakefile 2014-04-04 11:39:27 +0900 (10754c7) +++ Rakefile 2014-04-04 11:42:20 +0900 (48ce133) @@ -7,4 +7,4 @@ lib_dir_path = base_dir_path + "lib" $LOAD_PATH.unshift(lib_dir_path.to_s) -require "task/download" +require "wikipedia-search/task/download" Deleted: lib/task/download.rb (+0 -62) 100644 =================================================================== --- lib/task/download.rb 2014-04-04 11:39:27 +0900 (2022286) +++ /dev/null @@ -1,62 +0,0 @@ -require "open-uri" - -def format_size(size) - if size < 1024 - "%d" % size - elsif size < (1024 ** 2) - "%7.2fKiB" % (size.to_f / 1024) - elsif size < (1024 ** 3) - "%7.2fMiB" % (size.to_f / (1024 ** 2)) - elsif size < (1024 ** 4) - "%7.2fGiB" % (size.to_f / (1024 ** 3)) - else - "%.2fTiB" % (size.to_f / (1024 ** 4)) - end -end - -def download(url, output_path) - base_name = File.basename(url) - max = nil - content_length_proc = lambda do |content_length| - max = content_length - end - progress_proc = lambda do |current| - if max - percent = (current / max.to_f) * 100 - formatted_size = "[%s/%s]" % [format_size(current), format_size(max)] - print("\r%s - %06.2f%% %s" % [base_name, percent, formatted_size]) - puts if current == max - end - end - options = { - :content_length_proc => content_length_proc, - :progress_proc => progress_proc, - } - - open(url, options) do |input| - output_path.open("wb") do |output| - chunk = "" - chunk_size = 8192 - while input.read(chunk_size, chunk) - output.print(chunk) - end - end - end -end - -namespace :data do 
- data_dir_path = Pathname.new("data") - directory data_dir_path.to_s - - namespace :download do - base_name = "jawiki-latest-pages-articles.xml.bz2" - ja_data_path = data_dir_path + base_name - file ja_data_path.to_s => data_dir_path.to_s do - download("http://dumps.wikimedia.org/jawiki/latest/#{base_name}", - ja_data_path) - end - - desc "Download the latest Japanese Wikipedia data." - task :ja => ja_data_path.to_s - end -end Added: lib/wikipedia-search/downloader.rb (+61 -0) 100644 =================================================================== --- /dev/null +++ lib/wikipedia-search/downloader.rb 2014-04-04 11:42:20 +0900 (38792c0) @@ -0,0 +1,61 @@ +require "open-uri" + +module WikipediaSearch + class Downloader + class << self + def download(url, output_path) + new(url, output_path).download + end + end + + def initialize(url, output_path) + @url = url + @output_path = output_path + end + + def download + base_name = File.basename(@url) + max = nil + content_length_proc = lambda do |content_length| + max = content_length + end + progress_proc = lambda do |current| + if max + percent = (current / max.to_f) * 100 + formatted_size = "[%s/%s]" % [format_size(current), format_size(max)] + print("\r%s - %06.2f%% %s" % [base_name, percent, formatted_size]) + puts if current == max + end + end + options = { + :content_length_proc => content_length_proc, + :progress_proc => progress_proc, + } + + open(@url, options) do |input| + @output_path.open("wb") do |output| + chunk = "" + chunk_size = 8192 + while input.read(chunk_size, chunk) + output.print(chunk) + end + end + end + end + + private + def format_size(size) + if size < 1024 + "%d" % size + elsif size < (1024 ** 2) + "%7.2fKiB" % (size.to_f / 1024) + elsif size < (1024 ** 3) + "%7.2fMiB" % (size.to_f / (1024 ** 2)) + elsif size < (1024 ** 4) + "%7.2fGiB" % (size.to_f / (1024 ** 3)) + else + "%.2fTiB" % (size.to_f / (1024 ** 4)) + end + end + end +end Added: lib/wikipedia-search/task/download.rb (+18 -0) 100644 
=================================================================== --- /dev/null +++ lib/wikipedia-search/task/download.rb 2014-04-04 11:42:20 +0900 (c7d9a24) @@ -0,0 +1,18 @@ +require "wikipedia-search/downloader" + +namespace :data do + data_dir_path = Pathname.new("data") + directory data_dir_path.to_s + + namespace :download do + base_name = "jawiki-latest-pages-articles.xml.bz2" + ja_data_path = data_dir_path + base_name + file ja_data_path.to_s => data_dir_path.to_s do + url = "http://dumps.wikimedia.org/jawiki/latest/#{base_name}" + WikipediaSearch::Downloader.download(url, ja_data_path) + end + + desc "Download the latest Japanese Wikipedia data." + task :ja => ja_data_path.to_s + end +end -------------- next part -------------- HTMLの添付ファイルを保管しました...Download