[Groonga-commit] droonga/wikipedia-search at 0dbc3f3 [master] Extract downloader

Back to archive index

Kouhei Sutou null+****@clear*****
Fri Apr 4 11:42:20 JST 2014


Kouhei Sutou	2014-04-04 11:42:20 +0900 (Fri, 04 Apr 2014)

  New Revision: 0dbc3f3b2e995b8fc4fe9068fda20f9e990d577d
  https://github.com/droonga/wikipedia-search/commit/0dbc3f3b2e995b8fc4fe9068fda20f9e990d577d

  Message:
    Extract downloader

  Added files:
    lib/wikipedia-search/downloader.rb
    lib/wikipedia-search/task/download.rb
  Removed files:
    lib/task/download.rb
  Modified files:
    Rakefile

  Modified: Rakefile (+1 -1)
===================================================================
--- Rakefile    2014-04-04 11:39:27 +0900 (10754c7)
+++ Rakefile    2014-04-04 11:42:20 +0900 (48ce133)
@@ -7,4 +7,4 @@ lib_dir_path = base_dir_path + "lib"
 
 $LOAD_PATH.unshift(lib_dir_path.to_s)
 
-require "task/download"
+require "wikipedia-search/task/download"

  Deleted: lib/task/download.rb (+0 -62) 100644
===================================================================
--- lib/task/download.rb    2014-04-04 11:39:27 +0900 (2022286)
+++ /dev/null
@@ -1,62 +0,0 @@
-require "open-uri"
-
-def format_size(size)
-  if size < 1024
-    "%d" % size
-  elsif size < (1024 ** 2)
-    "%7.2fKiB" % (size.to_f / 1024)
-  elsif size < (1024 ** 3)
-    "%7.2fMiB" % (size.to_f / (1024 ** 2))
-  elsif size < (1024 ** 4)
-    "%7.2fGiB" % (size.to_f / (1024 ** 3))
-  else
-    "%.2fTiB" % (size.to_f / (1024 ** 4))
-  end
-end
-
-def download(url, output_path)
-  base_name = File.basename(url)
-  max = nil
-  content_length_proc = lambda do |content_length|
-    max = content_length
-  end
-  progress_proc = lambda do |current|
-    if max
-      percent = (current / max.to_f) * 100
-      formatted_size = "[%s/%s]" % [format_size(current), format_size(max)]
-      print("\r%s - %06.2f%% %s" % [base_name, percent, formatted_size])
-      puts if current == max
-    end
-  end
-  options = {
-    :content_length_proc => content_length_proc,
-    :progress_proc => progress_proc,
-  }
-
-  open(url, options) do |input|
-    output_path.open("wb") do |output|
-      chunk = ""
-      chunk_size = 8192
-      while input.read(chunk_size, chunk)
-        output.print(chunk)
-      end
-    end
-  end
-end
-
-namespace :data do
-  data_dir_path = Pathname.new("data")
-  directory data_dir_path.to_s
-
-  namespace :download do
-    base_name = "jawiki-latest-pages-articles.xml.bz2"
-    ja_data_path = data_dir_path + base_name
-    file ja_data_path.to_s => data_dir_path.to_s do
-      download("http://dumps.wikimedia.org/jawiki/latest/#{base_name}",
-               ja_data_path)
-    end
-
-    desc "Download the latest Japanese Wikipedia data."
-    task :ja => ja_data_path.to_s
-  end
-end

  Added: lib/wikipedia-search/downloader.rb (+61 -0) 100644
===================================================================
--- /dev/null
+++ lib/wikipedia-search/downloader.rb    2014-04-04 11:42:20 +0900 (38792c0)
@@ -0,0 +1,61 @@
+require "open-uri"
+
+module WikipediaSearch
+  class Downloader
+    class << self
+      def download(url, output_path)
+        new(url, output_path).download
+      end
+    end
+
+    def initialize(url, output_path)
+      @url = url
+      @output_path = output_path
+    end
+
+    def download
+      base_name = File.basename(@url)
+      max = nil
+      content_length_proc = lambda do |content_length|
+        max = content_length
+      end
+      progress_proc = lambda do |current|
+        if max
+          percent = (current / max.to_f) * 100
+          formatted_size = "[%s/%s]" % [format_size(current), format_size(max)]
+          print("\r%s - %06.2f%% %s" % [base_name, percent, formatted_size])
+          puts if current == max
+        end
+      end
+      options = {
+        :content_length_proc => content_length_proc,
+        :progress_proc => progress_proc,
+      }
+
+      open(@url, options) do |input|
+        @output_path.open("wb") do |output|
+          chunk = ""
+          chunk_size = 8192
+          while input.read(chunk_size, chunk)
+            output.print(chunk)
+          end
+        end
+      end
+    end
+
+    private
+    def format_size(size)
+      if size < 1024
+        "%d" % size
+      elsif size < (1024 ** 2)
+        "%7.2fKiB" % (size.to_f / 1024)
+      elsif size < (1024 ** 3)
+        "%7.2fMiB" % (size.to_f / (1024 ** 2))
+      elsif size < (1024 ** 4)
+        "%7.2fGiB" % (size.to_f / (1024 ** 3))
+      else
+        "%.2fTiB" % (size.to_f / (1024 ** 4))
+      end
+    end
+  end
+end

  Added: lib/wikipedia-search/task/download.rb (+18 -0) 100644
===================================================================
--- /dev/null
+++ lib/wikipedia-search/task/download.rb    2014-04-04 11:42:20 +0900 (c7d9a24)
@@ -0,0 +1,18 @@
+require "wikipedia-search/downloader"
+
+namespace :data do
+  data_dir_path = Pathname.new("data")
+  directory data_dir_path.to_s
+
+  namespace :download do
+    base_name = "jawiki-latest-pages-articles.xml.bz2"
+    ja_data_path = data_dir_path + base_name
+    file ja_data_path.to_s => data_dir_path.to_s do
+      url = "http://dumps.wikimedia.org/jawiki/latest/#{base_name}"
+      WikipediaSearch::Downloader.download(url, ja_data_path)
+    end
+
+    desc "Download the latest Japanese Wikipedia data."
+    task :ja => ja_data_path.to_s
+  end
+end
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index