[Groonga-commit] ranguba/chupa-text at 0c481b5 [master] Add support for multibyte path in tar

Back to archive index
Kouhei Sutou null+****@clear*****
Thu Feb 28 15:14:11 JST 2019


Kouhei Sutou	2019-02-28 15:14:11 +0900 (Thu, 28 Feb 2019)

  Revision: 0c481b5763a6d0ca07f390b8e4d5b3bde084f3ff
  https://github.com/ranguba/chupa-text/commit/0c481b5763a6d0ca07f390b8e4d5b3bde084f3ff

  Message:
    Add support for multibyte path in tar

  Added files:
    lib/chupa-text/path-converter.rb
    test/fixture/tar/utf-8.tar
  Modified files:
    lib/chupa-text/decomposers/tar.rb
    lib/chupa-text/decomposers/zip.rb
    test/decomposers/test-tar.rb

  Modified: lib/chupa-text/decomposers/tar.rb (+6 -2)
===================================================================
--- lib/chupa-text/decomposers/tar.rb    2019-02-28 14:54:41 +0900 (976e248)
+++ lib/chupa-text/decomposers/tar.rb    2019-02-28 15:14:11 +0900 (27e4e9f)
@@ -1,4 +1,4 @@
-# Copyright (C) 2013-2017  Kouhei Sutou <kou****@clear*****>
+# Copyright (C) 2013-2019  Kouhei Sutou <kou****@clear*****>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -17,6 +17,8 @@
 require "stringio"
 require "rubygems/package"
 
+require "chupa-text/path-converter"
+
 module ChupaText
   module Decomposers
     class Tar < Decomposer
@@ -35,7 +37,9 @@ module ChupaText
             entry.extend(CopyStreamable)
             entry_uri = data.uri.dup
             base_path = entry_uri.path.gsub(/\.tar\z/i, "")
-            entry_uri.path = "#{base_path}/#{entry.full_name}"
+            path_converter = PathConverter.new(entry.full_name,
+                                               uri_escape: true)
+            entry_uri.path = "#{base_path}/#{path_converter.convert}"
             extracted = VirtualFileData.new(entry_uri,
                                             entry,
                                             :source_data => data)

  Modified: lib/chupa-text/decomposers/zip.rb (+6 -3)
===================================================================
--- lib/chupa-text/decomposers/zip.rb    2019-02-28 14:54:41 +0900 (af91447)
+++ lib/chupa-text/decomposers/zip.rb    2019-02-28 15:14:11 +0900 (e9d84e9)
@@ -15,10 +15,11 @@
 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 
 require "stringio"
-require "tmpdir"
 
 require "archive/zip"
 
+require "chupa-text/path-converter"
+
 module ChupaText
   module Decomposers
     class Zip < Decomposer
@@ -45,8 +46,10 @@ module ChupaText
             end
             entry_uri = data.uri.dup
             base_path = entry_uri.path.gsub(/\.zip\z/i, "")
-            path = convert_path_encoding(entry.zip_path, base_path.encoding)
-            entry_uri.path = "#{base_path}/#{convert_to_uri_path(path)}"
+            path_converter = PathConverter.new(entry.zip_path,
+                                               encoding: base_path.encoding,
+                                               uri_escape: true)
+            entry_uri.path = "#{base_path}/#{path_converter.convert}"
             entry_data = VirtualFileData.new(entry_uri,
                                              entry.file_data,
                                              source_data: data)

  Added: lib/chupa-text/path-converter.rb (+70 -0) 100644
===================================================================
--- /dev/null
+++ lib/chupa-text/path-converter.rb    2019-02-28 15:14:11 +0900 (ca3c221)
@@ -0,0 +1,70 @@
+# Copyright (C) 2019  Kouhei Sutou <kou****@clear*****>
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+require "cgi/util"
+
+module ChupaText
+  class PathConverter
+    def initialize(path, options={})
+      @path = path
+      @options = options
+    end
+
+    def convert
+      path = @path
+      encoding = @options[:encoding]
+      path = convert_encoding(path, encoding) if encoding
+      path = convert_to_uri_path(path) if @options[:uri_escape]
+      path
+    end
+
+    private
+    def convert_encoding(path, encoding)
+      case path.encoding
+      when Encoding::ASCII_8BIT
+        if path.ascii_only?
+          path.force_encoding(Encoding::UTF_8)
+        else
+          candidates = [
+            Encoding::UTF_8,
+            Encoding::EUC_JP,
+            Encoding::Windows_31J,
+          ]
+          found = false
+          candidates.find do |candidate|
+            path.force_encoding(candidate)
+            if path.valid_encoding?
+              found = true
+              break
+            end
+          end
+          path.force_encoding(Encoding::ASCII_8BIT) unless found
+        end
+      end
+      path.encode(encoding,
+                  invalid: :replace,
+                  undef: :replace,
+                  replace: "")
+    end
+
+    def convert_to_uri_path(path)
+      converted_components = path.split("/").collect do |component|
+        CGI.escape(component)
+      end
+      converted_components.join("/")
+    end
+  end
+end

  Modified: test/decomposers/test-tar.rb (+18 -1)
===================================================================
--- test/decomposers/test-tar.rb    2019-02-28 14:54:41 +0900 (503bc65)
+++ test/decomposers/test-tar.rb    2019-02-28 15:14:11 +0900 (0bc75a7)
@@ -1,4 +1,4 @@
-# Copyright (C) 2013-2017  Kouhei Sutou <kou****@clear*****>
+# Copyright (C) 2013-2019  Kouhei Sutou <kou****@clear*****>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -70,5 +70,22 @@ class TestDecomposersTar < Test::Unit::TestCase
                      decompose(data))
       end
     end
+
+    sub_test_case("multibyte") do
+      test("UTF-8") do
+        data_path = Pathname.new(fixture_path("utf-8.tar"))
+        base_path = data_path.sub_ext("")
+        data = ChupaText::InputData.new(data_path)
+        path = CGI.escape("こんにちは.txt")
+        assert_equal([
+                       {
+                         :uri    => file_uri("#{base_path}/utf-8/#{path}").to_s,
+                         :body   => "こんにちは\n".b,
+                         :source => data.uri.to_s,
+                       },
+                     ],
+                     decompose(data))
+      end
+    end
   end
 end

  Added: test/fixture/tar/utf-8.tar (+0 -0) 100644
===================================================================
(Binary files differ)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190228/9823c823/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index