[Groonga-commit] ranguba/chupa-text at 4605b33 [master] zip: add support multibyte path

Back to archive index
Kouhei Sutou null+****@clear*****
Thu Feb 28 12:21:24 JST 2019


Kouhei Sutou	2019-02-28 12:21:24 +0900 (Thu, 28 Feb 2019)

  Revision: 4605b33d2893cd86d80829f7a589427292719d6e
  https://github.com/ranguba/chupa-text/commit/4605b33d2893cd86d80829f7a589427292719d6e

  Message:
    zip: add support multibyte path

  Added files:
    test/fixture/zip/cp932.zip
    test/fixture/zip/utf-8.zip
  Modified files:
    lib/chupa-text/decomposers/zip.rb
    test/decomposers/test-zip.rb

  Modified: lib/chupa-text/decomposers/zip.rb (+28 -2)
===================================================================
--- lib/chupa-text/decomposers/zip.rb    2019-02-28 11:20:05 +0900 (bef8a02)
+++ lib/chupa-text/decomposers/zip.rb    2019-02-28 12:21:24 +0900 (af91447)
@@ -1,4 +1,4 @@
-# Copyright (C) 2017  Kouhei Sutou <kou****@clear*****>
+# Copyright (C) 2017-2019  Kouhei Sutou <kou****@clear*****>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -45,7 +45,8 @@ module ChupaText
             end
             entry_uri = data.uri.dup
             base_path = entry_uri.path.gsub(/\.zip\z/i, "")
-            entry_uri.path = "#{base_path}/#{entry.zip_path}"
+            path = convert_path_encoding(entry.zip_path, base_path.encoding)
+            entry_uri.path = "#{base_path}/#{convert_to_uri_path(path)}"
             entry_data = VirtualFileData.new(entry_uri,
                                              entry.file_data,
                                              source_data: data)
@@ -53,6 +54,31 @@ module ChupaText
           end
         end
       end
+
+      private
+      def convert_path_encoding(path, encoding)
+        return path if path.ascii_only?
+
+        candidates = [
+          Encoding::UTF_8,
+          Encoding::Windows_31J,
+        ]
+        candidates.each do |candidate|
+          path.force_encoding(candidate)
+          return path.encode(encoding) if path.valid_encoding?
+        end
+        path.encode(encoding,
+                    Encoding::UTF_8,
+                    invalid: :replace,
+                    undef: :replace)
+      end
+
+      def convert_to_uri_path(path)
+        converted_components = path.split("/").collect do |component|
+          CGI.escape(component)
+        end
+        converted_components.join("/")
+      end
     end
   end
 end

  Modified: test/decomposers/test-zip.rb (+31 -1)
===================================================================
--- test/decomposers/test-zip.rb    2019-02-28 11:20:05 +0900 (235836a)
+++ test/decomposers/test-zip.rb    2019-02-28 12:21:24 +0900 (ba3f955)
@@ -1,4 +1,4 @@
-# Copyright (C) 2017  Kouhei Sutou <kou****@clear*****>
+# Copyright (C) 2017-2019  Kouhei Sutou <kou****@clear*****>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -63,6 +63,36 @@ class TestDecomposersZip < Test::Unit::TestCase
                    decompose(data_path))
     end
 
+    sub_test_case("multibyte") do
+      test("cp932") do
+        data_path = Pathname.new(fixture_path("cp932.zip"))
+        base_path = data_path.sub_ext("")
+        path = CGI.escape("こんにちは.txt")
+        assert_equal([
+                       {
+                         :uri    => file_uri("#{base_path}/cp932/#{path}").to_s,
+                         :body   => "こんにちは\n".encode("cp932").b,
+                         :source => file_uri(data_path).to_s,
+                       },
+                     ],
+                     decompose(data_path))
+      end
+
+      test("UTF-8") do
+        data_path = Pathname.new(fixture_path("utf-8.zip"))
+        base_path = data_path.sub_ext("")
+        path = CGI.escape("こんにちは.txt")
+        assert_equal([
+                       {
+                         :uri    => file_uri("#{base_path}/utf-8/#{path}").to_s,
+                         :body   => "こんにちは\n".b,
+                         :source => file_uri(data_path).to_s,
+                       },
+                     ],
+                     decompose(data_path))
+      end
+    end
+
     sub_test_case("encrypted") do
       test("without password") do
         data_path = Pathname.new(fixture_path("password.zip"))

  Added: test/fixture/zip/cp932.zip (+0 -0) 100644
===================================================================
(Binary files differ)

  Added: test/fixture/zip/utf-8.zip (+0 -0) 100644
===================================================================
(Binary files differ)
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190228/42618dbe/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index