Kouhei Sutou 2019-02-28 12:21:24 +0900 (Thu, 28 Feb 2019) Revision: 4605b33d2893cd86d80829f7a589427292719d6e https://github.com/ranguba/chupa-text/commit/4605b33d2893cd86d80829f7a589427292719d6e Message: zip: add support multibyte path Added files: test/fixture/zip/cp932.zip test/fixture/zip/utf-8.zip Modified files: lib/chupa-text/decomposers/zip.rb test/decomposers/test-zip.rb Modified: lib/chupa-text/decomposers/zip.rb (+28 -2) =================================================================== --- lib/chupa-text/decomposers/zip.rb 2019-02-28 11:20:05 +0900 (bef8a02) +++ lib/chupa-text/decomposers/zip.rb 2019-02-28 12:21:24 +0900 (af91447) @@ -1,4 +1,4 @@ -# Copyright (C) 2017 Kouhei Sutou <kou****@clear*****> +# Copyright (C) 2017-2019 Kouhei Sutou <kou****@clear*****> # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -45,7 +45,8 @@ module ChupaText end entry_uri = data.uri.dup base_path = entry_uri.path.gsub(/\.zip\z/i, "") - entry_uri.path = "#{base_path}/#{entry.zip_path}" + path = convert_path_encoding(entry.zip_path, base_path.encoding) + entry_uri.path = "#{base_path}/#{convert_to_uri_path(path)}" entry_data = VirtualFileData.new(entry_uri, entry.file_data, source_data: data) @@ -53,6 +54,31 @@ module ChupaText end end end + + private + def convert_path_encoding(path, encoding) + return path if path.ascii_only? + + candidates = [ + Encoding::UTF_8, + Encoding::Windows_31J, + ] + candidates.each do |candidate| + path.force_encoding(candidate) + return path.encode(encoding) if path.valid_encoding? + end + path.encode(encoding, + Encoding::UTF_8, + invalid: :replace, + undef: :replace) + end + + def convert_to_uri_path(path) + converted_components = path.split("/").collect do |component| + CGI.escape(component) + end + converted_components.join("/") + end end end end Modified: test/decomposers/test-zip.rb (+31 -1) =================================================================== --- test/decomposers/test-zip.rb 2019-02-28 11:20:05 +0900 (235836a) +++ test/decomposers/test-zip.rb 2019-02-28 12:21:24 +0900 (ba3f955) @@ -1,4 +1,4 @@ -# Copyright (C) 2017 Kouhei Sutou <kou****@clear*****> +# Copyright (C) 2017-2019 Kouhei Sutou <kou****@clear*****> # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -63,6 +63,36 @@ class TestDecomposersZip < Test::Unit::TestCase decompose(data_path)) end + sub_test_case("multibyte") do + test("cp932") do + data_path = Pathname.new(fixture_path("cp932.zip")) + base_path = data_path.sub_ext("") + path = CGI.escape("こんにちは.txt") + assert_equal([ + { + :uri => file_uri("#{base_path}/cp932/#{path}").to_s, + :body => "こんにちは\n".encode("cp932").b, + :source => file_uri(data_path).to_s, + }, + ], + decompose(data_path)) + end + + test("UTF-8") do + data_path = Pathname.new(fixture_path("utf-8.zip")) + base_path = data_path.sub_ext("") + path = CGI.escape("こんにちは.txt") + assert_equal([ + { + :uri => file_uri("#{base_path}/utf-8/#{path}").to_s, + :body => "こんにちは\n".b, + :source => file_uri(data_path).to_s, + }, + ], + decompose(data_path)) + end + end + sub_test_case("encrypted") do test("without password") do data_path = Pathname.new(fixture_path("password.zip")) Added: test/fixture/zip/cp932.zip (+0 -0) 100644 =================================================================== (Binary files differ) Added: test/fixture/zip/utf-8.zip (+0 -0) 100644 =================================================================== (Binary files differ) -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190228/42618dbe/attachment-0001.html>