Kouhei Sutou 2019-02-28 15:14:11 +0900 (Thu, 28 Feb 2019) Revision: 0c481b5763a6d0ca07f390b8e4d5b3bde084f3ff https://github.com/ranguba/chupa-text/commit/0c481b5763a6d0ca07f390b8e4d5b3bde084f3ff Message: Add support for multibyte path in tar Added files: lib/chupa-text/path-converter.rb test/fixture/tar/utf-8.tar Modified files: lib/chupa-text/decomposers/tar.rb lib/chupa-text/decomposers/zip.rb test/decomposers/test-tar.rb Modified: lib/chupa-text/decomposers/tar.rb (+6 -2) =================================================================== --- lib/chupa-text/decomposers/tar.rb 2019-02-28 14:54:41 +0900 (976e248) +++ lib/chupa-text/decomposers/tar.rb 2019-02-28 15:14:11 +0900 (27e4e9f) @@ -1,4 +1,4 @@ -# Copyright (C) 2013-2017 Kouhei Sutou <kou****@clear*****> +# Copyright (C) 2013-2019 Kouhei Sutou <kou****@clear*****> # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -17,6 +17,8 @@ require "stringio" require "rubygems/package" +require "chupa-text/path-converter" + module ChupaText module Decomposers class Tar < Decomposer @@ -35,7 +37,9 @@ module ChupaText entry.extend(CopyStreamable) entry_uri = data.uri.dup base_path = entry_uri.path.gsub(/\.tar\z/i, "") - entry_uri.path = "#{base_path}/#{entry.full_name}" + path_converter = PathConverter.new(entry.full_name, + uri_escape: true) + entry_uri.path = "#{base_path}/#{path_converter.convert}" extracted = VirtualFileData.new(entry_uri, entry, :source_data => data) Modified: lib/chupa-text/decomposers/zip.rb (+6 -3) =================================================================== --- lib/chupa-text/decomposers/zip.rb 2019-02-28 14:54:41 +0900 (af91447) +++ lib/chupa-text/decomposers/zip.rb 2019-02-28 15:14:11 +0900 (e9d84e9) @@ -15,10 +15,11 @@ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA require "stringio" -require "tmpdir" require "archive/zip" +require "chupa-text/path-converter" + module ChupaText module Decomposers class Zip < Decomposer @@ -45,8 +46,10 @@ module ChupaText end entry_uri = data.uri.dup base_path = entry_uri.path.gsub(/\.zip\z/i, "") - path = convert_path_encoding(entry.zip_path, base_path.encoding) - entry_uri.path = "#{base_path}/#{convert_to_uri_path(path)}" + path_converter = PathConverter.new(entry.zip_path, + encoding: base_path.encoding, + uri_escape: true) + entry_uri.path = "#{base_path}/#{path_converter.convert}" entry_data = VirtualFileData.new(entry_uri, entry.file_data, source_data: data) Added: lib/chupa-text/path-converter.rb (+70 -0) 100644 =================================================================== --- /dev/null +++ lib/chupa-text/path-converter.rb 2019-02-28 15:14:11 +0900 (ca3c221) @@ -0,0 +1,70 @@ +# Copyright (C) 2019 Kouhei Sutou <kou****@clear*****> +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +require "cgi/util" + +module ChupaText + class PathConverter + def initialize(path, options={}) + @path = path + @options = options + end + + def convert + path = @path + encoding = @options[:encoding] + path = convert_encoding(path, encoding) if encoding + path = convert_to_uri_path(path) if @options[:uri_escape] + path + end + + private + def convert_encoding(path, encoding) + case path.encoding + when Encoding::ASCII_8BIT + if path.ascii_only? + path.force_encoding(Encoding::UTF_8) + else + candidates = [ + Encoding::UTF_8, + Encoding::EUC_JP, + Encoding::Windows_31J, + ] + found = false + candidates.find do |candidate| + path.force_encoding(candidate) + if path.valid_encoding? + found = true + break + end + end + path.force_encoding(Encoding::ASCII_8BIT) unless found + end + end + path.encode(encoding, + invalid: :replace, + undef: :replace, + replace: "") + end + + def convert_to_uri_path(path) + converted_components = path.split("/").collect do |component| + CGI.escape(component) + end + converted_components.join("/") + end + end +end Modified: test/decomposers/test-tar.rb (+18 -1) =================================================================== --- test/decomposers/test-tar.rb 2019-02-28 14:54:41 +0900 (503bc65) +++ test/decomposers/test-tar.rb 2019-02-28 15:14:11 +0900 (0bc75a7) @@ -1,4 +1,4 @@ -# Copyright (C) 2013-2017 Kouhei Sutou <kou****@clear*****> +# Copyright (C) 2013-2019 Kouhei Sutou <kou****@clear*****> # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -70,5 +70,22 @@ class TestDecomposersTar < Test::Unit::TestCase decompose(data)) end end + + sub_test_case("multibyte") do + test("UTF-8") do + data_path = Pathname.new(fixture_path("utf-8.tar")) + base_path = data_path.sub_ext("") + data = ChupaText::InputData.new(data_path) + path = CGI.escape("こんにちは.txt") + assert_equal([ + { + :uri => file_uri("#{base_path}/utf-8/#{path}").to_s, + :body => "こんにちは\n".b, + :source => data.uri.to_s, + }, + ], + decompose(data)) + end + end end end Added: test/fixture/tar/utf-8.tar (+0 -0) 100644 =================================================================== (Binary files differ) -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190228/9823c823/attachment-0001.html>