Kouhei Sutou 2019-03-02 06:16:52 +0900 (Sat, 02 Mar 2019) Revision: df288adba2935025299eadd923819979e85d2aba https://github.com/ranguba/chupa-text/commit/df288adba2935025299eadd923819979e85d2aba Message: xml: handle invalid encoding case Modified files: lib/chupa-text/sax-parser.rb test/decomposers/test-xml.rb Modified: lib/chupa-text/sax-parser.rb (+7 -0) =================================================================== --- lib/chupa-text/sax-parser.rb 2019-03-02 05:59:03 +0900 (8c1f1b2) +++ lib/chupa-text/sax-parser.rb 2019-03-02 06:16:52 +0900 (1bab0bc) @@ -123,6 +123,13 @@ module ChupaText rescue REXML::ParseException => error message = "#{error.class}: #{error.message}" raise ParseError, message + rescue ArgumentError => error + if error.message.start_with?("invalid byte sequence") + message = "#{error.class}: #{error.message}" + raise ParseError, message + else + raise + end end end Modified: test/decomposers/test-xml.rb (+27 -8) =================================================================== --- test/decomposers/test-xml.rb 2019-03-02 05:59:03 +0900 (ee846ea) +++ test/decomposers/test-xml.rb 2019-03-02 06:16:52 +0900 (8f5550e) @@ -40,16 +40,24 @@ class TestDecomposersXML < Test::Unit::TestCase decompose(xml).collect(&:body)) end - def test_invalid + def test_invalid_xml messages = capture_log do assert_equal([], decompose("<root x=/>")) end - normalized_messages = messages.collect do |level, message| - [ - level, - message.gsub(/(ChupaText::SAXParser::ParseError:) .*/, - "\\1 ...") - ] + assert_equal([ + [ + :error, + "[decomposer][xml] Failed to parse XML: " + + "ChupaText::SAXParser::ParseError: ...", + ], + ], + messages) + end + + def test_invalid_encoding + messages = capture_log do + assert_equal([], + decompose("\x00\x05\a\xA6")) end assert_equal([ [ @@ -58,7 +66,7 @@ class TestDecomposersXML < Test::Unit::TestCase "ChupaText::SAXParser::ParseError: ...", ], ], - normalized_messages) + messages) end private @@ -74,5 +82,16 @@ class TestDecomposersXML < Test::Unit::TestCase end decomposed end + + def capture_log + messages = super + messages.collect do |level, message| + [ + level, + message.gsub(/(ChupaText::SAXParser::ParseError:) .*/, + "\\1 ...") + ] + end + end end end -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190302/fd718eaf/attachment-0001.html>