[Groonga-commit] ranguba/chupa-text at df288ad [master] xml: handle invalid encoding case

Back to archive index
Kouhei Sutou null+****@clear*****
Sat Mar 2 06:16:52 JST 2019


Kouhei Sutou	2019-03-02 06:16:52 +0900 (Sat, 02 Mar 2019)

  Revision: df288adba2935025299eadd923819979e85d2aba
  https://github.com/ranguba/chupa-text/commit/df288adba2935025299eadd923819979e85d2aba

  Message:
    xml: handle invalid encoding case

  Modified files:
    lib/chupa-text/sax-parser.rb
    test/decomposers/test-xml.rb

  Modified: lib/chupa-text/sax-parser.rb (+7 -0)
===================================================================
--- lib/chupa-text/sax-parser.rb    2019-03-02 05:59:03 +0900 (8c1f1b2)
+++ lib/chupa-text/sax-parser.rb    2019-03-02 06:16:52 +0900 (1bab0bc)
@@ -123,6 +123,13 @@ module ChupaText
         rescue REXML::ParseException => error
           message = "#{error.class}: #{error.message}"
           raise ParseError, message
+        rescue ArgumentError => error
+          if error.message.start_with?("invalid byte sequence")
+            message = "#{error.class}: #{error.message}"
+            raise ParseError, message
+          else
+            raise
+          end
         end
       end
 

  Modified: test/decomposers/test-xml.rb (+27 -8)
===================================================================
--- test/decomposers/test-xml.rb    2019-03-02 05:59:03 +0900 (ee846ea)
+++ test/decomposers/test-xml.rb    2019-03-02 06:16:52 +0900 (8f5550e)
@@ -40,16 +40,24 @@ class TestDecomposersXML < Test::Unit::TestCase
                    decompose(xml).collect(&:body))
     end
 
-    def test_invalid
+    def test_invalid_xml
       messages = capture_log do
         assert_equal([], decompose("<root x=/>"))
       end
-      normalized_messages = messages.collect do |level, message|
-        [
-          level,
-          message.gsub(/(ChupaText::SAXParser::ParseError:) .*/,
-                       "\\1 ...")
-        ]
+      assert_equal([
+                     [
+                       :error,
+                       "[decomposer][xml] Failed to parse XML: " +
+                       "ChupaText::SAXParser::ParseError: ...",
+                     ],
+                   ],
+                   messages)
+    end
+
+    def test_invalid_encoding
+      messages = capture_log do
+        assert_equal([],
+                     decompose("\x00\x05\a\xA6"))
       end
       assert_equal([
                      [
@@ -58,7 +66,7 @@ class TestDecomposersXML < Test::Unit::TestCase
                        "ChupaText::SAXParser::ParseError: ...",
                      ],
                    ],
-                   normalized_messages)
+                   messages)
     end
 
     private
@@ -74,5 +82,16 @@ class TestDecomposersXML < Test::Unit::TestCase
       end
       decomposed
     end
+
+    def capture_log
+      messages = super
+      messages.collect do |level, message|
+        [
+          level,
+          message.gsub(/(ChupaText::SAXParser::ParseError:) .*/,
+                       "\\1 ...")
+        ]
+      end
+    end
   end
 end
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190302/fd718eaf/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index