[Groonga-commit] ranguba/chupa-text at b75b293 [master] csv: add error check

Back to archive index
Kouhei Sutou null+****@clear*****
Fri Mar 1 12:02:38 JST 2019


Kouhei Sutou	2019-03-01 12:02:38 +0900 (Fri, 01 Mar 2019)

  Revision: b75b293c1b552b6d2c01dbeb9b403ac3a328959a
  https://github.com/ranguba/chupa-text/commit/b75b293c1b552b6d2c01dbeb9b403ac3a328959a

  Message:
    csv: add error check

  Modified files:
    lib/chupa-text/decomposers/csv.rb
    test/command/test-chupa-text.rb
    test/decomposers/test-csv.rb
    test/helper.rb

  Modified: lib/chupa-text/decomposers/csv.rb (+20 -4)
===================================================================
--- lib/chupa-text/decomposers/csv.rb    2019-03-01 11:49:29 +0900 (07ebba5)
+++ lib/chupa-text/decomposers/csv.rb    2019-03-01 12:02:38 +0900 (edc5bc2)
@@ -20,6 +20,8 @@ require "csv"
 module ChupaText
   module Decomposers
     class CSV < Decomposer
+      include Loggable
+
       registry.register("csv", self)
 
       def target?(data)
@@ -36,10 +38,20 @@ module ChupaText
       def decompose(data)
         text = ""
         data.open do |input|
-          csv = ::CSV.new(input)
-          csv.each do |row|
-            text << row.join(" ")
-            text << "\n"
+          begin
+            csv = ::CSV.new(input, liberal_parsing: true)
+            csv.each do |row|
+              text << row.join("\t")
+              text << "\n"
+            end
+          rescue ::CSV::MalformedCSVError => csv_error
+            error do
+              message = "#{log_tag} Failed to parse CSV: "
+              message << "#{csv_error.class}: #{csv_error.message}\n"
+              message << csv_error.backtrace.join("\n")
+              message
+            end
+            return
           end
         end
 
@@ -78,6 +90,10 @@ module ChupaText
         SVG
         Screenshot.new(mime_type, data)
       end
+
+      def log_tag
+        "[decomposer][csv]"
+      end
     end
   end
 end

  Modified: test/command/test-chupa-text.rb (+4 -4)
===================================================================
--- test/command/test-chupa-text.rb    2019-03-01 11:49:29 +0900 (4ac6b51)
+++ test/command/test-chupa-text.rb    2019-03-01 12:02:38 +0900 (4b13f76)
@@ -289,7 +289,7 @@ class TestCommandChupaText < Test::Unit::TestCase
                            "path"      => path.sub_ext(".txt").to_s,
                            "mime-type" => "text/plain",
                            "source-mime-types" => ["text/csv"],
-                           "body"      => "1 2 3\n4 5 6\n7 8 9\n",
+                           "body"      => "1\t2\t3\n4\t5\t6\n7\t8\t9\n",
                            "size"      => 18,
                            "screenshot" => {
                              "mime-type" => "image/svg+xml",
@@ -304,9 +304,9 @@ class TestCommandChupaText < Test::Unit::TestCase
     x="0"
     y="20"
     style="font-size: 20px; white-space: pre-wrap;"
-    xml:space="preserve">1 2 3
-4 5 6
-7 8 9
+    xml:space="preserve">1\t2\t3
+4\t5\t6
+7\t8\t9
 </text>
 </svg>
                              SVG

  Modified: test/decomposers/test-csv.rb (+18 -3)
===================================================================
--- test/decomposers/test-csv.rb    2019-03-01 11:49:29 +0900 (3fb6455)
+++ test/decomposers/test-csv.rb    2019-03-01 12:02:38 +0900 (91937f1)
@@ -1,4 +1,4 @@
-# Copyright (C) 2013  Kouhei Sutou <kou****@clear*****>
+# Copyright (C) 2013-2019  Kouhei Sutou <kou****@clear*****>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -22,15 +22,30 @@ class TestDecomposersCSV< Test::Unit::TestCase
   end
 
   sub_test_case("decompose") do
-    def test_body
+    def test_valid
       csv = <<-CSV
 Hello,World
 Ruby,ChupaText
       CSV
-      assert_equal([csv.gsub(/,/, " ")],
+      assert_equal([csv.gsub(/,/, "\t")],
                    decompose(csv).collect(&:body))
     end
 
+    def test_invalid
+      messages = capture_log do
+        assert_equal([], decompose("He\x82\x00llo").collect(&:body))
+      end
+      assert_equal([
+                     [
+                       :error,
+                       "[decomposer][csv] Failed to parse CSV: " +
+                       "CSV::MalformedCSVError: " +
+                       "Invalid byte sequence in UTF-8 in line 1.",
+                     ],
+                   ],
+                   messages)
+    end
+
     private
     def decompose(csv)
       data = ChupaText::Data.new

  Modified: test/helper.rb (+31 -1)
===================================================================
--- test/helper.rb    2019-03-01 11:49:29 +0900 (ff41c41)
+++ test/helper.rb    2019-03-01 12:02:38 +0900 (68f814e)
@@ -1,4 +1,4 @@
-# Copyright (C) 2013-2017  Kouhei Sutou <kou****@clear*****>
+# Copyright (C) 2013-2019  Kouhei Sutou <kou****@clear*****>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -32,4 +32,34 @@ module Helper
   def file_uri(path)
     URI.parse("file://#{path}")
   end
+
+
+  class CaptureLogger
+    def initialize(output)
+      @output = output
+    end
+
+    def error(message=nil)
+      @output << [:error, message || yield]
+    end
+  end
+
+  def capture_log
+    original_logger = ChupaText.logger
+    begin
+      output = []
+      ChupaText.logger = CaptureLogger.new(output)
+      yield
+      normalize_log(output)
+    ensure
+      ChupaText.logger = original_logger
+    end
+  end
+
+  def normalize_log(log)
+    log.collect do |level, message|
+      message = message.split("\n", 2)[0]
+      [level, message]
+    end
+  end
 end
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190301/209048ff/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index