[Groonga-commit] ranguba/chupa-text-decomposer-spreadsheet at 99f9ccc [master] Simplify extracted text

Back to archive index
Kouhei Sutou null+****@clear*****
Thu Mar 28 16:32:07 JST 2019


Kouhei Sutou	2019-03-28 16:32:07 +0900 (Thu, 28 Mar 2019)

  Revision: 99f9cccdd3b515cde4859fb98a02d64e0e2d603a
  https://github.com/ranguba/chupa-text-decomposer-spreadsheet/commit/99f9cccdd3b515cde4859fb98a02d64e0e2d603a

  Message:
    Simplify extracted text

  Modified files:
    lib/chupa-text/decomposers/spreadsheet.rb
    test/test-spreadsheet.rb

  Modified: lib/chupa-text/decomposers/spreadsheet.rb (+33 -1)
===================================================================
--- lib/chupa-text/decomposers/spreadsheet.rb    2019-03-01 09:54:39 +0900 (bde2f63)
+++ lib/chupa-text/decomposers/spreadsheet.rb    2019-03-28 16:32:07 +0900 (1740d1d)
@@ -34,7 +34,7 @@ module ChupaText
         open_book(data) do |book|
           book.sheets.each do |sheet_name|
             sheet = book.sheet(sheet_name)
-            body = sheet.to_csv
+            body = build_body(sheet)
             text_data = TextData.new(body, source_data: data)
             text_data["name"] = sheet_name
             text_data["digest"] = Digest::SHA1.hexdigest(body)
@@ -70,6 +70,38 @@ module ChupaText
         end
       end
 
+      def build_body(sheet)
+        body = ""
+        first_row = sheet.first_row
+        return body if first_row.nil?
+
+        1.upto(sheet.last_row) do |row|
+          1.upto(sheet.last_column) do |column|
+            body << "\t" if column > 1
+            body << build_cell(sheet, row, column)
+          end
+          body << "\n"
+        end
+
+        body
+      end
+
+      def build_cell(sheet, row, column)
+        return "" if sheet.empty?(row, column)
+
+        cell = sheet.cell(row, column)
+        case sheet.celltype(row, column)
+        when :string
+          cell
+        when :time
+          sheet.integer_to_timestring(cell)
+        when :link
+          cell.url
+        else
+          cell.to_s
+        end
+      end
+
       def log_tag
         "[decomposer][spreadsheet]"
       end

  Modified: test/test-spreadsheet.rb (+18 -18)
===================================================================
--- test/test-spreadsheet.rb    2019-03-01 09:54:39 +0900 (e59c34c)
+++ test/test-spreadsheet.rb    2019-03-28 16:32:07 +0900 (003e508)
@@ -19,28 +19,28 @@ class TestSpreadsheet < Test::Unit::TestCase
 
     def test_ods
       assert_equal([<<-SHEET1, <<-SHEET2, <<-SHEET3],
-"Sheet1 - A1","Sheet1 - B1"
-"Sheet1 - A2","Sheet1 - B2"
+Sheet1 - A1\tSheet1 - B1
+Sheet1 - A2\tSheet1 - B2
       SHEET1
-"Sheet2 - A1","Sheet2 - B1"
-"Sheet2 - A2","Sheet2 - B2"
+Sheet2 - A1\tSheet2 - B1
+Sheet2 - A2\tSheet2 - B2
       SHEET2
-"Sheet3 - A1","Sheet3 - B1"
-"Sheet3 - A2","Sheet3 - B2"
+Sheet3 - A1\tSheet3 - B1
+Sheet3 - A2\tSheet3 - B2
       SHEET3
                    decompose("ods/multi-sheets.ods"))
     end
 
     def test_xls
       assert_equal([<<-SHEET1, <<-SHEET2, <<-SHEET3],
-"Sheet1 - A1","Sheet1 - B1"
-"Sheet1 - A2","Sheet1 - B2"
+Sheet1 - A1\tSheet1 - B1
+Sheet1 - A2\tSheet1 - B2
       SHEET1
-"Sheet2 - A1","Sheet2 - B1"
-"Sheet2 - A2","Sheet2 - B2"
+Sheet2 - A1\tSheet2 - B1
+Sheet2 - A2\tSheet2 - B2
       SHEET2
-"Sheet3 - A1","Sheet3 - B1"
-"Sheet3 - A2","Sheet3 - B2"
+Sheet3 - A1\tSheet3 - B1
+Sheet3 - A2\tSheet3 - B2
       SHEET3
                    decompose("xls/multi-sheets.xls"))
     end
@@ -61,14 +61,14 @@ class TestSpreadsheet < Test::Unit::TestCase
 
     def test_xlsx
       assert_equal([<<-SHEET1, <<-SHEET2, <<-SHEET3],
-"Sheet1 - A1","Sheet1 - B1"
-"Sheet1 - A2","Sheet1 - B2"
+Sheet1 - A1\tSheet1 - B1
+Sheet1 - A2\tSheet1 - B2
       SHEET1
-"Sheet2 - A1","Sheet2 - B1"
-"Sheet2 - A2","Sheet2 - B2"
+Sheet2 - A1\tSheet2 - B1
+Sheet2 - A2\tSheet2 - B2
       SHEET2
-"Sheet3 - A1","Sheet3 - B1"
-"Sheet3 - A2","Sheet3 - B2"
+Sheet3 - A1\tSheet3 - B1
+Sheet3 - A2\tSheet3 - B2
       SHEET3
                    decompose("xlsx/multi-sheets.xlsx"))
     end
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190328/a041a4e5/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index