Kouhei Sutou 2019-03-28 16:32:07 +0900 (Thu, 28 Mar 2019)

  Revision: 99f9cccdd3b515cde4859fb98a02d64e0e2d603a
  https://github.com/ranguba/chupa-text-decomposer-spreadsheet/commit/99f9cccdd3b515cde4859fb98a02d64e0e2d603a

  Message:
    Simplify extracted text

  Modified files:
    lib/chupa-text/decomposers/spreadsheet.rb
    test/test-spreadsheet.rb

  Modified: lib/chupa-text/decomposers/spreadsheet.rb (+33 -1)
===================================================================
--- lib/chupa-text/decomposers/spreadsheet.rb 2019-03-01 09:54:39 +0900 (bde2f63)
+++ lib/chupa-text/decomposers/spreadsheet.rb 2019-03-28 16:32:07 +0900 (1740d1d)
@@ -34,7 +34,7 @@ module ChupaText
         open_book(data) do |book|
           book.sheets.each do |sheet_name|
             sheet = book.sheet(sheet_name)
-            body = sheet.to_csv
+            body = build_body(sheet)
             text_data = TextData.new(body, source_data: data)
             text_data["name"] = sheet_name
             text_data["digest"] = Digest::SHA1.hexdigest(body)
@@ -70,6 +70,38 @@ module ChupaText
         end
       end
 
+      def build_body(sheet)
+        body = ""
+        first_row = sheet.first_row
+        return body if first_row.nil?
+
+        1.upto(sheet.last_row) do |row|
+          1.upto(sheet.last_column) do |column|
+            body << "\t" if column > 1
+            body << build_cell(sheet, row, column)
+          end
+          body << "\n"
+        end
+
+        body
+      end
+
+      def build_cell(sheet, row, column)
+        return "" if sheet.empty?(row, column)
+
+        cell = sheet.cell(row, column)
+        case sheet.celltype(row, column)
+        when :string
+          cell
+        when :time
+          sheet.integer_to_timestring(cell)
+        when :link
+          cell.url
+        else
+          cell.to_s
+        end
+      end
+
       def log_tag
         "[decomposer][spreadsheet]"
       end

  Modified: test/test-spreadsheet.rb (+18 -18)
===================================================================
--- test/test-spreadsheet.rb 2019-03-01 09:54:39 +0900 (e59c34c)
+++ test/test-spreadsheet.rb 2019-03-28 16:32:07 +0900 (003e508)
@@ -19,28 +19,28 @@ class TestSpreadsheet < Test::Unit::TestCase
 
   def test_ods
     assert_equal([<<-SHEET1, <<-SHEET2, <<-SHEET3],
-"Sheet1 - A1","Sheet1 - B1"
-"Sheet1 - A2","Sheet1 - B2"
+Sheet1 - A1\tSheet1 - B1
+Sheet1 - A2\tSheet1 - B2
     SHEET1
-"Sheet2 - A1","Sheet2 - B1"
-"Sheet2 - A2","Sheet2 - B2"
+Sheet2 - A1\tSheet2 - B1
+Sheet2 - A2\tSheet2 - B2
     SHEET2
-"Sheet3 - A1","Sheet3 - B1"
-"Sheet3 - A2","Sheet3 - B2"
+Sheet3 - A1\tSheet3 - B1
+Sheet3 - A2\tSheet3 - B2
     SHEET3
                  decompose("ods/multi-sheets.ods"))
   end
 
   def test_xls
     assert_equal([<<-SHEET1, <<-SHEET2, <<-SHEET3],
-"Sheet1 - A1","Sheet1 - B1"
-"Sheet1 - A2","Sheet1 - B2"
+Sheet1 - A1\tSheet1 - B1
+Sheet1 - A2\tSheet1 - B2
     SHEET1
-"Sheet2 - A1","Sheet2 - B1"
-"Sheet2 - A2","Sheet2 - B2"
+Sheet2 - A1\tSheet2 - B1
+Sheet2 - A2\tSheet2 - B2
     SHEET2
-"Sheet3 - A1","Sheet3 - B1"
-"Sheet3 - A2","Sheet3 - B2"
+Sheet3 - A1\tSheet3 - B1
+Sheet3 - A2\tSheet3 - B2
     SHEET3
                  decompose("xls/multi-sheets.xls"))
   end
@@ -61,14 +61,14 @@ class TestSpreadsheet < Test::Unit::TestCase
 
   def test_xlsx
     assert_equal([<<-SHEET1, <<-SHEET2, <<-SHEET3],
-"Sheet1 - A1","Sheet1 - B1"
-"Sheet1 - A2","Sheet1 - B2"
+Sheet1 - A1\tSheet1 - B1
+Sheet1 - A2\tSheet1 - B2
     SHEET1
-"Sheet2 - A1","Sheet2 - B1"
-"Sheet2 - A2","Sheet2 - B2"
+Sheet2 - A1\tSheet2 - B1
+Sheet2 - A2\tSheet2 - B2
     SHEET2
-"Sheet3 - A1","Sheet3 - B1"
-"Sheet3 - A2","Sheet3 - B2"
+Sheet3 - A1\tSheet3 - B1
+Sheet3 - A2\tSheet3 - B2
     SHEET3
                  decompose("xlsx/multi-sheets.xlsx"))
   end
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190328/a041a4e5/attachment-0001.html>