[Groonga-commit] ranguba/chupa-text at 430e86d [master] Use just the first 1024 characters for guessing mime type

Back to archive index
Kouhei Sutou null+****@clear*****
Fri Mar 1 17:44:55 JST 2019


Kouhei Sutou	2019-03-01 17:44:55 +0900 (Fri, 01 Mar 2019)

  Revision: 430e86dce0dd329b027b039cb2f2f397039ffa69
  https://github.com/ranguba/chupa-text/commit/430e86dce0dd329b027b039cb2f2f397039ffa69

  Message:
    Use just the first 1024 characters for guessing mime type

  Modified files:
    lib/chupa-text/data.rb
    lib/chupa-text/file-content.rb
    lib/chupa-text/virtual-content.rb
    lib/chupa-text/virtual-file-data.rb

  Modified: lib/chupa-text/data.rb (+9 -4)
===================================================================
--- lib/chupa-text/data.rb    2019-03-01 17:36:41 +0900 (dc61947)
+++ lib/chupa-text/data.rb    2019-03-01 17:44:55 +0900 (9dd7ca8)
@@ -140,6 +140,10 @@ module ChupaText
       yield(StringIO.new(body))
     end
 
+    def peek_body(size)
+      body[0, size]
+    end
+
     def [](name)
       @attributes[name]
     end
@@ -216,10 +220,11 @@ module ChupaText
 
     def guess_mime_type_from_body
       mime_type = nil
-      change_encoding(body, "UTF-8") do |utf8_body|
-        return nil unless utf8_body.valid_encoding?
-        n_null_characters = utf8_body.count("\u0000")
-        return nil if n_null_characters > (utf8_body.bytesize * 0.01)
+      chunk = peek_body(1024)
+      change_encoding(chunk, "UTF-8") do |utf8_chunk|
+        return nil unless utf8_chunk.valid_encoding?
+        n_null_characters = utf8_chunk.count("\u0000")
+        return nil if n_null_characters > (utf8_chunk.bytesize * 0.01)
         mime_type = "text/plain"
       end
       mime_type

  Modified: lib/chupa-text/file-content.rb (+10 -2)
===================================================================
--- lib/chupa-text/file-content.rb    2019-03-01 17:36:41 +0900 (23274c1)
+++ lib/chupa-text/file-content.rb    2019-03-01 17:44:55 +0900 (67a31fb)
@@ -1,4 +1,4 @@
-# Copyright (C) 2013  Kouhei Sutou <kou****@clear*****>
+# Copyright (C) 2013-2019  Kouhei Sutou <kou****@clear*****>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -29,7 +29,15 @@ module ChupaText
     end
 
     def body
-      @body ||= open {|file| file.read}
+      open do |file|
+        file.read
+      end
+    end
+
+    def peek_body(size)
+      open do |file|
+        file.read(size)
+      end
     end
   end
 end

  Modified: lib/chupa-text/virtual-content.rb (+10 -2)
===================================================================
--- lib/chupa-text/virtual-content.rb    2019-03-01 17:36:41 +0900 (132e72c)
+++ lib/chupa-text/virtual-content.rb    2019-03-01 17:44:55 +0900 (1f76cce)
@@ -1,4 +1,4 @@
-# Copyright (C) 2013  Kouhei Sutou <kou****@clear*****>
+# Copyright (C) 2013-2019  Kouhei Sutou <kou****@clear*****>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -44,7 +44,15 @@ module ChupaText
     end
 
     def body
-      open {|file| file.read}
+      open do |file|
+        file.read
+      end
+    end
+
+    def peek_body(size)
+      open do |file|
+        file.read(size)
+      end
     end
 
     def path

  Modified: lib/chupa-text/virtual-file-data.rb (+5 -1)
===================================================================
--- lib/chupa-text/virtual-file-data.rb    2019-03-01 17:36:41 +0900 (ae89dc3)
+++ lib/chupa-text/virtual-file-data.rb    2019-03-01 17:44:55 +0900 (1f94196)
@@ -1,4 +1,4 @@
-# Copyright (C) 2013-2017  Kouhei Sutou <kou****@clear*****>
+# Copyright (C) 2013-2019  Kouhei Sutou <kou****@clear*****>
 #
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
@@ -27,6 +27,10 @@ module ChupaText
       @content.body
     end
 
+    def peek_body(size)
+      @content.peek_body(size)
+    end
+
     def size
       @content.size
     end
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20190301/3fe47803/attachment-0001.html>


More information about the Groonga-commit mailing list
Back to archive index