| 1 |
/* |
| 2 |
* Copyright(C) Dreamgate co.,ltd. All rights reserved. |
| 3 |
*/ |
| 4 |
using System; |
| 5 |
using System.IO; |
| 6 |
using System.Text; |
| 7 |
|
| 8 |
namespace ktox.Utils |
| 9 |
{ |
| 10 |
/// <summary>
/// Detects the text encoding of a file.
/// </summary>
/// <remarks>
/// Detection first looks for a Unicode byte order mark (BOM). When no BOM is present,
/// the beginning of the file is examined chunk by chunk; the first chunk that validates
/// as UTF-8 or as Shift_JIS (code page 932) decides the result. Chunks that contain only
/// ASCII bytes carry no evidence and are skipped.
/// </remarks>
public static class FileEncodingDetector
{
    /// <summary>Number of bytes examined per chunk.</summary>
    private const int ChunkSize = 1024;

    /// <summary>Maximum number of chunks examined before giving up.</summary>
    private const int MaxChunks = 10;

    /// <summary>Length of the longest BOM that is recognised (UTF-32).</summary>
    private const int MaxBomLength = 4;

    /// <summary>Windows code page number for Shift_JIS.</summary>
    private const int ShiftJisCodePage = 932;

    /// <summary>
    /// Determines the encoding of the file at <paramref name="path"/>.
    /// </summary>
    /// <param name="path">Path of the file to inspect. The file is opened read-only with shared read/write access.</param>
    /// <returns>
    /// The detected <see cref="Encoding"/> (UTF-8, UTF-16 LE/BE, UTF-32 LE/BE or code page 932),
    /// or <c>null</c> when the encoding could not be determined, e.g. for an empty or pure-ASCII file.
    /// </returns>
    /// <remarks>
    /// NOTE(review): <c>Encoding.GetEncoding(932)</c> presumably requires the code-pages encoding
    /// provider to be registered on runtimes that do not ship code page 932 by default; confirm for
    /// the target runtime.
    /// </remarks>
    public static Encoding GetEncoding(string path)
    {
        using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) {
            var buffer = new byte[ChunkSize];

            int len = fs.Read(buffer, 0, MaxBomLength);
            Encoding result = GetEncodingFromBOM(buffer, len);
            if (result != null) {
                return result;
            }

            // No BOM: rewind so the heuristic scan also sees the first bytes.
            fs.Seek(0, SeekOrigin.Begin);
            for (int count = 0; count < MaxChunks; count++) {
                len = fs.Read(buffer, 0, buffer.Length);
                if (len <= 0) {
                    // End of file: nothing more to examine.
                    break;
                }
                // UTF-8 is tested first because its structure is far stricter than Shift_JIS.
                if (IsUTF8(buffer, len)) {
                    return Encoding.UTF8;
                }
                if (IsSJIS(buffer, len)) {
                    return Encoding.GetEncoding(ShiftJisCodePage);
                }
            }
        }
        return null;
    }

    /// <summary>
    /// Maps a leading byte order mark to its encoding.
    /// </summary>
    /// <param name="buffer">Buffer holding the first bytes of the file.</param>
    /// <param name="length">Number of valid bytes in <paramref name="buffer"/>.</param>
    /// <returns>The encoding announced by the BOM, or <c>null</c> when no known BOM is present.</returns>
    private static Encoding GetEncodingFromBOM(byte[] buffer, int length)
    {
        // The UTF-32 LE BOM (FF FE 00 00) starts with the UTF-16 LE BOM (FF FE),
        // so UTF-32 has to be tested before UTF-16.
        if (StartsWith(buffer, length, 0xff, 0xfe, 0x00, 0x00)) {
            return Encoding.UTF32;
        }
        if (StartsWith(buffer, length, 0x00, 0x00, 0xfe, 0xff)) {
            return new UTF32Encoding(true, true);
        }
        if (StartsWith(buffer, length, 0xfe, 0xff)) {
            return Encoding.BigEndianUnicode;
        }
        if (StartsWith(buffer, length, 0xff, 0xfe)) {
            return Encoding.Unicode;
        }
        if (StartsWith(buffer, length, 0xef, 0xbb, 0xbf)) {
            return Encoding.UTF8;
        }
        return null;
    }

    /// <summary>
    /// Tests whether the first <paramref name="length"/> bytes of <paramref name="buffer"/>
    /// begin with <paramref name="prefix"/>.
    /// </summary>
    private static bool StartsWith(byte[] buffer, int length, params byte[] prefix)
    {
        if (length < prefix.Length) {
            return false;
        }
        for (int i = 0; i < prefix.Length; i++) {
            if (buffer[i] != prefix[i]) {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Checks whether the chunk is structurally valid UTF-8 and contains at least one
    /// multi-byte sequence.
    /// </summary>
    /// <param name="buffer">Chunk to examine.</param>
    /// <param name="length">Number of valid bytes in <paramref name="buffer"/>.</param>
    /// <returns>
    /// <c>true</c> when every non-ASCII byte belongs to a well-formed sequence and at least one such
    /// sequence exists; <c>false</c> for invalid data and for pure ASCII.
    /// </returns>
    private static bool IsUTF8(byte[] buffer, int length)
    {
        bool multibyte = false;
        int i = 0;
        while (i < length) {
            byte b = buffer[i];
            if ((b & 0x80) == 0) {
                /* 1 byte (ASCII) */
                i++;
                continue;
            }

            int charLen = GetUTF8SequenceLength(b);
            if (charLen == 0) {
                return false;
            }

            // A sequence cut off by the end of the chunk is tolerated: only the
            // continuation bytes that are actually present are verified.
            for (int j = 1; j < charLen && i + j < length; j++) {
                if (!Is2ndByteOfUTF8(buffer[i + j])) {
                    return false;
                }
            }
            multibyte = true;
            i += charLen;
        }
        return multibyte;
    }

    /// <summary>
    /// Returns the total length of the UTF-8 sequence introduced by <paramref name="lead"/>,
    /// or 0 when the byte cannot start a multi-byte sequence.
    /// </summary>
    /// <remarks>
    /// 0xC0/0xC1 (always overlong) and 0xF5 and above (beyond U+10FFFF) never occur in
    /// well-formed UTF-8 and are therefore rejected, as are stray continuation bytes.
    /// </remarks>
    private static int GetUTF8SequenceLength(byte lead)
    {
        if (lead >= 0xc2 && lead <= 0xdf) {
            return 2;
        }
        if (lead >= 0xe0 && lead <= 0xef) {
            return 3;
        }
        if (lead >= 0xf0 && lead <= 0xf4) {
            return 4;
        }
        return 0;
    }

    /// <summary>
    /// Checks whether the chunk is structurally valid Shift_JIS and contains at least one
    /// non-ASCII character.
    /// </summary>
    /// <param name="buffer">Chunk to examine.</param>
    /// <param name="length">Number of valid bytes in <paramref name="buffer"/>.</param>
    /// <returns>
    /// <c>true</c> when every non-ASCII byte is either a single-byte half-width katakana or part of a
    /// valid lead/trail pair and at least one such character exists; otherwise <c>false</c>.
    /// </returns>
    private static bool IsSJIS(byte[] buffer, int length)
    {
        bool nonAscii = false;
        int i = 0;
        while (i < length) {
            byte b = buffer[i];
            if ((b & 0x80) == 0) {
                /* 1 byte (ASCII) */
                i++;
                continue;
            }
            if (IsHalfWidthKatakanaOfSJIS(b)) {
                /* 1 byte (half-width katakana) */
                nonAscii = true;
                i++;
                continue;
            }
            if (!IsLeadByteOfSJIS(b)) {
                return false;
            }
            // A lead byte at the very end of the chunk is tolerated; its trail byte
            // lies beyond the examined data.
            if (i + 1 < length && !IsTrailByteOfSJIS(buffer[i + 1])) {
                return false;
            }
            nonAscii = true;
            i += 2;
        }
        return nonAscii;
    }

    /// <summary>Tests whether <paramref name="b"/> is a UTF-8 continuation byte (bit pattern 10xxxxxx).</summary>
    private static bool Is2ndByteOfUTF8(byte b)
    {
        return (b & 0xc0) == 0x80;
    }

    /// <summary>Tests whether <paramref name="b"/> is a single-byte half-width katakana code (0xA1-0xDF).</summary>
    private static bool IsHalfWidthKatakanaOfSJIS(byte b)
    {
        return b >= 0xa1 && b <= 0xdf;
    }

    /// <summary>Tests whether <paramref name="b"/> can be the first byte of a double-byte Shift_JIS character.</summary>
    private static bool IsLeadByteOfSJIS(byte b)
    {
        return (b >= 0x81 && b <= 0x9f) || (b >= 0xe0 && b <= 0xfc);
    }

    /// <summary>Tests whether <paramref name="b"/> can be the second byte of a double-byte Shift_JIS character.</summary>
    private static bool IsTrailByteOfSJIS(byte b)
    {
        return (b >= 0x40 && b <= 0x7e) || (b >= 0x80 && b <= 0xfc);
    }
}
| 129 |
} |