Develop and Download Open Source Software

Browse Subversion Repository

Contents of /trunk/ktx/Utils/FileEncodingDetector.cs

Parent Directory | Revision Log


Revision 41 - (show annotations) (download)
Wed Nov 16 08:29:09 2022 UTC (18 months, 1 week ago) by bananajinn
File size: 3164 byte(s)
文字コード判別処理でBOMの判定が間違えていたのを修正
1 /*
2 * Copyright(C) Dreamgate co.,ltd. All rights reserved.
3 */
4 using System;
5 using System.IO;
6 using System.Text;
7
8 namespace ktox.Utils
9 {
/// <summary>
/// Detects the text encoding of a file, first from a byte-order mark (BOM) and,
/// when no BOM is present, from a byte-pattern heuristic that distinguishes
/// UTF-8 from Shift_JIS (code page 932).
/// </summary>
public static class FileEncodingDetector
{
    /// <summary>Number of bytes inspected per sample read from the file.</summary>
    private const int ChunkSize = 1024;

    /// <summary>Maximum number of samples inspected before giving up.</summary>
    private const int MaxChunks = 10;

    /// <summary>
    /// Determines the encoding of the file at <paramref name="path"/>.
    /// </summary>
    /// <param name="path">Path of the file to inspect. The file is opened read-only
    /// with <see cref="FileShare.ReadWrite"/> so files held open by other writers can be read.</param>
    /// <returns>
    /// The encoding indicated by the BOM (UTF-8, UTF-16 LE/BE, UTF-32 LE/BE); otherwise
    /// <see cref="Encoding.UTF8"/> or the code page 932 encoding when one of the first
    /// <c>MaxChunks</c> samples contains non-ASCII bytes that are valid for that encoding;
    /// otherwise <c>null</c> (for example, for an empty or pure-ASCII sample).
    /// </returns>
    /// <remarks>
    /// NOTE(review): <c>Encoding.GetEncoding(932)</c> requires the code-pages encoding
    /// provider to be registered on .NET Core / .NET 5+; confirm the target runtime.
    /// </remarks>
    public static Encoding GetEncoding(string path)
    {
        using (var fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite)) {
            var buffer = new byte[ChunkSize];

            // The longest BOM that is recognised is four bytes long.
            var len = ReadBlock(fs, buffer, 4);
            Encoding result = GetEncodingFromBOM(buffer, len);
            if (result != null) {
                return result;
            }

            // No BOM: rewind and sample the content itself.
            fs.Seek(0, SeekOrigin.Begin);
            for (var count = 0; count < MaxChunks; count++) {
                len = ReadBlock(fs, buffer, buffer.Length);
                if (len == 0) {
                    // End of file reached; nothing more to inspect.
                    break;
                }
                // UTF-8 is tested first because its byte patterns are the stricter of the two.
                if (IsUTF8(buffer, len)) {
                    return Encoding.UTF8;
                }
                if (IsSJIS(buffer, len)) {
                    return Encoding.GetEncoding(932);
                }
            }
        }
        return null;
    }

    /// <summary>
    /// Reads up to <paramref name="count"/> bytes into the start of <paramref name="buffer"/>,
    /// repeating the read until the request is satisfied or the stream ends
    /// (a single <see cref="Stream.Read(byte[], int, int)"/> call may return fewer bytes).
    /// </summary>
    /// <returns>The number of bytes actually read; 0 at end of stream.</returns>
    private static int ReadBlock(Stream stream, byte[] buffer, int count)
    {
        int total = 0;
        while (total < count) {
            int read = stream.Read(buffer, total, count - total);
            if (read <= 0) {
                break;
            }
            total += read;
        }
        return total;
    }

    /// <summary>
    /// Maps the BOM at the start of <paramref name="buffer"/> to an encoding.
    /// </summary>
    /// <param name="buffer">Buffer holding the first bytes of the file.</param>
    /// <param name="length">Number of valid bytes in <paramref name="buffer"/>.</param>
    /// <returns>The matching encoding, or <c>null</c> when no known BOM is present.</returns>
    private static Encoding GetEncodingFromBOM(byte[] buffer, int length)
    {
        // The UTF-32 LE BOM begins with the UTF-16 LE BOM, so it must be tested first.
        if (StartsWith(buffer, length, 0xff, 0xfe, 0x00, 0x00)) {
            return Encoding.UTF32;
        }
        if (StartsWith(buffer, length, 0x00, 0x00, 0xfe, 0xff)) {
            return new UTF32Encoding(true, true);
        }
        if (StartsWith(buffer, length, 0xfe, 0xff)) {
            return Encoding.BigEndianUnicode;
        }
        if (StartsWith(buffer, length, 0xff, 0xfe)) {
            return Encoding.Unicode;
        }
        if (StartsWith(buffer, length, 0xef, 0xbb, 0xbf)) {
            return Encoding.UTF8;
        }
        return null;
    }

    /// <summary>
    /// Returns <c>true</c> when the first <paramref name="length"/> bytes of
    /// <paramref name="buffer"/> begin with the byte sequence <paramref name="prefix"/>.
    /// </summary>
    private static bool StartsWith(byte[] buffer, int length, params byte[] prefix)
    {
        if (length < prefix.Length) {
            return false;
        }
        for (int i = 0; i < prefix.Length; i++) {
            if (buffer[i] != prefix[i]) {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Returns <c>true</c> when the sample contains at least one multi-byte UTF-8 sequence
    /// and no byte that is invalid for UTF-8. A pure-ASCII sample yields <c>false</c>
    /// because it carries no evidence either way.
    /// </summary>
    private static bool IsUTF8(byte[] buffer, int length)
    {
        bool multibyte = false;
        int i = 0;
        while (i < length) {
            byte b = buffer[i];
            if (b < 0x80) {
                /* 1 byte (ASCII) */
                i++;
                continue;
            }
            int charLen = GetUTF8SequenceLength(b);
            if (charLen == 0) {
                return false;
            }
            // A sequence cut off by the end of the sample is tolerated: only the
            // continuation bytes that are actually present are validated.
            for (int j = 1; j < charLen && i + j < length; j++) {
                if (!Is2ndByteOfUTF8(buffer[i + j])) {
                    return false;
                }
            }
            multibyte = true;
            i += charLen;
        }
        return multibyte;
    }

    /// <summary>
    /// Returns the total length of the UTF-8 sequence introduced by <paramref name="lead"/>,
    /// or 0 when the byte cannot start a multi-byte sequence. 0xC0, 0xC1 and 0xF5 and above
    /// are rejected because they never occur in well-formed UTF-8.
    /// </summary>
    private static int GetUTF8SequenceLength(byte lead)
    {
        if (lead >= 0xc2 && lead <= 0xdf) {
            return 2;
        }
        if (lead >= 0xe0 && lead <= 0xef) {
            return 3;
        }
        if (lead >= 0xf0 && lead <= 0xf4) {
            return 4;
        }
        return 0;
    }

    /// <summary>
    /// Returns <c>true</c> when the sample contains at least one non-ASCII Shift_JIS
    /// character (a double-byte character or a single-byte half-width katakana) and
    /// no byte that is invalid for Shift_JIS.
    /// </summary>
    private static bool IsSJIS(byte[] buffer, int length)
    {
        bool nonAscii = false;
        for (int i = 0; i < length; i++) {
            byte b = buffer[i];
            if (b < 0x80) {
                /* 1 byte (ASCII) */
                continue;
            }
            if (IsHalfWidthKatakanaOfSJIS(b)) {
                /* 1 byte (half-width katakana) */
                nonAscii = true;
                continue;
            }
            if (!IsLeadByteOfSJIS(b)) {
                return false;
            }
            // A lead byte that is the last byte of the sample is tolerated.
            if (i + 1 < length && !IsTrailByteOfSJIS(buffer[i + 1])) {
                return false;
            }
            nonAscii = true;
            i++; // skip the trail byte
        }
        return nonAscii;
    }

    /// <summary>Returns <c>true</c> for a UTF-8 continuation byte (bit pattern 10xxxxxx).</summary>
    private static bool Is2ndByteOfUTF8(byte b)
    {
        return (b & 0xc0) == 0x80;
    }

    /// <summary>Returns <c>true</c> for a Shift_JIS single-byte half-width katakana (0xA1-0xDF).</summary>
    private static bool IsHalfWidthKatakanaOfSJIS(byte b)
    {
        return b >= 0xa1 && b <= 0xdf;
    }

    /// <summary>Returns <c>true</c> for the first byte of a Shift_JIS double-byte character.</summary>
    private static bool IsLeadByteOfSJIS(byte b)
    {
        return (b >= 0x81 && b <= 0x9f) || (b >= 0xe0 && b <= 0xfc);
    }

    /// <summary>Returns <c>true</c> for the second byte of a Shift_JIS double-byte character.</summary>
    private static bool IsTrailByteOfSJIS(byte b)
    {
        return (b >= 0x40 && b <= 0x7e) || (b >= 0x80 && b <= 0xfc);
    }
}
129 }

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26