Develop and Download Open Source Software

Browse Subversion Repository

Contents of /SchoolIdolFestivalSimulator.Components/FileEncoding.cs

Parent Directory | Revision Log


Revision 11 - (show annotations) (download)
Wed Jun 4 12:23:45 2014 UTC (9 years, 11 months ago) by kayochin_3141
File size: 7033 byte(s)


1 using System;
2 using System.Collections.Generic;
3 using System.Linq;
4 using System.Text;
5 using System.IO;
6
7 namespace SchoolIdolFestivalSimulator.Components {
/// <summary>
/// Heuristic detector for the text encoding of (mainly Japanese) byte data.
/// </summary>
public class FileEncoding {
    // Bytes that appear in the JIS escape sequences recognised below.
    private const byte Escape = 0x1B;
    private const byte At = 0x40;         // '@'
    private const byte Dollar = 0x24;     // '$'
    private const byte Ampersand = 0x26;  // '&'
    private const byte OpenParen = 0x28;  // '('
    private const byte UpperB = 0x42;     // 'B'
    private const byte UpperD = 0x44;     // 'D'
    private const byte UpperJ = 0x4A;     // 'J'
    private const byte UpperI = 0x49;     // 'I'

    /// <summary>
    /// Reads the whole file and guesses its text encoding with
    /// <see cref="GetEncoding(byte[])"/>.
    /// </summary>
    /// <param name="fileName">Path of the file to inspect.</param>
    /// <returns>The encoding that seems most plausible, or null when it
    /// cannot be determined.</returns>
    public static Encoding GetFileEncoding(string fileName) {
        // File.ReadAllBytes returns every byte of the file. The previous
        // hand-rolled read allocated Length - 1 bytes, which dropped the last
        // byte (possibly the tail of a multi-byte character) and threw on an
        // empty file; it also relied on a single Read call filling the buffer.
        return GetEncoding(File.ReadAllBytes(fileName));
    }

    /// <summary>
    /// Determines the character encoding of the given data.
    /// </summary>
    /// <remarks>
    /// Ported from the getcode method of Jcode.pm.
    /// Jcode.pm (http://openlab.ring.gr.jp/Jcode/index-j.html)
    /// Jcode.pm Copyright: Copyright 1999-2005 Dan Kogai
    /// </remarks>
    /// <param name="bytes">The data whose encoding is to be examined.</param>
    /// <returns>The Encoding object that seems appropriate: UTF-16 LE
    /// ("Unicode"), ASCII, JIS (code page 50220), EUC (code page 51932),
    /// Shift_JIS (code page 932) or UTF-8. Returns null when the data looks
    /// binary or no candidate clearly wins.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
    public static System.Text.Encoding GetEncoding(byte[] bytes) {
        if (bytes == null) {
            throw new ArgumentNullException("bytes");
        }

        int len = bytes.Length;

        // Encode::is_utf8 from the Perl original is ignored.
        // Step 1: control bytes 0x00-0x06, 0x7F and 0xFF mark the data as
        // binary, unless a NUL followed by a 7-bit byte suggests raw UTF-16.
        bool isBinary = false;
        for (int i = 0; i < len; i++) {
            byte current = bytes[i];
            if (current <= 0x06 || current == 0x7F || current == 0xFF) {
                isBinary = true;
                if (current == 0x00 && i < len - 1 && bytes[i + 1] <= 0x7F) {
                    // Smells like raw Unicode.
                    return System.Text.Encoding.Unicode;
                }
            }
        }
        if (isBinary) {
            return null;
        }

        // Step 2: with no escape byte and no byte >= 0x80 the data is not
        // Japanese text, so it is reported as ASCII.
        if (!ContainsEscapeOrHighByte(bytes)) {
            return System.Text.Encoding.ASCII;
        }

        // Step 3: any recognised JIS escape sequence decides for JIS.
        for (int i = 0; i < len - 2; i++) {
            if (bytes[i] == Escape && IsJisEscapeSequenceAt(bytes, i)) {
                // NOTE: code pages 50220, 51932 and 932 are not built into
                // .NET Core / .NET 5+; there the application must register
                // CodePagesEncodingProvider before these lookups succeed.
                return System.Text.Encoding.GetEncoding(50220);
            }
        }

        // Step 4: should be EUC, Shift_JIS or UTF-8. Count how many bytes
        // form valid sequences in each encoding and let the largest win.
        // (use of (?:) in the Perl original by Hiroki Ohzaki <ohzaki@iod.ricoh.co.jp>)
        int sjis = CountShiftJisBytes(bytes);
        int euc = CountEucBytes(bytes);
        int utf8 = CountUtf8Bytes(bytes);
        // M. Takahashi's suggested weighting (utf8 += utf8 / 2) is left disabled.

        System.Diagnostics.Debug.WriteLine(
            string.Format("sjis = {0}, euc = {1}, utf8 = {2}", sjis, euc, utf8));

        if (euc > sjis && euc > utf8) {
            // EUC
            return System.Text.Encoding.GetEncoding(51932);
        }
        if (sjis > euc && sjis > utf8) {
            // SJIS
            return System.Text.Encoding.GetEncoding(932);
        }
        if (utf8 > euc && utf8 > sjis) {
            // UTF8
            return System.Text.Encoding.UTF8;
        }
        // A tie for the top score is treated as undecidable.
        return null;
    }

    /// <summary>Returns true when the data holds an ESC byte or any byte >= 0x80.</summary>
    private static bool ContainsEscapeOrHighByte(byte[] bytes) {
        foreach (byte value in bytes) {
            if (value == Escape || value >= 0x80) {
                return true;
            }
        }
        return false;
    }

    /// <summary>
    /// Tests whether a JIS escape sequence starts at <paramref name="index"/>.
    /// The caller guarantees bytes[index] is ESC and index + 2 is in range.
    /// </summary>
    private static bool IsJisEscapeSequenceAt(byte[] bytes, int index) {
        int len = bytes.Length;
        byte b2 = bytes[index + 1];
        byte b3 = bytes[index + 2];

        // ESC $ @ (JIS_0208 1978) or ESC $ B (JIS_0208 1983)
        if (b2 == Dollar && (b3 == At || b3 == UpperB)) {
            return true;
        }
        // ESC ( B / ESC ( J (JIS_ASC) or ESC ( I (JIS_KANA)
        if (b2 == OpenParen && (b3 == UpperB || b3 == UpperJ || b3 == UpperI)) {
            return true;
        }
        if (index < len - 3) {
            byte b4 = bytes[index + 3];
            // ESC $ ( D (JIS_0212)
            if (b2 == Dollar && b3 == OpenParen && b4 == UpperD) {
                return true;
            }
            // ESC & @ ESC $ B (JIS_0208 1990)
            if (index < len - 5 &&
                b2 == Ampersand && b3 == At && b4 == Escape &&
                bytes[index + 4] == Dollar && bytes[index + 5] == UpperB) {
                return true;
            }
        }
        return false;
    }

    /// <summary>Counts the bytes that form two-byte Shift_JIS characters (SJIS_C).</summary>
    private static int CountShiftJisBytes(byte[] bytes) {
        int count = 0;
        for (int i = 0; i < bytes.Length - 1; i++) {
            byte lead = bytes[i];
            byte trail = bytes[i + 1];
            bool isLead = (0x81 <= lead && lead <= 0x9F) || (0xE0 <= lead && lead <= 0xFC);
            bool isTrail = (0x40 <= trail && trail <= 0x7E) || (0x80 <= trail && trail <= 0xFC);
            if (isLead && isTrail) {
                count += 2;
                i++; // skip the trail byte that was just consumed
            }
        }
        return count;
    }

    /// <summary>
    /// Counts the bytes that form EUC sequences: two-byte characters (EUC_C),
    /// 0x8E-prefixed kana (EUC_KANA) and 0x8F-prefixed three-byte characters
    /// (EUC_0212).
    /// </summary>
    private static int CountEucBytes(byte[] bytes) {
        int len = bytes.Length;
        int count = 0;
        for (int i = 0; i < len - 1; i++) {
            byte b1 = bytes[i];
            byte b2 = bytes[i + 1];
            bool secondInRange = 0xA1 <= b2 && b2 <= 0xFE;
            if (((0xA1 <= b1 && b1 <= 0xFE) && secondInRange) ||
                (b1 == 0x8E && (0xA1 <= b2 && b2 <= 0xDF))) {
                // EUC_C / EUC_KANA
                count += 2;
                i++;
            } else if (i < len - 2) {
                byte b3 = bytes[i + 2];
                if (b1 == 0x8F && secondInRange && (0xA1 <= b3 && b3 <= 0xFE)) {
                    // EUC_0212
                    count += 3;
                    i += 2;
                }
            }
        }
        return count;
    }

    /// <summary>Counts the bytes that form two- or three-byte UTF-8 sequences.</summary>
    private static int CountUtf8Bytes(byte[] bytes) {
        int len = bytes.Length;
        int count = 0;
        for (int i = 0; i < len - 1; i++) {
            byte b1 = bytes[i];
            byte b2 = bytes[i + 1];
            bool secondIsContinuation = 0x80 <= b2 && b2 <= 0xBF;
            if ((0xC0 <= b1 && b1 <= 0xDF) && secondIsContinuation) {
                // Two-byte sequence
                count += 2;
                i++;
            } else if (i < len - 2) {
                byte b3 = bytes[i + 2];
                if ((0xE0 <= b1 && b1 <= 0xEF) && secondIsContinuation &&
                    (0x80 <= b3 && b3 <= 0xBF)) {
                    // Three-byte sequence
                    count += 3;
                    i += 2;
                }
            }
        }
        return count;
    }
}
182 }

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26