Develop and Download Open Source Software

Browse Subversion Repository

Contents of /U_CharCode.pas

Parent Directory | Revision Log


Revision 7 - (show annotations) (download) (as text)
Sat Aug 15 03:06:09 2015 UTC (8 years, 6 months ago) by yamat0jp
File MIME type: text/x-pascal
File size: 8897 byte(s)
タグ読み取りに使用するユニットを変更しました。現在Androidでは利用できませんが、書き換えをして使えるようにしようと思います。
1 {
2 part of: MP3FileUtils v0.5
3
4 -------------------------------------------------------
5
6 The contents of this file are subject to the Mozilla Public License
7 Version 1.1 (the "License"); you may not use this file except in
8 compliance with the License. You may obtain a copy of the License at
9 http://www.mozilla.org/MPL/
10
11 Software distributed under the License is distributed on an "AS IS"
12 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
13 License for the specific language governing rights and limitations
14 under the License.
15
16 The Original Code is U_CharCode, part of MP3FileUtils.
17
18 The Initial Developer of the Original Code is Daniel Gaussmann,
19 mail@gausi.de. Portions created by the Initial Developer are
20 Copyright (C) 2005-2009 the Initial Developer. All Rights Reserved.
21
22 Contributor(s): (none yet)
23
24 Alternatively, the contents of this file may be used under the terms
25 of the GNU Lesser General Public License Version 2.1 or later
26 (the "LGPL"), in which case the provisions of LGPL are applicable
27 instead of those above. If you wish to allow use of your version of
28 this file only under the terms of the LGPL and not to allow others to use
29 your version of this file under the MPL, indicate your decision by
30 deleting the provisions above and replace them with the notice and
31 other provisions required by the LGPL. If you do not delete
32 the provisions above, a recipient may use your version of this file
33 under either the MPL or the LGPL License.
34
35 -------------------------------------------------------
36
37 Use this unit to detect the probably used codepage of an Ansi-tagged
38 mp3-file.
39
40 Idea behind this function here:
41
42 * Count the greek, hebrew, cyrillic, ... symbols in the filename
43 to get the language
44 * choose a codepage for this language.
45 Note: Not every codepage is supported
46
47 Note: This is a workaround. You _should_really_ use Unicode-formats
48 (utf-16, utf-8) for chars beyond #255
49 It seems to work pretty well, but there can be no guarantee for
50 proper functionality!
51
52 }
53
54
55 unit U_CharCode;
56
57 interface
58
59 uses
60 SysUtils, Classes, Windows;
61
type

  {$IFNDEF UNICODE}
  // Compilers that do not define UNICODE get UnicodeString as an alias of
  // WideString, so the rest of the unit can use one type name throughout.
  UnicodeString = WideString;
  {$ENDIF}

  // Describes a single ANSI codepage that can be offered to the user.
  TCodePage = record
    Description : string;    // human-readable name of the codepage
    CodePage    : Cardinal;  // codepage identifier, e.g. CP_ACP or 1253
    Index       : Integer;   // in the ...Encodings tables of this unit this
                             // equals the entry's position in its table
  end;

  // The codepage to use per detected script, plus two general settings.
  TConvertOptions = record
    // One codepage for every script GetCodepage is able to recognise.
    Greek,
    Cyrillic,
    Hebrew,
    Arabic,
    Thai,
    Korean,
    Chinese,
    Japanese : TCodePage;
    // Convenience storage for user settings about Unicode/Ansi handling.
    // They are not evaluated in this unit; apply them to your
    // ID3v*Tag objects yourself.
    AlwaysWriteUnicode,
    AutoDetectCodePage : Boolean;
  end;
89
90
const
  // Returned by GetCodepage when no character of a supported script is found.
  DefaultCharCode : TCodePage = (Description: 'System default'; CodePage: CP_ACP; Index: 0);

  // Codepage tables, one per script. Within each table, Index repeats the
  // array position of the entry. Entry 0 is the one chosen by the
  // GetCodepage overload that takes no options.

  GreekEncodings : Array[0..1] of TCodePage =
    ( (Description: 'MS Windows Greek'; CodePage: 1253; Index: 0),
      // The DOS/OEM Greek codepage is 737; 727 is not a valid codepage identifier.
      (Description: 'IBM PC Greek';     CodePage: 737;  Index: 1));

  CyrillicEncodings : Array[0..2] of TCodePage =
    ( (Description: 'MS Windows Cyrillic';       CodePage: 1251;  Index: 0),
      (Description: 'IBM PC Cyrillic';           CodePage: 855;   Index: 1),
      (Description: 'ISO 8859-5 Latin/Cyrillic'; CodePage: 28595; Index: 2));

  HebrewEncodings : Array[0..2] of TCodePage =
    ( (Description: 'MS Windows Hebrew';       CodePage: 1255;  Index: 0),
      (Description: 'Hebrew (DOS)';            CodePage: 862;   Index: 1),
      (Description: 'ISO 8859-8 Latin/Hebrew'; CodePage: 28598; Index: 2));

  ArabicEncodings : Array[0..2] of TCodePage =
    ( (Description: 'MS Windows Arabisch';       CodePage: 1256;  Index: 0),
      (Description: 'Arabisch (DOS)';            CodePage: 720;   Index: 1),
      (Description: 'ISO 8859-6 Latin/Arabisch'; CodePage: 28596; Index: 2));

  ThaiEncodings : Array[0..0] of TCodePage =
    ( (Description: 'MS Windows Thai'; CodePage: 874; Index: 0));

  ChineseEncodings : Array[0..1] of TCodePage =
    ( (Description: 'Traditional Chinese (Big5)'; CodePage: 950; Index: 0),
      (Description: 'Simplified Chinese GBK';     CodePage: 936; Index: 1));

  KoreanEncodings : Array[0..0] of TCodePage =
    ( (Description: 'MS Korean'; CodePage: 949; Index: 0));

  JapaneseEncodings : Array[0..0] of TCodePage =
    ( (Description: 'Japanese Shift-JIS'; CodePage: 932; Index: 0));
125
126
{ Guesses the codepage from the characters found in aFilename and answers
  with the matching entry of Options, i.e. the user's choice for scripts
  that have more than one codepage in this unit. Yields DefaultCharCode
  when no character of a supported script occurs. }
function GetCodepage(aFilename: UnicodeString;
                     Options: TConvertOptions): TCodePage; overload;

{ Same detection, but answers with the first entry of the matching
  ...Encodings table. Yields DefaultCharCode when no character of a
  supported script occurs. }
function GetCodepage(aFilename: UnicodeString): TCodePage; overload;
132
133 implementation
134
135
{ Guesses the ANSI codepage that was most likely used for a file by counting,
  per script, the non-Latin characters that occur in its name.

  aFilename : Unicode text to analyse (normally the filename).
  Options   : the codepage to report for each script; the entry belonging to
              the winning script is returned unchanged.

  Returns DefaultCharCode when aFilename contains no character from any of
  the counted ranges. Otherwise the script with the highest count wins. On
  equal counts the script tested first is kept (order: Greek, Cyrillic,
  Hebrew, Arabic, Thai, Korean, Japanese), except that Chinese also wins a
  tie with the best count.

  This is a heuristic only; there is no guarantee the guess is correct. }
function GetCodepage(aFilename: UnicodeString; Options: TConvertOptions): TCodePage;
var
  Greek, Cyrillic, Hebrew, Arabic, Thai, Korean, Chinese, Japanese: Integer;
  i, BestCount: Integer;
begin
  Greek   := 0;  Cyrillic := 0;
  Hebrew  := 0;  Arabic   := 0;
  Thai    := 0;  Korean   := 0;
  Chinese := 0;  Japanese := 0;

  // Pass 1: count the characters of every recognised script.
  for i := 1 to Length(aFilename) do
  begin
    case Ord(aFilename[i]) of
      $0384..$03CE : Inc(Greek);
      $0401..$045F : Inc(Cyrillic);
      $05D0..$05EA : Inc(Hebrew);    // Hebrew letters Alef (U+05D0) .. Tav
      $061B..$0652 : Inc(Arabic);
      $0E01..$0E5B : Inc(Thai);
      $3041..$30F6 : Inc(Japanese);  // Hiragana / Katakana
      $3105..$3129 : Inc(Chinese);   // Bopomofo / Zhuyin
      $4E00..$9F67 : begin           // Ideographs shared by the three
                       Inc(Japanese);  // languages count for each of them
                       Inc(Chinese);
                       Inc(Korean);
                     end;
      $AC00..$D7A3 : Inc(Korean);    // the complete Hangul Syllables block
    end;
  end;

  // Pass 2: pick the script with the highest count.
  Result := DefaultCharCode;
  BestCount := 0;

  if Greek > BestCount then
  begin
    BestCount := Greek;
    Result := Options.Greek;
  end;

  if Cyrillic > BestCount then
  begin
    BestCount := Cyrillic;
    Result := Options.Cyrillic;
  end;

  if Hebrew > BestCount then
  begin
    BestCount := Hebrew;
    Result := Options.Hebrew;
  end;

  if Arabic > BestCount then
  begin
    BestCount := Arabic;
    Result := Options.Arabic;
  end;

  if Thai > BestCount then
  begin
    BestCount := Thai;
    Result := Options.Thai;
  end;

  if Korean > BestCount then
  begin
    BestCount := Korean;
    Result := Options.Korean;
  end;

  if Japanese > BestCount then
  begin
    BestCount := Japanese;
    Result := Options.Japanese;
  end;

  // ">=" rather than ">": Chinese, Korean and Japanese share the ideographs,
  // so a text made up of nothing but those shared signs is taken to be
  // Chinese. BestCount > 0 keeps the default for text without any hits.
  if (Chinese >= BestCount) and (BestCount > 0) then
    Result := Options.Chinese;
end;
215
216
{ Guesses the ANSI codepage that was most likely used for a file by counting,
  per script, the non-Latin characters that occur in its name.

  aFilename : Unicode text to analyse (normally the filename).

  Returns DefaultCharCode when aFilename contains no character from any of
  the counted ranges. Otherwise the first entry of the ...Encodings table of
  the script with the highest count is returned. On equal counts the script
  tested first is kept (order: Greek, Cyrillic, Hebrew, Arabic, Thai, Korean,
  Japanese), except that Chinese also wins a tie with the best count.

  This is a heuristic only; there is no guarantee the guess is correct. }
function GetCodepage(aFilename: UnicodeString): TCodePage; overload;
var
  Greek, Cyrillic, Hebrew, Arabic, Thai, Korean, Chinese, Japanese: Integer;
  i, BestCount: Integer;
begin
  Greek   := 0;  Cyrillic := 0;
  Hebrew  := 0;  Arabic   := 0;
  Thai    := 0;  Korean   := 0;
  Chinese := 0;  Japanese := 0;

  // Pass 1: count the characters of every recognised script.
  for i := 1 to Length(aFilename) do
  begin
    case Ord(aFilename[i]) of
      $0384..$03CE : Inc(Greek);
      $0401..$045F : Inc(Cyrillic);
      $05D0..$05EA : Inc(Hebrew);    // Hebrew letters Alef (U+05D0) .. Tav
      $061B..$0652 : Inc(Arabic);
      $0E01..$0E5B : Inc(Thai);
      $3041..$30F6 : Inc(Japanese);  // Hiragana / Katakana
      $3105..$3129 : Inc(Chinese);   // Bopomofo / Zhuyin
      $4E00..$9F67 : begin           // Ideographs shared by the three
                       Inc(Japanese);  // languages count for each of them
                       Inc(Chinese);
                       Inc(Korean);
                     end;
      $AC00..$D7A3 : Inc(Korean);    // the complete Hangul Syllables block
    end;
  end;

  // Pass 2: pick the script with the highest count.
  Result := DefaultCharCode;
  BestCount := 0;

  if Greek > BestCount then
  begin
    BestCount := Greek;
    Result := GreekEncodings[0];
  end;

  if Cyrillic > BestCount then
  begin
    BestCount := Cyrillic;
    Result := CyrillicEncodings[0];
  end;

  if Hebrew > BestCount then
  begin
    BestCount := Hebrew;
    Result := HebrewEncodings[0];
  end;

  if Arabic > BestCount then
  begin
    BestCount := Arabic;
    Result := ArabicEncodings[0];
  end;

  if Thai > BestCount then
  begin
    BestCount := Thai;
    Result := ThaiEncodings[0];
  end;

  if Korean > BestCount then
  begin
    BestCount := Korean;
    Result := KoreanEncodings[0];
  end;

  if Japanese > BestCount then
  begin
    BestCount := Japanese;
    Result := JapaneseEncodings[0];
  end;

  // ">=" rather than ">": Chinese, Korean and Japanese share the ideographs,
  // so a text made up of nothing but those shared signs is taken to be
  // Chinese. BestCount > 0 keeps the default for text without any hits.
  if (Chinese >= BestCount) and (BestCount > 0) then
    Result := ChineseEncodings[0];
end;
296 end.
297

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26