Develop and Download Open Source Software

Browse Subversion Repository

Annotation of /U_CharCode.pas

Parent Directory | Revision Log


Revision 7 - (hide annotations) (download) (as text)
Sat Aug 15 03:06:09 2015 UTC (8 years, 7 months ago) by yamat0jp
File MIME type: text/x-pascal
File size: 8897 byte(s)
タグ読み取りに使用するユニットを変更しました。現在Androidでは利用できませんが、書き換えをして使えるようにしようと思います。
1 yamat0jp 7 {
2     part of: MP3FileUtils v0.5
3    
4     -------------------------------------------------------
5    
6     The contents of this file are subject to the Mozilla Public License
7     Version 1.1 (the "License"); you may not use this file except in
8     compliance with the License. You may obtain a copy of the License at
9     http://www.mozilla.org/MPL/
10    
11     Software distributed under the License is distributed on an "AS IS"
12     basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
13     License for the specific language governing rights and limitations
14     under the License.
15    
16     The Original Code is U_CharCode, part of MP3FileUtils.
17    
18     The Initial Developer of the Original Code is Daniel Gaussmann,
19     mail@gausi.de. Portions created by the Initial Developer are
20     Copyright (C) 2005-2009 the Initial Developer. All Rights Reserved.
21    
22     Contributor(s): (none yet)
23    
24     Alternatively, the contents of this file may be used under the terms
25     of the GNU Lesser General Public License Version 2.1 or later
26     (the "LGPL"), in which case the provisions of LGPL are applicable
27     instead of those above. If you wish to allow use of your version of
28     this file only under the terms of the LGPL and not to allow others to use
29     your version of this file under the MPL, indicate your decision by
30     deleting the provisions above and replace them with the notice and
31     other provisions required by the LGPL. If you do not delete
32     the provisions above, a recipient may use your version of this file
33     under either the MPL or the LGPL License.
34    
35     -------------------------------------------------------
36    
37     Use this unit to detect the codepage that was most likely used in an
38     Ansi-tagged mp3-file.
39    
40     Idea behind this function here:
41    
42     * Count the greek, hebrew, cyrillic, ... symbols in the filename
43     to get the language
44     * choose a codepage for this language.
45     Note: Not every codepage is supported
46    
47     Note: This is a workaround. You _should_really_ use Unicode-formats
48     (utf-16, utf-8) for chars beyond #255
49     It seems to work pretty well, but there can be no guarantee for
50     proper functionality!
51    
52     }
53    
54    
55     unit U_CharCode;
56    
57     interface
58    
59     uses
60     SysUtils, Classes, Windows;
61    
type

  {$IFNDEF UNICODE}
  // Compilers that do not define UNICODE get UnicodeString as an alias
  // for WideString, so the rest of the unit can use one name.
  UnicodeString = WideString;
  {$ENDIF}

  // Describes a single codepage that can be offered to the user.
  TCodePage = record
    Description : string;    // display text for this codepage
    CodePage    : Cardinal;  // numeric codepage identifier
    Index       : Integer;   // position of the entry inside its *Encodings array
  end;

  // The codepage to use for each script recognised by GetCodepage,
  // plus two general-purpose flags for the calling application.
  TConvertOptions = record
    Greek    : TCodePage;
    Cyrillic : TCodePage;
    Hebrew   : TCodePage;
    Arabic   : TCodePage;
    Thai     : TCodePage;
    Korean   : TCodePage;
    Chinese  : TCodePage;
    Japanese : TCodePage;
    // The two flags below are not read by GetCodepage. They are offered as
    // a convenient place to keep user settings for Unicode/Ansi handling;
    // pass them on to your ID3v*Tag objects.
    AlwaysWriteUnicode : Boolean;
    AutoDetectCodePage : Boolean;
  end;
89    
90    
const
  // Returned by GetCodepage when no supported script is found in the input.
  DefaultCharCode : TCodePage = (Description: 'System default'; CodePage: CP_ACP; Index: 0);

  // For every script: the codepages this unit offers. Index always equals the
  // entry's position in its array; entry 0 is what the one-argument
  // GetCodepage overload returns for that script.

  GreekEncodings : array[0..1] of TCodePage =
    ( (Description: 'MS Windows Greek'; CodePage: 1253; Index: 0),
      // Fixed: the DOS/OEM Greek codepage identifier is 737. The previous
      // value 727 is not a Windows codepage identifier.
      (Description: 'IBM PC Greek'; CodePage: 737; Index: 1) );

  CyrillicEncodings : array[0..2] of TCodePage =
    ( (Description: 'MS Windows Cyrillic'; CodePage: 1251; Index: 0),
      (Description: 'IBM PC Cyrillic'; CodePage: 855; Index: 1),
      (Description: 'ISO 8859-5 Latin/Cyrillic'; CodePage: 28595; Index: 2) );

  HebrewEncodings : array[0..2] of TCodePage =
    ( (Description: 'MS Windows Hebrew'; CodePage: 1255; Index: 0),
      (Description: 'Hebrew (DOS)'; CodePage: 862; Index: 1),
      (Description: 'ISO 8859-8 Latin/Hebrew'; CodePage: 28598; Index: 2) );

  ArabicEncodings : array[0..2] of TCodePage =
    ( (Description: 'MS Windows Arabisch'; CodePage: 1256; Index: 0),
      (Description: 'Arabisch (DOS)'; CodePage: 720; Index: 1),
      (Description: 'ISO 8859-6 Latin/Arabisch'; CodePage: 28596; Index: 2) );

  ThaiEncodings : array[0..0] of TCodePage =
    ( (Description: 'MS Windows Thai'; CodePage: 874; Index: 0) );

  ChineseEncodings : array[0..1] of TCodePage =
    ( (Description: 'Traditional Chinese (Big5)'; CodePage: 950; Index: 0),
      (Description: 'Simplified Chinese GBK'; CodePage: 936; Index: 1) );

  KoreanEncodings : array[0..0] of TCodePage =
    ( (Description: 'MS Korean'; CodePage: 949; Index: 0) );

  JapaneseEncodings : array[0..0] of TCodePage =
    ( (Description: 'Japanese Shift-JIS'; CodePage: 932; Index: 0) );
125    
126    
127     // Guess the codepage from the characters in aFilename. For a detected script the matching field of Options is returned (the user's choice where this unit offers several codepages); DefaultCharCode if no script is detected.
128     function GetCodepage(aFilename: UnicodeString; Options: TConvertOptions): TCodePage; overload;
129    
130     // Guess the codepage from the characters in aFilename. For a detected script the first entry of its *Encodings array is returned; DefaultCharCode if no script is detected.
131     function GetCodepage(aFilename: UnicodeString): TCodePage; overload;
132    
133     implementation
134    
135    
{ Guesses the Ansi codepage that fits aFilename best.

  Every character of aFilename is classified by its Unicode code point and
  counted per script. The script with the highest count wins and the
  codepage configured for it in Options is returned. If no character of a
  supported script is found, DefaultCharCode is returned.

  Ties between scripts are resolved by test order (Greek, Cyrillic, Hebrew,
  Arabic, Thai, Korean, Japanese): an earlier script keeps the result unless
  a later one has strictly more hits. Chinese is the exception, see below.

  Fixes relative to the previous version:
    * Hebrew started at U+05D1 and therefore skipped Alef (U+05D0).
    * Hangul covered only U+AC02..U+CEFF; the Hangul Syllables block is
      U+AC00..U+D7A3, so common syllables such as U+D55C were never counted.
    * Kana stopped at U+30F6 and missed e.g. the prolonged sound mark U+30FC;
      the range now runs to the end of the Katakana block (U+30FF).
    * Ideographs stopped at U+9F67; the range now runs to the end of the
      CJK Unified Ideographs block (U+9FFF). }
function GetCodepage(aFilename: UnicodeString; Options: TConvertOptions): TCodePage;
var
  Greek, Cyrillic, Hebrew, Arabic, Thai, Korean, Chinese, Japanese: Integer;
  i, max: Integer;
begin
  Greek := 0;
  Cyrillic := 0;
  Hebrew := 0;
  Arabic := 0;
  Thai := 0;
  Korean := 0;
  Chinese := 0;
  Japanese := 0;

  // Count the characters of each supported script.
  for i := 1 to Length(aFilename) do
    case Ord(aFilename[i]) of
      $0384..$03CE: Inc(Greek);
      $0401..$045F: Inc(Cyrillic);
      $05D0..$05EA: Inc(Hebrew);      // Alef..Tav
      $061B..$0652: Inc(Arabic);
      $0E01..$0E5B: Inc(Thai);
      $3041..$30FF: Inc(Japanese);    // Hiragana / Katakana
      $3105..$3129: Inc(Chinese);     // Bopomofo / Zhuyin
      $4E00..$9FFF:
        begin
          // Ideographs are shared by all three languages, so each of them
          // gets a hit.
          Inc(Japanese);
          Inc(Chinese);
          Inc(Korean);
        end;
      $AC00..$D7A3: Inc(Korean);      // Hangul syllables
    end;

  Result := DefaultCharCode;
  max := 0;

  if Greek > max then
  begin
    max := Greek;
    Result := Options.Greek;
  end;

  if Cyrillic > max then
  begin
    max := Cyrillic;
    Result := Options.Cyrillic;
  end;

  if Hebrew > max then
  begin
    max := Hebrew;
    Result := Options.Hebrew;
  end;

  if Arabic > max then
  begin
    max := Arabic;
    Result := Options.Arabic;
  end;

  if Thai > max then
  begin
    max := Thai;
    Result := Options.Thai;
  end;

  if Korean > max then
  begin
    max := Korean;
    Result := Options.Korean;
  end;

  if Japanese > max then
  begin
    max := Japanese;
    Result := Options.Japanese;
  end;

  // Chinese is tested with ">=" on purpose: if only the shared ideographs
  // occur, the Korean, Japanese and Chinese counts are equal and the text is
  // assumed to be Chinese. "max > 0" keeps the default for input without
  // any counted character.
  if (Chinese >= max) and (max > 0) then
    Result := Options.Chinese;
end;
215    
216    
{ Guesses the Ansi codepage that fits aFilename best, without user settings.

  For every script the first entry of its *Encodings array is used. If no
  character of a supported script is found, DefaultCharCode is returned.

  This overload used to repeat the whole character classification of the
  two-argument overload. It now fills a TConvertOptions record with the
  first entry of each array and delegates, so both overloads always apply
  the same ranges and the same tie-breaking rules. }
function GetCodepage(aFilename: UnicodeString): TCodePage; overload;
var
  FirstChoice: TConvertOptions;
begin
  FirstChoice.Greek := GreekEncodings[0];
  FirstChoice.Cyrillic := CyrillicEncodings[0];
  FirstChoice.Hebrew := HebrewEncodings[0];
  FirstChoice.Arabic := ArabicEncodings[0];
  FirstChoice.Thai := ThaiEncodings[0];
  FirstChoice.Korean := KoreanEncodings[0];
  FirstChoice.Chinese := ChineseEncodings[0];
  FirstChoice.Japanese := JapaneseEncodings[0];
  // Not read by the detection; set only so that the record is fully
  // initialised.
  FirstChoice.AlwaysWriteUnicode := False;
  FirstChoice.AutoDetectCodePage := False;

  Result := GetCodepage(aFilename, FirstChoice);
end;
296     end.
297    

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26