Develop and Download Open Source Software

Browse CVS Repository

Annotation of /gikonavigoeson/gikonavi/GikoBayesian.pas

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.13 - (hide annotations) (download) (as text)
Mon Nov 1 05:18:21 2004 UTC (19 years, 5 months ago) by yoffy
Branch: MAIN
Changes since 1.12: +8 -6 lines
File MIME type: text/x-pascal
前回のコミットで ModeWhite のチェックが抜けてしまっていたので修正。

1 yoffy 1.1 unit GikoBayesian;
2    
3     {!
4     \file GikoBayesian.pas
5     \brief ???ゃ?吾?≪?潟???c????/span>
6    
7 yoffy 1.13 $Id: GikoBayesian.pas,v 1.12 2004/11/01 04:45:25 yoffy Exp $
8 yoffy 1.1 }
9    
10     interface
11    
12     //==================================================
13     uses
14     //==================================================
15 yoffy 1.4 Classes;
16 yoffy 1.1
17     //==================================================
18     type
19     //==================================================
20    
21     {!***********************************************************
22     \brief ??茯???????????/span>
23     ************************************************************}
24     TWordInfo = class( TObject )
25     private
26     FNormalWord : Integer; //!< ??絽吾????茯????????糸?眼????????/span>
27     FImportantWord : Integer; //!< 羈?????茯????????糸?眼????????/span>
28     FNormalText : Integer; //!< ??絽吾????茯??????????障??????????腴?????/span>
29     FImportantText : Integer; //!< 羈?????茯??????????障??????????腴?????/span>
30    
31     public
32     property NormalWord : Integer read FNormalWord write FNormalWord;
33     property ImportantWord : Integer read FImportantWord write FImportantWord;
34     property NormalText : Integer read FNormalText write FNormalText;
35     property ImportantText : Integer read FImportantText write FImportantText;
36     end;
37    
38     {!***********************************************************
39     \brief 茹f??羝??水??茯???????????/span>
40     ************************************************************}
41     TWordCountInfo = class( TObject )
42     private
43     FWordCount : Integer; //!< ??茯???/span>
44    
45     public
46     property WordCount : Integer read FWordCount write FWordCount;
47     end;
48    
49     {!***********************************************************
50     \brief 茹f??羝??水??茯????鴻??
51     ************************************************************}
52     // TWordCount = class( THashedStringList ) // 羶???
53 yoffy 1.3 TWordCount = class( TStringList )
54 yoffy 1.1 public
55 yoffy 1.3 constructor Create;
56 yoffy 1.1 destructor Destroy; override;
57     end;
58    
59     {!***********************************************************
60     \brief ???c???帥?≪???眼???冴??
61     ************************************************************}
62     TGikoBayesianAlgorithm =
63 yoffy 1.9 (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
64 yoffy 1.1
65     {!***********************************************************
66     \brief ???ゃ?吾?≪?潟???c????/span>
67     ************************************************************}
68 yoffy 1.3 // TGikoBayesian = class( THashedStringList ) // 羶???
69     TGikoBayesian = class( TStringList )
70 yoffy 1.1 private
71     FFilePath : string; //!< 茯??粋昭???????<?ゃ??????/span>
72     function GetObject( const name : string ) : TWordInfo;
73     procedure SetObject( const name : string; value : TWordInfo );
74    
75     public
76     constructor Create;
77     destructor Destroy; override;
78    
79     //! ???<?ゃ??????絖??絮ユ???茯??水?冴???障??
80     procedure LoadFromFile( const filePath : string );
81    
82     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
83     procedure SaveToFile( const filePath : string );
84    
85     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
86     procedure Save;
87    
88     //! ??茯???????????宴????緇????障??
89     property Objects[ const name : string ] : TWordInfo
90     read GetObject write SetObject; default;
91    
92     //! ??腴??????障??????茯????????潟?????障??
93     procedure CountWord(
94     const text : string;
95     wordCount : TWordCount );
96    
97     {!
98     \brief Paul Graham 羈????冴?ャ??????腴???絵??墾??羆阪????障??
99     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
100     }
101     function CalcPaulGraham( wordCount : TWordCount ) : Extended;
102    
103     {!
104     \brief GaryRobinson 羈????冴?ャ??????腴???絵??墾??羆阪????障??
105     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
106     }
107     function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
108    
109 yoffy 1.9 {!
110     \brief GaryRobinson-Fisher 羈????冴?ャ??????腴???絵??墾??羆阪????障??
111     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
112     }
113     function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
114 yoffy 1.1
115     {!
116     \brief ??腴???茹f??
117     \param text 茹f????????腴?
118     \param wordCount 茹f??????????茯????鴻????菴???
119     \param algorithm 羈???墾??浦絎??????????≪???眼???冴??????絎????障??
120     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
121    
122     CountWord ? Calcxxxxx ???障???????茵??????????с????
123     }
124     function Parse(
125     const text : string;
126     wordCount : TWordCount;
127 yoffy 1.9 algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
128 yoffy 1.1 ) : Extended;
129    
130     {!
131     \brief 絖??????
132     \param wordCount Parse ?цВ??????????茯????鴻??
133     \param isImportant 羈??????鴻????腴???????????????? True
134     }
135     procedure Learn(
136     wordCount : TWordCount;
137     isImportant : Boolean );
138    
139     {!
140     \brief 絖??腟?????綽?????
141     \param wordCount Parse ?цВ??????????茯????鴻??
142     \param isImportant 羈??????鴻????腴???????????????????????? True
143     \warning 絖??羝??帥????腴???????????∈茯??堺?ャ?障??????<br>
144     Learn ????????????腴??? isImportant ???????c????????腴???
145     Forget ?????????若?帥???若?鴻???贋?????障????<br>
146     絖??羝??帥????????????????????????????????
147    
148     ???????膺?腟??????????≪????????с???????障??????<br>
149     wordCount ??緇?????腴? (Parse ? text 綣??? ???膺?腟??????帥?????≪???障????<br><br>
150    
151     筝祉??絵????腴?????羈?????腴????????帥????????? Forget -> Learn ?????т戎?????障????
152     }
153     procedure Forget(
154     wordCount : TWordCount;
155     isImportant : Boolean );
156     end;
157    
158     //==================================================
159     implementation
160     //==================================================
161    
162     uses
163 yoffy 1.12 SysUtils, Math, Windows,
164     MojuUtils;
165 yoffy 1.1
166     const
167     GIKO_BAYESIAN_FILE_VERSION = '1.0';
168 yoffy 1.3 {
169     Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
170     ModeWGraph, ModeWAlpha, ModeWNum,
171     ModeWHira, ModeWKata, ModeWKanji);
172     }
173     CharMode1 : array [ 0..255 ] of Byte =
174     (
175     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
177     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
178     2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
179     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
180     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
181     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
182     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
183    
184     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
185     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
186     0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
187     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
188     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
189 yoffy 1.6 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
190 yoffy 1.3 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
191     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
192     );
193 yoffy 1.1
194     //************************************************************
195     // misc
196     //************************************************************
197    
198     //==============================
199     // RemoveToken
200     //==============================
201     function RemoveToken(var s: string;const delimiter: string): string;
202     var
203     p: Integer;
204     begin
205     p := AnsiPos(delimiter, s);
206     if p = 0 then
207     Result := s
208     else
209     Result := Copy(s, 1, p - 1);
210     s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
211     end;
212    
213     //==============================
214     // AbsSort
215     //==============================
216     function AbsSort( p1, p2 : Pointer ) : Integer;
217     var
218     v1, v2 : Single;
219     begin
220    
221     v1 := Abs( Single( p1 ) - 0.5 );
222     v2 := Abs( Single( p2 ) - 0.5 );
223     if v1 > v2 then
224     Result := -1
225     else if v1 = v2 then
226     Result := 0
227     else
228     Result := 1;
229    
230     end;
231    
232     //************************************************************
233     // TWordCount class
234     //************************************************************
235 yoffy 1.3 constructor TWordCount.Create;
236     begin
237    
238     Duplicates := dupIgnore;
239     CaseSensitive := True;
240     Sorted := True;
241    
242     end;
243    
244 yoffy 1.1 destructor TWordCount.Destroy;
245     var
246     i : Integer;
247     begin
248    
249     for i := Count - 1 downto 0 do
250     if Objects[ i ] <> nil then
251     Objects[ i ].Free;
252    
253     inherited;
254    
255     end;
256    
257     //************************************************************
258     // TGikoBayesian class
259     //************************************************************
260    
261     //==============================
262     // Create
263     //==============================
264     constructor TGikoBayesian.Create;
265     begin
266    
267 yoffy 1.3 Duplicates := dupIgnore;
268     CaseSensitive := True;
269     Sorted := True;
270 yoffy 1.1
271     end;
272    
273     //==============================
274     // Destroy
275     //==============================
276     destructor TGikoBayesian.Destroy;
277     var
278     i : Integer;
279     begin
280    
281     for i := Count - 1 downto 0 do
282     if inherited Objects[ i ] <> nil then
283     inherited Objects[ i ].Free;
284    
285     inherited;
286    
287     end;
288    
289     procedure TGikoBayesian.LoadFromFile( const filePath : string );
290     var
291     i : Integer;
292     sl : TStringList;
293     s : string;
294     name : string;
295     info : TWordInfo;
296     begin
297    
298 yoffy 1.2 FFilePath := filePath;
299    
300 yoffy 1.1 if not FileExists( filePath ) then
301     Exit;
302 yoffy 1.2
303 yoffy 1.1 sl := TStringList.Create;
304     try
305     sl.LoadFromFile( filePath );
306    
307     for i := 1 to sl.Count - 1 do begin
308     s := sl[ i ];
309     name := RemoveToken( s, #1 );
310     info := TWordInfo.Create;
311     info.NormalWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
312     info.ImportantWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
313     info.NormalText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
314     info.ImportantText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
315    
316     AddObject( name, info );
317     end;
318     finally
319     sl.Free;
320     end;
321    
322     end;
323    
324     procedure TGikoBayesian.SaveToFile( const filePath : string );
325     var
326     i : Integer;
327     sl : TStringList;
328     s : string;
329     info : TWordInfo;
330     begin
331    
332 yoffy 1.2 FFilePath := filePath;
333    
334 yoffy 1.1 sl := TStringList.Create;
335     try
336     sl.BeginUpdate;
337     sl.Add( GIKO_BAYESIAN_FILE_VERSION );
338    
339     for i := 0 to Count - 1 do begin
340     info := TWordInfo( inherited Objects[ i ] );
341     s := Strings[ i ] + #1
342     + Format('%x', [info.NormalWord]) + #1
343     + Format('%x', [info.ImportantWord]) + #1
344     + Format('%x', [info.NormalText]) + #1
345     + Format('%x', [info.ImportantText]);
346    
347     sl.Add(s);
348     end;
349     sl.EndUpdate;
350     sl.SaveToFile( filePath );
351     finally
352     sl.Free;
353     end;
354    
355     end;
356    
357     procedure TGikoBayesian.Save;
358     begin
359    
360     if FFilePath <> '' then
361     SaveToFile( FFilePath );
362    
363     end;
364    
365     //==============================
366     // GetObject
367     //==============================
368     function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
369     var
370     idx : Integer;
371     begin
372    
373 yoffy 1.11 if Find( name, idx ) then
374     Result := TWordInfo( inherited Objects[ idx ] )
375 yoffy 1.1 else
376 yoffy 1.11 Result := nil;
377 yoffy 1.1
378     end;
379    
380     //==============================
381     // SetObject
382     //==============================
383     procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
384     var
385     idx : Integer;
386     begin
387    
388 yoffy 1.11 if Find( name, idx ) then
389     inherited Objects[ idx ] := value
390 yoffy 1.1 else
391 yoffy 1.11 AddObject( name, value );
392 yoffy 1.1
393     end;
394    
395    
396     //==============================
397     // CountWord
398     //==============================
399     procedure TGikoBayesian.CountWord(
400     const text : string;
401     wordCount : TWordCount );
402     type
403 yoffy 1.12 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeNum, ModeHanKana,
404 yoffy 1.1 ModeWGraph, ModeWAlpha, ModeWNum,
405     ModeWHira, ModeWKata, ModeWKanji);
406     var
407 yoffy 1.12 p, tail, last : PChar;
408     mode, newMode : Modes;
409     ch : Longword;
410     chSize : Integer;
411     wHiraDelimiter : TStringList;
412     wHiraFinalDelimiter : TStringList;
413     wKanjiDelimiter : TStringList;
414     words : TStringList;
415     aWord : string;
416     countInfo : TWordCountInfo;
417    
418     function cutBoth( _aWord : string; _delim : TStringList ) : string;
419     var
420     _i : Integer;
421     begin
422     for _i := 0 to _delim.Count - 1 do begin
423     _aWord := CustomStringReplace(
424     _aWord,
425     _delim[ _i ],
426     #10 + _delim[ _i ] + #10, False );
427     end;
428     Result := _aWord;
429     end;
430    
431     function cutFirst( _aWord : string; _delim : TStringList ) : string;
432     var
433     _i : Integer;
434     begin
435     for _i := 0 to _delim.Count - 1 do begin
436     _aWord := CustomStringReplace(
437     _aWord,
438     _delim[ _i ],
439     #10 + _delim[ _i ], False );
440     end;
441     Result := _aWord;
442     end;
443    
444     function cutFinal( _aWord : string; _delim : TStringList ) : string;
445     var
446     _i : Integer;
447     begin
448     for _i := 0 to _delim.Count - 1 do begin
449     _aWord := CustomStringReplace(
450     _aWord,
451     _delim[ _i ],
452     _delim[ _i ] + #10, False );
453     end;
454     Result := _aWord;
455     end;
456    
457     procedure addWord( _dst : TWordCount; _words : TStringList );
458     var
459     _aWord : string;
460     _i, _idx : Integer;
461     _countInfo : TWordCountInfo;
462     begin
463     for _i := 0 to _words.Count - 1 do begin
464     _aWord := _words[ _i ];
465     if Length( _aWord ) > 0 then begin
466     if _dst.Find( _aWord, _idx ) then begin
467     _countInfo := TWordCountInfo( _dst.Objects[ _idx ] );
468     end else begin
469     _countInfo := TWordCountInfo.Create;
470     _dst.AddObject( _aWord, _countInfo );
471     end;
472     _countInfo.WordCount := _countInfo.WordCount + 1;
473     end;
474     end;
475     end;
476    
477     function changeMode( _aWord : string; _mode : Modes ) : string;
478     var
479     _i : Integer;
480     _aWord2 : string;
481     _pWord, _pWord2 : PChar;
482     _pWordTail, _pFound : PChar;
483     const
484     _delim : string = #10;
485     begin
486     if Ord( _mode ) >= Ord( ModeWGraph ) then begin
487     // ?ユ???
488     // ?鴻???若?鴻??荅違????
489     _aWord := CustomStringReplace( _aWord, ' ', '', False );
490     _aWord := CustomStringReplace( _aWord, '??', '', False );
491    
492     // ???????帥?у??茯?????
493     case mode of
494     ModeWHira:
495     begin
496     _aWord := cutFinal( _aWord, wHiraFinalDelimiter );
497     Result := cutBoth( _aWord, wHiraDelimiter );
498     end;
499    
500     ModeWKanji:
501     begin
502     // ???????帥?у??茯?????
503     _aWord := cutBoth( _aWord, wKanjiDelimiter );
504     // 4 byte (2 絖?) ???ゃ?у??茯?????
505     _pWord := PChar( _aWord );
506     _i := Length( _aWord );
507     _pWordTail := _pWord + _i;
508     SetLength( _aWord2, _i + (_i shr 2) );
509     _pWord2 := PChar( _aWord2 );
510    
511     while _pWord < _pWordTail do begin
512     _pFound := AnsiStrPos( _pWord, PChar( _delim ) );
513     if _pFound = nil then
514     _pFound := _pWordTail;
515     _pFound := _pFound - 3;
516    
517     while _pWord <= _pFound do begin
518     CopyMemory( _pWord2, _pWord, 4 ); _pWord2[ 4 ] := #10;
519     _pWord2 := _pWord2 + 5; _pWord := _pWord + 4;
520     end;
521     _i := _pFound + 4 - _pWord; // 4 = 3 + #10
522     CopyMemory( _pWord2, _pWord, _i );
523     _pWord2 := _pWord2 + _i; _pWord := _pWord + _i;
524     end;
525     if _pWord < _pWordTail then begin
526     _i := _pWordTail - _pWord;
527     CopyMemory( _pWord2, _pWord, _i );
528     _pWord2 := _pWord2 + _i;
529     end;
530     SetLength( _aWord2, _pWord2 - PChar( _aWord2 ) );
531    
532     Result := _aWord2;
533     end;
534    
535     else
536     Result := _aWord;
537     end;
538     end else begin
539     Result := _aWord;
540     end;
541     end;
542 yoffy 1.1 const
543 yoffy 1.12 WHIRA_DELIMITER = '??' + #10 + '??#39; + #10 + '??' + #10 + '??#39; + #10 + '????'
544     + #10 + '??#39; + #10 + '????' + #10 + '?障??#39;+ #10 + '??#39;
545     + #10 + '????' + #10 + '????' + #10 + '????'
546 yoffy 1.11 + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
547     + #10 + '????#39; + #10 + '????#39; + #10 + '????#39; + #10 + '????#39;
548     + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
549     + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39;
550 yoffy 1.12 + #10 + '????' + #10 + '????#39; + #10 + '????' + #10 + '????'
551     + #10 + '????' + #10 + '??????'
552     + #10 + '?с??' + #10 + '?障??' + #10 + '?障????'
553     + #10 + '?с????' + #10 + '?障????'
554     + #10 + '????' + #10 + '??????' + #10 + '??????' + #10 + '????????'
555     ;
556     WKANJI_DELIMITER = '??' + #10 + '??#39; + #10 + '綣?39; + #10 + '??' + #10 + '羈?'
557     + #10 + '筝?' + #10 + '??#39; + #10 + '??' + #10 + '??'
558     ;
559     WHIRA_FINAL_DELIMITER = '?c??' + #10 + '?c??#39;
560     ;{
561     + #10 + '???c??#39; + #10 + '???????c??#39; + #10 + '??????#39;
562     + #10 + '??????' + #10 + '?с??????'
563 yoffy 1.11 + #10 + '?障??'
564     + #10 + '??????' + #10 + '????' + #10 + '????#39; + #10 + '??????#39;
565     + #10 + '??????' + #10 + '???c?宴??'
566 yoffy 1.12 + #10 + '?с??' + #10 + '????'
567 yoffy 1.11 + #10 + '????' + #10 + '??????' + #10 + '????' + #10 + '??????'
568     ;}
569 yoffy 1.12 // '??#39; ?? '??????????' ????
570     HA_LINE = '?????????????障????????????違?宴????#39;;
571     HI_LINE = '???????<???蚊?帥?????????潟?眼??';
572     HU_LINE = '??????ゃ???泣?????????吟?激??';
573     HE_LINE = '???????????吾?????????鴻?冴??';
574     HO_LINE = '???????????祉???????????若?純??';
575     KA_LINE = '?≪???泣?帥??????ゃ???????吟???????<?泣??#39;;
576     KI_LINE = '?ゃ???激???????????違???吾??????#39;;
577     KU_LINE = '?????鴻?????????????違?????ャ??#39;;
578     KE_LINE = '???宴?祉???????<???宴?蚊?????с??#39;;
579     KO_LINE = '???潟?純???????≪?????蚊?眼??????#39;;
580 yoffy 1.5 kKanji = [$80..$A0, $E0..$ff];
581 yoffy 1.1 begin
582    
583 yoffy 1.12 wHiraDelimiter := TStringList.Create;
584     wHiraFinalDelimiter := TStringList.Create;
585     wKanjiDelimiter := TStringList.Create;
586     words := TStringList.Create;
587 yoffy 1.1 try
588     mode := ModeWhite;
589 yoffy 1.12 wHiraDelimiter.Text := WHIRA_DELIMITER;
590     wHiraFinalDelimiter.Text := WHIRA_FINAL_DELIMITER;
591     wKanjiDelimiter.Text := WKANJI_DELIMITER;
592 yoffy 1.1 p := PChar( text );
593     tail := p + Length( text );
594     last := p;
595    
596     while p < tail do begin
597     // ??絖????帥?ゃ?????ゅ??/span>
598     // ?糸???鴻? ModeGraph ?????????у???ャ???綽??????????????
599 yoffy 1.5 // if Byte(Byte( p^ ) - $a1) < $5e then begin
600     if Byte( p^ ) in kKanji then begin
601 yoffy 1.1 if p + 1 < tail then begin
602     ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
603     case ch of
604 yoffy 1.12 // ?鴻???若?鴻?у??茯????????????????
605     //$8140: newMode := ModeWhite;
606 yoffy 1.1 $8141..$824e: newMode := ModeWGraph;
607     $824f..$8258: newMode := ModeWNum;
608     $8260..$829a: newMode := ModeWAlpha;
609     $829f..$82f1: newMode := ModeWHira;
610     $8340..$8396: newMode := ModeWKata;
611     else newMode := ModeWKanji;
612     end;
613 yoffy 1.6 // '??????#39; ??抗篁??????障???????帥?????????障????
614     if (mode = ModeWHira) or (mode = ModeWKata) then
615 yoffy 1.7 if (ch = $814a) or (ch = $814b) or (ch = $815b) then
616 yoffy 1.6 newMode := mode;
617 yoffy 1.1 end else begin
618     newMode := ModeWhite;
619     end;
620    
621     chSize := 2;
622     end else begin
623 yoffy 1.3 newMode := Modes( CharMode1[ Byte( p^ ) ] );
624 yoffy 1.12 if (p^ = ' ') and (Ord( mode ) >= Ord( ModeWGraph )) then begin
625     // 篁??障?ф?ユ????т??鴻???若??/span>
626     // ??茯???膵???????с?鴻???若?鴻??荅違????
627     // ?糸??茹?????????絽吾?鴻???若?鴻?у?阪??????????????荅違??????
628     newMode := mode;
629     end;
630 yoffy 1.1
631     chSize := 1;
632     end;
633    
634 yoffy 1.12 if mode <> newMode then begin
635 yoffy 1.1
636     // ??絖????帥?ゃ????紊??眼??????
637 yoffy 1.13 if mode <> ModeWhite then begin
638     SetLength( aWord, p - last );
639     CopyMemory( PChar( aWord ), last, p - last );
640 yoffy 1.11
641 yoffy 1.13 words.Text := changeMode( aWord, mode );
642 yoffy 1.11
643 yoffy 1.13 // ??茯??脂??/span>
644     addWord( wordCount, words );
645     end;
646 yoffy 1.1
647 yoffy 1.12 last := p;
648 yoffy 1.1 mode := newMode;
649    
650     end;
651    
652     p := p + chSize;
653     end; // while
654    
655     if mode <> ModeWhite then begin
656     aWord := Copy( last, 0, p - last );
657 yoffy 1.12 words.Text := changeMode( aWord, mode );
658    
659     // ??茯??脂??/span>
660     addWord( wordCount, words );
661 yoffy 1.1 end;
662     finally
663 yoffy 1.12 words.Free;
664     wKanjiDelimiter.Free;
665     wHiraFinalDelimiter.Free;
666     wHiraDelimiter.Free;
667 yoffy 1.1 end;
668    
669     end;
670    
671     //==============================
672     // CalcPaulGraham
673     //==============================
674     function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
675    
676     function p( const aWord : string ) : Single;
677     var
678     info : TWordInfo;
679     begin
680     info := Objects[ aWord ];
681     if info = nil then
682 yoffy 1.9 Result := 0.415
683 yoffy 1.1 else if info.NormalWord = 0 then
684     Result := 0.99
685     else if info.ImportantWord = 0 then
686     Result := 0.01
687 yoffy 1.9 else if info.ImportantWord + info.NormalWord * 2 < 5 then
688     Result := 0.5
689 yoffy 1.1 else
690     Result := ( info.ImportantWord / info.ImportantText ) /
691     ((info.NormalWord * 2 / info.NormalText ) +
692     (info.ImportantWord / info.ImportantText));
693     end;
694    
695     var
696     s, q : Extended;
697     i : Integer;
698     narray : TList;
699     const
700     SAMPLE_COUNT = 15;
701     begin
702    
703     Result := 1;
704     if wordCount.Count = 0 then
705     Exit;
706    
707     narray := TList.Create;
708     try
709     for i := 0 to wordCount.Count - 1 do begin
710     narray.Add( Pointer( p( wordCount[ i ] ) ) );
711     end;
712    
713     narray.Sort( AbsSort );
714    
715     s := 1;
716     q := 1;
717     i := min( SAMPLE_COUNT, narray.Count );
718     while i > 0 do begin
719     Dec( i );
720 yoffy 1.9
721 yoffy 1.1 s := s * Single( narray[ i ] );
722     q := q * (1 - Single( narray[ i ] ));
723     end;
724    
725     Result := s / (s + q);
726     finally
727     narray.Free;
728     end;
729    
730     end;
731    
732     //==============================
733     // CalcGaryRobinson
734     //==============================
735     function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
736    
737     function p( const aWord : string ) : Single;
738     var
739     info : TWordInfo;
740     begin
741     info := Objects[ aWord ];
742     if info = nil then
743     Result := 0.415
744     else if info.ImportantWord = 0 then
745 yoffy 1.9 Result := 0.01
746 yoffy 1.1 else if info.NormalWord = 0 then
747 yoffy 1.9 Result := 0.99
748 yoffy 1.1 else
749     Result := ( info.ImportantWord / info.ImportantText ) /
750     ((info.NormalWord / info.NormalText ) +
751     (info.ImportantWord / info.ImportantText));
752     end;
753    
754     function f( cnt : Integer; n, mean : Single ) : Extended;
755     const
756     k = 0.00001;
757     begin
758     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
759     end;
760    
761     var
762     n : Extended;
763     narray : array of Single;
764     mean : Extended;
765     countInfo : TWordCountInfo;
766     i : Integer;
767 yoffy 1.9 P1, Q1, R1 : Extended;
768     cnt : Extended;
769     begin
770    
771     if wordCount.Count = 0 then begin
772     Result := 1;
773     Exit;
774     end;
775    
776     SetLength( narray, wordCount.Count );
777     mean := 0;
778     for i := 0 to wordCount.Count - 1 do begin
779     n := p( wordCount[ i ] );
780     narray[ i ] := n;
781     mean := mean + n;
782     end;
783     mean := mean / wordCount.Count;
784    
785 yoffy 1.10 P1 := 1;
786     Q1 := 1;
787 yoffy 1.9 for i := 0 to wordCount.Count - 1 do begin
788     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
789     n := f( countInfo.WordCount, narray[ i ], mean );
790 yoffy 1.10 P1 := P1 * ( 1 - n );
791     Q1 := Q1 * n;
792 yoffy 1.9 end;
793 yoffy 1.10 cnt := wordCount.Count;
794 yoffy 1.9 if cnt = 0 then
795 yoffy 1.10 cnt := 1
796     else
797     P1 := 1 - Power( P1, 1 / cnt );
798     Q1 := 1 - Power( Q1, 1 / cnt );
799 yoffy 1.9
800     if P1 + Q1 = 0 then begin
801     Result := 0.5
802     end else begin
803     n := (P1 - Q1) / (P1 + Q1);
804     Result := (1 + n) / 2;
805     end;
806    
807     end;
808    
809     //==============================
810     // CalcGaryRobinsonFisher
811     //==============================
812     function TGikoBayesian.CalcGaryRobinsonFisher(
813     wordCount : TWordCount
814     ) : Extended;
815    
816     function p( const aWord : string ) : Single;
817     var
818     info : TWordInfo;
819     begin
820     info := Objects[ aWord ];
821     if info = nil then
822     Result := 0.415
823     else if info.ImportantWord = 0 then
824     Result := 0.01
825     else if info.NormalWord = 0 then
826     Result := 0.99
827     else
828     Result := info.ImportantWord /
829     (info.ImportantWord + info.NormalWord *
830     info.ImportantText / info.NormalText);
831     end;
832    
833     function f( cnt : Integer; n, mean : Single ) : Extended;
834     const
835     k = 0.00001;
836     begin
837     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
838     end;
839    
840     function prbx( x2, degree : Extended ) : Extended;
841     var
842     m : Extended;
843     sum : Extended;
844     term : Extended;
845     i : extended;
846     begin
847    
848     m := x2 / 2;
849     sum := exp( -m );
850     term := -m;
851    
852     i := 1;
853     while i < (degree / 2 - 1) do begin
854     term := term + ln( m / i );
855     sum := sum + exp( term );
856     i := i + 1;
857     end;
858    
859     if sum < 1 then
860     Result := sum
861     else
862     Result := 1.0;
863    
864     end;
865    
866     var
867     n : Extended;
868     narray : array of Single;
869     mean : Extended;
870     countInfo : TWordCountInfo;
871     i : Integer;
872 yoffy 1.1 normal : Extended;
873     important : Extended;
874 yoffy 1.9 P1, Q1 : Extended;
875 yoffy 1.1 cnt : Extended;
876     begin
877    
878     if wordCount.Count = 0 then begin
879     Result := 1;
880     Exit;
881     end;
882    
883     SetLength( narray, wordCount.Count );
884     mean := 0;
885     for i := 0 to wordCount.Count - 1 do begin
886     n := p( wordCount[ i ] );
887     narray[ i ] := n;
888     mean := mean + n;
889     end;
890     mean := mean / wordCount.Count;
891    
892 yoffy 1.9 cnt := 0;
893     (*
894     P1 := 1;
895     Q1 := 1;
896     (*)
897     P1 := 0;
898     Q1 := 0;
899     //*
900 yoffy 1.1 for i := 0 to wordCount.Count - 1 do begin
901     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
902     n := f( countInfo.WordCount, narray[ i ], mean );
903     if countInfo <> nil then
904 yoffy 1.9 cnt := cnt + countInfo.WordCount;
905     (*
906     P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
907     Q1 := Q1 + Ln( n ) * countInfo.WordCount;
908     (*)
909     P1 := P1 + Ln( 1 - n );
910     Q1 := Q1 + Ln( n );
911     //*)
912 yoffy 1.1 end;
913     if cnt = 0 then
914     cnt := 1;
915 yoffy 1.9 //(*
916     P1 := prbx( -2 * P1, 2 * cnt );
917     Q1 := prbx( -2 * Q1, 2 * cnt );
918     (*)
919     P1 := prbx( -2 * Ln( P1 ), 2 * cnt );
920     Q1 := prbx( -2 * Ln( Q1 ), 2 * cnt );
921     //*)
922     if P1 + Q1 = 0 then begin
923     Result := 0.5
924     end else begin
925     Result := (1 + Q1 + P1) / 2;
926     end;
927 yoffy 1.1
928     end;
929    
930     //==============================
931     // Parse
932     //==============================
933     function TGikoBayesian.Parse(
934     const text : string;
935     wordCount : TWordCount;
936 yoffy 1.9 algorithm : TGikoBayesianAlgorithm
937 yoffy 1.1 ) : Extended;
938     begin
939    
940     CountWord( text, wordCount );
941     case algorithm of
942     gbaPaulGraham: Result := CalcPaulGraham( wordCount );
943 yoffy 1.9 gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
944     gbaGaryRobinsonFisher:
945     Result := CalcGaryRobinsonFisher( wordCount );
946 yoffy 1.1 else Result := 0;
947     end;
948    
949     end;
950    
951     //==============================
952     // Learn
953     //==============================
954     procedure TGikoBayesian.Learn(
955     wordCount : TWordCount;
956     isImportant : Boolean );
957     var
958     aWord : string;
959     wordinfo : TWordInfo;
960     countinfo : TWordCountInfo;
961 yoffy 1.3 i : Integer;
962 yoffy 1.1 begin
963    
964     for i := 0 to wordCount.Count - 1 do begin
965     aWord := wordCount[ i ];
966     wordinfo := Objects[ aWord ];
967 yoffy 1.3 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
968 yoffy 1.1 if wordinfo = nil then begin
969     wordinfo := TWordInfo.Create;
970     Objects[ aWord ] := wordinfo;
971     end;
972    
973     if isImportant then begin
974     wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
975     wordinfo.ImportantText := wordinfo.ImportantText + 1;
976     end else begin
977     wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
978     wordinfo.NormalText := wordinfo.NormalText + 1;
979     end;
980     end;
981    
982     end;
983    
984     //==============================
985     // Forget
986     //==============================
987     procedure TGikoBayesian.Forget(
988     wordCount : TWordCount;
989     isImportant : Boolean );
990     var
991     aWord : string;
992     wordinfo : TWordInfo;
993     countinfo : TWordCountInfo;
994     i : Integer;
995     begin
996    
997     for i := 0 to wordCount.Count - 1 do begin
998     aWord := wordCount[ i ];
999     wordinfo := Objects[ aWord ];
1000     if wordinfo = nil then
1001     Continue;
1002    
1003     countinfo := TWordCountInfo( wordCount.Objects[ i ] );
1004     if isImportant then begin
1005 yoffy 1.8 if wordInfo.ImportantText > 0 then begin
1006     wordinfo.ImportantText := wordinfo.ImportantText - 1;
1007     wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
1008     end;
1009 yoffy 1.1 end else begin
1010 yoffy 1.8 if wordinfo.NormalText > 0 then begin
1011     wordinfo.NormalText := wordinfo.NormalText - 1;
1012     wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
1013     end;
1014 yoffy 1.1 end;
1015     end;
1016    
1017     end;
1018    
1019     end.

Back to OSDN">Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26