Develop and Download Open Source Software

Browse CVS Repository

Annotation of /gikonavigoeson/gikonavi/GikoBayesian.pas

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.10 - (hide annotations) (download) (as text)
Sun Oct 31 12:24:33 2004 UTC (19 years, 5 months ago) by yoffy
Branch: MAIN
Changes since 1.9: +10 -11 lines
File MIME type: text/x-pascal
- DEBUG を定義してビルドした場合に、
 スパムフィルタのアルゴリズムを選択できるようになった。
- スパムフィルタの Gary Robinson 法を教科書通りに実装してみた。
 ※Gary Robinson-Fisher 法は未だ機能せず。

1 yoffy 1.1 unit GikoBayesian;
2    
3     {!
4     \file GikoBayesian.pas
5     \brief ???ゃ?吾?≪?潟???c????/span>
6    
7 yoffy 1.10 $Id: GikoBayesian.pas,v 1.9 2004/10/27 00:10:12 yoffy Exp $
8 yoffy 1.1 }
9    
10     interface
11    
12     //==================================================
13     uses
14     //==================================================
15 yoffy 1.4 Classes;
16 yoffy 1.1
17     //==================================================
18     type
19     //==================================================
20    
21     {!***********************************************************
22     \brief ??茯???????????/span>
23     ************************************************************}
24     TWordInfo = class( TObject )
25     private
26     FNormalWord : Integer; //!< ??絽吾????茯????????糸?眼????????/span>
27     FImportantWord : Integer; //!< 羈?????茯????????糸?眼????????/span>
28     FNormalText : Integer; //!< ??絽吾????茯??????????障??????????腴?????/span>
29     FImportantText : Integer; //!< 羈?????茯??????????障??????????腴?????/span>
30    
31     public
32     property NormalWord : Integer read FNormalWord write FNormalWord;
33     property ImportantWord : Integer read FImportantWord write FImportantWord;
34     property NormalText : Integer read FNormalText write FNormalText;
35     property ImportantText : Integer read FImportantText write FImportantText;
36     end;
37    
38     {!***********************************************************
39     \brief 茹f??羝??水??茯???????????/span>
40     ************************************************************}
41     TWordCountInfo = class( TObject )
42     private
43     FWordCount : Integer; //!< ??茯???/span>
44    
45     public
46     property WordCount : Integer read FWordCount write FWordCount;
47     end;
48    
49     {!***********************************************************
50     \brief 茹f??羝??水??茯????鴻??
51     ************************************************************}
52     // TWordCount = class( THashedStringList ) // 羶???
53 yoffy 1.3 TWordCount = class( TStringList )
54 yoffy 1.1 public
55 yoffy 1.3 constructor Create;
56 yoffy 1.1 destructor Destroy; override;
57     end;
58    
59     {!***********************************************************
60     \brief ???c???帥?≪???眼???冴??
61     ************************************************************}
62     TGikoBayesianAlgorithm =
63 yoffy 1.9 (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
64 yoffy 1.1
65     {!***********************************************************
66     \brief ???ゃ?吾?≪?潟???c????/span>
67     ************************************************************}
68 yoffy 1.3 // TGikoBayesian = class( THashedStringList ) // 羶???
69     TGikoBayesian = class( TStringList )
70 yoffy 1.1 private
71     FFilePath : string; //!< 茯??粋昭???????<?ゃ??????/span>
72     function GetObject( const name : string ) : TWordInfo;
73     procedure SetObject( const name : string; value : TWordInfo );
74    
75     public
76     constructor Create;
77     destructor Destroy; override;
78    
79     //! ???<?ゃ??????絖??絮ユ???茯??水?冴???障??
80     procedure LoadFromFile( const filePath : string );
81    
82     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
83     procedure SaveToFile( const filePath : string );
84    
85     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
86     procedure Save;
87    
88     //! ??茯???????????宴????緇????障??
89     property Objects[ const name : string ] : TWordInfo
90     read GetObject write SetObject; default;
91    
92     //! ??腴??????障??????茯????????潟?????障??
93     procedure CountWord(
94     const text : string;
95     wordCount : TWordCount );
96    
97     {!
98     \brief Paul Graham 羈????冴?ャ??????腴???絵??墾??羆阪????障??
99     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
100     }
101     function CalcPaulGraham( wordCount : TWordCount ) : Extended;
102    
103     {!
104     \brief GaryRobinson 羈????冴?ャ??????腴???絵??墾??羆阪????障??
105     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
106     }
107     function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
108    
109 yoffy 1.9 {!
110     \brief GaryRobinson-Fisher 羈????冴?ャ??????腴???絵??墾??羆阪????障??
111     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
112     }
113     function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
114 yoffy 1.1
115     {!
116     \brief ??腴???茹f??
117     \param text 茹f????????腴?
118     \param wordCount 茹f??????????茯????鴻????菴???
119     \param algorithm 羈???墾??浦絎??????????≪???眼???冴??????絎????障??
120     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
121    
122     CountWord ? Calcxxxxx ???障???????茵??????????с????
123     }
124     function Parse(
125     const text : string;
126     wordCount : TWordCount;
127 yoffy 1.9 algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
128 yoffy 1.1 ) : Extended;
129    
130     {!
131     \brief 絖??????
132     \param wordCount Parse ?цВ??????????茯????鴻??
133     \param isImportant 羈??????鴻????腴???????????????? True
134     }
135     procedure Learn(
136     wordCount : TWordCount;
137     isImportant : Boolean );
138    
139     {!
140     \brief 絖??腟?????綽?????
141     \param wordCount Parse ?цВ??????????茯????鴻??
142     \param isImportant 羈??????鴻????腴???????????????????????? True
143     \warning 絖??羝??帥????腴???????????∈茯??堺?ャ?障??????<br>
144     Learn ????????????腴??? isImportant ???????c????????腴???
145     Forget ?????????若?帥???若?鴻???贋?????障????<br>
146     絖??羝??帥????????????????????????????????
147    
148     ???????膺?腟??????????≪????????с???????障??????<br>
149     wordCount ??緇?????腴? (Parse ? text 綣??? ???膺?腟??????帥?????≪???障????<br><br>
150    
151     筝祉??絵????腴?????羈?????腴????????帥????????? Forget -> Learn ?????т戎?????障????
152     }
153     procedure Forget(
154     wordCount : TWordCount;
155     isImportant : Boolean );
156     end;
157    
158     //==================================================
159     implementation
160     //==================================================
161    
162     uses
163 yoffy 1.4 SysUtils, Math, Windows;
164 yoffy 1.1
165     const
166     GIKO_BAYESIAN_FILE_VERSION = '1.0';
167 yoffy 1.3 {
168     Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
169     ModeWGraph, ModeWAlpha, ModeWNum,
170     ModeWHira, ModeWKata, ModeWKanji);
171     }
172     CharMode1 : array [ 0..255 ] of Byte =
173     (
174     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177     2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
178     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
179     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
180     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
181     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
182    
183     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
184     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
185     0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
186     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
187     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
188 yoffy 1.6 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
189 yoffy 1.3 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
191     );
192 yoffy 1.1
193     //************************************************************
194     // misc
195     //************************************************************
196    
197     //==============================
198     // RemoveToken
199     //==============================
200     function RemoveToken(var s: string;const delimiter: string): string;
201     var
202     p: Integer;
203     begin
204     p := AnsiPos(delimiter, s);
205     if p = 0 then
206     Result := s
207     else
208     Result := Copy(s, 1, p - 1);
209     s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
210     end;
211    
212     //==============================
213     // AbsSort
214     //==============================
215     function AbsSort( p1, p2 : Pointer ) : Integer;
216     var
217     v1, v2 : Single;
218     begin
219    
220     v1 := Abs( Single( p1 ) - 0.5 );
221     v2 := Abs( Single( p2 ) - 0.5 );
222     if v1 > v2 then
223     Result := -1
224     else if v1 = v2 then
225     Result := 0
226     else
227     Result := 1;
228    
229     end;
230    
231     //************************************************************
232     // TWordCount class
233     //************************************************************
234 yoffy 1.3 constructor TWordCount.Create;
235     begin
236    
237     Duplicates := dupIgnore;
238     CaseSensitive := True;
239     Sorted := True;
240    
241     end;
242    
243 yoffy 1.1 destructor TWordCount.Destroy;
244     var
245     i : Integer;
246     begin
247    
248     for i := Count - 1 downto 0 do
249     if Objects[ i ] <> nil then
250     Objects[ i ].Free;
251    
252     inherited;
253    
254     end;
255    
256     //************************************************************
257     // TGikoBayesian class
258     //************************************************************
259    
260     //==============================
261     // Create
262     //==============================
263     constructor TGikoBayesian.Create;
264     begin
265    
266 yoffy 1.3 Duplicates := dupIgnore;
267     CaseSensitive := True;
268     Sorted := True;
269 yoffy 1.1
270     end;
271    
272     //==============================
273     // Destroy
274     //==============================
275     destructor TGikoBayesian.Destroy;
276     var
277     i : Integer;
278     begin
279    
280     for i := Count - 1 downto 0 do
281     if inherited Objects[ i ] <> nil then
282     inherited Objects[ i ].Free;
283    
284     inherited;
285    
286     end;
287    
288     procedure TGikoBayesian.LoadFromFile( const filePath : string );
289     var
290     i : Integer;
291     sl : TStringList;
292     s : string;
293     name : string;
294     info : TWordInfo;
295     begin
296    
297 yoffy 1.2 FFilePath := filePath;
298    
299 yoffy 1.1 if not FileExists( filePath ) then
300     Exit;
301 yoffy 1.2
302 yoffy 1.1 sl := TStringList.Create;
303     try
304     sl.LoadFromFile( filePath );
305    
306     for i := 1 to sl.Count - 1 do begin
307     s := sl[ i ];
308     name := RemoveToken( s, #1 );
309     info := TWordInfo.Create;
310     info.NormalWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
311     info.ImportantWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
312     info.NormalText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
313     info.ImportantText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
314    
315     AddObject( name, info );
316     end;
317     finally
318     sl.Free;
319     end;
320    
321     end;
322    
323     procedure TGikoBayesian.SaveToFile( const filePath : string );
324     var
325     i : Integer;
326     sl : TStringList;
327     s : string;
328     info : TWordInfo;
329     begin
330    
331 yoffy 1.2 FFilePath := filePath;
332    
333 yoffy 1.1 sl := TStringList.Create;
334     try
335     sl.BeginUpdate;
336     sl.Add( GIKO_BAYESIAN_FILE_VERSION );
337    
338     for i := 0 to Count - 1 do begin
339     info := TWordInfo( inherited Objects[ i ] );
340     s := Strings[ i ] + #1
341     + Format('%x', [info.NormalWord]) + #1
342     + Format('%x', [info.ImportantWord]) + #1
343     + Format('%x', [info.NormalText]) + #1
344     + Format('%x', [info.ImportantText]);
345    
346     sl.Add(s);
347     end;
348     sl.EndUpdate;
349     sl.SaveToFile( filePath );
350     finally
351     sl.Free;
352     end;
353    
354     end;
355    
356     procedure TGikoBayesian.Save;
357     begin
358    
359     if FFilePath <> '' then
360     SaveToFile( FFilePath );
361    
362     end;
363    
364     //==============================
365     // GetObject
366     //==============================
367     function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
368     var
369     idx : Integer;
370     begin
371    
372 yoffy 1.3 idx := IndexOf( name ); // 羶???
373 yoffy 1.1 if idx < 0 then
374     Result := nil
375     else
376     Result := TWordInfo( inherited Objects[ idx ] );
377    
378     end;
379    
380     //==============================
381     // SetObject
382     //==============================
383     procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
384     var
385     idx : Integer;
386     begin
387    
388     idx := IndexOf( name );
389     if idx < 0 then
390     AddObject( name, value )
391     else
392     inherited Objects[ idx ] := value;
393    
394     end;
395    
396    
397     //==============================
398     // CountWord
399     //==============================
400     procedure TGikoBayesian.CountWord(
401     const text : string;
402     wordCount : TWordCount );
403     type
404 yoffy 1.3 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
405 yoffy 1.1 ModeWGraph, ModeWAlpha, ModeWNum,
406     ModeWHira, ModeWKata, ModeWKanji);
407     var
408     p, tail, last : PChar;
409     mode, newMode : Modes;
410     aWord : string;
411     ch : Longword;
412     chSize : Integer;
413     delimiter : TStringList;
414     delimited : Boolean;
415     i, idx : Integer;
416     countInfo : TWordCountInfo;
417     const
418     KAKUJOSI = '??' + #10 + '??#39; + #10 + '??' + #10 + '??#39; + #10 + '????' +
419     #10 + '??#39; + #10 + '??#39; + #10 + '????' + #10 + '?障??#39;;
420 yoffy 1.5 kKanji = [$80..$A0, $E0..$ff];
421 yoffy 1.1 begin
422    
423     delimiter := TStringList.Create;
424     try
425     mode := ModeWhite;
426     delimiter.Text := KAKUJOSI;
427     p := PChar( text );
428     tail := p + Length( text );
429     last := p;
430    
431     while p < tail do begin
432     delimited := False;
433     // ??絖????帥?ゃ?????ゅ??/span>
434     // ?糸???鴻? ModeGraph ?????????у???ャ???綽??????????????
435 yoffy 1.5 // if Byte(Byte( p^ ) - $a1) < $5e then begin
436     if Byte( p^ ) in kKanji then begin
437 yoffy 1.1 if p + 1 < tail then begin
438     ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
439     case ch of
440     $8140: newMode := ModeWhite;
441     $8141..$824e: newMode := ModeWGraph;
442     $824f..$8258: newMode := ModeWNum;
443     $8260..$829a: newMode := ModeWAlpha;
444     $829f..$82f1: newMode := ModeWHira;
445     $8340..$8396: newMode := ModeWKata;
446     else newMode := ModeWKanji;
447     end;
448 yoffy 1.6 // '??????#39; ??抗篁??????障???????帥?????????障????
449     if (mode = ModeWHira) or (mode = ModeWKata) then
450 yoffy 1.7 if (ch = $814a) or (ch = $814b) or (ch = $815b) then
451 yoffy 1.6 newMode := mode;
452 yoffy 1.1 end else begin
453     newMode := ModeWhite;
454     end;
455    
456     chSize := 2;
457    
458     // ?阪????????????絖?????????罎??祉????
459     if p + 3 < tail then begin // 3 = delimiter ????紊у?? - 1
460     for i := 0 to delimiter.Count - 1 do begin
461     if CompareMem(
462     p, PChar( delimiter[ i ] ), Length( delimiter[ i ] ) ) then begin
463     delimited := True;
464     chSize := Length( delimiter[ i ] );
465     Break;
466     end;
467     end;
468     end;
469     end else begin
470 yoffy 1.3 newMode := Modes( CharMode1[ Byte( p^ ) ] );
471 yoffy 1.1
472     chSize := 1;
473     end;
474    
475     if (mode <> newMode) or delimited then begin
476    
477     // ??絖????帥?ゃ????紊??眼??????
478     // ????????阪????????????絖???????????
479     if mode <> ModeWhite then begin
480 yoffy 1.3 SetLength( aWord, p - last );
481     CopyMemory( PChar( aWord ), last, p - last );
482 yoffy 1.5 //aWord := Copy( last, 0, p - last );
483 yoffy 1.3 idx := wordCount.IndexOf( aWord ); // ??
484 yoffy 1.1 if idx < 0 then begin
485     countInfo := TWordCountInfo.Create;
486     wordCount.AddObject( aWord, countInfo );
487     end else begin
488     countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
489     end;
490     countInfo.WordCount := countInfo.WordCount + 1;
491     end;
492    
493     last := p;
494     mode := newMode;
495    
496     end;
497    
498     p := p + chSize;
499     end; // while
500    
501     if mode <> ModeWhite then begin
502     aWord := Copy( last, 0, p - last );
503     idx := wordCount.IndexOf( aWord );
504     if idx < 0 then begin
505     countInfo := TWordCountInfo.Create;
506     wordCount.AddObject( aWord, countInfo );
507     end else begin
508     countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
509     end;
510     countInfo.WordCount := countInfo.WordCount + 1;
511     end;
512     finally
513     delimiter.Free;
514     end;
515    
516     end;
517    
518     //==============================
519     // CalcPaulGraham
520     //==============================
521     function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
522    
523     function p( const aWord : string ) : Single;
524     var
525     info : TWordInfo;
526     begin
527     info := Objects[ aWord ];
528     if info = nil then
529 yoffy 1.9 Result := 0.415
530 yoffy 1.1 else if info.NormalWord = 0 then
531     Result := 0.99
532     else if info.ImportantWord = 0 then
533     Result := 0.01
534 yoffy 1.9 else if info.ImportantWord + info.NormalWord * 2 < 5 then
535     Result := 0.5
536 yoffy 1.1 else
537     Result := ( info.ImportantWord / info.ImportantText ) /
538     ((info.NormalWord * 2 / info.NormalText ) +
539     (info.ImportantWord / info.ImportantText));
540     end;
541    
542     var
543     s, q : Extended;
544     i : Integer;
545     narray : TList;
546     const
547     SAMPLE_COUNT = 15;
548     begin
549    
550     Result := 1;
551     if wordCount.Count = 0 then
552     Exit;
553    
554     narray := TList.Create;
555     try
556     for i := 0 to wordCount.Count - 1 do begin
557     narray.Add( Pointer( p( wordCount[ i ] ) ) );
558     end;
559    
560     narray.Sort( AbsSort );
561    
562     s := 1;
563     q := 1;
564     i := min( SAMPLE_COUNT, narray.Count );
565     while i > 0 do begin
566     Dec( i );
567 yoffy 1.9
568 yoffy 1.1 s := s * Single( narray[ i ] );
569     q := q * (1 - Single( narray[ i ] ));
570     end;
571    
572     Result := s / (s + q);
573     finally
574     narray.Free;
575     end;
576    
577     end;
578    
579     //==============================
580     // CalcGaryRobinson
581     //==============================
582     function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
583    
584     function p( const aWord : string ) : Single;
585     var
586     info : TWordInfo;
587     begin
588     info := Objects[ aWord ];
589     if info = nil then
590     Result := 0.415
591     else if info.ImportantWord = 0 then
592 yoffy 1.9 Result := 0.01
593 yoffy 1.1 else if info.NormalWord = 0 then
594 yoffy 1.9 Result := 0.99
595 yoffy 1.1 else
596     Result := ( info.ImportantWord / info.ImportantText ) /
597     ((info.NormalWord / info.NormalText ) +
598     (info.ImportantWord / info.ImportantText));
599     end;
600    
601     function f( cnt : Integer; n, mean : Single ) : Extended;
602     const
603     k = 0.00001;
604     begin
605     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
606     end;
607    
608     var
609     n : Extended;
610     narray : array of Single;
611     mean : Extended;
612     countInfo : TWordCountInfo;
613     i : Integer;
614 yoffy 1.9 P1, Q1, R1 : Extended;
615     cnt : Extended;
616     begin
617    
618     if wordCount.Count = 0 then begin
619     Result := 1;
620     Exit;
621     end;
622    
623     SetLength( narray, wordCount.Count );
624     mean := 0;
625     for i := 0 to wordCount.Count - 1 do begin
626     n := p( wordCount[ i ] );
627     narray[ i ] := n;
628     mean := mean + n;
629     end;
630     mean := mean / wordCount.Count;
631    
632 yoffy 1.10 P1 := 1;
633     Q1 := 1;
634 yoffy 1.9 for i := 0 to wordCount.Count - 1 do begin
635     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
636     n := f( countInfo.WordCount, narray[ i ], mean );
637 yoffy 1.10 P1 := P1 * ( 1 - n );
638     Q1 := Q1 * n;
639 yoffy 1.9 end;
640 yoffy 1.10 cnt := wordCount.Count;
641 yoffy 1.9 if cnt = 0 then
642 yoffy 1.10 cnt := 1
643     else
644     P1 := 1 - Power( P1, 1 / cnt );
645     Q1 := 1 - Power( Q1, 1 / cnt );
646 yoffy 1.9
647     if P1 + Q1 = 0 then begin
648     Result := 0.5
649     end else begin
650     n := (P1 - Q1) / (P1 + Q1);
651     Result := (1 + n) / 2;
652     end;
653    
654     end;
655    
656     //==============================
657     // CalcGaryRobinsonFisher
658     //==============================
659     function TGikoBayesian.CalcGaryRobinsonFisher(
660     wordCount : TWordCount
661     ) : Extended;
662    
663     function p( const aWord : string ) : Single;
664     var
665     info : TWordInfo;
666     begin
667     info := Objects[ aWord ];
668     if info = nil then
669     Result := 0.415
670     else if info.ImportantWord = 0 then
671     Result := 0.01
672     else if info.NormalWord = 0 then
673     Result := 0.99
674     else
675     Result := info.ImportantWord /
676     (info.ImportantWord + info.NormalWord *
677     info.ImportantText / info.NormalText);
678     end;
679    
680     function f( cnt : Integer; n, mean : Single ) : Extended;
681     const
682     k = 0.00001;
683     begin
684     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
685     end;
686    
687     function prbx( x2, degree : Extended ) : Extended;
688     var
689     m : Extended;
690     sum : Extended;
691     term : Extended;
692     i : extended;
693     begin
694    
695     m := x2 / 2;
696     sum := exp( -m );
697     term := -m;
698    
699     i := 1;
700     while i < (degree / 2 - 1) do begin
701     term := term + ln( m / i );
702     sum := sum + exp( term );
703     i := i + 1;
704     end;
705    
706     if sum < 1 then
707     Result := sum
708     else
709     Result := 1.0;
710    
711     end;
712    
713     var
714     n : Extended;
715     narray : array of Single;
716     mean : Extended;
717     countInfo : TWordCountInfo;
718     i : Integer;
719 yoffy 1.1 normal : Extended;
720     important : Extended;
721 yoffy 1.9 P1, Q1 : Extended;
722 yoffy 1.1 cnt : Extended;
723     begin
724    
725     if wordCount.Count = 0 then begin
726     Result := 1;
727     Exit;
728     end;
729    
730     SetLength( narray, wordCount.Count );
731     mean := 0;
732     for i := 0 to wordCount.Count - 1 do begin
733     n := p( wordCount[ i ] );
734     narray[ i ] := n;
735     mean := mean + n;
736     end;
737     mean := mean / wordCount.Count;
738    
739 yoffy 1.9 cnt := 0;
740     (*
741     P1 := 1;
742     Q1 := 1;
743     (*)
744     P1 := 0;
745     Q1 := 0;
746     //*
747 yoffy 1.1 for i := 0 to wordCount.Count - 1 do begin
748     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
749     n := f( countInfo.WordCount, narray[ i ], mean );
750     if countInfo <> nil then
751 yoffy 1.9 cnt := cnt + countInfo.WordCount;
752     (*
753     P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
754     Q1 := Q1 + Ln( n ) * countInfo.WordCount;
755     (*)
756     P1 := P1 + Ln( 1 - n );
757     Q1 := Q1 + Ln( n );
758     //*)
759 yoffy 1.1 end;
760     if cnt = 0 then
761     cnt := 1;
762 yoffy 1.9 //(*
763     P1 := prbx( -2 * P1, 2 * cnt );
764     Q1 := prbx( -2 * Q1, 2 * cnt );
765     (*)
766     P1 := prbx( -2 * Ln( P1 ), 2 * cnt );
767     Q1 := prbx( -2 * Ln( Q1 ), 2 * cnt );
768     //*)
769     if P1 + Q1 = 0 then begin
770     Result := 0.5
771     end else begin
772     Result := (1 + Q1 + P1) / 2;
773     end;
774 yoffy 1.1
775     end;
776    
777     //==============================
778     // Parse
779     //==============================
780     function TGikoBayesian.Parse(
781     const text : string;
782     wordCount : TWordCount;
783 yoffy 1.9 algorithm : TGikoBayesianAlgorithm
784 yoffy 1.1 ) : Extended;
785     begin
786    
787     CountWord( text, wordCount );
788     case algorithm of
789     gbaPaulGraham: Result := CalcPaulGraham( wordCount );
790 yoffy 1.9 gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
791     gbaGaryRobinsonFisher:
792     Result := CalcGaryRobinsonFisher( wordCount );
793 yoffy 1.1 else Result := 0;
794     end;
795    
796     end;
797    
798     //==============================
799     // Learn
800     //==============================
801     procedure TGikoBayesian.Learn(
802     wordCount : TWordCount;
803     isImportant : Boolean );
804     var
805     aWord : string;
806     wordinfo : TWordInfo;
807     countinfo : TWordCountInfo;
808 yoffy 1.3 i : Integer;
809 yoffy 1.1 begin
810    
811     for i := 0 to wordCount.Count - 1 do begin
812     aWord := wordCount[ i ];
813     wordinfo := Objects[ aWord ];
814 yoffy 1.3 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
815 yoffy 1.1 if wordinfo = nil then begin
816     wordinfo := TWordInfo.Create;
817     Objects[ aWord ] := wordinfo;
818     end;
819    
820     if isImportant then begin
821     wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
822     wordinfo.ImportantText := wordinfo.ImportantText + 1;
823     end else begin
824     wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
825     wordinfo.NormalText := wordinfo.NormalText + 1;
826     end;
827     end;
828    
829     end;
830    
831     //==============================
832     // Forget
833     //==============================
834     procedure TGikoBayesian.Forget(
835     wordCount : TWordCount;
836     isImportant : Boolean );
837     var
838     aWord : string;
839     wordinfo : TWordInfo;
840     countinfo : TWordCountInfo;
841     i : Integer;
842     begin
843    
844     for i := 0 to wordCount.Count - 1 do begin
845     aWord := wordCount[ i ];
846     wordinfo := Objects[ aWord ];
847     if wordinfo = nil then
848     Continue;
849    
850     countinfo := TWordCountInfo( wordCount.Objects[ i ] );
851     if isImportant then begin
852 yoffy 1.8 if wordInfo.ImportantText > 0 then begin
853     wordinfo.ImportantText := wordinfo.ImportantText - 1;
854     wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
855     end;
856 yoffy 1.1 end else begin
857 yoffy 1.8 if wordinfo.NormalText > 0 then begin
858     wordinfo.NormalText := wordinfo.NormalText - 1;
859     wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
860     end;
861 yoffy 1.1 end;
862     end;
863    
864     end;
865    
866     end.

Back to OSDN">Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26