Develop and Download Open Source Software

Browse CVS Repository

Annotation of /gikonavigoeson/gikonavi/GikoBayesian.pas

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.9 - (hide annotations) (download) (as text)
Wed Oct 27 00:10:12 2004 UTC (19 years, 5 months ago) by yoffy
Branch: MAIN
Changes since 1.8: +153 -20 lines
File MIME type: text/x-pascal
スパムフィルタの ON/OFF オプションを追加。

1 yoffy 1.1 unit GikoBayesian;
2    
3     {!
4     \file GikoBayesian.pas
5     \brief ???ゃ?吾?≪?潟???c????/span>
6    
7 yoffy 1.9 $Id: GikoBayesian.pas,v 1.8 2004/10/21 05:59:39 yoffy Exp $
8 yoffy 1.1 }
9    
10     interface
11    
12     //==================================================
13     uses
14     //==================================================
15 yoffy 1.4 Classes;
16 yoffy 1.1
17     //==================================================
18     type
19     //==================================================
20    
21     {!***********************************************************
22     \brief ??茯???????????/span>
23     ************************************************************}
24     TWordInfo = class( TObject )
25     private
26     FNormalWord : Integer; //!< ??絽吾????茯????????糸?眼????????/span>
27     FImportantWord : Integer; //!< 羈?????茯????????糸?眼????????/span>
28     FNormalText : Integer; //!< ??絽吾????茯??????????障??????????腴?????/span>
29     FImportantText : Integer; //!< 羈?????茯??????????障??????????腴?????/span>
30    
31     public
32     property NormalWord : Integer read FNormalWord write FNormalWord;
33     property ImportantWord : Integer read FImportantWord write FImportantWord;
34     property NormalText : Integer read FNormalText write FNormalText;
35     property ImportantText : Integer read FImportantText write FImportantText;
36     end;
37    
38     {!***********************************************************
39     \brief 茹f??羝??水??茯???????????/span>
40     ************************************************************}
41     TWordCountInfo = class( TObject )
42     private
43     FWordCount : Integer; //!< ??茯???/span>
44    
45     public
46     property WordCount : Integer read FWordCount write FWordCount;
47     end;
48    
49     {!***********************************************************
50     \brief 茹f??羝??水??茯????鴻??
51     ************************************************************}
52     // TWordCount = class( THashedStringList ) // 羶???
53 yoffy 1.3 TWordCount = class( TStringList )
54 yoffy 1.1 public
55 yoffy 1.3 constructor Create;
56 yoffy 1.1 destructor Destroy; override;
57     end;
58    
59     {!***********************************************************
60     \brief ???c???帥?≪???眼???冴??
61     ************************************************************}
62     TGikoBayesianAlgorithm =
63 yoffy 1.9 (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
64 yoffy 1.1
65     {!***********************************************************
66     \brief ???ゃ?吾?≪?潟???c????/span>
67     ************************************************************}
68 yoffy 1.3 // TGikoBayesian = class( THashedStringList ) // 羶???
69     TGikoBayesian = class( TStringList )
70 yoffy 1.1 private
71     FFilePath : string; //!< 茯??粋昭???????<?ゃ??????/span>
72     function GetObject( const name : string ) : TWordInfo;
73     procedure SetObject( const name : string; value : TWordInfo );
74    
75     public
76     constructor Create;
77     destructor Destroy; override;
78    
79     //! ???<?ゃ??????絖??絮ユ???茯??水?冴???障??
80     procedure LoadFromFile( const filePath : string );
81    
82     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
83     procedure SaveToFile( const filePath : string );
84    
85     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
86     procedure Save;
87    
88     //! ??茯???????????宴????緇????障??
89     property Objects[ const name : string ] : TWordInfo
90     read GetObject write SetObject; default;
91    
92     //! ??腴??????障??????茯????????潟?????障??
93     procedure CountWord(
94     const text : string;
95     wordCount : TWordCount );
96    
97     {!
98     \brief Paul Graham 羈????冴?ャ??????腴???絵??墾??羆阪????障??
99     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
100     }
101     function CalcPaulGraham( wordCount : TWordCount ) : Extended;
102    
103     {!
104     \brief GaryRobinson 羈????冴?ャ??????腴???絵??墾??羆阪????障??
105     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
106     }
107     function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
108    
109 yoffy 1.9 {!
110     \brief GaryRobinson-Fisher 羈????冴?ャ??????腴???絵??墾??羆阪????障??
111     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
112     }
113     function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
114 yoffy 1.1
115     {!
116     \brief ??腴???茹f??
117     \param text 茹f????????腴?
118     \param wordCount 茹f??????????茯????鴻????菴???
119     \param algorithm 羈???墾??浦絎??????????≪???眼???冴??????絎????障??
120     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
121    
122     CountWord ? Calcxxxxx ???障???????茵??????????с????
123     }
124     function Parse(
125     const text : string;
126     wordCount : TWordCount;
127 yoffy 1.9 algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
128 yoffy 1.1 ) : Extended;
129    
130     {!
131     \brief 絖??????
132     \param wordCount Parse ?цВ??????????茯????鴻??
133     \param isImportant 羈??????鴻????腴???????????????? True
134     }
135     procedure Learn(
136     wordCount : TWordCount;
137     isImportant : Boolean );
138    
139     {!
140     \brief 絖??腟?????綽?????
141     \param wordCount Parse ?цВ??????????茯????鴻??
142     \param isImportant 羈??????鴻????腴???????????????????????? True
143     \warning 絖??羝??帥????腴???????????∈茯??堺?ャ?障??????<br>
144     Learn ????????????腴??? isImportant ???????c????????腴???
145     Forget ?????????若?帥???若?鴻???贋?????障????<br>
146     絖??羝??帥????????????????????????????????
147    
148     ???????膺?腟??????????≪????????с???????障??????<br>
149     wordCount ??緇?????腴? (Parse ? text 綣??? ???膺?腟??????帥?????≪???障????<br><br>
150    
151     筝祉??絵????腴?????羈?????腴????????帥????????? Forget -> Learn ?????т戎?????障????
152     }
153     procedure Forget(
154     wordCount : TWordCount;
155     isImportant : Boolean );
156     end;
157    
158     //==================================================
159     implementation
160     //==================================================
161    
162     uses
163 yoffy 1.4 SysUtils, Math, Windows;
164 yoffy 1.1
165     const
166     GIKO_BAYESIAN_FILE_VERSION = '1.0';
167 yoffy 1.3 {
168     Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
169     ModeWGraph, ModeWAlpha, ModeWNum,
170     ModeWHira, ModeWKata, ModeWKanji);
171     }
172     CharMode1 : array [ 0..255 ] of Byte =
173     (
174     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177     2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
178     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
179     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
180     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
181     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
182    
183     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
184     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
185     0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
186     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
187     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
188 yoffy 1.6 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
189 yoffy 1.3 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
191     );
192 yoffy 1.1
193     //************************************************************
194     // misc
195     //************************************************************
196    
197     //==============================
198     // RemoveToken
199     //==============================
200     function RemoveToken(var s: string;const delimiter: string): string;
201     var
202     p: Integer;
203     begin
204     p := AnsiPos(delimiter, s);
205     if p = 0 then
206     Result := s
207     else
208     Result := Copy(s, 1, p - 1);
209     s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
210     end;
211    
212     //==============================
213     // AbsSort
214     //==============================
215     function AbsSort( p1, p2 : Pointer ) : Integer;
216     var
217     v1, v2 : Single;
218     begin
219    
220     v1 := Abs( Single( p1 ) - 0.5 );
221     v2 := Abs( Single( p2 ) - 0.5 );
222     if v1 > v2 then
223     Result := -1
224     else if v1 = v2 then
225     Result := 0
226     else
227     Result := 1;
228    
229     end;
230    
231     //************************************************************
232     // TWordCount class
233     //************************************************************
234 yoffy 1.3 constructor TWordCount.Create;
235     begin
236    
237     Duplicates := dupIgnore;
238     CaseSensitive := True;
239     Sorted := True;
240    
241     end;
242    
243 yoffy 1.1 destructor TWordCount.Destroy;
244     var
245     i : Integer;
246     begin
247    
248     for i := Count - 1 downto 0 do
249     if Objects[ i ] <> nil then
250     Objects[ i ].Free;
251    
252     inherited;
253    
254     end;
255    
256     //************************************************************
257     // TGikoBayesian class
258     //************************************************************
259    
260     //==============================
261     // Create
262     //==============================
263     constructor TGikoBayesian.Create;
264     begin
265    
266 yoffy 1.3 Duplicates := dupIgnore;
267     CaseSensitive := True;
268     Sorted := True;
269 yoffy 1.1
270     end;
271    
272     //==============================
273     // Destroy
274     //==============================
275     destructor TGikoBayesian.Destroy;
276     var
277     i : Integer;
278     begin
279    
280     for i := Count - 1 downto 0 do
281     if inherited Objects[ i ] <> nil then
282     inherited Objects[ i ].Free;
283    
284     inherited;
285    
286     end;
287    
288     procedure TGikoBayesian.LoadFromFile( const filePath : string );
289     var
290     i : Integer;
291     sl : TStringList;
292     s : string;
293     name : string;
294     info : TWordInfo;
295     begin
296    
297 yoffy 1.2 FFilePath := filePath;
298    
299 yoffy 1.1 if not FileExists( filePath ) then
300     Exit;
301 yoffy 1.2
302 yoffy 1.1 sl := TStringList.Create;
303     try
304     sl.LoadFromFile( filePath );
305    
306     for i := 1 to sl.Count - 1 do begin
307     s := sl[ i ];
308     name := RemoveToken( s, #1 );
309     info := TWordInfo.Create;
310     info.NormalWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
311     info.ImportantWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
312     info.NormalText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
313     info.ImportantText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
314    
315     AddObject( name, info );
316     end;
317     finally
318     sl.Free;
319     end;
320    
321     end;
322    
323     procedure TGikoBayesian.SaveToFile( const filePath : string );
324     var
325     i : Integer;
326     sl : TStringList;
327     s : string;
328     info : TWordInfo;
329     begin
330    
331 yoffy 1.2 FFilePath := filePath;
332    
333 yoffy 1.1 sl := TStringList.Create;
334     try
335     sl.BeginUpdate;
336     sl.Add( GIKO_BAYESIAN_FILE_VERSION );
337    
338     for i := 0 to Count - 1 do begin
339     info := TWordInfo( inherited Objects[ i ] );
340     s := Strings[ i ] + #1
341     + Format('%x', [info.NormalWord]) + #1
342     + Format('%x', [info.ImportantWord]) + #1
343     + Format('%x', [info.NormalText]) + #1
344     + Format('%x', [info.ImportantText]);
345    
346     sl.Add(s);
347     end;
348     sl.EndUpdate;
349     sl.SaveToFile( filePath );
350     finally
351     sl.Free;
352     end;
353    
354     end;
355    
356     procedure TGikoBayesian.Save;
357     begin
358    
359     if FFilePath <> '' then
360     SaveToFile( FFilePath );
361    
362     end;
363    
364     //==============================
365     // GetObject
366     //==============================
367     function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
368     var
369     idx : Integer;
370     begin
371    
372 yoffy 1.3 idx := IndexOf( name ); // 羶???
373 yoffy 1.1 if idx < 0 then
374     Result := nil
375     else
376     Result := TWordInfo( inherited Objects[ idx ] );
377    
378     end;
379    
380     //==============================
381     // SetObject
382     //==============================
383     procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
384     var
385     idx : Integer;
386     begin
387    
388     idx := IndexOf( name );
389     if idx < 0 then
390     AddObject( name, value )
391     else
392     inherited Objects[ idx ] := value;
393    
394     end;
395    
396    
397     //==============================
398     // CountWord
399     //==============================
400     procedure TGikoBayesian.CountWord(
401     const text : string;
402     wordCount : TWordCount );
403     type
404 yoffy 1.3 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
405 yoffy 1.1 ModeWGraph, ModeWAlpha, ModeWNum,
406     ModeWHira, ModeWKata, ModeWKanji);
407     var
408     p, tail, last : PChar;
409     mode, newMode : Modes;
410     aWord : string;
411     ch : Longword;
412     chSize : Integer;
413     delimiter : TStringList;
414     delimited : Boolean;
415     i, idx : Integer;
416     countInfo : TWordCountInfo;
417     const
418     KAKUJOSI = '??' + #10 + '??#39; + #10 + '??' + #10 + '??#39; + #10 + '????' +
419     #10 + '??#39; + #10 + '??#39; + #10 + '????' + #10 + '?障??#39;;
420 yoffy 1.5 kKanji = [$80..$A0, $E0..$ff];
421 yoffy 1.1 begin
422    
423     delimiter := TStringList.Create;
424     try
425     mode := ModeWhite;
426     delimiter.Text := KAKUJOSI;
427     p := PChar( text );
428     tail := p + Length( text );
429     last := p;
430    
431     while p < tail do begin
432     delimited := False;
433     // ??絖????帥?ゃ?????ゅ??/span>
434     // ?糸???鴻? ModeGraph ?????????у???ャ???綽??????????????
435 yoffy 1.5 // if Byte(Byte( p^ ) - $a1) < $5e then begin
436     if Byte( p^ ) in kKanji then begin
437 yoffy 1.1 if p + 1 < tail then begin
438     ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
439     case ch of
440     $8140: newMode := ModeWhite;
441     $8141..$824e: newMode := ModeWGraph;
442     $824f..$8258: newMode := ModeWNum;
443     $8260..$829a: newMode := ModeWAlpha;
444     $829f..$82f1: newMode := ModeWHira;
445     $8340..$8396: newMode := ModeWKata;
446     else newMode := ModeWKanji;
447     end;
448 yoffy 1.6 // '??????#39; ??抗篁??????障???????帥?????????障????
449     if (mode = ModeWHira) or (mode = ModeWKata) then
450 yoffy 1.7 if (ch = $814a) or (ch = $814b) or (ch = $815b) then
451 yoffy 1.6 newMode := mode;
452 yoffy 1.1 end else begin
453     newMode := ModeWhite;
454     end;
455    
456     chSize := 2;
457    
458     // ?阪????????????絖?????????罎??祉????
459     if p + 3 < tail then begin // 3 = delimiter ????紊у?? - 1
460     for i := 0 to delimiter.Count - 1 do begin
461     if CompareMem(
462     p, PChar( delimiter[ i ] ), Length( delimiter[ i ] ) ) then begin
463     delimited := True;
464     chSize := Length( delimiter[ i ] );
465     Break;
466     end;
467     end;
468     end;
469     end else begin
470 yoffy 1.3 newMode := Modes( CharMode1[ Byte( p^ ) ] );
471 yoffy 1.1
472     chSize := 1;
473     end;
474    
475     if (mode <> newMode) or delimited then begin
476    
477     // ??絖????帥?ゃ????紊??眼??????
478     // ????????阪????????????絖???????????
479     if mode <> ModeWhite then begin
480 yoffy 1.3 SetLength( aWord, p - last );
481     CopyMemory( PChar( aWord ), last, p - last );
482 yoffy 1.5 //aWord := Copy( last, 0, p - last );
483 yoffy 1.3 idx := wordCount.IndexOf( aWord ); // ??
484 yoffy 1.1 if idx < 0 then begin
485     countInfo := TWordCountInfo.Create;
486     wordCount.AddObject( aWord, countInfo );
487     end else begin
488     countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
489     end;
490     countInfo.WordCount := countInfo.WordCount + 1;
491     end;
492    
493     last := p;
494     mode := newMode;
495    
496     end;
497    
498     p := p + chSize;
499     end; // while
500    
501     if mode <> ModeWhite then begin
502     aWord := Copy( last, 0, p - last );
503     idx := wordCount.IndexOf( aWord );
504     if idx < 0 then begin
505     countInfo := TWordCountInfo.Create;
506     wordCount.AddObject( aWord, countInfo );
507     end else begin
508     countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
509     end;
510     countInfo.WordCount := countInfo.WordCount + 1;
511     end;
512     finally
513     delimiter.Free;
514     end;
515    
516     end;
517    
518     //==============================
519     // CalcPaulGraham
520     //==============================
521     function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
522    
523     function p( const aWord : string ) : Single;
524     var
525     info : TWordInfo;
526     begin
527     info := Objects[ aWord ];
528     if info = nil then
529 yoffy 1.9 Result := 0.415
530 yoffy 1.1 else if info.NormalWord = 0 then
531     Result := 0.99
532     else if info.ImportantWord = 0 then
533     Result := 0.01
534 yoffy 1.9 else if info.ImportantWord + info.NormalWord * 2 < 5 then
535     Result := 0.5
536 yoffy 1.1 else
537     Result := ( info.ImportantWord / info.ImportantText ) /
538     ((info.NormalWord * 2 / info.NormalText ) +
539     (info.ImportantWord / info.ImportantText));
540     end;
541    
542     var
543     s, q : Extended;
544     i : Integer;
545     narray : TList;
546     const
547     SAMPLE_COUNT = 15;
548     begin
549    
550     Result := 1;
551     if wordCount.Count = 0 then
552     Exit;
553    
554     narray := TList.Create;
555     try
556     for i := 0 to wordCount.Count - 1 do begin
557     narray.Add( Pointer( p( wordCount[ i ] ) ) );
558     end;
559    
560     narray.Sort( AbsSort );
561    
562     s := 1;
563     q := 1;
564     i := min( SAMPLE_COUNT, narray.Count );
565     while i > 0 do begin
566     Dec( i );
567 yoffy 1.9
568 yoffy 1.1 s := s * Single( narray[ i ] );
569     q := q * (1 - Single( narray[ i ] ));
570     end;
571    
572     Result := s / (s + q);
573     finally
574     narray.Free;
575     end;
576    
577     end;
578    
579     //==============================
580     // CalcGaryRobinson
581     //==============================
582     function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
583    
584     function p( const aWord : string ) : Single;
585     var
586     info : TWordInfo;
587     begin
588     info := Objects[ aWord ];
589     if info = nil then
590     Result := 0.415
591     else if info.ImportantWord = 0 then
592 yoffy 1.9 Result := 0.01
593 yoffy 1.1 else if info.NormalWord = 0 then
594 yoffy 1.9 Result := 0.99
595 yoffy 1.1 else
596     Result := ( info.ImportantWord / info.ImportantText ) /
597     ((info.NormalWord / info.NormalText ) +
598     (info.ImportantWord / info.ImportantText));
599     end;
600    
601     function f( cnt : Integer; n, mean : Single ) : Extended;
602     const
603     k = 0.00001;
604     begin
605     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
606     end;
607    
608     var
609     n : Extended;
610     narray : array of Single;
611     mean : Extended;
612     countInfo : TWordCountInfo;
613     i : Integer;
614 yoffy 1.9 P1, Q1, R1 : Extended;
615     cnt : Extended;
616     begin
617    
618     if wordCount.Count = 0 then begin
619     Result := 1;
620     Exit;
621     end;
622    
623     SetLength( narray, wordCount.Count );
624     mean := 0;
625     for i := 0 to wordCount.Count - 1 do begin
626     n := p( wordCount[ i ] );
627     narray[ i ] := n;
628     mean := mean + n;
629     end;
630     mean := mean / wordCount.Count;
631    
632     cnt := 0;
633     P1 := 0;
634     Q1 := 0;
635     for i := 0 to wordCount.Count - 1 do begin
636     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
637     n := f( countInfo.WordCount, narray[ i ], mean );
638     if countInfo <> nil then
639     cnt := cnt + countInfo.WordCount;
640     P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
641     Q1 := Q1 + Ln( n ) * countInfo.WordCount;
642     end;
643     if cnt = 0 then
644     cnt := 1;
645     P1 := 1 - Exp( P1 * (1 / cnt) );
646     Q1 := 1 - Exp( Q1 * (1 / cnt) );
647    
648     if P1 + Q1 = 0 then begin
649     Result := 0.5
650     end else begin
651     n := (P1 - Q1) / (P1 + Q1);
652     Result := (1 + n) / 2;
653     end;
654    
655     end;
656    
657     //==============================
658     // CalcGaryRobinsonFisher
659     //==============================
660     function TGikoBayesian.CalcGaryRobinsonFisher(
661     wordCount : TWordCount
662     ) : Extended;
663    
664     function p( const aWord : string ) : Single;
665     var
666     info : TWordInfo;
667     begin
668     info := Objects[ aWord ];
669     if info = nil then
670     Result := 0.415
671     else if info.ImportantWord = 0 then
672     Result := 0.01
673     else if info.NormalWord = 0 then
674     Result := 0.99
675     else
676     Result := info.ImportantWord /
677     (info.ImportantWord + info.NormalWord *
678     info.ImportantText / info.NormalText);
679     end;
680    
681     function f( cnt : Integer; n, mean : Single ) : Extended;
682     const
683     k = 0.00001;
684     begin
685     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
686     end;
687    
688     function prbx( x2, degree : Extended ) : Extended;
689     var
690     m : Extended;
691     sum : Extended;
692     term : Extended;
693     i : extended;
694     begin
695    
696     m := x2 / 2;
697     sum := exp( -m );
698     term := -m;
699    
700     i := 1;
701     while i < (degree / 2 - 1) do begin
702     term := term + ln( m / i );
703     sum := sum + exp( term );
704     i := i + 1;
705     end;
706    
707     if sum < 1 then
708     Result := sum
709     else
710     Result := 1.0;
711    
712     end;
713    
714     var
715     n : Extended;
716     narray : array of Single;
717     mean : Extended;
718     countInfo : TWordCountInfo;
719     i : Integer;
720 yoffy 1.1 normal : Extended;
721     important : Extended;
722 yoffy 1.9 P1, Q1 : Extended;
723 yoffy 1.1 cnt : Extended;
724     begin
725    
726     if wordCount.Count = 0 then begin
727     Result := 1;
728     Exit;
729     end;
730    
731     SetLength( narray, wordCount.Count );
732     mean := 0;
733     for i := 0 to wordCount.Count - 1 do begin
734     n := p( wordCount[ i ] );
735     narray[ i ] := n;
736     mean := mean + n;
737     end;
738     mean := mean / wordCount.Count;
739    
740 yoffy 1.9 cnt := 0;
741     (*
742     P1 := 1;
743     Q1 := 1;
744     (*)
745     P1 := 0;
746     Q1 := 0;
747     //*
748 yoffy 1.1 for i := 0 to wordCount.Count - 1 do begin
749     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
750     n := f( countInfo.WordCount, narray[ i ], mean );
751     if countInfo <> nil then
752 yoffy 1.9 cnt := cnt + countInfo.WordCount;
753     (*
754     P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
755     Q1 := Q1 + Ln( n ) * countInfo.WordCount;
756     (*)
757     P1 := P1 + Ln( 1 - n );
758     Q1 := Q1 + Ln( n );
759     //*)
760 yoffy 1.1 end;
761     if cnt = 0 then
762     cnt := 1;
763 yoffy 1.9 //(*
764     P1 := prbx( -2 * P1, 2 * cnt );
765     Q1 := prbx( -2 * Q1, 2 * cnt );
766     (*)
767     P1 := prbx( -2 * Ln( P1 ), 2 * cnt );
768     Q1 := prbx( -2 * Ln( Q1 ), 2 * cnt );
769     //*)
770     if P1 + Q1 = 0 then begin
771     Result := 0.5
772     end else begin
773     Result := (1 + Q1 + P1) / 2;
774     end;
775 yoffy 1.1
776     end;
777    
778     //==============================
779     // Parse
780     //==============================
781     function TGikoBayesian.Parse(
782     const text : string;
783     wordCount : TWordCount;
784 yoffy 1.9 algorithm : TGikoBayesianAlgorithm
785 yoffy 1.1 ) : Extended;
786     begin
787    
788     CountWord( text, wordCount );
789     case algorithm of
790     gbaPaulGraham: Result := CalcPaulGraham( wordCount );
791 yoffy 1.9 gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
792     gbaGaryRobinsonFisher:
793     Result := CalcGaryRobinsonFisher( wordCount );
794 yoffy 1.1 else Result := 0;
795     end;
796    
797     end;
798    
799     //==============================
800     // Learn
801     //==============================
802     procedure TGikoBayesian.Learn(
803     wordCount : TWordCount;
804     isImportant : Boolean );
805     var
806     aWord : string;
807     wordinfo : TWordInfo;
808     countinfo : TWordCountInfo;
809 yoffy 1.3 i : Integer;
810 yoffy 1.1 begin
811    
812     for i := 0 to wordCount.Count - 1 do begin
813     aWord := wordCount[ i ];
814     wordinfo := Objects[ aWord ];
815 yoffy 1.3 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
816 yoffy 1.1 if wordinfo = nil then begin
817     wordinfo := TWordInfo.Create;
818     Objects[ aWord ] := wordinfo;
819     end;
820    
821     if isImportant then begin
822     wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
823     wordinfo.ImportantText := wordinfo.ImportantText + 1;
824     end else begin
825     wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
826     wordinfo.NormalText := wordinfo.NormalText + 1;
827     end;
828     end;
829    
830     end;
831    
832     //==============================
833     // Forget
834     //==============================
835     procedure TGikoBayesian.Forget(
836     wordCount : TWordCount;
837     isImportant : Boolean );
838     var
839     aWord : string;
840     wordinfo : TWordInfo;
841     countinfo : TWordCountInfo;
842     i : Integer;
843     begin
844    
845     for i := 0 to wordCount.Count - 1 do begin
846     aWord := wordCount[ i ];
847     wordinfo := Objects[ aWord ];
848     if wordinfo = nil then
849     Continue;
850    
851     countinfo := TWordCountInfo( wordCount.Objects[ i ] );
852     if isImportant then begin
853 yoffy 1.8 if wordInfo.ImportantText > 0 then begin
854     wordinfo.ImportantText := wordinfo.ImportantText - 1;
855     wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
856     end;
857 yoffy 1.1 end else begin
858 yoffy 1.8 if wordinfo.NormalText > 0 then begin
859     wordinfo.NormalText := wordinfo.NormalText - 1;
860     wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
861     end;
862 yoffy 1.1 end;
863     end;
864    
865     end;
866    
867     end.

Back to OSDN">Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26