Develop and Download Open Source Software

Browse CVS Repository

Contents of /gikonavigoeson/gikonavi/GikoBayesian.pas

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.9 - (show annotations) (download) (as text)
Wed Oct 27 00:10:12 2004 UTC (19 years, 5 months ago) by yoffy
Branch: MAIN
Changes since 1.8: +153 -20 lines
File MIME type: text/x-pascal
スパムフィルタの ON/OFF オプションを追加。

1 unit GikoBayesian;
2
3 {!
4 \file GikoBayesian.pas
5 \brief ???ゃ?吾?≪?潟???c????/span>
6
7 $Id: GikoBayesian.pas,v 1.8 2004/10/21 05:59:39 yoffy Exp $
8 }
9
10 interface
11
12 //==================================================
13 uses
14 //==================================================
15 Classes;
16
17 //==================================================
18 type
19 //==================================================
20
21 {!***********************************************************
22 \brief ??茯???????????/span>
23 ************************************************************}
24 TWordInfo = class( TObject )
25 private
26 FNormalWord : Integer; //!< ??絽吾????茯????????糸?眼????????/span>
27 FImportantWord : Integer; //!< 羈?????茯????????糸?眼????????/span>
28 FNormalText : Integer; //!< ??絽吾????茯??????????障??????????腴?????/span>
29 FImportantText : Integer; //!< 羈?????茯??????????障??????????腴?????/span>
30
31 public
32 property NormalWord : Integer read FNormalWord write FNormalWord;
33 property ImportantWord : Integer read FImportantWord write FImportantWord;
34 property NormalText : Integer read FNormalText write FNormalText;
35 property ImportantText : Integer read FImportantText write FImportantText;
36 end;
37
38 {!***********************************************************
39 \brief 茹f??羝??水??茯???????????/span>
40 ************************************************************}
41 TWordCountInfo = class( TObject )
42 private
43 FWordCount : Integer; //!< ??茯???/span>
44
45 public
46 property WordCount : Integer read FWordCount write FWordCount;
47 end;
48
49 {!***********************************************************
50 \brief 茹f??羝??水??茯????鴻??
51 ************************************************************}
52 // TWordCount = class( THashedStringList ) // 羶???
53 TWordCount = class( TStringList )
54 public
55 constructor Create;
56 destructor Destroy; override;
57 end;
58
59 {!***********************************************************
60 \brief ???c???帥?≪???眼???冴??
61 ************************************************************}
62 TGikoBayesianAlgorithm =
63 (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
64
65 {!***********************************************************
66 \brief ???ゃ?吾?≪?潟???c????/span>
67 ************************************************************}
68 // TGikoBayesian = class( THashedStringList ) // 羶???
69 TGikoBayesian = class( TStringList )
70 private
71 FFilePath : string; //!< 茯??粋昭???????<?ゃ??????/span>
72 function GetObject( const name : string ) : TWordInfo;
73 procedure SetObject( const name : string; value : TWordInfo );
74
75 public
76 constructor Create;
77 destructor Destroy; override;
78
79 //! ???<?ゃ??????絖??絮ユ???茯??水?冴???障??
80 procedure LoadFromFile( const filePath : string );
81
82 //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
83 procedure SaveToFile( const filePath : string );
84
85 //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
86 procedure Save;
87
88 //! ??茯???????????宴????緇????障??
89 property Objects[ const name : string ] : TWordInfo
90 read GetObject write SetObject; default;
91
92 //! ??腴??????障??????茯????????潟?????障??
93 procedure CountWord(
94 const text : string;
95 wordCount : TWordCount );
96
97 {!
98 \brief Paul Graham 羈????冴?ャ??????腴???絵??墾??羆阪????障??
99 \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
100 }
101 function CalcPaulGraham( wordCount : TWordCount ) : Extended;
102
103 {!
104 \brief GaryRobinson 羈????冴?ャ??????腴???絵??墾??羆阪????障??
105 \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
106 }
107 function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
108
109 {!
110 \brief GaryRobinson-Fisher 羈????冴?ャ??????腴???絵??墾??羆阪????障??
111 \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
112 }
113 function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
114
115 {!
116 \brief ??腴???茹f??
117 \param text 茹f????????腴?
118 \param wordCount 茹f??????????茯????鴻????菴???
119 \param algorithm 羈???墾??浦絎??????????≪???眼???冴??????絎????障??
120 \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
121
122 CountWord ? Calcxxxxx ???障???????茵??????????с????
123 }
124 function Parse(
125 const text : string;
126 wordCount : TWordCount;
127 algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
128 ) : Extended;
129
130 {!
131 \brief 絖??????
132 \param wordCount Parse ?цВ??????????茯????鴻??
133 \param isImportant 羈??????鴻????腴???????????????? True
134 }
135 procedure Learn(
136 wordCount : TWordCount;
137 isImportant : Boolean );
138
139 {!
140 \brief 絖??腟?????綽?????
141 \param wordCount Parse ?цВ??????????茯????鴻??
142 \param isImportant 羈??????鴻????腴???????????????????????? True
143 \warning 絖??羝??帥????腴???????????∈茯??堺?ャ?障??????<br>
144 Learn ????????????腴??? isImportant ???????c????????腴???
145 Forget ?????????若?帥???若?鴻???贋?????障????<br>
146 絖??羝??帥????????????????????????????????
147
148 ???????膺?腟??????????≪????????с???????障??????<br>
149 wordCount ??緇?????腴? (Parse ? text 綣??? ???膺?腟??????帥?????≪???障????<br><br>
150
151 筝祉??絵????腴?????羈?????腴????????帥????????? Forget -> Learn ?????т戎?????障????
152 }
153 procedure Forget(
154 wordCount : TWordCount;
155 isImportant : Boolean );
156 end;
157
158 //==================================================
159 implementation
160 //==================================================
161
162 uses
163 SysUtils, Math, Windows;
164
165 const
166 GIKO_BAYESIAN_FILE_VERSION = '1.0';
167 {
168 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
169 ModeWGraph, ModeWAlpha, ModeWNum,
170 ModeWHira, ModeWKata, ModeWKanji);
171 }
172 CharMode1 : array [ 0..255 ] of Byte =
173 (
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
178 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
179 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
180 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
181 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
182
183 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
184 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
185 0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
186 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
187 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
188 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
189 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
191 );
192
193 //************************************************************
194 // misc
195 //************************************************************
196
197 //==============================
198 // RemoveToken
199 //==============================
200 function RemoveToken(var s: string;const delimiter: string): string;
201 var
202 p: Integer;
203 begin
204 p := AnsiPos(delimiter, s);
205 if p = 0 then
206 Result := s
207 else
208 Result := Copy(s, 1, p - 1);
209 s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
210 end;
211
212 //==============================
213 // AbsSort
214 //==============================
215 function AbsSort( p1, p2 : Pointer ) : Integer;
216 var
217 v1, v2 : Single;
218 begin
219
220 v1 := Abs( Single( p1 ) - 0.5 );
221 v2 := Abs( Single( p2 ) - 0.5 );
222 if v1 > v2 then
223 Result := -1
224 else if v1 = v2 then
225 Result := 0
226 else
227 Result := 1;
228
229 end;
230
231 //************************************************************
232 // TWordCount class
233 //************************************************************
234 constructor TWordCount.Create;
235 begin
236
237 Duplicates := dupIgnore;
238 CaseSensitive := True;
239 Sorted := True;
240
241 end;
242
243 destructor TWordCount.Destroy;
244 var
245 i : Integer;
246 begin
247
248 for i := Count - 1 downto 0 do
249 if Objects[ i ] <> nil then
250 Objects[ i ].Free;
251
252 inherited;
253
254 end;
255
256 //************************************************************
257 // TGikoBayesian class
258 //************************************************************
259
260 //==============================
261 // Create
262 //==============================
263 constructor TGikoBayesian.Create;
264 begin
265
266 Duplicates := dupIgnore;
267 CaseSensitive := True;
268 Sorted := True;
269
270 end;
271
272 //==============================
273 // Destroy
274 //==============================
275 destructor TGikoBayesian.Destroy;
276 var
277 i : Integer;
278 begin
279
280 for i := Count - 1 downto 0 do
281 if inherited Objects[ i ] <> nil then
282 inherited Objects[ i ].Free;
283
284 inherited;
285
286 end;
287
288 procedure TGikoBayesian.LoadFromFile( const filePath : string );
289 var
290 i : Integer;
291 sl : TStringList;
292 s : string;
293 name : string;
294 info : TWordInfo;
295 begin
296
297 FFilePath := filePath;
298
299 if not FileExists( filePath ) then
300 Exit;
301
302 sl := TStringList.Create;
303 try
304 sl.LoadFromFile( filePath );
305
306 for i := 1 to sl.Count - 1 do begin
307 s := sl[ i ];
308 name := RemoveToken( s, #1 );
309 info := TWordInfo.Create;
310 info.NormalWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
311 info.ImportantWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
312 info.NormalText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
313 info.ImportantText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
314
315 AddObject( name, info );
316 end;
317 finally
318 sl.Free;
319 end;
320
321 end;
322
323 procedure TGikoBayesian.SaveToFile( const filePath : string );
324 var
325 i : Integer;
326 sl : TStringList;
327 s : string;
328 info : TWordInfo;
329 begin
330
331 FFilePath := filePath;
332
333 sl := TStringList.Create;
334 try
335 sl.BeginUpdate;
336 sl.Add( GIKO_BAYESIAN_FILE_VERSION );
337
338 for i := 0 to Count - 1 do begin
339 info := TWordInfo( inherited Objects[ i ] );
340 s := Strings[ i ] + #1
341 + Format('%x', [info.NormalWord]) + #1
342 + Format('%x', [info.ImportantWord]) + #1
343 + Format('%x', [info.NormalText]) + #1
344 + Format('%x', [info.ImportantText]);
345
346 sl.Add(s);
347 end;
348 sl.EndUpdate;
349 sl.SaveToFile( filePath );
350 finally
351 sl.Free;
352 end;
353
354 end;
355
356 procedure TGikoBayesian.Save;
357 begin
358
359 if FFilePath <> '' then
360 SaveToFile( FFilePath );
361
362 end;
363
364 //==============================
365 // GetObject
366 //==============================
367 function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
368 var
369 idx : Integer;
370 begin
371
372 idx := IndexOf( name ); // 羶???
373 if idx < 0 then
374 Result := nil
375 else
376 Result := TWordInfo( inherited Objects[ idx ] );
377
378 end;
379
380 //==============================
381 // SetObject
382 //==============================
383 procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
384 var
385 idx : Integer;
386 begin
387
388 idx := IndexOf( name );
389 if idx < 0 then
390 AddObject( name, value )
391 else
392 inherited Objects[ idx ] := value;
393
394 end;
395
396
397 //==============================
398 // CountWord
399 //==============================
400 procedure TGikoBayesian.CountWord(
401 const text : string;
402 wordCount : TWordCount );
403 type
404 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
405 ModeWGraph, ModeWAlpha, ModeWNum,
406 ModeWHira, ModeWKata, ModeWKanji);
407 var
408 p, tail, last : PChar;
409 mode, newMode : Modes;
410 aWord : string;
411 ch : Longword;
412 chSize : Integer;
413 delimiter : TStringList;
414 delimited : Boolean;
415 i, idx : Integer;
416 countInfo : TWordCountInfo;
417 const
418 KAKUJOSI = '??' + #10 + '??#39; + #10 + '??' + #10 + '??#39; + #10 + '????' +
419 #10 + '??#39; + #10 + '??#39; + #10 + '????' + #10 + '?障??#39;;
420 kKanji = [$80..$A0, $E0..$ff];
421 begin
422
423 delimiter := TStringList.Create;
424 try
425 mode := ModeWhite;
426 delimiter.Text := KAKUJOSI;
427 p := PChar( text );
428 tail := p + Length( text );
429 last := p;
430
431 while p < tail do begin
432 delimited := False;
433 // ??絖????帥?ゃ?????ゅ??/span>
434 // ?糸???鴻? ModeGraph ?????????у???ャ???綽??????????????
435 // if Byte(Byte( p^ ) - $a1) < $5e then begin
436 if Byte( p^ ) in kKanji then begin
437 if p + 1 < tail then begin
438 ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
439 case ch of
440 $8140: newMode := ModeWhite;
441 $8141..$824e: newMode := ModeWGraph;
442 $824f..$8258: newMode := ModeWNum;
443 $8260..$829a: newMode := ModeWAlpha;
444 $829f..$82f1: newMode := ModeWHira;
445 $8340..$8396: newMode := ModeWKata;
446 else newMode := ModeWKanji;
447 end;
448 // '??????#39; ??抗篁??????障???????帥?????????障????
449 if (mode = ModeWHira) or (mode = ModeWKata) then
450 if (ch = $814a) or (ch = $814b) or (ch = $815b) then
451 newMode := mode;
452 end else begin
453 newMode := ModeWhite;
454 end;
455
456 chSize := 2;
457
458 // ?阪????????????絖?????????罎??祉????
459 if p + 3 < tail then begin // 3 = delimiter ????紊у?? - 1
460 for i := 0 to delimiter.Count - 1 do begin
461 if CompareMem(
462 p, PChar( delimiter[ i ] ), Length( delimiter[ i ] ) ) then begin
463 delimited := True;
464 chSize := Length( delimiter[ i ] );
465 Break;
466 end;
467 end;
468 end;
469 end else begin
470 newMode := Modes( CharMode1[ Byte( p^ ) ] );
471
472 chSize := 1;
473 end;
474
475 if (mode <> newMode) or delimited then begin
476
477 // ??絖????帥?ゃ????紊??眼??????
478 // ????????阪????????????絖???????????
479 if mode <> ModeWhite then begin
480 SetLength( aWord, p - last );
481 CopyMemory( PChar( aWord ), last, p - last );
482 //aWord := Copy( last, 0, p - last );
483 idx := wordCount.IndexOf( aWord ); // ??
484 if idx < 0 then begin
485 countInfo := TWordCountInfo.Create;
486 wordCount.AddObject( aWord, countInfo );
487 end else begin
488 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
489 end;
490 countInfo.WordCount := countInfo.WordCount + 1;
491 end;
492
493 last := p;
494 mode := newMode;
495
496 end;
497
498 p := p + chSize;
499 end; // while
500
501 if mode <> ModeWhite then begin
502 aWord := Copy( last, 0, p - last );
503 idx := wordCount.IndexOf( aWord );
504 if idx < 0 then begin
505 countInfo := TWordCountInfo.Create;
506 wordCount.AddObject( aWord, countInfo );
507 end else begin
508 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
509 end;
510 countInfo.WordCount := countInfo.WordCount + 1;
511 end;
512 finally
513 delimiter.Free;
514 end;
515
516 end;
517
518 //==============================
519 // CalcPaulGraham
520 //==============================
521 function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
522
523 function p( const aWord : string ) : Single;
524 var
525 info : TWordInfo;
526 begin
527 info := Objects[ aWord ];
528 if info = nil then
529 Result := 0.415
530 else if info.NormalWord = 0 then
531 Result := 0.99
532 else if info.ImportantWord = 0 then
533 Result := 0.01
534 else if info.ImportantWord + info.NormalWord * 2 < 5 then
535 Result := 0.5
536 else
537 Result := ( info.ImportantWord / info.ImportantText ) /
538 ((info.NormalWord * 2 / info.NormalText ) +
539 (info.ImportantWord / info.ImportantText));
540 end;
541
542 var
543 s, q : Extended;
544 i : Integer;
545 narray : TList;
546 const
547 SAMPLE_COUNT = 15;
548 begin
549
550 Result := 1;
551 if wordCount.Count = 0 then
552 Exit;
553
554 narray := TList.Create;
555 try
556 for i := 0 to wordCount.Count - 1 do begin
557 narray.Add( Pointer( p( wordCount[ i ] ) ) );
558 end;
559
560 narray.Sort( AbsSort );
561
562 s := 1;
563 q := 1;
564 i := min( SAMPLE_COUNT, narray.Count );
565 while i > 0 do begin
566 Dec( i );
567
568 s := s * Single( narray[ i ] );
569 q := q * (1 - Single( narray[ i ] ));
570 end;
571
572 Result := s / (s + q);
573 finally
574 narray.Free;
575 end;
576
577 end;
578
579 //==============================
580 // CalcGaryRobinson
581 //==============================
582 function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
583
584 function p( const aWord : string ) : Single;
585 var
586 info : TWordInfo;
587 begin
588 info := Objects[ aWord ];
589 if info = nil then
590 Result := 0.415
591 else if info.ImportantWord = 0 then
592 Result := 0.01
593 else if info.NormalWord = 0 then
594 Result := 0.99
595 else
596 Result := ( info.ImportantWord / info.ImportantText ) /
597 ((info.NormalWord / info.NormalText ) +
598 (info.ImportantWord / info.ImportantText));
599 end;
600
601 function f( cnt : Integer; n, mean : Single ) : Extended;
602 const
603 k = 0.00001;
604 begin
605 Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
606 end;
607
608 var
609 n : Extended;
610 narray : array of Single;
611 mean : Extended;
612 countInfo : TWordCountInfo;
613 i : Integer;
614 P1, Q1, R1 : Extended;
615 cnt : Extended;
616 begin
617
618 if wordCount.Count = 0 then begin
619 Result := 1;
620 Exit;
621 end;
622
623 SetLength( narray, wordCount.Count );
624 mean := 0;
625 for i := 0 to wordCount.Count - 1 do begin
626 n := p( wordCount[ i ] );
627 narray[ i ] := n;
628 mean := mean + n;
629 end;
630 mean := mean / wordCount.Count;
631
632 cnt := 0;
633 P1 := 0;
634 Q1 := 0;
635 for i := 0 to wordCount.Count - 1 do begin
636 countInfo := TWordCountInfo( wordCount.Objects[ i ] );
637 n := f( countInfo.WordCount, narray[ i ], mean );
638 if countInfo <> nil then
639 cnt := cnt + countInfo.WordCount;
640 P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
641 Q1 := Q1 + Ln( n ) * countInfo.WordCount;
642 end;
643 if cnt = 0 then
644 cnt := 1;
645 P1 := 1 - Exp( P1 * (1 / cnt) );
646 Q1 := 1 - Exp( Q1 * (1 / cnt) );
647
648 if P1 + Q1 = 0 then begin
649 Result := 0.5
650 end else begin
651 n := (P1 - Q1) / (P1 + Q1);
652 Result := (1 + n) / 2;
653 end;
654
655 end;
656
657 //==============================
658 // CalcGaryRobinsonFisher
659 //==============================
660 function TGikoBayesian.CalcGaryRobinsonFisher(
661 wordCount : TWordCount
662 ) : Extended;
663
664 function p( const aWord : string ) : Single;
665 var
666 info : TWordInfo;
667 begin
668 info := Objects[ aWord ];
669 if info = nil then
670 Result := 0.415
671 else if info.ImportantWord = 0 then
672 Result := 0.01
673 else if info.NormalWord = 0 then
674 Result := 0.99
675 else
676 Result := info.ImportantWord /
677 (info.ImportantWord + info.NormalWord *
678 info.ImportantText / info.NormalText);
679 end;
680
681 function f( cnt : Integer; n, mean : Single ) : Extended;
682 const
683 k = 0.00001;
684 begin
685 Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
686 end;
687
688 function prbx( x2, degree : Extended ) : Extended;
689 var
690 m : Extended;
691 sum : Extended;
692 term : Extended;
693 i : extended;
694 begin
695
696 m := x2 / 2;
697 sum := exp( -m );
698 term := -m;
699
700 i := 1;
701 while i < (degree / 2 - 1) do begin
702 term := term + ln( m / i );
703 sum := sum + exp( term );
704 i := i + 1;
705 end;
706
707 if sum < 1 then
708 Result := sum
709 else
710 Result := 1.0;
711
712 end;
713
714 var
715 n : Extended;
716 narray : array of Single;
717 mean : Extended;
718 countInfo : TWordCountInfo;
719 i : Integer;
720 normal : Extended;
721 important : Extended;
722 P1, Q1 : Extended;
723 cnt : Extended;
724 begin
725
726 if wordCount.Count = 0 then begin
727 Result := 1;
728 Exit;
729 end;
730
731 SetLength( narray, wordCount.Count );
732 mean := 0;
733 for i := 0 to wordCount.Count - 1 do begin
734 n := p( wordCount[ i ] );
735 narray[ i ] := n;
736 mean := mean + n;
737 end;
738 mean := mean / wordCount.Count;
739
740 cnt := 0;
741 (*
742 P1 := 1;
743 Q1 := 1;
744 (*)
745 P1 := 0;
746 Q1 := 0;
747 //*
748 for i := 0 to wordCount.Count - 1 do begin
749 countInfo := TWordCountInfo( wordCount.Objects[ i ] );
750 n := f( countInfo.WordCount, narray[ i ], mean );
751 if countInfo <> nil then
752 cnt := cnt + countInfo.WordCount;
753 (*
754 P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
755 Q1 := Q1 + Ln( n ) * countInfo.WordCount;
756 (*)
757 P1 := P1 + Ln( 1 - n );
758 Q1 := Q1 + Ln( n );
759 //*)
760 end;
761 if cnt = 0 then
762 cnt := 1;
763 //(*
764 P1 := prbx( -2 * P1, 2 * cnt );
765 Q1 := prbx( -2 * Q1, 2 * cnt );
766 (*)
767 P1 := prbx( -2 * Ln( P1 ), 2 * cnt );
768 Q1 := prbx( -2 * Ln( Q1 ), 2 * cnt );
769 //*)
770 if P1 + Q1 = 0 then begin
771 Result := 0.5
772 end else begin
773 Result := (1 + Q1 + P1) / 2;
774 end;
775
776 end;
777
778 //==============================
779 // Parse
780 //==============================
781 function TGikoBayesian.Parse(
782 const text : string;
783 wordCount : TWordCount;
784 algorithm : TGikoBayesianAlgorithm
785 ) : Extended;
786 begin
787
788 CountWord( text, wordCount );
789 case algorithm of
790 gbaPaulGraham: Result := CalcPaulGraham( wordCount );
791 gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
792 gbaGaryRobinsonFisher:
793 Result := CalcGaryRobinsonFisher( wordCount );
794 else Result := 0;
795 end;
796
797 end;
798
799 //==============================
800 // Learn
801 //==============================
802 procedure TGikoBayesian.Learn(
803 wordCount : TWordCount;
804 isImportant : Boolean );
805 var
806 aWord : string;
807 wordinfo : TWordInfo;
808 countinfo : TWordCountInfo;
809 i : Integer;
810 begin
811
812 for i := 0 to wordCount.Count - 1 do begin
813 aWord := wordCount[ i ];
814 wordinfo := Objects[ aWord ];
815 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
816 if wordinfo = nil then begin
817 wordinfo := TWordInfo.Create;
818 Objects[ aWord ] := wordinfo;
819 end;
820
821 if isImportant then begin
822 wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
823 wordinfo.ImportantText := wordinfo.ImportantText + 1;
824 end else begin
825 wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
826 wordinfo.NormalText := wordinfo.NormalText + 1;
827 end;
828 end;
829
830 end;
831
832 //==============================
833 // Forget
834 //==============================
835 procedure TGikoBayesian.Forget(
836 wordCount : TWordCount;
837 isImportant : Boolean );
838 var
839 aWord : string;
840 wordinfo : TWordInfo;
841 countinfo : TWordCountInfo;
842 i : Integer;
843 begin
844
845 for i := 0 to wordCount.Count - 1 do begin
846 aWord := wordCount[ i ];
847 wordinfo := Objects[ aWord ];
848 if wordinfo = nil then
849 Continue;
850
851 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
852 if isImportant then begin
853 if wordInfo.ImportantText > 0 then begin
854 wordinfo.ImportantText := wordinfo.ImportantText - 1;
855 wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
856 end;
857 end else begin
858 if wordinfo.NormalText > 0 then begin
859 wordinfo.NormalText := wordinfo.NormalText - 1;
860 wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
861 end;
862 end;
863 end;
864
865 end;
866
867 end.

Back to OSDN">Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26