Develop and Download Open Source Software

Browse CVS Repository

Contents of /gikonavigoeson/gikonavi/GikoBayesian.pas

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.11 - (show annotations) (download) (as text)
Sun Oct 31 16:48:44 2004 UTC (19 years, 5 months ago) by yoffy
Branch: MAIN
Changes since 1.10: +44 -20 lines
File MIME type: text/x-pascal
- デリミタの前後で単語を区切るはずが、デリミタ前しか切っていなかったバグを修正。
- IndexOf の代わりに Find を使うようになった。

1 unit GikoBayesian;
2
3 {!
4 \file GikoBayesian.pas
5 \brief ???ゃ?吾?≪?潟???c????/span>
6
7 $Id: GikoBayesian.pas,v 1.10 2004/10/31 12:24:33 yoffy Exp $
8 }
9
10 interface
11
12 //==================================================
13 uses
14 //==================================================
15 Classes;
16
17 //==================================================
18 type
19 //==================================================
20
21 {!***********************************************************
22 \brief ??茯???????????/span>
23 ************************************************************}
24 TWordInfo = class( TObject )
25 private
26 FNormalWord : Integer; //!< ??絽吾????茯????????糸?眼????????/span>
27 FImportantWord : Integer; //!< 羈?????茯????????糸?眼????????/span>
28 FNormalText : Integer; //!< ??絽吾????茯??????????障??????????腴?????/span>
29 FImportantText : Integer; //!< 羈?????茯??????????障??????????腴?????/span>
30
31 public
32 property NormalWord : Integer read FNormalWord write FNormalWord;
33 property ImportantWord : Integer read FImportantWord write FImportantWord;
34 property NormalText : Integer read FNormalText write FNormalText;
35 property ImportantText : Integer read FImportantText write FImportantText;
36 end;
37
38 {!***********************************************************
39 \brief 茹f??羝??水??茯???????????/span>
40 ************************************************************}
41 TWordCountInfo = class( TObject )
42 private
43 FWordCount : Integer; //!< ??茯???/span>
44
45 public
46 property WordCount : Integer read FWordCount write FWordCount;
47 end;
48
49 {!***********************************************************
50 \brief 茹f??羝??水??茯????鴻??
51 ************************************************************}
52 // TWordCount = class( THashedStringList ) // 羶???
53 TWordCount = class( TStringList )
54 public
55 constructor Create;
56 destructor Destroy; override;
57 end;
58
59 {!***********************************************************
60 \brief ???c???帥?≪???眼???冴??
61 ************************************************************}
62 TGikoBayesianAlgorithm =
63 (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
64
65 {!***********************************************************
66 \brief ???ゃ?吾?≪?潟???c????/span>
67 ************************************************************}
68 // TGikoBayesian = class( THashedStringList ) // 羶???
69 TGikoBayesian = class( TStringList )
70 private
71 FFilePath : string; //!< 茯??粋昭???????<?ゃ??????/span>
72 function GetObject( const name : string ) : TWordInfo;
73 procedure SetObject( const name : string; value : TWordInfo );
74
75 public
76 constructor Create;
77 destructor Destroy; override;
78
79 //! ???<?ゃ??????絖??絮ユ???茯??水?冴???障??
80 procedure LoadFromFile( const filePath : string );
81
82 //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
83 procedure SaveToFile( const filePath : string );
84
85 //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
86 procedure Save;
87
88 //! ??茯???????????宴????緇????障??
89 property Objects[ const name : string ] : TWordInfo
90 read GetObject write SetObject; default;
91
92 //! ??腴??????障??????茯????????潟?????障??
93 procedure CountWord(
94 const text : string;
95 wordCount : TWordCount );
96
97 {!
98 \brief Paul Graham 羈????冴?ャ??????腴???絵??墾??羆阪????障??
99 \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
100 }
101 function CalcPaulGraham( wordCount : TWordCount ) : Extended;
102
103 {!
104 \brief GaryRobinson 羈????冴?ャ??????腴???絵??墾??羆阪????障??
105 \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
106 }
107 function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
108
109 {!
110 \brief GaryRobinson-Fisher 羈????冴?ャ??????腴???絵??墾??羆阪????障??
111 \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
112 }
113 function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
114
115 {!
116 \brief ??腴???茹f??
117 \param text 茹f????????腴?
118 \param wordCount 茹f??????????茯????鴻????菴???
119 \param algorithm 羈???墾??浦絎??????????≪???眼???冴??????絎????障??
120 \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
121
122 CountWord ? Calcxxxxx ???障???????茵??????????с????
123 }
124 function Parse(
125 const text : string;
126 wordCount : TWordCount;
127 algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
128 ) : Extended;
129
130 {!
131 \brief 絖??????
132 \param wordCount Parse ?цВ??????????茯????鴻??
133 \param isImportant 羈??????鴻????腴???????????????? True
134 }
135 procedure Learn(
136 wordCount : TWordCount;
137 isImportant : Boolean );
138
139 {!
140 \brief 絖??腟?????綽?????
141 \param wordCount Parse ?цВ??????????茯????鴻??
142 \param isImportant 羈??????鴻????腴???????????????????????? True
143 \warning 絖??羝??帥????腴???????????∈茯??堺?ャ?障??????<br>
144 Learn ????????????腴??? isImportant ???????c????????腴???
145 Forget ?????????若?帥???若?鴻???贋?????障????<br>
146 絖??羝??帥????????????????????????????????
147
148 ???????膺?腟??????????≪????????с???????障??????<br>
149 wordCount ??緇?????腴? (Parse ? text 綣??? ???膺?腟??????帥?????≪???障????<br><br>
150
151 筝祉??絵????腴?????羈?????腴????????帥????????? Forget -> Learn ?????т戎?????障????
152 }
153 procedure Forget(
154 wordCount : TWordCount;
155 isImportant : Boolean );
156 end;
157
158 //==================================================
159 implementation
160 //==================================================
161
162 uses
163 SysUtils, Math, Windows;
164
165 const
166 GIKO_BAYESIAN_FILE_VERSION = '1.0';
167 {
168 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
169 ModeWGraph, ModeWAlpha, ModeWNum,
170 ModeWHira, ModeWKata, ModeWKanji);
171 }
172 CharMode1 : array [ 0..255 ] of Byte =
173 (
174 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
175 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
176 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
178 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
179 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
180 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
181 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
182
183 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
184 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
185 0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
186 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
187 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
188 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
189 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
191 );
192
193 //************************************************************
194 // misc
195 //************************************************************
196
197 //==============================
198 // RemoveToken
199 //==============================
200 function RemoveToken(var s: string;const delimiter: string): string;
201 var
202 p: Integer;
203 begin
204 p := AnsiPos(delimiter, s);
205 if p = 0 then
206 Result := s
207 else
208 Result := Copy(s, 1, p - 1);
209 s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
210 end;
211
212 //==============================
213 // AbsSort
214 //==============================
215 function AbsSort( p1, p2 : Pointer ) : Integer;
216 var
217 v1, v2 : Single;
218 begin
219
220 v1 := Abs( Single( p1 ) - 0.5 );
221 v2 := Abs( Single( p2 ) - 0.5 );
222 if v1 > v2 then
223 Result := -1
224 else if v1 = v2 then
225 Result := 0
226 else
227 Result := 1;
228
229 end;
230
231 //************************************************************
232 // TWordCount class
233 //************************************************************
234 constructor TWordCount.Create;
235 begin
236
237 Duplicates := dupIgnore;
238 CaseSensitive := True;
239 Sorted := True;
240
241 end;
242
243 destructor TWordCount.Destroy;
244 var
245 i : Integer;
246 begin
247
248 for i := Count - 1 downto 0 do
249 if Objects[ i ] <> nil then
250 Objects[ i ].Free;
251
252 inherited;
253
254 end;
255
256 //************************************************************
257 // TGikoBayesian class
258 //************************************************************
259
260 //==============================
261 // Create
262 //==============================
263 constructor TGikoBayesian.Create;
264 begin
265
266 Duplicates := dupIgnore;
267 CaseSensitive := True;
268 Sorted := True;
269
270 end;
271
272 //==============================
273 // Destroy
274 //==============================
275 destructor TGikoBayesian.Destroy;
276 var
277 i : Integer;
278 begin
279
280 for i := Count - 1 downto 0 do
281 if inherited Objects[ i ] <> nil then
282 inherited Objects[ i ].Free;
283
284 inherited;
285
286 end;
287
288 procedure TGikoBayesian.LoadFromFile( const filePath : string );
289 var
290 i : Integer;
291 sl : TStringList;
292 s : string;
293 name : string;
294 info : TWordInfo;
295 begin
296
297 FFilePath := filePath;
298
299 if not FileExists( filePath ) then
300 Exit;
301
302 sl := TStringList.Create;
303 try
304 sl.LoadFromFile( filePath );
305
306 for i := 1 to sl.Count - 1 do begin
307 s := sl[ i ];
308 name := RemoveToken( s, #1 );
309 info := TWordInfo.Create;
310 info.NormalWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
311 info.ImportantWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
312 info.NormalText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
313 info.ImportantText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
314
315 AddObject( name, info );
316 end;
317 finally
318 sl.Free;
319 end;
320
321 end;
322
323 procedure TGikoBayesian.SaveToFile( const filePath : string );
324 var
325 i : Integer;
326 sl : TStringList;
327 s : string;
328 info : TWordInfo;
329 begin
330
331 FFilePath := filePath;
332
333 sl := TStringList.Create;
334 try
335 sl.BeginUpdate;
336 sl.Add( GIKO_BAYESIAN_FILE_VERSION );
337
338 for i := 0 to Count - 1 do begin
339 info := TWordInfo( inherited Objects[ i ] );
340 s := Strings[ i ] + #1
341 + Format('%x', [info.NormalWord]) + #1
342 + Format('%x', [info.ImportantWord]) + #1
343 + Format('%x', [info.NormalText]) + #1
344 + Format('%x', [info.ImportantText]);
345
346 sl.Add(s);
347 end;
348 sl.EndUpdate;
349 sl.SaveToFile( filePath );
350 finally
351 sl.Free;
352 end;
353
354 end;
355
356 procedure TGikoBayesian.Save;
357 begin
358
359 if FFilePath <> '' then
360 SaveToFile( FFilePath );
361
362 end;
363
364 //==============================
365 // GetObject
366 //==============================
367 function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
368 var
369 idx : Integer;
370 begin
371
372 if Find( name, idx ) then
373 Result := TWordInfo( inherited Objects[ idx ] )
374 else
375 Result := nil;
376
377 end;
378
379 //==============================
380 // SetObject
381 //==============================
382 procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
383 var
384 idx : Integer;
385 begin
386
387 if Find( name, idx ) then
388 inherited Objects[ idx ] := value
389 else
390 AddObject( name, value );
391
392 end;
393
394
395 //==============================
396 // CountWord
397 //==============================
398 procedure TGikoBayesian.CountWord(
399 const text : string;
400 wordCount : TWordCount );
401 type
402 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
403 ModeWGraph, ModeWAlpha, ModeWNum,
404 ModeWHira, ModeWKata, ModeWKanji);
405 var
406 p, tail, last : PChar;
407 mode, newMode : Modes;
408 aWord : string;
409 ch : Longword;
410 chSize : Integer;
411 delimiter : TStringList;
412 delimited : Boolean;
413 i, idx : Integer;
414 countInfo : TWordCountInfo;
415 const
416 KAKUJOSI = '??' + #10 + '??#39; + #10 + '??' + #10 + '??#39; + #10 + '????'
417 + #10 + '??#39; + #10 + '??#39; + #10 + '????' + #10 + '?障??#39;
418 ;{
419 + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
420 + #10 + '????#39; + #10 + '????#39; + #10 + '????#39; + #10 + '????#39;
421 + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
422 + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39;
423 + #10 + '??' + #10 + '??#39; + #10 + '綣?39; + #10 + '??' + #10 + '羈?'
424 + #10 + '筝?' + #10 + '??#39; + #10 + '??'
425 + #10 + '?障??'
426 + #10 + '??????' + #10 + '????' + #10 + '????#39; + #10 + '??????#39;
427 + #10 + '??????' + #10 + '???c?宴??'
428 + #10 + '?с??' + #10 + '?障??' + #10 + '?с??' + #10 + '????'
429 + #10 + '????' + #10 + '??????' + #10 + '????' + #10 + '??????'
430 ;}
431 kKanji = [$80..$A0, $E0..$ff];
432 begin
433
434 delimiter := TStringList.Create;
435 try
436 mode := ModeWhite;
437 delimiter.Text := KAKUJOSI;
438 p := PChar( text );
439 tail := p + Length( text );
440 last := p;
441
442 while p < tail do begin
443 delimited := False;
444 // ??絖????帥?ゃ?????ゅ??/span>
445 // ?糸???鴻? ModeGraph ?????????у???ャ???綽??????????????
446 // if Byte(Byte( p^ ) - $a1) < $5e then begin
447 if Byte( p^ ) in kKanji then begin
448 if p + 1 < tail then begin
449 ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
450 case ch of
451 $8140: newMode := ModeWhite;
452 $8141..$824e: newMode := ModeWGraph;
453 $824f..$8258: newMode := ModeWNum;
454 $8260..$829a: newMode := ModeWAlpha;
455 $829f..$82f1: newMode := ModeWHira;
456 $8340..$8396: newMode := ModeWKata;
457 else newMode := ModeWKanji;
458 end;
459 // '??????#39; ??抗篁??????障???????帥?????????障????
460 if (mode = ModeWHira) or (mode = ModeWKata) then
461 if (ch = $814a) or (ch = $814b) or (ch = $815b) then
462 newMode := mode;
463 end else begin
464 newMode := ModeWhite;
465 end;
466
467 chSize := 2;
468
469 // ?阪????????????絖?????????罎??祉????
470 if p + 3 < tail then begin // 3 = delimiter ????紊у?? - 1
471 for i := 0 to delimiter.Count - 1 do begin
472 if CompareMem(
473 p, PChar( delimiter[ i ] ), Length( delimiter[ i ] ) ) then begin
474 delimited := True;
475 chSize := Length( delimiter[ i ] );
476 Break;
477 end;
478 end;
479 end;
480 end else begin
481 newMode := Modes( CharMode1[ Byte( p^ ) ] );
482
483 chSize := 1;
484 end;
485
486 if (mode <> newMode) or delimited then begin
487
488 // ??絖????帥?ゃ????紊??眼??????
489 if mode <> ModeWhite then begin
490 SetLength( aWord, p - last );
491 CopyMemory( PChar( aWord ), last, p - last );
492 //aWord := Copy( last, 0, p - last );
493 if wordCount.Find( aWord, idx ) then begin
494 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
495 end else begin
496 countInfo := TWordCountInfo.Create;
497 wordCount.AddObject( aWord, countInfo );
498 end;
499 countInfo.WordCount := countInfo.WordCount + 1;
500 end;
501
502 last := p;
503
504 // ?阪????????????絖???????????
505 if delimited then begin
506 SetLength( aWord, chSize );
507 CopyMemory( PChar( aWord ), last, chSize );
508 //aWord := Copy( last, 0, p - last );
509 if wordCount.Find( aWord, idx ) then begin
510 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
511 end else begin
512 countInfo := TWordCountInfo.Create;
513 wordCount.AddObject( aWord, countInfo );
514 end;
515 countInfo.WordCount := countInfo.WordCount + 1;
516 last := last + chSize;
517 end;
518
519 mode := newMode;
520
521 end;
522
523 p := p + chSize;
524 end; // while
525
526 if mode <> ModeWhite then begin
527 aWord := Copy( last, 0, p - last );
528 if wordCount.Find( aWord, idx ) then begin
529 countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
530 end else begin
531 countInfo := TWordCountInfo.Create;
532 wordCount.AddObject( aWord, countInfo );
533 end;
534 countInfo.WordCount := countInfo.WordCount + 1;
535 end;
536 finally
537 delimiter.Free;
538 end;
539
540 end;
541
542 //==============================
543 // CalcPaulGraham
544 //==============================
545 function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
546
547 function p( const aWord : string ) : Single;
548 var
549 info : TWordInfo;
550 begin
551 info := Objects[ aWord ];
552 if info = nil then
553 Result := 0.415
554 else if info.NormalWord = 0 then
555 Result := 0.99
556 else if info.ImportantWord = 0 then
557 Result := 0.01
558 else if info.ImportantWord + info.NormalWord * 2 < 5 then
559 Result := 0.5
560 else
561 Result := ( info.ImportantWord / info.ImportantText ) /
562 ((info.NormalWord * 2 / info.NormalText ) +
563 (info.ImportantWord / info.ImportantText));
564 end;
565
566 var
567 s, q : Extended;
568 i : Integer;
569 narray : TList;
570 const
571 SAMPLE_COUNT = 15;
572 begin
573
574 Result := 1;
575 if wordCount.Count = 0 then
576 Exit;
577
578 narray := TList.Create;
579 try
580 for i := 0 to wordCount.Count - 1 do begin
581 narray.Add( Pointer( p( wordCount[ i ] ) ) );
582 end;
583
584 narray.Sort( AbsSort );
585
586 s := 1;
587 q := 1;
588 i := min( SAMPLE_COUNT, narray.Count );
589 while i > 0 do begin
590 Dec( i );
591
592 s := s * Single( narray[ i ] );
593 q := q * (1 - Single( narray[ i ] ));
594 end;
595
596 Result := s / (s + q);
597 finally
598 narray.Free;
599 end;
600
601 end;
602
603 //==============================
604 // CalcGaryRobinson
605 //==============================
606 function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
607
608 function p( const aWord : string ) : Single;
609 var
610 info : TWordInfo;
611 begin
612 info := Objects[ aWord ];
613 if info = nil then
614 Result := 0.415
615 else if info.ImportantWord = 0 then
616 Result := 0.01
617 else if info.NormalWord = 0 then
618 Result := 0.99
619 else
620 Result := ( info.ImportantWord / info.ImportantText ) /
621 ((info.NormalWord / info.NormalText ) +
622 (info.ImportantWord / info.ImportantText));
623 end;
624
625 function f( cnt : Integer; n, mean : Single ) : Extended;
626 const
627 k = 0.00001;
628 begin
629 Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
630 end;
631
632 var
633 n : Extended;
634 narray : array of Single;
635 mean : Extended;
636 countInfo : TWordCountInfo;
637 i : Integer;
638 P1, Q1, R1 : Extended;
639 cnt : Extended;
640 begin
641
642 if wordCount.Count = 0 then begin
643 Result := 1;
644 Exit;
645 end;
646
647 SetLength( narray, wordCount.Count );
648 mean := 0;
649 for i := 0 to wordCount.Count - 1 do begin
650 n := p( wordCount[ i ] );
651 narray[ i ] := n;
652 mean := mean + n;
653 end;
654 mean := mean / wordCount.Count;
655
656 P1 := 1;
657 Q1 := 1;
658 for i := 0 to wordCount.Count - 1 do begin
659 countInfo := TWordCountInfo( wordCount.Objects[ i ] );
660 n := f( countInfo.WordCount, narray[ i ], mean );
661 P1 := P1 * ( 1 - n );
662 Q1 := Q1 * n;
663 end;
664 cnt := wordCount.Count;
665 if cnt = 0 then
666 cnt := 1
667 else
668 P1 := 1 - Power( P1, 1 / cnt );
669 Q1 := 1 - Power( Q1, 1 / cnt );
670
671 if P1 + Q1 = 0 then begin
672 Result := 0.5
673 end else begin
674 n := (P1 - Q1) / (P1 + Q1);
675 Result := (1 + n) / 2;
676 end;
677
678 end;
679
680 //==============================
681 // CalcGaryRobinsonFisher
682 //==============================
683 function TGikoBayesian.CalcGaryRobinsonFisher(
684 wordCount : TWordCount
685 ) : Extended;
686
687 function p( const aWord : string ) : Single;
688 var
689 info : TWordInfo;
690 begin
691 info := Objects[ aWord ];
692 if info = nil then
693 Result := 0.415
694 else if info.ImportantWord = 0 then
695 Result := 0.01
696 else if info.NormalWord = 0 then
697 Result := 0.99
698 else
699 Result := info.ImportantWord /
700 (info.ImportantWord + info.NormalWord *
701 info.ImportantText / info.NormalText);
702 end;
703
704 function f( cnt : Integer; n, mean : Single ) : Extended;
705 const
706 k = 0.00001;
707 begin
708 Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
709 end;
710
711 function prbx( x2, degree : Extended ) : Extended;
712 var
713 m : Extended;
714 sum : Extended;
715 term : Extended;
716 i : extended;
717 begin
718
719 m := x2 / 2;
720 sum := exp( -m );
721 term := -m;
722
723 i := 1;
724 while i < (degree / 2 - 1) do begin
725 term := term + ln( m / i );
726 sum := sum + exp( term );
727 i := i + 1;
728 end;
729
730 if sum < 1 then
731 Result := sum
732 else
733 Result := 1.0;
734
735 end;
736
737 var
738 n : Extended;
739 narray : array of Single;
740 mean : Extended;
741 countInfo : TWordCountInfo;
742 i : Integer;
743 normal : Extended;
744 important : Extended;
745 P1, Q1 : Extended;
746 cnt : Extended;
747 begin
748
749 if wordCount.Count = 0 then begin
750 Result := 1;
751 Exit;
752 end;
753
754 SetLength( narray, wordCount.Count );
755 mean := 0;
756 for i := 0 to wordCount.Count - 1 do begin
757 n := p( wordCount[ i ] );
758 narray[ i ] := n;
759 mean := mean + n;
760 end;
761 mean := mean / wordCount.Count;
762
763 cnt := 0;
764 (*
765 P1 := 1;
766 Q1 := 1;
767 (*)
768 P1 := 0;
769 Q1 := 0;
770 //*
771 for i := 0 to wordCount.Count - 1 do begin
772 countInfo := TWordCountInfo( wordCount.Objects[ i ] );
773 n := f( countInfo.WordCount, narray[ i ], mean );
774 if countInfo <> nil then
775 cnt := cnt + countInfo.WordCount;
776 (*
777 P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
778 Q1 := Q1 + Ln( n ) * countInfo.WordCount;
779 (*)
780 P1 := P1 + Ln( 1 - n );
781 Q1 := Q1 + Ln( n );
782 //*)
783 end;
784 if cnt = 0 then
785 cnt := 1;
786 //(*
787 P1 := prbx( -2 * P1, 2 * cnt );
788 Q1 := prbx( -2 * Q1, 2 * cnt );
789 (*)
790 P1 := prbx( -2 * Ln( P1 ), 2 * cnt );
791 Q1 := prbx( -2 * Ln( Q1 ), 2 * cnt );
792 //*)
793 if P1 + Q1 = 0 then begin
794 Result := 0.5
795 end else begin
796 Result := (1 + Q1 + P1) / 2;
797 end;
798
799 end;
800
801 //==============================
802 // Parse
803 //==============================
804 function TGikoBayesian.Parse(
805 const text : string;
806 wordCount : TWordCount;
807 algorithm : TGikoBayesianAlgorithm
808 ) : Extended;
809 begin
810
811 CountWord( text, wordCount );
812 case algorithm of
813 gbaPaulGraham: Result := CalcPaulGraham( wordCount );
814 gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
815 gbaGaryRobinsonFisher:
816 Result := CalcGaryRobinsonFisher( wordCount );
817 else Result := 0;
818 end;
819
820 end;
821
822 //==============================
823 // Learn
824 //==============================
825 procedure TGikoBayesian.Learn(
826 wordCount : TWordCount;
827 isImportant : Boolean );
828 var
829 aWord : string;
830 wordinfo : TWordInfo;
831 countinfo : TWordCountInfo;
832 i : Integer;
833 begin
834
835 for i := 0 to wordCount.Count - 1 do begin
836 aWord := wordCount[ i ];
837 wordinfo := Objects[ aWord ];
838 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
839 if wordinfo = nil then begin
840 wordinfo := TWordInfo.Create;
841 Objects[ aWord ] := wordinfo;
842 end;
843
844 if isImportant then begin
845 wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
846 wordinfo.ImportantText := wordinfo.ImportantText + 1;
847 end else begin
848 wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
849 wordinfo.NormalText := wordinfo.NormalText + 1;
850 end;
851 end;
852
853 end;
854
855 //==============================
856 // Forget
857 //==============================
858 procedure TGikoBayesian.Forget(
859 wordCount : TWordCount;
860 isImportant : Boolean );
861 var
862 aWord : string;
863 wordinfo : TWordInfo;
864 countinfo : TWordCountInfo;
865 i : Integer;
866 begin
867
868 for i := 0 to wordCount.Count - 1 do begin
869 aWord := wordCount[ i ];
870 wordinfo := Objects[ aWord ];
871 if wordinfo = nil then
872 Continue;
873
874 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
875 if isImportant then begin
876 if wordInfo.ImportantText > 0 then begin
877 wordinfo.ImportantText := wordinfo.ImportantText - 1;
878 wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
879 end;
880 end else begin
881 if wordinfo.NormalText > 0 then begin
882 wordinfo.NormalText := wordinfo.NormalText - 1;
883 wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
884 end;
885 end;
886 end;
887
888 end;
889
890 end.

Back to OSDN">Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26