Develop and Download Open Source Software

Browse CVS Repository

Contents of /gikonavigoeson/gikonavi/GikoBayesian.pas

Parent Directory | Revision Log | View Revision Graph


Revision 1.3 - (show annotations) (download) (as text)
Thu Oct 21 03:13:19 2004 UTC (19 years, 6 months ago) by yoffy
Branch: MAIN
Changes since 1.2: +53 -83 lines
File MIME type: text/x-pascal
最適化。 (Optimization.)

1 unit GikoBayesian;
2
3 {!
4 \file GikoBayesian.pas
5 \brief Bayesian filter
6
7 $Id: GikoBayesian.pas,v 1.2 2004/10/21 01:20:34 yoffy Exp $
8 }
9
10 interface
11
12 //==================================================
13 uses
14 //==================================================
15 Classes, IniFiles;
16
17 //==================================================
18 type
19 //==================================================
20
{!***********************************************************
\brief Properties of a word (learned statistics)
************************************************************}
TWordInfo = class( TObject )
private
  FNormalWord : Integer;    //!< Number of times the word appeared as a normal word
  FImportantWord : Integer; //!< Number of times the word appeared as an important word
  FNormalText : Integer;    //!< Number of texts that contained the word as a normal word
  FImportantText : Integer; //!< Number of texts that contained the word as an important word

public
  property NormalWord : Integer read FNormalWord write FNormalWord;
  property ImportantWord : Integer read FImportantWord write FImportantWord;
  property NormalText : Integer read FNormalText write FNormalText;
  property ImportantText : Integer read FImportantText write FImportantText;
end;
37
{!***********************************************************
\brief Properties of a parsed word
************************************************************}
TWordCountInfo = class( TObject )
private
  FWordCount : Integer; //!< Number of occurrences of the word

public
  property WordCount : Integer read FWordCount write FWordCount;
end;
48
{!***********************************************************
\brief List of parsed words
       (each string is a word; its object is expected to be a TWordCountInfo)
************************************************************}
// TWordCount = class( THashedStringList ) // extremely slow
TWordCount = class( TStringList )
public
  constructor Create;
  destructor Destroy; override;
end;
58
{!***********************************************************
\brief Filter algorithm
       (the identifier spelling "Ronbinson" is part of the public interface)
************************************************************}
TGikoBayesianAlgorithm =
  (gbaPaulGraham, gbaGaryRonbinson{, gbaGaryRonbinsonFisher});
64
{!***********************************************************
\brief Bayesian filter
************************************************************}
// TGikoBayesian = class( THashedStringList ) // extremely slow
TGikoBayesian = class( TStringList )
private
  FFilePath : string; //!< Path of the file that was loaded
  function GetObject( const name : string ) : TWordInfo;
  procedure SetObject( const name : string; value : TWordInfo );

public
  constructor Create;
  destructor Destroy; override;

  //! Reads the learning history from a file
  procedure LoadFromFile( const filePath : string );

  //! Saves the learning history to a file
  procedure SaveToFile( const filePath : string );

  //! Saves the learning history to the file path remembered by
  //! the last LoadFromFile / SaveToFile call
  procedure Save;

  //! Gets (or sets) the information associated with a word
  property Objects[ const name : string ] : TWordInfo
    read GetObject write SetObject; default;

  //! Counts the words contained in a text
  procedure CountWord(
    const text : string;
    wordCount : TWordCount );

  {!
  \brief Determines the importance of a text using Paul Graham's method
  \return Importance of the text (not noteworthy 0.0 .. 1.0 noteworthy)
  }
  function CalcPaulGraham( wordCount : TWordCount ) : Extended;

  {!
  \brief Determines the importance of a text using Gary Robinson's method
  \return Importance of the text (not noteworthy 0.0 .. 1.0 noteworthy)
  }
  function CalcGaryRobinson( wordCount : TWordCount ) : Extended;

  // function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;

  {!
  \brief Parses a text
  \param text       Text to parse
  \param wordCount  Receives the list of parsed words
  \param algorithm  Algorithm used to determine the importance
  \return Importance of the text (not noteworthy 0.0 .. 1.0 noteworthy)

  Simply runs CountWord and Calcxxxxx together.
  }
  function Parse(
    const text : string;
    wordCount : TWordCount;
    algorithm : TGikoBayesianAlgorithm = gbaGaryRonbinson
  ) : Extended;

  {!
  \brief Learns a text
  \param wordCount    Word list produced by Parse
  \param isImportant  True to remember the text as a noteworthy one
  }
  procedure Learn(
    wordCount : TWordCount;
    isImportant : Boolean );

  {!
  \brief Forgets a learning result
  \param wordCount    Word list produced by Parse
  \param isImportant  True if the text had been remembered as a noteworthy one
  \warning It is not possible to verify whether a text was actually learned.<br>
           Calling Forget for a text that was never passed to Learn, or with
           the wrong isImportant, corrupts the database.<br>
           Track which texts have been learned on your own.

  This does not clear all learning results.<br>
  Only the learning result of the text that produced wordCount
  (the text argument of Parse) is cleared.<br><br>

  Mainly used in the order Forget -> Learn to switch a text between
  noteworthy and not noteworthy.
  }
  procedure Forget(
    wordCount : TWordCount;
    isImportant : Boolean );
end;
153
154 //==================================================
155 implementation
156 //==================================================
157
158 uses
159 SysUtils, Math;
160
const
  // Written as the first line of the learning-history file by SaveToFile.
  GIKO_BAYESIAN_FILE_VERSION = '1.0';
  {
    Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
             ModeWGraph, ModeWAlpha, ModeWNum,
             ModeWHira, ModeWKata, ModeWKanji);
  }
  // Character class of every single-byte value; CountWord casts the entry
  // to its Modes enumeration.  0 = whitespace / control / bytes that are not
  // single-byte characters, 1 = symbols, 2 = digits, 3 = letters,
  // 4 = bytes $A5..$DD.
  // NOTE(review): with the enumeration order shown above, ordinals 2/3/4 are
  // named ModeAlpha/ModeHanKana/ModeNum, which does not line up with the
  // digit/letter/kana usage of this table.  It looks harmless because the
  // single-byte classes are only compared for equality - confirm.
  // Fix: '9' ($39) used to be classified as a symbol (1), which split numbers
  // such as "1999" into "1" and "999"; it is now a digit (2) like '0'..'8'.
  CharMode1 : array [ 0..255 ] of Byte =
  (
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // $00..$0F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // $10..$1F
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,   // $20..$2F  space, symbols
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,   // $30..$3F  '0'..'9', symbols
    1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   // $40..$4F  '@', 'A'..'O'
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,   // $50..$5F  'P'..'Z', symbols
    1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,   // $60..$6F  '`', 'a'..'o'
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,   // $70..$7F  'p'..'z', symbols, DEL

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // $80..$8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // $90..$9F
    0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,   // $A0..$AF
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,   // $B0..$BF
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,   // $C0..$CF
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 1,   // $D0..$DF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // $E0..$EF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0    // $F0..$FF
  );
188
189 //************************************************************
190 // misc
191 //************************************************************
192
193 //==============================
194 // RemoveToken
195 //==============================
// Splits the leading token off s.
// Returns the text in front of the first occurrence of delimiter (located
// with AnsiPos) and removes that text plus the delimiter from the start of s.
// When the delimiter does not occur, all of s is returned and s ends up empty.
function RemoveToken(var s: string;const delimiter: string): string;
var
  delimPos : Integer;
begin
  delimPos := AnsiPos( delimiter, s );
  if delimPos > 0 then
    Result := Copy( s, 1, delimPos - 1 )
  else
    Result := s;

  // Drop the token and one delimiter's worth of characters from the front.
  Delete( s, 1, Length( Result ) + Length( delimiter ) );
end;
207
208 //==============================
209 // AbsSort
210 //==============================
// TList.Sort comparison callback used by CalcPaulGraham.
// Both "pointers" are reinterpreted as Single values; the one further away
// from 0.5 sorts first (-1), equal distances compare as 0, otherwise 1.
function AbsSort( p1, p2 : Pointer ) : Integer;
var
  dist1, dist2 : Single;
begin
  dist1 := Abs( Single( p1 ) - 0.5 );
  dist2 := Abs( Single( p2 ) - 0.5 );

  Result := 1;
  if dist1 > dist2 then
    Result := -1
  else if dist1 = dist2 then
    Result := 0;
end;
226
227 //************************************************************
228 // TWordCount class
229 //************************************************************
// Creates an empty, sorted, case-sensitive word list that ignores attempts
// to add a word that is already present.
constructor TWordCount.Create;
begin

  // Fix: the ancestor constructor was never called, so any initialisation
  // performed by the base string-list classes was skipped.
  inherited Create;

  Duplicates := dupIgnore;
  CaseSensitive := True;
  Sorted := True;

end;
238
// Releases every object attached to the list before the list itself is
// destroyed.
destructor TWordCount.Destroy;
var
  idx : Integer;
begin

  // Walk from the last entry to the first; Free is a no-op on nil, so
  // entries without an attached object need no special handling.
  idx := Count;
  while idx > 0 do begin
    Dec( idx );
    Objects[ idx ].Free;
  end;

  inherited;

end;
251
252 //************************************************************
253 // TGikoBayesian class
254 //************************************************************
255
256 //==============================
257 // Create
258 //==============================
// Creates an empty, sorted, case-sensitive word database that ignores
// attempts to add a word that is already present.
constructor TGikoBayesian.Create;
begin

  // Fix: the ancestor constructor was never called, so any initialisation
  // performed by the base string-list classes was skipped.
  inherited Create;

  Duplicates := dupIgnore;
  CaseSensitive := True;
  Sorted := True;

end;
267
268 //==============================
269 // Destroy
270 //==============================
// Releases every object attached to the list before the list itself is
// destroyed.
destructor TGikoBayesian.Destroy;
var
  idx : Integer;
begin

  // "inherited Objects" addresses the index-based property of the ancestor,
  // because this class re-declares Objects with a string index.
  // Free is a no-op on nil, so empty slots need no special handling.
  idx := Count;
  while idx > 0 do begin
    Dec( idx );
    inherited Objects[ idx ].Free;
  end;

  inherited;

end;
283
// Reads the learning history from filePath and remembers the path for Save.
// Does nothing (apart from remembering the path) when the file does not exist.
// File layout: line 0 is skipped (SaveToFile writes the version string
// there; it is not validated); every following line is
//   word #1 NormalWord #1 ImportantWord #1 NormalText #1 ImportantText
// with the four counters in hexadecimal.  Unparsable counters load as 0.
// Existing entries are not cleared; a word that is already present gets its
// statistics replaced by the ones from the file.
procedure TGikoBayesian.LoadFromFile( const filePath : string );
var
  i, idx : Integer;
  lines : TStringList;
  rest : string;
  name : string;
  info : TWordInfo;
  previous : TObject;

  // Consumes the next #1-terminated field of "rest" as a hexadecimal number.
  function NextHex : Integer;
  begin
    Result := StrToIntDef( '$' + RemoveToken( rest, #1 ), 0 );
  end;

begin

  FFilePath := filePath;

  if not FileExists( filePath ) then
    Exit;

  lines := TStringList.Create;
  try
    lines.LoadFromFile( filePath );

    for i := 1 to lines.Count - 1 do begin
      rest := lines[ i ];
      name := RemoveToken( rest, #1 );

      info := TWordInfo.Create;
      try
        info.NormalWord := NextHex;
        info.ImportantWord := NextHex;
        info.NormalText := NextHex;
        info.ImportantText := NextHex;

        // Fix: this list is sorted with Duplicates = dupIgnore, so calling
        // AddObject for a word that already exists (for example when a file
        // is loaded twice) leaked one TWordInfo - either the old or the new
        // one, depending on the RTL version.  Handle the duplicate explicitly.
        idx := IndexOf( name );
        if idx < 0 then
          AddObject( name, info )
        else begin
          previous := inherited Objects[ idx ];
          inherited Objects[ idx ] := info;
          previous.Free;
        end;
      except
        // The list did not take ownership; do not leak the new object.
        info.Free;
        raise;
      end;
    end;
  finally
    lines.Free;
  end;

end;
318
// Writes the learning history to filePath and remembers the path for Save.
// Line 0 is GIKO_BAYESIAN_FILE_VERSION; every following line is
//   word #1 NormalWord #1 ImportantWord #1 NormalText #1 ImportantText
// with the four counters written in hexadecimal.
procedure TGikoBayesian.SaveToFile( const filePath : string );
const
  RECORD_FORMAT = '%s' + #1 + '%x' + #1 + '%x' + #1 + '%x' + #1 + '%x';
var
  row : Integer;
  output : TStringList;
  stats : TWordInfo;
begin

  FFilePath := filePath;

  output := TStringList.Create;
  try
    output.BeginUpdate;
    output.Add( GIKO_BAYESIAN_FILE_VERSION );

    for row := 0 to Count - 1 do begin
      // The ancestor's index-based Objects property is needed here because
      // this class re-declares Objects with a string index.
      stats := TWordInfo( inherited Objects[ row ] );
      output.Add( Format( RECORD_FORMAT, [
        Strings[ row ],
        stats.NormalWord,
        stats.ImportantWord,
        stats.NormalText,
        stats.ImportantText ] ) );
    end;
    output.EndUpdate;
    output.SaveToFile( filePath );
  finally
    output.Free;
  end;

end;
351
// Saves the learning history to the path remembered by the last
// LoadFromFile / SaveToFile call; does nothing when no path is known yet.
procedure TGikoBayesian.Save;
begin

  if FFilePath = '' then
    Exit;

  SaveToFile( FFilePath );

end;
359
360 //==============================
361 // GetObject
362 //==============================
// Getter of the string-indexed Objects property.
// Returns the TWordInfo stored for the word, or nil when the word is unknown.
function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
var
  slot : Integer;
begin

  Result := nil;

  slot := IndexOf( name ); // original author's note: extremely slow
  if slot >= 0 then
    Result := TWordInfo( inherited Objects[ slot ] );

end;
375
376 //==============================
377 // SetObject
378 //==============================
// Setter of the string-indexed Objects property.
// Stores value for the word, adding the word when it is not in the list yet.
// NOTE(review): when the word already exists, the object previously stored
// for it is overwritten without being freed - confirm that callers release
// it themselves.
procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
var
  slot : Integer;
begin

  slot := IndexOf( name );
  if slot >= 0 then begin
    inherited Objects[ slot ] := value;
    Exit;
  end;

  AddObject( name, value );

end;
391
392
393 //==============================
394 // CountWord
395 //==============================
// Splits text into words and counts how often each word occurs.
//
// text      : byte string to analyse; two-byte characters are interpreted as
//             Shift-JIS (see the code-point ranges below).
// wordCount : receives the words; every entry owns a TWordCountInfo whose
//             WordCount is incremented once per occurrence.  Existing entries
//             are kept and counted further.
//
// A word is a maximal run of characters of the same class.  In addition, a
// word is cut in front of each Japanese case particle listed in KAKUJOSI.
// Runs of class ModeWhite are discarded.
procedure TGikoBayesian.CountWord(
  const text : string;
  wordCount : TWordCount );
type
  // NOTE(review): CharMode1 uses ordinals 2/3/4 for digits/letters/bytes
  // $A5..$DD, which does not line up with the names ModeAlpha/ModeHanKana/
  // ModeNum.  It looks harmless because single-byte classes are only
  // compared for equality - confirm.
  Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
           ModeWGraph, ModeWAlpha, ModeWNum,
           ModeWHira, ModeWKata, ModeWKanji);
const
  // Case particles (kakujoshi) that act as word delimiters, one per line.
  // Fix: the literals were corrupted; they are spelled as Shift-JIS byte
  // escapes so that they do not depend on the encoding of this source file.
  KAKUJOSI =
    #$82#$F0 + #10 +            // wo
    #$82#$C9 + #10 +            // ni
    #$82#$AA + #10 +            // ga
    #$82#$C6 + #10 +            // to
    #$82#$A9#$82#$E7 + #10 +    // kara
    #$82#$C5 + #10 +            // de
    #$82#$D6 + #10 +            // he
    #$82#$E6#$82#$E8 + #10 +    // yori
    #$82#$DC#$82#$C5;           // made
var
  p, tail, last : PChar;
  mode, newMode : Modes;
  ch : Longword;
  chSize : Integer;
  delimiter : TStringList;
  delimited : Boolean;
  i, len : Integer;

  // Registers one occurrence of the word stored in [first, stop).
  procedure AddWord( first, stop : PChar );
  var
    aWord : string;
    idx : Integer;
    countInfo : TWordCountInfo;
  begin
    // Fix: SetString replaces SetLength + CopyMemory; CopyMemory is declared
    // in the Windows unit, which this unit does not use.
    SetString( aWord, first, stop - first );
    idx := wordCount.IndexOf( aWord ); // original author's note: slow
    if idx < 0 then begin
      countInfo := TWordCountInfo.Create;
      wordCount.AddObject( aWord, countInfo );
    end else begin
      countInfo := TWordCountInfo( wordCount.Objects[ idx ] );
    end;
    countInfo.WordCount := countInfo.WordCount + 1;
  end;

begin

  delimiter := TStringList.Create;
  try
    mode := ModeWhite;
    delimiter.Text := KAKUJOSI;
    p := PChar( text );
    tail := p + Length( text );
    last := p;

    while p < tail do begin
      delimited := False;
      // Determine the class of the current character.
      // Punctuation ends up in ModeGraph / ModeWGraph, so it needs no
      // individual handling.
      // Fix: the lead-byte test used the EUC-JP range $A1..$FE although the
      // code points below and CharMode1 are Shift-JIS.  That made the
      // half-width kana entries of CharMode1 unreachable and treated real
      // Shift-JIS lead bytes $81..$9F as single-byte whitespace.
      if Byte( p^ ) in [$81..$9F, $E0..$FC] then begin
        if p + 1 < tail then begin
          ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
          case ch of
          $8140: newMode := ModeWhite;          // full-width space
          $8141..$824e: newMode := ModeWGraph;
          $824f..$8258: newMode := ModeWNum;
          $8260..$829a: newMode := ModeWAlpha;
          $829f..$82f1: newMode := ModeWHira;
          $8340..$8396: newMode := ModeWKata;
          else newMode := ModeWKanji;
          end;
        end else begin
          // Lead byte without a trail byte at the very end of the text.
          newMode := ModeWhite;
        end;

        chSize := 2;

        // Check whether a delimiter starts here.
        if p + 3 < tail then begin // 3 = maximum delimiter length in bytes - 1
          for i := 0 to delimiter.Count - 1 do begin
            len := Length( delimiter[ i ] );
            if CompareMem( p, PChar( delimiter[ i ] ), len ) then begin
              delimited := True;
              chSize := len;
              Break;
            end;
          end;
        end;
      end else begin
        newMode := Modes( CharMode1[ Byte( p^ ) ] );

        chSize := 1;
      end;

      if (mode <> newMode) or delimited then begin

        // The character class changed, or a delimiter was encountered:
        // emit the word collected so far.
        if mode <> ModeWhite then
          AddWord( last, p );

        last := p;
        mode := newMode;

      end;

      p := p + chSize;
    end; // while

    // Emit the word that is still pending at the end of the text.
    if mode <> ModeWhite then begin
      // p may have stepped one byte past the end when the text ends in a
      // lone lead byte; never read beyond the text.
      if p > tail then
        p := tail;
      AddWord( last, p );
    end;
  finally
    delimiter.Free;
  end;

end;
507
508 //==============================
509 // CalcPaulGraham
510 //==============================
// Rates a parsed text with Paul Graham's method.
//
// wordCount : word list produced by CountWord / Parse.
// Returns s / (s + q), where s is the product of the probabilities of the
// (at most) SAMPLE_COUNT words whose probability is furthest from 0.5 and q
// is the product of their complements.  Returns 1 for an empty word list.
function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;

  // Probability that a text containing aWord is an important one.
  function p( const aWord : string ) : Single;
  var
    info : TWordInfo;
  begin
    info := Objects[ aWord ];
    if info = nil then
      Result := 0.4       // unknown word
    // Fix: the text counters are used as divisors below.  Treat a zero or
    // negative counter (possible after an unmatched Forget or with a damaged
    // file) like "never seen" instead of dividing by it.
    else if (info.NormalWord <= 0) or (info.NormalText <= 0) then
      Result := 0.99
    else if (info.ImportantWord <= 0) or (info.ImportantText <= 0) then
      Result := 0.01
    else
      Result := ( info.ImportantWord / info.ImportantText ) /
        ((info.NormalWord * 2 / info.NormalText ) +
         (info.ImportantWord / info.ImportantText));
  end;

var
  s, q : Extended;
  i : Integer;
  narray : TList;
const
  SAMPLE_COUNT = 15;
begin

  Result := 1;
  if wordCount.Count = 0 then
    Exit;

  narray := TList.Create;
  try
    // The Single probabilities are stored directly in the pointer slots of
    // the list (same reinterpretation that AbsSort undoes).
    for i := 0 to wordCount.Count - 1 do begin
      narray.Add( Pointer( p( wordCount[ i ] ) ) );
    end;

    // Most significant probabilities (furthest from 0.5) first.
    narray.Sort( AbsSort );

    s := 1;
    q := 1;
    i := min( SAMPLE_COUNT, narray.Count );
    while i > 0 do begin
      Dec( i );
      s := s * Single( narray[ i ] );
      q := q * (1 - Single( narray[ i ] ));
    end;

    Result := s / (s + q);
  finally
    narray.Free;
  end;

end;
565
566 //==============================
567 // CalcGaryRobinson
568 //==============================
// Rates a parsed text with Gary Robinson's method.
//
// wordCount : word list produced by CountWord / Parse.
// Returns (1 + n) / 2 with n = (important - normal) / (important + normal),
// where both terms are derived from the per-word probabilities as computed
// below.  Returns 1 for an empty word list.
function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;

  // Probability that a text containing aWord is an important one.
  function p( const aWord : string ) : Single;
  var
    info : TWordInfo;
  begin
    info := Objects[ aWord ];
    if info = nil then
      Result := 0.415     // unknown word
    // Fix: the text counters are used as divisors below.  Treat a zero or
    // negative counter (possible after an unmatched Forget or with a damaged
    // file) like "never seen" instead of dividing by it.
    else if (info.ImportantWord <= 0) or (info.ImportantText <= 0) then
      Result := 0.0001
    else if (info.NormalWord <= 0) or (info.NormalText <= 0) then
      Result := 0.9999
    else
      Result := ( info.ImportantWord / info.ImportantText ) /
        ((info.NormalWord / info.NormalText ) +
         (info.ImportantWord / info.ImportantText));
  end;

  // Blends the word probability n with the mean probability, weighting n by
  // the number of occurrences cnt and the mean by the small constant k.
  function f( cnt : Integer; n, mean : Single ) : Extended;
  const
    k = 0.00001;
  begin
    Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
  end;

var
  n : Extended;
  narray : array of Single;
  mean : Extended;
  countInfo : TWordCountInfo;
  i : Integer;
  occurrences : Integer;
  normal : Extended;
  important : Extended;
  cnt : Extended;
begin

  if wordCount.Count = 0 then begin
    Result := 1;
    Exit;
  end;

  // Pass 1: per-word probabilities and their mean.
  SetLength( narray, wordCount.Count );
  mean := 0;
  for i := 0 to wordCount.Count - 1 do begin
    n := p( wordCount[ i ] );
    narray[ i ] := n;
    mean := mean + n;
  end;
  mean := mean / wordCount.Count;

  // Pass 2: products of the blended probabilities and their complements.
  cnt := 0;
  normal := 1;
  important := 1;
  for i := 0 to wordCount.Count - 1 do begin
    countInfo := TWordCountInfo( wordCount.Objects[ i ] );
    // Fix: countInfo used to be dereferenced before it was tested for nil.
    // An entry without count information now contributes zero occurrences
    // (f then yields the mean).
    if countInfo <> nil then
      occurrences := countInfo.WordCount
    else
      occurrences := 0;

    n := f( occurrences, narray[ i ], mean );
    normal := normal * n;
    important := important * (1 - n);
    cnt := cnt + occurrences;
  end;
  if cnt = 0 then
    cnt := 1;
  // cnt-th root of each product, taken via Exp/Ln.
  normal := 1 - Exp( Ln( normal ) * (1 / cnt) );
  important := 1 - Exp( Ln( important ) * (1 / cnt) );

  // The small constant keeps the quotient defined when both terms are 0.
  n := (important - normal+ 0.00001) / (important + normal + 0.00001);
  Result := (1 + n) / 2;

end;
640
641 //==============================
642 // Parse
643 //==============================
// Counts the words of text into wordCount and rates the text with the
// requested algorithm.  Returns 0 for an algorithm value that has no
// implementation.
function TGikoBayesian.Parse(
  const text : string;
  wordCount : TWordCount;
  algorithm : TGikoBayesianAlgorithm = gbaGaryRonbinson
) : Extended;
begin

  CountWord( text, wordCount );

  if algorithm = gbaPaulGraham then
    Result := CalcPaulGraham( wordCount )
  else if algorithm = gbaGaryRonbinson then
    Result := CalcGaryRobinson( wordCount )
  else
    Result := 0;

end;
659
660 //==============================
661 // Learn
662 //==============================
// Adds the words of one parsed text to the learned statistics.
//
// wordCount   : word list produced by CountWord / Parse.
// isImportant : True books the text as an important one, False as a normal
//               one.
// For every word the matching occurrence counter grows by the word's count
// in this text and the matching text counter grows by one.  Words that are
// not known yet are added.
procedure TGikoBayesian.Learn(
  wordCount : TWordCount;
  isImportant : Boolean );
var
  row : Integer;
  key : string;
  stats : TWordInfo;
  seen : TWordCountInfo;
begin

  for row := 0 to wordCount.Count - 1 do begin
    key := wordCount[ row ];
    stats := Objects[ key ];
    seen := TWordCountInfo( wordCount.Objects[ row ] );

    // First encounter of this word: create its statistics record.
    if stats = nil then begin
      stats := TWordInfo.Create;
      Objects[ key ] := stats;
    end;

    if not isImportant then begin
      stats.NormalWord := stats.NormalWord + seen.WordCount;
      stats.NormalText := stats.NormalText + 1;
    end else begin
      stats.ImportantWord := stats.ImportantWord + seen.WordCount;
      stats.ImportantText := stats.ImportantText + 1;
    end;
  end;

end;
692
693 //==============================
694 // Forget
695 //==============================
// Removes the words of one previously learned text from the statistics
// (the inverse of Learn).
//
// wordCount   : word list produced by CountWord / Parse for that text.
// isImportant : True if the text had been learned as an important one.
// Words that are unknown to the database are skipped.
// Whether the text was really learned cannot be verified here; the caller
// has to track that.
procedure TGikoBayesian.Forget(
  wordCount : TWordCount;
  isImportant : Boolean );
var
  aWord : string;
  wordinfo : TWordInfo;
  countinfo : TWordCountInfo;
  i : Integer;
begin

  for i := 0 to wordCount.Count - 1 do begin
    aWord := wordCount[ i ];
    wordinfo := Objects[ aWord ];
    if wordinfo = nil then
      Continue;

    countinfo := TWordCountInfo( wordCount.Objects[ i ] );
    // An entry without count information has nothing to subtract.
    if countinfo = nil then
      Continue;

    // Fix: an unmatched Forget used to drive the counters below zero, which
    // later yields nonsensical probabilities.  Counters are clamped at 0.
    if isImportant then begin
      wordinfo.ImportantWord :=
        Max( 0, wordinfo.ImportantWord - countinfo.WordCount );
      wordinfo.ImportantText := Max( 0, wordinfo.ImportantText - 1 );
    end else begin
      wordinfo.NormalWord :=
        Max( 0, wordinfo.NormalWord - countinfo.WordCount );
      wordinfo.NormalText := Max( 0, wordinfo.NormalText - 1 );
    end;
  end;

end;
723
724 end.

Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26