Develop and Download Open Source Software

Browse CVS Repository

Contents of /gikonavigoeson/gikonavi/GikoBayesian.pas

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.17 - (show annotations) (download) (as text)
Fri Nov 5 14:24:26 2004 UTC (19 years, 5 months ago) by h677
Branch: MAIN
CVS Tags: v1_50_0_581, v1_50_0_580, v1_50_0_587, v1_50_0_586, v1_50_0_595, v1_50_0_572, v1_50_0_590, bv1_49_0_564, v1_50_0_593, v1_50_0_596, v1_50_0_585, v1_50_0_582, merged-remodeling, v1_50_0_577, v1_50_0_588, bv1_49_0_565, v1_50_0_574, v1_50_0_584, v_step1, v1_50_0_573, v1_50_0_571, v1_50_0_576, v1_50_0_578, v1_50_0_579, root-of-Bb50, root-of-remodel, v1_50_0_594
Branch point for: remodeling, Bb50
Changes since 1.16: +10 -5 lines
File MIME type: text/x-pascal
0除算例外が出ても平気にした。

1 unit GikoBayesian;
2
3 {!
4 \file GikoBayesian.pas
5 \brief ???ゃ?吾?≪?潟???c????/span>
6
7 $Id: GikoBayesian.pas,v 1.16 2004/11/01 10:32:02 yoffy Exp $
8 }
9
10 //! 綛割皿????莨??吾??????????
11 {$DEFINE GIKO_BAYESIAN_NO_HIRAGANA_DIC}
12
13 interface
14
15 //==================================================
16 uses
17 //==================================================
18 Classes;
19
20 //==================================================
21 type
22 //==================================================
23
24 {!***********************************************************
25 \brief ??茯???????????/span>
26 ************************************************************}
27 TWordInfo = class( TObject )
28 private
29 FNormalWord : Integer; //!< ??絽吾????茯????????糸?眼????????/span>
30 FImportantWord : Integer; //!< 羈?????茯????????糸?眼????????/span>
31 FNormalText : Integer; //!< ??絽吾????茯??????????障??????????腴?????/span>
32 FImportantText : Integer; //!< 羈?????茯??????????障??????????腴?????/span>
33
34 public
35 property NormalWord : Integer read FNormalWord write FNormalWord;
36 property ImportantWord : Integer read FImportantWord write FImportantWord;
37 property NormalText : Integer read FNormalText write FNormalText;
38 property ImportantText : Integer read FImportantText write FImportantText;
39 end;
40
41 {!***********************************************************
42 \brief 茹f??羝??水??茯???????????/span>
43 ************************************************************}
44 TWordCountInfo = class( TObject )
45 private
46 FWordCount : Integer; //!< ??茯???/span>
47
48 public
49 property WordCount : Integer read FWordCount write FWordCount;
50 end;
51
52 {!***********************************************************
53 \brief 茹f??羝??水??茯????鴻??
54 ************************************************************}
55 // TWordCount = class( THashedStringList ) // 羶???
56 TWordCount = class( TStringList )
57 public
58 constructor Create;
59 destructor Destroy; override;
60 end;
61
62 {!***********************************************************
63 \brief ???c???帥?≪???眼???冴??
64 ************************************************************}
65 TGikoBayesianAlgorithm =
66 (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
67
68 {!***********************************************************
69 \brief ???ゃ?吾?≪?潟???c????/span>
70 ************************************************************}
71 // TGikoBayesian = class( THashedStringList ) // 羶???
72 TGikoBayesian = class( TStringList )
73 private
74 FFilePath : string; //!< 茯??粋昭???????<?ゃ??????/span>
75 function GetObject( const name : string ) : TWordInfo;
76 procedure SetObject( const name : string; value : TWordInfo );
77
78 public
79 constructor Create;
80 destructor Destroy; override;
81
82 //! ???<?ゃ??????絖??絮ユ???茯??水?冴???障??
83 procedure LoadFromFile( const filePath : string );
84
85 //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
86 procedure SaveToFile( const filePath : string );
87
88 //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
89 procedure Save;
90
91 //! ??茯???????????宴????緇????障??
92 property Objects[ const name : string ] : TWordInfo
93 read GetObject write SetObject; default;
94
95 //! ??腴??????障??????茯????????潟?????障??
96 procedure CountWord(
97 const text : string;
98 wordCount : TWordCount );
99
100 {!
101 \brief Paul Graham 羈????冴?ャ??????腴???絵??墾??羆阪????障??
102 \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
103 }
104 function CalcPaulGraham( wordCount : TWordCount ) : Extended;
105
106 {!
107 \brief GaryRobinson 羈????冴?ャ??????腴???絵??墾??羆阪????障??
108 \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
109 }
110 function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
111
112 {!
113 \brief GaryRobinson-Fisher 羈????冴?ャ??????腴???絵??墾??羆阪????障??
114 \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
115 }
116 function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
117
118 {!
119 \brief ??腴???茹f??
120 \param text 茹f????????腴?
121 \param wordCount 茹f??????????茯????鴻????菴???
122 \param algorithm 羈???墾??浦絎??????????≪???眼???冴??????絎????障??
123 \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
124
125 CountWord ? Calcxxxxx ???障???????茵??????????с????
126 }
127 function Parse(
128 const text : string;
129 wordCount : TWordCount;
130 algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
131 ) : Extended;
132
133 {!
134 \brief 絖??????
135 \param wordCount Parse ?цВ??????????茯????鴻??
136 \param isImportant 羈??????鴻????腴???????????????? True
137 }
138 procedure Learn(
139 wordCount : TWordCount;
140 isImportant : Boolean );
141
142 {!
143 \brief 絖??腟?????綽?????
144 \param wordCount Parse ?цВ??????????茯????鴻??
145 \param isImportant 羈??????鴻????腴???????????????????????? True
146 \warning 絖??羝??帥????腴???????????∈茯??堺?ャ?障??????<br>
147 Learn ????????????腴??? isImportant ???????c????????腴???
148 Forget ?????????若?帥???若?鴻???贋?????障????<br>
149 絖??羝??帥????????????????????????????????
150
151 ???????膺?腟??????????≪????????с???????障??????<br>
152 wordCount ??緇?????腴? (Parse ? text 綣??? ???膺?腟??????帥?????≪???障????<br><br>
153
154 筝祉??絵????腴?????羈?????腴????????帥????????? Forget -> Learn ?????т戎?????障????
155 }
156 procedure Forget(
157 wordCount : TWordCount;
158 isImportant : Boolean );
159 end;
160
161 //==================================================
162 implementation
163 //==================================================
164
165 uses
166 SysUtils, Math, Windows,
167 MojuUtils;
168
169 const
170 GIKO_BAYESIAN_FILE_VERSION = '1.0';
171 {
172 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
173 ModeWGraph, ModeWAlpha, ModeWNum,
174 ModeWHira, ModeWKata, ModeWKanji);
175 }
176 CharMode1 : array [ 0..255 ] of Byte =
177 (
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
182 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
183 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
184 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
185 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
186
187 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
190 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
191 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
192 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
193 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
194 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
195 );
196
197 //************************************************************
198 // misc
199 //************************************************************
200
201 //==============================
202 // RemoveToken
203 //==============================
204 function RemoveToken(var s: string;const delimiter: string): string;
205 var
206 p: Integer;
207 begin
208 p := AnsiPos(delimiter, s);
209 if p = 0 then
210 Result := s
211 else
212 Result := Copy(s, 1, p - 1);
213 s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
214 end;
215
216 //==============================
217 // AbsSort
218 //==============================
219 function AbsSort( p1, p2 : Pointer ) : Integer;
220 var
221 v1, v2 : Single;
222 begin
223
224 v1 := Abs( Single( p1 ) - 0.5 );
225 v2 := Abs( Single( p2 ) - 0.5 );
226 if v1 > v2 then
227 Result := -1
228 else if v1 = v2 then
229 Result := 0
230 else
231 Result := 1;
232
233 end;
234
235 //************************************************************
236 // TWordCount class
237 //************************************************************
238 constructor TWordCount.Create;
239 begin
240
241 Duplicates := dupIgnore;
242 CaseSensitive := True;
243 Sorted := True;
244
245 end;
246
247 destructor TWordCount.Destroy;
248 var
249 i : Integer;
250 begin
251
252 for i := Count - 1 downto 0 do
253 if Objects[ i ] <> nil then
254 Objects[ i ].Free;
255
256 inherited;
257
258 end;
259
260 //************************************************************
261 // TGikoBayesian class
262 //************************************************************
263
264 //==============================
265 // Create
266 //==============================
267 constructor TGikoBayesian.Create;
268 begin
269
270 Duplicates := dupIgnore;
271 CaseSensitive := True;
272 Sorted := True;
273
274 end;
275
276 //==============================
277 // Destroy
278 //==============================
279 destructor TGikoBayesian.Destroy;
280 var
281 i : Integer;
282 begin
283
284 for i := Count - 1 downto 0 do
285 if inherited Objects[ i ] <> nil then
286 inherited Objects[ i ].Free;
287
288 inherited;
289
290 end;
291
292 procedure TGikoBayesian.LoadFromFile( const filePath : string );
293 var
294 i : Integer;
295 sl : TStringList;
296 s : string;
297 name : string;
298 info : TWordInfo;
299 begin
300
301 FFilePath := filePath;
302
303 if not FileExists( filePath ) then
304 Exit;
305
306 sl := TStringList.Create;
307 try
308 sl.LoadFromFile( filePath );
309
310 for i := 1 to sl.Count - 1 do begin
311 s := sl[ i ];
312 name := RemoveToken( s, #1 );
313 info := TWordInfo.Create;
314 info.NormalWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
315 info.ImportantWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
316 info.NormalText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
317 info.ImportantText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
318
319 AddObject( name, info );
320 end;
321 finally
322 sl.Free;
323 end;
324
325 end;
326
327 procedure TGikoBayesian.SaveToFile( const filePath : string );
328 var
329 i : Integer;
330 sl : TStringList;
331 s : string;
332 info : TWordInfo;
333 begin
334
335 FFilePath := filePath;
336
337 sl := TStringList.Create;
338 try
339 sl.BeginUpdate;
340 sl.Add( GIKO_BAYESIAN_FILE_VERSION );
341
342 for i := 0 to Count - 1 do begin
343 info := TWordInfo( inherited Objects[ i ] );
344 s := Strings[ i ] + #1
345 + Format('%x', [info.NormalWord]) + #1
346 + Format('%x', [info.ImportantWord]) + #1
347 + Format('%x', [info.NormalText]) + #1
348 + Format('%x', [info.ImportantText]);
349
350 sl.Add(s);
351 end;
352 sl.EndUpdate;
353 sl.SaveToFile( filePath );
354 finally
355 sl.Free;
356 end;
357
358 end;
359
360 procedure TGikoBayesian.Save;
361 begin
362
363 if FFilePath <> '' then
364 SaveToFile( FFilePath );
365
366 end;
367
368 //==============================
369 // GetObject
370 //==============================
371 function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
372 var
373 idx : Integer;
374 begin
375
376 if Find( name, idx ) then
377 Result := TWordInfo( inherited Objects[ idx ] )
378 else
379 Result := nil;
380
381 end;
382
383 //==============================
384 // SetObject
385 //==============================
386 procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
387 var
388 idx : Integer;
389 begin
390
391 if Find( name, idx ) then
392 inherited Objects[ idx ] := value
393 else
394 AddObject( name, value );
395
396 end;
397
398
399 //==============================
400 // CountWord
401 //==============================
402 procedure TGikoBayesian.CountWord(
403 const text : string;
404 wordCount : TWordCount );
405 type
406 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeNum, ModeHanKana,
407 ModeWGraph, ModeWAlpha, ModeWNum,
408 ModeWHira, ModeWKata, ModeWKanji);
409 var
410 p, tail, last : PChar;
411 mode, newMode : Modes;
412 ch : Longword;
413 chSize : Integer;
414 wHiraDelimiter : TStringList;
415 wHiraFinalDelimiter : TStringList;
416 wKanjiDelimiter : TStringList;
417 words : TStringList;
418 aWord : string;
419 countInfo : TWordCountInfo;
420
421 function cutBoth( _aWord : string; _delim : TStringList ) : string;
422 var
423 _i : Integer;
424 begin
425 for _i := 0 to _delim.Count - 1 do begin
426 _aWord := CustomStringReplace(
427 _aWord,
428 _delim[ _i ],
429 #10 + _delim[ _i ] + #10, False );
430 end;
431 Result := _aWord;
432 end;
433
434 function cutFirst( _aWord : string; _delim : TStringList ) : string;
435 var
436 _i : Integer;
437 begin
438 for _i := 0 to _delim.Count - 1 do begin
439 _aWord := CustomStringReplace(
440 _aWord,
441 _delim[ _i ],
442 #10 + _delim[ _i ], False );
443 end;
444 Result := _aWord;
445 end;
446
447 function cutFinal( _aWord : string; _delim : TStringList ) : string;
448 var
449 _i : Integer;
450 begin
451 for _i := 0 to _delim.Count - 1 do begin
452 _aWord := CustomStringReplace(
453 _aWord,
454 _delim[ _i ],
455 _delim[ _i ] + #10, False );
456 end;
457 Result := _aWord;
458 end;
459
460 procedure addWord( _dst : TWordCount; _words : TStringList );
461 var
462 _aWord : string;
463 _i, _idx : Integer;
464 _countInfo : TWordCountInfo;
465 begin
466 for _i := 0 to _words.Count - 1 do begin
467 _aWord := _words[ _i ];
468 if Length( _aWord ) > 0 then begin
469 if _dst.Find( _aWord, _idx ) then begin
470 _countInfo := TWordCountInfo( _dst.Objects[ _idx ] );
471 end else begin
472 _countInfo := TWordCountInfo.Create;
473 _dst.AddObject( _aWord, _countInfo );
474 end;
475 _countInfo.WordCount := _countInfo.WordCount + 1;
476 end;
477 end;
478 end;
479
480 function changeMode( _aWord : string; _mode : Modes ) : string;
481 var
482 _i : Integer;
483 _aWord2 : string;
484 _pWord, _pWord2 : PChar;
485 _pWordTail, _pFound : PChar;
486 const
487 _delim : string = #10;
488 begin
489 {$IFDEF GIKO_BAYESIAN_NO_HIRAGANA_DIC}
490 if mode = ModeWHira then begin
491 Result := '';
492 Exit;
493 end;
494 {$ENDIF}
495 if Ord( _mode ) >= Ord( ModeWGraph ) then begin
496 // ?ユ???
497 // ?鴻???若?鴻??荅違????
498 _aWord := CustomStringReplace( _aWord, ' ', '', False );
499 _aWord := CustomStringReplace( _aWord, '??', '', False );
500
501 // ???????帥?у??茯?????
502 case mode of
503 ModeWHira:
504 begin
505 _aWord := cutFinal( _aWord, wHiraFinalDelimiter );
506 Result := cutBoth( _aWord, wHiraDelimiter );
507 end;
508
509 ModeWKanji:
510 begin
511 // ???????帥?у??茯?????
512 _aWord := cutBoth( _aWord, wKanjiDelimiter );
513 // 4 byte (2 絖?) ???ゃ?у??茯?????
514 _pWord := PChar( _aWord );
515 _i := Length( _aWord );
516 _pWordTail := _pWord + _i;
517 SetLength( _aWord2, _i + (_i shr 2) );
518 _pWord2 := PChar( _aWord2 );
519
520 while _pWord < _pWordTail do begin
521 _pFound := AnsiStrPos( _pWord, PChar( _delim ) );
522 if _pFound = nil then
523 _pFound := _pWordTail;
524 _pFound := _pFound - 3;
525
526 while _pWord <= _pFound do begin
527 CopyMemory( _pWord2, _pWord, 4 ); _pWord2[ 4 ] := #10;
528 _pWord2 := _pWord2 + 5; _pWord := _pWord + 4;
529 end;
530 _i := _pFound + 4 - _pWord; // 4 = 3 + #10
531 CopyMemory( _pWord2, _pWord, _i );
532 _pWord2 := _pWord2 + _i; _pWord := _pWord + _i;
533 end;
534 if _pWord < _pWordTail then begin
535 _i := _pWordTail - _pWord;
536 CopyMemory( _pWord2, _pWord, _i );
537 _pWord2 := _pWord2 + _i;
538 end;
539 SetLength( _aWord2, _pWord2 - PChar( _aWord2 ) );
540
541 Result := _aWord2;
542 end;
543
544 else
545 Result := _aWord;
546 end;
547 end else begin
548 Result := _aWord;
549 end;
550 end;
551 const
552 WHIRA_DELIMITER = '??' + #10 + '??#39; + #10 + '??' + #10 + '??#39; + #10 + '????'
553 + #10 + '??#39; + #10 + '????' + #10 + '?障??#39;+ #10 + '??#39;
554 + #10 + '????' + #10 + '????' + #10 + '????'
555 + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
556 + #10 + '????#39; + #10 + '????#39; + #10 + '????#39; + #10 + '????#39;
557 + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
558 + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39;
559 + #10 + '????' + #10 + '????#39; + #10 + '????' + #10 + '????'
560 + #10 + '????' + #10 + '??????'
561 + #10 + '?с??' + #10 + '?障??' + #10 + '?障????'
562 + #10 + '?с????' + #10 + '?障????'
563 + #10 + '????' + #10 + '??????' + #10 + '??????' + #10 + '????????'
564 ;
565 WKANJI_DELIMITER = '??' + #10 + '??#39; + #10 + '綣?39; + #10 + '??' + #10 + '羈?'
566 + #10 + '筝?' + #10 + '??#39; + #10 + '??' + #10 + '??'
567 ;
568 WHIRA_FINAL_DELIMITER = '?c??' + #10 + '?c??#39;
569 ;{
570 + #10 + '???c??#39; + #10 + '???????c??#39; + #10 + '??????#39;
571 + #10 + '??????' + #10 + '?с??????'
572 + #10 + '?障??'
573 + #10 + '??????' + #10 + '????' + #10 + '????#39; + #10 + '??????#39;
574 + #10 + '??????' + #10 + '???c?宴??'
575 + #10 + '?с??' + #10 + '????'
576 + #10 + '????' + #10 + '??????' + #10 + '????' + #10 + '??????'
577 ;}
578 // '??#39; ?? '??????????' ????
579 HA_LINE = '?????????????障????????????違?宴????#39;;
580 HI_LINE = '???????<???蚊?帥?????????潟?眼??';
581 HU_LINE = '??????ゃ???泣?????????吟?激??';
582 HE_LINE = '???????????吾?????????鴻?冴??';
583 HO_LINE = '???????????祉???????????若?純??';
584 KA_LINE = '?≪???泣?帥??????ゃ???????吟???????<?泣??#39;;
585 KI_LINE = '?ゃ???激???????????違???吾??????#39;;
586 KU_LINE = '?????鴻?????????????違?????ャ??#39;;
587 KE_LINE = '???宴?祉???????<???宴?蚊?????с??#39;;
588 KO_LINE = '???潟?純???????≪?????蚊?眼??????#39;;
589 kKanji = [$80..$A0, $E0..$ff];
590 begin
591
592 wHiraDelimiter := TStringList.Create;
593 wHiraFinalDelimiter := TStringList.Create;
594 wKanjiDelimiter := TStringList.Create;
595 words := TStringList.Create;
596 try
597 mode := ModeWhite;
598 {$IFNDEF GIKO_BAYESIAN_NO_HIRAGANA_DIC}
599 wHiraDelimiter.Text := WHIRA_DELIMITER;
600 wHiraFinalDelimiter.Text := WHIRA_FINAL_DELIMITER;
601 {$ENDIF}
602 wKanjiDelimiter.Text := WKANJI_DELIMITER;
603 p := PChar( text );
604 tail := p + Length( text );
605 last := p;
606
607 while p < tail do begin
608 // ??絖????帥?ゃ?????ゅ??/span>
609 // ?糸???鴻? ModeGraph ?????????у???ャ???綽??????????????
610 // if Byte(Byte( p^ ) - $a1) < $5e then begin
611 if Byte( p^ ) in kKanji then begin
612 if p + 1 < tail then begin
613 ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
614 case ch of
615 // ?鴻???若?鴻?у??茯????????????????
616 //$8140: newMode := ModeWhite;
617 $8141..$824e: newMode := ModeWGraph;
618 $824f..$8258: newMode := ModeWNum;
619 $8260..$829a: newMode := ModeWAlpha;
620 $829f..$82f1: newMode := ModeWHira;
621 $8340..$8396: newMode := ModeWKata;
622 else newMode := ModeWKanji;
623 end;
624 // '??????#39; ??抗篁??????障???????帥?????????障????
625 if (mode = ModeWHira) or (mode = ModeWKata) then
626 if (ch = $814a) or (ch = $814b) or (ch = $815b) then
627 newMode := mode;
628 end else begin
629 newMode := ModeWhite;
630 end;
631
632 chSize := 2;
633 end else begin
634 newMode := Modes( CharMode1[ Byte( p^ ) ] );
635 if (p^ = ' ') and (Ord( mode ) >= Ord( ModeWGraph )) then begin
636 // 篁??障?ф?ユ????т??鴻???若??/span>
637 // ??茯???膵???????с?鴻???若?鴻??荅違????
638 // ?糸??茹?????????絽吾?鴻???若?鴻?у?阪??????????????荅違??????
639 newMode := mode;
640 end;
641
642 chSize := 1;
643 end;
644
645 if mode <> newMode then begin
646
647 // ??絖????帥?ゃ????紊??眼??????
648 if mode <> ModeWhite then begin
649 SetLength( aWord, p - last );
650 CopyMemory( PChar( aWord ), last, p - last );
651
652 words.Text := changeMode( aWord, mode );
653
654 // ??茯??脂??/span>
655 addWord( wordCount, words );
656 end;
657
658 last := p;
659 mode := newMode;
660
661 end;
662
663 p := p + chSize;
664 end; // while
665
666 if mode <> ModeWhite then begin
667 SetLength( aWord, p - last );
668 CopyMemory( PChar( aWord ), last, p - last );
669
670 words.Text := changeMode( aWord, mode );
671
672 // ??茯??脂??/span>
673 addWord( wordCount, words );
674 end;
675 finally
676 words.Free;
677 wKanjiDelimiter.Free;
678 wHiraFinalDelimiter.Free;
679 wHiraDelimiter.Free;
680 end;
681
682 end;
683
684 //==============================
685 // CalcPaulGraham
686 //==============================
687 function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
688
689 function p( const aWord : string ) : Single;
690 var
691 info : TWordInfo;
692 begin
693 info := Objects[ aWord ];
694 if info = nil then
695 Result := 0.415
696 else if info.NormalWord = 0 then
697 Result := 0.99
698 else if info.ImportantWord = 0 then
699 Result := 0.01
700 else if info.ImportantWord + info.NormalWord * 2 < 5 then
701 Result := 0.5
702 else begin
703 try
704 Result := ( info.ImportantWord / info.ImportantText ) /
705 ((info.NormalWord * 2 / info.NormalText ) +
706 (info.ImportantWord / info.ImportantText));
707 except
708 on EZeroDivide do Result := 0.99;
709 end;
710 end;
711 end;
712
713 var
714 s, q : Extended;
715 i : Integer;
716 narray : TList;
717 const
718 SAMPLE_COUNT = 15;
719 begin
720
721 Result := 1;
722 if wordCount.Count = 0 then
723 Exit;
724
725 narray := TList.Create;
726 try
727 for i := 0 to wordCount.Count - 1 do begin
728 narray.Add( Pointer( p( wordCount[ i ] ) ) );
729 end;
730
731 narray.Sort( AbsSort );
732
733 s := 1;
734 q := 1;
735 i := min( SAMPLE_COUNT, narray.Count );
736 while i > 0 do begin
737 Dec( i );
738
739 s := s * Single( narray[ i ] );
740 q := q * (1 - Single( narray[ i ] ));
741 end;
742
743 Result := s / (s + q);
744 finally
745 narray.Free;
746 end;
747
748 end;
749
750 //==============================
751 // CalcGaryRobinson
752 //==============================
753 function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
754
755 function p( const aWord : string ) : Single;
756 var
757 info : TWordInfo;
758 begin
759 info := Objects[ aWord ];
760 if info = nil then
761 Result := 0.415
762 else if info.ImportantWord = 0 then
763 Result := 0.01
764 else if info.NormalWord = 0 then
765 Result := 0.99
766 else
767 {
768 Result := ( info.ImportantWord / info.ImportantText ) /
769 ((info.NormalWord / info.NormalText ) +
770 (info.ImportantWord / info.ImportantText));
771 }
772 Result := (info.ImportantWord * info.NormalText) /
773 (info.NormalWord * info.ImportantText +
774 info.ImportantWord * info.NormalText);
775 end;
776
777 function f( cnt : Integer; n, mean : Single ) : Extended;
778 const
779 k = 0.001;
780 begin
781 Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
782 end;
783
784 var
785 n : Extended;
786 narray : array of Single;
787 mean : Extended;
788 countInfo : TWordCountInfo;
789 i : Integer;
790 P1, Q1, R1 : Extended;
791 cnt : Extended;
792 begin
793
794 if wordCount.Count = 0 then begin
795 Result := 1;
796 Exit;
797 end;
798
799 SetLength( narray, wordCount.Count );
800 mean := 0;
801 for i := 0 to wordCount.Count - 1 do begin
802 n := p( wordCount[ i ] );
803 narray[ i ] := n;
804 mean := mean + n;
805 end;
806 mean := mean / wordCount.Count;
807
808 P1 := 1;
809 Q1 := 1;
810 for i := 0 to wordCount.Count - 1 do begin
811 countInfo := TWordCountInfo( wordCount.Objects[ i ] );
812 n := f( countInfo.WordCount, narray[ i ], mean );
813 P1 := P1 * ( 1 - n );
814 Q1 := Q1 * n;
815 end;
816 cnt := wordCount.Count;
817 if cnt = 0 then
818 cnt := 1;
819 P1 := 1 - Power( P1, 1 / cnt );
820 Q1 := 1 - Power( Q1, 1 / cnt );
821
822 if P1 + Q1 = 0 then begin
823 Result := 0.5
824 end else begin
825 n := (P1 - Q1) / (P1 + Q1);
826 Result := (1 + n) / 2;
827 end;
828
829 end;
830
831 //==============================
832 // CalcGaryRobinsonFisher
833 //==============================
834 function TGikoBayesian.CalcGaryRobinsonFisher(
835 wordCount : TWordCount
836 ) : Extended;
837
838 function p( const aWord : string ) : Single;
839 var
840 info : TWordInfo;
841 begin
842 info := Objects[ aWord ];
843 if info = nil then
844 Result := 0.415
845 else if info.ImportantWord = 0 then
846 Result := 0.01
847 else if info.NormalWord = 0 then
848 Result := 0.99
849 else
850 {
851 Result := ( info.ImportantWord / info.ImportantText ) /
852 ((info.NormalWord / info.NormalText ) +
853 (info.ImportantWord / info.ImportantText));
854 }
855 Result := (info.ImportantWord * info.NormalText) /
856 (info.NormalWord * info.ImportantText +
857 info.ImportantWord * info.NormalText);
858 end;
859
860 function f( cnt : Integer; n, mean : Single ) : Extended;
861 const
862 k = 0.001;
863 begin
864 Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
865 end;
866
867 function prbx( x2, degree : Extended ) : Extended;
868 begin
869
870 Result := 0.5;
871
872 end;
873
874 var
875 n : Extended;
876 narray : array of Single;
877 mean : Extended;
878 countInfo : TWordCountInfo;
879 i : Integer;
880 normal : Extended;
881 important : Extended;
882 P1, Q1 : Extended;
883 cnt : Extended;
884 begin
885
886 if wordCount.Count = 0 then begin
887 Result := 1;
888 Exit;
889 end;
890
891 SetLength( narray, wordCount.Count );
892 mean := 0;
893 for i := 0 to wordCount.Count - 1 do begin
894 n := p( wordCount[ i ] );
895 narray[ i ] := n;
896 mean := mean + n;
897 end;
898 mean := mean / wordCount.Count;
899
900 P1 := 1;
901 Q1 := 1;
902 for i := 0 to wordCount.Count - 1 do begin
903 countInfo := TWordCountInfo( wordCount.Objects[ i ] );
904 n := f( countInfo.WordCount, narray[ i ], mean );
905 P1 := P1 * ( 1 - n );
906 Q1 := Q1 * n;
907 end;
908 cnt := wordCount.Count;
909 if cnt = 0 then
910 cnt := 1;
911 P1 := Power( P1, 1 / cnt );
912 Q1 := Power( Q1, 1 / cnt );
913
914 P1 := 1 - prbx( -2 * Ln( P1 ), 2 * cnt );
915 Q1 := 1 - prbx( -2 * Ln( Q1 ), 2 * cnt );
916
917 Result := (1 + P1 - Q1) / 2;
918
919 end;
920
921 //==============================
922 // Parse
923 //==============================
924 function TGikoBayesian.Parse(
925 const text : string;
926 wordCount : TWordCount;
927 algorithm : TGikoBayesianAlgorithm
928 ) : Extended;
929 begin
930
931 CountWord( text, wordCount );
932 case algorithm of
933 gbaPaulGraham: Result := CalcPaulGraham( wordCount );
934 gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
935 gbaGaryRobinsonFisher:
936 Result := CalcGaryRobinsonFisher( wordCount );
937 else Result := 0;
938 end;
939
940 end;
941
942 //==============================
943 // Learn
944 //==============================
945 procedure TGikoBayesian.Learn(
946 wordCount : TWordCount;
947 isImportant : Boolean );
948 var
949 aWord : string;
950 wordinfo : TWordInfo;
951 countinfo : TWordCountInfo;
952 i : Integer;
953 begin
954
955 for i := 0 to wordCount.Count - 1 do begin
956 aWord := wordCount[ i ];
957 wordinfo := Objects[ aWord ];
958 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
959 if wordinfo = nil then begin
960 wordinfo := TWordInfo.Create;
961 Objects[ aWord ] := wordinfo;
962 end;
963
964 if isImportant then begin
965 wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
966 wordinfo.ImportantText := wordinfo.ImportantText + 1;
967 end else begin
968 wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
969 wordinfo.NormalText := wordinfo.NormalText + 1;
970 end;
971 end;
972
973 end;
974
975 //==============================
976 // Forget
977 //==============================
978 procedure TGikoBayesian.Forget(
979 wordCount : TWordCount;
980 isImportant : Boolean );
981 var
982 aWord : string;
983 wordinfo : TWordInfo;
984 countinfo : TWordCountInfo;
985 i : Integer;
986 begin
987
988 for i := 0 to wordCount.Count - 1 do begin
989 aWord := wordCount[ i ];
990 wordinfo := Objects[ aWord ];
991 if wordinfo = nil then
992 Continue;
993
994 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
995 if isImportant then begin
996 if wordInfo.ImportantText > 0 then begin
997 wordinfo.ImportantText := wordinfo.ImportantText - 1;
998 wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
999 end;
1000 end else begin
1001 if wordinfo.NormalText > 0 then begin
1002 wordinfo.NormalText := wordinfo.NormalText - 1;
1003 wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
1004 end;
1005 end;
1006 end;
1007
1008 end;
1009
1010 end.

Back to OSDN">Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26