Develop and Download Open Source Software

Browse CVS Repository

Annotation of /gikonavigoeson/gikonavi/GikoBayesian.pas

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.19 - (hide annotations) (download) (as text)
Sat Jul 23 11:49:34 2005 UTC (18 years, 9 months ago) by h677
Branch: MAIN
Changes since 1.18: +0 -0 lines
File MIME type: text/x-pascal
1.50.0.600の変更分をマージ

1 yoffy 1.1 unit GikoBayesian;
2    
3     {!
4     \file GikoBayesian.pas
5     \brief ???ゃ?吾?≪?潟???c????/span>
6    
7 h677 1.18 $Id: GikoBayesian.pas,v 1.17.4.1 2005/07/10 04:16:46 h677 Exp $
8 yoffy 1.1 }
9    
10 yoffy 1.14 //! 綛割皿????莨??吾??????????
11     {$DEFINE GIKO_BAYESIAN_NO_HIRAGANA_DIC}
12    
13 yoffy 1.1 interface
14    
15     //==================================================
16     uses
17     //==================================================
18 yoffy 1.4 Classes;
19 yoffy 1.1
20     //==================================================
21     type
22     //==================================================
23    
24     {!***********************************************************
25     \brief ??茯???????????/span>
26     ************************************************************}
27     TWordInfo = class( TObject )
28     private
29     FNormalWord : Integer; //!< ??絽吾????茯????????糸?眼????????/span>
30     FImportantWord : Integer; //!< 羈?????茯????????糸?眼????????/span>
31     FNormalText : Integer; //!< ??絽吾????茯??????????障??????????腴?????/span>
32     FImportantText : Integer; //!< 羈?????茯??????????障??????????腴?????/span>
33    
34     public
35     property NormalWord : Integer read FNormalWord write FNormalWord;
36     property ImportantWord : Integer read FImportantWord write FImportantWord;
37     property NormalText : Integer read FNormalText write FNormalText;
38     property ImportantText : Integer read FImportantText write FImportantText;
39     end;
40    
41     {!***********************************************************
42     \brief 茹f??羝??水??茯???????????/span>
43     ************************************************************}
44     TWordCountInfo = class( TObject )
45     private
46     FWordCount : Integer; //!< ??茯???/span>
47    
48     public
49     property WordCount : Integer read FWordCount write FWordCount;
50     end;
51    
52     {!***********************************************************
53     \brief 茹f??羝??水??茯????鴻??
54     ************************************************************}
55     // TWordCount = class( THashedStringList ) // 羶???
56 yoffy 1.3 TWordCount = class( TStringList )
57 yoffy 1.1 public
58 yoffy 1.3 constructor Create;
59 yoffy 1.1 destructor Destroy; override;
60     end;
61    
62     {!***********************************************************
63     \brief ???c???帥?≪???眼???冴??
64     ************************************************************}
65     TGikoBayesianAlgorithm =
66 yoffy 1.9 (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
67 yoffy 1.1
68     {!***********************************************************
69     \brief ???ゃ?吾?≪?潟???c????/span>
70     ************************************************************}
71 yoffy 1.3 // TGikoBayesian = class( THashedStringList ) // 羶???
72     TGikoBayesian = class( TStringList )
73 yoffy 1.1 private
74     FFilePath : string; //!< 茯??粋昭???????<?ゃ??????/span>
75     function GetObject( const name : string ) : TWordInfo;
76     procedure SetObject( const name : string; value : TWordInfo );
77    
78     public
79     constructor Create;
80     destructor Destroy; override;
81    
82     //! ???<?ゃ??????絖??絮ユ???茯??水?冴???障??
83     procedure LoadFromFile( const filePath : string );
84    
85     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
86     procedure SaveToFile( const filePath : string );
87    
88     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
89     procedure Save;
90    
91     //! ??茯???????????宴????緇????障??
92     property Objects[ const name : string ] : TWordInfo
93     read GetObject write SetObject; default;
94    
95     //! ??腴??????障??????茯????????潟?????障??
96     procedure CountWord(
97     const text : string;
98     wordCount : TWordCount );
99    
100     {!
101     \brief Paul Graham 羈????冴?ャ??????腴???絵??墾??羆阪????障??
102     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
103     }
104     function CalcPaulGraham( wordCount : TWordCount ) : Extended;
105    
106     {!
107     \brief GaryRobinson 羈????冴?ャ??????腴???絵??墾??羆阪????障??
108     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
109     }
110     function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
111    
112 yoffy 1.9 {!
113     \brief GaryRobinson-Fisher 羈????冴?ャ??????腴???絵??墾??羆阪????障??
114     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
115     }
116     function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
117 yoffy 1.1
118     {!
119     \brief ??腴???茹f??
120     \param text 茹f????????腴?
121     \param wordCount 茹f??????????茯????鴻????菴???
122     \param algorithm 羈???墾??浦絎??????????≪???眼???冴??????絎????障??
123     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
124    
125     CountWord ? Calcxxxxx ???障???????茵??????????с????
126     }
127     function Parse(
128     const text : string;
129     wordCount : TWordCount;
130 yoffy 1.9 algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
131 yoffy 1.1 ) : Extended;
132    
133     {!
134     \brief 絖??????
135     \param wordCount Parse ?цВ??????????茯????鴻??
136     \param isImportant 羈??????鴻????腴???????????????? True
137     }
138     procedure Learn(
139     wordCount : TWordCount;
140     isImportant : Boolean );
141    
142     {!
143     \brief 絖??腟?????綽?????
144     \param wordCount Parse ?цВ??????????茯????鴻??
145     \param isImportant 羈??????鴻????腴???????????????????????? True
146     \warning 絖??羝??帥????腴???????????∈茯??堺?ャ?障??????<br>
147     Learn ????????????腴??? isImportant ???????c????????腴???
148     Forget ?????????若?帥???若?鴻???贋?????障????<br>
149     絖??羝??帥????????????????????????????????
150    
151     ???????膺?腟??????????≪????????с???????障??????<br>
152     wordCount ??緇?????腴? (Parse ? text 綣??? ???膺?腟??????帥?????≪???障????<br><br>
153    
154     筝祉??絵????腴?????羈?????腴????????帥????????? Forget -> Learn ?????т戎?????障????
155     }
156     procedure Forget(
157     wordCount : TWordCount;
158     isImportant : Boolean );
159     end;
160    
161     //==================================================
162     implementation
163     //==================================================
164    
165     uses
166 yoffy 1.12 SysUtils, Math, Windows,
167     MojuUtils;
168 yoffy 1.1
169     const
170     GIKO_BAYESIAN_FILE_VERSION = '1.0';
171 yoffy 1.3 {
172     Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
173     ModeWGraph, ModeWAlpha, ModeWNum,
174     ModeWHira, ModeWKata, ModeWKanji);
175     }
176     CharMode1 : array [ 0..255 ] of Byte =
177     (
178     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181     2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
182     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
183     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
184     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
185     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
186    
187     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
188     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189     0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
190     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
191     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
192 yoffy 1.6 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
193 yoffy 1.3 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
194     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
195     );
196 yoffy 1.1
197     //************************************************************
198     // misc
199     //************************************************************
200    
201     //==============================
202     // RemoveToken
203     //==============================
204     function RemoveToken(var s: string;const delimiter: string): string;
205     var
206     p: Integer;
207     begin
208     p := AnsiPos(delimiter, s);
209     if p = 0 then
210     Result := s
211     else
212     Result := Copy(s, 1, p - 1);
213     s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
214     end;
215    
216     //==============================
217     // AbsSort
218     //==============================
219     function AbsSort( p1, p2 : Pointer ) : Integer;
220     var
221     v1, v2 : Single;
222     begin
223    
224     v1 := Abs( Single( p1 ) - 0.5 );
225     v2 := Abs( Single( p2 ) - 0.5 );
226     if v1 > v2 then
227     Result := -1
228     else if v1 = v2 then
229     Result := 0
230     else
231     Result := 1;
232    
233     end;
234    
235     //************************************************************
236     // TWordCount class
237     //************************************************************
238 yoffy 1.3 constructor TWordCount.Create;
239     begin
240    
241     Duplicates := dupIgnore;
242     CaseSensitive := True;
243     Sorted := True;
244    
245     end;
246    
247 yoffy 1.1 destructor TWordCount.Destroy;
248     var
249     i : Integer;
250     begin
251    
252     for i := Count - 1 downto 0 do
253     if Objects[ i ] <> nil then
254     Objects[ i ].Free;
255    
256     inherited;
257    
258     end;
259    
260     //************************************************************
261     // TGikoBayesian class
262     //************************************************************
263    
264     //==============================
265     // Create
266     //==============================
267     constructor TGikoBayesian.Create;
268     begin
269    
270 yoffy 1.3 Duplicates := dupIgnore;
271     CaseSensitive := True;
272     Sorted := True;
273 yoffy 1.1
274     end;
275    
276     //==============================
277     // Destroy
278     //==============================
279     destructor TGikoBayesian.Destroy;
280     var
281     i : Integer;
282     begin
283    
284     for i := Count - 1 downto 0 do
285     if inherited Objects[ i ] <> nil then
286     inherited Objects[ i ].Free;
287    
288     inherited;
289    
290     end;
291    
292     procedure TGikoBayesian.LoadFromFile( const filePath : string );
293     var
294     i : Integer;
295     sl : TStringList;
296     s : string;
297     name : string;
298     info : TWordInfo;
299     begin
300    
301 yoffy 1.2 FFilePath := filePath;
302    
303 yoffy 1.1 if not FileExists( filePath ) then
304     Exit;
305 yoffy 1.2
306 yoffy 1.1 sl := TStringList.Create;
307     try
308     sl.LoadFromFile( filePath );
309    
310     for i := 1 to sl.Count - 1 do begin
311     s := sl[ i ];
312     name := RemoveToken( s, #1 );
313     info := TWordInfo.Create;
314     info.NormalWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
315     info.ImportantWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
316     info.NormalText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
317     info.ImportantText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
318    
319     AddObject( name, info );
320     end;
321     finally
322     sl.Free;
323     end;
324    
325     end;
326    
327     procedure TGikoBayesian.SaveToFile( const filePath : string );
328     var
329     i : Integer;
330     sl : TStringList;
331     s : string;
332     info : TWordInfo;
333     begin
334    
335 yoffy 1.2 FFilePath := filePath;
336    
337 yoffy 1.1 sl := TStringList.Create;
338     try
339     sl.BeginUpdate;
340     sl.Add( GIKO_BAYESIAN_FILE_VERSION );
341    
342     for i := 0 to Count - 1 do begin
343     info := TWordInfo( inherited Objects[ i ] );
344     s := Strings[ i ] + #1
345     + Format('%x', [info.NormalWord]) + #1
346     + Format('%x', [info.ImportantWord]) + #1
347     + Format('%x', [info.NormalText]) + #1
348     + Format('%x', [info.ImportantText]);
349    
350     sl.Add(s);
351     end;
352     sl.EndUpdate;
353     sl.SaveToFile( filePath );
354     finally
355     sl.Free;
356     end;
357    
358     end;
359    
360     procedure TGikoBayesian.Save;
361     begin
362    
363     if FFilePath <> '' then
364     SaveToFile( FFilePath );
365    
366     end;
367    
368     //==============================
369     // GetObject
370     //==============================
371     function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
372     var
373     idx : Integer;
374     begin
375    
376 yoffy 1.11 if Find( name, idx ) then
377     Result := TWordInfo( inherited Objects[ idx ] )
378 yoffy 1.1 else
379 yoffy 1.11 Result := nil;
380 yoffy 1.1
381     end;
382    
383     //==============================
384     // SetObject
385     //==============================
386     procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
387     var
388     idx : Integer;
389     begin
390    
391 yoffy 1.11 if Find( name, idx ) then
392     inherited Objects[ idx ] := value
393 yoffy 1.1 else
394 yoffy 1.11 AddObject( name, value );
395 yoffy 1.1
396     end;
397    
398    
399     //==============================
400     // CountWord
401     //==============================
402     procedure TGikoBayesian.CountWord(
403     const text : string;
404     wordCount : TWordCount );
405     type
406 yoffy 1.12 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeNum, ModeHanKana,
407 yoffy 1.1 ModeWGraph, ModeWAlpha, ModeWNum,
408     ModeWHira, ModeWKata, ModeWKanji);
409     var
410 yoffy 1.12 p, tail, last : PChar;
411     mode, newMode : Modes;
412     ch : Longword;
413     chSize : Integer;
414     wHiraDelimiter : TStringList;
415     wHiraFinalDelimiter : TStringList;
416     wKanjiDelimiter : TStringList;
417     words : TStringList;
418     aWord : string;
419     countInfo : TWordCountInfo;
420    
421     function cutBoth( _aWord : string; _delim : TStringList ) : string;
422     var
423     _i : Integer;
424     begin
425     for _i := 0 to _delim.Count - 1 do begin
426     _aWord := CustomStringReplace(
427     _aWord,
428     _delim[ _i ],
429     #10 + _delim[ _i ] + #10, False );
430     end;
431     Result := _aWord;
432     end;
433    
434     function cutFirst( _aWord : string; _delim : TStringList ) : string;
435     var
436     _i : Integer;
437     begin
438     for _i := 0 to _delim.Count - 1 do begin
439     _aWord := CustomStringReplace(
440     _aWord,
441     _delim[ _i ],
442     #10 + _delim[ _i ], False );
443     end;
444     Result := _aWord;
445     end;
446    
447     function cutFinal( _aWord : string; _delim : TStringList ) : string;
448     var
449     _i : Integer;
450     begin
451     for _i := 0 to _delim.Count - 1 do begin
452     _aWord := CustomStringReplace(
453     _aWord,
454     _delim[ _i ],
455     _delim[ _i ] + #10, False );
456     end;
457     Result := _aWord;
458     end;
459    
460     procedure addWord( _dst : TWordCount; _words : TStringList );
461     var
462     _aWord : string;
463     _i, _idx : Integer;
464     _countInfo : TWordCountInfo;
465     begin
466     for _i := 0 to _words.Count - 1 do begin
467     _aWord := _words[ _i ];
468     if Length( _aWord ) > 0 then begin
469     if _dst.Find( _aWord, _idx ) then begin
470     _countInfo := TWordCountInfo( _dst.Objects[ _idx ] );
471     end else begin
472     _countInfo := TWordCountInfo.Create;
473     _dst.AddObject( _aWord, _countInfo );
474     end;
475     _countInfo.WordCount := _countInfo.WordCount + 1;
476     end;
477     end;
478     end;
479    
480     function changeMode( _aWord : string; _mode : Modes ) : string;
481     var
482     _i : Integer;
483     _aWord2 : string;
484     _pWord, _pWord2 : PChar;
485     _pWordTail, _pFound : PChar;
486     const
487     _delim : string = #10;
488     begin
489 yoffy 1.14 {$IFDEF GIKO_BAYESIAN_NO_HIRAGANA_DIC}
490     if mode = ModeWHira then begin
491     Result := '';
492     Exit;
493     end;
494     {$ENDIF}
495 yoffy 1.12 if Ord( _mode ) >= Ord( ModeWGraph ) then begin
496     // ?ユ???
497     // ?鴻???若?鴻??荅違????
498     _aWord := CustomStringReplace( _aWord, ' ', '', False );
499     _aWord := CustomStringReplace( _aWord, '??', '', False );
500    
501     // ???????帥?у??茯?????
502     case mode of
503     ModeWHira:
504     begin
505     _aWord := cutFinal( _aWord, wHiraFinalDelimiter );
506     Result := cutBoth( _aWord, wHiraDelimiter );
507     end;
508    
509     ModeWKanji:
510     begin
511     // ???????帥?у??茯?????
512     _aWord := cutBoth( _aWord, wKanjiDelimiter );
513     // 4 byte (2 絖?) ???ゃ?у??茯?????
514     _pWord := PChar( _aWord );
515     _i := Length( _aWord );
516     _pWordTail := _pWord + _i;
517     SetLength( _aWord2, _i + (_i shr 2) );
518     _pWord2 := PChar( _aWord2 );
519    
520     while _pWord < _pWordTail do begin
521     _pFound := AnsiStrPos( _pWord, PChar( _delim ) );
522     if _pFound = nil then
523     _pFound := _pWordTail;
524     _pFound := _pFound - 3;
525    
526     while _pWord <= _pFound do begin
527     CopyMemory( _pWord2, _pWord, 4 ); _pWord2[ 4 ] := #10;
528     _pWord2 := _pWord2 + 5; _pWord := _pWord + 4;
529     end;
530     _i := _pFound + 4 - _pWord; // 4 = 3 + #10
531     CopyMemory( _pWord2, _pWord, _i );
532     _pWord2 := _pWord2 + _i; _pWord := _pWord + _i;
533     end;
534     if _pWord < _pWordTail then begin
535     _i := _pWordTail - _pWord;
536     CopyMemory( _pWord2, _pWord, _i );
537     _pWord2 := _pWord2 + _i;
538     end;
539     SetLength( _aWord2, _pWord2 - PChar( _aWord2 ) );
540    
541     Result := _aWord2;
542     end;
543    
544     else
545     Result := _aWord;
546     end;
547     end else begin
548     Result := _aWord;
549     end;
550     end;
551 yoffy 1.1 const
552 yoffy 1.12 WHIRA_DELIMITER = '??' + #10 + '??#39; + #10 + '??' + #10 + '??#39; + #10 + '????'
553     + #10 + '??#39; + #10 + '????' + #10 + '?障??#39;+ #10 + '??#39;
554     + #10 + '????' + #10 + '????' + #10 + '????'
555 yoffy 1.11 + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
556     + #10 + '????#39; + #10 + '????#39; + #10 + '????#39; + #10 + '????#39;
557     + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
558     + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39;
559 yoffy 1.12 + #10 + '????' + #10 + '????#39; + #10 + '????' + #10 + '????'
560     + #10 + '????' + #10 + '??????'
561     + #10 + '?с??' + #10 + '?障??' + #10 + '?障????'
562     + #10 + '?с????' + #10 + '?障????'
563     + #10 + '????' + #10 + '??????' + #10 + '??????' + #10 + '????????'
564     ;
565     WKANJI_DELIMITER = '??' + #10 + '??#39; + #10 + '綣?39; + #10 + '??' + #10 + '羈?'
566     + #10 + '筝?' + #10 + '??#39; + #10 + '??' + #10 + '??'
567     ;
568     WHIRA_FINAL_DELIMITER = '?c??' + #10 + '?c??#39;
569     ;{
570     + #10 + '???c??#39; + #10 + '???????c??#39; + #10 + '??????#39;
571     + #10 + '??????' + #10 + '?с??????'
572 yoffy 1.11 + #10 + '?障??'
573     + #10 + '??????' + #10 + '????' + #10 + '????#39; + #10 + '??????#39;
574     + #10 + '??????' + #10 + '???c?宴??'
575 yoffy 1.12 + #10 + '?с??' + #10 + '????'
576 yoffy 1.11 + #10 + '????' + #10 + '??????' + #10 + '????' + #10 + '??????'
577     ;}
578 yoffy 1.12 // '??#39; ?? '??????????' ????
579     HA_LINE = '?????????????障????????????違?宴????#39;;
580     HI_LINE = '???????<???蚊?帥?????????潟?眼??';
581     HU_LINE = '??????ゃ???泣?????????吟?激??';
582     HE_LINE = '???????????吾?????????鴻?冴??';
583     HO_LINE = '???????????祉???????????若?純??';
584     KA_LINE = '?≪???泣?帥??????ゃ???????吟???????<?泣??#39;;
585     KI_LINE = '?ゃ???激???????????違???吾??????#39;;
586     KU_LINE = '?????鴻?????????????違?????ャ??#39;;
587     KE_LINE = '???宴?祉???????<???宴?蚊?????с??#39;;
588     KO_LINE = '???潟?純???????≪?????蚊?眼??????#39;;
589 yoffy 1.5 kKanji = [$80..$A0, $E0..$ff];
590 yoffy 1.1 begin
591    
592 yoffy 1.12 wHiraDelimiter := TStringList.Create;
593     wHiraFinalDelimiter := TStringList.Create;
594     wKanjiDelimiter := TStringList.Create;
595     words := TStringList.Create;
596 yoffy 1.1 try
597     mode := ModeWhite;
598 yoffy 1.14 {$IFNDEF GIKO_BAYESIAN_NO_HIRAGANA_DIC}
599 yoffy 1.12 wHiraDelimiter.Text := WHIRA_DELIMITER;
600     wHiraFinalDelimiter.Text := WHIRA_FINAL_DELIMITER;
601 yoffy 1.14 {$ENDIF}
602 yoffy 1.12 wKanjiDelimiter.Text := WKANJI_DELIMITER;
603 yoffy 1.1 p := PChar( text );
604     tail := p + Length( text );
605     last := p;
606    
607     while p < tail do begin
608     // ??絖????帥?ゃ?????ゅ??/span>
609     // ?糸???鴻? ModeGraph ?????????у???ャ???綽??????????????
610 yoffy 1.5 // if Byte(Byte( p^ ) - $a1) < $5e then begin
611     if Byte( p^ ) in kKanji then begin
612 yoffy 1.1 if p + 1 < tail then begin
613     ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
614     case ch of
615 yoffy 1.12 // ?鴻???若?鴻?у??茯????????????????
616     //$8140: newMode := ModeWhite;
617 yoffy 1.1 $8141..$824e: newMode := ModeWGraph;
618     $824f..$8258: newMode := ModeWNum;
619     $8260..$829a: newMode := ModeWAlpha;
620     $829f..$82f1: newMode := ModeWHira;
621     $8340..$8396: newMode := ModeWKata;
622     else newMode := ModeWKanji;
623     end;
624 yoffy 1.6 // '??????#39; ??抗篁??????障???????帥?????????障????
625     if (mode = ModeWHira) or (mode = ModeWKata) then
626 yoffy 1.7 if (ch = $814a) or (ch = $814b) or (ch = $815b) then
627 yoffy 1.6 newMode := mode;
628 yoffy 1.1 end else begin
629     newMode := ModeWhite;
630     end;
631    
632     chSize := 2;
633     end else begin
634 yoffy 1.3 newMode := Modes( CharMode1[ Byte( p^ ) ] );
635 yoffy 1.12 if (p^ = ' ') and (Ord( mode ) >= Ord( ModeWGraph )) then begin
636     // 篁??障?ф?ユ????т??鴻???若??/span>
637     // ??茯???膵???????с?鴻???若?鴻??荅違????
638     // ?糸??茹?????????絽吾?鴻???若?鴻?у?阪??????????????荅違??????
639     newMode := mode;
640     end;
641 yoffy 1.1
642     chSize := 1;
643     end;
644    
645 yoffy 1.12 if mode <> newMode then begin
646 yoffy 1.1
647     // ??絖????帥?ゃ????紊??眼??????
648 yoffy 1.13 if mode <> ModeWhite then begin
649     SetLength( aWord, p - last );
650     CopyMemory( PChar( aWord ), last, p - last );
651 yoffy 1.11
652 yoffy 1.13 words.Text := changeMode( aWord, mode );
653 yoffy 1.11
654 yoffy 1.13 // ??茯??脂??/span>
655     addWord( wordCount, words );
656     end;
657 yoffy 1.1
658 yoffy 1.12 last := p;
659 yoffy 1.1 mode := newMode;
660    
661     end;
662    
663     p := p + chSize;
664     end; // while
665    
666     if mode <> ModeWhite then begin
667 yoffy 1.14 SetLength( aWord, p - last );
668     CopyMemory( PChar( aWord ), last, p - last );
669    
670 yoffy 1.12 words.Text := changeMode( aWord, mode );
671    
672     // ??茯??脂??/span>
673     addWord( wordCount, words );
674 yoffy 1.1 end;
675     finally
676 yoffy 1.12 words.Free;
677     wKanjiDelimiter.Free;
678     wHiraFinalDelimiter.Free;
679     wHiraDelimiter.Free;
680 yoffy 1.1 end;
681    
682     end;
683    
684     //==============================
685     // CalcPaulGraham
686     //==============================
687     function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
688    
689     function p( const aWord : string ) : Single;
690     var
691     info : TWordInfo;
692     begin
693     info := Objects[ aWord ];
694     if info = nil then
695 yoffy 1.9 Result := 0.415
696 yoffy 1.1 else if info.NormalWord = 0 then
697     Result := 0.99
698     else if info.ImportantWord = 0 then
699     Result := 0.01
700 yoffy 1.9 else if info.ImportantWord + info.NormalWord * 2 < 5 then
701     Result := 0.5
702 h677 1.17 else begin
703     try
704     Result := ( info.ImportantWord / info.ImportantText ) /
705     ((info.NormalWord * 2 / info.NormalText ) +
706     (info.ImportantWord / info.ImportantText));
707     except
708     on EZeroDivide do Result := 0.99;
709     end;
710     end;
711 yoffy 1.1 end;
712    
713     var
714     s, q : Extended;
715     i : Integer;
716     narray : TList;
717     const
718     SAMPLE_COUNT = 15;
719     begin
720    
721     Result := 1;
722     if wordCount.Count = 0 then
723     Exit;
724    
725     narray := TList.Create;
726     try
727     for i := 0 to wordCount.Count - 1 do begin
728     narray.Add( Pointer( p( wordCount[ i ] ) ) );
729     end;
730    
731     narray.Sort( AbsSort );
732    
733     s := 1;
734     q := 1;
735     i := min( SAMPLE_COUNT, narray.Count );
736     while i > 0 do begin
737     Dec( i );
738 yoffy 1.9
739 yoffy 1.1 s := s * Single( narray[ i ] );
740     q := q * (1 - Single( narray[ i ] ));
741     end;
742 h677 1.18 try
743     Result := s / (s + q);
744     except
745     Result := 0.5;
746     end;
747 yoffy 1.1 finally
748     narray.Free;
749     end;
750    
751     end;
752    
753     //==============================
754     // CalcGaryRobinson
755     //==============================
756     function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
757    
758     function p( const aWord : string ) : Single;
759     var
760     info : TWordInfo;
761     begin
762     info := Objects[ aWord ];
763     if info = nil then
764     Result := 0.415
765     else if info.ImportantWord = 0 then
766 yoffy 1.9 Result := 0.01
767 yoffy 1.1 else if info.NormalWord = 0 then
768 yoffy 1.9 Result := 0.99
769 yoffy 1.1 else
770 yoffy 1.15 {
771 yoffy 1.1 Result := ( info.ImportantWord / info.ImportantText ) /
772     ((info.NormalWord / info.NormalText ) +
773     (info.ImportantWord / info.ImportantText));
774 yoffy 1.15 }
775 h677 1.18 try
776     Result := (info.ImportantWord * info.NormalText) /
777     (info.NormalWord * info.ImportantText +
778     info.ImportantWord * info.NormalText);
779     except
780     Result := 0.5;
781     end;
782 yoffy 1.1 end;
783    
784     function f( cnt : Integer; n, mean : Single ) : Extended;
785     const
786 yoffy 1.15 k = 0.001;
787 yoffy 1.1 begin
788     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
789     end;
790    
791     var
792     n : Extended;
793     narray : array of Single;
794     mean : Extended;
795     countInfo : TWordCountInfo;
796     i : Integer;
797 yoffy 1.9 P1, Q1, R1 : Extended;
798     cnt : Extended;
799     begin
800    
801     if wordCount.Count = 0 then begin
802     Result := 1;
803     Exit;
804     end;
805    
806     SetLength( narray, wordCount.Count );
807     mean := 0;
808     for i := 0 to wordCount.Count - 1 do begin
809     n := p( wordCount[ i ] );
810     narray[ i ] := n;
811     mean := mean + n;
812     end;
813     mean := mean / wordCount.Count;
814    
815 yoffy 1.10 P1 := 1;
816     Q1 := 1;
817 yoffy 1.9 for i := 0 to wordCount.Count - 1 do begin
818     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
819     n := f( countInfo.WordCount, narray[ i ], mean );
820 yoffy 1.10 P1 := P1 * ( 1 - n );
821     Q1 := Q1 * n;
822 yoffy 1.9 end;
823 yoffy 1.10 cnt := wordCount.Count;
824 yoffy 1.9 if cnt = 0 then
825 yoffy 1.16 cnt := 1;
826 h677 1.18 try
827     P1 := 1 - Power( P1, 1 / cnt );
828     except
829     end;
830     try
831     Q1 := 1 - Power( Q1, 1 / cnt );
832     except
833     end;
834 yoffy 1.9
835     if P1 + Q1 = 0 then begin
836     Result := 0.5
837     end else begin
838     n := (P1 - Q1) / (P1 + Q1);
839     Result := (1 + n) / 2;
840     end;
841    
842     end;
843    
844     //==============================
845     // CalcGaryRobinsonFisher
846     //==============================
847     function TGikoBayesian.CalcGaryRobinsonFisher(
848     wordCount : TWordCount
849     ) : Extended;
850    
851     function p( const aWord : string ) : Single;
852     var
853     info : TWordInfo;
854     begin
855     info := Objects[ aWord ];
856     if info = nil then
857     Result := 0.415
858     else if info.ImportantWord = 0 then
859     Result := 0.01
860     else if info.NormalWord = 0 then
861     Result := 0.99
862     else
863 yoffy 1.15 {
864     Result := ( info.ImportantWord / info.ImportantText ) /
865     ((info.NormalWord / info.NormalText ) +
866     (info.ImportantWord / info.ImportantText));
867     }
868     Result := (info.ImportantWord * info.NormalText) /
869     (info.NormalWord * info.ImportantText +
870     info.ImportantWord * info.NormalText);
871 yoffy 1.9 end;
872    
873     function f( cnt : Integer; n, mean : Single ) : Extended;
874     const
875 yoffy 1.15 k = 0.001;
876 yoffy 1.9 begin
877     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
878     end;
879    
880     function prbx( x2, degree : Extended ) : Extended;
881     begin
882    
883 yoffy 1.15 Result := 0.5;
884 yoffy 1.9
885     end;
886    
887     var
888     n : Extended;
889     narray : array of Single;
890     mean : Extended;
891     countInfo : TWordCountInfo;
892     i : Integer;
893 yoffy 1.1 normal : Extended;
894     important : Extended;
895 yoffy 1.9 P1, Q1 : Extended;
896 yoffy 1.1 cnt : Extended;
897     begin
898    
899     if wordCount.Count = 0 then begin
900     Result := 1;
901     Exit;
902     end;
903    
904     SetLength( narray, wordCount.Count );
905     mean := 0;
906     for i := 0 to wordCount.Count - 1 do begin
907     n := p( wordCount[ i ] );
908     narray[ i ] := n;
909     mean := mean + n;
910     end;
911     mean := mean / wordCount.Count;
912    
913 yoffy 1.9 P1 := 1;
914     Q1 := 1;
915 yoffy 1.1 for i := 0 to wordCount.Count - 1 do begin
916     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
917     n := f( countInfo.WordCount, narray[ i ], mean );
918 yoffy 1.15 P1 := P1 * ( 1 - n );
919     Q1 := Q1 * n;
920 yoffy 1.1 end;
921 yoffy 1.15 cnt := wordCount.Count;
922 yoffy 1.1 if cnt = 0 then
923 yoffy 1.16 cnt := 1;
924 h677 1.18 try
925     P1 := Power( P1, 1 / cnt );
926     except
927     end;
928     try
929     Q1 := Power( Q1, 1 / cnt );
930     except
931     end;
932 yoffy 1.15
933     P1 := 1 - prbx( -2 * Ln( P1 ), 2 * cnt );
934     Q1 := 1 - prbx( -2 * Ln( Q1 ), 2 * cnt );
935    
936     Result := (1 + P1 - Q1) / 2;
937 yoffy 1.1
938     end;
939    
940     //==============================
941     // Parse
942     //==============================
943     function TGikoBayesian.Parse(
944     const text : string;
945     wordCount : TWordCount;
946 yoffy 1.9 algorithm : TGikoBayesianAlgorithm
947 yoffy 1.1 ) : Extended;
948     begin
949    
950     CountWord( text, wordCount );
951     case algorithm of
952     gbaPaulGraham: Result := CalcPaulGraham( wordCount );
953 yoffy 1.9 gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
954     gbaGaryRobinsonFisher:
955     Result := CalcGaryRobinsonFisher( wordCount );
956 yoffy 1.1 else Result := 0;
957     end;
958    
959     end;
960    
961     //==============================
962     // Learn
963     //==============================
964     procedure TGikoBayesian.Learn(
965     wordCount : TWordCount;
966     isImportant : Boolean );
967     var
968     aWord : string;
969     wordinfo : TWordInfo;
970     countinfo : TWordCountInfo;
971 yoffy 1.3 i : Integer;
972 yoffy 1.1 begin
973    
974     for i := 0 to wordCount.Count - 1 do begin
975     aWord := wordCount[ i ];
976     wordinfo := Objects[ aWord ];
977 yoffy 1.3 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
978 yoffy 1.1 if wordinfo = nil then begin
979     wordinfo := TWordInfo.Create;
980     Objects[ aWord ] := wordinfo;
981     end;
982    
983     if isImportant then begin
984     wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
985     wordinfo.ImportantText := wordinfo.ImportantText + 1;
986     end else begin
987     wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
988     wordinfo.NormalText := wordinfo.NormalText + 1;
989     end;
990     end;
991    
992     end;
993    
994     //==============================
995     // Forget
996     //==============================
997     procedure TGikoBayesian.Forget(
998     wordCount : TWordCount;
999     isImportant : Boolean );
1000     var
1001     aWord : string;
1002     wordinfo : TWordInfo;
1003     countinfo : TWordCountInfo;
1004     i : Integer;
1005     begin
1006    
1007     for i := 0 to wordCount.Count - 1 do begin
1008     aWord := wordCount[ i ];
1009     wordinfo := Objects[ aWord ];
1010     if wordinfo = nil then
1011     Continue;
1012    
1013     countinfo := TWordCountInfo( wordCount.Objects[ i ] );
1014     if isImportant then begin
1015 yoffy 1.8 if wordInfo.ImportantText > 0 then begin
1016     wordinfo.ImportantText := wordinfo.ImportantText - 1;
1017     wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
1018     end;
1019 yoffy 1.1 end else begin
1020 yoffy 1.8 if wordinfo.NormalText > 0 then begin
1021     wordinfo.NormalText := wordinfo.NormalText - 1;
1022     wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
1023     end;
1024 yoffy 1.1 end;
1025     end;
1026    
1027     end;
1028    
1029     end.

Back to OSDN">Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26