Develop and Download Open Source Software

Browse CVS Repository

Annotation of /gikonavigoeson/gikonavi/GikoBayesian.pas

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.16 - (hide annotations) (download) (as text)
Mon Nov 1 10:32:02 2004 UTC (19 years, 5 months ago) by yoffy
Branch: MAIN
CVS Tags: bv1_49_0_563
Changes since 1.15: +3 -5 lines
File MIME type: text/x-pascal
else が紛れ込んでたので修正。

1 yoffy 1.1 unit GikoBayesian;
2    
3     {!
4     \file GikoBayesian.pas
5     \brief ???ゃ?吾?≪?潟???c????/span>
6    
7 yoffy 1.16 $Id: GikoBayesian.pas,v 1.15 2004/11/01 10:28:24 yoffy Exp $
8 yoffy 1.1 }
9    
10 yoffy 1.14 //! 綛割皿????莨??吾??????????
11     {$DEFINE GIKO_BAYESIAN_NO_HIRAGANA_DIC}
12    
13 yoffy 1.1 interface
14    
15     //==================================================
16     uses
17     //==================================================
18 yoffy 1.4 Classes;
19 yoffy 1.1
20     //==================================================
21     type
22     //==================================================
23    
24     {!***********************************************************
25     \brief ??茯???????????/span>
26     ************************************************************}
27     TWordInfo = class( TObject )
28     private
29     FNormalWord : Integer; //!< ??絽吾????茯????????糸?眼????????/span>
30     FImportantWord : Integer; //!< 羈?????茯????????糸?眼????????/span>
31     FNormalText : Integer; //!< ??絽吾????茯??????????障??????????腴?????/span>
32     FImportantText : Integer; //!< 羈?????茯??????????障??????????腴?????/span>
33    
34     public
35     property NormalWord : Integer read FNormalWord write FNormalWord;
36     property ImportantWord : Integer read FImportantWord write FImportantWord;
37     property NormalText : Integer read FNormalText write FNormalText;
38     property ImportantText : Integer read FImportantText write FImportantText;
39     end;
40    
41     {!***********************************************************
42     \brief 茹f??羝??水??茯???????????/span>
43     ************************************************************}
44     TWordCountInfo = class( TObject )
45     private
46     FWordCount : Integer; //!< ??茯???/span>
47    
48     public
49     property WordCount : Integer read FWordCount write FWordCount;
50     end;
51    
52     {!***********************************************************
53     \brief 茹f??羝??水??茯????鴻??
54     ************************************************************}
55     // TWordCount = class( THashedStringList ) // 羶???
56 yoffy 1.3 TWordCount = class( TStringList )
57 yoffy 1.1 public
58 yoffy 1.3 constructor Create;
59 yoffy 1.1 destructor Destroy; override;
60     end;
61    
62     {!***********************************************************
63     \brief ???c???帥?≪???眼???冴??
64     ************************************************************}
65     TGikoBayesianAlgorithm =
66 yoffy 1.9 (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
67 yoffy 1.1
68     {!***********************************************************
69     \brief ???ゃ?吾?≪?潟???c????/span>
70     ************************************************************}
71 yoffy 1.3 // TGikoBayesian = class( THashedStringList ) // 羶???
72     TGikoBayesian = class( TStringList )
73 yoffy 1.1 private
74     FFilePath : string; //!< 茯??粋昭???????<?ゃ??????/span>
75     function GetObject( const name : string ) : TWordInfo;
76     procedure SetObject( const name : string; value : TWordInfo );
77    
78     public
79     constructor Create;
80     destructor Destroy; override;
81    
82     //! ???<?ゃ??????絖??絮ユ???茯??水?冴???障??
83     procedure LoadFromFile( const filePath : string );
84    
85     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
86     procedure SaveToFile( const filePath : string );
87    
88     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
89     procedure Save;
90    
91     //! ??茯???????????宴????緇????障??
92     property Objects[ const name : string ] : TWordInfo
93     read GetObject write SetObject; default;
94    
95     //! ??腴??????障??????茯????????潟?????障??
96     procedure CountWord(
97     const text : string;
98     wordCount : TWordCount );
99    
100     {!
101     \brief Paul Graham 羈????冴?ャ??????腴???絵??墾??羆阪????障??
102     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
103     }
104     function CalcPaulGraham( wordCount : TWordCount ) : Extended;
105    
106     {!
107     \brief GaryRobinson 羈????冴?ャ??????腴???絵??墾??羆阪????障??
108     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
109     }
110     function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
111    
112 yoffy 1.9 {!
113     \brief GaryRobinson-Fisher 羈????冴?ャ??????腴???絵??墾??羆阪????障??
114     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
115     }
116     function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
117 yoffy 1.1
118     {!
119     \brief ??腴???茹f??
120     \param text 茹f????????腴?
121     \param wordCount 茹f??????????茯????鴻????菴???
122     \param algorithm 羈???墾??浦絎??????????≪???眼???冴??????絎????障??
123     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
124    
125     CountWord ? Calcxxxxx ???障???????茵??????????с????
126     }
127     function Parse(
128     const text : string;
129     wordCount : TWordCount;
130 yoffy 1.9 algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
131 yoffy 1.1 ) : Extended;
132    
133     {!
134     \brief 絖??????
135     \param wordCount Parse ?цВ??????????茯????鴻??
136     \param isImportant 羈??????鴻????腴???????????????? True
137     }
138     procedure Learn(
139     wordCount : TWordCount;
140     isImportant : Boolean );
141    
142     {!
143     \brief 絖??腟?????綽?????
144     \param wordCount Parse ?цВ??????????茯????鴻??
145     \param isImportant 羈??????鴻????腴???????????????????????? True
146     \warning 絖??羝??帥????腴???????????∈茯??堺?ャ?障??????<br>
147     Learn ????????????腴??? isImportant ???????c????????腴???
148     Forget ?????????若?帥???若?鴻???贋?????障????<br>
149     絖??羝??帥????????????????????????????????
150    
151     ???????膺?腟??????????≪????????с???????障??????<br>
152     wordCount ??緇?????腴? (Parse ? text 綣??? ???膺?腟??????帥?????≪???障????<br><br>
153    
154     筝祉??絵????腴?????羈?????腴????????帥????????? Forget -> Learn ?????т戎?????障????
155     }
156     procedure Forget(
157     wordCount : TWordCount;
158     isImportant : Boolean );
159     end;
160    
161     //==================================================
162     implementation
163     //==================================================
164    
165     uses
166 yoffy 1.12 SysUtils, Math, Windows,
167     MojuUtils;
168 yoffy 1.1
169     const
170     GIKO_BAYESIAN_FILE_VERSION = '1.0';
171 yoffy 1.3 {
172     Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
173     ModeWGraph, ModeWAlpha, ModeWNum,
174     ModeWHira, ModeWKata, ModeWKanji);
175     }
176     CharMode1 : array [ 0..255 ] of Byte =
177     (
178     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181     2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
182     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
183     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
184     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
185     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
186    
187     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
188     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189     0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
190     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
191     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
192 yoffy 1.6 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
193 yoffy 1.3 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
194     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
195     );
196 yoffy 1.1
197     //************************************************************
198     // misc
199     //************************************************************
200    
201     //==============================
202     // RemoveToken
203     //==============================
204     function RemoveToken(var s: string;const delimiter: string): string;
205     var
206     p: Integer;
207     begin
208     p := AnsiPos(delimiter, s);
209     if p = 0 then
210     Result := s
211     else
212     Result := Copy(s, 1, p - 1);
213     s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
214     end;
215    
216     //==============================
217     // AbsSort
218     //==============================
219     function AbsSort( p1, p2 : Pointer ) : Integer;
220     var
221     v1, v2 : Single;
222     begin
223    
224     v1 := Abs( Single( p1 ) - 0.5 );
225     v2 := Abs( Single( p2 ) - 0.5 );
226     if v1 > v2 then
227     Result := -1
228     else if v1 = v2 then
229     Result := 0
230     else
231     Result := 1;
232    
233     end;
234    
235     //************************************************************
236     // TWordCount class
237     //************************************************************
238 yoffy 1.3 constructor TWordCount.Create;
239     begin
240    
241     Duplicates := dupIgnore;
242     CaseSensitive := True;
243     Sorted := True;
244    
245     end;
246    
247 yoffy 1.1 destructor TWordCount.Destroy;
248     var
249     i : Integer;
250     begin
251    
252     for i := Count - 1 downto 0 do
253     if Objects[ i ] <> nil then
254     Objects[ i ].Free;
255    
256     inherited;
257    
258     end;
259    
260     //************************************************************
261     // TGikoBayesian class
262     //************************************************************
263    
264     //==============================
265     // Create
266     //==============================
267     constructor TGikoBayesian.Create;
268     begin
269    
270 yoffy 1.3 Duplicates := dupIgnore;
271     CaseSensitive := True;
272     Sorted := True;
273 yoffy 1.1
274     end;
275    
276     //==============================
277     // Destroy
278     //==============================
279     destructor TGikoBayesian.Destroy;
280     var
281     i : Integer;
282     begin
283    
284     for i := Count - 1 downto 0 do
285     if inherited Objects[ i ] <> nil then
286     inherited Objects[ i ].Free;
287    
288     inherited;
289    
290     end;
291    
292     procedure TGikoBayesian.LoadFromFile( const filePath : string );
293     var
294     i : Integer;
295     sl : TStringList;
296     s : string;
297     name : string;
298     info : TWordInfo;
299     begin
300    
301 yoffy 1.2 FFilePath := filePath;
302    
303 yoffy 1.1 if not FileExists( filePath ) then
304     Exit;
305 yoffy 1.2
306 yoffy 1.1 sl := TStringList.Create;
307     try
308     sl.LoadFromFile( filePath );
309    
310     for i := 1 to sl.Count - 1 do begin
311     s := sl[ i ];
312     name := RemoveToken( s, #1 );
313     info := TWordInfo.Create;
314     info.NormalWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
315     info.ImportantWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
316     info.NormalText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
317     info.ImportantText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
318    
319     AddObject( name, info );
320     end;
321     finally
322     sl.Free;
323     end;
324    
325     end;
326    
327     procedure TGikoBayesian.SaveToFile( const filePath : string );
328     var
329     i : Integer;
330     sl : TStringList;
331     s : string;
332     info : TWordInfo;
333     begin
334    
335 yoffy 1.2 FFilePath := filePath;
336    
337 yoffy 1.1 sl := TStringList.Create;
338     try
339     sl.BeginUpdate;
340     sl.Add( GIKO_BAYESIAN_FILE_VERSION );
341    
342     for i := 0 to Count - 1 do begin
343     info := TWordInfo( inherited Objects[ i ] );
344     s := Strings[ i ] + #1
345     + Format('%x', [info.NormalWord]) + #1
346     + Format('%x', [info.ImportantWord]) + #1
347     + Format('%x', [info.NormalText]) + #1
348     + Format('%x', [info.ImportantText]);
349    
350     sl.Add(s);
351     end;
352     sl.EndUpdate;
353     sl.SaveToFile( filePath );
354     finally
355     sl.Free;
356     end;
357    
358     end;
359    
360     procedure TGikoBayesian.Save;
361     begin
362    
363     if FFilePath <> '' then
364     SaveToFile( FFilePath );
365    
366     end;
367    
368     //==============================
369     // GetObject
370     //==============================
371     function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
372     var
373     idx : Integer;
374     begin
375    
376 yoffy 1.11 if Find( name, idx ) then
377     Result := TWordInfo( inherited Objects[ idx ] )
378 yoffy 1.1 else
379 yoffy 1.11 Result := nil;
380 yoffy 1.1
381     end;
382    
383     //==============================
384     // SetObject
385     //==============================
386     procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
387     var
388     idx : Integer;
389     begin
390    
391 yoffy 1.11 if Find( name, idx ) then
392     inherited Objects[ idx ] := value
393 yoffy 1.1 else
394 yoffy 1.11 AddObject( name, value );
395 yoffy 1.1
396     end;
397    
398    
399     //==============================
400     // CountWord
401     //==============================
402     procedure TGikoBayesian.CountWord(
403     const text : string;
404     wordCount : TWordCount );
405     type
406 yoffy 1.12 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeNum, ModeHanKana,
407 yoffy 1.1 ModeWGraph, ModeWAlpha, ModeWNum,
408     ModeWHira, ModeWKata, ModeWKanji);
409     var
410 yoffy 1.12 p, tail, last : PChar;
411     mode, newMode : Modes;
412     ch : Longword;
413     chSize : Integer;
414     wHiraDelimiter : TStringList;
415     wHiraFinalDelimiter : TStringList;
416     wKanjiDelimiter : TStringList;
417     words : TStringList;
418     aWord : string;
419     countInfo : TWordCountInfo;
420    
421     function cutBoth( _aWord : string; _delim : TStringList ) : string;
422     var
423     _i : Integer;
424     begin
425     for _i := 0 to _delim.Count - 1 do begin
426     _aWord := CustomStringReplace(
427     _aWord,
428     _delim[ _i ],
429     #10 + _delim[ _i ] + #10, False );
430     end;
431     Result := _aWord;
432     end;
433    
434     function cutFirst( _aWord : string; _delim : TStringList ) : string;
435     var
436     _i : Integer;
437     begin
438     for _i := 0 to _delim.Count - 1 do begin
439     _aWord := CustomStringReplace(
440     _aWord,
441     _delim[ _i ],
442     #10 + _delim[ _i ], False );
443     end;
444     Result := _aWord;
445     end;
446    
447     function cutFinal( _aWord : string; _delim : TStringList ) : string;
448     var
449     _i : Integer;
450     begin
451     for _i := 0 to _delim.Count - 1 do begin
452     _aWord := CustomStringReplace(
453     _aWord,
454     _delim[ _i ],
455     _delim[ _i ] + #10, False );
456     end;
457     Result := _aWord;
458     end;
459    
460     procedure addWord( _dst : TWordCount; _words : TStringList );
461     var
462     _aWord : string;
463     _i, _idx : Integer;
464     _countInfo : TWordCountInfo;
465     begin
466     for _i := 0 to _words.Count - 1 do begin
467     _aWord := _words[ _i ];
468     if Length( _aWord ) > 0 then begin
469     if _dst.Find( _aWord, _idx ) then begin
470     _countInfo := TWordCountInfo( _dst.Objects[ _idx ] );
471     end else begin
472     _countInfo := TWordCountInfo.Create;
473     _dst.AddObject( _aWord, _countInfo );
474     end;
475     _countInfo.WordCount := _countInfo.WordCount + 1;
476     end;
477     end;
478     end;
479    
480     function changeMode( _aWord : string; _mode : Modes ) : string;
481     var
482     _i : Integer;
483     _aWord2 : string;
484     _pWord, _pWord2 : PChar;
485     _pWordTail, _pFound : PChar;
486     const
487     _delim : string = #10;
488     begin
489 yoffy 1.14 {$IFDEF GIKO_BAYESIAN_NO_HIRAGANA_DIC}
490     if mode = ModeWHira then begin
491     Result := '';
492     Exit;
493     end;
494     {$ENDIF}
495 yoffy 1.12 if Ord( _mode ) >= Ord( ModeWGraph ) then begin
496     // ?ユ???
497     // ?鴻???若?鴻??荅違????
498     _aWord := CustomStringReplace( _aWord, ' ', '', False );
499     _aWord := CustomStringReplace( _aWord, '??', '', False );
500    
501     // ???????帥?у??茯?????
502     case mode of
503     ModeWHira:
504     begin
505     _aWord := cutFinal( _aWord, wHiraFinalDelimiter );
506     Result := cutBoth( _aWord, wHiraDelimiter );
507     end;
508    
509     ModeWKanji:
510     begin
511     // ???????帥?у??茯?????
512     _aWord := cutBoth( _aWord, wKanjiDelimiter );
513     // 4 byte (2 絖?) ???ゃ?у??茯?????
514     _pWord := PChar( _aWord );
515     _i := Length( _aWord );
516     _pWordTail := _pWord + _i;
517     SetLength( _aWord2, _i + (_i shr 2) );
518     _pWord2 := PChar( _aWord2 );
519    
520     while _pWord < _pWordTail do begin
521     _pFound := AnsiStrPos( _pWord, PChar( _delim ) );
522     if _pFound = nil then
523     _pFound := _pWordTail;
524     _pFound := _pFound - 3;
525    
526     while _pWord <= _pFound do begin
527     CopyMemory( _pWord2, _pWord, 4 ); _pWord2[ 4 ] := #10;
528     _pWord2 := _pWord2 + 5; _pWord := _pWord + 4;
529     end;
530     _i := _pFound + 4 - _pWord; // 4 = 3 + #10
531     CopyMemory( _pWord2, _pWord, _i );
532     _pWord2 := _pWord2 + _i; _pWord := _pWord + _i;
533     end;
534     if _pWord < _pWordTail then begin
535     _i := _pWordTail - _pWord;
536     CopyMemory( _pWord2, _pWord, _i );
537     _pWord2 := _pWord2 + _i;
538     end;
539     SetLength( _aWord2, _pWord2 - PChar( _aWord2 ) );
540    
541     Result := _aWord2;
542     end;
543    
544     else
545     Result := _aWord;
546     end;
547     end else begin
548     Result := _aWord;
549     end;
550     end;
551 yoffy 1.1 const
552 yoffy 1.12 WHIRA_DELIMITER = '??' + #10 + '??#39; + #10 + '??' + #10 + '??#39; + #10 + '????'
553     + #10 + '??#39; + #10 + '????' + #10 + '?障??#39;+ #10 + '??#39;
554     + #10 + '????' + #10 + '????' + #10 + '????'
555 yoffy 1.11 + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
556     + #10 + '????#39; + #10 + '????#39; + #10 + '????#39; + #10 + '????#39;
557     + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
558     + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39;
559 yoffy 1.12 + #10 + '????' + #10 + '????#39; + #10 + '????' + #10 + '????'
560     + #10 + '????' + #10 + '??????'
561     + #10 + '?с??' + #10 + '?障??' + #10 + '?障????'
562     + #10 + '?с????' + #10 + '?障????'
563     + #10 + '????' + #10 + '??????' + #10 + '??????' + #10 + '????????'
564     ;
565     WKANJI_DELIMITER = '??' + #10 + '??#39; + #10 + '綣?39; + #10 + '??' + #10 + '羈?'
566     + #10 + '筝?' + #10 + '??#39; + #10 + '??' + #10 + '??'
567     ;
568     WHIRA_FINAL_DELIMITER = '?c??' + #10 + '?c??#39;
569     ;{
570     + #10 + '???c??#39; + #10 + '???????c??#39; + #10 + '??????#39;
571     + #10 + '??????' + #10 + '?с??????'
572 yoffy 1.11 + #10 + '?障??'
573     + #10 + '??????' + #10 + '????' + #10 + '????#39; + #10 + '??????#39;
574     + #10 + '??????' + #10 + '???c?宴??'
575 yoffy 1.12 + #10 + '?с??' + #10 + '????'
576 yoffy 1.11 + #10 + '????' + #10 + '??????' + #10 + '????' + #10 + '??????'
577     ;}
578 yoffy 1.12 // '??#39; ?? '??????????' ????
579     HA_LINE = '?????????????障????????????違?宴????#39;;
580     HI_LINE = '???????<???蚊?帥?????????潟?眼??';
581     HU_LINE = '??????ゃ???泣?????????吟?激??';
582     HE_LINE = '???????????吾?????????鴻?冴??';
583     HO_LINE = '???????????祉???????????若?純??';
584     KA_LINE = '?≪???泣?帥??????ゃ???????吟???????<?泣??#39;;
585     KI_LINE = '?ゃ???激???????????違???吾??????#39;;
586     KU_LINE = '?????鴻?????????????違?????ャ??#39;;
587     KE_LINE = '???宴?祉???????<???宴?蚊?????с??#39;;
588     KO_LINE = '???潟?純???????≪?????蚊?眼??????#39;;
589 yoffy 1.5 kKanji = [$80..$A0, $E0..$ff];
590 yoffy 1.1 begin
591    
592 yoffy 1.12 wHiraDelimiter := TStringList.Create;
593     wHiraFinalDelimiter := TStringList.Create;
594     wKanjiDelimiter := TStringList.Create;
595     words := TStringList.Create;
596 yoffy 1.1 try
597     mode := ModeWhite;
598 yoffy 1.14 {$IFNDEF GIKO_BAYESIAN_NO_HIRAGANA_DIC}
599 yoffy 1.12 wHiraDelimiter.Text := WHIRA_DELIMITER;
600     wHiraFinalDelimiter.Text := WHIRA_FINAL_DELIMITER;
601 yoffy 1.14 {$ENDIF}
602 yoffy 1.12 wKanjiDelimiter.Text := WKANJI_DELIMITER;
603 yoffy 1.1 p := PChar( text );
604     tail := p + Length( text );
605     last := p;
606    
607     while p < tail do begin
608     // ??絖????帥?ゃ?????ゅ??/span>
609     // ?糸???鴻? ModeGraph ?????????у???ャ???綽??????????????
610 yoffy 1.5 // if Byte(Byte( p^ ) - $a1) < $5e then begin
611     if Byte( p^ ) in kKanji then begin
612 yoffy 1.1 if p + 1 < tail then begin
613     ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
614     case ch of
615 yoffy 1.12 // ?鴻???若?鴻?у??茯????????????????
616     //$8140: newMode := ModeWhite;
617 yoffy 1.1 $8141..$824e: newMode := ModeWGraph;
618     $824f..$8258: newMode := ModeWNum;
619     $8260..$829a: newMode := ModeWAlpha;
620     $829f..$82f1: newMode := ModeWHira;
621     $8340..$8396: newMode := ModeWKata;
622     else newMode := ModeWKanji;
623     end;
624 yoffy 1.6 // '??????#39; ??抗篁??????障???????帥?????????障????
625     if (mode = ModeWHira) or (mode = ModeWKata) then
626 yoffy 1.7 if (ch = $814a) or (ch = $814b) or (ch = $815b) then
627 yoffy 1.6 newMode := mode;
628 yoffy 1.1 end else begin
629     newMode := ModeWhite;
630     end;
631    
632     chSize := 2;
633     end else begin
634 yoffy 1.3 newMode := Modes( CharMode1[ Byte( p^ ) ] );
635 yoffy 1.12 if (p^ = ' ') and (Ord( mode ) >= Ord( ModeWGraph )) then begin
636     // 篁??障?ф?ユ????т??鴻???若??/span>
637     // ??茯???膵???????с?鴻???若?鴻??荅違????
638     // ?糸??茹?????????絽吾?鴻???若?鴻?у?阪??????????????荅違??????
639     newMode := mode;
640     end;
641 yoffy 1.1
642     chSize := 1;
643     end;
644    
645 yoffy 1.12 if mode <> newMode then begin
646 yoffy 1.1
647     // ??絖????帥?ゃ????紊??眼??????
648 yoffy 1.13 if mode <> ModeWhite then begin
649     SetLength( aWord, p - last );
650     CopyMemory( PChar( aWord ), last, p - last );
651 yoffy 1.11
652 yoffy 1.13 words.Text := changeMode( aWord, mode );
653 yoffy 1.11
654 yoffy 1.13 // ??茯??脂??/span>
655     addWord( wordCount, words );
656     end;
657 yoffy 1.1
658 yoffy 1.12 last := p;
659 yoffy 1.1 mode := newMode;
660    
661     end;
662    
663     p := p + chSize;
664     end; // while
665    
666     if mode <> ModeWhite then begin
667 yoffy 1.14 SetLength( aWord, p - last );
668     CopyMemory( PChar( aWord ), last, p - last );
669    
670 yoffy 1.12 words.Text := changeMode( aWord, mode );
671    
672     // ??茯??脂??/span>
673     addWord( wordCount, words );
674 yoffy 1.1 end;
675     finally
676 yoffy 1.12 words.Free;
677     wKanjiDelimiter.Free;
678     wHiraFinalDelimiter.Free;
679     wHiraDelimiter.Free;
680 yoffy 1.1 end;
681    
682     end;
683    
684     //==============================
685     // CalcPaulGraham
686     //==============================
687     function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
688    
689     function p( const aWord : string ) : Single;
690     var
691     info : TWordInfo;
692     begin
693     info := Objects[ aWord ];
694     if info = nil then
695 yoffy 1.9 Result := 0.415
696 yoffy 1.1 else if info.NormalWord = 0 then
697     Result := 0.99
698     else if info.ImportantWord = 0 then
699     Result := 0.01
700 yoffy 1.9 else if info.ImportantWord + info.NormalWord * 2 < 5 then
701     Result := 0.5
702 yoffy 1.1 else
703     Result := ( info.ImportantWord / info.ImportantText ) /
704     ((info.NormalWord * 2 / info.NormalText ) +
705     (info.ImportantWord / info.ImportantText));
706     end;
707    
708     var
709     s, q : Extended;
710     i : Integer;
711     narray : TList;
712     const
713     SAMPLE_COUNT = 15;
714     begin
715    
716     Result := 1;
717     if wordCount.Count = 0 then
718     Exit;
719    
720     narray := TList.Create;
721     try
722     for i := 0 to wordCount.Count - 1 do begin
723     narray.Add( Pointer( p( wordCount[ i ] ) ) );
724     end;
725    
726     narray.Sort( AbsSort );
727    
728     s := 1;
729     q := 1;
730     i := min( SAMPLE_COUNT, narray.Count );
731     while i > 0 do begin
732     Dec( i );
733 yoffy 1.9
734 yoffy 1.1 s := s * Single( narray[ i ] );
735     q := q * (1 - Single( narray[ i ] ));
736     end;
737    
738     Result := s / (s + q);
739     finally
740     narray.Free;
741     end;
742    
743     end;
744    
745     //==============================
746     // CalcGaryRobinson
747     //==============================
748     function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
749    
750     function p( const aWord : string ) : Single;
751     var
752     info : TWordInfo;
753     begin
754     info := Objects[ aWord ];
755     if info = nil then
756     Result := 0.415
757     else if info.ImportantWord = 0 then
758 yoffy 1.9 Result := 0.01
759 yoffy 1.1 else if info.NormalWord = 0 then
760 yoffy 1.9 Result := 0.99
761 yoffy 1.1 else
762 yoffy 1.15 {
763 yoffy 1.1 Result := ( info.ImportantWord / info.ImportantText ) /
764     ((info.NormalWord / info.NormalText ) +
765     (info.ImportantWord / info.ImportantText));
766 yoffy 1.15 }
767     Result := (info.ImportantWord * info.NormalText) /
768     (info.NormalWord * info.ImportantText +
769     info.ImportantWord * info.NormalText);
770 yoffy 1.1 end;
771    
772     function f( cnt : Integer; n, mean : Single ) : Extended;
773     const
774 yoffy 1.15 k = 0.001;
775 yoffy 1.1 begin
776     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
777     end;
778    
779     var
780     n : Extended;
781     narray : array of Single;
782     mean : Extended;
783     countInfo : TWordCountInfo;
784     i : Integer;
785 yoffy 1.9 P1, Q1, R1 : Extended;
786     cnt : Extended;
787     begin
788    
789     if wordCount.Count = 0 then begin
790     Result := 1;
791     Exit;
792     end;
793    
794     SetLength( narray, wordCount.Count );
795     mean := 0;
796     for i := 0 to wordCount.Count - 1 do begin
797     n := p( wordCount[ i ] );
798     narray[ i ] := n;
799     mean := mean + n;
800     end;
801     mean := mean / wordCount.Count;
802    
803 yoffy 1.10 P1 := 1;
804     Q1 := 1;
805 yoffy 1.9 for i := 0 to wordCount.Count - 1 do begin
806     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
807     n := f( countInfo.WordCount, narray[ i ], mean );
808 yoffy 1.10 P1 := P1 * ( 1 - n );
809     Q1 := Q1 * n;
810 yoffy 1.9 end;
811 yoffy 1.10 cnt := wordCount.Count;
812 yoffy 1.9 if cnt = 0 then
813 yoffy 1.16 cnt := 1;
814 yoffy 1.10 P1 := 1 - Power( P1, 1 / cnt );
815     Q1 := 1 - Power( Q1, 1 / cnt );
816 yoffy 1.9
817     if P1 + Q1 = 0 then begin
818     Result := 0.5
819     end else begin
820     n := (P1 - Q1) / (P1 + Q1);
821     Result := (1 + n) / 2;
822     end;
823    
824     end;
825    
826     //==============================
827     // CalcGaryRobinsonFisher
828     //==============================
829     function TGikoBayesian.CalcGaryRobinsonFisher(
830     wordCount : TWordCount
831     ) : Extended;
832    
833     function p( const aWord : string ) : Single;
834     var
835     info : TWordInfo;
836     begin
837     info := Objects[ aWord ];
838     if info = nil then
839     Result := 0.415
840     else if info.ImportantWord = 0 then
841     Result := 0.01
842     else if info.NormalWord = 0 then
843     Result := 0.99
844     else
845 yoffy 1.15 {
846     Result := ( info.ImportantWord / info.ImportantText ) /
847     ((info.NormalWord / info.NormalText ) +
848     (info.ImportantWord / info.ImportantText));
849     }
850     Result := (info.ImportantWord * info.NormalText) /
851     (info.NormalWord * info.ImportantText +
852     info.ImportantWord * info.NormalText);
853 yoffy 1.9 end;
854    
855     function f( cnt : Integer; n, mean : Single ) : Extended;
856     const
857 yoffy 1.15 k = 0.001;
858 yoffy 1.9 begin
859     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
860     end;
861    
862     function prbx( x2, degree : Extended ) : Extended;
863     begin
864    
865 yoffy 1.15 Result := 0.5;
866 yoffy 1.9
867     end;
868    
869     var
870     n : Extended;
871     narray : array of Single;
872     mean : Extended;
873     countInfo : TWordCountInfo;
874     i : Integer;
875 yoffy 1.1 normal : Extended;
876     important : Extended;
877 yoffy 1.9 P1, Q1 : Extended;
878 yoffy 1.1 cnt : Extended;
879     begin
880    
881     if wordCount.Count = 0 then begin
882     Result := 1;
883     Exit;
884     end;
885    
886     SetLength( narray, wordCount.Count );
887     mean := 0;
888     for i := 0 to wordCount.Count - 1 do begin
889     n := p( wordCount[ i ] );
890     narray[ i ] := n;
891     mean := mean + n;
892     end;
893     mean := mean / wordCount.Count;
894    
895 yoffy 1.9 P1 := 1;
896     Q1 := 1;
897 yoffy 1.1 for i := 0 to wordCount.Count - 1 do begin
898     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
899     n := f( countInfo.WordCount, narray[ i ], mean );
900 yoffy 1.15 P1 := P1 * ( 1 - n );
901     Q1 := Q1 * n;
902 yoffy 1.1 end;
903 yoffy 1.15 cnt := wordCount.Count;
904 yoffy 1.1 if cnt = 0 then
905 yoffy 1.16 cnt := 1;
906 yoffy 1.15 P1 := Power( P1, 1 / cnt );
907     Q1 := Power( Q1, 1 / cnt );
908    
909     P1 := 1 - prbx( -2 * Ln( P1 ), 2 * cnt );
910     Q1 := 1 - prbx( -2 * Ln( Q1 ), 2 * cnt );
911    
912     Result := (1 + P1 - Q1) / 2;
913 yoffy 1.1
914     end;
915    
916     //==============================
917     // Parse
918     //==============================
919     function TGikoBayesian.Parse(
920     const text : string;
921     wordCount : TWordCount;
922 yoffy 1.9 algorithm : TGikoBayesianAlgorithm
923 yoffy 1.1 ) : Extended;
924     begin
925    
926     CountWord( text, wordCount );
927     case algorithm of
928     gbaPaulGraham: Result := CalcPaulGraham( wordCount );
929 yoffy 1.9 gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
930     gbaGaryRobinsonFisher:
931     Result := CalcGaryRobinsonFisher( wordCount );
932 yoffy 1.1 else Result := 0;
933     end;
934    
935     end;
936    
937     //==============================
938     // Learn
939     //==============================
940     procedure TGikoBayesian.Learn(
941     wordCount : TWordCount;
942     isImportant : Boolean );
943     var
944     aWord : string;
945     wordinfo : TWordInfo;
946     countinfo : TWordCountInfo;
947 yoffy 1.3 i : Integer;
948 yoffy 1.1 begin
949    
950     for i := 0 to wordCount.Count - 1 do begin
951     aWord := wordCount[ i ];
952     wordinfo := Objects[ aWord ];
953 yoffy 1.3 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
954 yoffy 1.1 if wordinfo = nil then begin
955     wordinfo := TWordInfo.Create;
956     Objects[ aWord ] := wordinfo;
957     end;
958    
959     if isImportant then begin
960     wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
961     wordinfo.ImportantText := wordinfo.ImportantText + 1;
962     end else begin
963     wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
964     wordinfo.NormalText := wordinfo.NormalText + 1;
965     end;
966     end;
967    
968     end;
969    
970     //==============================
971     // Forget
972     //==============================
973     procedure TGikoBayesian.Forget(
974     wordCount : TWordCount;
975     isImportant : Boolean );
976     var
977     aWord : string;
978     wordinfo : TWordInfo;
979     countinfo : TWordCountInfo;
980     i : Integer;
981     begin
982    
983     for i := 0 to wordCount.Count - 1 do begin
984     aWord := wordCount[ i ];
985     wordinfo := Objects[ aWord ];
986     if wordinfo = nil then
987     Continue;
988    
989     countinfo := TWordCountInfo( wordCount.Objects[ i ] );
990     if isImportant then begin
991 yoffy 1.8 if wordInfo.ImportantText > 0 then begin
992     wordinfo.ImportantText := wordinfo.ImportantText - 1;
993     wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
994     end;
995 yoffy 1.1 end else begin
996 yoffy 1.8 if wordinfo.NormalText > 0 then begin
997     wordinfo.NormalText := wordinfo.NormalText - 1;
998     wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
999     end;
1000 yoffy 1.1 end;
1001     end;
1002    
1003     end;
1004    
1005     end.

Back to OSDN">Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26