Develop and Download Open Source Software

Browse CVS Repository

Annotation of /gikonavigoeson/gikonavi/GikoBayesian.pas

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.14 - (hide annotations) (download) (as text)
Mon Nov 1 09:51:57 2004 UTC (19 years, 5 months ago) by yoffy
Branch: MAIN
Changes since 1.13: +15 -2 lines
File MIME type: text/x-pascal
平仮名を辞書に含めないようになった。
(ソースの先頭にある GIKO_BAYESIAN_NO_HIRAGANA_DIC のコメントアウトで復活可能)

1 yoffy 1.1 unit GikoBayesian;
2    
3     {!
4     \file GikoBayesian.pas
5     \brief ???ゃ?吾?≪?潟???c????/span>
6    
7 yoffy 1.14 $Id: GikoBayesian.pas,v 1.13 2004/11/01 05:18:21 yoffy Exp $
8 yoffy 1.1 }
9    
10 yoffy 1.14 //! 綛割皿????莨??吾??????????
11     {$DEFINE GIKO_BAYESIAN_NO_HIRAGANA_DIC}
12    
13 yoffy 1.1 interface
14    
15     //==================================================
16     uses
17     //==================================================
18 yoffy 1.4 Classes;
19 yoffy 1.1
20     //==================================================
21     type
22     //==================================================
23    
24     {!***********************************************************
25     \brief ??茯???????????/span>
26     ************************************************************}
27     TWordInfo = class( TObject )
28     private
29     FNormalWord : Integer; //!< ??絽吾????茯????????糸?眼????????/span>
30     FImportantWord : Integer; //!< 羈?????茯????????糸?眼????????/span>
31     FNormalText : Integer; //!< ??絽吾????茯??????????障??????????腴?????/span>
32     FImportantText : Integer; //!< 羈?????茯??????????障??????????腴?????/span>
33    
34     public
35     property NormalWord : Integer read FNormalWord write FNormalWord;
36     property ImportantWord : Integer read FImportantWord write FImportantWord;
37     property NormalText : Integer read FNormalText write FNormalText;
38     property ImportantText : Integer read FImportantText write FImportantText;
39     end;
40    
41     {!***********************************************************
42     \brief 茹f??羝??水??茯???????????/span>
43     ************************************************************}
44     TWordCountInfo = class( TObject )
45     private
46     FWordCount : Integer; //!< ??茯???/span>
47    
48     public
49     property WordCount : Integer read FWordCount write FWordCount;
50     end;
51    
52     {!***********************************************************
53     \brief 茹f??羝??水??茯????鴻??
54     ************************************************************}
55     // TWordCount = class( THashedStringList ) // 羶???
56 yoffy 1.3 TWordCount = class( TStringList )
57 yoffy 1.1 public
58 yoffy 1.3 constructor Create;
59 yoffy 1.1 destructor Destroy; override;
60     end;
61    
62     {!***********************************************************
63     \brief ???c???帥?≪???眼???冴??
64     ************************************************************}
65     TGikoBayesianAlgorithm =
66 yoffy 1.9 (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
67 yoffy 1.1
68     {!***********************************************************
69     \brief ???ゃ?吾?≪?潟???c????/span>
70     ************************************************************}
71 yoffy 1.3 // TGikoBayesian = class( THashedStringList ) // 羶???
72     TGikoBayesian = class( TStringList )
73 yoffy 1.1 private
74     FFilePath : string; //!< 茯??粋昭???????<?ゃ??????/span>
75     function GetObject( const name : string ) : TWordInfo;
76     procedure SetObject( const name : string; value : TWordInfo );
77    
78     public
79     constructor Create;
80     destructor Destroy; override;
81    
82     //! ???<?ゃ??????絖??絮ユ???茯??水?冴???障??
83     procedure LoadFromFile( const filePath : string );
84    
85     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
86     procedure SaveToFile( const filePath : string );
87    
88     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
89     procedure Save;
90    
91     //! ??茯???????????宴????緇????障??
92     property Objects[ const name : string ] : TWordInfo
93     read GetObject write SetObject; default;
94    
95     //! ??腴??????障??????茯????????潟?????障??
96     procedure CountWord(
97     const text : string;
98     wordCount : TWordCount );
99    
100     {!
101     \brief Paul Graham 羈????冴?ャ??????腴???絵??墾??羆阪????障??
102     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
103     }
104     function CalcPaulGraham( wordCount : TWordCount ) : Extended;
105    
106     {!
107     \brief GaryRobinson 羈????冴?ャ??????腴???絵??墾??羆阪????障??
108     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
109     }
110     function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
111    
112 yoffy 1.9 {!
113     \brief GaryRobinson-Fisher 羈????冴?ャ??????腴???絵??墾??羆阪????障??
114     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
115     }
116     function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
117 yoffy 1.1
118     {!
119     \brief ??腴???茹f??
120     \param text 茹f????????腴?
121     \param wordCount 茹f??????????茯????鴻????菴???
122     \param algorithm 羈???墾??浦絎??????????≪???眼???冴??????絎????障??
123     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
124    
125     CountWord ? Calcxxxxx ???障???????茵??????????с????
126     }
127     function Parse(
128     const text : string;
129     wordCount : TWordCount;
130 yoffy 1.9 algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
131 yoffy 1.1 ) : Extended;
132    
133     {!
134     \brief 絖??????
135     \param wordCount Parse ?цВ??????????茯????鴻??
136     \param isImportant 羈??????鴻????腴???????????????? True
137     }
138     procedure Learn(
139     wordCount : TWordCount;
140     isImportant : Boolean );
141    
142     {!
143     \brief 絖??腟?????綽?????
144     \param wordCount Parse ?цВ??????????茯????鴻??
145     \param isImportant 羈??????鴻????腴???????????????????????? True
146     \warning 絖??羝??帥????腴???????????∈茯??堺?ャ?障??????<br>
147     Learn ????????????腴??? isImportant ???????c????????腴???
148     Forget ?????????若?帥???若?鴻???贋?????障????<br>
149     絖??羝??帥????????????????????????????????
150    
151     ???????膺?腟??????????≪????????с???????障??????<br>
152     wordCount ??緇?????腴? (Parse ? text 綣??? ???膺?腟??????帥?????≪???障????<br><br>
153    
154     筝祉??絵????腴?????羈?????腴????????帥????????? Forget -> Learn ?????т戎?????障????
155     }
156     procedure Forget(
157     wordCount : TWordCount;
158     isImportant : Boolean );
159     end;
160    
161     //==================================================
162     implementation
163     //==================================================
164    
165     uses
166 yoffy 1.12 SysUtils, Math, Windows,
167     MojuUtils;
168 yoffy 1.1
169     const
170     GIKO_BAYESIAN_FILE_VERSION = '1.0';
171 yoffy 1.3 {
172     Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
173     ModeWGraph, ModeWAlpha, ModeWNum,
174     ModeWHira, ModeWKata, ModeWKanji);
175     }
176     CharMode1 : array [ 0..255 ] of Byte =
177     (
178     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181     2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
182     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
183     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
184     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
185     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
186    
187     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
188     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189     0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
190     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
191     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
192 yoffy 1.6 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
193 yoffy 1.3 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
194     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
195     );
196 yoffy 1.1
197     //************************************************************
198     // misc
199     //************************************************************
200    
201     //==============================
202     // RemoveToken
203     //==============================
204     function RemoveToken(var s: string;const delimiter: string): string;
205     var
206     p: Integer;
207     begin
208     p := AnsiPos(delimiter, s);
209     if p = 0 then
210     Result := s
211     else
212     Result := Copy(s, 1, p - 1);
213     s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
214     end;
215    
216     //==============================
217     // AbsSort
218     //==============================
219     function AbsSort( p1, p2 : Pointer ) : Integer;
220     var
221     v1, v2 : Single;
222     begin
223    
224     v1 := Abs( Single( p1 ) - 0.5 );
225     v2 := Abs( Single( p2 ) - 0.5 );
226     if v1 > v2 then
227     Result := -1
228     else if v1 = v2 then
229     Result := 0
230     else
231     Result := 1;
232    
233     end;
234    
235     //************************************************************
236     // TWordCount class
237     //************************************************************
238 yoffy 1.3 constructor TWordCount.Create;
239     begin
240    
241     Duplicates := dupIgnore;
242     CaseSensitive := True;
243     Sorted := True;
244    
245     end;
246    
247 yoffy 1.1 destructor TWordCount.Destroy;
248     var
249     i : Integer;
250     begin
251    
252     for i := Count - 1 downto 0 do
253     if Objects[ i ] <> nil then
254     Objects[ i ].Free;
255    
256     inherited;
257    
258     end;
259    
260     //************************************************************
261     // TGikoBayesian class
262     //************************************************************
263    
264     //==============================
265     // Create
266     //==============================
267     constructor TGikoBayesian.Create;
268     begin
269    
270 yoffy 1.3 Duplicates := dupIgnore;
271     CaseSensitive := True;
272     Sorted := True;
273 yoffy 1.1
274     end;
275    
276     //==============================
277     // Destroy
278     //==============================
279     destructor TGikoBayesian.Destroy;
280     var
281     i : Integer;
282     begin
283    
284     for i := Count - 1 downto 0 do
285     if inherited Objects[ i ] <> nil then
286     inherited Objects[ i ].Free;
287    
288     inherited;
289    
290     end;
291    
292     procedure TGikoBayesian.LoadFromFile( const filePath : string );
293     var
294     i : Integer;
295     sl : TStringList;
296     s : string;
297     name : string;
298     info : TWordInfo;
299     begin
300    
301 yoffy 1.2 FFilePath := filePath;
302    
303 yoffy 1.1 if not FileExists( filePath ) then
304     Exit;
305 yoffy 1.2
306 yoffy 1.1 sl := TStringList.Create;
307     try
308     sl.LoadFromFile( filePath );
309    
310     for i := 1 to sl.Count - 1 do begin
311     s := sl[ i ];
312     name := RemoveToken( s, #1 );
313     info := TWordInfo.Create;
314     info.NormalWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
315     info.ImportantWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
316     info.NormalText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
317     info.ImportantText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
318    
319     AddObject( name, info );
320     end;
321     finally
322     sl.Free;
323     end;
324    
325     end;
326    
327     procedure TGikoBayesian.SaveToFile( const filePath : string );
328     var
329     i : Integer;
330     sl : TStringList;
331     s : string;
332     info : TWordInfo;
333     begin
334    
335 yoffy 1.2 FFilePath := filePath;
336    
337 yoffy 1.1 sl := TStringList.Create;
338     try
339     sl.BeginUpdate;
340     sl.Add( GIKO_BAYESIAN_FILE_VERSION );
341    
342     for i := 0 to Count - 1 do begin
343     info := TWordInfo( inherited Objects[ i ] );
344     s := Strings[ i ] + #1
345     + Format('%x', [info.NormalWord]) + #1
346     + Format('%x', [info.ImportantWord]) + #1
347     + Format('%x', [info.NormalText]) + #1
348     + Format('%x', [info.ImportantText]);
349    
350     sl.Add(s);
351     end;
352     sl.EndUpdate;
353     sl.SaveToFile( filePath );
354     finally
355     sl.Free;
356     end;
357    
358     end;
359    
360     procedure TGikoBayesian.Save;
361     begin
362    
363     if FFilePath <> '' then
364     SaveToFile( FFilePath );
365    
366     end;
367    
368     //==============================
369     // GetObject
370     //==============================
371     function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
372     var
373     idx : Integer;
374     begin
375    
376 yoffy 1.11 if Find( name, idx ) then
377     Result := TWordInfo( inherited Objects[ idx ] )
378 yoffy 1.1 else
379 yoffy 1.11 Result := nil;
380 yoffy 1.1
381     end;
382    
383     //==============================
384     // SetObject
385     //==============================
386     procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
387     var
388     idx : Integer;
389     begin
390    
391 yoffy 1.11 if Find( name, idx ) then
392     inherited Objects[ idx ] := value
393 yoffy 1.1 else
394 yoffy 1.11 AddObject( name, value );
395 yoffy 1.1
396     end;
397    
398    
399     //==============================
400     // CountWord
401     //==============================
402     procedure TGikoBayesian.CountWord(
403     const text : string;
404     wordCount : TWordCount );
405     type
406 yoffy 1.12 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeNum, ModeHanKana,
407 yoffy 1.1 ModeWGraph, ModeWAlpha, ModeWNum,
408     ModeWHira, ModeWKata, ModeWKanji);
409     var
410 yoffy 1.12 p, tail, last : PChar;
411     mode, newMode : Modes;
412     ch : Longword;
413     chSize : Integer;
414     wHiraDelimiter : TStringList;
415     wHiraFinalDelimiter : TStringList;
416     wKanjiDelimiter : TStringList;
417     words : TStringList;
418     aWord : string;
419     countInfo : TWordCountInfo;
420    
421     function cutBoth( _aWord : string; _delim : TStringList ) : string;
422     var
423     _i : Integer;
424     begin
425     for _i := 0 to _delim.Count - 1 do begin
426     _aWord := CustomStringReplace(
427     _aWord,
428     _delim[ _i ],
429     #10 + _delim[ _i ] + #10, False );
430     end;
431     Result := _aWord;
432     end;
433    
434     function cutFirst( _aWord : string; _delim : TStringList ) : string;
435     var
436     _i : Integer;
437     begin
438     for _i := 0 to _delim.Count - 1 do begin
439     _aWord := CustomStringReplace(
440     _aWord,
441     _delim[ _i ],
442     #10 + _delim[ _i ], False );
443     end;
444     Result := _aWord;
445     end;
446    
447     function cutFinal( _aWord : string; _delim : TStringList ) : string;
448     var
449     _i : Integer;
450     begin
451     for _i := 0 to _delim.Count - 1 do begin
452     _aWord := CustomStringReplace(
453     _aWord,
454     _delim[ _i ],
455     _delim[ _i ] + #10, False );
456     end;
457     Result := _aWord;
458     end;
459    
460     procedure addWord( _dst : TWordCount; _words : TStringList );
461     var
462     _aWord : string;
463     _i, _idx : Integer;
464     _countInfo : TWordCountInfo;
465     begin
466     for _i := 0 to _words.Count - 1 do begin
467     _aWord := _words[ _i ];
468     if Length( _aWord ) > 0 then begin
469     if _dst.Find( _aWord, _idx ) then begin
470     _countInfo := TWordCountInfo( _dst.Objects[ _idx ] );
471     end else begin
472     _countInfo := TWordCountInfo.Create;
473     _dst.AddObject( _aWord, _countInfo );
474     end;
475     _countInfo.WordCount := _countInfo.WordCount + 1;
476     end;
477     end;
478     end;
479    
480     function changeMode( _aWord : string; _mode : Modes ) : string;
481     var
482     _i : Integer;
483     _aWord2 : string;
484     _pWord, _pWord2 : PChar;
485     _pWordTail, _pFound : PChar;
486     const
487     _delim : string = #10;
488     begin
489 yoffy 1.14 {$IFDEF GIKO_BAYESIAN_NO_HIRAGANA_DIC}
490     if mode = ModeWHira then begin
491     Result := '';
492     Exit;
493     end;
494     {$ENDIF}
495 yoffy 1.12 if Ord( _mode ) >= Ord( ModeWGraph ) then begin
496     // ?ユ???
497     // ?鴻???若?鴻??荅違????
498     _aWord := CustomStringReplace( _aWord, ' ', '', False );
499     _aWord := CustomStringReplace( _aWord, '??', '', False );
500    
501     // ???????帥?у??茯?????
502     case mode of
503     ModeWHira:
504     begin
505     _aWord := cutFinal( _aWord, wHiraFinalDelimiter );
506     Result := cutBoth( _aWord, wHiraDelimiter );
507     end;
508    
509     ModeWKanji:
510     begin
511     // ???????帥?у??茯?????
512     _aWord := cutBoth( _aWord, wKanjiDelimiter );
513     // 4 byte (2 絖?) ???ゃ?у??茯?????
514     _pWord := PChar( _aWord );
515     _i := Length( _aWord );
516     _pWordTail := _pWord + _i;
517     SetLength( _aWord2, _i + (_i shr 2) );
518     _pWord2 := PChar( _aWord2 );
519    
520     while _pWord < _pWordTail do begin
521     _pFound := AnsiStrPos( _pWord, PChar( _delim ) );
522     if _pFound = nil then
523     _pFound := _pWordTail;
524     _pFound := _pFound - 3;
525    
526     while _pWord <= _pFound do begin
527     CopyMemory( _pWord2, _pWord, 4 ); _pWord2[ 4 ] := #10;
528     _pWord2 := _pWord2 + 5; _pWord := _pWord + 4;
529     end;
530     _i := _pFound + 4 - _pWord; // 4 = 3 + #10
531     CopyMemory( _pWord2, _pWord, _i );
532     _pWord2 := _pWord2 + _i; _pWord := _pWord + _i;
533     end;
534     if _pWord < _pWordTail then begin
535     _i := _pWordTail - _pWord;
536     CopyMemory( _pWord2, _pWord, _i );
537     _pWord2 := _pWord2 + _i;
538     end;
539     SetLength( _aWord2, _pWord2 - PChar( _aWord2 ) );
540    
541     Result := _aWord2;
542     end;
543    
544     else
545     Result := _aWord;
546     end;
547     end else begin
548     Result := _aWord;
549     end;
550     end;
551 yoffy 1.1 const
552 yoffy 1.12 WHIRA_DELIMITER = '??' + #10 + '??#39; + #10 + '??' + #10 + '??#39; + #10 + '????'
553     + #10 + '??#39; + #10 + '????' + #10 + '?障??#39;+ #10 + '??#39;
554     + #10 + '????' + #10 + '????' + #10 + '????'
555 yoffy 1.11 + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
556     + #10 + '????#39; + #10 + '????#39; + #10 + '????#39; + #10 + '????#39;
557     + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
558     + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39;
559 yoffy 1.12 + #10 + '????' + #10 + '????#39; + #10 + '????' + #10 + '????'
560     + #10 + '????' + #10 + '??????'
561     + #10 + '?с??' + #10 + '?障??' + #10 + '?障????'
562     + #10 + '?с????' + #10 + '?障????'
563     + #10 + '????' + #10 + '??????' + #10 + '??????' + #10 + '????????'
564     ;
565     WKANJI_DELIMITER = '??' + #10 + '??#39; + #10 + '綣?39; + #10 + '??' + #10 + '羈?'
566     + #10 + '筝?' + #10 + '??#39; + #10 + '??' + #10 + '??'
567     ;
568     WHIRA_FINAL_DELIMITER = '?c??' + #10 + '?c??#39;
569     ;{
570     + #10 + '???c??#39; + #10 + '???????c??#39; + #10 + '??????#39;
571     + #10 + '??????' + #10 + '?с??????'
572 yoffy 1.11 + #10 + '?障??'
573     + #10 + '??????' + #10 + '????' + #10 + '????#39; + #10 + '??????#39;
574     + #10 + '??????' + #10 + '???c?宴??'
575 yoffy 1.12 + #10 + '?с??' + #10 + '????'
576 yoffy 1.11 + #10 + '????' + #10 + '??????' + #10 + '????' + #10 + '??????'
577     ;}
578 yoffy 1.12 // '??#39; ?? '??????????' ????
579     HA_LINE = '?????????????障????????????違?宴????#39;;
580     HI_LINE = '???????<???蚊?帥?????????潟?眼??';
581     HU_LINE = '??????ゃ???泣?????????吟?激??';
582     HE_LINE = '???????????吾?????????鴻?冴??';
583     HO_LINE = '???????????祉???????????若?純??';
584     KA_LINE = '?≪???泣?帥??????ゃ???????吟???????<?泣??#39;;
585     KI_LINE = '?ゃ???激???????????違???吾??????#39;;
586     KU_LINE = '?????鴻?????????????違?????ャ??#39;;
587     KE_LINE = '???宴?祉???????<???宴?蚊?????с??#39;;
588     KO_LINE = '???潟?純???????≪?????蚊?眼??????#39;;
589 yoffy 1.5 kKanji = [$80..$A0, $E0..$ff];
590 yoffy 1.1 begin
591    
592 yoffy 1.12 wHiraDelimiter := TStringList.Create;
593     wHiraFinalDelimiter := TStringList.Create;
594     wKanjiDelimiter := TStringList.Create;
595     words := TStringList.Create;
596 yoffy 1.1 try
597     mode := ModeWhite;
598 yoffy 1.14 {$IFNDEF GIKO_BAYESIAN_NO_HIRAGANA_DIC}
599 yoffy 1.12 wHiraDelimiter.Text := WHIRA_DELIMITER;
600     wHiraFinalDelimiter.Text := WHIRA_FINAL_DELIMITER;
601 yoffy 1.14 {$ENDIF}
602 yoffy 1.12 wKanjiDelimiter.Text := WKANJI_DELIMITER;
603 yoffy 1.1 p := PChar( text );
604     tail := p + Length( text );
605     last := p;
606    
607     while p < tail do begin
608     // ??絖????帥?ゃ?????ゅ??/span>
609     // ?糸???鴻? ModeGraph ?????????у???ャ???綽??????????????
610 yoffy 1.5 // if Byte(Byte( p^ ) - $a1) < $5e then begin
611     if Byte( p^ ) in kKanji then begin
612 yoffy 1.1 if p + 1 < tail then begin
613     ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
614     case ch of
615 yoffy 1.12 // ?鴻???若?鴻?у??茯????????????????
616     //$8140: newMode := ModeWhite;
617 yoffy 1.1 $8141..$824e: newMode := ModeWGraph;
618     $824f..$8258: newMode := ModeWNum;
619     $8260..$829a: newMode := ModeWAlpha;
620     $829f..$82f1: newMode := ModeWHira;
621     $8340..$8396: newMode := ModeWKata;
622     else newMode := ModeWKanji;
623     end;
624 yoffy 1.6 // '??????#39; ??抗篁??????障???????帥?????????障????
625     if (mode = ModeWHira) or (mode = ModeWKata) then
626 yoffy 1.7 if (ch = $814a) or (ch = $814b) or (ch = $815b) then
627 yoffy 1.6 newMode := mode;
628 yoffy 1.1 end else begin
629     newMode := ModeWhite;
630     end;
631    
632     chSize := 2;
633     end else begin
634 yoffy 1.3 newMode := Modes( CharMode1[ Byte( p^ ) ] );
635 yoffy 1.12 if (p^ = ' ') and (Ord( mode ) >= Ord( ModeWGraph )) then begin
636     // 篁??障?ф?ユ????т??鴻???若??/span>
637     // ??茯???膵???????с?鴻???若?鴻??荅違????
638     // ?糸??茹?????????絽吾?鴻???若?鴻?у?阪??????????????荅違??????
639     newMode := mode;
640     end;
641 yoffy 1.1
642     chSize := 1;
643     end;
644    
645 yoffy 1.12 if mode <> newMode then begin
646 yoffy 1.1
647     // ??絖????帥?ゃ????紊??眼??????
648 yoffy 1.13 if mode <> ModeWhite then begin
649     SetLength( aWord, p - last );
650     CopyMemory( PChar( aWord ), last, p - last );
651 yoffy 1.11
652 yoffy 1.13 words.Text := changeMode( aWord, mode );
653 yoffy 1.11
654 yoffy 1.13 // ??茯??脂??/span>
655     addWord( wordCount, words );
656     end;
657 yoffy 1.1
658 yoffy 1.12 last := p;
659 yoffy 1.1 mode := newMode;
660    
661     end;
662    
663     p := p + chSize;
664     end; // while
665    
666     if mode <> ModeWhite then begin
667 yoffy 1.14 SetLength( aWord, p - last );
668     CopyMemory( PChar( aWord ), last, p - last );
669    
670 yoffy 1.12 words.Text := changeMode( aWord, mode );
671    
672     // ??茯??脂??/span>
673     addWord( wordCount, words );
674 yoffy 1.1 end;
675     finally
676 yoffy 1.12 words.Free;
677     wKanjiDelimiter.Free;
678     wHiraFinalDelimiter.Free;
679     wHiraDelimiter.Free;
680 yoffy 1.1 end;
681    
682     end;
683    
684     //==============================
685     // CalcPaulGraham
686     //==============================
687     function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
688    
689     function p( const aWord : string ) : Single;
690     var
691     info : TWordInfo;
692     begin
693     info := Objects[ aWord ];
694     if info = nil then
695 yoffy 1.9 Result := 0.415
696 yoffy 1.1 else if info.NormalWord = 0 then
697     Result := 0.99
698     else if info.ImportantWord = 0 then
699     Result := 0.01
700 yoffy 1.9 else if info.ImportantWord + info.NormalWord * 2 < 5 then
701     Result := 0.5
702 yoffy 1.1 else
703     Result := ( info.ImportantWord / info.ImportantText ) /
704     ((info.NormalWord * 2 / info.NormalText ) +
705     (info.ImportantWord / info.ImportantText));
706     end;
707    
708     var
709     s, q : Extended;
710     i : Integer;
711     narray : TList;
712     const
713     SAMPLE_COUNT = 15;
714     begin
715    
716     Result := 1;
717     if wordCount.Count = 0 then
718     Exit;
719    
720     narray := TList.Create;
721     try
722     for i := 0 to wordCount.Count - 1 do begin
723     narray.Add( Pointer( p( wordCount[ i ] ) ) );
724     end;
725    
726     narray.Sort( AbsSort );
727    
728     s := 1;
729     q := 1;
730     i := min( SAMPLE_COUNT, narray.Count );
731     while i > 0 do begin
732     Dec( i );
733 yoffy 1.9
734 yoffy 1.1 s := s * Single( narray[ i ] );
735     q := q * (1 - Single( narray[ i ] ));
736     end;
737    
738     Result := s / (s + q);
739     finally
740     narray.Free;
741     end;
742    
743     end;
744    
745     //==============================
746     // CalcGaryRobinson
747     //==============================
748     function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
749    
750     function p( const aWord : string ) : Single;
751     var
752     info : TWordInfo;
753     begin
754     info := Objects[ aWord ];
755     if info = nil then
756     Result := 0.415
757     else if info.ImportantWord = 0 then
758 yoffy 1.9 Result := 0.01
759 yoffy 1.1 else if info.NormalWord = 0 then
760 yoffy 1.9 Result := 0.99
761 yoffy 1.1 else
762     Result := ( info.ImportantWord / info.ImportantText ) /
763     ((info.NormalWord / info.NormalText ) +
764     (info.ImportantWord / info.ImportantText));
765     end;
766    
767     function f( cnt : Integer; n, mean : Single ) : Extended;
768     const
769     k = 0.00001;
770     begin
771     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
772     end;
773    
774     var
775     n : Extended;
776     narray : array of Single;
777     mean : Extended;
778     countInfo : TWordCountInfo;
779     i : Integer;
780 yoffy 1.9 P1, Q1, R1 : Extended;
781     cnt : Extended;
782     begin
783    
784     if wordCount.Count = 0 then begin
785     Result := 1;
786     Exit;
787     end;
788    
789     SetLength( narray, wordCount.Count );
790     mean := 0;
791     for i := 0 to wordCount.Count - 1 do begin
792     n := p( wordCount[ i ] );
793     narray[ i ] := n;
794     mean := mean + n;
795     end;
796     mean := mean / wordCount.Count;
797    
798 yoffy 1.10 P1 := 1;
799     Q1 := 1;
800 yoffy 1.9 for i := 0 to wordCount.Count - 1 do begin
801     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
802     n := f( countInfo.WordCount, narray[ i ], mean );
803 yoffy 1.10 P1 := P1 * ( 1 - n );
804     Q1 := Q1 * n;
805 yoffy 1.9 end;
806 yoffy 1.10 cnt := wordCount.Count;
807 yoffy 1.9 if cnt = 0 then
808 yoffy 1.10 cnt := 1
809     else
810     P1 := 1 - Power( P1, 1 / cnt );
811     Q1 := 1 - Power( Q1, 1 / cnt );
812 yoffy 1.9
813     if P1 + Q1 = 0 then begin
814     Result := 0.5
815     end else begin
816     n := (P1 - Q1) / (P1 + Q1);
817     Result := (1 + n) / 2;
818     end;
819    
820     end;
821    
822     //==============================
823     // CalcGaryRobinsonFisher
824     //==============================
825     function TGikoBayesian.CalcGaryRobinsonFisher(
826     wordCount : TWordCount
827     ) : Extended;
828    
829     function p( const aWord : string ) : Single;
830     var
831     info : TWordInfo;
832     begin
833     info := Objects[ aWord ];
834     if info = nil then
835     Result := 0.415
836     else if info.ImportantWord = 0 then
837     Result := 0.01
838     else if info.NormalWord = 0 then
839     Result := 0.99
840     else
841     Result := info.ImportantWord /
842     (info.ImportantWord + info.NormalWord *
843     info.ImportantText / info.NormalText);
844     end;
845    
846     function f( cnt : Integer; n, mean : Single ) : Extended;
847     const
848     k = 0.00001;
849     begin
850     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
851     end;
852    
853     function prbx( x2, degree : Extended ) : Extended;
854     var
855     m : Extended;
856     sum : Extended;
857     term : Extended;
858     i : extended;
859     begin
860    
861     m := x2 / 2;
862     sum := exp( -m );
863     term := -m;
864    
865     i := 1;
866     while i < (degree / 2 - 1) do begin
867     term := term + ln( m / i );
868     sum := sum + exp( term );
869     i := i + 1;
870     end;
871    
872     if sum < 1 then
873     Result := sum
874     else
875     Result := 1.0;
876    
877     end;
878    
879     var
880     n : Extended;
881     narray : array of Single;
882     mean : Extended;
883     countInfo : TWordCountInfo;
884     i : Integer;
885 yoffy 1.1 normal : Extended;
886     important : Extended;
887 yoffy 1.9 P1, Q1 : Extended;
888 yoffy 1.1 cnt : Extended;
889     begin
890    
891     if wordCount.Count = 0 then begin
892     Result := 1;
893     Exit;
894     end;
895    
896     SetLength( narray, wordCount.Count );
897     mean := 0;
898     for i := 0 to wordCount.Count - 1 do begin
899     n := p( wordCount[ i ] );
900     narray[ i ] := n;
901     mean := mean + n;
902     end;
903     mean := mean / wordCount.Count;
904    
905 yoffy 1.9 cnt := 0;
906     (*
907     P1 := 1;
908     Q1 := 1;
909     (*)
910     P1 := 0;
911     Q1 := 0;
912     //*
913 yoffy 1.1 for i := 0 to wordCount.Count - 1 do begin
914     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
915     n := f( countInfo.WordCount, narray[ i ], mean );
916     if countInfo <> nil then
917 yoffy 1.9 cnt := cnt + countInfo.WordCount;
918     (*
919     P1 := P1 + Ln( 1 - n ) * countInfo.WordCount;
920     Q1 := Q1 + Ln( n ) * countInfo.WordCount;
921     (*)
922     P1 := P1 + Ln( 1 - n );
923     Q1 := Q1 + Ln( n );
924     //*)
925 yoffy 1.1 end;
926     if cnt = 0 then
927     cnt := 1;
928 yoffy 1.9 //(*
929     P1 := prbx( -2 * P1, 2 * cnt );
930     Q1 := prbx( -2 * Q1, 2 * cnt );
931     (*)
932     P1 := prbx( -2 * Ln( P1 ), 2 * cnt );
933     Q1 := prbx( -2 * Ln( Q1 ), 2 * cnt );
934     //*)
935     if P1 + Q1 = 0 then begin
936     Result := 0.5
937     end else begin
938     Result := (1 + Q1 + P1) / 2;
939     end;
940 yoffy 1.1
941     end;
942    
943     //==============================
944     // Parse
945     //==============================
946     function TGikoBayesian.Parse(
947     const text : string;
948     wordCount : TWordCount;
949 yoffy 1.9 algorithm : TGikoBayesianAlgorithm
950 yoffy 1.1 ) : Extended;
951     begin
952    
953     CountWord( text, wordCount );
954     case algorithm of
955     gbaPaulGraham: Result := CalcPaulGraham( wordCount );
956 yoffy 1.9 gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
957     gbaGaryRobinsonFisher:
958     Result := CalcGaryRobinsonFisher( wordCount );
959 yoffy 1.1 else Result := 0;
960     end;
961    
962     end;
963    
964     //==============================
965     // Learn
966     //==============================
967     procedure TGikoBayesian.Learn(
968     wordCount : TWordCount;
969     isImportant : Boolean );
970     var
971     aWord : string;
972     wordinfo : TWordInfo;
973     countinfo : TWordCountInfo;
974 yoffy 1.3 i : Integer;
975 yoffy 1.1 begin
976    
977     for i := 0 to wordCount.Count - 1 do begin
978     aWord := wordCount[ i ];
979     wordinfo := Objects[ aWord ];
980 yoffy 1.3 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
981 yoffy 1.1 if wordinfo = nil then begin
982     wordinfo := TWordInfo.Create;
983     Objects[ aWord ] := wordinfo;
984     end;
985    
986     if isImportant then begin
987     wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
988     wordinfo.ImportantText := wordinfo.ImportantText + 1;
989     end else begin
990     wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
991     wordinfo.NormalText := wordinfo.NormalText + 1;
992     end;
993     end;
994    
995     end;
996    
997     //==============================
998     // Forget
999     //==============================
1000     procedure TGikoBayesian.Forget(
1001     wordCount : TWordCount;
1002     isImportant : Boolean );
1003     var
1004     aWord : string;
1005     wordinfo : TWordInfo;
1006     countinfo : TWordCountInfo;
1007     i : Integer;
1008     begin
1009    
1010     for i := 0 to wordCount.Count - 1 do begin
1011     aWord := wordCount[ i ];
1012     wordinfo := Objects[ aWord ];
1013     if wordinfo = nil then
1014     Continue;
1015    
1016     countinfo := TWordCountInfo( wordCount.Objects[ i ] );
1017     if isImportant then begin
1018 yoffy 1.8 if wordInfo.ImportantText > 0 then begin
1019     wordinfo.ImportantText := wordinfo.ImportantText - 1;
1020     wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
1021     end;
1022 yoffy 1.1 end else begin
1023 yoffy 1.8 if wordinfo.NormalText > 0 then begin
1024     wordinfo.NormalText := wordinfo.NormalText - 1;
1025     wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
1026     end;
1027 yoffy 1.1 end;
1028     end;
1029    
1030     end;
1031    
1032     end.

Back to OSDN">Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26