Develop and Download Open Source Software

Browse CVS Repository

Annotation of /gikonavigoeson/gikonavi/GikoBayesian.pas

Parent Directory Parent Directory | Revision Log Revision Log | View Revision Graph Revision Graph


Revision 1.17 - (hide annotations) (download) (as text)
Fri Nov 5 14:24:26 2004 UTC (19 years, 5 months ago) by h677
Branch: MAIN
CVS Tags: v1_50_0_581, v1_50_0_580, v1_50_0_587, v1_50_0_586, v1_50_0_595, v1_50_0_572, v1_50_0_590, bv1_49_0_564, v1_50_0_593, v1_50_0_596, v1_50_0_585, v1_50_0_582, merged-remodeling, v1_50_0_577, v1_50_0_588, bv1_49_0_565, v1_50_0_574, v1_50_0_584, v_step1, v1_50_0_573, v1_50_0_571, v1_50_0_576, v1_50_0_578, v1_50_0_579, root-of-Bb50, root-of-remodel, v1_50_0_594
Branch point for: remodeling, Bb50
Changes since 1.16: +10 -5 lines
File MIME type: text/x-pascal
0除算例外が出ても平気にした。

1 yoffy 1.1 unit GikoBayesian;
2    
3     {!
4     \file GikoBayesian.pas
5     \brief ???ゃ?吾?≪?潟???c????/span>
6    
7 h677 1.17 $Id: GikoBayesian.pas,v 1.16 2004/11/01 10:32:02 yoffy Exp $
8 yoffy 1.1 }
9    
10 yoffy 1.14 //! 綛割皿????莨??吾??????????
11     {$DEFINE GIKO_BAYESIAN_NO_HIRAGANA_DIC}
12    
13 yoffy 1.1 interface
14    
15     //==================================================
16     uses
17     //==================================================
18 yoffy 1.4 Classes;
19 yoffy 1.1
20     //==================================================
21     type
22     //==================================================
23    
24     {!***********************************************************
25     \brief ??茯???????????/span>
26     ************************************************************}
27     TWordInfo = class( TObject )
28     private
29     FNormalWord : Integer; //!< ??絽吾????茯????????糸?眼????????/span>
30     FImportantWord : Integer; //!< 羈?????茯????????糸?眼????????/span>
31     FNormalText : Integer; //!< ??絽吾????茯??????????障??????????腴?????/span>
32     FImportantText : Integer; //!< 羈?????茯??????????障??????????腴?????/span>
33    
34     public
35     property NormalWord : Integer read FNormalWord write FNormalWord;
36     property ImportantWord : Integer read FImportantWord write FImportantWord;
37     property NormalText : Integer read FNormalText write FNormalText;
38     property ImportantText : Integer read FImportantText write FImportantText;
39     end;
40    
41     {!***********************************************************
42     \brief 茹f??羝??水??茯???????????/span>
43     ************************************************************}
44     TWordCountInfo = class( TObject )
45     private
46     FWordCount : Integer; //!< ??茯???/span>
47    
48     public
49     property WordCount : Integer read FWordCount write FWordCount;
50     end;
51    
52     {!***********************************************************
53     \brief 茹f??羝??水??茯????鴻??
54     ************************************************************}
55     // TWordCount = class( THashedStringList ) // 羶???
56 yoffy 1.3 TWordCount = class( TStringList )
57 yoffy 1.1 public
58 yoffy 1.3 constructor Create;
59 yoffy 1.1 destructor Destroy; override;
60     end;
61    
62     {!***********************************************************
63     \brief ???c???帥?≪???眼???冴??
64     ************************************************************}
65     TGikoBayesianAlgorithm =
66 yoffy 1.9 (gbaPaulGraham, gbaGaryRobinson, gbaGaryRobinsonFisher);
67 yoffy 1.1
68     {!***********************************************************
69     \brief ???ゃ?吾?≪?潟???c????/span>
70     ************************************************************}
71 yoffy 1.3 // TGikoBayesian = class( THashedStringList ) // 羶???
72     TGikoBayesian = class( TStringList )
73 yoffy 1.1 private
74     FFilePath : string; //!< 茯??粋昭???????<?ゃ??????/span>
75     function GetObject( const name : string ) : TWordInfo;
76     procedure SetObject( const name : string; value : TWordInfo );
77    
78     public
79     constructor Create;
80     destructor Destroy; override;
81    
82     //! ???<?ゃ??????絖??絮ユ???茯??水?冴???障??
83     procedure LoadFromFile( const filePath : string );
84    
85     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
86     procedure SaveToFile( const filePath : string );
87    
88     //! ???<?ゃ?????膺?絮ユ???篆?絖????障??
89     procedure Save;
90    
91     //! ??茯???????????宴????緇????障??
92     property Objects[ const name : string ] : TWordInfo
93     read GetObject write SetObject; default;
94    
95     //! ??腴??????障??????茯????????潟?????障??
96     procedure CountWord(
97     const text : string;
98     wordCount : TWordCount );
99    
100     {!
101     \brief Paul Graham 羈????冴?ャ??????腴???絵??墾??羆阪????障??
102     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
103     }
104     function CalcPaulGraham( wordCount : TWordCount ) : Extended;
105    
106     {!
107     \brief GaryRobinson 羈????冴?ャ??????腴???絵??墾??羆阪????障??
108     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
109     }
110     function CalcGaryRobinson( wordCount : TWordCount ) : Extended;
111    
112 yoffy 1.9 {!
113     \brief GaryRobinson-Fisher 羈????冴?ャ??????腴???絵??墾??羆阪????障??
114     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
115     }
116     function CalcGaryRobinsonFisher( wordCount : TWordCount ) : Extended;
117 yoffy 1.1
118     {!
119     \brief ??腴???茹f??
120     \param text 茹f????????腴?
121     \param wordCount 茹f??????????茯????鴻????菴???
122     \param algorithm 羈???墾??浦絎??????????≪???眼???冴??????絎????障??
123     \return ??腴???絵??墾 (羈??????ゃ?????? 0.0??1.0 羈??????鴻??)
124    
125     CountWord ? Calcxxxxx ???障???????茵??????????с????
126     }
127     function Parse(
128     const text : string;
129     wordCount : TWordCount;
130 yoffy 1.9 algorithm : TGikoBayesianAlgorithm = gbaGaryRobinsonFisher
131 yoffy 1.1 ) : Extended;
132    
133     {!
134     \brief 絖??????
135     \param wordCount Parse ?цВ??????????茯????鴻??
136     \param isImportant 羈??????鴻????腴???????????????? True
137     }
138     procedure Learn(
139     wordCount : TWordCount;
140     isImportant : Boolean );
141    
142     {!
143     \brief 絖??腟?????綽?????
144     \param wordCount Parse ?цВ??????????茯????鴻??
145     \param isImportant 羈??????鴻????腴???????????????????????? True
146     \warning 絖??羝??帥????腴???????????∈茯??堺?ャ?障??????<br>
147     Learn ????????????腴??? isImportant ???????c????????腴???
148     Forget ?????????若?帥???若?鴻???贋?????障????<br>
149     絖??羝??帥????????????????????????????????
150    
151     ???????膺?腟??????????≪????????с???????障??????<br>
152     wordCount ??緇?????腴? (Parse ? text 綣??? ???膺?腟??????帥?????≪???障????<br><br>
153    
154     筝祉??絵????腴?????羈?????腴????????帥????????? Forget -> Learn ?????т戎?????障????
155     }
156     procedure Forget(
157     wordCount : TWordCount;
158     isImportant : Boolean );
159     end;
160    
161     //==================================================
162     implementation
163     //==================================================
164    
165     uses
166 yoffy 1.12 SysUtils, Math, Windows,
167     MojuUtils;
168 yoffy 1.1
169     const
170     GIKO_BAYESIAN_FILE_VERSION = '1.0';
171 yoffy 1.3 {
172     Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeHanKana, ModeNum,
173     ModeWGraph, ModeWAlpha, ModeWNum,
174     ModeWHira, ModeWKata, ModeWKanji);
175     }
176     CharMode1 : array [ 0..255 ] of Byte =
177     (
178     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181     2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
182     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
183     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 1,
184     1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
185     3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
186    
187     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
188     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189     0, 1, 1, 1, 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
190     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
191     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
192 yoffy 1.6 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
193 yoffy 1.3 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
194     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
195     );
196 yoffy 1.1
197     //************************************************************
198     // misc
199     //************************************************************
200    
201     //==============================
202     // RemoveToken
203     //==============================
204     function RemoveToken(var s: string;const delimiter: string): string;
205     var
206     p: Integer;
207     begin
208     p := AnsiPos(delimiter, s);
209     if p = 0 then
210     Result := s
211     else
212     Result := Copy(s, 1, p - 1);
213     s := Copy(s, Length(Result) + Length(delimiter) + 1, Length(s));
214     end;
215    
216     //==============================
217     // AbsSort
218     //==============================
219     function AbsSort( p1, p2 : Pointer ) : Integer;
220     var
221     v1, v2 : Single;
222     begin
223    
224     v1 := Abs( Single( p1 ) - 0.5 );
225     v2 := Abs( Single( p2 ) - 0.5 );
226     if v1 > v2 then
227     Result := -1
228     else if v1 = v2 then
229     Result := 0
230     else
231     Result := 1;
232    
233     end;
234    
235     //************************************************************
236     // TWordCount class
237     //************************************************************
238 yoffy 1.3 constructor TWordCount.Create;
239     begin
240    
241     Duplicates := dupIgnore;
242     CaseSensitive := True;
243     Sorted := True;
244    
245     end;
246    
247 yoffy 1.1 destructor TWordCount.Destroy;
248     var
249     i : Integer;
250     begin
251    
252     for i := Count - 1 downto 0 do
253     if Objects[ i ] <> nil then
254     Objects[ i ].Free;
255    
256     inherited;
257    
258     end;
259    
260     //************************************************************
261     // TGikoBayesian class
262     //************************************************************
263    
264     //==============================
265     // Create
266     //==============================
267     constructor TGikoBayesian.Create;
268     begin
269    
270 yoffy 1.3 Duplicates := dupIgnore;
271     CaseSensitive := True;
272     Sorted := True;
273 yoffy 1.1
274     end;
275    
276     //==============================
277     // Destroy
278     //==============================
279     destructor TGikoBayesian.Destroy;
280     var
281     i : Integer;
282     begin
283    
284     for i := Count - 1 downto 0 do
285     if inherited Objects[ i ] <> nil then
286     inherited Objects[ i ].Free;
287    
288     inherited;
289    
290     end;
291    
292     procedure TGikoBayesian.LoadFromFile( const filePath : string );
293     var
294     i : Integer;
295     sl : TStringList;
296     s : string;
297     name : string;
298     info : TWordInfo;
299     begin
300    
301 yoffy 1.2 FFilePath := filePath;
302    
303 yoffy 1.1 if not FileExists( filePath ) then
304     Exit;
305 yoffy 1.2
306 yoffy 1.1 sl := TStringList.Create;
307     try
308     sl.LoadFromFile( filePath );
309    
310     for i := 1 to sl.Count - 1 do begin
311     s := sl[ i ];
312     name := RemoveToken( s, #1 );
313     info := TWordInfo.Create;
314     info.NormalWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
315     info.ImportantWord := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
316     info.NormalText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
317     info.ImportantText := StrToIntDef( '$' + RemoveToken( s, #1 ), 0 );
318    
319     AddObject( name, info );
320     end;
321     finally
322     sl.Free;
323     end;
324    
325     end;
326    
327     procedure TGikoBayesian.SaveToFile( const filePath : string );
328     var
329     i : Integer;
330     sl : TStringList;
331     s : string;
332     info : TWordInfo;
333     begin
334    
335 yoffy 1.2 FFilePath := filePath;
336    
337 yoffy 1.1 sl := TStringList.Create;
338     try
339     sl.BeginUpdate;
340     sl.Add( GIKO_BAYESIAN_FILE_VERSION );
341    
342     for i := 0 to Count - 1 do begin
343     info := TWordInfo( inherited Objects[ i ] );
344     s := Strings[ i ] + #1
345     + Format('%x', [info.NormalWord]) + #1
346     + Format('%x', [info.ImportantWord]) + #1
347     + Format('%x', [info.NormalText]) + #1
348     + Format('%x', [info.ImportantText]);
349    
350     sl.Add(s);
351     end;
352     sl.EndUpdate;
353     sl.SaveToFile( filePath );
354     finally
355     sl.Free;
356     end;
357    
358     end;
359    
360     procedure TGikoBayesian.Save;
361     begin
362    
363     if FFilePath <> '' then
364     SaveToFile( FFilePath );
365    
366     end;
367    
368     //==============================
369     // GetObject
370     //==============================
371     function TGikoBayesian.GetObject( const name : string ) : TWordInfo;
372     var
373     idx : Integer;
374     begin
375    
376 yoffy 1.11 if Find( name, idx ) then
377     Result := TWordInfo( inherited Objects[ idx ] )
378 yoffy 1.1 else
379 yoffy 1.11 Result := nil;
380 yoffy 1.1
381     end;
382    
383     //==============================
384     // SetObject
385     //==============================
386     procedure TGikoBayesian.SetObject( const name : string; value : TWordInfo );
387     var
388     idx : Integer;
389     begin
390    
391 yoffy 1.11 if Find( name, idx ) then
392     inherited Objects[ idx ] := value
393 yoffy 1.1 else
394 yoffy 1.11 AddObject( name, value );
395 yoffy 1.1
396     end;
397    
398    
399     //==============================
400     // CountWord
401     //==============================
402     procedure TGikoBayesian.CountWord(
403     const text : string;
404     wordCount : TWordCount );
405     type
406 yoffy 1.12 Modes = (ModeWhite, ModeGraph, ModeAlpha, ModeNum, ModeHanKana,
407 yoffy 1.1 ModeWGraph, ModeWAlpha, ModeWNum,
408     ModeWHira, ModeWKata, ModeWKanji);
409     var
410 yoffy 1.12 p, tail, last : PChar;
411     mode, newMode : Modes;
412     ch : Longword;
413     chSize : Integer;
414     wHiraDelimiter : TStringList;
415     wHiraFinalDelimiter : TStringList;
416     wKanjiDelimiter : TStringList;
417     words : TStringList;
418     aWord : string;
419     countInfo : TWordCountInfo;
420    
421     function cutBoth( _aWord : string; _delim : TStringList ) : string;
422     var
423     _i : Integer;
424     begin
425     for _i := 0 to _delim.Count - 1 do begin
426     _aWord := CustomStringReplace(
427     _aWord,
428     _delim[ _i ],
429     #10 + _delim[ _i ] + #10, False );
430     end;
431     Result := _aWord;
432     end;
433    
434     function cutFirst( _aWord : string; _delim : TStringList ) : string;
435     var
436     _i : Integer;
437     begin
438     for _i := 0 to _delim.Count - 1 do begin
439     _aWord := CustomStringReplace(
440     _aWord,
441     _delim[ _i ],
442     #10 + _delim[ _i ], False );
443     end;
444     Result := _aWord;
445     end;
446    
447     function cutFinal( _aWord : string; _delim : TStringList ) : string;
448     var
449     _i : Integer;
450     begin
451     for _i := 0 to _delim.Count - 1 do begin
452     _aWord := CustomStringReplace(
453     _aWord,
454     _delim[ _i ],
455     _delim[ _i ] + #10, False );
456     end;
457     Result := _aWord;
458     end;
459    
460     procedure addWord( _dst : TWordCount; _words : TStringList );
461     var
462     _aWord : string;
463     _i, _idx : Integer;
464     _countInfo : TWordCountInfo;
465     begin
466     for _i := 0 to _words.Count - 1 do begin
467     _aWord := _words[ _i ];
468     if Length( _aWord ) > 0 then begin
469     if _dst.Find( _aWord, _idx ) then begin
470     _countInfo := TWordCountInfo( _dst.Objects[ _idx ] );
471     end else begin
472     _countInfo := TWordCountInfo.Create;
473     _dst.AddObject( _aWord, _countInfo );
474     end;
475     _countInfo.WordCount := _countInfo.WordCount + 1;
476     end;
477     end;
478     end;
479    
480     function changeMode( _aWord : string; _mode : Modes ) : string;
481     var
482     _i : Integer;
483     _aWord2 : string;
484     _pWord, _pWord2 : PChar;
485     _pWordTail, _pFound : PChar;
486     const
487     _delim : string = #10;
488     begin
489 yoffy 1.14 {$IFDEF GIKO_BAYESIAN_NO_HIRAGANA_DIC}
490     if mode = ModeWHira then begin
491     Result := '';
492     Exit;
493     end;
494     {$ENDIF}
495 yoffy 1.12 if Ord( _mode ) >= Ord( ModeWGraph ) then begin
496     // ?ユ???
497     // ?鴻???若?鴻??荅違????
498     _aWord := CustomStringReplace( _aWord, ' ', '', False );
499     _aWord := CustomStringReplace( _aWord, '??', '', False );
500    
501     // ???????帥?у??茯?????
502     case mode of
503     ModeWHira:
504     begin
505     _aWord := cutFinal( _aWord, wHiraFinalDelimiter );
506     Result := cutBoth( _aWord, wHiraDelimiter );
507     end;
508    
509     ModeWKanji:
510     begin
511     // ???????帥?у??茯?????
512     _aWord := cutBoth( _aWord, wKanjiDelimiter );
513     // 4 byte (2 絖?) ???ゃ?у??茯?????
514     _pWord := PChar( _aWord );
515     _i := Length( _aWord );
516     _pWordTail := _pWord + _i;
517     SetLength( _aWord2, _i + (_i shr 2) );
518     _pWord2 := PChar( _aWord2 );
519    
520     while _pWord < _pWordTail do begin
521     _pFound := AnsiStrPos( _pWord, PChar( _delim ) );
522     if _pFound = nil then
523     _pFound := _pWordTail;
524     _pFound := _pFound - 3;
525    
526     while _pWord <= _pFound do begin
527     CopyMemory( _pWord2, _pWord, 4 ); _pWord2[ 4 ] := #10;
528     _pWord2 := _pWord2 + 5; _pWord := _pWord + 4;
529     end;
530     _i := _pFound + 4 - _pWord; // 4 = 3 + #10
531     CopyMemory( _pWord2, _pWord, _i );
532     _pWord2 := _pWord2 + _i; _pWord := _pWord + _i;
533     end;
534     if _pWord < _pWordTail then begin
535     _i := _pWordTail - _pWord;
536     CopyMemory( _pWord2, _pWord, _i );
537     _pWord2 := _pWord2 + _i;
538     end;
539     SetLength( _aWord2, _pWord2 - PChar( _aWord2 ) );
540    
541     Result := _aWord2;
542     end;
543    
544     else
545     Result := _aWord;
546     end;
547     end else begin
548     Result := _aWord;
549     end;
550     end;
551 yoffy 1.1 const
552 yoffy 1.12 WHIRA_DELIMITER = '??' + #10 + '??#39; + #10 + '??' + #10 + '??#39; + #10 + '????'
553     + #10 + '??#39; + #10 + '????' + #10 + '?障??#39;+ #10 + '??#39;
554     + #10 + '????' + #10 + '????' + #10 + '????'
555 yoffy 1.11 + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
556     + #10 + '????#39; + #10 + '????#39; + #10 + '????#39; + #10 + '????#39;
557     + #10 + '????' + #10 + '????' + #10 + '????' + #10 + '????'
558     + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39; + #10 + '??????#39;
559 yoffy 1.12 + #10 + '????' + #10 + '????#39; + #10 + '????' + #10 + '????'
560     + #10 + '????' + #10 + '??????'
561     + #10 + '?с??' + #10 + '?障??' + #10 + '?障????'
562     + #10 + '?с????' + #10 + '?障????'
563     + #10 + '????' + #10 + '??????' + #10 + '??????' + #10 + '????????'
564     ;
565     WKANJI_DELIMITER = '??' + #10 + '??#39; + #10 + '綣?39; + #10 + '??' + #10 + '羈?'
566     + #10 + '筝?' + #10 + '??#39; + #10 + '??' + #10 + '??'
567     ;
568     WHIRA_FINAL_DELIMITER = '?c??' + #10 + '?c??#39;
569     ;{
570     + #10 + '???c??#39; + #10 + '???????c??#39; + #10 + '??????#39;
571     + #10 + '??????' + #10 + '?с??????'
572 yoffy 1.11 + #10 + '?障??'
573     + #10 + '??????' + #10 + '????' + #10 + '????#39; + #10 + '??????#39;
574     + #10 + '??????' + #10 + '???c?宴??'
575 yoffy 1.12 + #10 + '?с??' + #10 + '????'
576 yoffy 1.11 + #10 + '????' + #10 + '??????' + #10 + '????' + #10 + '??????'
577     ;}
578 yoffy 1.12 // '??#39; ?? '??????????' ????
579     HA_LINE = '?????????????障????????????違?宴????#39;;
580     HI_LINE = '???????<???蚊?帥?????????潟?眼??';
581     HU_LINE = '??????ゃ???泣?????????吟?激??';
582     HE_LINE = '???????????吾?????????鴻?冴??';
583     HO_LINE = '???????????祉???????????若?純??';
584     KA_LINE = '?≪???泣?帥??????ゃ???????吟???????<?泣??#39;;
585     KI_LINE = '?ゃ???激???????????違???吾??????#39;;
586     KU_LINE = '?????鴻?????????????違?????ャ??#39;;
587     KE_LINE = '???宴?祉???????<???宴?蚊?????с??#39;;
588     KO_LINE = '???潟?純???????≪?????蚊?眼??????#39;;
589 yoffy 1.5 kKanji = [$80..$A0, $E0..$ff];
590 yoffy 1.1 begin
591    
592 yoffy 1.12 wHiraDelimiter := TStringList.Create;
593     wHiraFinalDelimiter := TStringList.Create;
594     wKanjiDelimiter := TStringList.Create;
595     words := TStringList.Create;
596 yoffy 1.1 try
597     mode := ModeWhite;
598 yoffy 1.14 {$IFNDEF GIKO_BAYESIAN_NO_HIRAGANA_DIC}
599 yoffy 1.12 wHiraDelimiter.Text := WHIRA_DELIMITER;
600     wHiraFinalDelimiter.Text := WHIRA_FINAL_DELIMITER;
601 yoffy 1.14 {$ENDIF}
602 yoffy 1.12 wKanjiDelimiter.Text := WKANJI_DELIMITER;
603 yoffy 1.1 p := PChar( text );
604     tail := p + Length( text );
605     last := p;
606    
607     while p < tail do begin
608     // ??絖????帥?ゃ?????ゅ??/span>
609     // ?糸???鴻? ModeGraph ?????????у???ャ???綽??????????????
610 yoffy 1.5 // if Byte(Byte( p^ ) - $a1) < $5e then begin
611     if Byte( p^ ) in kKanji then begin
612 yoffy 1.1 if p + 1 < tail then begin
613     ch := (PByte( p )^ shl 8) or PByte( p + 1 )^;
614     case ch of
615 yoffy 1.12 // ?鴻???若?鴻?у??茯????????????????
616     //$8140: newMode := ModeWhite;
617 yoffy 1.1 $8141..$824e: newMode := ModeWGraph;
618     $824f..$8258: newMode := ModeWNum;
619     $8260..$829a: newMode := ModeWAlpha;
620     $829f..$82f1: newMode := ModeWHira;
621     $8340..$8396: newMode := ModeWKata;
622     else newMode := ModeWKanji;
623     end;
624 yoffy 1.6 // '??????#39; ??抗篁??????障???????帥?????????障????
625     if (mode = ModeWHira) or (mode = ModeWKata) then
626 yoffy 1.7 if (ch = $814a) or (ch = $814b) or (ch = $815b) then
627 yoffy 1.6 newMode := mode;
628 yoffy 1.1 end else begin
629     newMode := ModeWhite;
630     end;
631    
632     chSize := 2;
633     end else begin
634 yoffy 1.3 newMode := Modes( CharMode1[ Byte( p^ ) ] );
635 yoffy 1.12 if (p^ = ' ') and (Ord( mode ) >= Ord( ModeWGraph )) then begin
636     // 篁??障?ф?ユ????т??鴻???若??/span>
637     // ??茯???膵???????с?鴻???若?鴻??荅違????
638     // ?糸??茹?????????絽吾?鴻???若?鴻?у?阪??????????????荅違??????
639     newMode := mode;
640     end;
641 yoffy 1.1
642     chSize := 1;
643     end;
644    
645 yoffy 1.12 if mode <> newMode then begin
646 yoffy 1.1
647     // ??絖????帥?ゃ????紊??眼??????
648 yoffy 1.13 if mode <> ModeWhite then begin
649     SetLength( aWord, p - last );
650     CopyMemory( PChar( aWord ), last, p - last );
651 yoffy 1.11
652 yoffy 1.13 words.Text := changeMode( aWord, mode );
653 yoffy 1.11
654 yoffy 1.13 // ??茯??脂??/span>
655     addWord( wordCount, words );
656     end;
657 yoffy 1.1
658 yoffy 1.12 last := p;
659 yoffy 1.1 mode := newMode;
660    
661     end;
662    
663     p := p + chSize;
664     end; // while
665    
666     if mode <> ModeWhite then begin
667 yoffy 1.14 SetLength( aWord, p - last );
668     CopyMemory( PChar( aWord ), last, p - last );
669    
670 yoffy 1.12 words.Text := changeMode( aWord, mode );
671    
672     // ??茯??脂??/span>
673     addWord( wordCount, words );
674 yoffy 1.1 end;
675     finally
676 yoffy 1.12 words.Free;
677     wKanjiDelimiter.Free;
678     wHiraFinalDelimiter.Free;
679     wHiraDelimiter.Free;
680 yoffy 1.1 end;
681    
682     end;
683    
684     //==============================
685     // CalcPaulGraham
686     //==============================
687     function TGikoBayesian.CalcPaulGraham( wordCount : TWordCount ) : Extended;
688    
689     function p( const aWord : string ) : Single;
690     var
691     info : TWordInfo;
692     begin
693     info := Objects[ aWord ];
694     if info = nil then
695 yoffy 1.9 Result := 0.415
696 yoffy 1.1 else if info.NormalWord = 0 then
697     Result := 0.99
698     else if info.ImportantWord = 0 then
699     Result := 0.01
700 yoffy 1.9 else if info.ImportantWord + info.NormalWord * 2 < 5 then
701     Result := 0.5
702 h677 1.17 else begin
703     try
704     Result := ( info.ImportantWord / info.ImportantText ) /
705     ((info.NormalWord * 2 / info.NormalText ) +
706     (info.ImportantWord / info.ImportantText));
707     except
708     on EZeroDivide do Result := 0.99;
709     end;
710     end;
711 yoffy 1.1 end;
712    
713     var
714     s, q : Extended;
715     i : Integer;
716     narray : TList;
717     const
718     SAMPLE_COUNT = 15;
719     begin
720    
721     Result := 1;
722     if wordCount.Count = 0 then
723     Exit;
724    
725     narray := TList.Create;
726     try
727     for i := 0 to wordCount.Count - 1 do begin
728     narray.Add( Pointer( p( wordCount[ i ] ) ) );
729     end;
730    
731     narray.Sort( AbsSort );
732    
733     s := 1;
734     q := 1;
735     i := min( SAMPLE_COUNT, narray.Count );
736     while i > 0 do begin
737     Dec( i );
738 yoffy 1.9
739 yoffy 1.1 s := s * Single( narray[ i ] );
740     q := q * (1 - Single( narray[ i ] ));
741     end;
742    
743     Result := s / (s + q);
744     finally
745     narray.Free;
746     end;
747    
748     end;
749    
750     //==============================
751     // CalcGaryRobinson
752     //==============================
753     function TGikoBayesian.CalcGaryRobinson( wordCount : TWordCount ) : Extended;
754    
755     function p( const aWord : string ) : Single;
756     var
757     info : TWordInfo;
758     begin
759     info := Objects[ aWord ];
760     if info = nil then
761     Result := 0.415
762     else if info.ImportantWord = 0 then
763 yoffy 1.9 Result := 0.01
764 yoffy 1.1 else if info.NormalWord = 0 then
765 yoffy 1.9 Result := 0.99
766 yoffy 1.1 else
767 yoffy 1.15 {
768 yoffy 1.1 Result := ( info.ImportantWord / info.ImportantText ) /
769     ((info.NormalWord / info.NormalText ) +
770     (info.ImportantWord / info.ImportantText));
771 yoffy 1.15 }
772     Result := (info.ImportantWord * info.NormalText) /
773     (info.NormalWord * info.ImportantText +
774     info.ImportantWord * info.NormalText);
775 yoffy 1.1 end;
776    
777     function f( cnt : Integer; n, mean : Single ) : Extended;
778     const
779 yoffy 1.15 k = 0.001;
780 yoffy 1.1 begin
781     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
782     end;
783    
784     var
785     n : Extended;
786     narray : array of Single;
787     mean : Extended;
788     countInfo : TWordCountInfo;
789     i : Integer;
790 yoffy 1.9 P1, Q1, R1 : Extended;
791     cnt : Extended;
792     begin
793    
794     if wordCount.Count = 0 then begin
795     Result := 1;
796     Exit;
797     end;
798    
799     SetLength( narray, wordCount.Count );
800     mean := 0;
801     for i := 0 to wordCount.Count - 1 do begin
802     n := p( wordCount[ i ] );
803     narray[ i ] := n;
804     mean := mean + n;
805     end;
806     mean := mean / wordCount.Count;
807    
808 yoffy 1.10 P1 := 1;
809     Q1 := 1;
810 yoffy 1.9 for i := 0 to wordCount.Count - 1 do begin
811     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
812     n := f( countInfo.WordCount, narray[ i ], mean );
813 yoffy 1.10 P1 := P1 * ( 1 - n );
814     Q1 := Q1 * n;
815 yoffy 1.9 end;
816 yoffy 1.10 cnt := wordCount.Count;
817 yoffy 1.9 if cnt = 0 then
818 yoffy 1.16 cnt := 1;
819 yoffy 1.10 P1 := 1 - Power( P1, 1 / cnt );
820     Q1 := 1 - Power( Q1, 1 / cnt );
821 yoffy 1.9
822     if P1 + Q1 = 0 then begin
823     Result := 0.5
824     end else begin
825     n := (P1 - Q1) / (P1 + Q1);
826     Result := (1 + n) / 2;
827     end;
828    
829     end;
830    
831     //==============================
832     // CalcGaryRobinsonFisher
833     //==============================
834     function TGikoBayesian.CalcGaryRobinsonFisher(
835     wordCount : TWordCount
836     ) : Extended;
837    
838     function p( const aWord : string ) : Single;
839     var
840     info : TWordInfo;
841     begin
842     info := Objects[ aWord ];
843     if info = nil then
844     Result := 0.415
845     else if info.ImportantWord = 0 then
846     Result := 0.01
847     else if info.NormalWord = 0 then
848     Result := 0.99
849     else
850 yoffy 1.15 {
851     Result := ( info.ImportantWord / info.ImportantText ) /
852     ((info.NormalWord / info.NormalText ) +
853     (info.ImportantWord / info.ImportantText));
854     }
855     Result := (info.ImportantWord * info.NormalText) /
856     (info.NormalWord * info.ImportantText +
857     info.ImportantWord * info.NormalText);
858 yoffy 1.9 end;
859    
860     function f( cnt : Integer; n, mean : Single ) : Extended;
861     const
862 yoffy 1.15 k = 0.001;
863 yoffy 1.9 begin
864     Result := ( (k * mean) + (cnt * n) ) / (k + cnt);
865     end;
866    
867     function prbx( x2, degree : Extended ) : Extended;
868     begin
869    
870 yoffy 1.15 Result := 0.5;
871 yoffy 1.9
872     end;
873    
874     var
875     n : Extended;
876     narray : array of Single;
877     mean : Extended;
878     countInfo : TWordCountInfo;
879     i : Integer;
880 yoffy 1.1 normal : Extended;
881     important : Extended;
882 yoffy 1.9 P1, Q1 : Extended;
883 yoffy 1.1 cnt : Extended;
884     begin
885    
886     if wordCount.Count = 0 then begin
887     Result := 1;
888     Exit;
889     end;
890    
891     SetLength( narray, wordCount.Count );
892     mean := 0;
893     for i := 0 to wordCount.Count - 1 do begin
894     n := p( wordCount[ i ] );
895     narray[ i ] := n;
896     mean := mean + n;
897     end;
898     mean := mean / wordCount.Count;
899    
900 yoffy 1.9 P1 := 1;
901     Q1 := 1;
902 yoffy 1.1 for i := 0 to wordCount.Count - 1 do begin
903     countInfo := TWordCountInfo( wordCount.Objects[ i ] );
904     n := f( countInfo.WordCount, narray[ i ], mean );
905 yoffy 1.15 P1 := P1 * ( 1 - n );
906     Q1 := Q1 * n;
907 yoffy 1.1 end;
908 yoffy 1.15 cnt := wordCount.Count;
909 yoffy 1.1 if cnt = 0 then
910 yoffy 1.16 cnt := 1;
911 yoffy 1.15 P1 := Power( P1, 1 / cnt );
912     Q1 := Power( Q1, 1 / cnt );
913    
914     P1 := 1 - prbx( -2 * Ln( P1 ), 2 * cnt );
915     Q1 := 1 - prbx( -2 * Ln( Q1 ), 2 * cnt );
916    
917     Result := (1 + P1 - Q1) / 2;
918 yoffy 1.1
919     end;
920    
921     //==============================
922     // Parse
923     //==============================
924     function TGikoBayesian.Parse(
925     const text : string;
926     wordCount : TWordCount;
927 yoffy 1.9 algorithm : TGikoBayesianAlgorithm
928 yoffy 1.1 ) : Extended;
929     begin
930    
931     CountWord( text, wordCount );
932     case algorithm of
933     gbaPaulGraham: Result := CalcPaulGraham( wordCount );
934 yoffy 1.9 gbaGaryRobinson: Result := CalcGaryRobinson( wordCount );
935     gbaGaryRobinsonFisher:
936     Result := CalcGaryRobinsonFisher( wordCount );
937 yoffy 1.1 else Result := 0;
938     end;
939    
940     end;
941    
942     //==============================
943     // Learn
944     //==============================
945     procedure TGikoBayesian.Learn(
946     wordCount : TWordCount;
947     isImportant : Boolean );
948     var
949     aWord : string;
950     wordinfo : TWordInfo;
951     countinfo : TWordCountInfo;
952 yoffy 1.3 i : Integer;
953 yoffy 1.1 begin
954    
955     for i := 0 to wordCount.Count - 1 do begin
956     aWord := wordCount[ i ];
957     wordinfo := Objects[ aWord ];
958 yoffy 1.3 countinfo := TWordCountInfo( wordCount.Objects[ i ] );
959 yoffy 1.1 if wordinfo = nil then begin
960     wordinfo := TWordInfo.Create;
961     Objects[ aWord ] := wordinfo;
962     end;
963    
964     if isImportant then begin
965     wordinfo.ImportantWord := wordinfo.ImportantWord + countinfo.WordCount;
966     wordinfo.ImportantText := wordinfo.ImportantText + 1;
967     end else begin
968     wordinfo.NormalWord := wordinfo.NormalWord + countinfo.WordCount;
969     wordinfo.NormalText := wordinfo.NormalText + 1;
970     end;
971     end;
972    
973     end;
974    
975     //==============================
976     // Forget
977     //==============================
978     procedure TGikoBayesian.Forget(
979     wordCount : TWordCount;
980     isImportant : Boolean );
981     var
982     aWord : string;
983     wordinfo : TWordInfo;
984     countinfo : TWordCountInfo;
985     i : Integer;
986     begin
987    
988     for i := 0 to wordCount.Count - 1 do begin
989     aWord := wordCount[ i ];
990     wordinfo := Objects[ aWord ];
991     if wordinfo = nil then
992     Continue;
993    
994     countinfo := TWordCountInfo( wordCount.Objects[ i ] );
995     if isImportant then begin
996 yoffy 1.8 if wordInfo.ImportantText > 0 then begin
997     wordinfo.ImportantText := wordinfo.ImportantText - 1;
998     wordinfo.ImportantWord := wordinfo.ImportantWord - countinfo.WordCount;
999     end;
1000 yoffy 1.1 end else begin
1001 yoffy 1.8 if wordinfo.NormalText > 0 then begin
1002     wordinfo.NormalText := wordinfo.NormalText - 1;
1003     wordinfo.NormalWord := wordinfo.NormalWord - countinfo.WordCount;
1004     end;
1005 yoffy 1.1 end;
1006     end;
1007    
1008     end;
1009    
1010     end.

Back to OSDN">Back to OSDN
ViewVC Help
Powered by ViewVC 1.1.26