• R/O
  • SSH
  • HTTPS

dprojfilter: Commit


Commit MetaInfo

Revision17 (tree)
Time2018-09-28 21:12:12
Authordenis

Log Message

* merged extension for regular expressions submitted by Denis Bisson (with a few changes)
(RegExpr.pas taken from GExperts)
* Warning: has not been tested!

Change Summary

Incremental Difference

--- trunk/src/DprojFilter.dpr (revision 16)
+++ trunk/src/DprojFilter.dpr (revision 17)
@@ -5,7 +5,8 @@
55 uses
66 SysUtils,
77 u_DprojFilterMain in 'u_DprojFilterMain.pas',
8- u_dzDefaultMain in '..\libs\dzlib\src\u_dzDefaultMain.pas';
8+ u_dzDefaultMain in '..\libs\dzlib\src\u_dzDefaultMain.pas',
9+ RegExpr in 'RegExpr.pas';
910
1011 {$R *_version.res}
1112 {$R *_icon.res}
--- trunk/src/DprojFilter.dproj (revision 16)
+++ trunk/src/DprojFilter.dproj (revision 17)
@@ -1,105 +1,66 @@
11 <?xml version="1.0" encoding="utf-8"?>
22 <Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3- <PropertyGroup>
4- <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
5- <DCC_DCCCompiler>DCC32</DCC_DCCCompiler>
6- <DCC_DependencyCheckOutputName>..\DprojFilter.exe</DCC_DependencyCheckOutputName>
7- <MainSource>DprojFilter.dpr</MainSource>
8- <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
9- <ProjectGuid>{29985e20-7aa0-4356-91d8-bb4bc8b57893}</ProjectGuid>
10- </PropertyGroup>
11- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
12- <DCC_DcuOutput>..\dcu\win32\release</DCC_DcuOutput>
13- <DCC_DebugInformation>False</DCC_DebugInformation>
14- <DCC_Define>RELEASE;no_translation;no_translation_hint</DCC_Define>
15- <DCC_ExeOutput>..</DCC_ExeOutput>
16- <DCC_HppOutput>..\dcu\win32\release</DCC_HppOutput>
17- <DCC_IncludePath>..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_IncludePath>
18- <DCC_LocalDebugSymbols>False</DCC_LocalDebugSymbols>
19- <DCC_ObjOutput>..\dcu\win32\release</DCC_ObjOutput>
20- <DCC_ObjPath>..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_ObjPath>
21- <DCC_ResourcePath>..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_ResourcePath>
22- <DCC_SYMBOL_PLATFORM>False</DCC_SYMBOL_PLATFORM>
23- <DCC_SymbolReferenceInfo>0</DCC_SymbolReferenceInfo>
24- <DCC_UNIT_PLATFORM>False</DCC_UNIT_PLATFORM>
25- <DCC_UnitSearchPath>..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_UnitSearchPath>
26- <Version>7.0</Version>
27- </PropertyGroup>
28- <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
29- <DCC_DcuOutput>..\dcu\win32\debug</DCC_DcuOutput>
30- <DCC_Define>DEBUG;no_translation;no_translation_hint</DCC_Define>
31- <DCC_ExeOutput>..</DCC_ExeOutput>
32- <DCC_HppOutput>..\dcu\win32\debug</DCC_HppOutput>
33- <DCC_IncludePath>$(BDS)\lib\Debug;..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_IncludePath>
34- <DCC_ObjOutput>..\dcu\win32\debug</DCC_ObjOutput>
35- <DCC_ObjPath>$(BDS)\lib\Debug;..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_ObjPath>
36- <DCC_Optimize>False</DCC_Optimize>
37- <DCC_ResourcePath>$(BDS)\lib\Debug;..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_ResourcePath>
38- <DCC_SYMBOL_PLATFORM>False</DCC_SYMBOL_PLATFORM>
39- <DCC_UNIT_PLATFORM>False</DCC_UNIT_PLATFORM>
40- <DCC_UnitSearchPath>$(BDS)\lib\Debug;..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_UnitSearchPath>
41- <Version>7.0</Version>
42- </PropertyGroup>
43- <ProjectExtensions>
44- <Borland.Personality>Delphi.Personality</Borland.Personality>
45- <Borland.ProjectType/>
46- <BorlandProject>
47- <BorlandProject>
48- <Delphi.Personality>
49- <Parameters>
50- <Parameters Name="UseLauncher">False</Parameters>
51- <Parameters Name="LoadAllSymbols">True</Parameters>
52- <Parameters Name="LoadUnspecifiedSymbols">False</Parameters>
53- </Parameters>
54- <VersionInfo>
55- <VersionInfo Name="IncludeVerInfo">False</VersionInfo>
56- <VersionInfo Name="AutoIncBuild">False</VersionInfo>
57- <VersionInfo Name="MajorVer">1</VersionInfo>
58- <VersionInfo Name="MinorVer">0</VersionInfo>
59- <VersionInfo Name="Release">0</VersionInfo>
60- <VersionInfo Name="Build">0</VersionInfo>
61- <VersionInfo Name="Debug">False</VersionInfo>
62- <VersionInfo Name="PreRelease">False</VersionInfo>
63- <VersionInfo Name="Special">False</VersionInfo>
64- <VersionInfo Name="Private">False</VersionInfo>
65- <VersionInfo Name="DLL">False</VersionInfo>
66- <VersionInfo Name="Locale">1031</VersionInfo>
67- <VersionInfo Name="CodePage">1252</VersionInfo>
68- </VersionInfo>
69- <VersionInfoKeys>
70- <VersionInfoKeys Name="CompanyName"/>
71- <VersionInfoKeys Name="FileDescription"/>
72- <VersionInfoKeys Name="FileVersion">1.0.0.0</VersionInfoKeys>
73- <VersionInfoKeys Name="InternalName"/>
74- <VersionInfoKeys Name="LegalCopyright"/>
75- <VersionInfoKeys Name="LegalTrademarks"/>
76- <VersionInfoKeys Name="OriginalFilename"/>
77- <VersionInfoKeys Name="ProductName"/>
78- <VersionInfoKeys Name="ProductVersion">1.0.0.0</VersionInfoKeys>
79- <VersionInfoKeys Name="Comments"/>
80- </VersionInfoKeys>
81- <Excluded_Packages>
82- <Excluded_Packages Name="$(BDS)\bin\dcloffice2k100.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
83- <Excluded_Packages Name="$(BDS)\bin\dclofficexp100.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
84- <Excluded_Packages Name="C:\Users\Public\Documents\RAD Studio\5.0\Bpl\SIGComponents110.bpl">SIG internal components</Excluded_Packages>
85- </Excluded_Packages>
86- <Source>
87- <Source Name="MainSource">DprojFilter.dpr</Source>
88- </Source>
89- </Delphi.Personality>
90- </BorlandProject>
91- </BorlandProject>
92- </ProjectExtensions>
93- <Import Project="$(MSBuildBinPath)\Borland.Delphi.Targets"/>
94- <PropertyGroup>
95- <PostBuildEvent>call ..\buildtools\postbuild.cmd $(OUTPUTDIR)$(OUTPUTNAME)</PostBuildEvent>
96- <PreBuildEvent>call ..\buildtools\prebuild.cmd $(PROJECTPATH)</PreBuildEvent>
97- </PropertyGroup>
98- <ItemGroup>
99- <DelphiCompile Include="DprojFilter.dpr">
100- <MainSource>MainSource</MainSource>
101- </DelphiCompile>
102- <DCCReference Include="..\libs\dzlib\src\u_dzDefaultMain.pas"/>
103- <DCCReference Include="u_DprojFilterMain.pas"/>
104- </ItemGroup>
105-</Project>
3+ <PropertyGroup>
4+ <Configuration Condition=" '$(Configuration)' == '' ">Release</Configuration>
5+ <DCC_DCCCompiler>DCC32</DCC_DCCCompiler>
6+ <DCC_DependencyCheckOutputName>..\DprojFilter.exe</DCC_DependencyCheckOutputName>
7+ <MainSource>DprojFilter.dpr</MainSource>
8+ <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
9+ <ProjectGuid>{29985e20-7aa0-4356-91d8-bb4bc8b57893}</ProjectGuid>
10+ </PropertyGroup>
11+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
12+ <DCC_DcuOutput>..\dcu\win32\release</DCC_DcuOutput>
13+ <DCC_DebugInformation>False</DCC_DebugInformation>
14+ <DCC_Define>RELEASE;no_translation;no_translation_hint</DCC_Define>
15+ <DCC_ExeOutput>..</DCC_ExeOutput>
16+ <DCC_HppOutput>..\dcu\win32\release</DCC_HppOutput>
17+ <DCC_IncludePath>..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_IncludePath>
18+ <DCC_LocalDebugSymbols>False</DCC_LocalDebugSymbols>
19+ <DCC_ObjOutput>..\dcu\win32\release</DCC_ObjOutput>
20+ <DCC_ObjPath>..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_ObjPath>
21+ <DCC_ResourcePath>..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_ResourcePath>
22+ <DCC_SYMBOL_PLATFORM>False</DCC_SYMBOL_PLATFORM>
23+ <DCC_SymbolReferenceInfo>0</DCC_SymbolReferenceInfo>
24+ <DCC_UNIT_PLATFORM>False</DCC_UNIT_PLATFORM>
25+ <DCC_UnitSearchPath>..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_UnitSearchPath>
26+ <Version>7.0</Version>
27+ </PropertyGroup>
28+ <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
29+ <DCC_DcuOutput>..\dcu\win32\debug</DCC_DcuOutput>
30+ <DCC_Define>DEBUG;no_translation;no_translation_hint</DCC_Define>
31+ <DCC_ExeOutput>..</DCC_ExeOutput>
32+ <DCC_HppOutput>..\dcu\win32\debug</DCC_HppOutput>
33+ <DCC_IncludePath>$(BDS)\lib\Debug;..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_IncludePath>
34+ <DCC_ObjOutput>..\dcu\win32\debug</DCC_ObjOutput>
35+ <DCC_ObjPath>$(BDS)\lib\Debug;..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_ObjPath>
36+ <DCC_Optimize>False</DCC_Optimize>
37+ <DCC_ResourcePath>$(BDS)\lib\Debug;..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_ResourcePath>
38+ <DCC_SYMBOL_PLATFORM>False</DCC_SYMBOL_PLATFORM>
39+ <DCC_UNIT_PLATFORM>False</DCC_UNIT_PLATFORM>
40+ <DCC_UnitSearchPath>$(BDS)\lib\Debug;..\libs\dzlib\src;..\libs\dzlib\templates;..\libs\dzlib\forms;..\libs\dzlib\jedi_inc</DCC_UnitSearchPath>
41+ <Version>7.0</Version>
42+ </PropertyGroup>
43+ <ProjectExtensions>
44+ <Borland.Personality>Delphi.Personality</Borland.Personality>
45+ <Borland.ProjectType />
46+ <BorlandProject>
47+<BorlandProject><Delphi.Personality><Parameters><Parameters Name="UseLauncher">False</Parameters><Parameters Name="LoadAllSymbols">True</Parameters><Parameters Name="LoadUnspecifiedSymbols">False</Parameters></Parameters><VersionInfo><VersionInfo Name="IncludeVerInfo">False</VersionInfo><VersionInfo Name="AutoIncBuild">False</VersionInfo><VersionInfo Name="MajorVer">1</VersionInfo><VersionInfo Name="MinorVer">0</VersionInfo><VersionInfo Name="Release">0</VersionInfo><VersionInfo Name="Build">0</VersionInfo><VersionInfo Name="Debug">False</VersionInfo><VersionInfo Name="PreRelease">False</VersionInfo><VersionInfo Name="Special">False</VersionInfo><VersionInfo Name="Private">False</VersionInfo><VersionInfo Name="DLL">False</VersionInfo><VersionInfo Name="Locale">1031</VersionInfo><VersionInfo Name="CodePage">1252</VersionInfo></VersionInfo><VersionInfoKeys><VersionInfoKeys Name="CompanyName"></VersionInfoKeys><VersionInfoKeys Name="FileDescription"></VersionInfoKeys><VersionInfoKeys Name="FileVersion">1.0.0.0</VersionInfoKeys><VersionInfoKeys Name="InternalName"></VersionInfoKeys><VersionInfoKeys Name="LegalCopyright"></VersionInfoKeys><VersionInfoKeys Name="LegalTrademarks"></VersionInfoKeys><VersionInfoKeys Name="OriginalFilename"></VersionInfoKeys><VersionInfoKeys Name="ProductName"></VersionInfoKeys><VersionInfoKeys Name="ProductVersion">1.0.0.0</VersionInfoKeys><VersionInfoKeys Name="Comments"></VersionInfoKeys></VersionInfoKeys><Excluded_Packages>
48+ <Excluded_Packages Name="$(BDS)\bin\dcloffice2k100.bpl">Microsoft Office 2000 Sample Automation Server Wrapper Components</Excluded_Packages>
49+ <Excluded_Packages Name="$(BDS)\bin\dclofficexp100.bpl">Microsoft Office XP Sample Automation Server Wrapper Components</Excluded_Packages>
50+ <Excluded_Packages Name="C:\Users\Public\Documents\RAD Studio\5.0\Bpl\SIGComponents110.bpl">SIG internal components</Excluded_Packages>
51+ </Excluded_Packages><Source><Source Name="MainSource">DprojFilter.dpr</Source></Source></Delphi.Personality></BorlandProject></BorlandProject>
52+ </ProjectExtensions>
53+ <Import Project="$(MSBuildBinPath)\Borland.Delphi.Targets" />
54+ <PropertyGroup>
55+ <PreBuildEvent>call ..\buildtools\prebuild.cmd $(PROJECTPATH)</PreBuildEvent>
56+ <PostBuildEvent>call ..\buildtools\postbuild.cmd $(OUTPUTDIR)$(OUTPUTNAME)</PostBuildEvent>
57+ </PropertyGroup>
58+ <ItemGroup>
59+ <DelphiCompile Include="DprojFilter.dpr">
60+ <MainSource>MainSource</MainSource>
61+ </DelphiCompile>
62+ <DCCReference Include="..\libs\dzlib\src\u_dzDefaultMain.pas" />
63+ <DCCReference Include="RegExpr.pas" />
64+ <DCCReference Include="u_DprojFilterMain.pas" />
65+ </ItemGroup>
66+</Project>
\ No newline at end of file
--- trunk/src/DprojFilter_version.ini (revision 16)
+++ trunk/src/DprojFilter_version.ini (revision 17)
@@ -1,6 +1,6 @@
11 [Version Info]
22 AutoIncBuild=0
3-Build=1
3+Build=28
44 MajorVer=1
55 MinorVer=0
66 Release=0
@@ -7,7 +7,7 @@
77 Revision=0
88
99 [Version Info Keys]
10-FileVersion=1.0.0.1
10+FileVersion=1.0.0.28
1111 ProductVersion={today}
1212 FileDescription=DprojFilter
1313 OriginalFilename=DprojFilter.exe
--- trunk/src/RegExpr.pas (nonexistent)
+++ trunk/src/RegExpr.pas (revision 17)
@@ -0,0 +1,4165 @@
1+unit RegExpr;
2+
3+{
4+ TRegExpr class library
5+ Delphi Regular Expressions
6+
7+ Copyright (c) 1999-2004 Andrey V. Sorokin, St.Petersburg, Russia
8+ Delphi 2009 Adaption by Sebastian Zierer
9+ Minor fixes and error message changes by Erik Berry
10+
11+ You may use this software in any kind of development,
12+ including comercial, redistribute, and modify it freely,
13+ under the following restrictions :
14+ 1. This software is provided as it is, without any kind of
15+ warranty given. Use it at Your own risk.The author is not
16+ responsible for any consequences of use of this software.
17+ 2. The origin of this software may not be mispresented, You
18+ must not claim that You wrote the original software. If
19+ You use this software in any kind of product, it would be
20+ appreciated that there in a information box, or in the
21+ documentation would be an acknowledgement like
22+
23+ Partial Copyright (c) 2004 Andrey V. Sorokin
24+ http://RegExpStudio.com
25+ mailto:anso@mail.ru
26+
27+ 3. You may not have any income from distributing this source
28+ (or altered version of it) to other developers. When You
29+ use this product in a comercial package, the source may
30+ not be charged seperatly.
31+ 4. Altered versions must be plainly marked as such, and must
32+ not be misrepresented as being the original software.
33+ 5. RegExp Studio application and all the visual components as
34+ well as documentation is not part of the TRegExpr library
35+ and is not free for usage.
36+
37+ mailto:anso@mail.ru
38+ http://RegExpStudio.com
39+ http://anso.da.ru/
40+}
41+
42+interface
43+
44+// ======== Determine compiler
45+{$IFDEF VER80} Sorry, TRegExpr is for 32-bits Delphi only. Delphi 1 is not supported (and whos really care today?!). {$ENDIF}
46+{$IFDEF VER90} {$DEFINE D2} {$ENDIF} // D2
47+{$IFDEF VER93} {$DEFINE D2} {$ENDIF} // CPPB 1
48+{$IFDEF VER100} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D3
49+{$IFDEF VER110} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // CPPB 3
50+{$IFDEF VER120} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D4
51+{$IFDEF VER130} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D5
52+{$IFDEF VER140} {$DEFINE D6} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D6
53+{$IFDEF VER150} {$DEFINE D7} {$DEFINE D6} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$ENDIF} // D7
54+{$IF CompilerVersion > 15} {$DEFINE D7} {$DEFINE D6} {$DEFINE D5} {$DEFINE D4} {$DEFINE D3} {$DEFINE D2} {$IFEND} // D7
55+{$IFDEF VER310} {$DEFINE D101} {$ENDIF}
56+{$IFDEF VER320} {$DEFINE D101} {$DEFINE D102} {$ENDIF}
57+
58+// ======== Define base compiler options
59+{$BOOLEVAL OFF}
60+{$EXTENDEDSYNTAX ON}
61+{$LONGSTRINGS ON}
62+{$OPTIMIZATION ON}
63+{$IFDEF D6}
64+ {$WARN SYMBOL_PLATFORM OFF} // Suppress .Net warnings
65+{$ENDIF}
66+{$IFDEF D7}
67+ {$WARN UNSAFE_CAST OFF} // Suppress .Net warnings
68+ {$WARN UNSAFE_TYPE OFF} // Suppress .Net warnings
69+ {$WARN UNSAFE_CODE OFF} // Suppress .Net warnings
70+{$ENDIF}
71+{$IFDEF FPC}
72+ {$MODE DELPHI} // Delphi-compatible mode in FreePascal
73+{$ENDIF}
74+
75+// ======== Define options for TRegExpr engine
76+{.$DEFINE UniCode} // Unicode support
77+{$IF CompilerVersion >= 20}
78+ {$DEFINE UniCode}
79+{$IFEND}
80+{$DEFINE RegExpPCodeDump} // p-code dumping (see Dump method)
81+{$IFNDEF FPC} // the option is not supported in FreePascal
82+ {$DEFINE reRealExceptionAddr} // exceptions will point to appropriate source line, not to Error procedure
83+{$ENDIF}
84+{$DEFINE ComplexBraces} // support braces in complex cases
85+{$IFNDEF UniCode} // the option applicable only for non-UniCode mode
86+ {$DEFINE UseSetOfChar} // Significant optimization by using set of char
87+{$ENDIF}
88+{$IFDEF UseSetOfChar}
89+ {$DEFINE UseFirstCharSet} // Fast skip between matches for r.e. that starts with determined set of chars
90+{$ENDIF}
91+
92+// ======== Define Pascal-language options
93+// Define 'UseAsserts' option (do not edit this definitions).
94+// Asserts used to catch 'strange bugs' in TRegExpr implementation (when something goes
95+// completely wrong). You can swith asserts on/off with help of {$C+}/{$C-} compiler options.
96+{$IFDEF D3} {$DEFINE UseAsserts} {$ENDIF}
97+{$IFDEF FPC} {$DEFINE UseAsserts} {$ENDIF}
98+
99+// Define 'use subroutine parameters default values' option (do not edit this definition).
100+{$IFDEF D4} {$DEFINE DefParam} {$ENDIF}
101+
102+// Define 'OverMeth' options, to use method overloading (do not edit this definitions).
103+{$IFDEF D5} {$DEFINE OverMeth} {$ENDIF}
104+{$IFDEF FPC} {$DEFINE OverMeth} {$ENDIF}
105+
106+uses
107+ Classes, // TStrings in Split method
108+ SysUtils; // Exception
109+
110+type
111+ {$IFDEF UniCode}
112+ PRegExprChar = PWideChar;
113+ {$IF CompilerVersion >= 20}
114+ RegExprString = UnicodeString;
115+ {$ELSE}
116+ RegExprString = WideString;
117+ {$IFEND}
118+ REChar = WideChar;
119+ {$ELSE}
120+ PRegExprChar = PChar;
121+ RegExprString = AnsiString; //###0.952 was string
122+ REChar = Char;
123+ {$ENDIF}
124+ TREOp = REChar; // internal p-code type //###0.933
125+ PREOp = ^TREOp;
126+ TRENextOff = integer; // internal Next "pointer" (offset to current p-code) //###0.933
127+ PRENextOff = ^TRENextOff; // used for extracting Next "pointers" from compiled r.e. //###0.933
128+ TREBracesArg = integer; // type of {m,n} arguments
129+ PREBracesArg = ^TREBracesArg;
130+
131+const
132+ REOpSz = SizeOf (TREOp) div SizeOf (REChar); // size of p-code in RegExprString units
133+ RENextOffSz = SizeOf (TRENextOff) div SizeOf (REChar); // size of Next 'pointer' -"-
134+ REBracesArgSz = SizeOf (TREBracesArg) div SizeOf (REChar); // size of BRACES arguments -"-
135+
136+type
137+ TRegExprInvertCaseFunction = function (const Ch : REChar) : REChar
138+ of object;
139+
140+const
141+ EscChar = '\'; // 'Escape'-char ('\' in common r.e.) used for escaping metachars (\w, \d etc).
142+ RegExprModifierI : boolean = False; // default value for ModifierI
143+ RegExprModifierR : boolean = False; // default value for ModifierR
144+ RegExprModifierS : boolean = True; // default value for ModifierS
145+ RegExprModifierG : boolean = True; // default value for ModifierG
146+ RegExprModifierM : boolean = False; // default value for ModifierM
147+ RegExprModifierX : boolean = False; // default value for ModifierX
148+ RegExprSpaceChars : RegExprString = // default value for SpaceChars
149+ ' '#$9#$A#$D#$C;
150+ RegExprWordChars : RegExprString = // default value for WordChars
151+ '0123456789' //###0.940
152+ + 'abcdefghijklmnopqrstuvwxyz'
153+ + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_';
154+ RegExprLineSeparators : RegExprString =// default value for LineSeparators
155+ #$d#$a{$IFDEF UniCode}+#$b#$c#$2028#$2029#$85{$ENDIF}; //###0.947
156+ RegExprLinePairedSeparator : RegExprString =// default value for LinePairedSeparator
157+ #$d#$a;
158+ { if You need Unix-styled line separators (only \n), then use:
159+ RegExprLineSeparators = #$a;
160+ RegExprLinePairedSeparator = '';
161+ }
162+
163+
164+const
165+ NSUBEXP = 15; // max number of subexpression //###0.929
166+ // Cannot be more than NSUBEXPMAX
167+ // Be carefull - don't use values which overflow CLOSE opcode
168+ // (in this case you'll get compiler erorr).
169+ // Big NSUBEXP will cause more slow work and more stack required
170+ NSUBEXPMAX = 255; // Max possible value for NSUBEXP. //###0.945
171+ // Don't change it! It's defined by internal TRegExpr design.
172+
173+ MaxBracesArg = $7FFFFFFF - 1; // max value for {n,m} arguments //###0.933
174+
175+ {$IFDEF ComplexBraces}
176+ LoopStackMax = 10; // max depth of loops stack //###0.925
177+ {$ENDIF}
178+
179+ TinySetLen = 3;
180+ // if range includes more then TinySetLen chars, //###0.934
181+ // then use full (32 bytes) ANYOFFULL instead of ANYOF[BUT]TINYSET
182+ // !!! Attension ! If you change TinySetLen, you must
183+ // change code marked as "//!!!TinySet"
184+
185+
186+type
187+
188+{$IFDEF UseSetOfChar}
189+ PSetOfREChar = ^TSetOfREChar;
190+ TSetOfREChar = set of REChar;
191+{$ENDIF}
192+
193+ TRegExpr = class;
194+
195+ TRegExprReplaceFunction = function (ARegExpr : TRegExpr): string
196+ of object;
197+
198+ TRegExpr = class
199+ private
200+ startp : array [0 .. NSUBEXP - 1] of PRegExprChar; // founded expr starting points
201+ endp : array [0 .. NSUBEXP - 1] of PRegExprChar; // founded expr end points
202+
203+ {$IFDEF ComplexBraces}
204+ LoopStack : array [1 .. LoopStackMax] of integer; // state before entering loop
205+ LoopStackIdx : integer; // 0 - out of all loops
206+ {$ENDIF}
207+
208+ // The "internal use only" fields to pass info from compile
209+ // to execute that permits the execute phase to run lots faster on
210+ // simple cases.
211+ regstart : REChar; // char that must begin a match; '\0' if none obvious
212+ reganch : REChar; // is the match anchored (at beginning-of-line only)?
213+ regmust : PRegExprChar; // string (pointer into program) that match must include, or nil
214+ regmlen : integer; // length of regmust string
215+ // Regstart and reganch permit very fast decisions on suitable starting points
216+ // for a match, cutting down the work a lot. Regmust permits fast rejection
217+ // of lines that cannot possibly match. The regmust tests are costly enough
218+ // that regcomp() supplies a regmust only if the r.e. contains something
219+ // potentially expensive (at present, the only such thing detected is * or +
220+ // at the start of the r.e., which can involve a lot of backup). Regmlen is
221+ // supplied because the test in regexec() needs it and regcomp() is computing
222+ // it anyway.
223+ {$IFDEF UseFirstCharSet} //###0.929
224+ FirstCharSet : TSetOfREChar;
225+ {$ENDIF}
226+
227+ // work variables for Exec's routins - save stack in recursion}
228+ reginput : PRegExprChar; // String-input pointer.
229+ fInputStart : PRegExprChar; // Pointer to first char of input string.
230+ fInputEnd : PRegExprChar; // Pointer to char AFTER last char of input string
231+
232+ // work variables for compiler's routines
233+ regparse : PRegExprChar; // Input-scan pointer.
234+ regnpar : integer; // count.
235+ regdummy : char;
236+ regcode : PRegExprChar; // Code-emit pointer; @regdummy = don't.
237+ regsize : integer; // Code size.
238+
239+ regexpbeg : PRegExprChar; // only for error handling. Contains
240+ // pointer to beginning of r.e. while compiling
241+ fExprIsCompiled : boolean; // true if r.e. successfully compiled
242+
243+ // programm is essentially a linear encoding
244+ // of a nondeterministic finite-state machine (aka syntax charts or
245+ // "railroad normal form" in parsing technology). Each node is an opcode
246+ // plus a "next" pointer, possibly plus an operand. "Next" pointers of
247+ // all nodes except BRANCH implement concatenation; a "next" pointer with
248+ // a BRANCH on both ends of it is connecting two alternatives. (Here we
249+ // have one of the subtle syntax dependencies: an individual BRANCH (as
250+ // opposed to a collection of them) is never concatenated with anything
251+ // because of operator precedence.) The operand of some types of node is
252+ // a literal string; for others, it is a node leading into a sub-FSM. In
253+ // particular, the operand of a BRANCH node is the first node of the branch.
254+ // (NB this is *not* a tree structure: the tail of the branch connects
255+ // to the thing following the set of BRANCHes.) The opcodes are:
256+ programm : PRegExprChar; // Unwarranted chumminess with compiler.
257+
258+ fExpression : PRegExprChar; // source of compiled r.e.
259+ fInputString : PRegExprChar; // input string
260+
261+ fLastError : integer; // see Error, LastError
262+
263+ fModifiers : integer; // modifiers
264+ fCompModifiers : integer; // compiler's copy of modifiers
265+ fProgModifiers : integer; // modifiers values from last programm compilation
266+
267+ fSpaceChars : RegExprString; //###0.927
268+ fWordChars : RegExprString; //###0.929
269+ fInvertCase : TRegExprInvertCaseFunction; //###0.927
270+
271+ fLineSeparators : RegExprString; //###0.941
272+ fLinePairedSeparatorAssigned : boolean;
273+ fLinePairedSeparatorHead,
274+ fLinePairedSeparatorTail : REChar;
275+ {$IFNDEF UniCode}
276+ fLineSeparatorsSet : set of REChar;
277+ {$ENDIF}
278+
279+ procedure InvalidateProgramm;
280+ // Mark programm as have to be [re]compiled
281+
282+ function IsProgrammOk : boolean; //###0.941
283+ // Check if we can use precompiled r.e. or
284+ // [re]compile it if something changed
285+
286+ function GetExpression : RegExprString;
287+ procedure SetExpression (const s : RegExprString);
288+
289+ function GetModifierStr : RegExprString;
290+ class function ParseModifiersStr (const AModifiers : RegExprString;
291+ var AModifiersInt : integer) : boolean; //###0.941 class function now
292+ // Parse AModifiers string and return true and set AModifiersInt
293+ // if it's in format 'ismxrg-ismxrg'.
294+ procedure SetModifierStr (const AModifiers : RegExprString);
295+
296+ function GetModifier (AIndex : integer) : boolean;
297+ procedure SetModifier (AIndex : integer; ASet : boolean);
298+
299+ procedure Error (AErrorID : integer); virtual; // error handler.
300+ // Default handler raise exception ERegExpr with
301+ // Message = ErrorMsg (AErrorID), ErrorCode = AErrorID
302+ // and CompilerErrorPos = value of property CompilerErrorPos.
303+
304+
305+ {==================== Compiler section ===================}
306+ function CompileRegExpr (exp : PRegExprChar) : boolean;
307+ // compile a regular expression into internal code
308+
309+ procedure Tail (p : PRegExprChar; val : PRegExprChar);
310+ // set the next-pointer at the end of a node chain
311+
312+ procedure OpTail (p : PRegExprChar; val : PRegExprChar);
313+ // regoptail - regtail on operand of first argument; nop if operandless
314+
315+ function EmitNode (op : TREOp) : PRegExprChar;
316+ // regnode - emit a node, return location
317+
318+ procedure EmitC (b : REChar);
319+ // emit (if appropriate) a byte of code
320+
321+ procedure InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer); //###0.90
322+ // insert an operator in front of already-emitted operand
323+ // Means relocating the operand.
324+
325+ function ParseReg (paren : integer; var flagp : integer) : PRegExprChar;
326+ // regular expression, i.e. main body or parenthesized thing
327+
328+ function ParseBranch (var flagp : integer) : PRegExprChar;
329+ // one alternative of an | operator
330+
331+ function ParsePiece (var flagp : integer) : PRegExprChar;
332+ // something followed by possible [*+?]
333+
334+ function ParseAtom (var flagp : integer) : PRegExprChar;
335+ // the lowest level
336+
337+ function GetCompilerErrorPos : integer;
338+ // current pos in r.e. - for error hanling
339+
340+ {$IFDEF UseFirstCharSet} //###0.929
341+ procedure FillFirstCharSet (prog : PRegExprChar);
342+ {$ENDIF}
343+
344+ {===================== Mathing section ===================}
345+ function regrepeat (p : PRegExprChar; AMax : integer) : integer;
346+ // repeatedly match something simple, report how many
347+
348+ function regnext (p : PRegExprChar) : PRegExprChar;
349+ // dig the "next" pointer out of a node
350+
351+ function MatchPrim (prog : PRegExprChar) : boolean;
352+ // recursively matching routine
353+
354+ function ExecPrim (AOffset: integer) : boolean;
355+ // Exec for stored InputString
356+
357+ {$IFDEF RegExpPCodeDump}
358+ function DumpOp (op : REChar) : RegExprString;
359+ {$ENDIF}
360+
361+ function GetSubExprMatchCount : integer;
362+ function GetMatchPos (Idx : integer) : integer;
363+ function GetMatchLen (Idx : integer) : integer;
364+ function GetMatch (Idx : integer) : RegExprString;
365+
366+ function GetInputString : RegExprString;
367+ procedure SetInputString (const AInputString : RegExprString);
368+
369+ {$IFNDEF UseSetOfChar}
370+ function StrScanCI (s : PRegExprChar; ch : REChar) : PRegExprChar; //###0.928
371+ {$ENDIF}
372+
373+ procedure SetLineSeparators (const AStr : RegExprString);
374+ procedure SetLinePairedSeparator (const AStr : RegExprString);
375+ function GetLinePairedSeparator : RegExprString;
376+
377+ public
378+ constructor Create;
379+ destructor Destroy; override;
380+
381+ class function VersionMajor : integer; //###0.944
382+ class function VersionMinor : integer; //###0.944
383+
384+ property Expression : RegExprString read GetExpression write SetExpression;
385+ // Regular expression.
386+ // For optimization, TRegExpr will automatically compiles it into 'P-code'
387+ // (You can see it with help of Dump method) and stores in internal
388+ // structures. Real [re]compilation occures only when it really needed -
389+ // while calling Exec[Next], Substitute, Dump, etc
390+ // and only if Expression or other P-code affected properties was changed
391+ // after last [re]compilation.
392+ // If any errors while [re]compilation occures, Error method is called
393+ // (by default Error raises exception - see below)
394+
395+ property ModifierStr : RegExprString read GetModifierStr write SetModifierStr;
396+ // Set/get default values of r.e.syntax modifiers. Modifiers in
397+ // r.e. (?ismx-ismx) will replace this default values.
398+ // If you try to set unsupported modifier, Error will be called
399+ // (by defaul Error raises exception ERegExpr).
400+
401+ property ModifierI : boolean index 1 read GetModifier write SetModifier;
402+ // Modifier /i - caseinsensitive, initialized from RegExprModifierI
403+
404+ property ModifierR : boolean index 2 read GetModifier write SetModifier;
405+ // Modifier /r - use r.e.syntax extended for russian,
406+ // (was property ExtSyntaxEnabled in previous versions)
407+ // If true, then à-ÿ additional include russian letter '¸',
408+ // À-ß additional include '¨', and à-ß include all russian symbols.
409+ // You have to turn it off if it may interfere with you national alphabet.
410+ // , initialized from RegExprModifierR
411+
412+ property ModifierS : boolean index 3 read GetModifier write SetModifier;
413+ // Modifier /s - '.' works as any char (else as [^\n]),
414+ // , initialized from RegExprModifierS
415+
416+ property ModifierG : boolean index 4 read GetModifier write SetModifier;
417+ // Switching off modifier /g switchs all operators in
418+ // non-greedy style, so if ModifierG = False, then
419+ // all '*' works as '*?', all '+' as '+?' and so on.
420+ // , initialized from RegExprModifierG
421+
422+ property ModifierM : boolean index 5 read GetModifier write SetModifier;
423+ // Treat string as multiple lines. That is, change `^' and `$' from
424+ // matching at only the very start or end of the string to the start
425+ // or end of any line anywhere within the string.
426+ // , initialized from RegExprModifierM
427+
428+ property ModifierX : boolean index 6 read GetModifier write SetModifier;
429+ // Modifier /x - eXtended syntax, allow r.e. text formatting,
430+ // see description in the help. Initialized from RegExprModifierX
431+
432+ function Exec (const AInputString : RegExprString) : boolean; {$IFDEF OverMeth} overload;
433+ {$IFNDEF FPC} // I do not know why FreePascal cannot overload methods with empty param list
434+ function Exec : boolean; overload; //###0.949
435+ {$ENDIF}
436+ function Exec (AOffset: integer) : boolean; overload; //###0.949
437+ {$ENDIF}
438+ // match a programm against a string AInputString
439+ // !!! Exec store AInputString into InputString property
440+ // For Delphi 5 and higher available overloaded versions - first without
441+ // parameter (uses already assigned to InputString property value)
442+ // and second that has integer parameter and is same as ExecPos
443+
444+ function ExecNext : boolean;
445+ // find next match:
446+ // ExecNext;
447+ // works same as
448+ // if MatchLen [0] = 0 then ExecPos (MatchPos [0] + 1)
449+ // else ExecPos (MatchPos [0] + MatchLen [0]);
450+ // but it's more simpler !
451+ // Raises exception if used without preceeding SUCCESSFUL call to
452+ // Exec* (Exec, ExecPos, ExecNext). So You always must use something like
453+ // if Exec (InputString) then repeat { proceed results} until not ExecNext;
454+
455+ function ExecPos (AOffset: integer {$IFDEF DefParam}= 1{$ENDIF}) : boolean;
456+ // find match for InputString starting from AOffset position
457+ // (AOffset=1 - first char of InputString)
458+
459+ property InputString : RegExprString read GetInputString write SetInputString;
460+ // returns current input string (from last Exec call or last assign
461+ // to this property).
462+ // Any assignment to this property clear Match* properties !
463+
464+ function Substitute (const ATemplate : RegExprString) : RegExprString;
465+ // Returns ATemplate with '$&' or '$0' replaced by whole r.e.
466+ // occurence and '$n' replaced by occurence of subexpression #n.
467+ // Since v.0.929 '$' used instead of '\' (for future extensions
468+ // and for more Perl-compatibility) and accept more then one digit.
469+ // If you want place into template raw '$' or '\', use prefix '\'
470+ // Example: '1\$ is $2\\rub\\' -> '1$ is <Match[2]>\rub\'
471+ // If you want to place raw digit after '$n' you must delimit
472+ // n with curly braces '{}'.
473+ // Example: 'a$12bc' -> 'a<Match[12]>bc'
474+ // 'a${1}2bc' -> 'a<Match[1]>2bc'.
475+
476+ procedure Split (AInputStr : RegExprString; APieces : TStrings);
477+ // Split AInputStr into APieces by r.e. occurencies
478+ // Internally calls Exec[Next]
479+
480+ function Replace (AInputStr : RegExprString;
481+ const AReplaceStr : RegExprString;
482+ AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) //###0.946
483+ : RegExprString; {$IFDEF OverMeth} overload;
484+ function Replace (AInputStr : RegExprString;
485+ AReplaceFunc : TRegExprReplaceFunction)
486+ : RegExprString; overload;
487+ {$ENDIF}
488+ function ReplaceEx (AInputStr : RegExprString;
489+ AReplaceFunc : TRegExprReplaceFunction)
490+ : RegExprString;
491+ // Returns AInputStr with r.e. occurencies replaced by AReplaceStr
492+ // If AUseSubstitution is true, then AReplaceStr will be used
493+ // as template for Substitution methods.
494+ // For example:
495+ // Expression := '({-i}block|var)\s*\(\s*([^ ]*)\s*\)\s*';
496+ // Replace ('BLOCK( test1)', 'def "$1" value "$2"', True);
497+ // will return: def 'BLOCK' value 'test1'
498+ // Replace ('BLOCK( test1)', 'def "$1" value "$2"')
499+ // will return: def "$1" value "$2"
500+ // Internally calls Exec[Next]
501+ // Overloaded version and ReplaceEx operate with call-back function,
502+ // so You can implement really complex functionality.
503+
504+ property SubExprMatchCount : integer read GetSubExprMatchCount;
505+ // Number of subexpressions has been found in last Exec* call.
506+ // If there are no subexpr. but whole expr was found (Exec* returned True),
507+ // then SubExprMatchCount=0, if no subexpressions nor whole
508+ // r.e. found (Exec* returned false) then SubExprMatchCount=-1.
509+ // Note, that some subexpr. may be not found and for such
510+ // subexpr. MathPos=MatchLen=-1 and Match=''.
511+ // For example: Expression := '(1)?2(3)?';
512+ // Exec ('123'): SubExprMatchCount=2, Match[0]='123', [1]='1', [2]='3'
513+ // Exec ('12'): SubExprMatchCount=1, Match[0]='12', [1]='1'
514+ // Exec ('23'): SubExprMatchCount=2, Match[0]='23', [1]='', [2]='3'
515+ // Exec ('2'): SubExprMatchCount=0, Match[0]='2'
516+ // Exec ('7') - return False: SubExprMatchCount=-1
517+
518+ property MatchPos [Idx : integer] : integer read GetMatchPos;
519+ // pos of entrance subexpr. #Idx into tested in last Exec*
520+ // string. First subexpr. have Idx=1, last - MatchCount,
521+ // whole r.e. have Idx=0.
522+ // Returns -1 if in r.e. no such subexpr. or this subexpr.
523+ // not found in input string.
524+
525+ property MatchLen [Idx : integer] : integer read GetMatchLen;
526+ // len of entrance subexpr. #Idx r.e. into tested in last Exec*
527+ // string. First subexpr. have Idx=1, last - MatchCount,
528+ // whole r.e. have Idx=0.
529+ // Returns -1 if in r.e. no such subexpr. or this subexpr.
530+ // not found in input string.
531+ // Remember - MatchLen may be 0 (if r.e. match empty string) !
532+
533+ property Match [Idx : integer] : RegExprString read GetMatch;
534+ // == copy (InputString, MatchPos [Idx], MatchLen [Idx])
535+ // Returns '' if in r.e. no such subexpr. or this subexpr.
536+ // not found in input string.
537+
538+ function LastError : integer;
539+ // Returns ID of last error, 0 if no errors (unusable if
540+ // Error method raises exception) and clear internal status
541+ // into 0 (no errors).
542+
543+ function ErrorMsg (AErrorID : integer) : RegExprString; virtual;
544+ // Returns Error message for error with ID = AErrorID.
545+
546+ property CompilerErrorPos : integer read GetCompilerErrorPos;
547+ // Returns pos in r.e. there compiler stopped.
548+ // Usefull for error diagnostics
549+
550+ property SpaceChars : RegExprString read fSpaceChars write fSpaceChars; //###0.927
551+ // Contains chars, treated as /s (initially filled with RegExprSpaceChars
552+ // global constant)
553+
554+ property WordChars : RegExprString read fWordChars write fWordChars; //###0.929
555+ // Contains chars, treated as /w (initially filled with RegExprWordChars
556+ // global constant)
557+
558+ property LineSeparators : RegExprString read fLineSeparators write SetLineSeparators; //###0.941
559+ // line separators (like \n in Unix)
560+
561+ property LinePairedSeparator : RegExprString read GetLinePairedSeparator write SetLinePairedSeparator; //###0.941
562+ // paired line separator (like \r\n in DOS and Windows).
563+ // must contain exactly two chars or no chars at all
564+
565+ class function InvertCaseFunction (const Ch : REChar) : REChar;
566+ // Converts Ch into upper case if it in lower case or in lower
567+ // if it in upper (uses current system local setings)
568+
569+ property InvertCase : TRegExprInvertCaseFunction read fInvertCase write fInvertCase; //##0.935
570+ // Set this property if you want to override case-insensitive functionality.
571+ // Create set it to RegExprInvertCaseFunction (InvertCaseFunction by default)
572+
573+ procedure Compile; //###0.941
574+ // [Re]compile r.e. Usefull for example for GUI r.e. editors (to check
575+ // all properties validity).
576+
577+ {$IFDEF RegExpPCodeDump}
578+ function Dump : RegExprString;
579+ // dump a compiled regexp in vaguely comprehensible form
580+ {$ENDIF}
581+ end;
582+
583+ ERegExpr = class (Exception)
584+ public
585+ ErrorCode : integer;
586+ CompilerErrorPos : integer;
587+ end;
588+
589+const
590+ RegExprInvertCaseFunction : TRegExprInvertCaseFunction = {$IFDEF FPC} nil {$ELSE} TRegExpr.InvertCaseFunction{$ENDIF};
591+ // defaul for InvertCase property
592+
593+function ExecRegExpr (const ARegExpr, AInputStr : RegExprString) : boolean;
594+// true if string AInputString match regular expression ARegExpr
595+// ! will raise exeption if syntax errors in ARegExpr
596+
597+procedure SplitRegExpr (const ARegExpr, AInputStr : RegExprString; APieces : TStrings);
598+// Split AInputStr into APieces by r.e. ARegExpr occurencies
599+
600+function ReplaceRegExpr (const ARegExpr, AInputStr, AReplaceStr : RegExprString;
601+ AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) : RegExprString; //###0.947
602+// Returns AInputStr with r.e. occurencies replaced by AReplaceStr
603+// If AUseSubstitution is true, then AReplaceStr will be used
604+// as template for Substitution methods.
605+// For example:
606+// ReplaceRegExpr ('({-i}block|var)\s*\(\s*([^ ]*)\s*\)\s*',
607+// 'BLOCK( test1)', 'def "$1" value "$2"', True)
608+// will return: def 'BLOCK' value 'test1'
609+// ReplaceRegExpr ('({-i}block|var)\s*\(\s*([^ ]*)\s*\)\s*',
610+// 'BLOCK( test1)', 'def "$1" value "$2"')
611+// will return: def "$1" value "$2"
612+
613+function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString;
614+// Replace all metachars with its safe representation,
615+// for example 'abc$cd.(' converts into 'abc\$cd\.\('
616+// This function usefull for r.e. autogeneration from
617+// user input
618+
619+function RegExprSubExpressions (const ARegExpr : string;
620+ ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : integer;
621+// Makes list of subexpressions found in ARegExpr r.e.
622+// In ASubExps every item represent subexpression,
623+// from first to last, in format:
624+// String - subexpression text (without '()')
625+// low word of Object - starting position in ARegExpr, including '('
626+// if exists! (first position is 1)
627+// high word of Object - length, including starting '(' and ending ')'
628+// if exist!
629+// AExtendedSyntax - must be True if modifier /m will be On while
630+// using the r.e.
631+// Usefull for GUI editors of r.e. etc (You can find example of using
632+// in TestRExp.dpr project)
633+// Returns
634+// 0 Success. No unbalanced brackets was found;
635+// -1 There are not enough closing brackets ')';
636+// -(n+1) At position n was found opening '[' without //###0.942
637+// corresponding closing ']';
638+// n At position n was found closing bracket ')' without
639+// corresponding opening '('.
640+// If Result <> 0, then ASubExpr can contain empty items or illegal ones
641+
642+
643+implementation
644+
645+uses
646+ Windows; // CharUpper/Lower
647+
648+const
649+ TRegExprVersionMajor : integer = 0;
650+ TRegExprVersionMinor : integer = 952;
651+ // TRegExpr.VersionMajor/Minor return values of this constants
652+
653+ MaskModI = 1; // modifier /i bit in fModifiers
654+ MaskModR = 2; // -"- /r
655+ MaskModS = 4; // -"- /s
656+ MaskModG = 8; // -"- /g
657+ MaskModM = 16; // -"- /m
658+ MaskModX = 32; // -"- /x
659+
660+ {$IFDEF UniCode}
661+ XIgnoredChars = ' '#9#$d#$a;
662+ {$ELSE}
663+ XIgnoredChars = [' ', #9, #$d, #$a];
664+ {$ENDIF}
665+
666+{=============================================================}
667+{=================== WideString functions ====================}
668+{=============================================================}
669+
670+{$IFDEF UniCode}
671+
672+function StrPCopy (Dest: PRegExprChar; const Source: RegExprString): PRegExprChar;
673+ var
674+ i, Len : Integer;
675+ begin
676+ Len := length (Source); //###0.932
677+ for i := 1 to Len do
678+ Dest [i - 1] := Source [i];
679+ Dest [Len] := #0;
680+ Result := Dest;
681+ end; { of function StrPCopy
682+--------------------------------------------------------------}
683+
684+function StrLCopy (Dest, Source: PRegExprChar; MaxLen: Cardinal): PRegExprChar;
685+ var i: Integer;
686+ begin
687+ for i := 0 to MaxLen - 1 do
688+ Dest [i] := Source [i];
689+ Result := Dest;
690+ end; { of function StrLCopy
691+--------------------------------------------------------------}
692+
693+function StrLen (Str: PRegExprChar): Cardinal;
694+ begin
695+ Result:=0;
696+ while Str [result] <> #0
697+ do Inc (Result);
698+ end; { of function StrLen
699+--------------------------------------------------------------}
700+
701+function StrPos (Str1, Str2: PRegExprChar): PRegExprChar;
702+ var n: Integer;
703+ begin
704+ Result := nil;
705+ n := Pos (RegExprString (Str2), RegExprString (Str1));
706+ if n = 0
707+ then EXIT;
708+ Result := Str1 + n - 1;
709+ end; { of function StrPos
710+--------------------------------------------------------------}
711+
712+function StrLComp (Str1, Str2: PRegExprChar; MaxLen: Cardinal): Integer;
713+ var S1, S2: RegExprString;
714+ begin
715+ S1 := Str1;
716+ S2 := Str2;
717+ if Copy (S1, 1, MaxLen) > Copy (S2, 1, MaxLen)
718+ then Result := 1
719+ else
720+ if Copy (S1, 1, MaxLen) < Copy (S2, 1, MaxLen)
721+ then Result := -1
722+ else Result := 0;
723+ end; { function StrLComp
724+--------------------------------------------------------------}
725+
726+function StrScan (Str: PRegExprChar; Chr: WideChar): PRegExprChar;
727+ begin
728+ Result := nil;
729+ while (Str^ <> #0) and (Str^ <> Chr)
730+ do Inc (Str);
731+ if (Str^ <> #0)
732+ then Result := Str;
733+ end; { of function StrScan
734+--------------------------------------------------------------}
735+
736+{$ENDIF}
737+
738+
739+{=============================================================}
740+{===================== Global functions ======================}
741+{=============================================================}
742+
743+function ExecRegExpr (const ARegExpr, AInputStr : RegExprString) : boolean;
744+ var r : TRegExpr;
745+ begin
746+ r := TRegExpr.Create;
747+ try
748+ r.Expression := ARegExpr;
749+ Result := r.Exec (AInputStr);
750+ finally r.Free;
751+ end;
752+ end; { of function ExecRegExpr
753+--------------------------------------------------------------}
754+
755+procedure SplitRegExpr (const ARegExpr, AInputStr : RegExprString; APieces : TStrings);
756+ var r : TRegExpr;
757+ begin
758+ APieces.Clear;
759+ r := TRegExpr.Create;
760+ try
761+ r.Expression := ARegExpr;
762+ r.Split (AInputStr, APieces);
763+ finally r.Free;
764+ end;
765+ end; { of procedure SplitRegExpr
766+--------------------------------------------------------------}
767+
768+function ReplaceRegExpr (const ARegExpr, AInputStr, AReplaceStr : RegExprString;
769+ AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) : RegExprString;
770+ begin
771+ with TRegExpr.Create do try
772+ Expression := ARegExpr;
773+ Result := Replace (AInputStr, AReplaceStr, AUseSubstitution);
774+ finally Free;
775+ end;
776+ end; { of function ReplaceRegExpr
777+--------------------------------------------------------------}
778+
779+function QuoteRegExprMetaChars (const AStr : RegExprString) : RegExprString;
780+ const
781+ RegExprMetaSet : RegExprString = '^$.[()|?+*'+EscChar+'{'
782+ + ']}'; // - this last are additional to META.
783+ // Very similar to META array, but slighly changed.
784+ // !Any changes in META array must be synchronized with this set.
785+ var
786+ i, i0, Len : integer;
787+ begin
788+ Result := '';
789+ Len := length (AStr);
790+ i := 1;
791+ i0 := i;
792+ while i <= Len do begin
793+ if Pos (AStr [i], RegExprMetaSet) > 0 then begin
794+ Result := Result + System.Copy (AStr, i0, i - i0)
795+ + EscChar + AStr [i];
796+ i0 := i + 1;
797+ end;
798+ inc (i);
799+ end;
800+ Result := Result + System.Copy (AStr, i0, MaxInt); // Tail
801+ end; { of function QuoteRegExprMetaChars
802+--------------------------------------------------------------}
803+
804+function RegExprSubExpressions (const ARegExpr : string;
805+ ASubExprs : TStrings; AExtendedSyntax : boolean{$IFDEF DefParam}= False{$ENDIF}) : integer;
806+ type
807+ TStackItemRec = record //###0.945
808+ SubExprIdx : integer;
809+ StartPos : integer;
810+ end;
811+ TStackArray = packed array [0 .. NSUBEXPMAX - 1] of TStackItemRec;
812+ var
813+ Len, SubExprLen : integer;
814+ i, i0 : integer;
815+ Modif : integer;
816+ Stack : ^TStackArray; //###0.945
817+ StackIdx, StackSz : integer;
818+ begin
819+ Result := 0; // no unbalanced brackets found at this very moment
820+
821+ ASubExprs.Clear; // I don't think that adding to non empty list
822+ // can be usefull, so I simplified algorithm to work only with empty list
823+
824+ Len := length (ARegExpr); // some optimization tricks
825+
826+ // first we have to calculate number of subexpression to reserve
827+ // space in Stack array (may be we'll reserve more then need, but
828+ // it's faster then memory reallocation during parsing)
829+ StackSz := 1; // add 1 for entire r.e.
830+ for i := 1 to Len do
831+ if ARegExpr [i] = '('
832+ then inc (StackSz);
833+// SetLength (Stack, StackSz); //###0.945
834+ GetMem (Stack, SizeOf (TStackItemRec) * StackSz);
835+ try
836+
837+ StackIdx := 0;
838+ i := 1;
839+ while (i <= Len) do begin
840+ case ARegExpr [i] of
841+ '(': begin
842+ if (i < Len) and (ARegExpr [i + 1] = '?') then begin
843+ // this is not subexpression, but comment or other
844+ // Perl extension. We must check is it (?ismxrg-ismxrg)
845+ // and change AExtendedSyntax if /x is changed.
846+ inc (i, 2); // skip '(?'
847+ i0 := i;
848+ while (i <= Len) and (ARegExpr [i] <> ')')
849+ do inc (i);
850+ if i > Len
851+ then Result := -1 // unbalansed '('
852+ else
853+ if TRegExpr.ParseModifiersStr (System.Copy (ARegExpr, i, i - i0), Modif)
854+ then AExtendedSyntax := (Modif and MaskModX) <> 0;
855+ end
856+ else begin // subexpression starts
857+ ASubExprs.Add (''); // just reserve space
858+ with Stack [StackIdx] do begin
859+ SubExprIdx := ASubExprs.Count - 1;
860+ StartPos := i;
861+ end;
862+ inc (StackIdx);
863+ end;
864+ end;
865+ ')': begin
866+ if StackIdx = 0
867+ then Result := i // unbalanced ')'
868+ else begin
869+ dec (StackIdx);
870+ with Stack [StackIdx] do begin
871+ SubExprLen := i - StartPos + 1;
872+ ASubExprs.Objects [SubExprIdx] :=
873+ TObject (StartPos or (SubExprLen ShL 16));
874+ ASubExprs [SubExprIdx] := System.Copy (
875+ ARegExpr, StartPos + 1, SubExprLen - 2); // add without brackets
876+ end;
877+ end;
878+ end;
879+ EscChar: inc (i); // skip quoted symbol
880+ '[': begin
881+ // we have to skip character ranges at once, because they can
882+ // contain '#', and '#' in it must NOT be recognized as eXtended
883+ // comment beginning!
884+ i0 := i;
885+ inc (i);
886+ if ARegExpr [i] = ']' // cannot be 'emty' ranges - this interpretes
887+ then inc (i); // as ']' by itself
888+ while (i <= Len) and (ARegExpr [i] <> ']') do
889+ if ARegExpr [i] = EscChar //###0.942
890+ then inc (i, 2) // skip 'escaped' char to prevent stopping at '\]'
891+ else inc (i);
892+ if (i > Len) or (ARegExpr [i] <> ']') //###0.942
893+ then Result := - (i0 + 1); // unbalansed '[' //###0.942
894+ end;
895+ '#': if AExtendedSyntax then begin
896+ // skip eXtended comments
897+ while (i <= Len) and (ARegExpr [i] <> #$d) and (ARegExpr [i] <> #$a)
898+ // do not use [#$d, #$a] due to UniCode compatibility
899+ do inc (i);
900+ while (i + 1 <= Len) and ((ARegExpr [i + 1] = #$d) or (ARegExpr [i + 1] = #$a))
901+ do inc (i); // attempt to work with different kinds of line separators
902+ // now we are at the line separator that must be skipped.
903+ end;
904+ // here is no 'else' clause - we simply skip ordinary chars
905+ end; // of case
906+ inc (i); // skip scanned char
907+ // ! can move after Len due to skipping quoted symbol
908+ end;
909+
910+ // check brackets balance
911+ if StackIdx <> 0
912+ then Result := -1; // unbalansed '('
913+
914+ // check if entire r.e. added
915+ if (ASubExprs.Count = 0)
916+ or ((integer (ASubExprs.Objects [0]) and $FFFF) <> 1)
917+ or (((integer (ASubExprs.Objects [0]) ShR 16) and $FFFF) <> Len)
918+ // whole r.e. wasn't added because it isn't bracketed
919+ // well, we add it now:
920+ then ASubExprs.InsertObject (0, ARegExpr, TObject ((Len ShL 16) or 1));
921+
922+ finally FreeMem (Stack);
923+ end;
924+ end; { of function RegExprSubExpressions
925+--------------------------------------------------------------}
926+
927+
928+
929+const
930+ MAGIC = TREOp (216);// programm signature
931+
932+// name opcode opnd? meaning
933+ EEND = TREOp (0); // - End of program
934+ BOL = TREOp (1); // - Match "" at beginning of line
935+ EOL = TREOp (2); // - Match "" at end of line
936+ ANY = TREOp (3); // - Match any one character
937+ ANYOF = TREOp (4); // Str Match any character in string Str
938+ ANYBUT = TREOp (5); // Str Match any char. not in string Str
939+ BRANCH = TREOp (6); // Node Match this alternative, or the next
940+ BACK = TREOp (7); // - Jump backward (Next < 0)
941+ EXACTLY = TREOp (8); // Str Match string Str
942+ NOTHING = TREOp (9); // - Match empty string
943+ STAR = TREOp (10); // Node Match this (simple) thing 0 or more times
944+ PLUS = TREOp (11); // Node Match this (simple) thing 1 or more times
945+ ANYDIGIT = TREOp (12); // - Match any digit (equiv [0-9])
946+ NOTDIGIT = TREOp (13); // - Match not digit (equiv [0-9])
947+ ANYLETTER = TREOp (14); // - Match any letter from property WordChars
948+ NOTLETTER = TREOp (15); // - Match not letter from property WordChars
949+ ANYSPACE = TREOp (16); // - Match any space char (see property SpaceChars)
950+ NOTSPACE = TREOp (17); // - Match not space char (see property SpaceChars)
951+ BRACES = TREOp (18); // Node,Min,Max Match this (simple) thing from Min to Max times.
952+ // Min and Max are TREBracesArg
953+ COMMENT = TREOp (19); // - Comment ;)
954+ EXACTLYCI = TREOp (20); // Str Match string Str case insensitive
955+ ANYOFCI = TREOp (21); // Str Match any character in string Str, case insensitive
956+ ANYBUTCI = TREOp (22); // Str Match any char. not in string Str, case insensitive
957+ LOOPENTRY = TREOp (23); // Node Start of loop (Node - LOOP for this loop)
958+ LOOP = TREOp (24); // Node,Min,Max,LoopEntryJmp - back jump for LOOPENTRY.
959+ // Min and Max are TREBracesArg
960+ // Node - next node in sequence,
961+ // LoopEntryJmp - associated LOOPENTRY node addr
962+ ANYOFTINYSET= TREOp (25); // Chrs Match any one char from Chrs (exactly TinySetLen chars)
963+ ANYBUTTINYSET=TREOp (26); // Chrs Match any one char not in Chrs (exactly TinySetLen chars)
964+ ANYOFFULLSET= TREOp (27); // Set Match any one char from set of char
965+ // - very fast (one CPU instruction !) but takes 32 bytes of p-code
966+ BSUBEXP = TREOp (28); // Idx Match previously matched subexpression #Idx (stored as REChar) //###0.936
967+ BSUBEXPCI = TREOp (29); // Idx -"- in case-insensitive mode
968+
969+ // Non-Greedy Style Ops //###0.940
970+ STARNG = TREOp (30); // Same as START but in non-greedy mode
971+ PLUSNG = TREOp (31); // Same as PLUS but in non-greedy mode
972+ BRACESNG = TREOp (32); // Same as BRACES but in non-greedy mode
973+ LOOPNG = TREOp (33); // Same as LOOP but in non-greedy mode
974+
975+ // Multiline mode \m
976+ BOLML = TREOp (34); // - Match "" at beginning of line
977+ EOLML = TREOp (35); // - Match "" at end of line
978+ ANYML = TREOp (36); // - Match any one character
979+
980+ // Word boundary
981+ BOUND = TREOp (37); // Match "" between words //###0.943
982+ NOTBOUND = TREOp (38); // Match "" not between words //###0.943
983+
984+ // !!! Change OPEN value if you add new opcodes !!!
985+
986+ OPEN = TREOp (39); // - Mark this point in input as start of \n
987+ // OPEN + 1 is \1, etc.
988+ CLOSE = TREOp (ord (OPEN) + NSUBEXP);
989+ // - Analogous to OPEN.
990+
991+ // !!! Don't add new OpCodes after CLOSE !!!
992+
993+// We work with p-code thru pointers, compatible with PRegExprChar.
994+// Note: all code components (TRENextOff, TREOp, TREBracesArg, etc)
995+// must have lengths that can be divided by SizeOf (REChar) !
996+// A node is TREOp of opcode followed Next "pointer" of TRENextOff type.
997+// The Next is a offset from the opcode of the node containing it.
998+// An operand, if any, simply follows the node. (Note that much of
999+// the code generation knows about this implicit relationship!)
1000+// Using TRENextOff=integer speed up p-code processing.
1001+
1002+// Opcodes description:
1003+//
1004+// BRANCH The set of branches constituting a single choice are hooked
1005+// together with their "next" pointers, since precedence prevents
1006+// anything being concatenated to any individual branch. The
1007+// "next" pointer of the last BRANCH in a choice points to the
1008+// thing following the whole choice. This is also where the
1009+// final "next" pointer of each individual branch points; each
1010+// branch starts with the operand node of a BRANCH node.
1011+// BACK Normal "next" pointers all implicitly point forward; BACK
1012+// exists to make loop structures possible.
1013+// STAR,PLUS,BRACES '?', and complex '*' and '+', are implemented as
1014+// circular BRANCH structures using BACK. Complex '{min,max}'
1015+// - as pair LOOPENTRY-LOOP (see below). Simple cases (one
1016+// character per match) are implemented with STAR, PLUS and
1017+// BRACES for speed and to minimize recursive plunges.
1018+// LOOPENTRY,LOOP {min,max} are implemented as special pair
1019+// LOOPENTRY-LOOP. Each LOOPENTRY initialize loopstack for
1020+// current level.
1021+// OPEN,CLOSE are numbered at compile time.
1022+
1023+
1024+{=============================================================}
1025+{================== Error handling section ===================}
1026+{=============================================================}
1027+
1028+const
1029+ reeOk = 0;
1030+ reeCompNullArgument = 100;
1031+ reeCompRegexpTooBig = 101;
1032+ reeCompParseRegTooManyBrackets = 102;
1033+ reeCompParseRegUnmatchedBrackets = 103;
1034+ reeCompParseRegUnmatchedBrackets2 = 104;
1035+ reeCompParseRegJunkOnEnd = 105;
1036+ reePlusStarOperandCouldBeEmpty = 106;
1037+ reeNestedSQP = 107;
1038+ reeBadHexDigit = 108;
1039+ reeInvalidRange = 109;
1040+ reeParseAtomTrailingBackSlash = 110;
1041+ reeNoHexCodeAfterBSlashX = 111;
1042+ reeHexCodeAfterBSlashXTooBig = 112;
1043+ reeUnmatchedSqBrackets = 113;
1044+ reeInternalUrp = 114;
1045+ reeQPSBFollowsNothing = 115;
1046+ reeTrailingBackSlash = 116;
1047+ reeRarseAtomInternalDisaster = 119;
1048+ reeBRACESArgTooBig = 122;
1049+ reeBracesMinParamGreaterMax = 124;
1050+ reeUnclosedComment = 125;
1051+ reeComplexBracesNotImplemented = 126;
1052+ reeUrecognizedModifier = 127;
1053+ reeBadLinePairedSeparator = 128;
1054+ reeRegRepeatCalledInappropriately = 1000;
1055+ reeMatchPrimMemoryCorruption = 1001;
1056+ reeMatchPrimCorruptedPointers = 1002;
1057+ reeNoExpression = 1003;
1058+ reeCorruptedProgram = 1004;
1059+ reeNoInputStringSpecified = 1005;
1060+ reeOffsetMustBeGreaterThan0 = 1006;
1061+ reeExecNextWithoutExec = 1007;
1062+ reeGetInputStringWithoutInputString = 1008;
1063+ reeDumpCorruptedOpcode = 1011;
1064+ reeModifierUnsupported = 1013;
1065+ reeLoopStackExceeded = 1014;
1066+ reeLoopWithoutEntry = 1015;
1067+ reeBadPCodeImported = 2000;
1068+
1069+function TRegExpr.ErrorMsg (AErrorID : integer) : RegExprString;
1070+ begin
1071+ case AErrorID of
1072+ reeOk: Result := 'No errors';
1073+ reeCompNullArgument: Result := 'Null argument';
1074+ reeCompRegexpTooBig: Result := 'Regular expression too long';
1075+ reeCompParseRegTooManyBrackets: Result := 'Too many parenthsis';
1076+ reeCompParseRegUnmatchedBrackets: Result := 'Unmatched parenthesis';
1077+ reeCompParseRegUnmatchedBrackets2: Result := 'Unmatched parenthesis';
1078+ reeCompParseRegJunkOnEnd: Result := 'Junk on end of expression';
1079+ reePlusStarOperandCouldBeEmpty: Result := '*+ Operand could be empty';
1080+ reeNestedSQP: Result := 'Nested *?+ operator';
1081+ reeBadHexDigit: Result := 'Invalid hex digit';
1082+ reeInvalidRange: Result := 'Invalid [] range';
1083+ reeParseAtomTrailingBackSlash: Result := 'Trailing \ found';
1084+ reeNoHexCodeAfterBSlashX: Result := 'No hex code found after \x';
1085+ reeHexCodeAfterBSlashXTooBig: Result := 'Hex code after \x is too big';
1086+ reeUnmatchedSqBrackets: Result := 'Unmatched brackets';
1087+ reeInternalUrp: Result := 'Internal error: Invalid character found';
1088+ reeQPSBFollowsNothing: Result := '?+*{ should follow something';
1089+ reeTrailingBackSlash: Result := 'Trailing \';
1090+ reeRarseAtomInternalDisaster: Result := 'Internal disaster!';
1091+ reeBRACESArgTooBig: Result := 'Numer inside braces ({n,m}) is too large';
1092+ reeBracesMinParamGreaterMax: Result := 'Brace min value is greater than max value';
1093+ reeUnclosedComment: Result := 'Unclosed (?#Comment)';
1094+ reeComplexBracesNotImplemented: Result := 'If you want take part in beta-testing BRACES ''{min,max}'' and non-greedy ops ''*?'', ''+?'', ''??'' for complex cases - remove ''.'' from {.$DEFINE ComplexBraces}';
1095+ reeUrecognizedModifier: Result := 'Urecognized modifier';
1096+ reeBadLinePairedSeparator: Result := 'LinePairedSeparator must countain two different chars or no chars at all';
1097+
1098+ reeRegRepeatCalledInappropriately: Result := 'RegRepeat called inappropriately';
1099+ reeMatchPrimMemoryCorruption: Result := 'Memory corruption in MatchPrim';
1100+ reeMatchPrimCorruptedPointers: Result := 'Corrupted pointers in MatchPrim';
1101+ reeNoExpression: Result := 'Blank regular expression';
1102+ reeCorruptedProgram: Result := 'Corrupted program';
1103+ reeNoInputStringSpecified: Result := 'No data to search';
1104+ reeOffsetMustBeGreaterThan0: Result := 'Search start offset must be greater than 0';
1105+ reeExecNextWithoutExec: Result := 'ExecNext called without first calling Exec[Pos]';
1106+ reeGetInputStringWithoutInputString: Result := 'GetInputString called without first calling InputString';
1107+ reeDumpCorruptedOpcode: Result := 'Corrupted opcode';
1108+ reeLoopStackExceeded: Result := 'Loop stack exceeded';
1109+ reeLoopWithoutEntry: Result := 'Loop without a loop entry';
1110+
1111+ reeBadPCodeImported: Result := 'Bad p-code imported';
1112+ else Result := 'Unknown error';
1113+ end;
1114+ Result := 'Regular Expression Error: ' + Result;
1115+ end; { of procedure TRegExpr.Error
1116+--------------------------------------------------------------}
1117+
1118+function TRegExpr.LastError : integer;
1119+ begin
1120+ Result := fLastError;
1121+ fLastError := reeOk;
1122+ end; { of function TRegExpr.LastError
1123+--------------------------------------------------------------}
1124+
1125+
1126+{=============================================================}
1127+{===================== Common section ========================}
1128+{=============================================================}
1129+
1130+class function TRegExpr.VersionMajor : integer; //###0.944
1131+ begin
1132+ Result := TRegExprVersionMajor;
1133+ end; { of class function TRegExpr.VersionMajor
1134+--------------------------------------------------------------}
1135+
1136+class function TRegExpr.VersionMinor : integer; //###0.944
1137+ begin
1138+ Result := TRegExprVersionMinor;
1139+ end; { of class function TRegExpr.VersionMinor
1140+--------------------------------------------------------------}
1141+
1142+constructor TRegExpr.Create;
1143+ begin
1144+ inherited;
1145+ programm := nil;
1146+ fExpression := nil;
1147+ fInputString := nil;
1148+
1149+ regexpbeg := nil;
1150+ fExprIsCompiled := false;
1151+
1152+ ModifierI := RegExprModifierI;
1153+ ModifierR := RegExprModifierR;
1154+ ModifierS := RegExprModifierS;
1155+ ModifierG := RegExprModifierG;
1156+ ModifierM := RegExprModifierM; //###0.940
1157+
1158+ SpaceChars := RegExprSpaceChars; //###0.927
1159+ WordChars := RegExprWordChars; //###0.929
1160+ fInvertCase := RegExprInvertCaseFunction; //###0.927
1161+
1162+ fLineSeparators := RegExprLineSeparators; //###0.941
1163+ LinePairedSeparator := RegExprLinePairedSeparator; //###0.941
1164+ end; { of constructor TRegExpr.Create
1165+--------------------------------------------------------------}
1166+
1167+destructor TRegExpr.Destroy;
1168+ begin
1169+ if programm <> nil
1170+ then FreeMem (programm);
1171+ if fExpression <> nil
1172+ then FreeMem (fExpression);
1173+ if fInputString <> nil
1174+ then FreeMem (fInputString);
1175+ end; { of destructor TRegExpr.Destroy
1176+--------------------------------------------------------------}
1177+
1178+class function TRegExpr.InvertCaseFunction (const Ch : REChar) : REChar;
1179+ begin
1180+ {$IF Defined(UniCode) and (CompilerVersion < 19)}
1181+ if Ch >= #128
1182+ then Result := Ch
1183+ else
1184+ {$IFEND}
1185+ begin
1186+ Result := {$IFDEF FPC}AnsiUpperCase (Ch) [1]{$ELSE} REChar (CharUpper (PChar (Ch))){$ENDIF};
1187+ if Result = Ch
1188+ then Result := {$IFDEF FPC}AnsiLowerCase (Ch) [1]{$ELSE} REChar (CharLower (PChar (Ch))){$ENDIF};
1189+ end;
1190+ end; { of function TRegExpr.InvertCaseFunction
1191+--------------------------------------------------------------}
1192+
1193+function TRegExpr.GetExpression : RegExprString;
1194+ begin
1195+ if fExpression <> nil
1196+ then Result := fExpression
1197+ else Result := '';
1198+ end; { of function TRegExpr.GetExpression
1199+--------------------------------------------------------------}
1200+
1201+procedure TRegExpr.SetExpression (const s : RegExprString);
1202+ var
1203+ Len : integer; //###0.950
1204+ begin
1205+ if (s <> fExpression) or not fExprIsCompiled then begin
1206+ fExprIsCompiled := false;
1207+ if fExpression <> nil then begin
1208+ FreeMem (fExpression);
1209+ fExpression := nil;
1210+ end;
1211+ if s <> '' then begin
1212+ Len := length (s); //###0.950
1213+ GetMem (fExpression, (Len + 1) * SizeOf (REChar));
1214+// StrPCopy (fExpression, s); //###0.950 replaced due to StrPCopy limitation of 255 chars
1215+ {$IFDEF UniCode}
1216+ StrPCopy (fExpression, Copy (s, 1, Len)); //###0.950
1217+ {$ELSE}
1218+ StrLCopy (fExpression, PRegExprChar (s), Len); //###0.950
1219+ {$ENDIF UniCode}
1220+
1221+ InvalidateProgramm; //###0.941
1222+ end;
1223+ end;
1224+ end; { of procedure TRegExpr.SetExpression
1225+--------------------------------------------------------------}
1226+
1227+function TRegExpr.GetSubExprMatchCount : integer;
1228+ begin
1229+ if Assigned (fInputString) then begin
1230+ Result := NSUBEXP - 1;
1231+ while (Result > 0) and ((startp [Result] = nil)
1232+ or (endp [Result] = nil))
1233+ do dec (Result);
1234+ end
1235+ else Result := -1;
1236+ end; { of function TRegExpr.GetSubExprMatchCount
1237+--------------------------------------------------------------}
1238+
1239+function TRegExpr.GetMatchPos (Idx : integer) : integer;
1240+ begin
1241+ if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString)
1242+ and Assigned (startp [Idx]) and Assigned (endp [Idx]) then begin
1243+ Result := (startp [Idx] - fInputString) + 1;
1244+ end
1245+ else Result := -1;
1246+ end; { of function TRegExpr.GetMatchPos
1247+--------------------------------------------------------------}
1248+
1249+function TRegExpr.GetMatchLen (Idx : integer) : integer;
1250+ begin
1251+ if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString)
1252+ and Assigned (startp [Idx]) and Assigned (endp [Idx]) then begin
1253+ Result := endp [Idx] - startp [Idx];
1254+ end
1255+ else Result := -1;
1256+ end; { of function TRegExpr.GetMatchLen
1257+--------------------------------------------------------------}
1258+
1259+function TRegExpr.GetMatch (Idx : integer) : RegExprString;
1260+ begin
1261+ if (Idx >= 0) and (Idx < NSUBEXP) and Assigned (fInputString)
1262+ and Assigned (startp [Idx]) and Assigned (endp [Idx])
1263+ //then Result := copy (fInputString, MatchPos [Idx], MatchLen [Idx]) //###0.929
1264+ then SetString (Result, startp [idx], endp [idx] - startp [idx])
1265+ else Result := '';
1266+ end; { of function TRegExpr.GetMatch
1267+--------------------------------------------------------------}
1268+
1269+function TRegExpr.GetModifierStr : RegExprString;
1270+ begin
1271+ Result := '-';
1272+
1273+ if ModifierI
1274+ then Result := 'i' + Result
1275+ else Result := Result + 'i';
1276+ if ModifierR
1277+ then Result := 'r' + Result
1278+ else Result := Result + 'r';
1279+ if ModifierS
1280+ then Result := 's' + Result
1281+ else Result := Result + 's';
1282+ if ModifierG
1283+ then Result := 'g' + Result
1284+ else Result := Result + 'g';
1285+ if ModifierM
1286+ then Result := 'm' + Result
1287+ else Result := Result + 'm';
1288+ if ModifierX
1289+ then Result := 'x' + Result
1290+ else Result := Result + 'x';
1291+
1292+ if Result [length (Result)] = '-' // remove '-' if all modifiers are 'On'
1293+ then System.Delete (Result, length (Result), 1);
1294+ end; { of function TRegExpr.GetModifierStr
1295+--------------------------------------------------------------}
1296+
1297+class function TRegExpr.ParseModifiersStr (const AModifiers : RegExprString;
1298+var AModifiersInt : integer) : boolean;
1299+// !!! Be carefull - this is class function and must not use object instance fields
1300+ var
1301+ i : integer;
1302+ IsOn : boolean;
1303+ Mask : integer;
1304+ begin
1305+ Result := true;
1306+ IsOn := true;
1307+{$IFNDEF D101}
1308+ Mask := 0; // prevent compiler warning (But Delphi 10.1 complains that this value is never used)
1309+{$ENDIF}
1310+ for i := 1 to length (AModifiers) do
1311+ if AModifiers [i] = '-'
1312+ then IsOn := false
1313+ else begin
1314+ if Pos (AModifiers [i], 'iI') > 0
1315+ then Mask := MaskModI
1316+ else if Pos (AModifiers [i], 'rR') > 0
1317+ then Mask := MaskModR
1318+ else if Pos (AModifiers [i], 'sS') > 0
1319+ then Mask := MaskModS
1320+ else if Pos (AModifiers [i], 'gG') > 0
1321+ then Mask := MaskModG
1322+ else if Pos (AModifiers [i], 'mM') > 0
1323+ then Mask := MaskModM
1324+ else if Pos (AModifiers [i], 'xX') > 0
1325+ then Mask := MaskModX
1326+ else begin
1327+ Result := false;
1328+ EXIT;
1329+ end;
1330+ if IsOn
1331+ then AModifiersInt := AModifiersInt or Mask
1332+ else AModifiersInt := AModifiersInt and not Mask;
1333+ end;
1334+ end; { of function TRegExpr.ParseModifiersStr
1335+--------------------------------------------------------------}
1336+
1337+procedure TRegExpr.SetModifierStr (const AModifiers : RegExprString);
1338+ begin
1339+ if not ParseModifiersStr (AModifiers, fModifiers)
1340+ then Error (reeModifierUnsupported);
1341+ end; { of procedure TRegExpr.SetModifierStr
1342+--------------------------------------------------------------}
1343+
1344+function TRegExpr.GetModifier (AIndex : integer) : boolean;
1345+ var
1346+ Mask : integer;
1347+ begin
1348+ Result := false;
1349+ case AIndex of
1350+ 1: Mask := MaskModI;
1351+ 2: Mask := MaskModR;
1352+ 3: Mask := MaskModS;
1353+ 4: Mask := MaskModG;
1354+ 5: Mask := MaskModM;
1355+ 6: Mask := MaskModX;
1356+ else begin
1357+ Error (reeModifierUnsupported);
1358+ EXIT;
1359+ end;
1360+ end;
1361+ Result := (fModifiers and Mask) <> 0;
1362+ end; { of function TRegExpr.GetModifier
1363+--------------------------------------------------------------}
1364+
1365+procedure TRegExpr.SetModifier (AIndex : integer; ASet : boolean);
1366+ var
1367+ Mask : integer;
1368+ begin
1369+ case AIndex of
1370+ 1: Mask := MaskModI;
1371+ 2: Mask := MaskModR;
1372+ 3: Mask := MaskModS;
1373+ 4: Mask := MaskModG;
1374+ 5: Mask := MaskModM;
1375+ 6: Mask := MaskModX;
1376+ else begin
1377+ Error (reeModifierUnsupported);
1378+ EXIT;
1379+ end;
1380+ end;
1381+ if ASet
1382+ then fModifiers := fModifiers or Mask
1383+ else fModifiers := fModifiers and not Mask;
1384+ end; { of procedure TRegExpr.SetModifier
1385+--------------------------------------------------------------}
1386+
1387+
1388+{=============================================================}
1389+{==================== Compiler section =======================}
1390+{=============================================================}
1391+
1392+procedure TRegExpr.InvalidateProgramm;
1393+ begin
1394+ if programm <> nil then begin
1395+ FreeMem (programm);
1396+ programm := nil;
1397+ end;
1398+ end; { of procedure TRegExpr.InvalidateProgramm
1399+--------------------------------------------------------------}
1400+
1401+procedure TRegExpr.Compile; //###0.941
1402+ begin
1403+ if fExpression = nil then begin // No Expression assigned
1404+ Error (reeNoExpression);
1405+ EXIT;
1406+ end;
1407+ CompileRegExpr (fExpression);
1408+ end; { of procedure TRegExpr.Compile
1409+--------------------------------------------------------------}
1410+
1411+function TRegExpr.IsProgrammOk : boolean;
1412+ {$IFNDEF UniCode}
1413+ var
1414+ i : integer;
1415+ {$ENDIF}
1416+ begin
1417+ Result := false;
1418+
1419+ // check modifiers
1420+ if fModifiers <> fProgModifiers //###0.941
1421+ then InvalidateProgramm;
1422+
1423+ // can we optimize line separators by using sets?
1424+ {$IFNDEF UniCode}
1425+ fLineSeparatorsSet := [];
1426+ for i := 1 to length (fLineSeparators)
1427+ do System.Include (fLineSeparatorsSet, fLineSeparators [i]);
1428+ {$ENDIF}
1429+
1430+ // [Re]compile if needed
1431+ if programm = nil
1432+ then Compile; //###0.941
1433+
1434+ // check [re]compiled programm
1435+ if programm = nil
1436+ then EXIT // error was set/raised by Compile (was reeExecAfterCompErr)
1437+ else if programm [0] <> MAGIC // Program corrupted.
1438+ then Error (reeCorruptedProgram)
1439+ else Result := true;
1440+ end; { of function TRegExpr.IsProgrammOk
1441+--------------------------------------------------------------}
1442+
1443+procedure TRegExpr.Tail (p : PRegExprChar; val : PRegExprChar);
1444+// set the next-pointer at the end of a node chain
1445+ var
1446+ scan : PRegExprChar;
1447+ temp : PRegExprChar;
1448+// i : int64;
1449+ begin
1450+ if p = @regdummy
1451+ then EXIT;
1452+ // Find last node.
1453+ scan := p;
1454+ REPEAT
1455+ temp := regnext (scan);
1456+ if temp = nil
1457+ then BREAK;
1458+ scan := temp;
1459+ UNTIL false;
1460+ // Set Next 'pointer'
1461+ if val < scan
1462+ then PRENextOff (scan + REOpSz)^ := - (scan - val) //###0.948
1463+ // work around PWideChar subtraction bug (Delphi uses
1464+ // shr after subtraction to calculate widechar distance %-( )
1465+ // so, if difference is negative we have .. the "feature" :(
1466+ // I could wrap it in $IFDEF UniCode, but I didn't because
1467+ // "P – Q computes the difference between the address given
1468+ // by P (the higher address) and the address given by Q (the
1469+ // lower address)" - Delphi help quotation.
1470+ else PRENextOff (scan + REOpSz)^ := val - scan; //###0.933
1471+ end; { of procedure TRegExpr.Tail
1472+--------------------------------------------------------------}
1473+
1474+procedure TRegExpr.OpTail (p : PRegExprChar; val : PRegExprChar);
1475+// regtail on operand of first argument; nop if operandless
1476+ begin
1477+ // "Operandless" and "op != BRANCH" are synonymous in practice.
1478+ if (p = nil) or (p = @regdummy) or (PREOp (p)^ <> BRANCH)
1479+ then EXIT;
1480+ Tail (p + REOpSz + RENextOffSz, val); //###0.933
1481+ end; { of procedure TRegExpr.OpTail
1482+--------------------------------------------------------------}
1483+
1484+function TRegExpr.EmitNode (op : TREOp) : PRegExprChar; //###0.933
1485+// emit a node, return location
1486+ begin
1487+ Result := regcode;
1488+ if Result <> @regdummy then begin
1489+ PREOp (regcode)^ := op;
1490+ inc (regcode, REOpSz);
1491+ PRENextOff (regcode)^ := 0; // Next "pointer" := nil
1492+ inc (regcode, RENextOffSz);
1493+ end
1494+ else inc (regsize, REOpSz + RENextOffSz); // compute code size without code generation
1495+ end; { of function TRegExpr.EmitNode
1496+--------------------------------------------------------------}
1497+
1498+procedure TRegExpr.EmitC (b : REChar);
1499+// emit a byte to code
1500+ begin
1501+ if regcode <> @regdummy then begin
1502+ regcode^ := b;
1503+ inc (regcode);
1504+ end
1505+ else inc (regsize); // Type of p-code pointer always is ^REChar
1506+ end; { of procedure TRegExpr.EmitC
1507+--------------------------------------------------------------}
1508+
1509+procedure TRegExpr.InsertOperator (op : TREOp; opnd : PRegExprChar; sz : integer);
1510+// insert an operator in front of already-emitted operand
1511+// Means relocating the operand.
1512+ var
1513+ src, dst, place : PRegExprChar;
1514+ i : integer;
1515+ begin
1516+ if regcode = @regdummy then begin
1517+ inc (regsize, sz);
1518+ EXIT;
1519+ end;
1520+ src := regcode;
1521+ inc (regcode, sz);
1522+ dst := regcode;
1523+ while src > opnd do begin
1524+ dec (dst);
1525+ dec (src);
1526+ dst^ := src^;
1527+ end;
1528+ place := opnd; // Op node, where operand used to be.
1529+ PREOp (place)^ := op;
1530+ inc (place, REOpSz);
1531+ for i := 1 + REOpSz to sz do begin
1532+ place^ := #0;
1533+ inc (place);
1534+ end;
1535+ end; { of procedure TRegExpr.InsertOperator
1536+--------------------------------------------------------------}
1537+
1538+function strcspn (s1 : PRegExprChar; s2 : PRegExprChar) : integer;
1539+// find length of initial segment of s1 consisting
1540+// entirely of characters not from s2
1541+ var scan1, scan2 : PRegExprChar;
1542+ begin
1543+ Result := 0;
1544+ scan1 := s1;
1545+ while scan1^ <> #0 do begin
1546+ scan2 := s2;
1547+ while scan2^ <> #0 do
1548+ if scan1^ = scan2^
1549+ then EXIT
1550+ else inc (scan2);
1551+ inc (Result);
1552+ inc (scan1)
1553+ end;
1554+ end; { of function strcspn
1555+--------------------------------------------------------------}
1556+
1557+const
1558+// Flags to be passed up and down.
1559+ HASWIDTH = 01; // Known never to match nil string.
1560+ SIMPLE = 02; // Simple enough to be STAR/PLUS/BRACES operand.
1561+ SPSTART = 04; // Starts with * or +.
1562+ WORST = 0; // Worst case.
1563+ META : array [0 .. 12] of REChar = (
1564+ '^', '$', '.', '[', '(', ')', '|', '?', '+', '*', EscChar, '{', #0);
1565+ // Any modification must be synchronized with QuoteRegExprMetaChars !!!
1566+
1567+{$IFDEF UniCode}
1568+ RusRangeLo : array [0 .. 33] of REChar =
1569+ (#$430,#$431,#$432,#$433,#$434,#$435,#$451,#$436,#$437,
1570+ #$438,#$439,#$43A,#$43B,#$43C,#$43D,#$43E,#$43F,
1571+ #$440,#$441,#$442,#$443,#$444,#$445,#$446,#$447,
1572+ #$448,#$449,#$44A,#$44B,#$44C,#$44D,#$44E,#$44F,#0);
1573+ RusRangeHi : array [0 .. 33] of REChar =
1574+ (#$410,#$411,#$412,#$413,#$414,#$415,#$401,#$416,#$417,
1575+ #$418,#$419,#$41A,#$41B,#$41C,#$41D,#$41E,#$41F,
1576+ #$420,#$421,#$422,#$423,#$424,#$425,#$426,#$427,
1577+ #$428,#$429,#$42A,#$42B,#$42C,#$42D,#$42E,#$42F,#0);
1578+ RusRangeLoLow = #$430{'à'};
1579+ RusRangeLoHigh = #$44F{'ÿ'};
1580+ RusRangeHiLow = #$410{'À'};
1581+ RusRangeHiHigh = #$42F{'ß'};
1582+{$ELSE}
1583+ RusRangeLo = 'àáâãäå¸æçèéêëìíîïðñòóôõö÷øùúûüýþÿ';
1584+ RusRangeHi = 'ÀÁÂÃÄŨÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß';
1585+ RusRangeLoLow = 'à';
1586+ RusRangeLoHigh = 'ÿ';
1587+ RusRangeHiLow = 'À';
1588+ RusRangeHiHigh = 'ß';
1589+{$ENDIF}
1590+
1591+function TRegExpr.CompileRegExpr (exp : PRegExprChar) : boolean;
1592+// compile a regular expression into internal code
1593+// We can't allocate space until we know how big the compiled form will be,
1594+// but we can't compile it (and thus know how big it is) until we've got a
1595+// place to put the code. So we cheat: we compile it twice, once with code
1596+// generation turned off and size counting turned on, and once "for real".
1597+// This also means that we don't allocate space until we are sure that the
1598+// thing really will compile successfully, and we never have to move the
1599+// code and thus invalidate pointers into it. (Note that it has to be in
1600+// one piece because free() must be able to free it all.)
1601+// Beware that the optimization-preparation code in here knows about some
1602+// of the structure of the compiled regexp.
1603+ var
1604+ scan, longest : PRegExprChar;
1605+ len : cardinal;
1606+ flags : integer;
1607+ begin
1608+ Result := false; // life too dark
1609+
1610+ regparse := nil; // for correct error handling
1611+ regexpbeg := exp;
1612+ try
1613+
1614+ if programm <> nil then begin
1615+ FreeMem (programm);
1616+ programm := nil;
1617+ end;
1618+
1619+ if exp = nil then begin
1620+ Error (reeCompNullArgument);
1621+ EXIT;
1622+ end;
1623+
1624+ fProgModifiers := fModifiers;
1625+ // well, may it's paranoia. I'll check it later... !!!!!!!!
1626+
1627+ // First pass: determine size, legality.
1628+ fCompModifiers := fModifiers;
1629+ regparse := exp;
1630+ regnpar := 1;
1631+ regsize := 0;
1632+ regcode := @regdummy;
1633+ EmitC (MAGIC);
1634+ if ParseReg (0, flags) = nil
1635+ then EXIT;
1636+
1637+ // Small enough for 2-bytes programm pointers ?
1638+ // ###0.933 no real p-code length limits now :)))
1639+// if regsize >= 64 * 1024 then begin
1640+// Error (reeCompRegexpTooBig);
1641+// EXIT;
1642+// end;
1643+
1644+ // Allocate space.
1645+ GetMem (programm, regsize * SizeOf (REChar));
1646+
1647+ // Second pass: emit code.
1648+ fCompModifiers := fModifiers;
1649+ regparse := exp;
1650+ regnpar := 1;
1651+ regcode := programm;
1652+ EmitC (MAGIC);
1653+ if ParseReg (0, flags) = nil
1654+ then EXIT;
1655+
1656+ // Dig out information for optimizations.
1657+ {$IFDEF UseFirstCharSet} //###0.929
1658+ FirstCharSet := [];
1659+ FillFirstCharSet (programm + REOpSz);
1660+ {$ENDIF}
1661+ regstart := #0; // Worst-case defaults.
1662+ reganch := #0;
1663+ regmust := nil;
1664+ regmlen := 0;
1665+ scan := programm + REOpSz; // First BRANCH.
1666+ if PREOp (regnext (scan))^ = EEND then begin // Only one top-level choice.
1667+ scan := scan + REOpSz + RENextOffSz;
1668+
1669+ // Starting-point info.
1670+ if PREOp (scan)^ = EXACTLY
1671+ then regstart := (scan + REOpSz + RENextOffSz)^
1672+ else if PREOp (scan)^ = BOL
1673+ then inc (reganch);
1674+
1675+ // If there's something expensive in the r.e., find the longest
1676+ // literal string that must appear and make it the regmust. Resolve
1677+ // ties in favor of later strings, since the regstart check works
1678+ // with the beginning of the r.e. and avoiding duplication
1679+ // strengthens checking. Not a strong reason, but sufficient in the
1680+ // absence of others.
1681+ if (flags and SPSTART) <> 0 then begin
1682+ longest := nil;
1683+ len := 0;
1684+ while scan <> nil do begin
1685+ if (PREOp (scan)^ = EXACTLY)
1686+ and (strlen (scan + REOpSz + RENextOffSz) >= len) then begin
1687+ longest := scan + REOpSz + RENextOffSz;
1688+ len := strlen (longest);
1689+ end;
1690+ scan := regnext (scan);
1691+ end;
1692+ regmust := longest;
1693+ regmlen := len;
1694+ end;
1695+ end;
1696+
1697+ Result := true;
1698+
1699+ finally begin
1700+ if not Result
1701+ then InvalidateProgramm;
1702+ regexpbeg := nil;
1703+ fExprIsCompiled := Result; //###0.944
1704+ end;
1705+ end;
1706+
1707+ end; { of function TRegExpr.CompileRegExpr
1708+--------------------------------------------------------------}
1709+
1710+function TRegExpr.ParseReg (paren : integer; var flagp : integer) : PRegExprChar;
1711+// regular expression, i.e. main body or parenthesized thing
1712+// Caller must absorb opening parenthesis.
1713+// Combining parenthesis handling with the base level of regular expression
1714+// is a trifle forced, but the need to tie the tails of the branches to what
1715+// follows makes it hard to avoid.
1716+ var
1717+ ret, br, ender : PRegExprChar;
1718+ parno : integer;
1719+ flags : integer;
1720+ SavedModifiers : integer;
1721+ begin
1722+ Result := nil;
1723+ flagp := HASWIDTH; // Tentatively.
1724+ parno := 0; // eliminate compiler stupid warning
1725+ SavedModifiers := fCompModifiers;
1726+
1727+ // Make an OPEN node, if parenthesized.
1728+ if paren <> 0 then begin
1729+ if regnpar >= NSUBEXP then begin
1730+ Error (reeCompParseRegTooManyBrackets);
1731+ EXIT;
1732+ end;
1733+ parno := regnpar;
1734+ inc (regnpar);
1735+ ret := EmitNode (TREOp (ord (OPEN) + parno));
1736+ end
1737+ else ret := nil;
1738+
1739+ // Pick up the branches, linking them together.
1740+ br := ParseBranch (flags);
1741+ if br = nil then begin
1742+ Result := nil;
1743+ EXIT;
1744+ end;
1745+ if ret <> nil
1746+ then Tail (ret, br) // OPEN -> first.
1747+ else ret := br;
1748+ if (flags and HASWIDTH) = 0
1749+ then flagp := flagp and not HASWIDTH;
1750+ flagp := flagp or flags and SPSTART;
1751+ while (regparse^ = '|') do begin
1752+ inc (regparse);
1753+ br := ParseBranch (flags);
1754+ if br = nil then begin
1755+ Result := nil;
1756+ EXIT;
1757+ end;
1758+ Tail (ret, br); // BRANCH -> BRANCH.
1759+ if (flags and HASWIDTH) = 0
1760+ then flagp := flagp and not HASWIDTH;
1761+ flagp := flagp or flags and SPSTART;
1762+ end;
1763+
1764+ // Make a closing node, and hook it on the end.
1765+ if paren <> 0
1766+ then ender := EmitNode (TREOp (ord (CLOSE) + parno))
1767+ else ender := EmitNode (EEND);
1768+ Tail (ret, ender);
1769+
1770+ // Hook the tails of the branches to the closing node.
1771+ br := ret;
1772+ while br <> nil do begin
1773+ OpTail (br, ender);
1774+ br := regnext (br);
1775+ end;
1776+
1777+ // Check for proper termination.
1778+ if paren <> 0 then
1779+ if regparse^ <> ')' then begin
1780+ Error (reeCompParseRegUnmatchedBrackets);
1781+ EXIT;
1782+ end
1783+ else inc (regparse); // skip trailing ')'
1784+ if (paren = 0) and (regparse^ <> #0) then begin
1785+ if regparse^ = ')'
1786+ then Error (reeCompParseRegUnmatchedBrackets2)
1787+ else Error (reeCompParseRegJunkOnEnd);
1788+ EXIT;
1789+ end;
1790+ fCompModifiers := SavedModifiers; // restore modifiers of parent
1791+ Result := ret;
1792+ end; { of function TRegExpr.ParseReg
1793+--------------------------------------------------------------}
1794+
1795+function TRegExpr.ParseBranch (var flagp : integer) : PRegExprChar;
1796+// one alternative of an | operator
1797+// Implements the concatenation operator.
1798+ var
1799+ ret, chain, latest : PRegExprChar;
1800+ flags : integer;
1801+ begin
1802+ flagp := WORST; // Tentatively.
1803+
1804+ ret := EmitNode (BRANCH);
1805+ chain := nil;
1806+ while (regparse^ <> #0) and (regparse^ <> '|')
1807+ and (regparse^ <> ')') do begin
1808+ latest := ParsePiece (flags);
1809+ if latest = nil then begin
1810+ Result := nil;
1811+ EXIT;
1812+ end;
1813+ flagp := flagp or flags and HASWIDTH;
1814+ if chain = nil // First piece.
1815+ then flagp := flagp or flags and SPSTART
1816+ else Tail (chain, latest);
1817+ chain := latest;
1818+ end;
1819+ if chain = nil // Loop ran zero times.
1820+ then EmitNode (NOTHING);
1821+ Result := ret;
1822+ end; { of function TRegExpr.ParseBranch
1823+--------------------------------------------------------------}
1824+
1825+function TRegExpr.ParsePiece (var flagp : integer) : PRegExprChar;
1826+// something followed by possible [*+?{]
1827+// Note that the branching code sequences used for ? and the general cases
1828+// of * and + and { are somewhat optimized: they use the same NOTHING node as
1829+// both the endmarker for their branch list and the body of the last branch.
1830+// It might seem that this node could be dispensed with entirely, but the
1831+// endmarker role is not redundant.
1832+ function parsenum (AStart, AEnd : PRegExprChar) : TREBracesArg;
1833+ begin
1834+ Result := 0;
1835+ if AEnd - AStart + 1 > 8 then begin // prevent stupid scanning
1836+ Error (reeBRACESArgTooBig);
1837+ EXIT;
1838+ end;
1839+ while AStart <= AEnd do begin
1840+ Result := Result * 10 + (ord (AStart^) - ord ('0'));
1841+ inc (AStart);
1842+ end;
1843+ if (Result > MaxBracesArg) or (Result < 0) then begin
1844+ Error (reeBRACESArgTooBig);
1845+ EXIT;
1846+ end;
1847+ end;
1848+
1849+ var
1850+ op : REChar;
1851+ NonGreedyOp, NonGreedyCh : boolean; //###0.940
1852+ TheOp : TREOp; //###0.940
1853+ NextNode : PRegExprChar;
1854+ flags : integer;
1855+ BracesMin, Bracesmax : TREBracesArg;
1856+ p, savedparse : PRegExprChar;
1857+
1858+ procedure EmitComplexBraces (ABracesMin, ABracesMax : TREBracesArg;
1859+ ANonGreedyOp : boolean); //###0.940
1860+ {$IFDEF ComplexBraces}
1861+ var
1862+ off : integer;
1863+ {$ENDIF}
1864+ begin
1865+ {$IFNDEF ComplexBraces}
1866+ Error (reeComplexBracesNotImplemented);
1867+ {$ELSE}
1868+ if ANonGreedyOp
1869+ then TheOp := LOOPNG
1870+ else TheOp := LOOP;
1871+ InsertOperator (LOOPENTRY, Result, REOpSz + RENextOffSz);
1872+ NextNode := EmitNode (TheOp);
1873+ if regcode <> @regdummy then begin
1874+ off := (Result + REOpSz + RENextOffSz)
1875+ - (regcode - REOpSz - RENextOffSz); // back to Atom after LOOPENTRY
1876+ PREBracesArg (regcode)^ := ABracesMin;
1877+ inc (regcode, REBracesArgSz);
1878+ PREBracesArg (regcode)^ := ABracesMax;
1879+ inc (regcode, REBracesArgSz);
1880+ PRENextOff (regcode)^ := off;
1881+ inc (regcode, RENextOffSz);
1882+ end
1883+ else inc (regsize, REBracesArgSz * 2 + RENextOffSz);
1884+ Tail (Result, NextNode); // LOOPENTRY -> LOOP
1885+ if regcode <> @regdummy then
1886+ Tail (Result + REOpSz + RENextOffSz, NextNode); // Atom -> LOOP
1887+ {$ENDIF}
1888+ end;
1889+
1890+ procedure EmitSimpleBraces (ABracesMin, ABracesMax : TREBracesArg;
1891+ ANonGreedyOp : boolean); //###0.940
1892+ begin
1893+ if ANonGreedyOp //###0.940
1894+ then TheOp := BRACESNG
1895+ else TheOp := BRACES;
1896+ InsertOperator (TheOp, Result, REOpSz + RENextOffSz + REBracesArgSz * 2);
1897+ if regcode <> @regdummy then begin
1898+ PREBracesArg (Result + REOpSz + RENextOffSz)^ := ABracesMin;
1899+ PREBracesArg (Result + REOpSz + RENextOffSz + REBracesArgSz)^ := ABracesMax;
1900+ end;
1901+ end;
1902+
1903+ begin
1904+ Result := ParseAtom (flags);
1905+ if Result = nil
1906+ then EXIT;
1907+
1908+ op := regparse^;
1909+ if not ((op = '*') or (op = '+') or (op = '?') or (op = '{')) then begin
1910+ flagp := flags;
1911+ EXIT;
1912+ end;
1913+ if ((flags and HASWIDTH) = 0) and (op <> '?') then begin
1914+ Error (reePlusStarOperandCouldBeEmpty);
1915+ EXIT;
1916+ end;
1917+
1918+ case op of
1919+ '*': begin
1920+ flagp := WORST or SPSTART;
1921+ NonGreedyCh := (regparse + 1)^ = '?'; //###0.940
1922+ NonGreedyOp := NonGreedyCh or ((fCompModifiers and MaskModG) = 0); //###0.940
1923+ if (flags and SIMPLE) = 0 then begin
1924+ if NonGreedyOp //###0.940
1925+ then EmitComplexBraces (0, MaxBracesArg, NonGreedyOp)
1926+ else begin // Emit x* as (x&|), where & means "self".
1927+ InsertOperator (BRANCH, Result, REOpSz + RENextOffSz); // Either x
1928+ OpTail (Result, EmitNode (BACK)); // and loop
1929+ OpTail (Result, Result); // back
1930+ Tail (Result, EmitNode (BRANCH)); // or
1931+ Tail (Result, EmitNode (NOTHING)); // nil.
1932+ end
1933+ end
1934+ else begin // Simple
1935+ if NonGreedyOp //###0.940
1936+ then TheOp := STARNG
1937+ else TheOp := STAR;
1938+ InsertOperator (TheOp, Result, REOpSz + RENextOffSz);
1939+ end;
1940+ if NonGreedyCh //###0.940
1941+ then inc (regparse); // Skip extra char ('?')
1942+ end; { of case '*'}
1943+ '+': begin
1944+ flagp := WORST or SPSTART or HASWIDTH;
1945+ NonGreedyCh := (regparse + 1)^ = '?'; //###0.940
1946+ NonGreedyOp := NonGreedyCh or ((fCompModifiers and MaskModG) = 0); //###0.940
1947+ if (flags and SIMPLE) = 0 then begin
1948+ if NonGreedyOp //###0.940
1949+ then EmitComplexBraces (1, MaxBracesArg, NonGreedyOp)
1950+ else begin // Emit x+ as x(&|), where & means "self".
1951+ NextNode := EmitNode (BRANCH); // Either
1952+ Tail (Result, NextNode);
1953+ Tail (EmitNode (BACK), Result); // loop back
1954+ Tail (NextNode, EmitNode (BRANCH)); // or
1955+ Tail (Result, EmitNode (NOTHING)); // nil.
1956+ end
1957+ end
1958+ else begin // Simple
1959+ if NonGreedyOp //###0.940
1960+ then TheOp := PLUSNG
1961+ else TheOp := PLUS;
1962+ InsertOperator (TheOp, Result, REOpSz + RENextOffSz);
1963+ end;
1964+ if NonGreedyCh //###0.940
1965+ then inc (regparse); // Skip extra char ('?')
1966+ end; { of case '+'}
1967+ '?': begin
1968+ flagp := WORST;
1969+ NonGreedyCh := (regparse + 1)^ = '?'; //###0.940
1970+ NonGreedyOp := NonGreedyCh or ((fCompModifiers and MaskModG) = 0); //###0.940
1971+ if NonGreedyOp then begin //###0.940 // We emit x?? as x{0,1}?
1972+ if (flags and SIMPLE) = 0
1973+ then EmitComplexBraces (0, 1, NonGreedyOp)
1974+ else EmitSimpleBraces (0, 1, NonGreedyOp);
1975+ end
1976+ else begin // greedy '?'
1977+ InsertOperator (BRANCH, Result, REOpSz + RENextOffSz); // Either x
1978+ Tail (Result, EmitNode (BRANCH)); // or
1979+ NextNode := EmitNode (NOTHING); // nil.
1980+ Tail (Result, NextNode);
1981+ OpTail (Result, NextNode);
1982+ end;
1983+ if NonGreedyCh //###0.940
1984+ then inc (regparse); // Skip extra char ('?')
1985+ end; { of case '?'}
1986+ '{': begin
1987+ savedparse := regparse;
1988+ // !!!!!!!!!!!!
1989+ // Filip Jirsak's note - what will happen, when we are at the end of regparse?
1990+ inc (regparse);
1991+ p := regparse;
1992+ while Pos (regparse^, '0123456789') > 0 // <min> MUST appear
1993+ do inc (regparse);
1994+ if (regparse^ <> '}') and (regparse^ <> ',') or (p = regparse) then begin
1995+ regparse := savedparse;
1996+ flagp := flags;
1997+ EXIT;
1998+ end;
1999+ BracesMin := parsenum (p, regparse - 1);
2000+ if regparse^ = ',' then begin
2001+ inc (regparse);
2002+ p := regparse;
2003+ while Pos (regparse^, '0123456789') > 0
2004+ do inc (regparse);
2005+ if regparse^ <> '}' then begin
2006+ regparse := savedparse;
2007+ EXIT;
2008+ end;
2009+ if p = regparse
2010+ then BracesMax := MaxBracesArg
2011+ else BracesMax := parsenum (p, regparse - 1);
2012+ end
2013+ else BracesMax := BracesMin; // {n} == {n,n}
2014+ if BracesMin > BracesMax then begin
2015+ Error (reeBracesMinParamGreaterMax);
2016+ EXIT;
2017+ end;
2018+ if BracesMin > 0
2019+ then flagp := WORST;
2020+ if BracesMax > 0
2021+ then flagp := flagp or HASWIDTH or SPSTART;
2022+
2023+ NonGreedyCh := (regparse + 1)^ = '?'; //###0.940
2024+ NonGreedyOp := NonGreedyCh or ((fCompModifiers and MaskModG) = 0); //###0.940
2025+ if (flags and SIMPLE) <> 0
2026+ then EmitSimpleBraces (BracesMin, BracesMax, NonGreedyOp)
2027+ else EmitComplexBraces (BracesMin, BracesMax, NonGreedyOp);
2028+ if NonGreedyCh //###0.940
2029+ then inc (regparse); // Skip extra char '?'
2030+ end; { of case '{'}
2031+// else // here we can't be
2032+ end; { of case op}
2033+
2034+ inc (regparse);
2035+ if (regparse^ = '*') or (regparse^ = '+') or (regparse^ = '?') or (regparse^ = '{') then begin
2036+ Error (reeNestedSQP);
2037+ EXIT;
2038+ end;
2039+ end; { of function TRegExpr.ParsePiece
2040+--------------------------------------------------------------}
2041+
2042+function TRegExpr.ParseAtom (var flagp : integer) : PRegExprChar;
2043+// the lowest level
2044+// Optimization: gobbles an entire sequence of ordinary characters so that
2045+// it can turn them into a single node, which is smaller to store and
2046+// faster to run. Backslashed characters are exceptions, each becoming a
2047+// separate node; the code is simpler that way and it's not worth fixing.
2048+ var
2049+ ret : PRegExprChar;
2050+ flags : integer;
2051+ RangeBeg, RangeEnd : REChar;
2052+ CanBeRange : boolean;
2053+ len : integer;
2054+ ender : REChar;
2055+ begmodfs : PRegExprChar;
2056+
2057+ {$IFDEF UseSetOfChar} //###0.930
2058+ RangePCodeBeg : PRegExprChar;
2059+ RangePCodeIdx : integer;
2060+ RangeIsCI : boolean;
2061+ RangeSet : TSetOfREChar;
2062+ RangeLen : integer;
2063+ RangeChMin, RangeChMax : REChar;
2064+ {$ENDIF}
2065+
2066+ procedure EmitExactly (ch : REChar);
2067+ begin
2068+ if (fCompModifiers and MaskModI) <> 0
2069+ then ret := EmitNode (EXACTLYCI)
2070+ else ret := EmitNode (EXACTLY);
2071+ EmitC (ch);
2072+ EmitC (#0);
2073+ flagp := flagp or HASWIDTH or SIMPLE;
2074+ end;
2075+
2076+ procedure EmitStr (const s : RegExprString);
2077+ var i : integer;
2078+ begin
2079+ for i := 1 to length (s)
2080+ do EmitC (s [i]);
2081+ end;
2082+
2083+ function HexDig (ch : REChar) : integer;
2084+ begin
2085+ Result := 0;
2086+ if (ch >= 'a') and (ch <= 'f')
2087+ then ch := REChar (ord (ch) - (ord ('a') - ord ('A')));
2088+ if (ch < '0') or (ch > 'F') or ((ch > '9') and (ch < 'A')) then begin
2089+ Error (reeBadHexDigit);
2090+ EXIT;
2091+ end;
2092+ Result := ord (ch) - ord ('0');
2093+ if ch >= 'A'
2094+ then Result := Result - (ord ('A') - ord ('9') - 1);
2095+ end;
2096+
2097+ function EmitRange (AOpCode : REChar) : PRegExprChar;
2098+ begin
2099+ {$IFDEF UseSetOfChar}
2100+ case AOpCode of
2101+ ANYBUTCI, ANYBUT:
2102+ Result := EmitNode (ANYBUTTINYSET);
2103+ else // ANYOFCI, ANYOF
2104+ Result := EmitNode (ANYOFTINYSET);
2105+ end;
2106+ case AOpCode of
2107+ ANYBUTCI, ANYOFCI:
2108+ RangeIsCI := True;
2109+ else // ANYBUT, ANYOF
2110+ RangeIsCI := False;
2111+ end;
2112+ RangePCodeBeg := regcode;
2113+ RangePCodeIdx := regsize;
2114+ RangeLen := 0;
2115+ RangeSet := [];
2116+ RangeChMin := #255;
2117+ RangeChMax := #0;
2118+ {$ELSE}
2119+ Result := EmitNode (AOpCode);
2120+ // ToDo:
2121+ // !!!!!!!!!!!!! Implement ANYOF[BUT]TINYSET generation for UniCode !!!!!!!!!!
2122+ {$ENDIF}
2123+ end;
2124+
2125+{$IFDEF UseSetOfChar}
2126+ procedure EmitRangeCPrim (b : REChar); //###0.930
2127+ begin
2128+ if b in RangeSet
2129+ then EXIT;
2130+ inc (RangeLen);
2131+ if b < RangeChMin
2132+ then RangeChMin := b;
2133+ if b > RangeChMax
2134+ then RangeChMax := b;
2135+ Include (RangeSet, b);
2136+ end;
2137+ {$ENDIF}
2138+
2139+ procedure EmitRangeC (b : REChar);
2140+ {$IFDEF UseSetOfChar}
2141+ var
2142+ Ch : REChar;
2143+ {$ENDIF}
2144+ begin
2145+ CanBeRange := false;
2146+ {$IFDEF UseSetOfChar}
2147+ if b <> #0 then begin
2148+ EmitRangeCPrim (b); //###0.930
2149+ if RangeIsCI
2150+ then EmitRangeCPrim (InvertCase (b)); //###0.930
2151+ end
2152+ else begin
2153+ {$IFDEF UseAsserts}
2154+ Assert (RangeLen > 0, 'TRegExpr.ParseAtom(subroutine EmitRangeC): empty range'); // impossible, but who knows..
2155+ Assert (RangeChMin <= RangeChMax, 'TRegExpr.ParseAtom(subroutine EmitRangeC): RangeChMin > RangeChMax'); // impossible, but who knows..
2156+ {$ENDIF}
2157+ if RangeLen <= TinySetLen then begin // emit "tiny set"
2158+ if regcode = @regdummy then begin
2159+ regsize := RangePCodeIdx + TinySetLen; // RangeChMin/Max !!!
2160+ EXIT;
2161+ end;
2162+ regcode := RangePCodeBeg;
2163+ for Ch := RangeChMin to RangeChMax do //###0.930
2164+ if Ch in RangeSet then begin
2165+ regcode^ := Ch;
2166+ inc (regcode);
2167+ end;
2168+ // fill rest:
2169+ while regcode < RangePCodeBeg + TinySetLen do begin
2170+ regcode^ := RangeChMax;
2171+ inc (regcode);
2172+ end;
2173+ end
2174+ else begin
2175+ if regcode = @regdummy then begin
2176+ regsize := RangePCodeIdx + SizeOf (TSetOfREChar);
2177+ EXIT;
2178+ end;
2179+ if (RangePCodeBeg - REOpSz - RENextOffSz)^ = ANYBUTTINYSET
2180+ then RangeSet := [#0 .. #255] - RangeSet;
2181+ PREOp (RangePCodeBeg - REOpSz - RENextOffSz)^ := ANYOFFULLSET;
2182+ regcode := RangePCodeBeg;
2183+ Move (RangeSet, regcode^, SizeOf (TSetOfREChar));
2184+ inc (regcode, SizeOf (TSetOfREChar));
2185+ end;
2186+ end;
2187+ {$ELSE}
2188+ EmitC (b);
2189+ {$ENDIF}
2190+ end;
2191+
2192+ procedure EmitSimpleRangeC (b : REChar);
2193+ begin
2194+ RangeBeg := b;
2195+ EmitRangeC (b);
2196+ CanBeRange := true;
2197+ end;
2198+
2199+ procedure EmitRangeStr (const s : RegExprString);
2200+ var i : integer;
2201+ begin
2202+ for i := 1 to length (s)
2203+ do EmitRangeC (s [i]);
2204+ end;
2205+
2206+ function UnQuoteChar (var APtr : PRegExprChar) : REChar; //###0.934
2207+ begin
2208+ case APtr^ of
2209+ 't': Result := #$9; // tab (HT/TAB)
2210+ 'n': Result := #$a; // newline (NL)
2211+ 'r': Result := #$d; // car.return (CR)
2212+ 'f': Result := #$c; // form feed (FF)
2213+ 'a': Result := #$7; // alarm (bell) (BEL)
2214+ 'e': Result := #$1b; // escape (ESC)
2215+ 'x': begin // hex char
2216+ Result := #0;
2217+ inc (APtr);
2218+ if APtr^ = #0 then begin
2219+ Error (reeNoHexCodeAfterBSlashX);
2220+ EXIT;
2221+ end;
2222+ if APtr^ = '{' then begin // \x{nnnn} //###0.936
2223+ REPEAT
2224+ inc (APtr);
2225+ if APtr^ = #0 then begin
2226+ Error (reeNoHexCodeAfterBSlashX);
2227+ EXIT;
2228+ end;
2229+ if APtr^ <> '}' then begin
2230+ if (Ord (Result)
2231+ ShR (SizeOf (REChar) * 8 - 4)) and $F <> 0 then begin
2232+ Error (reeHexCodeAfterBSlashXTooBig);
2233+ EXIT;
2234+ end;
2235+ Result := REChar ((Ord (Result) ShL 4) or HexDig (APtr^));
2236+ // HexDig will cause Error if bad hex digit found
2237+ end
2238+ else BREAK;
2239+ UNTIL False;
2240+ end
2241+ else begin
2242+ Result := REChar (HexDig (APtr^));
2243+ // HexDig will cause Error if bad hex digit found
2244+ inc (APtr);
2245+ if APtr^ = #0 then begin
2246+ Error (reeNoHexCodeAfterBSlashX);
2247+ EXIT;
2248+ end;
2249+ Result := REChar ((Ord (Result) ShL 4) or HexDig (APtr^));
2250+ // HexDig will cause Error if bad hex digit found
2251+ end;
2252+ end;
2253+ else Result := APtr^;
2254+ end;
2255+ end;
2256+
2257+ begin
2258+ Result := nil;
2259+ flagp := WORST; // Tentatively.
2260+
2261+ inc (regparse);
2262+ case (regparse - 1)^ of
2263+ '^': if ((fCompModifiers and MaskModM) = 0)
2264+ or ((fLineSeparators = '') and not fLinePairedSeparatorAssigned)
2265+ then ret := EmitNode (BOL)
2266+ else ret := EmitNode (BOLML);
2267+ '$': if ((fCompModifiers and MaskModM) = 0)
2268+ or ((fLineSeparators = '') and not fLinePairedSeparatorAssigned)
2269+ then ret := EmitNode (EOL)
2270+ else ret := EmitNode (EOLML);
2271+ '.':
2272+ if (fCompModifiers and MaskModS) <> 0 then begin
2273+ ret := EmitNode (ANY);
2274+ flagp := flagp or HASWIDTH or SIMPLE;
2275+ end
2276+ else begin // not /s, so emit [^:LineSeparators:]
2277+ ret := EmitNode (ANYML);
2278+ flagp := flagp or HASWIDTH; // not so simple ;)
2279+// ret := EmitRange (ANYBUT);
2280+// EmitRangeStr (LineSeparators); //###0.941
2281+// EmitRangeStr (LinePairedSeparator); // !!! isn't correct if have to accept only paired
2282+// EmitRangeC (#0);
2283+// flagp := flagp or HASWIDTH or SIMPLE;
2284+ end;
2285+ '[': begin
2286+ if regparse^ = '^' then begin // Complement of range.
2287+ if (fCompModifiers and MaskModI) <> 0
2288+ then ret := EmitRange (ANYBUTCI)
2289+ else ret := EmitRange (ANYBUT);
2290+ inc (regparse);
2291+ end
2292+ else
2293+ if (fCompModifiers and MaskModI) <> 0
2294+ then ret := EmitRange (ANYOFCI)
2295+ else ret := EmitRange (ANYOF);
2296+
2297+ CanBeRange := false;
2298+
2299+ if (regparse^ = ']') then begin
2300+ EmitSimpleRangeC (regparse^); // []-a] -> ']' .. 'a'
2301+ inc (regparse);
2302+ end;
2303+
2304+ while (regparse^ <> #0) and (regparse^ <> ']') do begin
2305+ if (regparse^ = '-')
2306+ and ((regparse + 1)^ <> #0) and ((regparse + 1)^ <> ']')
2307+ and CanBeRange then begin
2308+ inc (regparse);
2309+ RangeEnd := regparse^;
2310+ if RangeEnd = EscChar then begin
2311+ {$IFDEF UniCode} //###0.935
2312+ if (ord ((regparse + 1)^) < 256)
2313+ and (CharInSet(char ((regparse + 1)^), ['d', 'D', 's', 'S', 'w', 'W'])) then begin
2314+ {$ELSE}
2315+ if (regparse + 1)^ in ['d', 'D', 's', 'S', 'w', 'W'] then begin
2316+ {$ENDIF}
2317+ EmitRangeC ('-'); // or treat as error ?!!
2318+ CONTINUE;
2319+ end;
2320+ inc (regparse);
2321+ RangeEnd := UnQuoteChar (regparse);
2322+ end;
2323+
2324+ // r.e.ranges extension for russian
2325+ if ((fCompModifiers and MaskModR) <> 0)
2326+ and (RangeBeg = RusRangeLoLow) and (RangeEnd = RusRangeLoHigh) then begin
2327+ EmitRangeStr (RusRangeLo);
2328+ end
2329+ else if ((fCompModifiers and MaskModR) <> 0)
2330+ and (RangeBeg = RusRangeHiLow) and (RangeEnd = RusRangeHiHigh) then begin
2331+ EmitRangeStr (RusRangeHi);
2332+ end
2333+ else if ((fCompModifiers and MaskModR) <> 0)
2334+ and (RangeBeg = RusRangeLoLow) and (RangeEnd = RusRangeHiHigh) then begin
2335+ EmitRangeStr (RusRangeLo);
2336+ EmitRangeStr (RusRangeHi);
2337+ end
2338+ else begin // standard r.e. handling
2339+ if RangeBeg > RangeEnd then begin
2340+ Error (reeInvalidRange);
2341+ EXIT;
2342+ end;
2343+ inc (RangeBeg);
2344+ EmitRangeC (RangeEnd); // prevent infinite loop if RangeEnd=$ff
2345+ while RangeBeg < RangeEnd do begin //###0.929
2346+ EmitRangeC (RangeBeg);
2347+ inc (RangeBeg);
2348+ end;
2349+ end;
2350+ inc (regparse);
2351+ end
2352+ else begin
2353+ if regparse^ = EscChar then begin
2354+ inc (regparse);
2355+ if regparse^ = #0 then begin
2356+ Error (reeParseAtomTrailingBackSlash);
2357+ EXIT;
2358+ end;
2359+ case regparse^ of // r.e.extensions
2360+ 'd': EmitRangeStr ('0123456789');
2361+ 'w': EmitRangeStr (WordChars);
2362+ 's': EmitRangeStr (SpaceChars);
2363+ else EmitSimpleRangeC (UnQuoteChar (regparse));
2364+ end; { of case}
2365+ end
2366+ else EmitSimpleRangeC (regparse^);
2367+ inc (regparse);
2368+ end;
2369+ end; { of while}
2370+ EmitRangeC (#0);
2371+ if regparse^ <> ']' then begin
2372+ Error (reeUnmatchedSqBrackets);
2373+ EXIT;
2374+ end;
2375+ inc (regparse);
2376+ flagp := flagp or HASWIDTH or SIMPLE;
2377+ end;
2378+ '(': begin
2379+ if regparse^ = '?' then begin
2380+ // check for extended Perl syntax : (?..)
2381+ if (regparse + 1)^ = '#' then begin // (?#comment)
2382+ inc (regparse, 2); // find closing ')'
2383+ while (regparse^ <> #0) and (regparse^ <> ')')
2384+ do inc (regparse);
2385+ if regparse^ <> ')' then begin
2386+ Error (reeUnclosedComment);
2387+ EXIT;
2388+ end;
2389+ inc (regparse); // skip ')'
2390+ ret := EmitNode (COMMENT); // comment
2391+ end
2392+ else begin // modifiers ?
2393+ inc (regparse); // skip '?'
2394+ begmodfs := regparse;
2395+ while (regparse^ <> #0) and (regparse^ <> ')')
2396+ do inc (regparse);
2397+ if (regparse^ <> ')')
2398+ or not ParseModifiersStr (copy (begmodfs, 1, (regparse - begmodfs)), fCompModifiers) then begin
2399+ Error (reeUrecognizedModifier);
2400+ EXIT;
2401+ end;
2402+ inc (regparse); // skip ')'
2403+ ret := EmitNode (COMMENT); // comment
2404+// Error (reeQPSBFollowsNothing);
2405+// EXIT;
2406+ end;
2407+ end
2408+ else begin
2409+ ret := ParseReg (1, flags);
2410+ if ret = nil then begin
2411+ Result := nil;
2412+ EXIT;
2413+ end;
2414+ flagp := flagp or flags and (HASWIDTH or SPSTART);
2415+ end;
2416+ end;
2417+ #0, '|', ')': begin // Supposed to be caught earlier.
2418+ Error (reeInternalUrp);
2419+ EXIT;
2420+ end;
2421+ '?', '+', '*': begin
2422+ Error (reeQPSBFollowsNothing);
2423+ EXIT;
2424+ end;
2425+ EscChar: begin
2426+ if regparse^ = #0 then begin
2427+ Error (reeTrailingBackSlash);
2428+ EXIT;
2429+ end;
2430+ case regparse^ of // r.e.extensions
2431+ 'b': ret := EmitNode (BOUND); //###0.943
2432+ 'B': ret := EmitNode (NOTBOUND); //###0.943
2433+ 'A': ret := EmitNode (BOL); //###0.941
2434+ 'Z': ret := EmitNode (EOL); //###0.941
2435+ 'd': begin // r.e.extension - any digit ('0' .. '9')
2436+ ret := EmitNode (ANYDIGIT);
2437+ flagp := flagp or HASWIDTH or SIMPLE;
2438+ end;
2439+ 'D': begin // r.e.extension - not digit ('0' .. '9')
2440+ ret := EmitNode (NOTDIGIT);
2441+ flagp := flagp or HASWIDTH or SIMPLE;
2442+ end;
2443+ 's': begin // r.e.extension - any space char
2444+ {$IFDEF UseSetOfChar}
2445+ ret := EmitRange (ANYOF);
2446+ EmitRangeStr (SpaceChars);
2447+ EmitRangeC (#0);
2448+ {$ELSE}
2449+ ret := EmitNode (ANYSPACE);
2450+ {$ENDIF}
2451+ flagp := flagp or HASWIDTH or SIMPLE;
2452+ end;
2453+ 'S': begin // r.e.extension - not space char
2454+ {$IFDEF UseSetOfChar}
2455+ ret := EmitRange (ANYBUT);
2456+ EmitRangeStr (SpaceChars);
2457+ EmitRangeC (#0);
2458+ {$ELSE}
2459+ ret := EmitNode (NOTSPACE);
2460+ {$ENDIF}
2461+ flagp := flagp or HASWIDTH or SIMPLE;
2462+ end;
2463+ 'w': begin // r.e.extension - any english char / digit / '_'
2464+ {$IFDEF UseSetOfChar}
2465+ ret := EmitRange (ANYOF);
2466+ EmitRangeStr (WordChars);
2467+ EmitRangeC (#0);
2468+ {$ELSE}
2469+ ret := EmitNode (ANYLETTER);
2470+ {$ENDIF}
2471+ flagp := flagp or HASWIDTH or SIMPLE;
2472+ end;
2473+ 'W': begin // r.e.extension - not english char / digit / '_'
2474+ {$IFDEF UseSetOfChar}
2475+ ret := EmitRange (ANYBUT);
2476+ EmitRangeStr (WordChars);
2477+ EmitRangeC (#0);
2478+ {$ELSE}
2479+ ret := EmitNode (NOTLETTER);
2480+ {$ENDIF}
2481+ flagp := flagp or HASWIDTH or SIMPLE;
2482+ end;
2483+ '1' .. '9': begin //###0.936
2484+ if (fCompModifiers and MaskModI) <> 0
2485+ then ret := EmitNode (BSUBEXPCI)
2486+ else ret := EmitNode (BSUBEXP);
2487+ EmitC (REChar (ord (regparse^) - ord ('0')));
2488+ flagp := flagp or HASWIDTH or SIMPLE;
2489+ end;
2490+ else EmitExactly (UnQuoteChar (regparse));
2491+ end; { of case}
2492+ inc (regparse);
2493+ end;
2494+ else begin
2495+ dec (regparse);
2496+ if ((fCompModifiers and MaskModX) <> 0) and // check for eXtended syntax
2497+ ((regparse^ = '#')
2498+ or ({$IFDEF UniCode}StrScan (XIgnoredChars, regparse^) <> nil //###0.947
2499+ {$ELSE}regparse^ in XIgnoredChars{$ENDIF})) then begin //###0.941 \x
2500+ if regparse^ = '#' then begin // Skip eXtended comment
2501+ // find comment terminator (group of \n and/or \r)
2502+ while (regparse^ <> #0) and (regparse^ <> #$d) and (regparse^ <> #$a)
2503+ do inc (regparse);
2504+ while (regparse^ = #$d) or (regparse^ = #$a) // skip comment terminator
2505+ do inc (regparse); // attempt to support different type of line separators
2506+ end
2507+ else begin // Skip the blanks!
2508+ while {$IFDEF UniCode}StrScan (XIgnoredChars, regparse^) <> nil //###0.947
2509+ {$ELSE}regparse^ in XIgnoredChars{$ENDIF}
2510+ do inc (regparse);
2511+ end;
2512+ ret := EmitNode (COMMENT); // comment
2513+ end
2514+ else begin
2515+ len := strcspn (regparse, META);
2516+ if len <= 0 then
2517+ if regparse^ <> '{' then begin
2518+ Error (reeRarseAtomInternalDisaster);
2519+ EXIT;
2520+ end
2521+ else len := strcspn (regparse + 1, META) + 1; // bad {n,m} - compile as EXATLY
2522+ ender := (regparse + len)^;
2523+ if (len > 1)
2524+ and ((ender = '*') or (ender = '+') or (ender = '?') or (ender = '{'))
2525+ then dec (len); // Back off clear of ?+*{ operand.
2526+ flagp := flagp or HASWIDTH;
2527+ if len = 1
2528+ then flagp := flagp or SIMPLE;
2529+ if (fCompModifiers and MaskModI) <> 0
2530+ then ret := EmitNode (EXACTLYCI)
2531+ else ret := EmitNode (EXACTLY);
2532+ while (len > 0)
2533+ and (((fCompModifiers and MaskModX) = 0) or (regparse^ <> '#')) do begin
2534+ if ((fCompModifiers and MaskModX) = 0) or not ( //###0.941
2535+ {$IFDEF UniCode}StrScan (XIgnoredChars, regparse^) <> nil //###0.947
2536+ {$ELSE}regparse^ in XIgnoredChars{$ENDIF} )
2537+ then EmitC (regparse^);
2538+ inc (regparse);
2539+ dec (len);
2540+ end;
2541+ EmitC (#0);
2542+ end; { of if not comment}
2543+ end; { of case else}
2544+ end; { of case}
2545+
2546+ Result := ret;
2547+ end; { of function TRegExpr.ParseAtom
2548+--------------------------------------------------------------}
2549+
2550+function TRegExpr.GetCompilerErrorPos : integer;
2551+ begin
2552+ Result := 0;
2553+ if (regexpbeg = nil) or (regparse = nil)
2554+ then EXIT; // not in compiling mode ?
2555+ Result := regparse - regexpbeg;
2556+ end; { of function TRegExpr.GetCompilerErrorPos
2557+--------------------------------------------------------------}
2558+
2559+
2560+{=============================================================}
2561+{===================== Matching section ======================}
2562+{=============================================================}
2563+
2564+{$IFNDEF UseSetOfChar}
2565+function TRegExpr.StrScanCI (s : PRegExprChar; ch : REChar) : PRegExprChar; //###0.928 - now method of TRegExpr
2566+ begin
2567+ while (s^ <> #0) and (s^ <> ch) and (s^ <> InvertCase (ch))
2568+ do inc (s);
2569+ if s^ <> #0
2570+ then Result := s
2571+ else Result := nil;
2572+ end; { of function TRegExpr.StrScanCI
2573+--------------------------------------------------------------}
2574+{$ENDIF}
2575+
2576+function TRegExpr.regrepeat (p : PRegExprChar; AMax : integer) : integer;
2577+// repeatedly match something simple, report how many
2578+ var
2579+ scan : PRegExprChar;
2580+ opnd : PRegExprChar;
2581+ TheMax : integer;
2582+ {Ch,} InvCh : REChar; //###0.931
2583+ sestart, seend : PRegExprChar; //###0.936
2584+ begin
2585+ Result := 0;
2586+ scan := reginput;
2587+ opnd := p + REOpSz + RENextOffSz; //OPERAND
2588+ TheMax := fInputEnd - scan;
2589+ if TheMax > AMax
2590+ then TheMax := AMax;
2591+ case PREOp (p)^ of
2592+ ANY: begin
2593+ // note - ANYML cannot be proceeded in regrepeat because can skip
2594+ // more than one char at once
2595+ Result := TheMax;
2596+ inc (scan, Result);
2597+ end;
2598+ EXACTLY: begin // in opnd can be only ONE char !!!
2599+// Ch := opnd^; // store in register //###0.931
2600+ while (Result < TheMax) and (opnd^ = scan^) do begin
2601+ inc (Result);
2602+ inc (scan);
2603+ end;
2604+ end;
2605+ EXACTLYCI: begin // in opnd can be only ONE char !!!
2606+// Ch := opnd^; // store in register //###0.931
2607+ while (Result < TheMax) and (opnd^ = scan^) do begin // prevent unneeded InvertCase //###0.931
2608+ inc (Result);
2609+ inc (scan);
2610+ end;
2611+ if Result < TheMax then begin //###0.931
2612+ InvCh := InvertCase (opnd^); // store in register
2613+ while (Result < TheMax) and
2614+ ((opnd^ = scan^) or (InvCh = scan^)) do begin
2615+ inc (Result);
2616+ inc (scan);
2617+ end;
2618+ end;
2619+ end;
2620+ BSUBEXP: begin //###0.936
2621+ sestart := startp [ord (opnd^)];
2622+ if sestart = nil
2623+ then EXIT;
2624+ seend := endp [ord (opnd^)];
2625+ if seend = nil
2626+ then EXIT;
2627+ REPEAT
2628+ opnd := sestart;
2629+ while opnd < seend do begin
2630+ if (scan >= fInputEnd) or (scan^ <> opnd^)
2631+ then EXIT;
2632+ inc (scan);
2633+ inc (opnd);
2634+ end;
2635+ inc (Result);
2636+ reginput := scan;
2637+ UNTIL Result >= AMax;
2638+ end;
2639+ BSUBEXPCI: begin //###0.936
2640+ sestart := startp [ord (opnd^)];
2641+ if sestart = nil
2642+ then EXIT;
2643+ seend := endp [ord (opnd^)];
2644+ if seend = nil
2645+ then EXIT;
2646+ REPEAT
2647+ opnd := sestart;
2648+ while opnd < seend do begin
2649+ if (scan >= fInputEnd) or
2650+ ((scan^ <> opnd^) and (scan^ <> InvertCase (opnd^)))
2651+ then EXIT;
2652+ inc (scan);
2653+ inc (opnd);
2654+ end;
2655+ inc (Result);
2656+ reginput := scan;
2657+ UNTIL Result >= AMax;
2658+ end;
2659+ ANYDIGIT:
2660+ while (Result < TheMax) and
2661+ (scan^ >= '0') and (scan^ <= '9') do begin
2662+ inc (Result);
2663+ inc (scan);
2664+ end;
2665+ NOTDIGIT:
2666+ while (Result < TheMax) and
2667+ ((scan^ < '0') or (scan^ > '9')) do begin
2668+ inc (Result);
2669+ inc (scan);
2670+ end;
2671+ {$IFNDEF UseSetOfChar} //###0.929
2672+ ANYLETTER:
2673+ while (Result < TheMax) and
2674+ (Pos (scan^, fWordChars) > 0) //###0.940
2675+ { ((scan^ >= 'a') and (scan^ <= 'z') !! I've forgotten (>='0') and (<='9')
2676+ or (scan^ >= 'A') and (scan^ <= 'Z') or (scan^ = '_'))} do begin
2677+ inc (Result);
2678+ inc (scan);
2679+ end;
2680+ NOTLETTER:
2681+ while (Result < TheMax) and
2682+ (Pos (scan^, fWordChars) <= 0) //###0.940
2683+ { not ((scan^ >= 'a') and (scan^ <= 'z') !! I've forgotten (>='0') and (<='9')
2684+ or (scan^ >= 'A') and (scan^ <= 'Z')
2685+ or (scan^ = '_'))} do begin
2686+ inc (Result);
2687+ inc (scan);
2688+ end;
2689+ ANYSPACE:
2690+ while (Result < TheMax) and
2691+ (Pos (scan^, fSpaceChars) > 0) do begin
2692+ inc (Result);
2693+ inc (scan);
2694+ end;
2695+ NOTSPACE:
2696+ while (Result < TheMax) and
2697+ (Pos (scan^, fSpaceChars) <= 0) do begin
2698+ inc (Result);
2699+ inc (scan);
2700+ end;
2701+ {$ENDIF}
2702+ ANYOFTINYSET: begin
2703+ while (Result < TheMax) and //!!!TinySet
2704+ ((scan^ = opnd^) or (scan^ = (opnd + 1)^)
2705+ or (scan^ = (opnd + 2)^)) do begin
2706+ inc (Result);
2707+ inc (scan);
2708+ end;
2709+ end;
2710+ ANYBUTTINYSET: begin
2711+ while (Result < TheMax) and //!!!TinySet
2712+ (scan^ <> opnd^) and (scan^ <> (opnd + 1)^)
2713+ and (scan^ <> (opnd + 2)^) do begin
2714+ inc (Result);
2715+ inc (scan);
2716+ end;
2717+ end;
2718+ {$IFDEF UseSetOfChar} //###0.929
2719+ ANYOFFULLSET: begin
2720+ while (Result < TheMax) and
2721+ (scan^ in PSetOfREChar (opnd)^) do begin
2722+ inc (Result);
2723+ inc (scan);
2724+ end;
2725+ end;
2726+ {$ELSE}
2727+ ANYOF:
2728+ while (Result < TheMax) and
2729+ (StrScan (opnd, scan^) <> nil) do begin
2730+ inc (Result);
2731+ inc (scan);
2732+ end;
2733+ ANYBUT:
2734+ while (Result < TheMax) and
2735+ (StrScan (opnd, scan^) = nil) do begin
2736+ inc (Result);
2737+ inc (scan);
2738+ end;
2739+ ANYOFCI:
2740+ while (Result < TheMax) and (StrScanCI (opnd, scan^) <> nil) do begin
2741+ inc (Result);
2742+ inc (scan);
2743+ end;
2744+ ANYBUTCI:
2745+ while (Result < TheMax) and (StrScanCI (opnd, scan^) = nil) do begin
2746+ inc (Result);
2747+ inc (scan);
2748+ end;
2749+ {$ENDIF}
2750+ else begin // Oh dear. Called inappropriately.
2751+ Result := 0; // Best compromise.
2752+ Error (reeRegRepeatCalledInappropriately);
2753+ EXIT;
2754+ end;
2755+ end; { of case}
2756+ reginput := scan;
2757+ end; { of function TRegExpr.regrepeat
2758+--------------------------------------------------------------}
2759+
2760+function TRegExpr.regnext (p : PRegExprChar) : PRegExprChar;
2761+// dig the "next" pointer out of a node
2762+ var offset : TRENextOff;
2763+ begin
2764+ if p = @regdummy then begin
2765+ Result := nil;
2766+ EXIT;
2767+ end;
2768+ offset := PRENextOff (p + REOpSz)^; //###0.933 inlined NEXT
2769+ if offset = 0
2770+ then Result := nil
2771+ else Result := p + offset;
2772+ end; { of function TRegExpr.regnext
2773+--------------------------------------------------------------}
2774+
2775+function TRegExpr.MatchPrim (prog : PRegExprChar) : boolean;
2776+// recursively matching routine
2777+// Conceptually the strategy is simple: check to see whether the current
2778+// node matches, call self recursively to see whether the rest matches,
2779+// and then act accordingly. In practice we make some effort to avoid
2780+// recursion, in particular by going through "ordinary" nodes (that don't
2781+// need to know whether the rest of the match failed) by a loop instead of
2782+// by recursion.
2783+ var
2784+ scan : PRegExprChar; // Current node.
2785+ next : PRegExprChar; // Next node.
2786+ len : integer;
2787+ opnd : PRegExprChar;
2788+ no : integer;
2789+ save : PRegExprChar;
2790+ nextch : REChar;
2791+ BracesMin, BracesMax : integer; // we use integer instead of TREBracesArg for better support */+
2792+ {$IFDEF ComplexBraces}
2793+ SavedLoopStack : array [1 .. LoopStackMax] of integer; // :(( very bad for recursion
2794+ SavedLoopStackIdx : integer; //###0.925
2795+ {$ENDIF}
2796+ begin
2797+ Result := false;
2798+ scan := prog;
2799+
2800+ while scan <> nil do begin
2801+ len := PRENextOff (scan + 1)^; //###0.932 inlined regnext
2802+ if len = 0
2803+ then next := nil
2804+ else next := scan + len;
2805+
2806+ case scan^ of
2807+ NOTBOUND, //###0.943 //!!! think about UseSetOfChar !!!
2808+ BOUND:
2809+ if (scan^ = BOUND)
2810+ xor (
2811+ ((reginput = fInputStart) or (Pos ((reginput - 1)^, fWordChars) <= 0))
2812+ and (reginput^ <> #0) and (Pos (reginput^, fWordChars) > 0)
2813+ or
2814+ (reginput <> fInputStart) and (Pos ((reginput - 1)^, fWordChars) > 0)
2815+ and ((reginput^ = #0) or (Pos (reginput^, fWordChars) <= 0)))
2816+ then EXIT;
2817+
2818+ BOL: if reginput <> fInputStart
2819+ then EXIT;
2820+ EOL: if reginput^ <> #0
2821+ then EXIT;
2822+ BOLML: if reginput > fInputStart then begin
2823+ nextch := (reginput - 1)^;
2824+ if (nextch <> fLinePairedSeparatorTail)
2825+ or ((reginput - 1) <= fInputStart)
2826+ or ((reginput - 2)^ <> fLinePairedSeparatorHead)
2827+ then begin
2828+ if (nextch = fLinePairedSeparatorHead)
2829+ and (reginput^ = fLinePairedSeparatorTail)
2830+ then EXIT; // don't stop between paired separator
2831+ if
2832+ {$IFNDEF UniCode}
2833+ not (nextch in fLineSeparatorsSet)
2834+ {$ELSE}
2835+ (pos (nextch, fLineSeparators) <= 0)
2836+ {$ENDIF}
2837+ then EXIT;
2838+ end;
2839+ end;
2840+ EOLML: if reginput^ <> #0 then begin
2841+ nextch := reginput^;
2842+ if (nextch <> fLinePairedSeparatorHead)
2843+ or ((reginput + 1)^ <> fLinePairedSeparatorTail)
2844+ then begin
2845+ if (nextch = fLinePairedSeparatorTail)
2846+ and (reginput > fInputStart)
2847+ and ((reginput - 1)^ = fLinePairedSeparatorHead)
2848+ then EXIT; // don't stop between paired separator
2849+ if
2850+ {$IFNDEF UniCode}
2851+ not (nextch in fLineSeparatorsSet)
2852+ {$ELSE}
2853+ (pos (nextch, fLineSeparators) <= 0)
2854+ {$ENDIF}
2855+ then EXIT;
2856+ end;
2857+ end;
2858+ ANY: begin
2859+ if reginput^ = #0
2860+ then EXIT;
2861+ inc (reginput);
2862+ end;
2863+ ANYML: begin //###0.941
2864+ if (reginput^ = #0)
2865+ or ((reginput^ = fLinePairedSeparatorHead)
2866+ and ((reginput + 1)^ = fLinePairedSeparatorTail))
2867+ or {$IFNDEF UniCode} (reginput^ in fLineSeparatorsSet)
2868+ {$ELSE} (pos (reginput^, fLineSeparators) > 0) {$ENDIF}
2869+ then EXIT;
2870+ inc (reginput);
2871+ end;
2872+ ANYDIGIT: begin
2873+ if (reginput^ = #0) or (reginput^ < '0') or (reginput^ > '9')
2874+ then EXIT;
2875+ inc (reginput);
2876+ end;
2877+ NOTDIGIT: begin
2878+ if (reginput^ = #0) or ((reginput^ >= '0') and (reginput^ <= '9'))
2879+ then EXIT;
2880+ inc (reginput);
2881+ end;
2882+ {$IFNDEF UseSetOfChar} //###0.929
2883+ ANYLETTER: begin
2884+ if (reginput^ = #0) or (Pos (reginput^, fWordChars) <= 0) //###0.943
2885+ then EXIT;
2886+ inc (reginput);
2887+ end;
2888+ NOTLETTER: begin
2889+ if (reginput^ = #0) or (Pos (reginput^, fWordChars) > 0) //###0.943
2890+ then EXIT;
2891+ inc (reginput);
2892+ end;
2893+ ANYSPACE: begin
2894+ if (reginput^ = #0) or not (Pos (reginput^, fSpaceChars) > 0) //###0.943
2895+ then EXIT;
2896+ inc (reginput);
2897+ end;
2898+ NOTSPACE: begin
2899+ if (reginput^ = #0) or (Pos (reginput^, fSpaceChars) > 0) //###0.943
2900+ then EXIT;
2901+ inc (reginput);
2902+ end;
2903+ {$ENDIF}
2904+ EXACTLYCI: begin
2905+ opnd := scan + REOpSz + RENextOffSz; // OPERAND
2906+ // Inline the first character, for speed.
2907+ if (opnd^ <> reginput^)
2908+ and (InvertCase (opnd^) <> reginput^)
2909+ then EXIT;
2910+ len := strlen (opnd);
2911+ //###0.929 begin
2912+ no := len;
2913+ save := reginput;
2914+ while no > 1 do begin
2915+ inc (save);
2916+ inc (opnd);
2917+ if (opnd^ <> save^)
2918+ and (InvertCase (opnd^) <> save^)
2919+ then EXIT;
2920+ dec (no);
2921+ end;
2922+ //###0.929 end
2923+ inc (reginput, len);
2924+ end;
2925+ EXACTLY: begin
2926+ opnd := scan + REOpSz + RENextOffSz; // OPERAND
2927+ // Inline the first character, for speed.
2928+ if opnd^ <> reginput^
2929+ then EXIT;
2930+ len := strlen (opnd);
2931+ //###0.929 begin
2932+ no := len;
2933+ save := reginput;
2934+ while no > 1 do begin
2935+ inc (save);
2936+ inc (opnd);
2937+ if opnd^ <> save^
2938+ then EXIT;
2939+ dec (no);
2940+ end;
2941+ //###0.929 end
2942+ inc (reginput, len);
2943+ end;
2944+ BSUBEXP: begin //###0.936
2945+ no := ord ((scan + REOpSz + RENextOffSz)^);
2946+ if startp [no] = nil
2947+ then EXIT;
2948+ if endp [no] = nil
2949+ then EXIT;
2950+ save := reginput;
2951+ opnd := startp [no];
2952+ while opnd < endp [no] do begin
2953+ if (save >= fInputEnd) or (save^ <> opnd^)
2954+ then EXIT;
2955+ inc (save);
2956+ inc (opnd);
2957+ end;
2958+ reginput := save;
2959+ end;
2960+ BSUBEXPCI: begin //###0.936
2961+ no := ord ((scan + REOpSz + RENextOffSz)^);
2962+ if startp [no] = nil
2963+ then EXIT;
2964+ if endp [no] = nil
2965+ then EXIT;
2966+ save := reginput;
2967+ opnd := startp [no];
2968+ while opnd < endp [no] do begin
2969+ if (save >= fInputEnd) or
2970+ ((save^ <> opnd^) and (save^ <> InvertCase (opnd^)))
2971+ then EXIT;
2972+ inc (save);
2973+ inc (opnd);
2974+ end;
2975+ reginput := save;
2976+ end;
2977+ ANYOFTINYSET: begin
2978+ if (reginput^ = #0) or //!!!TinySet
2979+ ((reginput^ <> (scan + REOpSz + RENextOffSz)^)
2980+ and (reginput^ <> (scan + REOpSz + RENextOffSz + 1)^)
2981+ and (reginput^ <> (scan + REOpSz + RENextOffSz + 2)^))
2982+ then EXIT;
2983+ inc (reginput);
2984+ end;
2985+ ANYBUTTINYSET: begin
2986+ if (reginput^ = #0) or //!!!TinySet
2987+ (reginput^ = (scan + REOpSz + RENextOffSz)^)
2988+ or (reginput^ = (scan + REOpSz + RENextOffSz + 1)^)
2989+ or (reginput^ = (scan + REOpSz + RENextOffSz + 2)^)
2990+ then EXIT;
2991+ inc (reginput);
2992+ end;
2993+ {$IFDEF UseSetOfChar} //###0.929
2994+ ANYOFFULLSET: begin
2995+ if (reginput^ = #0)
2996+ or not (reginput^ in PSetOfREChar (scan + REOpSz + RENextOffSz)^)
2997+ then EXIT;
2998+ inc (reginput);
2999+ end;
3000+ {$ELSE}
3001+ ANYOF: begin
3002+ if (reginput^ = #0) or (StrScan (scan + REOpSz + RENextOffSz, reginput^) = nil)
3003+ then EXIT;
3004+ inc (reginput);
3005+ end;
3006+ ANYBUT: begin
3007+ if (reginput^ = #0) or (StrScan (scan + REOpSz + RENextOffSz, reginput^) <> nil)
3008+ then EXIT;
3009+ inc (reginput);
3010+ end;
3011+ ANYOFCI: begin
3012+ if (reginput^ = #0) or (StrScanCI (scan + REOpSz + RENextOffSz, reginput^) = nil)
3013+ then EXIT;
3014+ inc (reginput);
3015+ end;
3016+ ANYBUTCI: begin
3017+ if (reginput^ = #0) or (StrScanCI (scan + REOpSz + RENextOffSz, reginput^) <> nil)
3018+ then EXIT;
3019+ inc (reginput);
3020+ end;
3021+ {$ENDIF}
3022+ NOTHING: ;
3023+ COMMENT: ;
3024+ BACK: ;
3025+ Succ (OPEN) .. TREOp (Ord (OPEN) + NSUBEXP - 1) : begin //###0.929
3026+ no := ord (scan^) - ord (OPEN);
3027+// save := reginput;
3028+ save := startp [no]; //###0.936
3029+ startp [no] := reginput; //###0.936
3030+ Result := MatchPrim (next);
3031+ if not Result //###0.936
3032+ then startp [no] := save;
3033+// if Result and (startp [no] = nil)
3034+// then startp [no] := save;
3035+ // Don't set startp if some later invocation of the same
3036+ // parentheses already has.
3037+ EXIT;
3038+ end;
3039+ Succ (CLOSE) .. TREOp (Ord (CLOSE) + NSUBEXP - 1): begin //###0.929
3040+ no := ord (scan^) - ord (CLOSE);
3041+// save := reginput;
3042+ save := endp [no]; //###0.936
3043+ endp [no] := reginput; //###0.936
3044+ Result := MatchPrim (next);
3045+ if not Result //###0.936
3046+ then endp [no] := save;
3047+// if Result and (endp [no] = nil)
3048+// then endp [no] := save;
3049+ // Don't set endp if some later invocation of the same
3050+ // parentheses already has.
3051+ EXIT;
3052+ end;
3053+ BRANCH: begin
3054+ if (next^ <> BRANCH) // No choice.
3055+ then next := scan + REOpSz + RENextOffSz // Avoid recursion
3056+ else begin
3057+ REPEAT
3058+ save := reginput;
3059+ Result := MatchPrim (scan + REOpSz + RENextOffSz);
3060+ if Result
3061+ then EXIT;
3062+ reginput := save;
3063+ scan := regnext (scan);
3064+ UNTIL (scan = nil) or (scan^ <> BRANCH);
3065+ EXIT;
3066+ end;
3067+ end;
3068+ {$IFDEF ComplexBraces}
3069+ LOOPENTRY: begin //###0.925
3070+ no := LoopStackIdx;
3071+ inc (LoopStackIdx);
3072+ if LoopStackIdx > LoopStackMax then begin
3073+ Error (reeLoopStackExceeded);
3074+ EXIT;
3075+ end;
3076+ save := reginput;
3077+ LoopStack [LoopStackIdx] := 0; // init loop counter
3078+ Result := MatchPrim (next); // execute LOOP
3079+ LoopStackIdx := no; // cleanup
3080+ if Result
3081+ then EXIT;
3082+ reginput := save;
3083+ EXIT;
3084+ end;
3085+ LOOP, LOOPNG: begin //###0.940
3086+ if LoopStackIdx <= 0 then begin
3087+ Error (reeLoopWithoutEntry);
3088+ EXIT;
3089+ end;
3090+ opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + 2 * REBracesArgSz)^;
3091+ BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^;
3092+ BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^;
3093+ save := reginput;
3094+ if LoopStack [LoopStackIdx] >= BracesMin then begin // Min alredy matched - we can work
3095+ if scan^ = LOOP then begin
3096+ // greedy way - first try to max deep of greed ;)
3097+ if LoopStack [LoopStackIdx] < BracesMax then begin
3098+ inc (LoopStack [LoopStackIdx]);
3099+ no := LoopStackIdx;
3100+ Result := MatchPrim (opnd);
3101+ LoopStackIdx := no;
3102+ if Result
3103+ then EXIT;
3104+ reginput := save;
3105+ end;
3106+ dec (LoopStackIdx); // Fail. May be we are too greedy? ;)
3107+ Result := MatchPrim (next);
3108+ if not Result
3109+ then reginput := save;
3110+ EXIT;
3111+ end
3112+ else begin
3113+ // non-greedy - try just now
3114+ Result := MatchPrim (next);
3115+ if Result
3116+ then EXIT
3117+ else reginput := save; // failed - move next and try again
3118+ if LoopStack [LoopStackIdx] < BracesMax then begin
3119+ inc (LoopStack [LoopStackIdx]);
3120+ no := LoopStackIdx;
3121+ Result := MatchPrim (opnd);
3122+ LoopStackIdx := no;
3123+ if Result
3124+ then EXIT;
3125+ reginput := save;
3126+ end;
3127+ dec (LoopStackIdx); // Failed - back up
3128+ EXIT;
3129+ end
3130+ end
3131+ else begin // first match a min_cnt times
3132+ inc (LoopStack [LoopStackIdx]);
3133+ no := LoopStackIdx;
3134+ Result := MatchPrim (opnd);
3135+ LoopStackIdx := no;
3136+ if Result
3137+ then EXIT;
3138+ dec (LoopStack [LoopStackIdx]);
3139+ reginput := save;
3140+ EXIT;
3141+ end;
3142+ end;
3143+ {$ENDIF}
3144+ STAR, PLUS, BRACES, STARNG, PLUSNG, BRACESNG: begin
3145+ // Lookahead to avoid useless match attempts when we know
3146+ // what character comes next.
3147+ nextch := #0;
3148+ if next^ = EXACTLY
3149+ then nextch := (next + REOpSz + RENextOffSz)^;
3150+ BracesMax := MaxInt; // infinite loop for * and + //###0.92
3151+ if (scan^ = STAR) or (scan^ = STARNG)
3152+ then BracesMin := 0 // STAR
3153+ else if (scan^ = PLUS) or (scan^ = PLUSNG)
3154+ then BracesMin := 1 // PLUS
3155+ else begin // BRACES
3156+ BracesMin := PREBracesArg (scan + REOpSz + RENextOffSz)^;
3157+ BracesMax := PREBracesArg (scan + REOpSz + RENextOffSz + REBracesArgSz)^;
3158+ end;
3159+ save := reginput;
3160+ opnd := scan + REOpSz + RENextOffSz;
3161+ if (scan^ = BRACES) or (scan^ = BRACESNG)
3162+ then inc (opnd, 2 * REBracesArgSz);
3163+
3164+ if (scan^ = PLUSNG) or (scan^ = STARNG) or (scan^ = BRACESNG) then begin
3165+ // non-greedy mode
3166+ BracesMax := regrepeat (opnd, BracesMax); // don't repeat more than BracesMax
3167+ // Now we know real Max limit to move forward (for recursion 'back up')
3168+ // In some cases it can be faster to check only Min positions first,
3169+ // but after that we have to check every position separtely instead
3170+ // of fast scannig in loop.
3171+ no := BracesMin;
3172+ while no <= BracesMax do begin
3173+ reginput := save + no;
3174+ // If it could work, try it.
3175+ if (nextch = #0) or (reginput^ = nextch) then begin
3176+ {$IFDEF ComplexBraces}
3177+ System.Move (LoopStack, SavedLoopStack, SizeOf (LoopStack)); //###0.925
3178+ SavedLoopStackIdx := LoopStackIdx;
3179+ {$ENDIF}
3180+ if MatchPrim (next) then begin
3181+ Result := true;
3182+ EXIT;
3183+ end;
3184+ {$IFDEF ComplexBraces}
3185+ System.Move (SavedLoopStack, LoopStack, SizeOf (LoopStack));
3186+ LoopStackIdx := SavedLoopStackIdx;
3187+ {$ENDIF}
3188+ end;
3189+ inc (no); // Couldn't or didn't - move forward.
3190+ end; { of while}
3191+ EXIT;
3192+ end
3193+ else begin // greedy mode
3194+ no := regrepeat (opnd, BracesMax); // don't repeat more than max_cnt
3195+ while no >= BracesMin do begin
3196+ // If it could work, try it.
3197+ if (nextch = #0) or (reginput^ = nextch) then begin
3198+ {$IFDEF ComplexBraces}
3199+ System.Move (LoopStack, SavedLoopStack, SizeOf (LoopStack)); //###0.925
3200+ SavedLoopStackIdx := LoopStackIdx;
3201+ {$ENDIF}
3202+ if MatchPrim (next) then begin
3203+ Result := true;
3204+ EXIT;
3205+ end;
3206+ {$IFDEF ComplexBraces}
3207+ System.Move (SavedLoopStack, LoopStack, SizeOf (LoopStack));
3208+ LoopStackIdx := SavedLoopStackIdx;
3209+ {$ENDIF}
3210+ end;
3211+ dec (no); // Couldn't or didn't - back up.
3212+ reginput := save + no;
3213+ end; { of while}
3214+ EXIT;
3215+ end;
3216+ end;
3217+ EEND: begin
3218+ Result := true; // Success!
3219+ EXIT;
3220+ end;
3221+ else begin
3222+ Error (reeMatchPrimMemoryCorruption);
3223+ EXIT;
3224+ end;
3225+ end; { of case scan^}
3226+ scan := next;
3227+ end; { of while scan <> nil}
3228+
3229+ // We get here only if there's trouble -- normally "case END" is the
3230+ // terminating point.
3231+ Error (reeMatchPrimCorruptedPointers);
3232+ end; { of function TRegExpr.MatchPrim
3233+--------------------------------------------------------------}
3234+
3235+{$IFDEF UseFirstCharSet} //###0.929
3236+procedure TRegExpr.FillFirstCharSet (prog : PRegExprChar);
3237+ var
3238+ scan : PRegExprChar; // Current node.
3239+ next : PRegExprChar; // Next node.
3240+ opnd : PRegExprChar;
3241+ min_cnt : integer;
3242+ begin
3243+ scan := prog;
3244+ while scan <> nil do begin
3245+ next := regnext (scan);
3246+ case PREOp (scan)^ of
3247+ BSUBEXP, BSUBEXPCI: begin //###0.938
3248+ FirstCharSet := [#0 .. #255]; // :((( we cannot
3249+ // optimize r.e. if it starts with back reference
3250+ EXIT;
3251+ end;
3252+ BOL, BOLML: ; // EXIT; //###0.937
3253+ EOL, EOLML: begin //###0.948 was empty in 0.947, was EXIT in 0.937
3254+ Include (FirstCharSet, #0);
3255+ if ModifierM
3256+ then begin
3257+ opnd := PRegExprChar (LineSeparators);
3258+ while opnd^ <> #0 do begin
3259+ Include (FirstCharSet, opnd^);
3260+ inc (opnd);
3261+ end;
3262+ end;
3263+ EXIT;
3264+ end;
3265+ BOUND, NOTBOUND: ; //###0.943 ?!!
3266+ ANY, ANYML: begin // we can better define ANYML !!!
3267+ FirstCharSet := [#0 .. #255]; //###0.930
3268+ EXIT;
3269+ end;
3270+ ANYDIGIT: begin
3271+ FirstCharSet := FirstCharSet + ['0' .. '9'];
3272+ EXIT;
3273+ end;
3274+ NOTDIGIT: begin
3275+ FirstCharSet := FirstCharSet + ([#0 .. #255] - ['0' .. '9']); //###0.948 FirstCharSet was forgotten
3276+ EXIT;
3277+ end;
3278+ EXACTLYCI: begin
3279+ Include (FirstCharSet, (scan + REOpSz + RENextOffSz)^);
3280+ Include (FirstCharSet, InvertCase ((scan + REOpSz + RENextOffSz)^));
3281+ EXIT;
3282+ end;
3283+ EXACTLY: begin
3284+ Include (FirstCharSet, (scan + REOpSz + RENextOffSz)^);
3285+ EXIT;
3286+ end;
3287+ ANYOFFULLSET: begin
3288+ FirstCharSet := FirstCharSet + PSetOfREChar (scan + REOpSz + RENextOffSz)^;
3289+ EXIT;
3290+ end;
3291+ ANYOFTINYSET: begin
3292+ //!!!TinySet
3293+ Include (FirstCharSet, (scan + REOpSz + RENextOffSz)^);
3294+ Include (FirstCharSet, (scan + REOpSz + RENextOffSz + 1)^);
3295+ Include (FirstCharSet, (scan + REOpSz + RENextOffSz + 2)^);
3296+ // ... // up to TinySetLen
3297+ EXIT;
3298+ end;
3299+ ANYBUTTINYSET: begin
3300+ //!!!TinySet
3301+ FirstCharSet := FirstCharSet + ([#0 .. #255] - [ //###0.948 FirstCharSet was forgotten
3302+ (scan + REOpSz + RENextOffSz)^,
3303+ (scan + REOpSz + RENextOffSz + 1)^,
3304+ (scan + REOpSz + RENextOffSz + 2)^]);
3305+ // ... // up to TinySetLen
3306+ EXIT;
3307+ end;
3308+ NOTHING: ;
3309+ COMMENT: ;
3310+ BACK: ;
3311+ Succ (OPEN) .. TREOp (Ord (OPEN) + NSUBEXP - 1) : begin //###0.929
3312+ FillFirstCharSet (next);
3313+ EXIT;
3314+ end;
3315+ Succ (CLOSE) .. TREOp (Ord (CLOSE) + NSUBEXP - 1): begin //###0.929
3316+ FillFirstCharSet (next);
3317+ EXIT;
3318+ end;
3319+ BRANCH: begin
3320+ if (PREOp (next)^ <> BRANCH) // No choice.
3321+ then next := scan + REOpSz + RENextOffSz // Avoid recursion.
3322+ else begin
3323+ REPEAT
3324+ FillFirstCharSet (scan + REOpSz + RENextOffSz);
3325+ scan := regnext (scan);
3326+ UNTIL (scan = nil) or (PREOp (scan)^ <> BRANCH);
3327+ EXIT;
3328+ end;
3329+ end;
3330+ {$IFDEF ComplexBraces}
3331+ LOOPENTRY: begin //###0.925
3332+// LoopStack [LoopStackIdx] := 0; //###0.940 line removed
3333+ FillFirstCharSet (next); // execute LOOP
3334+ EXIT;
3335+ end;
3336+ LOOP, LOOPNG: begin //###0.940
3337+ opnd := scan + PRENextOff (scan + REOpSz + RENextOffSz + REBracesArgSz * 2)^;
3338+ min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^;
3339+ FillFirstCharSet (opnd);
3340+ if min_cnt = 0
3341+ then FillFirstCharSet (next);
3342+ EXIT;
3343+ end;
3344+ {$ENDIF}
3345+ STAR, STARNG: //###0.940
3346+ FillFirstCharSet (scan + REOpSz + RENextOffSz);
3347+ PLUS, PLUSNG: begin //###0.940
3348+ FillFirstCharSet (scan + REOpSz + RENextOffSz);
3349+ EXIT;
3350+ end;
3351+ BRACES, BRACESNG: begin //###0.940
3352+ opnd := scan + REOpSz + RENextOffSz + REBracesArgSz * 2;
3353+ min_cnt := PREBracesArg (scan + REOpSz + RENextOffSz)^; // BRACES
3354+ FillFirstCharSet (opnd);
3355+ if min_cnt > 0
3356+ then EXIT;
3357+ end;
3358+ EEND: begin
3359+ FirstCharSet := [#0 .. #255]; //###0.948
3360+ EXIT;
3361+ end;
3362+ else begin
3363+ Error (reeMatchPrimMemoryCorruption);
3364+ EXIT;
3365+ end;
3366+ end; { of case scan^}
3367+ scan := next;
3368+ end; { of while scan <> nil}
3369+ end; { of procedure FillFirstCharSet
3370+--------------------------------------------------------------}
3371+{$ENDIF}
3372+
3373+function TRegExpr.Exec (const AInputString : RegExprString) : boolean;
3374+ begin
3375+ InputString := AInputString;
3376+ Result := ExecPrim (1);
3377+ end; { of function TRegExpr.Exec
3378+--------------------------------------------------------------}
3379+
3380+{$IFDEF OverMeth}
3381+{$IFNDEF FPC}
3382+function TRegExpr.Exec : boolean;
3383+ begin
3384+ Result := ExecPrim (1);
3385+ end; { of function TRegExpr.Exec
3386+--------------------------------------------------------------}
3387+{$ENDIF}
3388+function TRegExpr.Exec (AOffset: integer) : boolean;
3389+ begin
3390+ Result := ExecPrim (AOffset);
3391+ end; { of function TRegExpr.Exec
3392+--------------------------------------------------------------}
3393+{$ENDIF}
3394+
3395+function TRegExpr.ExecPos (AOffset: integer {$IFDEF DefParam}= 1{$ENDIF}) : boolean;
3396+ begin
3397+ Result := ExecPrim (AOffset);
3398+ end; { of function TRegExpr.ExecPos
3399+--------------------------------------------------------------}
3400+
3401+function TRegExpr.ExecPrim (AOffset: integer) : boolean;
3402+ procedure ClearMatchs;
3403+ // Clears matchs array
3404+ var i : integer;
3405+ begin
3406+ for i := 0 to NSUBEXP - 1 do begin
3407+ startp [i] := nil;
3408+ endp [i] := nil;
3409+ end;
3410+ end; { of procedure ClearMatchs;
3411+..............................................................}
3412+ function RegMatch (str : PRegExprChar) : boolean;
3413+ // try match at specific point
3414+ begin
3415+ //###0.949 removed clearing of start\endp
3416+ reginput := str;
3417+ Result := MatchPrim (programm + REOpSz);
3418+ if Result then begin
3419+ startp [0] := str;
3420+ endp [0] := reginput;
3421+ end;
3422+ end; { of function RegMatch
3423+..............................................................}
3424+ var
3425+ s : PRegExprChar;
3426+ StartPtr: PRegExprChar;
3427+ InputLen : integer;
3428+ begin
3429+ Result := false; // Be paranoid...
3430+
3431+ ClearMatchs; //###0.949
3432+ // ensure that Match cleared either if optimization tricks or some error
3433+ // will lead to leaving ExecPrim without actual search. That is
3434+ // importent for ExecNext logic and so on.
3435+
3436+ if not IsProgrammOk //###0.929
3437+ then EXIT;
3438+
3439+ // Check InputString presence
3440+ if not Assigned (fInputString) then begin
3441+ Error (reeNoInputStringSpecified);
3442+ EXIT;
3443+ end;
3444+
3445+ InputLen := length (fInputString);
3446+
3447+ //Check that the start position is not negative
3448+ if AOffset < 1 then begin
3449+ Error (reeOffsetMustBeGreaterThan0);
3450+ EXIT;
3451+ end;
3452+ // Check that the start position is not longer than the line
3453+ // If so then exit with nothing found
3454+ if AOffset > (InputLen + 1) // for matching empty string after last char.
3455+ then EXIT;
3456+
3457+ StartPtr := fInputString + AOffset - 1;
3458+
3459+ // If there is a "must appear" string, look for it.
3460+ if regmust <> nil then begin
3461+ s := StartPtr;
3462+ REPEAT
3463+ s := StrScan (s, regmust [0]);
3464+ if s <> nil then begin
3465+ if StrLComp (s, regmust, regmlen) = 0
3466+ then BREAK; // Found it.
3467+ inc (s);
3468+ end;
3469+ UNTIL s = nil;
3470+ if s = nil // Not present.
3471+ then EXIT;
3472+ end;
3473+
3474+ // Mark beginning of line for ^ .
3475+ fInputStart := fInputString;
3476+
3477+ // Pointer to end of input stream - for
3478+ // pascal-style string processing (may include #0)
3479+ fInputEnd := fInputString + InputLen;
3480+
3481+ {$IFDEF ComplexBraces}
3482+ // no loops started
3483+ LoopStackIdx := 0; //###0.925
3484+ {$ENDIF}
3485+
3486+ // Simplest case: anchored match need be tried only once.
3487+ if reganch <> #0 then begin
3488+ Result := RegMatch (StartPtr);
3489+ EXIT;
3490+ end;
3491+
3492+ // Messy cases: unanchored match.
3493+ s := StartPtr;
3494+ if regstart <> #0 then // We know what char it must start with.
3495+ REPEAT
3496+ s := StrScan (s, regstart);
3497+ if s <> nil then begin
3498+ Result := RegMatch (s);
3499+ if Result
3500+ then EXIT
3501+ else ClearMatchs; //###0.949
3502+ inc (s);
3503+ end;
3504+ UNTIL s = nil
3505+ else begin // We don't - general case.
3506+ repeat //###0.948
3507+ {$IFDEF UseFirstCharSet}
3508+ if s^ in FirstCharSet
3509+ then Result := RegMatch (s);
3510+ {$ELSE}
3511+ Result := RegMatch (s);
3512+ {$ENDIF}
3513+ if Result or (s^ = #0) // Exit on a match or after testing the end-of-string.
3514+ then EXIT
3515+ else ClearMatchs; //###0.949
3516+ inc (s);
3517+ until false;
3518+(* optimized and fixed by Martin Fuller - empty strings
3519+ were not allowed to pass thru in UseFirstCharSet mode
3520+ {$IFDEF UseFirstCharSet} //###0.929
3521+ while s^ <> #0 do begin
3522+ if s^ in FirstCharSet
3523+ then Result := RegMatch (s);
3524+ if Result
3525+ then EXIT;
3526+ inc (s);
3527+ end;
3528+ {$ELSE}
3529+ REPEAT
3530+ Result := RegMatch (s);
3531+ if Result
3532+ then EXIT;
3533+ inc (s);
3534+ UNTIL s^ = #0;
3535+ {$ENDIF}
3536+*)
3537+ end;
3538+ // Failure
3539+ end; { of function TRegExpr.ExecPrim
3540+--------------------------------------------------------------}
3541+
3542+function TRegExpr.ExecNext : boolean;
3543+ var offset : integer;
3544+ begin
3545+ Result := false;
3546+ if not Assigned (startp[0]) or not Assigned (endp[0]) then begin
3547+ Error (reeExecNextWithoutExec);
3548+ EXIT;
3549+ end;
3550+// Offset := MatchPos [0] + MatchLen [0];
3551+// if MatchLen [0] = 0
3552+ Offset := endp [0] - fInputString + 1; //###0.929
3553+ if endp [0] = startp [0] //###0.929
3554+ then inc (Offset); // prevent infinite looping if empty string match r.e.
3555+ Result := ExecPrim (Offset);
3556+ end; { of function TRegExpr.ExecNext
3557+--------------------------------------------------------------}
3558+
3559+function TRegExpr.GetInputString : RegExprString;
3560+ begin
3561+ if not Assigned (fInputString) then begin
3562+ Error (reeGetInputStringWithoutInputString);
3563+ EXIT;
3564+ end;
3565+ Result := fInputString;
3566+ end; { of function TRegExpr.GetInputString
3567+--------------------------------------------------------------}
3568+
3569+procedure TRegExpr.SetInputString (const AInputString : RegExprString);
3570+ var
3571+ Len : integer;
3572+ i : integer;
3573+ begin
3574+ // clear Match* - before next Exec* call it's undefined
3575+ for i := 0 to NSUBEXP - 1 do begin
3576+ startp [i] := nil;
3577+ endp [i] := nil;
3578+ end;
3579+
3580+ // need reallocation of input string buffer ?
3581+ Len := length (AInputString);
3582+ if Assigned (fInputString) and (Length (fInputString) <> Len) then begin
3583+ FreeMem (fInputString);
3584+ fInputString := nil;
3585+ end;
3586+ // buffer [re]allocation
3587+ if not Assigned (fInputString)
3588+ then GetMem (fInputString, (Len + 1) * SizeOf (REChar));
3589+
3590+ // copy input string into buffer
3591+ {$IFDEF UniCode}
3592+ StrPCopy (fInputString, Copy (AInputString, 1, Len)); //###0.927
3593+ {$ELSE}
3594+ StrLCopy (fInputString, PRegExprChar (AInputString), Len);
3595+ {$ENDIF}
3596+
3597+ {
3598+ fInputString : string;
3599+ fInputStart, fInputEnd : PRegExprChar;
3600+
3601+ SetInputString:
3602+ fInputString := AInputString;
3603+ UniqueString (fInputString);
3604+ fInputStart := PChar (fInputString);
3605+ Len := length (fInputString);
3606+ fInputEnd := PRegExprChar (integer (fInputStart) + Len); ??
3607+ !! startp/endp âñå ðàâíî áóäåò îïàñíî èñïîëüçîâàòü ?
3608+ }
3609+ end; { of procedure TRegExpr.SetInputString
3610+--------------------------------------------------------------}
3611+
3612+procedure TRegExpr.SetLineSeparators (const AStr : RegExprString);
3613+ begin
3614+ if AStr <> fLineSeparators then begin
3615+ fLineSeparators := AStr;
3616+ InvalidateProgramm;
3617+ end;
3618+ end; { of procedure TRegExpr.SetLineSeparators
3619+--------------------------------------------------------------}
3620+
3621+procedure TRegExpr.SetLinePairedSeparator (const AStr : RegExprString);
3622+ begin
3623+ if length (AStr) = 2 then begin
3624+ if AStr [1] = AStr [2] then begin
3625+ // it's impossible for our 'one-point' checking to support
3626+ // two chars separator for identical chars
3627+ Error (reeBadLinePairedSeparator);
3628+ EXIT;
3629+ end;
3630+ if not fLinePairedSeparatorAssigned
3631+ or (AStr [1] <> fLinePairedSeparatorHead)
3632+ or (AStr [2] <> fLinePairedSeparatorTail) then begin
3633+ fLinePairedSeparatorAssigned := true;
3634+ fLinePairedSeparatorHead := AStr [1];
3635+ fLinePairedSeparatorTail := AStr [2];
3636+ InvalidateProgramm;
3637+ end;
3638+ end
3639+ else if length (AStr) = 0 then begin
3640+ if fLinePairedSeparatorAssigned then begin
3641+ fLinePairedSeparatorAssigned := false;
3642+ InvalidateProgramm;
3643+ end;
3644+ end
3645+ else Error (reeBadLinePairedSeparator);
3646+ end; { of procedure TRegExpr.SetLinePairedSeparator
3647+--------------------------------------------------------------}
3648+
3649+function TRegExpr.GetLinePairedSeparator : RegExprString;
3650+begin
3651+ if fLinePairedSeparatorAssigned then begin
3652+ {$IFDEF UniCode}
3653+ // Here is some UniCode 'magic'
3654+ // If You do know better decision to concatenate
3655+ // two WideChars, please, let me know!
3656+ Result := fLinePairedSeparatorHead; //###0.947
3657+ Result := Result + fLinePairedSeparatorTail;
3658+ {$ELSE}
3659+ Result := fLinePairedSeparatorHead + fLinePairedSeparatorTail;
3660+ {$ENDIF}
3661+ end
3662+ else Result := '';
3663+end; { of function TRegExpr.GetLinePairedSeparator
3664+--------------------------------------------------------------}
3665+
3666+function TRegExpr.Substitute (const ATemplate : RegExprString) : RegExprString;
3667+// perform substitutions after a regexp match
3668+// completely rewritten in 0.929
3669+type
3670+ TSubstMode = (smodeNormal, smodeOneUpper, smodeOneLower, smodeAllUpper,
3671+ smodeAllLower);
3672+var
3673+ TemplateLen : integer;
3674+ TemplateBeg, TemplateEnd : PRegExprChar;
3675+ p, p0, p1, ResultPtr : PRegExprChar;
3676+ ResultLen : integer;
3677+ n : integer;
3678+ Ch : REChar;
3679+ Mode: TSubstMode;
3680+ EscapedCharacter: string;
3681+
3682+ function ParseVarName (var APtr : PRegExprChar) : integer;
3683+ // extract name of variable (digits, may be enclosed with
3684+ // curly braces) from APtr^, uses TemplateEnd !!!
3685+ const
3686+ Digits = ['0' .. '9'];
3687+ var
3688+ p : PRegExprChar;
3689+ Delimited : boolean;
3690+ begin
3691+ Result := 0;
3692+ p := APtr;
3693+ Delimited := (p < TemplateEnd) and (p^ = '{');
3694+ if Delimited
3695+ then inc (p); // skip left curly brace
3696+ if (p < TemplateEnd) and (p^ = '&')
3697+ then inc (p) // this is '$&' or '${&}'
3698+ else
3699+ while (p < TemplateEnd) and
3700+ {$IFDEF UniCode} //###0.935
3701+ (ord (p^) < 256) and (CharInSet(char (p^), Digits))
3702+ {$ELSE}
3703+ (p^ in Digits)
3704+ {$ENDIF}
3705+ do begin
3706+ Result := Result * 10 + (ord (p^) - ord ('0')); //###0.939
3707+ inc (p);
3708+ end;
3709+ if Delimited then
3710+ if (p < TemplateEnd) and (p^ = '}')
3711+ then inc (p) // skip right curly brace
3712+ else p := APtr; // isn't properly terminated
3713+ if p = APtr
3714+ then Result := -1; // no valid digits found or no right curly brace
3715+ APtr := p;
3716+ end;
3717+
3718+begin
3719+ EscapedCharacter := #0;
3720+ // Check programm and input string
3721+ if not IsProgrammOk
3722+ then EXIT;
3723+ if not Assigned (fInputString) then begin
3724+ Error (reeNoInputStringSpecified);
3725+ EXIT;
3726+ end;
3727+ // Prepare for working
3728+ TemplateLen := length (ATemplate);
3729+ if TemplateLen = 0 then begin // prevent nil pointers
3730+ Result := '';
3731+ EXIT;
3732+ end;
3733+ TemplateBeg := pointer (ATemplate);
3734+ TemplateEnd := TemplateBeg + TemplateLen;
3735+ // Count result length for speed optimization.
3736+ ResultLen := 0;
3737+ p := TemplateBeg;
3738+ while p < TemplateEnd do begin
3739+ Ch := p^;
3740+ inc (p);
3741+ if Ch = '$'
3742+ then n := ParseVarName (p)
3743+ else n := -1;
3744+ if n >= 0 then begin
3745+ if (n < NSUBEXP) and Assigned (startp [n]) and Assigned (endp [n])
3746+ then inc (ResultLen, endp [n] - startp [n]);
3747+ end
3748+ else begin
3749+ if (Ch = EscChar) and (p < TemplateEnd) then begin // quoted or special char followed
3750+ Ch := p^;
3751+ inc (p);
3752+ case Ch of
3753+ 't', 'r', 'n', 'f', 'a', 'e':
3754+ Inc(ResultLen, 1);
3755+ 'x': begin
3756+ Inc(ResultLen, 1); // will be replaced by a single character
3757+ Inc(p, 2); // must be followed by a two digit hex number (no support for Unicode here)
3758+ end;
3759+ 'u', 'l', 'U', 'L': {nothing};
3760+ else
3761+ inc(ResultLen);
3762+ end;
3763+ end
3764+ else
3765+ inc (ResultLen);
3766+ end;
3767+ end;
3768+ // Get memory. We do it once and it significant speed up work !
3769+ if ResultLen = 0 then begin
3770+ Result := '';
3771+ EXIT;
3772+ end;
3773+ //SetString (Result, nil, ResultLen);
3774+ SetLength(Result,ResultLen);
3775+ // Fill Result
3776+ ResultPtr := pointer (Result);
3777+ p := TemplateBeg;
3778+ Mode := smodeNormal;
3779+ while p < TemplateEnd do begin
3780+ Ch := p^;
3781+ p0 := p;
3782+ inc (p);
3783+ p1 := p;
3784+ if Ch = '$'
3785+ then n := ParseVarName (p)
3786+ else n := -1;
3787+ if (n >= 0) then begin
3788+ p0 := startp [n];
3789+ p1 := endp[n];
3790+ if (n >= NSUBEXP) or not Assigned (p0) or not Assigned (endp [n]) then
3791+ p1 := p0; // empty
3792+ end
3793+ else begin
3794+ if (Ch = EscChar) and (p < TemplateEnd) then begin // quoted or special char followed
3795+ Ch := p^;
3796+ inc (p);
3797+ case Ch of
3798+ 't': begin
3799+ EscapedCharacter := #$09;
3800+ p0 := @EscapedCharacter[1];
3801+ p1 := p0 + 1;
3802+ end;
3803+ 'r': begin
3804+ EscapedCharacter := #$0d;
3805+ p0 := @EscapedCharacter[1];
3806+ p1 := p0 + 1;
3807+ end;
3808+ 'f': begin
3809+ EscapedCharacter := #$0c;
3810+ p0 := @EscapedCharacter[1];
3811+ p1 := p0 + 1;
3812+ end;
3813+ 'a': begin
3814+ EscapedCharacter := #$07;
3815+ p0 := @EscapedCharacter[1];
3816+ p1 := p0 + 1;
3817+ end;
3818+ 'e': begin
3819+ EscapedCharacter := #$1b;
3820+ p0 := @EscapedCharacter[1];
3821+ p1 := p0 + 1;
3822+ end;
3823+ 'n' : begin
3824+ EscapedCharacter := #$0a;
3825+ p0 := @EscapedCharacter[1];
3826+ p1 := p0 + 1;
3827+ end;
3828+ 'x': begin
3829+ // this can raise an EConvertError exception
3830+ EscapedCharacter := chr(StrtoInt('$' + Copy(p, 1, 2)));
3831+ p0 := @EscapedCharacter[1];
3832+ p1 := p0 + 1;
3833+ Inc(p, 2);
3834+ end;
3835+ 'l' : begin
3836+ Mode := smodeOneLower;
3837+ p1 := p0;
3838+ end;
3839+ 'L' : begin
3840+ Mode := smodeAllLower;
3841+ p1 := p0;
3842+ end;
3843+ 'u' : begin
3844+ Mode := smodeOneUpper;
3845+ p1 := p0;
3846+ end;
3847+ 'U' : begin
3848+ Mode := smodeAllUpper;
3849+ p1 := p0;
3850+ end;
3851+ else
3852+ begin
3853+ inc(p0);
3854+ inc(p1);
3855+ end;
3856+ end;
3857+ end
3858+ end;
3859+ if p0 < p1 then begin
3860+ while p0 < p1 do begin
3861+ case Mode of
3862+ smodeOneLower, smodeAllLower:
3863+ begin
3864+ Ch := p0^;
3865+ if Ch < #128 then
3866+ Ch := AnsiLowerCase(Ch)[1];
3867+ ResultPtr^ := Ch;
3868+ if Mode = smodeOneLower then
3869+ Mode := smodeNormal;
3870+ end;
3871+ smodeOneUpper, smodeAllUpper:
3872+ begin
3873+ Ch := p0^;
3874+ if Ch < #128 then
3875+ Ch := AnsiUpperCase(Ch)[1];
3876+ ResultPtr^ := Ch;
3877+ if Mode = smodeOneUpper then
3878+ Mode := smodeNormal;
3879+ end;
3880+ else
3881+ ResultPtr^ := p0^;
3882+ end;
3883+ inc (ResultPtr);
3884+ inc (p0);
3885+ end;
3886+ Mode := smodeNormal;
3887+ end;
3888+ end;
3889+end; { of function TRegExpr.Substitute
3890+--------------------------------------------------------------}
3891+
3892+procedure TRegExpr.Split (AInputStr : RegExprString; APieces : TStrings);
3893+ var PrevPos : integer;
3894+ begin
3895+ PrevPos := 1;
3896+ if Exec (AInputStr) then
3897+ REPEAT
3898+ APieces.Add (System.Copy (AInputStr, PrevPos, MatchPos [0] - PrevPos));
3899+ PrevPos := MatchPos [0] + MatchLen [0];
3900+ UNTIL not ExecNext;
3901+ APieces.Add (System.Copy (AInputStr, PrevPos, MaxInt)); // Tail
3902+ end; { of procedure TRegExpr.Split
3903+--------------------------------------------------------------}
3904+
3905+function TRegExpr.Replace (AInputStr : RegExprString; const AReplaceStr : RegExprString;
3906+ AUseSubstitution : boolean{$IFDEF DefParam}= False{$ENDIF}) : RegExprString;
3907+ var
3908+ PrevPos : integer;
3909+ begin
3910+ Result := '';
3911+ PrevPos := 1;
3912+ if Exec (AInputStr) then
3913+ REPEAT
3914+ Result := Result + System.Copy (AInputStr, PrevPos,
3915+ MatchPos [0] - PrevPos);
3916+ if AUseSubstitution //###0.946
3917+ then Result := Result + Substitute (AReplaceStr)
3918+ else Result := Result + AReplaceStr;
3919+ PrevPos := MatchPos [0] + MatchLen [0];
3920+ UNTIL not ExecNext;
3921+ Result := Result + System.Copy (AInputStr, PrevPos, MaxInt); // Tail
3922+ end; { of function TRegExpr.Replace
3923+--------------------------------------------------------------}
3924+
3925+function TRegExpr.ReplaceEx (AInputStr : RegExprString;
3926+ AReplaceFunc : TRegExprReplaceFunction)
3927+ : RegExprString;
3928+ var
3929+ PrevPos : integer;
3930+ begin
3931+ Result := '';
3932+ PrevPos := 1;
3933+ if Exec (AInputStr) then
3934+ REPEAT
3935+ Result := Result + System.Copy (AInputStr, PrevPos,
3936+ MatchPos [0] - PrevPos)
3937+ + AReplaceFunc (Self);
3938+ PrevPos := MatchPos [0] + MatchLen [0];
3939+ UNTIL not ExecNext;
3940+ Result := Result + System.Copy (AInputStr, PrevPos, MaxInt); // Tail
3941+ end; { of function TRegExpr.ReplaceEx
3942+--------------------------------------------------------------}
3943+
3944+
3945+{$IFDEF OverMeth}
3946+function TRegExpr.Replace (AInputStr : RegExprString;
3947+ AReplaceFunc : TRegExprReplaceFunction)
3948+ : RegExprString;
3949+ begin
3950+ ReplaceEx (AInputStr, AReplaceFunc);
3951+ end; { of function TRegExpr.Replace
3952+--------------------------------------------------------------}
3953+{$ENDIF}
3954+
3955+{=============================================================}
3956+{====================== Debug section ========================}
3957+{=============================================================}
3958+
3959+{$IFDEF RegExpPCodeDump}
3960+function TRegExpr.DumpOp (op : TREOp) : RegExprString;
3961+// printable representation of opcode
3962+ begin
3963+ case op of
3964+ BOL: Result := 'BOL';
3965+ EOL: Result := 'EOL';
3966+ BOLML: Result := 'BOLML';
3967+ EOLML: Result := 'EOLML';
3968+ BOUND: Result := 'BOUND'; //###0.943
3969+ NOTBOUND: Result := 'NOTBOUND'; //###0.943
3970+ ANY: Result := 'ANY';
3971+ ANYML: Result := 'ANYML'; //###0.941
3972+ ANYLETTER: Result := 'ANYLETTER';
3973+ NOTLETTER: Result := 'NOTLETTER';
3974+ ANYDIGIT: Result := 'ANYDIGIT';
3975+ NOTDIGIT: Result := 'NOTDIGIT';
3976+ ANYSPACE: Result := 'ANYSPACE';
3977+ NOTSPACE: Result := 'NOTSPACE';
3978+ ANYOF: Result := 'ANYOF';
3979+ ANYBUT: Result := 'ANYBUT';
3980+ ANYOFCI: Result := 'ANYOF/CI';
3981+ ANYBUTCI: Result := 'ANYBUT/CI';
3982+ BRANCH: Result := 'BRANCH';
3983+ EXACTLY: Result := 'EXACTLY';
3984+ EXACTLYCI: Result := 'EXACTLY/CI';
3985+ NOTHING: Result := 'NOTHING';
3986+ COMMENT: Result := 'COMMENT';
3987+ BACK: Result := 'BACK';
3988+ EEND: Result := 'END';
3989+ BSUBEXP: Result := 'BSUBEXP';
3990+ BSUBEXPCI: Result := 'BSUBEXP/CI';
3991+ Succ (OPEN) .. TREOp (Ord (OPEN) + NSUBEXP - 1): //###0.929
3992+ Result := Format ('OPEN[%d]', [ord (op) - ord (OPEN)]);
3993+ Succ (CLOSE) .. TREOp (Ord (CLOSE) + NSUBEXP - 1): //###0.929
3994+ Result := Format ('CLOSE[%d]', [ord (op) - ord (CLOSE)]);
3995+ STAR: Result := 'STAR';
3996+ PLUS: Result := 'PLUS';
3997+ BRACES: Result := 'BRACES';
3998+ {$IFDEF ComplexBraces}
3999+ LOOPENTRY: Result := 'LOOPENTRY'; //###0.925
4000+ LOOP: Result := 'LOOP'; //###0.925
4001+ LOOPNG: Result := 'LOOPNG'; //###0.940
4002+ {$ENDIF}
4003+ ANYOFTINYSET: Result:= 'ANYOFTINYSET';
4004+ ANYBUTTINYSET:Result:= 'ANYBUTTINYSET';
4005+ {$IFDEF UseSetOfChar} //###0.929
4006+ ANYOFFULLSET: Result:= 'ANYOFFULLSET';
4007+ {$ENDIF}
4008+ STARNG: Result := 'STARNG'; //###0.940
4009+ PLUSNG: Result := 'PLUSNG'; //###0.940
4010+ BRACESNG: Result := 'BRACESNG'; //###0.940
4011+ else Error (reeDumpCorruptedOpcode);
4012+ end; {of case op}
4013+ Result := ':' + Result;
4014+ end; { of function TRegExpr.DumpOp
4015+--------------------------------------------------------------}
4016+
4017+function TRegExpr.Dump : RegExprString;
4018+// dump a regexp in vaguely comprehensible form
4019+ var
4020+ s : PRegExprChar;
4021+ op : TREOp; // Arbitrary non-END op.
4022+ next : PRegExprChar;
4023+ i : integer;
4024+ Diff : integer;
4025+{$IFDEF UseSetOfChar} //###0.929
4026+ Ch : REChar;
4027+{$ENDIF}
4028+ begin
4029+ if not IsProgrammOk //###0.929
4030+ then EXIT;
4031+
4032+ op := EXACTLY;
4033+ Result := '';
4034+ s := programm + REOpSz;
4035+ while op <> EEND do begin // While that wasn't END last time...
4036+ op := s^;
4037+ Result := Result + Format ('%2d%s', [s - programm, DumpOp (s^)]); // Where, what.
4038+ next := regnext (s);
4039+ if next = nil // Next ptr.
4040+ then Result := Result + ' (0)'
4041+ else begin
4042+ if next > s //###0.948 PWideChar subtraction workaround (see comments in Tail method for details)
4043+ then Diff := next - s
4044+ else Diff := - (s - next);
4045+ Result := Result + Format (' (%d) ', [(s - programm) + Diff]);
4046+ end;
4047+ inc (s, REOpSz + RENextOffSz);
4048+ if (op = ANYOF) or (op = ANYOFCI) or (op = ANYBUT) or (op = ANYBUTCI)
4049+ or (op = EXACTLY) or (op = EXACTLYCI) then begin
4050+ // Literal string, where present.
4051+ while s^ <> #0 do begin
4052+ Result := Result + s^;
4053+ inc (s);
4054+ end;
4055+ inc (s);
4056+ end;
4057+ if (op = ANYOFTINYSET) or (op = ANYBUTTINYSET) then begin
4058+ for i := 1 to TinySetLen do begin
4059+ Result := Result + s^;
4060+ inc (s);
4061+ end;
4062+ end;
4063+ if (op = BSUBEXP) or (op = BSUBEXPCI) then begin
4064+ Result := Result + ' \' + IntToStr (Ord (s^));
4065+ inc (s);
4066+ end;
4067+ {$IFDEF UseSetOfChar} //###0.929
4068+ if op = ANYOFFULLSET then begin
4069+ for Ch := #0 to #255 do
4070+ if Ch in PSetOfREChar (s)^ then
4071+ if Ch < ' '
4072+ then Result := Result + '#' + IntToStr (Ord (Ch)) //###0.936
4073+ else Result := Result + Ch;
4074+ inc (s, SizeOf (TSetOfREChar));
4075+ end;
4076+ {$ENDIF}
4077+ if (op = BRACES) or (op = BRACESNG) then begin //###0.941
4078+ // show min/max argument of BRACES operator
4079+ Result := Result + Format ('{%d,%d}', [PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]);
4080+ inc (s, REBracesArgSz * 2);
4081+ end;
4082+ {$IFDEF ComplexBraces}
4083+ if (op = LOOP) or (op = LOOPNG) then begin //###0.940
4084+ Result := Result + Format (' -> (%d) {%d,%d}', [
4085+ (s - programm - (REOpSz + RENextOffSz)) + PRENextOff (s + 2 * REBracesArgSz)^,
4086+ PREBracesArg (s)^, PREBracesArg (s + REBracesArgSz)^]);
4087+ inc (s, 2 * REBracesArgSz + RENextOffSz);
4088+ end;
4089+ {$ENDIF}
4090+ Result := Result + #$d#$a;
4091+ end; { of while}
4092+
4093+ // Header fields of interest.
4094+
4095+ if regstart <> #0
4096+ then Result := Result + 'start ' + regstart;
4097+ if reganch <> #0
4098+ then Result := Result + 'anchored ';
4099+ if regmust <> nil
4100+ then Result := Result + 'must have ' + regmust;
4101+ {$IFDEF UseFirstCharSet} //###0.929
4102+ Result := Result + #$d#$a'FirstCharSet:';
4103+ for Ch := #0 to #255 do
4104+ if Ch in FirstCharSet
4105+ then begin
4106+ if Ch < ' '
4107+ then Result := Result + '#' + IntToStr(Ord(Ch)) //###0.948
4108+ else Result := Result + Ch;
4109+ end;
4110+ {$ENDIF}
4111+ Result := Result + #$d#$a;
4112+ end; { of function TRegExpr.Dump
4113+--------------------------------------------------------------}
4114+{$ENDIF}
4115+
4116+{$IFDEF reRealExceptionAddr}
4117+{$OPTIMIZATION ON}
4118+// ReturnAddr works correctly only if compiler optimization is ON
4119+// I placed this method at very end of unit because there are no
4120+// way to restore compiler optimization flag ...
4121+{$ENDIF}
4122+procedure TRegExpr.Error (AErrorID : integer);
4123+{$IFDEF reRealExceptionAddr}
4124+ function ReturnAddr : pointer; //###0.938
4125+ asm
4126+ mov eax,[ebp+4]
4127+ end;
4128+{$ENDIF}
4129+ var
4130+ e : ERegExpr;
4131+ begin
4132+ fLastError := AErrorID; // dummy stub - useless because will raise exception
4133+ if AErrorID < 1000 // compilation error ?
4134+ then e := ERegExpr.Create (ErrorMsg (AErrorID) // yes - show error pos
4135+ + ' (Character #' + IntToStr (CompilerErrorPos) + ')')
4136+ else e := ERegExpr.Create (ErrorMsg (AErrorID));
4137+ e.ErrorCode := AErrorID;
4138+ e.CompilerErrorPos := CompilerErrorPos;
4139+ raise e
4140+ {$IFDEF reRealExceptionAddr}
4141+ At ReturnAddr; //###0.938
4142+ {$ENDIF}
4143+ end; { of procedure TRegExpr.Error
4144+--------------------------------------------------------------}
4145+
4146+(*
4147+ PCode persistence:
4148+ FirstCharSet
4149+ programm, regsize
4150+ regstart // -> programm
4151+ reganch // -> programm
4152+ regmust, regmlen // -> programm
4153+ fExprIsCompiled
4154+*)
4155+
4156+// be carefull - placed here code will be always compiled with
4157+// compiler optimization flag
4158+
4159+{$IFDEF FPC}
4160+initialization
4161+ RegExprInvertCaseFunction := TRegExpr.InvertCaseFunction;
4162+
4163+{$ENDIF}
4164+end.
4165+
--- trunk/src/u_DprojFilterMain.pas (revision 16)
+++ trunk/src/u_DprojFilterMain.pas (revision 17)
@@ -5,6 +5,7 @@
55 interface
66
77 uses
8+ RegExpr,
89 SysUtils,
910 Classes,
1011 u_dzDefaultMain;
@@ -15,6 +16,8 @@
1516 FDeleteLine: string;
1617 FChangeFrom: string;
1718 FChangeTo: string;
19+ FRegExReplaceFrom: string;
20+ FRegExReplaceTo: string;
1821 FInsertAfter: string;
1922 FInsert: string;
2023 FInsertAfterAll: Boolean;
@@ -47,13 +50,16 @@
4750 p: Integer;
4851 InsertAfterDone: Boolean;
4952 doInsert: Boolean;
53+ RegExpr: TRegExpr;
5054 begin
5155 WriteLn('processing file ', _fn);
52- InitializeNil(Orig, Changed);
56+ InitializeNil(Orig, Changed, RegExpr);
5357 try
5458 Orig := TStringList.Create;
5559 Changed := TStringList.Create;
60+ RegExpr := TRegExpr.Create;
5661 try
62+ RegExpr.Expression := FRegExReplaceFrom;
5763 Orig.LoadFromFile(_fn);
5864 {$IFDEF SUPPORTS_UNICODE}
5965 Changed.DefaultEncoding := Orig.Encoding;
@@ -97,6 +103,11 @@
97103 InsertAfterDone := True;
98104 end;
99105 end else begin
106+ if FRegExReplaceFrom <> '' then
107+ if RegExpr.Exec(Line) then begin
108+ Line := RegExpr.Replace(Line, FRegExReplaceTo, True);
109+ Inc(ChangeCnt);
110+ end;
100111 Changed.Add(Line);
101112 end;
102113 end;
@@ -117,7 +128,7 @@
117128 end;
118129 end;
119130 finally
120- FreeAndNil(Orig, Changed);
131+ FreeAndNil(Orig, Changed, RegExpr);
121132 end;
122133 end;
123134
@@ -176,6 +187,16 @@
176187 raise Exception.Create('--ChangeTo is not allowed without a --ChangeFrom option');
177188 end;
178189
190+ if FGetOpt.OptionPassed('RegExReplaceFrom', FRegExReplaceFrom) then begin
191+ if FGetOpt.OptionPassed('RegExReplaceTo', FRegExReplaceTo) then begin
192+ OptionsOK := True;
193+ end else
194+ raise Exception.Create('--RegExReplaceFrom also requires --RegExReplaceTo option');
195+ end else begin
196+ if FGetOpt.OptionPassed('RegExReplaceTo', FRegExReplaceTo) then
197+ raise Exception.Create('--RegExReplaceTo is not allowed without a --RegExReplaceFrom option');
198+ end;
199+
179200 FINsertAfterAllowDuplicates := False;
180201 FInsertAfterAll := False;
181202 if FGetOpt.OptionPassed('InsertAfter', FInsertAfter) then begin
@@ -195,11 +216,13 @@
195216 end;
196217
197218 if not OptionsOK then
198- raise Exception.Create('You must pass one of the options: --DeleteLine, --ChangeFrom or --InsertAfter');
219+ raise Exception.Create('You must pass one of the options: --DeleteLine, --ChangeFrom, --RegExReplaceFrom or --InsertAfter');
199220
200221 FDeleteLine := SimpleDequoteString(FDeleteLine);
201222 FChangeFrom := SimpleDequoteString(FChangeFrom);
202223 FChangeTo := SimpleDequoteString(FChangeTo);
224+ FRegExReplaceFrom := SimpleDequoteString(FRegExReplaceFrom);
225+ FRegExReplaceTo := SimpleDequoteString(FRegExReplaceTo);
203226 FInsertAfter := SimpleDequoteString(FInsertAfter);
204227 FInsert := SimpleDequoteString(FInsert);
205228
@@ -219,6 +242,8 @@
219242 FGetOpt.RegisterOption('DeleteLine', 'Delete a line matching the parameter. E.g. --DeleteLine="<DCC_DcpOutput>..\..\lib\16</DCC_DcpOutput>"', True);
220243 FGetOpt.RegisterOption('ChangeFrom', 'Change a line matching the parameter to the parameter of --ChangeTo. E.g. --ChangeFrom="bla" --ChangeTo="blub"', True);
221244 FGetOpt.RegisterOption('ChangeTo', 'Gives the new content for the ChangeFrom parameter. E.g. --ChangeFrom="bla" --ChangeTo="blub"', True);
245+ FGetOpt.RegisterOption('RegExReplaceFrom', 'Assuming a given regular expression, replace it if found inside a line with the parameter of --RegExReplaceTo.', True);
246+ FGetOpt.RegisterOption('RegExReplaceTo', 'Gives the new content for the RegExReplaceFrom parameter. E.g. --RegExReplaceFrom="Ver[0-9]" --RegExReplaceTo="Ver2"', True);
222247 FGetOpt.RegisterOption('InsertAfter', 'Insert a new line after the one matching the parameter. Requires an --Insert option.', True);
223248 FGetOpt.RegisterOption('Insert', 'Gives the line to insert for the --InsertAfter option.', True);
224249 FGetOpt.RegisterOption('InsertAfterAll', 'If given, InsertAfter applies to all matching lines, if not, only to the first. Requires an --Insert option.', False);
@@ -227,4 +252,3 @@
227252 end;
228253
229254 end.
230-
Show on old repository browser