• R/O
  • SSH
  • HTTPS

tsukurimashou: Commit


Commit MetaInfo

Revision204 (tree)
Time2012-01-21 12:34:17
Authormskala

Log Message

idsgrep utility (still incomplete)

Change Summary

Incremental Difference

--- trunk/idsgrep/m4/ax_func_getopt_long.m4 (nonexistent)
+++ trunk/idsgrep/m4/ax_func_getopt_long.m4 (revision 204)
@@ -0,0 +1,69 @@
1+# ===========================================================================
2+# http://www.gnu.org/software/autoconf-archive/ax_func_getopt_long.html
3+# ===========================================================================
4+#
5+# SYNOPSIS
6+#
7+# AX_FUNC_GETOPT_LONG
8+#
9+# DESCRIPTION
10+#
11+# Check for getopt_long support.
12+#
13+# This assumes that the standard getopt.h file (from GNU libc) is available
14+# as lib/gnugetopt.h. If needed, this file will be linked as getopt.h, but
15+# we want to default to the system's getopt.h file. (See
16+# http://sources.redhat.com/ml/automake/2000-09/msg00041.html for an
17+# explanation about why using the system's getopt.h file is important.)
18+#
19+# MODIFIED
20+#
21+# Modified by Matthew Skala for the IDSgrep project, to use source files
22+# in the top directory instead of in lib/.
23+#
24+# LICENSE
25+#
26+# Copyright (c) 2008 Alexandre Duret-Lutz <adl@gnu.org>
27+#
28+# This program is free software; you can redistribute it and/or modify it
29+# under the terms of the GNU General Public License as published by the
30+# Free Software Foundation; either version 2 of the License, or (at your
31+# option) any later version.
32+#
33+# This program is distributed in the hope that it will be useful, but
34+# WITHOUT ANY WARRANTY; without even the implied warranty of
35+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
36+# Public License for more details.
37+#
38+# You should have received a copy of the GNU General Public License along
39+# with this program. If not, see <http://www.gnu.org/licenses/>.
40+#
41+# As a special exception, the respective Autoconf Macro's copyright owner
42+# gives unlimited permission to copy, distribute and modify the configure
43+# scripts that are the output of Autoconf when processing the Macro. You
44+# need not follow the terms of the GNU General Public License when using
45+# or distributing such scripts, even though portions of the text of the
46+# Macro appear in them. The GNU General Public License (GPL) does govern
47+# all other use of the material that constitutes the Autoconf Macro.
48+#
49+# This special exception to the GPL applies to versions of the Autoconf
50+# Macro released by the Autoconf Archive. When you make and distribute a
51+# modified version of the Autoconf Macro, you may extend this special
52+# exception to the GPL to apply to your modified version as well.
53+
54+#serial 5
55+
56+AU_ALIAS([ADL_FUNC_GETOPT_LONG], [AX_FUNC_GETOPT_LONG])
57+AC_DEFUN([AX_FUNC_GETOPT_LONG],
58+ [AC_PREREQ(2.49)dnl
59+ # Clean out junk possibly left behind by a previous configuration: a stale getopt.h created by the fallback at the end of this macro
60+ rm -f getopt.h
61+ # Check for getopt_long support: try the system header and function first, then the gnugetopt library, and only then fall back to the bundled GNU copy
62+ AC_CHECK_HEADERS([getopt.h])
63+ AC_CHECK_FUNCS([getopt_long],,
64+ [# FreeBSD has a gnugetopt library for this
65+ AC_CHECK_LIB([gnugetopt],[getopt_long],[AC_DEFINE([HAVE_GETOPT_LONG])],
66+ [# use the GNU replacement
67+ AC_LIBOBJ(getopt)
68+ AC_LIBOBJ(getopt1)
69+ AC_CONFIG_LINKS([getopt.h:gnugetopt.h])])])])
--- trunk/idsgrep/m4/ax_perl_module_version.m4 (nonexistent)
+++ trunk/idsgrep/m4/ax_perl_module_version.m4 (revision 204)
@@ -0,0 +1,85 @@
1+# ===========================================================================
2+# http://www.gnu.org/software/autoconf-archive/ax_perl_module_version.html
3+# ===========================================================================
4+#
5+# SYNOPSIS
6+#
7+# AX_PERL_MODULE_VERSION([MODULE VERSION], [ACTION-IF-TRUE], [ACTION-IF-FALSE])
8+#
9+# DESCRIPTION
10+#
11+# Checks to see if the listed 'Module Version' pairs are available in the
12+# system. If all the modules in the list are available ACTION-IF-TRUE is
13+# executed. If any module is not available ACTION-IF-FALSE is executed
14+# and the macro execution is aborted. NOTE: Perl is needed.
15+#
16+# Example:
17+#
18+# AX_PERL_MODULE_VERSION(CGI::Test 0.104 CGI::Ajax 0.694, ,
19+# AC_MSG_ERROR(Need some Perl modules))
20+#
21+# LICENSE
22+#
23+# Copyright (c) 2009 Marco Gomes <mpglesi@gmail.com>
24+# Copyright (c) 2009 Ruben Fonseca <fonseka@gmail.com>
25+#
26+# Copying and distribution of this file, with or without modification, are
27+# permitted in any medium without royalty provided the copyright notice
28+# and this notice are preserved. This file is offered as-is, without any
29+# warranty.
30+
31+#serial 5
32+
33+AU_ALIAS([AC_PERL_MODULE_VERSION], [AX_PERL_MODULE_VERSION])
34+AC_DEFUN([AX_PERL_MODULE_VERSION],[dnl
35+ac_perl_list_modules="$1"
36+# Make sure we have perl; only probe for it when the caller has not already set PERL
37+if test -z "$PERL"; then
38+AC_CHECK_PROG(PERL,perl,perl)
39+fi
40+
41+# Check the number of arguments: the list holds name/version pairs, so an odd word count is a syntax error. Uses expr because the let builtin is not available in every POSIX shell.
42+args_num=`echo $ac_perl_list_modules | wc -w`
43+check_args=`expr $args_num % 2`
44+if test "$check_args" = "1" ; then
45+ AC_MSG_ERROR(syntax error)
46+else
47+ :
48+fi
49+
50+if test "x$PERL" != x; then
51+ ac_failed=0
52+ while test ${#ac_perl_list_modules} -gt 2 ; do
53+ module_name=`echo $ac_perl_list_modules | cut -d " " -f 1`
54+ module_version=`echo $ac_perl_list_modules | cut -d " " -f 2`
55+ ac_perl_list_modules=`echo $ac_perl_list_modules | cut -d " " -f 3-`
56+ AC_MSG_CHECKING(for perl module $module_name version $module_version)
57+
58+ $PERL "-M$module_name" -e exit > /dev/null 2>&1
59+ if test $? -ne 0; then
60+ AC_MSG_RESULT(no);
61+ ac_failed=1
62+ ac_perl_list_modules=""
63+ else
64+ version=`$PERL "-M$module_name" -e 'print $'"$module_name::VERSION" 2>&1`
65+ $PERL -e 'exit(shift cmp shift)' "$version" "$module_version"
66+ if test $? -eq 0 -o $? -eq 1 ; then # cmp gave 0 or 1: the installed version string sorts at or after the required one
67+ AC_MSG_RESULT(ok);
68+ else
69+ AC_MSG_RESULT(no)
70+ ac_failed=1
71+ ac_perl_list_modules=""
72+ fi
73+ fi;
74+ done
75+
76+ if test "$ac_failed" = 0; then
77+ :
78+ $2
79+ else
80+ :
81+ $3
82+ fi
83+else
84+ AC_MSG_ERROR(could not find perl)
85+fi])dnl
--- trunk/idsgrep/idsgrep.1.in (nonexistent)
+++ trunk/idsgrep/idsgrep.1.in (revision 204)
@@ -0,0 +1,372 @@
1+.TH IDSGREP 1 "@release_date@" "@PACKAGE_STRING@" "User Commands"
2+.SH NAME
3+@PACKAGE@ \- match Extended Ideographic Description Sequences
4+.
5+.SH SYNOPSIS
6+.B @PACKAGE@
7+.RI [ OPTIONS ]
8+.I PATTERN
9+.RI [ FILE .\|.\|.]
10+.
11+.SH DESCRIPTION
12+The
13+.B @PACKAGE@
14+program parses the input files, or standard input if no filename is
15+specified, into Extended Ideographic Description Sequences
16+(EIDSes, described
17+in more detail below) separated by whitespace.
18+Any EIDS in the input that matches
19+.I PATTERN
20+is echoed through to standard output, along with its trailing whitespace
21+until the next EIDS in the input.
22+This is like
23+.BR grep (1)
24+except that it operates on EIDSes instead of text lines, and it follows the
25+rules of EIDS matching instead of regular expression matching.
26+.PP
27+The function of this program is deliberately general in nature, and it
28+could conceivably be applied to many different purposes, but the
29+application motivating development is to descriptions of Han
30+characters (as used to write Chinese, Japanese, and some other languages)
31+in terms of their decomposition into smaller parts.
32+For instance, the character <U+840C> can be described as <U+8279> above
33+<U+660E>, which in turn can be described as <U+65E5> next to <U+6708>.
34+With
35+.B @PACKAGE@
36+it is possible to match a partial description against those in a database;
37+such an operation may be useful in looking up an unfamiliar character or in
38+certain font development contexts.
39+.PP
40+Please note that
41+.B @PACKAGE@
42+is somewhat unusual in being a command-line utility, for which a
43+.B man
44+page in English and ASCII would be the preferred form of documentation, whose
45+operation nonetheless revolves around certain non-ASCII characters, namely
46+the so-called Ideographic Description Characters in the range <U+2FF0> to
47+<U+2FFB>.
48+Those are not included in many fonts, and cannot be included in portable
49+.B man
50+pages.
51+Most users are likely to be native English speakers working with East
52+Asian languages,
53+and so Chinese or Japanese characters are also necessary to document
54+typical usage examples.
55+In this document, the notation <U+xxxx> denotes a Unicode character by
56+its hexadecimal code point; for a
57+more detailed description of
58+.B @PACKAGE@
59+that shows the characters in their proper forms, consult the PDF
60+documentation in
61+.IR @flat_docdir@/@PACKAGE@.pdf .
62+.
63+.SH OPTIONS
64+At present,
65+.B @PACKAGE@
66+does not support any options.
67+But in the future, it is planned to accept command-line options to do things
68+like invert the sense of matching, specify the details of output format,
69+and select standard dictionaries.
70+.
71+.SH EXTENDED IDEOGRAPHIC DESCRIPTION SEQUENCES
72+The Han character set is open-ended.
73+Although a few thousand characters suffice to write the most popular
74+Han-script languages most of the time,
75+popular standards define tens of thousands of less-popular characters,
76+and there are at least hundreds of thousands of rare characters known to
77+occur in names and historical contexts.
78+Computer text processing systems that use fixed lists of characters will
79+inevitably find themselves unable to represent some text.
80+As a result, there is a need to
81+.I describe
82+characters in a standard way that may have no standard code
83+points of their own.
84+A similar need for descriptions of characters arises when looking up
85+characters in a dictionary; a user may recognize some or all the visual
86+features of a character (such as its parts and the way they are laid out)
87+without knowing how to enter the character as a whole.
88+.PP
89+The Unicode standard offers a partial solution to some of these issues by
90+defining a series of \(lqIdeographic Description Characters\(rq (IDCs),
91+<U+2FF0> to <U+2FFB>, and a syntax for using them to construct
92+\(lqIdeographic Description Sequences\(rq (IDSes).
93+.PP
94+Here are the rules of Unicode IDSes:
95+.IP \(bu 4
96+A character from one of the Unified Han or CJK Radical ranges
97+is a complete IDS and simply represents itself.
98+.IP \(bu 4
99+The IDC code points <U+2FF0>, <U+2FF1>, and <U+2FF4> through <U+2FFB>
100+are prefix binary operators.
101+One of these characters followed by two complete IDSes
102+forms another complete IDS, representing a character formed by joining the
103+two smaller characters in a way suggested by the name and graphical image
104+of the IDC.
105+.IP \(bu 4
106+The IDCs <U+2FF2> and <U+2FF3> are prefix ternary operators.
107+(Unicode uses the less-standard word \(lqtrinary.\(rq)
108+One of them can be followed by three complete IDSes to form an IDS that
109+describes a
110+character made of three parts, much in the same manner as the binary
111+operators.
112+.IP \(bu 4
113+An IDS may not be more than 16 code points long overall nor contain more
114+than six consecutive non-operator characters.
115+This rule appears to be intended to make things easier for systems that need
116+to be able to jump into the middle of text and quickly find the start and
117+end of IDSes.
118+.IP \(bu 4
119+IDSes non-bindingly \(lqshould\(rq be as short as possible.
120+.PP
121+To create a dictionary of character decompositions for
122+.BR @PACKAGE@ ,
123+we need to be able to describe characters in a little more detail than
124+provided by standard Unicode IDSes, and in particular, we need to be able to
125+specify a code point for a
126+character or part of one while also specifying that code point's
127+further decomposition.
128+There is also a need for specifying
129+.I partial
130+descriptions, similar in spirit to
131+.BR grep (1)'s
132+regular expressions.
133+Both these needs are served by the
134+.I Extended
135+Ideographic Description Sequence (EIDS) syntax of
136+.BR @PACKAGE@ .
137+Thorough discussion of the syntax, with visual examples, is reserved for the
138+PDF documentation, but the following rules are an outline.
139+.IP \(bu 4
140+An EIDS represents a tree in which each node has an optional
141+.IR head ,
142+a required
143+.IR functor ,
144+and between zero and three
145+.IR children ,
146+each of which is a similar tree, recursively.
147+Heads and functors are nonempty unlimited-length strings of Unicode code
148+points.
149+In the current implementation, no Unicode canonicalization is performed.
150+The number of children of a node is called its
151+.IR arity .
152+Nodes with arity zero through three are called
153+.IR nullary ,
154+.IR unary ,
155+.IR binary ,
156+and
157+.I ternary
158+respectively.
159+.IP \(bu 4
160+To write an EIDS, write down the head of the root (if it has one),
161+the functor of the root, and then the EIDSes for its children.
162+.IP \(bu 4
163+Heads and functors, in their most explicit form, are written as bracketed
164+strings, where the choice of
165+opening bracket indicates whether the string is a head or
166+a functor, and if a functor, the arity of the node.
167+.IP \(bu 4
168+A head may be opened by < (ASCII angle bracket) in which case it is closed
169+by >; it may be opened by <U+3010> and closed by <U+3011> (black lenticular
170+brackets); or it may be opened by <U+3016> and closed by <U+3017> (white
171+lenticular brackets).
172+.IP \(bu 4
173+The opening and closing brackets for the functor of a nullary node may be (
174+with ) (ASCII parentheses); <U+FF08> with <U+FF09> (fullwidth parentheses);
175+or <U+FF5F> with
176+<U+FF60> (fullwidth \(lqwhite\(rq [usually double] parentheses).
177+.IP \(bu 4
178+For the functor of a unary node, the closing bracket is always the same as
179+the opening bracket.
180+Three characters may be used to open and close a unary node's
181+functor: . (ASCII period); <U+30FB> (katakana middle dot); and
182+<U+301C> (wave dash).
183+.IP \(bu 4
184+The opening and closing brackets for the functor of a binary node may be [
185+with ] (ASCII square brackets);
186+<U+FF3B> with <U+FF3D> (fullwidth square brackets); or <U+301A> with
187+<U+301B> (white square brackets).
188+.IP \(bu 4
189+The opening and closing brackets for the functor of a ternary node may be {
190+with } (ASCII curly braces);
191+<U+3014> with <U+3015> (tortoiseshell brackets); or <U+3018> with
192+<U+3019> (white tortoiseshell brackets).
193+.IP \(bu 4
194+The closing bracket of a bracketed string must be the one that
195+corresponds to the opening bracket.
196+For instance, a head opened by < must be closed by >.
197+Any occurrence of <U+3011> is
198+taken literally and does not close the string.
199+.IP \(bu 4
200+Nested brackets are not detected nor specially processed.
201+For instance, in a nullary node's functor opened by (,
202+the first ) ends the string regardless of how many other copies of ( may
203+have been included in the string.
204+However, in no case may a bracketed string be empty.
205+If what would otherwise be a matching closing bracket
206+appears immediately after the opening bracket, then it is taken literally as
207+the first character of the string and does
208+.I not
209+close the string.
210+One frequently-used case is that three ASCII periods in a row are valid
211+syntax for a unary node functor containing a single ASCII period.
212+The first opens the string, the second is the literal first character, and
213+the third closes the string.
214+.IP \(bu 4
215+It is likely that some backslash escapes will be implemented in the future.
216+.IP \(bu 4
217+ASCII control characters and whitespace characters, <U+0000> through
218+<U+0020> (notably including <U+0000>), are ignored outside bracketed
219+strings and taken
220+literally inside bracketed strings.
221+Non-ASCII Unicode whitespace characters, such as <U+3000>, may be
222+treated this way in the future but currently are not.
223+.IP \(bu 4
224+Some characters have
225+.IR "sugary implicit brackets" .
226+That means that if one of these characters occurs where an opening bracket
227+would otherwise be expected, it will be treated as a one-character bracketed
228+string with brackets that depend on what character it is.
229+For instance, ASCII semicolon will be interpreted as if it appeared in ASCII
230+parentheses, and will thus become the functor of a nullary node.
231+The complete list of characters that have sugary implicit brackets, with
232+the brackets they imply, is:
233+(;) (?) .!. .=. .*. .@. [&] [|]
234+[<U+2FF0>] [<U+2FF1>] [<U+2FF4>] [<U+2FF5>] [<U+2FF6>] [<U+2FF7>]
235+[<U+2FF8>] [<U+2FF9>] [<U+2FFA>] [<U+2FFB>]
236+{<U+2FF2>} {<U+2FF3>}.
237+.IP \(bu 4
238+All remaining characters (those without other dispositions specified above,
239+and notably including the Unified Han characters) have
240+.IR "syrupy implicit semicolons" .
241+That means when one occurs outside a bracketed string,
242+it not only becomes a single-character head
243+(sugary implicit angle brackets) but it also receives arity zero and a
244+functor consisting of a single semicolon.
245+For instance, the lone character x is equivalent to <x>(;) or (because
246+semicolon itself has implicit parentheses) <x>;.
247+.IP \(bu 4
248+After parsing, EIDSes are subjected to a \(lqcanonicalization\(rq
249+transformation in which certain functor and arity combinations (generally
250+relatively verbose ASCII alphabetic strings) are replaced by
251+single-character forms.
252+The idea is to provide human-readable pure-ASCII alternate forms for
253+the IDCs and matching operators.
254+In the future there may be options to skip this transformation
255+on input, or perform its reverse on output.
256+The list of replacements is: (anything) to (?); .anywhere. to ...; [and] to
257+[&]; [or] to [|]; .not. to .!.; .equal. to .=.; [lr] to [<U+2FF0>]; [tb] to
258+[<U+2FF1>]; {lcr} to {<U+2FF2>}; {tcb} to {<U+2FF3>}; [enclose] to
259+[<U+2FF4>]; [wrapu] to [<U+2FF5>]; [wrapd] to [<U+2FF6>]; [wrapl] to
260+[<U+2FF7>]; [wrapul] to [<U+2FF8>]; [wrapur] to [<U+2FF9>]; [wrapll] to
261+[<U+2FFA>]; and [overlap] to [<U+2FFB>].
262+.
263+.IP \(bu 4
264+Total length, and number of consecutive nullary nodes (which are like
265+the non-operator characters in Unicode IDSes), are unlimited.
266+In the case of a dictionary entry it may be desirable to make the EIDS as
267+.I long
268+as possible (contrary to Unicode's recommendation)
269+in order to offer a detailed decomposition and many query strategies to
270+the user.
271+.IP \(bu 4
272+It is a consequence of these rules that all syntactically valid Unicode
273+IDSes are syntactically valid EIDSes, but the reverse is not true.
274+.
275+.SH EIDS MATCHING
276+.
277+.SH FILES
278+Individual sites may well have a different set of dictionaries installed,
279+but these are some popular ones.
280+The paths shown are those that were specified during configuration of
281+this package and built into the
282+.BR idsgrep (1)
283+binary; site admins who think they are too cool for
284+.B configure
285+may possibly have installed the files elsewhere.
286+.PP
287+.I @flat_dictdir@/tsukurimashou.eids
288+.RS
289+Japanese kanji decompositions from the Tsukurimashou font project.
290+These are relatively clean in terms of accurately reflecting the visual
291+construction of each character, but they only cover the glyphs included in
292+the fonts, and they are based on the visual appearance of the glyphs
293+(and, specifically, their appearance
294+.IR "in the Tsukurimashou fonts" )
295+rather than traditional etymology.
296+.RE
297+.I @flat_dictdir@/kanjivg.eids
298+.RS
299+Japanese kanji decompositions from the KanjiVG database.
300+Basically complete coverage of about 6700 characters.
301+However, KanjiVG decomposes glyphs according to stroke order and traditional
302+etymology, even to
303+the point of listing a single radical more than once in the decomposition if
304+its strokes are written non-consecutively,
305+and the format translator that generates this file only sort
306+of works.
307+As a result, decompositions in this database may be incomplete,
308+idiosyncratic, or even flat-out wrong.
309+.
310+.SH ENVIRONMENT
311+.IP FOOCONF
312+If non-null the full pathname for an alternate system wide
313+.IR foo.conf .
314+Overridden by the
315+.B -c
316+option.
317+.
318+.SH DIAGNOSTICS
319+The following diagnostics may be issued on stderr:
320+
321+Bad magic number.
322+.RS
323+The input file does not look like an archive file.
324+.RE
325+Old style baz segments.
326+.RS
327+.B foo
328+can only handle new style baz segments. COBOL
329+object libraries are not supported in this version.
330+.
331+.SH BUGS
332+Non-ASCII characters are essential to documenting this software, and
333+.B man
334+can't handle them portably, so this document is less useful than it should
335+be.
336+.PP
337+Many of the matching features described in this document are not actually
338+implemented.
339+.
340+.SH AUTHOR
341+Matthew Skala <mskala@ansuz.sooke.bc.ca>
342+.
343+.SH COPYRIGHT
344+Copyright \(co
345+2012
346+Matthew Skala
347+.PP
348+This program is free software: you can redistribute it and/or modify
349+it under the terms of the GNU General Public License as published by
350+the Free Software Foundation, version 3.
351+.PP
352+This program is distributed in the hope that it will be useful,
353+but WITHOUT ANY WARRANTY; without even the implied warranty of
354+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
355+See the GNU General Public License for more details.
356+.PP
357+You should have received a copy of the GNU General Public License
358+along with this program.
359+If not, see <http://www.gnu.org/licenses/>.
360+.PP
361+Please note that dictionaries prepared for use with IDSgrep may be subject to
362+their own copyright terms differing from those of IDSgrep itself.
363+In particular, the IDSgrep distribution contains code to build a dictionary
364+based on Ulrich Apel's KanjiVG project.
365+That dictionary would be subject to his copyright and the Creative Commons
366+Attribution-Share Alike 3.0 Licence.
367+The Tsukurimashou project also builds an EIDS-format dictionary for
368+use with IDSgrep, but happens to use the same copyright and GPL 3 licensing
369+terms as IDSgrep anyway.
370+.
371+.SH "SEE ALSO"
372+.BR grep (1)
--- trunk/idsgrep/gnugetopt.h (nonexistent)
+++ trunk/idsgrep/gnugetopt.h (revision 204)
@@ -0,0 +1,180 @@
1+/* Declarations for getopt.
2+ Copyright (C) 1989-1994, 1996-1999, 2001 Free Software Foundation, Inc.
3+ This file is part of the GNU C Library.
4+
5+ The GNU C Library is free software; you can redistribute it and/or
6+ modify it under the terms of the GNU Lesser General Public
7+ License as published by the Free Software Foundation; either
8+ version 2.1 of the License, or (at your option) any later version.
9+
10+ The GNU C Library is distributed in the hope that it will be useful,
11+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13+ Lesser General Public License for more details.
14+
15+ You should have received a copy of the GNU Lesser General Public
16+ License along with the GNU C Library; if not, write to the Free
17+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18+ 02111-1307 USA. */
19+
20+#ifndef _GETOPT_H
21+
22+#ifndef __need_getopt
23+# define _GETOPT_H 1
24+#endif
25+
26+/* If __GNU_LIBRARY__ is not already defined, either we are being used
27+ standalone, or this is the first header included in the source file.
28+ If we are being used with glibc, we need to include <features.h>, but
29+ that does not exist if we are standalone. So: if __GNU_LIBRARY__ is
30+ not defined, include <ctype.h>, which will pull in <features.h> for us
31+ if it's from glibc. (Why ctype.h? It's guaranteed to exist and it
32+ doesn't flood the namespace with stuff the way some other headers do.) */
33+#if !defined __GNU_LIBRARY__
34+# include <ctype.h>
35+#endif
36+
37+#ifdef __cplusplus
38+extern "C" {
39+#endif
40+
41+/* For communication from `getopt' to the caller.
42+ When `getopt' finds an option that takes an argument,
43+ the argument value is returned here.
44+ Also, when `ordering' is RETURN_IN_ORDER,
45+ each non-option ARGV-element is returned here. */
46+
47+extern char *optarg;
48+
49+/* Index in ARGV of the next element to be scanned.
50+ This is used for communication to and from the caller
51+ and for communication between successive calls to `getopt'.
52+
53+ On entry to `getopt', zero means this is the first call; initialize.
54+
55+ When `getopt' returns -1, this is the index of the first of the
56+ non-option elements that the caller should itself scan.
57+
58+ Otherwise, `optind' communicates from one call to the next
59+ how much of ARGV has been scanned so far. */
60+
61+extern int optind;
62+
63+/* Callers store zero here to inhibit the error message `getopt' prints
64+ for unrecognized options. */
65+
66+extern int opterr;
67+
68+/* Set to an option character which was unrecognized. */
69+
70+extern int optopt;
71+
72+#ifndef __need_getopt
73+/* Describe the long-named options requested by the application.
74+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
75+ of `struct option' terminated by an element containing a name which is
76+ zero.
77+
78+ The field `has_arg' is:
79+ no_argument (or 0) if the option does not take an argument,
80+ required_argument (or 1) if the option requires an argument,
81+ optional_argument (or 2) if the option takes an optional argument.
82+
83+ If the field `flag' is not NULL, it points to a variable that is set
84+ to the value given in the field `val' when the option is found, but
85+ left unchanged if the option is not found.
86+
87+ To have a long-named option do something other than set an `int' to
88+ a compiled-in constant, such as set a value from `optarg', set the
89+ option's `flag' field to zero and its `val' field to a nonzero
90+ value (the equivalent single-letter option character, if there is
91+ one). For long options that have a zero `flag' field, `getopt'
92+ returns the contents of the `val' field. */
93+
94+struct option
95+{
96+# if (defined __STDC__ && __STDC__) || defined __cplusplus
97+ const char *name; /* option name; a zero name terminates the vector */
98+# else
99+ char *name; /* option name; a zero name terminates the vector */
100+# endif /* __STDC__ || __cplusplus */
101+ /* has_arg can't be an enum because some compilers complain about
102+ type mismatches in all the code that assumes it is an int. */
103+ int has_arg; /* no_argument, required_argument or optional_argument */
104+ int *flag; /* if not NULL, *flag is set to val when the option is found */
105+ int val; /* value stored through flag, or returned when flag is zero */
106+};
107+
108+/* Names for the values of the `has_arg' field of `struct option'. */
109+
110+# define no_argument 0
111+# define required_argument 1
112+# define optional_argument 2
113+#endif /* need getopt */
114+
115+
116+/* Get definitions and prototypes for functions to process the
117+ arguments in ARGV (ARGC of them, minus the program name) for
118+ options given in OPTS.
119+
120+ Return the option character from OPTS just read. Return -1 when
121+ there are no more options. For unrecognized options, or options
122+ missing arguments, `optopt' is set to the option letter, and '?' is
123+ returned.
124+
125+ The OPTS string is a list of characters which are recognized option
126+ letters, optionally followed by colons, specifying that that letter
127+ takes an argument, to be placed in `optarg'.
128+
129+ If a letter in OPTS is followed by two colons, its argument is
130+ optional. This behavior is specific to the GNU `getopt'.
131+
132+ The argument `--' causes premature termination of argument
133+ scanning, explicitly telling `getopt' that there are no more
134+ options.
135+
136+ If OPTS begins with `--', then non-option arguments are treated as
137+ arguments to the option '\0'. This behavior is specific to the GNU
138+ `getopt'. */
139+
140+#if (defined __STDC__ && __STDC__) || defined __cplusplus
141+# ifdef __GNU_LIBRARY__
142+/* Many other libraries have conflicting prototypes for getopt, with
143+ differences in the consts, in stdlib.h. To avoid compilation
144+ errors, only prototype getopt for the GNU C library. */
145+extern int getopt (int __argc, char *const *__argv, const char *__shortopts);
146+# else /* not __GNU_LIBRARY__ */
147+extern int getopt ();
148+# endif /* __GNU_LIBRARY__ */
149+
150+# ifndef __need_getopt
151+extern int getopt_long (int __argc, char *const *__argv, const char *__shortopts,
152+ const struct option *__longopts, int *__longind);
153+extern int getopt_long_only (int __argc, char *const *__argv,
154+ const char *__shortopts,
155+ const struct option *__longopts, int *__longind);
156+
157+/* Internal only. Users should not call this directly. */
158+extern int _getopt_internal (int __argc, char *const *__argv,
159+ const char *__shortopts,
160+ const struct option *__longopts, int *__longind,
161+ int __long_only);
162+# endif /* not __need_getopt */
163+#else /* not __STDC__ */
164+extern int getopt ();
165+# ifndef __need_getopt
166+extern int getopt_long ();
167+extern int getopt_long_only ();
168+
169+extern int _getopt_internal ();
170+# endif /* not __need_getopt */
171+#endif /* __STDC__ */
172+
173+#ifdef __cplusplus
174+}
175+#endif
176+
177+/* Make sure we later can get all the definitions and declarations. */
178+#undef __need_getopt
179+
180+#endif /* getopt.h */
--- trunk/idsgrep/configure.ac (nonexistent)
+++ trunk/idsgrep/configure.ac (revision 204)
@@ -0,0 +1,292 @@
1+# -*- Autoconf -*-
2+# Process this file with autoconf to produce a configure script.
3+#
4+
5+#
6+# Configuration source for IDSgrep
7+# Copyright (C) 2012 Matthew Skala
8+#
9+# This program is free software: you can redistribute it and/or modify
10+# it under the terms of the GNU General Public License as published by
11+# the Free Software Foundation, version 3.
12+#
13+# This program is distributed in the hope that it will be useful,
14+# but WITHOUT ANY WARRANTY; without even the implied warranty of
15+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+# GNU General Public License for more details.
17+#
18+# You should have received a copy of the GNU General Public License
19+# along with this program. If not, see <http://www.gnu.org/licenses/>.
20+#
21+# Matthew Skala
22+# http://ansuz.sooke.bc.ca/
23+# mskala@ansuz.sooke.bc.ca
24+#
25+
26+#
27+############################################################################
28+#
29+# Override Autoconf's argument parsing - must be done here,
30+# before AC_INIT has a chance to run
31+#
32+
33+# Much of this code is copied from the Tsukurimashou project, but since it
34+# has the same copyright holder, there's no licensing problem.
35+
36+# General macro for editing a definition
37+m4_define([TSUKU_DEFINITION_SUBST],
38+ [m4_define([$1],
39+ m4_bpatsubst(m4_dquote(m4_defn([$1])),
40+ [$2],
41+ [$3]))])
42+
43+# Delete an option from the argument parser
44+m4_define([TSUKU_DELETE_CMDLINE_OPTION],
45+ [TSUKU_DEFINITION_SUBST([_AC_INIT_PARSE_ARGS],
46+ [\
47+ -$1[^)]*)\
48+.*;;\
49+ -$1=\*[^)]*)\
50+.*;;\
51+],
52+ [\
53+ # Deleted $1 option\
54+])
55+ TSUKU_DEFINITION_SUBST([_AC_INIT_PARSE_ARGS],
56+ [\
57+ -$1[^)]*)\
58+ # Obsolete.*\
59+.*;;\
60+],
61+ [\
62+ # Deleted obsolete $1 option\
63+])
64+ ])
65+
66+# Delete unwanted non-install-dir options
67+TSUKU_DELETE_CMDLINE_OPTION([-x])
68+TSUKU_DELETE_CMDLINE_OPTION([x-includes])
69+TSUKU_DELETE_CMDLINE_OPTION([x-libraries])
70+
71+# Delete an install dir option from both parser and help
72+m4_define([TSUKU_DELETE_INSTDIR_OPTION],
73+ [TSUKU_DELETE_CMDLINE_OPTION([$1dir])
74+ TSUKU_DEFINITION_SUBST([_AC_INIT_PARSE_ARGS],
75+ [for ac_var in\([^#]*\).\b$1dir\b],
76+ [for ac_var in\1])
77+ TSUKU_DEFINITION_SUBST([_AC_INIT_HELP],
78+ [ --$1dir=DIR.*\
79+],
80+ [])])
81+
82+# Delete many of the default install dirs
83+TSUKU_DELETE_INSTDIR_OPTION([data])
84+TSUKU_DELETE_INSTDIR_OPTION([dvi])
85+TSUKU_DELETE_INSTDIR_OPTION([html])
86+TSUKU_DELETE_INSTDIR_OPTION([include])
87+TSUKU_DELETE_INSTDIR_OPTION([info])
88+TSUKU_DELETE_INSTDIR_OPTION([lib])
89+TSUKU_DELETE_INSTDIR_OPTION([libexec])
90+TSUKU_DELETE_INSTDIR_OPTION([info])
91+TSUKU_DELETE_INSTDIR_OPTION([locale])
92+TSUKU_DELETE_INSTDIR_OPTION([localstate])
93+TSUKU_DELETE_INSTDIR_OPTION([oldinclude])
94+TSUKU_DELETE_INSTDIR_OPTION([ps])
95+TSUKU_DELETE_INSTDIR_OPTION([sbin])
96+TSUKU_DELETE_INSTDIR_OPTION([sharedstate])
97+TSUKU_DELETE_INSTDIR_OPTION([sysconf])
98+
99+# generate a "case" pattern matching chunk
100+# TSUKU_CASE_PATTERN_CHUNK(option,shortest,before,after)
101+m4_define([TSUKU_CASE_PATTERN_CHUNK],
102+ [$3$1$4[]m4_if([$1],[$2],[],
103+ [TSUKU_CASE_PATTERN_CHUNK(m4_substr($1,0,m4_eval(m4_len($1)-1)),
104+ [$2],[$3],[$4])])])
105+
106+# Add a new installation directory
107+# TSUKU_ADD_INSTDIR_OPTION(dirname,under,default,help,abbrev,casebefore)
108+m4_define([TSUKU_ADD_INSTDIR_OPTION],
109+ [TSUKU_DEFINITION_SUBST([_AC_INIT_PARSE_ARGS],
110+ [AC_SUBST(\[$2\].*)dnl\
111+],
112+ [\&AC_SUBST([$1dir],['${$2}/$3'])dnl\
113+])
114+ TSUKU_DEFINITION_SUBST([_AC_INIT_HELP],
115+ [\
116+\(.*--$2=.*\)\
117+],
118+ m4_expand([\
119+\1\
120+AS_HELP_STRING([--$1dir=DIR],m4_expand([$4 @<:@m4_toupper([$2])/$3@:>@]))\
121+]))
122+ TSUKU_DEFINITION_SUBST([_AC_INIT_PARSE_ARGS],
123+ [\
124+\(.*$6.*\)\
125+],
126+ [\
127+ -$1dir[]TSUKU_CASE_PATTERN_CHUNK([$1dir],[$5],[ | --],[])@:}@
128+ ac_prev=$1dir ;;
129+ -$1dir=*TSUKU_CASE_PATTERN_CHUNK([$1dir],[$5],[ | --],[=*])@:}@
130+ $1dir=$ac_optarg ;;\
131+\
132+\1\
133+])
134+ TSUKU_DEFINITION_SUBST([_AC_INIT_PARSE_ARGS],
135+ [for ac_var in\([^#]*.\b$2\)\b],
136+ [for ac_var in\1 $1dir])])
137+
138+# Add a new directory
139+TSUKU_ADD_INSTDIR_OPTION([dict],[datarootdir],[dict],[dictionaries],
140+ [di],[ --disable])
141+
142+# Reduce the ridiculous verbosity of Autoconf's help message
143+TSUKU_DEFINITION_SUBST([_AC_INIT_HELP],
144+ [ to adapt to many kinds of systems],[])
145+TSUKU_DEFINITION_SUBST([_AC_INIT_HELP],
146+ [of some of the useful variables],
147+ [of some useful variables])
148+TSUKU_DEFINITION_SUBST([_AC_INIT_HELP],
149+ [Defaults for the options are specified in brackets],
150+ [Defaults are specified in brackets])
151+TSUKU_DEFINITION_SUBST([_AC_INIT_HELP],
152+ [\barch.*-independent \b],
153+ [])
154+
155+#
156+############################################################################
157+#
158+# Autoconf initialization
159+#
160+AC_PREREQ([2.63])
161+AC_INIT([IDSgrep],
162+ [0.1], [mskala@ansuz.sooke.bc.ca], [idsgrep],
163+ [[http://ansuz.sooke.bc.ca/]])
164+AC_PRESERVE_HELP_ORDER
165+AM_INIT_AUTOMAKE([foreign])
166+AC_CONFIG_SRCDIR([idsgrep.c])
167+AC_CONFIG_HEADERS([config.h])
168+AC_CONFIG_MACRO_DIR([m4])
169+AC_REVISION([$Id: configure.ac 1015 2011-12-15 22:24:32Z mskala $])
170+AC_COPYRIGHT([Copyright (C) 2011 Matthew Skala])
171+AC_SUBST([release_date],["January 2, 2012"])
172+#
173+############################################################################
174+#
175+# Checks for programs.
176+#
177+AC_PROG_CC
178+AC_PROG_CC_C_O
179+AC_PROG_GREP
180+AC_PROG_INSTALL
181+AC_PROG_LN_S
182+AC_PROG_MAKE_SET
183+AC_PROG_MKDIR_P
184+AC_PATH_PROG([AUTOCONF],[autoconf])
185+AC_PATH_PROG([AUTOMAKE],[automake])
186+AC_PATH_PROG([GZIP],[gzip])
187+AC_ARG_VAR([PERL],[Perl])
188+AC_PATH_PROG([PERL],[perl])
189+AC_ARG_VAR([XELATEX],[XeLaTeX])
190+AC_PATH_PROG([XELATEX],[xelatex])
191+#
192+############################################################################
193+#
194+# Checks for libraries.
195+#
196+AX_PERL_MODULE_VERSION([XML::Parser 2.36],[],
197+ [AC_MSG_ERROR([Required Perl modules are missing])])
198+#
199+############################################################################
200+#
201+# Checks for header files.
202+#
203+AC_CHECK_HEADERS([libintl.h stdlib.h string.h wchar.h])
204+#
205+############################################################################
206+#
207+# Checks for typedefs, structures, and compiler characteristics.
208+#
209+AC_CHECK_SIZEOF([int])
210+AC_TYPE_SIZE_T
211+#
212+############################################################################
213+#
214+# Checks for library functions.
215+#
216+AX_FUNC_GETOPT_LONG
217+AC_CHECK_FUNCS([memmove memset strchr])
218+#
219+############################################################################
220+#
221+# Checks for system services
222+#
223+AC_SYS_LONG_FILE_NAMES
224+#
225+############################################################################
226+#
227+# User-settable options
228+#
229+AS_IF([test "x$prefix" = "xNONE"],[prefix="$ac_default_prefix"])
230+AC_ARG_WITH([kanjivg],
231+ [AS_HELP_STRING([--with-kanjivg=PATH],
232+ [KanjiVG database [auto]])],
233+ [],
234+ [with_kanjivg=auto])
235+AS_IF([test "x$with_kanjivg" = "xauto"],[
236+AC_MSG_CHECKING([for KanjiVG])
237+m4_foreach_w([kvcheckpath],m4_expand([$srcdir dnl
238+$prefix/dict $prefix/share dnl
239+/dict /share /usr/dict /usr/share/dict /usr/local/dict dnl
240+/usr/local/share/dict $HOME $HOME/dict]),[
241+ for testfn in kvcheckpath/kanjivg*.xml.gz kvcheckpath/kanjivg*.xml
242+ do
243+ AS_IF([test "$with_kanjivg" = "auto"],
244+ [AS_IF([test -r "$testfn"],
245+ [with_kanjivg="$testfn"])])
246+ done
247+ ])
248+ AS_IF([test "$with_kanjivg" = "auto"],[with_kanjivg=no])
249+ AC_MSG_RESULT([$with_kanjivg])
250+ ])
251+AM_CONDITIONAL([COND_KANJIVG], [test '!' "$with_kanjivg" = no])
252+AC_SUBST([with_kanjivg])
253+#
254+AC_ARG_WITH([tsuku-build],
255+ [AS_HELP_STRING([--with-tsuku-build=PATH],
256+ [Tsukurimashou build dir [auto]])],
257+ [],
258+ [with_tsuku_build=auto])
259+AS_IF([test "x$with_tsuku_build" = "xauto"],[
260+AC_MSG_CHECKING([for Tsukurimashou build dir])
261+m4_foreach_w([tbcheckname],m4_expand([tsukurimashou dnl
262+tsukurimashou-0.6 tsukurimashou-0.7 tsukurimashou-0.8 dnl
263+tsukurimashou-0.9 tsukurimashou-0.10 tsukurimashou-0.11]),[
264+m4_foreach_w([tbcheckpath],m4_expand([$srcdir $srcdir/.. $prefix/src dnl
265+/src /usr/src /usr/local/src dnl
266+$HOME $HOME/src]),[
267+ AS_IF([test "$with_tsuku_build" = "auto"],
268+ [AS_IF([test -r "tbcheckpath/tbcheckname/Makefile"],[
269+check_line=x`grep 'Makefile for Tsu' tbcheckpath/tbcheckname/Makefile`
270+ AS_IF([test "$check_line" = "x# Makefile for Tsukurimashou"],
271+ [with_tsuku_build=tbcheckpath/tbcheckname])])])])])
272+AS_IF([test "$with_tsuku_build" = "auto"],[with_tsuku_build=no])
273+AC_MSG_RESULT([$with_tsuku_build])
274+])
275+AM_CONDITIONAL([COND_TSUKU_BUILD], [test '!' "$with_tsuku_build" = no])
276+AC_SUBST([with_tsuku_build])
277+#
278+############################################################################
279+#
280+# Generate output
281+#
282+AC_SUBST([flat_dictdir])
283+AC_SUBST([flat_docdir])
284+eval flat_dictdir=${dictdir}
285+eval flat_dictdir=${flat_dictdir}
286+eval flat_dictdir=${flat_dictdir}
287+eval flat_docdir=${docdir}
288+eval flat_docdir=${flat_docdir}
289+eval flat_docdir=${flat_docdir}
290+#
291+AC_CONFIG_FILES([Makefile idsgrep.1])
292+AC_OUTPUT
--- trunk/idsgrep/getopt.c (nonexistent)
+++ trunk/idsgrep/getopt.c (revision 204)
@@ -0,0 +1,1055 @@
1+/* Getopt for GNU.
2+ NOTE: getopt is now part of the C library, so if you don't know what
3+ "Keep this file name-space clean" means, talk to drepper@gnu.org
4+ before changing it!
5+ Copyright (C) 1987,88,89,90,91,92,93,94,95,96,98,99,2000,2001
6+ Free Software Foundation, Inc.
7+ This file is part of the GNU C Library.
8+
9+ The GNU C Library is free software; you can redistribute it and/or
10+ modify it under the terms of the GNU Lesser General Public
11+ License as published by the Free Software Foundation; either
12+ version 2.1 of the License, or (at your option) any later version.
13+
14+ The GNU C Library is distributed in the hope that it will be useful,
15+ but WITHOUT ANY WARRANTY; without even the implied warranty of
16+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17+ Lesser General Public License for more details.
18+
19+ You should have received a copy of the GNU Lesser General Public
20+ License along with the GNU C Library; if not, write to the Free
21+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
22+ 02111-1307 USA. */
23+
24+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
25+ Ditto for AIX 3.2 and <stdlib.h>. */
26+#ifndef _NO_PROTO
27+# define _NO_PROTO
28+#endif
29+
30+#ifdef HAVE_CONFIG_H
31+# include <config.h>
32+#endif
33+
34+#if !defined __STDC__ || !__STDC__
35+/* This is a separate conditional since some stdc systems
36+ reject `defined (const)'. */
37+# ifndef const
38+# define const
39+# endif
40+#endif
41+
42+#include <stdio.h>
43+
44+/* Comment out all this code if we are using the GNU C Library, and are not
45+ actually compiling the library itself. This code is part of the GNU C
46+ Library, but also included in many other GNU distributions. Compiling
47+ and linking in this code is a waste when using the GNU C library
48+ (especially if it is a shared library). Rather than having every GNU
49+ program understand `configure --with-gnu-libc' and omit the object files,
50+ it is simpler to just do this in the source for each such file. */
51+
52+#define GETOPT_INTERFACE_VERSION 2
53+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
54+# include <gnu-versions.h>
55+# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
56+# define ELIDE_CODE
57+# endif
58+#endif
59+
60+#ifndef ELIDE_CODE
61+
62+
63+/* This needs to come after some library #include
64+ to get __GNU_LIBRARY__ defined. */
65+#ifdef __GNU_LIBRARY__
66+/* Don't include stdlib.h for non-GNU C libraries because some of them
67+ contain conflicting prototypes for getopt. */
68+# include <stdlib.h>
69+# include <unistd.h>
70+#endif /* GNU C library. */
71+
72+#ifdef VMS
73+# include <unixlib.h>
74+# if HAVE_STRING_H - 0
75+# include <string.h>
76+# endif
77+#endif
78+
79+#ifndef _
80+/* This is for other GNU distributions with internationalized messages. */
81+# if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
82+# include <libintl.h>
83+# ifndef _
84+# define _(msgid) gettext (msgid)
85+# endif
86+# else
87+# define _(msgid) (msgid)
88+# endif
89+#endif
90+
91+/* This version of `getopt' appears to the caller like standard Unix `getopt'
92+ but it behaves differently for the user, since it allows the user
93+ to intersperse the options with the other arguments.
94+
95+ As `getopt' works, it permutes the elements of ARGV so that,
96+ when it is done, all the options precede everything else. Thus
97+ all application programs are extended to handle flexible argument order.
98+
99+ Setting the environment variable POSIXLY_CORRECT disables permutation.
100+ Then the behavior is completely standard.
101+
102+ GNU application programs can use a third alternative mode in which
103+ they can distinguish the relative order of options and other arguments. */
104+
105+#include "getopt.h"
106+
107+/* For communication from `getopt' to the caller.
108+ When `getopt' finds an option that takes an argument,
109+ the argument value is returned here.
110+ Also, when `ordering' is RETURN_IN_ORDER,
111+ each non-option ARGV-element is returned here. */
112+
113+char *optarg;
114+
115+/* Index in ARGV of the next element to be scanned.
116+ This is used for communication to and from the caller
117+ and for communication between successive calls to `getopt'.
118+
119+ On entry to `getopt', zero means this is the first call; initialize.
120+
121+ When `getopt' returns -1, this is the index of the first of the
122+ non-option elements that the caller should itself scan.
123+
124+ Otherwise, `optind' communicates from one call to the next
125+ how much of ARGV has been scanned so far. */
126+
127+/* 1003.2 says this must be 1 before any call. */
128+int optind = 1;
129+
130+/* Formerly, initialization of getopt depended on optind==0, which
131+ causes problems with re-calling getopt as programs generally don't
132+ know that. */
133+
134+int __getopt_initialized;
135+
136+/* The next char to be scanned in the option-element
137+ in which the last option character we returned was found.
138+ This allows us to pick up the scan where we left off.
139+
140+ If this is zero, or a null string, it means resume the scan
141+ by advancing to the next ARGV-element. */
142+
143+static char *nextchar;
144+
145+/* Callers store zero here to inhibit the error message
146+ for unrecognized options. */
147+
148+int opterr = 1;
149+
150+/* Set to an option character which was unrecognized.
151+ This must be initialized on some systems to avoid linking in the
152+ system's own getopt implementation. */
153+
154+int optopt = '?';
155+
156+/* Describe how to deal with options that follow non-option ARGV-elements.
157+
158+ If the caller did not specify anything,
159+ the default is REQUIRE_ORDER if the environment variable
160+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
161+
162+ REQUIRE_ORDER means don't recognize them as options;
163+ stop option processing when the first non-option is seen.
164+ This is what Unix does.
165+ This mode of operation is selected by either setting the environment
166+ variable POSIXLY_CORRECT, or using `+' as the first character
167+ of the list of option characters.
168+
169+ PERMUTE is the default. We permute the contents of ARGV as we scan,
170+ so that eventually all the non-options are at the end. This allows options
171+ to be given in any order, even with programs that were not written to
172+ expect this.
173+
174+ RETURN_IN_ORDER is an option available to programs that were written
175+ to expect options and other ARGV-elements in any order and that care about
176+ the ordering of the two. We describe each non-option ARGV-element
177+ as if it were the argument of an option with character code 1.
178+ Using `-' as the first character of the list of option characters
179+ selects this mode of operation.
180+
181+ The special argument `--' forces an end of option-scanning regardless
182+ of the value of `ordering'. In the case of RETURN_IN_ORDER, only
183+ `--' can cause `getopt' to return -1 with `optind' != ARGC. */
184+
185+static enum
186+{
187+ REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
188+} ordering;
189+
190+/* Value of POSIXLY_CORRECT environment variable. */
191+static char *posixly_correct;
192+
193+#ifdef __GNU_LIBRARY__
194+/* We want to avoid inclusion of string.h with non-GNU libraries
195+ because there are many ways it can cause trouble.
196+ On some systems, it contains special magic macros that don't work
197+ in GCC. */
198+# include <string.h>
199+# define my_index strchr
200+#else
201+
202+# if HAVE_STRING_H
203+# include <string.h>
204+# else
205+# include <strings.h>
206+# endif
207+
208+/* Avoid depending on library functions or files
209+ whose names are inconsistent. */
210+
211+#ifndef getenv
212+extern char *getenv ();
213+#endif
214+
215+static char * /* Minimal strchr substitute, compiled only when __GNU_LIBRARY__ is not defined. */
216+my_index (str, chr)
217+ const char *str; /* NUL-terminated string to search */
218+ int chr; /* character to look for */
219+{
220+ while (*str) /* stops at the terminator, so chr == '\0' is never matched */
221+ {
222+ if (*str == chr)
223+ return (char *) str; /* first occurrence; const is cast away for the caller */
224+ str++;
225+ }
226+ return 0; /* CHR does not occur in STR: null pointer */
227+}
228+
229+/* If using GCC, we can safely declare strlen this way.
230+ If not using GCC, it is ok not to declare it. */
231+#ifdef __GNUC__
232+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
233+ That was relevant to code that was here before. */
234+# if (!defined __STDC__ || !__STDC__) && !defined strlen
235+/* gcc with -traditional declares the built-in strlen to return int,
236+ and has done so at least since version 2.4.5. -- rms. */
237+extern int strlen (const char *);
238+# endif /* not __STDC__ */
239+#endif /* __GNUC__ */
240+
241+#endif /* not __GNU_LIBRARY__ */
242+
243+/* Handle permutation of arguments. */
244+
245+/* Describe the part of ARGV that contains non-options that have
246+ been skipped. `first_nonopt' is the index in ARGV of the first of them;
247+ `last_nonopt' is the index after the last of them. */
248+
249+static int first_nonopt;
250+static int last_nonopt;
251+
252+#ifdef _LIBC
253+/* Stored original parameters.
254+ XXX This is not a good solution. We should rather copy the args so
255+ that we can compare them later. But we must not use malloc(3). */
256+extern int __libc_argc;
257+extern char **__libc_argv;
258+
259+/* Bash 2.0 gives us an environment variable containing flags
260+ indicating ARGV elements that should not be considered arguments. */
261+
262+# ifdef USE_NONOPTION_FLAGS
263+/* Defined in getopt_init.c */
264+extern char *__getopt_nonoption_flags;
265+
266+static int nonoption_flags_max_len;
267+static int nonoption_flags_len;
268+# endif
269+
270+# ifdef USE_NONOPTION_FLAGS
271+# define SWAP_FLAGS(ch1, ch2) \
272+ if (nonoption_flags_len > 0) \
273+ { \
274+ char __tmp = __getopt_nonoption_flags[ch1]; \
275+ __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \
276+ __getopt_nonoption_flags[ch2] = __tmp; \
277+ }
278+# else
279+# define SWAP_FLAGS(ch1, ch2)
280+# endif
281+#else /* !_LIBC */
282+# define SWAP_FLAGS(ch1, ch2)
283+#endif /* _LIBC */
284+
285+/* Exchange two adjacent subsequences of ARGV.
286+ One subsequence is elements [first_nonopt,last_nonopt)
287+ which contains all the non-options that have been skipped so far.
288+ The other is elements [last_nonopt,optind), which contains all
289+ the options processed since those non-options were skipped.
290+
291+ `first_nonopt' and `last_nonopt' are relocated so that they describe
292+ the new indices of the non-options in ARGV after they are moved. */
293+
294+#if defined __STDC__ && __STDC__
295+static void exchange (char **);
296+#endif
297+
298+static void /* Move the skipped non-options after the options scanned since; updates first_nonopt/last_nonopt. */
299+exchange (argv)
300+ char **argv; /* argument vector whose elements are reordered in place */
301+{
302+ int bottom = first_nonopt; /* start of the skipped non-options */
303+ int middle = last_nonopt; /* end of the non-options, start of the options */
304+ int top = optind; /* one past the last option processed so far */
305+ char *tem; /* scratch pointer used to swap two argv entries */
306+
307+ /* Exchange the shorter segment with the far end of the longer segment.
308+ That puts the shorter segment into the right place.
309+ It leaves the longer segment in the right place overall,
310+ but it consists of two parts that need to be swapped next. */
311+
312+#if defined _LIBC && defined USE_NONOPTION_FLAGS
313+ /* First make sure the handling of the `__getopt_nonoption_flags'
314+ string can work normally. Our top argument must be in the range
315+ of the string. */
316+ if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len)
317+ {
318+ /* We must extend the array. The user plays games with us and
319+ presents new arguments. */
320+ char *new_str = malloc (top + 1);
321+ if (new_str == NULL)
322+ nonoption_flags_len = nonoption_flags_max_len = 0; /* allocation failed: stop using the flags string */
323+ else
324+ {
325+ memset (__mempcpy (new_str, __getopt_nonoption_flags,
326+ nonoption_flags_max_len),
327+ '\0', top + 1 - nonoption_flags_max_len); /* copy the old flags, then zero-fill the new tail */
328+ nonoption_flags_max_len = top + 1;
329+ __getopt_nonoption_flags = new_str; /* NOTE(review): the previous string is not released here -- confirm that is intended */
330+ }
331+ }
332+#endif
333+
334+ while (top > middle && middle > bottom) /* continue while both segments are non-empty */
335+ {
336+ if (top - middle > middle - bottom)
337+ {
338+ /* Bottom segment is the short one. */
339+ int len = middle - bottom;
340+ register int i;
341+
342+ /* Swap it with the top part of the top segment. */
343+ for (i = 0; i < len; i++)
344+ {
345+ tem = argv[bottom + i];
346+ argv[bottom + i] = argv[top - (middle - bottom) + i];
347+ argv[top - (middle - bottom) + i] = tem;
348+ SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); /* expands to nothing unless _LIBC and USE_NONOPTION_FLAGS are defined */
349+ }
350+ /* Exclude the moved bottom segment from further swapping. */
351+ top -= len;
352+ }
353+ else
354+ {
355+ /* Top segment is the short one. */
356+ int len = top - middle;
357+ register int i;
358+
359+ /* Swap it with the bottom part of the bottom segment. */
360+ for (i = 0; i < len; i++)
361+ {
362+ tem = argv[bottom + i];
363+ argv[bottom + i] = argv[middle + i];
364+ argv[middle + i] = tem;
365+ SWAP_FLAGS (bottom + i, middle + i); /* keep the per-argument flags aligned with argv (no-op outside glibc builds) */
366+ }
367+ /* Exclude the moved top segment from further swapping. */
368+ bottom += len;
369+ }
370+ }
371+
372+ /* Update records for the slots the non-options now occupy. */
373+
374+ first_nonopt += (optind - last_nonopt); /* the non-options moved up by the number of options just passed */
375+ last_nonopt = optind; /* and now end where scanning resumes */
376+}
377+
378+/* Initialize the internal data when the first call is made. */
379+
380+#if defined __STDC__ && __STDC__
381+static const char *_getopt_initialize (int, char *const *, const char *);
382+#endif
383+static const char * /* returns OPTSTRING, advanced past a leading `-' or `+' when one is present */
384+_getopt_initialize (argc, argv, optstring)
385+ int argc; /* only read in the _LIBC && USE_NONOPTION_FLAGS section below */
386+ char *const *argv; /* only read in the _LIBC && USE_NONOPTION_FLAGS section below */
387+ const char *optstring; /* option specification supplied by the caller */
388+{
389+ /* Start processing options with ARGV-element 1 (since ARGV-element 0
390+ is the program name); the sequence of previously skipped
391+ non-option ARGV-elements is empty. */
392+
393+ first_nonopt = last_nonopt = optind;
394+
395+ nextchar = NULL; /* no option element is partially scanned yet */
396+
397+ posixly_correct = getenv ("POSIXLY_CORRECT"); /* this function only tests it against NULL, not its value */
398+
399+ /* Determine how to handle the ordering of options and nonoptions. */
400+
401+ if (optstring[0] == '-') /* checked first, so a leading `-' wins over POSIXLY_CORRECT */
402+ {
403+ ordering = RETURN_IN_ORDER;
404+ ++optstring; /* skip the mode character */
405+ }
406+ else if (optstring[0] == '+')
407+ {
408+ ordering = REQUIRE_ORDER;
409+ ++optstring; /* skip the mode character */
410+ }
411+ else if (posixly_correct != NULL)
412+ ordering = REQUIRE_ORDER;
413+ else
414+ ordering = PERMUTE;
415+
416+#if defined _LIBC && defined USE_NONOPTION_FLAGS
417+ if (posixly_correct == NULL
418+ && argc == __libc_argc && argv == __libc_argv) /* flags are used only for the vector matching __libc_argc/__libc_argv */
419+ {
420+ if (nonoption_flags_max_len == 0) /* still zero: the flags buffer has not been set up yet */
421+ {
422+ if (__getopt_nonoption_flags == NULL
423+ || __getopt_nonoption_flags[0] == '\0')
424+ nonoption_flags_max_len = -1; /* no flags supplied: -1 keeps this setup from running again */
425+ else
426+ {
427+ const char *orig_str = __getopt_nonoption_flags;
428+ int len = nonoption_flags_max_len = strlen (orig_str);
429+ if (nonoption_flags_max_len < argc)
430+ nonoption_flags_max_len = argc; /* make the buffer cover at least argc entries */
431+ __getopt_nonoption_flags =
432+ (char *) malloc (nonoption_flags_max_len);
433+ if (__getopt_nonoption_flags == NULL)
434+ nonoption_flags_max_len = -1; /* allocation failed: treat the flags as unavailable */
435+ else
436+ memset (__mempcpy (__getopt_nonoption_flags, orig_str, len),
437+ '\0', nonoption_flags_max_len - len); /* copy the original flags, zero-fill the remainder */
438+ }
439+ }
440+ nonoption_flags_len = nonoption_flags_max_len;
441+ }
442+ else
443+ nonoption_flags_len = 0; /* different vector or POSIXLY_CORRECT set: ignore the flags */
444+#endif
445+
446+ return optstring;
447+}
448+
449+/* Scan elements of ARGV (whose length is ARGC) for option characters
450+ given in OPTSTRING.
451+
452+ If an element of ARGV starts with '-', and is not exactly "-" or "--",
453+ then it is an option element. The characters of this element
454+ (aside from the initial '-') are option characters. If `getopt'
455+ is called repeatedly, it returns successively each of the option characters
456+ from each of the option elements.
457+
458+ If `getopt' finds another option character, it returns that character,
459+ updating `optind' and `nextchar' so that the next call to `getopt' can
460+ resume the scan with the following option character or ARGV-element.
461+
462+ If there are no more option characters, `getopt' returns -1.
463+ Then `optind' is the index in ARGV of the first ARGV-element
464+ that is not an option. (The ARGV-elements have been permuted
465+ so that those that are not options now come last.)
466+
467+ OPTSTRING is a string containing the legitimate option characters.
468+ If an option character is seen that is not listed in OPTSTRING,
469+ return '?' after printing an error message. If you set `opterr' to
470+ zero, the error message is suppressed but we still return '?'.
471+
472+ If a char in OPTSTRING is followed by a colon, that means it wants an arg,
473+ so the following text in the same ARGV-element, or the text of the following
474+ ARGV-element, is returned in `optarg'. Two colons mean an option that
475+ wants an optional arg; if there is text in the current ARGV-element,
476+ it is returned in `optarg', otherwise `optarg' is set to zero.
477+
478+ If OPTSTRING starts with `-' or `+', it requests different methods of
479+ handling the non-option ARGV-elements.
480+ See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
481+
482+ Long-named options begin with `--' instead of `-'.
483+ Their names may be abbreviated as long as the abbreviation is unique
484+ or is an exact match for some defined option. If they have an
485+ argument, it follows the option name in the same ARGV-element, separated
486+ from the option name by a `=', or else in the next ARGV-element.
487+ When `getopt' finds a long-named option, it returns 0 if that option's
488+ `flag' field is nonzero, the value of the option's `val' field
489+ if the `flag' field is zero.
490+
491+ The elements of ARGV aren't really const, because we permute them.
492+ But we pretend they're const in the prototype to be compatible
493+ with other systems.
494+
495+ LONGOPTS is a vector of `struct option' terminated by an
496+ element containing a name which is zero.
497+
498+ LONGIND returns the index in LONGOPT of the long-named option found.
499+ It is only valid when a long-named option has been found by the most
500+ recent call.
501+
502+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
503+ long-named options. */
504+
505+int
506+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
507+ int argc;
508+ char *const *argv;
509+ const char *optstring;
510+ const struct option *longopts;
511+ int *longind;
512+ int long_only;
513+{
514+ int print_errors = opterr;
515+ if (optstring[0] == ':')
516+ print_errors = 0;
517+
518+ if (argc < 1)
519+ return -1;
520+
521+ optarg = NULL;
522+
523+ if (optind == 0 || !__getopt_initialized)
524+ {
525+ if (optind == 0)
526+ optind = 1; /* Don't scan ARGV[0], the program name. */
527+ optstring = _getopt_initialize (argc, argv, optstring);
528+ __getopt_initialized = 1;
529+ }
530+
531+ /* Test whether ARGV[optind] points to a non-option argument.
532+ Either it does not have option syntax, or there is an environment flag
533+ from the shell indicating it is not an option. The latter information
534+ is only used when compiled as part of the GNU libc. */
535+#if defined _LIBC && defined USE_NONOPTION_FLAGS
536+# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \
537+ || (optind < nonoption_flags_len \
538+ && __getopt_nonoption_flags[optind] == '1'))
539+#else
540+# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0')
541+#endif
542+
543+ if (nextchar == NULL || *nextchar == '\0')
544+ {
545+ /* Advance to the next ARGV-element. */
546+
547+ /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been
548+ moved back by the user (who may also have changed the arguments). */
549+ if (last_nonopt > optind)
550+ last_nonopt = optind;
551+ if (first_nonopt > optind)
552+ first_nonopt = optind;
553+
554+ if (ordering == PERMUTE)
555+ {
556+ /* If we have just processed some options following some non-options,
557+ exchange them so that the options come first. */
558+
559+ if (first_nonopt != last_nonopt && last_nonopt != optind)
560+ exchange ((char **) argv);
561+ else if (last_nonopt != optind)
562+ first_nonopt = optind;
563+
564+ /* Skip any additional non-options
565+ and extend the range of non-options previously skipped. */
566+
567+ while (optind < argc && NONOPTION_P)
568+ optind++;
569+ last_nonopt = optind;
570+ }
571+
572+ /* The special ARGV-element `--' means premature end of options.
573+ Skip it like a null option,
574+ then exchange with previous non-options as if it were an option,
575+ then skip everything else like a non-option. */
576+
577+ if (optind != argc && !strcmp (argv[optind], "--"))
578+ {
579+ optind++;
580+
581+ if (first_nonopt != last_nonopt && last_nonopt != optind)
582+ exchange ((char **) argv);
583+ else if (first_nonopt == last_nonopt)
584+ first_nonopt = optind;
585+ last_nonopt = argc;
586+
587+ optind = argc;
588+ }
589+
590+ /* If we have done all the ARGV-elements, stop the scan
591+ and back over any non-options that we skipped and permuted. */
592+
593+ if (optind == argc)
594+ {
595+ /* Set the next-arg-index to point at the non-options
596+ that we previously skipped, so the caller will digest them. */
597+ if (first_nonopt != last_nonopt)
598+ optind = first_nonopt;
599+ return -1;
600+ }
601+
602+ /* If we have come to a non-option and did not permute it,
603+ either stop the scan or describe it to the caller and pass it by. */
604+
605+ if (NONOPTION_P)
606+ {
607+ if (ordering == REQUIRE_ORDER)
608+ return -1;
609+ optarg = argv[optind++];
610+ return 1;
611+ }
612+
613+ /* We have found another option-ARGV-element.
614+ Skip the initial punctuation. */
615+
616+ nextchar = (argv[optind] + 1
617+ + (longopts != NULL && argv[optind][1] == '-'));
618+ }
619+
620+ /* Decode the current option-ARGV-element. */
621+
622+ /* Check whether the ARGV-element is a long option.
623+
624+ If long_only and the ARGV-element has the form "-f", where f is
625+ a valid short option, don't consider it an abbreviated form of
626+ a long option that starts with f. Otherwise there would be no
627+ way to give the -f short option.
628+
629+ On the other hand, if there's a long option "fubar" and
630+ the ARGV-element is "-fu", do consider that an abbreviation of
631+ the long option, just like "--fu", and not "-f" with arg "u".
632+
633+ This distinction seems to be the most useful approach. */
634+
635+ if (longopts != NULL
636+ && (argv[optind][1] == '-'
637+ || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
638+ {
639+ char *nameend;
640+ const struct option *p;
641+ const struct option *pfound = NULL;
642+ int exact = 0;
643+ int ambig = 0;
644+ int indfound = -1;
645+ int option_index;
646+
647+ for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
648+ /* Do nothing. */ ;
649+
650+ /* Test all long options for either exact match
651+ or abbreviated matches. */
652+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
653+ if (!strncmp (p->name, nextchar, nameend - nextchar))
654+ {
655+ if ((unsigned int) (nameend - nextchar)
656+ == (unsigned int) strlen (p->name))
657+ {
658+ /* Exact match found. */
659+ pfound = p;
660+ indfound = option_index;
661+ exact = 1;
662+ break;
663+ }
664+ else if (pfound == NULL)
665+ {
666+ /* First nonexact match found. */
667+ pfound = p;
668+ indfound = option_index;
669+ }
670+ else if (long_only
671+ || pfound->has_arg != p->has_arg
672+ || pfound->flag != p->flag
673+ || pfound->val != p->val)
674+ /* Second or later nonexact match found. */
675+ ambig = 1;
676+ }
677+
678+ if (ambig && !exact)
679+ {
680+ if (print_errors)
681+ fprintf (stderr, _("%s: option `%s' is ambiguous\n"),
682+ argv[0], argv[optind]);
683+ nextchar += strlen (nextchar);
684+ optind++;
685+ optopt = 0;
686+ return '?';
687+ }
688+
689+ if (pfound != NULL)
690+ {
691+ option_index = indfound;
692+ optind++;
693+ if (*nameend)
694+ {
695+ /* Don't test has_arg with >, because some C compilers don't
696+ allow it to be used on enums. */
697+ if (pfound->has_arg)
698+ optarg = nameend + 1;
699+ else
700+ {
701+ if (print_errors)
702+ {
703+ if (argv[optind - 1][1] == '-')
704+ /* --option */
705+ fprintf (stderr,
706+ _("%s: option `--%s' doesn't allow an argument\n"),
707+ argv[0], pfound->name);
708+ else
709+ /* +option or -option */
710+ fprintf (stderr,
711+ _("%s: option `%c%s' doesn't allow an argument\n"),
712+ argv[0], argv[optind - 1][0], pfound->name);
713+ }
714+
715+ nextchar += strlen (nextchar);
716+
717+ optopt = pfound->val;
718+ return '?';
719+ }
720+ }
721+ else if (pfound->has_arg == 1)
722+ {
723+ if (optind < argc)
724+ optarg = argv[optind++];
725+ else
726+ {
727+ if (print_errors)
728+ fprintf (stderr,
729+ _("%s: option `%s' requires an argument\n"),
730+ argv[0], argv[optind - 1]);
731+ nextchar += strlen (nextchar);
732+ optopt = pfound->val;
733+ return optstring[0] == ':' ? ':' : '?';
734+ }
735+ }
736+ nextchar += strlen (nextchar);
737+ if (longind != NULL)
738+ *longind = option_index;
739+ if (pfound->flag)
740+ {
741+ *(pfound->flag) = pfound->val;
742+ return 0;
743+ }
744+ return pfound->val;
745+ }
746+
747+ /* Can't find it as a long option. If this is not getopt_long_only,
748+ or the option starts with '--' or is not a valid short
749+ option, then it's an error.
750+ Otherwise interpret it as a short option. */
751+ if (!long_only || argv[optind][1] == '-'
752+ || my_index (optstring, *nextchar) == NULL)
753+ {
754+ if (print_errors)
755+ {
756+ if (argv[optind][1] == '-')
757+ /* --option */
758+ fprintf (stderr, _("%s: unrecognized option `--%s'\n"),
759+ argv[0], nextchar);
760+ else
761+ /* +option or -option */
762+ fprintf (stderr, _("%s: unrecognized option `%c%s'\n"),
763+ argv[0], argv[optind][0], nextchar);
764+ }
765+ nextchar = (char *) "";
766+ optind++;
767+ optopt = 0;
768+ return '?';
769+ }
770+ }
771+
772+ /* Look at and handle the next short option-character. */
773+
774+ {
775+ char c = *nextchar++;
776+ char *temp = my_index (optstring, c);
777+
778+ /* Increment `optind' when we start to process its last character. */
779+ if (*nextchar == '\0')
780+ ++optind;
781+
782+ if (temp == NULL || c == ':')
783+ {
784+ if (print_errors)
785+ {
786+ if (posixly_correct)
787+ /* 1003.2 specifies the format of this message. */
788+ fprintf (stderr, _("%s: illegal option -- %c\n"),
789+ argv[0], c);
790+ else
791+ fprintf (stderr, _("%s: invalid option -- %c\n"),
792+ argv[0], c);
793+ }
794+ optopt = c;
795+ return '?';
796+ }
797+ /* Convenience. Treat POSIX -W foo same as long option --foo */
798+ if (temp[0] == 'W' && temp[1] == ';')
799+ {
800+ char *nameend;
801+ const struct option *p;
802+ const struct option *pfound = NULL;
803+ int exact = 0;
804+ int ambig = 0;
805+ int indfound = 0;
806+ int option_index;
807+
808+ /* This is an option that requires an argument. */
809+ if (*nextchar != '\0')
810+ {
811+ optarg = nextchar;
812+ /* If we end this ARGV-element by taking the rest as an arg,
813+ we must advance to the next element now. */
814+ optind++;
815+ }
816+ else if (optind == argc)
817+ {
818+ if (print_errors)
819+ {
820+ /* 1003.2 specifies the format of this message. */
821+ fprintf (stderr, _("%s: option requires an argument -- %c\n"),
822+ argv[0], c);
823+ }
824+ optopt = c;
825+ if (optstring[0] == ':')
826+ c = ':';
827+ else
828+ c = '?';
829+ return c;
830+ }
831+ else
832+ /* We already incremented `optind' once;
833+ increment it again when taking next ARGV-elt as argument. */
834+ optarg = argv[optind++];
835+
836+ /* optarg is now the argument, see if it's in the
837+ table of longopts. */
838+
839+ for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++)
840+ /* Do nothing. */ ;
841+
842+ /* Test all long options for either exact match
843+ or abbreviated matches. */
844+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
845+ if (!strncmp (p->name, nextchar, nameend - nextchar))
846+ {
847+ if ((unsigned int) (nameend - nextchar) == strlen (p->name))
848+ {
849+ /* Exact match found. */
850+ pfound = p;
851+ indfound = option_index;
852+ exact = 1;
853+ break;
854+ }
855+ else if (pfound == NULL)
856+ {
857+ /* First nonexact match found. */
858+ pfound = p;
859+ indfound = option_index;
860+ }
861+ else
862+ /* Second or later nonexact match found. */
863+ ambig = 1;
864+ }
865+ if (ambig && !exact)
866+ {
867+ if (print_errors)
868+ fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"),
869+ argv[0], argv[optind]);
870+ nextchar += strlen (nextchar);
871+ optind++;
872+ return '?';
873+ }
874+ if (pfound != NULL)
875+ {
876+ option_index = indfound;
877+ if (*nameend)
878+ {
879+ /* Don't test has_arg with >, because some C compilers don't
880+ allow it to be used on enums. */
881+ if (pfound->has_arg)
882+ optarg = nameend + 1;
883+ else
884+ {
885+ if (print_errors)
886+ fprintf (stderr, _("\
887+%s: option `-W %s' doesn't allow an argument\n"),
888+ argv[0], pfound->name);
889+
890+ nextchar += strlen (nextchar);
891+ return '?';
892+ }
893+ }
894+ else if (pfound->has_arg == 1)
895+ {
896+ if (optind < argc)
897+ optarg = argv[optind++];
898+ else
899+ {
900+ if (print_errors)
901+ fprintf (stderr,
902+ _("%s: option `%s' requires an argument\n"),
903+ argv[0], argv[optind - 1]);
904+ nextchar += strlen (nextchar);
905+ return optstring[0] == ':' ? ':' : '?';
906+ }
907+ }
908+ nextchar += strlen (nextchar);
909+ if (longind != NULL)
910+ *longind = option_index;
911+ if (pfound->flag)
912+ {
913+ *(pfound->flag) = pfound->val;
914+ return 0;
915+ }
916+ return pfound->val;
917+ }
918+ nextchar = NULL;
919+ return 'W'; /* Let the application handle it. */
920+ }
921+ if (temp[1] == ':')
922+ {
923+ if (temp[2] == ':')
924+ {
925+ /* This is an option that accepts an argument optionally. */
926+ if (*nextchar != '\0')
927+ {
928+ optarg = nextchar;
929+ optind++;
930+ }
931+ else
932+ optarg = NULL;
933+ nextchar = NULL;
934+ }
935+ else
936+ {
937+ /* This is an option that requires an argument. */
938+ if (*nextchar != '\0')
939+ {
940+ optarg = nextchar;
941+ /* If we end this ARGV-element by taking the rest as an arg,
942+ we must advance to the next element now. */
943+ optind++;
944+ }
945+ else if (optind == argc)
946+ {
947+ if (print_errors)
948+ {
949+ /* 1003.2 specifies the format of this message. */
950+ fprintf (stderr,
951+ _("%s: option requires an argument -- %c\n"),
952+ argv[0], c);
953+ }
954+ optopt = c;
955+ if (optstring[0] == ':')
956+ c = ':';
957+ else
958+ c = '?';
959+ }
960+ else
961+ /* We already incremented `optind' once;
962+ increment it again when taking next ARGV-elt as argument. */
963+ optarg = argv[optind++];
964+ nextchar = NULL;
965+ }
966+ }
967+ return c;
968+ }
969+}
970+/* Short-option-only entry point: forwards with no long-option table.  */
971+int
972+getopt (argc, argv, optstring)
973+     int argc;
974+     char *const *argv;
975+     const char *optstring;
976+{
977+  const struct option *no_longopts = 0;	/* null long-option table */
978+  int *no_longind = 0;			/* null long-index pointer */
979+
980+  return _getopt_internal (argc, argv, optstring, no_longopts, no_longind, 0);
981+}
982+
983+#endif /* Not ELIDE_CODE. */
984+
985+#ifdef TEST
986+
987+/* Compile with -DTEST to make an executable for use in testing
988+ the above definition of `getopt'. */
989+
990+/* Test driver: run getopt over the command line and report each option,
991+   then list whatever non-option arguments remain.  */
992+int
993+main (argc, argv)
994+     int argc;
995+     char **argv;
996+{
997+  int c;
998+  int digit_optind = 0;
999+
1000+  while (1)
1001+    {
1002+      /* Remember which argv element this option came from (0 counts as 1).  */
1003+      int this_option_optind = optind ? optind : 1;
1004+
1005+      c = getopt (argc, argv, "abc:d:0123456789");
1006+      if (c == -1)
1007+	break;
1008+
1009+      switch (c)
1010+	{
1011+	case '0': case '1': case '2': case '3': case '4':
1012+	case '5': case '6': case '7': case '8': case '9':
1013+	  if (digit_optind != 0 && digit_optind != this_option_optind)
1014+	    printf ("digits occur in two different argv-elements.\n");
1015+	  digit_optind = this_option_optind;
1016+	  printf ("option %c\n", c);
1017+	  break;
1018+
1019+	case 'a':
1020+	  printf ("option a\n");
1021+	  break;
1022+
1023+	case 'b':
1024+	  printf ("option b\n");
1025+	  break;
1026+
1027+	case 'c':
1028+	  printf ("option c with value `%s'\n", optarg);
1029+	  break;
1030+
1031+	/* "d:" is in the option string, so it needs a case of its own.  */
1032+	case 'd':
1033+	  printf ("option d with value `%s'\n", optarg);
1034+	  break;
1035+
1036+	case '?':
1037+	  break;
1038+
1039+	default:
1040+	  printf ("?? getopt returned character code 0%o ??\n", c);
1041+	}
1042+    }
1043+
1044+  if (optind < argc)
1045+    {
1046+      printf ("non-option ARGV-elements: ");
1047+      while (optind < argc)
1048+	printf ("%s ", argv[optind++]);
1049+      printf ("\n");
1050+    }
1051+
1052+  exit (0);
1053+}
1054+
1055+#endif /* TEST */
--- trunk/idsgrep/getopt1.c (nonexistent)
+++ trunk/idsgrep/getopt1.c (revision 204)
@@ -0,0 +1,188 @@
1+/* getopt_long and getopt_long_only entry points for GNU getopt.
2+ Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98
3+ Free Software Foundation, Inc.
4+ This file is part of the GNU C Library.
5+
6+ The GNU C Library is free software; you can redistribute it and/or
7+ modify it under the terms of the GNU Lesser General Public
8+ License as published by the Free Software Foundation; either
9+ version 2.1 of the License, or (at your option) any later version.
10+
11+ The GNU C Library is distributed in the hope that it will be useful,
12+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14+ Lesser General Public License for more details.
15+
16+ You should have received a copy of the GNU Lesser General Public
17+ License along with the GNU C Library; if not, write to the Free
18+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19+ 02111-1307 USA. */
20+
21+#ifdef HAVE_CONFIG_H
22+#include <config.h>
23+#endif
24+
25+#include "getopt.h"
26+
27+#if !defined __STDC__ || !__STDC__
28+/* This is a separate conditional since some stdc systems
29+ reject `defined (const)'. */
30+#ifndef const
31+#define const
32+#endif
33+#endif
34+
35+#include <stdio.h>
36+
37+/* Comment out all this code if we are using the GNU C Library, and are not
38+ actually compiling the library itself. This code is part of the GNU C
39+ Library, but also included in many other GNU distributions. Compiling
40+ and linking in this code is a waste when using the GNU C library
41+ (especially if it is a shared library). Rather than having every GNU
42+ program understand `configure --with-gnu-libc' and omit the object files,
43+ it is simpler to just do this in the source for each such file. */
44+
45+#define GETOPT_INTERFACE_VERSION 2
46+#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2
47+#include <gnu-versions.h>
48+#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION
49+#define ELIDE_CODE
50+#endif
51+#endif
52+
53+#ifndef ELIDE_CODE
54+
55+
56+/* This needs to come after some library #include
57+ to get __GNU_LIBRARY__ defined. */
58+#ifdef __GNU_LIBRARY__
59+#include <stdlib.h>
60+#endif
61+
62+#ifndef NULL
63+#define NULL 0
64+#endif
65+/* Like getopt, but also matches `--name' arguments against LONG_OPTIONS.  */
66+int
67+getopt_long (argc, argv, options, long_options, opt_index)
68+ int argc;
69+ char *const *argv;
70+ const char *options;
71+ const struct option *long_options;
72+ int *opt_index;
73+{
74+ return _getopt_internal (argc, argv, options, long_options, opt_index, 0);
75+}
76+
77+/* Like getopt_long, but '-' as well as '--' can indicate a long option.
78+   If an option that starts with '-' (not '--') doesn't match a long option,
79+   but does match a short option, it is parsed as a short option instead.
80+   The final argument of 1 is the only difference from getopt_long's call.  */
81+
82+int
83+getopt_long_only (argc, argv, options, long_options, opt_index)
84+ int argc;
85+ char *const *argv;
86+ const char *options;
87+ const struct option *long_options;
88+ int *opt_index;
89+{
90+ return _getopt_internal (argc, argv, options, long_options, opt_index, 1);
91+}
92+
93+
94+#endif /* Not ELIDE_CODE. */
95+
96+#ifdef TEST
97+
98+#include <stdio.h>
99+
100+/* Test driver: run getopt_long over the command line, report each short
101+   or long option found, then list the remaining non-option arguments.  */
102+int
103+main (argc, argv)
104+     int argc;
105+     char **argv;
106+{
107+  int c;
108+  int digit_optind = 0;
109+
110+  while (1)
111+    {
112+      /* Remember which argv element this option came from (0 counts as 1).  */
113+      int this_option_optind = optind ? optind : 1;
114+      int option_index = 0;
115+      /* Every entry has a null flag and val 0, so a match is returned as 0.  */
116+      static struct option long_options[] =
117+      {
118+	{"add", 1, 0, 0},
119+	{"append", 0, 0, 0},
120+	{"delete", 1, 0, 0},
121+	{"verbose", 0, 0, 0},
122+	{"create", 0, 0, 0},
123+	{"file", 1, 0, 0},
124+	{0, 0, 0, 0}
125+      };
126+
127+      c = getopt_long (argc, argv, "abc:d:0123456789",
128+		       long_options, &option_index);
129+      if (c == -1)
130+	break;
131+
132+      switch (c)
133+	{
134+	/* A long option matched; option_index says which table entry.  */
135+	case 0:
136+	  printf ("option %s", long_options[option_index].name);
137+	  if (optarg)
138+	    printf (" with arg %s", optarg);
139+	  printf ("\n");
140+	  break;
141+
142+	case '0': case '1': case '2': case '3': case '4':
143+	case '5': case '6': case '7': case '8': case '9':
144+	  if (digit_optind != 0 && digit_optind != this_option_optind)
145+	    printf ("digits occur in two different argv-elements.\n");
146+	  digit_optind = this_option_optind;
147+	  printf ("option %c\n", c);
148+	  break;
149+
150+	case 'a':
151+	  printf ("option a\n");
152+	  break;
153+
154+	case 'b':
155+	  printf ("option b\n");
156+	  break;
157+
158+	case 'c':
159+	  printf ("option c with value `%s'\n", optarg);
160+	  break;
161+
162+	case 'd':
163+	  printf ("option d with value `%s'\n", optarg);
164+	  break;
165+
166+	case '?':
167+	  break;
168+
169+	default:
170+	  printf ("?? getopt returned character code 0%o ??\n", c);
171+	}
172+    }
173+
174+  /* Anything from optind onward was not consumed as an option.  */
175+  if (optind < argc)
176+    {
177+      printf ("non-option ARGV-elements: ");
178+      while (optind < argc)
179+	printf ("%s ", argv[optind++]);
180+      printf ("\n");
181+    }
182+
183+  /* Return instead of calling exit (0): <stdlib.h> is only included inside
184+     the non-elided code above, so exit may be undeclared here.  */
185+  return 0;
186+}
187+
188+#endif /* TEST */
--- trunk/idsgrep/INSTALL (nonexistent)
+++ trunk/idsgrep/INSTALL (revision 204)
@@ -0,0 +1,10 @@
1+This is IDSgrep 0.1.
2+
3+Quick start:
4+ ./configure ; make ; make install
5+
6+Summary of configuration options:
7+ ./configure --help
8+
9+For more details, see idsgrep.pdf, or (after configuration)
10+the man page idsgrep.1.
--- trunk/idsgrep/COPYING (nonexistent)
+++ trunk/idsgrep/COPYING (revision 204)
@@ -0,0 +1,675 @@
1+
2+ GNU GENERAL PUBLIC LICENSE
3+ Version 3, 29 June 2007
4+
5+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
6+ Everyone is permitted to copy and distribute verbatim copies
7+ of this license document, but changing it is not allowed.
8+
9+ Preamble
10+
11+ The GNU General Public License is a free, copyleft license for
12+software and other kinds of works.
13+
14+ The licenses for most software and other practical works are designed
15+to take away your freedom to share and change the works. By contrast,
16+the GNU General Public License is intended to guarantee your freedom to
17+share and change all versions of a program--to make sure it remains free
18+software for all its users. We, the Free Software Foundation, use the
19+GNU General Public License for most of our software; it applies also to
20+any other work released this way by its authors. You can apply it to
21+your programs, too.
22+
23+ When we speak of free software, we are referring to freedom, not
24+price. Our General Public Licenses are designed to make sure that you
25+have the freedom to distribute copies of free software (and charge for
26+them if you wish), that you receive source code or can get it if you
27+want it, that you can change the software or use pieces of it in new
28+free programs, and that you know you can do these things.
29+
30+ To protect your rights, we need to prevent others from denying you
31+these rights or asking you to surrender the rights. Therefore, you have
32+certain responsibilities if you distribute copies of the software, or if
33+you modify it: responsibilities to respect the freedom of others.
34+
35+ For example, if you distribute copies of such a program, whether
36+gratis or for a fee, you must pass on to the recipients the same
37+freedoms that you received. You must make sure that they, too, receive
38+or can get the source code. And you must show them these terms so they
39+know their rights.
40+
41+ Developers that use the GNU GPL protect your rights with two steps:
42+(1) assert copyright on the software, and (2) offer you this License
43+giving you legal permission to copy, distribute and/or modify it.
44+
45+ For the developers' and authors' protection, the GPL clearly explains
46+that there is no warranty for this free software. For both users' and
47+authors' sake, the GPL requires that modified versions be marked as
48+changed, so that their problems will not be attributed erroneously to
49+authors of previous versions.
50+
51+ Some devices are designed to deny users access to install or run
52+modified versions of the software inside them, although the manufacturer
53+can do so. This is fundamentally incompatible with the aim of
54+protecting users' freedom to change the software. The systematic
55+pattern of such abuse occurs in the area of products for individuals to
56+use, which is precisely where it is most unacceptable. Therefore, we
57+have designed this version of the GPL to prohibit the practice for those
58+products. If such problems arise substantially in other domains, we
59+stand ready to extend this provision to those domains in future versions
60+of the GPL, as needed to protect the freedom of users.
61+
62+ Finally, every program is threatened constantly by software patents.
63+States should not allow patents to restrict development and use of
64+software on general-purpose computers, but in those that do, we wish to
65+avoid the special danger that patents applied to a free program could
66+make it effectively proprietary. To prevent this, the GPL assures that
67+patents cannot be used to render the program non-free.
68+
69+ The precise terms and conditions for copying, distribution and
70+modification follow.
71+
72+ TERMS AND CONDITIONS
73+
74+ 0. Definitions.
75+
76+ "This License" refers to version 3 of the GNU General Public License.
77+
78+ "Copyright" also means copyright-like laws that apply to other kinds of
79+works, such as semiconductor masks.
80+
81+ "The Program" refers to any copyrightable work licensed under this
82+License. Each licensee is addressed as "you". "Licensees" and
83+"recipients" may be individuals or organizations.
84+
85+ To "modify" a work means to copy from or adapt all or part of the work
86+in a fashion requiring copyright permission, other than the making of an
87+exact copy. The resulting work is called a "modified version" of the
88+earlier work or a work "based on" the earlier work.
89+
90+ A "covered work" means either the unmodified Program or a work based
91+on the Program.
92+
93+ To "propagate" a work means to do anything with it that, without
94+permission, would make you directly or secondarily liable for
95+infringement under applicable copyright law, except executing it on a
96+computer or modifying a private copy. Propagation includes copying,
97+distribution (with or without modification), making available to the
98+public, and in some countries other activities as well.
99+
100+ To "convey" a work means any kind of propagation that enables other
101+parties to make or receive copies. Mere interaction with a user through
102+a computer network, with no transfer of a copy, is not conveying.
103+
104+ An interactive user interface displays "Appropriate Legal Notices"
105+to the extent that it includes a convenient and prominently visible
106+feature that (1) displays an appropriate copyright notice, and (2)
107+tells the user that there is no warranty for the work (except to the
108+extent that warranties are provided), that licensees may convey the
109+work under this License, and how to view a copy of this License. If
110+the interface presents a list of user commands or options, such as a
111+menu, a prominent item in the list meets this criterion.
112+
113+ 1. Source Code.
114+
115+ The "source code" for a work means the preferred form of the work
116+for making modifications to it. "Object code" means any non-source
117+form of a work.
118+
119+ A "Standard Interface" means an interface that either is an official
120+standard defined by a recognized standards body, or, in the case of
121+interfaces specified for a particular programming language, one that
122+is widely used among developers working in that language.
123+
124+ The "System Libraries" of an executable work include anything, other
125+than the work as a whole, that (a) is included in the normal form of
126+packaging a Major Component, but which is not part of that Major
127+Component, and (b) serves only to enable use of the work with that
128+Major Component, or to implement a Standard Interface for which an
129+implementation is available to the public in source code form. A
130+"Major Component", in this context, means a major essential component
131+(kernel, window system, and so on) of the specific operating system
132+(if any) on which the executable work runs, or a compiler used to
133+produce the work, or an object code interpreter used to run it.
134+
135+ The "Corresponding Source" for a work in object code form means all
136+the source code needed to generate, install, and (for an executable
137+work) run the object code and to modify the work, including scripts to
138+control those activities. However, it does not include the work's
139+System Libraries, or general-purpose tools or generally available free
140+programs which are used unmodified in performing those activities but
141+which are not part of the work. For example, Corresponding Source
142+includes interface definition files associated with source files for
143+the work, and the source code for shared libraries and dynamically
144+linked subprograms that the work is specifically designed to require,
145+such as by intimate data communication or control flow between those
146+subprograms and other parts of the work.
147+
148+ The Corresponding Source need not include anything that users
149+can regenerate automatically from other parts of the Corresponding
150+Source.
151+
152+ The Corresponding Source for a work in source code form is that
153+same work.
154+
155+ 2. Basic Permissions.
156+
157+ All rights granted under this License are granted for the term of
158+copyright on the Program, and are irrevocable provided the stated
159+conditions are met. This License explicitly affirms your unlimited
160+permission to run the unmodified Program. The output from running a
161+covered work is covered by this License only if the output, given its
162+content, constitutes a covered work. This License acknowledges your
163+rights of fair use or other equivalent, as provided by copyright law.
164+
165+ You may make, run and propagate covered works that you do not
166+convey, without conditions so long as your license otherwise remains
167+in force. You may convey covered works to others for the sole purpose
168+of having them make modifications exclusively for you, or provide you
169+with facilities for running those works, provided that you comply with
170+the terms of this License in conveying all material for which you do
171+not control copyright. Those thus making or running the covered works
172+for you must do so exclusively on your behalf, under your direction
173+and control, on terms that prohibit them from making any copies of
174+your copyrighted material outside their relationship with you.
175+
176+ Conveying under any other circumstances is permitted solely under
177+the conditions stated below. Sublicensing is not allowed; section 10
178+makes it unnecessary.
179+
180+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
181+
182+ No covered work shall be deemed part of an effective technological
183+measure under any applicable law fulfilling obligations under article
184+11 of the WIPO copyright treaty adopted on 20 December 1996, or
185+similar laws prohibiting or restricting circumvention of such
186+measures.
187+
188+ When you convey a covered work, you waive any legal power to forbid
189+circumvention of technological measures to the extent such circumvention
190+is effected by exercising rights under this License with respect to
191+the covered work, and you disclaim any intention to limit operation or
192+modification of the work as a means of enforcing, against the work's
193+users, your or third parties' legal rights to forbid circumvention of
194+technological measures.
195+
196+ 4. Conveying Verbatim Copies.
197+
198+ You may convey verbatim copies of the Program's source code as you
199+receive it, in any medium, provided that you conspicuously and
200+appropriately publish on each copy an appropriate copyright notice;
201+keep intact all notices stating that this License and any
202+non-permissive terms added in accord with section 7 apply to the code;
203+keep intact all notices of the absence of any warranty; and give all
204+recipients a copy of this License along with the Program.
205+
206+ You may charge any price or no price for each copy that you convey,
207+and you may offer support or warranty protection for a fee.
208+
209+ 5. Conveying Modified Source Versions.
210+
211+ You may convey a work based on the Program, or the modifications to
212+produce it from the Program, in the form of source code under the
213+terms of section 4, provided that you also meet all of these conditions:
214+
215+ a) The work must carry prominent notices stating that you modified
216+ it, and giving a relevant date.
217+
218+ b) The work must carry prominent notices stating that it is
219+ released under this License and any conditions added under section
220+ 7. This requirement modifies the requirement in section 4 to
221+ "keep intact all notices".
222+
223+ c) You must license the entire work, as a whole, under this
224+ License to anyone who comes into possession of a copy. This
225+ License will therefore apply, along with any applicable section 7
226+ additional terms, to the whole of the work, and all its parts,
227+ regardless of how they are packaged. This License gives no
228+ permission to license the work in any other way, but it does not
229+ invalidate such permission if you have separately received it.
230+
231+ d) If the work has interactive user interfaces, each must display
232+ Appropriate Legal Notices; however, if the Program has interactive
233+ interfaces that do not display Appropriate Legal Notices, your
234+ work need not make them do so.
235+
236+ A compilation of a covered work with other separate and independent
237+works, which are not by their nature extensions of the covered work,
238+and which are not combined with it such as to form a larger program,
239+in or on a volume of a storage or distribution medium, is called an
240+"aggregate" if the compilation and its resulting copyright are not
241+used to limit the access or legal rights of the compilation's users
242+beyond what the individual works permit. Inclusion of a covered work
243+in an aggregate does not cause this License to apply to the other
244+parts of the aggregate.
245+
246+ 6. Conveying Non-Source Forms.
247+
248+ You may convey a covered work in object code form under the terms
249+of sections 4 and 5, provided that you also convey the
250+machine-readable Corresponding Source under the terms of this License,
251+in one of these ways:
252+
253+ a) Convey the object code in, or embodied in, a physical product
254+ (including a physical distribution medium), accompanied by the
255+ Corresponding Source fixed on a durable physical medium
256+ customarily used for software interchange.
257+
258+ b) Convey the object code in, or embodied in, a physical product
259+ (including a physical distribution medium), accompanied by a
260+ written offer, valid for at least three years and valid for as
261+ long as you offer spare parts or customer support for that product
262+ model, to give anyone who possesses the object code either (1) a
263+ copy of the Corresponding Source for all the software in the
264+ product that is covered by this License, on a durable physical
265+ medium customarily used for software interchange, for a price no
266+ more than your reasonable cost of physically performing this
267+ conveying of source, or (2) access to copy the
268+ Corresponding Source from a network server at no charge.
269+
270+ c) Convey individual copies of the object code with a copy of the
271+ written offer to provide the Corresponding Source. This
272+ alternative is allowed only occasionally and noncommercially, and
273+ only if you received the object code with such an offer, in accord
274+ with subsection 6b.
275+
276+ d) Convey the object code by offering access from a designated
277+ place (gratis or for a charge), and offer equivalent access to the
278+ Corresponding Source in the same way through the same place at no
279+ further charge. You need not require recipients to copy the
280+ Corresponding Source along with the object code. If the place to
281+ copy the object code is a network server, the Corresponding Source
282+ may be on a different server (operated by you or a third party)
283+ that supports equivalent copying facilities, provided you maintain
284+ clear directions next to the object code saying where to find the
285+ Corresponding Source. Regardless of what server hosts the
286+ Corresponding Source, you remain obligated to ensure that it is
287+ available for as long as needed to satisfy these requirements.
288+
289+ e) Convey the object code using peer-to-peer transmission, provided
290+ you inform other peers where the object code and Corresponding
291+ Source of the work are being offered to the general public at no
292+ charge under subsection 6d.
293+
294+ A separable portion of the object code, whose source code is excluded
295+from the Corresponding Source as a System Library, need not be
296+included in conveying the object code work.
297+
298+ A "User Product" is either (1) a "consumer product", which means any
299+tangible personal property which is normally used for personal, family,
300+or household purposes, or (2) anything designed or sold for incorporation
301+into a dwelling. In determining whether a product is a consumer product,
302+doubtful cases shall be resolved in favor of coverage. For a particular
303+product received by a particular user, "normally used" refers to a
304+typical or common use of that class of product, regardless of the status
305+of the particular user or of the way in which the particular user
306+actually uses, or expects or is expected to use, the product. A product
307+is a consumer product regardless of whether the product has substantial
308+commercial, industrial or non-consumer uses, unless such uses represent
309+the only significant mode of use of the product.
310+
311+ "Installation Information" for a User Product means any methods,
312+procedures, authorization keys, or other information required to install
313+and execute modified versions of a covered work in that User Product from
314+a modified version of its Corresponding Source. The information must
315+suffice to ensure that the continued functioning of the modified object
316+code is in no case prevented or interfered with solely because
317+modification has been made.
318+
319+ If you convey an object code work under this section in, or with, or
320+specifically for use in, a User Product, and the conveying occurs as
321+part of a transaction in which the right of possession and use of the
322+User Product is transferred to the recipient in perpetuity or for a
323+fixed term (regardless of how the transaction is characterized), the
324+Corresponding Source conveyed under this section must be accompanied
325+by the Installation Information. But this requirement does not apply
326+if neither you nor any third party retains the ability to install
327+modified object code on the User Product (for example, the work has
328+been installed in ROM).
329+
330+ The requirement to provide Installation Information does not include a
331+requirement to continue to provide support service, warranty, or updates
332+for a work that has been modified or installed by the recipient, or for
333+the User Product in which it has been modified or installed. Access to a
334+network may be denied when the modification itself materially and
335+adversely affects the operation of the network or violates the rules and
336+protocols for communication across the network.
337+
338+ Corresponding Source conveyed, and Installation Information provided,
339+in accord with this section must be in a format that is publicly
340+documented (and with an implementation available to the public in
341+source code form), and must require no special password or key for
342+unpacking, reading or copying.
343+
344+ 7. Additional Terms.
345+
346+ "Additional permissions" are terms that supplement the terms of this
347+License by making exceptions from one or more of its conditions.
348+Additional permissions that are applicable to the entire Program shall
349+be treated as though they were included in this License, to the extent
350+that they are valid under applicable law. If additional permissions
351+apply only to part of the Program, that part may be used separately
352+under those permissions, but the entire Program remains governed by
353+this License without regard to the additional permissions.
354+
355+ When you convey a copy of a covered work, you may at your option
356+remove any additional permissions from that copy, or from any part of
357+it. (Additional permissions may be written to require their own
358+removal in certain cases when you modify the work.) You may place
359+additional permissions on material, added by you to a covered work,
360+for which you have or can give appropriate copyright permission.
361+
362+ Notwithstanding any other provision of this License, for material you
363+add to a covered work, you may (if authorized by the copyright holders of
364+that material) supplement the terms of this License with terms:
365+
366+ a) Disclaiming warranty or limiting liability differently from the
367+ terms of sections 15 and 16 of this License; or
368+
369+ b) Requiring preservation of specified reasonable legal notices or
370+ author attributions in that material or in the Appropriate Legal
371+ Notices displayed by works containing it; or
372+
373+ c) Prohibiting misrepresentation of the origin of that material, or
374+ requiring that modified versions of such material be marked in
375+ reasonable ways as different from the original version; or
376+
377+ d) Limiting the use for publicity purposes of names of licensors or
378+ authors of the material; or
379+
380+ e) Declining to grant rights under trademark law for use of some
381+ trade names, trademarks, or service marks; or
382+
383+ f) Requiring indemnification of licensors and authors of that
384+ material by anyone who conveys the material (or modified versions of
385+ it) with contractual assumptions of liability to the recipient, for
386+ any liability that these contractual assumptions directly impose on
387+ those licensors and authors.
388+
389+ All other non-permissive additional terms are considered "further
390+restrictions" within the meaning of section 10. If the Program as you
391+received it, or any part of it, contains a notice stating that it is
392+governed by this License along with a term that is a further
393+restriction, you may remove that term. If a license document contains
394+a further restriction but permits relicensing or conveying under this
395+License, you may add to a covered work material governed by the terms
396+of that license document, provided that the further restriction does
397+not survive such relicensing or conveying.
398+
399+ If you add terms to a covered work in accord with this section, you
400+must place, in the relevant source files, a statement of the
401+additional terms that apply to those files, or a notice indicating
402+where to find the applicable terms.
403+
404+ Additional terms, permissive or non-permissive, may be stated in the
405+form of a separately written license, or stated as exceptions;
406+the above requirements apply either way.
407+
408+ 8. Termination.
409+
410+ You may not propagate or modify a covered work except as expressly
411+provided under this License. Any attempt otherwise to propagate or
412+modify it is void, and will automatically terminate your rights under
413+this License (including any patent licenses granted under the third
414+paragraph of section 11).
415+
416+ However, if you cease all violation of this License, then your
417+license from a particular copyright holder is reinstated (a)
418+provisionally, unless and until the copyright holder explicitly and
419+finally terminates your license, and (b) permanently, if the copyright
420+holder fails to notify you of the violation by some reasonable means
421+prior to 60 days after the cessation.
422+
423+ Moreover, your license from a particular copyright holder is
424+reinstated permanently if the copyright holder notifies you of the
425+violation by some reasonable means, this is the first time you have
426+received notice of violation of this License (for any work) from that
427+copyright holder, and you cure the violation prior to 30 days after
428+your receipt of the notice.
429+
430+ Termination of your rights under this section does not terminate the
431+licenses of parties who have received copies or rights from you under
432+this License. If your rights have been terminated and not permanently
433+reinstated, you do not qualify to receive new licenses for the same
434+material under section 10.
435+
436+ 9. Acceptance Not Required for Having Copies.
437+
438+ You are not required to accept this License in order to receive or
439+run a copy of the Program. Ancillary propagation of a covered work
440+occurring solely as a consequence of using peer-to-peer transmission
441+to receive a copy likewise does not require acceptance. However,
442+nothing other than this License grants you permission to propagate or
443+modify any covered work. These actions infringe copyright if you do
444+not accept this License. Therefore, by modifying or propagating a
445+covered work, you indicate your acceptance of this License to do so.
446+
447+ 10. Automatic Licensing of Downstream Recipients.
448+
449+ Each time you convey a covered work, the recipient automatically
450+receives a license from the original licensors, to run, modify and
451+propagate that work, subject to this License. You are not responsible
452+for enforcing compliance by third parties with this License.
453+
454+ An "entity transaction" is a transaction transferring control of an
455+organization, or substantially all assets of one, or subdividing an
456+organization, or merging organizations. If propagation of a covered
457+work results from an entity transaction, each party to that
458+transaction who receives a copy of the work also receives whatever
459+licenses to the work the party's predecessor in interest had or could
460+give under the previous paragraph, plus a right to possession of the
461+Corresponding Source of the work from the predecessor in interest, if
462+the predecessor has it or can get it with reasonable efforts.
463+
464+ You may not impose any further restrictions on the exercise of the
465+rights granted or affirmed under this License. For example, you may
466+not impose a license fee, royalty, or other charge for exercise of
467+rights granted under this License, and you may not initiate litigation
468+(including a cross-claim or counterclaim in a lawsuit) alleging that
469+any patent claim is infringed by making, using, selling, offering for
470+sale, or importing the Program or any portion of it.
471+
472+ 11. Patents.
473+
474+ A "contributor" is a copyright holder who authorizes use under this
475+License of the Program or a work on which the Program is based. The
476+work thus licensed is called the contributor's "contributor version".
477+
478+ A contributor's "essential patent claims" are all patent claims
479+owned or controlled by the contributor, whether already acquired or
480+hereafter acquired, that would be infringed by some manner, permitted
481+by this License, of making, using, or selling its contributor version,
482+but do not include claims that would be infringed only as a
483+consequence of further modification of the contributor version. For
484+purposes of this definition, "control" includes the right to grant
485+patent sublicenses in a manner consistent with the requirements of
486+this License.
487+
488+ Each contributor grants you a non-exclusive, worldwide, royalty-free
489+patent license under the contributor's essential patent claims, to
490+make, use, sell, offer for sale, import and otherwise run, modify and
491+propagate the contents of its contributor version.
492+
493+ In the following three paragraphs, a "patent license" is any express
494+agreement or commitment, however denominated, not to enforce a patent
495+(such as an express permission to practice a patent or covenant not to
496+sue for patent infringement). To "grant" such a patent license to a
497+party means to make such an agreement or commitment not to enforce a
498+patent against the party.
499+
500+ If you convey a covered work, knowingly relying on a patent license,
501+and the Corresponding Source of the work is not available for anyone
502+to copy, free of charge and under the terms of this License, through a
503+publicly available network server or other readily accessible means,
504+then you must either (1) cause the Corresponding Source to be so
505+available, or (2) arrange to deprive yourself of the benefit of the
506+patent license for this particular work, or (3) arrange, in a manner
507+consistent with the requirements of this License, to extend the patent
508+license to downstream recipients. "Knowingly relying" means you have
509+actual knowledge that, but for the patent license, your conveying the
510+covered work in a country, or your recipient's use of the covered work
511+in a country, would infringe one or more identifiable patents in that
512+country that you have reason to believe are valid.
513+
514+ If, pursuant to or in connection with a single transaction or
515+arrangement, you convey, or propagate by procuring conveyance of, a
516+covered work, and grant a patent license to some of the parties
517+receiving the covered work authorizing them to use, propagate, modify
518+or convey a specific copy of the covered work, then the patent license
519+you grant is automatically extended to all recipients of the covered
520+work and works based on it.
521+
522+ A patent license is "discriminatory" if it does not include within
523+the scope of its coverage, prohibits the exercise of, or is
524+conditioned on the non-exercise of one or more of the rights that are
525+specifically granted under this License. You may not convey a covered
526+work if you are a party to an arrangement with a third party that is
527+in the business of distributing software, under which you make payment
528+to the third party based on the extent of your activity of conveying
529+the work, and under which the third party grants, to any of the
530+parties who would receive the covered work from you, a discriminatory
531+patent license (a) in connection with copies of the covered work
532+conveyed by you (or copies made from those copies), or (b) primarily
533+for and in connection with specific products or compilations that
534+contain the covered work, unless you entered into that arrangement,
535+or that patent license was granted, prior to 28 March 2007.
536+
537+ Nothing in this License shall be construed as excluding or limiting
538+any implied license or other defenses to infringement that may
539+otherwise be available to you under applicable patent law.
540+
541+ 12. No Surrender of Others' Freedom.
542+
543+ If conditions are imposed on you (whether by court order, agreement or
544+otherwise) that contradict the conditions of this License, they do not
545+excuse you from the conditions of this License. If you cannot convey a
546+covered work so as to satisfy simultaneously your obligations under this
547+License and any other pertinent obligations, then as a consequence you may
548+not convey it at all. For example, if you agree to terms that obligate you
549+to collect a royalty for further conveying from those to whom you convey
550+the Program, the only way you could satisfy both those terms and this
551+License would be to refrain entirely from conveying the Program.
552+
553+ 13. Use with the GNU Affero General Public License.
554+
555+ Notwithstanding any other provision of this License, you have
556+permission to link or combine any covered work with a work licensed
557+under version 3 of the GNU Affero General Public License into a single
558+combined work, and to convey the resulting work. The terms of this
559+License will continue to apply to the part which is the covered work,
560+but the special requirements of the GNU Affero General Public License,
561+section 13, concerning interaction through a network will apply to the
562+combination as such.
563+
564+ 14. Revised Versions of this License.
565+
566+ The Free Software Foundation may publish revised and/or new versions of
567+the GNU General Public License from time to time. Such new versions will
568+be similar in spirit to the present version, but may differ in detail to
569+address new problems or concerns.
570+
571+ Each version is given a distinguishing version number. If the
572+Program specifies that a certain numbered version of the GNU General
573+Public License "or any later version" applies to it, you have the
574+option of following the terms and conditions either of that numbered
575+version or of any later version published by the Free Software
576+Foundation. If the Program does not specify a version number of the
577+GNU General Public License, you may choose any version ever published
578+by the Free Software Foundation.
579+
580+ If the Program specifies that a proxy can decide which future
581+versions of the GNU General Public License can be used, that proxy's
582+public statement of acceptance of a version permanently authorizes you
583+to choose that version for the Program.
584+
585+ Later license versions may give you additional or different
586+permissions. However, no additional obligations are imposed on any
587+author or copyright holder as a result of your choosing to follow a
588+later version.
589+
590+ 15. Disclaimer of Warranty.
591+
592+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
593+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
594+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
595+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
596+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
597+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
598+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
599+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
600+
601+ 16. Limitation of Liability.
602+
603+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
604+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
605+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
606+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
607+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
608+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
609+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
610+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
611+SUCH DAMAGES.
612+
613+ 17. Interpretation of Sections 15 and 16.
614+
615+ If the disclaimer of warranty and limitation of liability provided
616+above cannot be given local legal effect according to their terms,
617+reviewing courts shall apply local law that most closely approximates
618+an absolute waiver of all civil liability in connection with the
619+Program, unless a warranty or assumption of liability accompanies a
620+copy of the Program in return for a fee.
621+
622+ END OF TERMS AND CONDITIONS
623+
624+ How to Apply These Terms to Your New Programs
625+
626+ If you develop a new program, and you want it to be of the greatest
627+possible use to the public, the best way to achieve this is to make it
628+free software which everyone can redistribute and change under these terms.
629+
630+ To do so, attach the following notices to the program. It is safest
631+to attach them to the start of each source file to most effectively
632+state the exclusion of warranty; and each file should have at least
633+the "copyright" line and a pointer to where the full notice is found.
634+
635+ <one line to give the program's name and a brief idea of what it does.>
636+ Copyright (C) <year> <name of author>
637+
638+ This program is free software: you can redistribute it and/or modify
639+ it under the terms of the GNU General Public License as published by
640+ the Free Software Foundation, either version 3 of the License, or
641+ (at your option) any later version.
642+
643+ This program is distributed in the hope that it will be useful,
644+ but WITHOUT ANY WARRANTY; without even the implied warranty of
645+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
646+ GNU General Public License for more details.
647+
648+ You should have received a copy of the GNU General Public License
649+ along with this program. If not, see <http://www.gnu.org/licenses/>.
650+
651+Also add information on how to contact you by electronic and paper mail.
652+
653+ If the program does terminal interaction, make it output a short
654+notice like this when it starts in an interactive mode:
655+
656+ <program> Copyright (C) <year> <name of author>
657+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
658+ This is free software, and you are welcome to redistribute it
659+ under certain conditions; type `show c' for details.
660+
661+The hypothetical commands `show w' and `show c' should show the appropriate
662+parts of the General Public License. Of course, your program's commands
663+might be different; for a GUI interface, you would use an "about box".
664+
665+ You should also get your employer (if you work as a programmer) or school,
666+if any, to sign a "copyright disclaimer" for the program, if necessary.
667+For more information on this, and how to apply and follow the GNU GPL, see
668+<http://www.gnu.org/licenses/>.
669+
670+ The GNU General Public License does not permit incorporating your program
671+into proprietary programs. If your program is a subroutine library, you
672+may consider it more useful to permit linking proprietary applications with
673+the library. If this is what you want to do, use the GNU Lesser General
674+Public License instead of this License. But first, please read
675+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
--- trunk/idsgrep/Makefile.am (nonexistent)
+++ trunk/idsgrep/Makefile.am (revision 204)
@@ -0,0 +1,46 @@
1+## Process this file with automake to produce Makefile.in
2+
3+ACLOCAL_AMFLAGS = -I m4
4+
5+mvp:=$(if $(VPATH),$(VPATH),.)
6+
7+if COND_KANJIVG
8+ MAYBE_KVDATA=kanjivg.eids
9+endif
10+if COND_TSUKU_BUILD
11+ MAYBE_TSUKUDATA=tsukurimashou.eids
12+endif
13+
14+bin_PROGRAMS = idsgrep
15+
16+CLEANFILES = kanjivg.eids tsukurimashou.eids
17+
18+dict_DATA = $(MAYBE_KVDATA) $(MAYBE_TSUKUDATA)
19+
20+dist_noinst_SCRIPTS = kvg2eids
21+
22+dist_pdf_DATA = idsgrep.pdf
23+
24+EXTRA_DIST = gnugetopt.h idsgrep.tex
25+
26+LDADD = @LIBOBJS@
27+
28+man1_MANS = idsgrep.1
29+
30+idsgrep.pdf: idsgrep.tex
31+ $(XELATEX) idsgrep
32+
33+kanjivg.eids: @with_kanjivg@ kvg2eids
34+ if $(PERL) \
35+ -e 'read STDIN,$$_,2;exit 1 if unpack("n",$$_)!=8075' \
36+ < @with_kanjivg@ ; \
37+ then $(GZIP) -cd @with_kanjivg@ \
38+ | $(PERL) -CS $(mvp)/kvg2eids > kanjivg.eids ; \
39+ else $(PERL) -CS $(mvp)/kvg2eids < @with_kanjivg@ > kanjivg.eids ; fi
40+
41+@with_tsuku_build@/txt/tsukurimashou.eids: Makefile
42+ cd @with_tsuku_build@ ; $(MAKE) $(AM_MAKEFLAGS) eids
43+
44+tsukurimashou.eids: @with_tsuku_build@/txt/tsukurimashou.eids
45+ $(LN_S) -f @with_tsuku_build@/txt/tsukurimashou.eids \
46+ tsukurimashou.eids
--- trunk/idsgrep/idsgrep.tex (nonexistent)
+++ trunk/idsgrep/idsgrep.tex (revision 204)
@@ -0,0 +1,279 @@
1+\documentclass{article}
2+
3+\usepackage{fontspec}
4+
5+\title{The \texttt{idsgrep} utility}
6+\author{Matthew Skala}
7+
8+\setmonofont[Path=../otf/]{TsukurimashouMincho}
9+
10+\begin{document}
11+
12+\maketitle
13+
14+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
15+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
16+
17+\section{Simplified usage}
18+
19+Use \texttt{idsgrep} much as you would use \texttt{grep}:
20+
21+\begin{quotation}
22+ \texttt{idsgrep} \textit{$\langle$pattern$\rangle$}
23+ \textit{$[\langle$file$\rangle\ldots ]$}
24+\end{quotation}
25+
26+If no files are specified, standard input
27+will be read instead. Each line\footnote{Not really. It's technically
28+each Extended Ideographic Description Sequence; but those are expected
29+to correspond to lines.} in the input will be checked against
30+\textit{$\langle$pattern$\rangle$} and printed if it matches.
31+
32+The syntax for matching patterns is complicated. The next section
33+describes it in more detail than most users will really want; here are
34+some examples illustrating commonly-expected cases, assuming a
35+dictionary of kanji and their decompositions.
36+
37+\begin{itemize}
38+ \item[\texttt{idsgrep 萌 dictionary}] A literal character searches
39+ for the decomposition of that character, exact match only.
40+ \item[\texttt{idsgrep ...日 dictionary}] Three dots match their
41+ argument anywhere, so this will match \texttt{日}, \texttt{早}, and
42+ \texttt{萌}.
43+ \item[\texttt{idsgrep ? dictionary}] A question mark matches
44+ anything (most useful as part of a more complex pattern).
45+ \item[\texttt{idsgrep ⿱?心 dictionary}] Unicode Ideographic
46+ Description Characters can be used to build up sequences that also
47+ incorporate the wildcards; this example matches characters
48+ consisting of something above \texttt{心}, such as \texttt{忽} and
49+ \texttt{恋} but not \texttt{応}.
50+ \item[\texttt{idsgrep '[tb]?心' dictionary}] There are ASCII aliases
51+ for operators that may be inconvenient to type; this query is
52+ functionally the same as the previous one.
53+\end{itemize}
54+
55+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
56+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
57+
58+\section{Detailed reference}
59+
60+This system is best understood as three interconnected major concepts:
61+\begin{itemize}
62+ \item an abstract data structure;
63+ \item a syntax for expressing instances of the data structure as
64+ ``Extended Ideographic Description Sequences'' (EIDSes);
65+ \item a function for determining whether two instances of the data
66+ structure ``match.''
67+\end{itemize}
68+
69+Then the basic function of \texttt{idsgrep} is to take one EIDS as a
70+matching pattern, scan a file containing many more, and write out the ones
71+that match the matching pattern. The three major concepts are described,
72+one each, in the following subsections.
73+
74+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
75+
76+\subsection{The data structure}
77+
78+An \emph{EIDS tree} consists of the following:
79+
80+\begin{itemize}
81+ \item An optional \emph{head}, which if present consists of a nonempty
82+ string of Unicode characters.
83+ \item A required \emph{functor}, which is a nonempty string of Unicode
84+ characters.
85+ \item A required \emph{arity}, which is an integer from 0 to 3 inclusive.
86+ \item A sequence of \emph{children}, of length equal to the arity (no
87+ children if arity is zero). Each child is, recursively, an EIDS tree.
88+\end{itemize}
89+
90+Trees with arity zero, one, two, and three are called, respectively,
91+nullary, unary, binary, and ternary.
92+
93+Note that these ``nonempty strings of Unicode characters'' will very often
94+tend to be of length one (single characters) but that is not a requirement.
95+They cannot be empty (length zero); the case of a tree without a head is
96+properly described by ``there is no head,'' not by ``the head is the empty
97+string.'' \emph{At present} no Unicode canonicalization is performed, that
98+being left to the user, but this may change in the future.
99+
100+Typically, these trees are used to describe kanji characters. The literal
101+Unicode character being described will be the head, if there is a code point
102+for it; the functor will be either an ideographic description character like
103+\texttt{⿱} if the character can be subdivided, or else nullary \texttt{;}
104+if not. Then the children will correspond to the parts into which it can be
105+decomposed.
106+
107+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
108+
109+\subsection{EIDS syntax}
110+
111+EIDS trees are written in a simple prefix notation that could be called
112+``Polish notation'' inasmuch as it is the reverse of ``reverse Polish
113+notation.'' To write a tree, simply write the head if there is one, the
114+functor, and then if the tree is not nullary, write each of the children.
115+Heads and the functors of trees of different arity are (unless otherwise
116+specified below) written enclosed in different kinds of brackets that
117+indicate the difference between heads and functors, and the arity of the
118+tree when writing a functor.
119+
120+The basic ASCII brackets for heads and functors are as follows:
121+
122+\hspace*{\fill}
123+\begin{tabular}{cccc}
124+ head & \texttt{<} & \texttt{>} & \texttt{<example>} \\
125+ nullary functor (0) & \texttt{(} & \texttt{)} & \texttt{(example)} \\
126+ unary functor (1) & \texttt{.} & \texttt{.} & \texttt{.example.} \\
127+ binary functor (2) & \texttt{[} & \texttt{]} & \texttt{[example]} \\
128+ ternary functor (3) & \texttt{\{} & \texttt{\}} & \texttt{\{example\}}
129+\end{tabular}
130+\hspace*{\fill}\par
131+
132+Note that the opening and closing brackets for unary functors are both equal
133+to the ASCII period, U+002E.
134+
135+Parsing of bracketed strings has a few special features. First, there is no
136+special treatment of nested brackets. After the ``\texttt{<}'' that begins
137+a head, for instance, the next ``\texttt{>}'' will end the head, regardless
138+of how many other instances of ``\texttt{<}'' have been seen. However,
139+because no head or functor can be less than one character long, a closing
140+bracket immediately after the opening bracket (which would otherwise create
141+an illegal empty string) is specially treated as the first
142+character of the string and \emph{not} as a closing bracket. Thus,
143+``\texttt{())}'' is legal syntax for a functor equal to a closing
144+parenthesis, in a nullary tree; and ``\texttt{...}'' is a functor equal to a
145+single ASCII period in a unary tree.
146+
147+Each pair of ASCII brackets also has two pairs of non-ASCII synonyms, as
148+follows:
149+
150+{\ttfamily\hspace*{\fill}
151+\begin{tabular}{cccccc}
152+ <&>&【&】&〖&〗\\
153+ (&)&（&）&⦅&⦆\\
154+ .&.&・&・&〜&〜\\\relax
155+ [&]&［&］&〚&〛\\
156+ \{&\}&〔&〕&〘&〙
157+\end{tabular}
158+\hspace*{\fill}\par}
159+
160+As in the ASCII case, the closing synonymous brackets for functors of unary
161+trees are identical to the opening brackets. A string may be opened by any
162+of the three opening bracket characters for that type of string; then it
163+must be closed by the closing bracket character that goes with that opening
164+bracket. Brackets from other pairs are counted as part of the string. For
165+instance, ``\texttt{【<example>】}'' is a head whose value consists of
166+``\texttt{<example>}'' including the ASCII angle brackets. There are
167+several reasons for the existence of the synonyms:
168+
169+\begin{itemize}
170+ \item They look cool.
171+ \item There is an established tradition of using \texttt{【}lenticular
172+ brackets\texttt{】} for heads in printed dictionaries, which is exactly
173+ their meaning here.
174+ \item Allowing more than one way to bracket each kind of string makes it
175+ easier to express bracket characters that may occur literally in a string.
176+ \item The non-ASCII brackets may be easier to type without switching modes
177+ in some input methods.
178+ \item On the other hand, keeping an ASCII option for every bracket type
179+ allows matching patterns to be entered on ASCII-only terminals.
180+ \item Multiple bracket types allow for creating human-visible
181+ computer-invisible distinctions in dictionary files, for instance to
182+ flag pseudo-entries that contain metadata, without needing to create a
183+ special syntax for comments.
184+\end{itemize}
185+
186+If a character other than an opening bracket occurs in an EIDS where an
187+opening bracket would be expected, it is treated in one of three ways.
188+
189+\begin{itemize}
190+ \item ASCII whitespace and control characters, U+0000 to U+0020 inclusive,
191+ are ignored. In the future, this treatment might be extended to
192+ non-ASCII Unicode whitespace characters, which are best avoided because
193+ of the uncertainty.
194+ \item Some special characters, such as ``\texttt{⿰},'' have ``sugary
195+ implicit brackets.'' If one of these characters appears outside of
196+ brackets, it will be interpreted as a functor whose value is a
197+ single-character string equal to the literal character, and a fixed
198+ arity that depends on which character it is. For instance,
199+ ``\texttt{⿰}'' and ``\texttt{[⿰]}'' will be parsed identically.
200+ A list of characters getting this treatment is below.
201+ \item Any other non-bracket character has a ``syrupy implicit semicolon.''
202+ That means it will be interpreted as a complete nullary tree with
203+ a single-character head equal to the literal character, and a
204+ single semicolon as the functor. For instance, ``\texttt{x}'' and
205+ ``\texttt{<x>(;)}'' will be parsed identically. Because semicolon
206+ itself has sugary implicit nullary brackets, we could also write
207+ ``\texttt{<x>;}'' for the same effect.
208+\end{itemize}
209+
210+Here are all the characters that have sugary implicit brackets, with the
211+brackets they imply: {\ttfamily (;) (?) .!. .=. .*. .@. [\&] [|] [⿰]
212+[⿱] [⿴] [⿵] [⿶] [⿷] [⿸] [⿹] [⿺] [⿻] \{⿲\} \{⿳\}}
213+
214+Note that the sugary and syrupy implications of a character are only
215+relevant when the character occurs where an opening bracket of some
216+type would otherwise be required; inside a bracketed string,
217+characters behave normally.
218+
219+It is planned that in the future, \texttt{idsgrep}'s parser will also
220+recognize some backslash escape sequences. This is not yet
221+implemented.
222+
223+Although it is technically not a parsing issue but rather a
224+transformation applied to the tree after parsing, there is one more
225+issue to mention: some functors have aliases. If a functor and arity
226+match one of the aliases on the following list, it will be replaced
227+with the indicated single-character functor. The idea is to provide
228+verbose ASCII names for single-character functors of special
229+importance to the matching algorithm. Note that the single-character
230+versions are always the canonical ones, and (although the brackets are
231+shown explicitly for clarity) these all have sugary implicit brackets.
232+
233+\texttt{(anything)} $\rightarrow$ \texttt{(?)}
234+
235+\texttt{.anywhere.} $\rightarrow$ \texttt{...}
236+
237+\texttt{.not.} $\rightarrow$ \texttt{.!.}
238+
239+\texttt{.equal.} $\rightarrow$ \texttt{.=.}
240+
241+\texttt{.unord.} $\rightarrow$ \texttt{.*.}
242+
243+\texttt{.assoc.} $\rightarrow$ \texttt{.@.}
244+
245+\texttt{[and]} $\rightarrow$ \texttt{[\&]}
246+
247+\texttt{[or]} $\rightarrow$ \texttt{[|]}
248+
249+\texttt{[lr]} $\rightarrow$ \texttt{[⿰]}
250+
251+\texttt{[tb]} $\rightarrow$ \texttt{[⿱]}
252+
253+\texttt{[FIXME]} $\rightarrow$ \texttt{[⿴]}
254+
255+\texttt{[FIXME]} $\rightarrow$ \texttt{[⿵]}
256+
257+\texttt{[FIXME]} $\rightarrow$ \texttt{[⿶]}
258+
259+\texttt{[FIXME]} $\rightarrow$ \texttt{[⿷]}
260+
261+\texttt{[FIXME]} $\rightarrow$ \texttt{[⿸]}
262+
263+\texttt{[FIXME]} $\rightarrow$ \texttt{[⿹]}
264+
265+\texttt{[FIXME]} $\rightarrow$ \texttt{[⿺]}
266+
267+\texttt{[FIXME]} $\rightarrow$ \texttt{[⿻]}
268+
269+\texttt{\{lcr\}} $\rightarrow$ \texttt{\{⿲\}}
270+
271+\texttt{\{tcb\}} $\rightarrow$ \texttt{\{⿳\}}
272+
273+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
274+
275+\subsection{Matching}
276+
277+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
278+
279+\end{document}
--- trunk/idsgrep/idsgrep.c (nonexistent)
+++ trunk/idsgrep/idsgrep.c (revision 204)
@@ -0,0 +1,936 @@
1+/*
2+ * Extended IDS matcher
3+ * Copyright (C) 2012 Matthew Skala
4+ *
5+ * This program is free software: you can redistribute it and/or modify
6+ * it under the terms of the GNU General Public License as published by
7+ * the Free Software Foundation, version 3.
8+ *
9+ * This program is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+ * GNU General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU General Public License
15+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16+ *
17+ * Matthew Skala
18+ * http://ansuz.sooke.bc.ca/
19+ * mskala@ansuz.sooke.bc.ca
20+ */
21+
22+#include <stdio.h>
23+#include <stdlib.h>
24+#include <string.h>
25+
26+#include "config.h"
27+#include "getopt.h"
28+
29+/**********************************************************************/
30+
31+typedef struct _NODE *(*MATCH_FN)(struct _NODE *); /* matcher callback: receives a match node and returns the node tree_match() examines next */
32+
33+typedef enum _MATCH_RESULT { /* state of one match node, as driven by tree_match() */
34+ MR_INITIAL=0, /* not evaluated yet; zeroed nodes from new_node() start here */
35+ MR_FALSE, /* definite failure */
36+ MR_TRUE, /* definite success */
37+ MR_AND_MAYBE, /* turns MR_FALSE as soon as one sub-match fails; counts as success if left standing */
38+ MR_OR_MAYBE, /* turns MR_TRUE as soon as one sub-match succeeds; counts as failure if left standing */
39+ MR_NOT_MAYBE, /* takes the opposite of its sub-match's outcome */
40+} MATCH_RESULT;
41+
42+typedef struct _HASHED_STRING { /* interned, reference-counted string that also carries per-functor settings */
43+ struct _HASHED_STRING *next,*mate,*canonical; /* next: hash-bucket or free-list link; mate: partner bracket; canonical: alias target, or NULL */
44+ char *data; /* length bytes; no terminating NUL is stored */
45+ size_t length;
46+ int refs,arity; /* arity stays -2 until a bracket or functor registration sets it */
47+ MATCH_FN match_fn; /* alloc_string() defaults this to default_match_fn */
48+} HASHED_STRING;
49+
50+typedef struct _NODE { /* tree node; the same structure is reused as a work item while matching */
51+ HASHED_STRING *head,*functor;
52+ struct _NODE *child[3],*match_parent; /* in match nodes: child[0] = previously queued item, child[1] = pattern, child[2] = subject */
53+ int refs,arity,complete; /* complete: set by parse() once the node has all of its children */
54+ MATCH_RESULT match_result;
55+} NODE;
56+
57+typedef enum _PARSE_STATE { /* parser state; values >= -1 double as the arity of the bracketed string being read */
58+ PS_ERROR=-5, /* sticky: parse() refuses to continue once this is set */
59+ PS_SEEKING_FUNCTOR=-4, /* a head has been read; a functor must follow */
60+ PS_COMPLETE_TREE=-3, /* a whole tree is waiting at parse_stack[0] */
61+ PS_SEEKING_HEAD=-2, /* between nodes: a head, a functor or a bare character may follow */
62+ PS_READING_HEAD=-1,
63+ PS_READING_NULLARY=0,
64+ PS_READING_UNARY,
65+ PS_READING_BINARY,
66+ PS_READING_TERNARY,
67+} PARSE_STATE;
68+
69+/**********************************************************************/
70+
71+#define NUM_BITS (sizeof(size_t)*8)
72+
73+HASHED_STRING *free_strings[NUM_BITS]={ /* free lists of recycled strings; slot i holds entries whose buffer is 2^i bytes. NOTE(review): the initializer length follows SIZEOF_INT while the array length follows sizeof(size_t); elements left out are zero-initialized anyway */
74+#if SIZEOF_INT>=8
75+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
76+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
77+#endif
78+#if SIZEOF_INT>=6
79+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
80+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
81+#endif
82+#if SIZEOF_INT>=4
83+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
84+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
85+#endif
86+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
87+ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL
88+};
89+
90+NODE *default_match_fn(NODE *);
91+
92+HASHED_STRING *alloc_string(size_t len) { /* return a zeroed HASHED_STRING whose data buffer holds at least len bytes, reusing a recycled entry when one is available */
93+ int i=1;
94+ HASHED_STRING *rval;
95+ char *save_data;
96+
97+ while ((((size_t)1)<<i)<len) i++; /* i = size class: smallest power of two, starting from 2, that is >= len */
98+ if (free_strings[i]==NULL) {
99+ rval=(HASHED_STRING *)malloc(sizeof(HASHED_STRING)); /* NOTE(review): neither malloc result is checked */
100+ memset(rval,0,sizeof(HASHED_STRING));
101+ rval->data=(char *)malloc(((size_t)1)<<i);
102+ } else {
103+ rval=free_strings[i]; /* pop a recycled entry of the same size class */
104+ free_strings[i]=rval->next;
105+ save_data=rval->data; /* keep the buffer pointer across the memset */
106+ memset(rval,0,sizeof(HASHED_STRING));
107+ rval->data=save_data;
108+ }
109+ rval->length=len;
110+ rval->arity=-2; /* -2 = no arity registered for this string */
111+ rval->match_fn=default_match_fn;
112+ return rval;
113+}
114+
115+void free_string(HASHED_STRING *s) { /* push s onto the free list of its size class; nothing is handed back to free() */
116+ int i=1;
117+
118+ while ((((size_t)1)<<i)<s->length) i++; /* same size-class computation as alloc_string() */
119+ s->next=free_strings[i];
120+ free_strings[i]=s;
121+}
122+
123+#define MIN_HTABLE 100
124+
125+HASHED_STRING **hash_table=NULL; /* bucket array; allocated with MIN_HTABLE buckets on the first new_string() call */
126+int hash_table_size=0; /* number of buckets */
127+int hash_table_occupancy=0; /* number of strings currently stored */
128+
129+unsigned int hash_function(size_t len,char *s) { /* hash the len bytes at s; callers reduce the result modulo the table size */
130+ unsigned int rval=1;
131+ size_t i;
132+
133+ for (i=0;i<len;i++) {
134+ rval=(rval<<1)^rval^(rval>>1)^(unsigned int)(s[i]); /* NOTE(review): s[i] is plain char, so bytes >= 0x80 convert differently where char is signed */
135+ }
136+ return rval;
137+}
138+
139+HASHED_STRING *new_string(size_t len,char *s) { /* intern the len bytes at s: return the existing entry with one more reference, or insert a new entry with refs=1 */
140+ unsigned int h,tmph;
141+ int i;
142+ HASHED_STRING *tmps;
143+ HASHED_STRING **new_table;
144+
145+ /* make sure we have a table to begin with */
146+ if (hash_table_size==0) {
147+ hash_table=(HASHED_STRING **)malloc(sizeof(HASHED_STRING *)*MIN_HTABLE);
148+ hash_table_size=MIN_HTABLE;
149+ for (i=0;i<hash_table_size;i++)
150+ hash_table[i]=NULL;
151+ }
152+
153+ /* search for the string in the hash table */
154+ h=hash_function(len,s);
155+ for (tmps=hash_table[h%hash_table_size];
156+ tmps && ((tmps->length!=len) || memcmp(tmps->data,s,len));
157+ tmps=tmps->next);
158+
159+ /* if found, add a reference and we're done */
160+ if (tmps) {
161+ tmps->refs++;
162+
163+ /* otherwise, it'll be a new entry */
164+ } else {
165+
166+ /* first, deal with expanding the table */
167+ if (hash_table_occupancy>2*hash_table_size) { /* more than two entries per bucket on average: double the table */
168+ hash_table_size*=2;
169+ new_table=(HASHED_STRING **)
170+ malloc(sizeof(HASHED_STRING *)*hash_table_size);
171+ for (i=0;i<hash_table_size;i++)
172+ new_table[i]=NULL;
173+ for (i=0;i<(hash_table_size/2);i++) /* hash_table_size/2 is the old bucket count */
174+ while (hash_table[i]) {
175+ tmps=hash_table[i];
176+ hash_table[i]=tmps->next;
177+ tmph=hash_function(tmps->length,tmps->data)%hash_table_size;
178+ tmps->next=new_table[tmph];
179+ new_table[tmph]=tmps;
180+ }
181+ free(hash_table);
182+ hash_table=new_table;
183+ }
184+
185+ /* actually add the new entry */
186+ tmps=alloc_string(len);
187+ memcpy(tmps->data,s,len);
188+ tmps->length=len;
189+ tmps->refs=1;
190+ tmps->next=hash_table[h%hash_table_size]; /* h is the unreduced hash, so this is correct for the resized table too */
191+ hash_table[h%hash_table_size]=tmps;
192+ hash_table_occupancy++;
193+ }
194+
195+ return tmps;
196+}
197+
198+void delete_string(HASHED_STRING *s) { /* drop one reference to s; at zero, unlink it from its bucket and recycle it through free_string() */
199+ unsigned int h;
200+ HASHED_STRING *tmps;
201+
202+ s->refs--;
203+ if (s->refs==0) {
204+ h=hash_function(s->length,s->data)%hash_table_size;
205+ if (hash_table[h]==s) {
206+ hash_table[h]=s->next;
207+ } else {
208+ for (tmps=hash_table[h];tmps->next!=s;tmps=tmps->next); /* walk to the predecessor; relies on s being in this chain */
209+ tmps->next=s->next;
210+ }
211+ free_string(s);
212+ hash_table_occupancy--;
213+
214+ /* FIXME handle shrinking table */
215+ }
216+}
217+
218+/**********************************************************************/
219+
220+NODE *free_nodes=NULL; /* recycled nodes linked through child[0]; nothing pushes here while the recycling code in free_node() is commented out */
221+
222+NODE *new_node(void) { /* return a zero-filled node holding one reference */
223+ NODE *rval;
224+
225+ if (free_nodes) {
226+ rval=free_nodes;
227+ free_nodes=rval->child[0];
228+ } else {
229+ rval=(NODE *)malloc(sizeof(NODE)); /* NOTE(review): malloc result is not checked */
230+ }
231+ memset(rval,0,sizeof(NODE)); /* also leaves match_result at MR_INITIAL (0) */
232+ rval->refs=1;
233+ return rval;
234+}
235+
236+void free_node(NODE *n) { /* drop one reference to n; at zero, release n and, iteratively, every descendant whose count also reaches zero */
237+ NODE *tmpn,*to_free=NULL;
238+ int i;
239+
240+ n->refs--;
241+ if (n->refs==0) {
242+ n->match_parent=to_free; /* match_parent is reused as the link of the pending-free list */
243+ to_free=n;
244+ }
245+
246+ while (to_free) {
247+ tmpn=to_free;
248+ to_free=tmpn->match_parent;
249+
250+ for (i=0;i<3;i++) /* child[] has exactly three slots; the old bound of 4 read past the array */
251+ if (tmpn->child[i]!=NULL) {
252+ tmpn->child[i]->refs--;
253+ if (tmpn->child[i]->refs==0) {
254+ tmpn->child[i]->match_parent=to_free;
255+ to_free=tmpn->child[i];
256+ }
257+ }
258+
259+ if (tmpn->head!=NULL)
260+ delete_string(tmpn->head);
261+ if (tmpn->functor!=NULL)
262+ delete_string(tmpn->functor);
263+
264+/* tmpn->child[0]=free_nodes;
265+ free_nodes=tmpn; */
266+ free(tmpn);
267+ }
268+}
269+
270+/**********************************************************************/
271+
272+NODE **parse_stack=NULL; /* nodes still waiting for children, plus finished subtrees; allocated lazily by parse() */
273+int stack_size=0,stack_ptr=0; /* capacity and number of entries in use */
274+char *partstr=NULL; /* bytes of the bracketed string currently being read */
275+int partstr_size=0,partstr_len=0,parse_state=PS_SEEKING_HEAD;
276+HASHED_STRING *close_bracket,*semicolon,*socked_head=NULL; /* close_bracket: character that ends the current string; semicolon: functor given to bare characters; socked_head: head waiting for its functor */
277+
278+size_t parse(size_t len,char *inp) {
279+ int offs=0,clen,escaped,flag;
280+ char ebuf[4];
281+ HASHED_STRING *hchar,*newstr,*tmps;
282+
283+ /* can't parse if we are in an error state */
284+ if (parse_state==PS_ERROR)
285+ return 0;
286+
287+ /* reset state if the tree has been consumed */
288+ if ((stack_ptr==0) && (parse_state==PS_COMPLETE_TREE))
289+ parse_state=PS_SEEKING_HEAD;
290+
291+ /* make sure we have a buffer */
292+ if (partstr==NULL) {
293+ partstr_size=1024;
294+ partstr=(char *)malloc(partstr_size);
295+ }
296+
297+ /* make sure we have a stack */
298+ if (parse_stack==NULL) {
299+ stack_size=16;
300+ parse_stack=(NODE **)malloc(sizeof(NODE *)*stack_size);
301+ }
302+
303+ /* while we have input and no other reason to stop */
304+ while (offs<len) {
305+
306+ /* validate UTF-8 or escaped character */
307+ escaped=0;
308+/* if (inp[offs]=='\\') {
309+ if (len-offs<2)
310+ return offs;
311+
312+FIXME
313+
314+ } else */ if ((inp[offs]&0x80)==0) {
315+ /* single-byte ASCII */
316+ clen=1;
317+
318+ } else if ((inp[offs]&0xC0)==0x80) {
319+ /* continuation byte, should never be first */
320+ offs++;
321+ continue;
322+
323+ } else if ((inp[offs]&0xE0)==0xC0) {
324+ /* first of two */
325+
326+ /* check for rest of char */
327+ if (len-offs<2)
328+ return offs;
329+
330+ /* check for continuation */
331+ if ((inp[offs+1]&0xC0)!=0x80) {
332+ offs++;
333+ continue;
334+ }
335+
336+ /* check for overlong */
337+ if ((inp[offs]&0xFE)==0xC0) {
338+ offs+=2;
339+ continue;
340+ }
341+ clen=2;
342+
343+ } else if ((inp[offs]&0xF0)==0xE0) {
344+ /* first of three bytes */
345+
346+ /* check for rest of char */
347+ if (len-offs<3)
348+ return offs;
349+
350+ /* check for continuation */
351+ if ((inp[offs+1]&0xC0)!=0x80) {
352+ offs++;
353+ continue;
354+ }
355+
356+ /* check for overlong */
357+ if ((inp[offs]&0xFE)==0xC0) {
358+ offs+=2;
359+ continue;
360+ }
361+ clen=3;
362+
363+ } else if ((inp[offs]&0xF8)==0xF0) {
364+ /* first of four bytes */
365+
366+ /* check for rest of char */
367+ if (len-offs<4)
368+ return offs;
369+
370+ /* check for continuation */
371+ if ((inp[offs+1]&0xC0)!=0x80) {
372+ offs++;
373+ continue;
374+ }
375+
376+ /* check for overlong */
377+ if ((inp[offs]&0xFE)==0xC0) {
378+ offs+=2;
379+ continue;
380+ }
381+ clen=4;
382+
383+ } else {
384+ /* invalid byte */
385+ offs++;
386+ continue;
387+ }
388+
389+ /* see if we are waiting for an opening bracket */
390+ if (parse_state<PS_READING_HEAD) {
391+
392+ /* skip unescaped whitespace in this context */
393+ if ((clen==1) && (inp[offs]<=0x20) && !escaped) {
394+ offs++;
395+ continue;
396+ }
397+
398+ /* look up the character */
399+ hchar=new_string(clen,inp+offs);
400+
401+ /* can't open a head if we aren't looking for one */
402+ if ((parse_state!=PS_SEEKING_HEAD) && (hchar->arity==-1) &&
403+ !escaped) {
404+ parse_state=PS_ERROR;
405+ delete_string(hchar);
406+ return offs;
407+ }
408+
409+ /* can we open a string? */
410+ if ((hchar->arity>=-1) && (hchar->mate!=NULL) && !escaped) {
411+
412+ /* yes; open the string */
413+ close_bracket=hchar->mate;
414+ offs+=clen;
415+ parse_state=(PARSE_STATE)hchar->arity;
416+ delete_string(hchar);
417+
418+ /* is this a special character that makes its own functor? */
419+ } else if ((hchar->arity>=-1) && !escaped) {
420+
421+ /* put it in the string, then re-read it as closing bracket */
422+ close_bracket=hchar;
423+ parse_state=(PARSE_STATE)hchar->arity;
424+ memcpy(partstr,inp+offs,clen);
425+ partstr_len=clen;
426+ delete_string(hchar);
427+ continue;
428+
429+ } else {
430+ /* no; this becomes head of nullary semicolon */
431+
432+ /* and we had better be in state -2 */
433+ if (parse_state!=PS_SEEKING_HEAD) {
434+ parse_state=PS_ERROR;
435+ delete_string(hchar);
436+ return offs;
437+ }
438+
439+ /* create a new node */
440+ parse_stack[stack_ptr]=new_node();
441+ parse_stack[stack_ptr]->head=hchar;
442+ parse_stack[stack_ptr]->functor=semicolon;
443+ parse_stack[stack_ptr]->arity=0;
444+ parse_stack[stack_ptr]->complete=1;
445+ semicolon->refs++;
446+ stack_ptr++;
447+
448+ /* and now we can look for a new head */
449+ offs+=clen;
450+ if (stack_ptr==1) {
451+ parse_state=PS_COMPLETE_TREE;
452+ return offs;
453+ } else
454+ parse_state=PS_SEEKING_HEAD;
455+ }
456+
457+ } else {
458+ /* we are waiting for a closing bracket */
459+
460+ /* look up the character */
461+ hchar=new_string(clen,inp+offs);
462+
463+ /* unescaped matching bracket ends it */
464+ if ((hchar==close_bracket) && (!escaped) && (partstr_len>0)) {
465+
466+ /* hash the partial string */
467+ newstr=new_string(partstr_len,partstr);
468+ partstr_len=0;
469+
470+ /* replace with canonical if any */
471+ while (newstr->canonical!=NULL) {
472+ tmps=newstr->canonical;
473+ delete_string(newstr);
474+ newstr=tmps;
475+ }
476+
477+ /* if that was a head, sock it away, look for a functor */
478+ if (parse_state==-1) {
479+ socked_head=newstr;
480+ parse_state=PS_SEEKING_FUNCTOR;
481+
482+ } else {
483+ /* it was a functor */
484+ parse_stack[stack_ptr]=new_node();
485+ parse_stack[stack_ptr]->head=socked_head;
486+ parse_stack[stack_ptr]->functor=newstr;
487+ parse_stack[stack_ptr]->arity=parse_state;
488+ if (parse_state==0)
489+ parse_stack[stack_ptr]->complete=1;
490+ socked_head=NULL;
491+ stack_ptr++;
492+ parse_state=PS_SEEKING_HEAD;
493+ }
494+
495+ offs+=clen;
496+ close_bracket=NULL;
497+
498+ } else {
499+ /* add the char to the partial string */
500+
501+ /* there must be enough space for it */
502+ if (partstr_len+clen>partstr_size) {
503+ partstr_size*=2;
504+ partstr=(char *)realloc(partstr,partstr_size);
505+ }
506+
507+ /* append the data */
508+ memcpy(partstr+partstr_len,inp+offs,clen);
509+ partstr_len+=clen;
510+ offs+=clen;
511+ }
512+
513+ delete_string(hchar);
514+ }
515+
516+ /* try to hook up children to parent */
517+ flag=(stack_ptr>0) && (parse_stack[stack_ptr-1]->complete);
518+ while (flag) {
519+ flag=0;
520+
521+ /* return if we're done */
522+ if (stack_ptr==1) {
523+ parse_state=PS_COMPLETE_TREE;
524+ return offs;
525+ }
526+
527+ /* handle unary nodes */
528+ if ((stack_ptr>=2) &&
529+ (parse_stack[stack_ptr-2]->arity==1) &&
530+ (!parse_stack[stack_ptr-2]->complete)) {
531+ parse_stack[stack_ptr-2]->child[0]=parse_stack[stack_ptr-1];
532+ parse_stack[stack_ptr-2]->complete=1;
533+ stack_ptr--;
534+ flag=1;
535+ }
536+
537+ /* handle binary nodes */
538+ if ((stack_ptr>=3) &&
539+ (parse_stack[stack_ptr-3]->arity==2) &&
540+ (parse_stack[stack_ptr-2]->complete) &&
541+ (!parse_stack[stack_ptr-3]->complete)) {
542+ parse_stack[stack_ptr-3]->child[0]=parse_stack[stack_ptr-2];
543+ parse_stack[stack_ptr-3]->child[1]=parse_stack[stack_ptr-1];
544+ parse_stack[stack_ptr-3]->complete=1;
545+ stack_ptr-=2;
546+ flag=1;
547+ }
548+
549+ /* handle ternary nodes */
550+ if ((stack_ptr>=4) &&
551+ (parse_stack[stack_ptr-4]->arity==3) &&
552+ (parse_stack[stack_ptr-2]->complete) &&
553+ (parse_stack[stack_ptr-3]->complete) &&
554+ (!parse_stack[stack_ptr-4]->complete)) {
555+ parse_stack[stack_ptr-4]->child[0]=parse_stack[stack_ptr-3];
556+ parse_stack[stack_ptr-4]->child[1]=parse_stack[stack_ptr-2];
557+ parse_stack[stack_ptr-4]->child[2]=parse_stack[stack_ptr-1];
558+ parse_stack[stack_ptr-4]->complete=1;
559+ stack_ptr-=3;
560+ flag=1;
561+ }
562+ }
563+ }
564+ return offs;
565+}
566+
567+
568+/**********************************************************************/
569+
570+void register_bracket_pair(char *opb,char *clb,int arity) { /* declare opb/clb (NUL-terminated) as an opening/closing bracket pair whose contents form a string of the given arity (-1 = head) */
571+ HASHED_STRING *oph,*clh;
572+
573+ oph=new_string(strlen(opb),opb);
574+ clh=new_string(strlen(clb),clb);
575+
576+ oph->arity=arity; /* only the opener records the arity */
577+ oph->mate=clh;
578+ oph->refs++;
579+ clh->mate=oph;
580+ if (clh!=oph) clh->refs++; /* opener and closer are the same entry for symmetric brackets such as "." */
581+}
582+
583+void register_special_functor(char *fctr,int arity,MATCH_FN mf) { /* give functor fctr a fixed arity and matcher; exits if a different arity is already registered for it */
584+ HASHED_STRING *hs;
585+
586+ hs=new_string(strlen(fctr),fctr);
587+
588+ if ((hs->arity>-2) && (hs->arity!=arity)) { /* -2 means nothing registered yet */
589+ puts("attempt to register conflicting arities for functor");
590+ exit(1);
591+ }
592+
593+ hs->arity=arity;
594+ hs->match_fn=mf;
595+}
596+
597+void register_alias(char *fctr,char *canon) { /* make fctr an alias of canon: it copies canon's arity and matcher, and parse() replaces it through the canonical link */
598+ HASHED_STRING *hs,*cs;
599+
600+ hs=new_string(strlen(fctr),fctr);
601+ cs=new_string(strlen(canon),canon);
602+
603+ if ((hs->arity>-2) && (cs->arity!=hs->arity)) { /* the alias already has an arity and it differs from the target's */
604+ puts("attempt to register conflicting arities for functor");
605+ exit(1);
606+ }
607+
608+ hs->arity=cs->arity; /* copied as-is: canon should already be registered, otherwise this stays -2 */
609+ hs->match_fn=cs->match_fn;
610+ hs->canonical=cs;
611+}
612+
613+/**********************************************************************/
614+
615+NODE *default_match_fn(NODE *ms) { /* structural match of ms->child[1] (pattern) against ms->child[2] (subject); returns the next match node to process */
616+ NODE *rval,*tmpn;
617+ int i;
618+
619+ if ((ms->child[1]->arity!=ms->child[2]->arity) ||
620+ (ms->child[1]->functor!=ms->child[2]->functor)) { /* functors are interned strings, so pointer comparison is used */
621+ ms->match_result=MR_FALSE;
622+ return ms;
623+ }
624+ if (ms->child[1]->arity==0) {
625+ ms->match_result=MR_TRUE;
626+ return ms;
627+ }
628+ rval=ms;
629+ ms->match_result=MR_AND_MAYBE; /* stays true only if no child pair fails */
630+ for (i=0;i<ms->child[1]->arity;i++) { /* queue one match node per child pair, each chained to the previous through child[0] */
631+ tmpn=new_node();
632+ tmpn->child[0]=rval;
633+ tmpn->child[0]->refs++;
634+ rval=tmpn;
635+ rval->child[1]=ms->child[1]->child[i];
636+ rval->child[1]->refs++;
637+ rval->child[2]=ms->child[2]->child[i];
638+ rval->child[2]->refs++;
639+ rval->match_parent=ms;
640+ }
641+ return rval; /* the last node queued */
642+}
643+
644+/**********************************************************************/
645+
646+NODE *anything_match_fn(NODE *ms) { /* matcher registered for "?": succeeds without looking at the subject */
647+ ms->match_result=MR_TRUE;
648+ return ms;
649+}
650+
651+NODE *anywhere_match_fn(NODE *ms) { /* matcher registered for unary ".": queues a match of the operand against the subject and, wrapped in "." again, against each of the subject's children */
652+ NODE *rval,*tmpn;
653+ int i;
654+
655+ rval=ms;
656+ ms->match_result=MR_OR_MAYBE; /* one successful alternative is enough */
657+
658+ tmpn=new_node();
659+ tmpn->child[0]=rval;
660+ tmpn->child[0]->refs++;
661+ rval=tmpn;
662+
663+ rval->child[1]=ms->child[1]->child[0]; /* the operand of "." against the subject itself */
664+ rval->child[1]->refs++;
665+ rval->child[2]=ms->child[2];
666+ rval->child[2]->refs++;
667+ rval->match_parent=ms;
668+
669+ for (i=0;i<ms->child[2]->arity;i++) {
670+ tmpn=new_node();
671+ tmpn->child[0]=rval;
672+ tmpn->child[0]->refs++;
673+ rval=tmpn;
674+
675+ tmpn->child[1]=new_node(); /* fresh unary "." pattern node wrapping the same operand */
676+ tmpn->child[1]->arity=1;
677+ tmpn->child[1]->functor=new_string(1,".");
678+ tmpn->child[1]->child[0]=ms->child[1]->child[0];
679+ tmpn->child[1]->child[0]->refs++;
680+
681+ tmpn->child[2]=ms->child[2]->child[i]; /* matched against the i-th child of the subject */
682+ tmpn->child[2]->refs++;
683+ tmpn->match_parent=ms;
684+ }
685+ return rval;
686+}
687+
688+NODE *and_match_fn(NODE *ms) { /* matcher registered for "&"; placeholder that currently always reports MR_TRUE */
689+ /* FIXME */
690+ ms->match_result=MR_TRUE;
691+ return ms;
692+}
693+
694+NODE *or_match_fn(NODE *ms) { /* matcher registered for "|"; placeholder that currently always reports MR_TRUE */
695+ /* FIXME */
696+ ms->match_result=MR_TRUE;
697+ return ms;
698+}
699+
700+NODE *not_match_fn(NODE *ms) { /* matcher registered for "!"; placeholder that currently always reports MR_TRUE */
701+ /* FIXME */
702+ ms->match_result=MR_TRUE;
703+ return ms;
704+}
705+
706+NODE *equal_match_fn(NODE *ms) { /* matcher registered for "="; placeholder that currently always reports MR_TRUE */
707+ /* FIXME */
708+ ms->match_result=MR_TRUE;
709+ return ms;
710+}
711+
712+/**********************************************************************/
713+
714+int tree_match(NODE *needle,NODE *haystack) { /* return 1 if pattern tree needle matches haystack, 0 otherwise; works through a chain of match nodes instead of recursing */
715+ NODE *mn,*tmpn,*tmpnn;
716+
717+ mn=new_node(); /* root match node: child[1] = pattern, child[2] = subject */
718+ mn->child[1]=needle;
719+ mn->child[2]=haystack;
720+ needle->refs++;
721+ haystack->refs++;
722+
723+ while (1) {
724+ if (mn->match_result==MR_INITIAL) { /* not evaluated yet */
725+ if ((mn->child[1]->head!=NULL) && (mn->child[2]->head!=NULL)) { /* both sides have heads: the heads alone decide */
726+ mn->match_result=(mn->child[1]->head==mn->child[2]->head)?
727+ MR_TRUE:MR_FALSE;
728+ } else if (mn->child[1]->arity==mn->child[1]->functor->arity) { /* pattern functor is used with its registered arity: call its matcher */
729+ mn=mn->child[1]->functor->match_fn(mn);
730+ } else {
731+ mn=default_match_fn(mn);
732+ }
733+ } else if (mn->match_parent!=NULL) { /* evaluated sub-match: fold its outcome into the parent */
734+ tmpn=mn->match_parent;
735+ switch (mn->match_result) {
736+ case MR_TRUE:
737+ case MR_AND_MAYBE: /* an AND with no failed part counts as success */
738+ if (tmpn->match_result==MR_OR_MAYBE)
739+ tmpn->match_result=MR_TRUE;
740+ else if (tmpn->match_result==MR_NOT_MAYBE)
741+ tmpn->match_result=MR_FALSE;
742+ break;
743+ case MR_FALSE:
744+ case MR_OR_MAYBE: /* an OR with no successful part counts as failure */
745+ if (tmpn->match_result==MR_AND_MAYBE)
746+ tmpn->match_result=MR_FALSE;
747+ else if (tmpn->match_result==MR_NOT_MAYBE)
748+ tmpn->match_result=MR_TRUE;
749+ break;
750+ default:
751+ puts("illegal match result");
752+ exit(1);
753+ }
754+ if ((tmpn->match_result==MR_TRUE) ||
755+ (tmpn->match_result==MR_FALSE)) { /* parent decided: discard every remaining sibling back to the parent */
756+ while (mn!=tmpn) {
757+ tmpnn=mn->child[0];
758+ free_node(mn);
759+ mn=tmpnn;
760+ }
761+ } else { /* parent still undecided: step back to the previously queued node */
762+ tmpnn=mn->child[0];
763+ free_node(mn);
764+ mn=tmpnn;
765+ }
766+ } else switch (mn->match_result) { /* root node evaluated. NOTE(review): no default case, so MR_NOT_MAYBE at the root would loop forever */
767+ case MR_TRUE:
768+ case MR_AND_MAYBE:
769+ free_node(mn);
770+ return 1;
771+ case MR_FALSE:
772+ case MR_OR_MAYBE:
773+ free_node(mn);
774+ return 0;
775+ }
776+ }
777+}
778+
779+/**********************************************************************/
780+
781+NODE *match_pattern; /* parsed form of the pattern given in argv[1] */
782+char *input_buffer=NULL; /* bytes read from stdin and not yet consumed by a complete tree */
783+int inbuf_size=0,inbuf_used=0,parse_ptr=0; /* buffer capacity, bytes held, bytes already handed to parse() */
784+
785+/**********************************************************************/
786+
787+int main(int argc,char **argv) { /* register the syntax tables, parse argv[1] as the pattern, then echo every tree read from stdin that matches it */
788+ int read_amt,flag;
789+ NODE *to_match;
790+
791+ /* initialize */
792+
793+ semicolon=new_string(1,";");
794+
795+ register_bracket_pair("<",">",-1);
796+ register_bracket_pair("\xE3\x80\x90","\xE3\x80\x91",-1); /* b lenticular */
797+ register_bracket_pair("\xE3\x80\x96","\xE3\x80\x97",-1); /* w lenticular */
798+
799+ register_bracket_pair("(",")",0);
800+ register_bracket_pair("\xEF\xBC\x88","\xEF\xBC\x89",0); /* wide paren */
801+ register_bracket_pair("\xE2\xB8\xA8","\xE2\xB8\xA9",0); /* dblwide paren */
802+
803+ register_bracket_pair(".",".",1);
804+ register_bracket_pair("\xE3\x83\xBB","\xE3\x83\xBB",1); /* centre dot */
805+ register_bracket_pair("\xE3\x80\x9C","\xE3\x80\x9C",1); /* wide tilde */
806+
807+ register_bracket_pair("[","]",2);
808+ register_bracket_pair("\xEF\xBC\xBB","\xEF\xBC\xBD",2); /* wide sqb */
809+ register_bracket_pair("\xE3\x80\x9A","\xE3\x80\x9B",2); /* dblwide sqb */
810+
811+ register_bracket_pair("{","}",3);
812+ register_bracket_pair("\xE3\x80\x94","\xE3\x80\x95",3); /* b tortoise */
813+ register_bracket_pair("\xE3\x80\x98","\xE3\x80\x99",3); /* w tortoise */
814+
815+ register_special_functor(";",0,default_match_fn);
816+
817+ register_special_functor("?",0,anything_match_fn);
818+ register_alias("anything","?");
819+
820+ register_special_functor(".",1,anywhere_match_fn);
821+ register_alias("anywhere",".");
822+
823+ register_special_functor("&",2,and_match_fn);
824+ register_alias("and","&");
825+
826+ register_special_functor("|",2,or_match_fn);
827+ register_alias("or","|");
828+
829+ register_special_functor("!",1,not_match_fn);
830+ register_alias("not","!"); /* was "1", which aliased to an unregistered string */
831+
832+ register_special_functor("=",1,equal_match_fn);
833+ register_alias("equal","="); /* was "1", which aliased to an unregistered string */
834+
835+ register_special_functor("\xE2\xBF\xB0",2,default_match_fn);
836+ register_alias("lr","\xE2\xBF\xB0");
837+
838+ register_special_functor("\xE2\xBF\xB1",2,default_match_fn);
839+ register_alias("tb","\xE2\xBF\xB1");
840+
841+ register_special_functor("\xE2\xBF\xB2",3,default_match_fn);
842+ register_alias("lcr","\xE2\xBF\xB2");
843+
844+ register_special_functor("\xE2\xBF\xB3",3,default_match_fn);
845+ register_alias("tcb","\xE2\xBF\xB3");
846+
847+ register_special_functor("\xE2\xBF\xB4",2,default_match_fn);
848+ register_alias("enclose","\xE2\xBF\xB4");
849+
850+ register_special_functor("\xE2\xBF\xB5",2,default_match_fn);
851+ register_alias("wrapu","\xE2\xBF\xB5");
852+
853+ register_special_functor("\xE2\xBF\xB6",2,default_match_fn);
854+ register_alias("wrapd","\xE2\xBF\xB6");
855+
856+ register_special_functor("\xE2\xBF\xB7",2,default_match_fn);
857+ register_alias("wrapl","\xE2\xBF\xB7");
858+
859+ register_special_functor("\xE2\xBF\xB8",2,default_match_fn);
860+ register_alias("wrapul","\xE2\xBF\xB8");
861+
862+ register_special_functor("\xE2\xBF\xB9",2,default_match_fn);
863+ register_alias("wrapur","\xE2\xBF\xB9");
864+
865+ register_special_functor("\xE2\xBF\xBA",2,default_match_fn);
866+ register_alias("wrapll","\xE2\xBF\xBA");
867+
868+ register_special_functor("\xE2\xBF\xBB",2,default_match_fn);
869+ register_alias("overlap","\xE2\xBF\xBB");
870+
871+ /* read options and matching pattern */
872+ if (argc<2) {
873+ puts("not enough arguments");
874+ exit(1);
875+ } else {
876+ parse_ptr=parse(strlen(argv[1]),argv[1]);
877+ if ((parse_ptr<strlen(argv[1])) || (parse_state!=PS_COMPLETE_TREE)) { /* the whole argument must form exactly one tree */
878+ puts("can't parse matching pattern");
879+ exit(1);
880+ }
881+ match_pattern=parse_stack[0];
882+ stack_ptr=0; /* take the tree off the stack so parse() starts afresh */
883+ }
884+
885+ /* loop over input */
886+ parse_ptr=0;
887+ while (!(feof(stdin) || ferror(stdin))) {
888+
889+ /* make sure we have a buffer at all */
890+ if (input_buffer==NULL) {
891+ input_buffer=(char *)malloc(1024);
892+ inbuf_size=1024;
893+ }
894+
895+ /* make sure we have space in the buffer */
896+ if (inbuf_used+128>inbuf_size) {
897+ inbuf_size*=2;
898+ input_buffer=(char *)realloc(input_buffer,inbuf_size);
899+ }
900+
901+ /* try reading some input */
902+ inbuf_used+=fread(input_buffer+inbuf_used,1,1,stdin); /* one byte per pass */
903+
904+ /* loop parsing and processing */
905+ while (1) {
906+ /* parse */
907+ parse_ptr+=parse(inbuf_used-parse_ptr,input_buffer+parse_ptr);
908+
909+ /* complain about errors */
910+ if (parse_state==PS_ERROR) {
911+ puts("can't parse input pattern");
912+ fwrite(input_buffer,1,parse_ptr,stdout);
913+ putchar('\n');
914+ exit(1);
915+ }
916+
917+ /* deal with a complete tree if we have one */
918+ if (parse_state==PS_COMPLETE_TREE) {
919+ to_match=parse_stack[0];
920+ stack_ptr=0;
921+ if (tree_match(match_pattern,to_match)) {
922+ fwrite(input_buffer,1,parse_ptr,stdout); /* echo the source text of the matching tree */
923+ putchar('\n');
924+ }
925+ free_node(to_match);
926+ if (parse_ptr<inbuf_used)
927+ memmove(input_buffer,input_buffer+parse_ptr,inbuf_used-parse_ptr); /* keep the unparsed remainder at the front of the buffer */
928+ inbuf_used-=parse_ptr;
929+ parse_ptr=0;
930+ } else
931+ break;
932+ }
933+ }
934+
935+ exit(0);
936+}
Show on old repository browser