• R/O
  • SSH
  • HTTPS

tsukurimashou: Commit


Commit MetaInfo

Revision205 (tree)
Time2012-01-22 04:48:47
Authormskala

Log Message

documentation stuff

Change Summary

Incremental Difference

--- trunk/idsgrep/idsgrep.1.in (revision 204)
+++ trunk/idsgrep/idsgrep.1.in (revision 205)
@@ -178,8 +178,7 @@
178178 For the functor of a unary node, the closing bracket is always the same as
179179 the opening bracket.
180180 Three characters may be used to open and close a unary node's
181-functor: . (ASCII period); <U+30FB> (katakana middle dot); and
182-<U+301C> (wave dash).
181+functor: . (ASCII period); : (ASCII colon); and <U+30FB> (katakana middle dot).
183182 .IP \(bu 4
184183 The opening and closing brackets for the functor of a binary node may be [
185184 with ] (ASCII square brackets);
--- trunk/idsgrep/configure.ac (revision 204)
+++ trunk/idsgrep/configure.ac (revision 205)
@@ -261,8 +261,8 @@
261261 m4_foreach_w([tbcheckname],m4_expand([tsukurimashou dnl
262262 tsukurimashou-0.6 tsukurimashou-0.7 tsukurimashou-0.8 dnl
263263 tsukurimashou-0.9 tsukurimashou-0.10 tsukurimashou-0.11]),[
264-m4_foreach_w([tbcheckpath],m4_expand([$srcdir $srcdir/.. $prefix/src dnl
265-/src /usr/src /usr/local/src dnl
264+m4_foreach_w([tbcheckpath],m4_expand([$srcdir $srcdir/.. $srcdir/../.. dnl
265+$prefix/src /src /usr/src /usr/local/src dnl
266266 $HOME $HOME/src]),[
267267 AS_IF([test "$with_tsuku_build" = "auto"],
268268 [AS_IF([test -r "tbcheckpath/tbcheckname/Makefile"],[
@@ -288,5 +288,5 @@
288288 eval flat_docdir=${flat_docdir}
289289 eval flat_docdir=${flat_docdir}
290290 #
291-AC_CONFIG_FILES([Makefile idsgrep.1])
291+AC_CONFIG_FILES([Makefile idsgrep.1 config.tex])
292292 AC_OUTPUT
--- trunk/idsgrep/Makefile.am (revision 204)
+++ trunk/idsgrep/Makefile.am (revision 205)
@@ -27,8 +27,12 @@
2727
2828 man1_MANS = idsgrep.1
2929
30-idsgrep.pdf: idsgrep.tex
30+idsgrep.pdf: idsgrep.tex config.tex idsgrep.bib \
31+ @with_tsuku_build@/otf/TsukurimashouMincho.otf
3132 $(XELATEX) idsgrep
33+ bibtex idsgrep
34+ $(XELATEX) idsgrep
35+ $(XELATEX) idsgrep
3236
3337 kanjivg.eids: @with_kanjivg@ kvg2eids
3438 if $(PERL) \
--- trunk/idsgrep/config.tex.in (nonexistent)
+++ trunk/idsgrep/config.tex.in (revision 205)
@@ -0,0 +1,2 @@
1+\setmonofont[Path={@with_tsuku_build@/otf/}]{TsukurimashouMincho.otf}
2+\def\idsgrepversion{@VERSION@}
--- trunk/idsgrep/idsgrep.tex (revision 204)
+++ trunk/idsgrep/idsgrep.tex (revision 205)
@@ -1,62 +1,317 @@
1-\documentclass{article}
1+\documentclass[twocolumn]{report}
22
33 \usepackage{fontspec}
44
5-\title{The \texttt{idsgrep} utility}
5+\usepackage{achicago}
6+\usepackage{calc}
7+\usepackage[rigidchapters]{titlesec}
8+\usepackage{tocloft}
9+\usepackage{url}
10+
11+\title{IDSgrep, version \idsgrepversion}
612 \author{Matthew Skala}
713
8-\setmonofont[Path=../otf/]{TsukurimashouMincho}
14+\input{config.tex}
915
16+\setsansfont{Nimbus Sans L}
17+
18+\setlength{\topmargin}{0.25in}
19+\setlength{\headheight}{0pt}
20+\setlength{\headsep}{0pt}
21+
22+\setlength{\oddsidemargin}{\paperwidth/17-0.5in}
23+\setlength{\columnsep}{1em}
24+\setlength{\textwidth}{\paperwidth*15/17-1in}
25+\setlength{\textheight}{8.5in}
26+
27+\setlength{\parindent}{1.5em}
28+
29+\makeatletter
30+\dimen@=\f@size\p@\dimen@6\dimen@\divide\dimen@5\edef\l@rgesize{\the\dimen@}
31+\dimen@=\f@size\p@\dimen@2\dimen@\edef\@hugesize{\the\dimen@}
32+
33+\renewcommand\maketitle{%
34+ \begin{titlepage}%
35+ \let\footnotesize\small
36+ \let\footnoterule\relax
37+ \let \footnote \thanks
38+ \vspace*{\fill}
39+ \vspace*{\fill}
40+ {\sffamily\bfseries\huge \hfill\@title\hfill\null\par}
41+ \vspace{\fill}
42+ {\sffamily\bfseries\Large \hfill\@author\hfill\null\par}
43+ \vspace{\fill}
44+ \vspace{\fill}
45+ \vspace{\fill}
46+ \vspace{\fill}
47+ \vspace{\fill}
48+ {\sffamily\bfseries\Large \hfill\@date\par}
49+ \vspace*{\fill}
50+ \@thanks\par
51+ \null
52+ \end{titlepage}%
53+ \setcounter{footnote}{0}%
54+ \global\let\thanks\relax
55+ \global\let\maketitle\relax
56+ \global\let\@thanks\@empty
57+ \global\let\@author\@empty
58+ \global\let\@date\@empty
59+ \global\let\@title\@empty
60+ \global\let\title\relax
61+ \global\let\author\relax
62+ \global\let\date\relax
63+ \global\let\and\relax
64+}
65+
66+\def\@maketitle{%
67+ \newpage\noindent
68+ \null
69+ \begingroup
70+ \let\footnote\thanks
71+ {\fontsize{\@hugesize}{2\baselineskip}\sffamily\bfseries\selectfont
72+ \@title\,\leaders\hrule height 0.2ex\hfill\null}\par\noindent
73+ {\fontsize{\l@rgesize}{\baselineskip}\sffamily\bfseries
74+ \@author\hfill\@date}\par
75+ \endgroup
76+ \dimen@=0.5\baselineskip\relax\advance\dimen@-0.5\p@\relax
77+ \vspace{\dimen@}\noindent
78+}
79+
80+\newcommand{\ch@pterform@t}[1]{%
81+ \begin{@twocolumnfalse}%
82+ \fontsize{\@hugesize}{3\baselineskip}\sffamily\bfseries\selectfont
83+ #1\,\leaders\hrule height 0.2ex\hfill\null
84+ \end{@twocolumnfalse}%
85+}%
86+\titlespacing*{\chapter}{0pt}{6\baselineskip}{2\baselineskip}
87+\titleformat{\chapter}[hang]{}{}{0pt}{\ch@pterform@t}
88+
89+\titleformat{\section}[runin]%
90+ {\fontsize{\l@rgesize}{\baselineskip}\sffamily\bfseries}{}{0pt}{}%
91+ [\,\leaders\hrule height 0.16ex\hfill\null\\]
92+\titlespacing*{\section}{0pt}{\baselineskip}{0pt}
93+
94+\titleformat{\subsection}[runin]{\sffamily\bfseries}{}{0pt}{}
95+\titlespacing*{\subsection}{0pt}{\baselineskip}{0.666em}
96+
97+\titleformat{\subsubsection}[runin]{\rmfamily\scshape}{}{0pt}{}
98+\titlespacing*{\subsubsection}{0pt}{\baselineskip}{0.666em}
99+
100+\titleformat{\paragraph}[runin]{\rmfamily\itshape}{}{0pt}{}
101+\titlespacing*{\paragraph}{\parindent}{0pt}{0.666em}
102+
103+\raggedbottom
104+\makeatother
105+
106+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
107+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
108+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
109+
10110 \begin{document}
11111
12112 \maketitle
13113
114+\renewcommand{\cfttoctitlefont}{%
115+ \huge\sffamily\bfseries}
116+\renewcommand{\cftaftertoctitle}{%
117+ {\huge\,\leaders\hrule height 0.2ex\hfill\null\vspace*{-4ex}}}
118+
119+\def\gobbtohfil#1{%
120+ \begingroup\if#1\hfil\else\aftergroup\gobbtohfil\fi\endgroup}
121+
122+\renewcommand{\cftchappresnum}{\gobbtohfil}
123+\renewcommand{\cftchapnumwidth}{0pt}
124+\renewcommand{\cftchapfont}{\sffamily\bfseries}
125+\renewcommand{\cftchappagefont}{\sffamily\bfseries}
126+
127+\renewcommand{\cftsecpresnum}{\gobbtohfil}
128+\renewcommand{\cftsecnumwidth}{0pt}
129+
130+\tableofcontents
131+
14132 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
15133 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
134+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
16135
17-\section{Simplified usage}
136+\chapter{Quick start}
18137
138+\noindent
19139 Use \texttt{idsgrep} much as you would use \texttt{grep}:
20140
21141 \begin{quotation}
22- \texttt{idsgrep} \textit{$\langle$pattern$\rangle$}
142+ \texttt{idsgrep}
143+ \textit{$[\langle$options$\rangle]$}
144+ \textit{$\langle$pattern$\rangle$}
23145 \textit{$[\langle$file$\rangle\ldots ]$}
24146 \end{quotation}
25147
26-If no files are specified, standard input
27-will be read instead. Each line\footnote{Not really. It's technically
28-each Extended Ideographic Description Sequence; but those are expected
29-to correspond to lines.} in the input will be checked against
30-\textit{$\langle$pattern$\rangle$} and printed if it matches.
148+Its general function is to search one or more files for items matching a
149+pattern, like \texttt{grep}~\citeA{grep} but with a different pattern
150+syntax. Although potentially usable for an unlimited range of tasks,
151+\texttt{idsgrep}'s motivating application is searching databases of Han
152+script (Chinese, Japanese, etc.)\ character descriptions. It provides a
153+much more powerful replacement for the ``radical search'' feature of
154+dictionaries like Kiten~\citeA{Kiten} and WWWJDIC~\citeA{WWWJDIC}.
31155
32-The syntax for matching patterns is complicated. The next section
33-describes it in more detail than most users will really want; here are
34-some examples illustrating commonly-expected cases, assuming a
35-dictionary of kanji and their decompositions.
156+The syntax for matching patterns, and the range of command-line options
157+available, are complicated. Later sections of this document explain those
158+things in detail; for now, here are some examples.
36159
37-\begin{itemize}
38- \item[\texttt{idsgrep 萌 dictionary}] A literal character searches
39- for the decomposition of that character, exact match only.
40- \item[\texttt{idsgrep ...日 dictionary}] Three dots match their
41- argument anywhere, so this will match \texttt{日}, \texttt{早}, and
42- \texttt{萌}.
43- \item[\texttt{idsgrep ? dictionary}] A question mark matches
44- anything (most useful as part of a more complex pattern).
45- \item[\texttt{idsgrep ⿱?心 dictionary}] Unicode Ideographic
46- Description Characters can be used to build up sequences that also
160+\begin{description}
161+\item[\texttt{idsgrep 萌 dictionary}]~\\
162+ A literal character searches for the decomposition of that character,
163+ exact match only.
164+\item[\texttt{idsgrep -d 萌}]~\\
165+ The \texttt{-d} option with empty argument searches a default collection
166+ of dictionaries.
167+\item[\texttt{idsgrep -dtsuku 萌}]~\\
168+ The \texttt{-d} option can be given an argument to choose a specific
169+ default dictionary. Note the argument must be given in the same
170+ \texttt{argv}-element with the \texttt{-d}; the syntax \texttt{-d tsuku}
171+ with a space would mean ``Use the default dictionaries and search for the
172+ (syntactically invalid) pattern `\texttt{tsuku}.'\,''
173+\item[\texttt{othersoft | idsgrep 萌}]~\\
174+ Standard input will be used if no other input source is specified.
175+\item[\texttt{idsgrep -d ...日}]~\\
176+ Three dots match their argument anywhere, so this will match \texttt{日},
177+ \texttt{早}, and \texttt{萌}.
178+\item[\texttt{idsgrep -d '?'}]~\\
179+ A question mark, which will probably require shell escaping, matches
180+ anything. This is most useful as part of a more complex pattern.
181+\item[\texttt{idsgrep -d '⿱?心'}]~\\
182+ Unicode Ideographic Description Characters can be used to build up
183+ sequences that also
47184 incorporate the wildcards; this example matches characters
48185 consisting of something above \texttt{心}, such as \texttt{忽} and
49186 \texttt{恋} but not \texttt{応}.
50- \item[\texttt{idsgrep '[tb]?心'} dictionary] There are ASCII aliases
187+\item[\texttt{idsgrep -d '[tb][anything]心'}]~\\
188+ There are ASCII aliases
51189 for operators that may be inconvenient to type; this query is
52190 functionally the same as the previous one.
191+\end{description}
192+
193+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
194+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
195+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
196+
197+\chapter{Introduction}
198+
199+\noindent
200+FIXME.
201+
202+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
203+
204+\section{Build and install}
205+
206+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
207+
208+\section{Interface to KanjiVG}
209+
210+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
211+
212+\section{Interface to Tsukurimashou}
213+
214+IDSgrep is closely connected with the Tsukurimashou font
215+family~\cite{Tsukurimashou}. They have the same author; it was largely for
216+use in Tsukurimashou development that IDSgrep was developed at all; and
217+IDSgrep's source control system is a subdirectory within Tsukurimashou's.
218+Building IDSgrep in conjunction with Tsukurimashou allows IDSgrep to extract
219+from the Tsukurimashou build system a dictionary of character decompositions
220+as they appear in Tsukurimashou. The Tsukurimashou fonts are also necessary
221+to build this IDSgrep user manual itself. However, IDSgrep and Tsukurimashou
222+are distributed as separate packages because they have very different
223+audiences and build prerequisites. Many people who can use one will be
224+unable to use the other, so it seems inappropriate to force all users to
225+download both.
226+
227+When IDSgrep's \texttt{configure} script runs, it looks for a valid
228+Tsukurimashou build directory. Ideally, that would be one in which
229+Tsukurimashou has actually been fully built; but a directory where the
230+Tsukurimashou \texttt{configure} script has been executed is enough. If a
231+valid Tsukurimashou build directory is found automatically or specified with
232+the \texttt{--with-tsuku-build} option to \texttt{configure}, then when
233+\texttt{make} is run on IDSgrep, it will recursively call \texttt{make
234+eids} in the Tsukurimashou build. That is a hook that causes
235+Tsukurimashou's build system to generate the EIDS decomposition dictionary,
236+which is then copied or linked back into IDSgrep's build directory and can
237+be installed with \texttt{make install}. IDSgrep's build will also look in
238+Tsukurimashou's build directory for the fonts needed to build this user
239+manual, and make recursive calls to \texttt{make} on Tsukurimashou to build
240+those if necessary.
241+
242+Note that neither Tsukurimashou nor IDSgrep is a true Autotools
243+``sub-package'' of the other, as mediated by the \texttt{SUBDIRS} Automake
244+variable and so on, notwithstanding that a checked-out SVN working copy
245+of Tsukurimashou will contain a working copy of IDSgrep in a subdirectory.
246+Running the Tsukurimashou build will not invoke the IDSgrep build at all;
247+and running the IDSgrep build is not a good way to trigger a full
248+Tsukurimashou build, because it won't use the preferred \texttt{-j} option
249+nor generate anything that doesn't happen to be a prerequisite for the files
250+IDSgrep needs. If you want to build both systems, it's best to build
251+Tsukurimashou first and then build IDSgrep pointing at Tsukurimashou. Also,
252+these two packages do not necessarily have the same portability
253+considerations, and it's possible that the link between them may fail even
254+on systems where each package builds correctly by itself (for instance,
255+possibly on some systems where GNU Make is installed but non-default).
256+The link between Tsukurimashou and IDSgrep provides some convenience for
257+my own frequent case of making changes to both packages at once.
258+
259+In order for IDSgrep to work together with Tsukurimashou, it is necessary
260+that the Tsukurimashou build be one that supports the \texttt{make eids}
261+target in the first place. No released version does that yet, but it is
262+planned for Tsukurimashou~0.6. Development versions in the SVN repository
263+have included EIDS support since early January 2012.
264+
265+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
266+
267+\section{Unicode IDSes}
268+
269+Although \texttt{idsgrep} uses a more elaborate syntax, it is well to know
270+about the Unicode Consortium's ``Ideographic Description Sequences''
271+(IDSes), which
272+are a subset of \texttt{idsgrep}'s. These are documented more fully in the
273+Unicode standard~\cite{Unicode:IDS}.
274+
275+\begin{itemize}
276+
277+\item A character from one of the Unified Han or CJK Radical ranges is a
278+complete IDS and simply represents itself. For instance, ``\texttt{大}'' is
279+a complete IDS.
280+
281+\item The Ideographic Description Character (IDC) code points U+2FF0, U+2FF1,
282+and U+2FF4 through U+2FFB, whose graphic images look like
283+\texttt{⿰⿱⿴⿵⿶⿷⿸⿹⿺⿻}, are prefix binary operators.
284+One of these characters followed by two complete IDSes
285+forms another complete IDS, representing a character formed by joining the
286+two smaller characters in a way suggested by the name and graphical image
287+of the IDC. For instance, ``\texttt{⿰日月}'' describes the
288+character \texttt{明}. These structures may be nested; for instance,
289+``\texttt{⿰言⿱五口}'' describes the character \texttt{語}.
290+
291+\item The IDC code points U+2FF2 and U+2FF3, which look like \texttt{⿲⿳},
292+are prefix ternary operators. (Unicode uses the less-standard word
293+``trinary'' to describe them.) One of them can be followed by three complete
294+IDSes to form an IDS that describes a character made of three parts, much in
295+the same manner as the binary operators. For instance,
296+``\texttt{⿳厶口心}'' describes the character \texttt{怠}.
297+
298+\item An IDS may not be more than 16 code points long overall nor contain more
299+than six consecutive non-operator characters.
300+This rule appears to be intended to make things easier for systems that need
301+to be able to jump into the middle of text and quickly find the starts and
302+ends of IDSes.
303+
304+\item IDSes non-bindingly ``should'' be as short as possible.
305+
53306 \end{itemize}
54307
55308 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
56309 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
310+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
57311
58-\section{Detailed reference}
312+\chapter{Technical details}
59313
314+\noindent
60315 This system is best understood as three interconnected major concepts:
61316 \begin{itemize}
62317 \item an abstract data structure;
@@ -73,7 +328,7 @@
73328
74329 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
75330
76-\subsection{The data structure}
331+\section{The data structure}
77332
78333 An \emph{EIDS tree} consists of the following:
79334
@@ -106,7 +361,7 @@
106361
107362 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
108363
109-\subsection{EIDS syntax}
364+\section{EIDS syntax}
110365
111366 EIDS trees are written in a simple prefix notation that could be called
112367 ``Polish notation'' inasmuch as it is the reverse of ``reverse Polish
@@ -144,7 +399,8 @@
144399 parenthesis, in a nullary tree; and ``\texttt{...}'' is a functor equal to a
145400 single ASCII period in a unary tree.
146401
147-Each pair of ASCII brackets also has two pairs of non-ASCII synonyms, as
402+Each pair of ASCII brackets also has two pairs of generally
403+non-ASCII synonyms, as
148404 follows:
149405
150406 {\ttfamily\hspace*{\fill}
@@ -151,18 +407,19 @@
151407 \begin{tabular}{cccccc}
152408 <&>&【&】&〖&〗\\
153409 (&)&(&)&⦅&⦆\\
154- .&.&・&・&〜&〜\\\relax
410+ .&.&:&:&・&・\\\relax
155411 [&]&[&]&〚&〛\\
156412 \{&\}&〔&〕&〘&〙
157413 \end{tabular}
158414 \hspace*{\fill}\par}
159415
160-As in the ASCII case, the closing synonymous brackets for functors of unary
161-trees are identical to the opening brackets. A string may be opened by any
162-of the three opening bracket characters for that type of string; then it
163-must be closed by the closing bracket character that goes with that opening
164-bracket. Brackets from other pairs are counted as part of the string. For
165-instance, ``\texttt{【<example>】}'' is a head whose value consists of
416+The closing synonymous brackets for functors of unary trees are always
417+identical to the opening brackets. A string may be opened by any of the
418+three opening bracket characters for its type of string; but then it must be
419+closed by the closing bracket character that goes with the opening bracket.
420+Brackets from other pairs are taken literally and do not end the string.
421+For instance,
422+``\texttt{【<example>】}'' is a head whose value consists of
166423 ``\texttt{<example>}'' including the ASCII angle brackets. There are
167424 several reasons for the existence of the synonyms:
168425
@@ -171,6 +428,9 @@
171428 \item There is an established tradition of using \texttt{【}lenticular
172429 brackets\texttt{】} for heads in printed dictionaries, which is exactly
173430 their meaning here.
431+ \item Allowing ASCII colons to bracket unary-node functors makes possible
432+ a more appealing and \texttt{grep}-like syntax for \texttt{idsgrep}'s
433+ output in the case of processing multiple input files.
174434 \item Allowing more than one way to bracket each kind of string makes it
175435 easier to express bracket characters that may occur literally in a string.
176436 \item The non-ASCII brackets may be easier to type without switching modes
@@ -220,6 +480,9 @@
220480 recognize some backslash escape sequences. This is not yet
221481 implemented.
222482
483+It is a consequence of these rules that all syntactically valid Unicode
484+IDSes are syntactically valid EIDSes, but the reverse is not true.
485+
223486 Although it is technically not a parsing issue but rather a
224487 transformation applied to the tree after parsing, there is one more
225488 issue to mention: some functors have aliases. If a functor and arity
@@ -227,53 +490,38 @@
227490 with the indicated single-character functor. The idea is to provide
228491 verbose ASCII names for single-character functors of special
229492 importance to the matching algorithm. Note that the single-character
230-versions are always the canonical ones, and (although the brackets are
231-shown explicitly for clarity) these all have sugary implicit brackets.
493+versions are always the canonical ones, and although the brackets are
494+shown explicitly for clarity, they are nearly all characters from the
495+``sugary implicit'' list.
232496
233-\texttt{(anything)} $\rightarrow$ \texttt{(?)}
497+\texttt{\begin{tabular}{cccccc}
498+ (anything) & $\Rightarrow$ & (?) & .anywhere. & $\Rightarrow$ & ... \\
499+ .not. & $\Rightarrow$ & .!. & .equal. & $\Rightarrow$ & .=. \\
500+ .unord. & $\Rightarrow$ & .*. & .assoc. & $\Rightarrow$ & .@. \\\relax
501+ [and] & $\Rightarrow$ & [\&] & [or] & $\Rightarrow$ & [|] \\\relax
502+ [lr] & $\Rightarrow$ & [⿰] & [tb] & $\Rightarrow$ & [⿱] \\\relax
503+ [FIXME] & $\Rightarrow$ & [⿴] & [FIXME] & $\Rightarrow$ & [⿵] \\\relax
504+ [FIXME] & $\Rightarrow$ & [⿶] & [FIXME] & $\Rightarrow$ & [⿷] \\\relax
505+ [FIXME] & $\Rightarrow$ & [⿸] & [FIXME] & $\Rightarrow$ & [⿹] \\\relax
506+ [FIXME] & $\Rightarrow$ & [⿺] & [FIXME] & $\Rightarrow$ & [⿻] \\\relax
507+ [lcr] & $\Rightarrow$ & \{⿲\} & [tcb] & $\Rightarrow$ & \{⿳\}
508+\end{tabular}}
234509
235-\texttt{.anywhere.} $\rightarrow$ \texttt{...}
510+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
236511
237-\texttt{.not.} $\rightarrow$ \texttt{.!.}
512+\section{Matching}
238513
239-\texttt{.equal.} $\rightarrow$ \texttt{.=.}
240-
241-\texttt{.unord.} $\rightarrow$ \texttt{.*.}
242-
243-\texttt{.assoc.} $\rightarrow$ \texttt{.@.}
244-
245-\texttt{[and]} $\rightarrow$ \texttt{[\&]}
246-
247-\texttt{[or]} $\rightarrow$ \texttt{[|]}
248-
249-\texttt{[lr]} $\rightarrow$ \texttt{[⿰]}
250-
251-\texttt{[tb]} $\rightarrow$ \texttt{[⿱]}
252-
253-\texttt{[FIXME]} $\rightarrow$ \texttt{[⿴]}
254-
255-\texttt{[FIXME]} $\rightarrow$ \texttt{[⿵]}
256-
257-\texttt{[FIXME]} $\rightarrow$ \texttt{[⿶]}
258-
259-\texttt{[FIXME]} $\rightarrow$ \texttt{[⿷]}
260-
261-\texttt{[FIXME]} $\rightarrow$ \texttt{[⿸]}
262-
263-\texttt{[FIXME]} $\rightarrow$ \texttt{[⿹]}
264-
265-\texttt{[FIXME]} $\rightarrow$ \texttt{[⿺]}
266-
267-\texttt{[FIXME]} $\rightarrow$ \texttt{[⿻]}
268-
269-\texttt{[lcr]} $\rightarrow$ \texttt{\{⿲\}}
270-
271-\texttt{[tcb]} $\rightarrow$ \texttt{\{⿳\}}
272-
273514 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
515+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
516+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
274517
275-\subsection{Matching}
518+\clearpage
519+\addcontentsline{toc}{chapter}{Bibliography}
520+\bibliographystyle{achicago}
521+\bibliography{idsgrep}
276522
277523 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
524+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
525+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
278526
279527 \end{document}
--- trunk/idsgrep/idsgrep.c (revision 204)
+++ trunk/idsgrep/idsgrep.c (revision 205)
@@ -801,8 +801,8 @@
801801 register_bracket_pair("\xE2\xB8\xA8","\xE2\xB8\xA9",0); /* dblwide paren */
802802
803803 register_bracket_pair(".",".",1);
804+ register_bracket_pair(":",":",1);
804805 register_bracket_pair("\xE3\x83\xBB","\xE3\x83\xBB",1); /* centre dot */
805- register_bracket_pair("\xE3\x80\x9C","\xE3\x80\x9C",1); /* wide tilde */
806806
807807 register_bracket_pair("[","]",2);
808808 register_bracket_pair("\xEF\xBC\xBB","\xEF\xBC\xBD",2); /* wide sqb */
Show on old repository browser