• R/O
  • SSH
  • HTTPS

tsukurimashou: Commit


Commit MetaInfo

Revision: 309 (tree)
Time: 2012-08-15 05:27:30
Author: mskala

Log Message

Web site translation, tighter UTF-8 checks, output cooking

Change Summary

Incremental Difference

--- trunk/idsgrep/parse.c (revision 308)
+++ trunk/idsgrep/parse.c (revision 309)
@@ -204,7 +204,7 @@
204204 continue;
205205 }
206206 eptr=ebuf;
207- if (xval>=0x200000)
207+ if (xval>=0x110000)
208208 xval=0xFFFD;
209209 if (xval<0x80) {
210210 clen=1;
@@ -283,6 +283,12 @@
283283 continue;
284284 }
285285
286+ /* check for surrogate */
287+ if (((eptr[0]&0xFF)==0xED) && ((eptr[1]&0xE0)==0xA0)) {
288+ offs+=3;
289+ continue;
290+ }
291+
286292 /* check for overlong */
287293 if (((eptr[0]&0xFF)==0xE0) && ((eptr[1]&0xE0)==0x80)) {
288294 offs+=3;
@@ -305,6 +311,13 @@
305311 continue;
306312 }
307313
314+ /* check for non-Unicode */
315+ if (((eptr[0]==0xF4) && (eptr[2]&0xF0)>0x80) ||
316+ ((eptr[0]&0xFF)>0xF4)) {
317+ offs+=4;
318+ continue;
319+ }
320+
308321 /* check for overlong */
309322 if (((eptr[0]&0xFF)==0xF0) && ((eptr[1]&0xF0)==0x80)) {
310323 offs+=4;
--- trunk/idsgrep/idsgrep.tex (revision 308)
+++ trunk/idsgrep/idsgrep.tex (revision 309)
@@ -291,7 +291,7 @@
291291
292292 IDSgrep is distributed under the umbrella of the Tsukurimashou project on
293293 Sourceforge.JP~\cite{Tsukurimashou},
294-\url{http://en.sourceforge.jp/projects/tsukurimashou/}. Releases of IDSgrep
294+\url{http://tsukurimashou.sourceforge.jp/}. Releases of IDSgrep
295295 will appear on the project download page; development versions are available
296296 by SVN checkout from the \texttt{trunk/idsgrep} subdirectory of the
297297 repository. For the convenience of Github users, the Tsukurimashou (and
@@ -504,12 +504,12 @@
504504 valid IDSgrep EIDS syntax, it is easy to convert them into EIDS format.
505505 IDSgrep includes a \texttt{chise2eids} Perl script for that purpose. The
506506 \texttt{configure} script will look for CHISE IDS in a directory named
507-\texttt{chise-ids-*} in a short list of likely places, or use the value
508-of the \texttt{--enable-chise-ids} command-line option if one is given.
509-This directory should simply be an unpacked CHISE IDS distribution tarball,
510-or a checkout from the CHISE IDS Git repository. It is not necessary to run
511-the Makefile, which would require also having and installing other parts of
512-the larger system.
507+\texttt{chise-ids-*} in a short list of likely places, or use the value of
508+the \texttt{--enable-chise-ids} command-line option if one is given. This
509+directory should simply be an unpacked CHISE IDS distribution tarball, or a
510+checkout from the CHISE IDS Git repository. It is not necessary to run
511+CHISE's Makefile, which would require also having and installing other parts
512+of the larger system.
513513
514514 CHISE IDS distribution tarballs are available from
515515 \url{http://chise.zinbun.kyoto-u.ac.jp/dist/ids/}, and the Git repository
--- trunk/idsgrep/configure.ac (revision 308)
+++ trunk/idsgrep/configure.ac (revision 309)
@@ -160,7 +160,7 @@
160160 AC_PREREQ([2.63])
161161 AC_INIT([IDSgrep],
162162 [0.3pre], [mskala@ansuz.sooke.bc.ca], [idsgrep],
163- [[http://ansuz.sooke.bc.ca/]])
163+ [[http://tsukurimashou.sourceforge.jp/]])
164164 AC_PRESERVE_HELP_ORDER
165165 AM_INIT_AUTOMAKE([foreign parallel-tests color-tests])
166166 AC_CONFIG_SRCDIR([idsgrep.c])
@@ -168,7 +168,7 @@
168168 AC_CONFIG_MACRO_DIR([m4])
169169 AC_REVISION([$Id: configure.ac 1015 2011-12-15 22:24:32Z mskala $])
170170 AC_COPYRIGHT([Copyright (C) 2012 Matthew Skala])
171-AC_SUBST([release_date],["March 17, 2012"])
171+AC_SUBST([release_date],["August 1, 2012"])
172172 #
173173 ############################################################################
174174 #
--- trunk/idsgrep/cook.c (revision 308)
+++ trunk/idsgrep/cook.c (revision 309)
@@ -33,14 +33,19 @@
3333 #define OS_UNARY_BRACKET_TYPE 3
3434 #define OS_BINARY_BRACKET_TYPE 4
3535 #define OS_TERNARY_BRACKET_TYPE 5
36+#define OS_INDENTATION 6
37+#define OS_SEPARATOR 7
38+#define OS_SUGAR 8
39+#define OS_ESCAPE_WHAT 9
40+#define OS_ESCAPE_HOW 10
3641
37-#define NUM_OUTPUT_SETTINGS 6
42+#define NUM_OUTPUT_SETTINGS 11
3843
39-static char output_recipe[NUM_OUTPUT_SETTINGS]="111111";
44+static char output_recipe[NUM_OUTPUT_SETTINGS]="11111111111";
4045
4146 #define NUM_PRESET_RECIPES 1
4247
43-static struct {char *name,*recipe} preset_recipe[NUM_PRESET_RECIPES]={
48+static struct {char *name,*recipe;} preset_recipe[NUM_PRESET_RECIPES]={
4449 {"",""},
4550 };
4651
@@ -70,9 +75,66 @@
7075
7176 /**********************************************************************/
7277
78+int char_length(char *c) {
79+ if (((unsigned char)*c)<0x80)
80+ return 1;
81+ else if (((unsigned char)*c)<0xC0) {
82+ puts("found UTF-8 continuation byte when clean " /* SNH */
83+ "character expected"); /* SNH */
84+ exit(1); /* SNH */
85+ } else if (((unsigned char)*c)<0xE0)
86+ return 2;
87+ else if (((unsigned char)*c)<0xF0)
88+ return 3;
89+ else if (((unsigned char)*c)<0xF5)
90+ return 4;
91+ else {
92+ puts("found non-Unicode UTF-8 length byte when " /* SNH */
93+ "clean character expected"); /* SNH */
94+ exit(1); /* SNH */
95+ }
96+}
97+
98+/**********************************************************************/
99+
73100 void write_bracketed_string(HASHED_STRING *hs,HASHED_STRING *br) {
74- /* FIXME */
75- printf("%s%s%s",br->data,hs->data,br->mate->data);
101+ int i,do_esc;
102+
103+ fwrite(br->data,1,br->length,stdout);
104+ for (i=0;i<hs->length;i+=char_length(hs->data+i)) {
105+
106+ if (hs->data[i]=='\\') /* backslash - always escape */
107+ do_esc=1;
108+ else if ((i+br->mate->length<=hs->length) &&
109+ (strncmp(hs->data+i,br->mate->data,br->mate->length)==0))
110+ do_esc=((i>0) || (output_recipe[OS_ESCAPE_WHAT]>='6'));
111+ else if (((unsigned char)hs->data[i])>=0xF0) /* astral planes */
112+ do_esc=output_recipe[OS_ESCAPE_WHAT]>='1';
113+ else if ((((unsigned char)hs->data[i])>=0xE3) &&
114+ (((unsigned char)hs->data[i])<=0xED)) /* mainline CJK */
115+ do_esc=output_recipe[OS_ESCAPE_WHAT]>='4';
116+ else if (((unsigned char)hs->data[i])==0xEE) /* low PUA */
117+ do_esc=output_recipe[OS_ESCAPE_WHAT]>='2';
118+ else if (((unsigned char)hs->data[i])>=0x80) /* non-ASCII */
119+ do_esc=output_recipe[OS_ESCAPE_WHAT]>='3';
120+ else if (((unsigned char)hs->data[i])<=0x20) /* ASCII controls */
121+ do_esc=output_recipe[OS_ESCAPE_WHAT]>='5';
122+
123+ if (do_esc) {
124+ switch (output_recipe[OS_ESCAPE_HOW]) {
125+
126+ case '0':
127+ default:
128+ fputc('\\',stdout);
129+ fwrite(hs->data+i,1,char_length(hs->data+i),stdout);
130+ break;
131+
132+ /* FIXME add other escape forms */
133+ }
134+ } else
135+ fwrite(hs->data+i,1,char_length(hs->data+i),stdout);
136+ }
137+ fwrite(br->mate->data,1,br->mate->length,stdout);
76138 }
77139
78140 /**********************************************************************/
@@ -79,9 +141,11 @@
79141
80142 void write_cooked_tree(NODE *ms) {
81143 NODE *tail;
144+ HASHED_STRING *semicolon;
82145 int i;
83146
84147 ms->complete=0;
148+ semicolon=new_string(1,";");
85149
86150 while (ms) {
87151 tail=ms->match_parent;
@@ -92,31 +156,78 @@
92156 tail=ms->child[i];
93157 }
94158
95- if (ms->head) {
96- if (ms->complete==0) {
97- if (output_recipe[OS_TOP_HEAD_BRACKET_TYPE]<'2')
98- write_bracketed_string(ms->head,hashed_bracket
99- [output_recipe
100- [OS_TOP_HEAD_BRACKET_TYPE]-'0']);
101- else
102- write_bracketed_string(ms->head,hashed_bracket[2]);
159+ if ((output_recipe[OS_INDENTATION]!='0') && (ms->complete>0))
160+ putchar('\n');
161+ if (output_recipe[OS_INDENTATION]=='8')
162+ for (i=0;i<ms->complete;i++)
163+ putchar('\t');
164+ else
165+ for (i=0;i<(ms->complete*(output_recipe[OS_INDENTATION]-'0'));i++)
166+ putchar(' ');
167+
168+ if (((output_recipe[OS_SUGAR]&2) || (ms->complete==0)) &&
169+ ((output_recipe[OS_SUGAR]&4) || (ms->complete>0)) &&
170+ (ms->head!=NULL) &&
171+ (((unsigned char)ms->head->data[0])>0x20) &&
172+ (char_length(ms->head->data)==ms->head->length) &&
173+ (ms->head->arity==-2) &&
174+ (ms->arity==0) &&
175+ (ms->functor==semicolon)) {
176+ fwrite(ms->head->data,ms->head->length,1,stdout);
177+
178+ } else {
179+
180+ if (ms->head) {
181+ if (ms->complete==0) {
182+ if (output_recipe[OS_TOP_HEAD_BRACKET_TYPE]<'2')
183+ write_bracketed_string(ms->head,hashed_bracket
184+ [output_recipe
185+ [OS_TOP_HEAD_BRACKET_TYPE]-'0']);
186+ else
187+ write_bracketed_string(ms->head,hashed_bracket[2]);
188+ } else {
189+ if (output_recipe[OS_INNER_HEAD_BRACKET_TYPE]<'2')
190+ write_bracketed_string(ms->head,hashed_bracket
191+ [output_recipe
192+ [OS_INNER_HEAD_BRACKET_TYPE]-'0']);
193+ else
194+ write_bracketed_string(ms->head,hashed_bracket[2]);
195+ }
196+ }
197+
198+ if ((output_recipe[OS_SUGAR]&1) &&
199+ (ms->functor->arity==ms->arity) &&
200+ (char_length(ms->functor->data)==ms->functor->length)) {
201+ fwrite(ms->functor->data,ms->functor->length,1,stdout);
202+
103203 } else {
104- if (output_recipe[OS_INNER_HEAD_BRACKET_TYPE]<'2')
105- write_bracketed_string(ms->head,hashed_bracket
106- [output_recipe
107- [OS_INNER_HEAD_BRACKET_TYPE]-'0']);
108- else
109- write_bracketed_string(ms->head,hashed_bracket[2]);
204+ i=output_recipe[OS_NULLARY_BRACKET_TYPE+ms->arity]-'0';
205+ if (i>2) i=2;
206+ write_bracketed_string(ms->functor,
207+ hashed_bracket[3*(ms->arity+1)+i]);
110208 }
111209 }
112210
113- i=output_recipe[OS_NULLARY_BRACKET_TYPE+ms->arity]-'0';
114- if (i>2) i=2;
115- write_bracketed_string(ms->functor,hashed_bracket[3*(ms->arity+1)+i]);
116-
117211 ms->match_parent=NULL;
118212 ms=tail;
119213 }
120214
121- putchar('\n');
215+ switch (output_recipe[OS_SEPARATOR]) {
216+ case '0':
217+ putchar('\0');
218+ break;
219+ /* 1 is default newline */
220+ case '2':
221+ putchar('\n');
222+ putchar('\n');
223+ break;
224+ case '3':
225+ /* 3 is nothing */
226+ break;
227+ default:
228+ putchar('\n');
229+ break;
230+ }
231+
232+ delete_string(semicolon);
122233 }
--- trunk/website/Makefile (revision 308)
+++ trunk/website/Makefile (revision 309)
@@ -1,6 +1,6 @@
11 all: \
22 ul/.htaccess \
3- ul/index.html.en ul/index.html.jp \
3+ ul/index.html.en ul/index.html.ja \
44 ul/prosimii-print.css ul/prosimii-screen.css \
55 ul/demo200.png ul/flag-jp.png ul/flag-en.png ul/rss.png \
66 ul/rg00.png ul/rglyph.php
@@ -12,7 +12,7 @@
1212 ul/rglyph.php: ul/.htaccess rglyph.php
1313 cp rglyph.php ul/rglyph.php
1414
15-ul/index.html.en ul/index.html.jp: \
15+ul/index.html.en ul/index.html.ja: \
1616 ul/.htaccess mkindex ../doc/kanjichart.tex
1717 lynx -source \
1818 'http://sourceforge.jp/projects/tsukurimashou/releases/rss' \
@@ -28,6 +28,20 @@
2828 ul/%.css: %.css url/.htaccess
2929 cp $< $@
3030
31+ul/prosimii-print.css: prosimii-print.css
32+
33+ul/prosimii-screen.css: prosimii-screen.css
34+
35+ul/demo200.png: demo200.png
36+
37+ul/flag-jp.png: flag-jp.png
38+
39+ul/flag-en.png: flag-en.png
40+
41+ul/rss.png: rss.png
42+
43+ul/rglyph.php: rglyph.php
44+
3145 glyphgrid.pdf: glyphgrid.tex
3246 xelatex glyphgrid
3347 xelatex glyphgrid
--- trunk/configure.ac (revision 308)
+++ trunk/configure.ac (revision 309)
@@ -190,8 +190,8 @@
190190 #
191191 AC_PREREQ([2.63])
192192 AC_INIT([Tsukurimashou],
193- [0.6], [mskala@ansuz.sooke.bc.ca], [tsukurimashou],
194- [[http://en.sourceforge.jp/projects/tsukurimashou/]])
193+ [0.7pre], [mskala@ansuz.sooke.bc.ca], [tsukurimashou],
194+ [[http://tsukurimashou.sourceforge.jp/]])
195195 AC_PRESERVE_HELP_ORDER
196196 AM_INIT_AUTOMAKE([foreign check-news dist-zip no-dist-gzip
197197 silent-rules subdir-objects])
@@ -198,7 +198,7 @@
198198 AC_CONFIG_SRCDIR([hamlog/hamlog])
199199 AC_CONFIG_HEADERS([config.h])
200200 AC_CONFIG_MACRO_DIR([m4])
201-AC_REVISION([$Id: configure.ac 1459 2012-06-18 12:44:00Z mskala $])
201+AC_REVISION([$Id: configure.ac 1595 2012-07-31 19:56:54Z mskala $])
202202 AC_COPYRIGHT([Copyright (C) 2011, 2012 Matthew Skala])
203203 AC_SUBST([release_date],["June 18, 2012"])
204204 AM_SILENT_RULES
--- trunk/doc/usermanual.tex (revision 308)
+++ trunk/doc/usermanual.tex (revision 309)
@@ -130,9 +130,9 @@
130130 \phantomsection\addcontentsline{toc}{section}{コピーライト Copyright}
131131
132132 This project's English-language home page is at\\
133-\hspace*{1em}\url{http://en.sourceforge.jp/projects/tsukurimashou/}.\\
133+\hspace*{1em}\url{http://tsukurimashou.sourceforge.jp/index.html.en}.\\
134134 このプロジェクトは、日本語のページが\\
135-\hspace*{1em}\url{http://tsukurimashou.sourceforge.jp/}です。
135+\hspace*{1em}\url{http://tsukurimashou.sourceforge.jp/index.html.ja}です。
136136
137137 \vspace*{1in}
138138
@@ -377,9 +377,10 @@
377377
378378 The home pages for this project, where you can download the latest releases,
379379 browse the source-control repository, and so on, are:\\
380-\hspace*{1em}\url{http://en.sourceforge.jp/projects/tsukurimashou/}%
380+\hspace*{1em}\url{http://tsukurimashou.sourceforge.jp/index.html.en}%
381381 \quad (English)\\
382-\hspace*{1em}\url{http://tsukurimashou.sourceforge.jp/}\quad (日本語)
382+\hspace*{1em}\url{http://tsukurimashou.sourceforge.jp/index.html.ja}%
383+\quad(日本語)
383384
384385 よろしくおねがいします。
385386
Show on old repository browser