• R/O
  • SSH
  • HTTPS

tsukurimashou: Commit


Commit MetaInfo

Revision: 381 (tree)
Time: 2013-02-25 11:13:08
Author: mskala

Log Message

start of "user-defined predicate" feature

Change Summary

Incremental Difference

--- trunk/idsgrep/m4/ax_c___attribute__.m4 (nonexistent)
+++ trunk/idsgrep/m4/ax_c___attribute__.m4 (revision 381)
@@ -0,0 +1,66 @@
1+# ===========================================================================
2+# http://www.gnu.org/software/autoconf-archive/ax_c___attribute__.html
3+# ===========================================================================
4+#
5+# SYNOPSIS
6+#
7+# AX_C___ATTRIBUTE__
8+#
9+# DESCRIPTION
10+#
11+# Provides a test for the compiler support of __attribute__ extensions.
12+# Defines HAVE___ATTRIBUTE__ if it is found.
13+#
14+# LICENSE
15+#
16+# Copyright (c) 2008 Stepan Kasal <skasal@redhat.com>
17+# Copyright (c) 2008 Christian Haggstrom
18+# Copyright (c) 2008 Ryan McCabe <ryan@numb.org>
19+#
20+# This program is free software; you can redistribute it and/or modify it
21+# under the terms of the GNU General Public License as published by the
22+# Free Software Foundation; either version 2 of the License, or (at your
23+# option) any later version.
24+#
25+# This program is distributed in the hope that it will be useful, but
26+# WITHOUT ANY WARRANTY; without even the implied warranty of
27+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
28+# Public License for more details.
29+#
30+# You should have received a copy of the GNU General Public License along
31+# with this program. If not, see <http://www.gnu.org/licenses/>.
32+#
33+# As a special exception, the respective Autoconf Macro's copyright owner
34+# gives unlimited permission to copy, distribute and modify the configure
35+# scripts that are the output of Autoconf when processing the Macro. You
36+# need not follow the terms of the GNU General Public License when using
37+# or distributing such scripts, even though portions of the text of the
38+# Macro appear in them. The GNU General Public License (GPL) does govern
39+# all other use of the material that constitutes the Autoconf Macro.
40+#
41+# This special exception to the GPL applies to versions of the Autoconf
42+# Macro released by the Autoconf Archive. When you make and distribute a
43+# modified version of the Autoconf Macro, you may extend this special
44+# exception to the GPL to apply to your modified version as well.
45+
46+#serial 8
47+
dnl AX_C___ATTRIBUTE__: compile a small program that declares a static
dnl function with __attribute__ ((unused)); cache the outcome in
dnl ax_cv___attribute__ and define HAVE___ATTRIBUTE__ when it compiles.
48+AC_DEFUN([AX_C___ATTRIBUTE__], [
49+ AC_CACHE_CHECK([for __attribute__], [ax_cv___attribute__],
50+ [AC_COMPILE_IFELSE(
51+ [AC_LANG_PROGRAM(
52+ [[#include <stdlib.h>
53+ static void foo(void) __attribute__ ((unused));
54+ static void
55+ foo(void) {
56+ exit(1);
57+ }
58+ ]], [])],
59+ [ax_cv___attribute__=yes],
60+ [ax_cv___attribute__=no]
61+ )
62+ ])
dnl Only a "yes" result (fresh or cached) produces the config.h define.
63+ if test "$ax_cv___attribute__" = "yes"; then
64+ AC_DEFINE([HAVE___ATTRIBUTE__], 1, [define if your compiler has __attribute__])
65+ fi
66+])
--- trunk/idsgrep/m4/tsu_c_gcc_builtin_bswap.m4 (nonexistent)
+++ trunk/idsgrep/m4/tsu_c_gcc_builtin_bswap.m4 (revision 381)
@@ -0,0 +1,28 @@
#
# SYNOPSIS
#
#   TSU_C_GCC_BUILTIN_BSWAP
#
# DESCRIPTION
#
#   Check, by compiling and linking a test program, whether the C compiler
#   provides __builtin_bswap32 and __builtin_bswap64 as GCC does.  The
#   program must be linked, not merely compiled: a compiler that lacks the
#   builtins may accept the calls as implicitly declared external
#   functions, and only the link step then reveals that they do not exist.
#   The result is cached in tsu_cv_c_gcc_builtin_bswap, and
#   GCC_BUILTIN_BSWAP is defined to 1 when the builtins are usable.
#
# LICENSE
#
#   This macro is released to the public domain by its author,
#   Matthew Skala <mskala@ansuz.sooke.bc.ca>.

#serial 2

AC_DEFUN([TSU_C_GCC_BUILTIN_BSWAP],[dnl
AC_CACHE_CHECK(
  [for __builtin_bswap{32,64}],
  [tsu_cv_c_gcc_builtin_bswap],
  [AC_LINK_IFELSE(
     [AC_LANG_PROGRAM([],
        [[__builtin_bswap32(0);__builtin_bswap64(0);]])],
     [tsu_cv_c_gcc_builtin_bswap=yes],
     [tsu_cv_c_gcc_builtin_bswap=no])])
AS_IF([test "x$tsu_cv_c_gcc_builtin_bswap" = xyes],
  [AC_DEFINE([GCC_BUILTIN_BSWAP],[1],
    [Define to 1 if GCC-style builtin bswaps are supported])])
])
--- trunk/idsgrep/idsgrep.h (revision 380)
+++ trunk/idsgrep/idsgrep.h (revision 381)
@@ -1,6 +1,6 @@
11 /*
2- * General header file IDSgrep
3- * Copyright (C) 2012 Matthew Skala
2+ * General header file for IDSgrep
3+ * Copyright (C) 2012, 2013 Matthew Skala
44 *
55 * This program is free software: you can redistribute it and/or modify
66 * it under the terms of the GNU General Public License as published by
@@ -20,6 +20,7 @@
2020 */
2121
2222 #include "config.h"
23+#include "_stdint.h"
2324
2425 #ifdef HAVE_PCRE
2526 #include <pcre.h>
@@ -48,6 +49,7 @@
4849 pcre *pcre_compiled;
4950 pcre_extra *pcre_studied;
5051 #endif
52+ uintmax_t userpreds;
5153 } HASHED_STRING;
5254
5355 typedef struct _NODE {
@@ -131,6 +133,7 @@
131133 extern PARSE_STATE parse_state;
132134 extern int echoing_whitespace;
133135
136+int construct_utf8(int,char *);
134137 size_t parse(size_t,char *);
135138 void register_syntax(void);
136139
@@ -139,3 +142,11 @@
139142 /* regex.c */
140143
141144 NODE *regex_match_fn(NODE *);
145+
146+/**********************************************************************/
147+
148+/* userpred.c */
149+
150+void font_file_userpred(char *);
151+
152+NODE *user_match_fn(NODE *);
--- trunk/idsgrep/parse.c (revision 380)
+++ trunk/idsgrep/parse.c (revision 381)
@@ -1,6 +1,6 @@
11 /*
22 * Parser for IDSgrep
3- * Copyright (C) 2012 Matthew Skala
3+ * Copyright (C) 2012, 2013 Matthew Skala
44 *
55 * This program is free software: you can redistribute it and/or modify
66 * it under the terms of the GNU General Public License as published by
@@ -42,6 +42,35 @@
4242 || (((x)>='A') && ((x)<='F')))
4343 #define MYXVAL(x) (((x)&0x40)?(((x)&0xF)+9):((x)&0xF))
4444
/*
 * Encode one code point as UTF-8.
 *
 * xval is the code point to encode; ebuf must have room for 4 bytes and
 * is NOT NUL-terminated.  Returns the number of bytes written (1 to 4).
 *
 * Values that are not Unicode scalar values - negative numbers, anything
 * at or above 0x110000, and the surrogate range D800..DFFF - are replaced
 * by U+FFFD REPLACEMENT CHARACTER before encoding.
 */
int construct_utf8(int xval,char *ebuf) {
   int clen;

   if ((xval<0) || (xval>=0x110000) ||
       ((xval>=0xD800) && (xval<=0xDFFF)))
      xval=0xFFFD;

   if (xval<0x80) {
      clen=1;
      ebuf[0]=(char)xval;
   } else if (xval<0x800) {
      clen=2;
      ebuf[0]=(char)(0xC0|(xval>>6));
      ebuf[1]=(char)(0x80|(xval&0x3F));
   } else if (xval<0x10000) {
      clen=3;
      ebuf[0]=(char)(0xE0|(xval>>12));
      ebuf[1]=(char)(0x80|((xval>>6)&0x3F));
      ebuf[2]=(char)(0x80|(xval&0x3F));
   } else {
      clen=4;
      ebuf[0]=(char)(0xF0|(xval>>18));
      ebuf[1]=(char)(0x80|((xval>>12)&0x3F));
      ebuf[2]=(char)(0x80|((xval>>6)&0x3F));
      ebuf[3]=(char)(0x80|(xval&0x3F));
   }
   return clen;
}
73+
4574 size_t parse(size_t len,char *inp) {
4675 int offs=0,clen,escaped,flag,xval,i;
4776 char ebuf[4],*eptr;
@@ -206,29 +235,7 @@
206235 continue;
207236 }
208237 eptr=ebuf;
209- if (xval>=0x110000)
210- xval=0xFFFD;
211- if ((xval>=0xD800) && (xval<=0xDFFF))
212- xval=0xFFFD;
213- if (xval<0x80) {
214- clen=1;
215- ebuf[0]=xval;
216- } else if (xval<0x800) {
217- clen=2;
218- ebuf[0]=0xC0|(xval>>6);
219- ebuf[1]=0x80|(xval&0x3F);
220- } else if (xval<0x10000) {
221- clen=3;
222- ebuf[0]=0xE0|(xval>>12);
223- ebuf[1]=0x80|((xval>>6)&0x3F);
224- ebuf[2]=0x80|(xval&0x3F);
225- } else {
226- clen=4;
227- ebuf[0]=0xF0|(xval>>18);
228- ebuf[1]=0x80|((xval>>12)&0x3F);
229- ebuf[2]=0x80|((xval>>6)&0x3F);
230- ebuf[3]=0x80|(xval&0x3F);
231- }
238+ clen=construct_utf8(xval,ebuf);
232239 offs-=clen;
233240 break;
234241
@@ -623,6 +630,9 @@
623630
624631 register_special_functor("/",1,regex_match_fn);
625632 register_alias("regex","/");
633+
634+ register_special_functor("#",1,user_match_fn);
635+ register_alias("user","#");
626636
627637 register_special_functor("\xE2\xBF\xB0",2,default_match_fn);
628638 register_alias("lr","\xE2\xBF\xB0");
--- trunk/idsgrep/configure.ac (revision 380)
+++ trunk/idsgrep/configure.ac (revision 381)
@@ -228,6 +228,7 @@
228228 # Checks for header files.
229229 #
230230 AC_CHECK_HEADERS([libintl.h stdlib.h string.h wchar.h])
231+AX_CREATE_STDINT_H
231232 #
232233 ############################################################################
233234 #
@@ -235,7 +236,9 @@
235236 #
236237 AC_CHECK_SIZEOF([int])
237238 AC_TYPE_SIZE_T
239+AX_C___ATTRIBUTE__
238240 TSU_C_ANON_UNION_STRUCT
241+TSU_C_GCC_BUILTIN_BSWAP
239242 #
240243 ############################################################################
241244 #
@@ -242,7 +245,7 @@
242245 # Checks for library functions.
243246 #
244247 AX_FUNC_GETOPT_LONG
245-AC_CHECK_FUNCS([memmove memset strchr])
248+AC_CHECK_FUNCS_ONCE([memmove memset strchr])
246249 #
247250 ############################################################################
248251 #
--- trunk/idsgrep/hash.c (revision 380)
+++ trunk/idsgrep/hash.c (revision 381)
@@ -1,6 +1,6 @@
11 /*
22 * String hash table for IDSgrep
3- * Copyright (C) 2012 Matthew Skala
3+ * Copyright (C) 2012, 2013 Matthew Skala
44 *
55 * This program is free software: you can redistribute it and/or modify
66 * it under the terms of the GNU General Public License as published by
@@ -68,8 +68,11 @@
6868 rval->length=len;
6969 rval->arity=-2;
7070 rval->match_fn=default_match_fn;
71+#ifdef HAVE_PCRE
7172 rval->pcre_compiled=NULL;
7273 rval->pcre_studied=NULL;
74+#endif
75+ rval->userpreds=UINTMAX_C(0);
7376 return rval;
7477 }
7578
@@ -78,6 +81,7 @@
7881
7982 while ((((size_t)1)<<i)<=s->length) i++;
8083 s->next=free_strings[i];
84+#ifdef HAVE_PCRE
8185 if (s->pcre_compiled) {
8286 free(s->pcre_compiled); /* SNH */
8387 s->pcre_compiled=NULL; /* SNH */
@@ -86,6 +90,7 @@
8690 free(s->pcre_studied); /* SNH */
8791 s->pcre_studied=NULL; /* SNH */
8892 }
93+#endif
8994 free_strings[i]=s;
9095 }
9196
--- trunk/idsgrep/Makefile.am (revision 380)
+++ trunk/idsgrep/Makefile.am (revision 381)
@@ -171,7 +171,8 @@
171171
172172 AM_CFLAGS := $(MAYBE_COVERAGE) $(PCRE_CFLAGS) $(AM_CFLAGS)
173173 idsgrep_SOURCES = \
174- assoc.c cook.c hash.c idsgrep.c idsgrep.h match.c parse.c regex.c
174+ assoc.c cook.c hash.c idsgrep.c idsgrep.h match.c parse.c \
175+ regex.c userpred.c
175176
176177 LDADD = @LIBOBJS@ $(PCRE_LIBS)
177178
@@ -299,6 +300,9 @@
299300 m4/ax_check_gnu_make.m4: ../m4/ax_check_gnu_make.m4
300301 $(TSU_V_CP) cp $< $@
301302
303+m4/ax_create_stdint_h.m4: ../m4/ax_create_stdint_h.m4
304+ $(TSU_V_CP) cp $< $@
305+
302306 m4/ax_count_cpus.m4: ../m4/ax_count_cpus.m4
303307 $(TSU_V_CP) cp $< $@
304308
--- trunk/idsgrep/userpred.c (nonexistent)
+++ trunk/idsgrep/userpred.c (revision 381)
@@ -0,0 +1,604 @@
1+/*
2+ * User-defined matching predicates for IDSgrep
3+ * Copyright (C) 2013 Matthew Skala
4+ *
5+ * This program is free software: you can redistribute it and/or modify
6+ * it under the terms of the GNU General Public License as published by
7+ * the Free Software Foundation, version 3.
8+ *
9+ * This program is distributed in the hope that it will be useful,
10+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+ * GNU General Public License for more details.
13+ *
14+ * You should have received a copy of the GNU General Public License
15+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16+ *
17+ * Matthew Skala
18+ * http://ansuz.sooke.bc.ca/
19+ * mskala@ansuz.sooke.bc.ca
20+ */
21+
22+#include <stdio.h>
23+#include <stdlib.h>
24+#include <string.h>
25+
26+#include "idsgrep.h"
27+
28+/**********************************************************************/
29+
/* PACKED marks a struct member as packed when the compiler supports
 * __attribute__ (as detected by configure); otherwise it expands to
 * nothing. */
30+#ifdef HAVE___ATTRIBUTE__
31+#define PACKED __attribute__ ((packed))
32+#else
33+#define PACKED /* */
34+#endif
35+
36+/* Do you swap your byte at us, sir? */
37+
38+/* No, sir, I do not swap my byte at you, sir, but I swap my byte, sir. */
39+
/* BSWAP16 exchanges the two low-order bytes of x; note that x is
 * evaluated twice. */
40+#define BSWAP16(x) ((((x)>>8)&0xFF)|(((x)<<8)&0xFF00))
/* BSWAP32 reverses the four bytes of x, using the compiler builtin when
 * configure found one and a shift-and-mask expression (which evaluates x
 * four times) otherwise. */
41+#ifdef GCC_BUILTIN_BSWAP
42+#define BSWAP32(x) __builtin_bswap32(x)
43+#else
44+#define BSWAP32(x) ((((x)>>24)&0xFF)|(((x)>>8)&0xFF00)|\
45+ (((x)<<8)&0xFF0000)|(((x)<<24)&0xFF000000))
46+#endif
47+
48+/* note that these structs cover only as much of the format as we need! */
49+
/* Leading fields of a TrueType Collection file; font_file_userpred reads
 * this from offset 0 and uses offset_table0 only when ttc_tag matches. */
50+typedef struct _TTC_HEADER {
51+ uint32_t ttc_tag PACKED;
52+ uint32_t version PACKED;
53+ uint32_t num_fonts PACKED;
54+ uint32_t offset_table0 PACKED;
55+} TTC_HEADER;
56+
/* The sfnt "offset table" found at the start of a font (or at
 * offset_table0 in a collection); num_tables bounds the record scan. */
57+typedef struct _OFFSET_TABLE {
58+ uint32_t sfnt_version PACKED;
59+ uint16_t num_tables PACKED;
60+ uint16_t search_range PACKED;
61+ uint16_t entry_selector PACKED;
62+ uint16_t range_shift PACKED;
63+} OFFSET_TABLE;
64+
/* One table directory entry, read sequentially after the offset table:
 * four-byte tag, checksum, and the table's file offset and length. */
65+typedef struct _TABLE_RECORD {
66+ uint32_t tag PACKED;
67+ uint32_t check_sum PACKED;
68+ uint32_t offset PACKED;
69+ uint32_t length PACKED;
70+} TABLE_RECORD;
71+
/* Header at the start of the cmap table; num_tables counts the encoding
 * records that follow it. */
72+typedef struct _CMAP_HEADER {
73+ uint16_t version PACKED;
74+ uint16_t num_tables PACKED;
75+} CMAP_HEADER;
76+
/* One cmap encoding record; offset is added to the cmap table's own
 * offset to locate the subtable. */
77+typedef struct _ENCODING_RECORD {
78+ uint16_t platform_id PACKED;
79+ uint16_t encoding_id PACKED;
80+ uint32_t offset PACKED;
81+} ENCODING_RECORD;
82+
/* Count of user predicate numbers handed out so far; the most recent one
 * is bit (num_userpreds-1) of HASHED_STRING.userpreds. */
83+static int num_userpreds=0;
84+
85+/**********************************************************************/
86+
87+void add_userpred_character(int i) {
88+ char ebuf[4];
89+ int clen;
90+ HASHED_STRING *hs;
91+
92+ clen=construct_utf8(i,ebuf);
93+ hs=new_string(clen,ebuf);
94+ hs->userpreds|=(UINTMAX_C(1)<<(num_userpreds-1));
95+}
96+
97+/**********************************************************************/
98+
99+typedef struct _FORMAT0_TABLE {
100+ uint16_t length PACKED;
101+ uint16_t language PACKED;
102+ uint8_t glyph_ids[256];
103+} FORMAT0_TABLE;
104+
105+void scan_format0_table(FILE *fontfile,int swap_votes,
106+ char *fn,int table_number) {
107+ FORMAT0_TABLE format0_table;
108+ int i;
109+
110+ if (fread(&format0_table,sizeof(format0_table),1,fontfile)!=1) {
111+ fprintf(stderr,"error reading %s (format 0 cmap subtable %d)\n",
112+ fn,table_number);
113+ return;
114+ }
115+ for (i=0;i<256;i++)
116+ if (format0_table.glyph_ids[i]>0)
117+ add_userpred_character(i);
118+}
119+
120+/**********************************************************************/
121+
122+typedef struct _FORMAT2_SUBHEADER {
123+ uint16_t first_code PACKED;
124+ uint16_t entry_count PACKED;
125+ int16_t id_delta PACKED; /* note this one is signed */
126+ uint16_t id_range_offset;
127+} FORMAT2_SUBHEADER;
128+
129+typedef struct _FORMAT2_TABLE {
130+ uint16_t format PACKED;
131+ uint16_t length PACKED;
132+ uint16_t language PACKED;
133+ uint16_t sub_header_keys[256] PACKED;
134+ FORMAT2_SUBHEADER sub_headers[] PACKED;
135+} FORMAT2_TABLE;
136+
137+void scan_format2_table(FILE *fontfile,int swap_votes,
138+ char *fn,int table_number) {
139+ FORMAT2_TABLE *format2_table;
140+ int i,j;
141+ uint16_t length;
142+ FORMAT2_SUBHEADER *sub_header;
143+
144+ /* read the table */
145+ if (fread(&length,sizeof(length),1,fontfile)!=1) {
146+ fprintf(stderr,"error reading %s (format 2 cmap subtable %d length)\n",
147+ fn,table_number);
148+ return;
149+ }
150+ if (swap_votes>0)
151+ length=BSWAP16(length);
152+ format2_table=malloc(length);
153+ format2_table->format=2;
154+ format2_table->length=length;
155+ if (fread(((uint8_t *)&format2_table)+4,length-4,1,fontfile)!=1) {
156+ fprintf(stderr,"error reading %s (format 2 cmap subtable %d)\n",
157+ fn,table_number);
158+ free(format2_table);
159+ return;
160+ }
161+
162+ /* do swapping up front - works because entire table is 16-bit entries */
163+ for (i=2;i<(length/2);i++)
164+ ((uint16_t *)format2_table)[i]=BSWAP16(((uint16_t *)format2_table)[i]);
165+
166+ /* scan through high bytes */
167+ for (i=0;i<256;i++)
168+ if (format2_table->sub_header_keys[i]!=0) {
169+ if (((uint8_t *)&(format2_table->sub_headers))
170+ -((uint8_t *)format2_table)
171+ +format2_table->sub_header_keys[i]
172+ <format2_table->length) {
173+
174+ /* scan through low bytes */
175+ sub_header=(FORMAT2_SUBHEADER *)
176+ (((uint8_t *)&(format2_table->sub_headers))
177+ +format2_table->sub_header_keys[i]);
178+ for (j=0;j<sub_header->entry_count;j++)
179+ if (((uint8_t *)&(sub_header->id_range_offset))
180+ -((uint8_t *)format2_table)
181+ +sub_header->id_range_offset
182+ +j*sizeof(uint16_t)
183+ <format2_table->length) {
184+
185+ /* I'm sorry about this. */
186+ if (*((uint16_t *)((uint8_t *)&(sub_header->id_range_offset)
187+ +sub_header->id_range_offset
188+ +j*sizeof(uint16_t)))!=0)
189+ add_userpred_character(i*256+j);
190+
191+ } else {
192+ fprintf(stderr,"glyph index pointer out of range in %s "
193+ "(format 2 cmap subtable %d)\n",fn,table_number);
194+ free(format2_table);
195+ return;
196+ }
197+
198+ } else {
199+ fprintf(stderr,"subheader pointer out of range in %s "
200+ "(format 2 cmap subtable %d)\n",fn,table_number);
201+ free(format2_table);
202+ return;
203+ }
204+ }
205+
206+ free(format2_table);
207+}
208+
209+/**********************************************************************/
210+
211+typedef struct _FORMAT4_TABLE {
212+ uint16_t format PACKED;
213+ uint16_t length PACKED;
214+ uint16_t language PACKED;
215+ uint16_t seg_count_x2 PACKED;
216+ uint16_t search_range PACKED;
217+ uint16_t entry_selector PACKED;
218+ uint16_t range_shift PACKED;
219+ uint16_t end_count[] PACKED;
220+ /* there are several more variable-length arrays, but we have to
221+ * access them via pointer arithmetic */
222+} FORMAT4_TABLE;
223+
224+void scan_format4_table(FILE *fontfile,int swap_votes,
225+ char *fn,int table_number) {
226+ FORMAT4_TABLE *format4_table;
227+ int i,j,k;
228+ uint16_t length;
229+ uint16_t *start_count;
230+ int16_t *id_delta;
231+ uint16_t *id_range_offset;
232+
233+ /* read the table */
234+ if (fread(&length,sizeof(length),1,fontfile)!=1) {
235+ fprintf(stderr,"error reading %s (format 4 cmap subtable %d length)\n",
236+ fn,table_number);
237+ return;
238+ }
239+ if (swap_votes>0)
240+ length=BSWAP16(length);
241+ format4_table=malloc(length);
242+ format4_table->format=4;
243+ format4_table->length=length;
244+ if (fread(((uint8_t *)format4_table)+4,length-4,1,fontfile)!=1) {
245+ fprintf(stderr,"error reading %s (format 4 cmap subtable %d)\n",
246+ fn,table_number);
247+ free(format4_table);
248+ return;
249+ }
250+
251+ /* do swapping up front - works because entire table is 16-bit entries */
252+ for (i=2;i<(length/2);i++)
253+ ((uint16_t *)format4_table)[i]=BSWAP16(((uint16_t *)format4_table)[i]);
254+
255+ /* set up the pointers */
256+ start_count=&(format4_table->end_count[0])
257+ +(format4_table->seg_count_x2/2)+1;
258+ id_delta=(int16_t *)(start_count+(format4_table->seg_count_x2/2));
259+ id_range_offset=((uint16_t *)id_delta)+(format4_table->seg_count_x2/2);
260+ if (((uint8_t *)&(id_range_offset[format4_table->seg_count_x2/2]))
261+ -((uint8_t *)format4_table)
262+ >format4_table->length) {
263+ fprintf(stderr,"subtable too small in %s "
264+ "(format 4 cmap subtable %d)\n",fn,table_number);
265+ free(format4_table);
266+ return;
267+ }
268+ if ((format4_table->end_count[format4_table->seg_count_x2/2-1]!=0xFFFF) ||
269+ (format4_table->end_count[format4_table->seg_count_x2/2]!=0)) {
270+ fprintf(stderr,"endCount terminator missing in %s "
271+ "(format 4 cmap subtable %d)\n",fn,table_number);
272+ free(format4_table);
273+ return;
274+ }
275+
276+ /* scan through character codes */
277+ j=0;
278+ for (i=0;i<65536;i++) {
279+ while (format4_table->end_count[j]<i) j++;
280+ if (start_count[j]>i)
281+ i=start_count[j];
282+ if (id_range_offset[j]==0)
283+ add_userpred_character(i);
284+ else {
285+
286+ /* check that pointer is in range, and see where it points */
287+ k=((uint8_t *)&(id_range_offset[j]))-((uint8_t *)format4_table)
288+ +id_range_offset[j]+(i-id_delta[j])*2;
289+ if (k<format4_table->length) {
290+ if (*((uint16_t *)(((uint8_t *)format4_table)+k))!=0)
291+ add_userpred_character(i);
292+
293+ } else {
294+ fprintf(stderr,"glyph index pointer out of range in %s "
295+ "(format 4 cmap subtable %d)\n",fn,table_number);
296+ free(format4_table);
297+ return;
298+ }
299+ }
300+ }
301+
302+ free(format4_table);
303+}
304+
305+/**********************************************************************/
306+
/* number of 32-bit words fetched per fread() while checksumming */
#define CHECKSUM_BUFFER 2048

/*
 * Sum the 32-bit words of a table, starting at the current position of
 * fontfile.
 *
 * length is the table length in bytes and is rounded up to whole words.
 * When swap_votes>0 each word is byte-swapped before being added.  The
 * sum wraps modulo 2^32.  If a read comes up short the partial sum is
 * returned as-is; the caller is expected to notice the mismatch.
 */
uint32_t compute_opentype_checksum(FILE *fontfile,uint32_t length,
                                   int swap_votes) {
   uint32_t words[CHECKSUM_BUFFER];
   uint32_t sum=0;
   int done=0,chunk=CHECKSUM_BUFFER,k;

   /* from here on, length counts words rather than bytes */
   length=(length+sizeof(uint32_t)-1)/sizeof(uint32_t);

   while (done<length) {
      if (done+chunk>length)
         chunk=length-done;
      if (fread(words,sizeof(uint32_t),chunk,fontfile)!=chunk)
         return sum;

      for (k=0;k<chunk;k++)
         sum+=(swap_votes>0)?BSWAP32(words[k]):words[k];

      done+=chunk;
   }
   return sum;
}
332+
333+void font_file_userpred(char *fn) {
334+ FILE *fontfile;
335+ int swap_votes=0;
336+ TTC_HEADER ttc_header;
337+ OFFSET_TABLE offset_table;
338+ TABLE_RECORD table_record;
339+ int table_number;
340+ CMAP_HEADER cmap_header;
341+ ENCODING_RECORD encoding_record;
342+ uint16_t subtable_format;
343+
344+ /* allocate a user predicate number - do first so files that fail will
345+ * still consume numbers and not screw up the indexing of others. */
346+ if (num_userpreds>=8*sizeof(uintmax_t)) {
347+ fprintf(stderr,"too many user predicates, skipping %s\n",fn);
348+ return;
349+ }
350+ num_userpreds++;
351+
352+ /* open font file */
353+ fontfile=fopen(fn,"rb");
354+ if (fontfile==NULL) {
355+ fprintf(stderr,"can't open %s for reading\n",fn);
356+ return;
357+ }
358+
359+ /* look for "TrueType Collection header" (not really expected) */
360+ if (fread(&ttc_header,sizeof(ttc_header),1,fontfile)!=1) {
361+ fprintf(stderr,"error reading %s (TTC header)\n",fn);
362+ return;
363+ }
364+ if ((ttc_header.ttc_tag==0x74746366 /* 'ttcf' */) ||
365+ (ttc_header.ttc_tag==0x66637474 /* 'fctt' */)) {
366+ if (ttc_header.ttc_tag==0x74746366)
367+ swap_votes--;
368+ else
369+ swap_votes++;
370+ if ((ttc_header.version&0xFFFF0000)==0)
371+ swap_votes++;
372+ if (swap_votes>0)
373+ ttc_header.offset_table0=BSWAP32(ttc_header.offset_table0);
374+ } else
375+ ttc_header.offset_table0=0;
376+
377+ /* look for "offset table" */
378+ if (fseek(fontfile,ttc_header.offset_table0,SEEK_SET)!=0) {
379+ fprintf(stderr,"error seeking on %s (offset table)\n",fn);
380+ return;
381+ }
382+ if (fread(&offset_table,sizeof(offset_table),1,fontfile)!=1) {
383+ fprintf(stderr,"error reading %s (offset table)\n",fn);
384+ return;
385+ }
386+ /* "I know this for a fact 'cuz I used to get the shit beat out of me
387+ * by great big square-headed cowboys called Otto on a fairly regular
388+ * basis. Yeah, they were ALL named Otto, which I, uh, I'm not sure
389+ * what's up with that." - Steve Earle */
390+ if ((offset_table.sfnt_version==0x4F54544F /* 'OTTO' */) ||
391+ (offset_table.sfnt_version==0x00010000 /* 1.0 */) ||
392+ (offset_table.sfnt_version==0x00000100 /* byte-swapped 1.0 */)) {
393+ if (offset_table.sfnt_version==0x00010000)
394+ swap_votes--;
395+ else if (offset_table.sfnt_version==0x00000100)
396+ swap_votes++;
397+ } else {
398+ fprintf(stderr,"can't find sfnt wrapper in %s\n",fn);
399+ return;
400+ }
401+
402+ /* Unfortunately, in case of 'OTTO' and not a TrueType collection,
403+ * at this point we might still not have seen anything from which we
404+ * can infer whether we need byte swapping. So we might not be able
405+ * to trust num_tables, and can't necessarily use it as a termination
406+ * condition. */
407+ table_number=0;
408+ while (1) {
409+ table_number++;
410+ if ((swap_votes!=0) &&
411+ (table_number>((swap_votes>0)?
412+ BSWAP16(offset_table.num_tables):
413+ offset_table.num_tables))) {
414+ fprintf(stderr,"can't find cmap table in %s\n",fn);
415+ return;
416+ }
417+
418+ /* look for "table record" */
419+ if (fread(&table_record,sizeof(table_record),1,fontfile)!=1) {
420+ fprintf(stderr,"error reading %s (table record %d)\n",
421+ fn,table_number-1);
422+ return;
423+ }
424+
425+ /* required tables, PfEd, and space padding, are byte-swap clues */
426+ if ((table_record.tag==0x636D6170 /* 'cmap' */) ||
427+ (table_record.tag==0x68656164 /* 'head' */) ||
428+ (table_record.tag==0x68686561 /* 'hhea' */) ||
429+ (table_record.tag==0x686D7478 /* 'hmtx' */) ||
430+ (table_record.tag==0x6D617870 /* 'maxp' */) ||
431+ (table_record.tag==0x6E616D65 /* 'name' */) ||
432+ (table_record.tag==0x4F532F32 /* 'OS/2' */) ||
433+ (table_record.tag==0x706F7374 /* 'post' */) ||
434+ (table_record.tag==0x50664564 /* 'PfEd' */) ||
435+ ((table_record.tag&0xFF)==0x20 /* padding space */))
436+ swap_votes--;
437+ if ((table_record.tag==0x70616D63 /* 'pamc' */) ||
438+ (table_record.tag==0x64616568 /* 'daeh' */) ||
439+ (table_record.tag==0x61656868 /* 'aehh' */) ||
440+ (table_record.tag==0x78746D68 /* 'xtmh' */) ||
441+ (table_record.tag==0x7078616D /* 'pxam' */) ||
442+ (table_record.tag==0x656D616E /* 'eman' */) ||
443+ (table_record.tag==0x322F534F /* '2/SO' */) ||
444+ (table_record.tag==0x74736F70 /* 'tsop' */) ||
445+ (table_record.tag==0x64456650 /* 'dEfP' */) ||
446+ ((table_record.tag&0xFF000000)==0x20000000 /* ecaps gniddap */))
447+ swap_votes++;
448+
449+ if ((table_record.tag==0x636D6170 /* 'cmap' */) ||
450+ (table_record.tag==0x70616D63 /* 'pamc' */)) {
451+ if (swap_votes>0) {
452+ table_record.check_sum=BSWAP32(table_record.check_sum);
453+ table_record.offset=BSWAP32(table_record.offset);
454+ table_record.length=BSWAP32(table_record.length);
455+ }
456+ break;
457+ }
458+ }
459+
460+ /* check the checksum */
461+ if (fseek(fontfile,table_record.offset,SEEK_SET)!=0) {
462+ fprintf(stderr,"error seeking on %s (before cmap checksum)\n",fn);
463+ return;
464+ }
465+ if (compute_opentype_checksum(fontfile,table_record.length,swap_votes)
466+ !=table_record.check_sum) {
467+ fprintf(stderr,"cmap table has bad checksum in %s\n",fn);
468+ return;
469+ }
470+
471+ /* look for cmap header */
472+ if (fseek(fontfile,table_record.offset,SEEK_SET)!=0) {
473+ fprintf(stderr,"error seeking on %s (cmap header)\n",fn);
474+ return;
475+ }
476+ if (fread(&cmap_header,sizeof(cmap_header),1,fontfile)!=1) {
477+ fprintf(stderr,"error reading %s (cmap header)\n",fn);
478+ return;
479+ }
480+ if (swap_votes>0)
481+ cmap_header.num_tables=BSWAP16(cmap_header.num_tables);
482+
483+ /* look at all subtables */
484+ for (table_number=0;table_number<cmap_header.num_tables;table_number++) {
485+
486+ /* look for encoding record */
487+ if (fread(&encoding_record,sizeof(encoding_record),1,fontfile)!=1) {
488+ fprintf(stderr,"error reading %s (cmap encoding record %d)\n",
489+ fn,table_number);
490+ return;
491+ }
492+ if (swap_votes>0) {
493+ encoding_record.platform_id=BSWAP16(encoding_record.platform_id);
494+ encoding_record.encoding_id=BSWAP16(encoding_record.encoding_id);
495+ encoding_record.offset=BSWAP32(encoding_record.offset);
496+ }
497+
498+ /* We understand Unicode/all, Windows/UTF-16, Windows/UTF-32,
499+ * and ISO/all. Notably, no Macintosh-specific encodings; we will
500+ * fail on a pure old-school Mac font. Windows/UTF-32 is most
501+ * likely to be useful in practice. Many fine details of these
502+ * encodings are ignored; they are all close enough to compatible with
503+ * each other for our purposes. We use the union of all codes we
504+ * find in subtables whose encodings we understand. */
505+ if ((encoding_record.platform_id==0 /* Unicode platform */) ||
506+ (encoding_record.platform_id==2 /* ISO platform */) ||
507+ ((encoding_record.platform_id==3 /* Windows platform */)
508+ && ((encoding_record.encoding_id==1 /* UTF-16 */) ||
509+ (encoding_record.encoding_id==10 /* UTF-32 */)))) {
510+
511+ /* go to the subtable */
512+ if (fseek(fontfile,table_record.offset+encoding_record.offset,
513+ SEEK_SET)!=0) {
514+ fprintf(stderr,"error seeking on %s (for cmap subtable %d)\n",
515+ fn,table_number);
516+ return;
517+ }
518+
519+ /* process subtable format */
520+ if (fread(&subtable_format,sizeof(subtable_format),1,fontfile)!=1) {
521+ fprintf(stderr,"error reading %s (cmap subtable %d format)\n",
522+ fn,table_number);
523+ return;
524+ }
525+ if (swap_votes>0)
526+ subtable_format=BSWAP16(subtable_format);
527+ switch (subtable_format) {
528+ case 0: /* Mac byte encoding */
529+ scan_format0_table(fontfile,swap_votes,fn,table_number);
530+ break;
531+
532+ case 2: /* high byte through table */
533+ scan_format2_table(fontfile,swap_votes,fn,table_number);
534+ break;
535+
536+ case 4: /* segment mapping to delta */
537+ scan_format4_table(fontfile,swap_votes,fn,table_number);
538+ break;
539+
540+ case 6: /* 16-bit trimmed array */
541+ break;
542+
543+ case 8: /* mixed 16- and 32-bit */
544+ break;
545+
546+ case 10: /* 32-bit trimmed array */
547+ break;
548+
549+ case 12: /* Microsoft segmented */
550+ break;
551+
552+ case 13: /* many-to-one */
553+ break;
554+
555+ default:
556+ /* Subtable type 14, Unicode Variation Sequences, is
557+ * deliberately ignored because main characters in it are only
558+ * relevant after already found in another subtable and the
559+ * details of variation sequences are consciously excluded
560+ * from the EIDS syntax definition. */
561+
562+ /* unknown format is not an error here, we expect all manner
563+ * of freakiness */
564+ break;
565+ }
566+
567+ /* return to just after the encoding record */
568+ if (fseek(fontfile,
569+ table_record.offset+sizeof(cmap_header)+
570+ (table_number+1)*sizeof(encoding_record),
571+ SEEK_SET)!=0) {
572+ fprintf(stderr,"error seeking on %s "
573+ "(returning from cmap subtable %d)\n",
574+ fn,table_number);
575+ return;
576+ }
577+ }
578+ }
579+
580+ /* seen all subtables - close the file */
581+ fclose(fontfile);
582+}
583+
584+/**********************************************************************/
585+
586+NODE *user_match_fn(NODE *ms) {
587+ int i;
588+
589+ if ((ms->nc_needle->arity!=1) ||
590+ (ms->nc_haystack->head==NULL)) {
591+ ms->match_result=MR_FALSE;
592+ return ms;
593+ }
594+ if (ms->nc_needle->child[0]->head!=NULL)
595+ i=atoi(ms->nc_needle->child[0]->head->data);
596+ else
597+ i=1;
598+ if (i==0)
599+ i=1;
600+ ms->match_result=((i<=num_userpreds) && (i>0) &&
601+ ((ms->nc_haystack->head->userpreds&(1<<(i-1)))!=0))?
602+ MR_TRUE:MR_FALSE;
603+ return ms;
604+}
--- trunk/idsgrep/idsgrep.c (revision 380)
+++ trunk/idsgrep/idsgrep.c (revision 381)
@@ -129,6 +129,7 @@
129129 static struct option long_opts[] = {
130130 {"cooking",required_argument,NULL,'c'},
131131 {"dictionary",optional_argument,NULL,'d'},
132+ {"font-chars",required_argument,NULL,'f'},
132133 {"help",no_argument,NULL,'h'},
133134 {"version",no_argument,NULL,'V'},
134135 {0,0,0,0},
@@ -156,7 +157,7 @@
156157 register_syntax();
157158
158159 /* loop on command-line options */
159- while ((c=getopt_long(argc,argv,"Vc:d::h",long_opts,NULL))!=-1) {
160+ while ((c=getopt_long(argc,argv,"Vc:d::f:h",long_opts,NULL))!=-1) {
160161 switch (c) {
161162
162163 case 'V':
@@ -174,6 +175,10 @@
174175 dictname=optarg;
175176 break;
176177
178+ case 'f':
179+ font_file_userpred(optarg);
180+ break;
181+
177182 case 'h':
178183 show_help=1;
179184 break;
@@ -198,6 +203,8 @@
198203 " -V, --version display version and license\n"
199204 " -c, --cooking=FMT set input/output cooking\n"
200205 " -d, --dictionary=NAME search standard dictionary\n"
206+ " -f, --font-chars=FONT use chars in FONT as a user-defined"
207+ " predicate\n"
201208 " -h, --help display this help");
202209
203210 if (show_version || show_help)
@@ -233,6 +240,7 @@
233240 num_files>1?strlen(dictdir)+1:-1);
234241 globfree(&globres);
235242 }
243+ free(dictglob);
236244 }
237245
238246 /* loop on explicit filenames */
Show on old repository browser