| 1 |
# |
| 2 |
# This is a Namazu configuration file for mknmz. |
| 3 |
# |
| 4 |
# Unqualified variables assigned below (e.g. $ADDRESS) live in the conf:: namespace.
package conf; # Don't remove this line! |
| 5 |
|
| 6 |
#=================================================================== |
| 7 |
# |
| 8 |
# Administrator's email address |
| 9 |
# |
| 10 |
# NOTE(review): '%AUTHOR%' looks like a template placeholder that must be
# replaced with the real address before this file is used -- confirm.
$ADDRESS = '%AUTHOR%'; |
| 11 |
|
| 12 |
|
| 13 |
#=================================================================== |
| 14 |
# |
| 15 |
# Regular Expression Patterns |
| 16 |
# |
| 17 |
|
| 18 |
# |
| 19 |
# This pattern specifies HTML suffixes. |
| 20 |
# |
| 21 |
# $HTML_SUFFIX = "html?|[ps]html|html\\.[a-z]{2}"; |
| 22 |
|
| 23 |
# |
| 24 |
# This pattern specifies file names which will be targeted. |
| 25 |
# NOTE: It can be specified by --allow=regex option. |
| 26 |
# Do NOT use `$' or `^' anchors. |
| 27 |
# Case-insensitive. |
| 28 |
# |
| 29 |
# $ALLOW_FILE = ".*\\.(?:$HTML_SUFFIX)|.*\\.txt" . # HTML, plain text |
| 30 |
# "|.*\\.gz|.*\\.Z|.*\\.bz2" . # Compressed files |
| 31 |
# "|.*\\.pdf|.*\\.ps" . # PDF, PostScript |
| 32 |
# "|.*\\.tex|.*\\.dvi" . # TeX, DVI |
| 33 |
# "|.*\\.rpm|.*\\.deb" . # RPM, DEB |
| 34 |
# "|.*\\.doc|.*\\.xls|.*\\.ppt" . # Word, Excel, PowerPoint |
| 35 |
# "|.*\\.j[sabf]w|.*\\.jtd" . # Ichitaro 4, 5, 6, 7, 8 |
| 36 |
# "|\\d+|[-\\w]+\\.[1-9n]"; # Mail/News, man |
| 37 |
# Active setting (replaces the commented-out default above): only file names
# matching '.*\.hnf' are targeted (presumably hns diary files -- confirm).
$ALLOW_FILE = '.*\.hnf'; |
| 38 |
|
| 39 |
# |
| 40 |
# This pattern specifies file names which will NOT be targeted. |
| 41 |
# NOTE: It can be specified by --deny=regex option. |
| 42 |
# Do NOT use `$' or `^' anchors. |
| 43 |
# Case-insensitive. |
| 44 |
# |
| 45 |
# $DENY_FILE = ".*\\.(gif|png|jpg|jpeg)|.*\\.tar\\.gz|core|.*\\.bak|.*~|\\..*|\x23.*"; |
| 46 |
|
| 47 |
# |
| 48 |
# This pattern specifies PATHNAMEs which will NOT be targeted. |
| 49 |
# NOTE: Usually specified by --exclude=regex option. |
| 50 |
# |
| 51 |
# $EXCLUDE_PATH = undef; |
| 52 |
|
| 53 |
# |
| 54 |
# This pattern specifies file names which can be omitted |
| 55 |
# in URI. e.g., 'index.html|index.htm|Default.html' |
| 56 |
# |
| 57 |
# NOTE: This is similar to Apache's "DirectoryIndex" directive. |
| 58 |
# |
| 59 |
# $DIRECTORY_INDEX = ""; |
| 60 |
|
| 61 |
# |
| 62 |
# This pattern specifies Mail/News's fields in its header which |
| 63 |
# should be searchable. NOTE: case-insensitive |
| 64 |
# |
| 65 |
# $REMAIN_HEADER = "From|Date|Message-ID"; |
| 66 |
|
| 67 |
# |
| 68 |
# This pattern specifies fields which are used for field-specified |
| 69 |
# searching. NOTE: case-insensitive |
| 70 |
# |
| 71 |
# $SEARCH_FIELD = "message-id|subject|from|date|uri|newsgroups|to|summary|size"; |
| 72 |
|
| 73 |
# |
| 74 |
# This pattern specifies meta tags which are used for field-specified |
| 75 |
# searching. NOTE: case-insensitive |
| 76 |
# |
| 77 |
# $META_TAGS = "keywords|description"; |
| 78 |
|
| 79 |
# |
| 80 |
# This pattern specifies aliases for NMZ.field.* files. |
| 81 |
# NOTE: Editing NOT recommended. |
| 82 |
# |
| 83 |
# %FIELD_ALIASES = ('title' => 'subject', 'author' => 'from'); |
| 84 |
|
| 85 |
# |
| 86 |
# This pattern specifies HTML elements which should be replaced with |
| 87 |
# null string when removing them. Normally, the elements are replaced |
| 88 |
# with a single space character. |
| 89 |
# |
| 90 |
# $NON_SEPARATION_ELEMENTS = 'A|TT|CODE|SAMP|KBD|VAR|B|STRONG|I|EM|CITE|FONT|U|'. |
| 91 |
# 'STRIKE|BIG|SMALL|DFN|ABBR|ACRONYM|Q|SUB|SUP|SPAN|BDO'; |
| 92 |
|
| 93 |
#=================================================================== |
| 94 |
# |
| 95 |
# Critical Numbers |
| 96 |
# |
| 97 |
|
| 98 |
# |
| 99 |
# The max size of files which can be loaded in memory at once. |
| 100 |
# If you have much memory, you can increase the value. |
| 101 |
# If you have less memory, you can decrease the value. |
| 102 |
# |
| 103 |
# NOTE(review): the unit is presumably bytes -- confirm.
$ON_MEMORY_MAX = 1000000; |
| 104 |
|
| 105 |
# |
| 106 |
# The max file size for indexing. Files larger than this |
| 107 |
# will be ignored. |
| 108 |
# NOTE: This value is usually larger than TEXT_SIZE_MAX because |
| 109 |
# binary-formatted files such as PDF and Word are larger. |
| 110 |
# |
| 111 |
# $FILE_SIZE_MAX = 2000000; |
| 112 |
|
| 113 |
# |
| 114 |
# The max text size for indexing. Files larger than this |
| 115 |
# will be ignored. |
| 116 |
# |
| 117 |
# $TEXT_SIZE_MAX = 600000; |
| 118 |
|
| 119 |
# |
| 120 |
# The max length of a word. Words longer than this will be ignored. |
| 121 |
# |
| 122 |
# $WORD_LENG_MAX = 128; |
| 123 |
|
| 124 |
|
| 125 |
# |
| 126 |
# Weights for HTML elements which are used for term weighting. |
| 127 |
# |
| 128 |
# %Weight = |
| 129 |
# ( |
| 130 |
# 'html' => { |
| 131 |
# 'title' => 16, |
| 132 |
# 'h1' => 8, |
| 133 |
# 'h2' => 7, |
| 134 |
# 'h3' => 6, |
| 135 |
# 'h4' => 5, |
| 136 |
# 'h5' => 4, |
| 137 |
# 'h6' => 3, |
| 138 |
# 'a' => 4, |
| 139 |
# 'strong' => 2, |
| 140 |
# 'em' => 2, |
| 141 |
# 'kbd' => 2, |
| 142 |
# 'samp' => 2, |
| 143 |
# 'var' => 2, |
| 144 |
# 'code' => 2, |
| 145 |
# 'cite' => 2, |
| 146 |
# 'abbr' => 2, |
| 147 |
# 'acronym'=> 2, |
| 148 |
# 'dfn' => 2, |
| 149 |
# }, |
| 150 |
# 'metakey' => 32, # for <meta name="keywords" content="foo bar"> |
| 151 |
# 'headers' => 8, # for Mail/News' headers |
| 152 |
# ); |
| 153 |
|
| 154 |
# |
| 155 |
# The max length of a HTML-tagged string which can be processed for |
| 156 |
# term weighting. |
| 157 |
# NOTE: Quite a few people misuse <h[1-6]> merely to change |
| 158 |
# the font size. |
| 159 |
# |
| 160 |
# $INVALID_LENG = 128; |
| 161 |
|
| 162 |
# |
| 163 |
# The max length of a field. |
| 164 |
# This MUST be smaller than libnamazu.h's BUFSIZE (usually 1024). |
| 165 |
# |
| 166 |
# $MAX_FIELD_LENGTH = 200; |
| 167 |
|
| 168 |
|
| 169 |
#=================================================================== |
| 170 |
# |
| 171 |
# Software for handling Japanese text |
| 172 |
# |
| 173 |
|
| 174 |
# |
| 175 |
# Network Kanji Filter nkf v1.62 or later |
| 176 |
# |
| 177 |
# $NKF = "module_nkf"; |
| 178 |
|
| 179 |
# |
| 180 |
# KAKASI |
| 181 |
# |
| 182 |
# $KAKASI = "module_kakasi -ieuc -oeuc -w"; |
| 183 |
|
| 184 |
# |
| 185 |
# ChaSen 1.51 or later (simple wakatigaki) |
| 186 |
# |
| 187 |
# $CHASEN = "no"; |
| 188 |
|
| 189 |
# |
| 190 |
# ChaSen 1.51 or later (with noun words extraction) |
| 191 |
# |
| 192 |
# $CHASEN_NOUN = "no"; |
| 193 |
|
| 194 |
# |
| 195 |
# Default Japanese processor: KAKASI or ChaSen. |
| 196 |
# |
| 197 |
# $WAKATI = $KAKASI; |
| 198 |
|
| 199 |
|
| 200 |
#=================================================================== |
| 201 |
# |
| 202 |
# Directories |
| 203 |
# |
| 204 |
# $LIBDIR = "@PERLLIBDIR@"; |
| 205 |
# $FILTERDIR = "@FILTERDIR@"; |
| 206 |
# $TEMPLATEDIR = "@TEMPLATEDIR@"; |
| 207 |
|
| 208 |
|
| 209 |
#--------------------------------------------------------------- |
| 210 |
# |
| 211 |
# HyperNikkiSystem (hns) |
| 212 |
# |
| 213 |
|
| 214 |
# Settings in the hnf:: namespace.
# NOTE(review): %VERSION%, %URI%, %AUTHOR% and %ALIAS_FILE% look like template
# placeholders that are substituted before this file is used -- confirm.
$hnf::hns_version = %VERSION%; # hns major version, 2 or 1 |
| 215 |
# NOTE(review): presumably the base URI of the diary -- confirm.
$hnf::diary_uri = "%URI%"; |
| 216 |
# NOTE(review): presumably the diary author's name -- confirm.
$hnf::author = '%AUTHOR%'; |
| 217 |
# NOTE(review): presumably the path to the hns alias file -- confirm.
$hnf::alias_file = "%ALIAS_FILE%"; |
| 218 |
$hnf::grp_hide = 1; # 1 -> don't show GRP section |
| 219 |
# 0 -> show GRP section |
| 220 |
# You can customize the link to your diary. |
| 221 |
#$hnf::link_templ = '?%year%month%abc#%year%month%day0'; # hns-2.x |
| 222 |
#$hnf::link_templ = '?%year%month%hiday#%year%month%day0'; # hns-1.x |
| 223 |
#$hnf::link_templ = '%year%month.html#%year%month%day0'; # static |
| 224 |
#$hnf::link_templ = '?%year%month%day#%year%month%day0'; # one day |
| 225 |
|
| 226 |
# 1; |
| 227 |
|