# $Id: mknmzrc.in.dist,v 1.3 2002/03/06 22:47:29 ehood Exp $
# This is a Namazu configuration file for mknmz.
# Every setting in this file is an undeclared package variable, so this
# statement is what places them in the `conf' namespace ($conf::ADDRESS,
# $conf::ALLOW_FILE, ...).
package conf;  # Don't remove this line!

#===================================================================
# Administrator's email address
# NOTE(review): '@@ADMIN_ADDRESS@@' looks like a template placeholder
# that gets substituted before this file is used -- confirm.
$ADDRESS = '@@ADMIN_ADDRESS@@';
#===================================================================
# Regular Expression Patterns

# This pattern specifies HTML suffixes.
# It is interpolated into $ALLOW_FILE below inside a (?:...) group,
# so a plain alternation is safe here.
$HTML_SUFFIX = "html|[ps]html";

# This pattern specifies file names which will be targeted.
# NOTE: It can be specified by the --allow=regex option.
#       Do NOT use `$' or `^' anchors.
# The value is built from three string pieces; each piece after the
# first starts with `|' so the concatenation stays one alternation.
$ALLOW_FILE = ".*\\.(?:$HTML_SUFFIX)|.*\\.txt" .  # HTML, plain text
              "|.*\\.gz|.*\\.Z|.*\\.bz2" .        # Compressed files
              "|\\d+|[-\\w]+\\.[1-9n]";           # Mail/News, man
# This pattern specifies file names which will NOT be targeted.
# NOTE: It can be specified by the --deny=regex option.
#       Do NOT use `$' or `^' anchors.
# The alternatives are joined with `|' into a single pattern string.
# Groups are non-capturing, matching the style used by $ALLOW_FILE;
# the set of file names matched is unchanged.
$DENY_FILE = join '|',
    "(?:index|threads)\\.html",      # index.html, threads.html
    ".*\\.(?:gif|png|jpg|jpeg)",     # image files
    ".*\\.tar\\.gz",                 # gzipped tar archives
    "core",                          # files named exactly "core"
    ".*\\.bak|.*~",                  # backup files
    "\\..*",                         # names starting with a dot
    "\x23.*";                        # names starting with '#' (\x23 is '#';
                                     # presumably escaped to keep a literal
                                     # '#' out of the source text -- confirm)
# This pattern specifies PATHNAMEs which will NOT be targeted.
# NOTE: Usually specified by the --exclude=regex option.
# $EXCLUDE_PATH = undef;
# This pattern specifies file names which can be omitted
# in a URI, e.g., 'index.html|index.htm|Default.html'
# NOTE: This is similar to Apache's "DirectoryIndex" directive.
# $DIRECTORY_INDEX = "";
# This pattern specifies the Mail/News header fields which
# should be searchable. NOTE: case-insensitive
# $REMAIN_HEADER = "From|Date|Message-ID";
# This pattern specifies fields which are used for field-specified
# searching. NOTE: case-insensitive
# $SEARCH_FIELD = "message-id|subject|from|date|uri|newsgroups|to|summary|size";
# This pattern specifies meta tags which are used for field-specified
# searching. NOTE: case-insensitive
# $META_TAGS = "keywords|description";
# This hash specifies aliases for NMZ.field.* files.
# NOTE: Editing NOT recommended.
# %FIELD_ALIASES = ('title' => 'subject', 'author' => 'from');
# This pattern specifies HTML elements which should be replaced with
# a null string when removing them. Normally, the elements are replaced
# with a single space character.
# $NON_SEPARATION_ELEMENTS = 'A|TT|CODE|SAMP|KBD|VAR|B|STRONG|I|EM|CITE|FONT|U|'.
#     'STRIKE|BIG|SMALL|DFN|ABBR|ACRONYM|Q|SUB|SUP|SPAN|BDO';
#===================================================================
# The max size of files which can be loaded in memory at once.
# If you have much memory, you can increase the value.
# If you have less memory, you can decrease the value.
# (Underscores in the literal are digit grouping only; the value is
# still 5000000.)
$ON_MEMORY_MAX = 5_000_000;
# The max file size for indexing. Files larger than this
# are not indexed.
# NOTE: This value is usually larger than TEXT_SIZE_MAX because
#       binary-formatted files such as PDF, Word are larger.
# (Underscores in the literal are digit grouping only; the value is
# still 500000.)
$FILE_SIZE_MAX = 500_000;
# The max text size for indexing. Files larger than this
# are not indexed.
# (Underscores in the literal are digit grouping only; the value is
# still 100000.)
$TEXT_SIZE_MAX = 100_000;
# The max length of a word. A word longer than this will be ignored.
# Weights for HTML elements which are used for term weighting.
#     'metakey' => 32,  # for <meta name="keywords" content="foo bar">
#     'headers' => 8,   # for Mail/News headers
# The max length of an HTML-tagged string which can be processed for
# term weighting.
# NOTE: Quite a few people have the bad habit of using
#       <h[1-6]> just to change the font size.
# $INVALID_LENG = 128;
# The max length of a field.
# This MUST be smaller than libnamazu.h's BUFSIZE (usually 1024).
# $MAX_FIELD_LENGTH = 200;
#===================================================================
# Software for handling Japanese text
# Network Kanji Filter nkf v1.62 or later
# $KAKASI = "no -ieuc -oeuc -w";
# ChaSen 1.51 or later (simple wakatigaki)
# $CHASEN = "no -j -F '\%m '";
# ChaSen 1.51 or later (with noun words extraction)
# $CHASEN_NOUN = "no -j -F '\%m %H\\n'";
# Default Japanese processor: KAKASI or ChaSen.
#===================================================================
# $LIBDIR = "@PERLLIBDIR@";
# $FILTERDIR = "@FILTERDIR@";
# $TEMPLATEDIR = "@TEMPLATEDIR@";