Commit | Line | Data |
---|---|---|
0756eb3c PH |
1 | /* $Cambridge: exim/src/src/lookups/cdb.c,v 1.1 2004/10/07 13:10:01 ph10 Exp $ */ |
2 | ||
3 | /************************************************* | |
4 | * Exim - an Internet mail transport agent * | |
5 | *************************************************/ | |
6 | ||
7 | /* | |
8 | * Exim - CDB database lookup module | |
9 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
10 | * | |
11 | * Copyright (c) 1998 Nigel Metheringham, Planet Online Ltd | |
12 | * | |
13 | * This program is free software; you can redistribute it and/or | |
14 | * modify it under the terms of the GNU General Public License | |
15 | * as published by the Free Software Foundation; either version 2 | |
16 | * of the License, or (at your option) any later version. | |
17 | * | |
18 | * -------------------------------------------------------------- | |
19 | * Modified by PH for Exim 4: | |
20 | * Changed over to using unsigned chars | |
21 | * Makes use of lf_check_file() for file checking | |
22 | * -------------------------------------------------------------- | |
23 | * | |
24 | * This program is distributed in the hope that it will be useful, | |
25 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
26 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
27 | * GNU General Public License for more details. | |
28 | * | |
29 | * You should have received a copy of the GNU General Public License | |
30 | * along with this program; if not, write to the Free Software | |
31 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | |
32 | * 02111-1307, USA. | |
33 | * | |
34 | * | |
35 | * This code implements Dan Bernstein's Constant DataBase (cdb) spec. | |
36 | * Information, the spec and sample code for cdb can be obtained from | |
37 | * http://www.pobox.com/~djb/cdb.html | |
38 | * | |
39 | * This implementation borrows some code from Dan Bernstein's | |
40 | * implementation (which has no license restrictions applied to it). | |
41 | * This (read-only) implementation is completely contained within | |
42 | * cdb.[ch] it does *not* link against an external cdb library. | |
43 | * | |
44 | * | |
45 | * There are 2 variants included within this code. One uses MMAP and | |
46 | * should give better performance especially for multiple lookups on a | |
47 | * modern machine. The other is the default implementation which is | |
48 | * used in the case where the MMAP fails or if MMAP was not compiled | |
49 | * in. This implementation is the same as the original reference cdb | |
50 | * implementation. The MMAP version is compiled in if the HAVE_MMAP | |
51 | * preprocessor define is defined - this should be set in the system | |
52 | * specific os.h file. | |
53 | * | |
54 | */ | |
55 | ||
56 | ||
57 | #include "../exim.h" | |
58 | #include "lf_functions.h" | |
59 | #include "cdb.h" | |
60 | ||
61 | #ifdef HAVE_MMAP | |
62 | # include <sys/mman.h> | |
63 | /* Not all implementations declare MAP_FAILED */ | |
64 | # ifndef MAP_FAILED | |
65 | # define MAP_FAILED ((void *) -1) | |
66 | # endif /* MAP_FAILED */ | |
67 | #endif /* HAVE_MMAP */ | |
68 | ||
69 | ||
/* Geometry of the header that cdb_open() reads from the start of the file:
 * CDB_HASH_SPLIT entries of CDB_HASH_ENTRY bytes each. */
#define CDB_HASH_SPLIT 256                    /* number of pieces the hash table is split into */
#define CDB_HASH_MASK  (CDB_HASH_SPLIT - 1)   /* mask that selects one of those pieces */
#define CDB_HASH_ENTRY 8                      /* size in bytes of one entry (two 4-byte numbers) */
#define CDB_HASH_TABLE (CDB_HASH_SPLIT * CDB_HASH_ENTRY)  /* total header size in bytes */
74 | ||
75 | /* State information for cdb databases that are open NB while the db | |
76 | * is open its contents will not change (cdb dbs are normally updated | |
77 | * atomically by renaming). However the lifetime of one of these | |
78 | * state structures should be limited - ie a long running daemon | |
79 | * that opens one may hit problems.... | |
80 | */ | |
81 | ||
82 | struct cdb_state { | |
83 | int fileno; | |
84 | off_t filelen; | |
85 | uschar *cdb_map; | |
86 | uschar *cdb_offsets; | |
87 | }; | |
88 | ||
/* Unsigned type used for every 4-byte quantity taken from a cdb file. The
 * code relies on unsigned int being 32 bits wide, which holds on all modern
 * machines. */
typedef unsigned int uint32;
91 | ||
92 | /* | |
93 | * cdb_hash() | |
94 | * Internal function to make hash value */ | |
95 | ||
96 | static uint32 | |
97 | cdb_hash(uschar *buf, unsigned int len) | |
98 | { | |
99 | uint32 h; | |
100 | ||
101 | h = 5381; | |
102 | while (len) { | |
103 | --len; | |
104 | h += (h << 5); | |
105 | h ^= (uint32) *buf++; | |
106 | } | |
107 | return h; | |
108 | } | |
109 | ||
110 | /* | |
111 | * cdb_bread() | |
112 | * Internal function to read len bytes from disk, coping with oddities */ | |
113 | ||
114 | static int | |
115 | cdb_bread(int fd, | |
116 | uschar *buf, | |
117 | int len) | |
118 | { | |
119 | int r; | |
120 | while (len > 0) { | |
121 | do | |
122 | r = Uread(fd,buf,len); | |
123 | while ((r == -1) && (errno == EINTR)); | |
124 | if (r == -1) return -1; | |
125 | if (r == 0) { errno = EIO; return -1; } | |
126 | buf += r; | |
127 | len -= r; | |
128 | } | |
129 | return 0; | |
130 | } | |
131 | ||
132 | /* | |
133 | * cdb_bread() | |
134 | * Internal function to parse 4 byte number (endian independant) */ | |
135 | ||
136 | static uint32 | |
137 | cdb_unpack(uschar *buf) | |
138 | { | |
139 | uint32 num; | |
140 | num = buf[3]; num <<= 8; | |
141 | num += buf[2]; num <<= 8; | |
142 | num += buf[1]; num <<= 8; | |
143 | num += buf[0]; | |
144 | return num; | |
145 | } | |
146 | ||
147 | void * | |
148 | cdb_open(uschar *filename, | |
149 | uschar **errmsg) | |
150 | { | |
151 | int fileno; | |
152 | struct cdb_state *cdbp; | |
153 | struct stat statbuf; | |
154 | void * mapbuf; | |
155 | ||
156 | fileno = Uopen(filename, O_RDONLY, 0); | |
157 | if (fileno == -1) { | |
158 | int save_errno = errno; | |
159 | *errmsg = string_open_failed(errno, "%s for cdb lookup", filename); | |
160 | errno = save_errno; | |
161 | return NULL; | |
162 | } | |
163 | ||
164 | if (fstat(fileno, &statbuf) == 0) { | |
165 | /* If this is a valid file, then it *must* be at least | |
166 | * CDB_HASH_TABLE bytes long */ | |
167 | if (statbuf.st_size < CDB_HASH_TABLE) { | |
168 | int save_errno = errno; | |
169 | *errmsg = string_open_failed(errno, | |
170 | "%s to short for cdb lookup", | |
171 | filename); | |
172 | errno = save_errno; | |
173 | return NULL; | |
174 | } | |
175 | } else { | |
176 | int save_errno = errno; | |
177 | *errmsg = string_open_failed(errno, | |
178 | "fstat(%s) failed - cannot do cdb lookup", | |
179 | filename); | |
180 | errno = save_errno; | |
181 | return NULL; | |
182 | } | |
183 | ||
184 | /* Having got a file open we need the structure to put things in */ | |
185 | cdbp = store_get(sizeof(struct cdb_state)); | |
186 | /* store_get() does not return if memory was not available... */ | |
187 | /* preload the structure.... */ | |
188 | cdbp->fileno = fileno; | |
189 | cdbp->filelen = statbuf.st_size; | |
190 | cdbp->cdb_map = NULL; | |
191 | cdbp->cdb_offsets = NULL; | |
192 | ||
193 | /* if we are allowed to we use mmap here.... */ | |
194 | #ifdef HAVE_MMAP | |
195 | mapbuf = mmap(NULL, | |
196 | statbuf.st_size, | |
197 | PROT_READ, | |
198 | MAP_SHARED, | |
199 | fileno, | |
200 | 0); | |
201 | if (mapbuf != MAP_FAILED) { | |
202 | /* We have an mmap-ed section. Now we can just use it */ | |
203 | cdbp->cdb_map = mapbuf; | |
204 | /* The offsets can be set to the same value since they should | |
205 | * effectively be cached as well | |
206 | */ | |
207 | cdbp->cdb_offsets = mapbuf; | |
208 | ||
209 | /* Now return the state struct */ | |
210 | return(cdbp); | |
211 | } else { | |
212 | /* If we got here the map failed. Basically we can ignore | |
213 | * this since we fall back to slower methods.... | |
214 | * However lets debug log it... | |
215 | */ | |
216 | DEBUG(D_lookup) debug_printf("cdb mmap failed - %d\n", errno); | |
217 | } | |
218 | #endif /* HAVE_MMAP */ | |
219 | ||
220 | /* In this case we have either not got MMAP allowed, or it failed */ | |
221 | ||
222 | /* get a buffer to stash the basic offsets in - this should speed | |
223 | * things up a lot - especially on multiple lookups */ | |
224 | cdbp->cdb_offsets = store_get(CDB_HASH_TABLE); | |
225 | ||
226 | /* now fill the buffer up... */ | |
227 | if (cdb_bread(fileno, cdbp->cdb_offsets, CDB_HASH_TABLE) == -1) { | |
228 | /* read of hash table failed, oh dear, oh..... | |
229 | * time to give up I think.... | |
230 | * call the close routine (deallocs the memory), and return NULL */ | |
231 | *errmsg = string_open_failed(errno, | |
232 | "cannot read header from %s for cdb lookup", | |
233 | filename); | |
234 | cdb_close(cdbp); | |
235 | return NULL; | |
236 | } | |
237 | ||
238 | /* Everything else done - return the cache structure */ | |
239 | return cdbp; | |
240 | } | |
241 | ||
242 | ||
243 | ||
244 | /************************************************* | |
245 | * Check entry point * | |
246 | *************************************************/ | |
247 | ||
248 | BOOL | |
249 | cdb_check(void *handle, | |
250 | uschar *filename, | |
251 | int modemask, | |
252 | uid_t *owners, | |
253 | gid_t *owngroups, | |
254 | uschar **errmsg) | |
255 | { | |
256 | struct cdb_state * cdbp = handle; | |
257 | return lf_check_file(cdbp->fileno, | |
258 | filename, | |
259 | S_IFREG, | |
260 | modemask, | |
261 | owners, | |
262 | owngroups, | |
263 | "cdb", | |
264 | errmsg) == 0; | |
265 | } | |
266 | ||
267 | ||
268 | ||
269 | /************************************************* | |
270 | * Find entry point * | |
271 | *************************************************/ | |
272 | ||
273 | int | |
274 | cdb_find(void *handle, | |
275 | uschar *filename, | |
276 | uschar *keystring, | |
277 | int key_len, | |
278 | uschar **result, | |
279 | uschar **errmsg, | |
280 | BOOL *do_cache) | |
281 | { | |
282 | struct cdb_state * cdbp = handle; | |
283 | uint32 item_key_len, | |
284 | item_dat_len, | |
285 | key_hash, | |
286 | item_hash, | |
287 | item_posn, | |
288 | cur_offset, | |
289 | end_offset, | |
290 | hash_offset_entry, | |
291 | hash_offset, | |
292 | hash_offlen, | |
293 | hash_slotnm; | |
294 | int loop; | |
295 | ||
296 | /* Keep picky compilers happy */ | |
297 | do_cache = do_cache; | |
298 | ||
299 | key_hash = cdb_hash((uschar *)keystring, key_len); | |
300 | ||
301 | hash_offset_entry = CDB_HASH_ENTRY * (key_hash & CDB_HASH_MASK); | |
302 | hash_offset = cdb_unpack(cdbp->cdb_offsets + hash_offset_entry); | |
303 | hash_offlen = cdb_unpack(cdbp->cdb_offsets + hash_offset_entry + 4); | |
304 | ||
305 | /* If the offset length is zero this key cannot be in the file */ | |
306 | if (hash_offlen == 0) { | |
307 | return FAIL; | |
308 | } | |
309 | hash_slotnm = (key_hash >> 8) % hash_offlen; | |
310 | ||
311 | /* check to ensure that the file is not corrupt | |
312 | * if the hash_offset + (hash_offlen * CDB_HASH_ENTRY) is longer | |
313 | * than the file, then we have problems.... */ | |
314 | if ((hash_offset + (hash_offlen * CDB_HASH_ENTRY)) > cdbp->filelen) { | |
315 | *errmsg = string_sprintf("cdb: corrupt cdb file %s (too short)", | |
316 | filename); | |
317 | DEBUG(D_lookup) debug_printf("%s\n", *errmsg); | |
318 | return DEFER; | |
319 | } | |
320 | ||
321 | cur_offset = hash_offset + (hash_slotnm * CDB_HASH_ENTRY); | |
322 | end_offset = hash_offset + (hash_offlen * CDB_HASH_ENTRY); | |
323 | /* if we are allowed to we use mmap here.... */ | |
324 | #ifdef HAVE_MMAP | |
325 | /* make sure the mmap was OK */ | |
326 | if (cdbp->cdb_map != NULL) { | |
327 | uschar * cur_pos = cur_offset + cdbp->cdb_map; | |
328 | uschar * end_pos = end_offset + cdbp->cdb_map; | |
329 | for (loop = 0; (loop < hash_offlen); ++loop) { | |
330 | item_hash = cdb_unpack(cur_pos); | |
331 | cur_pos += 4; | |
332 | item_posn = cdb_unpack(cur_pos); | |
333 | cur_pos += 4; | |
334 | /* if the position is zero then we have a definite miss */ | |
335 | if (item_posn == 0) | |
336 | return FAIL; | |
337 | ||
338 | if (item_hash == key_hash) { | |
339 | /* matching hash value */ | |
340 | uschar * item_ptr = cdbp->cdb_map + item_posn; | |
341 | item_key_len = cdb_unpack(item_ptr); | |
342 | item_ptr += 4; | |
343 | item_dat_len = cdb_unpack(item_ptr); | |
344 | item_ptr += 4; | |
345 | /* check key length matches */ | |
346 | if (item_key_len == key_len) { | |
347 | /* finally check if key matches */ | |
348 | if (Ustrncmp(keystring, item_ptr, key_len) == 0) { | |
349 | /* we have a match.... | |
350 | * make item_ptr point to data */ | |
351 | item_ptr += item_key_len; | |
352 | /* ... and the returned result */ | |
353 | *result = store_get(item_dat_len + 1); | |
354 | memcpy(*result, item_ptr, item_dat_len); | |
355 | (*result)[item_dat_len] = 0; | |
356 | return OK; | |
357 | } | |
358 | } | |
359 | } | |
360 | /* handle warp round of table */ | |
361 | if (cur_pos == end_pos) | |
362 | cur_pos = cdbp->cdb_map + hash_offset; | |
363 | } | |
364 | /* looks like we failed... */ | |
365 | return FAIL; | |
366 | } | |
367 | #endif /* HAVE_MMAP */ | |
368 | for (loop = 0; (loop < hash_offlen); ++loop) { | |
369 | uschar packbuf[8]; | |
370 | if (lseek(cdbp->fileno, (off_t) cur_offset,SEEK_SET) == -1) return DEFER; | |
371 | if (cdb_bread(cdbp->fileno, packbuf,8) == -1) return DEFER; | |
372 | item_hash = cdb_unpack(packbuf); | |
373 | item_posn = cdb_unpack(packbuf + 4); | |
374 | /* if the position is zero then we have a definite miss */ | |
375 | if (item_posn == 0) | |
376 | return FAIL; | |
377 | ||
378 | if (item_hash == key_hash) { | |
379 | /* matching hash value */ | |
380 | if (lseek(cdbp->fileno, (off_t) item_posn, SEEK_SET) == -1) return DEFER; | |
381 | if (cdb_bread(cdbp->fileno, packbuf, 8) == -1) return DEFER; | |
382 | item_key_len = cdb_unpack(packbuf); | |
383 | /* check key length matches */ | |
384 | if (item_key_len == key_len) { | |
385 | /* finally check if key matches */ | |
386 | uschar * item_key = store_get(key_len); | |
387 | if (cdb_bread(cdbp->fileno, item_key, key_len) == -1) return DEFER; | |
388 | if (Ustrncmp(keystring, item_key, key_len) == 0) { | |
389 | /* Reclaim some store */ | |
390 | store_reset(item_key); | |
391 | /* matches - get data length */ | |
392 | item_dat_len = cdb_unpack(packbuf + 4); | |
393 | /* then we build a new result string */ | |
394 | *result = store_get(item_dat_len + 1); | |
395 | if (cdb_bread(cdbp->fileno, *result, item_dat_len) == -1) | |
396 | return DEFER; | |
397 | (*result)[item_dat_len] = 0; | |
398 | return OK; | |
399 | } | |
400 | /* Reclaim some store */ | |
401 | store_reset(item_key); | |
402 | } | |
403 | } | |
404 | cur_offset += 8; | |
405 | ||
406 | /* handle warp round of table */ | |
407 | if (cur_offset == end_offset) | |
408 | cur_offset = hash_offset; | |
409 | } | |
410 | return FAIL; | |
411 | } | |
412 | ||
413 | ||
414 | ||
415 | /************************************************* | |
416 | * Close entry point * | |
417 | *************************************************/ | |
418 | ||
419 | /* See local README for interface description */ | |
420 | ||
421 | void | |
422 | cdb_close(void *handle) | |
423 | { | |
424 | struct cdb_state * cdbp = handle; | |
425 | ||
426 | #ifdef HAVE_MMAP | |
427 | if (cdbp->cdb_map) { | |
428 | munmap(CS cdbp->cdb_map, cdbp->filelen); | |
429 | if (cdbp->cdb_map == cdbp->cdb_offsets) | |
430 | cdbp->cdb_offsets = NULL; | |
431 | } | |
432 | #endif /* HAVE_MMAP */ | |
433 | ||
434 | close(cdbp->fileno); | |
435 | } | |
436 | ||
437 | /* End of lookups/cdb.c */ |