Version reporting & module ABI change.
[exim.git] / src / src / lookups / cdb.c
CommitLineData
f1e894f3 1/* $Cambridge: exim/src/src/lookups/cdb.c,v 1.3 2005/06/27 14:29:44 ph10 Exp $ */
0756eb3c
PH
2
3/*************************************************
4* Exim - an Internet mail transport agent *
5*************************************************/
6
7/*
8 * Exim - CDB database lookup module
9 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
10 *
11 * Copyright (c) 1998 Nigel Metheringham, Planet Online Ltd
12 *
13 * This program is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU General Public License
15 * as published by the Free Software Foundation; either version 2
16 * of the License, or (at your option) any later version.
17 *
18 * --------------------------------------------------------------
19 * Modified by PH for Exim 4:
20 * Changed over to using unsigned chars
21 * Makes use of lf_check_file() for file checking
22 * --------------------------------------------------------------
23 *
24 * This program is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27 * GNU General Public License for more details.
28 *
29 * You should have received a copy of the GNU General Public License
30 * along with this program; if not, write to the Free Software
31 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
32 * 02111-1307, USA.
33 *
34 *
35 * This code implements Dan Bernstein's Constant DataBase (cdb) spec.
36 * Information, the spec and sample code for cdb can be obtained from
37 * http://www.pobox.com/~djb/cdb.html
38 *
39 * This implementation borrows some code from Dan Bernstein's
40 * implementation (which has no license restrictions applied to it).
41 * This (read-only) implementation is completely contained within
42 * cdb.[ch] it does *not* link against an external cdb library.
43 *
44 *
 * There are 2 variants included within this code. One uses MMAP and
 * should give better performance, especially for multiple lookups on a
 * modern machine. The other is the default implementation, which is
 * used in the case where the MMAP fails or if MMAP was not compiled
 * in. This implementation is the same as the original reference cdb
 * implementation. The MMAP version is compiled in if the HAVE_MMAP
 * preprocessor define is defined - this should be set in the system
 * specific os.h file.
53 *
54 */
55
56
57#include "../exim.h"
58#include "lf_functions.h"
0756eb3c
PH
59
60#ifdef HAVE_MMAP
61# include <sys/mman.h>
62/* Not all implementations declare MAP_FAILED */
63# ifndef MAP_FAILED
64# define MAP_FAILED ((void *) -1)
65# endif /* MAP_FAILED */
66#endif /* HAVE_MMAP */
67
68
/* Geometry of the fixed-size table of (offset, length) pairs that sits
 * at the very start of every cdb file. */
#define CDB_HASH_SPLIT 256                  /* number of pieces the hash table is split into */
#define CDB_HASH_MASK  (CDB_HASH_SPLIT - 1) /* mask that picks the piece out of a hash value */
#define CDB_HASH_ENTRY 8                    /* size in bytes of one table entry */
#define CDB_HASH_TABLE (CDB_HASH_SPLIT * CDB_HASH_ENTRY) /* size in bytes of the whole table */
73
74/* State information for cdb databases that are open NB while the db
75 * is open its contents will not change (cdb dbs are normally updated
76 * atomically by renaming). However the lifetime of one of these
77 * state structures should be limited - ie a long running daemon
78 * that opens one may hit problems....
79 */
80
81struct cdb_state {
82 int fileno;
83 off_t filelen;
84 uschar *cdb_map;
85 uschar *cdb_offsets;
86};
87
88/* 32 bit unsigned type - this is an int on all modern machines */
89typedef unsigned int uint32;
90
91/*
92 * cdb_hash()
93 * Internal function to make hash value */
94
95static uint32
96cdb_hash(uschar *buf, unsigned int len)
97{
98 uint32 h;
99
100 h = 5381;
101 while (len) {
102 --len;
103 h += (h << 5);
104 h ^= (uint32) *buf++;
105 }
106 return h;
107}
108
109/*
110 * cdb_bread()
111 * Internal function to read len bytes from disk, coping with oddities */
112
113static int
114cdb_bread(int fd,
115 uschar *buf,
116 int len)
117{
118 int r;
119 while (len > 0) {
120 do
121 r = Uread(fd,buf,len);
122 while ((r == -1) && (errno == EINTR));
123 if (r == -1) return -1;
124 if (r == 0) { errno = EIO; return -1; }
125 buf += r;
126 len -= r;
127 }
128 return 0;
129}
130
/*
 * cdb_unpack()
 * Internal function to parse 4 byte number (endian independent) */
134
135static uint32
136cdb_unpack(uschar *buf)
137{
138 uint32 num;
139 num = buf[3]; num <<= 8;
140 num += buf[2]; num <<= 8;
141 num += buf[1]; num <<= 8;
142 num += buf[0];
143 return num;
144}
145
e6d225ae
DW
146static void cdb_close(void *handle);
147
148static void *
0756eb3c
PH
149cdb_open(uschar *filename,
150 uschar **errmsg)
151{
152 int fileno;
153 struct cdb_state *cdbp;
154 struct stat statbuf;
155 void * mapbuf;
156
157 fileno = Uopen(filename, O_RDONLY, 0);
158 if (fileno == -1) {
159 int save_errno = errno;
160 *errmsg = string_open_failed(errno, "%s for cdb lookup", filename);
161 errno = save_errno;
162 return NULL;
163 }
164
165 if (fstat(fileno, &statbuf) == 0) {
166 /* If this is a valid file, then it *must* be at least
167 * CDB_HASH_TABLE bytes long */
168 if (statbuf.st_size < CDB_HASH_TABLE) {
169 int save_errno = errno;
170 *errmsg = string_open_failed(errno,
0d7eb84a 171 "%s too short for cdb lookup",
0756eb3c
PH
172 filename);
173 errno = save_errno;
174 return NULL;
175 }
176 } else {
177 int save_errno = errno;
178 *errmsg = string_open_failed(errno,
179 "fstat(%s) failed - cannot do cdb lookup",
180 filename);
181 errno = save_errno;
182 return NULL;
183 }
184
185 /* Having got a file open we need the structure to put things in */
186 cdbp = store_get(sizeof(struct cdb_state));
187 /* store_get() does not return if memory was not available... */
188 /* preload the structure.... */
189 cdbp->fileno = fileno;
190 cdbp->filelen = statbuf.st_size;
191 cdbp->cdb_map = NULL;
192 cdbp->cdb_offsets = NULL;
193
194 /* if we are allowed to we use mmap here.... */
195#ifdef HAVE_MMAP
196 mapbuf = mmap(NULL,
197 statbuf.st_size,
198 PROT_READ,
199 MAP_SHARED,
200 fileno,
201 0);
202 if (mapbuf != MAP_FAILED) {
203 /* We have an mmap-ed section. Now we can just use it */
204 cdbp->cdb_map = mapbuf;
205 /* The offsets can be set to the same value since they should
206 * effectively be cached as well
207 */
208 cdbp->cdb_offsets = mapbuf;
209
210 /* Now return the state struct */
211 return(cdbp);
212 } else {
213 /* If we got here the map failed. Basically we can ignore
214 * this since we fall back to slower methods....
215 * However lets debug log it...
216 */
217 DEBUG(D_lookup) debug_printf("cdb mmap failed - %d\n", errno);
218 }
219#endif /* HAVE_MMAP */
220
221 /* In this case we have either not got MMAP allowed, or it failed */
222
223 /* get a buffer to stash the basic offsets in - this should speed
224 * things up a lot - especially on multiple lookups */
225 cdbp->cdb_offsets = store_get(CDB_HASH_TABLE);
226
227 /* now fill the buffer up... */
228 if (cdb_bread(fileno, cdbp->cdb_offsets, CDB_HASH_TABLE) == -1) {
229 /* read of hash table failed, oh dear, oh.....
230 * time to give up I think....
231 * call the close routine (deallocs the memory), and return NULL */
232 *errmsg = string_open_failed(errno,
233 "cannot read header from %s for cdb lookup",
234 filename);
235 cdb_close(cdbp);
236 return NULL;
237 }
238
239 /* Everything else done - return the cache structure */
240 return cdbp;
241}
242
243
244
245/*************************************************
246* Check entry point *
247*************************************************/
248
e6d225ae 249static BOOL
0756eb3c
PH
250cdb_check(void *handle,
251 uschar *filename,
252 int modemask,
253 uid_t *owners,
254 gid_t *owngroups,
255 uschar **errmsg)
256{
257 struct cdb_state * cdbp = handle;
258 return lf_check_file(cdbp->fileno,
259 filename,
260 S_IFREG,
261 modemask,
262 owners,
263 owngroups,
264 "cdb",
265 errmsg) == 0;
266}
267
268
269
270/*************************************************
271* Find entry point *
272*************************************************/
273
e6d225ae 274static int
0756eb3c
PH
275cdb_find(void *handle,
276 uschar *filename,
277 uschar *keystring,
278 int key_len,
279 uschar **result,
280 uschar **errmsg,
281 BOOL *do_cache)
282{
283 struct cdb_state * cdbp = handle;
284 uint32 item_key_len,
285 item_dat_len,
286 key_hash,
287 item_hash,
288 item_posn,
289 cur_offset,
290 end_offset,
291 hash_offset_entry,
292 hash_offset,
293 hash_offlen,
294 hash_slotnm;
295 int loop;
296
297 /* Keep picky compilers happy */
298 do_cache = do_cache;
299
300 key_hash = cdb_hash((uschar *)keystring, key_len);
301
302 hash_offset_entry = CDB_HASH_ENTRY * (key_hash & CDB_HASH_MASK);
303 hash_offset = cdb_unpack(cdbp->cdb_offsets + hash_offset_entry);
304 hash_offlen = cdb_unpack(cdbp->cdb_offsets + hash_offset_entry + 4);
305
306 /* If the offset length is zero this key cannot be in the file */
307 if (hash_offlen == 0) {
308 return FAIL;
309 }
310 hash_slotnm = (key_hash >> 8) % hash_offlen;
311
312 /* check to ensure that the file is not corrupt
313 * if the hash_offset + (hash_offlen * CDB_HASH_ENTRY) is longer
314 * than the file, then we have problems.... */
315 if ((hash_offset + (hash_offlen * CDB_HASH_ENTRY)) > cdbp->filelen) {
316 *errmsg = string_sprintf("cdb: corrupt cdb file %s (too short)",
317 filename);
318 DEBUG(D_lookup) debug_printf("%s\n", *errmsg);
319 return DEFER;
320 }
321
322 cur_offset = hash_offset + (hash_slotnm * CDB_HASH_ENTRY);
323 end_offset = hash_offset + (hash_offlen * CDB_HASH_ENTRY);
324 /* if we are allowed to we use mmap here.... */
325#ifdef HAVE_MMAP
326 /* make sure the mmap was OK */
327 if (cdbp->cdb_map != NULL) {
328 uschar * cur_pos = cur_offset + cdbp->cdb_map;
329 uschar * end_pos = end_offset + cdbp->cdb_map;
330 for (loop = 0; (loop < hash_offlen); ++loop) {
331 item_hash = cdb_unpack(cur_pos);
332 cur_pos += 4;
333 item_posn = cdb_unpack(cur_pos);
334 cur_pos += 4;
335 /* if the position is zero then we have a definite miss */
336 if (item_posn == 0)
337 return FAIL;
338
339 if (item_hash == key_hash) {
340 /* matching hash value */
341 uschar * item_ptr = cdbp->cdb_map + item_posn;
342 item_key_len = cdb_unpack(item_ptr);
343 item_ptr += 4;
344 item_dat_len = cdb_unpack(item_ptr);
345 item_ptr += 4;
346 /* check key length matches */
347 if (item_key_len == key_len) {
348 /* finally check if key matches */
349 if (Ustrncmp(keystring, item_ptr, key_len) == 0) {
350 /* we have a match....
351 * make item_ptr point to data */
352 item_ptr += item_key_len;
353 /* ... and the returned result */
354 *result = store_get(item_dat_len + 1);
355 memcpy(*result, item_ptr, item_dat_len);
356 (*result)[item_dat_len] = 0;
357 return OK;
358 }
359 }
360 }
361 /* handle warp round of table */
362 if (cur_pos == end_pos)
363 cur_pos = cdbp->cdb_map + hash_offset;
364 }
365 /* looks like we failed... */
366 return FAIL;
367 }
368#endif /* HAVE_MMAP */
369 for (loop = 0; (loop < hash_offlen); ++loop) {
370 uschar packbuf[8];
371 if (lseek(cdbp->fileno, (off_t) cur_offset,SEEK_SET) == -1) return DEFER;
372 if (cdb_bread(cdbp->fileno, packbuf,8) == -1) return DEFER;
373 item_hash = cdb_unpack(packbuf);
374 item_posn = cdb_unpack(packbuf + 4);
375 /* if the position is zero then we have a definite miss */
376 if (item_posn == 0)
377 return FAIL;
378
379 if (item_hash == key_hash) {
380 /* matching hash value */
381 if (lseek(cdbp->fileno, (off_t) item_posn, SEEK_SET) == -1) return DEFER;
382 if (cdb_bread(cdbp->fileno, packbuf, 8) == -1) return DEFER;
383 item_key_len = cdb_unpack(packbuf);
384 /* check key length matches */
385 if (item_key_len == key_len) {
386 /* finally check if key matches */
387 uschar * item_key = store_get(key_len);
388 if (cdb_bread(cdbp->fileno, item_key, key_len) == -1) return DEFER;
389 if (Ustrncmp(keystring, item_key, key_len) == 0) {
390 /* Reclaim some store */
391 store_reset(item_key);
392 /* matches - get data length */
393 item_dat_len = cdb_unpack(packbuf + 4);
394 /* then we build a new result string */
395 *result = store_get(item_dat_len + 1);
396 if (cdb_bread(cdbp->fileno, *result, item_dat_len) == -1)
397 return DEFER;
398 (*result)[item_dat_len] = 0;
399 return OK;
400 }
401 /* Reclaim some store */
402 store_reset(item_key);
403 }
404 }
405 cur_offset += 8;
406
407 /* handle warp round of table */
408 if (cur_offset == end_offset)
409 cur_offset = hash_offset;
410 }
411 return FAIL;
412}
413
414
415
416/*************************************************
417* Close entry point *
418*************************************************/
419
420/* See local README for interface description */
421
e6d225ae 422static void
0756eb3c
PH
423cdb_close(void *handle)
424{
425struct cdb_state * cdbp = handle;
426
427#ifdef HAVE_MMAP
428 if (cdbp->cdb_map) {
429 munmap(CS cdbp->cdb_map, cdbp->filelen);
430 if (cdbp->cdb_map == cdbp->cdb_offsets)
431 cdbp->cdb_offsets = NULL;
432 }
433#endif /* HAVE_MMAP */
434
f1e894f3 435 (void)close(cdbp->fileno);
0756eb3c
PH
436}
437
6545de78
PP
438
439
440/*************************************************
441* Version reporting entry point *
442*************************************************/
443
444/* See local README for interface description. */
445
446#include "../version.h"
447
/* Write a one-line version report for this lookup to f.  Anything is
 * printed only when the lookup is built with DYNLOOKUP defined; in
 * other builds the function does nothing. */
void
cdb_version_report(FILE *f)
{
#ifdef DYNLOOKUP
  fprintf(f, "Library version: CDB: Exim version %s\n", EXIM_VERSION_STR);
#else
  /* Keep picky compilers happy */
  f = f;
#endif
}
455
456
e6d225ae
DW
457lookup_info cdb_lookup_info = {
458 US"cdb", /* lookup name */
459 lookup_absfile, /* uses absolute file name */
460 cdb_open, /* open function */
461 cdb_check, /* check function */
462 cdb_find, /* find function */
463 cdb_close, /* close function */
464 NULL, /* no tidy function */
6545de78
PP
465 NULL, /* no quoting function */
466 cdb_version_report /* version reporting */
e6d225ae
DW
467};
468
469#ifdef DYNLOOKUP
470#define cdb_lookup_module_info _lookup_module_info
471#endif
472
473static lookup_info *_lookup_list[] = { &cdb_lookup_info };
474lookup_module_info cdb_lookup_module_info = { LOOKUP_MODULE_INFO_MAGIC, _lookup_list, 1 };
475
0756eb3c 476/* End of lookups/cdb.c */