Lookups: per-searchtype options framework
[exim.git] / src / src / lookups / cdb.c
CommitLineData
0756eb3c
PH
1/*************************************************
2* Exim - an Internet mail transport agent *
3*************************************************/
4
5/*
6 * Exim - CDB database lookup module
7 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
8 *
9 * Copyright (c) 1998 Nigel Metheringham, Planet Online Ltd
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public License
13 * as published by the Free Software Foundation; either version 2
14 * of the License, or (at your option) any later version.
15 *
16 * --------------------------------------------------------------
17 * Modified by PH for Exim 4:
18 * Changed over to using unsigned chars
19 * Makes use of lf_check_file() for file checking
20 * --------------------------------------------------------------
3386088d
PP
21 * Modified by The Exim Maintainers 2015:
22 * const propagation
23 * --------------------------------------------------------------
0756eb3c
PH
24 *
25 * This program is distributed in the hope that it will be useful,
26 * but WITHOUT ANY WARRANTY; without even the implied warranty of
27 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 * GNU General Public License for more details.
29 *
30 * You should have received a copy of the GNU General Public License
31 * along with this program; if not, write to the Free Software
32 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
33 * 02111-1307, USA.
34 *
35 *
36 * This code implements Dan Bernstein's Constant DataBase (cdb) spec.
37 * Information, the spec and sample code for cdb can be obtained from
38 * http://www.pobox.com/~djb/cdb.html
39 *
40 * This implementation borrows some code from Dan Bernstein's
41 * implementation (which has no license restrictions applied to it).
42 * This (read-only) implementation is completely contained within
43 * cdb.[ch]; it does *not* link against an external cdb library.
44 *
45 *
4c04137d 46 * There are 2 variants included within this code. One uses MMAP and
0756eb3c
PH
47 * should give better performance especially for multiple lookups on a
48 * modern machine. The other is the default implementation which is
49 * used in the case where the MMAP fails or if MMAP was not compiled
50 * in. This implementation is the same as the original reference cdb
51 * implementation. The MMAP version is compiled in if the HAVE_MMAP
52 * preprocessor define is defined - this should be set in the system
53 * specific os.h file.
54 *
55 */
56
57
58#include "../exim.h"
59#include "lf_functions.h"
0756eb3c
PH
60
61#ifdef HAVE_MMAP
62# include <sys/mman.h>
63/* Not all implementations declare MAP_FAILED */
64# ifndef MAP_FAILED
65# define MAP_FAILED ((void *) -1)
66# endif /* MAP_FAILED */
67#endif /* HAVE_MMAP */
68
69
/* Geometry of the fixed-size header found at the start of every cdb file */
#define CDB_HASH_SPLIT 256                  /* number of pieces the hash table is split into */
#define CDB_HASH_MASK  (CDB_HASH_SPLIT - 1) /* mask used to AND off the split value from a hash */
#define CDB_HASH_ENTRY 8                    /* size in bytes of one table entry */
#define CDB_HASH_TABLE (CDB_HASH_SPLIT * CDB_HASH_ENTRY) /* size in bytes of the whole header */
74
75/* State information for cdb databases that are open NB while the db
76 * is open its contents will not change (cdb dbs are normally updated
77 * atomically by renaming). However the lifetime of one of these
78 * state structures should be limited - ie a long running daemon
79 * that opens one may hit problems....
80 */
81
82struct cdb_state {
83 int fileno;
84 off_t filelen;
85 uschar *cdb_map;
86 uschar *cdb_offsets;
87};
88
/* 32-bit unsigned type, used for the numbers unpacked from a cdb file.
This relies on int being 32 bits wide, which is the case on all modern
machines. */
typedef unsigned uint32;
91
92/*
93 * cdb_hash()
94 * Internal function to make hash value */
95
96static uint32
576bd90c 97cdb_hash(const uschar *buf, unsigned int len)
0756eb3c
PH
98{
99 uint32 h;
100
101 h = 5381;
102 while (len) {
103 --len;
104 h += (h << 5);
105 h ^= (uint32) *buf++;
106 }
107 return h;
108}
109
110/*
111 * cdb_bread()
112 * Internal function to read len bytes from disk, coping with oddities */
113
114static int
115cdb_bread(int fd,
116 uschar *buf,
117 int len)
118{
119 int r;
120 while (len > 0) {
121 do
122 r = Uread(fd,buf,len);
123 while ((r == -1) && (errno == EINTR));
124 if (r == -1) return -1;
125 if (r == 0) { errno = EIO; return -1; }
126 buf += r;
127 len -= r;
128 }
129 return 0;
130}
131
132/*
133 * cdb_bread()
4c04137d 134 * Internal function to parse 4 byte number (endian independent) */
0756eb3c
PH
135
136static uint32
137cdb_unpack(uschar *buf)
138{
d447dbd1
JH
139uint32 num;
140num = buf[3]; num <<= 8;
141num += buf[2]; num <<= 8;
142num += buf[1]; num <<= 8;
143num += buf[0];
144return num;
0756eb3c
PH
145}
146
e6d225ae
DW
147static void cdb_close(void *handle);
148
149static void *
d447dbd1 150cdb_open(const uschar * filename, uschar ** errmsg)
0756eb3c 151{
d447dbd1
JH
152int fileno;
153struct cdb_state *cdbp;
154struct stat statbuf;
155void * mapbuf;
156
157if ((fileno = Uopen(filename, O_RDONLY, 0)) < 0)
158 {
159 int save_errno = errno;
160 *errmsg = string_open_failed(errno, "%s for cdb lookup", filename);
161 errno = save_errno;
162 return NULL;
0756eb3c
PH
163 }
164
d447dbd1
JH
165if (fstat(fileno, &statbuf) != 0)
166 {
167 int save_errno = errno;
168 *errmsg = string_open_failed(errno,
169 "fstat(%s) failed - cannot do cdb lookup",
170 filename);
171 errno = save_errno;
172 return NULL;
0756eb3c
PH
173 }
174
d447dbd1
JH
175/* If this is a valid file, then it *must* be at least
176CDB_HASH_TABLE bytes long */
0756eb3c 177
d447dbd1
JH
178if (statbuf.st_size < CDB_HASH_TABLE)
179 {
180 int save_errno = errno;
181 *errmsg = string_open_failed(errno,
182 "%s too short for cdb lookup",
183 filename);
184 errno = save_errno;
185 return NULL;
186 }
187
188/* Having got a file open we need the structure to put things in */
189cdbp = store_get(sizeof(struct cdb_state), FALSE);
190/* store_get() does not return if memory was not available... */
191/* preload the structure.... */
192cdbp->fileno = fileno;
193cdbp->filelen = statbuf.st_size;
194cdbp->cdb_map = NULL;
195cdbp->cdb_offsets = NULL;
196
197/* if we are allowed to we use mmap here.... */
0756eb3c 198#ifdef HAVE_MMAP
d447dbd1
JH
199if ((mapbuf = mmap(NULL, statbuf.st_size, PROT_READ, MAP_SHARED, fileno, 0))
200 != MAP_FAILED)
201 {
202 /* We have an mmap-ed section. Now we can just use it */
203 cdbp->cdb_map = mapbuf;
204 /* The offsets can be set to the same value since they should
205 * effectively be cached as well
206 */
207 cdbp->cdb_offsets = mapbuf;
208
209 /* Now return the state struct */
210 return(cdbp);
211 }
212
213/* If we got here the map failed. Basically we can ignore this since we fall
214back to slower methods.... However lets debug log it... */
215
216DEBUG(D_lookup) debug_printf_indent("cdb mmap failed - %d\n", errno);
0756eb3c
PH
217#endif /* HAVE_MMAP */
218
d447dbd1
JH
219/* In this case we have either not got MMAP allowed, or it failed */
220
221/* get a buffer to stash the basic offsets in - this should speed
222things up a lot - especially on multiple lookups */
223
224cdbp->cdb_offsets = store_get(CDB_HASH_TABLE, FALSE);
225
226/* now fill the buffer up... */
227
228if (cdb_bread(fileno, cdbp->cdb_offsets, CDB_HASH_TABLE) == -1)
229 {
230 /* read of hash table failed, oh dear, oh..... time to give up I think....
231 call the close routine (deallocs the memory), and return NULL */
232
233 *errmsg = string_open_failed(errno,
234 "cannot read header from %s for cdb lookup",
235 filename);
236 cdb_close(cdbp);
237 return NULL;
0756eb3c
PH
238 }
239
d447dbd1
JH
240/* Everything else done - return the cache structure */
241return cdbp;
0756eb3c
PH
242}
243
244
245
246/*************************************************
247* Check entry point *
248*************************************************/
249
e6d225ae 250static BOOL
d447dbd1
JH
251cdb_check(void * handle, const uschar * filename, int modemask,
252 uid_t * owners, gid_t * owngroups, uschar ** errmsg)
0756eb3c 253{
d447dbd1
JH
254struct cdb_state * cdbp = handle;
255return lf_check_file(cdbp->fileno, filename, S_IFREG, modemask,
256 owners, owngroups, "cdb", errmsg) == 0;
0756eb3c
PH
257}
258
259
260
261/*************************************************
262* Find entry point *
263*************************************************/
264
e6d225ae 265static int
d447dbd1 266cdb_find(void * handle, const uschar * filename, const uschar * keystring,
67a57a5a
JH
267 int key_len, uschar ** result, uschar ** errmsg, uint * do_cache,
268 const uschar * opts)
0756eb3c 269{
d88f0784
JH
270struct cdb_state * cdbp = handle;
271uint32 item_key_len,
272item_dat_len,
273key_hash,
274item_hash,
275item_posn,
276cur_offset,
277end_offset,
278hash_offset_entry,
279hash_offset,
280hash_offlen,
281hash_slotnm;
d88f0784
JH
282
283/* Keep picky compilers happy */
284do_cache = do_cache;
285
286key_hash = cdb_hash(keystring, key_len);
287
288hash_offset_entry = CDB_HASH_ENTRY * (key_hash & CDB_HASH_MASK);
289hash_offset = cdb_unpack(cdbp->cdb_offsets + hash_offset_entry);
290hash_offlen = cdb_unpack(cdbp->cdb_offsets + hash_offset_entry + 4);
291
292/* If the offset length is zero this key cannot be in the file */
293
294if (hash_offlen == 0)
295 return FAIL;
296
297hash_slotnm = (key_hash >> 8) % hash_offlen;
298
299/* check to ensure that the file is not corrupt
300 * if the hash_offset + (hash_offlen * CDB_HASH_ENTRY) is longer
301 * than the file, then we have problems.... */
302
303if ((hash_offset + (hash_offlen * CDB_HASH_ENTRY)) > cdbp->filelen)
304 {
305 *errmsg = string_sprintf("cdb: corrupt cdb file %s (too short)",
306 filename);
42c7f0b4 307 DEBUG(D_lookup) debug_printf_indent("%s\n", *errmsg);
d88f0784 308 return DEFER;
0756eb3c
PH
309 }
310
d88f0784
JH
311cur_offset = hash_offset + (hash_slotnm * CDB_HASH_ENTRY);
312end_offset = hash_offset + (hash_offlen * CDB_HASH_ENTRY);
313
314/* if we are allowed to we use mmap here.... */
315
0756eb3c 316#ifdef HAVE_MMAP
d88f0784
JH
317/* make sure the mmap was OK */
318if (cdbp->cdb_map != NULL)
319 {
320 uschar * cur_pos = cur_offset + cdbp->cdb_map;
321 uschar * end_pos = end_offset + cdbp->cdb_map;
322
d7978c0f 323 for (int loop = 0; (loop < hash_offlen); ++loop)
d88f0784
JH
324 {
325 item_hash = cdb_unpack(cur_pos);
326 cur_pos += 4;
327 item_posn = cdb_unpack(cur_pos);
328 cur_pos += 4;
329
0756eb3c 330 /* if the position is zero then we have a definite miss */
d88f0784 331
0756eb3c
PH
332 if (item_posn == 0)
333 return FAIL;
334
d88f0784
JH
335 if (item_hash == key_hash)
336 { /* matching hash value */
337 uschar * item_ptr = cdbp->cdb_map + item_posn;
338
339 item_key_len = cdb_unpack(item_ptr);
340 item_ptr += 4;
341 item_dat_len = cdb_unpack(item_ptr);
342 item_ptr += 4;
343
0756eb3c 344 /* check key length matches */
d88f0784
JH
345
346 if (item_key_len == key_len)
347 {
348 /* finally check if key matches */
349 if (Ustrncmp(keystring, item_ptr, key_len) == 0)
350 {
351 /* we have a match.... * make item_ptr point to data */
352
353 item_ptr += item_key_len;
354
f3ebb786
JH
355 /* ... and the returned result. Assume it is not
356 tainted, lacking any way of telling. */
d88f0784 357
f3ebb786 358 *result = store_get(item_dat_len + 1, FALSE);
d88f0784
JH
359 memcpy(*result, item_ptr, item_dat_len);
360 (*result)[item_dat_len] = 0;
361 return OK;
362 }
363 }
364 }
365 /* handle warp round of table */
366 if (cur_pos == end_pos)
367 cur_pos = cdbp->cdb_map + hash_offset;
368 }
369 /* looks like we failed... */
370 return FAIL;
371 }
372
373#endif /* HAVE_MMAP */
374
d7978c0f 375for (int loop = 0; (loop < hash_offlen); ++loop)
d88f0784
JH
376 {
377 uschar packbuf[8];
378
d315eda1
JH
379 if (lseek(cdbp->fileno, (off_t) cur_offset, SEEK_SET) == -1) return DEFER;
380 if (cdb_bread(cdbp->fileno, packbuf, 8) == -1) return DEFER;
d88f0784
JH
381
382 item_hash = cdb_unpack(packbuf);
383 item_posn = cdb_unpack(packbuf + 4);
384
385 /* if the position is zero then we have a definite miss */
386
387 if (item_posn == 0)
388 return FAIL;
389
390 if (item_hash == key_hash)
391 { /* matching hash value */
392 if (lseek(cdbp->fileno, (off_t) item_posn, SEEK_SET) == -1) return DEFER;
393 if (cdb_bread(cdbp->fileno, packbuf, 8) == -1) return DEFER;
394
395 item_key_len = cdb_unpack(packbuf);
396
397 /* check key length matches */
398
399 if (item_key_len == key_len)
400 { /* finally check if key matches */
f3ebb786
JH
401 rmark reset_point = store_mark();
402 uschar * item_key = store_get(key_len, TRUE); /* keys liable to be tainted */
d88f0784
JH
403
404 if (cdb_bread(cdbp->fileno, item_key, key_len) == -1) return DEFER;
f3ebb786
JH
405 if (Ustrncmp(keystring, item_key, key_len) == 0)
406 {
407 /* Reclaim some store */
408 store_reset(reset_point);
409
410 /* matches - get data length */
411 item_dat_len = cdb_unpack(packbuf + 4);
412
413 /* then we build a new result string. We know we have enough
414 memory so disable Coverity errors about the tainted item_dat_ken */
415
416 *result = store_get(item_dat_len + 1, FALSE);
417 /* coverity[tainted_data] */
418 if (cdb_bread(cdbp->fileno, *result, item_dat_len) == -1)
419 return DEFER;
420
421 /* coverity[tainted_data] */
422 (*result)[item_dat_len] = 0;
423 return OK;
424 }
d88f0784 425 /* Reclaim some store */
f3ebb786 426 store_reset(reset_point);
0756eb3c
PH
427 }
428 }
d88f0784 429 cur_offset += 8;
0756eb3c 430
d88f0784
JH
431 /* handle warp round of table */
432 if (cur_offset == end_offset)
433 cur_offset = hash_offset;
0756eb3c 434 }
d88f0784 435return FAIL;
0756eb3c
PH
436}
437
438
439
440/*************************************************
441* Close entry point *
442*************************************************/
443
444/* See local README for interface description */
445
e6d225ae 446static void
0756eb3c
PH
447cdb_close(void *handle)
448{
449struct cdb_state * cdbp = handle;
450
451#ifdef HAVE_MMAP
d7978c0f
JH
452if (cdbp->cdb_map)
453 {
454 munmap(CS cdbp->cdb_map, cdbp->filelen);
455 if (cdbp->cdb_map == cdbp->cdb_offsets)
0756eb3c 456 cdbp->cdb_offsets = NULL;
d7978c0f 457 }
0756eb3c
PH
458#endif /* HAVE_MMAP */
459
d7978c0f 460(void)close(cdbp->fileno);
0756eb3c
PH
461}
462
6545de78
PP
463
464
465/*************************************************
466* Version reporting entry point *
467*************************************************/
468
469/* See local README for interface description. */
470
471#include "../version.h"
472
/* cdb_version_report()
Version reporting entry point. Writes one line giving the Exim version to f
when DYNLOOKUP is defined, and does nothing otherwise.

Argument:
  f   the stream to write to
*/

void
cdb_version_report(FILE *f)
{
#ifndef DYNLOOKUP
(void)f;		/* nothing to report */
#else
fprintf(f, "Library version: CDB: Exim version %s\n", EXIM_VERSION_STR);
#endif
}
480
481
e6d225ae
DW
482lookup_info cdb_lookup_info = {
483 US"cdb", /* lookup name */
484 lookup_absfile, /* uses absolute file name */
485 cdb_open, /* open function */
486 cdb_check, /* check function */
487 cdb_find, /* find function */
488 cdb_close, /* close function */
489 NULL, /* no tidy function */
6545de78
PP
490 NULL, /* no quoting function */
491 cdb_version_report /* version reporting */
e6d225ae
DW
492};
493
494#ifdef DYNLOOKUP
495#define cdb_lookup_module_info _lookup_module_info
496#endif
497
498static lookup_info *_lookup_list[] = { &cdb_lookup_info };
499lookup_module_info cdb_lookup_module_info = { LOOKUP_MODULE_INFO_MAGIC, _lookup_list, 1 };
500
0756eb3c 501/* End of lookups/cdb.c */