CVS-ing the new test suite.
[exim.git] / src / src / string.c
CommitLineData
1549ea3b 1/* $Cambridge: exim/src/src/string.c,v 1.7 2005/06/20 10:04:55 ph10 Exp $ */
059ec3d9
PH
2
3/*************************************************
4* Exim - an Internet mail transport agent *
5*************************************************/
6
c988f1f4 7/* Copyright (c) University of Cambridge 1995 - 2005 */
059ec3d9
PH
8/* See the file NOTICE for conditions of use and distribution. */
9
10/* Miscellaneous string-handling functions. Some are not required for
11utilities and tests, and are cut out by the COMPILE_UTILITY macro. */
12
13
14#include "exim.h"
15
16
17#ifndef COMPILE_UTILITY
18/*************************************************
19* Test for IP address *
20*************************************************/
21
22/* This used just to be a regular expression, but with IPv6 things are a bit
23more complicated. If the address contains a colon, it is assumed to be a v6
24address (assuming HAVE_IPV6 is set). If a mask is permitted and one is present,
25and maskptr is not NULL, its offset is placed there.
26
27Arguments:
28 s a string
29 maskptr NULL if no mask is permitted to follow
30 otherwise, points to an int where the offset of '/' is placed
31
32Returns: 0 if the string is not a textual representation of an IP address
33 4 if it is an IPv4 address
34 6 if it is an IPv6 address
35*/
36
37int
38string_is_ip_address(uschar *s, int *maskptr)
39{
40int i;
41int yield = 4;
42
43/* If an optional mask is permitted, check for it. If found, pass back the
44offset. */
45
46if (maskptr != NULL)
47 {
48 uschar *ss = s + Ustrlen(s);
49 *maskptr = 0;
50 if (s != ss && isdigit(*(--ss)))
51 {
52 while (ss > s && isdigit(ss[-1])) ss--;
53 if (ss > s && *(--ss) == '/') *maskptr = ss - s;
54 }
55 }
56
57/* A colon anywhere in the string => IPv6 address */
58
59if (Ustrchr(s, ':') != NULL)
60 {
61 BOOL had_double_colon = FALSE;
62 BOOL v4end = FALSE;
63 int count = 0;
64
65 yield = 6;
66
67 /* An IPv6 address must start with hex digit or double colon. A single
68 colon is invalid. */
69
70 if (*s == ':' && *(++s) != ':') return 0;
71
72 /* Now read up to 8 components consisting of up to 4 hex digits each. There
73 may be one and only one appearance of double colon, which implies any number
74 of binary zero bits. The number of preceding components is held in count. */
75
76 for (count = 0; count < 8; count++)
77 {
78 /* If the end of the string is reached before reading 8 components, the
79 address is valid provided a double colon has been read. This also applies
80 if we hit the / that introduces a mask or the % that introduces the
81 interface specifier (scope id) of a link-local address. */
82
83 if (*s == 0 || *s == '%' || *s == '/') return had_double_colon? yield : 0;
84
85 /* If a component starts with an additional colon, we have hit a double
86 colon. This is permitted to appear once only, and counts as at least
87 one component. The final component may be of this form. */
88
89 if (*s == ':')
90 {
91 if (had_double_colon) return 0;
92 had_double_colon = TRUE;
93 s++;
94 continue;
95 }
96
97 /* If the remainder of the string contains a dot but no colons, we
98 can expect a trailing IPv4 address. This is valid if either there has
99 been no double-colon and this is the 7th component (with the IPv4 address
100 being the 7th & 8th components), OR if there has been a double-colon
101 and fewer than 6 components. */
102
103 if (Ustrchr(s, ':') == NULL && Ustrchr(s, '.') != NULL)
104 {
105 if ((!had_double_colon && count != 6) ||
106 (had_double_colon && count > 6)) return 0;
107 v4end = TRUE;
108 yield = 6;
109 break;
110 }
111
112 /* Check for at least one and not more than 4 hex digits for this
113 component. */
114
115 if (!isxdigit(*s++)) return 0;
116 if (isxdigit(*s) && isxdigit(*(++s)) && isxdigit(*(++s))) s++;
117
118 /* If the component is terminated by colon and there is more to
119 follow, skip over the colon. If there is no more to follow the address is
120 invalid. */
121
122 if (*s == ':' && *(++s) == 0) return 0;
123 }
124
125 /* If about to handle a trailing IPv4 address, drop through. Otherwise
126 all is well if we are at the end of the string or at the mask or at a percent
127 sign, which introduces the interface specifier (scope id) of a link local
128 address. */
129
130 if (!v4end) return (*s == 0 || *s == '%' || *s == '/')? yield : 0;
131 }
132
133/* Test for IPv4 address, which may be the tail-end of an IPv6 address. */
134
135for (i = 0; i < 4; i++)
136 {
137 if (i != 0 && *s++ != '.') return 0;
138 if (!isdigit(*s++)) return 0;
139 if (isdigit(*s) && isdigit(*(++s))) s++;
140 }
141
142return (*s == 0 || *s == '/')? yield : 0;
143}
144#endif /* COMPILE_UTILITY */
145
146
147/*************************************************
148* Format message size *
149*************************************************/
150
151/* Convert a message size in bytes to printing form, rounding
152according to the magnitude of the number. A value of zero causes
153a string of spaces to be returned.
154
155Arguments:
156 size the message size in bytes
157 buffer where to put the answer
158
159Returns: pointer to the buffer
160 a string of exactly 5 characters is normally returned
161*/
162
163uschar *
164string_format_size(int size, uschar *buffer)
165{
166if (size == 0) Ustrcpy(CS buffer, " ");
167else if (size < 1024) sprintf(CS buffer, "%5d", size);
168else if (size < 10*1024)
169 sprintf(CS buffer, "%4.1fK", (double)size / 1024.0);
170else if (size < 1024*1024)
171 sprintf(CS buffer, "%4dK", (size + 512)/1024);
172else if (size < 10*1024*1024)
173 sprintf(CS buffer, "%4.1fM", (double)size / (1024.0 * 1024.0));
174else
175 sprintf(CS buffer, "%4dM", (size + 512 * 1024)/(1024*1024));
176return buffer;
177}
178
179
180
181#ifndef COMPILE_UTILITY
182/*************************************************
183* Convert a number to base 62 format *
184*************************************************/
185
186/* Convert a long integer into an ASCII base 62 string. For Cygwin the value of
187BASE_62 is actually 36. Always return exactly 6 characters plus zero, in a
188static area.
189
190Argument: a long integer
191Returns: pointer to base 62 string
192*/
193
194uschar *
195string_base62(unsigned long int value)
196{
197static uschar yield[7];
198uschar *p = yield + sizeof(yield) - 1;
199*p = 0;
200while (p > yield)
201 {
202 *(--p) = base62_chars[value % BASE_62];
203 value /= BASE_62;
204 }
205return yield;
206}
207#endif /* COMPILE_UTILITY */
208
209
210
211#ifndef COMPILE_UTILITY
212/*************************************************
213* Interpret escape sequence *
214*************************************************/
215
216/* This function is called from several places where escape sequences are to be
217interpreted in strings.
218
219Arguments:
220 pp points a pointer to the initiating "\" in the string;
221 the pointer gets updated to point to the final character
222Returns: the value of the character escape
223*/
224
225int
226string_interpret_escape(uschar **pp)
227{
228int ch;
229uschar *p = *pp;
230ch = *(++p);
231if (isdigit(ch) && ch != '8' && ch != '9')
232 {
233 ch -= '0';
234 if (isdigit(p[1]) && p[1] != '8' && p[1] != '9')
235 {
236 ch = ch * 8 + *(++p) - '0';
237 if (isdigit(p[1]) && p[1] != '8' && p[1] != '9')
238 ch = ch * 8 + *(++p) - '0';
239 }
240 }
241else switch(ch)
242 {
243 case 'n': ch = '\n'; break;
244 case 'r': ch = '\r'; break;
245 case 't': ch = '\t'; break;
246 case 'x':
247 ch = 0;
248 if (isxdigit(p[1]))
249 {
250 ch = ch * 16 +
251 Ustrchr(hex_digits, tolower(*(++p))) - hex_digits;
252 if (isxdigit(p[1])) ch = ch * 16 +
253 Ustrchr(hex_digits, tolower(*(++p))) - hex_digits;
254 }
255 break;
256 }
257*pp = p;
258return ch;
259}
260#endif /* COMPILE_UTILITY */
261
262
263
264#ifndef COMPILE_UTILITY
265/*************************************************
266* Ensure string is printable *
267*************************************************/
268
269/* This function is called for critical strings. It checks for any
270non-printing characters, and if any are found, it makes a new copy
271of the string with suitable escape sequences. It is most often called by the
272macro string_printing(), which sets allow_tab TRUE.
273
274Arguments:
275 s the input string
276 allow_tab TRUE to allow tab as a printing character
277
278Returns: string with non-printers encoded as printing sequences
279*/
280
281uschar *
282string_printing2(uschar *s, BOOL allow_tab)
283{
284int nonprintcount = 0;
285int length = 0;
286uschar *t = s;
287uschar *ss, *tt;
288
289while (*t != 0)
290 {
291 int c = *t++;
292 if (!mac_isprint(c) || (!allow_tab && c == '\t')) nonprintcount++;
293 length++;
294 }
295
296if (nonprintcount == 0) return s;
297
298/* Get a new block of store guaranteed big enough to hold the
299expanded string. */
300
301ss = store_get(length + nonprintcount * 4 + 1);
302
303/* Copy everying, escaping non printers. */
304
305t = s;
306tt = ss;
307
308while (*t != 0)
309 {
310 int c = *t;
311 if (mac_isprint(c) && (allow_tab || c != '\t')) *tt++ = *t++; else
312 {
313 *tt++ = '\\';
314 switch (*t)
315 {
316 case '\n': *tt++ = 'n'; break;
317 case '\r': *tt++ = 'r'; break;
318 case '\b': *tt++ = 'b'; break;
319 case '\v': *tt++ = 'v'; break;
320 case '\f': *tt++ = 'f'; break;
321 case '\t': *tt++ = 't'; break;
322 default: sprintf(CS tt, "%03o", *t); tt += 3; break;
323 }
324 t++;
325 }
326 }
327*tt = 0;
328return ss;
329}
330#endif /* COMPILE_UTILITY */
331
332
333
334
335/*************************************************
336* Copy and save string *
337*************************************************/
338
339/* This function assumes that memcpy() is faster than strcpy().
340
341Argument: string to copy
342Returns: copy of string in new store
343*/
344
345uschar *
346string_copy(uschar *s)
347{
348int len = Ustrlen(s) + 1;
349uschar *ss = store_get(len);
350memcpy(ss, s, len);
351return ss;
352}
353
354
355
356/*************************************************
357* Copy and save string in malloc'd store *
358*************************************************/
359
360/* This function assumes that memcpy() is faster than strcpy().
361
362Argument: string to copy
363Returns: copy of string in new store
364*/
365
366uschar *
367string_copy_malloc(uschar *s)
368{
369int len = Ustrlen(s) + 1;
370uschar *ss = store_malloc(len);
371memcpy(ss, s, len);
372return ss;
373}
374
375
376
377/*************************************************
378* Copy, lowercase and save string *
379*************************************************/
380
381/*
382Argument: string to copy
383Returns: copy of string in new store, with letters lowercased
384*/
385
386uschar *
387string_copylc(uschar *s)
388{
389uschar *ss = store_get(Ustrlen(s) + 1);
390uschar *p = ss;
391while (*s != 0) *p++ = tolower(*s++);
392*p = 0;
393return ss;
394}
395
396
397
398/*************************************************
399* Copy and save string, given length *
400*************************************************/
401
402/* It is assumed the data contains no zeros. A zero is added
403onto the end.
404
405Arguments:
406 s string to copy
407 n number of characters
408
409Returns: copy of string in new store
410*/
411
412uschar *
413string_copyn(uschar *s, int n)
414{
415uschar *ss = store_get(n + 1);
416Ustrncpy(ss, s, n);
417ss[n] = 0;
418return ss;
419}
420
421
422/*************************************************
423* Copy, lowercase, and save string, given length *
424*************************************************/
425
426/* It is assumed the data contains no zeros. A zero is added
427onto the end.
428
429Arguments:
430 s string to copy
431 n number of characters
432
433Returns: copy of string in new store, with letters lowercased
434*/
435
436uschar *
437string_copynlc(uschar *s, int n)
438{
439uschar *ss = store_get(n + 1);
440uschar *p = ss;
441while (n-- > 0) *p++ = tolower(*s++);
442*p = 0;
443return ss;
444}
445
446
447
448/*************************************************
449* Copy returned DNS domain name, de-escaping *
450*************************************************/
451
452/* If a domain name contains top-bit characters, some resolvers return
453the fully qualified name with those characters turned into escapes. The
454convention is a backslash followed by _decimal_ digits. We convert these
455back into the original binary values. This will be relevant when
456allow_utf8_domains is set true and UTF-8 characters are used in domain
457names. Backslash can also be used to escape other characters, though we
458shouldn't come across them in domain names.
459
460Argument: the domain name string
461Returns: copy of string in new store, de-escaped
462*/
463
464uschar *
465string_copy_dnsdomain(uschar *s)
466{
467uschar *yield;
468uschar *ss = yield = store_get(Ustrlen(s) + 1);
469
470while (*s != 0)
471 {
472 if (*s != '\\')
473 {
474 *ss++ = *s++;
475 }
476 else if (isdigit(s[1]))
477 {
478 *ss++ = (s[1] - '0')*100 + (s[2] - '0')*10 + s[3] - '0';
479 s += 4;
480 }
481 else if (*(++s) != 0)
482 {
483 *ss++ = *s++;
484 }
485 }
486
487*ss = 0;
488return yield;
489}
490
491
492#ifndef COMPILE_UTILITY
493/*************************************************
494* Copy space-terminated or quoted string *
495*************************************************/
496
497/* This function copies from a string until its end, or until whitespace is
498encountered, unless the string begins with a double quote, in which case the
499terminating quote is sought, and escaping within the string is done. The length
500of a de-quoted string can be no longer than the original, since escaping always
501turns n characters into 1 character.
502
503Argument: pointer to the pointer to the first character, which gets updated
504Returns: the new string
505*/
506
507uschar *
508string_dequote(uschar **sptr)
509{
510uschar *s = *sptr;
511uschar *t, *yield;
512
513/* First find the end of the string */
514
515if (*s != '\"')
516 {
517 while (*s != 0 && !isspace(*s)) s++;
518 }
519else
520 {
521 s++;
522 while (*s != 0 && *s != '\"')
523 {
524 if (*s == '\\') (void)string_interpret_escape(&s);
525 s++;
526 }
527 if (*s != 0) s++;
528 }
529
530/* Get enough store to copy into */
531
532t = yield = store_get(s - *sptr + 1);
533s = *sptr;
534
535/* Do the copy */
536
537if (*s != '\"')
538 {
539 while (*s != 0 && !isspace(*s)) *t++ = *s++;
540 }
541else
542 {
543 s++;
544 while (*s != 0 && *s != '\"')
545 {
546 if (*s == '\\') *t++ = string_interpret_escape(&s);
547 else *t++ = *s;
548 s++;
549 }
550 if (*s != 0) s++;
551 }
552
553/* Update the pointer and return the terminated copy */
554
555*sptr = s;
556*t = 0;
557return yield;
558}
559#endif /* COMPILE_UTILITY */
560
561
562
563/*************************************************
564* Format a string and save it *
565*************************************************/
566
567/* The formatting is done by string_format, which checks the length of
568everything.
569
570Arguments:
571 format a printf() format - deliberately char * rather than uschar *
572 because it will most usually be a literal string
573 ... arguments for format
574
575Returns: pointer to fresh piece of store containing sprintf'ed string
576*/
577
578uschar *
579string_sprintf(char *format, ...)
580{
581va_list ap;
582uschar buffer[STRING_SPRINTF_BUFFER_SIZE];
583va_start(ap, format);
584if (!string_vformat(buffer, sizeof(buffer), format, ap))
585 log_write(0, LOG_MAIN|LOG_PANIC_DIE,
586 "string_sprintf expansion was longer than %d", sizeof(buffer));
587va_end(ap);
588return string_copy(buffer);
589}
590
591
592
593/*************************************************
594* Case-independent strncmp() function *
595*************************************************/
596
597/*
598Arguments:
599 s first string
600 t second string
601 n number of characters to compare
602
603Returns: < 0, = 0, or > 0, according to the comparison
604*/
605
606int
607strncmpic(uschar *s, uschar *t, int n)
608{
609while (n--)
610 {
611 int c = tolower(*s++) - tolower(*t++);
612 if (c) return c;
613 }
614return 0;
615}
616
617
618/*************************************************
619* Case-independent strcmp() function *
620*************************************************/
621
622/*
623Arguments:
624 s first string
625 t second string
626
627Returns: < 0, = 0, or > 0, according to the comparison
628*/
629
630int
631strcmpic(uschar *s, uschar *t)
632{
633while (*s != 0)
634 {
635 int c = tolower(*s++) - tolower(*t++);
636 if (c != 0) return c;
637 }
638return *t;
639}
640
641
642/*************************************************
643* Case-independent strstr() function *
644*************************************************/
645
646/* The third argument specifies whether whitespace is required
647to follow the matched string.
648
649Arguments:
650 s string to search
651 t substring to search for
652 space_follows if TRUE, match only if whitespace follows
653
654Returns: pointer to substring in string, or NULL if not found
655*/
656
657uschar *
658strstric(uschar *s, uschar *t, BOOL space_follows)
659{
660uschar *p = t;
661uschar *yield = NULL;
662int cl = tolower(*p);
663int cu = toupper(*p);
664
665while (*s)
666 {
667 if (*s == cl || *s == cu)
668 {
669 if (yield == NULL) yield = s;
670 if (*(++p) == 0)
671 {
672 if (!space_follows || s[1] == ' ' || s[1] == '\n' ) return yield;
673 yield = NULL;
674 p = t;
675 }
676 cl = tolower(*p);
677 cu = toupper(*p);
678 s++;
679 }
680 else if (yield != NULL)
681 {
682 yield = NULL;
683 p = t;
684 cl = tolower(*p);
685 cu = toupper(*p);
686 }
687 else s++;
688 }
689return NULL;
690}
691
692
693
694#ifndef COMPILE_UTILITY
695/*************************************************
696* Get next string from separated list *
697*************************************************/
698
699/* Leading and trailing space is removed from each item. The separator in the
700list is controlled by the int pointed to by the separator argument as follows:
701
702 If its value is > 0 it is used as the delimiter.
703 (If its value is actually > UCHAR_MAX there is only one item in the list.
704 This is used for some cases when called via functions that sometimes
705 plough through lists, and sometimes are given single items.)
706 If its value is <= 0, the string is inspected for a leading <x, where
707 x is an ispunct() value. If found, it is used as the delimiter. If not
708 found: (a) if separator == 0, ':' is used
709 (b) if separator <0, then -separator is used
710 In all cases the value of the separator that is used is written back to
711 the int so that it is used on subsequent calls as we progress through
712 the list.
713
714The separator can always be represented in the string by doubling.
715
716Arguments:
717 listptr points to a pointer to the current start of the list; the
718 pointer gets updated to point after the end of the next item
719 separator a pointer to the separator character in an int (see above)
720 buffer where to put a copy of the next string in the list; or
721 NULL if the next string is returned in new memory
722 buflen when buffer is not NULL, the size of buffer; otherwise ignored
723
724Returns: pointer to buffer, containing the next substring,
725 or NULL if no more substrings
726*/
727
728uschar *
729string_nextinlist(uschar **listptr, int *separator, uschar *buffer, int buflen)
730{
731register int p = 0;
732register int sep = *separator;
733register uschar *s = *listptr;
734
735if (s == NULL) return NULL;
736while (isspace(*s)) s++;
737
738if (sep <= 0)
739 {
740 if (*s == '<' && ispunct(s[1]))
741 {
742 sep = s[1];
743 s += 2;
744 while (isspace(*s)) s++;
745 }
746 else
747 {
748 sep = (sep == 0)? ':' : -sep;
749 }
750 *separator = sep;
751 }
752
753if (*s == 0) return NULL;
754
755/* Handle the case when a buffer is provided. */
756
757if (buffer != NULL)
758 {
759 for (; *s != 0; s++)
760 {
761 if (*s == sep && *(++s) != sep) break;
762 if (p < buflen - 1) buffer[p++] = *s;
763 }
764 while (p > 0 && isspace(buffer[p-1])) p--;
765 buffer[p] = 0;
766 }
767
768/* Handle the case when a buffer is not provided. */
769
770else
771 {
772 /* We know that *s != 0 at this point. However, it might be pointing to a
773 separator, which could indicate an empty string, or could be doubled to
774 indicate a separator character as data at the start of a string. */
775
776 if (*s == sep)
777 {
778 s++;
779 if (*s != sep) buffer = string_copy(US"");
780 }
781
782 if (buffer == NULL)
783 {
784 int size = 0;
785 int ptr = 0;
786 uschar *ss;
787 for (;;)
788 {
789 for (ss = s + 1; *ss != 0 && *ss != sep; ss++);
790 buffer = string_cat(buffer, &size, &ptr, s, ss-s);
791 s = ss;
792 if (*s == 0 || *(++s) != sep) break;
793 }
794 while (ptr > 0 && isspace(buffer[ptr-1])) ptr--;
795 buffer[ptr] = 0;
796 }
797 }
798
799/* Update the current pointer and return the new string */
800
801*listptr = s;
802return buffer;
803}
804#endif /* COMPILE_UTILITY */
805
806
807
808#ifndef COMPILE_UTILITY
809/*************************************************
810* Add chars to string *
811*************************************************/
812
813/* This function is used when building up strings of unknown length. Room is
814always left for a terminating zero to be added to the string that is being
815built. This function does not require the string that is being added to be NUL
816terminated, because the number of characters to add is given explicitly. It is
817sometimes called to extract parts of other strings.
818
819Arguments:
820 string points to the start of the string that is being built, or NULL
821 if this is a new string that has no contents yet
822 size points to a variable that holds the current capacity of the memory
823 block (updated if changed)
824 ptr points to a variable that holds the offset at which to add
825 characters, updated to the new offset
826 s points to characters to add
827 count count of characters to add; must not exceed the length of s, if s
828 is a C string
829
830If string is given as NULL, *size and *ptr should both be zero.
831
832Returns: pointer to the start of the string, changed if copied for expansion.
833 Note that a NUL is not added, though space is left for one. This is
834 because string_cat() is often called multiple times to build up a
835 string - there's no point adding the NUL till the end.
836*/
837
838uschar *
839string_cat(uschar *string, int *size, int *ptr, const uschar *s, int count)
840{
841int p = *ptr;
842
843if (p + count >= *size)
844 {
845 int oldsize = *size;
846
847 /* Mostly, string_cat() is used to build small strings of a few hundred
848 characters at most. There are times, however, when the strings are very much
849 longer (for example, a lookup that returns a vast number of alias addresses).
850 To try to keep things reasonable, we use increments whose size depends on the
851 existing length of the string. */
852
853 int inc = (oldsize < 4096)? 100 : 1024;
854 while (*size <= p + count) *size += inc;
855
856 /* New string */
857
858 if (string == NULL) string = store_get(*size);
859
860 /* Try to extend an existing allocation. If the result of calling
861 store_extend() is false, either there isn't room in the current memory block,
862 or this string is not the top item on the dynamic store stack. We then have
863 to get a new chunk of store and copy the old string. When building large
864 strings, it is helpful to call store_release() on the old string, to release
865 memory blocks that have become empty. (The block will be freed if the string
866 is at its start.) However, we can do this only if we know that the old string
867 was the last item on the dynamic memory stack. This is the case if it matches
868 store_last_get. */
869
870 else if (!store_extend(string, oldsize, *size))
871 {
872 BOOL release_ok = store_last_get[store_pool] == string;
873 uschar *newstring = store_get(*size);
874 memcpy(newstring, string, p);
875 if (release_ok) store_release(string);
876 string = newstring;
877 }
878 }
879
880/* Because we always specify the exact number of characters to copy, we can
881use memcpy(), which is likely to be more efficient than strncopy() because the
882latter has to check for zero bytes. */
883
884memcpy(string + p, s, count);
885*ptr = p + count;
886return string;
887}
888#endif /* COMPILE_UTILITY */
889
890
891
892#ifndef COMPILE_UTILITY
893/*************************************************
894* Append strings to another string *
895*************************************************/
896
897/* This function can be used to build a string from many other strings.
898It calls string_cat() to do the dirty work.
899
900Arguments:
901 string points to the start of the string that is being built, or NULL
902 if this is a new string that has no contents yet
903 size points to a variable that holds the current capacity of the memory
904 block (updated if changed)
905 ptr points to a variable that holds the offset at which to add
906 characters, updated to the new offset
907 count the number of strings to append
908 ... "count" uschar* arguments, which must be valid zero-terminated
909 C strings
910
911Returns: pointer to the start of the string, changed if copied for expansion.
912 The string is not zero-terminated - see string_cat() above.
913*/
914
915uschar *
916string_append(uschar *string, int *size, int *ptr, int count, ...)
917{
918va_list ap;
919int i;
920
921va_start(ap, count);
922for (i = 0; i < count; i++)
923 {
924 uschar *t = va_arg(ap, uschar *);
925 string = string_cat(string, size, ptr, t, Ustrlen(t));
926 }
927va_end(ap);
928
929return string;
930}
931#endif
932
933
934
935/*************************************************
936* Format a string with length checks *
937*************************************************/
938
939/* This function is used to format a string with checking of the length of the
940output for all conversions. It protects Exim from absent-mindedness when
941calling functions like debug_printf and string_sprintf, and elsewhere. There
942are two different entry points to what is actually the same function, depending
943on whether the variable length list of data arguments are given explicitly or
944as a va_list item.
945
946The formats are the usual printf() ones, with some omissions (never used) and
0d7eb84a
PH
947two additions for strings: %S forces lower case, and %#s or %#S prints nothing
948for a NULL string. Without the # "NULL" is printed (useful in debugging). There
949is also the addition of %D, which inserts the date in the form used for
059ec3d9
PH
950datestamped log files.
951
952Arguments:
953 buffer a buffer in which to put the formatted string
954 buflen the length of the buffer
955 format the format string - deliberately char * and not uschar *
956 ... or ap variable list of supplementary arguments
957
958Returns: TRUE if the result fitted in the buffer
959*/
960
961BOOL
962string_format(uschar *buffer, int buflen, char *format, ...)
963{
964BOOL yield;
965va_list ap;
966va_start(ap, format);
967yield = string_vformat(buffer, buflen, format, ap);
968va_end(ap);
969return yield;
970}
971
972
973BOOL
974string_vformat(uschar *buffer, int buflen, char *format, va_list ap)
975{
b1c749bb
PH
976enum { L_NORMAL, L_SHORT, L_LONG, L_LONGLONG, L_LONGDOUBLE };
977
059ec3d9
PH
978BOOL yield = TRUE;
979int width, precision;
980char *fp = format; /* Deliberately not unsigned */
981uschar *p = buffer;
982uschar *last = buffer + buflen - 1;
983
984string_datestamp_offset = -1; /* Datestamp not inserted */
985
986/* Scan the format and handle the insertions */
987
988while (*fp != 0)
989 {
b1c749bb 990 int length = L_NORMAL;
059ec3d9
PH
991 int *nptr;
992 int slen;
993 char *null = "NULL"; /* ) These variables */
994 char *item_start, *s; /* ) are deliberately */
995 char newformat[16]; /* ) not unsigned */
996
997 /* Non-% characters just get copied verbatim */
998
999 if (*fp != '%')
1000 {
1001 if (p >= last) { yield = FALSE; break; }
1002 *p++ = (uschar)*fp++;
1003 continue;
1004 }
1005
1006 /* Deal with % characters. Pick off the width and precision, for checking
1007 strings, skipping over the flag and modifier characters. */
1008
1009 item_start = fp;
1010 width = precision = -1;
1011
1012 if (strchr("-+ #0", *(++fp)) != NULL)
1013 {
1014 if (*fp == '#') null = "";
1015 fp++;
1016 }
1017
1018 if (isdigit((uschar)*fp))
1019 {
1020 width = *fp++ - '0';
1021 while (isdigit((uschar)*fp)) width = width * 10 + *fp++ - '0';
1022 }
1023 else if (*fp == '*')
1024 {
1025 width = va_arg(ap, int);
1026 fp++;
1027 }
1028
1029 if (*fp == '.')
1030 {
1031 if (*(++fp) == '*')
1032 {
1033 precision = va_arg(ap, int);
1034 fp++;
1035 }
1036 else
1037 {
1038 precision = 0;
1039 while (isdigit((uschar)*fp))
1040 precision = precision*10 + *fp++ - '0';
1041 }
1042 }
1043
b1c749bb
PH
1044 /* Skip over 'h', 'L', 'l', and 'll', remembering the item length */
1045
1046 if (*fp == 'h')
1047 { fp++; length = L_SHORT; }
1048 else if (*fp == 'L')
1049 { fp++; length = L_LONGDOUBLE; }
1050 else if (*fp == 'l')
1051 {
1052 if (fp[1] == 'l')
1053 {
1054 fp += 2;
1055 length = L_LONGLONG;
1056 }
1057 else
1058 {
1059 fp++;
1060 length = L_LONG;
1061 }
1062 }
059ec3d9
PH
1063
1064 /* Handle each specific format type. */
1065
1066 switch (*fp++)
1067 {
1068 case 'n':
1069 nptr = va_arg(ap, int *);
1070 *nptr = p - buffer;
1071 break;
1072
1073 case 'd':
1074 case 'o':
1075 case 'u':
1076 case 'x':
1077 case 'X':
1549ea3b
PH
1078 if (p >= last - ((length > L_LONG)? 24 : 12))
1079 { yield = FALSE; goto END_FORMAT; }
059ec3d9
PH
1080 strncpy(newformat, item_start, fp - item_start);
1081 newformat[fp - item_start] = 0;
b1c749bb
PH
1082
1083 /* Short int is promoted to int when passing through ..., so we must use
1084 int for va_arg(). */
1085
1086 switch(length)
1087 {
1088 case L_SHORT:
1089 case L_NORMAL: sprintf(CS p, newformat, va_arg(ap, int)); break;
1090 case L_LONG: sprintf(CS p, newformat, va_arg(ap, long int)); break;
c6c2dc1d 1091 case L_LONGLONG: sprintf(CS p, newformat, va_arg(ap, LONGLONG_T)); break;
b1c749bb 1092 }
059ec3d9
PH
1093 while (*p) p++;
1094 break;
1095
1096 case 'p':
1097 if (p >= last - 24) { yield = FALSE; goto END_FORMAT; }
1098 strncpy(newformat, item_start, fp - item_start);
1099 newformat[fp - item_start] = 0;
1100 sprintf(CS p, newformat, va_arg(ap, void *));
1101 while (*p) p++;
1102 break;
1103
1104 /* %f format is inherently insecure if the numbers that it may be
870f6ba8
TF
1105 handed are unknown (e.g. 1e300). However, in Exim, %f is used for
1106 printing load averages, and these are actually stored as integers
1107 (load average * 1000) so the size of the numbers is constrained.
1108 It is also used for formatting sending rates, where the simplicity
1109 of the format prevents overflow. */
059ec3d9
PH
1110
1111 case 'f':
1112 case 'e':
1113 case 'E':
1114 case 'g':
1115 case 'G':
1116 if (precision < 0) precision = 6;
1117 if (p >= last - precision - 8) { yield = FALSE; goto END_FORMAT; }
1118 strncpy(newformat, item_start, fp - item_start);
1119 newformat[fp-item_start] = 0;
b1c749bb
PH
1120 if (length == L_LONGDOUBLE)
1121 sprintf(CS p, newformat, va_arg(ap, long double));
1122 else
1123 sprintf(CS p, newformat, va_arg(ap, double));
059ec3d9
PH
1124 while (*p) p++;
1125 break;
1126
1127 /* String types */
1128
1129 case '%':
1130 if (p >= last) { yield = FALSE; goto END_FORMAT; }
1131 *p++ = '%';
1132 break;
1133
1134 case 'c':
1135 if (p >= last) { yield = FALSE; goto END_FORMAT; }
1136 *p++ = va_arg(ap, int);
1137 break;
1138
1139 case 'D': /* Insert datestamp for log file names */
1140 s = CS tod_stamp(tod_log_datestamp);
1141 string_datestamp_offset = p - buffer; /* Passed back via global */
1142 goto INSERT_STRING;
1143
1144 case 's':
1145 case 'S': /* Forces *lower* case */
1146 s = va_arg(ap, char *);
1147
1148 INSERT_STRING: /* Come to from %D above */
1149 if (s == NULL) s = null;
1150 slen = Ustrlen(s);
1151
1152 /* If the width is specified, check that there is a precision
1153 set; if not, set it to the width to prevent overruns of long
1154 strings. */
1155
1156 if (width >= 0)
1157 {
1158 if (precision < 0) precision = width;
1159 }
1160
1161 /* If a width is not specified and the precision is specified, set
1162 the width to the precision, or the string length if shorter. */
1163
1164 else if (precision >= 0)
1165 {
1166 width = (precision < slen)? precision : slen;
1167 }
1168
1169 /* If neither are specified, set them both to the string length. */
1170
1171 else width = precision = slen;
1172
1173 /* Check string space, and add the string to the buffer if ok. If
1174 not OK, add part of the string (debugging uses this to show as
1175 much as possible). */
1176
1177 if (p >= last - width)
1178 {
1179 yield = FALSE;
1180 width = precision = last - p - 1;
1181 }
1182 sprintf(CS p, "%*.*s", width, precision, s);
1183 if (fp[-1] == 'S')
1184 while (*p) { *p = tolower(*p); p++; }
1185 else
1186 while (*p) p++;
1187 if (!yield) goto END_FORMAT;
1188 break;
1189
1190 /* Some things are never used in Exim; also catches junk. */
1191
1192 default:
1193 strncpy(newformat, item_start, fp - item_start);
1194 newformat[fp-item_start] = 0;
1195 log_write(0, LOG_MAIN|LOG_PANIC_DIE, "string_format: unsupported type "
1196 "in \"%s\" in \"%s\"", newformat, format);
1197 break;
1198 }
1199 }
1200
1201/* Ensure string is complete; return TRUE if got to the end of the format */
1202
1203END_FORMAT:
1204
1205*p = 0;
1206return yield;
1207}
1208
1209
1210
1211#ifndef COMPILE_UTILITY
1212/*************************************************
1213* Generate an "open failed" message *
1214*************************************************/
1215
1216/* This function creates a message after failure to open a file. It includes a
1217string supplied as data, adds the strerror() text, and if the failure was
1218"Permission denied", reads and includes the euid and egid.
1219
1220Arguments:
1221 eno the value of errno after the failure
1222 format a text format string - deliberately not uschar *
1223 ... arguments for the format string
1224
1225Returns: a message, in dynamic store
1226*/
1227
1228uschar *
1229string_open_failed(int eno, char *format, ...)
1230{
1231va_list ap;
1232uschar buffer[1024];
1233
1234Ustrcpy(buffer, "failed to open ");
1235va_start(ap, format);
1236
1237/* Use the checked formatting routine to ensure that the buffer
1238does not overflow. It should not, since this is called only for internally
1239specified messages. If it does, the message just gets truncated, and there
1240doesn't seem much we can do about that. */
1241
1242(void)string_vformat(buffer+15, sizeof(buffer) - 15, format, ap);
1243
1244return (eno == EACCES)?
1245 string_sprintf("%s: %s (euid=%ld egid=%ld)", buffer, strerror(eno),
1246 (long int)geteuid(), (long int)getegid()) :
1247 string_sprintf("%s: %s", buffer, strerror(eno));
1248}
1249#endif /* COMPILE_UTILITY */
1250
1251
1252
1253#ifndef COMPILE_UTILITY
1254/*************************************************
1255* Generate local prt for logging *
1256*************************************************/
1257
1258/* This function is a subroutine for use in string_log_address() below.
1259
1260Arguments:
1261 addr the address being logged
1262 yield the current dynamic buffer pointer
1263 sizeptr points to current size
1264 ptrptr points to current insert pointer
1265
1266Returns: the new value of the buffer pointer
1267*/
1268
1269static uschar *
1270string_get_localpart(address_item *addr, uschar *yield, int *sizeptr,
1271 int *ptrptr)
1272{
1273if (testflag(addr, af_include_affixes) && addr->prefix != NULL)
1274 yield = string_cat(yield, sizeptr, ptrptr, addr->prefix,
1275 Ustrlen(addr->prefix));
1276yield = string_cat(yield, sizeptr, ptrptr, addr->local_part,
1277 Ustrlen(addr->local_part));
1278if (testflag(addr, af_include_affixes) && addr->suffix != NULL)
1279 yield = string_cat(yield, sizeptr, ptrptr, addr->suffix,
1280 Ustrlen(addr->suffix));
1281return yield;
1282}
1283
1284
1285/*************************************************
1286* Generate log address list *
1287*************************************************/
1288
1289/* This function generates a list consisting of an address and its parents, for
1290use in logging lines. For saved onetime aliased addresses, the onetime parent
1291field is used. If the address was delivered by a transport with rcpt_include_
1292affixes set, the af_include_affixes bit will be set in the address. In that
1293case, we include the affixes here too.
1294
1295Arguments:
1296 addr bottom (ultimate) address
1297 all_parents if TRUE, include all parents
1298 success TRUE for successful delivery
1299
1300Returns: a string in dynamic store
1301*/
1302
1303uschar *
1304string_log_address(address_item *addr, BOOL all_parents, BOOL success)
1305{
1306int size = 64;
1307int ptr = 0;
1308BOOL add_topaddr = TRUE;
1309uschar *yield = store_get(size);
1310address_item *topaddr;
1311
1312/* Find the ultimate parent */
1313
1314for (topaddr = addr; topaddr->parent != NULL; topaddr = topaddr->parent);
1315
1316/* We start with just the local part for pipe, file, and reply deliveries, and
1317for successful local deliveries from routers that have the log_as_local flag
1318set. File deliveries from filters can be specified as non-absolute paths in
1319cases where the transport is goin to complete the path. If there is an error
1320before this happens (expansion failure) the local part will not be updated, and
1321so won't necessarily look like a path. Add extra text for this case. */
1322
1323if (testflag(addr, af_pfr) ||
1324 (success &&
1325 addr->router != NULL && addr->router->log_as_local &&
1326 addr->transport != NULL && addr->transport->info->local))
1327 {
1328 if (testflag(addr, af_file) && addr->local_part[0] != '/')
1329 yield = string_cat(yield, &size, &ptr, CUS"save ", 5);
1330 yield = string_get_localpart(addr, yield, &size, &ptr);
1331 }
1332
1333/* Other deliveries start with the full address. It we have split it into local
1334part and domain, use those fields. Some early failures can happen before the
1335splitting is done; in those cases use the original field. */
1336
1337else
1338 {
1339 if (addr->local_part != NULL)
1340 {
1341 yield = string_get_localpart(addr, yield, &size, &ptr);
1342 yield = string_cat(yield, &size, &ptr, US"@", 1);
1343 yield = string_cat(yield, &size, &ptr, addr->domain,
1344 Ustrlen(addr->domain) );
1345 }
1346 else
1347 {
1348 yield = string_cat(yield, &size, &ptr, addr->address, Ustrlen(addr->address));
1349 }
1350 yield[ptr] = 0;
1351
1352 /* If the address we are going to print is the same as the top address,
1353 and all parents are not being included, don't add on the top address. First
1354 of all, do a caseless comparison; if this succeeds, do a caseful comparison
1355 on the local parts. */
1356
1357 if (strcmpic(yield, topaddr->address) == 0 &&
1358 Ustrncmp(yield, topaddr->address, Ustrchr(yield, '@') - yield) == 0 &&
1359 addr->onetime_parent == NULL &&
1360 (!all_parents || addr->parent == NULL || addr->parent == topaddr))
1361 add_topaddr = FALSE;
1362 }
1363
1364/* If all parents are requested, or this is a local pipe/file/reply, and
1365there is at least one intermediate parent, show it in brackets, and continue
1366with all of them if all are wanted. */
1367
1368if ((all_parents || testflag(addr, af_pfr)) &&
1369 addr->parent != NULL &&
1370 addr->parent != topaddr)
1371 {
1372 uschar *s = US" (";
1373 address_item *addr2;
1374 for (addr2 = addr->parent; addr2 != topaddr; addr2 = addr2->parent)
1375 {
1376 yield = string_cat(yield, &size, &ptr, s, 2);
1377 yield = string_cat(yield, &size, &ptr, addr2->address, Ustrlen(addr2->address));
1378 if (!all_parents) break;
1379 s = US", ";
1380 }
1381 yield = string_cat(yield, &size, &ptr, US")", 1);
1382 }
1383
1384/* Add the top address if it is required */
1385
1386if (add_topaddr)
1387 {
1388 yield = string_cat(yield, &size, &ptr, US" <", 2);
1389
1390 if (addr->onetime_parent == NULL)
1391 yield = string_cat(yield, &size, &ptr, topaddr->address,
1392 Ustrlen(topaddr->address));
1393 else
1394 yield = string_cat(yield, &size, &ptr, addr->onetime_parent,
1395 Ustrlen(addr->onetime_parent));
1396
1397 yield = string_cat(yield, &size, &ptr, US">", 1);
1398 }
1399
1400yield[ptr] = 0; /* string_cat() leaves space */
1401return yield;
1402}
1403#endif /* COMPILE_UTILITY */
1404
1405
1406
1407
1408
1409/*************************************************
1410**************************************************
1411* Stand-alone test program *
1412**************************************************
1413*************************************************/
1414
1415#ifdef STAND_ALONE
1416int main(void)
1417{
1418uschar buffer[256];
1419
1420printf("Testing is_ip_address\n");
1421
1422while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
1423 {
1424 int offset;
1425 buffer[Ustrlen(buffer) - 1] = 0;
1426 printf("%d\n", string_is_ip_address(buffer, NULL));
1427 printf("%d %d %s\n", string_is_ip_address(buffer, &offset), offset, buffer);
1428 }
1429
1430printf("Testing string_nextinlist\n");
1431
1432while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
1433 {
1434 uschar *list = buffer;
1435 uschar *lp1, *lp2;
1436 uschar item[256];
1437 int sep1 = 0;
1438 int sep2 = 0;
1439
1440 if (*list == '<')
1441 {
1442 sep1 = sep2 = list[1];
1443 list += 2;
1444 }
1445
1446 lp1 = lp2 = list;
1447 for (;;)
1448 {
1449 uschar *item1 = string_nextinlist(&lp1, &sep1, item, sizeof(item));
1450 uschar *item2 = string_nextinlist(&lp2, &sep2, NULL, 0);
1451
1452 if (item1 == NULL && item2 == NULL) break;
1453 if (item == NULL || item2 == NULL || Ustrcmp(item1, item2) != 0)
1454 {
1455 printf("***ERROR\nitem1=\"%s\"\nitem2=\"%s\"\n",
1456 (item1 == NULL)? "NULL" : CS item1,
1457 (item2 == NULL)? "NULL" : CS item2);
1458 break;
1459 }
1460 else printf(" \"%s\"\n", CS item1);
1461 }
1462 }
1463
1464/* This is a horrible lash-up, but it serves its purpose. */
1465
1466printf("Testing string_format\n");
1467
1468while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
1469 {
1470 void *args[3];
1471 double dargs[3];
1472 int dflag = 0;
1473 int n = 0;
1474 int count;
1475 int countset = 0;
1476 uschar format[256];
1477 uschar outbuf[256];
1478 uschar *s;
1479 buffer[Ustrlen(buffer) - 1] = 0;
1480
1481 s = Ustrchr(buffer, ',');
1482 if (s == NULL) s = buffer + Ustrlen(buffer);
1483
1484 Ustrncpy(format, buffer, s - buffer);
1485 format[s-buffer] = 0;
1486
1487 if (*s == ',') s++;
1488
1489 while (*s != 0)
1490 {
1491 uschar *ss = s;
1492 s = Ustrchr(ss, ',');
1493 if (s == NULL) s = ss + Ustrlen(ss);
1494
1495 if (isdigit(*ss))
1496 {
1497 Ustrncpy(outbuf, ss, s-ss);
1498 if (Ustrchr(outbuf, '.') != NULL)
1499 {
1500 dflag = 1;
1501 dargs[n++] = Ustrtod(outbuf, NULL);
1502 }
1503 else
1504 {
1505 args[n++] = (void *)Uatoi(outbuf);
1506 }
1507 }
1508
1509 else if (Ustrcmp(ss, "*") == 0)
1510 {
1511 args[n++] = (void *)(&count);
1512 countset = 1;
1513 }
1514
1515 else
1516 {
1517 uschar *sss = malloc(s - ss + 1);
1518 Ustrncpy(sss, ss, s-ss);
1519 args[n++] = sss;
1520 }
1521
1522 if (*s == ',') s++;
1523 }
1524
1525 if (!dflag) printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
1526 args[0], args[1], args[2])? "True" : "False");
1527
1528 else printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
1529 dargs[0], dargs[1], dargs[2])? "True" : "False");
1530
1531 printf("%s\n", CS outbuf);
1532 if (countset) printf("count=%d\n", count);
1533 }
1534
1535return 0;
1536}
1537#endif
1538
1539/* End of string.c */