ARC: better diagnostics for keyfile issues
[exim.git] / src / src / string.c
... / ...
CommitLineData
1/*************************************************
2* Exim - an Internet mail transport agent *
3*************************************************/
4
5/* Copyright (c) University of Cambridge 1995 - 2018 */
6/* See the file NOTICE for conditions of use and distribution. */
7
8/* Miscellaneous string-handling functions. Some are not required for
9utilities and tests, and are cut out by the COMPILE_UTILITY macro. */
10
11
12#include "exim.h"
13
14
15#ifndef COMPILE_UTILITY
16/*************************************************
17* Test for IP address *
18*************************************************/
19
20/* This used just to be a regular expression, but with IPv6 things are a bit
21more complicated. If the address contains a colon, it is assumed to be a v6
22address (assuming HAVE_IPV6 is set). If a mask is permitted and one is present,
23and maskptr is not NULL, its offset is placed there.
24
25Arguments:
26 s a string
27 maskptr NULL if no mask is permitted to follow
28 otherwise, points to an int where the offset of '/' is placed
29 if there is no / followed by trailing digits, *maskptr is set 0
30
31Returns: 0 if the string is not a textual representation of an IP address
32 4 if it is an IPv4 address
33 6 if it is an IPv6 address
34*/
35
36int
37string_is_ip_address(const uschar *s, int *maskptr)
38{
39int i;
40int yield = 4;
41
42/* If an optional mask is permitted, check for it. If found, pass back the
43offset. */
44
45if (maskptr)
46 {
47 const uschar *ss = s + Ustrlen(s);
48 *maskptr = 0;
49 if (s != ss && isdigit(*(--ss)))
50 {
51 while (ss > s && isdigit(ss[-1])) ss--;
52 if (ss > s && *(--ss) == '/') *maskptr = ss - s;
53 }
54 }
55
56/* A colon anywhere in the string => IPv6 address */
57
58if (Ustrchr(s, ':') != NULL)
59 {
60 BOOL had_double_colon = FALSE;
61 BOOL v4end = FALSE;
62 int count = 0;
63
64 yield = 6;
65
66 /* An IPv6 address must start with hex digit or double colon. A single
67 colon is invalid. */
68
69 if (*s == ':' && *(++s) != ':') return 0;
70
71 /* Now read up to 8 components consisting of up to 4 hex digits each. There
72 may be one and only one appearance of double colon, which implies any number
73 of binary zero bits. The number of preceding components is held in count. */
74
75 for (count = 0; count < 8; count++)
76 {
77 /* If the end of the string is reached before reading 8 components, the
78 address is valid provided a double colon has been read. This also applies
79 if we hit the / that introduces a mask or the % that introduces the
80 interface specifier (scope id) of a link-local address. */
81
82 if (*s == 0 || *s == '%' || *s == '/') return had_double_colon ? yield : 0;
83
84 /* If a component starts with an additional colon, we have hit a double
85 colon. This is permitted to appear once only, and counts as at least
86 one component. The final component may be of this form. */
87
88 if (*s == ':')
89 {
90 if (had_double_colon) return 0;
91 had_double_colon = TRUE;
92 s++;
93 continue;
94 }
95
96 /* If the remainder of the string contains a dot but no colons, we
97 can expect a trailing IPv4 address. This is valid if either there has
98 been no double-colon and this is the 7th component (with the IPv4 address
99 being the 7th & 8th components), OR if there has been a double-colon
100 and fewer than 6 components. */
101
102 if (Ustrchr(s, ':') == NULL && Ustrchr(s, '.') != NULL)
103 {
104 if ((!had_double_colon && count != 6) ||
105 (had_double_colon && count > 6)) return 0;
106 v4end = TRUE;
107 yield = 6;
108 break;
109 }
110
111 /* Check for at least one and not more than 4 hex digits for this
112 component. */
113
114 if (!isxdigit(*s++)) return 0;
115 if (isxdigit(*s) && isxdigit(*(++s)) && isxdigit(*(++s))) s++;
116
117 /* If the component is terminated by colon and there is more to
118 follow, skip over the colon. If there is no more to follow the address is
119 invalid. */
120
121 if (*s == ':' && *(++s) == 0) return 0;
122 }
123
124 /* If about to handle a trailing IPv4 address, drop through. Otherwise
125 all is well if we are at the end of the string or at the mask or at a percent
126 sign, which introduces the interface specifier (scope id) of a link local
127 address. */
128
129 if (!v4end)
130 return (*s == 0 || *s == '%' ||
131 (*s == '/' && maskptr != NULL && *maskptr != 0))? yield : 0;
132 }
133
134/* Test for IPv4 address, which may be the tail-end of an IPv6 address. */
135
136for (i = 0; i < 4; i++)
137 {
138 long n;
139 uschar * end;
140
141 if (i != 0 && *s++ != '.') return 0;
142 n = strtol(CCS s, CSS &end, 10);
143 if (n > 255 || n < 0 || end <= s || end > s+3) return 0;
144 s = end;
145 }
146
147return !*s || (*s == '/' && maskptr && *maskptr != 0) ? yield : 0;
148}
149#endif /* COMPILE_UTILITY */
150
151
152/*************************************************
153* Format message size *
154*************************************************/
155
156/* Convert a message size in bytes to printing form, rounding
157according to the magnitude of the number. A value of zero causes
158a string of spaces to be returned.
159
160Arguments:
161 size the message size in bytes
162 buffer where to put the answer
163
164Returns: pointer to the buffer
165 a string of exactly 5 characters is normally returned
166*/
167
168uschar *
169string_format_size(int size, uschar *buffer)
170{
171if (size == 0) Ustrcpy(buffer, " ");
172else if (size < 1024) sprintf(CS buffer, "%5d", size);
173else if (size < 10*1024)
174 sprintf(CS buffer, "%4.1fK", (double)size / 1024.0);
175else if (size < 1024*1024)
176 sprintf(CS buffer, "%4dK", (size + 512)/1024);
177else if (size < 10*1024*1024)
178 sprintf(CS buffer, "%4.1fM", (double)size / (1024.0 * 1024.0));
179else
180 sprintf(CS buffer, "%4dM", (size + 512 * 1024)/(1024*1024));
181return buffer;
182}
183
184
185
186#ifndef COMPILE_UTILITY
187/*************************************************
188* Convert a number to base 62 format *
189*************************************************/
190
191/* Convert a long integer into an ASCII base 62 string. For Cygwin the value of
192BASE_62 is actually 36. Always return exactly 6 characters plus zero, in a
193static area.
194
195Argument: a long integer
196Returns: pointer to base 62 string
197*/
198
199uschar *
200string_base62(unsigned long int value)
201{
202static uschar yield[7];
203uschar *p = yield + sizeof(yield) - 1;
204*p = 0;
205while (p > yield)
206 {
207 *(--p) = base62_chars[value % BASE_62];
208 value /= BASE_62;
209 }
210return yield;
211}
212#endif /* COMPILE_UTILITY */
213
214
215
216/*************************************************
217* Interpret escape sequence *
218*************************************************/
219
/* This function is called from several places where escape sequences are to be
interpreted in strings.

Arguments:
  pp       points to a pointer to the initiating "\" in the string;
           the pointer gets updated to point to the final character
Returns:   the value of the character escape
*/
228
229int
230string_interpret_escape(const uschar **pp)
231{
232#ifdef COMPILE_UTILITY
233const uschar *hex_digits= CUS"0123456789abcdef";
234#endif
235int ch;
236const uschar *p = *pp;
237ch = *(++p);
238if (isdigit(ch) && ch != '8' && ch != '9')
239 {
240 ch -= '0';
241 if (isdigit(p[1]) && p[1] != '8' && p[1] != '9')
242 {
243 ch = ch * 8 + *(++p) - '0';
244 if (isdigit(p[1]) && p[1] != '8' && p[1] != '9')
245 ch = ch * 8 + *(++p) - '0';
246 }
247 }
248else switch(ch)
249 {
250 case 'b': ch = '\b'; break;
251 case 'f': ch = '\f'; break;
252 case 'n': ch = '\n'; break;
253 case 'r': ch = '\r'; break;
254 case 't': ch = '\t'; break;
255 case 'v': ch = '\v'; break;
256 case 'x':
257 ch = 0;
258 if (isxdigit(p[1]))
259 {
260 ch = ch * 16 +
261 Ustrchr(hex_digits, tolower(*(++p))) - hex_digits;
262 if (isxdigit(p[1])) ch = ch * 16 +
263 Ustrchr(hex_digits, tolower(*(++p))) - hex_digits;
264 }
265 break;
266 }
267*pp = p;
268return ch;
269}
270
271
272
273#ifndef COMPILE_UTILITY
274/*************************************************
275* Ensure string is printable *
276*************************************************/
277
278/* This function is called for critical strings. It checks for any
279non-printing characters, and if any are found, it makes a new copy
280of the string with suitable escape sequences. It is most often called by the
281macro string_printing(), which sets allow_tab TRUE.
282
283Arguments:
284 s the input string
285 allow_tab TRUE to allow tab as a printing character
286
287Returns: string with non-printers encoded as printing sequences
288*/
289
290const uschar *
291string_printing2(const uschar *s, BOOL allow_tab)
292{
293int nonprintcount = 0;
294int length = 0;
295const uschar *t = s;
296uschar *ss, *tt;
297
298while (*t != 0)
299 {
300 int c = *t++;
301 if (!mac_isprint(c) || (!allow_tab && c == '\t')) nonprintcount++;
302 length++;
303 }
304
305if (nonprintcount == 0) return s;
306
307/* Get a new block of store guaranteed big enough to hold the
308expanded string. */
309
310ss = store_get(length + nonprintcount * 3 + 1);
311
312/* Copy everything, escaping non printers. */
313
314t = s;
315tt = ss;
316
317while (*t != 0)
318 {
319 int c = *t;
320 if (mac_isprint(c) && (allow_tab || c != '\t')) *tt++ = *t++; else
321 {
322 *tt++ = '\\';
323 switch (*t)
324 {
325 case '\n': *tt++ = 'n'; break;
326 case '\r': *tt++ = 'r'; break;
327 case '\b': *tt++ = 'b'; break;
328 case '\v': *tt++ = 'v'; break;
329 case '\f': *tt++ = 'f'; break;
330 case '\t': *tt++ = 't'; break;
331 default: sprintf(CS tt, "%03o", *t); tt += 3; break;
332 }
333 t++;
334 }
335 }
336*tt = 0;
337return ss;
338}
339#endif /* COMPILE_UTILITY */
340
341/*************************************************
342* Undo printing escapes in string *
343*************************************************/
344
345/* This function is the reverse of string_printing2. It searches for
346backslash characters and if any are found, it makes a new copy of the
347string with escape sequences parsed. Otherwise it returns the original
348string.
349
350Arguments:
351 s the input string
352
353Returns: string with printing escapes parsed back
354*/
355
356uschar *
357string_unprinting(uschar *s)
358{
359uschar *p, *q, *r, *ss;
360int len, off;
361
362p = Ustrchr(s, '\\');
363if (!p) return s;
364
365len = Ustrlen(s) + 1;
366ss = store_get(len);
367
368q = ss;
369off = p - s;
370if (off)
371 {
372 memcpy(q, s, off);
373 q += off;
374 }
375
376while (*p)
377 {
378 if (*p == '\\')
379 {
380 *q++ = string_interpret_escape((const uschar **)&p);
381 p++;
382 }
383 else
384 {
385 r = Ustrchr(p, '\\');
386 if (!r)
387 {
388 off = Ustrlen(p);
389 memcpy(q, p, off);
390 p += off;
391 q += off;
392 break;
393 }
394 else
395 {
396 off = r - p;
397 memcpy(q, p, off);
398 q += off;
399 p = r;
400 }
401 }
402 }
403*q = '\0';
404
405return ss;
406}
407
408
409
410
411/*************************************************
412* Copy and save string *
413*************************************************/
414
415/* This function assumes that memcpy() is faster than strcpy().
416
417Argument: string to copy
418Returns: copy of string in new store
419*/
420
421uschar *
422string_copy(const uschar *s)
423{
424int len = Ustrlen(s) + 1;
425uschar *ss = store_get(len);
426memcpy(ss, s, len);
427return ss;
428}
429
430
431
432/*************************************************
433* Copy and save string in malloc'd store *
434*************************************************/
435
436/* This function assumes that memcpy() is faster than strcpy().
437
438Argument: string to copy
439Returns: copy of string in new store
440*/
441
442uschar *
443string_copy_malloc(const uschar *s)
444{
445int len = Ustrlen(s) + 1;
446uschar *ss = store_malloc(len);
447memcpy(ss, s, len);
448return ss;
449}
450
451
452
453/*************************************************
454* Copy, lowercase and save string *
455*************************************************/
456
457/*
458Argument: string to copy
459Returns: copy of string in new store, with letters lowercased
460*/
461
462uschar *
463string_copylc(const uschar *s)
464{
465uschar *ss = store_get(Ustrlen(s) + 1);
466uschar *p = ss;
467while (*s != 0) *p++ = tolower(*s++);
468*p = 0;
469return ss;
470}
471
472
473
474/*************************************************
475* Copy and save string, given length *
476*************************************************/
477
478/* It is assumed the data contains no zeros. A zero is added
479onto the end.
480
481Arguments:
482 s string to copy
483 n number of characters
484
485Returns: copy of string in new store
486*/
487
488uschar *
489string_copyn(const uschar *s, int n)
490{
491uschar *ss = store_get(n + 1);
492Ustrncpy(ss, s, n);
493ss[n] = 0;
494return ss;
495}
496
497
498/*************************************************
499* Copy, lowercase, and save string, given length *
500*************************************************/
501
502/* It is assumed the data contains no zeros. A zero is added
503onto the end.
504
505Arguments:
506 s string to copy
507 n number of characters
508
509Returns: copy of string in new store, with letters lowercased
510*/
511
512uschar *
513string_copynlc(uschar *s, int n)
514{
515uschar *ss = store_get(n + 1);
516uschar *p = ss;
517while (n-- > 0) *p++ = tolower(*s++);
518*p = 0;
519return ss;
520}
521
522
523
524/*************************************************
525* Copy string if long, inserting newlines *
526*************************************************/
527
528/* If the given string is longer than 75 characters, it is copied, and within
529the copy, certain space characters are converted into newlines.
530
531Argument: pointer to the string
532Returns: pointer to the possibly altered string
533*/
534
535uschar *
536string_split_message(uschar *msg)
537{
538uschar *s, *ss;
539
540if (msg == NULL || Ustrlen(msg) <= 75) return msg;
541s = ss = msg = string_copy(msg);
542
543for (;;)
544 {
545 int i = 0;
546 while (i < 75 && *ss != 0 && *ss != '\n') ss++, i++;
547 if (*ss == 0) break;
548 if (*ss == '\n')
549 s = ++ss;
550 else
551 {
552 uschar *t = ss + 1;
553 uschar *tt = NULL;
554 while (--t > s + 35)
555 {
556 if (*t == ' ')
557 {
558 if (t[-1] == ':') { tt = t; break; }
559 if (tt == NULL) tt = t;
560 }
561 }
562
563 if (tt == NULL) /* Can't split behind - try ahead */
564 {
565 t = ss + 1;
566 while (*t != 0)
567 {
568 if (*t == ' ' || *t == '\n')
569 { tt = t; break; }
570 t++;
571 }
572 }
573
574 if (tt == NULL) break; /* Can't find anywhere to split */
575 *tt = '\n';
576 s = ss = tt+1;
577 }
578 }
579
580return msg;
581}
582
583
584
585/*************************************************
586* Copy returned DNS domain name, de-escaping *
587*************************************************/
588
589/* If a domain name contains top-bit characters, some resolvers return
590the fully qualified name with those characters turned into escapes. The
591convention is a backslash followed by _decimal_ digits. We convert these
592back into the original binary values. This will be relevant when
593allow_utf8_domains is set true and UTF-8 characters are used in domain
594names. Backslash can also be used to escape other characters, though we
595shouldn't come across them in domain names.
596
597Argument: the domain name string
598Returns: copy of string in new store, de-escaped
599*/
600
601uschar *
602string_copy_dnsdomain(uschar *s)
603{
604uschar *yield;
605uschar *ss = yield = store_get(Ustrlen(s) + 1);
606
607while (*s != 0)
608 {
609 if (*s != '\\')
610 {
611 *ss++ = *s++;
612 }
613 else if (isdigit(s[1]))
614 {
615 *ss++ = (s[1] - '0')*100 + (s[2] - '0')*10 + s[3] - '0';
616 s += 4;
617 }
618 else if (*(++s) != 0)
619 {
620 *ss++ = *s++;
621 }
622 }
623
624*ss = 0;
625return yield;
626}
627
628
629#ifndef COMPILE_UTILITY
630/*************************************************
631* Copy space-terminated or quoted string *
632*************************************************/
633
634/* This function copies from a string until its end, or until whitespace is
635encountered, unless the string begins with a double quote, in which case the
636terminating quote is sought, and escaping within the string is done. The length
637of a de-quoted string can be no longer than the original, since escaping always
638turns n characters into 1 character.
639
640Argument: pointer to the pointer to the first character, which gets updated
641Returns: the new string
642*/
643
644uschar *
645string_dequote(const uschar **sptr)
646{
647const uschar *s = *sptr;
648uschar *t, *yield;
649
650/* First find the end of the string */
651
652if (*s != '\"')
653 {
654 while (*s != 0 && !isspace(*s)) s++;
655 }
656else
657 {
658 s++;
659 while (*s != 0 && *s != '\"')
660 {
661 if (*s == '\\') (void)string_interpret_escape(&s);
662 s++;
663 }
664 if (*s != 0) s++;
665 }
666
667/* Get enough store to copy into */
668
669t = yield = store_get(s - *sptr + 1);
670s = *sptr;
671
672/* Do the copy */
673
674if (*s != '\"')
675 {
676 while (*s != 0 && !isspace(*s)) *t++ = *s++;
677 }
678else
679 {
680 s++;
681 while (*s != 0 && *s != '\"')
682 {
683 if (*s == '\\') *t++ = string_interpret_escape(&s);
684 else *t++ = *s;
685 s++;
686 }
687 if (*s != 0) s++;
688 }
689
690/* Update the pointer and return the terminated copy */
691
692*sptr = s;
693*t = 0;
694return yield;
695}
696#endif /* COMPILE_UTILITY */
697
698
699
700/*************************************************
701* Format a string and save it *
702*************************************************/
703
704/* The formatting is done by string_format, which checks the length of
705everything.
706
707Arguments:
708 format a printf() format - deliberately char * rather than uschar *
709 because it will most usually be a literal string
710 ... arguments for format
711
712Returns: pointer to fresh piece of store containing sprintf'ed string
713*/
714
715uschar *
716string_sprintf(const char *format, ...)
717{
718va_list ap;
719uschar buffer[STRING_SPRINTF_BUFFER_SIZE];
720va_start(ap, format);
721if (!string_vformat(buffer, sizeof(buffer), format, ap))
722 log_write(0, LOG_MAIN|LOG_PANIC_DIE,
723 "string_sprintf expansion was longer than " SIZE_T_FMT
724 "; format string was (%s)\nexpansion started '%.32s'",
725 sizeof(buffer), format, buffer);
726va_end(ap);
727return string_copy(buffer);
728}
729
730
731
732/*************************************************
733* Case-independent strncmp() function *
734*************************************************/
735
736/*
737Arguments:
738 s first string
739 t second string
740 n number of characters to compare
741
742Returns: < 0, = 0, or > 0, according to the comparison
743*/
744
745int
746strncmpic(const uschar *s, const uschar *t, int n)
747{
748while (n--)
749 {
750 int c = tolower(*s++) - tolower(*t++);
751 if (c) return c;
752 }
753return 0;
754}
755
756
757/*************************************************
758* Case-independent strcmp() function *
759*************************************************/
760
761/*
762Arguments:
763 s first string
764 t second string
765
766Returns: < 0, = 0, or > 0, according to the comparison
767*/
768
769int
770strcmpic(const uschar *s, const uschar *t)
771{
772while (*s != 0)
773 {
774 int c = tolower(*s++) - tolower(*t++);
775 if (c != 0) return c;
776 }
777return *t;
778}
779
780
781/*************************************************
782* Case-independent strstr() function *
783*************************************************/
784
785/* The third argument specifies whether whitespace is required
786to follow the matched string.
787
788Arguments:
789 s string to search
790 t substring to search for
791 space_follows if TRUE, match only if whitespace follows
792
793Returns: pointer to substring in string, or NULL if not found
794*/
795
796uschar *
797strstric(uschar *s, uschar *t, BOOL space_follows)
798{
799uschar *p = t;
800uschar *yield = NULL;
801int cl = tolower(*p);
802int cu = toupper(*p);
803
804while (*s)
805 {
806 if (*s == cl || *s == cu)
807 {
808 if (yield == NULL) yield = s;
809 if (*(++p) == 0)
810 {
811 if (!space_follows || s[1] == ' ' || s[1] == '\n' ) return yield;
812 yield = NULL;
813 p = t;
814 }
815 cl = tolower(*p);
816 cu = toupper(*p);
817 s++;
818 }
819 else if (yield != NULL)
820 {
821 yield = NULL;
822 p = t;
823 cl = tolower(*p);
824 cu = toupper(*p);
825 }
826 else s++;
827 }
828return NULL;
829}
830
831
832
833#ifndef COMPILE_UTILITY
834/*************************************************
835* Get next string from separated list *
836*************************************************/
837
838/* Leading and trailing space is removed from each item. The separator in the
839list is controlled by the int pointed to by the separator argument as follows:
840
841 If the value is > 0 it is used as the separator. This is typically used for
842 sublists such as slash-separated options. The value is always a printing
843 character.
844
845 (If the value is actually > UCHAR_MAX there is only one item in the list.
846 This is used for some cases when called via functions that sometimes
847 plough through lists, and sometimes are given single items.)
848
849 If the value is <= 0, the string is inspected for a leading <x, where x is an
850 ispunct() or an iscntrl() character. If found, x is used as the separator. If
851 not found:
852
853 (a) if separator == 0, ':' is used
854 (b) if separator <0, -separator is used
855
856 In all cases the value of the separator that is used is written back to the
857 int so that it is used on subsequent calls as we progress through the list.
858
859A literal ispunct() separator can be represented in an item by doubling, but
860there is no way to include an iscntrl() separator as part of the data.
861
862Arguments:
863 listptr points to a pointer to the current start of the list; the
864 pointer gets updated to point after the end of the next item
865 separator a pointer to the separator character in an int (see above)
866 buffer where to put a copy of the next string in the list; or
867 NULL if the next string is returned in new memory
868 buflen when buffer is not NULL, the size of buffer; otherwise ignored
869
870Returns: pointer to buffer, containing the next substring,
871 or NULL if no more substrings
872*/
873
874uschar *
875string_nextinlist(const uschar **listptr, int *separator, uschar *buffer, int buflen)
876{
877int sep = *separator;
878const uschar *s = *listptr;
879BOOL sep_is_special;
880
881if (s == NULL) return NULL;
882
883/* This allows for a fixed specified separator to be an iscntrl() character,
884but at the time of implementation, this is never the case. However, it's best
885to be conservative. */
886
887while (isspace(*s) && *s != sep) s++;
888
889/* A change of separator is permitted, so look for a leading '<' followed by an
890allowed character. */
891
892if (sep <= 0)
893 {
894 if (*s == '<' && (ispunct(s[1]) || iscntrl(s[1])))
895 {
896 sep = s[1];
897 s += 2;
898 while (isspace(*s) && *s != sep) s++;
899 }
900 else
901 {
902 sep = (sep == 0)? ':' : -sep;
903 }
904 *separator = sep;
905 }
906
907/* An empty string has no list elements */
908
909if (*s == 0) return NULL;
910
911/* Note whether whether or not the separator is an iscntrl() character. */
912
913sep_is_special = iscntrl(sep);
914
915/* Handle the case when a buffer is provided. */
916
917if (buffer)
918 {
919 int p = 0;
920 for (; *s != 0; s++)
921 {
922 if (*s == sep && (*(++s) != sep || sep_is_special)) break;
923 if (p < buflen - 1) buffer[p++] = *s;
924 }
925 while (p > 0 && isspace(buffer[p-1])) p--;
926 buffer[p] = 0;
927 }
928
929/* Handle the case when a buffer is not provided. */
930
931else
932 {
933 const uschar *ss;
934 gstring * g = NULL;
935
936 /* We know that *s != 0 at this point. However, it might be pointing to a
937 separator, which could indicate an empty string, or (if an ispunct()
938 character) could be doubled to indicate a separator character as data at the
939 start of a string. Avoid getting working memory for an empty item. */
940
941 if (*s == sep)
942 {
943 s++;
944 if (*s != sep || sep_is_special)
945 {
946 *listptr = s;
947 return string_copy(US"");
948 }
949 }
950
951 /* Not an empty string; the first character is guaranteed to be a data
952 character. */
953
954 for (;;)
955 {
956 for (ss = s + 1; *ss != 0 && *ss != sep; ss++) ;
957 g = string_catn(g, s, ss-s);
958 s = ss;
959 if (*s == 0 || *(++s) != sep || sep_is_special) break;
960 }
961 while (g->ptr > 0 && isspace(g->s[g->ptr-1])) g->ptr--;
962 buffer = string_from_gstring(g);
963 gstring_reset_unused(g);
964 }
965
966/* Update the current pointer and return the new string */
967
968*listptr = s;
969return buffer;
970}
971
972
973static const uschar *
974Ustrnchr(const uschar * s, int c, unsigned * len)
975{
976unsigned siz = *len;
977while (siz)
978 {
979 if (!*s) return NULL;
980 if (*s == c)
981 {
982 *len = siz;
983 return s;
984 }
985 s++;
986 siz--;
987 }
988return NULL;
989}
990
991
992/************************************************
993* Add element to separated list *
994************************************************/
995/* This function is used to build a list, returning an allocated null-terminated
996growable string. The given element has any embedded separator characters
997doubled.
998
999Despite having the same growable-string interface as string_cat() the list is
1000always returned null-terminated.
1001
1002Arguments:
1003 list expanding-string for the list that is being built, or NULL
1004 if this is a new list that has no contents yet
1005 sep list separator character
1006 ele new element to be appended to the list
1007
1008Returns: pointer to the start of the list, changed if copied for expansion.
1009*/
1010
1011gstring *
1012string_append_listele(gstring * list, uschar sep, const uschar * ele)
1013{
1014uschar * sp;
1015
1016if (list && list->ptr)
1017 list = string_catn(list, &sep, 1);
1018
1019while((sp = Ustrchr(ele, sep)))
1020 {
1021 list = string_catn(list, ele, sp-ele+1);
1022 list = string_catn(list, &sep, 1);
1023 ele = sp+1;
1024 }
1025list = string_cat(list, ele);
1026(void) string_from_gstring(list);
1027return list;
1028}
1029
1030
1031gstring *
1032string_append_listele_n(gstring * list, uschar sep, const uschar * ele,
1033 unsigned len)
1034{
1035const uschar * sp;
1036
1037if (list && list->ptr)
1038 list = string_catn(list, &sep, 1);
1039
1040while((sp = Ustrnchr(ele, sep, &len)))
1041 {
1042 list = string_catn(list, ele, sp-ele+1);
1043 list = string_catn(list, &sep, 1);
1044 ele = sp+1;
1045 len--;
1046 }
1047list = string_catn(list, ele, len);
1048(void) string_from_gstring(list);
1049return list;
1050}
1051
1052
1053
1054/* A slightly-bogus listmaker utility; the separator is a string so
1055can be multiple chars - there is no checking for the element content
1056containing any of the separator. */
1057
1058gstring *
1059string_append2_listele_n(gstring * list, const uschar * sepstr,
1060 const uschar * ele, unsigned len)
1061{
1062if (list && list->ptr)
1063 list = string_cat(list, sepstr);
1064
1065list = string_catn(list, ele, len);
1066(void) string_from_gstring(list);
1067return list;
1068}
1069
1070
1071
1072/************************************************/
1073/* Create a growable-string with some preassigned space */
1074
1075gstring *
1076string_get(unsigned size)
1077{
1078gstring * g = store_get(sizeof(gstring) + size);
1079g->size = size;
1080g->ptr = 0;
1081g->s = US(g + 1);
1082return g;
1083}
1084
1085/* NUL-terminate the C string in the growable-string, and return it. */
1086
1087uschar *
1088string_from_gstring(gstring * g)
1089{
1090if (!g) return NULL;
1091g->s[g->ptr] = '\0';
1092return g->s;
1093}
1094
1095void
1096gstring_reset_unused(gstring * g)
1097{
1098store_reset(g->s + (g->size = g->ptr + 1));
1099}
1100
1101/*************************************************
1102* Add chars to string *
1103*************************************************/
1104
/* Arguments:
  g		the growable-string
  p		current end of data
  count		amount to grow by
*/
1110
1111static void
1112gstring_grow(gstring * g, int p, int count)
1113{
1114int oldsize = g->size;
1115
1116/* Mostly, string_cat() is used to build small strings of a few hundred
1117characters at most. There are times, however, when the strings are very much
1118longer (for example, a lookup that returns a vast number of alias addresses).
1119To try to keep things reasonable, we use increments whose size depends on the
1120existing length of the string. */
1121
1122unsigned inc = oldsize < 4096 ? 127 : 1023;
1123g->size = ((p + count + inc) & ~inc) + 1;
1124
1125/* Try to extend an existing allocation. If the result of calling
1126store_extend() is false, either there isn't room in the current memory block,
1127or this string is not the top item on the dynamic store stack. We then have
1128to get a new chunk of store and copy the old string. When building large
1129strings, it is helpful to call store_release() on the old string, to release
1130memory blocks that have become empty. (The block will be freed if the string
1131is at its start.) However, we can do this only if we know that the old string
1132was the last item on the dynamic memory stack. This is the case if it matches
1133store_last_get. */
1134
1135if (!store_extend(g->s, oldsize, g->size))
1136 g->s = store_newblock(g->s, g->size, p);
1137}
1138
1139
1140
1141/* This function is used when building up strings of unknown length. Room is
1142always left for a terminating zero to be added to the string that is being
1143built. This function does not require the string that is being added to be NUL
1144terminated, because the number of characters to add is given explicitly. It is
1145sometimes called to extract parts of other strings.
1146
1147Arguments:
1148 string points to the start of the string that is being built, or NULL
1149 if this is a new string that has no contents yet
1150 s points to characters to add
1151 count count of characters to add; must not exceed the length of s, if s
1152 is a C string.
1153
1154Returns: pointer to the start of the string, changed if copied for expansion.
1155 Note that a NUL is not added, though space is left for one. This is
1156 because string_cat() is often called multiple times to build up a
1157 string - there's no point adding the NUL till the end.
1158
1159*/
1160/* coverity[+alloc] */
1161
1162gstring *
1163string_catn(gstring * g, const uschar *s, int count)
1164{
1165int p;
1166
1167if (!g)
1168 {
1169 unsigned inc = count < 4096 ? 127 : 1023;
1170 unsigned size = ((count + inc) & ~inc) + 1;
1171 g = string_get(size);
1172 }
1173
1174p = g->ptr;
1175if (p + count >= g->size)
1176 gstring_grow(g, p, count);
1177
1178/* Because we always specify the exact number of characters to copy, we can
1179use memcpy(), which is likely to be more efficient than strncopy() because the
1180latter has to check for zero bytes. */
1181
1182memcpy(g->s + p, s, count);
1183g->ptr = p + count;
1184return g;
1185}
1186
1187
1188gstring *
1189string_cat(gstring *string, const uschar *s)
1190{
1191return string_catn(string, s, Ustrlen(s));
1192}
1193
1194
1195
1196/*************************************************
1197* Append strings to another string *
1198*************************************************/
1199
1200/* This function can be used to build a string from many other strings.
1201It calls string_cat() to do the dirty work.
1202
1203Arguments:
1204 string expanding-string that is being built, or NULL
1205 if this is a new string that has no contents yet
1206 count the number of strings to append
1207 ... "count" uschar* arguments, which must be valid zero-terminated
1208 C strings
1209
1210Returns: pointer to the start of the string, changed if copied for expansion.
1211 The string is not zero-terminated - see string_cat() above.
1212*/
1213
1214__inline__ gstring *
1215string_append(gstring *string, int count, ...)
1216{
1217va_list ap;
1218
1219va_start(ap, count);
1220while (count-- > 0)
1221 {
1222 uschar *t = va_arg(ap, uschar *);
1223 string = string_cat(string, t);
1224 }
1225va_end(ap);
1226
1227return string;
1228}
1229#endif
1230
1231
1232
1233/*************************************************
1234* Format a string with length checks *
1235*************************************************/
1236
1237/* This function is used to format a string with checking of the length of the
1238output for all conversions. It protects Exim from absent-mindedness when
1239calling functions like debug_printf and string_sprintf, and elsewhere. There
1240are two different entry points to what is actually the same function, depending
1241on whether the variable length list of data arguments are given explicitly or
1242as a va_list item.
1243
1244The formats are the usual printf() ones, with some omissions (never used) and
1245three additions for strings: %S forces lower case, %T forces upper case, and
1246%#s or %#S prints nothing for a NULL string. Without the # "NULL" is printed
1247(useful in debugging). There is also the addition of %D and %M, which insert
1248the date in the form used for datestamped log files.
1249
1250Arguments:
1251 buffer a buffer in which to put the formatted string
1252 buflen the length of the buffer
1253 format the format string - deliberately char * and not uschar *
1254 ... or ap variable list of supplementary arguments
1255
1256Returns: TRUE if the result fitted in the buffer
1257*/
1258
1259BOOL
1260string_format(uschar *buffer, int buflen, const char *format, ...)
1261{
1262BOOL yield;
1263va_list ap;
1264va_start(ap, format);
1265yield = string_vformat(buffer, buflen, format, ap);
1266va_end(ap);
1267return yield;
1268}
1269
1270
1271BOOL
1272string_vformat(uschar *buffer, int buflen, const char *format, va_list ap)
1273{
1274/* We assume numbered ascending order, C does not guarantee that */
1275enum { L_NORMAL=1, L_SHORT=2, L_LONG=3, L_LONGLONG=4, L_LONGDOUBLE=5, L_SIZE=6 };
1276
1277BOOL yield = TRUE;
1278int width, precision;
1279const char *fp = format; /* Deliberately not unsigned */
1280uschar *p = buffer;
1281uschar *last = buffer + buflen - 1;
1282
1283string_datestamp_offset = -1; /* Datestamp not inserted */
1284string_datestamp_length = 0; /* Datestamp not inserted */
1285string_datestamp_type = 0; /* Datestamp not inserted */
1286
1287/* Scan the format and handle the insertions */
1288
1289while (*fp != 0)
1290 {
1291 int length = L_NORMAL;
1292 int *nptr;
1293 int slen;
1294 const char *null = "NULL"; /* ) These variables */
1295 const char *item_start, *s; /* ) are deliberately */
1296 char newformat[16]; /* ) not unsigned */
1297
1298 /* Non-% characters just get copied verbatim */
1299
1300 if (*fp != '%')
1301 {
1302 if (p >= last) { yield = FALSE; break; }
1303 *p++ = (uschar)*fp++;
1304 continue;
1305 }
1306
1307 /* Deal with % characters. Pick off the width and precision, for checking
1308 strings, skipping over the flag and modifier characters. */
1309
1310 item_start = fp;
1311 width = precision = -1;
1312
1313 if (strchr("-+ #0", *(++fp)) != NULL)
1314 {
1315 if (*fp == '#') null = "";
1316 fp++;
1317 }
1318
1319 if (isdigit((uschar)*fp))
1320 {
1321 width = *fp++ - '0';
1322 while (isdigit((uschar)*fp)) width = width * 10 + *fp++ - '0';
1323 }
1324 else if (*fp == '*')
1325 {
1326 width = va_arg(ap, int);
1327 fp++;
1328 }
1329
1330 if (*fp == '.')
1331 {
1332 if (*(++fp) == '*')
1333 {
1334 precision = va_arg(ap, int);
1335 fp++;
1336 }
1337 else
1338 {
1339 precision = 0;
1340 while (isdigit((uschar)*fp))
1341 precision = precision*10 + *fp++ - '0';
1342 }
1343 }
1344
1345 /* Skip over 'h', 'L', 'l', 'll' and 'z', remembering the item length */
1346
1347 if (*fp == 'h')
1348 { fp++; length = L_SHORT; }
1349 else if (*fp == 'L')
1350 { fp++; length = L_LONGDOUBLE; }
1351 else if (*fp == 'l')
1352 {
1353 if (fp[1] == 'l')
1354 {
1355 fp += 2;
1356 length = L_LONGLONG;
1357 }
1358 else
1359 {
1360 fp++;
1361 length = L_LONG;
1362 }
1363 }
1364 else if (*fp == 'z')
1365 { fp++; length = L_SIZE; }
1366
1367 /* Handle each specific format type. */
1368
1369 switch (*fp++)
1370 {
1371 case 'n':
1372 nptr = va_arg(ap, int *);
1373 *nptr = p - buffer;
1374 break;
1375
1376 case 'd':
1377 case 'o':
1378 case 'u':
1379 case 'x':
1380 case 'X':
1381 if (p >= last - ((length > L_LONG)? 24 : 12))
1382 { yield = FALSE; goto END_FORMAT; }
1383 strncpy(newformat, item_start, fp - item_start);
1384 newformat[fp - item_start] = 0;
1385
1386 /* Short int is promoted to int when passing through ..., so we must use
1387 int for va_arg(). */
1388
1389 switch(length)
1390 {
1391 case L_SHORT:
1392 case L_NORMAL: p += sprintf(CS p, newformat, va_arg(ap, int)); break;
1393 case L_LONG: p += sprintf(CS p, newformat, va_arg(ap, long int)); break;
1394 case L_LONGLONG: p += sprintf(CS p, newformat, va_arg(ap, LONGLONG_T)); break;
1395 case L_SIZE: p += sprintf(CS p, newformat, va_arg(ap, size_t)); break;
1396 }
1397 break;
1398
1399 case 'p':
1400 {
1401 void * ptr;
1402 if (p >= last - 24) { yield = FALSE; goto END_FORMAT; }
1403 /* sprintf() saying "(nil)" for a null pointer seems unreliable.
1404 Handle it explicitly. */
1405 if ((ptr = va_arg(ap, void *)))
1406 {
1407 strncpy(newformat, item_start, fp - item_start);
1408 newformat[fp - item_start] = 0;
1409 p += sprintf(CS p, newformat, ptr);
1410 }
1411 else
1412 p += sprintf(CS p, "(nil)");
1413 }
1414 break;
1415
1416 /* %f format is inherently insecure if the numbers that it may be
1417 handed are unknown (e.g. 1e300). However, in Exim, %f is used for
1418 printing load averages, and these are actually stored as integers
1419 (load average * 1000) so the size of the numbers is constrained.
1420 It is also used for formatting sending rates, where the simplicity
1421 of the format prevents overflow. */
1422
1423 case 'f':
1424 case 'e':
1425 case 'E':
1426 case 'g':
1427 case 'G':
1428 if (precision < 0) precision = 6;
1429 if (p >= last - precision - 8) { yield = FALSE; goto END_FORMAT; }
1430 strncpy(newformat, item_start, fp - item_start);
1431 newformat[fp-item_start] = 0;
1432 if (length == L_LONGDOUBLE)
1433 p += sprintf(CS p, newformat, va_arg(ap, long double));
1434 else
1435 p += sprintf(CS p, newformat, va_arg(ap, double));
1436 break;
1437
1438 /* String types */
1439
1440 case '%':
1441 if (p >= last) { yield = FALSE; goto END_FORMAT; }
1442 *p++ = '%';
1443 break;
1444
1445 case 'c':
1446 if (p >= last) { yield = FALSE; goto END_FORMAT; }
1447 *p++ = va_arg(ap, int);
1448 break;
1449
1450 case 'D': /* Insert daily datestamp for log file names */
1451 s = CS tod_stamp(tod_log_datestamp_daily);
1452 string_datestamp_offset = p - buffer; /* Passed back via global */
1453 string_datestamp_length = Ustrlen(s); /* Passed back via global */
1454 string_datestamp_type = tod_log_datestamp_daily;
1455 slen = string_datestamp_length;
1456 goto INSERT_STRING;
1457
1458 case 'M': /* Insert monthly datestamp for log file names */
1459 s = CS tod_stamp(tod_log_datestamp_monthly);
1460 string_datestamp_offset = p - buffer; /* Passed back via global */
1461 string_datestamp_length = Ustrlen(s); /* Passed back via global */
1462 string_datestamp_type = tod_log_datestamp_monthly;
1463 slen = string_datestamp_length;
1464 goto INSERT_STRING;
1465
1466 case 's':
1467 case 'S': /* Forces *lower* case */
1468 case 'T': /* Forces *upper* case */
1469 s = va_arg(ap, char *);
1470
1471 if (s == NULL) s = null;
1472 slen = Ustrlen(s);
1473
1474 INSERT_STRING: /* Come to from %D or %M above */
1475
1476 /* If the width is specified, check that there is a precision
1477 set; if not, set it to the width to prevent overruns of long
1478 strings. */
1479
1480 if (width >= 0)
1481 {
1482 if (precision < 0) precision = width;
1483 }
1484
1485 /* If a width is not specified and the precision is specified, set
1486 the width to the precision, or the string length if shorted. */
1487
1488 else if (precision >= 0)
1489 {
1490 width = (precision < slen)? precision : slen;
1491 }
1492
1493 /* If neither are specified, set them both to the string length. */
1494
1495 else width = precision = slen;
1496
1497 /* Check string space, and add the string to the buffer if ok. If
1498 not OK, add part of the string (debugging uses this to show as
1499 much as possible). */
1500
1501 if (p == last)
1502 {
1503 yield = FALSE;
1504 goto END_FORMAT;
1505 }
1506 if (p >= last - width)
1507 {
1508 yield = FALSE;
1509 width = precision = last - p - 1;
1510 if (width < 0) width = 0;
1511 if (precision < 0) precision = 0;
1512 }
1513 sprintf(CS p, "%*.*s", width, precision, s);
1514 if (fp[-1] == 'S')
1515 while (*p) { *p = tolower(*p); p++; }
1516 else if (fp[-1] == 'T')
1517 while (*p) { *p = toupper(*p); p++; }
1518 else
1519 while (*p) p++;
1520 if (!yield) goto END_FORMAT;
1521 break;
1522
1523 /* Some things are never used in Exim; also catches junk. */
1524
1525 default:
1526 strncpy(newformat, item_start, fp - item_start);
1527 newformat[fp-item_start] = 0;
1528 log_write(0, LOG_MAIN|LOG_PANIC_DIE, "string_format: unsupported type "
1529 "in \"%s\" in \"%s\"", newformat, format);
1530 break;
1531 }
1532 }
1533
1534/* Ensure string is complete; return TRUE if got to the end of the format */
1535
1536END_FORMAT:
1537
1538*p = 0;
1539return yield;
1540}
1541
1542
1543
1544#ifndef COMPILE_UTILITY
1545/*************************************************
1546* Generate an "open failed" message *
1547*************************************************/
1548
1549/* This function creates a message after failure to open a file. It includes a
1550string supplied as data, adds the strerror() text, and if the failure was
1551"Permission denied", reads and includes the euid and egid.
1552
1553Arguments:
1554 eno the value of errno after the failure
1555 format a text format string - deliberately not uschar *
1556 ... arguments for the format string
1557
1558Returns: a message, in dynamic store
1559*/
1560
1561uschar *
1562string_open_failed(int eno, const char *format, ...)
1563{
1564va_list ap;
1565uschar buffer[1024];
1566
1567Ustrcpy(buffer, "failed to open ");
1568va_start(ap, format);
1569
1570/* Use the checked formatting routine to ensure that the buffer
1571does not overflow. It should not, since this is called only for internally
1572specified messages. If it does, the message just gets truncated, and there
1573doesn't seem much we can do about that. */
1574
1575(void)string_vformat(buffer+15, sizeof(buffer) - 15, format, ap);
1576va_end(ap);
1577
1578return (eno == EACCES)?
1579 string_sprintf("%s: %s (euid=%ld egid=%ld)", buffer, strerror(eno),
1580 (long int)geteuid(), (long int)getegid()) :
1581 string_sprintf("%s: %s", buffer, strerror(eno));
1582}
1583#endif /* COMPILE_UTILITY */
1584
1585
1586
1587
1588
1589#ifndef COMPILE_UTILITY
1590/* qsort(3), currently used to sort the environment variables
1591for -bP environment output, needs a function to compare two pointers to string
1592pointers. Here it is. */
1593
1594int
1595string_compare_by_pointer(const void *a, const void *b)
1596{
1597return Ustrcmp(* CUSS a, * CUSS b);
1598}
1599#endif /* COMPILE_UTILITY */
1600
1601
1602
1603/*************************************************
1604**************************************************
1605* Stand-alone test program *
1606**************************************************
1607*************************************************/
1608
1609#ifdef STAND_ALONE
1610int main(void)
1611{
1612uschar buffer[256];
1613
1614printf("Testing is_ip_address\n");
1615
1616while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
1617 {
1618 int offset;
1619 buffer[Ustrlen(buffer) - 1] = 0;
1620 printf("%d\n", string_is_ip_address(buffer, NULL));
1621 printf("%d %d %s\n", string_is_ip_address(buffer, &offset), offset, buffer);
1622 }
1623
1624printf("Testing string_nextinlist\n");
1625
1626while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
1627 {
1628 uschar *list = buffer;
1629 uschar *lp1, *lp2;
1630 uschar item[256];
1631 int sep1 = 0;
1632 int sep2 = 0;
1633
1634 if (*list == '<')
1635 {
1636 sep1 = sep2 = list[1];
1637 list += 2;
1638 }
1639
1640 lp1 = lp2 = list;
1641 for (;;)
1642 {
1643 uschar *item1 = string_nextinlist(&lp1, &sep1, item, sizeof(item));
1644 uschar *item2 = string_nextinlist(&lp2, &sep2, NULL, 0);
1645
1646 if (item1 == NULL && item2 == NULL) break;
1647 if (item == NULL || item2 == NULL || Ustrcmp(item1, item2) != 0)
1648 {
1649 printf("***ERROR\nitem1=\"%s\"\nitem2=\"%s\"\n",
1650 (item1 == NULL)? "NULL" : CS item1,
1651 (item2 == NULL)? "NULL" : CS item2);
1652 break;
1653 }
1654 else printf(" \"%s\"\n", CS item1);
1655 }
1656 }
1657
1658/* This is a horrible lash-up, but it serves its purpose. */
1659
1660printf("Testing string_format\n");
1661
1662while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
1663 {
1664 void *args[3];
1665 long long llargs[3];
1666 double dargs[3];
1667 int dflag = 0;
1668 int llflag = 0;
1669 int n = 0;
1670 int count;
1671 int countset = 0;
1672 uschar format[256];
1673 uschar outbuf[256];
1674 uschar *s;
1675 buffer[Ustrlen(buffer) - 1] = 0;
1676
1677 s = Ustrchr(buffer, ',');
1678 if (s == NULL) s = buffer + Ustrlen(buffer);
1679
1680 Ustrncpy(format, buffer, s - buffer);
1681 format[s-buffer] = 0;
1682
1683 if (*s == ',') s++;
1684
1685 while (*s != 0)
1686 {
1687 uschar *ss = s;
1688 s = Ustrchr(ss, ',');
1689 if (s == NULL) s = ss + Ustrlen(ss);
1690
1691 if (isdigit(*ss))
1692 {
1693 Ustrncpy(outbuf, ss, s-ss);
1694 if (Ustrchr(outbuf, '.') != NULL)
1695 {
1696 dflag = 1;
1697 dargs[n++] = Ustrtod(outbuf, NULL);
1698 }
1699 else if (Ustrstr(outbuf, "ll") != NULL)
1700 {
1701 llflag = 1;
1702 llargs[n++] = strtoull(CS outbuf, NULL, 10);
1703 }
1704 else
1705 {
1706 args[n++] = (void *)Uatoi(outbuf);
1707 }
1708 }
1709
1710 else if (Ustrcmp(ss, "*") == 0)
1711 {
1712 args[n++] = (void *)(&count);
1713 countset = 1;
1714 }
1715
1716 else
1717 {
1718 uschar *sss = malloc(s - ss + 1);
1719 Ustrncpy(sss, ss, s-ss);
1720 args[n++] = sss;
1721 }
1722
1723 if (*s == ',') s++;
1724 }
1725
1726 if (!dflag && !llflag)
1727 printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
1728 args[0], args[1], args[2])? "True" : "False");
1729
1730 else if (dflag)
1731 printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
1732 dargs[0], dargs[1], dargs[2])? "True" : "False");
1733
1734 else printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
1735 llargs[0], llargs[1], llargs[2])? "True" : "False");
1736
1737 printf("%s\n", CS outbuf);
1738 if (countset) printf("count=%d\n", count);
1739 }
1740
1741return 0;
1742}
1743#endif
1744
1745/* End of string.c */