Fix mariadb/mysql macro confusion
[exim.git] / src / src / string.c
... / ...
CommitLineData
1/*************************************************
2* Exim - an Internet mail transport agent *
3*************************************************/
4
5/* Copyright (c) University of Cambridge 1995 - 2016 */
6/* See the file NOTICE for conditions of use and distribution. */
7
8/* Miscellaneous string-handling functions. Some are not required for
9utilities and tests, and are cut out by the COMPILE_UTILITY macro. */
10
11
12#include "exim.h"
13
14
15#ifndef COMPILE_UTILITY
16/*************************************************
17* Test for IP address *
18*************************************************/
19
20/* This used just to be a regular expression, but with IPv6 things are a bit
21more complicated. If the address contains a colon, it is assumed to be a v6
22address (assuming HAVE_IPV6 is set). If a mask is permitted and one is present,
23and maskptr is not NULL, its offset is placed there.
24
25Arguments:
26 s a string
27 maskptr NULL if no mask is permitted to follow
28 otherwise, points to an int where the offset of '/' is placed
29 if there is no / followed by trailing digits, *maskptr is set 0
30
31Returns: 0 if the string is not a textual representation of an IP address
32 4 if it is an IPv4 address
33 6 if it is an IPv6 address
34*/
35
36int
37string_is_ip_address(const uschar *s, int *maskptr)
38{
39int i;
40int yield = 4;
41
42/* If an optional mask is permitted, check for it. If found, pass back the
43offset. */
44
45if (maskptr)
46 {
47 const uschar *ss = s + Ustrlen(s);
48 *maskptr = 0;
49 if (s != ss && isdigit(*(--ss)))
50 {
51 while (ss > s && isdigit(ss[-1])) ss--;
52 if (ss > s && *(--ss) == '/') *maskptr = ss - s;
53 }
54 }
55
56/* A colon anywhere in the string => IPv6 address */
57
58if (Ustrchr(s, ':') != NULL)
59 {
60 BOOL had_double_colon = FALSE;
61 BOOL v4end = FALSE;
62 int count = 0;
63
64 yield = 6;
65
66 /* An IPv6 address must start with hex digit or double colon. A single
67 colon is invalid. */
68
69 if (*s == ':' && *(++s) != ':') return 0;
70
71 /* Now read up to 8 components consisting of up to 4 hex digits each. There
72 may be one and only one appearance of double colon, which implies any number
73 of binary zero bits. The number of preceding components is held in count. */
74
75 for (count = 0; count < 8; count++)
76 {
77 /* If the end of the string is reached before reading 8 components, the
78 address is valid provided a double colon has been read. This also applies
79 if we hit the / that introduces a mask or the % that introduces the
80 interface specifier (scope id) of a link-local address. */
81
82 if (*s == 0 || *s == '%' || *s == '/') return had_double_colon ? yield : 0;
83
84 /* If a component starts with an additional colon, we have hit a double
85 colon. This is permitted to appear once only, and counts as at least
86 one component. The final component may be of this form. */
87
88 if (*s == ':')
89 {
90 if (had_double_colon) return 0;
91 had_double_colon = TRUE;
92 s++;
93 continue;
94 }
95
96 /* If the remainder of the string contains a dot but no colons, we
97 can expect a trailing IPv4 address. This is valid if either there has
98 been no double-colon and this is the 7th component (with the IPv4 address
99 being the 7th & 8th components), OR if there has been a double-colon
100 and fewer than 6 components. */
101
102 if (Ustrchr(s, ':') == NULL && Ustrchr(s, '.') != NULL)
103 {
104 if ((!had_double_colon && count != 6) ||
105 (had_double_colon && count > 6)) return 0;
106 v4end = TRUE;
107 yield = 6;
108 break;
109 }
110
111 /* Check for at least one and not more than 4 hex digits for this
112 component. */
113
114 if (!isxdigit(*s++)) return 0;
115 if (isxdigit(*s) && isxdigit(*(++s)) && isxdigit(*(++s))) s++;
116
117 /* If the component is terminated by colon and there is more to
118 follow, skip over the colon. If there is no more to follow the address is
119 invalid. */
120
121 if (*s == ':' && *(++s) == 0) return 0;
122 }
123
124 /* If about to handle a trailing IPv4 address, drop through. Otherwise
125 all is well if we are at the end of the string or at the mask or at a percent
126 sign, which introduces the interface specifier (scope id) of a link local
127 address. */
128
129 if (!v4end)
130 return (*s == 0 || *s == '%' ||
131 (*s == '/' && maskptr != NULL && *maskptr != 0))? yield : 0;
132 }
133
134/* Test for IPv4 address, which may be the tail-end of an IPv6 address. */
135
136for (i = 0; i < 4; i++)
137 {
138 long n;
139 uschar * end;
140
141 if (i != 0 && *s++ != '.') return 0;
142 n = strtol(CCS s, CSS &end, 10);
143 if (n > 255 || n < 0 || end <= s || end > s+3) return 0;
144 s = end;
145 }
146
147return !*s || (*s == '/' && maskptr && *maskptr != 0) ? yield : 0;
148}
149#endif /* COMPILE_UTILITY */
150
151
152/*************************************************
153* Format message size *
154*************************************************/
155
156/* Convert a message size in bytes to printing form, rounding
157according to the magnitude of the number. A value of zero causes
158a string of spaces to be returned.
159
160Arguments:
161 size the message size in bytes
162 buffer where to put the answer
163
164Returns: pointer to the buffer
165 a string of exactly 5 characters is normally returned
166*/
167
168uschar *
169string_format_size(int size, uschar *buffer)
170{
171if (size == 0) Ustrcpy(buffer, " ");
172else if (size < 1024) sprintf(CS buffer, "%5d", size);
173else if (size < 10*1024)
174 sprintf(CS buffer, "%4.1fK", (double)size / 1024.0);
175else if (size < 1024*1024)
176 sprintf(CS buffer, "%4dK", (size + 512)/1024);
177else if (size < 10*1024*1024)
178 sprintf(CS buffer, "%4.1fM", (double)size / (1024.0 * 1024.0));
179else
180 sprintf(CS buffer, "%4dM", (size + 512 * 1024)/(1024*1024));
181return buffer;
182}
183
184
185
186#ifndef COMPILE_UTILITY
187/*************************************************
188* Convert a number to base 62 format *
189*************************************************/
190
191/* Convert a long integer into an ASCII base 62 string. For Cygwin the value of
192BASE_62 is actually 36. Always return exactly 6 characters plus zero, in a
193static area.
194
195Argument: a long integer
196Returns: pointer to base 62 string
197*/
198
199uschar *
200string_base62(unsigned long int value)
201{
202static uschar yield[7];
203uschar *p = yield + sizeof(yield) - 1;
204*p = 0;
205while (p > yield)
206 {
207 *(--p) = base62_chars[value % BASE_62];
208 value /= BASE_62;
209 }
210return yield;
211}
212#endif /* COMPILE_UTILITY */
213
214
215
216/*************************************************
217* Interpret escape sequence *
218*************************************************/
219
220/* This function is called from several places where escape sequences are to be
221interpreted in strings.
222
223Arguments:
224 pp points a pointer to the initiating "\" in the string;
225 the pointer gets updated to point to the final character
226Returns: the value of the character escape
227*/
228
229int
230string_interpret_escape(const uschar **pp)
231{
232#ifdef COMPILE_UTILITY
233const uschar *hex_digits= CUS"0123456789abcdef";
234#endif
235int ch;
236const uschar *p = *pp;
237ch = *(++p);
238if (isdigit(ch) && ch != '8' && ch != '9')
239 {
240 ch -= '0';
241 if (isdigit(p[1]) && p[1] != '8' && p[1] != '9')
242 {
243 ch = ch * 8 + *(++p) - '0';
244 if (isdigit(p[1]) && p[1] != '8' && p[1] != '9')
245 ch = ch * 8 + *(++p) - '0';
246 }
247 }
248else switch(ch)
249 {
250 case 'b': ch = '\b'; break;
251 case 'f': ch = '\f'; break;
252 case 'n': ch = '\n'; break;
253 case 'r': ch = '\r'; break;
254 case 't': ch = '\t'; break;
255 case 'v': ch = '\v'; break;
256 case 'x':
257 ch = 0;
258 if (isxdigit(p[1]))
259 {
260 ch = ch * 16 +
261 Ustrchr(hex_digits, tolower(*(++p))) - hex_digits;
262 if (isxdigit(p[1])) ch = ch * 16 +
263 Ustrchr(hex_digits, tolower(*(++p))) - hex_digits;
264 }
265 break;
266 }
267*pp = p;
268return ch;
269}
270
271
272
273#ifndef COMPILE_UTILITY
274/*************************************************
275* Ensure string is printable *
276*************************************************/
277
278/* This function is called for critical strings. It checks for any
279non-printing characters, and if any are found, it makes a new copy
280of the string with suitable escape sequences. It is most often called by the
281macro string_printing(), which sets allow_tab TRUE.
282
283Arguments:
284 s the input string
285 allow_tab TRUE to allow tab as a printing character
286
287Returns: string with non-printers encoded as printing sequences
288*/
289
290const uschar *
291string_printing2(const uschar *s, BOOL allow_tab)
292{
293int nonprintcount = 0;
294int length = 0;
295const uschar *t = s;
296uschar *ss, *tt;
297
298while (*t != 0)
299 {
300 int c = *t++;
301 if (!mac_isprint(c) || (!allow_tab && c == '\t')) nonprintcount++;
302 length++;
303 }
304
305if (nonprintcount == 0) return s;
306
307/* Get a new block of store guaranteed big enough to hold the
308expanded string. */
309
310ss = store_get(length + nonprintcount * 3 + 1);
311
312/* Copy everything, escaping non printers. */
313
314t = s;
315tt = ss;
316
317while (*t != 0)
318 {
319 int c = *t;
320 if (mac_isprint(c) && (allow_tab || c != '\t')) *tt++ = *t++; else
321 {
322 *tt++ = '\\';
323 switch (*t)
324 {
325 case '\n': *tt++ = 'n'; break;
326 case '\r': *tt++ = 'r'; break;
327 case '\b': *tt++ = 'b'; break;
328 case '\v': *tt++ = 'v'; break;
329 case '\f': *tt++ = 'f'; break;
330 case '\t': *tt++ = 't'; break;
331 default: sprintf(CS tt, "%03o", *t); tt += 3; break;
332 }
333 t++;
334 }
335 }
336*tt = 0;
337return ss;
338}
339#endif /* COMPILE_UTILITY */
340
341/*************************************************
342* Undo printing escapes in string *
343*************************************************/
344
345/* This function is the reverse of string_printing2. It searches for
346backslash characters and if any are found, it makes a new copy of the
347string with escape sequences parsed. Otherwise it returns the original
348string.
349
350Arguments:
351 s the input string
352
353Returns: string with printing escapes parsed back
354*/
355
356uschar *
357string_unprinting(uschar *s)
358{
359uschar *p, *q, *r, *ss;
360int len, off;
361
362p = Ustrchr(s, '\\');
363if (!p) return s;
364
365len = Ustrlen(s) + 1;
366ss = store_get(len);
367
368q = ss;
369off = p - s;
370if (off)
371 {
372 memcpy(q, s, off);
373 q += off;
374 }
375
376while (*p)
377 {
378 if (*p == '\\')
379 {
380 *q++ = string_interpret_escape((const uschar **)&p);
381 p++;
382 }
383 else
384 {
385 r = Ustrchr(p, '\\');
386 if (!r)
387 {
388 off = Ustrlen(p);
389 memcpy(q, p, off);
390 p += off;
391 q += off;
392 break;
393 }
394 else
395 {
396 off = r - p;
397 memcpy(q, p, off);
398 q += off;
399 p = r;
400 }
401 }
402 }
403*q = '\0';
404
405return ss;
406}
407
408
409
410
411/*************************************************
412* Copy and save string *
413*************************************************/
414
415/* This function assumes that memcpy() is faster than strcpy().
416
417Argument: string to copy
418Returns: copy of string in new store
419*/
420
421uschar *
422string_copy(const uschar *s)
423{
424int len = Ustrlen(s) + 1;
425uschar *ss = store_get(len);
426memcpy(ss, s, len);
427return ss;
428}
429
430
431
432/*************************************************
433* Copy and save string in malloc'd store *
434*************************************************/
435
436/* This function assumes that memcpy() is faster than strcpy().
437
438Argument: string to copy
439Returns: copy of string in new store
440*/
441
442uschar *
443string_copy_malloc(const uschar *s)
444{
445int len = Ustrlen(s) + 1;
446uschar *ss = store_malloc(len);
447memcpy(ss, s, len);
448return ss;
449}
450
451
452
453/*************************************************
454* Copy, lowercase and save string *
455*************************************************/
456
457/*
458Argument: string to copy
459Returns: copy of string in new store, with letters lowercased
460*/
461
462uschar *
463string_copylc(const uschar *s)
464{
465uschar *ss = store_get(Ustrlen(s) + 1);
466uschar *p = ss;
467while (*s != 0) *p++ = tolower(*s++);
468*p = 0;
469return ss;
470}
471
472
473
474/*************************************************
475* Copy and save string, given length *
476*************************************************/
477
478/* It is assumed the data contains no zeros. A zero is added
479onto the end.
480
481Arguments:
482 s string to copy
483 n number of characters
484
485Returns: copy of string in new store
486*/
487
488uschar *
489string_copyn(const uschar *s, int n)
490{
491uschar *ss = store_get(n + 1);
492Ustrncpy(ss, s, n);
493ss[n] = 0;
494return ss;
495}
496
497
498/*************************************************
499* Copy, lowercase, and save string, given length *
500*************************************************/
501
502/* It is assumed the data contains no zeros. A zero is added
503onto the end.
504
505Arguments:
506 s string to copy
507 n number of characters
508
509Returns: copy of string in new store, with letters lowercased
510*/
511
512uschar *
513string_copynlc(uschar *s, int n)
514{
515uschar *ss = store_get(n + 1);
516uschar *p = ss;
517while (n-- > 0) *p++ = tolower(*s++);
518*p = 0;
519return ss;
520}
521
522
523
524/*************************************************
525* Copy string if long, inserting newlines *
526*************************************************/
527
528/* If the given string is longer than 75 characters, it is copied, and within
529the copy, certain space characters are converted into newlines.
530
531Argument: pointer to the string
532Returns: pointer to the possibly altered string
533*/
534
535uschar *
536string_split_message(uschar *msg)
537{
538uschar *s, *ss;
539
540if (msg == NULL || Ustrlen(msg) <= 75) return msg;
541s = ss = msg = string_copy(msg);
542
543for (;;)
544 {
545 int i = 0;
546 while (i < 75 && *ss != 0 && *ss != '\n') ss++, i++;
547 if (*ss == 0) break;
548 if (*ss == '\n')
549 s = ++ss;
550 else
551 {
552 uschar *t = ss + 1;
553 uschar *tt = NULL;
554 while (--t > s + 35)
555 {
556 if (*t == ' ')
557 {
558 if (t[-1] == ':') { tt = t; break; }
559 if (tt == NULL) tt = t;
560 }
561 }
562
563 if (tt == NULL) /* Can't split behind - try ahead */
564 {
565 t = ss + 1;
566 while (*t != 0)
567 {
568 if (*t == ' ' || *t == '\n')
569 { tt = t; break; }
570 t++;
571 }
572 }
573
574 if (tt == NULL) break; /* Can't find anywhere to split */
575 *tt = '\n';
576 s = ss = tt+1;
577 }
578 }
579
580return msg;
581}
582
583
584
585/*************************************************
586* Copy returned DNS domain name, de-escaping *
587*************************************************/
588
589/* If a domain name contains top-bit characters, some resolvers return
590the fully qualified name with those characters turned into escapes. The
591convention is a backslash followed by _decimal_ digits. We convert these
592back into the original binary values. This will be relevant when
593allow_utf8_domains is set true and UTF-8 characters are used in domain
594names. Backslash can also be used to escape other characters, though we
595shouldn't come across them in domain names.
596
597Argument: the domain name string
598Returns: copy of string in new store, de-escaped
599*/
600
601uschar *
602string_copy_dnsdomain(uschar *s)
603{
604uschar *yield;
605uschar *ss = yield = store_get(Ustrlen(s) + 1);
606
607while (*s != 0)
608 {
609 if (*s != '\\')
610 {
611 *ss++ = *s++;
612 }
613 else if (isdigit(s[1]))
614 {
615 *ss++ = (s[1] - '0')*100 + (s[2] - '0')*10 + s[3] - '0';
616 s += 4;
617 }
618 else if (*(++s) != 0)
619 {
620 *ss++ = *s++;
621 }
622 }
623
624*ss = 0;
625return yield;
626}
627
628
629#ifndef COMPILE_UTILITY
630/*************************************************
631* Copy space-terminated or quoted string *
632*************************************************/
633
634/* This function copies from a string until its end, or until whitespace is
635encountered, unless the string begins with a double quote, in which case the
636terminating quote is sought, and escaping within the string is done. The length
637of a de-quoted string can be no longer than the original, since escaping always
638turns n characters into 1 character.
639
640Argument: pointer to the pointer to the first character, which gets updated
641Returns: the new string
642*/
643
644uschar *
645string_dequote(const uschar **sptr)
646{
647const uschar *s = *sptr;
648uschar *t, *yield;
649
650/* First find the end of the string */
651
652if (*s != '\"')
653 {
654 while (*s != 0 && !isspace(*s)) s++;
655 }
656else
657 {
658 s++;
659 while (*s != 0 && *s != '\"')
660 {
661 if (*s == '\\') (void)string_interpret_escape(&s);
662 s++;
663 }
664 if (*s != 0) s++;
665 }
666
667/* Get enough store to copy into */
668
669t = yield = store_get(s - *sptr + 1);
670s = *sptr;
671
672/* Do the copy */
673
674if (*s != '\"')
675 {
676 while (*s != 0 && !isspace(*s)) *t++ = *s++;
677 }
678else
679 {
680 s++;
681 while (*s != 0 && *s != '\"')
682 {
683 if (*s == '\\') *t++ = string_interpret_escape(&s);
684 else *t++ = *s;
685 s++;
686 }
687 if (*s != 0) s++;
688 }
689
690/* Update the pointer and return the terminated copy */
691
692*sptr = s;
693*t = 0;
694return yield;
695}
696#endif /* COMPILE_UTILITY */
697
698
699
700/*************************************************
701* Format a string and save it *
702*************************************************/
703
704/* The formatting is done by string_format, which checks the length of
705everything.
706
707Arguments:
708 format a printf() format - deliberately char * rather than uschar *
709 because it will most usually be a literal string
710 ... arguments for format
711
712Returns: pointer to fresh piece of store containing sprintf'ed string
713*/
714
715uschar *
716string_sprintf(const char *format, ...)
717{
718va_list ap;
719uschar buffer[STRING_SPRINTF_BUFFER_SIZE];
720va_start(ap, format);
721if (!string_vformat(buffer, sizeof(buffer), format, ap))
722 log_write(0, LOG_MAIN|LOG_PANIC_DIE,
723 "string_sprintf expansion was longer than " SIZE_T_FMT
724 "; format string was (%s)\nexpansion started '%.32s'",
725 sizeof(buffer), format, buffer);
726va_end(ap);
727return string_copy(buffer);
728}
729
730
731
732/*************************************************
733* Case-independent strncmp() function *
734*************************************************/
735
736/*
737Arguments:
738 s first string
739 t second string
740 n number of characters to compare
741
742Returns: < 0, = 0, or > 0, according to the comparison
743*/
744
745int
746strncmpic(const uschar *s, const uschar *t, int n)
747{
748while (n--)
749 {
750 int c = tolower(*s++) - tolower(*t++);
751 if (c) return c;
752 }
753return 0;
754}
755
756
757/*************************************************
758* Case-independent strcmp() function *
759*************************************************/
760
761/*
762Arguments:
763 s first string
764 t second string
765
766Returns: < 0, = 0, or > 0, according to the comparison
767*/
768
769int
770strcmpic(const uschar *s, const uschar *t)
771{
772while (*s != 0)
773 {
774 int c = tolower(*s++) - tolower(*t++);
775 if (c != 0) return c;
776 }
777return *t;
778}
779
780
781/*************************************************
782* Case-independent strstr() function *
783*************************************************/
784
785/* The third argument specifies whether whitespace is required
786to follow the matched string.
787
788Arguments:
789 s string to search
790 t substring to search for
791 space_follows if TRUE, match only if whitespace follows
792
793Returns: pointer to substring in string, or NULL if not found
794*/
795
796uschar *
797strstric(uschar *s, uschar *t, BOOL space_follows)
798{
799uschar *p = t;
800uschar *yield = NULL;
801int cl = tolower(*p);
802int cu = toupper(*p);
803
804while (*s)
805 {
806 if (*s == cl || *s == cu)
807 {
808 if (yield == NULL) yield = s;
809 if (*(++p) == 0)
810 {
811 if (!space_follows || s[1] == ' ' || s[1] == '\n' ) return yield;
812 yield = NULL;
813 p = t;
814 }
815 cl = tolower(*p);
816 cu = toupper(*p);
817 s++;
818 }
819 else if (yield != NULL)
820 {
821 yield = NULL;
822 p = t;
823 cl = tolower(*p);
824 cu = toupper(*p);
825 }
826 else s++;
827 }
828return NULL;
829}
830
831
832
833#ifndef COMPILE_UTILITY
834/*************************************************
835* Get next string from separated list *
836*************************************************/
837
838/* Leading and trailing space is removed from each item. The separator in the
839list is controlled by the int pointed to by the separator argument as follows:
840
841 If the value is > 0 it is used as the separator. This is typically used for
842 sublists such as slash-separated options. The value is always a printing
843 character.
844
845 (If the value is actually > UCHAR_MAX there is only one item in the list.
846 This is used for some cases when called via functions that sometimes
847 plough through lists, and sometimes are given single items.)
848
849 If the value is <= 0, the string is inspected for a leading <x, where x is an
850 ispunct() or an iscntrl() character. If found, x is used as the separator. If
851 not found:
852
853 (a) if separator == 0, ':' is used
854 (b) if separator <0, -separator is used
855
856 In all cases the value of the separator that is used is written back to the
857 int so that it is used on subsequent calls as we progress through the list.
858
859A literal ispunct() separator can be represented in an item by doubling, but
860there is no way to include an iscntrl() separator as part of the data.
861
862Arguments:
863 listptr points to a pointer to the current start of the list; the
864 pointer gets updated to point after the end of the next item
865 separator a pointer to the separator character in an int (see above)
866 buffer where to put a copy of the next string in the list; or
867 NULL if the next string is returned in new memory
868 buflen when buffer is not NULL, the size of buffer; otherwise ignored
869
870Returns: pointer to buffer, containing the next substring,
871 or NULL if no more substrings
872*/
873
874uschar *
875string_nextinlist(const uschar **listptr, int *separator, uschar *buffer, int buflen)
876{
877int sep = *separator;
878const uschar *s = *listptr;
879BOOL sep_is_special;
880
881if (s == NULL) return NULL;
882
883/* This allows for a fixed specified separator to be an iscntrl() character,
884but at the time of implementation, this is never the case. However, it's best
885to be conservative. */
886
887while (isspace(*s) && *s != sep) s++;
888
889/* A change of separator is permitted, so look for a leading '<' followed by an
890allowed character. */
891
892if (sep <= 0)
893 {
894 if (*s == '<' && (ispunct(s[1]) || iscntrl(s[1])))
895 {
896 sep = s[1];
897 s += 2;
898 while (isspace(*s) && *s != sep) s++;
899 }
900 else
901 {
902 sep = (sep == 0)? ':' : -sep;
903 }
904 *separator = sep;
905 }
906
907/* An empty string has no list elements */
908
909if (*s == 0) return NULL;
910
911/* Note whether whether or not the separator is an iscntrl() character. */
912
913sep_is_special = iscntrl(sep);
914
915/* Handle the case when a buffer is provided. */
916
917if (buffer != NULL)
918 {
919 int p = 0;
920 for (; *s != 0; s++)
921 {
922 if (*s == sep && (*(++s) != sep || sep_is_special)) break;
923 if (p < buflen - 1) buffer[p++] = *s;
924 }
925 while (p > 0 && isspace(buffer[p-1])) p--;
926 buffer[p] = 0;
927 }
928
929/* Handle the case when a buffer is not provided. */
930
931else
932 {
933 int size = 0;
934 int ptr = 0;
935 const uschar *ss;
936
937 /* We know that *s != 0 at this point. However, it might be pointing to a
938 separator, which could indicate an empty string, or (if an ispunct()
939 character) could be doubled to indicate a separator character as data at the
940 start of a string. Avoid getting working memory for an empty item. */
941
942 if (*s == sep)
943 {
944 s++;
945 if (*s != sep || sep_is_special)
946 {
947 *listptr = s;
948 return string_copy(US"");
949 }
950 }
951
952 /* Not an empty string; the first character is guaranteed to be a data
953 character. */
954
955 for (;;)
956 {
957 for (ss = s + 1; *ss != 0 && *ss != sep; ss++);
958 buffer = string_catn(buffer, &size, &ptr, s, ss-s);
959 s = ss;
960 if (*s == 0 || *(++s) != sep || sep_is_special) break;
961 }
962 while (ptr > 0 && isspace(buffer[ptr-1])) ptr--;
963 buffer[ptr] = 0;
964 }
965
966/* Update the current pointer and return the new string */
967
968*listptr = s;
969return buffer;
970}
971
972
973static const uschar *
974Ustrnchr(const uschar * s, int c, unsigned * len)
975{
976unsigned siz = *len;
977while (siz)
978 {
979 if (!*s) return NULL;
980 if (*s == c)
981 {
982 *len = siz;
983 return s;
984 }
985 s++;
986 siz--;
987 }
988return NULL;
989}
990
991
992/************************************************
993* Add element to separated list *
994************************************************/
995/* This function is used to build a list, returning an allocated null-terminated
996growable string. The given element has any embedded separator characters
997doubled.
998
999Despite having the same growable-string interface as string_cat() the list is
1000always returned null-terminated.
1001
1002Arguments:
1003 list points to the start of the list that is being built, or NULL
1004 if this is a new list that has no contents yet
1005 sz (ptr to) amount of memory allocated for list; zero for a new list
1006 off (ptr to) current list length in chars (insert point for next addition),
1007 zero for a new list
1008 sep list separator character
1009 ele new element to be appended to the list
1010
1011Returns: pointer to the start of the list, changed if copied for expansion.
1012*/
1013
1014uschar *
1015string_append_listele(uschar * list, int * sz, int * off,
1016 uschar sep, const uschar * ele)
1017{
1018uschar * sp;
1019
1020if (list)
1021 list = string_catn(list, sz, off, &sep, 1);
1022
1023while((sp = Ustrchr(ele, sep)))
1024 {
1025 list = string_catn(list, sz, off, ele, sp-ele+1);
1026 list = string_catn(list, sz, off, &sep, 1);
1027 ele = sp+1;
1028 }
1029list = string_cat(list, sz, off, ele);
1030list[*off] = '\0';
1031return list;
1032}
1033
1034
1035uschar *
1036string_append_listele_n(uschar * list, int * sz, int * off,
1037 uschar sep, const uschar * ele, unsigned len)
1038{
1039const uschar * sp;
1040
1041if (list)
1042 list = string_catn(list, sz, off, &sep, 1);
1043
1044while((sp = Ustrnchr(ele, sep, &len)))
1045 {
1046 list = string_catn(list, sz, off, ele, sp-ele+1);
1047 list = string_catn(list, sz, off, &sep, 1);
1048 ele = sp+1;
1049 len--;
1050 }
1051list = string_catn(list, sz, off, ele, len);
1052list[*off] = '\0';
1053return list;
1054}
1055
1056
1057
1058/*************************************************
1059* Add chars to string *
1060*************************************************/
1061
1062/* This function is used when building up strings of unknown length. Room is
1063always left for a terminating zero to be added to the string that is being
1064built. This function does not require the string that is being added to be NUL
1065terminated, because the number of characters to add is given explicitly. It is
1066sometimes called to extract parts of other strings.
1067
1068Arguments:
1069 string points to the start of the string that is being built, or NULL
1070 if this is a new string that has no contents yet
1071 size points to a variable that holds the current capacity of the memory
1072 block (updated if changed)
1073 ptr points to a variable that holds the offset at which to add
1074 characters, updated to the new offset
1075 s points to characters to add
1076 count count of characters to add; must not exceed the length of s, if s
1077 is a C string.
1078
1079If string is given as NULL, *size and *ptr should both be zero.
1080
1081Returns: pointer to the start of the string, changed if copied for expansion.
1082 Note that a NUL is not added, though space is left for one. This is
1083 because string_cat() is often called multiple times to build up a
1084 string - there's no point adding the NUL till the end.
1085
1086*/
1087/* coverity[+alloc] */
1088
1089uschar *
1090string_catn(uschar *string, int *size, int *ptr, const uschar *s, int count)
1091{
1092int p = *ptr;
1093
1094if (p + count >= *size)
1095 {
1096 int oldsize = *size;
1097
1098 /* Mostly, string_cat() is used to build small strings of a few hundred
1099 characters at most. There are times, however, when the strings are very much
1100 longer (for example, a lookup that returns a vast number of alias addresses).
1101 To try to keep things reasonable, we use increments whose size depends on the
1102 existing length of the string. */
1103
1104 int inc = (oldsize < 4096)? 100 : 1024;
1105 while (*size <= p + count) *size += inc;
1106
1107 /* New string */
1108
1109 if (string == NULL) string = store_get(*size);
1110
1111 /* Try to extend an existing allocation. If the result of calling
1112 store_extend() is false, either there isn't room in the current memory block,
1113 or this string is not the top item on the dynamic store stack. We then have
1114 to get a new chunk of store and copy the old string. When building large
1115 strings, it is helpful to call store_release() on the old string, to release
1116 memory blocks that have become empty. (The block will be freed if the string
1117 is at its start.) However, we can do this only if we know that the old string
1118 was the last item on the dynamic memory stack. This is the case if it matches
1119 store_last_get. */
1120
1121 else if (!store_extend(string, oldsize, *size))
1122 {
1123 BOOL release_ok = store_last_get[store_pool] == string;
1124 uschar *newstring = store_get(*size);
1125 memcpy(newstring, string, p);
1126 if (release_ok) store_release(string);
1127 string = newstring;
1128 }
1129 }
1130
1131/* Because we always specify the exact number of characters to copy, we can
1132use memcpy(), which is likely to be more efficient than strncopy() because the
1133latter has to check for zero bytes.
1134
1135The Coverity annotation deals with the lack of correlated variable tracking;
1136common use is a null string and zero size and pointer, on first use for a
1137string being built. The "if" above then allocates, but Coverity assume that
1138the "if" might not happen and whines for a null-deref done by the memcpy(). */
1139
1140/* coverity[deref_parm_field_in_call] : FALSE */
1141memcpy(string + p, s, count);
1142*ptr = p + count;
1143return string;
1144}
1145
1146
1147uschar *
1148string_cat(uschar *string, int *size, int *ptr, const uschar *s)
1149{
1150return string_catn(string, size, ptr, s, Ustrlen(s));
1151}
1152#endif /* COMPILE_UTILITY */
1153
1154
1155
1156#ifndef COMPILE_UTILITY
1157/*************************************************
1158* Append strings to another string *
1159*************************************************/
1160
1161/* This function can be used to build a string from many other strings.
1162It calls string_cat() to do the dirty work.
1163
1164Arguments:
1165 string points to the start of the string that is being built, or NULL
1166 if this is a new string that has no contents yet
1167 size points to a variable that holds the current capacity of the memory
1168 block (updated if changed)
1169 ptr points to a variable that holds the offset at which to add
1170 characters, updated to the new offset
1171 count the number of strings to append
1172 ... "count" uschar* arguments, which must be valid zero-terminated
1173 C strings
1174
1175Returns: pointer to the start of the string, changed if copied for expansion.
1176 The string is not zero-terminated - see string_cat() above.
1177*/
1178
1179uschar *
1180string_append(uschar *string, int *size, int *ptr, int count, ...)
1181{
1182va_list ap;
1183int i;
1184
1185va_start(ap, count);
1186for (i = 0; i < count; i++)
1187 {
1188 uschar *t = va_arg(ap, uschar *);
1189 string = string_cat(string, size, ptr, t);
1190 }
1191va_end(ap);
1192
1193return string;
1194}
1195#endif
1196
1197
1198
1199/*************************************************
1200* Format a string with length checks *
1201*************************************************/
1202
1203/* This function is used to format a string with checking of the length of the
1204output for all conversions. It protects Exim from absent-mindedness when
1205calling functions like debug_printf and string_sprintf, and elsewhere. There
1206are two different entry points to what is actually the same function, depending
1207on whether the variable length list of data arguments are given explicitly or
1208as a va_list item.
1209
1210The formats are the usual printf() ones, with some omissions (never used) and
1211three additions for strings: %S forces lower case, %T forces upper case, and
1212%#s or %#S prints nothing for a NULL string. Without thr # "NULL" is printed
1213(useful in debugging). There is also the addition of %D and %M, which insert
1214the date in the form used for datestamped log files.
1215
1216Arguments:
1217 buffer a buffer in which to put the formatted string
1218 buflen the length of the buffer
1219 format the format string - deliberately char * and not uschar *
1220 ... or ap variable list of supplementary arguments
1221
1222Returns: TRUE if the result fitted in the buffer
1223*/
1224
1225BOOL
1226string_format(uschar *buffer, int buflen, const char *format, ...)
1227{
1228BOOL yield;
1229va_list ap;
1230va_start(ap, format);
1231yield = string_vformat(buffer, buflen, format, ap);
1232va_end(ap);
1233return yield;
1234}
1235
1236
1237BOOL
1238string_vformat(uschar *buffer, int buflen, const char *format, va_list ap)
1239{
1240/* We assume numbered ascending order, C does not guarantee that */
1241enum { L_NORMAL=1, L_SHORT=2, L_LONG=3, L_LONGLONG=4, L_LONGDOUBLE=5, L_SIZE=6 };
1242
1243BOOL yield = TRUE;
1244int width, precision;
1245const char *fp = format; /* Deliberately not unsigned */
1246uschar *p = buffer;
1247uschar *last = buffer + buflen - 1;
1248
1249string_datestamp_offset = -1; /* Datestamp not inserted */
1250string_datestamp_length = 0; /* Datestamp not inserted */
1251string_datestamp_type = 0; /* Datestamp not inserted */
1252
1253/* Scan the format and handle the insertions */
1254
1255while (*fp != 0)
1256 {
1257 int length = L_NORMAL;
1258 int *nptr;
1259 int slen;
1260 const char *null = "NULL"; /* ) These variables */
1261 const char *item_start, *s; /* ) are deliberately */
1262 char newformat[16]; /* ) not unsigned */
1263
1264 /* Non-% characters just get copied verbatim */
1265
1266 if (*fp != '%')
1267 {
1268 if (p >= last) { yield = FALSE; break; }
1269 *p++ = (uschar)*fp++;
1270 continue;
1271 }
1272
1273 /* Deal with % characters. Pick off the width and precision, for checking
1274 strings, skipping over the flag and modifier characters. */
1275
1276 item_start = fp;
1277 width = precision = -1;
1278
1279 if (strchr("-+ #0", *(++fp)) != NULL)
1280 {
1281 if (*fp == '#') null = "";
1282 fp++;
1283 }
1284
1285 if (isdigit((uschar)*fp))
1286 {
1287 width = *fp++ - '0';
1288 while (isdigit((uschar)*fp)) width = width * 10 + *fp++ - '0';
1289 }
1290 else if (*fp == '*')
1291 {
1292 width = va_arg(ap, int);
1293 fp++;
1294 }
1295
1296 if (*fp == '.')
1297 {
1298 if (*(++fp) == '*')
1299 {
1300 precision = va_arg(ap, int);
1301 fp++;
1302 }
1303 else
1304 {
1305 precision = 0;
1306 while (isdigit((uschar)*fp))
1307 precision = precision*10 + *fp++ - '0';
1308 }
1309 }
1310
1311 /* Skip over 'h', 'L', 'l', 'll' and 'z', remembering the item length */
1312
1313 if (*fp == 'h')
1314 { fp++; length = L_SHORT; }
1315 else if (*fp == 'L')
1316 { fp++; length = L_LONGDOUBLE; }
1317 else if (*fp == 'l')
1318 {
1319 if (fp[1] == 'l')
1320 {
1321 fp += 2;
1322 length = L_LONGLONG;
1323 }
1324 else
1325 {
1326 fp++;
1327 length = L_LONG;
1328 }
1329 }
1330 else if (*fp == 'z')
1331 { fp++; length = L_SIZE; }
1332
1333 /* Handle each specific format type. */
1334
1335 switch (*fp++)
1336 {
1337 case 'n':
1338 nptr = va_arg(ap, int *);
1339 *nptr = p - buffer;
1340 break;
1341
1342 case 'd':
1343 case 'o':
1344 case 'u':
1345 case 'x':
1346 case 'X':
1347 if (p >= last - ((length > L_LONG)? 24 : 12))
1348 { yield = FALSE; goto END_FORMAT; }
1349 strncpy(newformat, item_start, fp - item_start);
1350 newformat[fp - item_start] = 0;
1351
1352 /* Short int is promoted to int when passing through ..., so we must use
1353 int for va_arg(). */
1354
1355 switch(length)
1356 {
1357 case L_SHORT:
1358 case L_NORMAL: p += sprintf(CS p, newformat, va_arg(ap, int)); break;
1359 case L_LONG: p += sprintf(CS p, newformat, va_arg(ap, long int)); break;
1360 case L_LONGLONG: p += sprintf(CS p, newformat, va_arg(ap, LONGLONG_T)); break;
1361 case L_SIZE: p += sprintf(CS p, newformat, va_arg(ap, size_t)); break;
1362 }
1363 break;
1364
1365 case 'p':
1366 {
1367 void * ptr;
1368 if (p >= last - 24) { yield = FALSE; goto END_FORMAT; }
1369 /* sprintf() saying "(nil)" for a null pointer seems unreliable.
1370 Handle it explicitly. */
1371 if ((ptr = va_arg(ap, void *)))
1372 {
1373 strncpy(newformat, item_start, fp - item_start);
1374 newformat[fp - item_start] = 0;
1375 p += sprintf(CS p, newformat, ptr);
1376 }
1377 else
1378 p += sprintf(CS p, "(nil)");
1379 }
1380 break;
1381
1382 /* %f format is inherently insecure if the numbers that it may be
1383 handed are unknown (e.g. 1e300). However, in Exim, %f is used for
1384 printing load averages, and these are actually stored as integers
1385 (load average * 1000) so the size of the numbers is constrained.
1386 It is also used for formatting sending rates, where the simplicity
1387 of the format prevents overflow. */
1388
1389 case 'f':
1390 case 'e':
1391 case 'E':
1392 case 'g':
1393 case 'G':
1394 if (precision < 0) precision = 6;
1395 if (p >= last - precision - 8) { yield = FALSE; goto END_FORMAT; }
1396 strncpy(newformat, item_start, fp - item_start);
1397 newformat[fp-item_start] = 0;
1398 if (length == L_LONGDOUBLE)
1399 p += sprintf(CS p, newformat, va_arg(ap, long double));
1400 else
1401 p += sprintf(CS p, newformat, va_arg(ap, double));
1402 break;
1403
1404 /* String types */
1405
1406 case '%':
1407 if (p >= last) { yield = FALSE; goto END_FORMAT; }
1408 *p++ = '%';
1409 break;
1410
1411 case 'c':
1412 if (p >= last) { yield = FALSE; goto END_FORMAT; }
1413 *p++ = va_arg(ap, int);
1414 break;
1415
1416 case 'D': /* Insert daily datestamp for log file names */
1417 s = CS tod_stamp(tod_log_datestamp_daily);
1418 string_datestamp_offset = p - buffer; /* Passed back via global */
1419 string_datestamp_length = Ustrlen(s); /* Passed back via global */
1420 string_datestamp_type = tod_log_datestamp_daily;
1421 slen = string_datestamp_length;
1422 goto INSERT_STRING;
1423
1424 case 'M': /* Insert monthly datestamp for log file names */
1425 s = CS tod_stamp(tod_log_datestamp_monthly);
1426 string_datestamp_offset = p - buffer; /* Passed back via global */
1427 string_datestamp_length = Ustrlen(s); /* Passed back via global */
1428 string_datestamp_type = tod_log_datestamp_monthly;
1429 slen = string_datestamp_length;
1430 goto INSERT_STRING;
1431
1432 case 's':
1433 case 'S': /* Forces *lower* case */
1434 case 'T': /* Forces *upper* case */
1435 s = va_arg(ap, char *);
1436
1437 if (s == NULL) s = null;
1438 slen = Ustrlen(s);
1439
1440 INSERT_STRING: /* Come to from %D or %M above */
1441
1442 /* If the width is specified, check that there is a precision
1443 set; if not, set it to the width to prevent overruns of long
1444 strings. */
1445
1446 if (width >= 0)
1447 {
1448 if (precision < 0) precision = width;
1449 }
1450
1451 /* If a width is not specified and the precision is specified, set
1452 the width to the precision, or the string length if shorted. */
1453
1454 else if (precision >= 0)
1455 {
1456 width = (precision < slen)? precision : slen;
1457 }
1458
1459 /* If neither are specified, set them both to the string length. */
1460
1461 else width = precision = slen;
1462
1463 /* Check string space, and add the string to the buffer if ok. If
1464 not OK, add part of the string (debugging uses this to show as
1465 much as possible). */
1466
1467 if (p == last)
1468 {
1469 yield = FALSE;
1470 goto END_FORMAT;
1471 }
1472 if (p >= last - width)
1473 {
1474 yield = FALSE;
1475 width = precision = last - p - 1;
1476 if (width < 0) width = 0;
1477 if (precision < 0) precision = 0;
1478 }
1479 sprintf(CS p, "%*.*s", width, precision, s);
1480 if (fp[-1] == 'S')
1481 while (*p) { *p = tolower(*p); p++; }
1482 else if (fp[-1] == 'T')
1483 while (*p) { *p = toupper(*p); p++; }
1484 else
1485 while (*p) p++;
1486 if (!yield) goto END_FORMAT;
1487 break;
1488
1489 /* Some things are never used in Exim; also catches junk. */
1490
1491 default:
1492 strncpy(newformat, item_start, fp - item_start);
1493 newformat[fp-item_start] = 0;
1494 log_write(0, LOG_MAIN|LOG_PANIC_DIE, "string_format: unsupported type "
1495 "in \"%s\" in \"%s\"", newformat, format);
1496 break;
1497 }
1498 }
1499
1500/* Ensure string is complete; return TRUE if got to the end of the format */
1501
1502END_FORMAT:
1503
1504*p = 0;
1505return yield;
1506}
1507
1508
1509
1510#ifndef COMPILE_UTILITY
1511/*************************************************
1512* Generate an "open failed" message *
1513*************************************************/
1514
1515/* This function creates a message after failure to open a file. It includes a
1516string supplied as data, adds the strerror() text, and if the failure was
1517"Permission denied", reads and includes the euid and egid.
1518
1519Arguments:
1520 eno the value of errno after the failure
1521 format a text format string - deliberately not uschar *
1522 ... arguments for the format string
1523
1524Returns: a message, in dynamic store
1525*/
1526
1527uschar *
1528string_open_failed(int eno, const char *format, ...)
1529{
1530va_list ap;
1531uschar buffer[1024];
1532
1533Ustrcpy(buffer, "failed to open ");
1534va_start(ap, format);
1535
1536/* Use the checked formatting routine to ensure that the buffer
1537does not overflow. It should not, since this is called only for internally
1538specified messages. If it does, the message just gets truncated, and there
1539doesn't seem much we can do about that. */
1540
1541(void)string_vformat(buffer+15, sizeof(buffer) - 15, format, ap);
1542va_end(ap);
1543
1544return (eno == EACCES)?
1545 string_sprintf("%s: %s (euid=%ld egid=%ld)", buffer, strerror(eno),
1546 (long int)geteuid(), (long int)getegid()) :
1547 string_sprintf("%s: %s", buffer, strerror(eno));
1548}
1549#endif /* COMPILE_UTILITY */
1550
1551
1552
1553
1554
1555#ifndef COMPILE_UTILITY
1556/* qsort(3), currently used to sort the environment variables
1557for -bP environment output, needs a function to compare two pointers to string
1558pointers. Here it is. */
1559
1560int
1561string_compare_by_pointer(const void *a, const void *b)
1562{
1563return Ustrcmp(* CUSS a, * CUSS b);
1564}
1565#endif /* COMPILE_UTILITY */
1566
1567
1568
1569/*************************************************
1570**************************************************
1571* Stand-alone test program *
1572**************************************************
1573*************************************************/
1574
#ifdef STAND_ALONE

/* Interactive test driver, compiled only when STAND_ALONE is defined. It runs
three read loops on stdin, one after another, each ending at end of file:

  1. each line is passed to string_is_ip_address(), without and with a mask
     pointer;
  2. each line is split with string_nextinlist(), once into a fixed buffer and
     once into allocated store, and the two results are compared; a line that
     starts with '<' takes its separator from the following character;
  3. each line is "format,arg,arg,arg" and is passed to string_format(). An
     argument that starts with a digit is numeric (a double if it contains
     '.', a long long if it contains "ll", otherwise an int); "*" stands for a
     pointer to an int that receives a %n count; anything else is a string.
     At most three arguments are used.

Returns:  0 on completion, 1 if memory for an argument cannot be obtained
*/

int main(void)
{
uschar buffer[256];

printf("Testing is_ip_address\n");

while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
  {
  int offset = 0;
  int type;
  int len = Ustrlen(buffer);

  /* Remove the newline, but only if there is one */

  if (len > 0 && buffer[len-1] == '\n') buffer[len-1] = 0;

  printf("%d\n", string_is_ip_address(buffer, NULL));

  /* The call has to be made before printf()'s arguments are evaluated,
  because the order of evaluation of arguments is unspecified and offset is
  set by the call. */

  type = string_is_ip_address(buffer, &offset);
  printf("%d %d %s\n", type, offset, buffer);
  }

printf("Testing string_nextinlist\n");

while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
  {
  uschar *list = buffer;
  uschar *lp1, *lp2;
  uschar item[256];
  int sep1 = 0;
  int sep2 = 0;

  if (*list == '<')
    {
    sep1 = sep2 = list[1];
    list += 2;
    }

  lp1 = lp2 = list;
  for (;;)
    {
    uschar *item1 = string_nextinlist(&lp1, &sep1, item, sizeof(item));
    uschar *item2 = string_nextinlist(&lp2, &sep2, NULL, 0);

    if (item1 == NULL && item2 == NULL) break;
    if (item1 == NULL || item2 == NULL || Ustrcmp(item1, item2) != 0)
      {
      printf("***ERROR\nitem1=\"%s\"\nitem2=\"%s\"\n",
        (item1 == NULL)? "NULL" : CS item1,
        (item2 == NULL)? "NULL" : CS item2);
      break;
      }
    else printf("  \"%s\"\n", CS item1);
    }
  }

/* This is a horrible lash-up, but it serves its purpose. */

printf("Testing string_format\n");

while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
  {
  void *args[3] = { NULL, NULL, NULL };
  long long llargs[3] = { 0, 0, 0 };
  double dargs[3] = { 0.0, 0.0, 0.0 };
  uschar *copies[3];           /* Allocated string arguments, freed below */
  int ncopies = 0;
  int dflag = 0;
  int llflag = 0;
  int n = 0;
  int count = 0;
  int countset = 0;
  int len = Ustrlen(buffer);
  uschar format[256];
  uschar outbuf[256];
  uschar *s;

  if (len > 0 && buffer[len-1] == '\n') buffer[len-1] = 0;

  s = Ustrchr(buffer, ',');
  if (s == NULL) s = buffer + Ustrlen(buffer);

  Ustrncpy(format, buffer, s - buffer);
  format[s-buffer] = 0;

  if (*s == ',') s++;

  /* Only three arguments can be passed on; stop when they are all used. */

  while (*s != 0 && n < 3)
    {
    uschar *ss = s;
    int arglen;

    s = Ustrchr(ss, ',');
    if (s == NULL) s = ss + Ustrlen(ss);
    arglen = s - ss;

    if (isdigit(*ss))
      {
      /* Ustrncpy() does not add a terminator when it copies fewer
      characters than the source holds, so add one. */

      Ustrncpy(outbuf, ss, arglen);
      outbuf[arglen] = 0;
      if (Ustrchr(outbuf, '.') != NULL)
        {
        dflag = 1;
        dargs[n++] = Ustrtod(outbuf, NULL);
        }
      else if (Ustrstr(outbuf, "ll") != NULL)
        {
        llflag = 1;
        llargs[n++] = strtoull(CS outbuf, NULL, 10);
        }
      else
        {
        args[n++] = (void *)Uatoi(outbuf);
        }
      }

    /* Look at this argument only, so that "*" is recognized wherever it
    comes in the list. */

    else if (arglen == 1 && *ss == '*')
      {
      args[n++] = (void *)(&count);
      countset = 1;
      }

    else
      {
      uschar *sss = malloc(arglen + 1);
      if (sss == NULL)
        {
        fprintf(stderr, "malloc failed\n");
        return 1;
        }
      Ustrncpy(sss, ss, arglen);
      sss[arglen] = 0;
      copies[ncopies++] = sss;
      args[n++] = sss;
      }

    if (*s == ',') s++;
    }

  if (*s != 0) fprintf(stderr, "more than 3 arguments: the rest are ignored\n");

  if (!dflag && !llflag)
    printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
      args[0], args[1], args[2])? "True" : "False");

  else if (dflag)
    printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
      dargs[0], dargs[1], dargs[2])? "True" : "False");

  else printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
    llargs[0], llargs[1], llargs[2])? "True" : "False");

  printf("%s\n", CS outbuf);
  if (countset) printf("count=%d\n", count);

  while (ncopies > 0) free(copies[--ncopies]);
  }

return 0;
}
#endif
1710
1711/* End of string.c */