Remove extraneous line - benign but pointless.
[exim.git] / src / src / string.c
... / ...
CommitLineData
1/*************************************************
2* Exim - an Internet mail transport agent *
3*************************************************/
4
5/* Copyright (c) University of Cambridge 1995 - 2018 */
6/* See the file NOTICE for conditions of use and distribution. */
7
8/* Miscellaneous string-handling functions. Some are not required for
9utilities and tests, and are cut out by the COMPILE_UTILITY macro. */
10
11
12#include "exim.h"
13
14
15#ifndef COMPILE_UTILITY
16/*************************************************
17* Test for IP address *
18*************************************************/
19
20/* This used just to be a regular expression, but with IPv6 things are a bit
21more complicated. If the address contains a colon, it is assumed to be a v6
22address (assuming HAVE_IPV6 is set). If a mask is permitted and one is present,
23and maskptr is not NULL, its offset is placed there.
24
25Arguments:
26 s a string
27 maskptr NULL if no mask is permitted to follow
28 otherwise, points to an int where the offset of '/' is placed
29 if there is no / followed by trailing digits, *maskptr is set 0
30
31Returns: 0 if the string is not a textual representation of an IP address
32 4 if it is an IPv4 address
33 6 if it is an IPv6 address
34*/
35
36int
37string_is_ip_address(const uschar *s, int *maskptr)
38{
39int i;
40int yield = 4;
41
42/* If an optional mask is permitted, check for it. If found, pass back the
43offset. */
44
45if (maskptr)
46 {
47 const uschar *ss = s + Ustrlen(s);
48 *maskptr = 0;
49 if (s != ss && isdigit(*(--ss)))
50 {
51 while (ss > s && isdigit(ss[-1])) ss--;
52 if (ss > s && *(--ss) == '/') *maskptr = ss - s;
53 }
54 }
55
56/* A colon anywhere in the string => IPv6 address */
57
58if (Ustrchr(s, ':') != NULL)
59 {
60 BOOL had_double_colon = FALSE;
61 BOOL v4end = FALSE;
62 int count = 0;
63
64 yield = 6;
65
66 /* An IPv6 address must start with hex digit or double colon. A single
67 colon is invalid. */
68
69 if (*s == ':' && *(++s) != ':') return 0;
70
71 /* Now read up to 8 components consisting of up to 4 hex digits each. There
72 may be one and only one appearance of double colon, which implies any number
73 of binary zero bits. The number of preceding components is held in count. */
74
75 for (count = 0; count < 8; count++)
76 {
77 /* If the end of the string is reached before reading 8 components, the
78 address is valid provided a double colon has been read. This also applies
79 if we hit the / that introduces a mask or the % that introduces the
80 interface specifier (scope id) of a link-local address. */
81
82 if (*s == 0 || *s == '%' || *s == '/') return had_double_colon ? yield : 0;
83
84 /* If a component starts with an additional colon, we have hit a double
85 colon. This is permitted to appear once only, and counts as at least
86 one component. The final component may be of this form. */
87
88 if (*s == ':')
89 {
90 if (had_double_colon) return 0;
91 had_double_colon = TRUE;
92 s++;
93 continue;
94 }
95
96 /* If the remainder of the string contains a dot but no colons, we
97 can expect a trailing IPv4 address. This is valid if either there has
98 been no double-colon and this is the 7th component (with the IPv4 address
99 being the 7th & 8th components), OR if there has been a double-colon
100 and fewer than 6 components. */
101
102 if (Ustrchr(s, ':') == NULL && Ustrchr(s, '.') != NULL)
103 {
104 if ((!had_double_colon && count != 6) ||
105 (had_double_colon && count > 6)) return 0;
106 v4end = TRUE;
107 yield = 6;
108 break;
109 }
110
111 /* Check for at least one and not more than 4 hex digits for this
112 component. */
113
114 if (!isxdigit(*s++)) return 0;
115 if (isxdigit(*s) && isxdigit(*(++s)) && isxdigit(*(++s))) s++;
116
117 /* If the component is terminated by colon and there is more to
118 follow, skip over the colon. If there is no more to follow the address is
119 invalid. */
120
121 if (*s == ':' && *(++s) == 0) return 0;
122 }
123
124 /* If about to handle a trailing IPv4 address, drop through. Otherwise
125 all is well if we are at the end of the string or at the mask or at a percent
126 sign, which introduces the interface specifier (scope id) of a link local
127 address. */
128
129 if (!v4end)
130 return (*s == 0 || *s == '%' ||
131 (*s == '/' && maskptr != NULL && *maskptr != 0))? yield : 0;
132 }
133
134/* Test for IPv4 address, which may be the tail-end of an IPv6 address. */
135
136for (i = 0; i < 4; i++)
137 {
138 long n;
139 uschar * end;
140
141 if (i != 0 && *s++ != '.') return 0;
142 n = strtol(CCS s, CSS &end, 10);
143 if (n > 255 || n < 0 || end <= s || end > s+3) return 0;
144 s = end;
145 }
146
147return !*s || (*s == '/' && maskptr && *maskptr != 0) ? yield : 0;
148}
149#endif /* COMPILE_UTILITY */
150
151
152/*************************************************
153* Format message size *
154*************************************************/
155
156/* Convert a message size in bytes to printing form, rounding
157according to the magnitude of the number. A value of zero causes
158a string of spaces to be returned.
159
160Arguments:
161 size the message size in bytes
162 buffer where to put the answer
163
164Returns: pointer to the buffer
165 a string of exactly 5 characters is normally returned
166*/
167
168uschar *
169string_format_size(int size, uschar *buffer)
170{
171if (size == 0) Ustrcpy(buffer, " ");
172else if (size < 1024) sprintf(CS buffer, "%5d", size);
173else if (size < 10*1024)
174 sprintf(CS buffer, "%4.1fK", (double)size / 1024.0);
175else if (size < 1024*1024)
176 sprintf(CS buffer, "%4dK", (size + 512)/1024);
177else if (size < 10*1024*1024)
178 sprintf(CS buffer, "%4.1fM", (double)size / (1024.0 * 1024.0));
179else
180 sprintf(CS buffer, "%4dM", (size + 512 * 1024)/(1024*1024));
181return buffer;
182}
183
184
185
186#ifndef COMPILE_UTILITY
187/*************************************************
188* Convert a number to base 62 format *
189*************************************************/
190
191/* Convert a long integer into an ASCII base 62 string. For Cygwin the value of
192BASE_62 is actually 36. Always return exactly 6 characters plus zero, in a
193static area.
194
195Argument: a long integer
196Returns: pointer to base 62 string
197*/
198
199uschar *
200string_base62(unsigned long int value)
201{
202static uschar yield[7];
203uschar *p = yield + sizeof(yield) - 1;
204*p = 0;
205while (p > yield)
206 {
207 *(--p) = base62_chars[value % BASE_62];
208 value /= BASE_62;
209 }
210return yield;
211}
212#endif /* COMPILE_UTILITY */
213
214
215
216/*************************************************
217* Interpret escape sequence *
218*************************************************/
219
220/* This function is called from several places where escape sequences are to be
221interpreted in strings.
222
223Arguments:
224 pp points a pointer to the initiating "\" in the string;
225 the pointer gets updated to point to the final character
226Returns: the value of the character escape
227*/
228
229int
230string_interpret_escape(const uschar **pp)
231{
232#ifdef COMPILE_UTILITY
233const uschar *hex_digits= CUS"0123456789abcdef";
234#endif
235int ch;
236const uschar *p = *pp;
237ch = *(++p);
238if (isdigit(ch) && ch != '8' && ch != '9')
239 {
240 ch -= '0';
241 if (isdigit(p[1]) && p[1] != '8' && p[1] != '9')
242 {
243 ch = ch * 8 + *(++p) - '0';
244 if (isdigit(p[1]) && p[1] != '8' && p[1] != '9')
245 ch = ch * 8 + *(++p) - '0';
246 }
247 }
248else switch(ch)
249 {
250 case 'b': ch = '\b'; break;
251 case 'f': ch = '\f'; break;
252 case 'n': ch = '\n'; break;
253 case 'r': ch = '\r'; break;
254 case 't': ch = '\t'; break;
255 case 'v': ch = '\v'; break;
256 case 'x':
257 ch = 0;
258 if (isxdigit(p[1]))
259 {
260 ch = ch * 16 +
261 Ustrchr(hex_digits, tolower(*(++p))) - hex_digits;
262 if (isxdigit(p[1])) ch = ch * 16 +
263 Ustrchr(hex_digits, tolower(*(++p))) - hex_digits;
264 }
265 break;
266 }
267*pp = p;
268return ch;
269}
270
271
272
273#ifndef COMPILE_UTILITY
274/*************************************************
275* Ensure string is printable *
276*************************************************/
277
278/* This function is called for critical strings. It checks for any
279non-printing characters, and if any are found, it makes a new copy
280of the string with suitable escape sequences. It is most often called by the
281macro string_printing(), which sets allow_tab TRUE.
282
283Arguments:
284 s the input string
285 allow_tab TRUE to allow tab as a printing character
286
287Returns: string with non-printers encoded as printing sequences
288*/
289
290const uschar *
291string_printing2(const uschar *s, BOOL allow_tab)
292{
293int nonprintcount = 0;
294int length = 0;
295const uschar *t = s;
296uschar *ss, *tt;
297
298while (*t != 0)
299 {
300 int c = *t++;
301 if (!mac_isprint(c) || (!allow_tab && c == '\t')) nonprintcount++;
302 length++;
303 }
304
305if (nonprintcount == 0) return s;
306
307/* Get a new block of store guaranteed big enough to hold the
308expanded string. */
309
310ss = store_get(length + nonprintcount * 3 + 1);
311
312/* Copy everything, escaping non printers. */
313
314t = s;
315tt = ss;
316
317while (*t != 0)
318 {
319 int c = *t;
320 if (mac_isprint(c) && (allow_tab || c != '\t')) *tt++ = *t++; else
321 {
322 *tt++ = '\\';
323 switch (*t)
324 {
325 case '\n': *tt++ = 'n'; break;
326 case '\r': *tt++ = 'r'; break;
327 case '\b': *tt++ = 'b'; break;
328 case '\v': *tt++ = 'v'; break;
329 case '\f': *tt++ = 'f'; break;
330 case '\t': *tt++ = 't'; break;
331 default: sprintf(CS tt, "%03o", *t); tt += 3; break;
332 }
333 t++;
334 }
335 }
336*tt = 0;
337return ss;
338}
339#endif /* COMPILE_UTILITY */
340
341/*************************************************
342* Undo printing escapes in string *
343*************************************************/
344
345/* This function is the reverse of string_printing2. It searches for
346backslash characters and if any are found, it makes a new copy of the
347string with escape sequences parsed. Otherwise it returns the original
348string.
349
350Arguments:
351 s the input string
352
353Returns: string with printing escapes parsed back
354*/
355
356uschar *
357string_unprinting(uschar *s)
358{
359uschar *p, *q, *r, *ss;
360int len, off;
361
362p = Ustrchr(s, '\\');
363if (!p) return s;
364
365len = Ustrlen(s) + 1;
366ss = store_get(len);
367
368q = ss;
369off = p - s;
370if (off)
371 {
372 memcpy(q, s, off);
373 q += off;
374 }
375
376while (*p)
377 {
378 if (*p == '\\')
379 {
380 *q++ = string_interpret_escape((const uschar **)&p);
381 p++;
382 }
383 else
384 {
385 r = Ustrchr(p, '\\');
386 if (!r)
387 {
388 off = Ustrlen(p);
389 memcpy(q, p, off);
390 p += off;
391 q += off;
392 break;
393 }
394 else
395 {
396 off = r - p;
397 memcpy(q, p, off);
398 q += off;
399 p = r;
400 }
401 }
402 }
403*q = '\0';
404
405return ss;
406}
407
408
409
410
411/*************************************************
412* Copy and save string *
413*************************************************/
414
415/* This function assumes that memcpy() is faster than strcpy().
416
417Argument: string to copy
418Returns: copy of string in new store
419*/
420
421uschar *
422string_copy(const uschar *s)
423{
424int len = Ustrlen(s) + 1;
425uschar *ss = store_get(len);
426memcpy(ss, s, len);
427return ss;
428}
429
430
431
432/*************************************************
433* Copy and save string in malloc'd store *
434*************************************************/
435
436/* This function assumes that memcpy() is faster than strcpy().
437
438Argument: string to copy
439Returns: copy of string in new store
440*/
441
442uschar *
443string_copy_malloc(const uschar *s)
444{
445int len = Ustrlen(s) + 1;
446uschar *ss = store_malloc(len);
447memcpy(ss, s, len);
448return ss;
449}
450
451
452
453/*************************************************
454* Copy, lowercase and save string *
455*************************************************/
456
457/*
458Argument: string to copy
459Returns: copy of string in new store, with letters lowercased
460*/
461
462uschar *
463string_copylc(const uschar *s)
464{
465uschar *ss = store_get(Ustrlen(s) + 1);
466uschar *p = ss;
467while (*s != 0) *p++ = tolower(*s++);
468*p = 0;
469return ss;
470}
471
472
473
474/*************************************************
475* Copy and save string, given length *
476*************************************************/
477
478/* It is assumed the data contains no zeros. A zero is added
479onto the end.
480
481Arguments:
482 s string to copy
483 n number of characters
484
485Returns: copy of string in new store
486*/
487
488uschar *
489string_copyn(const uschar *s, int n)
490{
491uschar *ss = store_get(n + 1);
492Ustrncpy(ss, s, n);
493ss[n] = 0;
494return ss;
495}
496
497
498/*************************************************
499* Copy, lowercase, and save string, given length *
500*************************************************/
501
502/* It is assumed the data contains no zeros. A zero is added
503onto the end.
504
505Arguments:
506 s string to copy
507 n number of characters
508
509Returns: copy of string in new store, with letters lowercased
510*/
511
512uschar *
513string_copynlc(uschar *s, int n)
514{
515uschar *ss = store_get(n + 1);
516uschar *p = ss;
517while (n-- > 0) *p++ = tolower(*s++);
518*p = 0;
519return ss;
520}
521
522
523
524/*************************************************
525* Copy string if long, inserting newlines *
526*************************************************/
527
528/* If the given string is longer than 75 characters, it is copied, and within
529the copy, certain space characters are converted into newlines.
530
531Argument: pointer to the string
532Returns: pointer to the possibly altered string
533*/
534
535uschar *
536string_split_message(uschar *msg)
537{
538uschar *s, *ss;
539
540if (msg == NULL || Ustrlen(msg) <= 75) return msg;
541s = ss = msg = string_copy(msg);
542
543for (;;)
544 {
545 int i = 0;
546 while (i < 75 && *ss != 0 && *ss != '\n') ss++, i++;
547 if (*ss == 0) break;
548 if (*ss == '\n')
549 s = ++ss;
550 else
551 {
552 uschar *t = ss + 1;
553 uschar *tt = NULL;
554 while (--t > s + 35)
555 {
556 if (*t == ' ')
557 {
558 if (t[-1] == ':') { tt = t; break; }
559 if (tt == NULL) tt = t;
560 }
561 }
562
563 if (tt == NULL) /* Can't split behind - try ahead */
564 {
565 t = ss + 1;
566 while (*t != 0)
567 {
568 if (*t == ' ' || *t == '\n')
569 { tt = t; break; }
570 t++;
571 }
572 }
573
574 if (tt == NULL) break; /* Can't find anywhere to split */
575 *tt = '\n';
576 s = ss = tt+1;
577 }
578 }
579
580return msg;
581}
582
583
584
585/*************************************************
586* Copy returned DNS domain name, de-escaping *
587*************************************************/
588
589/* If a domain name contains top-bit characters, some resolvers return
590the fully qualified name with those characters turned into escapes. The
591convention is a backslash followed by _decimal_ digits. We convert these
592back into the original binary values. This will be relevant when
593allow_utf8_domains is set true and UTF-8 characters are used in domain
594names. Backslash can also be used to escape other characters, though we
595shouldn't come across them in domain names.
596
597Argument: the domain name string
598Returns: copy of string in new store, de-escaped
599*/
600
601uschar *
602string_copy_dnsdomain(uschar *s)
603{
604uschar *yield;
605uschar *ss = yield = store_get(Ustrlen(s) + 1);
606
607while (*s != 0)
608 {
609 if (*s != '\\')
610 {
611 *ss++ = *s++;
612 }
613 else if (isdigit(s[1]))
614 {
615 *ss++ = (s[1] - '0')*100 + (s[2] - '0')*10 + s[3] - '0';
616 s += 4;
617 }
618 else if (*(++s) != 0)
619 {
620 *ss++ = *s++;
621 }
622 }
623
624*ss = 0;
625return yield;
626}
627
628
629#ifndef COMPILE_UTILITY
630/*************************************************
631* Copy space-terminated or quoted string *
632*************************************************/
633
634/* This function copies from a string until its end, or until whitespace is
635encountered, unless the string begins with a double quote, in which case the
636terminating quote is sought, and escaping within the string is done. The length
637of a de-quoted string can be no longer than the original, since escaping always
638turns n characters into 1 character.
639
640Argument: pointer to the pointer to the first character, which gets updated
641Returns: the new string
642*/
643
644uschar *
645string_dequote(const uschar **sptr)
646{
647const uschar *s = *sptr;
648uschar *t, *yield;
649
650/* First find the end of the string */
651
652if (*s != '\"')
653 {
654 while (*s != 0 && !isspace(*s)) s++;
655 }
656else
657 {
658 s++;
659 while (*s != 0 && *s != '\"')
660 {
661 if (*s == '\\') (void)string_interpret_escape(&s);
662 s++;
663 }
664 if (*s != 0) s++;
665 }
666
667/* Get enough store to copy into */
668
669t = yield = store_get(s - *sptr + 1);
670s = *sptr;
671
672/* Do the copy */
673
674if (*s != '\"')
675 {
676 while (*s != 0 && !isspace(*s)) *t++ = *s++;
677 }
678else
679 {
680 s++;
681 while (*s != 0 && *s != '\"')
682 {
683 if (*s == '\\') *t++ = string_interpret_escape(&s);
684 else *t++ = *s;
685 s++;
686 }
687 if (*s != 0) s++;
688 }
689
690/* Update the pointer and return the terminated copy */
691
692*sptr = s;
693*t = 0;
694return yield;
695}
696#endif /* COMPILE_UTILITY */
697
698
699
700/*************************************************
701* Format a string and save it *
702*************************************************/
703
704/* The formatting is done by string_format, which checks the length of
705everything.
706
707Arguments:
708 format a printf() format - deliberately char * rather than uschar *
709 because it will most usually be a literal string
710 ... arguments for format
711
712Returns: pointer to fresh piece of store containing sprintf'ed string
713*/
714
715uschar *
716string_sprintf(const char *format, ...)
717{
718va_list ap;
719uschar buffer[STRING_SPRINTF_BUFFER_SIZE];
720va_start(ap, format);
721if (!string_vformat(buffer, sizeof(buffer), format, ap))
722 log_write(0, LOG_MAIN|LOG_PANIC_DIE,
723 "string_sprintf expansion was longer than " SIZE_T_FMT
724 "; format string was (%s)\nexpansion started '%.32s'",
725 sizeof(buffer), format, buffer);
726va_end(ap);
727return string_copy(buffer);
728}
729
730
731
732/*************************************************
733* Case-independent strncmp() function *
734*************************************************/
735
736/*
737Arguments:
738 s first string
739 t second string
740 n number of characters to compare
741
742Returns: < 0, = 0, or > 0, according to the comparison
743*/
744
745int
746strncmpic(const uschar *s, const uschar *t, int n)
747{
748while (n--)
749 {
750 int c = tolower(*s++) - tolower(*t++);
751 if (c) return c;
752 }
753return 0;
754}
755
756
757/*************************************************
758* Case-independent strcmp() function *
759*************************************************/
760
761/*
762Arguments:
763 s first string
764 t second string
765
766Returns: < 0, = 0, or > 0, according to the comparison
767*/
768
769int
770strcmpic(const uschar *s, const uschar *t)
771{
772while (*s != 0)
773 {
774 int c = tolower(*s++) - tolower(*t++);
775 if (c != 0) return c;
776 }
777return *t;
778}
779
780
781/*************************************************
782* Case-independent strstr() function *
783*************************************************/
784
785/* The third argument specifies whether whitespace is required
786to follow the matched string.
787
788Arguments:
789 s string to search
790 t substring to search for
791 space_follows if TRUE, match only if whitespace follows
792
793Returns: pointer to substring in string, or NULL if not found
794*/
795
796uschar *
797strstric(uschar *s, uschar *t, BOOL space_follows)
798{
799uschar *p = t;
800uschar *yield = NULL;
801int cl = tolower(*p);
802int cu = toupper(*p);
803
804while (*s)
805 {
806 if (*s == cl || *s == cu)
807 {
808 if (yield == NULL) yield = s;
809 if (*(++p) == 0)
810 {
811 if (!space_follows || s[1] == ' ' || s[1] == '\n' ) return yield;
812 yield = NULL;
813 p = t;
814 }
815 cl = tolower(*p);
816 cu = toupper(*p);
817 s++;
818 }
819 else if (yield != NULL)
820 {
821 yield = NULL;
822 p = t;
823 cl = tolower(*p);
824 cu = toupper(*p);
825 }
826 else s++;
827 }
828return NULL;
829}
830
831
832
833#ifndef COMPILE_UTILITY
834/*************************************************
835* Get next string from separated list *
836*************************************************/
837
838/* Leading and trailing space is removed from each item. The separator in the
839list is controlled by the int pointed to by the separator argument as follows:
840
841 If the value is > 0 it is used as the separator. This is typically used for
842 sublists such as slash-separated options. The value is always a printing
843 character.
844
845 (If the value is actually > UCHAR_MAX there is only one item in the list.
846 This is used for some cases when called via functions that sometimes
847 plough through lists, and sometimes are given single items.)
848
849 If the value is <= 0, the string is inspected for a leading <x, where x is an
850 ispunct() or an iscntrl() character. If found, x is used as the separator. If
851 not found:
852
853 (a) if separator == 0, ':' is used
854 (b) if separator <0, -separator is used
855
856 In all cases the value of the separator that is used is written back to the
857 int so that it is used on subsequent calls as we progress through the list.
858
859A literal ispunct() separator can be represented in an item by doubling, but
860there is no way to include an iscntrl() separator as part of the data.
861
862Arguments:
863 listptr points to a pointer to the current start of the list; the
864 pointer gets updated to point after the end of the next item
865 separator a pointer to the separator character in an int (see above)
866 buffer where to put a copy of the next string in the list; or
867 NULL if the next string is returned in new memory
868 buflen when buffer is not NULL, the size of buffer; otherwise ignored
869
870Returns: pointer to buffer, containing the next substring,
871 or NULL if no more substrings
872*/
873
874uschar *
875string_nextinlist(const uschar **listptr, int *separator, uschar *buffer, int buflen)
876{
877int sep = *separator;
878const uschar *s = *listptr;
879BOOL sep_is_special;
880
881if (s == NULL) return NULL;
882
883/* This allows for a fixed specified separator to be an iscntrl() character,
884but at the time of implementation, this is never the case. However, it's best
885to be conservative. */
886
887while (isspace(*s) && *s != sep) s++;
888
889/* A change of separator is permitted, so look for a leading '<' followed by an
890allowed character. */
891
892if (sep <= 0)
893 {
894 if (*s == '<' && (ispunct(s[1]) || iscntrl(s[1])))
895 {
896 sep = s[1];
897 s += 2;
898 while (isspace(*s) && *s != sep) s++;
899 }
900 else
901 {
902 sep = (sep == 0)? ':' : -sep;
903 }
904 *separator = sep;
905 }
906
907/* An empty string has no list elements */
908
909if (*s == 0) return NULL;
910
911/* Note whether whether or not the separator is an iscntrl() character. */
912
913sep_is_special = iscntrl(sep);
914
915/* Handle the case when a buffer is provided. */
916
917if (buffer)
918 {
919 int p = 0;
920 for (; *s != 0; s++)
921 {
922 if (*s == sep && (*(++s) != sep || sep_is_special)) break;
923 if (p < buflen - 1) buffer[p++] = *s;
924 }
925 while (p > 0 && isspace(buffer[p-1])) p--;
926 buffer[p] = 0;
927 }
928
929/* Handle the case when a buffer is not provided. */
930
931else
932 {
933 const uschar *ss;
934 gstring * g = NULL;
935
936 /* We know that *s != 0 at this point. However, it might be pointing to a
937 separator, which could indicate an empty string, or (if an ispunct()
938 character) could be doubled to indicate a separator character as data at the
939 start of a string. Avoid getting working memory for an empty item. */
940
941 if (*s == sep)
942 {
943 s++;
944 if (*s != sep || sep_is_special)
945 {
946 *listptr = s;
947 return string_copy(US"");
948 }
949 }
950
951 /* Not an empty string; the first character is guaranteed to be a data
952 character. */
953
954 for (;;)
955 {
956 for (ss = s + 1; *ss != 0 && *ss != sep; ss++) ;
957 g = string_catn(g, s, ss-s);
958 s = ss;
959 if (*s == 0 || *(++s) != sep || sep_is_special) break;
960 }
961 while (g->ptr > 0 && isspace(g->s[g->ptr-1])) g->ptr--;
962 buffer = string_from_gstring(g);
963 gstring_reset_unused(g);
964 }
965
966/* Update the current pointer and return the new string */
967
968*listptr = s;
969return buffer;
970}
971
972
973static const uschar *
974Ustrnchr(const uschar * s, int c, unsigned * len)
975{
976unsigned siz = *len;
977while (siz)
978 {
979 if (!*s) return NULL;
980 if (*s == c)
981 {
982 *len = siz;
983 return s;
984 }
985 s++;
986 siz--;
987 }
988return NULL;
989}
990
991
992/************************************************
993* Add element to separated list *
994************************************************/
995/* This function is used to build a list, returning an allocated null-terminated
996growable string. The given element has any embedded separator characters
997doubled.
998
999Despite having the same growable-string interface as string_cat() the list is
1000always returned null-terminated.
1001
1002Arguments:
1003 list expanding-string for the list that is being built, or NULL
1004 if this is a new list that has no contents yet
1005 sep list separator character
1006 ele new element to be appended to the list
1007
1008Returns: pointer to the start of the list, changed if copied for expansion.
1009*/
1010
1011gstring *
1012string_append_listele(gstring * list, uschar sep, const uschar * ele)
1013{
1014uschar * sp;
1015
1016if (list && list->ptr)
1017 list = string_catn(list, &sep, 1);
1018
1019while((sp = Ustrchr(ele, sep)))
1020 {
1021 list = string_catn(list, ele, sp-ele+1);
1022 list = string_catn(list, &sep, 1);
1023 ele = sp+1;
1024 }
1025list = string_cat(list, ele);
1026(void) string_from_gstring(list);
1027return list;
1028}
1029
1030
1031gstring *
1032string_append_listele_n(gstring * list, uschar sep, const uschar * ele,
1033 unsigned len)
1034{
1035const uschar * sp;
1036
1037if (list && list->ptr)
1038 list = string_catn(list, &sep, 1);
1039
1040while((sp = Ustrnchr(ele, sep, &len)))
1041 {
1042 list = string_catn(list, ele, sp-ele+1);
1043 list = string_catn(list, &sep, 1);
1044 ele = sp+1;
1045 len--;
1046 }
1047list = string_catn(list, ele, len);
1048(void) string_from_gstring(list);
1049return list;
1050}
1051
1052
1053
1054/************************************************/
1055/* Create a growable-string with some preassigned space */
1056
1057gstring *
1058string_get(unsigned size)
1059{
1060gstring * g = store_get(sizeof(gstring) + size);
1061g->size = size;
1062g->ptr = 0;
1063g->s = US(g + 1);
1064return g;
1065}
1066
1067/* NUL-terminate the C string in the growable-string, and return it. */
1068
1069uschar *
1070string_from_gstring(gstring * g)
1071{
1072if (!g) return NULL;
1073g->s[g->ptr] = '\0';
1074return g->s;
1075}
1076
1077void
1078gstring_reset_unused(gstring * g)
1079{
1080store_reset(g->s + (g->size = g->ptr + 1));
1081}
1082
1083/*************************************************
1084* Add chars to string *
1085*************************************************/
1086
1087/* Arguments:
1088 g the grawable-string
1089 p current end of data
1090 count amount to grow by
1091*/
1092
1093static void
1094gstring_grow(gstring * g, int p, int count)
1095{
1096int oldsize = g->size;
1097
1098/* Mostly, string_cat() is used to build small strings of a few hundred
1099characters at most. There are times, however, when the strings are very much
1100longer (for example, a lookup that returns a vast number of alias addresses).
1101To try to keep things reasonable, we use increments whose size depends on the
1102existing length of the string. */
1103
1104unsigned inc = oldsize < 4096 ? 127 : 1023;
1105g->size = ((p + count + inc) & ~inc) + 1;
1106
1107/* Try to extend an existing allocation. If the result of calling
1108store_extend() is false, either there isn't room in the current memory block,
1109or this string is not the top item on the dynamic store stack. We then have
1110to get a new chunk of store and copy the old string. When building large
1111strings, it is helpful to call store_release() on the old string, to release
1112memory blocks that have become empty. (The block will be freed if the string
1113is at its start.) However, we can do this only if we know that the old string
1114was the last item on the dynamic memory stack. This is the case if it matches
1115store_last_get. */
1116
1117if (!store_extend(g->s, oldsize, g->size))
1118 g->s = store_newblock(g->s, g->size, p);
1119}
1120
1121
1122
1123/* This function is used when building up strings of unknown length. Room is
1124always left for a terminating zero to be added to the string that is being
1125built. This function does not require the string that is being added to be NUL
1126terminated, because the number of characters to add is given explicitly. It is
1127sometimes called to extract parts of other strings.
1128
1129Arguments:
1130 string points to the start of the string that is being built, or NULL
1131 if this is a new string that has no contents yet
1132 s points to characters to add
1133 count count of characters to add; must not exceed the length of s, if s
1134 is a C string.
1135
1136Returns: pointer to the start of the string, changed if copied for expansion.
1137 Note that a NUL is not added, though space is left for one. This is
1138 because string_cat() is often called multiple times to build up a
1139 string - there's no point adding the NUL till the end.
1140
1141*/
1142/* coverity[+alloc] */
1143
1144gstring *
1145string_catn(gstring * g, const uschar *s, int count)
1146{
1147int p;
1148
1149if (!g)
1150 {
1151 unsigned inc = count < 4096 ? 127 : 1023;
1152 unsigned size = ((count + inc) & ~inc) + 1;
1153 g = string_get(size);
1154 }
1155
1156p = g->ptr;
1157if (p + count >= g->size)
1158 gstring_grow(g, p, count);
1159
1160/* Because we always specify the exact number of characters to copy, we can
1161use memcpy(), which is likely to be more efficient than strncopy() because the
1162latter has to check for zero bytes. */
1163
1164memcpy(g->s + p, s, count);
1165g->ptr = p + count;
1166return g;
1167}
1168
1169
1170gstring *
1171string_cat(gstring *string, const uschar *s)
1172{
1173return string_catn(string, s, Ustrlen(s));
1174}
1175
1176
1177
1178/*************************************************
1179* Append strings to another string *
1180*************************************************/
1181
1182/* This function can be used to build a string from many other strings.
1183It calls string_cat() to do the dirty work.
1184
1185Arguments:
1186 string expanding-string that is being built, or NULL
1187 if this is a new string that has no contents yet
1188 count the number of strings to append
1189 ... "count" uschar* arguments, which must be valid zero-terminated
1190 C strings
1191
1192Returns: pointer to the start of the string, changed if copied for expansion.
1193 The string is not zero-terminated - see string_cat() above.
1194*/
1195
1196__inline__ gstring *
1197string_append(gstring *string, int count, ...)
1198{
1199va_list ap;
1200
1201va_start(ap, count);
1202while (count-- > 0)
1203 {
1204 uschar *t = va_arg(ap, uschar *);
1205 string = string_cat(string, t);
1206 }
1207va_end(ap);
1208
1209return string;
1210}
1211#endif
1212
1213
1214
1215/*************************************************
1216* Format a string with length checks *
1217*************************************************/
1218
1219/* This function is used to format a string with checking of the length of the
1220output for all conversions. It protects Exim from absent-mindedness when
1221calling functions like debug_printf and string_sprintf, and elsewhere. There
1222are two different entry points to what is actually the same function, depending
1223on whether the variable length list of data arguments are given explicitly or
1224as a va_list item.
1225
1226The formats are the usual printf() ones, with some omissions (never used) and
1227three additions for strings: %S forces lower case, %T forces upper case, and
1228%#s or %#S prints nothing for a NULL string. Without the # "NULL" is printed
1229(useful in debugging). There is also the addition of %D and %M, which insert
1230the date in the form used for datestamped log files.
1231
1232Arguments:
1233 buffer a buffer in which to put the formatted string
1234 buflen the length of the buffer
1235 format the format string - deliberately char * and not uschar *
1236 ... or ap variable list of supplementary arguments
1237
1238Returns: TRUE if the result fitted in the buffer
1239*/
1240
1241BOOL
1242string_format(uschar *buffer, int buflen, const char *format, ...)
1243{
1244BOOL yield;
1245va_list ap;
1246va_start(ap, format);
1247yield = string_vformat(buffer, buflen, format, ap);
1248va_end(ap);
1249return yield;
1250}
1251
1252
1253BOOL
1254string_vformat(uschar *buffer, int buflen, const char *format, va_list ap)
1255{
1256/* We assume numbered ascending order, C does not guarantee that */
1257enum { L_NORMAL=1, L_SHORT=2, L_LONG=3, L_LONGLONG=4, L_LONGDOUBLE=5, L_SIZE=6 };
1258
1259BOOL yield = TRUE;
1260int width, precision;
1261const char *fp = format; /* Deliberately not unsigned */
1262uschar *p = buffer;
1263uschar *last = buffer + buflen - 1;
1264
1265string_datestamp_offset = -1; /* Datestamp not inserted */
1266string_datestamp_length = 0; /* Datestamp not inserted */
1267string_datestamp_type = 0; /* Datestamp not inserted */
1268
1269/* Scan the format and handle the insertions */
1270
1271while (*fp != 0)
1272 {
1273 int length = L_NORMAL;
1274 int *nptr;
1275 int slen;
1276 const char *null = "NULL"; /* ) These variables */
1277 const char *item_start, *s; /* ) are deliberately */
1278 char newformat[16]; /* ) not unsigned */
1279
1280 /* Non-% characters just get copied verbatim */
1281
1282 if (*fp != '%')
1283 {
1284 if (p >= last) { yield = FALSE; break; }
1285 *p++ = (uschar)*fp++;
1286 continue;
1287 }
1288
1289 /* Deal with % characters. Pick off the width and precision, for checking
1290 strings, skipping over the flag and modifier characters. */
1291
1292 item_start = fp;
1293 width = precision = -1;
1294
1295 if (strchr("-+ #0", *(++fp)) != NULL)
1296 {
1297 if (*fp == '#') null = "";
1298 fp++;
1299 }
1300
1301 if (isdigit((uschar)*fp))
1302 {
1303 width = *fp++ - '0';
1304 while (isdigit((uschar)*fp)) width = width * 10 + *fp++ - '0';
1305 }
1306 else if (*fp == '*')
1307 {
1308 width = va_arg(ap, int);
1309 fp++;
1310 }
1311
1312 if (*fp == '.')
1313 {
1314 if (*(++fp) == '*')
1315 {
1316 precision = va_arg(ap, int);
1317 fp++;
1318 }
1319 else
1320 {
1321 precision = 0;
1322 while (isdigit((uschar)*fp))
1323 precision = precision*10 + *fp++ - '0';
1324 }
1325 }
1326
1327 /* Skip over 'h', 'L', 'l', 'll' and 'z', remembering the item length */
1328
1329 if (*fp == 'h')
1330 { fp++; length = L_SHORT; }
1331 else if (*fp == 'L')
1332 { fp++; length = L_LONGDOUBLE; }
1333 else if (*fp == 'l')
1334 {
1335 if (fp[1] == 'l')
1336 {
1337 fp += 2;
1338 length = L_LONGLONG;
1339 }
1340 else
1341 {
1342 fp++;
1343 length = L_LONG;
1344 }
1345 }
1346 else if (*fp == 'z')
1347 { fp++; length = L_SIZE; }
1348
1349 /* Handle each specific format type. */
1350
1351 switch (*fp++)
1352 {
1353 case 'n':
1354 nptr = va_arg(ap, int *);
1355 *nptr = p - buffer;
1356 break;
1357
1358 case 'd':
1359 case 'o':
1360 case 'u':
1361 case 'x':
1362 case 'X':
1363 if (p >= last - ((length > L_LONG)? 24 : 12))
1364 { yield = FALSE; goto END_FORMAT; }
1365 strncpy(newformat, item_start, fp - item_start);
1366 newformat[fp - item_start] = 0;
1367
1368 /* Short int is promoted to int when passing through ..., so we must use
1369 int for va_arg(). */
1370
1371 switch(length)
1372 {
1373 case L_SHORT:
1374 case L_NORMAL: p += sprintf(CS p, newformat, va_arg(ap, int)); break;
1375 case L_LONG: p += sprintf(CS p, newformat, va_arg(ap, long int)); break;
1376 case L_LONGLONG: p += sprintf(CS p, newformat, va_arg(ap, LONGLONG_T)); break;
1377 case L_SIZE: p += sprintf(CS p, newformat, va_arg(ap, size_t)); break;
1378 }
1379 break;
1380
1381 case 'p':
1382 {
1383 void * ptr;
1384 if (p >= last - 24) { yield = FALSE; goto END_FORMAT; }
1385 /* sprintf() saying "(nil)" for a null pointer seems unreliable.
1386 Handle it explicitly. */
1387 if ((ptr = va_arg(ap, void *)))
1388 {
1389 strncpy(newformat, item_start, fp - item_start);
1390 newformat[fp - item_start] = 0;
1391 p += sprintf(CS p, newformat, ptr);
1392 }
1393 else
1394 p += sprintf(CS p, "(nil)");
1395 }
1396 break;
1397
1398 /* %f format is inherently insecure if the numbers that it may be
1399 handed are unknown (e.g. 1e300). However, in Exim, %f is used for
1400 printing load averages, and these are actually stored as integers
1401 (load average * 1000) so the size of the numbers is constrained.
1402 It is also used for formatting sending rates, where the simplicity
1403 of the format prevents overflow. */
1404
1405 case 'f':
1406 case 'e':
1407 case 'E':
1408 case 'g':
1409 case 'G':
1410 if (precision < 0) precision = 6;
1411 if (p >= last - precision - 8) { yield = FALSE; goto END_FORMAT; }
1412 strncpy(newformat, item_start, fp - item_start);
1413 newformat[fp-item_start] = 0;
1414 if (length == L_LONGDOUBLE)
1415 p += sprintf(CS p, newformat, va_arg(ap, long double));
1416 else
1417 p += sprintf(CS p, newformat, va_arg(ap, double));
1418 break;
1419
1420 /* String types */
1421
1422 case '%':
1423 if (p >= last) { yield = FALSE; goto END_FORMAT; }
1424 *p++ = '%';
1425 break;
1426
1427 case 'c':
1428 if (p >= last) { yield = FALSE; goto END_FORMAT; }
1429 *p++ = va_arg(ap, int);
1430 break;
1431
1432 case 'D': /* Insert daily datestamp for log file names */
1433 s = CS tod_stamp(tod_log_datestamp_daily);
1434 string_datestamp_offset = p - buffer; /* Passed back via global */
1435 string_datestamp_length = Ustrlen(s); /* Passed back via global */
1436 string_datestamp_type = tod_log_datestamp_daily;
1437 slen = string_datestamp_length;
1438 goto INSERT_STRING;
1439
1440 case 'M': /* Insert monthly datestamp for log file names */
1441 s = CS tod_stamp(tod_log_datestamp_monthly);
1442 string_datestamp_offset = p - buffer; /* Passed back via global */
1443 string_datestamp_length = Ustrlen(s); /* Passed back via global */
1444 string_datestamp_type = tod_log_datestamp_monthly;
1445 slen = string_datestamp_length;
1446 goto INSERT_STRING;
1447
1448 case 's':
1449 case 'S': /* Forces *lower* case */
1450 case 'T': /* Forces *upper* case */
1451 s = va_arg(ap, char *);
1452
1453 if (s == NULL) s = null;
1454 slen = Ustrlen(s);
1455
1456 INSERT_STRING: /* Come to from %D or %M above */
1457
1458 /* If the width is specified, check that there is a precision
1459 set; if not, set it to the width to prevent overruns of long
1460 strings. */
1461
1462 if (width >= 0)
1463 {
1464 if (precision < 0) precision = width;
1465 }
1466
1467 /* If a width is not specified and the precision is specified, set
1468 the width to the precision, or the string length if shorted. */
1469
1470 else if (precision >= 0)
1471 {
1472 width = (precision < slen)? precision : slen;
1473 }
1474
1475 /* If neither are specified, set them both to the string length. */
1476
1477 else width = precision = slen;
1478
1479 /* Check string space, and add the string to the buffer if ok. If
1480 not OK, add part of the string (debugging uses this to show as
1481 much as possible). */
1482
1483 if (p == last)
1484 {
1485 yield = FALSE;
1486 goto END_FORMAT;
1487 }
1488 if (p >= last - width)
1489 {
1490 yield = FALSE;
1491 width = precision = last - p - 1;
1492 if (width < 0) width = 0;
1493 if (precision < 0) precision = 0;
1494 }
1495 sprintf(CS p, "%*.*s", width, precision, s);
1496 if (fp[-1] == 'S')
1497 while (*p) { *p = tolower(*p); p++; }
1498 else if (fp[-1] == 'T')
1499 while (*p) { *p = toupper(*p); p++; }
1500 else
1501 while (*p) p++;
1502 if (!yield) goto END_FORMAT;
1503 break;
1504
1505 /* Some things are never used in Exim; also catches junk. */
1506
1507 default:
1508 strncpy(newformat, item_start, fp - item_start);
1509 newformat[fp-item_start] = 0;
1510 log_write(0, LOG_MAIN|LOG_PANIC_DIE, "string_format: unsupported type "
1511 "in \"%s\" in \"%s\"", newformat, format);
1512 break;
1513 }
1514 }
1515
1516/* Ensure string is complete; return TRUE if got to the end of the format */
1517
1518END_FORMAT:
1519
1520*p = 0;
1521return yield;
1522}
1523
1524
1525
1526#ifndef COMPILE_UTILITY
1527/*************************************************
1528* Generate an "open failed" message *
1529*************************************************/
1530
1531/* This function creates a message after failure to open a file. It includes a
1532string supplied as data, adds the strerror() text, and if the failure was
1533"Permission denied", reads and includes the euid and egid.
1534
1535Arguments:
1536 eno the value of errno after the failure
1537 format a text format string - deliberately not uschar *
1538 ... arguments for the format string
1539
1540Returns: a message, in dynamic store
1541*/
1542
1543uschar *
1544string_open_failed(int eno, const char *format, ...)
1545{
1546va_list ap;
1547uschar buffer[1024];
1548
1549Ustrcpy(buffer, "failed to open ");
1550va_start(ap, format);
1551
1552/* Use the checked formatting routine to ensure that the buffer
1553does not overflow. It should not, since this is called only for internally
1554specified messages. If it does, the message just gets truncated, and there
1555doesn't seem much we can do about that. */
1556
1557(void)string_vformat(buffer+15, sizeof(buffer) - 15, format, ap);
1558va_end(ap);
1559
1560return (eno == EACCES)?
1561 string_sprintf("%s: %s (euid=%ld egid=%ld)", buffer, strerror(eno),
1562 (long int)geteuid(), (long int)getegid()) :
1563 string_sprintf("%s: %s", buffer, strerror(eno));
1564}
1565#endif /* COMPILE_UTILITY */
1566
1567
1568
1569
1570
1571#ifndef COMPILE_UTILITY
1572/* qsort(3), currently used to sort the environment variables
1573for -bP environment output, needs a function to compare two pointers to string
1574pointers. Here it is. */
1575
1576int
1577string_compare_by_pointer(const void *a, const void *b)
1578{
1579return Ustrcmp(* CUSS a, * CUSS b);
1580}
1581#endif /* COMPILE_UTILITY */
1582
1583
1584
1585/*************************************************
1586**************************************************
1587* Stand-alone test program *
1588**************************************************
1589*************************************************/
1590
1591#ifdef STAND_ALONE
1592int main(void)
1593{
1594uschar buffer[256];
1595
1596printf("Testing is_ip_address\n");
1597
1598while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
1599 {
1600 int offset;
1601 buffer[Ustrlen(buffer) - 1] = 0;
1602 printf("%d\n", string_is_ip_address(buffer, NULL));
1603 printf("%d %d %s\n", string_is_ip_address(buffer, &offset), offset, buffer);
1604 }
1605
1606printf("Testing string_nextinlist\n");
1607
1608while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
1609 {
1610 uschar *list = buffer;
1611 uschar *lp1, *lp2;
1612 uschar item[256];
1613 int sep1 = 0;
1614 int sep2 = 0;
1615
1616 if (*list == '<')
1617 {
1618 sep1 = sep2 = list[1];
1619 list += 2;
1620 }
1621
1622 lp1 = lp2 = list;
1623 for (;;)
1624 {
1625 uschar *item1 = string_nextinlist(&lp1, &sep1, item, sizeof(item));
1626 uschar *item2 = string_nextinlist(&lp2, &sep2, NULL, 0);
1627
1628 if (item1 == NULL && item2 == NULL) break;
1629 if (item == NULL || item2 == NULL || Ustrcmp(item1, item2) != 0)
1630 {
1631 printf("***ERROR\nitem1=\"%s\"\nitem2=\"%s\"\n",
1632 (item1 == NULL)? "NULL" : CS item1,
1633 (item2 == NULL)? "NULL" : CS item2);
1634 break;
1635 }
1636 else printf(" \"%s\"\n", CS item1);
1637 }
1638 }
1639
1640/* This is a horrible lash-up, but it serves its purpose. */
1641
1642printf("Testing string_format\n");
1643
1644while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
1645 {
1646 void *args[3];
1647 long long llargs[3];
1648 double dargs[3];
1649 int dflag = 0;
1650 int llflag = 0;
1651 int n = 0;
1652 int count;
1653 int countset = 0;
1654 uschar format[256];
1655 uschar outbuf[256];
1656 uschar *s;
1657 buffer[Ustrlen(buffer) - 1] = 0;
1658
1659 s = Ustrchr(buffer, ',');
1660 if (s == NULL) s = buffer + Ustrlen(buffer);
1661
1662 Ustrncpy(format, buffer, s - buffer);
1663 format[s-buffer] = 0;
1664
1665 if (*s == ',') s++;
1666
1667 while (*s != 0)
1668 {
1669 uschar *ss = s;
1670 s = Ustrchr(ss, ',');
1671 if (s == NULL) s = ss + Ustrlen(ss);
1672
1673 if (isdigit(*ss))
1674 {
1675 Ustrncpy(outbuf, ss, s-ss);
1676 if (Ustrchr(outbuf, '.') != NULL)
1677 {
1678 dflag = 1;
1679 dargs[n++] = Ustrtod(outbuf, NULL);
1680 }
1681 else if (Ustrstr(outbuf, "ll") != NULL)
1682 {
1683 llflag = 1;
1684 llargs[n++] = strtoull(CS outbuf, NULL, 10);
1685 }
1686 else
1687 {
1688 args[n++] = (void *)Uatoi(outbuf);
1689 }
1690 }
1691
1692 else if (Ustrcmp(ss, "*") == 0)
1693 {
1694 args[n++] = (void *)(&count);
1695 countset = 1;
1696 }
1697
1698 else
1699 {
1700 uschar *sss = malloc(s - ss + 1);
1701 Ustrncpy(sss, ss, s-ss);
1702 args[n++] = sss;
1703 }
1704
1705 if (*s == ',') s++;
1706 }
1707
1708 if (!dflag && !llflag)
1709 printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
1710 args[0], args[1], args[2])? "True" : "False");
1711
1712 else if (dflag)
1713 printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
1714 dargs[0], dargs[1], dargs[2])? "True" : "False");
1715
1716 else printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
1717 llargs[0], llargs[1], llargs[2])? "True" : "False");
1718
1719 printf("%s\n", CS outbuf);
1720 if (countset) printf("count=%d\n", count);
1721 }
1722
1723return 0;
1724}
1725#endif
1726
1727/* End of string.c */