tidying
[exim.git] / src / util / renew-opendmarc-tlds.sh
1 #!/bin/sh -eu
2 #
3 # Short version of this script:
4 # curl -f -o /var/cache/exim/opendmarc.tlds https://publicsuffix.org/list/public_suffix_list.dat
5 # but run as Exim runtime user, writing to a place it can write to, and with
6 # sanity checks and atomic replacement.
7 #
8 # For now, we deliberately leave the invalid file around for analysis
9 # with .<pid> suffix.
10 #
11 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~8< cut here >8~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
12 #
13 # Create a cron-job as the Exim run-time user to invoke this daily, with a
14 # single parameter, 'cron'. Eg:
15 #
16 # 3 4 * * * /usr/local/sbin/renew-opendmarc-tlds.sh cron
17 #
# That will, at 3 minutes past the 4th hour (in whatever timezone cron is
# running it), invoke this script with 'cron'; we will then sleep for a random
# 10 to 49 seconds before continuing.
21 #
22 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~8< cut here >8~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
23 #
24 # This should be "pretty portable"; the only things it depends upon are:
25 # * a POSIX shell which additionally implements 'local' (dash works)
26 # * the 'curl' command; change the fetch_candidate() function to replace that
27 # * the 'stat' command, to get the size of a file; else Perl
28 # + change size_of() if need be; it's defined per-OS
29 # * the 'hexdump' command and /dev/urandom existing
30 # + used when invoked with 'cron', to avoid retrieving on a minute boundary
31 # and contending with many other automated systems.
32 # + with bash/zsh, can replace with: $(( 10 + ( RANDOM % 40 ) ))
33 # + on Debian/Ubuntu systems, hexdump is in the 'bsdmainutils' package.
34
# How we identify ourselves to the server.  Consider adding a reachable
# contact address (something like noc@example.org) inside the parentheses, so
# that if something goes wrong and the server operators need to step in, they
# can see from their logs who to contact instead of just blocking your IP:
CurlUserAgent='renew-opendmarc-tlds/0.1 (distributed with Exim)'

# Your Exim run-time user; change to match (exim -n -bP exim_user):
RuntimeUser='_exim'

# Directory the list is kept in.  This must not be a directory which
# untrusted users can write to:
StateDir='/var/cache/exim'

# Where the list is retrieved from:
URL='https://publicsuffix.org/list/public_suffix_list.dat'

# File name of the list, inside StateDir:
TargetShortFile='opendmarc.tlds'

# A replacement file must be at least this percentage of the size of the
# file it replaces, or it's an error:
MinNewSizeRation=90

# Every one of these regexps must be matched by the new file, or it's an
# error:
MustExistRegexps='
^ac\.uk$
^org$
^tech$
'

# Freeze all of the settings above.
readonly CurlUserAgent RuntimeUser StateDir URL TargetShortFile
readonly MinNewSizeRation MustExistRegexps
61
62 # =======================8< end of configuration >8=======================
63
# Abort on any unchecked command failure (-e) and on any reference to an
# unset variable (-u).
set -e -u

# Final location of the list, and the per-process (PID-suffixed) file that a
# fresh download is written to before it is moved into place.
FullTargetPath="${StateDir}/${TargetShortFile}"
WorkingFile="${FullTargetPath}.$$"
readonly FullTargetPath WorkingFile
68
# Name this script reports itself under in diagnostics.
progname="$(basename "$0")"

# note MESSAGE...
# Print "<progname>: <message>" on stderr; all arguments are joined into a
# single message string.
note() {
  printf '%s: %s\n' "$progname" "$*" >&2
}

# die MESSAGE...
# Report the message through note(), then exit the script with status 1.
die() {
  note "$@"
  exit 1
}
72
# Refuse to run as anybody other than the configured run-time user, to guard
# against stomping on file-permissions.  ${RuntimeUser:?} additionally aborts
# if the setting is unset or empty.
if [ ".$(id -un)" != ".${RuntimeUser:?}" ]; then
  die "must be invoked as ${RuntimeUser}"
fi
76
# fetch_candidate
# Download ${URL} into ${WorkingFile} using curl, identifying ourselves with
# ${CurlUserAgent}.  Returns curl's exit status.
#   --fail        treat HTTP error responses as a failure
#   --show-error  still print an error message on failure, despite ...
#   --silent      ... which turns off the progress meter
# Replace the body of this function to fetch with something other than curl.
fetch_candidate() {
  curl --user-agent "$CurlUserAgent" \
    --fail \
    --show-error \
    --silent \
    --output "${WorkingFile}" \
    "${URL}"
}
80
# size_of FILE
# Print the size of FILE, in bytes, on stdout.  The implementation is chosen
# once, here, from the kernel name reported by uname, because the option
# syntax of stat differs between systems.
case "$(uname -s)" in
  Linux)
    size_of() {
      stat -c %s "$1"
    }
    ;;
  Darwin | *BSD)
    size_of() {
      stat -f %z "$1"
    }
    ;;
  *)
    # Anything else: no stat syntax we can rely on, so ask Perl for
    # element 7 (the size) of its stat() result.
    size_of() {
      perl -le 'print((stat($ARGV[0]))[7])' -- "$1"
    }
    ;;
esac
94
# sanity_check_candidate
# Decide whether the freshly downloaded ${WorkingFile} is fit to replace
# ${FullTargetPath}.  Returns 0 if it is; otherwise calls die(), which
# reports the reason on stderr and exits the script with status 1.
#
# Checks performed:
#   * every regexp listed in ${MustExistRegexps} matches in the candidate;
#   * if a previous file with a usable, non-zero size exists, the candidate
#     is at least ${MinNewSizeRation} percent of that size.
sanity_check_candidate() {
  local new_size prev_size re ratio

  new_size="$(size_of "$WorkingFile")" || \
    die "unable to determine size of $WorkingFile"
  case $new_size in
    '' | *[!0-9]*) die "unable to determine size of $WorkingFile" ;;
  esac

  # Deliberately unquoted: the list is split on whitespace, giving one
  # regexp per word.
  for re in $MustExistRegexps; do
    grep -qs "$re" -- "$WorkingFile" || \
      die "regexp $re not found in $WorkingFile"
  done

  # A size_of implementation may fail outright for a missing file, or
  # succeed while printing nothing usable; treat both the same way.
  if ! prev_size="$(size_of "$FullTargetPath")"; then
    prev_size=''
  fi
  case $prev_size in
    '' | *[!0-9]*)
      note "missing previous file, can't size-compare: $FullTargetPath"
      # We're sane by definition, probably initial fetch; any stat
      # failure message and this note will be printed.  That's fine: if
      # a cron invocation is missing the file then something has gone
      # badly wrong and should be seen.
      return 0
      ;;
  esac

  # An empty previous file cannot be shrunk further, and dividing by its
  # size would be an error, so there is nothing to compare.
  if [ "$prev_size" -eq 0 ]; then
    note "previous file is empty, can't size-compare: $FullTargetPath"
    return 0
  fi

  # Shell arithmetic rather than expr: expr exits non-zero when its result
  # is 0, which under 'set -e' would kill the script before the diagnostic
  # below could be printed.
  ratio=$(( $new_size * 100 / $prev_size ))
  if [ "$ratio" -lt "$MinNewSizeRation" ]; then
    die "New $TargetShortFile candidate only ${ratio}% size of old; $new_size vs $prev_size"
  fi
}
118
# When invoked with 'cron' as the first argument, don't pull on-the-minute;
# wait for off-cycle-peak.  One byte is read from /dev/urandom, printed as an
# unsigned decimal by hexdump, reduced modulo 40 and offset by 10 to give the
# number of seconds to sleep.
case "${1:-.}" in
  cron)
    shift
    sleep $(( ($(dd if=/dev/urandom bs=1 count=1 2>/dev/null | hexdump -e '1/1 "%u"') % 40) + 10))
    ;;
esac
124
# refresh_target_file
# The actual work: download a candidate, validate it, and only then move it
# over the live file.  Runs with umask 022, so files created here are not
# group- or world-writable.  Any failing step aborts the script (set -e)
# before the mv, leaving the live file untouched.
refresh_target_file() {
  umask 022
  fetch_candidate
  sanity_check_candidate
  mv -- "$WorkingFile" "$FullTargetPath"
}

refresh_target_file