Commit | Line | Data |
---|---|---|
ddd16464 PP |
1 | #!/bin/sh -eu |
2 | # | |
3 | # Short version of this script: | |
4 | # curl -f -o /var/cache/exim/opendmarc.tlds https://publicsuffix.org/list/public_suffix_list.dat | |
5 | # but run as Exim runtime user, writing to a place it can write to, and with | |
6 | # sanity checks and atomic replacement. | |
7 | # | |
8 | # For now, we deliberately leave the invalid file around for analysis | |
9 | # with .<pid> suffix. | |
10 | # | |
11 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~8< cut here >8~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
12 | # | |
13 | # Create a cron-job as the Exim run-time user to invoke this daily, with a | |
14 | # single parameter, 'cron'. Eg: | |
15 | # | |
7a128512 | 16 | # 3 4 * * * /usr/local/sbin/renew-opendmarc-tlds.sh cron |
ddd16464 PP |
17 | # |
18 | # That will, at 3 minutes past the 4th hour (in whatever timezone cron is | |
19 | # running it) invoke this script with 'cron'; we will then sleep between 10 and | |
20 | # 49 seconds, before continuing. | |
21 | # | |
22 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~8< cut here >8~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
23 | # | |
24 | # This should be "pretty portable"; the only things it depends upon are: | |
25 | # * a POSIX shell which additionally implements 'local' (dash works) | |
26 | # * the 'curl' command; change the fetch_candidate() function to replace that | |
e5c15571 PP |
27 | # * the 'stat' command, to get the size of a file; else Perl |
28 | # + change size_of() if need be; it's defined per-OS | |
ddd16464 PP |
29 | # * the 'hexdump' command and /dev/urandom existing |
30 | # + used when invoked with 'cron', to avoid retrieving on a minute boundary | |
31 | # and contending with many other automated systems. | |
32 | # + with bash/zsh, can replace with: $(( 10 + ( RANDOM % 40 ) )) | |
33 | # + on Debian/Ubuntu systems, hexdump is in the 'bsdmainutils' package. | |
34 | ||
35 | # Consider putting an email address inside the parentheses, something like | |
36 | # noc@example.org or other reachable address, so that if something goes wrong | |
37 | # and the server operators need to step in, they can see from logs who to | |
38 | # contact instead of just blocking your IP: | |
CurlUserAgent='renew-opendmarc-tlds/0.1 (distributed with Exim)'

# The Exim run-time user this script must run as (exim -n -bP exim_user):
RuntimeUser='_exim'

# Directory holding the list; untrusted users must not be able to write here:
StateDir='/var/cache/exim'

# Where the list is downloaded from:
URL='https://publicsuffix.org/list/public_suffix_list.dat'

# Name of the list file inside StateDir:
TargetShortFile='opendmarc.tlds'

# A replacement file smaller than this percentage of the size of the file it
# replaces is treated as an error:
MinNewSizeRation=90

# Whitespace-separated regexps; a downloaded file in which any one of them
# fails to match is treated as an error:
MustExistRegexps='
^ac\.uk$
^org$
^tech$
'

# Freeze the configuration so nothing below can modify it by accident.
readonly CurlUserAgent RuntimeUser StateDir URL TargetShortFile
readonly MinNewSizeRation MustExistRegexps
61 | ||
62 | # =======================8< end of configuration >8======================= | |
63 | ||
# Abort on any failing command and on use of an unset variable.
set -o errexit -o nounset

# Final location of the list, and the per-process scratch file (suffixed with
# this shell's PID) that is downloaded into before being moved into place.
FullTargetPath="${StateDir}/${TargetShortFile}"
WorkingFile="${FullTargetPath}.$$"
readonly FullTargetPath WorkingFile
68 | ||
# Name used to prefix diagnostics: the script's own file name.
progname=$(basename "$0")

# note MESSAGE...: print the arguments, joined into one string, to stderr,
# prefixed with the program name.
note() {
  printf '%s: %s\n' "$progname" "$*" >&2
}

# die MESSAGE...: report the message via note, then exit with status 1.
die() {
  note "$@"
  exit 1
}
72 | ||
# Refuse to run as anyone but the configured run-time user, so that the files
# written below do not end up with the wrong ownership or permissions.
# ${RuntimeUser:?} additionally aborts if the setting is empty or unset.
if [ ".$(id -un)" != ".${RuntimeUser:?}" ]; then
  die "must be invoked as ${RuntimeUser}"
fi
76 | ||
# fetch_candidate: download $URL into $WorkingFile using curl.
#   -f  fail (non-zero exit) on HTTP errors
#   -S  still print an error message when failing ...
#   -s  ... while otherwise staying silent
# Replace the body of this function to use a different download tool.
fetch_candidate() {
  curl -f -S -s \
    --user-agent "$CurlUserAgent" \
    --output "${WorkingFile}" \
    "${URL}"
}
80 | ||
7a128512 PP |
# size_of FILE: print the size of FILE as reported by stat.
# The stat command line differs between platforms, so the function is
# defined according to what 'uname -s' reports.
case "$(uname -s)" in
  Linux)
    size_of() {
      stat -c %s "$1"
    }
    ;;
  *BSD | Darwin)
    size_of() {
      stat -f %z "$1"
    }
    ;;
  *)
    # Unknown platform: fall back to Perl, taking element 7 (the size) of
    # the list returned by its stat().
    size_of() {
      perl -le 'print((stat($ARGV[0]))[7])' -- "$1"
    }
    ;;
esac
ddd16464 PP |
94 | |
# sanity_check_candidate: decide whether the freshly downloaded $WorkingFile
# is fit to replace $FullTargetPath.
#
# Checks, in order:
#   1. every regexp listed in $MustExistRegexps matches somewhere in the
#      candidate;
#   2. when a non-empty previous file exists, the candidate is at least
#      $MinNewSizeRation percent of its size.
#
# Takes no arguments.  Returns 0 when the candidate is acceptable; otherwise
# calls die, which reports the problem on stderr and exits the script.
# Uses size_of, note and die, plus the configuration variables, all of which
# are defined earlier in this script.
sanity_check_candidate() {
  local new_size prev_size re ratio
  new_size="$(size_of "$WorkingFile")"

  # Deliberately unquoted: the list is split on whitespace, one regexp per
  # word.  -e keeps grep from treating a regexp as an option.
  for re in $MustExistRegexps; do
    grep -qs -e "$re" -- "$WorkingFile" || \
      die "regexp $re not found in $WorkingFile"
  done

  if ! prev_size="$(size_of "$FullTargetPath")"; then
    note "missing previous file, can't size-compare: $FullTargetPath"
    # We're sane by definition, probably initial fetch, and the
    # stat failure and this note will be printed.  That's fine; if
    # a cron invocation is missing the file then something has gone
    # badly wrong.
    return 0
  fi

  # An empty previous file gives nothing to compare against, and dividing by
  # its size would be an arithmetic error; any candidate is an improvement.
  if [ "$prev_size" -eq 0 ]; then
    note "empty previous file, can't size-compare: $FullTargetPath"
    return 0
  fi

  # Shell arithmetic rather than expr(1): expr exits with status 1 whenever
  # its result is 0, which under 'set -e' would abort the script right here,
  # before the diagnostic below could be printed.
  ratio=$(( $new_size * 100 / $prev_size ))
  if [ "$ratio" -lt "$MinNewSizeRation" ]; then
    die "New $TargetShortFile candidate only ${ratio}% size of old; $new_size vs $prev_size"
  fi
}
118 | ||
# When invoked with 'cron' as the first argument, drop that argument and
# sleep for a short pseudo-random time (one byte from /dev/urandom, reduced
# modulo 40, plus 10 seconds) so the fetch does not happen exactly on the
# minute boundary.
case "${1:-.}" in
  cron)
    shift
    sleep $(( ($(dd if=/dev/urandom bs=1 count=1 2>/dev/null | hexdump -e '1/1 "%u"') % 40) + 10))
    ;;
esac

# Download into the scratch file, validate it, and only then move it over the
# live file.  Under 'set -e' a failure in either earlier step stops the
# script before the mv.
umask 022
fetch_candidate
sanity_check_candidate
mv -- "${WorkingFile}" "${FullTargetPath}"