Commit | Line | Data |
---|---|---|
ddd16464 PP |
1 | #!/bin/sh -eu |
2 | # | |
3 | # Short version of this script: | |
4 | # curl -f -o /var/cache/exim/opendmarc.tlds https://publicsuffix.org/list/public_suffix_list.dat | |
5 | # but run as Exim runtime user, writing to a place it can write to, and with | |
6 | # sanity checks and atomic replacement. | |
7 | # | |
8 | # For now, we deliberately leave the invalid file around for analysis | |
9 | # with .<pid> suffix. | |
10 | # | |
11 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~8< cut here >8~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
12 | # | |
13 | # Create a cron-job as the Exim run-time user to invoke this daily, with a | |
14 | # single parameter, 'cron'. Eg: | |
15 | # | |
7a128512 | 16 | # 3 4 * * * /usr/local/sbin/renew-opendmarc-tlds.sh cron |
ddd16464 PP |
17 | # |
18 | # That will, at 3 minutes past the 4th hour (in whatever timezone cron is | |
19 | # running it) invoke this script with 'cron'; we will then sleep between 10 and | |
20 | # 50 seconds, before continuing. | |
21 | # | |
22 | # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~8< cut here >8~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | |
23 | # | |
24 | # This should be "pretty portable"; the only things it depends upon are: | |
25 | # * a POSIX shell which additionally implements 'local' (dash works) | |
26 | # * the 'curl' command; change the fetch_candidate() function to replace that | |
27 | # * the 'stat' command, to get the size of a file; change size_of() if need be | |
28 | # * the 'hexdump' command and /dev/urandom existing | |
29 | # + used when invoked with 'cron', to avoid retrieving on a minute boundary | |
30 | # and contending with many other automated systems. | |
31 | # + with bash/zsh, can replace with: $(( 10 + ( RANDOM % 40 ) )) | |
32 | # + on Debian/Ubuntu systems, hexdump is in the 'bsdmainutils' package. | |
33 | ||
# Identify ourselves to the server operators.  Consider adding a reachable
# contact address (something like noc@example.org) inside the parentheses, so
# that if something goes wrong and they need to step in, they can see from
# their logs who to contact instead of just blocking your IP.
CurlUserAgent='renew-opendmarc-tlds/0.1 (distributed with Exim)'

# Your Exim run-time user; find it with: exim -n -bP exim_user
RuntimeUser='_exim'

# Directory holding the list.  Must not be writable by untrusted users.
StateDir='/var/cache/exim'

# Where the list is fetched from.
URL='https://publicsuffix.org/list/public_suffix_list.dat'

# Name of the list file inside StateDir.
TargetShortFile='opendmarc.tlds'

# A replacement file must be at least this percentage of the size of the
# file it replaces, otherwise it is rejected as an error.
MinNewSizeRation=90

# Whitespace-separated regexps.  Every one of them must match somewhere in
# the fetched file, otherwise it is rejected as an error.
MustExistRegexps='
^ac\.uk$
^org$
^tech$
'

# None of the settings above may be changed further down.
readonly CurlUserAgent RuntimeUser StateDir URL TargetShortFile
readonly MinNewSizeRation MustExistRegexps
60 | ||
61 | # =======================8< end of configuration >8======================= | |
62 | ||
# Abort on any unhandled command failure (-e) and on any use of an unset
# variable (-u).
set -eu

# FullTargetPath is the final location of the list; WorkingFile is the
# scratch file next to it, suffixed with this shell's PID, that the download
# is written to before it replaces the target.
FullTargetPath="${StateDir}/${TargetShortFile}"
WorkingFile="${FullTargetPath}.$$"
readonly FullTargetPath WorkingFile
67 | ||
# Script name without its directory part, used to prefix diagnostics.
progname="$(basename "$0")"

# note MESSAGE...
# Write "<progname>: <message words joined by spaces>" to stderr.
note() {
  printf '%s: %s\n' "$progname" "$*" >&2
}

# die MESSAGE...
# Report MESSAGE through note(), then terminate the shell with status 1.
die() {
  note "$@"
  exit 1
}
71 | ||
# Guard against stomping on file permissions: refuse to run as anybody but
# the configured run-time user.  ${RuntimeUser:?} additionally aborts the
# script if that setting is empty or unset.  The leading dots keep the
# comparison well-formed whatever the two names expand to.
if [ ".$(id -un)" != ".${RuntimeUser:?}" ]; then
  die "must be invoked as ${RuntimeUser}"
fi
75 | ||
# fetch_candidate
# Download ${URL} into ${WorkingFile} with curl, sending ${CurlUserAgent} as
# the User-Agent.  curl is told to fail on HTTP errors (--fail) and to stay
# quiet (--silent) while still printing error messages (--show-error).
# The function's exit status is curl's.  To use a different downloader,
# replace the body of this function.
fetch_candidate() {
  curl \
    --user-agent "$CurlUserAgent" \
    --fail --show-error --silent \
    --output "${WorkingFile}" \
    "${URL}"
}
79 | ||
7a128512 PP |
# size_of FILE
# Print the size of FILE in bytes on stdout; the exit status is stat's.
# stat(1) has no portable option syntax, so the variant is chosen once, from
# the kernel name: the BSD family and Darwin take '-f %z'; Linux, and
# (optimistically) anything unrecognised, is given the '-c %s' form.
case "$(uname -s)" in
  *BSD | Darwin)
    size_of() { stat -f %z "$1"; }
    ;;
  *)
    size_of() { stat -c %s "$1"; }
    ;;
esac
ddd16464 PP |
91 | |
# sanity_check_candidate
# Decide whether the freshly fetched ${WorkingFile} is fit to replace
# ${FullTargetPath}.  Returns 0 if it is; otherwise the reason is reported
# through die(), which terminates the script with status 1.
#
# Checks, in order:
#   1. every regexp listed in ${MustExistRegexps} matches the candidate;
#   2. if a non-empty previous file exists, the candidate is at least
#      ${MinNewSizeRation} percent of its size.
sanity_check_candidate() {
  local new_size prev_size re ratio
  new_size="$(size_of "$WorkingFile")"

  # Deliberately unquoted: the list is split on whitespace, one regexp per
  # word.  -e keeps a regexp from ever being taken for a grep option.
  for re in $MustExistRegexps; do
    grep -qs -e "$re" -- "$WorkingFile" || \
      die "regexp $re not found in $WorkingFile"
  done

  if ! prev_size="$(size_of "$FullTargetPath")"; then
    note "missing previous file, can't size-compare: $FullTargetPath"
    # We're sane by definition, probably initial fetch, and the
    # stat failure and this note will be printed.  That's fine; if
    # a cron invocation is missing the file then something has gone
    # badly wrong.
    return 0
  fi

  # An empty previous file gives no baseline to compare against, and using
  # it as the divisor below would be a division by zero.  Any candidate
  # that got this far is at least as large, so accept it.
  if [ "$prev_size" -eq 0 ]; then
    note "previous file is empty, can't size-compare: $FullTargetPath"
    return 0
  fi

  # Shell arithmetic rather than expr(1): expr exits with status 1 whenever
  # its result is 0, which under 'set -e' would abort the script right here
  # without ever printing the explanation below.
  ratio=$(( new_size * 100 / prev_size ))
  if [ "$ratio" -lt "$MinNewSizeRation" ]; then
    die "New $TargetShortFile candidate only ${ratio}% size of old; $new_size vs $prev_size"
  fi
}
115 | ||
# When the first argument is 'cron', consume it and hold off for a randomly
# chosen 10..49 seconds, so that we don't pull on-the-minute but wait for
# off-cycle-peak.  The randomness is a single byte read from /dev/urandom,
# printed as an unsigned decimal by hexdump.
case "${1:-.}" in
  cron)
    shift
    sleep "$(( 10 + ($(dd if=/dev/urandom bs=1 count=1 2>/dev/null | hexdump -e '1/1 "%u"') % 40) ))"
    ;;
esac
121 | ||
# install_fresh_list
# The actual job: download a candidate, vet it, and only then rename it over
# the live file.  Each step is a plain command, so under 'set -e' a failure
# of any of them stops the script before the next one runs.
install_fresh_list() {
  # Files we create are writable by us only, readable by everybody.
  umask 022
  fetch_candidate
  sanity_check_candidate
  mv -- "$WorkingFile" "$FullTargetPath"
}

install_fresh_list