From: Jacob Bachmeyer Date: Sun, 20 Nov 2022 04:10:21 +0000 (-0600) Subject: Reorganize gatekeeper to reflect processing phases X-Git-Url: https://vcs.fsf.org/?a=commitdiff_plain;h=399183d69fbf3c32e32d027ffc79f5cc60957ba4;p=gatekeeper.git Reorganize gatekeeper to reflect processing phases Prior to committing, this was validated with: (DIFF='git diff --cached'; comm -3 <($DIFF | grep ^- | sed -e 's/^-//' | sort) \ <($DIFF | grep ^+ | sed -e 's/^+//' | sort) ) The output shows only blank lines, diff headers, POD structure, and the addition of a forward declaration for verify_clearsigned_message as a temporary measure ahead of a call that will eventually be removed, after all lines are sorted and paired for analysis. To replicate, change the "git diff" command to compare this commit with its parent. --- diff --git a/gatekeeper.pl b/gatekeeper.pl index 694254d..c9ded32 100755 --- a/gatekeeper.pl +++ b/gatekeeper.pl @@ -388,7 +388,7 @@ if (IN_TEST_MODE) { # override the above for testing =back -=head2 Logging +=head2 Logging and General Utilities =over @@ -491,6 +491,100 @@ ftp_syslog info => "Beginning upload processing run."; # send copies of warnings to syslog $SIG{__WARN__} = sub { ftp_syslog warning => $_[0]; warn $_[0] }; +=item mkdir_p ( $base, @directory ) + +Ensure that DIRECTORY (expressed as returned by File::Spec->splitdir) +exists under the BASE directory. + +=cut + +sub mkdir_p { + # @_ is directory name elements + + my @dir_steps; # list of intermediate dirs needed + # for example, creating bar/baz/quux in an empty /foo populates this list + # with qw( /foo/bar /foo/bar/baz /foo/bar/baz/quux ) on POSIX + + for (@dir_steps = (); @_ && ! -d File::Spec->catdir(@_); pop) + { unshift @dir_steps, File::Spec->catdir(@_) } + + mkdir $_ or die "mkdir($_): $!" for @dir_steps; +} + +=item @unique_list = unique ( @list ) + +Filter LIST to return only unique strings. Analogous to uniq(1) but does +not require LIST be sorted. Order of LIST is preserved; the first +occurrence of each unique value is passed through. + +=cut + +sub unique { + my %filter; + my @ret; + + foreach (@_) { unless ($filter{$_}) { $filter{$_}++; push @ret, $_ } } + + return @ret; +} + +=item sendmail ( $sender, $recipients, $headers, $body ) + +Send mail from SENDER to RECIPIENTS, with HEADERS and BODY. SENDER is an +email address. RECIPIENTS is an arrayref of email addresses. HEADERS is +an arrayref of name/value arrayrefs. BODY is a string, which will be sent +as given. + +Information about the message is also sent to syslog. + +=cut + +sub sendmail { + my $sender = shift; + my $recipients = shift; + my $headers = shift; + my $body = shift; + + if (NOMAIL) { + ftp_syslog info => + 'NOMAIL is set - not sending email to '.join(' ',@$recipients); + } else { + ftp_syslog info => 'Sending email to '.join(' ',@$recipients); + } + + { + my $subject = 'generic failure'; + if (my @subject = grep $_->[0] eq 'Subject', @$headers) + { $subject = $subject[0][1] } + if ($subject =~ m/generic failure$/) + { ftp_syslog warning => "Error uploading package: $body" } + ftp_syslog info => "Subject: '$subject'"; + ftp_syslog info => "Body: $body"; + } + + unless (NOMAIL) { + my $smtp; + if (IN_TEST_MODE) { + $smtp = Net::SMTP->new + (Host => 'localhost', Port => $ENV{TEST_SMTP_PORT}); + } else { + $smtp = Net::SMTP->new(Host => 'localhost'); + } + abort "FATAL: SMTP connection failed" unless $smtp; + + $smtp->mail($sender); + $smtp->recipient(unique(@$recipients), { SkipBad => 1}); + + $smtp->data(); + $smtp->datasend($_->[0].': '.$_->[1]."\n") for @$headers; + $smtp->datasend("\n"); + $smtp->datasend($body); + $smtp->dataend(); + + $smtp->quit(); + } +} + # # -- Filename validation patterns and limits # @@ -622,7 +716,7 @@ BEGIN { =back -=head2 Local exception definitions and utilities +=head2 Exception Definitions and Utilities =over @@ -792,7 +886,116 @@ sub throw { =back -=head2 GPG helpers +=head2 Package configuration access + +=over + +=item $package_name = directory_package_name ( $directory ) + +Extract the package name implied in DIRECTORY, which is the first component +of the file name. + +=cut + +sub directory_package_name { + my @directory = File::Spec::Unix->splitdir(shift); + return $directory[0]; +} + +=item @files = directory_configuration_files ( $file, $directory ) + +Return list of FILEs applicable to DIRECTORY. The rule used is that each +FILE applies to its own directory and is inherited by all subdirectories. + +=cut + +sub directory_configuration_files { + my $file = shift; + my $directory = shift; + + my @candidates; + + for (my @directory = File::Spec::Unix->splitdir($directory); + @directory; + pop @directory) + { push @candidates, File::Spec->catfile + ($package_config_base, @directory, $file) } + push @candidates, File::Spec->catfile($package_config_base, $file); + + return grep -f $_ && -r _ && -s _, @candidates; +} + +=item @keyrings = directory_keyrings ( $directory ) + +Return list of keyrings present in package configuration and applicable to +DIRECTORY, which is a relative name beginning with the appropriate package. + +=cut + +sub directory_keyrings { + my $directory = shift; + + my @keyrings = directory_configuration_files('pubring.gpg', $directory); + + if (DEBUG) { + ftp_syslog debug => "DEBUG: found keyring $_" for @keyrings; + } + + return @keyrings; +} + +=item @addresses = directory_email_addresses ( $directory ) + +Return list of email addresses configured to receive notification of +activity on DIRECTORY. These are from both the inherited per-directory +"email" files and the maintainer addresses registered for the package in +the maintainers.bypkg file. + +=cut + +sub directory_email_addresses { + my $directory = shift; + + local *_; + + my $package_name = directory_package_name($directory); + + my @email_files = directory_configuration_files('email', $directory); + + my @addresses; + + foreach my $file (@email_files) { + open EMAIL_FILE, '<', $file or abort "open($file) failed: $!"; + while () { + chomp; + push @addresses, $1 + if m/^([[:graph:]]+[@][[:graph:]]+)$/; # simple sanity check and untaint + } + close EMAIL_FILE or warn "close($file) failed: $!"; + } + + # Now also look for all maintainer addresses in the maintainers.bypkg file + my $needle = $package_name.' - '; + my $nlen = length $needle; + open EMAIL_FILE, '<', $maintainers_bypkg + or abort "open($maintainers_bypkg) failed: $!"; + while () { + chomp; + next unless $needle eq substr $_,0,$nlen; # find the line for this package + # crawl through it, collecting email addresses + pos = $nlen; + push @addresses, $1 while m/\G[^<]*<([^@]+[@][^>]+)>/g; + } + close EMAIL_FILE or warn "close($maintainers_bypkg) failed: $!"; + + return @addresses; +} + + + +=back + +=head2 Directive reader and parsing helpers =over @@ -840,1291 +1043,1092 @@ sub slurp_clearsigned_message { return join('', @lines); } -=item $results = verify_clearsigned_message ( $text, @keyrings ) - -=item $results = verify_detached_signature ( $file, $sigfile, @keyrings ) - -Verify the PGP-clearsigned message in TEXT or the detached signature in -SIGFILE for FILE, using a key from KEYRINGS. The TEXT may be tainted, but -the list of KEYRINGS and the FILE and SIGFILE values must be -untainted. +=item $directive = read_directive ( $handle ) -The message signature should be considered verified iff C is zero -and C is not defined in the returned hashref. +Read a PGP-clearsigned directive from the provided handle, which must be +open for reading. The handle is assumed to be positioned at the start of +the file. This function will search for the PGP header and stop reading at +the signature. -The return value is a hashref containing: +The return value is an arrayref of key/value arrayrefs representing the +directive elements in the first PGP-clearsigned message found while reading +from HANDLE. The values in the returned structure are tainted. -=over +=cut -=item TILT +sub read_directive { + my $directive = shift; -An arrayref of reasons the results should be considered invalid. This key -will not exist if the verification was successful and trustworthy. + local *_; + my @records = (); -The presense of this key in the returned hashref indicates that we saw -something very wrong from gpgv. Note that our handling is fairly paranoid, -for example, multiple signatures on the input will result in this being -set, as we assume that gpgv has been somehow subverted if more than one -verification result is returned. + # First, we find the PGP signature header. + while (<$directive>) { + chomp; s/\r+$//; # remove line ending, including DOS type + last if m/^-----BEGIN PGP SIGNED MESSAGE-----\s*$/; + # RFC4880 allows trailing whitespace on marker lines. + } + # We are now in the armor headers. + while (<$directive>) { + chomp; s/\r+$//; # remove line ending, including DOS type + # According to RFC4880, there must be exactly one empty line to + # separate the signed message from the armor headers. + last if m/^$/; + } + # We are now looking at the signed message text. + while (<$directive>) { + chomp; s/\r+$//; # remove line ending, including DOS type + s/^\s+//; s/\s+$//; # trim leading and trailing whitespace -=item exitcode + last if m/^-----BEGIN PGP SIGNATURE-----$/; -The exit status from gpgv. This will be zero if gpgv considers the -signature valid. + unless (/^$/) { # ignore blank lines + push @records, [split /\s+/,$_,2]; + $records[-1][0] =~ s/\s*:$//; # trim trailing colon on key + } + } + # That is all: we have reached the signature and are done. + return \@records; +} -=item raw_output +=item $directive = read_directive_from_file ( $filename ) -=item raw_log +Read a PGP-clearsigned directive file and return an arrayref of key/value +pair arrayrefs representing the directive elements in the signed portion of +the file FILENAME. Any text in the file not within the first clearsigned +message is ignored. -=item raw_status +The values returned from this procedure are tainted. -The complete collected output, log, and status buffers. +=cut -=item key_longid +sub read_directive_from_file { + my $filename = shift; -The 64-bit long key ID of the key that signed TEXT, if available. + open my $handle, '<', $filename + or die "open($filename): $!"; + my $records = read_directive($handle); + close $handle + or die "close($filename): $!"; -=item key_fingerprint + return $records; +} -The fingerprint of the PGP key that signed TEXT, if available. +=item $directive = read_directive_from_string ( $text ) -=item sig_creation +Read a PGP-clearsigned directive and return an arrayref of key/value pair +arrayrefs representing the directive elements in the signed portion of the +provided TEXT. Any text not within the first clearsigned message is +ignored. This function uses Perl's support for in-memory files. -Epoch timestamp of signature. +The values returned from this procedure are tainted. -=item sig_expiration +=cut -Epoch timestamp at which the signature expires, if the signature expires. -This key is only present if the signature has an expiration date. +sub read_directive_from_string { + my $text = shift; -=back + open my $handle, '<', \$text + or die "open memory file: $!"; + my $records = read_directive($handle); + close $handle + or die "close memory file: $!"; -The C fields in the returned hashref are tainted; the extracted -values are untainted. The C field, if present, is untainted. + return $records; +} -=cut +=item @values = find_directive_elements ( $directive, $key ) -# helpers for verify_clearsigned_message and verify_detached_signature -sub _spawn_gpgv { - my $keyrings = shift; - my @file_args = @_; +=item $count = find_directive_elements ( $directive, $key ) - # We need a few pipes: - # - clearsigned message to gpgv stdin - # - output from gpgv stdout/stderr - # - log from gpgv --logger-fd - # - status from gpgv --status-fd - # - a flag pipe to indicate successful exec or carry an error +Search the DIRECTIVE arrayref for KEY elements and return their associated +values. An empty list is returned if no KEY elements are found in +DIRECTIVE. The KEY comparison is case-insensitive. - # The three output streams from gpgv must be kept separate, or - # CVE-2018-12020 "SigSpoof" issues can occur. Worse, the gpgv status - # output must be examined with care, as there has been at least one bug - # (CVE-2022-34903) whereby GPG could be tricked to emit arbitrary output - # on the status pipe. - pipe my $gpgv_stdin, my $gpgv_stdin_source - or abort "failed to create pipe for gpgv stdin: $!"; - pipe my $gpgv_output, my $gpgv_output_sink - or abort "failed to create pipe for gpgv output: $!"; - pipe my $gpgv_log, my $gpgv_log_sink - or abort "failed to create pipe for gpgv log: $!"; - pipe my $gpgv_status, my $gpgv_status_sink - or abort "failed to create pipe for gpgv status: $!"; - pipe my $gpgv_flag, my $gpgv_flag_sink - or abort "failed to create pipe for gpgv flag: $!"; +The values returned from this procedure are tainted. - # ensure autoflush on writes to gpgv - { my $outhandle = select $gpgv_stdin_source; $| = 1; select $outhandle } +In scalar context, return the number of entries that would be returned in +list context. - my @gpgv_args = ( GPGV_BIN, - '--logger-fd', fileno $gpgv_log_sink, - '--status-fd', fileno $gpgv_status_sink ); - push @gpgv_args, '--keyring', $_ for @$keyrings; - push @gpgv_args, @file_args; +=cut - ftp_syslog debug => 'DEBUG: gpgv command line: '.join(' ', @gpgv_args) - if DEBUG; +sub find_directive_elements { + my $directive = shift; + my $key = lc shift; - my $pid = fork; - abort "failed to fork child for gpgv: $!" - unless defined $pid; + if (wantarray) { + return map $_->[1], grep lc($_->[0]) eq $key, @$directive; + } else { + return grep lc($_->[0]) eq $key, @$directive; + } +} - unless ($pid) { - # We are in the child process... - close $gpgv_stdin_source; - close $gpgv_output; close $gpgv_log; - close $gpgv_status; close $gpgv_flag; +=item $directory = find_directory ( $directive ) - our $AbortPipe = $gpgv_flag_sink; # pipe to parent - our $AbortExitCode = 127; # as posix_spawn uses - # no need to use local here; this process will either exec or abort +Extract the destination directory name from the parsed DIRECTIVE arrayref. +An exception is thrown if DIRECTIVE does not contain exactly one +"directory" element or if the value of that element is not acceptable. - # Adjust close-on-exec flags: - my $flags; - # - clear on status and log sinks - $flags = fcntl $gpgv_status_sink, F_GETFD, 0 - or abort "ERR: fcntl F_GETFD on status: $!"; - fcntl $gpgv_status_sink, F_SETFD, $flags & ~FD_CLOEXEC - or abort "ERR: fcntl F_SETFD on status: $!"; - $flags = fcntl $gpgv_log_sink, F_GETFD, 0 - or abort "ERR: fcntl F_GETFD on log: $!"; - fcntl $gpgv_log_sink, F_SETFD, $flags & ~FD_CLOEXEC - or abort "ERR: fcntl F_SETFD on log: $!"; - # - set on flag pipe sink - $flags = fcntl $gpgv_flag_sink, F_GETFD, 0 - or abort "ERR: fcntl F_GETFD on flag: $!"; - fcntl $gpgv_flag_sink, F_SETFD, $flags | FD_CLOEXEC - or abort "ERR: fcntl F_SETFD on flag: $!"; +The value returned from this procedure is untainted. - # Prepare STDIN/STDOUT/STDERR - open STDIN, '<&', $gpgv_stdin or abort "ERR: set stdin: $!"; - open STDOUT, '>&', $gpgv_output_sink or abort "ERR: set stdout: $!"; - open STDERR, '>&', $gpgv_output_sink or abort "ERR: set stderr: $!"; +=cut - # Exec gpgv - exec { GPGV_BIN } @gpgv_args or abort "ERR: exec: $!"; - } +sub find_directory { + my $directive = shift; - # The parent continues here... - close $gpgv_stdin; - close $gpgv_output_sink; close $gpgv_log_sink; - close $gpgv_status_sink; close $gpgv_flag_sink; + my @values = find_directive_elements($directive, 'directory'); - # This is a bit tricky: we need to know if gpgv could not be run, so we - # have an extra pipe that will either report an error or be closed if the - # exec succeeds in the child process. - while (defined(my $err = <$gpgv_flag>)) { - chomp $err; - if ($err =~ m/^ERR: (.*)$/) { - # This is bad - we couldn't even execute the gpgv command properly - waitpid $pid, 0; # reap failed child - abort - ("gpg verify of directive file failed (error executing gpgv): $1"); - } - } - close $gpgv_flag; # child has closed its end one way or another + die "Only one directory directive is allowed per directive file." + if scalar @values > 1; + die "no directory directive specified" + unless @values; + die "invalid directory element with no value" + unless $values[0]; - foreach my $cell ([$gpgv_stdin_source, 'message'], [$gpgv_output, 'output'], - [$gpgv_log, 'log'], [$gpgv_status, 'status']) { - my $flags = fcntl $cell->[0], F_GETFL, 0 - or abort "gpgv: fcntl F_GETFL $cell->[1]: $!"; - fcntl $cell->[0], F_SETFL, $flags | O_NONBLOCK - or abort "gpgv: fcntl F_SETFL $cell->[1]: $!"; - } + die "invalid directory $values[0]" + unless $values[0] =~ m/^($RE_filename_relative)$/; - return $pid, $gpgv_stdin_source, $gpgv_output, $gpgv_log, $gpgv_status; + return $1; } -sub _analyze_gpgv_output { - my $ret = shift; # hashref +=item $package = find_package ( $directive ) - # CVE-2022-34903 caused GPG to dump a chunk of its heap to the status fd, - # and, eventually, segfault upon reaching unallocated address space. - # This had two recognizable consequences: - # - The GPG process dies with SIGSEGV. - # - The status output very likely contains multiple NUL bytes. - push @{$ret->{TILT}}, 'gpgv died on signal '.WTERMSIG($ret->{exitcode}) - if WIFSIGNALED($ret->{exitcode}); - for (qw(output log status)) - { push @{$ret->{TILT}}, "gpgv $_ contained NUL byte" - if $ret->{'raw_'.$_} =~ m/\0/ } +Extract the package name from the parsed DIRECTIVE arrayref. An exception +is thrown if DIRECTIVE does not contain exactly one "directory" element or +if the value of that element is not a relative file name. - local *_; - # counters - my $intro_status = 0; my $check_status = 0; my $verdict_status = 0; +The value returned from this procedure is untainted. - open my $status, '<', \($ret->{raw_status}) - or abort "open in-memory file for gpgv status: $!"; - while (<$status>) { - chomp; - unless (m/^\[GNUPG:\] /g) { - push @{$ret->{TILT}}, "gpgv status line lacks required prefix"; - last; # stop parsing if an invalid line is found - } +=cut - if (m/\GNEWSIG/gc) { - $intro_status++; # Note that NEWSIG is optional - } elsif (m/\G(GOOD|EXP|EXPKEY|REVKEY|BAD|ERR)SIG ([[:xdigit:]]+) /gc) { - # $1 -- result tag $2 -- long ID or fingerprint - # The next field is the primary username, except ERRSIG, but there is - # no guarantee that the primary UID will contain an email address. - if (length($2) > 16) { # We have a key fingerprint - $ret->{key_fingerprint} = $2; - $ret->{key_longid} = substr $2,-16; - } else { # We only have a long key ID - $ret->{key_longid} = $2; - } +sub find_package { + # The package name is the first directory named in the directory element. + my @dirs = File::Spec::Unix->splitdir(find_directory(@_)); + return $dirs[0]; +} - if ($1 eq 'BAD') { - $verdict_status++; - push @{$ret->{TILT}}, 'gpgv reported a bad signature, but exited zero' - if 0 == $ret->{exitcode}; - } elsif ($1 eq 'ERR') { # an ERRSIG line - $verdict_status++; - if (m/\G(\d+)\s(\d+)\s([[:xdigit:]]{2})\s([-:T[:digit:]Z+]+)\s(\d+) - /gcx) { - # $1 -- pubkey algorithm $2 -- digest algorithm - # $3 -- timestamp $4 -- result code - abort 'gpgv returned an ISO8601 timestamp; implementation needed' - if $3 =~ m/T/; - $ret->{sig_creation} = $3; - } else - { push @{$ret->{TILT}}, 'gpgv ERRSIG line failed parsing' } + - push @{$ret->{TILT}}, 'gpgv reported an error, but exited zero' - if 0 == $ret->{exitcode}; - } else { # GOODSIG/EXPSIG/EXPKEYSIG/REVKEYSIG - $check_status++; - } - } elsif (m/\G(VALID)SIG\s([[:xdigit:]]+)\s(\d{4}-\d{2}-\d{2})\s - ([-:T[:digit:]Z+]+)\s([-:T[:digit:]Z+]+)\s(\d+)\s(\S+)\s - (\d+)\s(\d+)\s([[:xdigit:]]{2})\s([[:xdigit:]]+) - /gcx) { - $verdict_status++; - # $1 -- valid tag $2 -- key fingerprint - # $3 -- signature date $4 -- signature timestamp - # $5 -- expiration timestamp $6 -- signature version - # $7 -- reserved $8 -- pubkey algorithm - # $9 -- digest algorithm $10 -- signature class - # $11 -- primary key fingerprint - $ret->{key_fingerprint} = $2; - $ret->{key_longid} = substr $2,-16; - abort 'gpgv returned an ISO8601 timestamp; implementation needed' - if $4 =~ m/T/ || $5 =~ m/T/; - $ret->{sig_creation} = $4; - # GPG reports 0 if the signature does not expire - $ret->{sig_expiration} = $5 if $5 > 0; - } - } - close $status or abort "close in-memory file for gpgv status: $!"; +=back - push @{$ret->{TILT}}, 'gpgv reported more than one signature' - if $intro_status > 1; - push @{$ret->{TILT}}, 'gpgv reported more than one signature check' - if $check_status > 1; - push @{$ret->{TILT}}, 'gpgv reported more than one signature verdict' - if $verdict_status > 1; - push @{$ret->{TILT}}, 'gpgv reported no signature verdict at all' - if $verdict_status < 1; +=head2 [SC] Scan Inbox - return $ret; -} +=over -sub verify_clearsigned_message { - my $text = shift; - my @keyrings = @_; +=item @files = scan_incoming ( $directory ) - ftp_syslog debug => 'DEBUG: message size is '.length($text) if DEBUG; +Scan DIRECTORY for newly-arrived uploaded files. Remove blatantly bogus +files, ignore acceptable files that are either still open or recently +modified, and return a list of filenames for further processing. - # "my (LIST) = ..." causes problems with CPerl mode here -- jcb - my $pid; my $gpgv_stdin_source; - my $gpgv_output; my $gpgv_log; my $gpgv_status; - ($pid, $gpgv_stdin_source, $gpgv_output, $gpgv_log, $gpgv_status) = - _spawn_gpgv(\@keyrings, '-'); +=cut - local $SIG{PIPE} = sub { abort 'gpgv exited unexpectedly' }; - my $Rchk = ''; my $Wchk = ''; - vec($Wchk, (fileno $gpgv_stdin_source), 1) = 1; - vec($Rchk, (fileno $_), 1) = 1 for ($gpgv_output, $gpgv_log, $gpgv_status); - my $Rrdy = ''; my $Wrdy = ''; - my $raw_output = ''; my $raw_log = ''; my $raw_status = ''; - pos $text = 0; # use this slot to store a position because we can - do { - foreach my $cell ([$gpgv_output, \$raw_output], [$gpgv_log, \$raw_log], - [$gpgv_status, \$raw_status]) { - if (vec($Rrdy, (fileno $cell->[0]), 1)) { - my $eof; # defined and zero at eof - 1 while - $eof = sysread $cell->[0], ${$cell->[1]}, 128, length ${$cell->[1]}; - vec($Rchk, (fileno $cell->[0]), 1) = 0 if defined $eof && $eof == 0; +sub scan_incoming { + my $directory = shift; + + local *_; + + my $time_bar = time - 120; + my @trash; my $badname_count = 0; + my %possible; + # Get list of all possible files from incoming dir. + # + opendir INCOMING, $directory + or abort "FATAL opendir($directory) failed: $!"; + ENT: while (defined($_ = readdir INCOMING)) { + next ENT if m/^[.]{1,2}$/; # skip . and .. entries + # require acceptable filenames + unless (length($_) <= MAX_FILE_NAME_LEN && m/^($RE_filename_here)$/) { + m/^(.*)$/; # untaint the value + # This is safe for unlink (which is all we will do with @trash) + # because the filename came from a directory entry, so it must be a + # valid filename and cannot indicate directory traversal. + unless (m/^[.]$RE_filename_here$/) { + # do not unlink backup files left by cleanup and cleanup_dir + push @trash, File::Spec->catfile($directory, $1); $badname_count++; } + next ENT } + my $ent = $1; # if we get here, $RE_filename_here matched above + # $_ remains tainted, but $ent is an untainted (and safe) copy - if (defined fileno $gpgv_stdin_source - && vec($Wrdy, (fileno $gpgv_stdin_source), 1)) { - my $err = syswrite $gpgv_stdin_source, $text, 128, pos $text; - pos $text += $err if defined $err; - unless (pos $text < length $text) { - vec($Wchk, (fileno $gpgv_stdin_source), 1) = 0; - close $gpgv_stdin_source; - } + # Examine the file; this populates an internal cache in perl. + unless (stat(File::Spec->catfile($directory, $ent))) { + ftp_syslog warning => "could not stat($ent), skipping"; + next ENT } - select $Rrdy=$Rchk, $Wrdy=$Wchk, undef, undef - if grep vec($Rchk, (fileno $_), 1), - $gpgv_output, $gpgv_log, $gpgv_status; - } while (grep vec($Rchk, (fileno $_), 1), - $gpgv_output, $gpgv_log, $gpgv_status); + # Do not consider files that have been modified in the last 2 minutes. + # This is an extra safety check to avoid trying to process files that + # are still being uploaded. (use stat cache) + if (TSTAMPCHECK) { + if ((stat(_))[9] >= $time_bar) { + ftp_syslog debug => "DEBUG: " + ."$ent has been modified in the last 2 minutes, skipping" + if DEBUG; + next ENT + } + } - close $gpgv_stdin_source; close $gpgv_output; - close $gpgv_log; close $gpgv_status; - waitpid $pid, 0; # reap child that ran gpgv + # check for overlength directives and signatures (use stat cache) + if (/[.]directive[.]asc$/ && -f _ && ((-s _) >= MAX_DIRECTIVE_SIZE)) { + ftp_syslog info => "Overlength directive file ($ent) trashcanned"; + push @trash, File::Spec->catfile($directory, $ent); + next ENT + } elsif (/[.]sig$/ && -f _ && ((-s _) >= MAX_SIGNATURE_SIZE)) { + ftp_syslog info => "Overlength signature file ($ent) trashcanned"; + push @trash, File::Spec->catfile($directory, $ent); + next ENT + } - return _analyze_gpgv_output - ({exitcode => $?, raw_output => $raw_output, - raw_log => $raw_log, raw_status => $raw_status}); -} + ftp_syslog debug => "DEBUG: uploaded file to check: $ent" if DEBUG; + $possible{$ent} = 1; + } + closedir INCOMING + or abort "FATAL: closedir($directory) failed: $!"; -sub verify_detached_signature { - my $filename = shift; - my $sigfilename = shift; - my @keyrings = @_; + # dispose of any garbage files + ftp_syslog info => "$badname_count files with bogus names were trashcanned" + if $badname_count; + ftp_syslog info => "Trashcanned files removed" + if unlink @trash; + @trash = (); # and empty the array to be safe, since it contained + # potentially arbitrary _untainted_ filenames - # This is very similar to verify_clearsigned_message, but slightly - # simpler because all input to GPG is supplied from files, so we do not - # have a pipe to the child process. We still need the other pipes and we - # still have the same risks of exploits against GPG. + # No possible files found, so return before we call lsof + return () unless %possible; - { - my $file_size = -s $filename; - my $sig_file_size = -s $sigfilename; + # Determine if any of those possible files are open. We find the + # possible files before running lsof (partly) to avoid a race + # condition. (If we ran lsof on the whole directory first, a new file + # might be uploaded and possibly be processed even though it was open.) + # + # Note that lsof outputs absolute names. + # + # This lsof test only works if either: + # a) lsof is not compiled with HASSECURITY + # b) gatekeeper runs as the vsftpd ftp_user + # If neither of those 2 conditions are met, the lsof call will not see + # the open files because they are owned by another user. + # On modern (Debian) systems, condition a) is not met. + # On modern GNU/Linux systems, unless either condition b) is met or lsof + # is installed setuid root, the kernel will not permit the open files to + # be seen because they are owned by another user. + my @lsof_args = (LSOF_BIN, "-Fn", + map { File::Spec->catfile($directory, $_) } keys %possible); + ftp_syslog debug => "DEBUG: lsof command line: " . join(' ',@lsof_args) + if DEBUG; - ftp_syslog debug => "DEBUG: $sigfilename size is $sig_file_size" - if DEBUG; - ftp_syslog debug => "DEBUG: $filename size is $file_size" + open LSOF, '-|', @lsof_args + or abort "FATAL: cannot spawn lsof: $!";; + while () { + ftp_syslog debug => "DEBUG: lsof output: $_" if DEBUG; + # only look at the name lines + next unless /^n${directory}\/(.+)$/; + ftp_syslog debug => "DEBUG: " + ."upload in progress for $1, ignoring during this run" if DEBUG; + delete ($possible{$1}) + or warn "WARNING: lsof found unrequested but open $1?!"; } + close (LSOF); - my $pid; my $gpgv_output; my $gpgv_log; my $gpgv_status; - { my $extra; # pipe to gpgv stdin; not used here - ($pid, $extra, $gpgv_output, $gpgv_log, $gpgv_status) = - _spawn_gpgv(\@keyrings, $sigfilename, $filename); - close $extra; - } - - my $Rchk = ''; - vec($Rchk, (fileno $_), 1) = 1 for ($gpgv_output, $gpgv_log, $gpgv_status); - my $Rrdy = ''; - my $raw_output = ''; my $raw_log = ''; my $raw_status = ''; - do { - foreach my $cell ([$gpgv_output, \$raw_output], [$gpgv_log, \$raw_log], - [$gpgv_status, \$raw_status]) { - if (vec($Rrdy, (fileno $cell->[0]), 1)) { - my $eof; # defined and zero at eof - 1 while - $eof = sysread $cell->[0], ${$cell->[1]}, 128, length ${$cell->[1]}; - vec($Rchk, (fileno $cell->[0]), 1) = 0 if defined $eof && $eof == 0; - } - } - - select $Rrdy=$Rchk, undef, undef, undef - if grep vec($Rchk, (fileno $_), 1), - $gpgv_output, $gpgv_log, $gpgv_status; - } while (grep vec($Rchk, (fileno $_), 1), - $gpgv_output, $gpgv_log, $gpgv_status); - - close $gpgv_output; close $gpgv_log; close $gpgv_status; - waitpid $pid, 0; # reap child that ran gpgv - - return _analyze_gpgv_output - ({exitcode => $?, raw_output => $raw_output, - raw_log => $raw_log, raw_status => $raw_status}); + return keys %possible; } - +=item @packets = gather_packets ( $directory, $scratchpad ) -=back +Scan DIRECTORY for newly-arrived upload packets. Remove blatantly bogus +files, ignore partial packets and acceptable files that are either still +open or recently modified, and return a list of arrayrefs representing +complete packets found. The files mentioned in the returned arrayrefs have +been moved to the SCRATCHPAD directory. Each returned packet begins with +the directive file, but order is not otherwise specified. -=head2 Package configuration access +=cut -=over +sub gather_packets { + my $directory = shift; + my $scratchpad = shift; -=item $package_name = directory_package_name ( $directory ) + my @files = scan_incoming($directory); + my %havefile = map { $_ => 1 } @files; -Extract the package name implied in DIRECTORY, which is the first component -of the file name. + # Find the directives among the possibilities and assemble packets + my @ret; -=cut + my @stems = + map m/^(.*)[.]directive[.]asc$/, + grep m/[.]directive[.]asc$/, @files; -sub directory_package_name { - my @directory = File::Spec::Unix->splitdir(shift); - return $directory[0]; -} - -=item @files = directory_configuration_files ( $file, $directory ) - -Return list of FILEs applicable to DIRECTORY. The rule used is that each -FILE applies to its own directory and is inherited by all subdirectories. - -=cut - -sub directory_configuration_files { - my $file = shift; - my $directory = shift; - - my @candidates; - - for (my @directory = File::Spec::Unix->splitdir($directory); - @directory; - pop @directory) - { push @candidates, File::Spec->catfile - ($package_config_base, @directory, $file) } - push @candidates, File::Spec->catfile($package_config_base, $file); - - return grep -f $_ && -r _ && -s _, @candidates; -} - -=item @keyrings = directory_keyrings ( $directory ) - -Return list of keyrings present in package configuration and applicable to -DIRECTORY, which is a relative name beginning with the appropriate package. - -=cut - -sub directory_keyrings { - my $directory = shift; - - my @keyrings = directory_configuration_files('pubring.gpg', $directory); - - if (DEBUG) { - ftp_syslog debug => "DEBUG: found keyring $_" for @keyrings; - } - - return @keyrings; -} - -=item @addresses = directory_email_addresses ( $directory ) + STEM: foreach my $stem (@stems) { + # This trades generality for efficiency. In the general case, the STEM + # in STEM.directive.asc is almost arbitrary and collecting packets + # (identified by sharing a common STEM) requires multiple scans over + # the set of collected files. In nasty cases, the associations between + # files and packets could even be ambiguous. The below shortcuts the + # issue by requiring known extensions on each common STEM, but requires + # this function to be updated if new packet types are added. -Return list of email addresses configured to receive notification of -activity on DIRECTORY. These are from both the inherited per-directory -"email" files and the maintainer addresses registered for the package in -the maintainers.bypkg file. + ftp_syslog debug => "DEBUG: " + ."considering stem [$stem] for processing" if DEBUG; -=cut + # Note that all values in %havefile are 1 and the undefined value is + # falsish in Perl, so simple checks are adequate here. No tests for + # the directive file itself are done because each STEM is derived from + # its presence. -sub directory_email_addresses { - my $directory = shift; + if ($havefile{$stem} && $havefile{$stem.'.sig'}) { + # File upload triplet: STEM.directive.asc, STEM.sig, STEM + my $triplet = [$stem.'.directive.asc', $stem.'.sig', $stem]; - local *_; + foreach my $file (@$triplet) { + # If the file exists in the scratchpad, but not in the incoming + # directory, we may have already moved it to the scratchpad + # directory as part of another packet. We submit both packets for + # processing; it is near-certain that one of them is fake and will + # be rejected after failing authentication. + unless ((-e File::Spec->catfile($scratchpad, $file) + && ! -e File::Spec->catfile($directory, $file)) + || rename (File::Spec->catfile($directory, $file), + File::Spec->catfile($scratchpad, $file))) { + ftp_syslog error => + "rename $directory/$file to $scratchpad/$file: $!"; + next STEM # abandon processing this triplet + } + } - my $package_name = directory_package_name($directory); + push @ret, $triplet; + ftp_syslog info => 'processing ['.join(':',@$triplet).']'; + } else { + # A lone directive file: STEM.directive.asc - my @email_files = directory_configuration_files('email', $directory); + # Here we have a potential problem. We could be seeing a directive + # file that belongs to a triplet the rest of which has not been + # uploaded yet. If so, we should ignore this file and not move it to + # $scratchpad. This means we need to read the file and see if there + # is a 'filename:' directive. - my @addresses; + my $racecondition = 0; + my $directive = read_directive_from_file + (File::Spec->catfile($directory, $stem.'.directive.asc')); + foreach my $cell (@$directive) { + next unless lc($cell->[0]) eq 'filename'; + $racecondition = 1; # found a 'filename' directive + } - foreach my $file (@email_files) { - open EMAIL_FILE, '<', $file or abort "open($file) failed: $!"; - while () { - chomp; - push @addresses, $1 - if m/^([[:graph:]]+[@][[:graph:]]+)$/; # simple sanity check and untaint + if ($racecondition) { + # Most likely a race condition. We have a directive file but not + # the accompanying file(s). Just ignore this directive for now. + ftp_syslog info => + "Found directive file with filename directive " + ."(${stem}.directive.asc), but no accompanying files. " + ."Ignoring directive file in this run."; + } else { + # Directive file only, no actual file to deal with + # This can happen when dealing with symlink/rmsymlink/archive options + my $file = $stem.'.directive.asc'; + unless ((-e File::Spec->catfile($scratchpad, $file) # as above for + && ! -e File::Spec->catfile($directory, $file))# file uploads + || rename (File::Spec->catfile($directory, $file), + File::Spec->catfile($scratchpad, $file))) { + ftp_syslog error => + "rename $directory/$file to $scratchpad/$file: $!"; + next STEM # abandon processing this item + } + push @ret, [$file]; + ftp_syslog info => 'processing ['.$file.']'; + } } - close EMAIL_FILE or warn "close($file) failed: $!"; - } - - # Now also look for all maintainer addresses in the maintainers.bypkg file - my $needle = $package_name.' - '; - my $nlen = length $needle; - open EMAIL_FILE, '<', $maintainers_bypkg - or abort "open($maintainers_bypkg) failed: $!"; - while () { - chomp; - next unless $needle eq substr $_,0,$nlen; # find the line for this package - # crawl through it, collecting email addresses - pos = $nlen; - push @addresses, $1 while m/\G[^<]*<([^@]+[@][^>]+)>/g; } - close EMAIL_FILE or warn "close($maintainers_bypkg) failed: $!"; - return @addresses; + return @ret; } =back -=head2 Email +=head2 [PV] Parsing and Validation =over -=item @unique_list = unique ( @list ) - -Filter LIST to return only unique strings. Analogous to uniq(1) but does -not require LIST be sorted. Order of LIST is preserved; the first -occurrence of each unique value is passed through. - -=cut +=item $oplist = interpret_directive ( $directive ) -sub unique { - my %filter; - my @ret; +Analyze the elements in DIRECTIVE, performing basic validation. An +exception is thrown if DIRECTIVE contains invalid element values. - foreach (@_) { unless ($filter{$_}) { $filter{$_}++; push @ret, $_ } } +The return value is an arrayref of command/parameter arrayrefs representing +the operations to be performed to carry out DIRECTIVE. - return @ret; -} +The values in the returned structure are untainted. -=item @filtered = exclude_mail_blacklist ( $blacklist_file, @addresses ) +=cut -Filter ADDRESSES to remove addresses mentioned in BLACKLIST_FILE. +sub interpret_directive { + my $directive = shift; # presumed tainted -=cut + my @errors; + my @trace; + my $version_error; -sub exclude_mail_blacklist { - my $blacklist_file = shift; - my @emaillist = @_; + my %options = ( replace => undef ); + my %header = ( version => undef, options => \%options, + package => undef, directory => undef, filename => undef ); + my @ops = ([header => \%header]); + my $install = undef; # can only install one file per directive + # The 'install' op carries the name of the file to install, while the + # 'filename' element binds the directive signature to its intended + # upload. These are assumed to be the same in protocol 1.2 and earlier. - local *_; - my %blacklist; + { # Extract version first, since directive elements may be in any order. + my @versions = grep $_->[0] eq 'version', @$directive; - my @filtered = @emaillist; - if (-f $blacklist_file) { - open BLACKLIST, '<', $blacklist_file - or abort "open($blacklist_file) failed: $!"; - while () { chomp; $blacklist{$_}++ } - close BLACKLIST or abort "close($blacklist_file) failed: $!"; + if (scalar @versions == 1) { + if ($versions[0][1] =~ /^(\d+\.\d+)$/) { + my $val = $1; # so far so good - @filtered = grep !$blacklist{$_}, @emaillist; + $header{version} = $val; # TODO: parse? + } else { + # version value does not match required pattern + push @errors, "invalid version $versions[0][1]"; + $version_error = 'invalid version'; + } + } elsif (scalar @versions > 1) { + push @errors, "invalid multiple version elements"; + $version_error = 'multiple version elements'; + } else { # no version at all; no longer allowed + push @errors, "no version specified in directive"; + } } - return @filtered; -} + if ($header{version} + && $header{version} ne '1.1' && $header{version} ne '1.2') { + push @errors, "invalid version $header{version}, not supported"; + $version_error = 'unsupported version'; + } -=item sendmail ( $sender, $recipients, $headers, $body ) + foreach my $item (@$directive) { + my $tainted_cmd = lc $item->[0]; + my $tainted_val = $item->[1]; -Send mail from SENDER to RECIPIENTS, with HEADERS and BODY. SENDER is an -email address. RECIPIENTS is an arrayref of email addresses. HEADERS is -an arrayref of name/value arrayrefs. BODY is a string, which will be sent -as given. + push @trace, $item; -Information about the message is also sent to syslog. + if (!$tainted_val && !($tainted_cmd =~ m/^comment|^no-op/)) { + push @errors, "invalid $tainted_cmd element with no value"; + push @trace, [' ^--', 'element with no value']; + } elsif ($tainted_cmd eq 'directory') { + unless ($tainted_val =~ m/^($RE_filename_relative)$/) { + push @errors, "invalid directory $tainted_val"; + push @trace, [' ^--', 'this directory name is invalid']; + next; + } + my $val = $1; # so far so good -=cut + my @dirs = File::Spec::Unix->splitdir($1); + my $dir_depth = scalar @dirs; -sub sendmail { - my $sender = shift; - my $recipients = shift; - my $headers = shift; - my $body = shift; + # A couple of subdir levels are ok, but don't allow hundreds. + if ($dir_depth > MAX_DIRECTORY_DEPTH) { + push @errors, "$dir_depth levels is too deep, in $val"; + push @trace, [' ^--', 'this directory name is nested too deeply']; + next; + } - if (NOMAIL) { - ftp_syslog info => - 'NOMAIL is set - not sending email to '.join(' ',@$recipients); - } else { - ftp_syslog info => 'Sending email to '.join(' ',@$recipients); - } + # Only let them specify one directory directive. + if (defined $header{directory}) { + push @errors, + "Only one directory directive is allowed per directive file. " + ."Error at directory directive: $val"; + push @trace, [' ^--', 'second directory element found here']; + next; + } - { - my $subject = 'generic failure'; - if (my @subject = grep $_->[0] eq 'Subject', @$headers) - { $subject = $subject[0][1] } - if ($subject =~ m/generic failure$/) - { ftp_syslog warning => "Error uploading package: $body" } - ftp_syslog info => "Subject: '$subject'"; - ftp_syslog info => "Body: $body"; - } + $header{directory} = $val; # ok. + $header{package} = $dirs[0]; # top-level name, no subdir + } elsif ($tainted_cmd eq 'filename') { + # We use the same filename restrictions as scan_incoming + unless ($tainted_val =~ /^($RE_filename_here)$/) { + push @errors, "invalid filename $tainted_val"; + push @trace, [' ^--', 'this filename is invalid']; + next; + } + my $val = $1; # so far so good - unless (NOMAIL) { - my $smtp; - if (IN_TEST_MODE) { - $smtp = Net::SMTP->new - (Host => 'localhost', Port => $ENV{TEST_SMTP_PORT}); + # Only let them specify one filename directive. + if (defined $header{filename}) { + push @errors, + "Only one filename directive is allowed per directive file. " + ."Error at filename directive: $val."; + push @trace, [' ^--', 'second filename element found here']; + next; + } + + $header{filename} = $val; + } elsif ($tainted_cmd eq 'version') { + # already handled above; insert any error into the trace + push @trace, [' ^--', $version_error] if $version_error; + } elsif ($tainted_cmd eq 'symlink') { + unless ($tainted_val =~ + /^($RE_filename_relative)\s+($RE_filename_relative)$/) { + push @errors, "invalid parameters for symlink command: $tainted_val"; + push @trace, + [' ^--', 'invalid parameters here; need TARGET and LINKNAME']; + next; + } + # $1 -- link target $2 -- link name + push @ops, [symlink => $1, $2]; + } elsif ($tainted_cmd eq 'rmsymlink') { + unless ($tainted_val =~ /^($RE_filename_relative)$/) { + push @errors, "invalid parameters for rmsymlink command: $tainted_val"; + push @trace, + [' ^--', 'invalid parameter here; need relative filename']; + next; + } + push @ops, [rmsymlink => $1]; + } elsif ($tainted_cmd eq 'archive') { + unless ($tainted_val =~ /^($RE_filename_relative)$/) { + push @errors, + "invalid parameters for archive command: $tainted_val"; + push @trace, + [' ^--', 'invalid parameter here; need relative filename']; + next; + } + push @ops, [archive => $1]; + } elsif ($tainted_cmd eq 'replace') { + # This command is only supported from v1.2 + unless ($tainted_val =~ /^(true|false)$/) { + push @errors, + "invalid parameters for replace command: $tainted_val"; + push @trace, + [' ^--', 'invalid parameter here; need "true" or "false"']; + next; + } + + if ($header{version} eq '1.1') { + push @errors, + "invalid directive 'replace', not supported prior to version 1.2"; + push @trace, [' ^--', 'this element was introduced in version 1.2']; + next; + } + + $options{replace} = ($1 eq 'true'); + } elsif ($tainted_cmd eq 'comment') { + # Comments are ok, we ignore them + } elsif (IN_TEST_MODE && $tainted_cmd eq 'no-op') { + # The testsuite uses a no-op command to validate directive processing. + push @ops, ['no-op']; } else { - $smtp = Net::SMTP->new(Host => 'localhost'); + push @errors, "Invalid directive line:\n\n $tainted_cmd $tainted_val"; + push @trace, [' ^--', 'this element is not recognized']; } - abort "FATAL: SMTP connection failed" unless $smtp; - $smtp->mail($sender); - $smtp->recipient(unique(@$recipients), { SkipBad => 1}); + if (!defined($install) + && defined $header{filename} && defined $header{directory}) + { push @ops, ($install = [install => $header{filename}]) } + } - $smtp->data(); - $smtp->datasend($_->[0].': '.$_->[1]."\n") for @$headers; - $smtp->datasend("\n"); - $smtp->datasend($body); - $smtp->dataend(); + # They have to specify a directory directive. + unless ($header{directory}) { + # Send the warning to the upload-ftp script maintainer, and the person who + # signed the file, if we were able to extract that from the signature on + # the directive file. + push @errors, "no directory element specified in directive"; + } - $smtp->quit(); + if (@errors) { + throw directive_syntax => + trace => \@trace, summary => $errors[0], directory => $header{directory}; } + + return \@ops; } -=item mail $message, [ to => \@addresses ], [ subject => $subject ] +# temporary scaffolding; last piece of read_directive_file that does not +# really fit elsewhere and will be removed when the new key index is +# implemented to directly map long key IDs to email addresses +sub verify_clearsigned_message; # declaration; permitted but not required +sub guess_email_address_from_signature { + my $directive_file_contents = shift; -Send MESSAGE to ADDRESSES or the internal reporting inbox if ADDRESSES is -not given. + my @addresses; -=cut + # If we don't know whose project this file belongs to, because the + # 'directory:' line is messed up or not there, we'd still like to let the + # uploader know something went wrong. So let's see if we can match the + # directive file signature against one of our public keyrings. + { + my @tmp_keyrings; + open(TMP,"/usr/bin/find $package_config_base -name pubring.gpg|"); + while() { + chomp; + m,^(/?${RE_filename_relative})$, or next; + push @tmp_keyrings, $1; + } + close(TMP); -sub mail { - my $msg = shift; - my %args = @_; + my $tmp_result = verify_clearsigned_message + ($directive_file_contents, @tmp_keyrings); - $args{subject} = 'generic failure' unless $args{subject}; + unless ($tmp_result->{exitcode} != 0 || defined $tmp_result->{TILT}) { + if (($tmp_result->{raw_log} =~ /Good signature from .*?<(.*?)>/)) + { push @addresses, $1 } + } + } - our $Internal_Report_Inbox; + return @addresses; +} - my @email_list = ($Internal_Report_Inbox); - # Some messages should be sent to the user, some should not - @email_list = @{$args{to}} if defined $args{to}; +=item validate_directive ( $packet, $oplist ) - # At minimum, an Internet email address must contain an @ character. - @email_list = grep m/@/, @email_list; +Validate the commands in OPLIST as applicable to PACKET. PACKET is an +arrayref listing the files considered to be in this packet. OPLIST is an +operation list arrayref. - unless (@email_list) { - # Something went wrong, but we can't figure out which package this - # upload belongs to. Mention that in the logs, and then mail this to - # the script maintainer anyway. - ftp_syslog info => - "No uploader e-mail address(es) to report this error to!"; - @email_list = ($Internal_Report_Inbox); - } +An exception is thrown if validation fails. - my $sender = 'ftp-upload-script@gnu.org'; - $sender = 'ftp-upload@gnu.org' - if defined $args{to}; # We really want replies to go to the ftp-upload queue +=cut - @email_list = exclude_mail_blacklist($email_blacklist, @email_list); +sub validate_directive { + my $packet = shift; + my $ops = shift; - $args{subject} = "[$zone_tag gnu-ftp-upload] $args{subject}"; + my $stem = substr $packet->[0],0,-(length '.directive.asc'); + my $op_header = $ops->[0][1]; - # Wrap message at 78 characters, this is e-mail... - local $Text::Wrap::columns = 78; + # Configuration must exist for the package + throw unknown_package => package_name => $op_header->{package} + unless -d File::Spec->catdir($package_config_base, $op_header->{package}); - sendmail($sender, - [$Internal_Report_Inbox, @email_list], - [[To => join ', ', @email_list], - [From => $sender], - [Date => strftime("%a, %e %b %Y %H:%M:%S %z", localtime)], - [Subject => $args{subject}], - ['Reply-To' => 'ftp-upload@gnu.org'], - ['Message-ID'=> Email::MessageID->new->in_brackets], - ], - wrap('','',$msg)); -} + # While per-directory email lists are now supported, the per-package list + # is still required to be present. + throw package_configuration => package_name => $op_header->{package}, + summary => 'no email list for package '.$op_header->{package} + unless -f File::Spec->catfile + ($package_config_base, $op_header->{package}, 'email'); -=item report_upload_to_archive $message, $package + # Check that we actually have at least one command in the directive + unless ($#$ops > 0) { + if (1 == scalar @$packet) { + throw directive_syntax => + trace => [], directory => $op_header->{directory}, + summary => 'nothing to do - no commands in directive file'; + } else { + # Provide a different message if this looks like an upload packet. + throw directive_syntax => + trace => [], directory => $op_header->{directory}, + summary => "no filename element in $stem.directive.asc." + .' Upgrade to the latest version! ' + .'See http://www.gnu.org/prep/maintain/maintain.html'; + } + } -Send MESSAGE to the public archive inbox, with a subject indicating that an -upload for PACKAGE was processed. This is used to publicly archive all -successfully processed directives, and some erroneous directives. + # Check if this directive carries a file/validate stem if needed + if (defined $op_header->{filename}) { + # Ben Pfaff wrote: + # First, "gpg -b" doesn't verify that the filename of the signed + # data is correct. This means that I can rename gcc-1.2.3.tar.gz + # to gcc-3.4.5.tar.gz and the signature will still verify + # correctly. This opens up the possibility for confusion, but in + # itself it's not a huge deal. + # + # To fix this, we require a 'filename:' line in the directive file that + # needs to match the name of the uploaded file and serves to bind the + # directive signature and the uploaded packet. We already know that + # the name of the uploaded file must match the stem of the directive + # file name; this is how it was recognized as part of the packet. + throw directive_filename_mismatch => + filename => $op_header->{filename}, stem => $stem + unless $stem eq $op_header->{filename}; + } +} -=cut + -sub report_upload_to_archive { - my $msg = shift; - my $package_name = shift; +=back - our $Public_Upload_Archive_Inbox; +=head2 [AA] Authentication/Authorization - sendmail('ftp-upload-script@gnu.org', - [$Public_Upload_Archive_Inbox], - [[To => $Public_Upload_Archive_Inbox], - [From => 'ftp-upload-script@gnu.org'], - [Date => strftime("%a, %e %b %Y %H:%M:%S %z", localtime)], - [Subject => ("[$zone_tag gnu-ftp-debug] " - ."new upload processed: $package_name")], - ['Reply-To' => 'ftp-upload@gnu.org'], - ['Message-ID'=> Email::MessageID->new->in_brackets], - ], - $msg); -} +=over - +=item $results = verify_clearsigned_message ( $text, @keyrings ) -=back +=item $results = verify_detached_signature ( $file, $sigfile, @keyrings ) -=head2 Directive reader and parsing helpers +Verify the PGP-clearsigned message in TEXT or the detached signature in +SIGFILE for FILE, using a key from KEYRINGS. The TEXT may be tainted, but +the list of KEYRINGS and the FILE and SIGFILE values must be +untainted. + +The message signature should be considered verified iff C is zero +and C is not defined in the returned hashref. + +The return value is a hashref containing: =over -=item $directive = read_directive ( $handle ) +=item TILT -Read a PGP-clearsigned directive from the provided handle, which must be -open for reading. The handle is assumed to be positioned at the start of -the file. This function will search for the PGP header and stop reading at -the signature. +An arrayref of reasons the results should be considered invalid. This key +will not exist if the verification was successful and trustworthy. -The return value is an arrayref of key/value arrayrefs representing the -directive elements in the first PGP-clearsigned message found while reading -from HANDLE. The values in the returned structure are tainted. +The presense of this key in the returned hashref indicates that we saw +something very wrong from gpgv. Note that our handling is fairly paranoid, +for example, multiple signatures on the input will result in this being +set, as we assume that gpgv has been somehow subverted if more than one +verification result is returned. -=cut +=item exitcode -sub read_directive { - my $directive = shift; +The exit status from gpgv. This will be zero if gpgv considers the +signature valid. - local *_; - my @records = (); +=item raw_output - # First, we find the PGP signature header. - while (<$directive>) { - chomp; s/\r+$//; # remove line ending, including DOS type - last if m/^-----BEGIN PGP SIGNED MESSAGE-----\s*$/; - # RFC4880 allows trailing whitespace on marker lines. - } - # We are now in the armor headers. - while (<$directive>) { - chomp; s/\r+$//; # remove line ending, including DOS type - # According to RFC4880, there must be exactly one empty line to - # separate the signed message from the armor headers. - last if m/^$/; - } - # We are now looking at the signed message text. - while (<$directive>) { - chomp; s/\r+$//; # remove line ending, including DOS type - s/^\s+//; s/\s+$//; # trim leading and trailing whitespace +=item raw_log - last if m/^-----BEGIN PGP SIGNATURE-----$/; +=item raw_status - unless (/^$/) { # ignore blank lines - push @records, [split /\s+/,$_,2]; - $records[-1][0] =~ s/\s*:$//; # trim trailing colon on key - } - } - # That is all: we have reached the signature and are done. - return \@records; -} +The complete collected output, log, and status buffers. -=item $directive = read_directive_from_file ( $filename ) +=item key_longid -Read a PGP-clearsigned directive file and return an arrayref of key/value -pair arrayrefs representing the directive elements in the signed portion of -the file FILENAME. Any text in the file not within the first clearsigned -message is ignored. +The 64-bit long key ID of the key that signed TEXT, if available. -The values returned from this procedure are tainted. +=item key_fingerprint -=cut +The fingerprint of the PGP key that signed TEXT, if available. -sub read_directive_from_file { - my $filename = shift; +=item sig_creation - open my $handle, '<', $filename - or die "open($filename): $!"; - my $records = read_directive($handle); - close $handle - or die "close($filename): $!"; +Epoch timestamp of signature. - return $records; -} +=item sig_expiration -=item $directive = read_directive_from_string ( $text ) +Epoch timestamp at which the signature expires, if the signature expires. +This key is only present if the signature has an expiration date. -Read a PGP-clearsigned directive and return an arrayref of key/value pair -arrayrefs representing the directive elements in the signed portion of the -provided TEXT. Any text not within the first clearsigned message is -ignored. This function uses Perl's support for in-memory files. +=back -The values returned from this procedure are tainted. +The C fields in the returned hashref are tainted; the extracted +values are untainted. The C field, if present, is untainted. =cut -sub read_directive_from_string { - my $text = shift; +# helpers for verify_clearsigned_message and verify_detached_signature +sub _spawn_gpgv { + my $keyrings = shift; + my @file_args = @_; - open my $handle, '<', \$text - or die "open memory file: $!"; - my $records = read_directive($handle); - close $handle - or die "close memory file: $!"; + # We need a few pipes: + # - clearsigned message to gpgv stdin + # - output from gpgv stdout/stderr + # - log from gpgv --logger-fd + # - status from gpgv --status-fd + # - a flag pipe to indicate successful exec or carry an error - return $records; -} + # The three output streams from gpgv must be kept separate, or + # CVE-2018-12020 "SigSpoof" issues can occur. Worse, the gpgv status + # output must be examined with care, as there has been at least one bug + # (CVE-2022-34903) whereby GPG could be tricked to emit arbitrary output + # on the status pipe. + pipe my $gpgv_stdin, my $gpgv_stdin_source + or abort "failed to create pipe for gpgv stdin: $!"; + pipe my $gpgv_output, my $gpgv_output_sink + or abort "failed to create pipe for gpgv output: $!"; + pipe my $gpgv_log, my $gpgv_log_sink + or abort "failed to create pipe for gpgv log: $!"; + pipe my $gpgv_status, my $gpgv_status_sink + or abort "failed to create pipe for gpgv status: $!"; + pipe my $gpgv_flag, my $gpgv_flag_sink + or abort "failed to create pipe for gpgv flag: $!"; -=item @values = find_directive_elements ( $directive, $key ) + # ensure autoflush on writes to gpgv + { my $outhandle = select $gpgv_stdin_source; $| = 1; select $outhandle } -=item $count = find_directive_elements ( $directive, $key ) + my @gpgv_args = ( GPGV_BIN, + '--logger-fd', fileno $gpgv_log_sink, + '--status-fd', fileno $gpgv_status_sink ); + push @gpgv_args, '--keyring', $_ for @$keyrings; + push @gpgv_args, @file_args; -Search the DIRECTIVE arrayref for KEY elements and return their associated -values. An empty list is returned if no KEY elements are found in -DIRECTIVE. The KEY comparison is case-insensitive. + ftp_syslog debug => 'DEBUG: gpgv command line: '.join(' ', @gpgv_args) + if DEBUG; -The values returned from this procedure are tainted. + my $pid = fork; + abort "failed to fork child for gpgv: $!" + unless defined $pid; -In scalar context, return the number of entries that would be returned in -list context. + unless ($pid) { + # We are in the child process... + close $gpgv_stdin_source; + close $gpgv_output; close $gpgv_log; + close $gpgv_status; close $gpgv_flag; -=cut + our $AbortPipe = $gpgv_flag_sink; # pipe to parent + our $AbortExitCode = 127; # as posix_spawn uses + # no need to use local here; this process will either exec or abort -sub find_directive_elements { - my $directive = shift; - my $key = lc shift; + # Adjust close-on-exec flags: + my $flags; + # - clear on status and log sinks + $flags = fcntl $gpgv_status_sink, F_GETFD, 0 + or abort "ERR: fcntl F_GETFD on status: $!"; + fcntl $gpgv_status_sink, F_SETFD, $flags & ~FD_CLOEXEC + or abort "ERR: fcntl F_SETFD on status: $!"; + $flags = fcntl $gpgv_log_sink, F_GETFD, 0 + or abort "ERR: fcntl F_GETFD on log: $!"; + fcntl $gpgv_log_sink, F_SETFD, $flags & ~FD_CLOEXEC + or abort "ERR: fcntl F_SETFD on log: $!"; + # - set on flag pipe sink + $flags = fcntl $gpgv_flag_sink, F_GETFD, 0 + or abort "ERR: fcntl F_GETFD on flag: $!"; + fcntl $gpgv_flag_sink, F_SETFD, $flags | FD_CLOEXEC + or abort "ERR: fcntl F_SETFD on flag: $!"; - if (wantarray) { - return map $_->[1], grep lc($_->[0]) eq $key, @$directive; - } else { - return grep lc($_->[0]) eq $key, @$directive; - } -} + # Prepare STDIN/STDOUT/STDERR + open STDIN, '<&', $gpgv_stdin or abort "ERR: set stdin: $!"; + open STDOUT, '>&', $gpgv_output_sink or abort "ERR: set stdout: $!"; + open STDERR, '>&', $gpgv_output_sink or abort "ERR: set stderr: $!"; -=item $directory = find_directory ( $directive ) + # Exec gpgv + exec { GPGV_BIN } @gpgv_args or abort "ERR: exec: $!"; + } -Extract the destination directory name from the parsed DIRECTIVE arrayref. -An exception is thrown if DIRECTIVE does not contain exactly one -"directory" element or if the value of that element is not acceptable. + # The parent continues here... + close $gpgv_stdin; + close $gpgv_output_sink; close $gpgv_log_sink; + close $gpgv_status_sink; close $gpgv_flag_sink; -The value returned from this procedure is untainted. + # This is a bit tricky: we need to know if gpgv could not be run, so we + # have an extra pipe that will either report an error or be closed if the + # exec succeeds in the child process. + while (defined(my $err = <$gpgv_flag>)) { + chomp $err; + if ($err =~ m/^ERR: (.*)$/) { + # This is bad - we couldn't even execute the gpgv command properly + waitpid $pid, 0; # reap failed child + abort + ("gpg verify of directive file failed (error executing gpgv): $1"); + } + } + close $gpgv_flag; # child has closed its end one way or another -=cut + foreach my $cell ([$gpgv_stdin_source, 'message'], [$gpgv_output, 'output'], + [$gpgv_log, 'log'], [$gpgv_status, 'status']) { + my $flags = fcntl $cell->[0], F_GETFL, 0 + or abort "gpgv: fcntl F_GETFL $cell->[1]: $!"; + fcntl $cell->[0], F_SETFL, $flags | O_NONBLOCK + or abort "gpgv: fcntl F_SETFL $cell->[1]: $!"; + } -sub find_directory { - my $directive = shift; + return $pid, $gpgv_stdin_source, $gpgv_output, $gpgv_log, $gpgv_status; +} - my @values = find_directive_elements($directive, 'directory'); +sub _analyze_gpgv_output { + my $ret = shift; # hashref - die "Only one directory directive is allowed per directive file." - if scalar @values > 1; - die "no directory directive specified" - unless @values; - die "invalid directory element with no value" - unless $values[0]; + # CVE-2022-34903 caused GPG to dump a chunk of its heap to the status fd, + # and, eventually, segfault upon reaching unallocated address space. + # This had two recognizable consequences: + # - The GPG process dies with SIGSEGV. + # - The status output very likely contains multiple NUL bytes. + push @{$ret->{TILT}}, 'gpgv died on signal '.WTERMSIG($ret->{exitcode}) + if WIFSIGNALED($ret->{exitcode}); + for (qw(output log status)) + { push @{$ret->{TILT}}, "gpgv $_ contained NUL byte" + if $ret->{'raw_'.$_} =~ m/\0/ } - die "invalid directory $values[0]" - unless $values[0] =~ m/^($RE_filename_relative)$/; + local *_; + # counters + my $intro_status = 0; my $check_status = 0; my $verdict_status = 0; - return $1; -} + open my $status, '<', \($ret->{raw_status}) + or abort "open in-memory file for gpgv status: $!"; + while (<$status>) { + chomp; + unless (m/^\[GNUPG:\] /g) { + push @{$ret->{TILT}}, "gpgv status line lacks required prefix"; + last; # stop parsing if an invalid line is found + } -=item $package = find_package ( $directive ) + if (m/\GNEWSIG/gc) { + $intro_status++; # Note that NEWSIG is optional + } elsif (m/\G(GOOD|EXP|EXPKEY|REVKEY|BAD|ERR)SIG ([[:xdigit:]]+) /gc) { + # $1 -- result tag $2 -- long ID or fingerprint + # The next field is the primary username, except ERRSIG, but there is + # no guarantee that the primary UID will contain an email address. + if (length($2) > 16) { # We have a key fingerprint + $ret->{key_fingerprint} = $2; + $ret->{key_longid} = substr $2,-16; + } else { # We only have a long key ID + $ret->{key_longid} = $2; + } -Extract the package name from the parsed DIRECTIVE arrayref. An exception -is thrown if DIRECTIVE does not contain exactly one "directory" element or -if the value of that element is not a relative file name. + if ($1 eq 'BAD') { + $verdict_status++; + push @{$ret->{TILT}}, 'gpgv reported a bad signature, but exited zero' + if 0 == $ret->{exitcode}; + } elsif ($1 eq 'ERR') { # an ERRSIG line + $verdict_status++; + if (m/\G(\d+)\s(\d+)\s([[:xdigit:]]{2})\s([-:T[:digit:]Z+]+)\s(\d+) + /gcx) { + # $1 -- pubkey algorithm $2 -- digest algorithm + # $3 -- timestamp $4 -- result code + abort 'gpgv returned an ISO8601 timestamp; implementation needed' + if $3 =~ m/T/; + $ret->{sig_creation} = $3; + } else + { push @{$ret->{TILT}}, 'gpgv ERRSIG line failed parsing' } -The value returned from this procedure is untainted. + push @{$ret->{TILT}}, 'gpgv reported an error, but exited zero' + if 0 == $ret->{exitcode}; + } else { # GOODSIG/EXPSIG/EXPKEYSIG/REVKEYSIG + $check_status++; + } + } elsif (m/\G(VALID)SIG\s([[:xdigit:]]+)\s(\d{4}-\d{2}-\d{2})\s + ([-:T[:digit:]Z+]+)\s([-:T[:digit:]Z+]+)\s(\d+)\s(\S+)\s + (\d+)\s(\d+)\s([[:xdigit:]]{2})\s([[:xdigit:]]+) + /gcx) { + $verdict_status++; + # $1 -- valid tag $2 -- key fingerprint + # $3 -- signature date $4 -- signature timestamp + # $5 -- expiration timestamp $6 -- signature version + # $7 -- reserved $8 -- pubkey algorithm + # $9 -- digest algorithm $10 -- signature class + # $11 -- primary key fingerprint + $ret->{key_fingerprint} = $2; + $ret->{key_longid} = substr $2,-16; + abort 'gpgv returned an ISO8601 timestamp; implementation needed' + if $4 =~ m/T/ || $5 =~ m/T/; + $ret->{sig_creation} = $4; + # GPG reports 0 if the signature does not expire + $ret->{sig_expiration} = $5 if $5 > 0; + } + } + close $status or abort "close in-memory file for gpgv status: $!"; -=cut + push @{$ret->{TILT}}, 'gpgv reported more than one signature' + if $intro_status > 1; + push @{$ret->{TILT}}, 'gpgv reported more than one signature check' + if $check_status > 1; + push @{$ret->{TILT}}, 'gpgv reported more than one signature verdict' + if $verdict_status > 1; + push @{$ret->{TILT}}, 'gpgv reported no signature verdict at all' + if $verdict_status < 1; -sub find_package { - # The package name is the first directory named in the directory element. - my @dirs = File::Spec::Unix->splitdir(find_directory(@_)); - return $dirs[0]; + return $ret; } - - -=back - -=head2 [SC] Scan for incoming packets - -=over - -=item @files = scan_incoming ( $directory ) - -Scan DIRECTORY for newly-arrived uploaded files. Remove blatantly bogus -files, ignore acceptable files that are either still open or recently -modified, and return a list of filenames for further processing. - -=cut +sub verify_clearsigned_message { + my $text = shift; + my @keyrings = @_; -sub scan_incoming { - my $directory = shift; + ftp_syslog debug => 'DEBUG: message size is '.length($text) if DEBUG; - local *_; + # "my (LIST) = ..." causes problems with CPerl mode here -- jcb + my $pid; my $gpgv_stdin_source; + my $gpgv_output; my $gpgv_log; my $gpgv_status; + ($pid, $gpgv_stdin_source, $gpgv_output, $gpgv_log, $gpgv_status) = + _spawn_gpgv(\@keyrings, '-'); - my $time_bar = time - 120; - my @trash; my $badname_count = 0; - my %possible; - # Get list of all possible files from incoming dir. - # - opendir INCOMING, $directory - or abort "FATAL opendir($directory) failed: $!"; - ENT: while (defined($_ = readdir INCOMING)) { - next ENT if m/^[.]{1,2}$/; # skip . and .. entries - # require acceptable filenames - unless (length($_) <= MAX_FILE_NAME_LEN && m/^($RE_filename_here)$/) { - m/^(.*)$/; # untaint the value - # This is safe for unlink (which is all we will do with @trash) - # because the filename came from a directory entry, so it must be a - # valid filename and cannot indicate directory traversal. - unless (m/^[.]$RE_filename_here$/) { - # do not unlink backup files left by cleanup and cleanup_dir - push @trash, File::Spec->catfile($directory, $1); $badname_count++; + local $SIG{PIPE} = sub { abort 'gpgv exited unexpectedly' }; + my $Rchk = ''; my $Wchk = ''; + vec($Wchk, (fileno $gpgv_stdin_source), 1) = 1; + vec($Rchk, (fileno $_), 1) = 1 for ($gpgv_output, $gpgv_log, $gpgv_status); + my $Rrdy = ''; my $Wrdy = ''; + my $raw_output = ''; my $raw_log = ''; my $raw_status = ''; + pos $text = 0; # use this slot to store a position because we can + do { + foreach my $cell ([$gpgv_output, \$raw_output], [$gpgv_log, \$raw_log], + [$gpgv_status, \$raw_status]) { + if (vec($Rrdy, (fileno $cell->[0]), 1)) { + my $eof; # defined and zero at eof + 1 while + $eof = sysread $cell->[0], ${$cell->[1]}, 128, length ${$cell->[1]}; + vec($Rchk, (fileno $cell->[0]), 1) = 0 if defined $eof && $eof == 0; } - next ENT - } - my $ent = $1; # if we get here, $RE_filename_here matched above - # $_ remains tainted, but $ent is an untainted (and safe) copy - - # Examine the file; this populates an internal cache in perl. - unless (stat(File::Spec->catfile($directory, $ent))) { - ftp_syslog warning => "could not stat($ent), skipping"; - next ENT } - # Do not consider files that have been modified in the last 2 minutes. - # This is an extra safety check to avoid trying to process files that - # are still being uploaded. (use stat cache) - if (TSTAMPCHECK) { - if ((stat(_))[9] >= $time_bar) { - ftp_syslog debug => "DEBUG: " - ."$ent has been modified in the last 2 minutes, skipping" - if DEBUG; - next ENT + if (defined fileno $gpgv_stdin_source + && vec($Wrdy, (fileno $gpgv_stdin_source), 1)) { + my $err = syswrite $gpgv_stdin_source, $text, 128, pos $text; + pos $text += $err if defined $err; + unless (pos $text < length $text) { + vec($Wchk, (fileno $gpgv_stdin_source), 1) = 0; + close $gpgv_stdin_source; } } - # check for overlength directives and signatures (use stat cache) - if (/[.]directive[.]asc$/ && -f _ && ((-s _) >= MAX_DIRECTIVE_SIZE)) { - ftp_syslog info => "Overlength directive file ($ent) trashcanned"; - push @trash, File::Spec->catfile($directory, $ent); - next ENT - } elsif (/[.]sig$/ && -f _ && ((-s _) >= MAX_SIGNATURE_SIZE)) { - ftp_syslog info => "Overlength signature file ($ent) trashcanned"; - push @trash, File::Spec->catfile($directory, $ent); - next ENT - } + select $Rrdy=$Rchk, $Wrdy=$Wchk, undef, undef + if grep vec($Rchk, (fileno $_), 1), + $gpgv_output, $gpgv_log, $gpgv_status; + } while (grep vec($Rchk, (fileno $_), 1), + $gpgv_output, $gpgv_log, $gpgv_status); - ftp_syslog debug => "DEBUG: uploaded file to check: $ent" if DEBUG; - $possible{$ent} = 1; - } - closedir INCOMING - or abort "FATAL: closedir($directory) failed: $!"; + close $gpgv_stdin_source; close $gpgv_output; + close $gpgv_log; close $gpgv_status; + waitpid $pid, 0; # reap child that ran gpgv - # dispose of any garbage files - ftp_syslog info => "$badname_count files with bogus names were trashcanned" - if $badname_count; - ftp_syslog info => "Trashcanned files removed" - if unlink @trash; - @trash = (); # and empty the array to be safe, since it contained - # potentially arbitrary _untainted_ filenames + return _analyze_gpgv_output + ({exitcode => $?, raw_output => $raw_output, + raw_log => $raw_log, raw_status => $raw_status}); +} - # No possible files found, so return before we call lsof - return () unless %possible; +sub verify_detached_signature { + my $filename = shift; + my $sigfilename = shift; + my @keyrings = @_; - # Determine if any of those possible files are open. We find the - # possible files before running lsof (partly) to avoid a race - # condition. (If we ran lsof on the whole directory first, a new file - # might be uploaded and possibly be processed even though it was open.) - # - # Note that lsof outputs absolute names. - # - # This lsof test only works if either: - # a) lsof is not compiled with HASSECURITY - # b) gatekeeper runs as the vsftpd ftp_user - # If neither of those 2 conditions are met, the lsof call will not see - # the open files because they are owned by another user. - # On modern (Debian) systems, condition a) is not met. - # On modern GNU/Linux systems, unless either condition b) is met or lsof - # is installed setuid root, the kernel will not permit the open files to - # be seen because they are owned by another user. - my @lsof_args = (LSOF_BIN, "-Fn", - map { File::Spec->catfile($directory, $_) } keys %possible); - ftp_syslog debug => "DEBUG: lsof command line: " . join(' ',@lsof_args) - if DEBUG; + # This is very similar to verify_clearsigned_message, but slightly + # simpler because all input to GPG is supplied from files, so we do not + # have a pipe to the child process. We still need the other pipes and we + # still have the same risks of exploits against GPG. - open LSOF, '-|', @lsof_args - or abort "FATAL: cannot spawn lsof: $!";; - while () { - ftp_syslog debug => "DEBUG: lsof output: $_" if DEBUG; - # only look at the name lines - next unless /^n${directory}\/(.+)$/; - ftp_syslog debug => "DEBUG: " - ."upload in progress for $1, ignoring during this run" - if DEBUG; - delete ($possible{$1}) - or warn "WARNING: lsof found unrequested but open $1?!"; - } - close (LSOF); - - return keys %possible; -} - -=item @packets = gather_packets ( $directory, $scratchpad ) - -Scan DIRECTORY for newly-arrived upload packets. Remove blatantly bogus -files, ignore partial packets and acceptable files that are either still -open or recently modified, and return a list of arrayrefs representing -complete packets found. The files mentioned in the returned arrayrefs have -been moved to the SCRATCHPAD directory. Each returned packet begins with -the directive file, but order is not otherwise specified. - -=cut - -sub gather_packets { - my $directory = shift; - my $scratchpad = shift; - - my @files = scan_incoming($directory); - my %havefile = map { $_ => 1 } @files; - - # Find the directives among the possibilities and assemble packets - my @ret; - - my @stems = - map m/^(.*)[.]directive[.]asc$/, - grep m/[.]directive[.]asc$/, @files; - - STEM: foreach my $stem (@stems) { - # This trades generality for efficiency. In the general case, the STEM - # in STEM.directive.asc is almost arbitrary and collecting packets - # (identified by sharing a common STEM) requires multiple scans over - # the set of collected files. In nasty cases, the associations between - # files and packets could even be ambiguous. The below shortcuts the - # issue by requiring known extensions on each common STEM, but requires - # this function to be updated if new packet types are added. - - ftp_syslog debug => "DEBUG: " - ."considering stem [$stem] for processing" if DEBUG; - - # Note that all values in %havefile are 1 and the undefined value is - # falsish in Perl, so simple checks are adequate here. No tests for - # the directive file itself are done because each STEM is derived from - # its presence. - - if ($havefile{$stem} && $havefile{$stem.'.sig'}) { - # File upload triplet: STEM.directive.asc, STEM.sig, STEM - my $triplet = [$stem.'.directive.asc', $stem.'.sig', $stem]; - - foreach my $file (@$triplet) { - # If the file exists in the scratchpad, but not in the incoming - # directory, we may have already moved it to the scratchpad - # directory as part of another packet. We submit both packets for - # processing; it is near-certain that one of them is fake and will - # be rejected after failing authentication. - unless ((-e File::Spec->catfile($scratchpad, $file) - && ! -e File::Spec->catfile($directory, $file)) - || rename (File::Spec->catfile($directory, $file), - File::Spec->catfile($scratchpad, $file))) { - ftp_syslog error => - "rename $directory/$file to $scratchpad/$file: $!"; - next STEM # abandon processing this triplet - } - } - - push @ret, $triplet; - ftp_syslog info => 'processing ['.join(':',@$triplet).']'; - } else { - # A lone directive file: STEM.directive.asc - - # Here we have a potential problem. We could be seeing a directive - # file that belongs to a triplet the rest of which has not been - # uploaded yet. If so, we should ignore this file and not move it to - # $scratchpad. This means we need to read the file and see if there - # is a 'filename:' directive. - - my $racecondition = 0; - my $directive = read_directive_from_file - (File::Spec->catfile($directory, $stem.'.directive.asc')); - foreach my $cell (@$directive) { - next unless lc($cell->[0]) eq 'filename'; - $racecondition = 1; # found a 'filename' directive - } - - if ($racecondition) { - # Most likely a race condition. We have a directive file but not - # the accompanying file(s). Just ignore this directive for now. - ftp_syslog info => - "Found directive file with filename directive " - ."(${stem}.directive.asc), but no accompanying files. " - ."Ignoring directive file in this run."; - } else { - # Directive file only, no actual file to deal with - # This can happen when dealing with symlink/rmsymlink/archive options - my $file = $stem.'.directive.asc'; - unless ((-e File::Spec->catfile($scratchpad, $file) # as above for - && ! -e File::Spec->catfile($directory, $file))# file uploads - || rename (File::Spec->catfile($directory, $file), - File::Spec->catfile($scratchpad, $file))) { - ftp_syslog error => - "rename $directory/$file to $scratchpad/$file: $!"; - next STEM # abandon processing this item - } - push @ret, [$file]; - ftp_syslog info => 'processing ['.$file.']'; - } - } - } - - return @ret; -} - - - -=back - -=head2 [PV] Parsing and Validation - -=over - -=item $oplist = interpret_directive ( $directive ) - -Analyze the elements in DIRECTIVE, performing basic validation. An -exception is thrown if DIRECTIVE contains invalid element values. - -The return value is an arrayref of command/parameter arrayrefs representing -the operations to be performed to carry out DIRECTIVE. - -The values in the returned structure are untainted. - -=cut - -sub interpret_directive { - my $directive = shift; # presumed tainted - - my @errors; - my @trace; - my $version_error; - - my %options = ( replace => undef ); - my %header = ( version => undef, options => \%options, - package => undef, directory => undef, filename => undef ); - my @ops = ([header => \%header]); - my $install = undef; # can only install one file per directive - # The 'install' op carries the name of the file to install, while the - # 'filename' element binds the directive signature to its intended - # upload. These are assumed to be the same in protocol 1.2 and earlier. - - { # Extract version first, since directive elements may be in any order. - my @versions = grep $_->[0] eq 'version', @$directive; - - if (scalar @versions == 1) { - if ($versions[0][1] =~ /^(\d+\.\d+)$/) { - my $val = $1; # so far so good + { + my $file_size = -s $filename; + my $sig_file_size = -s $sigfilename; - $header{version} = $val; # TODO: parse? - } else { - # version value does not match required pattern - push @errors, "invalid version $versions[0][1]"; - $version_error = 'invalid version'; - } - } elsif (scalar @versions > 1) { - push @errors, "invalid multiple version elements"; - $version_error = 'multiple version elements'; - } else { # no version at all; no longer allowed - push @errors, "no version specified in directive"; - } + ftp_syslog debug => "DEBUG: $sigfilename size is $sig_file_size" + if DEBUG; + ftp_syslog debug => "DEBUG: $filename size is $file_size" + if DEBUG; } - if ($header{version} - && $header{version} ne '1.1' && $header{version} ne '1.2') { - push @errors, "invalid version $header{version}, not supported"; - $version_error = 'unsupported version'; + my $pid; my $gpgv_output; my $gpgv_log; my $gpgv_status; + { my $extra; # pipe to gpgv stdin; not used here + ($pid, $extra, $gpgv_output, $gpgv_log, $gpgv_status) = + _spawn_gpgv(\@keyrings, $sigfilename, $filename); + close $extra; } - foreach my $item (@$directive) { - my $tainted_cmd = lc $item->[0]; - my $tainted_val = $item->[1]; - - push @trace, $item; - - if (!$tainted_val && !($tainted_cmd =~ m/^comment|^no-op/)) { - push @errors, "invalid $tainted_cmd element with no value"; - push @trace, [' ^--', 'element with no value']; - } elsif ($tainted_cmd eq 'directory') { - unless ($tainted_val =~ m/^($RE_filename_relative)$/) { - push @errors, "invalid directory $tainted_val"; - push @trace, [' ^--', 'this directory name is invalid']; - next; - } - my $val = $1; # so far so good - - my @dirs = File::Spec::Unix->splitdir($1); - my $dir_depth = scalar @dirs; - - # A couple of subdir levels are ok, but don't allow hundreds. - if ($dir_depth > MAX_DIRECTORY_DEPTH) { - push @errors, "$dir_depth levels is too deep, in $val"; - push @trace, [' ^--', 'this directory name is nested too deeply']; - next; - } - - # Only let them specify one directory directive. - if (defined $header{directory}) { - push @errors, - "Only one directory directive is allowed per directive file. " - ."Error at directory directive: $val"; - push @trace, [' ^--', 'second directory element found here']; - next; - } - - $header{directory} = $val; # ok. - $header{package} = $dirs[0]; # top-level name, no subdir - } elsif ($tainted_cmd eq 'filename') { - # We use the same filename restrictions as scan_incoming - unless ($tainted_val =~ /^($RE_filename_here)$/) { - push @errors, "invalid filename $tainted_val"; - push @trace, [' ^--', 'this filename is invalid']; - next; - } - my $val = $1; # so far so good - - # Only let them specify one filename directive. - if (defined $header{filename}) { - push @errors, - "Only one filename directive is allowed per directive file. " - ."Error at filename directive: $val."; - push @trace, [' ^--', 'second filename element found here']; - next; - } - - $header{filename} = $val; - } elsif ($tainted_cmd eq 'version') { - # already handled above; insert any error into the trace - push @trace, [' ^--', $version_error] if $version_error; - } elsif ($tainted_cmd eq 'symlink') { - unless ($tainted_val =~ - /^($RE_filename_relative)\s+($RE_filename_relative)$/) { - push @errors, "invalid parameters for symlink command: $tainted_val"; - push @trace, - [' ^--', 'invalid parameters here; need TARGET and LINKNAME']; - next; - } - # $1 -- link target $2 -- link name - push @ops, [symlink => $1, $2]; - } elsif ($tainted_cmd eq 'rmsymlink') { - unless ($tainted_val =~ /^($RE_filename_relative)$/) { - push @errors, "invalid parameters for rmsymlink command: $tainted_val"; - push @trace, - [' ^--', 'invalid parameter here; need relative filename']; - next; - } - push @ops, [rmsymlink => $1]; - } elsif ($tainted_cmd eq 'archive') { - unless ($tainted_val =~ /^($RE_filename_relative)$/) { - push @errors, - "invalid parameters for archive command: $tainted_val"; - push @trace, - [' ^--', 'invalid parameter here; need relative filename']; - next; - } - push @ops, [archive => $1]; - } elsif ($tainted_cmd eq 'replace') { - # This command is only supported from v1.2 - unless ($tainted_val =~ /^(true|false)$/) { - push @errors, - "invalid parameters for replace command: $tainted_val"; - push @trace, - [' ^--', 'invalid parameter here; need "true" or "false"']; - next; - } - - if ($header{version} eq '1.1') { - push @errors, - "invalid directive 'replace', not supported prior to version 1.2"; - push @trace, [' ^--', 'this element was introduced in version 1.2']; - next; + my $Rchk = ''; + vec($Rchk, (fileno $_), 1) = 1 for ($gpgv_output, $gpgv_log, $gpgv_status); + my $Rrdy = ''; + my $raw_output = ''; my $raw_log = ''; my $raw_status = ''; + do { + foreach my $cell ([$gpgv_output, \$raw_output], [$gpgv_log, \$raw_log], + [$gpgv_status, \$raw_status]) { + if (vec($Rrdy, (fileno $cell->[0]), 1)) { + my $eof; # defined and zero at eof + 1 while + $eof = sysread $cell->[0], ${$cell->[1]}, 128, length ${$cell->[1]}; + vec($Rchk, (fileno $cell->[0]), 1) = 0 if defined $eof && $eof == 0; } + } - $options{replace} = ($1 eq 'true'); - } elsif ($tainted_cmd eq 'comment') { - # Comments are ok, we ignore them - } elsif (IN_TEST_MODE && $tainted_cmd eq 'no-op') { - # The testsuite uses a no-op command to validate directive processing. - push @ops, ['no-op']; - } else { - push @errors, "Invalid directive line:\n\n $tainted_cmd $tainted_val"; - push @trace, [' ^--', 'this element is not recognized']; - } - - if (!defined($install) - && defined $header{filename} && defined $header{directory}) - { push @ops, ($install = [install => $header{filename}]) } - } - - # They have to specify a directory directive. - unless ($header{directory}) { - # Send the warning to the upload-ftp script maintainer, and the person who - # signed the file, if we were able to extract that from the signature on - # the directive file. - push @errors, "no directory element specified in directive"; - } - - if (@errors) { - throw directive_syntax => - trace => \@trace, summary => $errors[0], directory => $header{directory}; - } + select $Rrdy=$Rchk, undef, undef, undef + if grep vec($Rchk, (fileno $_), 1), + $gpgv_output, $gpgv_log, $gpgv_status; + } while (grep vec($Rchk, (fileno $_), 1), + $gpgv_output, $gpgv_log, $gpgv_status); - return \@ops; + close $gpgv_output; close $gpgv_log; close $gpgv_status; + waitpid $pid, 0; # reap child that ran gpgv + + return _analyze_gpgv_output + ({exitcode => $?, raw_output => $raw_output, + raw_log => $raw_log, raw_status => $raw_status}); } =item $epoch = advance_timestamp_ratchet ( $full_filename, $epoch ) @@ -2185,104 +2189,6 @@ sub advance_timestamp_ratchet { return $old_epoch; } -# temporary scaffolding; last piece of read_directive_file that does not -# really fit elsewhere and will be removed when the new key index is -# implemented to directly map long key IDs to email addresses -sub guess_email_address_from_signature { - my $directive_file_contents = shift; - - my @addresses; - - # If we don't know whose project this file belongs to, because the - # 'directory:' line is messed up or not there, we'd still like to let the - # uploader know something went wrong. So let's see if we can match the - # directive file signature against one of our public keyrings. - { - my @tmp_keyrings; - open(TMP,"/usr/bin/find $package_config_base -name pubring.gpg|"); - while() { - chomp; - m,^(/?${RE_filename_relative})$, or next; - push @tmp_keyrings, $1; - } - close(TMP); - - my $tmp_result = verify_clearsigned_message - ($directive_file_contents, @tmp_keyrings); - - unless ($tmp_result->{exitcode} != 0 || defined $tmp_result->{TILT}) { - if (($tmp_result->{raw_log} =~ /Good signature from .*?<(.*?)>/)) - { push @addresses, $1 } - } - } - - return @addresses; -} - -=item validate_directive ( $packet, $oplist ) - -Validate the commands in OPLIST as applicable to PACKET. PACKET is an -arrayref listing the files considered to be in this packet. OPLIST is an -operation list arrayref. - -An exception is thrown if validation fails. - -=cut - -sub validate_directive { - my $packet = shift; - my $ops = shift; - - my $stem = substr $packet->[0],0,-(length '.directive.asc'); - my $op_header = $ops->[0][1]; - - # Configuration must exist for the package - throw unknown_package => package_name => $op_header->{package} - unless -d File::Spec->catdir($package_config_base, $op_header->{package}); - - # While per-directory email lists are now supported, the per-package list - # is still required to be present. - throw package_configuration => package_name => $op_header->{package}, - summary => 'no email list for package '.$op_header->{package} - unless -f File::Spec->catfile - ($package_config_base, $op_header->{package}, 'email'); - - # Check that we actually have at least one command in the directive - unless ($#$ops > 0) { - if (1 == scalar @$packet) { - throw directive_syntax => - trace => [], directory => $op_header->{directory}, - summary => 'nothing to do - no commands in directive file'; - } else { - # Provide a different message if this looks like an upload packet. - throw directive_syntax => - trace => [], directory => $op_header->{directory}, - summary => "no filename element in $stem.directive.asc." - .' Upgrade to the latest version! ' - .'See http://www.gnu.org/prep/maintain/maintain.html'; - } - } - - # Check if this directive carries a file/validate stem if needed - if (defined $op_header->{filename}) { - # Ben Pfaff wrote: - # First, "gpg -b" doesn't verify that the filename of the signed - # data is correct. This means that I can rename gcc-1.2.3.tar.gz - # to gcc-3.4.5.tar.gz and the signature will still verify - # correctly. This opens up the possibility for confusion, but in - # itself it's not a huge deal. - # - # To fix this, we require a 'filename:' line in the directive file that - # needs to match the name of the uploaded file and serves to bind the - # directive signature and the uploaded packet. We already know that - # the name of the uploaded file must match the stem of the directive - # file name; this is how it was recognized as part of the packet. - throw directive_filename_mismatch => - filename => $op_header->{filename}, stem => $stem - unless $stem eq $op_header->{filename}; - } -} - =item check_signature_timestamp ( $what , $timestamp ) Report the WHAT signature TIMESTAMP to the log and raise an exception if @@ -2334,6 +2240,14 @@ sub check_replay { } } + + +=back + +=head2 [VL] Validation + +=over + =item check_automake_vulnerabilities ( $upload_file ) Examine UPLOAD_FILE for known vulnerabilities that certain (now ancient) @@ -2407,26 +2321,6 @@ sub check_automake_vulnerabilities { =over -=item mkdir_p ( $base, @directory ) - -Ensure that DIRECTORY (expressed as returned by File::Spec->splitdir) -exists under the BASE directory. - -=cut - -sub mkdir_p { - # @_ is directory name elements - - my @dir_steps; # list of intermediate dirs needed - # for example, creating bar/baz/quux in an empty /foo populates this list - # with qw( /foo/bar /foo/bar/baz /foo/bar/baz/quux ) on POSIX - - for (@dir_steps = (); @_ && ! -d File::Spec->catdir(@_); pop) - { unshift @dir_steps, File::Spec->catdir(@_) } - - mkdir $_ or die "mkdir($_): $!" for @dir_steps; -} - =item archive_filepair ( $directory, $filename ) Move FILENAME (and its detached signature) from DIRECTORY in the managed @@ -2621,6 +2515,121 @@ sub execute_rmsymlink { =back +=head2 [RP] Report + +=over + +=item @filtered = exclude_mail_blacklist ( $blacklist_file, @addresses ) + +Filter ADDRESSES to remove addresses mentioned in BLACKLIST_FILE. + +=cut + +sub exclude_mail_blacklist { + my $blacklist_file = shift; + my @emaillist = @_; + + local *_; + my %blacklist; + + my @filtered = @emaillist; + if (-f $blacklist_file) { + open BLACKLIST, '<', $blacklist_file + or abort "open($blacklist_file) failed: $!"; + while () { chomp; $blacklist{$_}++ } + close BLACKLIST or abort "close($blacklist_file) failed: $!"; + + @filtered = grep !$blacklist{$_}, @emaillist; + } + + return @filtered; +} + +=item mail $message, [ to => \@addresses ], [ subject => $subject ] + +Send MESSAGE to ADDRESSES or the internal reporting inbox if ADDRESSES is +not given. + +=cut + +sub mail { + my $msg = shift; + my %args = @_; + + $args{subject} = 'generic failure' unless $args{subject}; + + our $Internal_Report_Inbox; + + my @email_list = ($Internal_Report_Inbox); + # Some messages should be sent to the user, some should not + @email_list = @{$args{to}} if defined $args{to}; + + # At minimum, an Internet email address must contain an @ character. + @email_list = grep m/@/, @email_list; + + unless (@email_list) { + # Something went wrong, but we can't figure out which package this + # upload belongs to. Mention that in the logs, and then mail this to + # the script maintainer anyway. + ftp_syslog info => + "No uploader e-mail address(es) to report this error to!"; + @email_list = ($Internal_Report_Inbox); + } + + my $sender = 'ftp-upload-script@gnu.org'; + $sender = 'ftp-upload@gnu.org' + if defined $args{to}; # We really want replies to go to the ftp-upload queue + + @email_list = exclude_mail_blacklist($email_blacklist, @email_list); + + $args{subject} = "[$zone_tag gnu-ftp-upload] $args{subject}"; + + # Wrap message at 78 characters, this is e-mail... + local $Text::Wrap::columns = 78; + + sendmail($sender, + [$Internal_Report_Inbox, @email_list], + [[To => join ', ', @email_list], + [From => $sender], + [Date => strftime("%a, %e %b %Y %H:%M:%S %z", localtime)], + [Subject => $args{subject}], + ['Reply-To' => 'ftp-upload@gnu.org'], + ['Message-ID'=> Email::MessageID->new->in_brackets], + ], + wrap('','',$msg)); +} + +=item report_upload_to_archive $message, $package + +Send MESSAGE to the public archive inbox, with a subject indicating that an +upload for PACKAGE was processed. This is used to publicly archive all +successfully processed directives, and some erroneous directives. + +=cut + +sub report_upload_to_archive { + my $msg = shift; + my $package_name = shift; + + our $Public_Upload_Archive_Inbox; + + sendmail('ftp-upload-script@gnu.org', + [$Public_Upload_Archive_Inbox], + [[To => $Public_Upload_Archive_Inbox], + [From => 'ftp-upload-script@gnu.org'], + [Date => strftime("%a, %e %b %Y %H:%M:%S %z", localtime)], + [Subject => ("[$zone_tag gnu-ftp-debug] " + ."new upload processed: $package_name")], + ['Reply-To' => 'ftp-upload@gnu.org'], + ['Message-ID'=> Email::MessageID->new->in_brackets], + ], + $msg); +} + + + +=back + =head2 Clean up =over