From 5441d92c378272d003cb5f994b3f609f9f8c373b Mon Sep 17 00:00:00 2001 From: Jacob Bachmeyer Date: Sat, 15 Oct 2022 19:26:56 -0500 Subject: [PATCH] Revise packet-gathering loop in scan_incoming Since every packet must contain a directive, the process is now driven based on the set of directives available. This change also avoids considering every file and fixes a related minor bug in the old code: comments indicated that all files from each triplet were supposed to be removed from possible consideration, but the loop was actually iterating over a temporary list containing the keys of the %possible hash as of the start of the loop. This change also introduces a new internal format for the result of the scan_incoming function and some temporary scaffolding code to convert the new format to the old format to keep the tool working during the change. --- gatekeeper.pl | 144 +++++++++++++++++--------- testsuite/gatekeeper.all/00_idle.exp | 26 +---- testsuite/gatekeeper.all/01_loose.exp | 2 +- testsuite/lib/gatekeeper.exp | 4 +- 4 files changed, 103 insertions(+), 73 deletions(-) diff --git a/gatekeeper.pl b/gatekeeper.pl index ed9fd7e..b966081 100755 --- a/gatekeeper.pl +++ b/gatekeeper.pl @@ -853,6 +853,18 @@ sub read_directive_from_file { # we check them. This is redundant protection -- the ftp config on # ftp.gnu.org does not allow overwrites or deletes. # + +=item @packets = scan_incoming ( $directory, $scratchpad ) + +Scan DIRECTORY for newly-arrived upload packets. Remove blatantly bogus +files, ignore partial packets and acceptable files that are either still +open or recently modified, and return a list of arrayrefs representing +complete packets found. The files mentioned in the returned arrayrefs have +been moved to the SCRATCHPAD directory. Each returned packet begins with +the directive file, but order is not otherwise specified. + +=cut + sub scan_incoming { my $directory = shift; my $scratchpad = shift; @@ -959,40 +971,54 @@ sub scan_incoming { } close (LSOF); + # Find the directives among the possibilities and assemble packets my @ret; - # For each remaining possibility, do some more checks - for my $ent (keys %possible) { - my $base = $ent; - my $sig = "$base.sig"; - my $directive = "$base.directive.asc"; - my $bare_base = $base; - $bare_base =~ s/\.directive\.asc$//g; + my @stems = + map m/^(.*)[.]directive[.]asc$/, + grep m/[.]directive[.]asc$/, keys %possible; + + STEM: foreach my $stem (@stems) { + # This trades generality for efficiency. In the general case, the STEM + # in STEM.directive.asc is almost arbitrary and collecting packets + # (identified by sharing a common STEM) requires multiple scans over + # the set of collected files. In nasty cases, the associations between + # files and packets could even be ambiguous. The below shortcuts the + # issue by requiring known extensions on each common STEM, but requires + # this function to be updated if new packet types are added. + ftp_syslog('debug', "DEBUG: " - ."considering $ent for processing") if DEBUG; - - # Work on this triple, if all three files are accepted. Overlength - # directive and signature files were discarded in the initial scan. - if (exists($possible{$base}) && exists($possible{$sig}) - && exists($possible{$directive})) { - push (@ret, { "directive" => $directive, "sig" => $sig, - "upload" => $base, "directive_only" => 0 }); - ftp_syslog('info', "processing [$directive:$sig:$base]"); - - # Do atomic rename to temp incoming directory before reading - # anything, for safety. - # - for my $f (($directive, $sig, $base)) { - rename ($f, "$scratchpad/$f") - or fatal("rename $directory/$f to $scratchpad/$f failed: $!",0); + ."considering stem [$stem] for processing") if DEBUG; + + # Note that all values in %possible are 1 and the undefined value is + # falsish in Perl, so simple checks are adequate here. No tests for + # the directive file itself are done because each STEM is derived from + # its presence. + + if ($possible{$stem} && $possible{$stem.'.sig'}) { + # File upload triplet: STEM.directive.asc, STEM.sig, STEM + my $triplet = [$stem.'.directive.asc', $stem.'.sig', $stem]; + + foreach my $file (@$triplet) { + # If the file exists in the scratchpad, but not in the incoming + # directory, we may have already moved it to the scratchpad + # directory as part of another packet. Submit both packets for + # processing; it is near-certain that one of them is fake and will + # be rejected after failing authentication. + unless ((-e File::Spec->catfile($scratchpad, $file) + && ! -e File::Spec->catfile($directory, $file)) + || rename (File::Spec->catfile($directory, $file), + File::Spec->catfile($scratchpad, $file))) { + ftp_syslog('error', + "rename $directory/$file to $scratchpad/$file: $!"); + next STEM # abandon processing this triplet + } } - # don't bother to try any part of this triple again. - delete $possible{$base}; - delete $possible{$sig}; - delete $possible{$directive}; - } elsif (exists($possible{$base}) && !exists($possible{"$bare_base.sig"}) - && ($base =~ /\.directive\.asc$/)) { + push @ret, $triplet; + ftp_syslog('info', 'processing ['.join(':',@$triplet).']'); + } else { + # A lone directive file: STEM.directive.asc # Here we have a potential problem. It's possible that we are seeing # a directive file that belongs to a triplet the rest of which has @@ -1001,35 +1027,35 @@ sub scan_incoming { # if there is a 'filename:' directive. my $racecondition = 0; - open(TMP,$base); - while () { - if (/^Filename:/i) { - $racecondition = 1; - last; - } + my $directive = read_directive_from_file + (File::Spec->catfile($directory,$stem.'.directive.asc')); + foreach my $cell (@$directive) { + next unless lc($cell->[0]) eq 'filename'; + $racecondition = 1; # found a 'filename' directive } - close(TMP); if ($racecondition) { - # Most likely a race condition. We've found a directive file but - # not the accompanying file(s). Just ignore this directive file - # for now. + # Most likely a race condition. We have a directive file but not + # the accompanying file(s). Just ignore this directive for now. ftp_syslog('info', - "Found directive file with filename directive ($base), " - ."but no accompanying files. " + "Found directive file with filename directive " + ."(${stem}.directive.asc), but no accompanying files. " ."Ignoring directive file in this run."); } else { # Directive file only, no actual file to deal with # This can happen when dealing with symlink/rmsymlink/archive options - push (@ret, { "directive" => $base, "sig" => '', - "upload" => '', "directive_only" => 1 }); - # Do atomic rename to temp incoming directory before reading - # anything, for safety. - rename ($base, "$scratchpad/$base") - or fatal("rename $directory/$base " - ."to $scratchpad/$base failed: $!",0); + my $file = $stem.'.directive.asc'; + unless ((-e File::Spec->catfile($scratchpad, $file) # as above for + && ! -e File::Spec->catfile($directory, $file))# file uploads + || rename (File::Spec->catfile($directory, $file), + File::Spec->catfile($scratchpad, $file))) { + ftp_syslog('error', + "rename $directory/$file to $scratchpad/$file: $!"); + next STEM # abandon processing this item + } + push @ret, [$file]; + ftp_syslog('info', 'processing ['.$file.']'); } - delete $possible{$base}; } } @@ -1850,6 +1876,26 @@ chdir ($incoming_dir) or ftp_die("FATAL: chdir($incoming_dir) failed: $!"); my @incoming = scan_incoming ($incoming_dir, $incoming_tmp); +# Temporary scaffolding to convert the new values returned by scan_incoming +# to the old format. +foreach my $cell (@incoming) { + next if ref $cell eq 'HASH'; # cell is old format + + if (3 == scalar @$cell) { # an upload triplet + my $directive; my $sig; my $upload; + foreach (@$cell) { + if (m/[.]directive[.]asc$/) { $directive = $_ } + elsif (m/[.]sig$/) { $sig = $_ } + else { $upload = $_ } + } + $cell = {directive => $directive, sig => $sig, + upload => $upload, directive_only => 0} + } elsif (1 == scalar @$cell) {# loose directive + $cell = {directive => $cell->[0], sig => '', + upload => '', directive_only => 1} + } +} +# End temporary scaffolding. # we've moved the files to work on to a new directory. chdir ($incoming_tmp) diff --git a/testsuite/gatekeeper.all/00_idle.exp b/testsuite/gatekeeper.all/00_idle.exp index 355af3a..8c47d73 100644 --- a/testsuite/gatekeeper.all/00_idle.exp +++ b/testsuite/gatekeeper.all/00_idle.exp @@ -104,9 +104,6 @@ analyze_log $tenv "idle processing: bogus files" { !scan,x:x "ignored file: x:x " !scan,x?x "ignored file: x?x" !scan,;xax "ignored file: ;xax" - - consider,bogus1 "considered file: bogus1" - consider,bogus2 "considered file: bogus2" } analyze_no_mail $tenv "idle processing: bogus files" @@ -135,7 +132,7 @@ proc check_incomplete_upload { has_directive has_main has_signature } { } dsig { good 00 0000 } lappend msglist \ scan,partial.bin.directive.asc "scan found directive file" \ - consider,partial.bin.directive.asc "considered directive file" + consider,partial.bin "considered packet" } if { $has_main } { lappend filelist partial.bin @@ -143,15 +140,13 @@ proc check_incomplete_upload { has_directive has_main has_signature } { incomplete upload main file } lappend msglist \ - scan,partial.bin "scan found main file" \ - consider,partial.bin "considered main file" + scan,partial.bin "scan found main file" } if { $has_signature } { lappend filelist partial.bin.sig lappend testcase fsig { good 01 0000 } lappend msglist \ - scan,partial.bin.sig "scan found signature file" \ - consider,partial.bin.sig "considered signature file" + scan,partial.bin.sig "scan found signature file" } if { $has_directive && ! ( $has_main || $has_signature ) } { lappend msglist skip-loose,partial.bin.directive.asc \ @@ -207,24 +202,17 @@ proc check_ongoing_upload { open_directive open_main open_signature } { lappend msglist \ open,partial.bin.directive.asc "directive file still open" } else { - lappend msglist \ - consider,partial.bin.directive.asc "considered directive file" + lappend msglist consider,partial.bin "considered packet" } if { $open_main } { puts $mockfile "> n${lsofstem}/partial.bin" lappend msglist \ open,partial.bin "main file still open" - } else { - lappend msglist \ - consider,partial.bin "considered main file" } if { $open_signature } { puts $mockfile "> n${lsofstem}/partial.bin.sig" lappend msglist \ open,partial.bin.sig "signature file still open" - } else { - lappend msglist \ - consider,partial.bin.sig "considered signature file" } if { $open_directive || $open_main || $open_signature } { puts $mockfile "? 0" @@ -298,22 +286,19 @@ proc check_recent_upload { recent_directive recent_main recent_signature } { } else { lappend msglist scan,recent.bin.directive.asc \ "scan found directive file" - lappend msglist consider,recent.bin.directive.asc \ - "considered directive file" + lappend msglist consider,recent.bin "considered packet" } if { $recent_main } { lappend testcase file-mtime "15 seconds ago" lappend msglist recent,recent.bin "skipped recent main file" } else { lappend msglist scan,recent.bin "scan found main file" - lappend msglist consider,recent.bin "considered main file" } if { $recent_signature } { lappend testcase fsig-mtime "15 seconds ago" lappend msglist recent,recent.bin.sig "skipped recent signature file" } else { lappend msglist scan,recent.bin.sig "scan found signature file" - lappend msglist consider,recent.bin.sig "considered signature file" } make_test_case $tenv [list recent.bin $testcase] @@ -355,7 +340,6 @@ proc check_oversize_part { oversize_directive oversize_signature } { nowork "ftp-upload 'nothing to do' message" scan,oversize.bin "scan found main file" - consider,oversize.bin "considered main file" } set filelist { oversize.bin } diff --git a/testsuite/gatekeeper.all/01_loose.exp b/testsuite/gatekeeper.all/01_loose.exp index 4540aed..e15fb86 100644 --- a/testsuite/gatekeeper.all/01_loose.exp +++ b/testsuite/gatekeeper.all/01_loose.exp @@ -30,7 +30,7 @@ proc check_loose_directive { desc case args } { start "ftp-upload start message" scan,foo.directive.asc "scan found directive file" - consider,foo.directive.asc "considered directive file" + consider,foo "considered loose directive file" found,foo.directive.asc "found directive file for processing" } diff --git a/testsuite/lib/gatekeeper.exp b/testsuite/lib/gatekeeper.exp index 4e9a9be..0f852e2 100644 --- a/testsuite/lib/gatekeeper.exp +++ b/testsuite/lib/gatekeeper.exp @@ -567,8 +567,8 @@ proc analyze_log { base_dir name assess } { exp_continue } -re {^gatekeeper\[[0-9]+\]: \(Test\)\ - DEBUG: considering ([^ ]+) for processing.} { - # from scan_incoming, top of triplet checking loop + DEBUG: considering stem \[([^ ]+)\] for processing.} { + # from scan_incoming, top of packet checking loop set A(consider,$expect_out(1,string)) 1 exp_continue } -- 2.25.1