From: Jacob Bachmeyer Date: Wed, 29 Jul 2020 04:09:45 +0000 (-0500) Subject: Add re-import verification tool for Git X-Git-Url: https://vcs.fsf.org/?a=commitdiff_plain;h=refs%2Fheads%2Fimport-tool;p=gatekeeper.git Add re-import verification tool for Git --- diff --git a/git-check-import-branch.pl b/git-check-import-branch.pl new file mode 100755 index 0000000..ba4d947 --- /dev/null +++ b/git-check-import-branch.pl @@ -0,0 +1,198 @@ +#!/usr/bin/perl + +# Copyright (C) 2020 Jacob Bachmeyer +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +=head1 NAME + +git-check-import-branch.pl - Verify that a reimport loaded the same blobs + +=head1 SYNOPSIS + + git-check-import-branch.pl + + Options: + -h --help print brief help message + -v --verbose print additional information while running + +=head1 OPTIONS + +=over + +=item B<--help> B<-h> + +Print brief help message and exit. + +=item B<--verbose> B<-v> + +Print additional information while running. Repeating this option further +increases verbosity. + +=back + +=head1 DESCRIPTION + +This tool searches the histories of two branches in a Git repository and +reports any differences in the set of blobs referenced between them. + +=cut + +use strict; +use warnings; + +use Getopt::Long; +use Pod::Usage; + +my %OPT = (); +my $Verbose = 0; + +GetOptions('help|h' => \$OPT{help}, + 'verbose|v+' => \$Verbose, + ) or pod2usage(2); +pod2usage(1) if $OPT{help}; + +pod2usage(2) unless @ARGV == 2; + +### +### Git reader routines +### + +# read a commit; return hashref with keys: +# id Git commit id +# tree tree object id for this commit +# parents array of parents of this commit +sub read_commit { + my $commit_id = shift; + + my $commit = { id => qx/git rev-parse $commit_id/ }; + chomp $commit->{id}; + die "failed to find commit id $commit_id" unless $? == 0; + + open my $git_cat, '-|', qw/git cat-file commit/, $commit_id + or die "git cat-file commit $commit_id: $!"; + print " read commit $commit->{id}\n" if $Verbose > 1; + while (<$git_cat>) { + chomp; + last if m/^$/; + if (m/^parent\s+([[:xdigit:]]{40})$/) { + push @{$commit->{parents}}, $1; + } elsif (m/^tree\s+([[:xdigit:]]{40})$/) { + die "multiple trees in commit $commit_id" if $commit->{tree}; + $commit->{tree} = $1; + } + } + close $git_cat; + + return $commit; +} + +# read a tree; return arrayref of blob hex SHA1 values and arrayref of +# subtree ids as hex SHA1 values +sub read_tree { + my $tree_id = shift; + + my @blobs = (); + my @trees = (); + + open my $git_cat, '-|', qw/git cat-file -p/, $tree_id + or die "git cat-file -p $tree_id: $!"; + print " read tree $tree_id\n" if $Verbose > 1; + while (<$git_cat>) { + chomp; + m/^([0-7]{2})([0-7]{4})\s+(blob|tree)\s+([[:xdigit:]]{40})\s+(.*)$/ + or die "failed to parse tree listing line: $_"; + my $ftype = $1; my $mode = $2; my $obtype = $3; my $obid = $4; + my $name = $5; + print " $_\n" if $Verbose > 2; + if ($obtype eq 'blob') { + push @blobs, $obid; + } elsif ($obtype eq 'tree') { + push @trees, $obid; + } else { die "unknown Git object type in tree: $obtype" } + } + close $git_cat; + + return \@blobs, \@trees; +} + +# read a tree recursively; collecting all blob hex SHA1 values +sub read_subtree { + my $top_id = shift; + + my %blobs = (); + my %trees = (); + + my @queue = ($top_id); + my $blobs; my $trees; + while (@queue) { + $trees{$queue[0]}++; + ($blobs, $trees) = read_tree shift @queue; + $blobs{$_}++ for @$blobs; + push @queue, grep !$trees{$_}, @$trees; + } + + return keys %blobs; +} + +# read history; return sorted list of blob hex SHA1 values +sub read_history { + my $tip = shift; + + my %blobs = (); + my %commits = (); + + print "Reading history for $tip...\n" if $Verbose; + my @queue = read_commit $tip; + while (my $commit = shift @queue) { + $commits{$commit->{id}}++; + $blobs{$_}++ for read_subtree $commit->{tree}; + push @queue, map {read_commit $_} grep !$commits{$_}, @{$commit->{parents}}; + } + + return sort keys %blobs; +} + +### +### Collection and list diff +### + +my @A_Blobs = read_history $ARGV[0]; +my @B_Blobs = read_history $ARGV[1]; + +print "Analyzing..." if $Verbose; + +my %A_Blobs = map { $_ => 1 } @A_Blobs; +my %B_Blobs = map { $_ => 1 } @B_Blobs; + +my @Only_A = grep !$B_Blobs{$_}, @A_Blobs; +my @Only_B = grep !$A_Blobs{$_}, @B_Blobs; + +print "done\n\n" if $Verbose; + +if (@Only_A) { + print "Blobs found only in $ARGV[0]:\n"; + print " $_\n" for @Only_A; +} +if (@Only_B) { + print "Blobs found only in $ARGV[1]:\n"; + print " $_\n" for @Only_B; +} + +if (!@Only_A && !@Only_B) { + my $count = scalar @A_Blobs; + print "Congratulations! All $count blobs are in both histories.\n"; +} + +__END__