blob: e792b1cc736323466287d9992869e7e04447dd8c [file] [log] [blame]
#!/usr/bin/perl
# List the given commit range similar to "git log --graph" but instead of
# showing the branches as recorded in the commits, show the implicit commit
# hierarchy as far as merge conflicts are concerned: a commit is considered an
# "implicit ancestor" of a descendant commit if exchanging both in the commit
# history would cause merge conflicts, i.e. if their changes overlap.
#
# The idea originates from the Darcs SCM which -- while its underlying idea is
# cool, and replicated in this Perl script -- was doomed by the choice of
# programming language and by trying to describe the principle via quantum
# physics (when the proper scientific background would have been group theory,
# really, although that would have been even worse a vehicle to document the
# motivation and implementation).
# list of all commits, in order
my @commits = ();
# maps short commit names to long ones
my %short2long = ();
# maps commit -> parents
my %forward = ();
# maps commit -> children
my %backward = ();
# maps commit -> file_name -> hunk list
# where hunk list is a struct returned by hunks_data()
my %commit_changes = ();
sub ordered_set () {
my @list = ();
my %seen = ();
my $add = sub ($) {
if ($seen{$_[0]} eq undef) {
push(@list, $_[0]);
$seen{$_[0]} = $#list;
}
};
return {
'add' => $add,
'remove' => sub ($) {
my $index = $seen{$_[0]};
return if $index eq undef;
delete $seen{$_[0]};
splice(@list, $index, 1);
for (; $i <= $#list; $i++) {
$seen{$list[$i]} = $i;
}
},
'contains' => sub ($) {
return $seen{$_[0]} ne undef;
},
'merge' => sub ($) {
map { &$add($_) } @{$_[0]->{'list'}()};
},
'clone' => sub () {
my $cloned = ordered_set();
map { $cloned->{'add'}($_) } @list;
return $cloned;
},
'list' => sub () {
return \@list;
}
};
}
sub imply_parents ($$) {
my $child = $_[0];
my $parents = $_[1];
foreach my $parent(@$parents) {
next if $child eq $parent;
$forward{$child} = ordered_set() if $forward{$child} eq undef;
return if ($forward{$child}->{'contains'}($parent));
$forward{$child}->{'add'}($parent);
$backward{$parent} = ordered_set() if $backward{$parent} eq undef;
$backward{$parent}->{'add'}($child);
}
}
sub hunks_data () {
my @begin = ();
my @end = ();
my @revisions = ();
my $insert = sub ($$$$) {
splice(@begin, $_[0], 0, $_[1]);
splice(@end, $_[0], 0, $_[2]);
splice(@revisions, $_[0], 0, $_[3]);
};
return {
'size' => sub () {
return $#end;
},
'begin' => \@begin,
'end' => \@end,
'revisions' => \@revisions,
'add' => sub ($$$) {
push(@begin, $_[0]);
push(@end, $_[1]);
if (ref($_[2]) ne 'HASH') {
my $set = ordered_set();
$set->{'add'}($_[2]);
$_[2] = $set;
}
push(@revisions, $_[2]);
},
'merge' => sub ($) {
my @begin2 = @{$_[0]->{'begin'}};
my @end2 = @{$_[0]->{'end'}};
my @revisions2 = @{$_[0]->{'revisions'}};
my $i = 0;
my $j = 0;
while ($i <= $#end && $j <= $#end2) {
if ($begin[$i] >= $end2[$j]) {
&$insert($i, $begin2[$j], $end2[$j], $revisions2[$j]);
$j++;
} elsif ($end[$i] > $begin2[$j]) {
if ($begin[$i] < $begin2[$j]) {
&$insert($i, $begin[$i], $begin2[$j], $revisions[$i]->{'clone'}());
$i++;
} elsif ($begin1 > $begin2[$j]) {
&$insert($i, $begin2[$j], $begin[$i], $revisions2[$j]->{'clone'}());
$i++;
}
if ($end[$i] < $end2[$j]) {
&$insert($i + 1, $end[$i], $end2[$j], $revisions2[$j]->{'clone'}());
$revisions[$i]->{'merge'}($revisions2[$j]);
$i++;
} elsif ($end[$i] > $end2[$j]) {
&$insert($i + 1, $end2[$j], $end[$i], $revisions[$i]->{'clone'}());
$revisions[$i]->{'merge'}($revisions2[$j]);
$i++;
} else {
$revisions[$i]->{'merge'}($revisions2[$j]);
}
}
$i++;
}
while ($j <= $#end2) {
push(@begin, $begin2[$j]);
push(@end, $end2[$j]);
push(@revisions, $revisions2[$j]);
$j++;
}
},
# merge-weakly merges only the specified hunks which do not overlap with the current ones.
'merge-weakly' => sub ($) {
my @begin2 = @{$_[0]->{'begin'}};
my @end2 = @{$_[0]->{'end'}};
my @revisions2 = @{$_[0]->{'revisions'}};
my $i = 0;
my $j = 0;
while ($i <= $#end && $j <= $#end2) {
if ($begin[$i] >= $end2[$j]) {
&$insert($i, $begin2[$j], $end2[$j], $revisions2[$j]);
$j++;
} elsif ($end[$i] > $begin2[$j]) {
if ($begin1 > $begin2[$j]) {
&$insert($i, $begin2[$j], $begin[$i], $revisions2[$j]->{'clone'}());
$i++;
}
if ($end[$i] < $end2[$j]) {
&$insert($i + 1, $end[$i], $end2[$j], $revisions2[$j]->{'clone'}());
$i++;
}
}
$i++;
}
while ($j <= $#end2) {
push(@begin, $begin2[$j]);
push(@end, $end2[$j]);
push(@revisions, $revisions2[$j]);
$j++;
}
},
'clone-for-hunk-adjustment' => sub () {
my $clone = hunks_data();
my @hunks = ();
$clone->{'hunk'} = sub ($$$$) {
push(@hunks, @_);
};
$clone->{'finish'} = sub () {
my $i = 0, $offset = 0;
while ($i <= $#end) {
last if $#hunks < 0;
if ($end[$i] < $hunks[0]) {
$clone->{'add'}($offset + $begin[$i], $offset + $end[$i], $revisions[$i]);
} elsif ($begin[$i] > $hunks[1]) {
$offset = $hunks[3] - $hunks[1];
splice(@hunks, 0, 4);
next;
} else {
my $begin2 = $begin[$i] < $hunks[0] ? $begin[$i] + $offset : $hunks[2];
$offset = $hunks[3] - $hunks[1];
my $end2 = $end[$i] <= $hunks[1] ? $hunks[2] : $end[$i] + $offset;
$clone->{'add'}($begin2, $end2, $revisions[$i]);
}
$i++;
}
while ($i <= $#end) {
$clone->{'add'}($offset + $begin[$i], $offset + $end[$i], $revisions[$i]);
$i++;
}
delete $clone->{'hunk'};
delete $clone->{'finish'};
};
return $clone;
},
'implied-revisions' => sub ($) {
my @begin2 = @{$_[0]->{'begin'}};
my @end2 = @{$_[0]->{'end'}};
my @revisions2 = @{$_[0]->{'revisions'}};
my $i = 0;
my $j = 0;
my $result = ordered_set();
while ($i <= $#end && $j <= $#end2) {
if ($end[$i] < $begin2[$j]) {
$i++;
} elsif ($begin[$i] > $end2[$j]) {
$j++;
} else {
$result->{'merge'}($revisions2[$j]);
$j++;
}
}
return $result->{'list'}();
}
};
}
sub handle_single_parent ($$$$$) {
my $current_commit = $_[0];
my $in = $_[1];
my %original_parent_changes = %{$_[2]};
my $changes = $_[3];
my $parent_changes = $_[4];
my $file_name;
my $hunks = undef;
my $parent_hunks = undef;
my %handled_files = ();
my $finish_file = sub () {
if ($hunks ne undef) {
if ($changes->{$file_name} eq undef) {
$changes->{$file_name} = $hunks;
} else {
$changes->{$file_name}->{'merge'}($hunks);
}
}
if ($parent_hunks ne undef) {
$parent_hunks->{'finish'}();
if ($parent_changes->{$file_name} eq undef) {
$parent_changes->{$file_name} = $parent_hunks;
} else {
$parent_changes->{$file_name}->{'merge'}($parent_hunks);
}
}
$handled_files{$file_name} = 1 if $file_name ne undef;
};
while (<$in>) {
# TODO: handle spaces and even " b/" as part of a filename
if (/^diff --git a\/.* b\/(.*)$/) {
&$finish_file();
$file_name = $1;
$hunks = hunks_data();
$parent_hunks = $original_parent_changes{$file_name};
$parent_hunks = $parent_hunks->{'clone-for-hunk-adjustment'}() if $parent_hunks ne undef;
} elsif (/^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))? @@/) {
my $length0 = $3 ne '' ? $3 : 1;
my $begin0 = $1 + ($3 eq '0' ? 1 : 0);
my $end0 = $begin0 + $length0;
my $length1 = $6 ne '' ? $6 : 1;
my $begin1 = $4 + ($6 eq '0' ? 1 : 0);
my $end1 = $begin1 + $length1;
$hunks->{'add'}($begin1, $end1, $current_commit);
$parent_hunks->{'hunk'}($begin0, $end0, $begin1, $end1) if $parent_hunks ne undef;
}
}
&$finish_file();
foreach my $file_name (keys %original_parent_changes) {
if ($handled_files{$file_name} eq undef) {
my $hunks = $original_parent_changes{$file_name};
if ($parent_changes->{$file_name} eq undef) {
$parent_changes->{$file_name} = $hunks;
} else {
$parent_changes->{$file_name}->{'merge'}($hunks);
}
}
}
}
# Reads the diff(s) associated with the (merge) commit into the global data
# structures. Takes previously read information about ancestors into account.
# After this method has been called on all the commits of interest,
# $commit_changes{$commit} will refer to a hash that maps all touched files to
# the respective hunk lists (that document what commits touched which parts,
# sort of a simultaneous `git blame`).
sub read_commit ($$) {
my $current_commit = $_[0];
my @parents = @{$_[1]};
my %changes = ();
my %parent_changes = ();
$commit_changes{$current_commit} = \%changes;
# use the empty tree to compare initial commits against
@parents = ( '4b825dc642cb6eb9a060e54bf8d69288fbee4904' ) if $#parents < 0;
foreach my $parent (@parents) {
my $original_parent_changes = $commit_changes{$parent};
my @command = ('git', 'diff', '-U0', # '-M',
$parent, $current_commit);
open(my $in, '-|', @command);
handle_single_parent($current_commit, $in, $original_parent_changes, \%changes, \%parent_changes);
close($in);
}
foreach my $file_name (keys %changes) {
my $parent_hunks = $parent_changes{$file_name};
next if $parent_hunks eq undef;
my $hunks = $changes{$file_name};
imply_parents($current_commit, $hunks->{'implied-revisions'}($parent_hunks));
$hunks->{'merge-weakly'}($parent_hunks);
}
map { $changes{$_} = $parent_changes{$_} if $changes{$_} eq undef; } keys %parent_changes;
}
# Reads all necessary information for the commit range specified by the argument
# which is expected to be a reference to an array of command-line arguments
# appropriate for being called with `git log`.
sub read_commits ($) {
my $commit_range = $_[0];
my $current;
my @parents;
my %parent_changes;
my $previous_file_name;
my $current_file;
my @current_file_changes;
my @parent_file_changes;
my $offset;
my @command = ('git', 'log', '--reverse', '--topo-order',
'--format=%H %h %p');
# TODO: filter out --graph and stuff
push(@command, @$commit_range);
my $i = 1;
open (my $in, '-|', @command);
while (<$in>) {
next if (/^$/);
print $i++ . "...\r";
chomp;
if (/^([0-9a-f]+) ([0-9a-f]+) ([0-9a-f ]*)$/) {
my $sha1 = $1;
$current = $2;
@parents = split(/ /, $3);
push(@commits, $current);
$short2long{$current} = $sha1;
read_commit($current, \@parents);
}
}
close($in);
}
my $use_gitk = 0;
my $simplify = 1;
my $dashdash = -1;
for (my $i = 0; $i <= $#ARGV; $i++) {
if ($ARGV[$i] eq '--') {
$dashdash = $i;
last;
}
if ($ARGV[$i] eq '--gitk') {
$use_gitk = 1;
} elsif ($ARGV[$i] eq '--simplify') {
$simplify = 1;
} elsif ($ARGV[$i] eq '--no-simplify') {
$simplify = 0;
} else {
next;
}
splice(@ARGV, $i, 1);
$i--;
}
read_commits(\@ARGV);
sub get_parents ($) {
my $parents = $forward{$_[0]};
return [] if $parents eq undef;
return $parents->{'list'}();
}
# We can simplify the implied history by skipping parents that are ancestors of
# other parents (e.g. if a commit is already an implied grandparent, it does
# not have to be an implied parent, too).
if ($simplify) {
foreach my $current (@commits) {
my @stack = ();
my %seen = ();
my %parents = ();
foreach my $parent (@{get_parents($current)}) {
$parents{$parent} = 1;
foreach my $grampy (@{get_parents($parent)}) {
push(@stack, $grampy);
}
}
while ($#stack >= 0) {
my $commit = pop(@stack);
next if $seen{$commit} ne undef;
if ($parents{$commit} ne undef) {
$forward{$current}->{'remove'}($commit);
delete $parents{$commit};
}
foreach my $parent (@{get_parents($commit)}) {
push(@stack, $parent);
}
$seen{$commit} = 1;
}
}
}
# Unfortunately, there is no scriptable way to use the --graph support of `git
# log`.
#
# Fortunately, however, there is a way to circumvent that: by defining the
# commit order using temporary grafts, written into a temporary file and used
# via a temporary GIT_GRAFT_FILE env variable.
my $git_dir = `git rev-parse --git-dir`;
chomp $git_dir;
sub show () {
my $grafts_file = $git_dir . '/TEMP-GRAFTS';
my @args = ('log', '--graph', '--format=%h %s%n');
open(my $grafts, '>', $grafts_file);
foreach my $current (@commits) {
push(@args, $current) if $backward{$current} eq undef;
print $grafts $short2long{$current};
if (ref($forward{$current}) eq 'HASH') {
foreach my $parent (@{$forward{$current}->{'list'}()}) {
my $sha1 = $short2long{$parent};
if ($sha1 ne undef) {
print $grafts ' ' . $sha1;
}
}
}
print $grafts "\n";
}
close($grafts);
$ENV{'GIT_GRAFT_FILE'} = $grafts_file;
push(@args, '--');
# add file arguments from @ARGV
for (my $i = 0; $i <= $#ARGV; $i++) {
next if $ARGV[$i] ne '--';
$i++;
push(@args, @ARGV[$i..$#ARGV]) if $i <= $#ARGV;
last;
}
if ($use_gitk) {
splice(@args, 0, 3);
exec('gitk', @args);
}
exec('git', @args);
}
show();