#!/usr/bin/perl
#
# Generate a listing of all paths ever used in the repository, along with the
# disk space used by the path throughout the entire history. Note that sizes
# for trees are cumulative; they include the sizes of all of the paths below
# them, in addition to the tree storage itself. All sizes are in bytes, and
# reflect git's delta and zlib compression.
#
# One caveat is that this is just the _current_ on-disk size. The on-disk size
# of each object may change if git repacks and chooses different delta bases,
# for example. Likewise, the mapping of paths to objects may change (e.g., a
# given blob object may appear at multiple paths, and we assign it to only one
# path slot). So take it as a rough guide; removing particular paths
# from your history may not remove exactly that many bytes from the repository
# size.

use strict;
use warnings;

# Run the report only when executed as a script, so that the helper subs can
# be loaded (e.g. via "require") without invoking git.
main() unless caller;

# Read "<disk-size> <type> <path>" records from git, accumulate them into
# three buckets (commits, tags, and the path tree) and print the report.
# Dies if the git pipeline cannot be started or exits unsuccessfully.
sub main {
	my $commits = { size => 0 };
	my $tags = { size => 0 };
	my $tree = { size => 0 };

	# rev-list emits "<oid> <path>"; cat-file echoes the path back via
	# %(rest), so each output line carries the size, type and path.
	open(my $git, '-|',
	     'git rev-list --objects --all |
	      git cat-file --batch-check="%(objectsize:disk) %(objecttype) %(rest)"
	     ') or die "unable to run git: $!\n";

	while (my $line = <$git>) {
		chomp $line;
		my ($size, $type, $name) = $line =~ /^(\d+) (\S+) (.*)/;

		# Lines such as "<oid> missing" do not carry a size; skip them
		# rather than feeding undefined values into the buckets.
		unless (defined $size) {
			warn "ignoring unexpected line from git: $line\n";
			next;
		}

		if ($type eq 'commit') {
			add_to_bucket($commits, '', $size);
		} elsif ($type eq 'tag') {
			add_to_bucket($tags, '', $size);
		} else {
			# Trees and blobs are both attributed to their path.
			add_to_bucket($tree, $name, $size);
		}
	}

	# For a pipe, close() reports the command's exit status; without this
	# check a failing git would silently yield an all-zero report.
	unless (close($git)) {
		die $! ? "error reading from git: $!\n"
		       : "git exited with status " . ($? >> 8) . "\n";
	}

	show_bucket($commits, 'COMMITS', 0);
	show_bucket($tags, 'TAGS', 0);
	show_bucket($tree, '/', 0);
	return;
}

# Add $size bytes to $node and to every node along the '/'-separated path
# $name beneath it, creating intermediate nodes as needed. An empty $name
# charges only $node itself. Each node is a hash with a cumulative "size"
# and an optional "child" hash keyed by path component.
sub add_to_bucket {
	my ($node, $name, $size) = @_;

	$node->{size} += $size;
	for my $component (split('/', $name)) {
		$node = $node->{child}->{$component} ||= { size => 0 };
		$node->{size} += $size;
	}
	return;
}

# Print $node as "<size> <name>", indented by $indent spaces, followed by
# all of its children (sorted by name) one level deeper. Children of the
# root node named '/' are shown without a leading slash.
sub show_bucket {
	my ($node, $name, $indent) = @_;

	# Very deep directory hierarchies are legitimate input here.
	no warnings 'recursion';

	printf "%s%10s %s\n", ' ' x $indent, $node->{size}, $name;
	for my $child (sort(keys %{$node->{child}})) {
		my $child_name = $name eq '/' ? $child : "$name/$child";
		show_bucket($node->{child}->{$child}, $child_name, $indent + 1);
	}
	return;
}