From f5c448aa65dd87c12344cc42fa078cc6fd4b1427 Mon Sep 17 00:00:00 2001
From: Michael Eischer
Date: Sat, 8 Feb 2020 11:04:15 +0100
Subject: [PATCH] diff: Optimize diff calculation for shared subtrees

When the diff calculation compares two trees with identical id then no
differences between them can ever show up. Optimize for that case by
simply traversing the tree only once to collect all referenced blobs for
a proper calculation of added and removed blobs.

Just skipping the common subtrees is not possible as this would skew the
results if the added or removed blobs are shared with one of the
subtrees.
---
 cmd/restic/cmd_diff.go | 52 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 45 insertions(+), 7 deletions(-)

diff --git a/cmd/restic/cmd_diff.go b/cmd/restic/cmd_diff.go
index 090568845..1fc8b0458 100644
--- a/cmd/restic/cmd_diff.go
+++ b/cmd/restic/cmd_diff.go
@@ -116,10 +116,10 @@ func addBlobs(bs restic.BlobSet, node *restic.Node) {
 
 // DiffStats collects the differences between two snapshots.
 type DiffStats struct {
-	ChangedFiles            int
-	Added                   DiffStat
-	Removed                 DiffStat
-	BlobsBefore, BlobsAfter restic.BlobSet
+	ChangedFiles                         int
+	Added                                DiffStat
+	Removed                              DiffStat
+	BlobsBefore, BlobsAfter, BlobsCommon restic.BlobSet
 }
 
 // NewDiffStats creates new stats for a diff run.
@@ -127,6 +127,7 @@ func NewDiffStats() *DiffStats {
 	return &DiffStats{
 		BlobsBefore: restic.NewBlobSet(),
 		BlobsAfter:  restic.NewBlobSet(),
+		BlobsCommon: restic.NewBlobSet(),
 	}
 }
 
@@ -177,6 +178,33 @@ func (c *Comparer) printDir(ctx context.Context, mode string, stats *DiffStat, b
 	return nil
 }
 
+// collectDir loads the tree with the given id and passes each of its nodes
+// to addBlobs to record the referenced blobs in blobs, recursing into every
+// directory node. Only a failure to load the tree id itself is returned;
+// errors from subtrees are reported via Warnf and the walk continues.
+func (c *Comparer) collectDir(ctx context.Context, blobs restic.BlobSet, id restic.ID) error {
+	debug.Log("collect tree %v", id)
+	tree, err := c.repo.LoadTree(ctx, id)
+	if err != nil {
+		return err
+	}
+
+	for _, node := range tree.Nodes {
+		addBlobs(blobs, node)
+
+		if node.Type == "dir" {
+			// NOTE(review): assumes directory nodes always have a non-nil
+			// Subtree; a nil pointer here would panic - confirm.
+			err := c.collectDir(ctx, blobs, *node.Subtree)
+			if err != nil {
+				Warnf("error: %v\n", err)
+			}
+		}
+	}
+
+	return nil
+}
+
 func uniqueNodeNames(tree1, tree2 *restic.Tree) (tree1Nodes, tree2Nodes map[string]*restic.Node, uniqueNames []string) {
 	names := make(map[string]struct{})
 	tree1Nodes = make(map[string]*restic.Node)
@@ -248,7 +276,15 @@ func (c *Comparer) diffTree(ctx context.Context, stats *DiffStats, prefix string
 			}
 
 			if node1.Type == "dir" && node2.Type == "dir" {
-				err := c.diffTree(ctx, stats, name, *node1.Subtree, *node2.Subtree)
+				var err error
+				if (*node1.Subtree).Equal(*node2.Subtree) {
+					// Identical subtree IDs cannot contain differences, so
+					// only collect their blobs; these are excluded later
+					// when the added and removed blobs are computed.
+					err = c.collectDir(ctx, stats.BlobsCommon, *node1.Subtree)
+				} else {
+					err = c.diffTree(ctx, stats, name, *node1.Subtree, *node2.Subtree)
+				}
 				if err != nil {
 					Warnf("error: %v\n", err)
 				}
@@ -345,8 +381,10 @@ func runDiff(opts DiffOptions, gopts GlobalOptions, args []string) error {
 	}
 
 	both := stats.BlobsBefore.Intersect(stats.BlobsAfter)
-	updateBlobs(repo, stats.BlobsBefore.Sub(both), &stats.Removed)
-	updateBlobs(repo, stats.BlobsAfter.Sub(both), &stats.Added)
+	// Blobs that are also referenced from an identical subtree exist in
+	// both snapshots and therefore count as neither added nor removed.
+	updateBlobs(repo, stats.BlobsBefore.Sub(both).Sub(stats.BlobsCommon), &stats.Removed)
+	updateBlobs(repo, stats.BlobsAfter.Sub(both).Sub(stats.BlobsCommon), &stats.Added)
 
 	Printf("\n")
 	Printf("Files: %5d new, %5d removed, %5d changed\n", stats.Added.Files, stats.Removed.Files, stats.ChangedFiles)