From c0fc85d303306ebae8a9bb4d4619f8c3440bb43e Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 22 Feb 2020 20:22:45 +0100 Subject: [PATCH 1/3] diff: Add integration test --- cmd/restic/integration_test.go | 103 +++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 439ea8510..b279a75b0 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -154,6 +154,21 @@ func testRunCheckOutput(gopts GlobalOptions) (string, error) { return buf.String(), err } +func testRunDiffOutput(gopts GlobalOptions, firstSnapshotID string, secondSnapshotID string) (string, error) { + buf := bytes.NewBuffer(nil) + + globalOptions.stdout = buf + defer func() { + globalOptions.stdout = os.Stdout + }() + + opts := DiffOptions{ + ShowMetadata: false, + } + err := runDiff(opts, gopts, []string{firstSnapshotID, secondSnapshotID}) + return string(buf.Bytes()), err +} + func testRunRebuildIndex(t testing.TB, gopts GlobalOptions) { globalOptions.stdout = ioutil.Discard defer func() { @@ -1472,3 +1487,91 @@ func TestQuietBackup(t *testing.T) { testRunCheck(t, env.gopts) } + +func copyFile(dst string, src string) error { + srcFile, err := os.Open(src) + if err != nil { + return err + } + defer srcFile.Close() + + dstFile, err := os.Create(dst) + if err != nil { + return err + } + defer dstFile.Close() + + _, err = io.Copy(dstFile, srcFile) + return err +} + +var diffOutputRegexPatterns = []string{ + "-.+modfile", + "M.+modfile1", + "\\+.+modfile2", + "\\+.+modfile3", + "\\+.+modfile4", + "-.+submoddir", + "-.+submoddir.subsubmoddir", + "\\+.+submoddir2", + "\\+.+submoddir2.subsubmoddir", + "Files: +2 new, +1 removed, +1 changed", + "Dirs: +3 new, +2 removed", + "Data Blobs: +2 new, +1 removed", + "Added: +7[0-9]{2}\\.[0-9]{3} KiB", + "Removed: +2[0-9]{2}\\.[0-9]{3} KiB", +} + +func TestDiff(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + testRunInit(t, env.gopts) + + datadir := filepath.Join(env.base, "testdata") + testdir := filepath.Join(datadir, "testdir") + subtestdir := filepath.Join(testdir, "subtestdir") + testfile := filepath.Join(testdir, "testfile") + + rtest.OK(t, os.Mkdir(testdir, 0755)) + rtest.OK(t, os.Mkdir(subtestdir, 0755)) + rtest.OK(t, appendRandomData(testfile, 256*1024)) + + moddir := filepath.Join(datadir, "moddir") + submoddir := filepath.Join(moddir, "submoddir") + subsubmoddir := filepath.Join(submoddir, "subsubmoddir") + modfile := filepath.Join(moddir, "modfile") + rtest.OK(t, os.Mkdir(moddir, 0755)) + rtest.OK(t, os.Mkdir(submoddir, 0755)) + rtest.OK(t, os.Mkdir(subsubmoddir, 0755)) + rtest.OK(t, copyFile(modfile, testfile)) + rtest.OK(t, appendRandomData(modfile+"1", 256*1024)) + + snapshots := make(map[string]struct{}) + opts := BackupOptions{} + testRunBackup(t, "", []string{datadir}, opts, env.gopts) + snapshots, firstSnapshotID := lastSnapshot(snapshots, loadSnapshotMap(t, env.gopts)) + + rtest.OK(t, os.Rename(modfile, modfile+"3")) + rtest.OK(t, os.Rename(submoddir, submoddir+"2")) + rtest.OK(t, appendRandomData(modfile+"1", 256*1024)) + rtest.OK(t, appendRandomData(modfile+"2", 256*1024)) + rtest.OK(t, os.Mkdir(modfile+"4", 0755)) + + testRunBackup(t, "", []string{datadir}, opts, env.gopts) + snapshots, secondSnapshotID := lastSnapshot(snapshots, loadSnapshotMap(t, env.gopts)) + + _, err := testRunDiffOutput(env.gopts, "", secondSnapshotID) + rtest.Assert(t, err != nil, "expected error on invalid snapshot id") + + out, err := testRunDiffOutput(env.gopts, firstSnapshotID, secondSnapshotID) + if err != nil { + t.Fatalf("expected no error from diff for test repository, got %v", err) + } + + for _, pattern := range diffOutputRegexPatterns { + r, err := regexp.Compile(pattern) + rtest.Assert(t, err == nil, "failed to compile regexp %v", pattern) + rtest.Assert(t, r.MatchString(out), "expected pattern %v in output, got\n%v", pattern, out) + } +} From f5c448aa65dd87c12344cc42fa078cc6fd4b1427 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 8 Feb 2020 11:04:15 +0100 Subject: [PATCH 2/3] diff: Optimize diff calculation for shared subtrees When the diff calculation compares two trees with identical id then no differences between them can ever show up. Optimize for that case by simply traversing the tree only once to collect all referenced blobs for a proper calculation of added and removed blobs. Just skipping the common subtrees is not possible as this would skew the results if the added or removed blobs are shared with one of the subtrees. --- cmd/restic/cmd_diff.go | 41 ++++++++++++++++++++++++++++++++++------- 1 file changed, 34 insertions(+), 7 deletions(-) diff --git a/cmd/restic/cmd_diff.go b/cmd/restic/cmd_diff.go index 090568845..1fc8b0458 100644 --- a/cmd/restic/cmd_diff.go +++ b/cmd/restic/cmd_diff.go @@ -116,10 +116,10 @@ func addBlobs(bs restic.BlobSet, node *restic.Node) { // DiffStats collects the differences between two snapshots. type DiffStats struct { - ChangedFiles int - Added DiffStat - Removed DiffStat - BlobsBefore, BlobsAfter restic.BlobSet + ChangedFiles int + Added DiffStat + Removed DiffStat + BlobsBefore, BlobsAfter, BlobsCommon restic.BlobSet } // NewDiffStats creates new stats for a diff run. @@ -127,6 +127,7 @@ func NewDiffStats() *DiffStats { return &DiffStats{ BlobsBefore: restic.NewBlobSet(), BlobsAfter: restic.NewBlobSet(), + BlobsCommon: restic.NewBlobSet(), } } @@ -177,6 +178,27 @@ func (c *Comparer) printDir(ctx context.Context, mode string, stats *DiffStat, b return nil } +func (c *Comparer) collectDir(ctx context.Context, blobs restic.BlobSet, id restic.ID) error { + debug.Log("print tree %v", id) + tree, err := c.repo.LoadTree(ctx, id) + if err != nil { + return err + } + + for _, node := range tree.Nodes { + addBlobs(blobs, node) + + if node.Type == "dir" { + err := c.collectDir(ctx, blobs, *node.Subtree) + if err != nil { + Warnf("error: %v\n", err) + } + } + } + + return nil +} + func uniqueNodeNames(tree1, tree2 *restic.Tree) (tree1Nodes, tree2Nodes map[string]*restic.Node, uniqueNames []string) { names := make(map[string]struct{}) tree1Nodes = make(map[string]*restic.Node) @@ -248,7 +270,12 @@ func (c *Comparer) diffTree(ctx context.Context, stats *DiffStats, prefix string } if node1.Type == "dir" && node2.Type == "dir" { - err := c.diffTree(ctx, stats, name, *node1.Subtree, *node2.Subtree) + var err error + if (*node1.Subtree).Equal(*node2.Subtree) { + err = c.collectDir(ctx, stats.BlobsCommon, *node1.Subtree) + } else { + err = c.diffTree(ctx, stats, name, *node1.Subtree, *node2.Subtree) + } if err != nil { Warnf("error: %v\n", err) } @@ -345,8 +372,8 @@ func runDiff(opts DiffOptions, gopts GlobalOptions, args []string) error { } both := stats.BlobsBefore.Intersect(stats.BlobsAfter) - updateBlobs(repo, stats.BlobsBefore.Sub(both), &stats.Removed) - updateBlobs(repo, stats.BlobsAfter.Sub(both), &stats.Added) + updateBlobs(repo, stats.BlobsBefore.Sub(both).Sub(stats.BlobsCommon), &stats.Removed) + updateBlobs(repo, stats.BlobsAfter.Sub(both).Sub(stats.BlobsCommon), &stats.Added) Printf("\n") Printf("Files: %5d new, %5d removed, %5d changed\n", stats.Added.Files, stats.Removed.Files, stats.ChangedFiles) From 4f221c4022a046bec046db40415c577ccfba61d0 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Thu, 5 Mar 2020 22:48:03 +0100 Subject: [PATCH 3/3] Add changelog entry --- changelog/unreleased/pull-2598 | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 changelog/unreleased/pull-2598 diff --git a/changelog/unreleased/pull-2598 b/changelog/unreleased/pull-2598 new file mode 100644 index 000000000..7000c66d5 --- /dev/null +++ b/changelog/unreleased/pull-2598 @@ -0,0 +1,6 @@ +Enhancement: Improve speed of diff command + +We've improved the performance of the diff command when comparing snapshots +with similar content. It should run up to twice as fast as before. + +https://github.com/restic/restic/pull/2598