From 9175795fdbacea2cf882d30357217194c60a7a47 Mon Sep 17 00:00:00 2001 From: Alexander Weiss Date: Thu, 9 Jul 2020 22:35:04 +0200 Subject: [PATCH] Check contents in archiver When backing up with a parent snapshot and the file is not changed, also check if contents are still available in index. --- changelog/unreleased/issue-2571 | 16 +++++++++++ cmd/restic/integration_test.go | 47 +++++++++++++++++++++++++++++++++ internal/archiver/archiver.go | 43 +++++++++++++++++++++--------- 3 files changed, 94 insertions(+), 12 deletions(-) create mode 100644 changelog/unreleased/issue-2571 diff --git a/changelog/unreleased/issue-2571 b/changelog/unreleased/issue-2571 new file mode 100644 index 000000000..64fcc3160 --- /dev/null +++ b/changelog/unreleased/issue-2571 @@ -0,0 +1,16 @@ +Enhancement: Self-heal missing file parts during backup of unchanged files + +We've improved the resilience of restic to certain types of repository corruption. + +For files that are unchanged since the parent snapshot, the backup command now +verifies that all parts of the files still exist in the repository. Parts that are +missing, e.g. from a damaged repository, are backed up again. This verification +was already run for files that were modified since the parent snapshot, but is +now also done for unchanged files. + +Note that restic will not backup file parts that are referenced in the index but +where the actual data is not present on disk, as this situation can only be +detected by restic check. Please ensure that you run `restic check` regularly. + +https://github.com/restic/restic/issues/2571 +https://github.com/restic/restic/pull/2827 diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index b94e50520..5ffb5ee22 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -359,6 +359,53 @@ func TestBackupNonExistingFile(t *testing.T) { testRunBackup(t, "", dirs, opts, env.gopts) } +func TestBackupSelfHealing(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + testRunInit(t, env.gopts) + + p := filepath.Join(env.testdata, "test/test") + rtest.OK(t, os.MkdirAll(filepath.Dir(p), 0755)) + rtest.OK(t, appendRandomData(p, 5)) + + opts := BackupOptions{} + + testRunBackup(t, filepath.Dir(env.testdata), []string{filepath.Base(env.testdata)}, opts, env.gopts) + testRunCheck(t, env.gopts) + + r, err := OpenRepository(env.gopts) + rtest.OK(t, err) + + // Get all tree packs + rtest.OK(t, r.LoadIndex(env.gopts.ctx)) + treePacks := restic.NewIDSet() + for _, idx := range r.Index().(*repository.MasterIndex).All() { + for _, id := range idx.TreePacks() { + treePacks.Insert(id) + } + } + + // remove all data packs + rtest.OK(t, r.List(env.gopts.ctx, restic.DataFile, func(id restic.ID, size int64) error { + if treePacks.Has(id) { + return nil + } + return r.Backend().Remove(env.gopts.ctx, restic.Handle{Type: restic.DataFile, Name: id.String()}) + })) + + testRunRebuildIndex(t, env.gopts) + // now the repo is also missing the data blob in the index; check should report this + rtest.Assert(t, runCheck(CheckOptions{}, env.gopts, nil) != nil, + "check should have reported an error") + + // second backup should report an error but "heal" this situation + err = testRunBackupAssumeFailure(t, filepath.Dir(env.testdata), []string{filepath.Base(env.testdata)}, opts, env.gopts) + rtest.Assert(t, err != nil, + "backup should have reported an error") + testRunCheck(t, env.gopts) +} + func includes(haystack []string, needle string) bool { for _, s := range haystack { if s == needle { diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index bc1432c28..51ec94f15 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -302,6 +302,18 @@ func (fn *FutureNode) wait(ctx context.Context) { } } +// allBlobsPresent checks if all blobs (contents) of the given node are +// present in the index. +func (arch *Archiver) allBlobsPresent(previous *restic.Node) bool { + // check if all blobs are contained in index + for _, id := range previous.Content { + if !arch.Repo.Index().Has(id, restic.DataBlob) { + return false + } + } + return true +} + // Save saves a target (file or directory) to the repo. If the item is // excluded, this function returns a nil node and error, with excluded set to // true. @@ -388,19 +400,26 @@ func (arch *Archiver) Save(ctx context.Context, snPath, target string, previous // use previous list of blobs if the file hasn't changed if previous != nil && !fileChanged(fi, previous, arch.IgnoreInode) { - debug.Log("%v hasn't changed, using old list of blobs", target) - arch.CompleteItem(snPath, previous, previous, ItemStats{}, time.Since(start)) - arch.CompleteBlob(snPath, previous.Size) - fn.node, err = arch.nodeFromFileInfo(target, fi) - if err != nil { - return FutureNode{}, false, err + if arch.allBlobsPresent(previous) { + debug.Log("%v hasn't changed, using old list of blobs", target) + arch.CompleteItem(snPath, previous, previous, ItemStats{}, time.Since(start)) + arch.CompleteBlob(snPath, previous.Size) + fn.node, err = arch.nodeFromFileInfo(target, fi) + if err != nil { + return FutureNode{}, false, err + } + + // copy list of blobs + fn.node.Content = previous.Content + + _ = file.Close() + return fn, false, nil + } else { + debug.Log("%v hasn't changed, but contents are missing!", target) + // There are contents missing - inform user! + err := errors.Errorf("parts of %v not found in the repository index; storing the file again", target) + arch.error(abstarget, fi, err) } - - // copy list of blobs - fn.node.Content = previous.Content - - _ = file.Close() - return fn, false, nil } fn.isFile = true