From bdf7ba20cbe2cfaf8c1e657cc24a355d561f3f13 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Mon, 13 Apr 2020 16:20:59 +0200 Subject: [PATCH] archiver: Fix race condition triggered by TestArchiverAbortEarlyOnError The Save methods of the BlobSaver, FileSaver and TreeSaver return early on when the archiver is stopped due to an error. For that they select on both the tomb.Dying() and context.Done() channels, which can lead to a race condition when the tomb is killed due to an error: The tomb first closes its Dying channel before canceling all child contexts. Archiver.SaveDir only aborts its execution once the context was canceled. When the tomb killing is paused between closing its Dying channel and canceling the child contexts, this lets the FileSaver/TreeSaver.Save methods return immediately, however, ScanDir still reads further files causing the test case to fail. As a killed tomb always cancels all child contexts and as the Savers always use a context bound to the tomb, it is sufficient to just use context.Done() as escape hatch in the Save functions. This fixes the mismatch between SaveDir and Save. Adjust the tests to use contexts bound to the tomb for all interactions with the Savers. --- internal/archiver/blob_saver.go | 8 +------- internal/archiver/blob_saver_test.go | 8 ++++---- internal/archiver/file_saver.go | 9 +-------- internal/archiver/file_saver_test.go | 10 +++++----- internal/archiver/tree_saver.go | 8 +------- internal/archiver/tree_saver_test.go | 8 ++++---- 6 files changed, 16 insertions(+), 35 deletions(-) diff --git a/internal/archiver/blob_saver.go b/internal/archiver/blob_saver.go index c20b96919..45db04f46 100644 --- a/internal/archiver/blob_saver.go +++ b/internal/archiver/blob_saver.go @@ -22,8 +22,7 @@ type BlobSaver struct { m sync.Mutex knownBlobs restic.BlobSet - ch chan<- saveBlobJob - done <-chan struct{} + ch chan<- saveBlobJob } // NewBlobSaver returns a new blob. A worker pool is started, it is stopped @@ -34,7 +33,6 @@ func NewBlobSaver(ctx context.Context, t *tomb.Tomb, repo Saver, workers uint) * repo: repo, knownBlobs: restic.NewBlobSet(), ch: ch, - done: t.Dying(), } for i := uint(0); i < workers; i++ { @@ -53,10 +51,6 @@ func (s *BlobSaver) Save(ctx context.Context, t restic.BlobType, buf *Buffer) Fu ch := make(chan saveBlobResponse, 1) select { case s.ch <- saveBlobJob{BlobType: t, buf: buf, ch: ch}: - case <-s.done: - debug.Log("not sending job, BlobSaver is done") - close(ch) - return FutureBlob{ch: ch} case <-ctx.Done(): debug.Log("not sending job, context is cancelled") close(ch) diff --git a/internal/archiver/blob_saver_test.go b/internal/archiver/blob_saver_test.go index 6d7af3fed..f3b0e1a68 100644 --- a/internal/archiver/blob_saver_test.go +++ b/internal/archiver/blob_saver_test.go @@ -38,12 +38,12 @@ func TestBlobSaver(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - var tmb tomb.Tomb + tmb, ctx := tomb.WithContext(ctx) saver := &saveFail{ idx: repository.NewIndex(), } - b := NewBlobSaver(ctx, &tmb, saver, uint(runtime.NumCPU())) + b := NewBlobSaver(ctx, tmb, saver, uint(runtime.NumCPU())) var results []FutureBlob @@ -84,13 +84,13 @@ func TestBlobSaverError(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - var tmb tomb.Tomb + tmb, ctx := tomb.WithContext(ctx) saver := &saveFail{ idx: repository.NewIndex(), failAt: int32(test.failAt), } - b := NewBlobSaver(ctx, &tmb, saver, uint(runtime.NumCPU())) + b := NewBlobSaver(ctx, tmb, saver, uint(runtime.NumCPU())) var results []FutureBlob diff --git a/internal/archiver/file_saver.go b/internal/archiver/file_saver.go index c958e4e19..24cc5e116 100644 --- a/internal/archiver/file_saver.go +++ b/internal/archiver/file_saver.go @@ -58,8 +58,7 @@ type FileSaver struct { pol chunker.Pol - ch chan<- saveFileJob - done <-chan struct{} + ch chan<- saveFileJob CompleteBlob func(filename string, bytes uint64) @@ -80,7 +79,6 @@ func NewFileSaver(ctx context.Context, t *tomb.Tomb, save SaveBlobFn, pol chunke saveFilePool: NewBufferPool(ctx, int(poolSize), chunker.MaxSize), pol: pol, ch: ch, - done: t.Dying(), CompleteBlob: func(string, uint64) {}, } @@ -113,11 +111,6 @@ func (s *FileSaver) Save(ctx context.Context, snPath string, file fs.File, fi os select { case s.ch <- job: - case <-s.done: - debug.Log("not sending job, FileSaver is done") - _ = file.Close() - close(ch) - return FutureFile{ch: ch} case <-ctx.Done(): debug.Log("not sending job, context is cancelled: %v", ctx.Err()) _ = file.Close() diff --git a/internal/archiver/file_saver_test.go b/internal/archiver/file_saver_test.go index 88e62fd57..d4f4fe82b 100644 --- a/internal/archiver/file_saver_test.go +++ b/internal/archiver/file_saver_test.go @@ -30,8 +30,8 @@ func createTestFiles(t testing.TB, num int) (files []string, cleanup func()) { return files, cleanup } -func startFileSaver(ctx context.Context, t testing.TB) (*FileSaver, *tomb.Tomb) { - var tmb tomb.Tomb +func startFileSaver(ctx context.Context, t testing.TB) (*FileSaver, context.Context, *tomb.Tomb) { + tmb, ctx := tomb.WithContext(ctx) saveBlob := func(ctx context.Context, tpe restic.BlobType, buf *Buffer) FutureBlob { ch := make(chan saveBlobResponse) @@ -45,10 +45,10 @@ func startFileSaver(ctx context.Context, t testing.TB) (*FileSaver, *tomb.Tomb) t.Fatal(err) } - s := NewFileSaver(ctx, &tmb, saveBlob, pol, workers, workers) + s := NewFileSaver(ctx, tmb, saveBlob, pol, workers, workers) s.NodeFromFileInfo = restic.NodeFromFileInfo - return s, &tmb + return s, ctx, tmb } func TestFileSaver(t *testing.T) { @@ -62,7 +62,7 @@ func TestFileSaver(t *testing.T) { completeFn := func(*restic.Node, ItemStats) {} testFs := fs.Local{} - s, tmb := startFileSaver(ctx, t) + s, ctx, tmb := startFileSaver(ctx, t) var results []FutureFile diff --git a/internal/archiver/tree_saver.go b/internal/archiver/tree_saver.go index 29b899e82..a956f4a12 100644 --- a/internal/archiver/tree_saver.go +++ b/internal/archiver/tree_saver.go @@ -42,8 +42,7 @@ type TreeSaver struct { saveTree func(context.Context, *restic.Tree) (restic.ID, ItemStats, error) errFn ErrorFunc - ch chan<- saveTreeJob - done <-chan struct{} + ch chan<- saveTreeJob } // NewTreeSaver returns a new tree saver. A worker pool with treeWorkers is @@ -53,7 +52,6 @@ func NewTreeSaver(ctx context.Context, t *tomb.Tomb, treeWorkers uint, saveTree s := &TreeSaver{ ch: ch, - done: t.Dying(), saveTree: saveTree, errFn: errFn, } @@ -78,10 +76,6 @@ func (s *TreeSaver) Save(ctx context.Context, snPath string, node *restic.Node, } select { case s.ch <- job: - case <-s.done: - debug.Log("not saving tree, TreeSaver is done") - close(ch) - return FutureTree{ch: ch} case <-ctx.Done(): debug.Log("not saving tree, context is cancelled") close(ch) diff --git a/internal/archiver/tree_saver_test.go b/internal/archiver/tree_saver_test.go index 3f58da222..bc8c2612e 100644 --- a/internal/archiver/tree_saver_test.go +++ b/internal/archiver/tree_saver_test.go @@ -17,7 +17,7 @@ func TestTreeSaver(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - var tmb tomb.Tomb + tmb, ctx := tomb.WithContext(ctx) saveFn := func(context.Context, *restic.Tree) (restic.ID, ItemStats, error) { return restic.NewRandomID(), ItemStats{TreeBlobs: 1, TreeSize: 123}, nil @@ -27,7 +27,7 @@ func TestTreeSaver(t *testing.T) { return nil } - b := NewTreeSaver(ctx, &tmb, uint(runtime.NumCPU()), saveFn, errFn) + b := NewTreeSaver(ctx, tmb, uint(runtime.NumCPU()), saveFn, errFn) var results []FutureTree @@ -71,7 +71,7 @@ func TestTreeSaverError(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() - var tmb tomb.Tomb + tmb, ctx := tomb.WithContext(ctx) var num int32 saveFn := func(context.Context, *restic.Tree) (restic.ID, ItemStats, error) { @@ -88,7 +88,7 @@ func TestTreeSaverError(t *testing.T) { return nil } - b := NewTreeSaver(ctx, &tmb, uint(runtime.NumCPU()), saveFn, errFn) + b := NewTreeSaver(ctx, tmb, uint(runtime.NumCPU()), saveFn, errFn) var results []FutureTree