diff --git a/src/restic/repository/prune.go b/src/restic/repository/index_rebuild.go similarity index 52% rename from src/restic/repository/prune.go rename to src/restic/repository/index_rebuild.go index 75f9f9ac7..34ef6680e 100644 --- a/src/restic/repository/prune.go +++ b/src/restic/repository/index_rebuild.go @@ -1,91 +1,14 @@ package repository import ( - "bytes" "fmt" - "io" "os" "restic/backend" - "restic/crypto" "restic/debug" "restic/pack" "restic/worker" ) -// Repack takes a list of packs together with a list of blobs contained in -// these packs. Each pack is loaded and the blobs listed in keepBlobs is saved -// into a new pack. Afterwards, the packs are removed. This operation requires -// an exclusive lock on the repo. -func Repack(repo *Repository, packs, keepBlobs backend.IDSet) (err error) { - debug.Log("Repack", "repacking %d packs while keeping %d blobs", len(packs), len(keepBlobs)) - - buf := make([]byte, 0, maxPackSize) - for packID := range packs { - // load the complete pack - h := backend.Handle{Type: backend.Data, Name: packID.String()} - - l, err := repo.Backend().Load(h, buf[:cap(buf)], 0) - if err == io.ErrUnexpectedEOF { - err = nil - buf = buf[:l] - } - - if err != nil { - return err - } - - debug.Log("Repack", "pack %v loaded (%d bytes)", packID.Str(), len(buf)) - - unpck, err := pack.NewUnpacker(repo.Key(), bytes.NewReader(buf)) - if err != nil { - return err - } - - debug.Log("Repack", "processing pack %v, blobs: %v", packID.Str(), len(unpck.Entries)) - var plaintext []byte - for _, entry := range unpck.Entries { - if !keepBlobs.Has(entry.ID) { - continue - } - - ciphertext := buf[entry.Offset : entry.Offset+entry.Length] - - if cap(plaintext) < len(ciphertext) { - plaintext = make([]byte, len(ciphertext)) - } - - plaintext, err = crypto.Decrypt(repo.Key(), plaintext, ciphertext) - if err != nil { - return err - } - - _, err = repo.SaveAndEncrypt(entry.Type, plaintext, &entry.ID) - if err != nil { - return err - } - - debug.Log("Repack", " saved blob %v", entry.ID.Str()) - - keepBlobs.Delete(entry.ID) - } - } - - if err := repo.Flush(); err != nil { - return err - } - - for packID := range packs { - err := repo.Backend().Remove(backend.Data, packID.String()) - if err != nil { - debug.Log("Repack", "error removing pack %v: %v", packID.Str(), err) - return err - } - debug.Log("Repack", "removed pack %v", packID.Str()) - } - - return nil -} - const rebuildIndexWorkers = 10 type loadBlobsResult struct { diff --git a/src/restic/repository/prune_test.go b/src/restic/repository/index_rebuild_test.go similarity index 54% rename from src/restic/repository/prune_test.go rename to src/restic/repository/index_rebuild_test.go index 9b40e92bd..6a6dcc354 100644 --- a/src/restic/repository/prune_test.go +++ b/src/restic/repository/index_rebuild_test.go @@ -1,4 +1,4 @@ -package repository_test +package repository import ( "io" @@ -110,83 +110,3 @@ func findPacksForBlobs(t *testing.T, repo *repository.Repository, blobs backend. return packs } - -func repack(t *testing.T, repo *repository.Repository, packs, blobs backend.IDSet) { - err := repository.Repack(repo, packs, blobs) - if err != nil { - t.Fatal(err) - } -} - -func saveIndex(t *testing.T, repo *repository.Repository) { - if err := repo.SaveIndex(); err != nil { - t.Fatalf("repo.SaveIndex() %v", err) - } -} - -func rebuildIndex(t *testing.T, repo *repository.Repository) { - if err := repository.RebuildIndex(repo); err != nil { - t.Fatalf("error rebuilding index: %v", err) - } -} - -func reloadIndex(t *testing.T, repo *repository.Repository) { - repo.SetIndex(repository.NewMasterIndex()) - if err := repo.LoadIndex(); err != nil { - t.Fatalf("error loading new index: %v", err) - } -} - -func TestRepack(t *testing.T) { - repo, cleanup := repository.TestRepository(t) - defer cleanup() - - createRandomBlobs(t, repo, rand.Intn(400), 0.7) - - packsBefore := listPacks(t, repo) - - // Running repack on empty ID sets should not do anything at all. - repack(t, repo, nil, nil) - - packsAfter := listPacks(t, repo) - - if !packsAfter.Equals(packsBefore) { - t.Fatalf("packs are not equal, Repack modified something. Before:\n %v\nAfter:\n %v", - packsBefore, packsAfter) - } - - saveIndex(t, repo) - - removeBlobs, keepBlobs := selectBlobs(t, repo, 0.2) - - removePacks := findPacksForBlobs(t, repo, removeBlobs) - - repack(t, repo, removePacks, keepBlobs) - rebuildIndex(t, repo) - reloadIndex(t, repo) - - packsAfter = listPacks(t, repo) - for id := range removePacks { - if packsAfter.Has(id) { - t.Errorf("pack %v still present although it should have been repacked and removed", id.Str()) - } - } - - idx := repo.Index() - for id := range keepBlobs { - pb, err := idx.Lookup(id) - if err != nil { - t.Errorf("unable to find blob %v in repo", id.Str()) - } - - if removePacks.Has(pb.PackID) { - t.Errorf("lookup returned pack ID %v that should've been removed", pb.PackID) - } - } - - for id := range removeBlobs { - if _, err := idx.Lookup(id); err == nil { - t.Errorf("blob %v still contained in the repo", id.Str()) - } - } -} diff --git a/src/restic/repository/repack.go b/src/restic/repository/repack.go new file mode 100644 index 000000000..7177d0964 --- /dev/null +++ b/src/restic/repository/repack.go @@ -0,0 +1,84 @@ +package repository + +import ( + "bytes" + "io" + "restic/backend" + "restic/crypto" + "restic/debug" + "restic/pack" +) + +// Repack takes a list of packs together with a list of blobs contained in +// these packs. Each pack is loaded and the blobs listed in keepBlobs is saved +// into a new pack. Afterwards, the packs are removed. This operation requires +// an exclusive lock on the repo. +func Repack(repo *Repository, packs, keepBlobs backend.IDSet) (err error) { + debug.Log("Repack", "repacking %d packs while keeping %d blobs", len(packs), len(keepBlobs)) + + buf := make([]byte, 0, maxPackSize) + for packID := range packs { + // load the complete pack + h := backend.Handle{Type: backend.Data, Name: packID.String()} + + l, err := repo.Backend().Load(h, buf[:cap(buf)], 0) + if err == io.ErrUnexpectedEOF { + err = nil + buf = buf[:l] + } + + if err != nil { + return err + } + + debug.Log("Repack", "pack %v loaded (%d bytes)", packID.Str(), len(buf)) + + unpck, err := pack.NewUnpacker(repo.Key(), bytes.NewReader(buf)) + if err != nil { + return err + } + + debug.Log("Repack", "processing pack %v, blobs: %v", packID.Str(), len(unpck.Entries)) + var plaintext []byte + for _, entry := range unpck.Entries { + if !keepBlobs.Has(entry.ID) { + continue + } + + ciphertext := buf[entry.Offset : entry.Offset+entry.Length] + + if cap(plaintext) < len(ciphertext) { + plaintext = make([]byte, len(ciphertext)) + } + + plaintext, err = crypto.Decrypt(repo.Key(), plaintext, ciphertext) + if err != nil { + return err + } + + _, err = repo.SaveAndEncrypt(entry.Type, plaintext, &entry.ID) + if err != nil { + return err + } + + debug.Log("Repack", " saved blob %v", entry.ID.Str()) + + keepBlobs.Delete(entry.ID) + } + } + + if err := repo.Flush(); err != nil { + return err + } + + for packID := range packs { + err := repo.Backend().Remove(backend.Data, packID.String()) + if err != nil { + debug.Log("Repack", "error removing pack %v: %v", packID.Str(), err) + return err + } + debug.Log("Repack", "removed pack %v", packID.Str()) + } + + return nil +} diff --git a/src/restic/repository/repack_test.go b/src/restic/repository/repack_test.go new file mode 100644 index 000000000..be343fe1f --- /dev/null +++ b/src/restic/repository/repack_test.go @@ -0,0 +1,88 @@ +package repository + +import ( + "math/rand" + "restic/backend" + "restic/repository" + "testing" +) + +func repack(t *testing.T, repo *repository.Repository, packs, blobs backend.IDSet) { + err := repository.Repack(repo, packs, blobs) + if err != nil { + t.Fatal(err) + } +} + +func saveIndex(t *testing.T, repo *repository.Repository) { + if err := repo.SaveIndex(); err != nil { + t.Fatalf("repo.SaveIndex() %v", err) + } +} + +func rebuildIndex(t *testing.T, repo *repository.Repository) { + if err := repository.RebuildIndex(repo); err != nil { + t.Fatalf("error rebuilding index: %v", err) + } +} + +func reloadIndex(t *testing.T, repo *repository.Repository) { + repo.SetIndex(repository.NewMasterIndex()) + if err := repo.LoadIndex(); err != nil { + t.Fatalf("error loading new index: %v", err) + } +} + +func TestRepack(t *testing.T) { + repo, cleanup := repository.TestRepository(t) + defer cleanup() + + createRandomBlobs(t, repo, rand.Intn(400), 0.7) + + packsBefore := listPacks(t, repo) + + // Running repack on empty ID sets should not do anything at all. + repack(t, repo, nil, nil) + + packsAfter := listPacks(t, repo) + + if !packsAfter.Equals(packsBefore) { + t.Fatalf("packs are not equal, Repack modified something. Before:\n %v\nAfter:\n %v", + packsBefore, packsAfter) + } + + saveIndex(t, repo) + + removeBlobs, keepBlobs := selectBlobs(t, repo, 0.2) + + removePacks := findPacksForBlobs(t, repo, removeBlobs) + + repack(t, repo, removePacks, keepBlobs) + rebuildIndex(t, repo) + reloadIndex(t, repo) + + packsAfter = listPacks(t, repo) + for id := range removePacks { + if packsAfter.Has(id) { + t.Errorf("pack %v still present although it should have been repacked and removed", id.Str()) + } + } + + idx := repo.Index() + for id := range keepBlobs { + pb, err := idx.Lookup(id) + if err != nil { + t.Errorf("unable to find blob %v in repo", id.Str()) + } + + if removePacks.Has(pb.PackID) { + t.Errorf("lookup returned pack ID %v that should've been removed", pb.PackID) + } + } + + for id := range removeBlobs { + if _, err := idx.Lookup(id); err == nil { + t.Errorf("blob %v still contained in the repo", id.Str()) + } + } +}