Reimplement rebuild-index

This commit is contained in:
Alexander Weiss 2020-10-10 21:51:11 +02:00
parent 187c8fb259
commit 30b6a0878a
3 changed files with 110 additions and 55 deletions

View File

@ -1,10 +1,8 @@
package main
import (
"context"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/index"
"github.com/restic/restic/internal/pack"
"github.com/restic/restic/internal/repository"
"github.com/restic/restic/internal/restic"
"github.com/spf13/cobra"
@ -12,7 +10,7 @@ import (
var cmdRebuildIndex = &cobra.Command{
Use: "rebuild-index [flags]",
Short: "Build a new index file",
Short: "Build a new index",
Long: `
The "rebuild-index" command creates a new index based on the pack files in the
repository.
@ -24,15 +22,25 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
`,
DisableAutoGenTag: true,
RunE: func(cmd *cobra.Command, args []string) error {
return runRebuildIndex(globalOptions)
return runRebuildIndex(rebuildIndexOptions, globalOptions)
},
}
func init() {
cmdRoot.AddCommand(cmdRebuildIndex)
// RebuildIndexOptions collects all options for the rebuild-index command.
type RebuildIndexOptions struct {
ReadAllPacks bool
}
func runRebuildIndex(gopts GlobalOptions) error {
var rebuildIndexOptions RebuildIndexOptions
func init() {
cmdRoot.AddCommand(cmdRebuildIndex)
f := cmdRebuildIndex.Flags()
f.BoolVar(&rebuildIndexOptions.ReadAllPacks, "read-all-packs", false, "read all pack files to generate new index from scratch")
}
func runRebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions) error {
repo, err := OpenRepository(gopts)
if err != nil {
return err
@ -44,59 +52,100 @@ func runRebuildIndex(gopts GlobalOptions) error {
return err
}
ctx, cancel := context.WithCancel(gopts.ctx)
defer cancel()
return rebuildIndex(ctx, repo, restic.NewIDSet())
return rebuildIndex(opts, gopts, repo, restic.NewIDSet())
}
func rebuildIndex(ctx context.Context, repo restic.Repository, ignorePacks restic.IDSet) error {
Verbosef("counting files in repo\n")
func rebuildIndex(opts RebuildIndexOptions, gopts GlobalOptions, repo *repository.Repository, ignorePacks restic.IDSet) error {
ctx := gopts.ctx
var packs uint64
err := repo.List(ctx, restic.PackFile, func(restic.ID, int64) error {
packs++
return nil
})
if err != nil {
return err
}
var obsolete restic.IDs
packSizeFromList := make(map[restic.ID]int64)
packs := restic.NewIDSet()
totalPacks := 0
bar := newProgressMax(!globalOptions.Quiet, packs-uint64(len(ignorePacks)), "packs")
idx, invalidFiles, err := index.New(ctx, repo, ignorePacks, bar)
bar.Done()
if err != nil {
return err
}
if opts.ReadAllPacks {
// get old index files
err := repo.List(ctx, restic.IndexFile, func(id restic.ID, size int64) error {
obsolete = append(obsolete, id)
return nil
})
if err != nil {
return err
}
if globalOptions.verbosity >= 2 {
for _, id := range invalidFiles {
Printf("skipped incomplete pack file: %v\n", id)
Verbosef("finding pack files in repo...\n")
err = repo.List(ctx, restic.PackFile, func(id restic.ID, size int64) error {
packSizeFromList[id] = size
packs.Insert(id)
totalPacks++
return nil
})
if err != nil {
return err
}
} else {
Verbosef("loading indexes...\n")
err := repo.LoadIndex(gopts.ctx)
if err != nil {
return err
}
packSizeFromIndex := make(map[restic.ID]int64)
Verbosef("getting pack files to read...\n")
// iterate over all blobs in index
for blob := range repo.Index().Each(ctx) {
size, ok := packSizeFromIndex[blob.PackID]
if !ok {
size = pack.HeaderSize
}
size += int64(pack.PackedSizeOfBlob(blob.Length))
// update packSizeFromIndex
packSizeFromIndex[blob.PackID] = size
}
err = repo.List(ctx, restic.PackFile, func(id restic.ID, packSize int64) error {
size, ok := packSizeFromIndex[id]
if !ok || size != packSize {
// Pack was not referenced in index or size does not match
packSizeFromList[id] = size
packs.Insert(id)
}
totalPacks++
delete(packSizeFromIndex, id)
return nil
})
if err != nil {
return err
}
for id := range packSizeFromIndex {
// ignore pack files that are referenced in the index but do not exist
// when rebuilding the index
packs.Insert(id)
}
}
Verbosef("finding old index files\n")
if len(packSizeFromList) > 0 {
Verbosef("reading pack files\n")
bar := newProgressMax(!globalOptions.Quiet, uint64(len(packSizeFromList)), "packs")
invalidFiles, err := repo.LoadIndexFromPacks(ctx, packSizeFromList, bar)
if err != nil {
return err
}
var supersedes restic.IDs
err = repo.List(ctx, restic.IndexFile, func(id restic.ID, size int64) error {
supersedes = append(supersedes, id)
return nil
})
for _, id := range invalidFiles {
Verboseff("skipped incomplete pack file: %v\n", id)
totalPacks--
}
} else {
Verbosef("no need to read any pack file\n")
}
err := rebuildIndexFiles(gopts, repo, packs, obsolete, uint64(totalPacks))
if err != nil {
return err
}
ids, err := idx.Save(ctx, repo, supersedes)
if err != nil {
return errors.Fatalf("unable to save index, last error was: %v", err)
}
Verbosef("saved new indexes as %v\n", ids)
Verbosef("remove %d old index files\n", len(supersedes))
err = DeleteFilesChecked(globalOptions, repo, restic.NewIDSet(supersedes...), restic.IndexFile)
if err != nil {
return errors.Fatalf("unable to remove an old index: %v\n", err)
}
Verbosef("done\n")
return nil
}

View File

@ -175,7 +175,7 @@ func testRunRebuildIndex(t testing.TB, gopts GlobalOptions) {
globalOptions.stdout = os.Stdout
}()
rtest.OK(t, runRebuildIndex(gopts))
rtest.OK(t, runRebuildIndex(RebuildIndexOptions{}, gopts))
}
func testRunLs(t testing.TB, gopts GlobalOptions, snapshotID string) []string {
@ -1351,7 +1351,7 @@ func TestRebuildIndexFailsOnAppendOnly(t *testing.T) {
env.gopts.backendTestHook = func(r restic.Backend) (restic.Backend, error) {
return &appendOnlyBackend{r}, nil
}
err := runRebuildIndex(env.gopts)
err := runRebuildIndex(RebuildIndexOptions{}, env.gopts)
if err == nil {
t.Error("expected rebuildIndex to fail")
}
@ -1583,7 +1583,7 @@ func (be *listOnceBackend) List(ctx context.Context, t restic.FileType, fn func(
return be.Backend.List(ctx, t, fn)
}
func TestPruneListOnce(t *testing.T) {
func TestListOnce(t *testing.T) {
env, cleanup := withTestEnvironment(t)
defer cleanup()
@ -1613,6 +1613,9 @@ func TestPruneListOnce(t *testing.T) {
testRunForget(t, env.gopts, firstSnapshot[0].String())
testRunPrune(t, env.gopts, pruneOpts)
rtest.OK(t, runCheck(checkOpts, env.gopts, nil))
rtest.OK(t, runRebuildIndex(RebuildIndexOptions{}, env.gopts))
rtest.OK(t, runRebuildIndex(RebuildIndexOptions{ReadAllPacks: true}, env.gopts))
}
func TestHardLink(t *testing.T) {

View File

@ -281,7 +281,10 @@ type EachByPackResult struct {
}
// EachByPack returns a channel that yields all blobs known to the index
// grouped by packID but ignoring blobs with a packID in packPlacklist.
// grouped by packID but ignoring blobs with a packID in packPlacklist for
// finalized indexes.
// This filtering is used when rebuilding the index where we need to ignore packs
// from the finalized index which have been re-read into a non-finalized index.
// When the context is cancelled, the background goroutine
// terminates. This blocks any modification of the index.
func (idx *Index) EachByPack(ctx context.Context, packBlacklist restic.IDSet) <-chan EachByPackResult {
@ -300,7 +303,7 @@ func (idx *Index) EachByPack(ctx context.Context, packBlacklist restic.IDSet) <-
m := &idx.byType[typ]
m.foreach(func(e *indexEntry) bool {
packID := idx.packs[e.packIndex]
if !packBlacklist.Has(packID) {
if !idx.final || !packBlacklist.Has(packID) {
byPack[packID] = append(byPack[packID], e)
}
return true