Add dangling blob detection and removal to 'fsck'

This commit is contained in:
Alexander Neumann 2014-12-22 14:46:54 +01:00
parent a981141e49
commit 1944ab13d4
4 changed files with 157 additions and 20 deletions

View File

@ -1,13 +1,24 @@
package main
import (
"errors"
"fmt"
"github.com/restic/restic"
"github.com/restic/restic/backend"
)
type CmdFsck struct{}
// CmdFsck holds the command line options of the fsck command together with
// the blob lists that are filled while checking for orphaned blobs.
type CmdFsck struct {
// CheckData makes fsckFile load every data blob instead of only testing
// that it is present.
CheckData bool ` long:"check-data" description:"Read data blobs" default:"false"`
// Snapshot, when non-empty, restricts the check to the snapshot it names.
Snapshot string `short:"s" long:"snapshot" description:"Only check this snapshot"`
// Orphaned enables the search for blobs no checked snapshot refers to.
Orphaned bool `short:"o" long:"orphaned" description:"Check for orphaned blobs"`
// RemoveOrphaned additionally removes the orphaned blobs that are found;
// Execute turns Orphaned on when this is set.
RemoveOrphaned bool `short:"x" long:"remove-orphaned" description:"Remove orphaned blobs (implies -o)"`
// lists checking for orphaned blobs
// They stay nil unless the orphan check is active; the fsck helpers test
// for nil before recording storage IDs in them.
// NOTE(review): Execute allocates these lists only after the early return
// taken when Snapshot is set, so -o/-x appear to have no effect together
// with -s — confirm this is intended.
o_data *restic.BlobList
o_trees *restic.BlobList
o_maps *restic.BlobList
}
func init() {
_, err := parser.AddCommand("fsck",
@ -19,21 +30,45 @@ func init() {
}
}
// fsckFile checks every data blob listed in IDs and returns the first error
// encountered, or nil when all blobs pass.
//
// With opts.CheckData set each blob is loaded through ch; otherwise ch is only
// asked whether the blob exists, and a missing blob is reported as an error.
// When opts.o_data is non-nil, the storage ID of each checked blob is recorded
// in that list for the orphan check.
//
// NOTE(review): this span shows both the previous signature/body lines
// (without opts) and the updated ones; only the variant taking opts uses the
// CheckData and o_data logic below.
func fsckFile(ch *restic.ContentHandler, IDs []backend.ID) error {
func fsckFile(opts CmdFsck, ch *restic.ContentHandler, IDs []backend.ID) error {
for _, id := range IDs {
debug("checking data blob %v\n", id)
// load content
_, err := ch.Load(backend.Data, id)
if err != nil {
return err
if opts.CheckData {
// load content
// the loaded bytes are discarded, only the error matters here
_, err := ch.Load(backend.Data, id)
if err != nil {
return err
}
} else {
// test if data blob is there
ok, err := ch.Test(backend.Data, id)
if err != nil {
return err
}
if !ok {
return fmt.Errorf("data blob %v not found", id)
}
}
// if orphan check is active, record storage id
if opts.o_data != nil {
// lookup storage ID
sid, err := ch.Lookup(id)
if err != nil {
return err
}
// add ID to list
// opts is passed by value, but o_data is a pointer, so the insert is
// visible to the caller's list
opts.o_data.Insert(restic.Blob{ID: sid})
}
}
return nil
}
func fsckTree(ch *restic.ContentHandler, id backend.ID) error {
func fsckTree(opts CmdFsck, ch *restic.ContentHandler, id backend.ID) error {
debug("checking tree %v\n", id)
tree, err := restic.LoadTree(ch, id)
@ -41,6 +76,18 @@ func fsckTree(ch *restic.ContentHandler, id backend.ID) error {
return err
}
// if orphan check is active, record storage id
if opts.o_trees != nil {
// lookup storage ID
sid, err := ch.Lookup(id)
if err != nil {
return err
}
// add ID to list
opts.o_trees.Insert(restic.Blob{ID: sid})
}
for i, node := range tree {
if node.Name == "" {
return fmt.Errorf("node %v of tree %v has no name", i, id)
@ -56,7 +103,7 @@ func fsckTree(ch *restic.ContentHandler, id backend.ID) error {
return fmt.Errorf("file node %q of tree %v has no content", node.Name, id)
}
err := fsckFile(ch, node.Content)
err := fsckFile(opts, ch, node.Content)
if err != nil {
return err
}
@ -65,7 +112,7 @@ func fsckTree(ch *restic.ContentHandler, id backend.ID) error {
return fmt.Errorf("dir node %q of tree %v has no subtree", node.Name, id)
}
err := fsckTree(ch, node.Subtree)
err := fsckTree(opts, ch, node.Subtree)
if err != nil {
return err
}
@ -75,7 +122,7 @@ func fsckTree(ch *restic.ContentHandler, id backend.ID) error {
return nil
}
func fsck_snapshot(s restic.Server, id backend.ID) error {
func fsck_snapshot(opts CmdFsck, s restic.Server, id backend.ID) error {
debug("checking snapshot %v\n", id)
ch, err := restic.NewContentHandler(s)
@ -96,16 +143,25 @@ func fsck_snapshot(s restic.Server, id backend.ID) error {
return fmt.Errorf("snapshot %v has no map", sn.ID)
}
return fsckTree(ch, sn.Tree)
// if orphan check is active, record storage id for map
if opts.o_maps != nil {
opts.o_maps.Insert(restic.Blob{ID: sn.Map})
}
return fsckTree(opts, ch, sn.Tree)
}
// Usage returns the usage string of the fsck command.
// NOTE(review): the two return lines are the previous and the updated usage
// text of this change; only one of them belongs in the final function.
func (cmd CmdFsck) Usage() string {
return "fsck [all|snapshot-ID]"
return "[fsck-options]"
}
func (cmd CmdFsck) Execute(args []string) error {
if len(args) == 0 {
return fmt.Errorf("type or ID not specified, Usage: %s", cmd.Usage())
if len(args) != 0 {
return errors.New("fsck has no arguments")
}
if cmd.RemoveOrphaned && !cmd.Orphaned {
cmd.Orphaned = true
}
s, err := OpenRepo()
@ -113,27 +169,75 @@ func (cmd CmdFsck) Execute(args []string) error {
return err
}
if len(args) == 1 && args[0] != "all" {
snapshotID, err := s.FindSnapshot(args[0])
if cmd.Snapshot != "" {
snapshotID, err := s.FindSnapshot(cmd.Snapshot)
if err != nil {
return fmt.Errorf("invalid id %q: %v", args[0], err)
return fmt.Errorf("invalid id %q: %v", cmd.Snapshot, err)
}
return fsck_snapshot(s, snapshotID)
return fsck_snapshot(cmd, s, snapshotID)
}
if cmd.Orphaned {
cmd.o_data = restic.NewBlobList()
cmd.o_trees = restic.NewBlobList()
cmd.o_maps = restic.NewBlobList()
}
list, err := s.List(backend.Snapshot)
debug("checking %d snapshots\n", len(list))
if err != nil {
return err
}
for _, snapshotID := range list {
err := fsck_snapshot(s, snapshotID)
err := fsck_snapshot(cmd, s, snapshotID)
if err != nil {
return err
}
}
if !cmd.Orphaned {
return nil
}
debug("starting orphaned check\n")
l := []struct {
desc string
tpe backend.Type
list *restic.BlobList
}{
{"data blob", backend.Data, cmd.o_data},
{"tree", backend.Tree, cmd.o_trees},
{"maps", backend.Map, cmd.o_maps},
}
for _, d := range l {
debug("checking for orphaned %v\n", d.desc)
blobs, err := s.List(d.tpe)
if err != nil {
return err
}
for _, id := range blobs {
_, err := d.list.Find(restic.Blob{ID: id})
if err == restic.ErrBlobNotFound {
if !cmd.RemoveOrphaned {
fmt.Printf("orphaned %v %v\n", d.desc, id)
continue
}
fmt.Printf("removing orphaned %v %v\n", d.desc, id)
err := s.Remove(d.tpe, id)
if err != nil {
return err
}
}
}
}
return nil
}

View File

@ -60,7 +60,7 @@ func print_tree(prefix string, ch *restic.ContentHandler, id backend.ID) error {
}
// Usage returns the usage string of the ls command.
// NOTE(review): the two return lines are the previous and the updated usage
// text of this change; only one of them belongs in the final function.
func (cmd CmdLs) Usage() string {
return "ls snapshot-ID [DIR]"
return "snapshot-ID [DIR]"
}
func (cmd CmdLs) Execute(s restic.Server, key *restic.Key, args []string) error {

View File

@ -130,6 +130,10 @@ func create(u string) (backend.Backend, error) {
}
func OpenRepo() (restic.Server, error) {
if opts.Repo == "" {
return restic.Server{}, errors.New("Please specify repository location (-r)")
}
be, err := open(opts.Repo)
if err != nil {
return restic.Server{}, err

View File

@ -181,6 +181,17 @@ func (ch *ContentHandler) Load(t backend.Type, id backend.ID) ([]byte, error) {
return buf, nil
}
// Lookup resolves id through the content handler's blob list and returns the
// storage ID recorded for it. Any error from the blob list lookup is returned
// unchanged, together with a nil ID.
func (ch *ContentHandler) Lookup(id backend.ID) (backend.ID, error) {
	entry, err := ch.bl.Find(Blob{ID: id})
	if err == nil {
		return entry.Storage, nil
	}

	return nil, err
}
// LoadJSON calls Load() to get content from the backend and afterwards calls
// json.Unmarshal on the item.
func (ch *ContentHandler) LoadJSON(t backend.Type, id backend.ID, item interface{}) error {
@ -214,3 +225,21 @@ func (ch *ContentHandler) LoadJSONRaw(t backend.Type, id backend.ID, item interf
err = json.Unmarshal(backend.Uncompress(buf), item)
return err
}
// Test checks if a blob is in the repository. For Data and Tree blobs, the
// given ID is first translated to its storage ID via Lookup; for every other
// type id is passed to the server as-is.
//
// It returns an error when the storage ID cannot be determined (the
// underlying lookup error is included in the message) or when the server's
// own test fails.
func (ch *ContentHandler) Test(t backend.Type, id backend.ID) (bool, error) {
	if t == backend.Data || t == backend.Tree {
		// Data and tree blobs are tested under their storage ID, so resolve
		// it with the shared helper instead of repeating the lookup here.
		sid, err := ch.Lookup(id)
		if err != nil {
			// Keep the cause visible instead of dropping it.
			return false, fmt.Errorf("Storage ID %s not found: %v", id, err)
		}
		id = sid
	}

	return ch.s.Test(t, id)
}