From 203a911de9c2cee2dee8e9f92611362971078d4e Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sat, 10 Jan 2015 23:40:10 +0100 Subject: [PATCH] Refactor repository structure Merge Map data type into Tree. --- archiver.go | 229 ++++++++++++++++++++--------- backend/id.go | 6 + backend/id_set.go | 68 +++++++++ backend/id_set_test.go | 40 +++++ backend/interface.go | 1 - backend/local.go | 5 - backend/local_test.go | 6 +- backend/sftp.go | 6 +- bloblist.go | 111 -------------- cmd/restic/cmd_backup.go | 15 +- cmd/restic/cmd_cat.go | 72 +-------- cmd/restic/cmd_find.go | 21 +-- cmd/restic/cmd_fsck.go | 118 +++++++-------- cmd/restic/cmd_list.go | 2 - cmd/restic/cmd_ls.go | 24 ++- cmd/restic/cmd_snapshots.go | 4 +- contenthandler.go | 250 -------------------------------- map.go | 188 ++++++++++++++++++++++++ bloblist_test.go => map_test.go | 35 +++-- restorer.go | 30 ++-- scanner.go | 14 +- server.go | 128 ++++++++++++++++ snapshot.go | 8 +- snapshot_test.go | 5 +- tree.go | 220 ++++++++++++++++++++++------ tree_test.go | 14 ++ 26 files changed, 939 insertions(+), 681 deletions(-) create mode 100644 backend/id_set.go create mode 100644 backend/id_set_test.go delete mode 100644 bloblist.go delete mode 100644 contenthandler.go create mode 100644 map.go rename bloblist_test.go => map_test.go (75%) diff --git a/archiver.go b/archiver.go index 3e15e1bd4..5ab757adb 100644 --- a/archiver.go +++ b/archiver.go @@ -1,6 +1,7 @@ package restic import ( + "encoding/json" "errors" "fmt" "io" @@ -18,10 +19,8 @@ const ( ) type Archiver struct { - s Server - ch *ContentHandler - - bl *BlobList // blobs used for the current snapshot + s Server + m *Map fileToken chan struct{} blobToken chan struct{} @@ -32,7 +31,7 @@ type Archiver struct { p *Progress } -func NewArchiver(s Server, bl *BlobList, p *Progress) (*Archiver, error) { +func NewArchiver(s Server, p *Progress) (*Archiver, error) { var err error arch := &Archiver{ s: s, @@ -50,74 +49,110 @@ func NewArchiver(s Server, bl 
*BlobList, p *Progress) (*Archiver, error) { arch.blobToken <- struct{}{} } + // create new map to store all blobs in + arch.m = NewMap() + // abort on all errors arch.Error = func(string, os.FileInfo, error) error { return err } // allow all files arch.Filter = func(string, os.FileInfo) bool { return true } - arch.bl = NewBlobList() - if bl != nil { - arch.bl.Merge(bl) - } - arch.ch = NewContentHandler(s) - - // load all blobs from all snapshots - // TODO: only use bloblist from old snapshot if available - err = arch.ch.LoadAllMaps() - if err != nil { - return nil, err - } - return arch, nil } func (arch *Archiver) Save(t backend.Type, data []byte) (Blob, error) { - blob, err := arch.ch.Save(t, data) - if err != nil { - return Blob{}, err + // compute plaintext hash + id := backend.Hash(data) + + debug("Save(%v, %v)\n", t, id.Str()) + + // test if this blob is already known + blob, err := arch.m.FindID(id) + if err == nil { + debug("Save(%v, %v): reusing %v\n", t, id.Str(), blob.Storage.Str()) + id.Free() + return blob, nil } - // store blob in storage map for current snapshot - arch.bl.Insert(blob) + // else encrypt and save data + blob, err = arch.s.Save(t, data, id) - return blob, nil + // store blob in storage map + smapblob := arch.m.Insert(blob) + + // if the map has a different storage id for this plaintext blob, use that + // one and remove the other. This happens if the same plaintext blob was + // stored concurrently and finished earlier than this blob. 
+ if blob.Storage.Compare(smapblob.Storage) != 0 { + debug("using other block, removing %v\n", blob.Storage.Str()) + + // remove the blob again + // TODO: implement a list of blobs in transport, so this doesn't happen so often + err = arch.s.Remove(t, blob.Storage) + if err != nil { + return Blob{}, err + } + } + + debug(": Save(%v, %v): new blob %v\n", t, id.Str(), blob) + + return smapblob, nil } -func (arch *Archiver) SaveJSON(t backend.Type, item interface{}) (Blob, error) { - blob, err := arch.ch.SaveJSON(t, item) +func (arch *Archiver) SaveTreeJSON(item interface{}) (Blob, error) { + // convert to json + data, err := json.Marshal(item) if err != nil { return Blob{}, err } - // store blob in storage map for current snapshot - arch.bl.Insert(blob) + // check if tree has been saved before + buf := backend.Compress(data) + id := backend.Hash(buf) + blob, err := arch.m.FindID(id) + + // return the blob if we found it + if err == nil { + return blob, nil + } + + // otherwise save the data + blob, err = arch.s.Save(backend.Tree, buf, id) + if err != nil { + return Blob{}, err + } + + // store blob in storage map + arch.m.Insert(blob) return blob, nil } // SaveFile stores the content of the file on the backend as a Blob by calling // Save for each chunk. 
-func (arch *Archiver) SaveFile(node *Node) error { +func (arch *Archiver) SaveFile(node *Node) (Blobs, error) { + debug("SaveFile(%q)\n", node.path) + file, err := os.Open(node.path) defer file.Close() if err != nil { - return err + return nil, err } // check file again fi, err := file.Stat() if err != nil { - return err + return nil, err } if fi.ModTime() != node.ModTime { - e2 := arch.Error(node.path, fi, errors.New("file was updated, using new version\n")) + e2 := arch.Error(node.path, fi, errors.New("file was updated, using new version")) if e2 == nil { // create new node n, err := NodeFromFileInfo(node.path, fi) if err != nil { - return err + return nil, err } // copy node @@ -139,16 +174,16 @@ func (arch *Archiver) SaveFile(node *Node) error { defer FreeChunkBuf("blob single file", buf) n, err := io.ReadFull(file, buf) if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF { - return arrar.Annotate(err, "SaveFile() read small file") + return nil, arrar.Annotate(err, "SaveFile() read small file") } if err == io.EOF { // use empty blob list for empty files blobs = Blobs{} } else { - blob, err := arch.ch.Save(backend.Data, buf[:n]) + blob, err := arch.Save(backend.Data, buf[:n]) if err != nil { - return arrar.Annotate(err, "SaveFile() save chunk") + return nil, arrar.Annotate(err, "SaveFile() save chunk") } arch.p.Report(Stat{Bytes: blob.Size}) @@ -173,7 +208,7 @@ func (arch *Archiver) SaveFile(node *Node) error { if err != nil { FreeChunkBuf("blob chunker", buf) - return arrar.Annotate(err, "SaveFile() chunker.Next()") + return nil, arrar.Annotate(err, "SaveFile() chunker.Next()") } chunks++ @@ -183,7 +218,7 @@ func (arch *Archiver) SaveFile(node *Node) error { resCh := make(chan Blob, 1) go func(ch chan<- Blob) { - blob, err := arch.ch.Save(backend.Data, chunk.Data) + blob, err := arch.Save(backend.Data, chunk.Data) // TODO handle error if err != nil { panic(err) @@ -205,7 +240,7 @@ func (arch *Archiver) SaveFile(node *Node) error { } if len(blobs) != 
chunks { - return fmt.Errorf("chunker returned %v chunks, but only %v blobs saved", chunks, len(blobs)) + return nil, fmt.Errorf("chunker returned %v chunks, but only %v blobs saved", chunks, len(blobs)) } } @@ -214,54 +249,111 @@ func (arch *Archiver) SaveFile(node *Node) error { node.Content = make([]backend.ID, len(blobs)) for i, blob := range blobs { node.Content[i] = blob.ID - arch.bl.Insert(blob) bytes += blob.Size } if bytes != node.Size { - return fmt.Errorf("errors saving node %q: saved %d bytes, wanted %d bytes", node.path, bytes, node.Size) + return nil, fmt.Errorf("errors saving node %q: saved %d bytes, wanted %d bytes", node.path, bytes, node.Size) } - return nil + debug("SaveFile(%q): %v\n", node.path, blobs) + + return blobs, nil } func (arch *Archiver) saveTree(t *Tree) (Blob, error) { + debug("saveTree(%v)\n", t) var wg sync.WaitGroup - for _, node := range *t { - if node.tree != nil && node.Subtree == nil { + // add all blobs to global map + arch.m.Merge(t.Map) + + // TODO: do all this in parallel + for _, node := range t.Nodes { + if node.tree != nil { b, err := arch.saveTree(node.tree) if err != nil { return Blob{}, err } node.Subtree = b.ID + t.Map.Insert(b) arch.p.Report(Stat{Dirs: 1}) - } else if node.Type == "file" && len(node.Content) == 0 { - // get token - token := <-arch.fileToken + } else if node.Type == "file" { + if len(node.Content) > 0 { + removeContent := false - // start goroutine - wg.Add(1) - go func(n *Node) { - defer wg.Done() - defer func() { - arch.fileToken <- token - }() + // check content + for _, id := range node.Content { + blob, err := t.Map.FindID(id) + if err != nil { + debug("unable to find storage id for data blob %v", id.Str()) + arch.Error(node.path, nil, fmt.Errorf("unable to find storage id for data blob %v", id.Str())) + removeContent = true + t.Map.DeleteID(id) + arch.m.DeleteID(id) + continue + } - node.err = arch.SaveFile(n) - arch.p.Report(Stat{Files: 1}) - }(node) + if ok, err := arch.s.Test(backend.Data, 
blob.Storage); !ok || err != nil { + debug("blob %v not in repository (error is %v)", blob, err) + arch.Error(node.path, nil, fmt.Errorf("blob %v not in repository (error is %v)", blob.Storage.Str(), err)) + removeContent = true + t.Map.DeleteID(id) + arch.m.DeleteID(id) + } + } + + if removeContent { + debug("removing content for %s", node.path) + node.Content = node.Content[:0] + } + } + + if len(node.Content) == 0 { + // get token + token := <-arch.fileToken + + // start goroutine + wg.Add(1) + go func(n *Node) { + defer wg.Done() + defer func() { + arch.fileToken <- token + }() + + var blobs Blobs + blobs, node.err = arch.SaveFile(n) + for _, b := range blobs { + t.Map.Insert(b) + } + + arch.p.Report(Stat{Files: 1}) + }(node) + } } } wg.Wait() + usedIDs := backend.NewIDSet() + // check for invalid file nodes - for _, node := range *t { + for _, node := range t.Nodes { if node.Type == "file" && node.Content == nil && node.err == nil { return Blob{}, fmt.Errorf("node %v has empty content", node.Name) } + // remember used hashes + if node.Type == "file" && node.Content != nil { + for _, id := range node.Content { + usedIDs.Insert(id) + } + } + + if node.Type == "dir" && node.Subtree != nil { + usedIDs.Insert(node.Subtree) + } + if node.err != nil { err := arch.Error(node.path, nil, node.err) if err != nil { @@ -273,7 +365,15 @@ func (arch *Archiver) saveTree(t *Tree) (Blob, error) { } } - blob, err := arch.SaveJSON(backend.Tree, t) + before := len(t.Map.IDs()) + t.Map.Prune(usedIDs) + after := len(t.Map.IDs()) + + if before != after { + debug("pruned %d ids from map for tree %v\n", before-after, t) + } + + blob, err := arch.SaveTreeJSON(t) if err != nil { return Blob{}, err } @@ -296,17 +396,10 @@ func (arch *Archiver) Snapshot(dir string, t *Tree, parentSnapshot backend.ID) ( if err != nil { return nil, nil, err } - sn.Tree = blob.ID - - // save bloblist - blob, err = arch.SaveJSON(backend.Map, arch.bl) - if err != nil { - return nil, nil, err - } - sn.Map = 
blob.Storage + sn.Tree = blob // save snapshot - blob, err = arch.SaveJSON(backend.Snapshot, sn) + blob, err = arch.s.SaveJSON(backend.Snapshot, sn) if err != nil { return nil, nil, err } diff --git a/backend/id.go b/backend/id.go index 4db643ae2..01dcd187a 100644 --- a/backend/id.go +++ b/backend/id.go @@ -36,6 +36,12 @@ func (id ID) String() string { return hex.EncodeToString(id) } +const shortStr = 4 + +func (id ID) Str() string { + return hex.EncodeToString(id[:shortStr]) +} + // Equal compares an ID to another other. func (id ID) Equal(other ID) bool { return bytes.Equal(id, other) diff --git a/backend/id_set.go b/backend/id_set.go new file mode 100644 index 000000000..6b36e0aba --- /dev/null +++ b/backend/id_set.go @@ -0,0 +1,68 @@ +package backend + +import ( + "errors" + "sort" + "sync" +) + +type IDSet struct { + list IDs + m sync.Mutex +} + +func NewIDSet() *IDSet { + return &IDSet{ + list: make(IDs, 0), + } +} + +func (s *IDSet) find(id ID) (int, error) { + pos := sort.Search(len(s.list), func(i int) bool { + return id.Compare(s.list[i]) >= 0 + }) + + if pos < len(s.list) { + candID := s.list[pos] + if id.Compare(candID) == 0 { + return pos, nil + } + } + + return pos, errors.New("ID not found") +} + +func (s *IDSet) insert(id ID) { + pos, err := s.find(id) + if err == nil { + // already present + return + } + + // insert blob + // https://code.google.com/p/go-wiki/wiki/SliceTricks + s.list = append(s.list, ID{}) + copy(s.list[pos+1:], s.list[pos:]) + s.list[pos] = id + + return +} + +func (s *IDSet) Insert(id ID) { + s.m.Lock() + defer s.m.Unlock() + + s.insert(id) +} + +func (s *IDSet) Find(id ID) error { + s.m.Lock() + defer s.m.Unlock() + + _, err := s.find(id) + if err != nil { + return err + } + + return nil +} diff --git a/backend/id_set_test.go b/backend/id_set_test.go new file mode 100644 index 000000000..a01676ef6 --- /dev/null +++ b/backend/id_set_test.go @@ -0,0 +1,40 @@ +package backend_test + +import ( + "crypto/rand" + "io" + "testing" + + 
"github.com/restic/restic/backend" +) + +func randomID() []byte { + buf := make([]byte, backend.IDSize) + _, err := io.ReadFull(rand.Reader, buf) + if err != nil { + panic(err) + } + return buf +} + +func TestSet(t *testing.T) { + s := backend.NewIDSet() + + testID := randomID() + err := s.Find(testID) + assert(t, err != nil, "found test ID in IDSet before insertion") + + for i := 0; i < 238; i++ { + s.Insert(randomID()) + } + + s.Insert(testID) + ok(t, s.Find(testID)) + + for i := 0; i < 80; i++ { + s.Insert(randomID()) + } + + s.Insert(testID) + ok(t, s.Find(testID)) +} diff --git a/backend/interface.go b/backend/interface.go index b0ada8602..2cbd21ff6 100644 --- a/backend/interface.go +++ b/backend/interface.go @@ -10,7 +10,6 @@ const ( Lock = "lock" Snapshot = "snapshot" Tree = "tree" - Map = "map" ) const ( diff --git a/backend/local.go b/backend/local.go index 68da2fe74..97dfe092f 100644 --- a/backend/local.go +++ b/backend/local.go @@ -17,7 +17,6 @@ const ( dataPath = "data" snapshotPath = "snapshots" treePath = "trees" - mapPath = "maps" lockPath = "locks" keyPath = "keys" tempPath = "tmp" @@ -38,7 +37,6 @@ func OpenLocal(dir string) (*Local, error) { filepath.Join(dir, dataPath), filepath.Join(dir, snapshotPath), filepath.Join(dir, treePath), - filepath.Join(dir, mapPath), filepath.Join(dir, lockPath), filepath.Join(dir, keyPath), filepath.Join(dir, tempPath), @@ -90,7 +88,6 @@ func CreateLocal(dir string) (*Local, error) { filepath.Join(dir, dataPath), filepath.Join(dir, snapshotPath), filepath.Join(dir, treePath), - filepath.Join(dir, mapPath), filepath.Join(dir, lockPath), filepath.Join(dir, keyPath), filepath.Join(dir, tempPath), @@ -176,8 +173,6 @@ func (b *Local) dirname(t Type, id ID) string { if id != nil { n = filepath.Join(treePath, fmt.Sprintf("%02x", id[0])) } - case Map: - n = mapPath case Lock: n = lockPath case Key: diff --git a/backend/local_test.go b/backend/local_test.go index e79eca5ef..52e0688b5 100644 --- a/backend/local_test.go +++ 
b/backend/local_test.go @@ -44,7 +44,7 @@ func teardownBackend(t *testing.T, b *backend.Local) { } func testBackend(b *backend.Local, t *testing.T) { - for _, tpe := range []backend.Type{backend.Data, backend.Key, backend.Lock, backend.Snapshot, backend.Tree, backend.Map} { + for _, tpe := range []backend.Type{backend.Data, backend.Key, backend.Lock, backend.Snapshot, backend.Tree} { // detect non-existing files for _, test := range TestStrings { id, err := backend.ParseID(test.id) @@ -106,13 +106,13 @@ func testBackend(b *backend.Local, t *testing.T) { found, err := b.Test(tpe, id) ok(t, err) - assert(t, found, fmt.Sprintf("id %q was not found before removal")) + assert(t, found, fmt.Sprintf("id %q was not found before removal", id)) ok(t, b.Remove(tpe, id)) found, err = b.Test(tpe, id) ok(t, err) - assert(t, !found, fmt.Sprintf("id %q was not found before removal")) + assert(t, !found, fmt.Sprintf("id %q not found after removal", id)) } } diff --git a/backend/sftp.go b/backend/sftp.go index 8184414d6..a45049eea 100644 --- a/backend/sftp.go +++ b/backend/sftp.go @@ -77,7 +77,6 @@ func OpenSFTP(dir string, program string, args ...string) (*SFTP, error) { filepath.Join(dir, dataPath), filepath.Join(dir, snapshotPath), filepath.Join(dir, treePath), - filepath.Join(dir, mapPath), filepath.Join(dir, lockPath), filepath.Join(dir, keyPath), filepath.Join(dir, tempPath), @@ -134,7 +133,6 @@ func CreateSFTP(dir string, program string, args ...string) (*SFTP, error) { filepath.Join(dir, dataPath), filepath.Join(dir, snapshotPath), filepath.Join(dir, treePath), - filepath.Join(dir, mapPath), filepath.Join(dir, lockPath), filepath.Join(dir, keyPath), filepath.Join(dir, tempPath), @@ -242,7 +240,7 @@ func (r *SFTP) mkdirAll(dir string, mode os.FileMode) error { fi, err = r.c.Lstat(dir) if err != nil { // return previous errors - return fmt.Errorf("mkdirAll(%s): unable to create directories: %v, %v", errMkdirAll, errMkdir) + return fmt.Errorf("mkdirAll(%s): unable to create 
directories: %v, %v", dir, errMkdirAll, errMkdir) } if !fi.IsDir() { @@ -284,8 +282,6 @@ func (r *SFTP) dirname(t Type, id ID) string { if id != nil { n = filepath.Join(treePath, fmt.Sprintf("%02x", id[0])) } - case Map: - n = mapPath case Lock: n = lockPath case Key: diff --git a/bloblist.go b/bloblist.go deleted file mode 100644 index d04064091..000000000 --- a/bloblist.go +++ /dev/null @@ -1,111 +0,0 @@ -package restic - -import ( - "bytes" - "encoding/json" - "errors" - "sort" - "sync" - - "github.com/restic/restic/backend" -) - -type BlobList struct { - list []Blob - m sync.Mutex -} - -var ErrBlobNotFound = errors.New("Blob not found") - -func NewBlobList() *BlobList { - return &BlobList{ - list: []Blob{}, - } -} - -func LoadBlobList(ch *ContentHandler, id backend.ID) (*BlobList, error) { - bl := &BlobList{} - err := ch.LoadJSONRaw(backend.Map, id, bl) - if err != nil { - return nil, err - } - - return bl, nil -} - -func (bl *BlobList) find(blob Blob) (int, Blob, error) { - pos := sort.Search(len(bl.list), func(i int) bool { - return blob.ID.Compare(bl.list[i].ID) >= 0 - }) - - if pos < len(bl.list) && blob.ID.Compare(bl.list[pos].ID) == 0 { - return pos, bl.list[pos], nil - } - - return pos, Blob{}, ErrBlobNotFound -} - -func (bl *BlobList) Find(blob Blob) (Blob, error) { - bl.m.Lock() - defer bl.m.Unlock() - - _, blob, err := bl.find(blob) - return blob, err -} - -func (bl *BlobList) Merge(other *BlobList) { - bl.m.Lock() - defer bl.m.Unlock() - other.m.Lock() - defer other.m.Unlock() - - for _, blob := range other.list { - bl.insert(blob) - } -} - -func (bl *BlobList) insert(blob Blob) { - pos, _, err := bl.find(blob) - if err == nil { - // already present - return - } - - // insert blob - // https://code.google.com/p/go-wiki/wiki/bliceTricks - bl.list = append(bl.list, Blob{}) - copy(bl.list[pos+1:], bl.list[pos:]) - bl.list[pos] = blob -} - -func (bl *BlobList) Insert(blob Blob) { - bl.m.Lock() - defer bl.m.Unlock() - - bl.insert(blob) -} - -func (bl 
BlobList) MarshalJSON() ([]byte, error) { - return json.Marshal(bl.list) -} - -func (bl *BlobList) UnmarshalJSON(data []byte) error { - return json.Unmarshal(data, &bl.list) -} - -// Compare compares two blobs by comparing the ID and the size. It returns -1, -// 0, or 1. -func (blob Blob) Compare(other Blob) int { - if res := bytes.Compare(other.ID, blob.ID); res != 0 { - return res - } - - if blob.Size < other.Size { - return -1 - } - if blob.Size > other.Size { - return 1 - } - - return 0 -} diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index 8615e3503..203c2beac 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -59,7 +59,7 @@ func format_duration(d time.Duration) string { } func print_tree2(indent int, t *restic.Tree) { - for _, node := range *t { + for _, node := range t.Nodes { if node.Tree() != nil { fmt.Printf("%s%s/\n", strings.Repeat(" ", indent), node.Name) print_tree2(indent+1, node.Tree()) @@ -120,22 +120,19 @@ func (cmd CmdBackup) Execute(args []string) error { return err } - var bl *restic.BlobList if parentSnapshotID != nil { fmt.Printf("load old snapshot\n") - ch := restic.NewContentHandler(s) - sn, err := ch.LoadSnapshot(parentSnapshotID) + sn, err := restic.LoadSnapshot(s, parentSnapshotID) if err != nil { return err } - oldTree, err := restic.LoadTreeRecursive(filepath.Dir(sn.Dir), ch, sn.Tree) + oldTree, err := restic.LoadTreeRecursive(filepath.Dir(sn.Dir), s, sn.Tree) if err != nil { return err } - bl = restic.NewBlobList() - err = newTree.CopyFrom(bl, oldTree, ch.BlobList()) + err = newTree.CopyFrom(oldTree, &s) if err != nil { return err } @@ -175,14 +172,14 @@ func (cmd CmdBackup) Execute(args []string) error { } } - arch, err := restic.NewArchiver(s, bl, archiveProgress) + arch, err := restic.NewArchiver(s, archiveProgress) if err != nil { fmt.Fprintf(os.Stderr, "err: %v\n", err) } arch.Error = func(dir string, fi os.FileInfo, err error) error { // TODO: make ignoring errors configurable - 
fmt.Fprintf(os.Stderr, "\nerror for %s: %v\n", dir, err) + fmt.Fprintf(os.Stderr, "\x1b[2K\rerror for %s: %v\n", dir, err) return nil } diff --git a/cmd/restic/cmd_cat.go b/cmd/restic/cmd_cat.go index 47acbe38e..464f556cc 100644 --- a/cmd/restic/cmd_cat.go +++ b/cmd/restic/cmd_cat.go @@ -53,66 +53,26 @@ func (cmd CmdCat) Execute(args []string) error { } } - ch := restic.NewContentHandler(s) - switch tpe { case "blob": - err = ch.LoadAllMaps() - if err != nil { - return err - } - - // try id - data, err := ch.Load(backend.Data, id) + // try storage id + data, err := s.LoadID(backend.Data, id) if err == nil { _, err = os.Stdout.Write(data) return err } - // try storage id - buf, err := s.Get(backend.Data, id) - if err != nil { - return err - } - - // decrypt - buf, err = s.Decrypt(buf) - if err != nil { - return err - } - - _, err = os.Stdout.Write(buf) + _, err = os.Stdout.Write(data) return err case "tree": - err = ch.LoadAllMaps() + // try storage id + tree := &restic.Tree{} + err := s.LoadJSONID(backend.Tree, id, tree) if err != nil { return err } - var tree restic.Tree - // try id - err := ch.LoadJSON(backend.Tree, id, &tree) - if err != nil { - // try storage id - buf, err := s.Get(backend.Tree, id) - if err != nil { - return err - } - - // decrypt - buf, err = s.Decrypt(buf) - if err != nil { - return err - } - - // unmarshal - err = json.Unmarshal(backend.Uncompress(buf), &tree) - if err != nil { - return err - } - } - buf, err := json.MarshalIndent(&tree, "", " ") if err != nil { return err @@ -120,26 +80,10 @@ func (cmd CmdCat) Execute(args []string) error { fmt.Println(string(buf)) - return nil - case "map": - var bl restic.BlobList - err := ch.LoadJSONRaw(backend.Map, id, &bl) - if err != nil { - return err - } - - buf, err := json.MarshalIndent(&bl, "", " ") - if err != nil { - return err - } - - fmt.Println(string(buf)) - return nil case "snapshot": - var sn restic.Snapshot - - err = ch.LoadJSONRaw(backend.Snapshot, id, &sn) + sn := &restic.Snapshot{} + 
err = s.LoadJSONID(backend.Snapshot, id, sn) if err != nil { return err } diff --git a/cmd/restic/cmd_find.go b/cmd/restic/cmd_find.go index 544d5be28..d3111f1d2 100644 --- a/cmd/restic/cmd_find.go +++ b/cmd/restic/cmd_find.go @@ -63,16 +63,15 @@ func parseTime(str string) (time.Time, error) { return time.Time{}, fmt.Errorf("unable to parse time: %q", str) } -func (c CmdFind) findInTree(ch *restic.ContentHandler, id backend.ID, path string) ([]findResult, error) { - debug("checking tree %v\n", id) - - tree, err := restic.LoadTree(ch, id) +func (c CmdFind) findInTree(s restic.Server, blob restic.Blob, path string) ([]findResult, error) { + debug("checking tree %v\n", blob) + tree, err := restic.LoadTree(s, blob) if err != nil { return nil, err } results := []findResult{} - for _, node := range tree { + for _, node := range tree.Nodes { debug(" testing entry %q\n", node.Name) m, err := filepath.Match(c.pattern, node.Name) @@ -98,7 +97,12 @@ func (c CmdFind) findInTree(ch *restic.ContentHandler, id backend.ID, path strin } if node.Type == "dir" { - subdirResults, err := c.findInTree(ch, node.Subtree, filepath.Join(path, node.Name)) + b, err := tree.Map.FindID(node.Subtree) + if err != nil { + return nil, err + } + + subdirResults, err := c.findInTree(s, b, filepath.Join(path, node.Name)) if err != nil { return nil, err } @@ -113,13 +117,12 @@ func (c CmdFind) findInTree(ch *restic.ContentHandler, id backend.ID, path strin func (c CmdFind) findInSnapshot(s restic.Server, id backend.ID) error { debug("searching in snapshot %s\n for entries within [%s %s]", id, c.oldest, c.newest) - ch := restic.NewContentHandler(s) - sn, err := ch.LoadSnapshot(id) + sn, err := restic.LoadSnapshot(s, id) if err != nil { return err } - results, err := c.findInTree(ch, sn.Tree, "") + results, err := c.findInTree(s, sn.Tree, "") if err != nil { return err } diff --git a/cmd/restic/cmd_fsck.go b/cmd/restic/cmd_fsck.go index 779092ba9..64e34b726 100644 --- a/cmd/restic/cmd_fsck.go +++ 
b/cmd/restic/cmd_fsck.go @@ -16,9 +16,8 @@ type CmdFsck struct { RemoveOrphaned bool `short:"r" long:"remove-orphaned" description:"Remove orphaned blobs (implies -o)"` // lists checking for orphaned blobs - o_data *restic.BlobList - o_trees *restic.BlobList - o_maps *restic.BlobList + o_data *backend.IDSet + o_trees *backend.IDSet } func init() { @@ -31,91 +30,103 @@ func init() { } } -func fsckFile(opts CmdFsck, ch *restic.ContentHandler, IDs []backend.ID) error { +func fsckFile(opts CmdFsck, s restic.Server, m *restic.Map, IDs []backend.ID) (uint64, error) { + var bytes uint64 + for _, id := range IDs { debug("checking data blob %v\n", id) + // test if blob is in map + blob, err := m.FindID(id) + if err != nil { + return 0, fmt.Errorf("storage ID for data blob %v not found", id) + } + + bytes += blob.Size + if opts.CheckData { // load content - _, err := ch.Load(backend.Data, id) + _, err := s.Load(backend.Data, blob) if err != nil { - return err + return 0, err } } else { // test if data blob is there - ok, err := ch.Test(backend.Data, id) + ok, err := s.Test(backend.Data, blob.Storage) if err != nil { - return err + return 0, err } if !ok { - return fmt.Errorf("data blob %v not found", id) + return 0, fmt.Errorf("data blob %v not found", id) } } // if orphan check is active, record storage id if opts.o_data != nil { - // lookup storage ID - sid, err := ch.Lookup(id) - if err != nil { - return err - } - - // add ID to list - opts.o_data.Insert(restic.Blob{ID: sid}) + opts.o_data.Insert(blob.Storage) } } - return nil + return bytes, nil } -func fsckTree(opts CmdFsck, ch *restic.ContentHandler, id backend.ID) error { - debug("checking tree %v\n", id) +func fsckTree(opts CmdFsck, s restic.Server, blob restic.Blob) error { + debug("checking tree %v\n", blob.ID) - tree, err := restic.LoadTree(ch, id) + tree, err := restic.LoadTree(s, blob) if err != nil { return err } // if orphan check is active, record storage id if opts.o_trees != nil { - // lookup storage ID - 
sid, err := ch.Lookup(id) - if err != nil { - return err - } - // add ID to list - opts.o_trees.Insert(restic.Blob{ID: sid}) + opts.o_trees.Insert(blob.Storage) } var firstErr error - for i, node := range tree { + for i, node := range tree.Nodes { if node.Name == "" { - return fmt.Errorf("node %v of tree %v has no name", i, id) + return fmt.Errorf("node %v of tree %v has no name", i, blob.ID) } if node.Type == "" { - return fmt.Errorf("node %q of tree %v has no type", node.Name, id) + return fmt.Errorf("node %q of tree %v has no type", node.Name, blob.ID) } switch node.Type { case "file": - if node.Content == nil && node.Error == "" { - return fmt.Errorf("file node %q of tree %v has no content", node.Name, id) + if node.Content == nil { + return fmt.Errorf("file node %q of tree %v has no content: %v", node.Name, blob.ID, node) } - err := fsckFile(opts, ch, node.Content) + if node.Content == nil && node.Error == "" { + return fmt.Errorf("file node %q of tree %v has no content", node.Name, blob.ID) + } + + bytes, err := fsckFile(opts, s, tree.Map, node.Content) if err != nil { return err } + + if bytes != node.Size { + return fmt.Errorf("file node %q of tree %v has size %d, but only %d bytes could be found", node.Name, blob, node.Size, bytes) + } case "dir": if node.Subtree == nil { - return fmt.Errorf("dir node %q of tree %v has no subtree", node.Name, id) + return fmt.Errorf("dir node %q of tree %v has no subtree", node.Name, blob.ID) } - err := fsckTree(opts, ch, node.Subtree) + // lookup blob + subtreeBlob, err := tree.Map.FindID(node.Subtree) + if err != nil { + firstErr = err + fmt.Fprintf(os.Stderr, "%v\n", err) + } + + err = fsckTree(opts, s, subtreeBlob) if err != nil { firstErr = err fmt.Fprintf(os.Stderr, "%v\n", err) @@ -129,27 +140,22 @@ func fsckTree(opts CmdFsck, ch *restic.ContentHandler, id backend.ID) error { func fsck_snapshot(opts CmdFsck, s restic.Server, id backend.ID) error { debug("checking snapshot %v\n", id) - ch := 
restic.NewContentHandler(s) - - sn, err := ch.LoadSnapshot(id) + sn, err := restic.LoadSnapshot(s, id) if err != nil { - return err + return fmt.Errorf("loading snapshot %v failed: %v", id, err) } - if sn.Tree == nil { - return fmt.Errorf("snapshot %v has no content", sn.ID) + if !sn.Tree.Valid() { + return fmt.Errorf("snapshot %v has invalid tree %v", sn.ID, sn.Tree) } - if sn.Map == nil { - return fmt.Errorf("snapshot %v has no map", sn.ID) + err = fsckTree(opts, s, sn.Tree) + if err != nil { + debug(" checking tree %v for snapshot %v\n", sn.Tree, id) + fmt.Fprintf(os.Stderr, "snapshot %v:\n error for tree %v:\n %v\n", id, sn.Tree, err) } - // if orphan check is active, record storage id for map - if opts.o_maps != nil { - opts.o_maps.Insert(restic.Blob{ID: sn.Map}) - } - - return fsckTree(opts, ch, sn.Tree) + return err } func (cmd CmdFsck) Usage() string { @@ -185,9 +191,8 @@ func (cmd CmdFsck) Execute(args []string) error { } if cmd.Orphaned { - cmd.o_data = restic.NewBlobList() - cmd.o_trees = restic.NewBlobList() - cmd.o_maps = restic.NewBlobList() + cmd.o_data = backend.NewIDSet() + cmd.o_trees = backend.NewIDSet() } list, err := s.List(backend.Snapshot) @@ -214,11 +219,10 @@ func (cmd CmdFsck) Execute(args []string) error { l := []struct { desc string tpe backend.Type - list *restic.BlobList + set *backend.IDSet }{ {"data blob", backend.Data, cmd.o_data}, {"tree", backend.Tree, cmd.o_trees}, - {"maps", backend.Map, cmd.o_maps}, } for _, d := range l { @@ -230,8 +234,8 @@ func (cmd CmdFsck) Execute(args []string) error { } for _, id := range blobs { - _, err := d.list.Find(restic.Blob{ID: id}) - if err == restic.ErrBlobNotFound { + err := d.set.Find(id) + if err != nil { if !cmd.RemoveOrphaned { fmt.Printf("orphaned %v %v\n", d.desc, id) continue diff --git a/cmd/restic/cmd_list.go b/cmd/restic/cmd_list.go index 5d43d8eed..58e2b4301 100644 --- a/cmd/restic/cmd_list.go +++ b/cmd/restic/cmd_list.go @@ -46,8 +46,6 @@ func (cmd CmdList) Execute(args []string) 
error { each = s.EachDecrypted case "snapshots": t = backend.Snapshot - case "maps": - t = backend.Map case "keys": t = backend.Key case "locks": diff --git a/cmd/restic/cmd_ls.go b/cmd/restic/cmd_ls.go index 037d6b488..0c34e93c7 100644 --- a/cmd/restic/cmd_ls.go +++ b/cmd/restic/cmd_ls.go @@ -37,19 +37,22 @@ func print_node(prefix string, n *restic.Node) string { } } -func print_tree(prefix string, ch *restic.ContentHandler, id backend.ID) error { - tree := &restic.Tree{} - - err := ch.LoadJSON(backend.Tree, id, tree) +func print_tree(prefix string, s restic.Server, blob restic.Blob) error { + tree, err := restic.LoadTree(s, blob) if err != nil { return err } - for _, entry := range *tree { + for _, entry := range tree.Nodes { fmt.Println(print_node(prefix, entry)) if entry.Type == "dir" && entry.Subtree != nil { - err = print_tree(filepath.Join(prefix, entry.Name), ch, entry.Subtree) + b, err := tree.Map.FindID(entry.Subtree) + if err != nil { + return err + } + + err = print_tree(filepath.Join(prefix, entry.Name), s, b) if err != nil { return err } @@ -78,17 +81,12 @@ func (cmd CmdLs) Execute(args []string) error { return err } - ch := restic.NewContentHandler(s) - if err != nil { - return err - } - - sn, err := ch.LoadSnapshot(id) + sn, err := restic.LoadSnapshot(s, id) if err != nil { return err } fmt.Printf("snapshot of %s at %s:\n", sn.Dir, sn.Time) - return print_tree("", ch, sn.Tree) + return print_tree("", s, sn.Tree) } diff --git a/cmd/restic/cmd_snapshots.go b/cmd/restic/cmd_snapshots.go index 1a1a7c25b..5dbe2b75a 100644 --- a/cmd/restic/cmd_snapshots.go +++ b/cmd/restic/cmd_snapshots.go @@ -97,15 +97,13 @@ func (cmd CmdSnapshots) Execute(args []string) error { return err } - ch := restic.NewContentHandler(s) - tab := NewTable() tab.Header = fmt.Sprintf("%-8s %-19s %-10s %s", "ID", "Date", "Source", "Directory") tab.RowFormat = "%-8s %-19s %-10s %s" list := []*restic.Snapshot{} s.EachID(backend.Snapshot, func(id backend.ID) { - sn, err := 
restic.LoadSnapshot(ch, id) + sn, err := restic.LoadSnapshot(s, id) if err != nil { fmt.Fprintf(os.Stderr, "error loading snapshot %s: %v\n", id, err) return diff --git a/contenthandler.go b/contenthandler.go deleted file mode 100644 index 03284c1cc..000000000 --- a/contenthandler.go +++ /dev/null @@ -1,250 +0,0 @@ -package restic - -import ( - "encoding/json" - "errors" - "fmt" - - "github.com/restic/restic/backend" -) - -var ErrWrongData = errors.New("wrong data decrypt, checksum does not match") - -type ContentHandler struct { - s Server - - bl *BlobList -} - -// NewContentHandler creates a new content handler. -func NewContentHandler(s Server) *ContentHandler { - ch := &ContentHandler{ - s: s, - bl: NewBlobList(), - } - - return ch -} - -// LoadSnapshot adds all blobs from a snapshot into the content handler and returns the snapshot. -func (ch *ContentHandler) LoadSnapshot(id backend.ID) (*Snapshot, error) { - sn, err := LoadSnapshot(ch, id) - if err != nil { - return nil, err - } - - sn.bl, err = LoadBlobList(ch, sn.Map) - if err != nil { - return nil, err - } - - ch.bl.Merge(sn.bl) - - return sn, nil -} - -// LoadAllMaps adds all blobs from all snapshots that can be decrypted -// into the content handler. -func (ch *ContentHandler) LoadAllMaps() error { - // add all maps from all snapshots that can be decrypted to the storage map - err := backend.EachID(ch.s, backend.Map, func(id backend.ID) { - bl, err := LoadBlobList(ch, id) - if err != nil { - return - } - - ch.bl.Merge(bl) - }) - if err != nil { - return err - } - - return nil -} - -// Save encrypts data and stores it to the backend as type t. If the data was -// already saved before, the blob is returned. 
-func (ch *ContentHandler) Save(t backend.Type, data []byte) (Blob, error) { - // compute plaintext hash - id := backend.Hash(data) - - // test if the hash is already in the backend - blob, err := ch.bl.Find(Blob{ID: id}) - if err == nil { - id.Free() - return blob, nil - } - - // else create a new blob - blob = Blob{ - ID: id, - Size: uint64(len(data)), - } - - var ciphertext []byte - - // for a bloblist/map, use a larger buffer - if t == backend.Map { - ciphertext = make([]byte, len(data)+CiphertextExtension) - } else { - // otherwise use buffer from pool - ciphertext = GetChunkBuf("ch.Save()") - defer FreeChunkBuf("ch.Save()", ciphertext) - } - - // encrypt blob - n, err := ch.s.Encrypt(ciphertext, data) - if err != nil { - return Blob{}, err - } - - ciphertext = ciphertext[:n] - - // save blob - sid, err := ch.s.Create(t, ciphertext) - if err != nil { - return Blob{}, err - } - - blob.Storage = sid - blob.StorageSize = uint64(len(ciphertext)) - - // insert blob into the storage map - ch.bl.Insert(blob) - - return blob, nil -} - -// SaveJSON serialises item as JSON and uses Save() to store it to the backend as type t. -func (ch *ContentHandler) SaveJSON(t backend.Type, item interface{}) (Blob, error) { - // convert to json - data, err := json.Marshal(item) - if err != nil { - return Blob{}, err - } - - // compress and save data - return ch.Save(t, backend.Compress(data)) -} - -// Load tries to load and decrypt content identified by t and id from the backend. 
-func (ch *ContentHandler) Load(t backend.Type, id backend.ID) ([]byte, error) { - if t == backend.Snapshot { - // load data - buf, err := ch.s.Get(t, id) - if err != nil { - return nil, err - } - - // decrypt - buf, err = ch.s.Decrypt(buf) - if err != nil { - return nil, err - } - - return buf, nil - } - - // lookup storage hash - blob, err := ch.bl.Find(Blob{ID: id}) - if err != nil { - return nil, fmt.Errorf("Storage ID for ID %s not found", id) - } - - // load data - buf, err := ch.s.Get(t, blob.Storage) - if err != nil { - return nil, err - } - - // check length - if len(buf) != int(blob.StorageSize) { - return nil, errors.New("Invalid storage length") - } - - // decrypt - buf, err = ch.s.Decrypt(buf) - if err != nil { - return nil, err - } - - // check length - if len(buf) != int(blob.Size) { - return nil, errors.New("Invalid length") - } - - // check SHA256 sum - if !id.Equal(backend.Hash(buf)) { - return nil, ErrWrongData - } - - return buf, nil -} - -// Lookup returns the storage ID for the given blob -func (ch *ContentHandler) Lookup(id backend.ID) (backend.ID, error) { - // lookup storage hash - blob, err := ch.bl.Find(Blob{ID: id}) - if err != nil { - return nil, err - } - - return blob.Storage, nil -} - -// LoadJSON calls Load() to get content from the backend and afterwards calls -// json.Unmarshal on the item. -func (ch *ContentHandler) LoadJSON(t backend.Type, id backend.ID, item interface{}) error { - // load from backend - buf, err := ch.Load(t, id) - if err != nil { - return err - } - - // inflate and unmarshal - err = json.Unmarshal(backend.Uncompress(buf), item) - return err -} - -// LoadJSONRaw loads data with the given storage id and type from the backend, -// decrypts it and calls json.Unmarshal on the item. 
-func (ch *ContentHandler) LoadJSONRaw(t backend.Type, id backend.ID, item interface{}) error { - // load data - buf, err := ch.s.Get(t, id) - if err != nil { - return err - } - - // decrypt - buf, err = ch.s.Decrypt(buf) - if err != nil { - return err - } - - // inflate and unmarshal - err = json.Unmarshal(backend.Uncompress(buf), item) - return err -} - -// Test checks if a blob is in the repository. For Data and Tree blobs, the -// storage ID is looked up. -func (ch *ContentHandler) Test(t backend.Type, id backend.ID) (bool, error) { - if t == backend.Data || t == backend.Tree { - // lookup storage id - - // lookup storage hash - blob, err := ch.bl.Find(Blob{ID: id}) - if err != nil { - return false, fmt.Errorf("Storage ID for ID %s not found", id) - } - - id = blob.Storage - } - - return ch.s.Test(t, id) -} - -// BlobList returns the current BlobList. -func (ch *ContentHandler) BlobList() *BlobList { - return ch.bl -} diff --git a/map.go b/map.go new file mode 100644 index 000000000..99db34bea --- /dev/null +++ b/map.go @@ -0,0 +1,188 @@ +package restic + +import ( + "bytes" + "encoding/json" + "errors" + "sort" + "sync" + + "github.com/restic/restic/backend" +) + +type Map struct { + list []Blob + m sync.Mutex +} + +var ErrBlobNotFound = errors.New("Blob not found") + +func NewMap() *Map { + return &Map{ + list: []Blob{}, + } +} + +func (bl *Map) find(blob Blob, checkSize bool) (int, Blob, error) { + pos := sort.Search(len(bl.list), func(i int) bool { + return blob.ID.Compare(bl.list[i].ID) >= 0 + }) + + if pos < len(bl.list) { + b := bl.list[pos] + if blob.ID.Compare(b.ID) == 0 && (!checkSize || blob.Size == b.Size) { + return pos, b, nil + } + } + + return pos, Blob{}, ErrBlobNotFound +} + +func (bl *Map) Find(blob Blob) (Blob, error) { + bl.m.Lock() + defer bl.m.Unlock() + + _, blob, err := bl.find(blob, true) + return blob, err +} + +func (bl *Map) FindID(id backend.ID) (Blob, error) { + bl.m.Lock() + defer bl.m.Unlock() + + _, blob, err := 
bl.find(Blob{ID: id}, false) + return blob, err +} + +func (bl *Map) Merge(other *Map) { + bl.m.Lock() + defer bl.m.Unlock() + other.m.Lock() + defer other.m.Unlock() + + for _, blob := range other.list { + bl.insert(blob) + } +} + +func (bl *Map) insert(blob Blob) Blob { + pos, b, err := bl.find(blob, true) + if err == nil { + // already present + return b + } + + // insert blob + // https://code.google.com/p/go-wiki/wiki/SliceTricks + bl.list = append(bl.list, Blob{}) + copy(bl.list[pos+1:], bl.list[pos:]) + bl.list[pos] = blob + + return blob +} + +func (bl *Map) Insert(blob Blob) Blob { + bl.m.Lock() + defer bl.m.Unlock() + + debug(" Map<%p> insert %v", bl, blob) + + return bl.insert(blob) +} + +func (bl *Map) MarshalJSON() ([]byte, error) { + return json.Marshal(bl.list) +} + +func (bl *Map) UnmarshalJSON(data []byte) error { + return json.Unmarshal(data, &bl.list) +} + +func (bl *Map) IDs() []backend.ID { + bl.m.Lock() + defer bl.m.Unlock() + + ids := make([]backend.ID, 0, len(bl.list)) + for _, b := range bl.list { + ids = append(ids, b.ID) + } + + return ids +} + +func (bl *Map) StorageIDs() []backend.ID { + bl.m.Lock() + defer bl.m.Unlock() + + ids := make([]backend.ID, 0, len(bl.list)) + for _, b := range bl.list { + ids = append(ids, b.Storage) + } + + return ids +} + +func (bl *Map) Equals(other *Map) bool { + bl.m.Lock() + defer bl.m.Unlock() + + if len(bl.list) != len(other.list) { + return false + } + + for i := 0; i < len(bl.list); i++ { + if bl.list[i].Compare(other.list[i]) != 0 { + return false + } + } + + return true +} + +// Prune deletes all IDs from the map except the ones listed in ids. +func (m *Map) Prune(ids *backend.IDSet) { + m.m.Lock() + defer m.m.Unlock() + + pos := 0 + for pos < len(m.list) { + blob := m.list[pos] + if ids.Find(blob.ID) != nil { + // remove element + m.list = append(m.list[:pos], m.list[pos+1:]...) + continue + } + + pos++ + } +} + +// DeleteID removes the plaintext ID id from the map. 
+func (m *Map) DeleteID(id backend.ID) { + m.m.Lock() + defer m.m.Unlock() + + pos, _, err := m.find(Blob{ID: id}, false) + if err != nil { + return + } + + m.list = append(m.list[:pos], m.list[pos+1:]...) +} + +// Compare compares two blobs by comparing the ID and the size. It returns -1, +// 0, or 1. +func (blob Blob) Compare(other Blob) int { + if res := bytes.Compare(other.ID, blob.ID); res != 0 { + return res + } + + if blob.Size < other.Size { + return -1 + } + if blob.Size > other.Size { + return 1 + } + + return 0 +} diff --git a/bloblist_test.go b/map_test.go similarity index 75% rename from bloblist_test.go rename to map_test.go index 79d001584..e61c0a120 100644 --- a/bloblist_test.go +++ b/map_test.go @@ -14,7 +14,7 @@ import ( "github.com/restic/restic/backend" ) -var maxWorkers = flag.Uint("workers", 20, "number of workers to test BlobList concurrent access against") +var maxWorkers = flag.Uint("workers", 20, "number of workers to test Map concurrent access against") func randomID() []byte { buf := make([]byte, backend.IDSize) @@ -26,12 +26,17 @@ func randomID() []byte { } func newBlob() restic.Blob { - return restic.Blob{ID: randomID(), Size: uint64(mrand.Uint32())} + return restic.Blob{ + ID: randomID(), + Size: uint64(mrand.Uint32()), + Storage: randomID(), + StorageSize: uint64(mrand.Uint32()), + } } // Test basic functionality -func TestBlobList(t *testing.T) { - bl := restic.NewBlobList() +func TestMap(t *testing.T) { + bl := restic.NewMap() b := newBlob() bl.Insert(b) @@ -40,11 +45,15 @@ func TestBlobList(t *testing.T) { bl.Insert(newBlob()) } - b2, err := bl.Find(restic.Blob{ID: b.ID}) + b2, err := bl.Find(restic.Blob{ID: b.ID, Size: b.Size}) ok(t, err) assert(t, b2.Compare(b) == 0, "items are not equal: want %v, got %v", b, b2) - bl2 := restic.NewBlobList() + b2, err = bl.FindID(b.ID) + ok(t, err) + assert(t, b2.Compare(b) == 0, "items are not equal: want %v, got %v", b, b2) + + bl2 := restic.NewMap() for i := 0; i < 1000; i++ { 
bl.Insert(newBlob()) } @@ -66,8 +75,8 @@ func TestBlobList(t *testing.T) { } // Test JSON encode/decode -func TestBlobListJSON(t *testing.T) { - bl := restic.NewBlobList() +func TestMapJSON(t *testing.T) { + bl := restic.NewMap() b := restic.Blob{ID: randomID()} bl.Insert(b) @@ -78,7 +87,7 @@ func TestBlobListJSON(t *testing.T) { buf, err := json.Marshal(bl) ok(t, err) - bl2 := restic.BlobList{} + bl2 := restic.Map{} json.Unmarshal(buf, &bl2) b2, err = bl2.Find(b) @@ -90,10 +99,10 @@ func TestBlobListJSON(t *testing.T) { } // random insert/find access by several goroutines -func TestBlobListRandom(t *testing.T) { +func TestMapRandom(t *testing.T) { var wg sync.WaitGroup - worker := func(bl *restic.BlobList) { + worker := func(bl *restic.Map) { defer wg.Done() b := newBlob() @@ -117,7 +126,7 @@ func TestBlobListRandom(t *testing.T) { } } - bl2 := restic.NewBlobList() + bl2 := restic.NewMap() for i := 0; i < 200; i++ { bl2.Insert(newBlob()) } @@ -125,7 +134,7 @@ func TestBlobListRandom(t *testing.T) { bl2.Merge(bl) } - bl := restic.NewBlobList() + bl := restic.NewMap() for i := 0; uint(i) < *maxWorkers; i++ { wg.Add(1) diff --git a/restorer.go b/restorer.go index c8d4b2d5b..88fdafa83 100644 --- a/restorer.go +++ b/restorer.go @@ -13,7 +13,6 @@ import ( type Restorer struct { s Server - ch *ContentHandler sn *Snapshot Error func(dir string, node *Node, err error) error @@ -25,9 +24,8 @@ func NewRestorer(s Server, snid backend.ID) (*Restorer, error) { r := &Restorer{s: s} var err error - r.ch = NewContentHandler(s) - r.sn, err = r.ch.LoadSnapshot(snid) + r.sn, err = LoadSnapshot(s, snid) if err != nil { return nil, arrar.Annotate(err, "load snapshot for restorer") } @@ -38,19 +36,18 @@ func NewRestorer(s Server, snid backend.ID) (*Restorer, error) { return r, nil } -func (res *Restorer) to(dst string, dir string, tree_id backend.ID) error { - tree := Tree{} - err := res.ch.LoadJSON(backend.Tree, tree_id, &tree) +func (res *Restorer) to(dst string, dir string, treeBlob 
Blob) error { + tree, err := LoadTree(res.s, treeBlob) if err != nil { - return res.Error(dir, nil, arrar.Annotate(err, "LoadJSON")) + return res.Error(dir, nil, arrar.Annotate(err, "LoadTree")) } - for _, node := range tree { + for _, node := range tree.Nodes { dstpath := filepath.Join(dst, dir, node.Name) if res.Filter == nil || res.Filter(filepath.Join(res.sn.Dir, dir, node.Name), dstpath, node) { - err := node.CreateAt(res.ch, dstpath) + err := tree.CreateNodeAt(node, res.s, dstpath) // Did it fail because of ENOENT? if arrar.Check(err, func(err error) bool { @@ -63,7 +60,7 @@ func (res *Restorer) to(dst string, dir string, tree_id backend.ID) error { // Create parent directories and retry err = os.MkdirAll(filepath.Dir(dstpath), 0700) if err == nil || err == os.ErrExist { - err = node.CreateAt(res.ch, dstpath) + err = tree.CreateNodeAt(node, res.s, dstpath) } } @@ -77,11 +74,20 @@ func (res *Restorer) to(dst string, dir string, tree_id backend.ID) error { if node.Type == "dir" { if node.Subtree == nil { - return errors.New(fmt.Sprintf("Dir without subtree in tree %s", tree_id)) + return errors.New(fmt.Sprintf("Dir without subtree in tree %s", treeBlob)) } subp := filepath.Join(dir, node.Name) - err = res.to(dst, subp, node.Subtree) + + subtreeBlob, err := tree.Map.FindID(node.Subtree) + if err != nil { + err = res.Error(subp, node, arrar.Annotate(err, "lookup subtree")) + if err != nil { + return err + } + } + + err = res.to(dst, subp, subtreeBlob) if err != nil { err = res.Error(subp, node, arrar.Annotate(err, "restore subtree")) if err != nil { diff --git a/scanner.go b/scanner.go index 9c90465d1..fc2daba46 100644 --- a/scanner.go +++ b/scanner.go @@ -45,7 +45,7 @@ func scan(filterFn FilterFunc, progress *Progress, dir string) (*Tree, error) { } // build new tree - tree := Tree{} + tree := NewTree() for _, entry := range entries { path := filepath.Join(dir, entry.Name()) @@ -70,7 +70,7 @@ func scan(filterFn FilterFunc, progress *Progress, dir string) (*Tree, 
error) { } } - for _, node := range tree { + for _, node := range tree.Nodes { if node.Type == "file" && node.Content != nil { continue } @@ -83,7 +83,7 @@ func scan(filterFn FilterFunc, progress *Progress, dir string) (*Tree, error) { } } - return &tree, nil + return tree, nil } func (sc *Scanner) Scan(path string) (*Tree, error) { @@ -100,12 +100,12 @@ func (sc *Scanner) Scan(path string) (*Tree, error) { return nil, arrar.Annotate(err, "NodeFromFileInfo()") } + tree := NewTree() + tree.Insert(node) if node.Type != "dir" { - t := &Tree{node} - sc.p.Report(Stat{Files: 1, Bytes: node.Size}) - return t, nil + return tree, nil } sc.p.Report(Stat{Dirs: 1}) @@ -115,5 +115,5 @@ func (sc *Scanner) Scan(path string) (*Tree, error) { return nil, arrar.Annotate(err, "loadTree()") } - return &Tree{node}, nil + return tree, nil } diff --git a/server.go b/server.go index 563971ffb..5c1f5f64a 100644 --- a/server.go +++ b/server.go @@ -1,7 +1,9 @@ package restic import ( + "encoding/json" "errors" + "fmt" "github.com/restic/restic/backend" ) @@ -50,6 +52,132 @@ func (s Server) PrefixLength(t backend.Type) (int, error) { return backend.PrefixLength(s.be, t) } +// Load tries to load and decrypt content identified by t and blob from the backend. +func (s Server) Load(t backend.Type, blob Blob) ([]byte, error) { + // load data + buf, err := s.Get(t, blob.Storage) + if err != nil { + return nil, err + } + + // check length + if len(buf) != int(blob.StorageSize) { + return nil, errors.New("Invalid storage length") + } + + // decrypt + buf, err = s.Decrypt(buf) + if err != nil { + return nil, err + } + + // check length + if len(buf) != int(blob.Size) { + return nil, errors.New("Invalid length") + } + + // check SHA256 sum + id := backend.Hash(buf) + if !blob.ID.Equal(id) { + return nil, fmt.Errorf("load %v: expected plaintext hash %v, got %v", blob.Storage, blob.ID, id) + } + + return buf, nil +} + +// LoadID tries to load and decrypt content identified by t and storageID from the backend.
+func (s Server) LoadID(t backend.Type, storageID backend.ID) ([]byte, error) { + // load data + buf, err := s.Get(t, storageID) + if err != nil { + return nil, err + } + + // decrypt + buf, err = s.Decrypt(buf) + if err != nil { + return nil, err + } + + return buf, nil +} + +// LoadJSON calls Load() to get content from the backend and afterwards calls +// json.Unmarshal on the item. +func (s Server) LoadJSON(t backend.Type, blob Blob, item interface{}) error { + // load from backend + buf, err := s.Load(t, blob) + if err != nil { + return err + } + + // inflate and unmarshal + err = json.Unmarshal(backend.Uncompress(buf), item) + return err +} + +// LoadJSONID calls LoadID() to get content from the backend and afterwards calls +// json.Unmarshal on the item. +func (s Server) LoadJSONID(t backend.Type, storageID backend.ID, item interface{}) error { + // load from backend + buf, err := s.LoadID(t, storageID) + if err != nil { + return err + } + + // inflate and unmarshal + err = json.Unmarshal(backend.Uncompress(buf), item) + return err +} + +// Save encrypts data and stores it to the backend as type t. +func (s Server) Save(t backend.Type, data []byte, id backend.ID) (Blob, error) { + if id == nil { + // compute plaintext hash + id = backend.Hash(data) + } + + // create a new blob + blob := Blob{ + ID: id, + Size: uint64(len(data)), + } + + ciphertext := GetChunkBuf("ch.Save()") + defer FreeChunkBuf("ch.Save()", ciphertext) + + // encrypt blob + n, err := s.Encrypt(ciphertext, data) + if err != nil { + return Blob{}, err + } + + ciphertext = ciphertext[:n] + + // save blob + sid, err := s.Create(t, ciphertext) + if err != nil { + return Blob{}, err + } + + blob.Storage = sid + blob.StorageSize = uint64(len(ciphertext)) + + return blob, nil +} + +// SaveJSON serialises item as JSON and uses Save() to store it to the backend as type t.
+func (s Server) SaveJSON(t backend.Type, item interface{}) (Blob, error) { + // convert to json + data, err := json.Marshal(item) + if err != nil { + return Blob{}, err + } + + // compress and save data + return s.Save(t, backend.Compress(data), nil) +} + // Returns the backend used for this server. func (s Server) Backend() backend.Backend { return s.be diff --git a/snapshot.go b/snapshot.go index 9548d616e..838303ff8 100644 --- a/snapshot.go +++ b/snapshot.go @@ -14,8 +14,7 @@ import ( type Snapshot struct { Time time.Time `json:"time"` Parent backend.ID `json:"parent,omitempty"` - Tree backend.ID `json:"tree"` - Map backend.ID `json:"map"` + Tree Blob `json:"tree"` Dir string `json:"dir"` Hostname string `json:"hostname,omitempty"` Username string `json:"username,omitempty"` @@ -23,7 +22,6 @@ type Snapshot struct { GID uint32 `json:"gid,omitempty"` id backend.ID // plaintext ID, used during restore - bl *BlobList } func NewSnapshot(dir string) (*Snapshot, error) { @@ -61,9 +59,9 @@ func NewSnapshot(dir string) (*Snapshot, error) { return sn, nil } -func LoadSnapshot(ch *ContentHandler, id backend.ID) (*Snapshot, error) { +func LoadSnapshot(s Server, id backend.ID) (*Snapshot, error) { sn := &Snapshot{id: id} - err := ch.LoadJSON(backend.Snapshot, id, sn) + err := s.LoadJSONID(backend.Snapshot, id, sn) if err != nil { return nil, err } diff --git a/snapshot_test.go b/snapshot_test.go index 39213d20f..887c1a209 100644 --- a/snapshot_test.go +++ b/snapshot_test.go @@ -5,15 +5,14 @@ import ( "time" "github.com/restic/restic" - "github.com/restic/restic/backend" ) func testSnapshot(t *testing.T, s restic.Server) { var err error sn, err := restic.NewSnapshot("/home/foobar") ok(t, err) - sn.Tree, err = backend.ParseID("c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2") - ok(t, err) + // sn.Tree, err = restic.Blob{ID: backend.ParseID("c3ab8ff13720e8ad9047dd39466b3c8974e592c2fa383d4a3960714caef0c4f2")} + // ok(t, err) sn.Time, err = 
time.Parse(time.RFC3339Nano, "2014-08-03T17:49:05.378595539+02:00") ok(t, err) diff --git a/tree.go b/tree.go index 7f1ac4032..fa3117897 100644 --- a/tree.go +++ b/tree.go @@ -7,10 +7,8 @@ import ( "os" "os/user" "path/filepath" - "reflect" "sort" "strconv" - "strings" "syscall" "time" @@ -18,7 +16,10 @@ import ( "github.com/restic/restic/backend" ) -type Tree []*Node +type Tree struct { + Nodes []*Node `json:"nodes"` + Map *Map `json:"map"` +} type Node struct { Name string `json:"name"` @@ -54,6 +55,7 @@ var ( type Blob struct { ID backend.ID `json:"id,omitempty"` + Offset uint64 `json:"offset,omitempty"` Size uint64 `json:"size,omitempty"` Storage backend.ID `json:"sid,omitempty"` // encrypted ID StorageSize uint64 `json:"ssize,omitempty"` // encrypted Size @@ -74,21 +76,20 @@ func (n Node) String() string { return fmt.Sprintf("", n.Type, n.Name) } -func (t Tree) String() string { - s := []string{} - for _, n := range t { - s = append(s, n.String()) +func NewTree() *Tree { + return &Tree{ + Nodes: []*Node{}, + Map: NewMap(), } - return strings.Join(s, "\n") } -func LoadTree(ch *ContentHandler, id backend.ID) (Tree, error) { - if id == nil { - return nil, nil - } +func (t Tree) String() string { + return fmt.Sprintf("Tree<%d nodes, %d blobs>", len(t.Nodes), len(t.Map.list)) +} - tree := Tree{} - err := ch.LoadJSON(backend.Tree, id, &tree) +func LoadTree(s Server, blob Blob) (*Tree, error) { + tree := &Tree{} + err := s.LoadJSON(backend.Tree, blob, tree) if err != nil { return nil, err } @@ -96,23 +97,28 @@ func LoadTree(ch *ContentHandler, id backend.ID) (Tree, error) { return tree, nil } -// LoadTreeRecursive loads the tree and all subtrees via ch. -func LoadTreeRecursive(path string, ch *ContentHandler, id backend.ID) (Tree, error) { +// LoadTreeRecursive loads the tree and all subtrees via s. 
+func LoadTreeRecursive(path string, s Server, blob Blob) (*Tree, error) { // TODO: load subtrees in parallel - tree, err := LoadTree(ch, id) + tree, err := LoadTree(s, blob) if err != nil { return nil, err } - for _, n := range tree { + for _, n := range tree.Nodes { n.path = filepath.Join(path, n.Name) if n.Type == "dir" && n.Subtree != nil { - t, err := LoadTreeRecursive(n.path, ch, n.Subtree) + subtreeBlob, err := tree.Map.FindID(n.Subtree) if err != nil { return nil, err } - n.tree = &t + t, err := LoadTreeRecursive(n.path, s, subtreeBlob) + if err != nil { + return nil, err + } + + n.tree = t } } @@ -120,8 +126,9 @@ func LoadTreeRecursive(path string, ch *ContentHandler, id backend.ID) (Tree, er } // CopyFrom recursively copies all content from other to t. -func (t Tree) CopyFrom(bl *BlobList, other Tree, otherBl *BlobList) error { - for _, node := range t { +func (t Tree) CopyFrom(other *Tree, s *Server) error { + debug("CopyFrom(%v)\n", other) + for _, node := range t.Nodes { // only process files and dirs if node.Type != "file" && node.Type != "dir" { continue @@ -132,44 +139,67 @@ func (t Tree) CopyFrom(bl *BlobList, other Tree, otherBl *BlobList) error { // if the node could not be found or the type has changed, proceed to the next if err == ErrNodeNotFound || node.Type != oldNode.Type { + debug(" node %v is new\n", node) continue } if node.Type == "file" { // compare content if node.SameContent(oldNode) { + debug(" file node %v has same content\n", node) + + // check if all content is still available in the repository + for _, id := range oldNode.Content { + blob, err := other.Map.FindID(id) + if err != nil { + continue + } + + if ok, err := s.Test(backend.Data, blob.Storage); !ok || err != nil { + continue + } + } + // copy Content node.Content = oldNode.Content // copy storage IDs for _, id := range node.Content { - blob, err := otherBl.Find(Blob{ID: id}) + blob, err := other.Map.FindID(id) if err != nil { return err } - bl.Insert(blob) + debug(" 
insert blob %v\n", blob) + t.Map.Insert(blob) } } } else if node.Type == "dir" { // fill in all subtrees from old subtree - err := node.tree.CopyFrom(bl, *oldNode.tree, otherBl) + err := node.tree.CopyFrom(oldNode.tree, s) if err != nil { return err } // check if tree has changed if node.tree.Equals(*oldNode.tree) { + debug(" tree node %v has same content\n", node) + // if nothing has changed, copy subtree ID node.Subtree = oldNode.Subtree // and store blob in bloblist - blob, err := otherBl.Find(Blob{ID: oldNode.Subtree}) + blob, err := other.Map.FindID(oldNode.Subtree) if err != nil { return err } - bl.Insert(blob) + debug(" insert blob %v\n", blob) + t.Map.Insert(blob) + } else { + debug(" trees are not equal: %v\n", node) + debug(" %#v\n", node.tree) + debug(" %#v\n", oldNode.tree) } } } @@ -177,13 +207,28 @@ func (t Tree) CopyFrom(bl *BlobList, other Tree, otherBl *BlobList) error { return nil } -// Equals returns true if t and other have exactly the same nodes. +// Equals returns true if t and other have exactly the same nodes and map. 
func (t Tree) Equals(other Tree) bool { - if len(t) != len(other) { + if len(t.Nodes) != len(other.Nodes) { + debug("tree.Equals(): trees have different number of nodes") return false } - return reflect.DeepEqual(t, other) + if !t.Map.Equals(other.Map) { + debug("tree.Equals(): maps aren't equal") + return false + } + + for i := 0; i < len(t.Nodes); i++ { + if !t.Nodes[i].Equals(*other.Nodes[i]) { + debug("tree.Equals(): node %d is different:", i) + debug(" %#v", t.Nodes[i]) + debug(" %#v", other.Nodes[i]) + return false + } + } + + return true } func (t *Tree) Insert(node *Node) error { @@ -195,20 +240,20 @@ func (t *Tree) Insert(node *Node) error { // insert blob // https://code.google.com/p/go-wiki/wiki/bliceTricks - *t = append(*t, &Node{}) - copy((*t)[pos+1:], (*t)[pos:]) - (*t)[pos] = node + t.Nodes = append(t.Nodes, &Node{}) + copy(t.Nodes[pos+1:], t.Nodes[pos:]) + t.Nodes[pos] = node return nil } func (t Tree) find(name string) (int, *Node, error) { - pos := sort.Search(len(t), func(i int) bool { - return t[i].Name >= name + pos := sort.Search(len(t.Nodes), func(i int) bool { + return t.Nodes[i].Name >= name }) - if pos < len(t) && t[pos].Name == name { - return pos, t[pos], nil + if pos < len(t.Nodes) && t.Nodes[pos].Name == name { + return pos, t.Nodes[pos], nil } return pos, nil, ErrNodeNotFound @@ -221,7 +266,7 @@ func (t Tree) Find(name string) (*Node, error) { func (t Tree) Stat() Stat { s := Stat{} - for _, n := range t { + for _, n := range t.Nodes { switch n.Type { case "file": s.Files++ @@ -239,7 +284,7 @@ func (t Tree) Stat() Stat { func (t Tree) StatTodo() Stat { s := Stat{} - for _, n := range t { + for _, n := range t.Nodes { switch n.Type { case "file": if n.Content == nil { @@ -337,7 +382,7 @@ func NodeFromFileInfo(path string, fi os.FileInfo) (*Node, error) { return node, err } -func (node *Node) CreateAt(ch *ContentHandler, path string) error { +func (t Tree) CreateNodeAt(node *Node, s Server, path string) error { switch node.Type { case 
"dir": err := os.Mkdir(path, node.Mode) @@ -367,7 +412,12 @@ func (node *Node) CreateAt(ch *ContentHandler, path string) error { } for _, blobid := range node.Content { - buf, err := ch.Load(backend.Data, blobid) + blob, err := t.Map.FindID(blobid) + if err != nil { + return arrar.Annotate(err, "Find Blob") + } + + buf, err := s.Load(backend.Data, blob) if err != nil { return arrar.Annotate(err, "Load") } @@ -504,6 +554,80 @@ func (node *Node) UnmarshalJSON(data []byte) error { return err } +func (node Node) Equals(other Node) bool { + // TODO: add generatored code for this + if node.Name != other.Name { + return false + } + if node.Type != other.Type { + return false + } + if node.Mode != other.Mode { + return false + } + if node.ModTime != other.ModTime { + return false + } + if node.AccessTime != other.AccessTime { + return false + } + if node.ChangeTime != other.ChangeTime { + return false + } + if node.UID != other.UID { + return false + } + if node.GID != other.GID { + return false + } + if node.User != other.User { + return false + } + if node.Group != other.Group { + return false + } + if node.Inode != other.Inode { + return false + } + if node.Size != other.Size { + return false + } + if node.Links != other.Links { + return false + } + if node.LinkTarget != other.LinkTarget { + return false + } + if node.Device != other.Device { + return false + } + if node.Content != nil && other.Content == nil { + return false + } else if node.Content == nil && other.Content != nil { + return false + } else if node.Content != nil && other.Content != nil { + if len(node.Content) != len(other.Content) { + return false + } + + for i := 0; i < len(node.Content); i++ { + if !node.Content[i].Equal(other.Content[i]) { + return false + } + } + } + + if !node.Subtree.Equal(other.Subtree) { + return false + } + + if node.Error != other.Error { + return false + } + + return true +} + func (b Blob) Free() { if b.ID != nil { b.ID.Free() @@ -513,3 +637,17 @@ func (b Blob) Free() { 
b.Storage.Free() } } + +func (b Blob) Valid() bool { + if b.ID == nil || b.Storage == nil || b.StorageSize == 0 { + return false + } + + return true +} + +func (b Blob) String() string { + return fmt.Sprintf("Blob<%s -> %s>", + b.ID.Str(), + b.Storage.Str()) +} diff --git a/tree_test.go b/tree_test.go index c0b8f7c7b..4f293a088 100644 --- a/tree_test.go +++ b/tree_test.go @@ -76,3 +76,17 @@ func TestNodeMarshal(t *testing.T) { } } } + +func TestNodeComparison(t *testing.T) { + fi, err := os.Lstat("tree_test.go") + ok(t, err) + + node, err := restic.NodeFromFileInfo("foo", fi) + ok(t, err) + + n2 := *node + assert(t, node.Equals(n2), "nodes aren't equal") + + n2.Size -= 1 + assert(t, !node.Equals(n2), "nodes are equal") +}