diff --git a/backend/id.go b/backend/id.go
index 71b810322..966cd7a4e 100644
--- a/backend/id.go
+++ b/backend/id.go
@@ -102,33 +102,3 @@ func (id *ID) UnmarshalJSON(b []byte) error {
 func IDFromData(d []byte) ID {
 	return hashData(d)
 }
-
-type IDs []ID
-
-func (ids IDs) Len() int {
-	return len(ids)
-}
-
-func (ids IDs) Less(i, j int) bool {
-	if len(ids[i]) < len(ids[j]) {
-		return true
-	}
-
-	for k, b := range ids[i] {
-		if b == ids[j][k] {
-			continue
-		}
-
-		if b < ids[j][k] {
-			return true
-		} else {
-			return false
-		}
-	}
-
-	return false
-}
-
-func (ids IDs) Swap(i, j int) {
-	ids[i], ids[j] = ids[j], ids[i]
-}
diff --git a/backend/ids.go b/backend/ids.go
new file mode 100644
index 000000000..11cf436d2
--- /dev/null
+++ b/backend/ids.go
@@ -0,0 +1,69 @@
+package backend
+
+import (
+	"encoding/hex"
+	"fmt"
+)
+
+// IDs is an ordered list of IDs that implements sort.Interface.
+type IDs []ID
+
+func (ids IDs) Len() int {
+	return len(ids)
+}
+
+func (ids IDs) Less(i, j int) bool {
+	if len(ids[i]) < len(ids[j]) {
+		return true
+	}
+
+	for k, b := range ids[i] {
+		if b == ids[j][k] {
+			continue
+		}
+
+		if b < ids[j][k] {
+			return true
+		}
+
+		return false
+	}
+
+	return false
+}
+
+func (ids IDs) Swap(i, j int) {
+	ids[i], ids[j] = ids[j], ids[i]
+}
+
+// Uniq returns list without duplicate IDs. The returned list retains the order
+// of the original list so that the order of the first occurrence of each ID
+// stays the same.
+func (ids IDs) Uniq() (list IDs) {
+	seen := NewIDSet()
+
+	for _, id := range ids {
+		if seen.Has(id) {
+			continue
+		}
+
+		list = append(list, id)
+		seen.Insert(id)
+	}
+
+	return list
+}
+
+type shortID ID
+
+func (id shortID) String() string {
+	return hex.EncodeToString(id[:shortStr])
+}
+
+func (ids IDs) String() string {
+	elements := make([]shortID, 0, len(ids))
+	for _, id := range ids {
+		elements = append(elements, shortID(id))
+	}
+	return fmt.Sprintf("%v", elements)
+}
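Note: the new backend/ids.go hooks ID slices into the standard library's sort machinery and adds order-preserving deduplication via Uniq. A minimal usage sketch (illustration only, not part of the patch; IDFromData is the existing helper kept in id.go above):

package main

import (
	"fmt"
	"sort"

	"github.com/restic/restic/backend"
)

func main() {
	foo := backend.IDFromData([]byte("foo"))
	bar := backend.IDFromData([]byte("bar"))

	ids := backend.IDs{bar, foo, bar}
	sort.Sort(ids)          // byte-wise ordering from Len/Less/Swap
	fmt.Println(ids.Uniq()) // duplicates dropped, first occurrence kept
}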
str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"), + str2id("f658198b405d7e80db5ace1980d125c8da62f636b586c46bf81dfb856a49d0c8"), + str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), + }, + }, +} + +func TestUniqIDs(t *testing.T) { + for i, test := range uniqTests { + uniq := test.before.Uniq() + if !reflect.DeepEqual(uniq, test.after) { + t.Errorf("uniqIDs() test %v failed\n wanted: %v\n got: %v", i, test.after, uniq) + } + } +} diff --git a/backend/idset.go b/backend/idset.go new file mode 100644 index 000000000..4f27f3489 --- /dev/null +++ b/backend/idset.go @@ -0,0 +1,49 @@ +package backend + +// IDSet is a set of IDs. +type IDSet map[ID]struct{} + +// NewIDSet returns a new IDSet, populated with ids. +func NewIDSet(ids ...ID) IDSet { + m := make(IDSet) + for _, id := range ids { + m[id] = struct{}{} + } + + return m +} + +// Has returns true iff id is contained in the set. +func (s IDSet) Has(id ID) bool { + _, ok := s[id] + return ok +} + +// Insert adds id to the set. +func (s IDSet) Insert(id ID) { + s[id] = struct{}{} +} + +// Delete removes id from the set. +func (s IDSet) Delete(id ID) { + delete(s, id) +} + +// List returns a slice of all IDs in the set. +func (s IDSet) List() IDs { + list := make(IDs, 0, len(s)) + for id := range s { + list = append(list, id) + } + + return list +} + +func (s IDSet) String() string { + str := s.List().String() + if len(str) < 2 { + return "{}" + } + + return "{" + str[1:len(str)-2] + "}" +} diff --git a/backend/idset_test.go b/backend/idset_test.go new file mode 100644 index 000000000..7084c8abf --- /dev/null +++ b/backend/idset_test.go @@ -0,0 +1,34 @@ +package backend_test + +import ( + "testing" + + "github.com/restic/restic/backend" +) + +var idsetTests = []struct { + id backend.ID + seen bool +}{ + {str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), false}, + {str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"), false}, + {str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), true}, + {str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), true}, + {str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"), true}, + {str2id("f658198b405d7e80db5ace1980d125c8da62f636b586c46bf81dfb856a49d0c8"), false}, + {str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), true}, + {str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"), true}, + {str2id("f658198b405d7e80db5ace1980d125c8da62f636b586c46bf81dfb856a49d0c8"), true}, + {str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), true}, +} + +func TestIDSet(t *testing.T) { + set := backend.NewIDSet() + for i, test := range idsetTests { + seen := set.Has(test.id) + if seen != test.seen { + t.Errorf("IDSet test %v failed: wanted %v, got %v", i, test.seen, seen) + } + set.Insert(test.id) + } +} diff --git a/checker/checker.go b/checker/checker.go index 239b0db04..d8ac497d6 100644 --- a/checker/checker.go +++ b/checker/checker.go @@ -489,21 +489,6 @@ func (c *Checker) Structure(errChan chan<- error, done <-chan struct{}) { func (c *Checker) checkTree(id backend.ID, tree *restic.Tree) (errs []error) { debug.Log("Checker.checkTree", "checking tree %v", id.Str()) - // if _, ok := c.blobs[id2map(id)]; !ok { - // errs = append(errs, Error{TreeID: id, Err: errors.New("not found in index")}) - // } - - // blobs, subtrees, treeErrors := c.tree(id) - // if treeErrors != nil { - // 
debug.Log("Checker.trees", "error checking tree %v: %v", id.Str(), treeErrors) - // errs = append(errs, treeErrors...) - // continue - // } - - // treeIDs = append(treeIDs, subtrees...) - - // treesChecked[id2map(id)] = struct{}{} - var blobs []backend.ID for i, node := range tree.Nodes { diff --git a/pack/pack.go b/pack/pack.go index 481bd9ecb..727566dcf 100644 --- a/pack/pack.go +++ b/pack/pack.go @@ -57,7 +57,7 @@ func (t *BlobType) UnmarshalJSON(buf []byte) error { // Blob is a blob within a pack. type Blob struct { Type BlobType - Length uint32 + Length uint ID backend.ID Offset uint } @@ -100,7 +100,7 @@ func (p *Packer) Add(t BlobType, id backend.ID, rd io.Reader) (int64, error) { c := Blob{Type: t, ID: id} n, err := io.Copy(p.hw, rd) - c.Length = uint32(n) + c.Length = uint(n) c.Offset = p.bytes p.bytes += uint(n) p.blobs = append(p.blobs, c) @@ -164,7 +164,7 @@ func (p *Packer) writeHeader(wr io.Writer) (bytesWritten uint, err error) { for _, b := range p.blobs { entry := headerEntry{ Type: b.Type, - Length: b.Length, + Length: uint32(b.Length), ID: b.ID, } @@ -276,7 +276,7 @@ func NewUnpacker(k *crypto.Key, entries []Blob, rd io.ReadSeeker) (*Unpacker, er entries = append(entries, Blob{ Type: e.Type, - Length: e.Length, + Length: uint(e.Length), ID: e.ID, Offset: pos, }) diff --git a/repository/index.go b/repository/index.go index 117f54a3a..b53de02dd 100644 --- a/repository/index.go +++ b/repository/index.go @@ -15,7 +15,7 @@ import ( // Index holds a lookup table for id -> pack. type Index struct { m sync.Mutex - pack map[string]indexEntry + pack map[backend.ID]indexEntry } type indexEntry struct { @@ -29,12 +29,12 @@ type indexEntry struct { // NewIndex returns a new index. func NewIndex() *Index { return &Index{ - pack: make(map[string]indexEntry), + pack: make(map[backend.ID]indexEntry), } } func (idx *Index) store(t pack.BlobType, id backend.ID, pack *backend.ID, offset, length uint, old bool) { - idx.pack[id.String()] = indexEntry{ + idx.pack[id] = indexEntry{ tpe: t, packID: pack, offset: offset, @@ -61,9 +61,8 @@ func (idx *Index) Remove(packID backend.ID) { debug.Log("Index.Remove", "id %v removed", packID.Str()) - s := packID.String() - if _, ok := idx.pack[s]; ok { - delete(idx.pack, s) + if _, ok := idx.pack[packID]; ok { + delete(idx.pack, packID) } } @@ -72,7 +71,7 @@ func (idx *Index) Lookup(id backend.ID) (packID *backend.ID, tpe pack.BlobType, idx.m.Lock() defer idx.m.Unlock() - if p, ok := idx.pack[id.String()]; ok { + if p, ok := idx.pack[id]; ok { debug.Log("Index.Lookup", "id %v found in pack %v at %d, length %d", id.Str(), p.packID.Str(), p.offset, p.length) return p.packID, p.tpe, p.offset, p.length, nil @@ -110,7 +109,7 @@ func (idx *Index) Merge(other *Index) { for k, v := range other.pack { if _, ok := idx.pack[k]; ok { - debug.Log("Index.Merge", "index already has key %v, updating", k[:8]) + debug.Log("Index.Merge", "index already has key %v, updating", k.Str()) } idx.pack[k] = v @@ -138,13 +137,7 @@ func (idx *Index) Each(done chan struct{}) <-chan PackedBlob { close(ch) }() - for ids, blob := range idx.pack { - id, err := backend.ParseID(ids) - if err != nil { - // ignore invalid IDs - continue - } - + for id, blob := range idx.pack { select { case <-done: return @@ -153,7 +146,7 @@ func (idx *Index) Each(done chan struct{}) <-chan PackedBlob { ID: id, Offset: blob.offset, Type: blob.tpe, - Length: uint32(blob.length), + Length: blob.length, }, PackID: *blob.packID, }: @@ -173,7 +166,7 @@ func (idx *Index) Count(t pack.BlobType) (n uint) { for id, 
diff --git a/repository/index.go b/repository/index.go
index 117f54a3a..b53de02dd 100644
--- a/repository/index.go
+++ b/repository/index.go
@@ -15,7 +15,7 @@ import (
 // Index holds a lookup table for id -> pack.
 type Index struct {
 	m    sync.Mutex
-	pack map[string]indexEntry
+	pack map[backend.ID]indexEntry
 }
 
 type indexEntry struct {
@@ -29,12 +29,12 @@ type indexEntry struct {
 // NewIndex returns a new index.
 func NewIndex() *Index {
 	return &Index{
-		pack: make(map[string]indexEntry),
+		pack: make(map[backend.ID]indexEntry),
 	}
 }
 
 func (idx *Index) store(t pack.BlobType, id backend.ID, pack *backend.ID, offset, length uint, old bool) {
-	idx.pack[id.String()] = indexEntry{
+	idx.pack[id] = indexEntry{
 		tpe:    t,
 		packID: pack,
 		offset: offset,
@@ -61,9 +61,8 @@ func (idx *Index) Remove(packID backend.ID) {
 
 	debug.Log("Index.Remove", "id %v removed", packID.Str())
 
-	s := packID.String()
-	if _, ok := idx.pack[s]; ok {
-		delete(idx.pack, s)
+	if _, ok := idx.pack[packID]; ok {
+		delete(idx.pack, packID)
 	}
 }
 
@@ -72,7 +71,7 @@ func (idx *Index) Lookup(id backend.ID) (packID *backend.ID, tpe pack.BlobType,
 	idx.m.Lock()
 	defer idx.m.Unlock()
 
-	if p, ok := idx.pack[id.String()]; ok {
+	if p, ok := idx.pack[id]; ok {
 		debug.Log("Index.Lookup", "id %v found in pack %v at %d, length %d",
 			id.Str(), p.packID.Str(), p.offset, p.length)
 		return p.packID, p.tpe, p.offset, p.length, nil
@@ -110,7 +109,7 @@ func (idx *Index) Merge(other *Index) {
 
 	for k, v := range other.pack {
 		if _, ok := idx.pack[k]; ok {
-			debug.Log("Index.Merge", "index already has key %v, updating", k[:8])
+			debug.Log("Index.Merge", "index already has key %v, updating", k.Str())
 		}
 
 		idx.pack[k] = v
@@ -138,13 +137,7 @@ func (idx *Index) Each(done chan struct{}) <-chan PackedBlob {
 		close(ch)
 	}()
 
-	for ids, blob := range idx.pack {
-		id, err := backend.ParseID(ids)
-		if err != nil {
-			// ignore invalid IDs
-			continue
-		}
-
+	for id, blob := range idx.pack {
 		select {
 		case <-done:
 			return
@@ -153,7 +146,7 @@ func (idx *Index) Each(done chan struct{}) <-chan PackedBlob {
 				ID:     id,
 				Offset: blob.offset,
 				Type:   blob.tpe,
-				Length: uint32(blob.length),
+				Length: blob.length,
 			},
 			PackID: *blob.packID,
 		}:
@@ -173,7 +166,7 @@ func (idx *Index) Count(t pack.BlobType) (n uint) {
 	for id, blob := range idx.pack {
 		if blob.tpe == t {
 			n++
-			debug.Log("Index.Count", "  blob %v counted: %v", id[:8], blob)
+			debug.Log("Index.Count", "  blob %v counted: %v", id.Str(), blob)
 		}
 	}
 
@@ -181,12 +174,12 @@ func (idx *Index) Count(t pack.BlobType) (n uint) {
 }
 
 type packJSON struct {
-	ID    string     `json:"id"`
+	ID    backend.ID `json:"id"`
 	Blobs []blobJSON `json:"blobs"`
 }
 
 type blobJSON struct {
-	ID     string        `json:"id"`
+	ID     backend.ID    `json:"id"`
 	Type   pack.BlobType `json:"type"`
 	Offset uint          `json:"offset"`
 	Length uint          `json:"length"`
@@ -197,7 +190,7 @@ type blobJSON struct {
 // blobs in the index.
 func (idx *Index) generatePackList(selectFn func(indexEntry) bool) ([]*packJSON, error) {
 	list := []*packJSON{}
-	packs := make(map[string]*packJSON)
+	packs := make(map[backend.ID]*packJSON)
 
 	for id, blob := range idx.pack {
 		if selectFn != nil && !selectFn(blob) {
@@ -208,15 +201,15 @@ func (idx *Index) generatePackList(selectFn func(indexEntry) bool) ([]*packJSON, error) {
 
 		if blob.packID.IsNull() {
 			debug.Log("Index.generatePackList", "blob %q has no packID! (type %v, offset %v, length %v)",
-				id[:8], blob.tpe, blob.offset, blob.length)
+				id.Str(), blob.tpe, blob.offset, blob.length)
 			return nil, fmt.Errorf("unable to serialize index: pack for blob %v hasn't been written yet", id)
 		}
 
 		// see if pack is already in map
-		p, ok := packs[blob.packID.String()]
+		p, ok := packs[*blob.packID]
 		if !ok {
 			// else create new pack
-			p = &packJSON{ID: blob.packID.String()}
+			p = &packJSON{ID: *blob.packID}
 
 			// and append it to the list and map
 			list = append(list, p)
@@ -302,20 +295,8 @@ func DecodeIndex(rd io.Reader) (*Index, error) {
 	idx := NewIndex()
 
 	for _, pack := range list {
-		packID, err := backend.ParseID(pack.ID)
-		if err != nil {
-			debug.Log("Index.DecodeIndex", "error parsing pack ID %q: %v", pack.ID, err)
-			return nil, err
-		}
-
 		for _, blob := range pack.Blobs {
-			blobID, err := backend.ParseID(blob.ID)
-			if err != nil {
-				debug.Log("Index.DecodeIndex", "error parsing blob ID %q: %v", blob.ID, err)
-				return nil, err
-			}
-
-			idx.store(blob.Type, blobID, &packID, blob.Offset, blob.Length, true)
+			idx.store(blob.Type, blob.ID, &pack.ID, blob.Offset, blob.Length, true)
 		}
 	}
 
diff --git a/repository/parallel.go b/repository/parallel.go
index 44f75ccbe..19ba567c5 100644
--- a/repository/parallel.go
+++ b/repository/parallel.go
@@ -20,6 +20,10 @@ func closeIfOpen(ch chan struct{}) {
 // processing stops. If done is closed, the function should return.
 type ParallelWorkFunc func(id string, done <-chan struct{}) error
 
+// ParallelIDWorkFunc gets one backend.ID to work on. If an error is returned,
+// processing stops. If done is closed, the function should return.
+type ParallelIDWorkFunc func(id backend.ID, done <-chan struct{}) error
+
 // FilesInParallel runs n workers of f in parallel, on the IDs that
 // repo.List(t) yield. If f returns an error, the process is aborted and the
 // first error is returned.
@@ -69,3 +73,16 @@ func FilesInParallel(repo backend.Lister, t backend.Type, n uint, f ParallelWorkFunc) error {
 	}
 	return nil
 }
+
+// ParallelWorkFuncParseID converts a function that takes a backend.ID to a
+// function that takes a string.
+func ParallelWorkFuncParseID(f ParallelIDWorkFunc) ParallelWorkFunc {
+	return func(s string, done <-chan struct{}) error {
+		id, err := backend.ParseID(s)
+		if err != nil {
+			return err
+		}
+
+		return f(id, done)
+	}
+}
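Note: ParallelWorkFuncParseID lets callers write workers against backend.ID while FilesInParallel keeps its string-based interface, so the ParseID call happens once in the adapter instead of in every worker. A caller-side sketch (illustration only; listSnapshots is a hypothetical helper, and it assumes backend.Snapshot is the usual Type constant and repo implements backend.Lister):

package example

import (
	"fmt"

	"github.com/restic/restic/backend"
	"github.com/restic/restic/repository"
)

// listSnapshots prints each snapshot ID, handled by four workers in parallel.
func listSnapshots(repo backend.Lister) error {
	worker := func(id backend.ID, done <-chan struct{}) error {
		fmt.Println("snapshot", id.Str())
		return nil
	}

	return repository.FilesInParallel(repo, backend.Snapshot, 4,
		repository.ParallelWorkFuncParseID(worker))
}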