Merge pull request #294 from restic/rework-id

Refactor IDs and IDSet
2024-06-27 07:55:08 +02:00 · 2015-09-08 21:26:07 +02:00 · 2015-09-08 21:26:07 +02:00 · 108d28316a
commit 108d28316a
parent 76b1f017c0 5c46dc41de
9 changed files with 247 additions and 85 deletions
--- a/backend/id.go
+++ b/backend/id.go
@ -102,33 +102,3 @@ func (id *ID) UnmarshalJSON(b []byte) error {
 func IDFromData(d []byte) ID {
 	return hashData(d)
 }
-
-type IDs []ID
-
-func (ids IDs) Len() int {
-	return len(ids)
-}
-
-func (ids IDs) Less(i, j int) bool {
-	if len(ids[i]) < len(ids[j]) {
-		return true
-	}
-
-	for k, b := range ids[i] {
-		if b == ids[j][k] {
-			continue
-		}
-
-		if b < ids[j][k] {
-			return true
-		} else {
-			return false
-		}
-	}
-
-	return false
-}
-
-func (ids IDs) Swap(i, j int) {
-	ids[i], ids[j] = ids[j], ids[i]
-}
--- a/backend/ids.go
+++ b/backend/ids.go
@ -0,0 +1,69 @@
+package backend
+
+import (
+	"bytes"
+	"encoding/hex"
+	"fmt"
+)
+
+// IDs is an ordered list of IDs that implements sort.Interface.
+type IDs []ID
+
+// Len returns the number of IDs in the list. It is part of sort.Interface.
+func (ids IDs) Len() int {
+	return len(ids)
+}
+
+// Less reports whether the ID at index i sorts before the ID at index j. IDs
+// are ordered lexicographically by their raw bytes. It is part of
+// sort.Interface.
+func (ids IDs) Less(i, j int) bool {
+	// ID is a fixed-size byte array (it is used as a map key by IDSet), so
+	// both operands always have the same length and bytes.Compare alone
+	// yields the complete ordering; no separate length check is needed.
+	return bytes.Compare(ids[i][:], ids[j][:]) < 0
+}
+
+// Swap exchanges the IDs at indexes i and j. It is part of sort.Interface.
+func (ids IDs) Swap(i, j int) {
+	tmp := ids[i]
+	ids[i] = ids[j]
+	ids[j] = tmp
+}
+
+// Uniq returns a copy of ids with all duplicate IDs dropped. Only the first
+// occurrence of each ID is kept, so the relative order of the remaining
+// entries is the same as in the original list. The result is nil when no ID
+// was appended, i.e. for an empty input.
+func (ids IDs) Uniq() (list IDs) {
+	known := NewIDSet()
+
+	for i := range ids {
+		if !known.Has(ids[i]) {
+			list = append(list, ids[i])
+			known.Insert(ids[i])
+		}
+	}
+
+	return list
+}
+
+// shortID is an ID whose String method yields only an abbreviated form.
+type shortID ID
+
+// String returns the hex encoding of the first shortStr bytes of id.
+func (id shortID) String() string {
+	prefix := id[:shortStr]
+	return hex.EncodeToString(prefix)
+}
+
+// String formats ids as a list of abbreviated IDs (see shortID), using fmt's
+// default formatting for slices.
+func (ids IDs) String() string {
+	short := make([]shortID, len(ids))
+	for i, id := range ids {
+		short[i] = shortID(id)
+	}
+	return fmt.Sprint(short)
+}
--- a/backend/ids_test.go
+++ b/backend/ids_test.go
@ -0,0 +1,57 @@
+package backend_test
+
+import (
+	"reflect"
+	"testing"
+
+	"github.com/restic/restic/backend"
+)
+
+// uniqTests lists inputs for IDs.Uniq (before) together with the expected
+// de-duplicated result (after).
+var uniqTests = []struct {
+	before, after backend.IDs
+}{
+	// duplicate separated by a different ID
+	{
+		backend.IDs{
+			str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
+			str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"),
+			str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
+		},
+		backend.IDs{
+			str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
+			str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"),
+		},
+	},
+	// adjacent duplicates at the end of the list
+	{
+		backend.IDs{
+			str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"),
+			str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
+			str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
+		},
+		backend.IDs{
+			str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"),
+			str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
+		},
+	},
+	// three distinct IDs, the last one repeated
+	{
+		backend.IDs{
+			str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"),
+			str2id("f658198b405d7e80db5ace1980d125c8da62f636b586c46bf81dfb856a49d0c8"),
+			str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
+			str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
+		},
+		backend.IDs{
+			str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"),
+			str2id("f658198b405d7e80db5ace1980d125c8da62f636b586c46bf81dfb856a49d0c8"),
+			str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"),
+		},
+	},
+}
+
+// TestUniqIDs runs Uniq on every entry of uniqTests and compares the result
+// with the expected list.
+func TestUniqIDs(t *testing.T) {
+	for idx := range uniqTests {
+		want := uniqTests[idx].after
+		got := uniqTests[idx].before.Uniq()
+		if reflect.DeepEqual(got, want) {
+			continue
+		}
+
+		t.Errorf("uniqIDs() test %v failed\n  wanted: %v\n  got: %v", idx, want, got)
+	}
+}
--- a/backend/idset.go
+++ b/backend/idset.go
@ -0,0 +1,49 @@
+package backend
+
+// IDSet is a set of IDs.
+type IDSet map[ID]struct{}
+
+// NewIDSet creates an IDSet that contains all of the given ids. Called
+// without arguments it returns an empty, ready-to-use set.
+func NewIDSet(ids ...ID) IDSet {
+	set := make(IDSet)
+	for i := range ids {
+		set[ids[i]] = struct{}{}
+	}
+
+	return set
+}
+
+// Has reports whether id is a member of the set.
+func (s IDSet) Has(id ID) bool {
+	_, found := s[id]
+	return found
+}
+
+// Insert puts id into the set. Inserting an ID that is already present leaves
+// the set unchanged.
+func (s IDSet) Insert(id ID) {
+	s[id] = struct{}{}
+}
+
+// Delete drops id from the set. Deleting an ID that is not in the set is a
+// no-op.
+func (s IDSet) Delete(id ID) {
+	delete(s, id)
+}
+
+// List returns the members of the set as IDs. The elements are collected by
+// ranging over the map, so their order is unspecified.
+func (s IDSet) List() IDs {
+	ids := make(IDs, len(s))
+	pos := 0
+	for id := range s {
+		ids[pos] = id
+		pos++
+	}
+
+	return ids
+}
+
+// String returns the set formatted like IDs.String, but enclosed in braces
+// instead of brackets. The empty set is rendered as "{}". The IDs come from
+// List, which ranges over a map, so their order is not stable between calls.
+func (s IDSet) String() string {
+	str := s.List().String()
+	if len(str) < 2 {
+		return "{}"
+	}
+
+	// str has the form "[...]": drop exactly one byte on each side to swap
+	// the brackets for braces. Cutting two bytes off the end would lose the
+	// last character of the final ID and panic for the empty list "[]".
+	return "{" + str[1:len(str)-1] + "}"
+}
--- a/backend/idset_test.go
+++ b/backend/idset_test.go
@ -0,0 +1,34 @@
+package backend_test
+
+import (
+	"testing"
+
+	"github.com/restic/restic/backend"
+)
+
+// idsetTests is a sequence of lookups against an initially empty set. The
+// seen field states whether id is expected to be present already, i.e.
+// whether an earlier entry inserted it.
+var idsetTests = []struct {
+	id   backend.ID
+	seen bool
+}{
+	{str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), false},
+	{str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"), false},
+	{str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), true},
+	{str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), true},
+	{str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"), true},
+	{str2id("f658198b405d7e80db5ace1980d125c8da62f636b586c46bf81dfb856a49d0c8"), false},
+	{str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), true},
+	{str2id("1285b30394f3b74693cc29a758d9624996ae643157776fce8154aabd2f01515f"), true},
+	{str2id("f658198b405d7e80db5ace1980d125c8da62f636b586c46bf81dfb856a49d0c8"), true},
+	{str2id("7bb086db0d06285d831485da8031281e28336a56baa313539eaea1c73a2a1a40"), true},
+}
+
+// TestIDSet walks idsetTests in order: for each entry it checks that Has
+// matches the expected membership and then inserts the ID into the set.
+func TestIDSet(t *testing.T) {
+	ids := backend.NewIDSet()
+	for n := range idsetTests {
+		tc := idsetTests[n]
+		if got := ids.Has(tc.id); got != tc.seen {
+			t.Errorf("IDSet test %v failed: wanted %v, got %v", n, tc.seen, got)
+		}
+		ids.Insert(tc.id)
+	}
+}
--- a/checker/checker.go
+++ b/checker/checker.go
@ -489,21 +489,6 @@ func (c *Checker) Structure(errChan chan<- error, done <-chan struct{}) {
 func (c *Checker) checkTree(id backend.ID, tree *restic.Tree) (errs []error) {
 	debug.Log("Checker.checkTree", "checking tree %v", id.Str())

-	// if _, ok := c.blobs[id2map(id)]; !ok {
-	// 	errs = append(errs, Error{TreeID: id, Err: errors.New("not found in index")})
-	// }
-
-	// blobs, subtrees, treeErrors := c.tree(id)
-	// if treeErrors != nil {
-	// 	debug.Log("Checker.trees", "error checking tree %v: %v", id.Str(), treeErrors)
-	// 	errs = append(errs, treeErrors...)
-	// 	continue
-	// }
-
-	// treeIDs = append(treeIDs, subtrees...)
-
-	// treesChecked[id2map(id)] = struct{}{}
-
 	var blobs []backend.ID

 	for i, node := range tree.Nodes {
--- a/pack/pack.go
+++ b/pack/pack.go
@ -57,7 +57,7 @@ func (t *BlobType) UnmarshalJSON(buf []byte) error {
 // Blob is a blob within a pack.
 type Blob struct {
 	Type   BlobType
-	Length uint32
+	Length uint
 	ID     backend.ID
 	Offset uint
 }
@ -100,7 +100,7 @@ func (p *Packer) Add(t BlobType, id backend.ID, rd io.Reader) (int64, error) {
 	c := Blob{Type: t, ID: id}

 	n, err := io.Copy(p.hw, rd)
-	c.Length = uint32(n)
+	c.Length = uint(n)
 	c.Offset = p.bytes
 	p.bytes += uint(n)
 	p.blobs = append(p.blobs, c)
@ -164,7 +164,7 @@ func (p *Packer) writeHeader(wr io.Writer) (bytesWritten uint, err error) {
 	for _, b := range p.blobs {
 		entry := headerEntry{
 			Type:   b.Type,
-			Length: b.Length,
+			Length: uint32(b.Length),
 			ID:     b.ID,
 		}

@ -276,7 +276,7 @@ func NewUnpacker(k *crypto.Key, entries []Blob, rd io.ReadSeeker) (*Unpacker, er

 			entries = append(entries, Blob{
 				Type:   e.Type,
-				Length: e.Length,
+				Length: uint(e.Length),
 				ID:     e.ID,
 				Offset: pos,
 			})
--- a/repository/index.go
+++ b/repository/index.go
@ -15,7 +15,7 @@ import (
 // Index holds a lookup table for id -> pack.
 type Index struct {
 	m    sync.Mutex
-	pack map[string]indexEntry
+	pack map[backend.ID]indexEntry
 }

 type indexEntry struct {
@ -29,12 +29,12 @@ type indexEntry struct {
 // NewIndex returns a new index.
 func NewIndex() *Index {
 	return &Index{
-		pack: make(map[string]indexEntry),
+		pack: make(map[backend.ID]indexEntry),
 	}
 }

 func (idx *Index) store(t pack.BlobType, id backend.ID, pack *backend.ID, offset, length uint, old bool) {
-	idx.pack[id.String()] = indexEntry{
+	idx.pack[id] = indexEntry{
 		tpe:    t,
 		packID: pack,
 		offset: offset,
@ -61,9 +61,8 @@ func (idx *Index) Remove(packID backend.ID) {

 	debug.Log("Index.Remove", "id %v removed", packID.Str())

-	s := packID.String()
-	if _, ok := idx.pack[s]; ok {
-		delete(idx.pack, s)
+	if _, ok := idx.pack[packID]; ok {
+		delete(idx.pack, packID)
 	}
 }

@ -72,7 +71,7 @@ func (idx *Index) Lookup(id backend.ID) (packID *backend.ID, tpe pack.BlobType,
 	idx.m.Lock()
 	defer idx.m.Unlock()

-	if p, ok := idx.pack[id.String()]; ok {
+	if p, ok := idx.pack[id]; ok {
 		debug.Log("Index.Lookup", "id %v found in pack %v at %d, length %d",
 			id.Str(), p.packID.Str(), p.offset, p.length)
 		return p.packID, p.tpe, p.offset, p.length, nil
@ -110,7 +109,7 @@ func (idx *Index) Merge(other *Index) {

 	for k, v := range other.pack {
 		if _, ok := idx.pack[k]; ok {
-			debug.Log("Index.Merge", "index already has key %v, updating", k[:8])
+			debug.Log("Index.Merge", "index already has key %v, updating", k.Str())
 		}

 		idx.pack[k] = v
@ -138,13 +137,7 @@ func (idx *Index) Each(done chan struct{}) <-chan PackedBlob {
 			close(ch)
 		}()

-		for ids, blob := range idx.pack {
-			id, err := backend.ParseID(ids)
-			if err != nil {
-				// ignore invalid IDs
-				continue
-			}
-
+		for id, blob := range idx.pack {
 			select {
 			case <-done:
 				return
@ -153,7 +146,7 @@ func (idx *Index) Each(done chan struct{}) <-chan PackedBlob {
 					ID:     id,
 					Offset: blob.offset,
 					Type:   blob.tpe,
-					Length: uint32(blob.length),
+					Length: blob.length,
 				},
 				PackID: *blob.packID,
 			}:
@ -173,7 +166,7 @@ func (idx *Index) Count(t pack.BlobType) (n uint) {
 	for id, blob := range idx.pack {
 		if blob.tpe == t {
 			n++
-			debug.Log("Index.Count", "  blob %v counted: %v", id[:8], blob)
+			debug.Log("Index.Count", "  blob %v counted: %v", id.Str(), blob)
 		}
 	}

@ -181,12 +174,12 @@ func (idx *Index) Count(t pack.BlobType) (n uint) {
 }

 type packJSON struct {
-	ID    string     `json:"id"`
+	ID    backend.ID `json:"id"`
 	Blobs []blobJSON `json:"blobs"`
 }

 type blobJSON struct {
-	ID     string        `json:"id"`
+	ID     backend.ID    `json:"id"`
 	Type   pack.BlobType `json:"type"`
 	Offset uint          `json:"offset"`
 	Length uint          `json:"length"`
@ -197,7 +190,7 @@ type blobJSON struct {
 // blobs in the index.
 func (idx *Index) generatePackList(selectFn func(indexEntry) bool) ([]*packJSON, error) {
 	list := []*packJSON{}
-	packs := make(map[string]*packJSON)
+	packs := make(map[backend.ID]*packJSON)

 	for id, blob := range idx.pack {
 		if selectFn != nil && !selectFn(blob) {
@ -208,15 +201,15 @@ func (idx *Index) generatePackList(selectFn func(indexEntry) bool) ([]*packJSON,

 		if blob.packID.IsNull() {
 			debug.Log("Index.generatePackList", "blob %q has no packID! (type %v, offset %v, length %v)",
-				id[:8], blob.tpe, blob.offset, blob.length)
+				id.Str(), blob.tpe, blob.offset, blob.length)
 			return nil, fmt.Errorf("unable to serialize index: pack for blob %v hasn't been written yet", id)
 		}

 		// see if pack is already in map
-		p, ok := packs[blob.packID.String()]
+		p, ok := packs[*blob.packID]
 		if !ok {
 			// else create new pack
-			p = &packJSON{ID: blob.packID.String()}
+			p = &packJSON{ID: *blob.packID}

 			// and append it to the list and map
 			list = append(list, p)
@ -302,20 +295,8 @@ func DecodeIndex(rd io.Reader) (*Index, error) {

 	idx := NewIndex()
 	for _, pack := range list {
-		packID, err := backend.ParseID(pack.ID)
-		if err != nil {
-			debug.Log("Index.DecodeIndex", "error parsing pack ID %q: %v", pack.ID, err)
-			return nil, err
-		}
-
 		for _, blob := range pack.Blobs {
-			blobID, err := backend.ParseID(blob.ID)
-			if err != nil {
-				debug.Log("Index.DecodeIndex", "error parsing blob ID %q: %v", blob.ID, err)
-				return nil, err
-			}
-
-			idx.store(blob.Type, blobID, &packID, blob.Offset, blob.Length, true)
+			idx.store(blob.Type, blob.ID, &pack.ID, blob.Offset, blob.Length, true)
 		}
 	}

--- a/repository/parallel.go
+++ b/repository/parallel.go
@ -20,6 +20,10 @@ func closeIfOpen(ch chan struct{}) {
 // processing stops. If done is closed, the function should return.
 type ParallelWorkFunc func(id string, done <-chan struct{}) error

+// ParallelIDWorkFunc gets one backend.ID to work on. If an error is returned,
+// processing stops. If done is closed, the function should return.
+type ParallelIDWorkFunc func(id backend.ID, done <-chan struct{}) error
+
 // FilesInParallel runs n workers of f in parallel, on the IDs that
 // repo.List(t) yield. If f returns an error, the process is aborted and the
 // first error is returned.
@ -69,3 +73,16 @@ func FilesInParallel(repo backend.Lister, t backend.Type, n uint, f ParallelWork

 	return nil
 }
+
+// ParallelWorkFuncParseID adapts f, which operates on a backend.ID, to the
+// string-based ParallelWorkFunc signature. The returned function parses its
+// string argument with backend.ParseID; if parsing fails it returns that
+// error without calling f.
+func ParallelWorkFuncParseID(f ParallelIDWorkFunc) ParallelWorkFunc {
+	return func(name string, done <-chan struct{}) error {
+		parsed, parseErr := backend.ParseID(name)
+		if parseErr == nil {
+			return f(parsed, done)
+		}
+
+		return parseErr
+	}
+}