From be5a0ff59f88e98282d184e5715590908c854f71 Mon Sep 17 00:00:00 2001 From: greatroar <@> Date: Tue, 10 Mar 2020 16:41:22 +0100 Subject: [PATCH 1/2] Centralize buffer allocation and size checking in Repository.LoadBlob MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Benchmark results for internal/repository: name old time/op new time/op delta LoadTree-8 479µs ± 2% 478µs ± 1% ~ (p=0.780 n=10+9) LoadBlob-8 11.6ms ± 2% 11.6ms ± 1% ~ (p=0.631 n=10+10) LoadAndDecrypt-8 13.2ms ± 2% 13.3ms ± 3% ~ (p=0.631 n=10+10) name old alloc/op new alloc/op delta LoadTree-8 41.2kB ± 0% 41.2kB ± 0% ~ (all equal) LoadBlob-8 2.28kB ± 0% 2.28kB ± 0% ~ (all equal) LoadAndDecrypt-8 2.10MB ± 0% 2.10MB ± 0% ~ (all equal) name old allocs/op new allocs/op delta LoadTree-8 652 ± 0% 652 ± 0% ~ (all equal) LoadBlob-8 24.0 ± 0% 24.0 ± 0% ~ (all equal) LoadAndDecrypt-8 30.0 ± 0% 30.0 ± 0% ~ (all equal) name old speed new speed delta LoadBlob-8 86.2MB/s ± 2% 86.4MB/s ± 1% ~ (p=0.594 n=10+10) LoadAndDecrypt-8 75.7MB/s ± 2% 75.4MB/s ± 3% ~ (p=0.617 n=10+10) --- cmd/restic/cmd_cat.go | 7 ++--- cmd/restic/cmd_dump.go | 19 +++--------- internal/archiver/testing.go | 4 +-- internal/fuse/file.go | 7 ++--- internal/fuse/fuse_test.go | 7 ++--- internal/repository/repository.go | 15 +++++---- internal/repository/repository_test.go | 43 ++++++++------------------ internal/restic/node.go | 13 +------- internal/restic/repository.go | 2 +- 9 files changed, 37 insertions(+), 80 deletions(-) diff --git a/cmd/restic/cmd_cat.go b/cmd/restic/cmd_cat.go index 1c16a2f62..8d2535585 100644 --- a/cmd/restic/cmd_cat.go +++ b/cmd/restic/cmd_cat.go @@ -170,18 +170,15 @@ func runCat(gopts GlobalOptions, args []string) error { case "blob": for _, t := range []restic.BlobType{restic.DataBlob, restic.TreeBlob} { - list, found := repo.Index().Lookup(id, t) + _, found := repo.Index().Lookup(id, t) if !found { continue } - blob := list[0] - buf := make([]byte, blob.Length) - n, err := repo.LoadBlob(gopts.ctx, t, id, buf) + buf, err := repo.LoadBlob(gopts.ctx, t, id, nil) if err != nil { return err } - buf = buf[:n] _, err = os.Stdout.Write(buf) return err diff --git a/cmd/restic/cmd_dump.go b/cmd/restic/cmd_dump.go index 5b4caae00..fb5dccf7f 100644 --- a/cmd/restic/cmd_dump.go +++ b/cmd/restic/cmd_dump.go @@ -171,24 +171,15 @@ func runDump(opts DumpOptions, gopts GlobalOptions, args []string) error { } func getNodeData(ctx context.Context, output io.Writer, repo restic.Repository, node *restic.Node) error { - var buf []byte + var ( + buf []byte + err error + ) for _, id := range node.Content { - - size, found := repo.LookupBlobSize(id, restic.DataBlob) - if !found { - return errors.Errorf("id %v not found in repository", id) - } - - buf = buf[:cap(buf)] - if len(buf) < restic.CiphertextLength(int(size)) { - buf = restic.NewBlobBuffer(int(size)) - } - - n, err := repo.LoadBlob(ctx, restic.DataBlob, id, buf) + buf, err = repo.LoadBlob(ctx, restic.DataBlob, id, buf) if err != nil { return err } - buf = buf[:n] _, err = output.Write(buf) if err != nil { diff --git a/internal/archiver/testing.go b/internal/archiver/testing.go index 790e94950..873e9ab5d 100644 --- a/internal/archiver/testing.go +++ b/internal/archiver/testing.go @@ -228,13 +228,13 @@ func TestEnsureFileContent(ctx context.Context, t testing.TB, repo restic.Reposi content := make([]byte, restic.CiphertextLength(len(file.Content))) pos := 0 for _, id := range node.Content { - n, err := repo.LoadBlob(ctx, restic.DataBlob, id, content[pos:]) + part, err := repo.LoadBlob(ctx, restic.DataBlob, id, content[pos:]) if err != nil { t.Fatalf("error loading blob %v: %v", id.Str(), err) return } - pos += n + pos += len(part) } content = content[:pos] diff --git a/internal/fuse/file.go b/internal/fuse/file.go index 5fefd5e3e..215841622 100644 --- a/internal/fuse/file.go +++ b/internal/fuse/file.go @@ -96,15 +96,14 @@ func (f *file) getBlobAt(ctx context.Context, i int) (blob []byte, err error) { f.blobs[j] = nil } - buf := restic.NewBlobBuffer(f.sizes[i]) - n, err := f.root.repo.LoadBlob(ctx, restic.DataBlob, f.node.Content[i], buf) + blob, err = f.root.repo.LoadBlob(ctx, restic.DataBlob, f.node.Content[i], nil) if err != nil { debug.Log("LoadBlob(%v, %v) failed: %v", f.node.Name, f.node.Content[i], err) return nil, err } - f.blobs[i] = buf[:n] + f.blobs[i] = blob - return buf[:n], nil + return blob, nil } func (f *file) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadResponse) error { diff --git a/internal/fuse/fuse_test.go b/internal/fuse/fuse_test.go index 5d4cbb268..1f2d9bf53 100644 --- a/internal/fuse/fuse_test.go +++ b/internal/fuse/fuse_test.go @@ -94,12 +94,11 @@ func TestFuseFile(t *testing.T) { rtest.Assert(t, found, "Expected to find blob id %v", id) filesize += uint64(size) - buf := restic.NewBlobBuffer(int(size)) - n, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf) + buf, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, nil) rtest.OK(t, err) - if uint(n) != size { - t.Fatalf("not enough bytes read for id %v: want %v, got %v", id.Str(), size, n) + if len(buf) != int(size) { + t.Fatalf("not enough bytes read for id %v: want %v, got %v", id.Str(), size, len(buf)) } if uint(len(buf)) != size { diff --git a/internal/repository/repository.go b/internal/repository/repository.go index e39e728c7..75dd78db2 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -670,29 +670,28 @@ func (r *Repository) Close() error { return r.be.Close() } -// LoadBlob loads a blob of type t from the repository to the buffer. buf must -// be large enough to hold the encrypted blob, since it is used as scratch -// space. -func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) (int, error) { +// LoadBlob loads a blob of type t from the repository. +// It may use all of buf[:cap(buf)] as scratch space. +func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error) { debug.Log("load blob %v into buf (len %v, cap %v)", id, len(buf), cap(buf)) size, found := r.idx.LookupSize(id, t) if !found { - return 0, errors.Errorf("id %v not found in repository", id) + return nil, errors.Errorf("id %v not found in repository", id) } if cap(buf) < restic.CiphertextLength(int(size)) { - return 0, errors.Errorf("buffer is too small for data blob (%d < %d)", cap(buf), restic.CiphertextLength(int(size))) + buf = restic.NewBlobBuffer(int(size)) } n, err := r.loadBlob(ctx, id, t, buf) if err != nil { - return 0, err + return nil, err } buf = buf[:n] debug.Log("loaded %d bytes into buf %p", len(buf), buf) - return len(buf), err + return buf, err } // SaveBlob saves a blob of type t into the repository. If id is the null id, it diff --git a/internal/repository/repository_test.go b/internal/repository/repository_test.go index 4ec71e726..b08448ca8 100644 --- a/internal/repository/repository_test.go +++ b/internal/repository/repository_test.go @@ -43,10 +43,9 @@ func TestSave(t *testing.T) { // rtest.OK(t, repo.SaveIndex()) // read back - buf := restic.NewBlobBuffer(size) - n, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf) + buf, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, nil) rtest.OK(t, err) - rtest.Equals(t, len(buf), n) + rtest.Equals(t, size, len(buf)) rtest.Assert(t, len(buf) == len(data), "number of bytes read back does not match: expected %d, got %d", @@ -77,10 +76,9 @@ func TestSaveFrom(t *testing.T) { rtest.OK(t, repo.Flush(context.Background())) // read back - buf := restic.NewBlobBuffer(size) - n, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf) + buf, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, nil) rtest.OK(t, err) - rtest.Equals(t, len(buf), n) + rtest.Equals(t, size, len(buf)) rtest.Assert(t, len(buf) == len(data), "number of bytes read back does not match: expected %d, got %d", @@ -164,33 +162,17 @@ func TestLoadBlob(t *testing.T) { rtest.OK(t, err) rtest.OK(t, repo.Flush(context.Background())) - // first, test with buffers that are too small - for _, testlength := range []int{length - 20, length, restic.CiphertextLength(length) - 1} { - buf = make([]byte, 0, testlength) - n, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf) - if err == nil { - t.Errorf("LoadBlob() did not return an error for a buffer that is too small to hold the blob") - continue - } - - if n != 0 { - t.Errorf("LoadBlob() returned an error and n > 0") - continue - } - } - - // then use buffers that are large enough base := restic.CiphertextLength(length) - for _, testlength := range []int{base, base + 7, base + 15, base + 1000} { + for _, testlength := range []int{0, base - 20, base - 1, base, base + 7, base + 15, base + 1000} { buf = make([]byte, 0, testlength) - n, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf) + buf, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf) if err != nil { t.Errorf("LoadBlob() returned an error for buffer size %v: %v", testlength, err) continue } - if n != length { - t.Errorf("LoadBlob() returned the wrong number of bytes: want %v, got %v", length, n) + if len(buf) != length { + t.Errorf("LoadBlob() returned the wrong number of bytes: want %v, got %v", length, len(buf)) continue } } @@ -213,13 +195,14 @@ func BenchmarkLoadBlob(b *testing.B) { b.SetBytes(int64(length)) for i := 0; i < b.N; i++ { - n, err := repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf) + var err error + buf, err = repo.LoadBlob(context.TODO(), restic.DataBlob, id, buf) rtest.OK(b, err) - if n != length { - b.Errorf("wanted %d bytes, got %d", length, n) + if len(buf) != length { + b.Errorf("wanted %d bytes, got %d", length, len(buf)) } - id2 := restic.Hash(buf[:n]) + id2 := restic.Hash(buf) if !id.Equal(id2) { b.Errorf("wrong data returned, wanted %v, got %v", id.Str(), id2.Str()) } diff --git a/internal/restic/node.go b/internal/restic/node.go index 0ba60ab82..7efb35022 100644 --- a/internal/restic/node.go +++ b/internal/restic/node.go @@ -282,21 +282,10 @@ func (node Node) createFileAt(ctx context.Context, path string, repo Repository) func (node Node) writeNodeContent(ctx context.Context, repo Repository, f *os.File) error { var buf []byte for _, id := range node.Content { - size, found := repo.LookupBlobSize(id, DataBlob) - if !found { - return errors.Errorf("id %v not found in repository", id) - } - - buf = buf[:cap(buf)] - if len(buf) < CiphertextLength(int(size)) { - buf = NewBlobBuffer(int(size)) - } - - n, err := repo.LoadBlob(ctx, DataBlob, id, buf) + buf, err := repo.LoadBlob(ctx, DataBlob, id, buf) if err != nil { return err } - buf = buf[:n] _, err = f.Write(buf) if err != nil { diff --git a/internal/restic/repository.go b/internal/restic/repository.go index 46d7379db..9713a6c77 100644 --- a/internal/restic/repository.go +++ b/internal/restic/repository.go @@ -45,7 +45,7 @@ type Repository interface { // new buffer will be allocated and returned. LoadAndDecrypt(ctx context.Context, buf []byte, t FileType, id ID) (data []byte, err error) - LoadBlob(context.Context, BlobType, ID, []byte) (int, error) + LoadBlob(context.Context, BlobType, ID, []byte) ([]byte, error) SaveBlob(context.Context, BlobType, []byte, ID) (ID, error) LoadTree(context.Context, ID) (*Tree, error) From e7d7b85d5976723fd1563660e3761cf72257fde2 Mon Sep 17 00:00:00 2001 From: greatroar <@> Date: Tue, 10 Mar 2020 17:52:14 +0100 Subject: [PATCH 2/2] Merge Repository.{LoadBlob,loadBlob} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pushing the allocation logic down into the former loadBlob body means that fewer allocations have to be performed: name old time/op new time/op delta LoadTree-8 478µs ± 1% 481µs ± 2% ~ (p=0.315 n=9+10) LoadBlob-8 11.6ms ± 1% 11.6ms ± 2% ~ (p=0.393 n=10+10) LoadAndDecrypt-8 13.3ms ± 3% 13.3ms ± 3% ~ (p=0.905 n=10+9) LoadIndex-8 33.6ms ± 2% 33.2ms ± 1% -1.15% (p=0.028 n=10+9) name old alloc/op new alloc/op delta LoadTree-8 41.2kB ± 0% 41.1kB ± 0% -0.23% (p=0.000 n=10+10) LoadBlob-8 2.28kB ± 0% 2.18kB ± 0% -4.21% (p=0.000 n=10+10) LoadAndDecrypt-8 2.10MB ± 0% 2.10MB ± 0% ~ (all equal) LoadIndex-8 5.22MB ± 0% 5.22MB ± 0% ~ (p=0.631 n=10+10) name old allocs/op new allocs/op delta LoadTree-8 652 ± 0% 651 ± 0% -0.15% (p=0.000 n=10+10) LoadBlob-8 24.0 ± 0% 23.0 ± 0% -4.17% (p=0.000 n=10+10) LoadAndDecrypt-8 30.0 ± 0% 30.0 ± 0% ~ (all equal) LoadIndex-8 30.2k ± 0% 30.2k ± 0% ~ (p=0.610 n=10+10) name old speed new speed delta LoadBlob-8 86.4MB/s ± 1% 85.9MB/s ± 2% ~ (p=0.393 n=10+10) LoadAndDecrypt-8 75.4MB/s ± 3% 75.4MB/s ± 3% ~ (p=0.858 n=10+9) --- internal/repository/repository.go | 69 ++++++++----------------------- 1 file changed, 18 insertions(+), 51 deletions(-) diff --git a/internal/repository/repository.go b/internal/repository/repository.go index 75dd78db2..08241f683 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -130,17 +130,16 @@ func (r *Repository) sortCachedPacks(blobs []restic.PackedBlob) []restic.PackedB return append(cached, noncached...) } -// loadBlob tries to load and decrypt content identified by t and id from a -// pack from the backend, the result is stored in plaintextBuf, which must be -// large enough to hold the complete blob. -func (r *Repository) loadBlob(ctx context.Context, id restic.ID, t restic.BlobType, plaintextBuf []byte) (int, error) { - debug.Log("load %v with id %v (buf len %v, cap %d)", t, id, len(plaintextBuf), cap(plaintextBuf)) +// LoadBlob loads a blob of type t from the repository. +// It may use all of buf[:cap(buf)] as scratch space. +func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error) { + debug.Log("load %v with id %v (buf len %v, cap %d)", t, id, len(buf), cap(buf)) // lookup packs blobs, found := r.idx.Lookup(id, t) if !found { debug.Log("id %v not found in index", id) - return 0, errors.Errorf("id %v not found in repository", id) + return nil, errors.Errorf("id %v not found in repository", id) } // try cached pack files first @@ -157,13 +156,14 @@ func (r *Repository) loadBlob(ctx context.Context, id restic.ID, t restic.BlobTy // load blob from pack h := restic.Handle{Type: restic.DataFile, Name: blob.PackID.String()} - if uint(cap(plaintextBuf)) < blob.Length { - return 0, errors.Errorf("buffer is too small: %v < %v", cap(plaintextBuf), blob.Length) + switch { + case cap(buf) < int(blob.Length): + buf = make([]byte, blob.Length) + case len(buf) != int(blob.Length): + buf = buf[:blob.Length] } - plaintextBuf = plaintextBuf[:blob.Length] - - n, err := restic.ReadAt(ctx, r.be, h, int64(blob.Offset), plaintextBuf) + n, err := restic.ReadAt(ctx, r.be, h, int64(blob.Offset), buf) if err != nil { debug.Log("error loading blob %v: %v", blob, err) lastError = err @@ -178,7 +178,7 @@ func (r *Repository) loadBlob(ctx context.Context, id restic.ID, t restic.BlobTy } // decrypt - nonce, ciphertext := plaintextBuf[:r.key.NonceSize()], plaintextBuf[r.key.NonceSize():] + nonce, ciphertext := buf[:r.key.NonceSize()], buf[r.key.NonceSize():] plaintext, err := r.key.Open(ciphertext[:0], nonce, ciphertext, nil) if err != nil { lastError = errors.Errorf("decrypting blob %v failed: %v", id, err) @@ -191,16 +191,16 @@ func (r *Repository) loadBlob(ctx context.Context, id restic.ID, t restic.BlobTy continue } - // move decrypted data to the start of the provided buffer - copy(plaintextBuf[0:], plaintext) - return len(plaintext), nil + // move decrypted data to the start of the buffer + copy(buf, plaintext) + return buf[:len(plaintext)], nil } if lastError != nil { - return 0, lastError + return nil, lastError } - return 0, errors.Errorf("loading blob %v from %v packs failed", id.Str(), len(blobs)) + return nil, errors.Errorf("loading blob %v from %v packs failed", id.Str(), len(blobs)) } // LoadJSONUnpacked decrypts the data and afterwards calls json.Unmarshal on @@ -670,30 +670,6 @@ func (r *Repository) Close() error { return r.be.Close() } -// LoadBlob loads a blob of type t from the repository. -// It may use all of buf[:cap(buf)] as scratch space. -func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.ID, buf []byte) ([]byte, error) { - debug.Log("load blob %v into buf (len %v, cap %v)", id, len(buf), cap(buf)) - size, found := r.idx.LookupSize(id, t) - if !found { - return nil, errors.Errorf("id %v not found in repository", id) - } - - if cap(buf) < restic.CiphertextLength(int(size)) { - buf = restic.NewBlobBuffer(int(size)) - } - - n, err := r.loadBlob(ctx, id, t, buf) - if err != nil { - return nil, err - } - buf = buf[:n] - - debug.Log("loaded %d bytes into buf %p", len(buf), buf) - - return buf, err -} - // SaveBlob saves a blob of type t into the repository. If id is the null id, it // will be computed and returned. func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte, id restic.ID) (restic.ID, error) { @@ -708,19 +684,10 @@ func (r *Repository) SaveBlob(ctx context.Context, t restic.BlobType, buf []byte func (r *Repository) LoadTree(ctx context.Context, id restic.ID) (*restic.Tree, error) { debug.Log("load tree %v", id) - size, found := r.idx.LookupSize(id, restic.TreeBlob) - if !found { - return nil, errors.Errorf("tree %v not found in repository", id) - } - - debug.Log("size is %d, create buffer", size) - buf := restic.NewBlobBuffer(int(size)) - - n, err := r.loadBlob(ctx, id, restic.TreeBlob, buf) + buf, err := r.LoadBlob(ctx, restic.TreeBlob, id, nil) if err != nil { return nil, err } - buf = buf[:n] t := &restic.Tree{} err = json.Unmarshal(buf, t)