Merge pull request #2790 from greatroar/fix-quadratic-read

Fix quadratic file reading in restic mount
This commit is contained in:
MichaelEischer 2020-07-12 18:42:14 +02:00 committed by GitHub
commit b84f5177cb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 167 additions and 45 deletions

View File

@ -0,0 +1,6 @@
Enhancement: Optimized file access in restic mount
Reading large (> 100GiB) files from restic mountpoints is now faster,
and the speedup is greater for larger files.
https://github.com/restic/restic/pull/2790

View File

@ -139,10 +139,7 @@ func mount(opts MountOptions, gopts GlobalOptions, mountpoint string) error {
Paths: opts.Paths,
SnapshotTemplate: opts.SnapshotTemplate,
}
root, err := fuse.NewRoot(gopts.ctx, repo, cfg)
if err != nil {
return err
}
root := fuse.NewRoot(gopts.ctx, repo, cfg)
Printf("Now serving the repository at %s\n", mountpoint)
Printf("When finished, quit with Ctrl-c or umount the mountpoint.\n")

2
go.mod
View File

@ -13,7 +13,7 @@ require (
github.com/golang/protobuf v1.3.1 // indirect
github.com/google/go-cmp v0.2.0
github.com/gopherjs/gopherjs v0.0.0-20190411002643-bd77b112433e // indirect
github.com/hashicorp/golang-lru v0.5.1 // indirect
github.com/hashicorp/golang-lru v0.5.1
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/juju/ratelimit v1.0.1
github.com/kr/fs v0.1.0 // indirect

View File

@ -0,0 +1,87 @@
package fuse
import (
"sync"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/restic"
"github.com/hashicorp/golang-lru/simplelru"
)
// cacheOverhead is a crude estimate, in bytes, of the bookkeeping cost of
// one cached blob: its ID (a SHA-256), a linked list node and some pointers.
// It is added to the length of every blob when accounting for cache space.
// See newBlobCache and blobCache.add for how it is used.
const cacheOverhead = len(restic.ID{}) + 64
// blobCache is a size-bounded LRU cache of blob contents, keyed by blob ID.
// Its add and get methods serialize access through mu, so they may be called
// from multiple goroutines.
type blobCache struct {
	mu sync.Mutex
	c  *simplelru.LRU

	free int // Remaining capacity, in bytes.
	size int // Maximum capacity, in bytes.
}
// newBlobCache returns an empty blob cache that stores at most size bytes
// worth of blobs, where each blob is charged its length plus cacheOverhead.
//
// It panics if the underlying LRU cannot be constructed, which is expected
// to happen only when size/cacheOverhead is not positive.
func newBlobCache(size int) *blobCache {
	cache := &blobCache{free: size, size: size}

	// NewLRU requires an upper bound on the number of entries and returns an
	// error without one. size/cacheOverhead is a safe bound: the real entry
	// count stays below it because add evicts entries (RemoveOldest) to keep
	// the cache within its byte budget.
	lru, err := simplelru.NewLRU(size/cacheOverhead, cache.evict)
	if err != nil {
		panic(err) // Can only be a non-positive entry bound.
	}

	cache.c = lru
	return cache
}
// add stores blob in the cache under id, evicting the oldest entries until
// there is room for it. A blob that could never fit (its length plus
// cacheOverhead exceeds the cache's total size) is silently dropped, and a
// blob whose id is already cached is left untouched.
func (c *blobCache) add(id restic.ID, blob []byte) {
	debug.Log("blobCache: add %v", id)

	need := len(blob) + cacheOverhead
	if need > c.size {
		return
	}

	c.mu.Lock()
	defer c.mu.Unlock()

	var key interface{} = id

	// Contains does not update the recency list.
	if c.c.Contains(key) {
		return
	}

	// Make room. This loop takes at most
	// min(maxEntries, maxchunksize/cacheOverhead) iterations; each removal
	// credits c.free through the evict callback.
	for c.free < need {
		c.c.RemoveOldest()
	}

	c.c.Add(key, blob)
	c.free -= need
}
// get looks up the blob stored under id. It returns the cached bytes and
// true on a hit, or nil and false when id is not in the cache.
func (c *blobCache) get(id restic.ID) ([]byte, bool) {
	c.mu.Lock()
	entry, hit := c.c.Get(id)
	c.mu.Unlock()

	debug.Log("blobCache: get %v, hit %v", id, hit)

	// On a miss entry is nil, so the assertion yields (nil, false).
	data, isBytes := entry.([]byte)
	return data, isBytes
}
// evict is the eviction callback registered with the LRU in newBlobCache.
// It returns the space charged for the removed blob (its length plus
// cacheOverhead) to c.free. It does not take c.mu itself.
func (c *blobCache) evict(key, value interface{}) {
	n := len(value.([]byte))
	debug.Log("blobCache: evict %v, %d bytes", key, n)
	c.free += n + cacheOverhead
}

View File

@ -3,6 +3,8 @@
package fuse
import (
"sort"
"github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic"
@ -18,21 +20,20 @@ const blockSize = 512
// Statically ensure that *file implements the given interface
var _ = fs.HandleReader(&file{})
var _ = fs.HandleReleaser(&file{})
type file struct {
root *Root
node *restic.Node
inode uint64
sizes []int
blobs [][]byte
// cumsize[i] holds the cumulative size of blobs[:i].
cumsize []uint64
}
func newFile(ctx context.Context, root *Root, inode uint64, node *restic.Node) (fusefile *file, err error) {
debug.Log("create new file for %v with %d blobs", node.Name, len(node.Content))
var bytes uint64
sizes := make([]int, len(node.Content))
cumsize := make([]uint64, 1+len(node.Content))
for i, id := range node.Content {
size, ok := root.blobSizeCache.Lookup(id)
if !ok {
@ -43,8 +44,8 @@ func newFile(ctx context.Context, root *Root, inode uint64, node *restic.Node) (
}
}
sizes[i] = int(size)
bytes += uint64(size)
cumsize[i+1] = bytes
}
if bytes != node.Size {
@ -56,8 +57,8 @@ func newFile(ctx context.Context, root *Root, inode uint64, node *restic.Node) (
inode: inode,
root: root,
node: node,
sizes: sizes,
blobs: make([][]byte, len(node.Content)),
cumsize: cumsize,
}, nil
}
@ -84,13 +85,10 @@ func (f *file) Attr(ctx context.Context, a *fuse.Attr) error {
func (f *file) getBlobAt(ctx context.Context, i int) (blob []byte, err error) {
debug.Log("getBlobAt(%v, %v)", f.node.Name, i)
if f.blobs[i] != nil {
return f.blobs[i], nil
}
// release earlier blobs
for j := 0; j < i; j++ {
f.blobs[j] = nil
blob, ok := f.root.blobCache.get(f.node.Content[i])
if ok {
return blob, nil
}
blob, err = f.root.repo.LoadBlob(ctx, restic.DataBlob, f.node.Content[i], nil)
@ -98,16 +96,17 @@ func (f *file) getBlobAt(ctx context.Context, i int) (blob []byte, err error) {
debug.Log("LoadBlob(%v, %v) failed: %v", f.node.Name, f.node.Content[i], err)
return nil, err
}
f.blobs[i] = blob
f.root.blobCache.add(f.node.Content[i], blob)
return blob, nil
}
func (f *file) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadResponse) error {
debug.Log("Read(%v, %v, %v), file size %v", f.node.Name, req.Size, req.Offset, f.node.Size)
offset := req.Offset
offset := uint64(req.Offset)
if uint64(offset) > f.node.Size {
if offset > f.node.Size {
debug.Log("Read(%v): offset is greater than file size: %v > %v",
f.node.Name, req.Offset, f.node.Size)
@ -123,16 +122,15 @@ func (f *file) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadR
}
// Skip blobs before the offset
startContent := 0
for offset > int64(f.sizes[startContent]) {
offset -= int64(f.sizes[startContent])
startContent++
}
startContent := -1 + sort.Search(len(f.cumsize), func(i int) bool {
return f.cumsize[i] > offset
})
offset -= f.cumsize[startContent]
dst := resp.Data[0:req.Size]
readBytes := 0
remainingBytes := req.Size
for i := startContent; remainingBytes > 0 && i < len(f.sizes); i++ {
for i := startContent; remainingBytes > 0 && i < len(f.cumsize)-1; i++ {
blob, err := f.getBlobAt(ctx, i)
if err != nil {
return err
@ -154,13 +152,6 @@ func (f *file) Read(ctx context.Context, req *fuse.ReadRequest, resp *fuse.ReadR
return nil
}
func (f *file) Release(ctx context.Context, req *fuse.ReleaseRequest) error {
for i := range f.blobs {
f.blobs[i] = nil
}
return nil
}
func (f *file) Listxattr(ctx context.Context, req *fuse.ListxattrRequest, resp *fuse.ListxattrResponse) error {
debug.Log("Listxattr(%v, %v)", f.node.Name, req.Size)
for _, attr := range f.node.ExtendedAttributes {

View File

@ -20,6 +20,48 @@ import (
rtest "github.com/restic/restic/internal/test"
)
// TestCache exercises blobCache: insertion and lookup, eviction of the
// oldest entry when the byte budget is exceeded, rejection of blobs larger
// than the whole cache, and the free-space accounting after removals.
func TestCache(t *testing.T) {
	var id1, id2, id3 restic.ID
	id1[0] = 1
	id2[0] = 2
	id3[0] = 3

	const (
		kiB       = 1 << 10
		cacheSize = 64*kiB + 3*cacheOverhead
	)

	c := newBlobCache(cacheSize)

	// addAndCheck inserts exp under id and verifies that get returns the
	// very same slice (same backing array), not just equal contents.
	addAndCheck := func(id restic.ID, exp []byte) {
		c.add(id, exp)
		blob, ok := c.get(id)
		rtest.Assert(t, ok, "blob %v added but not found in cache", id)
		rtest.Equals(t, &exp[0], &blob[0])
		rtest.Equals(t, exp, blob)
	}

	// 32 KiB + 30 KiB fit into the budget; adding another 10 KiB does not,
	// so the oldest entry (id1) has to be evicted to make room for id3.
	addAndCheck(id1, make([]byte, 32*kiB))
	addAndCheck(id2, make([]byte, 30*kiB))
	addAndCheck(id3, make([]byte, 10*kiB))

	_, ok := c.get(id2)
	rtest.Assert(t, ok, "blob %v not present", id2)
	_, ok = c.get(id1)
	rtest.Assert(t, !ok, "blob %v present, but should have been evicted", id1)

	// A blob larger than the whole cache must be rejected outright.
	c.add(id1, make([]byte, 1+c.size))
	_, ok = c.get(id1)
	rtest.Assert(t, !ok, "blob %v too large but still added to cache", id1)

	// After removing everything, the accounting must be back at its
	// initial state. Removing id1, which is not cached, is a no-op.
	c.c.Remove(id1)
	c.c.Remove(id3)
	c.c.Remove(id2)

	rtest.Equals(t, cacheSize, c.size)
	rtest.Equals(t, cacheSize, c.free)
}
func testRead(t testing.TB, f *file, offset, length int, data []byte) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
@ -114,10 +156,7 @@ func TestFuseFile(t *testing.T) {
Size: filesize,
Content: content,
}
root := &Root{
blobSizeCache: NewBlobSizeCache(context.TODO(), repo.Index()),
repo: repo,
}
root := NewRoot(context.TODO(), repo, Config{})
t.Logf("blob cache has %d entries", len(root.blobSizeCache.m))
@ -146,8 +185,6 @@ func TestFuseFile(t *testing.T) {
t.Errorf("test %d failed, wrong data returned (offset %v, length %v)", i, offset, length)
}
}
rtest.OK(t, f.Release(ctx, nil))
}
// Test top-level directories for their UID and GID.
@ -165,11 +202,10 @@ func testTopUidGid(t *testing.T, cfg Config, repo restic.Repository, uid, gid ui
t.Helper()
ctx := context.Background()
root, err := NewRoot(ctx, repo, cfg)
rtest.OK(t, err)
root := NewRoot(ctx, repo, cfg)
var attr fuse.Attr
err = root.Attr(ctx, &attr)
err := root.Attr(ctx, &attr)
rtest.OK(t, err)
rtest.Equals(t, uid, attr.Uid)
rtest.Equals(t, gid, attr.Gid)

View File

@ -29,6 +29,7 @@ type Root struct {
cfg Config
inode uint64
snapshots restic.Snapshots
blobCache *blobCache
blobSizeCache *BlobSizeCache
snCount int
@ -45,14 +46,18 @@ var _ = fs.NodeStringLookuper(&Root{})
const rootInode = 1
// Size of the blob cache. TODO: make this configurable.
const blobCacheSize = 64 << 20
// NewRoot initializes a new root node from a repository.
func NewRoot(ctx context.Context, repo restic.Repository, cfg Config) (*Root, error) {
func NewRoot(ctx context.Context, repo restic.Repository, cfg Config) *Root {
debug.Log("NewRoot(), config %v", cfg)
root := &Root{
repo: repo,
inode: rootInode,
cfg: cfg,
blobCache: newBlobCache(blobCacheSize),
blobSizeCache: NewBlobSizeCache(ctx, repo.Index()),
}
@ -70,7 +75,7 @@ func NewRoot(ctx context.Context, repo restic.Repository, cfg Config) (*Root, er
root.MetaDir = NewMetaDir(root, rootInode, entries)
return root, nil
return root
}
// Root is just there to satisfy fs.Root, it returns itself.