From 58719e1f47ab6c28a6d3708fcac9bd7767cffc4f Mon Sep 17 00:00:00 2001 From: greatroar <@> Date: Wed, 17 Jun 2020 12:17:55 +0200 Subject: [PATCH] Replace mount's per-file cache by a global LRU cache --- cmd/restic/cmd_mount.go | 5 +-- go.mod | 2 +- internal/fuse/blobcache.go | 87 ++++++++++++++++++++++++++++++++++++++ internal/fuse/file.go | 15 +++---- internal/fuse/fuse_test.go | 52 ++++++++++++++++++++--- internal/fuse/root.go | 9 +++- 6 files changed, 147 insertions(+), 23 deletions(-) create mode 100644 internal/fuse/blobcache.go diff --git a/cmd/restic/cmd_mount.go b/cmd/restic/cmd_mount.go index 64c7cb987..b17c3d478 100644 --- a/cmd/restic/cmd_mount.go +++ b/cmd/restic/cmd_mount.go @@ -139,10 +139,7 @@ func mount(opts MountOptions, gopts GlobalOptions, mountpoint string) error { Paths: opts.Paths, SnapshotTemplate: opts.SnapshotTemplate, } - root, err := fuse.NewRoot(gopts.ctx, repo, cfg) - if err != nil { - return err - } + root := fuse.NewRoot(gopts.ctx, repo, cfg) Printf("Now serving the repository at %s\n", mountpoint) Printf("When finished, quit with Ctrl-c or umount the mountpoint.\n") diff --git a/go.mod b/go.mod index 8bd5901fa..20bf25234 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/golang/protobuf v1.3.1 // indirect github.com/google/go-cmp v0.2.0 github.com/gopherjs/gopherjs v0.0.0-20190411002643-bd77b112433e // indirect - github.com/hashicorp/golang-lru v0.5.1 // indirect + github.com/hashicorp/golang-lru v0.5.1 github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/juju/ratelimit v1.0.1 github.com/kr/fs v0.1.0 // indirect diff --git a/internal/fuse/blobcache.go b/internal/fuse/blobcache.go new file mode 100644 index 000000000..728457b11 --- /dev/null +++ b/internal/fuse/blobcache.go @@ -0,0 +1,87 @@ +package fuse + +import ( + "sync" + + "github.com/restic/restic/internal/debug" + "github.com/restic/restic/internal/restic" + + "github.com/hashicorp/golang-lru/simplelru" +) + +// Crude estimate of the overhead per blob: a SHA-256, a linked list node +// and some pointers. See comment in blobCache.add. +const cacheOverhead = len(restic.ID{}) + 64 + +// A blobCache is a fixed-size cache of blob contents. +// It is safe for concurrent access. +type blobCache struct { + mu sync.Mutex + c *simplelru.LRU + + free, size int // Current and max capacity, in bytes. +} + +// Construct a blob cache that stores at most size bytes worth of blobs. +func newBlobCache(size int) *blobCache { + c := &blobCache{ + free: size, + size: size, + } + + // NewLRU wants us to specify some max. number of entries, else it errors. + // The actual maximum will be smaller than size/cacheOverhead, because we + // evict entries (RemoveOldest in add) to maintain our size bound. + maxEntries := size / cacheOverhead + lru, err := simplelru.NewLRU(maxEntries, c.evict) + if err != nil { + panic(err) // Can only be maxEntries <= 0. + } + c.c = lru + + return c +} + +func (c *blobCache) add(id restic.ID, blob []byte) { + debug.Log("blobCache: add %v", id) + + size := len(blob) + cacheOverhead + if size > c.size { + return + } + + c.mu.Lock() + defer c.mu.Unlock() + + var key interface{} = id + + if c.c.Contains(key) { // Doesn't update the recency list. + return + } + + // This loop takes at most min(maxEntries, maxchunksize/cacheOverhead) + // iterations. + for size > c.free { + c.c.RemoveOldest() + } + + c.c.Add(key, blob) + c.free -= size +} + +func (c *blobCache) get(id restic.ID) ([]byte, bool) { + c.mu.Lock() + value, ok := c.c.Get(id) + c.mu.Unlock() + + debug.Log("blobCache: get %v, hit %v", id, ok) + + blob, ok := value.([]byte) + return blob, ok +} + +func (c *blobCache) evict(key, value interface{}) { + blob := value.([]byte) + debug.Log("blobCache: evict %v, %d bytes", key, len(blob)) + c.free += len(blob) + cacheOverhead +} diff --git a/internal/fuse/file.go b/internal/fuse/file.go index 791265e13..b38258fd1 100644 --- a/internal/fuse/file.go +++ b/internal/fuse/file.go @@ -28,12 +28,6 @@ type file struct { // cumsize[i] holds the cumulative size of blobs[:i]. cumsize []uint64 - - // Cached blob and its index in the blobs of node. - cached struct { - blob []byte - index int - } } func newFile(ctx context.Context, root *Root, inode uint64, node *restic.Node) (fusefile *file, err error) { @@ -91,8 +85,10 @@ func (f *file) Attr(ctx context.Context, a *fuse.Attr) error { func (f *file) getBlobAt(ctx context.Context, i int) (blob []byte, err error) { debug.Log("getBlobAt(%v, %v)", f.node.Name, i) - if i == f.cached.index && f.cached.blob != nil { - return f.cached.blob, nil + + blob, ok := f.root.blobCache.get(f.node.Content[i]) + if ok { + return blob, nil } blob, err = f.root.repo.LoadBlob(ctx, restic.DataBlob, f.node.Content[i], nil) @@ -100,7 +96,8 @@ func (f *file) getBlobAt(ctx context.Context, i int) (blob []byte, err error) { debug.Log("LoadBlob(%v, %v) failed: %v", f.node.Name, f.node.Content[i], err) return nil, err } - f.cached.blob, f.cached.index = blob, i + + f.root.blobCache.add(f.node.Content[i], blob) return blob, nil } diff --git a/internal/fuse/fuse_test.go b/internal/fuse/fuse_test.go index 147cee317..c855f403c 100644 --- a/internal/fuse/fuse_test.go +++ b/internal/fuse/fuse_test.go @@ -20,6 +20,48 @@ import ( rtest "github.com/restic/restic/internal/test" ) +func TestCache(t *testing.T) { + var id1, id2, id3 restic.ID + id1[0] = 1 + id2[0] = 2 + id3[0] = 3 + + const ( + kiB = 1 << 10 + cacheSize = 64*kiB + 3*cacheOverhead + ) + + c := newBlobCache(cacheSize) + + addAndCheck := func(id restic.ID, exp []byte) { + c.add(id, exp) + blob, ok := c.get(id) + rtest.Assert(t, ok, "blob %v added but not found in cache", id) + rtest.Equals(t, &exp[0], &blob[0]) + rtest.Equals(t, exp, blob) + } + + addAndCheck(id1, make([]byte, 32*kiB)) + addAndCheck(id2, make([]byte, 30*kiB)) + addAndCheck(id3, make([]byte, 10*kiB)) + + _, ok := c.get(id2) + rtest.Assert(t, ok, "blob %v not present", id2) + _, ok = c.get(id1) + rtest.Assert(t, !ok, "blob %v present, but should have been evicted", id1) + + c.add(id1, make([]byte, 1+c.size)) + _, ok = c.get(id1) + rtest.Assert(t, !ok, "blob %v too large but still added to cache") + + c.c.Remove(id1) + c.c.Remove(id3) + c.c.Remove(id2) + + rtest.Equals(t, cacheSize, c.size) + rtest.Equals(t, cacheSize, c.free) +} + func testRead(t testing.TB, f *file, offset, length int, data []byte) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() @@ -114,10 +156,7 @@ func TestFuseFile(t *testing.T) { Size: filesize, Content: content, } - root := &Root{ - blobSizeCache: NewBlobSizeCache(context.TODO(), repo.Index()), - repo: repo, - } + root := NewRoot(context.TODO(), repo, Config{}) t.Logf("blob cache has %d entries", len(root.blobSizeCache.m)) @@ -163,11 +202,10 @@ func testTopUidGid(t *testing.T, cfg Config, repo restic.Repository, uid, gid ui t.Helper() ctx := context.Background() - root, err := NewRoot(ctx, repo, cfg) - rtest.OK(t, err) + root := NewRoot(ctx, repo, cfg) var attr fuse.Attr - err = root.Attr(ctx, &attr) + err := root.Attr(ctx, &attr) rtest.OK(t, err) rtest.Equals(t, uid, attr.Uid) rtest.Equals(t, gid, attr.Gid) diff --git a/internal/fuse/root.go b/internal/fuse/root.go index a7061b224..c43f8b448 100644 --- a/internal/fuse/root.go +++ b/internal/fuse/root.go @@ -29,6 +29,7 @@ type Root struct { cfg Config inode uint64 snapshots restic.Snapshots + blobCache *blobCache blobSizeCache *BlobSizeCache snCount int @@ -45,14 +46,18 @@ var _ = fs.NodeStringLookuper(&Root{}) const rootInode = 1 +// Size of the blob cache. TODO: make this configurable. +const blobCacheSize = 64 << 20 + // NewRoot initializes a new root node from a repository. -func NewRoot(ctx context.Context, repo restic.Repository, cfg Config) (*Root, error) { +func NewRoot(ctx context.Context, repo restic.Repository, cfg Config) *Root { debug.Log("NewRoot(), config %v", cfg) root := &Root{ repo: repo, inode: rootInode, cfg: cfg, + blobCache: newBlobCache(blobCacheSize), blobSizeCache: NewBlobSizeCache(ctx, repo.Index()), } @@ -70,7 +75,7 @@ func NewRoot(ctx context.Context, repo restic.Repository, cfg Config) (*Root, er root.MetaDir = NewMetaDir(root, rootInode, entries) - return root, nil + return root } // Root is just there to satisfy fs.Root, it returns itself.