Merge pull request #2856 from aawsome/remove-readahead

Simplify cache logic
This commit is contained in:
MichaelEischer 2021-09-04 20:25:49 +02:00 committed by GitHub
commit fa5ca8af81
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 64 additions and 100 deletions

View File

@ -425,10 +425,11 @@ func removePacksExcept(gopts GlobalOptions, t *testing.T, keep restic.IDSet, rem
// Get all tree packs
rtest.OK(t, r.LoadIndex(gopts.ctx))
treePacks := restic.NewIDSet()
for _, idx := range r.Index().(*repository.MasterIndex).All() {
for _, id := range idx.TreePacks() {
treePacks.Insert(id)
for pb := range r.Index().Each(context.TODO()) {
if pb.Type == restic.TreeBlob {
treePacks.Insert(pb.PackID)
}
}
@ -487,11 +488,10 @@ func TestBackupTreeLoadError(t *testing.T) {
r, err := OpenRepository(env.gopts)
rtest.OK(t, err)
rtest.OK(t, r.LoadIndex(env.gopts.ctx))
// collect tree packs of subdirectory
subTreePacks := restic.NewIDSet()
for _, idx := range r.Index().(*repository.MasterIndex).All() {
for _, id := range idx.TreePacks() {
subTreePacks.Insert(id)
treePacks := restic.NewIDSet()
for pb := range r.Index().Each(context.TODO()) {
if pb.Type == restic.TreeBlob {
treePacks.Insert(pb.PackID)
}
}
@ -499,7 +499,7 @@ func TestBackupTreeLoadError(t *testing.T) {
testRunCheck(t, env.gopts)
// delete the subdirectory pack first
for id := range subTreePacks {
for id := range treePacks {
rtest.OK(t, r.Backend().Remove(env.gopts.ctx, restic.Handle{Type: restic.PackFile, Name: id.String()}))
}
testRunRebuildIndex(t, env.gopts)

View File

@ -71,6 +71,7 @@ func (be *MemoryBackend) Save(ctx context.Context, h restic.Handle, rd restic.Re
be.m.Lock()
defer be.m.Unlock()
h.ContainedBlobType = restic.InvalidBlob
if h.Type == restic.ConfigFile {
h.Name = ""
}
@ -122,6 +123,7 @@ func (be *MemoryBackend) openReader(ctx context.Context, h restic.Handle, length
be.m.Lock()
defer be.m.Unlock()
h.ContainedBlobType = restic.InvalidBlob
if h.Type == restic.ConfigFile {
h.Name = ""
}
@ -158,6 +160,7 @@ func (be *MemoryBackend) Stat(ctx context.Context, h restic.Handle) (restic.File
return restic.FileInfo{}, backoff.Permanent(err)
}
h.ContainedBlobType = restic.InvalidBlob
if h.Type == restic.ConfigFile {
h.Name = ""
}
@ -179,6 +182,7 @@ func (be *MemoryBackend) Remove(ctx context.Context, h restic.Handle) error {
debug.Log("Remove %v", h)
h.ContainedBlobType = restic.InvalidBlob
if _, ok := be.data[h]; !ok {
return errNotFound
}

View File

@ -21,7 +21,7 @@ type Backend struct {
inProgress map[restic.Handle]chan struct{}
}
// ensure cachedBackend implements restic.Backend
// ensure Backend implements restic.Backend
var _ restic.Backend = &Backend{}
func newBackend(be restic.Backend, c *Cache) *Backend {
@ -43,13 +43,19 @@ func (b *Backend) Remove(ctx context.Context, h restic.Handle) error {
return b.Cache.remove(h)
}
func autoCached(t restic.FileType) bool {
return t == restic.IndexFile || t == restic.SnapshotFile
func autoCacheTypes(h restic.Handle) bool {
switch h.Type {
case restic.IndexFile, restic.SnapshotFile:
return true
case restic.PackFile:
return h.ContainedBlobType == restic.TreeBlob
}
return false
}
// Save stores a new file in the backend and the cache.
func (b *Backend) Save(ctx context.Context, h restic.Handle, rd restic.RewindReader) error {
if !autoCached(h.Type) {
if !autoCacheTypes(h) {
return b.Backend.Save(ctx, h, rd)
}
@ -168,25 +174,8 @@ func (b *Backend) Load(ctx context.Context, h restic.Handle, length int, offset
debug.Log("error loading %v from cache: %v", h, err)
}
// partial file requested
if offset != 0 || length != 0 {
if b.Cache.PerformReadahead(h) {
debug.Log("performing readahead for %v", h)
err := b.cacheFile(ctx, h)
if err == nil {
return b.loadFromCacheOrDelegate(ctx, h, length, offset, consumer)
}
debug.Log("error caching %v: %v", h, err)
}
debug.Log("Load(%v, %v, %v): partial file requested, delegating to backend", h, length, offset)
return b.Backend.Load(ctx, h, length, offset, consumer)
}
// if we don't automatically cache this file type, fall back to the backend
if !autoCached(h.Type) {
if !autoCacheTypes(h) {
debug.Log("Load(%v, %v, %v): delegating to backend", h, length, offset)
return b.Backend.Load(ctx, h, length, offset, consumer)
}

View File

@ -17,10 +17,9 @@ import (
// Cache manages a local cache.
type Cache struct {
path string
Base string
Created bool
PerformReadahead func(restic.Handle) bool
path string
Base string
Created bool
}
const dirMode = 0700
@ -152,10 +151,6 @@ func New(id string, basedir string) (c *Cache, err error) {
path: cachedir,
Base: basedir,
Created: created,
PerformReadahead: func(restic.Handle) bool {
// do not perform readahead by default
return false
},
}
return c, nil

View File

@ -42,10 +42,10 @@ import (
// Index holds lookup tables for id -> pack.
type Index struct {
m sync.Mutex
byType [restic.NumBlobTypes]indexMap
packs restic.IDs
treePacks restic.IDs
m sync.Mutex
byType [restic.NumBlobTypes]indexMap
packs restic.IDs
mixedPacks restic.IDSet
// only used by Store, StorePacks does not check for already saved packIDs
packIDToIndex map[restic.ID]int
@ -59,6 +59,7 @@ type Index struct {
func NewIndex() *Index {
return &Index{
packIDToIndex: make(map[restic.ID]int),
mixedPacks: restic.NewIDSet(),
created: time.Now(),
}
}
@ -511,9 +512,9 @@ func (idx *Index) Dump(w io.Writer) error {
return nil
}
// TreePacks returns a list of packs that contain only tree blobs.
func (idx *Index) TreePacks() restic.IDs {
return idx.treePacks
// MixedPacks returns an IDSet that contain packs which have mixed blobs.
func (idx *Index) MixedPacks() restic.IDSet {
return idx.mixedPacks
}
// merge() merges indexes, i.e. idx.merge(idx2) merges the contents of idx2 into idx.
@ -558,7 +559,7 @@ func (idx *Index) merge(idx2 *Index) error {
})
}
idx.treePacks = append(idx.treePacks, idx2.treePacks...)
idx.mixedPacks.Merge(idx2.mixedPacks)
idx.ids = append(idx.ids, idx2.ids...)
idx.supersedes = append(idx.supersedes, idx2.supersedes...)
@ -612,8 +613,8 @@ func DecodeIndex(buf []byte, id restic.ID) (idx *Index, oldFormat bool, err erro
}
}
if !data && tree {
idx.treePacks = append(idx.treePacks, pack.ID)
if data && tree {
idx.mixedPacks.Insert(pack.ID)
}
}
idx.supersedes = idxJSON.Supersedes
@ -657,8 +658,8 @@ func decodeOldIndex(buf []byte) (idx *Index, err error) {
}
}
if !data && tree {
idx.treePacks = append(idx.treePacks, pack.ID)
if data && tree {
idx.mixedPacks.Insert(pack.ID)
}
}
idx.final = true

View File

@ -100,6 +100,18 @@ func (mi *MasterIndex) Has(bh restic.BlobHandle) bool {
return false
}
func (mi *MasterIndex) IsMixedPack(packID restic.ID) bool {
mi.idxMutex.RLock()
defer mi.idxMutex.RUnlock()
for _, idx := range mi.idx {
if idx.MixedPacks().Has(packID) {
return true
}
}
return false
}
// Packs returns all packs that are covered by the index.
// If packBlacklist is given, those packs are only contained in the
// resulting IDSet if they are contained in a non-final (newly written) index.

View File

@ -113,7 +113,8 @@ func (r *Repository) savePacker(ctx context.Context, t restic.BlobType, p *Packe
}
id := restic.IDFromHash(p.hw.Sum(nil))
h := restic.Handle{Type: restic.PackFile, Name: id.String()}
h := restic.Handle{Type: restic.PackFile, Name: id.String(),
ContainedBlobType: t}
var beHash []byte
if p.beHw != nil {
beHash = p.beHw.Sum(nil)
@ -131,20 +132,6 @@ func (r *Repository) savePacker(ctx context.Context, t restic.BlobType, p *Packe
debug.Log("saved as %v", h)
if t == restic.TreeBlob && r.Cache != nil {
debug.Log("saving tree pack file in cache")
_, err = p.tmpfile.Seek(0, 0)
if err != nil {
return errors.Wrap(err, "Seek")
}
err := r.Cache.Save(h, p.tmpfile)
if err != nil {
return err
}
}
err = p.tmpfile.Close()
if err != nil {
return errors.Wrap(err, "close tempfile")

View File

@ -180,7 +180,12 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.
}
// load blob from pack
h := restic.Handle{Type: restic.PackFile, Name: blob.PackID.String()}
bt := t
if r.idx.IsMixedPack(blob.PackID) {
bt = restic.InvalidBlob
}
h := restic.Handle{Type: restic.PackFile,
Name: blob.PackID.String(), ContainedBlobType: bt}
switch {
case cap(buf) < int(blob.Length):
@ -570,36 +575,6 @@ func (r *Repository) PrepareCache(indexIDs restic.IDSet) error {
fmt.Fprintf(os.Stderr, "error clearing pack files in cache: %v\n", err)
}
treePacks := restic.NewIDSet()
for _, idx := range r.idx.All() {
for _, id := range idx.TreePacks() {
treePacks.Insert(id)
}
}
// use readahead
debug.Log("using readahead")
cache := r.Cache
cache.PerformReadahead = func(h restic.Handle) bool {
if h.Type != restic.PackFile {
debug.Log("no readahead for %v, is not a pack file", h)
return false
}
id, err := restic.ParseID(h.Name)
if err != nil {
debug.Log("no readahead for %v, invalid ID", h)
return false
}
if treePacks.Has(id) {
debug.Log("perform readahead for %v", h)
return true
}
debug.Log("no readahead for %v, not tree file", h)
return false
}
return nil
}

View File

@ -21,8 +21,9 @@ const (
// Handle is used to store and access data in a backend.
type Handle struct {
Type FileType
Name string
Type FileType
ContainedBlobType BlobType
Name string
}
func (h Handle) String() string {

View File

@ -243,7 +243,7 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
return err
}
h := restic.Handle{Type: restic.PackFile, Name: pack.id.String()}
h := restic.Handle{Type: restic.PackFile, Name: pack.id.String(), ContainedBlobType: restic.DataBlob}
err := r.packLoader(ctx, h, int(end-start), start, func(rd io.Reader) error {
bufferSize := int(end - start)
if bufferSize > maxBufferSize {