From 2e7d47502932dca7e3b04ee3959270045ad17280 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 15 Aug 2020 17:41:55 +0200 Subject: [PATCH 1/3] Process packs in order of first appearance --- internal/restorer/filerestorer.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index 2dc5982b0..af6427a49 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -101,6 +101,10 @@ func (r *fileRestorer) forEachBlob(blobIDs []restic.ID, fn func(packID restic.ID func (r *fileRestorer) restoreFiles(ctx context.Context) error { packs := make(map[restic.ID]*packInfo) // all packs + // Process packs in order of first access. While this cannot guarantee + // that file chunks are restored sequentially, it offers a good enough + // approximation to shorten restore times by up to 19% in some tests. + var packOrder restic.IDs // create packInfo from fileInfo for _, file := range r.files { @@ -123,6 +127,7 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error { files: make(map[*fileInfo]struct{}), } packs[packID] = pack + packOrder = append(packOrder, packID) } pack.files[file] = struct{}{} }) @@ -157,7 +162,8 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error { } // the main restore loop - for _, pack := range packs { + for _, id := range packOrder { + pack := packs[id] select { case <-ctx.Done(): return ctx.Err() From 8cc9514879ddba1ffc30e5266331a23c8bee005d Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 15 Aug 2020 17:45:05 +0200 Subject: [PATCH 2/3] restorer: pre-allocate files before loading chunks --- changelog/unreleased/pull-2195 | 6 +++++ internal/restorer/filerestorer.go | 9 ++++--- internal/restorer/fileswriter.go | 17 +++++++++++-- internal/restorer/fileswriter_test.go | 8 +++--- internal/restorer/preallocate_darwin.go | 33 +++++++++++++++++++++++++ internal/restorer/preallocate_linux.go | 16 
++++++++++++ internal/restorer/preallocate_other.go | 11 +++++++++ internal/restorer/restorer.go | 2 +- 8 files changed, 92 insertions(+), 10 deletions(-) create mode 100644 internal/restorer/preallocate_darwin.go create mode 100644 internal/restorer/preallocate_linux.go create mode 100644 internal/restorer/preallocate_other.go diff --git a/changelog/unreleased/pull-2195 b/changelog/unreleased/pull-2195 index c2dac8bdf..a139aa4e1 100644 --- a/changelog/unreleased/pull-2195 +++ b/changelog/unreleased/pull-2195 @@ -14,4 +14,10 @@ file can be written to the file before any of the preceeding file blobs. It is therefore possible to have gaps in the data written to the target files if restore fails or interrupted by the user. +The implementation will try to preallocate space for the restored files +on the filesystem to prevent file fragmentation. This ensures good read +performance for large files, such as VM images. If preallocating +space is not supported by the filesystem, then this step is silently skipped. 
+ https://github.com/restic/restic/pull/2195 +https://github.com/restic/restic/pull/2893 diff --git a/internal/restorer/filerestorer.go b/internal/restorer/filerestorer.go index af6427a49..82435deb2 100644 --- a/internal/restorer/filerestorer.go +++ b/internal/restorer/filerestorer.go @@ -33,6 +33,7 @@ const ( type fileInfo struct { lock sync.Mutex flags int + size int64 location string // file on local filesystem relative to restorer basedir blobs interface{} // blobs of the file } @@ -74,8 +75,8 @@ func newFileRestorer(dst string, } } -func (r *fileRestorer) addFile(location string, content restic.IDs) { - r.files = append(r.files, &fileInfo{location: location, blobs: content}) +func (r *fileRestorer) addFile(location string, content restic.IDs, size int64) { + r.files = append(r.files, &fileInfo{location: location, blobs: content, size: size}) } func (r *fileRestorer) targetPath(location string) string { @@ -275,13 +276,15 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) { // write other blobs after releasing the lock file.lock.Lock() create := file.flags&fileProgress == 0 + createSize := int64(-1) if create { defer file.lock.Unlock() file.flags |= fileProgress + createSize = file.size } else { file.lock.Unlock() } - return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, create) + return r.filesWriter.writeToFile(r.targetPath(file.location), blobData, offset, createSize) } err := writeToFile() if err != nil { diff --git a/internal/restorer/fileswriter.go b/internal/restorer/fileswriter.go index 8d632cd09..c44b38e51 100644 --- a/internal/restorer/fileswriter.go +++ b/internal/restorer/fileswriter.go @@ -5,6 +5,7 @@ import ( "sync" "github.com/cespare/xxhash" + "github.com/restic/restic/internal/debug" ) // writes blobs to target files. 
@@ -33,7 +34,7 @@ func newFilesWriter(count int) *filesWriter { } } -func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create bool) error { +func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, createSize int64) error { bucket := &w.buckets[uint(xxhash.Sum64String(path))%uint(len(w.buckets))] acquireWriter := func() (*os.File, error) { @@ -46,7 +47,7 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create } var flags int - if create { + if createSize >= 0 { flags = os.O_CREATE | os.O_TRUNC | os.O_WRONLY } else { flags = os.O_WRONLY @@ -60,6 +61,18 @@ func (w *filesWriter) writeToFile(path string, blob []byte, offset int64, create bucket.files[path] = wr bucket.users[path] = 1 + if createSize >= 0 { + err := preallocateFile(wr, createSize) + if err != nil { + // Just log the preallocate error but don't let it cause the restore process to fail. + // Preallocate might return an error if the filesystem (implementation) does not + // support preallocation or our combination of parameters to the preallocate call. + // This should yield a syscall.ENOTSUP error, but some other errors might also + // show up. 
+ debug.Log("Failed to preallocate %v with size %v: %v", path, createSize, err) + } + } + return wr, nil } diff --git a/internal/restorer/fileswriter_test.go b/internal/restorer/fileswriter_test.go index 690826534..a6b7e011b 100644 --- a/internal/restorer/fileswriter_test.go +++ b/internal/restorer/fileswriter_test.go @@ -16,19 +16,19 @@ func TestFilesWriterBasic(t *testing.T) { f1 := dir + "/f1" f2 := dir + "/f2" - rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, true)) + rtest.OK(t, w.writeToFile(f1, []byte{1}, 0, 2)) rtest.Equals(t, 0, len(w.buckets[0].files)) rtest.Equals(t, 0, len(w.buckets[0].users)) - rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, true)) + rtest.OK(t, w.writeToFile(f2, []byte{2}, 0, 2)) rtest.Equals(t, 0, len(w.buckets[0].files)) rtest.Equals(t, 0, len(w.buckets[0].users)) - rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, false)) + rtest.OK(t, w.writeToFile(f1, []byte{1}, 1, -1)) rtest.Equals(t, 0, len(w.buckets[0].files)) rtest.Equals(t, 0, len(w.buckets[0].users)) - rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, false)) + rtest.OK(t, w.writeToFile(f2, []byte{2}, 1, -1)) rtest.Equals(t, 0, len(w.buckets[0].files)) rtest.Equals(t, 0, len(w.buckets[0].users)) diff --git a/internal/restorer/preallocate_darwin.go b/internal/restorer/preallocate_darwin.go new file mode 100644 index 000000000..73de322dd --- /dev/null +++ b/internal/restorer/preallocate_darwin.go @@ -0,0 +1,33 @@ +package restorer + +import ( + "os" + "runtime" + "unsafe" + + "golang.org/x/sys/unix" +) + +func preallocateFile(wr *os.File, size int64) error { + // try contiguous first + fst := unix.Fstore_t{ + Flags: unix.F_ALLOCATECONTIG | unix.F_ALLOCATEALL, + Posmode: unix.F_PEOFPOSMODE, + Offset: 0, + Length: size, + } + _, err := unix.FcntlInt(wr.Fd(), unix.F_PREALLOCATE, int(uintptr(unsafe.Pointer(&fst)))) + + if err == nil { + return nil + } + + // just take preallocation in any form, but still ask for everything + fst.Flags = unix.F_ALLOCATEALL + _, err = unix.FcntlInt(wr.Fd(), 
unix.F_PREALLOCATE, int(uintptr(unsafe.Pointer(&fst)))) + + // Keep struct alive until fcntl has returned + runtime.KeepAlive(fst) + + return err +} diff --git a/internal/restorer/preallocate_linux.go b/internal/restorer/preallocate_linux.go new file mode 100644 index 000000000..dc73ddfe2 --- /dev/null +++ b/internal/restorer/preallocate_linux.go @@ -0,0 +1,16 @@ +package restorer + +import ( + "os" + + "golang.org/x/sys/unix" +) + +func preallocateFile(wr *os.File, size int64) error { + if size <= 0 { + return nil + } + // int fallocate(int fd, int mode, off_t offset, off_t len) + // use mode = 0 to also change the file size + return unix.Fallocate(int(wr.Fd()), 0, 0, size) +} diff --git a/internal/restorer/preallocate_other.go b/internal/restorer/preallocate_other.go new file mode 100644 index 000000000..b43afc335 --- /dev/null +++ b/internal/restorer/preallocate_other.go @@ -0,0 +1,11 @@ +// +build !linux,!darwin + +package restorer + +import "os" + +func preallocateFile(wr *os.File, size int64) error { + // Maybe truncate can help? 
+ // Windows: This calls SetEndOfFile which preallocates space on disk + return wr.Truncate(size) +} diff --git a/internal/restorer/restorer.go b/internal/restorer/restorer.go index 415155de7..06e590532 100644 --- a/internal/restorer/restorer.go +++ b/internal/restorer/restorer.go @@ -238,7 +238,7 @@ func (res *Restorer) RestoreTo(ctx context.Context, dst string) error { idx.Add(node.Inode, node.DeviceID, location) } - filerestorer.addFile(location, node.Content) + filerestorer.addFile(location, node.Content, int64(node.Size)) return nil }, From 121233e1b3ead27fd161f2d6acca3aba8869b1c6 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Wed, 19 Aug 2020 22:31:10 +0200 Subject: [PATCH 3/3] Add preallocate tests --- internal/restorer/preallocate_test.go | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 internal/restorer/preallocate_test.go diff --git a/internal/restorer/preallocate_test.go b/internal/restorer/preallocate_test.go new file mode 100644 index 000000000..05b3a8efd --- /dev/null +++ b/internal/restorer/preallocate_test.go @@ -0,0 +1,34 @@ +package restorer + +import ( + "os" + "path" + "strconv" + "testing" + + "github.com/restic/restic/internal/fs" + "github.com/restic/restic/internal/test" +) + +func TestPreallocate(t *testing.T) { + for _, i := range []int64{0, 1, 4096, 1024 * 1024} { + t.Run(strconv.FormatInt(i, 10), func(t *testing.T) { + dirpath, cleanup := test.TempDir(t) + defer cleanup() + + flags := os.O_CREATE | os.O_TRUNC | os.O_WRONLY + wr, err := os.OpenFile(path.Join(dirpath, "test"), flags, 0600) + test.OK(t, err) + defer wr.Close() + + err = preallocateFile(wr, i) + test.OK(t, err) + + fi, err := wr.Stat() + test.OK(t, err) + + efi := fs.ExtendedStat(fi) + test.Assert(t, efi.Size == i || efi.Blocks > 0, "Preallocated size of %v, got size %v block %v", i, efi.Size, efi.Blocks) + }) + } +}