diff --git a/changelog/unreleased/pull-4006 b/changelog/unreleased/pull-4006 new file mode 100644 index 000000000..01f4ddb6e --- /dev/null +++ b/changelog/unreleased/pull-4006 @@ -0,0 +1,16 @@ +Enhancement: (alpha) Store deviceID only for hardlinks + +Set `RESTIC_FEATURES=device-id-for-hardlinks` to enable this alpha feature. +The feature flag will be removed after repository format version 3 becomes +available or be replaced with a different solution. + +When creating backups from a filesystem snapshot, for example created using +btrfs subvolumes, the deviceID of the filesystem changes compared to previous +snapshots. This prevented restic from deduplicating the directory metadata of +a snapshot. + +When this alpha feature is enabled, then the deviceID is only stored for +hardlinks. This significantly reduces the metadata duplication for most +backups. + +https://github.com/restic/restic/pull/4006 diff --git a/changelog/unreleased/pull-4503 b/changelog/unreleased/pull-4503 index 3ce5c48e8..b52552d69 100644 --- a/changelog/unreleased/pull-4503 +++ b/changelog/unreleased/pull-4503 @@ -4,4 +4,5 @@ If files on different devices had the same inode id, then the `stats` command did not correctly calculate the snapshot size. This has been fixed. https://github.com/restic/restic/pull/4503 +https://github.com/restic/restic/pull/4006 https://forum.restic.net/t/possible-bug-in-stats/6461/8 diff --git a/cmd/restic/cmd_stats.go b/cmd/restic/cmd_stats.go index d3078a419..b84620bab 100644 --- a/cmd/restic/cmd_stats.go +++ b/cmd/restic/cmd_stats.go @@ -270,11 +270,14 @@ func statsWalkTree(repo restic.Loader, opts StatsOptions, stats *statsContainer, // will still be restored stats.TotalFileCount++ - // if inodes are present, only count each inode once - // (hard links do not increase restore size) - if !hardLinkIndex.Has(node.Inode, node.DeviceID) || node.Inode == 0 { - hardLinkIndex.Add(node.Inode, node.DeviceID, struct{}{}) + if node.Links == 1 || node.Type == "dir" { stats.TotalSize += node.Size + } else { + // if hardlinks are present only count each deviceID+inode once + if !hardLinkIndex.Has(node.Inode, node.DeviceID) || node.Inode == 0 { + hardLinkIndex.Add(node.Inode, node.DeviceID, struct{}{}) + stats.TotalSize += node.Size + } } } diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index 77ddba7c4..19d16c4d3 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -12,6 +12,7 @@ import ( "github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/feature" "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/restic" "golang.org/x/sync/errgroup" @@ -188,6 +189,14 @@ func (arch *Archiver) nodeFromFileInfo(snPath, filename string, fi os.FileInfo) if !arch.WithAtime { node.AccessTime = node.ModTime } + if feature.Flag.Enabled(feature.DeviceIDForHardlinks) { + if node.Links == 1 || node.Type == "dir" { + // the DeviceID is only necessary for hardlinked files + // when using subvolumes or snapshots their deviceIDs tend to change which causes + // restic to upload new tree blobs + node.DeviceID = 0 + } + } // overwrite name to match that within the snapshot node.Name = path.Base(snPath) if err != nil { diff --git a/internal/archiver/archiver_test.go b/internal/archiver/archiver_test.go index 46ef44251..411994911 100644 --- a/internal/archiver/archiver_test.go +++ b/internal/archiver/archiver_test.go @@ -19,6 +19,7 @@ import ( "github.com/restic/restic/internal/backend/mem" "github.com/restic/restic/internal/checker" "github.com/restic/restic/internal/errors" + "github.com/restic/restic/internal/feature" "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" @@ -2125,6 +2126,8 @@ const ( ) func TestMetadataChanged(t *testing.T) { + defer feature.TestSetFlag(t, feature.Flag, feature.DeviceIDForHardlinks, true)() + files := TestDir{ "testfile": TestFile{ Content: "foo bar test file", @@ -2153,6 +2156,7 @@ func TestMetadataChanged(t *testing.T) { sn, node2 := snapshot(t, repo, fs, nil, "testfile") // set some values so we can then compare the nodes + want.DeviceID = 0 want.Content = node2.Content want.Path = "" if len(want.ExtendedAttributes) == 0 { diff --git a/internal/archiver/archiver_unix_test.go b/internal/archiver/archiver_unix_test.go index 7523f0749..2552b23e1 100644 --- a/internal/archiver/archiver_unix_test.go +++ b/internal/archiver/archiver_unix_test.go @@ -6,6 +6,12 @@ package archiver import ( "os" "syscall" + "testing" + + "github.com/restic/restic/internal/feature" + "github.com/restic/restic/internal/fs" + "github.com/restic/restic/internal/restic" + restictest "github.com/restic/restic/internal/test" ) type wrappedFileInfo struct { @@ -39,3 +45,45 @@ func wrapFileInfo(fi os.FileInfo) os.FileInfo { return res } + +func statAndSnapshot(t *testing.T, repo restic.Repository, name string) (*restic.Node, *restic.Node) { + fi := lstat(t, name) + want, err := restic.NodeFromFileInfo(name, fi) + restictest.OK(t, err) + + _, node := snapshot(t, repo, fs.Local{}, nil, name) + return want, node +} + +func TestHardlinkMetadata(t *testing.T) { + defer feature.TestSetFlag(t, feature.Flag, feature.DeviceIDForHardlinks, true)() + + files := TestDir{ + "testfile": TestFile{ + Content: "foo bar test file", + }, + "linktarget": TestFile{ + Content: "test file", + }, + "testlink": TestHardlink{ + Target: "./linktarget", + }, + "testdir": TestDir{}, + } + + tempdir, repo := prepareTempdirRepoSrc(t, files) + + back := restictest.Chdir(t, tempdir) + defer back() + + want, node := statAndSnapshot(t, repo, "testlink") + restictest.Assert(t, node.DeviceID == want.DeviceID, "device id mismatch expected %v got %v", want.DeviceID, node.DeviceID) + restictest.Assert(t, node.Links == want.Links, "link count mismatch expected %v got %v", want.Links, node.Links) + restictest.Assert(t, node.Inode == want.Inode, "inode mismatch expected %v got %v", want.Inode, node.Inode) + + _, node = statAndSnapshot(t, repo, "testfile") + restictest.Assert(t, node.DeviceID == 0, "device id mismatch for testfile expected %v got %v", 0, node.DeviceID) + + _, node = statAndSnapshot(t, repo, "testdir") + restictest.Assert(t, node.DeviceID == 0, "device id mismatch for testdir expected %v got %v", 0, node.DeviceID) +} diff --git a/internal/archiver/testing.go b/internal/archiver/testing.go index 111c1e68c..0bbd03a72 100644 --- a/internal/archiver/testing.go +++ b/internal/archiver/testing.go @@ -6,6 +6,7 @@ import ( "path" "path/filepath" "runtime" + "sort" "strings" "testing" "time" @@ -63,11 +64,29 @@ func (s TestSymlink) String() string { return "" } +// TestHardlink describes a hardlink created for a test. +type TestHardlink struct { + Target string +} + +func (s TestHardlink) String() string { + return "" +} + // TestCreateFiles creates a directory structure described by dir at target, // which must already exist. On Windows, symlinks aren't created. func TestCreateFiles(t testing.TB, target string, dir TestDir) { t.Helper() - for name, item := range dir { + + // ensure a stable order such that it can be guaranteed that a hardlink target already exists + var names []string + for name := range dir { + names = append(names, name) + } + sort.Strings(names) + + for _, name := range names { + item := dir[name] targetPath := filepath.Join(target, name) switch it := item.(type) { @@ -81,6 +100,11 @@ func TestCreateFiles(t testing.TB, target string, dir TestDir) { if err != nil { t.Fatal(err) } + case TestHardlink: + err := fs.Link(filepath.Join(target, filepath.FromSlash(it.Target)), targetPath) + if err != nil { + t.Fatal(err) + } case TestDir: err := fs.Mkdir(targetPath, 0755) if err != nil { diff --git a/internal/feature/registry.go b/internal/feature/registry.go index 620c9ec35..4693b8909 100644 --- a/internal/feature/registry.go +++ b/internal/feature/registry.go @@ -5,13 +5,13 @@ var Flag = New() // flag names are written in kebab-case const ( - ExampleFeature FlagName = "example-feature" DeprecateLegacyIndex FlagName = "deprecate-legacy-index" + DeviceIDForHardlinks FlagName = "device-id-for-hardlinks" ) func init() { Flag.SetFlags(map[FlagName]FlagDesc{ - ExampleFeature: {Type: Alpha, Description: "just for testing"}, DeprecateLegacyIndex: {Type: Beta, Description: "disable support for index format used by restic 0.1.0. Use `restic repair index` to update the index if necessary."}, + DeviceIDForHardlinks: {Type: Alpha, Description: "store deviceID only for hardlinks to reduce metadata changes for example when using btrfs subvolumes. Will be removed in a future restic version after repository format 3 is available"}, }) } diff --git a/internal/restic/node.go b/internal/restic/node.go index cbe9ef363..e7688aada 100644 --- a/internal/restic/node.go +++ b/internal/restic/node.go @@ -82,7 +82,7 @@ type Node struct { User string `json:"user,omitempty"` Group string `json:"group,omitempty"` Inode uint64 `json:"inode,omitempty"` - DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev + DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev, only stored for hardlinks Size uint64 `json:"size,omitempty"` Links uint64 `json:"links,omitempty"` LinkTarget string `json:"linktarget,omitempty"`