From 6adb6296081227f7b7855f2122bd7908b954218d Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Thu, 26 May 2022 22:32:44 +0200 Subject: [PATCH 1/2] Add support for non-utf8 symlink targets --- changelog/unreleased/issue-3311 | 12 ++++++++ internal/restic/node.go | 52 +++++++++++++++++++++++---------- 2 files changed, 48 insertions(+), 16 deletions(-) create mode 100644 changelog/unreleased/issue-3311 diff --git a/changelog/unreleased/issue-3311 b/changelog/unreleased/issue-3311 new file mode 100644 index 000000000..4dfe502f7 --- /dev/null +++ b/changelog/unreleased/issue-3311 @@ -0,0 +1,12 @@ +Bugfix: Support non-UTF8 paths as symlink target + +Restic versions before 0.16.0 did not correctly back up and restore symlinks +that contain a non-UTF8 target. Note that this only affects systems that still +use a non-Unicode encoding for filesystem paths. + +We have extended the repository format to add support for such symlinks. Please +note that at least restic version 0.16.0 must be used for both backup and +restore to correctly handle non-UTF8 symlink targets. + +https://github.com/restic/restic/issues/3311 +https://github.com/restic/restic/pull/3802 diff --git a/internal/restic/node.go b/internal/restic/node.go index f2d9f2315..edb49bfca 100644 --- a/internal/restic/node.go +++ b/internal/restic/node.go @@ -10,6 +10,7 @@ import ( "sync" "syscall" "time" + "unicode/utf8" "github.com/restic/restic/internal/errors" @@ -27,21 +28,26 @@ type ExtendedAttribute struct { // Node is a file, directory or other item in a backup. 
type Node struct { - Name string `json:"name"` - Type string `json:"type"` - Mode os.FileMode `json:"mode,omitempty"` - ModTime time.Time `json:"mtime,omitempty"` - AccessTime time.Time `json:"atime,omitempty"` - ChangeTime time.Time `json:"ctime,omitempty"` - UID uint32 `json:"uid"` - GID uint32 `json:"gid"` - User string `json:"user,omitempty"` - Group string `json:"group,omitempty"` - Inode uint64 `json:"inode,omitempty"` - DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev - Size uint64 `json:"size,omitempty"` - Links uint64 `json:"links,omitempty"` - LinkTarget string `json:"linktarget,omitempty"` + Name string `json:"name"` + Type string `json:"type"` + Mode os.FileMode `json:"mode,omitempty"` + ModTime time.Time `json:"mtime,omitempty"` + AccessTime time.Time `json:"atime,omitempty"` + ChangeTime time.Time `json:"ctime,omitempty"` + UID uint32 `json:"uid"` + GID uint32 `json:"gid"` + User string `json:"user,omitempty"` + Group string `json:"group,omitempty"` + Inode uint64 `json:"inode,omitempty"` + DeviceID uint64 `json:"device_id,omitempty"` // device id of the file, stat.st_dev + Size uint64 `json:"size,omitempty"` + Links uint64 `json:"links,omitempty"` + LinkTarget string `json:"linktarget,omitempty"` + // implicitly base64-encoded field. Only used while encoding, `linktarget_raw` will overwrite LinkTarget if present. + // This allows storing arbitrary byte-sequences, which are possible as symlink targets on unix systems, + // as LinkTarget without breaking backwards-compatibility. + // Must only be set if the linktarget cannot be encoded as valid utf8. 
+ LinkTargetRaw []byte `json:"linktarget_raw,omitempty"` ExtendedAttributes []ExtendedAttribute `json:"extended_attributes,omitempty"` Device uint64 `json:"device,omitempty"` // in case of Type == "dev", stat.st_rdev Content IDs `json:"content"` @@ -344,6 +350,13 @@ func (node Node) MarshalJSON() ([]byte, error) { nj := nodeJSON(node) name := strconv.Quote(node.Name) nj.Name = name[1 : len(name)-1] + if nj.LinkTargetRaw != nil { + panic("LinkTargetRaw must not be set manually") + } + if !utf8.ValidString(node.LinkTarget) { + // store raw bytes if invalid utf8 + nj.LinkTargetRaw = []byte(node.LinkTarget) + } return json.Marshal(nj) } @@ -358,7 +371,14 @@ func (node *Node) UnmarshalJSON(data []byte) error { } nj.Name, err = strconv.Unquote(`"` + nj.Name + `"`) - return errors.Wrap(err, "Unquote") + if err != nil { + return errors.Wrap(err, "Unquote") + } + if nj.LinkTargetRaw != nil { + nj.LinkTarget = string(nj.LinkTargetRaw) + nj.LinkTargetRaw = nil + } + return nil } func (node Node) Equals(other Node) bool { From f12bbd9229c1be438c3b85abc970d04b72d9447b Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Fri, 17 Jun 2022 19:34:41 +0200 Subject: [PATCH 2/2] restic: check that Node.LinkTarget can handle non-utf8 characters --- internal/restic/node_test.go | 38 ++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/internal/restic/node_test.go b/internal/restic/node_test.go index 45ccd790c..aae010421 100644 --- a/internal/restic/node_test.go +++ b/internal/restic/node_test.go @@ -2,6 +2,8 @@ package restic_test import ( "context" + "encoding/json" + "fmt" "os" "path/filepath" "reflect" @@ -10,6 +12,7 @@ import ( "time" "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/test" rtest "github.com/restic/restic/internal/test" ) @@ -334,3 +337,38 @@ func TestFixTime(t *testing.T) { }) } } + +func TestSymlinkSerialization(t *testing.T) { + for _, link := range []string{ + "válîd \t Üñi¢òde \n śẗŕinǵ", + 
string([]byte{0, 1, 2, 0xfa, 0xfb, 0xfc}), + } { + n := restic.Node{ + LinkTarget: link, + } + ser, err := json.Marshal(n) + test.OK(t, err) + var n2 restic.Node + err = json.Unmarshal(ser, &n2) + test.OK(t, err) + fmt.Println(string(ser)) + + test.Equals(t, n.LinkTarget, n2.LinkTarget) + } +} + +func TestSymlinkSerializationFormat(t *testing.T) { + for _, d := range []struct { + ser string + linkTarget string + }{ + {`{"linktarget":"test"}`, "test"}, + {`{"linktarget":"\u0000\u0001\u0002\ufffd\ufffd\ufffd","linktarget_raw":"AAEC+vv8"}`, string([]byte{0, 1, 2, 0xfa, 0xfb, 0xfc})}, + } { + var n2 restic.Node + err := json.Unmarshal([]byte(d.ser), &n2) + test.OK(t, err) + test.Equals(t, d.linkTarget, n2.LinkTarget) + test.Assert(t, n2.LinkTargetRaw == nil, "quoted link target is just a helper field and must be unset after decoding") + } +}