From 3db569c45abddad3b735cf4bb6eac87feed4762d Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Mon, 22 Feb 2016 20:45:51 +0100 Subject: [PATCH 1/2] Update chunker --- vendor/manifest | 4 +- .../src/github.com/restic/chunker/README.md | 9 +- .../src/github.com/restic/chunker/chunker.go | 110 ++++----- .../github.com/restic/chunker/chunker_test.go | 226 +++++++++--------- .../github.com/restic/chunker/example_test.go | 39 +++ .../github.com/restic/chunker/polynomials.go | 10 +- .../restic/chunker/polynomials_test.go | 158 ++++++------ 7 files changed, 299 insertions(+), 257 deletions(-) create mode 100644 vendor/src/github.com/restic/chunker/example_test.go diff --git a/vendor/manifest b/vendor/manifest index 6caebea93..b3a907c3b 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -40,8 +40,8 @@ { "importpath": "github.com/restic/chunker", "repository": "https://github.com/restic/chunker", - "revision": "fc45043175c38d59374024a38fb7123c40a64f20", - "branch": "HEAD" + "revision": "16c849a106e0a50d658e8f5e49a01f6728f4f92c", + "branch": "master" }, { "importpath": "golang.org/x/crypto/pbkdf2", diff --git a/vendor/src/github.com/restic/chunker/README.md b/vendor/src/github.com/restic/chunker/README.md index 818abbcd7..b9339f9e8 100644 --- a/vendor/src/github.com/restic/chunker/README.md +++ b/vendor/src/github.com/restic/chunker/README.md @@ -1,11 +1,12 @@ +[![GoDoc](https://godoc.org/github.com/restic/chunker?status.svg)](http://godoc.org/github.com/restic/chunker) [![Build Status](https://travis-ci.org/restic/chunker.svg?branch=master)](https://travis-ci.org/restic/chunker) -Content Defined Chunking (CDC) based on a rolling Rabin Checksum. - -Part of https://github.com/restic/restic. +The package `chunker` implements content-defined-chunking (CDC) based on a +rolling Rabin Hash. The library is part of the [restic backup +program](https://github.com/restic/restic). An introduction to Content Defined Chunking can be found in the restic blog -post [Foundation - Introducing Content Defined Chunking (CDC)](https://restic.github.io/blog/2015-09-12/restic-foundation1-cdc/). +post [Foundation - Introducing Content Defined Chunking (CDC)](https://restic.github.io/blog/2015-09-12/restic-foundation1-cdc). You can find the API documentation at https://godoc.org/github.com/restic/chunker diff --git a/vendor/src/github.com/restic/chunker/chunker.go b/vendor/src/github.com/restic/chunker/chunker.go index d39bfc71c..8b82d8d5d 100644 --- a/vendor/src/github.com/restic/chunker/chunker.go +++ b/vendor/src/github.com/restic/chunker/chunker.go @@ -2,14 +2,13 @@ package chunker import ( "errors" - "hash" "io" "sync" ) const ( - KiB = 1024 - MiB = 1024 * KiB + kiB = 1024 + miB = 1024 * kiB // WindowSize is the size of the sliding window. windowSize = 64 @@ -17,20 +16,16 @@ const ( // aim to create chunks of 20 bits or about 1MiB on average. averageBits = 20 - // MinSize is the minimal size of a chunk. - MinSize = 512 * KiB - // MaxSize is the maximal size of a chunk. - MaxSize = 8 * MiB + // MinSize is the default minimal size of a chunk. + MinSize = 512 * kiB + // MaxSize is the default maximal size of a chunk. 
+ MaxSize = 8 * miB splitmask = (1 << averageBits) - 1 - chunkerBufSize = 512 * KiB + chunkerBufSize = 512 * kiB ) -var bufPool = sync.Pool{ - New: func() interface{} { return make([]byte, chunkerBufSize) }, -} - type tables struct { out [256]Pol mod [256]Pol @@ -52,15 +47,13 @@ type Chunk struct { Start uint Length uint Cut uint64 - Digest []byte -} - -func (c Chunk) Reader(r io.ReaderAt) io.Reader { - return io.NewSectionReader(r, int64(c.Start), int64(c.Length)) + Data []byte } // Chunker splits content with Rabin Fingerprints. type Chunker struct { + MinSize, MaxSize uint + pol Pol polShift uint tables *tables @@ -82,17 +75,17 @@ type Chunker struct { pre uint // wait for this many bytes before start calculating an new chunk digest uint64 - h hash.Hash } // New returns a new Chunker based on polynomial p that reads from rd // with bufsize and pass all data to hash along the way. -func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker { +func New(rd io.Reader, pol Pol) *Chunker { c := &Chunker{ - buf: bufPool.Get().([]byte), - h: h, - pol: pol, - rd: rd, + buf: make([]byte, chunkerBufSize), + pol: pol, + rd: rd, + MinSize: MinSize, + MaxSize: MaxSize, } c.reset() @@ -100,6 +93,19 @@ func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker { return c } +// Reset reinitializes the chunker with a new reader and polynomial. +func (c *Chunker) Reset(rd io.Reader, pol Pol) { + *c = Chunker{ + buf: c.buf, + pol: pol, + rd: rd, + MinSize: c.MinSize, + MaxSize: c.MaxSize, + } + + c.reset() +} + func (c *Chunker) reset() { c.polShift = uint(c.pol.Deg() - 8) c.fillTables() @@ -115,12 +121,8 @@ func (c *Chunker) reset() { c.slide(1) c.start = c.pos - if c.h != nil { - c.h.Reset() - } - // do not start a new chunk unless at least MinSize bytes have been read - c.pre = MinSize - windowSize + c.pre = c.MinSize - windowSize } // Calculate out_table and mod_table for optimization. Must be called only @@ -179,12 +181,13 @@ func (c *Chunker) fillTables() { } // Next returns the position and length of the next chunk of data. If an error -// occurs while reading, the error is returned with a nil chunk. The state of -// the current chunk is undefined. When the last chunk has been returned, all -// subsequent calls yield a nil chunk and an io.EOF error. -func (c *Chunker) Next() (*Chunk, error) { +// occurs while reading, the error is returned. Afterwards, the state of the +// current chunk is undefined. When the last chunk has been returned, all +// subsequent calls yield an io.EOF error. +func (c *Chunker) Next(data []byte) (Chunk, error) { + data = data[:0] if c.tables == nil { - return nil, errors.New("polynomial is not set") + return Chunk{}, errors.New("polynomial is not set") } for { @@ -203,22 +206,19 @@ func (c *Chunker) Next() (*Chunk, error) { if err == io.EOF && !c.closed { c.closed = true - // return the buffer to the pool - bufPool.Put(c.buf) - // return current chunk, if any bytes have been processed if c.count > 0 { - return &Chunk{ + return Chunk{ Start: c.start, Length: c.count, Cut: c.digest, - Digest: c.hashDigest(), + Data: data, }, nil } } if err != nil { - return nil, err + return Chunk{}, err } c.bpos = 0 @@ -230,7 +230,7 @@ func (c *Chunker) Next() (*Chunk, error) { n := c.bmax - c.bpos if c.pre > uint(n) { c.pre -= uint(n) - c.updateHash(c.buf[c.bpos:c.bmax]) + data = append(data, c.buf[c.bpos:c.bmax]...) 
c.count += uint(n) c.pos += uint(n) @@ -239,7 +239,7 @@ func (c *Chunker) Next() (*Chunk, error) { continue } - c.updateHash(c.buf[c.bpos : c.bpos+c.pre]) + data = append(data, c.buf[c.bpos:c.bpos+c.pre]...) c.bpos += c.pre c.count += c.pre @@ -264,22 +264,22 @@ func (c *Chunker) Next() (*Chunk, error) { // end inline add++ - if add < MinSize { + if add < c.MinSize { continue } if (c.digest&splitmask) == 0 || add >= MaxSize { i := add - c.count - 1 - c.updateHash(c.buf[c.bpos : c.bpos+uint(i)+1]) + data = append(data, c.buf[c.bpos:c.bpos+uint(i)+1]...) c.count = add c.pos += uint(i) + 1 c.bpos += uint(i) + 1 - chunk := &Chunk{ + chunk := Chunk{ Start: c.start, Length: c.count, Cut: c.digest, - Digest: c.hashDigest(), + Data: data, } c.reset() @@ -290,7 +290,7 @@ func (c *Chunker) Next() (*Chunk, error) { steps := c.bmax - c.bpos if steps > 0 { - c.updateHash(c.buf[c.bpos : c.bpos+steps]) + data = append(data, c.buf[c.bpos:c.bpos+steps]...) } c.count += steps c.pos += steps @@ -298,24 +298,6 @@ func (c *Chunker) Next() (*Chunk, error) { } } -func (c *Chunker) updateHash(data []byte) { - if c.h != nil { - // the hashes from crypto/sha* do not return an error - _, err := c.h.Write(data) - if err != nil { - panic(err) - } - } -} - -func (c *Chunker) hashDigest() []byte { - if c.h == nil { - return nil - } - - return c.h.Sum(nil) -} - func (c *Chunker) append(b byte) { index := c.digest >> c.polShift c.digest <<= 8 diff --git a/vendor/src/github.com/restic/chunker/chunker_test.go b/vendor/src/github.com/restic/chunker/chunker_test.go index 9fa54f233..7277aa95d 100644 --- a/vendor/src/github.com/restic/chunker/chunker_test.go +++ b/vendor/src/github.com/restic/chunker/chunker_test.go @@ -1,19 +1,13 @@ -package chunker_test +package chunker import ( "bytes" - "crypto/md5" "crypto/sha256" "encoding/hex" - "hash" "io" - "io/ioutil" "math/rand" "testing" "time" - - "github.com/restic/chunker" - . "github.com/restic/restic/test" ) func parseDigest(s string) []byte { @@ -32,7 +26,7 @@ type chunk struct { } // polynomial used for all the tests below -const testPol = chunker.Pol(0x3DA3358B4DC173) +const testPol = Pol(0x3DA3358B4DC173) // created for 32MB of random data out of math/rand's Uint32() seeded by // constant 23 @@ -68,59 +62,51 @@ var chunks1 = []chunk{ // test if nullbytes are correctly split, even if length is a multiple of MinSize. 
var chunks2 = []chunk{ - chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, - chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, - chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, - chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, + chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, + chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, + chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, + chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, } -func testWithData(t *testing.T, chnker *chunker.Chunker, testChunks []chunk) []*chunker.Chunk { - chunks := []*chunker.Chunk{} +func testWithData(t *testing.T, chnker *Chunker, testChunks []chunk, checkDigest bool) []Chunk { + chunks := []Chunk{} pos := uint(0) for i, chunk := range testChunks { - c, err := chnker.Next() + c, err := chnker.Next(nil) if err != nil { t.Fatalf("Error returned with chunk %d: %v", i, err) } - if c == nil { - t.Fatalf("Nil chunk returned") + if c.Start != pos { + t.Fatalf("Start for chunk %d does not match: expected %d, got %d", + i, pos, c.Start) } - if c != nil { - if c.Start != pos { - t.Fatalf("Start for chunk %d does not match: expected %d, got %d", - i, pos, c.Start) - } + if c.Length != chunk.Length { + t.Fatalf("Length for chunk %d does not match: expected %d, got %d", + i, chunk.Length, c.Length) + } - if c.Length != chunk.Length { - t.Fatalf("Length for chunk %d does not match: expected %d, got %d", - i, chunk.Length, c.Length) - } + if c.Cut != chunk.CutFP { + t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x", + i, len(chunks)-1, chunk.CutFP, c.Cut) + } - if c.Cut != chunk.CutFP { - t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x", - i, len(chunks)-1, chunk.CutFP, c.Cut) - } - - if c.Digest != nil && !bytes.Equal(c.Digest, chunk.Digest) { + if checkDigest { + digest := hashData(c.Data) + if !bytes.Equal(chunk.Digest, digest) { t.Fatalf("Digest fingerprint for chunk %d/%d does not match: expected %02x, got %02x", - i, len(chunks)-1, chunk.Digest, c.Digest) + i, len(chunks)-1, chunk.Digest, digest) } - - pos += c.Length - chunks = append(chunks, c) } + + pos += c.Length + chunks = append(chunks, c) } - c, err := chnker.Next() - - if c != nil { - t.Fatal("additional non-nil chunk returned") - } - + _, err := chnker.Next(nil) if err != io.EOF { t.Fatal("wrong error returned after last chunk") } @@ -143,39 +129,32 @@ func getRandom(seed, count int) []byte { return buf } +func hashData(d []byte) []byte { + h := sha256.New() + h.Write(d) + return h.Sum(nil) +} + func TestChunker(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) - ch := chunker.New(bytes.NewReader(buf), testPol, sha256.New()) - chunks := testWithData(t, ch, chunks1) - - // test reader - for i, c := range chunks { - rd := c.Reader(bytes.NewReader(buf)) - - h := sha256.New() - n, err := io.Copy(h, rd) - if err != nil { - t.Fatalf("io.Copy(): %v", err) - } - - if uint(n) != chunks1[i].Length { - t.Fatalf("reader returned wrong number of bytes: expected %d, got %d", - chunks1[i].Length, n) - } - - d := h.Sum(nil) - if !bytes.Equal(d, 
chunks1[i].Digest) { - t.Fatalf("wrong hash returned: expected %02x, got %02x", - chunks1[i].Digest, d) - } - } + ch := New(bytes.NewReader(buf), testPol) + testWithData(t, ch, chunks1, true) // setup nullbyte data source - buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) - ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) + buf = bytes.Repeat([]byte{0}, len(chunks2)*MinSize) + ch = New(bytes.NewReader(buf), testPol) - testWithData(t, ch, chunks2) + testWithData(t, ch, chunks2, true) +} + +func TestChunkerReset(t *testing.T) { + buf := getRandom(23, 32*1024*1024) + ch := New(bytes.NewReader(buf), testPol) + testWithData(t, ch, chunks1, true) + + ch.Reset(bytes.NewReader(buf), testPol) + testWithData(t, ch, chunks1, true) } func TestChunkerWithRandomPolynomial(t *testing.T) { @@ -184,67 +163,64 @@ func TestChunkerWithRandomPolynomial(t *testing.T) { // generate a new random polynomial start := time.Now() - p, err := chunker.RandomPolynomial() - OK(t, err) + p, err := RandomPolynomial() + if err != nil { + t.Fatal(err) + } t.Logf("generating random polynomial took %v", time.Since(start)) start = time.Now() - ch := chunker.New(bytes.NewReader(buf), p, sha256.New()) + ch := New(bytes.NewReader(buf), p) t.Logf("creating chunker took %v", time.Since(start)) // make sure that first chunk is different - c, err := ch.Next() + c, err := ch.Next(nil) - Assert(t, c.Cut != chunks1[0].CutFP, - "Cut point is the same") - Assert(t, c.Length != chunks1[0].Length, - "Length is the same") - Assert(t, !bytes.Equal(c.Digest, chunks1[0].Digest), - "Digest is the same") + if c.Cut == chunks1[0].CutFP { + t.Fatal("Cut point is the same") + } + + if c.Length == chunks1[0].Length { + t.Fatal("Length is the same") + } + + if bytes.Equal(hashData(c.Data), chunks1[0].Digest) { + t.Fatal("Digest is the same") + } } func TestChunkerWithoutHash(t *testing.T) { // setup data source buf := getRandom(23, 32*1024*1024) - ch := chunker.New(bytes.NewReader(buf), testPol, nil) - chunks := testWithData(t, ch, chunks1) + ch := New(bytes.NewReader(buf), testPol) + chunks := testWithData(t, ch, chunks1, false) // test reader for i, c := range chunks { - rd := c.Reader(bytes.NewReader(buf)) - - buf2, err := ioutil.ReadAll(rd) - if err != nil { - t.Fatalf("io.Copy(): %v", err) - } - - if uint(len(buf2)) != chunks1[i].Length { + if uint(len(c.Data)) != chunks1[i].Length { t.Fatalf("reader returned wrong number of bytes: expected %d, got %d", - chunks1[i].Length, uint(len(buf2))) + chunks1[i].Length, len(c.Data)) } - if uint(len(buf2)) != chunks1[i].Length { - t.Fatalf("wrong number of bytes returned: expected %02x, got %02x", - chunks[i].Length, len(buf2)) - } - - if !bytes.Equal(buf[c.Start:c.Start+c.Length], buf2) { + if !bytes.Equal(buf[c.Start:c.Start+c.Length], c.Data) { t.Fatalf("invalid data for chunk returned: expected %02x, got %02x", - buf[c.Start:c.Start+c.Length], buf2) + buf[c.Start:c.Start+c.Length], c.Data) } } // setup nullbyte data source - buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) - ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) + buf = bytes.Repeat([]byte{0}, len(chunks2)*MinSize) + ch = New(bytes.NewReader(buf), testPol) - testWithData(t, ch, chunks2) + testWithData(t, ch, chunks2, false) } -func benchmarkChunker(b *testing.B, hash hash.Hash) { - size := 10 * 1024 * 1024 +func benchmarkChunker(b *testing.B, checkDigest bool) { + size := 32 * 1024 * 1024 rd := bytes.NewReader(getRandom(23, size)) + ch := New(rd, testPol) + buf := make([]byte, MaxSize) 
b.ResetTimer() b.SetBytes(int64(size)) @@ -253,11 +229,16 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) { for i := 0; i < b.N; i++ { chunks = 0 - rd.Seek(0, 0) - ch := chunker.New(rd, testPol, hash) + _, err := rd.Seek(0, 0) + if err != nil { + b.Fatalf("Seek() return error %v", err) + } + ch.Reset(rd, testPol) + + cur := 0 for { - _, err := ch.Next() + chunk, err := ch.Next(buf) if err == io.EOF { break @@ -267,7 +248,26 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) { b.Fatalf("Unexpected error occurred: %v", err) } + if chunk.Length != chunks1[cur].Length { + b.Errorf("wrong chunk length, want %d, got %d", + chunks1[cur].Length, chunk.Length) + } + + if chunk.Cut != chunks1[cur].CutFP { + b.Errorf("wrong cut fingerprint, want 0x%x, got 0x%x", + chunks1[cur].CutFP, chunk.Cut) + } + + if checkDigest { + h := hashData(chunk.Data) + if !bytes.Equal(h, chunks1[cur].Digest) { + b.Errorf("wrong digest, want %x, got %x", + chunks1[cur].Digest, h) + } + } + chunks++ + cur++ } } @@ -275,24 +275,22 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) { } func BenchmarkChunkerWithSHA256(b *testing.B) { - benchmarkChunker(b, sha256.New()) -} - -func BenchmarkChunkerWithMD5(b *testing.B) { - benchmarkChunker(b, md5.New()) + benchmarkChunker(b, true) } func BenchmarkChunker(b *testing.B) { - benchmarkChunker(b, nil) + benchmarkChunker(b, false) } func BenchmarkNewChunker(b *testing.B) { - p, err := chunker.RandomPolynomial() - OK(b, err) + p, err := RandomPolynomial() + if err != nil { + b.Fatal(err) + } b.ResetTimer() for i := 0; i < b.N; i++ { - chunker.New(bytes.NewBuffer(nil), p, nil) + New(bytes.NewBuffer(nil), p) } } diff --git a/vendor/src/github.com/restic/chunker/example_test.go b/vendor/src/github.com/restic/chunker/example_test.go new file mode 100644 index 000000000..42f475f87 --- /dev/null +++ b/vendor/src/github.com/restic/chunker/example_test.go @@ -0,0 +1,39 @@ +package chunker + +import ( + "bytes" + "crypto/sha256" + "fmt" + "io" +) + +func ExampleChunker() { + // generate 32MiB of deterministic pseudo-random data + data := getRandom(23, 32*1024*1024) + + // create a chunker + chunker := New(bytes.NewReader(data), Pol(0x3DA3358B4DC173)) + + // reuse this buffer + buf := make([]byte, 8*1024*1024) + + for i := 0; i < 5; i++ { + chunk, err := chunker.Next(buf) + if err == io.EOF { + break + } + + if err != nil { + panic(err) + } + + fmt.Printf("%d %02x\n", chunk.Length, sha256.Sum256(chunk.Data)) + } + + // Output: + // 2163460 4b94cb2cf293855ea43bf766731c74969b91aa6bf3c078719aabdd19860d590d + // 643703 5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407 + // 1528956 a73759636a1e7a2758767791c69e81b69fb49236c6929e5d1b654e06e37674ba + // 1955808 c955fb059409b25f07e5ae09defbbc2aadf117c97a3724e06ad4abd2787e6824 + // 2222372 6ba5e9f7e1b310722be3627716cf469be941f7f3e39a4c3bcefea492ec31ee56 +} diff --git a/vendor/src/github.com/restic/chunker/polynomials.go b/vendor/src/github.com/restic/chunker/polynomials.go index 355da1095..801f9a231 100644 --- a/vendor/src/github.com/restic/chunker/polynomials.go +++ b/vendor/src/github.com/restic/chunker/polynomials.go @@ -258,13 +258,15 @@ func qp(p uint, g Pol) Pol { return res.Add(2).Mod(g) } -func (p Pol) MarshalJSON() ([]byte, error) { - buf := strconv.AppendUint([]byte{'"'}, uint64(p), 16) +// MarshalJSON returns the JSON representation of the Pol. 
+func (x Pol) MarshalJSON() ([]byte, error) { + buf := strconv.AppendUint([]byte{'"'}, uint64(x), 16) buf = append(buf, '"') return buf, nil } -func (p *Pol) UnmarshalJSON(data []byte) error { +// UnmarshalJSON parses a Pol from the JSON data. +func (x *Pol) UnmarshalJSON(data []byte) error { if len(data) < 2 { return errors.New("invalid string for polynomial") } @@ -272,7 +274,7 @@ func (p *Pol) UnmarshalJSON(data []byte) error { if err != nil { return err } - *p = Pol(n) + *x = Pol(n) return nil } diff --git a/vendor/src/github.com/restic/chunker/polynomials_test.go b/vendor/src/github.com/restic/chunker/polynomials_test.go index bdfadd671..7caa09883 100644 --- a/vendor/src/github.com/restic/chunker/polynomials_test.go +++ b/vendor/src/github.com/restic/chunker/polynomials_test.go @@ -1,16 +1,13 @@ -package chunker_test +package chunker import ( "strconv" "testing" - - "github.com/restic/chunker" - . "github.com/restic/restic/test" ) var polAddTests = []struct { - x, y chunker.Pol - sum chunker.Pol + x, y Pol + sum Pol }{ {23, 16, 23 ^ 16}, {0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4}, @@ -18,24 +15,29 @@ var polAddTests = []struct { } func TestPolAdd(t *testing.T) { - for _, test := range polAddTests { - Equals(t, test.sum, test.x.Add(test.y)) - Equals(t, test.sum, test.y.Add(test.x)) + for i, test := range polAddTests { + if test.sum != test.x.Add(test.y) { + t.Errorf("test %d failed: sum != x+y", i) + } + + if test.sum != test.y.Add(test.x) { + t.Errorf("test %d failed: sum != y+x", i) + } } } -func parseBin(s string) chunker.Pol { +func parseBin(s string) Pol { i, err := strconv.ParseUint(s, 2, 64) if err != nil { panic(err) } - return chunker.Pol(i) + return Pol(i) } var polMulTests = []struct { - x, y chunker.Pol - res chunker.Pol + x, y Pol + res Pol }{ {1, 2, 2}, { @@ -78,13 +80,15 @@ var polMulTests = []struct { func TestPolMul(t *testing.T) { for i, test := range polMulTests { m := test.x.Mul(test.y) - Assert(t, test.res == m, - "TestPolMul failed for test %d: %v * %v: want %v, got %v", - i, test.x, test.y, test.res, m) + if test.res != m { + t.Errorf("TestPolMul failed for test %d: %v * %v: want %v, got %v", + i, test.x, test.y, test.res, m) + } m = test.y.Mul(test.x) - Assert(t, test.res == test.y.Mul(test.x), - "TestPolMul failed for %d: %v * %v: want %v, got %v", - i, test.x, test.y, test.res, m) + if test.res != test.y.Mul(test.x) { + t.Errorf("TestPolMul failed for %d: %v * %v: want %v, got %v", + i, test.x, test.y, test.res, m) + } } } @@ -95,21 +99,21 @@ func TestPolMulOverflow(t *testing.T) { if e, ok := err.(string); ok && e == "multiplication would overflow uint64" { return - } else { - t.Logf("invalid error raised: %v", err) - // re-raise error if not overflow - panic(err) } + + t.Logf("invalid error raised: %v", err) + // re-raise error if not overflow + panic(err) }() - x := chunker.Pol(1 << 63) + x := Pol(1 << 63) x.Mul(2) t.Fatal("overflow test did not panic") } var polDivTests = []struct { - x, y chunker.Pol - res chunker.Pol + x, y Pol + res Pol }{ {10, 50, 0}, {0, 1, 0}, @@ -139,15 +143,16 @@ var polDivTests = []struct { func TestPolDiv(t *testing.T) { for i, test := range polDivTests { m := test.x.Div(test.y) - Assert(t, test.res == m, - "TestPolDiv failed for test %d: %v * %v: want %v, got %v", - i, test.x, test.y, test.res, m) + if test.res != m { + t.Errorf("TestPolDiv failed for test %d: %v * %v: want %v, got %v", + i, test.x, test.y, test.res, m) + } } } var polModTests = []struct { - x, y chunker.Pol - res chunker.Pol + x, y Pol + res 
Pol }{ {10, 50, 10}, {0, 1, 0}, @@ -175,14 +180,17 @@ var polModTests = []struct { } func TestPolModt(t *testing.T) { - for _, test := range polModTests { - Equals(t, test.res, test.x.Mod(test.y)) + for i, test := range polModTests { + res := test.x.Mod(test.y) + if test.res != res { + t.Errorf("test %d failed: want %v, got %v", i, test.res, res) + } } } func BenchmarkPolDivMod(t *testing.B) { - f := chunker.Pol(0x2482734cacca49) - g := chunker.Pol(0x3af4b284899) + f := Pol(0x2482734cacca49) + g := Pol(0x3af4b284899) for i := 0; i < t.N; i++ { g.DivMod(f) @@ -190,8 +198,8 @@ func BenchmarkPolDivMod(t *testing.B) { } func BenchmarkPolDiv(t *testing.B) { - f := chunker.Pol(0x2482734cacca49) - g := chunker.Pol(0x3af4b284899) + f := Pol(0x2482734cacca49) + g := Pol(0x3af4b284899) for i := 0; i < t.N; i++ { g.Div(f) @@ -199,8 +207,8 @@ func BenchmarkPolDiv(t *testing.B) { } func BenchmarkPolMod(t *testing.B) { - f := chunker.Pol(0x2482734cacca49) - g := chunker.Pol(0x3af4b284899) + f := Pol(0x2482734cacca49) + g := Pol(0x3af4b284899) for i := 0; i < t.N; i++ { g.Mod(f) @@ -208,7 +216,7 @@ func BenchmarkPolMod(t *testing.B) { } func BenchmarkPolDeg(t *testing.B) { - f := chunker.Pol(0x3af4b284899) + f := Pol(0x3af4b284899) d := f.Deg() if d != 41 { t.Fatalf("BenchmalPolDeg: Wrong degree %d returned, expected %d", @@ -221,25 +229,31 @@ func BenchmarkPolDeg(t *testing.B) { } func TestRandomPolynomial(t *testing.T) { - _, err := chunker.RandomPolynomial() - OK(t, err) + _, err := RandomPolynomial() + if err != nil { + t.Fatal(err) + } } func BenchmarkRandomPolynomial(t *testing.B) { for i := 0; i < t.N; i++ { - _, err := chunker.RandomPolynomial() - OK(t, err) + _, err := RandomPolynomial() + if err != nil { + t.Fatal(err) + } } } func TestExpandPolynomial(t *testing.T) { - pol := chunker.Pol(0x3DA3358B4DC173) + pol := Pol(0x3DA3358B4DC173) s := pol.Expand() - Equals(t, "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1", s) + if s != "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1" { + t.Fatal("wrong result") + } } var polIrredTests = []struct { - f chunker.Pol + f Pol irred bool }{ {0x38f1e565e288df, false}, @@ -270,15 +284,16 @@ var polIrredTests = []struct { func TestPolIrreducible(t *testing.T) { for _, test := range polIrredTests { - Assert(t, test.f.Irreducible() == test.irred, - "Irreducibility test for Polynomial %v failed: got %v, wanted %v", - test.f, test.f.Irreducible(), test.irred) + if test.f.Irreducible() != test.irred { + t.Errorf("Irreducibility test for Polynomial %v failed: got %v, wanted %v", + test.f, test.f.Irreducible(), test.irred) + } } } func BenchmarkPolIrreducible(b *testing.B) { // find first irreducible polynomial - var pol chunker.Pol + var pol Pol for _, test := range polIrredTests { if test.irred { pol = test.f @@ -287,15 +302,16 @@ func BenchmarkPolIrreducible(b *testing.B) { } for i := 0; i < b.N; i++ { - Assert(b, pol.Irreducible(), - "Irreducibility test for Polynomial %v failed", pol) + if !pol.Irreducible() { + b.Errorf("Irreducibility test for Polynomial %v failed", pol) + } } } var polGCDTests = []struct { - f1 chunker.Pol - f2 chunker.Pol - gcd chunker.Pol + f1 Pol + f2 Pol + gcd Pol }{ {10, 50, 2}, {0, 1, 1}, @@ -345,21 +361,24 @@ var polGCDTests = []struct { func TestPolGCD(t *testing.T) { for i, test := range polGCDTests { gcd := test.f1.GCD(test.f2) - Assert(t, test.gcd == gcd, - 
"GCD test %d (%+v) failed: got %v, wanted %v", - i, test, gcd, test.gcd) + if test.gcd != gcd { + t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v", + i, test, gcd, test.gcd) + } + gcd = test.f2.GCD(test.f1) - Assert(t, test.gcd == gcd, - "GCD test %d (%+v) failed: got %v, wanted %v", - i, test, gcd, test.gcd) + if test.gcd != gcd { + t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v", + i, test, gcd, test.gcd) + } } } var polMulModTests = []struct { - f1 chunker.Pol - f2 chunker.Pol - g chunker.Pol - mod chunker.Pol + f1 Pol + f2 Pol + g Pol + mod Pol }{ { 0x1230, @@ -378,8 +397,9 @@ var polMulModTests = []struct { func TestPolMulMod(t *testing.T) { for i, test := range polMulModTests { mod := test.f1.MulMod(test.f2, test.g) - Assert(t, mod == test.mod, - "MulMod test %d (%+v) failed: got %v, wanted %v", - i, test, mod, test.mod) + if mod != test.mod { + t.Errorf("MulMod test %d (%+v) failed: got %v, wanted %v", + i, test, mod, test.mod) + } } } From 2ce49ea0ee9c05a0f5db089098e465fe3227c1de Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Mon, 22 Feb 2016 21:09:21 +0100 Subject: [PATCH 2/2] Update code to use the new Chunker interface --- src/restic/archiver.go | 18 ++++---- src/restic/archiver_test.go | 55 ++++++++++--------------- src/restic/buffer_pool.go | 21 ++++++++++ src/restic/repository/packer_manager.go | 5 +-- 4 files changed, 53 insertions(+), 46 deletions(-) create mode 100644 src/restic/buffer_pool.go diff --git a/src/restic/archiver.go b/src/restic/archiver.go index d005bb8f4..9ff7ca10b 100644 --- a/src/restic/archiver.go +++ b/src/restic/archiver.go @@ -1,7 +1,7 @@ package restic import ( - "crypto/sha256" + "bytes" "encoding/json" "fmt" "io" @@ -11,13 +11,14 @@ import ( "sync" "time" - "github.com/restic/chunker" "restic/backend" "restic/debug" "restic/pack" "restic/pipe" "restic/repository" + "github.com/restic/chunker" + "github.com/juju/errors" ) @@ -154,12 +155,11 @@ type saveResult struct { bytes uint64 } -func (arch *Archiver) saveChunk(chunk *chunker.Chunk, p *Progress, token struct{}, file *os.File, resultChannel chan<- saveResult) { - hash := chunk.Digest - id := backend.ID{} - copy(id[:], hash) +func (arch *Archiver) saveChunk(chunk chunker.Chunk, p *Progress, token struct{}, file *os.File, resultChannel chan<- saveResult) { + defer freeBuf(chunk.Data) - err := arch.Save(pack.Data, id, chunk.Length, chunk.Reader(file)) + id := backend.Hash(chunk.Data) + err := arch.Save(pack.Data, id, chunk.Length, bytes.NewReader(chunk.Data)) // TODO handle error if err != nil { panic(err) @@ -220,11 +220,11 @@ func (arch *Archiver) SaveFile(p *Progress, node *Node) error { return err } - chnker := chunker.New(file, arch.repo.Config.ChunkerPolynomial, sha256.New()) + chnker := chunker.New(file, arch.repo.Config.ChunkerPolynomial) resultChannels := [](<-chan saveResult){} for { - chunk, err := chnker.Next() + chunk, err := chnker.Next(getBuf()) if err == io.EOF { break } diff --git a/src/restic/archiver_test.go b/src/restic/archiver_test.go index d38a73b80..813cc3362 100644 --- a/src/restic/archiver_test.go +++ b/src/restic/archiver_test.go @@ -2,12 +2,10 @@ package restic_test import ( "bytes" - "crypto/sha256" "io" "testing" "time" - "github.com/restic/chunker" "restic" "restic/backend" "restic/checker" @@ -15,6 +13,8 @@ import ( "restic/pack" "restic/repository" . 
"restic/test" + + "github.com/restic/chunker" ) var testPol = chunker.Pol(0x3DA3358B4DC173) @@ -24,17 +24,12 @@ type Rdr interface { io.ReaderAt } -type chunkedData struct { - buf []byte - chunks []*chunker.Chunk -} - func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) { rd.Seek(0, 0) - ch := chunker.New(rd, testPol, sha256.New()) + ch := chunker.New(rd, testPol) for { - chunk, err := ch.Next() + chunk, err := ch.Next(buf) if err == io.EOF { break @@ -43,12 +38,10 @@ func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.K OK(b, err) // reduce length of buf - buf = buf[:chunk.Length] - n, err := io.ReadFull(chunk.Reader(rd), buf) - OK(b, err) - Assert(b, uint(n) == chunk.Length, "invalid length: got %d, expected %d", n, chunk.Length) + Assert(b, uint(len(chunk.Data)) == chunk.Length, + "invalid length: got %d, expected %d", len(chunk.Data), chunk.Length) - _, err = crypto.Encrypt(key, buf2, buf) + _, err = crypto.Encrypt(key, buf2, chunk.Data) OK(b, err) } } @@ -72,18 +65,16 @@ func BenchmarkChunkEncrypt(b *testing.B) { } func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) { - ch := chunker.New(rd, testPol, sha256.New()) + ch := chunker.New(rd, testPol) for { - chunk, err := ch.Next() + chunk, err := ch.Next(buf) if err == io.EOF { break } // reduce length of chunkBuf - buf = buf[:chunk.Length] - io.ReadFull(chunk.Reader(rd), buf) - crypto.Encrypt(key, buf, buf) + crypto.Encrypt(key, chunk.Data, chunk.Data) } } @@ -258,8 +249,7 @@ func testParallelSaveWithDuplication(t *testing.T, seed int) { duplication := 7 arch := restic.NewArchiver(repo) - data, chunks := getRandomData(seed, dataSizeMb*1024*1024) - reader := bytes.NewReader(data) + chunks := getRandomData(seed, dataSizeMb*1024*1024) errChannels := [](<-chan error){} @@ -272,18 +262,15 @@ func testParallelSaveWithDuplication(t *testing.T, seed int) { errChan := make(chan error) errChannels = append(errChannels, errChan) - go func(reader *bytes.Reader, c *chunker.Chunk, errChan chan<- error) { + go func(c chunker.Chunk, errChan chan<- error) { barrier <- struct{}{} - hash := c.Digest - id := backend.ID{} - copy(id[:], hash) - - time.Sleep(time.Duration(hash[0])) - err := arch.Save(pack.Data, id, c.Length, c.Reader(reader)) + id := backend.Hash(c.Data) + time.Sleep(time.Duration(id[0])) + err := arch.Save(pack.Data, id, c.Length, bytes.NewReader(c.Data)) <-barrier errChan <- err - }(reader, c, errChan) + }(c, errChan) } } @@ -298,20 +285,20 @@ func testParallelSaveWithDuplication(t *testing.T, seed int) { assertNoUnreferencedPacks(t, chkr) } -func getRandomData(seed int, size int) ([]byte, []*chunker.Chunk) { +func getRandomData(seed int, size int) []chunker.Chunk { buf := Random(seed, size) - chunks := []*chunker.Chunk{} - chunker := chunker.New(bytes.NewReader(buf), testPol, sha256.New()) + var chunks []chunker.Chunk + chunker := chunker.New(bytes.NewReader(buf), testPol) for { - c, err := chunker.Next() + c, err := chunker.Next(nil) if err == io.EOF { break } chunks = append(chunks, c) } - return buf, chunks + return chunks } func createAndInitChecker(t *testing.T, repo *repository.Repository) *checker.Checker { diff --git a/src/restic/buffer_pool.go b/src/restic/buffer_pool.go new file mode 100644 index 000000000..25603bbfe --- /dev/null +++ b/src/restic/buffer_pool.go @@ -0,0 +1,21 @@ +package restic + +import ( + "sync" + + "github.com/restic/chunker" +) + +var bufPool = sync.Pool{ + New: func() interface{} { + return make([]byte, chunker.MinSize) + 
}, +} + +func getBuf() []byte { + return bufPool.Get().([]byte) +} + +func freeBuf(data []byte) { + bufPool.Put(data) +} diff --git a/src/restic/repository/packer_manager.go b/src/restic/repository/packer_manager.go index 51a8ae888..a7716418e 100644 --- a/src/restic/repository/packer_manager.go +++ b/src/restic/repository/packer_manager.go @@ -3,7 +3,6 @@ package repository import ( "sync" - "github.com/restic/chunker" "restic/backend" "restic/crypto" "restic/debug" @@ -18,8 +17,8 @@ type packerManager struct { packs []*pack.Packer } -const minPackSize = 4 * chunker.MiB -const maxPackSize = 16 * chunker.MiB +const minPackSize = 4 * 1024 * 1024 +const maxPackSize = 16 * 1024 * 1024 const maxPackers = 200 // findPacker returns a packer for a new blob of size bytes. Either a new one is
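
For reference, here is a minimal caller-side sketch of the API this series moves to: Next() now takes a reusable buffer and returns a Chunk whose Data field the caller hashes itself (restic does this via backend.Hash and a sync.Pool, as in the hunks above). The input data, the buffer size, and the use of the test polynomial 0x3DA3358B4DC173 are illustrative assumptions; real callers would use the repository's configured polynomial or chunker.RandomPolynomial().

    package main

    import (
        "bytes"
        "crypto/sha256"
        "fmt"
        "io"

        "github.com/restic/chunker"
    )

    func main() {
        // Illustrative input; in restic this is the file being backed up.
        data := bytes.Repeat([]byte("0123456789abcdef"), 4*1024*1024/16)

        // New() no longer takes a hash.Hash; digests are computed by the caller.
        chnker := chunker.New(bytes.NewReader(data), chunker.Pol(0x3DA3358B4DC173))

        // Reusable buffer handed to Next(); the returned chunk.Data may alias it,
        // so it must not be reused before the chunk has been consumed.
        buf := make([]byte, chunker.MaxSize)

        for {
            chunk, err := chnker.Next(buf)
            if err == io.EOF {
                break
            }
            if err != nil {
                panic(err)
            }

            // Hash the chunk contents ourselves, replacing the old Chunk.Digest field.
            digest := sha256.Sum256(chunk.Data)
            fmt.Printf("chunk at %d, length %d, sha256 %x\n", chunk.Start, chunk.Length, digest)
        }
    }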