1
0
mirror of https://github.com/restic/restic.git synced 2024-07-06 09:20:53 +02:00

Merge pull request #472 from restic/update-chunker

Update chunker
This commit is contained in:
Alexander Neumann 2016-02-24 21:25:15 +01:00
commit 77d85cee52
11 changed files with 352 additions and 303 deletions

View File

@ -1,7 +1,7 @@
package restic package restic
import ( import (
"crypto/sha256" "bytes"
"encoding/json" "encoding/json"
"fmt" "fmt"
"io" "io"
@ -11,13 +11,14 @@ import (
"sync" "sync"
"time" "time"
"github.com/restic/chunker"
"restic/backend" "restic/backend"
"restic/debug" "restic/debug"
"restic/pack" "restic/pack"
"restic/pipe" "restic/pipe"
"restic/repository" "restic/repository"
"github.com/restic/chunker"
"github.com/juju/errors" "github.com/juju/errors"
) )
@ -154,12 +155,11 @@ type saveResult struct {
bytes uint64 bytes uint64
} }
func (arch *Archiver) saveChunk(chunk *chunker.Chunk, p *Progress, token struct{}, file *os.File, resultChannel chan<- saveResult) { func (arch *Archiver) saveChunk(chunk chunker.Chunk, p *Progress, token struct{}, file *os.File, resultChannel chan<- saveResult) {
hash := chunk.Digest defer freeBuf(chunk.Data)
id := backend.ID{}
copy(id[:], hash)
err := arch.Save(pack.Data, id, chunk.Length, chunk.Reader(file)) id := backend.Hash(chunk.Data)
err := arch.Save(pack.Data, id, chunk.Length, bytes.NewReader(chunk.Data))
// TODO handle error // TODO handle error
if err != nil { if err != nil {
panic(err) panic(err)
@ -220,11 +220,11 @@ func (arch *Archiver) SaveFile(p *Progress, node *Node) error {
return err return err
} }
chnker := chunker.New(file, arch.repo.Config.ChunkerPolynomial, sha256.New()) chnker := chunker.New(file, arch.repo.Config.ChunkerPolynomial)
resultChannels := [](<-chan saveResult){} resultChannels := [](<-chan saveResult){}
for { for {
chunk, err := chnker.Next() chunk, err := chnker.Next(getBuf())
if err == io.EOF { if err == io.EOF {
break break
} }

View File

@ -2,12 +2,10 @@ package restic_test
import ( import (
"bytes" "bytes"
"crypto/sha256"
"io" "io"
"testing" "testing"
"time" "time"
"github.com/restic/chunker"
"restic" "restic"
"restic/backend" "restic/backend"
"restic/checker" "restic/checker"
@ -15,6 +13,8 @@ import (
"restic/pack" "restic/pack"
"restic/repository" "restic/repository"
. "restic/test" . "restic/test"
"github.com/restic/chunker"
) )
var testPol = chunker.Pol(0x3DA3358B4DC173) var testPol = chunker.Pol(0x3DA3358B4DC173)
@ -24,17 +24,12 @@ type Rdr interface {
io.ReaderAt io.ReaderAt
} }
type chunkedData struct {
buf []byte
chunks []*chunker.Chunk
}
func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) { func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.Key) {
rd.Seek(0, 0) rd.Seek(0, 0)
ch := chunker.New(rd, testPol, sha256.New()) ch := chunker.New(rd, testPol)
for { for {
chunk, err := ch.Next() chunk, err := ch.Next(buf)
if err == io.EOF { if err == io.EOF {
break break
@ -43,12 +38,10 @@ func benchmarkChunkEncrypt(b testing.TB, buf, buf2 []byte, rd Rdr, key *crypto.K
OK(b, err) OK(b, err)
// reduce length of buf // reduce length of buf
buf = buf[:chunk.Length] Assert(b, uint(len(chunk.Data)) == chunk.Length,
n, err := io.ReadFull(chunk.Reader(rd), buf) "invalid length: got %d, expected %d", len(chunk.Data), chunk.Length)
OK(b, err)
Assert(b, uint(n) == chunk.Length, "invalid length: got %d, expected %d", n, chunk.Length)
_, err = crypto.Encrypt(key, buf2, buf) _, err = crypto.Encrypt(key, buf2, chunk.Data)
OK(b, err) OK(b, err)
} }
} }
@ -72,18 +65,16 @@ func BenchmarkChunkEncrypt(b *testing.B) {
} }
func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) { func benchmarkChunkEncryptP(b *testing.PB, buf []byte, rd Rdr, key *crypto.Key) {
ch := chunker.New(rd, testPol, sha256.New()) ch := chunker.New(rd, testPol)
for { for {
chunk, err := ch.Next() chunk, err := ch.Next(buf)
if err == io.EOF { if err == io.EOF {
break break
} }
// reduce length of chunkBuf // reduce length of chunkBuf
buf = buf[:chunk.Length] crypto.Encrypt(key, chunk.Data, chunk.Data)
io.ReadFull(chunk.Reader(rd), buf)
crypto.Encrypt(key, buf, buf)
} }
} }
@ -258,8 +249,7 @@ func testParallelSaveWithDuplication(t *testing.T, seed int) {
duplication := 7 duplication := 7
arch := restic.NewArchiver(repo) arch := restic.NewArchiver(repo)
data, chunks := getRandomData(seed, dataSizeMb*1024*1024) chunks := getRandomData(seed, dataSizeMb*1024*1024)
reader := bytes.NewReader(data)
errChannels := [](<-chan error){} errChannels := [](<-chan error){}
@ -272,18 +262,15 @@ func testParallelSaveWithDuplication(t *testing.T, seed int) {
errChan := make(chan error) errChan := make(chan error)
errChannels = append(errChannels, errChan) errChannels = append(errChannels, errChan)
go func(reader *bytes.Reader, c *chunker.Chunk, errChan chan<- error) { go func(c chunker.Chunk, errChan chan<- error) {
barrier <- struct{}{} barrier <- struct{}{}
hash := c.Digest id := backend.Hash(c.Data)
id := backend.ID{} time.Sleep(time.Duration(id[0]))
copy(id[:], hash) err := arch.Save(pack.Data, id, c.Length, bytes.NewReader(c.Data))
time.Sleep(time.Duration(hash[0]))
err := arch.Save(pack.Data, id, c.Length, c.Reader(reader))
<-barrier <-barrier
errChan <- err errChan <- err
}(reader, c, errChan) }(c, errChan)
} }
} }
@ -298,20 +285,20 @@ func testParallelSaveWithDuplication(t *testing.T, seed int) {
assertNoUnreferencedPacks(t, chkr) assertNoUnreferencedPacks(t, chkr)
} }
func getRandomData(seed int, size int) ([]byte, []*chunker.Chunk) { func getRandomData(seed int, size int) []chunker.Chunk {
buf := Random(seed, size) buf := Random(seed, size)
chunks := []*chunker.Chunk{} var chunks []chunker.Chunk
chunker := chunker.New(bytes.NewReader(buf), testPol, sha256.New()) chunker := chunker.New(bytes.NewReader(buf), testPol)
for { for {
c, err := chunker.Next() c, err := chunker.Next(nil)
if err == io.EOF { if err == io.EOF {
break break
} }
chunks = append(chunks, c) chunks = append(chunks, c)
} }
return buf, chunks return chunks
} }
func createAndInitChecker(t *testing.T, repo *repository.Repository) *checker.Checker { func createAndInitChecker(t *testing.T, repo *repository.Repository) *checker.Checker {

21
src/restic/buffer_pool.go Normal file
View File

@ -0,0 +1,21 @@
package restic
import (
"sync"
"github.com/restic/chunker"
)
// bufPool holds reusable byte slices that are handed out by getBuf and
// returned by freeBuf. When the pool has nothing to offer, it allocates a
// new slice with a length of chunker.MinSize.
var bufPool = sync.Pool{
	New: func() interface{} {
		buf := make([]byte, chunker.MinSize)
		return buf
	},
}
// getBuf takes a byte slice out of bufPool. The slice is either one that was
// previously returned through freeBuf (with whatever length and contents it
// had at that point) or a new one created by the pool. It panics if the pool
// yields a value that is not a []byte.
func getBuf() []byte {
	buf := bufPool.Get().([]byte)
	return buf
}
// freeBuf puts data back into bufPool so that a later getBuf call can hand it
// out again. Because the slice may be given to another caller, data should
// not be used after this call.
func freeBuf(data []byte) {
bufPool.Put(data)
}

View File

@ -3,7 +3,6 @@ package repository
import ( import (
"sync" "sync"
"github.com/restic/chunker"
"restic/backend" "restic/backend"
"restic/crypto" "restic/crypto"
"restic/debug" "restic/debug"
@ -18,8 +17,8 @@ type packerManager struct {
packs []*pack.Packer packs []*pack.Packer
} }
const minPackSize = 4 * chunker.MiB const minPackSize = 4 * 1024 * 1024
const maxPackSize = 16 * chunker.MiB const maxPackSize = 16 * 1024 * 1024
const maxPackers = 200 const maxPackers = 200
// findPacker returns a packer for a new blob of size bytes. Either a new one is // findPacker returns a packer for a new blob of size bytes. Either a new one is

4
vendor/manifest vendored
View File

@ -40,8 +40,8 @@
{ {
"importpath": "github.com/restic/chunker", "importpath": "github.com/restic/chunker",
"repository": "https://github.com/restic/chunker", "repository": "https://github.com/restic/chunker",
"revision": "fc45043175c38d59374024a38fb7123c40a64f20", "revision": "16c849a106e0a50d658e8f5e49a01f6728f4f92c",
"branch": "HEAD" "branch": "master"
}, },
{ {
"importpath": "golang.org/x/crypto/pbkdf2", "importpath": "golang.org/x/crypto/pbkdf2",

View File

@ -1,11 +1,12 @@
[![GoDoc](https://godoc.org/github.com/restic/chunker?status.svg)](http://godoc.org/github.com/restic/chunker)
[![Build Status](https://travis-ci.org/restic/chunker.svg?branch=master)](https://travis-ci.org/restic/chunker) [![Build Status](https://travis-ci.org/restic/chunker.svg?branch=master)](https://travis-ci.org/restic/chunker)
Content Defined Chunking (CDC) based on a rolling Rabin Checksum. The package `chunker` implements content-defined-chunking (CDC) based on a
rolling Rabin Hash. The library is part of the [restic backup
Part of https://github.com/restic/restic. program](https://github.com/restic/restic).
An introduction to Content Defined Chunking can be found in the restic blog An introduction to Content Defined Chunking can be found in the restic blog
post [Foundation - Introducing Content Defined Chunking (CDC)](https://restic.github.io/blog/2015-09-12/restic-foundation1-cdc/). post [Foundation - Introducing Content Defined Chunking (CDC)](https://restic.github.io/blog/2015-09-12/restic-foundation1-cdc).
You can find the API documentation at You can find the API documentation at
https://godoc.org/github.com/restic/chunker https://godoc.org/github.com/restic/chunker

View File

@ -2,14 +2,13 @@ package chunker
import ( import (
"errors" "errors"
"hash"
"io" "io"
"sync" "sync"
) )
const ( const (
KiB = 1024 kiB = 1024
MiB = 1024 * KiB miB = 1024 * kiB
// WindowSize is the size of the sliding window. // WindowSize is the size of the sliding window.
windowSize = 64 windowSize = 64
@ -17,20 +16,16 @@ const (
// aim to create chunks of 20 bits or about 1MiB on average. // aim to create chunks of 20 bits or about 1MiB on average.
averageBits = 20 averageBits = 20
// MinSize is the minimal size of a chunk. // MinSize is the default minimal size of a chunk.
MinSize = 512 * KiB MinSize = 512 * kiB
// MaxSize is the maximal size of a chunk. // MaxSize is the default maximal size of a chunk.
MaxSize = 8 * MiB MaxSize = 8 * miB
splitmask = (1 << averageBits) - 1 splitmask = (1 << averageBits) - 1
chunkerBufSize = 512 * KiB chunkerBufSize = 512 * kiB
) )
var bufPool = sync.Pool{
New: func() interface{} { return make([]byte, chunkerBufSize) },
}
type tables struct { type tables struct {
out [256]Pol out [256]Pol
mod [256]Pol mod [256]Pol
@ -52,15 +47,13 @@ type Chunk struct {
Start uint Start uint
Length uint Length uint
Cut uint64 Cut uint64
Digest []byte Data []byte
}
func (c Chunk) Reader(r io.ReaderAt) io.Reader {
return io.NewSectionReader(r, int64(c.Start), int64(c.Length))
} }
// Chunker splits content with Rabin Fingerprints. // Chunker splits content with Rabin Fingerprints.
type Chunker struct { type Chunker struct {
MinSize, MaxSize uint
pol Pol pol Pol
polShift uint polShift uint
tables *tables tables *tables
@ -82,17 +75,17 @@ type Chunker struct {
pre uint // wait for this many bytes before start calculating an new chunk pre uint // wait for this many bytes before start calculating an new chunk
digest uint64 digest uint64
h hash.Hash
} }
// New returns a new Chunker based on polynomial p that reads from rd // New returns a new Chunker based on polynomial p that reads from rd
// with bufsize and pass all data to hash along the way. // with bufsize and pass all data to hash along the way.
func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker { func New(rd io.Reader, pol Pol) *Chunker {
c := &Chunker{ c := &Chunker{
buf: bufPool.Get().([]byte), buf: make([]byte, chunkerBufSize),
h: h, pol: pol,
pol: pol, rd: rd,
rd: rd, MinSize: MinSize,
MaxSize: MaxSize,
} }
c.reset() c.reset()
@ -100,6 +93,19 @@ func New(rd io.Reader, pol Pol, h hash.Hash) *Chunker {
return c return c
} }
// Reset reinitializes the chunker with a new reader and polynomial. The
// existing read buffer and the MinSize/MaxSize settings are carried over to
// the fresh state; every other field is zeroed before the internal reset()
// sets up the chunker for the first chunk again.
func (c *Chunker) Reset(rd io.Reader, pol Pol) {
	// remember what has to survive before the struct is overwritten
	buf, minSize, maxSize := c.buf, c.MinSize, c.MaxSize

	*c = Chunker{
		buf:     buf,
		pol:     pol,
		rd:      rd,
		MinSize: minSize,
		MaxSize: maxSize,
	}

	c.reset()
}
func (c *Chunker) reset() { func (c *Chunker) reset() {
c.polShift = uint(c.pol.Deg() - 8) c.polShift = uint(c.pol.Deg() - 8)
c.fillTables() c.fillTables()
@ -115,12 +121,8 @@ func (c *Chunker) reset() {
c.slide(1) c.slide(1)
c.start = c.pos c.start = c.pos
if c.h != nil {
c.h.Reset()
}
// do not start a new chunk unless at least MinSize bytes have been read // do not start a new chunk unless at least MinSize bytes have been read
c.pre = MinSize - windowSize c.pre = c.MinSize - windowSize
} }
// Calculate out_table and mod_table for optimization. Must be called only // Calculate out_table and mod_table for optimization. Must be called only
@ -179,12 +181,13 @@ func (c *Chunker) fillTables() {
} }
// Next returns the position and length of the next chunk of data. If an error // Next returns the position and length of the next chunk of data. If an error
// occurs while reading, the error is returned with a nil chunk. The state of // occurs while reading, the error is returned. Afterwards, the state of the
// the current chunk is undefined. When the last chunk has been returned, all // current chunk is undefined. When the last chunk has been returned, all
// subsequent calls yield a nil chunk and an io.EOF error. // subsequent calls yield an io.EOF error.
func (c *Chunker) Next() (*Chunk, error) { func (c *Chunker) Next(data []byte) (Chunk, error) {
data = data[:0]
if c.tables == nil { if c.tables == nil {
return nil, errors.New("polynomial is not set") return Chunk{}, errors.New("polynomial is not set")
} }
for { for {
@ -203,22 +206,19 @@ func (c *Chunker) Next() (*Chunk, error) {
if err == io.EOF && !c.closed { if err == io.EOF && !c.closed {
c.closed = true c.closed = true
// return the buffer to the pool
bufPool.Put(c.buf)
// return current chunk, if any bytes have been processed // return current chunk, if any bytes have been processed
if c.count > 0 { if c.count > 0 {
return &Chunk{ return Chunk{
Start: c.start, Start: c.start,
Length: c.count, Length: c.count,
Cut: c.digest, Cut: c.digest,
Digest: c.hashDigest(), Data: data,
}, nil }, nil
} }
} }
if err != nil { if err != nil {
return nil, err return Chunk{}, err
} }
c.bpos = 0 c.bpos = 0
@ -230,7 +230,7 @@ func (c *Chunker) Next() (*Chunk, error) {
n := c.bmax - c.bpos n := c.bmax - c.bpos
if c.pre > uint(n) { if c.pre > uint(n) {
c.pre -= uint(n) c.pre -= uint(n)
c.updateHash(c.buf[c.bpos:c.bmax]) data = append(data, c.buf[c.bpos:c.bmax]...)
c.count += uint(n) c.count += uint(n)
c.pos += uint(n) c.pos += uint(n)
@ -239,7 +239,7 @@ func (c *Chunker) Next() (*Chunk, error) {
continue continue
} }
c.updateHash(c.buf[c.bpos : c.bpos+c.pre]) data = append(data, c.buf[c.bpos:c.bpos+c.pre]...)
c.bpos += c.pre c.bpos += c.pre
c.count += c.pre c.count += c.pre
@ -264,22 +264,22 @@ func (c *Chunker) Next() (*Chunk, error) {
// end inline // end inline
add++ add++
if add < MinSize { if add < c.MinSize {
continue continue
} }
if (c.digest&splitmask) == 0 || add >= MaxSize { if (c.digest&splitmask) == 0 || add >= MaxSize {
i := add - c.count - 1 i := add - c.count - 1
c.updateHash(c.buf[c.bpos : c.bpos+uint(i)+1]) data = append(data, c.buf[c.bpos:c.bpos+uint(i)+1]...)
c.count = add c.count = add
c.pos += uint(i) + 1 c.pos += uint(i) + 1
c.bpos += uint(i) + 1 c.bpos += uint(i) + 1
chunk := &Chunk{ chunk := Chunk{
Start: c.start, Start: c.start,
Length: c.count, Length: c.count,
Cut: c.digest, Cut: c.digest,
Digest: c.hashDigest(), Data: data,
} }
c.reset() c.reset()
@ -290,7 +290,7 @@ func (c *Chunker) Next() (*Chunk, error) {
steps := c.bmax - c.bpos steps := c.bmax - c.bpos
if steps > 0 { if steps > 0 {
c.updateHash(c.buf[c.bpos : c.bpos+steps]) data = append(data, c.buf[c.bpos:c.bpos+steps]...)
} }
c.count += steps c.count += steps
c.pos += steps c.pos += steps
@ -298,24 +298,6 @@ func (c *Chunker) Next() (*Chunk, error) {
} }
} }
func (c *Chunker) updateHash(data []byte) {
if c.h != nil {
// the hashes from crypto/sha* do not return an error
_, err := c.h.Write(data)
if err != nil {
panic(err)
}
}
}
func (c *Chunker) hashDigest() []byte {
if c.h == nil {
return nil
}
return c.h.Sum(nil)
}
func (c *Chunker) append(b byte) { func (c *Chunker) append(b byte) {
index := c.digest >> c.polShift index := c.digest >> c.polShift
c.digest <<= 8 c.digest <<= 8

View File

@ -1,19 +1,13 @@
package chunker_test package chunker
import ( import (
"bytes" "bytes"
"crypto/md5"
"crypto/sha256" "crypto/sha256"
"encoding/hex" "encoding/hex"
"hash"
"io" "io"
"io/ioutil"
"math/rand" "math/rand"
"testing" "testing"
"time" "time"
"github.com/restic/chunker"
. "github.com/restic/restic/test"
) )
func parseDigest(s string) []byte { func parseDigest(s string) []byte {
@ -32,7 +26,7 @@ type chunk struct {
} }
// polynomial used for all the tests below // polynomial used for all the tests below
const testPol = chunker.Pol(0x3DA3358B4DC173) const testPol = Pol(0x3DA3358B4DC173)
// created for 32MB of random data out of math/rand's Uint32() seeded by // created for 32MB of random data out of math/rand's Uint32() seeded by
// constant 23 // constant 23
@ -68,59 +62,51 @@ var chunks1 = []chunk{
// test if nullbytes are correctly split, even if length is a multiple of MinSize. // test if nullbytes are correctly split, even if length is a multiple of MinSize.
var chunks2 = []chunk{ var chunks2 = []chunk{
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
chunk{chunker.MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")}, chunk{MinSize, 0, parseDigest("07854d2fef297a06ba81685e660c332de36d5d18d546927d30daad6d7fda1541")},
} }
func testWithData(t *testing.T, chnker *chunker.Chunker, testChunks []chunk) []*chunker.Chunk { func testWithData(t *testing.T, chnker *Chunker, testChunks []chunk, checkDigest bool) []Chunk {
chunks := []*chunker.Chunk{} chunks := []Chunk{}
pos := uint(0) pos := uint(0)
for i, chunk := range testChunks { for i, chunk := range testChunks {
c, err := chnker.Next() c, err := chnker.Next(nil)
if err != nil { if err != nil {
t.Fatalf("Error returned with chunk %d: %v", i, err) t.Fatalf("Error returned with chunk %d: %v", i, err)
} }
if c == nil { if c.Start != pos {
t.Fatalf("Nil chunk returned") t.Fatalf("Start for chunk %d does not match: expected %d, got %d",
i, pos, c.Start)
} }
if c != nil { if c.Length != chunk.Length {
if c.Start != pos { t.Fatalf("Length for chunk %d does not match: expected %d, got %d",
t.Fatalf("Start for chunk %d does not match: expected %d, got %d", i, chunk.Length, c.Length)
i, pos, c.Start) }
}
if c.Length != chunk.Length { if c.Cut != chunk.CutFP {
t.Fatalf("Length for chunk %d does not match: expected %d, got %d", t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
i, chunk.Length, c.Length) i, len(chunks)-1, chunk.CutFP, c.Cut)
} }
if c.Cut != chunk.CutFP { if checkDigest {
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x", digest := hashData(c.Data)
i, len(chunks)-1, chunk.CutFP, c.Cut) if !bytes.Equal(chunk.Digest, digest) {
}
if c.Digest != nil && !bytes.Equal(c.Digest, chunk.Digest) {
t.Fatalf("Digest fingerprint for chunk %d/%d does not match: expected %02x, got %02x", t.Fatalf("Digest fingerprint for chunk %d/%d does not match: expected %02x, got %02x",
i, len(chunks)-1, chunk.Digest, c.Digest) i, len(chunks)-1, chunk.Digest, digest)
} }
pos += c.Length
chunks = append(chunks, c)
} }
pos += c.Length
chunks = append(chunks, c)
} }
c, err := chnker.Next() _, err := chnker.Next(nil)
if c != nil {
t.Fatal("additional non-nil chunk returned")
}
if err != io.EOF { if err != io.EOF {
t.Fatal("wrong error returned after last chunk") t.Fatal("wrong error returned after last chunk")
} }
@ -143,39 +129,32 @@ func getRandom(seed, count int) []byte {
return buf return buf
} }
// hashData returns the SHA-256 digest of d as a newly allocated 32-byte slice.
func hashData(d []byte) []byte {
	sum := sha256.Sum256(d)
	return sum[:]
}
func TestChunker(t *testing.T) { func TestChunker(t *testing.T) {
// setup data source // setup data source
buf := getRandom(23, 32*1024*1024) buf := getRandom(23, 32*1024*1024)
ch := chunker.New(bytes.NewReader(buf), testPol, sha256.New()) ch := New(bytes.NewReader(buf), testPol)
chunks := testWithData(t, ch, chunks1) testWithData(t, ch, chunks1, true)
// test reader
for i, c := range chunks {
rd := c.Reader(bytes.NewReader(buf))
h := sha256.New()
n, err := io.Copy(h, rd)
if err != nil {
t.Fatalf("io.Copy(): %v", err)
}
if uint(n) != chunks1[i].Length {
t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
chunks1[i].Length, n)
}
d := h.Sum(nil)
if !bytes.Equal(d, chunks1[i].Digest) {
t.Fatalf("wrong hash returned: expected %02x, got %02x",
chunks1[i].Digest, d)
}
}
// setup nullbyte data source // setup nullbyte data source
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) buf = bytes.Repeat([]byte{0}, len(chunks2)*MinSize)
ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) ch = New(bytes.NewReader(buf), testPol)
testWithData(t, ch, chunks2) testWithData(t, ch, chunks2, true)
}
func TestChunkerReset(t *testing.T) {
buf := getRandom(23, 32*1024*1024)
ch := New(bytes.NewReader(buf), testPol)
testWithData(t, ch, chunks1, true)
ch.Reset(bytes.NewReader(buf), testPol)
testWithData(t, ch, chunks1, true)
} }
func TestChunkerWithRandomPolynomial(t *testing.T) { func TestChunkerWithRandomPolynomial(t *testing.T) {
@ -184,67 +163,64 @@ func TestChunkerWithRandomPolynomial(t *testing.T) {
// generate a new random polynomial // generate a new random polynomial
start := time.Now() start := time.Now()
p, err := chunker.RandomPolynomial() p, err := RandomPolynomial()
OK(t, err) if err != nil {
t.Fatal(err)
}
t.Logf("generating random polynomial took %v", time.Since(start)) t.Logf("generating random polynomial took %v", time.Since(start))
start = time.Now() start = time.Now()
ch := chunker.New(bytes.NewReader(buf), p, sha256.New()) ch := New(bytes.NewReader(buf), p)
t.Logf("creating chunker took %v", time.Since(start)) t.Logf("creating chunker took %v", time.Since(start))
// make sure that first chunk is different // make sure that first chunk is different
c, err := ch.Next() c, err := ch.Next(nil)
Assert(t, c.Cut != chunks1[0].CutFP, if c.Cut == chunks1[0].CutFP {
"Cut point is the same") t.Fatal("Cut point is the same")
Assert(t, c.Length != chunks1[0].Length, }
"Length is the same")
Assert(t, !bytes.Equal(c.Digest, chunks1[0].Digest), if c.Length == chunks1[0].Length {
"Digest is the same") t.Fatal("Length is the same")
}
if bytes.Equal(hashData(c.Data), chunks1[0].Digest) {
t.Fatal("Digest is the same")
}
} }
func TestChunkerWithoutHash(t *testing.T) { func TestChunkerWithoutHash(t *testing.T) {
// setup data source // setup data source
buf := getRandom(23, 32*1024*1024) buf := getRandom(23, 32*1024*1024)
ch := chunker.New(bytes.NewReader(buf), testPol, nil) ch := New(bytes.NewReader(buf), testPol)
chunks := testWithData(t, ch, chunks1) chunks := testWithData(t, ch, chunks1, false)
// test reader // test reader
for i, c := range chunks { for i, c := range chunks {
rd := c.Reader(bytes.NewReader(buf)) if uint(len(c.Data)) != chunks1[i].Length {
buf2, err := ioutil.ReadAll(rd)
if err != nil {
t.Fatalf("io.Copy(): %v", err)
}
if uint(len(buf2)) != chunks1[i].Length {
t.Fatalf("reader returned wrong number of bytes: expected %d, got %d", t.Fatalf("reader returned wrong number of bytes: expected %d, got %d",
chunks1[i].Length, uint(len(buf2))) chunks1[i].Length, len(c.Data))
} }
if uint(len(buf2)) != chunks1[i].Length { if !bytes.Equal(buf[c.Start:c.Start+c.Length], c.Data) {
t.Fatalf("wrong number of bytes returned: expected %02x, got %02x",
chunks[i].Length, len(buf2))
}
if !bytes.Equal(buf[c.Start:c.Start+c.Length], buf2) {
t.Fatalf("invalid data for chunk returned: expected %02x, got %02x", t.Fatalf("invalid data for chunk returned: expected %02x, got %02x",
buf[c.Start:c.Start+c.Length], buf2) buf[c.Start:c.Start+c.Length], c.Data)
} }
} }
// setup nullbyte data source // setup nullbyte data source
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) buf = bytes.Repeat([]byte{0}, len(chunks2)*MinSize)
ch = chunker.New(bytes.NewReader(buf), testPol, sha256.New()) ch = New(bytes.NewReader(buf), testPol)
testWithData(t, ch, chunks2) testWithData(t, ch, chunks2, false)
} }
func benchmarkChunker(b *testing.B, hash hash.Hash) { func benchmarkChunker(b *testing.B, checkDigest bool) {
size := 10 * 1024 * 1024 size := 32 * 1024 * 1024
rd := bytes.NewReader(getRandom(23, size)) rd := bytes.NewReader(getRandom(23, size))
ch := New(rd, testPol)
buf := make([]byte, MaxSize)
b.ResetTimer() b.ResetTimer()
b.SetBytes(int64(size)) b.SetBytes(int64(size))
@ -253,11 +229,16 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
chunks = 0 chunks = 0
rd.Seek(0, 0) _, err := rd.Seek(0, 0)
ch := chunker.New(rd, testPol, hash) if err != nil {
b.Fatalf("Seek() return error %v", err)
}
ch.Reset(rd, testPol)
cur := 0
for { for {
_, err := ch.Next() chunk, err := ch.Next(buf)
if err == io.EOF { if err == io.EOF {
break break
@ -267,7 +248,26 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) {
b.Fatalf("Unexpected error occurred: %v", err) b.Fatalf("Unexpected error occurred: %v", err)
} }
if chunk.Length != chunks1[cur].Length {
b.Errorf("wrong chunk length, want %d, got %d",
chunks1[cur].Length, chunk.Length)
}
if chunk.Cut != chunks1[cur].CutFP {
b.Errorf("wrong cut fingerprint, want 0x%x, got 0x%x",
chunks1[cur].CutFP, chunk.Cut)
}
if checkDigest {
h := hashData(chunk.Data)
if !bytes.Equal(h, chunks1[cur].Digest) {
b.Errorf("wrong digest, want %x, got %x",
chunks1[cur].Digest, h)
}
}
chunks++ chunks++
cur++
} }
} }
@ -275,24 +275,22 @@ func benchmarkChunker(b *testing.B, hash hash.Hash) {
} }
func BenchmarkChunkerWithSHA256(b *testing.B) { func BenchmarkChunkerWithSHA256(b *testing.B) {
benchmarkChunker(b, sha256.New()) benchmarkChunker(b, true)
}
func BenchmarkChunkerWithMD5(b *testing.B) {
benchmarkChunker(b, md5.New())
} }
func BenchmarkChunker(b *testing.B) { func BenchmarkChunker(b *testing.B) {
benchmarkChunker(b, nil) benchmarkChunker(b, false)
} }
func BenchmarkNewChunker(b *testing.B) { func BenchmarkNewChunker(b *testing.B) {
p, err := chunker.RandomPolynomial() p, err := RandomPolynomial()
OK(b, err) if err != nil {
b.Fatal(err)
}
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
chunker.New(bytes.NewBuffer(nil), p, nil) New(bytes.NewBuffer(nil), p)
} }
} }

View File

@ -0,0 +1,39 @@
package chunker
import (
"bytes"
"crypto/sha256"
"fmt"
"io"
)
// ExampleChunker chunks 32MiB of pseudo-random data and prints the length and
// SHA-256 digest of up to five chunks. The same buffer is passed to every
// Next call so that it can be reused for the chunk data.
func ExampleChunker() {
	// 32MiB of deterministic pseudo-random data serves as input
	input := getRandom(23, 32*1024*1024)

	// set up a chunker reading from the input
	ch := New(bytes.NewReader(input), Pol(0x3DA3358B4DC173))

	// a single buffer, handed to Next for every chunk
	scratch := make([]byte, 8*1024*1024)

	for n := 0; n < 5; n++ {
		c, err := ch.Next(scratch)
		if err != nil {
			// running out of data ends the loop, anything else is fatal
			if err == io.EOF {
				break
			}
			panic(err)
		}

		digest := sha256.Sum256(c.Data)
		fmt.Printf("%d %02x\n", c.Length, digest)
	}

	// Output:
	// 2163460 4b94cb2cf293855ea43bf766731c74969b91aa6bf3c078719aabdd19860d590d
	// 643703 5727a63c0964f365ab8ed2ccf604912f2ea7be29759a2b53ede4d6841e397407
	// 1528956 a73759636a1e7a2758767791c69e81b69fb49236c6929e5d1b654e06e37674ba
	// 1955808 c955fb059409b25f07e5ae09defbbc2aadf117c97a3724e06ad4abd2787e6824
	// 2222372 6ba5e9f7e1b310722be3627716cf469be941f7f3e39a4c3bcefea492ec31ee56
}

View File

@ -258,13 +258,15 @@ func qp(p uint, g Pol) Pol {
return res.Add(2).Mod(g) return res.Add(2).Mod(g)
} }
func (p Pol) MarshalJSON() ([]byte, error) { // MarshalJSON returns the JSON representation of the Pol.
buf := strconv.AppendUint([]byte{'"'}, uint64(p), 16) func (x Pol) MarshalJSON() ([]byte, error) {
buf := strconv.AppendUint([]byte{'"'}, uint64(x), 16)
buf = append(buf, '"') buf = append(buf, '"')
return buf, nil return buf, nil
} }
func (p *Pol) UnmarshalJSON(data []byte) error { // UnmarshalJSON parses a Pol from the JSON data.
func (x *Pol) UnmarshalJSON(data []byte) error {
if len(data) < 2 { if len(data) < 2 {
return errors.New("invalid string for polynomial") return errors.New("invalid string for polynomial")
} }
@ -272,7 +274,7 @@ func (p *Pol) UnmarshalJSON(data []byte) error {
if err != nil { if err != nil {
return err return err
} }
*p = Pol(n) *x = Pol(n)
return nil return nil
} }

View File

@ -1,16 +1,13 @@
package chunker_test package chunker
import ( import (
"strconv" "strconv"
"testing" "testing"
"github.com/restic/chunker"
. "github.com/restic/restic/test"
) )
var polAddTests = []struct { var polAddTests = []struct {
x, y chunker.Pol x, y Pol
sum chunker.Pol sum Pol
}{ }{
{23, 16, 23 ^ 16}, {23, 16, 23 ^ 16},
{0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4}, {0x9a7e30d1e855e0a0, 0x670102a1f4bcd414, 0xfd7f32701ce934b4},
@ -18,24 +15,29 @@ var polAddTests = []struct {
} }
func TestPolAdd(t *testing.T) { func TestPolAdd(t *testing.T) {
for _, test := range polAddTests { for i, test := range polAddTests {
Equals(t, test.sum, test.x.Add(test.y)) if test.sum != test.x.Add(test.y) {
Equals(t, test.sum, test.y.Add(test.x)) t.Errorf("test %d failed: sum != x+y", i)
}
if test.sum != test.y.Add(test.x) {
t.Errorf("test %d failed: sum != y+x", i)
}
} }
} }
func parseBin(s string) chunker.Pol { func parseBin(s string) Pol {
i, err := strconv.ParseUint(s, 2, 64) i, err := strconv.ParseUint(s, 2, 64)
if err != nil { if err != nil {
panic(err) panic(err)
} }
return chunker.Pol(i) return Pol(i)
} }
var polMulTests = []struct { var polMulTests = []struct {
x, y chunker.Pol x, y Pol
res chunker.Pol res Pol
}{ }{
{1, 2, 2}, {1, 2, 2},
{ {
@ -78,13 +80,15 @@ var polMulTests = []struct {
func TestPolMul(t *testing.T) { func TestPolMul(t *testing.T) {
for i, test := range polMulTests { for i, test := range polMulTests {
m := test.x.Mul(test.y) m := test.x.Mul(test.y)
Assert(t, test.res == m, if test.res != m {
"TestPolMul failed for test %d: %v * %v: want %v, got %v", t.Errorf("TestPolMul failed for test %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m) i, test.x, test.y, test.res, m)
}
m = test.y.Mul(test.x) m = test.y.Mul(test.x)
Assert(t, test.res == test.y.Mul(test.x), if test.res != test.y.Mul(test.x) {
"TestPolMul failed for %d: %v * %v: want %v, got %v", t.Errorf("TestPolMul failed for %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m) i, test.x, test.y, test.res, m)
}
} }
} }
@ -95,21 +99,21 @@ func TestPolMulOverflow(t *testing.T) {
if e, ok := err.(string); ok && e == "multiplication would overflow uint64" { if e, ok := err.(string); ok && e == "multiplication would overflow uint64" {
return return
} else {
t.Logf("invalid error raised: %v", err)
// re-raise error if not overflow
panic(err)
} }
t.Logf("invalid error raised: %v", err)
// re-raise error if not overflow
panic(err)
}() }()
x := chunker.Pol(1 << 63) x := Pol(1 << 63)
x.Mul(2) x.Mul(2)
t.Fatal("overflow test did not panic") t.Fatal("overflow test did not panic")
} }
var polDivTests = []struct { var polDivTests = []struct {
x, y chunker.Pol x, y Pol
res chunker.Pol res Pol
}{ }{
{10, 50, 0}, {10, 50, 0},
{0, 1, 0}, {0, 1, 0},
@ -139,15 +143,16 @@ var polDivTests = []struct {
func TestPolDiv(t *testing.T) { func TestPolDiv(t *testing.T) {
for i, test := range polDivTests { for i, test := range polDivTests {
m := test.x.Div(test.y) m := test.x.Div(test.y)
Assert(t, test.res == m, if test.res != m {
"TestPolDiv failed for test %d: %v * %v: want %v, got %v", t.Errorf("TestPolDiv failed for test %d: %v * %v: want %v, got %v",
i, test.x, test.y, test.res, m) i, test.x, test.y, test.res, m)
}
} }
} }
var polModTests = []struct { var polModTests = []struct {
x, y chunker.Pol x, y Pol
res chunker.Pol res Pol
}{ }{
{10, 50, 10}, {10, 50, 10},
{0, 1, 0}, {0, 1, 0},
@ -175,14 +180,17 @@ var polModTests = []struct {
} }
func TestPolModt(t *testing.T) { func TestPolModt(t *testing.T) {
for _, test := range polModTests { for i, test := range polModTests {
Equals(t, test.res, test.x.Mod(test.y)) res := test.x.Mod(test.y)
if test.res != res {
t.Errorf("test %d failed: want %v, got %v", i, test.res, res)
}
} }
} }
func BenchmarkPolDivMod(t *testing.B) { func BenchmarkPolDivMod(t *testing.B) {
f := chunker.Pol(0x2482734cacca49) f := Pol(0x2482734cacca49)
g := chunker.Pol(0x3af4b284899) g := Pol(0x3af4b284899)
for i := 0; i < t.N; i++ { for i := 0; i < t.N; i++ {
g.DivMod(f) g.DivMod(f)
@ -190,8 +198,8 @@ func BenchmarkPolDivMod(t *testing.B) {
} }
func BenchmarkPolDiv(t *testing.B) { func BenchmarkPolDiv(t *testing.B) {
f := chunker.Pol(0x2482734cacca49) f := Pol(0x2482734cacca49)
g := chunker.Pol(0x3af4b284899) g := Pol(0x3af4b284899)
for i := 0; i < t.N; i++ { for i := 0; i < t.N; i++ {
g.Div(f) g.Div(f)
@ -199,8 +207,8 @@ func BenchmarkPolDiv(t *testing.B) {
} }
func BenchmarkPolMod(t *testing.B) { func BenchmarkPolMod(t *testing.B) {
f := chunker.Pol(0x2482734cacca49) f := Pol(0x2482734cacca49)
g := chunker.Pol(0x3af4b284899) g := Pol(0x3af4b284899)
for i := 0; i < t.N; i++ { for i := 0; i < t.N; i++ {
g.Mod(f) g.Mod(f)
@ -208,7 +216,7 @@ func BenchmarkPolMod(t *testing.B) {
} }
func BenchmarkPolDeg(t *testing.B) { func BenchmarkPolDeg(t *testing.B) {
f := chunker.Pol(0x3af4b284899) f := Pol(0x3af4b284899)
d := f.Deg() d := f.Deg()
if d != 41 { if d != 41 {
t.Fatalf("BenchmalPolDeg: Wrong degree %d returned, expected %d", t.Fatalf("BenchmalPolDeg: Wrong degree %d returned, expected %d",
@ -221,25 +229,31 @@ func BenchmarkPolDeg(t *testing.B) {
} }
func TestRandomPolynomial(t *testing.T) { func TestRandomPolynomial(t *testing.T) {
_, err := chunker.RandomPolynomial() _, err := RandomPolynomial()
OK(t, err) if err != nil {
t.Fatal(err)
}
} }
func BenchmarkRandomPolynomial(t *testing.B) { func BenchmarkRandomPolynomial(t *testing.B) {
for i := 0; i < t.N; i++ { for i := 0; i < t.N; i++ {
_, err := chunker.RandomPolynomial() _, err := RandomPolynomial()
OK(t, err) if err != nil {
t.Fatal(err)
}
} }
} }
func TestExpandPolynomial(t *testing.T) { func TestExpandPolynomial(t *testing.T) {
pol := chunker.Pol(0x3DA3358B4DC173) pol := Pol(0x3DA3358B4DC173)
s := pol.Expand() s := pol.Expand()
Equals(t, "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1", s) if s != "x^53+x^52+x^51+x^50+x^48+x^47+x^45+x^41+x^40+x^37+x^36+x^34+x^32+x^31+x^27+x^25+x^24+x^22+x^19+x^18+x^16+x^15+x^14+x^8+x^6+x^5+x^4+x+1" {
t.Fatal("wrong result")
}
} }
var polIrredTests = []struct { var polIrredTests = []struct {
f chunker.Pol f Pol
irred bool irred bool
}{ }{
{0x38f1e565e288df, false}, {0x38f1e565e288df, false},
@ -270,15 +284,16 @@ var polIrredTests = []struct {
func TestPolIrreducible(t *testing.T) { func TestPolIrreducible(t *testing.T) {
for _, test := range polIrredTests { for _, test := range polIrredTests {
Assert(t, test.f.Irreducible() == test.irred, if test.f.Irreducible() != test.irred {
"Irreducibility test for Polynomial %v failed: got %v, wanted %v", t.Errorf("Irreducibility test for Polynomial %v failed: got %v, wanted %v",
test.f, test.f.Irreducible(), test.irred) test.f, test.f.Irreducible(), test.irred)
}
} }
} }
func BenchmarkPolIrreducible(b *testing.B) { func BenchmarkPolIrreducible(b *testing.B) {
// find first irreducible polynomial // find first irreducible polynomial
var pol chunker.Pol var pol Pol
for _, test := range polIrredTests { for _, test := range polIrredTests {
if test.irred { if test.irred {
pol = test.f pol = test.f
@ -287,15 +302,16 @@ func BenchmarkPolIrreducible(b *testing.B) {
} }
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
Assert(b, pol.Irreducible(), if !pol.Irreducible() {
"Irreducibility test for Polynomial %v failed", pol) b.Errorf("Irreducibility test for Polynomial %v failed", pol)
}
} }
} }
var polGCDTests = []struct { var polGCDTests = []struct {
f1 chunker.Pol f1 Pol
f2 chunker.Pol f2 Pol
gcd chunker.Pol gcd Pol
}{ }{
{10, 50, 2}, {10, 50, 2},
{0, 1, 1}, {0, 1, 1},
@ -345,21 +361,24 @@ var polGCDTests = []struct {
func TestPolGCD(t *testing.T) { func TestPolGCD(t *testing.T) {
for i, test := range polGCDTests { for i, test := range polGCDTests {
gcd := test.f1.GCD(test.f2) gcd := test.f1.GCD(test.f2)
Assert(t, test.gcd == gcd, if test.gcd != gcd {
"GCD test %d (%+v) failed: got %v, wanted %v", t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v",
i, test, gcd, test.gcd) i, test, gcd, test.gcd)
}
gcd = test.f2.GCD(test.f1) gcd = test.f2.GCD(test.f1)
Assert(t, test.gcd == gcd, if test.gcd != gcd {
"GCD test %d (%+v) failed: got %v, wanted %v", t.Errorf("GCD test %d (%+v) failed: got %v, wanted %v",
i, test, gcd, test.gcd) i, test, gcd, test.gcd)
}
} }
} }
var polMulModTests = []struct { var polMulModTests = []struct {
f1 chunker.Pol f1 Pol
f2 chunker.Pol f2 Pol
g chunker.Pol g Pol
mod chunker.Pol mod Pol
}{ }{
{ {
0x1230, 0x1230,
@ -378,8 +397,9 @@ var polMulModTests = []struct {
func TestPolMulMod(t *testing.T) { func TestPolMulMod(t *testing.T) {
for i, test := range polMulModTests { for i, test := range polMulModTests {
mod := test.f1.MulMod(test.f2, test.g) mod := test.f1.MulMod(test.f2, test.g)
Assert(t, mod == test.mod, if mod != test.mod {
"MulMod test %d (%+v) failed: got %v, wanted %v", t.Errorf("MulMod test %d (%+v) failed: got %v, wanted %v",
i, test, mod, test.mod) i, test, mod, test.mod)
}
} }
} }