From a5c33d80d83d207a2476f0d639858d3e92df0b49 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 8 Feb 2015 19:32:12 +0100 Subject: [PATCH] Chunker: remove pool, buf and make bufsize an option --- chunker/chunker.go | 53 ++++++++++++-------------------------- chunker/chunker_test.go | 57 +++++++++++++++++++++++++++-------------- 2 files changed, 54 insertions(+), 56 deletions(-) diff --git a/chunker/chunker.go b/chunker/chunker.go index a6321cde9..da038bf03 100644 --- a/chunker/chunker.go +++ b/chunker/chunker.go @@ -9,10 +9,11 @@ const ( KiB = 1024 MiB = 1024 * KiB - // randomly generated irreducible polynomial of degree 53 in Z_2[X] + // Polynomial is a randomly generated irreducible polynomial of degree 53 + // in Z_2[X]. All rabin fingerprints are calculated with this polynomial. Polynomial = 0x3DA3358B4DC173 - // use a sliding window of 64 byte. + // WindowSize is the size of the sliding window. WindowSize = 64 // aim to create chunks of 20 bits or about 1MiB on average. @@ -30,15 +31,6 @@ var ( once sync.Once mod_table [256]uint64 out_table [256]uint64 - - chunkerPool = sync.Pool{ - New: func() interface{} { - return &Chunker{ - window: make([]byte, WindowSize), - buf: make([]byte, MaxSize), - } - }, - } ) // A chunk is one content-dependent chunk of bytes whose end was cut when the @@ -72,22 +64,19 @@ type Chunker struct { } // New returns a new Chunker that reads from data from rd. -func New(rd io.Reader) *Chunker { - c := chunkerPool.Get().(*Chunker) - c.rd = rd +func New(rd io.Reader, bufsize int) *Chunker { + once.Do(fill_tables) - once.Do(c.fill_tables) + c := &Chunker{ + window: make([]byte, WindowSize), + buf: make([]byte, bufsize), + rd: rd, + } c.reset() return c } -// Free returns this chunker to the allocation pool -func (c *Chunker) Free() { - c.rd = nil - chunkerPool.Put(c) -} - func (c *Chunker) reset() { for i := 0; i < WindowSize; i++ { c.window[i] = 0 @@ -103,7 +92,7 @@ func (c *Chunker) reset() { } // Calculate out_table and mod_table for optimization. Must be called only once. -func (c *Chunker) fill_tables() { +func fill_tables() { // calculate table for sliding out bytes. The byte to slide out is used as // the index for the table, the value contains the following: // out_table[b] = Hash(b || 0 || ... || 0) @@ -139,13 +128,11 @@ func (c *Chunker) fill_tables() { } } -// Next returns the next chunk of data. If an error occurs while reading, -// the error is returned with a nil chunk. The state of the current chunk -// is undefined. When the last chunk has been returned, all subsequent -// calls yield a nil chunk and an io.EOF error. -func (c *Chunker) Next(dst []byte) (*Chunk, error) { - dst = dst[:0] - +// Next returns the position and length of the next chunk of data. If an error +// occurs while reading, the error is returned with a nil chunk. The state of +// the current chunk is undefined. When the last chunk has been returned, all +// subsequent calls yield a nil chunk and an io.EOF error. +func (c *Chunker) Next() (*Chunk, error) { for { if c.bpos >= c.bmax { n, err := io.ReadFull(c.rd, c.buf) @@ -168,7 +155,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) { Start: c.start, Length: c.count, Cut: c.digest, - Data: dst, }, nil } } @@ -186,7 +172,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) { n := c.bmax - c.bpos if c.pre > n { c.pre -= n - dst = append(dst, c.buf[c.bpos:c.bmax]...) c.count += n c.pos += n @@ -194,7 +179,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) { continue } - dst = append(dst, c.buf[c.bpos:c.bpos+c.pre]...) c.bpos += c.pre c.count += c.pre c.pos += c.pre @@ -216,7 +200,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) { c.digest ^= mod_table[index] if (c.count+i+1 >= MinSize && (c.digest&splitmask) == 0) || c.count+i+1 >= MaxSize { - dst = append(dst, c.buf[c.bpos:c.bpos+i+1]...) c.count += i + 1 c.pos += i + 1 c.bpos += i + 1 @@ -225,7 +208,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) { Start: c.start, Length: c.count, Cut: c.digest, - Data: dst, } // keep position @@ -240,9 +222,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) { } steps := c.bmax - c.bpos - if steps > 0 { - dst = append(dst, c.buf[c.bpos:c.bpos+steps]...) - } c.count += steps c.pos += steps c.bpos = c.bmax diff --git a/chunker/chunker_test.go b/chunker/chunker_test.go index dbdd68569..2e51eb80a 100644 --- a/chunker/chunker_test.go +++ b/chunker/chunker_test.go @@ -2,13 +2,18 @@ package chunker_test import ( "bytes" + "flag" "io" "math/rand" + "os" "testing" "github.com/restic/restic/chunker" ) +var benchmarkFile = flag.String("bench.file", "", "read from this file for benchmark") +var testBufSize = flag.Int("test.bufsize", 256*1024, "use this buffer size for benchmark") + type chunk struct { Length int CutFP uint64 @@ -55,9 +60,8 @@ var chunks2 = []chunk{ } func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) { - buf := make([]byte, chunker.MaxSize) for i, chunk := range chunks { - c, err := chnker.Next(buf) + c, err := chnker.Next() if err != nil { t.Fatalf("Error returned with chunk %d: %v", i, err) @@ -73,11 +77,6 @@ func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) { i, chunk.Length, c.Length) } - if len(c.Data) != chunk.Length { - t.Fatalf("Data length for chunk %d does not match: expected %d, got %d", - i, chunk.Length, len(c.Data)) - } - if c.Cut != chunk.CutFP { t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x", i, len(chunks)-1, chunk.CutFP, c.Cut) @@ -85,7 +84,7 @@ func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) { } } - c, err := chnker.Next(buf) + c, err := chnker.Next() if c != nil { t.Fatal("additional non-nil chunk returned") @@ -114,32 +113,51 @@ func get_random(seed, count int) []byte { func TestChunker(t *testing.T) { // setup data source buf := get_random(23, 32*1024*1024) - ch := chunker.New(bytes.NewReader(buf)) + ch := chunker.New(bytes.NewReader(buf), *testBufSize) test_with_data(t, ch, chunks1) - ch.Free() // setup nullbyte data source buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) - ch = chunker.New(bytes.NewReader(buf)) + ch = chunker.New(bytes.NewReader(buf), *testBufSize) test_with_data(t, ch, chunks2) - ch.Free() } func TestChunkerReuse(t *testing.T) { // test multiple uses of the same chunker for i := 0; i < 4; i++ { buf := get_random(23, 32*1024*1024) - ch := chunker.New(bytes.NewReader(buf)) + ch := chunker.New(bytes.NewReader(buf), *testBufSize) test_with_data(t, ch, chunks1) - ch.Free() } } func BenchmarkChunker(b *testing.B) { - size := 10 * 1024 * 1024 - buf := get_random(23, size) - dst := make([]byte, chunker.MaxSize) + var ( + rd io.ReadSeeker + size int + ) + + b.Logf("using bufsize %v", *testBufSize) + + if *benchmarkFile != "" { + b.Logf("using file %q for benchmark", *benchmarkFile) + f, err := os.Open(*benchmarkFile) + if err != nil { + b.Fatalf("open(%q): %v", *benchmarkFile, err) + } + + fi, err := f.Stat() + if err != nil { + b.Fatalf("lstat(%q): %v", *benchmarkFile, err) + } + + size = int(fi.Size()) + rd = f + } else { + size = 10 * 1024 * 1024 + rd = bytes.NewReader(get_random(23, size)) + } b.ResetTimer() b.SetBytes(int64(size)) @@ -148,10 +166,11 @@ func BenchmarkChunker(b *testing.B) { for i := 0; i < b.N; i++ { chunks = 0 - ch := chunker.New(bytes.NewReader(buf)) + rd.Seek(0, 0) + ch := chunker.New(rd, *testBufSize) for { - _, err := ch.Next(dst) + _, err := ch.Next() if err == io.EOF { break