Chunker: remove pool and buf; make bufsize an option

This commit is contained in:
Alexander Neumann 2015-02-08 19:32:12 +01:00
parent 8dc5c2296a
commit a5c33d80d8
2 changed files with 54 additions and 56 deletions

View File

@ -9,10 +9,11 @@ const (
KiB = 1024 KiB = 1024
MiB = 1024 * KiB MiB = 1024 * KiB
// randomly generated irreducible polynomial of degree 53 in Z_2[X] // Polynomial is a randomly generated irreducible polynomial of degree 53
// in Z_2[X]. All rabin fingerprints are calculated with this polynomial.
Polynomial = 0x3DA3358B4DC173 Polynomial = 0x3DA3358B4DC173
// use a sliding window of 64 byte. // WindowSize is the size of the sliding window.
WindowSize = 64 WindowSize = 64
// aim to create chunks of 20 bits or about 1MiB on average. // aim to create chunks of 20 bits or about 1MiB on average.
@ -30,15 +31,6 @@ var (
once sync.Once once sync.Once
mod_table [256]uint64 mod_table [256]uint64
out_table [256]uint64 out_table [256]uint64
chunkerPool = sync.Pool{
New: func() interface{} {
return &Chunker{
window: make([]byte, WindowSize),
buf: make([]byte, MaxSize),
}
},
}
) )
// A chunk is one content-dependent chunk of bytes whose end was cut when the // A chunk is one content-dependent chunk of bytes whose end was cut when the
@ -72,22 +64,19 @@ type Chunker struct {
} }
// New returns a new Chunker that reads from data from rd. // New returns a new Chunker that reads from data from rd.
func New(rd io.Reader) *Chunker { func New(rd io.Reader, bufsize int) *Chunker {
c := chunkerPool.Get().(*Chunker) once.Do(fill_tables)
c.rd = rd
once.Do(c.fill_tables) c := &Chunker{
window: make([]byte, WindowSize),
buf: make([]byte, bufsize),
rd: rd,
}
c.reset() c.reset()
return c return c
} }
// Free returns this chunker to the allocation pool
func (c *Chunker) Free() {
c.rd = nil
chunkerPool.Put(c)
}
func (c *Chunker) reset() { func (c *Chunker) reset() {
for i := 0; i < WindowSize; i++ { for i := 0; i < WindowSize; i++ {
c.window[i] = 0 c.window[i] = 0
@ -103,7 +92,7 @@ func (c *Chunker) reset() {
} }
// Calculate out_table and mod_table for optimization. Must be called only once. // Calculate out_table and mod_table for optimization. Must be called only once.
func (c *Chunker) fill_tables() { func fill_tables() {
// calculate table for sliding out bytes. The byte to slide out is used as // calculate table for sliding out bytes. The byte to slide out is used as
// the index for the table, the value contains the following: // the index for the table, the value contains the following:
// out_table[b] = Hash(b || 0 || ... || 0) // out_table[b] = Hash(b || 0 || ... || 0)
@ -139,13 +128,11 @@ func (c *Chunker) fill_tables() {
} }
} }
// Next returns the next chunk of data. If an error occurs while reading, // Next returns the position and length of the next chunk of data. If an error
// the error is returned with a nil chunk. The state of the current chunk // occurs while reading, the error is returned with a nil chunk. The state of
// is undefined. When the last chunk has been returned, all subsequent // the current chunk is undefined. When the last chunk has been returned, all
// calls yield a nil chunk and an io.EOF error. // subsequent calls yield a nil chunk and an io.EOF error.
func (c *Chunker) Next(dst []byte) (*Chunk, error) { func (c *Chunker) Next() (*Chunk, error) {
dst = dst[:0]
for { for {
if c.bpos >= c.bmax { if c.bpos >= c.bmax {
n, err := io.ReadFull(c.rd, c.buf) n, err := io.ReadFull(c.rd, c.buf)
@ -168,7 +155,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
Start: c.start, Start: c.start,
Length: c.count, Length: c.count,
Cut: c.digest, Cut: c.digest,
Data: dst,
}, nil }, nil
} }
} }
@ -186,7 +172,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
n := c.bmax - c.bpos n := c.bmax - c.bpos
if c.pre > n { if c.pre > n {
c.pre -= n c.pre -= n
dst = append(dst, c.buf[c.bpos:c.bmax]...)
c.count += n c.count += n
c.pos += n c.pos += n
@ -194,7 +179,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
continue continue
} }
dst = append(dst, c.buf[c.bpos:c.bpos+c.pre]...)
c.bpos += c.pre c.bpos += c.pre
c.count += c.pre c.count += c.pre
c.pos += c.pre c.pos += c.pre
@ -216,7 +200,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
c.digest ^= mod_table[index] c.digest ^= mod_table[index]
if (c.count+i+1 >= MinSize && (c.digest&splitmask) == 0) || c.count+i+1 >= MaxSize { if (c.count+i+1 >= MinSize && (c.digest&splitmask) == 0) || c.count+i+1 >= MaxSize {
dst = append(dst, c.buf[c.bpos:c.bpos+i+1]...)
c.count += i + 1 c.count += i + 1
c.pos += i + 1 c.pos += i + 1
c.bpos += i + 1 c.bpos += i + 1
@ -225,7 +208,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
Start: c.start, Start: c.start,
Length: c.count, Length: c.count,
Cut: c.digest, Cut: c.digest,
Data: dst,
} }
// keep position // keep position
@ -240,9 +222,6 @@ func (c *Chunker) Next(dst []byte) (*Chunk, error) {
} }
steps := c.bmax - c.bpos steps := c.bmax - c.bpos
if steps > 0 {
dst = append(dst, c.buf[c.bpos:c.bpos+steps]...)
}
c.count += steps c.count += steps
c.pos += steps c.pos += steps
c.bpos = c.bmax c.bpos = c.bmax

View File

@ -2,13 +2,18 @@ package chunker_test
import ( import (
"bytes" "bytes"
"flag"
"io" "io"
"math/rand" "math/rand"
"os"
"testing" "testing"
"github.com/restic/restic/chunker" "github.com/restic/restic/chunker"
) )
var benchmarkFile = flag.String("bench.file", "", "read from this file for benchmark")
var testBufSize = flag.Int("test.bufsize", 256*1024, "use this buffer size for benchmark")
type chunk struct { type chunk struct {
Length int Length int
CutFP uint64 CutFP uint64
@ -55,9 +60,8 @@ var chunks2 = []chunk{
} }
func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) { func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
buf := make([]byte, chunker.MaxSize)
for i, chunk := range chunks { for i, chunk := range chunks {
c, err := chnker.Next(buf) c, err := chnker.Next()
if err != nil { if err != nil {
t.Fatalf("Error returned with chunk %d: %v", i, err) t.Fatalf("Error returned with chunk %d: %v", i, err)
@ -73,11 +77,6 @@ func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
i, chunk.Length, c.Length) i, chunk.Length, c.Length)
} }
if len(c.Data) != chunk.Length {
t.Fatalf("Data length for chunk %d does not match: expected %d, got %d",
i, chunk.Length, len(c.Data))
}
if c.Cut != chunk.CutFP { if c.Cut != chunk.CutFP {
t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x", t.Fatalf("Cut fingerprint for chunk %d/%d does not match: expected %016x, got %016x",
i, len(chunks)-1, chunk.CutFP, c.Cut) i, len(chunks)-1, chunk.CutFP, c.Cut)
@ -85,7 +84,7 @@ func test_with_data(t *testing.T, chnker *chunker.Chunker, chunks []chunk) {
} }
} }
c, err := chnker.Next(buf) c, err := chnker.Next()
if c != nil { if c != nil {
t.Fatal("additional non-nil chunk returned") t.Fatal("additional non-nil chunk returned")
@ -114,32 +113,51 @@ func get_random(seed, count int) []byte {
func TestChunker(t *testing.T) { func TestChunker(t *testing.T) {
// setup data source // setup data source
buf := get_random(23, 32*1024*1024) buf := get_random(23, 32*1024*1024)
ch := chunker.New(bytes.NewReader(buf)) ch := chunker.New(bytes.NewReader(buf), *testBufSize)
test_with_data(t, ch, chunks1) test_with_data(t, ch, chunks1)
ch.Free()
// setup nullbyte data source // setup nullbyte data source
buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize) buf = bytes.Repeat([]byte{0}, len(chunks2)*chunker.MinSize)
ch = chunker.New(bytes.NewReader(buf)) ch = chunker.New(bytes.NewReader(buf), *testBufSize)
test_with_data(t, ch, chunks2) test_with_data(t, ch, chunks2)
ch.Free()
} }
func TestChunkerReuse(t *testing.T) { func TestChunkerReuse(t *testing.T) {
// test multiple uses of the same chunker // test multiple uses of the same chunker
for i := 0; i < 4; i++ { for i := 0; i < 4; i++ {
buf := get_random(23, 32*1024*1024) buf := get_random(23, 32*1024*1024)
ch := chunker.New(bytes.NewReader(buf)) ch := chunker.New(bytes.NewReader(buf), *testBufSize)
test_with_data(t, ch, chunks1) test_with_data(t, ch, chunks1)
ch.Free()
} }
} }
func BenchmarkChunker(b *testing.B) { func BenchmarkChunker(b *testing.B) {
size := 10 * 1024 * 1024 var (
buf := get_random(23, size) rd io.ReadSeeker
dst := make([]byte, chunker.MaxSize) size int
)
b.Logf("using bufsize %v", *testBufSize)
if *benchmarkFile != "" {
b.Logf("using file %q for benchmark", *benchmarkFile)
f, err := os.Open(*benchmarkFile)
if err != nil {
b.Fatalf("open(%q): %v", *benchmarkFile, err)
}
fi, err := f.Stat()
if err != nil {
b.Fatalf("lstat(%q): %v", *benchmarkFile, err)
}
size = int(fi.Size())
rd = f
} else {
size = 10 * 1024 * 1024
rd = bytes.NewReader(get_random(23, size))
}
b.ResetTimer() b.ResetTimer()
b.SetBytes(int64(size)) b.SetBytes(int64(size))
@ -148,10 +166,11 @@ func BenchmarkChunker(b *testing.B) {
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
chunks = 0 chunks = 0
ch := chunker.New(bytes.NewReader(buf)) rd.Seek(0, 0)
ch := chunker.New(rd, *testBufSize)
for { for {
_, err := ch.Next(dst) _, err := ch.Next()
if err == io.EOF { if err == io.EOF {
break break