From 9e24238cdd0d9227bcc2d9ef9baba19c2ff6a235 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sat, 5 Mar 2016 13:46:20 +0100 Subject: [PATCH] Update chunker --- vendor/manifest | 2 +- .../src/github.com/restic/chunker/chunker.go | 143 +++++++++++------- 2 files changed, 88 insertions(+), 57 deletions(-) diff --git a/vendor/manifest b/vendor/manifest index b3a907c3b..27dd6a9b4 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -40,7 +40,7 @@ { "importpath": "github.com/restic/chunker", "repository": "https://github.com/restic/chunker", - "revision": "16c849a106e0a50d658e8f5e49a01f6728f4f92c", + "revision": "d1a97fa6e55ab338a8ceb769d72f856a56e9379a", "branch": "master" }, { diff --git a/vendor/src/github.com/restic/chunker/chunker.go b/vendor/src/github.com/restic/chunker/chunker.go index 8b82d8d5d..038985f3f 100644 --- a/vendor/src/github.com/restic/chunker/chunker.go +++ b/vendor/src/github.com/restic/chunker/chunker.go @@ -33,12 +33,12 @@ type tables struct { // cache precomputed tables, these are read-only anyway var cache struct { - entries map[Pol]*tables + entries map[Pol]tables sync.Mutex } func init() { - cache.entries = make(map[Pol]*tables) + cache.entries = make(map[Pol]tables) } // Chunk is one content-dependent chunk of bytes whose end was cut when the @@ -50,17 +50,7 @@ type Chunk struct { Data []byte } -// Chunker splits content with Rabin Fingerprints. -type Chunker struct { - MinSize, MaxSize uint - - pol Pol - polShift uint - tables *tables - - rd io.Reader - closed bool - +type chunkerState struct { window [windowSize]byte wpos int @@ -77,15 +67,37 @@ type Chunker struct { digest uint64 } +type chunkerConfig struct { + MinSize, MaxSize uint + + pol Pol + polShift uint + tables tables + tablesInitialized bool + + rd io.Reader + closed bool +} + +// Chunker splits content with Rabin Fingerprints. +type Chunker struct { + chunkerConfig + chunkerState +} + // New returns a new Chunker based on polynomial p that reads from rd // with bufsize and pass all data to hash along the way. func New(rd io.Reader, pol Pol) *Chunker { c := &Chunker{ - buf: make([]byte, chunkerBufSize), - pol: pol, - rd: rd, - MinSize: MinSize, - MaxSize: MaxSize, + chunkerState: chunkerState{ + buf: make([]byte, chunkerBufSize), + }, + chunkerConfig: chunkerConfig{ + pol: pol, + rd: rd, + MinSize: MinSize, + MaxSize: MaxSize, + }, } c.reset() @@ -96,11 +108,15 @@ func New(rd io.Reader, pol Pol) *Chunker { // Reset reinitializes the chunker with a new reader and polynomial. func (c *Chunker) Reset(rd io.Reader, pol Pol) { *c = Chunker{ - buf: c.buf, - pol: pol, - rd: rd, - MinSize: c.MinSize, - MaxSize: c.MaxSize, + chunkerState: chunkerState{ + buf: c.buf, + }, + chunkerConfig: chunkerConfig{ + pol: pol, + rd: rd, + MinSize: MinSize, + MaxSize: MaxSize, + }, } c.reset() @@ -118,7 +134,7 @@ func (c *Chunker) reset() { c.digest = 0 c.wpos = 0 c.count = 0 - c.slide(1) + c.digest = c.slide(c.digest, 1) c.start = c.pos // do not start a new chunk unless at least MinSize bytes have been read @@ -133,6 +149,8 @@ func (c *Chunker) fillTables() { return } + c.tablesInitialized = true + // test if the tables are cached for this polynomial cache.Lock() defer cache.Unlock() @@ -141,10 +159,6 @@ func (c *Chunker) fillTables() { return } - // else create a new entry - c.tables = &tables{} - cache.entries[c.pol] = c.tables - // calculate table for sliding out bytes. The byte to slide out is used as // the index for the table, the value contains the following: // out_table[b] = Hash(b || 0 || ... || 0) @@ -178,6 +192,8 @@ func (c *Chunker) fillTables() { // enough to reduce modulo Polynomial c.tables.mod[b] = Pol(uint64(b)<= c.bmax { - n, err := io.ReadFull(c.rd, c.buf[:]) + n, err := io.ReadFull(c.rd, buf[:]) if err == io.ErrUnexpectedEOF { err = nil @@ -230,7 +252,7 @@ func (c *Chunker) Next(data []byte) (Chunk, error) { n := c.bmax - c.bpos if c.pre > uint(n) { c.pre -= uint(n) - data = append(data, c.buf[c.bpos:c.bmax]...) + data = append(data, buf[c.bpos:c.bmax]...) c.count += uint(n) c.pos += uint(n) @@ -239,7 +261,7 @@ func (c *Chunker) Next(data []byte) (Chunk, error) { continue } - data = append(data, c.buf[c.bpos:c.bpos+c.pre]...) + data = append(data, buf[c.bpos:c.bpos+c.pre]...) c.bpos += c.pre c.count += c.pre @@ -248,37 +270,41 @@ func (c *Chunker) Next(data []byte) (Chunk, error) { } add := c.count - for _, b := range c.buf[c.bpos:c.bmax] { - // inline c.slide(b) and append(b) to increase performance - out := c.window[c.wpos] - c.window[c.wpos] = b - c.digest ^= uint64(c.tables.out[out]) - c.wpos = (c.wpos + 1) % windowSize + digest := c.digest + win := c.window + wpos := c.wpos + for _, b := range buf[c.bpos:c.bmax] { + // slide(b) + out := win[wpos] + win[wpos] = b + digest ^= uint64(tabout[out]) + wpos = (wpos + 1) % windowSize - // c.append(b) - index := c.digest >> c.polShift - c.digest <<= 8 - c.digest |= uint64(b) + // updateDigest + index := byte(digest >> polShift) + digest <<= 8 + digest |= uint64(b) - c.digest ^= uint64(c.tables.mod[index]) - // end inline + digest ^= uint64(tabmod[index]) + // end manual inline add++ - if add < c.MinSize { + if add < minSize { continue } - if (c.digest&splitmask) == 0 || add >= MaxSize { + if (digest&splitmask) == 0 || add >= maxSize { i := add - c.count - 1 data = append(data, c.buf[c.bpos:c.bpos+uint(i)+1]...) c.count = add c.pos += uint(i) + 1 c.bpos += uint(i) + 1 + c.buf = buf chunk := Chunk{ Start: c.start, Length: c.count, - Cut: c.digest, + Cut: digest, Data: data, } @@ -287,6 +313,9 @@ func (c *Chunker) Next(data []byte) (Chunk, error) { return chunk, nil } } + c.digest = digest + c.window = win + c.wpos = wpos steps := c.bmax - c.bpos if steps > 0 { @@ -298,21 +327,23 @@ func (c *Chunker) Next(data []byte) (Chunk, error) { } } -func (c *Chunker) append(b byte) { - index := c.digest >> c.polShift - c.digest <<= 8 - c.digest |= uint64(b) +func updateDigest(digest uint64, polShift uint, tab tables, b byte) (newDigest uint64) { + index := digest >> polShift + digest <<= 8 + digest |= uint64(b) - c.digest ^= uint64(c.tables.mod[index]) + digest ^= uint64(tab.mod[index]) + return digest } -func (c *Chunker) slide(b byte) { +func (c *Chunker) slide(digest uint64, b byte) (newDigest uint64) { out := c.window[c.wpos] c.window[c.wpos] = b - c.digest ^= uint64(c.tables.out[out]) + digest ^= uint64(c.tables.out[out]) c.wpos = (c.wpos + 1) % windowSize - c.append(b) + digest = updateDigest(digest, c.polShift, c.tables, b) + return digest } func appendByte(hash Pol, b byte, pol Pol) Pol {