dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit b1c7996c39784e81f73e4b1189f9b5786caad974
parent 66a94e3ce6d7c193d13d0bad4b1b8f7ecd51f5d2
Author: sin <sin@2f30.org>
Date:   Fri, 22 Feb 2019 19:32:12 +0000

Rework get_chunk_size() to return the size rather than the offset

Diffstat:
M chunker.c | 13 ++++++-------
1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/chunker.c b/chunker.c
@@ -43,8 +43,6 @@ get_chunk_size(struct chunker *chunker)
 	if (chunk_size < WINSIZE)
 		return chunk_size;
 
-	bp = chunker->buf;
-
 	/*
 	 * To achieve better deduplication, we chunk blocks based on a
 	 * recurring pattern occuring on the data stream. A fixed window
@@ -53,14 +51,15 @@ get_chunk_size(struct chunker *chunker)
 	 * When the rolling hash matches a given pattern the block is chunked
 	 * at the end of that window.
 	 */
+	bp = &chunker->buf[chunker->rpos];
 	fp = buzh_init(bp, WINSIZE);
-	for (i = chunker->rpos; i < chunker->wpos - WINSIZE; i++) {
-		chunk_size = i + WINSIZE;
+	for (i = 0; i < chunk_size - WINSIZE; i++) {
 		if (i > 0)
-			fp = buzh_update(fp, bp[i - 1], bp[chunk_size - 1],
+			fp = buzh_update(fp, bp[i - 1], bp[i + WINSIZE - 1],
 			                 WINSIZE);
-		if (match_pattern(chunker, chunk_size, fp) == 1)
-			return chunk_size;
+		if (match_pattern(chunker, i + WINSIZE, fp) == 1)
+			return i + WINSIZE;
+
 	}
 	return chunk_size;
 }