commit b1c7996c39784e81f73e4b1189f9b5786caad974
parent 66a94e3ce6d7c193d13d0bad4b1b8f7ecd51f5d2
Author: sin <sin@2f30.org>
Date: Fri, 22 Feb 2019 19:32:12 +0000
Rework get_chunk_size() to return the size rather than the offset
Diffstat:
1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/chunker.c b/chunker.c
@@ -43,8 +43,6 @@ get_chunk_size(struct chunker *chunker)
if (chunk_size < WINSIZE)
return chunk_size;
- bp = chunker->buf;
-
/*
* To achieve better deduplication, we chunk blocks based on a
* recurring pattern occurring on the data stream. A fixed window
@@ -53,14 +51,15 @@ get_chunk_size(struct chunker *chunker)
* When the rolling hash matches a given pattern the block is chunked
* at the end of that window.
*/
+ bp = &chunker->buf[chunker->rpos];
fp = buzh_init(bp, WINSIZE);
- for (i = chunker->rpos; i < chunker->wpos - WINSIZE; i++) {
- chunk_size = i + WINSIZE;
+ for (i = 0; i < chunk_size - WINSIZE; i++) {
if (i > 0)
- fp = buzh_update(fp, bp[i - 1], bp[chunk_size - 1],
+ fp = buzh_update(fp, bp[i - 1], bp[i + WINSIZE - 1],
WINSIZE);
- if (match_pattern(chunker, chunk_size, fp) == 1)
- return chunk_size;
+ if (match_pattern(chunker, i + WINSIZE, fp) == 1)
+ return i + WINSIZE;
+
}
return chunk_size;
}