commit 07c41115923df14d48ec16279ed14dcb0df598e1
parent 32c20d64995844daaaed9c9a11afc03ae68c7753
Author: z3bra <contactatz3bradotorg>
Date: Sun, 17 Feb 2019 14:42:59 +0100
Check buffer size in chunk_blk()
It also changes the for loop so we can chunk a block if the pattern
matches right at the beginning of the block, thus making the smallest
chunk size WINSIZ instead of WINSIZ + 1.
Diffstat:
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/dedup.c b/dedup.c
@@ -140,6 +140,10 @@ chunk_blk(uint8_t *buf, size_t size)
size_t i;
uint32_t fp;
+ /* the buffer must hold at least WINSIZ bytes to compute a buzhash */
+ if (size < WINSIZ)
+ return size;
+
/*
* To achieve better deduplication, we chunk blocks based on a
* recurring pattern occurring on the data stream. A fixed window
@@ -150,8 +154,9 @@ chunk_blk(uint8_t *buf, size_t size)
* WINSIZ the smallest possible block size.
*/
fp = buzh_init(buf, WINSIZ);
- for (i = 1; i < size - WINSIZ; i++) {
- fp = buzh_update(fp, buf[i - 1], buf[i + WINSIZ - 1], WINSIZ);
+ for (i = 0; i < size - WINSIZ; i++) {
+ if (i > 0)
+ fp = buzh_update(fp, buf[i - 1], buf[WINSIZ + i - 1], WINSIZ);
if ((fp & HASHMSK) == 0)
return i + WINSIZ;
}
@@ -387,10 +392,7 @@ dedup(int fd, char *msg)
struct bdescr bdescr;
size_t blksiz;
- if (n > WINSIZ)
- blksiz = chunk_blk(bp, n);
- else
- blksiz = n;
+ blksiz = chunk_blk(bp, n);
memcpy(bdescr.md, md, sizeof(bdescr));
bdescr.offset = enthdr.store_size;