Check buffer size in chunk_blk() - dedup - deduplicating backup program

commit 07c41115923df14d48ec16279ed14dcb0df598e1
parent 32c20d64995844daaaed9c9a11afc03ae68c7753
Author: z3bra <contactatz3bradotorg>
Date:   Sun, 17 Feb 2019 14:42:59 +0100

Check buffer size in chunk_blk()

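chunk_blk() now returns the buffer size as-is when the buffer is
smaller than WINSIZ, so the caller in dedup() no longer needs its own
size check before calling it.

This commit also changes the for loop so that a block can be chunked
when the pattern matches right at the beginning of the block, making
the smallest chunk size WINSIZ instead of WINSIZ + 1.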

Diffstat:
M dedup.c  | 14 ++++++++------

1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/dedup.c b/dedup.c
@@ -140,6 +140,10 @@ chunk_blk(uint8_t *buf, size_t size)
 	size_t i;
 	uint32_t fp;
 
+	/* buzhash should be at least WINSIZ */
+	if (size < WINSIZ)
+		return size;
+
 	/*
 	 * To achieve better deduplication, we chunk blocks based on a
 	 * recurring pattern occuring on the data stream. A fixed window
@@ -150,8 +154,9 @@ chunk_blk(uint8_t *buf, size_t size)
 	 * WINSIZ the smallest possible block size.
 	 */
 	fp = buzh_init(buf, WINSIZ);
-	for (i = 1; i < size - WINSIZ; i++) {
-		fp = buzh_update(fp, buf[i - 1], buf[i + WINSIZ - 1], WINSIZ);
+	for (i = 0; i < size - WINSIZ; i++) {
+		if (i > 0)
+			fp = buzh_update(fp, buf[i - 1], buf[WINSIZ + i - 1], WINSIZ);
 		if ((fp & HASHMSK) == 0)
 			return i + WINSIZ;
 	}
@@ -387,10 +392,7 @@ dedup(int fd, char *msg)
 			struct bdescr bdescr;
 			size_t blksiz;
 
-			if (n > WINSIZ)
-				blksiz = chunk_blk(bp, n);
-			else
-				blksiz = n;
+			blksiz = chunk_blk(bp, n);
 
 			memcpy(bdescr.md, md, sizeof(bdescr));
 			bdescr.offset = enthdr.store_size;

	dedup deduplicating backup program
	git clone git://git.2f30.org/dedup
	Log \| Files \| Refs \| README \| LICENSE