commit 9bee83eb425e6424e1cb6c9e5fbe6dced9d005ce
parent c0597760335a6dfcfeb37a023f6362567079ba7f
Author: sin <sin@2f30.org>
Date: Sat, 16 Feb 2019 23:49:36 +0000
Hook in rolling hash
Diffstat:
M | dedup.c | | | 69 | +++++++++++++++++++++++++++++++++++++++++---------------------------- |
1 file changed, 41 insertions(+), 28 deletions(-)
diff --git a/dedup.c b/dedup.c
@@ -16,8 +16,8 @@
#define STOREF ".store"
#define CACHEF ".cache"
-#define BLKSIZ 65536
-#define WINSIZ 4095
+#define BLKSIZ (65536*4)
+#define WINSIZ 127
#define MDSIZ SHA256_DIGEST_LENGTH
#define ROTL(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
@@ -140,7 +140,7 @@ chunk_blk(uint8_t *buf, size_t size)
*/
fp = buzh_init(buf, WINSIZ);
for (i = 1; i < size - WINSIZ; i++) {
- fp = buzh_update(fp, buf[i - 1], buf[i + WINSIZ], WINSIZ);
+ fp = buzh_update(fp, buf[i - 1], buf[i + WINSIZ - 1], WINSIZ);
if ((fp & 0x00001fff) == 0)
return i + WINSIZ;
}
@@ -366,10 +366,9 @@ lookup_blk(uint8_t *md, struct bdescr *bdescr)
void
dedup(int fd)
{
- uint8_t md[MDSIZ];
uint8_t *buf;
- SHA256_CTX ctx;
struct ent *ent;
+ SHA256_CTX ctx;
ssize_t n;
buf = alloc_buf(BLKSIZ);
@@ -377,36 +376,50 @@ dedup(int fd)
SHA256_Init(&ctx);
while ((n = xread(fd, buf, BLKSIZ)) > 0) {
- struct bdescr bdescr;
+ uint8_t *bp = buf;
- hash_blk(buf, n, md);
+ while (n > 0) {
+ uint8_t md[MDSIZ];
+ struct bdescr bdescr;
+ size_t blksiz;
- /* Calculate file hash one block at a time */
- SHA256_Update(&ctx, buf, n);
+ if (n > WINSIZ)
+ blksiz = chunk_blk(bp, n);
+ else
+ blksiz = n;
- ent = grow_ent(ent, ent->nblks + 1);
- if (lookup_blk(md, &bdescr) < 0) {
- struct bdescr bdescr;
- struct cent *cent;
+ hash_blk(bp, blksiz, md);
- /* Block not found, create new block descriptor */
- memcpy(bdescr.md, md, sizeof(bdescr));
- bdescr.offset = store_size();
- bdescr.size = n;
+ /* Calculate file hash one block at a time */
+ SHA256_Update(&ctx, bp, blksiz);
- /* Update index entry */
- ent->bdescr[ent->nblks++] = bdescr;
+ ent = grow_ent(ent, ent->nblks + 1);
+ if (lookup_blk(md, &bdescr) < 0) {
+ struct bdescr bdescr;
+ struct cent *cent;
- /* Store block */
- append_blk(buf, n);
+ /* Block not found, create new block descriptor */
+ memcpy(bdescr.md, md, sizeof(bdescr));
+ bdescr.offset = store_size();
+ bdescr.size = blksiz;
- /* Create a cache entry for this block */
- cent = alloc_cent();
- cent->bdescr = bdescr;
- add_cent(cent);
- } else {
- /* Found block with the same hash, update index entry */
- ent->bdescr[ent->nblks++] = bdescr;
+ /* Update index entry */
+ ent->bdescr[ent->nblks++] = bdescr;
+
+ /* Store block */
+ append_blk(bp, blksiz);
+
+ /* Create a cache entry for this block */
+ cent = alloc_cent();
+ cent->bdescr = bdescr;
+ add_cent(cent);
+ } else {
+ /* Found block with the same hash, update index entry */
+ ent->bdescr[ent->nblks++] = bdescr;
+ }
+
+ bp += blksiz;
+ n -= blksiz;
}
}