dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit 9a790f78b6623fd5340369f71e8649cefbe2d4ec
parent 22ec348a2fff0a5f80a3bdb4fc1bc0ffd4aec838
Author: sin <sin@2f30.org>
Date:   Sun, 17 Feb 2019 00:14:18 +0000

Tweaks

Diffstat:
M dedup.c | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/dedup.c b/dedup.c
@@ -16,8 +16,9 @@
 #define STOREF ".store"
 #define CACHEF ".cache"
-#define BLKSIZ 65536
-#define WINSIZ 32
+#define BLKSIZ (8*1024*1024)
+#define WINSIZ 4096
+#define HASHMSK ((1ul << 21) - 1)
 #define MDSIZ SHA256_DIGEST_LENGTH
 #define ROTL(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
@@ -134,14 +135,14 @@ chunk_blk(uint8_t *buf, size_t size)
 	/*
 	 * Chunking blocks is decided using a rolling hash + binary pattern.
 	 * The buzhash algorithm is used to "fingerprint" a fixed size window.
-	 * Once the lower 13 bits of this fingerprint are all zeros,
+	 * Once the lower bits of this fingerprint are all zeros,
 	 * the block is chunked.
 	 * If the pattern can't be matched, then we return the buffer size.
 	 */
 	fp = buzh_init(buf, WINSIZ);
 	for (i = 1; i < size - WINSIZ; i++) {
 		fp = buzh_update(fp, buf[i - 1], buf[i + WINSIZ - 1], WINSIZ);
-		if ((fp & 0x00001fff) == 0)
+		if ((fp & HASHMSK) == 0)
 			return i + WINSIZ;
 	}
 	return size;