commit 23e23d22beed84fe844c6d76f453667d9a6f95c6
parent e8031b23797e666b43f96906372d97e2da4f4d0a
Author: sin <sin@2f30.org>
Date: Fri, 22 Feb 2019 13:15:25 +0000
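Tweak chunker parameters
The chunking parameters (average, minimum and maximum block size, and the discriminator formula used to detect chunk boundaries) were taken from casync.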
Diffstat:
3 files changed, 39 insertions(+), 19 deletions(-)
diff --git a/chunker.c b/chunker.c
@@ -16,15 +16,33 @@ struct chunker {
};
static size_t
+calc_discr(size_t avg)
+{
+ return avg / (-1.42888852e-7 * avg + 1.33237515);
+}
+
+static int
+match_pattern(size_t chunk_size, uint32_t fp)
+{
+ size_t discr = calc_discr(BLKSIZE_AVG);
+
+ if (chunk_size >= BLKSIZE_MAX)
+ return 1;
+ if (chunk_size < BLKSIZE_MIN)
+ return 0;
+ return (fp % discr) == discr - 1;
+}
+
+static size_t
get_chunk_size(struct chunker *chunker)
{
uint8_t *bp;
uint32_t fp;
- size_t i;
+ size_t i, chunk_size;
- /* buzhash should be at least WINSIZE */
- if (chunker->wpos - chunker->rpos < WINSIZE)
- return chunker->wpos - chunker->rpos;
+ chunk_size = chunker->wpos - chunker->rpos;
+ if (chunk_size < WINSIZE)
+ return chunk_size;
bp = chunker->buf;
@@ -39,13 +57,14 @@ get_chunk_size(struct chunker *chunker)
*/
fp = buzh_init(bp, WINSIZE);
for (i = chunker->rpos; i < chunker->wpos - WINSIZE; i++) {
+ chunk_size = i + WINSIZE;
if (i > 0)
- fp = buzh_update(fp, bp[i - 1], bp[WINSIZE + i - 1],
+ fp = buzh_update(fp, bp[i - 1], bp[chunk_size - 1],
WINSIZE);
- if ((fp & HASHMSK) == 0)
- return i + WINSIZE;
+ if (match_pattern(chunk_size, fp) == 1)
+ return chunk_size;
}
- return chunker->wpos - chunker->rpos;
+ return chunk_size;
}
struct chunker *
diff --git a/config.h b/config.h
@@ -1,3 +1,4 @@
-#define BLKSIZE 131072
+#define BLKSIZE_AVG ((size_t)131072)
+#define BLKSIZE_MIN ((BLKSIZE_AVG) / 4)
+#define BLKSIZE_MAX ((BLKSIZE_AVG) * 4)
#define WINSIZE 32
-#define HASHMSK ((1ul << 15) - 1)
diff --git a/dedup.c b/dedup.c
@@ -325,9 +325,9 @@ dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size)
struct blk_desc blk_desc;
size_t n;
- comp_buf = alloc_buf(comp_size(BLKSIZE));
+ comp_buf = alloc_buf(comp_size(BLKSIZE_MAX));
- n = comp(chunkp, comp_buf, chunk_size, comp_size(BLKSIZE));
+ n = comp(chunkp, comp_buf, chunk_size, comp_size(BLKSIZE_MAX));
hash_blk(comp_buf, n, md);
snaphdr.st.orig_size += chunk_size;
@@ -374,7 +374,7 @@ dedup(int fd, char *msg)
ssize_t n;
snap = alloc_snap();
- chunker = alloc_chunker(BLKSIZE, fd);
+ chunker = alloc_chunker(BLKSIZE_MAX, fd);
SHA256_Init(&ctx);
while ((n = fill_chunker(chunker)) > 0) {
@@ -417,14 +417,14 @@ extract(struct snapshot *snap, void *arg)
if (memcmp(snap->md, args->md, sizeof(snap->md)) != 0)
return WALK_CONTINUE;
- buf[0] = alloc_buf(BLKSIZE);
- buf[1] = alloc_buf(comp_size(BLKSIZE));
+ buf[0] = alloc_buf(BLKSIZE_MAX);
+ buf[1] = alloc_buf(comp_size(BLKSIZE_MAX));
for (i = 0; i < snap->nr_blk_descs; i++) {
size_t blksize;
read_blk(buf[1], &snap->blk_desc[i]);
blksize = decomp(buf[1], buf[0], snap->blk_desc[i].size,
- BLKSIZE);
+ BLKSIZE_MAX);
xwrite(args->fd, buf[0], blksize);
}
free_buf(buf[1]);
@@ -440,7 +440,7 @@ check(struct snapshot *snap, void *arg)
SHA256_CTX ctx;
uint64_t i;
- buf = alloc_buf(comp_size(BLKSIZE));
+ buf = alloc_buf(comp_size(BLKSIZE_MAX));
/*
* Calculate hash for each block and compare
* against snapshot entry block descriptor
@@ -491,7 +491,7 @@ rebuild_cache(struct snapshot *snap, void *arg)
SHA256_CTX ctx;
uint64_t i;
- buf = alloc_buf(comp_size(BLKSIZE));
+ buf = alloc_buf(comp_size(BLKSIZE_MAX));
for (i = 0; i < snap->nr_blk_descs; i++) {
struct cache_entry *ent;
@@ -590,7 +590,7 @@ init(void)
VER_MAJ, VER_MIN, maj, min);
} else {
snaphdr.flags = (VER_MAJ << 8) | VER_MIN;
- snaphdr.st.min_blk_size = comp_size(BLKSIZE);
+ snaphdr.st.min_blk_size = comp_size(BLKSIZE_MAX);
xwrite(ifd, &snaphdr, sizeof(snaphdr));
}