commit d60ace395a74a5efe067ee9cd5d85446c7facf43
parent d8bfc3a69ce4c3c35dfa8c0d5cef3ce10e424300
Author: sin <sin@2f30.org>
Date: Tue, 26 Feb 2019 09:48:57 +0000
When matching a pattern, check if the bottom bits of the hash are 0
This approach is more efficient and easier to understand.
Diffstat:
2 files changed, 2 insertions(+), 9 deletions(-)
diff --git a/chunker.c b/chunker.c
@@ -14,7 +14,6 @@ struct chunker {
size_t cap;
size_t rpos;
size_t wpos;
- size_t discr;
int fd;
};
@@ -88,7 +87,7 @@ match_pattern(struct chunker *chunker, size_t chunk_size, uint32_t fp)
return 1;
if (chunk_size < BLKSIZE_MIN)
return 0;
- return (fp % chunker->discr) == chunker->discr - 1;
+ return (fp & HASHMASK_BITS) == 0;
}
static size_t
@@ -123,12 +122,6 @@ get_chunk_size(struct chunker *chunker)
return chunk_size;
}
-static size_t
-calc_discr(size_t avg)
-{
- return avg / (-1.42888852e-7 * avg + 1.33237515);
-}
-
struct chunker *
alloc_chunker(size_t cap, int fd)
{
@@ -145,7 +138,6 @@ alloc_chunker(size_t cap, int fd)
chunker->rpos = 0;
chunker->wpos = 0;
chunker->fd = fd;
- chunker->discr = calc_discr(BLKSIZE_AVG);
return chunker;
}
diff --git a/config.h b/config.h
@@ -1,4 +1,5 @@
#define BLKSIZE_AVG ((size_t)524288)
#define BLKSIZE_MIN ((BLKSIZE_AVG) / 4)
#define BLKSIZE_MAX ((BLKSIZE_AVG) * 4)
+#define HASHMASK_BITS (BLKSIZE_AVG - 1)
#define WINSIZE 32