dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit a41b0b187818a9659dda1fee972b8a9946889020
parent 3d32364aa8deb41341b21c25b726239c6e9c295c
Author: sin <sin@2f30.org>
Date:   Fri,  8 Mar 2019 11:35:36 +0000

Store params in chunker struct

Diffstat:
M chunker.c | 54 +++++++++++++++++++++++++++++++++---------------------
1 file changed, 33 insertions(+), 21 deletions(-)

diff --git a/chunker.c b/chunker.c
@@ -15,6 +15,10 @@ struct chunker {
 	size_t cap;
 	size_t rpos;
 	size_t wpos;
+	size_t min_size;
+	size_t max_size;
+	size_t mask;
+	size_t win_size;
 };
 
 /*
@@ -65,40 +69,41 @@ static uint32_t buz[] = {
 static inline uint32_t
 buzh_init(uint8_t *buf, size_t size)
 {
-	uint32_t fp;
+	uint32_t sum;
 	size_t i;
 
-	for (i = 1, fp = 0; i < size; i++, buf++)
-		fp ^= ROTL(buz[*buf], (size - i) % 32);
+	for (i = 1, sum = 0; i < size; i++, buf++)
+		sum ^= ROTL(buz[*buf], (size - i) % 32);
 
-	return fp ^ buz[*buf];
+	return sum ^ buz[*buf];
 }
 
 static inline uint32_t
-buzh_update(uint32_t fp, uint8_t out, uint8_t in, size_t size)
+buzh_update(uint32_t sum, uint8_t out, uint8_t in, size_t size)
 {
-	return ROTL(fp, 1) ^ ROTL(buz[out], size % 32) ^ buz[in];
+	return ROTL(sum, 1) ^ ROTL(buz[out], size % 32) ^ buz[in];
 }
 
 static inline int
-match_pattern(size_t chunk_size, uint32_t fp)
+match_pattern(struct chunker *chunker, size_t chunk_size, uint32_t sum)
 {
-	if (chunk_size >= BLKSIZE_MAX)
+	if (chunk_size >= chunker->max_size)
 		return 1;
-	if (chunk_size < BLKSIZE_MIN)
+	if (chunk_size < chunker->min_size)
 		return 0;
-	return (fp & HASHMASK_BITS) == 0;
+	return (sum & chunker->mask) == 0;
 }
 
 static size_t
 get_chunk_size(struct chunker *chunker)
 {
+	size_t chunk_size, win_size, i;
+	uint32_t sum;
 	uint8_t *bp;
-	uint32_t fp;
-	size_t i, chunk_size;
 
 	chunk_size = chunker->wpos - chunker->rpos;
-	if (chunk_size < WINSIZE)
+	win_size = chunker->win_size;
+	if (chunk_size < win_size)
 		return chunk_size;
 
 	/*
@@ -110,14 +115,17 @@ get_chunk_size(struct chunker *chunker)
 	 * at the end of that window.
 	 */
 	bp = &chunker->buf[chunker->rpos];
-	fp = buzh_init(bp, WINSIZE);
-	for (i = 0; i < chunk_size - WINSIZE; i++) {
-		if (i > 0)
-			fp = buzh_update(fp, bp[i - 1], bp[i + WINSIZE - 1],
-			                 WINSIZE);
-		if (match_pattern(i + WINSIZE, fp))
-			return i + WINSIZE;
-
+	sum = buzh_init(bp, win_size);
+	for (i = 0; i < chunk_size - win_size; i++) {
+		if (i > 0) {
+			uint8_t out = bp[i - 1];
+			uint8_t in = bp[i + win_size - 1];
+
+			sum = buzh_update(sum, out, in, win_size);
+		}
+
+		if (match_pattern(chunker, i + win_size, sum))
+			return i + win_size;
 	}
 	return chunk_size;
 }
@@ -136,6 +144,10 @@ alloc_chunker(int fd, size_t cap)
 		err(1, "calloc");
 	chunker->fd = fd;
 	chunker->cap = cap;
+	chunker->min_size = BLKSIZE_MIN;
+	chunker->max_size = BLKSIZE_MAX;
+	chunker->mask = HASHMASK_BITS;
+	chunker->win_size = WINSIZE;
 	return chunker;
 }
 