dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit eaed3251aefdb0cac2fb61970ccf2f8f52a097a2
parent 99e39cb4a6a9a5b21bae79d2e8c304019565ad93
Author: sin <sin@2f30.org>
Date:   Sat,  6 Apr 2019 09:33:34 +0100

Remove match_pattern and inline the code in the caller

Diffstat:
M chunker.c | 20 +++++++-------------
1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/chunker.c b/chunker.c
@@ -83,16 +83,6 @@ buzh_update(uint32_t sum, uint8_t out, uint8_t in, size_t size)
 	return ROTL(sum, 1) ^ ROTL(buz[out], size % 32) ^ buz[in];
 }
 
-static inline int
-match_pattern(struct chunker *chunker, size_t chunk_size, uint32_t sum)
-{
-	if (chunk_size >= chunker->max_size)
-		return 1;
-	if (chunk_size < chunker->min_size)
-		return 0;
-	return (sum & chunker->mask) == 0;
-}
-
 static size_t
 get_chunk_size(struct chunker *chunker)
 {
@@ -107,8 +97,8 @@ get_chunk_size(struct chunker *chunker)
 
 	/*
 	 * To achieve better deduplication, we chunk blocks based on a
-	 * recurring pattern occuring on the data stream. A fixed window
-	 * of WINSIZE bytes is slid over the data, and a rolling hash is
+	 * recurring pattern occuring on the data stream. We slide a fixed
+	 * window of WINSIZE bytes over the data, and a rolling hash is
 	 * computed for this window.
 	 * When the rolling hash matches a given pattern the block is chunked
 	 * at the end of that window.
@@ -125,7 +115,11 @@ get_chunk_size(struct chunker *chunker)
 			sum = buzh_update(sum, out, in, win_size);
 		}
 
-		if (match_pattern(chunker, chunk_size, sum))
+		if (chunk_size < chunker->min_size)
+			continue;
+
+		if (chunk_size >= chunker->max_size ||
+		    (sum & chunker->mask) == 0)
 			return chunk_size;
 	}
 	return max_chunk_size;