commit eaed3251aefdb0cac2fb61970ccf2f8f52a097a2
parent 99e39cb4a6a9a5b21bae79d2e8c304019565ad93
Author: sin <sin@2f30.org>
Date: Sat, 6 Apr 2019 09:33:34 +0100
Remove match_pattern and inline the code in the caller
Diffstat:
1 file changed, 7 insertions(+), 13 deletions(-)
diff --git a/chunker.c b/chunker.c
@@ -83,16 +83,6 @@ buzh_update(uint32_t sum, uint8_t out, uint8_t in, size_t size)
return ROTL(sum, 1) ^ ROTL(buz[out], size % 32) ^ buz[in];
}
-static inline int
-match_pattern(struct chunker *chunker, size_t chunk_size, uint32_t sum)
-{
- if (chunk_size >= chunker->max_size)
- return 1;
- if (chunk_size < chunker->min_size)
- return 0;
- return (sum & chunker->mask) == 0;
-}
-
static size_t
get_chunk_size(struct chunker *chunker)
{
@@ -107,8 +97,8 @@ get_chunk_size(struct chunker *chunker)
/*
* To achieve better deduplication, we chunk blocks based on a
- * recurring pattern occuring on the data stream. A fixed window
- * of WINSIZE bytes is slid over the data, and a rolling hash is
+ * recurring pattern occurring on the data stream. We slide a fixed
+ * window of WINSIZE bytes over the data, and a rolling hash is
* computed for this window.
* When the rolling hash matches a given pattern the block is chunked
* at the end of that window.
@@ -125,7 +115,11 @@ get_chunk_size(struct chunker *chunker)
sum = buzh_update(sum, out, in, win_size);
}
- if (match_pattern(chunker, chunk_size, sum))
+ if (chunk_size < chunker->min_size)
+ continue;
+
+ if (chunk_size >= chunker->max_size ||
+ (sum & chunker->mask) == 0)
return chunk_size;
}
return max_chunk_size;