dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit b314f213a126fe090d8fa3bb6a9cad0e5dff7ba9
parent 652d81e641e2b9dfbf181c2170ae944640be514f
Author: sin <sin@2f30.org>
Date:   Sat,  2 Mar 2019 15:51:14 +0000

Add a mechanism to disable compression

Currently this can only be set at compile time.  Making it configurable
at run time is a matter of hooking it up to a flag or environment variable.

Diffstat:
M Makefile | 2 ++
A config.c | 1 +
M dedup.c | 60 ++++++++++++++++++++++++++++++++++++++++--------------------
M dedup.h | 5 ++++-
M types.c | 4 ++--
5 files changed, 49 insertions(+), 23 deletions(-)

diff --git a/Makefile b/Makefile @@ -10,6 +10,7 @@ SRC = \ tree.h \ cache.c \ chunker.c \ + config.c \ pack.c \ types.c \ unpack.c \ @@ -19,6 +20,7 @@ OBJ = \ $(BIN).o \ cache.o \ chunker.o \ + config.o \ pack.o \ types.o \ unpack.o \ diff --git a/config.c b/config.c @@ -0,0 +1 @@ +int compr_enabled = 1; diff --git a/dedup.c b/dedup.c @@ -45,9 +45,16 @@ int verbose; char *argv0; static size_t -comp_size(size_t size) +compr_size(size_t size) { - return LZ4_compressBound(size); + size_t ret; + + if (compr_enabled) + ret = LZ4_compressBound(size); + else + ret = size; + + return ret; } static size_t @@ -55,9 +62,16 @@ comp(uint8_t *in, uint8_t *out, size_t insize, size_t outsize) { int ret; - ret = LZ4_compress_default((char *)in, (char *)out, insize, outsize); - if (ret < 0) - errx(1, "LZ4_compress_default failed"); + if (compr_enabled) { + ret = LZ4_compress_default((char *)in, (char *)out, insize, + outsize); + if (ret < 0) + errx(1, "LZ4_compress_default failed"); + } else { + ret = insize; + memcpy(out, in, insize); + } + return ret; } @@ -66,9 +80,15 @@ decomp(uint8_t *in, uint8_t *out, size_t insize, size_t outsize) { int ret; - ret = LZ4_decompress_safe((char *)in, (char *)out, insize, outsize); - if (ret < 0) - errx(1, "LZ4_decompress_safe failed"); + if (compr_enabled) { + ret = LZ4_decompress_safe((char *)in, (char *)out, insize, outsize); + if (ret < 0) + errx(1, "LZ4_decompress_safe failed"); + } else { + ret = insize; + memcpy(out, in, insize); + } + return ret; } @@ -90,7 +110,7 @@ print_stats(struct stats *st) fprintf(stderr, "Original size: %llu bytes\n", (unsigned long long)st->orig_size); fprintf(stderr, "Compressed size: %llu bytes\n", - (unsigned long long)st->comp_size); + (unsigned long long)st->compr_size); fprintf(stderr, "Deduplicated size: %llu bytes\n", (unsigned long long)st->dedup_size); fprintf(stderr, "Min/avg/max block size: %llu/%llu/%llu bytes\n", @@ -208,16 +228,16 @@ dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t 
chunk_size) { uint8_t md[MDSIZE]; struct cache_entry cache_entry; - uint8_t *comp_buf; + uint8_t *compr_buf; size_t n; - comp_buf = alloc_buf(comp_size(BLKSIZE_MAX)); + compr_buf = alloc_buf(compr_size(BLKSIZE_MAX)); - n = comp(chunkp, comp_buf, chunk_size, comp_size(BLKSIZE_MAX)); - hash_blk(comp_buf, n, md); + n = comp(chunkp, compr_buf, chunk_size, compr_size(BLKSIZE_MAX)); + hash_blk(compr_buf, n, md); snap_hdr.st.orig_size += chunk_size; - snap_hdr.st.comp_size += n; + snap_hdr.st.compr_size += n; memcpy(cache_entry.md, md, sizeof(cache_entry.md)); if (lookup_cache_entry(cache, &cache_entry) < 0) { @@ -228,7 +248,7 @@ dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size) blk_desc.size = n; snap->blk_desc[snap->nr_blk_descs++] = blk_desc; - append_blk(comp_buf, &blk_desc); + append_blk(compr_buf, &blk_desc); cache_entry.offset = blk_desc.offset; cache_entry.size = blk_desc.size; @@ -253,7 +273,7 @@ dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size) cache_hits++; } - free(comp_buf); + free(compr_buf); } static void @@ -309,7 +329,7 @@ extract(struct snapshot *snap, void *arg) return WALK_CONTINUE; buf[0] = alloc_buf(BLKSIZE_MAX); - buf[1] = alloc_buf(comp_size(BLKSIZE_MAX)); + buf[1] = alloc_buf(compr_size(BLKSIZE_MAX)); for (i = 0; i < snap->nr_blk_descs; i++) { struct blk_desc *blk_desc; size_t blksize; @@ -333,7 +353,7 @@ check(struct snapshot *snap, void *arg) SHA256_CTX ctx; uint64_t i; - buf = alloc_buf(comp_size(BLKSIZE_MAX)); + buf = alloc_buf(compr_size(BLKSIZE_MAX)); /* * Calculate hash for each block and compare * against snapshot entry block descriptor @@ -386,7 +406,7 @@ rebuild_cache(struct snapshot *snap, void *arg) SHA256_CTX ctx; uint64_t i; - buf = alloc_buf(comp_size(BLKSIZE_MAX)); + buf = alloc_buf(compr_size(BLKSIZE_MAX)); for (i = 0; i < snap->nr_blk_descs; i++) { struct cache_entry cache_entry; struct blk_desc *blk_desc; @@ -513,7 +533,7 @@ init_snap_hdr(void) { snap_hdr.flags = (VER_MAJ << 8) | 
VER_MIN; snap_hdr.size = SNAP_HDR_SIZE; - snap_hdr.st.min_blk_size = comp_size(BLKSIZE_MAX); + snap_hdr.st.min_blk_size = compr_size(BLKSIZE_MAX); } static void diff --git a/dedup.h b/dedup.h @@ -24,7 +24,7 @@ struct chunker; struct stats { uint64_t orig_size; /* original store size */ - uint64_t comp_size; /* compressed store size */ + uint64_t compr_size; /* compressed store size */ uint64_t dedup_size; /* deduplicated store size */ uint64_t min_blk_size; uint64_t max_blk_size; @@ -64,6 +64,9 @@ struct cache_entry { uint64_t size; /* size of block */ }; +/* config.c */ +extern int compr_enabled; + /* dedup.c */ extern int verbose; diff --git a/types.c b/types.c @@ -22,7 +22,7 @@ read_snap_hdr(int fd, struct snapshot_hdr *hdr) n += unpack(&buf[n], "qqqqqq", &hdr->st.orig_size, - &hdr->st.comp_size, + &hdr->st.compr_size, &hdr->st.dedup_size, &hdr->st.min_blk_size, &hdr->st.max_blk_size, @@ -50,7 +50,7 @@ write_snap_hdr(int fd, struct snapshot_hdr *hdr) n += pack(&buf[n], "qqqqqq", hdr->st.orig_size, - hdr->st.comp_size, + hdr->st.compr_size, hdr->st.dedup_size, hdr->st.min_blk_size, hdr->st.max_blk_size,