dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit f07513053f8662fac3f909bb6e2cdf895b567cb3
parent 7f984c9b4136d768e72f31d99666b72bca12d7f2
Author: sin <sin@2f30.org>
Date:   Sun,  7 Apr 2019 13:25:45 +0100

Add mini compressor framework

Diffstat:
M compress.c | 184 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
M dedup.1 | 11 +++++++----
M dedup.c | 45 +++++++++++++++++++++++++++++++--------------
M dedup.h | 27 +++++++++++++++++++--------
M hash.c | 2 +-
5 files changed, 208 insertions(+), 61 deletions(-)

diff --git a/compress.c b/compress.c @@ -1,56 +1,172 @@ +#include <sys/types.h> + #include <err.h> #include <stdint.h> #include <string.h> #include <lz4.h> -int compr_enabled = 1; +#include "blake2.h" +#include "dedup.h" -size_t -compr_size(size_t size) +static int none_init(struct compr_ctx *ctx); +static size_t none_size(struct compr_ctx *ctx, size_t n); +static size_t none_compr(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize); +static size_t none_decompr(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize); + +static int lz4_init(struct compr_ctx *ctx); +static size_t lz4_size(struct compr_ctx *ctx, size_t n); +static size_t lz4_compr(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize); +static size_t lz4_decompr(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize); + +static struct compr_ops { + int (*init)(struct compr_ctx *ctx); + size_t (*size)(struct compr_ctx *ctx, size_t n); + size_t (*compr)(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize); + size_t (*decompr)(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize); +} comprs[NR_COMPRS] = { + { + .init = none_init, + .size = none_size, + .compr = none_compr, + .decompr = none_decompr, + }, + { + .init = lz4_init, + .size = lz4_size, + .compr = lz4_compr, + .decompr = lz4_decompr, + }, +}; + +static struct algomap { + char *name; + int type; +} algomap[] = { + { + .name = "none", + .type = COMPR_NONE, + }, + { + .name = "lz4", + .type = COMPR_LZ4, + }, + { + .name = NULL, + }, +}; + +static int +none_init(struct compr_ctx *ctx) +{ + return 0; +} + +static size_t +none_size(struct compr_ctx *ctx, size_t n) { - size_t ret; + return n; +} - if (compr_enabled) - ret = LZ4_compressBound(size); - else - ret = size; +static size_t +none_compr(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize) +{ + 
memcpy(out, in, insize); + return insize; +} - return ret; +static size_t +none_decompr(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize) +{ + memcpy(out, in, insize); + return insize; } -size_t -compr(uint8_t *in, uint8_t *out, size_t insize, size_t outsize) +static int +lz4_init(struct compr_ctx *ctx) +{ + return 0; +} + +static size_t +lz4_size(struct compr_ctx *ctx, size_t n) +{ + return LZ4_compressBound(n); +} + +static size_t +lz4_compr(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize) +{ + int n; + + n = LZ4_compress_default((char *)in, (char *)out, insize, + outsize); + if (n < 0) + errx(1, "LZ4_compress_default failed"); + return n; +} + +static size_t +lz4_decompr(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize) +{ + int n; + + n = LZ4_decompress_safe((char *)in, (char *)out, insize, + outsize); + if (n < 0) + errx(1, "LZ4_decompress_safe failed"); + return n; +} + +int +compr_init(struct compr_ctx *ctx, int type) { - int ret; + if (type < 0 || type >= NR_COMPRS) + return -1; - if (compr_enabled) { - ret = LZ4_compress_default((char *)in, (char *)out, insize, - outsize); - if (ret < 0) - errx(1, "LZ4_compress_default failed"); - } else { - ret = insize; - memcpy(out, in, insize); - } + ctx->ops = &comprs[type]; + return (*ctx->ops->init)(ctx); +} - return ret; +int +compr_size(struct compr_ctx *ctx, size_t n) +{ + return (*ctx->ops->size)(ctx, n); } size_t -decompr(uint8_t *in, uint8_t *out, size_t insize, size_t outsize) +compr(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize) { - int ret; + return (*ctx->ops->compr)(ctx, in, out, insize, outsize); +} - if (compr_enabled) { - ret = LZ4_decompress_safe((char *)in, (char *)out, insize, - outsize); - if (ret < 0) - errx(1, "LZ4_decompress_safe failed"); - } else { - ret = insize; - memcpy(out, in, insize); - } +size_t +decompr(struct compr_ctx *ctx, const void *in, void *out, 
+ size_t insize, size_t outsize) +{ + return (*ctx->ops->decompr)(ctx, in, out, insize, outsize); +} + +int +compr_name2type(char *name) +{ + struct algomap *algo; - return ret; + for (algo = &algomap[0]; algo->name != NULL; algo++) + if (strcmp(algo->name, name) == 0) + break; + if (algo->name == NULL) + return -1; + return algo->type; } diff --git a/dedup.1 b/dedup.1 @@ -6,8 +6,9 @@ .Nd data deduplication program .Sh SYNOPSIS .Nm dedup -.Op Fl Zcilv +.Op Fl cilv .Op Fl H Ar hash +.Op Fl Z Ar compressor .Op Fl e Ar id .Op Fl r Ar root .Op Fl m Ar message @@ -34,10 +35,12 @@ unique blocks in the store. The supported hash functions are blake2b and blake2bp. This flag only has an effect when initializing the repository. By default blake2b is used. -.It Fl Z -Disable compression support for this repository. +.It Fl Z Ar compressor +The compressor function used to compress the blocks +in the store. +The supported compressor functions are none and lz4. This flag only has an effect when initializing the repository. -By default compression is enabled. +By default lz4 is used. .It Fl c Perform a consistency check on the repository. 
.It Fl i diff --git a/dedup.c b/dedup.c @@ -34,6 +34,7 @@ static struct icache *icache; static int ifd; static int sfd; static int hash_algo; +static int compr_algo; int verbose; char *argv0; @@ -217,12 +218,15 @@ dedup_chunk(struct snap *snap, uint8_t *chunkp, size_t chunk_size) { uint8_t md[MD_SIZE]; struct blk_desc blk_desc; + struct compr_ctx ctx; uint8_t *compr_buf; - size_t n; + size_t n, csize; - compr_buf = alloc_buf(compr_size(BLKSIZE_MAX)); + compr_init(&ctx, compr_algo); + csize = compr_size(&ctx, BLKSIZE_MAX); + compr_buf = alloc_buf(csize); - n = compr(chunkp, compr_buf, chunk_size, compr_size(BLKSIZE_MAX)); + n = compr(&ctx, chunkp, compr_buf, chunk_size, csize); hash_blk(compr_buf, n, md); snap_hdr.st.orig_size += chunk_size; @@ -295,20 +299,22 @@ extract(struct snap *snap, void *arg) { uint8_t *buf[2]; struct extract_args *args = arg; + struct compr_ctx ctx; uint64_t i; if (memcmp(snap->md, args->md, sizeof(snap->md)) != 0) return WALK_CONTINUE; + compr_init(&ctx, compr_algo); buf[0] = alloc_buf(BLKSIZE_MAX); - buf[1] = alloc_buf(compr_size(BLKSIZE_MAX)); + buf[1] = alloc_buf(compr_size(&ctx, BLKSIZE_MAX)); for (i = 0; i < snap->nr_blk_descs; i++) { struct blk_desc *blk_desc; size_t blksize; blk_desc = &snap->blk_desc[i]; read_blk(buf[1], blk_desc); - blksize = decompr(buf[1], buf[0], blk_desc->size, BLKSIZE_MAX); + blksize = decompr(&ctx, buf[1], buf[0], blk_desc->size, BLKSIZE_MAX); xwrite(args->fd, buf[0], blksize); } free_buf(buf[1]); @@ -325,6 +331,7 @@ extract(struct snap *snap, void *arg) static int check_snap(struct snap *snap, void *arg) { + struct compr_ctx ctx; uint8_t *buf; int *ret = arg; uint64_t i; @@ -335,7 +342,8 @@ check_snap(struct snap *snap, void *arg) fputc('\n', stderr); } - buf = alloc_buf(compr_size(BLKSIZE_MAX)); + compr_init(&ctx, compr_algo); + buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX)); for (i = 0; i < snap->nr_blk_descs; i++) { uint8_t md[MD_SIZE]; struct blk_desc *blk_desc; @@ -367,10 +375,12 @@ check_snap(struct 
snap *snap, void *arg) static int build_icache(struct snap *snap, void *arg) { + struct compr_ctx ctx; uint8_t *buf; uint64_t i; - buf = alloc_buf(compr_size(BLKSIZE_MAX)); + compr_init(&ctx, compr_algo); + buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX)); for (i = 0; i < snap->nr_blk_descs; i++) { struct blk_desc *blk_desc; @@ -432,7 +442,7 @@ static void init_blk_hdr(void) { blk_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN; - blk_hdr.flags |= compr_enabled << COMPR_ENABLED_SHIFT; + blk_hdr.flags |= compr_algo << COMPR_ALGO_SHIFT; blk_hdr.flags |= hash_algo << HASH_ALGO_SHIFT; blk_hdr.size = BLK_HDR_SIZE; } @@ -446,9 +456,9 @@ load_blk_hdr(void) read_blk_hdr(sfd, &blk_hdr); match_ver(blk_hdr.flags); - v = blk_hdr.flags >> COMPR_ENABLED_SHIFT; - v &= COMPR_ENABLED_MASK; - compr_enabled = v; + v = blk_hdr.flags >> COMPR_ALGO_SHIFT; + v &= COMPR_ALGO_MASK; + compr_algo = v; v = blk_hdr.flags >> HASH_ALGO_SHIFT; v &= HASH_ALGO_MASK; @@ -465,9 +475,12 @@ save_blk_hdr(void) static void init_snap_hdr(void) { + struct compr_ctx ctx; + + compr_init(&ctx, compr_algo); snap_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN; snap_hdr.size = SNAP_HDR_SIZE; - snap_hdr.st.min_blk_size = compr_size(BLKSIZE_MAX); + snap_hdr.st.min_blk_size = compr_size(&ctx, BLKSIZE_MAX); } static void @@ -539,7 +552,7 @@ term(void) static void usage(void) { - fprintf(stderr, "usage: %s [-Zcilv] [-H hash] [-e id] [-r root] [-m message] [file]\n", argv0); + fprintf(stderr, "usage: %s [cilv] [-Z compressor] [-H hash] [-e id] [-r root] [-m message] [file]\n", argv0); exit(1); } @@ -548,6 +561,7 @@ main(int argc, char *argv[]) { uint8_t md[MD_SIZE]; char *id = NULL, *root = NULL, *msg = NULL, *hash_name = NULL; + char *compr_name; int iflag = 0, lflag = 0, cflag = 0; int fd = -1; @@ -559,7 +573,10 @@ main(int argc, char *argv[]) errx(1, "unknown hash: %s", hash_name); break; case 'Z': - compr_enabled = 0; + compr_name = EARGF(usage()); + compr_algo = compr_name2type(compr_name); + if (compr_algo < 0) 
+ errx(1, "unknown hash: %s", compr_name); break; case 'c': cflag = 1; diff --git a/dedup.h b/dedup.h @@ -24,8 +24,14 @@ #define HASH_ALGO_SHIFT 18 #define HASH_ALGO_MASK 0x7 /* max 8 hash algos */ -#define COMPR_ENABLED_SHIFT 16 -#define COMPR_ENABLED_MASK 0x3 /* max 4 compression algos */ +#define COMPR_ALGO_SHIFT 16 +#define COMPR_ALGO_MASK 0x3 /* max 4 compression algos */ + +enum compr_algo { + COMPR_NONE, + COMPR_LZ4, + NR_COMPRS, +}; enum hash_algo { BLAKE2B_ALGO, @@ -72,6 +78,10 @@ struct snap { struct blk_desc blk_desc[]; }; +struct compr_ctx { + struct compr_ops *ops; +}; + struct hash_ctx { union { blake2b_state blake2b_ctx; @@ -80,9 +90,6 @@ struct hash_ctx { struct hash_ops *ops; }; -/* compress.c */ -extern int compr_enabled; - /* dedup.c */ extern int verbose; @@ -95,9 +102,13 @@ uint8_t *get_chunk(struct chunker *chunker, size_t *chunk_size); void drain_chunker(struct chunker *chunker); /* compress.c */ -size_t compr_size(size_t size); -size_t compr(uint8_t *in, uint8_t *out, size_t insize, size_t outsize); -size_t decompr(uint8_t *in, uint8_t *out, size_t insize, size_t outsize); +int compr_init(struct compr_ctx *ctx, int type); +int compr_size(struct compr_ctx *ctx, size_t n); +size_t compr(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize); +size_t decompr(struct compr_ctx *ctx, const void *in, void *out, + size_t insize, size_t outsize); +int compr_name2type(char *name); /* hash.c */ int hash_init(struct hash_ctx *ctx, int type, size_t n); diff --git a/hash.c b/hash.c @@ -27,7 +27,7 @@ static struct hash_ops { .init = blake2bpi, .update = blake2bpu, .final = blake2bpf, - } + }, }; static struct algomap {