commit f07513053f8662fac3f909bb6e2cdf895b567cb3
parent 7f984c9b4136d768e72f31d99666b72bca12d7f2
Author: sin <sin@2f30.org>
Date: Sun, 7 Apr 2019 13:25:45 +0100
Add mini compressor framework
Diffstat:
M | compress.c | | | 184 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------- |
M | dedup.1 | | | 11 | +++++++---- |
M | dedup.c | | | 45 | +++++++++++++++++++++++++++++++-------------- |
M | dedup.h | | | 27 | +++++++++++++++++++-------- |
M | hash.c | | | 2 | +- |
5 files changed, 208 insertions(+), 61 deletions(-)
diff --git a/compress.c b/compress.c
@@ -1,56 +1,172 @@
+#include <sys/types.h>
+
#include <err.h>
#include <stdint.h>
#include <string.h>
#include <lz4.h>
-int compr_enabled = 1;
+#include "blake2.h"
+#include "dedup.h"
-size_t
-compr_size(size_t size)
+static int none_init(struct compr_ctx *ctx);
+static size_t none_size(struct compr_ctx *ctx, size_t n);
+static size_t none_compr(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize);
+static size_t none_decompr(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize);
+
+static int lz4_init(struct compr_ctx *ctx);
+static size_t lz4_size(struct compr_ctx *ctx, size_t n);
+static size_t lz4_compr(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize);
+static size_t lz4_decompr(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize);
+
+static struct compr_ops {
+ int (*init)(struct compr_ctx *ctx);
+ size_t (*size)(struct compr_ctx *ctx, size_t n);
+ size_t (*compr)(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize);
+ size_t (*decompr)(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize);
+} comprs[NR_COMPRS] = {
+ {
+ .init = none_init,
+ .size = none_size,
+ .compr = none_compr,
+ .decompr = none_decompr,
+ },
+ {
+ .init = lz4_init,
+ .size = lz4_size,
+ .compr = lz4_compr,
+ .decompr = lz4_decompr,
+ },
+};
+
+static struct algomap {
+ char *name;
+ int type;
+} algomap[] = {
+ {
+ .name = "none",
+ .type = COMPR_NONE,
+ },
+ {
+ .name = "lz4",
+ .type = COMPR_LZ4,
+ },
+ {
+ .name = NULL,
+ },
+};
+
+static int
+none_init(struct compr_ctx *ctx)
+{
+ return 0;
+}
+
+static size_t
+none_size(struct compr_ctx *ctx, size_t n)
{
- size_t ret;
+ return n;
+}
- if (compr_enabled)
- ret = LZ4_compressBound(size);
- else
- ret = size;
+static size_t
+none_compr(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize)
+{
+ memcpy(out, in, insize);
+ return insize;
+}
- return ret;
+static size_t
+none_decompr(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize)
+{
+ memcpy(out, in, insize);
+ return insize;
}
-size_t
-compr(uint8_t *in, uint8_t *out, size_t insize, size_t outsize)
+static int
+lz4_init(struct compr_ctx *ctx)
+{
+ return 0;
+}
+
+static size_t
+lz4_size(struct compr_ctx *ctx, size_t n)
+{
+ return LZ4_compressBound(n);
+}
+
+static size_t
+lz4_compr(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize)
+{
+ int n; /* bytes written to out, or 0 when compression failed */
+
+ n = LZ4_compress_default((char *)in, (char *)out, insize,
+ outsize);
+ if (n <= 0) /* LZ4_compress_default signals failure with 0, not a negative value */
+ errx(1, "LZ4_compress_default failed");
+ return n;
+}
+
+static size_t
+lz4_decompr(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize)
+{
+ int n;
+
+ n = LZ4_decompress_safe((char *)in, (char *)out, insize,
+ outsize);
+ if (n < 0)
+ errx(1, "LZ4_decompress_safe failed");
+ return n;
+}
+
+int
+compr_init(struct compr_ctx *ctx, int type)
{
- int ret;
+ if (type < 0 || type >= NR_COMPRS)
+ return -1;
- if (compr_enabled) {
- ret = LZ4_compress_default((char *)in, (char *)out, insize,
- outsize);
- if (ret < 0)
- errx(1, "LZ4_compress_default failed");
- } else {
- ret = insize;
- memcpy(out, in, insize);
- }
+ ctx->ops = &comprs[type];
+ return (*ctx->ops->init)(ctx);
+}
- return ret;
+int
+compr_size(struct compr_ctx *ctx, size_t n)
+{
+ return (*ctx->ops->size)(ctx, n);
}
size_t
-decompr(uint8_t *in, uint8_t *out, size_t insize, size_t outsize)
+compr(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize)
{
- int ret;
+ return (*ctx->ops->compr)(ctx, in, out, insize, outsize);
+}
- if (compr_enabled) {
- ret = LZ4_decompress_safe((char *)in, (char *)out, insize,
- outsize);
- if (ret < 0)
- errx(1, "LZ4_decompress_safe failed");
- } else {
- ret = insize;
- memcpy(out, in, insize);
- }
+size_t
+decompr(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize)
+{
+ return (*ctx->ops->decompr)(ctx, in, out, insize, outsize);
+}
+
+int
+compr_name2type(char *name)
+{
+ struct algomap *algo;
- return ret;
+ for (algo = &algomap[0]; algo->name != NULL; algo++)
+ if (strcmp(algo->name, name) == 0)
+ break;
+ if (algo->name == NULL)
+ return -1;
+ return algo->type;
}
diff --git a/dedup.1 b/dedup.1
@@ -6,8 +6,9 @@
.Nd data deduplication program
.Sh SYNOPSIS
.Nm dedup
-.Op Fl Zcilv
+.Op Fl cilv
.Op Fl H Ar hash
+.Op Fl Z Ar compressor
.Op Fl e Ar id
.Op Fl r Ar root
.Op Fl m Ar message
@@ -34,10 +35,12 @@ unique blocks in the store.
The supported hash functions are blake2b and blake2bp.
This flag only has an effect when initializing the repository.
By default blake2b is used.
-.It Fl Z
-Disable compression support for this repository.
+.It Fl Z Ar compressor
+The compressor function used to compress the blocks
+in the store.
+The supported compressor functions are none and lz4.
This flag only has an effect when initializing the repository.
-By default compression is enabled.
+By default lz4 is used.
.It Fl c
Perform a consistency check on the repository.
.It Fl i
diff --git a/dedup.c b/dedup.c
@@ -34,6 +34,7 @@ static struct icache *icache;
static int ifd;
static int sfd;
static int hash_algo;
+static int compr_algo = COMPR_LZ4; /* default compressor when -Z is not given, as documented in dedup.1 */
int verbose;
char *argv0;
@@ -217,12 +218,15 @@ dedup_chunk(struct snap *snap, uint8_t *chunkp, size_t chunk_size)
{
uint8_t md[MD_SIZE];
struct blk_desc blk_desc;
+ struct compr_ctx ctx;
uint8_t *compr_buf;
- size_t n;
+ size_t n, csize;
- compr_buf = alloc_buf(compr_size(BLKSIZE_MAX));
+ compr_init(&ctx, compr_algo);
+ csize = compr_size(&ctx, BLKSIZE_MAX);
+ compr_buf = alloc_buf(csize);
- n = compr(chunkp, compr_buf, chunk_size, compr_size(BLKSIZE_MAX));
+ n = compr(&ctx, chunkp, compr_buf, chunk_size, csize);
hash_blk(compr_buf, n, md);
snap_hdr.st.orig_size += chunk_size;
@@ -295,20 +299,22 @@ extract(struct snap *snap, void *arg)
{
uint8_t *buf[2];
struct extract_args *args = arg;
+ struct compr_ctx ctx;
uint64_t i;
if (memcmp(snap->md, args->md, sizeof(snap->md)) != 0)
return WALK_CONTINUE;
+ compr_init(&ctx, compr_algo);
buf[0] = alloc_buf(BLKSIZE_MAX);
- buf[1] = alloc_buf(compr_size(BLKSIZE_MAX));
+ buf[1] = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
for (i = 0; i < snap->nr_blk_descs; i++) {
struct blk_desc *blk_desc;
size_t blksize;
blk_desc = &snap->blk_desc[i];
read_blk(buf[1], blk_desc);
- blksize = decompr(buf[1], buf[0], blk_desc->size, BLKSIZE_MAX);
+ blksize = decompr(&ctx, buf[1], buf[0], blk_desc->size, BLKSIZE_MAX);
xwrite(args->fd, buf[0], blksize);
}
free_buf(buf[1]);
@@ -325,6 +331,7 @@ extract(struct snap *snap, void *arg)
static int
check_snap(struct snap *snap, void *arg)
{
+ struct compr_ctx ctx;
uint8_t *buf;
int *ret = arg;
uint64_t i;
@@ -335,7 +342,8 @@ check_snap(struct snap *snap, void *arg)
fputc('\n', stderr);
}
- buf = alloc_buf(compr_size(BLKSIZE_MAX));
+ compr_init(&ctx, compr_algo);
+ buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
for (i = 0; i < snap->nr_blk_descs; i++) {
uint8_t md[MD_SIZE];
struct blk_desc *blk_desc;
@@ -367,10 +375,12 @@ check_snap(struct snap *snap, void *arg)
static int
build_icache(struct snap *snap, void *arg)
{
+ struct compr_ctx ctx;
uint8_t *buf;
uint64_t i;
- buf = alloc_buf(compr_size(BLKSIZE_MAX));
+ compr_init(&ctx, compr_algo);
+ buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
for (i = 0; i < snap->nr_blk_descs; i++) {
struct blk_desc *blk_desc;
@@ -432,7 +442,7 @@ static void
init_blk_hdr(void)
{
blk_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN;
- blk_hdr.flags |= compr_enabled << COMPR_ENABLED_SHIFT;
+ blk_hdr.flags |= compr_algo << COMPR_ALGO_SHIFT;
blk_hdr.flags |= hash_algo << HASH_ALGO_SHIFT;
blk_hdr.size = BLK_HDR_SIZE;
}
@@ -446,9 +456,9 @@ load_blk_hdr(void)
read_blk_hdr(sfd, &blk_hdr);
match_ver(blk_hdr.flags);
- v = blk_hdr.flags >> COMPR_ENABLED_SHIFT;
- v &= COMPR_ENABLED_MASK;
- compr_enabled = v;
+ v = blk_hdr.flags >> COMPR_ALGO_SHIFT;
+ v &= COMPR_ALGO_MASK;
+ compr_algo = v;
v = blk_hdr.flags >> HASH_ALGO_SHIFT;
v &= HASH_ALGO_MASK;
@@ -465,9 +475,12 @@ save_blk_hdr(void)
static void
init_snap_hdr(void)
{
+ struct compr_ctx ctx;
+
+ compr_init(&ctx, compr_algo);
snap_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN;
snap_hdr.size = SNAP_HDR_SIZE;
- snap_hdr.st.min_blk_size = compr_size(BLKSIZE_MAX);
+ snap_hdr.st.min_blk_size = compr_size(&ctx, BLKSIZE_MAX);
}
static void
@@ -539,7 +552,7 @@ term(void)
static void
usage(void)
{
- fprintf(stderr, "usage: %s [-Zcilv] [-H hash] [-e id] [-r root] [-m message] [file]\n", argv0);
+ fprintf(stderr, "usage: %s [-cilv] [-Z compressor] [-H hash] [-e id] [-r root] [-m message] [file]\n", argv0);
exit(1);
}
@@ -548,6 +561,7 @@ main(int argc, char *argv[])
{
uint8_t md[MD_SIZE];
char *id = NULL, *root = NULL, *msg = NULL, *hash_name = NULL;
+ char *compr_name;
int iflag = 0, lflag = 0, cflag = 0;
int fd = -1;
@@ -559,7 +573,10 @@ main(int argc, char *argv[])
errx(1, "unknown hash: %s", hash_name);
break;
case 'Z':
- compr_enabled = 0;
+ compr_name = EARGF(usage());
+ compr_algo = compr_name2type(compr_name);
+ if (compr_algo < 0) /* compr_name2type returns -1 for names not in its table */
+ errx(1, "unknown compressor: %s", compr_name);
break;
case 'c':
cflag = 1;
diff --git a/dedup.h b/dedup.h
@@ -24,8 +24,14 @@
#define HASH_ALGO_SHIFT 18
#define HASH_ALGO_MASK 0x7 /* max 8 hash algos */
-#define COMPR_ENABLED_SHIFT 16
-#define COMPR_ENABLED_MASK 0x3 /* max 4 compression algos */
+#define COMPR_ALGO_SHIFT 16
+#define COMPR_ALGO_MASK 0x3 /* max 4 compression algos */
+
+enum compr_algo {
+ COMPR_NONE,
+ COMPR_LZ4,
+ NR_COMPRS,
+};
enum hash_algo {
BLAKE2B_ALGO,
@@ -72,6 +78,10 @@ struct snap {
struct blk_desc blk_desc[];
};
+struct compr_ctx {
+ struct compr_ops *ops;
+};
+
struct hash_ctx {
union {
blake2b_state blake2b_ctx;
@@ -80,9 +90,6 @@ struct hash_ctx {
struct hash_ops *ops;
};
-/* compress.c */
-extern int compr_enabled;
-
/* dedup.c */
extern int verbose;
@@ -95,9 +102,13 @@ uint8_t *get_chunk(struct chunker *chunker, size_t *chunk_size);
void drain_chunker(struct chunker *chunker);
/* compress.c */
-size_t compr_size(size_t size);
-size_t compr(uint8_t *in, uint8_t *out, size_t insize, size_t outsize);
-size_t decompr(uint8_t *in, uint8_t *out, size_t insize, size_t outsize);
+int compr_init(struct compr_ctx *ctx, int type);
+int compr_size(struct compr_ctx *ctx, size_t n);
+size_t compr(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize);
+size_t decompr(struct compr_ctx *ctx, const void *in, void *out,
+ size_t insize, size_t outsize);
+int compr_name2type(char *name);
/* hash.c */
int hash_init(struct hash_ctx *ctx, int type, size_t n);
diff --git a/hash.c b/hash.c
@@ -27,7 +27,7 @@ static struct hash_ops {
.init = blake2bpi,
.update = blake2bpu,
.final = blake2bpf,
- }
+ },
};
static struct algomap {