dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit b0e37d93a013e29a6ee2918eabe2708aa7022921
parent e5fdb8553cc9adfcebf7d567a57334b1238e3cd3
Author: sin <sin@2f30.org>
Date:   Sun,  7 Apr 2019 10:14:29 +0100

Add mini hash framework

Diffstat:
M Makefile | 3 +++
M dedup.1 | 15 ++++++++-------
M dedup.c | 80 +++++++++++++++++++++++++------------------------------------------------------
M dedup.h | 22 +++++++++++-----------
A hash.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A hash.h | 18 ++++++++++++++++++
6 files changed, 184 insertions(+), 73 deletions(-)

diff --git a/Makefile b/Makefile @@ -10,6 +10,7 @@ HDR = \ blake2.h \ config.h \ dedup.h \ + hash.h \ tree.h \ SRC = \ @@ -19,6 +20,7 @@ SRC = \ blake2bp-ref.c \ chunker.c \ compress.c \ + hash.c \ icache.c \ pack.c \ types.c \ @@ -31,6 +33,7 @@ OBJ = \ blake2bp-ref.o \ chunker.o \ compress.o \ + hash.o \ icache.o \ pack.o \ types.o \ diff --git a/dedup.1 b/dedup.1 @@ -1,4 +1,4 @@ -.Dd April 6, 2019 +.Dd April 7, 2019 .Dt DEDUP 1 .Os .Sh NAME @@ -6,7 +6,8 @@ .Nd data deduplication program .Sh SYNOPSIS .Nm dedup -.Op Fl PZcilv +.Op Fl Zcilv +.Op Fl H Ar hash .Op Fl e Ar id .Op Fl r Ar root .Op Fl m Ar message @@ -27,11 +28,11 @@ should be used and piped into .Nm . .Sh OPTIONS .Bl -tag -width "-m message" -.It Fl P -Use the blake2bp variant which is a parallel version of blake2b. -These two variants are incompatible as they produce different -hashes. This flag only has an effect when initializing the -repository. By default blake2b is used. +.It Fl H Ar hash +The cryptographic hash function used to identify +unique blocks in the store. The supported hash functions +are blake2b and blake2bp. This flag only has an effect when +initializing the repository. By default blake2b is used. .It Fl Z Disable compression support for this repository. This flag only has an effect when initializing the repository. 
diff --git a/dedup.c b/dedup.c @@ -13,6 +13,7 @@ #include "arg.h" #include "blake2.h" #include "dedup.h" +#include "hash.h" #define SNAPSF ".snapshots" #define STOREF ".store" @@ -33,7 +34,7 @@ static struct blk_hdr blk_hdr; static struct icache *icache; static int ifd; static int sfd; -static int blake2b_parallel; +static int hash_algo; int verbose; char *argv0; @@ -105,38 +106,17 @@ free_snap(struct snap *snap) static void hash_snap(struct snap *snap, uint8_t *md) { - switch (blake2b_parallel) { - case 0: { - blake2b_state ctx; - uint64_t i; - - blake2b_init(&ctx, MD_SIZE); - for (i = 0; i < snap->nr_blk_descs; i++) { - struct blk_desc *blk_desc; - - blk_desc = &snap->blk_desc[i]; - blake2b_update(&ctx, blk_desc->md, - sizeof(blk_desc->md)); - } - blake2b_final(&ctx, md, MD_SIZE); - break; - } - case 1: { - blake2bp_state ctx; - uint64_t i; + struct hash_ctx ctx; + uint64_t i; - blake2bp_init(&ctx, MD_SIZE); - for (i = 0; i < snap->nr_blk_descs; i++) { - struct blk_desc *blk_desc; + hash_init(&ctx, hash_algo, MD_SIZE); + for (i = 0; i < snap->nr_blk_descs; i++) { + struct blk_desc *blk_desc; - blk_desc = &snap->blk_desc[i]; - blake2bp_update(&ctx, blk_desc->md, - sizeof(blk_desc->md)); - } - blake2bp_final(&ctx, md, MD_SIZE); - break; - } + blk_desc = &snap->blk_desc[i]; + hash_update(&ctx, blk_desc->md, sizeof(blk_desc->md)); } + hash_final(&ctx, md, MD_SIZE); } static struct snap * @@ -202,24 +182,11 @@ free_buf(uint8_t *buf) static void hash_blk(uint8_t *buf, size_t size, uint8_t *md) { - switch (blake2b_parallel) { - case 0: { - blake2b_state ctx; - - blake2b_init(&ctx, MD_SIZE); - blake2b_update(&ctx, buf, size); - blake2b_final(&ctx, md, MD_SIZE); - break; - } - case 1: { - blake2bp_state ctx; + struct hash_ctx ctx; - blake2bp_init(&ctx, MD_SIZE); - blake2bp_update(&ctx, buf, size); - blake2bp_final(&ctx, md, MD_SIZE); - break; - } - } + hash_init(&ctx, hash_algo, MD_SIZE); + hash_update(&ctx, buf, size); + hash_final(&ctx, md, MD_SIZE); } static void @@ 
-467,7 +434,7 @@ init_blk_hdr(void) { blk_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN; blk_hdr.flags |= compr_enabled << COMPR_ENABLED_SHIFT; - blk_hdr.flags |= blake2b_parallel << BLAKE2BP_ENABLED_SHIFT; + blk_hdr.flags |= hash_algo << HASH_ALGO_SHIFT; blk_hdr.size = BLK_HDR_SIZE; } @@ -484,9 +451,9 @@ load_blk_hdr(void) v &= COMPR_ENABLED_MASK; compr_enabled = v; - v = blk_hdr.flags >> BLAKE2BP_ENABLED_SHIFT; - v &= BLAKE2BP_ENABLED_MASK; - blake2b_parallel = v; + v = blk_hdr.flags >> HASH_ALGO_SHIFT; + v &= HASH_ALGO_MASK; + hash_algo = v; } static void @@ -573,7 +540,7 @@ term(void) static void usage(void) { - fprintf(stderr, "usage: %s [-PZcilv] [-e id] [-r root] [-m message] [file]\n", argv0); + fprintf(stderr, "usage: %s [-Zcilv] [-H hash] [-e id] [-r root] [-m message] [file]\n", argv0); exit(1); } @@ -581,13 +548,16 @@ int main(int argc, char *argv[]) { uint8_t md[MD_SIZE]; - char *id = NULL, *root = NULL, *msg = NULL; + char *id = NULL, *root = NULL, *msg = NULL, *algo = NULL; int iflag = 0, lflag = 0, cflag = 0; int fd = -1; ARGBEGIN { - case 'P': - blake2b_parallel = 1; + case 'H': + algo = EARGF(usage()); + hash_algo = hash_name2type(algo); + if (hash_algo < 0) + errx(1, "unknown hash: %s", algo); break; case 'Z': compr_enabled = 0; diff --git a/dedup.h b/dedup.h @@ -25,11 +25,11 @@ #define COMPR_ENABLED_SHIFT 16 #define COMPR_ENABLED_MASK 0x1 -#define BLAKE2BP_ENABLED_SHIFT 17 -#define BLAKE2BP_ENABLED_MASK 0x1 +#define HASH_ALGO_SHIFT 17 +#define HASH_ALGO_MASK 0x7 /* max 8 hash algos */ -struct icache; struct chunker; +struct icache; struct stats { uint64_t orig_size; /* original store size */ @@ -73,14 +73,6 @@ extern int compr_enabled; /* dedup.c */ extern int verbose; -/* icache.c */ -struct icache *alloc_icache(void); -void free_icache(struct icache *icache); -void insert_icache(struct icache *icache, struct blk_desc *desc); -int lookup_icache(struct icache *icache, struct blk_desc *desc); -void icache_stats(struct icache *icache, unsigned 
long long *hits, - unsigned long long *misses); - /* chunker.c */ struct chunker *alloc_chunker(int fd, size_t min_size, size_t max_size, size_t mask, size_t win_size); @@ -94,6 +86,14 @@ size_t compr_size(size_t size); size_t compr(uint8_t *in, uint8_t *out, size_t insize, size_t outsize); size_t decompr(uint8_t *in, uint8_t *out, size_t insize, size_t outsize); +/* icache.c */ +struct icache *alloc_icache(void); +void free_icache(struct icache *icache); +void insert_icache(struct icache *icache, struct blk_desc *desc); +int lookup_icache(struct icache *icache, struct blk_desc *desc); +void icache_stats(struct icache *icache, unsigned long long *hits, + unsigned long long *misses); + /* pack.c */ int pack(unsigned char *dst, char *fmt, ...); diff --git a/hash.c b/hash.c @@ -0,0 +1,119 @@ +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "blake2.h" +#include "hash.h" + +static int blake2bi(struct hash_ctx *ctx, size_t n); +static int blake2bu(struct hash_ctx *ctx, const void *buf, size_t n); +static int blake2bf(struct hash_ctx *ctx, void *buf, size_t n); + +static int blake2bpi(struct hash_ctx *ctx, size_t n); +static int blake2bpu(struct hash_ctx *ctx, const void *buf, size_t n); +static int blake2bpf(struct hash_ctx *ctx, void *buf, size_t n); + +static struct hash_ops { + int (*init)(struct hash_ctx *ctx, size_t n); + int (*update)(struct hash_ctx *ctx, const void *buf, size_t n); + int (*final)(struct hash_ctx *ctx, void *buf, size_t n); +} hashes[NR_ALGOS] = { + { + .init = blake2bi, + .update = blake2bu, + .final = blake2bf, + }, + { + .init = blake2bpi, + .update = blake2bpu, + .final = blake2bpf, + } +}; + +static struct algomap { + char *name; + int type; +} algomap[] = { + { + .name = "blake2b", + .type = BLAKE2B_ALGO, + }, + { + .name = "blake2bp", + .type = BLAKE2BP_ALGO, + }, + { + .name = NULL, + }, +}; + +static int +blake2bi(struct hash_ctx *ctx, size_t n) +{ + return blake2b_init(&ctx->u.blake2b_ctx, n); +} + +static int 
+blake2bu(struct hash_ctx *ctx, const void *buf, size_t n) +{ + return blake2b_update(&ctx->u.blake2b_ctx, buf, n); +} + +static int +blake2bf(struct hash_ctx *ctx, void *buf, size_t n) +{ + return blake2b_final(&ctx->u.blake2b_ctx, buf, n); +} + +static int +blake2bpi(struct hash_ctx *ctx, size_t n) +{ + return blake2bp_init(&ctx->u.blake2bp_ctx, n); +} + +static int +blake2bpu(struct hash_ctx *ctx, const void *buf, size_t n) +{ + return blake2bp_update(&ctx->u.blake2bp_ctx, buf, n); +} + +static int +blake2bpf(struct hash_ctx *ctx, void *buf, size_t n) +{ + return blake2bp_final(&ctx->u.blake2bp_ctx, buf, n); +} + +int +hash_init(struct hash_ctx *ctx, int type, size_t n) +{ + if (type < 0 || type >= NR_ALGOS) + return -1; + + ctx->ops = &hashes[type]; + return (*ctx->ops->init)(ctx, n); +} + +int +hash_update(struct hash_ctx *ctx, const void *buf, size_t n) +{ + return (*ctx->ops->update)(ctx, buf, n); +} + +int +hash_final(struct hash_ctx *ctx, void *buf, size_t n) +{ + return (*ctx->ops->final)(ctx, buf, n); +} + +int +hash_name2type(char *name) +{ + struct algomap *algo; + + for (algo = &algomap[0]; algo->name != NULL; algo++) + if (strcmp(algo->name, name) == 0) + break; + if (algo->name == NULL) + return -1; + return algo->type; +} diff --git a/hash.h b/hash.h @@ -0,0 +1,18 @@ +enum hash_algo { + BLAKE2B_ALGO, + BLAKE2BP_ALGO, + NR_ALGOS, +}; + +struct hash_ctx { + union { + blake2b_state blake2b_ctx; + blake2bp_state blake2bp_ctx; + } u; + struct hash_ops *ops; +}; + +int hash_init(struct hash_ctx *ctx, int type, size_t n); +int hash_update(struct hash_ctx *ctx, const void *buf, size_t n); +int hash_final(struct hash_ctx *ctx, void *buf, size_t n); +int hash_name2type(char *name);