commit b0e37d93a013e29a6ee2918eabe2708aa7022921
parent e5fdb8553cc9adfcebf7d567a57334b1238e3cd3
Author: sin <sin@2f30.org>
Date: Sun, 7 Apr 2019 10:14:29 +0100
Add mini hash framework
Diffstat:
M | Makefile | | | 3 | +++ |
M | dedup.1 | | | 15 | ++++++++------- |
M | dedup.c | | | 80 | +++++++++++++++++++++++++------------------------------------------------------ |
M | dedup.h | | | 22 | +++++++++++----------- |
A | hash.c | | | 119 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | hash.h | | | 18 | ++++++++++++++++++ |
6 files changed, 184 insertions(+), 73 deletions(-)
diff --git a/Makefile b/Makefile
@@ -10,6 +10,7 @@ HDR = \
blake2.h \
config.h \
dedup.h \
+ hash.h \
tree.h \
SRC = \
@@ -19,6 +20,7 @@ SRC = \
blake2bp-ref.c \
chunker.c \
compress.c \
+ hash.c \
icache.c \
pack.c \
types.c \
@@ -31,6 +33,7 @@ OBJ = \
blake2bp-ref.o \
chunker.o \
compress.o \
+ hash.o \
icache.o \
pack.o \
types.o \
diff --git a/dedup.1 b/dedup.1
@@ -1,4 +1,4 @@
-.Dd April 6, 2019
+.Dd April 7, 2019
.Dt DEDUP 1
.Os
.Sh NAME
@@ -6,7 +6,8 @@
.Nd data deduplication program
.Sh SYNOPSIS
.Nm dedup
-.Op Fl PZcilv
+.Op Fl Zcilv
+.Op Fl H Ar hash
.Op Fl e Ar id
.Op Fl r Ar root
.Op Fl m Ar message
@@ -27,11 +28,11 @@ should be used and piped into
.Nm .
.Sh OPTIONS
.Bl -tag -width "-m message"
-.It Fl P
-Use the blake2bp variant which is a parallel version of blake2b.
-These two variants are incompatible as they produce different
-hashes. This flag only has an effect when initializing the
-repository. By default blake2b is used.
+.It Fl H Ar hash
+The cryptographic hash function used to identify
+unique blocks in the store. The supported hash functions
+are blake2b and blake2bp. This flag only has an effect when
+initializing the repository. By default blake2b is used.
.It Fl Z
Disable compression support for this repository.
This flag only has an effect when initializing the repository.
diff --git a/dedup.c b/dedup.c
@@ -13,6 +13,7 @@
#include "arg.h"
#include "blake2.h"
#include "dedup.h"
+#include "hash.h"
#define SNAPSF ".snapshots"
#define STOREF ".store"
@@ -33,7 +34,7 @@ static struct blk_hdr blk_hdr;
static struct icache *icache;
static int ifd;
static int sfd;
-static int blake2b_parallel;
+static int hash_algo;
int verbose;
char *argv0;
@@ -105,38 +106,17 @@ free_snap(struct snap *snap)
static void
hash_snap(struct snap *snap, uint8_t *md)
{
- switch (blake2b_parallel) {
- case 0: {
- blake2b_state ctx;
- uint64_t i;
-
- blake2b_init(&ctx, MD_SIZE);
- for (i = 0; i < snap->nr_blk_descs; i++) {
- struct blk_desc *blk_desc;
-
- blk_desc = &snap->blk_desc[i];
- blake2b_update(&ctx, blk_desc->md,
- sizeof(blk_desc->md));
- }
- blake2b_final(&ctx, md, MD_SIZE);
- break;
- }
- case 1: {
- blake2bp_state ctx;
- uint64_t i;
+ struct hash_ctx ctx;
+ uint64_t i;
- blake2bp_init(&ctx, MD_SIZE);
- for (i = 0; i < snap->nr_blk_descs; i++) {
- struct blk_desc *blk_desc;
+ hash_init(&ctx, hash_algo, MD_SIZE);
+ for (i = 0; i < snap->nr_blk_descs; i++) {
+ struct blk_desc *blk_desc;
- blk_desc = &snap->blk_desc[i];
- blake2bp_update(&ctx, blk_desc->md,
- sizeof(blk_desc->md));
- }
- blake2bp_final(&ctx, md, MD_SIZE);
- break;
- }
+ blk_desc = &snap->blk_desc[i];
+ hash_update(&ctx, blk_desc->md, sizeof(blk_desc->md));
}
+ hash_final(&ctx, md, MD_SIZE);
}
static struct snap *
@@ -202,24 +182,11 @@ free_buf(uint8_t *buf)
static void
hash_blk(uint8_t *buf, size_t size, uint8_t *md)
{
- switch (blake2b_parallel) {
- case 0: {
- blake2b_state ctx;
-
- blake2b_init(&ctx, MD_SIZE);
- blake2b_update(&ctx, buf, size);
- blake2b_final(&ctx, md, MD_SIZE);
- break;
- }
- case 1: {
- blake2bp_state ctx;
+ struct hash_ctx ctx;
- blake2bp_init(&ctx, MD_SIZE);
- blake2bp_update(&ctx, buf, size);
- blake2bp_final(&ctx, md, MD_SIZE);
- break;
- }
- }
+ hash_init(&ctx, hash_algo, MD_SIZE);
+ hash_update(&ctx, buf, size);
+ hash_final(&ctx, md, MD_SIZE);
}
static void
@@ -467,7 +434,7 @@ init_blk_hdr(void)
{
blk_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN;
blk_hdr.flags |= compr_enabled << COMPR_ENABLED_SHIFT;
- blk_hdr.flags |= blake2b_parallel << BLAKE2BP_ENABLED_SHIFT;
+ blk_hdr.flags |= hash_algo << HASH_ALGO_SHIFT;
blk_hdr.size = BLK_HDR_SIZE;
}
@@ -484,9 +451,9 @@ load_blk_hdr(void)
v &= COMPR_ENABLED_MASK;
compr_enabled = v;
- v = blk_hdr.flags >> BLAKE2BP_ENABLED_SHIFT;
- v &= BLAKE2BP_ENABLED_MASK;
- blake2b_parallel = v;
+ v = blk_hdr.flags >> HASH_ALGO_SHIFT;
+ v &= HASH_ALGO_MASK;
+ hash_algo = v;
}
static void
@@ -573,7 +540,7 @@ term(void)
static void
usage(void)
{
- fprintf(stderr, "usage: %s [-PZcilv] [-e id] [-r root] [-m message] [file]\n", argv0);
+ fprintf(stderr, "usage: %s [-Zcilv] [-H hash] [-e id] [-r root] [-m message] [file]\n", argv0);
exit(1);
}
@@ -581,13 +548,16 @@ int
main(int argc, char *argv[])
{
uint8_t md[MD_SIZE];
- char *id = NULL, *root = NULL, *msg = NULL;
+ char *id = NULL, *root = NULL, *msg = NULL, *algo = NULL;
int iflag = 0, lflag = 0, cflag = 0;
int fd = -1;
ARGBEGIN {
- case 'P':
- blake2b_parallel = 1;
+ case 'H':
+ algo = EARGF(usage());
+ hash_algo = hash_name2type(algo);
+ if (hash_algo < 0)
+ errx(1, "unknown hash: %s", algo);
break;
case 'Z':
compr_enabled = 0;
diff --git a/dedup.h b/dedup.h
@@ -25,11 +25,11 @@
#define COMPR_ENABLED_SHIFT 16
#define COMPR_ENABLED_MASK 0x1
-#define BLAKE2BP_ENABLED_SHIFT 17
-#define BLAKE2BP_ENABLED_MASK 0x1
+#define HASH_ALGO_SHIFT 17
+#define HASH_ALGO_MASK 0x7 /* max 8 hash algos */
-struct icache;
struct chunker;
+struct icache;
struct stats {
uint64_t orig_size; /* original store size */
@@ -73,14 +73,6 @@ extern int compr_enabled;
/* dedup.c */
extern int verbose;
-/* icache.c */
-struct icache *alloc_icache(void);
-void free_icache(struct icache *icache);
-void insert_icache(struct icache *icache, struct blk_desc *desc);
-int lookup_icache(struct icache *icache, struct blk_desc *desc);
-void icache_stats(struct icache *icache, unsigned long long *hits,
- unsigned long long *misses);
-
/* chunker.c */
struct chunker *alloc_chunker(int fd, size_t min_size, size_t max_size,
size_t mask, size_t win_size);
@@ -94,6 +86,14 @@ size_t compr_size(size_t size);
size_t compr(uint8_t *in, uint8_t *out, size_t insize, size_t outsize);
size_t decompr(uint8_t *in, uint8_t *out, size_t insize, size_t outsize);
+/* icache.c */
+struct icache *alloc_icache(void);
+void free_icache(struct icache *icache);
+void insert_icache(struct icache *icache, struct blk_desc *desc);
+int lookup_icache(struct icache *icache, struct blk_desc *desc);
+void icache_stats(struct icache *icache, unsigned long long *hits,
+ unsigned long long *misses);
+
/* pack.c */
int pack(unsigned char *dst, char *fmt, ...);
diff --git a/hash.c b/hash.c
@@ -0,0 +1,119 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "blake2.h"
+#include "hash.h"
+
+/*
+ * Per-algorithm adapters. Each one forwards to the matching BLAKE2
+ * routine, operating on that algorithm's member of ctx->u.
+ */
+static int blake2bi(struct hash_ctx *ctx, size_t n);
+static int blake2bu(struct hash_ctx *ctx, const void *buf, size_t n);
+static int blake2bf(struct hash_ctx *ctx, void *buf, size_t n);
+
+static int blake2bpi(struct hash_ctx *ctx, size_t n);
+static int blake2bpu(struct hash_ctx *ctx, const void *buf, size_t n);
+static int blake2bpf(struct hash_ctx *ctx, void *buf, size_t n);
+
+/*
+ * Dispatch table, indexed by enum hash_algo. hash_init() stores a
+ * pointer to one entry in ctx->ops; hash_update() and hash_final()
+ * call through it.
+ *
+ * Each slot is keyed by its enum constant so the table cannot silently
+ * fall out of step with the enum when algorithms are added or reordered.
+ */
+static struct hash_ops {
+	int (*init)(struct hash_ctx *ctx, size_t n);
+	int (*update)(struct hash_ctx *ctx, const void *buf, size_t n);
+	int (*final)(struct hash_ctx *ctx, void *buf, size_t n);
+} hashes[NR_ALGOS] = {
+	[BLAKE2B_ALGO] = {
+		.init = blake2bi,
+		.update = blake2bu,
+		.final = blake2bf,
+	},
+	[BLAKE2BP_ALGO] = {
+		.init = blake2bpi,
+		.update = blake2bpu,
+		.final = blake2bpf,
+	},
+};
+
+/*
+ * Maps the algorithm names accepted by hash_name2type() to their
+ * enum hash_algo values. The list ends with an entry whose name is
+ * NULL; the lookup stops scanning there.
+ */
+static struct algomap {
+	const char *name;	/* string literal; never written through */
+	int type;
+} algomap[] = {
+	{
+		.name = "blake2b",
+		.type = BLAKE2B_ALGO,
+	},
+	{
+		.name = "blake2bp",
+		.type = BLAKE2BP_ALGO,
+	},
+	{
+		.name = NULL,	/* end-of-table sentinel */
+	},
+};
+
+/* init adapter for the blake2b slot: forwards ctx's blake2b state and n to blake2b_init(). */
+static int
+blake2bi(struct hash_ctx *ctx, size_t n)
+{
+	blake2b_state *st = &ctx->u.blake2b_ctx;
+
+	return blake2b_init(st, n);
+}
+
+/* update adapter for the blake2b slot: forwards buf and n to blake2b_update(). */
+static int
+blake2bu(struct hash_ctx *ctx, const void *buf, size_t n)
+{
+	blake2b_state *st = &ctx->u.blake2b_ctx;
+
+	return blake2b_update(st, buf, n);
+}
+
+/* final adapter for the blake2b slot: forwards buf and n to blake2b_final(). */
+static int
+blake2bf(struct hash_ctx *ctx, void *buf, size_t n)
+{
+	blake2b_state *st = &ctx->u.blake2b_ctx;
+
+	return blake2b_final(st, buf, n);
+}
+
+/* init adapter for the blake2bp slot: forwards ctx's blake2bp state and n to blake2bp_init(). */
+static int
+blake2bpi(struct hash_ctx *ctx, size_t n)
+{
+	blake2bp_state *st = &ctx->u.blake2bp_ctx;
+
+	return blake2bp_init(st, n);
+}
+
+/* update adapter for the blake2bp slot: forwards buf and n to blake2bp_update(). */
+static int
+blake2bpu(struct hash_ctx *ctx, const void *buf, size_t n)
+{
+	blake2bp_state *st = &ctx->u.blake2bp_ctx;
+
+	return blake2bp_update(st, buf, n);
+}
+
+/* final adapter for the blake2bp slot: forwards buf and n to blake2bp_final(). */
+static int
+blake2bpf(struct hash_ctx *ctx, void *buf, size_t n)
+{
+	blake2bp_state *st = &ctx->u.blake2bp_ctx;
+
+	return blake2bp_final(st, buf, n);
+}
+
+/*
+ * Bind ctx to the algorithm selected by type and run that algorithm's
+ * init routine, handing n to it unchanged.
+ *
+ * Returns -1 when type is not a valid index into the dispatch table;
+ * otherwise returns whatever the algorithm's init routine returns.
+ *
+ * On a rejected type ctx->ops is cleared, so a caller that ignores the
+ * return value gets a clean -1 from hash_update()/hash_final() instead
+ * of a call through an indeterminate pointer.
+ */
+int
+hash_init(struct hash_ctx *ctx, int type, size_t n)
+{
+	if (type < 0 || type >= NR_ALGOS) {
+		ctx->ops = NULL;
+		return -1;
+	}
+
+	ctx->ops = &hashes[type];
+	return (*ctx->ops->init)(ctx, n);
+}
+
+/*
+ * Pass buf and n to the update routine of the algorithm bound by
+ * hash_init().
+ *
+ * Returns -1 if ctx has no algorithm bound (hash_init() rejected the
+ * type); otherwise returns the update routine's result.
+ */
+int
+hash_update(struct hash_ctx *ctx, const void *buf, size_t n)
+{
+	if (ctx->ops == NULL)
+		return -1;
+	return (*ctx->ops->update)(ctx, buf, n);
+}
+
+/*
+ * Pass buf and n to the final routine of the algorithm bound by
+ * hash_init().
+ *
+ * Returns -1 if ctx has no algorithm bound (hash_init() rejected the
+ * type); otherwise returns the final routine's result.
+ */
+int
+hash_final(struct hash_ctx *ctx, void *buf, size_t n)
+{
+	if (ctx->ops == NULL)
+		return -1;
+	return (*ctx->ops->final)(ctx, buf, n);
+}
+
+/*
+ * Translate an algorithm name into its enum hash_algo value.
+ *
+ * The comparison is an exact strcmp() against the names in algomap.
+ * Returns the matching type, or -1 when name is NULL or not listed.
+ */
+int
+hash_name2type(char *name)
+{
+	struct algomap *algo;
+
+	/* strcmp() on a null pointer is undefined; treat it as "unknown". */
+	if (name == NULL)
+		return -1;
+
+	for (algo = &algomap[0]; algo->name != NULL; algo++) {
+		if (strcmp(algo->name, name) == 0)
+			return algo->type;
+	}
+	return -1;
+}
diff --git a/hash.h b/hash.h
@@ -0,0 +1,18 @@
+#ifndef HASH_H
+#define HASH_H
+
+#include <stddef.h>	/* size_t */
+
+/*
+ * NOTE: blake2b_state and blake2bp_state are not declared here; the
+ * including file must pull in blake2.h first (hash.c and dedup.c both
+ * include it ahead of this header).
+ */
+
+/*
+ * Hash algorithm identifiers. dedup.c shifts the selected value into
+ * the block header flags (HASH_ALGO_SHIFT / HASH_ALGO_MASK), so the
+ * numbers are spelled out explicitly and should not be reassigned.
+ */
+enum hash_algo {
+	BLAKE2B_ALGO = 0,
+	BLAKE2BP_ALGO = 1,
+	NR_ALGOS,	/* number of algorithms; keep last */
+};
+
+/*
+ * Hashing context. hash_init() points ops at the dispatch entry for
+ * the chosen algorithm; the union holds that algorithm's state.
+ */
+struct hash_ctx {
+	union {
+		blake2b_state blake2b_ctx;
+		blake2bp_state blake2bp_ctx;
+	} u;
+	struct hash_ops *ops;	/* defined in hash.c */
+};
+
+/* Select algorithm `type` for ctx; returns -1 if type is out of range. */
+int hash_init(struct hash_ctx *ctx, int type, size_t n);
+/* Hand buf/n to the selected algorithm's update routine. */
+int hash_update(struct hash_ctx *ctx, const void *buf, size_t n);
+/* Hand buf/n to the selected algorithm's final routine. */
+int hash_final(struct hash_ctx *ctx, void *buf, size_t n);
+/* Map an algorithm name to its enum hash_algo value; -1 if unknown. */
+int hash_name2type(char *name);
+
+#endif /* HASH_H */