dedup

deduplicating backup program
git clone git://git.2f30.org/dedup
Log | Files | Refs | README | LICENSE

commit 4d8c3d50ca614a40275ad223fdfd31fd897b9627
parent c3a040a2db7964be918a34acc35db912431757b1
Author: sin <sin@2f30.org>
Date:   Tue, 14 May 2019 13:05:50 +0300

Move hashing to top layer

The hash of a block should be the hash of its plaintext contents.  When
encryption is enabled, the stored (encrypted) block is always different,
even if the original data was identical, because each encrypted block
contains a unique nonce.  Hashing at the top layer, before compression
and encryption, keeps identical plaintext blocks mapped to the same hash.

Diffstat:
M bcompress.c | 9 ---------
M bencrypt.c | 9 ---------
M block.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
M block.h | 1 -
M bstorage.c | 79 +------------------------------------------------------------------------------
5 files changed, 58 insertions(+), 98 deletions(-)

diff --git a/bcompress.c b/bcompress.c @@ -36,7 +36,6 @@ static int bcput(struct bctx *bctx, void *buf, size_t n, unsigned char *md); static int bcget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n); static int bcrm(struct bctx *bctx, unsigned char *md); static int bcgc(struct bctx *bctx); -static int bccheck(struct bctx *bctx, unsigned char *md); static int bcsync(struct bctx *bctx); static int bcclose(struct bctx *bctx); @@ -47,7 +46,6 @@ static struct bops bops = { .get = bcget, .rm = bcrm, .gc = bcgc, - .check = bccheck, .sync = bcsync, .close = bcclose, }; @@ -318,13 +316,6 @@ bcgc(struct bctx *bctx) } static int -bccheck(struct bctx *bctx, unsigned char *md) -{ - return bencryptops()->check(bctx, md); - -} - -static int bcsync(struct bctx *bctx) { return bencryptops()->sync(bctx); diff --git a/bencrypt.c b/bencrypt.c @@ -34,7 +34,6 @@ static int beput(struct bctx *bctx, void *buf, size_t n, unsigned char *md); static int beget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n); static int berm(struct bctx *bctx, unsigned char *md); static int begc(struct bctx *bctx); -static int becheck(struct bctx *bctx, unsigned char *md); static int besync(struct bctx *bctx); static int beclose(struct bctx *bctx); @@ -45,7 +44,6 @@ static struct bops bops = { .get = beget, .rm = berm, .gc = begc, - .check = becheck, .sync = besync, .close = beclose, }; @@ -325,13 +323,6 @@ begc(struct bctx *bctx) } static int -becheck(struct bctx *bctx, unsigned char *md) -{ - return bstorageops()->check(bctx, md); - -} - -static int besync(struct bctx *bctx) { return bstorageops()->sync(bctx); diff --git a/block.c b/block.c @@ -9,9 +9,18 @@ #include <stdlib.h> #include <string.h> +#include <sodium.h> + #include "block.h" +#include "config.h" #include "misc.h" +static int +bhash(void *buf, size_t n, unsigned char *md) +{ + return crypto_generichash(md, MDSIZE, buf, n, NULL, 0); +} + int bcreat(char *path, int mode, struct bctx **bctx) { @@ -20,6 +29,11 @@ bcreat(char 
*path, int mode, struct bctx **bctx) return -1; } + if (sodium_init() < 0) { + seterr("sodium_init: failed"); + return -1; + } + *bctx = calloc(1, sizeof(**bctx)); if (*bctx == NULL) { seterr("calloc: %s", strerror(errno)); @@ -41,6 +55,11 @@ bopen(char *path, int flags, int mode, struct bctx **bctx) return -1; } + if (sodium_init() < 0) { + seterr("sodium_init: failed"); + return -1; + } + *bctx = calloc(1, sizeof(**bctx)); if (*bctx == NULL) { seterr("calloc: %s", strerror(errno)); @@ -62,6 +81,11 @@ bput(struct bctx *bctx, void *buf, size_t n, unsigned char *md) return -1; } + if (bhash(buf, n, md) < 0) { + seterr("bhash: failed"); + return -1; + } + return bcompressops()->put(bctx, buf, n, md); } @@ -98,15 +122,47 @@ bgc(struct bctx *bctx) return bcompressops()->gc(bctx); } +/* + * Lookup the block given hash and rehash it. + * Check that the hashes match. It returns -1 + * on error, 0 on success and 1 if a block hash + * mismatch is detected. + */ int bcheck(struct bctx *bctx, unsigned char *md) { + unsigned char tmp[MDSIZE]; + void *buf; + size_t n; + if (bctx == NULL || md == NULL) { seterr("invalid params"); return -1; } - return bcompressops()->check(bctx, md); + buf = malloc(BSIZEMAX); + if (buf == NULL) { + seterr("malloc: out of memory"); + return -1; + } + n = BSIZEMAX; + + if (bcompressops()->get(bctx, md, buf, &n) < 0) { + free(buf); + return -1; + } + + if (bhash(buf, n, tmp) < 0) { + free(buf); + return -1; + } + + if (memcmp(tmp, md, MDSIZE) != 0) { + free(buf); + return -1; + } + free(buf); + return 0; } int diff --git a/block.h b/block.h @@ -21,7 +21,6 @@ struct bops { int (*get)(struct bctx *, unsigned char *, void *, size_t *); int (*rm)(struct bctx *, unsigned char *); int (*gc)(struct bctx *); - int (*check)(struct bctx *, unsigned char *); int (*sync)(struct bctx *); int (*close)(struct bctx *); }; diff --git a/bstorage.c b/bstorage.c @@ -22,8 +22,6 @@ #include <strings.h> #include <unistd.h> -#include <sodium.h> - #include "block.h" 
#include "config.h" #include "misc.h" @@ -56,7 +54,6 @@ static int bsput(struct bctx *, void *, size_t, unsigned char *); static int bsget(struct bctx *, unsigned char *, void *, size_t *); static int bsrm(struct bctx *, unsigned char *); static int bsgc(struct bctx *); -static int bscheck(struct bctx *, unsigned char *); static int bssync(struct bctx *); static int bsclose(struct bctx *); @@ -67,7 +64,6 @@ static struct bops bops = { .get = bsget, .rm = bsrm, .gc = bsgc, - .check = bscheck, .sync = bssync, .close = bsclose, }; @@ -116,12 +112,6 @@ bd_cmp(struct bd *b1, struct bd *b2) static RB_PROTOTYPE(bdcache, bd, rbe, bd_cmp) static RB_GENERATE(bdcache, bd, rbe, bd_cmp) -static int -bhash(void *buf, size_t n, unsigned char *md) -{ - return crypto_generichash(md, MDSIZE, buf, n, NULL, 0); -} - /* Unpack block header */ static int unpackbhdr(unsigned char *buf, struct bhdr *bhdr) @@ -283,11 +273,6 @@ bscreat(struct bctx *bctx, char *path, int mode) struct bhdr *bhdr; int fd; - if (sodium_init() < 0) { - seterr("sodium_init: failed"); - return -1; - } - fd = open(path, O_RDWR | O_CREAT | O_EXCL, mode); if (fd < 0) { seterr("open: %s", strerror(errno)); @@ -342,11 +327,6 @@ bsopen(struct bctx *bctx, char *path, int flags, int mode) return -1; } - if (sodium_init() < 0) { - seterr("sodium_init: failed"); - return -1; - } - fd = open(path, flags, mode); if (fd < 0) { seterr("open: %s", strerror(errno)); @@ -409,17 +389,13 @@ bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md) struct bd key, *bd; off_t offs; - if (bhash(buf, n, key.md) < 0) { - seterr("bhash: failed"); - return -1; - } - /* * If the block is already present in the cache * just increment the reference count and write back * the block descriptor associated for that block. 
*/ sctx = bctx->sctx; + memcpy(key.md, md, MDSIZE); bd = RB_FIND(bdcache, &sctx->bdcache, &key); if (bd != NULL) { off_t bdoffs; @@ -605,59 +581,6 @@ bsgc(struct bctx *bctx) return 0; } -/* - * Lookup the block given hash and rehash it. - * Check that the hashes match. It returns -1 - * on error, 0 on success and 1 if a block hash - * mismatch is detected. - */ -static int -bscheck(struct bctx *bctx, unsigned char *md) -{ - struct sctx *sctx; - struct bd key, *bd; - void *buf; - - sctx = bctx->sctx; - memcpy(key.md, md, MDSIZE); - bd = RB_FIND(bdcache, &sctx->bdcache, &key); - if (bd == NULL) { - seterr("unknown block"); - return -1; - } - - buf = malloc(bd->size); - if (buf == NULL) { - seterr("malloc: %s", strerror(errno)); - return -1; - } - - if (lseek(sctx->fd, bd->offset, SEEK_SET) < 0) { - free(buf); - seterr("lseek: %s", strerror(errno)); - return -1; - } - - if (xread(sctx->fd, buf, bd->size) != bd->size) { - free(buf); - seterr("failed to read block: %s", strerror(errno)); - return -1; - } - - if (bhash(buf, bd->size, key.md) < 0) { - free(buf); - seterr("bhash: failed"); - return -1; - } - - if (memcmp(key.md, md, MDSIZE) != 0) { - free(buf); - return 1; - } - free(buf); - return 0; -} - /* Sync block header to storage file */ static int bssync(struct bctx *bctx)