dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit a5062b8799146166bf125a7f49630fa46d84639b
parent 918648012d5b18fa898b7338de99e16583a0f7f6
Author: sin <sin@2f30.org>
Date:   Thu, 25 Apr 2019 22:02:32 +0100

Implement block deletion

This code relies on fallocate(2) so it is not very portable.

Diffstat:
M bcompress.c | 10 ++++++++++
M block.c | 12 ++++++++++++
M block.h | 2 ++
M bstorage.c | 38 ++++++++++++++++++++++++++++++++++++++
M config.mk | 2 +-
5 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/bcompress.c b/bcompress.c @@ -25,6 +25,7 @@ static int bccreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar) static int bcopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar); static int bcput(struct bctx *bctx, void *buf, size_t n, unsigned char *md); static int bcget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n); +static int bcrm(struct bctx *bctx, unsigned char *md); static int bccheck(struct bctx *bctx, unsigned char *md); static int bcsync(struct bctx *bctx); static int bcclose(struct bctx *bctx); @@ -34,6 +35,7 @@ static struct bops bops = { .open = bcopen, .put = bcput, .get = bcget, + .rm = bcrm, .check = bccheck, .sync = bcsync, .close = bcclose, @@ -238,6 +240,14 @@ bcget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n) } static int +bcrm(struct bctx *bctx, unsigned char *md) +{ + struct bops *bops = bstorageops(); + + return bops->rm(bctx, md); +} + +static int bccheck(struct bctx *bctx, unsigned char *md) { struct bops *bops = bstorageops(); diff --git a/block.c b/block.c @@ -78,6 +78,18 @@ bget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n) } int +brm(struct bctx *bctx, unsigned char *md) +{ + struct bops *bops; + + if (bctx == NULL || md == NULL) + return -1; + + bops = bcompressops(); + return bops->rm(bctx, md); +} + +int bcheck(struct bctx *bctx, unsigned char *md) { struct bops *bops; diff --git a/block.h b/block.h @@ -17,6 +17,7 @@ struct bops { int (*open)(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar); int (*put)(struct bctx *bctx, void *buf, size_t n, unsigned char *md); int (*get)(struct bctx *bctx, unsigned char *md, void *buf, size_t *n); + int (*rm)(struct bctx *bctx, unsigned char *md); int (*check)(struct bctx *bctx, unsigned char *md); int (*sync)(struct bctx *bctx); int (*close)(struct bctx *bctx); @@ -27,6 +28,7 @@ extern int bcreat(char *path, int mode, struct bparam *bpar, struct bctx **bctx) extern int bopen(char 
*path, int flags, int mode, struct bparam *bpar, struct bctx **bctx); extern int bput(struct bctx *bctx, void *buf, size_t n, unsigned char *md); extern int bget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n); +extern int brm(struct bctx *bctx, unsigned char *md); extern int bcheck(struct bctx *bctx, unsigned char *md); extern int bsync(struct bctx *bctx); extern int bclose(struct bctx *bctx); diff --git a/bstorage.c b/bstorage.c @@ -51,6 +51,7 @@ static int bscreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar) static int bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar); static int bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md); static int bsget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n); +static int bsrm(struct bctx *bctx, unsigned char *md); static int bscheck(struct bctx *bctx, unsigned char *md); static int bssync(struct bctx *bctx); static int bsclose(struct bctx *bctx); @@ -60,6 +61,7 @@ static struct bops bops = { .open = bsopen, .put = bsput, .get = bsget, + .rm = bsrm, .check = bscheck, .sync = bssync, .close = bsclose, @@ -536,6 +538,42 @@ bsget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n) return 0; } +static int +bsrm(struct bctx *bctx, unsigned char *md) +{ + struct sctx *sctx; + struct bd key, *bd; + off_t bdoffs; + + sctx = bctx->sctx; + + /* Lookup block in the cache */ + memcpy(key.md, md, MDSIZE); + bd = RB_FIND(bdcache, &sctx->bdcache, &key); + if (bd == NULL) + return -1; + + bdoffs = bd->offset - BDSIZE; + if (lseek(sctx->fd, bdoffs, SEEK_SET) < 0) + return -1; + + bd->refcnt--; + if (packbd(sctx->fd, bd) < 0) { + bd->refcnt++; + return -1; + } + + if (bd->refcnt == 0) { + int mode; + + mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; + fallocate(sctx->fd, mode, bd->offset, bd->size); + RB_REMOVE(bdcache, &sctx->bdcache, bd); + free(bd); + } + return 0; +} + /* * Lookup the block and rehash it. 
Check that the * resulting hash matches the given hash. diff --git a/config.mk b/config.mk @@ -2,5 +2,5 @@ VERSION = 1.0 PREFIX = /usr/local MANPREFIX = $(PREFIX)/man -CPPFLAGS = -I/usr/local/include -D_FILE_OFFSET_BITS=64 +CPPFLAGS = -I/usr/local/include -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE LDFLAGS = -L/usr/local/lib