dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit e814b11f226f19a9a60a5bfc53e0be2128d91ef0
parent cc34761d0d55ea6c036421cf4fee677ccbe657a4
Author: sin <sin@2f30.org>
Date:   Fri, 26 Apr 2019 12:49:03 +0100

Implement dup-gc(1)

Diffstat:
M Makefile | 12 ++++++++----
M TODO | 1 -
M bcompress.c | 6 +++---
M block.c | 6 +++---
M block.h | 4 ++--
M bstorage.c | 4 ++--
A dup-gc.1 | 24 ++++++++++++++++++++++++
A dup-gc.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M dup-pack.c | 4 ++--
M dup-unpack.c | 4 ++--
10 files changed, 113 insertions(+), 19 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,7 +1,7 @@ include config.mk -BIN = dup-check dup-init dup-pack dup-rm dup-unpack -MAN = dup-check.1 dup-init.1 dup-pack.1 dup-rm.1 dup-unpack.1 +BIN = dup-check dup-gc dup-init dup-pack dup-rm dup-unpack +MAN = dup-check.1 dup-gc.1 dup-init.1 dup-pack.1 dup-rm.1 dup-unpack.1 HDR = \ arg.h \ @@ -26,6 +26,7 @@ COMMOBJ = \ unpack.o \ DCHECKOBJ = $(COMMOBJ) dup-check.o +DGCOBJ = $(COMMOBJ) dup-gc.o DINITOBJ = $(COMMOBJ) dup-init.o DPACKOBJ = $(COMMOBJ) dup-pack.o DRMOBJ = $(COMMOBJ) dup-rm.o @@ -35,10 +36,10 @@ LDLIBS = -lsnappy all: $(BIN) -$(DCHECKOBJ) $(DINITOBJ) $(DPACKOBJ) $(DRMOBJ) $(DUNPACKOBJ): $(HDR) +$(DCHECKOBJ) $(DGCOBJ) $(DINITOBJ) $(DPACKOBJ) $(DRMOBJ) $(DUNPACKOBJ): $(HDR) clean: - rm -f $(DCHECKOBJ) $(DINITOBJ) $(DPACKOBJ) $(DRMOBJ) $(DUNPACKOBJ) $(BIN) + rm -f $(DCHECKOBJ) $(DGCOBJ) $(DINITOBJ) $(DPACKOBJ) $(DRMOBJ) $(DUNPACKOBJ) $(BIN) rm -rf dedup-$(VERSION) dedup-$(VERSION).tar.gz install: all @@ -64,6 +65,9 @@ dist: clean dup-check: $(DCHECKOBJ) $(CC) -o $@ $(DCHECKOBJ) $(LDFLAGS) $(LDLIBS) +dup-gc: $(DGCOBJ) + $(CC) -o $@ $(DGCOBJ) $(LDFLAGS) $(LDLIBS) + dup-init: $(DINITOBJ) $(CC) -o $@ $(DINITOBJ) $(LDFLAGS) $(LDLIBS) diff --git a/TODO b/TODO @@ -2,4 +2,3 @@ Use a ring buffer in the chunker (avoid memmove() call) Create a library archive out of the blake2b files and link with it pledge/unveil support Use flock() to avoid corruption -Implement dup-gc(1) diff --git a/bcompress.c b/bcompress.c @@ -26,7 +26,7 @@ static int bcopen(struct bctx *bctx, char *path, int flags, int mode, struct bpa static int bcput(struct bctx *bctx, void *buf, size_t n, unsigned char *md); static int bcget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n); static int bcrm(struct bctx *bctx, unsigned char *md); -static int bcgc(struct bctx *bctx, unsigned char *md); +static int bcgc(struct bctx *bctx); static int bccheck(struct bctx *bctx, unsigned char *md); static int bcsync(struct bctx *bctx); static int bcclose(struct 
bctx *bctx); @@ -250,11 +250,11 @@ bcrm(struct bctx *bctx, unsigned char *md) } static int -bcgc(struct bctx *bctx, unsigned char *md) +bcgc(struct bctx *bctx) { struct bops *bops = bstorageops(); - return bops->gc(bctx, md); + return bops->gc(bctx); } static int diff --git a/block.c b/block.c @@ -90,15 +90,15 @@ brm(struct bctx *bctx, unsigned char *md) } int -bgc(struct bctx *bctx, unsigned char *md) +bgc(struct bctx *bctx) { struct bops *bops; - if (bctx == NULL || md == NULL) + if (bctx == NULL) return -1; bops = bcompressops(); - return bops->gc(bctx, md); + return bops->gc(bctx); } int diff --git a/block.h b/block.h @@ -18,7 +18,7 @@ struct bops { int (*put)(struct bctx *bctx, void *buf, size_t n, unsigned char *md); int (*get)(struct bctx *bctx, unsigned char *md, void *buf, size_t *n); int (*rm)(struct bctx *bctx, unsigned char *md); - int (*gc)(struct bctx *bctx, unsigned char *md); + int (*gc)(struct bctx *bctx); int (*check)(struct bctx *bctx, unsigned char *md); int (*sync)(struct bctx *bctx); int (*close)(struct bctx *bctx); @@ -30,7 +30,7 @@ extern int bopen(char *path, int flags, int mode, struct bparam *bpar, struct bc extern int bput(struct bctx *bctx, void *buf, size_t n, unsigned char *md); extern int bget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n); extern int brm(struct bctx *bctx, unsigned char *md); -extern int bgc(struct bctx *bctx, unsigned char *md); +extern int bgc(struct bctx *bctx); extern int bcheck(struct bctx *bctx, unsigned char *md); extern int bsync(struct bctx *bctx); extern int bclose(struct bctx *bctx); diff --git a/bstorage.c b/bstorage.c @@ -57,7 +57,7 @@ static int bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bpa static int bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md); static int bsget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n); static int bsrm(struct bctx *bctx, unsigned char *md); -static int bsgc(struct bctx *bctx, unsigned char *md); +static int 
bsgc(struct bctx *bctx); static int bscheck(struct bctx *bctx, unsigned char *md); static int bssync(struct bctx *bctx); static int bsclose(struct bctx *bctx); @@ -586,7 +586,7 @@ bsrm(struct bctx *bctx, unsigned char *md) } static int -bsgc(struct bctx *bctx, unsigned char *md) +bsgc(struct bctx *bctx) { struct sctx *sctx; struct bd key, *bd; diff --git a/dup-gc.1 b/dup-gc.1 @@ -0,0 +1,24 @@ +.Dd April 26, 2019 +.Dt DUP-GC 1 +.Os +.Sh NAME +.Nm dup-gc +.Nd Garbage collect dedup repository +.Sh SYNOPSIS +.Nm dup-gc +.Op Fl v +.Op repo +.Sh DESCRIPTION +.Nm +performs garbage collection on a dedup repository. +If no +.Ar repo +is specified the current working directory is used. +.Sh OPTIONS +.Bl -tag -width "-v" +.It Fl v +Enable verbose mode. +.El +.Sh AUTHORS +.An Dimitris Papastamos Aq Mt sin@2f30.org , +.An z3bra Aq Mt contactatz3bradotorg . diff --git a/dup-gc.c b/dup-gc.c @@ -0,0 +1,67 @@ +#include <sys/types.h> +#include <sys/stat.h> + +#include <err.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include "arg.h" +#include "config.h" +#include "block.h" +#include "snap.h" + +int verbose; +char *argv0; + +static void +usage(void) +{ + fprintf(stderr, "usage: %s [repo]\n", argv0); + exit(1); +} + +int +main(int argc, char *argv[]) +{ + struct bctx *bctx; /* block context */ + struct bparam bpar; + char *repo; + + ARGBEGIN { + case 'H': + bpar.halgo = EARGF(usage()); + break; + case 'Z': + bpar.calgo = EARGF(usage()); + break; + case 'v': + verbose++; + break; + default: + usage(); + } ARGEND + + switch (argc) { + case 0: + repo = "."; + break; + case 1: + repo = argv[0]; + break; + default: + usage(); + }; + + if (chdir(repo) < 0) + err(1, "chdir: %s", repo); + + if (bopen(STORAGEPATH, O_RDWR, 0600, &bpar, &bctx) < 0) + errx(1, "bopen: failed"); + if (bgc(bctx) < 0) + errx(1, "bgc: failed"); + if (bclose(bctx) < 0) + errx(1, "bclose: failed"); + return 0; +} diff --git a/dup-pack.c b/dup-pack.c @@ -61,7 +61,7 @@ main(int 
argc, char *argv[]) char path[PATH_MAX]; struct sctx *sctx; struct bctx *bctx; - struct bparam bparam; + struct bparam bpar; char *repo = "."; ARGBEGIN { @@ -83,7 +83,7 @@ main(int argc, char *argv[]) errx(1, "screat: %s: failed", path); snprintf(path, sizeof(path), "%s/storage", repo); - if (bopen(path, O_RDWR, 0600, &bparam, &bctx) <0) + if (bopen(path, O_RDWR, 0600, &bpar, &bctx) <0) errx(1, "bopen: %s: failed", path); if (pack(sctx, bctx) < 0) diff --git a/dup-unpack.c b/dup-unpack.c @@ -77,7 +77,7 @@ main(int argc, char *argv[]) char path[PATH_MAX]; struct sctx *sctx; struct bctx *bctx; - struct bparam bparam; + struct bparam bpar; char *repo = "."; ARGBEGIN { @@ -99,7 +99,7 @@ main(int argc, char *argv[]) errx(1, "sopen: %s: failed", path); snprintf(path, sizeof(path), "%s/storage", repo); - if (bopen(path, O_RDONLY, 0600, &bparam, &bctx) <0) + if (bopen(path, O_RDONLY, 0600, &bpar, &bctx) <0) errx(1, "bopen: %s: failed", path); if (unpack(sctx, bctx) < 0)