commit 3205dbd75458fc84e08ca98ee1e1fe17b19f2693
parent 2d0701e96dd5242eefe456dca44a5c2b8ba67eb5
Author: sin <sin@2f30.org>
Date: Thu, 25 Apr 2019 20:54:30 +0100
Implement dup-check(1)
Diffstat:
9 files changed, 192 insertions(+), 5 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,7 +1,7 @@
include config.mk
-BIN = dup-init dup-pack dup-unpack
-MAN = dup-init.1 dup-pack.1 dup-unpack.1
+BIN = dup-check dup-init dup-pack dup-unpack
+MAN = dup-check.1 dup-init.1 dup-pack.1 dup-unpack.1
HDR = \
arg.h \
@@ -24,6 +24,7 @@ COMMOBJ = \
snap.o \
unpack.o \
+DCHECKOBJ = $(COMMOBJ) dup-check.o
DINITOBJ = $(COMMOBJ) dup-init.o
DPACKOBJ = $(COMMOBJ) dup-pack.o
DUNPACKOBJ = $(COMMOBJ) dup-unpack.o
@@ -32,10 +33,10 @@ LDLIBS = -lsnappy
all: $(BIN)
-$(DINITOBJ) $(DPACKOBJ) $(DUNPACKOBJ): $(HDR)
+$(DCHECKOBJ) $(DINITOBJ) $(DPACKOBJ) $(DUNPACKOBJ): $(HDR)
clean:
- rm -f $(DINITOBJ) $(DPACKOBJ) $(DUNPACKOBJ) $(BIN)
+ rm -f $(DCHECKOBJ) $(DINITOBJ) $(DPACKOBJ) $(DUNPACKOBJ) $(BIN)
rm -rf dedup-$(VERSION) dedup-$(VERSION).tar.gz
install: all
@@ -58,6 +59,9 @@ dist: clean
.c.o:
$(CC) $(CPPFLAGS) $(CFLAGS) -c $<
+dup-check: $(DCHECKOBJ)
+ $(CC) -o $@ $(DCHECKOBJ) $(LDFLAGS) $(LDLIBS)
+
dup-init: $(DINITOBJ)
$(CC) -o $@ $(DINITOBJ) $(LDFLAGS) $(LDLIBS)
diff --git a/TODO b/TODO
@@ -1,5 +1,4 @@
Use a ring buffer in the chunker (avoid memmove() call)
Create a library archive out of the blake2b files and link with it
pledge/unveil support
-Implement dup-check(1)
Use flock() to avoid corruption
diff --git a/bcompress.c b/bcompress.c
@@ -25,6 +25,7 @@ static int bccreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar)
static int bcopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar);
static int bcput(struct bctx *bctx, void *buf, size_t n, unsigned char *md);
static int bcget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n);
+static int bccheck(struct bctx *bctx, unsigned char *md);
static int bcsync(struct bctx *bctx);
static int bcclose(struct bctx *bctx);
@@ -33,6 +34,7 @@ static struct bops bops = {
.open = bcopen,
.put = bcput,
.get = bcget,
+ .check = bccheck,
.sync = bcsync,
.close = bcclose,
};
@@ -236,6 +238,15 @@ bcget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n)
}
+/*
+ * Forward a block check to the storage layer: the request is passed
+ * unchanged to the check operation returned by bstorageops() and its
+ * result is returned as-is.
+ */
static int
+bccheck(struct bctx *bctx, unsigned char *md)
+{
+	return bstorageops()->check(bctx, md);
+}
+
+static int
bcsync(struct bctx *bctx)
{
struct bops *bops = bstorageops();
diff --git a/block.c b/block.c
@@ -78,6 +78,18 @@ bget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n)
}
+/*
+ * Check the block identified by md using the check operation returned
+ * by bcompressops().
+ * Returns -1 if bctx or md is NULL; otherwise returns the result of
+ * that check operation.
+ */
int
+bcheck(struct bctx *bctx, unsigned char *md)
+{
+	if (bctx != NULL && md != NULL)
+		return bcompressops()->check(bctx, md);
+	return -1;
+}
+
+int
bsync(struct bctx *bctx)
{
struct bops *bops;
diff --git a/block.h b/block.h
@@ -17,6 +17,7 @@ struct bops {
int (*open)(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar);
int (*put)(struct bctx *bctx, void *buf, size_t n, unsigned char *md);
int (*get)(struct bctx *bctx, unsigned char *md, void *buf, size_t *n);
+ int (*check)(struct bctx *bctx, unsigned char *md);
int (*sync)(struct bctx *bctx);
int (*close)(struct bctx *bctx);
};
@@ -26,6 +27,7 @@ extern int bcreat(char *path, int mode, struct bparam *bpar, struct bctx **bctx)
extern int bopen(char *path, int flags, int mode, struct bparam *bpar, struct bctx **bctx);
extern int bput(struct bctx *bctx, void *buf, size_t n, unsigned char *md);
extern int bget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n);
+extern int bcheck(struct bctx *bctx, unsigned char *md);
extern int bsync(struct bctx *bctx);
extern int bclose(struct bctx *bctx);
struct bparam *bparamdef(void);
diff --git a/bstorage.c b/bstorage.c
@@ -51,6 +51,7 @@ static int bscreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar)
static int bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar);
static int bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md);
static int bsget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n);
+static int bscheck(struct bctx *bctx, unsigned char *md);
static int bssync(struct bctx *bctx);
static int bsclose(struct bctx *bctx);
@@ -59,6 +60,7 @@ static struct bops bops = {
.open = bsopen,
.put = bsput,
.get = bsget,
+ .check = bscheck,
.sync = bssync,
.close = bsclose,
};
@@ -515,6 +517,53 @@ bsget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n)
return 0;
}
+/*
+ * Verify a stored block against the given hash.
+ *
+ * The block descriptor matching md is looked up in the cache, the
+ * block's bytes are read from sctx->fd at the descriptor's offset,
+ * hashed again with bhash(), and the new hash is compared with md.
+ *
+ * Returns 0 if the hashes match. Returns -1 if the block is not in
+ * the cache, if allocation, seeking, reading or hashing fails, or if
+ * the hashes differ.
+ */
+static int
+bscheck(struct bctx *bctx, unsigned char *md)
+{
+	struct sctx *sctx = bctx->sctx;
+	struct bd key, *bd;
+	void *buf;
+	int ret = -1;
+
+	/* Find the descriptor of the block named by md */
+	memcpy(key.md, md, MDSIZE);
+	bd = RB_FIND(bdcache, &sctx->bdcache, &key);
+	if (bd == NULL)
+		return -1;
+
+	buf = malloc(bd->size);
+	if (buf == NULL)
+		return -1;
+
+	/* Read the stored bytes back */
+	if (lseek(sctx->fd, bd->offset, SEEK_SET) < 0)
+		goto out;
+	if (xread(sctx->fd, buf, bd->size) != bd->size)
+		goto out;
+
+	/* key.md is reused as scratch space for the recomputed hash */
+	if (bhash(buf, bd->size, key.md) < 0)
+		goto out;
+	if (memcmp(key.md, md, MDSIZE) == 0)
+		ret = 0;
+out:
+	free(buf);
+	return ret;
+}
+
/* Sync block header to storage */
static int
bssync(struct bctx *bctx)
diff --git a/dotest b/dotest
@@ -9,6 +9,8 @@ test0()
./dup-init -Z none "$repo"
./dup-pack -r "$repo" snap0 < "$data"
./dup-pack -r "$repo" snap1 < "$data"
+ ./dup-check -r "$repo" snap0
+ ./dup-check -r "$repo" snap1
du -sh "$repo"
sum0=`sha1sum "$data" | awk '{print $1}'`
sum1=`./dup-unpack -r "$repo" snap0 | sha1sum | awk '{print $1}'`
@@ -26,6 +28,8 @@ test1()
./dup-init -Z snappy "$repo"
./dup-pack -r "$repo" snap0 < "$data"
./dup-pack -r "$repo" snap1 < "$data"
+ ./dup-check -r "$repo" snap0
+ ./dup-check -r "$repo" snap1
du -sh "$repo"
sum0=`sha1sum "$data" | awk '{print $1}'`
sum1=`./dup-unpack -r "$repo" snap0 | sha1sum | awk '{print $1}'`
diff --git a/dup-check.1 b/dup-check.1
@@ -0,0 +1,25 @@
+.Dd April 25, 2019
+.Dt DUP-CHECK 1
+.Os
+.Sh NAME
+.Nm dup-check
+.Nd Check snapshot consistency
+.Sh SYNOPSIS
+.Nm dup-check
+.Op Fl v
+.Op Fl r Ar repo
+.Ar name
+.Sh DESCRIPTION
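+.Nm
+checks that a snapshot is internally consistent.
+Every block referenced by the snapshot is looked up in the repository,
+rehashed and compared against the hash recorded in the snapshot.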
+.Sh OPTIONS
+.Bl -tag -width "-r repo"
+.It Fl r Ar repo
+Repository directory.
+By default the current working directory is used.
+.It Fl v
+Enable verbose mode.
+.El
+.Sh AUTHORS
+.An Dimitris Papastamos Aq Mt sin@2f30.org ,
+.An z3bra Aq Mt contactatz3bradotorg .
diff --git a/dup-check.c b/dup-check.c
@@ -0,0 +1,81 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <err.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "arg.h"
+#include "block.h"
+#include "config.h"
+#include "snap.h"
+
+int verbose;
+char *argv0;
+
+/*
+ * Walk the snapshot and check every block it references.
+ *
+ * sget() is called repeatedly for as long as it returns MDSIZE, and
+ * each hash it produces is handed to bcheck().
+ *
+ * Returns -1 as soon as bcheck() fails, or if sget() ends the walk
+ * with a negative value; returns 0 otherwise.
+ */
+static int
+check(struct sctx *sctx, struct bctx *bctx)
+{
+	unsigned char md[MDSIZE];
+	int n;
+
+	for (;;) {
+		n = sget(sctx, md);
+		if (n != MDSIZE)
+			break;
+		if (bcheck(bctx, md) < 0)
+			return -1;
+	}
+	return n < 0 ? -1 : 0;
+}
+
+/*
+ * Print the command-line synopsis to stderr, using argv0 as the
+ * program name, and exit with status 1. Does not return.
+ */
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: %s [-v] [-r repo] name\n", argv0);
+ exit(1);
+}
+
+/*
+ * dup-check: check the blocks referenced by a snapshot.
+ *
+ * Options:
+ *   -r repo  repository directory (defaults to ".")
+ *   -v       increase verbosity
+ * Exactly one positional argument, the snapshot name, is required.
+ *
+ * Opens <repo>/archive/<name> and <repo>/storage read-only, runs
+ * check() over them and closes both. Returns 0 on success; any
+ * failure terminates the program with status 1 via errx().
+ */
+int
+main(int argc, char *argv[])
+{
+	char path[PATH_MAX];
+	struct sctx *sctx;
+	struct bctx *bctx;
+	/*
+	 * NOTE(review): bparam is handed to bopen() uninitialized;
+	 * presumably bopen() fills it in from the repository - confirm.
+	 */
+	struct bparam bparam;
+	char *repo = ".";
+	int n;
+
+	ARGBEGIN {
+	case 'r':
+		repo = EARGF(usage());
+		break;
+	case 'v':
+		verbose++;
+		break;
+	default:
+		usage();
+	} ARGEND
+
+	if (argc != 1)
+		usage();
+
+	/* Refuse truncated paths rather than opening a different file */
+	n = snprintf(path, sizeof(path), "%s/archive/%s", repo, argv[0]);
+	if (n < 0 || (size_t)n >= sizeof(path))
+		errx(1, "snprintf: %s/archive/%s: path too long", repo, argv[0]);
+	if (sopen(path, O_RDONLY, 0600, &sctx) < 0)
+		errx(1, "sopen: %s: failed", path);
+
+	n = snprintf(path, sizeof(path), "%s/storage", repo);
+	if (n < 0 || (size_t)n >= sizeof(path))
+		errx(1, "snprintf: %s/storage: path too long", repo);
+	if (bopen(path, O_RDONLY, 0600, &bparam, &bctx) < 0)
+		errx(1, "bopen: %s: failed", path);
+
+	/* Message names the failing function, like the other errx() calls */
+	if (check(sctx, bctx) < 0)
+		errx(1, "check: failed");
+
+	if (bclose(bctx) < 0)
+		errx(1, "bclose: failed");
+	if (sclose(sctx) < 0)
+		errx(1, "sclose: failed");
+
+	return 0;
+}