commit 1dbc2650dde7d194f7b87e51abc5efeab4b61eb0
parent 6c3de1be215d063566c45bb412301088abf18add
Author: sin <sin@2f30.org>
Date: Wed, 17 Apr 2019 16:49:05 +0100
Refactor dedup into multiple programs
dinit(1) - Initialize a repository
dlist(1) - List snapshots
dinfo(1) - Print information about the repository
dpack(1) - Create snapshot from stdin
dunpack(1) - Extract snapshot to stdout
dcheck(1) - Check if repository is consistent
Needs more work to share the code properly.
Diffstat:
M | Makefile | | | 61 | ++++++++++++++++++++++++++++++++++++++++++++++++------------- |
M | README | | | 19 | ++++++++++--------- |
A | dcheck.1 | | | 25 | +++++++++++++++++++++++++ |
A | dcheck.c | | | 316 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
D | dedup.1 | | | 62 | -------------------------------------------------------------- |
D | dedup.c | | | 700 | ------------------------------------------------------------------------------- |
A | dinfo.1 | | | 25 | +++++++++++++++++++++++++ |
A | dinfo.c | | | 170 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | dinit.1 | | | 39 | +++++++++++++++++++++++++++++++++++++++ |
A | dinit.c | | | 155 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | dlist.1 | | | 25 | +++++++++++++++++++++++++ |
A | dlist.c | | | 232 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | dpack.1 | | | 35 | +++++++++++++++++++++++++++++++++++ |
A | dpack.c | | | 422 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | dunpack.1 | | | 28 | ++++++++++++++++++++++++++++ |
A | dunpack.c | | | 288 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
16 files changed, 1818 insertions(+), 784 deletions(-)
diff --git a/Makefile b/Makefile
@@ -3,7 +3,16 @@ include config.mk
VERSION = 1.0
PREFIX = /usr/local
MANPREFIX = $(PREFIX)/man
-TARGET = dedup
+TARGET = dcheck dinfo dinit dlist dpack dunpack
+
+MAN = \
+ dcheck.1 \
+ dinfo.1 \
+ dinit.1 \
+ dlist.1 \
+ dpack.1 \
+ dunpack.1 \
+
HDR = \
arg.h \
blake2-impl.h \
@@ -23,7 +32,12 @@ SRC = \
compress-none.c \
compress-snappy.c \
compress.c \
- dedup.c \
+ dcheck.c \
+ dinfo.c \
+ dinit.c \
+ dlist.c \
+ dpack.c \
+ dunpack.c \
hash-blake2b.c \
hash-blake2bp.c \
hash-blake2s.c \
@@ -35,7 +49,7 @@ SRC = \
unpack.c \
utils.c \
-OBJ = \
+COMMOBJ = \
blake2b-ref.o \
blake2bp-ref.o \
blake2s-ref.o \
@@ -45,7 +59,6 @@ OBJ = \
compress-none.o \
compress-snappy.o \
compress.o \
- dedup.o \
hash-blake2b.o \
hash-blake2bp.o \
hash-blake2s.o \
@@ -57,14 +70,21 @@ OBJ = \
unpack.o \
utils.o \
+DCHECKOBJ = $(COMMOBJ) dcheck.o
+DINFOOBJ = $(COMMOBJ) dinfo.o
+DINITOBJ = $(COMMOBJ) dinit.o
+DLISTOBJ = $(COMMOBJ) dlist.o
+DPACKOBJ = $(COMMOBJ) dpack.o
+DUNPACKOBJ = $(COMMOBJ) dunpack.o
+
DISTFILES = \
+ $(MAN) \
$(SRC) \
CHANGELOG \
LICENSE \
Makefile \
README \
config.mk \
- dedup.1 \
CFLAGS = -g -O2 -Wall $(OPENMPCFLAGS)
CPPFLAGS = -I/usr/local/include -D_FILE_OFFSET_BITS=64
@@ -73,20 +93,20 @@ LDLIBS = -llz4 -lsnappy $(OPENMPLDLIBS)
all: $(TARGET)
-$(OBJ): $(HDR)
+$(DCHECKOBJ) $(DINFOOBJ) $(DINITOBJ) $(DLISTOBJ) $(DPACKOBJ) $(DUNPACKOBJ): $(HDR)
clean:
- rm -f $(OBJ) dedup dedup-$(VERSION).tar.gz
+ rm -f $(DCHECKOBJ) $(DINFOOBJ) $(DINITOBJ) $(DLISTOBJ) $(DPACKOBJ) $(DUNPACKOBJ) $(TARGET) dedup-$(VERSION).tar.gz
install: all
mkdir -p $(DESTDIR)$(PREFIX)/bin
- cp -f dedup $(DESTDIR)$(PREFIX)/bin
+ cp -f $(TARGET) $(DESTDIR)$(PREFIX)/bin
mkdir -p $(DESTDIR)$(MANPREFIX)/man1
- cp -f dedup.1 $(DESTDIR)$(MANPREFIX)/man1
+ cp -f $(MAN) $(DESTDIR)$(MANPREFIX)/man1
uninstall:
- rm -f $(DESTDIR)$(PREFIX)/bin/dedup
- rm -f $(DESTDIR)$(MANPREFIX)/man1/dedup.1
+ cd $(DESTDIR)$(PREFIX)/bin && rm -f $(TARGET)
+ cd $(DESTDIR)$(MANPREFIX)/man1 && rm -f $(MAN)
dist:
mkdir -p dedup-$(VERSION)
@@ -102,5 +122,20 @@ dist:
.c.o:
$(CC) $(CPPFLAGS) $(CFLAGS) -c $<
-dedup: $(OBJ)
- $(CC) -o $@ $(OBJ) $(LDFLAGS) $(LDLIBS)
+dcheck: $(DCHECKOBJ)
+ $(CC) -o $@ $(DCHECKOBJ) $(LDFLAGS) $(LDLIBS)
+
+dinfo: $(DINFOOBJ)
+ $(CC) -o $@ $(DINFOOBJ) $(LDFLAGS) $(LDLIBS)
+
+dinit: $(DINITOBJ)
+ $(CC) -o $@ $(DINITOBJ) $(LDFLAGS) $(LDLIBS)
+
+dlist: $(DLISTOBJ)
+ $(CC) -o $@ $(DLISTOBJ) $(LDFLAGS) $(LDLIBS)
+
+dpack: $(DPACKOBJ)
+ $(CC) -o $@ $(DPACKOBJ) $(LDFLAGS) $(LDLIBS)
+
+dunpack: $(DUNPACKOBJ)
+ $(CC) -o $@ $(DUNPACKOBJ) $(LDFLAGS) $(LDLIBS)
diff --git a/README b/README
@@ -8,29 +8,30 @@ Getting started
To use dedup you have to first initialize the repository.
- dedup -r ~/repo -i
+ dinit repo
+
+This will create .{snapshots,store} files in the repo directory. The
+store file contains all the unique blocks. The snapshots file
+contains all the revisions of files that have been deduplicated.
dedup only handles a single file at a time, so using tar is advised.
-For example, to dedup a directory tree you can invoke dedup as
+For example, to dedup a directory tree you can invoke dpack as
follows:
- tar -c ~/dir | dedup -r ~/repo -m "$(date)"
+ tar -c ~/dir | dpack -m "$(date)" repo
-This will create .{snapshots,store} files in the ~/repo directory.
-The store file contains all the unique blocks. The snapshots file
-contains all the revisions of files that have been deduplicated. The
--m flag is used to attach an arbitrary message to the snapshot.
+The -m flag is used to attach an arbitrary message to the snapshot.
To list all known revisions run:
- dedup -r ~/repo -l
+ dlist repo
You will get a list of hashes. Each hash corresponds to a single file
(in this case, a tar archive).
To extract a file from the deduplicated store run:
- dedup -r ~/repo -e <hash> > dir.tar
+ dunpack -e <hash> repo > snapshot.tar
Portability
===========
diff --git a/dcheck.1 b/dcheck.1
@@ -0,0 +1,25 @@
+.Dd April 17, 2019
+.Dt DCHECK 1
+.Os
+.Sh NAME
+.Nm dcheck
+.Nd Perform consistency checks on a dedup repo
+.Sh SYNOPSIS
+.Nm dcheck
+.Op Fl v
+.Op repo
+.Sh DESCRIPTION
+.Nm
+performs consistency checks on a dedup repo.
+If no
+.Ar repo
+is specified, then the current directory
+is assumed to be the repository.
+.Sh OPTIONS
+.Bl -tag -width "-v"
+.It Fl v
+Enable verbose mode.
+.El
+.Sh AUTHORS
+.An Dimitris Papastamos Aq Mt sin@2f30.org ,
+.An z3bra Aq Mt contactatz3bradotorg .
diff --git a/dcheck.c b/dcheck.c
@@ -0,0 +1,316 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+
+#include <err.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "arg.h"
+#include "blake2.h"
+#include "dedup.h"
+
+#define SNAPSF ".snapshots"
+#define STOREF ".store"
+
+enum {
+ WALK_CONTINUE,
+ WALK_STOP
+};
+
+static struct snap_hdr snap_hdr;
+static struct blk_hdr blk_hdr;
+static int ifd;
+static int sfd;
+static int hash_algo = HASH_BLAKE2B;
+static int compr_algo = COMPR_LZ4;
+
+int verbose;
+char *argv0;
+
+static void
+print_md(FILE *fp, uint8_t *md, size_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ fprintf(fp, "%02x", md[i]);
+}
+
+static struct snap *
+alloc_snap(void)
+{
+ struct snap *snap;
+
+ snap = calloc(1, sizeof(*snap));
+ if (snap == NULL)
+ err(1, "%s", __func__);
+ return snap;
+}
+
+static void
+free_snap(struct snap *snap)
+{
+ free(snap);
+}
+
+static struct snap *
+grow_snap(struct snap *snap, uint64_t nr_blk_descs)
+{
+ size_t size;
+
+ if (nr_blk_descs > SIZE_MAX / sizeof(snap->blk_desc[0]))
+ errx(1, "%s: overflow", __func__);
+ size = nr_blk_descs * sizeof(snap->blk_desc[0]);
+
+ if (size > SIZE_MAX - sizeof(*snap))
+ errx(1, "%s: overflow", __func__);
+ size += sizeof(*snap);
+
+ snap = realloc(snap, size);
+ if (snap == NULL)
+ err(1, "%s", __func__);
+ return snap;
+}
+
+static uint8_t *
+alloc_buf(size_t size)
+{
+ void *p;
+
+ p = calloc(1, size);
+ if (p == NULL)
+ err(1, "%s", __func__);
+ return p;
+}
+
+static void
+free_buf(uint8_t *buf)
+{
+ free(buf);
+}
+
+static void
+hash_blk(uint8_t *buf, size_t size, uint8_t *md)
+{
+ struct hash_ctx ctx;
+
+ if (hash_init(&ctx, hash_algo, MD_SIZE) < 0)
+ errx(1, "hash_init failed");
+ hash_update(&ctx, buf, size);
+ hash_final(&ctx, md, MD_SIZE);
+}
+
+static void
+read_blk(uint8_t *buf, struct blk_desc *blk_desc)
+{
+ ssize_t n;
+
+ xlseek(sfd, blk_desc->offset, SEEK_SET);
+ n = xread(sfd, buf, blk_desc->size);
+ if (n == 0)
+ errx(1, "%s: unexpected EOF", __func__);
+ if (n != blk_desc->size)
+ errx(1, "%s: short read", __func__);
+}
+
+/*
+ * Hash every block referenced by the given snapshot
+ * and compare its hash with the one stored in the corresponding
+ * block descriptor.
+ */
+static int
+check_snap(struct snap *snap, void *arg)
+{
+ struct compr_ctx ctx;
+ uint8_t *buf;
+ int *ret = arg;
+ uint64_t i;
+
+ if (verbose > 0) {
+ fprintf(stderr, "Checking snapshot: ");
+ print_md(stderr, snap->md, sizeof(snap->md));
+ fputc('\n', stderr);
+ }
+
+ if (compr_init(&ctx, compr_algo) < 0)
+ errx(1, "compr_init failed");
+ buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
+ for (i = 0; i < snap->nr_blk_descs; i++) {
+ uint8_t md[MD_SIZE];
+ struct blk_desc *blk_desc;
+
+ blk_desc = &snap->blk_desc[i];
+ read_blk(buf, blk_desc);
+ hash_blk(buf, blk_desc->size, md);
+
+ if (memcmp(blk_desc->md, md, sizeof(blk_desc->md)) == 0)
+ continue;
+
+ fprintf(stderr, "Block hash mismatch\n");
+ fprintf(stderr, " Expected hash: ");
+ print_md(stderr, blk_desc->md, sizeof(blk_desc->md));
+ fputc('\n', stderr);
+ fprintf(stderr, " Actual hash: ");
+ print_md(stderr, md, sizeof(md));
+ fputc('\n', stderr);
+ fprintf(stderr, " Offset: %llu\n",
+ (unsigned long long)blk_desc->offset);
+ fprintf(stderr, " Size: %llu\n",
+ (unsigned long long)blk_desc->size);
+ *ret = -1;
+ }
+ free_buf(buf);
+ compr_final(&ctx);
+ return WALK_CONTINUE;
+}
+
+/* Walk through all snapshots and call fn() on each one */
+static void
+walk_snap(int (*fn)(struct snap *, void *), void *arg)
+{
+ uint64_t i;
+
+ xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
+ for (i = 0; i < snap_hdr.nr_snaps; i++) {
+ struct snap *snap;
+ int ret;
+
+ snap = alloc_snap();
+ read_snap(ifd, snap);
+ snap = grow_snap(snap, snap->nr_blk_descs);
+ read_snap_descs(ifd, snap);
+
+ ret = (*fn)(snap, arg);
+ free_snap(snap);
+ if (ret == WALK_STOP)
+ break;
+ }
+}
+
+static void
+match_ver(uint64_t v)
+{
+ uint8_t maj, min;
+
+ min = v & VER_MIN_MASK;
+ maj = (v >> VER_MAJ_SHIFT) & VER_MAJ_MASK;
+ if (maj == VER_MAJ && min == VER_MIN)
+ return;
+ errx(1, "format version mismatch: expected %u.%u but got %u.%u",
+ VER_MAJ, VER_MIN, maj, min);
+}
+
+static void
+load_blk_hdr(void)
+{
+ uint64_t v;
+
+ xlseek(sfd, 0, SEEK_SET);
+ read_blk_hdr(sfd, &blk_hdr);
+ match_ver(blk_hdr.flags);
+
+ v = blk_hdr.flags >> COMPR_ALGO_SHIFT;
+ v &= COMPR_ALGO_MASK;
+ compr_algo = v;
+
+ if (compr_algo < 0 || compr_algo >= NR_COMPRS)
+ errx(1, "unsupported compression algorithm: %d", compr_algo);
+
+ if (verbose > 0)
+ fprintf(stderr, "Compression algorithm: %s\n",
+ compr_type2name(compr_algo));
+
+ v = blk_hdr.flags >> HASH_ALGO_SHIFT;
+ v &= HASH_ALGO_MASK;
+ hash_algo = v;
+
+ if (hash_algo < 0 || hash_algo >= NR_HASHES)
+ errx(1, "unsupported hash algorithm: %d", hash_algo);
+
+ if (verbose > 0)
+ fprintf(stderr, "Hash algorithm: %s\n",
+ hash_type2name(hash_algo));
+}
+
+static void
+load_snap_hdr(void)
+{
+ xlseek(ifd, 0, SEEK_SET);
+ read_snap_hdr(ifd, &snap_hdr);
+ match_ver(snap_hdr.flags);
+}
+
+static void
+init(void)
+{
+ ifd = open(SNAPSF, O_RDONLY, 0600);
+ if (ifd < 0)
+ err(1, "open %s", SNAPSF);
+
+ sfd = open(STOREF, O_RDONLY, 0600);
+ if (sfd < 0)
+ err(1, "open %s", STOREF);
+
+ if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
+ flock(sfd, LOCK_NB | LOCK_EX) < 0)
+ err(1, "flock");
+
+ load_snap_hdr();
+ load_blk_hdr();
+}
+
+static void
+term(void)
+{
+ close(sfd);
+ close(ifd);
+}
+
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: %s [-v] [repo]\n", argv0);
+ exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+ char *repo = NULL;
+ int ret;
+
+ ARGBEGIN {
+ case 'v':
+ verbose++;
+ break;
+ default:
+ usage();
+ } ARGEND
+
+ switch (argc) {
+ case 0:
+ repo = ".";
+ break;
+ case 1:
+ repo = argv[0];
+ break;
+ default:
+ usage();
+ };
+
+ if (chdir(repo) < 0)
+ err(1, "chdir: %s", repo);
+
+ init();
+ ret = 0;
+ walk_snap(check_snap, &ret);
+ if (ret != 0)
+ errx(1, "%s or %s is corrupted", SNAPSF, STOREF);
+ term();
+ return 0;
+}
diff --git a/dedup.1 b/dedup.1
@@ -1,62 +0,0 @@
-.Dd April 10, 2019
-.Dt DEDUP 1
-.Os
-.Sh NAME
-.Nm dedup
-.Nd data deduplication program
-.Sh SYNOPSIS
-.Nm dedup
-.Op Fl cilv
-.Op Fl H Ar hash
-.Op Fl Z Ar compressor
-.Op Fl e Ar id
-.Op Fl r Ar root
-.Op Fl m Ar message
-.Op file
-.Sh DESCRIPTION
-.Nm
-deduplicates data from the specified
-.Ar file .
-If no
-.Ar file
-is specified then it reads from standard input.
-.Pp
-.Nm
-does not track any file metadata so to deduplicate
-directory trees, an archival tool like
-.Xr tar 1
-should be used and piped into
-.Nm .
-.Sh OPTIONS
-.Bl -tag -width "-Z compressor"
-.It Fl H Ar hash
-The cryptographic hash function used to identify
-unique blocks in the store.
-The supported hash functions are blake2b, blake2bp, blake2s and blake2sp.
-This flag only has an effect when initializing the repository.
-By default blake2b is used.
-.It Fl Z Ar compressor
-The compressor function used to compress the blocks
-in the store.
-The supported compressor functions are none, lz4 and snappy.
-This flag only has an effect when initializing the repository.
-By default lz4 is used.
-.It Fl c
-Perform a consistency check on the repository.
-.It Fl i
-Initialize the repository.
-.It Fl l
-List snapshots.
-.It Fl v
-Enable verbose mode.
-.It Fl e Ar id
-Extract snapshot with the specified id.
-.It Fl r Ar root
-Set the directory where the repository will be created.
-By default the repository is created in the current directory.
-.It Fl m Ar message
-Attach a descriptive message to the snapshot.
-.El
-.Sh AUTHORS
-.An Dimitris Papastamos Aq Mt sin@2f30.org ,
-.An z3bra Aq Mt contactatz3bradotorg .
diff --git a/dedup.c b/dedup.c
@@ -1,700 +0,0 @@
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/file.h>
-
-#include <err.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "arg.h"
-#include "blake2.h"
-#include "dedup.h"
-
-#define SNAPSF ".snapshots"
-#define STOREF ".store"
-
-enum {
- WALK_CONTINUE,
- WALK_STOP
-};
-
-struct extract_args {
- uint8_t *md;
- int fd;
- int ret;
-};
-
-static struct snap_hdr snap_hdr;
-static struct blk_hdr blk_hdr;
-static struct icache *icache;
-static int ifd;
-static int sfd;
-static int hash_algo = HASH_BLAKE2B;
-static int compr_algo = COMPR_LZ4;
-
-int verbose;
-char *argv0;
-
-static void
-print_md(FILE *fp, uint8_t *md, size_t size)
-{
- size_t i;
-
- for (i = 0; i < size; i++)
- fprintf(fp, "%02x", md[i]);
-}
-
-static void
-print_stats(struct stats *st)
-{
- unsigned long long hits, misses;
- double hitratio;
-
- if (st->nr_blks == 0)
- return;
-
- fprintf(stderr, "Original size: %llu bytes\n",
- (unsigned long long)st->orig_size);
- fprintf(stderr, "Compressed size: %llu bytes\n",
- (unsigned long long)st->compr_size);
- fprintf(stderr, "Deduplicated size: %llu bytes\n",
- (unsigned long long)st->dedup_size);
- fprintf(stderr, "Deduplication ratio: %.2f\n",
- (double)st->orig_size / st->dedup_size);
- fprintf(stderr, "Min/avg/max block size: %llu/%llu/%llu bytes\n",
- (unsigned long long)st->min_blk_size,
- (unsigned long long)st->dedup_size / st->nr_blks,
- (unsigned long long)st->max_blk_size);
- fprintf(stderr, "Number of unique blocks: %llu\n",
- (unsigned long long)st->nr_blks);
-
- icache_stats(icache, &hits, &misses);
- if (hits == 0 && misses == 0)
- hitratio = 0;
- else
- hitratio = (double)hits / (hits + misses);
-
- fprintf(stderr, "Index cache hit percentage: %.2f%%\n",
- 100 * hitratio);
-}
-
-static struct snap *
-alloc_snap(void)
-{
- struct snap *snap;
-
- snap = calloc(1, sizeof(*snap));
- if (snap == NULL)
- err(1, "%s", __func__);
- return snap;
-}
-
-static void
-free_snap(struct snap *snap)
-{
- free(snap);
-}
-
-/*
- * The snapshot hash is calculated over the
- * hash of its block descriptors.
- */
-static void
-hash_snap(struct snap *snap, uint8_t *md)
-{
- struct hash_ctx ctx;
- uint64_t i;
-
- if (hash_init(&ctx, hash_algo, MD_SIZE) < 0)
- errx(1, "hash_init failed");
- for (i = 0; i < snap->nr_blk_descs; i++) {
- struct blk_desc *blk_desc;
-
- blk_desc = &snap->blk_desc[i];
- hash_update(&ctx, blk_desc->md, sizeof(blk_desc->md));
- }
- hash_final(&ctx, md, MD_SIZE);
-}
-
-static struct snap *
-grow_snap(struct snap *snap, uint64_t nr_blk_descs)
-{
- size_t size;
-
- if (nr_blk_descs > SIZE_MAX / sizeof(snap->blk_desc[0]))
- errx(1, "%s: overflow", __func__);
- size = nr_blk_descs * sizeof(snap->blk_desc[0]);
-
- if (size > SIZE_MAX - sizeof(*snap))
- errx(1, "%s: overflow", __func__);
- size += sizeof(*snap);
-
- snap = realloc(snap, size);
- if (snap == NULL)
- err(1, "%s", __func__);
- return snap;
-}
-
-static void
-append_snap(struct snap *snap)
-{
- if (snap->nr_blk_descs > UINT64_MAX / BLK_DESC_SIZE)
- errx(1, "%s: overflow", __func__);
- snap->size = snap->nr_blk_descs * BLK_DESC_SIZE;
-
- if (snap->size > UINT64_MAX - SNAPSHOT_SIZE)
- errx(1, "%s: overflow", __func__);
- snap->size += SNAPSHOT_SIZE;
-
- xlseek(ifd, snap_hdr.size, SEEK_SET);
- write_snap(ifd, snap);
- write_snap_blk_descs(ifd, snap);
-
- if (snap_hdr.size > UINT64_MAX - snap->size)
- errx(1, "%s: overflow", __func__);
- snap_hdr.size += snap->size;
-
- if (snap_hdr.nr_snaps > UINT64_MAX - 1)
- errx(1, "%s: overflow", __func__);
- snap_hdr.nr_snaps++;
-}
-
-static uint8_t *
-alloc_buf(size_t size)
-{
- void *p;
-
- p = calloc(1, size);
- if (p == NULL)
- err(1, "%s", __func__);
- return p;
-}
-
-static void
-free_buf(uint8_t *buf)
-{
- free(buf);
-}
-
-static void
-hash_blk(uint8_t *buf, size_t size, uint8_t *md)
-{
- struct hash_ctx ctx;
-
- if (hash_init(&ctx, hash_algo, MD_SIZE) < 0)
- errx(1, "hash_init failed");
- hash_update(&ctx, buf, size);
- hash_final(&ctx, md, MD_SIZE);
-}
-
-static void
-read_blk(uint8_t *buf, struct blk_desc *blk_desc)
-{
- ssize_t n;
-
- xlseek(sfd, blk_desc->offset, SEEK_SET);
- n = xread(sfd, buf, blk_desc->size);
- if (n == 0)
- errx(1, "%s: unexpected EOF", __func__);
- if (n != blk_desc->size)
- errx(1, "%s: short read", __func__);
-}
-
-static void
-append_blk(uint8_t *buf, struct blk_desc *blk_desc)
-{
- xlseek(sfd, blk_hdr.size, SEEK_SET);
- xwrite(sfd, buf, blk_desc->size);
-
- if (blk_hdr.size > UINT64_MAX - blk_desc->size)
- errx(1, "%s: overflow", __func__);
- blk_hdr.size += blk_desc->size;
-}
-
-static void
-dedup_chunk(struct snap *snap, uint8_t *chunkp, size_t chunk_size)
-{
- uint8_t md[MD_SIZE];
- struct blk_desc blk_desc;
- struct compr_ctx ctx;
- uint8_t *compr_buf;
- size_t n, csize;
-
- if (compr_init(&ctx, compr_algo) < 0)
- errx(1, "compr_init failed");
- csize = compr_size(&ctx, BLKSIZE_MAX);
- compr_buf = alloc_buf(csize);
-
- n = compr(&ctx, chunkp, compr_buf, chunk_size, csize);
- hash_blk(compr_buf, n, md);
-
- snap_hdr.st.orig_size += chunk_size;
- snap_hdr.st.compr_size += n;
-
- memcpy(blk_desc.md, md, sizeof(blk_desc.md));
- if (lookup_icache(icache, &blk_desc) < 0) {
- blk_desc.offset = blk_hdr.size;
- blk_desc.size = n;
-
- snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
- append_blk(compr_buf, &blk_desc);
-
- insert_icache(icache, &blk_desc);
-
- snap_hdr.st.dedup_size += blk_desc.size;
- snap_hdr.st.nr_blks++;
-
- if (blk_desc.size > snap_hdr.st.max_blk_size)
- snap_hdr.st.max_blk_size = blk_desc.size;
- if (blk_desc.size < snap_hdr.st.min_blk_size)
- snap_hdr.st.min_blk_size = blk_desc.size;
- } else {
- snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
- }
-
- free(compr_buf);
- compr_final(&ctx);
-}
-
-static void
-dedup(int fd, char *msg)
-{
- struct snap *snap;
- struct chunker *chunker;
-
- snap = alloc_snap();
- chunker = alloc_chunker(fd, BLKSIZE_MIN, BLKSIZE_MAX,
- HASHMASK_BITS, WINSIZE);
-
- while (fill_chunker(chunker) > 0) {
- uint8_t *chunkp;
- size_t chunk_size;
-
- chunkp = get_chunk(chunker, &chunk_size);
- snap = grow_snap(snap, snap->nr_blk_descs + 1);
- dedup_chunk(snap, chunkp, chunk_size);
- drain_chunker(chunker);
- }
-
- if (snap->nr_blk_descs > 0) {
- if (msg != NULL) {
- size_t size;
-
- size = strlen(msg) + 1;
- if (size > sizeof(snap->msg))
- size = sizeof(snap->msg);
- memcpy(snap->msg, msg, size);
- snap->msg[size - 1] = '\0';
- }
- hash_snap(snap, snap->md);
- append_snap(snap);
- }
-
- free_chunker(chunker);
- free_snap(snap);
-}
-
-static int
-extract(struct snap *snap, void *arg)
-{
- uint8_t *buf[2];
- struct extract_args *args = arg;
- struct compr_ctx ctx;
- uint64_t i;
-
- if (memcmp(snap->md, args->md, sizeof(snap->md)) != 0)
- return WALK_CONTINUE;
-
- if (compr_init(&ctx, compr_algo) < 0)
- errx(1, "compr_init failed");
- buf[0] = alloc_buf(BLKSIZE_MAX);
- buf[1] = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
- for (i = 0; i < snap->nr_blk_descs; i++) {
- struct blk_desc *blk_desc;
- size_t blksize;
-
- blk_desc = &snap->blk_desc[i];
- read_blk(buf[1], blk_desc);
- blksize = decompr(&ctx, buf[1], buf[0], blk_desc->size, BLKSIZE_MAX);
- xwrite(args->fd, buf[0], blksize);
- }
- free_buf(buf[1]);
- free_buf(buf[0]);
- compr_final(&ctx);
- args->ret = 0;
- return WALK_STOP;
-}
-
-/*
- * Hash every block referenced by the given snapshot
- * and compare its hash with the one stored in the corresponding
- * block descriptor.
- */
-static int
-check_snap(struct snap *snap, void *arg)
-{
- struct compr_ctx ctx;
- uint8_t *buf;
- int *ret = arg;
- uint64_t i;
-
- if (verbose > 0) {
- fprintf(stderr, "Checking snapshot: ");
- print_md(stderr, snap->md, sizeof(snap->md));
- fputc('\n', stderr);
- }
-
- if (compr_init(&ctx, compr_algo) < 0)
- errx(1, "compr_init failed");
- buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
- for (i = 0; i < snap->nr_blk_descs; i++) {
- uint8_t md[MD_SIZE];
- struct blk_desc *blk_desc;
-
- blk_desc = &snap->blk_desc[i];
- read_blk(buf, blk_desc);
- hash_blk(buf, blk_desc->size, md);
-
- if (memcmp(blk_desc->md, md, sizeof(blk_desc->md)) == 0)
- continue;
-
- fprintf(stderr, "Block hash mismatch\n");
- fprintf(stderr, " Expected hash: ");
- print_md(stderr, blk_desc->md, sizeof(blk_desc->md));
- fputc('\n', stderr);
- fprintf(stderr, " Actual hash: ");
- print_md(stderr, md, sizeof(md));
- fputc('\n', stderr);
- fprintf(stderr, " Offset: %llu\n",
- (unsigned long long)blk_desc->offset);
- fprintf(stderr, " Size: %llu\n",
- (unsigned long long)blk_desc->size);
- *ret = -1;
- }
- free_buf(buf);
- compr_final(&ctx);
- return WALK_CONTINUE;
-}
-
-static int
-build_icache(struct snap *snap, void *arg)
-{
- struct compr_ctx ctx;
- uint8_t *buf;
- uint64_t i;
-
- if (compr_init(&ctx, compr_algo) < 0)
- errx(1, "compr_init failed");
- buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
- for (i = 0; i < snap->nr_blk_descs; i++) {
- struct blk_desc *blk_desc;
-
- blk_desc = &snap->blk_desc[i];
- insert_icache(icache, blk_desc);
- }
- free(buf);
- compr_final(&ctx);
- return WALK_CONTINUE;
-}
-
-static int
-list(struct snap *snap, void *arg)
-{
- print_md(stdout, snap->md, sizeof(snap->md));
- if (snap->msg[0] != '\0')
- printf("\t%s\n", snap->msg);
- else
- putchar('\n');
- return WALK_CONTINUE;
-}
-
-/* Walk through all snapshots and call fn() on each one */
-static void
-walk_snap(int (*fn)(struct snap *, void *), void *arg)
-{
- uint64_t i;
-
- xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
- for (i = 0; i < snap_hdr.nr_snaps; i++) {
- struct snap *snap;
- int ret;
-
- snap = alloc_snap();
- read_snap(ifd, snap);
- snap = grow_snap(snap, snap->nr_blk_descs);
- read_snap_descs(ifd, snap);
-
- ret = (*fn)(snap, arg);
- free_snap(snap);
- if (ret == WALK_STOP)
- break;
- }
-}
-
-static void
-match_ver(uint64_t v)
-{
- uint8_t maj, min;
-
- min = v & VER_MIN_MASK;
- maj = (v >> VER_MAJ_SHIFT) & VER_MAJ_MASK;
- if (maj == VER_MAJ && min == VER_MIN)
- return;
- errx(1, "format version mismatch: expected %u.%u but got %u.%u",
- VER_MAJ, VER_MIN, maj, min);
-}
-
-static void
-init_blk_hdr(void)
-{
- blk_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN;
- blk_hdr.flags |= compr_algo << COMPR_ALGO_SHIFT;
- blk_hdr.flags |= hash_algo << HASH_ALGO_SHIFT;
- blk_hdr.size = BLK_HDR_SIZE;
-}
-
-static void
-load_blk_hdr(void)
-{
- uint64_t v;
-
- xlseek(sfd, 0, SEEK_SET);
- read_blk_hdr(sfd, &blk_hdr);
- match_ver(blk_hdr.flags);
-
- v = blk_hdr.flags >> COMPR_ALGO_SHIFT;
- v &= COMPR_ALGO_MASK;
- compr_algo = v;
-
- if (compr_algo < 0 || compr_algo >= NR_COMPRS)
- errx(1, "unsupported compression algorithm: %d", compr_algo);
-
- if (verbose > 0)
- fprintf(stderr, "Compression algorithm: %s\n",
- compr_type2name(compr_algo));
-
- v = blk_hdr.flags >> HASH_ALGO_SHIFT;
- v &= HASH_ALGO_MASK;
- hash_algo = v;
-
- if (hash_algo < 0 || hash_algo >= NR_HASHES)
- errx(1, "unsupported hash algorithm: %d", hash_algo);
-
- if (verbose > 0)
- fprintf(stderr, "Hash algorithm: %s\n",
- hash_type2name(hash_algo));
-}
-
-static void
-save_blk_hdr(void)
-{
- xlseek(sfd, 0, SEEK_SET);
- write_blk_hdr(sfd, &blk_hdr);
-}
-
-static void
-init_snap_hdr(void)
-{
- snap_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN;
- snap_hdr.size = SNAP_HDR_SIZE;
- snap_hdr.st.min_blk_size = UINT64_MAX;
-}
-
-static void
-load_snap_hdr(void)
-{
- xlseek(ifd, 0, SEEK_SET);
- read_snap_hdr(ifd, &snap_hdr);
- match_ver(snap_hdr.flags);
-}
-
-static void
-save_snap_hdr(void)
-{
- xlseek(ifd, 0, SEEK_SET);
- write_snap_hdr(ifd, &snap_hdr);
-}
-
-static void
-init(int iflag)
-{
- int flags;
-
- flags = O_RDWR;
- if (iflag)
- flags |= O_CREAT | O_EXCL;
-
- ifd = open(SNAPSF, flags, 0600);
- if (ifd < 0)
- err(1, "open %s", SNAPSF);
-
- sfd = open(STOREF, flags, 0600);
- if (sfd < 0)
- err(1, "open %s", STOREF);
-
- if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
- flock(sfd, LOCK_NB | LOCK_EX) < 0)
- err(1, "flock");
-
- if (iflag) {
- init_snap_hdr();
- init_blk_hdr();
- } else {
- load_snap_hdr();
- load_blk_hdr();
- }
-
- icache = alloc_icache();
- walk_snap(build_icache, NULL);
-}
-
-static void
-term(void)
-{
- if (verbose > 0)
- print_stats(&snap_hdr.st);
-
- free_icache(icache);
-
- save_blk_hdr();
- save_snap_hdr();
-
- fsync(sfd);
- fsync(ifd);
-
- close(sfd);
- close(ifd);
-}
-
-static void
-usage(void)
-{
- fprintf(stderr, "usage: %s [-cilv] [-H hash] [-Z compressor] [-e id] [-r root] [-m message] [file]\n", argv0);
- exit(1);
-}
-
-int
-main(int argc, char *argv[])
-{
- uint8_t md[MD_SIZE];
- char *id = NULL, *root = NULL, *msg = NULL, *hash_name, *compr_name;
- int iflag = 0, lflag = 0, cflag = 0;
- int fd = -1;
-
- ARGBEGIN {
- case 'H':
- hash_name = EARGF(usage());
- if (strcmp(hash_name, "?") == 0) {
- hash_list(STDERR_FILENO);
- return 0;
- }
- hash_algo = hash_name2type(hash_name);
- if (hash_algo < 0)
- errx(1, "unknown hash: %s", hash_name);
- break;
- case 'Z':
- compr_name = EARGF(usage());
- if (strcmp(compr_name, "?") == 0) {
- compr_list(STDERR_FILENO);
- return 0;
- }
- compr_algo = compr_name2type(compr_name);
- if (compr_algo < 0)
- errx(1, "unknown compressor: %s", compr_name);
- break;
- case 'c':
- cflag = 1;
- break;
- case 'e':
- id = EARGF(usage());
- break;
- case 'i':
- iflag = 1;
- break;
- case 'l':
- lflag = 1;
- break;
- case 'r':
- root = EARGF(usage());
- break;
- case 'm':
- msg = EARGF(usage());
- break;
- case 'v':
- verbose++;
- break;
- default:
- usage();
- } ARGEND
-
- if (argc > 1) {
- usage();
- } else if (argc == 1) {
- if (id) {
- fd = open(argv[0], O_RDWR | O_CREAT, 0600);
- if (fd < 0)
- err(1, "open %s", argv[0]);
- } else {
- fd = open(argv[0], O_RDONLY);
- if (fd < 0)
- err(1, "open %s", argv[0]);
- }
- } else {
- if (id)
- fd = STDOUT_FILENO;
- else
- fd = STDIN_FILENO;
- }
-
- if (root != NULL) {
- mkdir(root, 0700);
- if (chdir(root) < 0)
- err(1, "chdir: %s", root);
- }
-
- init(iflag);
-
- if (iflag) {
- term();
- return 0;
- }
-
- if (cflag) {
- int ret;
-
- ret = 0;
- walk_snap(check_snap, &ret);
- if (ret != 0)
- errx(1, "%s or %s is corrupted", SNAPSF, STOREF);
-
- term();
- return 0;
- }
-
- if (lflag) {
- walk_snap(list, NULL);
- term();
- return 0;
- }
-
- if (id) {
- struct extract_args args;
-
- str2bin(id, md);
- args.md = md;
- args.fd = fd;
- args.ret = -1;
- walk_snap(extract, &args);
- if (args.ret != 0)
- errx(1, "unknown snapshot: %s", id);
- } else {
- dedup(fd, msg);
- }
-
- term();
- return 0;
-}
diff --git a/dinfo.1 b/dinfo.1
@@ -0,0 +1,25 @@
+.Dd April 17, 2019
+.Dt DINFO 1
+.Os
+.Sh NAME
+.Nm dinfo
+.Nd Print information about a dedup repository
+.Sh SYNOPSIS
+.Nm dinfo
+.Op Fl v
+.Op repo
+.Sh DESCRIPTION
+.Nm
+prints information about a dedup repository.
+If no
+.Ar repo
+is specified, then the current directory
+is assumed to be the repository.
+.Sh OPTIONS
+.Bl -tag -width "-v"
+.It Fl v
+Enable verbose mode.
+.El
+.Sh AUTHORS
+.An Dimitris Papastamos Aq Mt sin@2f30.org ,
+.An z3bra Aq Mt contactatz3bradotorg .
diff --git a/dinfo.c b/dinfo.c
@@ -0,0 +1,170 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+
+#include <err.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "arg.h"
+#include "blake2.h"
+#include "dedup.h"
+
+#define SNAPSF ".snapshots"
+#define STOREF ".store"
+
+static struct snap_hdr snap_hdr;
+static struct blk_hdr blk_hdr;
+static int ifd;
+static int sfd;
+static int hash_algo = HASH_BLAKE2B;
+static int compr_algo = COMPR_LZ4;
+
+int verbose;
+char *argv0;
+
+static void
+print_info(struct stats *st)
+{
+ if (st->nr_blks == 0)
+ return;
+
+ fprintf(stderr, "Original size: %llu bytes\n",
+ (unsigned long long)st->orig_size);
+ fprintf(stderr, "Compressed size: %llu bytes\n",
+ (unsigned long long)st->compr_size);
+ fprintf(stderr, "Deduplicated size: %llu bytes\n",
+ (unsigned long long)st->dedup_size);
+ fprintf(stderr, "Deduplication ratio: %.2f\n",
+ (double)st->orig_size / st->dedup_size);
+ fprintf(stderr, "Min/avg/max block size: %llu/%llu/%llu bytes\n",
+ (unsigned long long)st->min_blk_size,
+ (unsigned long long)st->dedup_size / st->nr_blks,
+ (unsigned long long)st->max_blk_size);
+ fprintf(stderr, "Number of unique blocks: %llu\n",
+ (unsigned long long)st->nr_blks);
+}
+
+static void
+match_ver(uint64_t v)
+{
+ uint8_t maj, min;
+
+ min = v & VER_MIN_MASK;
+ maj = (v >> VER_MAJ_SHIFT) & VER_MAJ_MASK;
+ if (maj == VER_MAJ && min == VER_MIN)
+ return;
+ errx(1, "format version mismatch: expected %u.%u but got %u.%u",
+ VER_MAJ, VER_MIN, maj, min);
+}
+
+static void
+load_blk_hdr(void)
+{
+ uint64_t v;
+
+ xlseek(sfd, 0, SEEK_SET);
+ read_blk_hdr(sfd, &blk_hdr);
+ match_ver(blk_hdr.flags);
+
+ v = blk_hdr.flags >> COMPR_ALGO_SHIFT;
+ v &= COMPR_ALGO_MASK;
+ compr_algo = v;
+
+ if (compr_algo < 0 || compr_algo >= NR_COMPRS)
+ errx(1, "unsupported compression algorithm: %d", compr_algo);
+
+ if (verbose > 0)
+ fprintf(stderr, "Compression algorithm: %s\n",
+ compr_type2name(compr_algo));
+
+ v = blk_hdr.flags >> HASH_ALGO_SHIFT;
+ v &= HASH_ALGO_MASK;
+ hash_algo = v;
+
+ if (hash_algo < 0 || hash_algo >= NR_HASHES)
+ errx(1, "unsupported hash algorithm: %d", hash_algo);
+
+ if (verbose > 0)
+ fprintf(stderr, "Hash algorithm: %s\n",
+ hash_type2name(hash_algo));
+}
+
+static void
+load_snap_hdr(void)
+{
+ xlseek(ifd, 0, SEEK_SET);
+ read_snap_hdr(ifd, &snap_hdr);
+ match_ver(snap_hdr.flags);
+}
+
+static void
+init(void)
+{
+ ifd = open(SNAPSF, O_RDONLY, 0600);
+ if (ifd < 0)
+ err(1, "open %s", SNAPSF);
+
+ sfd = open(STOREF, O_RDONLY, 0600);
+ if (sfd < 0)
+ err(1, "open %s", STOREF);
+
+ if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
+ flock(sfd, LOCK_NB | LOCK_EX) < 0)
+ err(1, "flock");
+
+ load_snap_hdr();
+ load_blk_hdr();
+}
+
+static void
+term(void)
+{
+ close(sfd);
+ close(ifd);
+}
+
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: %s [-v] [repo]\n", argv0);
+ exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+ char *repo = NULL;
+
+ ARGBEGIN {
+ case 'v':
+ verbose++;
+ break;
+ default:
+ usage();
+ } ARGEND
+
+ switch (argc) {
+ case 0:
+ repo = ".";
+ break;
+ case 1:
+ repo = argv[0];
+ break;
+ default:
+ usage();
+ };
+
+
+ if (chdir(repo) < 0)
+ err(1, "chdir: %s", repo);
+
+ init();
+ print_info(&snap_hdr.st);
+ term();
+ return 0;
+}
diff --git a/dinit.1 b/dinit.1
@@ -0,0 +1,39 @@
+.Dd April 17, 2019
+.Dt DINIT 1
+.Os
+.Sh NAME
+.Nm dinit
+.Nd Initialize a dedup repository
+.Sh SYNOPSIS
+.Nm dinit
+.Op Fl v
+.Op Fl H Ar hash
+.Op Fl Z Ar compressor
+.Op repo
+.Sh DESCRIPTION
+.Nm
+initializes a dedup repository.
+If no
+.Ar repo
+is specified, then the current directory
+is assumed to be the repository.
+.Sh OPTIONS
+.Bl -tag -width "-Z compressor"
+.It Fl v
+Enable verbose mode.
+.It Fl H Ar hash
+The cryptographic hash function used to identify
+unique blocks in the store.
+The supported hash functions are blake2b, blake2bp, blake2s and blake2sp.
+This flag only has an effect when initializing the repository.
+By default blake2b is used.
+.It Fl Z Ar compressor
+The compressor function used to compress the blocks
+in the store.
+The supported compressor functions are none, lz4 and snappy.
+This flag only has an effect when initializing the repository.
+By default lz4 is used.
+.El
+.Sh AUTHORS
+.An Dimitris Papastamos Aq Mt sin@2f30.org ,
+.An z3bra Aq Mt contactatz3bradotorg .
diff --git a/dinit.c b/dinit.c
@@ -0,0 +1,155 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+
+#include <err.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "arg.h"
+#include "blake2.h"
+#include "dedup.h"
+
+#define SNAPSF ".snapshots"
+#define STOREF ".store"
+
+static struct snap_hdr snap_hdr;
+static struct blk_hdr blk_hdr;
+static int ifd;
+static int sfd;
+static int hash_algo = HASH_BLAKE2B;
+static int compr_algo = COMPR_LZ4;
+
+int verbose;
+char *argv0;
+
+static void
+init_blk_hdr(void)
+{
+ blk_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN;
+ blk_hdr.flags |= compr_algo << COMPR_ALGO_SHIFT;
+ blk_hdr.flags |= hash_algo << HASH_ALGO_SHIFT;
+ blk_hdr.size = BLK_HDR_SIZE;
+}
+
+static void
+save_blk_hdr(void)
+{
+ xlseek(sfd, 0, SEEK_SET);
+ write_blk_hdr(sfd, &blk_hdr);
+}
+
+static void
+init_snap_hdr(void)
+{
+ snap_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN;
+ snap_hdr.size = SNAP_HDR_SIZE;
+ snap_hdr.st.min_blk_size = UINT64_MAX;
+}
+
+static void
+save_snap_hdr(void)
+{
+ xlseek(ifd, 0, SEEK_SET);
+ write_snap_hdr(ifd, &snap_hdr);
+}
+
+static void
+init(void)
+{
+ int flags;
+
+ flags = O_RDWR | O_CREAT | O_EXCL;
+ ifd = open(SNAPSF, flags, 0600);
+ if (ifd < 0)
+ err(1, "open %s", SNAPSF);
+
+ sfd = open(STOREF, flags, 0600);
+ if (sfd < 0)
+ err(1, "open %s", STOREF);
+
+ if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
+ flock(sfd, LOCK_NB | LOCK_EX) < 0)
+ err(1, "flock");
+
+ init_snap_hdr();
+ init_blk_hdr();
+}
+
+static void
+term(void)
+{
+ save_blk_hdr();
+ save_snap_hdr();
+
+ fsync(sfd);
+ fsync(ifd);
+
+ close(sfd);
+ close(ifd);
+}
+
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: %s [-v] [-H hash] [-Z compressor] [repo]\n", argv0);
+ exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+ char *hash_name = NULL, *compr_name = NULL;
+ char *repo;
+
+ ARGBEGIN {
+ case 'H':
+ hash_name = EARGF(usage());
+ if (strcmp(hash_name, "?") == 0) {
+ hash_list(STDERR_FILENO);
+ return 0;
+ }
+ hash_algo = hash_name2type(hash_name);
+ if (hash_algo < 0)
+ errx(1, "unknown hash: %s", hash_name);
+ break;
+ case 'Z':
+ compr_name = EARGF(usage());
+ if (strcmp(compr_name, "?") == 0) {
+ compr_list(STDERR_FILENO);
+ return 0;
+ }
+ compr_algo = compr_name2type(compr_name);
+ if (compr_algo < 0)
+ errx(1, "unknown compressor: %s", compr_name);
+ break;
+ case 'v':
+ verbose++;
+ break;
+ default:
+ usage();
+ } ARGEND
+
+ switch (argc) {
+ case 0:
+ repo = ".";
+ break;
+ case 1:
+ repo = argv[0];
+ break;
+ default:
+ usage();
+ };
+
+ mkdir(repo, 0700);
+ if (chdir(repo) < 0)
+ err(1, "chdir: %s", repo);
+
+ init();
+ term();
+ return 0;
+}
diff --git a/dlist.1 b/dlist.1
@@ -0,0 +1,25 @@
+.Dd April 17, 2019
+.Dt DLIST 1
+.Os
+.Sh NAME
+.Nm dlist
+.Nd List snapshots from a dedup repository
+.Sh SYNOPSIS
+.Nm dlist
+.Op Fl v
+.Op repo
+.Sh DESCRIPTION
+.Nm
+lists snapshots from a dedup repository.
+If no
+.Ar repo
+is specified, then the current directory
+is assumed to be the repository.
+.Sh OPTIONS
+.Bl -tag -width "-v"
+.It Fl v
+Enable verbose mode.
+.El
+.Sh AUTHORS
+.An Dimitris Papastamos Aq Mt sin@2f30.org ,
+.An z3bra Aq Mt contactatz3bradotorg .
diff --git a/dlist.c b/dlist.c
@@ -0,0 +1,232 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+
+#include <err.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "arg.h"
+#include "blake2.h"
+#include "dedup.h"
+
+#define SNAPSF ".snapshots"
+#define STOREF ".store"
+
+enum {
+ WALK_CONTINUE,
+ WALK_STOP
+};
+
+static struct snap_hdr snap_hdr;
+static struct blk_hdr blk_hdr;
+static int ifd;
+static int sfd;
+static int hash_algo = HASH_BLAKE2B;
+static int compr_algo = COMPR_LZ4;
+
+int verbose;
+char *argv0;
+
+static void
+print_md(FILE *fp, uint8_t *md, size_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ fprintf(fp, "%02x", md[i]);
+}
+
+static struct snap *
+alloc_snap(void)
+{
+ struct snap *snap;
+
+ snap = calloc(1, sizeof(*snap));
+ if (snap == NULL)
+ err(1, "%s", __func__);
+ return snap;
+}
+
+static void
+free_snap(struct snap *snap)
+{
+ free(snap);
+}
+
+static struct snap *
+grow_snap(struct snap *snap, uint64_t nr_blk_descs)
+{
+ size_t size;
+
+ if (nr_blk_descs > SIZE_MAX / sizeof(snap->blk_desc[0]))
+ errx(1, "%s: overflow", __func__);
+ size = nr_blk_descs * sizeof(snap->blk_desc[0]);
+
+ if (size > SIZE_MAX - sizeof(*snap))
+ errx(1, "%s: overflow", __func__);
+ size += sizeof(*snap);
+
+ snap = realloc(snap, size);
+ if (snap == NULL)
+ err(1, "%s", __func__);
+ return snap;
+}
+
+static int
+list(struct snap *snap, void *arg)
+{
+ print_md(stdout, snap->md, sizeof(snap->md));
+ if (snap->msg[0] != '\0')
+ printf("\t%s\n", snap->msg);
+ else
+ putchar('\n');
+ return WALK_CONTINUE;
+}
+
+/* Walk through all snapshots and call fn() on each one */
+static void
+walk_snap(int (*fn)(struct snap *, void *), void *arg)
+{
+ uint64_t i;
+
+ xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
+ for (i = 0; i < snap_hdr.nr_snaps; i++) {
+ struct snap *snap;
+ int ret;
+
+ snap = alloc_snap();
+ read_snap(ifd, snap);
+ snap = grow_snap(snap, snap->nr_blk_descs);
+ read_snap_descs(ifd, snap);
+
+ ret = (*fn)(snap, arg);
+ free_snap(snap);
+ if (ret == WALK_STOP)
+ break;
+ }
+}
+
+static void
+match_ver(uint64_t v)
+{
+ uint8_t maj, min;
+
+ min = v & VER_MIN_MASK;
+ maj = (v >> VER_MAJ_SHIFT) & VER_MAJ_MASK;
+ if (maj == VER_MAJ && min == VER_MIN)
+ return;
+ errx(1, "format version mismatch: expected %u.%u but got %u.%u",
+ VER_MAJ, VER_MIN, maj, min);
+}
+
+static void
+load_blk_hdr(void)
+{
+ uint64_t v;
+
+ xlseek(sfd, 0, SEEK_SET);
+ read_blk_hdr(sfd, &blk_hdr);
+ match_ver(blk_hdr.flags);
+
+ v = blk_hdr.flags >> COMPR_ALGO_SHIFT;
+ v &= COMPR_ALGO_MASK;
+ compr_algo = v;
+
+ if (compr_algo < 0 || compr_algo >= NR_COMPRS)
+ errx(1, "unsupported compression algorithm: %d", compr_algo);
+
+ if (verbose > 0)
+ fprintf(stderr, "Compression algorithm: %s\n",
+ compr_type2name(compr_algo));
+
+ v = blk_hdr.flags >> HASH_ALGO_SHIFT;
+ v &= HASH_ALGO_MASK;
+ hash_algo = v;
+
+ if (hash_algo < 0 || hash_algo >= NR_HASHES)
+ errx(1, "unsupported hash algorithm: %d", hash_algo);
+
+ if (verbose > 0)
+ fprintf(stderr, "Hash algorithm: %s\n",
+ hash_type2name(hash_algo));
+}
+
+static void
+load_snap_hdr(void)
+{
+ xlseek(ifd, 0, SEEK_SET);
+ read_snap_hdr(ifd, &snap_hdr);
+ match_ver(snap_hdr.flags);
+}
+
+static void
+init(void)
+{
+ ifd = open(SNAPSF, O_RDONLY, 0600);
+ if (ifd < 0)
+ err(1, "open %s", SNAPSF);
+
+ sfd = open(STOREF, O_RDONLY, 0600);
+ if (sfd < 0)
+ err(1, "open %s", STOREF);
+
+ if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
+ flock(sfd, LOCK_NB | LOCK_EX) < 0)
+ err(1, "flock");
+
+ load_snap_hdr();
+ load_blk_hdr();
+}
+
+static void
+term(void)
+{
+ close(sfd);
+ close(ifd);
+}
+
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: %s [-v] [repo]\n", argv0);
+ exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+ char *repo = NULL;
+
+ ARGBEGIN {
+ case 'v':
+ verbose++;
+ break;
+ default:
+ usage();
+ } ARGEND
+
+ switch (argc) {
+ case 0:
+ repo = ".";
+ break;
+ case 1:
+ repo = argv[0];
+ break;
+ default:
+ usage();
+ };
+
+
+ if (chdir(repo) < 0)
+ err(1, "chdir: %s", repo);
+
+ init();
+ walk_snap(list, NULL);
+ term();
+ return 0;
+}
diff --git a/dpack.1 b/dpack.1
@@ -0,0 +1,35 @@
+.Dd April 17, 2019
+.Dt DPACK 1
+.Os
+.Sh NAME
+.Nm dpack
+.Nd Deduplicate data from stdin
+.Sh SYNOPSIS
+.Nm dpack
+.Op Fl v
+.Op Fl m Ar message
+.Op repo
+.Sh DESCRIPTION
+.Nm
+deduplicates data from stdin.
+If no
+.Ar repo
+is specified, then the current directory
+is assumed to be the repository.
+.Pp
+.Nm
+does not track any file metadata so to deduplicate
+directory trees, an archival tool like
+.Xr tar 1
+should be used and piped into
+.Nm .
+.Sh OPTIONS
+.Bl -tag -width "-m message"
+.It Fl m Ar message
+Attach a descriptive message to the snapshot.
+.It Fl v
+Enable verbose mode.
+.El
+.Sh AUTHORS
+.An Dimitris Papastamos Aq Mt sin@2f30.org ,
+.An z3bra Aq Mt contactatz3bradotorg .
diff --git a/dpack.c b/dpack.c
@@ -0,0 +1,422 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+
+#include <err.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "arg.h"
+#include "blake2.h"
+#include "dedup.h"
+
+#define SNAPSF ".snapshots"
+#define STOREF ".store"
+
+enum {
+ WALK_CONTINUE,
+ WALK_STOP
+};
+
+static struct snap_hdr snap_hdr;
+static struct blk_hdr blk_hdr;
+static struct icache *icache;
+static int ifd;
+static int sfd;
+static int hash_algo = HASH_BLAKE2B;
+static int compr_algo = COMPR_LZ4;
+
+int verbose;
+char *argv0;
+
+static struct snap *
+alloc_snap(void)
+{
+ struct snap *snap;
+
+ snap = calloc(1, sizeof(*snap));
+ if (snap == NULL)
+ err(1, "%s", __func__);
+ return snap;
+}
+
+static void
+free_snap(struct snap *snap)
+{
+ free(snap);
+}
+
+/*
+ * The snapshot hash is calculated over the
+ * hash of its block descriptors.
+ */
+static void
+hash_snap(struct snap *snap, uint8_t *md)
+{
+ struct hash_ctx ctx;
+ uint64_t i;
+
+ if (hash_init(&ctx, hash_algo, MD_SIZE) < 0)
+ errx(1, "hash_init failed");
+ for (i = 0; i < snap->nr_blk_descs; i++) {
+ struct blk_desc *blk_desc;
+
+ blk_desc = &snap->blk_desc[i];
+ hash_update(&ctx, blk_desc->md, sizeof(blk_desc->md));
+ }
+ hash_final(&ctx, md, MD_SIZE);
+}
+
+static struct snap *
+grow_snap(struct snap *snap, uint64_t nr_blk_descs)
+{
+ size_t size;
+
+ if (nr_blk_descs > SIZE_MAX / sizeof(snap->blk_desc[0]))
+ errx(1, "%s: overflow", __func__);
+ size = nr_blk_descs * sizeof(snap->blk_desc[0]);
+
+ if (size > SIZE_MAX - sizeof(*snap))
+ errx(1, "%s: overflow", __func__);
+ size += sizeof(*snap);
+
+ snap = realloc(snap, size);
+ if (snap == NULL)
+ err(1, "%s", __func__);
+ return snap;
+}
+
+static void
+append_snap(struct snap *snap)
+{
+ if (snap->nr_blk_descs > UINT64_MAX / BLK_DESC_SIZE)
+ errx(1, "%s: overflow", __func__);
+ snap->size = snap->nr_blk_descs * BLK_DESC_SIZE;
+
+ if (snap->size > UINT64_MAX - SNAPSHOT_SIZE)
+ errx(1, "%s: overflow", __func__);
+ snap->size += SNAPSHOT_SIZE;
+
+ xlseek(ifd, snap_hdr.size, SEEK_SET);
+ write_snap(ifd, snap);
+ write_snap_blk_descs(ifd, snap);
+
+ if (snap_hdr.size > UINT64_MAX - snap->size)
+ errx(1, "%s: overflow", __func__);
+ snap_hdr.size += snap->size;
+
+ if (snap_hdr.nr_snaps > UINT64_MAX - 1)
+ errx(1, "%s: overflow", __func__);
+ snap_hdr.nr_snaps++;
+}
+
+static uint8_t *
+alloc_buf(size_t size)
+{
+ void *p;
+
+ p = calloc(1, size);
+ if (p == NULL)
+ err(1, "%s", __func__);
+ return p;
+}
+
+static void
+hash_blk(uint8_t *buf, size_t size, uint8_t *md)
+{
+ struct hash_ctx ctx;
+
+ if (hash_init(&ctx, hash_algo, MD_SIZE) < 0)
+ errx(1, "hash_init failed");
+ hash_update(&ctx, buf, size);
+ hash_final(&ctx, md, MD_SIZE);
+}
+
+static void
+append_blk(uint8_t *buf, struct blk_desc *blk_desc)
+{
+ xlseek(sfd, blk_hdr.size, SEEK_SET);
+ xwrite(sfd, buf, blk_desc->size);
+
+ if (blk_hdr.size > UINT64_MAX - blk_desc->size)
+ errx(1, "%s: overflow", __func__);
+ blk_hdr.size += blk_desc->size;
+}
+
+static void
+dedup_chunk(struct snap *snap, uint8_t *chunkp, size_t chunk_size)
+{
+ uint8_t md[MD_SIZE];
+ struct blk_desc blk_desc;
+ struct compr_ctx ctx;
+ uint8_t *compr_buf;
+ size_t n, csize;
+
+ if (compr_init(&ctx, compr_algo) < 0)
+ errx(1, "compr_init failed");
+ csize = compr_size(&ctx, BLKSIZE_MAX);
+ compr_buf = alloc_buf(csize);
+
+ n = compr(&ctx, chunkp, compr_buf, chunk_size, csize);
+ hash_blk(compr_buf, n, md);
+
+ snap_hdr.st.orig_size += chunk_size;
+ snap_hdr.st.compr_size += n;
+
+ memcpy(blk_desc.md, md, sizeof(blk_desc.md));
+ if (lookup_icache(icache, &blk_desc) < 0) {
+ blk_desc.offset = blk_hdr.size;
+ blk_desc.size = n;
+
+ snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
+ append_blk(compr_buf, &blk_desc);
+
+ insert_icache(icache, &blk_desc);
+
+ snap_hdr.st.dedup_size += blk_desc.size;
+ snap_hdr.st.nr_blks++;
+
+ if (blk_desc.size > snap_hdr.st.max_blk_size)
+ snap_hdr.st.max_blk_size = blk_desc.size;
+ if (blk_desc.size < snap_hdr.st.min_blk_size)
+ snap_hdr.st.min_blk_size = blk_desc.size;
+ } else {
+ snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
+ }
+
+ free(compr_buf);
+ compr_final(&ctx);
+}
+
+static void
+dedup(int fd, char *msg)
+{
+ struct snap *snap;
+ struct chunker *chunker;
+
+ snap = alloc_snap();
+ chunker = alloc_chunker(fd, BLKSIZE_MIN, BLKSIZE_MAX,
+ HASHMASK_BITS, WINSIZE);
+
+ while (fill_chunker(chunker) > 0) {
+ uint8_t *chunkp;
+ size_t chunk_size;
+
+ chunkp = get_chunk(chunker, &chunk_size);
+ snap = grow_snap(snap, snap->nr_blk_descs + 1);
+ dedup_chunk(snap, chunkp, chunk_size);
+ drain_chunker(chunker);
+ }
+
+ if (snap->nr_blk_descs > 0) {
+ if (msg != NULL) {
+ size_t size;
+
+ size = strlen(msg) + 1;
+ if (size > sizeof(snap->msg))
+ size = sizeof(snap->msg);
+ memcpy(snap->msg, msg, size);
+ snap->msg[size - 1] = '\0';
+ }
+ hash_snap(snap, snap->md);
+ append_snap(snap);
+ }
+
+ free_chunker(chunker);
+ free_snap(snap);
+}
+
+static int
+build_icache(struct snap *snap, void *arg)
+{
+ struct compr_ctx ctx;
+ uint8_t *buf;
+ uint64_t i;
+
+ if (compr_init(&ctx, compr_algo) < 0)
+ errx(1, "compr_init failed");
+ buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
+ for (i = 0; i < snap->nr_blk_descs; i++) {
+ struct blk_desc *blk_desc;
+
+ blk_desc = &snap->blk_desc[i];
+ insert_icache(icache, blk_desc);
+ }
+ free(buf);
+ compr_final(&ctx);
+ return WALK_CONTINUE;
+}
+
+/* Walk through all snapshots and call fn() on each one */
+static void
+walk_snap(int (*fn)(struct snap *, void *), void *arg)
+{
+ uint64_t i;
+
+ xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
+ for (i = 0; i < snap_hdr.nr_snaps; i++) {
+ struct snap *snap;
+ int ret;
+
+ snap = alloc_snap();
+ read_snap(ifd, snap);
+ snap = grow_snap(snap, snap->nr_blk_descs);
+ read_snap_descs(ifd, snap);
+
+ ret = (*fn)(snap, arg);
+ free_snap(snap);
+ if (ret == WALK_STOP)
+ break;
+ }
+}
+
+static void
+match_ver(uint64_t v)
+{
+ uint8_t maj, min;
+
+ min = v & VER_MIN_MASK;
+ maj = (v >> VER_MAJ_SHIFT) & VER_MAJ_MASK;
+ if (maj == VER_MAJ && min == VER_MIN)
+ return;
+ errx(1, "format version mismatch: expected %u.%u but got %u.%u",
+ VER_MAJ, VER_MIN, maj, min);
+}
+
+static void
+load_blk_hdr(void)
+{
+ uint64_t v;
+
+ xlseek(sfd, 0, SEEK_SET);
+ read_blk_hdr(sfd, &blk_hdr);
+ match_ver(blk_hdr.flags);
+
+ v = blk_hdr.flags >> COMPR_ALGO_SHIFT;
+ v &= COMPR_ALGO_MASK;
+ compr_algo = v;
+
+ if (compr_algo < 0 || compr_algo >= NR_COMPRS)
+ errx(1, "unsupported compression algorithm: %d", compr_algo);
+
+ if (verbose > 0)
+ fprintf(stderr, "Compression algorithm: %s\n",
+ compr_type2name(compr_algo));
+
+ v = blk_hdr.flags >> HASH_ALGO_SHIFT;
+ v &= HASH_ALGO_MASK;
+ hash_algo = v;
+
+ if (hash_algo < 0 || hash_algo >= NR_HASHES)
+ errx(1, "unsupported hash algorithm: %d", hash_algo);
+
+ if (verbose > 0)
+ fprintf(stderr, "Hash algorithm: %s\n",
+ hash_type2name(hash_algo));
+}
+
+static void
+save_blk_hdr(void)
+{
+ xlseek(sfd, 0, SEEK_SET);
+ write_blk_hdr(sfd, &blk_hdr);
+}
+
+static void
+load_snap_hdr(void)
+{
+ xlseek(ifd, 0, SEEK_SET);
+ read_snap_hdr(ifd, &snap_hdr);
+ match_ver(snap_hdr.flags);
+}
+
+static void
+save_snap_hdr(void)
+{
+ xlseek(ifd, 0, SEEK_SET);
+ write_snap_hdr(ifd, &snap_hdr);
+}
+
+static void
+init(void)
+{
+ ifd = open(SNAPSF, O_RDWR, 0600);
+ if (ifd < 0)
+ err(1, "open %s", SNAPSF);
+
+ sfd = open(STOREF, O_RDWR, 0600);
+ if (sfd < 0)
+ err(1, "open %s", STOREF);
+
+ if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
+ flock(sfd, LOCK_NB | LOCK_EX) < 0)
+ err(1, "flock");
+
+ load_snap_hdr();
+ load_blk_hdr();
+
+ icache = alloc_icache();
+ walk_snap(build_icache, NULL);
+}
+
+static void
+term(void)
+{
+ free_icache(icache);
+
+ save_blk_hdr();
+ save_snap_hdr();
+
+ fsync(sfd);
+ fsync(ifd);
+
+ close(sfd);
+ close(ifd);
+}
+
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: %s [-v] [-m message] [repo]\n", argv0);
+ exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+ char *repo, *msg = NULL;
+
+ ARGBEGIN {
+ case 'm':
+ msg = EARGF(usage());
+ break;
+ case 'v':
+ verbose++;
+ break;
+ default:
+ usage();
+ } ARGEND
+
+ switch (argc) {
+ case 0:
+ repo = ".";
+ break;
+ case 1:
+ repo = argv[0];
+ break;
+ default:
+ usage();
+ };
+
+ if (chdir(repo) < 0)
+ err(1, "chdir: %s", repo);
+
+ init();
+ dedup(STDIN_FILENO, msg);
+ term();
+ return 0;
+}
diff --git a/dunpack.1 b/dunpack.1
@@ -0,0 +1,28 @@
+.Dd April 17, 2019
+.Dt DUNPACK 1
+.Os
+.Sh NAME
+.Nm dunpack
+.Nd Extract snapshot from a dedup repository
+.Sh SYNOPSIS
+.Nm dunpack
+.Op Fl v
+.Ar id
+.Op repo
+.Sh DESCRIPTION
+.Nm
+extracts the snapshot specified by
+.Ar id
+from the dedup repository.
+If no
+.Ar repo
+is specified, then the current directory
+is assumed to be the repository.
+.Sh OPTIONS
+.Bl -tag -width "-v"
+.It Fl v
+Enable verbose mode.
+.El
+.Sh AUTHORS
+.An Dimitris Papastamos Aq Mt sin@2f30.org ,
+.An z3bra Aq Mt contactatz3bradotorg .
diff --git a/dunpack.c b/dunpack.c
@@ -0,0 +1,288 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/file.h>
+
+#include <err.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "arg.h"
+#include "blake2.h"
+#include "dedup.h"
+
+#define SNAPSF ".snapshots"
+#define STOREF ".store"
+
+enum {
+ WALK_CONTINUE,
+ WALK_STOP
+};
+
+struct extract_args {
+ uint8_t *md;
+ int fd;
+ int ret;
+};
+
+static struct snap_hdr snap_hdr;
+static struct blk_hdr blk_hdr;
+static int ifd;
+static int sfd;
+static int hash_algo = HASH_BLAKE2B;
+static int compr_algo = COMPR_LZ4;
+
+int verbose;
+char *argv0;
+
+static struct snap *
+alloc_snap(void)
+{
+ struct snap *snap;
+
+ snap = calloc(1, sizeof(*snap));
+ if (snap == NULL)
+ err(1, "%s", __func__);
+ return snap;
+}
+
+static void
+free_snap(struct snap *snap)
+{
+ free(snap);
+}
+
+static struct snap *
+grow_snap(struct snap *snap, uint64_t nr_blk_descs)
+{
+ size_t size;
+
+ if (nr_blk_descs > SIZE_MAX / sizeof(snap->blk_desc[0]))
+ errx(1, "%s: overflow", __func__);
+ size = nr_blk_descs * sizeof(snap->blk_desc[0]);
+
+ if (size > SIZE_MAX - sizeof(*snap))
+ errx(1, "%s: overflow", __func__);
+ size += sizeof(*snap);
+
+ snap = realloc(snap, size);
+ if (snap == NULL)
+ err(1, "%s", __func__);
+ return snap;
+}
+
+static uint8_t *
+alloc_buf(size_t size)
+{
+ void *p;
+
+ p = calloc(1, size);
+ if (p == NULL)
+ err(1, "%s", __func__);
+ return p;
+}
+
+static void
+free_buf(uint8_t *buf)
+{
+ free(buf);
+}
+
+static void
+read_blk(uint8_t *buf, struct blk_desc *blk_desc)
+{
+ ssize_t n;
+
+ xlseek(sfd, blk_desc->offset, SEEK_SET);
+ n = xread(sfd, buf, blk_desc->size);
+ if (n == 0)
+ errx(1, "%s: unexpected EOF", __func__);
+ if (n != blk_desc->size)
+ errx(1, "%s: short read", __func__);
+}
+
+static int
+extract(struct snap *snap, void *arg)
+{
+ uint8_t *buf[2];
+ struct extract_args *args = arg;
+ struct compr_ctx ctx;
+ uint64_t i;
+
+ if (memcmp(snap->md, args->md, sizeof(snap->md)) != 0)
+ return WALK_CONTINUE;
+
+ if (compr_init(&ctx, compr_algo) < 0)
+ errx(1, "compr_init failed");
+ buf[0] = alloc_buf(BLKSIZE_MAX);
+ buf[1] = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
+ for (i = 0; i < snap->nr_blk_descs; i++) {
+ struct blk_desc *blk_desc;
+ size_t blksize;
+
+ blk_desc = &snap->blk_desc[i];
+ read_blk(buf[1], blk_desc);
+ blksize = decompr(&ctx, buf[1], buf[0], blk_desc->size, BLKSIZE_MAX);
+ xwrite(args->fd, buf[0], blksize);
+ }
+ free_buf(buf[1]);
+ free_buf(buf[0]);
+ compr_final(&ctx);
+ args->ret = 0;
+ return WALK_STOP;
+}
+
+/* Walk through all snapshots and call fn() on each one */
+static void
+walk_snap(int (*fn)(struct snap *, void *), void *arg)
+{
+ uint64_t i;
+
+ xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
+ for (i = 0; i < snap_hdr.nr_snaps; i++) {
+ struct snap *snap;
+ int ret;
+
+ snap = alloc_snap();
+ read_snap(ifd, snap);
+ snap = grow_snap(snap, snap->nr_blk_descs);
+ read_snap_descs(ifd, snap);
+
+ ret = (*fn)(snap, arg);
+ free_snap(snap);
+ if (ret == WALK_STOP)
+ break;
+ }
+}
+
+static void
+match_ver(uint64_t v)
+{
+ uint8_t maj, min;
+
+ min = v & VER_MIN_MASK;
+ maj = (v >> VER_MAJ_SHIFT) & VER_MAJ_MASK;
+ if (maj == VER_MAJ && min == VER_MIN)
+ return;
+ errx(1, "format version mismatch: expected %u.%u but got %u.%u",
+ VER_MAJ, VER_MIN, maj, min);
+}
+
+static void
+load_blk_hdr(void)
+{
+ uint64_t v;
+
+ xlseek(sfd, 0, SEEK_SET);
+ read_blk_hdr(sfd, &blk_hdr);
+ match_ver(blk_hdr.flags);
+
+ v = blk_hdr.flags >> COMPR_ALGO_SHIFT;
+ v &= COMPR_ALGO_MASK;
+ compr_algo = v;
+
+ if (compr_algo < 0 || compr_algo >= NR_COMPRS)
+ errx(1, "unsupported compression algorithm: %d", compr_algo);
+
+ if (verbose > 0)
+ fprintf(stderr, "Compression algorithm: %s\n",
+ compr_type2name(compr_algo));
+
+ v = blk_hdr.flags >> HASH_ALGO_SHIFT;
+ v &= HASH_ALGO_MASK;
+ hash_algo = v;
+
+ if (hash_algo < 0 || hash_algo >= NR_HASHES)
+ errx(1, "unsupported hash algorithm: %d", hash_algo);
+
+ if (verbose > 0)
+ fprintf(stderr, "Hash algorithm: %s\n",
+ hash_type2name(hash_algo));
+}
+
+static void
+load_snap_hdr(void)
+{
+ xlseek(ifd, 0, SEEK_SET);
+ read_snap_hdr(ifd, &snap_hdr);
+ match_ver(snap_hdr.flags);
+}
+
+static void
+init(void)
+{
+ ifd = open(SNAPSF, O_RDONLY, 0600);
+ if (ifd < 0)
+ err(1, "open %s", SNAPSF);
+
+ sfd = open(STOREF, O_RDONLY, 0600);
+ if (sfd < 0)
+ err(1, "open %s", STOREF);
+
+ if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
+ flock(sfd, LOCK_NB | LOCK_EX) < 0)
+ err(1, "flock");
+
+ load_snap_hdr();
+ load_blk_hdr();
+}
+
+static void
+term(void)
+{
+ close(sfd);
+ close(ifd);
+}
+
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: %s [-v] id [repo]\n", argv0);
+ exit(1);
+}
+
+int
+main(int argc, char *argv[])
+{
+ uint8_t md[MD_SIZE];
+ char *repo, *id = NULL;
+ struct extract_args args;
+
+ ARGBEGIN {
+ case 'v':
+ verbose++;
+ break;
+ default:
+ usage();
+ } ARGEND
+
+ switch (argc) {
+ case 1:
+ id = argv[0];
+ repo = ".";
+ break;
+ case 2:
+ id = argv[0];
+ repo = argv[1];
+ break;
+ default:
+ usage();
+ };
+
+ if (chdir(repo) < 0)
+ err(1, "chdir: %s", repo);
+
+ init();
+ str2bin(id, md);
+ args.md = md;
+ args.fd = STDIN_FILENO;
+ args.ret = -1;
+ walk_snap(extract, &args);
+ if (args.ret != 0)
+ errx(1, "unknown snapshot: %s", id);
+ term();
+ return 0;
+}