commit 891041b03c66d2c37f4aef65de717e0f3740d1d1
parent bb81bcb6bd4b4bf7dbe36d2a47087dec0c58f812
Author: sin <sin@2f30.org>
Date: Wed, 1 May 2019 20:54:49 +0100
Some more comments in bstorage.c
Diffstat:
M | bstorage.c | | | 111 | +++++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------- |
1 file changed, 75 insertions(+), 36 deletions(-)
diff --git a/bstorage.c b/bstorage.c
@@ -2,10 +2,10 @@
* Storage layer implementation using a single backing file.
* The file format is as follows:
*
- * [storage header]
- * [storage descriptor 0]
+ * [block header]
+ * [block descriptor 0]
* [data]
- * [storage descriptor 1]
+ * [block descriptor 1]
* [data]
* ...
*/
@@ -26,31 +26,30 @@
#include "queue.h"
#include "tree.h"
+/* block header flags */
+#define BHDRMAGIC "DEDUPDIDUPDIDUP"
+#define NBHDRMAGIC sizeof(BHDRMAGIC)
#define VMIN 0
#define VMAJ 1
-
#define VMINMASK 0xff
#define VMAJSHIFT 8
#define VMAJMASK 0xff
-
#define HALGOSHIFT 19
#define HALGOMASK 0x7
#define CALGOSHIFT 16
#define CALGOMASK 0x7
-
-#define BHDRMAGIC "DEDUPDIDUPDIDUP"
-#define NBHDRMAGIC sizeof(BHDRMAGIC)
-#define BD2BTYPE 0x100
-#define BD2STYPE 0x101
-
-#define BHDRSIZE (NBHDRMAGIC + 8 + 8)
-#define BDSIZE (8 + 8 + 8 + 8 + (MDSIZE))
-
#define CNONETYPE 0
#define CSNAPPYTYPE 1
#define HBLAKE2BTYPE 0
#define HBLAKE2STYPE 1
+#define BHDRSIZE (NBHDRMAGIC + 8 + 8)
+
+/* block descriptor flags */
+#define BD2BTYPE 0x100
+#define BD2STYPE 0x101
+#define BDSIZE (8 + 8 + 8 + 8 + (MDSIZE))
+/* misc helpers */
extern ssize_t xread(int, void *, size_t);
extern ssize_t xwrite(int, void *, size_t);
extern int pack(unsigned char *, char *, ...);
@@ -80,30 +79,30 @@ static struct bops bops = {
/* Block header structure */
struct bhdr {
- char magic[NBHDRMAGIC];
- uint64_t flags;
- uint64_t nbd;
+ char magic[NBHDRMAGIC]; /* magic number for file(1) */
+ uint64_t flags; /* version number, compression/hashing configuration */
+ uint64_t nbd; /* number of block descriptors */
};
/* Block descriptor */
struct bd {
- uint16_t type;
- uint8_t reserved[6];
- uint64_t offset; /* offset of block */
- uint64_t size; /* size of block */
- uint64_t refcnt;
- unsigned char md[MDSIZE];
- RB_ENTRY(bd) rbe;
- SLIST_ENTRY(bd) sle;
+ uint16_t type; /* type of hashing algorithm used */
+ uint8_t reserved[6]; /* should be set to 0 when writing */
+ uint64_t offset; /* block offset */
+ uint64_t size; /* block size */
+ uint64_t refcnt; /* reference count of block, 0 if block is removed */
+ unsigned char md[MDSIZE]; /* hash of block */
+ RB_ENTRY(bd) rbe; /* bdcache link node */
+ SLIST_ENTRY(bd) sle; /* gchead link node */
};
RB_HEAD(bdcache, bd);
/* Storage layer context */
struct sctx {
- struct bdcache bdcache;
- SLIST_HEAD(gchead, bd) gchead;
- struct bhdr bhdr;
- int fd;
+ struct bdcache bdcache; /* cache of block descriptors */
+ SLIST_HEAD(gchead, bd) gchead; /* list of all blocks with a zero refcount */
+ struct bhdr bhdr; /* block header entry */
+ int fd; /* underlying storage file descriptor */
int rdonly; /* when set to 1, the bssync() operation is a no-op */
int type; /* hash algorithm for new blocks */
};
@@ -231,7 +230,7 @@ packbd(int fd, struct bd *bd)
return n;
}
-/* Insert block descriptor to cache */
+/* Load block descriptor from file */
static int
loadbd(struct sctx *sctx)
{
@@ -257,6 +256,16 @@ loadbd(struct sctx *sctx)
return -1;
}
+ /*
+ * When refcount is 0 the block has been removed.
+ * In that case, the block descriptor is still present
+ * in the file as it is used to locate the next block
+ * descriptor which could be live.
+ *
+ * The garbage collection list links together all block
+ * descriptors that have a reference count of 0.
+ * This is needed to implement the gc operation.
+ */
if (bd->refcnt > 0)
RB_INSERT(bdcache, &sctx->bdcache, bd);
else
@@ -295,7 +304,7 @@ initbdcache(struct sctx *sctx)
return 0;
}
-/* Create storage */
+/* Create storage file */
static int
bscreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar)
{
@@ -346,6 +355,7 @@ bscreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar)
bhdr->nbd = 0;
sctx->fd = fd;
+ /* Write the block header entry to the file */
if (packbhdr(fd, bhdr) < 0) {
free(sctx);
close(fd);
@@ -354,7 +364,7 @@ bscreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar)
return 0;
}
-/* Open storage */
+/* Open storage file */
static int
bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar)
{
@@ -387,6 +397,8 @@ bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar)
RB_INIT(&sctx->bdcache);
SLIST_INIT(&sctx->gchead);
bhdr = &sctx->bhdr;
+
+ /* Read block header entry from file */
if (unpackbhdr(fd, bhdr) < 0) {
free(sctx);
close(fd);
@@ -442,6 +454,10 @@ bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar)
sctx->fd = fd;
sctx->rdonly = flags == O_RDONLY;
+ /*
+ * Initialize block descriptor cache
+ * and garbage collection list.
+ */
if (initbdcache(sctx) < 0) {
free(sctx);
close(fd);
@@ -450,7 +466,7 @@ bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar)
return 0;
}
-/* Write a block */
+/* Write a block to the storage file */
static int
bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
{
@@ -460,7 +476,6 @@ bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
off_t offs;
sctx = bctx->sctx;
-
switch (sctx->type) {
case BD2BTYPE:
if (b2bhash(buf, n, key.md) < 0)
@@ -474,6 +489,11 @@ bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
return -1;
}
+ /*
+ * If the block is already present in the cache
+ * just increment the reference count and write back
+ * the block descriptor associated for that block.
+ */
bd = RB_FIND(bdcache, &sctx->bdcache, &key);
if (bd != NULL) {
off_t bdoffs;
@@ -482,7 +502,6 @@ bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
if (lseek(sctx->fd, bdoffs, SEEK_SET) < 0)
return -1;
- /* Block already present, increment the reference count */
bd->refcnt++;
if (packbd(sctx->fd, bd) < 0) {
bd->refcnt--;
@@ -493,10 +512,12 @@ bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
return 0;
}
+ /* New blocks are always appended to the storage file */
offs = lseek(sctx->fd, 0, SEEK_END);
if (offs < 0)
return -1;
+ /* Allocate a new block descriptor */
bd = calloc(1, sizeof(*bd));
if (bd == NULL)
return -1;
@@ -506,25 +527,34 @@ bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
bd->refcnt = 1;
memcpy(bd->md, key.md, MDSIZE);
+ /* Write block descriptor to storage file */
if (packbd(sctx->fd, bd) < 0) {
free(bd);
return -1;
}
+ /* Append block payload to block descriptor */
if (xwrite(sctx->fd, buf, n) != n) {
+ /* Shouldn't fail but if it does rewind storage file state */
ftruncate(sctx->fd, offs);
free(bd);
return -1;
}
+ /*
+ * Update block entry header.
+ * The header will be written to the storage file
+ * when bsclose() or bssync() is called.
+ */
bhdr = &sctx->bhdr;
bhdr->nbd++;
+
RB_INSERT(bdcache, &sctx->bdcache, bd);
memcpy(md, bd->md, MDSIZE);
return bd->size;
}
-/* Read a block */
+/* Read a block from the storage file */
static int
bsget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n)
{
@@ -576,6 +606,7 @@ bsrm(struct bctx *bctx, unsigned char *md)
return -1;
}
+ /* This block is still referenced so just return */
if (bd->refcnt > 0)
return 0;
@@ -600,6 +631,14 @@ bsrm(struct bctx *bctx, unsigned char *md)
return 0;
}
+/*
+ * Re-punch all holes in the storage file.
+ * This is needed when the storage file is copied from
+ * one system to another and back. The target system
+ * may not support hole punching so the holes will be
+ * filled with literal zeroes, negating the space saving
+ * effects.
+ */
static int
bsgc(struct bctx *bctx)
{