dedup

deduplicating backup program
git clone git://git.2f30.org/dedup
Log | Files | Refs | README | LICENSE

bstorage.c (13644B)


      1 /*
      2  * Storage layer implementation using a single backing file.
      3  * The file format is as follows:
      4  *
      5  * [block header]
      6  * [block descriptor 0]
      7  * [data]
      8  * [block descriptor 1]
      9  * [data]
     10  * ...
     11  */
     12 #include <sys/types.h>
     13 #include <sys/stat.h>
     14 
     15 #include <assert.h>
     16 #include <errno.h>
     17 #include <fcntl.h>
     18 #include <stdint.h>
     19 #include <stdio.h>
     20 #include <stdlib.h>
     21 #include <string.h>
     22 #include <strings.h>
     23 #include <unistd.h>
     24 
     25 #include "block.h"
     26 #include "compat.h"
     27 #include "config.h"
     28 #include "misc.h"
     29 #include "queue.h"
     30 #include "tree.h"
     31 
     32 /* block header constants */
     33 #define BHDRMAGIC	"DEDUPDIDUPDIDUP"
     34 #define NBHDRMAGIC	16
     35 
     36 #define VMIN		0
     37 #define VMAJ		1
     38 #define VMINMASK	0xff
     39 #define VMAJSHIFT	8
     40 #define VMAJMASK	0xff
     41 
     42 #define BHDRSIZE	(NBHDRMAGIC + 8 + 8)
     43 
     44 /* block descriptor constants */
     45 #define BDTYPE		0x100
     46 #define BDSIZE		(8 + 8 + 8 + 8 + (MDSIZE))
     47 
     48 /* misc helpers */
     49 extern int pack(unsigned char *, char *, ...);
     50 extern int unpack(unsigned char *, char *, ...);
     51 
     52 static int bscreat(struct bctx *, char *, int);
     53 static int bsopen(struct bctx *, char *, int, int);
     54 static int bsput(struct bctx *, void *, size_t, unsigned char *);
     55 static int bsget(struct bctx *, unsigned char *, void *, size_t *);
     56 static int bsrm(struct bctx *, unsigned char *);
     57 static int bsgc(struct bctx *);
     58 static int bssync(struct bctx *);
     59 static int bsclose(struct bctx *);
     60 
     61 static struct bops bops = {
     62 	.creat = bscreat,
     63 	.open = bsopen,
     64 	.put = bsput,
     65 	.get = bsget,
     66 	.rm = bsrm,
     67 	.gc = bsgc,
     68 	.sync = bssync,
     69 	.close = bsclose,
     70 };
     71 
     72 /* Block header structure */
     73 struct bhdr {
     74 	char magic[NBHDRMAGIC]; /* magic number for file(1) */
     75 	uint64_t flags;		/* version number */
     76 	uint64_t nbd;		/* number of block descriptors */
     77 };
     78 
     79 /* Block descriptor */
     80 struct bd {
     81 	uint16_t type;			/* BDTYPE */
     82 	unsigned char reserved[6];	/* should be set to 0 when writing */
     83 	uint64_t offset;		/* block offset */
     84 	uint64_t size;			/* block size */
     85 	uint64_t refcnt;		/* reference count of block, 0 if block is removed */
     86 	unsigned char md[MDSIZE];	/* hash of block */
     87 	RB_ENTRY(bd) rbe;		/* bdcache link node */
     88 	SLIST_ENTRY(bd) sle;		/* gchead link node */
     89 };
     90 RB_HEAD(bdcache, bd);
     91 
     92 /* Storage layer context */
     93 struct sctx {
     94 	struct bdcache bdcache;		/* cache of block descriptors */
     95 	SLIST_HEAD(gchead, bd) gchead;	/* list of all blocks with a zero refcount */
     96 	int fd;				/* underlying storage file descriptor */
     97 	int rdonly;			/* when set to 1, the bssync() operation is a no-op */
     98 	struct bhdr bhdr;		/* block header */
     99 };
    100 
    101 static int
    102 bd_cmp(struct bd *b1, struct bd *b2)
    103 {
    104 	int r;
    105 
    106 	r = memcmp(b1->md, b2->md, MDSIZE);
    107 	if (r > 0)
    108 		return 1;
    109 	else if (r < 0)
    110 		return -1;
    111 	return 0;
    112 }
    113 static RB_PROTOTYPE(bdcache, bd, rbe, bd_cmp)
    114 static RB_GENERATE(bdcache, bd, rbe, bd_cmp)
    115 
    116 /* Unpack block header */
    117 static int
    118 unpackbhdr(unsigned char *buf, struct bhdr *bhdr)
    119 {
    120 	char fmt[BUFSIZ];
    121 	int n;
    122 
    123 	snprintf(fmt, sizeof(fmt), "'%dqq", NBHDRMAGIC);
    124 	n = unpack(buf, fmt,
    125 	           bhdr->magic,
    126 	           &bhdr->flags,
    127 	           &bhdr->nbd);
    128 
    129 	assert(n == BHDRSIZE);
    130 	return n;
    131 }
    132 
    133 /* Pack block header */
    134 static int
    135 packbhdr(unsigned char *buf, struct bhdr *bhdr)
    136 {
    137 	char fmt[BUFSIZ];
    138 	int n;
    139 
    140 	snprintf(fmt, sizeof(fmt), "'%dqq", NBHDRMAGIC);
    141 	n = pack(buf, fmt,
    142 	         bhdr->magic,
    143 	         bhdr->flags,
    144 	         bhdr->nbd);
    145 
    146 	assert(n == BHDRSIZE);
    147 	return n;
    148 }
    149 
    150 /* Unpack block descriptor */
    151 static int
    152 unpackbd(unsigned char *buf, struct bd *bd)
    153 {
    154 	char fmt[BUFSIZ];
    155 	int n;
    156 
    157 	snprintf(fmt, sizeof(fmt), "s'6qqq'%d", MDSIZE);
    158 	n = unpack(buf, fmt,
    159 	           &bd->type,
    160 	           bd->reserved,
    161 	           &bd->offset,
    162 	           &bd->size,
    163 	           &bd->refcnt,
    164 	           bd->md);
    165 
    166 	assert(n == BDSIZE);
    167 	return n;
    168 }
    169 
    170 /* Write block descriptor */
    171 static int
    172 packbd(unsigned char *buf, struct bd *bd)
    173 {
    174 	char fmt[BUFSIZ];
    175 	int n;
    176 
    177 	snprintf(fmt, sizeof(fmt), "s'6qqq'%d", MDSIZE);
    178 	n = pack(buf, fmt,
    179 	         bd->type,
    180 	         bd->reserved,
    181 	         bd->offset,
    182 	         bd->size,
    183 	         bd->refcnt,
    184 	         bd->md);
    185 
    186 	assert(n == BDSIZE);
    187 	return n;
    188 }
    189 
    190 /* Load block descriptor from file */
    191 static int
    192 loadbd(struct sctx *sctx)
    193 {
    194 	unsigned char bdbuf[BDSIZE];
    195 	struct bd *bd;
    196 
    197 	bd = calloc(1, sizeof(*bd));
    198 	if (bd == NULL) {
    199 		seterr("calloc: out of memory");
    200 		return -1;
    201 	}
    202 
    203 	if (xread(sctx->fd, bdbuf, BDSIZE) != BDSIZE) {
    204 		free(bd);
    205 		seterr("failed to read block descriptor: %s",
    206 		        strerror(errno));
    207 		return -1;
    208 	}
    209 	unpackbd(bdbuf, bd);
    210 
    211 	if (bd->type != BDTYPE) {
    212 		free(bd);
    213 		seterr("invalid block descriptor type: %d", bd->type);
    214 		return -1;
    215 	}
    216 
    217 	/* Move to the next block descriptor */
    218 	if (lseek(sctx->fd, bd->size, SEEK_CUR) < 0) {
    219 		free(bd);
    220 		seterr("lseek: %s", strerror(errno));
    221 		return -1;
    222 	}
    223 
    224 	/*
    225 	 * When refcount is 0 the block has been removed.
    226 	 * In that case, the block descriptor is still present
    227 	 * in the file as it is used to locate the next block
    228 	 * descriptor which could be live.
    229 	 *
    230 	 * The garbage collection list links together all block
    231 	 * descriptors that have a reference count of 0.
    232 	 * This is needed to implement the gc operation.
    233 	 */
    234 	if (bd->refcnt > 0)
    235 		RB_INSERT(bdcache, &sctx->bdcache, bd);
    236 	else
    237 		SLIST_INSERT_HEAD(&sctx->gchead, bd, sle);
    238 	return 0;
    239 }
    240 
    241 /* Initialize block descriptor cache */
    242 static int
    243 initbdcache(struct sctx *sctx)
    244 {
    245 	struct bhdr *bhdr;
    246 	uint64_t i;
    247 
    248 	bhdr = &sctx->bhdr;
    249 	for (i = 0; i < bhdr->nbd; i++) {
    250 		struct bd *bd, *tmp;
    251 
    252 		if (loadbd(sctx) == 0)
    253 			continue;
    254 
    255 		/* Free block descriptor cache */
    256 		RB_FOREACH_SAFE(bd, bdcache, &sctx->bdcache, tmp) {
    257 			RB_REMOVE(bdcache, &sctx->bdcache, bd);
    258 			free(bd);
    259 		}
    260 
    261 		/* Free garbage collector list */
    262 		while (!SLIST_EMPTY(&sctx->gchead)) {
    263 			bd = SLIST_FIRST(&sctx->gchead);
    264 			SLIST_REMOVE(&sctx->gchead, bd, bd, sle);
    265 			free(bd);
    266 		}
    267 		return -1;
    268 	}
    269 	return 0;
    270 }
    271 
    272 /* Create storage file */
    273 static int
    274 bscreat(struct bctx *bctx, char *path, int mode)
    275 {
    276 	unsigned char bhdrbuf[BHDRSIZE];
    277 	struct sctx *sctx;
    278 	struct bhdr *bhdr;
    279 	int fd;
    280 
    281 	fd = open(path, O_RDWR | O_CREAT | O_EXCL, mode);
    282 	if (fd < 0) {
    283 		seterr("open: %s", strerror(errno));
    284 		return -1;
    285 	}
    286 
    287 	bctx->sctx = calloc(1, sizeof(struct sctx));
    288 	if (bctx->sctx == NULL) {
    289 		close(fd);
    290 		seterr("calloc: out of memory");
    291 		return -1;
    292 	}
    293 
    294 	sctx = bctx->sctx;
    295 	RB_INIT(&sctx->bdcache);
    296 	SLIST_INIT(&sctx->gchead);
    297 	sctx->fd = fd;
    298 
    299 	bhdr = &sctx->bhdr;
    300 	memcpy(bhdr->magic, BHDRMAGIC, NBHDRMAGIC);
    301 	bhdr->flags = (VMAJ << VMAJSHIFT) | VMIN;
    302 	bhdr->nbd = 0;
    303 
    304 	packbhdr(bhdrbuf, bhdr);
    305 	if (xwrite(fd, bhdrbuf, BHDRSIZE) != BHDRSIZE) {
    306 		free(sctx);
    307 		close(fd);
    308 		seterr("failed to write block header: %s", strerror(errno));
    309 		return -1;
    310 	}
    311 	return 0;
    312 }
    313 
    314 /* Open storage file */
    315 static int
    316 bsopen(struct bctx *bctx, char *path, int flags, int mode)
    317 {
    318 	unsigned char bhdrbuf[BHDRSIZE];
    319 	struct sctx *sctx;
    320 	struct bhdr *bhdr;
    321 	int fd;
    322 
    323 	switch (flags) {
    324 	case B_READ:
    325 		flags = O_RDONLY;
    326 		break;
    327 	case B_RDWR:
    328 		flags = O_RDWR;
    329 		break;
    330 	default:
    331 		seterr("invalid params");
    332 		return -1;
    333 	}
    334 
    335 	fd = open(path, flags, mode);
    336 	if (fd < 0) {
    337 		seterr("open: %s", strerror(errno));
    338 		return -1;
    339 	}
    340 
    341 	bctx->sctx = calloc(1, sizeof(struct sctx));
    342 	if (bctx->sctx == NULL) {
    343 		close(fd);
    344 		seterr("calloc: out of memory");
    345 		return -1;
    346 	}
    347 
    348 	sctx = bctx->sctx;
    349 	RB_INIT(&sctx->bdcache);
    350 	SLIST_INIT(&sctx->gchead);
    351 	bhdr = &sctx->bhdr;
    352 
    353 	if (xread(fd, bhdrbuf, BHDRSIZE) != BHDRSIZE) {
    354 		free(sctx);
    355 		close(fd);
    356 		seterr("failed to read block header: %s", strerror(errno));
    357 		return -1;
    358 	}
    359 	unpackbhdr(bhdrbuf, bhdr);
    360 
    361 	if (memcmp(bhdr->magic, BHDRMAGIC, NBHDRMAGIC) != 0) {
    362 		free(sctx);
    363 		close(fd);
    364 		seterr("unknown block header magic");
    365 		return -1;
    366 	}
    367 
    368 	/* If the major version is different, the format is incompatible */
    369 	if (((bhdr->flags >> VMAJSHIFT) & VMAJMASK) != VMAJ) {
    370 		free(sctx);
    371 		close(fd);
    372 		seterr("block header version mismatch");
    373 		return -1;
    374 	}
    375 
    376 	sctx->fd = fd;
    377 	sctx->rdonly = flags == O_RDONLY;
    378 
    379 	if (initbdcache(sctx) < 0) {
    380 		free(sctx);
    381 		close(fd);
    382 		return -1;
    383 	}
    384 	return 0;
    385 }
    386 
    387 /* Write a block to the storage file */
    388 static int
    389 bsput(struct bctx *bctx, void *buf, size_t n, unsigned char *md)
    390 {
    391 	unsigned char bdbuf[BDSIZE];
    392 	struct sctx *sctx;
    393 	struct bhdr *bhdr;
    394 	struct bd key, *bd;
    395 	off_t offs;
    396 
    397 	/*
    398 	 * If the block is already present in the cache
    399 	 * just increment the reference count and write back
    400 	 * the block descriptor associated with that block.
    401 	 */
    402 	sctx = bctx->sctx;
    403 	memcpy(key.md, md, MDSIZE);
    404 	bd = RB_FIND(bdcache, &sctx->bdcache, &key);
    405 	if (bd != NULL) {
    406 		off_t bdoffs;
    407 
    408 		bdoffs = bd->offset - BDSIZE;
    409 		if (lseek(sctx->fd, bdoffs, SEEK_SET) < 0) {
    410 			seterr("lseek: %s", strerror(errno));
    411 			return -1;
    412 		}
    413 
    414 		bd->refcnt++;
    415 		packbd(bdbuf, bd);
    416 		if (xwrite(sctx->fd, bdbuf, BDSIZE) != BDSIZE) {
    417 			bd->refcnt--;
    418 			seterr("failed to write block descriptor: %s",
    419 			        strerror(errno));
    420 			return -1;
    421 		}
    422 
    423 		memcpy(md, bd->md, MDSIZE);
    424 		return 0;
    425 	}
    426 
    427 	/* New blocks are appended at the end of storage file */
    428 	offs = lseek(sctx->fd, 0, SEEK_END);
    429 	if (offs < 0) {
    430 		seterr("lseek: %s", strerror(errno));
    431 		return -1;
    432 	}
    433 
    434 	bd = calloc(1, sizeof(*bd));
    435 	if (bd == NULL) {
    436 		seterr("calloc: out of memory");
    437 		return -1;
    438 	}
    439 	bd->type = BDTYPE;
    440 	bd->offset = offs + BDSIZE;
    441 	bd->size = n;
    442 	bd->refcnt = 1;
    443 	memcpy(bd->md, key.md, MDSIZE);
    444 
    445 	packbd(bdbuf, bd);
    446 	if (xwrite(sctx->fd, bdbuf, BDSIZE) != BDSIZE) {
    447 		/* Shouldn't fail but if it does rewind storage file state */
    448 		ftruncate(sctx->fd, offs);
    449 		free(bd);
    450 		seterr("failed to write block descriptor: %s",
    451 		        strerror(errno));
    452 		return -1;
    453 	}
    454 
    455 	if (xwrite(sctx->fd, buf, n) != n) {
    456 		/* Shouldn't fail but if it does rewind storage file state */
    457 		ftruncate(sctx->fd, offs);
    458 		free(bd);
    459 		seterr("failed to write block: %s", strerror(errno));
    460 		return -1;
    461 	}
    462 
    463 	/*
    464 	 * Update block entry header.
    465 	 * The header will be written to the storage file
    466 	 * when bsclose() or bssync() is called.
    467 	 */
    468 	bhdr = &sctx->bhdr;
    469 	bhdr->nbd++;
    470 
    471 	RB_INSERT(bdcache, &sctx->bdcache, bd);
    472 	memcpy(md, bd->md, MDSIZE);
    473 	return bd->size;
    474 }
    475 
    476 /* Read a block from the storage file */
    477 static int
    478 bsget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n)
    479 {
    480 	struct sctx *sctx;
    481 	struct bd key, *bd;
    482 
    483 	sctx = bctx->sctx;
    484 	memcpy(key.md, md, MDSIZE);
    485 	bd = RB_FIND(bdcache, &sctx->bdcache, &key);
    486 	if (bd == NULL) {
    487 		seterr("block not found");
    488 		return -1;
    489 	}
    490 
    491 	if (*n < bd->size) {
    492 		seterr("buffer too small");
    493 		return -1;
    494 	}
    495 
    496 	if (lseek(sctx->fd, bd->offset, SEEK_SET) < 0) {
    497 		seterr("lseek: %s", strerror(errno));
    498 		return -1;
    499 	}
    500 	if (xread(sctx->fd, buf, bd->size) != bd->size) {
    501 		seterr("failed to read block: %s", strerror(errno));
    502 		return -1;
    503 	}
    504 	*n = bd->size;
    505 	return 0;
    506 }
    507 
    508 /* Remove a block with the given hash */
    509 static int
    510 bsrm(struct bctx *bctx, unsigned char *md)
    511 {
    512 	unsigned char bdbuf[BDSIZE];
    513 	struct sctx *sctx;
    514 	struct bd key, *bd;
    515 	off_t bdoffs;
    516 
    517 	sctx = bctx->sctx;
    518 	memcpy(key.md, md, MDSIZE);
    519 	bd = RB_FIND(bdcache, &sctx->bdcache, &key);
    520 	if (bd == NULL) {
    521 		seterr("block not found");
    522 		return -1;
    523 	}
    524 
    525 	bdoffs = bd->offset - BDSIZE;
    526 	if (lseek(sctx->fd, bdoffs, SEEK_SET) < 0) {
    527 		seterr("lseek: %s", strerror(errno));
    528 		return -1;
    529 	}
    530 
    531 	bd->refcnt--;
    532 	packbd(bdbuf, bd);
    533 	if (xwrite(sctx->fd, bdbuf, BDSIZE) != BDSIZE) {
    534 		bd->refcnt++;
    535 		seterr("failed to write block descriptor: %s",
    536 		        strerror(errno));
    537 		return -1;
    538 	}
    539 
    540 	/* This block is still referenced so just return */
    541 	if (bd->refcnt > 0)
    542 		return 0;
    543 
    544 	if (punchhole(sctx->fd, bd->offset, bd->size) < 0) {
    545 		/*
    546 		 * Filesystem does not support hole punching.
    547 		 * Restore reference count.
    548 		 */
    549 		lseek(sctx->fd, bdoffs, SEEK_SET);
    550 		bd->refcnt++;
    551 		packbd(bdbuf, bd);
    552 		xwrite(sctx->fd, bdbuf, BDSIZE);
    553 		seterr("operation not supported");
    554 		return -1;
    555 	}
    556 
    557 	/*
    558 	 * Remove block from block descriptor cache as this is no
    559 	 * longer a valid block.  Insert it into the garbage collector
    560 	 * list instead.
    561 	 */
    562 	RB_REMOVE(bdcache, &sctx->bdcache, bd);
    563 	SLIST_INSERT_HEAD(&sctx->gchead, bd, sle);
    564 	return 0;
    565 }
    566 
    567 /*
    568  * Re-punch all holes in the storage file.
    569  * This is needed when the storage file is copied from
    570  * one system to another and back.  The target system
    571  * may not support hole punching so the holes will be
    572  * filled with literal zeroes, negating the space saving
    573  * effects.
    574  */
    575 static int
    576 bsgc(struct bctx *bctx)
    577 {
    578 	struct sctx *sctx;
    579 	struct bd *bd;
    580 
    581 	sctx = bctx->sctx;
    582 	SLIST_FOREACH(bd, &sctx->gchead, sle) {
    583 		assert(bd->refcnt == 0);
    584 		punchhole(sctx->fd, bd->offset, bd->size);
    585 	}
    586 	return 0;
    587 }
    588 
    589 /* Sync block header to storage file */
    590 static int
    591 bssync(struct bctx *bctx)
    592 {
    593 	unsigned char bhdrbuf[BHDRSIZE];
    594 	struct sctx *sctx;
    595 	struct bhdr *bhdr;
    596 
    597 	sctx = bctx->sctx;
    598 	if (sctx->rdonly)
    599 		return 0;
    600 
    601 	if (lseek(sctx->fd, 0, SEEK_SET) < 0) {
    602 		seterr("lseek: %s", strerror(errno));
    603 		return -1;
    604 	}
    605 
    606 	bhdr = &sctx->bhdr;
    607 	packbhdr(bhdrbuf, bhdr);
    608 	if (xwrite(sctx->fd, bhdrbuf, BHDRSIZE) != BHDRSIZE) {
    609 		seterr("failed to write block header: %s", strerror(errno));
    610 		return -1;
    611 	}
    612 	fsync(sctx->fd);
    613 	return 0;
    614 }
    615 
    616 /* Close storage handle */
    617 static int
    618 bsclose(struct bctx *bctx)
    619 {
    620 	struct sctx *sctx;
    621 	struct bd *bd, *tmp;
    622 	int r;
    623 
    624 	/* Free block descriptor cache */
    625 	sctx = bctx->sctx;
    626 	RB_FOREACH_SAFE(bd, bdcache, &sctx->bdcache, tmp) {
    627 		RB_REMOVE(bdcache, &sctx->bdcache, bd);
    628 		free(bd);
    629 	}
    630 
    631 	/* Free garbage collector list */
    632 	while (!SLIST_EMPTY(&sctx->gchead)) {
    633 		bd = SLIST_FIRST(&sctx->gchead);
    634 		SLIST_REMOVE(&sctx->gchead, bd, bd, sle);
    635 		free(bd);
    636 	}
    637 
    638 	r = close(sctx->fd);
    639 	free(sctx);
    640 	if (r < 0)
    641 		seterr("close: %s", strerror(errno));
    642 	return r;
    643 }
    644 
    645 struct bops *
    646 bstorageops(void)
    647 {
    648 	return &bops;
    649 }