dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

dpack.c (4115B)


      1 #include <sys/types.h>
      2 #include <sys/stat.h>
      3 #include <sys/file.h>
      4 
      5 #include <err.h>
      6 #include <fcntl.h>
      7 #include <stdio.h>
      8 #include <stdint.h>
      9 #include <stdlib.h>
     10 #include <string.h>
     11 #include <unistd.h>
     12 
     13 #include "arg.h"
     14 #include "blake2.h"
     15 #include "dedup.h"
     16 
     17 static struct snap_hdr snap_hdr;
     18 static struct blk_hdr blk_hdr;
     19 static struct icache *icache;
     20 static int ifd;
     21 static int sfd;
     22 static int hash_algo = HASH_BLAKE2B;
     23 static int compr_algo = COMPR_LZ4;
     24 
     25 int verbose;
     26 char *argv0;
     27 
     28 static void
     29 dedup_chunk(struct snap *snap, uint8_t *chunkp, size_t chunk_size)
     30 {
     31 	uint8_t md[MD_SIZE];
     32 	struct blk_desc blk_desc;
     33 	struct compr_ctx ctx;
     34 	uint8_t *compr_buf;
     35 	size_t n, csize;
     36 
     37 	if (compr_init(&ctx, compr_algo) < 0)
     38 		errx(1, "compr_init failed");
     39 	csize = compr_size(&ctx, BLKSIZE_MAX);
     40 	compr_buf = alloc_buf(csize);
     41 
     42 	n = compr(&ctx, chunkp, compr_buf, chunk_size, csize);
     43 	hash_blk(compr_buf, n, md, hash_algo);
     44 
     45 	snap_hdr.st.orig_size += chunk_size;
     46 	snap_hdr.st.compr_size += n;
     47 
     48 	memcpy(blk_desc.md, md, sizeof(blk_desc.md));
     49 	if (lookup_icache(icache, &blk_desc) < 0) {
     50 		blk_desc.offset = blk_hdr.size;
     51 		blk_desc.size = n;
     52 
     53 		snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
     54 		append_blk(sfd, &blk_hdr, compr_buf, &blk_desc);
     55 
     56 		insert_icache(icache, &blk_desc);
     57 
     58 		snap_hdr.st.dedup_size += blk_desc.size;
     59 		snap_hdr.st.nr_blks++;
     60 
     61 		if (blk_desc.size > snap_hdr.st.max_blk_size)
     62 			snap_hdr.st.max_blk_size = blk_desc.size;
     63 		if (blk_desc.size < snap_hdr.st.min_blk_size)
     64 			snap_hdr.st.min_blk_size = blk_desc.size;
     65 	} else {
     66 		snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
     67 	}
     68 
     69 	free(compr_buf);
     70 	compr_final(&ctx);
     71 }
     72 
     73 static void
     74 dedup(int fd, char *msg)
     75 {
     76 	struct snap *snap;
     77 	struct chunker *chunker;
     78 
     79 	snap = alloc_snap();
     80 	chunker = alloc_chunker(fd, BLKSIZE_MIN, BLKSIZE_MAX,
     81 	                        HASHMASK_BITS, WINSIZE);
     82 
     83 	while (fill_chunker(chunker) > 0) {
     84 		uint8_t *chunkp;
     85 		size_t chunk_size;
     86 
     87 		chunkp = get_chunk(chunker, &chunk_size);
     88 		snap = grow_snap(snap, snap->nr_blk_descs + 1);
     89 		dedup_chunk(snap, chunkp, chunk_size);
     90 		drain_chunker(chunker);
     91 	}
     92 
     93 	if (snap->nr_blk_descs > 0) {
     94 		if (msg != NULL) {
     95 			size_t size;
     96 
     97 			size = strlen(msg) + 1;
     98 			if (size > sizeof(snap->msg))
     99 				size = sizeof(snap->msg);
    100 			memcpy(snap->msg, msg, size);
    101 			snap->msg[size - 1] = '\0';
    102 		}
    103 		hash_snap(snap, snap->md, hash_algo);
    104 		append_snap(ifd, &snap_hdr, snap);
    105 	}
    106 
    107 	free_chunker(chunker);
    108 	free_snap(snap);
    109 }
    110 
    111 static int
    112 build_icache(struct snap *snap, void *arg)
    113 {
    114 	struct compr_ctx ctx;
    115 	uint8_t *buf;
    116 	uint64_t i;
    117 
    118 	if (compr_init(&ctx, compr_algo) < 0)
    119 		errx(1, "compr_init failed");
    120 	buf = alloc_buf(compr_size(&ctx, BLKSIZE_MAX));
    121 	for (i = 0; i < snap->nr_blk_descs; i++) {
    122 		struct blk_desc *blk_desc;
    123 
    124 		blk_desc = &snap->blk_desc[i];
    125 		insert_icache(icache, blk_desc);
    126 	}
    127 	free(buf);
    128 	compr_final(&ctx);
    129 	return WALK_CONTINUE;
    130 }
    131 
    132 static void
    133 init(void)
    134 {
    135 	ifd = open(SNAPSF, O_RDWR, 0600);
    136 	if (ifd < 0)
    137 		err(1, "open %s", SNAPSF);
    138 
    139 	sfd = open(STOREF, O_RDWR, 0600);
    140 	if (sfd < 0)
    141 		err(1, "open %s", STOREF);
    142 
    143 	if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
    144 	    flock(sfd, LOCK_NB | LOCK_EX) < 0)
    145 		err(1, "flock");
    146 
    147 
    148 	xlseek(ifd, 0, SEEK_SET);
    149 	load_snap_hdr(ifd, &snap_hdr);
    150 	xlseek(sfd, 0, SEEK_SET);
    151 	load_blk_hdr(sfd, &blk_hdr, &compr_algo, &hash_algo);
    152 
    153 	icache = alloc_icache();
    154 	walk_snap(ifd, &snap_hdr, build_icache, NULL);
    155 }
    156 
    157 static void
    158 term(void)
    159 {
    160 	xlseek(ifd, 0, SEEK_SET);
    161 	write_snap_hdr(ifd, &snap_hdr);
    162 	xlseek(sfd, 0, SEEK_SET);
    163 	write_blk_hdr(sfd, &blk_hdr);
    164 
    165 	fsync(ifd);
    166 	fsync(sfd);
    167 
    168 	close(ifd);
    169 	close(sfd);
    170 
    171 	free_icache(icache);
    172 }
    173 
    174 static void
    175 usage(void)
    176 {
    177 	fprintf(stderr, "usage: %s [-v] [-m message] [repo]\n", argv0);
    178 	exit(1);
    179 }
    180 
    181 int
    182 main(int argc, char *argv[])
    183 {
    184 	char *repo, *msg = NULL;
    185 
    186 	ARGBEGIN {
    187 	case 'm':
    188 		msg = EARGF(usage());
    189 		break;
    190 	case 'v':
    191 		verbose++;
    192 		break;
    193 	default:
    194 		usage();
    195 	} ARGEND
    196 
    197 	switch (argc) {
    198 	case 0:
    199 		repo = ".";
    200 		break;
    201 	case 1:
    202 		repo = argv[0];
    203 		break;
    204 	default:
    205 		usage();
    206 	};
    207 
    208 	if (chdir(repo) < 0)
    209 		err(1, "chdir: %s", repo);
    210 
    211 	init();
    212 	dedup(STDIN_FILENO, msg);
    213 	term();
    214 	return 0;
    215 }