commit 53741db3458c3a8fc633b7a5336d08062fa5e8a6
parent e1c69d4298f03af158a5a5637dad600cdd41f3dc
Author: sin <sin@2f30.org>
Date: Wed, 20 Feb 2019 14:06:23 +0000
Rename stuff
Diffstat:
M | dedup.c | | | 327 | +++++++++++++++++++++++++++++++++++++++---------------------------------------- |
1 file changed, 161 insertions(+), 166 deletions(-)
diff --git a/dedup.c b/dedup.c
@@ -14,7 +14,7 @@
#include "arg.h"
#include "tree.h"
-#define INDEXF ".index"
+#define SNAPSF ".snapshots"
#define STOREF ".store"
#define CACHEF ".cache"
@@ -41,41 +41,37 @@ struct stats {
uint64_t dedup_size;
uint64_t min_blk_size;
uint64_t max_blk_size;
- uint64_t nblks;
+ uint64_t nr_blks;
uint64_t cache_hits;
uint64_t cache_misses;
uint64_t reserved[4];
};
-/* index file header */
-struct enthdr {
+struct snapshot_hdr {
uint64_t flags;
- uint64_t nents;
+ uint64_t nr_snapshots;
uint64_t store_size;
uint64_t reserved[4];
struct stats st;
};
-/* block descriptor */
-struct bdescr {
+struct blk_desc {
uint8_t md[MDSIZE];
uint64_t offset;
uint64_t size;
};
-/* index file entry */
-struct ent {
+struct snapshot {
uint64_t size;
uint8_t msg[MSGSIZE];
uint8_t md[MDSIZE]; /* hash of file */
- uint64_t nblks;
- struct bdescr bdescr[];
+ uint64_t nr_blk_descs;
+ struct blk_desc blk_desc[];
};
-/* cache entry */
-struct cent {
- struct bdescr bdescr;
- RB_ENTRY(cent) e;
+struct cache_entry {
+ struct blk_desc blk_desc;
+ RB_ENTRY(cache_entry) e;
};
struct extract_args {
@@ -83,8 +79,8 @@ struct extract_args {
int fd;
};
-RB_HEAD(cache, cent) cache_head;
-struct enthdr enthdr;
+RB_HEAD(cache, cache_entry) cache_head;
+struct snapshot_hdr snaphdr;
int ifd;
int sfd;
int cfd;
@@ -177,7 +173,8 @@ chunk_blk(uint8_t *buf, size_t size)
fp = buzh_init(buf, WINSIZE);
for (i = 0; i < size - WINSIZE; i++) {
if (i > 0)
- fp = buzh_update(fp, buf[i - 1], buf[WINSIZE + i - 1], WINSIZE);
+ fp = buzh_update(fp, buf[i - 1], buf[WINSIZE + i - 1],
+ WINSIZE);
if ((fp & HASHMSK) == 0)
return i + WINSIZE;
}
@@ -224,7 +221,7 @@ print_md(FILE *fp, uint8_t *md, size_t size)
void
print_stats(struct stats *st)
{
- if (st->nblks == 0)
+ if (st->nr_blks == 0)
return;
fprintf(stderr, "original size: %llu bytes\n",
@@ -235,10 +232,10 @@ print_stats(struct stats *st)
(unsigned long long)st->dedup_size);
fprintf(stderr, "min/avg/max block size: %llu/%llu/%llu\n",
(unsigned long long)st->min_blk_size,
- (unsigned long long)st->dedup_size / st->nblks,
+ (unsigned long long)st->dedup_size / st->nr_blks,
(unsigned long long)st->max_blk_size);
fprintf(stderr, "number of blocks: %llu\n",
- (unsigned long long)st->nblks);
+ (unsigned long long)st->nr_blks);
fprintf(stderr, "total cache hits: %llu\n",
(unsigned long long)st->cache_hits);
fprintf(stderr, "total cache misses: %llu\n",
@@ -306,24 +303,24 @@ xwrite(int fd, const void *buf, size_t nbytes)
}
int
-cent_cmp(struct cent *e1, struct cent *e2)
+cache_entry_cmp(struct cache_entry *e1, struct cache_entry *e2)
{
int r;
- r = memcmp(e1->bdescr.md, e2->bdescr.md, sizeof(e1->bdescr.md));
+ r = memcmp(e1->blk_desc.md, e2->blk_desc.md, sizeof(e1->blk_desc.md));
if (r > 0)
return 1;
else if (r < 0)
return -1;
return 0;
}
-RB_PROTOTYPE(cache, cent, e, cent_cmp);
-RB_GENERATE(cache, cent, e, cent_cmp);
+RB_PROTOTYPE(cache, cache_entry, e, cache_entry_cmp);
+RB_GENERATE(cache, cache_entry, e, cache_entry_cmp);
-struct cent *
-alloc_cent(void)
+struct cache_entry *
+alloc_cache_entry(void)
{
- struct cent *ent;
+ struct cache_entry *ent;
ent = calloc(1, sizeof(*ent));
if (ent == NULL)
@@ -332,82 +329,82 @@ alloc_cent(void)
}
void
-add_cent(struct cent *cent)
+add_cache_entry(struct cache_entry *ent)
{
- RB_INSERT(cache, &cache_head, cent);
+ RB_INSERT(cache, &cache_head, ent);
}
void
flush_cache(void)
{
- struct cent *cent;
+ struct cache_entry *ent;
if (!cache_dirty)
return;
xlseek(cfd, 0, SEEK_SET);
- RB_FOREACH(cent, cache, &cache_head)
- xwrite(cfd, &cent->bdescr, sizeof(cent->bdescr));
+ RB_FOREACH(ent, cache, &cache_head)
+ xwrite(cfd, &ent->blk_desc, sizeof(ent->blk_desc));
}
void
free_cache(void)
{
- struct cent *cent, *tmp;
+ struct cache_entry *ent, *tmp;
- RB_FOREACH_SAFE(cent, cache, &cache_head, tmp) {
- RB_REMOVE(cache, &cache_head, cent);
- free(cent);
+ RB_FOREACH_SAFE(ent, cache, &cache_head, tmp) {
+ RB_REMOVE(cache, &cache_head, ent);
+ free(ent);
}
}
uint64_t
-cache_nents(void)
+cache_nr_entries(void)
{
struct stat sb;
if (fstat(cfd, &sb) < 0)
err(1, "fstat");
- return sb.st_size / sizeof(struct bdescr);
+ return sb.st_size / sizeof(struct blk_desc);
}
void
-append_ent(struct ent *ent)
+append_snap(struct snapshot *snap)
{
- /* Update index header */
- enthdr.nents++;
+ /* Update snapshot header */
+ snaphdr.nr_snapshots++;
xlseek(ifd, 0, SEEK_SET);
- xwrite(ifd, &enthdr, sizeof(enthdr));
+ xwrite(ifd, &snaphdr, sizeof(snaphdr));
- /* Append entry */
+ /* Append snapshot */
xlseek(ifd, 0, SEEK_END);
- ent->size = sizeof(*ent);
- ent->size += ent->nblks * sizeof(ent->bdescr[0]);
- xwrite(ifd, ent, ent->size);
+ snap->size = sizeof(*snap);
+ snap->size += snap->nr_blk_descs * sizeof(snap->blk_desc[0]);
+ xwrite(ifd, snap, snap->size);
}
-struct ent *
-alloc_ent(void)
+struct snapshot *
+alloc_snap(void)
{
- struct ent *ent;
+ struct snapshot *snap;
- ent = calloc(1, sizeof(*ent));
- if (ent == NULL)
+ snap = calloc(1, sizeof(*snap));
+ if (snap == NULL)
err(1, "calloc");
- return ent;
+ return snap;
}
-struct ent *
-grow_ent(struct ent *ent, uint64_t nblks)
+struct snapshot *
+grow_snap(struct snapshot *snap, uint64_t nr_blk_descs)
{
size_t size;
- size = sizeof(*ent);
- size += nblks * sizeof(ent->bdescr[0]);
- ent = realloc(ent, size);
- if (ent == NULL)
+ size = sizeof(*snap);
+ size += nr_blk_descs * sizeof(snap->blk_desc[0]);
+ snap = realloc(snap, size);
+ if (snap == NULL)
err(1, "realloc");
- return ent;
+ return snap;
}
uint8_t *
@@ -432,30 +429,30 @@ hash_blk(uint8_t *buf, size_t size, uint8_t *md)
}
void
-read_blk(uint8_t *buf, struct bdescr *bdescr)
+read_blk(uint8_t *buf, struct blk_desc *blk_desc)
{
- xlseek(sfd, bdescr->offset, SEEK_SET);
- if (xread(sfd, buf, bdescr->size) == 0)
+ xlseek(sfd, blk_desc->offset, SEEK_SET);
+ if (xread(sfd, buf, blk_desc->size) == 0)
errx(1, "read: unexpected EOF");
}
void
-append_blk(uint8_t *buf, struct bdescr *bdescr)
+append_blk(uint8_t *buf, struct blk_desc *blk_desc)
{
- xlseek(sfd, enthdr.store_size, SEEK_SET);
- xwrite(sfd, buf, bdescr->size);
- enthdr.store_size += bdescr->size;
+ xlseek(sfd, snaphdr.store_size, SEEK_SET);
+ xwrite(sfd, buf, blk_desc->size);
+ snaphdr.store_size += blk_desc->size;
}
int
-lookup_bdescr(uint8_t *md, struct bdescr *bdescr)
+lookup_blk_desc(uint8_t *md, struct blk_desc *blk_desc)
{
- struct cent *ent, key;
+ struct cache_entry *ent, key;
- memcpy(key.bdescr.md, md, sizeof(key.bdescr.md));
+ memcpy(key.blk_desc.md, md, sizeof(key.blk_desc.md));
ent = RB_FIND(cache, &cache_head, &key);
if (ent != NULL) {
- *bdescr = ent->bdescr;
+ *blk_desc = ent->blk_desc;
return 0;
}
return -1;
@@ -465,115 +462,113 @@ void
dedup(int fd, char *msg)
{
uint8_t *buf[2];
- struct ent *ent;
+ struct snapshot *snap;
SHA256_CTX ctx;
ssize_t n, bufsize;
buf[0] = alloc_buf(BLKSIZE);
buf[1] = alloc_buf(comp_size(BLKSIZE));
- ent = alloc_ent();
+ snap = alloc_snap();
bufsize = 0;
SHA256_Init(&ctx);
- while ((n = xread(fd, buf[0] + bufsize, BLKSIZE - bufsize)) > 0 || bufsize > 0) {
+ while ((n = xread(fd, buf[0] + bufsize, BLKSIZE - bufsize)) > 0 ||
+ bufsize > 0) {
+
uint8_t md[MDSIZE];
- struct bdescr bdescr;
+ struct blk_desc blk_desc;
size_t blksize, csize;
uint8_t *inp = buf[0]; /* input buf */
uint8_t *outp = buf[1]; /* compressed buf */
if (n > 0) {
bufsize += n;
- enthdr.st.orig_size += n;
+ snaphdr.st.orig_size += n;
}
blksize = chunk_blk(inp, bufsize);
csize = comp(inp, outp, blksize, comp_size(BLKSIZE));
- enthdr.st.comp_size += csize;
+ snaphdr.st.comp_size += csize;
hash_blk(outp, csize, md);
/* Calculate file hash one block at a time */
SHA256_Update(&ctx, inp, blksize);
- ent = grow_ent(ent, ent->nblks + 1);
+ snap = grow_snap(snap, snap->nr_blk_descs + 1);
- if (lookup_bdescr(md, &bdescr) < 0) {
- struct cent *cent;
+ if (lookup_blk_desc(md, &blk_desc) < 0) {
+ struct cache_entry *ent;
- memcpy(bdescr.md, md, sizeof(bdescr.md));
- bdescr.offset = enthdr.store_size;
- bdescr.size = csize;
+ memcpy(blk_desc.md, md, sizeof(blk_desc.md));
+ blk_desc.offset = snaphdr.store_size;
+ blk_desc.size = csize;
- /* Update index entry */
- ent->bdescr[ent->nblks++] = bdescr;
+ snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
- /* Store block */
- append_blk(outp, &bdescr);
+ append_blk(outp, &blk_desc);
- /* Create a cache entry for this block */
- cent = alloc_cent();
- cent->bdescr = bdescr;
- add_cent(cent);
+ ent = alloc_cache_entry();
+ ent->blk_desc = blk_desc;
+ add_cache_entry(ent);
cache_dirty = 1;
- enthdr.st.dedup_size += bdescr.size;
- enthdr.st.nblks++;
- enthdr.st.cache_misses++;
+ snaphdr.st.dedup_size += blk_desc.size;
+ snaphdr.st.nr_blks++;
+ snaphdr.st.cache_misses++;
- if (bdescr.size > enthdr.st.max_blk_size)
- enthdr.st.max_blk_size = bdescr.size;
- if (bdescr.size < enthdr.st.min_blk_size)
- enthdr.st.min_blk_size = bdescr.size;
+ if (blk_desc.size > snaphdr.st.max_blk_size)
+ snaphdr.st.max_blk_size = blk_desc.size;
+ if (blk_desc.size < snaphdr.st.min_blk_size)
+ snaphdr.st.min_blk_size = blk_desc.size;
} else {
- ent->bdescr[ent->nblks++] = bdescr;
- enthdr.st.cache_hits++;
+ snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
+ snaphdr.st.cache_hits++;
}
memmove(inp, inp + blksize, bufsize - blksize);
bufsize -= blksize;
}
- if (ent->nblks > 0) {
- /* Calculate hash and add this entry to the index */
- SHA256_Final(ent->md, &ctx);
+ if (snap->nr_blk_descs > 0) {
+ SHA256_Final(snap->md, &ctx);
if (msg != NULL) {
size_t size;
size = strlen(msg) + 1;
- if (size > sizeof(ent->msg))
- size = sizeof(ent->msg);
- memcpy(ent->msg, msg, size);
- ent->msg[size - 1] = '\0';
+ if (size > sizeof(snap->msg))
+ size = sizeof(snap->msg);
+ memcpy(snap->msg, msg, size);
+ snap->msg[size - 1] = '\0';
}
- append_ent(ent);
+ append_snap(snap);
}
- free(ent);
+ free(snap);
free(buf[1]);
free(buf[0]);
}
int
-extract(struct ent *ent, void *arg)
+extract(struct snapshot *snap, void *arg)
{
uint8_t *buf[2];
struct extract_args *args = arg;
uint64_t i;
- if (memcmp(ent->md, args->md, sizeof(ent->md)) != 0)
+ if (memcmp(snap->md, args->md, sizeof(snap->md)) != 0)
return WALK_CONTINUE;
buf[0] = alloc_buf(BLKSIZE);
buf[1] = alloc_buf(comp_size(BLKSIZE));
- for (i = 0; i < ent->nblks; i++) {
+ for (i = 0; i < snap->nr_blk_descs; i++) {
size_t blksize;
- read_blk(buf[1], &ent->bdescr[i]);
- blksize = decomp(buf[1], buf[0], ent->bdescr[i].size, BLKSIZE);
+ read_blk(buf[1], &snap->blk_desc[i]);
+ blksize = decomp(buf[1], buf[0], snap->blk_desc[i].size, BLKSIZE);
xwrite(args->fd, buf[0], blksize);
}
free(buf[1]);
@@ -582,7 +577,7 @@ extract(struct ent *ent, void *arg)
}
int
-check(struct ent *ent, void *arg)
+check(struct snapshot *snap, void *arg)
{
uint8_t md[MDSIZE];
uint8_t *buf;
@@ -592,48 +587,48 @@ check(struct ent *ent, void *arg)
buf = alloc_buf(comp_size(BLKSIZE));
/*
* Calculate hash for each block and compare
- * with index entry block descriptor
+ * against snapshot entry block descriptor
*/
- for (i = 0; i < ent->nblks; i++) {
- read_blk(buf, &ent->bdescr[i]);
+ for (i = 0; i < snap->nr_blk_descs; i++) {
+ read_blk(buf, &snap->blk_desc[i]);
SHA256_Init(&ctx);
- SHA256_Update(&ctx, buf, ent->bdescr[i].size);
+ SHA256_Update(&ctx, buf, snap->blk_desc[i].size);
SHA256_Final(md, &ctx);
- if (memcmp(ent->bdescr[i].md, md,
- sizeof(ent->bdescr[i]).md) == 0)
+ if (memcmp(snap->blk_desc[i].md, md,
+ sizeof(snap->blk_desc[i]).md) == 0)
continue;
fprintf(stderr, "Block hash mismatch\n");
fprintf(stderr, " Expected hash: ");
- print_md(stderr, ent->md, sizeof(ent->md));
+ print_md(stderr, snap->md, sizeof(snap->md));
fputc('\n', stderr);
fprintf(stderr, " Actual hash: ");
print_md(stderr, md, sizeof(md));
fputc('\n', stderr);
fprintf(stderr, " Offset: %llu\n",
- (unsigned long long)ent->bdescr[i].offset);
+ (unsigned long long)snap->blk_desc[i].offset);
fprintf(stderr, " Size: %llu\n",
- (unsigned long long)ent->bdescr[i].size);
+ (unsigned long long)snap->blk_desc[i].size);
}
free(buf);
return WALK_CONTINUE;
}
int
-list(struct ent *ent, void *arg)
+list(struct snapshot *snap, void *arg)
{
- print_md(stdout, ent->md, sizeof(ent->md));
- if (ent->msg[0] != '\0')
- printf("\t%s\n", ent->msg);
+ print_md(stdout, snap->md, sizeof(snap->md));
+ if (snap->msg[0] != '\0')
+ printf("\t%s\n", snap->msg);
else
putchar('\n');
return WALK_CONTINUE;
}
int
-rebuild_cache(struct ent *ent, void *arg)
+rebuild_cache(struct snapshot *snap, void *arg)
{
uint8_t md[MDSIZE];
uint8_t *buf;
@@ -641,47 +636,47 @@ rebuild_cache(struct ent *ent, void *arg)
uint64_t i;
buf = alloc_buf(comp_size(BLKSIZE));
- for (i = 0; i < ent->nblks; i++) {
- struct cent *cent;
+ for (i = 0; i < snap->nr_blk_descs; i++) {
+ struct cache_entry *ent;
- read_blk(buf, &ent->bdescr[i]);
+ read_blk(buf, &snap->blk_desc[i]);
SHA256_Init(&ctx);
- SHA256_Update(&ctx, buf, ent->bdescr[i].size);
+ SHA256_Update(&ctx, buf, snap->blk_desc[i].size);
SHA256_Final(md, &ctx);
- cent = alloc_cent();
- memcpy(cent->bdescr.md, md, sizeof(cent->bdescr.md));
- cent->bdescr = ent->bdescr[i];
- add_cent(cent);
+ ent = alloc_cache_entry();
+ memcpy(ent->blk_desc.md, md, sizeof(ent->blk_desc.md));
+ ent->blk_desc = snap->blk_desc[i];
+ add_cache_entry(ent);
cache_dirty = 1;
}
free(buf);
return WALK_CONTINUE;
}
-/* Walk through all index entries and call fn() on each one */
+/* Walk through all snapshots and call fn() on each one */
void
-walk(int (*fn)(struct ent *, void *), void *arg)
+walk(int (*fn)(struct snapshot *, void *), void *arg)
{
- struct ent *ent;
+ struct snapshot *snap;
uint64_t i;
- ent = alloc_ent();
- xlseek(ifd, sizeof(enthdr), SEEK_SET);
- for (i = 0; i < enthdr.nents; i++) {
- if (xread(ifd, ent, sizeof(*ent)) == 0)
+ snap = alloc_snap();
+ xlseek(ifd, sizeof(snaphdr), SEEK_SET);
+ for (i = 0; i < snaphdr.nr_snapshots; i++) {
+ if (xread(ifd, snap, sizeof(*snap)) == 0)
errx(1, "read: unexpected EOF");
- ent = grow_ent(ent, ent->nblks);
- if (xread(ifd, ent->bdescr,
- ent->nblks * sizeof(ent->bdescr[0])) == 0)
+ snap = grow_snap(snap, snap->nr_blk_descs);
+ if (xread(ifd, snap->blk_desc,
+ snap->nr_blk_descs * sizeof(snap->blk_desc[0])) == 0)
errx(1, "read: unexpected EOF");
- if ((*fn)(ent, arg) == WALK_STOP)
+ if ((*fn)(snap, arg) == WALK_STOP)
break;
}
- free(ent);
+ free(snap);
}
void
@@ -689,15 +684,15 @@ init_cache(void)
{
uint64_t nents, i;
- nents = cache_nents();
+ nents = cache_nr_entries();
xlseek(cfd, 0, SEEK_SET);
for (i = 0; i < nents; i++) {
- struct cent *cent;
+ struct cache_entry *ent;
- cent = alloc_cent();
- if (xread(cfd, &cent->bdescr, sizeof(cent->bdescr)) == 0)
+ ent = alloc_cache_entry();
+ if (xread(cfd, &ent->blk_desc, sizeof(ent->blk_desc)) == 0)
errx(1, "read: unexpected EOF");
- add_cent(cent);
+ add_cache_entry(ent);
}
}
@@ -706,9 +701,9 @@ init(void)
{
struct stat sb;
- ifd = open(INDEXF, O_RDWR | O_CREAT, 0600);
+ ifd = open(SNAPSF, O_RDWR | O_CREAT, 0600);
if (ifd < 0)
- err(1, "open %s", INDEXF);
+ err(1, "open %s", SNAPSF);
sfd = open(STOREF, O_RDWR | O_CREAT, 0600);
if (sfd < 0)
@@ -724,24 +719,24 @@ init(void)
errx(1, "busy lock");
if (fstat(ifd, &sb) < 0)
- err(1, "fstat %s", INDEXF);
+ err(1, "fstat %s", SNAPSF);
if (sb.st_size != 0) {
uint8_t maj, min;
- xread(ifd, &enthdr, sizeof(enthdr));
- min = enthdr.flags & 0xff;
- maj = (enthdr.flags >> 8) & 0xff;
+ xread(ifd, &snaphdr, sizeof(snaphdr));
+ min = snaphdr.flags & 0xff;
+ maj = (snaphdr.flags >> 8) & 0xff;
if (maj != VER_MAJ || min != VER_MIN)
- errx(1, "expected index format version %u.%u but got %u.%u",
+ errx(1, "expected snapshot format version %u.%u but got %u.%u",
VER_MAJ, VER_MIN, maj, min);
} else {
- enthdr.flags = (VER_MAJ << 8) | VER_MIN;
- xwrite(ifd, &enthdr, sizeof(enthdr));
- enthdr.st.min_blk_size = comp_size(BLKSIZE);
+ snaphdr.flags = (VER_MAJ << 8) | VER_MIN;
+ xwrite(ifd, &snaphdr, sizeof(snaphdr));
+ snaphdr.st.min_blk_size = comp_size(BLKSIZE);
}
- if (cache_nents() != 0)
+ if (cache_nr_entries() != 0)
init_cache();
else
walk(rebuild_cache, NULL);
@@ -751,7 +746,7 @@ void
term(void)
{
if (verbose)
- print_stats(&enthdr.st);
+ print_stats(&snaphdr.st);
flush_cache();
free_cache();