dedup

deduplicating backup program
git clone git://git.2f30.org/dedup
Log | Files | Refs | README | LICENSE

commit 44ce796d4a5c9a22d136f4f4c37596254d70bee3
parent fe4061d87b77bf2789310758b799078da594f2bf
Author: sin <sin@2f30.org>
Date:   Tue, 20 Mar 2018 18:03:17 +0000

Minor refactoring

Diffstat:
M dedup.c | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/dedup.c b/dedup.c
@@ -19,7 +19,7 @@ struct enthdr {
 } __attribute__((packed));
 
 struct ent {
-	uint64_t sz;
+	uint64_t sz; /* size of entire entry structure */
 	unsigned char md[SHA256_DIGEST_LENGTH];
 	uint64_t nblks;
 	uint64_t blks[];
@@ -90,10 +90,12 @@ dump_blk(struct blk *blk)
 void
 append_ent(struct ent *ent)
 {
+	/* Update index header */
 	enthdr.nents++;
 	lseek(ifd, 0, SEEK_SET);
 	write(ifd, &enthdr, sizeof(enthdr));
 
+	/* Append entry */
 	lseek(ifd, 0, SEEK_END);
 	ent->sz = sizeof(*ent);
 	ent->sz += ent->nblks * sizeof(ent->blks[0]);
@@ -117,6 +119,7 @@ grow_ent(struct ent *ent, uint64_t nblks)
 	size_t sz;
 
 	sz = sizeof(*ent);
+	/* XXX: Smarter realloc strategy */
 	sz += nblks * sizeof(ent->blks[0]);
 	ent = realloc(ent, sz);
 	if (ent == NULL)
@@ -185,7 +188,10 @@ dedup(int fd)
 		hash_blk(&blk);
 		if (verbose)
 			dump_blk(&blk);
+
+		/* Rolling hash of input stream */
 		SHA256_Update(&ctx, blk.data, blk.sz);
+		/* Prepare for adding a new block index for this entry */
 		ent = grow_ent(ent, ent->nblks + 1);
 
 		if (lookup_blk(&blk, &blkidx) == -1) {
@@ -203,19 +209,19 @@ dedup(int fd)
 	if (n < 0)
 		err(1, "read");
 
+	/* Calculate hash and add this entry to the index */
 	SHA256_Final(ent->md, &ctx);
 	append_ent(ent);
 	free(ent);
 }
 
 void
-str2id(unsigned char *idstr, uint8_t *id)
+str2bin(unsigned char *s, uint8_t *d)
 {
-	size_t i, len = strlen(idstr) / 2;
-	char *p = idstr;
+	size_t i, len = strlen(s) / 2;
 
-	for (i = 0; i < len; i++, p += 2)
-		sscanf(p, "%2hhx", &id[i]);
+	for (i = 0; i < len; i++, s += 2)
+		sscanf(s, "%2hhx", &d[i]);
 }
 
 void
@@ -225,7 +231,7 @@ extract(unsigned char *id, int fd)
 	struct ent *ent;
 	uint64_t i;
 
-	str2id(id, md);
+	str2bin(id, md);
 	lseek(ifd, sizeof(enthdr), SEEK_SET);
 	for (i = 0; i < enthdr.nents; i++) {
 		ent = alloc_ent();