commit 44ce796d4a5c9a22d136f4f4c37596254d70bee3
parent fe4061d87b77bf2789310758b799078da594f2bf
Author: sin <sin@2f30.org>
Date: Tue, 20 Mar 2018 18:03:17 +0000
Minor refactoring: rename str2id() to str2bin() and add explanatory comments
Diffstat:
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/dedup.c b/dedup.c
@@ -19,7 +19,7 @@ struct enthdr {
} __attribute__((packed));
struct ent {
- uint64_t sz;
+ uint64_t sz; /* size of entire entry structure */
unsigned char md[SHA256_DIGEST_LENGTH];
uint64_t nblks;
uint64_t blks[];
@@ -90,10 +90,12 @@ dump_blk(struct blk *blk)
void
append_ent(struct ent *ent)
{
+ /* Update index header */
enthdr.nents++;
lseek(ifd, 0, SEEK_SET);
write(ifd, &enthdr, sizeof(enthdr));
+ /* Append entry */
lseek(ifd, 0, SEEK_END);
ent->sz = sizeof(*ent);
ent->sz += ent->nblks * sizeof(ent->blks[0]);
@@ -117,6 +119,7 @@ grow_ent(struct ent *ent, uint64_t nblks)
size_t sz;
sz = sizeof(*ent);
+ /* XXX: Smarter realloc strategy */
sz += nblks * sizeof(ent->blks[0]);
ent = realloc(ent, sz);
if (ent == NULL)
@@ -185,7 +188,10 @@ dedup(int fd)
hash_blk(&blk);
if (verbose)
dump_blk(&blk);
+
+ /* Update running SHA-256 hash of input stream */
SHA256_Update(&ctx, blk.data, blk.sz);
+ /* Prepare for adding a new block index for this entry */
ent = grow_ent(ent, ent->nblks + 1);
if (lookup_blk(&blk, &blkidx) == -1) {
@@ -203,19 +209,19 @@ dedup(int fd)
if (n < 0)
err(1, "read");
+ /* Calculate hash and add this entry to the index */
SHA256_Final(ent->md, &ctx);
append_ent(ent);
free(ent);
}
void
-str2id(unsigned char *idstr, uint8_t *id)
+str2bin(unsigned char *s, uint8_t *d)
{
- size_t i, len = strlen(idstr) / 2;
- char *p = idstr;
+ size_t i, len = strlen(s) / 2;
- for (i = 0; i < len; i++, p += 2)
- sscanf(p, "%2hhx", &id[i]);
+ for (i = 0; i < len; i++, s += 2)
+ sscanf(s, "%2hhx", &d[i]);
}
void
@@ -225,7 +231,7 @@ extract(unsigned char *id, int fd)
struct ent *ent;
uint64_t i;
- str2id(id, md);
+ str2bin(id, md);
lseek(ifd, sizeof(enthdr), SEEK_SET);
for (i = 0; i < enthdr.nents; i++) {
ent = alloc_ent();