commit 5ae463d1c2cb6c77d735d53ee7e00c3a00b70090
parent 4a9c691eb97725f224362ee65dc088f0260bc8b6
Author: sin <sin@2f30.org>
Date: Wed, 21 Mar 2018 13:45:58 +0000
Rework cache code
Diffstat:
M | dedup.c | | | 99 | +++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------- |
1 file changed, 64 insertions(+), 35 deletions(-)
diff --git a/dedup.c b/dedup.c
@@ -35,17 +35,18 @@ struct blk {
unsigned char data[BLKSIZ];
} __attribute__((packed));
-struct cent {
+struct cache_data { /* Cache file record: a block digest and that block's index; read and written as the raw bytes of this struct. */
unsigned char md[SHA256_DIGEST_LENGTH];
uint64_t blkidx;
-} __attribute__((packed));
+}; /* NOTE(review): the packed attribute is dropped here, yet sizeof(struct cache_data) still defines the on-disk record size; confirm no padding is introduced on the target ABI. */
-struct hash_ent {
- struct cent cent;
- RB_ENTRY(hash_ent) e;
+struct cache_ent { /* In-memory node of the red-black tree that caches block digests. */
+ struct cache_data data; /* the record that flush_cache() writes to the cache file */
+ int dirty; /* non-zero while data still has to be written to the cache file */
+ RB_ENTRY(cache_ent) e; /* red-black tree linkage */
};
-RB_HEAD(hash_tree, hash_ent) hash_tree_head;
+RB_HEAD(cache, cache_ent) cache_head; /* root of the tree named "cache"; entries are ordered by cache_ent_cmp() */
struct enthdr enthdr;
int ifd;
int sfd;
@@ -122,33 +123,53 @@ xwrite(int fd, const void *buf, size_t nbytes)
}
int
-hash_ent_cmp(struct hash_ent *e1, struct hash_ent *e2)
+cache_ent_cmp(struct cache_ent *e1, struct cache_ent *e2) /* Ordering function for the cache tree: returns -1, 0 or 1 from a bytewise comparison of the two digests. */
{
int r;
- r = memcmp(e1->cent.md, e2->cent.md, sizeof(e1->cent.md));
+ r = memcmp(e1->data.md, e2->data.md, sizeof(e1->data.md)); /* only md is compared; blkidx and dirty play no part in the ordering */
if (r > 0)
return 1;
else if (r < 0)
return -1;
return 0;
}
-RB_PROTOTYPE(hash_tree, hash_ent, e, hash_ent_cmp);
-RB_GENERATE(hash_tree, hash_ent, e, hash_ent_cmp);
+RB_PROTOTYPE(cache, cache_ent, e, cache_ent_cmp); /* declare the tree operations for the "cache" tree */
+RB_GENERATE(cache, cache_ent, e, cache_ent_cmp); /* define them, using cache_ent_cmp() for ordering */
-struct hash_ent *
-hash_ent_add(unsigned char *md, uint64_t blkidx)
+struct cache_ent *
+alloc_cache_ent(unsigned char *md, uint64_t blkidx) /* Allocate a cache entry holding a copy of md and blkidx; exits through err() if allocation fails. Does not insert the entry into the tree. */
{
- struct hash_ent *hash_ent;
+ struct cache_ent *ent;
- hash_ent = malloc(sizeof(*hash_ent));
- if (hash_ent == NULL)
+ ent = calloc(1, sizeof(*ent)); /* zero-filled, so dirty starts out as 0 */
+ if (ent == NULL) /* NOTE(review): the err() message below still says "malloc" although calloc() is now used */
err(1, "malloc");
+ memcpy(&ent->data.md, md, sizeof(ent->data.md)); /* copies sizeof(ent->data.md) bytes from the caller's buffer */
+ ent->data.blkidx = blkidx;
+ return ent; /* caller decides whether to insert it and whether to mark it dirty */
+}
+
+void
+add_cache_ent(struct cache_ent *ent) /* Insert ent into the global cache tree rooted at cache_head. */
+{
+ RB_INSERT(cache, &cache_head, ent); /* NOTE(review): return value ignored; per tree(3), RB_INSERT returns the existing element and does not insert when an equal key is already present */
+}
- memcpy(&hash_ent->cent.md, md, sizeof(hash_ent->cent.md));
- hash_ent->cent.blkidx = blkidx;
- RB_INSERT(hash_tree, &hash_tree_head, hash_ent);
- return hash_ent;
+void
+flush_cache(void) /* Write every dirty cache entry to the cache file (cfd) and clear its dirty flag. */
+{
+ struct cache_ent *ent;
+
+ if (verbose)
+ fprintf(stderr, "flushing cache...\n");
+ RB_FOREACH(ent, cache, &cache_head) { /* walks all entries currently in the tree */
+ if (!ent->dirty)
+ continue; /* clean entries are skipped, nothing is rewritten for them */
+ lseek(cfd, ent->data.blkidx * sizeof(ent->data), SEEK_SET); /* record offset = blkidx * record size; NOTE(review): lseek() result is not checked */
+ xwrite(cfd, &ent->data, sizeof(ent->data)); /* the struct is written as raw bytes */
+ ent->dirty = 0;
+ }
}
void
@@ -200,7 +221,7 @@ storefile_nblks(void)
uint64_t
cachefile_nblks(void)
{
- return lseek(cfd, 0, SEEK_END) / sizeof(struct cent);
+ return lseek(cfd, 0, SEEK_END) / sizeof(struct cache_data); /* record count = end-of-file offset / record size; leaves the cfd offset at end of file. NOTE(review): an lseek() failure (-1) is not detected */
}
void
@@ -231,12 +252,12 @@ append_blk(struct blk *blk)
int
lookup_blk(struct blk *blk, uint64_t *blkidx)
{
- struct hash_ent *hash_ent, key;
+ struct cache_ent *ent, key; /* key is a stack-allocated search key; only key.data.md gets filled in */
- memcpy(key.cent.md, blk->md, sizeof(key.cent.md));
- hash_ent = RB_FIND(hash_tree, &hash_tree_head, &key);
- if (hash_ent != NULL) {
- *blkidx = hash_ent->cent.blkidx;
+ memcpy(key.data.md, blk->md, sizeof(key.data.md)); /* the block's digest is the lookup key */
+ ent = RB_FIND(cache, &cache_head, &key); /* search the in-memory cache tree by digest */
+ if (ent != NULL) {
+ *blkidx = ent->data.blkidx; /* hit: report the cached block index (function returns 0; -1 on a miss, leaving *blkidx untouched) */
return 0;
}
return -1;
@@ -264,13 +285,15 @@ dedup(int fd)
ent = grow_ent(ent, ent->nblks + 1);
if (lookup_blk(&blk, &blkidx) == -1) {
- struct hash_ent *hash_ent;
+ struct cache_ent *cache_ent;
uint64_t nblks = storefile_nblks();
+ /* Create a cache entry for this block */
+ cache_ent = alloc_cache_ent(blk.md, nblks);
+ add_cache_ent(cache_ent);
+ cache_ent->dirty = 1;
+
ent->blks[ent->nblks++] = nblks;
- hash_ent = hash_ent_add(blk.md, nblks);
- lseek(cfd, 0, SEEK_END);
- xwrite(cfd, &hash_ent->cent, sizeof(hash_ent->cent));
append_blk(&blk);
} else {
ent->blks[ent->nblks++] = blkidx;
@@ -281,6 +304,7 @@ dedup(int fd)
SHA256_Final(ent->md, &ctx);
append_ent(ent);
free(ent);
+ flush_cache();
}
void
@@ -339,13 +363,15 @@ rebuild_cache(void)
nblks = storefile_nblks();
lseek(cfd, 0, SEEK_SET);
for (i = 0; i < nblks; i++) {
- struct hash_ent *hash_ent;
+ struct cache_ent *ent;
struct blk blk;
read_blk(&blk, i);
- hash_ent = hash_ent_add(blk.md, i);
- xwrite(cfd, &hash_ent->cent, sizeof(hash_ent->cent));
+ ent = alloc_cache_ent(blk.md, i);
+ add_cache_ent(ent);
+ ent->dirty = 1;
}
+ flush_cache();
}
void
@@ -359,11 +385,13 @@ init_cache(void)
nblks = cachefile_nblks();
lseek(cfd, 0, SEEK_SET);
for (i = 0; i < nblks; i++) {
- struct cent cent;
+ struct blk blk;
+ struct cache_ent *ent;
- if (xread(cfd, ¢, sizeof(cent)) == 0)
+ ent = alloc_cache_ent(blk.md, i);
+ if (xread(cfd, &ent->data, sizeof(ent->data)) == 0)
errx(1, "unexpected EOF");
- hash_ent_add(cent.md, cent.blkidx);
+ add_cache_ent(ent);
}
}
@@ -401,6 +429,7 @@ term(void)
fsync(ifd);
fsync(sfd);
fsync(cfd);
+
close(ifd);
close(sfd);
close(cfd);