commit 9adb9c6c1a308e2845d822987948afbf12fc943b
parent 19420eb96fb72d9d5f7940678d1b957aa26f1fce
Author: sin <sin@2f30.org>
Date: Fri, 22 Feb 2019 23:24:03 +0000
Move rbtree cache to cache.c
Diffstat:
M | Makefile | | | 5 | +++-- |
A | cache.c | | | 94 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | dedup.c | | | 222 | ++++++++++++++++++++++--------------------------------------------------------- |
M | dedup.h | | | 7 | +++++++ |
A | types.h | | | 44 | ++++++++++++++++++++++++++++++++++++++++++++ |
5 files changed, 210 insertions(+), 162 deletions(-)
diff --git a/Makefile b/Makefile
@@ -2,8 +2,8 @@ VERSION = 0.4
PREFIX = /usr/local
MANPREFIX = $(PREFIX)/man
BIN = dedup
-SRC = $(BIN).c chunker.c hash.c pack.c unpack.c utils.c
-OBJ = $(BIN).o chunker.o hash.o pack.o unpack.o utils.o
+SRC = $(BIN).c cache.c chunker.c hash.c pack.c unpack.c utils.c
+OBJ = $(BIN).o cache.o chunker.o hash.o pack.o unpack.o utils.o
DISTFILES = \
$(SRC) \
LICENSE \
@@ -14,6 +14,7 @@ DISTFILES = \
$(BIN).1 \
dedup.h \
tree.h \
+ types.h \
CFLAGS = -g -Wall
CPPFLAGS = -I/usr/local/include -D_FILE_OFFSET_BITS=64
diff --git a/cache.c b/cache.c
@@ -0,0 +1,94 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <err.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "dedup.h"
+#include "tree.h"
+
+struct cache_node { /* tree node: one cache entry plus its red-black linkage */
+ struct cache_entry ent; /* payload; ent.md is what cache_node_cmp() compares */
+ RB_ENTRY(cache_node) e; /* linkage field named in RB_PROTOTYPE/RB_GENERATE */
+};
+
+static RB_HEAD(cache, cache_node) cache_head; /* head of the tree used by every function in this file */
+
+/* Order two nodes by their digest bytes; the result is clamped to -1, 0 or 1. */
+static int
+cache_node_cmp(struct cache_node *e1, struct cache_node *e2)
+{
+	int diff;
+
+	diff = memcmp(e1->ent.md, e2->ent.md, sizeof(e1->ent.md));
+	if (diff < 0)
+		return -1;
+	return diff > 0;
+}
+/* Instantiate the red-black tree routines for cache_node, keyed by cache_node_cmp. */
+static RB_PROTOTYPE(cache, cache_node, e, cache_node_cmp);
+static RB_GENERATE(cache, cache_node, e, cache_node_cmp);
+
+/* Allocate a zeroed node holding a copy of *ent; exits through err() on failure. */
+static struct cache_node *
+alloc_cache_node(struct cache_entry *ent)
+{
+	struct cache_node *n = calloc(1, sizeof(*n));
+
+	if (!n)
+		err(1, "calloc");
+	n->ent = *ent;
+	return n;
+}
+
+static void
+free_cache_node(struct cache_node *node) /* release a node obtained from alloc_cache_node() */
+{
+ free(node); /* the node owns no other allocations: ent is embedded by value */
+}
+
+/* Cache a copy of *ent; a digest that is already cached keeps its existing node. */
+void
+add_cache_entry(struct cache_entry *ent)
+{
+	struct cache_node *node = alloc_cache_node(ent);
+	if (RB_INSERT(cache, &cache_head, node) != NULL)
+		free_cache_node(node); /* duplicate key: node was not linked, so free it */
+}
+
+/* Look up ent->md: on a hit overwrite *ent with the cached entry and return 0, else -1. */
+int
+lookup_cache_entry(struct cache_entry *ent)
+{
+	struct cache_node probe, *hit;
+
+	probe.ent = *ent;
+	hit = RB_FIND(cache, &cache_head, &probe);
+	if (hit == NULL)
+		return -1;
+	*ent = hit->ent;
+	return 0;
+}
+
+void
+walk_cache(int (*fn)(struct cache_entry *))
+{
+	struct cache_node *cur;
+	/* Visit every cached entry; fn's return value is not inspected. */
+	RB_FOREACH(cur, cache, &cache_head)
+		fn(&cur->ent);
+}
+
+/* Unlink and free nodes one at a time until the tree is empty. */
+void
+free_cache(void)
+{
+	struct cache_node *cur;
+	while ((cur = RB_MIN(cache, &cache_head)) != NULL) {
+		RB_REMOVE(cache, &cache_head, cur);
+		free_cache_node(cur);
+	}
+}
diff --git a/dedup.c b/dedup.c
@@ -1,3 +1,4 @@
+#include <sys/types.h>
#include <sys/stat.h>
#include <sys/file.h>
@@ -14,67 +15,21 @@
#include "arg.h"
#include "dedup.h"
-#include "tree.h"
#define SNAPSF ".snapshots"
#define STOREF ".store"
#define CACHEF ".cache"
-#define MSGSIZE 256
-#define MDSIZE SHA256_DIGEST_LENGTH
-
-/* file format version */
-#define VER_MIN 1
-#define VER_MAJ 0
-
enum {
WALK_CONTINUE,
WALK_STOP
};
-struct stats {
- uint64_t orig_size;
- uint64_t comp_size;
- uint64_t dedup_size;
- uint64_t min_blk_size;
- uint64_t max_blk_size;
- uint64_t nr_blks;
- uint64_t reserved[6];
-};
-
-struct snapshot_hdr {
- uint64_t flags;
- uint64_t nr_snapshots;
- uint64_t store_size;
- uint64_t reserved[4];
- struct stats st;
-};
-
-struct blk_desc {
- uint8_t md[MDSIZE];
- uint64_t offset;
- uint64_t size;
-};
-
-struct snapshot {
- uint64_t size;
- uint8_t msg[MSGSIZE];
- uint8_t md[MDSIZE]; /* hash of file */
- uint64_t nr_blk_descs;
- struct blk_desc blk_desc[];
-};
-
-struct cache_entry {
- struct blk_desc blk_desc;
- RB_ENTRY(cache_entry) e;
-};
-
struct extract_args {
uint8_t *md;
int fd;
};
-static RB_HEAD(cache, cache_entry) cache_head;
static struct snapshot_hdr snaphdr;
static int ifd;
static int sfd;
@@ -144,78 +99,6 @@ print_stats(struct stats *st)
fprintf(stderr, "cache misses: %llu\n", cache_misses);
}
-static int
-cache_entry_cmp(struct cache_entry *e1, struct cache_entry *e2)
-{
- int r;
-
- r = memcmp(e1->blk_desc.md, e2->blk_desc.md, sizeof(e1->blk_desc.md));
- if (r > 0)
- return 1;
- else if (r < 0)
- return -1;
- return 0;
-}
-static RB_PROTOTYPE(cache, cache_entry, e, cache_entry_cmp);
-static RB_GENERATE(cache, cache_entry, e, cache_entry_cmp);
-
-static struct cache_entry *
-alloc_cache_entry(void)
-{
- struct cache_entry *ent;
-
- ent = calloc(1, sizeof(*ent));
- if (ent == NULL)
- err(1, "calloc");
- return ent;
-}
-
-static void
-free_cache_entry(struct cache_entry *ent)
-{
- free(ent);
-}
-
-static void
-add_cache_entry(struct cache_entry *ent)
-{
- RB_INSERT(cache, &cache_head, ent);
-}
-
-static void
-flush_cache(void)
-{
- struct cache_entry *ent;
-
- if (!cache_dirty)
- return;
-
- xlseek(cfd, 0, SEEK_SET);
- RB_FOREACH(ent, cache, &cache_head)
- xwrite(cfd, &ent->blk_desc, sizeof(ent->blk_desc));
-}
-
-static void
-free_cache(void)
-{
- struct cache_entry *ent, *tmp;
-
- RB_FOREACH_SAFE(ent, cache, &cache_head, tmp) {
- RB_REMOVE(cache, &cache_head, ent);
- free_cache_entry(ent);
- }
-}
-
-static uint64_t
-cache_nr_entries(void)
-{
- struct stat sb;
-
- if (fstat(cfd, &sb) < 0)
- err(1, "fstat");
- return sb.st_size / sizeof(struct blk_desc);
-}
-
static void
append_snap(struct snapshot *snap)
{
@@ -304,26 +187,12 @@ append_blk(uint8_t *buf, struct blk_desc *blk_desc)
snaphdr.store_size += blk_desc->size;
}
-static int
-lookup_blk_desc(uint8_t *md, struct blk_desc *blk_desc)
-{
- struct cache_entry *ent, key;
-
- memcpy(key.blk_desc.md, md, sizeof(key.blk_desc.md));
- ent = RB_FIND(cache, &cache_head, &key);
- if (ent != NULL) {
- *blk_desc = ent->blk_desc;
- return 0;
- }
- return -1;
-}
-
static void
dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size)
{
uint8_t md[MDSIZE];
+ struct cache_entry cache_entry;
uint8_t *comp_buf;
- struct blk_desc blk_desc;
size_t n;
comp_buf = alloc_buf(comp_size(BLKSIZE_MAX));
@@ -334,21 +203,21 @@ dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size)
snaphdr.st.orig_size += chunk_size;
snaphdr.st.comp_size += n;
- if (lookup_blk_desc(md, &blk_desc) < 0) {
- struct cache_entry *ent;
+ memcpy(cache_entry.md, md, sizeof(cache_entry.md));
+ if (lookup_cache_entry(&cache_entry) < 0) {
+ struct blk_desc blk_desc;
- memcpy(blk_desc.md, md, sizeof(blk_desc.md));
+ memcpy(&blk_desc.md, md, sizeof(blk_desc.md));
blk_desc.offset = snaphdr.store_size;
blk_desc.size = n;
snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
-
append_blk(comp_buf, &blk_desc);
- ent = alloc_cache_entry();
- ent->blk_desc = blk_desc;
- add_cache_entry(ent);
+ cache_entry.offset = blk_desc.offset;
+ cache_entry.size = blk_desc.size;
cache_dirty = 1;
+ add_cache_entry(&cache_entry);
cache_misses++;
snaphdr.st.dedup_size += blk_desc.size;
@@ -359,6 +228,11 @@ dedup_chunk(struct snapshot *snap, uint8_t *chunkp, size_t chunk_size)
if (blk_desc.size < snaphdr.st.min_blk_size)
snaphdr.st.min_blk_size = blk_desc.size;
} else {
+ struct blk_desc blk_desc;
+
+ memcpy(&blk_desc.md, cache_entry.md, sizeof(blk_desc.md));
+ blk_desc.offset = cache_entry.offset;
+ blk_desc.size = cache_entry.size;
snap->blk_desc[snap->nr_blk_descs++] = blk_desc;
cache_hits++;
}
@@ -494,19 +368,21 @@ rebuild_cache(struct snapshot *snap, void *arg)
buf = alloc_buf(comp_size(BLKSIZE_MAX));
for (i = 0; i < snap->nr_blk_descs; i++) {
- struct cache_entry *ent;
+ struct cache_entry cache_entry;
+ struct blk_desc *blk_desc;
- read_blk(buf, &snap->blk_desc[i]);
+ blk_desc = &snap->blk_desc[i];
+ read_blk(buf, blk_desc);
SHA256_Init(&ctx);
- SHA256_Update(&ctx, buf, snap->blk_desc[i].size);
+ SHA256_Update(&ctx, buf, blk_desc->size);
SHA256_Final(md, &ctx);
- ent = alloc_cache_entry();
- memcpy(ent->blk_desc.md, md, sizeof(ent->blk_desc.md));
- ent->blk_desc = snap->blk_desc[i];
- add_cache_entry(ent);
+ memcpy(cache_entry.md, blk_desc->md, sizeof(cache_entry.md));
+ cache_entry.offset = blk_desc->offset;
+ cache_entry.size = blk_desc->size;
cache_dirty = 1;
+ add_cache_entry(&cache_entry);
}
free(buf);
return WALK_CONTINUE;
@@ -514,7 +390,7 @@ rebuild_cache(struct snapshot *snap, void *arg)
/* Walk through all snapshots and call fn() on each one */
static void
-walk(int (*fn)(struct snapshot *, void *), void *arg)
+walk_snap(int (*fn)(struct snapshot *, void *), void *arg)
{
uint64_t i;
@@ -539,19 +415,37 @@ walk(int (*fn)(struct snapshot *, void *), void *arg)
}
}
+static int
+flush_cache(struct cache_entry *cache_entry) /* walk_cache() callback used by term() */
+{
+ xwrite(cfd, cache_entry, sizeof(*cache_entry)); /* whole struct as raw bytes; load_cache() xread()s the same size back */
+ return 0; /* walk_cache() does not look at this value */
+}
+
+/* Count of whole cache entries in the file behind cfd (file size / entry size). */
+static uint64_t
+cache_nr_entries(void)
+{
+	struct stat st;
+	if (fstat(cfd, &st) < 0)
+		err(1, "fstat");
+	return st.st_size / sizeof(struct cache_entry);
+}
+
static void
-init_cache(void)
+load_cache(void)
{
+ uint64_t nr_entries;
uint64_t i;
xlseek(cfd, 0, SEEK_SET);
- for (i = 0; i < cache_nr_entries(); i++) {
- struct cache_entry *ent;
+ nr_entries = cache_nr_entries();
+ for (i = 0; i < nr_entries; i++) {
+ struct cache_entry cache_entry;
- ent = alloc_cache_entry();
- if (xread(cfd, &ent->blk_desc, sizeof(ent->blk_desc)) == 0)
+ if (xread(cfd, &cache_entry, sizeof(cache_entry)) == 0)
errx(1, "read: unexpected EOF");
- add_cache_entry(ent);
+ add_cache_entry(&cache_entry);
}
}
@@ -596,9 +490,9 @@ init(void)
}
if (cache_nr_entries() != 0)
- init_cache();
+ load_cache();
else
- walk(rebuild_cache, NULL);
+ walk_snap(rebuild_cache, NULL);
}
static void
@@ -606,7 +500,11 @@ term(void)
{
if (verbose)
print_stats(&snaphdr.st);
- flush_cache();
+
+ if (cache_dirty) {
+ xlseek(cfd, 0, SEEK_SET);
+ walk_cache(flush_cache);
+ }
free_cache();
fsync(ifd);
@@ -683,20 +581,24 @@ main(int argc, char *argv[])
init();
if (cflag) {
- walk(check, NULL);
+ walk_snap(check, NULL);
term();
return 0;
}
if (lflag) {
- walk(list, NULL);
+ walk_snap(list, NULL);
term();
return 0;
}
if (id) {
+ struct extract_args args;
+
str2bin(id, md);
- walk(extract, &(struct extract_args){ .md = md, .fd = fd });
+ args.md = md;
+ args.fd = fd;
+ walk_snap(extract, &args);
} else {
dedup(fd, msg);
}
diff --git a/dedup.h b/dedup.h
@@ -1,7 +1,14 @@
#include "config.h"
+#include "types.h"
struct chunker;
+/* cache.c */
+void add_cache_entry(struct cache_entry *ent);
+int lookup_cache_entry(struct cache_entry *ent);
+void walk_cache(int (*fn)(struct cache_entry *));
+void free_cache(void);
+
/* chunker.c */
struct chunker *alloc_chunker(size_t cap, int fd);
void free_chunker(struct chunker *chunker);
diff --git a/types.h b/types.h
@@ -0,0 +1,44 @@
+#define MSGSIZE 256
+#define MDSIZE 32
+
+/* snapshot file format version */
+#define VER_MIN 1
+#define VER_MAJ 0
+
+struct stats { /* running totals, embedded in snapshot_hdr as st */
+ uint64_t orig_size; /* grows by each chunk's size in dedup_chunk() */
+ uint64_t comp_size; /* grows by n in dedup_chunk(), the same value stored as blk_desc.size */
+ uint64_t dedup_size; /* grows by blk_desc.size only when the chunk misses the cache */
+ uint64_t min_blk_size; /* lowered when a newly stored block is smaller */
+ uint64_t max_blk_size;
+ uint64_t nr_blks;
+ uint64_t reserved[6];
+};
+
+struct snapshot_hdr {
+ uint64_t flags;
+ uint64_t nr_snapshots;
+ uint64_t store_size; /* advanced by blk_desc->size in append_blk(); dedup_chunk() uses it as the next block's offset */
+ uint64_t reserved[4];
+ struct stats st; /* handed to print_stats() from term() when verbose is set */
+};
+
+struct blk_desc { /* one stored block as referenced from a snapshot */
+ uint8_t md[MDSIZE]; /* digest bytes; copied into cache_entry.md as the lookup key */
+ uint64_t offset; /* snaphdr.store_size at the time dedup_chunk() appended the block */
+ uint64_t size; /* set to n in dedup_chunk(); append_blk() adds it to store_size */
+};
+
+struct snapshot {
+ uint64_t size;
+ uint8_t msg[MSGSIZE];
+ uint8_t md[MDSIZE]; /* hash of snapshot */
+ uint64_t nr_blk_descs; /* used slots in blk_desc[]; dedup_chunk() post-increments it per chunk */
+ struct blk_desc blk_desc[]; /* flexible array member: descriptors follow the fixed fields */
+};
+
+struct cache_entry { /* flush_cache()/load_cache() move this struct through cfd as raw bytes */
+ uint8_t md[MDSIZE]; /* key: the only field cache_node_cmp() compares */
+ uint64_t offset; /* copied from blk_desc.offset */
+ uint64_t size; /* copied from blk_desc.size */
+};