dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit 1a978a0ee0986c6cd869dd94a47cf85b89f2474f
parent df98cd4221d5e54c88d5ce172690c6cc7cb08235
Author: sin <sin@2f30.org>
Date:   Wed,  6 Mar 2019 09:33:19 +0000

Remove .cache file

It did not make a lot of sense since the same information is stored in
the snapshots file.  The time spent to load the cache from the
snapshots file on every run is negligible.

Diffstat:
M README | 2 +-
M dedup.1 | 4 ++--
M dedup.c | 88 +++----------------------------------------------------------------------------
M dedup.h | 1 -
M types.c | 36 ------------------------------------
5 files changed, 6 insertions(+), 125 deletions(-)

diff --git a/README b/README
@@ -10,7 +10,7 @@ follows:
 
 	tar -c ~/dir | dedup -r ~/bak -m "$(date)"
 
-This will create .{cache,snapshots,store} files in the ~/bak
+This will create .{snapshots,store} files in the ~/bak
 directory. The store file contains all the unique blocks. The
 snapshots file contains all the revisions of files that have been
 deduplicated. Each revision is identified by its SHA256 hash. The
diff --git a/dedup.1 b/dedup.1
@@ -1,4 +1,4 @@
-.Dd March 05, 2019
+.Dd March 06, 2019
 .Dt DEDUP 1
 .Os
 .Sh NAME
@@ -33,7 +33,7 @@ Enable verbose mode.
 .It Fl e Ar id
 Extract snapshot with the specified id.
 .It Fl r Ar root
-Set the root directory where the .cache, .snapshots and .store
+Set the root directory where the .snapshots and .store
 files will be created.
 .It Fl m Ar message
 Attach a descriptive message to the snapshot.
diff --git a/dedup.c b/dedup.c
@@ -18,7 +18,6 @@
 
 #define SNAPSF ".snapshots"
 #define STOREF ".store"
-#define CACHEF ".cache"
 
 enum {
 	WALK_CONTINUE,
@@ -36,7 +35,6 @@ static struct blk_hdr blk_hdr;
 static struct cache *cache;
 static int ifd;
 static int sfd;
-static int cfd;
 static unsigned long long cache_hits;
 static unsigned long long cache_misses;
 
@@ -395,7 +393,7 @@ check_snap(struct snapshot *snap, void *arg)
 }
 
 static int
-reload_cache(struct snapshot *snap, void *arg)
+load_cache(struct snapshot *snap, void *arg)
 {
 	uint8_t *buf;
 	uint64_t i;
@@ -462,74 +460,6 @@ match_ver(uint64_t v)
 }
 
 static void
-hash_cache_entry_update(struct cache_entry *cache_entry, SHA256_CTX *ctx)
-{
-	uint8_t buf[CACHE_ENTRY_SIZE];
-	char fmt[BUFSIZ];
-	int n;
-
-	snprintf(fmt, sizeof(fmt), "'%dqq", MDSIZE);
-	n = pack(buf, fmt, cache_entry->md, cache_entry->offset,
-	         cache_entry->size);
-	SHA256_Update(ctx, buf, n);
-}
-
-static void
-load_cache(void)
-{
-	uint8_t md[MDSIZE];
-	struct stat sb;
-	SHA256_CTX ctx;
-	uint64_t nr_entries, i;
-
-	if (fstat(cfd, &sb) < 0)
-		err(1, "fstat");
-	nr_entries = sb.st_size / CACHE_ENTRY_SIZE;
-
-	xlseek(cfd, 0, SEEK_SET);
-	SHA256_Init(&ctx);
-	for (i = 0; i < nr_entries; i++) {
-		struct cache_entry cache_entry;
-
-		read_cache_entry(cfd, &cache_entry);
-		hash_cache_entry_update(&cache_entry, &ctx);
-		add_cache_entry(cache, &cache_entry);
-	}
-	SHA256_Final(md, &ctx);
-
-	if (memcmp(snap_hdr.cache_md, md, sizeof(snap_hdr.cache_md)) != 0) {
-		if (verbose > 0)
-			fprintf(stderr, "Rebuilding cache\n");
-		free_cache(cache);
-		cache = alloc_cache();
-		if (ftruncate(cfd, 0) < 0)
-			err(1, "ftruncate");
-		xlseek(ifd, SNAP_HDR_SIZE, SEEK_SET);
-		xlseek(cfd, 0, SEEK_SET);
-		walk_snap(reload_cache, NULL);
-	}
-}
-
-static int
-flush_cache(struct cache_entry *cache_entry, void *arg)
-{
-	hash_cache_entry_update(cache_entry, arg);
-	write_cache_entry(cfd, cache_entry);
-	return 0;
-}
-
-static void
-save_cache(void)
-{
-	SHA256_CTX ctx;
-
-	SHA256_Init(&ctx);
-	xlseek(cfd, 0, SEEK_SET);
-	walk_cache(cache, flush_cache, &ctx);
-	SHA256_Final(snap_hdr.cache_md, &ctx);
-}
-
-static void
 init_blk_hdr(void)
 {
 	blk_hdr.flags = (VER_MAJ << VER_MAJ_SHIFT) | VER_MIN;
@@ -598,17 +528,8 @@ init(int iflag)
 	if (sfd < 0)
 		err(1, "open %s", STOREF);
 
-	/*
-	 * The cache file does not have to exist
-	 * and will be created again if deleted.
-	 */
-	cfd = open(CACHEF, O_RDWR | O_CREAT, 0600);
-	if (cfd < 0)
-		err(1, "open %s", CACHEF);
-
 	if (flock(ifd, LOCK_NB | LOCK_EX) < 0 ||
-	    flock(sfd, LOCK_NB | LOCK_EX) < 0 ||
-	    flock(cfd, LOCK_NB | LOCK_EX) < 0)
+	    flock(sfd, LOCK_NB | LOCK_EX) < 0)
 		err(1, "flock");
 
 	if (iflag) {
@@ -620,7 +541,7 @@ init(int iflag)
 	}
 
 	cache = alloc_cache();
-	load_cache();
+	walk_snap(load_cache, NULL);
 }
 
 static void
@@ -629,17 +550,14 @@ term(void)
 	if (verbose > 0)
 		print_stats(&snap_hdr.st);
 
-	save_cache();
 	free_cache(cache);
 
 	save_blk_hdr();
 	save_snap_hdr();
 
-	fsync(cfd);
 	fsync(sfd);
 	fsync(ifd);
 
-	close(cfd);
 	close(sfd);
 	close(ifd);
 }
diff --git a/dedup.h b/dedup.h
@@ -10,7 +10,6 @@
 #define BLK_HDR_SIZE 16
 #define BLK_DESC_SIZE 48
 #define SNAPSHOT_SIZE 304
-#define CACHE_ENTRY_SIZE 48
 #define MSGSIZE 256
 #define MDSIZE 32
 
diff --git a/types.c b/types.c
@@ -193,39 +193,3 @@ write_snapshot_blk_descs(int fd, struct snapshot *snap)
 	for (i = 0; i < snap->nr_blk_descs; i++)
 		write_blk_desc(fd, &snap->blk_desc[i]);
 }
-
-void
-read_cache_entry(int fd, struct cache_entry *cache_entry)
-{
-	uint8_t buf[CACHE_ENTRY_SIZE];
-	char fmt[BUFSIZ];
-	int n;
-
-	if (xread(fd, buf, sizeof(buf)) == 0)
-		errx(1, "read_blk_desc: unexpected EOF");
-
-	snprintf(fmt, sizeof(fmt), "'%dqq", MDSIZE);
-	n = unpack(buf, fmt,
-	           cache_entry->md,
-	           &cache_entry->offset,
-	           &cache_entry->size);
-
-	assert(n == CACHE_ENTRY_SIZE);
-}
-
-void
-write_cache_entry(int fd, struct cache_entry *cache_entry)
-{
-	uint8_t buf[CACHE_ENTRY_SIZE];
-	char fmt[BUFSIZ];
-	int n;
-
-	snprintf(fmt, sizeof(fmt), "'%dqq", MDSIZE);
-	n = pack(buf, fmt,
-	         cache_entry->md,
-	         cache_entry->offset,
-	         cache_entry->size);
-
-	assert(n == CACHE_ENTRY_SIZE);
-	xwrite(fd, buf, n);
-}