dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit 514871477eab5d433ff6e942f1438d321e2e2dd0
parent 346aca7c9992b325ea9683adf7327c55613a1c54
Author: sin <sin@2f30.org>
Date:   Sat,  2 Mar 2019 12:30:15 +0000

Implement repo init operation

Diffstat:
M README  | 11 +++++------
M dedup.1 |  6 ++++--
M dedup.c | 81 +++++++++++++++++++++++++++++++++++++++++++++----------------------------------
3 files changed, 55 insertions(+), 43 deletions(-)

diff --git a/README b/README @@ -1,5 +1,9 @@ dedup is a simple data deduplication program. +To use dedup you have to first initialize the repository. + + dedup -r ~/bak -i + dedup only handles a single file at a time, so using tar is advised. For example, to dedup a tar file you can invoke dedup as follows: @@ -22,9 +26,4 @@ To extract a file from the deduplicated store run: dedup -r ~/bak -e <hash> > dir.tar -Cheers, -sin - -Requirements: -dedup uses LZ4 compression algorithm. -You'll need a library for handling this, for example http://www.lz4.org +Dedup depends on liblz4 and openssl. diff --git a/dedup.1 b/dedup.1 @@ -1,4 +1,4 @@ -.Dd Feb 22, 2019 +.Dd March 02, 2019 .Dt DEDUP 1 .Os .Sh NAME @@ -6,7 +6,7 @@ .Nd data deduplication program .Sh SYNOPSIS .Nm dedup -.Op Fl clv +.Op Fl cilv .Op Fl e Ar id .Op Fl r Ar root .Op Fl m Ar message @@ -20,6 +20,8 @@ only handles a single file at a time, so using tar is advised. .Bl -tag -width "-r root" .It Fl c Perform a consistency check on the deduplication storage. +.It Fl i +Initialize the repository. .It Fl l List snapshots by hash. 
.It Fl v diff --git a/dedup.c b/dedup.c @@ -485,19 +485,15 @@ save_cache(void) } static void -load_blk_hdr(void) +init_blk_hdr(void) { - struct stat sb; - - if (fstat(sfd, &sb) < 0) - err(1, "fstat %s", STOREF); - if (sb.st_size == 0) { - blk_hdr.flags = (VER_MAJ << 8) | VER_MIN; - blk_hdr.size = BLK_HDR_LEN; - write_blk_hdr(sfd, &blk_hdr); - return; - } + blk_hdr.flags = (VER_MAJ << 8) | VER_MIN; + blk_hdr.size = BLK_HDR_LEN; +} +static void +load_blk_hdr(void) +{ read_blk_hdr(sfd, &blk_hdr); match_ver(blk_hdr.flags); } @@ -510,20 +506,16 @@ save_blk_hdr(void) } static void -load_snap_hdr(void) +init_snap_hdr(void) { - struct stat sb; - - if (fstat(ifd, &sb) < 0) - err(1, "fstat %s", SNAPSF); - if (sb.st_size == 0) { - snap_hdr.flags = (VER_MAJ << 8) | VER_MIN; - snap_hdr.size = SNAP_HDR_LEN; - snap_hdr.st.min_blk_size = comp_size(BLKSIZE_MAX); - write_snap_hdr(ifd, &snap_hdr); - return; - } + snap_hdr.flags = (VER_MAJ << 8) | VER_MIN; + snap_hdr.size = SNAP_HDR_LEN; + snap_hdr.st.min_blk_size = comp_size(BLKSIZE_MAX); +} +static void +load_snap_hdr(void) +{ read_snap_hdr(ifd, &snap_hdr); match_ver(snap_hdr.flags); } @@ -536,19 +528,25 @@ save_snap_hdr(void) } static void -init(void) +init(int iflag) { + int flags; + cache = alloc_cache(); - ifd = open(SNAPSF, O_RDWR | O_CREAT, 0600); + flags = O_RDWR; + if (iflag) + flags |= O_CREAT | O_EXCL; + + ifd = open(SNAPSF, flags, 0600); if (ifd < 0) err(1, "open %s", SNAPSF); - sfd = open(STOREF, O_RDWR | O_CREAT, 0600); + sfd = open(STOREF, flags, 0600); if (sfd < 0) err(1, "open %s", STOREF); - cfd = open(CACHEF, O_RDWR | O_CREAT, 0600); + cfd = open(CACHEF, flags, 0600); if (cfd < 0) err(1, "open %s", CACHEF); @@ -557,9 +555,14 @@ init(void) flock(cfd, LOCK_NB | LOCK_EX) < 0) errx(1, "busy lock"); - load_snap_hdr(); - load_blk_hdr(); - load_cache(); + if (iflag) { + init_snap_hdr(); + init_blk_hdr(); + } else { + load_snap_hdr(); + load_blk_hdr(); + load_cache(); + } } static void @@ -568,9 +571,9 @@ term(void) if 
(verbose > 0) print_stats(&snap_hdr.st); - save_snap_hdr(); - save_blk_hdr(); save_cache(); + save_blk_hdr(); + save_snap_hdr(); fsync(ifd); fsync(sfd); @@ -586,7 +589,7 @@ term(void) static void usage(void) { - fprintf(stderr, "usage: %s [-clv] [-e id] [-r root] [-m message] [file]\n", argv0); + fprintf(stderr, "usage: %s [-cilv] [-e id] [-r root] [-m message] [file]\n", argv0); exit(1); } @@ -595,7 +598,7 @@ main(int argc, char *argv[]) { uint8_t md[MDSIZE]; char *id = NULL, *root = NULL, *msg = NULL; - int fd = -1, lflag = 0, cflag = 0; + int fd = -1, iflag = 0, lflag = 0, cflag = 0; ARGBEGIN { case 'c': @@ -604,6 +607,9 @@ main(int argc, char *argv[]) case 'e': id = EARGF(usage()); break; + case 'i': + iflag = 1; + break; case 'l': lflag = 1; break; @@ -645,7 +651,12 @@ main(int argc, char *argv[]) err(1, "chdir: %s", root); } - init(); + init(iflag); + + if (iflag) { + term(); + return 0; + } if (cflag) { xlseek(ifd, SNAP_HDR_LEN, SEEK_SET);