dedup

data deduplication program
git clone git://git.2f30.org/dedup.git
Log | Files | Refs | README | LICENSE

commit 75462091afc92997fb825e90d160bf5084b1242e
parent 07c582217aa242d97771e96520280479a4a6955c
Author: sin <sin@2f30.org>
Date:   Thu, 18 Apr 2019 12:54:49 +0100

Rename d* to dup-*

Diffstat:
M Makefile | 50 +++++++++++++++++++++++++-------------------------
M README | 10 +++++-----
D dcheck.1 | 25 -------------------------
D dinfo.1 | 37 -------------------------------------
D dinit.1 | 39 ---------------------------------------
D dlist.1 | 25 -------------------------
D dpack.1 | 35 -----------------------------------
D dunpack.1 | 28 ----------------------------
A dup-check.1 | 25 +++++++++++++++++++++++++
R dcheck.c -> dup-check.c | 0
A dup-info.1 | 37 +++++++++++++++++++++++++++++++++++++
R dinfo.c -> dup-info.c | 0
A dup-init.1 | 39 +++++++++++++++++++++++++++++++++++++++
R dinit.c -> dup-init.c | 0
A dup-list.1 | 25 +++++++++++++++++++++++++
R dlist.c -> dup-list.c | 0
A dup-pack.1 | 35 +++++++++++++++++++++++++++++++++++
R dpack.c -> dup-pack.c | 0
A dup-unpack.1 | 28 ++++++++++++++++++++++++++++
R dunpack.c -> dup-unpack.c | 0
20 files changed, 219 insertions(+), 219 deletions(-)

diff --git a/Makefile b/Makefile @@ -3,15 +3,15 @@ include config.mk VERSION = 1.0 PREFIX = /usr/local MANPREFIX = $(PREFIX)/man -TARGET = dcheck dinfo dinit dlist dpack dunpack +TARGET = dup-check dup-info dup-init dup-list dup-pack dup-unpack MAN = \ - dcheck.1 \ - dinfo.1 \ - dinit.1 \ - dlist.1 \ - dpack.1 \ - dunpack.1 \ + dup-check.1 \ + dup-info.1 \ + dup-init.1 \ + dup-list.1 \ + dup-pack.1 \ + dup-unpack.1 \ HDR = \ arg.h \ @@ -32,12 +32,12 @@ SRC = \ compress-none.c \ compress-snappy.c \ compress.c \ - dcheck.c \ - dinfo.c \ - dinit.c \ - dlist.c \ - dpack.c \ - dunpack.c \ + dup-check.c \ + dup-info.c \ + dup-init.c \ + dup-list.c \ + dup-pack.c \ + dup-unpack.c \ hash-blake2b.c \ hash-blake2bp.c \ hash-blake2s.c \ @@ -70,12 +70,12 @@ COMMOBJ = \ unpack.o \ utils.o \ -DCHECKOBJ = $(COMMOBJ) dcheck.o -DINFOOBJ = $(COMMOBJ) dinfo.o -DINITOBJ = $(COMMOBJ) dinit.o -DLISTOBJ = $(COMMOBJ) dlist.o -DPACKOBJ = $(COMMOBJ) dpack.o -DUNPACKOBJ = $(COMMOBJ) dunpack.o +DCHECKOBJ = $(COMMOBJ) dup-check.o +DINFOOBJ = $(COMMOBJ) dup-info.o +DINITOBJ = $(COMMOBJ) dup-init.o +DLISTOBJ = $(COMMOBJ) dup-list.o +DPACKOBJ = $(COMMOBJ) dup-pack.o +DUNPACKOBJ = $(COMMOBJ) dup-unpack.o DISTFILES = \ $(MAN) \ @@ -122,20 +122,20 @@ dist: .c.o: $(CC) $(CPPFLAGS) $(CFLAGS) -c $< -dcheck: $(DCHECKOBJ) +dup-check: $(DCHECKOBJ) $(CC) -o $@ $(DCHECKOBJ) $(LDFLAGS) $(LDLIBS) -dinfo: $(DINFOOBJ) +dup-info: $(DINFOOBJ) $(CC) -o $@ $(DINFOOBJ) $(LDFLAGS) $(LDLIBS) -dinit: $(DINITOBJ) +dup-init: $(DINITOBJ) $(CC) -o $@ $(DINITOBJ) $(LDFLAGS) $(LDLIBS) -dlist: $(DLISTOBJ) +dup-list: $(DLISTOBJ) $(CC) -o $@ $(DLISTOBJ) $(LDFLAGS) $(LDLIBS) -dpack: $(DPACKOBJ) +dup-pack: $(DPACKOBJ) $(CC) -o $@ $(DPACKOBJ) $(LDFLAGS) $(LDLIBS) -dunpack: $(DUNPACKOBJ) +dup-unpack: $(DUNPACKOBJ) $(CC) -o $@ $(DUNPACKOBJ) $(LDFLAGS) $(LDLIBS) diff --git a/README b/README @@ -8,30 +8,30 @@ Getting started To use dedup you have to first initialize the repository. 
- dinit repo + dup-init repo This will create .{snapshots,store} files in the repo directory. The store file contains all the unique blocks. The snapshots file contains all the revisions of files that have been deduplicated. dedup only handles a single file at a time, so using tar is advised. -For example, to dedup a directory tree you can invoke dpack as +For example, to dedup a directory tree you can invoke dup-pack(1) as follows: - tar -c ~/dir | dpack -m "$(date)" repo + tar -c ~/dir | dup-pack -m "$(date)" repo The -m flag is used to attach an arbitrary message to the snapshot. To list all known revisions run: - dlist repo + dup-list repo You will get a list of hashes. Each hash corresponds to a single file (in this case, a tar archive). To extract a file from the deduplicated store run: - dunpack <hash> repo > snapshot.tar + dup-unpack <hash> repo > snapshot.tar Portability =========== diff --git a/dcheck.1 b/dcheck.1 @@ -1,25 +0,0 @@ -.Dd April 17, 2019 -.Dt DCHECK 1 -.Os -.Sh NAME -.Nm dcheck -.Nd Perform consistency checks on a dedup repo -.Sh SYNOPSIS -.Nm dcheck -.Op Fl v -.Op repo -.Sh DESCRIPTION -.Nm -performs consistency checks on a dedup repo. -If no -.Ar repo -is specified, then the current directory -is assumed to be the repository. -.Sh OPTIONS -.Bl -tag -width "-v" -.It Fl v -Enable verbose mode. -.El -.Sh AUTHORS -.An Dimitris Papastamos Aq Mt sin@2f30.org , -.An z3bra Aq Mt contactatz3bradotorg . diff --git a/dinfo.1 b/dinfo.1 @@ -1,37 +0,0 @@ -.Dd April 18, 2019 -.Dt DINFO 1 -.Os -.Sh NAME -.Nm dinfo -.Nd Print information about a dedup repository -.Sh SYNOPSIS -.Nm dinfo -.Op Fl tv -.Op repo -.Sh DESCRIPTION -.Nm -prints information about a dedup repository. -If no -.Ar repo -is specified, then the current directory -is assumed to be the repository. -.Sh OPTIONS -.Bl -tag -width "-v" -.It Fl t -Enable terse mode. 
-The output fields are as follows: -.br -[original dataset size] -[compressed dataset size] -[deduplicated dataset size] -[deduplication ratio] -[min block size] -[average block size] -[max block size] -[number of unique blocks] -.It Fl v -Enable verbose mode. -.El -.Sh AUTHORS -.An Dimitris Papastamos Aq Mt sin@2f30.org , -.An z3bra Aq Mt contactatz3bradotorg . diff --git a/dinit.1 b/dinit.1 @@ -1,39 +0,0 @@ -.Dd April 17, 2019 -.Dt DINIT 1 -.Os -.Sh NAME -.Nm dinit -.Nd Initialize a dedup repository -.Sh SYNOPSIS -.Nm dinit -.Op Fl v -.Op Fl H Ar hash -.Op Fl Z Ar compressor -.Op repo -.Sh DESCRIPTION -.Nm -initializes a dedup repository. -If no -.Ar repo -is specified, then the current directory -is assumed to be the repository. -.Sh OPTIONS -.Bl -tag -width "-Z compressor" -.It Fl v -Enable verbose mode. -.It Fl H Ar hash -The cryptographic hash function used to identify -unique blocks in the store. -The supported hash functions are blake2b, blake2bp, blake2s and blake2sp. -This flag only has an effect when initializing the repository. -By default blake2b is used. -.It Fl Z Ar compressor -The compressor function used to compress the blocks -in the store. -The supported compressor functions are none, lz4 and snappy. -This flag only has an effect when initializing the repository. -By default lz4 is used. -.El -.Sh AUTHORS -.An Dimitris Papastamos Aq Mt sin@2f30.org , -.An z3bra Aq Mt contactatz3bradotorg . diff --git a/dlist.1 b/dlist.1 @@ -1,25 +0,0 @@ -.Dd April 17, 2019 -.Dt DLIST 1 -.Os -.Sh NAME -.Nm dlist -.Nd List snapshots from a dedup repository -.Sh SYNOPSIS -.Nm dlist -.Op Fl v -.Op repo -.Sh DESCRIPTION -.Nm -lists snapshots from a dedup repository. -If no -.Ar repo -is specified, then the current directory -is assumed to be the repository. -.Sh OPTIONS -.Bl -tag -width "-v" -.It Fl v -Enable verbose mode. -.El -.Sh AUTHORS -.An Dimitris Papastamos Aq Mt sin@2f30.org , -.An z3bra Aq Mt contactatz3bradotorg . 
diff --git a/dpack.1 b/dpack.1 @@ -1,35 +0,0 @@ -.Dd April 18, 2019 -.Dt DPACK 1 -.Os -.Sh NAME -.Nm dpack -.Nd Deduplicate data from stdin -.Sh SYNOPSIS -.Nm dpack -.Op Fl v -.Op Fl m Ar message -.Op repo -.Sh DESCRIPTION -.Nm -deduplicates data from stdin. -If no -.Ar repo -is specified, then the current directory -is assumed to be the repository. -.Pp -.Nm -does not track any file metadata so to deduplicate -directory trees, an archival tool like -.Xr tar 1 -should be used and piped into -.Nm . -.Sh OPTIONS -.Bl -tag -width "-m message" -.It Fl v -Enable verbose mode. -.It Fl m Ar message -Attach a descriptive message to the snapshot. -.El -.Sh AUTHORS -.An Dimitris Papastamos Aq Mt sin@2f30.org , -.An z3bra Aq Mt contactatz3bradotorg . diff --git a/dunpack.1 b/dunpack.1 @@ -1,28 +0,0 @@ -.Dd April 17, 2019 -.Dt DUNPACK 1 -.Os -.Sh NAME -.Nm dunpack -.Nd Extract snapshot from a dedup repository -.Sh SYNOPSIS -.Nm dunpack -.Op Fl v -.Ar id -.Op repo -.Sh DESCRIPTION -.Nm -extracts the snapshot specified by -.Ar id -from the dedup repository and writes the data to stdout. -If no -.Ar repo -is specified, then the current directory -is assumed to be the repository. -.Sh OPTIONS -.Bl -tag -width "-v" -.It Fl v -Enable verbose mode. -.El -.Sh AUTHORS -.An Dimitris Papastamos Aq Mt sin@2f30.org , -.An z3bra Aq Mt contactatz3bradotorg . diff --git a/dup-check.1 b/dup-check.1 @@ -0,0 +1,25 @@ +.Dd April 18, 2019 +.Dt DUP-CHECK 1 +.Os +.Sh NAME +.Nm dup-check +.Nd Perform consistency checks on a dedup repo +.Sh SYNOPSIS +.Nm dup-check +.Op Fl v +.Op repo +.Sh DESCRIPTION +.Nm +performs consistency checks on a dedup repo. +If no +.Ar repo +is specified, then the current directory +is assumed to be the repository. +.Sh OPTIONS +.Bl -tag -width "-v" +.It Fl v +Enable verbose mode. +.El +.Sh AUTHORS +.An Dimitris Papastamos Aq Mt sin@2f30.org , +.An z3bra Aq Mt contactatz3bradotorg . 
diff --git a/dcheck.c b/dup-check.c diff --git a/dup-info.1 b/dup-info.1 @@ -0,0 +1,37 @@ +.Dd April 18, 2019 +.Dt DUP-INFO 1 +.Os +.Sh NAME +.Nm dup-info +.Nd Print information about a dedup repository +.Sh SYNOPSIS +.Nm dup-info +.Op Fl tv +.Op repo +.Sh DESCRIPTION +.Nm +prints information about a dedup repository. +If no +.Ar repo +is specified, then the current directory +is assumed to be the repository. +.Sh OPTIONS +.Bl -tag -width "-v" +.It Fl t +Enable terse mode. +The output fields are as follows: +.br +[original dataset size] +[compressed dataset size] +[deduplicated dataset size] +[deduplication ratio] +[min block size] +[average block size] +[max block size] +[number of unique blocks] +.It Fl v +Enable verbose mode. +.El +.Sh AUTHORS +.An Dimitris Papastamos Aq Mt sin@2f30.org , +.An z3bra Aq Mt contactatz3bradotorg . diff --git a/dinfo.c b/dup-info.c diff --git a/dup-init.1 b/dup-init.1 @@ -0,0 +1,39 @@ +.Dd April 18, 2019 +.Dt DUP-INIT 1 +.Os +.Sh NAME +.Nm dup-init +.Nd Initialize a dedup repository +.Sh SYNOPSIS +.Nm dup-init +.Op Fl v +.Op Fl H Ar hash +.Op Fl Z Ar compressor +.Op repo +.Sh DESCRIPTION +.Nm +initializes a dedup repository. +If no +.Ar repo +is specified, then the current directory +is assumed to be the repository. +.Sh OPTIONS +.Bl -tag -width "-Z compressor" +.It Fl v +Enable verbose mode. +.It Fl H Ar hash +The cryptographic hash function used to identify +unique blocks in the store. +The supported hash functions are blake2b, blake2bp, blake2s and blake2sp. +This flag only has an effect when initializing the repository. +By default blake2b is used. +.It Fl Z Ar compressor +The compressor function used to compress the blocks +in the store. +The supported compressor functions are none, lz4 and snappy. +This flag only has an effect when initializing the repository. +By default lz4 is used. +.El +.Sh AUTHORS +.An Dimitris Papastamos Aq Mt sin@2f30.org , +.An z3bra Aq Mt contactatz3bradotorg . 
diff --git a/dinit.c b/dup-init.c diff --git a/dup-list.1 b/dup-list.1 @@ -0,0 +1,25 @@ +.Dd April 18, 2019 +.Dt DUP-LIST 1 +.Os +.Sh NAME +.Nm dup-list +.Nd List snapshots from a dedup repository +.Sh SYNOPSIS +.Nm dup-list +.Op Fl v +.Op repo +.Sh DESCRIPTION +.Nm +lists snapshots from a dedup repository. +If no +.Ar repo +is specified, then the current directory +is assumed to be the repository. +.Sh OPTIONS +.Bl -tag -width "-v" +.It Fl v +Enable verbose mode. +.El +.Sh AUTHORS +.An Dimitris Papastamos Aq Mt sin@2f30.org , +.An z3bra Aq Mt contactatz3bradotorg . diff --git a/dlist.c b/dup-list.c diff --git a/dup-pack.1 b/dup-pack.1 @@ -0,0 +1,35 @@ +.Dd April 18, 2019 +.Dt DUP-PACK 1 +.Os +.Sh NAME +.Nm dup-pack +.Nd Deduplicate data from stdin +.Sh SYNOPSIS +.Nm dup-pack +.Op Fl v +.Op Fl m Ar message +.Op repo +.Sh DESCRIPTION +.Nm +deduplicates data from stdin. +If no +.Ar repo +is specified, then the current directory +is assumed to be the repository. +.Pp +.Nm +does not track any file metadata so to deduplicate +directory trees, an archival tool like +.Xr tar 1 +should be used and piped into +.Nm . +.Sh OPTIONS +.Bl -tag -width "-m message" +.It Fl v +Enable verbose mode. +.It Fl m Ar message +Attach a descriptive message to the snapshot. +.El +.Sh AUTHORS +.An Dimitris Papastamos Aq Mt sin@2f30.org , +.An z3bra Aq Mt contactatz3bradotorg . diff --git a/dpack.c b/dup-pack.c diff --git a/dup-unpack.1 b/dup-unpack.1 @@ -0,0 +1,28 @@ +.Dd April 18, 2019 +.Dt DUP-UNPACK 1 +.Os +.Sh NAME +.Nm dup-unpack +.Nd Extract snapshot from a dedup repository +.Sh SYNOPSIS +.Nm dup-unpack +.Op Fl v +.Ar id +.Op repo +.Sh DESCRIPTION +.Nm +extracts the snapshot specified by +.Ar id +from the dedup repository and writes the data to stdout. +If no +.Ar repo +is specified, then the current directory +is assumed to be the repository. +.Sh OPTIONS +.Bl -tag -width "-v" +.It Fl v +Enable verbose mode. 
+.El +.Sh AUTHORS +.An Dimitris Papastamos Aq Mt sin@2f30.org , +.An z3bra Aq Mt contactatz3bradotorg . diff --git a/dunpack.c b/dup-unpack.c