dedup

deduplicating backup program
git clone git://git.2f30.org/dedup
Log | Files | Refs | README | LICENSE

commit 9f54a917ef6c2f8f850684bed77fa708a062ca33
parent 1ed8f937d82f094ae3e4db7511256dee7d5d471a
Author: sin <sin@2f30.org>
Date:   Sun,  5 May 2019 11:58:12 +0100

Add lz4 support

Diffstat:
M Makefile | 2 +-
M README | 1 +
M TODO | 1 -
M bcompress.c | 33 ++++++++++++++++++++++++++++++++-
M bstorage.c | 6 ++++++
M dup-init.1 | 4 ++--
A test007 | 18 ++++++++++++++++++
7 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile @@ -34,7 +34,7 @@ DPACKOBJ = $(COMMOBJ) dup-pack.o DRMOBJ = $(COMMOBJ) dup-rm.o DUNPACKOBJ = $(COMMOBJ) dup-unpack.o -LDLIBS = -lsnappy -lsodium +LDLIBS = -lsnappy -llz4 -lsodium all: $(BIN) diff --git a/README b/README @@ -58,6 +58,7 @@ Dependencies ============ - snappy + - liblz4 - libsodium Contact diff --git a/TODO b/TODO @@ -1,6 +1,5 @@ Use a ring buffer in the chunker (avoid memmove() call) pledge/unveil support Use flock() to avoid corruption -Revert lz4 support Ability to fetch key from dedicated fd so it never touches disk Use pread/pwrite to avoid lseek() calls diff --git a/bcompress.c b/bcompress.c @@ -12,12 +12,14 @@ #include <strings.h> #include <unistd.h> +#include <lz4.h> #include <snappy-c.h> #include "block.h" #define CDNONETYPE 0x200 #define CDSNAPPYTYPE 0x201 +#define CDLZ4TYPE 0x202 #define CDSIZE (8 + 8) extern int pack(unsigned char *, char *, ...); @@ -98,6 +100,8 @@ bccreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar) type = CDNONETYPE; } else if (strcasecmp(bpar->calgo, "snappy") == 0) { type = CDSNAPPYTYPE; + } else if (strcasecmp(bpar->calgo, "lz4") == 0) { + type = CDLZ4TYPE; } else { bseterr("invalid compression type: %s", bpar->calgo); return -1; @@ -142,6 +146,8 @@ bcopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar) cctx->type = CDNONETYPE; } else if (strcasecmp(bpar->calgo, "snappy") == 0) { cctx->type = CDSNAPPYTYPE; + } else if (strcasecmp(bpar->calgo, "lz4") == 0) { + cctx->type = CDLZ4TYPE; } else { bops->close(bctx); free(cctx); @@ -166,6 +172,8 @@ bcput(struct bctx *bctx, void *buf, size_t n, unsigned char *md) cn = n; } else if (cctx->type == CDSNAPPYTYPE) { cn = snappy_max_compressed_length(n); + } else if (cctx->type == CDLZ4TYPE) { + cn = LZ4_compressBound(n); } else { bseterr("invalid compression type: %d", cctx->type); return -1; @@ -187,6 +195,16 @@ bcput(struct bctx *bctx, void *buf, size_t n, unsigned char *md) bseterr("snappy_compress: 
failed"); return -1; } + } else if (cctx->type == CDLZ4TYPE) { + int ret; + + ret = LZ4_compress_default(buf, &cbuf[CDSIZE], n, cn); + if (ret < 0) { + free(cbuf); + bseterr("LZ4_compress_default: failed"); + return -1; + } + cn = ret; } else { free(cbuf); bseterr("invalid compression type: %d", cctx->type); @@ -218,7 +236,10 @@ bcget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n) /* Calculate maximum compressed block size */ size = *n; - cn = snappy_max_compressed_length(size); + cn = snappy_max_compressed_length(*n); + if (cn > size) + size = cn; + cn = LZ4_compressBound(*n); if (cn > size) size = cn; size += CDSIZE; @@ -269,6 +290,16 @@ bcget(struct bctx *bctx, unsigned char *md, void *buf, size_t *n) bseterr("snappy_uncompress: failed"); return -1; } + } else if (cd.type == CDLZ4TYPE) { + int ret; + + ret = LZ4_decompress_safe(&cbuf[CDSIZE], buf, cd.size, *n); + if (ret < 0) { + free(cbuf); + bseterr("LZ4_decompress_safe: failed"); + return -1; + } + un = ret; } else { free(cbuf); bseterr("invalid compression type: %d", cd.type); diff --git a/bstorage.c b/bstorage.c @@ -45,6 +45,7 @@ #define CALGOMASK 0x7 #define CNONETYPE 0 #define CSNAPPYTYPE 1 +#define CLZ4TYPE 2 #define BHDRSIZE (NBHDRMAGIC + 8 + 8) /* block descriptor constants */ @@ -339,6 +340,8 @@ bscreat(struct bctx *bctx, char *path, int mode, struct bparam *bpar) bhdr->flags |= CNONETYPE << CALGOSHIFT; } else if (strcasecmp(bpar->calgo, "snappy") == 0) { bhdr->flags |= CSNAPPYTYPE << CALGOSHIFT; + } else if (strcasecmp(bpar->calgo, "lz4") == 0) { + bhdr->flags |= CLZ4TYPE << CALGOSHIFT; } else { free(sctx); close(fd); @@ -445,6 +448,9 @@ bsopen(struct bctx *bctx, char *path, int flags, int mode, struct bparam *bpar) case CSNAPPYTYPE: bpar->calgo = "snappy"; break; + case CLZ4TYPE: + bpar->calgo = "lz4"; + break; default: free(sctx); close(fd); diff --git a/dup-init.1 b/dup-init.1 @@ -1,4 +1,4 @@ -.Dd May 2, 2019 +.Dd May 5, 2019 .Dt DUP-INIT 1 .Os .Sh NAME @@ -27,7 +27,7 @@ By default 
none is used. .It Fl Z Ar algo The compressor algorithm used to compress the blocks in the store. -The supported compressor algorithms are none and snappy. +The supported compressor algorithms are none, snappy and lz4. By default snappy is used. .It Fl k Ar keyfile Path to encryption key. diff --git a/test007 b/test007 @@ -0,0 +1,18 @@ +#!/bin/sh +set -ex + +SHA1SUM=${SHA1SUM:-sha1sum} + +repo=`mktemp -u` +data=`mktemp` +dd if=/dev/urandom of="$data" bs=1M count=64 +./dup-init -Z lz4 "$repo" +./dup-pack -r "$repo" snap0 < "$data" +./dup-pack -r "$repo" snap1 < "$data" +du -sh "$repo" +sum0=`$SHA1SUM < "$data" | awk '{print $1}'` +sum1=`./dup-unpack -r "$repo" snap0 | $SHA1SUM | awk '{print $1}'` +sum2=`./dup-unpack -r "$repo" snap1 | $SHA1SUM | awk '{print $1}'` +[ "$sum0" = "$sum1" ] +[ "$sum0" = "$sum2" ] +rm -rf "$repo" "$data"