commit 99c3b317f9e0558d624235439a4162306f8e549d
parent b1fd149a54efd7fe0d3bf4dcd5b3c48467d4434f
Author: sin <sin@2f30.org>
Date: Tue, 19 Feb 2019 10:17:52 +0000
Rework stats
Diffstat:
M | dedup.c | | | 71 | +++++++++++++++++++++++++++++++++++++---------------------------------- |
1 file changed, 37 insertions(+), 34 deletions(-)
diff --git a/dedup.c b/dedup.c
@@ -25,7 +25,7 @@
#define MDSIZE SHA256_DIGEST_LENGTH
/* file format version */
-#define VER_MIN 0
+#define VER_MIN 1
#define VER_MAJ 0
#define ROTL(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
@@ -39,8 +39,12 @@ struct stats {
uint64_t orig_size;
uint64_t comp_size;
uint64_t dedup_size;
+ uint64_t min_blk_size;
+ uint64_t max_blk_size;
+ uint64_t nblks;
uint64_t cache_hits;
uint64_t cache_misses;
+ uint64_t reserved[4];
};
/* index file header */
@@ -48,6 +52,7 @@ struct enthdr {
uint64_t flags;
uint64_t nents;
uint64_t store_size;
+ uint64_t reserved[4];
struct stats st;
};
@@ -208,7 +213,7 @@ decomp(uint8_t *in, uint8_t *out, size_t insize, size_t outsize)
}
void
-print_md(const uint8_t *md, size_t size)
+print_md(uint8_t *md, size_t size)
{
size_t i;
@@ -217,6 +222,27 @@ print_md(const uint8_t *md, size_t size)
}
+/*
+ * Print the statistics accumulated in st to stderr, one metric per line:
+ * original, compressed and deduplicated sizes, min/avg/max block size,
+ * number of blocks, and cache hits/misses.
+ *
+ * The average block size is dedup_size / nblks.  It is reported as 0 when
+ * nblks is 0 (no block has been recorded yet), because dividing by zero
+ * is undefined behaviour.
+ */
void
+print_stats(struct stats *st)
+{
+ uint64_t avg_blk_size;
+
+ /* guard the division: nblks stays 0 until a block is stored */
+ avg_blk_size = st->nblks != 0 ? st->dedup_size / st->nblks : 0;
+
+ fprintf(stderr, "original size: %llu bytes\n",
+ (unsigned long long)st->orig_size);
+ fprintf(stderr, "compressed size: %llu bytes\n",
+ (unsigned long long)st->comp_size);
+ fprintf(stderr, "deduplicated size: %llu bytes\n",
+ (unsigned long long)st->dedup_size);
+ fprintf(stderr, "min/avg/max block size: %llu/%llu/%llu\n",
+ (unsigned long long)st->min_blk_size,
+ (unsigned long long)avg_blk_size,
+ (unsigned long long)st->max_blk_size);
+ fprintf(stderr, "number of blocks: %llu\n",
+ (unsigned long long)st->nblks);
+ fprintf(stderr, "total cache hits: %llu\n",
+ (unsigned long long)st->cache_hits);
+ fprintf(stderr, "total cache misses: %llu\n",
+ (unsigned long long)st->cache_misses);
+}
+
+void
str2bin(char *s, uint8_t *d)
{
size_t i, size = strlen(s) / 2;
@@ -488,7 +514,13 @@ dedup(int fd, char *msg)
cache_dirty = 1;
enthdr.st.dedup_size += bdescr.size;
+ enthdr.st.nblks++;
enthdr.st.cache_misses++;
+
+ if (bdescr.size > enthdr.st.max_blk_size)
+ enthdr.st.max_blk_size = bdescr.size;
+ if (bdescr.size < enthdr.st.min_blk_size)
+ enthdr.st.min_blk_size = bdescr.size;
} else {
ent->bdescr[ent->nblks++] = bdescr;
enthdr.st.cache_hits++;
@@ -651,11 +683,6 @@ void
init_cache(void)
{
uint64_t nents, i;
- uint64_t min, max, avg;
-
- min = comp_size(BLKSIZE);
- max = 0;
- avg = 0;
nents = cache_nents();
xlseek(cfd, 0, SEEK_SET);
@@ -666,20 +693,6 @@ init_cache(void)
if (xread(cfd, ¢->bdescr, sizeof(cent->bdescr)) == 0)
errx(1, "read: unexpected EOF");
add_cent(cent);
-
- if (cent->bdescr.size > max)
- max = cent->bdescr.size;
- if (cent->bdescr.size < min)
- min = cent->bdescr.size;
- avg += cent->bdescr.size;
- }
- avg /= nents;
-
- if (verbose) {
- fprintf(stderr, "min/avg/max block size: %llu/%llu/%llu\n",
- (unsigned long long)min,
- (unsigned long long)avg,
- (unsigned long long)max);
}
}
@@ -720,19 +733,7 @@ init(void)
} else {
enthdr.flags = (VER_MAJ << 8) | VER_MIN;
xwrite(ifd, &enthdr, sizeof(enthdr));
- }
-
- if (verbose) {
- fprintf(stderr, "original size: %llu bytes\n",
- (unsigned long long)enthdr.st.orig_size);
- fprintf(stderr, "compressed size: %llu bytes\n",
- (unsigned long long)enthdr.st.comp_size);
- fprintf(stderr, "deduplicated size: %llu bytes\n",
- (unsigned long long)enthdr.st.dedup_size);
- fprintf(stderr, "total cache hits: %llu\n",
- (unsigned long long)enthdr.st.cache_hits);
- fprintf(stderr, "total cache misses: %llu\n",
- (unsigned long long)enthdr.st.cache_misses);
+ enthdr.st.min_blk_size = comp_size(BLKSIZE);
}
if (cache_nents() != 0)
@@ -744,6 +745,8 @@ init(void)
void
term(void)
{
+ if (verbose)
+ print_stats(&enthdr.st);
flush_cache();
free_cache();