blob: 4c2336974ed610ab95cecb5e571b29e1366f733d [file] [log] [blame]
//-----------------------------------------------------------------------------
// Flipping a single bit of a key should cause an "avalanche" of changes in
// the hash function's output. Ideally, each output bits should flip 50% of
// the time - if the probability of an output bit flipping is not 50%, that bit
// is "biased". Too much bias means that patterns applied to the input will
// cause "echoes" of the patterns in the output, which in turn can cause the
// hash function to fail to create an even, random distribution of hash values.
#pragma once
#include "Types.h"
#include "Random.h"
#include <vector>
#include <stdio.h>
#include <math.h>
// Avalanche fails if a bit is biased by more than 1%
#define AVALANCHE_FAIL 0.01
double maxBias ( std::vector<int> & counts, int reps );
//-----------------------------------------------------------------------------
template < typename keytype, typename hashtype >
void calcBias ( pfHash hash, std::vector<int> & counts, int reps, Rand & r )
{
const int keybytes = sizeof(keytype);
const int hashbytes = sizeof(hashtype);
const int keybits = keybytes * 8;
const int hashbits = hashbytes * 8;
keytype K;
hashtype A,B;
for(int irep = 0; irep < reps; irep++)
{
if(irep % (reps/10) == 0) printf(".");
r.rand_p(&K,keybytes);
hash(&K,keybytes,0,&A);
int * cursor = &counts[0];
for(int iBit = 0; iBit < keybits; iBit++)
{
flipbit(&K,keybytes,iBit);
hash(&K,keybytes,0,&B);
flipbit(&K,keybytes,iBit);
for(int iOut = 0; iOut < hashbits; iOut++)
{
int bitA = getbit(&A,hashbytes,iOut);
int bitB = getbit(&B,hashbytes,iOut);
(*cursor++) += (bitA ^ bitB);
}
}
}
}
//-----------------------------------------------------------------------------
template < typename keytype, typename hashtype >
bool AvalancheTest ( pfHash hash, const int reps )
{
Rand r(48273);
const int keybytes = sizeof(keytype);
const int hashbytes = sizeof(hashtype);
const int keybits = keybytes * 8;
const int hashbits = hashbytes * 8;
printf("Testing %3d-bit keys -> %3d-bit hashes, %8d reps",keybits,hashbits,reps);
//----------
std::vector<int> bins(keybits*hashbits,0);
calcBias<keytype,hashtype>(hash,bins,reps,r);
//----------
bool result = true;
double b = maxBias(bins,reps);
printf(" worst bias is %f%%",b * 100.0);
if(b > AVALANCHE_FAIL)
{
printf(" !!!!! ");
result = false;
}
printf("\n");
return result;
}
//----------------------------------------------------------------------------
// Tests the Bit Independence Criteron. Stricter than Avalanche, but slow and
// not really all that useful.
template< typename keytype, typename hashtype >
void BicTest ( pfHash hash, const int keybit, const int reps, double & maxBias, int & maxA, int & maxB, bool verbose )
{
Rand r(11938);
const int keybytes = sizeof(keytype);
const int hashbytes = sizeof(hashtype);
const int hashbits = hashbytes * 8;
std::vector<int> bins(hashbits*hashbits*4,0);
keytype key;
hashtype h1,h2;
for(int irep = 0; irep < reps; irep++)
{
if(verbose)
{
if(irep % (reps/10) == 0) printf(".");
}
r.rand_p(&key,keybytes);
hash(&key,keybytes,0,&h1);
flipbit(key,keybit);
hash(&key,keybytes,0,&h2);
hashtype d = h1 ^ h2;
for(int out1 = 0; out1 < hashbits; out1++)
for(int out2 = 0; out2 < hashbits; out2++)
{
if(out1 == out2) continue;
uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
bins[(out1 * hashbits + out2) * 4 + b]++;
}
}
if(verbose) printf("\n");
maxBias = 0;
for(int out1 = 0; out1 < hashbits; out1++)
{
for(int out2 = 0; out2 < hashbits; out2++)
{
if(out1 == out2)
{
if(verbose) printf("\\");
continue;
}
double bias = 0;
for(int b = 0; b < 4; b++)
{
double b2 = double(bins[(out1 * hashbits + out2) * 4 + b]) / double(reps / 2);
b2 = fabs(b2 * 2 - 1);
if(b2 > bias) bias = b2;
}
if(bias > maxBias)
{
maxBias = bias;
maxA = out1;
maxB = out2;
}
if(verbose)
{
if (bias < 0.01) printf(".");
else if(bias < 0.05) printf("o");
else if(bias < 0.33) printf("O");
else printf("X");
}
}
if(verbose) printf("\n");
}
}
//----------
template< typename keytype, typename hashtype >
bool BicTest ( pfHash hash, const int reps )
{
const int keybytes = sizeof(keytype);
const int keybits = keybytes * 8;
double maxBias = 0;
int maxK = 0;
int maxA = 0;
int maxB = 0;
for(int i = 0; i < keybits; i++)
{
if(i % (keybits/10) == 0) printf(".");
double bias;
int a,b;
BicTest<keytype,hashtype>(hash,i,reps,bias,a,b,true);
if(bias > maxBias)
{
maxBias = bias;
maxK = i;
maxA = a;
maxB = b;
}
}
printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
// Bit independence is harder to pass than avalanche, so we're a bit more lax here.
bool result = (maxBias < 0.05);
return result;
}
//-----------------------------------------------------------------------------
// BIC test variant - store all intermediate data in a table, draw diagram
// afterwards (much faster)
template< typename keytype, typename hashtype >
void BicTest3 ( pfHash hash, const int reps, bool verbose = true )
{
const int keybytes = sizeof(keytype);
const int keybits = keybytes * 8;
const int hashbytes = sizeof(hashtype);
const int hashbits = hashbytes * 8;
const int pagesize = hashbits*hashbits*4;
Rand r(11938);
double maxBias = 0;
int maxK = 0;
int maxA = 0;
int maxB = 0;
keytype key;
hashtype h1,h2;
std::vector<int> bins(keybits*pagesize,0);
for(int keybit = 0; keybit < keybits; keybit++)
{
if(keybit % (keybits/10) == 0) printf(".");
int * page = &bins[keybit*pagesize];
for(int irep = 0; irep < reps; irep++)
{
r.rand_p(&key,keybytes);
hash(&key,keybytes,0,&h1);
flipbit(key,keybit);
hash(&key,keybytes,0,&h2);
hashtype d = h1 ^ h2;
for(int out1 = 0; out1 < hashbits-1; out1++)
for(int out2 = out1+1; out2 < hashbits; out2++)
{
int * b = &page[(out1*hashbits+out2)*4];
uint32_t x = getbit(d,out1) | (getbit(d,out2) << 1);
b[x]++;
}
}
}
printf("\n");
for(int out1 = 0; out1 < hashbits-1; out1++)
{
for(int out2 = out1+1; out2 < hashbits; out2++)
{
if(verbose) printf("(%3d,%3d) - ",out1,out2);
for(int keybit = 0; keybit < keybits; keybit++)
{
int * page = &bins[keybit*pagesize];
int * bins = &page[(out1*hashbits+out2)*4];
double bias = 0;
for(int b = 0; b < 4; b++)
{
double b2 = double(bins[b]) / double(reps / 2);
b2 = fabs(b2 * 2 - 1);
if(b2 > bias) bias = b2;
}
if(bias > maxBias)
{
maxBias = bias;
maxK = keybit;
maxA = out1;
maxB = out2;
}
if(verbose)
{
if (bias < 0.01) printf(".");
else if(bias < 0.05) printf("o");
else if(bias < 0.33) printf("O");
else printf("X");
}
}
// Finished keybit
if(verbose) printf("\n");
}
if(verbose)
{
for(int i = 0; i < keybits+12; i++) printf("-");
printf("\n");
}
}
printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
}
//-----------------------------------------------------------------------------
// BIC test variant - iterate over output bits, then key bits. No temp storage,
// but slooooow
template< typename keytype, typename hashtype >
void BicTest2 ( pfHash hash, const int reps, bool verbose = true )
{
const int keybytes = sizeof(keytype);
const int keybits = keybytes * 8;
const int hashbytes = sizeof(hashtype);
const int hashbits = hashbytes * 8;
Rand r(11938);
double maxBias = 0;
int maxK = 0;
int maxA = 0;
int maxB = 0;
keytype key;
hashtype h1,h2;
for(int out1 = 0; out1 < hashbits-1; out1++)
for(int out2 = out1+1; out2 < hashbits; out2++)
{
if(verbose) printf("(%3d,%3d) - ",out1,out2);
for(int keybit = 0; keybit < keybits; keybit++)
{
int bins[4] = { 0, 0, 0, 0 };
for(int irep = 0; irep < reps; irep++)
{
r.rand_p(&key,keybytes);
hash(&key,keybytes,0,&h1);
flipbit(key,keybit);
hash(&key,keybytes,0,&h2);
hashtype d = h1 ^ h2;
uint32_t b = getbit(d,out1) | (getbit(d,out2) << 1);
bins[b]++;
}
double bias = 0;
for(int b = 0; b < 4; b++)
{
double b2 = double(bins[b]) / double(reps / 2);
b2 = fabs(b2 * 2 - 1);
if(b2 > bias) bias = b2;
}
if(bias > maxBias)
{
maxBias = bias;
maxK = keybit;
maxA = out1;
maxB = out2;
}
if(verbose)
{
if (bias < 0.05) printf(".");
else if(bias < 0.10) printf("o");
else if(bias < 0.50) printf("O");
else printf("X");
}
}
// Finished keybit
if(verbose) printf("\n");
}
printf("Max bias %f - (%3d : %3d,%3d)\n",maxBias,maxK,maxA,maxB);
}
//-----------------------------------------------------------------------------