SpeedTest.cpp - external/smhasher - Git at Google

 #include "SpeedTest.h"

 #include "Random.h"

 #include <stdio.h>   // for printf
 #include <memory.h>  // for memset
 #include <math.h>    // for sqrt
 #include <algorithm> // for sort

 //-----------------------------------------------------------------------------
 // We view our timing values as a series of random variables V that has been
 // contaminated with occasional outliers due to cache misses, thread
 // preemption, etcetera. To filter out the outliers, we search for the largest
 // subset of V such that all its values are within three standard deviations
 // of the mean.

 double CalcMean ( std::vector<double> & v )
 {
   double mean = 0;

   for(int i = 0; i < (int)v.size(); i++)
   {
     mean += v[i];
   }

   mean /= double(v.size());

   return mean;
 }

 double CalcMean ( std::vector<double> & v, int a, int b )
 {
   double mean = 0;

   for(int i = a; i <= b; i++)
   {
     mean += v[i];
   }

   mean /= (b-a+1);

   return mean;
 }

 double CalcStdv ( std::vector<double> & v, int a, int b )
 {
   double mean = CalcMean(v,a,b);

   double stdv = 0;

   for(int i = a; i <= b; i++)
   {
     double x = v[i] - mean;

     stdv += x*x;
   }

   stdv = sqrt(stdv / (b-a+1));

   return stdv;
 }

 // Return true if the largest value in v[0,len) is more than three
 // standard deviations from the mean

 bool ContainsOutlier ( std::vector<double> & v, size_t len )
 {
   double mean = 0;

   for(size_t i = 0; i < len; i++)
   {
     mean += v[i];
   }

   mean /= double(len);

   double stdv = 0;

   for(size_t i = 0; i < len; i++)
   {
     double x = v[i] - mean;
     stdv += x*x;
   }

   stdv = sqrt(stdv / double(len));

   double cutoff = mean + stdv*3;

   return v[len-1] > cutoff;
 }

 // Do a binary search to find the largest subset of v that does not contain
 // outliers.

 void FilterOutliers ( std::vector<double> & v )
 {
   std::sort(v.begin(),v.end());

   size_t len = 0;

   for(size_t x = 0x40000000; x; x = x >> 1 )
   {
     if((len | x) >= v.size()) continue;

     if(!ContainsOutlier(v,len | x))
     {
       len |= x;
     }
   }

   v.resize(len);
 }

 // Iteratively tighten the set to find a subset that does not contain
 // outliers. I'm not positive this works correctly in all cases.

 void FilterOutliers2 ( std::vector<double> & v )
 {
   std::sort(v.begin(),v.end());

   int a = 0;
   int b = (int)(v.size() - 1);

   for(int i = 0; i < 10; i++)
   {
     //printf("%d %d\n",a,b);

     double mean = CalcMean(v,a,b);
     double stdv = CalcStdv(v,a,b);

     double cutA = mean - stdv*3;
     double cutB = mean + stdv*3;

     while((a < b) && (v[a] < cutA)) a++;
     while((b > a) && (v[b] > cutB)) b--;
   }

   std::vector<double> v2;

   v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1);

   v.swap(v2);
 }

 //-----------------------------------------------------------------------------
 // We really want the rdtsc() calls to bracket the function call as tightly
 // as possible, but that's hard to do portably. We'll try and get as close as
 // possible by marking the function as NEVER_INLINE (to keep the optimizer from
 // moving it) and marking the timing variables as "volatile register".

 NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )
 {
   volatile register int64_t begin,end;

   uint32_t temp[16];

   begin = rdtsc();

   hash(key,len,seed,temp);

   end = rdtsc();

   return end-begin;
 }

 //-----------------------------------------------------------------------------

 double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )
 {
   Rand r(seed);

   uint8_t * buf = new uint8_t[blocksize + 512];

   uint64_t t1 = reinterpret_cast<uint64_t>(buf);

   t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);
   t1 += align;

   uint8_t * block = reinterpret_cast<uint8_t*>(t1);

   r.rand_p(block,blocksize);

   //----------

   std::vector<double> times;
   times.reserve(trials);

   for(int itrial = 0; itrial < trials; itrial++)
   {
     r.rand_p(block,blocksize);

     double t = (double)timehash(hash,block,blocksize,itrial);

     if(t > 0) times.push_back(t);
   }

   //----------

   std::sort(times.begin(),times.end());

   FilterOutliers(times);

   delete [] buf;

   return CalcMean(times);
 }

 //-----------------------------------------------------------------------------
 // 256k blocks seem to give the best results.

 void BulkSpeedTest ( pfHash hash, uint32_t seed )
 {
   const int trials = 2999;
   const int blocksize = 256 * 1024;

   printf("Bulk speed test - %d-byte keys\n",blocksize);

   for(int align = 0; align < 8; align++)
   {
     double cycles = SpeedTest(hash,seed,trials,blocksize,align);

     double bestbpc = double(blocksize)/cycles;

     double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
     printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
   }
 }

 //-----------------------------------------------------------------------------

 void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & /*outCycles*/ )
 {
   const int trials = 999999;

   if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);

   double cycles = SpeedTest(hash,seed,trials,keysize,0);

   printf("%8.2f cycles/hash\n",cycles);
 }

 //-----------------------------------------------------------------------------
	#include "SpeedTest.h"

	#include "Random.h"

	#include <stdio.h> // for printf
	#include <memory.h> // for memset
	#include <math.h> // for sqrt
	#include <algorithm> // for sort

	//-----------------------------------------------------------------------------
	// We view our timing values as a series of random variables V that has been
	// contaminated with occasional outliers due to cache misses, thread
	// preemption, etcetera. To filter out the outliers, we search for the largest
	// subset of V such that all its values are within three standard deviations
	// of the mean.

	double CalcMean ( std::vector<double> & v )
	{
	double mean = 0;

	for(int i = 0; i < (int)v.size(); i++)
	{
	mean += v[i];
	}

	mean /= double(v.size());

	return mean;
	}

	double CalcMean ( std::vector<double> & v, int a, int b )
	{
	double mean = 0;

	for(int i = a; i <= b; i++)
	{
	mean += v[i];
	}

	mean /= (b-a+1);

	return mean;
	}

	double CalcStdv ( std::vector<double> & v, int a, int b )
	{
	double mean = CalcMean(v,a,b);

	double stdv = 0;

	for(int i = a; i <= b; i++)
	{
	double x = v[i] - mean;

	stdv += x*x;
	}

	stdv = sqrt(stdv / (b-a+1));

	return stdv;
	}

	// Return true if the largest value in v[0,len) is more than three
	// standard deviations from the mean

	bool ContainsOutlier ( std::vector<double> & v, size_t len )
	{
	double mean = 0;

	for(size_t i = 0; i < len; i++)
	{
	mean += v[i];
	}

	mean /= double(len);

	double stdv = 0;

	for(size_t i = 0; i < len; i++)
	{
	double x = v[i] - mean;
	stdv += x*x;
	}

	stdv = sqrt(stdv / double(len));

	double cutoff = mean + stdv*3;

	return v[len-1] > cutoff;
	}

	// Do a binary search to find the largest subset of v that does not contain
	// outliers.

	void FilterOutliers ( std::vector<double> & v )
	{
	std::sort(v.begin(),v.end());

	size_t len = 0;

	for(size_t x = 0x40000000; x; x = x >> 1 )
	{
	if((len \| x) >= v.size()) continue;

	if(!ContainsOutlier(v,len \| x))
	{
	len \|= x;
	}
	}

	v.resize(len);
	}

	// Iteratively tighten the set to find a subset that does not contain
	// outliers. I'm not positive this works correctly in all cases.

	void FilterOutliers2 ( std::vector<double> & v )
	{
	std::sort(v.begin(),v.end());

	int a = 0;
	int b = (int)(v.size() - 1);

	for(int i = 0; i < 10; i++)
	{
	//printf("%d %d\n",a,b);

	double mean = CalcMean(v,a,b);
	double stdv = CalcStdv(v,a,b);

	double cutA = mean - stdv*3;
	double cutB = mean + stdv*3;

	while((a < b) && (v[a] < cutA)) a++;
	while((b > a) && (v[b] > cutB)) b--;
	}

	std::vector<double> v2;

	v2.insert(v2.begin(),v.begin()+a,v.begin()+b+1);

	v.swap(v2);
	}

	//-----------------------------------------------------------------------------
	// We really want the rdtsc() calls to bracket the function call as tightly
	// as possible, but that's hard to do portably. We'll try and get as close as
	// possible by marking the function as NEVER_INLINE (to keep the optimizer from
	// moving it) and marking the timing variables as "volatile register".

	NEVER_INLINE int64_t timehash ( pfHash hash, const void * key, int len, int seed )
	{
	volatile register int64_t begin,end;

	uint32_t temp[16];

	begin = rdtsc();

	hash(key,len,seed,temp);

	end = rdtsc();

	return end-begin;
	}

	//-----------------------------------------------------------------------------

	double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int blocksize, const int align )
	{
	Rand r(seed);

	uint8_t * buf = new uint8_t[blocksize + 512];

	uint64_t t1 = reinterpret_cast<uint64_t>(buf);

	t1 = (t1 + 255) & BIG_CONSTANT(0xFFFFFFFFFFFFFF00);
	t1 += align;

	uint8_t * block = reinterpret_cast<uint8_t*>(t1);

	r.rand_p(block,blocksize);

	//----------

	std::vector<double> times;
	times.reserve(trials);

	for(int itrial = 0; itrial < trials; itrial++)
	{
	r.rand_p(block,blocksize);

	double t = (double)timehash(hash,block,blocksize,itrial);

	if(t > 0) times.push_back(t);
	}

	//----------

	std::sort(times.begin(),times.end());

	FilterOutliers(times);

	delete [] buf;

	return CalcMean(times);
	}

	//-----------------------------------------------------------------------------
	// 256k blocks seem to give the best results.

	void BulkSpeedTest ( pfHash hash, uint32_t seed )
	{
	const int trials = 2999;
	const int blocksize = 256 * 1024;

	printf("Bulk speed test - %d-byte keys\n",blocksize);

	for(int align = 0; align < 8; align++)
	{
	double cycles = SpeedTest(hash,seed,trials,blocksize,align);

	double bestbpc = double(blocksize)/cycles;

	double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
	printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
	}
	}

	//-----------------------------------------------------------------------------

	void TinySpeedTest ( pfHash hash, int hashsize, int keysize, uint32_t seed, bool verbose, double & /outCycles/ )
	{
	const int trials = 999999;

	if(verbose) printf("Small key speed test - %4d-byte keys - ",keysize);

	double cycles = SpeedTest(hash,seed,trials,keysize,0);

	printf("%8.2f cycles/hash\n",cycles);
	}

	//-----------------------------------------------------------------------------