blob: 317603dac411d9142760685337a7039f3006d23c [file] [log] [blame]
// Based on public domain code written in 2012 by Samuel Neves
#include "rar.hpp"
#ifdef USE_SSE
#include "blake2s_sse.cpp"
#endif
// Prototypes for the single-instance BLAKE2s routines defined further down
// in this file. They are declared ahead of the blake2sp.cpp include that
// follows, presumably because that file calls them -- verify against
// blake2sp.cpp. The later definitions omit 'static' but keep the internal
// linkage established by these declarations.
static void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth);
static void blake2s_update( blake2s_state *S, const byte *in, size_t inlen );
static void blake2s_final( blake2s_state *S, byte *digest );
#include "blake2sp.cpp"
// Initialization vector: eight 32-bit constants.
// blake2s_init_param() copies them into the chaining value h[] before XORing
// in the parameter words, and blake2s_compress() uses them for the upper
// half (words 8..15) of its working vector.
static const uint32 blake2s_IV[8] =
{
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
// Message word schedule. Row r is used in round r of blake2s_compress();
// each row is a permutation of 0..15. Within a round, the G invocation at
// position i (0..7) reads message words sigma[r][2*i] and sigma[r][2*i+1].
// There is one row per round, ten in total.
static const byte blake2s_sigma[10][16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } ,
{ 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } ,
{ 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 } ,
{ 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 } ,
{ 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 } ,
{ 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 } ,
{ 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 } ,
{ 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 } ,
{ 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 } ,
{ 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 } ,
};
// Raise the second finalization flag: every bit of f[1] is set.
// blake2s_compress() mixes f[1] into the last word of its working vector,
// so this changes the result of every compression that follows.
static inline void blake2s_set_lastnode( blake2s_state *S )
{
  S->f[ 1 ] = ~0U;
}
/* Some helper functions, not necessarily useful */
// Raise the first finalization flag (all bits of f[0]) ahead of the final
// compression. When the state has last_node set, the second flag is raised
// as well through blake2s_set_lastnode().
static inline void blake2s_set_lastblock( blake2s_state *S )
{
  S->f[ 0 ] = ~0U;

  if( S->last_node )
  {
    blake2s_set_lastnode( S );
  }
}
// Advance the two-word counter t[] by 'inc'.
// t[0] is the low word; when the addition wraps it (the new value ends up
// smaller than 'inc'), a carry of one is propagated into the high word t[1].
static inline void blake2s_increment_counter( blake2s_state *S, const uint32 inc )
{
  S->t[ 0 ] += inc;

  if( S->t[ 0 ] < inc ) // Low word wrapped around.
  {
    S->t[ 1 ] += 1;
  }
}
// Reset state S and load its initial chaining value: the IV XORed with the
// parameter block words.
//   node_offset  XORed into h[2].
//   node_depth   shifted left by 16 and XORed into h[3].
// When SSE support is compiled in and SSE2 is available at run time,
// blake2s_init_sse() is called first.
void blake2s_init_param( blake2s_state *S, uint32 node_offset, uint32 node_depth)
{
#ifdef USE_SSE
  if (_SSE_Version>=SSE_SSE2)
    blake2s_init_sse();
#endif

  S->init(); // Clean data.

  // We use BLAKE2sp parameters block. Only words 0, 2 and 3 are non-zero;
  // the two constants are fixed for this implementation.
  const uint32 param[8] =
  {
    0x02080020,
    0,
    node_offset,
    (node_depth<<16)|0x20000000,
    0, 0, 0, 0
  };

  for( int word = 0; word < 8; ++word )
    S->h[word] = blake2s_IV[word] ^ param[word];
}
// BLAKE2s mixing step. Updates the four working words a, b, c, d in place,
// folding in the two message words that blake2s_sigma selects for round r
// and position i (entries 2*i and 2*i+1 of row r).
//   r, i        integer expressions (round number, position 0..7).
//   m           array of sixteen message words.
//   a, b, c, d  lvalues; each is evaluated several times, so they must be
//               free of side effects.
// The body is wrapped in do { } while( 0 ) so that an invocation followed by
// ';' forms exactly one statement and stays correct under an unbraced
// if/else. Every argument is parenthesized so that expression arguments
// such as "x+1" index the schedule as intended.
#define G(r,i,m,a,b,c,d) \
  do { \
    (a) = (a) + (b) + (m)[blake2s_sigma[(r)][2*(i)+0]]; \
    (d) = rotr32((d) ^ (a), 16); \
    (c) = (c) + (d); \
    (b) = rotr32((b) ^ (c), 12); \
    (a) = (a) + (b) + (m)[blake2s_sigma[(r)][2*(i)+1]]; \
    (d) = rotr32((d) ^ (a), 8); \
    (c) = (c) + (d); \
    (b) = rotr32((b) ^ (c), 7); \
  } while( 0 )
// Portable (non-SSE) compression function.
// Absorbs one BLAKE2S_BLOCKBYTES-sized block into the chaining value S->h.
// The counter S->t and the finalization flags S->f are read but not
// modified here; callers update them before invoking this function.
static void blake2s_compress( blake2s_state *S, const byte block[BLAKE2S_BLOCKBYTES] )
{
  uint32 m[16];
  uint32 v[16];

  // Decode the block into sixteen message words.
  for( size_t w = 0; w < 16; ++w )
    m[w] = RawGet4( block + 4 * w );

  // Working vector: chaining value in the lower half, IV in the upper half.
  for( size_t w = 0; w < 8; ++w )
  {
    v[w] = S->h[w];
    v[w + 8] = blake2s_IV[w];
  }

  // Fold the counter and the finalization flags into the last four words.
  v[12] ^= S->t[0];
  v[13] ^= S->t[1];
  v[14] ^= S->f[0];
  v[15] ^= S->f[1];

  // No gain on i7 if unrolled, but exe size grows.
  for ( uint round = 0; round < 10; ++round )
  {
    // First four mixes work on the columns of the 4x4 word matrix.
    G(round,0,m,v[ 0],v[ 4],v[ 8],v[12]);
    G(round,1,m,v[ 1],v[ 5],v[ 9],v[13]);
    G(round,2,m,v[ 2],v[ 6],v[10],v[14]);
    G(round,3,m,v[ 3],v[ 7],v[11],v[15]);
    // Last four mixes work on its diagonals.
    G(round,4,m,v[ 0],v[ 5],v[10],v[15]);
    G(round,5,m,v[ 1],v[ 6],v[11],v[12]);
    G(round,6,m,v[ 2],v[ 7],v[ 8],v[13]);
    G(round,7,m,v[ 3],v[ 4],v[ 9],v[14]);
  }

  // Feed both halves of the working vector back into the chaining value.
  for( size_t w = 0; w < 8; ++w )
    S->h[w] ^= v[w] ^ v[w + 8];
}
// Absorb 'inlen' bytes from 'in' into state S.
// S->buf holds up to two blocks. A block is compressed only when the input
// strictly exceeds the free space of that buffer, so buffered data always
// remains for blake2s_final(), which performs the last compression with
// the finalization flag set.
void blake2s_update( blake2s_state *S, const byte *in, size_t inlen )
{
  while( inlen > 0 )
  {
    const size_t used = S->buflen;
    const size_t room = 2 * BLAKE2S_BLOCKBYTES - used;

    if( inlen <= room )
    {
      // Everything left fits into the buffer: store it and stop without
      // compressing.
      memcpy( S->buf + used, in, inlen );
      S->buflen += inlen;
      return;
    }

    // Top the buffer up to two full blocks and consume that much input.
    memcpy( S->buf + used, in, room );
    S->buflen += room;
    in += room;
    inlen -= room;

    // Compress the first buffered block.
    blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
#ifdef USE_SSE
#ifdef _WIN_32 // We use SSSE3 _mm_shuffle_epi8 only in x64 mode.
    if (_SSE_Version>=SSE_SSE2)
#else
    if (_SSE_Version>=SSE_SSSE3)
#endif
      blake2s_compress_sse( S, S->buf );
    else
      blake2s_compress( S, S->buf );
#else
    blake2s_compress( S, S->buf );
#endif

    // Move the second block to the front of the buffer.
    memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );
    S->buflen -= BLAKE2S_BLOCKBYTES;
  }
}
// Finish hashing and write the eight words of the chaining value to
// 'digest' (4 bytes per word via RawPut4, 32 bytes in total).
// Always uses the portable blake2s_compress().
void blake2s_final( blake2s_state *S, byte *digest )
{
  // More than one block buffered: compress the first one as an ordinary
  // block and move the remainder to the front of the buffer.
  if( S->buflen > BLAKE2S_BLOCKBYTES )
  {
    blake2s_increment_counter( S, BLAKE2S_BLOCKBYTES );
    blake2s_compress( S, S->buf );
    S->buflen -= BLAKE2S_BLOCKBYTES;
    memcpy( S->buf, S->buf + BLAKE2S_BLOCKBYTES, S->buflen );
  }

  // Final block: count only the bytes actually present, raise the
  // finalization flag(s), zero-pad the rest of the buffer and compress.
  const size_t pending = S->buflen;
  blake2s_increment_counter( S, ( uint32 )pending );
  blake2s_set_lastblock( S );
  memset( S->buf + pending, 0, 2 * BLAKE2S_BLOCKBYTES - pending );
  blake2s_compress( S, S->buf );

  // Output full hash.
  byte *out = digest;
  for( int word = 0; word < 8; ++word, out += 4 )
    RawPut4( S->h[word], out );
}