Merge pull request #177 from Cyan4973/dev
v0.7.0
diff --git a/.gitignore b/.gitignore
index 36639c6..9a49cfd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,9 +8,11 @@
xxh32sum
xxh64sum
xxhsum
+xxhsum.exe
xxhsum32
xxhsum_privateXXH
xxhsum_inlinedXXH
+xxhsum_inlinedXXH.exe
# Mac OS-X artefacts
*.dSYM
diff --git a/.travis.yml b/.travis.yml
index 895da85..fab2866 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,9 +1,58 @@
language: c
-compiler: gcc
-script: make -B test-all
-before_install:
- - sudo apt-get update -qq
- - sudo apt-get install -qq gcc-arm-linux-gnueabi
- - sudo apt-get install -qq clang
- - sudo apt-get install -qq g++-multilib
- - sudo apt-get install -qq gcc-multilib
+
+matrix:
+ fast_finish: true
+ include:
+
+ - name: General linux tests (Xenial)
+ dist: xenial
+ before_install:
+ - sudo apt-get update -qq
+ - sudo apt-get install -qq clang
+ - sudo apt-get install -qq g++-multilib
+ - sudo apt-get install -qq gcc-multilib
+ - sudo apt-get install -qq cppcheck
+ script:
+ - make -B test-all
+
+ - name: Check results consistency on x64
+ script:
+ - CPPFLAGS=-DXXH_VECTOR=0 make check # Scalar code path
+ - make clean
+ - CPPFLAGS=-DXXH_VECTOR=1 make check # SSE2 code path
+ - make clean
+ - CPPFLAGS="-mavx2 -DXXH_VECTOR=2" make check # AVX2 code path
+
+ - name: ARM + aarch64 compilation and consistency checks
+ dist: xenial
+ install:
+ - sudo apt-get install -qq
+ qemu-system-arm
+ qemu-user-static
+ gcc-arm-linux-gnueabi
+ libc6-dev-armel-cross
+ gcc-aarch64-linux-gnu
+ libc6-dev-arm64-cross
+ script:
+ # arm (32-bit)
+ - CC=arm-linux-gnueabi-gcc CPPFLAGS=-DXXH_VECTOR=0 LDFLAGS=-static RUN_ENV=qemu-arm-static make check # Scalar code path
+ - make clean
+ # Note : the following test (ARM 32-bit + NEON) is disabled for the time being.
+ # I haven't yet found a way to make it link on Travis CI using gcc cross-compilation.
+ # NEON code path is fortunately validated through `aarch64` below.
+ # - CC=arm-linux-gnueabi-gcc CPPFLAGS=-DXXH_VECTOR=3 CFLAGS="-O3 -march=armv7-a -mfloat-abi=hard -mfpu=neon" LDFLAGS=-static RUN_ENV=qemu-arm-static make check # NEON code path
+ - make clean
+ # aarch64
+ - CC=aarch64-linux-gnu-gcc CPPFLAGS=-DXXH_VECTOR=0 LDFLAGS=-static RUN_ENV=qemu-aarch64-static make check # Scalar code path
+ - make clean
+ - CC=aarch64-linux-gnu-gcc CPPFLAGS=-DXXH_VECTOR=3 LDFLAGS=-static RUN_ENV=qemu-aarch64-static make check # NEON code path
+ - make clean
+
+ - name: PowerPC + PPC64 compilation and consistency checks
+ install:
+ - sudo apt-get install -qq qemu-system-ppc qemu-user-static gcc-powerpc-linux-gnu
+ script:
+ - CC=powerpc-linux-gnu-gcc RUN_ENV=qemu-ppc-static CPPFLAGS=-m32 LDFLAGS=-static make check # Only scalar code path available
+ - make clean
+ - CC=powerpc-linux-gnu-gcc RUN_ENV=qemu-ppc64-static CFLAGS="-O3 -m64" LDFLAGS="-static -m64" make check # Only scalar code path available
+ - make clean
diff --git a/Makefile b/Makefile
index 6dd738f..79c0ca6 100644
--- a/Makefile
+++ b/Makefile
@@ -33,21 +33,15 @@
LIBVER_PATCH := $(shell echo $(LIBVER_PATCH_SCRIPT))
LIBVER := $(LIBVER_MAJOR).$(LIBVER_MINOR).$(LIBVER_PATCH)
-# SSE4 detection
-HAVE_SSE4 := $(shell $(CC) -dM -E - < /dev/null | grep "SSE4" > /dev/null && echo 1 || echo 0)
-ifeq ($(HAVE_SSE4), 1)
-NOSSE4 := -mno-sse4
-else
-NOSSE4 :=
-endif
-
-CFLAGS ?= -O2 $(NOSSE4) # disables potential auto-vectorization
-CFLAGS += -Wall -Wextra -Wcast-qual -Wcast-align -Wshadow \
- -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
- -Wstrict-prototypes -Wundef
-
-FLAGS = $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(MOREFLAGS)
-XXHSUM_VERSION=$(LIBVER)
+CFLAGS ?= -O3
+DEBUGFLAGS+=-Wall -Wextra -Wconversion -Wcast-qual -Wcast-align -Wshadow \
+ -Wstrict-aliasing=1 -Wswitch-enum -Wdeclaration-after-statement \
+ -Wstrict-prototypes -Wundef -Wpointer-arith -Wformat-security \
+ -Wvla -Wformat=2 -Winit-self -Wfloat-equal -Wwrite-strings \
+ -Wredundant-decls -Wstrict-overflow=5
+CFLAGS += $(DEBUGFLAGS)
+FLAGS = $(CFLAGS) $(CPPFLAGS) $(MOREFLAGS)
+XXHSUM_VERSION = $(LIBVER)
MD2ROFF = ronn
MD2ROFF_FLAGS = --roff --warnings --manual="User Commands" --organization="xxhsum $(XXHSUM_VERSION)"
@@ -76,59 +70,70 @@
.PHONY: default
+default: DEBUGFLAGS=
default: lib xxhsum_and_links
.PHONY: all
all: lib xxhsum xxhsum_inlinedXXH
+xxhsum : xxhash.o xxhsum.o
+
xxhsum32: CFLAGS += -m32
-xxhsum xxhsum32: xxhash.c xxhsum.c
- $(CC) $(FLAGS) $^ -o $@$(EXT)
+xxhsum32: xxhash.c xxhsum.c
+ $(CC) $(FLAGS) $^ $(LDFLAGS) -o $@$(EXT)
+
+xxhash.o: xxhash.h xxh3.h
+
+xxhsum.o: xxhash.h
.PHONY: xxhsum_and_links
-xxhsum_and_links: xxhsum
- ln -sf xxhsum xxh32sum
- ln -sf xxhsum xxh64sum
+xxhsum_and_links: xxhsum xxh32sum xxh64sum
+xxh32sum xxh64sum: xxhsum
+ ln -sf $^ $@
+
+xxhsum_inlinedXXH: CPPFLAGS += -DXXH_INLINE_ALL
xxhsum_inlinedXXH: xxhsum.c
- $(CC) $(FLAGS) -DXXH_PRIVATE_API $^ -o $@$(EXT)
+ $(CC) $(FLAGS) $^ -o $@$(EXT)
# library
libxxhash.a: ARFLAGS = rcs
libxxhash.a: xxhash.o
- @echo compiling static library
- @$(AR) $(ARFLAGS) $@ $^
+ $(AR) $(ARFLAGS) $@ $^
$(LIBXXH): LDFLAGS += -shared
ifeq (,$(filter Windows%,$(OS)))
-$(LIBXXH): LDFLAGS += -fPIC
+$(LIBXXH): CFLAGS += -fPIC
endif
$(LIBXXH): xxhash.c
- @echo compiling dynamic library $(LIBVER)
- @$(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
- @echo creating versioned links
- @ln -sf $@ libxxhash.$(SHARED_EXT_MAJOR)
- @ln -sf $@ libxxhash.$(SHARED_EXT)
+ $(CC) $(FLAGS) $^ $(LDFLAGS) $(SONAME_FLAGS) -o $@
+ ln -sf $@ libxxhash.$(SHARED_EXT_MAJOR)
+ ln -sf $@ libxxhash.$(SHARED_EXT)
libxxhash : $(LIBXXH)
+.PHONY: lib
lib: libxxhash.a libxxhash
+# =================================================
# tests
+# =================================================
+# make check can be run with cross-compiled binaries on emulated environments (qemu user mode)
+# by setting $(RUN_ENV) to the target emulation environment
.PHONY: check
check: xxhsum
# stdin
- ./xxhsum < xxhash.c
+ $(RUN_ENV) ./xxhsum < xxhash.c
# multiple files
- ./xxhsum xxhash.* xxhsum.*
+ $(RUN_ENV) ./xxhsum xxhash.* xxhsum.*
# internal bench
- ./xxhsum -bi1
+ $(RUN_ENV) ./xxhsum -bi1
# file bench
- ./xxhsum -bi1 xxhash.c
+ $(RUN_ENV) ./xxhsum -bi1 xxhash.c
.PHONY: test-mem
test-mem: xxhsum
@@ -166,19 +171,23 @@
armtest: clean
@echo ---- test ARM compilation ----
- $(MAKE) xxhsum CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror -static"
+ CC=arm-linux-gnueabi-gcc MOREFLAGS="-Werror -static" $(MAKE) xxhsum
clangtest: clean
@echo ---- test clang compilation ----
- $(MAKE) all CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion"
+ CC=clang MOREFLAGS="-Werror -Wconversion -Wno-sign-conversion" $(MAKE) all
-gpptest: clean
- @echo ---- test g++ compilation ----
- $(MAKE) all CC=g++ CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror"
+cxxtest: clean
+ @echo ---- test C++ compilation ----
+ CC="$(CXX) -Wno-deprecated" $(MAKE) all CFLAGS="-O3 -Wall -Wextra -Wundef -Wshadow -Wcast-align -Werror -fPIC"
-c90test: clean
+.PHONY: c90test
+c90test: CPPFLAGS += -DXXH_NO_LONG_LONG
+c90test: CFLAGS += -std=c90 -Werror -pedantic
+c90test: xxhash.c
@echo ---- test strict C90 compilation [xxh32 only] ----
- $(CC) -std=c90 -Werror -pedantic -DXXH_NO_LONG_LONG -c xxhash.c
+ $(RM) xxhash.o
+ $(CC) $(FLAGS) $^ $(LDFLAGS) -c
$(RM) xxhash.o
usan: CC=clang
@@ -186,10 +195,17 @@
@echo ---- check undefined behavior - sanitize ----
$(MAKE) clean test CC=$(CC) MOREFLAGS="-g -fsanitize=undefined -fno-sanitize-recover=all"
+.PHONY: staticAnalyze
staticAnalyze: clean
@echo ---- static analyzer - scan-build ----
CFLAGS="-g -Werror" scan-build --status-bugs -v $(MAKE) all
+.PHONY: cppcheck
+cppcheck:
+ @echo ---- static analyzer - cppcheck ----
+ cppcheck . --force --enable=warning,portability,performance,style --error-exitcode=1 > /dev/null
+
+.PHONY: namespaceTest
namespaceTest:
$(CC) -c xxhash.c
$(CC) -DXXH_NAMESPACE=TEST_ -c xxhash.c -o xxhash2.o
@@ -199,6 +215,7 @@
xxhsum.1: xxhsum.1.md
cat $^ | $(MD2ROFF) $(MD2ROFF_FLAGS) | sed -n '/^\.\\\".*/!p' > $@
+.PHONY: man
man: xxhsum.1
clean-man:
@@ -209,7 +226,8 @@
test: all namespaceTest check test-xxhsum-c c90test
-test-all: test test32 armtest clangtest gpptest usan listL120 trailingWhitespace staticAnalyze
+test-all: CFLAGS += -Werror
+test-all: test test32 clangtest cxxtest usan listL120 trailingWhitespace staticAnalyze
.PHONY: listL120
listL120: # extract lines >= 120 characters in *.{c,h}, by Takayuki Matsuoka (note : $$, for Makefile compatibility)
diff --git a/README.md b/README.md
index 30318a9..323bc6f 100644
--- a/README.md
+++ b/README.md
@@ -81,8 +81,6 @@
- `XXH_CPU_LITTLE_ENDIAN` : by default, endianess is determined at compile time.
It's possible to skip auto-detection and force format to little-endian, by setting this macro to 1.
Setting it to 0 forces big-endian.
-- `XXH_FORCE_NATIVE_FORMAT` : on big-endian systems : use native number representation.
- Breaks consistency with little-endian results.
- `XXH_PRIVATE_API` : same impact as `XXH_INLINE_ALL`.
Name underlines that symbols will not be published on library public interface.
- `XXH_NAMESPACE` : prefix all symbols with the value of `XXH_NAMESPACE`.
@@ -100,7 +98,7 @@
Calling xxhash 64-bit variant from a C program :
-```
+```C
#include "xxhash.h"
unsigned long long calcul_hash(const void* buffer, size_t length)
@@ -112,42 +110,66 @@
```
Using streaming variant is more involved, but makes it possible to provide data in multiple rounds :
-```
+```C
#include "stdlib.h" /* abort() */
#include "xxhash.h"
unsigned long long calcul_hash_streaming(someCustomType handler)
{
+ /* create a hash state */
XXH64_state_t* const state = XXH64_createState();
if (state==NULL) abort();
- size_t const bufferSize = SOME_VALUE;
+ size_t const bufferSize = SOME_SIZE;
void* const buffer = malloc(bufferSize);
if (buffer==NULL) abort();
+ /* Initialize state with selected seed */
unsigned long long const seed = 0; /* or any other value */
XXH_errorcode const resetResult = XXH64_reset(state, seed);
if (resetResult == XXH_ERROR) abort();
+ /* Feed the state with input data, any size, any number of times */
(...)
while ( /* any condition */ ) {
- size_t const length = get_more_data(buffer, bufferSize, handler); /* undescribed */
- XXH_errorcode const addResult = XXH64_update(state, buffer, length);
- if (addResult == XXH_ERROR) abort();
+ size_t const length = get_more_data(buffer, bufferSize, handler);
+ XXH_errorcode const updateResult = XXH64_update(state, buffer, length);
+ if (updateResult == XXH_ERROR) abort();
(...)
}
-
(...)
- unsigned long long const hash = XXH64_digest(state);
+ /* Get the hash */
+ XXH64_hash_t const hash = XXH64_digest(state);
+
+ /* State can then be re-used; in this example, it is simply freed */
free(buffer);
XXH64_freeState(state);
- return hash;
+ return (unsigned long long)hash;
}
```
+### New experimental hash algorithm
+
+Starting with `v0.7.0`, the library includes a new algorithm, named `XXH3`,
+able to generate 64 and 128-bits hashes.
+
+The new algorithm is much faster than its predecessors,
+for both long and small inputs,
+as can be observed in the following graphs :
+
+
+
+
+
+The algorithm is currently labelled experimental, as it may change in a future version.
+To access it, one needs to unlock its declaration using the macro `XXH_STATIC_LINKING_ONLY`.
+It can be used for ephemeral data, and for tests, but avoid storing long-term hash values yet.
+`XXH3` will be stabilized in a future version.
+This period will be used to collect users' feedback.
+
### Other programming languages
diff --git a/cmake_unofficial/CMakeLists.txt b/cmake_unofficial/CMakeLists.txt
index 393ece3..71398d9 100644
--- a/cmake_unofficial/CMakeLists.txt
+++ b/cmake_unofficial/CMakeLists.txt
@@ -58,6 +58,9 @@
# libxxhash
add_library(xxhash "${XXHASH_DIR}/xxhash.c")
+if (BUILD_SHARED_LIBS)
+ target_compile_definitions(xxhash PUBLIC XXH_EXPORT)
+endif ()
set_target_properties(xxhash PROPERTIES
SOVERSION "${XXHASH_VERSION_STRING}"
VERSION "${XXHASH_VERSION_STRING}")
diff --git a/doc/xxhash_spec.md b/doc/xxhash_spec.md
index e673334..7e634e0 100644
--- a/doc/xxhash_spec.md
+++ b/doc/xxhash_spec.md
@@ -16,7 +16,7 @@
### Version
-0.1.0 (15/01/18)
+0.1.1 (10/10/18)
Table of Contents
@@ -63,13 +63,15 @@
The algorithm uses 32-bits addition, multiplication, rotate, shift and xor operations. Many operations require some 32-bits prime number constants, all defined below :
- static const u32 PRIME32_1 = 2654435761U;
- static const u32 PRIME32_2 = 2246822519U;
- static const u32 PRIME32_3 = 3266489917U;
- static const u32 PRIME32_4 = 668265263U;
- static const u32 PRIME32_5 = 374761393U;
+ static const u32 PRIME32_1 = 2654435761U; // 0b10011110001101110111100110110001
+ static const u32 PRIME32_2 = 2246822519U; // 0b10000101111010111100101001110111
+ static const u32 PRIME32_3 = 3266489917U; // 0b11000010101100101010111000111101
+ static const u32 PRIME32_4 = 668265263U; // 0b00100111110101001110101100101111
+ static const u32 PRIME32_5 = 374761393U; // 0b00010110010101100110011110110001
-### Step 1. Initialise internal accumulators
+These constants are prime numbers, and feature a good mix of bits 1 and 0, neither too regular, nor too dissymmetric. These properties help dispersion capabilities.
+
+### Step 1. Initialize internal accumulators
Each accumulator gets an initial value based on optional `seed` input. Since the `seed` is optional, it can be `0`.
@@ -170,11 +172,13 @@
The algorithm uses 64-bit addition, multiplication, rotate, shift and xor operations. Many operations require some 64-bit prime number constants, all defined below :
- static const u64 PRIME64_1 = 11400714785074694791ULL;
- static const u64 PRIME64_2 = 14029467366897019727ULL;
- static const u64 PRIME64_3 = 1609587929392839161ULL;
- static const u64 PRIME64_4 = 9650029242287828579ULL;
- static const u64 PRIME64_5 = 2870177450012600261ULL;
+ static const u64 PRIME64_1 = 11400714785074694791ULL; // 0b1001111000110111011110011011000110000101111010111100101010000111
+ static const u64 PRIME64_2 = 14029467366897019727ULL; // 0b1100001010110010101011100011110100100111110101001110101101001111
+ static const u64 PRIME64_3 = 1609587929392839161ULL; // 0b0001011001010110011001111011000110011110001101110111100111111001
+ static const u64 PRIME64_4 = 9650029242287828579ULL; // 0b1000010111101011110010100111011111000010101100101010111001100011
+ static const u64 PRIME64_5 = 2870177450012600261ULL; // 0b0010011111010100111010110010111100010110010101100110011111000101
+
+These constants are prime numbers, and feature a good mix of bits 1 and 0, neither too regular, nor too dissymmetric. These properties help dispersion capabilities.
### Step 1. Initialise internal accumulators
@@ -308,4 +312,5 @@
Version changes
--------------------
+v0.1.1 : added a note on rationale for selection of constants
v0.1.0 : initial release
diff --git a/xxh3.h b/xxh3.h
new file mode 100644
index 0000000..9197b68
--- /dev/null
+++ b/xxh3.h
@@ -0,0 +1,806 @@
+/*
+ xxHash - Extremely Fast Hash algorithm
+ Development source file for `xxh3`
+ Copyright (C) 2019-present, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Note :
+ This file is separated for development purposes.
+ It will be integrated into `xxhash.c` when development phase is complete.
+*/
+
+#ifndef XXH3_H
+#define XXH3_H
+
+
+/* === Dependencies === */
+
+#undef XXH_INLINE_ALL /* in case it's already defined */
+#define XXH_INLINE_ALL
+#include "xxhash.h"
+
+#define NDEBUG
+#include <assert.h>
+
+
+/* === Compiler versions === */
+
+#if !(defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) /* C99+ */
+# define restrict /* disable */
+#endif
+
+#if defined(__GNUC__)
+# if defined(__SSE2__)
+# include <x86intrin.h>
+# elif defined(__ARM_NEON__) || defined(__ARM_NEON)
+# define inline __inline__ /* clang bug */
+# include <arm_neon.h>
+# undef inline
+# endif
+# define ALIGN(n) __attribute__ ((aligned(n)))
+#elif defined(_MSC_VER)
+# include <intrin.h>
+# define ALIGN(n) __declspec(align(n))
+#else
+# define ALIGN(n) /* disabled */
+#endif
+
+
+
+/* ==========================================
+ * Vectorization detection
+ * ========================================== */
+#define XXH_SCALAR 0
+#define XXH_SSE2 1
+#define XXH_AVX2 2
+#define XXH_NEON 3
+
+#ifndef XXH_VECTOR /* can be defined on command line */
+# if defined(__AVX2__)
+# define XXH_VECTOR XXH_AVX2
+# elif defined(__SSE2__)
+# define XXH_VECTOR XXH_SSE2
+/* msvc support maybe later */
+# elif defined(__GNUC__) && (defined(__ARM_NEON__) || defined(__ARM_NEON))
+# define XXH_VECTOR XXH_NEON
+# else
+# define XXH_VECTOR XXH_SCALAR
+# endif
+#endif
+
+/* U64 XXH_mult32to64(U32 a, U64 b) { return (U64)a * (U64)b; } */
+#ifdef _MSC_VER
+# include <intrin.h>
+ /* MSVC doesn't do a good job with the mull detection. */
+# define XXH_mult32to64 __emulu
+#else
+# define XXH_mult32to64(x, y) ((U64)((x) & 0xFFFFFFFF) * (U64)((y) & 0xFFFFFFFF))
+#endif
+
+
+/* ==========================================
+ * XXH3 default settings
+ * ========================================== */
+
+#define KEYSET_DEFAULT_SIZE 48 /* minimum 32 */
+
+
+ALIGN(64) static const U32 kKey[KEYSET_DEFAULT_SIZE] = { /* default key material, consumed by all XXH3 mixing steps; 64-byte aligned so vector code paths can load it efficiently */
+ 0xb8fe6c39,0x23a44bbe,0x7c01812c,0xf721ad1c,
+ 0xded46de9,0x839097db,0x7240a4a4,0xb7b3671f,
+ 0xcb79e64e,0xccc0e578,0x825ad07d,0xccff7221,
+ 0xb8084674,0xf743248e,0xe03590e6,0x813a264c,
+ 0x3c2852bb,0x91c300cb,0x88d0658b,0x1b532ea3,
+ 0x71644897,0xa20df94e,0x3819ef46,0xa9deacd8,
+ 0xa8fa763f,0xe39c343f,0xf9dcbbc7,0xc70b4f1d,
+ 0x8a51e04b,0xcdb45931,0xc89f7ec9,0xd9787364,
+
+ 0xeac5ac83,0x34d3ebc3,0xc581a0ff,0xfa1363eb,
+ 0x170ddd51,0xb7f0da49,0xd3165526,0x29d4689e,
+ 0x2b16be58,0x7d47a1fc,0x8ff8b8d1,0x7ad031ce,
+ 0x45cb3a8f,0x95160428,0xafd7fbca,0xbb4b407e, /* NOTE(review): constants' provenance is not shown here — presumably chosen for bit dispersion; confirm against upstream docs */
+};
+
+
+#if defined(__GNUC__) && defined(__i386__)
+/* GCC is stupid and tries to vectorize this.
+ * This tells GCC that it is wrong. */
+__attribute__((__target__("no-sse")))
+#endif
+static U64
+XXH3_mul128(U64 ll1, U64 ll2) /* 128-bit multiply folded to 64 bits: low64(ll1*ll2) + high64(ll1*ll2) */
+{
+#if defined(__SIZEOF_INT128__) || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
+
+ __uint128_t lll = (__uint128_t)ll1 * ll2;
+ return (U64)lll + (U64)(lll >> 64);
+
+#elif defined(_M_X64) || defined(_M_IA64)
+
+# pragma intrinsic(_umul128)
+ U64 llhigh;
+ U64 const lllow = _umul128(ll1, ll2, &llhigh);
+ return lllow + llhigh;
+
+#elif defined(__aarch64__) && defined(__GNUC__)
+
+ U64 llow;
+ U64 llhigh;
+ __asm__("umulh %0, %1, %2" : "=r" (llhigh) : "r" (ll1), "r" (ll2));
+ __asm__("madd %0, %1, %2, %3" : "=r" (llow) : "r" (ll1), "r" (ll2), "r" (llhigh));
+ return llow; /* fix: was 'lllow', which is not declared in this branch (compile error on aarch64) */
+
+ /* Do it out manually on 32-bit.
+ * This is a modified, unrolled, widened, and optimized version of the
+ * mulqdu routine from Hacker's Delight.
+ *
+ * https://www.hackersdelight.org/hdcodetxt/mulqdu.c.txt
+ *
+ * This was modified to use U32->U64 multiplication instead
+ * of U16->U32, to add the high and low values in the end,
+ * be endian-independent, and I added a partial assembly
+ * implementation for ARM. */
+
+ /* An easy 128-bit folding multiply on ARMv6T2 and ARMv7-A/R can be done with
+ * the mighty umaal (Unsigned Multiply Accumulate Accumulate Long) which takes 4 cycles
+ * or less, doing a long multiply and adding two 32-bit integers:
+ *
+ * void umaal(U32 *RdLo, U32 *RdHi, U32 Rn, U32 Rm)
+ * {
+ * U64 prodAcc = (U64)Rn * (U64)Rm;
+ * prodAcc += *RdLo;
+ * prodAcc += *RdHi;
+ * *RdLo = prodAcc & 0xFFFFFFFF;
+ * *RdHi = prodAcc >> 32;
+ * }
+ *
+ * This is compared to umlal which adds to a single 64-bit integer:
+ *
+ * void umlal(U32 *RdLo, U32 *RdHi, U32 Rn, U32 Rm)
+ * {
+ * U64 prodAcc = (U64)Rn * (U64)Rm;
+ * prodAcc += (*RdLo | ((U64)*RdHi << 32)); // fix: comment pseudocode was missing a closing paren
+ * *RdLo = prodAcc & 0xFFFFFFFF;
+ * *RdHi = prodAcc >> 32;
+ * }
+ *
+ * Getting the compiler to emit them is like pulling teeth, and checking
+ * for it is annoying because ARMv7-M lacks this instruction. However, it
+ * is worth it, because this is an otherwise expensive operation. */
+
+ /* GCC-compatible, ARMv6t2 or ARMv7+, non-M variant, and 32-bit */
+#elif defined(__GNUC__) /* GCC-compatible */ \
+ && defined(__ARM_ARCH) && !defined(__aarch64__) && !defined(__arm64__) /* 32-bit ARM */\
+ && !defined(__ARM_ARCH_7M__) /* <- Not ARMv7-M vv*/ \
+ && !(defined(__TARGET_ARCH_ARM) && __TARGET_ARCH_ARM == 0 && __TARGET_ARCH_THUMB == 4) \
+ && (defined(__ARM_ARCH_6T2__) || __ARM_ARCH > 6) /* ARMv6T2 or later */
+
+ U32 w[4] = { 0 };
+ U32 u[2] = { (U32)(ll1 >> 32), (U32)ll1 };
+ U32 v[2] = { (U32)(ll2 >> 32), (U32)ll2 };
+ U32 k;
+
+ /* U64 t = (U64)u[1] * (U64)v[1];
+ * w[3] = t & 0xFFFFFFFF;
+ * k = t >> 32; */
+ __asm__("umull %0, %1, %2, %3"
+ : "=r" (w[3]), "=r" (k)
+ : "r" (u[1]), "r" (v[1]));
+
+ /* t = (U64)u[0] * (U64)v[1] + w[2] + k;
+ * w[2] = t & 0xFFFFFFFF;
+ * k = t >> 32; */
+ __asm__("umaal %0, %1, %2, %3"
+ : "+r" (w[2]), "+r" (k)
+ : "r" (u[0]), "r" (v[1]));
+ w[1] = k;
+ k = 0;
+
+ /* t = (U64)u[1] * (U64)v[0] + w[2] + k;
+ * w[2] = t & 0xFFFFFFFF;
+ * k = t >> 32; */
+ __asm__("umaal %0, %1, %2, %3"
+ : "+r" (w[2]), "+r" (k)
+ : "r" (u[1]), "r" (v[0]));
+
+ /* t = (U64)u[0] * (U64)v[0] + w[1] + k;
+ * w[1] = t & 0xFFFFFFFF;
+ * k = t >> 32; */
+ __asm__("umaal %0, %1, %2, %3"
+ : "+r" (w[1]), "+r" (k)
+ : "r" (u[0]), "r" (v[0]));
+ w[0] = k;
+
+ return (w[1] | ((U64)w[0] << 32)) + (w[3] | ((U64)w[2] << 32));
+
+#else /* Portable scalar version */
+
+ /* emulate 64x64->128b multiplication, using four 32x32->64 */
+ U32 const h1 = (U32)(ll1 >> 32);
+ U32 const h2 = (U32)(ll2 >> 32);
+ U32 const l1 = (U32)ll1;
+ U32 const l2 = (U32)ll2;
+
+ U64 const llh = XXH_mult32to64(h1, h2);
+ U64 const llm1 = XXH_mult32to64(l1, h2);
+ U64 const llm2 = XXH_mult32to64(h1, l2);
+ U64 const lll = XXH_mult32to64(l1, l2);
+
+ U64 const t = lll + (llm1 << 32);
+ U64 const carry1 = t < lll;
+
+ U64 const lllow = t + (llm2 << 32);
+ U64 const carry2 = lllow < t;
+ U64 const llhigh = llh + (llm1 >> 32) + (llm2 >> 32) + carry1 + carry2;
+
+ return llhigh + lllow;
+
+#endif
+}
+
+
+static XXH64_hash_t XXH3_avalanche(U64 h64) /* final mix: spreads entropy across all 64 bits before returning a hash */
+{
+ h64 ^= h64 >> 29; /* xorshift folds high bits into low bits */
+ h64 *= PRIME64_3; /* multiply redistributes bits upward */
+ h64 ^= h64 >> 32; /* final fold of the upper half */
+ return h64;
+}
+
+
+/* ==========================================
+ * Short keys
+ * ========================================== */
+
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_1to3_64b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed) /* hash inputs of 1..3 bytes */
+{
+ assert(data != NULL);
+ assert(len > 0 && len <= 3);
+ assert(keyPtr != NULL);
+ { const U32* const key32 = (const U32*) keyPtr;
+ BYTE const c1 = ((const BYTE*)data)[0]; /* first byte */
+ BYTE const c2 = ((const BYTE*)data)[len >> 1]; /* middle byte (same as first when len==1) */
+ BYTE const c3 = ((const BYTE*)data)[len - 1]; /* last byte */
+ U32 const l1 = (U32)(c1) + ((U32)(c2) << 8); /* pack first+middle bytes */
+ U32 const l2 = (U32)(len) + ((U32)(c3) << 2); /* mix length with last byte */
+ U64 const ll11 = XXH_mult32to64((l1 + seed + key32[0]), (l2 + key32[1])); /* 32x32->64 multiply, salted by seed and key */
+ return XXH3_avalanche(ll11);
+ }
+}
+
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_4to8_64b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed) /* hash inputs of 4..8 bytes */
+{
+ assert(data != NULL);
+ assert(len >= 4 && len <= 8);
+ { const U32* const key32 = (const U32*) keyPtr;
+ U64 acc = PRIME64_1 * (len + seed); /* fold length and seed into the starting value */
+ U32 const l1 = XXH_readLE32(data) + key32[0]; /* first 4 bytes */
+ U32 const l2 = XXH_readLE32((const BYTE*)data + len - 4) + key32[1]; /* last 4 bytes (overlaps l1 when len < 8) */
+ acc += XXH_mult32to64(l1, l2);
+ return XXH3_avalanche(acc);
+ }
+}
+
+XXH_FORCE_INLINE U64
+XXH3_readKey64(const void* ptr) /* read one aligned 64-bit lane of key material, with a result consistent across endianness */
+{
+ assert(((size_t)ptr & 7) == 0); /* aligned on 8-bytes boundaries */
+ if (XXH_CPU_LITTLE_ENDIAN) {
+ return *(const U64*)ptr; /* little-endian host: direct 64-bit read */
+ } else {
+ const U32* const ptr32 = (const U32*)ptr;
+ return (U64)ptr32[0] + (((U64)ptr32[1]) << 32); /* big-endian host: reassemble from the two U32 halves in memory order */
+ }
+}
+
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_9to16_64b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed) /* hash inputs of 9..16 bytes */
+{
+ assert(data != NULL);
+ assert(keyPtr != NULL); /* fix: was 'key', which is not declared here; hidden because NDEBUG strips asserts */
+ assert(len >= 9 && len <= 16);
+ { const U64* const key64 = (const U64*) keyPtr;
+ U64 acc = PRIME64_1 * (len + seed); /* fold length and seed into the starting value */
+ U64 const ll1 = XXH_readLE64(data) + XXH3_readKey64(key64); /* first 8 bytes */
+ U64 const ll2 = XXH_readLE64((const BYTE*)data + len - 8) + XXH3_readKey64(key64+1); /* last 8 bytes (overlap with ll1 is fine) */
+ acc += XXH3_mul128(ll1, ll2);
+ return XXH3_avalanche(acc);
+ }
+}
+
+XXH_FORCE_INLINE XXH64_hash_t
+XXH3_len_0to16_64b(const void* data, size_t len, XXH64_hash_t seed) /* dispatcher for short inputs (0..16 bytes) */
+{
+ assert(data != NULL);
+ assert(len <= 16);
+ { if (len > 8) return XXH3_len_9to16_64b(data, len, kKey, seed);
+ if (len >= 4) return XXH3_len_4to8_64b(data, len, kKey, seed);
+ if (len) return XXH3_len_1to3_64b(data, len, kKey, seed);
+ return seed; /* empty input: the hash is the seed itself */
+ }
+}
+
+
+/* === Long Keys === */
+
+#define STRIPE_LEN 64
+#define STRIPE_ELTS (STRIPE_LEN / sizeof(U32))
+#define ACC_NB (STRIPE_LEN / sizeof(U64))
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_512(void* acc, const void *restrict data, const void *restrict key) /* core loop: mix one 64-byte stripe of data into the accumulators */
+{
+#if (XXH_VECTOR == XXH_AVX2)
+
+ assert((((size_t)acc) & 31) == 0); /* fix: '==' binds tighter than '&', so the old form tested 'acc & 0' */
+ { ALIGN(32) __m256i* const xacc = (__m256i *) acc;
+ const __m256i* const xdata = (const __m256i *) data;
+ const __m256i* const xkey = (const __m256i *) key;
+
+ size_t i;
+ for (i=0; i < STRIPE_LEN/sizeof(__m256i); i++) {
+ __m256i const d = _mm256_loadu_si256 (xdata+i);
+ __m256i const k = _mm256_loadu_si256 (xkey+i);
+ __m256i const dk = _mm256_add_epi32 (d,k); /* uint32 dk[8] = {d0+k0, d1+k1, d2+k2, d3+k3, ...} */
+ __m256i const res = _mm256_mul_epu32 (dk, _mm256_shuffle_epi32 (dk, 0x31)); /* uint64 res[4] = {dk0*dk1, dk2*dk3, ...} */
+ __m256i const add = _mm256_add_epi64(d, xacc[i]);
+ xacc[i] = _mm256_add_epi64(res, add);
+ }
+ }
+
+#elif (XXH_VECTOR == XXH_SSE2)
+
+ assert((((size_t)acc) & 15) == 0); /* fix: precedence — see AVX2 branch */
+ { ALIGN(16) __m128i* const xacc = (__m128i *) acc;
+ const __m128i* const xdata = (const __m128i *) data;
+ const __m128i* const xkey = (const __m128i *) key;
+
+ size_t i;
+ for (i=0; i < STRIPE_LEN/sizeof(__m128i); i++) {
+ __m128i const d = _mm_loadu_si128 (xdata+i);
+ __m128i const k = _mm_loadu_si128 (xkey+i);
+ __m128i const dk = _mm_add_epi32 (d,k); /* uint32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */
+ __m128i const res = _mm_mul_epu32 (dk, _mm_shuffle_epi32 (dk, 0x31)); /* uint64 res[2] = {dk0*dk1,dk2*dk3} */
+ __m128i const add = _mm_add_epi64(d, xacc[i]);
+ xacc[i] = _mm_add_epi64(res, add);
+ }
+ }
+
+#elif (XXH_VECTOR == XXH_NEON)
+
+ assert((((size_t)acc) & 15) == 0); /* fix: precedence — see AVX2 branch */
+ { uint64x2_t* const xacc = (uint64x2_t *)acc;
+ const uint32_t* const xdata = (const uint32_t *)data;
+ const uint32_t* const xkey = (const uint32_t *)key;
+
+ size_t i;
+ for (i=0; i < STRIPE_LEN / sizeof(uint64x2_t); i++) {
+ uint32x4_t const d = vld1q_u32(xdata+i*4); /* U32 d[4] = xdata[i]; */
+ uint32x4_t const k = vld1q_u32(xkey+i*4); /* U32 k[4] = xkey[i]; */
+ uint32x4_t dk = vaddq_u32(d, k); /* U32 dk[4] = {d0+k0, d1+k1, d2+k2, d3+k3} */
+#if !defined(__aarch64__) && !defined(__arm64__) /* ARM32-specific hack */
+ /* vzip on ARMv7 Clang generates a lot of vmovs (technically vorrs) without this.
+ * vzip on 32-bit ARM NEON will overwrite the original register, and I think that Clang
+ * assumes I don't want to destroy it and tries to make a copy. This slows down the code
+ * a lot.
+ * aarch64 not only uses an entirely different syntax, but it requires three
+ * instructions...
+ * ext v1.16B, v0.16B, #8 // select high bits because aarch64 can't address them directly
+ * zip1 v3.2s, v0.2s, v1.2s // first zip
+ * zip2 v2.2s, v0.2s, v1.2s // second zip
+ * ...to do what ARM does in one:
+ * vzip.32 d0, d1 // Interleave high and low bits and overwrite. */
+ __asm__("vzip.32 %e0, %f0" : "+w" (dk)); /* dk = { dk0, dk2, dk1, dk3 }; */
+ xacc[i] = vaddq_u64(xacc[i], vreinterpretq_u64_u32(d)); /* xacc[i] += (U64x2)d; */
+ xacc[i] = vmlal_u32(xacc[i], vget_low_u32(dk), vget_high_u32(dk)); /* xacc[i] += { (U64)dk0*dk1, (U64)dk2*dk3 }; */
+#else
+ /* On aarch64, vshrn/vmovn seems to be equivalent to, if not faster than, the vzip method. */
+ uint32x2_t dkL = vmovn_u64(vreinterpretq_u64_u32(dk)); /* U32 dkL[2] = dk & 0xFFFFFFFF; */
+ uint32x2_t dkH = vshrn_n_u64(vreinterpretq_u64_u32(dk), 32); /* U32 dkH[2] = dk >> 32; */
+ xacc[i] = vaddq_u64(xacc[i], vreinterpretq_u64_u32(d)); /* xacc[i] += (U64x2)d; */
+ xacc[i] = vmlal_u32(xacc[i], dkL, dkH); /* xacc[i] += (U64x2)dkL*(U64x2)dkH; */
+#endif
+ }
+ }
+
+#else /* scalar variant - universal */
+
+ U64* const xacc = (U64*) acc; /* presumed aligned */
+ const U32* const xdata = (const U32*) data;
+ const U32* const xkey = (const U32*) key;
+
+ int i;
+ for (i=0; i < (int)ACC_NB; i++) {
+ int const left = 2*i;
+ int const right= 2*i + 1;
+ U32 const dataLeft = XXH_readLE32(xdata + left);
+ U32 const dataRight = XXH_readLE32(xdata + right);
+ xacc[i] += XXH_mult32to64(dataLeft + xkey[left], dataRight + xkey[right]);
+ xacc[i] += dataLeft + ((U64)dataRight << 32);
+ }
+
+#endif
+}
+
+static void XXH3_scrambleAcc(void* acc, const void* key) /* periodic scramble: xorshift the accumulators, then multiply-fold with key material */
+{
+#if (XXH_VECTOR == XXH_AVX2)
+
+ assert((((size_t)acc) & 31) == 0); /* fix: '==' binds tighter than '&', so the old form tested 'acc & 0' */
+ { ALIGN(32) __m256i* const xacc = (__m256i*) acc;
+ const __m256i* const xkey = (const __m256i *) key;
+
+ size_t i;
+ for (i=0; i < STRIPE_LEN/sizeof(__m256i); i++) {
+ __m256i data = xacc[i];
+ __m256i const shifted = _mm256_srli_epi64(data, 47);
+ data = _mm256_xor_si256(data, shifted);
+
+ { __m256i const k = _mm256_loadu_si256 (xkey+i);
+ __m256i const dk = _mm256_mul_epu32 (data,k); /* U64 dk[4] = {(U64)d0*k0, (U64)d2*k2, ...} (fix: comment previously said d0+k0) */
+
+ __m256i const d2 = _mm256_shuffle_epi32 (data,0x31);
+ __m256i const k2 = _mm256_shuffle_epi32 (k,0x31);
+ __m256i const dk2 = _mm256_mul_epu32 (d2,k2); /* U64 dk2[4] = {(U64)d1*k1, (U64)d3*k3, ...} */
+
+ xacc[i] = _mm256_xor_si256(dk, dk2);
+ } }
+ }
+
+#elif (XXH_VECTOR == XXH_SSE2)
+
+ assert((((size_t)acc) & 15) == 0); /* fix: precedence — see AVX2 branch */
+ { ALIGN(16) __m128i* const xacc = (__m128i*) acc;
+ const __m128i* const xkey = (const __m128i *) key;
+
+ size_t i;
+ for (i=0; i < STRIPE_LEN/sizeof(__m128i); i++) {
+ __m128i data = xacc[i];
+ __m128i const shifted = _mm_srli_epi64(data, 47);
+ data = _mm_xor_si128(data, shifted);
+
+ { __m128i const k = _mm_loadu_si128 (xkey+i);
+ __m128i const dk = _mm_mul_epu32 (data,k);
+
+ __m128i const d2 = _mm_shuffle_epi32 (data, 0x31);
+ __m128i const k2 = _mm_shuffle_epi32 (k, 0x31);
+ __m128i const dk2 = _mm_mul_epu32 (d2,k2);
+
+ xacc[i] = _mm_xor_si128(dk, dk2);
+ } }
+ }
+
+#elif (XXH_VECTOR == XXH_NEON)
+
+ assert((((size_t)acc) & 15) == 0); /* fix: precedence — see AVX2 branch */
+ { uint64x2_t* const xacc = (uint64x2_t*) acc;
+ const uint32_t* const xkey = (const uint32_t*) key;
+ size_t i;
+
+ for (i=0; i < STRIPE_LEN/sizeof(uint64x2_t); i++) {
+ uint64x2_t data = xacc[i];
+ uint64x2_t const shifted = vshrq_n_u64(data, 47); /* uint64 shifted[2] = data >> 47; */
+ data = veorq_u64(data, shifted); /* data ^= shifted; */
+ {
+ /* shuffle: 0, 1, 2, 3 -> 0, 2, 1, 3 */
+ uint32x2x2_t const d =
+ vzip_u32(
+ vget_low_u32(vreinterpretq_u32_u64(data)),
+ vget_high_u32(vreinterpretq_u32_u64(data))
+ );
+ uint32x2x2_t const k = vld2_u32(xkey+i*4); /* load and swap */
+ uint64x2_t const dk = vmull_u32(d.val[0],k.val[0]); /* U64 dk[2] = {(U64)d0*k0, (U64)d2*k2} */
+ uint64x2_t const dk2 = vmull_u32(d.val[1],k.val[1]); /* U64 dk2[2] = {(U64)d1*k1, (U64)d3*k3} */
+ xacc[i] = veorq_u64(dk, dk2); /* xacc[i] = dk^dk2; */
+ } }
+ }
+
+#else /* scalar variant - universal */
+
+ U64* const xacc = (U64*) acc;
+ const U32* const xkey = (const U32*) key;
+
+ int i;
+ for (i=0; i < (int)ACC_NB; i++) {
+ int const left = 2*i;
+ int const right= 2*i + 1;
+ xacc[i] ^= xacc[i] >> 47;
+
+ { U64 const p1 = XXH_mult32to64(xacc[i] & 0xFFFFFFFF, xkey[left]);
+ U64 const p2 = XXH_mult32to64(xacc[i] >> 32, xkey[right]);
+ xacc[i] = p1 ^ p2;
+ } }
+
+#endif
+}
+
+static void XXH3_accumulate(U64* acc, const void* restrict data, const U32* restrict key, size_t nbStripes) /* folds nbStripes stripes of STRIPE_LEN bytes into acc */
+{
+ size_t n;
+ /* Clang doesn't unroll this loop without the pragma. Unrolling can be up to 1.4x faster. */
+#if defined(__clang__) && !defined(__OPTIMIZE_SIZE__)
+# pragma clang loop unroll(enable)
+#endif
+ for (n = 0; n < nbStripes; n++ ) {
+ XXH3_accumulate_512(acc, (const BYTE*)data + n*STRIPE_LEN, key); /* one 512-bit stripe */
+ key += 2; /* advance key window by 2 U32 (8 bytes) per stripe */
+ }
+}
+
+static void
+XXH3_hashLong(U64* acc, const void* data, size_t len)
+{
+ #define NB_KEYS ((KEYSET_DEFAULT_SIZE - STRIPE_ELTS) / 2) /* stripes per block before the key window runs out */
+
+ size_t const block_len = STRIPE_LEN * NB_KEYS;
+ size_t const nb_blocks = len / block_len;
+
+ size_t n;
+ for (n = 0; n < nb_blocks; n++) {
+ XXH3_accumulate(acc, (const BYTE*)data + n*block_len, kKey, NB_KEYS);
+ XXH3_scrambleAcc(acc, kKey + (KEYSET_DEFAULT_SIZE - STRIPE_ELTS)); /* rescramble acc between blocks */
+ }
+
+ /* last partial block */
+ assert(len > STRIPE_LEN); /* NOTE(review): relies on callers only invoking this for len > 128 */
+ { size_t const nbStripes = (len % block_len) / STRIPE_LEN;
+ assert(nbStripes < NB_KEYS);
+ XXH3_accumulate(acc, (const BYTE*)data + nb_blocks*block_len, kKey, nbStripes);
+
+ /* last stripe */
+ if (len & (STRIPE_LEN - 1)) {
+ const BYTE* const p = (const BYTE*) data + len - STRIPE_LEN; /* overlaps previous stripe when len is not a multiple of STRIPE_LEN */
+ XXH3_accumulate_512(acc, p, kKey + nbStripes*2);
+ } }
+}
+
+
+XXH_FORCE_INLINE U64 XXH3_mix16B(const void* data, const void* key) /* mixes 16 input bytes with 16 key bytes via a folded 128-bit multiply */
+{
+ const U64* const key64 = (const U64*)key;
+ return XXH3_mul128(
+ XXH_readLE64(data) ^ XXH3_readKey64(key64),
+ XXH_readLE64((const BYTE*)data+8) ^ XXH3_readKey64(key64+1) );
+}
+
+XXH_FORCE_INLINE U64 XXH3_mix2Accs(const U64* acc, const void* key) /* mixes two accumulator lanes with 16 key bytes */
+{
+ const U64* const key64 = (const U64*)key;
+ return XXH3_mul128(
+ acc[0] ^ XXH3_readKey64(key64),
+ acc[1] ^ XXH3_readKey64(key64+1) );
+}
+
+static XXH64_hash_t XXH3_mergeAccs(const U64* acc, const U32* key, U64 start) /* folds the 8 accumulator lanes into one avalanched 64-bit value */
+{
+ U64 result64 = start;
+
+ result64 += XXH3_mix2Accs(acc+0, key+0);
+ result64 += XXH3_mix2Accs(acc+2, key+4);
+ result64 += XXH3_mix2Accs(acc+4, key+8);
+ result64 += XXH3_mix2Accs(acc+6, key+12);
+
+ return XXH3_avalanche(result64);
+}
+
+__attribute__((noinline)) static XXH64_hash_t /* It's important for performance that XXH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */
+XXH3_hashLong_64b(const void* data, size_t len, XXH64_hash_t seed)
+{
+ ALIGN(64) U64 acc[ACC_NB] = { seed, PRIME64_1, PRIME64_2, PRIME64_3, PRIME64_4, PRIME64_5, -seed, 0 }; /* seed enters lanes 0 and 6 */
+
+ XXH3_hashLong(acc, data, len);
+
+ /* converge into final hash */
+ assert(sizeof(acc) == 64);
+ return XXH3_mergeAccs(acc, kKey, (U64)len * PRIME64_1);
+}
+
+
+/* === Public entry point === */
+
+XXH_PUBLIC_API XXH64_hash_t
+XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed)
+{
+ const BYTE* const p = (const BYTE*)data;
+ const char* const key = (const char*)kKey;
+
+ if (len <= 16) return XXH3_len_0to16_64b(data, len, seed); /* dedicated short-input path */
+
+ { U64 acc = PRIME64_1 * (len + seed);
+ if (len > 32) {
+ if (len > 64) {
+ if (len > 96) {
+ if (len > 128) return XXH3_hashLong_64b(data, len, seed); /* long-input path */
+
+ acc += XXH3_mix16B(p+48, key+96);
+ acc += XXH3_mix16B(p+len-64, key+112);
+ }
+
+ acc += XXH3_mix16B(p+32, key+64);
+ acc += XXH3_mix16B(p+len-48, key+80);
+ }
+
+ acc += XXH3_mix16B(p+16, key+32);
+ acc += XXH3_mix16B(p+len-32, key+48);
+
+ }
+
+ acc += XXH3_mix16B(p+0, key+0); /* every path mixes the first and last 16 bytes */
+ acc += XXH3_mix16B(p+len-16, key+16);
+
+ return XXH3_avalanche(acc);
+ }
+}
+
+
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len) /* convenience wrapper: seed = 0 */
+{
+ return XXH3_64bits_withSeed(data, len, 0);
+}
+
+
+
+/* ==========================================
+ * XXH3 128 bits (=> XXH128)
+ * ========================================== */
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_1to3_128b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed)
+{
+ assert(data != NULL);
+ assert(len > 0 && len <= 3);
+ assert(keyPtr != NULL);
+ { const U32* const key32 = (const U32*) keyPtr;
+ BYTE const c1 = ((const BYTE*)data)[0]; /* first byte */
+ BYTE const c2 = ((const BYTE*)data)[len >> 1]; /* middle byte */
+ BYTE const c3 = ((const BYTE*)data)[len - 1]; /* last byte */
+ U32 const l1 = (U32)(c1) + ((U32)(c2) << 8);
+ U32 const l2 = (U32)(len) + ((U32)(c3) << 2);
+ U64 const ll11 = XXH_mult32to64(l1 + seed + key32[0], l2 + key32[1]);
+ U64 const ll12 = XXH_mult32to64(l1 + key32[2], l2 - seed + key32[3]);
+ return (XXH128_hash_t) { XXH3_avalanche(ll11), XXH3_avalanche(ll12) };
+ }
+}
+
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_4to8_128b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed)
+{
+ assert(data != NULL);
+ assert(len >= 4 && len <= 8);
+ { const U32* const key32 = (const U32*) keyPtr;
+ U64 acc1 = PRIME64_1 * ((U64)len + seed);
+ U64 acc2 = PRIME64_2 * ((U64)len - seed);
+ U32 const l1 = XXH_readLE32(data); /* first 4 bytes */
+ U32 const l2 = XXH_readLE32((const BYTE*)data + len - 4); /* last 4 bytes (overlaps l1 when len < 8) */
+ acc1 += XXH_mult32to64(l1 + key32[0], l2 + key32[1]);
+ acc2 += XXH_mult32to64(l1 - key32[2], l2 + key32[3]);
+ return (XXH128_hash_t){ XXH3_avalanche(acc1), XXH3_avalanche(acc2) };
+ }
+}
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_9to16_128b(const void* data, size_t len, const void* keyPtr, XXH64_hash_t seed)
+{
+ assert(data != NULL);
+ assert(keyPtr != NULL); /* fix: was `key`, which is not declared -> compile error with asserts enabled */
+ assert(len >= 9 && len <= 16);
+ { const U64* const key64 = (const U64*) keyPtr;
+ U64 acc1 = PRIME64_1 * ((U64)len + seed);
+ U64 acc2 = PRIME64_2 * ((U64)len - seed);
+ U64 const ll1 = XXH_readLE64(data); /* first 8 bytes */
+ U64 const ll2 = XXH_readLE64((const BYTE*)data + len - 8); /* last 8 bytes (overlaps ll1 when len < 16) */
+ acc1 += XXH3_mul128(ll1 + XXH3_readKey64(key64+0), ll2 + XXH3_readKey64(key64+1));
+ acc2 += XXH3_mul128(ll1 + XXH3_readKey64(key64+2), ll2 + XXH3_readKey64(key64+3));
+ return (XXH128_hash_t){ XXH3_avalanche(acc1), XXH3_avalanche(acc2) };
+ }
+}
+
+XXH_FORCE_INLINE XXH128_hash_t
+XXH3_len_0to16_128b(const void* data, size_t len, XXH64_hash_t seed) /* dispatches short inputs to a length-specialized variant */
+{
+ assert(data != NULL);
+ assert(len <= 16);
+ { if (len > 8) return XXH3_len_9to16_128b(data, len, kKey, seed);
+ if (len >= 4) return XXH3_len_4to8_128b(data, len, kKey, seed);
+ if (len) return XXH3_len_1to3_128b(data, len, kKey, seed);
+ return (XXH128_hash_t) { seed, -seed }; /* len == 0 */
+ }
+}
+
+__attribute__((noinline)) static XXH128_hash_t /* It's important for performance that XXH3_hashLong is not inlined. Not sure why (uop cache maybe ?), but difference is large and easily measurable */
+XXH3_hashLong_128b(const void* data, size_t len, XXH64_hash_t seed)
+{
+ ALIGN(64) U64 acc[ACC_NB] = { seed, PRIME64_1, PRIME64_2, PRIME64_3, PRIME64_4, PRIME64_5, -seed, 0 }; /* seed enters lanes 0 and 6 */
+ assert(len > 128);
+
+ XXH3_hashLong(acc, data, len);
+
+ /* converge into final hash */
+ assert(sizeof(acc) == 64);
+ { U64 const low64 = XXH3_mergeAccs(acc, kKey, (U64)len * PRIME64_1);
+ U64 const high64 = XXH3_mergeAccs(acc, kKey+16, ((U64)len+1) * PRIME64_2); /* shifted key + distinct len multiplier decorrelate high word */
+ return (XXH128_hash_t) { low64, high64 };
+ }
+}
+
+XXH_PUBLIC_API XXH128_hash_t
+XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed)
+{
+ if (len <= 16) return XXH3_len_0to16_128b(data, len, seed); /* dedicated short-input path */
+
+ { U64 acc1 = PRIME64_1 * (len + seed);
+ U64 acc2 = 0;
+ const BYTE* const p = (const BYTE*)data;
+ const char* const key = (const char*)kKey;
+ if (len > 32) {
+ if (len > 64) {
+ if (len > 96) {
+ if (len > 128) return XXH3_hashLong_128b(data, len, seed); /* long-input path */
+
+ acc1 += XXH3_mix16B(p+48, key+96);
+ acc2 += XXH3_mix16B(p+len-64, key+112);
+ }
+
+ acc1 += XXH3_mix16B(p+32, key+64);
+ acc2 += XXH3_mix16B(p+len-48, key+80);
+ }
+
+ acc1 += XXH3_mix16B(p+16, key+32);
+ acc2 += XXH3_mix16B(p+len-32, key+48);
+
+ }
+
+ acc1 += XXH3_mix16B(p+0, key+0); /* every path mixes the first and last 16 bytes */
+ acc2 += XXH3_mix16B(p+len-16, key+16);
+
+ { U64 const part1 = acc1 + acc2;
+ U64 const part2 = (acc1 * PRIME64_3) + (acc2 * PRIME64_4) + ((len - seed) * PRIME64_2);
+ return (XXH128_hash_t) { XXH3_avalanche(part1), -XXH3_avalanche(part2) }; /* high word is the negated avalanche */
+ }
+ }
+}
+
+
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len) /* convenience wrapper: seed = 0 */
+{
+ return XXH3_128bits_withSeed(data, len, 0);
+}
+
+
+XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed) /* historical entry point name, alias of XXH3_128bits_withSeed */
+{
+ return XXH3_128bits_withSeed(data, len, seed);
+}
+
+#endif /* XXH3_H */
diff --git a/xxhash.c b/xxhash.c
index da06ea7..0fd12ce 100644
--- a/xxhash.c
+++ b/xxhash.c
@@ -71,18 +71,6 @@
# define XXH_ACCEPT_NULL_INPUT_POINTER 0
#endif
-/*!XXH_FORCE_NATIVE_FORMAT :
- * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
- * Results are therefore identical for little-endian and big-endian CPU.
- * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
- * Should endian-independence be of no importance for your application, you may set the #define below to 1,
- * to improve speed for Big-endian CPU.
- * This option has no impact on Little_Endian CPU.
- */
-#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
-# define XXH_FORCE_NATIVE_FORMAT 0
-#endif
-
/*!XXH_FORCE_ALIGN_CHECK :
* This is a minor performance trick, only useful with lots of very small keys.
* It means : check for aligned/unaligned input.
@@ -122,16 +110,16 @@
***************************************/
#ifdef _MSC_VER /* Visual Studio */
# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-# define FORCE_INLINE static __forceinline
+# define XXH_FORCE_INLINE static __forceinline
#else
# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# ifdef __GNUC__
-# define FORCE_INLINE static inline __attribute__((always_inline))
+# define XXH_FORCE_INLINE static inline __attribute__((always_inline))
# else
-# define FORCE_INLINE static inline
+# define XXH_FORCE_INLINE static inline
# endif
# else
-# define FORCE_INLINE static
+# define XXH_FORCE_INLINE static
# endif /* __STDC_VERSION__ */
#endif
@@ -154,6 +142,9 @@
# endif
#endif
+
+/* === Memory access === */
+
#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
@@ -181,6 +172,22 @@
#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
+/* === Endianness === */
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+static int XXH_isLittleEndian(void) /* runtime endianness probe: returns 1 on little-endian targets */
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
+#endif
+
+
+
+
/* ****************************************
* Compiler-specific Functions and Macros
******************************************/
@@ -191,8 +198,8 @@
# define XXH_rotl32(x,r) _rotl(x,r)
# define XXH_rotl64(x,r) _rotl64(x,r)
#else
-# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
-# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+# define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+# define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
#endif
#if defined(_MSC_VER) /* Visual Studio */
@@ -210,38 +217,14 @@
#endif
-/* *************************************
-* Architecture Macros
-***************************************/
-typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
-
-/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
-#ifndef XXH_CPU_LITTLE_ENDIAN
-static int XXH_isLittleEndian(void)
-{
- const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
- return one.c[0];
-}
-# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
-#endif
-
-
/* ***************************
* Memory reads
*****************************/
typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
-FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+XXH_FORCE_INLINE U32 XXH_readLE32(const void* ptr)
{
- if (align==XXH_unaligned)
- return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
- else
- return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
-}
-
-FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
-{
- return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
}
static U32 XXH_readBE32(const void* ptr)
@@ -249,6 +232,16 @@
return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
}
+XXH_FORCE_INLINE U32
+XXH_readLE32_align(const void* ptr, XXH_alignment align) /* little-endian 32-bit read; direct dereference when caller guarantees alignment */
+{
+ if (align==XXH_unaligned) {
+ return XXH_readLE32(ptr);
+ } else {
+ return XXH_CPU_LITTLE_ENDIAN ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr); /* aligned: plain load is safe */
+ }
+}
+
/* *************************************
* Macros
@@ -260,18 +253,62 @@
/* *******************************************************************
* 32-bit hash functions
*********************************************************************/
-static const U32 PRIME32_1 = 2654435761U;
-static const U32 PRIME32_2 = 2246822519U;
-static const U32 PRIME32_3 = 3266489917U;
-static const U32 PRIME32_4 = 668265263U;
-static const U32 PRIME32_5 = 374761393U;
+static const U32 PRIME32_1 = 2654435761U; /* 0b10011110001101110111100110110001 */
+static const U32 PRIME32_2 = 2246822519U; /* 0b10000101111010111100101001110111 */
+static const U32 PRIME32_3 = 3266489917U; /* 0b11000010101100101010111000111101 */
+static const U32 PRIME32_4 = 668265263U; /* 0b00100111110101001110101100101111 */
+static const U32 PRIME32_5 = 374761393U; /* 0b00010110010101100110011110110001 */
-static U32 XXH32_round(U32 seed, U32 input)
+static U32 XXH32_round(U32 acc, U32 input)
{
- seed += input * PRIME32_2;
- seed = XXH_rotl32(seed, 13);
- seed *= PRIME32_1;
- return seed;
+ acc += input * PRIME32_2;
+ acc = XXH_rotl32(acc, 13);
+ acc *= PRIME32_1;
+#if defined(__GNUC__) && defined(__SSE4_1__) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+ /* UGLY HACK:
+ * This inline assembly hack forces acc into a normal register. This is the
+ * only thing that prevents GCC and Clang from autovectorizing the XXH32 loop
+ * (pragmas and attributes don't work for some reason) without globally
+ * disabling SSE4.1.
+ *
+ * The reason we want to avoid vectorization is because despite working on
+ * 4 integers at a time, there are multiple factors slowing XXH32 down on
+ * SSE4:
+ * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on newer chips!)
+ * making it slightly slower to multiply four integers at once compared to four
+ * integers independently. Even when pmulld was fastest, Sandy/Ivy Bridge, it is
+ * still not worth it to go into SSE just to multiply unless doing a long operation.
+ *
+ * - Four instructions are required to rotate,
+ * movqda tmp, v // not required with VEX encoding
+ * pslld tmp, 13 // tmp <<= 13
+ * psrld v, 19 // x >>= 19
+ * por v, tmp // x |= tmp
+ * compared to one for scalar:
+ * roll v, 13 // reliably fast across the board
+ * shldl v, v, 13 // Sandy Bridge and later prefer this for some reason
+ *
+ * - Instruction level parallelism is actually more beneficial here because the
+ * SIMD actually serializes this operation: While v1 is rotating, v2 can load data,
+ * while v3 can multiply. SSE forces them to operate together.
+ *
+ * How this hack works:
+ * __asm__("" // Declare an assembly block but don't declare any instructions
+ * : // However, as an Input/Output Operand,
+ * "+r" // constrain a read/write operand (+) as a general purpose register (r).
+ * (acc) // and set acc as the operand
+ * );
+ *
+ * Because of the 'r', the compiler has promised that acc will be in a
+ * general purpose register and the '+' says that it will be 'read/write',
+ * so it has to assume it has changed. It is like volatile without all the
+ * loads and stores.
+ *
+ * Since the argument has to be in a normal register (not an SSE register),
+ * each time XXH32_round is called, it is impossible to vectorize. */
+ __asm__("" : "+r" (acc));
+#endif
+ return acc;
}
/* mix all bits */
@@ -285,17 +322,16 @@
return(h32);
}
-#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+#define XXH_get32bits(p) XXH_readLE32_align(p, align)
static U32
-XXH32_finalize(U32 h32, const void* ptr, size_t len,
- XXH_endianess endian, XXH_alignment align)
+XXH32_finalize(U32 h32, const void* ptr, size_t len, XXH_alignment align)
{
const BYTE* p = (const BYTE*)ptr;
-#define PROCESS1 \
- h32 += (*p) * PRIME32_5; \
- p++; \
+
+#define PROCESS1 \
+ h32 += (*p++) * PRIME32_5; \
h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
#define PROCESS4 \
@@ -347,10 +383,8 @@
return h32; /* reaching this point is deemed impossible */
}
-
-FORCE_INLINE U32
-XXH32_endian_align(const void* input, size_t len, U32 seed,
- XXH_endianess endian, XXH_alignment align)
+XXH_FORCE_INLINE U32
+XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_alignment align)
{
const BYTE* p = (const BYTE*)input;
const BYTE* bEnd = p + len;
@@ -385,7 +419,7 @@
h32 += (U32)len;
- return XXH32_finalize(h32, p, len&15, endian, align);
+ return XXH32_finalize(h32, p, len&15, align);
}
@@ -397,21 +431,15 @@
XXH32_reset(&state, seed);
XXH32_update(&state, input, len);
return XXH32_digest(&state);
+
#else
- XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if (XXH_FORCE_ALIGN_CHECK) {
if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
- else
- return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ return XXH32_endian_align(input, len, seed, XXH_aligned);
} }
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
- else
- return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+ return XXH32_endian_align(input, len, seed, XXH_unaligned);
#endif
}
@@ -448,12 +476,9 @@
}
-FORCE_INLINE
-XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
+XXH_PUBLIC_API XXH_errorcode
+XXH32_update(XXH32_state_t* state, const void* input, size_t len)
{
- const BYTE* p = (const BYTE*)input;
- const BYTE* const bEnd = p + len;
-
if (input==NULL)
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
return XXH_OK;
@@ -461,69 +486,61 @@
return XXH_ERROR;
#endif
- state->total_len_32 += (unsigned)len;
- state->large_len |= (len>=16) | (state->total_len_32>=16);
+ { const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
- if (state->memsize + len < 16) { /* fill in tmp buffer */
- XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
- state->memsize += (unsigned)len;
- return XXH_OK;
- }
+ state->total_len_32 += (XXH32_hash_t)len;
+ state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
- if (state->memsize) { /* some data left from previous update */
- XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
- { const U32* p32 = state->mem32;
- state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
- state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
- state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
- state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian));
+ if (state->memsize + len < 16) { /* fill in tmp buffer */
+ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+ state->memsize += (XXH32_hash_t)len;
+ return XXH_OK;
}
- p += 16-state->memsize;
- state->memsize = 0;
- }
- if (p <= bEnd-16) {
- const BYTE* const limit = bEnd - 16;
- U32 v1 = state->v1;
- U32 v2 = state->v2;
- U32 v3 = state->v3;
- U32 v4 = state->v4;
+ if (state->memsize) { /* some data left from previous update */
+ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+ { const U32* p32 = state->mem32;
+ state->v1 = XXH32_round(state->v1, XXH_readLE32(p32)); p32++;
+ state->v2 = XXH32_round(state->v2, XXH_readLE32(p32)); p32++;
+ state->v3 = XXH32_round(state->v3, XXH_readLE32(p32)); p32++;
+ state->v4 = XXH32_round(state->v4, XXH_readLE32(p32));
+ }
+ p += 16-state->memsize;
+ state->memsize = 0;
+ }
- do {
- v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
- v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
- v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
- v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
- } while (p<=limit);
+ if (p <= bEnd-16) {
+ const BYTE* const limit = bEnd - 16;
+ U32 v1 = state->v1;
+ U32 v2 = state->v2;
+ U32 v3 = state->v3;
+ U32 v4 = state->v4;
- state->v1 = v1;
- state->v2 = v2;
- state->v3 = v3;
- state->v4 = v4;
- }
+ do {
+ v1 = XXH32_round(v1, XXH_readLE32(p)); p+=4;
+ v2 = XXH32_round(v2, XXH_readLE32(p)); p+=4;
+ v3 = XXH32_round(v3, XXH_readLE32(p)); p+=4;
+ v4 = XXH32_round(v4, XXH_readLE32(p)); p+=4;
+ } while (p<=limit);
- if (p < bEnd) {
- XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
- state->memsize = (unsigned)(bEnd-p);
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) {
+ XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+ state->memsize = (unsigned)(bEnd-p);
+ }
}
return XXH_OK;
}
-XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
-{
- XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
- else
- return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
-}
-
-
-FORCE_INLINE U32
-XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state)
{
U32 h32;
@@ -538,18 +555,7 @@
h32 += state->total_len_32;
- return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned);
-}
-
-
-XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
-{
- XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH32_digest_endian(state_in, XXH_littleEndian);
- else
- return XXH32_digest_endian(state_in, XXH_bigEndian);
+ return XXH32_finalize(h32, state->mem32, state->memsize, XXH_aligned);
}
@@ -641,17 +647,9 @@
}
#endif
-FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+XXH_FORCE_INLINE U64 XXH_readLE64(const void* ptr)
{
- if (align==XXH_unaligned)
- return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
- else
- return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
-}
-
-FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
-{
- return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
}
static U64 XXH_readBE64(const void* ptr)
@@ -659,14 +657,23 @@
return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
}
+XXH_FORCE_INLINE U64
+XXH_readLE64_align(const void* ptr, XXH_alignment align) /* little-endian 64-bit read; direct dereference when caller guarantees alignment */
+{
+ if (align==XXH_unaligned)
+ return XXH_readLE64(ptr);
+ else
+ return XXH_CPU_LITTLE_ENDIAN ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr); /* aligned: plain load is safe */
+}
+
/*====== xxh64 ======*/
-static const U64 PRIME64_1 = 11400714785074694791ULL;
-static const U64 PRIME64_2 = 14029467366897019727ULL;
-static const U64 PRIME64_3 = 1609587929392839161ULL;
-static const U64 PRIME64_4 = 9650029242287828579ULL;
-static const U64 PRIME64_5 = 2870177450012600261ULL;
+static const U64 PRIME64_1 = 11400714785074694791ULL; /* 0b1001111000110111011110011011000110000101111010111100101010000111 */
+static const U64 PRIME64_2 = 14029467366897019727ULL; /* 0b1100001010110010101011100011110100100111110101001110101101001111 */
+static const U64 PRIME64_3 = 1609587929392839161ULL; /* 0b0001011001010110011001111011000110011110001101110111100111111001 */
+static const U64 PRIME64_4 = 9650029242287828579ULL; /* 0b1000010111101011110010100111011111000010101100101010111001100011 */
+static const U64 PRIME64_5 = 2870177450012600261ULL; /* 0b0010011111010100111010110010111100010110010101100110011111000101 */
static U64 XXH64_round(U64 acc, U64 input)
{
@@ -695,17 +702,15 @@
}
-#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+#define XXH_get64bits(p) XXH_readLE64_align(p, align)
static U64
-XXH64_finalize(U64 h64, const void* ptr, size_t len,
- XXH_endianess endian, XXH_alignment align)
+XXH64_finalize(U64 h64, const void* ptr, size_t len, XXH_alignment align)
{
const BYTE* p = (const BYTE*)ptr;
-#define PROCESS1_64 \
- h64 ^= (*p) * PRIME64_5; \
- p++; \
+#define PROCESS1_64 \
+ h64 ^= (*p++) * PRIME64_5; \
h64 = XXH_rotl64(h64, 11) * PRIME64_1;
#define PROCESS4_64 \
@@ -807,9 +812,8 @@
return 0; /* unreachable, but some compilers complain without it */
}
-FORCE_INLINE U64
-XXH64_endian_align(const void* input, size_t len, U64 seed,
- XXH_endianess endian, XXH_alignment align)
+XXH_FORCE_INLINE U64
+XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_alignment align)
{
const BYTE* p = (const BYTE*)input;
const BYTE* bEnd = p + len;
@@ -848,7 +852,7 @@
h64 += (U64) len;
- return XXH64_finalize(h64, p, len, endian, align);
+ return XXH64_finalize(h64, p, len, align);
}
@@ -860,21 +864,16 @@
XXH64_reset(&state, seed);
XXH64_update(&state, input, len);
return XXH64_digest(&state);
+
#else
- XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
if (XXH_FORCE_ALIGN_CHECK) {
if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
- else
- return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ return XXH64_endian_align(input, len, seed, XXH_aligned);
} }
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
- else
- return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+ return XXH64_endian_align(input, len, seed, XXH_unaligned);
+
#endif
}
@@ -908,12 +907,9 @@
return XXH_OK;
}
-FORCE_INLINE
-XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+XXH_PUBLIC_API XXH_errorcode
+XXH64_update (XXH64_state_t* state, const void* input, size_t len)
{
- const BYTE* p = (const BYTE*)input;
- const BYTE* const bEnd = p + len;
-
if (input==NULL)
#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
return XXH_OK;
@@ -921,63 +917,58 @@
return XXH_ERROR;
#endif
- state->total_len += len;
+ { const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
- if (state->memsize + len < 32) { /* fill in tmp buffer */
- XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
- state->memsize += (U32)len;
- return XXH_OK;
- }
+ state->total_len += len;
- if (state->memsize) { /* tmp buffer is full */
- XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
- state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
- state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
- state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
- state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
- p += 32-state->memsize;
- state->memsize = 0;
- }
+ if (state->memsize + len < 32) { /* fill in tmp buffer */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+ state->memsize += (U32)len;
+ return XXH_OK;
+ }
- if (p+32 <= bEnd) {
- const BYTE* const limit = bEnd - 32;
- U64 v1 = state->v1;
- U64 v2 = state->v2;
- U64 v3 = state->v3;
- U64 v4 = state->v4;
+ if (state->memsize) { /* tmp buffer is full */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+ state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0));
+ state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1));
+ state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2));
+ state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3));
+ p += 32-state->memsize;
+ state->memsize = 0;
+ }
- do {
- v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
- v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
- v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
- v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
- } while (p<=limit);
+ if (p+32 <= bEnd) {
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = state->v1;
+ U64 v2 = state->v2;
+ U64 v3 = state->v3;
+ U64 v4 = state->v4;
- state->v1 = v1;
- state->v2 = v2;
- state->v3 = v3;
- state->v4 = v4;
- }
+ do {
+ v1 = XXH64_round(v1, XXH_readLE64(p)); p+=8;
+ v2 = XXH64_round(v2, XXH_readLE64(p)); p+=8;
+ v3 = XXH64_round(v3, XXH_readLE64(p)); p+=8;
+ v4 = XXH64_round(v4, XXH_readLE64(p)); p+=8;
+ } while (p<=limit);
- if (p < bEnd) {
- XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
- state->memsize = (unsigned)(bEnd-p);
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) {
+ XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+ state->memsize = (unsigned)(bEnd-p);
+ }
}
return XXH_OK;
}
-XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
-{
- XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
- else
- return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
-}
-
-FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state)
{
U64 h64;
@@ -998,17 +989,7 @@
h64 += (U64) state->total_len;
- return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned);
-}
-
-XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
-{
- XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
-
- if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
- return XXH64_digest_endian(state_in, XXH_littleEndian);
- else
- return XXH64_digest_endian(state_in, XXH_bigEndian);
+ return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, XXH_aligned);
}
@@ -1026,4 +1007,14 @@
return XXH_readBE64(src);
}
+
+
+/* *********************************************************************
+* XXH3
+* New generation hash designed for speed on small keys and vectorization
+************************************************************************ */
+
+#include "xxh3.h"
+
+
#endif /* XXH_NO_LONG_LONG */
diff --git a/xxhash.h b/xxhash.h
index d6bad94..7f3d660 100644
--- a/xxhash.h
+++ b/xxhash.h
@@ -107,7 +107,15 @@
# define XXH_PUBLIC_API static
# endif
#else
-# define XXH_PUBLIC_API /* do nothing */
+# if defined(WIN32) && !defined(__GNUC__)
+# ifdef XXH_EXPORT
+# define XXH_PUBLIC_API __declspec(dllexport)
+# else
+# define XXH_PUBLIC_API __declspec(dllimport)
+# endif
+# else
+# define XXH_PUBLIC_API /* do nothing */
+# endif
#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
/*! XXH_NAMESPACE, aka Namespace Emulation :
@@ -150,8 +158,8 @@
* Version
***************************************/
#define XXH_VERSION_MAJOR 0
-#define XXH_VERSION_MINOR 6
-#define XXH_VERSION_RELEASE 5
+#define XXH_VERSION_MINOR 7
+#define XXH_VERSION_RELEASE 0
#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
XXH_PUBLIC_API unsigned XXH_versionNumber (void);
@@ -239,6 +247,8 @@
typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+
+
#endif /* XXH_NO_LONG_LONG */
@@ -281,43 +291,150 @@
uint64_t v4;
uint64_t mem64[4];
uint32_t memsize;
- uint32_t reserved[2]; /* never read nor write, might be removed in a future version */
+ uint32_t reserved[2]; /* never read nor write, might be removed in a future version */
}; /* typedef'd to XXH64_state_t */
# else
struct XXH32_state_s {
- unsigned total_len_32;
- unsigned large_len;
- unsigned v1;
- unsigned v2;
- unsigned v3;
- unsigned v4;
- unsigned mem32[4];
- unsigned memsize;
- unsigned reserved; /* never read nor write, might be removed in a future version */
+ XXH32_hash_t total_len_32;
+ XXH32_hash_t large_len;
+ XXH32_hash_t v1;
+ XXH32_hash_t v2;
+ XXH32_hash_t v3;
+ XXH32_hash_t v4;
+ XXH32_hash_t mem32[4];
+ XXH32_hash_t memsize;
+ XXH32_hash_t reserved; /* never read nor write, might be removed in a future version */
}; /* typedef'd to XXH32_state_t */
# ifndef XXH_NO_LONG_LONG /* remove 64-bit support */
struct XXH64_state_s {
- unsigned long long total_len;
- unsigned long long v1;
- unsigned long long v2;
- unsigned long long v3;
- unsigned long long v4;
- unsigned long long mem64[4];
- unsigned memsize;
- unsigned reserved[2]; /* never read nor write, might be removed in a future version */
+ XXH64_hash_t total_len;
+ XXH64_hash_t v1;
+ XXH64_hash_t v2;
+ XXH64_hash_t v3;
+ XXH64_hash_t v4;
+ XXH64_hash_t mem64[4];
+ XXH32_hash_t memsize;
+ XXH32_hash_t reserved[2]; /* never read nor write, might be removed in a future version */
}; /* typedef'd to XXH64_state_t */
# endif
# endif
+/*-**********************************************************************
+* XXH3
+* New experimental hash
+************************************************************************/
+#ifndef XXH_NO_LONG_LONG
+
+
+/* ============================================
+ * XXH3 is a new hash algorithm,
+ * featuring vastly improved speed performance
+ * for both small and large inputs.
+ * A full speed analysis will be published,
+ * it requires a lot more space than this comment can handle.
+ * In general, expect XXH3 to run about ~2x faster on large inputs,
+ * and >3x faster on small ones, though exact differences depend on platform.
+ *
+ * The algorithm is portable, will generate the same hash on all platforms.
+ * It benefits greatly from vectorization units, but does not require it.
+ *
+ * XXH3 offers 2 variants, _64bits and _128bits.
+ * When only 64 bits are needed, prefer calling the _64bits variant :
+ * it reduces the amount of mixing, resulting in faster speed on small inputs.
+ * It's also generally simpler to manipulate a scalar type than a struct.
+ * Note : the low 64-bit field of the _128bits variant is the same as _64bits result.
+ *
+ * The XXH3 algorithm is still considered experimental.
+ * It's possible to use it for ephemeral data, but avoid storing long-term values for later re-use.
+ * While labelled experimental, the produced result can still change between versions.
+ *
+ * The API currently supports one-shot hashing only.
+ * The full version will include streaming capability, and canonical representation
+ * Long term optional feature may include custom secret keys, and secret key generation.
+ *
+ * There are still a number of open questions that the community can influence during the experimental period.
+ * I'm trying to list a few of them below, though don't consider this list as complete.
+ *
+ * - 128-bits output type : currently defined as a structure of 2 64-bits fields.
+ * That's because 128-bit values do not exist in C standard.
+ * Note that it means that, at byte level, result is not identical depending on endianness.
+ * However, at field level, they are identical on all platforms.
+ * The canonical representation will solve the issue of identical byte-level representation across platforms,
+ * which is necessary for serialization.
+ * Would there be a better representation for a 128-bit hash result ?
+ * Are the names of the inner 64-bit fields important ? Should they be changed ?
+ *
+ * - Canonical representation : for the 64-bit variant, canonical representation is the same as XXH64() (aka big-endian).
+ * What should it be for the 128-bit variant ?
+ * Since it's no longer a scalar value, big-endian representation is no longer an obvious choice.
+ * One possibility : represent it as the concatenation of two 64-bits canonical representation (aka 2x big-endian)
+ * Another one : represent it in the same order as natural order in the struct for little-endian platforms.
+ * Less consistent with existing convention for XXH32/XXH64, but may be more natural for little-endian platforms.
+ *
+ * - Associated functions for 128-bit hash : simple things, such as checking if 2 hashes are equal, become more difficult with struct.
+ * Granted, it's not terribly difficult to create a comparator, but it's still a workload.
+ * Would it be beneficial to declare and define a comparator function for XXH128_hash_t ?
+ * Are there other operations on XXH128_hash_t which would be desirable ?
+ *
+ * - Variant compatibility : The low 64-bit field of the _128bits variant is the same as the result of _64bits.
+ * This is not a compulsory behavior. It just felt that it "wouldn't hurt", and might even help in some (unidentified) cases.
+ * But it might influence the design of XXH128_hash_t, in ways which may block other possibilities.
+ * Good idea, bad idea ?
+ *
+ * - Seed type for 128-bits variant : currently, it's a single 64-bit value, like the 64-bit variant.
+ * It could be argued that it's more logical to offer a 128-bit seed input parameter for a 128-bit hash.
+ * Although it's also more difficult to use, since it requires to declare and pass a structure instead of a value.
+ * It would either replace current choice, or add a new one.
+ * Farmhash, for example, offers both variants (the 128-bits seed variant is called `doubleSeed`).
+ * If both 64-bit and 128-bit seeds are possible, which variant should be called XXH128 ?
+ *
+ * - Result for len==0 : Currently, the result of hashing a zero-length input is the seed.
+ * This mimics the behavior of a crc : in which case, a seed is effectively an accumulator, so it's not updated if input is empty.
+ * Consequently, by default, when no seed specified, it returns zero. That part seems okay (it used to be a request for XXH32/XXH64).
+ * But is it still fine to return the seed when the seed is non-zero ?
+ * Are there use cases which would depend on this behavior, or would prefer a mixing of the seed ?
+ */
+
+#ifdef XXH_NAMESPACE
+# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
+# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
+# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
+# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
+# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
+#endif
+
+
+typedef struct {
+ XXH64_hash_t low64;
+ XXH64_hash_t high64;
+} XXH128_hash_t;
+
+XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, unsigned long long seed);
+
+/* note : variants without seed produce same result as variant with seed == 0 */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, unsigned long long seed);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, unsigned long long seed); /* == XXH128() */
+
+
+#endif /* XXH_NO_LONG_LONG */
+
+
+/*-**********************************************************************
+* XXH_INLINE_ALL
+************************************************************************/
#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */
#endif
+
+
#endif /* XXH_STATIC_LINKING_ONLY */
diff --git a/xxhsum.c b/xxhsum.c
index 69931f7..0ec11c0 100644
--- a/xxhsum.c
+++ b/xxhsum.c
@@ -44,7 +44,6 @@
# define _LARGEFILE64_SOURCE
#endif
-
/* ************************************
* Includes
**************************************/
@@ -55,6 +54,7 @@
#include <sys/stat.h> /* stat, stat64, _stat64 */
#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */
#include <assert.h> /* assert */
+#include <errno.h> /* errno */
#define XXH_STATIC_LINKING_ONLY /* *_state_t */
#include "xxhash.h"
@@ -63,15 +63,65 @@
/* ************************************
* OS-Specific Includes
**************************************/
-#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
-# include <fcntl.h> /* _O_BINARY */
-# include <io.h> /* _setmode, _isatty */
-# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
+#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || (defined(__APPLE__) && defined(__MACH__)) /* UNIX-like OS */ \
+ || defined(__midipix__) || defined(__VMS))
+# if (defined(__APPLE__) && defined(__MACH__)) || defined(__SVR4) || defined(_AIX) || defined(__hpux) /* POSIX.1-2001 (SUSv3) conformant */ \
+ || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) /* BSD distros */
+# define PLATFORM_POSIX_VERSION 200112L
+# else
+# if defined(__linux__) || defined(__linux)
+# ifndef _POSIX_C_SOURCE
+# define _POSIX_C_SOURCE 200112L /* use feature test macro */
+# endif
+# endif
+# include <unistd.h> /* declares _POSIX_VERSION */
+# if defined(_POSIX_VERSION) /* POSIX compliant */
+# define PLATFORM_POSIX_VERSION _POSIX_VERSION
+# else
+# define PLATFORM_POSIX_VERSION 0
+# endif
+# endif
+#endif
+#if !defined(PLATFORM_POSIX_VERSION)
+# define PLATFORM_POSIX_VERSION -1
+#endif
+
+#if (defined(__linux__) && (PLATFORM_POSIX_VERSION >= 1)) \
+ || (PLATFORM_POSIX_VERSION >= 200112L) \
+ || defined(__DJGPP__) \
+ || defined(__MSYS__)
+# include <unistd.h> /* isatty */
+# define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
+#elif defined(MSDOS) || defined(OS2) || defined(__CYGWIN__)
+# include <io.h> /* _isatty */
# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
+#elif defined(WIN32) || defined(_WIN32)
+# include <io.h> /* _isatty */
+# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
+# include <stdio.h> /* FILE */
+static __inline int IS_CONSOLE(FILE* stdStream) {
+ DWORD dummy;
+ return _isatty(_fileno(stdStream)) && GetConsoleMode((HANDLE)_get_osfhandle(_fileno(stdStream)), &dummy);
+}
#else
-# include <unistd.h> /* isatty, STDIN_FILENO */
+# define IS_CONSOLE(stdStream) 0
+#endif
+
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32)
+# include <fcntl.h> /* _O_BINARY */
+# include <io.h> /* _setmode, _fileno, _get_osfhandle */
+# if !defined(__DJGPP__)
+# include <windows.h> /* DeviceIoControl, HANDLE, FSCTL_SET_SPARSE */
+# include <winioctl.h> /* FSCTL_SET_SPARSE */
+# define SET_BINARY_MODE(file) { int const unused=_setmode(_fileno(file), _O_BINARY); (void)unused; }
+# define SET_SPARSE_FILE_MODE(file) { DWORD dw; DeviceIoControl((HANDLE) _get_osfhandle(_fileno(file)), FSCTL_SET_SPARSE, 0, 0, 0, 0, &dw, 0); }
+# else
+# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
+# define SET_SPARSE_FILE_MODE(file)
+# endif
+#else
# define SET_BINARY_MODE(file)
-# define IS_CONSOLE(stdStream) isatty(STDIN_FILENO)
+# define SET_SPARSE_FILE_MODE(file)
#endif
#if !defined(S_ISREG)
@@ -114,13 +164,86 @@
#define QUOTE(str) #str
#define EXPAND_AND_QUOTE(str) QUOTE(str)
#define PROGRAM_VERSION EXPAND_AND_QUOTE(LIB_VERSION)
+
+/* Show compiler versions in WELCOME_MESSAGE. VERSION_FMT will return the printf specifiers,
+ * and VERSION will contain the comma separated list of arguments to the VERSION_FMT string. */
+#if defined(__clang_version__)
+/* Clang does its own thing. */
+# ifdef __apple_build_version__
+# define VERSION_FMT ", Apple Clang %s"
+# else
+# define VERSION_FMT ", Clang %s"
+# endif
+# define VERSION __clang_version__
+#elif defined(__VERSION__)
+/* GCC and ICC */
+# define VERSION_FMT ", %s"
+# ifdef __INTEL_COMPILER /* icc adds its prefix */
+# define VERSION_STRING __VERSION__
+# else /* assume GCC */
+# define VERSION "GCC " __VERSION__
+# endif
+#elif defined(_MSC_FULL_VER) && defined(_MSC_BUILD)
+/* "For example, if the version number of the Visual C++ compiler is 15.00.20706.01, the _MSC_FULL_VER macro
+ * evaluates to 150020706." https://docs.microsoft.com/en-us/cpp/preprocessor/predefined-macros?view=vs-2017 */
+# define VERSION _MSC_FULL_VER / 10000000 % 100, _MSC_FULL_VER / 100000 % 100, _MSC_FULL_VER % 100000, _MSC_BUILD
+# define VERSION_FMT ", MSVC %02i.%02i.%05i.%02i"
+#elif defined(__TINYC__)
+/* tcc stores its version in the __TINYC__ macro. */
+# define VERSION_FMT ", tcc %i.%i.%i"
+# define VERSION __TINYC__ / 10000 % 100, __TINYC__ / 100 % 100, __TINYC__ % 100
+#else
+# define VERSION_FMT "%s"
+# define VERSION ""
+#endif
+
+/* makes the next part easier */
+#if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
+# define ARCH_X86 "x86_64"
+#elif defined(__i386__) || defined(_M_X86) || defined(_M_X86_FP)
+# define ARCH_X86 "i386"
+#endif
+
+/* Try to detect the architecture. */
+#if defined(ARCH_X86)
+# if defined(__AVX2__)
+# define ARCH ARCH_X86 " + AVX2"
+# elif defined(__AVX__)
+# define ARCH ARCH_X86 " + AVX"
+# elif defined(__SSE2__)
+# define ARCH ARCH_X86 " + SSE2"
+# else
+# define ARCH ARCH_X86
+# endif
+#elif defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64)
+# define ARCH "aarch64"
+#elif defined(__arm__) || defined(__thumb__) || defined(__thumb2__) || defined(_M_ARM)
+# if defined(__ARM_NEON) || defined(__ARM_NEON__)
+# define ARCH "arm + NEON"
+# else
+# define ARCH "arm"
+# endif
+#elif defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__)
+# define ARCH "ppc64"
+#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)
+# define ARCH "ppc"
+#elif defined(__AVR)
+# define ARCH "AVR"
+#elif defined(__mips64)
+# define ARCH "mips64"
+#elif defined(__mips)
+# define ARCH "mips"
+#else
+# define ARCH "unknown"
+#endif
+
static const int g_nbBits = (int)(sizeof(void*)*8);
static const char g_lename[] = "little endian";
static const char g_bename[] = "big endian";
#define ENDIAN_NAME (BMK_isLittleEndian() ? g_lename : g_bename)
static const char author[] = "Yann Collet";
-#define WELCOME_MESSAGE(exename) "%s %s (%i-bits %s), by %s \n", \
- exename, PROGRAM_VERSION, g_nbBits, ENDIAN_NAME, author
+#define WELCOME_MESSAGE(exename) "%s %s (%i-bits %s %s)" VERSION_FMT ", by %s \n", \
+ exename, PROGRAM_VERSION, g_nbBits, ARCH, ENDIAN_NAME, VERSION, author
#define KB *( 1<<10)
#define MB *( 1<<20)
@@ -215,9 +338,11 @@
static U32 localXXH64(const void* buffer, size_t bufferSize, U32 seed) { return (U32)XXH64(buffer, bufferSize, seed); }
+static U32 localXXH3_64b(const void* buffer, size_t bufferSize, U32 seed) { (void)seed; return (U32)XXH3_64bits(buffer, bufferSize); }
+
static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer, size_t bufferSize)
{
- U32 nbh_perIteration = ((300 MB) / (bufferSize+1)) + 1; /* first loop conservatively aims for 300 MB/s */
+ U32 nbh_perIteration = (U32)((300 MB) / (bufferSize+1)) + 1; /* first loop conservatively aims for 300 MB/s */
U32 iterationNb;
double fastestH = 100000000.;
@@ -227,25 +352,34 @@
U32 r=0;
clock_t cStart;
- DISPLAYLEVEL(2, "%1i-%-17.17s : %10u ->\r", iterationNb, hName, (U32)bufferSize);
+ DISPLAYLEVEL(2, "%1u-%-17.17s : %10u ->\r", iterationNb, hName, (U32)bufferSize);
cStart = clock();
while (clock() == cStart); /* starts clock() at its exact beginning */
cStart = clock();
- { U32 i;
- for (i=0; i<nbh_perIteration; i++)
- r += h(buffer, bufferSize, i);
+ { U32 u;
+ for (u=0; u<nbh_perIteration; u++)
+ r += h(buffer, bufferSize, u);
}
- if (r==0) DISPLAYLEVEL(3,".\r"); /* do something with r to avoid compiler "optimizing" away hash function */
- { double const timeS = ((double)BMK_clockSpan(cStart) / CLOCKS_PER_SEC) / nbh_perIteration;
+ if (r==0) DISPLAYLEVEL(3,".\r"); /* do something with r to defeat compiler "optimizing" away hash */
+
+ { clock_t const nbTicks = BMK_clockSpan(cStart);
+ double const timeS = ((double)nbTicks / CLOCKS_PER_SEC) / nbh_perIteration;
+ if (nbTicks == 0) { /* faster than timer resolution */
+ nbh_perIteration *= 100;
+ iterationNb--; /* try again */
+ continue;
+ }
if (timeS < fastestH) fastestH = timeS;
- DISPLAYLEVEL(2, "%1i-%-17.17s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
+ DISPLAYLEVEL(2, "%1u-%-17.17s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
iterationNb, hName, (U32)bufferSize,
(double)1 / fastestH,
((double)bufferSize / (1<<20)) / fastestH );
}
- assert(fastestH > 1./2000000000); /* avoid U32 overflow */
- nbh_perIteration = (U32)(1 / fastestH) + 1; /* adjust nbh_perIteration to last roughtly one second */
+ { double nbh_perSecond = (1 / fastestH) + 1;
+ if (nbh_perSecond > (double)(4000U<<20)) nbh_perSecond = (double)(4000U<<20);
+ nbh_perIteration = (U32)nbh_perSecond;
+ }
}
DISPLAYLEVEL(1, "%-19.19s : %10u -> %8.0f it/s (%7.1f MB/s) \n", hName, (U32)bufferSize,
(double)1 / fastestH,
@@ -280,8 +414,16 @@
if ((specificTest==0) | (specificTest==4))
BMK_benchHash(localXXH64, "XXH64 unaligned", ((const char*)buffer)+3, bufferSize);
- if (specificTest > 4) {
- DISPLAY("benchmark mode invalid \n");
+ /* Bench XXH3 */
+ if ((specificTest==0) | (specificTest==5))
+ BMK_benchHash(localXXH3_64b, "XXH3_64bits", buffer, bufferSize);
+
+ /* Bench XXH3 on Unaligned input */
+ if ((specificTest==0) | (specificTest==6))
+ BMK_benchHash(localXXH3_64b, "XXH3_64b unaligned", ((const char*)buffer)+3, bufferSize);
+
+ if (specificTest > 6) {
+ DISPLAY("Benchmark mode invalid.\n");
return 1;
}
return 0;
@@ -306,95 +448,121 @@
for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
const char* const inFileName = fileNamesTable[fileIdx];
- FILE* const inFile = fopen( inFileName, "rb" );
- size_t const benchedSize = BMK_selectBenchedSize(inFileName);
- char* const buffer = (char*)calloc(benchedSize+16+3, 1);
- void* const alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF); /* align on next 16 bytes */
+ assert(inFileName != NULL);
+ {
+ FILE* const inFile = fopen( inFileName, "rb" );
+ size_t const benchedSize = BMK_selectBenchedSize(inFileName);
+ char* const buffer = (char*)calloc(benchedSize+16+3, 1);
+ void* const alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF); /* align on next 16 bytes */
- /* Checks */
- if ((inFile==NULL) || (inFileName==NULL)) {
- DISPLAY("Pb opening %s\n", inFileName);
- free(buffer);
- return 11;
- }
- if(!buffer) {
- DISPLAY("\nError: not enough memory!\n");
- fclose(inFile);
- return 12;
- }
-
- /* Fill input buffer */
- DISPLAYLEVEL(1, "\rLoading %s... \n", inFileName);
- { size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile);
- fclose(inFile);
- if(readSize != benchedSize) {
- DISPLAY("\nError: problem reading file '%s' !! \n", inFileName);
+ /* Checks */
+ if (inFile==NULL){
+ DISPLAY("Error: Could not open '%s': %s.\n", inFileName, strerror(errno));
free(buffer);
- return 13;
- } }
+ return 11;
+ }
+ if(!buffer) {
+ DISPLAY("\nError: Out of memory.\n");
+ fclose(inFile);
+ return 12;
+ }
- /* bench */
- result |= BMK_benchMem(alignedBuffer, benchedSize, specificTest);
+ /* Fill input buffer */
+ DISPLAYLEVEL(1, "\rLoading %s... \n", inFileName);
+ { size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile);
+ fclose(inFile);
+ if(readSize != benchedSize) {
+ DISPLAY("\nError: Could not read '%s': %s.\n", inFileName, strerror(errno));
+ free(buffer);
+ return 13;
+ } }
- free(buffer);
+ /* bench */
+ result |= BMK_benchMem(alignedBuffer, benchedSize, specificTest);
+
+ free(buffer);
+ }
}
return result;
}
-
-static int BMK_benchInternal(size_t keySize, int specificTest)
+static int BMK_benchInternal(size_t keySize, U32 specificTest)
{
void* const buffer = calloc(keySize+16+3, 1);
- void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF); /* align on next 16 bytes */
- if(!buffer) {
- DISPLAY("\nError: not enough memory!\n");
+ if (!buffer) {
+ DISPLAY("\nError: Out of memory.\n");
return 12;
}
- /* bench */
- DISPLAYLEVEL(1, "Sample of ");
- if (keySize > 10 KB) {
- DISPLAYLEVEL(1, "%u KB", (U32)(keySize >> 10));
- } else {
- DISPLAYLEVEL(1, "%u bytes", (U32)keySize);
- }
- DISPLAYLEVEL(1, "... \n");
+ { const void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF); /* align on next 16 bytes */
- { int const result = BMK_benchMem(alignedBuffer, keySize, specificTest);
- free(buffer);
- return result;
+ /* bench */
+ DISPLAYLEVEL(1, "Sample of ");
+ if (keySize > 10 KB) {
+ DISPLAYLEVEL(1, "%u KB", (U32)(keySize >> 10));
+ } else {
+ DISPLAYLEVEL(1, "%u bytes", (U32)keySize);
+ }
+ DISPLAYLEVEL(1, "... \n");
+
+ { int const result = BMK_benchMem(alignedBuffer, keySize, specificTest);
+ free(buffer);
+ return result;
+ }
}
}
-static void BMK_checkResult(U32 r1, U32 r2)
-{
- static int nbTests = 1;
- if (r1==r2) {
- DISPLAYLEVEL(3, "\rTest%3i : %08X == %08X ok ", nbTests, r1, r2);
- } else {
- DISPLAY("\rERROR : Test%3i : %08X <> %08X !!!!! \n", nbTests, r1, r2);
- exit(1);
- }
- nbTests++;
-}
+/* ************************************************
+ * Self-test :
+ * ensure results consistency across platforms
+ *********************************************** */
-
-static void BMK_checkResult64(U64 r1, U64 r2)
+static void BMK_checkResult32(XXH32_hash_t r1, XXH32_hash_t r2)
{
static int nbTests = 1;
if (r1!=r2) {
- DISPLAY("\rERROR : Test%3i : 64-bit values non equals !!!!! \n", nbTests);
- DISPLAY("\r %08X%08X != %08X%08X \n", (U32)(r1>>32), (U32)r1, (U32)(r2>>32), (U32)r2);
+ DISPLAY("\rError: 32-bit hash test %i: Internal sanity check failed!\n", nbTests);
+ DISPLAY("\rGot 0x%08X, expected 0x%08X.\n", r1, r2);
+ DISPLAY("\rNote: If you modified the hash functions, make sure to either update the values\n"
+ "or temporarily comment out the tests in BMK_sanityCheck.\n");
+ exit(1);
+ }
+ nbTests++;
+}
+
+static void BMK_checkResult64(XXH64_hash_t r1, XXH64_hash_t r2)
+{
+ static int nbTests = 1;
+ if (r1!=r2) {
+ DISPLAY("\rError: 64-bit hash test %i: Internal sanity check failed!\n", nbTests);
+ DISPLAY("\rGot 0x%08X%08XULL, expected 0x%08X%08XULL.\n", (U32)(r1>>32), (U32)r1, (U32)(r2>>32), (U32)r2);
+ DISPLAY("\rNote: If you modified the hash functions, make sure to either update the values\n"
+ "or temporarily comment out the tests in BMK_sanityCheck.\n");
+ exit(1);
+ }
+ nbTests++;
+}
+
+static void BMK_checkResult128(XXH128_hash_t r1, XXH128_hash_t r2)
+{
+ static int nbTests = 1;
+ if ((r1.low64 != r2.low64) || (r1.high64 != r2.high64)) {
+ DISPLAY("\rError: 128-bit hash test %i: Internal sanity check failed.\n", nbTests);
+ DISPLAY("\rGot { 0x%08X%08XULL, 0x%08X%08XULL }, expected { 0x%08X%08XULL, %08X%08XULL } \n",
+ (U32)(r1.low64>>32), (U32)r1.low64, (U32)(r1.high64>>32), (U32)r1.high64,
+ (U32)(r2.low64>>32), (U32)r2.low64, (U32)(r2.high64>>32), (U32)r2.high64 );
+ DISPLAY("\rNote: If you modified the hash functions, make sure to either update the values\n"
+ "or temporarily comment out the tests in BMK_sanityCheck.\n");
exit(1);
}
nbTests++;
}
-static void BMK_testSequence64(void* sentence, size_t len, U64 seed, U64 Nresult)
+static void BMK_testSequence64(const void* sentence, size_t len, U64 seed, U64 Nresult)
{
XXH64_state_t state;
U64 Dresult;
@@ -403,18 +571,52 @@
Dresult = XXH64(sentence, len, seed);
BMK_checkResult64(Dresult, Nresult);
- XXH64_reset(&state, seed);
- XXH64_update(&state, sentence, len);
+ (void)XXH64_reset(&state, seed);
+ (void)XXH64_update(&state, sentence, len);
Dresult = XXH64_digest(&state);
BMK_checkResult64(Dresult, Nresult);
- XXH64_reset(&state, seed);
+ (void)XXH64_reset(&state, seed);
for (pos=0; pos<len; pos++)
- XXH64_update(&state, ((char*)sentence)+pos, 1);
+ (void)XXH64_update(&state, ((const char*)sentence)+pos, 1);
Dresult = XXH64_digest(&state);
BMK_checkResult64(Dresult, Nresult);
}
+static void BMK_testXXH3(const void* data, size_t len, U64 seed, U64 Nresult)
+{
+ { U64 const Dresult = XXH3_64bits_withSeed(data, len, seed);
+ BMK_checkResult64(Dresult, Nresult);
+ }
+
+ /* check that the no-seed variant produces same result as seed==0 */
+ if (seed == 0) {
+ U64 const Dresult = XXH3_64bits(data, len);
+ BMK_checkResult64(Dresult, Nresult);
+ }
+}
+
+static void BMK_testXXH128(const void* data, size_t len, U64 seed, XXH128_hash_t Nresult)
+{
+ { XXH128_hash_t const Dresult = XXH3_128bits_withSeed(data, len, seed);
+ BMK_checkResult128(Dresult, Nresult);
+
+ /* check that XXH128() is identical to XXH3_128bits_withSeed() */
+ { XXH128_hash_t const Dresult2 = XXH128(data, len, seed);
+ BMK_checkResult128(Dresult2, Nresult);
+ }
+
+ /* check that first field is equal to _64bits variant */
+ { U64 const result64 = XXH3_64bits_withSeed(data, len, seed);
+ BMK_checkResult64(result64, Nresult.low64);
+ } }
+
+ /* check that the no-seed variant produces same result as seed==0 */
+ if (seed == 0) {
+ XXH128_hash_t const Dresult = XXH3_128bits(data, len);
+ BMK_checkResult128(Dresult, Nresult);
+ }
+}
static void BMK_testSequence(const void* sequence, size_t len, U32 seed, U32 Nresult)
{
@@ -423,22 +625,22 @@
size_t pos;
Dresult = XXH32(sequence, len, seed);
- BMK_checkResult(Dresult, Nresult);
+ BMK_checkResult32(Dresult, Nresult);
- XXH32_reset(&state, seed);
- XXH32_update(&state, sequence, len);
+ (void)XXH32_reset(&state, seed);
+ (void)XXH32_update(&state, sequence, len);
Dresult = XXH32_digest(&state);
- BMK_checkResult(Dresult, Nresult);
+ BMK_checkResult32(Dresult, Nresult);
- XXH32_reset(&state, seed);
+ (void)XXH32_reset(&state, seed);
for (pos=0; pos<len; pos++)
- XXH32_update(&state, ((const char*)sequence)+pos, 1);
+ (void)XXH32_update(&state, ((const char*)sequence)+pos, 1);
Dresult = XXH32_digest(&state);
- BMK_checkResult(Dresult, Nresult);
+ BMK_checkResult32(Dresult, Nresult);
}
-#define SANITY_BUFFER_SIZE 101
+#define SANITY_BUFFER_SIZE 2243
static void BMK_sanityCheck(void)
{
static const U32 prime = 2654435761U;
@@ -457,8 +659,8 @@
BMK_testSequence(sanityBuffer, 1, prime, 0xD5845D64);
BMK_testSequence(sanityBuffer, 14, 0, 0xE5AA0AB4);
BMK_testSequence(sanityBuffer, 14, prime, 0x4481951D);
- BMK_testSequence(sanityBuffer, SANITY_BUFFER_SIZE, 0, 0x1F1AA412);
- BMK_testSequence(sanityBuffer, SANITY_BUFFER_SIZE, prime, 0x498EC8E2);
+ BMK_testSequence(sanityBuffer,222, 0, 0xC8070816);
+ BMK_testSequence(sanityBuffer,222, prime, 0xF3CFC852);
BMK_testSequence64(NULL , 0, 0, 0xEF46DB3751D8E999ULL);
BMK_testSequence64(NULL , 0, prime, 0xAC75FDA2929B17EFULL);
@@ -466,8 +668,115 @@
BMK_testSequence64(sanityBuffer, 1, prime, 0x739840CB819FA723ULL);
BMK_testSequence64(sanityBuffer, 14, 0, 0xCFFA8DB881BC3A3DULL);
BMK_testSequence64(sanityBuffer, 14, prime, 0x5B9611585EFCC9CBULL);
- BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, 0, 0x0EAB543384F878ADULL);
- BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, prime, 0xCAA65939306F1E21ULL);
+ BMK_testSequence64(sanityBuffer,222, 0, 0x9DD507880DEBB03DULL);
+ BMK_testSequence64(sanityBuffer,222, prime, 0xDC515172B8EE0600ULL);
+
+ BMK_testXXH3(NULL, 0, 0, 0); /* zero-length hash is the seed == 0 by default */
+ BMK_testXXH3(NULL, 0, prime, prime);
+ BMK_testXXH3(sanityBuffer, 1, 0, 0xE2C6D3B40D6F9203ULL); /* 1 - 3 */
+ BMK_testXXH3(sanityBuffer, 1, prime, 0xCEE5DF124E6135DCULL); /* 1 - 3 */
+ BMK_testXXH3(sanityBuffer, 6, 0, 0x585D6F8D1AAD96A2ULL); /* 4 - 8 */
+ BMK_testXXH3(sanityBuffer, 6, prime, 0x133EC8CA1739250FULL); /* 4 - 8 */
+ BMK_testXXH3(sanityBuffer, 12, 0, 0x0E85E122FE5356ACULL); /* 9 - 16 */
+ BMK_testXXH3(sanityBuffer, 12, prime, 0xE0DB5E70DA67EB16ULL); /* 9 - 16 */
+ BMK_testXXH3(sanityBuffer, 24, 0, 0x6C213B15B89230C9ULL); /* 17 - 32 */
+ BMK_testXXH3(sanityBuffer, 24, prime, 0x71892DB847A8F53CULL); /* 17 - 32 */
+ BMK_testXXH3(sanityBuffer, 48, 0, 0xECED834E8E99DA1EULL); /* 33 - 64 */
+ BMK_testXXH3(sanityBuffer, 48, prime, 0xA901250B336F9133ULL); /* 33 - 64 */
+ BMK_testXXH3(sanityBuffer, 80, 0, 0xC67B3A9C6D69E022ULL); /* 65 - 96 */
+ BMK_testXXH3(sanityBuffer, 80, prime, 0x5054F266D6A65EE4ULL); /* 65 - 96 */
+ BMK_testXXH3(sanityBuffer, 112, 0, 0x84B99B2137A264A5ULL); /* 97 -128 */
+ BMK_testXXH3(sanityBuffer, 112, prime, 0xD6BF88A668E69F2AULL); /* 97 -128 */
+ BMK_testXXH3(sanityBuffer, 192, 0, 0x6D96AC3F415CFCFEULL); /* one block, finishing at stripe boundary */
+ BMK_testXXH3(sanityBuffer, 192, prime, 0xE4BD30AA1673B966ULL); /* one block, finishing at stripe boundary */
+ BMK_testXXH3(sanityBuffer, 222, 0, 0xB62929C362EF3BF5ULL); /* one block, last stripe is overlapping */
+ BMK_testXXH3(sanityBuffer, 222, prime, 0x2782C3C49E3FD25EULL); /* one block, last stripe is overlapping */
+ BMK_testXXH3(sanityBuffer,2048, 0, 0x802EB54C97564FD7ULL); /* 2 blocks, finishing at block boundary */
+ BMK_testXXH3(sanityBuffer,2048, prime, 0xC9F188CFAFDA22CDULL); /* 2 blocks, finishing at block boundary */
+ BMK_testXXH3(sanityBuffer,2240, 0, 0x16B0035F6ABC1F46ULL); /* 3 blocks, finishing at stripe boundary */
+ BMK_testXXH3(sanityBuffer,2240, prime, 0x389E68C2348B9161ULL); /* 3 blocks, finishing at stripe boundary */
+ BMK_testXXH3(sanityBuffer,2243, 0, 0xE7C1890BDBD2B245ULL); /* 3 blocks, last stripe is overlapping */
+ BMK_testXXH3(sanityBuffer,2243, prime, 0x3A68386AED0C50A7ULL); /* 3 blocks, last stripe is overlapping */
+
+ { XXH128_hash_t const expected = { 0, 0 };
+ BMK_testXXH128(NULL, 0, 0, expected); /* zero-length hash is { seed, -seed } by default */
+ }
+ { XXH128_hash_t const expected = { prime, -(U64)prime };
+ BMK_testXXH128(NULL, 0, prime, expected);
+ }
+ { XXH128_hash_t const expected = { 0xE2C6D3B40D6F9203ULL, 0x82895983D246CA74ULL };
+ BMK_testXXH128(sanityBuffer, 1, 0, expected); /* 1-3 */
+ }
+ { XXH128_hash_t const expected = { 0xCEE5DF124E6135DCULL, 0xFA2DA0269396F32DULL };
+ BMK_testXXH128(sanityBuffer, 1, prime, expected); /* 1-3 */
+ }
+ { XXH128_hash_t const expected = { 0x585D6F8D1AAD96A2ULL, 0x2791F3B193F0AB86ULL };
+ BMK_testXXH128(sanityBuffer, 6, 0, expected); /* 4-8 */
+ }
+ { XXH128_hash_t const expected = { 0x133EC8CA1739250FULL, 0xDF3F422D70BDE07FULL };
+ BMK_testXXH128(sanityBuffer, 6, prime, expected); /* 4-8 */
+ }
+ { XXH128_hash_t const expected = { 0x0E85E122FE5356ACULL, 0xD933CC7EDF4D95DAULL };
+ BMK_testXXH128(sanityBuffer, 12, 0, expected); /* 9-16 */
+ }
+ { XXH128_hash_t const expected = { 0xE0DB5E70DA67EB16ULL, 0x114C8C76E74C669FULL };
+ BMK_testXXH128(sanityBuffer, 12, prime, expected); /* 9-16 */
+ }
+ { XXH128_hash_t const expected = { 0x6C213B15B89230C9ULL, 0x3F3AACF5F277AC02ULL };
+ BMK_testXXH128(sanityBuffer, 24, 0, expected); /* 17-32 */
+ }
+ { XXH128_hash_t const expected = { 0x71892DB847A8F53CULL, 0xD11561AC7D0F5ECDULL };
+ BMK_testXXH128(sanityBuffer, 24, prime, expected); /* 17-32 */
+ }
+ { XXH128_hash_t const expected = { 0xECED834E8E99DA1EULL, 0x0F85E76A60898313ULL };
+ BMK_testXXH128(sanityBuffer, 48, 0, expected); /* 33-64 */
+ }
+ { XXH128_hash_t const expected = { 0xA901250B336F9133ULL, 0xA35D3FB395E1DDE0ULL };
+ BMK_testXXH128(sanityBuffer, 48, prime, expected); /* 33-64 */
+ }
+ { XXH128_hash_t const expected = { 0x338B2F6E103D5B4EULL, 0x5DD1777C8FA671ABULL };
+ BMK_testXXH128(sanityBuffer, 81, 0, expected); /* 65-96 */
+ }
+ { XXH128_hash_t const expected = { 0x0718382B6D4264C3ULL, 0x1D542DAFEFA1790EULL };
+ BMK_testXXH128(sanityBuffer, 81, prime, expected); /* 65-96 */
+ }
+ { XXH128_hash_t const expected = { 0x7DE871A4FE41C90EULL, 0x786CB41C46C6B7B6ULL };
+ BMK_testXXH128(sanityBuffer, 103, 0, expected); /* 97-128 */
+ }
+ { XXH128_hash_t const expected = { 0xAD8B0B428C940A2CULL, 0xF8BA6D8B8CB05EB7ULL };
+ BMK_testXXH128(sanityBuffer, 103, prime, expected); /* 97-128 */
+ }
+ { XXH128_hash_t const expected = { 0x6D96AC3F415CFCFEULL, 0x947EDFA54DD68990ULL };
+ BMK_testXXH128(sanityBuffer, 192, 0, expected); /* one block, ends at stripe boundary */
+ }
+ { XXH128_hash_t const expected = { 0xE4BD30AA1673B966ULL, 0x8132EF45FF3D51F2ULL };
+ BMK_testXXH128(sanityBuffer, 192, prime, expected); /* one block, ends at stripe boundary */
+ }
+ { XXH128_hash_t const expected = { 0xB62929C362EF3BF5ULL, 0x1946A7A9E6DD3032ULL };
+ BMK_testXXH128(sanityBuffer, 222, 0, expected); /* one block, last stripe is overlapping */
+ }
+ { XXH128_hash_t const expected = { 0x2782C3C49E3FD25EULL, 0x98CE16C40C2D59F6ULL };
+ BMK_testXXH128(sanityBuffer, 222, prime, expected); /* one block, last stripe is overlapping */
+ }
+ { XXH128_hash_t const expected = { 0x802EB54C97564FD7ULL, 0x384AADF242348D00ULL };
+ BMK_testXXH128(sanityBuffer,2048, 0, expected); /* two blocks, finishing at block boundary */
+ }
+ { XXH128_hash_t const expected = { 0xC9F188CFAFDA22CDULL, 0x7936B69445BE9EEDULL };
+ BMK_testXXH128(sanityBuffer,2048, prime, expected); /* two blocks, finishing at block boundary */
+ }
+ { XXH128_hash_t const expected = { 0x16B0035F6ABC1F46ULL, 0x1F6602850A1AA7EEULL };
+ BMK_testXXH128(sanityBuffer,2240, 0, expected); /* two blocks, ends at stripe boundary */
+ }
+ { XXH128_hash_t const expected = { 0x389E68C2348B9161ULL, 0xA7D1E8C96586A052ULL };
+ BMK_testXXH128(sanityBuffer,2240, prime, expected); /* two blocks, ends at stripe boundary */
+ }
+ { XXH128_hash_t const expected = { 0x8B1DE79158C397D3ULL, 0x9B6B2EEFAC2DE0ADULL };
+ BMK_testXXH128(sanityBuffer,2237, 0, expected); /* two blocks, ends at stripe boundary */
+ }
+ { XXH128_hash_t const expected = { 0x9DDF09ABA2B93DD6ULL, 0xB9CEDBE2582CA371ULL };
+ BMK_testXXH128(sanityBuffer,2237, prime, expected); /* two blocks, ends at stripe boundary */
+ }
+
DISPLAYLEVEL(3, "\r%70s\r", ""); /* Clean display line */
DISPLAYLEVEL(3, "Sanity check -- all tests ok\n");
@@ -501,8 +810,8 @@
size_t readSize;
/* Init */
- XXH32_reset(&state32, XXHSUM32_DEFAULT_SEED);
- XXH64_reset(&state64, XXHSUM64_DEFAULT_SEED);
+ (void)XXH32_reset(&state32, XXHSUM32_DEFAULT_SEED);
+ (void)XXH64_reset(&state64, XXHSUM64_DEFAULT_SEED);
/* Load file & update hash */
readSize = 1;
@@ -511,10 +820,10 @@
switch(hashType)
{
case algo_xxh32:
- XXH32_update(&state32, buffer, readSize);
+ (void)XXH32_update(&state32, buffer, readSize);
break;
case algo_xxh64:
- XXH64_update(&state64, buffer, readSize);
+ (void)XXH64_update(&state64, buffer, readSize);
break;
default:
break;
@@ -554,19 +863,20 @@
/* Check file existence */
if (fileName == stdinName) {
inFile = stdin;
+ fileName = "stdin";
SET_BINARY_MODE(stdin);
}
else
inFile = fopen( fileName, "rb" );
if (inFile==NULL) {
- DISPLAY( "Pb opening %s\n", fileName);
+ DISPLAY("Error: Could not open '%s': %s.\n", fileName, strerror(errno));
return 1;
}
/* Memory allocation & restrictions */
buffer = malloc(blockSize);
if(!buffer) {
- DISPLAY("\nError: not enough memory!\n");
+ DISPLAY("\nError: Out of memory.\n");
fclose(inFile);
return 1;
}
@@ -605,7 +915,7 @@
{
case algo_xxh32:
{ XXH32_canonical_t hcbe32;
- XXH32_canonicalFromHash(&hcbe32, h32);
+ (void)XXH32_canonicalFromHash(&hcbe32, h32);
displayEndianess==big_endian ?
BMK_display_BigEndian(&hcbe32, sizeof(hcbe32)) : BMK_display_LittleEndian(&hcbe32, sizeof(hcbe32));
DISPLAYRESULT(" %s\n", fileName);
@@ -613,7 +923,7 @@
}
case algo_xxh64:
{ XXH64_canonical_t hcbe64;
- XXH64_canonicalFromHash(&hcbe64, h64);
+ (void)XXH64_canonicalFromHash(&hcbe64, h64);
displayEndianess==big_endian ?
BMK_display_BigEndian(&hcbe64, sizeof(hcbe64)) : BMK_display_LittleEndian(&hcbe64, sizeof(hcbe64));
DISPLAYRESULT(" %s\n", fileName);
@@ -694,10 +1004,10 @@
char* lineBuf;
size_t blockSize;
char* blockBuf;
- int strictMode;
- int statusOnly;
- int warn;
- int quiet;
+ U32 strictMode;
+ U32 statusOnly;
+ U32 warn;
+ U32 quiet;
ParseFileReport report;
} ParseFileArg;
@@ -711,7 +1021,7 @@
static GetLineResult getLine(char** lineBuf, int* lineMax, FILE* inFile)
{
GetLineResult result = GetLine_ok;
- int len = 0;
+ size_t len = 0;
if ((*lineBuf == NULL) || (*lineMax<1)) {
free(*lineBuf); /* in case it's != NULL */
@@ -732,9 +1042,9 @@
}
/* Make enough space for len+1 (for final NUL) bytes. */
- if (len+1 >= *lineMax) {
+ if (len+1 >= (size_t)*lineMax) {
char* newLineBuf = NULL;
- int newBufSize = *lineMax;
+ size_t newBufSize = (size_t)*lineMax;
newBufSize += (newBufSize/2) + 1; /* x 1.5 */
if (newBufSize > MAX_LINE_LENGTH) newBufSize = MAX_LINE_LENGTH;
@@ -744,7 +1054,7 @@
if (newLineBuf == NULL) return GetLine_outOfMemory;
*lineBuf = newLineBuf;
- *lineMax = newBufSize;
+ *lineMax = (int)newBufSize;
}
if (c == '\n') break;
@@ -813,41 +1123,43 @@
static ParseLineResult parseLine(ParsedLine* parsedLine, const char* line)
{
const char* const firstSpace = strchr(line, ' ');
- const char* const secondSpace = firstSpace + 1;
+ if (firstSpace == NULL) return ParseLine_invalidFormat;
- parsedLine->filename = NULL;
- parsedLine->xxhBits = 0;
+ { const char* const secondSpace = firstSpace + 1;
+ if (*secondSpace != ' ') return ParseLine_invalidFormat;
- if (firstSpace == NULL || *secondSpace != ' ') return ParseLine_invalidFormat;
+ parsedLine->filename = NULL;
+ parsedLine->xxhBits = 0;
- switch (firstSpace - line)
- {
- case 8:
- { XXH32_canonical_t* xxh32c = &parsedLine->canonical.xxh32;
- if (canonicalFromString(xxh32c->digest, sizeof(xxh32c->digest), line)
- != CanonicalFromString_ok) {
- return ParseLine_invalidFormat;
+ switch (firstSpace - line)
+ {
+ case 8:
+ { XXH32_canonical_t* xxh32c = &parsedLine->canonical.xxh32;
+ if (canonicalFromString(xxh32c->digest, sizeof(xxh32c->digest), line)
+ != CanonicalFromString_ok) {
+ return ParseLine_invalidFormat;
+ }
+ parsedLine->xxhBits = 32;
+ break;
}
- parsedLine->xxhBits = 32;
- break;
+
+ case 16:
+ { XXH64_canonical_t* xxh64c = &parsedLine->canonical.xxh64;
+ if (canonicalFromString(xxh64c->digest, sizeof(xxh64c->digest), line)
+ != CanonicalFromString_ok) {
+ return ParseLine_invalidFormat;
+ }
+ parsedLine->xxhBits = 64;
+ break;
+ }
+
+ default:
+ return ParseLine_invalidFormat;
+ break;
}
- case 16:
- { XXH64_canonical_t* xxh64c = &parsedLine->canonical.xxh64;
- if (canonicalFromString(xxh64c->digest, sizeof(xxh64c->digest), line)
- != CanonicalFromString_ok) {
- return ParseLine_invalidFormat;
- }
- parsedLine->xxhBits = 64;
- break;
- }
-
- default:
- return ParseLine_invalidFormat;
- break;
+ parsedLine->filename = secondSpace + 1;
}
-
- parsedLine->filename = secondSpace + 1;
return ParseLine_ok;
}
@@ -873,7 +1185,7 @@
if (lineNumber == 0) {
/* This is unlikely happen, but md5sum.c has this
* error check. */
- DISPLAY("%s : too many checksum lines\n", inFileName);
+ DISPLAY("%s: Error: Too many checksum lines\n", inFileName);
report->quit = 1;
break;
}
@@ -892,15 +1204,15 @@
break;
default:
- DISPLAY("%s : %lu: unknown error\n", inFileName, lineNumber);
+ DISPLAY("%s:%lu: Error: Unknown error.\n", inFileName, lineNumber);
break;
case GetLine_exceedMaxLineLength:
- DISPLAY("%s : %lu: too long line\n", inFileName, lineNumber);
+ DISPLAY("%s:%lu: Error: Line too long.\n", inFileName, lineNumber);
break;
case GetLine_outOfMemory:
- DISPLAY("%s : %lu: out of memory\n", inFileName, lineNumber);
+ DISPLAY("%s:%lu: Error: Out of memory.\n", inFileName, lineNumber);
break;
}
report->quit = 1;
@@ -910,7 +1222,7 @@
if (parseLine(&parsedLine, parseFileArg->lineBuf) != ParseLine_ok) {
report->nImproperlyFormattedLines++;
if (parseFileArg->warn) {
- DISPLAY("%s : %lu: improperly formatted XXHASH checksum line\n"
+ DISPLAY("%s:%lu: Error: Improperly formatted checksum line.\n"
, inFileName, lineNumber);
}
continue;
@@ -921,7 +1233,7 @@
report->nImproperlyFormattedLines++;
report->nMixedFormatLines++;
if (parseFileArg->warn) {
- DISPLAY("%s : %lu: improperly formatted XXHASH checksum line (XXH32/64)\n"
+ DISPLAY("%s:%lu: Error: Multiple hash types in one file.\n"
, inFileName, lineNumber);
}
continue;
@@ -964,15 +1276,15 @@
switch (lineStatus)
{
default:
- DISPLAY("%s : unknown error\n", inFileName);
+ DISPLAY("%s: Error: Unknown error.\n", inFileName);
report->quit = 1;
break;
case LineStatus_failedToOpen:
report->nOpenOrReadFailures++;
if (!parseFileArg->statusOnly) {
- DISPLAYRESULT("%s : %lu: FAILED open or read %s\n"
- , inFileName, lineNumber, parsedLine.filename);
+ DISPLAYRESULT("%s:%lu: Could not open or read '%s': %s.\n",
+ inFileName, lineNumber, parsedLine.filename, strerror(errno));
}
break;
@@ -1035,13 +1347,14 @@
if (inFileName == stdinName) {
/* note : Since we expect text input for xxhash -c mode,
* Don't set binary mode for stdin */
+ inFileName = "stdin";
inFile = stdin;
} else {
inFile = fopen( inFileName, "rt" );
}
if (inFile == NULL) {
- DISPLAY( "Pb opening %s\n", inFileName);
+ DISPLAY("Error: Could not open '%s': %s\n", inFileName, strerror(errno));
return 0;
}
@@ -1066,19 +1379,22 @@
/* Show error/warning messages. All messages are copied from md5sum.c
*/
if (report->nProperlyFormattedLines == 0) {
- DISPLAY("%s: no properly formatted XXHASH checksum lines found\n", inFileName);
+ DISPLAY("%s: no properly formatted xxHash checksum lines found\n", inFileName);
} else if (!statusOnly) {
if (report->nImproperlyFormattedLines) {
- DISPLAYRESULT("%lu lines are improperly formatted\n"
- , report->nImproperlyFormattedLines);
+ DISPLAYRESULT("%lu %s are improperly formatted\n"
+ , report->nImproperlyFormattedLines
+ , report->nImproperlyFormattedLines == 1 ? "line" : "lines");
}
if (report->nOpenOrReadFailures) {
- DISPLAYRESULT("%lu listed files could not be read\n"
- , report->nOpenOrReadFailures);
+ DISPLAYRESULT("%lu listed %s could not be read\n"
+ , report->nOpenOrReadFailures
+ , report->nOpenOrReadFailures == 1 ? "file" : "files");
}
if (report->nMismatchedChecksums) {
- DISPLAYRESULT("%lu computed checksums did NOT match\n"
- , report->nMismatchedChecksums);
+ DISPLAYRESULT("%lu computed %s did NOT match\n"
+ , report->nMismatchedChecksums
+ , report->nMismatchedChecksums == 1 ? "checksum" : "checksums");
} }
/* Result (exit) code logic is copied from
@@ -1140,7 +1456,7 @@
DISPLAY( " -V, --version : display version\n");
DISPLAY( " -h, --help : display long help and exit\n");
DISPLAY( " -b : benchmark mode \n");
- DISPLAY( " -i# : number of iterations (benchmark mode; default %i)\n", g_nbIterations);
+ DISPLAY( " -i# : number of iterations (benchmark mode; default %u)\n", g_nbIterations);
DISPLAY( "\n");
DISPLAY( "The following four options are useful only when verifying checksums (-c):\n");
DISPLAY( "--strict : don't print OK for each successfully verified file\n");
@@ -1157,24 +1473,53 @@
return 1;
}
-/*! readU32FromChar() :
- @return : unsigned integer value read from input in `char` format,
- 0 is no figure at *stringPtr position.
- Interprets K, KB, KiB, M, MB and MiB suffix.
- Modifies `*stringPtr`, advancing it to position where reading stopped.
- Note : function result can overflow if digit string > MAX_UINT */
-static unsigned readU32FromChar(const char** stringPtr)
+static void errorOut(const char* msg)
{
+ DISPLAY("%s \n", msg); exit(1);
+}
+
+/*! readU32FromCharChecked() :
+ * @return 0 if success, and store the result in *value.
+ * allows and interprets K, KB, KiB, M, MB and MiB suffix.
+ * Will also modify `*stringPtr`, advancing it to position where it stopped reading.
+ * @return 1 if an overflow error occurs */
+static int readU32FromCharChecked(const char** stringPtr, unsigned* value)
+{
+ static unsigned const max = (((unsigned)(-1)) / 10) - 1;
unsigned result = 0;
- while ((**stringPtr >='0') && (**stringPtr <='9'))
- result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
- if ((**stringPtr=='K') || (**stringPtr=='M')) {
- result <<= 10;
- if (**stringPtr=='M') result <<= 10;
+ while ((**stringPtr >='0') && (**stringPtr <='9')) {
+ if (result > max) return 1; /* overflow error */
+ result *= 10;
+ result += (unsigned)(**stringPtr - '0');
(*stringPtr)++ ;
+ }
+ if ((**stringPtr=='K') || (**stringPtr=='M')) {
+ unsigned const maxK = ((unsigned)(-1)) >> 10;
+ if (result > maxK) return 1; /* overflow error */
+ result <<= 10;
+ if (**stringPtr=='M') {
+ if (result > maxK) return 1; /* overflow error */
+ result <<= 10;
+ }
+ (*stringPtr)++; /* skip `K` or `M` */
if (**stringPtr=='i') (*stringPtr)++;
if (**stringPtr=='B') (*stringPtr)++;
}
+ *value = result;
+ return 0;
+}
+
+/*! readU32FromChar() :
+ * @return : unsigned integer value read from input in `char` format.
+ * allows and interprets K, KB, KiB, M, MB and MiB suffix.
+ * Will also modify `*stringPtr`, advancing it to position where it stopped reading.
+ * Note : function will exit() program if digit sequence overflows */
+static unsigned readU32FromChar(const char** stringPtr) {
+ unsigned result;
+ if (readU32FromCharChecked(stringPtr, &result)) {
+ static const char errorMsg[] = "Error: numeric value too large";
+ errorOut(errorMsg);
+ }
return result;
}