blob: 189ecd3e399ac1ac43bd8479f8aa01198600265d [file] [log] [blame]
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// -----------------------------------------------------------------------------
//
// Misc. common math functions
//
// Authors: vrabaud (vincent.rabaud@google.com)
//
#include "src/dsp/math.h"
#include <cassert>
#include <cmath>
#include <cstdint>
#include <numeric>
#include <cstring>
#include "src/dsp/dsp.h"
#include "src/dsp/dsp_x86.h"
//------------------------------------------------------------------------------
// Exclusive upper bound on the inputs for which the table-based approximation
// of log2 (lookup + linear correction) is used. Inputs at or above this value
// are evaluated with std::log2() instead.
// Practically, we can get rid of the call to log() as the two values match to
// very high degree (the ratio of these two is 0.99999x).
// Keeping a high threshold for now.
constexpr uint32_t kApproxLogWithCorrectionMax = 65536;
// Lookup table of log2(i) for i in [0, kLogLookupIdxMax). Entry 0 is set to 0,
// as log2(0) is undefined.
const float kWP2Log2Table[kLogLookupIdxMax] = {
0.0000000000000000f, 0.0000000000000000f,
1.0000000000000000f, 1.5849625007211560f,
2.0000000000000000f, 2.3219280948873621f,
2.5849625007211560f, 2.8073549220576041f,
3.0000000000000000f, 3.1699250014423121f,
3.3219280948873621f, 3.4594316186372973f,
3.5849625007211560f, 3.7004397181410921f,
3.8073549220576041f, 3.9068905956085187f,
4.0000000000000000f, 4.0874628412503390f,
4.1699250014423121f, 4.2479275134435852f,
4.3219280948873626f, 4.3923174227787606f,
4.4594316186372973f, 4.5235619560570130f,
4.5849625007211560f, 4.6438561897747243f,
4.7004397181410917f, 4.7548875021634682f,
4.8073549220576037f, 4.8579809951275718f,
4.9068905956085187f, 4.9541963103868749f,
5.0000000000000000f, 5.0443941193584533f,
5.0874628412503390f, 5.1292830169449663f,
5.1699250014423121f, 5.2094533656289501f,
5.2479275134435852f, 5.2854022188622487f,
5.3219280948873626f, 5.3575520046180837f,
5.3923174227787606f, 5.4262647547020979f,
5.4594316186372973f, 5.4918530963296747f,
5.5235619560570130f, 5.5545888516776376f,
5.5849625007211560f, 5.6147098441152083f,
5.6438561897747243f, 5.6724253419714951f,
5.7004397181410917f, 5.7279204545631987f,
5.7548875021634682f, 5.7813597135246599f,
5.8073549220576037f, 5.8328900141647412f,
5.8579809951275718f, 5.8826430493618415f,
5.9068905956085187f, 5.9307373375628866f,
5.9541963103868749f, 5.9772799234999167f,
6.0000000000000000f, 6.0223678130284543f,
6.0443941193584533f, 6.0660891904577720f,
6.0874628412503390f, 6.1085244567781691f,
6.1292830169449663f, 6.1497471195046822f,
6.1699250014423121f, 6.1898245588800175f,
6.2094533656289501f, 6.2288186904958804f,
6.2479275134435852f, 6.2667865406949010f,
6.2854022188622487f, 6.3037807481771030f,
6.3219280948873626f, 6.3398500028846243f,
6.3575520046180837f, 6.3750394313469245f,
6.3923174227787606f, 6.4093909361377017f,
6.4262647547020979f, 6.4429434958487279f,
6.4594316186372973f, 6.4757334309663976f,
6.4918530963296747f, 6.5077946401986963f,
6.5235619560570130f, 6.5391588111080309f,
6.5545888516776376f, 6.5698556083309478f,
6.5849625007211560f, 6.5999128421871278f,
6.6147098441152083f, 6.6293566200796094f,
6.6438561897747243f, 6.6582114827517946f,
6.6724253419714951f, 6.6865005271832185f,
6.7004397181410917f, 6.7142455176661224f,
6.7279204545631987f, 6.7414669864011464f,
6.7548875021634682f, 6.7681843247769259f,
6.7813597135246599f, 6.7944158663501061f,
6.8073549220576037f, 6.8201789624151878f,
6.8328900141647412f, 6.8454900509443747f,
6.8579809951275718f, 6.8703647195834047f,
6.8826430493618415f, 6.8948177633079437f,
6.9068905956085187f, 6.9188632372745946f,
6.9307373375628866f, 6.9425145053392398f,
6.9541963103868749f, 6.9657842846620869f,
6.9772799234999167f, 6.9886846867721654f,
7.0000000000000000f, 7.0112272554232539f,
7.0223678130284543f, 7.0334230015374501f,
7.0443941193584533f, 7.0552824355011898f,
7.0660891904577720f, 7.0768155970508308f,
7.0874628412503390f, 7.0980320829605263f,
7.1085244567781691f, 7.1189410727235076f,
7.1292830169449663f, 7.1395513523987936f,
7.1497471195046822f, 7.1598713367783890f,
7.1699250014423121f, 7.1799090900149344f,
7.1898245588800175f, 7.1996723448363644f,
7.2094533656289501f, 7.2191685204621611f,
7.2288186904958804f, 7.2384047393250785f,
7.2479275134435852f, 7.2573878426926521f,
7.2667865406949010f, 7.2761244052742375f,
7.2854022188622487f, 7.2946207488916270f,
7.3037807481771030f, 7.3128829552843557f,
7.3219280948873626f, 7.3309168781146167f,
7.3398500028846243f, 7.3487281542310771f,
7.3575520046180837f, 7.3663222142458160f,
7.3750394313469245f, 7.3837042924740519f,
7.3923174227787606f, 7.4008794362821843f,
7.4093909361377017f, 7.4178525148858982f,
7.4262647547020979f, 7.4346282276367245f,
7.4429434958487279f, 7.4512111118323289f,
7.4594316186372973f, 7.4676055500829976f,
7.4757334309663976f, 7.4838157772642563f,
7.4918530963296747f, 7.4998458870832056f,
7.5077946401986963f, 7.5156998382840427f,
7.5235619560570130f, 7.5313814605163118f,
7.5391588111080309f, 7.5468944598876364f,
7.5545888516776376f, 7.5622424242210728f,
7.5698556083309478f, 7.5774288280357486f,
7.5849625007211560f, 7.5924570372680806f,
7.5999128421871278f, 7.6073303137496104f,
7.6147098441152083f, 7.6220518194563764f,
7.6293566200796094f, 7.6366246205436487f,
7.6438561897747243f, 7.6510516911789281f,
7.6582114827517946f, 7.6653359171851764f,
7.6724253419714951f, 7.6794800995054464f,
7.6865005271832185f, 7.6934869574993252f,
7.7004397181410917f, 7.7073591320808825f,
7.7142455176661224f, 7.7210991887071855f,
7.7279204545631987f, 7.7347096202258383f,
7.7414669864011464f, 7.7481928495894605f,
7.7548875021634682f, 7.7615512324444795f,
7.7681843247769259f, 7.7747870596011736f,
7.7813597135246599f, 7.7879025593914317f,
7.7944158663501061f, 7.8008998999203047f,
7.8073549220576037f, 7.8137811912170374f,
7.8201789624151878f, 7.8265484872909150f,
7.8328900141647412f, 7.8392037880969436f,
7.8454900509443747f, 7.8517490414160571f,
7.8579809951275718f, 7.8641861446542797f,
7.8703647195834047f, 7.8765169465649993f,
7.8826430493618415f, 7.8887432488982591f,
7.8948177633079437f, 7.9008668079807486f,
7.9068905956085187f, 7.9128893362299619f,
7.9188632372745946f, 7.9248125036057812f,
7.9307373375628866f, 7.9366379390025709f,
7.9425145053392398f, 7.9483672315846778f,
7.9541963103868749f, 7.9600019320680805f,
7.9657842846620869f, 7.9715435539507719f,
7.9772799234999167f, 7.9829935746943103f,
7.9886846867721654f, 7.9943534368588577f
};
// Lookup table of x * (1. - log2(x)) for x in [0, kLogLookupIdxMax). Entry 0
// is set to 0, as log2(0) is undefined.
const float kWP2SLog2m1Table[kLogLookupIdxMax] = {
0.000000000000000f, 1.0000000000000000f, 0.0000000000000000f,
-1.7548875021634682f, -4.0000000000000000f, -6.6096404744368105f,
-9.5097750043269365f, -12.6514844544032297f, -16.0000000000000000f,
-19.5293250129808094f, -23.2192809488736209f, -27.0537478050102713f,
-31.0195500086538729f, -35.1057163358341953f, -39.3029689088064558f,
-43.6033589341277832f, -48.0000000000000000f, -52.4868683012557682f,
-57.0586500259616187f, -61.7106227554281190f, -66.4385618977472490f,
-71.2386658783539701f, -76.1074956100205355f, -81.0419249893112976f,
-86.0391000173077458f, -91.0964047443681153f, -96.2114326716683905f,
-101.3819625584136475f, -106.6059378176128973f, -111.8814488586995850f,
-117.2067178682555664f, -122.5800856219931205f, -128.0000000000000000f,
-133.4650059388289662f, -138.9737366025115364f, -144.5249055930738109f,
-150.1173000519232232f, -155.7497745282711605f, -161.4212455108562381f,
-167.1306865356276887f, -172.8771237954944979f, -178.6596321893414370f,
-184.4773317567079403f, -190.3293844521901974f, -196.2149912200410711f,
-202.1333893348353570f, -208.0838499786225952f, -214.0656760288489693f,
-220.0782000346154916f, -226.1207823616452117f, -232.1928094887362306f,
-238.2936924405462662f, -244.4228653433367811f, -250.5797840918495467f,
-256.7639251168272949f, -262.9747842438562770f, -269.2118756352257947f,
-275.4747308073902445f, -281.7628977173991416f, -288.0759399123486446f,
-294.4134357365111327f, -300.7749775913360963f, -307.1601712439862695f,
-313.5686351804947662f, -320.0000000000000000f, -326.4539078468495177f,
-332.9300118776579325f, -339.4279757606707335f, -345.9474732050230728f,
-352.4881875176936887f, -359.0498111861476218f, -365.6320454848324175f,
-372.2346001038464465f, -378.8571927982412717f, -385.4995490565423211f,
-392.1614017871910391f, -398.8424910217124761f, -405.5425636335073705f,
-412.2613730712553775f, -418.9986791059911297f, -425.7542475909889959f,
-432.5278502336545898f, -439.3192643786828739f, -446.1282728017947079f,
-452.9546635134158805f, -459.7982295717046668f, -466.6587689043803948f,
-473.5360841388393283f, -480.4299824400821421f, -487.3402753560093856f,
-494.2667786696707140f, -501.2093122580813542f, -508.1676999572451905f,
-515.1417694330468748f, -522.1313520576978817f, -529.1362827914400668f,
-536.1564000692310401f, -543.1915456921514078f, -550.2415647232903666f,
-557.3063053878813662f, -564.3856189774724044f, -571.4793597579312063f,
-578.5873848810924756f, -585.7095542998714564f, -592.8457306866735053f,
-599.9957793549428970f, -607.1595681836990934f, -614.3369675449227998f,
-621.5278502336545898f, -628.7320914006849080f, -635.9495684877125541f,
-643.1801611648618291f, -650.4237512704515893f, -657.6802227529162792f,
-664.9494616147804891f, -672.2313558586031377f, -679.5257954347982832f,
-686.8326721912583253f, -694.1518798246972892f, -701.4833138336452976f,
-708.8268714730222655f, -716.1824517102259051f, -723.5499551826721927f,
-730.9292841567264531f, -738.3203424879725389f, -745.7230355827608719f,
-753.1372703609895325f, -760.5629552200649641f, -768.0000000000000000f,
-775.4483159495997597f, -782.9078156936990354f, -790.3784132014059196f,
-797.8600237553158649f, -805.3525639216582022f, -812.8559515213414670f,
-820.3701056018621784f, -827.8949464100461455f, -835.4303953655920623f,
-842.9763750353873775f, -850.5328091085675624f, -858.0996223722952436f,
-865.6767406882298701f, -873.2640909696648350f, -880.8616011593096573f,
-888.4692002076928929f, -896.0868180521655404f, -903.7143855964825434f,
-911.3518346909455659f, -918.9990981130846421f, -926.6561095488619912f,
-934.3228035743820783f, -941.9991156380868915f, -949.6849820434249523f,
-957.3803399319757546f, -965.0851272670147409f, -972.7992828175067643f,
-980.5227461425107549f, -988.2554575759854743f, -995.9973582119822595f,
-1003.7483898902125929f, -1011.5084951819779917f, -1019.2776173764533496f,
-1027.0557004673091797f, -1034.8426891396654810f, -1042.6385287573657479f,
-1050.4431653505596387f, -1058.2565456035895295f, -1066.0786168431666283f,
-1073.9093270268317610f, -1081.7486247316892332f, -1089.5964591434092199f,
-1097.4527800454886801f, -1105.3175378087607896f, -1113.1906833811533488f,
-1121.0721682776786565f, -1128.9619445706575789f, -1136.8599648801643980f,
-1144.7661823646906214f, -1152.6805507120188850f, -1160.6030241303019466f,
-1168.5335573393415416f, -1176.4721055620602783f, -1184.4186245161627085f,
-1192.3730704059798882f, -1200.3353999144903810f, -1208.3055701955177028f,
-1216.2835388660937497f, -1224.2692639989879808f, -1232.2627041153957634f,
-1240.2638181777826958f, -1248.2725655828801337f, -1256.2889061548280552f,
-1264.3128001384620802f, -1272.3442081927396430f, -1280.3830913843028156f,
-1288.4294111811741459f, -1296.4831294465807332f, -1304.5442084329060890f,
-1312.6126107757627324f, -1320.6882994881862032f, -1328.7712379549448087f,
-1336.8613899269646481f, -1344.9587195158624127f, -1353.0631911885907357f,
-1361.1747697621849511f, -1369.2934203986164903f, -1377.4191085997429127f,
-1385.5518002023602548f, -1393.6914613733470105f, -1401.8380586049045178f,
-1409.9915587098857941f, -1418.1519288172160032f, -1426.3191363673981868f,
-1434.4931491081035801f, -1442.6739350898455996f, -1450.8614626617340946f,
-1459.0557004673091797f, -1467.2566174404521462f, -1475.4641828013698159f,
-1483.6783660526571111f, -1491.8991369754251082f, -1500.1264656255063983f,
-1508.3603223297236582f, -1516.6006776822280244f, -1524.8475025409031787f,
-1533.1007680238333251f, -1541.3604455058325584f, -1549.6265066150376697f,
-1557.8989232295609781f, -1566.1776674742000068f, -1574.4627117172062754f,
-1582.7540285671091169f, -1591.0515908695965663f, -1599.3553717044474070f,
-1607.6653443825166505f, -1615.9814824427749045f, -1624.3037596493945784f,
-1632.6321499888874769f, -1640.9666276672905951f, -1649.3071671073989819f,
-1657.6537429460445310f, -1666.0063300314209300f, -1674.3649034204518102f,
-1682.7294383762048255f, -1691.0999103653443854f, -1699.4762950556298620f,
-1707.8585683134529063f, -1716.2467062014154635f, -1724.6406849759450779f,
-1733.0404810849520345f, -1741.4460711655217438f, -1749.8574320416437331f,
-1758.2745407219790650f, -1766.6973743976604965f, -1775.1259104401299282f,
-1783.5601263990088228f,
};
#if defined(WP2_NEED_LOG_TABLE_8BIT)
// Index of the highest set bit of i, i.e. floor(log2(i)), for i in [1, 256].
// Entry 0 is set to 0. The table has 257 entries so that i == 256 is a valid
// index. Only compiled in when WP2_NEED_LOG_TABLE_8BIT is defined.
const uint8_t WP2LogTable8bit[256 + 1] = { // 31 ^ clz(i)
0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
8
};
#endif
// Returns v * (1 - log2(v)) for inputs that are too large for the
// kWP2SLog2m1Table lookup ('v' must be >= kLogLookupIdxMax).
static float SLog2m1Slow_C(uint32_t v) {
  assert(v >= kLogLookupIdxMax);

  // Large inputs: evaluate the expression directly with std::log2().
  if (v >= kApproxLogWithCorrectionMax) return v - v * std::log2(v);

  // Approximation for the remaining range. With y = 2^shift, 'shift' is
  // chosen so that v / y < 256 and the integer part (v >> shift) can be
  // looked up in kWP2Log2Table:
  //   log2(v) = shift + log2(v >> shift) + (term for the dropped low bits)
  // The dropped bits r = v % y are accounted for using log(1 + d) ~ d for
  // small d, i.e. a term of about (1 / log(2)) * r / v. As the result is
  // multiplied by v, the division by v cancels out and only
  // (1 / log(2)) * r remains.
  const int32_t shift = WP2Log2Floor(v) - 7;
  const float value = static_cast<float>(v);
  const float correction = 1.44269504089f * (v & ((1 << shift) - 1));
  // v * (1 - log2(v)) = -v * (log2(v >> shift) + shift - 1) - correction
  return -value * (kWP2Log2Table[v >> shift] + (shift - 1)) - correction;
}
// Returns log2(v) for inputs that are too large for the kWP2Log2Table lookup
// ('v' must be >= kLogLookupIdxMax).
static float Log2Slow_C(uint32_t v) {
  assert(v >= kLogLookupIdxMax);

  // Large inputs: rely on the standard library.
  if (v >= kApproxLogWithCorrectionMax) return std::log2(v);

  // Table-based approximation: 30% faster than calling log2(v), but a bit
  // less precise. 'shift' brings v down to the range covered by the table.
  const uint32_t shift = WP2Log2Floor(v) - 7;
  float result = kWP2Log2Table[v >> shift] + shift;
  // First-order term for the low bits dropped by the shift (the derivation is
  // given in SLog2m1Slow_C).
  const float correction = 1.44269504089f * (v & ((1 << shift) - 1));
  result += correction / v;
  return result;
}
namespace {
// Plain C++ inner product: returns the sum of a[i] * b[i] over the first
// 'size' elements of 'a' and 'b'. Returns 0 when 'size' is 0.
int32_t InnerProduct_C(const int16_t* const a, const int16_t* const b,
                       size_t size) {
  int total = 0;
  for (size_t i = 0; i < size; ++i) {
    total = total + a[i] * b[i];
  }
  return total;
}
#if defined(WP2_USE_SSE)
// SSE2 inner product: returns the sum of a[i] * b[i] over the first 'size'
// elements of 'a' and 'b'. 'size' must be > 0. Unaligned loads are used.
int32_t InnerProduct_SSE(const int16_t* const a, const int16_t* const b,
                         size_t size) {
  assert(size > 0);
  __m128i acc = _mm_setzero_si128();
  size_t pos = 0;

  // Main loop, 8 samples at a time: _mm_madd_epi16() multiplies the 16-bit
  // lanes and adds adjacent products, giving four 32-bit partial sums.
  for (; pos + 8 <= size; pos += 8) {
    const __m128i va =
        _mm_loadu_si128(reinterpret_cast<const __m128i*>(a + pos));
    const __m128i vb =
        _mm_loadu_si128(reinterpret_cast<const __m128i*>(b + pos));
    acc = _mm_add_epi32(acc, _mm_madd_epi16(va, vb));
  }

  // At most one extra group of 4 samples: only the low 64 bits are loaded
  // (the upper half is zero), so only the two low 32-bit lanes receive data.
  if (pos + 4 <= size) {
    const __m128i va =
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(a + pos));
    const __m128i vb =
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(b + pos));
    acc = _mm_add_epi32(acc, _mm_madd_epi16(va, vb));
    pos += 4;
  }

  // Horizontal reduction of the 32-bit lanes. Nothing was accumulated if
  // size < 4, and the two upper lanes are still zero if size < 8, so the
  // corresponding folding steps are skipped in those cases.
  int32_t total = 0;
  if (size >= 4) {
    if (size >= 8) acc = _mm_add_epi32(acc, _mm_srli_si128(acc, 8));
    acc = _mm_add_epi32(acc, _mm_srli_si128(acc, 4));
    total = _mm_cvtsi128_si32(acc);
  }

  // Scalar tail: the remaining 0 to 3 samples.
  for (; pos < size; ++pos) total += a[pos] * b[pos];
  return total;
}
#if 0
// This version is 30% slower than the C version, so leave it documented here.
// Maybe one day this code could be useful inspiration if we have to compute
// *several* log2()'s in parallel.
//
// Computes log2(v) as N + extra, where N = WP2Log2Floor(v), K = 2^N and
// 'extra' is a 4-term odd power series in u = (v - K) / (v + K):
//   extra = u * (kRec2[0] + kRec2[1] * u^2 + kRec2[2] * u^4 + kRec2[3] * u^6)
// NOTE(review): _mm_load_ps() requires a 16-byte aligned address but kRec2 has
// no explicit alignment, and _mm_dp_ps() is an SSE4.1 instruction. Both would
// need checking before this code is ever re-enabled.
static float Log2Slow_SSE(uint32_t v) {
const float kRec2[4] = { // 2. / log(2.) / (2*k + 1)
2.885390043f, 0.961796701f, 0.577077985f, 0.412198573f
};
assert(v >= kLogLookupIdxMax);
const uint32_t N = WP2Log2Floor(v);
// compute correction with quadratic convergence
const uint32_t K = 1 << N;
const float u = (v - K) / (float)(v + K); // < 1.0
const float U2 = u * u;
const float U4 = U2 * U2;
// _mm_set_ps() takes its arguments from the highest lane to the lowest one,
// so lane k holds u^(2k), matching kRec2[k] in 'coeffs'.
const __m128 eps = _mm_set_ps(U4 * U2, U4, U2, 1.);
const __m128 coeffs = _mm_load_ps(kRec2);
// Dot product of the four lanes (mask 0xff: use all lanes, broadcast result).
const __m128 correction = _mm_dp_ps(eps, coeffs, 0xff);
const float extra = u * _mm_cvtss_f32(correction);
return N + extra;
}
#endif
// Points WP2InnerProduct at the SSE implementation.
WP2_TSAN_IGNORE_FUNCTION void MathInitSSE() {
WP2InnerProduct = InnerProduct_SSE;
// Log2Slow_SSE is compiled out (see the '#if 0' section), so WP2Log2Slow is
// left untouched here.
// WP2Log2Slow = Log2Slow_SSE;
}
#endif // WP2_USE_SSE
} // namespace
namespace WP2 {
//------------------------------------------------------------------------------
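// Divide-by-alpha: precomputed values of 255 / a in fixed-point precision
// (16 fractional bits), rounded up. Entry 0 is set to 0.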
const uint32_t kAlphaDiv[256] = { // kAlphaDiv[a] = ((255 << 16) + a - 1) / a
0x0, 0xff0000,
0x7f8000, 0x550000, 0x3fc000, 0x330000, 0x2a8000, 0x246db7, 0x1fe000,
0x1c5556, 0x198000, 0x172e8c, 0x154000, 0x139d8a, 0x1236dc, 0x110000,
0xff000, 0xf0000, 0xe2aab, 0xd6bcb, 0xcc000, 0xc2493, 0xb9746, 0xb1643,
0xaa000, 0xa3334, 0x9cec5, 0x971c8, 0x91b6e, 0x8cb09, 0x88000, 0x839cf,
0x7f800, 0x7ba2f, 0x78000, 0x74925, 0x71556, 0x6e454, 0x6b5e6, 0x689d9,
0x66000, 0x63832, 0x6124a, 0x5ee24, 0x5cba3, 0x5aaab, 0x58b22, 0x56cf0,
0x55000, 0x5343f, 0x5199a, 0x50000, 0x4e763, 0x4cfb3, 0x4b8e4, 0x4a2e9,
0x48db7, 0x47944, 0x46585, 0x45271, 0x44000, 0x42e2a, 0x41ce8, 0x40c31,
0x3fc00, 0x3ec4f, 0x3dd18, 0x3ce55, 0x3c000, 0x3b217, 0x3a493, 0x39770,
0x38aab, 0x37e40, 0x3722a, 0x36667, 0x35af3, 0x34fcb, 0x344ed, 0x33a55,
0x33000, 0x325ee, 0x31c19, 0x31282, 0x30925, 0x30000, 0x2f712, 0x2ee59,
0x2e5d2, 0x2dd7c, 0x2d556, 0x2cd5d, 0x2c591, 0x2bdf0, 0x2b678, 0x2af29,
0x2a800, 0x2a0fe, 0x29a20, 0x29365, 0x28ccd, 0x28657, 0x28000, 0x279ca,
0x273b2, 0x26db7, 0x267da, 0x26218, 0x25c72, 0x256e7, 0x25175, 0x24c1c,
0x246dc, 0x241b3, 0x23ca2, 0x237a7, 0x232c3, 0x22df3, 0x22939, 0x22493,
0x22000, 0x21b82, 0x21715, 0x212bc, 0x20e74, 0x20a3e, 0x20619, 0x20205,
0x1fe00, 0x1fa0c, 0x1f628, 0x1f253, 0x1ee8c, 0x1ead4, 0x1e72b, 0x1e38f,
0x1e000, 0x1dc80, 0x1d90c, 0x1d5a4, 0x1d24a, 0x1cefb, 0x1cbb8, 0x1c881,
0x1c556, 0x1c235, 0x1bf20, 0x1bc15, 0x1b915, 0x1b61f, 0x1b334, 0x1b052,
0x1ad7a, 0x1aaab, 0x1a7e6, 0x1a52a, 0x1a277, 0x19fcc, 0x19d2b, 0x19a91,
0x19800, 0x19578, 0x192f7, 0x1907e, 0x18e0d, 0x18ba3, 0x18941, 0x186e6,
0x18493, 0x18246, 0x18000, 0x17dc2, 0x17b89, 0x17958, 0x1772d, 0x17508,
0x172e9, 0x170d1, 0x16ebe, 0x16cb2, 0x16aab, 0x168aa, 0x166af, 0x164b9,
0x162c9, 0x160de, 0x15ef8, 0x15d18, 0x15b3c, 0x15966, 0x15795, 0x155c8,
0x15400, 0x1523e, 0x1507f, 0x14ec5, 0x14d10, 0x14b5f, 0x149b3, 0x1480b,
0x14667, 0x144c7, 0x1432c, 0x14194, 0x14000, 0x13e71, 0x13ce5, 0x13b5d,
0x139d9, 0x13859, 0x136dc, 0x13563, 0x133ed, 0x1327b, 0x1310c, 0x12fa1,
0x12e39, 0x12cd5, 0x12b74, 0x12a16, 0x128bb, 0x12763, 0x1260e, 0x124bd,
0x1236e, 0x12223, 0x120da, 0x11f94, 0x11e51, 0x11d11, 0x11bd4, 0x11a99,
0x11962, 0x1182c, 0x116fa, 0x115ca, 0x1149d, 0x11372, 0x1124a, 0x11124,
0x11000, 0x10ee0, 0x10dc1, 0x10ca5, 0x10b8b, 0x10a73, 0x1095e, 0x1084b,
0x1073a, 0x1062c, 0x1051f, 0x10415, 0x1030d, 0x10207, 0x10103, 0x10000,
};
//------------------------------------------------------------------------------
// Initial state of PseudoRNG. All entries are 31b-range values (the top bit
// is never set).
const uint32_t PseudoRNG::kRandomTable[] = {
0x0de15230, 0x03b31886, 0x775faccb, 0x1c88626a, 0x68385c55, 0x14b3b828,
0x4a85fef8, 0x49ddb84b, 0x64fcf397, 0x5c550289, 0x4a290000, 0x0d7ec1da,
0x5940b7ab, 0x5492577d, 0x4e19ca72, 0x38d38c69, 0x0c01ee65, 0x32a1755f,
0x5437f652, 0x5abb2c32, 0x0faa57b1, 0x73f533e7, 0x685feeda, 0x7563cce2,
0x6e990e83, 0x4730a7ed, 0x4fc0d9c6, 0x496b153c, 0x4f1403fa, 0x541afb0c,
0x73990b32, 0x26d7cb1c, 0x6fcc3706, 0x2cbb77d8, 0x75762f2a, 0x6425ccdd,
0x24b35461, 0x0a7d8715, 0x220414a8, 0x141ebf67, 0x56b41583, 0x73e502e3,
0x44cab16f, 0x28264d42, 0x73baaefb, 0x0a50ebed, 0x1d6ab6fb, 0x0d3ad40b,
0x35db3b68, 0x2b081e83, 0x77ce6b95, 0x5181e5f0, 0x78853bbc, 0x009f9494,
0x27e5ed3c
};
// Sets the generator to its initial state: the two table positions start at
// 0 and 31, and tab_ is filled from kRandomTable.
PseudoRNG::PseudoRNG() : index1_(0), index2_(31) {
// sizeof(tab_) bytes are read from kRandomTable, which must therefore be at
// least as large as tab_.
memcpy(tab_, kRandomTable, sizeof(tab_));
}
// Advances the generator by one step and returns the newly generated value,
// which lies in [0, 2^31).
// This is a difference-based generator: the new value is
// tab_[index1_] - tab_[index2_] (modulo 2^31). It replaces tab_[index1_], and
// both positions then advance circularly over the kTabSize table entries.
int32_t PseudoRNG::Update() {
  // Table entries are 31-bit values, so their difference lies in
  // (-2^31, 2^31). It is computed modulo 2^32 on unsigned operands, then
  // reduced modulo 2^31 by clearing the top bit: a negative difference d
  // becomes d + 2^31 and a non-negative one is unchanged. Without this wrap,
  // negative values would be stored back into the table and returned,
  // breaking the 31-bit range of both the state and the output.
  const uint32_t diff = (static_cast<uint32_t>(tab_[index1_]) -
                         static_cast<uint32_t>(tab_[index2_])) &
                        0x7fffffffu;
  tab_[index1_] = diff;
  if (++index1_ == kTabSize) index1_ = 0;
  if (++index2_ == kTabSize) index2_ = 0;
  return static_cast<int32_t>(diff);
}
} // namespace WP2
//------------------------------------------------------------------------------
// Function pointers to the selected implementations. They are null until
// WP2MathInit() assigns them.
WP2Log2SlowFunc WP2Log2Slow = nullptr;
WP2Log2SlowFunc WP2SLog2m1Slow = nullptr;
WP2InnerProductFunc WP2InnerProduct = nullptr;
// Value of WP2GetCPUInfo at the time of the last WP2MathInit() call, used to
// skip redundant initializations. It starts as the variable's own address
// (cast to the function pointer type), presumably so that it cannot compare
// equal to any actual WP2GetCPUInfo value, nullptr included, before the first
// initialization.
static volatile WP2CPUInfo math_last_cpuinfo_used =
(WP2CPUInfo)&math_last_cpuinfo_used;
// Sets up the WP2Log2Slow, WP2SLog2m1Slow and WP2InnerProduct function
// pointers: the C implementations are installed first, then replaced by the
// SSE ones where available. Does nothing if WP2GetCPUInfo has not changed
// since the previous call.
WP2_TSAN_IGNORE_FUNCTION void WP2MathInit() {
  if (math_last_cpuinfo_used == WP2GetCPUInfo) return;

  // Portable defaults.
  WP2Log2Slow = Log2Slow_C;
  WP2SLog2m1Slow = SLog2m1Slow_C;
  WP2InnerProduct = InnerProduct_C;

#if defined(WP2_USE_SSE)
  // Specialized versions, if the CPU reports support for them.
  if (WP2GetCPUInfo != nullptr && WP2GetCPUInfo(kSSE)) MathInitSSE();
#endif

  // Remember which WP2GetCPUInfo the pointers were set up for.
  math_last_cpuinfo_used = WP2GetCPUInfo;
}