| // Copyright 2019 Google LLC |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // https://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| // ----------------------------------------------------------------------------- |
| // |
| // Misc. common math functions |
| // |
| // Authors: vrabaud (vincent.rabaud@google.com) |
| // |
| |
| #include "src/dsp/math.h" |
| |
| #include <cassert> |
| #include <cmath> |
| #include <cstdint> |
| #include <numeric> |
| #include <cstring> |
| |
| #include "src/dsp/dsp.h" |
| #include "src/dsp/dsp_x86.h" |
| |
| //------------------------------------------------------------------------------ |
| |
// Exclusive upper bound of the inputs handled by the approximate, table-based
// version of log2 (lookup + first-order correction) in SLog2m1Slow_C() and
// Log2Slow_C(). Inputs at or above it go through std::log2().
// Practically, the call to log2() could be dropped altogether since the two
// versions match to a very high degree (their ratio is 0.99999x), but a high
// threshold is kept for now.
constexpr uint32_t kApproxLogWithCorrectionMax = 1u << 16;  // = 65536
| |
| // lookup table for small values of log2(int) |
| const float kWP2Log2Table[kLogLookupIdxMax] = { |
| 0.0000000000000000f, 0.0000000000000000f, |
| 1.0000000000000000f, 1.5849625007211560f, |
| 2.0000000000000000f, 2.3219280948873621f, |
| 2.5849625007211560f, 2.8073549220576041f, |
| 3.0000000000000000f, 3.1699250014423121f, |
| 3.3219280948873621f, 3.4594316186372973f, |
| 3.5849625007211560f, 3.7004397181410921f, |
| 3.8073549220576041f, 3.9068905956085187f, |
| 4.0000000000000000f, 4.0874628412503390f, |
| 4.1699250014423121f, 4.2479275134435852f, |
| 4.3219280948873626f, 4.3923174227787606f, |
| 4.4594316186372973f, 4.5235619560570130f, |
| 4.5849625007211560f, 4.6438561897747243f, |
| 4.7004397181410917f, 4.7548875021634682f, |
| 4.8073549220576037f, 4.8579809951275718f, |
| 4.9068905956085187f, 4.9541963103868749f, |
| 5.0000000000000000f, 5.0443941193584533f, |
| 5.0874628412503390f, 5.1292830169449663f, |
| 5.1699250014423121f, 5.2094533656289501f, |
| 5.2479275134435852f, 5.2854022188622487f, |
| 5.3219280948873626f, 5.3575520046180837f, |
| 5.3923174227787606f, 5.4262647547020979f, |
| 5.4594316186372973f, 5.4918530963296747f, |
| 5.5235619560570130f, 5.5545888516776376f, |
| 5.5849625007211560f, 5.6147098441152083f, |
| 5.6438561897747243f, 5.6724253419714951f, |
| 5.7004397181410917f, 5.7279204545631987f, |
| 5.7548875021634682f, 5.7813597135246599f, |
| 5.8073549220576037f, 5.8328900141647412f, |
| 5.8579809951275718f, 5.8826430493618415f, |
| 5.9068905956085187f, 5.9307373375628866f, |
| 5.9541963103868749f, 5.9772799234999167f, |
| 6.0000000000000000f, 6.0223678130284543f, |
| 6.0443941193584533f, 6.0660891904577720f, |
| 6.0874628412503390f, 6.1085244567781691f, |
| 6.1292830169449663f, 6.1497471195046822f, |
| 6.1699250014423121f, 6.1898245588800175f, |
| 6.2094533656289501f, 6.2288186904958804f, |
| 6.2479275134435852f, 6.2667865406949010f, |
| 6.2854022188622487f, 6.3037807481771030f, |
| 6.3219280948873626f, 6.3398500028846243f, |
| 6.3575520046180837f, 6.3750394313469245f, |
| 6.3923174227787606f, 6.4093909361377017f, |
| 6.4262647547020979f, 6.4429434958487279f, |
| 6.4594316186372973f, 6.4757334309663976f, |
| 6.4918530963296747f, 6.5077946401986963f, |
| 6.5235619560570130f, 6.5391588111080309f, |
| 6.5545888516776376f, 6.5698556083309478f, |
| 6.5849625007211560f, 6.5999128421871278f, |
| 6.6147098441152083f, 6.6293566200796094f, |
| 6.6438561897747243f, 6.6582114827517946f, |
| 6.6724253419714951f, 6.6865005271832185f, |
| 6.7004397181410917f, 6.7142455176661224f, |
| 6.7279204545631987f, 6.7414669864011464f, |
| 6.7548875021634682f, 6.7681843247769259f, |
| 6.7813597135246599f, 6.7944158663501061f, |
| 6.8073549220576037f, 6.8201789624151878f, |
| 6.8328900141647412f, 6.8454900509443747f, |
| 6.8579809951275718f, 6.8703647195834047f, |
| 6.8826430493618415f, 6.8948177633079437f, |
| 6.9068905956085187f, 6.9188632372745946f, |
| 6.9307373375628866f, 6.9425145053392398f, |
| 6.9541963103868749f, 6.9657842846620869f, |
| 6.9772799234999167f, 6.9886846867721654f, |
| 7.0000000000000000f, 7.0112272554232539f, |
| 7.0223678130284543f, 7.0334230015374501f, |
| 7.0443941193584533f, 7.0552824355011898f, |
| 7.0660891904577720f, 7.0768155970508308f, |
| 7.0874628412503390f, 7.0980320829605263f, |
| 7.1085244567781691f, 7.1189410727235076f, |
| 7.1292830169449663f, 7.1395513523987936f, |
| 7.1497471195046822f, 7.1598713367783890f, |
| 7.1699250014423121f, 7.1799090900149344f, |
| 7.1898245588800175f, 7.1996723448363644f, |
| 7.2094533656289501f, 7.2191685204621611f, |
| 7.2288186904958804f, 7.2384047393250785f, |
| 7.2479275134435852f, 7.2573878426926521f, |
| 7.2667865406949010f, 7.2761244052742375f, |
| 7.2854022188622487f, 7.2946207488916270f, |
| 7.3037807481771030f, 7.3128829552843557f, |
| 7.3219280948873626f, 7.3309168781146167f, |
| 7.3398500028846243f, 7.3487281542310771f, |
| 7.3575520046180837f, 7.3663222142458160f, |
| 7.3750394313469245f, 7.3837042924740519f, |
| 7.3923174227787606f, 7.4008794362821843f, |
| 7.4093909361377017f, 7.4178525148858982f, |
| 7.4262647547020979f, 7.4346282276367245f, |
| 7.4429434958487279f, 7.4512111118323289f, |
| 7.4594316186372973f, 7.4676055500829976f, |
| 7.4757334309663976f, 7.4838157772642563f, |
| 7.4918530963296747f, 7.4998458870832056f, |
| 7.5077946401986963f, 7.5156998382840427f, |
| 7.5235619560570130f, 7.5313814605163118f, |
| 7.5391588111080309f, 7.5468944598876364f, |
| 7.5545888516776376f, 7.5622424242210728f, |
| 7.5698556083309478f, 7.5774288280357486f, |
| 7.5849625007211560f, 7.5924570372680806f, |
| 7.5999128421871278f, 7.6073303137496104f, |
| 7.6147098441152083f, 7.6220518194563764f, |
| 7.6293566200796094f, 7.6366246205436487f, |
| 7.6438561897747243f, 7.6510516911789281f, |
| 7.6582114827517946f, 7.6653359171851764f, |
| 7.6724253419714951f, 7.6794800995054464f, |
| 7.6865005271832185f, 7.6934869574993252f, |
| 7.7004397181410917f, 7.7073591320808825f, |
| 7.7142455176661224f, 7.7210991887071855f, |
| 7.7279204545631987f, 7.7347096202258383f, |
| 7.7414669864011464f, 7.7481928495894605f, |
| 7.7548875021634682f, 7.7615512324444795f, |
| 7.7681843247769259f, 7.7747870596011736f, |
| 7.7813597135246599f, 7.7879025593914317f, |
| 7.7944158663501061f, 7.8008998999203047f, |
| 7.8073549220576037f, 7.8137811912170374f, |
| 7.8201789624151878f, 7.8265484872909150f, |
| 7.8328900141647412f, 7.8392037880969436f, |
| 7.8454900509443747f, 7.8517490414160571f, |
| 7.8579809951275718f, 7.8641861446542797f, |
| 7.8703647195834047f, 7.8765169465649993f, |
| 7.8826430493618415f, 7.8887432488982591f, |
| 7.8948177633079437f, 7.9008668079807486f, |
| 7.9068905956085187f, 7.9128893362299619f, |
| 7.9188632372745946f, 7.9248125036057812f, |
| 7.9307373375628866f, 7.9366379390025709f, |
| 7.9425145053392398f, 7.9483672315846778f, |
| 7.9541963103868749f, 7.9600019320680805f, |
| 7.9657842846620869f, 7.9715435539507719f, |
| 7.9772799234999167f, 7.9829935746943103f, |
| 7.9886846867721654f, 7.9943534368588577f |
| }; |
| |
| // x * (1. - log2(x)) |
| const float kWP2SLog2m1Table[kLogLookupIdxMax] = { |
| 0.000000000000000f, 1.0000000000000000f, 0.0000000000000000f, |
| -1.7548875021634682f, -4.0000000000000000f, -6.6096404744368105f, |
| -9.5097750043269365f, -12.6514844544032297f, -16.0000000000000000f, |
| -19.5293250129808094f, -23.2192809488736209f, -27.0537478050102713f, |
| -31.0195500086538729f, -35.1057163358341953f, -39.3029689088064558f, |
| -43.6033589341277832f, -48.0000000000000000f, -52.4868683012557682f, |
| -57.0586500259616187f, -61.7106227554281190f, -66.4385618977472490f, |
| -71.2386658783539701f, -76.1074956100205355f, -81.0419249893112976f, |
| -86.0391000173077458f, -91.0964047443681153f, -96.2114326716683905f, |
| -101.3819625584136475f, -106.6059378176128973f, -111.8814488586995850f, |
| -117.2067178682555664f, -122.5800856219931205f, -128.0000000000000000f, |
| -133.4650059388289662f, -138.9737366025115364f, -144.5249055930738109f, |
| -150.1173000519232232f, -155.7497745282711605f, -161.4212455108562381f, |
| -167.1306865356276887f, -172.8771237954944979f, -178.6596321893414370f, |
| -184.4773317567079403f, -190.3293844521901974f, -196.2149912200410711f, |
| -202.1333893348353570f, -208.0838499786225952f, -214.0656760288489693f, |
| -220.0782000346154916f, -226.1207823616452117f, -232.1928094887362306f, |
| -238.2936924405462662f, -244.4228653433367811f, -250.5797840918495467f, |
| -256.7639251168272949f, -262.9747842438562770f, -269.2118756352257947f, |
| -275.4747308073902445f, -281.7628977173991416f, -288.0759399123486446f, |
| -294.4134357365111327f, -300.7749775913360963f, -307.1601712439862695f, |
| -313.5686351804947662f, -320.0000000000000000f, -326.4539078468495177f, |
| -332.9300118776579325f, -339.4279757606707335f, -345.9474732050230728f, |
| -352.4881875176936887f, -359.0498111861476218f, -365.6320454848324175f, |
| -372.2346001038464465f, -378.8571927982412717f, -385.4995490565423211f, |
| -392.1614017871910391f, -398.8424910217124761f, -405.5425636335073705f, |
| -412.2613730712553775f, -418.9986791059911297f, -425.7542475909889959f, |
| -432.5278502336545898f, -439.3192643786828739f, -446.1282728017947079f, |
| -452.9546635134158805f, -459.7982295717046668f, -466.6587689043803948f, |
| -473.5360841388393283f, -480.4299824400821421f, -487.3402753560093856f, |
| -494.2667786696707140f, -501.2093122580813542f, -508.1676999572451905f, |
| -515.1417694330468748f, -522.1313520576978817f, -529.1362827914400668f, |
| -536.1564000692310401f, -543.1915456921514078f, -550.2415647232903666f, |
| -557.3063053878813662f, -564.3856189774724044f, -571.4793597579312063f, |
| -578.5873848810924756f, -585.7095542998714564f, -592.8457306866735053f, |
| -599.9957793549428970f, -607.1595681836990934f, -614.3369675449227998f, |
| -621.5278502336545898f, -628.7320914006849080f, -635.9495684877125541f, |
| -643.1801611648618291f, -650.4237512704515893f, -657.6802227529162792f, |
| -664.9494616147804891f, -672.2313558586031377f, -679.5257954347982832f, |
| -686.8326721912583253f, -694.1518798246972892f, -701.4833138336452976f, |
| -708.8268714730222655f, -716.1824517102259051f, -723.5499551826721927f, |
| -730.9292841567264531f, -738.3203424879725389f, -745.7230355827608719f, |
| -753.1372703609895325f, -760.5629552200649641f, -768.0000000000000000f, |
| -775.4483159495997597f, -782.9078156936990354f, -790.3784132014059196f, |
| -797.8600237553158649f, -805.3525639216582022f, -812.8559515213414670f, |
| -820.3701056018621784f, -827.8949464100461455f, -835.4303953655920623f, |
| -842.9763750353873775f, -850.5328091085675624f, -858.0996223722952436f, |
| -865.6767406882298701f, -873.2640909696648350f, -880.8616011593096573f, |
| -888.4692002076928929f, -896.0868180521655404f, -903.7143855964825434f, |
| -911.3518346909455659f, -918.9990981130846421f, -926.6561095488619912f, |
| -934.3228035743820783f, -941.9991156380868915f, -949.6849820434249523f, |
| -957.3803399319757546f, -965.0851272670147409f, -972.7992828175067643f, |
| -980.5227461425107549f, -988.2554575759854743f, -995.9973582119822595f, |
| -1003.7483898902125929f, -1011.5084951819779917f, -1019.2776173764533496f, |
| -1027.0557004673091797f, -1034.8426891396654810f, -1042.6385287573657479f, |
| -1050.4431653505596387f, -1058.2565456035895295f, -1066.0786168431666283f, |
| -1073.9093270268317610f, -1081.7486247316892332f, -1089.5964591434092199f, |
| -1097.4527800454886801f, -1105.3175378087607896f, -1113.1906833811533488f, |
| -1121.0721682776786565f, -1128.9619445706575789f, -1136.8599648801643980f, |
| -1144.7661823646906214f, -1152.6805507120188850f, -1160.6030241303019466f, |
| -1168.5335573393415416f, -1176.4721055620602783f, -1184.4186245161627085f, |
| -1192.3730704059798882f, -1200.3353999144903810f, -1208.3055701955177028f, |
| -1216.2835388660937497f, -1224.2692639989879808f, -1232.2627041153957634f, |
| -1240.2638181777826958f, -1248.2725655828801337f, -1256.2889061548280552f, |
| -1264.3128001384620802f, -1272.3442081927396430f, -1280.3830913843028156f, |
| -1288.4294111811741459f, -1296.4831294465807332f, -1304.5442084329060890f, |
| -1312.6126107757627324f, -1320.6882994881862032f, -1328.7712379549448087f, |
| -1336.8613899269646481f, -1344.9587195158624127f, -1353.0631911885907357f, |
| -1361.1747697621849511f, -1369.2934203986164903f, -1377.4191085997429127f, |
| -1385.5518002023602548f, -1393.6914613733470105f, -1401.8380586049045178f, |
| -1409.9915587098857941f, -1418.1519288172160032f, -1426.3191363673981868f, |
| -1434.4931491081035801f, -1442.6739350898455996f, -1450.8614626617340946f, |
| -1459.0557004673091797f, -1467.2566174404521462f, -1475.4641828013698159f, |
| -1483.6783660526571111f, -1491.8991369754251082f, -1500.1264656255063983f, |
| -1508.3603223297236582f, -1516.6006776822280244f, -1524.8475025409031787f, |
| -1533.1007680238333251f, -1541.3604455058325584f, -1549.6265066150376697f, |
| -1557.8989232295609781f, -1566.1776674742000068f, -1574.4627117172062754f, |
| -1582.7540285671091169f, -1591.0515908695965663f, -1599.3553717044474070f, |
| -1607.6653443825166505f, -1615.9814824427749045f, -1624.3037596493945784f, |
| -1632.6321499888874769f, -1640.9666276672905951f, -1649.3071671073989819f, |
| -1657.6537429460445310f, -1666.0063300314209300f, -1674.3649034204518102f, |
| -1682.7294383762048255f, -1691.0999103653443854f, -1699.4762950556298620f, |
| -1707.8585683134529063f, -1716.2467062014154635f, -1724.6406849759450779f, |
| -1733.0404810849520345f, -1741.4460711655217438f, -1749.8574320416437331f, |
| -1758.2745407219790650f, -1766.6973743976604965f, -1775.1259104401299282f, |
| -1783.5601263990088228f, |
| }; |
| |
| #if defined(WP2_NEED_LOG_TABLE_8BIT) |
// WP2LogTable8bit[i] = 31 ^ clz(i), that is floor(log2(i)), for i in [1, 256].
// Entry 0 is set to 0. Each row below holds 16 entries.
const uint8_t WP2LogTable8bit[256 + 1] = {
  0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
  8
};
| #endif |
| |
| static float SLog2m1Slow_C(uint32_t v) { |
| assert(v >= kLogLookupIdxMax); |
| if (v < kApproxLogWithCorrectionMax) { |
| const float v_f = (float)v; |
| const int32_t log_v = WP2Log2Floor(v) - 7; |
| |
| // vf = (2^log_v) * Xf; where y = 2^log_v and Xf < 256 |
| // Xf = floor(Xf) * (1 + (v % y) / v) |
| // log2(Xf) = log2(floor(Xf)) + log2(1 + (v % y) / v) |
| // The correction factor: log(1 + d) ~ d; for very small d values, so |
| // log2(1 + (v % y) / v) ~ (1/log(2)) * (v % y)/v |
| const float correction = 1.44269504089f * (v & ((1 << log_v) - 1)); |
| return -v_f * (kWP2Log2Table[v >> log_v] + (log_v - 1)) - correction; |
| } else { |
| return v - v * std::log2(v); |
| } |
| } |
| |
| static float Log2Slow_C(uint32_t v) { |
| assert(v >= kLogLookupIdxMax); |
| if (v < kApproxLogWithCorrectionMax) { |
| // this version is 30% faster than calling log2(v), but a bit less precise |
| const uint32_t log_v = WP2Log2Floor(v) - 7; |
| |
| float log_2 = kWP2Log2Table[v >> log_v] + log_v; |
| // Please check SLog2m1Slow_C for an explanation. |
| const float correction = 1.44269504089f * (v & ((1 << log_v) - 1)); |
| log_2 += correction / v; |
| return log_2; |
| } else { |
| return std::log2(v); |
| } |
| } |
| |
| namespace { |
// Plain-C implementation of the inner product: returns the sum of the
// products a[i] * b[i] for i in [0, size), or 0 if 'size' is 0.
int32_t InnerProduct_C(const int16_t* const a, const int16_t* const b,
                       size_t size) {
  int32_t sum = 0;
  for (size_t i = 0; i < size; ++i) sum += a[i] * b[i];
  return sum;
}
| |
| #if defined(WP2_USE_SSE) |
| |
| int32_t InnerProduct_SSE(const int16_t* const a, const int16_t* const b, |
| size_t size) { |
| assert(size > 0); |
| size_t i = 0; |
| __m128i sum = _mm_setzero_si128(); |
| while (i + 8 <= size) { |
| const __m128i a0 = _mm_loadu_si128((const __m128i*)(a + i)); |
| const __m128i b0 = _mm_loadu_si128((const __m128i*)(b + i)); |
| const __m128i c = _mm_madd_epi16(a0, b0); |
| sum = _mm_add_epi32(sum, c); |
| i += 8; |
| } |
| if (i + 4 <= size) { |
| const __m128i a0 = _mm_loadl_epi64((const __m128i*)(a + i)); |
| const __m128i b0 = _mm_loadl_epi64((const __m128i*)(b + i)); |
| const __m128i c = _mm_madd_epi16(a0, b0); |
| sum = _mm_add_epi32(sum, c); |
| i += 4; |
| } |
| if (size >= 8) sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 8)); |
| int32_t res = 0; |
| if (size >= 4) { |
| sum = _mm_add_epi32(sum, _mm_srli_si128(sum, 4)); |
| res = _mm_cvtsi128_si32(sum); |
| } |
| |
| for (; i < size; ++i) res += a[i] * b[i]; |
| return res; |
| } |
| |
#if 0
// This version is 30% slower than the C version, so leave it documented here.
// Maybe one day this code could be useful inspiration if we have to compute
// *several* log2()'s in parallel.
//
// With N = floor(log2(v)), K = 2^N and u = (v - K) / (v + K), we have
// v / K = (1 + u) / (1 - u) and hence
//   log2(v) = N + 2 / log(2) * (u + u^3 / 3 + u^5 / 5 + u^7 / 7 + ...)
// The series is truncated after the u^7 term.
// Note: _mm_dp_ps() is an SSE4.1 instruction.
static float Log2Slow_SSE(uint32_t v) {
  const float kRec2[4] = {  // 2. / log(2.) / (2*k + 1)
    2.885390043f, 0.961796701f, 0.577077985f, 0.412198573f
  };
  assert(v >= kLogLookupIdxMax);
  const uint32_t N = WP2Log2Floor(v);
  // compute correction with quadratic convergence
  // Unsigned shift: N may reach 31 for a 32b input (assuming WP2Log2Floor()
  // returns floor(log2(v))), which would overflow a signed '1 << N'.
  const uint32_t K = 1u << N;
  const float u = (v - K) / (float)(v + K);  // < 1.0
  const float U2 = u * u;
  const float U4 = U2 * U2;
  // _mm_set_ps() takes the highest lane first: eps = {1, u^2, u^4, u^6}.
  const __m128 eps = _mm_set_ps(U4 * U2, U4, U2, 1.);
  // kRec2 is a plain float array with no guaranteed 16-byte alignment, so the
  // aligned _mm_load_ps() must not be used on it.
  const __m128 coeffs = _mm_loadu_ps(kRec2);
  const __m128 correction = _mm_dp_ps(eps, coeffs, 0xff);
  const float extra = u * _mm_cvtss_f32(correction);
  return N + extra;
}
#endif
| |
| WP2_TSAN_IGNORE_FUNCTION void MathInitSSE() { |
| WP2InnerProduct = InnerProduct_SSE; |
| // WP2Log2Slow = Log2Slow_SSE; |
| } |
| |
| #endif // WP2_USE_SSE |
| |
| } // namespace |
| |
| namespace WP2 { |
| |
| //------------------------------------------------------------------------------ |
| // Divide-by-alpha |
| |
// Reciprocals of the alpha values, in 16b fixed-point precision:
//   kAlphaDiv[a] = ((255 << 16) + a - 1) / a
// that is (255 << 16) / a rounded up, for a in [1, 255]. kAlphaDiv[0] is 0.
// The first row holds a = 0 and 1, the next two rows 7 entries each and all
// the following rows 8 entries each.
const uint32_t kAlphaDiv[256] = {
  0x0, 0xff0000,
  0x7f8000, 0x550000, 0x3fc000, 0x330000, 0x2a8000, 0x246db7, 0x1fe000,
  0x1c5556, 0x198000, 0x172e8c, 0x154000, 0x139d8a, 0x1236dc, 0x110000,
  0xff000, 0xf0000, 0xe2aab, 0xd6bcb, 0xcc000, 0xc2493, 0xb9746, 0xb1643,
  0xaa000, 0xa3334, 0x9cec5, 0x971c8, 0x91b6e, 0x8cb09, 0x88000, 0x839cf,
  0x7f800, 0x7ba2f, 0x78000, 0x74925, 0x71556, 0x6e454, 0x6b5e6, 0x689d9,
  0x66000, 0x63832, 0x6124a, 0x5ee24, 0x5cba3, 0x5aaab, 0x58b22, 0x56cf0,
  0x55000, 0x5343f, 0x5199a, 0x50000, 0x4e763, 0x4cfb3, 0x4b8e4, 0x4a2e9,
  0x48db7, 0x47944, 0x46585, 0x45271, 0x44000, 0x42e2a, 0x41ce8, 0x40c31,
  0x3fc00, 0x3ec4f, 0x3dd18, 0x3ce55, 0x3c000, 0x3b217, 0x3a493, 0x39770,
  0x38aab, 0x37e40, 0x3722a, 0x36667, 0x35af3, 0x34fcb, 0x344ed, 0x33a55,
  0x33000, 0x325ee, 0x31c19, 0x31282, 0x30925, 0x30000, 0x2f712, 0x2ee59,
  0x2e5d2, 0x2dd7c, 0x2d556, 0x2cd5d, 0x2c591, 0x2bdf0, 0x2b678, 0x2af29,
  0x2a800, 0x2a0fe, 0x29a20, 0x29365, 0x28ccd, 0x28657, 0x28000, 0x279ca,
  0x273b2, 0x26db7, 0x267da, 0x26218, 0x25c72, 0x256e7, 0x25175, 0x24c1c,
  0x246dc, 0x241b3, 0x23ca2, 0x237a7, 0x232c3, 0x22df3, 0x22939, 0x22493,
  0x22000, 0x21b82, 0x21715, 0x212bc, 0x20e74, 0x20a3e, 0x20619, 0x20205,
  0x1fe00, 0x1fa0c, 0x1f628, 0x1f253, 0x1ee8c, 0x1ead4, 0x1e72b, 0x1e38f,
  0x1e000, 0x1dc80, 0x1d90c, 0x1d5a4, 0x1d24a, 0x1cefb, 0x1cbb8, 0x1c881,
  0x1c556, 0x1c235, 0x1bf20, 0x1bc15, 0x1b915, 0x1b61f, 0x1b334, 0x1b052,
  0x1ad7a, 0x1aaab, 0x1a7e6, 0x1a52a, 0x1a277, 0x19fcc, 0x19d2b, 0x19a91,
  0x19800, 0x19578, 0x192f7, 0x1907e, 0x18e0d, 0x18ba3, 0x18941, 0x186e6,
  0x18493, 0x18246, 0x18000, 0x17dc2, 0x17b89, 0x17958, 0x1772d, 0x17508,
  0x172e9, 0x170d1, 0x16ebe, 0x16cb2, 0x16aab, 0x168aa, 0x166af, 0x164b9,
  0x162c9, 0x160de, 0x15ef8, 0x15d18, 0x15b3c, 0x15966, 0x15795, 0x155c8,
  0x15400, 0x1523e, 0x1507f, 0x14ec5, 0x14d10, 0x14b5f, 0x149b3, 0x1480b,
  0x14667, 0x144c7, 0x1432c, 0x14194, 0x14000, 0x13e71, 0x13ce5, 0x13b5d,
  0x139d9, 0x13859, 0x136dc, 0x13563, 0x133ed, 0x1327b, 0x1310c, 0x12fa1,
  0x12e39, 0x12cd5, 0x12b74, 0x12a16, 0x128bb, 0x12763, 0x1260e, 0x124bd,
  0x1236e, 0x12223, 0x120da, 0x11f94, 0x11e51, 0x11d11, 0x11bd4, 0x11a99,
  0x11962, 0x1182c, 0x116fa, 0x115ca, 0x1149d, 0x11372, 0x1124a, 0x11124,
  0x11000, 0x10ee0, 0x10dc1, 0x10ca5, 0x10b8b, 0x10a73, 0x1095e, 0x1084b,
  0x1073a, 0x1062c, 0x1051f, 0x10415, 0x1030d, 0x10207, 0x10103, 0x10000,
};
| |
| //------------------------------------------------------------------------------ |
| |
| // 31b-range values |
| const uint32_t PseudoRNG::kRandomTable[] = { |
| 0x0de15230, 0x03b31886, 0x775faccb, 0x1c88626a, 0x68385c55, 0x14b3b828, |
| 0x4a85fef8, 0x49ddb84b, 0x64fcf397, 0x5c550289, 0x4a290000, 0x0d7ec1da, |
| 0x5940b7ab, 0x5492577d, 0x4e19ca72, 0x38d38c69, 0x0c01ee65, 0x32a1755f, |
| 0x5437f652, 0x5abb2c32, 0x0faa57b1, 0x73f533e7, 0x685feeda, 0x7563cce2, |
| 0x6e990e83, 0x4730a7ed, 0x4fc0d9c6, 0x496b153c, 0x4f1403fa, 0x541afb0c, |
| 0x73990b32, 0x26d7cb1c, 0x6fcc3706, 0x2cbb77d8, 0x75762f2a, 0x6425ccdd, |
| 0x24b35461, 0x0a7d8715, 0x220414a8, 0x141ebf67, 0x56b41583, 0x73e502e3, |
| 0x44cab16f, 0x28264d42, 0x73baaefb, 0x0a50ebed, 0x1d6ab6fb, 0x0d3ad40b, |
| 0x35db3b68, 0x2b081e83, 0x77ce6b95, 0x5181e5f0, 0x78853bbc, 0x009f9494, |
| 0x27e5ed3c |
| }; |
| |
| PseudoRNG::PseudoRNG() : index1_(0), index2_(31) { |
| memcpy(tab_, kRandomTable, sizeof(tab_)); |
| } |
| |
| int32_t PseudoRNG::Update() { |
| const int32_t diff = tab_[index1_] - tab_[index2_]; |
| tab_[index1_] = diff; |
| if (++index1_ == kTabSize) index1_ = 0; |
| if (++index2_ == kTabSize) index2_ = 0; |
| return diff; |
| } |
| |
| } // namespace WP2 |
| |
| //------------------------------------------------------------------------------ |
| |
| WP2Log2SlowFunc WP2Log2Slow = nullptr; |
| WP2Log2SlowFunc WP2SLog2m1Slow = nullptr; |
| WP2InnerProductFunc WP2InnerProduct = nullptr; |
| |
| static volatile WP2CPUInfo math_last_cpuinfo_used = |
| (WP2CPUInfo)&math_last_cpuinfo_used; |
| |
| WP2_TSAN_IGNORE_FUNCTION void WP2MathInit() { |
| if (math_last_cpuinfo_used == WP2GetCPUInfo) return; |
| |
| WP2Log2Slow = Log2Slow_C; |
| WP2SLog2m1Slow = SLog2m1Slow_C; |
| WP2InnerProduct = InnerProduct_C; |
| |
| if (WP2GetCPUInfo != nullptr) { |
| #if defined(WP2_USE_SSE) |
| if (WP2GetCPUInfo(kSSE)) MathInitSSE(); |
| #endif |
| } |
| math_last_cpuinfo_used = WP2GetCPUInfo; |
| } |