blob: 083d3904d578c4f50ccee9c47fdc2341d8711268 [file] [log] [blame]
// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Test speed of some DSP functions
#include <memory>
#include "src/dsp/dsp.h"
#include "src/dsp/lossless/encl_dsp.h"
#include "examples/example_utils.h"
#include "include/helpers.h"
#include "src/utils/plane.h"
#include "src/utils/random.h"
namespace WP2 {
namespace {
// Test fixture parameterized over WP2CPUInfo values. Before each test it
// installs the parameter as the global 'WP2GetCPUInfo' and re-runs the DSP
// initializers listed below.
class SpeedTestCpuInfo : public testing::TestWithParam<WP2CPUInfo> {
void SetUp() override {
// Reset first, then install the CPU-info parameter before any Init call.
// NOTE(review): presumably the Init functions consult WP2GetCPUInfo to pick
// their implementations, which is why this order matters -- confirm in dsp.h.
WP2DspReset();
WP2GetCPUInfo = GetParam();
WP2DecDspInit();
WP2EncDspInit();
WP2SSIMInit();
WP2PSNRInit();
WP2L::EncLDspInit();
}
};
//------------------------------------------------------------------------------
// Round-trips random rows through SubtractRow() and AddRow(), then checks that
// the reconstructed row equals the source row clamped to [min, max].
TEST_P(SpeedTestCpuInfo, TestAddSub) {
  constexpr uint32_t kMaxLen = 32 * 32;
  constexpr uint32_t kNumTests = 10;
  constexpr uint32_t kNumLoops = 10000;
  int16_t src[kMaxLen], pred[kMaxLen], dst[kMaxLen], res[kMaxLen];
  PseudoRNG rng;
  for (uint32_t t = 0; t < kNumTests; ++t) {
    // Rounded down to a multiple of 4. A small draw can yield 0, in which case
    // the calls below operate on empty rows and nothing is verified.
    const uint32_t len = (rng.GetUnsigned(kMaxLen - 1) + 1) & ~3u;
    // Convert to a signed type *before* negating: negating the unsigned value
    // relies on wraparound followed by an implementation-defined narrowing
    // (pre-C++20) and triggers compiler warnings.
    const int32_t min = -static_cast<int32_t>(1 + rng.GetUnsigned(1023u));
    const int32_t max = static_cast<int32_t>(1 + rng.GetUnsigned(1023u));
    testutil::PrecalculatedRandom<kMaxLen * 3, int16_t> rnd16(-2048, 2048);
    for (uint32_t l = 0; l < kNumLoops; ++l) {
      rnd16.Fill(src, len);
      rnd16.Fill(pred, len);
      WP2::SubtractRow(src, pred, res, len);
      WP2::AddRow(pred, res, min, max, dst, len);
    }
    // Only the output of the last loop iteration is verified.
    uint32_t nb_errors = 0;
    for (uint32_t i = 0; i < len; ++i) {
      const int32_t expected =
          (src[i] <= min) ? min : (src[i] >= max) ? max : src[i];
      nb_errors += (dst[i] != expected);
    }
    // The streamed message is only evaluated when the expectation fails, i.e.
    // when nb_errors > 0, which implies len > 0: no division by zero.
    EXPECT_EQ(nb_errors, 0u) << " rate: " << (100 * nb_errors / len) << "%";
  }
}
// Round-trips random blocks through SubtractBlock[] and either AddBlock[] or
// AddBlockEq[] (in-place variant), then checks that the reconstruction equals
// the source clamped to [min, max].
TEST_P(SpeedTestCpuInfo, TestAddSubBlock) {
  constexpr uint32_t kMaxDim = 32;  // stride of src/pred/dst, in samples
  constexpr uint32_t kMaxLen = kMaxDim * kMaxDim;
  constexpr uint32_t kNumTests = 100;
  constexpr uint32_t kNumLoops = 200;
  int16_t src[kMaxLen], pred[kMaxLen], dst[kMaxLen], res[kMaxLen];
  PseudoRNG rng;
  for (bool add_eq : {false, true}) {
    for (uint32_t t = 0; t < kNumTests; ++t) {
      const uint32_t height = 1 + rng.GetUnsigned(kMaxDim - 1);
      // Convert to a signed type *before* negating: negating the unsigned
      // value relies on wraparound followed by an implementation-defined
      // narrowing (pre-C++20) and triggers compiler warnings.
      const int32_t min = -static_cast<int32_t>(1 + rng.GetUnsigned(1023u));
      const int32_t max = static_cast<int32_t>(1 + rng.GetUnsigned(1023u));
      testutil::PrecalculatedRandom<kMaxLen * 3, int16_t> rnd16(-2048, 2048);
      for (uint32_t l = 0; l < kNumLoops; ++l) {
        rnd16.Fill(src, kMaxLen);
        rnd16.Fill(pred, kMaxLen);
        // Reverse order, from large to small: the widest pass writes all
        // kMaxDim columns of 'dst', the narrower ones then rewrite the
        // left-most columns from the same src/pred data.
        for (uint32_t idx : {3u, 2u, 1u, 0u}) {
          const uint32_t width = 4u << idx;
          // Residuals are stored with a stride of 'width'.
          WP2::SubtractBlock[idx](src, kMaxDim, pred, kMaxDim, res, width,
                                  height);
          if (add_eq) {
            // AddBlockEq[] works in place on 'dst': copy the residuals there,
            // changing the stride from 'width' to 'kMaxDim'.
            for (uint32_t j = 0; j < height; ++j) {
              for (uint32_t i = 0; i < width; ++i) {
                dst[i + j * kMaxDim] = res[i + j * width];
              }
            }
            WP2::AddBlockEq[idx](pred, kMaxDim, dst, kMaxDim, min, max,
                                 height);
          } else {
            WP2::AddBlock[idx](pred, kMaxDim, res, width, min, max, dst,
                               kMaxDim, height);
          }
        }
      }
      // Only the output of the last loop iteration is verified, over the
      // 'height' rows that were actually processed.
      const uint32_t num_checked = height * kMaxDim;
      uint32_t nb_errors = 0;
      for (uint32_t i = 0; i < num_checked; ++i) {
        const int32_t expected =
            (src[i] <= min) ? min : (src[i] >= max) ? max : src[i];
        nb_errors += (dst[i] != expected);
      }
      // The rate is relative to the number of samples actually checked.
      EXPECT_EQ(nb_errors, 0u)
          << " rate: " << (100 * nb_errors / num_checked) << "%";
    }
  }
}
//------------------------------------------------------------------------------
// Feeds random buffers to WP2L::BufferAdd() (both with a separate output and
// accumulating in place) and compares a running checksum of the results with
// a golden value.
TEST_P(SpeedTestCpuInfo, TestBufferAdd) {
  constexpr uint32_t kMaxLen = 1 << 14;
  constexpr uint32_t kHalfLen = kMaxLen / 2;
  constexpr uint32_t kNumTests = 10;
  constexpr uint32_t kNumLoops = 500;

  Vector_u32 A, B, C, D;
  ASSERT_TRUE(A.resize(kMaxLen) && B.resize(kMaxLen));
  ASSERT_TRUE(C.resize(kMaxLen) && D.resize(kMaxLen));
  for (auto& value : C) value = 0;
  for (auto& value : D) value = 0;
  // The vectors are not resized past this point: take the pointers once.
  auto* const a = &A[0];
  auto* const b = &B[0];
  auto* const c = &C[0];
  auto* const d = &D[0];

  PseudoRNG rng;
  uint32_t checksum = 412325;
  for (uint32_t test = 0; test < kNumTests; ++test) {
    testutil::PrecalculatedRandom<kMaxLen * 2, uint32_t> rnd32(0, 1 << 12);
    const uint32_t proba = rng.GetUnsigned(255);
    uint32_t nzero = rng.GetUnsigned(532);  // shouldn't matter
    // Randomly exercise the call both with and without the 'nzero' argument.
    uint32_t* const nzero_ptr = (rng.GetUnsigned(10) < 5) ? nullptr : &nzero;
    // Zero out some of the precalculated values (those whose draw in
    // [0, 1000] is below 'proba').
    for (uint32_t i = 0; i < rnd32.Size(); ++i) {
      if (rng.GetUnsigned(1000) < proba) rnd32[i] = 0;
    }
    for (uint32_t loop = 0; loop < kNumLoops; ++loop) {
      const uint32_t len = kHalfLen + rng.GetUnsigned(kHalfLen - 1);
      rnd32.Fill(a, len);
      rnd32.Fill(b, len);
      WP2L::BufferAdd(a, b, len, c, nzero_ptr);  // C = A + B
      WP2L::BufferAdd(a, d, len, d, nzero_ptr);  // D += A (in place)
      // 'nzero' is folded in even when it was not passed to BufferAdd().
      checksum = testutil::SillyCRC(checksum, nzero);
    }
  }
  for (auto& value : C) checksum = testutil::SillyCRC(checksum, value);
  for (auto& value : D) checksum = testutil::SillyCRC(checksum, value);
  ASSERT_EQ(468228884u, checksum);
}
//------------------------------------------------------------------------------
// Fills two planes of type 'T' with random samples drawn from the range of
// 'bit_depth', keeps every sample of the second plane within 'max_diff' of the
// matching sample of the first one, and checks that the per-pixel SSIM score
// averaged over all runs matches 'expected_average_score'.
template <typename T>
void DoTestSsim(BitDepth bit_depth, int max_diff,
                double expected_average_score) {
  constexpr uint32_t kMaxDim = 300;
  constexpr uint32_t kNumTests = 5;
  constexpr uint32_t kNumLoops = 7;
  PseudoRNG rng;
  double average_score = 0.;
  for (uint32_t test = 0; test < kNumTests; ++test) {
    const uint32_t width = 1 + rng.GetUnsigned(kMaxDim);
    const uint32_t height = 1 + rng.GetUnsigned(kMaxDim);
    const uint32_t num_pixels = width * height;
    // NOTE(review): the '+ 1' suggests an exclusive upper bound -- confirm.
    testutil::PrecalculatedRandom<kMaxDim * 7, T> rnd(bit_depth.min(),
                                                      bit_depth.max() + 1);

    Plane<T> reference, distorted;
    double score;
    ASSERT_WP2_OK(reference.Resize(width, height));
    // 'distorted' has not been resized yet: the comparison must be rejected.
    EXPECT_EQ(reference.GetSSIM(distorted, bit_depth, &score),
              WP2_STATUS_BAD_DIMENSION);
    ASSERT_WP2_OK(distorted.Resize(width, height));

    for (uint32_t y = 0; y < height; ++y) {
      T* const ref_row = reference.Row(y);
      T* const dist_row = distorted.Row(y);
      rnd.Fill(ref_row, width);
      rnd.Fill(dist_row, width);
      for (uint32_t x = 0; x < width; ++x) {
        const int center = ref_row[x];
        dist_row[x] =
            Clamp<int>(dist_row[x], center - max_diff, center + max_diff);
      }
    }

    // Score obtained when both planes are identical.
    const double perfect_score = 1. * width * height;
    for (uint32_t loop = 0; loop < kNumLoops; ++loop) {
      ASSERT_WP2_OK(reference.GetSSIM(distorted, bit_depth, &score));
      if (max_diff != 0) {
        EXPECT_LT(score, perfect_score);
      } else {
        EXPECT_EQ(score, perfect_score);
      }
    }
    average_score += score / (num_pixels * kNumTests);
  }
  EXPECT_NEAR(average_score, expected_average_score, 1e-16);
}
// SSIM on unsigned 8-bit samples, for increasing amounts of distortion.
TEST_P(SpeedTestCpuInfo, TestSsim8bits) {
  constexpr BitDepth kBitDepth = {8, /*is_signed=*/false};
  struct SsimCase {
    int max_diff;
    double expected_average_score;
  };
  constexpr SsimCase kCases[] = {
      {0, 1.},
      {1, 0.99990578289445553},
      {50, 0.77854488910920416},
      {256, 0.078342300386812946},
  };
  for (const SsimCase& ssim_case : kCases) {
    DoTestSsim<uint8_t>(kBitDepth, ssim_case.max_diff,
                        ssim_case.expected_average_score);
  }
}
// SSIM on signed 10-bit samples, for increasing amounts of distortion.
TEST_P(SpeedTestCpuInfo, TestSsim10bits) {
  constexpr BitDepth kBitDepth = {10, /*is_signed=*/true};
  struct SsimCase {
    int max_diff;
    double expected_average_score;
  };
  constexpr SsimCase kCases[] = {
      {0, 1.},
      {1, 0.9999983977231518},
      {50, 0.99596831375426853},
      {1024, 0.20170015131280025},
  };
  for (const SsimCase& ssim_case : kCases) {
    DoTestSsim<int16_t>(kBitDepth, ssim_case.max_diff,
                        ssim_case.expected_average_score);
  }
}
// SSIM on signed 12-bit samples, for increasing amounts of distortion.
TEST_P(SpeedTestCpuInfo, TestSsim12bits) {
  constexpr BitDepth kBitDepth = {12, /*is_signed=*/true};
  struct SsimCase {
    int max_diff;
    double expected_average_score;
  };
  constexpr SsimCase kCases[] = {
      {0, 1.},
      {1, 0.99999989986046256},
      {50, 0.99974890122298021},
      {4096, 0.20169647034266905},
  };
  for (const SsimCase& ssim_case : kCases) {
    DoTestSsim<int16_t>(kBitDepth, ssim_case.max_diff,
                        ssim_case.expected_average_score);
  }
}
//------------------------------------------------------------------------------
template<typename T>
void DoTestPSNR(int min_range, int max_range, uint64_t result) {
constexpr uint32_t kMaxDim = 500;
constexpr uint32_t kNumTests = 10;
constexpr uint32_t kNumLoops = 50;
PseudoRNG rng;
uint64_t total_score = 0;
for (uint32_t t = 0; t < kNumTests; ++t) {
const uint32_t width = 1 + rng.GetUnsigned(kMaxDim);
const uint32_t height = 1 + rng.GetUnsigned(kMaxDim);
testutil::PrecalculatedRandom<kMaxDim * 7, T> rnd(min_range, max_range);
uint64_t score;
Plane<T> plane1, plane2, plane3;
ASSERT_WP2_OK(plane1.Resize(width, height));
EXPECT_EQ(plane1.GetSSE(plane2, &score), WP2_STATUS_BAD_DIMENSION);
ASSERT_WP2_OK(plane2.Resize(width, height));
ASSERT_WP2_OK(plane3.Resize(width, height));
plane3.Fill(0);
for (uint32_t y = 0; y < height; ++y) {
T* const row1 = plane1.Row(y);
T* const row2 = plane2.Row(y);
rnd.Fill(row1, width);
for (uint32_t x = 0; x < width; ++x) row2[x] = row1[x] ^ 1;
}
for (uint32_t l = 0; l < kNumLoops; ++l) {
ASSERT_WP2_OK(plane1.GetSSE(plane1, &score));
EXPECT_EQ(score, 0u);
ASSERT_WP2_OK(plane1.GetSSE(plane2, &score));
EXPECT_EQ(score, width * height); // diff is '1' for each pixel
ASSERT_WP2_OK(plane1.GetSSE(plane3, &score));
}
total_score += score;
}
EXPECT_EQ(total_score, result);
}
// SSE check on int16_t planes, compared against a golden accumulated value.
TEST_P(SpeedTestCpuInfo, TestPSNR_16b) {
  constexpr int kMinRange = -2048;
  constexpr int kMaxRange = 2048;
  constexpr uint64_t kExpectedTotal = 6630626819665llu;
  DoTestPSNR<int16_t>(kMinRange, kMaxRange, kExpectedTotal);
}
// SSE check on uint8_t planes, compared against a golden accumulated value.
TEST_P(SpeedTestCpuInfo, TestPSNR_8b) {
  constexpr int kMinRange = 0;
  constexpr int kMaxRange = 256;
  constexpr uint64_t kExpectedTotal = 15200787724llu;
  DoTestPSNR<uint8_t>(kMinRange, kMaxRange, kExpectedTotal);
}
//------------------------------------------------------------------------------
// Runs CflPredict() on random inputs and checks every produced sample against
// CflScale() applied to the same input. Also checks that nothing is written
// past the requested length.
TEST_P(SpeedTestCpuInfo, TestCflPrediction) {
  WP2::PredictionInit();
  PseudoRNG rng;
  constexpr uint32_t kMaxSize = 51353;
  constexpr uint32_t kNumTests = 60;
  constexpr uint32_t kNumLoops = 500;
  for (uint32_t t = 0; t < kNumTests; ++t) {
    // 'out' is zeroed again for each test so that stray writes are detected.
    int16_t in[kMaxSize], out[kMaxSize] = {0};
    // The first 16 tests sweep the short lengths 0, 4, ..., 60; the remaining
    // ones use a random length. Always rounded down to a multiple of 4.
    const uint32_t raw_len =
        (t < 16) ? 4 * t : 1 + rng.GetUnsigned(kMaxSize - 1);
    const uint32_t len = raw_len & ~3u;
    // Convert to a signed type *before* negating: negating the unsigned value
    // relies on wraparound followed by an implementation-defined narrowing
    // (pre-C++20) and triggers compiler warnings.
    const int32_t min_value = -static_cast<int32_t>(rng.GetUnsigned(511));
    const int32_t max_value = static_cast<int32_t>(rng.GetUnsigned(511));
    const int32_t a = -rng.Get(-1020, 1240);
    const int32_t b = +rng.Get(-512, 511);
    for (auto& v : in) v = rng.Get(-511, 511);
    // Repeated calls with identical arguments (speed measurement).
    for (uint32_t l = 0; l < kNumLoops; ++l) {
      CflPredict(in, a, b, out, min_value, max_value, len);
    }
    for (uint32_t l = 0; l < len; ++l) {
      ASSERT_EQ(out[l], CflScale(in[l], a, b, min_value, max_value))
          << "[" << in[l] << " * a=" << a << " + b=" << b << " = "
          << out[l] << " clipped to ["
          << min_value << ", " << max_value << "]";
    }
    // Samples at or beyond 'len' must still hold their initial value.
    for (uint32_t l = len; l < kMaxSize; ++l) ASSERT_EQ(out[l], 0);
  }
}
//------------------------------------------------------------------------------
// Runs every SpeedTestCpuInfo test once per entry of testutil::kWP2CpuInfos.
INSTANTIATE_TEST_SUITE_P(SpeedTestInstantiation, SpeedTestCpuInfo,
testing::ValuesIn(testutil::kWP2CpuInfos));
} // namespace
} // namespace WP2