blob: 78a1c5d4c35e6ba91b63c1fa7c30ae31024d3c3b [file] [log] [blame]
// Copyright 2020 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <math.h>
#include <stdint.h>

#include <algorithm>
#include <cstddef>
#include <limits>
#include <utility>

#include "base/compiler_specific.h"
#include "base/containers/span.h"
#include "base/logging.h"
#include "media/base/video_frame.h"
#include "media/base/video_types.h"
#include "media/gpu/test/video_frame_helpers.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "third_party/libyuv/include/libyuv/compare.h"
#include "ui/gfx/geometry/point.h"
// Records a gtest failure (via ADD_FAILURE()) and returns |return_value| from
// the enclosing function when |predicate| is false. Used instead of
// ASSERT_TRUE() because these helpers return values rather than void.
#define ASSERT_TRUE_OR_RETURN(predicate, return_value) \
  do {                                                 \
    if (!(predicate)) {                                \
      ADD_FAILURE();                                   \
      return (return_value);                           \
    }                                                  \
  } while (0)
namespace media {
namespace test {
namespace {
// The metrics of the similarity of two images. Selects which metric
// ComputeSimilarity() evaluates.
enum SimilarityMetrics {
  PSNR,  // Peak Signal-to-Noise Ratio. For detail see
         // https://en.wikipedia.org/wiki/Peak_signal-to-noise_ratio
  SSIM,  // Structural Similarity. For detail see
         // https://en.wikipedia.org/wiki/Structural_similarity
};
// Computes the SSIM of a window of 8x8 samples between two planes where each
// sample is 16 bits. This is modeled after libyuv::Ssim8x8_C().
// |src_a| and |src_b| point at the top-left sample of the window;
// |stride_a|/|stride_b| are row strides in bytes.
double SSIM16BitPlane8x8(const uint8_t* src_a,
                         int stride_a,
                         const uint8_t* src_b,
                         int stride_b) {
  // First/second-moment accumulators. 64 products of 16-bit samples stay
  // comfortably within int64_t range.
  int64_t sum_a = 0;
  int64_t sum_b = 0;
  int64_t sum_sq_a = 0;
  int64_t sum_sq_b = 0;
  int64_t sum_axb = 0;
  for (int i = 0; i < 8; ++i) {
    for (int j = 0; j < 8; ++j) {
      // Read 16 bits and store it in a 32 bits value to avoid overflow in the
      // following calculations (65535^2 still fits in uint32_t).
      const uint32_t a = static_cast<uint32_t>(
          *reinterpret_cast<const uint16_t*>(UNSAFE_TODO(src_a + 2 * j)));
      const uint32_t b = static_cast<uint32_t>(
          *reinterpret_cast<const uint16_t*>(UNSAFE_TODO(src_b + 2 * j)));
      sum_a += a;
      sum_b += b;
      sum_sq_a += a * a;
      sum_sq_b += b * b;
      sum_axb += a * b;
    }
    UNSAFE_TODO(src_a += stride_a);
    UNSAFE_TODO(src_b += stride_b);
  }
  // SSIM stabilization constants C1 = (0.01 * 65535)^2 and
  // C2 = (0.03 * 65535)^2, pre-scaled by 64^2 and divided back out by the
  // >> 12 (count^2 == 4096), mirroring libyuv::Ssim8x8_C()'s layout.
  constexpr int64_t count = 64;
  constexpr int64_t cc1 = 1759164917;   // (64^2*(.01*65535)^2
  constexpr int64_t cc2 = 15832484259;  // (64^2*(.03*65535)^2
  constexpr int64_t c1 = (cc1 * count * count) >> 12;
  constexpr int64_t c2 = (cc2 * count * count) >> 12;
  const int64_t sum_a_x_sum_b = sum_a * sum_b;
  // Each factor below fits in an int64_t (|factor| < 2^46), but the product
  // of two factors can exceed 2^63 for bright 16-bit content (e.g. a uniform
  // mid-gray 10-bit window already does), which is signed-overflow UB with
  // pure int64_t arithmetic. Perform the final multiplications in double;
  // the ~1-ulp rounding error is negligible for a similarity score.
  const int64_t luminance_term = 2 * sum_a_x_sum_b + c1;
  const int64_t covariance_term =
      2 * count * sum_axb - 2 * sum_a_x_sum_b + c2;
  const double ssim_n = static_cast<double>(luminance_term) *
                        static_cast<double>(covariance_term);
  const int64_t sum_a_sq = sum_a * sum_a;
  const int64_t sum_b_sq = sum_b * sum_b;
  const int64_t mean_term = sum_a_sq + sum_b_sq + c1;
  const int64_t variance_term =
      count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2;
  const double ssim_d = static_cast<double>(mean_term) *
                        static_cast<double>(variance_term);
  // A zero denominator cannot be scored; treat the windows as maximally
  // similar rather than dividing by zero.
  if (ssim_d == 0)
    return std::numeric_limits<double>::max();
  return ssim_n / ssim_d;
}
// Computes the SSIM between two planes where each sample is 16 bits. This is
// modeled after libyuv::CalcFrameSsim(): 8x8 windows are scored with a step
// of 4 pixels in each direction and the per-window scores are averaged.
// Assumes the plane is larger than 8x8 — TODO confirm callers guarantee this.
double Calc16bitPlaneSSIM(const uint8_t* src_a,
                          int stride_a,
                          const uint8_t* src_b,
                          int stride_b,
                          int width,
                          int height) {
  double total = 0.0;
  int window_count = 0;
  const uint8_t* row_a = src_a;
  const uint8_t* row_b = src_b;
  for (int y = 0; y < height - 8; y += 4) {
    for (int x = 0; x < width - 8; x += 4) {
      // |x| is a pixel index; samples are 16 bits, hence the byte offset 2*x.
      total += SSIM16BitPlane8x8(UNSAFE_TODO(row_a + 2 * x), stride_a,
                                 UNSAFE_TODO(row_b + 2 * x), stride_b);
      ++window_count;
    }
    // The strides are already in bytes, so no doubling is needed here.
    UNSAFE_TODO(row_a += stride_a * 4);
    UNSAFE_TODO(row_b += stride_b * 4);
  }
  return total / window_count;
}
// Computes the SSIM between two YUV420P010 buffers. This is modeled after
// libyuv::I420Ssim(): each plane is scored independently and the scores are
// combined with a fixed luma-heavy weighting.
double ComputeYUV420P10SSIM(const uint8_t* src_y_a,
                            int stride_y_a,
                            const uint8_t* src_u_a,
                            int stride_u_a,
                            const uint8_t* src_v_a,
                            int stride_v_a,
                            const uint8_t* src_y_b,
                            int stride_y_b,
                            const uint8_t* src_u_b,
                            int stride_u_b,
                            const uint8_t* src_v_b,
                            int stride_v_b,
                            int width,
                            int height) {
  // Chroma planes are subsampled 2x2 with rounding up, as in all 4:2:0
  // formats.
  const int width_uv = (width + 1) / 2;
  const int height_uv = (height + 1) / 2;
  const double ssim_y = Calc16bitPlaneSSIM(src_y_a, stride_y_a, src_y_b,
                                           stride_y_b, width, height);
  const double ssim_u = Calc16bitPlaneSSIM(src_u_a, stride_u_a, src_u_b,
                                           stride_u_b, width_uv, height_uv);
  const double ssim_v = Calc16bitPlaneSSIM(src_v_a, stride_v_a, src_v_b,
                                           stride_v_b, width_uv, height_uv);
  // Same plane weighting as libyuv::I420Ssim(): 80% luma, 10% per chroma.
  return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
}
// Computes the similarity (PSNR or SSIM, per |mode|) between the visible
// regions of |frame1| and |frame2|, converting both to a common YUV format
// first when needed. On precondition failure a gtest failure is recorded and
// a sentinel value is returned.
double ComputeSimilarity(const VideoFrame* frame1,
                         const VideoFrame* frame2,
                         SimilarityMetrics mode) {
  // Both frames must be CPU-mappable with matching visible size and bit
  // depth.
  ASSERT_TRUE_OR_RETURN(
      frame1->IsMappable() && frame2->IsMappable(),
      static_cast<double>(std::numeric_limits<std::size_t>::max()));
  ASSERT_TRUE_OR_RETURN(
      frame1->visible_rect().size() == frame2->visible_rect().size(),
      static_cast<double>(std::numeric_limits<std::size_t>::max()));
  ASSERT_TRUE_OR_RETURN(
      frame1->BitDepth() == frame2->BitDepth(),
      static_cast<double>(std::numeric_limits<std::size_t>::max()));
  // Bit depths are equal at this point, so std::min() just picks that value.
  const size_t bit_depth = std::min(frame1->BitDepth(), frame2->BitDepth());
  // 8-bit content is compared as I420, anything else as YUV420P10.
  const VideoPixelFormat common_format =
      bit_depth == 8 ? PIXEL_FORMAT_I420 : PIXEL_FORMAT_YUV420P10;
  // These are used, only if frames are converted to |common_format|, for
  // keeping converted frames alive until the end of function. |frame1| and
  // |frame2| are re-pointed at the converted copies below.
  scoped_refptr<VideoFrame> converted_frame1;
  scoped_refptr<VideoFrame> converted_frame2;
  if (frame1->format() != common_format) {
    converted_frame1 = ConvertVideoFrame(frame1, common_format);
    frame1 = converted_frame1.get();
  }
  if (frame2->format() != common_format) {
    converted_frame2 = ConvertVideoFrame(frame2, common_format);
    frame2 = converted_frame2.get();
  }
  // All supported metrics share libyuv::I420Psnr's signature, so a single
  // function pointer dispatches to any of them.
  decltype(&libyuv::I420Psnr) metric_func = nullptr;
  switch (mode) {
    case SimilarityMetrics::PSNR:
      // NOTE(review): PSNR is only wired up for 8-bit content; a 10-bit PSNR
      // request falls through to the failure below.
      if (bit_depth == 8)
        metric_func = &libyuv::I420Psnr;
      break;
    case SimilarityMetrics::SSIM:
      if (bit_depth == 8)
        metric_func = &libyuv::I420Ssim;
      else if (bit_depth == 10)
        metric_func = &ComputeYUV420P10SSIM;
      break;
  }
  // Unsupported mode/bit-depth combinations fail the test.
  ASSERT_TRUE_OR_RETURN(metric_func, std::numeric_limits<double>::max());
  return metric_func(
      frame1->visible_data(0), frame1->stride(0), frame1->visible_data(1),
      frame1->stride(1), frame1->visible_data(2), frame1->stride(2),
      frame2->visible_data(0), frame2->stride(0), frame2->visible_data(1),
      frame2->stride(1), frame2->visible_data(2), frame2->stride(2),
      frame1->visible_rect().width(), frame1->visible_rect().height());
}
// Each color channel is quantized down to 4 bits when building the joint
// distribution used by ComputeLogLikelihoodRatio(), giving a 16x16x16 table.
constexpr int kJointDistributionBitDepth = 4;
constexpr int kJointDistributionDim = 1 << kJointDistributionBitDepth;
// Log-probabilities indexed by the three 4-bit quantized channel values.
using DistributionTable =
    double[kJointDistributionDim][kJointDistributionDim][kJointDistributionDim];
bool ComputeLogJointDistribution(const VideoFrame& frame,
DistributionTable& log_joint_distribution) {
ASSERT_TRUE_OR_RETURN(frame.IsMappable(), false);
ASSERT_TRUE_OR_RETURN(frame.format() == PIXEL_FORMAT_ARGB, false);
ASSERT_TRUE_OR_RETURN(frame.BitDepth() == 8, false);
// Arbitrarily small number to fill the probability distribution table with so
// we don't have a problem with taking the log of 0.
static const double kMinProbabilityValue = 0.000000001;
double normalization_factor = kJointDistributionDim * kJointDistributionDim *
kJointDistributionDim *
kMinProbabilityValue +
(double)frame.visible_rect().size().GetArea();
// Initialize distribution table to arbitrarily small probability value.
for (int i = 0; i < kJointDistributionDim; i++) {
for (int j = 0; j < kJointDistributionDim; j++) {
for (int k = 0; k < kJointDistributionDim; k++) {
UNSAFE_TODO(log_joint_distribution[i][j][k]) = kMinProbabilityValue;
}
}
}
// Downsample the RGB values of the plane into 4-bits per channel, and use the
// downsampled color information to increment the corresponding element of the
// distribution table.
const uint8_t* row_ptr = frame.visible_data(0);
for (int y = 0; y < frame.visible_rect().height(); y++) {
for (int x = 0; x < frame.visible_rect().width(); x++) {
UNSAFE_TODO(log_joint_distribution
[row_ptr[4 * x + 1] >> kJointDistributionBitDepth]
[row_ptr[4 * x + 2] >> kJointDistributionBitDepth]
[row_ptr[4 * x + 3] >> kJointDistributionBitDepth]) +=
1.0;
}
UNSAFE_TODO(row_ptr += frame.stride(0));
}
// Normalize the joint distribution so that it sums to 1.0 and then take the
// log.
for (int i = 0; i < kJointDistributionDim; i++) {
for (int j = 0; j < kJointDistributionDim; j++) {
for (int k = 0; k < kJointDistributionDim; k++) {
UNSAFE_TODO(log_joint_distribution[i][j][k]) /= normalization_factor;
UNSAFE_TODO(log_joint_distribution[i][j][k]) =
log(UNSAFE_TODO(log_joint_distribution[i][j][k]));
}
}
}
return true;
}
double ComputeLogProbability(const VideoFrame& frame,
DistributionTable& log_joint_distribution) {
ASSERT_TRUE_OR_RETURN(frame.IsMappable(), 0.0);
ASSERT_TRUE_OR_RETURN(frame.format() == PIXEL_FORMAT_ARGB, 0.0);
ASSERT_TRUE_OR_RETURN(frame.BitDepth() == 8, 0.0);
double ret = 0.0;
const uint8_t* row_ptr = frame.visible_data(0);
for (int y = 0; y < frame.visible_rect().height(); y++) {
for (int x = 0; x < frame.visible_rect().width(); x++) {
ret +=
UNSAFE_TODO(log_joint_distribution
[row_ptr[4 * x + 1] >> kJointDistributionBitDepth]
[row_ptr[4 * x + 2] >> kJointDistributionBitDepth]
[row_ptr[4 * x + 3] >> kJointDistributionBitDepth]);
}
UNSAFE_TODO(row_ptr += frame.stride(0));
}
return ret;
}
// Upper bound reported by ComputeAR30PSNR(); identical frames (zero squared
// error) map to this cap instead of infinity.
constexpr double kMaxPsnr = 128.0;
} // namespace
// Counts, across all planes of the two frames' visible areas, the bytes whose
// absolute difference exceeds |tolerance|. Records a gtest failure and
// returns SIZE_MAX if the frames are not mappable or do not match in format
// or visible size.
size_t CompareFramesWithErrorDiff(const VideoFrame& frame1,
                                  const VideoFrame& frame2,
                                  uint8_t tolerance) {
  ASSERT_TRUE_OR_RETURN(frame1.IsMappable() && frame2.IsMappable(),
                        std::numeric_limits<std::size_t>::max());
  ASSERT_TRUE_OR_RETURN(frame1.format() == frame2.format(),
                        std::numeric_limits<std::size_t>::max());
  ASSERT_TRUE_OR_RETURN(
      frame1.visible_rect().size() == frame2.visible_rect().size(),
      std::numeric_limits<std::size_t>::max());
  const VideoPixelFormat format = frame1.format();
  const size_t num_planes = VideoFrame::NumPlanes(format);
  const gfx::Size& visible_size = frame1.visible_rect().size();
  size_t mismatch_count = 0;
  for (size_t plane = 0; plane < num_planes; ++plane) {
    const uint8_t* data1 = frame1.visible_data(plane);
    const uint8_t* data2 = frame2.visible_data(plane);
    const int stride1 = frame1.stride(plane);
    const int stride2 = frame2.stride(plane);
    const size_t rows = VideoFrame::Rows(plane, format, visible_size.height());
    const int row_bytes =
        VideoFrame::RowBytes(plane, format, visible_size.width());
    for (size_t row = 0; row < rows; ++row) {
      for (int col = 0; col < row_bytes; ++col) {
        const uint8_t byte1 = UNSAFE_TODO(data1[(stride1 * row) + col]);
        const uint8_t byte2 = UNSAFE_TODO(data2[(stride2 * row) + col]);
        const uint8_t abs_diff = byte1 > byte2 ? byte1 - byte2 : byte2 - byte1;
        if (abs_diff > tolerance)
          ++mismatch_count;
      }
    }
  }
  return mismatch_count;
}
// Returns the PSNR between |frame1| and |frame2|. See ComputeSimilarity()
// for the preconditions and failure sentinels.
double ComputePSNR(const VideoFrame& frame1, const VideoFrame& frame2) {
  return ComputeSimilarity(&frame1, &frame2, SimilarityMetrics::PSNR);
}
// Returns the SSIM between |frame1| and |frame2|. See ComputeSimilarity()
// for the preconditions and failure sentinels.
double ComputeSSIM(const VideoFrame& frame1, const VideoFrame& frame2) {
  return ComputeSimilarity(&frame1, &frame2, SimilarityMetrics::SSIM);
}
// Computes the ratio of |test_frame|'s log likelihood to |golden_frame|'s
// log likelihood under a color distribution estimated from |golden_frame|.
// Returns 0.0 (with a gtest failure) on size mismatch or if either
// likelihood cannot be computed.
double ComputeLogLikelihoodRatio(scoped_refptr<const VideoFrame> golden_frame,
                                 scoped_refptr<const VideoFrame> test_frame) {
  ASSERT_TRUE_OR_RETURN(
      golden_frame->visible_rect().size() == test_frame->visible_rect().size(),
      0.0);
  // The distribution is estimated over ARGB data, so convert both frames if
  // they arrive in any other format.
  if (golden_frame->format() != PIXEL_FORMAT_ARGB)
    golden_frame = ConvertVideoFrame(golden_frame.get(), PIXEL_FORMAT_ARGB);
  if (test_frame->format() != PIXEL_FORMAT_ARGB)
    test_frame = ConvertVideoFrame(test_frame.get(), PIXEL_FORMAT_ARGB);
  DistributionTable log_joint_distribution;
  ASSERT_TRUE_OR_RETURN(
      ComputeLogJointDistribution(*golden_frame, log_joint_distribution), 0.0);
  const double golden_log_prob =
      ComputeLogProbability(*golden_frame, log_joint_distribution);
  // 0.0 is ComputeLogProbability()'s failure sentinel.
  ASSERT_TRUE_OR_RETURN(golden_log_prob != 0.0, 0.0);
  const double test_log_prob =
      ComputeLogProbability(*test_frame, log_joint_distribution);
  ASSERT_TRUE_OR_RETURN(test_log_prob != 0.0, 0.0);
  return test_log_prob / golden_log_prob;
}
// Computes the PSNR over the 10-bit R, G and B channels of two AR30 buffers
// (the 2-bit alpha field is ignored). |frame*_stride| is in 32-bit pixels,
// not bytes. The result is capped at kMaxPsnr.
double ComputeAR30PSNR(base::span<const uint32_t> frame1_data,
                       size_t frame1_stride,
                       base::span<const uint32_t> frame2_data,
                       size_t frame2_stride,
                       size_t width,
                       size_t height) {
  // Extracts one 10-bit channel from an AR30 word.
  const auto read_channel = [](uint32_t pixel, int shift) {
    return static_cast<int32_t>((pixel >> shift) & 0x3FF);
  };
  uint64_t sum_square_error = 0;
  for (size_t y = 0; y < height; ++y) {
    for (size_t x = 0; x < width; ++x) {
      const uint32_t pixel1 = frame1_data[y * frame1_stride + x];
      const uint32_t pixel2 = frame2_data[y * frame2_stride + x];
      // R lives at bit 20, G at bit 10, B at bit 0.
      for (const int shift : {20, 10, 0}) {
        const int32_t delta =
            read_channel(pixel1, shift) - read_channel(pixel2, shift);
        sum_square_error += delta * delta;
      }
    }
  }
  if (!sum_square_error)
    return kMaxPsnr;
  // Three channel samples per pixel.
  const uint64_t samples =
      static_cast<uint64_t>(width) * static_cast<uint64_t>(height) * 3;
  const double inverse_mse = static_cast<double>(samples) /
                             static_cast<double>(sum_square_error);
  // 1023 is the maximum value of a 10-bit channel.
  const double psnr = 10.0 * log10(1023.0 * 1023.0 * inverse_mse);
  return std::min(psnr, kMaxPsnr);
}
} // namespace test
} // namespace media