Revert "Increase AudioBus alignment to 32 bytes"
This reverts commit 3d02dd3cc9b5963094875cc0f9ea0eb21f6a2a28.
Reason for revert: might make Mac tests fail http://b/411562684
Bug: 40756517
Original change's description:
> Increase AudioBus alignment to 32 bytes
>
> This CL bumps up AudioBus::kChannelAlignment from 16 bytes to 32 bytes.
>
> This allows us to clean up some vector math operations, and ensure that
> we are using the most efficient load and store operations.
>
> This CL also cleans up code which handled 16 and 32 bit alignments, and
> opportunistically replaces some manually aligned memory to use
> base::AlignedHeapArray instead.
>
> Bug: 40756517
> Change-Id: Id3eb4bef8572c83c0f12496b28c2cf80348ab9eb
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6319267
> Commit-Queue: Simeon Anfinrud <sanfin@chromium.org>
> Reviewed-by: Hongchan Choi <hongchan@chromium.org>
> Reviewed-by: Eugene Zemtsov <eugene@chromium.org>
> Auto-Submit: Thomas Guilbert <tguilbert@chromium.org>
> Reviewed-by: Simeon Anfinrud <sanfin@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#1448656}
Bug: 40756517
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Change-Id: If7bbe6481fae270b94ed5e84a82fbe58ecbc9f15
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6470771
Commit-Queue: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Owners-Override: Lingqi Chi <lingqi@chromium.org>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Auto-Submit: Lingqi Chi <lingqi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1448797}
diff --git a/chromecast/media/audio/net/audio_socket.cc b/chromecast/media/audio/net/audio_socket.cc
index b54dd72b..8230d2b 100644
--- a/chromecast/media/audio/net/audio_socket.cc
+++ b/chromecast/media/audio/net/audio_socket.cc
@@ -88,7 +88,6 @@
}
// static
-constexpr size_t AudioSocket::kAudioDataHeaderPadding;
constexpr size_t AudioSocket::kAudioHeaderSize;
constexpr size_t AudioSocket::kAudioMessageHeaderSize;
@@ -144,7 +143,7 @@
// == AudioHeader ==
// uint16_t type (audio or metadata)
// uint64_t timestamp
- // 5 * uint32_t padding
+ // uint32_t padding
// == End of AudioHeader ==
// ... audio data ...
@@ -166,7 +165,7 @@
base::byte_span_from_ref(timestamp));
buffer = buffer.subspan(sizeof(uint64_t));
- std::ranges::fill(buffer.first<kAudioDataHeaderPadding>(), uint8_t{0});
+ std::ranges::fill(buffer.first<sizeof(uint32_t)>(), uint8_t{0});
}
bool AudioSocket::SendAudioBuffer(scoped_refptr<net::IOBuffer> audio_buffer,
@@ -363,8 +362,8 @@
size -= sizeof(timestamp);
// Handle padding bytes.
- data += kAudioDataHeaderPadding;
- size -= kAudioDataHeaderPadding;
+ data += sizeof(int32_t);
+ size -= sizeof(int32_t);
return delegate_->HandleAudioData(data, size, timestamp);
}
@@ -384,8 +383,8 @@
size -= sizeof(timestamp);
// Handle padding bytes.
- data += kAudioDataHeaderPadding;
- size -= kAudioDataHeaderPadding;
+ data += sizeof(int32_t);
+ size -= sizeof(int32_t);
return delegate_->HandleAudioBuffer(std::move(buffer), data, size, timestamp);
}
diff --git a/chromecast/media/audio/net/audio_socket.h b/chromecast/media/audio/net/audio_socket.h
index f5c73f873..229145c 100644
--- a/chromecast/media/audio/net/audio_socket.h
+++ b/chromecast/media/audio/net/audio_socket.h
@@ -87,20 +87,13 @@
// given message, a normal IOBuffer will be dynamically allocated instead.
void UseBufferPool(scoped_refptr<IOBufferPool> buffer_pool);
- // 16-bit type and 64-bit timestamp, plus padding to align the total message
- // header size to 32 bytes.
- static constexpr size_t kAudioDataHeaderPadding = 5 * sizeof(int32_t);
+ // 16-bit type and 64-bit timestamp, plus 32-bit padding to align to 16 bytes.
static constexpr size_t kAudioHeaderSize =
- sizeof(int16_t) + sizeof(int64_t) + kAudioDataHeaderPadding;
+ sizeof(int16_t) + sizeof(int64_t) + sizeof(int32_t);
// Includes additional 16-bit size field for SmallMessageSocket.
static constexpr size_t kAudioMessageHeaderSize =
sizeof(uint16_t) + kAudioHeaderSize;
- // Audio data following the header must be aligned to 32 bytes to allow
- // optimized SIMD vector math.
- static_assert(kAudioMessageHeaderSize % 32u == 0u,
- "kAudioMessageHeaderSize must be a multiple of 32");
-
// Fills in the audio message header for |buffer|, so it can later be sent via
// SendPreparedAudioBuffer(). |buffer| should have |kAudioMessageHeaderSize|
// bytes reserved at the start of the buffer, followed by |filled_bytes| of
diff --git a/chromecast/media/base/slew_volume.cc b/chromecast/media/base/slew_volume.cc
index b0225aa..715194d 100644
--- a/chromecast/media/base/slew_volume.cc
+++ b/chromecast/media/base/slew_volume.cc
@@ -15,7 +15,6 @@
#include "base/check_op.h"
#include "base/containers/span.h"
-#include "base/memory/aligned_memory.h"
#include "media/base/vector_math.h"
namespace {
@@ -158,9 +157,11 @@
float* dest) {
DCHECK(src);
DCHECK(dest);
- // Ensure |src| and |dest| are aligned.
- CHECK(base::IsAligned(src, ::media::vector_math::kRequiredAlignment));
- CHECK(base::IsAligned(dest, ::media::vector_math::kRequiredAlignment));
+ // Ensure |src| and |dest| are 16-byte aligned.
+ DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) &
+ (::media::vector_math::kRequiredAlignment - 1));
+ DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) &
+ (::media::vector_math::kRequiredAlignment - 1));
if (!frames) {
return;
diff --git a/chromecast/media/base/slew_volume.h b/chromecast/media/base/slew_volume.h
index 6287371..90494373 100644
--- a/chromecast/media/base/slew_volume.h
+++ b/chromecast/media/base/slew_volume.h
@@ -43,7 +43,7 @@
// |src| and |dest| are interleaved buffers with |channels| channels and at
// least |frames| frames (|channels| * |frames| total size).
// |src| and |dest| may be the same.
- // |src| and |dest| must be ::media::vector_math::kRequiredAlignment aligned.
+ // |src| and |dest| must be 16-byte aligned.
// If using planar data, |repeat_transition| should be true for channels 2
// through n, which will cause the slewing process to be repeated.
void ProcessFMAC(bool repeat_transition,
@@ -57,7 +57,7 @@
// |src| and |dest| are interleaved buffers with |channels| channels and at
// least |frames| frames (|channels| * |frames| total size).
// |src| and |dest| may be the same.
- // |src| and |dest| must be ::media::vector_math::kRequiredAlignment aligned.
+ // |src| and |dest| must be 16-byte aligned.
// If using planar data, |repeat_transition| should be true for channels 2
// through n, which will cause the slewing process to be repeated.
void ProcessFMUL(bool repeat_transition,
diff --git a/chromecast/media/base/slew_volume_unittests.cc b/chromecast/media/base/slew_volume_unittests.cc
index 00c9e9a3..0c3bbb2 100644
--- a/chromecast/media/base/slew_volume_unittests.cc
+++ b/chromecast/media/base/slew_volume_unittests.cc
@@ -121,9 +121,7 @@
}
void ClearInterrupted() {
- float throwaway
- __attribute__((__aligned__(::media::AudioBus::kChannelAlignment))) =
- 0.0f;
+ float throwaway __attribute__((__aligned__(16))) = 0.0f;
slew_volume_->ProcessFMUL(false, &throwaway, 1, 1, &throwaway);
}
diff --git a/chromecast/media/cma/backend/mixer/stream_mixer.cc b/chromecast/media/cma/backend/mixer/stream_mixer.cc
index 0bd8e7a..5b70cad 100644
--- a/chromecast/media/cma/backend/mixer/stream_mixer.cc
+++ b/chromecast/media/cma/backend/mixer/stream_mixer.cc
@@ -46,7 +46,6 @@
#include "chromecast/media/cma/backend/volume_map.h"
#include "chromecast/public/media/mixer_output_stream.h"
#include "media/audio/audio_device_description.h"
-#include "media/base/vector_math.h"
#define RUN_ON_MIXER_THREAD(method, ...) \
do { \
@@ -79,11 +78,6 @@
const int kMediaUnduckFadeMs = 700;
const int kDefaultFilterFrameAlignment = 64;
-// `kDefaultFilterFrameAlignment` must be as big as `kRequirementAlignment`, to
-// use vector_math helpers.
-static_assert(kDefaultFilterFrameAlignment >=
- ::media::vector_math::kRequiredAlignment);
-
constexpr base::TimeDelta kMixerThreadCheckTimeout = base::Seconds(10);
constexpr base::TimeDelta kHealthCheckInterval = base::Seconds(5);
diff --git a/chromecast/net/io_buffer_pool.cc b/chromecast/net/io_buffer_pool.cc
index 886652d..47323eb 100644
--- a/chromecast/net/io_buffer_pool.cc
+++ b/chromecast/net/io_buffer_pool.cc
@@ -71,10 +71,7 @@
class Wrapper;
union Storage;
- // Some of these buffers are passed to optimized SIMD audio algorithms and
- // have a minimum alignment requirement of
- // ::media::vector_math::kRequiredAlignment.
- static constexpr size_t kAlignment = 32;
+ static constexpr size_t kAlignment = 16;
static Storage* AllocateStorageUnionAndDataArea(size_t data_area_size);
static char* DataAreaFromStorageUnion(Storage* ptr);
diff --git a/media/audio/mac/audio_loopback_input_mac_unittest.mm b/media/audio/mac/audio_loopback_input_mac_unittest.mm
index 75d2c39..2f63d48 100644
--- a/media/audio/mac/audio_loopback_input_mac_unittest.mm
+++ b/media/audio/mac/audio_loopback_input_mac_unittest.mm
@@ -8,6 +8,7 @@
#endif
#include "media/audio/mac/audio_loopback_input_mac.h"
+#include "media/audio/mac/audio_loopback_input_mac_impl.h"
#include <ScreenCaptureKit/ScreenCaptureKit.h>
@@ -22,8 +23,6 @@
#include "base/task/single_thread_task_runner.h"
#include "base/test/task_environment.h"
#include "media/audio/audio_io.h"
-#include "media/audio/mac/audio_loopback_input_mac_impl.h"
-#include "media/base/audio_bus.h"
#include "media/base/audio_parameters.h"
#include "media/base/limits.h"
#include "media/base/mac/audio_latency_mac.h"
@@ -516,9 +515,8 @@
FakeAudioInputCallback sink;
stream->Start(&sink);
- // Buffer must be 32-bit aligned.
- alignas(AudioBus::kChannelAlignment) std::array<float, 2 * kFramesPerBuffer>
- buffer;
+ // Buffer must be 16-byte aligned.
+ alignas(16) std::array<float, 2 * kFramesPerBuffer> buffer;
for (size_t i = 0; i < buffer.size(); i++) {
buffer[i] = i;
}
diff --git a/media/base/audio_buffer.cc b/media/base/audio_buffer.cc
index 260e15f6..3589031 100644
--- a/media/base/audio_buffer.cc
+++ b/media/base/audio_buffer.cc
@@ -9,7 +9,6 @@
#include "media/base/audio_buffer.h"
-#include <algorithm>
#include <cmath>
#include "base/bits.h"
@@ -39,12 +38,16 @@
public:
explicit SelfOwnedMemory(size_t size)
: heap_array_(
- base::AlignedUninit<uint8_t>(size, AudioBus::kChannelAlignment)) {
+ base::HeapArray<uint8_t, base::AlignedFreeDeleter>::
+ FromOwningPointer(
+ static_cast<uint8_t*>(
+ base::AlignedAlloc(size, AudioBus::kChannelAlignment)),
+ size)) {
span_ = heap_array_.as_span();
}
private:
- base::AlignedHeapArray<uint8_t> heap_array_;
+ base::HeapArray<uint8_t, base::AlignedFreeDeleter> heap_array_;
};
std::unique_ptr<AudioBuffer::ExternalMemory> AllocateMemory(size_t size) {
@@ -59,16 +62,18 @@
sample_rate);
}
-AudioBufferMemoryPool::AudioBufferMemoryPool() = default;
+AudioBufferMemoryPool::AudioBufferMemoryPool(int alignment)
+ : alignment_(alignment) {}
AudioBufferMemoryPool::~AudioBufferMemoryPool() = default;
AudioBufferMemoryPool::ExternalMemoryFromPool::ExternalMemoryFromPool(
ExternalMemoryFromPool&& am) = default;
AudioBufferMemoryPool::ExternalMemoryFromPool::ExternalMemoryFromPool(
scoped_refptr<AudioBufferMemoryPool> pool,
- base::AlignedHeapArray<uint8_t> memory)
+ std::unique_ptr<uint8_t, base::AlignedFreeDeleter> memory,
+ size_t size)
: memory_(std::move(memory)), pool_(pool) {
- span_ = memory_.as_span();
+ span_ = {memory_.get(), size};
}
AudioBufferMemoryPool::ExternalMemoryFromPool::~ExternalMemoryFromPool() {
@@ -101,10 +106,11 @@
// FFmpeg may not always initialize the entire output memory, so just like
// for VideoFrames we need to zero out the memory. https://crbug.com/1144070.
- auto memory = base::AlignedUninit<uint8_t>(size, AudioBus::kChannelAlignment);
- std::ranges::fill(memory, 0);
+ auto memory = std::unique_ptr<uint8_t, base::AlignedFreeDeleter>(
+ static_cast<uint8_t*>(base::AlignedAlloc(size, GetChannelAlignment())));
+ memset(memory.get(), 0, size);
return std::make_unique<ExternalMemoryFromPool>(
- ExternalMemoryFromPool(this, std::move(memory)));
+ ExternalMemoryFromPool(this, std::move(memory), size));
}
void AudioBufferMemoryPool::ReturnBuffer(ExternalMemoryFromPool memory) {
@@ -142,8 +148,10 @@
DCHECK(channel_layout == CHANNEL_LAYOUT_DISCRETE ||
ChannelLayoutToChannelCount(channel_layout) == channel_count);
- const size_t bytes_per_channel = SampleFormatToBytesPerChannel(sample_format);
- CHECK_LE(bytes_per_channel, AudioBus::kChannelAlignment);
+ const int bytes_per_channel = SampleFormatToBytesPerChannel(sample_format);
+ const int channel_alignment =
+ pool_ ? pool_->GetChannelAlignment() : AudioBus::kChannelAlignment;
+ CHECK_LE(bytes_per_channel, channel_alignment);
// Empty buffer?
if (!create_buffer) {
@@ -176,13 +184,13 @@
std::ranges::fill(needs_zeroing, 0u);
return;
}
- size_t data_size_per_channel = frame_count * bytes_per_channel;
+ int data_size_per_channel = frame_count * bytes_per_channel;
if (IsPlanar(sample_format)) {
DCHECK(!IsBitstreamFormat()) << sample_format_;
// Planar data, so need to allocate buffer for each channel.
// Determine per channel data size, taking into account alignment.
- size_t block_size_per_channel =
- base::bits::AlignUp(data_size_per_channel, AudioBus::kChannelAlignment);
+ int block_size_per_channel = base::bits::AlignUpDeprecatedDoNotUse(
+ data_size_per_channel, channel_alignment);
DCHECK_GE(block_size_per_channel, data_size_per_channel);
// Allocate a contiguous buffer for all the channel data.
diff --git a/media/base/audio_buffer.h b/media/base/audio_buffer.h
index 16f03f2..b5d90c5 100644
--- a/media/base/audio_buffer.h
+++ b/media/base/audio_buffer.h
@@ -311,20 +311,23 @@
public:
REQUIRE_ADOPTION_FOR_REFCOUNTED_TYPE();
- AudioBufferMemoryPool();
+ explicit AudioBufferMemoryPool(int alignment = AudioBus::kChannelAlignment);
AudioBufferMemoryPool(const AudioBufferMemoryPool&) = delete;
AudioBufferMemoryPool& operator=(const AudioBufferMemoryPool&) = delete;
size_t GetPoolSizeForTesting();
+ int GetChannelAlignment() { return alignment_; }
struct ExternalMemoryFromPool : public AudioBuffer::ExternalMemory {
public:
- ExternalMemoryFromPool(scoped_refptr<AudioBufferMemoryPool> pool,
- base::AlignedHeapArray<uint8_t> memory);
+ ExternalMemoryFromPool(
+ scoped_refptr<AudioBufferMemoryPool> pool,
+ std::unique_ptr<uint8_t, base::AlignedFreeDeleter> memory,
+ size_t size);
ExternalMemoryFromPool(ExternalMemoryFromPool&&);
~ExternalMemoryFromPool() override;
- base::AlignedHeapArray<uint8_t> memory_;
+ std::unique_ptr<uint8_t, base::AlignedFreeDeleter> memory_;
scoped_refptr<AudioBufferMemoryPool> pool_;
};
@@ -337,6 +340,7 @@
std::unique_ptr<ExternalMemoryFromPool> CreateBuffer(size_t size);
void ReturnBuffer(ExternalMemoryFromPool memory);
+ const int alignment_;
base::Lock entry_lock_;
std::list<ExternalMemoryFromPool> entries_ GUARDED_BY(entry_lock_);
};
diff --git a/media/base/audio_buffer_unittest.cc b/media/base/audio_buffer_unittest.cc
index d45fd35e..1eddbc8f 100644
--- a/media/base/audio_buffer_unittest.cc
+++ b/media/base/audio_buffer_unittest.cc
@@ -14,7 +14,6 @@
#include <limits>
#include <memory>
-#include "base/memory/aligned_memory.h"
#include "base/memory/scoped_refptr.h"
#include "base/test/gtest_util.h"
#include "base/time/time.h"
@@ -827,18 +826,19 @@
// Test that the channels are aligned according to the pool parameter.
TEST(AudioBufferTest, AudioBufferMemoryPoolAlignment) {
+ const int kAlignment = 512;
const ChannelLayout kChannelLayout = CHANNEL_LAYOUT_6_1;
const size_t kChannelCount = ChannelLayoutToChannelCount(kChannelLayout);
- auto pool = base::MakeRefCounted<AudioBufferMemoryPool>();
+ auto pool = base::MakeRefCounted<AudioBufferMemoryPool>(kAlignment);
scoped_refptr<AudioBuffer> buffer =
AudioBuffer::CreateBuffer(kSampleFormatPlanarU8, kChannelLayout,
kChannelCount, kSampleRate, kSampleRate, pool);
ASSERT_EQ(kChannelCount, buffer->channel_data().size());
for (size_t i = 0; i < kChannelCount; i++) {
- EXPECT_TRUE(
- base::IsAligned(buffer->channel_data()[i], AudioBus::kChannelAlignment))
+ EXPECT_EQ(
+ 0u, reinterpret_cast<uintptr_t>(buffer->channel_data()[i]) % kAlignment)
<< " channel: " << i;
}
@@ -857,8 +857,8 @@
ASSERT_EQ(kChannelCount, buffer->channel_data().size());
for (size_t i = 0; i < kChannelCount; i++) {
- EXPECT_TRUE(
- base::IsAligned(buffer->channel_data()[i], AudioBus::kChannelAlignment))
+ EXPECT_EQ(0u, reinterpret_cast<uintptr_t>(buffer->channel_data()[i]) %
+ AudioBus::kChannelAlignment)
<< " channel: " << i;
}
}
diff --git a/media/base/audio_bus.h b/media/base/audio_bus.h
index 17a34b3b..4649d22 100644
--- a/media/base/audio_bus.h
+++ b/media/base/audio_bus.h
@@ -38,9 +38,9 @@
using ConstChannel = base::span<const float>;
using ChannelVector = std::vector<Channel>;
- // Guaranteed alignment of each channel's data; use 32-byte alignment for easy
+ // Guaranteed alignment of each channel's data; use 16-byte alignment for easy
// SSE optimizations.
- static constexpr size_t kChannelAlignment = 32;
+ static constexpr size_t kChannelAlignment = 16;
// Creates a new AudioBus and allocates |channels| of length |frames|. Uses
// channels() and frames_per_buffer() from AudioParameters if given.
diff --git a/media/base/audio_parameters.h b/media/base/audio_parameters.h
index e7f5358..5e9aeea 100644
--- a/media/base/audio_parameters.h
+++ b/media/base/audio_parameters.h
@@ -34,7 +34,7 @@
#pragma warning(push)
#pragma warning(disable : 4324) // Disable warning for added padding.
#endif
-inline constexpr int kParametersAlignment = 32;
+constexpr int kParametersAlignment = 16;
// ****WARNING****: Do not change the field types or ordering of these fields
// without checking that alignment is correct. The structs may be concurrently
diff --git a/media/base/vector_math.cc b/media/base/vector_math.cc
index fdf3b5d..cdbdff7 100644
--- a/media/base/vector_math.cc
+++ b/media/base/vector_math.cc
@@ -44,8 +44,8 @@
return;
}
CHECK_LE(src.size(), dest.size());
- CHECK(base::IsAligned(src.data(), kRequiredAlignment));
- CHECK(base::IsAligned(dest.data(), kRequiredAlignment));
+ DCHECK(base::IsAligned(src.data(), kRequiredAlignment));
+ DCHECK(base::IsAligned(dest.data(), kRequiredAlignment));
static const auto fmac_func = [] {
#if defined(ARCH_CPU_X86_FAMILY) && !BUILDFLAG(IS_NACL)
base::CPU cpu;
@@ -73,8 +73,8 @@
return;
}
CHECK_LE(src.size(), dest.size());
- CHECK(base::IsAligned(src.data(), kRequiredAlignment));
- CHECK(base::IsAligned(dest.data(), kRequiredAlignment));
+ DCHECK(base::IsAligned(src.data(), kRequiredAlignment));
+ DCHECK(base::IsAligned(dest.data(), kRequiredAlignment));
static const auto fmul_func = [] {
#if defined(ARCH_CPU_X86_FAMILY) && !BUILDFLAG(IS_NACL)
base::CPU cpu;
@@ -135,7 +135,7 @@
std::pair<float, float> EWMAAndMaxPower(float initial_value,
base::span<const float> src,
float smoothing_factor) {
- CHECK(base::IsAligned(src.data(), kRequiredAlignment));
+ DCHECK(base::IsAligned(src.data(), kRequiredAlignment));
static const auto ewma_and_max_power_func = [] {
#if defined(ARCH_CPU_X86_FAMILY) && !BUILDFLAG(IS_NACL)
base::CPU cpu;
@@ -171,9 +171,6 @@
#if defined(ARCH_CPU_X86_FAMILY) && !BUILDFLAG(IS_NACL)
void FMUL_SSE(const float src[], float scale, int len, float dest[]) {
- DCHECK(base::IsAligned(src, kRequiredAlignment));
- DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
const int rem = len % 4;
const int last_index = len - rem;
__m128 m_scale = _mm_set_ps1(scale);
@@ -191,14 +188,37 @@
float scale,
int len,
float dest[]) {
- DCHECK(base::IsAligned(src, kRequiredAlignment));
- DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
const int rem = len % 8;
const int last_index = len - rem;
__m256 m_scale = _mm256_set1_ps(scale);
- for (int i = 0; i < last_index; i += 8) {
- _mm256_store_ps(dest + i, _mm256_mul_ps(_mm256_load_ps(src + i), m_scale));
+ // TODO(crbug.com/40756517): Remove below alignment conditionals when AudioBus
+ // |kChannelAlignment| is updated to 32.
+ bool aligned_src = (reinterpret_cast<uintptr_t>(src) & 0x1F) == 0;
+ bool aligned_dest = (reinterpret_cast<uintptr_t>(dest) & 0x1F) == 0;
+ if (aligned_src) {
+ if (aligned_dest) {
+ for (int i = 0; i < last_index; i += 8) {
+ _mm256_store_ps(dest + i,
+ _mm256_mul_ps(_mm256_load_ps(src + i), m_scale));
+ }
+ } else {
+ for (int i = 0; i < last_index; i += 8) {
+ _mm256_storeu_ps(dest + i,
+ _mm256_mul_ps(_mm256_load_ps(src + i), m_scale));
+ }
+ }
+ } else {
+ if (aligned_dest) {
+ for (int i = 0; i < last_index; i += 8) {
+ _mm256_store_ps(dest + i,
+ _mm256_mul_ps(_mm256_loadu_ps(src + i), m_scale));
+ }
+ } else {
+ for (int i = 0; i < last_index; i += 8) {
+ _mm256_storeu_ps(dest + i,
+ _mm256_mul_ps(_mm256_loadu_ps(src + i), m_scale));
+ }
+ }
}
// Handle any remaining values that wouldn't fit in an SSE pass.
@@ -208,9 +228,6 @@
}
void FMAC_SSE(const float src[], float scale, int len, float dest[]) {
- DCHECK(base::IsAligned(src, kRequiredAlignment));
- DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
const int rem = len % 4;
const int last_index = len - rem;
__m128 m_scale = _mm_set_ps1(scale);
@@ -233,9 +250,38 @@
const int rem = len % 8;
const int last_index = len - rem;
__m256 m_scale = _mm256_set1_ps(scale);
- for (int i = 0; i < last_index; i += 8) {
- _mm256_store_ps(dest + i, _mm256_fmadd_ps(_mm256_load_ps(src + i), m_scale,
- _mm256_load_ps(dest + i)));
+ // TODO(crbug.com/40756517): Remove below alignment conditionals when AudioBus
+ // |kChannelAlignment| is updated to 32.
+ bool aligned_src = (reinterpret_cast<uintptr_t>(src) & 0x1F) == 0;
+ bool aligned_dest = (reinterpret_cast<uintptr_t>(dest) & 0x1F) == 0;
+ if (aligned_src) {
+ if (aligned_dest) {
+ for (int i = 0; i < last_index; i += 8) {
+ _mm256_store_ps(dest + i,
+ _mm256_fmadd_ps(_mm256_load_ps(src + i), m_scale,
+ _mm256_load_ps(dest + i)));
+ }
+ } else {
+ for (int i = 0; i < last_index; i += 8) {
+ _mm256_storeu_ps(dest + i,
+ _mm256_fmadd_ps(_mm256_load_ps(src + i), m_scale,
+ _mm256_loadu_ps(dest + i)));
+ }
+ }
+ } else {
+ if (aligned_dest) {
+ for (int i = 0; i < last_index; i += 8) {
+ _mm256_store_ps(dest + i,
+ _mm256_fmadd_ps(_mm256_loadu_ps(src + i), m_scale,
+ _mm256_load_ps(dest + i)));
+ }
+ } else {
+ for (int i = 0; i < last_index; i += 8) {
+ _mm256_storeu_ps(dest + i,
+ _mm256_fmadd_ps(_mm256_loadu_ps(src + i), m_scale,
+ _mm256_loadu_ps(dest + i)));
+ }
+ }
}
// Handle any remaining values that wouldn't fit in an SSE pass.
@@ -244,9 +290,6 @@
}
}
void FCLAMP_SSE(const float src[], int len, float dest[]) {
- DCHECK(base::IsAligned(src, kRequiredAlignment));
- DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
const int rem = len % 4;
const int last_index = len - rem;
const __m128 m_min = _mm_set_ps1(kClampMin);
@@ -271,27 +314,63 @@
}
}
+inline __attribute__((target("avx"))) __m256 SanitizeNan(const __m256 values) {
+ // Compare each value with itself. Since NaN != NaN, we end up with a mask
+ // with 0s instead of NaNs, and 1s for the original values.
+ const __m256 valid_mask = _mm256_cmp_ps(values, values, _CMP_EQ_OQ);
+
+ // Zero-out all NaNs by applying the mask with a logical AND.
+ return _mm256_and_ps(valid_mask, values);
+}
+
__attribute__((target("avx"))) void FCLAMP_AVX(const float src[],
int len,
float dest[]) {
- DCHECK(base::IsAligned(src, kRequiredAlignment));
- DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
const int rem = len % 8;
const int last_index = len - rem;
const __m256 m_max = _mm256_set1_ps(kClampMax);
const __m256 m_min = _mm256_set1_ps(kClampMin);
- for (int i = 0; i < last_index; i += 8) {
- const __m256 values = _mm256_load_ps(src + i);
- // Compare each value with itself. Since NaN != NaN, we end up with a mask
- // with 0s instead of NaNs, and 1s for the original values.
- const __m256 comparisons = _mm256_cmp_ps(values, values, _CMP_EQ_OQ);
- // Zero-out all NaNs by applying the mask with a logical AND.
- const __m256 sanitized_values = _mm256_and_ps(comparisons, values);
-
- _mm256_store_ps(
- dest + i, _mm256_max_ps(_mm256_min_ps(sanitized_values, m_max), m_min));
+ // TODO(crbug.com/40756517): Remove below alignment conditionals when AudioBus
+ // |kChannelAlignment| is updated to 32.
+ bool aligned_src = (reinterpret_cast<uintptr_t>(src) & 0x1F) == 0;
+ bool aligned_dest = (reinterpret_cast<uintptr_t>(dest) & 0x1F) == 0;
+ if (aligned_src) {
+ if (aligned_dest) {
+ for (int i = 0; i < last_index; i += 8) {
+ _mm256_store_ps(
+ dest + i,
+ _mm256_max_ps(
+ _mm256_min_ps(SanitizeNan(_mm256_load_ps(src + i)), m_max),
+ m_min));
+ }
+ } else {
+ for (int i = 0; i < last_index; i += 8) {
+ _mm256_storeu_ps(
+ dest + i,
+ _mm256_max_ps(
+ _mm256_min_ps(SanitizeNan(_mm256_load_ps(src + i)), m_max),
+ m_min));
+ }
+ }
+ } else {
+ if (aligned_dest) {
+ for (int i = 0; i < last_index; i += 8) {
+ _mm256_store_ps(
+ dest + i,
+ _mm256_max_ps(
+ _mm256_min_ps(SanitizeNan(_mm256_loadu_ps(src + i)), m_max),
+ m_min));
+ }
+ } else {
+ for (int i = 0; i < last_index; i += 8) {
+ _mm256_storeu_ps(
+ dest + i,
+ _mm256_max_ps(
+ _mm256_min_ps(SanitizeNan(_mm256_loadu_ps(src + i)), m_max),
+ m_min));
+ }
+ }
}
// Handle any remaining values that wouldn't fit in an AVX2 pass.
@@ -325,7 +404,6 @@
// Thus, the strategy here is to compute z[n], z[n-1], z[n-2], and z[n-3] in
// each of the 4 lanes, and then combine them to give y[n].
- DCHECK(base::IsAligned(src, kRequiredAlignment));
const int rem = len % 4;
const int last_index = len - rem;
@@ -386,8 +464,6 @@
const float src[],
int len,
float smoothing_factor) {
- DCHECK(base::IsAligned(src, kRequiredAlignment));
-
const int rem = len % 8;
const int last_index = len - rem;
const float weight_prev = 1.0f - smoothing_factor;
@@ -459,9 +535,6 @@
#if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
void FMAC_NEON(const float src[], float scale, int len, float dest[]) {
- DCHECK(base::IsAligned(src, kRequiredAlignment));
- DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
const int rem = len % 4;
const int last_index = len - rem;
float32x4_t m_scale = vmovq_n_f32(scale);
@@ -477,9 +550,6 @@
}
void FMUL_NEON(const float src[], float scale, int len, float dest[]) {
- DCHECK(base::IsAligned(src, kRequiredAlignment));
- DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
const int rem = len % 4;
const int last_index = len - rem;
float32x4_t m_scale = vmovq_n_f32(scale);
@@ -494,9 +564,6 @@
}
void FCLAMP_NEON(const float src[], int len, float dest[]) {
- DCHECK(base::IsAligned(src, kRequiredAlignment));
- DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
const int rem = len % 4;
const int last_index = len - rem;
const float32x4_t m_min = vmovq_n_f32(kClampMin);
@@ -528,8 +595,6 @@
const float src[],
int len,
float smoothing_factor) {
- DCHECK(base::IsAligned(src, kRequiredAlignment));
-
// When the recurrence is unrolled, we see that we can split it into 4
// separate lanes of evaluation:
//
diff --git a/media/base/vector_math.h b/media/base/vector_math.h
index d2a8dad..0fab357 100644
--- a/media/base/vector_math.h
+++ b/media/base/vector_math.h
@@ -13,7 +13,7 @@
namespace media::vector_math {
// Required alignment for inputs and outputs to all vector math functions
-inline constexpr size_t kRequiredAlignment = 32;
+enum { kRequiredAlignment = 16 };
// Multiply each element of `src` by `scale` and add to `dest`.
// `src` and `dest` must be aligned by `kRequiredAlignment`.
diff --git a/media/ffmpeg/ffmpeg_common.h b/media/ffmpeg/ffmpeg_common.h
index 0dd3b23f..0225b37 100644
--- a/media/ffmpeg/ffmpeg_common.h
+++ b/media/ffmpeg/ffmpeg_common.h
@@ -49,7 +49,11 @@
// Alignment requirement by FFmpeg for input and output buffers. This need to
// be updated to match FFmpeg when it changes.
+#if defined(ARCH_CPU_ARM_FAMILY)
+constexpr inline int kFFmpegBufferAddressAlignment = 16;
+#else
constexpr inline int kFFmpegBufferAddressAlignment = 32;
+#endif
class AudioDecoderConfig;
class VideoDecoderConfig;
diff --git a/media/filters/ffmpeg_audio_decoder.cc b/media/filters/ffmpeg_audio_decoder.cc
index 76a6f12..f6b986f 100644
--- a/media/filters/ffmpeg_audio_decoder.cc
+++ b/media/filters/ffmpeg_audio_decoder.cc
@@ -55,10 +55,6 @@
static_cast<AudioBuffer*>(opaque)->Release();
}
-// AudioBufferMemoryPool uses `AudioBus::kChannelAlignment` under the hood,
-// which must be aligned to at least `kFFmpegBufferAddressAlignment`.
-static_assert(kFFmpegBufferAddressAlignment == AudioBus::kChannelAlignment);
-
FFmpegAudioDecoder::FFmpegAudioDecoder(
const scoped_refptr<base::SequencedTaskRunner>& task_runner,
MediaLog* media_log)
@@ -66,7 +62,8 @@
state_(DecoderState::kUninitialized),
av_sample_format_(0),
media_log_(media_log),
- pool_(base::MakeRefCounted<AudioBufferMemoryPool>()) {
+ pool_(base::MakeRefCounted<AudioBufferMemoryPool>(
+ kFFmpegBufferAddressAlignment)) {
DETACH_FROM_SEQUENCE(sequence_checker_);
}
diff --git a/third_party/blink/renderer/platform/audio/audio_array.h b/third_party/blink/renderer/platform/audio/audio_array.h
index d7965b8..e7d13b3 100644
--- a/third_party/blink/renderer/platform/audio/audio_array.h
+++ b/third_party/blink/renderer/platform/audio/audio_array.h
@@ -67,15 +67,18 @@
CHECK_LE(n, std::numeric_limits<unsigned>::max() / sizeof(T));
uint32_t initial_size = static_cast<uint32_t>(sizeof(T) * n);
+ // Minimum alignment requirements for arrays so that we can use
+ // SIMD.
+#if defined(ARCH_CPU_X86_FAMILY)
+ const unsigned kAlignment = 32;
+#else
+ const unsigned kAlignment = 16;
+#endif
+
if (allocation_) {
WTF::Partitions::FastFree(allocation_);
}
- // Minimum alignment requirements for arrays so that we can use SIMD.
- // This value matches media::AudioBus::kChannelAlignment, for ease of
- // interop with media::AudioBus.
- static constexpr unsigned kAlignment = 32;
-
// Always allocate extra space so that we are guaranteed to get
// the desired alignment. Some memory is wasted, but it should be
// small since most arrays are probably at least 128 floats (or