Revert "Increase AudioBus alignment to 32 bytes"

This reverts commit 3d02dd3cc9b5963094875cc0f9ea0eb21f6a2a28.

Reason for revert: might make Mac tests fail http://b/411562684

Bug: 40756517
Original change's description:
> Increase AudioBus alignment to 32 bytes
>
> This CL bumps up AudioBus::kChannelAlignment from 16 bytes to 32 bytes.
>
> This allows us to clean up some vector math operations, and ensure that
> we are using the most efficient load and store operations.
>
> This CL also cleans up code which handled 16 and 32 bit alignments, and
> opportunistically replaces some manually aligned memory to use
> base::AlignedHeapArray instead.
>
> Bug: 40756517
> Change-Id: Id3eb4bef8572c83c0f12496b28c2cf80348ab9eb
> Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6319267
> Commit-Queue: Simeon Anfinrud <sanfin@chromium.org>
> Reviewed-by: Hongchan Choi <hongchan@chromium.org>
> Reviewed-by: Eugene Zemtsov <eugene@chromium.org>
> Auto-Submit: Thomas Guilbert <tguilbert@chromium.org>
> Reviewed-by: Simeon Anfinrud <sanfin@chromium.org>
> Cr-Commit-Position: refs/heads/main@{#1448656}

Bug: 40756517
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Change-Id: If7bbe6481fae270b94ed5e84a82fbe58ecbc9f15
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/6470771
Commit-Queue: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Owners-Override: Lingqi Chi <lingqi@chromium.org>
Bot-Commit: Rubber Stamper <rubber-stamper@appspot.gserviceaccount.com>
Auto-Submit: Lingqi Chi <lingqi@chromium.org>
Cr-Commit-Position: refs/heads/main@{#1448797}
diff --git a/chromecast/media/audio/net/audio_socket.cc b/chromecast/media/audio/net/audio_socket.cc
index b54dd72b..8230d2b 100644
--- a/chromecast/media/audio/net/audio_socket.cc
+++ b/chromecast/media/audio/net/audio_socket.cc
@@ -88,7 +88,6 @@
 }
 
 // static
-constexpr size_t AudioSocket::kAudioDataHeaderPadding;
 constexpr size_t AudioSocket::kAudioHeaderSize;
 constexpr size_t AudioSocket::kAudioMessageHeaderSize;
 
@@ -144,7 +143,7 @@
   //   == AudioHeader ==
   //   uint16_t type (audio or metadata)
   //   uint64_t timestamp
-  //   5 * uint32_t padding
+  //   uint32_t padding
   //   == End of AudioHeader ==
   //   ... audio data ...
 
@@ -166,7 +165,7 @@
       base::byte_span_from_ref(timestamp));
   buffer = buffer.subspan(sizeof(uint64_t));
 
-  std::ranges::fill(buffer.first<kAudioDataHeaderPadding>(), uint8_t{0});
+  std::ranges::fill(buffer.first<sizeof(uint32_t)>(), uint8_t{0});
 }
 
 bool AudioSocket::SendAudioBuffer(scoped_refptr<net::IOBuffer> audio_buffer,
@@ -363,8 +362,8 @@
   size -= sizeof(timestamp);
 
   // Handle padding bytes.
-  data += kAudioDataHeaderPadding;
-  size -= kAudioDataHeaderPadding;
+  data += sizeof(int32_t);
+  size -= sizeof(int32_t);
 
   return delegate_->HandleAudioData(data, size, timestamp);
 }
@@ -384,8 +383,8 @@
   size -= sizeof(timestamp);
 
   // Handle padding bytes.
-  data += kAudioDataHeaderPadding;
-  size -= kAudioDataHeaderPadding;
+  data += sizeof(int32_t);
+  size -= sizeof(int32_t);
 
   return delegate_->HandleAudioBuffer(std::move(buffer), data, size, timestamp);
 }
diff --git a/chromecast/media/audio/net/audio_socket.h b/chromecast/media/audio/net/audio_socket.h
index f5c73f873..229145c 100644
--- a/chromecast/media/audio/net/audio_socket.h
+++ b/chromecast/media/audio/net/audio_socket.h
@@ -87,20 +87,13 @@
   // given message, a normal IOBuffer will be dynamically allocated instead.
   void UseBufferPool(scoped_refptr<IOBufferPool> buffer_pool);
 
-  // 16-bit type and 64-bit timestamp, plus padding to align the total message
-  // header size to 32 bytes.
-  static constexpr size_t kAudioDataHeaderPadding = 5 * sizeof(int32_t);
+  // 16-bit type and 64-bit timestamp, plus 32-bit padding to align to 16 bytes.
   static constexpr size_t kAudioHeaderSize =
-      sizeof(int16_t) + sizeof(int64_t) + kAudioDataHeaderPadding;
+      sizeof(int16_t) + sizeof(int64_t) + sizeof(int32_t);
   // Includes additional 16-bit size field for SmallMessageSocket.
   static constexpr size_t kAudioMessageHeaderSize =
       sizeof(uint16_t) + kAudioHeaderSize;
 
-  // Audio data following the header must be aligned to 32 bytes to allow
-  // optimized SIMD vector math.
-  static_assert(kAudioMessageHeaderSize % 32u == 0u,
-                "kAudioMessageHeaderSize must be a multiple of 32");
-
   // Fills in the audio message header for |buffer|, so it can later be sent via
   // SendPreparedAudioBuffer(). |buffer| should have |kAudioMessageHeaderSize|
   // bytes reserved at the start of the buffer, followed by |filled_bytes| of
diff --git a/chromecast/media/base/slew_volume.cc b/chromecast/media/base/slew_volume.cc
index b0225aa..715194d 100644
--- a/chromecast/media/base/slew_volume.cc
+++ b/chromecast/media/base/slew_volume.cc
@@ -15,7 +15,6 @@
 
 #include "base/check_op.h"
 #include "base/containers/span.h"
-#include "base/memory/aligned_memory.h"
 #include "media/base/vector_math.h"
 
 namespace {
@@ -158,9 +157,11 @@
                              float* dest) {
   DCHECK(src);
   DCHECK(dest);
-  // Ensure |src| and |dest| are aligned.
-  CHECK(base::IsAligned(src, ::media::vector_math::kRequiredAlignment));
-  CHECK(base::IsAligned(dest, ::media::vector_math::kRequiredAlignment));
+  // Ensure |src| and |dest| are 16-byte aligned.
+  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(src) &
+                    (::media::vector_math::kRequiredAlignment - 1));
+  DCHECK_EQ(0u, reinterpret_cast<uintptr_t>(dest) &
+                    (::media::vector_math::kRequiredAlignment - 1));
 
   if (!frames) {
     return;
diff --git a/chromecast/media/base/slew_volume.h b/chromecast/media/base/slew_volume.h
index 6287371..90494373 100644
--- a/chromecast/media/base/slew_volume.h
+++ b/chromecast/media/base/slew_volume.h
@@ -43,7 +43,7 @@
   // |src| and |dest| are interleaved buffers with |channels| channels and at
   // least |frames| frames (|channels| * |frames| total size).
   // |src| and |dest| may be the same.
-  // |src| and |dest| must be ::media::vector_math::kRequiredAlignment aligned.
+  // |src| and |dest| must be 16-byte aligned.
   // If using planar data, |repeat_transition| should be true for channels 2
   // through n, which will cause the slewing process to be repeated.
   void ProcessFMAC(bool repeat_transition,
@@ -57,7 +57,7 @@
   // |src| and |dest| are interleaved buffers with |channels| channels and at
   // least |frames| frames (|channels| * |frames| total size).
   // |src| and |dest| may be the same.
-  // |src| and |dest| must be ::media::vector_math::kRequiredAlignment aligned.
+  // |src| and |dest| must be 16-byte aligned.
   // If using planar data, |repeat_transition| should be true for channels 2
   // through n, which will cause the slewing process to be repeated.
   void ProcessFMUL(bool repeat_transition,
diff --git a/chromecast/media/base/slew_volume_unittests.cc b/chromecast/media/base/slew_volume_unittests.cc
index 00c9e9a3..0c3bbb2 100644
--- a/chromecast/media/base/slew_volume_unittests.cc
+++ b/chromecast/media/base/slew_volume_unittests.cc
@@ -121,9 +121,7 @@
   }
 
   void ClearInterrupted() {
-    float throwaway
-        __attribute__((__aligned__(::media::AudioBus::kChannelAlignment))) =
-            0.0f;
+    float throwaway __attribute__((__aligned__(16))) = 0.0f;
     slew_volume_->ProcessFMUL(false, &throwaway, 1, 1, &throwaway);
   }
 
diff --git a/chromecast/media/cma/backend/mixer/stream_mixer.cc b/chromecast/media/cma/backend/mixer/stream_mixer.cc
index 0bd8e7a..5b70cad 100644
--- a/chromecast/media/cma/backend/mixer/stream_mixer.cc
+++ b/chromecast/media/cma/backend/mixer/stream_mixer.cc
@@ -46,7 +46,6 @@
 #include "chromecast/media/cma/backend/volume_map.h"
 #include "chromecast/public/media/mixer_output_stream.h"
 #include "media/audio/audio_device_description.h"
-#include "media/base/vector_math.h"
 
 #define RUN_ON_MIXER_THREAD(method, ...)                                   \
   do {                                                                     \
@@ -79,11 +78,6 @@
 const int kMediaUnduckFadeMs = 700;
 const int kDefaultFilterFrameAlignment = 64;
 
-// `kDefaultFilterFrameAlignment` must be as big as `kRequirementAlignment`, to
-// use vector_math helpers.
-static_assert(kDefaultFilterFrameAlignment >=
-              ::media::vector_math::kRequiredAlignment);
-
 constexpr base::TimeDelta kMixerThreadCheckTimeout = base::Seconds(10);
 constexpr base::TimeDelta kHealthCheckInterval = base::Seconds(5);
 
diff --git a/chromecast/net/io_buffer_pool.cc b/chromecast/net/io_buffer_pool.cc
index 886652d..47323eb 100644
--- a/chromecast/net/io_buffer_pool.cc
+++ b/chromecast/net/io_buffer_pool.cc
@@ -71,10 +71,7 @@
   class Wrapper;
   union Storage;
 
-  // Some of these buffers are passed to optimized SIMD audio algorithms and
-  // have a minimum alignment requirement of
-  // ::media::vector_math::kRequiredAlignment.
-  static constexpr size_t kAlignment = 32;
+  static constexpr size_t kAlignment = 16;
 
   static Storage* AllocateStorageUnionAndDataArea(size_t data_area_size);
   static char* DataAreaFromStorageUnion(Storage* ptr);
diff --git a/media/audio/mac/audio_loopback_input_mac_unittest.mm b/media/audio/mac/audio_loopback_input_mac_unittest.mm
index 75d2c39..2f63d48 100644
--- a/media/audio/mac/audio_loopback_input_mac_unittest.mm
+++ b/media/audio/mac/audio_loopback_input_mac_unittest.mm
@@ -8,6 +8,7 @@
 #endif
 
 #include "media/audio/mac/audio_loopback_input_mac.h"
+#include "media/audio/mac/audio_loopback_input_mac_impl.h"
 
 #include <ScreenCaptureKit/ScreenCaptureKit.h>
 
@@ -22,8 +23,6 @@
 #include "base/task/single_thread_task_runner.h"
 #include "base/test/task_environment.h"
 #include "media/audio/audio_io.h"
-#include "media/audio/mac/audio_loopback_input_mac_impl.h"
-#include "media/base/audio_bus.h"
 #include "media/base/audio_parameters.h"
 #include "media/base/limits.h"
 #include "media/base/mac/audio_latency_mac.h"
@@ -516,9 +515,8 @@
     FakeAudioInputCallback sink;
     stream->Start(&sink);
 
-    // Buffer must be 32-bit aligned.
-    alignas(AudioBus::kChannelAlignment) std::array<float, 2 * kFramesPerBuffer>
-        buffer;
+    // Buffer must be 16-byte aligned.
+    alignas(16) std::array<float, 2 * kFramesPerBuffer> buffer;
     for (size_t i = 0; i < buffer.size(); i++) {
       buffer[i] = i;
     }
diff --git a/media/base/audio_buffer.cc b/media/base/audio_buffer.cc
index 260e15f6..3589031 100644
--- a/media/base/audio_buffer.cc
+++ b/media/base/audio_buffer.cc
@@ -9,7 +9,6 @@
 
 #include "media/base/audio_buffer.h"
 
-#include <algorithm>
 #include <cmath>
 
 #include "base/bits.h"
@@ -39,12 +38,16 @@
  public:
   explicit SelfOwnedMemory(size_t size)
       : heap_array_(
-            base::AlignedUninit<uint8_t>(size, AudioBus::kChannelAlignment)) {
+            base::HeapArray<uint8_t, base::AlignedFreeDeleter>::
+                FromOwningPointer(
+                    static_cast<uint8_t*>(
+                        base::AlignedAlloc(size, AudioBus::kChannelAlignment)),
+                    size)) {
     span_ = heap_array_.as_span();
   }
 
  private:
-  base::AlignedHeapArray<uint8_t> heap_array_;
+  base::HeapArray<uint8_t, base::AlignedFreeDeleter> heap_array_;
 };
 
 std::unique_ptr<AudioBuffer::ExternalMemory> AllocateMemory(size_t size) {
@@ -59,16 +62,18 @@
                             sample_rate);
 }
 
-AudioBufferMemoryPool::AudioBufferMemoryPool() = default;
+AudioBufferMemoryPool::AudioBufferMemoryPool(int alignment)
+    : alignment_(alignment) {}
 AudioBufferMemoryPool::~AudioBufferMemoryPool() = default;
 
 AudioBufferMemoryPool::ExternalMemoryFromPool::ExternalMemoryFromPool(
     ExternalMemoryFromPool&& am) = default;
 AudioBufferMemoryPool::ExternalMemoryFromPool::ExternalMemoryFromPool(
     scoped_refptr<AudioBufferMemoryPool> pool,
-    base::AlignedHeapArray<uint8_t> memory)
+    std::unique_ptr<uint8_t, base::AlignedFreeDeleter> memory,
+    size_t size)
     : memory_(std::move(memory)), pool_(pool) {
-  span_ = memory_.as_span();
+  span_ = {memory_.get(), size};
 }
 
 AudioBufferMemoryPool::ExternalMemoryFromPool::~ExternalMemoryFromPool() {
@@ -101,10 +106,11 @@
 
   // FFmpeg may not always initialize the entire output memory, so just like
   // for VideoFrames we need to zero out the memory. https://crbug.com/1144070.
-  auto memory = base::AlignedUninit<uint8_t>(size, AudioBus::kChannelAlignment);
-  std::ranges::fill(memory, 0);
+  auto memory = std::unique_ptr<uint8_t, base::AlignedFreeDeleter>(
+      static_cast<uint8_t*>(base::AlignedAlloc(size, GetChannelAlignment())));
+  memset(memory.get(), 0, size);
   return std::make_unique<ExternalMemoryFromPool>(
-      ExternalMemoryFromPool(this, std::move(memory)));
+      ExternalMemoryFromPool(this, std::move(memory), size));
 }
 
 void AudioBufferMemoryPool::ReturnBuffer(ExternalMemoryFromPool memory) {
@@ -142,8 +148,10 @@
   DCHECK(channel_layout == CHANNEL_LAYOUT_DISCRETE ||
          ChannelLayoutToChannelCount(channel_layout) == channel_count);
 
-  const size_t bytes_per_channel = SampleFormatToBytesPerChannel(sample_format);
-  CHECK_LE(bytes_per_channel, AudioBus::kChannelAlignment);
+  const int bytes_per_channel = SampleFormatToBytesPerChannel(sample_format);
+  const int channel_alignment =
+      pool_ ? pool_->GetChannelAlignment() : AudioBus::kChannelAlignment;
+  CHECK_LE(bytes_per_channel, channel_alignment);
 
   // Empty buffer?
   if (!create_buffer) {
@@ -176,13 +184,13 @@
     std::ranges::fill(needs_zeroing, 0u);
     return;
   }
-  size_t data_size_per_channel = frame_count * bytes_per_channel;
+  int data_size_per_channel = frame_count * bytes_per_channel;
   if (IsPlanar(sample_format)) {
     DCHECK(!IsBitstreamFormat()) << sample_format_;
     // Planar data, so need to allocate buffer for each channel.
     // Determine per channel data size, taking into account alignment.
-    size_t block_size_per_channel =
-        base::bits::AlignUp(data_size_per_channel, AudioBus::kChannelAlignment);
+    int block_size_per_channel = base::bits::AlignUpDeprecatedDoNotUse(
+        data_size_per_channel, channel_alignment);
     DCHECK_GE(block_size_per_channel, data_size_per_channel);
 
     // Allocate a contiguous buffer for all the channel data.
diff --git a/media/base/audio_buffer.h b/media/base/audio_buffer.h
index 16f03f2..b5d90c5 100644
--- a/media/base/audio_buffer.h
+++ b/media/base/audio_buffer.h
@@ -311,20 +311,23 @@
  public:
   REQUIRE_ADOPTION_FOR_REFCOUNTED_TYPE();
 
-  AudioBufferMemoryPool();
+  explicit AudioBufferMemoryPool(int alignment = AudioBus::kChannelAlignment);
   AudioBufferMemoryPool(const AudioBufferMemoryPool&) = delete;
   AudioBufferMemoryPool& operator=(const AudioBufferMemoryPool&) = delete;
 
   size_t GetPoolSizeForTesting();
+  int GetChannelAlignment() { return alignment_; }
 
   struct ExternalMemoryFromPool : public AudioBuffer::ExternalMemory {
    public:
-    ExternalMemoryFromPool(scoped_refptr<AudioBufferMemoryPool> pool,
-                           base::AlignedHeapArray<uint8_t> memory);
+    ExternalMemoryFromPool(
+        scoped_refptr<AudioBufferMemoryPool> pool,
+        std::unique_ptr<uint8_t, base::AlignedFreeDeleter> memory,
+        size_t size);
     ExternalMemoryFromPool(ExternalMemoryFromPool&&);
     ~ExternalMemoryFromPool() override;
 
-    base::AlignedHeapArray<uint8_t> memory_;
+    std::unique_ptr<uint8_t, base::AlignedFreeDeleter> memory_;
     scoped_refptr<AudioBufferMemoryPool> pool_;
   };
 
@@ -337,6 +340,7 @@
   std::unique_ptr<ExternalMemoryFromPool> CreateBuffer(size_t size);
   void ReturnBuffer(ExternalMemoryFromPool memory);
 
+  const int alignment_;
   base::Lock entry_lock_;
   std::list<ExternalMemoryFromPool> entries_ GUARDED_BY(entry_lock_);
 };
diff --git a/media/base/audio_buffer_unittest.cc b/media/base/audio_buffer_unittest.cc
index d45fd35e..1eddbc8f 100644
--- a/media/base/audio_buffer_unittest.cc
+++ b/media/base/audio_buffer_unittest.cc
@@ -14,7 +14,6 @@
 #include <limits>
 #include <memory>
 
-#include "base/memory/aligned_memory.h"
 #include "base/memory/scoped_refptr.h"
 #include "base/test/gtest_util.h"
 #include "base/time/time.h"
@@ -827,18 +826,19 @@
 
 // Test that the channels are aligned according to the pool parameter.
 TEST(AudioBufferTest, AudioBufferMemoryPoolAlignment) {
+  const int kAlignment = 512;
   const ChannelLayout kChannelLayout = CHANNEL_LAYOUT_6_1;
   const size_t kChannelCount = ChannelLayoutToChannelCount(kChannelLayout);
 
-  auto pool = base::MakeRefCounted<AudioBufferMemoryPool>();
+  auto pool = base::MakeRefCounted<AudioBufferMemoryPool>(kAlignment);
   scoped_refptr<AudioBuffer> buffer =
       AudioBuffer::CreateBuffer(kSampleFormatPlanarU8, kChannelLayout,
                                 kChannelCount, kSampleRate, kSampleRate, pool);
 
   ASSERT_EQ(kChannelCount, buffer->channel_data().size());
   for (size_t i = 0; i < kChannelCount; i++) {
-    EXPECT_TRUE(
-        base::IsAligned(buffer->channel_data()[i], AudioBus::kChannelAlignment))
+    EXPECT_EQ(
+        0u, reinterpret_cast<uintptr_t>(buffer->channel_data()[i]) % kAlignment)
         << " channel: " << i;
   }
 
@@ -857,8 +857,8 @@
 
   ASSERT_EQ(kChannelCount, buffer->channel_data().size());
   for (size_t i = 0; i < kChannelCount; i++) {
-    EXPECT_TRUE(
-        base::IsAligned(buffer->channel_data()[i], AudioBus::kChannelAlignment))
+    EXPECT_EQ(0u, reinterpret_cast<uintptr_t>(buffer->channel_data()[i]) %
+                      AudioBus::kChannelAlignment)
         << " channel: " << i;
   }
 }
diff --git a/media/base/audio_bus.h b/media/base/audio_bus.h
index 17a34b3b..4649d22 100644
--- a/media/base/audio_bus.h
+++ b/media/base/audio_bus.h
@@ -38,9 +38,9 @@
   using ConstChannel = base::span<const float>;
   using ChannelVector = std::vector<Channel>;
 
-  // Guaranteed alignment of each channel's data; use 32-byte alignment for easy
+  // Guaranteed alignment of each channel's data; use 16-byte alignment for easy
   // SSE optimizations.
-  static constexpr size_t kChannelAlignment = 32;
+  static constexpr size_t kChannelAlignment = 16;
 
   // Creates a new AudioBus and allocates |channels| of length |frames|.  Uses
   // channels() and frames_per_buffer() from AudioParameters if given.
diff --git a/media/base/audio_parameters.h b/media/base/audio_parameters.h
index e7f5358..5e9aeea 100644
--- a/media/base/audio_parameters.h
+++ b/media/base/audio_parameters.h
@@ -34,7 +34,7 @@
 #pragma warning(push)
 #pragma warning(disable : 4324)  // Disable warning for added padding.
 #endif
-inline constexpr int kParametersAlignment = 32;
+constexpr int kParametersAlignment = 16;
 
 // ****WARNING****: Do not change the field types or ordering of these fields
 // without checking that alignment is correct. The structs may be concurrently
diff --git a/media/base/vector_math.cc b/media/base/vector_math.cc
index fdf3b5d..cdbdff7 100644
--- a/media/base/vector_math.cc
+++ b/media/base/vector_math.cc
@@ -44,8 +44,8 @@
     return;
   }
   CHECK_LE(src.size(), dest.size());
-  CHECK(base::IsAligned(src.data(), kRequiredAlignment));
-  CHECK(base::IsAligned(dest.data(), kRequiredAlignment));
+  DCHECK(base::IsAligned(src.data(), kRequiredAlignment));
+  DCHECK(base::IsAligned(dest.data(), kRequiredAlignment));
   static const auto fmac_func = [] {
 #if defined(ARCH_CPU_X86_FAMILY) && !BUILDFLAG(IS_NACL)
     base::CPU cpu;
@@ -73,8 +73,8 @@
     return;
   }
   CHECK_LE(src.size(), dest.size());
-  CHECK(base::IsAligned(src.data(), kRequiredAlignment));
-  CHECK(base::IsAligned(dest.data(), kRequiredAlignment));
+  DCHECK(base::IsAligned(src.data(), kRequiredAlignment));
+  DCHECK(base::IsAligned(dest.data(), kRequiredAlignment));
   static const auto fmul_func = [] {
 #if defined(ARCH_CPU_X86_FAMILY) && !BUILDFLAG(IS_NACL)
     base::CPU cpu;
@@ -135,7 +135,7 @@
 std::pair<float, float> EWMAAndMaxPower(float initial_value,
                                         base::span<const float> src,
                                         float smoothing_factor) {
-  CHECK(base::IsAligned(src.data(), kRequiredAlignment));
+  DCHECK(base::IsAligned(src.data(), kRequiredAlignment));
   static const auto ewma_and_max_power_func = [] {
 #if defined(ARCH_CPU_X86_FAMILY) && !BUILDFLAG(IS_NACL)
     base::CPU cpu;
@@ -171,9 +171,6 @@
 
 #if defined(ARCH_CPU_X86_FAMILY) && !BUILDFLAG(IS_NACL)
 void FMUL_SSE(const float src[], float scale, int len, float dest[]) {
-  DCHECK(base::IsAligned(src, kRequiredAlignment));
-  DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
   const int rem = len % 4;
   const int last_index = len - rem;
   __m128 m_scale = _mm_set_ps1(scale);
@@ -191,14 +188,37 @@
                                                float scale,
                                                int len,
                                                float dest[]) {
-  DCHECK(base::IsAligned(src, kRequiredAlignment));
-  DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
   const int rem = len % 8;
   const int last_index = len - rem;
   __m256 m_scale = _mm256_set1_ps(scale);
-  for (int i = 0; i < last_index; i += 8) {
-    _mm256_store_ps(dest + i, _mm256_mul_ps(_mm256_load_ps(src + i), m_scale));
+  // TODO(crbug.com/40756517): Remove the alignment conditionals below once
+  // AudioBus |kChannelAlignment| is updated to 32.
+  bool aligned_src = (reinterpret_cast<uintptr_t>(src) & 0x1F) == 0;
+  bool aligned_dest = (reinterpret_cast<uintptr_t>(dest) & 0x1F) == 0;
+  if (aligned_src) {
+    if (aligned_dest) {
+      for (int i = 0; i < last_index; i += 8) {
+        _mm256_store_ps(dest + i,
+                        _mm256_mul_ps(_mm256_load_ps(src + i), m_scale));
+      }
+    } else {
+      for (int i = 0; i < last_index; i += 8) {
+        _mm256_storeu_ps(dest + i,
+                         _mm256_mul_ps(_mm256_load_ps(src + i), m_scale));
+      }
+    }
+  } else {
+    if (aligned_dest) {
+      for (int i = 0; i < last_index; i += 8) {
+        _mm256_store_ps(dest + i,
+                        _mm256_mul_ps(_mm256_loadu_ps(src + i), m_scale));
+      }
+    } else {
+      for (int i = 0; i < last_index; i += 8) {
+        _mm256_storeu_ps(dest + i,
+                         _mm256_mul_ps(_mm256_loadu_ps(src + i), m_scale));
+      }
+    }
   }
 
   // Handle any remaining values that wouldn't fit in an SSE pass.
@@ -208,9 +228,6 @@
 }
 
 void FMAC_SSE(const float src[], float scale, int len, float dest[]) {
-  DCHECK(base::IsAligned(src, kRequiredAlignment));
-  DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
   const int rem = len % 4;
   const int last_index = len - rem;
   __m128 m_scale = _mm_set_ps1(scale);
@@ -233,9 +250,38 @@
   const int rem = len % 8;
   const int last_index = len - rem;
   __m256 m_scale = _mm256_set1_ps(scale);
-  for (int i = 0; i < last_index; i += 8) {
-    _mm256_store_ps(dest + i, _mm256_fmadd_ps(_mm256_load_ps(src + i), m_scale,
-                                              _mm256_load_ps(dest + i)));
+  // TODO(crbug.com/40756517): Remove the alignment conditionals below once
+  // AudioBus |kChannelAlignment| is updated to 32.
+  bool aligned_src = (reinterpret_cast<uintptr_t>(src) & 0x1F) == 0;
+  bool aligned_dest = (reinterpret_cast<uintptr_t>(dest) & 0x1F) == 0;
+  if (aligned_src) {
+    if (aligned_dest) {
+      for (int i = 0; i < last_index; i += 8) {
+        _mm256_store_ps(dest + i,
+                        _mm256_fmadd_ps(_mm256_load_ps(src + i), m_scale,
+                                        _mm256_load_ps(dest + i)));
+      }
+    } else {
+      for (int i = 0; i < last_index; i += 8) {
+        _mm256_storeu_ps(dest + i,
+                         _mm256_fmadd_ps(_mm256_load_ps(src + i), m_scale,
+                                         _mm256_loadu_ps(dest + i)));
+      }
+    }
+  } else {
+    if (aligned_dest) {
+      for (int i = 0; i < last_index; i += 8) {
+        _mm256_store_ps(dest + i,
+                        _mm256_fmadd_ps(_mm256_loadu_ps(src + i), m_scale,
+                                        _mm256_load_ps(dest + i)));
+      }
+    } else {
+      for (int i = 0; i < last_index; i += 8) {
+        _mm256_storeu_ps(dest + i,
+                         _mm256_fmadd_ps(_mm256_loadu_ps(src + i), m_scale,
+                                         _mm256_loadu_ps(dest + i)));
+      }
+    }
   }
 
   // Handle any remaining values that wouldn't fit in an SSE pass.
@@ -244,9 +290,6 @@
   }
 }
 void FCLAMP_SSE(const float src[], int len, float dest[]) {
-  DCHECK(base::IsAligned(src, kRequiredAlignment));
-  DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
   const int rem = len % 4;
   const int last_index = len - rem;
   const __m128 m_min = _mm_set_ps1(kClampMin);
@@ -271,27 +314,63 @@
   }
 }
 
+inline __attribute__((target("avx"))) __m256 SanitizeNan(const __m256 values) {
+  // Compare each value with itself. Since NaN != NaN, we end up with a mask
+  // with 0s instead of NaNs, and 1s for the original values.
+  const __m256 valid_mask = _mm256_cmp_ps(values, values, _CMP_EQ_OQ);
+
+  // Zero-out all NaNs by applying the mask with a logical AND.
+  return _mm256_and_ps(valid_mask, values);
+}
+
 __attribute__((target("avx"))) void FCLAMP_AVX(const float src[],
                                                int len,
                                                float dest[]) {
-  DCHECK(base::IsAligned(src, kRequiredAlignment));
-  DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
   const int rem = len % 8;
   const int last_index = len - rem;
   const __m256 m_max = _mm256_set1_ps(kClampMax);
   const __m256 m_min = _mm256_set1_ps(kClampMin);
 
-  for (int i = 0; i < last_index; i += 8) {
-    const __m256 values = _mm256_load_ps(src + i);
-    // Compare each value with itself. Since NaN != NaN, we end up with a mask
-    // with 0s instead of NaNs, and 1s for the original values.
-    const __m256 comparisons = _mm256_cmp_ps(values, values, _CMP_EQ_OQ);
-    // Zero-out all NaNs by applying the mask with a logical AND.
-    const __m256 sanitized_values = _mm256_and_ps(comparisons, values);
-
-    _mm256_store_ps(
-        dest + i, _mm256_max_ps(_mm256_min_ps(sanitized_values, m_max), m_min));
+  // TODO(crbug.com/40756517): Remove the alignment conditionals below once
+  // AudioBus |kChannelAlignment| is updated to 32.
+  bool aligned_src = (reinterpret_cast<uintptr_t>(src) & 0x1F) == 0;
+  bool aligned_dest = (reinterpret_cast<uintptr_t>(dest) & 0x1F) == 0;
+  if (aligned_src) {
+    if (aligned_dest) {
+      for (int i = 0; i < last_index; i += 8) {
+        _mm256_store_ps(
+            dest + i,
+            _mm256_max_ps(
+                _mm256_min_ps(SanitizeNan(_mm256_load_ps(src + i)), m_max),
+                m_min));
+      }
+    } else {
+      for (int i = 0; i < last_index; i += 8) {
+        _mm256_storeu_ps(
+            dest + i,
+            _mm256_max_ps(
+                _mm256_min_ps(SanitizeNan(_mm256_load_ps(src + i)), m_max),
+                m_min));
+      }
+    }
+  } else {
+    if (aligned_dest) {
+      for (int i = 0; i < last_index; i += 8) {
+        _mm256_store_ps(
+            dest + i,
+            _mm256_max_ps(
+                _mm256_min_ps(SanitizeNan(_mm256_loadu_ps(src + i)), m_max),
+                m_min));
+      }
+    } else {
+      for (int i = 0; i < last_index; i += 8) {
+        _mm256_storeu_ps(
+            dest + i,
+            _mm256_max_ps(
+                _mm256_min_ps(SanitizeNan(_mm256_loadu_ps(src + i)), m_max),
+                m_min));
+      }
+    }
   }
 
   // Handle any remaining values that wouldn't fit in an AVX2 pass.
@@ -325,7 +404,6 @@
   // Thus, the strategy here is to compute z[n], z[n-1], z[n-2], and z[n-3] in
   // each of the 4 lanes, and then combine them to give y[n].
 
-  DCHECK(base::IsAligned(src, kRequiredAlignment));
   const int rem = len % 4;
   const int last_index = len - rem;
 
@@ -386,8 +464,6 @@
                      const float src[],
                      int len,
                      float smoothing_factor) {
-  DCHECK(base::IsAligned(src, kRequiredAlignment));
-
   const int rem = len % 8;
   const int last_index = len - rem;
   const float weight_prev = 1.0f - smoothing_factor;
@@ -459,9 +535,6 @@
 
 #if defined(ARCH_CPU_ARM_FAMILY) && defined(USE_NEON)
 void FMAC_NEON(const float src[], float scale, int len, float dest[]) {
-  DCHECK(base::IsAligned(src, kRequiredAlignment));
-  DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
   const int rem = len % 4;
   const int last_index = len - rem;
   float32x4_t m_scale = vmovq_n_f32(scale);
@@ -477,9 +550,6 @@
 }
 
 void FMUL_NEON(const float src[], float scale, int len, float dest[]) {
-  DCHECK(base::IsAligned(src, kRequiredAlignment));
-  DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
   const int rem = len % 4;
   const int last_index = len - rem;
   float32x4_t m_scale = vmovq_n_f32(scale);
@@ -494,9 +564,6 @@
 }
 
 void FCLAMP_NEON(const float src[], int len, float dest[]) {
-  DCHECK(base::IsAligned(src, kRequiredAlignment));
-  DCHECK(base::IsAligned(dest, kRequiredAlignment));
-
   const int rem = len % 4;
   const int last_index = len - rem;
   const float32x4_t m_min = vmovq_n_f32(kClampMin);
@@ -528,8 +595,6 @@
                                              const float src[],
                                              int len,
                                              float smoothing_factor) {
-  DCHECK(base::IsAligned(src, kRequiredAlignment));
-
   // When the recurrence is unrolled, we see that we can split it into 4
   // separate lanes of evaluation:
   //
diff --git a/media/base/vector_math.h b/media/base/vector_math.h
index d2a8dad..0fab357 100644
--- a/media/base/vector_math.h
+++ b/media/base/vector_math.h
@@ -13,7 +13,7 @@
 namespace media::vector_math {
 
 // Required alignment for inputs and outputs to all vector math functions
-inline constexpr size_t kRequiredAlignment = 32;
+enum { kRequiredAlignment = 16 };
 
 // Multiply each element of `src` by `scale` and add to `dest`.
 // `src` and `dest` must be aligned by `kRequiredAlignment`.
diff --git a/media/ffmpeg/ffmpeg_common.h b/media/ffmpeg/ffmpeg_common.h
index 0dd3b23f..0225b37 100644
--- a/media/ffmpeg/ffmpeg_common.h
+++ b/media/ffmpeg/ffmpeg_common.h
@@ -49,7 +49,11 @@
 
 // Alignment requirement by FFmpeg for input and output buffers. This need to
 // be updated to match FFmpeg when it changes.
+#if defined(ARCH_CPU_ARM_FAMILY)
+constexpr inline int kFFmpegBufferAddressAlignment = 16;
+#else
 constexpr inline int kFFmpegBufferAddressAlignment = 32;
+#endif
 
 class AudioDecoderConfig;
 class VideoDecoderConfig;
diff --git a/media/filters/ffmpeg_audio_decoder.cc b/media/filters/ffmpeg_audio_decoder.cc
index 76a6f12..f6b986f 100644
--- a/media/filters/ffmpeg_audio_decoder.cc
+++ b/media/filters/ffmpeg_audio_decoder.cc
@@ -55,10 +55,6 @@
     static_cast<AudioBuffer*>(opaque)->Release();
 }
 
-// AudioBufferMemoryPool uses `AudioBus::kChannelAlignment` under the hood,
-// which must be aligned to at least `kFFmpegBufferAddressAlignment`.
-static_assert(kFFmpegBufferAddressAlignment == AudioBus::kChannelAlignment);
-
 FFmpegAudioDecoder::FFmpegAudioDecoder(
     const scoped_refptr<base::SequencedTaskRunner>& task_runner,
     MediaLog* media_log)
@@ -66,7 +62,8 @@
       state_(DecoderState::kUninitialized),
       av_sample_format_(0),
       media_log_(media_log),
-      pool_(base::MakeRefCounted<AudioBufferMemoryPool>()) {
+      pool_(base::MakeRefCounted<AudioBufferMemoryPool>(
+          kFFmpegBufferAddressAlignment)) {
   DETACH_FROM_SEQUENCE(sequence_checker_);
 }
 
diff --git a/third_party/blink/renderer/platform/audio/audio_array.h b/third_party/blink/renderer/platform/audio/audio_array.h
index d7965b8..e7d13b3 100644
--- a/third_party/blink/renderer/platform/audio/audio_array.h
+++ b/third_party/blink/renderer/platform/audio/audio_array.h
@@ -67,15 +67,18 @@
     CHECK_LE(n, std::numeric_limits<unsigned>::max() / sizeof(T));
     uint32_t initial_size = static_cast<uint32_t>(sizeof(T) * n);
 
+    // Minimum alignment requirements for arrays so that we can use
+    // SIMD.
+#if defined(ARCH_CPU_X86_FAMILY)
+    const unsigned kAlignment = 32;
+#else
+    const unsigned kAlignment = 16;
+#endif
+
     if (allocation_) {
       WTF::Partitions::FastFree(allocation_);
     }
 
-    // Minimum alignment requirements for arrays so that we can use SIMD.
-    // This value matches media::AudioBus::kChannelAlignment, for ease of
-    // interop with media::AudioBus.
-    static constexpr unsigned kAlignment = 32;
-
     // Always allocate extra space so that we are guaranteed to get
     // the desired alignment.  Some memory is wasted, but it should be
     // small since most arrays are probably at least 128 floats (or