// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/audio/win/audio_low_latency_input_win.h"
#include <audiopolicy.h>
#include <mediaobj.h>
#include <objbase.h>
#include <uuids.h>
#include <wmcodecdsp.h>
#include <algorithm>
#include <cmath>
#include <memory>
#include "base/logging.h"
#include "base/metrics/histogram_functions.h"
#include "base/metrics/histogram_macros.h"
#include "base/strings/stringprintf.h"
#include "base/strings/utf_string_conversions.h"
#include "base/trace_event/trace_event.h"
#include "media/audio/audio_device_description.h"
#include "media/audio/audio_features.h"
#include "media/audio/win/avrt_wrapper_win.h"
#include "media/audio/win/core_audio_util_win.h"
#include "media/base/audio_block_fifo.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_timestamp_helper.h"
#include "media/base/channel_layout.h"
#include "media/base/limits.h"
using base::win::ScopedCOMInitializer;
namespace media {
namespace {
// Errors when initializing the audio client related to the audio format. Split
// by whether we're using format conversion or not. Used for reporting stats -
// do not renumber entries.
enum FormatRelatedInitError {
kUnsupportedFormat = 0,
kUnsupportedFormatWithFormatConversion = 1,
kInvalidArgument = 2,
kInvalidArgumentWithFormatConversion = 3,
kCount
};
bool IsSupportedFormatForConversion(const WAVEFORMATEX& format) {
if (format.nSamplesPerSec < limits::kMinSampleRate ||
format.nSamplesPerSec > limits::kMaxSampleRate) {
return false;
}
switch (format.wBitsPerSample) {
case 8:
case 16:
case 32:
break;
default:
return false;
}
if (GuessChannelLayout(format.nChannels) == CHANNEL_LAYOUT_UNSUPPORTED) {
LOG(ERROR) << "Hardware configuration not supported for audio conversion";
return false;
}
return true;
}
// Returns the index of the device in the device collection, or -1 for the
// default device, as used by the voice processing DMO.
base::Optional<WORD> GetAudioDeviceCollectionIndexFromId(
const std::string& device_id,
const EDataFlow data_flow) {
  // The default device is specified with -1, which wraps to 0xffff when
  // stored in the unsigned WORD.
if (AudioDeviceDescription::IsDefaultDevice(device_id))
return -1;
WORD device_index = -1;
HRESULT hr = E_FAIL;
  // The default communications device does not have an index of its own, so
  // we need to find the index for the underlying device.
if (AudioDeviceDescription::IsCommunicationsDevice(device_id)) {
const std::string communications_id =
(data_flow == eCapture)
? CoreAudioUtil::GetCommunicationsInputDeviceID()
: CoreAudioUtil::GetCommunicationsOutputDeviceID();
hr = CoreAudioUtil::GetDeviceCollectionIndex(communications_id, data_flow,
&device_index);
} else {
// Otherwise, just look for the device_id directly.
hr = CoreAudioUtil::GetDeviceCollectionIndex(device_id, data_flow,
&device_index);
}
if (FAILED(hr) || hr == S_FALSE)
return base::nullopt;
return device_index;
}
// Implementation of IMediaBuffer, as required for
// IMediaObject::ProcessOutput(). After consuming data provided by
// ProcessOutput(), call SetLength() to update the buffer availability.
// Example implementation:
// http://msdn.microsoft.com/en-us/library/dd376684(v=vs.85).aspx
class MediaBufferImpl : public IMediaBuffer {
public:
explicit MediaBufferImpl(DWORD max_length)
: data_(new BYTE[max_length]), max_length_(max_length) {}
// IMediaBuffer implementation.
STDMETHOD(GetBufferAndLength)(BYTE** buffer, DWORD* length) {
if (!buffer || !length)
return E_POINTER;
*buffer = data_.get();
*length = length_;
return S_OK;
}
STDMETHOD(GetMaxLength)(DWORD* max_length) {
if (!max_length)
return E_POINTER;
*max_length = max_length_;
return S_OK;
}
STDMETHOD(SetLength)(DWORD length) {
if (length > max_length_)
return E_INVALIDARG;
length_ = length;
return S_OK;
}
// IUnknown implementation.
STDMETHOD_(ULONG, AddRef)() { return InterlockedIncrement(&ref_count_); }
STDMETHOD(QueryInterface)(REFIID riid, void** object) {
if (!object)
return E_POINTER;
if (riid != IID_IMediaBuffer && riid != IID_IUnknown)
return E_NOINTERFACE;
*object = static_cast<IMediaBuffer*>(this);
AddRef();
return S_OK;
}
STDMETHOD_(ULONG, Release)() {
LONG ref_count = InterlockedDecrement(&ref_count_);
if (ref_count == 0)
delete this;
return ref_count;
}
private:
virtual ~MediaBufferImpl() {}
std::unique_ptr<BYTE[]> data_;
DWORD length_ = 0;
const DWORD max_length_;
LONG ref_count_ = 0;
};
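
// A minimal usage sketch for MediaBufferImpl, mirroring how the DMO capture
// path below consumes it (the 320-byte size is just an example):
//   Microsoft::WRL::ComPtr<IMediaBuffer> buffer(new MediaBufferImpl(320));
//   // ... IMediaObject::ProcessOutput() fills |buffer| with capture data ...
//   BYTE* data = nullptr;
//   DWORD length = 0;
//   buffer->GetBufferAndLength(&data, &length);  // Read the captured bytes.
//   buffer->SetLength(0);  // Mark the buffer as available again.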
} // namespace
WASAPIAudioInputStream::WASAPIAudioInputStream(
AudioManagerWin* manager,
const AudioParameters& params,
const std::string& device_id,
const AudioManager::LogCallback& log_callback,
AudioManagerBase::VoiceProcessingMode voice_processing_mode)
: manager_(manager),
device_id_(device_id),
output_device_id_for_aec_(AudioDeviceDescription::kDefaultDeviceId),
log_callback_(log_callback),
use_voice_processing_(voice_processing_mode ==
AudioManagerBase::VoiceProcessingMode::kEnabled) {
DCHECK(manager_);
DCHECK(!device_id_.empty());
DCHECK(!log_callback_.is_null());
DVLOG_IF(1, use_voice_processing_) << "Using Windows voice capture DSP DMO.";
// Load the Avrt DLL if not already loaded. Required to support MMCSS.
bool avrt_init = avrt::Initialize();
DCHECK(avrt_init) << "Failed to load the Avrt.dll";
const SampleFormat kSampleFormat = kSampleFormatS16;
// Set up the desired output format specified by the client.
output_format_.wFormatTag = WAVE_FORMAT_PCM;
output_format_.nChannels = params.channels();
output_format_.nSamplesPerSec = params.sample_rate();
output_format_.wBitsPerSample = SampleFormatToBitsPerChannel(kSampleFormat);
output_format_.nBlockAlign =
(output_format_.wBitsPerSample / 8) * output_format_.nChannels;
output_format_.nAvgBytesPerSec =
output_format_.nSamplesPerSec * output_format_.nBlockAlign;
output_format_.cbSize = 0;
// Set the input (capture) format to the desired output format. In most cases,
// it will be used unchanged.
input_format_ = output_format_;
// Size in bytes of each audio frame.
frame_size_bytes_ = input_format_.nBlockAlign;
  // Store the size of the audio packets that we expect to get from the audio
  // endpoint device in each capture event.
packet_size_bytes_ = params.GetBytesPerBuffer(kSampleFormat);
packet_size_frames_ = packet_size_bytes_ / input_format_.nBlockAlign;
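  // For example, 10 ms of 48 kHz stereo S16 audio gives a frame size of
  // 4 bytes, a packet size of 1920 bytes and 480 frames per packet.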
DVLOG(1) << "Number of bytes per audio frame : " << frame_size_bytes_;
DVLOG(1) << "Number of audio frames per packet: " << packet_size_frames_;
// All events are auto-reset events and non-signaled initially.
// Create the event which the audio engine will signal each time
// a buffer becomes ready to be processed by the client.
audio_samples_ready_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
DCHECK(audio_samples_ready_event_.IsValid());
// Create the event which will be set in Stop() when capturing shall stop.
stop_capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
DCHECK(stop_capture_event_.IsValid());
}
WASAPIAudioInputStream::~WASAPIAudioInputStream() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
}
bool WASAPIAudioInputStream::Open() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK_EQ(OPEN_RESULT_OK, open_result_);
// Verify that we are not already opened.
if (opened_) {
log_callback_.Run("WASAPIAIS::Open: already open");
return false;
}
// Obtain a reference to the IMMDevice interface of the capturing
// device with the specified unique identifier or role which was
// set at construction.
HRESULT hr = SetCaptureDevice();
if (FAILED(hr)) {
ReportOpenResult(hr);
return false;
}
// If voice processing is enabled, initialize the DMO that is used for it. The
// remainder of the function initializes an audio capture client (the normal
// case). Either the DMO or the capture client is used.
// TODO(grunell): Refactor out the audio capture client initialization to its
// own function.
if (use_voice_processing_) {
opened_ = InitializeDmo();
return opened_;
}
// Obtain an IAudioClient interface which enables us to create and initialize
// an audio stream between an audio application and the audio engine.
hr = endpoint_device_->Activate(__uuidof(IAudioClient), CLSCTX_INPROC_SERVER,
NULL, &audio_client_);
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_ACTIVATION_FAILED;
ReportOpenResult(hr);
return false;
}
#ifndef NDEBUG
// Retrieve the stream format which the audio engine uses for its internal
  // processing/mixing of shared-mode streams. This call is for diagnostic
  // purposes only and is made in debug builds only.
hr = GetAudioEngineStreamFormat();
#endif
// Verify that the selected audio endpoint supports the specified format
// set during construction.
hr = S_OK;
if (!DesiredFormatIsSupported(&hr)) {
open_result_ = OPEN_RESULT_FORMAT_NOT_SUPPORTED;
ReportOpenResult(hr);
return false;
}
  // Initialize the audio stream between the client and the device using
  // shared mode and the lowest possible glitch-free latency.
hr = InitializeAudioEngine();
if (SUCCEEDED(hr) && converter_)
open_result_ = OPEN_RESULT_OK_WITH_RESAMPLING;
ReportOpenResult(hr); // Report before we assign a value to |opened_|.
opened_ = SUCCEEDED(hr);
return opened_;
}
void WASAPIAudioInputStream::Start(AudioInputCallback* callback) {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK(callback);
DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
if (!opened_)
return;
if (started_)
return;
// TODO(grunell): Refactor the |use_voice_processing_| conditions in this
// function to clean up the code.
if (use_voice_processing_) {
// Pre-fill render buffer with silence.
if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
audio_client_for_render_.Get(), audio_render_client_.Get())) {
DLOG(WARNING) << "Failed to pre-fill render buffer with silence.";
}
} else {
if (device_id_ == AudioDeviceDescription::kLoopbackWithMuteDeviceId &&
system_audio_volume_) {
BOOL muted = false;
system_audio_volume_->GetMute(&muted);
      // If the system audio was already muted when capturing started, there is
      // no need to mute it again, and we will not unmute it when capturing
      // stops.
if (!muted) {
system_audio_volume_->SetMute(true, NULL);
mute_done_ = true;
}
}
}
DCHECK(!sink_);
sink_ = callback;
// Starts periodic AGC microphone measurements if the AGC has been enabled
// using SetAutomaticGainControl().
StartAgc();
// Create and start the thread that will drive the capturing by waiting for
// capture events.
DCHECK(!capture_thread_.get());
capture_thread_.reset(new base::DelegateSimpleThread(
this, "wasapi_capture_thread",
base::SimpleThread::Options(base::ThreadPriority::REALTIME_AUDIO)));
capture_thread_->Start();
HRESULT hr = E_FAIL;
if (use_voice_processing_) {
hr = audio_client_for_render_->Start();
if (FAILED(hr)) {
DLOG(ERROR) << "Failed to start output streaming: " << std::hex << hr
<< ", proceeding without rendering.";
}
} else {
// Start streaming data between the endpoint buffer and the audio engine.
hr = audio_client_->Start();
if (FAILED(hr)) {
DLOG(ERROR) << "Failed to start input streaming.";
log_callback_.Run(base::StringPrintf(
"WASAPIAIS::Start: Failed to start audio client, hresult = %#lx",
hr));
}
if (SUCCEEDED(hr) && audio_render_client_for_loopback_.Get()) {
hr = audio_render_client_for_loopback_->Start();
if (FAILED(hr))
log_callback_.Run(base::StringPrintf(
"WASAPIAIS::Start: Failed to start render client for loopback, "
"hresult = %#lx",
hr));
}
}
started_ = SUCCEEDED(hr);
}
void WASAPIAudioInputStream::Stop() {
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DVLOG(1) << "WASAPIAudioInputStream::Stop()";
if (!started_)
return;
// We have muted system audio for capturing, so we need to unmute it when
// capturing stops.
if (device_id_ == AudioDeviceDescription::kLoopbackWithMuteDeviceId &&
mute_done_) {
DCHECK(system_audio_volume_);
if (system_audio_volume_) {
system_audio_volume_->SetMute(false, NULL);
mute_done_ = false;
}
}
// Stops periodic AGC microphone measurements.
StopAgc();
// Shut down the capture thread.
if (stop_capture_event_.IsValid()) {
SetEvent(stop_capture_event_.Get());
}
// TODO(grunell): Refactor the |use_voice_processing_| conditions in this
// function to clean up the code.
if (use_voice_processing_) {
// Stop the render audio streaming. The input streaming needs no explicit
// stopping.
HRESULT hr = audio_client_for_render_->Stop();
if (FAILED(hr)) {
DLOG(ERROR) << "Failed to stop output streaming.";
}
} else {
// Stop the input audio streaming.
HRESULT hr = audio_client_->Stop();
if (FAILED(hr)) {
DLOG(ERROR) << "Failed to stop input streaming.";
}
}
// Wait until the thread completes and perform cleanup.
if (capture_thread_) {
SetEvent(stop_capture_event_.Get());
capture_thread_->Join();
capture_thread_.reset();
}
if (use_voice_processing_) {
HRESULT hr = voice_capture_dmo_->FreeStreamingResources();
if (FAILED(hr))
DLOG(ERROR) << "Failed to free dmo resources.";
}
started_ = false;
sink_ = NULL;
}
void WASAPIAudioInputStream::Close() {
DVLOG(1) << "WASAPIAudioInputStream::Close()";
  // It is valid to call Close() before calling Open() or Start().
// It is also valid to call Close() after Start() has been called.
Stop();
if (converter_)
converter_->RemoveInput(this);
ReportAndResetGlitchStats();
// Inform the audio manager that we have been closed. This will cause our
// destruction.
manager_->ReleaseInputStream(this);
}
double WASAPIAudioInputStream::GetMaxVolume() {
  // Verify that Open() has been called successfully, to ensure that an audio
// session exists and that an ISimpleAudioVolume interface has been created.
DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
if (!opened_)
return 0.0;
// The effective volume value is always in the range 0.0 to 1.0, hence
// we can return a fixed value (=1.0) here.
return 1.0;
}
void WASAPIAudioInputStream::SetVolume(double volume) {
DVLOG(1) << "SetVolume(volume=" << volume << ")";
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
DCHECK_GE(volume, 0.0);
DCHECK_LE(volume, 1.0);
DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
if (!opened_)
return;
// Set a new master volume level. Valid volume levels are in the range
// 0.0 to 1.0. Ignore volume-change events.
HRESULT hr =
simple_audio_volume_->SetMasterVolume(static_cast<float>(volume), NULL);
if (FAILED(hr))
DLOG(WARNING) << "Failed to set new input master volume.";
  // Update the AGC volume level based on the last setting above. Note that
// the volume-level resolution is not infinite and it is therefore not
// possible to assume that the volume provided as input parameter can be
// used directly. Instead, a new query to the audio hardware is required.
// This method does nothing if AGC is disabled.
UpdateAgcVolume();
}
double WASAPIAudioInputStream::GetVolume() {
DCHECK(opened_) << "Open() has not been called successfully";
if (!opened_)
return 0.0;
// Retrieve the current volume level. The value is in the range 0.0 to 1.0.
float level = 0.0f;
HRESULT hr = simple_audio_volume_->GetMasterVolume(&level);
if (FAILED(hr))
DLOG(WARNING) << "Failed to get input master volume.";
return static_cast<double>(level);
}
bool WASAPIAudioInputStream::IsMuted() {
DCHECK(opened_) << "Open() has not been called successfully";
DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_);
if (!opened_)
return false;
// Retrieves the current muting state for the audio session.
BOOL is_muted = FALSE;
HRESULT hr = simple_audio_volume_->GetMute(&is_muted);
if (FAILED(hr))
DLOG(WARNING) << "Failed to get input master volume.";
return is_muted != FALSE;
}
void WASAPIAudioInputStream::SetOutputDeviceForAec(
const std::string& output_device_id) {
if (!use_voice_processing_)
return;
if (output_device_id == output_device_id_for_aec_)
return;
output_device_id_for_aec_ = output_device_id;
// Set devices.
Microsoft::WRL::ComPtr<IPropertyStore> ps;
HRESULT hr = voice_capture_dmo_->QueryInterface(IID_IPropertyStore, &ps);
if (FAILED(hr) || !ps) {
log_callback_.Run(base::StringPrintf(
"WASAPIAIS:SetOutputDeviceForAec: Getting DMO property store failed."));
return;
}
if (!SetDmoDevices(ps.Get())) {
log_callback_.Run(
"WASAPIAIS:SetOutputDeviceForAec: Setting device indices failed.");
return;
}
// Recreate the dummy render client on the new output.
hr = audio_client_for_render_->Stop();
if (FAILED(hr)) {
DLOG(ERROR) << "Failed to stop output streaming.";
}
CreateDummyRenderClientsForDmo();
if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
audio_client_for_render_.Get(), audio_render_client_.Get())) {
DLOG(WARNING) << "Failed to pre-fill render buffer with silence.";
}
hr = audio_client_for_render_->Start();
if (FAILED(hr)) {
DLOG(ERROR) << "Failed to start output streaming: " << std::hex << hr
<< ", proceeding without rendering.";
}
log_callback_.Run(base::StringPrintf(
"WASAPIAIS:SetOutputDeviceForAec: Successfully updated AEC output "
"device to %s",
output_device_id.c_str()));
}
void WASAPIAudioInputStream::Run() {
ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);
// Enable MMCSS to ensure that this thread receives prioritized access to
// CPU resources.
DWORD task_index = 0;
HANDLE mm_task =
avrt::AvSetMmThreadCharacteristics(L"Pro Audio", &task_index);
bool mmcss_is_ok =
(mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
if (!mmcss_is_ok) {
// Failed to enable MMCSS on this thread. It is not fatal but can lead
// to reduced QoS at high load.
DWORD err = GetLastError();
LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
}
  // Allocate a buffer with a size that enables us to take care of cases like:
  // 1) The recorded buffer size is smaller than, or does not match exactly,
  //    the selected packet size used in each callback.
  // 2) The selected buffer size is larger than the recorded buffer size in
  //    each event.
  // In the case where no resampling is required, a single buffer should be
  // enough, but in case we get buffers that don't match exactly, we'll go with
  // two. The same applies if we need to resample and the buffer ratio is
  // perfect. However, if the buffer ratio is imperfect, we will need 3 buffers
  // to safely be able to buffer up data in cases where a conversion requires
  // two audio buffers (and we need to be able to write to the third one).
size_t capture_buffer_size =
std::max(2 * endpoint_buffer_size_frames_ * frame_size_bytes_,
2 * packet_size_frames_ * frame_size_bytes_);
int buffers_required = capture_buffer_size / packet_size_bytes_;
if (converter_ && imperfect_buffer_size_conversion_)
++buffers_required;
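  // For example, with equal endpoint and packet sizes this yields two buffers,
  // bumped to three when an imperfect conversion ratio requires the extra
  // headroom described above.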
DCHECK(!fifo_);
fifo_.reset(new AudioBlockFifo(input_format_.nChannels, packet_size_frames_,
buffers_required));
DVLOG(1) << "AudioBlockFifo buffer count: " << buffers_required;
bool success =
use_voice_processing_ ? RunWithDmo() : RunWithAudioCaptureClient();
if (!success) {
    // TODO(henrika): perhaps it's worth improving the cleanup here by e.g.
    // stopping the audio client, joining the thread etc.?
NOTREACHED() << "WASAPI capturing failed with error code "
<< GetLastError();
}
// Disable MMCSS.
if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
PLOG(WARNING) << "Failed to disable MMCSS";
}
fifo_.reset();
}
bool WASAPIAudioInputStream::RunWithAudioCaptureClient() {
HANDLE wait_array[2] = {stop_capture_event_.Get(),
audio_samples_ready_event_.Get()};
while (true) {
// Wait for a close-down event or a new capture event.
DWORD wait_result = WaitForMultipleObjects(2, wait_array, FALSE, INFINITE);
switch (wait_result) {
case WAIT_OBJECT_0 + 0:
// |stop_capture_event_| has been set.
return true;
case WAIT_OBJECT_0 + 1:
// |audio_samples_ready_event_| has been set.
PullCaptureDataAndPushToSink();
break;
case WAIT_FAILED:
default:
return false;
}
}
return false;
}
bool WASAPIAudioInputStream::RunWithDmo() {
while (true) {
// Poll every 5 ms, or wake up on capture stop signal.
DWORD wait_result = WaitForSingleObject(stop_capture_event_.Get(), 5);
switch (wait_result) {
case WAIT_OBJECT_0:
// |stop_capture_event_| has been set.
return true;
case WAIT_TIMEOUT:
PullDmoCaptureDataAndPushToSink();
if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence(
audio_client_for_render_.Get(), audio_render_client_.Get())) {
DLOG(WARNING) << "Failed to fill render buffer with silence.";
}
break;
case WAIT_FAILED:
default:
return false;
}
}
return false;
}
void WASAPIAudioInputStream::PullCaptureDataAndPushToSink() {
TRACE_EVENT1("audio", "WASAPIAudioInputStream::PullCaptureDataAndPushToSink",
"sample rate", input_format_.nSamplesPerSec);
UINT64 last_device_position = 0;
// Pull data from the capture endpoint buffer until it's empty or an error
// occurs.
while (true) {
BYTE* data_ptr = nullptr;
UINT32 num_frames_to_read = 0;
DWORD flags = 0;
UINT64 device_position = 0;
// Note: The units on this are 100ns intervals. Both GetBuffer() and
// GetPosition() will handle the translation from the QPC value, so we just
    // need to convert from 100 ns units into us, which is just dividing by
    // 10 since 10 x 100 ns = 1 us.
UINT64 capture_time_100ns = 0;
// Retrieve the amount of data in the capture endpoint buffer, replace it
// with silence if required, create callbacks for each packet and store
// non-delivered data for the next event.
HRESULT hr =
audio_capture_client_->GetBuffer(&data_ptr, &num_frames_to_read, &flags,
&device_position, &capture_time_100ns);
if (hr == AUDCLNT_S_BUFFER_EMPTY)
break;
    // TODO(grunell): Should we handle different errors explicitly? Perhaps
    // exit by setting |error = true|. What are the assumptions here that make
    // us rely on the next WaitForMultipleObjects? Do we expect the next wait
    // to be successful sometimes?
if (FAILED(hr)) {
DLOG(ERROR) << "Failed to get data from the capture buffer";
break;
}
// If the device position has changed, we assume this data belongs to a new
// chunk, so we report delay and glitch stats and update the last and next
// expected device positions.
// If the device position has not changed we assume this data belongs to the
// previous chunk, and only update the expected next device position.
if (device_position != last_device_position) {
ReportDelayStatsAndUpdateGlitchCount(
flags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY, device_position,
base::TimeTicks() +
CoreAudioUtil::ReferenceTimeToTimeDelta(capture_time_100ns));
last_device_position = device_position;
expected_next_device_position_ = device_position + num_frames_to_read;
} else {
expected_next_device_position_ += num_frames_to_read;
}
// TODO(dalecurtis, olka, grunell): Is this ever false? If it is, should we
// handle |flags & AUDCLNT_BUFFERFLAGS_TIMESTAMP_ERROR|?
if (audio_clock_) {
// The reported timestamp from GetBuffer is not as reliable as the clock
      // from the client. We've seen timestamps reported for USB audio devices
      // be off by several days. Furthermore, we've seen them jump back in time
      // every 2 seconds or so.
// TODO(grunell): Using the audio clock as capture time for the currently
// processed buffer seems incorrect. http://crbug.com/825744.
audio_clock_->GetPosition(&device_position, &capture_time_100ns);
}
base::TimeTicks capture_time;
if (capture_time_100ns) {
// See conversion notes on |capture_time_100ns|.
capture_time +=
base::TimeDelta::FromMicroseconds(capture_time_100ns / 10.0);
} else {
// We may not have an IAudioClock or GetPosition() may return zero.
capture_time = base::TimeTicks::Now();
}
// Adjust |capture_time| for the FIFO before pushing.
capture_time -= AudioTimestampHelper::FramesToTime(
fifo_->GetAvailableFrames(), input_format_.nSamplesPerSec);
// TODO(grunell): Since we check |hr == AUDCLNT_S_BUFFER_EMPTY| above,
// should we instead assert that |num_frames_to_read != 0|?
if (num_frames_to_read != 0) {
if (flags & AUDCLNT_BUFFERFLAGS_SILENT) {
fifo_->PushSilence(num_frames_to_read);
} else {
fifo_->Push(data_ptr, num_frames_to_read,
input_format_.wBitsPerSample / 8);
}
}
hr = audio_capture_client_->ReleaseBuffer(num_frames_to_read);
DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";
    // Get a cached AGC volume level which is updated once every second on the
    // audio manager thread. Note that |volume| is also updated each time
// SetVolume() is called through IPC by the render-side AGC.
double volume = 0.0;
GetAgcVolume(&volume);
// Deliver captured data to the registered consumer using a packet size
// which was specified at construction.
while (fifo_->available_blocks()) {
if (converter_) {
if (imperfect_buffer_size_conversion_ &&
fifo_->available_blocks() == 1) {
// Special case. We need to buffer up more audio before we can convert
// or else we'll suffer an underrun.
// TODO(grunell): Verify this is really true.
break;
}
converter_->Convert(convert_bus_.get());
sink_->OnData(convert_bus_.get(), capture_time, volume);
// Move the capture time forward for each vended block.
capture_time += AudioTimestampHelper::FramesToTime(
convert_bus_->frames(), output_format_.nSamplesPerSec);
} else {
sink_->OnData(fifo_->Consume(), capture_time, volume);
// Move the capture time forward for each vended block.
capture_time += AudioTimestampHelper::FramesToTime(
packet_size_frames_, input_format_.nSamplesPerSec);
}
}
} // while (true)
}
void WASAPIAudioInputStream::PullDmoCaptureDataAndPushToSink() {
TRACE_EVENT1("audio",
"WASAPIAudioInputStream::PullDmoCaptureDataAndPushToSink",
"sample rate", input_format_.nSamplesPerSec);
// Pull data from the capture endpoint buffer until it's empty or an error
// occurs.
while (true) {
DWORD status = 0;
DMO_OUTPUT_DATA_BUFFER data_buffer = {0};
data_buffer.pBuffer = media_buffer_.Get();
// Get processed capture data from the DMO.
HRESULT hr =
voice_capture_dmo_->ProcessOutput(0, // dwFlags
1, // cOutputBufferCount
&data_buffer,
&status); // Must be ignored.
if (FAILED(hr)) {
DLOG(ERROR) << "DMO ProcessOutput failed, hr = 0x" << std::hex << hr;
break;
}
BYTE* data;
ULONG data_length = 0;
// Get a pointer to the data buffer. This should be valid until the next
// call to ProcessOutput.
hr = media_buffer_->GetBufferAndLength(&data, &data_length);
if (FAILED(hr)) {
DLOG(ERROR) << "Could not get buffer, hr = 0x" << std::hex << hr;
break;
}
if (data_length > 0) {
const int samples_produced = data_length / frame_size_bytes_;
base::TimeTicks capture_time;
if (data_buffer.dwStatus & DMO_OUTPUT_DATA_BUFFERF_TIME &&
data_buffer.rtTimestamp > 0) {
// See conversion notes on |capture_time_100ns| in
// PullCaptureDataAndPushToSink().
capture_time +=
base::TimeDelta::FromMicroseconds(data_buffer.rtTimestamp / 10.0);
} else {
        // We may not get a timestamp from ProcessOutput(); fall back on the
        // current time.
capture_time = base::TimeTicks::Now();
}
// Adjust |capture_time| for the FIFO before pushing.
capture_time -= AudioTimestampHelper::FramesToTime(
fifo_->GetAvailableFrames(), input_format_.nSamplesPerSec);
fifo_->Push(data, samples_produced, input_format_.wBitsPerSample / 8);
// Reset length to indicate buffer availability.
hr = media_buffer_->SetLength(0);
if (FAILED(hr))
DLOG(ERROR) << "Could not reset length, hr = 0x" << std::hex << hr;
// Get a cached AGC volume level which is updated once every second on the
      // audio manager thread. Note that |volume| is also updated each time
// SetVolume() is called through IPC by the render-side AGC.
double volume = 0.0;
GetAgcVolume(&volume);
while (fifo_->available_blocks()) {
if (converter_) {
if (imperfect_buffer_size_conversion_ &&
fifo_->available_blocks() == 1) {
// Special case. We need to buffer up more audio before we can
// convert or else we'll suffer an underrun.
// TODO(grunell): Verify this is really true.
break;
}
converter_->Convert(convert_bus_.get());
sink_->OnData(convert_bus_.get(), capture_time, volume);
// Move the capture time forward for each vended block.
capture_time += AudioTimestampHelper::FramesToTime(
convert_bus_->frames(), output_format_.nSamplesPerSec);
} else {
sink_->OnData(fifo_->Consume(), capture_time, volume);
// Move the capture time forward for each vended block.
capture_time += AudioTimestampHelper::FramesToTime(
packet_size_frames_, input_format_.nSamplesPerSec);
}
}
} // if (data_length > 0)
if (!(data_buffer.dwStatus & DMO_OUTPUT_DATA_BUFFERF_INCOMPLETE)) {
// The DMO cannot currently produce more data. This is the normal case;
// otherwise it means the DMO had more than 10 ms of data available and
// ProcessOutput should be called again.
break;
}
} // while (true)
}
void WASAPIAudioInputStream::HandleError(HRESULT err) {
NOTREACHED() << "Error code: " << err;
if (sink_)
sink_->OnError();
}
HRESULT WASAPIAudioInputStream::SetCaptureDevice() {
DCHECK_EQ(OPEN_RESULT_OK, open_result_);
DCHECK(!endpoint_device_.Get());
Microsoft::WRL::ComPtr<IMMDeviceEnumerator> enumerator;
HRESULT hr =
::CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL,
CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&enumerator));
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_CREATE_INSTANCE;
return hr;
}
// Retrieve the IMMDevice by using the specified role or the specified
// unique endpoint device-identification string.
if (device_id_ == AudioDeviceDescription::kDefaultDeviceId) {
// Retrieve the default capture audio endpoint for the specified role.
// Note that, in Windows Vista, the MMDevice API supports device roles
// but the system-supplied user interface programs do not.
hr = enumerator->GetDefaultAudioEndpoint(eCapture, eConsole,
endpoint_device_.GetAddressOf());
} else if (device_id_ == AudioDeviceDescription::kCommunicationsDeviceId) {
hr = enumerator->GetDefaultAudioEndpoint(eCapture, eCommunications,
endpoint_device_.GetAddressOf());
} else if (device_id_ == AudioDeviceDescription::kLoopbackWithMuteDeviceId) {
// Capture the default playback stream.
hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole,
endpoint_device_.GetAddressOf());
if (SUCCEEDED(hr)) {
endpoint_device_->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL,
NULL, &system_audio_volume_);
}
} else if (device_id_ == AudioDeviceDescription::kLoopbackInputDeviceId) {
// Capture the default playback stream.
hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole,
endpoint_device_.GetAddressOf());
} else {
hr = enumerator->GetDevice(base::UTF8ToUTF16(device_id_).c_str(),
endpoint_device_.GetAddressOf());
}
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_NO_ENDPOINT;
return hr;
}
// Verify that the audio endpoint device is active, i.e., the audio
// adapter that connects to the endpoint device is present and enabled.
DWORD state = DEVICE_STATE_DISABLED;
hr = endpoint_device_->GetState(&state);
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_NO_STATE;
return hr;
}
if (!(state & DEVICE_STATE_ACTIVE)) {
DLOG(ERROR) << "Selected capture device is not active.";
open_result_ = OPEN_RESULT_DEVICE_NOT_ACTIVE;
hr = E_ACCESSDENIED;
}
return hr;
}
HRESULT WASAPIAudioInputStream::GetAudioEngineStreamFormat() {
HRESULT hr = S_OK;
#ifndef NDEBUG
// The GetMixFormat() method retrieves the stream format that the
// audio engine uses for its internal processing of shared-mode streams.
// The method always uses a WAVEFORMATEXTENSIBLE structure, instead
// of a stand-alone WAVEFORMATEX structure, to specify the format.
  // A WAVEFORMATEXTENSIBLE structure can specify both the mapping of
// channels to speakers and the number of bits of precision in each sample.
base::win::ScopedCoMem<WAVEFORMATEXTENSIBLE> format_ex;
hr =
audio_client_->GetMixFormat(reinterpret_cast<WAVEFORMATEX**>(&format_ex));
// See http://msdn.microsoft.com/en-us/windows/hardware/gg463006#EFH
// for details on the WAVE file format.
WAVEFORMATEX format = format_ex->Format;
DVLOG(2) << "WAVEFORMATEX:";
DVLOG(2) << " wFormatTags : 0x" << std::hex << format.wFormatTag;
DVLOG(2) << " nChannels : " << format.nChannels;
DVLOG(2) << " nSamplesPerSec : " << format.nSamplesPerSec;
DVLOG(2) << " nAvgBytesPerSec: " << format.nAvgBytesPerSec;
DVLOG(2) << " nBlockAlign : " << format.nBlockAlign;
DVLOG(2) << " wBitsPerSample : " << format.wBitsPerSample;
DVLOG(2) << " cbSize : " << format.cbSize;
DVLOG(2) << "WAVEFORMATEXTENSIBLE:";
DVLOG(2) << " wValidBitsPerSample: "
<< format_ex->Samples.wValidBitsPerSample;
DVLOG(2) << " dwChannelMask : 0x" << std::hex
<< format_ex->dwChannelMask;
if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_PCM)
DVLOG(2) << " SubFormat : KSDATAFORMAT_SUBTYPE_PCM";
else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
DVLOG(2) << " SubFormat : KSDATAFORMAT_SUBTYPE_IEEE_FLOAT";
else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_WAVEFORMATEX)
DVLOG(2) << " SubFormat : KSDATAFORMAT_SUBTYPE_WAVEFORMATEX";
#endif
return hr;
}
bool WASAPIAudioInputStream::DesiredFormatIsSupported(HRESULT* hr) {
// An application that uses WASAPI to manage shared-mode streams can rely
// on the audio engine to perform only limited format conversions. The audio
// engine can convert between a standard PCM sample size used by the
// application and the floating-point samples that the engine uses for its
// internal processing. However, the format for an application stream
// typically must have the same number of channels and the same sample
// rate as the stream format used by the device.
// Many audio devices support both PCM and non-PCM stream formats. However,
// the audio engine can mix only PCM streams.
base::win::ScopedCoMem<WAVEFORMATEX> closest_match;
HRESULT hresult = audio_client_->IsFormatSupported(
AUDCLNT_SHAREMODE_SHARED, &input_format_, &closest_match);
DLOG_IF(ERROR, hresult == S_FALSE)
<< "Format is not supported but a closest match exists.";
if (hresult == S_FALSE) {
    // Change the format we're going to ask for to better match what the OS
    // can provide. If we succeed in initializing the audio client in this
    // format and are able to convert from this format, we will do that
    // conversion.
input_format_.nChannels = closest_match->nChannels;
input_format_.nSamplesPerSec = closest_match->nSamplesPerSec;
// If the closest match is fixed point PCM (WAVE_FORMAT_PCM or
// KSDATAFORMAT_SUBTYPE_PCM), we use the closest match's bits per sample.
    // Otherwise, we keep the bits per sample as is since we still request
    // fixed point PCM. In that case the closest match is typically in float
    // format (KSDATAFORMAT_SUBTYPE_IEEE_FLOAT).
auto format_is_pcm = [](const WAVEFORMATEX* format) {
if (format->wFormatTag == WAVE_FORMAT_PCM)
return true;
if (format->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
const WAVEFORMATEXTENSIBLE* format_ex =
reinterpret_cast<const WAVEFORMATEXTENSIBLE*>(format);
return format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_PCM;
}
return false;
};
if (format_is_pcm(closest_match))
input_format_.wBitsPerSample = closest_match->wBitsPerSample;
input_format_.nBlockAlign =
(input_format_.wBitsPerSample / 8) * input_format_.nChannels;
input_format_.nAvgBytesPerSec =
input_format_.nSamplesPerSec * input_format_.nBlockAlign;
if (IsSupportedFormatForConversion(input_format_)) {
DVLOG(1) << "Will convert capture audio from: \nbits: "
<< input_format_.wBitsPerSample
<< "\nsample rate: " << input_format_.nSamplesPerSec
<< "\nchannels: " << input_format_.nChannels
<< "\nblock align: " << input_format_.nBlockAlign
<< "\navg bytes per sec: " << input_format_.nAvgBytesPerSec;
SetupConverterAndStoreFormatInfo();
// Indicate that we're good to go with a close match.
hresult = S_OK;
}
}
// At this point, |hresult| == S_OK if the desired format is supported. If
// |hresult| == S_FALSE, the OS supports a closest match but we don't support
// conversion to it. Thus, SUCCEEDED() or FAILED() can't be used to determine
// if the desired format is supported.
*hr = hresult;
return (hresult == S_OK);
}
void WASAPIAudioInputStream::SetupConverterAndStoreFormatInfo() {
  // Ideally, we want a 1:1 ratio between the buffers we get and the buffers
  // we give to OnData so that each buffer we receive from the OS can be
  // directly converted to a buffer that matches what was asked for.
const double buffer_ratio =
output_format_.nSamplesPerSec / static_cast<double>(packet_size_frames_);
double new_frames_per_buffer = input_format_.nSamplesPerSec / buffer_ratio;
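  // For example, a 16 kHz output with 160-frame packets gives a buffer_ratio
  // of 100 buffers per second; a 48 kHz input then needs 480 frames per
  // buffer.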
const auto input_layout = GuessChannelLayout(input_format_.nChannels);
DCHECK_NE(CHANNEL_LAYOUT_UNSUPPORTED, input_layout);
const auto output_layout = GuessChannelLayout(output_format_.nChannels);
DCHECK_NE(CHANNEL_LAYOUT_UNSUPPORTED, output_layout);
const AudioParameters input(AudioParameters::AUDIO_PCM_LOW_LATENCY,
input_layout, input_format_.nSamplesPerSec,
static_cast<int>(new_frames_per_buffer));
const AudioParameters output(AudioParameters::AUDIO_PCM_LOW_LATENCY,
output_layout, output_format_.nSamplesPerSec,
packet_size_frames_);
converter_.reset(new AudioConverter(input, output, false));
converter_->AddInput(this);
converter_->PrimeWithSilence();
convert_bus_ = AudioBus::Create(output);
// Update our packet size assumptions based on the new format.
const auto new_bytes_per_buffer =
static_cast<int>(new_frames_per_buffer) * input_format_.nBlockAlign;
packet_size_frames_ = new_bytes_per_buffer / input_format_.nBlockAlign;
packet_size_bytes_ = new_bytes_per_buffer;
frame_size_bytes_ = input_format_.nBlockAlign;
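  // std::modf() returns the fractional part of |new_frames_per_buffer| and
  // stores the integral part back into it; a nonzero fractional part means
  // that the input/output buffer ratio is imperfect.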
imperfect_buffer_size_conversion_ =
std::modf(new_frames_per_buffer, &new_frames_per_buffer) != 0.0;
DVLOG_IF(1, imperfect_buffer_size_conversion_)
<< "Audio capture data conversion: Need to inject fifo";
}
HRESULT WASAPIAudioInputStream::InitializeAudioEngine() {
DCHECK_EQ(OPEN_RESULT_OK, open_result_);
DWORD flags;
  // Use event-driven mode only for regular input devices. For loopback the
  // EVENTCALLBACK flag is specified when initializing
// |audio_render_client_for_loopback_|.
if (AudioDeviceDescription::IsLoopbackDevice(device_id_)) {
flags = AUDCLNT_STREAMFLAGS_LOOPBACK | AUDCLNT_STREAMFLAGS_NOPERSIST;
} else {
flags = AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST;
}
// Initialize the audio stream between the client and the device.
// We connect indirectly through the audio engine by using shared mode.
  // The buffer duration is normally set to 0, which lets the audio engine
  // pick the minimum buffer size needed to avoid glitches between the
  // periodic processing passes. It can be set to 100 ms via a feature.
// Note: if the value is changed, update the description in
// chrome/browser/flag_descriptions.cc.
REFERENCE_TIME buffer_duration =
base::FeatureList::IsEnabled(features::kIncreaseInputAudioBufferSize)
? 100 * 1000 * 10 // 100 ms expressed in 100-ns units.
: 0;
HRESULT hr = audio_client_->Initialize(
AUDCLNT_SHAREMODE_SHARED, flags, buffer_duration,
0, // device period, n/a for shared mode.
&input_format_,
device_id_ == AudioDeviceDescription::kCommunicationsDeviceId
? &kCommunicationsSessionId
: nullptr);
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_AUDIO_CLIENT_INIT_FAILED;
base::UmaHistogramSparse("Media.Audio.Capture.Win.InitError", hr);
MaybeReportFormatRelatedInitError(hr);
return hr;
}
// Retrieve the length of the endpoint buffer shared between the client
// and the audio engine. The buffer length determines the maximum amount
// of capture data that the audio engine can read from the endpoint buffer
// during a single processing pass.
hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_);
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_GET_BUFFER_SIZE_FAILED;
return hr;
}
const int endpoint_buffer_size_ms =
static_cast<double>(endpoint_buffer_size_frames_ * 1000) /
input_format_.nSamplesPerSec +
0.5; // Round to closest integer
UMA_HISTOGRAM_CUSTOM_TIMES(
"Media.Audio.Capture.Win.EndpointBufferSize",
base::TimeDelta::FromMilliseconds(endpoint_buffer_size_ms),
base::TimeDelta::FromMilliseconds(1), base::TimeDelta::FromSeconds(1),
50);
DVLOG(1) << "Endpoint buffer size: " << endpoint_buffer_size_frames_
<< " frames (" << endpoint_buffer_size_ms << " ms)";
// The period between processing passes by the audio engine is fixed for a
// particular audio endpoint device and represents the smallest processing
// quantum for the audio engine. This period plus the stream latency between
// the buffer and endpoint device represents the minimum possible latency
// that an audio application can achieve.
REFERENCE_TIME device_period_shared_mode = 0;
REFERENCE_TIME device_period_exclusive_mode = 0;
HRESULT hr_dbg = audio_client_->GetDevicePeriod(
&device_period_shared_mode, &device_period_exclusive_mode);
if (SUCCEEDED(hr_dbg)) {
    // The 5000 addition rounds the result to the closest integer.
    const int device_period_ms = (device_period_shared_mode + 5000) / 10000;
UMA_HISTOGRAM_CUSTOM_TIMES(
"Media.Audio.Capture.Win.DevicePeriod",
base::TimeDelta::FromMilliseconds(device_period_ms),
base::TimeDelta::FromMilliseconds(1), base::TimeDelta::FromSeconds(1),
50);
DVLOG(1) << "Device period: " << device_period_ms << " ms";
}
REFERENCE_TIME latency = 0;
hr_dbg = audio_client_->GetStreamLatency(&latency);
if (SUCCEEDED(hr_dbg)) {
    // The 5000 addition rounds the result to the closest integer.
    const int latency_ms = (latency + 5000) / 10000;
UMA_HISTOGRAM_CUSTOM_TIMES("Media.Audio.Capture.Win.StreamLatency",
base::TimeDelta::FromMilliseconds(latency_ms),
base::TimeDelta::FromMilliseconds(1),
base::TimeDelta::FromSeconds(1), 50);
DVLOG(1) << "Stream latency: " << latency_ms << " ms";
}
// Set the event handle that the audio engine will signal each time a buffer
// becomes ready to be processed by the client.
//
  // In the loopback case the capture device doesn't receive any events, so we
// need to create a separate playback client to get notifications. According
// to MSDN:
//
// A pull-mode capture client does not receive any events when a stream is
// initialized with event-driven buffering and is loopback-enabled. To
// work around this, initialize a render stream in event-driven mode. Each
// time the client receives an event for the render stream, it must signal
// the capture client to run the capture thread that reads the next set of
// samples from the capture endpoint buffer.
//
// http://msdn.microsoft.com/en-us/library/windows/desktop/dd316551(v=vs.85).aspx
if (AudioDeviceDescription::IsLoopbackDevice(device_id_)) {
hr = endpoint_device_->Activate(
__uuidof(IAudioClient), CLSCTX_INPROC_SERVER, NULL,
&audio_render_client_for_loopback_);
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_LOOPBACK_ACTIVATE_FAILED;
return hr;
}
hr = audio_render_client_for_loopback_->Initialize(
AUDCLNT_SHAREMODE_SHARED,
AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST, 0, 0,
&input_format_, NULL);
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_LOOPBACK_INIT_FAILED;
return hr;
}
hr = audio_render_client_for_loopback_->SetEventHandle(
audio_samples_ready_event_.Get());
} else {
hr = audio_client_->SetEventHandle(audio_samples_ready_event_.Get());
}
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_SET_EVENT_HANDLE;
return hr;
}
// Get access to the IAudioCaptureClient interface. This interface
// enables us to read input data from the capture endpoint buffer.
hr = audio_client_->GetService(IID_PPV_ARGS(&audio_capture_client_));
if (FAILED(hr)) {
open_result_ = OPEN_RESULT_NO_CAPTURE_CLIENT;
return hr;
}
// Obtain a reference to the ISimpleAudioVolume interface which enables
// us to control the master volume level of an audio session.
hr = audio_client_->GetService(IID_PPV_ARGS(&simple_audio_volume_));
if (FAILED(hr))
open_result_ = OPEN_RESULT_NO_AUDIO_VOLUME;
audio_client_->GetService(IID_PPV_ARGS(&audio_clock_));
if (!audio_clock_)
LOG(WARNING) << "IAudioClock unavailable, capture times may be inaccurate.";
return hr;
}
void WASAPIAudioInputStream::ReportOpenResult(HRESULT hr) const {
DCHECK(!opened_); // This method must be called before we set this flag.
UMA_HISTOGRAM_ENUMERATION("Media.Audio.Capture.Win.Open", open_result_,
OPEN_RESULT_MAX + 1);
if (open_result_ != OPEN_RESULT_OK &&
open_result_ != OPEN_RESULT_OK_WITH_RESAMPLING) {
log_callback_.Run(base::StringPrintf(
"WASAPIAIS::Open: failed, result = %d, hresult = %#lx, "
"input format = %#x/%d/%ld/%d/%d/%ld/%d, "
"output format = %#x/%d/%ld/%d/%d/%ld/%d",
// clang-format off
open_result_, hr,
input_format_.wFormatTag, input_format_.nChannels,
input_format_.nSamplesPerSec, input_format_.wBitsPerSample,
input_format_.nBlockAlign, input_format_.nAvgBytesPerSec,
input_format_.cbSize,
output_format_.wFormatTag, output_format_.nChannels,
output_format_.nSamplesPerSec, output_format_.wBitsPerSample,
output_format_.nBlockAlign, output_format_.nAvgBytesPerSec,
output_format_.cbSize));
// clang-format on
}
}
void WASAPIAudioInputStream::MaybeReportFormatRelatedInitError(
HRESULT hr) const {
if (hr != AUDCLNT_E_UNSUPPORTED_FORMAT && hr != E_INVALIDARG)
return;
const FormatRelatedInitError format_related_error =
hr == AUDCLNT_E_UNSUPPORTED_FORMAT
? converter_.get()
? FormatRelatedInitError::kUnsupportedFormatWithFormatConversion
: FormatRelatedInitError::kUnsupportedFormat
// Otherwise |hr| == E_INVALIDARG.
: converter_.get()
? FormatRelatedInitError::kInvalidArgumentWithFormatConversion
: FormatRelatedInitError::kInvalidArgument;
base::UmaHistogramEnumeration(
"Media.Audio.Capture.Win.InitError.FormatRelated", format_related_error,
FormatRelatedInitError::kCount);
}
bool WASAPIAudioInputStream::InitializeDmo() {
HRESULT hr = ::CoCreateInstance(CLSID_CWMAudioAEC, NULL, CLSCTX_INPROC_SERVER,
IID_IMediaObject, &voice_capture_dmo_);
if (FAILED(hr)) {
DLOG(ERROR) << "Creating DMO failed.";
return false;
}
if (!SetDmoProperties())
return false;
if (!SetDmoFormat())
return false;
hr = voice_capture_dmo_->AllocateStreamingResources();
if (FAILED(hr)) {
DLOG(ERROR) << "Allocating DMO resources failed.";
return false;
}
SetupConverterAndStoreFormatInfo();
media_buffer_ =
new MediaBufferImpl(endpoint_buffer_size_frames_ * frame_size_bytes_);
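  // With the fixed 16 kHz mono 16-bit DMO output format this is
  // 160 frames * 2 bytes = 320 bytes, i.e. 10 ms of audio.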
if (!CreateDummyRenderClientsForDmo())
return false;
// Get volume interface.
Microsoft::WRL::ComPtr<IAudioSessionManager> audio_session_manager;
hr = endpoint_device_->Activate(__uuidof(IAudioSessionManager),
CLSCTX_INPROC_SERVER, NULL,
&audio_session_manager);
if (FAILED(hr)) {
DLOG(ERROR) << "Obtaining audio session manager failed.";
return false;
}
hr = audio_session_manager->GetSimpleAudioVolume(
NULL, // AudioSessionGuid. NULL for default session.
FALSE, // CrossProcessSession.
&simple_audio_volume_);
if (FAILED(hr)) {
DLOG(ERROR) << "Obtaining audio volume interface failed.";
return false;
}
return true;
}
bool WASAPIAudioInputStream::SetDmoProperties() {
Microsoft::WRL::ComPtr<IPropertyStore> ps;
HRESULT hr = voice_capture_dmo_->QueryInterface(IID_IPropertyStore, &ps);
if (FAILED(hr) || !ps) {
DLOG(ERROR) << "Getting DMO property store failed.";
return false;
}
// Set devices.
if (!SetDmoDevices(ps.Get())) {
DLOG(ERROR) << "Setting device indices failed.";
return false;
}
// Set DMO mode to AEC only.
if (FAILED(CoreAudioUtil::SetVtI4Property(
ps.Get(), MFPKEY_WMAAECMA_SYSTEM_MODE, SINGLE_CHANNEL_AEC))) {
DLOG(ERROR) << "Setting DMO system mode failed.";
return false;
}
// Enable the feature mode. This lets us override the default processing
// settings below.
if (FAILED(CoreAudioUtil::SetBoolProperty(
ps.Get(), MFPKEY_WMAAECMA_FEATURE_MODE, VARIANT_TRUE))) {
DLOG(ERROR) << "Setting DMO feature mode failed.";
return false;
}
// Disable analog AGC (default enabled).
if (FAILED(CoreAudioUtil::SetBoolProperty(
ps.Get(), MFPKEY_WMAAECMA_MIC_GAIN_BOUNDER, VARIANT_FALSE))) {
DLOG(ERROR) << "Setting DMO mic gain bounder failed.";
return false;
}
// Disable noise suppression (default enabled).
if (FAILED(CoreAudioUtil::SetVtI4Property(ps.Get(), MFPKEY_WMAAECMA_FEATR_NS,
0))) {
DLOG(ERROR) << "Disabling DMO NS failed.";
return false;
}
return true;
}
bool WASAPIAudioInputStream::SetDmoFormat() {
  // Zero-initialize the media type so that freeing it is always safe, then
  // let MoInitMediaType() allocate the format block before we fill in the
  // fields.
  DMO_MEDIA_TYPE mt = {};
  HRESULT hr = MoInitMediaType(&mt, sizeof(WAVEFORMATEX));
  if (FAILED(hr)) {
    DLOG(ERROR) << "Init media type for DMO failed.";
    return false;
  }
  mt.majortype = MEDIATYPE_Audio;
  mt.subtype = MEDIASUBTYPE_PCM;
  mt.lSampleSize = 0;
  mt.bFixedSizeSamples = TRUE;
  mt.bTemporalCompression = FALSE;
  mt.formattype = FORMAT_WaveFormatEx;
WAVEFORMATEX* dmo_output_format =
reinterpret_cast<WAVEFORMATEX*>(mt.pbFormat);
dmo_output_format->wFormatTag = WAVE_FORMAT_PCM;
dmo_output_format->nChannels = 1;
dmo_output_format->nSamplesPerSec = 16000;
dmo_output_format->nAvgBytesPerSec = 32000;
dmo_output_format->nBlockAlign = 2;
dmo_output_format->wBitsPerSample = 16;
dmo_output_format->cbSize = 0;
DCHECK(IsSupportedFormatForConversion(*dmo_output_format));
// Store the format used.
input_format_.wFormatTag = dmo_output_format->wFormatTag;
input_format_.nChannels = dmo_output_format->nChannels;
input_format_.nSamplesPerSec = dmo_output_format->nSamplesPerSec;
input_format_.wBitsPerSample = dmo_output_format->wBitsPerSample;
input_format_.nBlockAlign = dmo_output_format->nBlockAlign;
input_format_.nAvgBytesPerSec = dmo_output_format->nAvgBytesPerSec;
input_format_.cbSize = dmo_output_format->cbSize;
hr = voice_capture_dmo_->SetOutputType(0, &mt, 0);
MoFreeMediaType(&mt);
if (FAILED(hr)) {
DLOG(ERROR) << "Setting DMO output type failed.";
return false;
}
  // We use a 10 ms buffer size for the DMO.
endpoint_buffer_size_frames_ = input_format_.nSamplesPerSec / 100;
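  // At the 16 kHz rate set above, 10 ms corresponds to 160 frames.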
return true;
}
bool WASAPIAudioInputStream::SetDmoDevices(IPropertyStore* ps) {
// Look up the input device's index.
const base::Optional<WORD> input_device_index =
GetAudioDeviceCollectionIndexFromId(device_id_, eCapture);
if (!input_device_index) {
log_callback_.Run(
base::StringPrintf("WASAPIAIS:SetDmoDevices: Could not "
"resolve input device index for %s",
device_id_.c_str()));
return false;
}
// Look up the output device's index.
const base::Optional<WORD> output_device_index =
GetAudioDeviceCollectionIndexFromId(output_device_id_for_aec_, eRender);
if (!output_device_index) {
log_callback_.Run(
base::StringPrintf("WASAPIAIS:SetDmoDevices: Could not "
"resolve output device index for %s",
output_device_id_for_aec_.c_str()));
return false;
}
  // The DEVICE_INDEXES property packs the output device index into the upper
  // half of a LONG and the input device index into the lower half.
LONG device_index_value =
(static_cast<ULONG>(*output_device_index) << 16) +
(static_cast<ULONG>(*input_device_index) & 0x0000ffff);
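  // For example, output index 1 and input index 2 pack to 0x00010002; a
  // default device (-1) contributes 0xffff in its half.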
  return SUCCEEDED(CoreAudioUtil::SetVtI4Property(
      ps, MFPKEY_WMAAECMA_DEVICE_INDEXES, device_index_value));
}
bool WASAPIAudioInputStream::CreateDummyRenderClientsForDmo() {
Microsoft::WRL::ComPtr<IAudioClient> audio_client(CoreAudioUtil::CreateClient(
output_device_id_for_aec_, eRender, eConsole));
if (!audio_client.Get()) {
DLOG(ERROR) << "Failed to create audio client for dummy rendering for DMO.";
return false;
}
WAVEFORMATPCMEX mix_format;
HRESULT hr =
CoreAudioUtil::GetSharedModeMixFormat(audio_client.Get(), &mix_format);
if (FAILED(hr)) {
DLOG(ERROR) << "Failed to get mix format.";
return false;
}
hr = audio_client->Initialize(AUDCLNT_SHAREMODE_SHARED,
0, // Stream flags
0, // Buffer duration
0, // Device period
reinterpret_cast<WAVEFORMATEX*>(&mix_format),
NULL);
if (FAILED(hr)) {
DLOG(ERROR) << "Failed to initalize audio client for rendering.";
return false;
}
Microsoft::WRL::ComPtr<IAudioRenderClient> audio_render_client =
CoreAudioUtil::CreateRenderClient(audio_client.Get());
if (!audio_render_client.Get()) {
DLOG(ERROR) << "Failed to create audio render client.";
return false;
}
audio_client_for_render_ = audio_client;
audio_render_client_ = audio_render_client;
return true;
}
double WASAPIAudioInputStream::ProvideInput(AudioBus* audio_bus,
uint32_t frames_delayed) {
fifo_->Consume()->CopyTo(audio_bus);
return 1.0;
}
void WASAPIAudioInputStream::ReportDelayStatsAndUpdateGlitchCount(
bool discontinuity_flagged,
UINT64 device_position,
base::TimeTicks capture_time) {
  // Report the delay, but only if we have a valid capture time. Unreasonably
  // large delays are clamped at 1 second, since some devices report capture
  // timestamps that are way off.
if (capture_time > base::TimeTicks()) {
base::TimeDelta delay = base::TimeTicks::Now() - capture_time;
UMA_HISTOGRAM_CUSTOM_TIMES("Media.Audio.Capture.DeviceLatency", delay,
base::TimeDelta::FromMilliseconds(1),
base::TimeDelta::FromSeconds(1), 50);
}
  // Detect glitches. Detect and count separately based on the expected device
  // position and the discontinuity flag, since they have been shown to not
  // always be consistent with each other.
if (expected_next_device_position_ != 0) {
if (device_position > expected_next_device_position_) {
++total_glitches_;
auto lost_frames = device_position - expected_next_device_position_;
total_lost_frames_ += lost_frames;
if (lost_frames > largest_glitch_frames_)
largest_glitch_frames_ = lost_frames;
} else if (device_position < expected_next_device_position_) {
++total_device_position_less_than_expected_;
}
if (discontinuity_flagged)
++total_discontinuities_;
if (device_position > expected_next_device_position_ &&
discontinuity_flagged) {
++total_concurrent_glitch_and_discontinuities_;
}
}
}
void WASAPIAudioInputStream::ReportAndResetGlitchStats() {
UMA_HISTOGRAM_COUNTS("Media.Audio.Capture.Glitches", total_glitches_);
UMA_HISTOGRAM_COUNTS("Media.Audio.Capture.Win.DevicePositionLessThanExpected",
total_device_position_less_than_expected_);
UMA_HISTOGRAM_COUNTS("Media.Audio.Capture.Win.Discontinuities",
total_discontinuities_);
UMA_HISTOGRAM_COUNTS(
"Media.Audio.Capture.Win.ConcurrentGlitchAndDiscontinuities",
total_concurrent_glitch_and_discontinuities_);
double lost_frames_ms =
(total_lost_frames_ * 1000) / input_format_.nSamplesPerSec;
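  // For example, 480 lost frames at 48000 Hz amounts to 10 ms.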
std::string log_message = base::StringPrintf(
"WASAPIAIS: Total glitches=%d. Total frames lost=%llu (%.0lf ms). Total "
"discontinuities=%d. Total concurrent glitch and discont=%d. Total low "
"device "
"positions=%d.",
total_glitches_, total_lost_frames_, lost_frames_ms,
total_discontinuities_, total_concurrent_glitch_and_discontinuities_,
total_device_position_less_than_expected_);
log_callback_.Run(log_message);
if (total_glitches_ != 0) {
UMA_HISTOGRAM_LONG_TIMES("Media.Audio.Capture.LostFramesInMs",
base::TimeDelta::FromMilliseconds(lost_frames_ms));
int64_t largest_glitch_ms =
(largest_glitch_frames_ * 1000) / input_format_.nSamplesPerSec;
UMA_HISTOGRAM_CUSTOM_TIMES(
"Media.Audio.Capture.LargestGlitchMs",
base::TimeDelta::FromMilliseconds(largest_glitch_ms),
base::TimeDelta::FromMilliseconds(1), base::TimeDelta::FromMinutes(1),
50);
DLOG(WARNING) << log_message;
}
expected_next_device_position_ = 0;
total_glitches_ = 0;
total_device_position_less_than_expected_ = 0;
total_discontinuities_ = 0;
total_concurrent_glitch_and_discontinuities_ = 0;
total_lost_frames_ = 0;
largest_glitch_frames_ = 0;
}
} // namespace media