| // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #include "media/audio/win/audio_low_latency_input_win.h" |
| |
| #include <audiopolicy.h> |
| #include <mediaobj.h> |
| #include <objbase.h> |
| #include <uuids.h> |
| #include <wmcodecdsp.h> |
| |
| #include <algorithm> |
| #include <cmath> |
| #include <memory> |
| |
| #include "base/logging.h" |
| #include "base/metrics/histogram_functions.h" |
| #include "base/metrics/histogram_macros.h" |
| #include "base/strings/stringprintf.h" |
| #include "base/strings/utf_string_conversions.h" |
| #include "base/trace_event/trace_event.h" |
| #include "media/audio/audio_device_description.h" |
| #include "media/audio/audio_features.h" |
| #include "media/audio/win/avrt_wrapper_win.h" |
| #include "media/audio/win/core_audio_util_win.h" |
| #include "media/base/audio_block_fifo.h" |
| #include "media/base/audio_bus.h" |
| #include "media/base/audio_timestamp_helper.h" |
| #include "media/base/channel_layout.h" |
| #include "media/base/limits.h" |
| |
| using base::win::ScopedCOMInitializer; |
| |
| namespace media { |
| |
| namespace { |
| |
| // Errors when initializing the audio client related to the audio format. Split |
| // by whether we're using format conversion or not. Used for reporting stats - |
| // do not renumber entries. |
| enum FormatRelatedInitError { |
| kUnsupportedFormat = 0, |
| kUnsupportedFormatWithFormatConversion = 1, |
| kInvalidArgument = 2, |
| kInvalidArgumentWithFormatConversion = 3, |
| kCount |
| }; |
| |
| bool IsSupportedFormatForConversion(const WAVEFORMATEX& format) { |
| if (format.nSamplesPerSec < limits::kMinSampleRate || |
| format.nSamplesPerSec > limits::kMaxSampleRate) { |
| return false; |
| } |
| |
| switch (format.wBitsPerSample) { |
| case 8: |
| case 16: |
| case 32: |
| break; |
| default: |
| return false; |
| } |
| |
| if (GuessChannelLayout(format.nChannels) == CHANNEL_LAYOUT_UNSUPPORTED) { |
| LOG(ERROR) << "Hardware configuration not supported for audio conversion"; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| // Returns the index of the device in the device collection, or -1 for the |
| // default device, as used by the voice processing DMO. |
| base::Optional<WORD> GetAudioDeviceCollectionIndexFromId( |
| const std::string& device_id, |
| const EDataFlow data_flow) { |
| // The DMO expects the default device to be specified with -1 (which wraps |
| // to 0xFFFF when stored in a WORD). |
| if (AudioDeviceDescription::IsDefaultDevice(device_id)) |
| return -1; |
| |
| WORD device_index = -1; |
| HRESULT hr = E_FAIL; |
| // The default communications device does not have an index itself, so we |
| // need to find the index of the underlying device. |
| if (AudioDeviceDescription::IsCommunicationsDevice(device_id)) { |
| const std::string communications_id = |
| (data_flow == eCapture) |
| ? CoreAudioUtil::GetCommunicationsInputDeviceID() |
| : CoreAudioUtil::GetCommunicationsOutputDeviceID(); |
| hr = CoreAudioUtil::GetDeviceCollectionIndex(communications_id, data_flow, |
| &device_index); |
| } else { |
| // Otherwise, just look for the device_id directly. |
| hr = CoreAudioUtil::GetDeviceCollectionIndex(device_id, data_flow, |
| &device_index); |
| } |
| |
| if (FAILED(hr) || hr == S_FALSE) |
| return base::nullopt; |
| |
| return device_index; |
| } |
| |
| // Implementation of IMediaBuffer, as required for |
| // IMediaObject::ProcessOutput(). After consuming data provided by |
| // ProcessOutput(), call SetLength() to update the buffer availability. |
| // Example implementation: |
| // http://msdn.microsoft.com/en-us/library/dd376684(v=vs.85).aspx |
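| // |
| // A minimal usage sketch (illustrative; |size|, |dmo| and |status| are |
| // placeholders), mirroring what PullDmoCaptureDataAndPushToSink() below |
| // does with |media_buffer_|: |
| // |
| //   Microsoft::WRL::ComPtr<IMediaBuffer> buffer(new MediaBufferImpl(size)); |
| //   DMO_OUTPUT_DATA_BUFFER data_buffer = {0}; |
| //   data_buffer.pBuffer = buffer.Get(); |
| //   dmo->ProcessOutput(0, 1, &data_buffer, &status); |
| //   ... consume via buffer->GetBufferAndLength(&data, &length) ... |
| //   buffer->SetLength(0);  // Mark the buffer as available again. |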
| class MediaBufferImpl : public IMediaBuffer { |
| public: |
| explicit MediaBufferImpl(DWORD max_length) |
| : data_(new BYTE[max_length]), max_length_(max_length) {} |
| |
| // IMediaBuffer implementation. |
| STDMETHOD(GetBufferAndLength)(BYTE** buffer, DWORD* length) { |
| if (!buffer || !length) |
| return E_POINTER; |
| |
| *buffer = data_.get(); |
| *length = length_; |
| return S_OK; |
| } |
| |
| STDMETHOD(GetMaxLength)(DWORD* max_length) { |
| if (!max_length) |
| return E_POINTER; |
| |
| *max_length = max_length_; |
| return S_OK; |
| } |
| |
| STDMETHOD(SetLength)(DWORD length) { |
| if (length > max_length_) |
| return E_INVALIDARG; |
| |
| length_ = length; |
| return S_OK; |
| } |
| |
| // IUnknown implementation. |
| STDMETHOD_(ULONG, AddRef)() { return InterlockedIncrement(&ref_count_); } |
| |
| STDMETHOD(QueryInterface)(REFIID riid, void** object) { |
| if (!object) |
| return E_POINTER; |
| if (riid != IID_IMediaBuffer && riid != IID_IUnknown) |
| return E_NOINTERFACE; |
| |
| *object = static_cast<IMediaBuffer*>(this); |
| AddRef(); |
| return S_OK; |
| } |
| |
| STDMETHOD_(ULONG, Release)() { |
| LONG ref_count = InterlockedDecrement(&ref_count_); |
| if (ref_count == 0) |
| delete this; |
| |
| return ref_count; |
| } |
| |
| private: |
| virtual ~MediaBufferImpl() {} |
| |
| std::unique_ptr<BYTE[]> data_; |
| DWORD length_ = 0; |
| const DWORD max_length_; |
| LONG ref_count_ = 0; |
| }; |
| |
| } // namespace |
| |
| WASAPIAudioInputStream::WASAPIAudioInputStream( |
| AudioManagerWin* manager, |
| const AudioParameters& params, |
| const std::string& device_id, |
| const AudioManager::LogCallback& log_callback, |
| AudioManagerBase::VoiceProcessingMode voice_processing_mode) |
| : manager_(manager), |
| device_id_(device_id), |
| output_device_id_for_aec_(AudioDeviceDescription::kDefaultDeviceId), |
| log_callback_(log_callback), |
| use_voice_processing_(voice_processing_mode == |
| AudioManagerBase::VoiceProcessingMode::kEnabled) { |
| DCHECK(manager_); |
| DCHECK(!device_id_.empty()); |
| DCHECK(!log_callback_.is_null()); |
| |
| DVLOG_IF(1, use_voice_processing_) << "Using Windows voice capture DSP DMO."; |
| |
| // Load the Avrt DLL if not already loaded. Required to support MMCSS. |
| bool avrt_init = avrt::Initialize(); |
| DCHECK(avrt_init) << "Failed to load the Avrt.dll"; |
| |
| const SampleFormat kSampleFormat = kSampleFormatS16; |
| |
| // Set up the desired output format specified by the client. |
| output_format_.wFormatTag = WAVE_FORMAT_PCM; |
| output_format_.nChannels = params.channels(); |
| output_format_.nSamplesPerSec = params.sample_rate(); |
| output_format_.wBitsPerSample = SampleFormatToBitsPerChannel(kSampleFormat); |
| output_format_.nBlockAlign = |
| (output_format_.wBitsPerSample / 8) * output_format_.nChannels; |
| output_format_.nAvgBytesPerSec = |
| output_format_.nSamplesPerSec * output_format_.nBlockAlign; |
| output_format_.cbSize = 0; |
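| // For example (illustrative values): 16-bit stereo at 48 kHz gives |
| // nBlockAlign = (16 / 8) * 2 = 4 bytes per frame and nAvgBytesPerSec = |
| // 48000 * 4 = 192000. |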
| |
| // Set the input (capture) format to the desired output format. In most cases, |
| // it will be used unchanged. |
| input_format_ = output_format_; |
| |
| // Size in bytes of each audio frame. |
| frame_size_bytes_ = input_format_.nBlockAlign; |
| |
| // Store size of audio packets which we expect to get from the audio |
| // endpoint device in each capture event. |
| packet_size_bytes_ = params.GetBytesPerBuffer(kSampleFormat); |
| packet_size_frames_ = packet_size_bytes_ / input_format_.nBlockAlign; |
| DVLOG(1) << "Number of bytes per audio frame : " << frame_size_bytes_; |
| DVLOG(1) << "Number of audio frames per packet: " << packet_size_frames_; |
| |
| // All events are auto-reset events and non-signaled initially. |
| |
| // Create the event which the audio engine will signal each time |
| // a buffer becomes ready to be processed by the client. |
| audio_samples_ready_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL)); |
| DCHECK(audio_samples_ready_event_.IsValid()); |
| |
| // Create the event which will be set in Stop() when capturing shall stop. |
| stop_capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL)); |
| DCHECK(stop_capture_event_.IsValid()); |
| } |
| |
| WASAPIAudioInputStream::~WASAPIAudioInputStream() { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| } |
| |
| bool WASAPIAudioInputStream::Open() { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| DCHECK_EQ(OPEN_RESULT_OK, open_result_); |
| |
| // Verify that we are not already opened. |
| if (opened_) { |
| log_callback_.Run("WASAPIAIS::Open: already open"); |
| return false; |
| } |
| |
| // Obtain a reference to the IMMDevice interface of the capturing |
| // device with the specified unique identifier or role which was |
| // set at construction. |
| HRESULT hr = SetCaptureDevice(); |
| if (FAILED(hr)) { |
| ReportOpenResult(hr); |
| return false; |
| } |
| |
| // If voice processing is enabled, initialize the DMO that is used for it. The |
| // remainder of the function initializes an audio capture client (the normal |
| // case). Either the DMO or the capture client is used. |
| // TODO(grunell): Refactor out the audio capture client initialization to its |
| // own function. |
| if (use_voice_processing_) { |
| opened_ = InitializeDmo(); |
| return opened_; |
| } |
| |
| // Obtain an IAudioClient interface which enables us to create and initialize |
| // an audio stream between an audio application and the audio engine. |
| hr = endpoint_device_->Activate(__uuidof(IAudioClient), CLSCTX_INPROC_SERVER, |
| NULL, &audio_client_); |
| if (FAILED(hr)) { |
| open_result_ = OPEN_RESULT_ACTIVATION_FAILED; |
| ReportOpenResult(hr); |
| return false; |
| } |
| |
| #ifndef NDEBUG |
| // Retrieve the stream format which the audio engine uses for its internal |
| // processing/mixing of shared-mode streams. This function call is for |
| // diagnostic purposes only and only in debug mode. |
| hr = GetAudioEngineStreamFormat(); |
| #endif |
| |
| // Verify that the selected audio endpoint supports the specified format |
| // set during construction. |
| hr = S_OK; |
| if (!DesiredFormatIsSupported(&hr)) { |
| open_result_ = OPEN_RESULT_FORMAT_NOT_SUPPORTED; |
| ReportOpenResult(hr); |
| return false; |
| } |
| |
| // Initialize the audio stream between the client and the device using |
| // shared mode and the lowest possible glitch-free latency. |
| hr = InitializeAudioEngine(); |
| if (SUCCEEDED(hr) && converter_) |
| open_result_ = OPEN_RESULT_OK_WITH_RESAMPLING; |
| ReportOpenResult(hr); // Report before we assign a value to |opened_|. |
| opened_ = SUCCEEDED(hr); |
| |
| return opened_; |
| } |
| |
| void WASAPIAudioInputStream::Start(AudioInputCallback* callback) { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| DCHECK(callback); |
| DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully"; |
| if (!opened_) |
| return; |
| |
| if (started_) |
| return; |
| |
| // TODO(grunell): Refactor the |use_voice_processing_| conditions in this |
| // function to clean up the code. |
| if (use_voice_processing_) { |
| // Pre-fill render buffer with silence. |
| if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence( |
| audio_client_for_render_.Get(), audio_render_client_.Get())) { |
| DLOG(WARNING) << "Failed to pre-fill render buffer with silence."; |
| } |
| } else { |
| if (device_id_ == AudioDeviceDescription::kLoopbackWithMuteDeviceId && |
| system_audio_volume_) { |
| BOOL muted = false; |
| system_audio_volume_->GetMute(&muted); |
| |
| // If the system audio is already muted when capture starts, there is no |
| // need to mute it again, and we will not unmute the system audio when |
| // capture stops. |
| if (!muted) { |
| system_audio_volume_->SetMute(true, NULL); |
| mute_done_ = true; |
| } |
| } |
| } |
| |
| DCHECK(!sink_); |
| sink_ = callback; |
| |
| // Starts periodic AGC microphone measurements if the AGC has been enabled |
| // using SetAutomaticGainControl(). |
| StartAgc(); |
| |
| // Create and start the thread that will drive the capturing by waiting for |
| // capture events. |
| DCHECK(!capture_thread_.get()); |
| capture_thread_.reset(new base::DelegateSimpleThread( |
| this, "wasapi_capture_thread", |
| base::SimpleThread::Options(base::ThreadPriority::REALTIME_AUDIO))); |
| capture_thread_->Start(); |
| |
| HRESULT hr = E_FAIL; |
| if (use_voice_processing_) { |
| hr = audio_client_for_render_->Start(); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Failed to start output streaming: " << std::hex << hr |
| << ", proceeding without rendering."; |
| } |
| } else { |
| // Start streaming data between the endpoint buffer and the audio engine. |
| hr = audio_client_->Start(); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Failed to start input streaming."; |
| log_callback_.Run(base::StringPrintf( |
| "WASAPIAIS::Start: Failed to start audio client, hresult = %#lx", |
| hr)); |
| } |
| |
| if (SUCCEEDED(hr) && audio_render_client_for_loopback_.Get()) { |
| hr = audio_render_client_for_loopback_->Start(); |
| if (FAILED(hr)) |
| log_callback_.Run(base::StringPrintf( |
| "WASAPIAIS::Start: Failed to start render client for loopback, " |
| "hresult = %#lx", |
| hr)); |
| } |
| } |
| |
| started_ = SUCCEEDED(hr); |
| } |
| |
| void WASAPIAudioInputStream::Stop() { |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| DVLOG(1) << "WASAPIAudioInputStream::Stop()"; |
| if (!started_) |
| return; |
| |
| // We have muted system audio for capturing, so we need to unmute it when |
| // capturing stops. |
| if (device_id_ == AudioDeviceDescription::kLoopbackWithMuteDeviceId && |
| mute_done_) { |
| DCHECK(system_audio_volume_); |
| if (system_audio_volume_) { |
| system_audio_volume_->SetMute(false, NULL); |
| mute_done_ = false; |
| } |
| } |
| |
| // Stops periodic AGC microphone measurements. |
| StopAgc(); |
| |
| // Shut down the capture thread. |
| if (stop_capture_event_.IsValid()) { |
| SetEvent(stop_capture_event_.Get()); |
| } |
| |
| // TODO(grunell): Refactor the |use_voice_processing_| conditions in this |
| // function to clean up the code. |
| if (use_voice_processing_) { |
| // Stop the render audio streaming. The input streaming needs no explicit |
| // stopping. |
| HRESULT hr = audio_client_for_render_->Stop(); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Failed to stop output streaming."; |
| } |
| } else { |
| // Stop the input audio streaming. |
| HRESULT hr = audio_client_->Stop(); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Failed to stop input streaming."; |
| } |
| } |
| |
| // Wait until the thread completes and perform cleanup. |
| if (capture_thread_) { |
| SetEvent(stop_capture_event_.Get()); |
| capture_thread_->Join(); |
| capture_thread_.reset(); |
| } |
| |
| if (use_voice_processing_) { |
| HRESULT hr = voice_capture_dmo_->FreeStreamingResources(); |
| if (FAILED(hr)) |
| DLOG(ERROR) << "Failed to free dmo resources."; |
| } |
| |
| started_ = false; |
| sink_ = NULL; |
| } |
| |
| void WASAPIAudioInputStream::Close() { |
| DVLOG(1) << "WASAPIAudioInputStream::Close()"; |
| // It is valid to call Close() before calling Open() or Start(). |
| // It is also valid to call Close() after Start() has been called. |
| Stop(); |
| |
| if (converter_) |
| converter_->RemoveInput(this); |
| |
| ReportAndResetGlitchStats(); |
| |
| // Inform the audio manager that we have been closed. This will cause our |
| // destruction. |
| manager_->ReleaseInputStream(this); |
| } |
| |
| double WASAPIAudioInputStream::GetMaxVolume() { |
| // Verify that Open() has been called successfully, to ensure that an audio |
| // session exists and that an ISimpleAudioVolume interface has been created. |
| DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully"; |
| if (!opened_) |
| return 0.0; |
| |
| // The effective volume value is always in the range 0.0 to 1.0, hence |
| // we can return a fixed value (=1.0) here. |
| return 1.0; |
| } |
| |
| void WASAPIAudioInputStream::SetVolume(double volume) { |
| DVLOG(1) << "SetVolume(volume=" << volume << ")"; |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| DCHECK_GE(volume, 0.0); |
| DCHECK_LE(volume, 1.0); |
| |
| DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully"; |
| if (!opened_) |
| return; |
| |
| // Set a new master volume level. Valid volume levels are in the range |
| // 0.0 to 1.0. Ignore volume-change events. |
| HRESULT hr = |
| simple_audio_volume_->SetMasterVolume(static_cast<float>(volume), NULL); |
| if (FAILED(hr)) |
| DLOG(WARNING) << "Failed to set new input master volume."; |
| |
| // Update the AGC volume level based on the last setting above. Note that |
| // the volume-level resolution is not infinite, so it cannot be assumed |
| // that the volume provided as an input parameter can be used directly. |
| // Instead, a new query to the audio hardware is required. This method |
| // does nothing if AGC is disabled. |
| UpdateAgcVolume(); |
| } |
| |
| double WASAPIAudioInputStream::GetVolume() { |
| DCHECK(opened_) << "Open() has not been called successfully"; |
| if (!opened_) |
| return 0.0; |
| |
| // Retrieve the current volume level. The value is in the range 0.0 to 1.0. |
| float level = 0.0f; |
| HRESULT hr = simple_audio_volume_->GetMasterVolume(&level); |
| if (FAILED(hr)) |
| DLOG(WARNING) << "Failed to get input master volume."; |
| |
| return static_cast<double>(level); |
| } |
| |
| bool WASAPIAudioInputStream::IsMuted() { |
| DCHECK(opened_) << "Open() has not been called successfully"; |
| DCHECK_CALLED_ON_VALID_SEQUENCE(sequence_checker_); |
| if (!opened_) |
| return false; |
| |
| // Retrieves the current muting state for the audio session. |
| BOOL is_muted = FALSE; |
| HRESULT hr = simple_audio_volume_->GetMute(&is_muted); |
| if (FAILED(hr)) |
| DLOG(WARNING) << "Failed to get input mute state."; |
| |
| return is_muted != FALSE; |
| } |
| |
| void WASAPIAudioInputStream::SetOutputDeviceForAec( |
| const std::string& output_device_id) { |
| if (!use_voice_processing_) |
| return; |
| |
| if (output_device_id == output_device_id_for_aec_) |
| return; |
| |
| output_device_id_for_aec_ = output_device_id; |
| |
| // Set devices. |
| Microsoft::WRL::ComPtr<IPropertyStore> ps; |
| HRESULT hr = voice_capture_dmo_->QueryInterface(IID_IPropertyStore, &ps); |
| if (FAILED(hr) || !ps) { |
| log_callback_.Run(base::StringPrintf( |
| "WASAPIAIS:SetOutputDeviceForAec: Getting DMO property store failed.")); |
| return; |
| } |
| |
| if (!SetDmoDevices(ps.Get())) { |
| log_callback_.Run( |
| "WASAPIAIS:SetOutputDeviceForAec: Setting device indices failed."); |
| return; |
| } |
| |
| // Recreate the dummy render client on the new output. |
| hr = audio_client_for_render_->Stop(); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Failed to stop output streaming."; |
| } |
| |
| CreateDummyRenderClientsForDmo(); |
| |
| if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence( |
| audio_client_for_render_.Get(), audio_render_client_.Get())) { |
| DLOG(WARNING) << "Failed to pre-fill render buffer with silence."; |
| } |
| |
| hr = audio_client_for_render_->Start(); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Failed to start output streaming: " << std::hex << hr |
| << ", proceeding without rendering."; |
| } |
| |
| log_callback_.Run(base::StringPrintf( |
| "WASAPIAIS:SetOutputDeviceForAec: Successfully updated AEC output " |
| "device to %s", |
| output_device_id.c_str())); |
| } |
| |
| void WASAPIAudioInputStream::Run() { |
| ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA); |
| |
| // Enable MMCSS to ensure that this thread receives prioritized access to |
| // CPU resources. |
| DWORD task_index = 0; |
| HANDLE mm_task = |
| avrt::AvSetMmThreadCharacteristics(L"Pro Audio", &task_index); |
| bool mmcss_is_ok = |
| (mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL)); |
| if (!mmcss_is_ok) { |
| // Failed to enable MMCSS on this thread. It is not fatal but can lead |
| // to reduced QoS at high load. |
| DWORD err = GetLastError(); |
| LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ")."; |
| } |
| |
| // Allocate a buffer with a size that enables us to take care of cases like: |
| // 1) The recorded buffer size is smaller than, or does not exactly match, |
| //    the selected packet size used in each callback. |
| // 2) The selected buffer size is larger than the recorded buffer size in |
| // each event. |
| // In the case where no resampling is required, a single buffer should be |
| // enough but in case we get buffers that don't match exactly, we'll go with |
| // two. Same applies if we need to resample and the buffer ratio is perfect. |
| // However if the buffer ratio is imperfect, we will need 3 buffers to safely |
| // be able to buffer up data in cases where a conversion requires two audio |
| // buffers (and we need to be able to write to the third one). |
| size_t capture_buffer_size = |
| std::max(2 * endpoint_buffer_size_frames_ * frame_size_bytes_, |
| 2 * packet_size_frames_ * frame_size_bytes_); |
| int buffers_required = capture_buffer_size / packet_size_bytes_; |
| if (converter_ && imperfect_buffer_size_conversion_) |
| ++buffers_required; |
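| // Example (illustrative numbers): with a 441-frame endpoint buffer and a |
| // 441-frame packet size, |capture_buffer_size| covers two packets, so |
| // |buffers_required| == 2; an imperfect conversion ratio bumps it to 3. |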
| |
| DCHECK(!fifo_); |
| fifo_.reset(new AudioBlockFifo(input_format_.nChannels, packet_size_frames_, |
| buffers_required)); |
| |
| DVLOG(1) << "AudioBlockFifo buffer count: " << buffers_required; |
| |
| bool success = |
| use_voice_processing_ ? RunWithDmo() : RunWithAudioCaptureClient(); |
| |
| if (!success) { |
| // TODO(henrika): perhaps it is worth improving the cleanup here, e.g., by |
| // stopping the audio client, joining the thread, etc.? |
| NOTREACHED() << "WASAPI capturing failed with error code " |
| << GetLastError(); |
| } |
| |
| // Disable MMCSS. |
| if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) { |
| PLOG(WARNING) << "Failed to disable MMCSS"; |
| } |
| |
| fifo_.reset(); |
| } |
| |
| bool WASAPIAudioInputStream::RunWithAudioCaptureClient() { |
| HANDLE wait_array[2] = {stop_capture_event_.Get(), |
| audio_samples_ready_event_.Get()}; |
| |
| while (true) { |
| // Wait for a close-down event or a new capture event. |
| DWORD wait_result = WaitForMultipleObjects(2, wait_array, FALSE, INFINITE); |
| switch (wait_result) { |
| case WAIT_OBJECT_0 + 0: |
| // |stop_capture_event_| has been set. |
| return true; |
| case WAIT_OBJECT_0 + 1: |
| // |audio_samples_ready_event_| has been set. |
| PullCaptureDataAndPushToSink(); |
| break; |
| case WAIT_FAILED: |
| default: |
| return false; |
| } |
| } |
| |
| return false; |
| } |
| |
| bool WASAPIAudioInputStream::RunWithDmo() { |
| while (true) { |
| // Poll every 5 ms, or wake up on capture stop signal. |
| DWORD wait_result = WaitForSingleObject(stop_capture_event_.Get(), 5); |
| switch (wait_result) { |
| case WAIT_OBJECT_0: |
| // |stop_capture_event_| has been set. |
| return true; |
| case WAIT_TIMEOUT: |
| PullDmoCaptureDataAndPushToSink(); |
| if (!CoreAudioUtil::FillRenderEndpointBufferWithSilence( |
| audio_client_for_render_.Get(), audio_render_client_.Get())) { |
| DLOG(WARNING) << "Failed to fill render buffer with silence."; |
| } |
| break; |
| case WAIT_FAILED: |
| default: |
| return false; |
| } |
| } |
| |
| return false; |
| } |
| |
| void WASAPIAudioInputStream::PullCaptureDataAndPushToSink() { |
| TRACE_EVENT1("audio", "WASAPIAudioInputStream::PullCaptureDataAndPushToSink", |
| "sample rate", input_format_.nSamplesPerSec); |
| |
| UINT64 last_device_position = 0; |
| |
| // Pull data from the capture endpoint buffer until it's empty or an error |
| // occurs. |
| while (true) { |
| BYTE* data_ptr = nullptr; |
| UINT32 num_frames_to_read = 0; |
| DWORD flags = 0; |
| UINT64 device_position = 0; |
| |
| // Note: The units on this are 100 ns intervals. Both GetBuffer() and |
| // GetPosition() will handle the translation from the QPC value, so we just |
| // need to convert from 100 ns units into microseconds, which is simply a |
| // division by 10.0 since 10 x 100 ns = 1 us. |
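| // For example, a value of 10000 in 100 ns units is 10000 / 10.0 = 1000 us, |
| // i.e. 1 ms. |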
| UINT64 capture_time_100ns = 0; |
| |
| // Retrieve the amount of data in the capture endpoint buffer, replace it |
| // with silence if required, create callbacks for each packet and store |
| // non-delivered data for the next event. |
| HRESULT hr = |
| audio_capture_client_->GetBuffer(&data_ptr, &num_frames_to_read, &flags, |
| &device_position, &capture_time_100ns); |
| if (hr == AUDCLNT_S_BUFFER_EMPTY) |
| break; |
| |
| // TODO(grunell): Should we handle different errors explicitly? Perhaps exit |
| // by setting |error = true|. What are the assumptions here that make us |
| // rely on the next WaitForMultipleObjects? Do we expect the next wait to be |
| // successful sometimes? |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Failed to get data from the capture buffer"; |
| break; |
| } |
| |
| // If the device position has changed, we assume this data belongs to a new |
| // chunk, so we report delay and glitch stats and update the last and next |
| // expected device positions. |
| // If the device position has not changed we assume this data belongs to the |
| // previous chunk, and only update the expected next device position. |
| if (device_position != last_device_position) { |
| ReportDelayStatsAndUpdateGlitchCount( |
| flags & AUDCLNT_BUFFERFLAGS_DATA_DISCONTINUITY, device_position, |
| base::TimeTicks() + |
| CoreAudioUtil::ReferenceTimeToTimeDelta(capture_time_100ns)); |
| last_device_position = device_position; |
| expected_next_device_position_ = device_position + num_frames_to_read; |
| } else { |
| expected_next_device_position_ += num_frames_to_read; |
| } |
| |
| // TODO(dalecurtis, olka, grunell): Is this ever false? If it is, should we |
| // handle |flags & AUDCLNT_BUFFERFLAGS_TIMESTAMP_ERROR|? |
| if (audio_clock_) { |
| // The reported timestamp from GetBuffer() is not as reliable as the clock |
| // from the client. We've seen timestamps reported for USB audio devices be |
| // off by several days. Furthermore, we've seen them jump back in time |
| // every 2 seconds or so. |
| // TODO(grunell): Using the audio clock as capture time for the currently |
| // processed buffer seems incorrect. http://crbug.com/825744. |
| audio_clock_->GetPosition(&device_position, &capture_time_100ns); |
| } |
| |
| base::TimeTicks capture_time; |
| if (capture_time_100ns) { |
| // See conversion notes on |capture_time_100ns|. |
| capture_time += |
| base::TimeDelta::FromMicroseconds(capture_time_100ns / 10.0); |
| } else { |
| // We may not have an IAudioClock or GetPosition() may return zero. |
| capture_time = base::TimeTicks::Now(); |
| } |
| |
| // Adjust |capture_time| for the FIFO before pushing. |
| capture_time -= AudioTimestampHelper::FramesToTime( |
| fifo_->GetAvailableFrames(), input_format_.nSamplesPerSec); |
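| // E.g. (illustrative rates), 480 already-buffered frames at 48 kHz move |
| // |capture_time| 10 ms earlier. |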
| |
| // TODO(grunell): Since we check |hr == AUDCLNT_S_BUFFER_EMPTY| above, |
| // should we instead assert that |num_frames_to_read != 0|? |
| if (num_frames_to_read != 0) { |
| if (flags & AUDCLNT_BUFFERFLAGS_SILENT) { |
| fifo_->PushSilence(num_frames_to_read); |
| } else { |
| fifo_->Push(data_ptr, num_frames_to_read, |
| input_format_.wBitsPerSample / 8); |
| } |
| } |
| |
| hr = audio_capture_client_->ReleaseBuffer(num_frames_to_read); |
| DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer"; |
| |
| // Get a cached AGC volume level which is updated once every second on the |
| // audio manager thread. Note that |volume| is also updated each time |
| // SetVolume() is called through IPC by the render-side AGC. |
| double volume = 0.0; |
| GetAgcVolume(&volume); |
| |
| // Deliver captured data to the registered consumer using a packet size |
| // which was specified at construction. |
| while (fifo_->available_blocks()) { |
| if (converter_) { |
| if (imperfect_buffer_size_conversion_ && |
| fifo_->available_blocks() == 1) { |
| // Special case. We need to buffer up more audio before we can convert |
| // or else we'll suffer an underrun. |
| // TODO(grunell): Verify this is really true. |
| break; |
| } |
| converter_->Convert(convert_bus_.get()); |
| sink_->OnData(convert_bus_.get(), capture_time, volume); |
| |
| // Move the capture time forward for each vended block. |
| capture_time += AudioTimestampHelper::FramesToTime( |
| convert_bus_->frames(), output_format_.nSamplesPerSec); |
| } else { |
| sink_->OnData(fifo_->Consume(), capture_time, volume); |
| |
| // Move the capture time forward for each vended block. |
| capture_time += AudioTimestampHelper::FramesToTime( |
| packet_size_frames_, input_format_.nSamplesPerSec); |
| } |
| } |
| } // while (true) |
| } |
| |
| void WASAPIAudioInputStream::PullDmoCaptureDataAndPushToSink() { |
| TRACE_EVENT1("audio", |
| "WASAPIAudioInputStream::PullDmoCaptureDataAndPushToSink", |
| "sample rate", input_format_.nSamplesPerSec); |
| |
| // Pull data from the capture endpoint buffer until it's empty or an error |
| // occurs. |
| while (true) { |
| DWORD status = 0; |
| DMO_OUTPUT_DATA_BUFFER data_buffer = {0}; |
| data_buffer.pBuffer = media_buffer_.Get(); |
| |
| // Get processed capture data from the DMO. |
| HRESULT hr = |
| voice_capture_dmo_->ProcessOutput(0, // dwFlags |
| 1, // cOutputBufferCount |
| &data_buffer, |
| &status); // Must be ignored. |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "DMO ProcessOutput failed, hr = 0x" << std::hex << hr; |
| break; |
| } |
| |
| BYTE* data; |
| ULONG data_length = 0; |
| // Get a pointer to the data buffer. This should be valid until the next |
| // call to ProcessOutput. |
| hr = media_buffer_->GetBufferAndLength(&data, &data_length); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Could not get buffer, hr = 0x" << std::hex << hr; |
| break; |
| } |
| |
| if (data_length > 0) { |
| const int samples_produced = data_length / frame_size_bytes_; |
| |
| base::TimeTicks capture_time; |
| if (data_buffer.dwStatus & DMO_OUTPUT_DATA_BUFFERF_TIME && |
| data_buffer.rtTimestamp > 0) { |
| // See conversion notes on |capture_time_100ns| in |
| // PullCaptureDataAndPushToSink(). |
| capture_time += |
| base::TimeDelta::FromMicroseconds(data_buffer.rtTimestamp / 10.0); |
| } else { |
| // We may not get the timestamp from ProcessOutput(); fall back on the |
| // current time. |
| capture_time = base::TimeTicks::Now(); |
| } |
| |
| // Adjust |capture_time| for the FIFO before pushing. |
| capture_time -= AudioTimestampHelper::FramesToTime( |
| fifo_->GetAvailableFrames(), input_format_.nSamplesPerSec); |
| |
| fifo_->Push(data, samples_produced, input_format_.wBitsPerSample / 8); |
| |
| // Reset length to indicate buffer availability. |
| hr = media_buffer_->SetLength(0); |
| if (FAILED(hr)) |
| DLOG(ERROR) << "Could not reset length, hr = 0x" << std::hex << hr; |
| |
| // Get a cached AGC volume level which is updated once every second on the |
| // audio manager thread. Note that |volume| is also updated each time |
| // SetVolume() is called through IPC by the render-side AGC. |
| double volume = 0.0; |
| GetAgcVolume(&volume); |
| |
| while (fifo_->available_blocks()) { |
| if (converter_) { |
| if (imperfect_buffer_size_conversion_ && |
| fifo_->available_blocks() == 1) { |
| // Special case. We need to buffer up more audio before we can |
| // convert or else we'll suffer an underrun. |
| // TODO(grunell): Verify this is really true. |
| break; |
| } |
| converter_->Convert(convert_bus_.get()); |
| sink_->OnData(convert_bus_.get(), capture_time, volume); |
| |
| // Move the capture time forward for each vended block. |
| capture_time += AudioTimestampHelper::FramesToTime( |
| convert_bus_->frames(), output_format_.nSamplesPerSec); |
| } else { |
| sink_->OnData(fifo_->Consume(), capture_time, volume); |
| |
| // Move the capture time forward for each vended block. |
| capture_time += AudioTimestampHelper::FramesToTime( |
| packet_size_frames_, input_format_.nSamplesPerSec); |
| } |
| } |
| } // if (data_length > 0) |
| |
| if (!(data_buffer.dwStatus & DMO_OUTPUT_DATA_BUFFERF_INCOMPLETE)) { |
| // The DMO cannot currently produce more data. This is the normal case; |
| // otherwise it means the DMO had more than 10 ms of data available and |
| // ProcessOutput should be called again. |
| break; |
| } |
| } // while (true) |
| } |
| |
| void WASAPIAudioInputStream::HandleError(HRESULT err) { |
| NOTREACHED() << "Error code: " << err; |
| if (sink_) |
| sink_->OnError(); |
| } |
| |
| HRESULT WASAPIAudioInputStream::SetCaptureDevice() { |
| DCHECK_EQ(OPEN_RESULT_OK, open_result_); |
| DCHECK(!endpoint_device_.Get()); |
| |
| Microsoft::WRL::ComPtr<IMMDeviceEnumerator> enumerator; |
| HRESULT hr = |
| ::CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, |
| CLSCTX_INPROC_SERVER, IID_PPV_ARGS(&enumerator)); |
| if (FAILED(hr)) { |
| open_result_ = OPEN_RESULT_CREATE_INSTANCE; |
| return hr; |
| } |
| |
| // Retrieve the IMMDevice by using the specified role or the specified |
| // unique endpoint device-identification string. |
| |
| if (device_id_ == AudioDeviceDescription::kDefaultDeviceId) { |
| // Retrieve the default capture audio endpoint for the specified role. |
| // Note that, in Windows Vista, the MMDevice API supports device roles |
| // but the system-supplied user interface programs do not. |
| hr = enumerator->GetDefaultAudioEndpoint(eCapture, eConsole, |
| endpoint_device_.GetAddressOf()); |
| } else if (device_id_ == AudioDeviceDescription::kCommunicationsDeviceId) { |
| hr = enumerator->GetDefaultAudioEndpoint(eCapture, eCommunications, |
| endpoint_device_.GetAddressOf()); |
| } else if (device_id_ == AudioDeviceDescription::kLoopbackWithMuteDeviceId) { |
| // Capture the default playback stream. |
| hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole, |
| endpoint_device_.GetAddressOf()); |
| |
| if (SUCCEEDED(hr)) { |
| endpoint_device_->Activate(__uuidof(IAudioEndpointVolume), CLSCTX_ALL, |
| NULL, &system_audio_volume_); |
| } |
| } else if (device_id_ == AudioDeviceDescription::kLoopbackInputDeviceId) { |
| // Capture the default playback stream. |
| hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole, |
| endpoint_device_.GetAddressOf()); |
| } else { |
| hr = enumerator->GetDevice(base::UTF8ToUTF16(device_id_).c_str(), |
| endpoint_device_.GetAddressOf()); |
| } |
| |
| if (FAILED(hr)) { |
| open_result_ = OPEN_RESULT_NO_ENDPOINT; |
| return hr; |
| } |
| |
| // Verify that the audio endpoint device is active, i.e., the audio |
| // adapter that connects to the endpoint device is present and enabled. |
| DWORD state = DEVICE_STATE_DISABLED; |
| hr = endpoint_device_->GetState(&state); |
| if (FAILED(hr)) { |
| open_result_ = OPEN_RESULT_NO_STATE; |
| return hr; |
| } |
| |
| if (!(state & DEVICE_STATE_ACTIVE)) { |
| DLOG(ERROR) << "Selected capture device is not active."; |
| open_result_ = OPEN_RESULT_DEVICE_NOT_ACTIVE; |
| hr = E_ACCESSDENIED; |
| } |
| |
| return hr; |
| } |
| |
| HRESULT WASAPIAudioInputStream::GetAudioEngineStreamFormat() { |
| HRESULT hr = S_OK; |
| #ifndef NDEBUG |
| // The GetMixFormat() method retrieves the stream format that the |
| // audio engine uses for its internal processing of shared-mode streams. |
| // The method always uses a WAVEFORMATEXTENSIBLE structure, instead |
| // of a stand-alone WAVEFORMATEX structure, to specify the format. |
| // A WAVEFORMATEXTENSIBLE structure can specify both the mapping of |
| // channels to speakers and the number of bits of precision in each sample. |
| base::win::ScopedCoMem<WAVEFORMATEXTENSIBLE> format_ex; |
| hr = |
| audio_client_->GetMixFormat(reinterpret_cast<WAVEFORMATEX**>(&format_ex)); |
| |
| // See http://msdn.microsoft.com/en-us/windows/hardware/gg463006#EFH |
| // for details on the WAVE file format. |
| WAVEFORMATEX format = format_ex->Format; |
| DVLOG(2) << "WAVEFORMATEX:"; |
| DVLOG(2) << " wFormatTag : 0x" << std::hex << format.wFormatTag; |
| DVLOG(2) << " nChannels : " << format.nChannels; |
| DVLOG(2) << " nSamplesPerSec : " << format.nSamplesPerSec; |
| DVLOG(2) << " nAvgBytesPerSec: " << format.nAvgBytesPerSec; |
| DVLOG(2) << " nBlockAlign : " << format.nBlockAlign; |
| DVLOG(2) << " wBitsPerSample : " << format.wBitsPerSample; |
| DVLOG(2) << " cbSize : " << format.cbSize; |
| |
| DVLOG(2) << "WAVEFORMATEXTENSIBLE:"; |
| DVLOG(2) << " wValidBitsPerSample: " |
| << format_ex->Samples.wValidBitsPerSample; |
| DVLOG(2) << " dwChannelMask : 0x" << std::hex |
| << format_ex->dwChannelMask; |
| if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_PCM) |
| DVLOG(2) << " SubFormat : KSDATAFORMAT_SUBTYPE_PCM"; |
| else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT) |
| DVLOG(2) << " SubFormat : KSDATAFORMAT_SUBTYPE_IEEE_FLOAT"; |
| else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_WAVEFORMATEX) |
| DVLOG(2) << " SubFormat : KSDATAFORMAT_SUBTYPE_WAVEFORMATEX"; |
| #endif |
| return hr; |
| } |
| |
| bool WASAPIAudioInputStream::DesiredFormatIsSupported(HRESULT* hr) { |
| // An application that uses WASAPI to manage shared-mode streams can rely |
| // on the audio engine to perform only limited format conversions. The audio |
| // engine can convert between a standard PCM sample size used by the |
| // application and the floating-point samples that the engine uses for its |
| // internal processing. However, the format for an application stream |
| // typically must have the same number of channels and the same sample |
| // rate as the stream format used by the device. |
| // Many audio devices support both PCM and non-PCM stream formats. However, |
| // the audio engine can mix only PCM streams. |
| base::win::ScopedCoMem<WAVEFORMATEX> closest_match; |
| HRESULT hresult = audio_client_->IsFormatSupported( |
| AUDCLNT_SHAREMODE_SHARED, &input_format_, &closest_match); |
| DLOG_IF(ERROR, hresult == S_FALSE) |
| << "Format is not supported but a closest match exists."; |
| |
| if (hresult == S_FALSE) { |
| // Change the format we're going to ask for to better match what the OS |
| // can provide. If we succeed in initializing the audio client in this |
| // format and are able to convert from this format, we will do that |
| // conversion. |
| input_format_.nChannels = closest_match->nChannels; |
| input_format_.nSamplesPerSec = closest_match->nSamplesPerSec; |
| |
| // If the closest match is fixed-point PCM (WAVE_FORMAT_PCM or |
| // KSDATAFORMAT_SUBTYPE_PCM), we use the closest match's bits per sample. |
| // Otherwise, we keep the bits per sample as is, since we still request |
| // fixed-point PCM. In that case the closest match is typically in float |
| // format (KSDATAFORMAT_SUBTYPE_IEEE_FLOAT). |
| auto format_is_pcm = [](const WAVEFORMATEX* format) { |
| if (format->wFormatTag == WAVE_FORMAT_PCM) |
| return true; |
| if (format->wFormatTag == WAVE_FORMAT_EXTENSIBLE) { |
| const WAVEFORMATEXTENSIBLE* format_ex = |
| reinterpret_cast<const WAVEFORMATEXTENSIBLE*>(format); |
| return format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_PCM; |
| } |
| return false; |
| }; |
| if (format_is_pcm(closest_match)) |
| input_format_.wBitsPerSample = closest_match->wBitsPerSample; |
| |
| input_format_.nBlockAlign = |
| (input_format_.wBitsPerSample / 8) * input_format_.nChannels; |
| input_format_.nAvgBytesPerSec = |
| input_format_.nSamplesPerSec * input_format_.nBlockAlign; |
| |
| if (IsSupportedFormatForConversion(input_format_)) { |
| DVLOG(1) << "Will convert capture audio from: \nbits: " |
| << input_format_.wBitsPerSample |
| << "\nsample rate: " << input_format_.nSamplesPerSec |
| << "\nchannels: " << input_format_.nChannels |
| << "\nblock align: " << input_format_.nBlockAlign |
| << "\navg bytes per sec: " << input_format_.nAvgBytesPerSec; |
| |
| SetupConverterAndStoreFormatInfo(); |
| |
| // Indicate that we're good to go with a close match. |
| hresult = S_OK; |
| } |
| } |
| |
| // At this point, |hresult| == S_OK if the desired format is supported. If |
| // |hresult| == S_FALSE, the OS supports a closest match but we don't support |
| // conversion to it. Thus, SUCCEEDED() or FAILED() can't be used to determine |
| // if the desired format is supported. |
| *hr = hresult; |
| return (hresult == S_OK); |
| } |
| |
| void WASAPIAudioInputStream::SetupConverterAndStoreFormatInfo() { |
| // Ideally, we want a 1:1 ratio between the buffers we get and the buffers |
| // we give to OnData so that each buffer we receive from the OS can be |
| // directly converted to a buffer that matches with what was asked for. |
| const double buffer_ratio = |
| output_format_.nSamplesPerSec / static_cast<double>(packet_size_frames_); |
| double new_frames_per_buffer = input_format_.nSamplesPerSec / buffer_ratio; |
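| // Worked example (illustrative rates): a 16 kHz output with 160-frame |
| // packets gives |buffer_ratio| = 16000 / 160 = 100 buffers per second; a |
| // 44.1 kHz input then needs 44100 / 100 = 441 input frames per buffer. |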
| |
| const auto input_layout = GuessChannelLayout(input_format_.nChannels); |
| DCHECK_NE(CHANNEL_LAYOUT_UNSUPPORTED, input_layout); |
| const auto output_layout = GuessChannelLayout(output_format_.nChannels); |
| DCHECK_NE(CHANNEL_LAYOUT_UNSUPPORTED, output_layout); |
| |
| const AudioParameters input(AudioParameters::AUDIO_PCM_LOW_LATENCY, |
| input_layout, input_format_.nSamplesPerSec, |
| static_cast<int>(new_frames_per_buffer)); |
| |
| const AudioParameters output(AudioParameters::AUDIO_PCM_LOW_LATENCY, |
| output_layout, output_format_.nSamplesPerSec, |
| packet_size_frames_); |
| |
| converter_.reset(new AudioConverter(input, output, false)); |
| converter_->AddInput(this); |
| converter_->PrimeWithSilence(); |
| convert_bus_ = AudioBus::Create(output); |
| |
| // Update our packet size assumptions based on the new format. |
| const auto new_bytes_per_buffer = |
| static_cast<int>(new_frames_per_buffer) * input_format_.nBlockAlign; |
| packet_size_frames_ = new_bytes_per_buffer / input_format_.nBlockAlign; |
| packet_size_bytes_ = new_bytes_per_buffer; |
| frame_size_bytes_ = input_format_.nBlockAlign; |
| |
| imperfect_buffer_size_conversion_ = |
| std::modf(new_frames_per_buffer, &new_frames_per_buffer) != 0.0; |
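| // E.g. (illustrative rates), a 44.1 kHz input converted to a 48 kHz output |
| // with 441-frame packets yields 44100 / (48000 / 441) = ~405.17 frames per |
| // input buffer, i.e. an imperfect conversion. |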
| DVLOG_IF(1, imperfect_buffer_size_conversion_) |
| << "Audio capture data conversion: Need to inject fifo"; |
| } |
| |
| HRESULT WASAPIAudioInputStream::InitializeAudioEngine() { |
| DCHECK_EQ(OPEN_RESULT_OK, open_result_); |
| DWORD flags; |
| // Use event-driven mode only for regular input devices. For loopback, the |
| // EVENTCALLBACK flag is specified when initializing |
| // |audio_render_client_for_loopback_|. |
| if (AudioDeviceDescription::IsLoopbackDevice(device_id_)) { |
| flags = AUDCLNT_STREAMFLAGS_LOOPBACK | AUDCLNT_STREAMFLAGS_NOPERSIST; |
| } else { |
| flags = AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST; |
| } |
| |
| // Initialize the audio stream between the client and the device. |
| // We connect indirectly through the audio engine by using shared mode. |
| // The buffer duration is normally set to 0, in which case the audio engine |
| // picks the minimum buffer size needed to avoid glitches between its |
| // periodic processing passes. It can be increased to 100 ms via a feature. |
| // Note: if the value is changed, update the description in |
| // chrome/browser/flag_descriptions.cc. |
| REFERENCE_TIME buffer_duration = |
| base::FeatureList::IsEnabled(features::kIncreaseInputAudioBufferSize) |
| ? 100 * 1000 * 10 // 100 ms expressed in 100-ns units. |
| : 0; |
| HRESULT hr = audio_client_->Initialize( |
| AUDCLNT_SHAREMODE_SHARED, flags, buffer_duration, |
| 0, // device period, n/a for shared mode. |
| &input_format_, |
| device_id_ == AudioDeviceDescription::kCommunicationsDeviceId |
| ? &kCommunicationsSessionId |
| : nullptr); |
| |
| if (FAILED(hr)) { |
| open_result_ = OPEN_RESULT_AUDIO_CLIENT_INIT_FAILED; |
| base::UmaHistogramSparse("Media.Audio.Capture.Win.InitError", hr); |
| MaybeReportFormatRelatedInitError(hr); |
| return hr; |
| } |
| |
| // Retrieve the length of the endpoint buffer shared between the client |
| // and the audio engine. The buffer length determines the maximum amount |
| // of capture data that the audio engine can read from the endpoint buffer |
| // during a single processing pass. |
| hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_); |
| if (FAILED(hr)) { |
| open_result_ = OPEN_RESULT_GET_BUFFER_SIZE_FAILED; |
| return hr; |
| } |
| const int endpoint_buffer_size_ms = |
| static_cast<double>(endpoint_buffer_size_frames_ * 1000) / |
| input_format_.nSamplesPerSec + |
| 0.5; // Round to closest integer |
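| // E.g., 441 frames at 44.1 kHz gives 441 * 1000 / 44100 + 0.5 -> 10 ms. |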
| UMA_HISTOGRAM_CUSTOM_TIMES( |
| "Media.Audio.Capture.Win.EndpointBufferSize", |
| base::TimeDelta::FromMilliseconds(endpoint_buffer_size_ms), |
| base::TimeDelta::FromMilliseconds(1), base::TimeDelta::FromSeconds(1), |
| 50); |
| DVLOG(1) << "Endpoint buffer size: " << endpoint_buffer_size_frames_ |
| << " frames (" << endpoint_buffer_size_ms << " ms)"; |
| |
| // The period between processing passes by the audio engine is fixed for a |
| // particular audio endpoint device and represents the smallest processing |
| // quantum for the audio engine. This period plus the stream latency between |
| // the buffer and endpoint device represents the minimum possible latency |
| // that an audio application can achieve. |
| REFERENCE_TIME device_period_shared_mode = 0; |
| REFERENCE_TIME device_period_exclusive_mode = 0; |
| HRESULT hr_dbg = audio_client_->GetDevicePeriod( |
| &device_period_shared_mode, &device_period_exclusive_mode); |
| if (SUCCEEDED(hr_dbg)) { |
| // The 5000 addition rounds the result to the closest integer. |
| const int device_period_ms = (device_period_shared_mode + 5000) / 10000; |
| UMA_HISTOGRAM_CUSTOM_TIMES( |
| "Media.Audio.Capture.Win.DevicePeriod", |
| base::TimeDelta::FromMilliseconds(device_period_ms), |
| base::TimeDelta::FromMilliseconds(1), base::TimeDelta::FromSeconds(1), |
| 50); |
| DVLOG(1) << "Device period: " << device_period_ms << " ms"; |
| } |
| |
| REFERENCE_TIME latency = 0; |
| hr_dbg = audio_client_->GetStreamLatency(&latency); |
| if (SUCCEEDED(hr_dbg)) { |
| // The 5000 addition rounds the result to the closest integer. |
| const int latency_ms = (latency + 5000) / 10000; |
| UMA_HISTOGRAM_CUSTOM_TIMES("Media.Audio.Capture.Win.StreamLatency", |
| base::TimeDelta::FromMilliseconds(latency_ms), |
| base::TimeDelta::FromMilliseconds(1), |
| base::TimeDelta::FromSeconds(1), 50); |
| DVLOG(1) << "Stream latency: " << latency_ms << " ms"; |
| } |
| |
| // Set the event handle that the audio engine will signal each time a buffer |
| // becomes ready to be processed by the client. |
| // |
| // In the loopback case, the capture device doesn't receive any events, so |
| // we need to create a separate playback client to get notifications. |
| // According to MSDN: |
| // |
| // A pull-mode capture client does not receive any events when a stream is |
| // initialized with event-driven buffering and is loopback-enabled. To |
| // work around this, initialize a render stream in event-driven mode. Each |
| // time the client receives an event for the render stream, it must signal |
| // the capture client to run the capture thread that reads the next set of |
| // samples from the capture endpoint buffer. |
| // |
| // http://msdn.microsoft.com/en-us/library/windows/desktop/dd316551(v=vs.85).aspx |
| if (AudioDeviceDescription::IsLoopbackDevice(device_id_)) { |
| hr = endpoint_device_->Activate( |
| __uuidof(IAudioClient), CLSCTX_INPROC_SERVER, NULL, |
| &audio_render_client_for_loopback_); |
| if (FAILED(hr)) { |
| open_result_ = OPEN_RESULT_LOOPBACK_ACTIVATE_FAILED; |
| return hr; |
| } |
| |
| hr = audio_render_client_for_loopback_->Initialize( |
| AUDCLNT_SHAREMODE_SHARED, |
| AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_NOPERSIST, 0, 0, |
| &input_format_, NULL); |
| if (FAILED(hr)) { |
| open_result_ = OPEN_RESULT_LOOPBACK_INIT_FAILED; |
| return hr; |
| } |
| |
| hr = audio_render_client_for_loopback_->SetEventHandle( |
| audio_samples_ready_event_.Get()); |
| } else { |
| hr = audio_client_->SetEventHandle(audio_samples_ready_event_.Get()); |
| } |
| |
| if (FAILED(hr)) { |
| open_result_ = OPEN_RESULT_SET_EVENT_HANDLE; |
| return hr; |
| } |
| |
| // Get access to the IAudioCaptureClient interface. This interface |
| // enables us to read input data from the capture endpoint buffer. |
| hr = audio_client_->GetService(IID_PPV_ARGS(&audio_capture_client_)); |
| if (FAILED(hr)) { |
| open_result_ = OPEN_RESULT_NO_CAPTURE_CLIENT; |
| return hr; |
| } |
| |
| // Obtain a reference to the ISimpleAudioVolume interface which enables |
| // us to control the master volume level of an audio session. |
| hr = audio_client_->GetService(IID_PPV_ARGS(&simple_audio_volume_)); |
| if (FAILED(hr)) |
| open_result_ = OPEN_RESULT_NO_AUDIO_VOLUME; |
| |
| audio_client_->GetService(IID_PPV_ARGS(&audio_clock_)); |
| if (!audio_clock_) |
| LOG(WARNING) << "IAudioClock unavailable, capture times may be inaccurate."; |
| |
| return hr; |
| } |
| |
| void WASAPIAudioInputStream::ReportOpenResult(HRESULT hr) const { |
| DCHECK(!opened_); // This method must be called before we set this flag. |
| UMA_HISTOGRAM_ENUMERATION("Media.Audio.Capture.Win.Open", open_result_, |
| OPEN_RESULT_MAX + 1); |
| if (open_result_ != OPEN_RESULT_OK && |
| open_result_ != OPEN_RESULT_OK_WITH_RESAMPLING) { |
| log_callback_.Run(base::StringPrintf( |
| "WASAPIAIS::Open: failed, result = %d, hresult = %#lx, " |
| "input format = %#x/%d/%ld/%d/%d/%ld/%d, " |
| "output format = %#x/%d/%ld/%d/%d/%ld/%d", |
| // clang-format off |
| open_result_, hr, |
| input_format_.wFormatTag, input_format_.nChannels, |
| input_format_.nSamplesPerSec, input_format_.wBitsPerSample, |
| input_format_.nBlockAlign, input_format_.nAvgBytesPerSec, |
| input_format_.cbSize, |
| output_format_.wFormatTag, output_format_.nChannels, |
| output_format_.nSamplesPerSec, output_format_.wBitsPerSample, |
| output_format_.nBlockAlign, output_format_.nAvgBytesPerSec, |
| output_format_.cbSize)); |
| // clang-format on |
| } |
| } |
| |
| void WASAPIAudioInputStream::MaybeReportFormatRelatedInitError( |
| HRESULT hr) const { |
| if (hr != AUDCLNT_E_UNSUPPORTED_FORMAT && hr != E_INVALIDARG) |
| return; |
| |
| const FormatRelatedInitError format_related_error = |
| hr == AUDCLNT_E_UNSUPPORTED_FORMAT |
| ? converter_.get() |
| ? FormatRelatedInitError::kUnsupportedFormatWithFormatConversion |
| : FormatRelatedInitError::kUnsupportedFormat |
| // Otherwise |hr| == E_INVALIDARG. |
| : converter_.get() |
| ? FormatRelatedInitError::kInvalidArgumentWithFormatConversion |
| : FormatRelatedInitError::kInvalidArgument; |
| base::UmaHistogramEnumeration( |
| "Media.Audio.Capture.Win.InitError.FormatRelated", format_related_error, |
| FormatRelatedInitError::kCount); |
| } |
| |
| bool WASAPIAudioInputStream::InitializeDmo() { |
| HRESULT hr = ::CoCreateInstance(CLSID_CWMAudioAEC, NULL, CLSCTX_INPROC_SERVER, |
| IID_IMediaObject, &voice_capture_dmo_); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Creating DMO failed."; |
| return false; |
| } |
| |
| if (!SetDmoProperties()) |
| return false; |
| |
| if (!SetDmoFormat()) |
| return false; |
| |
| hr = voice_capture_dmo_->AllocateStreamingResources(); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Allocating DMO resources failed."; |
| return false; |
| } |
| |
| SetupConverterAndStoreFormatInfo(); |
| |
| media_buffer_ = |
| new MediaBufferImpl(endpoint_buffer_size_frames_ * frame_size_bytes_); |
| |
| if (!CreateDummyRenderClientsForDmo()) |
| return false; |
| |
| // Get volume interface. |
| Microsoft::WRL::ComPtr<IAudioSessionManager> audio_session_manager; |
| hr = endpoint_device_->Activate(__uuidof(IAudioSessionManager), |
| CLSCTX_INPROC_SERVER, NULL, |
| &audio_session_manager); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Obtaining audio session manager failed."; |
| return false; |
| } |
| hr = audio_session_manager->GetSimpleAudioVolume( |
| NULL, // AudioSessionGuid. NULL for default session. |
| FALSE, // CrossProcessSession. |
| &simple_audio_volume_); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Obtaining audio volume interface failed."; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool WASAPIAudioInputStream::SetDmoProperties() { |
| Microsoft::WRL::ComPtr<IPropertyStore> ps; |
| HRESULT hr = voice_capture_dmo_->QueryInterface(IID_IPropertyStore, &ps); |
| if (FAILED(hr) || !ps) { |
| DLOG(ERROR) << "Getting DMO property store failed."; |
| return false; |
| } |
| |
| // Set devices. |
| if (!SetDmoDevices(ps.Get())) { |
| DLOG(ERROR) << "Setting device indices failed."; |
| return false; |
| } |
| |
| // Set DMO mode to AEC only. |
| if (FAILED(CoreAudioUtil::SetVtI4Property( |
| ps.Get(), MFPKEY_WMAAECMA_SYSTEM_MODE, SINGLE_CHANNEL_AEC))) { |
| DLOG(ERROR) << "Setting DMO system mode failed."; |
| return false; |
| } |
| |
| // Enable the feature mode. This lets us override the default processing |
| // settings below. |
| if (FAILED(CoreAudioUtil::SetBoolProperty( |
| ps.Get(), MFPKEY_WMAAECMA_FEATURE_MODE, VARIANT_TRUE))) { |
| DLOG(ERROR) << "Setting DMO feature mode failed."; |
| return false; |
| } |
| |
| // Disable analog AGC (default enabled). |
| if (FAILED(CoreAudioUtil::SetBoolProperty( |
| ps.Get(), MFPKEY_WMAAECMA_MIC_GAIN_BOUNDER, VARIANT_FALSE))) { |
| DLOG(ERROR) << "Setting DMO mic gain bounder failed."; |
| return false; |
| } |
| |
| // Disable noise suppression (default enabled). |
| if (FAILED(CoreAudioUtil::SetVtI4Property(ps.Get(), MFPKEY_WMAAECMA_FEATR_NS, |
| 0))) { |
| DLOG(ERROR) << "Disabling DMO NS failed."; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool WASAPIAudioInputStream::SetDmoFormat() { |
| DMO_MEDIA_TYPE mt; // Media type. |
| // MoInitMediaType() initializes |mt| and allocates the format block, so it |
| // must be called before the fields below are set; otherwise their values |
| // could be overwritten. |
| HRESULT hr = MoInitMediaType(&mt, sizeof(WAVEFORMATEX)); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Init media type for DMO failed."; |
| return false; |
| } |
| |
| mt.majortype = MEDIATYPE_Audio; |
| mt.subtype = MEDIASUBTYPE_PCM; |
| mt.lSampleSize = 0; |
| mt.bFixedSizeSamples = TRUE; |
| mt.bTemporalCompression = FALSE; |
| mt.formattype = FORMAT_WaveFormatEx; |
| |
| WAVEFORMATEX* dmo_output_format = |
| reinterpret_cast<WAVEFORMATEX*>(mt.pbFormat); |
| dmo_output_format->wFormatTag = WAVE_FORMAT_PCM; |
| dmo_output_format->nChannels = 1; |
| dmo_output_format->nSamplesPerSec = 16000; |
| dmo_output_format->nAvgBytesPerSec = 32000; |
| dmo_output_format->nBlockAlign = 2; |
| dmo_output_format->wBitsPerSample = 16; |
| dmo_output_format->cbSize = 0; |
| |
| DCHECK(IsSupportedFormatForConversion(*dmo_output_format)); |
| |
| // Store the format used. |
| input_format_.wFormatTag = dmo_output_format->wFormatTag; |
| input_format_.nChannels = dmo_output_format->nChannels; |
| input_format_.nSamplesPerSec = dmo_output_format->nSamplesPerSec; |
| input_format_.wBitsPerSample = dmo_output_format->wBitsPerSample; |
| input_format_.nBlockAlign = dmo_output_format->nBlockAlign; |
| input_format_.nAvgBytesPerSec = dmo_output_format->nAvgBytesPerSec; |
| input_format_.cbSize = dmo_output_format->cbSize; |
| |
| hr = voice_capture_dmo_->SetOutputType(0, &mt, 0); |
| MoFreeMediaType(&mt); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Setting DMO output type failed."; |
| return false; |
| } |
| |
| // We use a 10 ms buffer size for the DMO. |
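| // At the 16 kHz DMO output rate set above, that is 16000 / 100 = 160 |
| // frames. |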
| endpoint_buffer_size_frames_ = input_format_.nSamplesPerSec / 100; |
| |
| return true; |
| } |
| |
| bool WASAPIAudioInputStream::SetDmoDevices(IPropertyStore* ps) { |
| // Look up the input device's index. |
| const base::Optional<WORD> input_device_index = |
| GetAudioDeviceCollectionIndexFromId(device_id_, eCapture); |
| |
| if (!input_device_index) { |
| log_callback_.Run( |
| base::StringPrintf("WASAPIAIS:SetDmoDevices: Could not " |
| "resolve input device index for %s", |
| device_id_.c_str())); |
| return false; |
| } |
| |
| // Look up the output device's index. |
| const base::Optional<WORD> output_device_index = |
| GetAudioDeviceCollectionIndexFromId(output_device_id_for_aec_, eRender); |
| if (!output_device_index) { |
| log_callback_.Run( |
| base::StringPrintf("WASAPIAIS:SetDmoDevices: Could not " |
| "resolve output device index for %s", |
| output_device_id_for_aec_.c_str())); |
| return false; |
| } |
| |
| // The DEVICE_INDEXES property packs the output and input device indices |
| // into the upper and lower 16 bits of a LONG, respectively. |
| LONG device_index_value = |
| (static_cast<ULONG>(*output_device_index) << 16) + |
| (static_cast<ULONG>(*input_device_index) & 0x0000ffff); |
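| // E.g. (hypothetical indices), output index 2 and input index 3 pack to |
| // 0x00020003. |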
| return SUCCEEDED(CoreAudioUtil::SetVtI4Property( |
| ps, MFPKEY_WMAAECMA_DEVICE_INDEXES, device_index_value)); |
| } |
| |
| bool WASAPIAudioInputStream::CreateDummyRenderClientsForDmo() { |
| Microsoft::WRL::ComPtr<IAudioClient> audio_client(CoreAudioUtil::CreateClient( |
| output_device_id_for_aec_, eRender, eConsole)); |
| if (!audio_client.Get()) { |
| DLOG(ERROR) << "Failed to create audio client for dummy rendering for DMO."; |
| return false; |
| } |
| |
| WAVEFORMATPCMEX mix_format; |
| HRESULT hr = |
| CoreAudioUtil::GetSharedModeMixFormat(audio_client.Get(), &mix_format); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Failed to get mix format."; |
| return false; |
| } |
| |
| hr = audio_client->Initialize(AUDCLNT_SHAREMODE_SHARED, |
| 0, // Stream flags |
| 0, // Buffer duration |
| 0, // Device period |
| reinterpret_cast<WAVEFORMATEX*>(&mix_format), |
| NULL); |
| if (FAILED(hr)) { |
| DLOG(ERROR) << "Failed to initialize audio client for rendering."; |
| return false; |
| } |
| |
| Microsoft::WRL::ComPtr<IAudioRenderClient> audio_render_client = |
| CoreAudioUtil::CreateRenderClient(audio_client.Get()); |
| if (!audio_render_client.Get()) { |
| DLOG(ERROR) << "Failed to create audio render client."; |
| return false; |
| } |
| |
| audio_client_for_render_ = audio_client; |
| audio_render_client_ = audio_render_client; |
| |
| return true; |
| } |
| |
| double WASAPIAudioInputStream::ProvideInput(AudioBus* audio_bus, |
| uint32_t frames_delayed) { |
| fifo_->Consume()->CopyTo(audio_bus); |
| return 1.0; |
| } |
| |
| void WASAPIAudioInputStream::ReportDelayStatsAndUpdateGlitchCount( |
| bool discontinuity_flagged, |
| UINT64 device_position, |
| base::TimeTicks capture_time) { |
| // Report delay. Don't report if there is no valid capture time. |
| // Unreasonably large delays are clamped at 1 second, since some devices |
| // occasionally report capture timestamps that are way off. |
| if (capture_time > base::TimeTicks()) { |
| base::TimeDelta delay = base::TimeTicks::Now() - capture_time; |
| UMA_HISTOGRAM_CUSTOM_TIMES("Media.Audio.Capture.DeviceLatency", delay, |
| base::TimeDelta::FromMilliseconds(1), |
| base::TimeDelta::FromSeconds(1), 50); |
| } |
| |
| // Detect glitches. Glitches are detected and counted separately based on |
| // the expected device position and the discontinuity flag, since the two |
| // have been shown to not always be consistent with each other. |
| if (expected_next_device_position_ != 0) { |
| if (device_position > expected_next_device_position_) { |
| ++total_glitches_; |
| auto lost_frames = device_position - expected_next_device_position_; |
| total_lost_frames_ += lost_frames; |
| if (lost_frames > largest_glitch_frames_) |
| largest_glitch_frames_ = lost_frames; |
| } else if (device_position < expected_next_device_position_) { |
| ++total_device_position_less_than_expected_; |
| } |
| if (discontinuity_flagged) |
| ++total_discontinuities_; |
| if (device_position > expected_next_device_position_ && |
| discontinuity_flagged) { |
| ++total_concurrent_glitch_and_discontinuities_; |
| } |
| } |
| } |
| |
| void WASAPIAudioInputStream::ReportAndResetGlitchStats() { |
| UMA_HISTOGRAM_COUNTS("Media.Audio.Capture.Glitches", total_glitches_); |
| UMA_HISTOGRAM_COUNTS("Media.Audio.Capture.Win.DevicePositionLessThanExpected", |
| total_device_position_less_than_expected_); |
| UMA_HISTOGRAM_COUNTS("Media.Audio.Capture.Win.Discontinuities", |
| total_discontinuities_); |
| UMA_HISTOGRAM_COUNTS( |
| "Media.Audio.Capture.Win.ConcurrentGlitchAndDiscontinuities", |
| total_concurrent_glitch_and_discontinuities_); |
| |
| double lost_frames_ms = |
| (total_lost_frames_ * 1000.0) / input_format_.nSamplesPerSec; |
| std::string log_message = base::StringPrintf( |
| "WASAPIAIS: Total glitches=%d. Total frames lost=%llu (%.0lf ms). Total " |
| "discontinuities=%d. Total concurrent glitch and discont=%d. Total low " |
| "device " |
| "positions=%d.", |
| total_glitches_, total_lost_frames_, lost_frames_ms, |
| total_discontinuities_, total_concurrent_glitch_and_discontinuities_, |
| total_device_position_less_than_expected_); |
| log_callback_.Run(log_message); |
| |
| if (total_glitches_ != 0) { |
| UMA_HISTOGRAM_LONG_TIMES("Media.Audio.Capture.LostFramesInMs", |
| base::TimeDelta::FromMilliseconds(lost_frames_ms)); |
| int64_t largest_glitch_ms = |
| (largest_glitch_frames_ * 1000) / input_format_.nSamplesPerSec; |
| UMA_HISTOGRAM_CUSTOM_TIMES( |
| "Media.Audio.Capture.LargestGlitchMs", |
| base::TimeDelta::FromMilliseconds(largest_glitch_ms), |
| base::TimeDelta::FromMilliseconds(1), base::TimeDelta::FromMinutes(1), |
| 50); |
| DLOG(WARNING) << log_message; |
| } |
| |
| expected_next_device_position_ = 0; |
| total_glitches_ = 0; |
| total_device_position_less_than_expected_ = 0; |
| total_discontinuities_ = 0; |
| total_concurrent_glitch_and_discontinuities_ = 0; |
| total_lost_frames_ = 0; |
| largest_glitch_frames_ = 0; |
| } |
| |
| } // namespace media |