// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "media/audio/win/audio_low_latency_input_win.h"
#include "base/logging.h"
#include "base/memory/scoped_ptr.h"
#include "base/strings/utf_string_conversions.h"
#include "media/audio/win/audio_manager_win.h"
#include "media/audio/win/avrt_wrapper_win.h"
using base::win::ScopedComPtr;
using base::win::ScopedCOMInitializer;
namespace media {
// NOTE(review): these lines read as the parameter list, member-initializer
// list and body of the WASAPIAudioInputStream constructor, but the line that
// names the constructor, part of the initializer list and the closing braces
// are not present in this view -- confirm against the complete file.
//
// Visible behaviour: stores |manager|, clears |sink_|, loads Avrt.dll, fills
// in |format_| as PCM from |params|, derives the frame and packet sizes,
// creates the two capture events and precomputes the time-conversion factors
// |ms_to_frame_count_| and |perf_count_to_100ns_units_|.
AudioManagerWin* manager, const AudioParameters& params,
const std::string& device_id)
: manager_(manager),
sink_(NULL) {
// Load the Avrt DLL if not already loaded. Required to support MMCSS.
bool avrt_init = avrt::Initialize();
DCHECK(avrt_init) << "Failed to load the Avrt.dll";
// Set up the desired capture format specified by the client.
format_.nSamplesPerSec = params.sample_rate();
format_.wFormatTag = WAVE_FORMAT_PCM;
format_.wBitsPerSample = params.bits_per_sample();
format_.nChannels = params.channels();
// One block (= one audio frame) holds one sample for every channel.
format_.nBlockAlign = (format_.wBitsPerSample / 8) * format_.nChannels;
format_.nAvgBytesPerSec = format_.nSamplesPerSec * format_.nBlockAlign;
// No extra format information is appended to the structure.
format_.cbSize = 0;
// Size in bytes of each audio frame.
frame_size_ = format_.nBlockAlign;
// Store size of audio packets which we expect to get from the audio
// endpoint device in each capture event.
// NOTE(review): the integer division truncates if the byte count is not a
// multiple of the frame size -- presumably callers guarantee that; confirm.
packet_size_frames_ = params.GetBytesPerBuffer() / format_.nBlockAlign;
packet_size_bytes_ = params.GetBytesPerBuffer();
DVLOG(1) << "Number of bytes per audio frame : " << frame_size_;
DVLOG(1) << "Number of audio frames per packet: " << packet_size_frames_;
// All events are auto-reset events and non-signaled initially.
// Create the event which the audio engine will signal each time
// a buffer becomes ready to be processed by the client.
audio_samples_ready_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
// Create the event which will be set in Stop() when capturing shall stop.
stop_capture_event_.Set(CreateEvent(NULL, FALSE, FALSE, NULL));
// Number of audio frames per millisecond at the requested sample rate.
ms_to_frame_count_ = static_cast<double>(params.sample_rate()) / 1000.0;
// Factor that converts performance-counter ticks into 100-nanosecond units
// (10,000,000 such units per second divided by ticks per second).
LARGE_INTEGER performance_frequency;
if (QueryPerformanceFrequency(&performance_frequency)) {
perf_count_to_100ns_units_ =
(10000000.0 / static_cast<double>(performance_frequency.QuadPart));
} else {
LOG(ERROR) << "High-resolution performance counters are not supported.";
// A factor of zero makes every converted counter value zero.
perf_count_to_100ns_units_ = 0.0;
// Destructor. The body is empty: no explicit teardown is performed here.
WASAPIAudioInputStream::~WASAPIAudioInputStream() {
}
// Opens the capture stream.
//
// Runs, in order: SetCaptureDevice(), ActivateCaptureDevice(), a debug-only
// GetAudioEngineStreamFormat(), DesiredFormatIsSupported() and
// InitializeAudioEngine().
//
// Returns true when every step succeeds. Returns false if the stream is
// already open or if any step fails. |opened_| is set from the result of
// InitializeAudioEngine().
bool WASAPIAudioInputStream::Open() {
  // Verify that we are not already opened.
  if (opened_)
    return false;

  // Obtain a reference to the IMMDevice interface of the capturing
  // device with the specified unique identifier or role which was
  // set at construction.
  HRESULT hr = SetCaptureDevice();
  if (FAILED(hr))
    return false;

  // Obtain an IAudioClient interface which enables us to create and initialize
  // an audio stream between an audio application and the audio engine.
  hr = ActivateCaptureDevice();
  if (FAILED(hr))
    return false;

  // Retrieve the stream format which the audio engine uses for its internal
  // processing/mixing of shared-mode streams. This function call is for
  // diagnostic purposes only and only in debug mode. Its result is
  // overwritten below and does not influence the outcome of Open().
#ifndef NDEBUG
  hr = GetAudioEngineStreamFormat();
#endif

  // Verify that the selected audio endpoint supports the specified format
  // set during construction.
  if (!DesiredFormatIsSupported())
    return false;

  // Initialize the audio stream between the client and the device using
  // shared mode and a lowest possible glitch-free latency.
  hr = InitializeAudioEngine();
  opened_ = SUCCEEDED(hr);
  return opened_;
}
// Starts capturing and registers |callback| as the consumer of recorded
// audio.
//
// Does nothing if Open() has not succeeded or if the stream is already
// started. Otherwise stores |callback| in |sink_|, creates and starts the
// capture thread and starts the audio client (plus the loopback render client
// when one exists). |started_| is set from the result of those Start() calls.
void WASAPIAudioInputStream::Start(AudioInputCallback* callback) {
  DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
  if (!opened_)
    return;

  // A second Start() must not replace the sink or spawn another thread.
  if (started_)
    return;

  sink_ = callback;

  // NOTE(review): the comment at this position described starting periodic
  // AGC microphone measurements (when enabled via SetAutomaticGainControl()),
  // but no corresponding call is visible in this block -- confirm whether one
  // is required here.

  // Create and start the thread that will drive the capturing by waiting for
  // capture events.
  capture_thread_ =
      new base::DelegateSimpleThread(this, "wasapi_capture_thread");
  capture_thread_->Start();

  // Start streaming data between the endpoint buffer and the audio engine.
  HRESULT hr = audio_client_->Start();
  DLOG_IF(ERROR, FAILED(hr)) << "Failed to start input streaming.";

  // The loopback render client, when present, is only started if the capture
  // client started successfully.
  if (SUCCEEDED(hr) && audio_render_client_for_loopback_)
    hr = audio_render_client_for_loopback_->Start();

  started_ = SUCCEEDED(hr);
}
// Stops capturing: stops the audio client, drops the capture thread pointer
// and clears |started_|.
//
// NOTE(review): several statements appear to be missing from this view, so the
// code below is not balanced as written:
//  - nothing follows `if (!started_)`, so the guard currently governs the next
//    `if`; an early return is presumably intended;
//  - no statement that signals |stop_capture_event_| is visible even though
//    the constructor comment says the event is set in Stop();
//  - no statement that waits for the thread is visible before
//    |capture_thread_| is cleared;
//  - the closing braces of the `if` blocks and of the function are absent.
// Confirm against the complete file before changing the code.
void WASAPIAudioInputStream::Stop() {
DVLOG(1) << "WASAPIAudioInputStream::Stop()";
if (!started_)
// Stops periodic AGC microphone measurements.
// Shut down the capture thread.
if (stop_capture_event_.IsValid()) {
// Stop the input audio streaming.
HRESULT hr = audio_client_->Stop();
if (FAILED(hr)) {
// A failure to stop is only logged; shutdown continues.
LOG(ERROR) << "Failed to stop input streaming.";
// Wait until the thread completes and perform cleanup.
if (capture_thread_) {
capture_thread_ = NULL;
started_ = false;
// Closes the stream and forgets the registered callback.
//
// NOTE(review): only the logging statement and the reset of |sink_| are
// visible. The statement that would notify the audio manager (described by the
// trailing comment) and the closing braces are not present in this view --
// confirm against the complete file.
void WASAPIAudioInputStream::Close() {
DVLOG(1) << "WASAPIAudioInputStream::Close()";
// It is valid to call Close() before calling Open() or Start().
// It is also valid to call Close() after Start() has been called.
if (sink_) {
// Drop the reference to the registered callback.
sink_ = NULL;
// Inform the audio manager that we have been closed. This will cause our
// destruction.
// Returns the upper bound of the volume range.
//
// Returns 0.0 (and logs in debug builds) when Open() has not succeeded,
// otherwise the fixed value 1.0.
double WASAPIAudioInputStream::GetMaxVolume() {
  // Verify that Open() has been called successfully, to ensure that an audio
  // session exists and that an ISimpleAudioVolume interface has been created.
  DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
  if (!opened_)
    return 0.0;

  // The effective volume value is always in the range 0.0 to 1.0, hence
  // we can return a fixed value (=1.0) here.
  return 1.0;
}
// Sets the master volume of the capture session to |volume|.
// |volume| is expected to lie in [0.0, 1.0]; this is checked with DCHECKs.
//
// NOTE(review): this block is incomplete in this view:
//  - nothing follows `if (!opened_)`, so an early return is presumably
//    missing;
//  - the SetMasterVolume() call is cut off after its first argument;
//  - the trailing comment describes an AGC volume update, but no such call is
//    visible;
//  - the closing brace of the function is absent.
// Confirm against the complete file before changing the code.
void WASAPIAudioInputStream::SetVolume(double volume) {
DVLOG(1) << "SetVolume(volume=" << volume << ")";
DCHECK_GE(volume, 0.0);
DCHECK_LE(volume, 1.0);
DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
if (!opened_)
// Set a new master volume level. Valid volume levels are in the range
// 0.0 to 1.0. Ignore volume-change events.
HRESULT hr = simple_audio_volume_->SetMasterVolume(static_cast<float>(volume),
// A failure is only logged (debug builds); it is not reported to the caller.
DLOG_IF(WARNING, FAILED(hr)) << "Failed to set new input master volume.";
// Update the AGC volume level based on the last setting above. Note that,
// the volume-level resolution is not infinite and it is therefore not
// possible to assume that the volume provided as input parameter can be
// used directly. Instead, a new query to the audio hardware is required.
// This method does nothing if AGC is disabled.
// Returns the current master volume of the capture session.
//
// Returns 0.0 (and logs in debug builds) when Open() has not succeeded. If
// the query itself fails, a warning is logged in debug builds and the value
// held in |level| is returned; |level| starts out as 0.0.
double WASAPIAudioInputStream::GetVolume() {
  DLOG_IF(ERROR, !opened_) << "Open() has not been called successfully";
  if (!opened_)
    return 0.0;

  // Retrieve the current volume level. The value is in the range 0.0 to 1.0.
  float level = 0.0f;
  HRESULT hr = simple_audio_volume_->GetMasterVolume(&level);
  DLOG_IF(WARNING, FAILED(hr)) << "Failed to get input master volume.";
  return static_cast<double>(level);
}
// static
// Returns the sample rate (nSamplesPerSec) of the mix format reported by
// GetMixFormat() for |device_id|, or 0 when the format cannot be retrieved.
int WASAPIAudioInputStream::HardwareSampleRate(
    const std::string& device_id) {
  // The scoped holder owns the WAVEFORMATEX returned through GetMixFormat().
  base::win::ScopedCoMem<WAVEFORMATEX> audio_engine_mix_format;
  HRESULT hr = GetMixFormat(device_id, &audio_engine_mix_format);
  if (FAILED(hr))
    return 0;

  return static_cast<int>(audio_engine_mix_format->nSamplesPerSec);
}
// static
// Returns the channel count (nChannels) of the mix format reported by
// GetMixFormat() for |device_id|, or 0 when the format cannot be retrieved.
uint32 WASAPIAudioInputStream::HardwareChannelCount(
    const std::string& device_id) {
  // The scoped holder owns the WAVEFORMATEX returned through GetMixFormat().
  base::win::ScopedCoMem<WAVEFORMATEX> audio_engine_mix_format;
  HRESULT hr = GetMixFormat(device_id, &audio_engine_mix_format);
  if (FAILED(hr))
    return 0;

  return static_cast<uint32>(audio_engine_mix_format->nChannels);
}
// static
// Retrieves, through |device_format|, the mix format of the endpoint selected
// by |device_id|:
//  - AudioManagerBase::kDefaultDeviceId selects the default capture endpoint,
//  - AudioManagerBase::kLoopbackInputDeviceId selects the default render
//    endpoint,
//  - any other value is treated as an endpoint identification string.
// Returns the HRESULT of the first failing step, or of
// IAudioClient::GetMixFormat().
//
// NOTE(review): the CreateInstance(), GetDefaultAudioEndpoint(), GetDevice()
// and Activate() calls are each cut off after their first line in this view,
// and the closing braces are absent -- confirm the remaining arguments against
// the complete file.
HRESULT WASAPIAudioInputStream::GetMixFormat(const std::string& device_id,
WAVEFORMATEX** device_format) {
// It is assumed that this static method is called from a COM thread, i.e.,
// CoInitializeEx() is not called here to avoid STA/MTA conflicts.
ScopedComPtr<IMMDeviceEnumerator> enumerator;
HRESULT hr = enumerator.CreateInstance(__uuidof(MMDeviceEnumerator), NULL,
if (FAILED(hr))
return hr;
ScopedComPtr<IMMDevice> endpoint_device;
if (device_id == AudioManagerBase::kDefaultDeviceId) {
// Retrieve the default capture audio endpoint.
hr = enumerator->GetDefaultAudioEndpoint(eCapture, eConsole,
} else if (device_id == AudioManagerBase::kLoopbackInputDeviceId) {
// Capture the default playback stream.
hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole,
} else {
// Retrieve a capture endpoint device that is specified by an endpoint
// device-identification string.
hr = enumerator->GetDevice(UTF8ToUTF16(device_id).c_str(),
if (FAILED(hr))
return hr;
// Activate a temporary audio client on the endpoint only to query its format.
ScopedComPtr<IAudioClient> audio_client;
hr = endpoint_device->Activate(__uuidof(IAudioClient),
return SUCCEEDED(hr) ? audio_client->GetMixFormat(device_format) : hr;
// Capture loop. Presumably the entry point of the thread created in Start()
// (the stream passes itself as the thread delegate) -- confirm.
//
// Visible behaviour: initializes COM as MTA for the duration of the function,
// tries to enable MMCSS with critical priority, allocates a local capture
// buffer, then waits in a loop on |stop_capture_event_| and
// |audio_samples_ready_event_|. Captured frames are appended to the local
// buffer and consumed in units of |packet_size_frames_| frames; leftover
// frames stay in the buffer for the next pass. MMCSS is reverted before
// returning.
//
// NOTE(review): this block is heavily truncated in this view. Among other
// things the `default:` label, `break;` statements, the declaration of |hr|,
// the assignment of |now_count|, the remaining GetBuffer() arguments, the
// silence-flag test, the callback invocation, the move of leftover data and
// all closing braces are not visible. Do not restyle the code without the
// complete file.
void WASAPIAudioInputStream::Run() {
ScopedCOMInitializer com_init(ScopedCOMInitializer::kMTA);
// Increase the thread priority.
// Enable MMCSS to ensure that this thread receives prioritized access to
// CPU resources.
DWORD task_index = 0;
HANDLE mm_task = avrt::AvSetMmThreadCharacteristics(L"Pro Audio",
bool mmcss_is_ok =
(mm_task && avrt::AvSetMmThreadPriority(mm_task, AVRT_PRIORITY_CRITICAL));
if (!mmcss_is_ok) {
// Failed to enable MMCSS on this thread. It is not fatal but can lead
// to reduced QoS at high load.
DWORD err = GetLastError();
LOG(WARNING) << "Failed to enable MMCSS (error code=" << err << ").";
// Allocate a buffer with a size that enables us to take care of cases like:
// 1) The recorded buffer size is smaller, or does not match exactly with,
// the selected packet size used in each callback.
// 2) The selected buffer size is larger than the recorded buffer size in
// each event.
// |buffer_frame_index| counts the frames currently stored in the local buffer.
size_t buffer_frame_index = 0;
// Twice the larger of the endpoint buffer and the packet size, in bytes.
size_t capture_buffer_size = std::max(
2 * endpoint_buffer_size_frames_ * frame_size_,
2 * packet_size_frames_ * frame_size_);
scoped_ptr<uint8[]> capture_buffer(new uint8[capture_buffer_size]);
LARGE_INTEGER now_count;
bool recording = true;
bool error = false;
double volume = GetVolume();
// The stop event is listed first in the wait array.
HANDLE wait_array[2] = {stop_capture_event_, audio_samples_ready_event_};
while (recording && !error) {
// Wait for a close-down event or a new capture event.
DWORD wait_result = WaitForMultipleObjects(2, wait_array, FALSE, INFINITE);
switch (wait_result) {
// NOTE(review): the case label that owns the next statement is not visible.
error = true;
case WAIT_OBJECT_0 + 0:
// |stop_capture_event_| has been set.
recording = false;
case WAIT_OBJECT_0 + 1:
// |audio_samples_ready_event_| has been set.
BYTE* data_ptr = NULL;
UINT32 num_frames_to_read = 0;
DWORD flags = 0;
UINT64 device_position = 0;
UINT64 first_audio_frame_timestamp = 0;
// Retrieve the amount of data in the capture endpoint buffer,
// replace it with silence if required, create callbacks for each
// packet and store non-delivered data for the next event.
hr = audio_capture_client_->GetBuffer(&data_ptr,
if (FAILED(hr)) {
DLOG(ERROR) << "Failed to get data from the capture buffer";
if (num_frames_to_read != 0) {
// Byte offset of the first free slot in the local buffer and the number of
// bytes delivered by this capture event.
size_t pos = buffer_frame_index * frame_size_;
size_t num_bytes = num_frames_to_read * frame_size_;
// The local buffer must be able to hold the new data after |pos|.
DCHECK_GE(capture_buffer_size, pos + num_bytes);
// Clear out the local buffer since silence is reported.
memset(&capture_buffer[pos], 0, num_bytes);
} else {
// Copy captured data from audio engine buffer to local buffer.
memcpy(&capture_buffer[pos], data_ptr, num_bytes);
buffer_frame_index += num_frames_to_read;
hr = audio_capture_client_->ReleaseBuffer(num_frames_to_read);
DLOG_IF(ERROR, FAILED(hr)) << "Failed to release capture buffer";
// Derive a delay estimate for the captured audio packet.
// The value contains two parts (A+B), where A is the delay of the
// first audio frame in the packet and B is the extra delay
// contained in any stored data. Unit is in audio frames.
// A: (now - timestamp of first frame), converted from 100-ns units to
// milliseconds and then to frames. B: frames stored before this event.
double audio_delay_frames =
((perf_count_to_100ns_units_ * now_count.QuadPart -
first_audio_frame_timestamp) / 10000.0) * ms_to_frame_count_ +
buffer_frame_index - num_frames_to_read;
// Get a cached AGC volume level which is updated once every second
// on the audio manager thread. Note that, |volume| is also updated
// each time SetVolume() is called through IPC by the render-side AGC.
// Deliver captured data to the registered consumer using a packet
// size which was specified at construction.
// Round the delay estimate to the nearest whole frame.
uint32 delay_frames = static_cast<uint32>(audio_delay_frames + 0.5);
while (buffer_frame_index >= packet_size_frames_) {
uint8* audio_data =
// Deliver data packet, delay estimation and volume level to
// the user.
delay_frames * frame_size_,
// Store parts of the recorded data which can't be delivered
// using the current packet size. The stored section will be used
// either in the next while-loop iteration or in the next
// capture event.
(buffer_frame_index - packet_size_frames_) * frame_size_);
// One packet has been consumed from the local buffer.
buffer_frame_index -= packet_size_frames_;
delay_frames -= packet_size_frames_;
error = true;
if (recording && error) {
// TODO(henrika): perhaps it is worth improving the cleanup here by e.g.
// stopping the audio client, joining the thread etc.?
NOTREACHED() << "WASAPI capturing failed with error code "
<< GetLastError();
// Disable MMCSS.
if (mm_task && !avrt::AvRevertMmThreadCharacteristics(mm_task)) {
PLOG(WARNING) << "Failed to disable MMCSS";
// Reports a capture error identified by |err|. Asserts via NOTREACHED() with
// the error code in the message.
//
// NOTE(review): the statement guarded by `if (sink_)` and the closing brace
// are not visible in this view; presumably the registered callback is
// notified -- confirm against the complete file.
void WASAPIAudioInputStream::HandleError(HRESULT err) {
NOTREACHED() << "Error code: " << err;
if (sink_)
// Selects the endpoint device to capture from, based on |device_id_|, and
// verifies that it is active:
//  - AudioManagerBase::kDefaultDeviceId selects the default capture endpoint,
//  - AudioManagerBase::kLoopbackInputDeviceId selects the default render
//    endpoint,
//  - any other value is treated as an endpoint identification string.
// Returns the HRESULT of the first failing step.
//
// NOTE(review): the CreateInstance(), GetDefaultAudioEndpoint() and
// GetDevice() calls are cut off after their first line, the declaration of
// |state| is not visible, and the closing braces are absent. As visible, the
// "not active" branch only logs before `return hr;`; whether |hr| is turned
// into a failure code there cannot be told from this view -- confirm against
// the complete file.
HRESULT WASAPIAudioInputStream::SetCaptureDevice() {
ScopedComPtr<IMMDeviceEnumerator> enumerator;
HRESULT hr = enumerator.CreateInstance(__uuidof(MMDeviceEnumerator),
if (FAILED(hr))
return hr;
// Retrieve the IMMDevice by using the specified role or the specified
// unique endpoint device-identification string.
// TODO(henrika): possibly add support for the eCommunications as well.
if (device_id_ == AudioManagerBase::kDefaultDeviceId) {
// Retrieve the default capture audio endpoint for the specified role.
// Note that, in Windows Vista, the MMDevice API supports device roles
// but the system-supplied user interface programs do not.
hr = enumerator->GetDefaultAudioEndpoint(eCapture, eConsole,
} else if (device_id_ == AudioManagerBase::kLoopbackInputDeviceId) {
// Capture the default playback stream.
hr = enumerator->GetDefaultAudioEndpoint(eRender, eConsole,
} else {
// Retrieve a capture endpoint device that is specified by an endpoint
// device-identification string.
hr = enumerator->GetDevice(UTF8ToUTF16(device_id_).c_str(),
if (FAILED(hr))
return hr;
// Verify that the audio endpoint device is active, i.e., the audio
// adapter that connects to the endpoint device is present and enabled.
hr = endpoint_device_->GetState(&state);
if (FAILED(hr))
return hr;
if (!(state & DEVICE_STATE_ACTIVE)) {
DLOG(ERROR) << "Selected capture device is not active.";
return hr;
// Activates an IAudioClient on |endpoint_device_| and returns the HRESULT of
// that activation.
//
// NOTE(review): the Activate() call is cut off after its first argument and
// the closing brace is absent in this view; the destination of the activated
// interface is therefore not visible -- confirm against the complete file.
HRESULT WASAPIAudioInputStream::ActivateCaptureDevice() {
// Creates and activates an IAudioClient COM object given the selected
// capture endpoint device.
HRESULT hr = endpoint_device_->Activate(__uuidof(IAudioClient),
return hr;
// Queries the audio engine's mix format through |audio_client_| and logs its
// fields at verbosity level 2. The logging body is compiled only when NDEBUG
// is not defined. Returns |hr|.
//
// NOTE(review): this block is truncated in this view. The declaration of
// |hr|, the argument of GetMixFormat(), the tails of the last two DVLOG
// statements, the bodies of the sub-format branches, the matching #endif and
// the closing brace are not visible, and the reference after "See" is
// missing -- confirm against the complete file.
HRESULT WASAPIAudioInputStream::GetAudioEngineStreamFormat() {
#ifndef NDEBUG
// The GetMixFormat() method retrieves the stream format that the
// audio engine uses for its internal processing of shared-mode streams.
// The method always uses a WAVEFORMATEXTENSIBLE structure, instead
// of a stand-alone WAVEFORMATEX structure, to specify the format.
// A WAVEFORMATEXTENSIBLE structure can specify both the mapping of
// channels to speakers and the number of bits of precision in each sample.
base::win::ScopedCoMem<WAVEFORMATEXTENSIBLE> format_ex;
hr = audio_client_->GetMixFormat(
// See
// for details on the WAVE file format.
// Local copy of the basic WAVEFORMATEX part of the extensible format.
WAVEFORMATEX format = format_ex->Format;
DVLOG(2) << " wFormatTags : 0x" << std::hex << format.wFormatTag;
DVLOG(2) << " nChannels : " << format.nChannels;
DVLOG(2) << " nSamplesPerSec : " << format.nSamplesPerSec;
DVLOG(2) << " nAvgBytesPerSec: " << format.nAvgBytesPerSec;
DVLOG(2) << " nBlockAlign : " << format.nBlockAlign;
DVLOG(2) << " wBitsPerSample : " << format.wBitsPerSample;
DVLOG(2) << " cbSize : " << format.cbSize;
DVLOG(2) << " wValidBitsPerSample: " <<
DVLOG(2) << " dwChannelMask : 0x" << std::hex <<
// Distinguish the sub-format reported by the engine.
if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_PCM)
else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT)
else if (format_ex->SubFormat == KSDATAFORMAT_SUBTYPE_WAVEFORMATEX)
return hr;
// Asks |audio_client_| whether a format is supported in shared mode.
// Returns true only when IsFormatSupported() returns S_OK. S_FALSE (a closest
// match exists) is logged in debug builds and treated as "not supported",
// like any other non-S_OK result.
//
// NOTE(review): the IsFormatSupported() call is cut off after its first
// argument and the closing brace is absent in this view. Presumably the format
// set up at construction and |closest_match| are passed -- confirm against
// the complete file.
bool WASAPIAudioInputStream::DesiredFormatIsSupported() {
// An application that uses WASAPI to manage shared-mode streams can rely
// on the audio engine to perform only limited format conversions. The audio
// engine can convert between a standard PCM sample size used by the
// application and the floating-point samples that the engine uses for its
// internal processing. However, the format for an application stream
// typically must have the same number of channels and the same sample
// rate as the stream format used by the device.
// Many audio devices support both PCM and non-PCM stream formats. However,
// the audio engine can mix only PCM streams.
base::win::ScopedCoMem<WAVEFORMATEX> closest_match;
HRESULT hr = audio_client_->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED,
DLOG_IF(ERROR, hr == S_FALSE) << "Format is not supported "
<< "but a closest match exists.";
return (hr == S_OK);
// Initializes |audio_client_| in shared mode, reads back the endpoint buffer
// size, registers |audio_samples_ready_event_| (on a separate render client
// in the loopback case, otherwise on |audio_client_|) and requests the
// capture and volume services from |audio_client_|.
// Returns the HRESULT of the first failing step, or of the last GetService()
// call.
//
// NOTE(review): this block is truncated in this view. The values assigned to
// |flags|, the remaining arguments of Initialize(), Activate(),
// SetEventHandle() and GetService(), the #endif that matches the debug-only
// section and all closing braces are not visible -- confirm against the
// complete file before changing the code.
HRESULT WASAPIAudioInputStream::InitializeAudioEngine() {
DWORD flags;
// Use event-driven mode only for regular input devices. For loopback the
// EVENTCALLBACK flag is specified when initializing
// |audio_render_client_for_loopback_|.
if (device_id_ == AudioManagerBase::kLoopbackInputDeviceId) {
} else {
flags =
// Initialize the audio stream between the client and the device.
// We connect indirectly through the audio engine by using shared mode.
// Note that, |hnsBufferDuration| is set to 0, which ensures that the
// buffer is never smaller than the minimum buffer size needed to ensure
// that glitches do not occur between the periodic processing passes.
// This setting should lead to lowest possible latency.
HRESULT hr = audio_client_->Initialize(AUDCLNT_SHAREMODE_SHARED,
0, // hnsBufferDuration
if (FAILED(hr))
return hr;
// Retrieve the length of the endpoint buffer shared between the client
// and the audio engine. The buffer length determines the maximum amount
// of capture data that the audio engine can read from the endpoint buffer
// during a single processing pass.
// A typical value is 960 audio frames <=> 20ms @ 48kHz sample rate.
hr = audio_client_->GetBufferSize(&endpoint_buffer_size_frames_);
if (FAILED(hr))
return hr;
DVLOG(1) << "endpoint buffer size: " << endpoint_buffer_size_frames_
<< " [frames]";
#ifndef NDEBUG
// The period between processing passes by the audio engine is fixed for a
// particular audio endpoint device and represents the smallest processing
// quantum for the audio engine. This period plus the stream latency between
// the buffer and endpoint device represents the minimum possible latency
// that an audio application can achieve.
// TODO(henrika): possibly remove this section when all parts are ready.
REFERENCE_TIME device_period_shared_mode = 0;
REFERENCE_TIME device_period_exclusive_mode = 0;
// Diagnostics use a separate result variable so that |hr| is left untouched.
HRESULT hr_dbg = audio_client_->GetDevicePeriod(
&device_period_shared_mode, &device_period_exclusive_mode);
if (SUCCEEDED(hr_dbg)) {
// The value is divided by 10000.0 to log it in milliseconds.
DVLOG(1) << "device period: "
<< static_cast<double>(device_period_shared_mode / 10000.0)
<< " [ms]";
REFERENCE_TIME latency = 0;
hr_dbg = audio_client_->GetStreamLatency(&latency);
if (SUCCEEDED(hr_dbg)) {
DVLOG(1) << "stream latency: " << static_cast<double>(latency / 10000.0)
<< " [ms]";
// Set the event handle that the audio engine will signal each time a buffer
// becomes ready to be processed by the client.
// In loopback case the capture device doesn't receive any events, so we
// need to create a separate playback client to get notifications. According
// to MSDN:
// A pull-mode capture client does not receive any events when a stream is
// initialized with event-driven buffering and is loopback-enabled. To
// work around this, initialize a render stream in event-driven mode. Each
// time the client receives an event for the render stream, it must signal
// the capture client to run the capture thread that reads the next set of
// samples from the capture endpoint buffer.
if (device_id_ == AudioManagerBase::kLoopbackInputDeviceId) {
hr = endpoint_device_->Activate(
__uuidof(IAudioClient), CLSCTX_INPROC_SERVER, NULL,
if (FAILED(hr))
return hr;
hr = audio_render_client_for_loopback_->Initialize(
0, 0, &format_, NULL);
if (FAILED(hr))
return hr;
hr = audio_render_client_for_loopback_->SetEventHandle(
} else {
hr = audio_client_->SetEventHandle(audio_samples_ready_event_.Get());
if (FAILED(hr))
return hr;
// Get access to the IAudioCaptureClient interface. This interface
// enables us to read input data from the capture endpoint buffer.
hr = audio_client_->GetService(__uuidof(IAudioCaptureClient),
if (FAILED(hr))
return hr;
// Obtain a reference to the ISimpleAudioVolume interface which enables
// us to control the master volume level of an audio session.
hr = audio_client_->GetService(__uuidof(ISimpleAudioVolume),
return hr;
} // namespace media