| // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| // Use of this source code is governed by a BSD-style license that can be |
| // found in the LICENSE file. |
| |
| #ifndef MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_ |
| #define MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_ |
| |
| #include <Audioclient.h> |
| #include <MMDeviceAPI.h> |
| |
| #include <string> |
| |
| #include "base/compiler_specific.h" |
| #include "base/gtest_prod_util.h" |
| #include "base/threading/platform_thread.h" |
| #include "base/threading/simple_thread.h" |
| #include "base/win/scoped_co_mem.h" |
| #include "base/win/scoped_comptr.h" |
| #include "base/win/scoped_handle.h" |
| #include "media/audio/audio_io.h" |
| #include "media/audio/audio_parameters.h" |
| #include "media/base/audio_fifo.h" |
| #include "media/base/channel_mixer.h" |
| #include "media/base/media_export.h" |
| #include "media/base/multi_channel_resampler.h" |
| |
| namespace media { |
| |
| class AudioManagerWin; |
| |
| // Implementation of AudioOutputStream for Windows using the Core Audio API |
| // where both capturing and rendering takes place on the same thread to enable |
| // audio I/O. This class allows arbitrary combinations of input and output |
| // devices running off different clocks and using different drivers, with |
| // potentially differing sample-rates. |
| // |
| // It is required to first acquire the native sample rate of the selected |
| // output device and then use the same rate when creating this object. |
| // The inner operation depends on the input sample rate which is determined |
| // during construction. Three different main modes are supported: |
| // |
| // 1) input rate == output rate => input side drives output side directly. |
| // 2) input rate != output rate => both sides are driven independently by |
| // events and a FIFO plus a resampling unit is used to compensate for |
| // differences in sample rates between the two sides. |
| // 3) input rate == output rate but native buffer sizes are not identical => |
| // same inner functionality as in (2) to compensate for the differences |
| // in buffer sizes and also compensate for any potential clock drift |
| // between the two devices. |
| // |
| // Mode detection is is done at construction and using mode (1) will lead to |
| // best performance (lower delay and no "varispeed distortion"), i.e., it is |
| // recommended to use same sample rates for input and output. Mode (2) uses a |
| // resampler which supports rate adjustments to fine tune for things like |
| // clock drift and differences in sample rates between different devices. |
| // Mode (2) - which uses a FIFO and a adjustable multi-channel resampler - |
| // is also called the varispeed mode and it is used for case (3) as well to |
| // compensate for the difference in buffer sizes mainly. |
| // Mode (3) can happen if two different audio devices are used. |
| // As an example: some devices needs a buffer size of 441 @ 44.1kHz and others |
| // 448 @ 44.1kHz. This is a rare case and will only happen for sample rates |
| // which are even multiples of 11025 Hz (11025, 22050, 44100, 88200 etc.). |
| // |
| // Implementation notes: |
| // |
| // - Open() can fail if the input and output parameters do not fulfill |
| // certain conditions. See source for Open() for more details. |
| // - Channel mixing will be performed if the clients asks for a larger |
| // number of channels than the native audio layer provides. |
| // Example: client wants stereo but audio layer provides mono. In this case |
| // upmixing from mono to stereo (1->2) will be done. |
| // |
| // TODO(henrika): |
| // |
| // - Add support for exclusive mode. |
| // - Add support for KSDATAFORMAT_SUBTYPE_IEEE_FLOAT, i.e., 32-bit float |
| // as internal sample-value representation. |
| // - Perform fine-tuning for non-matching sample rates to reduce latency. |
| // |
| class MEDIA_EXPORT WASAPIUnifiedStream |
| : public AudioOutputStream, |
| public base::DelegateSimpleThread::Delegate { |
| public: |
| // The ctor takes all the usual parameters, plus |manager| which is the |
| // the audio manager who is creating this object. |
| WASAPIUnifiedStream(AudioManagerWin* manager, |
| const AudioParameters& params, |
| const std::string& input_device_id); |
| |
| // The dtor is typically called by the AudioManager only and it is usually |
| // triggered by calling AudioOutputStream::Close(). |
| virtual ~WASAPIUnifiedStream(); |
| |
| // Implementation of AudioOutputStream. |
| virtual bool Open() OVERRIDE; |
| virtual void Start(AudioSourceCallback* callback) OVERRIDE; |
| virtual void Stop() OVERRIDE; |
| virtual void Close() OVERRIDE; |
| virtual void SetVolume(double volume) OVERRIDE; |
| virtual void GetVolume(double* volume) OVERRIDE; |
| |
| bool started() const { |
| return audio_io_thread_.get() != NULL; |
| } |
| |
| // Returns true if input sample rate differs from the output sample rate. |
| // A FIFO and a adjustable multi-channel resampler are utilized in this mode. |
| bool VarispeedMode() const { return (fifo_ && resampler_); } |
| |
| private: |
| enum { |
| // Time in milliseconds between two successive delay measurements. |
| // We save resources by not updating the delay estimates for each capture |
| // event (typically 100Hz rate). |
| kTimeDiffInMillisecondsBetweenDelayMeasurements = 1000, |
| |
| // Max possible FIFO size. |
| kFifoSize = 16384, |
| |
| // This value was determined empirically for minimum latency while still |
| // guarding against FIFO under-runs. The actual target size will be equal |
| // to kTargetFifoSafetyFactor * (native input buffer size). |
| // TODO(henrika): tune this value for lowest possible latency for all |
| // possible sample rate combinations. |
| kTargetFifoSafetyFactor = 2 |
| }; |
| |
| // Additional initialization required when input and output sample rate |
| // differs. Allocates resources for |fifo_|, |resampler_|, |render_event_|, |
| // and the |capture_bus_| and configures the |input_format_| structure |
| // given the provided input and output audio parameters. |
| void DoVarispeedInitialization(const AudioParameters& input_params, |
| const AudioParameters& output_params); |
| |
| // Clears varispeed related components such as the FIFO and the resampler. |
| void ResetVarispeed(); |
| |
| // Builds WAVEFORMATEX structures for input and output based on input and |
| // output audio parameters. |
| void SetIOFormats(const AudioParameters& input_params, |
| const AudioParameters& output_params); |
| |
| // DelegateSimpleThread::Delegate implementation. |
| virtual void Run() OVERRIDE; |
| |
| // MultiChannelResampler::MultiChannelAudioSourceProvider implementation. |
| // Callback for providing more data into the resampler. |
| // Only used in varispeed mode, i.e., when input rate != output rate. |
| virtual void ProvideInput(int frame_delay, AudioBus* audio_bus); |
| |
| // Issues the OnError() callback to the |source_|. |
| void HandleError(HRESULT err); |
| |
| // Stops and joins the audio thread in case of an error. |
| void StopAndJoinThread(HRESULT err); |
| |
| // Converts unique endpoint ID to user-friendly device name. |
| std::string GetDeviceName(LPCWSTR device_id) const; |
| |
| // Called on the audio IO thread for each capture event. |
| // Buffers captured audio into a FIFO if varispeed is used or into an audio |
| // bus if input and output sample rates are identical. |
| void ProcessInputAudio(); |
| |
| // Called on the audio IO thread for each render event when varispeed is |
| // active or for each capture event when varispeed is not used. |
| // In varispeed mode, it triggers a resampling callback, which reads from the |
| // FIFO, and calls AudioSourceCallback::OnMoreIOData using the resampled |
| // input signal and at the same time asks for data to play out. |
| // If input and output rates are the same - instead of reading from the FIFO |
| // and do resampling - we read directly from the audio bus used to store |
| // captured data in ProcessInputAudio. |
| void ProcessOutputAudio(IAudioClock* audio_output_clock); |
| |
| // Contains the thread ID of the creating thread. |
| base::PlatformThreadId creating_thread_id_; |
| |
| // Our creator, the audio manager needs to be notified when we close. |
| AudioManagerWin* manager_; |
| |
| // Contains the audio parameter structure provided at construction. |
| AudioParameters params_; |
| // For convenience, same as in params_. |
| int input_channels_; |
| int output_channels_; |
| |
| // Unique ID of the input device to be opened. |
| const std::string input_device_id_; |
| |
| // The sharing mode for the streams. |
| // Valid values are AUDCLNT_SHAREMODE_SHARED and AUDCLNT_SHAREMODE_EXCLUSIVE |
| // where AUDCLNT_SHAREMODE_SHARED is the default. |
| AUDCLNT_SHAREMODE share_mode_; |
| |
| // Rendering and capturing is driven by this thread (no message loop). |
| // All OnMoreIOData() callbacks will be called from this thread. |
| scoped_ptr<base::DelegateSimpleThread> audio_io_thread_; |
| |
| // Contains the desired audio output format which is set up at construction. |
| // It is required to first acquire the native sample rate of the selected |
| // output device and then use the same rate when creating this object. |
| WAVEFORMATPCMEX output_format_; |
| |
| // Contains the native audio input format which is set up at construction |
| // if varispeed mode is utilized. |
| WAVEFORMATPCMEX input_format_; |
| |
| // True when successfully opened. |
| bool opened_; |
| |
| // Volume level from 0 to 1 used for output scaling. |
| double volume_; |
| |
| // Size in audio frames of each audio packet where an audio packet |
| // is defined as the block of data which the destination is expected to |
| // receive in each OnMoreIOData() callback. |
| size_t output_buffer_size_frames_; |
| |
| // Size in audio frames of each audio packet where an audio packet |
| // is defined as the block of data which the source is expected to |
| // deliver in each OnMoreIOData() callback. |
| size_t input_buffer_size_frames_; |
| |
| // Length of the audio endpoint buffer. |
| uint32 endpoint_render_buffer_size_frames_; |
| uint32 endpoint_capture_buffer_size_frames_; |
| |
| // Counts the number of audio frames written to the endpoint buffer. |
| uint64 num_written_frames_; |
| |
| // Time stamp for last delay measurement. |
| base::TimeTicks last_delay_sample_time_; |
| |
| // Contains the total (sum of render and capture) delay in milliseconds. |
| double total_delay_ms_; |
| |
| // Contains the total (sum of render and capture and possibly FIFO) delay |
| // in bytes. The update frequency is set by a constant called |
| // |kTimeDiffInMillisecondsBetweenDelayMeasurements|. |
| int total_delay_bytes_; |
| |
| // Pointer to the client that will deliver audio samples to be played out. |
| AudioSourceCallback* source_; |
| |
| // IMMDevice interfaces which represents audio endpoint devices. |
| base::win::ScopedComPtr<IMMDevice> endpoint_render_device_; |
| base::win::ScopedComPtr<IMMDevice> endpoint_capture_device_; |
| |
| // IAudioClient interfaces which enables a client to create and initialize |
| // an audio stream between an audio application and the audio engine. |
| base::win::ScopedComPtr<IAudioClient> audio_output_client_; |
| base::win::ScopedComPtr<IAudioClient> audio_input_client_; |
| |
| // IAudioRenderClient interfaces enables a client to write output |
| // data to a rendering endpoint buffer. |
| base::win::ScopedComPtr<IAudioRenderClient> audio_render_client_; |
| |
| // IAudioCaptureClient interfaces enables a client to read input |
| // data from a capturing endpoint buffer. |
| base::win::ScopedComPtr<IAudioCaptureClient> audio_capture_client_; |
| |
| // The audio engine will signal this event each time a buffer has been |
| // recorded. |
| base::win::ScopedHandle capture_event_; |
| |
| // The audio engine will signal this event each time it needs a new |
| // audio buffer to play out. |
| // Only utilized in varispeed mode. |
| base::win::ScopedHandle render_event_; |
| |
| // This event will be signaled when streaming shall stop. |
| base::win::ScopedHandle stop_streaming_event_; |
| |
| // Container for retrieving data from AudioSourceCallback::OnMoreIOData(). |
| scoped_ptr<AudioBus> output_bus_; |
| |
| // Container for sending data to AudioSourceCallback::OnMoreIOData(). |
| scoped_ptr<AudioBus> input_bus_; |
| |
| // Container for storing output from the channel mixer. |
| scoped_ptr<AudioBus> channel_bus_; |
| |
| // All members below are only allocated, or used, in varispeed mode: |
| |
| // Temporary storage of resampled input audio data. |
| scoped_ptr<AudioBus> resampled_bus_; |
| |
| // Set to true first time a capture event has been received in varispeed |
| // mode. |
| bool input_callback_received_; |
| |
| // MultiChannelResampler is a multi channel wrapper for SincResampler; |
| // allowing high quality sample rate conversion of multiple channels at once. |
| scoped_ptr<MultiChannelResampler> resampler_; |
| |
| // Resampler I/O ratio. |
| double io_sample_rate_ratio_; |
| |
| // Used for input to output buffering. |
| scoped_ptr<AudioFifo> fifo_; |
| |
| // The channel mixer is only created and utilized if number of input channels |
| // is larger than the native number of input channels (e.g client wants |
| // stereo but the audio device only supports mono). |
| scoped_ptr<ChannelMixer> channel_mixer_; |
| |
| // The optimal number of frames we'd like to keep in the FIFO at all times. |
| int target_fifo_frames_; |
| |
| // A running average of the measured delta between actual number of frames |
| // in the FIFO versus |target_fifo_frames_|. |
| double average_delta_; |
| |
| // A varispeed rate scalar which is calculated based on FIFO drift. |
| double fifo_rate_compensation_; |
| |
| // Set to true when input side signals output side that a new delay |
| // estimate is needed. |
| bool update_output_delay_; |
| |
| // Capture side stores its delay estimate so the sum can be derived in |
| // the render side. |
| double capture_delay_ms_; |
| |
| // TODO(henrika): possibly remove these members once the performance is |
| // properly tuned. Only used for off-line debugging. |
| #ifndef NDEBUG |
| enum LogElementNames { |
| INPUT_TIME_STAMP, |
| NUM_FRAMES_IN_FIFO, |
| RESAMPLER_MARGIN, |
| RATE_COMPENSATION |
| }; |
| |
| scoped_ptr<int64[]> input_time_stamps_; |
| scoped_ptr<int[]> num_frames_in_fifo_; |
| scoped_ptr<int[]> resampler_margin_; |
| scoped_ptr<double[]> fifo_rate_comps_; |
| scoped_ptr<int[]> num_elements_; |
| scoped_ptr<int[]> input_params_; |
| scoped_ptr<int[]> output_params_; |
| |
| FILE* data_file_; |
| FILE* param_file_; |
| #endif |
| |
| DISALLOW_COPY_AND_ASSIGN(WASAPIUnifiedStream); |
| }; |
| |
| } // namespace media |
| |
| #endif // MEDIA_AUDIO_WIN_AUDIO_UNIFIED_WIN_H_ |