blob: ed4e9ce09e46e2e6c9d8d5991ae31affc0f6b2ec [file] [log] [blame]
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Private API for receiving real-time media perception information.
namespace mediaPerceptionPrivate {
enum Status {
// The media analytics process is waiting to be launched.
// The analytics process is running and the media processing pipeline is
// started, but it is not yet receiving image frames. This is a
// transitional state between <code>SUSPENDED</code> and
// <code>RUNNING</code> for the time it takes to warm up the media
// processing pipeline, which can take anywhere from a few seconds to a
// minute.
// Note: <code>STARTED</code> is the initial reply to SetState
// <code>RUNNING</code>.
// The analytics process is running and the media processing pipeline is
// ingesting image frames. At this point, MediaPerception signals should
// be coming over D-Bus.
// Analytics process is running and the media processing pipeline is ready
// to be set to state <code>RUNNING</code>. The D-Bus communications are
// enabled but the media processing pipeline is suspended.
// Enum for restarting the media analytics process using Upstart.
// Calling setState <code>RESTARTING</code> will restart the media process
// to the <code>SUSPENDED</code> state. The app has to set the state to
// <code>RUNNING</code> in order to start receiving media perception
// information again.
// Stops the media analytics process via Upstart.
// Indicates that a ServiceError has occurred.
enum ServiceError {
// The media analytics process could not be reached. This is likely due to
// a faulty comms configuration or the process having crashed.
// The media analytics process is not running. The media perception
// private (MPP) API knows that the process has not been started yet.
// The media analytics process is busy launching. Wait for the setState
// <code>RUNNING</code> or setState <code>RESTARTING</code> callback.
// The type of media analytics component to download and load.
enum ComponentType {
// The smaller component with limited functionality (smaller size and
// limited models).
// The fully-featured component with more functionality (larger size and
// more models).
// The status of the media analytics process component on the device.
enum ComponentStatus {
// The component is successfully installed and the image is mounted.
// The component failed to download, install, or load.
// Specifies the media analytics component to install and load via
// $(ref:setAnalyticsComponent).
dictionary Component {
// The desired component variant (see <code>ComponentType</code>).
ComponentType type;
// The state of the media analytics downloadable component, as returned to
// the $(ref:setAnalyticsComponent) callback.
dictionary ComponentState {
// The installation/load status of the component.
ComponentStatus status;
// The version string for the current component.
DOMString? version;
// The parameters for processing a particular video stream.
dictionary VideoStreamParam {
// Identifies the video stream described by these parameters.
DOMString? id;
// Frame width in pixels.
long? width;
// Frame height in pixels.
long? height;
// The frame rate at which this video stream would be processed.
// (Units not stated here — presumably frames per second; confirm.)
long? frameRate;
// The system and configuration state of the analytics process.
dictionary State {
// The current (or desired, for $(ref:setState)) status of the media
// analytics process.
Status status;
// Optional $(ref:setState) parameter. Specifies the video device the media
// analytics process should open while the media processing pipeline is
// starting. To set this parameter, status has to be <code>RUNNING</code>.
DOMString? deviceContext;
// Return parameter for $(ref:setState) or $(ref:getState) that
// specifies the error type for failure cases.
ServiceError? serviceError;
// A list of video streams processed by the analytics process. To set this
// parameter, status has to be <code>RUNNING</code>.
VideoStreamParam[]? videoStreamParam;
// Media analytics configuration. It can only be used when setting state to
// <code>RUNNING</code>.
DOMString? configuration;
// A 2D point within an image frame.
dictionary Point {
// The horizontal distance from the top left corner of the image.
double? x;
// The vertical distance from the top left corner of the image.
double? y;
// An axis-aligned box within an image frame, defined by two corner points.
dictionary BoundingBox {
// Specifies whether the points are normalized to the size of the image.
boolean? normalized;
// The two points that define the corners of a bounding box.
Point? topLeft;
Point? bottomRight;
// Units in which a Distance magnitude is reported.
enum DistanceUnits {
// Generic dictionary to encapsulate a distance magnitude and units.
dictionary Distance {
// This field provides flexibility to report depths or distances of
// different entity types with different units.
DistanceUnits? units;
// The magnitude of the distance, expressed in <code>units</code>.
double? magnitude;
// The kind of entity that was detected.
enum EntityType {
// A single entity detected in a video frame.
dictionary Entity {
// A unique id associated with the detected entity, which can be used to
// track the entity over time.
long? id;
// The kind of entity detected.
EntityType? type;
// Label for this entity.
DOMString? entityLabel;
// Minimum box which captures entire detected entity.
BoundingBox? boundingBox;
// A value for the quality of this detection.
double? confidence;
// The estimated depth of the entity from the camera.
Distance? depth;
// Latency measurement for a single processing packet.
dictionary PacketLatency {
// Label for this packet.
DOMString? packetLabel;
// Packet processing latency in microseconds.
long? latencyUsec;
// Type of lighting-condition change observed in the video stream.
enum LightCondition {
// No noticeable change occurred.
// Light was switched on in the room.
// Light was switched off in the room.
// Light gradually got dimmer (for example, due to a sunset).
// Light gradually got brighter (for example, due to a sunrise).
// Black frame was detected - the current frame contains only noise.
// Detection of human presence close to the camera.
dictionary VideoHumanPresenceDetection {
// Indicates a probability in [0, 1] interval that a human is present in
// the video frame.
double? humanPresenceLikelihood;
// Indicates a probability in [0, 1] that motion has been detected in the
// video frame.
double? motionDetectedLikelihood;
// Indicates lighting condition in the video frame.
LightCondition? lightCondition;
// Indicates a probability in [0, 1] interval that the
// <code>lightCondition</code> value is correct.
double? lightConditionLikelihood;
// The set of computer vision metadata for an image frame.
dictionary FramePerception {
// The id of the frame these results refer to.
long? frameId;
// Frame width in pixels.
long? frameWidthInPx;
// Frame height in pixels.
long? frameHeightInPx;
// The timestamp associated with the frame (when it is received by the
// analytics process).
double? timestamp;
// The list of entities detected in this frame.
Entity[]? entities;
// Processing latency for a list of packets.
PacketLatency[]? packetLatency;
// Human presence detection results for a video frame.
VideoHumanPresenceDetection? videoHumanPresenceDetection;
// An estimate of the direction that the sound is coming from.
dictionary AudioLocalization {
// An angle in radians in the horizontal plane. It roughly points to the
// peak in the probability distribution of azimuth defined below.
double? azimuthRadians;
// A probability distribution for the current snapshot in time that shows
// the likelihood of a sound source being at a particular azimuth. For
// example, <code>azimuthScores = [0.1, 0.2, 0.3, 0.4]</code> means that
// the probability that the sound is coming from an azimuth of 0, pi/2, pi,
// 3*pi/2 is 0.1, 0.2, 0.3, and 0.4, respectively.
double[]? azimuthScores;
// Spectrogram of an audio frame.
dictionary AudioSpectrogram {
// The spectrogram values for the frame. (Layout and units are not
// specified here — confirm with the analytics process documentation.)
double[]? values;
// Detection of human presence close to the microphone, based on audio.
dictionary AudioHumanPresenceDetection {
// Indicates a probability in [0, 1] interval that a human has caused a
// sound close to the microphone.
double? humanPresenceLikelihood;
// Estimate of the noise spectrogram.
AudioSpectrogram? noiseSpectrogram;
// Spectrogram of an audio frame.
AudioSpectrogram? frameSpectrogram;
// Audio perception results for an audio frame.
dictionary AudioPerception {
// A timestamp in microseconds, attached when this message was generated.
double? timestampUs;
// Audio localization results for an audio frame.
AudioLocalization? audioLocalization;
// Audio human presence detection results for an audio frame.
AudioHumanPresenceDetection? audioHumanPresenceDetection;
// Detection of human presence based on both audio and video inputs.
dictionary AudioVisualHumanPresenceDetection {
// Indicates a probability in the [0, 1] interval that a human is present.
double? humanPresenceLikelihood;
// Perception results based on both audio and video inputs.
dictionary AudioVisualPerception {
// A timestamp in microseconds, attached when this message was generated.
double? timestampUs;
// Human presence detection results.
AudioVisualHumanPresenceDetection? audioVisualHumanPresenceDetection;
// The full set of perception results emitted by the media processing
// pipeline in a single message (see $(ref:onMediaPerception)).
dictionary MediaPerception {
// The time the media perception data was emitted by the media processing
// pipeline. This value will be greater than the timestamp stored within
// the FramePerception dictionary and the difference between them can be
// viewed as the processing time for a single frame.
double? timestamp;
// An array of frame perceptions.
FramePerception[]? framePerceptions;
// An array of audio perceptions.
AudioPerception[]? audioPerceptions;
// An array of audio-visual perceptions.
AudioVisualPerception[]? audioVisualPerceptions;
// The encoding of an image frame's data.
enum ImageFormat {
// Image represented by RGB data channels.
// A single frame of image data.
dictionary ImageFrame {
// Frame width in pixels.
long? width;
// Frame height in pixels.
long? height;
// The encoding of the frame data.
ImageFormat? format;
// The length of the frame data in bytes (presumably the byte length of
// <code>frame</code> — confirm).
long? dataLength;
// The bytes of the image frame.
ArrayBuffer? frame;
// A bundle of perception results and the image frame they refer to, used
// for diagnostics (see $(ref:getDiagnostics)).
dictionary PerceptionSample {
// The video analytics FramePerception for the associated image frame
// data.
FramePerception? framePerception;
// The image frame data for the associated FramePerception object.
ImageFrame? imageFrame;
// The audio perception results for an audio frame.
AudioPerception? audioPerception;
// Perception results based on both audio and video inputs.
AudioVisualPerception? audioVisualPerception;
// Diagnostic information from the analytics process, returned by
// $(ref:getDiagnostics).
dictionary Diagnostics {
// Return parameter for $(ref:getDiagnostics) that specifies the error
// type for failure cases.
ServiceError? serviceError;
// A buffer of image frames and the associated video analytics information
// that can be used to diagnose a malfunction.
PerceptionSample[]? perceptionSamples;
// Invoked with the State returned by $(ref:getState) or $(ref:setState).
callback StateCallback = void(State state);
// Invoked with the Diagnostics returned by $(ref:getDiagnostics).
callback DiagnosticsCallback = void(Diagnostics diagnostics);
// Invoked with the ComponentState returned by
// $(ref:setAnalyticsComponent).
callback ComponentStateCallback = void(ComponentState componentState);
interface Functions {
// Gets the status of the media perception process.
// |callback| : The current state of the system.
static void getState(StateCallback callback);
// Sets the desired state of the system.
// |state| : A dictionary with the desired new state. The only settable
// states are <code>RUNNING</code>, <code>SUSPENDED</code>, and
// <code>RESTARTING</code>.
// |callback| : Invoked with the State of the system after setting it. Can
// be used to verify the state was set as desired.
static void setState(State state, StateCallback callback);
// Gets a diagnostics buffer out of the video analytics process.
// |callback| : Returns a Diagnostics dictionary object.
static void getDiagnostics(DiagnosticsCallback callback);
// Attempts to download and load the media analytics component. This
// function should be called every time a client starts using this API. If
// the component is already loaded, the callback will simply return that
// information. The process must be <code>STOPPED</code> for this function
// to succeed.
// Note: If a different component type is desired, this function can
// be called with the new desired type and the new component will be
// downloaded and installed.
// |component| : The desired component to install and load.
// |callback| : Returns the state of the component.
static void setAnalyticsComponent(
Component component,
ComponentStateCallback callback);
interface Events {
// Fired when media perception information is received from the media
// analytics process.
// |mediaPerception| : A dictionary that contains a dump of everything
// the analytics process has detected or determined from the incoming media
// streams.
static void onMediaPerception(MediaPerception mediaPerception);