| // Copyright 2017 The Chromium Authors | 
 | // Use of this source code is governed by a BSD-style license that can be | 
 | // found in the LICENSE file. | 
 |  | 
 | // Private API for receiving real-time media perception information. | 
 | [platforms=("chromeos")] | 
 | namespace mediaPerceptionPrivate { | 
  // The lifecycle state of the media analytics process and its media
  // processing pipeline.
  enum Status {
    // The media analytics process is waiting to be launched.
    UNINITIALIZED,

    // The analytics process is running and the media processing pipeline is
    // started, but it is not yet receiving image frames. This is a
    // transitional state between <code>SUSPENDED</code> and
    // <code>RUNNING</code> for the time it takes to warm up the media
    // processing pipeline, which can take anywhere from a few seconds to a
    // minute.
    // Note: <code>STARTED</code> is the initial reply to SetState
    // <code>RUNNING</code>.
    STARTED,

    // The analytics process is running and the media processing pipeline is
    // ingesting image frames. At this point, MediaPerception signals should
    // be coming over D-Bus.
    RUNNING,

    // Analytics process is running and the media processing pipeline is ready
    // to be set to state <code>RUNNING</code>. The D-Bus communications
    // are enabled but the media processing pipeline is suspended.
    SUSPENDED,

    // Enum for restarting the media analytics process using Upstart.
    // Calling setState <code>RESTARTING</code> will restart the media process
    // to the <code>SUSPENDED</code> state. The app has to set the state to
    // <code>RUNNING</code> in order to start receiving media perception
    // information again.
    RESTARTING,

    // Stops the media analytics process via Upstart.
    STOPPED,

    // Indicates that a ServiceError has occurred.
    SERVICE_ERROR
  };
 |  | 
  // Error conditions reported when a request to the media analytics process
  // cannot be serviced.
  enum ServiceError {
    // The media analytics process could not be reached. This is likely due to
    // a faulty comms configuration or that the process crashed.
    SERVICE_UNREACHABLE,

    // The media analytics process is not running. The MPP API knows that the
    // process has not been started yet.
    SERVICE_NOT_RUNNING,

    // The media analytics process is busy launching. Wait for setState
    // <code>RUNNING</code> or setState <code>RESTARTING</code> callback.
    SERVICE_BUSY_LAUNCHING,

    // The component is not installed properly.
    SERVICE_NOT_INSTALLED,

    // Failed to establish a Mojo connection to the service.
    MOJO_CONNECTION_FAILURE
  };
 |  | 
  // Media perception features that can be enabled via the
  // <code>features</code> field of $(ref:setState).
  enum Feature {
    AUTOZOOM,
    HOTWORD_DETECTION,
    OCCUPANCY_DETECTION,
    EDGE_EMBEDDINGS,
    SOFTWARE_CROPPING
  };
 |  | 
  // A named start-up parameter to be substituted into the media analytics
  // configuration (see the <code>namedTemplateArguments</code> field of
  // $(ref:State)).
  dictionary NamedTemplateArgument {
    // The name of the template argument.
    DOMString? name;
    // The value to substitute: either a string or a number.
    (DOMString or double)? value;
  };
 |  | 
  // The flavor of the downloadable media analytics component.
  enum ComponentType {
    // The smaller component with limited functionality (smaller size and
    // limited models).
    LIGHT,
    // The fully-featured component with more functionality (larger size and
    // more models).
    FULL
  };
 |  | 
  // The status of the media analytics process component on the device.
  enum ComponentStatus {
    // The installation status of the component has not been determined.
    UNKNOWN,
    // The component is successfully installed and the image is mounted.
    INSTALLED,
    // The component failed to download, install or load.
    FAILED_TO_INSTALL
  };
 |  | 
  // Error code associated with a failure to install the media analytics
  // component.
  enum ComponentInstallationError {
    // Component requested does not exist.
    UNKNOWN_COMPONENT,

    // The update engine failed to install the component.
    INSTALL_FAILURE,

    // The component image could not be mounted.
    MOUNT_FAILURE,

    // The component is not compatible with the device.
    COMPATIBILITY_CHECK_FAILED,

    // The component was not found - reported for load requests with kSkip
    // update policy.
    NOT_FOUND
  };
 |  | 
  // Specifies the media analytics component to install and load via
  // $(ref:setAnalyticsComponent).
  dictionary Component {
    // The desired component type.
    ComponentType type;
  };
 |  | 
  // The state of the media analytics downloadable component.
  dictionary ComponentState {
    // The installation status of the component.
    ComponentStatus status;

    // The version string for the current component.
    DOMString? version;

    // If the component installation failed, the encountered installation
    // error. Not set if the component installation succeeded.
    ComponentInstallationError? installationErrorCode;
  };
 |  | 
  // ------------------- Start of process management definitions. ------------
  // New interface for managing the process state of the media perception
  // service with the intention of eventually phasing out the setState() call.
  enum ProcessStatus {
    // The component process state is unknown, for example, if the process is
    // waiting to be launched. This is the initial state before
    // $(ref:setComponentProcessState) is first called.
    UNKNOWN,

    // The component process has been started.
    // This value can only be passed to $(ref:setComponentProcessState) if the
    // process is currently in state <code>STOPPED</code> or
    // <code>UNKNOWN</code>.
    STARTED,

    // The component process has been stopped.
    // This value can only be passed to $(ref:setComponentProcessState) if the
    // process is currently in state <code>STARTED</code>.
    // Note: the process is automatically stopped when the Chrome process
    // is closed.
    STOPPED,

    // Indicates that a ServiceError has occurred. Reported via the
    // <code>serviceError</code> field of $(ref:ProcessState).
    SERVICE_ERROR
  };
 |  | 
  // The process state of the media analytics component, used as both the
  // request and reply of $(ref:setComponentProcessState).
  dictionary ProcessState {
    // The status of the component process.
    ProcessStatus? status;

    // Return parameter for $(ref:setComponentProcessState) that
    // specifies the error type for failure cases.
    ServiceError? serviceError;
  };
 |   // ------------------- End of process management definitions. -------------- | 
 |  | 
  // The parameters for processing a particular video stream.
  dictionary VideoStreamParam {
    // Identifies the video stream described by these parameters.
    DOMString? id;

    // Frame width in pixels.
    long? width;

    // Frame height in pixels.
    long? height;

    // The frame rate at which this video stream would be processed.
    // Presumably frames per second — confirm with the analytics process.
    long? frameRate;
  };
 |  | 
  // A 2D point within an image frame.
  dictionary Point {
    // The horizontal distance from the top left corner of the image.
    double? x;

    // The vertical distance from the top left corner of the image.
    double? y;
  };
 |  | 
  // The parameters for a whiteboard in the image frame. Corners are given in
  // pixel coordinates normalized to the size of the image frame (i.e. in the
  // range [(0.0, 0.0), (1.0, 1.0)]). The aspectRatio is the physical aspect
  // ratio of the whiteboard (e.g. for a 1m high and 2m wide whiteboard, the
  // aspect ratio would be 2).
  dictionary Whiteboard {
    // The top left corner of the whiteboard in the image frame.
    Point? topLeft;

    // The top right corner of the whiteboard in the image frame.
    Point? topRight;

    // The bottom left corner of the whiteboard in the image frame.
    Point? bottomLeft;

    // The bottom right corner of the whiteboard in the image frame.
    Point? bottomRight;

    // The physical aspect ratio of the whiteboard (width / height).
    double? aspectRatio;
  };
 |  | 
  // The system and configuration state of the analytics process.
  dictionary State {
    // The current (or, for $(ref:setState), desired) process status.
    Status status;

    // Optional $(ref:setState) parameter. Specifies the video device the media
    // analytics process should open while the media processing pipeline is
    // starting. To set this parameter, status has to be <code>RUNNING</code>.
    DOMString? deviceContext;

    // Return parameter for $(ref:setState) or $(ref:getState) that
    // specifies the error type for failure cases.
    ServiceError? serviceError;

    // A list of video streams processed by the analytics process. To set this
    // parameter, status has to be <code>RUNNING</code>.
    VideoStreamParam[]? videoStreamParam;

    // Media analytics configuration. It can only be used when setting state to
    // <code>RUNNING</code>.
    DOMString? configuration;

    // Corners and aspect ratio of the whiteboard in the image frame. Should
    // only be set when setting state to <code>RUNNING</code> and configuration
    // to whiteboard.
    Whiteboard? whiteboard;

    // A list of enabled media perception features.
    Feature[]? features;

    // A list of named parameters to be substituted at start-up. Will
    // only have effect when setting state to <code>RUNNING</code>.
    NamedTemplateArgument[]? namedTemplateArguments;
  };
 |  | 
  // A rectangular region of an image frame, defined by two corner points.
  dictionary BoundingBox {
    // Specifies whether the points are normalized to the size of the image.
    boolean? normalized;

    // The two points that define the corners of a bounding box.
    Point? topLeft;
    Point? bottomRight;
  };
 |  | 
  // Units for the magnitude of a $(ref:Distance).
  enum DistanceUnits {
    UNSPECIFIED,
    METERS,
    PIXELS
  };
 |  | 
  // Generic dictionary to encapsulate a distance magnitude and units.
  dictionary Distance {
    // This field provides flexibility to report depths or distances of
    // different entity types with different units.
    DistanceUnits? units;

    // The magnitude of the distance, expressed in <code>units</code>.
    double? magnitude;
  };
 |  | 
  // The type of a detected $(ref:Entity).
  enum EntityType {
    UNSPECIFIED,
    FACE,
    PERSON,
    MOTION_REGION,
    LABELED_REGION
  };
 |  | 
  // The type of perception that was run on an image frame.
  enum FramePerceptionType {
    UNKNOWN_TYPE,
    FACE_DETECTION,
    PERSON_DETECTION,
    MOTION_DETECTION
  };
 |  | 
  // A single entity detected in an image frame.
  dictionary Entity {
    // A unique id associated with the detected entity, which can be used to
    // track the entity over time.
    long? id;

    // The type of the detected entity.
    EntityType? type;

    // Label for this entity.
    DOMString? entityLabel;

    // Minimum box which captures entire detected entity.
    BoundingBox? boundingBox;

    // A value for the quality of this detection.
    double? confidence;

    // The estimated depth of the entity from the camera.
    Distance? depth;
  };
 |  | 
  // Processing latency measured for a single packet in the pipeline.
  dictionary PacketLatency {
    // Label for this packet.
    DOMString? packetLabel;

    // Packet processing latency in microseconds.
    long? latencyUsec;
  };
 |  | 
  // The type of lighting-condition change detected in a video frame.
  enum LightCondition {
    UNSPECIFIED,

    // No noticeable change occurred.
    NO_CHANGE,

    // Light was switched on in the room.
    TURNED_ON,

    // Light was switched off in the room.
    TURNED_OFF,

    // Light gradually got dimmer (for example, due to a sunset).
    DIMMER,

    // Light gradually got brighter (for example, due to a sunrise).
    BRIGHTER,

    // Black frame was detected - the current frame contains only noise.
    BLACK_FRAME
  };
 |  | 
  // Detection of human presence close to the camera.
  dictionary VideoHumanPresenceDetection {
    // Indicates a probability in [0, 1] interval that a human is present in
    // the video frame.
    double? humanPresenceLikelihood;

    // Indicates a probability in [0, 1] that motion has been detected in the
    // video frame.
    double? motionDetectedLikelihood;

    // Indicates lighting condition in the video frame.
    LightCondition? lightCondition;

    // Indicates a probability in [0, 1] interval that
    // <code>lightCondition</code> value is correct.
    double? lightConditionLikelihood;
  };
 |  | 
  // The set of computer vision metadata for an image frame.
  dictionary FramePerception {
    // Identifier for the frame these results refer to.
    long? frameId;

    // Frame dimensions in pixels.
    long? frameWidthInPx;
    long? frameHeightInPx;

    // The timestamp associated with the frame (when it is received by the
    // analytics process).
    double? timestamp;

    // The list of entities detected in this frame.
    Entity[]? entities;

    // Processing latency for a list of packets.
    PacketLatency[]? packetLatency;

    // Human presence detection results for a video frame.
    VideoHumanPresenceDetection? videoHumanPresenceDetection;

    // Indicates what types of frame perception were run.
    FramePerceptionType[]? framePerceptionTypes;
  };
 |  | 
  // An estimate of the direction that the sound is coming from, expressed as
  // an azimuth in the horizontal plane.
  dictionary AudioLocalization {
    // An angle in radians in the horizontal plane. It roughly points to the
    // peak in the probability distribution of azimuth defined below.
    double? azimuthRadians;

    // A probability distribution for the current snapshot in time that shows
    // the likelihood of a sound source being at a particular azimuth. For
    // example, <code>azimuthScores = [0.1, 0.2, 0.3, 0.4]</code> means that
    // the probability that the sound is coming from an azimuth of 0, pi/2, pi,
    // 3*pi/2 is 0.1, 0.2, 0.3 and 0.4, respectively.
    double[]? azimuthScores;
  };
 |  | 
  // Spectrogram of an audio frame.
  dictionary AudioSpectrogram {
    // The spectrogram values. The binning and scale are determined by the
    // analytics process — confirm against its configuration.
    double[]? values;
  };
 |  | 
  // Detection of human presence close to the microphone.
  dictionary AudioHumanPresenceDetection {
    // Indicates a probability in [0, 1] interval that a human has caused a
    // sound close to the microphone.
    double? humanPresenceLikelihood;

    // Estimate of the noise spectrogram.
    AudioSpectrogram? noiseSpectrogram;

    // Spectrogram of the current audio frame.
    AudioSpectrogram? frameSpectrogram;
  };
 |  | 
  // The kind of hotword that can be detected in the audio stream.
  enum HotwordType {
    // The hotword type could not be determined.
    UNKNOWN_TYPE,
    // The "Ok Google" hotword.
    OK_GOOGLE
  };
 |  | 
  // A hotword detected in the audio stream.
  dictionary Hotword {
    // Unique identifier for the hotword instance. Note that a single hotword
    // instance can span more than one audio frame. In that case a single
    // hotword instance can be reported in multiple Hotword or HotwordDetection
    // results. Hotword results associated with the same hotword instance will
    // have the same <code>id</code>.
    long? id;

    // Indicates the type of this hotword.
    HotwordType? type;

    // Id of the audio frame in which the hotword was detected.
    long? frameId;

    // Indicates the start time of this hotword in the audio frame, in
    // milliseconds.
    long? startTimestampMs;

    // Indicates the end time of this hotword in the audio frame, in
    // milliseconds.
    long? endTimestampMs;

    // Indicates a probability in [0, 1] interval that this hotword is present
    // in the audio frame.
    double? confidence;
  };
 |  | 
  // Detection of hotword in the audio stream.
  dictionary HotwordDetection {
    // The hotwords detected in the current audio frame.
    Hotword[]? hotwords;
  };
 |  | 
  // Audio perception results for an audio frame.
  dictionary AudioPerception {
    // A timestamp in microseconds attached when this message was generated.
    double? timestampUs;

    // Audio localization results for an audio frame.
    AudioLocalization? audioLocalization;

    // Audio human presence detection results for an audio frame.
    AudioHumanPresenceDetection? audioHumanPresenceDetection;

    // Hotword detection results for an audio frame.
    HotwordDetection? hotwordDetection;
  };
 |  | 
  // Detection of human presence based on both audio and video inputs.
  dictionary AudioVisualHumanPresenceDetection {
    // Indicates a probability in [0, 1] interval that a human is present,
    // combining the audio and video signals.
    double? humanPresenceLikelihood;
  };
 |  | 
  // Perception results based on both audio and video inputs.
  dictionary AudioVisualPerception {
    // A timestamp in microseconds attached when this message was generated.
    double? timestampUs;

    // Combined audio-visual human presence detection results.
    AudioVisualHumanPresenceDetection? audioVisualHumanPresenceDetection;
  };
 |  | 
  // Stores metadata such as version of media perception features.
  dictionary Metadata {
    // Version string of the visual experience controller.
    DOMString? visualExperienceControllerVersion;
  };
 |  | 
  // A batch of perception results emitted by the media processing pipeline,
  // delivered via the $(ref:onMediaPerception) event.
  dictionary MediaPerception {
    // The time the media perception data was emitted by the media processing
    // pipeline. This value will be greater than the timestamp stored within
    // the FramePerception dictionary and the difference between them can be
    // viewed as the processing time for a single frame.
    double? timestamp;

    // An array of frame perception results, one per processed image frame.
    FramePerception[]? framePerceptions;

    // An array of audio perceptions.
    AudioPerception[]? audioPerceptions;

    // An array of audio-visual perceptions.
    AudioVisualPerception[]? audioVisualPerceptions;

    // Stores metadata such as version of media perception features.
    Metadata? metadata;
  };
 |  | 
  // The encoding format of an $(ref:ImageFrame).
  enum ImageFormat {
    // Image represented by RGB data channels.
    RAW,
    // Image encoded in PNG format.
    PNG,
    // Image encoded in JPEG format.
    JPEG
  };
 |  | 
  // An image frame captured by the analytics process.
  dictionary ImageFrame {
    // Frame dimensions in pixels.
    long? width;
    long? height;

    // The encoding format of the frame data.
    ImageFormat? format;

    // The size of the frame data — presumably the byte length of
    // <code>frame</code>; confirm with the analytics process.
    long? dataLength;

    // The bytes of the image frame.
    ArrayBuffer? frame;
  };
 |  | 
  // A single diagnostic sample pairing perception results with the media they
  // were computed from.
  dictionary PerceptionSample {
    // The video analytics FramePerception for the associated image frame
    // data.
    FramePerception? framePerception;

    // The image frame data for the associated FramePerception object.
    ImageFrame? imageFrame;

    // The audio perception results for an audio frame.
    AudioPerception? audioPerception;

    // Perception results based on both audio and video inputs.
    AudioVisualPerception? audioVisualPerception;

    // Stores metadata such as version of media perception features.
    Metadata? metadata;
  };
 |  | 
  // Diagnostic information returned by $(ref:getDiagnostics).
  dictionary Diagnostics {
    // Return parameter for $(ref:getDiagnostics) that specifies the error
    // type for failure cases.
    ServiceError? serviceError;

    // A buffer of image frames and the associated video analytics information
    // that can be used to diagnose a malfunction.
    PerceptionSample[]? perceptionSamples;
  };
 |  | 
  // Invoked with the current or updated $(ref:State) of the system.
  callback StateCallback = void(State state);

  // Invoked with the $(ref:Diagnostics) buffer from the analytics process.
  callback DiagnosticsCallback = void(Diagnostics diagnostics);

  // Invoked with the $(ref:ComponentState) of the analytics component.
  callback ComponentStateCallback = void(ComponentState componentState);

  // Invoked with the $(ref:ProcessState) of the component process.
  callback ProcessStateCallback = void(ProcessState processState);
 |  | 
  interface Functions {
    // Gets the status of the media perception process.
    // |callback| : The current state of the system.
    static void getState(StateCallback callback);

    // Sets the desired state of the system.
    // |state| : A dictionary with the desired new state. The only settable
    // states are <code>RUNNING</code>, <code>SUSPENDED</code>, and
    // <code>RESTARTING</code>.
    // |callback| : Invoked with the State of the system after setting it. Can
    // be used to verify the state was set as desired.
    static void setState(
        State state,
        StateCallback callback);

    // Gets a diagnostics buffer out of the video analytics process.
    // |callback| : Returns a Diagnostics dictionary object.
    static void getDiagnostics(DiagnosticsCallback callback);

    // Attempts to download and load the media analytics component. This
    // function should be called every time a client starts using this API. If
    // the component is already loaded, the callback will simply return that
    // information. The process must be <code>STOPPED</code> for this function
    // to succeed.
    // Note: If a different component type is desired, this function can
    // be called with the new desired type and the new component will be
    // downloaded and installed.
    // |component| : The desired component to install and load.
    // |callback| : Returns the state of the component.
    static void setAnalyticsComponent(
        Component component,
        ComponentStateCallback callback);

    // Manages the lifetime of the component process. This function should
    // only be used if the component is installed. It will fail if the
    // component is not installed.
    // |processState| : The desired state for the component process.
    // |callback| : Reports the new state of the process, which is expected to
    // be the same as the desired state, unless something goes wrong.
    static void setComponentProcessState(
        ProcessState processState,
        ProcessStateCallback callback);
  };
 |  | 
  interface Events {
    // Fired when media perception information is received from the media
    // analytics process.
    // |mediaPerception| : The dictionary which contains a dump of everything
    // the analytics process has detected or determined from the incoming media
    // streams.
    static void onMediaPerception(MediaPerception mediaPerception);
  };
 | }; |