|  | // Copyright 2017 The Chromium Authors. All rights reserved. | 
|  | // Use of this source code is governed by a BSD-style license that can be | 
|  | // found in the LICENSE file. | 
|  |  | 
|  | // Private API for receiving real-time media perception information. | 
|  | [platforms=("chromeos")] | 
|  | namespace mediaPerceptionPrivate { | 
|  | // The lifecycle status of the media analytics process. Carried in | 
|  | // <code>State.status</code>. | 
|  | enum Status { | 
|  | // The media analytics process is waiting to be launched. | 
|  | UNINITIALIZED, | 
|  |  | 
|  | // The analytics process is running and the media processing pipeline is | 
|  | // started, but it is not yet receiving image frames. This is a | 
|  | // transitional state between <code>SUSPENDED</code> and | 
|  | // <code>RUNNING</code> for the time it takes to warm up the media | 
|  | // processing pipeline, which can take anywhere from a few seconds to a | 
|  | // minute. | 
|  | // Note: <code>STARTED</code> is the initial reply to a $(ref:setState) | 
|  | // call that requests <code>RUNNING</code>. | 
|  | STARTED, | 
|  |  | 
|  | // The analytics process is running and the media processing pipeline is | 
|  | // ingesting image frames. At this point, MediaPerception signals should | 
|  | // be coming over D-Bus. | 
|  | RUNNING, | 
|  |  | 
|  | // The analytics process is running and the media processing pipeline is | 
|  | // ready to be set to state <code>RUNNING</code>. The D-Bus | 
|  | // communications are enabled but the media processing pipeline is | 
|  | // suspended. | 
|  | SUSPENDED, | 
|  |  | 
|  | // Enum for restarting the media analytics process using Upstart. | 
|  | // Calling $(ref:setState) with <code>RESTARTING</code> will restart the | 
|  | // media process to the <code>SUSPENDED</code> state. The app has to set | 
|  | // the state to <code>RUNNING</code> in order to start receiving media | 
|  | // perception information again. | 
|  | RESTARTING, | 
|  |  | 
|  | // Stops the media analytics process via Upstart. | 
|  | STOPPED, | 
|  |  | 
|  | // Indicates that a ServiceError has occurred. The error type is reported | 
|  | // in <code>State.serviceError</code>. | 
|  | SERVICE_ERROR | 
|  | }; | 
|  |  | 
|  | // The error types reported in the <code>serviceError</code> field of the | 
|  | // State, ProcessState and Diagnostics dictionaries. | 
|  | enum ServiceError { | 
|  | // The media analytics process could not be reached. This is likely due to | 
|  | // a faulty comms configuration or because the process crashed. | 
|  | SERVICE_UNREACHABLE, | 
|  |  | 
|  | // The media analytics process is not running. The MPP API knows that the | 
|  | // process has not been started yet. | 
|  | SERVICE_NOT_RUNNING, | 
|  |  | 
|  | // The media analytics process is busy launching. Wait for the callback | 
|  | // of the $(ref:setState) call that requested <code>RUNNING</code> or | 
|  | // <code>RESTARTING</code>. | 
|  | SERVICE_BUSY_LAUNCHING, | 
|  |  | 
|  | // The component is not installed properly. | 
|  | SERVICE_NOT_INSTALLED, | 
|  |  | 
|  | // Failed to establish a Mojo connection to the service. | 
|  | MOJO_CONNECTION_FAILURE | 
|  | }; | 
|  |  | 
|  | // The media perception features that can be listed as enabled in | 
|  | // <code>State.features</code>. | 
|  | enum Feature { | 
|  | AUTOZOOM, | 
|  | HOTWORD_DETECTION, | 
|  | OCCUPANCY_DETECTION, | 
|  | EDGE_EMBEDDINGS, | 
|  | SOFTWARE_CROPPING | 
|  | }; | 
|  |  | 
|  | // A single named parameter, as listed in | 
|  | // <code>State.namedTemplateArguments</code>, to be substituted at | 
|  | // start-up. | 
|  | dictionary NamedTemplateArgument { | 
|  | // The name of the parameter. | 
|  | DOMString? name; | 
|  | // The value of the parameter: either a string or a number. | 
|  | (DOMString or double)? value; | 
|  | }; | 
|  |  | 
|  | // The variant of the media analytics component. Used in | 
|  | // <code>Component.type</code>. | 
|  | enum ComponentType { | 
|  | // The smaller component with limited functionality (smaller size and | 
|  | // limited models). | 
|  | LIGHT, | 
|  | // The fully-featured component with more functionality (larger size and | 
|  | // more models). | 
|  | FULL | 
|  | }; | 
|  |  | 
|  | // The status of the media analytics process component on the device. | 
|  | // Reported in <code>ComponentState.status</code>. | 
|  | enum ComponentStatus { | 
|  | UNKNOWN, | 
|  | // The component is successfully installed and the image is mounted. | 
|  | INSTALLED, | 
|  | // The component failed to download, install or load. The encountered | 
|  | // error is reported in <code>ComponentState.installationErrorCode</code>. | 
|  | FAILED_TO_INSTALL | 
|  | }; | 
|  |  | 
|  | // Error code associated with a failure to install the media analytics | 
|  | // component. Reported in <code>ComponentState.installationErrorCode</code>. | 
|  | enum ComponentInstallationError { | 
|  | // The requested component does not exist. | 
|  | UNKNOWN_COMPONENT, | 
|  |  | 
|  | // The update engine failed to install the component. | 
|  | INSTALL_FAILURE, | 
|  |  | 
|  | // The component cannot be mounted. | 
|  | MOUNT_FAILURE, | 
|  |  | 
|  | // The component is not compatible with the device. | 
|  | COMPATIBILITY_CHECK_FAILED, | 
|  |  | 
|  | // The component was not found - reported for load requests with kSkip | 
|  | // update policy. | 
|  | NOT_FOUND | 
|  | }; | 
|  |  | 
|  | // Describes the media analytics component requested through | 
|  | // $(ref:setAnalyticsComponent). | 
|  | dictionary Component { | 
|  | // The type of component to install and load. | 
|  | ComponentType type; | 
|  | }; | 
|  |  | 
|  | // The state of the media analytics downloadable component. Passed to the | 
|  | // callback of $(ref:setAnalyticsComponent). | 
|  | dictionary ComponentState { | 
|  | // The installation status of the component. | 
|  | ComponentStatus status; | 
|  |  | 
|  | // The version string for the current component. | 
|  | DOMString? version; | 
|  |  | 
|  | // If the component installation failed, the encountered installation | 
|  | // error. Not set if the component installation succeeded. | 
|  | ComponentInstallationError? installationErrorCode; | 
|  | }; | 
|  |  | 
|  | // ------------------- Start of process management definitions. ------------ | 
|  | // New interface for managing the process state of the media perception | 
|  | // service with the intention of eventually phasing out the | 
|  | // $(ref:setState) call. The status is carried in | 
|  | // <code>ProcessState.status</code>. | 
|  | enum ProcessStatus { | 
|  | // The component process state is unknown, for example, if the process is | 
|  | // waiting to be launched. This is the initial state before | 
|  | // $(ref:setComponentProcessState) is first called. | 
|  | UNKNOWN, | 
|  |  | 
|  | // The component process has been started. | 
|  | // This value can only be passed to $(ref:setComponentProcessState) if the | 
|  | // process is currently in state <code>STOPPED</code> or | 
|  | // <code>UNKNOWN</code>. | 
|  | STARTED, | 
|  |  | 
|  | // The component process has been stopped. | 
|  | // This value can only be passed to $(ref:setComponentProcessState) if the | 
|  | // process is currently in state <code>STARTED</code>. | 
|  | // Note: the process is automatically stopped when the Chrome process | 
|  | // is closed. | 
|  | STOPPED, | 
|  |  | 
|  | // Indicates that a ServiceError has occurred. The error type is reported | 
|  | // in <code>ProcessState.serviceError</code>. | 
|  | SERVICE_ERROR | 
|  | }; | 
|  |  | 
|  | // The state of the component process. Used both as the parameter and as | 
|  | // the callback result of $(ref:setComponentProcessState). | 
|  | dictionary ProcessState { | 
|  | // The desired status when passed to $(ref:setComponentProcessState), or | 
|  | // the new status of the process when reported to its callback. | 
|  | ProcessStatus? status; | 
|  |  | 
|  | // Return parameter for $(ref:setComponentProcessState) that | 
|  | // specifies the error type for failure cases. | 
|  | ServiceError? serviceError; | 
|  | }; | 
|  | // ------------------- End of process management definitions. -------------- | 
|  |  | 
|  | // The parameters for processing a particular video stream. Listed in | 
|  | // <code>State.videoStreamParam</code>. | 
|  | dictionary VideoStreamParam { | 
|  | // Identifies the video stream described by these parameters. | 
|  | DOMString? id; | 
|  |  | 
|  | // Frame width in pixels. | 
|  | long? width; | 
|  |  | 
|  | // Frame height in pixels. | 
|  | long? height; | 
|  |  | 
|  | // The frame rate at which this video stream would be processed. | 
|  | long? frameRate; | 
|  | }; | 
|  |  | 
|  | // A point in an image frame, measured from the top left corner of the | 
|  | // image. Whether the coordinates are normalized to the image size | 
|  | // depends on the containing dictionary (see | 
|  | // <code>BoundingBox.normalized</code> and <code>Whiteboard</code>). | 
|  | dictionary Point { | 
|  | // The horizontal distance from the top left corner of the image. | 
|  | double? x; | 
|  |  | 
|  | // The vertical distance from the top left corner of the image. | 
|  | double? y; | 
|  | }; | 
|  |  | 
|  | // The parameters for a whiteboard in the image frame. Corners are given in | 
|  | // pixel coordinates normalized to the size of the image frame (i.e. in the | 
|  | // range [(0.0, 0.0), (1.0, 1.0)]). The aspectRatio is the physical aspect | 
|  | // ratio of the whiteboard (e.g. for a 1m high and 2m wide whiteboard, the | 
|  | // aspect ratio would be 2). | 
|  | dictionary Whiteboard { | 
|  | // The top left corner of the whiteboard in the image frame. | 
|  | Point? topLeft; | 
|  |  | 
|  | // The top right corner of the whiteboard in the image frame. | 
|  | Point? topRight; | 
|  |  | 
|  | // The bottom left corner of the whiteboard in the image frame. | 
|  | Point? bottomLeft; | 
|  |  | 
|  | // The bottom right corner of the whiteboard in the image frame. | 
|  | Point? bottomRight; | 
|  |  | 
|  | // The physical aspect ratio of the whiteboard (width divided by height, | 
|  | // per the example above). | 
|  | double? aspectRatio; | 
|  | }; | 
|  |  | 
|  | // The system and configuration state of the analytics process. Used as | 
|  | // the parameter of $(ref:setState) and as the callback result of | 
|  | // $(ref:getState) and $(ref:setState). | 
|  | dictionary State { | 
|  | // The desired status when passed to $(ref:setState), or the status of | 
|  | // the system when reported to a callback. | 
|  | Status status; | 
|  |  | 
|  | // Optional $(ref:setState) parameter. Specifies the video device the media | 
|  | // analytics process should open while the media processing pipeline is | 
|  | // starting. To set this parameter, status has to be <code>RUNNING</code>. | 
|  | DOMString? deviceContext; | 
|  |  | 
|  | // Return parameter for $(ref:setState) or $(ref:getState) that | 
|  | // specifies the error type for failure cases. | 
|  | ServiceError? serviceError; | 
|  |  | 
|  | // A list of video streams processed by the analytics process. To set this | 
|  | // parameter, status has to be <code>RUNNING</code>. | 
|  | VideoStreamParam[]? videoStreamParam; | 
|  |  | 
|  | // Media analytics configuration. It can only be used when setting state to | 
|  | // <code>RUNNING</code>. | 
|  | DOMString? configuration; | 
|  |  | 
|  | // Corners and aspect ratio of the whiteboard in the image frame. Should | 
|  | // only be set when setting state to <code>RUNNING</code> and configuration | 
|  | // to whiteboard. | 
|  | Whiteboard? whiteboard; | 
|  |  | 
|  | // A list of enabled media perception features. | 
|  | Feature[]? features; | 
|  |  | 
|  | // A list of named parameters to be substituted at start-up. Will | 
|  | // only have an effect when setting state to <code>RUNNING</code>. | 
|  | NamedTemplateArgument[]? namedTemplateArguments; | 
|  | }; | 
|  |  | 
|  | // A bounding box described by two of its corner points. Used in | 
|  | // <code>Entity.boundingBox</code>. | 
|  | dictionary BoundingBox { | 
|  | // Specifies whether the points are normalized to the size of the image. | 
|  | boolean? normalized; | 
|  |  | 
|  | // The two points that define the corners of a bounding box: the top left | 
|  | // corner and the bottom right corner. | 
|  | Point? topLeft; | 
|  | Point? bottomRight; | 
|  | }; | 
|  |  | 
|  | // The units in which a <code>Distance</code> magnitude is expressed. | 
|  | enum DistanceUnits { | 
|  | UNSPECIFIED, | 
|  | METERS, | 
|  | PIXELS | 
|  | }; | 
|  |  | 
|  | // Generic dictionary to encapsulate a distance magnitude and units. | 
|  | dictionary Distance { | 
|  | // This field provides flexibility to report depths or distances of | 
|  | // different entity types with different units. | 
|  | DistanceUnits? units; | 
|  |  | 
|  | // The magnitude of the distance, expressed in <code>units</code>. | 
|  | double? magnitude; | 
|  | }; | 
|  |  | 
|  | // The type of a detected entity. Used in <code>Entity.type</code>. | 
|  | enum EntityType { | 
|  | UNSPECIFIED, | 
|  | FACE, | 
|  | PERSON, | 
|  | MOTION_REGION, | 
|  | LABELED_REGION | 
|  | }; | 
|  |  | 
|  | // The types of frame perception that can be run on a frame. Reported in | 
|  | // <code>FramePerception.framePerceptionTypes</code>. | 
|  | enum FramePerceptionType { | 
|  | UNKNOWN_TYPE, | 
|  | FACE_DETECTION, | 
|  | PERSON_DETECTION, | 
|  | MOTION_DETECTION | 
|  | }; | 
|  |  | 
|  | // An entity detected in a frame. Listed in | 
|  | // <code>FramePerception.entities</code>. | 
|  | dictionary Entity { | 
|  | // A unique id associated with the detected entity, which can be used to | 
|  | // track the entity over time. | 
|  | long? id; | 
|  |  | 
|  | // The type of the detected entity. | 
|  | EntityType? type; | 
|  |  | 
|  | // Label for this entity. | 
|  | DOMString? entityLabel; | 
|  |  | 
|  | // Minimum box which captures the entire detected entity. | 
|  | BoundingBox? boundingBox; | 
|  |  | 
|  | // A value for the quality of this detection. | 
|  | double? confidence; | 
|  |  | 
|  | // The estimated depth of the entity from the camera. | 
|  | Distance? depth; | 
|  | }; | 
|  |  | 
|  | // The processing latency of a single labeled packet. Listed in | 
|  | // <code>FramePerception.packetLatency</code>. | 
|  | dictionary PacketLatency { | 
|  | // Label for this packet. | 
|  | DOMString? packetLabel; | 
|  |  | 
|  | // Packet processing latency in microseconds. | 
|  | long? latencyUsec; | 
|  | }; | 
|  |  | 
|  | // Type of lighting conditions. Reported in | 
|  | // <code>VideoHumanPresenceDetection.lightCondition</code>. | 
|  | enum LightCondition { | 
|  | UNSPECIFIED, | 
|  |  | 
|  | // No noticeable change occurred. | 
|  | NO_CHANGE, | 
|  |  | 
|  | // Light was switched on in the room. | 
|  | TURNED_ON, | 
|  |  | 
|  | // Light was switched off in the room. | 
|  | TURNED_OFF, | 
|  |  | 
|  | // Light gradually got dimmer (for example, due to a sunset). | 
|  | DIMMER, | 
|  |  | 
|  | // Light gradually got brighter (for example, due to a sunrise). | 
|  | BRIGHTER, | 
|  |  | 
|  | // Black frame was detected - the current frame contains only noise. | 
|  | BLACK_FRAME | 
|  | }; | 
|  |  | 
|  | // Detection of human presence close to the camera. | 
|  | dictionary VideoHumanPresenceDetection { | 
|  | // Indicates a probability in the [0, 1] interval that a human is present | 
|  | // in the video frame. | 
|  | double? humanPresenceLikelihood; | 
|  |  | 
|  | // Indicates a probability in the [0, 1] interval that motion has been | 
|  | // detected in the video frame. | 
|  | double? motionDetectedLikelihood; | 
|  |  | 
|  | // Indicates the lighting condition in the video frame. | 
|  | LightCondition? lightCondition; | 
|  |  | 
|  | // Indicates a probability in the [0, 1] interval that the | 
|  | // <code>lightCondition</code> value is correct. | 
|  | double? lightConditionLikelihood; | 
|  | }; | 
|  |  | 
|  | // The set of computer vision metadata for an image frame. | 
|  | dictionary FramePerception { | 
|  | // NOTE(review): presumably an identifier for the image frame this | 
|  | // metadata describes - confirm. | 
|  | long? frameId; | 
|  |  | 
|  | // The width and height of the frame, in pixels. | 
|  | long? frameWidthInPx; | 
|  | long? frameHeightInPx; | 
|  |  | 
|  | // The timestamp associated with the frame (when it is received by the | 
|  | // analytics process). | 
|  | double? timestamp; | 
|  |  | 
|  | // The list of entities detected in this frame. | 
|  | Entity[]? entities; | 
|  |  | 
|  | // Processing latency for a list of packets. | 
|  | PacketLatency[]? packetLatency; | 
|  |  | 
|  | // Human presence detection results for a video frame. | 
|  | VideoHumanPresenceDetection? videoHumanPresenceDetection; | 
|  |  | 
|  | // Indicates what types of frame perception were run. | 
|  | FramePerceptionType[]? framePerceptionTypes; | 
|  | }; | 
|  |  | 
|  | // An estimate of the direction that the sound is coming from. | 
|  | dictionary AudioLocalization { | 
|  | // An angle in radians in the horizontal plane. It roughly points to the | 
|  | // peak in the probability distribution of azimuth defined by | 
|  | // <code>azimuthScores</code>. | 
|  | double? azimuthRadians; | 
|  |  | 
|  | // A probability distribution for the current snapshot in time that shows | 
|  | // the likelihood of a sound source being at a particular azimuth. For | 
|  | // example, <code>azimuthScores = [0.1, 0.2, 0.3, 0.4]</code> means that | 
|  | // the probability that the sound is coming from an azimuth of 0, pi/2, pi, | 
|  | // 3*pi/2 is 0.1, 0.2, 0.3 and 0.4, respectively. | 
|  | double[]? azimuthScores; | 
|  | }; | 
|  |  | 
|  | // Spectrogram of an audio frame. | 
|  | dictionary AudioSpectrogram { | 
|  | // The values of the spectrogram. | 
|  | double[]? values; | 
|  | }; | 
|  |  | 
|  | // Detection of human presence close to the microphone. | 
|  | dictionary AudioHumanPresenceDetection { | 
|  | // Indicates a probability in the [0, 1] interval that a human has caused | 
|  | // a sound close to the microphone. | 
|  | double? humanPresenceLikelihood; | 
|  |  | 
|  | // Estimate of the noise spectrogram. | 
|  | AudioSpectrogram? noiseSpectrogram; | 
|  |  | 
|  | // Spectrogram of an audio frame. | 
|  | AudioSpectrogram? frameSpectrogram; | 
|  | }; | 
|  |  | 
|  | // The type of a detected hotword. Used in <code>Hotword.type</code>. | 
|  | enum HotwordType { | 
|  | UNKNOWN_TYPE, | 
|  | OK_GOOGLE | 
|  | }; | 
|  |  | 
|  | // A hotword detected in the audio stream. | 
|  | dictionary Hotword { | 
|  | // Unique identifier for the hotword instance. Note that a single hotword | 
|  | // instance can span more than one audio frame. In that case a single | 
|  | // hotword instance can be reported in multiple Hotword or HotwordDetection | 
|  | // results. Hotword results associated with the same hotword instance will | 
|  | // have the same <code>id</code>. | 
|  | long? id; | 
|  |  | 
|  | // Indicates the type of this hotword. | 
|  | HotwordType? type; | 
|  |  | 
|  | // Id of the audio frame in which the hotword was detected. | 
|  | long? frameId; | 
|  |  | 
|  | // Indicates the start time of this hotword in the audio frame | 
|  | // (presumably in milliseconds, per the field name - confirm). | 
|  | long? startTimestampMs; | 
|  |  | 
|  | // Indicates the end time of this hotword in the audio frame | 
|  | // (presumably in milliseconds, per the field name - confirm). | 
|  | long? endTimestampMs; | 
|  |  | 
|  | // Indicates a probability in the [0, 1] interval that this hotword is | 
|  | // present in the audio frame. | 
|  | double? confidence; | 
|  | }; | 
|  |  | 
|  | // Detection of hotwords in the audio stream. | 
|  | dictionary HotwordDetection { | 
|  | // The detected hotwords. | 
|  | Hotword[]? hotwords; | 
|  | }; | 
|  |  | 
|  | // Audio perception results for an audio frame. Listed in | 
|  | // <code>MediaPerception.audioPerceptions</code>. | 
|  | dictionary AudioPerception { | 
|  | // A timestamp in microseconds attached when this message was generated. | 
|  | double? timestampUs; | 
|  |  | 
|  | // Audio localization results for an audio frame. | 
|  | AudioLocalization? audioLocalization; | 
|  |  | 
|  | // Audio human presence detection results for an audio frame. | 
|  | AudioHumanPresenceDetection? audioHumanPresenceDetection; | 
|  |  | 
|  | // Hotword detection results. | 
|  | HotwordDetection? hotwordDetection; | 
|  | }; | 
|  |  | 
|  | // Detection of human presence based on both audio and video inputs. | 
|  | dictionary AudioVisualHumanPresenceDetection { | 
|  | // Indicates a probability in the [0, 1] interval that a human is | 
|  | // present. | 
|  | double? humanPresenceLikelihood; | 
|  | }; | 
|  |  | 
|  | // Perception results based on both audio and video inputs. Listed in | 
|  | // <code>MediaPerception.audioVisualPerceptions</code>. | 
|  | dictionary AudioVisualPerception { | 
|  | // A timestamp in microseconds attached when this message was generated. | 
|  | double? timestampUs; | 
|  |  | 
|  | // Human presence detection results. | 
|  | AudioVisualHumanPresenceDetection? audioVisualHumanPresenceDetection; | 
|  | }; | 
|  |  | 
|  | // Stores metadata such as version of media perception features. | 
|  | dictionary Metadata { | 
|  | // NOTE(review): presumably the version string of the visual experience | 
|  | // controller - confirm. | 
|  | DOMString? visualExperienceControllerVersion; | 
|  | }; | 
|  |  | 
|  | // The payload of the $(ref:onMediaPerception) event: what the analytics | 
|  | // process has detected or determined from the incoming media streams. | 
|  | dictionary MediaPerception { | 
|  | // The time the media perception data was emitted by the media processing | 
|  | // pipeline. This value will be greater than the timestamp stored within | 
|  | // the FramePerception dictionary and the difference between them can be | 
|  | // viewed as the processing time for a single frame. | 
|  | double? timestamp; | 
|  |  | 
|  | // An array of frame perceptions. | 
|  | FramePerception[]? framePerceptions; | 
|  |  | 
|  | // An array of audio perceptions. | 
|  | AudioPerception[]? audioPerceptions; | 
|  |  | 
|  | // An array of audio-visual perceptions. | 
|  | AudioVisualPerception[]? audioVisualPerceptions; | 
|  |  | 
|  | // Stores metadata such as version of media perception features. | 
|  | Metadata? metadata; | 
|  | }; | 
|  |  | 
|  | // The format of an image frame. Used in <code>ImageFrame.format</code>. | 
|  | enum ImageFormat { | 
|  | // Image represented by RGB data channels. | 
|  | RAW, | 
|  | PNG, | 
|  | JPEG | 
|  | }; | 
|  |  | 
|  | // An image frame and its description. Used in | 
|  | // <code>PerceptionSample.imageFrame</code>. | 
|  | dictionary ImageFrame { | 
|  | // NOTE(review): presumably the frame dimensions in pixels - confirm. | 
|  | long? width; | 
|  | long? height; | 
|  |  | 
|  | // The format of the image frame data. | 
|  | ImageFormat? format; | 
|  |  | 
|  | // NOTE(review): presumably the length of <code>frame</code> in bytes - | 
|  | // confirm. | 
|  | long? dataLength; | 
|  |  | 
|  | // The bytes of the image frame. | 
|  | ArrayBuffer? frame; | 
|  | }; | 
|  |  | 
|  | // A single sample of perception results with the associated image frame | 
|  | // data. Listed in <code>Diagnostics.perceptionSamples</code>. | 
|  | dictionary PerceptionSample { | 
|  | // The video analytics FramePerception for the associated image frame | 
|  | // data. | 
|  | FramePerception? framePerception; | 
|  |  | 
|  | // The image frame data for the associated FramePerception object. | 
|  | ImageFrame? imageFrame; | 
|  |  | 
|  | // The audio perception results for an audio frame. | 
|  | AudioPerception? audioPerception; | 
|  |  | 
|  | // Perception results based on both audio and video inputs. | 
|  | AudioVisualPerception? audioVisualPerception; | 
|  |  | 
|  | // Stores metadata such as version of media perception features. | 
|  | Metadata? metadata; | 
|  | }; | 
|  |  | 
|  | // The diagnostics information passed to the callback of | 
|  | // $(ref:getDiagnostics). | 
|  | dictionary Diagnostics { | 
|  | // Return parameter for $(ref:getDiagnostics) that specifies the error | 
|  | // type for failure cases. | 
|  | ServiceError? serviceError; | 
|  |  | 
|  | // A buffer of image frames and the associated video analytics information | 
|  | // that can be used to diagnose a malfunction. | 
|  | PerceptionSample[]? perceptionSamples; | 
|  | }; | 
|  |  | 
|  | // Callback type of $(ref:getState) and $(ref:setState). | 
|  | callback StateCallback = void(State state); | 
|  |  | 
|  | // Callback type of $(ref:getDiagnostics). | 
|  | callback DiagnosticsCallback = void(Diagnostics diagnostics); | 
|  |  | 
|  | // Callback type of $(ref:setAnalyticsComponent). | 
|  | callback ComponentStateCallback = void(ComponentState componentState); | 
|  |  | 
|  | // Callback type of $(ref:setComponentProcessState). | 
|  | callback ProcessStateCallback = void(ProcessState processState); | 
|  |  | 
|  | // The functions exposed by this API. Each function reports its result | 
|  | // through the callback passed as its last argument. | 
|  | interface Functions { | 
|  | // Gets the status of the media perception process. | 
|  | // |callback| : The current state of the system. | 
|  | static void getState(StateCallback callback); | 
|  |  | 
|  | // Sets the desired state of the system. | 
|  | // NOTE(review): the Status enum documents <code>STOPPED</code> as | 
|  | // stopping the process, but it is not listed as settable below - confirm | 
|  | // which is correct. | 
|  | // |state| : A dictionary with the desired new state. The only settable | 
|  | // states are <code>RUNNING</code>, <code>SUSPENDED</code>, and | 
|  | // <code>RESTARTING</code>. | 
|  | // |callback| : Invoked with the State of the system after setting it. Can | 
|  | // be used to verify the state was set as desired. | 
|  | static void setState(State state, StateCallback callback); | 
|  |  | 
|  | // Gets a diagnostics buffer out of the video analytics process. | 
|  | // |callback| : Returns a Diagnostics dictionary object. | 
|  | static void getDiagnostics(DiagnosticsCallback callback); | 
|  |  | 
|  | // Attempts to download and load the media analytics component. This | 
|  | // function should be called every time a client starts using this API. If | 
|  | // the component is already loaded, the callback will simply return that | 
|  | // information. The process must be <code>STOPPED</code> for this function | 
|  | // to succeed. | 
|  | // Note: If a different component type is desired, this function can | 
|  | // be called with the new desired type and the new component will be | 
|  | // downloaded and installed. | 
|  | // |component| : The desired component to install and load. | 
|  | // |callback| : Returns the state of the component. | 
|  | static void setAnalyticsComponent( | 
|  | Component component, | 
|  | ComponentStateCallback callback); | 
|  |  | 
|  | // Manages the lifetime of the component process. This function should | 
|  | // only be used if the component is installed. It will fail if the | 
|  | // component is not installed. | 
|  | // |processState| : The desired state for the component process. | 
|  | // |callback| : Reports the new state of the process, which is expected to | 
|  | // be the same as the desired state, unless something goes wrong. | 
|  | static void setComponentProcessState( | 
|  | ProcessState processState, ProcessStateCallback callback); | 
|  | }; | 
|  |  | 
|  | // The events fired by this API. | 
|  | interface Events { | 
|  | // Fired when media perception information is received from the media | 
|  | // analytics process. | 
|  | // |mediaPerception| : The dictionary which contains a dump of everything | 
|  | // the analytics process has detected or determined from the incoming media | 
|  | // streams. | 
|  | static void onMediaPerception(MediaPerception mediaPerception); | 
|  | }; | 
|  | }; |