| // Copyright 2017 The Chromium Authors | 
 | // Use of this source code is governed by a BSD-style license that can be | 
 | // found in the LICENSE file. | 
 |  | 
 | // Private API for receiving real-time media perception information. | 
 | [platforms=("chromeos")] | 
 | namespace mediaPerceptionPrivate { | 
  // The lifecycle state of the media analytics process and its media
  // processing pipeline.
  enum Status {
    // The media analytics process is waiting to be launched.
    UNINITIALIZED,

    // The analytics process is running and the media processing pipeline is
    // started, but it is not yet receiving image frames. This is a
    // transitional state between <code>SUSPENDED</code> and
    // <code>RUNNING</code> for the time it takes to warm up the media
    // processing pipeline, which can take anywhere from a few seconds to a
    // minute.
    // Note: <code>STARTED</code> is the initial reply to SetState
    // <code>RUNNING</code>.
    STARTED,

    // The analytics process is running and the media processing pipeline is
    // ingesting image frames. At this point, MediaPerception signals should
    // be coming over D-Bus.
    RUNNING,

    // Analytics process is running and the media processing pipeline is ready
    // to be set to state <code>RUNNING</code>. The D-Bus communications
    // are enabled but the media processing pipeline is suspended.
    SUSPENDED,

    // Enum for restarting the media analytics process using Upstart.
    // Calling setState <code>RESTARTING</code> will restart the media process
    // to the <code>SUSPENDED</code> state. The app has to set the state to
    // <code>RUNNING</code> in order to start receiving media perception
    // information again.
    RESTARTING,

    // Stops the media analytics process via Upstart.
    STOPPED,

    // Indicates that a ServiceError has occurred.
    SERVICE_ERROR
  };
 |  | 
  // Error conditions reported when a request to the media analytics process
  // cannot be serviced.
  enum ServiceError {
    // The media analytics process could not be reached. This is likely due to
    // a faulty comms configuration or that the process crashed.
    SERVICE_UNREACHABLE,

    // The media analytics process is not running. The MPP API knows that the
    // process has not been started yet.
    SERVICE_NOT_RUNNING,

    // The media analytics process is busy launching. Wait for setState
    // <code>RUNNING</code> or setState <code>RESTARTING</code> callback.
    SERVICE_BUSY_LAUNCHING,

    // The component is not installed properly.
    SERVICE_NOT_INSTALLED,

    // Failed to establish a Mojo connection to the service.
    MOJO_CONNECTION_FAILURE
  };
 |  | 
  // Media perception features that can be enabled via the
  // <code>features</code> field of $(ref:setState).
  enum Feature {
    AUTOZOOM,
    HOTWORD_DETECTION,
    OCCUPANCY_DETECTION,
    EDGE_EMBEDDINGS,
    SOFTWARE_CROPPING
  };
 |  | 
  // A named start-up parameter to be substituted into the media analytics
  // configuration (see the <code>namedTemplateArguments</code> field of
  // $(ref:State)).
  dictionary NamedTemplateArgument {
    // The name of the template argument.
    DOMString? name;
    // The value to substitute: either a string or a number.
    (DOMString or double)? value;
  };
 |  | 
  // The flavor of the downloadable media analytics component.
  enum ComponentType {
    // The smaller component with limited functionality (smaller size and
    // limited models).
    LIGHT,
    // The fully-featured component with more functionality (larger size and
    // more models).
    FULL
  };
 |  | 
  // The status of the media analytics process component on the device.
  enum ComponentStatus {
    // The installation status of the component has not been determined.
    UNKNOWN,
    // The component is successfully installed and the image is mounted.
    INSTALLED,
    // The component failed to download, install or load.
    FAILED_TO_INSTALL
  };
 |  | 
  // Error code associated with a failure to install the media analytics
  // component.
  enum ComponentInstallationError {
    // Component requested does not exist.
    UNKNOWN_COMPONENT,

    // The update engine failed to install the component.
    INSTALL_FAILURE,

    // The component image could not be mounted.
    MOUNT_FAILURE,

    // The component is not compatible with the device.
    COMPATIBILITY_CHECK_FAILED,

    // The component was not found - reported for load requests with kSkip
    // update policy.
    NOT_FOUND
  };
 |  | 
  // Specifies the media analytics component to install and load via
  // $(ref:setAnalyticsComponent).
  dictionary Component {
    // The desired component type.
    ComponentType type;
  };
 |  | 
  // The state of the media analytics downloadable component.
  dictionary ComponentState {
    // The installation status of the component.
    ComponentStatus status;

    // The version string for the current component.
    DOMString? version;

    // If the component installation failed, the encountered installation
    // error. Not set if the component installation succeeded.
    ComponentInstallationError? installationErrorCode;
  };
 |  | 
  // ------------------- Start of process management definitions. ------------
  // New interface for managing the process state of the media perception
  // service with the intention of eventually phasing out the setState() call.
  enum ProcessStatus {
    // The component process state is unknown, for example, if the process is
    // waiting to be launched. This is the initial state before
    // $(ref:setComponentProcessState) is first called.
    UNKNOWN,

    // The component process has been started.
    // This value can only be passed to $(ref:setComponentProcessState) if the
    // process is currently in state <code>STOPPED</code> or
    // <code>UNKNOWN</code>.
    STARTED,

    // The component process has been stopped.
    // This value can only be passed to $(ref:setComponentProcessState) if the
    // process is currently in state <code>STARTED</code>.
    // Note: the process is automatically stopped when the Chrome process
    // is closed.
    STOPPED,

    // Indicates that a ServiceError has occurred. Reported via the
    // <code>serviceError</code> field of $(ref:ProcessState).
    SERVICE_ERROR
  };
 |  | 
  // The process state of the media analytics component, used as both the
  // request and reply of $(ref:setComponentProcessState).
  dictionary ProcessState {
    // The status of the component process.
    ProcessStatus? status;

    // Return parameter for $(ref:setComponentProcessState) that
    // specifies the error type for failure cases.
    ServiceError? serviceError;
  };
 |   // ------------------- End of process management definitions. -------------- | 
 |  | 
  // The parameters for processing a particular video stream.
  dictionary VideoStreamParam {
    // Identifies the video stream described by these parameters.
    DOMString? id;

    // Frame width in pixels.
    long? width;

    // Frame height in pixels.
    long? height;

    // The frame rate at which this video stream would be processed.
    // Presumably frames per second — confirm with the analytics process.
    long? frameRate;
  };
 |  | 
  // A 2D point within an image frame.
  dictionary Point {
    // The horizontal distance from the top left corner of the image.
    double? x;

    // The vertical distance from the top left corner of the image.
    double? y;
  };
 |  | 
  // The parameters for a whiteboard in the image frame. Corners are given in
  // pixel coordinates normalized to the size of the image frame (i.e. in the
  // range [(0.0, 0.0), (1.0, 1.0)]). The aspectRatio is the physical aspect
  // ratio of the whiteboard (e.g. for a 1m high and 2m wide whiteboard, the
  // aspect ratio would be 2).
  dictionary Whiteboard {
    // The top left corner of the whiteboard in the image frame.
    Point? topLeft;

    // The top right corner of the whiteboard in the image frame.
    Point? topRight;

    // The bottom left corner of the whiteboard in the image frame.
    Point? bottomLeft;

    // The bottom right corner of the whiteboard in the image frame.
    Point? bottomRight;

    // The physical aspect ratio of the whiteboard (width / height).
    double? aspectRatio;
  };
 |  | 
  // The system and configuration state of the analytics process.
  dictionary State {
    // The current (or, for $(ref:setState), desired) process status.
    Status status;

    // Optional $(ref:setState) parameter. Specifies the video device the media
    // analytics process should open while the media processing pipeline is
    // starting. To set this parameter, status has to be <code>RUNNING</code>.
    DOMString? deviceContext;

    // Return parameter for $(ref:setState) or $(ref:getState) that
    // specifies the error type for failure cases.
    ServiceError? serviceError;

    // A list of video streams processed by the analytics process. To set this
    // parameter, status has to be <code>RUNNING</code>.
    VideoStreamParam[]? videoStreamParam;

    // Media analytics configuration. It can only be used when setting state to
    // <code>RUNNING</code>.
    DOMString? configuration;

    // Corners and aspect ratio of the whiteboard in the image frame. Should
    // only be set when setting state to <code>RUNNING</code> and configuration
    // to whiteboard.
    Whiteboard? whiteboard;

    // A list of enabled media perception features.
    Feature[]? features;

    // A list of named parameters to be substituted at start-up. Will
    // only have effect when setting state to <code>RUNNING</code>.
    NamedTemplateArgument[]? namedTemplateArguments;
  };
 |  | 
  // A rectangular region of an image frame, defined by two corner points.
  dictionary BoundingBox {
    // Specifies whether the points are normalized to the size of the image.
    boolean? normalized;

    // The two points that define the corners of a bounding box.
    Point? topLeft;
    Point? bottomRight;
  };
 |  | 
  // Units for the magnitude of a $(ref:Distance).
  enum DistanceUnits {
    UNSPECIFIED,
    METERS,
    PIXELS
  };
 |  | 
  // Generic dictionary to encapsulate a distance magnitude and units.
  dictionary Distance {
    // This field provides flexibility to report depths or distances of
    // different entity types with different units.
    DistanceUnits? units;

    // The magnitude of the distance, expressed in <code>units</code>.
    double? magnitude;
  };
 |  | 
  // The type of a detected $(ref:Entity).
  enum EntityType {
    UNSPECIFIED,
    FACE,
    PERSON,
    MOTION_REGION,
    LABELED_REGION
  };
 |  | 
  // The type of perception that was run on an image frame.
  enum FramePerceptionType {
    UNKNOWN_TYPE,
    FACE_DETECTION,
    PERSON_DETECTION,
    MOTION_DETECTION
  };
 |  | 
  // A single entity detected in an image frame.
  dictionary Entity {
    // A unique id associated with the detected entity, which can be used to
    // track the entity over time.
    long? id;

    // The type of the detected entity.
    EntityType? type;

    // Label for this entity.
    DOMString? entityLabel;

    // Minimum box which captures entire detected entity.
    BoundingBox? boundingBox;

    // A value for the quality of this detection.
    double? confidence;

    // The estimated depth of the entity from the camera.
    Distance? depth;
  };
 |  | 
  // Processing latency measured for a single packet in the pipeline.
  dictionary PacketLatency {
    // Label for this packet.
    DOMString? packetLabel;

    // Packet processing latency in microseconds.
    long? latencyUsec;
  };
 |  | 
  // The type of lighting-condition change detected in a video frame.
  enum LightCondition {
    UNSPECIFIED,

    // No noticeable change occurred.
    NO_CHANGE,

    // Light was switched on in the room.
    TURNED_ON,

    // Light was switched off in the room.
    TURNED_OFF,

    // Light gradually got dimmer (for example, due to a sunset).
    DIMMER,

    // Light gradually got brighter (for example, due to a sunrise).
    BRIGHTER,

    // Black frame was detected - the current frame contains only noise.
    BLACK_FRAME
  };
 |  | 
  // Detection of human presence close to the camera.
  dictionary VideoHumanPresenceDetection {
    // Indicates a probability in [0, 1] interval that a human is present in
    // the video frame.
    double? humanPresenceLikelihood;

    // Indicates a probability in [0, 1] that motion has been detected in the
    // video frame.
    double? motionDetectedLikelihood;

    // Indicates lighting condition in the video frame.
    LightCondition? lightCondition;

    // Indicates a probability in [0, 1] interval that
    // <code>lightCondition</code> value is correct.
    double? lightConditionLikelihood;
  };
 |  | 
  // The set of computer vision metadata for an image frame.
  dictionary FramePerception {
    // Identifier for the frame these results refer to.
    long? frameId;

    // Frame dimensions in pixels.
    long? frameWidthInPx;
    long? frameHeightInPx;

    // The timestamp associated with the frame (when it is received by the
    // analytics process).
    double? timestamp;

    // The list of entities detected in this frame.
    Entity[]? entities;

    // Processing latency for a list of packets.
    PacketLatency[]? packetLatency;

    // Human presence detection results for a video frame.
    VideoHumanPresenceDetection? videoHumanPresenceDetection;

    // Indicates what types of frame perception were run.
    FramePerceptionType[]? framePerceptionTypes;
  };
 |  | 
  // An estimate of the direction that the sound is coming from, expressed as
  // an azimuth in the horizontal plane.
  dictionary AudioLocalization {
    // An angle in radians in the horizontal plane. It roughly points to the
    // peak in the probability distribution of azimuth defined below.
    double? azimuthRadians;

    // A probability distribution for the current snapshot in time that shows
    // the likelihood of a sound source being at a particular azimuth. For
    // example, <code>azimuthScores = [0.1, 0.2, 0.3, 0.4]</code> means that
    // the probability that the sound is coming from an azimuth of 0, pi/2, pi,
    // 3*pi/2 is 0.1, 0.2, 0.3 and 0.4, respectively.
    double[]? azimuthScores;
  };
 |  | 
  // Spectrogram of an audio frame.
  dictionary AudioSpectrogram {
    // The spectrogram values. The binning and scale are determined by the
    // analytics process — confirm against its configuration.
    double[]? values;
  };
 |  | 
  // Detection of human presence close to the microphone.
  dictionary AudioHumanPresenceDetection {
    // Indicates a probability in [0, 1] interval that a human has caused a
    // sound close to the microphone.
    double? humanPresenceLikelihood;

    // Estimate of the noise spectrogram.
    AudioSpectrogram? noiseSpectrogram;

    // Spectrogram of the current audio frame.
    AudioSpectrogram? frameSpectrogram;
  };
 |  | 
  // The kind of hotword that can be detected in the audio stream.
  enum HotwordType {
    // The hotword type could not be determined.
    UNKNOWN_TYPE,
    // The "Ok Google" hotword.
    OK_GOOGLE
  };
 |  | 
  // A hotword detected in the audio stream.
  dictionary Hotword {
    // Unique identifier for the hotword instance. Note that a single hotword
    // instance can span more than one audio frame. In that case a single
    // hotword instance can be reported in multiple Hotword or HotwordDetection
    // results. Hotword results associated with the same hotword instance will
    // have the same <code>id</code>.
    long? id;

    // Indicates the type of this hotword.
    HotwordType? type;

    // Id of the audio frame in which the hotword was detected.
    long? frameId;

    // Indicates the start time of this hotword in the audio frame, in
    // milliseconds.
    long? startTimestampMs;

    // Indicates the end time of this hotword in the audio frame, in
    // milliseconds.
    long? endTimestampMs;

    // Indicates a probability in [0, 1] interval that this hotword is present
    // in the audio frame.
    double? confidence;
  };
 |  | 
  // Detection of hotword in the audio stream.
  dictionary HotwordDetection {
    // The hotwords detected in the current audio frame.
    Hotword[]? hotwords;
  };
 |  | 
  // Audio perception results for an audio frame.
  dictionary AudioPerception {
    // A timestamp in microseconds attached when this message was generated.
    double? timestampUs;

    // Audio localization results for an audio frame.
    AudioLocalization? audioLocalization;

    // Audio human presence detection results for an audio frame.
    AudioHumanPresenceDetection? audioHumanPresenceDetection;

    // Hotword detection results for an audio frame.
    HotwordDetection? hotwordDetection;
  };
 |  | 
  // Detection of human presence based on both audio and video inputs.
  dictionary AudioVisualHumanPresenceDetection {
    // Indicates a probability in [0, 1] interval that a human is present,
    // combining the audio and video signals.
    double? humanPresenceLikelihood;
  };
 |  | 
  // Perception results based on both audio and video inputs.
  dictionary AudioVisualPerception {
    // A timestamp in microseconds attached when this message was generated.
    double? timestampUs;

    // Combined audio-visual human presence detection results.
    AudioVisualHumanPresenceDetection? audioVisualHumanPresenceDetection;
  };
 |  | 
  // Stores metadata such as version of media perception features.
  dictionary Metadata {
    // Version string of the visual experience controller.
    DOMString? visualExperienceControllerVersion;
  };
 |  | 
  // A batch of perception results emitted by the media processing pipeline,
  // delivered via the $(ref:onMediaPerception) event.
  dictionary MediaPerception {
    // The time the media perception data was emitted by the media processing
    // pipeline. This value will be greater than the timestamp stored within
    // the FramePerception dictionary and the difference between them can be
    // viewed as the processing time for a single frame.
    double? timestamp;

    // An array of frame perception results, one per processed image frame.
    FramePerception[]? framePerceptions;

    // An array of audio perceptions.
    AudioPerception[]? audioPerceptions;

    // An array of audio-visual perceptions.
    AudioVisualPerception[]? audioVisualPerceptions;

    // Stores metadata such as version of media perception features.
    Metadata? metadata;
  };
 |  | 
  // The encoding format of an $(ref:ImageFrame).
  enum ImageFormat {
    // Image represented by RGB data channels.
    RAW,
    // Image encoded in PNG format.
    PNG,
    // Image encoded in JPEG format.
    JPEG
  };
 |  | 
  // An image frame captured by the analytics process.
  dictionary ImageFrame {
    // Frame dimensions in pixels.
    long? width;
    long? height;

    // The encoding format of the frame data.
    ImageFormat? format;

    // The size of the frame data — presumably the byte length of
    // <code>frame</code>; confirm with the analytics process.
    long? dataLength;

    // The bytes of the image frame.
    ArrayBuffer? frame;
  };
 |  | 
  // A single diagnostic sample pairing perception results with the media they
  // were computed from.
  dictionary PerceptionSample {
    // The video analytics FramePerception for the associated image frame
    // data.
    FramePerception? framePerception;

    // The image frame data for the associated FramePerception object.
    ImageFrame? imageFrame;

    // The audio perception results for an audio frame.
    AudioPerception? audioPerception;

    // Perception results based on both audio and video inputs.
    AudioVisualPerception? audioVisualPerception;

    // Stores metadata such as version of media perception features.
    Metadata? metadata;
  };
 |  | 
  // Diagnostic information returned by $(ref:getDiagnostics).
  dictionary Diagnostics {
    // Return parameter for $(ref:getDiagnostics) that specifies the error
    // type for failure cases.
    ServiceError? serviceError;

    // A buffer of image frames and the associated video analytics information
    // that can be used to diagnose a malfunction.
    PerceptionSample[]? perceptionSamples;
  };
 |  | 
  // Invoked with the current or updated $(ref:State) of the system.
  callback StateCallback = void(State state);

  // Invoked with the $(ref:Diagnostics) buffer from the analytics process.
  callback DiagnosticsCallback = void(Diagnostics diagnostics);

  // Invoked with the $(ref:ComponentState) of the analytics component.
  callback ComponentStateCallback = void(ComponentState componentState);

  // Invoked with the $(ref:ProcessState) of the component process.
  callback ProcessStateCallback = void(ProcessState processState);
 |  | 
  interface Functions {
    // Gets the status of the media perception process.
    // |callback| : The current state of the system.
    static void getState(StateCallback callback);

    // Sets the desired state of the system.
    // |state| : A dictionary with the desired new state. The only settable
    // states are <code>RUNNING</code>, <code>SUSPENDED</code>, and
    // <code>RESTARTING</code>.
    // |callback| : Invoked with the State of the system after setting it. Can
    // be used to verify the state was set as desired.
    static void setState(
        State state,
        StateCallback callback);

    // Gets a diagnostics buffer out of the video analytics process.
    // |callback| : Returns a Diagnostics dictionary object.
    static void getDiagnostics(DiagnosticsCallback callback);

    // Attempts to download and load the media analytics component. This
    // function should be called every time a client starts using this API. If
    // the component is already loaded, the callback will simply return that
    // information. The process must be <code>STOPPED</code> for this function
    // to succeed.
    // Note: If a different component type is desired, this function can
    // be called with the new desired type and the new component will be
    // downloaded and installed.
    // |component| : The desired component to install and load.
    // |callback| : Returns the state of the component.
    static void setAnalyticsComponent(
        Component component,
        ComponentStateCallback callback);

    // Manages the lifetime of the component process. This function should
    // only be used if the component is installed. It will fail if the
    // component is not installed.
    // |processState| : The desired state for the component process.
    // |callback| : Reports the new state of the process, which is expected to
    // be the same as the desired state, unless something goes wrong.
    static void setComponentProcessState(
        ProcessState processState,
        ProcessStateCallback callback);
  };
 |  | 
  interface Events {
    // Fired when media perception information is received from the media
    // analytics process.
    // |mediaPerception| : The dictionary which contains a dump of everything
    // the analytics process has detected or determined from the incoming media
    // streams.
    static void onMediaPerception(MediaPerception mediaPerception);
  };
 | }; |