diff --git a/index.html b/index.html
index fdcd059..c987953 100644
--- a/index.html
+++ b/index.html
@@ -9,7 +9,7 @@
       // See https://github.com/w3c/respec/wiki/ for how to configure ReSpec
       var respecConfig = {
         group: "webrtc",
-        xref: ["html", "infra", "permissions", "dom", "mediacapture-streams", "webaudio", "webidl"],
+        xref: ["geometry-1", "html", "infra", "permissions", "dom", "image-capture", "mediacapture-streams", "webaudio", "webcodecs", "webidl"],
         edDraftURI: "https://w3c.github.io/mediacapture-extensions/",
         editors: [
           {name: "Jan-Ivar Bruaroey", company: "Mozilla Corporation", w3cid: 79152},
@@ -654,5 +654,384 @@
Human face metadata describes the human faces in video frames. It can be set by web applications using the standard means when creating {{VideoFrameMetadata}} for {{VideoFrame}}s, or it can be set by a user agent when the media track constraints defined below are used to enable face detection for the {{MediaStreamTrack}} which provides the {{VideoFrame}}s.
The face metadata may be used by video encoders to enhance the quality of the faces in encoded video streams, or for other suitable purposes.
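The following non-normative sketch illustrates how a web application could attach this metadata itself when constructing a {{VideoFrame}}. It assumes that the {{VideoFrame}} constructor accepts a metadata member in its init dictionary; detectFacesSomehow() is a hypothetical application-provided detector.

// Non-normative sketch: a web application attaching humanFaces metadata to a
// frame it constructs itself. detectFacesSomehow() is a hypothetical
// application-provided detector; passing metadata in the VideoFrame init
// dictionary is assumed to be supported by the user agent.
async function annotateFrame(originalFrame) {
  const humanFaces = await detectFacesSomehow(originalFrame);
  const annotatedFrame = new VideoFrame(originalFrame, {
    timestamp: originalFrame.timestamp,
    metadata: {...originalFrame.metadata(), humanFaces}
  });
  originalFrame.close();
  return annotatedFrame;
}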
partial dictionary VideoFrameMetadata {
  sequence<HumanFace> humanFaces;
};
humanFaces
of type sequence<{{HumanFace}}>
The set of known human faces in this video frame.
dictionary HumanFace {
  long id;
  float probability;
  DOMRectReadOnly boundingBox;
  HumanFaceLandmark leftEye;
  HumanFaceLandmark rightEye;
  HumanFaceLandmark mouth;
};
id
of type {{long}}
If set, a unique identifier of a face within a sequence. If the same face can be tracked in multiple frames originating from the same {{MediaStreamTrack}} source, {{id}} is set to the same integer value for the face in all frames.
The user agent MUST NOT select the value of {{id}} in such a way that detected faces could be correlated between different {{MediaStreamTrack}} objects.
probability
of type {{float}}
If set, a probability value in the range (0, 1]: the approximate probability that the {{HumanFace}} describes an actual human face.
boundingBox
of type {{DOMRectReadOnly}}
A bounding box surrounding the face. The corner coordinates of the bounding box are interpreted as coordinates in a normalized square space: the origin {x, y} = {0.0, 0.0} represents the upper left corner and {x, y} = {1.0, 1.0} represents the lower right corner of the rendered frame.
The face location in the frame may be specified even if it is obscured by other objects in front of it or if it lies partially or fully outside of the frame.
leftEye
of type {{HumanFaceLandmark}}
If present, describes the properties of the face's left eye.
rightEye
of type {{HumanFaceLandmark}}
If present, describes the properties of the face's right eye.
mouth
of type {{HumanFaceLandmark}}
If present, describes the properties of the face's mouth.
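As a non-normative sketch, the normalized {{HumanFace/boundingBox}} can be mapped to pixel coordinates of a given frame, and {{HumanFace/id}} can be used to follow the same face across frames of one track. logFaceTrack() is a hypothetical application-provided callback.

// Non-normative sketch: converting the normalized bounding box of each face to
// pixel coordinates and grouping observations by face id across frames.
// logFaceTrack() is a hypothetical application-provided callback.
const faceHistory = new Map();

function processFaceMetadata(frame) {
  for (const face of frame.metadata().humanFaces || []) {
    if (!face.boundingBox) continue;
    const pixelBox = DOMRectReadOnly.fromRect({
      x: face.boundingBox.x * frame.displayWidth,
      y: face.boundingBox.y * frame.displayHeight,
      width: face.boundingBox.width * frame.displayWidth,
      height: face.boundingBox.height * frame.displayHeight
    });
    if (face.id !== undefined) {
      const history = faceHistory.get(face.id) || [];
      history.push({timestamp: frame.timestamp, pixelBox});
      faceHistory.set(face.id, history);
      logFaceTrack(face.id, history);
    }
  }
}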
dictionary HumanFaceLandmark {
  Point2D centerPoint;
};
centerPoint
of type {{Point2D}}
The coordinates of the approximate center of the detected landmark. The landmark location in the frame may be specified even if it is obscured by other objects in front of it or if it lies partially or fully outside of the frame. See {{HumanFace/boundingBox}} for the definition of the coordinate system.
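For illustration, a non-normative sketch of using landmark center points, here computing the distance between the eyes of a face in the normalized coordinate space shared with {{HumanFace/boundingBox}}:

// Non-normative sketch: distance between the eye center points of a face,
// expressed in the normalized coordinate space of the frame.
function normalizedEyeDistance(face) {
  if (!face.leftEye || !face.rightEye) {
    return undefined;
  }
  const dx = face.rightEye.centerPoint.x - face.leftEye.centerPoint.x;
  const dy = face.rightEye.centerPoint.y - face.leftEye.centerPoint.y;
  return Math.hypot(dx, dy);
}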
partial dictionary MediaTrackSupportedConstraints {
  boolean humanFaceDetectionMode = true;
  boolean humanFaceLandmarkDetectionMode = true;
};
humanFaceDetectionMode
of type {{boolean}}, defaulting to true
Whether face detection mode constraining is recognized.
humanFaceLandmarkDetectionMode
of type {{boolean}}, defaulting to true
Whether face landmark detection mode constraining is recognized.
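A non-normative sketch of feature-detecting these constraints before attempting to use them:

// Non-normative sketch: feature-detecting the face and face landmark
// detection constraints.
const supported = navigator.mediaDevices.getSupportedConstraints();
const canConstrainFaceDetection = !!supported.humanFaceDetectionMode;
const canConstrainLandmarkDetection = !!supported.humanFaceLandmarkDetectionMode;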
partial dictionary MediaTrackCapabilities {
  sequence<DOMString> humanFaceDetectionMode;
  sequence<DOMString> humanFaceLandmarkDetectionMode;
};
humanFaceDetectionMode
of type sequence<{{DOMString}}>
The sequence of supported face detection modes. Each string MUST be one of the members of {{ObjectDetectionMode}}. The mode {{ObjectDetectionMode/"center-point"}} MUST NOT be supported.
humanFaceLandmarkDetectionMode
of type sequence<{{DOMString}}>
The sequence of supported face landmark detection modes. Each string MUST be one of the members of {{ObjectDetectionMode}}. The mode {{ObjectDetectionMode/"bounding-box"}} MUST NOT be supported.
partial dictionary MediaTrackConstraintSet {
  ConstrainDOMString humanFaceDetectionMode;
  ConstrainDOMString humanFaceLandmarkDetectionMode;
};
humanFaceDetectionMode
of type {{ConstrainDOMString}}
The string MUST be one of the members of {{ObjectDetectionMode}}. See the face detection mode constrainable property.
humanFaceLandmarkDetectionMode
of type {{ConstrainDOMString}}
The string MUST be one of the members of {{ObjectDetectionMode}}. See the face landmark detection mode constrainable property.
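For example, a non-normative sketch of requesting both modes with non-required (ideal) semantics, so that applyConstraints() does not reject on devices without support. videoTrack is assumed to be a camera {{MediaStreamTrack}} obtained via getUserMedia().

// Non-normative sketch: requesting face and face landmark detection.
// videoTrack is assumed to be a camera MediaStreamTrack. Using 'ideal'
// keeps applyConstraints() from rejecting when a device does not support
// the requested mode.
await videoTrack.applyConstraints({
  humanFaceDetectionMode: {ideal: 'bounding-box'},
  humanFaceLandmarkDetectionMode: {ideal: 'center-point'}
});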
partial dictionary MediaTrackSettings {
  DOMString humanFaceDetectionMode;
  DOMString humanFaceLandmarkDetectionMode;
};
humanFaceDetectionMode
of type {{DOMString}}
The current face detection mode setting. The string MUST be one of the members of {{ObjectDetectionMode}}, excluding {{ObjectDetectionMode/"center-point"}}.
humanFaceLandmarkDetectionMode
of type {{DOMString}}
The current face landmark detection mode setting. The string MUST be one of the members of {{ObjectDetectionMode}}, excluding {{ObjectDetectionMode/"bounding-box"}}.
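A non-normative sketch of reading back the settings to see which modes are actually in effect; videoTrack is assumed to be a camera {{MediaStreamTrack}}.

// Non-normative sketch: checking which detection modes are in effect.
// videoTrack is assumed to be a camera MediaStreamTrack.
const settings = videoTrack.getSettings();
if (settings.humanFaceDetectionMode === 'bounding-box') {
  // Face bounding boxes will be present in frame.metadata().humanFaces.
}
if (settings.humanFaceLandmarkDetectionMode === 'none') {
  // No eye or mouth landmarks will be set by the user agent.
}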
enum ObjectDetectionMode {
  "none",
  "center-point",
  "bounding-box",
};
none
This source does not offer object detection for the corresponding object type. When used as a setting, this is interpreted as a command to turn off production of the corresponding object type's detection metadata.
center-point
This source offers detection of the center point of the corresponding + object type, either a human face or a face landmark, or such a mode is requested.
bounding-box
This source offers detection of the bounding box of the corresponding + object type, either a human face or a face landmark, or such a mode is requested.
Face detection mode describes which face properties are to be detected and set in the metadata for the video frame.
Face landmark detection mode describes which face landmark properties are to be detected and set in the metadata for the video frame.
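As a non-normative sketch, an application that no longer needs the metadata can constrain both modes back to "none" to stop the user agent from producing it; videoTrack is assumed to be a camera {{MediaStreamTrack}}.

// Non-normative sketch: turning face and face landmark detection off again.
// videoTrack is assumed to be a camera MediaStreamTrack.
await videoTrack.applyConstraints({
  humanFaceDetectionMode: 'none',
  humanFaceLandmarkDetectionMode: 'none'
});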
// main.js:
// Check if face detection is supported by the browser
const supports = navigator.mediaDevices.getSupportedConstraints();
if (supports.humanFaceDetectionMode) {
  // Browser supports face detection.
} else {
  throw new Error('Face detection is not supported');
}

// Open camera with face detection enabled
const stream = await navigator.mediaDevices.getUserMedia({
  video: { humanFaceDetectionMode: 'bounding-box' }
});
const [videoTrack] = stream.getVideoTracks();

// Use a video worker and show to user.
const videoElement = document.querySelector('video');
const videoWorker = new Worker('video-worker.js');
videoWorker.postMessage({track: videoTrack}, [videoTrack]);
const {data} = await new Promise(r => videoWorker.onmessage = r);
videoElement.srcObject = new MediaStream([data.videoTrack]);

// video-worker.js:
self.onmessage = async ({data: {track}}) => {
  const generator = new VideoTrackGenerator();
  self.postMessage({videoTrack: generator.track}, [generator.track]);
  const {readable} = new MediaStreamTrackProcessor({track});
  const transformer = new TransformStream({
    async transform(frame, controller) {
      for (const face of frame.metadata().humanFaces || []) {
        if (face.boundingBox) {
          console.log(
            `Face @ (${face.boundingBox.left}, ${face.boundingBox.top}), ` +
            `(${face.boundingBox.right}, ${face.boundingBox.bottom})`);
        }
      }
      controller.enqueue(frame);
    }
  });
  await readable.pipeThrough(transformer).pipeTo(generator.writable);
};
// main.js:
// Open camera.
const stream = await navigator.mediaDevices.getUserMedia({video: true});
const [videoTrack] = stream.getVideoTracks();

// Use a video worker and show to user.
const videoElement = document.querySelector('video');
const videoWorker = new Worker('video-worker.js');
videoWorker.postMessage({track: videoTrack}, [videoTrack]);
const {data} = await new Promise(r => videoWorker.onmessage = r);
videoElement.srcObject = new MediaStream([data.videoTrack]);

// video-worker.js:
self.onmessage = async ({data: {track}}) => {
  // Apply constraints.
  let customBackgroundBlur = true;
  let customEyeGazeCorrection = true;
  let customFaceDetection = false;
  let faceDetectionMode;
  let landmarkDetectionMode;
  const capabilities = track.getCapabilities();
  if (capabilities.backgroundBlur && capabilities.backgroundBlur.max > 0) {
    // The platform supports background blurring.
    // Let's use platform background blurring and skip the custom one.
    await track.applyConstraints({
      advanced: [{backgroundBlur: capabilities.backgroundBlur.max}]
    });
    customBackgroundBlur = false;
  } else if ((capabilities.humanFaceDetectionMode || []).includes('bounding-box')) {
    // The platform supports face bounding box detection but not background
    // blurring. Let's use platform face detection to aid custom
    // background blurring.
    faceDetectionMode = 'bounding-box';
    await track.applyConstraints({
      advanced: [{humanFaceDetectionMode: faceDetectionMode}]
    });
  } else {
    // The platform supports neither background blurring nor face bounding box
    // detection. Let's use custom face detection to aid custom background
    // blurring.
    customFaceDetection = true;
  }
  if ((capabilities.eyeGazeCorrection || []).includes(true)) {
    // The platform supports eye gaze correction.
    // Let's use platform eye gaze correction and skip the custom one.
    await track.applyConstraints({
      advanced: [{eyeGazeCorrection: true}]
    });
    customEyeGazeCorrection = false;
  } else if ((capabilities.humanFaceLandmarkDetectionMode || []).includes('center-point')) {
    // The platform supports face landmark detection but not eye gaze
    // correction. Let's use platform face landmark detection to aid custom eye
    // gaze correction.
    landmarkDetectionMode = 'center-point';
    await track.applyConstraints({
      advanced: [{humanFaceLandmarkDetectionMode: landmarkDetectionMode}]
    });
  } else {
    // The platform supports neither eye gaze correction nor face landmark
    // detection. Let's use custom face landmark detection to aid custom eye
    // gaze correction.
    customFaceDetection = true;
  }

  // Load custom libraries which may utilize TensorFlow and/or WASM.
  const requiredScripts = [].concat(
    customBackgroundBlur ? 'background.js' : [],
    customEyeGazeCorrection ? 'eye-gaze.js' : [],
    customFaceDetection ? 'face.js' : []
  );
  importScripts(...requiredScripts);

  const generator = new VideoTrackGenerator();
  self.postMessage({videoTrack: generator.track}, [generator.track]);
  const {readable} = new MediaStreamTrackProcessor({track});
  const transformer = new TransformStream({
    async transform(frame, controller) {
      // Detect faces or retrieve detected faces.
      const humanFaces =
        customFaceDetection
          ? await detectFaces(frame)
          : frame.metadata().humanFaces;
      // Blur the background if needed.
      if (customBackgroundBlur) {
        const newFrame = await blurBackground(frame, humanFaces);
        frame.close();
        frame = newFrame;
      }
      // Correct the eye gaze if needed.
      if (customEyeGazeCorrection && (humanFaces || []).length > 0) {
        const newFrame = await correctEyeGaze(frame, humanFaces);
        frame.close();
        frame = newFrame;
      }
      controller.enqueue(frame);
    }
  });
  await readable.pipeThrough(transformer).pipeTo(generator.writable);
};