diff --git a/index.html b/index.html
index fdcd059..c987953 100644
--- a/index.html
+++ b/index.html
@@ -9,7 +9,7 @@
       // See https://github.com/w3c/respec/wiki/ for how to configure ReSpec
       var respecConfig = {
         group: "webrtc",
-        xref: ["html", "infra", "permissions", "dom", "mediacapture-streams", "webaudio", "webidl"],
+        xref: ["geometry-1", "html", "infra", "permissions", "dom", "image-capture", "mediacapture-streams", "webaudio", "webcodecs", "webidl"],
         edDraftURI: "https://w3c.github.io/mediacapture-extensions/",
         editors: [
           {name: "Jan-Ivar Bruaroey", company: "Mozilla Corporation", w3cid: 79152},
@@ -654,5 +654,384 @@
Human face metadata describes the human faces in video frames. It can be set by web applications using the standard means when creating {{VideoFrameMetadata}} for {{VideoFrame}}s, or it can be set by a user agent when the media track constraints defined below are used to enable face detection for the {{MediaStreamTrack}} which provides the {{VideoFrame}}s.
The face metadata may be used by video encoders to enhance the quality of the faces in encoded video streams, or for other suitable purposes.
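The following non-normative sketch illustrates how a web application could attach this metadata itself when constructing a {{VideoFrame}}. It assumes that the {{VideoFrame}} constructor accepts a metadata member in its init dictionary; detectFacesSomehow() is a hypothetical application-provided detector.

// Non-normative sketch: a web application attaching humanFaces metadata to a
// frame it constructs itself. detectFacesSomehow() is a hypothetical
// application-provided detector; passing metadata in the VideoFrame init
// dictionary is assumed to be supported by the user agent.
async function annotateFrame(originalFrame) {
  const humanFaces = await detectFacesSomehow(originalFrame);
  const annotatedFrame = new VideoFrame(originalFrame, {
    timestamp: originalFrame.timestamp,
    metadata: {...originalFrame.metadata(), humanFaces}
  });
  originalFrame.close();
  return annotatedFrame;
}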
partial dictionary VideoFrameMetadata {
  sequence<HumanFace> humanFaces;
};
humanFaces
of type sequence<{{HumanFace}}>
The set of known human faces in this video frame.
dictionary HumanFace {
  long id;
  float probability;
  DOMRectReadOnly boundingBox;
  HumanFaceLandmark leftEye;
  HumanFaceLandmark rightEye;
  HumanFaceLandmark mouth;
};
id
of type {{long}}
If set, a unique identifier of a face within a sequence. If the same face can be tracked in multiple frames originating from the same {{MediaStreamTrack}} source, {{id}} is set to the same integer value for the face in all frames.
The user agent MUST NOT select the value of {{id}} in such a way that detected faces could be correlated between different {{MediaStreamTrack}} objects.
probability
of type {{float}}
If set, a probability value in the range (0, 1]: the approximate probability that the {{HumanFace}} describes an actual human face.
boundingBox
of type {{DOMRectReadOnly}}
A bounding box surrounding the face. The corner coordinates of the bounding box are interpreted as coordinates in a normalized square space: the origin {x, y} = {0.0, 0.0} represents the upper left corner and {x, y} = {1.0, 1.0} represents the lower right corner of the rendered frame.
The face location in the frame may be specified even if it is obscured by other objects in front of it or if it lies partially or fully outside of the frame.
leftEye
of type {{HumanFaceLandmark}}
If present, describes the properties of the face's left eye.
rightEye
of type {{HumanFaceLandmark}}
If present, describes the properties of the face's right eye.
mouth
of type {{HumanFaceLandmark}}
If present, describes the properties of the face's mouth.
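As a non-normative sketch, the normalized {{HumanFace/boundingBox}} can be mapped to pixel coordinates of a given frame, and {{HumanFace/id}} can be used to follow the same face across frames of one track. logFaceTrack() is a hypothetical application-provided callback.

// Non-normative sketch: converting the normalized bounding box of each face to
// pixel coordinates and grouping observations by face id across frames.
// logFaceTrack() is a hypothetical application-provided callback.
const faceHistory = new Map();

function processFaceMetadata(frame) {
  for (const face of frame.metadata().humanFaces || []) {
    if (!face.boundingBox) continue;
    const pixelBox = DOMRectReadOnly.fromRect({
      x: face.boundingBox.x * frame.displayWidth,
      y: face.boundingBox.y * frame.displayHeight,
      width: face.boundingBox.width * frame.displayWidth,
      height: face.boundingBox.height * frame.displayHeight
    });
    if (face.id !== undefined) {
      const history = faceHistory.get(face.id) || [];
      history.push({timestamp: frame.timestamp, pixelBox});
      faceHistory.set(face.id, history);
      logFaceTrack(face.id, history);
    }
  }
}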
dictionary HumanFaceLandmark {
  Point2D centerPoint;
};
centerPoint
of type {{Point2D}}
The coordinates of the approximate center of the detected landmark. The landmark location in the frame may be specified even if it is obscured by other objects in front of it or if it lies partially or fully outside of the frame. See {{HumanFace/boundingBox}} for the definition of the coordinate system.
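For illustration, a non-normative sketch of using landmark center points, here computing the distance between the eyes of a face in the normalized coordinate space shared with {{HumanFace/boundingBox}}:

// Non-normative sketch: distance between the eye center points of a face,
// expressed in the normalized coordinate space of the frame.
function normalizedEyeDistance(face) {
  if (!face.leftEye || !face.rightEye) {
    return undefined;
  }
  const dx = face.rightEye.centerPoint.x - face.leftEye.centerPoint.x;
  const dy = face.rightEye.centerPoint.y - face.leftEye.centerPoint.y;
  return Math.hypot(dx, dy);
}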
partial dictionary MediaTrackSupportedConstraints {
  boolean humanFaceDetectionMode = true;
  boolean humanFaceLandmarkDetectionMode = true;
};
humanFaceDetectionMode
of type {{boolean}}, defaulting to true
Whether face detection mode constraining is recognized.
humanFaceLandmarkDetectionMode
of type {{boolean}}, defaulting to true
Whether face landmark detection mode constraining is recognized.
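A non-normative sketch of feature-detecting these constraints before attempting to use them:

// Non-normative sketch: feature-detecting the face and face landmark
// detection constraints.
const supported = navigator.mediaDevices.getSupportedConstraints();
const canConstrainFaceDetection = !!supported.humanFaceDetectionMode;
const canConstrainLandmarkDetection = !!supported.humanFaceLandmarkDetectionMode;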
partial dictionary MediaTrackCapabilities {
  sequence<DOMString> humanFaceDetectionMode;
  sequence<DOMString> humanFaceLandmarkDetectionMode;
};
humanFaceDetectionMode
of type sequence<{{DOMString}}>
The sequence of supported face detection modes. Each string MUST be one of the members of {{ObjectDetectionMode}}. The mode {{ObjectDetectionMode/"center-point"}} MUST NOT be supported.
humanFaceLandmarkDetectionMode
of type sequence<{{DOMString}}>
The sequence of supported face landmark detection modes. Each string MUST be one of the members of {{ObjectDetectionMode}}. The mode {{ObjectDetectionMode/"bounding-box"}} MUST NOT be supported.
partial dictionary MediaTrackConstraintSet {
  ConstrainDOMString humanFaceDetectionMode;
  ConstrainDOMString humanFaceLandmarkDetectionMode;
};
humanFaceDetectionMode
of type {{ConstrainDOMString}}
The string MUST be one of the members of {{ObjectDetectionMode}}. See the face detection mode constrainable property.
humanFaceLandmarkDetectionMode
of type {{ConstrainDOMString}}
The string MUST be one of the members of {{ObjectDetectionMode}}. See the face landmark detection mode constrainable property.
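For example, a non-normative sketch of requesting both modes with non-required (ideal) semantics, so that applyConstraints() does not reject on devices without support. videoTrack is assumed to be a camera {{MediaStreamTrack}} obtained via getUserMedia().

// Non-normative sketch: requesting face and face landmark detection.
// videoTrack is assumed to be a camera MediaStreamTrack. Using 'ideal'
// keeps applyConstraints() from rejecting when a device does not support
// the requested mode.
await videoTrack.applyConstraints({
  humanFaceDetectionMode: {ideal: 'bounding-box'},
  humanFaceLandmarkDetectionMode: {ideal: 'center-point'}
});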
partial dictionary MediaTrackSettings {
  DOMString humanFaceDetectionMode;
  DOMString humanFaceLandmarkDetectionMode;
};
humanFaceDetectionMode
of type {{DOMString}}
The current face detection mode setting. The string MUST be one of the members of {{ObjectDetectionMode}}, excluding {{ObjectDetectionMode/"center-point"}}.
humanFaceLandmarkDetectionMode
of type {{DOMString}}
The current face landmark detection mode setting. The string MUST be one of the members of {{ObjectDetectionMode}}, excluding {{ObjectDetectionMode/"bounding-box"}}.
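A non-normative sketch of reading back the settings to see which modes are actually in effect; videoTrack is assumed to be a camera {{MediaStreamTrack}}.

// Non-normative sketch: checking which detection modes are in effect.
// videoTrack is assumed to be a camera MediaStreamTrack.
const settings = videoTrack.getSettings();
if (settings.humanFaceDetectionMode === 'bounding-box') {
  // Face bounding boxes will be present in frame.metadata().humanFaces.
}
if (settings.humanFaceLandmarkDetectionMode === 'none') {
  // No eye or mouth landmarks will be set by the user agent.
}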
enum ObjectDetectionMode {
  "none",
  "center-point",
  "bounding-box",
};
none
This source does not offer object detection for the corresponding object type. When used as a setting, this is interpreted as a command to turn off production of the corresponding object type's detection metadata.
center-point
This source offers detection of the center point of the corresponding + object type, either a human face or a face landmark, or such a mode is requested.
bounding-box
This source offers detection of the bounding box of the corresponding + object type, either a human face or a face landmark, or such a mode is requested.
Face detection mode describes which face properties are to be detected and set in the metadata for the video frame.
Face landmark detection mode describes which face landmark properties are to be detected and set in the metadata for the video frame.
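As a non-normative sketch, an application that no longer needs the metadata can constrain both modes back to "none" to stop the user agent from producing it; videoTrack is assumed to be a camera {{MediaStreamTrack}}.

// Non-normative sketch: turning face and face landmark detection off again.
// videoTrack is assumed to be a camera MediaStreamTrack.
await videoTrack.applyConstraints({
  humanFaceDetectionMode: 'none',
  humanFaceLandmarkDetectionMode: 'none'
});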
// main.js:
// Check if face detection is supported by the browser
const supports = navigator.mediaDevices.getSupportedConstraints();
if (supports.humanFaceDetectionMode) {
  // Browser supports face detection.
} else {
  throw new Error('Face detection is not supported');
}

// Open camera with face detection enabled
const stream = await navigator.mediaDevices.getUserMedia({
  video: { humanFaceDetectionMode: 'bounding-box' }
});
const [videoTrack] = stream.getVideoTracks();

// Use a video worker and show to user.
const videoElement = document.querySelector('video');
const videoWorker = new Worker('video-worker.js');
videoWorker.postMessage({track: videoTrack}, [videoTrack]);
const {data} = await new Promise(r => videoWorker.onmessage = r);
videoElement.srcObject = new MediaStream([data.videoTrack]);

// video-worker.js:
self.onmessage = async ({data: {track}}) => {
  const generator = new VideoTrackGenerator();
  self.postMessage({videoTrack: generator.track}, [generator.track]);
  const {readable} = new MediaStreamTrackProcessor({track});
  const transformer = new TransformStream({
    async transform(frame, controller) {
      for (const face of frame.metadata().humanFaces || []) {
        if (face.boundingBox) {
          console.log(
            `Face @ (${face.boundingBox.left}, ${face.boundingBox.top}), ` +
            `(${face.boundingBox.right}, ${face.boundingBox.bottom})`);
        }
      }
      controller.enqueue(frame);
    }
  });
  await readable.pipeThrough(transformer).pipeTo(generator.writable);
};
// main.js:
// Open camera.
const stream = await navigator.mediaDevices.getUserMedia({video: true});
const [videoTrack] = stream.getVideoTracks();

// Use a video worker and show to user.
const videoElement = document.querySelector('video');
const videoWorker = new Worker('video-worker.js');
videoWorker.postMessage({track: videoTrack}, [videoTrack]);
const {data} = await new Promise(r => videoWorker.onmessage = r);
videoElement.srcObject = new MediaStream([data.videoTrack]);

// video-worker.js:
self.onmessage = async ({data: {track}}) => {
  // Apply constraints.
  let customBackgroundBlur = true;
  let customEyeGazeCorrection = true;
  let customFaceDetection = false;
  let faceDetectionMode;
  let landmarkDetectionMode;
  const capabilities = track.getCapabilities();
  if (capabilities.backgroundBlur && capabilities.backgroundBlur.max > 0) {
    // The platform supports background blurring.
    // Let's use platform background blurring and skip the custom one.
    await track.applyConstraints({
      advanced: [{backgroundBlur: capabilities.backgroundBlur.max}]
    });
    customBackgroundBlur = false;
  } else if ((capabilities.humanFaceDetectionMode || []).includes('bounding-box')) {
    // The platform supports face bounding box detection but not background
    // blurring. Let's use platform face detection to aid custom
    // background blurring.
    faceDetectionMode = 'bounding-box';
    await track.applyConstraints({
      advanced: [{humanFaceDetectionMode: faceDetectionMode}]
    });
  } else {
    // The platform supports neither background blurring nor face bounding box
    // detection. Let's use custom face detection to aid custom background
    // blurring.
    customFaceDetection = true;
  }
  if ((capabilities.eyeGazeCorrection || []).includes(true)) {
    // The platform supports eye gaze correction.
    // Let's use platform eye gaze correction and skip the custom one.
    await track.applyConstraints({
      advanced: [{eyeGazeCorrection: true}]
    });
    customEyeGazeCorrection = false;
  } else if ((capabilities.humanFaceLandmarkDetectionMode || []).includes('center-point')) {
    // The platform supports face landmark detection but not eye gaze
    // correction. Let's use platform face landmark detection to aid custom eye
    // gaze correction.
    landmarkDetectionMode = 'center-point';
    await track.applyConstraints({
      advanced: [{humanFaceLandmarkDetectionMode: landmarkDetectionMode}]
    });
  } else {
    // The platform supports neither eye gaze correction nor face landmark
    // detection. Let's use custom face landmark detection to aid custom eye
    // gaze correction.
    customFaceDetection = true;
  }

  // Load custom libraries which may utilize TensorFlow and/or WASM.
  const requiredScripts = [].concat(
    customBackgroundBlur ? 'background.js' : [],
    customEyeGazeCorrection ? 'eye-gaze.js' : [],
    customFaceDetection ? 'face.js' : []
  );
  importScripts(...requiredScripts);

  const generator = new VideoTrackGenerator();
  self.postMessage({videoTrack: generator.track}, [generator.track]);
  const {readable} = new MediaStreamTrackProcessor({track});
  const transformer = new TransformStream({
    async transform(frame, controller) {
      // Detect faces or retrieve detected faces.
      const humanFaces =
        customFaceDetection
          ? await detectFaces(frame)
          : frame.metadata().humanFaces;
      // Blur the background if needed.
      if (customBackgroundBlur) {
        const newFrame = await blurBackground(frame, humanFaces);
        frame.close();
        frame = newFrame;
      }
      // Correct the eye gaze if needed.
      if (customEyeGazeCorrection && (humanFaces || []).length > 0) {
        const newFrame = await correctEyeGaze(frame, humanFaces);
        frame.close();
        frame = newFrame;
      }
      controller.enqueue(frame);
    }
  });
  await readable.pipeThrough(transformer).pipeTo(generator.writable);
};