From 1a211a111d4a3131a223ab7c5b056e8638b08c76 Mon Sep 17 00:00:00 2001
From: Florent Castelli
Date: Thu, 2 May 2024 15:05:42 +0200
Subject: [PATCH] Add captureTimestamp and senderCaptureTimeOffset to frame
metadata
Fixes #225
---
index.bs | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 79 insertions(+)
diff --git a/index.bs b/index.bs
index 3d15bda..4075ef2 100644
--- a/index.bs
+++ b/index.bs
@@ -48,6 +48,11 @@ spec:webidl; type:dfn; text:resolve
"CloneArrayBuffer": {
"href": "https://tc39.es/ecma262/#sec-clonearraybuffer",
"title": "CloneArrayBuffer"
+ },
+ "RTP-EXT-CAPTURE-TIME": {
+ "href": "http://www.webrtc.org/experiments/rtp-hdrext/abs-capture-time",
+ "title": "RTP Header Extension for Absolute Capture Time",
+ "publisher": "WebRTC Project"
}
}
@@ -134,6 +139,20 @@ The readEncodedData algorithm is given a |rtcObject| as p
1. Let |frame| be the newly produced frame.
1. Set |frame|.`[[owner]]` to |rtcObject|.
1. Set |frame|.`[[counter]]` to |rtcObject|.`[[lastEnqueuedFrameCounter]]`.
+1. If the frame has been produced by a {{RTCRtpReceiver}}:
+ 1. If the relevant RTP packet contains the
+ [[RTP-EXT-CAPTURE-TIME|RTP Header Extension for Absolute Capture Time]], set |frame|.`[[captureTimestamp]]` to the
+ [[RTP-EXT-CAPTURE-TIME#absolute-capture-timestamp|absolute capture timestamp]] field and set |frame|.`[[senderCaptureTimeOffset]]`
+ to the [[RTP-EXT-CAPTURE-TIME#estimated-capture-clock-offset|estimated capture clock offset]] field if that field is present.
+ 1. Otherwise, if the relevant RTP packet does not contain the
+ [[RTP-EXT-CAPTURE-TIME|RTP Header Extension for Absolute Capture Time]] but a previous RTP packet did,
+ set |frame|.`[[captureTimestamp]]` to the result of calculating the absolute capture timestamp according to
+ [[RTP-EXT-CAPTURE-TIME#timestamp-interpolation|timestamp interpolation]] and set |frame|.`[[senderCaptureTimeOffset]]`
+ to the most recent value that was present.
+ 1. Otherwise, set |frame|.`[[captureTimestamp]]` to undefined and set |frame|.`[[senderCaptureTimeOffset]]` to undefined.
+1. If the frame has been produced by a {{RTCRtpSender}}, set |frame|.`[[captureTimestamp]]` to the capture timestamp
+ using the methodology described in [[RTP-EXT-CAPTURE-TIME#absolute-capture-timestamp]] and set |frame|.`[[senderCaptureTimeOffset]]`
+ to undefined.
1. [=ReadableStream/Enqueue=] |frame| in |rtcObject|.`[[readable]]`.
The writeEncodedData algorithm is given a |rtcObject| as parameter and a |frame| as input. It is defined by running the following steps:
@@ -293,6 +312,10 @@ The setEncryptionKey(|key|, |keyID|) met
# RTCRtpScriptTransform # {#scriptTransform}
+In this section, the capture system refers to the system from which media is sourced, and the sender system
+refers to the system that is sending RTP and RTCP packets to the receiver system where {{RTCEncodedVideoFrameMetadata}} data
+or {{RTCEncodedAudioFrameMetadata}} data is populated.
+
## RTCEncodedVideoFrameType dictionary ## {#RTCEncodedVideoFrameType}
// New enum for video frame types. Will eventually re-use the equivalent defined
@@ -358,6 +381,8 @@ dictionary RTCEncodedVideoFrameMetadata {
sequence<unsigned long> contributingSources;
long long timestamp; // microseconds
unsigned long rtpTimestamp;
+ DOMHighResTimeStamp captureTimestamp;
+ DOMHighResTimeStamp senderCaptureTimeOffset;
DOMString mimeType;
};
@@ -431,6 +456,32 @@ dictionary RTCEncodedVideoFrameMetadata {
that reflects the sampling instant of the first octet in the RTP data packet.
+
+ captureTimestamp DOMHighResTimeStamp
+
+
+
+ The {{RTCEncodedVideoFrameMetadata/captureTimestamp}} is set by the frame source, and for frames that come
+ from the {{RTCRtpReceiver}}, it is extracted by the [[#stream-processing]] algorithm. Its reference clock
+ is the capture system's NTP clock (the same clock used to generate NTP timestamps for RTCP sender reports on
+ that system).
+
+ On populating this member, the user agent MUST return the value of the frame's `[[captureTimestamp]]` slot.
+
+
+
+ senderCaptureTimeOffset DOMHighResTimeStamp
+
+
+
+ The {{RTCEncodedVideoFrameMetadata/senderCaptureTimeOffset}} is the sender system's estimate of the offset
+ between its own NTP clock and the capture system's NTP clock, for the same frame from which the
+ {{RTCEncodedVideoFrameMetadata/captureTimestamp}} originated. It is extracted by the
+ [[#stream-processing]] algorithm.
+
+ On populating this member, the user agent MUST return the value of the frame's `[[senderCaptureTimeOffset]]` slot.
+
+
mimeType DOMString
@@ -611,6 +662,8 @@ dictionary RTCEncodedAudioFrameMetadata {
sequence<unsigned long> contributingSources;
short sequenceNumber;
unsigned long rtpTimestamp;
+ DOMHighResTimeStamp captureTimestamp;
+ DOMHighResTimeStamp senderCaptureTimeOffset;
DOMString mimeType;
};
@@ -664,6 +717,32 @@ dictionary RTCEncodedAudioFrameMetadata {
that reflects the sampling instant of the first octet in the RTP data packet.
+
+ captureTimestamp DOMHighResTimeStamp
+
+
+
+ The {{RTCEncodedAudioFrameMetadata/captureTimestamp}} is set by the frame source, and for frames that come
+ from the {{RTCRtpReceiver}}, it is extracted by the [[#stream-processing]] algorithm. Its reference clock
+ is the capture system's NTP clock (the same clock used to generate NTP timestamps for RTCP sender reports on
+ that system).
+
+ On populating this member, the user agent MUST return the value of the frame's `[[captureTimestamp]]` slot.
+
+
+
+ senderCaptureTimeOffset DOMHighResTimeStamp
+
+
+
+ The {{RTCEncodedAudioFrameMetadata/senderCaptureTimeOffset}} is the sender system's estimate of the offset
+ between its own NTP clock and the capture system's NTP clock, for the same frame from which the
+ {{RTCEncodedAudioFrameMetadata/captureTimestamp}} originated. It is extracted by the
+ [[#stream-processing]] algorithm.
+
+ On populating this member, the user agent MUST return the value of the frame's `[[senderCaptureTimeOffset]]` slot.
+
+
mimeType DOMString