From 1a211a111d4a3131a223ab7c5b056e8638b08c76 Mon Sep 17 00:00:00 2001 From: Florent Castelli Date: Thu, 2 May 2024 15:05:42 +0200 Subject: [PATCH] Add captureTimestamp and senderCaptureTimeOffset to frame metadata Fixes #225 --- index.bs | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/index.bs b/index.bs index 3d15bda..4075ef2 100644 --- a/index.bs +++ b/index.bs @@ -48,6 +48,11 @@ spec:webidl; type:dfn; text:resolve "CloneArrayBuffer": { "href": "https://tc39.es/ecma262/#sec-clonearraybuffer", "title": "CloneArrayBuffer" + }, + "RTP-EXT-CAPTURE-TIME": { + "href": "http://www.webrtc.org/experiments/rtp-hdrext/abs-capture-time", + "title": "RTP Header Extension for Absolute Capture Time", + "publisher": "WebRTC Project" } } @@ -134,6 +139,20 @@ The readEncodedData algorithm is given a |rtcObject| as p 1. Let |frame| be the newly produced frame. 1. Set |frame|.`[[owner]]` to |rtcObject|. 1. Set |frame|.`[[counter]]` to |rtcObject|.`[[lastEnqueuedFrameCounter]]`. +1. If the frame has been produced by a {{RTCRtpReceiver}}: + 1. If the relevant RTP packet contains the + [[RTP-EXT-CAPTURE-TIME|RTP Header Extension for Absolute Capture Time]], set |frame|.`[[captureTimestamp]]` to the + [[RTP-EXT-CAPTURE-TIME#absolute-capture-timestamp|absolute capture timestamp]] field and set |frame|.`[[senderCaptureTimeOffset]]` + to the [[RTP-EXT-CAPTURE-TIME#estimated-capture-clock-offset|capture clock offset field]] if it is present. + 1. Otherwise, if the relevant RTP packet does not contain the + [[RTP-EXT-CAPTURE-TIME|RTP Header Extension for Absolute Capture Time]] but a previous RTP packet did, + set |frame|.`[[captureTimestamp]]` to the result of calculating the absolute capture timestamp according to + [[RTP-EXT-CAPTURE-TIME#timestamp-interpolation|timestamp interpolation]] and set |frame|.`[[senderCaptureTimeOffset]]` + to the most recent value that was present. + 1. 
Otherwise, set |frame|.`[[captureTimestamp]]` to undefined and set |frame|.`[[senderCaptureTimeOffset]]` to undefined. +1. If the frame has been produced by a {{RTCRtpSender}}, set |frame|.`[[captureTimestamp]]` to the capture timestamp + using the methodology described in [[RTP-EXT-CAPTURE-TIME#absolute-capture-timestamp]] and set |frame|.`[[senderCaptureTimeOffset]]` + to undefined. 1. [=ReadableStream/Enqueue=] |frame| in |rtcObject|.`[[readable]]`. The writeEncodedData algorithm is given a |rtcObject| as parameter and a |frame| as input. It is defined by running the following steps: @@ -293,6 +312,10 @@ The setEncryptionKey(|key|, |keyID|) met # RTCRtpScriptTransform # {#scriptTransform} +In this section, the capture system refers to the system where media is sourced from and the sender system +refers to the system that is sending RTP and RTCP packets to the receiver system where {{RTCEncodedVideoFrameMetadata}} data +or {{RTCEncodedAudioFrameMetadata}} data is populated. + ## RTCEncodedVideoFrameType dictionary ## {#RTCEncodedVideoFrameType}
 // New enum for video frame types. Will eventually re-use the equivalent defined
@@ -358,6 +381,8 @@ dictionary RTCEncodedVideoFrameMetadata {
     sequence<unsigned long> contributingSources;
     long long timestamp;    // microseconds
     unsigned long rtpTimestamp;
+    DOMHighResTimeStamp captureTimestamp;
+    DOMHighResTimeStamp senderCaptureTimeOffset;
     DOMString mimeType;
 };
 
@@ -431,6 +456,32 @@ dictionary RTCEncodedVideoFrameMetadata { that reflects the sampling instant of the first octet in the RTP data packet.

+
+ captureTimestamp DOMHighResTimeStamp +
+
+

+ The {{RTCEncodedVideoFrameMetadata/captureTimestamp}} is set by the frame source, and for frames that come + from the {{RTCRtpReceiver}}, it is extracted by the [[#stream-processing]] algorithm. Its reference clock + is the capture system's NTP clock (the same clock used to generate NTP timestamps for RTCP sender reports on + that system). + + On populating this member, the user agent MUST return the value of the frame's `[[captureTimestamp]]` slot. +

+
+
+ senderCaptureTimeOffset DOMHighResTimeStamp +
+
+

+ The {{RTCEncodedVideoFrameMetadata/senderCaptureTimeOffset}} is the sender system's estimate of the offset + between its own NTP clock and the capture system's NTP clock, for the same frame that the + {{RTCEncodedVideoFrameMetadata/captureTimestamp}} originated from. It is extracted by the + [[#stream-processing]] algorithm. + + On populating this member, the user agent MUST return the value of the frame's `[[senderCaptureTimeOffset]]` slot. +

+
mimeType DOMString
@@ -611,6 +662,8 @@ dictionary RTCEncodedAudioFrameMetadata { sequence<unsigned long> contributingSources; short sequenceNumber; unsigned long rtpTimestamp; + DOMHighResTimeStamp captureTimestamp; + DOMHighResTimeStamp senderCaptureTimeOffset; DOMString mimeType; }; @@ -664,6 +717,32 @@ dictionary RTCEncodedAudioFrameMetadata { that reflects the sampling instant of the first octet in the RTP data packet.

+
+ captureTimestamp DOMHighResTimeStamp +
+
+

+ The {{RTCEncodedAudioFrameMetadata/captureTimestamp}} is set by the frame source, and for frames that come + from the {{RTCRtpReceiver}}, it is extracted by the [[#stream-processing]] algorithm. Its reference clock + is the capture system's NTP clock (the same clock used to generate NTP timestamps for RTCP sender reports on + that system). + + On populating this member, the user agent MUST return the value of the frame's `[[captureTimestamp]]` slot. +

+
+
+ senderCaptureTimeOffset DOMHighResTimeStamp +
+
+

+ The {{RTCEncodedAudioFrameMetadata/senderCaptureTimeOffset}} is the sender system's estimate of the offset + between its own NTP clock and the capture system's NTP clock, for the same frame that the + {{RTCEncodedAudioFrameMetadata/captureTimestamp}} originated from. It is extracted by the + [[#stream-processing]] algorithm. + + On populating this member, the user agent MUST return the value of the frame's `[[senderCaptureTimeOffset]]` slot. +

+
mimeType DOMString