diff --git a/.github/workflows/auto-publish.yml b/.github/workflows/auto-publish.yml new file mode 100644 index 0000000..4ca6ca9 --- /dev/null +++ b/.github/workflows/auto-publish.yml @@ -0,0 +1,36 @@ +# Workflow based on the main w3c/spec-prod action example: +# https://github.com/w3c/spec-prod/#basic-usage + +name: Build, Validate, Deploy and Publish + +on: + # Workflow runs on pull requests where it makes sure that the spec can still be + # generated, that markup is valid and that there are no broken links, as + # well as on pushes to the default branch where it also deploys the generated + # spec to the gh-pages branch and publishes the result to /TR. + # The "workflow_dispatch" hook allows admins to also trigger the workflow + # manually from GitHub's UI. + pull_request: {} + push: + branches: [main] + workflow_dispatch: + +jobs: + main: + runs-on: ubuntu-20.04 + steps: + # See doc at https://github.com/actions/checkout#checkout-v2 + - name: Checkout repository + uses: actions/checkout@v2 + + # See doc at https://github.com/w3c/spec-prod/#spec-prod + # The action only deploys the generated spec to the gh-pages branch when + # the workflow was triggered by a push to the default branch. 
+ - name: Build and validate index.html, push to gh-pages branch if needed + uses: w3c/spec-prod@v2 + with: + GH_PAGES_BRANCH: gh-pages + W3C_ECHIDNA_TOKEN: ${{ secrets.ECHIDNA_TOKEN }} + W3C_WG_DECISION_URL: https://github.com/w3c/media-wg/issues/27 + W3C_BUILD_OVERRIDE: | + status: WD diff --git a/.github/workflows/tidy.yaml b/.github/workflows/tidy.yaml new file mode 100644 index 0000000..7fb7a22 --- /dev/null +++ b/.github/workflows/tidy.yaml @@ -0,0 +1,21 @@ +name: Tidy document +on: + workflow_dispatch: {} + push: + branches: + - gh-pages + paths: + - index.html + +jobs: + tidy: + name: Tidy up + runs-on: macos-latest + steps: + - uses: actions/checkout@v4 + - run: make + - uses: peter-evans/create-pull-request@v6 + with: + title: "Tidied up source" + commit-message: "chore(index.bs): tidy up" + branch: tidy diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 7a24eaa..0000000 --- a/.travis.yml +++ /dev/null @@ -1,12 +0,0 @@ -language: generic - -script: bash ./deploy.sh - -branches: - only: - - master - -env: - global: - - ENCRYPTION_LABEL: 9b5128ce1dd6 - - COMMIT_AUTHOR_EMAIL: travis-ci@w3.org diff --git a/README.md b/README.md index 7cc31ed..f55ecaf 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Media Session Standard +# Media Session API https://w3c.github.io/mediasession/ diff --git a/deploy.sh b/deploy.sh deleted file mode 100644 index 56f8c09..0000000 --- a/deploy.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash -set -e # Exit with nonzero exit code if anything fails - -SOURCE_BRANCH="master" -TARGET_BRANCH="gh-pages" - -# Pull requests and commits to other branches shouldn't try to deploy, just build to verify -if [ "$TRAVIS_PULL_REQUEST" != "false" -o "$TRAVIS_BRANCH" != "$SOURCE_BRANCH" ]; then - echo "Skipping deploy; just doing a build." 
- curl https://api.csswg.org/bikeshed/ -f -F file=@index.bs > index.html; - exit 0 -fi - -# Save some useful information -REPO=`git config remote.origin.url` -SSH_REPO=${REPO/https:\/\/github.com\//git@github.com:} -SHA=`git rev-parse --verify HEAD` - -# Clone the existing gh-pages for this repo into out/ -# Create a new empty branch if gh-pages doesn't exist yet (should only happen on first deply) -git clone $REPO out -cd out -git checkout $TARGET_BRANCH || git checkout --orphan $TARGET_BRANCH -cd .. - -# Clean out existing contents -rm -rf out/* || exit 0 - -# Re-format. - -./format.py index.bs - -# Re-generating. -curl https://api.csswg.org/bikeshed/ -f -F file=@index.bs > out/index.html; - -# Now let's go have some fun with the cloned repo -cd out -git config user.name "Travis CI" -git config user.email "$COMMIT_AUTHOR_EMAIL" - -# If there are no changes to the compiled out (e.g. this is a README update) then just bail. -if [[ -z $(git status -s) ]]; then - echo "No changes to the output on this push; exiting." - exit 0 -fi - -# Commit the "changes", i.e. the new version. -# The delta will show diffs between new and old versions. -git add . -git commit -m "Deploy to GitHub Pages: ${SHA}" - -# Get the deploy key by using Travis's stored variables to decrypt deploy_key.enc -ENCRYPTED_KEY_VAR="encrypted_${ENCRYPTION_LABEL}_key" -ENCRYPTED_IV_VAR="encrypted_${ENCRYPTION_LABEL}_iv" -ENCRYPTED_KEY=${!ENCRYPTED_KEY_VAR} -ENCRYPTED_IV=${!ENCRYPTED_IV_VAR} -openssl aes-256-cbc -K $ENCRYPTED_KEY -iv $ENCRYPTED_IV -in ../deploy_key.enc -out deploy_key -d -chmod 600 deploy_key -eval `ssh-agent -s` -ssh-add deploy_key - -# Now that we're all set up, we can push. 
-git push $SSH_REPO $TARGET_BRANCH diff --git a/deploy_key.enc b/deploy_key.enc deleted file mode 100644 index 4c0b312..0000000 Binary files a/deploy_key.enc and /dev/null differ diff --git a/explainer.md b/explainer.md index 142a186..0e37c4a 100644 --- a/explainer.md +++ b/explainer.md @@ -106,9 +106,10 @@ interface MediaSession : EventTarget { ### The `MediaMetadata` interface -A `MediaMetadata` object can contain media metadata like title, artist, album -and album art. To set the metadata for a `MediaSession`, the page should create -a `MediaMetadata` object and assign it to a `MediaSession` object: +A `MediaMetadata` object can contain media metadata like title, artist, album, +artwork, and video chapter information. To set the metadata for a `MediaSession`, +the page should create a `MediaMetadata` object and assign it to a `MediaSession` +object: ```javascript navigator.mediaSession.metadata = new MediaMetadata(/* MediaMetadata constructor */); @@ -123,6 +124,7 @@ interface MediaMetadata { attribute DOMString artist; attribute DOMString album; attribute FrozenArray artwork; + [SameObject] readonly attribute FrozenArray chapterInfo; }; dictionary MediaImage { @@ -130,6 +132,13 @@ dictionary MediaImage { DOMString sizes = ""; DOMString type = ""; }; + +[Exposed=Window] +interface ChapterInformation { + readonly attribute DOMString title; + readonly attribute double startTime; + [SameObject] readonly attribute FrozenArray artwork; +}; ``` ### The `MediaPositionState` dictionary diff --git a/format.py b/format.py index f7051a6..ec7a3a1 100755 --- a/format.py +++ b/format.py @@ -36,10 +36,10 @@ def tokenize(source): def validate(path, source, tokens): stack = [] - def fail(reason, offset): + def fail(reason, offset, source, path): lineno = source.count('\n', 0, offset) + 1 - print '%s:%s: error: %s' % (path, lineno, reason) - print source.splitlines()[lineno - 1] + print(f'{path}:{lineno}: error: {reason}') + print(source.splitlines()[lineno - 1]) sys.exit(1) for 
token, start, end, name in tokens: diff --git a/index.bs b/index.bs index 6c7f034..d576178 100644 --- a/index.bs +++ b/index.bs @@ -1,15 +1,19 @@ + +
 urlPrefix: https://html.spec.whatwg.org/multipage/; spec: HTML
     type: dfn
-        urlPrefix: infrastructure.html
-            text: case-sensitive; url: #case-sensitivity-and-string-comparison
-            text: ASCII case-insensitive; url: #ascii-case-insensitive
-            text: in parallel
-            text: unordered set of unique space-separated tokens; url: #unordered-set-of-unique-space-separated-tokens
-            text: document base url
-            text: MIME type
-        urlPrefix: embedded-content.html
-            text: media element
-            text: muted; url: #concept-media-muted
-            text: pause event; url: #event-media-pause
-            text: play event; url: #event-media-play
-            text: potentially playing
-        urlPrefix: browsers.html
-            text: browsing context
-            text: top-level browsing context
-            text: nested browsing context
         urlPrefix: webappapis.html
-            text: API base URL
             text: entry settings object
-            text: queue a task
-            text: task
-            text: task source
-        urlPrefix: semantics.html
-            text: link; for: HTMLLinkElement; url:#the-link-element
         urlPrefix: interaction.html
-            text: triggered by user activation
-    type: attribute
-        urlPrefix: semantics.html
-            text: sizes; for: HTMLLinkElement; url: #attr-link-sizes;
-urlPrefix: https://url.spec.whatwg.org/; spec: URL
-    type: dfn; urlPrefix: #concept-
-        text: url parser
-    type: dfn
-        text: absolute URL; url: #syntax-url-absolute
-        text: relative URL; url: #syntax-url-relative
-urlPrefix: https://fetch.spec.whatwg.org/; spec: FETCH
-    type: dfn; urlPrefix: #concept-
-        text: fetch
-        text: request
-        text: context; url: request-context
-        text: context frame type; url: request-context-frame-type
-        text: internal response
-        text: origin; url: request-origin
-        text: referrer; url: request-referrer
-        text: response
-        text: response type
-        text: url; url: request-url
-    type: dfn;
-        text: force Origin header flag
-urlPrefix: https://www.w3.org/TR/appmanifest/; spec: appmanifest
-    type: dfn
-        text: image object; url: #dfn-image-object
-urlPrefix: https://heycam.github.io/webidl/
-    type: exception
-        text: TypeError
-urlPrefix: https://tc39.github.io/ecma262/#sec-object.; type: dfn
-    text: freeze
+            text: activation notification
 

Introduction

@@ -117,49 +67,11 @@ urlPrefix: https://tc39.github.io/ecma262/#sec-object.; type: dfn Media is used extensively today, and the Web is one of the primary means of consuming media content. Many platforms can display media metadata, such as -title, artist, album and album art on various UI elements such as notification, -media control center, device lockscreen and wearable devices. This specification -aims to enable web pages to specify the media metadata to be displayed in -platform UI, and respond to media controls which may come from platform UI or -media keys, thereby improving the user experience. - -

Conformance

- -All diagrams, examples, and notes in this specification are non-normative, as -are all sections explicitly marked non-normative. Everything else in this -specification is normative. - -The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", -"SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", "MAY", and "OPTIONAL" in this -document are to be interpreted as described in RFC 2119. For readability, these -words do not appear in all uppercase letters in this specification. [[!RFC2119]] - -Requirements phrased in the imperative as part of algorithms (such as "strip any -leading space characters" or "return false and terminate these steps") are to be -interpreted with the meaning of the key word ("must", "should", "may", etc) used -in introducing the algorithm. - -Conformance requirements phrased as algorithms or specific steps may be -implemented in any manner, so long as the end result is equivalent. (In -particular, the algorithms defined in this specification are intended to be easy -to follow, and not intended to be performant.) - -User agents may impose implementation-specific limits on otherwise unconstrained -inputs, e.g. to prevent denial of service attacks, to guard against running out -of memory, or to work around platform-specific limitations. - -When a method or an attribute is said to call another method or attribute, the -user agent must invoke its internal API for that attribute or method so that -e.g. the author can't change the behavior by overriding attributes or methods -with custom properties or functions in JavaScript. - -Unless otherwise stated, string comparisons are done in a case-sensitive -manner. - -

Dependencies

- -The IDL fragments in this specification must be interpreted as required for -conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]] +title, artist, album and album art on various UI elements such as notifications, +media control center, device lockscreen, and wearable devices. This +specification aims to enable web pages to specify the media metadata to be +displayed in platform UI, and respond to media controls that may come from +platform UI or media keys, thereby improving the user experience.

Security and Privacy @@ -245,37 +157,36 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]]

Playback State

- In order to make play and - pause actions work properly, - the user agent SHOULD be able to determine if a browsing context of - the active media session is playing media or not, which is called - the guessed playback state. The RECOMMENDED way for determining - the guessed playback state is to monitor the media elements whose - node document's browsing context is the browsing context. The - browsing context's guessed playback state is playing if any of them is - potentially playing and not muted, and is paused otherwise. Other information - SHOULD also be considered, such as WebAudio and plugins. + In order to make {{MediaSessionAction/play}} and + {{MediaSessionAction/pause}} actions work properly, the user agent SHOULD + be able to determine if a [=/browsing context=] of the active media + session is playing media or not, which is called the guessed + playback state. The RECOMMENDED way for determining the guessed + playback state is to monitor the media elements whose node document's + [=Document/browsing context=] is the [=/browsing context=]. The + [=/browsing context=]'s guessed playback state is + {{MediaSessionPlaybackState/"playing"}} if any of them is + [=media element/potentially playing=] and not [=media element/muted=], + and is {{MediaSessionPlaybackState/"paused"}} otherwise. Other + information SHOULD also be considered, such as WebAudio and plugins.

- The playbackState attribute specifies - the declared playback state from the browsing context. The - state is combined with the guessed playback state to compute the + The {{MediaSession/playbackState}} attribute specifies the declared + playback state from the [=/browsing context=]. The state is combined + with the guessed playback state to compute the actual playback state, which is a finalized state and will be - used for - play and - pause actions. + used for {{MediaSessionAction/play}} and {{MediaSessionAction/pause}} + actions.

The actual playback state is computed in the following way:

  • - If the declared playback state is playing, return playing. + If the declared playback state is + {{MediaSessionPlaybackState/playing}}, return + {{MediaSessionPlaybackState/playing}}.
  • Otherwise, return the guessed playback state. @@ -286,9 +197,9 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]]

    The {{MediaSession/playbackState}} attribute could be useful when the page wants to do some preparation steps when the media is paused but it allows - the preparation steps to be interrupted by pause action. See Setting playbackState for example. + the preparation steps to be interrupted by {{MediaSessionAction/pause}} + action. See Setting playbackState + for example.

    @@ -304,7 +215,7 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]] There could be multiple {{MediaSession}} objects existing at the same time since the user agent could have multiple tabs, each tab could contain a top-level browsing context and multiple nested browsing - contexts, and each browsing context could have a {{MediaSession}} + contexts, and each [=/browsing context=] could have a {{MediaSession}} object. The user agent MUST select at most one of the {{MediaSession}} objects to @@ -315,10 +226,10 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]] routing. It only takes effect for the active media session. It is RECOMMENDED that the user agent selects the active media - session by managing audio focus. A tab or browsing context - is said to have audio focus if it is currently playing audio or - the user expects to control the media in it. The AudioFocus API targets this - area and could be used once it's finished. + session by managing audio focus. A tab or [=Window/browsing + context=] is said to have audio focus if it is currently playing + audio or the user expects to control the media in it. The AudioFocus API + targets this area and could be used once it's finished. Whenever the active media session is changed, the user agent MUST run the media session actions update algorithm and the update metadata @@ -330,10 +241,9 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]] The media metadata for the active media session MAY be displayed in the platform UI depending on platform conventions. Whenever the active - media session changes or setting metadata of the active media - session, the user agent MUST run the update metadata - algorithm. The steps are as follows: + media session changes or the {{MediaSession/metadata}} of the + active media session is set, the user agent MUST run the update + metadata algorithm. The steps are as follows:

    1. @@ -341,17 +251,17 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]] presented to the platform, and terminate these steps.
    2. - If the metadata of the + If the {{MediaSession/metadata}} of the active media session is an empty metadata, unset the media metadata presented to the platform, and terminate these steps.
    3. - Update the media metadata presented to the platform to match the metadata for the + Update the media metadata presented to the platform to match the + {{MediaSession/metadata}} for the active media session.
    4. - If the user agent wants to display an artwork image, it is + If the user agent wants to display an [=MediaMetadata/artwork image=], it is RECOMMENDED to run the fetch image algorithm.
    @@ -365,30 +275,27 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]] existing algorithm execution instances.
  • - If metadata's artwork of the active media - session is empty, then terminate these steps. + If metadata's {{MediaMetadata/artwork}} of the active + media session is empty, then terminate these steps.
  • If the platform supports displaying media artwork, select a - preferred artwork image from metadata's artwork of the active - media session. + preferred artwork image from metadata's + {{MediaMetadata/artwork}} of the active media session.
  • - Fetch the preferred artwork image's - {{MediaImage/src}}. + [=Fetch=] the preferred artwork image's {{MediaImage/src}}. Then, in parallel:
    1. - Wait for the response. + Wait for the [=/response=].
    2. - If the response's internal response's type is default, attempt to decode the resource as - an image. + If the [=/response=]'s [=response/type=] is + {{ResponseType/"default"}}, attempt to decode the resource as an + image.
    3. If the image format is supported, use the image as the artwork for @@ -414,10 +321,19 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]]

      - A media session action - source is a source that might produce a media session action. - Such a source can be the platform or the UI surfaces created by the user - agent. + A media session action source is a source that might produce a + media session action. Such a source can be the platform or the UI + surfaces created by the user agent. +

      +

      + A media session action source has an optional + target which should be the + recipient of any media session action created by the + media session action source. If a media session action + source's + target is `null`, the active + media session is the recipient of all of that + media session action source's actions.

      @@ -425,45 +341,72 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]] which can have one of the following value:

      • - play: the action intent + play: the action's intent is to resume the playback.
      • - pause: the action intent - is to pause the currently active playback. + pause: the action's + intent is to pause the currently active playback.
      • - seekbackward: the action - intent is to move the playback time backward by a short period (eg. a - few seconds). + seekbackward: the + action's intent is to move the playback time backward by a short + period (eg. a few seconds).
      • - seekforward: the action + seekforward: the action's intent is to move the playback time forward by a short period (eg. a few seconds).
      • - previoustrack: the action - intent is to either start the current playback from the beginning if - the playback has a notion of beginning, or move to the previous item - in the playlist if the playback has a notion of playlist. + previoustrack: the + action's intent is to either start the current playback from the + beginning if the playback has a notion of beginning, or move to the + previous item in the playlist if the playback has a notion of + playlist.
      • - nexttrack: the action is - to move to the playback to the next item in the playlist if the - playback has a notion of playlist. + nexttrack: the action's + intent is to move the playback to the next item in the playlist if + the playback has a notion of playlist.
      • - skipad: the action intent - is to skip the advertisement that is currently playing. + skipad: the action's + intent is to skip the advertisement that is currently playing.
      • - stop: the action intent + stop: the action's intent is to stop the playback and clear the state if appropriate.
      • - seekto: the action intent - is to move the playback time to a specific time. + seekto: the action's + intent is to move the playback time to a specific time. +
      • +
      • + togglemicrophone: the + action's intent is to mute or unmute the user's microphone. +
      • +
      • + togglecamera: the + action's intent is to turn the user's active camera on or off. +
      • +
      • + hangup: the action's + intent is to end a call. +
      • +
      • + previousslide: the + action's intent is to go back to the previous slide when presenting + slides. +
      • +
      • + nextslide: the action's + intent is to go to the next slide when presenting slides. +
      • +
      • + enterpictureinpicture: + the action's intent is to open the media session in a + picture-in-picture window.

      @@ -480,7 +423,7 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]] is invoked, the user agent MUST run the following steps:
      1. - If handler is null, remove action + If handler is `null`, remove action from the supported media session actions for {{MediaSession}} and abort these steps.
      2. @@ -501,17 +444,25 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]]

        When the user agent is notified by a media session action source - that a + named source that a media session action named action has been triggered, - the user agent MUST run the handle media session action steps - as follows and consider it triggered by user activation: + the user agent MUST queue a task, using the [=user interaction task + source=], to run the following + handle media session action steps:

        1. - If the active media session is null, abort these - steps. + Let session be source's target. +
        2. +
        3. + If session is `null`, set session to the + active media session.
        4. - Let actions be the active media session's + If session is `null`, abort these steps. +
        5. +
        6. + Let actions be session's supported media session actions.
        7. @@ -524,20 +475,11 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]]
        8. Run handler with the details parameter set to: -
            -
          • - {{MediaSessionSeekActionDetails}} if action is - seekbackward or - seekforward. -
          • -
          • - {{MediaSessionSeekToActionDetails}} if action is - seekto. -
          • -
          • - Otherwise, with {{MediaSessionActionDetails}}. -
          • -
          + {{MediaSessionActionDetails}}. +
        9. +
        10. + Run the activation notification steps in the [=/browsing + context=] associated with session.

        @@ -546,11 +488,11 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]] When the user agent receives a joint command for play and pause, such as a headset button click, it MUST - run the following steps: + queue a task, using the [=user interaction task source=], to run + the following steps:
        1. - If the active media session is null, abort these - steps. + If the active media session is `null`, abort these steps.
        2. Let action be a media session action. @@ -578,6 +520,23 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]] provided for the active media session.

          +

          + A user agent MAY implement a default handler for the togglemicrophone, togglecamera, or hangup media session actions if none was + provided for the active media session. +

          +

          + A user agent MAY expose microphone and camera state to web pages via + {{MediaStreamTrack}}'s {{MediaStreamTrack/muted}} attribute in addition + to {{MediaSessionAction/togglemicrophone}} or + {{MediaSessionAction/togglecamera}} [=media session action=]. In that + case, the user agent MUST execute the corresponding + {{MediaSessionActionHandler}} before running, as different tasks, the + steps defined to [$set a track's muted state$]. +

          +

          A page should only register a {{MediaSessionActionHandler}} for a media session action when it can handle the action given that the user agent @@ -640,7 +599,7 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]]

-

Position State

+

Position State

A user agent MAY display the current playback position and @@ -673,7 +632,7 @@ conforming IDL fragments, as described in the Web IDL specification. [[!WEBIDL]]

The RECOMMENDED way to determine the position state is to monitor the media elements whose node document's browsing context is the - browsing context. + [=/browsing context=].

@@ -745,10 +704,16 @@ enum MediaSessionAction { "nexttrack", "skipad", "stop", - "seekto" + "seekto", + "togglemicrophone", + "togglecamera", + "hangup", + "previousslide", + "nextslide", + "enterpictureinpicture" }; -callback MediaSessionActionHandler = void(MediaSessionActionDetails details); +callback MediaSessionActionHandler = undefined(MediaSessionActionDetails details); [Exposed=Window] interface MediaSession { @@ -756,9 +721,13 @@ interface MediaSession { attribute MediaSessionPlaybackState playbackState; - void setActionHandler(MediaSessionAction action, MediaSessionActionHandler? handler); + undefined setActionHandler(MediaSessionAction action, MediaSessionActionHandler? handler); + + undefined setPositionState(optional MediaPositionState state = {}); - void setPositionState(optional MediaPositionState? state); + Promise<undefined> setMicrophoneActive(boolean active); + + Promise<undefined> setCameraActive(boolean active); }; @@ -770,7 +739,7 @@ interface MediaSession {

A {{MediaSession}} has an associated metadata - object represented by a {{MediaMetadata}}. It is initially null. + object represented by a {{MediaMetadata}}. It is initially `null`.

@@ -781,24 +750,21 @@ interface MediaSession {

The metadata attribute - reflects the {{MediaSession}}'s metadata. On getting, - it MUST return the {{MediaSession}}'s metadata. On - setting, it MUST run the following steps with value being the new - value being set: + reflects the {{MediaSession}}'s {{MediaSession/metadata}}. On getting, it MUST + return the {{MediaSession}}'s {{MediaSession/metadata}}. On setting, it MUST + run the following steps with value being the new value being set:

  1. - If the {{MediaSession}}'s metadata is not - null, set its media session to - null. + If the {{MediaSession}}'s {{MediaSession/metadata}} is not `null`, set its + [=MediaMetadata/media session=] to `null`.
  2. - Set the {{MediaSession}}'s metadata to + Set the {{MediaSession}}'s {{MediaSession/metadata}} to value.
  3. - If the {{MediaSession}}'s metadata is not - null, set its media session to the - current {{MediaSession}}. + If the {{MediaSession}}'s {{MediaSession/metadata}} is not `null`, set its + [=MediaMetadata/media session=] to the current {{MediaSession}}.
  4. In parallel, run the update metadata algorithm. @@ -809,14 +775,14 @@ interface MediaSession {

    The playbackState attribute represents the declared playback state of the media - session, by which the session declares whether its browsing context + session, by which the session declares whether its [=/browsing context=] is playing media or not. The initial value is none. On setting, the user agent MUST set the IDL attribute to the new value if it is a valid {{MediaSessionPlaybackState}} value. On getting, the user agent MUST return the last valid value that was set. The {{MediaSession/playbackState}} - attribute is a hint for the user agent to determine whether the browsing - context is playing or paused. + attribute is a hint for the user agent to determine whether the [=/browsing + context=] is playing or paused.

    @@ -826,66 +792,67 @@ interface MediaSession {

    The {{MediaSessionPlaybackState}} enum is used to indicate whether a - browsing context is playing media or not, the values are described as + [=/browsing context=] is playing media or not, the values are described as follows:

    • none means the - browsing context - does not specify whether it's playing or paused, it can only be used in - the {{MediaSession/playbackState}} attribute. + [=/browsing context=] does not specify whether it's playing or paused, it + can only be used in the {{MediaSession/playbackState}} attribute.
    • playing means the - browsing context is currently playing media and it can be paused. + [=/browsing context=] is currently playing media and it can be paused.
    • paused means the - browsing context has paused media and it can be resumed. + [=/browsing context=] has paused media and it can be resumed.

    - The setActionHandler() method, when - invoked, MUST run the update action handler algorithm with + The setActionHandler(action, handler) + method, when invoked, MUST run the update action handler algorithm with action and handler on the {{MediaSession}}.

    - The setPositionState() method, when invoked - MUST perform the following steps: + The setPositionState(|state|) method, when + invoked, MUST perform the following steps:

    +

    + The setMicrophoneActive(active) method + indicates to the user agent the microphone capture state desired by the page + (e.g. if the microphone is considered "inactive" by the page since it is no + longer sending audio through a call, the page can invoke + setMicrophoneActive(false)). When invoked, it MUST perform the + following steps: +

      +
    1. + Let document be [=this=]'s [=relevant global object=]'s + [=associated Document=]. +
    2. +
    3. + Let captureKind be "microphone". +
    4. +
    5. + Return the result of running the [=update capture state algorithm=] with + document, active and captureKind. +
    6. +
    +

    +

    + Similarly, the setCameraActive(active) + method indicates to the user agent the camera capture state desired by the + page. When invoked, it MUST perform the following steps: +

      +
    1. + Let document be [=this=]'s [=relevant global object=]'s + [=associated Document=]. +
    2. +
    3. + Let captureKind be "camera". +
    4. +
    5. + Return the result of running the [=update capture state algorithm=] with + document, active and captureKind. +
    6. +
    +

    +

    + The update capture state algorithm, when invoked with + document, active and captureKind, MUST + perform the following steps: +

      +
    1. + If document is not [=fully active=], return [=a promise + rejected with=] InvalidStateError. +
    2. +
    3. + If active is true and document's + [=Document/visibility state=] is not "visible", the user agent MAY return + [=a promise rejected with=] InvalidStateError. +
    4. +
    5. + Let p be a new promise. +
    6. +
    7. + In parallel, run the following steps: +
        +
      1. + Let applyPausePolicy be true if the user agent + implements a policy of pausing all input sources of type + captureKind in response to UI and false + otherwise. +
      2. +
      3. + If applyPausePolicy is true, run the following + substeps: +
          +
        1. + Let currentlyActive be false if the user + agent is currently [=pausing all input sources=] of type + captureKind + and true otherwise. +
        2. +
        3. + If active is currentlyActive, resolve + p with undefined and abort these steps. +
        4. +
        5. + If active is true, the user agent MAY wait + to proceed, for instance to prompt the user. +
        6. +
        7. + If the user agent denies the request to update the capture state, + reject p with a NotAllowedError and + abort these steps. +
        8. +
        +
      4. +
      5. + Update the user agent capture state UI according to + captureKind + and active. +
      6. +
      7. Resolve p with undefined.
      8. +
      9. + If applyPausePolicy is true, run the following + substeps: +
          +
        1. + Let newMutedState be true if + active is + false and false otherwise.
        2. +
        3. + For each {{MediaStreamTrack}} whose source is of type + captureKind, + queue a task to [$set a track's muted state$] to + newMutedState. +
        4. +
        +
      10. +
      +
    8. +
    9. + Return p. +
    10. +
    +

    +

    + Both the setMicrophoneActive(active) and setCameraActive(active) + methods can reject based on user agent specific heuristics. This might in + particular happen when the web page asks to activate (aka unmute) microphone + or camera. The user agent could decide to require [=transient activation=] in + that case. It might also require user input through a prompt to make the + actual decision. +

    + +

    + The user agent MAY display UI which invokes handlers for + media session actions. +

    +

    The {{MediaMetadata}} interface

    @@ -903,7 +1003,8 @@ interface MediaMetadata {
       attribute DOMString title;
       attribute DOMString artist;
       attribute DOMString album;
    -  attribute FrozenArray<object> artwork;
    +  attribute FrozenArray<MediaImage> artwork;
    +  [SameObject] readonly attribute FrozenArray<ChapterInformation> chapterInfo;
     };
     
     dictionary MediaMetadataInit {
    @@ -911,6 +1012,7 @@ dictionary MediaMetadataInit {
       DOMString artist = "";
       DOMString album = "";
       sequence<MediaImage> artwork = [];
    +  sequence<ChapterInformationInit> chapterInfo = [];
     };
     
    @@ -937,21 +1039,27 @@ dictionary MediaMetadataInit { but a list of type {{MediaImage}} internally.

    +

    + A {{MediaMetadata}} has an associated list of + chapter information. +

    +

    A {{MediaMetadata}} is said to be an empty metadata if it is equal - to null or all the following conditions are true: + to `null` or all the following conditions are true:

    - The MediaMetadata(init) + The MediaMetadata(init) constructor, when invoked, MUST run the following steps:

      @@ -977,6 +1085,20 @@ dictionary MediaMetadataInit { and set metadata's artwork images as the result if it succeeded. +
    1. + Let chapters be an empty list of type {{ChapterInformation}}. +
    2. +
    3. + For each entry in init's + {{MediaMetadataInit/chapterInfo}}, [=create a ChapterInformation=] from + entry and append it to + chapters. +
    4. +
    5. + Set metadata's chapter information + to the result of [=Create a frozen array|creating a frozen array=] from + chapters. +
    6. Return metadata.
    7. @@ -991,8 +1113,8 @@ the user agent MUST run the following steps: Let output be an empty list of type {{MediaImage}}.
    8. - For each entry in input's - {{MediaMetadataInit/artwork}}, perform the following steps: + For each entry in input (which is a {{MediaImage}} + list), perform the following steps:
      1. Let image be a new {{MediaImage}}. @@ -1024,31 +1146,29 @@ the user agent MUST run the following steps:

      - The title attribute - reflects the {{MediaMetadata}}'s title. On getting, - it MUST return the {{MediaMetadata}}'s title. On - setting, it MUST set the {{MediaMetadata}}'s title to - the given value. + The title attribute reflects the + {{MediaMetadata}}'s title. On getting, it MUST return + the {{MediaMetadata}}'s title. On setting, it MUST + set the {{MediaMetadata}}'s title to the given value.

      - The artist attribute - reflects the {{MediaMetadata}}'s artist. On getting, - it MUST return the {{MediaMetadata}}'s artist. On - setting, it MUST set the {{MediaMetadata}}'s artist + The artist attribute reflects the + {{MediaMetadata}}'s artist. On getting, it MUST + return the {{MediaMetadata}}'s artist. On setting, it + MUST set the {{MediaMetadata}}'s artist to the given value.

      - The album attribute - reflects the {{MediaMetadata}}'s album. On getting, - it MUST return the {{MediaMetadata}}'s album. On - setting, it MUST set the {{MediaMetadata}}'s album to - the given value. + The album attribute reflects the + {{MediaMetadata}}'s album. On getting, it MUST return + the {{MediaMetadata}}'s album. On setting, it MUST + set the {{MediaMetadata}}'s album to the given value.

      - The artwork + The artwork attribute reflects the {{MediaMetadata}}'s artwork images. On getting, it MUST return the result of the following steps:

        @@ -1084,8 +1204,8 @@ the user agent MUST run the following steps: better to do this with IDL primitives instead of JS - see https://www.w3.org/Bugs/Public/show_bug.cgi?id=29004 -->
      1. - Call Object.freeze on the object, - to prevent accidental mutation by scripts. + Call {{Object/freeze(O)}} on image, to prevent accidental + mutation by scripts.
      2. Append the object to frozenArtwork. @@ -1117,8 +1237,8 @@ the user agent MUST run the following steps: following steps:
        1. - If the instance has no associated media session, - abort these steps. + If the instance has no associated [=MediaMetadata/media session=], abort + these steps.
        2. Otherwise, queue a task to run the following substeps: @@ -1136,6 +1256,108 @@ the user agent MUST run the following steps:

        +

        The {{ChapterInformation}} +interface

        + +
        +[Exposed=Window]
        +interface ChapterInformation {
        +  readonly attribute DOMString title;
        +  readonly attribute double startTime;
        +  [SameObject] readonly attribute FrozenArray<MediaImage> artwork;
        +};
        +
        +dictionary ChapterInformationInit {
        +  DOMString title = "";
        +  double startTime = 0;
        +  sequence<MediaImage> artwork = [];
        +};
        +
        +
        + +

        + A {{ChapterInformation}} object is a representation of metadata for an + individual chapter, such as the title of the section, its timestamp, and + screenshot image data of this section, that can be used by user agents to + provide a customized user interface. +

        + +

        + A {{ChapterInformation}} can have an associated + media metadata. +

        + +

        + A {{ChapterInformation}} has an associated title + which is a DOMString. +

        + +

        + A {{ChapterInformation}} has an associated + startTime which is a double. +

        + +

        + A {{ChapterInformation}} has an associated list of + artwork images. +

        + +

        + To create a {{ChapterInformation}} with init, run the + following steps: + +

          +
        1. + Let chapterInfo be a new {{ChapterInformation}} object. +
        2. +
        3. + Set chapterInfo's {{ChapterInformation/title}} to + init's {{ChapterInformationInit/title}}. +
        4. +
        5. + Set chapterInfo's {{ChapterInformation/startTime}} to + init's {{ChapterInformationInit/startTime}}. If the startTime is negative or greater than + [=duration=], throw a TypeError. +
        6. +
        7. + Let {{ChapterInformationInit/artwork}} be the result of running the + convert artwork algorithm. +
        8. +
        9. + Set chapterInfo's artwork + images to the result of [=Create a frozen array|creating a frozen + array=] from {{ChapterInformationInit/artwork}}. +
        10. +
        11. + Return chapterInfo. +
        12. +
        +

        + +

        + The title attribute reflects the + {{ChapterInformation}}'s title. On getting, it + MUST return the {{ChapterInformation}}'s title. +

        + +

        + The startTime attribute reflects + the {{ChapterInformation}}'s startTime in + seconds. On getting, it MUST return the {{ChapterInformation}}'s startTime. +

        + +

        + The artwork + attribute reflects the {{ChapterInformation}}'s artwork images. On getting, it MUST return the + {{ChapterInformation}}'s + artwork images. +

        +

        The {{MediaImage}} dictionary

        @@ -1147,22 +1369,21 @@ dictionary MediaImage {
         };
         
        -The {{MediaImage}} dictionary members are inspired by the image objects in Web App Manifest. +

        The {{MediaImage}} dictionary members are inspired by +{{ImageResource}} in [[IMAGE-RESOURCE]].

        The src dictionary member is used -to specify the {{MediaImage}} object's source. It is -a URL from which the user agent can fetch the image's data. +to specify the {{MediaImage}} object's source. It is a URL from which the user agent can fetch +the image's data. The sizes dictionary member is used to specify the {{MediaImage}} object's {{MediaImage/sizes}}. It follows the -spec of sizes attribute in -the HTML -link element, which is a string -consisting of an unordered set of unique space-separated tokens which are -ASCII case-insensitive that represents the dimensions of an image. Each -keyword is either an ASCII case-insensitive match for the string "any", -or a value that consists of two valid non-negative integers that do not have a +spec of <{link/sizes}> attribute in the HTML <{link}> element, which is a string +consisting of an [=unordered set of unique space-separated tokens=] which are +[=ASCII case-insensitive=] that represents the dimensions of an image. Each +keyword is either an [=ASCII case-insensitive=] match for the string "any", or a +value that consists of two valid non-negative integers that do not have a leading U+0030 DIGIT ZERO (0) character and that are separated by a single U+0078 LATIN SMALL LETTER X or U+0058 LATIN CAPITAL LETTER X character. The keywords represent icon sizes in raw pixels (as opposed to CSS pixels). When @@ -1183,7 +1404,7 @@ dictionary
         
         dictionary MediaPositionState {
        -  double duration;
        +  unrestricted double duration;
           double playbackRate;
           double position;
         };
        @@ -1221,38 +1442,81 @@ dictionary MediaSessionActionDetails {
         };
         
         dictionary MediaSessionSeekActionDetails : MediaSessionActionDetails {
        -  double? seekOffset;
        +  double seekOffset;
         };
         
         dictionary MediaSessionSeekToActionDetails : MediaSessionActionDetails {
           required double seekTime;
        -  boolean? fastSeek;
        +  boolean fastSeek;
         };
        +
        +dictionary MediaSessionCaptureActionDetails : MediaSessionActionDetails {
        +  boolean isActivating;
        +};
        +
         
        The {{MediaSessionActionHandler}} MUST be run with the details -parameter which is represented by a dictionary inherited from -{{MediaSessionActionDetails}}. - -The action dictionary -member -is used to specify the action that the {{MediaSessionActionHandler}} is -associated with. +parameter whose dictionary type is: +
          +
        • {{MediaSessionActionDetails}} for {{MediaSessionAction/play}}.
        • +
        • {{MediaSessionActionDetails}} for {{MediaSessionAction/pause}}.
        • +
        • {{MediaSessionSeekActionDetails}} for + {{MediaSessionAction/seekbackward}}.
        • +
        • {{MediaSessionSeekActionDetails}} for + {{MediaSessionAction/seekforward}}.
        • +
        • {{MediaSessionActionDetails}} for + {{MediaSessionAction/previoustrack}}.
        • +
        • {{MediaSessionActionDetails}} for {{MediaSessionAction/nexttrack}}.
        • +
        • {{MediaSessionActionDetails}} for {{MediaSessionAction/skipad}}.
        • +
        • {{MediaSessionActionDetails}} for {{MediaSessionAction/stop}}.
        • +
        • {{MediaSessionSeekToActionDetails}} for + {{MediaSessionAction/seekto}}.
        • +
        • {{MediaSessionCaptureActionDetails}} for + {{MediaSessionAction/togglemicrophone}}.
        • +
        • {{MediaSessionCaptureActionDetails}} for + {{MediaSessionAction/togglecamera}}.
        • +
        • {{MediaSessionActionDetails}} for {{MediaSessionAction/hangup}}.
        • +
        • {{MediaSessionActionDetails}} for + {{MediaSessionAction/previousslide}}.
        • +
        • {{MediaSessionActionDetails}} for {{MediaSessionAction/nextslide}}.
        • +
        • {{MediaSessionActionDetails}} for + {{MediaSessionAction/enterpictureinpicture}}.
        • +
        + +The action +dictionary member is used to specify the media session action +that the {{MediaSessionActionHandler}} is associated with. The seekOffset -dictionary member MAY be provided and is the time in seconds to move the -playback time by. If it is not provided then the site should choose a sensible -time (e.g. a few seconds). +dictionary member is the time in seconds to move the playback time by. If +present, it should always be positive. If it is not provided then the site +should choose a sensible time (e.g. a few seconds). The seekTime -dictionary member MUST be provided and is the time in seconds to move the -playback time to. +dictionary member is the time in seconds to move the playback time to. + +The fastSeek +dictionary member will be true if the seek [=media session +action|action=] is being called multiple times as part of a sequence and this is +not the last call in that sequence. + +The isActivating +dictionary member will be false if the user agent is about +to [=pausing all input sources|pause all input sources=] related to the capture +[=media session action|action=] and true otherwise. This +dictionary member MUST be present if the user agent implements a policy +of [=pausing all input sources=]. -The fastSeek -dictionary member MAY be provided and will be true if the -seekto action is being called -multiple times as part of a sequence and this is not the last call in that -sequence. +

        Permissions Policy Integration

        + +This specification defines a [=policy-controlled feature=] identified by the +string "mediasession". Its [=default allowlist=] is [=default allowlist/*=]. + +A document's permissions policy determines whether any content in that +document is allowed to use the MediaSession API. If disabled in the document, +the user agent MUST NOT select the document's media session as the active +media session.

        Examples

        @@ -1266,14 +1530,19 @@ sequence. title: "Episode Title", artist: "Podcast Host", album: "Podcast Title", - artwork: [{src: "podcast.jpg"}] + artwork: [{src: "podcast.jpg"}], + chapterInfo: [ + {title: "Chapter 1", startTime: 0, artwork: [{src: "chapter1.jpg"}]}, + {title: "Chapter 2", startTime: 120, artwork: [{src: "chapter2.jpg"}]} + ] }); Alternatively, providing multiple artwork images in the metadata can let the user agent be able to select different artwork images for different display purposes and better fit - for different screens: + for different screens (the same applies to the artwork in + {{MediaMetadata/chapterInfo}}):
             navigator.mediaSession.metadata = new MediaMetadata({
        @@ -1287,23 +1556,32 @@ sequence.
                 {src: "podcast.png", sizes: "128x128", type: "image/png"},
                 {src: "podcast_hd.png", sizes: "256x256", type: "image/png"},
                 {src: "podcast.ico", sizes: "128x128 256x256", type: "image/x-icon"}
        +      ],
        +      chapterInfo: [
        +        {title: "Chapter 1", startTime: 0, artwork: [
        +           {src: "chapter1_a.jpg", sizes: "128x128", type: "image/jpeg"},
        +           {src: "chapter1_b.png", sizes: "256x256", type: "image/png"}
        +         ]},
        +        {title: "Chapter 2", startTime: 120, artwork: [
        +           {src: "chapter2_a.jpg", sizes: "128x128", type: "image/jpeg"},
        +           {src: "chapter2_b.png", sizes: "256x256", type: "image/png"}
        +         ]}
               ]
             });
           
        For example, if the user agent wants to use an image as icon, it may choose - "podcast.jpg" or "podcast.png" for a - low-pixel-density screen, and "podcast_hd.jpg" - or "podcast_hd.png" for a high-pixel-density screen. If the user - agent wants to use an image for lockscreen background, - "podcast_xhd.jpg" will be preferred. + `"podcast.jpg"` or `"podcast.png"` for a low-pixel-density screen, and + `"podcast_hd.jpg"` or `"podcast_hd.png"` for a high-pixel-density screen. If + the user agent wants to use an image for lockscreen background, + `"podcast_xhd.jpg"` will be preferred.
        - Changing metadata: + Changing [=MediaSession/metadata=]: - For playlists or chapters of an audio book, multiple media elements can + For playlists or chapters of an audio book, multiple [=media elements=] can share a single media session.
        @@ -1346,7 +1624,7 @@ sequence.
             var audio = document.createElement("audio");
             audio.src = tracks[trackId];
         
        -    void updatePlayingMedia() {
        +    function updatePlayingMedia() {
               audio.src = tracks[trackId];
               // Update metadata (omitted)
             }
        @@ -1454,6 +1732,67 @@ sequence.
           
        +
        + Using video conferencing actions: +
        +    var isMicrophoneActive = false;
        +    var isCameraActive = false;
        +
        +    navigator.mediaSession.setMicrophoneActive(isMicrophoneActive);
        +    navigator.mediaSession.setCameraActive(isCameraActive);
        +
        +    navigator.mediaSession.setActionHandler("togglemicrophone", function() {
        +      if (isMicrophoneActive) {
        +        // Mute the microphone. Implementation omitted.
        +      } else {
        +        // Unmute the microphone. Implementation omitted.
        +      }
        +      isMicrophoneActive = !isMicrophoneActive;
        +      navigator.mediaSession.setMicrophoneActive(isMicrophoneActive);
        +    });
        +
        +    navigator.mediaSession.setActionHandler("togglecamera", function() {
        +      if (isCameraActive) {
        +        // Disable the camera. Implementation omitted.
        +      } else {
        +        // Enable the camera. Implementation omitted.
        +      }
        +      isCameraActive = !isCameraActive;
        +      navigator.mediaSession.setCameraActive(isCameraActive);
        +    });
        +
        +    navigator.mediaSession.setActionHandler("hangup", function() {
        +      // End the call. Implementation omitted.
        +    });
        +  
        +
        + +
        + Handling presenting slide actions: +
        +    var currentSlideIndex = 0;
        +
        +    navigator.mediaSession.setActionHandler("previousslide", function() {
        +      currentSlideIndex--;
        +      // Set current slide. Implementation omitted.
        +    });
        +
        +    navigator.mediaSession.setActionHandler("nextslide", function() {
        +      currentSlideIndex++;
        +      // Set current slide. Implementation omitted.
        +    });
        +  
        +
        + +
        + Handling picture-in-picture: +
        +    navigator.mediaSession.setActionHandler("enterpictureinpicture", function() {
        +      remoteVideo.requestPictureInPicture();
        +    });
        +  
        +
        +

        Acknowledgments

        The editors would like to thank Paul Adenot, Jake Archibald, Tab Atkins, @@ -1468,5 +1807,3 @@ patience in working through the initial design issues; Jer Noble for his help in building a model that also works well within the iOS audio focus model; and Mounir Lamouri and Anton Vayvod for their early involvement, feedback and support in making this specification happen. - -