- initial audio stream implementation w/ playback example

- audiovideostream wrapper
pupil-labs · Apr 3, 2024 · d883df0 · d883df0
1 parent ed5b8c9
commit d883df0
Show file tree

Hide file tree

Showing 12 changed files with 364 additions and 68 deletions.
diff --git a/examples/make_gaze_overlay_video.py b/examples/make_gaze_overlay_video.py
@@ -11,6 +11,8 @@
 gaze = rec.gaze
 eye = rec.eye
 scene = rec.scene
+scene_video = scene.video_stream
+scene_audio = scene.audio_stream
 imu = rec.imu
 
 
@@ -47,10 +49,12 @@ def convert_neon_pts_to_video_pts(neon_pts, neon_time_base, video_time_base):
 fps = 65535
 container = plv.open("video.mp4", mode="w")
 
-stream = container.add_stream("mpeg4", rate=fps)
-stream.width = scene.width
-stream.height = scene.height
-stream.pix_fmt = "yuv420p"
+out_video_stream = container.add_stream("mpeg4", rate=fps)
+# `scene` is an AudioVideoStream wrapper that only exposes `video_stream` and
+# `audio_stream`; frame geometry and timing must be read from the video stream.
+out_video_stream.width = scene_video.width
+out_video_stream.height = scene_video.height
+out_video_stream.pix_fmt = "yuv420p"
+
+out_audio_stream = container.add_stream("aac", rate=scene_audio.sample_rate)
 
-neon_time_base = scene.data[0].time_base
+neon_time_base = scene_video.data[0].time_base
 video_time_base = Fraction(1, fps)
@@ -105,9 +109,14 @@ def convert_neon_pts_to_video_pts(neon_pts, neon_time_base, video_time_base):
 pts_offset = 0
 video_pts = 0
 reached_video_start = False
-for gaze_datum, eye_frame, scene_frame, imu_datum in zip(
-    gaze.sample(my_ts), eye.sample(my_ts), scene.sample(my_ts), imu.sample(my_ts)
-):
+combined_data = zip(
+    gaze.sample(my_ts),
+    eye.sample(my_ts),
+    scene_video.sample(my_ts),
+    imu.sample(my_ts),
+    scene_audio.sample(my_ts),
+)
+for gaze_datum, eye_frame, scene_frame, imu_datum, audio_sample in combined_data:
     scene_image = (
         scene_frame.cv2
         if scene_frame is not None
@@ -163,7 +172,7 @@ def convert_neon_pts_to_video_pts(neon_pts, neon_time_base, video_time_base):
 
     frame.pts = pts_offset + video_pts
     frame.time_base = video_time_base
-    for packet in stream.encode(frame):
+    for packet in out_video_stream.encode(frame):
         container.mux(packet)
 
     if scene_frame is not None:
@@ -175,8 +184,8 @@ def convert_neon_pts_to_video_pts(neon_pts, neon_time_base, video_time_base):
         pts_offset += avg_video_pts_size
 
 try:
-    # Flush stream
-    for packet in stream.encode():
+    # Flush out_video_stream
+    for packet in out_video_stream.encode():
         container.mux(packet)
 finally:
     # Close the file

diff --git a/examples/plot_and_play_audio.ipynb b/examples/plot_and_play_audio.ipynb
diff --git a/src/pupil_labs/neon_recording/neon_recording.py b/src/pupil_labs/neon_recording/neon_recording.py
@@ -1,14 +1,12 @@
 import json
 import pathlib
 
-import numpy as np
-
 from . import structlog
 from .calib import Calibration, parse_calib_bin
 from .stream.gaze_stream import GazeStream
 from .stream.imu import IMUStream
 from .stream.stream import Stream
-from .stream.video_stream import VideoStream
+from .stream.av_stream import AudioVideoStream, VideoStream
 from .time_utils import load_and_convert_tstamps, ns_to_s
 
 log = structlog.get_logger(__name__)
@@ -183,8 +181,8 @@ def __init__(self, rec_dir_in: pathlib.Path | str):
         self._streams = {
             "gaze": GazeStream("gaze", self),
             "imu": IMUStream("imu", self),
-            "scene": VideoStream("scene", "Neon Scene Camera v1 ps1", self),
             "eye": VideoStream("eye", "Neon Sensor Module v1 ps1", self),
+            "scene": AudioVideoStream("scene", "Neon Scene Camera v1 ps1", self),
         }
 
         log.info("NeonRecording: Finished loading recording.")

diff --git a/src/pupil_labs/neon_recording/stream/__init__.py b/src/pupil_labs/neon_recording/stream/__init__.py
@@ -1,3 +1,3 @@
-from .stream import Stream  # noqa: F401
+from .stream import Stream
 
 __all__ = ["Stream"]
diff --git a/src/pupil_labs/neon_recording/stream/av_stream/__init__.py b/src/pupil_labs/neon_recording/stream/av_stream/__init__.py
@@ -0,0 +1,5 @@
+from .audio_stream import AudioStream
+from .video_stream import VideoStream
+from .audio_video_stream import AudioVideoStream
+
+__all__ = ["AudioStream", "VideoStream", "AudioVideoStream"]
diff --git a/src/pupil_labs/neon_recording/stream/av_stream/audio_stream.py b/src/pupil_labs/neon_recording/stream/av_stream/audio_stream.py
@@ -0,0 +1,117 @@
+import numpy as np
+
+from ... import structlog
+from .av_load import _load_av_container
+from ..stream import Stream
+
+log = structlog.get_logger(__name__)
+
+
+def _convert_audio_data_to_recarray(audio_data, ts, ts_rel):
+    """Pack audio samples and their timestamps into one record array.
+
+    Args:
+        audio_data: Array of samples; its first dimension is the sample count.
+        ts: Array of timestamps, one per sample.
+        ts_rel: Array of relative timestamps, one per sample.
+
+    Returns:
+        A ``np.recarray`` with the float64 fields ``sample``, ``ts`` and
+        ``ts_rel``.
+
+    Raises:
+        ValueError: If the three inputs do not have the same length.
+    """
+    log.debug("NeonRecording: Converting audio data to recarray format.")
+
+    n_samples = audio_data.shape[0]
+    n_ts = len(ts)
+
+    if n_samples != n_ts:
+        log.error("NeonRecording: Length mismatch - audio_data and ts.")
+        raise ValueError("audio_data and ts must have the same length")
+    if n_ts != len(ts_rel):
+        log.error("NeonRecording: Length mismatch - ts and ts_rel.")
+        raise ValueError("ts and ts_rel must have the same length")
+
+    record_dtype = [("sample", "<f8"), ("ts", "<f8"), ("ts_rel", "<f8")]
+    records = np.recarray(n_samples, dtype=record_dtype)
+
+    # Fill the record array one column at a time.
+    records.sample = audio_data[:]
+    records.ts = ts.astype(np.float64)
+    records.ts_rel = ts_rel.astype(np.float64)
+
+    return records
+
+
+class AudioStream(Stream):
+    """Audio track of a recording, exposed as a flat array of timestamped samples.
+
+    On construction the audio frames of the backing container are decoded into
+    a record array with the fields ``sample``, ``ts`` and ``ts_rel``.
+
+    Args:
+        name: Stream name forwarded to ``Stream.__init__``.
+        file_name: Base name (no extension) of the media file; only used when
+            no ``container`` is supplied.
+        recording: Owning recording; its ``_rec_dir`` is read when the
+            container has to be opened here.
+        container: Optional already-opened container to decode audio from.
+        video_ts: Optional timestamps belonging to ``container``; the first
+            entry is the offset that turns ``ts_rel`` into ``ts``.
+    """
+
+    def __init__(self, name, file_name, recording, container=None, video_ts=None):
+        super().__init__(name, recording)
+        self._file_name = file_name
+        self._backing_container = container
+        self._video_ts = video_ts
+        self._sample_rate = None
+        self._n_samples = None
+
+        self._load()
+
+    @property
+    def ts_rel(self):
+        """Per-sample timestamps derived from the audio frame start times."""
+        return self._ts_rel
+
+    @property
+    def sample_rate(self):
+        """Sample rate reported by the first audio stream of the container."""
+        return self._sample_rate
+
+    @property
+    def n_samples(self):
+        """Total number of decoded samples held by this stream."""
+        return self._n_samples
+
+    def _load(self):
+        """Decode all audio frames and build the per-sample record array."""
+        # if a backing_container is supplied, then a ts array is usually also supplied
+        if self._backing_container is None:
+            log.info(f"NeonRecording: Loading audio from: {self._file_name}.")
+            self._backing_container, self._video_ts = _load_av_container(
+                self._recording._rec_dir, self._file_name
+            )
+
+        self._sample_rate = self._backing_container.streams.audio[0].sample_rate
+        # NOTE(review): this stores the `frames` object itself, yet it is used
+        # as a number below (`self._n_frames - 1`) while the same attribute is
+        # indexed and iterated elsewhere - confirm whether `len(...)` was meant.
+        self._n_frames = self._backing_container.streams.audio[0].frames
+        self._samples_per_frame = self._backing_container.streams.audio[0].frames[0].samples
+
+        # NOTE(review): the buffers are sized for `_n_frames - 1` frames while
+        # the loop below enumerates every frame - confirm this is intended.
+        ac = 0
+        audio_data = np.zeros(
+            shape=self._samples_per_frame * (self._n_frames - 1), dtype=np.float64
+        )
+        sample_start_times = np.zeros(shape=(self._n_frames - 1), dtype=np.float64)
+        for sc, sample in enumerate(self._backing_container.streams.audio[0].frames):
+            sample_start_times[sc] = sample.time
+
+            # Only row 0 of each frame's ndarray is copied into the flat buffer.
+            for val in sample.to_ndarray()[0]:
+                audio_data[ac] = val
+                ac += 1
+
+        # Previously never assigned, so `n_samples` always returned None.
+        self._n_samples = audio_data.shape[0]
+
+        # Spread each frame's samples evenly between its start time and the
+        # next frame's start time; the last frame reuses the mean frame spacing.
+        ts_c = 0
+        tdiffs = np.diff(sample_start_times)
+        tdiffs = np.concatenate((tdiffs, [np.mean(tdiffs)]))
+        ts_rel = np.zeros(audio_data.shape)
+        for tc, start_time in enumerate(sample_start_times):
+            for t in range(self._samples_per_frame):
+                ts_rel[ts_c] = start_time + tdiffs[tc] * t / self._samples_per_frame
+                ts_c += 1
+
+        self._ts_rel = ts_rel
+        # Absolute timestamps are offset by the first supplied video timestamp.
+        self._ts = self._ts_rel + self._video_ts[0]
+
+        audio_data = _convert_audio_data_to_recarray(audio_data, self._ts, ts_rel)
+
+        self._backing_data = audio_data
+        self._data = audio_data[:]
+
+    def _sample_linear_interp(self, sorted_ts):
+        """Yield linearly interpolated audio records for ``sorted_ts``.
+
+        Args:
+            sorted_ts: Timestamps at which to interpolate the audio signal.
+
+        Yields:
+            One record (fields ``sample``, ``ts``, ``ts_rel``) per requested
+            timestamp, or ``None`` when the interpolated sample is NaN, which
+            is what ``np.interp`` is told to return outside the stream's
+            timestamp range.
+        """
+        samples = self._data.sample
+
+        interp_data = np.zeros(
+            len(sorted_ts),
+            dtype=[("sample", "<f8"), ("ts", "<f8"), ("ts_rel", "<f8")],
+        ).view(np.recarray)
+        interp_data.sample = np.interp(sorted_ts, self._ts, samples, left=np.nan, right=np.nan)
+        interp_data.ts = sorted_ts
+        interp_data.ts_rel = np.interp(
+            sorted_ts, self._ts, self._ts_rel, left=np.nan, right=np.nan
+        )
+
+        for d in interp_data:
+            # The records have no `x` field; validity is judged on `sample`.
+            if not np.isnan(d.sample):
+                yield d
+            else:
+                yield None
+
diff --git a/src/pupil_labs/neon_recording/stream/av_stream/audio_video_stream.py b/src/pupil_labs/neon_recording/stream/av_stream/audio_video_stream.py
@@ -0,0 +1,34 @@
+from ... import structlog
+from .audio_stream import AudioStream
+from .video_stream import VideoStream
+from .av_load import _load_av_container
+
+log = structlog.get_logger(__name__)
+
+class AudioVideoStream:
+    """Wrapper pairing the video and audio streams of a single media file.
+
+    Both streams are built from one shared container and one shared timestamp
+    array, loaded once via ``_load_av_container``.
+
+    Args:
+        name: Name stored on the wrapper as ``name``.
+        file_name: Base name (no extension) of the media file to load.
+        recording: Owning recording; its ``_rec_dir`` locates the file.
+    """
+
+    def __init__(self, name, file_name, recording):
+        self._video_stream = None
+        self._audio_stream = None
+        self._file_name = file_name
+        self._recording = recording
+        self.name = name
+
+        self._load()
+
+    @property
+    def video_stream(self):
+        """The ``VideoStream`` built from the shared container."""
+        return self._video_stream
+
+    @property
+    def audio_stream(self):
+        """The ``AudioStream`` built from the shared container."""
+        return self._audio_stream
+
+    def _load(self):
+        """Open the container once and create the video and audio streams."""
+        log.info(f"NeonRecording: Loading audio-video: {self._file_name}.")
+
+        shared_container, frame_ts = _load_av_container(
+            self._recording._rec_dir, self._file_name
+        )
+
+        self._video_stream = VideoStream(
+            "video",
+            self._file_name,
+            self._recording,
+            container=shared_container,
+            ts=frame_ts,
+        )
+        self._audio_stream = AudioStream(
+            "audio",
+            self._file_name,
+            self._recording,
+            container=shared_container,
+            video_ts=frame_ts,
+        )
diff --git a/src/pupil_labs/neon_recording/stream/av_stream/av_load.py b/src/pupil_labs/neon_recording/stream/av_stream/av_load.py
@@ -0,0 +1,33 @@
+import pathlib
+
+import pupil_labs.video as plv
+
+from ... import structlog
+from ...time_utils import load_and_convert_tstamps
+
+log = structlog.get_logger(__name__)
+
+
+def _load_av_container(rec_dir: pathlib.Path, file_name: pathlib.Path | str):
+    """Open ``<file_name>.mp4`` in ``rec_dir`` and load its ``.time`` timestamps.
+
+    Args:
+        rec_dir: Recording directory that holds the media and timestamp files.
+        file_name: Base name of the files, without extension.
+
+    Returns:
+        Tuple ``(container, video_ts)``: the object returned by ``plv.open``
+        and the value returned by ``load_and_convert_tstamps`` for the
+        ``.time`` file.
+
+    Raises:
+        FileNotFoundError: If the ``.mp4`` file does not exist.
+        Exception: Whatever ``load_and_convert_tstamps`` raises is logged and
+            re-raised unchanged.
+    """
+    log.debug(
+        f"NeonRecording: Loading video and associated timestamps: {file_name}."
+    )
+
+    # f-strings accept both ``str`` and ``pathlib.Path`` base names, whereas
+    # ``file_name + ".mp4"`` raises TypeError for a Path.
+    video_path = rec_dir / f"{file_name}.mp4"
+    if not video_path.exists():
+        raise FileNotFoundError(
+            f"File not found: {video_path}. Please double check the recording download."
+        )
+
+    # Timestamps are loaded before the container is opened so that a failure
+    # here does not leave an opened container behind.
+    # use hardware ts
+    # ts = load_and_convert_tstamps(rec_dir / (file_name + '.time_aux'))
+    try:
+        video_ts = load_and_convert_tstamps(rec_dir / f"{file_name}.time")
+    except Exception as e:
+        log.exception(f"Error loading timestamps: {e}")
+        raise
+
+    container = plv.open(video_path)
+
+    return container, video_ts
diff --git a/...abs/neon_recording/stream/video_stream.py → ...ecording/stream/av_stream/video_stream.py b/...abs/neon_recording/stream/video_stream.py → ...ecording/stream/av_stream/video_stream.py
@@ -1,25 +1,40 @@
 import math
-from typing import Optional
 
 import numpy as np
-import pupil_labs.video as plv
 
-from .. import structlog
-from ..time_utils import load_and_convert_tstamps
-from .stream import Stream
+from ... import structlog
+from ..stream import Stream
+from .av_load import _load_av_container
 
 log = structlog.get_logger(__name__)
 
 
 class VideoStream(Stream):
-    def __init__(self, name, file_name, recording):
+    def __init__(self, name, file_name, recording, container=None, ts=None):
         super().__init__(name, recording)
         self._file_name = file_name
+        self._backing_container = container
+        self._ts = ts
         self._width = None
         self._height = None
 
         self._load()
 
+    def _load(self):
+        """Bind the video frames and timestamps and record the frame size.
+
+        The container and timestamps passed to the constructor are used when
+        present; otherwise both are loaded via ``_load_av_container``.
+        """
+        # if a backing_container is supplied, then a ts array is usually also supplied
+        if self._backing_container is None:
+            log.info(f"NeonRecording: Loading video: {self._file_name}.")
+            self._backing_container, self._ts = _load_av_container(
+                self._recording._rec_dir, self._file_name
+            )
+
+        self._backing_data = self._backing_container.streams.video[0]
+        self._data = self._backing_data.frames
+
+        # Attach absolute and recording-relative timestamps to the frames object.
+        self._data.ts = self._ts
+        self._ts_rel = self._ts - self._recording._start_ts
+        self._data.ts_rel = self._ts_rel
+
+        # Frame dimensions are read from the first frame.
+        first_frame = self._data[0]
+        self._width = first_frame.width
+        self._height = self._data[0].height
+
+
     @property
     def width(self):
         return self._width
@@ -30,55 +45,13 @@ def height(self):
 
     @property
     def ts_rel(self):
-        # if self._ts_rel is None:
-        # self._ts_rel = self._ts - self._recording._start_ts
-        # setattr(self._data, "ts_rel", self._ts_rel)
-
         return self._ts_rel
 
     def _sample_linear_interp(self, sorted_ts):
         raise NotImplementedError(
             "NeonRecording: Video streams only support nearest neighbor interpolation."
         )
 
-    def _load(self):
-        log.info(f"NeonRecording: Loading video: {self._file_name}.")
-
-        container, ts = self._load_video(self._file_name)
-
-        self._backing_data = container.streams.video[0]
-        self._data = self._backing_data.frames
-        self._ts = ts
-        setattr(self._data, "ts", self._ts)
-        self._ts_rel = self._ts - self._recording._start_ts
-
-        self._width = self._data[0].width
-        self._height = self._data[0].height
-
-    def _load_video(self, video_name: str):
-        log.debug(
-            f"NeonRecording: Loading video and associated timestamps: {video_name}."
-        )
-
-        if not (self._recording._rec_dir / (video_name + ".mp4")).exists():
-            raise FileNotFoundError(
-                f"File not found: {self._recording._rec_dir / (video_name + '.mp4')}. Please double check the recording download."
-            )
-
-        container = plv.open(self._recording._rec_dir / (video_name + ".mp4"))
-
-        # use hardware ts
-        # ts = load_and_convert_tstamps(self._recording._rec_dir / (video_name + '.time_aux'))
-        try:
-            ts = load_and_convert_tstamps(
-                self._recording._rec_dir / (video_name + ".time")
-            )
-        except Exception as e:
-            log.exception(f"Error loading timestamps: {e}")
-            raise
-
-        return container, ts
-
     def _sample_nearest_rob(self, sorted_tses):
         log.debug("NeonRecording: Sampling nearest timestamps.")
 

diff --git a/src/pupil_labs/neon_recording/stream/gaze_stream.py b/src/pupil_labs/neon_recording/stream/gaze_stream.py
@@ -1,5 +1,4 @@
 import pathlib
-from typing import Optional
 
 import numpy as np