Skip to content

Commit

Permalink
Merge pull request #13 from pupil-labs/audio
Browse files Browse the repository at this point in the history
initial audio stream implementation w/ playback example, audiovideostream wrapper
  • Loading branch information
rennis250 authored Apr 3, 2024
2 parents e89e880 + d883df0 commit 82716d4
Show file tree
Hide file tree
Showing 12 changed files with 364 additions and 68 deletions.
29 changes: 19 additions & 10 deletions examples/make_gaze_overlay_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
gaze = rec.gaze
eye = rec.eye
scene = rec.scene
scene_video = scene.video_stream
scene_audio = scene.audio_stream
imu = rec.imu


Expand Down Expand Up @@ -47,10 +49,12 @@ def convert_neon_pts_to_video_pts(neon_pts, neon_time_base, video_time_base):
fps = 65535
container = plv.open("video.mp4", mode="w")

stream = container.add_stream("mpeg4", rate=fps)
stream.width = scene.width
stream.height = scene.height
stream.pix_fmt = "yuv420p"
out_video_stream = container.add_stream("mpeg4", rate=fps)
out_video_stream.width = scene.width
out_video_stream.height = scene.height
out_video_stream.pix_fmt = "yuv420p"

out_audio_stream = container.add_stream("aac", rate=scene_audio.sample_rate)

neon_time_base = scene.data[0].time_base
video_time_base = Fraction(1, fps)
Expand Down Expand Up @@ -105,9 +109,14 @@ def convert_neon_pts_to_video_pts(neon_pts, neon_time_base, video_time_base):
pts_offset = 0
video_pts = 0
reached_video_start = False
for gaze_datum, eye_frame, scene_frame, imu_datum in zip(
gaze.sample(my_ts), eye.sample(my_ts), scene.sample(my_ts), imu.sample(my_ts)
):
combined_data = zip(
gaze.sample(my_ts),
eye.sample(my_ts),
scene_video.sample(my_ts),
imu.sample(my_ts),
scene_audio.sample(my_ts),
)
for gaze_datum, eye_frame, scene_frame, imu_datum, audio_sample in combined_data:
scene_image = (
scene_frame.cv2
if scene_frame is not None
Expand Down Expand Up @@ -163,7 +172,7 @@ def convert_neon_pts_to_video_pts(neon_pts, neon_time_base, video_time_base):

frame.pts = pts_offset + video_pts
frame.time_base = video_time_base
for packet in stream.encode(frame):
for packet in out_video_stream.encode(frame):
container.mux(packet)

if scene_frame is not None:
Expand All @@ -175,8 +184,8 @@ def convert_neon_pts_to_video_pts(neon_pts, neon_time_base, video_time_base):
pts_offset += avg_video_pts_size

try:
# Flush stream
for packet in stream.encode():
# Flush out_video_stream
for packet in out_video_stream.encode():
container.mux(packet)
finally:
# Close the file
Expand Down
130 changes: 130 additions & 0 deletions examples/plot_and_play_audio.ipynb

Large diffs are not rendered by default.

6 changes: 2 additions & 4 deletions src/pupil_labs/neon_recording/neon_recording.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
import json
import pathlib

import numpy as np

from . import structlog
from .calib import Calibration, parse_calib_bin
from .stream.gaze_stream import GazeStream
from .stream.imu import IMUStream
from .stream.stream import Stream
from .stream.video_stream import VideoStream
from .stream.av_stream import AudioVideoStream, VideoStream
from .time_utils import load_and_convert_tstamps, ns_to_s

log = structlog.get_logger(__name__)
Expand Down Expand Up @@ -65,8 +63,8 @@ def __init__(self, rec_dir_in: pathlib.Path | str):
self.streams = {
"gaze": GazeStream("gaze", self),
"imu": IMUStream("imu", self),
"scene": VideoStream("scene", "Neon Scene Camera v1 ps1", self),
"eye": VideoStream("eye", "Neon Sensor Module v1 ps1", self),
"scene": AudioVideoStream("scene", "Neon Scene Camera v1 ps1", self),
}


Expand Down
2 changes: 1 addition & 1 deletion src/pupil_labs/neon_recording/stream/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from .stream import Stream # noqa: F401
from .stream import Stream

__all__ = ["Stream"]
5 changes: 5 additions & 0 deletions src/pupil_labs/neon_recording/stream/av_stream/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .audio_stream import AudioStream
from .video_stream import VideoStream
from .audio_video_stream import AudioVideoStream

__all__ = ["AudioStream", "VideoStream", "AudioVideoStream"]
117 changes: 117 additions & 0 deletions src/pupil_labs/neon_recording/stream/av_stream/audio_stream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import numpy as np

from ... import structlog
from .av_load import _load_av_container
from ..stream import Stream

log = structlog.get_logger(__name__)


def _convert_audio_data_to_recarray(audio_data, ts, ts_rel):
log.debug("NeonRecording: Converting audio data to recarray format.")

if audio_data.shape[0] != len(ts):
log.error("NeonRecording: Length mismatch - audio_data and ts.")
raise ValueError("audio_data and ts must have the same length")
if len(ts) != len(ts_rel):
log.error("NeonRecording: Length mismatch - ts and ts_rel.")
raise ValueError("ts and ts_rel must have the same length")

out = np.recarray(
audio_data.shape[0],
dtype=[("sample", "<f8"), ("ts", "<f8"), ("ts_rel", "<f8")],
)
out.sample = audio_data[:]
out.ts = ts.astype(np.float64)
out.ts_rel = ts_rel.astype(np.float64)

return out


class AudioStream(Stream):
    """Audio track of a recording file, decoded into a per-sample record array.

    All audio frames of the first audio stream in the backing container are
    decoded at construction time. The first row of each decoded frame is
    flattened into one float64 array and every sample gets a timestamp that is
    linearly spread between the start time of its frame and the start time of
    the next frame.

    Args:
        name: Stream name, forwarded to ``Stream.__init__``.
        file_name: Base name (without extension) of the media file.
        recording: Owning recording; its ``_rec_dir`` is used when the
            container has to be opened here.
        container: Optional already-open container to decode from.
        video_ts: Optional timestamp array belonging to ``container``; its
            first entry is used as the offset for absolute audio timestamps.
    """

    def __init__(self, name, file_name, recording, container=None, video_ts=None):
        super().__init__(name, recording)
        self._file_name = file_name
        self._backing_container = container
        self._video_ts = video_ts
        self._sample_rate = None
        self._n_samples = None

        self._load()

    @property
    def ts_rel(self):
        """Per-sample timestamps derived from the frame times in the container."""
        return self._ts_rel

    @property
    def sample_rate(self):
        """Sample rate reported by the audio stream of the container."""
        return self._sample_rate

    @property
    def n_samples(self):
        """Total number of decoded audio samples."""
        return self._n_samples

    def _load(self):
        """Decode all audio frames and build the sample/timestamp record array."""
        # if a backing_container is supplied, then a ts array is usually also supplied
        if self._backing_container is None:
            log.info(f"NeonRecording: Loading audio from: {self._file_name}.")
            self._backing_container, self._video_ts = _load_av_container(
                self._recording._rec_dir, self._file_name
            )

        audio = self._backing_container.streams.audio[0]
        self._sample_rate = audio.sample_rate
        self._samples_per_frame = 0

        # Collect what is actually decoded instead of pre-sizing buffers from a
        # frame count, so the buffers can never disagree with the frame loop.
        frame_starts = []
        frame_chunks = []
        for frame in audio.frames:
            if not frame_starts:
                self._samples_per_frame = frame.samples
            frame_starts.append(frame.time)
            # Only the first row of the decoded frame is kept.
            frame_chunks.append(np.array(frame.to_ndarray()[0], dtype=np.float64))

        self._n_frames = len(frame_starts)
        starts = np.asarray(frame_starts, dtype=np.float64)

        if self._n_frames > 1:
            # A frame lasts until the next one starts; the last frame has no
            # successor, so it is given the mean duration of the others.
            durations = np.diff(starts)
            durations = np.append(durations, np.mean(durations))
        elif self._n_frames == 1:
            # No neighbouring frame to measure against: fall back to the
            # nominal duration implied by the sample rate.
            durations = np.array([len(frame_chunks[0]) / self._sample_rate])
        else:
            durations = np.zeros(0, dtype=np.float64)

        if frame_chunks:
            audio_data = np.concatenate(frame_chunks)
            # Spread the samples of each frame evenly over that frame's duration.
            ts_rel = np.concatenate(
                [
                    start + duration * np.arange(len(chunk)) / len(chunk)
                    for start, duration, chunk in zip(starts, durations, frame_chunks)
                ]
            )
        else:
            audio_data = np.zeros(0, dtype=np.float64)
            ts_rel = np.zeros(0, dtype=np.float64)

        self._n_samples = audio_data.shape[0]

        # NOTE(review): these relative timestamps come from the container's
        # frame times and are offset by the first video timestamp, whereas
        # VideoStream derives ts_rel from the recording start - confirm that
        # the two conventions are meant to differ.
        self._ts_rel = ts_rel
        self._ts = self._ts_rel + self._video_ts[0]

        audio_data = _convert_audio_data_to_recarray(audio_data, self._ts, ts_rel)

        self._backing_data = audio_data
        self._data = audio_data[:]

    def _sample_linear_interp(self, sorted_ts):
        """Yield linearly interpolated audio records for ``sorted_ts``.

        For each requested timestamp a record with the fields ``sample``,
        ``ts`` and ``ts_rel`` is yielded. Timestamps outside the range covered
        by the audio data (or any timestamp when there is no audio data) yield
        ``None`` instead.
        """
        if len(self._ts) == 0:
            # np.interp rejects empty sample points; nothing can be interpolated.
            for _ in sorted_ts:
                yield None
            return

        samples = self._data.sample

        interp_data = np.zeros(
            len(sorted_ts),
            dtype=[("sample", "<f8"), ("ts", "<f8"), ("ts_rel", "<f8")],
        ).view(np.recarray)
        interp_data.sample = np.interp(
            sorted_ts, self._ts, samples, left=np.nan, right=np.nan
        )
        interp_data.ts = sorted_ts
        interp_data.ts_rel = np.interp(
            sorted_ts, self._ts, self._ts_rel, left=np.nan, right=np.nan
        )

        for d in interp_data:
            # Out-of-range timestamps were filled with NaN by np.interp.
            if not np.isnan(d.sample):
                yield d
            else:
                yield None

Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from ... import structlog
from .audio_stream import AudioStream
from .video_stream import VideoStream
from .av_load import _load_av_container

log = structlog.get_logger(__name__)

class AudioVideoStream:
    """Video and audio streams that share one media file and one container.

    The file is opened once and the resulting container and timestamp array
    are handed to both a ``VideoStream`` and an ``AudioStream``.

    Args:
        name: Name of this combined stream.
        file_name: Base name (without extension) of the media file.
        recording: Owning recording; its ``_rec_dir`` locates the file.
    """

    def __init__(self, name, file_name, recording):
        self.name = name
        self._recording = recording
        self._file_name = file_name
        self._video_stream = None
        self._audio_stream = None

        self._load()

    @property
    def video_stream(self):
        """The ``VideoStream`` built from the shared container."""
        return self._video_stream

    @property
    def audio_stream(self):
        """The ``AudioStream`` built from the shared container."""
        return self._audio_stream

    # Callers that treated the scene stream as a plain video stream read
    # ``width``, ``height`` and ``data`` directly from it; delegate those to
    # the video stream so such code keeps working.
    @property
    def width(self):
        """Frame width of the video stream."""
        return self._video_stream.width

    @property
    def height(self):
        """Frame height of the video stream."""
        return self._video_stream.height

    @property
    def data(self):
        """Data of the video stream.

        NOTE(review): assumes the video stream exposes a ``data`` attribute
        (presumably via the ``Stream`` base class) - confirm.
        """
        return self._video_stream.data

    def _load(self):
        """Open the media file once and build both streams on top of it."""
        log.info(f"NeonRecording: Loading audio-video: {self._file_name}.")

        container, video_ts = _load_av_container(
            self._recording._rec_dir, self._file_name
        )

        self._video_stream = VideoStream(
            "video", self._file_name, self._recording, container=container, ts=video_ts
        )
        self._audio_stream = AudioStream(
            "audio",
            self._file_name,
            self._recording,
            container=container,
            video_ts=video_ts,
        )
33 changes: 33 additions & 0 deletions src/pupil_labs/neon_recording/stream/av_stream/av_load.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pathlib

import pupil_labs.video as plv

from ... import structlog
from ...time_utils import load_and_convert_tstamps

log = structlog.get_logger(__name__)


def _load_av_container(rec_dir: pathlib.Path, file_name: pathlib.Path | str):
log.debug(
f"NeonRecording: Loading video and associated timestamps: {file_name}."
)

if not (rec_dir / (file_name + ".mp4")).exists():
raise FileNotFoundError(
f"File not found: {rec_dir / (file_name + '.mp4')}. Please double check the recording download."
)

container = plv.open(rec_dir / (file_name + ".mp4"))

# use hardware ts
# ts = load_and_convert_tstamps(rec_dir / (file_name + '.time_aux'))
try:
video_ts = load_and_convert_tstamps(
rec_dir / (file_name + ".time")
)
except Exception as e:
log.exception(f"Error loading timestamps: {e}")
raise

return container, video_ts
Original file line number Diff line number Diff line change
@@ -1,25 +1,40 @@
import math
from typing import Optional

import numpy as np
import pupil_labs.video as plv

from .. import structlog
from ..time_utils import load_and_convert_tstamps
from .stream import Stream
from ... import structlog
from ..stream import Stream
from .av_load import _load_av_container

log = structlog.get_logger(__name__)


class VideoStream(Stream):
def __init__(self, name, file_name, recording):
def __init__(self, name, file_name, recording, container=None, ts=None):
super().__init__(name, recording)
self._file_name = file_name
self._backing_container = container
self._ts = ts
self._width = None
self._height = None

self._load()

def _load(self):
# if a backing_container is supplied, then a ts array is usually also supplied
if self._backing_container is None:
log.info(f"NeonRecording: Loading video: {self._file_name}.")
self._backing_container, self._ts = _load_av_container(self._recording._rec_dir, self._file_name)

self._backing_data = self._backing_container.streams.video[0]
self._data = self._backing_data.frames
setattr(self._data, "ts", self._ts)
self._ts_rel = self._ts - self._recording._start_ts
setattr(self._data, "ts_rel", self._ts_rel)

self._width = self._data[0].width
self._height = self._data[0].height

@property
def width(self):
return self._width
Expand All @@ -30,55 +45,13 @@ def height(self):

@property
def ts_rel(self):
# if self._ts_rel is None:
# self._ts_rel = self._ts - self._recording._start_ts
# setattr(self._data, "ts_rel", self._ts_rel)

return self._ts_rel

def _sample_linear_interp(self, sorted_ts):
raise NotImplementedError(
"NeonRecording: Video streams only support nearest neighbor interpolation."
)

def _load(self):
log.info(f"NeonRecording: Loading video: {self._file_name}.")

container, ts = self._load_video(self._file_name)

self._backing_data = container.streams.video[0]
self._data = self._backing_data.frames
self._ts = ts
setattr(self._data, "ts", self._ts)
self._ts_rel = self._ts - self._recording._start_ts

self._width = self._data[0].width
self._height = self._data[0].height

def _load_video(self, video_name: str):
log.debug(
f"NeonRecording: Loading video and associated timestamps: {video_name}."
)

if not (self._recording._rec_dir / (video_name + ".mp4")).exists():
raise FileNotFoundError(
f"File not found: {self._recording._rec_dir / (video_name + '.mp4')}. Please double check the recording download."
)

container = plv.open(self._recording._rec_dir / (video_name + ".mp4"))

# use hardware ts
# ts = load_and_convert_tstamps(self._recording._rec_dir / (video_name + '.time_aux'))
try:
ts = load_and_convert_tstamps(
self._recording._rec_dir / (video_name + ".time")
)
except Exception as e:
log.exception(f"Error loading timestamps: {e}")
raise

return container, ts

def _sample_nearest_rob(self, sorted_tses):
log.debug("NeonRecording: Sampling nearest timestamps.")

Expand Down
1 change: 0 additions & 1 deletion src/pupil_labs/neon_recording/stream/gaze_stream.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import pathlib
from typing import Optional

import numpy as np

Expand Down
Loading

0 comments on commit 82716d4

Please sign in to comment.