Skip to content

Commit

Permalink
Merge pull request #50 from sul-dlss/check-media
Browse files Browse the repository at this point in the history
Check media and log attributes
  • Loading branch information
edsu authored Nov 27, 2024
2 parents d14f962 + 211241e commit 2708711
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
__pycache__/
whisper_models
*.log
.python-version
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
boto3
ffprobe3
openai-whisper
python-dotenv
pytest
Expand Down
37 changes: 33 additions & 4 deletions speech_to_text.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import sys
import uuid
import shutil
import subprocess
import traceback
from functools import cache
from pathlib import Path
Expand Down Expand Up @@ -39,7 +40,7 @@ def main(daemon=True) -> None:
except KeyboardInterrupt:
logging.info("exiting")
sys.exit()
except Exception:
except SpeechToTextException:
logging.exception("error while processing job")
report_error(job, traceback.format_exc())

Expand Down Expand Up @@ -79,7 +80,7 @@ def get_job() -> Optional[dict]:
return None
else:
# this should never happen
raise Exception(f"expected 1 job from queue but got {len(jobs)}")
raise SpeechToTextException(f"expected 1 job from queue but got {len(jobs)}")


def download_media(job: dict) -> dict:
Expand All @@ -94,6 +95,9 @@ def download_media(job: dict) -> dict:
# e.g. pg879tb2706-v2/video_1.mp4
bucket.download_file(media_file, media_file)

media_info = inspect_media(media_file)
logging.info(f"downloaded {media_file}: {json.dumps(media_info)}")

return job


Expand All @@ -118,7 +122,13 @@ def run_whisper(job: dict) -> dict:
whisper_options.pop("model", None)
whisper_options.pop("writer", None)

result = whisper.transcribe(audio=media_file, model=model, **whisper_options)
try:
result = whisper.transcribe(
audio=media_file, model=model, **whisper_options
)
except Exception as e:
raise SpeechToTextException(str(e))

logging.info(f"whisper result: {result}")

logging.info(f"writing output using writer_options: {writer_options}")
Expand Down Expand Up @@ -319,7 +329,22 @@ def receive_done_job() -> Optional[dict]:
elif len(messages) == 0:
return None
else:
raise Exception("received more than one message from todo queue!")
raise SpeechToTextException("received more than one message from todo queue!")


def inspect_media(path) -> dict:
    """Probe a media file with ffprobe and summarize its container metadata.

    Args:
        path: Location of the media file passed to ffprobe.

    Returns:
        A dict with three keys read from the ``format`` section of ffprobe's
        JSON output: ``duration`` (float), ``format`` (ffprobe's
        ``format_name``) and ``size`` (int).

    Raises:
        SpeechToTextException: If ffprobe exits with a non-zero status, or if
            its output cannot be parsed or lacks the expected fields.
    """
    command = ["ffprobe", "-show_format", "-print_format", "json", "-v", "quiet", path]

    try:
        output = subprocess.check_output(command)
    except subprocess.CalledProcessError as e:
        # Keep the original failure attached so the traceback shows ffprobe's exit status.
        raise SpeechToTextException(f"Invalid media file {path}") from e

    # A zero exit status does not guarantee that the output is parsable or that
    # every field below is present and numeric. Translate those failures too,
    # so callers that only handle SpeechToTextException still see them.
    try:
        media_format = json.loads(output)["format"]
        return {
            "duration": float(media_format["duration"]),
            "format": media_format["format_name"],
            "size": int(media_format["size"]),
        }
    except (KeyError, TypeError, ValueError) as e:
        raise SpeechToTextException(f"Invalid media file {path}") from e


@cache
Expand Down Expand Up @@ -400,3 +425,7 @@ def load_whisper_model(model_name) -> whisper.model.Whisper:

else:
main(daemon=args.daemon)


class SpeechToTextException(Exception):
    """Error type raised by this module for job-level failures.

    It is raised for an unexpected number of queue messages, for media that
    ffprobe rejects, and for errors surfaced while running whisper.
    """

    # NOTE(review): in this diff the class is defined after the module-level
    # `main(...)` call; confirm the name is bound before any handler that
    # references it can run when the file is executed as a script.
23 changes: 23 additions & 0 deletions tests/test_inspect_media.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from speech_to_text import inspect_media, SpeechToTextException

import pytest


def test_duration():
    """The sample WAV file is reported with a duration of 3.22."""
    media_info = inspect_media("tests/data/en.wav")
    duration = media_info["duration"]
    assert duration == 3.220000


def test_format():
    """The sample WAV file is reported with the format name "wav"."""
    media_info = inspect_media("tests/data/en.wav")
    detected_format = media_info["format"]
    assert detected_format == "wav"


def test_size():
    """The sample WAV file is reported with a size of 618318."""
    media_info = inspect_media("tests/data/en.wav")
    reported_size = media_info["size"]
    assert reported_size == 618318


def test_invalid_media():
    """Inspecting a non-media file (README.md) raises SpeechToTextException."""
    non_media_path = "README.md"
    with pytest.raises(SpeechToTextException):
        inspect_media(non_media_path)

0 comments on commit 2708711

Please sign in to comment.