Skip to content

Commit

Permalink
Add default behavior for missing speaker metadata
Browse files: browse the repository at this point in the history
  • Loading branch information
calebchiam committed Aug 14, 2020
1 parent e1cc169 commit 7c9c64e
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
8 changes: 7 additions & 1 deletion convokit/model/corpusHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
import os
import json
from collections import defaultdict
from typing import Dict
import pickle

from .speaker import Speaker
from .utterance import Utterance
from .conversation import Conversation
from typing import Dict
from .convoKitMeta import ConvoKitMeta
from convokit.util import warn

BIN_DELIM_L, BIN_DELIM_R = "<##bin{", "}&&@**>"
KeyId = "id"
Expand Down Expand Up @@ -197,6 +199,10 @@ def initialize_speakers_and_utterances_objects(corpus, utt_dict, utterances, spe
u = defaultdict(lambda: None, u)
speaker_key = u[KeySpeaker]
if speaker_key not in speakers_dict:
if u[KeySpeaker] not in speakers_data:
warn("CorpusLoadWarning: Missing speaker metadata for speaker ID: {}. "
"Initializing default empty metadata instead.".format(u[KeySpeaker]))
speakers_data[u[KeySpeaker]] = {}
if KeyMeta in speakers_data[u[KeySpeaker]]:
speakers_dict[speaker_key] = Speaker(owner=corpus, id=u[KeySpeaker],
meta=speakers_data[u[KeySpeaker]][KeyMeta])
Expand Down
11 changes: 7 additions & 4 deletions convokit/tests/general/test_corpora_load_and_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,24 @@ class CorpusLoadAndDump(unittest.TestCase):
Load a variety of existing (small) corpora to verify that there are no backward-compatibility issues.
"""

def test_load_subreddit(self):
def test_load_dump_subreddit(self):
corpus = Corpus(download('subreddit-hey'))
corpus.dump('subreddit')

def test_load_tennis(self):
def test_load_dump_tennis(self):
corpus = Corpus(download('tennis-corpus'))
corpus.dump('tennis-corpus')

def test_load_politeness(self):
def test_load_dump_politeness(self):
corpus = Corpus(download('wikipedia-politeness-corpus'))
corpus.dump('wikipedia-politeness-corpus')

def test_load_switchboard(self):
def test_load_dump_switchboard(self):
corpus = Corpus(download("switchboard-corpus"))
corpus.dump('switchboard-corpus')

def test_load_wikiconv(self):
corpus = Corpus(download('wikiconv-2004'))

if __name__ == '__main__':
unittest.main()

0 comments on commit 7c9c64e

Please sign in to comment.