Skip to content

Commit

Permalink
added fix for draft_data_json
Browse files Browse the repository at this point in the history
  • Loading branch information
KunalTiwary committed Dec 8, 2024
1 parent 2182c7a commit 8855826
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 94 deletions.
85 changes: 17 additions & 68 deletions backend/projects/annotation_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,59 +167,6 @@
}


def convert_prediction_json_to_annotation_result(
    prediction_json, speakers_json, audio_duration, index, tred_type, is_acoustic=False
):
    """
    Convert prediction_json and transcribed_json to annotation_result.

    Every segment in ``prediction_json`` produces two entries in the
    returned list, in this order: a "labels" entry carrying the speaker
    name and a "textarea" entry carrying the segment text. Both entries
    share the same generated id.

    Args:
        prediction_json: Iterable of segment mappings, each read through
            the keys "start", "end", "speaker_id" and "text". ``None``
            yields an empty result.
        speakers_json: Iterable of speaker mappings, each read through
            the keys "speaker_id" and "name".
        audio_duration: Stored unchanged as "original_length" on every
            entry.
        index: Value embedded in the generated id of every entry.
        tred_type: Used as the "from_name" of the textarea entry when
            ``is_acoustic`` is true; ignored otherwise.
        is_acoustic: When false (default) the textarea entry's
            "from_name" is "transcribed_json".

    Returns:
        A list of dicts (two per segment); empty when ``prediction_json``
        is ``None`` or has no segments.

    Raises:
        StopIteration: If a segment's "speaker_id" matches no entry in
            ``speakers_json`` (``next`` is called without a default).
        KeyError: If a segment or a visited speaker lacks an expected key.
    """
    result = []
    # Identity test rather than `== None`, so objects with a custom
    # __eq__ cannot be mistaken for a missing prediction.
    if prediction_json is None:
        return result

    # Loop-invariant: which control the textarea entry is attributed to.
    text_from_name = tred_type if is_acoustic else "transcribed_json"

    for idx, val in enumerate(prediction_json):
        # generate_random_string is a helper that is not shown in this
        # view; the requested length shrinks as idx gains digits.
        # Named segment_id to avoid shadowing the builtin `id`.
        segment_id = (
            f"shoonya_{index}s{idx}s{generate_random_string(13-len(str(idx)))}"
        )

        label_dict = {
            "origin": "manual",
            "to_name": "audio_url",
            "from_name": "labels",
            "original_length": audio_duration,
            "id": segment_id,
            "type": "labels",
            "value": {
                "start": val["start"],
                "end": val["end"],
                "labels": [
                    # First speaker whose id matches the segment's speaker.
                    next(
                        speaker
                        for speaker in speakers_json
                        if speaker["speaker_id"] == val["speaker_id"]
                    )["name"]
                ],
            },
        }
        text_dict = {
            "origin": "manual",
            "to_name": "audio_url",
            "from_name": text_from_name,
            "original_length": audio_duration,
            "id": segment_id,
            "type": "textarea",
            "value": {
                "start": val["start"],
                "end": val["end"],
                "text": [val["text"]],
            },
        }

        result.append(label_dict)
        result.append(text_dict)

    return result


def convert_conversation_json_to_annotation_result(conversation_json, idx):
result = []
for i in range(len(conversation_json)):
Expand All @@ -239,12 +186,15 @@ def convert_conversation_json_to_annotation_result(conversation_json, idx):


def draft_data_json_to_annotation_result(draft_data_json, project_type, pk=None):
from projects.views import convert_prediction_json_to_annotation_result

registry_helper = ProjectRegistry.get_instance()
input_dataset_info = registry_helper.get_input_dataset_and_fields(project_type)
dataset_model = getattr(dataset_models, input_dataset_info["dataset_type"])
try:
dataset_item = dataset_model.objects.get(pk=pk)
except:
dataset_item = None
pass
result = []
idx = 0
Expand All @@ -263,21 +213,20 @@ def draft_data_json_to_annotation_result(draft_data_json, project_type, pk=None)
if field == "conversation_json":
ans = convert_conversation_json_to_annotation_result(value, idx)
elif field == "transcribed_json" or field == "prediction_json":
assert type(value) in [list, dict], f"Something wrong is there in the type of {value}"
if isinstance(value,list):
value = {
"verbatim_transcribed_json": value
}
for tred_type, tred_value in value.items():
sub_ans = convert_prediction_json_to_annotation_result(
tred_value,
dataset_item.speakers_json,
dataset_item.audio_duration,
idx,
tred_type = tred_type,
is_acoustic = (project_type == "AcousticNormalisedTranscriptionEditing")
)
ans.extend(sub_ans)
assert type(value) in [
list,
dict,
], f"Something wrong is there in the type of {value}"
if isinstance(value, list):
value = {"verbatim_transcribed_json": value}
sub_ans = convert_prediction_json_to_annotation_result(
None,
project_type,
dataset_item,
value,
True,
)
ans.extend(sub_ans)
else:
if field_type == "textarea":
field_dict["value"] = {"text": [value]}
Expand Down
63 changes: 37 additions & 26 deletions backend/projects/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -854,33 +854,42 @@ def get_task_count_unassigned(pk, user):
return len(proj_tasks_unassigned)


def convert_prediction_json_to_annotation_result(pk, proj_type):
def convert_prediction_json_to_annotation_result(
pk, proj_type, data_item, prediction_json, populate_draft_data=False
):
result = []
if (
proj_type == "AudioTranscriptionEditing"
or proj_type == "AcousticNormalisedTranscriptionEditing"
):
data_item = SpeechConversation.objects.get(pk=pk)
prediction_json = (
json.loads(data_item.prediction_json)
if isinstance(data_item.prediction_json, str)
else data_item.prediction_json
)
assert type(prediction_json) in [dict, list], "Seems something is wrong with the formatting"
if not data_item and not prediction_json:
data_item = SpeechConversation.objects.get(pk=pk)
prediction_json = (
json.loads(data_item.prediction_json)
if isinstance(data_item.prediction_json, str)
else data_item.prediction_json
)
assert type(prediction_json) in [
dict,
list,
], "Seems something is wrong with the formatting"
# If the prediction is a list, only the verbatim JSON is present; wrap it under that key.
if isinstance(prediction_json,list):
prediction_json = {
"verbatim_transcribed_json": prediction_json
}

if isinstance(prediction_json, list):
prediction_json = {"verbatim_transcribed_json": prediction_json}

speakers_json = data_item.speakers_json
audio_duration = data_item.audio_duration
# converting prediction_json to result (wherever it exists) for every task.
if prediction_json == None:
return result
# for pred_type, pred_json in prediction_json.items():
if 'acoustic_normalised_transcribed_json' in prediction_json.keys():
for idx, (val, val_acoustic) in enumerate(zip(prediction_json['verbatim_transcribed_json'],prediction_json['acoustic_normalised_transcribed_json'])):
if "acoustic_normalised_transcribed_json" in prediction_json.keys():
for idx, (val, val_acoustic) in enumerate(
zip(
prediction_json["verbatim_transcribed_json"],
prediction_json["acoustic_normalised_transcribed_json"],
)
):
label_dict = {
"origin": "manual",
"to_name": "audio_url",
Expand All @@ -900,18 +909,20 @@ def convert_prediction_json_to_annotation_result(pk, proj_type):
"original_length": audio_duration,
}
if proj_type == "AcousticNormalisedTranscriptionEditing":
text_dict["from_name"] = 'verbatim_transcribed_json'
text_dict_acoustic["from_name"] = 'acoustic_normalised_transcribed_json'

text_dict["from_name"] = "verbatim_transcribed_json"
text_dict_acoustic[
"from_name"
] = "acoustic_normalised_transcribed_json"

id = f"shoonya_{idx}s{generate_random_string(13 - len(str(idx)))}"
label_dict["id"] = id
text_dict["id"] = id
text_dict_acoustic["id"] = id

label_dict["type"] = "labels"
text_dict["type"] = "textarea"
text_dict_acoustic["type"] = "textarea"

value_labels = {
"start": val["start"],
"end": val["end"],
Expand All @@ -933,7 +944,7 @@ def convert_prediction_json_to_annotation_result(pk, proj_type):
"end": val_acoustic["end"],
"text": [val_acoustic["text"]],
}

label_dict["value"] = value_labels
text_dict["value"] = value_text
text_dict_acoustic["value"] = value_text_acoustic
Expand All @@ -942,7 +953,7 @@ def convert_prediction_json_to_annotation_result(pk, proj_type):
result.append(text_dict)
result.append(text_dict_acoustic)
else:
for idx, val in enumerate(prediction_json['verbatim_transcribed_json']):
for idx, val in enumerate(prediction_json["verbatim_transcribed_json"]):
label_dict = {
"origin": "manual",
"to_name": "audio_url",
Expand All @@ -956,13 +967,13 @@ def convert_prediction_json_to_annotation_result(pk, proj_type):
"original_length": audio_duration,
}
if proj_type == "AcousticNormalisedTranscriptionEditing":
text_dict["from_name"] = 'verbatim_transcribed_json'
text_dict["from_name"] = "verbatim_transcribed_json"
id = f"shoonya_{idx}s{generate_random_string(13 - len(str(idx)))}"
label_dict["id"] = id
text_dict["id"] = id
label_dict["type"] = "labels"
text_dict["type"] = "textarea"

value_labels = {
"start": val["start"],
"end": val["end"],
Expand All @@ -979,7 +990,7 @@ def convert_prediction_json_to_annotation_result(pk, proj_type):
"end": val["end"],
"text": [val["text"]],
}

label_dict["value"] = value_labels
text_dict["value"] = value_text
# mainly label_dict and text_dict are sent as result
Expand Down Expand Up @@ -2439,7 +2450,7 @@ def assign_new_tasks(self, request, pk, *args, **kwargs):
]:
try:
result = convert_prediction_json_to_annotation_result(
task.input_data.id, project.project_type
task.input_data.id, project.project_type, None, None, False
)
except Exception as e:
print(
Expand Down

0 comments on commit 8855826

Please sign in to comment.