forked from DataTalksClub/llm-zoomcamp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
scorer.py
49 lines (37 loc) · 1.69 KB
/
scorer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import pandas as pd
import numpy as np
class ParticipantVisibleError(Exception):
    """Error whose message is shown to competition participants.

    Raise this type when participants should see the error message. All other
    errors are shown only to the competition host, which helps prevent
    unintentional leakage of solution data.
    """


def _strip_float_suffix(value: str) -> str:
    """Drop a trailing '.0', e.g. '12.0' -> '12'; other strings are unchanged."""
    return value[:-2] if value.endswith('.0') else value


def _parse_accepted_answers(value: str) -> list:
    """Split a solution cell such as '[1, 2]' or 'B' into its accepted answers.

    Surrounding square brackets are removed, the rest is split on commas, and
    each item is whitespace-stripped and passed through _strip_float_suffix.
    """
    inner = value.strip().lstrip('[').rstrip(']')
    return [_strip_float_suffix(item.strip()) for item in inner.split(',')]


def score(solution: pd.DataFrame, submission: pd.DataFrame, row_id_column_name: str) -> float:
    """Accuracy that works with multiple correct answers.

    Each solution cell in the 'answer' column may hold one answer or a
    comma-separated list (optionally wrapped in square brackets). A submitted
    answer counts as correct when it equals any of the listed answers.
    Submission rows are matched to solution rows by ``row_id_column_name``;
    submission rows whose id is not in the solution are ignored.

    Args:
        solution: Frame with ``row_id_column_name`` and 'answer' columns.
        submission: Frame with ``row_id_column_name`` and 'answer' columns.
        row_id_column_name: Name of the column that identifies a row.

    Returns:
        The fraction of solution rows answered correctly, as a built-in float.

    Raises:
        ParticipantVisibleError: If the submission lacks a required column,
            contains duplicate row ids, or misses row ids from the solution.
        ValueError: If the solution lacks a required column.
    """
    target_column = 'answer'

    # Explicit raises instead of `assert`: asserts are stripped under -O and
    # their AssertionError is never shown to participants.
    for column in (row_id_column_name, target_column):
        if column not in solution.columns:
            raise ValueError(f"Solution is missing the required column '{column}'")
        if column not in submission.columns:
            raise ParticipantVisibleError(
                f"Submission is missing the required column '{column}'"
            )

    solution = solution.set_index(row_id_column_name, drop=True)
    submission = submission.set_index(row_id_column_name, drop=True)

    # Duplicate ids would make the .loc lookup below return extra rows, so the
    # pairwise comparison would silently line up the wrong rows.
    if submission.index.has_duplicates:
        raise ParticipantVisibleError('Submission contains duplicate row ids')

    missing_count = int((~solution.index.isin(submission.index)).sum())
    if missing_count:
        raise ParticipantVisibleError(
            f'Submission is missing {missing_count} row id(s) required by the solution'
        )

    # The submission is loaded with default parameters, so pandas may have
    # converted a string column into float; strip the resulting '.0' suffix.
    submitted = (
        submission.loc[solution.index, target_column]
        .astype(str)
        .map(_strip_float_suffix)
    )
    # The same suffix fix is applied to every accepted answer so that a
    # float-parsed solution column still compares equal to the submission.
    accepted = solution[target_column].astype(str).map(_parse_accepted_answers)

    correct = [
        answer in options
        for options, answer in zip(accepted.values, submitted.values)
    ]
    return float(np.mean(correct))