-
Notifications
You must be signed in to change notification settings - Fork 1
/
evaluation.py
69 lines (51 loc) · 2.23 KB
/
evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import argparse
from glob import glob
from pathlib import Path
import pandas as pd
from sklearn.metrics import classification_report
from contextlib import redirect_stdout
def create_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the evaluation script.

    Returns:
        A parser accepting ``--in_dir`` (default ``"predictions/"``) and
        ``--out_dir`` (default ``"output/"``), both plain strings.
    """
    parser = argparse.ArgumentParser(prog="LLM Classification Evaluation")
    parser.add_argument(
        "--in_dir",
        type=str,
        default="predictions/",
        help="Directory containing the prediction CSV files "
        "(default: %(default)s).",
    )
    parser.add_argument(
        "--out_dir",
        type=str,
        default="output/",
        help="Directory the evaluation reports are written to "
        "(default: %(default)s).",
    )
    return parser
def produce_report(data: pd.DataFrame, column: str) -> pd.DataFrame:
    """Compute the classification report for the test split as a table.

    Args:
        data: Predictions table. Must contain a ``train_test_set`` column,
            the true labels in ``column`` and the predicted labels in
            ``pred_<column>``.
        column: Name of the outcome column to evaluate.

    Returns:
        The report dictionary produced by ``classification_report``,
        transposed so that each report entry becomes one row.
    """
    # Only rows flagged as belonging to the test split are evaluated.
    is_test = data["train_test_set"].eq("test")
    held_out = data[is_test]
    y_true = held_out[column]
    y_pred = held_out[f"pred_{column}"]
    metrics = classification_report(y_true, y_pred, output_dict=True)
    return pd.DataFrame(metrics).transpose()
def print_report(data: pd.DataFrame, column: str) -> None:
    """Print the test-split classification report under a dashed rule.

    Args:
        data: Predictions table. Must contain a ``train_test_set`` column,
            the true labels in ``column`` and the predicted labels in
            ``pred_<column>``.
        column: Name of the outcome column to evaluate.
    """
    # Only rows flagged as belonging to the test split are evaluated.
    is_test = data["train_test_set"].eq("test")
    held_out = data[is_test]
    text = classification_report(held_out[column], held_out[f"pred_{column}"])
    # The rule is exactly as wide as the first line of the report text.
    first_line = text.partition("\n")[0]
    print("-" * len(first_line))
    print(text, "\n")
def main() -> None:
    """Evaluate every prediction file in ``--in_dir`` and write the reports.

    Each ``*_pred*.csv`` file in the input directory is evaluated on its test
    split, except files whose name contains ``only-political``. Two files are
    written to the output directory, both prefixed with the last component of
    the input directory path:

    * ``<name>_reports.txt`` -- the printed text report of every file.
    * ``<name>_outputs.csv`` -- all report tables stacked, with ``models``,
      ``tasks`` and ``columns`` identifying where each row came from.

    File stems must consist of exactly four ``_``-separated parts, read as
    task, an ignored part, outcome column and model.

    Raises:
        ValueError: If a prediction file name does not have four parts.
    """
    args = create_parser().parse_args()
    in_dir = Path(args.in_dir)
    out_dir = Path(args.out_dir)

    # Collecting prediction files from given directory.
    # The exclusion is tested against the file name only, so a directory whose
    # path happens to contain "only-political" does not discard every file.
    # glob() returns paths in arbitrary order; sorting keeps outputs stable.
    files = sorted(
        file
        for file in glob(str(in_dir.joinpath("*_pred*.csv")))
        if "only-political" not in Path(file).name
    )

    # Creating output directory (and any missing parents) if it does not exist.
    out_dir.mkdir(parents=True, exist_ok=True)

    # Getting last part of the path to use as name of the file.
    output_name = in_dir.name

    reports = []
    report_path = out_dir.joinpath(f"{output_name}_reports.txt")
    with open(report_path, "w", encoding="utf-8") as buffer:
        # Everything printed inside this block ends up in the report file.
        with redirect_stdout(buffer):
            for file in files:
                data = pd.read_csv(file)
                try:
                    task, _, column, model = Path(file).stem.split("_")
                except ValueError as err:
                    raise ValueError(
                        f"Cannot parse prediction file name {file!r}: "
                        "expected exactly four '_'-separated parts "
                        "(task, ignored, column, model)."
                    ) from err
                print(f"Model: {model}. Task: {task}. Outcome Variable: {column}")
                print_report(data, column)
                df = produce_report(data, column)
                reports.append(
                    df.assign(models=model, tasks=task, columns=column)
                )

    # Concatenate once at the end; fall back to an empty frame when no
    # prediction files were found so an (empty) CSV is still written.
    outputs = pd.concat(reports) if reports else pd.DataFrame()
    out_file = out_dir.joinpath(f"{output_name}_outputs.csv")
    outputs.to_csv(out_file)


if __name__ == "__main__":
    main()