Skip to content

Commit

Permalink
fix wrongly indexed project fold splits
Browse files Browse the repository at this point in the history
  • Loading branch information
tobhey committed Sep 8, 2022
1 parent 2b70299 commit 99f7ca7
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,7 @@
" prediction = predictor.predict(class_text)\r\n",
" flat_predictions.append(prediction)\r\n",
"\r\n",
" log_text = '{}, {} -> {}'.format(class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" log_text = 'PID: {}, {}, {} -> {}'.format(row.ProjectID, class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" logLine(log_text)\r\n",
" \r\n",
" # get labels in correct order\r\n",
Expand Down Expand Up @@ -786,8 +786,8 @@
" for k in config_data.project_fold:\r\n",
" test = df.loc[df['ProjectID'].isin(k)].index\r\n",
" train = df.loc[~df['ProjectID'].isin(k)].index\r\n",
" df_train = df.iloc[train]\r\n",
" df_eval = df.iloc[test]\r\n",
" df_train = df.loc[train]\r\n",
" df_eval = df.loc[test]\r\n",
" log_text = '/////////////////////// Test-Projects: {} /////////////////////////////'.format(k)\r\n",
" logLine(log_text)\r\n",
" classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)\r\n",
Expand Down Expand Up @@ -912,4 +912,4 @@
}
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@
" prediction = predictor.predict(class_text)\r\n",
" flat_predictions.append(prediction)\r\n",
"\r\n",
" log_text = '{}, {} -> {}'.format(class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" log_text = 'PID: {}, {}, {} -> {}'.format(row.ProjectID, class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" logLine(log_text)\r\n",
" \r\n",
" # get labels in correct order\r\n",
Expand Down Expand Up @@ -794,8 +794,8 @@
" for k in config_data.project_fold:\r\n",
" test = df.loc[df['ProjectID'].isin(k)].index\r\n",
" train = df.loc[~df['ProjectID'].isin(k)].index\r\n",
" df_train = df.iloc[train]\r\n",
" df_eval = df.iloc[test]\r\n",
" df_train = df.loc[train]\r\n",
" df_eval = df.loc[test]\r\n",
" log_text = '/////////////////////// Test-Projects: {} /////////////////////////////'.format(k)\r\n",
" logLine(log_text)\r\n",
" classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)\r\n",
Expand Down Expand Up @@ -924,4 +924,4 @@
}
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@
" prediction = predictor.predict(class_text)\r\n",
" flat_predictions.append(prediction)\r\n",
"\r\n",
" log_text = '{}, {} -> {}'.format(class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" log_text = 'PID: {}, {}, {} -> {}'.format(row.ProjectID, class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" logLine(log_text)\r\n",
" \r\n",
" # get labels in correct order\r\n",
Expand Down Expand Up @@ -865,8 +865,8 @@
" for k in config_data.project_fold:\r\n",
" test = df.loc[df['ProjectID'].isin(k)].index\r\n",
" train = df.loc[~df['ProjectID'].isin(k)].index\r\n",
" df_train = df.iloc[train]\r\n",
" df_eval = df.iloc[test]\r\n",
" df_train = df.loc[train]\r\n",
" df_eval = df.loc[test]\r\n",
" log_text = '/////////////////////// Test-Projects: {} /////////////////////////////'.format(k)\r\n",
" logLine(log_text)\r\n",
" classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)\r\n",
Expand Down Expand Up @@ -955,4 +955,4 @@
}
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,7 @@
" prediction = predictor.predict(class_text)\r\n",
" flat_predictions.append(prediction)\r\n",
"\r\n",
" log_text = '{}, {} -> {}'.format(class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" log_text = 'PID: {}, {}, {} -> {}'.format(row.ProjectID, class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" logLine(log_text)\r\n",
" \r\n",
" # get labels in correct order\r\n",
Expand Down Expand Up @@ -841,8 +841,8 @@
" for k in config_data.project_fold:\r\n",
" test = df.loc[df['ProjectID'].isin(k)].index\r\n",
" train = df.loc[~df['ProjectID'].isin(k)].index\r\n",
" df_train = df.iloc[train]\r\n",
" df_eval = df.iloc[test]\r\n",
" df_train = df.loc[train]\r\n",
" df_eval = df.loc[test]\r\n",
" log_text = '/////////////////////// Test-Projects: {} /////////////////////////////'.format(k)\r\n",
" logLine(log_text)\r\n",
" classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)\r\n",
Expand Down Expand Up @@ -970,4 +970,4 @@
}
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@
" prediction = predictor.predict(class_text)\r\n",
" flat_predictions.append(prediction)\r\n",
"\r\n",
" log_text = '{}, {} -> {}'.format(class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" log_text = 'PID: {}, {}, {} -> {}'.format(row.ProjectID, class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" logLine(log_text)\r\n",
" \r\n",
" # get labels in correct order\r\n",
Expand Down Expand Up @@ -860,8 +860,8 @@
" for k in config_data.project_fold:\r\n",
" test = df.loc[df['ProjectID'].isin(k)].index\r\n",
" train = df.loc[~df['ProjectID'].isin(k)].index\r\n",
" df_train = df.iloc[train]\r\n",
" df_eval = df.iloc[test]\r\n",
" df_train = df.loc[train]\r\n",
" df_eval = df.loc[test]\r\n",
" log_text = '/////////////////////// Test-Projects: {} /////////////////////////////'.format(k)\r\n",
" logLine(log_text)\r\n",
" classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)\r\n",
Expand Down Expand Up @@ -950,4 +950,4 @@
}
}
]
}
}
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/tobhey/NoRBERT)

# NoRBERT: Transfer Learning for Requirements Classification
| :exclamation: Please Note: |
|-----------------------------------------|
|We found a bug in the notebooks that caused the indexing of the project-specific folds to be wrong. Thus, the results for the p-fold and loPo settings reported in the original paper are not correct as they were not strictly project-specific. We published a corrected version of the paper at https://doi.org/10.5445/IR/1000150464. We fixed the bug in this version. The overall claim that NoRBERT performs better on unseen projects than existing approaches still holds true but the results on this type of folds are slightly worse (about 5 percentage points in F1-score on average) than reported.|

This is the supplementary material repository of the paper "NoRBERT: Transfer Learning for Requirements Classification".
In this paper we explore the performance of transfer learning (with Google's language model BERT) on different tasks in requirements classification. Especially the performance on projects, completely unseen during training, is in the focus of the paper.
Additionally, we developed a new dataset based on the Promise NFR dataset, that includes a more fine-grained labeling of functional requirements based on their concerns (Function, Data, Behavior).
Expand All @@ -20,6 +24,7 @@ This repository contains the datasets and code used in the paper, as well as add
- [Notebooks](./Code/Apply_Pretrained_Model) to apply pretrained models for each task to an input requirement and pretrained models for each task
* [Results](./Results/) contains the results of all tested hyperparameter configurations for each task


| :exclamation: Please Note: |
|-----------------------------------------|
|Note that we calculated the overall results of the cross validations by collecting the predictions of all folds and calculating the metrics over all predictions instead of averaging the results per metric over the folds. However, our notebooks provide both results. |
Expand Down

0 comments on commit 99f7ca7

Please sign in to comment.