Skip to content

Commit

Permalink
fix wrongly indexed project fold splits
Browse files Browse the repository at this point in the history
  • Loading branch information
tobhey committed Sep 8, 2022
1 parent 2b70299 commit 99f7ca7
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,7 @@
" prediction = predictor.predict(class_text)\r\n",
" flat_predictions.append(prediction)\r\n",
"\r\n",
" log_text = '{}, {} -> {}'.format(class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" log_text = 'PID: {}, {}, {} -> {}'.format(row.ProjectID, class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" logLine(log_text)\r\n",
" \r\n",
" # get labels in correct order\r\n",
Expand Down Expand Up @@ -786,8 +786,8 @@
" for k in config_data.project_fold:\r\n",
" test = df.loc[df['ProjectID'].isin(k)].index\r\n",
" train = df.loc[~df['ProjectID'].isin(k)].index\r\n",
" df_train = df.iloc[train]\r\n",
" df_eval = df.iloc[test]\r\n",
" df_train = df.loc[train]\r\n",
" df_eval = df.loc[test]\r\n",
" log_text = '/////////////////////// Test-Projects: {} /////////////////////////////'.format(k)\r\n",
" logLine(log_text)\r\n",
" classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)\r\n",
Expand Down Expand Up @@ -912,4 +912,4 @@
}
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -726,7 +726,7 @@
" prediction = predictor.predict(class_text)\r\n",
" flat_predictions.append(prediction)\r\n",
"\r\n",
" log_text = '{}, {} -> {}'.format(class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" log_text = 'PID: {}, {}, {} -> {}'.format(row.ProjectID, class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" logLine(log_text)\r\n",
" \r\n",
" # get labels in correct order\r\n",
Expand Down Expand Up @@ -794,8 +794,8 @@
" for k in config_data.project_fold:\r\n",
" test = df.loc[df['ProjectID'].isin(k)].index\r\n",
" train = df.loc[~df['ProjectID'].isin(k)].index\r\n",
" df_train = df.iloc[train]\r\n",
" df_eval = df.iloc[test]\r\n",
" df_train = df.loc[train]\r\n",
" df_eval = df.loc[test]\r\n",
" log_text = '/////////////////////// Test-Projects: {} /////////////////////////////'.format(k)\r\n",
" logLine(log_text)\r\n",
" classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)\r\n",
Expand Down Expand Up @@ -924,4 +924,4 @@
}
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,7 @@
" prediction = predictor.predict(class_text)\r\n",
" flat_predictions.append(prediction)\r\n",
"\r\n",
" log_text = '{}, {} -> {}'.format(class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" log_text = 'PID: {}, {}, {} -> {}'.format(row.ProjectID, class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" logLine(log_text)\r\n",
" \r\n",
" # get labels in correct order\r\n",
Expand Down Expand Up @@ -865,8 +865,8 @@
" for k in config_data.project_fold:\r\n",
" test = df.loc[df['ProjectID'].isin(k)].index\r\n",
" train = df.loc[~df['ProjectID'].isin(k)].index\r\n",
" df_train = df.iloc[train]\r\n",
" df_eval = df.iloc[test]\r\n",
" df_train = df.loc[train]\r\n",
" df_eval = df.loc[test]\r\n",
" log_text = '/////////////////////// Test-Projects: {} /////////////////////////////'.format(k)\r\n",
" logLine(log_text)\r\n",
" classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)\r\n",
Expand Down Expand Up @@ -955,4 +955,4 @@
}
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -772,7 +772,7 @@
" prediction = predictor.predict(class_text)\r\n",
" flat_predictions.append(prediction)\r\n",
"\r\n",
" log_text = '{}, {} -> {}'.format(class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" log_text = 'PID: {}, {}, {} -> {}'.format(row.ProjectID, class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" logLine(log_text)\r\n",
" \r\n",
" # get labels in correct order\r\n",
Expand Down Expand Up @@ -841,8 +841,8 @@
" for k in config_data.project_fold:\r\n",
" test = df.loc[df['ProjectID'].isin(k)].index\r\n",
" train = df.loc[~df['ProjectID'].isin(k)].index\r\n",
" df_train = df.iloc[train]\r\n",
" df_eval = df.iloc[test]\r\n",
" df_train = df.loc[train]\r\n",
" df_eval = df.loc[test]\r\n",
" log_text = '/////////////////////// Test-Projects: {} /////////////////////////////'.format(k)\r\n",
" logLine(log_text)\r\n",
" classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)\r\n",
Expand Down Expand Up @@ -970,4 +970,4 @@
}
}
]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -758,7 +758,7 @@
" prediction = predictor.predict(class_text)\r\n",
" flat_predictions.append(prediction)\r\n",
"\r\n",
" log_text = '{}, {} -> {}'.format(class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" log_text = 'PID: {}, {}, {} -> {}'.format(row.ProjectID, class_text, label_indices.get(class_label), label_indices.get(prediction))\r\n",
" logLine(log_text)\r\n",
" \r\n",
" # get labels in correct order\r\n",
Expand Down Expand Up @@ -860,8 +860,8 @@
" for k in config_data.project_fold:\r\n",
" test = df.loc[df['ProjectID'].isin(k)].index\r\n",
" train = df.loc[~df['ProjectID'].isin(k)].index\r\n",
" df_train = df.iloc[train]\r\n",
" df_eval = df.iloc[test]\r\n",
" df_train = df.loc[train]\r\n",
" df_eval = df.loc[test]\r\n",
" log_text = '/////////////////////// Test-Projects: {} /////////////////////////////'.format(k)\r\n",
" logLine(log_text)\r\n",
" classifier, overall_flat_predictions, overall_flat_true_labels, results = train_and_predict(df_train, df_eval, overall_flat_predictions, overall_flat_true_labels, results)\r\n",
Expand Down Expand Up @@ -950,4 +950,4 @@
}
}
]
}
}
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/tobhey/NoRBERT)

# NoRBERT: Transfer Learning for Requirements Classification
| :exclamation: Please Note: |
|-----------------------------------------|
|We found a bug in the notebooks that caused the indexing of the project-specific folds to be wrong. Thus, the results for the p-fold and loPo settings reported in the original paper are not correct as they were not strictly project-specific. We published a corrected version of the paper at https://doi.org/10.5445/IR/1000150464. We fixed the bug in this version. The overall claim that NoRBERT performs better on unseen projects than existing approaches still holds true but the results on this type of folds are slightly worse (about 5 percentage points in F1-score on average) than reported.|

This is the supplementary material repository of the paper "NoRBERT: Transfer Learning for Requirements Classification".
In this paper we explore the performance of transfer learning (with Google's language model BERT) on different tasks in requirements classification. Especially the performance on projects, completely unseen during training, is in the focus of the paper.
Additionally, we developed a new dataset based on the Promise NFR dataset, that includes a more fine-grained labeling of functional requirements based on their concerns (Function, Data, Behavior).
Expand All @@ -20,6 +24,7 @@ This repository contains the datasets and code used in the paper, as well as add
- [Notebooks](./Code/Apply_Pretrained_Model) to apply pretrained models for each task to an input requirement and pretrained models for each task
* [Results](./Results/) contains the results of all tested hyperparameter configurations for each task


| :exclamation: Please Note: |
|-----------------------------------------|
|Note that we calculated the overall results of the cross validations by collecting the predictions of all folds and calculating the metrics over all predictions instead of averaging the results per metric over the folds. However, our notebooks provide both results. |
Expand Down

0 comments on commit 99f7ca7

Please sign in to comment.