the-deep · okyksl · Aug 2, 2021 · Aug 3, 2021 · Aug 3, 2021 · Aug 3, 2021
diff --git a/docs/source/modeling/tracking.rst b/docs/source/modeling/tracking.rst
@@ -34,3 +34,8 @@ You can find an example of deployment in the repo.
 - The key of the deployment is creating a class that inherits from `mlflow.pyfunc.PythonModel` with a `predict()` function.
 - That class is pickled and logged as artifact of the training. At inference time it will be used to make predictions.
 
+Additionally, consider the following for a more configurable deployment:
+
+- *Dynamic inference parameters*: Store inference hyperparameters (e.g., batch size or thresholds) as a separate artifact in MLFlow. Use the `artifacts` option of `log_model`, and then retrieve the file through the `context` object that MLFlow passes to `load_context` or `predict`.
+- *Multiple outputs*: The `predict` function can return a Pandas DataFrame object. Use this if the model has multiple targets, or to provide logit scores so that thresholds can be adjusted dynamically on the client side.
+- *Serving labels*: Log a separate artifact in MLFlow so that the client side can map predictions back to human-readable labels.
diff --git a/notebooks/models/oguz/transformer_v0.5_1D.ipynb b/notebooks/models/oguz/transformer_v0.5_1D.ipynb
@@ -430,6 +430,54 @@
    "outputs": [],
    "metadata": {}
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Inference Preprocessing (Offline Testing Environment)"
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "DATA_PATH = 'leads.csv'\n",
+    "data = pd.read_csv(DATA_PATH)\n",
+    "data.head()"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "source": [
+    "from ast import literal_eval\n",
+    "\n",
+    "lengths = {}\n",
+    "\n",
+    "for field in ['extracted_text_as_paragraphs', 'extracted_text_as_sentences']:\n",
+    "    arr = data[field].apply(literal_eval).tolist()\n",
+    "    lengths[field] = [len(ds) for ds in arr]\n",
+    "    \n",
+    "    infer_df = pd.DataFrame.from_dict({\n",
+    "        'excerpt': [d for ds in arr for d in ds]\n",
+    "    })\n",
+    "    infer_df = infer_df[~(infer_df['excerpt'].str.len() == 0)]\n",
+    "    infer_df.to_csv(f'infer_{field}.csv', header=True, index=True)"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
   {
    "cell_type": "code",
    "execution_count": null,

diff --git a/scripts/training/oguz/huggingface-multihead/TODO b/scripts/training/oguz/huggingface-multihead/TODO
diff --git a/scripts/training/oguz/huggingface-multihead/augmentations.py b/scripts/training/oguz/huggingface-multihead/augmentations.py
diff --git a/scripts/training/oguz/huggingface-multihead/constants.py b/scripts/training/oguz/huggingface-multihead/constants.py
@@ -1,102 +1,112 @@
+SECTORS = [
+    "Agriculture",
+    "Cross",
+    "Education",
+    "Food Security",
+    "Health",
+    "Livelihoods",
+    "Logistics",
+    "Nutrition",
+    "Protection",
+    "Shelter",
+    "WASH",
+]
+
 PILLARS_1D = [
     "Context",
-    "Humanitarian Profile",
-    "Displacement",
+    "Shock/Event",
     "Casualties",
+    "Displacement",
     "Humanitarian Access",
-    "Information",
+    "Information And Communication",
+    "Covid-19",
 ]
 
 SUBPILLARS_1D = [
     [
-        "Context->Security & Stability",
         "Context->Demography",
         "Context->Economy",
-        "Context->Hazard & Threats",
-        "Context->Politics",
-        "Context->Overview",
-        "Context->Key Event",
-        "Context->Socio Cultural",
-        "Context->Legal  & Policy",
         "Context->Environment",
-        "Context->Stakeholders",
-        "Context->Response gap",
+        "Context->Security & Stability",
+        "Context->Socio Cultural",
+        "Context->Legal & Policy",
+        "Context->Politics",
+        "Context->Technological",
     ],
     [
-        "Humanitarian Profile->Affected Groups",
-        "Humanitarian Profile->Casualties",
-        "Humanitarian Profile->Population Movement",
+        "Shock/Event->Type And Characteristics",
+        "Shock/Event->Underlying/Aggravating Factors",
+        "Shock/Event->Hazard & Threats",
     ],
+    ["Casualties->Dead", "Casualties->Injured", "Casualties->Missing"],
     [
-        "Displacement->Push/Pull Factors",
-        "Displacement->Type/Numbers",
-        "Displacement->Local Integration",
+        "Displacement->Type/Numbers/Movements",
+        "Displacement->Push Factors",
+        "Displacement->Pull Factors",
         "Displacement->Intentions",
-        "Displacement->Displacement",
+        "Displacement->Local Integration",
     ],
-    ["Casualties->Dead", "Casualties->Injured", "Casualties->Missing"],
     [
+        "Humanitarian Access->Relief To Population",
+        "Humanitarian Access->Population To Relief",
         "Humanitarian Access->Physical Constraints",
-        "Humanitarian Access->Humanitarian Access Gaps",
+        (
+            "Humanitarian Access->Number Of People Facing Humanitarian Access Constraints"
+            "/Humanitarian Access Gaps"
+        ),
     ],
     [
-        "Information->Information Gaps",
-        "Information->Channels & Means",
-        "Information->Information Challenges",
+        "Information And Communication->Information Challenges And Barriers",
+        "Information And Communication->Communication Means And Preferences",
+        "Information And Communication->Knowledge And Info Gaps (Pop)",
+        "Information And Communication->Knowledge And Info Gaps (Hum)",
+    ],
+    [
+        "Covid-19->Cases",
+        "Covid-19->Deaths",
+        "Covid-19->Testing",
+        "Covid-19->Contact Tracing",
+        "Covid-19->Hospitalization & Care",
+        "Covid-19->Vaccination",
+        "Covid-19->Restriction Measures",
     ],
-]
-
-SECTORS = [
-    "Agriculture",
-    "Cross",
-    "Education",
-    "Food Security",
-    "Health",
-    "Livelihoods",
-    "Logistics",
-    "Nutrition",
-    "Protection",
-    "Shelter",
-    "WASH",
 ]
 
 PILLARS_2D = [
     "Humanitarian Conditions",
     "Capacities & Response",
     "Impact",
     "Priority Interventions",
-    "People At Risk",
+    "At Risk",
     "Priority Needs",
 ]
 
 SUBPILLARS_2D = [
     [
         "Humanitarian Conditions->Coping Mechanisms",
         "Humanitarian Conditions->Living Standards",
-        "Humanitarian Conditions->Number Of People In Need",
         "Humanitarian Conditions->Physical And Mental Well Being",
+        "Humanitarian Conditions->Number Of People In Need",
     ],
     [
         "Capacities & Response->International Response",
         "Capacities & Response->National Response",
-        "Capacities & Response->Number Of People Reached",
-        "Capacities & Response->Response Gaps",
+        "Capacities & Response->Local Response",
+        "Capacities & Response->Number Of People Reached/Response Gaps",
     ],
     [
         "Impact->Driver/Aggravating Factors",
         "Impact->Impact On People",
-        "Impact->Impact On People Or Impact On Services",
-        "Impact->Impact On Services",
-        "Impact->Impact On Systems And Services",
+        "Impact->Impact On Systems, Services And Networks",
         "Impact->Number Of People Affected",
     ],
     [
         "Priority Interventions->Expressed By Humanitarian Staff",
         "Priority Interventions->Expressed By Population",
     ],
     [
-        "People At Risk->Number Of People At Risk",
-        "People At Risk->Risk And Vulnerabilities",
+        "At Risk->Risk And Vulnerabilities",
+        "At Risk->Number Of People At Risk",
     ],
     [
         "Priority Needs->Expressed By Humanitarian Staff",