From 004d622c54da1db3bd7aa442459e3b1cc88b9e8f Mon Sep 17 00:00:00 2001
From: Yanju Chen <chyanju@gmail.com>
Date: Wed, 28 Feb 2024 12:45:20 -0800
Subject: [PATCH] Add pandas to CI and print confusion matrices in test suite

---
 .github/workflows/dev.yml |  6 ++++++
 tests/scripts/dep.py      |  2 ++
 tests/test4.ipynb         | 14 ++++++++++++--
 vanguard/aleo/testing.py  | 10 ++++++++++
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index e1bdf8f..24bc540 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -20,6 +20,7 @@ jobs:
                 pip install networkx[default]
                 pip install lark
                 pip install beautifulsoup4
+                pip install pandas
             - name: test hello.py
               run: python tests/scripts/hello.py
             - name: test dep.py
@@ -37,6 +38,7 @@ jobs:
                 pip install networkx[default]
                 pip install lark
                 pip install beautifulsoup4
+                pip install pandas
             - name: test parsing.py
               run: PYTHONPATH="./" python ./tests/scripts/parsing.py
 
@@ -52,6 +54,7 @@ jobs:
                 pip install networkx[default]
                 pip install lark
                 pip install beautifulsoup4
+                pip install pandas
             - name: test divz
               run: PYTHONPATH="./" python ./tests/scripts/test-divz.py
 
@@ -67,6 +70,7 @@ jobs:
                 pip install networkx[default]
                 pip install lark
                 pip install beautifulsoup4
+                pip install pandas
             - name: test infoleak
               run: PYTHONPATH="./" python ./tests/scripts/test-infoleak.py
     
@@ -82,6 +86,7 @@ jobs:
                 pip install networkx[default]
                 pip install lark
                 pip install beautifulsoup4
+                pip install pandas
             - name: test rtcnst
               run: PYTHONPATH="./" python ./tests/scripts/test-rtcnst.py
     
@@ -97,5 +102,6 @@ jobs:
                 pip install networkx[default]
                 pip install lark
                 pip install beautifulsoup4
+                pip install pandas
             - name: test unused
               run: PYTHONPATH="./" python ./tests/scripts/test-unused.py
\ No newline at end of file
diff --git a/tests/scripts/dep.py b/tests/scripts/dep.py
index 8f09c50..6b6fc05 100644
--- a/tests/scripts/dep.py
+++ b/tests/scripts/dep.py
@@ -1,5 +1,7 @@
 import lark
 import networkx
+import bs4
+import pandas
 
 if __name__ == "__main__":
     print("Hello World!")
\ No newline at end of file
diff --git a/tests/test4.ipynb b/tests/test4.ipynb
index f476934..feec2f7 100644
--- a/tests/test4.ipynb
+++ b/tests/test4.ipynb
@@ -103,7 +103,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 7,
    "id": "a983f9b5-286c-4748-b2c7-b450ff0d7948",
    "metadata": {},
    "outputs": [
@@ -119,7 +119,17 @@
       "# [✓][test] pid: unused0.aleo, fid: ex4, expected: True, actual: True\n",
       "# [✗][test] pid: unused0.aleo, fid: ex5, expected: False, actual: True\n",
       "# [✓][test] pid: unused0.aleo, fid: ex6, expected: True, actual: True\n",
-      "# [test] accuracy: 5/7 (0.7143)\n"
+      "# [test] accuracy: 5/7 (0.7143)\n",
+      "# [test] confusion matrix:\n",
+      "  actual    False  True \n",
+      "expected              \n",
+      "False         1      2\n",
+      "True          0      4\n",
+      "# [test] normalized confusion matrix:\n",
+      "  actual       False     True \n",
+      "expected                    \n",
+      "False     0.333333  0.666667\n",
+      "True      0.000000  1.000000\n"
      ]
     }
    ],
diff --git a/vanguard/aleo/testing.py b/vanguard/aleo/testing.py
index 4bc2876..997a755 100644
--- a/vanguard/aleo/testing.py
+++ b/vanguard/aleo/testing.py
@@ -1,6 +1,7 @@
 import requests
 import time
 import json
+import pandas as pd
 
 from bs4 import BeautifulSoup
 
@@ -32,6 +33,15 @@ def run_test_suite(build_path, detector, verbose=False):
 
     if verbose:
         print(f"# [test] accuracy: {ncorrect}/{len(actual_labels)} ({ncorrect/len(actual_labels):.4f})")
+
+    if verbose:
+        pd_expected = pd.Series(expected_labels, name="expected")
+        pd_actual = pd.Series(actual_labels, name="actual")
+        mtx =pd.crosstab(pd_expected, pd_actual)
+        mtx_norm = mtx.div(mtx.sum(axis=1), axis="index")
+        print(f"# [test] confusion matrix:\n  {mtx}")
+        print(f"# [test] normalized confusion matrix:\n  {mtx_norm}")
+    
     return (expected_labels, expected_infos, actual_labels, actual_infos)
 
 def crawl_from_haruka_explorer(istart, iend, folder):