ML and untrained spaCy unit tests

Knox-AAU · Dec 13, 2023 · fe52d41 · fe52d41
1 parent 55e540e
commit fe52d41
Show file tree

Hide file tree

Showing 2 changed files with 81 additions and 3 deletions.
diff --git a/test/test_concept_linking/test_machineLearning.py b/test/test_concept_linking/test_machineLearning.py
@@ -7,6 +7,11 @@
 from concept_linking.solutions.MachineLearning.src.training_dataset import TrainingDataset
 from concept_linking.solutions.MachineLearning.src.config import TrainingConfig, ModelConfig
 from concept_linking.solutions.MachineLearning.src.data_preprocessing import split_data, load_data, extract_sentences
+from concept_linking.solutions.MachineLearning.main import predict
+from concept_linking.solutions.MachineLearning.src.prediction_dataset import PredictionDataset
+from concept_linking.solutions.MachineLearning.src.data_preprocessing import extract_sentences, load_data
+
+
 import json
 from sklearn.model_selection import train_test_split
 
@@ -16,7 +21,7 @@ class TestMachineLearning(unittest.TestCase):
     def setUp(self):
         self.data = [{"sentences": ["sentence " + str(i)]} for i in range(100)]
 
-    def test_correct_split_ratio(self):
+    def test_split_ratio(self):
         train_data, val_data, test_data = split_data(self.data, test_size=0.2, val_size=0.5, random_state=42)
 
         # Check if the split ratios are correct
@@ -25,7 +30,6 @@ def test_correct_split_ratio(self):
         self.assertEqual(len(test_data), 10)   # 10% for testing
 
     def test_error_on_insufficient_samples(self):
-        # Test with insufficient data
         small_data = [{"sentences": ["sentence 1", "sentence 2"]}]
 
         test_size = 0.5  # This will take 1 sentence for testing, leaving 1 for training and validation
@@ -34,7 +38,7 @@ def test_error_on_insufficient_samples(self):
         with self.assertRaises(ValueError):
             split_data(small_data, test_size=test_size, val_size=val_size, random_state=42)
 
-    def test_reproducibility_with_random_state(self):
+    def test_with_random_state(self):
         train_data1, val_data1, test_data1 = split_data(self.data, test_size=0.2, val_size=0.5, random_state=42)
         train_data2, val_data2, test_data2 = split_data(self.data, test_size=0.2, val_size=0.5, random_state=42)
 
@@ -61,6 +65,9 @@ def test_extract_sentences(self):
         result = extract_sentences(mock_data)
         self.assertEqual(result, expected_sentences)
 
+
+
+
 #class TestTrainModel(unittest.TestCase):
 #
 #    def setUp(self):

diff --git a/test/test_concept_linking/untrainedSpacy.py b/test/test_concept_linking/untrainedSpacy.py
@@ -0,0 +1,71 @@
+import unittest
+from unittest.mock import patch, MagicMock
+from concept_linking.solutions.UntrainedSpacy.untrainedSpacy import generateSpacyLabels, generateSpacyMatches, generateSpacyUnmatchedExplanations, generateTriplesFromJSON
+
+class TestUntrainedSpacyFunctions(unittest.TestCase):
+    """Smoke tests for the untrainedSpacy file-generation helpers.
+
+    The helpers named in each ``@patch`` are replaced by mocks. These tests
+    contain no assertions: they only check that each call completes without
+    raising.
+    """
+
+    # NOTE: stacked @patch decorators are applied bottom-up, so the lowest
+    # decorator supplies the first mock argument after ``self``.
+
+    @patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.writeFile')
+    @patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.nlp.get_pipe')
+    def test_generateSpacyLabels(self, mock_get_pipe, mock_writeFile):
+        """Call generateSpacyLabels with the pipe labels and writeFile mocked."""
+        mock_get_pipe.return_value.labels = ['PERSON', 'ORG']
+        generateSpacyLabels()
+
+    @patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.readFile')
+    @patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.writeFile')
+    def test_generateSpacyMatches(self, mock_writeFile, mock_readFile):
+        """Call generateSpacyMatches with two successive mocked readFile results."""
+        mock_readFile.side_effect = ['person\norg', 'person\ncompany']
+        generateSpacyMatches()
+
+    @patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.readFile')
+    @patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.appendFile')
+    @patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.clearFile')
+    def test_generateSpacyUnmatchedExplanations(self, mock_clearFile, mock_appendFile, mock_readFile):
+        """Call generateSpacyUnmatchedExplanations with readFile returning 'org'."""
+        # Parameter order mirrors the decorators bottom-up (clearFile,
+        # appendFile, readFile) so the return value lands on the readFile mock.
+        mock_readFile.return_value = 'org'
+        generateSpacyUnmatchedExplanations()
+
+
+class TestGenerateTriplesFromJSON(unittest.TestCase):
+    """Test generateTriplesFromJSON with entity-mention extraction mocked.
+
+    Defined at module level so unittest discovery collects it; a TestCase
+    nested inside another TestCase is never collected.
+    """
+
+    @patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.extract_entity_mentions_from_input')
+    def test_generateTriplesFromJSON(self, entity_mentions):
+        """Expect one rdf:type triple linking the mocked mention to dbo:Person."""
+        # Mocking the extract_entity_mentions_from_input function
+        entity_mentions.return_value = {
+            "sentence1": [
+                ("knox-kb01.srv.aau.dk/Bob_Marley", "person")
+            ]
+        }
+
+        test_data = [
+            {
+                "fileName": "Artikel.txt",
+                "language": "en",
+                "sentences": [
+                    {
+                        "sentence": "Bob Marley is a person and has a car",
+                        "sentenceStartIndex": 0,
+                        "sentenceEndIndex": 149,
+                        "entityMentions": [
+                            {
+                                "name": "Bob Marley",
+                                "type": "Entity",
+                                "label": "PERSON",
+                                "startIndex": 0,
+                                "endIndex": 10,
+                                "iri": "knox-kb01.srv.aau.dk/Bob Marley"
+                            },
+                        ]
+                    },
+                ]
+            }
+        ]
+
+        output_sentence_test_run = False
+
+        expected_triples = [
+            ("knox-kb01.srv.aau.dk/Bob_Marley", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
+             "https://dbpedia.org/ontology/Person")
+        ]
+
+        result = generateTriplesFromJSON(test_data, output_sentence_test_run)
+        self.assertEqual(result, expected_triples)
+
+if __name__ == '__main__':
+    unittest.main()