Skip to content

Commit

Permalink
ML and untrained spaCy unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Gamm0 committed Dec 13, 2023
1 parent 55e540e commit fe52d41
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 3 deletions.
13 changes: 10 additions & 3 deletions test/test_concept_linking/test_machineLearning.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
from concept_linking.solutions.MachineLearning.src.training_dataset import TrainingDataset
from concept_linking.solutions.MachineLearning.src.config import TrainingConfig, ModelConfig
from concept_linking.solutions.MachineLearning.src.data_preprocessing import split_data, load_data, extract_sentences
from concept_linking.solutions.MachineLearning.main import predict
from concept_linking.solutions.MachineLearning.src.prediction_dataset import PredictionDataset
from concept_linking.solutions.MachineLearning.src.data_preprocessing import extract_sentences, load_data


import json
from sklearn.model_selection import train_test_split

Expand All @@ -16,7 +21,7 @@ class TestMachineLearning(unittest.TestCase):
def setUp(self):
    """Create the shared fixture: 100 records, each with one numbered sentence."""
    records = []
    for index in range(100):
        # Each record mirrors the input shape used by the split tests:
        # a dict with a "sentences" list.
        records.append({"sentences": ["sentence " + str(index)]})
    self.data = records

def test_correct_split_ratio(self):
def test_split_ratio(self):
train_data, val_data, test_data = split_data(self.data, test_size=0.2, val_size=0.5, random_state=42)

# Check if the split ratios are correct
Expand All @@ -25,7 +30,6 @@ def test_correct_split_ratio(self):
self.assertEqual(len(test_data), 10) # 10% for testing

def test_error_on_insufficient_samples(self):
# Test with insufficient data
small_data = [{"sentences": ["sentence 1", "sentence 2"]}]

test_size = 0.5 # This will take 1 sentence for testing, leaving 1 for training and validation
Expand All @@ -34,7 +38,7 @@ def test_error_on_insufficient_samples(self):
with self.assertRaises(ValueError):
split_data(small_data, test_size=test_size, val_size=val_size, random_state=42)

def test_reproducibility_with_random_state(self):
def test_with_random_state(self):
train_data1, val_data1, test_data1 = split_data(self.data, test_size=0.2, val_size=0.5, random_state=42)
train_data2, val_data2, test_data2 = split_data(self.data, test_size=0.2, val_size=0.5, random_state=42)

Expand All @@ -61,6 +65,9 @@ def test_extract_sentences(self):
result = extract_sentences(mock_data)
self.assertEqual(result, expected_sentences)




#class TestTrainModel(unittest.TestCase):
#
# def setUp(self):
Expand Down
71 changes: 71 additions & 0 deletions test/test_concept_linking/untrainedSpacy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import unittest
from unittest.mock import patch, MagicMock
from concept_linking.solutions.UntrainedSpacy.untrainedSpacy import generateSpacyLabels, generateSpacyMatches, generateSpacyUnmatchedExplanations, generateTriplesFromJSON

class TestUntrainedSpacyFunctions(unittest.TestCase):
class TestGenerateTriplesFromJSON(unittest.TestCase):

@patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.extract_entity_mentions_from_input')
def test_generateTriplesFromJSON(self, entity_mentions):
# Mocking the extract_entity_mentions_from_input function
entity_mentions.return_value = {
"sentence1": [
("knox-kb01.srv.aau.dk/Bob_Marley", "person")
]
}

test_data = [
{
"fileName": "Artikel.txt",
"language": "en",
"sentences": [
{
"sentence": "Bob Marley is a person and has a car",
"sentenceStartIndex": 0,
"sentenceEndIndex": 149,
"entityMentions": [
{
"name": "Bob Marley",
"type": "Entity",
"label": "PERSON",
"startIndex": 0,
"endIndex": 10,
"iri": "knox-kb01.srv.aau.dk/Bob Marley"
},
]
},
]
}
]

output_sentence_test_run = False

expected_triples = [
("knox-kb01.srv.aau.dk/Bob_Marley", "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
"https://dbpedia.org/ontology/Person")
]

result = generateTriplesFromJSON(test_data, output_sentence_test_run)
self.assertEqual(result, expected_triples)

@patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.writeFile')
@patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.nlp.get_pipe')
def test_generateSpacyLabels(self, mock_get_pipe, mock_writeFile):
mock_get_pipe.return_value.labels = ['PERSON', 'ORG']
generateSpacyLabels()

@patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.readFile')
@patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.writeFile')
def test_generateSpacyMatches(self, mock_writeFile, mock_readFile):
mock_readFile.side_effect = ['person\norg', 'person\ncompany']
generateSpacyMatches()

@patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.readFile')
@patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.appendFile')
@patch('concept_linking.solutions.UntrainedSpacy.untrainedSpacy.clearFile')
def test_generateSpacyUnmatchedExplanations(self, mock_clearFile, mock_readFile, mock_appendFile):
mock_readFile.return_value = 'org'
generateSpacyUnmatchedExplanations()

# Run the tests in this module when the file is executed directly.
if __name__ == "__main__":
    unittest.main()

0 comments on commit fe52d41

Please sign in to comment.