Skip to content

Commit

Permalink
Merge branch 'concept-linking' of https://github.com/Knox-AAU/Preproc…
Browse files Browse the repository at this point in the history
  • Loading branch information
Vi Thien Le authored and Vi Thien Le committed Dec 11, 2023
2 parents 0c6cd7f + 1e87f76 commit 2be0381
Show file tree
Hide file tree
Showing 30 changed files with 23,341 additions and 22,869 deletions.
363 changes: 363 additions & 0 deletions concept_linking/data/files/EvaluationData/Results/Untrained_DK.json

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
362 changes: 362 additions & 0 deletions concept_linking/data/files/EvaluationData/Results/Untrained_EN.json

Large diffs are not rendered by default.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
32 changes: 16 additions & 16 deletions concept_linking/data/files/EvaluationData/evaluationSet_EN.json
Original file line number Diff line number Diff line change
Expand Up @@ -547,23 +547,23 @@
"entityMentions": [
{
"name": "Tesla",
"type": "ORG",
"type": "Entity",
"label": "ORG",
"startIndex": 31,
"endIndex": 36,
"iri": "knox-kb01.srv.aau.dk/Tesla"
},
{
"name": "Cybertruck",
"type": "PRODUCT",
"type": "Entity",
"label": "PRODUCT",
"startIndex": 78,
"endIndex": 89,
"iri": "knox-kb01.srv.aau.dk/Cybertruck"
},
{
"name": "Los Angeles",
"type": "GPE",
"type": "Entity",
"label": "GPE",
"startIndex": 99,
"endIndex": 110,
Expand All @@ -578,24 +578,24 @@
"entityMentions": [
{
"name": "World Health Organization",
"type": "ORG",
"type": "Entity",
"label": "ORG",
"startIndex": 4,
"endIndex": 31,
"iri": "knox-kb01.srv.aau.dk/World_Health_Organization"
},
{
"name": "Delta variant",
"type": "ENTITY",
"label": "ENTITY",
"name": "Delta",
"type": "Entity",
"label": "LOC",
"startIndex": 64,
"endIndex": 77,
"iri": "knox-kb01.srv.aau.dk/Delta_variant"
},
{
"name": "COVID-19",
"type": "ENTITY",
"label": "ENTITY",
"label": "ORG",
"startIndex": 81,
"endIndex": 89,
"iri": "knox-kb01.srv.aau.dk/COVID-19"
Expand All @@ -609,31 +609,31 @@
"entityMentions": [
{
"name": "Middle East",
"type": "LOC",
"type": "Entity",
"label": "LOC",
"startIndex": 32,
"endIndex": 44,
"iri": "knox-kb01.srv.aau.dk/Middle_East"
},
{
"name": "United Nations Security Council",
"type": "ORG",
"type": "Entity",
"label": "ORG",
"startIndex": 48,
"endIndex": 80,
"iri": "knox-kb01.srv.aau.dk/United_Nations_Security_Council"
},
{
"name": "Israel",
"type": "GPE",
"type": "Entity",
"label": "GPE",
"startIndex": 106,
"endIndex": 112,
"iri": "knox-kb01.srv.aau.dk/Israel"
},
{
"name": "Palestine",
"type": "GPE",
"type": "Entity",
"label": "GPE",
"startIndex": 117,
"endIndex": 126,
Expand All @@ -648,31 +648,31 @@
"entityMentions": [
{
"name": "Apple Inc.",
"type": "ORG",
"type": "Entity",
"label": "ORG",
"startIndex": 10,
"endIndex": 20,
"iri": "knox-kb01.srv.aau.dk/Apple_Inc"
},
{
"name": "iPhone 13",
"type": "PRODUCT",
"type": "Entity",
"label": "PRODUCT",
"startIndex": 104,
"endIndex": 114,
"iri": "knox-kb01.srv.aau.dk/iPhone_13"
},
{
"name": "MacBooks",
"type": "PRODUCT",
"type": "Entity",
"label": "PRODUCT",
"startIndex": 140,
"endIndex": 148,
"iri": "knox-kb01.srv.aau.dk/MacBooks"
},
{
"name": "iPads",
"type": "PRODUCT",
"type": "Entity",
"label": "PRODUCT",
"startIndex": 159,
"endIndex": 164,
Expand Down
2 changes: 1 addition & 1 deletion concept_linking/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
-r solutions/PromptEngineering/requirements.txt

#Requirements for StringComparison
-r solutions/StringComparison/requirements.txt
#-r solutions/StringComparison/requirements.txt

#Requirements for UntrainedSpacy
-r solutions/UntrainedSpacy/requirements.txt
178 changes: 0 additions & 178 deletions concept_linking/solutions/MachineLearning/data/training_data.json

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,15 @@
import nltk
import random

nltk.download('punkt')
# Check if punkt is already downloaded
try:
nltk.data.find('tokenizers/punkt')
except LookupError:
# Download punkt if not found
print("Downloading punkt...")
nltk.download('punkt')
print("Download complete.")


class EntityGenerator:
def __init__(self):
Expand Down Expand Up @@ -251,7 +259,7 @@ def generate_random_word(self):
}]

# Specify the file path
file_path = '../data/generated_data.json'
file_path = '../generate_dataset/generated_data.json'

# Write the JSON object to a file
with open(file_path, 'w') as json_file:
Expand Down
Loading

0 comments on commit 2be0381

Please sign in to comment.