diff --git a/Dockerfile b/Dockerfile
index 68f7895..0589b48 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,9 +2,8 @@ FROM python:3.11-slim
 WORKDIR /code
-COPY requirements.txt ./
-RUN pip install --no-cache-dir -r requirements.txt
-
 COPY . .
+RUN pip install --no-cache-dir -r requirements_docker.txt
+
 CMD ["python", "-u", "-m", "server.server", "--host", "0.0.0.0", "--port", "4444", "--reload"]
\ No newline at end of file
diff --git a/concept_linking/README.md b/concept_linking/README.md
index 5e4966b..2ac5f60 100644
--- a/concept_linking/README.md
+++ b/concept_linking/README.md
@@ -1,61 +1,126 @@
-# D: Preproscessing Layer - Concept Linking
+# Concept Linking
 
-Repository of group D in KNOX pipeline.
+---
+## Background
+In group D, four different solutions have been implemented. These are described in a later section.
+By default, the solution that runs is PromptEngineering.
 
-## Description
+To change which solution runs in the pipeline, perform the following changes.
 
-This repository is for type classification of already provided sententes with given entity mentions. Several different solutions were created, in order to find the best one.
+First, change directory to the 'server' folder in the root directory.
 
-### Dependencies
+Next, open the server.py file.
+On line 24, where the current solution's entry point (perform_entity_type_classification
+from PromptEngineering) is called, change the call to run the desired solution.
 
-- Python
-- PIP
-- Git
 
-### Installing
+## Requirements
+For any of the four solutions, it is necessary to install the requirements found
+in the requirements.txt file inside /{Solution_Name}/requirements.txt
+
+However, since this is a joint codebase for both relation extraction and concept linking,
+there is a global requirements.txt file in the root directory.
+It follows a nested structure, meaning that installing only the one in the root folder
+will install all the rest.
+
+It installs the necessary requirements for both groups' solutions.
+However, since it is possible to change which of the four concept-linking solutions to run,
+the requirements must also be installed accordingly.
+This is done by navigating to
 ```
-git clone https://github.com/Knox-AAU/PreprocessingLayer_Concept-Linking
+./concept_linking/requirements.txt
 ```
+This file lists a reference to each of the four solutions' requirements.txt files.
+Remove the #(comment) from the one referencing the solution you want to run.
+
+### Example
+Install the requirements for the PromptEngineering solution.
 
-### Initial Setup
+Navigate to the following directory
 
-- Navigate to root folder
-- Run the following command for installing all requirements:
+```
+../concept_linking/solutions/PromptEngineering/
+```
+and run the following command
 
 ```
 pip install -r requirements.txt
 ```
 
-### Adding modules
+## Solutions
 
-- Navigate to root folder.
-- Run the following command to add all installed modules:
+Below is a brief description of each of the four solutions and how to get started.
+
+---
 
-```
-pip freeze > requirements.txt
-```
 
-### Executing program
+### Machine Learning
+description WIP
 
-- Navigate to main.py in Program directory
-- Run main.py with Python
+### Prompt Engineering
+Uses the Llama2 LLM. A prompt is given to the model:
 
 ```
-python .\program\main.py
+prompt_template = {
+    "system_message": ("The input sentence is all your knowledge. \n"
+                       "Do not answer if it can't be found in the sentence. \n"
+                       "Do not use bullet points. \n"
+                       "Do not identify entity mentions yourself, use the provided ones \n"
+                       "Given the input in the form of the content from a file: \n"
+                       "[Sentence]: {content_sentence} \n"
+                       "[EntityMention]: {content_entity} \n"),
+
+    "user_message": ("Classify the [EntityMention] in regards to ontology classes: {ontology_classes} \n"
+                     "The output answer must be in JSON in the following format: \n"
+                     "{{ \n"
+                     "'Entity': 'Eiffel Tower', \n"
+                     "'Class': 'ArchitecturalStructure' \n"
+                     "}} \n"),
+
+    "max_tokens": 4092
+}
 ```
-or:
+The variables {content_sentence} and {content_entity} are produced in a previous part of the KNOX pipeline.
+The variable {ontology_classes} is fetched from the Ontology endpoint provided by group E (Database Layer).
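+
+For illustration, the placeholders might be filled in roughly like this (a sketch only; build_prompt is a hypothetical helper, not the actual code in solutions/PromptEngineering/main.py):
+```
+# Hypothetical helper -- illustrates how the template's placeholders are resolved
+# before the prompt is sent to the Llama2 model.
+def build_prompt(template, content_sentence, content_entity, ontology_classes):
+    system = template["system_message"].format(content_sentence=content_sentence,
+                                               content_entity=content_entity)
+    user = template["user_message"].format(ontology_classes=ontology_classes)
+    return system, user
+```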
+
+#### Using a local Llama API server
+It is possible to use a local LlamaServer. It can be found in ../concept_linking/tools/LlamaServer.
+A README for setting up an instance of this server can be found in the directory given above.
+
+#### Using the Llama API server hosted in the KNOX pipeline
+WIP
+Go to the directory /concept_linking/PromptEngineering/main
+and set the api_url accordingly
+```
+api_url={domain or ip+port of llama server hosted in the knox pipeline}
 ```
-cd .\program\
-python .\main.py
-```
+Refer to the Server Distribution document
+for specific DNS and IP+port information.
 
-## Report
+### String Comparison
+description WIP
 
-Description of the project can be found in the report on [Overleaf](https://www.overleaf.com/project/65000513b10b4521e8907099) (requires permission)
-## Authors
+### Untrained Spacy
+description WIP
+
+
+
+---
+
+## Tools
+
+### LlamaServer
 
-Lucas, Gamma, Vi, Mikkel, Caspar & Rune
+### OntologyGraphBuilder
+
+---
+
+## Report
+Description of the project can be found in the report on Overleaf (requires permission)
+
+## Authors
+Lucas, Gamma, Vi, Mikkel, Caspar & Rune
\ No newline at end of file
diff --git a/concept_linking/requirements.txt b/concept_linking/requirements.txt
new file mode 100644
index 0000000..24312c6
--- /dev/null
+++ b/concept_linking/requirements.txt
@@ -0,0 +1,13 @@
+# Current solution
+#Requirements for PromptEngineering
+-r solutions/PromptEngineering/requirements.txt
+
+# Other solutions
+#Requirements for MachineLearning
+#-r solutions/MachineLearning/requirements.txt
+
+#Requirements for StringComparison
+#-r solutions/StringComparison/requirements.txt
+
+#Requirements for UntrainedSpacy
+#-r solutions/UntrainedSpacy/requirements.txt
diff --git a/concept_linking/solutions/PromptEngineering/requirements.txt b/concept_linking/solutions/PromptEngineering/requirements.txt
index 46b134b..db07bab 100644
Binary files a/concept_linking/solutions/PromptEngineering/requirements.txt and b/concept_linking/solutions/PromptEngineering/requirements.txt differ
diff --git a/concept_linking/tools/LlamaServer/README.md b/concept_linking/tools/LlamaServer/README.md
new file mode 100644
index 0000000..e37c7cc
--- /dev/null
+++ b/concept_linking/tools/LlamaServer/README.md
@@ -0,0 +1,53 @@
+# Tools: LlamaServer
+
+## Description
+Local API server based on Llama2.
+
+## Getting started
+It is possible to use the LlamaServer found in ../concept_linking/tools/LlamaServer
+as a local Llama API server.
+
+The server uses a quantized version of the Llama2-7B model, which
+needs to be present in the directory given above.
+If it is not present, just run the server a single time and the model will be downloaded.
+This is necessary before trying to create a Docker image for the server.
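+
+For example, a first run could look like this (a sketch only; the entry-point filename llama_server.py is an assumption, check the actual file in this directory):
+```
+cd concept_linking/tools/LlamaServer
+python llama_server.py
+```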
+
+[//]: # (This instance has been optimized for usage with Nvidia CUDA supported GPUs.)
+
+[//]: # (The software will automatically detect a valid CUDA GPU if one is present.)
+
+[//]: # (If not, the solution will fall back to using the CPU.)
+
+## Requirements
+Install the requirements for this tool.
+
+Navigate to the following directory
+
+```
+../concept_linking/tools/LlamaServer/
+```
+
+and run the following command
+```
+pip install -r requirements.txt
+```
+
+Since this is meant as a tool for running Llama locally on Windows, a C++ installation is required.
+C++ can be installed via the Visual Studio Installer:
+select "Desktop development with C++".
+
+## Docker
+To build the server as a Docker image, change the directory in a terminal to ../concept_linking/tools/LlamaServer
+and run the following command
+
+```
+docker build -t llama-cpu-server .
+```
+
+* Open Docker Desktop
+* Select Images, and under 'Actions' select 'Run'
+* A prompt will appear; expand the 'Optional settings'
+* Under 'Ports', set 'Host Port' to 5000
+* Press Run
+
+The server should now be up and running.
diff --git a/relation_extraction/requirements.txt b/relation_extraction/requirements.txt
new file mode 100644
index 0000000..3d23496
--- /dev/null
+++ b/relation_extraction/requirements.txt
@@ -0,0 +1,5 @@
+requests==2.31.0
+strsimpy==0.2.1
+mock==5.1.0
+python-dotenv==0.21.0
+Flask==3.0.0
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 3d23496..af4532a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,7 @@
-requests==2.31.0
-strsimpy==0.2.1
-mock==5.1.0
-python-dotenv==0.21.0
-Flask==3.0.0
\ No newline at end of file
+#For python
+#----------
+# Concept linking
+-r concept_linking/requirements.txt
+
+# Relation extraction
+-r relation_extraction/requirements.txt
\ No newline at end of file
diff --git a/requirements_docker.txt b/requirements_docker.txt
new file mode 100644
index 0000000..c1ff3d9
--- /dev/null
+++ b/requirements_docker.txt
@@ -0,0 +1,7 @@
+#For docker
+#----------
+# Concept linking
+-r /code/concept_linking/requirements.txt
+
+# Relation extraction
+-r /code/relation_extraction/requirements.txt
\ No newline at end of file
diff --git a/server/server.py b/server/server.py
index 745329e..f0c9907 100644
--- a/server/server.py
+++ b/server/server.py
@@ -2,9 +2,11 @@ import json
 import os
 
 from relation_extraction.relation_extractor import RelationExtractor
+from concept_linking.solutions.PromptEngineering.main import perform_entity_type_classification
 
 app = Flask(__name__)
 
+
 @app.route('/tripleconstruction', methods=["POST"])
 def do_triple_construction():
     print("Received POST request...")
@@ -20,7 +22,7 @@
         post_json = json.loads(post_data)
 
         RelationExtractor.begin_extraction(post_json)
-        #Begin ConceptLinking
+        perform_entity_type_classification(post_json)
 
         message = "Post request was successfully processed. Relation extraction and concept linking completed."
         return jsonify(message=message), 200
@@ -28,10 +30,12 @@
     except Exception as e:
         return jsonify(error=f"Error occured: {str(e)}"), 422
 
+
 @app.errorhandler(404)
 def page_not_found(error):
     message = "Invalid endpoint"
     return jsonify(error=message), 404
 
+
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=4444)
\ No newline at end of file
+    app.run(host='0.0.0.0', port=4444)
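
For reference, the new /tripleconstruction endpoint added in server/server.py can be exercised roughly as below (a minimal sketch; the payload is a placeholder, since the JSON schema expected by RelationExtractor.begin_extraction and perform_entity_type_classification is not shown in this diff):

```
import json
import requests

# Placeholder payload -- the real schema is defined elsewhere in the KNOX pipeline.
payload = {"example": "replace with the pipeline's expected input"}

# server.py listens on 0.0.0.0:4444 (see app.run and the Dockerfile CMD).
response = requests.post("http://localhost:4444/tripleconstruction", data=json.dumps(payload))
print(response.status_code, response.json())
```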