Skip to content

Commit

Permalink
text summarizer
Browse files Browse the repository at this point in the history
  • Loading branch information
bansikah22 committed Oct 31, 2024
1 parent dd450a9 commit 781b3ce
Show file tree
Hide file tree
Showing 18 changed files with 201 additions and 1 deletion.
40 changes: 40 additions & 0 deletions python-projects/model-based/text-summarization-tool/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Clone the repository
git clone your-github-repo-url text-summarization-tool

# Navigate to the project directory
cd text-summarization-tool

# Set up the backend
```bash
cd backend
python3 -m venv venv # Create a virtual environment (skip this step if the venv already exists; just activate it with the next command so you don't have to reinstall all the dependencies)
source venv/bin/activate # Activate the virtual environment (use `venv\Scripts\activate` on Windows)
pip install -r requirements.txt # Install required packages
python app.py # Or: python3 app.py. This runs the Flask server and downloads the model on first run. The code loads the small `t5-small` model; you can swap in a larger one such as `facebook/bart-large-cnn`.
```
# Set up the frontend
cd ../frontend
Open index.html in your browser, or follow the optional step below.

# Optional: Use a local server to serve the frontend
python -m http.server # This serves the frontend at http://localhost:8000

Accessing the Application
Backend URL: http://127.0.0.1:5000 (for API requests)
Frontend URL: http://localhost:8000 (for the web interface)

Testing the Application
Open the frontend in your browser at http://localhost:8000.
Enter a long piece of text and click "Summarize" to test if the application works as expected.

```text
flask
flask-cors
transformers
torch
```
Explanation of the Packages
`flask:` The web framework for building the backend API.
`flask-cors:` A Flask extension for handling Cross-Origin Resource Sharing (CORS), allowing your frontend to communicate with the backend.
`transformers:` The Hugging Face library that provides the pre-trained models and pipelines, including the `t5-small` summarization model used by this project.
`torch:` The PyTorch library, which is required to run the models from the transformers library.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
venv
Binary file not shown.
29 changes: 29 additions & 0 deletions python-projects/model-based/text-summarization-tool/backend/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from flask import Flask, request, jsonify
from flask_cors import CORS
from model.summarization import summarize_text
from utils.helpers import validate_text, sanitize_text
from config import Config

# Build the Flask application and load its settings from the Config class.
app = Flask(__name__)
app.config.from_object(Config)

# Enable CORS for the origins configured under ALLOWED_ORIGINS.
_allowed_origins = app.config['ALLOWED_ORIGINS']
CORS(app, origins=_allowed_origins)

@app.route('/summarize', methods=['POST'])
def summarize():
    """Summarize the text supplied in a JSON POST body.

    The request body must be a JSON object with a ``text`` field.

    Returns:
        ``{"summary": ...}`` on success; ``{"error": ...}`` with status 400
        when the body or the text is invalid; ``{"error": ...}`` with status
        500 when summarization itself fails.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad request is answered with the same JSON error shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'text' not in data:
        return jsonify(
            error="Request body must be a JSON object with a 'text' field."
        ), 400

    text = data['text']
    try:
        # Validate and sanitize the input text
        validate_text(text)
        sanitized_text = sanitize_text(text)
        summary = summarize_text(sanitized_text)
    except ValueError as ve:
        return jsonify(error=str(ve)), 400
    except Exception:
        # Keep the details in the server log; do not leak internals to clients.
        app.logger.exception("Summarization failed")
        return jsonify(error="Failed to generate summary."), 500
    return jsonify(summary=summary)

if __name__ == '__main__':
    # Use the DEBUG value loaded into app.config rather than hard-coding it,
    # so the debugger/reloader is controlled from a single setting.
    app.run(debug=app.config['DEBUG'])
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import os

class Config:
    """Application settings, exposed as class-level constants."""

    # Upper bound on input size, expressed in words.
    MAX_INPUT_LENGTH = 2000

    # CORS origins; read from the environment, falling back to "*" when unset.
    ALLOWED_ORIGINS = os.environ.get("ALLOWED_ORIGINS", "*")

    # Debug flag.
    DEBUG = True

    # Further configuration variables can be added here as needed.
Empty file.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from transformers import pipeline

# Name of the pretrained checkpoint backing the summarization pipeline.
_MODEL_NAME = "t5-small"

# Build the pipeline once at import time; summarize_text reuses this object.
summarizer = pipeline("summarization", model=_MODEL_NAME)

def summarize_text(text, max_length=130, min_length=30, max_words=1024):
    """Return a summary of ``text`` produced by the module-level pipeline.

    Args:
        text: The text to summarize.
        max_length: Upper bound on the generated summary length, forwarded to
            the pipeline.
        min_length: Lower bound on the generated summary length, forwarded to
            the pipeline.
        max_words: Inputs with more whitespace-separated words than this are
            cut down to their first ``max_words`` words before summarization.

    Returns:
        The ``summary_text`` field of the first pipeline result.
    """
    # Cheap pre-trim so very large inputs are not tokenized in full. Split once
    # and reuse the result. A word count is only a rough proxy for tokens.
    words = text.split()
    if len(words) > max_words:
        text = ' '.join(words[:max_words])

    # truncation=True makes the tokenizer cut the input to the model's actual
    # token limit, which the word-based trim above cannot guarantee.
    summary = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
Flask
flask-cors
transformers # For AI model
torch # If you're using a PyTorch-based model
Empty file.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# helpers.py

def validate_text(text):
    """Check that ``text`` is a string with at least one non-whitespace character.

    Returns:
        True when the check passes.

    Raises:
        ValueError: If ``text`` is not a string or is blank.
    """
    is_valid = isinstance(text, str) and len(text.strip()) != 0
    if is_valid:
        return True
    raise ValueError("Text must be a non-empty string.")

def sanitize_text(text):
    """Return ``text`` with leading and trailing whitespace removed."""
    trimmed = text.strip()
    return trimmed
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Submit handler for the summarization form: posts the textarea content to the
// backend /summarize endpoint and shows either the summary or an error message
// in the #summaryResult element.
document.getElementById('summarizationForm').onsubmit = async function (e) {
    e.preventDefault();
    const inputText = document.getElementById('inputText').value;
    const summaryResult = document.getElementById('summaryResult');

    // Reject blank input locally instead of making a round trip that the
    // backend would refuse anyway.
    if (inputText.trim() === '') {
        summaryResult.innerText = "Error: Text must be a non-empty string.";
        return;
    }

    // Show loading indicator
    summaryResult.innerText = "Generating summary...";

    try {
        const response = await fetch('http://127.0.0.1:5000/summarize', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({ text: inputText })
        });

        // A failing server or proxy may answer with a non-JSON body; report
        // the HTTP status rather than a cryptic JSON parse error.
        let summaryData;
        try {
            summaryData = await response.json();
        } catch (parseError) {
            throw new Error("Unexpected response from server (HTTP " + response.status + ")");
        }

        if (summaryData.error) {
            summaryResult.innerText = "Error: " + summaryData.error;
        } else if (!response.ok) {
            summaryResult.innerText = "Error: request failed (HTTP " + response.status + ")";
        } else {
            summaryResult.innerText = summaryData.summary;
        }
    } catch (error) {
        summaryResult.innerText = "Error: " + error.message;
    }
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Text Summarization Tool</title>
    <link rel="stylesheet" href="style.css">
</head>
<body>
    <div class="container">
        <h1>Text Summarization Tool</h1>
        <!-- app.js attaches the submit handler to this form by id. -->
        <form id="summarizationForm">
            <!-- aria-label gives the textarea an accessible name; a placeholder alone is not one. -->
            <textarea id="inputText" rows="10" cols="50" placeholder="Enter your text here..." aria-label="Text to summarize"></textarea>
            <br>
            <button type="submit">Summarize</button>
        </form>
        <!-- app.js writes status, summary and error text here; aria-live announces the updates. -->
        <div id="summaryResult" aria-live="polite"></div>
    </div>
    <script src="app.js"></script>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/* Page background and base typography. */
body {
    font-family: Arial, sans-serif;
    background-color: #f4f4f4;
    margin: 0;
    padding: 20px;
}

/* Centered white card that holds the form and the result. */
.container {
    max-width: 600px;
    margin: auto;
    background: white;
    padding: 20px;
    border-radius: 8px;
    box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
}

h1 {
    text-align: center;
}

textarea {
    width: 100%;
    /* Include padding and border in the 100% width so the textarea does not
       overflow the container. */
    box-sizing: border-box;
    padding: 10px;
    margin-top: 10px;
    border-radius: 4px;
    border: 1px solid #ccc;
}

button {
    width: 100%;
    padding: 10px;
    border: none;
    background-color: #007BFF;
    color: white;
    border-radius: 4px;
    cursor: pointer;
}

button:hover {
    background-color: #0056b3;
}

/* Output area for status, summary and error text. */
#summaryResult {
    margin-top: 20px;
    padding: 10px;
    background: #e7f1ff;
    border: 1px solid #007BFF;
    border-radius: 4px;
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
public class WebConfig implements WebMvcConfigurer {

@Override
public void addCorsMappings(CorsRegistry registry) {
public void addCorsMappings(@SuppressWarnings("null") CorsRegistry registry) {
registry.addMapping("/**")
.allowedOrigins("http://localhost:3000") // Adjust as needed for your React frontend
.allowedMethods("GET", "POST", "PUT", "DELETE", "OPTIONS");
Expand Down

0 comments on commit 781b3ce

Please sign in to comment.