-
Notifications
You must be signed in to change notification settings - Fork 304
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #741 from ananyag309/branch
Added Post Traumatic Stress Prediction
- Loading branch information
Showing
5 changed files
with
142 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
The dataset is available at the link below: | ||
|
||
# LINK: https://www.kaggle.com/datasets/mahek6114/post-traumatic-stress-disorder-detection |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import os | ||
import numpy as np | ||
import streamlit as st | ||
import tensorflow as tf | ||
from tensorflow.keras.preprocessing.text import Tokenizer | ||
from tensorflow.keras.preprocessing.sequence import pad_sequences | ||
from tensorflow.keras.models import load_model | ||
|
||
# Locate the saved model relative to this script rather than the current
# working directory, then load it once at import time.
_script_file = os.path.abspath(__file__)
working_dir = os.path.dirname(_script_file)
model_path = f"{working_dir}/trained_model/PTSD_Detection_Model.h5"
model = load_model(model_path)
|
||
# Preprocessing function | ||
def preprocess_text(text, tokenizer=None):
    """Encode raw text as padded integer sequences for the model.

    Args:
        text: A list of documents (strings). A bare string is treated as a
            single document instead of being split into characters.
        tokenizer: Optional, already-fitted ``Tokenizer``. Pass the tokenizer
            that was fitted on the training corpus so that word indices match
            the ones the model's embedding layer was trained with.

    Returns:
        The array produced by ``pad_sequences`` with ``maxlen=100``: one row
        per document, padded/truncated to 100 token ids.
    """
    # Tokenizer.fit_on_texts / texts_to_sequences iterate their argument, so a
    # bare string would be processed one character at a time.
    if isinstance(text, str):
        text = [text]

    if tokenizer is None:
        # NOTE(review): legacy fallback kept for backward compatibility. A
        # tokenizer fitted on the input alone assigns indices by word
        # frequency in *this* text, which is unrelated to the vocabulary used
        # during training -- predictions made this way are not reliable.
        # Supply the training tokenizer via the ``tokenizer`` argument.
        tokenizer = Tokenizer(num_words=5000)
        tokenizer.fit_on_texts(text)

    sequences = tokenizer.texts_to_sequences(text)
    return pad_sequences(sequences, maxlen=100)
|
||
# Streamlit app: upload a .txt file, run it through the model, show the label.
st.title('🧠 PTSD Detection')
uploaded_file = st.file_uploader("Upload a text file", type=["txt"])

if uploaded_file is not None:
    # Read the text file. Undecodable bytes are replaced instead of raising,
    # so a file saved in another encoding does not crash the app.
    text_data = uploaded_file.read().decode("utf-8", errors="replace")
    text_data = [text_data]

    # Preprocess the text data and run inference
    with st.spinner('Processing text...'):
        preprocessed_text = preprocess_text(text_data)
        predicted_probabilities = np.asarray(model.predict(preprocessed_text))

        if predicted_probabilities.shape[-1] == 1:
            # Single sigmoid unit: the value is P(class 1). np.argmax over a
            # (1, 1) array is always 0, which would always report "No PTSD",
            # so threshold the probability instead.
            predicted_class_index = int(predicted_probabilities.ravel()[0] >= 0.5)
        else:
            # Multi-unit (softmax-style) head: pick the most probable class
            # for the single uploaded document.
            predicted_class_index = int(
                np.argmax(predicted_probabilities, axis=-1).ravel()[0]
            )

    # Map class index to PTSD status
    class_mapping = {0: "No PTSD", 1: "PTSD"}
    predicted_class = class_mapping[int(predicted_class_index)]

    st.write(f"Predicted PTSD status: {predicted_class}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import os | ||
import numpy as np | ||
import tensorflow as tf | ||
from tensorflow.keras.preprocessing.text import Tokenizer | ||
from tensorflow.keras.preprocessing.sequence import pad_sequences | ||
from tensorflow.keras.models import Sequential | ||
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense | ||
|
||
# Load and preprocess data | ||
def load_data(data_dir):
    """Read every text sample under ``data_dir`` and label it by folder.

    Files in ``<data_dir>/without_ptsd`` get label 0 and files in
    ``<data_dir>/with_ptsd`` get label 1.

    Args:
        data_dir: Directory containing the two category sub-directories.

    Returns:
        A ``(texts, labels)`` tuple: ``texts`` is a list of file contents and
        ``labels`` is a NumPy array of the matching integer labels, in the
        same order.

    Raises:
        FileNotFoundError: If a category directory does not exist.
    """
    texts, labels = [], []
    for label, category in enumerate(['without_ptsd', 'with_ptsd']):
        category_dir = os.path.join(data_dir, category)
        # os.listdir order is arbitrary; sort so runs are reproducible.
        for filename in sorted(os.listdir(category_dir)):
            sample_path = os.path.join(category_dir, filename)
            # Skip nested directories and other non-file entries, which
            # open() cannot read.
            if not os.path.isfile(sample_path):
                continue
            # Explicit encoding: the platform default (e.g. cp1252 on
            # Windows) can fail or mis-decode UTF-8 text.
            with open(sample_path, 'r', encoding='utf-8') as file:
                texts.append(file.read())
            labels.append(label)
    return texts, np.array(labels)
|
||
# Training entry point: load the corpus, encode it, train the CNN, save it.
data_dir = 'dataset'
texts, labels = load_data(data_dir)

# Tokenize and pad sequences
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=100)

# Shuffle before splitting. load_data returns all 'without_ptsd' samples
# followed by all 'with_ptsd' samples, so an unshuffled 75/25 split would put
# only one class in the test set. A fixed seed keeps the split reproducible.
shuffle_order = np.random.default_rng(42).permutation(len(padded_sequences))
shuffled_sequences = padded_sequences[shuffle_order]
shuffled_labels = labels[shuffle_order]

# Split data into training and test sets
split_idx = int(0.75 * len(shuffled_sequences))
x_train, x_test = shuffled_sequences[:split_idx], shuffled_sequences[split_idx:]
y_train, y_test = shuffled_labels[:split_idx], shuffled_labels[split_idx:]

# Build the model
model = Sequential([
    Embedding(input_dim=5000, output_dim=128, input_length=100),
    Conv1D(filters=64, kernel_size=5, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(10, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))

# Save the model. model_path was previously never defined, so this step
# raised NameError after training had already finished.
# NOTE(review): the file name mirrors the one the Streamlit app loads
# (trained_model/PTSD_Detection_Model.h5 next to the script) -- confirm both
# scripts live in the same directory.
output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'trained_model')
os.makedirs(output_dir, exist_ok=True)
model_path = os.path.join(output_dir, 'PTSD_Detection_Model.h5')
model.save(model_path)

# Persist the fitted tokenizer next to the model so inference code can reuse
# the exact training vocabulary (reload with tokenizer_from_json).
with open(os.path.join(output_dir, 'tokenizer.json'), 'w', encoding='utf-8') as tokenizer_file:
    tokenizer_file.write(tokenizer.to_json())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
## Overview | ||
This project utilizes a Convolutional Neural Network (CNN) to detect Post-Traumatic Stress Disorder (PTSD) from textual data. The model is trained on a dataset containing text samples from individuals diagnosed with PTSD and those without the condition. | ||
|
||
## Dataset | ||
The dataset is divided into two categories: | ||
|
||
with_ptsd: Text samples from individuals diagnosed with PTSD. | ||
<br> | ||
without_ptsd: Text samples from individuals without PTSD. | ||
|
||
## Model Architecture | ||
The model consists of the following layers: | ||
|
||
- Embedding layer for text data | ||
- 1D convolutional layer with ReLU activation | ||
- Global max-pooling layer | ||
- Fully connected Dense layer with ReLU activation | ||
- Output layer with Sigmoid activation | ||
|
||
## Results | ||
The model achieves good accuracy in detecting PTSD, as demonstrated by the classification report and accuracy score. | ||
|
||
## Usage | ||
To train the model, run the provided training script. The trained model is saved as `trained_model/PTSD_Detection_Model.h5`, which is the file the Streamlit app loads to make predictions. | ||
|
||
## Requirements | ||
- numpy | ||
- pandas | ||
- tensorflow | ||
- keras | ||
- scikit-learn | ||
|
||
## How to Run | ||
Ensure you have the required libraries installed. | ||
Prepare your dataset in the specified format. | ||
Run the script to train the model and make predictions. | ||
|
||
## Display Predictions | ||
The script includes a function to evaluate the model on test data, displaying classification metrics such as precision, recall, and F1 score. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
**Requirements for the project:** | ||
|
||
1. NumPy: Fundamental package for numerical computing. | ||
2. pandas: Data analysis and manipulation library. | ||
3. scikit-learn: Machine learning library for classification, regression, and clustering. | ||
4. Matplotlib: Plotting library for creating visualizations. | ||
5. tqdm: Progress bar utility for tracking iterations. | ||
6. seaborn: Statistical data visualization library based on Matplotlib. |