From 1bebea6a0f96cc3d8fcb9ded7ea3c343f13bfed8 Mon Sep 17 00:00:00 2001
From: Ananya Gupta <145869907+ananyag309@users.noreply.github.com>
Date: Mon, 15 Jul 2024 22:30:18 +0530
Subject: [PATCH] Add files via upload

---
 .../Dataset/readme.md                      |  3 ++
 .../Models/main.py                         | 42 ++++++++++++++++
 .../Models/train.py                        | 58 ++++++++++++++++++++
 Post Traumatic Stress Prediction/README.md | 38 ++++++++++++++
 .../requirements.txt                       | 10 +++
 5 files changed, 151 insertions(+)
 create mode 100644 Post Traumatic Stress Prediction/Dataset/readme.md
 create mode 100644 Post Traumatic Stress Prediction/Models/main.py
 create mode 100644 Post Traumatic Stress Prediction/Models/train.py
 create mode 100644 Post Traumatic Stress Prediction/README.md
 create mode 100644 Post Traumatic Stress Prediction/requirements.txt

diff --git a/Post Traumatic Stress Prediction/Dataset/readme.md b/Post Traumatic Stress Prediction/Dataset/readme.md
new file mode 100644
index 000000000..28c23dae1
--- /dev/null
+++ b/Post Traumatic Stress Prediction/Dataset/readme.md
@@ -0,0 +1,3 @@
+The link to the dataset is given below:
+
+# LINK: https://www.kaggle.com/datasets/mahek6114/post-traumatic-stress-disorder-detection
\ No newline at end of file
diff --git a/Post Traumatic Stress Prediction/Models/main.py b/Post Traumatic Stress Prediction/Models/main.py
new file mode 100644
index 000000000..9ad66c958
--- /dev/null
+++ b/Post Traumatic Stress Prediction/Models/main.py
@@ -0,0 +1,42 @@
+import os
+import streamlit as st
+from tensorflow.keras.preprocessing.text import tokenizer_from_json
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.models import load_model
+
+# Define paths to the artifacts produced by train.py
+working_dir = os.path.dirname(os.path.abspath(__file__))
+model_path = f"{working_dir}/trained_model/PTSD_Detection_Model.h5"
+tokenizer_path = f"{working_dir}/trained_model/tokenizer.json"
+
+# Load the trained model and the tokenizer fitted during training,
+# so inference uses the same vocabulary the model was trained on
+model = load_model(model_path)
+with open(tokenizer_path, "r", encoding="utf-8") as f:
+    tokenizer = tokenizer_from_json(f.read())
+
+# Preprocessing function
+def preprocess_text(texts):
+    # Convert raw text into padded integer sequences (same maxlen as training)
+    sequences = tokenizer.texts_to_sequences(texts)
+    padded_sequences = pad_sequences(sequences, maxlen=100)
+    return padded_sequences
+
+# Streamlit app
+st.title('🧠 PTSD Detection')
+uploaded_file = st.file_uploader("Upload a text file", type=["txt"])
+
+if uploaded_file is not None:
+    # Read the text file
+    text_data = [uploaded_file.read().decode("utf-8")]
+
+    # Preprocess the text data and predict
+    with st.spinner('Processing text...'):
+        preprocessed_text = preprocess_text(text_data)
+        predicted_probability = float(model.predict(preprocessed_text)[0][0])
+
+        # The model has a single sigmoid output, so threshold at 0.5
+        class_mapping = {0: "No PTSD", 1: "PTSD"}
+        predicted_class = class_mapping[int(predicted_probability >= 0.5)]
+
+        st.write(f"Predicted PTSD status: {predicted_class}")
diff --git a/Post Traumatic Stress Prediction/Models/train.py b/Post Traumatic Stress Prediction/Models/train.py
new file mode 100644
index 000000000..d20e7d185
--- /dev/null
+++ b/Post Traumatic Stress Prediction/Models/train.py
@@ -0,0 +1,58 @@
+import os
+import numpy as np
+from tensorflow.keras.preprocessing.text import Tokenizer
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
+
+# Load and preprocess data
+def load_data(data_dir):
+    texts, labels = [], []
+    for label, category in enumerate(['without_ptsd', 'with_ptsd']):
+        category_dir = os.path.join(data_dir, category)
+        for filename in os.listdir(category_dir):
+            with open(os.path.join(category_dir, filename), 'r') as file:
+                texts.append(file.read())
+                labels.append(label)
+    return texts, np.array(labels)
+
+data_dir = 'dataset'
+texts, labels = load_data(data_dir)
+
+# Tokenize and pad sequences
+tokenizer = Tokenizer(num_words=5000)
+tokenizer.fit_on_texts(texts)
+sequences = tokenizer.texts_to_sequences(texts)
+padded_sequences = pad_sequences(sequences, maxlen=100)
+
+# Shuffle before splitting so both classes appear in the training and test sets
+rng = np.random.default_rng(42)
+shuffled_idx = rng.permutation(len(padded_sequences))
+padded_sequences, labels = padded_sequences[shuffled_idx], labels[shuffled_idx]
+
+# Split data into training and test sets
+split_idx = int(0.75 * len(padded_sequences))
+x_train, x_test = padded_sequences[:split_idx], padded_sequences[split_idx:]
+y_train, y_test = labels[:split_idx], labels[split_idx:]
+
+# Build the model
+model = Sequential([
+    Embedding(input_dim=5000, output_dim=128, input_length=100),
+    Conv1D(filters=64, kernel_size=5, activation='relu'),
+    GlobalMaxPooling1D(),
+    Dense(10, activation='relu'),
+    Dense(1, activation='sigmoid')
+])
+
+# Compile the model
+model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+# Train the model
+model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))
+
+# Save the model and the fitted tokenizer next to this script so main.py can load them
+output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'trained_model')
+os.makedirs(output_dir, exist_ok=True)
+model.save(os.path.join(output_dir, 'PTSD_Detection_Model.h5'))
+with open(os.path.join(output_dir, 'tokenizer.json'), 'w', encoding='utf-8') as f:
+    f.write(tokenizer.to_json())
diff --git a/Post Traumatic Stress Prediction/README.md b/Post Traumatic Stress Prediction/README.md
new file mode 100644
index 000000000..62722f732
--- /dev/null
+++ b/Post Traumatic Stress Prediction/README.md
@@ -0,0 +1,38 @@
+## Overview
+This project uses a Convolutional Neural Network (CNN) to detect Post-Traumatic Stress Disorder (PTSD) from textual data. The model is trained on a dataset containing text samples from individuals diagnosed with PTSD and from individuals without the condition.
+
+## Dataset
+The dataset is divided into two categories:
+
+- with_ptsd: Text samples from individuals diagnosed with PTSD.
+- without_ptsd: Text samples from individuals without PTSD.
+
+## Model Architecture
+The model consists of the following layers:
+
+- Embedding layer for the tokenized text
+- Conv1D layer with ReLU activation
+- GlobalMaxPooling1D layer
+- Fully connected Dense layer with ReLU activation
+- Output Dense layer with sigmoid activation
+
+## Results
+Validation accuracy on the held-out test set is reported after each training epoch; precision, recall, and F1 score can additionally be computed with scikit-learn's classification report.
+
+## Usage
+Run Models/train.py to train the model; the trained model is saved to Models/trained_model/PTSD_Detection_Model.h5 together with the fitted tokenizer. Models/main.py then serves predictions through a Streamlit app.
+
+## Requirements
+- numpy
+- tensorflow
+- streamlit
+- scikit-learn
+
+## How to Run
+1. Install the required libraries.
+2. Prepare the dataset in the layout described above (a dataset folder with without_ptsd and with_ptsd subfolders).
+3. Run Models/train.py to train and save the model.
+4. Launch the app with streamlit run Models/main.py and upload a text file to get a prediction.
+
+## Display Predictions
+The model can be evaluated on the held-out test data with scikit-learn, which reports classification metrics such as precision, recall, and F1 score.
diff --git a/Post Traumatic Stress Prediction/requirements.txt b/Post Traumatic Stress Prediction/requirements.txt
new file mode 100644
index 000000000..21c7a3ddb
--- /dev/null
+++ b/Post Traumatic Stress Prediction/requirements.txt
@@ -0,0 +1,10 @@
+**Requirements for the project:**
+
+1. NumPy: Fundamental package for numerical computing.
+2. pandas: Data analysis and manipulation library.
+3. scikit-learn: Machine learning library for classification, regression, and clustering.
+4. Matplotlib: Plotting library for creating visualizations.
+5. tqdm: Progress bar utility for tracking iterations.
+6. seaborn: Statistical data visualization library based on Matplotlib.
+7. TensorFlow: Deep learning framework (with Keras) used to build and train the CNN.
+8. Streamlit: Web app framework used to serve the prediction interface.
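
The README's "Display Predictions" section refers to precision, recall, and F1 score, but neither script computes them. Below is a minimal sketch of how that report could be produced with scikit-learn, assuming it is appended to the end of train.py, where `model`, `x_test`, and `y_test` are already defined; the 0.5 threshold mirrors how main.py handles the single sigmoid output.

```python
# Evaluation sketch: score the held-out split created in train.py.
from sklearn.metrics import accuracy_score, classification_report

# Single sigmoid output, so threshold the predicted probabilities at 0.5.
probabilities = model.predict(x_test)
predictions = (probabilities[:, 0] >= 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, predictions))
print(classification_report(y_test, predictions, target_names=["No PTSD", "PTSD"]))
```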
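
For a quick check without launching the Streamlit app, a small sketch that reloads the artifacts written by train.py and classifies a single string. It assumes it is run from the Models directory (next to the trained_model folder), and the sample sentence is purely illustrative.

```python
# Standalone prediction sketch: reuse the saved model and tokenizer.
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import tokenizer_from_json

model = load_model("trained_model/PTSD_Detection_Model.h5")
with open("trained_model/tokenizer.json", "r", encoding="utf-8") as f:
    tokenizer = tokenizer_from_json(f.read())

sample = ["I keep reliving the accident and cannot sleep."]  # illustrative text only
padded = pad_sequences(tokenizer.texts_to_sequences(sample), maxlen=100)
probability = float(model.predict(padded)[0][0])
print("PTSD" if probability >= 0.5 else "No PTSD", f"(p={probability:.2f})")
```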