music_recommendation.py
from keras.preprocessing.image import img_to_array
import imutils
import cv2
from keras.models import load_model
import numpy as np
import playsound  # to play saved mp3 files
from gtts import gTTS  # Google text-to-speech
import os
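# NOTE: besides Keras and OpenCV, this script assumes the imutils, gtts and
# playsound packages are installed, plus tkinter (for the error dialog) and a
# working webcam at device index 0.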
num = 1

def speaks(output):
    # Convert the given text to speech, play it, then delete the mp3 file
    global num
    num += 1
    # print("Safety Assistant : ", output)
    tospeak = gTTS(text=output, lang='en', slow=False)  # newer gTTS releases expect 'en' rather than 'en-US'
    file = str(num) + ".mp3"
    tospeak.save(file)
    playsound.playsound(file, True)
    os.remove(file)
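# Each call writes a uniquely numbered file ("2.mp3", "3.mp3", ...) so a file
# that is still playing is never overwritten; playsound blocks until playback
# finishes, after which the file is removed.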
speaks("SONGS WILL BE PLAYED ACCORDING TO THE EMOTION DETECTED")
# parameters for loading data and images
detection_model_path = 'haarcascade_files/haarcascade_frontalface_default.xml'
emotion_model_path = 'models/_mini_XCEPTION.102-0.66.hdf5'
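# The mini-XCEPTION weights referenced above are commonly distributed with
# FER-2013 emotion-recognition demos and take 64x64 grayscale input, which
# matches the preprocessing done in the loop below.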
# loading models
face_detection = cv2.CascadeClassifier(detection_model_path)
emotion_classifier = load_model(emotion_model_path, compile=False)
EMOTIONS = ["angry", "disgust", "scared", "happy", "sad", "surprised",
            "neutral"]
# feelings_faces = []
# for index, emotion in enumerate(EMOTIONS):
#     feelings_faces.append(cv2.imread('emojis/' + emotion + '.png', -1))
# starting video streaming
cv2.namedWindow('your_face')
camera = cv2.VideoCapture(0)
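# Main loop: grab a frame, detect the largest face, classify its emotion, and
# render both the annotated frame and a per-emotion probability chart.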
while True:
    # Reading the frame
    frame = camera.read()[1]
    frame = imutils.resize(frame, width=300)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_detection.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5,
                                            minSize=(30, 30), flags=cv2.CASCADE_SCALE_IMAGE)
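    # detectMultiScale scans an image pyramid: scaleFactor=1.1 shrinks the
    # search window 10% per level, minNeighbors=5 suppresses weak detections,
    # and minSize=(30, 30) ignores faces smaller than 30x30 pixels.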
    canvas = np.zeros((250, 300, 3), dtype="uint8")
    frameClone = frame.copy()
    if len(faces) > 0:
        # detectMultiScale returns (x, y, w, h) boxes, so the area of a face
        # is w * h; sort by area and keep the largest detection
        faces = sorted(faces, reverse=True, key=lambda x: x[2] * x[3])[0]
        (fX, fY, fW, fH) = faces
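        # Only the single largest face is classified; any other faces in the
        # frame are ignored.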
        # Extract the ROI of the face from the grayscale image, resize it to
        # the fixed 64x64 input the network expects, and then prepare the ROI
        # for classification via the CNN
        roi = gray[fY:fY + fH, fX:fX + fW]
        roi = cv2.resize(roi, (64, 64))
        roi = roi.astype("float") / 255.0
        roi = img_to_array(roi)
        roi = np.expand_dims(roi, axis=0)
        preds = emotion_classifier.predict(roi)[0]
        emotion_probability = np.max(preds)
        label = EMOTIONS[preds.argmax()]
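        # preds is the model's probability distribution over the seven
        # EMOTIONS; the on-screen label is simply the most likely class.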
    else:
        # No face detected: show an error dialog (the withdrawn Tk root keeps
        # an empty main window from appearing) and stop the program
        import tkinter as tk
        from tkinter import messagebox
        root123 = tk.Tk()
        root123.withdraw()
        messagebox.showinfo('ERROR MESSAGE', "FACE NOT DETECTED, PLEASE TRY AGAIN!!")
        break
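    # Past this point a face was found: the else branch above breaks out of
    # the loop, so preds and label are always defined here.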
    for (i, (emotion, prob)) in enumerate(zip(EMOTIONS, preds)):
        # construct the label text
        text = "{}: {:.2f}%".format(emotion, prob * 100)
        # draw the label + probability bar on the canvas
        # emoji_face = feelings_faces[np.argmax(preds)]
        w = int(prob * 300)
        cv2.rectangle(canvas, (7, (i * 35) + 5),
                      (w, (i * 35) + 35), (0, 0, 255), -1)
        cv2.putText(canvas, text, (10, (i * 35) + 23),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45,
                    (255, 255, 255), 2)
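        # The bar width maps the probability onto the 300-pixel-wide canvas,
        # and each emotion occupies a 35-pixel-tall row.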
        cv2.putText(frameClone, label, (fX, fY - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
        cv2.rectangle(frameClone, (fX, fY), (fX + fW, fY + fH),
                      (0, 0, 255), 2)

    # for c in range(0, 3):
    #     frame[200:320, 10:130, c] = emoji_face[:, :, c] * \
    #         (emoji_face[:, :, 3] / 255.0) + frame[200:320, 10:130, c] * \
    #         (1.0 - emoji_face[:, :, 3] / 255.0)
    cv2.imshow('your_face', frameClone)
    cv2.imshow("Probabilities", canvas)

    # Press 's' to select the currently detected emotion and start playback
    if cv2.waitKey(1) & 0xFF == ord('s'):
        variable = label
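        # Masking waitKey with 0xFF keeps only the low byte of the key code,
        # so the comparison with ord('s') behaves the same across platforms.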
        if variable == "happy":
            speaks("EMOTION DETECTED IS HAPPY")
            speaks("GENRES ALLOCATED FOR THE DETECTED EMOTION ARE JAZZ, HOLIDAY AND NEW AGE")
            import happy
            break
        elif variable == "sad":
            speaks("EMOTION DETECTED IS SAD")
            speaks("GENRE ALLOCATED FOR THE DETECTED EMOTION IS BLUES")
            import sad
            break
        elif variable == "angry":
            speaks("EMOTION DETECTED IS ANGRY")
            speaks("GENRES ALLOCATED FOR THE DETECTED EMOTION ARE POP/ROCK AND ELECTRONIC")
            import angry
            break
        elif variable == "scared":
            speaks("EMOTION DETECTED IS SCARED")
            speaks("GENRE ALLOCATED FOR THE DETECTED EMOTION IS BLUES")
            import scared
            break
        elif variable == "surprised":
            speaks("EMOTION DETECTED IS SURPRISED")
            speaks("GENRES ALLOCATED FOR THE DETECTED EMOTION ARE JAZZ, HOLIDAY AND NEW AGE")
            import surprised
            break
        elif variable == "neutral":
            speaks("EMOTION DETECTED IS NEUTRAL")
            speaks("GENRES ALLOCATED FOR THE DETECTED EMOTION ARE JAZZ, HOLIDAY AND NEW AGE")
            import neutral
            break
        elif variable == "disgust":
            speaks("EMOTION DETECTED IS DISGUST")
            speaks("GENRES ALLOCATED FOR THE DETECTED EMOTION ARE POP/ROCK AND ELECTRONIC")
            import disgust
            break
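        # Each branch hands off to a sibling module (happy.py, sad.py, ...);
        # presumably importing it starts playback for the allotted genre,
        # after which the capture loop exits.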
    # Press 'q' to quit without playing anything
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

camera.release()
cv2.destroyAllWindows()