"""Flask app (notebooks/main.py) serving a movie home page and a details page.

The ``/recommend`` route receives movie, cast and recommendation data from a
form POST, scrapes user reviews for the movie from IMDb, labels each review
as 'Positive' or 'Negative' with a pickled model, and renders the result.
"""
import http.client
import json
import pickle
import urllib.error
import urllib.parse
import urllib.request
from datetime import date, datetime

import bs4 as bs
import numpy as np
import pandas as pd
import requests
from flask import Flask, render_template, request
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Upper bound on how long a request may wait for the IMDb reviews page.
IMDB_TIMEOUT_SECONDS = 10


def _load_pickle(path):
    """Unpickle and return the object stored at *path*, closing the file."""
    # NOTE: unpickling executes arbitrary code; only load files you trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# load the nlp model and vectorizer from disk
filename = 'nlp_model.pkl'
clf = _load_pickle(filename)
vectorizer = _load_pickle('tranform.pkl')


def _strip_outer(items, opener, closer):
    """Drop *opener* from the start of the first item and *closer* from the
    end of the last item, in place, and return *items*.

    Only the outermost delimiters are removed; identical character sequences
    inside an element are left untouched.
    """
    if items[0].startswith(opener):
        items[0] = items[0][len(opener):]
    if items[-1].endswith(closer):
        items[-1] = items[-1][:-len(closer)]
    return items


def convert_to_list(my_list):
    """Convert a stringified list of strings to a list.

    Example: ``'["abc","def"]'`` becomes ``['abc', 'def']``. An empty list
    string (``'[]'``) becomes ``[]``. Elements are split on ``","`` and are
    not otherwise unescaped.
    """
    if my_list.strip() == '[]':
        return []
    return _strip_outer(my_list.split('","'), '["', '"]')


def convert_to_list_num(my_list):
    """Convert a stringified list of numbers to a list.

    Example: ``'[1,2,3]'`` becomes ``['1', '2', '3']``; the elements stay
    strings. An empty list string (``'[]'``) becomes ``[]``.
    """
    if my_list.strip() == '[]':
        return []
    return _strip_outer(my_list.split(','), '[', ']')


def get_suggestions():
    """Return the capitalized ``movie_title`` column of main_data.csv as a list."""
    data = pd.read_csv('main_data.csv')
    return list(data['movie_title'].str.capitalize())


app = Flask(__name__)


def _unescape(text):
    """Turn literal ``\\n`` and ``\\"`` sequences into a newline and a quote."""
    return text.replace(r'\n', '\n').replace(r'\"', '"')


def _fetch_imdb_reviews(imdb_id):
    """Scrape the review texts for *imdb_id* from the IMDb reviews page.

    Returns a list of review strings. Network or HTTP failures are logged and
    yield an empty list so the page can still render without reviews.
    """
    # imdb_id comes from the client; escape it so it cannot alter the URL.
    url = 'https://www.imdb.com/title/{}/reviews?ref_=tt_ov_rt'.format(
        urllib.parse.quote(imdb_id, safe=''))
    try:
        with urllib.request.urlopen(url, timeout=IMDB_TIMEOUT_SECONDS) as response:
            sauce = response.read()
    except (OSError, http.client.HTTPException) as err:
        # URLError, HTTPError and socket timeouts are all OSError subclasses.
        app.logger.warning("Could not fetch IMDb reviews for %s: %s", imdb_id, err)
        return []

    soup = bs.BeautifulSoup(sauce, 'lxml')
    soup_result = soup.find_all("div", {"class": "text show-more__control"})
    # .string is None when a div has mixed/nested content; skip those.
    return [str(review.string) for review in soup_result if review.string]


def _classify_reviews(reviews_list):
    """Map each review text to 'Positive' or 'Negative' using the model."""
    if not reviews_list:
        return {}
    # One vectorize/predict call for all reviews instead of one per review.
    predictions = clf.predict(vectorizer.transform(reviews_list))
    return {
        review: 'Positive' if pred else 'Negative'
        for review, pred in zip(reviews_list, predictions)
    }


@app.route("/")
@app.route("/home")
def home():
    """Render the home page with the movie titles used for auto complete."""
    suggestions = get_suggestions()
    return render_template('home.html', suggestions=suggestions)


@app.route("/recommend", methods=["POST"])
def recommend():
    """Render the details page for the movie described by the posted form.

    Reads the movie, cast and recommendation fields from ``request.form``
    (a missing field raises Flask's bad-request key error), converts the
    stringified lists, fetches and classifies IMDb reviews, and passes
    everything to ``recommend.html``.
    """
    # getting data from AJAX request
    form = request.form
    title = form['title']
    imdb_id = form['imdb_id']
    poster = form['poster']
    genres = form['genres']
    overview = form['overview']
    vote_average = form['rating']
    vote_count = form['vote_count']
    rel_date = form['rel_date']
    release_date = form['release_date']
    runtime = form['runtime']
    status = form['status']

    # stringified lists of strings (eg. '["abc","def"]')
    rec_movies_org = convert_to_list(form['rec_movies_org'])
    rec_movies = convert_to_list(form['rec_movies'])
    rec_posters = convert_to_list(form['rec_posters'])
    cast_names = convert_to_list(form['cast_names'])
    cast_profiles = convert_to_list(form['cast_profiles'])
    cast_bdays = convert_to_list(form['cast_bdays'])
    cast_places = convert_to_list(form['cast_places'])
    # bios and character names may carry escaped newlines/quotes
    cast_bios = [_unescape(bio) for bio in convert_to_list(form['cast_bios'])]
    cast_chars = [_unescape(char) for char in convert_to_list(form['cast_chars'])]

    # stringified lists of numbers (eg. '[1,2,3]')
    cast_ids = convert_to_list_num(form['cast_ids'])
    rec_vote = convert_to_list_num(form['rec_vote'])
    rec_year = convert_to_list_num(form['rec_year'])

    # Combine the parallel lists into dictionaries for the template. zip()
    # stops at the shortest list, so mismatched lengths drop the extra
    # entries instead of raising IndexError.
    movie_cards = {
        rec_poster: [rec_movie, rec_movie_org, vote, year]
        for rec_poster, rec_movie, rec_movie_org, vote, year
        in zip(rec_posters, rec_movies, rec_movies_org, rec_vote, rec_year)
    }
    casts = {
        name: [cast_id, char, profile]
        for name, cast_id, char, profile
        in zip(cast_names, cast_ids, cast_chars, cast_profiles)
    }
    cast_details = {
        name: [cast_id, profile, bday, place, bio]
        for name, cast_id, profile, bday, place, bio
        in zip(cast_names, cast_ids, cast_profiles, cast_bdays, cast_places, cast_bios)
    }

    # web scraping to get user reviews from IMDB site, then label each one
    movie_reviews = _classify_reviews(_fetch_imdb_reviews(imdb_id))

    # Release date and today's date (midnight) as datetimes; both stay ""
    # when no valid release date was supplied.
    movie_rel_date = ""
    curr_date = ""
    if rel_date:
        try:
            movie_rel_date = datetime.strptime(rel_date, '%Y-%m-%d')
        except ValueError:
            app.logger.warning("Ignoring malformed rel_date %r", rel_date)
        else:
            curr_date = datetime.combine(date.today(), datetime.min.time())

    # passing all the data to the html file
    return render_template(
        'recommend.html',
        title=title,
        poster=poster,
        overview=overview,
        vote_average=vote_average,
        vote_count=vote_count,
        release_date=release_date,
        movie_rel_date=movie_rel_date,
        curr_date=curr_date,
        runtime=runtime,
        status=status,
        genres=genres,
        movie_cards=movie_cards,
        reviews=movie_reviews,
        casts=casts,
        cast_details=cast_details,
    )


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger and reloader;
    # do not run this entry point as-is on a publicly reachable host.
    app.run(debug=True)