Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pri mistake #19

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
121 changes: 94 additions & 27 deletions backend/app.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,113 @@
import json
import os
from flask import Flask, render_template, request
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
from helpers.MySQLDatabaseHandler import MySQLDatabaseHandler
import pandas as pd
import re
from collections import defaultdict
import math

# ROOT_PATH for linking with all your files.
# Feel free to use a config.py or settings.py with a global export variable
os.environ['ROOT_PATH'] = os.path.abspath(os.path.join("..",os.curdir))
app = Flask(__name__)
CORS(app)

# ROOT_PATH for linking with all your files.
os.environ['ROOT_PATH'] = os.path.abspath(os.path.join("..", os.curdir))

# Get the directory of the current script
current_directory = os.path.dirname(os.path.abspath(__file__))

# Specify the path to the JSON file relative to the current script
json_file_path = os.path.join(current_directory, 'init.json')
json_file_path = os.path.join(current_directory, 'data.json')

# Assuming your JSON data is stored in a file named 'init.json'
# Assuming your JSON data is stored in a file named 'data.json'
with open(json_file_path, 'r') as file:
data = json.load(file)
episodes_df = pd.DataFrame(data['episodes'])
reviews_df = pd.DataFrame(data['reviews'])

app = Flask(__name__)
CORS(app)
def preprocess_data(data):
    """Collect the 'Eat' text recorded for each city.

    ``data`` is walked as a two-level mapping (city -> category -> details);
    each ``details`` object is queried with ``.get('Eat')``.

    Returns:
        A ``defaultdict(str)`` mapping each city to its 'Eat' value. Cities
        with no non-``None`` 'Eat' entry are left out.
    """
    eat_by_city = defaultdict(str)
    for city_name, sections in data.items():
        for section in sections.values():
            eat_text = section.get('Eat')
            if eat_text is None:
                continue
            # A later section with an 'Eat' entry replaces an earlier one.
            eat_by_city[city_name] = eat_text
    return eat_by_city

def create_term_frequency_matrix(data):
    """Count word occurrences in each city's text.

    Args:
        data: Mapping of city -> text. The text is lower-cased and split into
            ``\\w+`` tokens.

    Returns:
        A ``defaultdict(dict)`` mapping city -> {term: count}. A city whose
        text yields no tokens gets no entry.
    """
    counts_by_city = defaultdict(dict)
    for city_name, text in data.items():
        for token in re.findall(r'\w+', text.lower()):
            # Look the city up only when a token exists, so cities with no
            # tokens never get an entry.
            city_counts = counts_by_city[city_name]
            city_counts[token] = city_counts.get(token, 0) + 1
    return counts_by_city

def calculate_jaccard_similarity(query, data_term_frequency_matrix):
    """Score every city by Jaccard similarity between query terms and city terms.

    Args:
        query: Free-text query; lower-cased and split into ``\\w+`` tokens.
        data_term_frequency_matrix: Mapping of city -> {term: count}. Only the
            terms (keys) are used; counts are ignored.

    Returns:
        Dict mapping city -> |intersection| / |union| of the two term sets.
        When both sets are empty the similarity is defined as 0.0 rather than
        dividing by zero.
    """
    query_terms = set(re.findall(r'\w+', query.lower()))
    similarities = {}
    for city, term_counts in data_term_frequency_matrix.items():
        city_terms = set(term_counts)
        union_size = len(query_terms | city_terms)
        if union_size == 0:
            # Empty query against a city with no terms: nothing to compare.
            similarities[city] = 0.0
        else:
            similarities[city] = len(query_terms & city_terms) / union_size
    return similarities

def top_sim(similarities, k=10):
    """Return the ``k`` highest-scoring (city, similarity) pairs.

    Args:
        similarities: Mapping of city -> similarity score.
        k: Maximum number of pairs to return. Defaults to 10, the previously
            hard-coded cutoff.

    Returns:
        List of ``(city, similarity)`` tuples sorted by similarity, highest
        first. Ties keep the mapping's iteration order.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        raise ValueError("k must be non-negative")
    ranked = sorted(similarities.items(), key=lambda pair: pair[1], reverse=True)
    return ranked[:k]

# Sample search using json with pandas
def json_search(query):
    """Return episodes whose title contains ``query``, as a JSON string.

    Joins the module-level ``episodes_df`` and ``reviews_df`` on ``id`` and
    keeps rows whose lower-cased title contains the lower-cased query.

    Args:
        query: Substring to look for in episode titles (case-insensitive).

    Returns:
        JSON string (``orient='records'``) with the ``title``, ``descr`` and
        ``imdb_rating`` columns of the matching rows.
    """
    merged_df = pd.merge(episodes_df, reviews_df, left_on='id', right_on='id', how='inner')
    lowered_titles = merged_df['title'].str.lower()
    # regex=False: the query is user input and must match literally, so text
    # such as "(" or "*" cannot raise or act as a pattern.
    # na=False: rows with a missing title count as non-matches instead of
    # putting NaN into the boolean mask.
    mask = lowered_titles.str.contains(query.lower(), regex=False, na=False)
    matches_filtered = merged_df[mask][['title', 'descr', 'imdb_rating']]
    return matches_filtered.to_json(orient='records')
def calculate_query_vector(query, data_term_frequency_matrix):
    """Build a term-count vector for a free-text query.

    Args:
        query: Query text; lower-cased and split into ``\\w+`` tokens.
        data_term_frequency_matrix: Accepted for interface compatibility; it
            is not read here.

    Returns:
        A ``defaultdict(int)`` mapping each query term to its count.
    """
    term_counts = defaultdict(int)
    for token in re.findall(r'\w+', query.lower()):
        term_counts[token] = term_counts[token] + 1
    return term_counts

def calculate_cosine_similarity(query_vector, doc_vector):
    """Compute the cosine similarity between two sparse term-count vectors.

    Args:
        query_vector: Mapping of term -> count for the query.
        doc_vector: Mapping of term -> count for the document.

    Returns:
        dot(query, doc) / (|query| * |doc|), or 0 when either vector has zero
        norm (for example, when it is empty).
    """
    # Only terms present in the query can contribute to the dot product.
    dot_product = sum(
        count * doc_vector.get(term, 0) for term, count in query_vector.items()
    )
    query_norm = math.sqrt(sum(count ** 2 for count in query_vector.values()))
    # The document norm must be taken over the document's own counts, not the
    # query's.
    doc_norm = math.sqrt(sum(count ** 2 for count in doc_vector.values()))

    if query_norm == 0 or doc_norm == 0:
        return 0
    return dot_product / (query_norm * doc_norm)

@app.route("/")
def home():
return render_template('base.html',title="sample html")
return render_template('base.html', title="Sample HTML")

@app.route("/food_search")
def food_search():
    """Rank cities by cosine similarity between the query and their 'Eat' text.

    Reads the ``query`` request argument, builds term-count vectors for the
    query and for every city found in the module-level ``data``, and returns
    the ten best-scoring cities.

    Returns:
        JSON response of the form
        ``{"top_10": [{"city": ..., "similarity": ...}, ...]}``.
    """
    # A request without ?query= is treated as an empty query, so every city
    # scores 0 instead of the handler failing on None.
    query = request.args.get("query", "")
    preprocessed_data = preprocess_data(data)
    term_frequency_matrix = create_term_frequency_matrix(preprocessed_data)

    query_vector = calculate_query_vector(query, term_frequency_matrix)
    similarities = {
        city: calculate_cosine_similarity(query_vector, city_vector)
        for city, city_vector in term_frequency_matrix.items()
    }

    top_10 = top_sim(similarities)

    top_10_json = [{"city": city, "similarity": similarity} for city, similarity in top_10]
    return jsonify(top_10=top_10_json)

@app.route("/episodes")
def episodes_search():
    """Search episodes by the ``title`` request argument.

    Returns whatever ``json_search`` produces for that argument.
    """
    return json_search(request.args.get("title"))

if 'DB_NAME' not in os.environ:
app.run(debug=True,host="0.0.0.0",port=5000)
if __name__ == "__main__":
app.run(debug=True)
1 change: 1 addition & 0 deletions backend/data.json

Large diffs are not rendered by default.

97 changes: 65 additions & 32 deletions backend/static/style.css
Original file line number Diff line number Diff line change
@@ -1,91 +1,124 @@
.full-body-container{
.full-body-container {
position: absolute;
top:0;
left:0;
right:0;
bottom:0;
width:100%;
height:100%;
top: 0;
left: 0;
right: 0;
bottom: 0;
width: 100%;
height: 100%;
display: flex;
justify-content: start;
align-items: center;
flex-direction: column;
}

#google-c{
color:#4285F4;
#google-0 {
color: #DB4437; /* A */
}

#google-1 {
color: #4285F4; /* b */
}

#google-2 {
color: #0F9D58; /* r */
}

#google-3 {
color: #F4B400; /* o */
}

#google-4 {
color: #4285F4; /* a */
}

#google-5 {
color: #DB4437; /* d */
}

#google-6 {
    color: #0F9D58; /* A */
}

#google-7 {
color: #DB4437; /* d */
}

#google-8 {
color: #F4B400; /* v */
}

#google-s{
color:#DB4437;
#google-9 {
color: #4285F4; /* i */
}

#google-4{
color: #F4B400;
#google-10 {
color: #DB4437; /* s */
}

#google-3{
color: #4285F4;
#google-11 {
color: #0F9D58; /* e */
}

#google-0-1{
color: #0F9D58;
#google-12 {
color: #F4B400; /* r */
}

#google-0-2{
color:#DB4437;
#google-13 {
color: #4285F4; /* s */
}

.google-colors{
.google-colors {
display: flex;
align-items: center;
font-size: 48px;
font-family: 'Open Sans', sans-serif;
}

.google-colors h1{
.google-colors h1 {
margin-bottom: 0;
margin-right: 3px;
}

.input-box{
.input-box {
border-radius: 50px;
border:1px solid black;
border: 1px solid black;
display: flex;
align-items: center;
margin-top: 10px;
padding:12px;
padding: 12px;
width: 600px;
}

.input-box img{
.input-box img {
height: 20px;
width:20px;
width: 20px;
}

.top-text{
.top-text {
display: flex;
flex-direction: column;
align-items: center;
}

.input-box input{
.input-box input {
width: 100%;
margin-left: 10px;
font-size: 16px;
border: none;
outline: 0;
}

#answer-box{
#answer-box {
width: 700px;
margin-top:50px;
margin-top: 50px;
}

.episode-title{
.episode-title {
font-family: 'Kanit', sans-serif;
}

.episode-desc{
.episode-desc {
font-family: 'Montserrat', sans-serif;
}
Loading