-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFind IMDB Ratings.py
79 lines (61 loc) · 2.41 KB
/
Find IMDB Ratings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
from urllib.parse import quote_plus

import pandas as pd
import requests
from bs4 import BeautifulSoup
# Prefix of the IMDb title-search URL; the encoded film title is appended to it.
SEARCH_URL = "https://www.imdb.com/search/title/?title="
# Seconds to wait for a single HTTP request before giving up on that film.
REQUEST_TIMEOUT = 10
# CSV report written to the current working directory.
OUTPUT_CSV = "film_ratings.csv"


def titles_from_filenames(filenames):
    """Return the distinct film titles found in a directory listing.

    Each name has its extension removed. Hidden entries (names starting
    with ``.``, such as ``.DS_Store``) are ignored, and a title that occurs
    more than once (for example a video and its subtitle file) is kept only
    once, in first-seen order.

    Args:
        filenames: Iterable of file names, as returned by ``os.listdir``.

    Returns:
        A list of title strings without extensions.
    """
    stems = (
        os.path.splitext(filename)[0]
        for filename in filenames
        if not filename.startswith(".")
    )
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(stem for stem in stems if stem.strip()))


def build_search_url(title):
    """Return the IMDb title-search URL for *title*.

    The title is lower-cased, runs of whitespace are collapsed to a single
    space, and the result is percent-encoded so that characters such as
    ``&`` or ``#`` stay inside the ``title`` query value instead of
    truncating it.
    """
    normalized = " ".join(title.lower().split())
    return SEARCH_URL + quote_plus(normalized)


def extract_matches(soup, title):
    """Return ``(name, rating, genre)`` tuples for results matching *title*.

    A result matches when the lower-cased *title* is a substring of the
    lower-cased result name. A result that lacks a name link, a rating or a
    genre is skipped on its own, so one incomplete entry no longer discards
    the remaining results for the film.

    Args:
        soup: Parsed search page (a ``BeautifulSoup`` object).
        title: Film title that was searched for.

    Returns:
        A list of ``(name, rating, genre)`` tuples; *name* keeps the
        capitalisation shown on the page.
    """
    # NOTE(review): these selectors depend on the markup of IMDb's search
    # page -- confirm they still match the live site.
    wanted = title.lower()
    matches = []
    for result in soup.find_all("div", class_="lister-item-content"):
        heading = result.h3
        link = heading.a if heading is not None else None
        if link is None or wanted not in link.text.lower():
            continue

        rating_tag = result.find("div", class_="inline-block ratings-imdb-rating")
        rating = rating_tag.get("data-value") if rating_tag is not None else None

        first_paragraph = result.p
        genre_tag = (
            first_paragraph.find("span", class_="genre")
            if first_paragraph is not None
            else None
        )
        genre = genre_tag.get_text(strip=True) if genre_tag is not None else ""

        if rating is None or not genre:
            # Incomplete entry: skip it rather than write a partial row.
            continue
        matches.append((link.text, rating, genre))
    return matches


def main():
    """Look up every film in a user-supplied directory on IMDb and save a CSV.

    Prompts for the directory, searches IMDb once per distinct film title,
    and writes the matching names, ratings and genres to ``film_ratings.csv``.

    Raises:
        OSError: If the entered path cannot be listed.
    """
    # For eg: "/Users/utkarsh/Desktop/films"
    path = input("Enter the path where your films are: ")
    films = titles_from_filenames(os.listdir(path))

    # Parallel columns of the final report.
    names = []
    ratings = []
    genres = []

    # One session for all films; the context manager closes it on exit.
    with requests.Session() as session:
        for title in films:
            url = build_search_url(title)
            print(url)
            try:
                response = session.get(url, timeout=REQUEST_TIMEOUT)
                # Treat HTTP error pages as failures instead of parsing them.
                response.raise_for_status()
            except requests.RequestException as err:
                print(f"Could not fetch IMDb results for {title!r}: {err}")
                continue

            soup = BeautifulSoup(response.content, features="html.parser")
            for name, rating, genre in extract_matches(soup, title):
                names.append(name)
                ratings.append(rating)
                genres.append(genre)

    # Storing in a pandas dataframe, then writing it out as CSV.
    df = pd.DataFrame({'Film Name': names, 'Rating': ratings, 'Genre': genres})
    df.to_csv(OUTPUT_CSV, index=False, encoding='utf-8')


if __name__ == "__main__":
    main()