-
Notifications
You must be signed in to change notification settings - Fork 0
/
model.py
58 lines (45 loc) · 1.97 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
# Load the dataset
df = pd.read_csv('cartoonmovies.csv')
# preprocessing of dataset
df = df.dropna()
df = df.drop('Title', axis=1)
df = df.drop('Director', axis=1)
df = df.drop('cast', axis=1)
# For example, label encode the "TYPE" column
label_encoder = LabelEncoder()
df['Type'] = label_encoder.fit_transform(df['Type'])
# Use one-hot encoding for nominal categorical features like "COUNTRY" and "LISTEDIN"
df = pd.get_dummies(df, columns=['country', 'ListedIn'], drop_first=True)
# Split the data into train and test sets
X = df.drop('Ratings', axis=1)
y = df['Ratings']
# Splitting the dataset into Test and Train
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Initialize and train the Random Forest Regressor model
model = RandomForestRegressor(n_estimators=100, max_depth=10, min_samples_split=5, min_samples_leaf=2, random_state=42)
model.fit(X_train, y_train)
# Evaluate the model on the test set
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100
print(f"Accuracy: {mape:.2f}%")
# User input for prediction
def predict_rating(user_country, user_duration, user_listedin):
user_input = {
'country': user_country,
'Duration': float(user_duration),
'ListedIn': user_listedin
}
user_input_df = pd.DataFrame(user_input, index=[0])
user_input_encoded = pd.get_dummies(user_input_df, columns=['country', 'ListedIn'], drop_first=True)
user_input_encoded = user_input_encoded.reindex(columns=X_train.columns, fill_value=0)
predicted_rating = model.predict(user_input_encoded)[0]
predicted_rating = round(predicted_rating, 2)
return predicted_rating