-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMexico_Housing_Insights
52 lines (43 loc) · 1.54 KB
/
Mexico_Housing_Insights
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Mean of `price_usd` for every state, ordered from the highest mean to the
# lowest.
mean_price_by_state = (
    df.groupby("state")["price_usd"]
    .mean()
    .sort_values(ascending=False)
)
mean_price_by_state
# Bar chart of the per-state means, drawn through the pandas plotting accessor
# (pandas hands the drawing over to matplotlib).
mean_price_by_state.plot.bar(
    xlabel="State",
    ylabel="Price[USD]",
    title="Mean House Price by State",
);
# Add a derived feature: `price_usd` divided by `area_m2` for each row.
# NOTE(review): a row with area_m2 == 0 would produce inf here and distort the
# state means below — confirm such rows are removed upstream.
df["price_per_m2"] = df["price_usd"] / df["area_m2"]
# Mean price per m2 for every state, sorted from highest to lowest. The Series
# is assigned on its own so that the name refers to the aggregated data;
# chaining `.plot(...)` into the assignment would bind the matplotlib Axes
# returned by the plot call instead of the means.
mean_price_per_m2_by_state = (
    df.groupby("state")["price_per_m2"]
    .mean()
    .sort_values(ascending=False)
)
# Bar chart of the sorted per-state means.
mean_price_per_m2_by_state.plot(
    kind="bar",
    xlabel="State",
    ylabel="price_per_m2",
    title="Mean House Price per m2 by State",
);
# Scatter plot of price against area to see how the two are related.
# A dedicated figure/axes pair is created so the points are not drawn onto
# whatever axes pyplot currently treats as active (for example an earlier bar
# chart when this file runs as a plain script rather than cell by cell).
fig_all, ax_all = plt.subplots()
ax_all.scatter(df["area_m2"], df["price_usd"])
ax_all.set_xlabel("Area")
ax_all.set_ylabel("Price")
ax_all.set_title("Price vs Area")
# Quantify the relationship with the correlation coefficient between the two
# columns (Series.corr uses Pearson correlation by default).
p_correlation = df["area_m2"].corr(df["price_usd"])
# Print the value as a quick sanity check.
print(p_correlation)
# Drill down into a single state: the scatter plot and correlation computed for
# the full dataset can be repeated on this subset to check whether the same
# relationship holds within the state.
df_morelos = df.loc[df["state"] == "Morelos"]
# Full workflow for one state: subset, plot, then measure the correlation.
# Subset `df` to include only observations from "Distrito Federal".
df_mexico_city = df[df["state"] == "Distrito Federal"]
# Scatter plot of price against area for the subset. A fresh figure/axes pair
# keeps these points from being overlaid on a plot that is still open (pyplot
# would otherwise draw on its current axes).
fig_mexico_city, ax_mexico_city = plt.subplots()
ax_mexico_city.scatter(df_mexico_city["area_m2"], df_mexico_city["price_usd"])
ax_mexico_city.set_xlabel("Area")
ax_mexico_city.set_ylabel("Price")
ax_mexico_city.set_title("Area vs Price");
# Correlation between area and price within the subset. This rebinds
# `p_correlation`, replacing any value stored under that name earlier.
p_correlation = df_mexico_city["area_m2"].corr(df_mexico_city["price_usd"])
print(p_correlation)