-
Notifications
You must be signed in to change notification settings - Fork 0
/
water_analysis.py
193 lines (164 loc) · 6.71 KB
/
water_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import copy as cp
plt.style.use('fivethirtyeight')
sns.set_context(font_scale= 4)
class Data:
'''
################## This is a analysis on water resources #####################
And this module can be used by passing 'water analysis file' as a parameter to the Data class.
These are the available methods and fields to use:
Field -> data
Functions name:
* mimic_surge_error
* waterAmount_150_above
* waterAmount_150_less
* waterAmount_0_values
* fill_waterAmount_of_0_values
* leaky_taps
* rename_tap
* sort_filter_name
* visualizing_amount
* leakage_amount
* taps
* taps_location
* all_taps_location_plot
'''
__all__= {
"mimic_surge_error",
"waterAmount_150_above",
"waterAmount_150_less",
"waterAmount_0_values",
"leaky_taps",
"rename_tap",
"sort_filter_name",
"visualizing_amount",
"leakage_amount",
"taps",
"taps_location",
"all_taps_location_plot"
""
}
def __init__(self, df):
self.data= cp.deepcopy(df)
def rename_column_name(self, names):
"""
Renaming the columns name to what is specified in the Data class
"""
self.data.columns= names
def mimic_surge_error(self):
"""
The will return a dataframe that is less or equal to the specify surge value
"""
surge = int(input("Specify surge value: "))
return self.data[self.data['Amount_water'] <= surge]
def waterAmount_150_above(self):
"""
This will return a dataframe, where the amount of water is greater than or equal to 150ml and less than 500ml
"""
amount150= self.data.loc[self.data["Amount_water"] >=150]
amount500less = amount150[amount150["Amount_water"] < 500]
return amount500less
def waterAmount_150_less(self):
amount150less= self.data[self.data["Amount_water"] < 150]
return amount150less
def waterAmount_0_values(self):
"""
Returning a dataframe, where the amount of water is equal to zero values
"""
zer= self.data['Amount_water']==0
return self.data[zer]
def fill_waterAmount_of_0_values(self):
"""
This will replace all the instance of water amount of zero value with the mean of the column
"""
self.data['Amount_water']= self.data['Amount_water'].replace(0, self.data['Amount_water'].mean())
return self.data
def leaky_taps(self):
"""
Returning names of the leaky taps
"""
leak= self.waterAmount_0_values()
return leak.Filter_name
def taps(self):
"""
This method is only going to group all the taps into a list and then return the list.
The return type is list, which you can slice or iterate through.
"""
tap= []
unique_tap= self.data['Filter_name'].unique()
tap_length= len(unique_tap)
for i in range(1, tap_length+1):
tap.append(self.data[self.data['Filter_name'] == 'Tap '+ str(i)])
return tap
def taps_location(self):
"""
This method is for grouping each tap according to their location into a list.
The return type is list, which you can slice or iterate.
"""
location=list(self.data['Filter_location'].unique())
loc_num= len(location) #Length of Location number
tap= self.taps()
tap_location= [[], [], [], [], [], [], []]
basetap= 0
while basetap < 7:
k= 0
while k < loc_num:
for j in location:
tap_loc = tap[basetap]
tap_location[k].append(tap_loc[tap_loc['Filter_location'] == j])
k += 1
basetap += 1
return tap_location #The slicing index can be change to 6 when the location is up to 6
def all_taps_location_plot(self):
"""
This is method is going to return visualization for each tap and location.
It then going to save it in a directory call "the location name" in taporder folder.
"""
tapsL= self.taps_location() # Tap for different location
try:
for tap_loc in tapsL:
location= tap_loc[0]['Filter_location'][0]
for j in range(0, 7):
plt.figure(figsize= (25, 13))
plt.plot(tap_loc[j].sort_index(ascending= True).index.values, tap_loc[j]['Amount_water'])
plt.ylabel('Amount of water', fontsize= 19)
plt.title(f"Tap {j+1} for {location}", fontsize= 19)
plt.xticks(fontsize= 15)
plt.yticks(fontsize= 15)
plt.ylim([0, 903])
plt.xlabel('Time', fontsize= 19)
#plt.savefig('taporder/%s/tap %s.png' % (location, str(j + 1))) #To save visuals in taporder folder
except IndexError:
print("Nothing to display here")
def rename_tap(self, name):
self.data["Filter_name"].replace(name, inplace= True)
def sort_filter_name(self):
self.data.sort_values(by= "Filter_name", axis = 0, inplace= True)
def _annot(self, splot, decide= True):
for p in splot.patches:
if decide:
rounder= '.1f'
else:
rounder= '1d'
splot.annotate(format(p.get_height(), rounder),\
(p.get_x() + p.get_width() / 2., p.get_height()),\
ha = 'center', va = 'center',\
size=15,\
xytext = (0, -12),\
textcoords = 'offset points')
def visualizing_amount_bar(self, df):
splot= sns.barplot(data= df.sort_values(by= 'Filter_name'), x= 'Filter_name', y= 'Amount_water', errcolor= 'none', palette= 'coolwarm')
self._annot(splot)
def visualizing_amount_location(self, df):
splot= sns.barplot(data= df, x= 'Filter_location', y= 'Amount_water', errcolor= 'none', palette= 'coolwarm')
plt.xticks(rotation= 40)
self._annot(splot)
def visualizing_location_count(self, df):
if(isinstance(df, pd.DataFrame)):
splot= sns.countplot(data= df, x= 'Filter_name', palette= 'coolwarm')
self._annot(splot, decide= False)
elif(isinstance(df, pd.Series)):
splot= sns.countplot(x= df, palette= 'coolwarm')
self._annot(splot, decide= False)