-
Notifications
You must be signed in to change notification settings - Fork 114
/
Copy pathutils.py
75 lines (67 loc) · 2.59 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from random import randint
import pandas as pd
import requests, zipfile
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
import os
def download_data():
    """Download and unpack the data archive into ``data/`` unless it is cached.

    The download is skipped when ``data/train_FD004.txt`` already exists.
    Otherwise the zip archive is requested, held in memory and extracted
    into the ``data`` directory, which is created first if it is missing.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        zipfile.BadZipfile: if the response body is not a valid zip archive.
    """
    # Local import: the archive is binary, so it needs a bytes buffer rather
    # than the text-oriented StringIO imported at module level.
    from io import BytesIO

    data_dir = 'data'
    if os.path.exists(os.path.join(data_dir, 'train_FD004.txt')):
        print('Using previously downloaded data')
        return

    print('Downloading Data...')
    # A fresh checkout has no data directory yet; create it instead of failing.
    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)

    response = requests.get("https://ti.arc.nasa.gov/c/6/")
    # Fail loudly on 4xx/5xx instead of handing an error page to zipfile.
    response.raise_for_status()

    with zipfile.ZipFile(BytesIO(response.content)) as archive:
        archive.extractall(data_dir)
def load_data(data_path):
    """Read a space-separated, header-less recordings file into a DataFrame.

    The file is parsed with the column names ``engine_no``,
    ``time_in_cycles``, three ``operational_setting_*`` columns and
    twenty-six ``sensor_measurement_*`` columns; the last five sensor
    columns are then dropped. An ``index`` column mirroring the row
    position is added and also used as the frame's index, and a ``time``
    column is filled with timestamps starting at 1/1/2000 and spaced
    600 seconds apart. A short summary is printed.

    Args:
        data_path: path (or anything ``pandas.read_csv`` accepts) of the file.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    setting_names = ['operational_setting_{}'.format(n) for n in range(1, 4)]
    sensor_names = ['sensor_measurement_{}'.format(n) for n in range(1, 27)]
    column_names = ['engine_no', 'time_in_cycles'] + setting_names + sensor_names

    frame = pd.read_csv(data_path, sep=' ', header=None, names=column_names)
    # The trailing five named columns are discarded right after parsing.
    frame = frame.drop(columns=column_names[-5:])

    # Keep the row position both as a regular column and as the index.
    frame['index'] = frame.index
    frame = frame.set_index('index', drop=False)

    n_rows = frame.shape[0]
    frame['time'] = pd.date_range('1/1/2000', periods=n_rows, freq='600s')

    n_engines = len(frame['engine_no'].unique())
    print('Loaded data with:\n{} Recordings\n{} Engines'.format(n_rows, n_engines))
    print('21 Sensor Measurements\n3 Operational Settings')
    return frame
def new_labels(data, labels):
    """Pick one random cutoff row per engine and return its time and label.

    ``labels`` is attached to a copy of ``data`` as the ``RUL`` column (the
    caller's frame is not modified). For every ``engine_no`` group a row
    position is drawn with ``randint``; positions below 5 are only used
    when the group is too short to offer position 5 or later.

    Args:
        data: DataFrame with at least ``engine_no`` and ``time`` columns.
        labels: one label per row of ``data``, in row order.

    Returns:
        DataFrame with columns ``engine_no``, ``cutoff_time`` and ``RUL``,
        one row per engine, indexed by ``engine_no`` under the index name
        ``index``.
    """
    labelled = data.copy()
    labelled['RUL'] = labels

    ct_ids = []
    ct_times = []
    ct_labels = []
    for _, group in labelled.groupby('engine_no'):
        last_position = group.shape[0] - 1
        # Clamp the lower bound: randint(5, last) raises ValueError when an
        # engine has five rows or fewer.
        position = randint(min(5, last_position), last_position)
        row = group.iloc[position]
        ct_ids.append(row['engine_no'])
        ct_times.append(row['time'])
        ct_labels.append(row['RUL'])

    ct = pd.DataFrame({'engine_no': ct_ids,
                       'cutoff_time': ct_times,
                       'RUL': ct_labels})
    ct = ct[['engine_no', 'cutoff_time', 'RUL']]
    ct.index = ct['engine_no']
    ct.index = ct.index.rename('index')
    return ct
def make_cutoff_times(data):
    """Label every row with its remaining row count and sample cutoff times.

    Each row's label is the number of rows that follow it within the same
    ``engine_no`` group (the last row of a group gets 0). The labels are
    then handed to ``new_labels``, which returns one sampled cutoff per
    engine.

    Args:
        data: DataFrame with ``engine_no`` and ``time`` columns.

    Returns:
        The DataFrame produced by ``new_labels(data, labels)``.
    """
    # cumcount keeps the labels aligned with the original row order, so the
    # result is correct even when rows are not grouped by engine; building
    # the list group by group only matched the rows for data already sorted
    # by engine_no.
    remaining = data.groupby('engine_no').cumcount(ascending=False)
    return new_labels(data, remaining.tolist())
def feature_importances(X, reg, feats=5):
    """Print and return the names of the highest-importance features.

    Pairs each value of ``reg.feature_importances_`` with the column of
    ``X`` at the same position, orders the pairs from highest to lowest
    importance (ties ordered by column name, descending) and prints the
    first ``feats`` of them as ``rank: name [importance]``.

    Args:
        X: object exposing ``columns`` (e.g. a DataFrame).
        reg: object exposing ``feature_importances_``.
        feats: how many top features to report.

    Returns:
        List with the names of the reported features, best first.
    """
    columns = X.columns
    scored = []
    for position, importance in enumerate(reg.feature_importances_):
        scored.append((importance, columns[position]))

    # Ascending sort followed by a reversal, so the largest comes first.
    ranked = sorted(scored)[::-1]
    top = ranked[:feats]

    for rank, (importance, name) in enumerate(top, start=1):
        print('{}: {} [{:.3f}]'.format(rank, name, importance))
    print('-----\n')
    return [name for _, name in top]