-
Notifications
You must be signed in to change notification settings - Fork 0
/
displacement.py
executable file
·97 lines (73 loc) · 3.64 KB
/
displacement.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python
import argparse
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
# This script read input GPS data and uses KMeans clustering to determine the displacement in three dimensions. Note: this is
# very dependant on the input data quality. Noisy input will give meaningless results. Please check the graphs to confirm a good fit.
parser = argparse.ArgumentParser(description='Find displacements in 3 dimensions in input GPS records')
parser.add_argument('filenames', type=str, nargs='+', help='One or more input files to process')
parser.add_argument('-c', '--clusters', dest='clusters', type=int, default=2,
help='The number of clusters to find. Usually 2, a cluster before and after the quake')
parser.add_argument('-f', dest='format', type=str, default='LC', choices=['LC', 'reformatted'],
help='Use either the LC format (default) or reformatted')
def read_file(fname, cols, skiprows):
data = pd.read_csv(fname, delim_whitespace=True, usecols=cols, skiprows=skiprows)
# drop any junk, eg: the second header line
data = data.dropna()
# using a normal integer index starting from 0
data = data.reset_index(drop=True)
return data
def get_displacement(dframe, columns, n):
"""Use KMeans clustering to find the cluster centers and return a modified dataframe and time ordered numpy array of displacements (XYZ)"""
km = KMeans(n_clusters=n)
clusters = km.fit_predict(dframe[columns].values)
# add a new column for the cluster id
dframe['cluster_id'] = pd.Series(clusters, index=dframe.index)
# add columns for the mean X/Y/Z for each cluster from min to max time (straight lines)
mean_columns = []
for col in columns:
col_name = col + '_mean'
mean_columns.append(col_name)
dframe[col_name] = pd.Series(np.zeros(dframe.shape[0], dtype=np.float64), index=dframe.index)
cluster_times = []
# get a list of min/max times of clusters so we can sort them by time, this will give us relative displacement by time
for cent_idx, centers in enumerate(km.cluster_centers_):
times = dframe.index[dframe['cluster_id'] == cent_idx]
cluster_times.append(times.values.mean())
# record the mean value of the cluster for each component in the output dataframe
for col_idx, col_name in enumerate(mean_columns):
dframe[col_name][times] = centers[col_idx]
# record the relative displacement
displacements = []
cluster_idx = 0
previous_xyz = None
for sorted_idx in np.argsort(cluster_times):
if cluster_idx == 0:
previous_xyz = km.cluster_centers_[sorted_idx]
else:
delta = km.cluster_centers_[sorted_idx] - previous_xyz
displacements.append(delta)
previous_xyz = km.cluster_centers_[sorted_idx]
cluster_idx += 1
return np.array(displacements)
def main():
args = parser.parse_args()
# columns in the input file to parse. LC is the unformatted full version
if args.format == 'LC':
cols = ['dNorth', 'dEast', 'dHeight']
skiprows = [1]
else:
cols = ['n(cm)', 'e(cm)', 'u(cm)']
skiprows = None
for fname in args.filenames:
dframe = read_file(fname, cols, skiprows)
disp = get_displacement(dframe, cols, args.clusters)
title = 'File: %s, displacement(s) for' % fname
for i, event in enumerate(disp):
title += (' event %d :%s=%.2f %s=%.2f %s=%.2f' % (i+1, cols[0], event[0], cols[1], event[1], cols[2], event[2]))
dframe.plot(title=title)
plt.show()
if __name__ == '__main__':
main()