-
Notifications
You must be signed in to change notification settings - Fork 0
/
segmentation.py
285 lines (214 loc) · 9.39 KB
/
segmentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
import numpy as np
import random
from scipy.spatial.distance import squareform, pdist, cdist
from skimage.util import img_as_float
### Clustering Methods
def closest_cluster_slow(point, clusters):
min_distance = 999999
min_index = 0
i = 0
for cluster_center in clusters:
distance = abs(point - cluster_center).sum()
if (distance < min_distance):
min_distance = distance
min_index = i
i+=1
return min_index
def kmeans(features, k, num_iters=100):
""" Use kmeans algorithm to group features into k clusters.
K-Means algorithm can be broken down into following steps:
1. Randomly initialize cluster centers
2. Assign each point to the closest center
3. Compute new center of each cluster
4. Stop if cluster assignments did not change
5. Go to step 2
Args:
features - Array of N features vectors. Each row represents a feature
vector.
k - Number of clusters to form.
num_iters - Maximum number of iterations the algorithm will run.
Returns:
assignments - Array representing cluster assignment of each point.
(e.g. i-th point is assigned to cluster assignments[i])
"""
N, D = features.shape
assert N >= k, 'Number of clusters cannot be greater than number of points'
# Randomly initalize cluster centers
idxs = np.random.choice(N, size=k, replace=False)
centers = features[idxs]
assignments = np.zeros(N, dtype=np.uint32)
for n in range(num_iters):
new_centers = [0] * len(centers)
for pixel in range(len(features)):
closest_cluster_index = closest_cluster_slow(features[pixel], centers)
assignments[pixel] = closest_cluster_index
for center in range(len(centers)):
points_in_cluster = features[assignments == center]
new_center = np.mean(points_in_cluster, axis=0)
new_centers[center] = new_center
if (np.array_equal(np.array(new_centers), np.array(centers))):
return assignments
else: centers = new_centers
return assignments
def closest_cluster_fast(feats, clusters):
distances = cdist(feats, clusters, 'euclidean')
min_distance_indices = distances.argmin(axis=1)
return min_distance_indices
def kmeans_fast(features, k, num_iters=100):
""" Use kmeans algorithm to group features into k clusters.
This function makes use of numpy functions and broadcasting to speed up the
first part(cluster assignment) of kmeans algorithm.
Hints
- You may find cdist (imported from scipy.spatial.distance) and np.argmin useful
Args:
features - Array of N features vectors. Each row represents a feature
vector.
k - Number of clusters to form.
num_iters - Maximum number of iterations the algorithm will run.
Returns:
assignments - Array representing cluster assignment of each point.
(e.g. i-th point is assigned to cluster assignments[i])
"""
N, D = features.shape
assert N >= k, 'Number of clusters cannot be greater than number of points'
# Randomly initalize cluster centers
idxs = np.random.choice(N, size=k, replace=False)
centers = features[idxs]
assignments = np.zeros(N, dtype=np.uint32)
for n in range(num_iters):
new_centers = [0] * len(centers)
assignments = closest_cluster_fast(features, centers)
for center in range(len(centers)):
points_in_cluster = features[assignments == center]
new_center = np.mean(points_in_cluster, axis=0)
new_centers[center] = new_center
if (np.array_equal(np.array(new_centers), np.array(centers))):
return assignments
else: centers = new_centers
return assignments
def hierarchical_clustering(features, k):
""" Run the hierarchical agglomerative clustering algorithm.
The algorithm is conceptually simple:
Assign each point to its own cluster
While the number of clusters is greater than k:
Compute the distance between all pairs of clusters
Merge the pair of clusters that are closest to each other
We will use Euclidean distance to define distance between clusters.
Recomputing the centroids of all clusters and the distances between all
pairs of centroids at each step of the loop would be very slow. Thankfully
most of the distances and centroids remain the same in successive
iterations of the outer loop; therefore we can speed up the computation by
only recomputing the centroid and distances for the new merged cluster.
Even with this trick, this algorithm will consume a lot of memory and run
very slowly when clustering large set of points. In practice, you probably
do not want to use this algorithm to cluster more than 10,000 points.
Hints
- You may find pdist (imported from scipy.spatial.distance) useful
Args:
features - Array of N features vectors. Each row represents a feature
vector.
k - Number of clusters to form.
Returns:
assignments - Array representing cluster assignment of each point.
(e.g. i-th point is assigned to cluster assignments[i])
"""
N, D = features.shape
assert N >= k, 'Number of clusters cannot be greater than number of points'
# Assign each point to its own cluster
assignments = np.arange(N, dtype=np.uint32)
centers = np.copy(features)
n_clusters = N
while n_clusters > k:
### YOUR CODE HERE
pass
### END YOUR CODE
return assignments
### Pixel-Level Features
def color_features(img):
""" Represents a pixel by its color.
Args:
img - array of shape (H, W, C)
Returns:
features - array of (H * W, C)
"""
H, W, C = img.shape
img = img_as_float(img)
features = np.zeros((H*W, C))
### YOUR CODE HERE
features = [img[row][pixel] for row in range(H) for pixel in range(W)]
features = np.array(features)
pass
### END YOUR CODE
return features
import pandas as pd
from sklearn.preprocessing import StandardScaler
def color_position_features(img):
""" Represents a pixel by its color and position.
Combine pixel's RGB value and xy coordinates into a feature vector.
i.e. for a pixel of color (r, g, b) located at position (x, y) in the
image. its feature vector would be (r, g, b, x, y).
Don't forget to normalize features.
Hints
- You may find np.mgrid and np.dstack useful
- You may use np.mean and np.std
Args:
img - array of shape (H, W, C)
Returns:
features - array of (H * W, C+2)
"""
H, W, C = img.shape
color = img_as_float(img)
features = np.zeros((H*W, C+2))
### YOUR CODE HERE
colors = [color[row][pixel] for row in range(H) for pixel in range(W)]
positions = [[x, y] for x in range(H) for y in range(W)]
output = pd.DataFrame(colors, columns=['r','g','b'])
output = pd.concat([output, pd.DataFrame(positions, columns=['x','y'])], axis=1)
output = StandardScaler().fit_transform(output)
features = np.array(output)
pass
### END YOUR CODE
return output
### Quantitative Evaluation
def compute_accuracy(mask_gt, mask):
""" Compute the pixel-wise accuracy of a foreground-background segmentation
given a ground truth segmentation.
Args:
mask_gt - The ground truth foreground-background segmentation. A
logical of size H x W where mask_gt[y, x] is 1 if and only if
pixel (y, x) of the original image was part of the foreground.
mask - The estimated foreground-background segmentation. A logical
array of the same size and format as mask_gt.
Returns:
accuracy - The fraction of pixels where mask_gt and mask agree. A
bigger number is better, where 1.0 indicates a perfect segmentation.
"""
accuracy = None
H, W = mask.shape
size = H * W
equal_pixels = np.sum(mask == mask_gt)
accuracy = equal_pixels / size
return accuracy
def evaluate_segmentation(mask_gt, segments):
""" Compare the estimated segmentation with the ground truth.
Note that 'mask_gt' is a binary mask, while 'segments' contain k segments.
This function compares each segment in 'segments' with the ground truth and
outputs the accuracy of the best segment.
Args:
mask_gt - The ground truth foreground-background segmentation. A
logical of size H x W where mask_gt[y, x] is 1 if and only if
pixel (y, x) of the original image was part of the foreground.
segments - An array of the same size as mask_gt. The value of a pixel
indicates the segment it belongs.
Returns:
best_accuracy - Accuracy of the best performing segment.
0 <= accuracy <= 1, where 1.0 indicates a perfect segmentation.
"""
num_segments = np.max(segments) + 1
best_accuracy = 0
# Compare each segment in 'segments' with the ground truth
for i in range(num_segments):
mask = (segments == i).astype(int)
accuracy = compute_accuracy(mask_gt, mask)
best_accuracy = max(accuracy, best_accuracy)
return best_accuracy