-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathLUNA_mask_extraction.py
121 lines (104 loc) · 4.53 KB
/
LUNA_mask_extraction.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
from __future__ import print_function, division
from pathlib import Path
import os
import SimpleITK as sitk
import numpy as np
import csv
from glob import glob
import pandas as pd
try:
from tqdm import tqdm # long waits are not fun
except:
print('TQDM does make much nicer wait bars...')
tqdm = lambda x: x
# Some helper functions
def make_mask(center, diam, z, width, height, spacing, origin):
'''
Center : centers of circles px -- list of coordinates x,y,z
diam : diameters of circles px -- diameter
widthXheight : pixel dim of image
spacing = mm/px conversion rate np array x,y,z
origin = x,y,z mm np.array
z = z position of slice in world coordinates mm
'''
mask = np.zeros([height,width]) # 0's everywhere except nodule swapping x,y to match img
# convert to nodule space from world coordinates
# Defining the voxel range in which the nodule falls
v_center = (center-origin)/spacing
v_diam = int(diam/spacing[0]+5)
v_xmin = np.max([0,int(v_center[0]-v_diam)-5])
v_xmax = np.min([width-1,int(v_center[0]+v_diam)+5])
v_ymin = np.max([0,int(v_center[1]-v_diam)-5])
v_ymax = np.min([height-1,int(v_center[1]+v_diam)+5])
v_xrange = range(v_xmin, v_xmax+1)
v_yrange = range(v_ymin, v_ymax+1)
# Convert back to world coordinates for distance calculation
x_data = [x*spacing[0]+origin[0] for x in range(width)]
y_data = [x*spacing[1]+origin[1] for x in range(height)]
# Fill in 1 within sphere around nodule
for v_x in v_xrange:
for v_y in v_yrange:
p_x = spacing[0]*v_x + origin[0]
p_y = spacing[1]*v_y + origin[1]
if np.linalg.norm(center-np.array([p_x, p_y, z])) <= diam:
mask[int((p_y-origin[1])/spacing[1]), int((p_x-origin[0])/spacing[0])] = 1.0
return mask
def matrix2int16(matrix):
'''
matrix must be a numpy array NXN
Returns uint16 version
'''
m_min= np.min(matrix)
m_max= np.max(matrix)
matrix = matrix-m_min
return np.array(np.rint((matrix-m_min)/float(m_max-m_min) * 65535.0),dtype=np.uint16)
############
# Getting list of image files
luna_path = "./dataset/"
luna_subset_path = luna_path+"volumes/images/"
output_path = "./dataset/volumes_modified/"
file_list = glob(luna_subset_path+"*.mhd")
#####################
#
# Helper function to get rows in data frame associated
# with each file
def get_filename(file_list, case):
for f in file_list:
if case in f:
return f
# The locations of the nodes
df_node = pd.read_csv(luna_path+"annotations.csv")
df_node["file"] = df_node["seriesuid"].map(lambda file_name: get_filename(file_list, file_name))
df_node = df_node.dropna()
#####
#
# Looping over the image files
#
for fcount, img_file in enumerate(tqdm(file_list)):
mini_df = df_node[df_node["file"]==img_file] #get all nodules associate with file
if mini_df.shape[0] > 0: # some files may not have a nodule--skipping those
# load the data once
itk_img = sitk.ReadImage(img_file)
img_array = sitk.GetArrayFromImage(itk_img) # indexes are z,y,x (notice the ordering)
num_z, height, width = img_array.shape # heightXwidth constitute the transverse plane
origin = np.array(itk_img.GetOrigin()) # x,y,z Origin in world coordinates (mm)
spacing = np.array(itk_img.GetSpacing()) # spacing of voxels in world coor. (mm)
# go through all nodes (why just the biggest?)
for node_idx, cur_row in mini_df.iterrows():
node_x = cur_row["coordX"]
node_y = cur_row["coordY"]
node_z = cur_row["coordZ"]
diam = cur_row["diameter_mm"]
# just keep 3 slices
imgs = np.ndarray([3, height, width], dtype=np.float32)
masks = np.ndarray([3, height, width], dtype=np.uint8)
center = np.array([node_x, node_y, node_z]) # nodule center
v_center = np.rint((center-origin)/spacing) # nodule center in voxel space (still x,y,z ordering)
for i, i_z in enumerate(np.arange(int(v_center[2])-1,
int(v_center[2])+2).clip(0, num_z-1)): # clip prevents going out of bounds in Z
mask = make_mask(center, diam, i_z*spacing[2]+origin[2],
width, height, spacing, origin)
masks[i] = mask
imgs[i] = img_array[i_z]
np.save(Path(os.path.join(output_path, "images_%04d_%04d.npy" % (fcount, node_idx))), imgs)
np.save(Path(os.path.join(output_path, "masks_%04d_%04d.npy" % (fcount, node_idx))), masks)