-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path_process.py
183 lines (162 loc) · 6.87 KB
/
_process.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
"""Mass spectrometry processing
This module should be imported and contains the following:
    * process - Function to process msi.
    * aligned_representation - Function to create aligned representation
        for msi spectra.
    * common_representation - Function to create common representation for
        msi spectra.
    * meaningful_signal - Function to create meaningful signal scaler for msi
        spectra.
"""
import os
import numpy as np
from typing import List
from pyimzml.ImzMLParser import ImzMLParser
from pyimzml.ImzMLWriter import ImzMLWriter
from processing import (
EqualWidthBinning, ReferenceLockMass, TICNormalizer, MeanSegmentation,
ZScoreCorrection
)
from utils import read_msi, get_mean_spectra
from tqdm import tqdm
def aligned_representation(input_path: str, output_path: str,
                           original_lock_mass_position: float,
                           tol: float = 0.3) -> None:
    """Write a lock-mass aligned copy of an imzML file.

    A ``ReferenceLockMass`` is built from the mean spectrum of the input and
    applied to every spectrum, which is then written at its original
    coordinate to a new imzML file opened in "processed" mode.

    Args:
        input_path (str): Path to imzML file that needs to be aligned.
        output_path (str): Output location. It is handed to ``ImzMLWriter``
            unchanged (no file name is appended here), so it is presumably
            the output file path rather than a folder - confirm with callers.
        original_lock_mass_position (float): The expected (original) peak
            value of the lock mass.
        tol (float, optional): Tolerance for searching the shifted peak
            around the expected peak. Defaults to 0.3.
    """
    with ImzMLParser(input_path) as parser:
        # The lock mass reference is derived from the mean spectrum
        print("started mean spectra calc")
        reference = ReferenceLockMass(
            original_lock_mass_position, get_mean_spectra(parser), tol
        )
        print(input_path, reference.scale_ratio, reference.diff)

        # Aligned spectra go to a new file in "processed" mode
        with ImzMLWriter(output_path, mode="processed") as aligned_file:
            print("started aligning msi")
            for index, (col, row, depth) in enumerate(parser.coordinates):
                # Align a single spectrum
                spectrum = parser.getspectrum(index)
                shifted_mzs, shifted_intensities = reference.lock_mass(
                    spectrum
                )
                # Keep the spectrum at its original coordinate
                aligned_file.addSpectrum(
                    shifted_mzs, shifted_intensities, (col, row, depth)
                )
def common_representation(
    input_path: str, output_path: str, x_min: int, x_max: int, y_min: int,
    y_max: int, mz_start: int, mz_end: int, mass_resolution: float
) -> None:
    """Create a common representation for msi spectra.

    Every spectrum whose coordinate lies inside the given ROI is TIC
    normalized, binned with equal width bins and written to
    ``common_representation.imzML`` inside ``output_path``. Coordinates in
    the output are relative to the ROI, starting at 1.

    Args:
        input_path (str): Path to imzML file that needs to be processed.
        output_path (str): Path to folder for saving output.
        x_min (int): X minimum coordinate of the tissue in the input.
        x_max (int): X maximum coordinate of the tissue in the input.
        y_min (int): Y minimum coordinate of the tissue in the input.
        y_max (int): Y maximum coordinate of the tissue in the input.
        mz_start (int): The start value of the mz range.
        mz_end (int): The end value of the mz range.
        mass_resolution (float): The mass resolution passed to the binning.
    """
    # Get normalizer object
    normalizer = TICNormalizer()
    # Get binning object
    binning = EqualWidthBinning(mz_start, mz_end, mass_resolution)

    # Parse the MSI file containing ROI
    with ImzMLParser(input_path) as reader:
        # Create a new MSI for ROI. Because we apply binning
        # we can use mode="continuous"
        with ImzMLWriter(
            os.path.join(output_path, "common_representation.imzML"),
            mode="continuous"
        ) as writer:
            # Loop over each spectra in MSI
            for idx, (x, y, z) in enumerate(reader.coordinates):
                # Skip spectra outside the ROI boundaries (inclusive)
                if not (x_min <= x <= x_max and y_min <= y <= y_max):
                    continue
                # Read spectra from MSI
                raw_mzs, raw_intensities = reader.getspectrum(idx)
                # Normalize first, then bin the normalized spectrum
                preprocessed_mzs, preprocessed_intensities = binning.bin(
                    normalizer.normalize((raw_mzs, raw_intensities))
                )
                # Write spectra to new MSI with relative coordinate
                writer.addSpectrum(
                    preprocessed_mzs, preprocessed_intensities,
                    (x - x_min + 1, y - y_min + 1, z)
                )
def meaningful_signal(
    input_path: str, output_path: str, representative_peaks: List[float],
    mass_resolution: float
) -> None:
    """Create meaningful signal for msi spectra.

    The full image is read, segmented with ``MeanSegmentation`` and
    corrected with ``ZScoreCorrection``. Two files are written to
    ``output_path``: ``segmentation.npy`` (the segmentation) and
    ``meaningful_signal.imzML`` (the corrected image).

    Args:
        input_path (str): Path to continuous imzML file that needs to be
            processed.
        output_path (str): Path to folder for saving output.
        representative_peaks (List[float]): Representative peaks (mz values)
            for getting a single channel image.
        mass_resolution (float): Mass resolution of the msi.
    """
    # Parse the msi file
    with ImzMLParser(input_path) as reader:
        # Get full msi
        mzs, img = read_msi(reader)
        # Segment image
        segment_img = MeanSegmentation(
            mzs, representative_peaks, mass_resolution
        ).segment(img)
        # Save segmentation
        np.save(os.path.join(output_path, 'segmentation.npy'), segment_img)
        # Apply image correction
        zscore_img = ZScoreCorrection().correct(img, segment_img)
        # Open writer
        with ImzMLWriter(
            os.path.join(output_path, "meaningful_signal.imzML"),
            mode="continuous"
        ) as writer:
            # Save zscore image
            for x, y, z in reader.coordinates:
                # The image is indexed [y - 1, x - 1]; this assumes the
                # input coordinates start at 1 - TODO confirm
                writer.addSpectrum(mzs, zscore_img[y - 1, x - 1], (x, y, z))
def process(
    input_path: str, output_path: str,
    x_min: int, x_max: int, y_min: int, y_max: int, mz_start: int,
    mz_end: int, mass_resolution: float, representative_peaks: List[float]
) -> None:
    """Process msi.

    Runs ``common_representation`` on the input and then
    ``meaningful_signal`` on the ``common_representation.imzML`` file it
    produced. All outputs are written to ``output_path``.

    Args:
        input_path (str): Path to imzML file that needs to be processed.
        output_path (str): Path to folder for saving output.
        x_min (int): X minimum coordinate of the tissue in the input.
        x_max (int): X maximum coordinate of the tissue in the input.
        y_min (int): Y minimum coordinate of the tissue in the input.
        y_max (int): Y maximum coordinate of the tissue in the input.
        mz_start (int): The start value of the mz range.
        mz_end (int): The end value of the mz range.
        mass_resolution (float): The mass resolution.
        representative_peaks (List[float]): Representative peaks (mz values)
            for getting a single channel image.
    """
    # Create common representation. Half of the given mass resolution is
    # passed here, while the meaningful signal step gets the full value.
    common_representation(
        input_path, output_path,
        x_min, x_max, y_min, y_max, mz_start, mz_end, mass_resolution / 2
    )
    # Create meaningful signal from the file written by the previous step
    meaningful_signal(
        os.path.join(output_path, "common_representation.imzML"), output_path,
        representative_peaks, mass_resolution
    )