model_MUSE.py
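"""Run MUSE on one spatial transcriptomics sample: build a gene-expression view
and an image-morphology view, learn a joint MUSE embedding, cluster it with
KMeans, and write the per-spot cluster assignments to a CSV file."""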
from aestetik.utils.utils_morphology import extract_morphology_embeddings
from torchvision.models import inception_v3, Inception_V3_Weights
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from torchvision import transforms
import tensorflow.compat.v1 as tf
import muse_sc as muse
from tqdm import tqdm
import scanpy as sc
import numpy as np
import torch
import scipy
import json
import yaml
import sys
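# TensorFlow v1-compat GPU configuration: allow GPU memory to grow on demand instead of pre-allocating it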
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
SEED = 2023
np.random.seed(SEED)
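# Positional command-line arguments (expected invocation):
#   python model_MUSE.py <sample> <param_name> <img_format> <cross_validation_combination> <model> <mode> <out_folder>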
sample = str(sys.argv[1])
param_name = str(sys.argv[2])
img_format = str(sys.argv[3])
cross_validation_combination = str(sys.argv[4])
model = str(sys.argv[5])
mode = str(sys.argv[6])
out_folder = str(sys.argv[7])
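# Input/output paths derived from the shared output folder layout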
adata_in = f"{out_folder}/data/h5ad/{sample}.h5ad"
img_path = f"{out_folder}/data/image/{sample}.{img_format}"
json_path = f"{out_folder}/data/meta/{sample}.json"
img_features_path = f"{out_folder}/data/image_features/{sample}_inception.npy"
parameter_path = f"{out_folder}/{cross_validation_combination}/{model}_fine_tune/parameters/{param_name}.yaml"
cluster_out_path = f"{out_folder}/{cross_validation_combination}/{model}_{mode}/clusters/model-{sample}-{param_name}.csv"
# Load MUSE hyperparameters from the fine-tuning YAML config
with open(parameter_path, "r") as stream:
    parameters = yaml.safe_load(stream)
n_components = int(parameters["n_components"])
del parameters["n_components"]
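# Full-resolution spot diameter from the sample metadata JSON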
with open(json_path) as f:
    spot_diameter_fullres = round(json.load(f)["spot_diameter_fullres"])
adata = sc.read(adata_in)
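# Gene-expression view: select the top 500 highly variable genes, then normalize and log-transform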
sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=500)
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
data_a = adata.X[:, adata.var.highly_variable.values]
# densify if the expression matrix is stored as a sparse matrix
if scipy.sparse.issparse(data_a):
    data_a = data_a.toarray()
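# Morphology view: Inception-v3 feature extractor with the classification head replaced by an identity layer (2048-d output)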
weights = Inception_V3_Weights.DEFAULT
image_model_children = inception_v3(weights=weights)
image_model_children.fc = torch.nn.Identity()
image_model_children.eval()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
feature_dim = 2048
preprocess = transforms.Compose([
    transforms.ToTensor(),
    weights.transforms(antialias=True),
])
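# Extract one morphology embedding per spot from the full-resolution image
# (note: x/y pixel coordinates are passed swapped below)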
data_b = extract_morphology_embeddings(
    img_path,
    image_model_children,
    x_pixel=adata.obs.y_pixel,
    y_pixel=adata.obs.x_pixel,
    spot_diameter=spot_diameter_fullres,
    device=device,
    feature_dim=feature_dim,
    preprocess=preprocess,
    apply_pca=False,
)
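# Reduce each view with PCA and derive initial per-view cluster labels with KMeans,
# using the number of ground-truth classes as k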
n_clusters = adata.obs.ground_truth.unique().size
view_a_feature = PCA(n_components=n_components).fit_transform(data_a)
view_b_feature = PCA(n_components=n_components).fit_transform(data_b)
view_a_label = KMeans(n_clusters=n_clusters, random_state=SEED, n_init="auto").fit(view_a_feature).predict(view_a_feature)
view_b_label = KMeans(n_clusters=n_clusters, random_state=SEED, n_init="auto").fit(view_b_feature).predict(view_b_feature)
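# Fit MUSE on the two views with their initial labels to learn a joint latent representation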
muse_feature, reconstruct_x, reconstruct_y, latent_x, latent_y = muse.muse_fit_predict(
    data_a,
    data_b,
    view_a_label,
    view_b_label,
    **parameters,
)
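# Cluster the joint MUSE embedding and export per-spot cluster assignments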
muse_cluster = KMeans(n_clusters=n_clusters, random_state=SEED, n_init="auto").fit(muse_feature).predict(muse_feature)
adata.obs["muse_cluster"] = muse_cluster.astype(str)
cluster_out = adata.obs[["muse_cluster"]].reset_index().rename({'index': "spot_barcode"}, axis=1)
cluster_out.to_csv(cluster_out_path, index=False)