Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
yiwang12 authored Nov 2, 2023
1 parent 82a270b commit e34b801
Show file tree
Hide file tree
Showing 4 changed files with 757 additions and 0 deletions.
154 changes: 154 additions & 0 deletions analysis_dlpfc/step2_fit_mNSF/fit_s1_12_L10.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@


########################################################################
dir_mNSF_functions='/users/ywang/Hansen_projects/scRNA/mNSF_2023_10_20/'
#dir_mNSF_functions='/users/ywang/Hansen_projects/scRNA/mNSF_2023_10_20/mNSF-main'
dir_output="/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/"

########################################################################
########################################################################
import sys
sys.path.append(dir_mNSF_functions)

#from scanpy import read_h5ad

import random
import mNSF

from mNSF import process_multiSample

from mNSF.NSF import preprocess
from mNSF.NSF import misc
#from mNSF.NSF import visualize
#from mNSF import training_multiSample
from mNSF import training_multiSample
from mNSF import process_multiSample
from mNSF.NSF import visualize

#from tensorflow.data import Dataset

from os import path
#import pandas
import os
import numpy as np
import tensorflow as tf
import pandas as pd
import sys
import pickle

from scanpy import pp

sys.path.append(dir_output)
os.chdir(dir_output)



########################################################################
########################################################################
L=10


nsample=12

mpth = path.join("models")
misc.mkdir_p(mpth)
pp = path.join(mpth,"pp",str(2))#list_fit[0].generate_pickle_path("constant",base=mpth)
misc.mkdir_p(pp)


########################################################################3
################### step 0 Data loading
########################################################################

list_D=list()
list_X=list()

for ksample in range(0,nsample):
Y=pd.read_csv(path.join('//dcs04/hansen/data/ywang/ST/DLPFC/processed_Data//Y_features_sele_sample'+str(ksample+1)+'_500genes.csv'))
X=pd.read_csv(path.join('//dcs04/hansen/data/ywang/ST/DLPFC/processed_Data///X_allSpots_sample'+str(ksample+1)+'.csv'))
D=process_multiSample.get_D(X,Y)
list_D.append(D)
list_X.append(D["X"])


list_Dtrain=process_multiSample.get_listDtrain(list_D)
list_sampleID=process_multiSample.get_listSampleID(list_D)


# inducing points, 70% of total spots for each sample
for ksample in range(0,nsample):
random.seed(111)
ninduced=round(list_D[ksample]['X'].shape[0] * 0.35)
random.seed(222)
print(ninduced)
D=list_D[ksample]
rd_ = random.sample(range(0, D['X'].shape[0]), ninduced)
list_D[ksample]["Z"]=D['X'][rd_ ,:]





########################################################################3
################### step 1 initialize model
########################################################################
#lik="nb"

list_fit=process_multiSample.ini_multiSample(list_D,L,"nb")


########################################################################
################### step 2 fit model
########################################################################


list_fit=training_multiSample.train_model_mNSF(list_fit,pp,list_Dtrain,list_D)



# save the fitted model
process_multiSample.save_object(list_fit, 'list_fit_nb_12samples_szMean_L10_fullData.pkl')



########################################################################
with open( 'list_fit_nb_12samples_szMean_L10_fullData.pkl', 'rb') as inp:
list_fit = pickle.load(inp)



########################################################################
################### step 3 save and plot results
########################################################################
inpf12=process_multiSample.interpret_npf_v3(list_fit,list_X,S=100,lda_mode=False)


W = inpf12["loadings"]
#Wdf=pd.DataFrame(W*inpf12["totals1"





Wdf=pd.DataFrame(W*inpf12["totalsW"][:,None], columns=range(1,L+1))
Wdf.to_csv(path.join("loadings_spde_nb_szMean_12samples_L10_fullData.csv"))



## save the factors
#inpf12 = process_multiSample.interpret_npf_v3(list_fit,list_X,S=100,lda_mode=False)
Factors = inpf12["factors"][:,0:L]

for k in range(0,nsample):
indices=list_sampleID[k]
indices=indices.astype(int)
Factors_df = pd.DataFrame(Factors[indices,:])
Factors_df.to_csv(path.join(dir_output,"factors_nb_szMean_sample_s"+str(k+1)+"_L10_fullData.csv"))


#





198 changes: 198 additions & 0 deletions analysis_dlpfc/step3_plot_ST/plot_szMean_L10.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@

### load pacakges
library(tidyverse)
library(ggplot2)
library(Matrix)
# library(Rmisc)#
library(ggforce)#
# library(rjson)#
library(cowplot)
library(RColorBrewer)
library(grid)
require(nnet)
# require(nnet)

group.colors <- c(Layer1 = "#FC8D62", Layer2 = "#FFD92F", Layer3 ="#A6D854", Layer4 = "#66C2A5", Layer5 = "#00A9FF",
Layer6="#8DA0CB",WM="#E78AC3")


#######
library(RColorBrewer)
library(ggplot2)
# library(readbitmap)#
myPalette_ <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
# https://ggplot2-book.org/scale-colour.html
myPalette = scale_fill_brewer(palette = "Set2")

#######
# myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral")))
setwd("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_dist005_szMean/")

# setwd("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll/")
dir_processedData = "/dcs04/hansen/data/ywang/ST/DLPFC/processed_Data_keepAll/"

# list_sample_pair=list(c(1,2),c(2,3),
# c(3,4),c(5,6),c(6,7),c(7,8),c(9,10),c(10,11),c(11,12))
list_sample_pair=list(c(1,2),
c(3,4),c(5,6),c(7,8),c(9,10),c(11,12))

#######################################
list_layer=list()
# list_rownames_kpSpots_=list()


####################################### s1
for(kpair in 1:length(list_sample_pair)){
list_layer[[kpair]]=list()
list_rownames_kpSpots_[[kpair]]=list()

sample_s1=list_sample_pair[[kpair]][1]
sample_s2=list_sample_pair[[kpair]][2]
# rownames_allSpots = readRDS(paste0(dir_processedData,"rownames_allSpots",
# "_S",sample_s1,"_",sample_s2,"_filterDist005_corrected.rds"))

# rownames_kpSpots_s1= readRDS(paste0(dir_processedData,"rownames_kpSpots",
# "_S",sample_s1,"_",sample_s2,"_filterDist005_s1_corrected.rds"))
# rownames_kpSpots_s2= readRDS(paste0(dir_processedData,"rownames_kpSpots",
# "_S",sample_s1,"_",sample_s2,"_filterDist005_s2_corrected.rds"))
#
# rownames_kpSpots = c(rownames_kpSpots_s1, rownames_kpSpots_s2)
# length(rownames_kpSpots)
# # 7112

# load layers
# save(layer,
# file=paste0("layer_sample_Sample",j,".RData"))
load(paste0("//dcs04/hansen/data/ywang/archive/scRNA/Oct5_2021_Lukas_data_more_Genes/out/layer_sample_Sample",sample_s1,".RData"))
# save(layer_sample_tmp,file=paste0("layer_sample",j,".RData"))
layer_sample_tmp__s1=layer[]

load(paste0("//dcs04/hansen/data/ywang/archive/scRNA/Oct5_2021_Lukas_data_more_Genes/out/layer_sample_Sample",sample_s2,".RData"))
# save(layer_sample_tmp,file=paste0("layer_sample",j,".RData"))
layer_sample_tmp__s2=layer[]

layer_sample_tmp__=c(layer_sample_tmp__s1,layer_sample_tmp__s2)
# names(layer_sample_tmp__)=rownames_allSpots

# layer_sample_tmp__s1=layer_sample_tmp__[rownames_kpSpots_s1]
# layer_sample_tmp__s2=layer_sample_tmp__[rownames_kpSpots_s2]

# layer_sample_tmp__ = c(layer_sample_tmp__s1, layer_sample_tmp__s2)
# length(layer_sample_tmp__)

list_layer[[kpair]][[1]]=layer_sample_tmp__s1
list_layer[[kpair]][[2]]=layer_sample_tmp__s2



# list_rownames_kpSpots_[[kpair]][[1]]=rownames_kpSpots_s1
# list_rownames_kpSpots_[[kpair]][[2]]=rownames_kpSpots_s2

}

make_plot<-function(pos,factor_mat,range_perFactor_,
layer_sample_tmp___,samplename_){
plots_l = list()
for (i in 1:ncol(factor_mat)) {
# dim(a)
df_tmp=data.frame( imagerow=pos[,1],
imagecol=pos[,2],
fill_tmp=factor_mat[,i],
layer=layer_sample_tmp___)
df_tmp=df_tmp[!is.na(df_tmp$layer),]
df_tmp$layer = factor(df_tmp$layer, levels = c('WM',"Layer6","Layer5","Layer4","Layer3", "Layer2","Layer1"))


plot_tmp = ggplot(df_tmp,aes(x=imagecol,y=imagerow,fill=fill_tmp)) +
# geom_spatial(data=images_tibble[i,], aes(grob=grob), x=0.5, y=0.5)+
geom_point(shape = 21, colour = "black", size = 2, stroke = NA)+
coord_cartesian(expand=FALSE)+
# scale_fill_gradientn(colours = myPalette_(100))+
scale_fill_gradientn(colours = myPalette_(100), limits=range_perFactor_[[i]])+

xlab("") +
ylab("") +
ggtitle(paste0(samplename_,", mNSF factor ",i))+
labs(fill = paste0(" "))+
theme_set(theme_bw(base_size = 10))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
axis.text = element_blank(),
axis.ticks = element_blank())

# if(type_tmp=="l"){
plots_l[[i]]=plot_tmp
# }else{


}
plots_l[[i+1]]=ggplot(df_tmp,aes(x=imagecol,y=imagerow,fill=layer)) +
geom_point(shape = 21, colour = "black", size = 2, stroke = NA)+
coord_cartesian(expand=FALSE)+
xlab("") +
ylab("") +
ggtitle(paste0("layer "))+
labs(fill = paste0(" "))+
theme_set(theme_bw(base_size = 10))+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
axis.line = element_line(colour = "black"),
axis.text = element_blank(),
axis.ticks = element_blank())+

scale_fill_manual(values=group.colors)
plots_l
}

range_perFactor = list()
for(ksample in 1:12){
factor_mat_tmp=read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/factors_nb_szMean_sample_s",ksample,"_L10_fullData.csv"),header=T)
factor_mat_tmp=factor_mat_tmp[,-1]
for(l in 1:ncol(factor_mat_tmp)){
if(ksample==1){ range_perFactor[[l]]=range(factor_mat_tmp[,l])}else{
range_perFactor[[l]]=range(c(range_perFactor[[l]], factor_mat_tmp[,l]))
}

}
}

pdf(paste0("LdaFalse_L10_szMean_s1To12_fullData.pdf"),height=3,width=18/6*8*3.2)
for(kpair in 1:6){
for(k in 1:2){
ksample = (kpair-1)*2+ k
a_=read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/factors_nb_szMean_sample_s",ksample,"_L10_fullData.csv"),header=T)
a_ = a_[,-1]
layers=list_layer[[kpair]][[k]]
X=read.csv(paste0('//dcs04/hansen/data/ywang/ST/DLPFC/processed_Data///X_allSpots_sample',ksample,'.csv'))
p1= make_plot(X, a_, range_perFactor,layers, paste0("sample ",ksample))
print(plot_grid(plotlist = p1,nrow=1))
}
}
dev.off()




pdf(paste0("LdaFalse_L10_szMean_s1To12_s1_5_7_9_fullData.pdf"),height=3,width=18/6*8*3.2)
for(kpair in c(1,3,4,5)){
for(k in 1){
ksample = (kpair-1)*2+ k
a_=read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/factors_nb_szMean_sample_s",ksample,"_L10_fullData.csv"),header=T)
a_ = a_[,-1]
layers=list_layer[[kpair]][[k]]
rownames_kpSpots=list_rownames_kpSpots_[[kpair]][[k]]
p1= make_plot(rownames_kpSpots, a_,range_perFactor, layers, paste0("sample ",ksample))
print(plot_grid(plotlist = p1,nrow=1))
}
}
dev.off()







Loading

0 comments on commit e34b801

Please sign in to comment.