diff --git a/analysis_dlpfc/step2_fit_mNSF/fit_s1_12_L10.py b/analysis_dlpfc/step2_fit_mNSF/fit_s1_12_L10.py new file mode 100644 index 0000000..fc8a4de --- /dev/null +++ b/analysis_dlpfc/step2_fit_mNSF/fit_s1_12_L10.py @@ -0,0 +1,154 @@ + + +######################################################################## +dir_mNSF_functions='/users/ywang/Hansen_projects/scRNA/mNSF_2023_10_20/' +#dir_mNSF_functions='/users/ywang/Hansen_projects/scRNA/mNSF_2023_10_20/mNSF-main' +dir_output="/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/" + +######################################################################## +######################################################################## +import sys +sys.path.append(dir_mNSF_functions) + +#from scanpy import read_h5ad + +import random +import mNSF + +from mNSF import process_multiSample + +from mNSF.NSF import preprocess +from mNSF.NSF import misc +#from mNSF.NSF import visualize +#from mNSF import training_multiSample +from mNSF import training_multiSample +from mNSF import process_multiSample +from mNSF.NSF import visualize + +#from tensorflow.data import Dataset + +from os import path +#import pandas +import os +import numpy as np +import tensorflow as tf +import pandas as pd +import sys +import pickle + +from scanpy import pp + +sys.path.append(dir_output) +os.chdir(dir_output) + + + +######################################################################## +######################################################################## +L=10 + + +nsample=12 + +mpth = path.join("models") +misc.mkdir_p(mpth) +pp = path.join(mpth,"pp",str(2))#list_fit[0].generate_pickle_path("constant",base=mpth) +misc.mkdir_p(pp) + + +########################################################################3 +################### step 0 Data loading +######################################################################## + +list_D=list() +list_X=list() + +for ksample in range(0,nsample): + Y=pd.read_csv(path.join('//dcs04/hansen/data/ywang/ST/DLPFC/processed_Data//Y_features_sele_sample'+str(ksample+1)+'_500genes.csv')) + X=pd.read_csv(path.join('//dcs04/hansen/data/ywang/ST/DLPFC/processed_Data///X_allSpots_sample'+str(ksample+1)+'.csv')) + D=process_multiSample.get_D(X,Y) + list_D.append(D) + list_X.append(D["X"]) + + +list_Dtrain=process_multiSample.get_listDtrain(list_D) +list_sampleID=process_multiSample.get_listSampleID(list_D) + + +# inducing points, 70% of total spots for each sample +for ksample in range(0,nsample): + random.seed(111) + ninduced=round(list_D[ksample]['X'].shape[0] * 0.35) + random.seed(222) + print(ninduced) + D=list_D[ksample] + rd_ = random.sample(range(0, D['X'].shape[0]), ninduced) + list_D[ksample]["Z"]=D['X'][rd_ ,:] + + + + + +########################################################################3 +################### step 1 initialize model +######################################################################## +#lik="nb" + +list_fit=process_multiSample.ini_multiSample(list_D,L,"nb") + + +######################################################################## +################### step 2 fit model +######################################################################## + + +list_fit=training_multiSample.train_model_mNSF(list_fit,pp,list_Dtrain,list_D) + + + +# save the fitted model +process_multiSample.save_object(list_fit, 'list_fit_nb_12samples_szMean_L10_fullData.pkl') + + + +######################################################################## +with open( 'list_fit_nb_12samples_szMean_L10_fullData.pkl', 'rb') as inp: + list_fit = pickle.load(inp) + + + +######################################################################## +################### step 3 save and plot results +######################################################################## +inpf12=process_multiSample.interpret_npf_v3(list_fit,list_X,S=100,lda_mode=False) + + +W = inpf12["loadings"] +#Wdf=pd.DataFrame(W*inpf12["totals1" + + + + + +Wdf=pd.DataFrame(W*inpf12["totalsW"][:,None], columns=range(1,L+1)) +Wdf.to_csv(path.join("loadings_spde_nb_szMean_12samples_L10_fullData.csv")) + + + +## save the factors +#inpf12 = process_multiSample.interpret_npf_v3(list_fit,list_X,S=100,lda_mode=False) +Factors = inpf12["factors"][:,0:L] + +for k in range(0,nsample): + indices=list_sampleID[k] + indices=indices.astype(int) + Factors_df = pd.DataFrame(Factors[indices,:]) + Factors_df.to_csv(path.join(dir_output,"factors_nb_szMean_sample_s"+str(k+1)+"_L10_fullData.csv")) + + +# + + + + + diff --git a/analysis_dlpfc/step3_plot_ST/plot_szMean_L10.r b/analysis_dlpfc/step3_plot_ST/plot_szMean_L10.r new file mode 100644 index 0000000..e5f0f39 --- /dev/null +++ b/analysis_dlpfc/step3_plot_ST/plot_szMean_L10.r @@ -0,0 +1,198 @@ + +### load pacakges +library(tidyverse) +library(ggplot2) +library(Matrix) +# library(Rmisc)# +library(ggforce)# +# library(rjson)# +library(cowplot) +library(RColorBrewer) +library(grid) +require(nnet) +# require(nnet) + +group.colors <- c(Layer1 = "#FC8D62", Layer2 = "#FFD92F", Layer3 ="#A6D854", Layer4 = "#66C2A5", Layer5 = "#00A9FF", + Layer6="#8DA0CB",WM="#E78AC3") + + +####### +library(RColorBrewer) +library(ggplot2) +# library(readbitmap)# +myPalette_ <- colorRampPalette(rev(brewer.pal(11, "Spectral"))) +# https://ggplot2-book.org/scale-colour.html +myPalette = scale_fill_brewer(palette = "Set2") + +####### +# myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral"))) +setwd("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_dist005_szMean/") + +# setwd("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll/") +dir_processedData = "/dcs04/hansen/data/ywang/ST/DLPFC/processed_Data_keepAll/" + +# list_sample_pair=list(c(1,2),c(2,3), + # c(3,4),c(5,6),c(6,7),c(7,8),c(9,10),c(10,11),c(11,12)) +list_sample_pair=list(c(1,2), + c(3,4),c(5,6),c(7,8),c(9,10),c(11,12)) + +####################################### +list_layer=list() +# list_rownames_kpSpots_=list() + + + ####################################### s1 +for(kpair in 1:length(list_sample_pair)){ + list_layer[[kpair]]=list() + list_rownames_kpSpots_[[kpair]]=list() + + sample_s1=list_sample_pair[[kpair]][1] + sample_s2=list_sample_pair[[kpair]][2] + # rownames_allSpots = readRDS(paste0(dir_processedData,"rownames_allSpots", + # "_S",sample_s1,"_",sample_s2,"_filterDist005_corrected.rds")) + + # rownames_kpSpots_s1= readRDS(paste0(dir_processedData,"rownames_kpSpots", + # "_S",sample_s1,"_",sample_s2,"_filterDist005_s1_corrected.rds")) + # rownames_kpSpots_s2= readRDS(paste0(dir_processedData,"rownames_kpSpots", + # "_S",sample_s1,"_",sample_s2,"_filterDist005_s2_corrected.rds")) + # + # rownames_kpSpots = c(rownames_kpSpots_s1, rownames_kpSpots_s2) + # length(rownames_kpSpots) + # # 7112 + + # load layers + # save(layer, + # file=paste0("layer_sample_Sample",j,".RData")) + load(paste0("//dcs04/hansen/data/ywang/archive/scRNA/Oct5_2021_Lukas_data_more_Genes/out/layer_sample_Sample",sample_s1,".RData")) + # save(layer_sample_tmp,file=paste0("layer_sample",j,".RData")) + layer_sample_tmp__s1=layer[] + + load(paste0("//dcs04/hansen/data/ywang/archive/scRNA/Oct5_2021_Lukas_data_more_Genes/out/layer_sample_Sample",sample_s2,".RData")) + # save(layer_sample_tmp,file=paste0("layer_sample",j,".RData")) + layer_sample_tmp__s2=layer[] + + layer_sample_tmp__=c(layer_sample_tmp__s1,layer_sample_tmp__s2) + # names(layer_sample_tmp__)=rownames_allSpots + + # layer_sample_tmp__s1=layer_sample_tmp__[rownames_kpSpots_s1] + # layer_sample_tmp__s2=layer_sample_tmp__[rownames_kpSpots_s2] + + # layer_sample_tmp__ = c(layer_sample_tmp__s1, layer_sample_tmp__s2) + # length(layer_sample_tmp__) + + list_layer[[kpair]][[1]]=layer_sample_tmp__s1 + list_layer[[kpair]][[2]]=layer_sample_tmp__s2 + + + + # list_rownames_kpSpots_[[kpair]][[1]]=rownames_kpSpots_s1 + # list_rownames_kpSpots_[[kpair]][[2]]=rownames_kpSpots_s2 + + } + +make_plot<-function(pos,factor_mat,range_perFactor_, + layer_sample_tmp___,samplename_){ + plots_l = list() + for (i in 1:ncol(factor_mat)) { + # dim(a) + df_tmp=data.frame( imagerow=pos[,1], + imagecol=pos[,2], + fill_tmp=factor_mat[,i], + layer=layer_sample_tmp___) + df_tmp=df_tmp[!is.na(df_tmp$layer),] + df_tmp$layer = factor(df_tmp$layer, levels = c('WM',"Layer6","Layer5","Layer4","Layer3", "Layer2","Layer1")) + + + plot_tmp = ggplot(df_tmp,aes(x=imagecol,y=imagerow,fill=fill_tmp)) + + # geom_spatial(data=images_tibble[i,], aes(grob=grob), x=0.5, y=0.5)+ + geom_point(shape = 21, colour = "black", size = 2, stroke = NA)+ + coord_cartesian(expand=FALSE)+ + # scale_fill_gradientn(colours = myPalette_(100))+ + scale_fill_gradientn(colours = myPalette_(100), limits=range_perFactor_[[i]])+ + + xlab("") + + ylab("") + + ggtitle(paste0(samplename_,", mNSF factor ",i))+ + labs(fill = paste0(" "))+ + theme_set(theme_bw(base_size = 10))+ + theme(panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + axis.line = element_line(colour = "black"), + axis.text = element_blank(), + axis.ticks = element_blank()) + + # if(type_tmp=="l"){ + plots_l[[i]]=plot_tmp + # }else{ + + + } + plots_l[[i+1]]=ggplot(df_tmp,aes(x=imagecol,y=imagerow,fill=layer)) + + geom_point(shape = 21, colour = "black", size = 2, stroke = NA)+ + coord_cartesian(expand=FALSE)+ + xlab("") + + ylab("") + + ggtitle(paste0("layer "))+ + labs(fill = paste0(" "))+ + theme_set(theme_bw(base_size = 10))+ + theme(panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + axis.line = element_line(colour = "black"), + axis.text = element_blank(), + axis.ticks = element_blank())+ + + scale_fill_manual(values=group.colors) + plots_l +} + +range_perFactor = list() +for(ksample in 1:12){ + factor_mat_tmp=read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/factors_nb_szMean_sample_s",ksample,"_L10_fullData.csv"),header=T) + factor_mat_tmp=factor_mat_tmp[,-1] + for(l in 1:ncol(factor_mat_tmp)){ + if(ksample==1){ range_perFactor[[l]]=range(factor_mat_tmp[,l])}else{ + range_perFactor[[l]]=range(c(range_perFactor[[l]], factor_mat_tmp[,l])) + } + + } +} + +pdf(paste0("LdaFalse_L10_szMean_s1To12_fullData.pdf"),height=3,width=18/6*8*3.2) +for(kpair in 1:6){ + for(k in 1:2){ + ksample = (kpair-1)*2+ k + a_=read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/factors_nb_szMean_sample_s",ksample,"_L10_fullData.csv"),header=T) + a_ = a_[,-1] + layers=list_layer[[kpair]][[k]] + X=read.csv(paste0('//dcs04/hansen/data/ywang/ST/DLPFC/processed_Data///X_allSpots_sample',ksample,'.csv')) + p1= make_plot(X, a_, range_perFactor,layers, paste0("sample ",ksample)) + print(plot_grid(plotlist = p1,nrow=1)) + } +} +dev.off() + + + + +pdf(paste0("LdaFalse_L10_szMean_s1To12_s1_5_7_9_fullData.pdf"),height=3,width=18/6*8*3.2) +for(kpair in c(1,3,4,5)){ + for(k in 1){ + ksample = (kpair-1)*2+ k + a_=read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/factors_nb_szMean_sample_s",ksample,"_L10_fullData.csv"),header=T) + a_ = a_[,-1] + layers=list_layer[[kpair]][[k]] + rownames_kpSpots=list_rownames_kpSpots_[[kpair]][[k]] + p1= make_plot(rownames_kpSpots, a_,range_perFactor, layers, paste0("sample ",ksample)) + print(plot_grid(plotlist = p1,nrow=1)) + } +} +dev.off() + + + + + + + diff --git a/analysis_dlpfc/step4_plot_violin/plot_con_violin_s1_5_7_12_L10.r b/analysis_dlpfc/step4_plot_violin/plot_con_violin_s1_5_7_12_L10.r new file mode 100644 index 0000000..491373c --- /dev/null +++ b/analysis_dlpfc/step4_plot_violin/plot_con_violin_s1_5_7_12_L10.r @@ -0,0 +1,149 @@ + +### load packages +library(tidyverse) +library(ggplot2) +library(Matrix) +library(ggforce)# +library(cowplot) +library(RColorBrewer) +library(grid) +require(nnet) + +####### + +myPalette_ <- colorRampPalette(rev(brewer.pal(11, "Spectral"))) +# https://ggplot2-book.org/scale-colour.html +myPalette = scale_fill_brewer(palette = "Set2") + +dodge <- position_dodge(width = 3) +group.colors <- c(Layer1 = "#FC8D62", Layer2 = "#FFD92F", Layer3 ="#A6D854", Layer4 = "#66C2A5", Layer5 = "#00A9FF", + Layer6="#8DA0CB",WM="#E78AC3") +####### +setwd("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_dist005_szMean/") +dir_processedData = "/dcs04/hansen/data/ywang/ST/DLPFC/processed_Data_keepAll/" +list_sample_pair=list(c(1,2),c(5,6),c(7,8),c(11,12)) + +####################################### +list_layer=list() + +####################################### s1 +for(kpair in 1:length(list_sample_pair)){ + list_layer[[kpair]]=list() + list_rownames_kpSpots_[[kpair]]=list() + + sample_s1=list_sample_pair[[kpair]][1] + sample_s2=list_sample_pair[[kpair]][2] + # rownames_allSpots = readRDS(paste0(dir_processedData,"rownames_allSpots", + # "_S",sample_s1,"_",sample_s2,"_filterDist005_corrected.rds")) + + # rownames_kpSpots_s1= readRDS(paste0(dir_processedData,"rownames_kpSpots", + # "_S",sample_s1,"_",sample_s2,"_filterDist005_s1_corrected.rds")) + # rownames_kpSpots_s2= readRDS(paste0(dir_processedData,"rownames_kpSpots", + # "_S",sample_s1,"_",sample_s2,"_filterDist005_s2_corrected.rds")) + # + # rownames_kpSpots = c(rownames_kpSpots_s1, rownames_kpSpots_s2) + # length(rownames_kpSpots) + # # 7112 + + # load layers + # save(layer, + # file=paste0("layer_sample_Sample",j,".RData")) + load(paste0("//dcs04/hansen/data/ywang/archive/scRNA/Oct5_2021_Lukas_data_more_Genes/out/layer_sample_Sample",sample_s1,".RData")) + # save(layer_sample_tmp,file=paste0("layer_sample",j,".RData")) + layer_sample_tmp__s1=layer[] + + load(paste0("//dcs04/hansen/data/ywang/archive/scRNA/Oct5_2021_Lukas_data_more_Genes/out/layer_sample_Sample",sample_s2,".RData")) + # save(layer_sample_tmp,file=paste0("layer_sample",j,".RData")) + layer_sample_tmp__s2=layer[] + + layer_sample_tmp__=c(layer_sample_tmp__s1,layer_sample_tmp__s2) + # names(layer_sample_tmp__)=rownames_allSpots + + # layer_sample_tmp__s1=layer_sample_tmp__[rownames_kpSpots_s1] + # layer_sample_tmp__s2=layer_sample_tmp__[rownames_kpSpots_s2] + + # layer_sample_tmp__ = c(layer_sample_tmp__s1, layer_sample_tmp__s2) + # length(layer_sample_tmp__) + + list_layer[[kpair]][[1]]=layer_sample_tmp__s1 + list_layer[[kpair]][[2]]=layer_sample_tmp__s2 + + + + # list_rownames_kpSpots_[[kpair]][[1]]=rownames_kpSpots_s1 + # list_rownames_kpSpots_[[kpair]][[2]]=rownames_kpSpots_s2 + +} + + # load mNSF factors + a1_=read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/factors_nb_szMean_sample_s",1,"_L10_fullData.csv"),header=T) + a2_=read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/factors_nb_szMean_sample_s",5,"_L10_fullData.csv"),header=T) + a3_=read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/factors_nb_szMean_sample_s",7,"_L10_fullData.csv"),header=T) + a4_=read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/factors_nb_szMean_sample_s",12,"_L10_fullData.csv"),header=T) + + + + ############ plot + make_plot_vio<-function(factor_mat,layer_sample_tmp___){ + plots_l = list() + + + for (i in 1:10) { + + df_tmp=data.frame(#imagecol=X[,2], + #imagerow=X[,3], + layer=layer_sample_tmp___, + value=factor_mat[,i] + ) + + myPalette <- colorRampPalette(rev(brewer.pal(11, "Spectral"))) + + plot_tmp = ggplot(df_tmp[!is.na(df_tmp$layer),],aes(x=layer,y=value)) +#,fill=layer,,group=layer + geom_violin(position = dodge)+ + geom_boxplot(width=.3, outlier.colour=NA, position = dodge)+#+ + # geom_spatial(data=images_tibble[i,], aes(grob=grob), x=0.5, y=0.5)+ + # geom_jitter(shape = 21,size = 0.1, stroke = 0.5,aes(color = value),alpha=0.6 )+#colour = "black", + # geom_point(shape = 21, colour = "black", size = .7, stroke = 0.5)+ + geom_jitter(shape = 21, colour = "black", size = .3, stroke = 0.5,alpha=0.7)+ + + xlab("") + + ylab("") + + coord_cartesian(expand=FALSE)+ + + ggtitle(paste0("factor ",i))+ + # scale_fill_manual(values=group.colors)+#+ geom_jitter() + # scale_fill_gradientn(colours = myPalette(100)) + scale_fill_gradientn(colours = myPalette(100)) + + plots_l[[i]]=plot_tmp+coord_flip() + + + } + + plots_l + } + + + p1= make_plot_vio( a1_[-1], list_layer[[1]][[1]]) + p2= make_plot_vio( a2_[-1], list_layer[[2]][[1]]) + p3= make_plot_vio( a3_[-1], list_layer[[3]][[1]]) + p4= make_plot_vio( a4_[-1], list_layer[[4]][[2]]) + + setwd("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_dist005_szMean/") + + pdf(paste0( "sample_1_LdaFalse_L10_szMean_violin_fullData.pdf"),height=3,width=10/3*10) + print(plot_grid(plotlist = p1,nrow=1)) + dev.off() + + pdf(paste0( "sample_5_LdaFalse_L10_szMean_violin_fullData.pdf"),height=3,width=10/3*10) + print(plot_grid(plotlist = p2,nrow=1)) + dev.off() + + pdf(paste0( "sample_7_LdaFalse_L10_szMean_violin_fullData.pdf"),height=3,width=10/3*10) + print(plot_grid(plotlist = p3,nrow=1)) + dev.off() + + pdf(paste0( "sample_12_LdaFalse_L10_szMean_violin_fullData.pdf"),height=3,width=10/3*10) + print(plot_grid(plotlist = p4,nrow=1)) + dev.off() + diff --git a/analysis_dlpfc/step5_check_associated_genes/factor2.r b/analysis_dlpfc/step5_check_associated_genes/factor2.r new file mode 100644 index 0000000..66bdc6c --- /dev/null +++ b/analysis_dlpfc/step5_check_associated_genes/factor2.r @@ -0,0 +1,256 @@ + +### load pacakges +library(tidyverse) +library(ggplot2) +library(ggforce)# +library(cowplot) +library(RColorBrewer) +library(grid) +require(nnet) +library(matrixStats) + +########################################################## +group.colors <- c(Layer1 = "#FC8D62", Layer2 = "#FFD92F", Layer3 ="#A6D854", Layer4 = "#66C2A5", Layer5 = "#00A9FF", + Layer6="#8DA0CB",WM="#E78AC3") +myPalette_ <- colorRampPalette(rev(brewer.pal(11, "Spectral"))) +myPalette = scale_fill_brewer(palette = "Set2") + +########################################################## +setwd("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_dist005_szMean/") + +########################################################## +kfactor = 2 +list_sample = c(1,5,7,12) + +########################################################## get genes associated with factor kfactor +## load gene expression matrix and normalize it for sample 1,5,7,12 +dir_processedData = "/dcs04/hansen/data/ywang/ST/DLPFC/processed_Data_keepAll/" +list_count_log2_norm=list() + +for(ksample in list_sample){ + if(ksample%%2 == 1 ){ + ksamples_pair = c(ksample,ksample+1) + s_ = 1 + }else{ + ksamples_pair = c(ksample-1, ksample) + s_ = 2 + + } + # load count data for 500 HVGs + count_sample_tmp=read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/processed_Data_keepAll/Y_kp_s",ksamples_pair[1],"_s",ksamples_pair[2],"_filterDist005_s",s_,"_corrected.csv")) + # transpose the count matrix so that each row is one gene, then normalize by library size + count_sample_tmp_log2_norm = log2(1+t((count_sample_tmp)/colSums(count_sample_tmp))*10^6) + colnames(count_sample_tmp_log2_norm) = rownames(count_sample_tmp) + rownames(count_sample_tmp_log2_norm) = colnames(count_sample_tmp) + list_count_log2_norm[[as.character(ksample)]] = count_sample_tmp_log2_norm +} + +genes = rownames(list_count_log2_norm[[1]]) + +## load factor matrix for sample 1,5,7,12 + +list_factor_mat=list() + +for(ksample in list_sample){ + if(ksample%%2 == 1 ){ + ksamples_pair = c(ksample,ksample+1) + s_ = 1 + }else{ + ksamples_pair = c(ksample-1, ksample) + s_ = 2 + + } + factor_mat_sampleTmp = read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/factors_nb_szMean_sample_s",ksample,"_L10.csv"),header=T) + factor_mat_sampleTmp = factor_mat_sampleTmp[,-1] + list_factor_mat[[as.character(ksample)]] = factor_mat_sampleTmp +} + + +####################################### +####################################### +## get genes associated with factor 5 +list_sample_pair=list(c(1,2), + c(3,4),c(5,6),c(7,8),c(9,10),c(11,12)) + +list_cor = list() +for(ksample in list_sample){ + list_cor[[as.character(ksample)]]=cor(list_factor_mat[[as.character(ksample)]][,kfactor], + t(list_count_log2_norm[[as.character(ksample)]])) + + +} + +mat_cor = array(unlist(list_cor),dim=c(500,length(list_cor))) +cor_min=rowMins(mat_cor) + +topGenes= genes[order(cor_min,decreasing = T)][1:10] +list_mat_exp_norm_topGenes = list() +for(ksample in list_sample){ + rownames(list_count_log2_norm[[as.character(ksample)]])=genes + list_mat_exp_norm_topGenes[[as.character(ksample)]]=list_count_log2_norm[[as.character(ksample)]][topGenes,] +} + + +####################################### +####################################### +## prepare layer info for each sample for plotting +list_layer=list() +list_rownames_kpSpots_=list() + + +for(kpair in 1:length(list_sample_pair)){ + list_layer[[kpair]]=list() + list_rownames_kpSpots_[[kpair]]=list() + + sample_s1=list_sample_pair[[kpair]][1] + sample_s2=list_sample_pair[[kpair]][2] + rownames_allSpots = readRDS(paste0(dir_processedData,"rownames_allSpots", + "_S",sample_s1,"_",sample_s2,"_filterDist005_corrected.rds")) + + rownames_kpSpots_s1= readRDS(paste0(dir_processedData,"rownames_kpSpots", + "_S",sample_s1,"_",sample_s2,"_filterDist005_s1_corrected.rds")) + rownames_kpSpots_s2= readRDS(paste0(dir_processedData,"rownames_kpSpots", + "_S",sample_s1,"_",sample_s2,"_filterDist005_s2_corrected.rds")) + + rownames_kpSpots = c(rownames_kpSpots_s1, rownames_kpSpots_s2) + length(rownames_kpSpots) + # 7112 + + # load layers + # save(layer, + # file=paste0("layer_sample_Sample",j,".RData")) + load(paste0("//dcs04/hansen/data/ywang/archive/scRNA/Oct5_2021_Lukas_data_more_Genes/out/layer_sample_Sample",sample_s1,".RData")) + # save(layer_sample_tmp,file=paste0("layer_sample",j,".RData")) + layer_sample_tmp__s1=layer[] + + load(paste0("//dcs04/hansen/data/ywang/archive/scRNA/Oct5_2021_Lukas_data_more_Genes/out/layer_sample_Sample",sample_s2,".RData")) + # save(layer_sample_tmp,file=paste0("layer_sample",j,".RData")) + layer_sample_tmp__s2=layer[] + + layer_sample_tmp__=c(layer_sample_tmp__s1,layer_sample_tmp__s2) + names(layer_sample_tmp__)=rownames_allSpots + + layer_sample_tmp__s1=layer_sample_tmp__[rownames_kpSpots_s1] + layer_sample_tmp__s2=layer_sample_tmp__[rownames_kpSpots_s2] + + layer_sample_tmp__ = c(layer_sample_tmp__s1, layer_sample_tmp__s2) + # length(layer_sample_tmp__) + + list_layer[[kpair]][[1]]=layer_sample_tmp__s1 + list_layer[[kpair]][[2]]=layer_sample_tmp__s2 + + + + list_rownames_kpSpots_[[kpair]][[1]]=rownames_kpSpots_s1 + list_rownames_kpSpots_[[kpair]][[2]]=rownames_kpSpots_s2 + + } + +####################################### +## function for plotting +####################################### +make_plot<-function(rownames_kpSpots,exp_mat,range_perGene_, + layer_sample_tmp___,samplename_){ + plots_l = list() + for (i in 1:ncol(exp_mat)) { + # dim(a) + df_tmp=data.frame( imagerow=as.numeric(sub("[_].*","",rownames_kpSpots)), + imagecol=as.numeric(sub(".*[_]","",rownames_kpSpots)), + fill_tmp=exp_mat[,i], + layer=layer_sample_tmp___) + df_tmp=df_tmp[!is.na(df_tmp$layer),] + df_tmp$layer = factor(df_tmp$layer, levels = c('WM',"Layer6","Layer5","Layer4","Layer3", "Layer2","Layer1")) + + + plot_tmp = ggplot(df_tmp,aes(x=imagecol,y=imagerow,fill=fill_tmp)) + + # geom_spatial(data=images_tibble[i,], aes(grob=grob), x=0.5, y=0.5)+ + geom_point(shape = 21, colour = "black", size = 2, stroke = NA)+ + coord_cartesian(expand=FALSE)+ + # scale_fill_gradientn(colours = myPalette_(100))+ + scale_fill_gradientn(colours = myPalette_(100), limits=range_perGene_[[i]])+ + + xlab("") + + ylab("") + + ggtitle(paste0(samplename_,", ",colnames(exp_mat)[i]))+ + labs(fill = paste0(" "))+ + theme_set(theme_bw(base_size = 10))+ + theme(panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + axis.line = element_line(colour = "black"), + axis.text = element_blank(), + axis.ticks = element_blank()) + + # if(type_tmp=="l"){ + plots_l[[i]]=plot_tmp + # }else{ + + + } + plots_l[[i+1]]=ggplot(df_tmp,aes(x=imagecol,y=imagerow,fill=layer)) + + geom_point(shape = 21, colour = "black", size = 2, stroke = NA)+ + coord_cartesian(expand=FALSE)+ + xlab("") + + ylab("") + + ggtitle(paste0("layer "))+ + labs(fill = paste0(" "))+ + theme_set(theme_bw(base_size = 10))+ + theme(panel.grid.major = element_blank(), + panel.grid.minor = element_blank(), + panel.background = element_blank(), + axis.line = element_line(colour = "black"), + axis.text = element_blank(), + axis.ticks = element_blank())+ + + scale_fill_manual(values=group.colors) + plots_l +} + +####################################### +## get the range for each gene, used for plotting +range_perGene = list() +for(ksample in list_sample){ + # exp_mat_tmp=read.csv(paste0("/dcs04/hansen/data/ywang/ST/DLPFC/PASTE_out_keepAll_scTransform/factors_nb_szMean_sample_s",ksample,"_L10.csv"),header=T) + exp_mat_tmp=list_mat_exp_norm_topGenes[[as.character(ksample)]] + exp_mat_tmp_t = t(exp_mat_tmp) + colnames(exp_mat_tmp_t) = rownames(exp_mat_tmp) + for(l in 1:ncol(exp_mat_tmp_t)){ + if(ksample==1){ range_perGene[[l]]=range(exp_mat_tmp_t[,l])}else{ + range_perGene[[l]]=range(c(range_perGene[[l]], exp_mat_tmp_t[,l])) + } + + } +} + +## make plots +pdf(paste0("genes_factor",kfactor,"_L10_mNSF.pdf"),height=3,width=18/6*8*3.2) + for(ksample in list_sample){ + exp_mat_tmp=list_mat_exp_norm_topGenes[[as.character(ksample)]] + exp_mat_tmp_t = (t(exp_mat_tmp)) + colnames(exp_mat_tmp_t) = rownames(exp_mat_tmp) + a_ = exp_mat_tmp_t + if(ksample%%2 == 1 ){ + ksamples_pair = c(ksample,ksample+1) + s_ = 1 + }else{ + ksamples_pair = c(ksample-1, ksample) + s_ = 2 + + } + kpair = round((ksample+1)/2) + layers=list_layer[[kpair]][[s_]] + rownames_kpSpots=list_rownames_kpSpots_[[kpair]][[s_]] + p1= make_plot(rownames_kpSpots, a_, range_perGene, layers, paste0("sample ",ksample)) + print(plot_grid(plotlist = p1,nrow=1)) + # } + } +dev.off() + + + + + + + + +