From 2a10bc4c9557120a9bf426c2251eb0896ee6455f Mon Sep 17 00:00:00 2001 From: James Cai Date: Fri, 27 Dec 2024 14:59:05 -0600 Subject: [PATCH] . --- +gui/callback_SelectCellsByQC.m | 27 ++++ +gui/callback_scTenifoldCko.m | 66 ++++---- +pkg/i_addwd2script.m | 7 +- .../{script.py => script_gene.py} | 0 +run/external/py_scTenifoldCko/script_path.py | 78 +++++++++ ...foldCko_type.m => py_scTenifoldCko_gene.m} | 4 +- +run/py_scTenifoldCko_path.m | 153 +++++------------- scgeatool.m | 26 +-- 8 files changed, 193 insertions(+), 168 deletions(-) rename +run/external/py_scTenifoldCko/{script.py => script_gene.py} (100%) create mode 100644 +run/external/py_scTenifoldCko/script_path.py rename +run/{py_scTenifoldCko_type.m => py_scTenifoldCko_gene.m} (98%) diff --git a/+gui/callback_SelectCellsByQC.m b/+gui/callback_SelectCellsByQC.m index f6bdce3f..7e0a64c9 100644 --- a/+gui/callback_SelectCellsByQC.m +++ b/+gui/callback_SelectCellsByQC.m @@ -10,6 +10,9 @@ sce = guidata(FigureHandle); % 'SC_QCFILTER (QC Preserves Lowly-expressed Cells/Genes)',... +oldcn = sce.NumCells; +oldgn = sce.NumGenes; + listitems = {'SC_QCFILTER (Basic QC for Cells/Genes)', ... 'SC_QCFILTER (Enabling Whitelist Genes)', ... '------------------------------------------------', ... @@ -445,6 +448,30 @@ return; end end + + newcn = sce.NumCells; + newgn = sce.NumGenes; + if newgn==0 + helpdlg("All genes are removed. Opertaion is cancelled.",''); + requirerefresh = false; + return; + end + if newcn==0 + helpdlg("All cells are removed. Opertaion is cancelled.",''); + requirerefresh = false; + return; + end + if oldcn-newcn==0 && oldgn-newgn==0 + helpdlg("No cells and genes are removed.",''); + requirerefresh = false; + return; + end + answer = questdlg(sprintf('%d cells will be removed; %d genes will be removed.', ... + oldcn-newcn, oldgn-newgn),'','Accept Changes', 'Cancel Changes', 'Accept Changes'); + if ~strcmp(answer, 'Accept Changes') + requirerefresh = false; + return; + end guidata(FigureHandle, sce); end diff --git a/+gui/callback_scTenifoldCko.m b/+gui/callback_scTenifoldCko.m index 7d3c4d9c..8b8385df 100644 --- a/+gui/callback_scTenifoldCko.m +++ b/+gui/callback_scTenifoldCko.m @@ -116,12 +116,6 @@ function callback_scTenifoldCko(src, ~) gsorted = natsort(sce.g); if isempty(gsorted), return; end - - - - - - Cko_approach = questdlg('Select CKO approach:','','Block Ligand-Receptor','Complete Gene Knockout','Block Ligand-Receptor'); if ~ismember(Cko_approach, {'Block Ligand-Receptor','Complete Gene Knockout'}), return; end @@ -142,27 +136,24 @@ function callback_scTenifoldCko(src, ~) targetpath = ... string([sprintf('%s (%s) -> %s (%s)', celltype1, targetg(1), celltype2, targetg(2));... sprintf('%s (%s) -> %s (%s)', celltype1, targetg(2), celltype2, targetg(1));... - sprintf('%s (%s) <- %s (%s)', celltype1, targetg(1), celltype2, targetg(2));... - sprintf('%s (%s) <- %s (%s)', celltype1, targetg(2), celltype2, targetg(1))]); + sprintf('%s (%s) <- %s (%s)', celltype1, targetg(2), celltype2, targetg(1));... + sprintf('%s (%s) <- %s (%s)', celltype1, targetg(1), celltype2, targetg(2))]); [width] = min([max(strlength(targetpath))*6, 500]); - [indx2, tf] = listdlg('PromptString', {'Select path(s) to block:'}, ... + [targetpathid, tf] = listdlg('PromptString', {'Select path(s) to block:'}, ... 'SelectionMode', 'multiple', 'ListString', targetpath, 'ListSize', [width, 300]); - if tf == 1 - targetpath = targetpath(indx2); - else - return; - end - celltype1 - celltype2 - targetg - targetpath - - - return; - %[Tcell] = run.py_scTenifoldCko_path(sce, celltype1, celltype2, targetg, ... - % targetpath, wkdir, true, prepare_input_only); + if tf ~= 1, return; end + + % assignin("base","sce",sce) + % assignin("base","celltype1",celltype1) + % assignin("base","celltype2",celltype2) + % assignin("base","targetg",targetg) + % assignin("base","targetpathid",targetpathid) + % prepare_input_only = true; - + [Tcell] = run.py_scTenifoldCko_path(sce, celltype1, celltype2, targetg, ... + targetpathid, wkdir, true, prepare_input_only); + + % return; case 'Complete Gene Knockout' [indx2, tf] = listdlg('PromptString', {'Select a KO gene'}, ... 'SelectionMode', 'single', 'ListString', gsorted, 'ListSize', [220, 300]); @@ -186,13 +177,12 @@ function callback_scTenifoldCko(src, ~) end T = []; - [Tcell] = run.py_scTenifoldCko_type(sce, celltype1, celltype2, targetg, ... + [Tcell] = run.py_scTenifoldCko_gene(sce, celltype1, celltype2, targetg, ... targettype, wkdir, true, prepare_input_only); otherwise errordlg('Invalid option.',''); return; end - if ~isempty(Tcell) [T1] = Tcell{1}; @@ -222,9 +212,9 @@ function callback_scTenifoldCko(src, ~) T=[T, table(knownpair)]; % T(:,[4 5 6 7 11])=[]; - outfile = fullfile(wkdir,"outfile.csv"); + outfile = fullfile(wkdir,"outfile_interaction_changes.csv"); if isfile(outfile) - answerx = questdlg('Overwrite outfile.csv? Select No to save in a temporary file.'); + answerx = questdlg('Overwrite file? Select No to save in a temporary file.'); else answerx = 'Yes'; end @@ -260,13 +250,10 @@ function callback_scTenifoldCko(src, ~) end end - fn=fullfile(wkdir, 'merged_embeds.h5'); - - eb = h5read(fn,'/data')'; - + fn=fullfile(wkdir, 'merged_embeds.h5'); + eb = h5read(fn,'/data')'; n = height(eb); - sl = n / 4; - + sl = n / 4; % Split the eb into four equal-length sub-ebs a = eb(1:sl,:); b = eb(sl+1:2*sl,:); @@ -282,11 +269,16 @@ function callback_scTenifoldCko(src, ~) %[sce.g(x) sce.g(y)] [T] = ten.i_dr(a, c, sce.g, true); - outfile = sprintf('outfile_%s_expression_changes.csv',celltype1); + T = addvars(T, repelem(celltype1, height(T), 1), 'Before', 1); + T.Properties.VariableNames{'Var1'} = 'celltype'; + outfile = sprintf('outfile_expression_changes_in_%s.csv', ... + matlab.lang.makeValidName(celltype1)); writetable(T, outfile); [T] = ten.i_dr(b, d, sce.g, true); - outfile = sprintf('outfile_%s_expression_changes.csv',celltype1); + T = addvars(T, repelem(celltype2, height(T), 1), 'Before', 1); + T.Properties.VariableNames{'Var1'} = 'celltype'; + outfile = sprintf('outfile_expression_changes_in_%s.csv', ... + matlab.lang.makeValidName(celltype2)); writetable(T, outfile); - end diff --git a/+pkg/i_addwd2script.m b/+pkg/i_addwd2script.m index b59a4561..e58ed2d1 100644 --- a/+pkg/i_addwd2script.m +++ b/+pkg/i_addwd2script.m @@ -5,16 +5,17 @@ function i_addwd2script(scriptfile, targetdir, ftype) if ~exist(targetdir, 'dir'), error('pkg.i_addwd2script error.'); end S = fileread(scriptfile); - + [~,b,c]=fileparts(scriptfile); + switch ftype case 'R' x = sprintf('setwd("%s")', strrep(targetdir,'\','\\')); S = [x, newline, S]; - outscriptfile = fullfile(targetdir,'script.R'); + outscriptfile = fullfile(targetdir, [b,c]); case 'python' x = sprintf('os.chdir("%s")', strrep(targetdir,'\','\\')); S = ['import os', newline, x, newline, S]; - outscriptfile = fullfile(targetdir,'script.py'); + outscriptfile = fullfile(targetdir, [b,c]); end diff --git a/+run/external/py_scTenifoldCko/script.py b/+run/external/py_scTenifoldCko/script_gene.py similarity index 100% rename from +run/external/py_scTenifoldCko/script.py rename to +run/external/py_scTenifoldCko/script_gene.py diff --git a/+run/external/py_scTenifoldCko/script_path.py b/+run/external/py_scTenifoldCko/script_path.py new file mode 100644 index 00000000..fd95bcd9 --- /dev/null +++ b/+run/external/py_scTenifoldCko/script_path.py @@ -0,0 +1,78 @@ +#script_path +#import os +import sys +#abspath = os.path.abspath(__file__) +#dname = os.path.dirname(abspath) +# os.chdir(dname) + +import scanpy as sc +import scTenifoldXct as st +from scTenifoldXct.dataLoader import build_adata +import pandas as pd +import numpy as np +import h5py +import scipy +from scipy import sparse + +twosided = 1 +with h5py.File('X1.mat', 'r') as f: + if 'targetgid' in f: + targetgid = f['targetgid'][0] + +adata1 = build_adata("X1.mat", "g1.txt", "c1.txt", delimiter=',', meta_cell_cols=['cell_type'], transpose=False) +print('Input read.............1') +xct1 = st.scTenifoldXct(data = adata1, + source_celltype = 'Source', + target_celltype = 'Target', + obs_label = "cell_type", + rebuild_GRN = False, + GRN_file_dir = './1', + verbose = True) + +adata2 = build_adata("X2.mat", "g2.txt", "c2.txt", delimiter=',', meta_cell_cols=['cell_type'], transpose=False) +print('Input read.............2') +xct2 = st.scTenifoldXct(data = adata2, + source_celltype = 'Source', + target_celltype = 'Target', + obs_label = "cell_type", + rebuild_GRN = False, + GRN_file_dir = './2', + verbose = True) + +xct2=xct2.Knk(targetgid) + +XCTs = st.merge_scTenifoldXct(xct1, xct2) +emb = XCTs.get_embeds(train = True) + +with h5py.File('merged_embeds.h5', 'w') as h5file: + h5file.create_dataset('data', data=emb) + +XCTs.nn_aligned_diff(emb) +xcts_pairs_diff = XCTs.chi2_diff_test(pval = 1.0) +pd.DataFrame(xcts_pairs_diff).to_csv('output1.txt',index=False,header=True) + +# if sys.argv[1]=="2": +if twosided==1: + xct1 = st.scTenifoldXct(data = adata1, + source_celltype = 'Target', + target_celltype = 'Source', + obs_label = "cell_type", + rebuild_GRN = False, + GRN_file_dir = './1', + verbose = True) + + xct2 = st.scTenifoldXct(data = adata2, + source_celltype = 'Target', + target_celltype = 'Source', + obs_label = "cell_type", + rebuild_GRN = False, + GRN_file_dir = './2', + verbose = True) + + xct2=xct2.Knk(targetgid) + XCTs = st.merge_scTenifoldXct(xct1, xct2) + emb = XCTs.get_embeds(train = True) + XCTs.nn_aligned_diff(emb) + xcts_pairs_diff = XCTs.chi2_diff_test(pval = 1.0) + pd.DataFrame(xcts_pairs_diff).to_csv('output2.txt',index=False,header=True) + diff --git a/+run/py_scTenifoldCko_type.m b/+run/py_scTenifoldCko_gene.m similarity index 98% rename from +run/py_scTenifoldCko_type.m rename to +run/py_scTenifoldCko_gene.m index 113c275c..8b4703cb 100644 --- a/+run/py_scTenifoldCko_type.m +++ b/+run/py_scTenifoldCko_gene.m @@ -1,4 +1,4 @@ -function [T] = py_scTenifoldCko_type(sce, celltype1, celltype2, targetg, ... +function [T] = py_scTenifoldCko_gene(sce, celltype1, celltype2, targetg, ... targettype, wkdir, ... isdebug, prepare_input_only) @@ -127,7 +127,7 @@ fw = gui.gui_waitbar([], [], 'Step 4 of 4: Running scTenifoldXct.py...'); -codefullpath = fullfile(codepth,'script.py'); +codefullpath = fullfile(codepth,'script_gene.py'); pkg.i_addwd2script(codefullpath, wkdir, 'python'); diff --git a/+run/py_scTenifoldCko_path.m b/+run/py_scTenifoldCko_path.m index cba9e080..503ee266 100644 --- a/+run/py_scTenifoldCko_path.m +++ b/+run/py_scTenifoldCko_path.m @@ -1,42 +1,24 @@ function [T] = py_scTenifoldCko_path(sce, celltype1, celltype2, targetg, ... - targetpath, wkdir, ... + targetpathid, wkdir, ... isdebug, prepare_input_only) T = []; if nargin < 8, prepare_input_only = false; end if nargin < 7, isdebug = true; end if nargin < 6, wkdir = []; end -if nargin < 5 || isempty(targetpath), targetpath=sprintf('%s+%s', celltype1, celltype2); end +if nargin < 5 || isempty(targetpathid), targetpathid = [1 3]; end if nargin < 4 || isempty(targetg), targetg = sce.g(1); end -if nargin < 3, error('Usage: [T] = py_scTenifoldCko(sce, celltype1, celltype2, targetg)'); end - +if nargin < 3, error('Usage: [T] = py_scTenifoldCko_path(sce, celltype1, celltype2, ["ligandgene", "receptorgene"])'); end twosided = true; - -sce1 = sce; -sce2 = sce; - -if targetpath==sprintf('%s+%s', celltype1, celltype2) - sce2.X(sce2.g==targetg, :)=0; -elseif targetpath==celltype1 - sce2.X(sce2.g==targetg, sce2.c_cell_type_tx==celltype1)=0; -elseif targetpath==celltype2 - sce2.X(sce2.g==targetg, sce2.c_cell_type_tx==celltype2)=0; -else - error('py_scTenifoldCko error.'); -end - -%[Tcell, iscomplete] = py_scTenifoldXct2(sce1, sce2, celltype1, celltype2, ... -% twosided, wkdir, isdebug, prepare_input_only); - +[~, targetgid]=ismember(targetg,sce.g); +assert(numel(targetgid)==2) % ----------------- - oldpth = pwd(); pw1 = fileparts(mfilename('fullpath')); codepth = fullfile(pw1, 'external', 'py_scTenifoldCko'); if isempty(wkdir) || ~isfolder(wkdir) - % cd(codepth); wkdir=tempdir; cd(wkdir); else @@ -88,21 +70,11 @@ if ~isdebug, pkg.i_deletefiles(tmpfilelist); end - % load(fullfile(pw1,'..','resources','Ligand_Receptor','Ligand_Receptor.mat'), ... - % 'ligand','receptor'); - % validg=unique([ligand receptor]); - % [y]=ismember(upper(sce.g),validg); - % X=sce.X(y,:); - % g=sce.g(y); - % writematrix(sce.X,'X.txt'); - - - in_prepareX(sce1, 1); - in_prepareX(sce2, 2); + in_prepareX12intact(sce); fw = gui.gui_waitbar([], [], 'Step 2 of 4: Building S1 networks...'); %try - in_prepareA12(sce1, targetg); + in_prepareA12intact(sce); % catch ME % if isvalid(fw) % gui.gui_waitbar(fw, [], 'Building S1 networks is incomplete'); @@ -127,17 +99,14 @@ fw = gui.gui_waitbar([], [], 'Step 4 of 4: Running scTenifoldXct.py...'); -codefullpath = fullfile(codepth,'script.py'); +codefullpath = fullfile(codepth,'script_path.py'); pkg.i_addwd2script(codefullpath, wkdir, 'python'); - if ~prepare_input_only - twosidedtag = 2; - cmdlinestr = sprintf('"%s" "%s" %d', x.Executable, codefullpath, twosidedtag); + cmdlinestr = sprintf('"%s" "%s"', x.Executable, codefullpath); disp(cmdlinestr) try - % [status] = system(cmdlinestr, '-echo'); - [status] = system(cmdlinestr); + [status] = system(cmdlinestr, '-echo'); % https://www.mathworks.com/matlabcentral/answers/334076-why-does-externally-called-exe-using-the-system-command-freeze-on-the-third-call catch ME if isvalid(fw) @@ -151,7 +120,7 @@ if prepare_input_only gui.gui_waitbar(fw, [], 'Input preparation is complete.'); else - gui.gui_waitbar(fw, [], 'Running scTenifoldCko.py is complete.'); + gui.gui_waitbar(fw, [], 'Running scTenifoldCko_path.py is complete.'); end end @@ -159,7 +128,7 @@ if status == 0 && exist('output1.txt', 'file') T = readtable('output1.txt'); - if twosided && exist('output2.txt', 'file') + if exist('output2.txt', 'file') T2 = readtable('output2.txt'); T = {T, T2}; end @@ -182,92 +151,50 @@ % -------------------------------------------------- % -------------------------------------------------- - function in_prepareX(sce, id) - if ~exist(sprintf('%d', id), 'dir') - mkdir(sprintf('%d', id)); - end - idx = sce.c_cell_type_tx == celltype1 | sce.c_cell_type_tx == celltype2; - sce = sce.selectcells(idx); - sce.c_batch_id = sce.c_cell_type_tx; - sce.c_batch_id(sce.c_cell_type_tx == celltype1) = "Source"; - sce.c_batch_id(sce.c_cell_type_tx == celltype2) = "Target"; - % sce=sce.qcfilter; - if issparse(sce.X) - X = single(full(sce.X)); - else - X = single(sce.X); + function in_prepareX12intact(sce) + for id = 1:2 + if ~exist(sprintf('%d', id), 'dir') + mkdir(sprintf('%d', id)); + end + idx = sce.c_cell_type_tx == celltype1 | sce.c_cell_type_tx == celltype2; + sce = sce.selectcells(idx); + sce.c_batch_id = sce.c_cell_type_tx; + sce.c_batch_id(sce.c_cell_type_tx == celltype1) = "Source"; + sce.c_batch_id(sce.c_cell_type_tx == celltype2) = "Target"; + % sce=sce.qcfilter; + if issparse(sce.X) + X = single(full(sce.X)); + else + X = single(sce.X); + end + save(sprintf('X%d.mat', id), '-v7.3', 'X','targetgid'); + writematrix(sce.g, sprintf('g%d.txt', id)); + writematrix(sce.c_batch_id, sprintf('c%d.txt', id)); + fprintf('Input X%d g%d c%d written.\n', id, id, id); + t = table(sce.g, sce.g, 'VariableNames', {' ', 'gene_name'}); + writetable(t, sprintf('%d/gene_name_Source.tsv', id), ... + 'filetype', 'text', 'Delimiter', '\t'); + writetable(t, sprintf('%d/gene_name_Target.tsv', id), ... + 'filetype', 'text', 'Delimiter', '\t'); + disp('Input gene_names written.'); end - twosided = true; - save(sprintf('X%d.mat', id), '-v7.3', 'X', 'twosided'); - writematrix(sce.g, sprintf('g%d.txt', id)); - writematrix(sce.c_batch_id, sprintf('c%d.txt', id)); - fprintf('Input X%d g%d c%d written.\n', id, id, id); - t = table(sce.g, sce.g, 'VariableNames', {' ', 'gene_name'}); - writetable(t, sprintf('%d/gene_name_Source.tsv', id), ... - 'filetype', 'text', 'Delimiter', '\t'); - writetable(t, sprintf('%d/gene_name_Target.tsv', id), ... - 'filetype', 'text', 'Delimiter', '\t'); - disp('Input gene_names written.'); end - function in_prepareA12(sce, targetg) - - idx = find(sce.g==targetg); - assert(isscalar(idx)); - + function in_prepareA12intact(sce) disp('Building A1 network...') A1 = sc_pcnetpar(sce.X(:, sce.c_cell_type_tx == celltype1)); disp('A1 network built.') A1 = A1 ./ max(abs(A1(:))); - % A=0.5*(A1+A1.'); A = ten.e_filtadjc(A1, 0.75, false); save(sprintf('%d/pcnet_Source.mat', 1), 'A', '-v7.3'); - - if contains(targetpath, celltype1) - fprintf('\nKO gene in %s.\n', celltype1); - if nnz(A(idx, :) ~= 0) < 10 - warning('KO gene (%s) has no link or too few links with other genes.', ... - targetg); - end - A(idx,:)=0; - end save(sprintf('%d/pcnet_Source.mat', 2), 'A', '-v7.3'); - - disp('Building A2 network...'); A2 = sc_pcnetpar(sce.X(:, sce.c_cell_type_tx == celltype2)); disp('A2 network built.'); A2 = A2 ./ max(abs(A2(:))); A = ten.e_filtadjc(A2, 0.75, false); save(sprintf('%d/pcnet_Target.mat', 1), 'A', '-v7.3'); - - if contains(targetpath, celltype2) - fprintf('\nKO gene in %s.\n', celltype2); - if nnz(A(idx, :) ~= 0) < 10 - warning('KO gene (%s) has no link or too few links with other genes.', ... - targetg); - end - A(idx,:)=0; - end save(sprintf('%d/pcnet_Target.mat', 2), 'A', '-v7.3'); - end - - % function in_prepareA(sce, id) - % disp('Building A1 network...') - % A1 = sc_pcnetpar(sce.X(:, sce.c_cell_type_tx == celltype1)); - % disp('A1 network built.') - % A1 = A1 ./ max(abs(A1(:))); - % % A=0.5*(A1+A1.'); - % A = ten.e_filtadjc(A1, 0.75, false); - % save(sprintf('%d/pcnet_Source.mat', id), 'A', '-v7.3'); - % - % disp('Building A2 network...'); - % A2 = sc_pcnetpar(sce.X(:, sce.c_cell_type_tx == celltype2)); - % disp('A2 network built.'); - % A2 = A2 ./ max(abs(A2(:))); - % % A=0.5*(A2+A2.'); - % A = ten.e_filtadjc(A2, 0.75, false); - % save(sprintf('%d/pcnet_Target.mat', id), 'A', '-v7.3'); - % end + end end \ No newline at end of file diff --git a/scgeatool.m b/scgeatool.m index 7ae951d2..ed673025 100644 --- a/scgeatool.m +++ b/scgeatool.m @@ -916,9 +916,9 @@ function in_SingleClickSolution(src, ~) end function in_SelectCellsByQC(src, ~) - oldsce = sce; - oldn = sce.NumCells; - oldm = sce.NumGenes; + %oldsce = sce; + % oldn = sce.NumCells; + % oldm = sce.NumGenes; sce.c = c; guidata(FigureHandle, sce); try @@ -932,16 +932,16 @@ function in_SelectCellsByQC(src, ~) sce = guidata(FigureHandle); [c, cL] = grp2idx(sce.c); in_RefreshAll(src, [], true, false); - newn = sce.NumCells; - newm = sce.NumGenes; - answer = questdlg(sprintf('%d cells removed; %d genes removed.', ... - oldn-newn, oldm-newm),'','Accept Changes', 'Undo Changes', 'Accept Changes'); - if ~strcmp(answer, 'Accept Changes') - sce = oldsce; - [c, cL] = grp2idx(sce.c); - in_RefreshAll(src, [], true, false); - guidata(FigureHandle, sce); - end + % newn = sce.NumCells; + % newm = sce.NumGenes; + % answer = questdlg(sprintf('%d cells removed; %d genes removed.', ... + % oldn-newn, oldm-newm),'','Accept Changes', 'Undo Changes', 'Accept Changes'); + % if ~strcmp(answer, 'Accept Changes') + % sce = oldsce; + % [c, cL] = grp2idx(sce.c); + % in_RefreshAll(src, [], true, false); + % guidata(FigureHandle, sce); + % end end if ~isempty(highlightindex) h.BrushData = highlightindex;