Skip to content

Commit

Permalink
Merge pull request #12 from louadi/NEASE-v1.3
Browse files Browse the repository at this point in the history
Nease v1.3
  • Loading branch information
louadi authored Apr 18, 2024
2 parents fe4bcda + 53bac6a commit 99a311b
Show file tree
Hide file tree
Showing 11 changed files with 78 additions and 45 deletions.
Binary file added nease/data/database/ELM_interactions_mouse
Binary file not shown.
Binary file added nease/data/database/Mouse
Binary file not shown.
Binary file added nease/data/database/elm_mouse
Binary file not shown.
Binary file added nease/data/database/pdb_mouse
Binary file not shown.
Binary file added nease/data/network/PPI_mouse.pkl
Binary file not shown.
Binary file added nease/data/network/graph_mouse.pkl
Binary file not shown.
Binary file added nease/data/pathways/pathways_mouse
Binary file not shown.
51 changes: 38 additions & 13 deletions nease/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

# main functions for nease

def exons_to_edges(mapped,G,elm_interactions):
def exons_to_edges(mapped,G,elm_interactions,organism):

# check if domains have known interactions/binding:
mapped['domain']=mapped['NCBI gene ID']+'/'+mapped['Pfam ID']
Expand Down Expand Up @@ -40,10 +40,12 @@ def interactiontypye(ddi,elm):
#mapped['Interacting domain']=mapped['domain'].apply(lambda x: G.has_node(x))

mapped=mapped.rename(columns={"max_change": "dPSI",
"domain": "Domain ID"}).reset_index(drop=True)

mapped['Visualization link']=''
mapped.loc[mapped['Interacting domain'],['Visualization link']]=DIGGER+mapped['Exon stable ID']
"domain": "Domain ID"}).reset_index(drop=True)
mapped['Visualization link'] = ''
if organism == 'Human':
mapped.loc[mapped['Interacting domain'], ['Visualization link']] = DIGGER + 'human/' + mapped['Exon stable ID']
elif organism == 'Mouse':
mapped.loc[mapped['Interacting domain'], ['Visualization link']] = DIGGER + 'mouse/' + mapped['Exon stable ID']
return mapped


Expand Down Expand Up @@ -118,10 +120,10 @@ def get_elm(node):



interacting_domains=interacting_domains[['Gene name','NCBI gene ID','Identifier','dPSI','Affected binding (NCBI)']].append(elm_affected, ignore_index=True)
interacting_domains=pd.concat([interacting_domains, elm_affected], ignore_index=True)




return interacting_domains


Expand Down Expand Up @@ -165,7 +167,6 @@ def pathway_enrichment(g2edges,paths, mapping,organism,p_value_cutoff,only_DDIs)
pathway_genes=[]
# Totat degree of structural network for human (pre-computer)
# For statistical test: edge enrichment
# TO DO for mouse
if organism=='Human':
if only_DDIs:
n=52467
Expand All @@ -178,6 +179,19 @@ def pathway_enrichment(g2edges,paths, mapping,organism,p_value_cutoff,only_DDIs)
else:
n=60235
ppi_type='Degree in the structural PPI'

elif organism=='Mouse':
if only_DDIs:
n=13348

# every pathway degreee two sturctural PPIs and
# 'Degree in the structural PPI' : degree in ppi annotated with DDI.DMI/PDB
# 'Degree in the PPI/DDI' : degree in ppi annotated with DDI only
ppi_type='Degree in the PPI/DDI'

else:
n=31127
ppi_type='Degree in the structural PPI'

# number of effected edges
affected_edges=len([item for sublist in g2edges.values() for item in sublist])
Expand Down Expand Up @@ -223,10 +237,10 @@ def pathway_enrichment(g2edges,paths, mapping,organism,p_value_cutoff,only_DDIs)
# add gene to the gene list of the pathway
genes_tmp.append(Entrez_to_name(gene,mapping) +" ("+str(tmp)+")")


# gene specific test
_,p_gene=edge_enrich(tmp ,len(g2edges[gene])-tmp , p, n)

if p_gene<=0.05:
# gene with edges siginifically connected to the pathway

Expand Down Expand Up @@ -277,7 +291,6 @@ def single_path_enrich(path_id,Pathways,g2edges,mapping,organism,only_DDIs):

# Totat degree of structural network for human (pre-computer)
# For statistical test: edge enrichment
# TO DO for mouse
if organism=='Human':
if only_DDIs:
n=52467
Expand All @@ -286,8 +299,19 @@ def single_path_enrich(path_id,Pathways,g2edges,mapping,organism,only_DDIs):
else:
n=60235
ppi_type='Degree in the structural PPI'
elif organism == 'Mouse':
if only_DDIs:
n = 13348

# every pathway degreee two sturctural PPIs and
# 'Degree in the structural PPI' : degree in ppi annotated with DDI.DMI/PDB
# 'Degree in the PPI/DDI' : degree in ppi annotated with DDI only
ppi_type = 'Degree in the PPI/DDI'

else:
n = 31127
ppi_type = 'Degree in the structural PPI'


p=int(Pathways [Pathways['external_id']==path_id][ppi_type])
path_genes=list(Pathways [Pathways['external_id']==path_id]['entrez_gene_ids'])[0]

Expand Down Expand Up @@ -355,7 +379,8 @@ def edge_enrich(a,b,p,n):

#linked to pathway but not affected
c=p-a

if c < 0:
return np.nan, 1.0
# background of test: not linked to p and not affected edges
d=(2*n)-p-b

Expand Down
7 changes: 7 additions & 0 deletions nease/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,11 @@ def load_obj(data_folder):
pdb['Human']= pd.read_pickle(os.path.join(here,"data/database/pdb"))
non_coding=load_obj(os.path.join(here,'data/database/non_coding'))

database_mapping['Mouse']= pd.read_pickle(os.path.join(here,"data/database/Mouse"))
Pathways['Mouse']= pd.read_pickle(os.path.join(here,"data/pathways/pathways_mouse"))
network['Mouse']=load_obj(os.path.join(here,'data/network/graph_mouse'))
PPI['Mouse']=load_obj(os.path.join(here,'data/network/PPI_mouse'))
elm['Mouse']= pd.read_pickle(os.path.join(here,"data/database/elm_mouse"))
elm_interactions['Mouse']= pd.read_pickle(os.path.join(here,"data/database/ELM_interactions_mouse"))
pdb['Mouse']= pd.read_pickle(os.path.join(here,"data/database/pdb_mouse"))

27 changes: 12 additions & 15 deletions nease/nease.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,9 @@ def __init__(self,
self.input_type=input_type

if not only_DDIs:

self.elm=elm[organism]
self.elm_interactions=elm_interactions[organism]
self.pdb=pdb[organism]

self.non_coding=non_coding
self.data=[]
self.spliced_genes=[]
Expand Down Expand Up @@ -126,7 +124,7 @@ def __init__(self,
elif input_type=='Whippet':

try:

data = data[data['DeltaPsi'] >= 0.90]
data=data.rename_axis('Gene ID').reset_index()
data['tmp']= data['Node'].apply(lambda x: x.split(':')[1])

Expand Down Expand Up @@ -157,9 +155,8 @@ def __init__(self,
elif input_type=='DEXSeq':

try:

data=data[data['padj']<=p_value_cutoff]
data=data[[data.columns[0],'genomicData.start','genomicData.end','log2fold_control_case']]
data=data[[data.columns[1],'genomicData.start','genomicData.end','log2fold_control_case']]
print('proceding with log2fold threshold: '+str(min_delta))
self.data,self.spliced_genes,self.elm_affected,self.pdb_affected,self.symetric_genes=process_standard(data,self.mapping,min_delta ,self.only_DDIs,self,remove_non_in_frame,only_divisible_by_3)

Expand All @@ -184,7 +181,7 @@ def __init__(self,



if len(self.data)==0:
if len(self.data)==0:#
print('process canceled...')


Expand All @@ -201,7 +198,7 @@ def __init__(self,
self.elm_interactions['interactor 1']=self.elm_interactions['Interator gene 1'].astype('str')+"/"+self.elm_interactions['Elm id of gene 1']
self.elm_interactions['interactor 2']=self.elm_interactions['Interator gene 2'].astype('str')+"/"+self.elm_interactions['Domain of gene 2']

self.data=exons_to_edges(self.data,Join,self.elm_interactions)
self.data=exons_to_edges(self.data,Join,self.elm_interactions,self.organism)



Expand All @@ -211,7 +208,7 @@ def __init__(self,
print(str(len(self.data['Domain ID'].unique()))+' protein domains are affected by AS.\n')
if not self.only_DDIs:
print(str(len(self.elm_affected['ELMIdentifier'].unique()))+" linear motifs are affected by AS.\n"
+str(len(self.pdb_affected))+ ' interacting resiude are affected by AS.\n')
+str(len(self.pdb_affected))+ ' interacting residue are affected by AS.\n')

print(str(len(self.data[self.data['Interacting domain']]['Domain ID'].unique()))+' of the affected domains/motifs have known interactions.')

Expand Down Expand Up @@ -342,14 +339,14 @@ def get_domains(self):
if len(self.data)==0 :
print('Processing failed')


# elif self.organism=="Mouse":
# #no visualization available for mouse in DIGGER
# return self.data.drop(columns=['Domain ID','Visualization link'])
# else:

elif self.organism=="Mouse":
#no visualization available for mouse in DIGGER
return self.data.drop(columns=['Domain ID','Visualization link'])
else:

#DIGGER visualization available for Human
return self.data.drop(columns=[ 'Domain ID','DDI','elm']).reset_index(drop=True)
#DIGGER visualization available for Human and Mouse
return self.data.drop(columns=[ 'Domain ID','DDI','elm']).reset_index(drop=True)



Expand Down
38 changes: 21 additions & 17 deletions nease/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,13 +84,13 @@ def process_standard (data,
try:

# Remove non numerical
if not data[columns[1]].dtype=='int':
if not data[columns[1]].dtype=='int64':
data=data[data[columns[1]].apply(lambda x: x.isnumeric())]
# convert to int
data[columns[1]]=data[columns[1]].astype(int)


if not data[columns[2]].dtype=='int':
if not data[columns[2]].dtype=='int64':
data=data[data[columns[2]].apply(lambda x: x.isnumeric())]
data[columns[2]]=data[columns[2]].astype(int)
except:
Expand Down Expand Up @@ -137,8 +137,7 @@ def process_standard (data,
print('\n'+str(filtred)+' Exons out of '+str(initial)+' were removed.')





# map to domains by calculating the overlap of exon coordinate and domain
mapping_tb=pd.merge(data, mapping, left_on=columns[0], right_on='Gene stable ID').drop_duplicates()

Expand Down Expand Up @@ -294,12 +293,21 @@ def process_MAJIQ(data,
# NCBI id used here for the network visualization
spliced_genes=list(data['Gene ID'].unique())
#spliced_genes=[Ensemb_to_entrez(x,mapping) for x in spliced_genes ]


# Define a custom function to handle NaN values during join
def custom_join(row):
if pd.notna(row["Junctions coords"]) and pd.notna(row["IR coords"]):
return f"{row['Junctions coords']};{row['IR coords']}"
else:
return row['Junctions coords']


junctions=list(data['Junctions coords'])
confidence=list(data['P(|dPSI|>=0.20)'])
ir=list(data['IR coords'])
data['test'] = data.apply(custom_join, axis=1)
junctions = list(data['test'])
confidence=list(data['P(|dPSI|>=0.20)'])
junc_confid=dict(zip(junctions, confidence))


complexx=0
jun_to_target={}
Expand All @@ -318,7 +326,8 @@ def process_MAJIQ(data,
else:
targets=[ y for y in x if y not in source ]


test = len(targets)
test2 = len(junc_confid[j])
#check if we have correct confidence for every target
if len(targets)==len(junc_confid[j]):

Expand All @@ -328,14 +337,12 @@ def process_MAJIQ(data,
#save
jun_to_target[j]=targets
jun_to_source[j]=int(source[0])


t=lambda x: jun_to_target[x] if x in jun_to_target.keys() else False
data["targets"] = data['Junctions coords'].apply(t)
data["targets"] = data['test'].apply(t)


g=lambda x: jun_to_source[x] if x in jun_to_source.keys() else False
data["source"] = data['Junctions coords'].apply(g)
data["source"] = data['test'].apply(g)


data=data[data['targets']!=False]
Expand All @@ -353,11 +360,8 @@ def process_MAJIQ(data,
data=data [ [ 'Gene ID','E(dPSI) per LSV junction' , 'Junctions coords','P(|dPSI|>=0.20) per LSV junction','delta','targets','source' ]]

data['targets']=data['targets'].astype(int)







#Map exons to domain
mapping_tb=pd.merge(mapping, data, left_on='Genomic coding start', right_on='targets')

Expand Down

0 comments on commit 99a311b

Please sign in to comment.