Skip to content

Commit

Permalink
Merge branch 'master' of github.com:ventolab/CellphoneDB
Browse files Browse the repository at this point in the history
  • Loading branch information
datasome committed Oct 31, 2023
2 parents 1925abf + 143c643 commit d06b4dd
Show file tree
Hide file tree
Showing 45 changed files with 11,739 additions and 11,148 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ cellphonedb/out
__pycache__
out
docs/api/_build
cellphonedb/src/tests/test_data
.DS_Store
152 changes: 74 additions & 78 deletions README.md

Large diffs are not rendered by default.

145 changes: 0 additions & 145 deletions cellphonedb/controller.py

This file was deleted.

3 changes: 2 additions & 1 deletion cellphonedb/src/core/exceptions/DatabaseCreationException.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
class DatabaseCreationException(Exception):
def __init__(self, description: str = None, hint: str = None):
super(DatabaseCreationException, self).__init__("Failed to create CellphoneDB database due to failed data sanity tests")
super(DatabaseCreationException, self).__init__(
"Failed to create CellphoneDB database due to failed data sanity tests")
self.description = description
self.hint = hint
4 changes: 3 additions & 1 deletion cellphonedb/src/core/exceptions/ProcessMetaException.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
class ProcessMetaException(Exception):
def __init__(self):
super(ProcessMetaException, self).__init__('Error processing Meta data')
super(ProcessMetaException, self).__init__(
'Error processing Meta data - please consult ' +
'https://cellphonedb.readthedocs.io/en/latest/RESULTS-DOCUMENTATION.html#meta-file')
10 changes: 5 additions & 5 deletions cellphonedb/src/core/generators/gene_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def gene_generator(ensembl_db: pd.DataFrame,
user_gene: pd.DataFrame,
result_columns: list) -> pd.DataFrame:
def get_first_gene_name(gene_names: str) -> str:
if type(gene_names) != str:
if type(gene_names) is not str:
return ''
gene_names = gene_names.split(' ')
return gene_names[0]
Expand All @@ -21,7 +21,7 @@ def get_first_gene_name(gene_names: str) -> str:
no_hla_uniprots = uniprot_db[~uniprot_db['gene_name'].str.contains('HLA')]
# Merge non-hla uniprot entries with ensembl on gene_name only (ignore uniprot column for now)
cpdb_genes = no_hla_uniprots.merge(ensembl_db, how='inner', on='gene_name',
sort=False, suffixes=('', '_ensembl')) \
sort=False, suffixes=('', '_ensembl')) \
.drop_duplicates(['ensembl', 'uniprot', 'gene_name'])

# Add additional unique ensembl genes if their corresponding uniprot accession is in no_hla_uniprots['uniprot']
Expand All @@ -31,7 +31,7 @@ def get_first_gene_name(gene_names: str) -> str:
ensembl_db_filtered['uniprot'].apply(lambda uniprot: uniprot in no_hla_uniprots['uniprot'].tolist())]
# Filter out from additional_genes genes that are already in cpdb_genes['ensembl']
additional_genes = additional_genes[
additional_genes['ensembl'].apply(lambda ensembl: not ensembl in cpdb_genes['ensembl'].tolist())
additional_genes['ensembl'].apply(lambda ensembl: ensembl not in cpdb_genes['ensembl'].tolist())
]
cpdb_genes = pd.concat([cpdb_genes, additional_genes], ignore_index=True, sort=False)

Expand All @@ -40,13 +40,13 @@ def get_first_gene_name(gene_names: str) -> str:
# Remove duplicated ensembl genes if hgnc_symbol != gene_name
cpdb_genes.drop(duplicated_ensembl_genes[
duplicated_ensembl_genes.apply(lambda gene: gene['hgnc_symbol'] != gene['gene_name'],
axis=1)].index, inplace=True)
axis=1)].index, inplace=True)
# Append hla_genes to cpdb_genes
cpdb_genes = pd.concat([cpdb_genes, hla_genes], ignore_index=True, sort=False).drop_duplicates(result_columns)

# Append user_gene to cpdb_genes, if provided
cpdb_genes = pd.concat([cpdb_genes, user_gene], ignore_index=True, sort=False).drop_duplicates(result_columns,
keep='last')
keep='last')
# Check if exist any duplicated ensembl
# print('Duplicated ensembl genes')
# print(len(cpdb_genes[cpdb_genes['ensembl'].duplicated()]))
Expand Down
Loading

0 comments on commit d06b4dd

Please sign in to comment.