Merge branch 'master' of github.com:ventolab/CellphoneDB

ventolab · Oct 31, 2023 · d06b4dd
2 parents 1925abf + 143c643
commit d06b4dd
Show file tree

Hide file tree

Showing 45 changed files with 11,739 additions and 11,148 deletions.
diff --git a/.gitignore b/.gitignore
@@ -4,4 +4,5 @@ cellphonedb/out
 __pycache__
 out
 docs/api/_build
+cellphonedb/src/tests/test_data
 .DS_Store
diff --git a/README.md b/README.md
diff --git a/cellphonedb/controller.py b/cellphonedb/controller.py
diff --git a/cellphonedb/src/core/exceptions/DatabaseCreationException.py b/cellphonedb/src/core/exceptions/DatabaseCreationException.py
@@ -1,5 +1,6 @@
 class DatabaseCreationException(Exception):
     def __init__(self, description: str = None, hint: str = None):
-        super(DatabaseCreationException, self).__init__("Failed to create CellphoneDB database due to failed data sanity tests")
+        super(DatabaseCreationException, self).__init__(
+            "Failed to create CellphoneDB database due to failed data sanity tests")
         self.description = description
         self.hint = hint
diff --git a/cellphonedb/src/core/exceptions/ProcessMetaException.py b/cellphonedb/src/core/exceptions/ProcessMetaException.py
@@ -1,3 +1,5 @@
 class ProcessMetaException(Exception):
     def __init__(self):
-        super(ProcessMetaException, self).__init__('Error processing Meta data')
+        super(ProcessMetaException, self).__init__(
+            'Error processing Meta data - please consult ' +
+            'https://cellphonedb.readthedocs.io/en/latest/RESULTS-DOCUMENTATION.html#meta-file')
diff --git a/cellphonedb/src/core/generators/gene_generator.py b/cellphonedb/src/core/generators/gene_generator.py
@@ -7,7 +7,7 @@ def gene_generator(ensembl_db: pd.DataFrame,
                    user_gene: pd.DataFrame,
                    result_columns: list) -> pd.DataFrame:
     def get_first_gene_name(gene_names: str) -> str:
-        if type(gene_names) != str:
+        if type(gene_names) is not str:
             return ''
         gene_names = gene_names.split(' ')
         return gene_names[0]
@@ -21,7 +21,7 @@ def get_first_gene_name(gene_names: str) -> str:
     no_hla_uniprots = uniprot_db[~uniprot_db['gene_name'].str.contains('HLA')]
     # Merge non-hla uniprot entries with ensembl on gene_name only (ignore uniprot column for now)
     cpdb_genes = no_hla_uniprots.merge(ensembl_db, how='inner', on='gene_name',
-                                                     sort=False, suffixes=('', '_ensembl')) \
+                                       sort=False, suffixes=('', '_ensembl')) \
         .drop_duplicates(['ensembl', 'uniprot', 'gene_name'])
 
     # Add additional unique ensembl genes if their corresponding uniprot accession is in no_hla_uniprots['uniprot']
@@ -31,7 +31,7 @@ def get_first_gene_name(gene_names: str) -> str:
         ensembl_db_filtered['uniprot'].apply(lambda uniprot: uniprot in no_hla_uniprots['uniprot'].tolist())]
     # Filter out from additional_genes genes that are already in cpdb_genes['ensembl']
     additional_genes = additional_genes[
-        additional_genes['ensembl'].apply(lambda ensembl: not ensembl in cpdb_genes['ensembl'].tolist())
+        additional_genes['ensembl'].apply(lambda ensembl: ensembl not in cpdb_genes['ensembl'].tolist())
     ]
     cpdb_genes = pd.concat([cpdb_genes, additional_genes], ignore_index=True, sort=False)
 
@@ -40,13 +40,13 @@ def get_first_gene_name(gene_names: str) -> str:
     # Remove duplicated ensembl genes if hgnc_symbol != gene_name
     cpdb_genes.drop(duplicated_ensembl_genes[
                         duplicated_ensembl_genes.apply(lambda gene: gene['hgnc_symbol'] != gene['gene_name'],
-                                                      axis=1)].index, inplace=True)
+                                                       axis=1)].index, inplace=True)
     # Append hla_genes to cpdb_genes
     cpdb_genes = pd.concat([cpdb_genes, hla_genes], ignore_index=True, sort=False).drop_duplicates(result_columns)
 
     # Append user_gene to cpdb_genes, if provided
     cpdb_genes = pd.concat([cpdb_genes, user_gene], ignore_index=True, sort=False).drop_duplicates(result_columns,
-                                                                                             keep='last')
+                                                                                                   keep='last')
     # Check if exist any duplicated ensembl
     # print('Duplicated ensembl genes')
     # print(len(cpdb_genes[cpdb_genes['ensembl'].duplicated()]))