Merge pull request #13 from pckroon/logging

Use logging module rather than print for stereo warnings
pckroon · Jun 25, 2020 · 5544500 · 5544500
2 parents 46dd227 + 66d9e5c
commit 5544500
Show file tree

Hide file tree

Showing 4 changed files with 39 additions and 12 deletions.
diff --git a/README.md b/README.md
@@ -53,6 +53,25 @@ element attribute.
     of 1.5. If `False`, will create a molecule using *only* the information in 
     the SMILES string.
 
+### Stereochemical information
+Currently the library cannot handle stereochemical information, neither E/Z nor
+R/S. Any stereochemical information that was in the SMILES string will be
+*discarded* upon parsing. This means there will be no difference between
+parsing *e.g.* `N[C@](Br)(O)C`, `N[C@@](Br)(O)C` and `NC(Br)(O)C`. Parsing
+these *will result in the same molecule*. The same holds for *e.g.* `F/C=C/F`
+and `FC=CF`. These will result in the same molecule.
+
+Whenever stereochemical information is being discarded a warning will be
+logged using the built-in `logging` module. If you want to disable all the
+messages logged by `pysmiles` you can add the following snippet to your code,
+without interfering with any logging by your own code:
+
+```python
+import logging
+logging.getLogger('pysmiles').setLevel(logging.CRITICAL)  # Anything higher than warning
+```
+
+
 ## Writing SMILES
 The function `write_smiles(molecule, default_element='*', start=None)` can be
 used to write SMILES strings from a molecule. The function does *not* check 
@@ -196,7 +215,6 @@ print(write_smiles(mol))
 - `correct_aromatic_rings` is fragile.
 - There is currently no way of specifying stereochemical information. The 
     parser can deal with it, but it will be discarded.
-- It is not on PyPI
 - It only processes SMILES. This might later be extended to e.g. InChI, SLN,
     SMARTS, etc.
 

diff --git a/pysmiles/read_smiles.py b/pysmiles/read_smiles.py
@@ -18,13 +18,15 @@
 """
 
 import enum
+import logging
 
 import networkx as nx
 
 from .smiles_helper import (add_explicit_hydrogens, remove_explicit_hydrogens,
                             parse_atom, fill_valence, mark_aromatic_edges,
                             mark_aromatic_atoms)
 
+LOGGER = logging.getLogger(__name__)
 
 @enum.unique
 class TokenType(enum.Enum):
@@ -175,7 +177,7 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True,
                 ring_nums[token] = (idx - 1, next_bond)
                 next_bond = None
         elif tokentype == TokenType.EZSTEREO:
-            print("I can't deal with stereo yet...")
+            LOGGER.warning('E/Z stereochemical information, which is specified by "%s", will be discarded', token)
     if ring_nums:
         raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys())))
 

diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py
@@ -18,11 +18,14 @@
 some convenience functions for adding hydrogens, and detecting aromaticity.
 """
 
+import logging
 import re
 import operator
 
 import networkx as nx
 
+LOGGER = logging.getLogger(__name__)
+
 ISOTOPE_PATTERN = r'(?P<isotope>[\d]+)?'
 ELEMENT_PATTERN = r'(?P<element>b|c|n|o|s|p|\*|[A-Z][a-z]{0,2})'
 STEREO_PATTERN = r'(?P<stereo>@|@@|@TH[1-2]|@AL[1-2]|@SP[1-3]|@OH[\d]{1,2}|'\
@@ -98,7 +101,7 @@ def parse_atom(atom):
         raise ValueError("A hydrogen atom can't have hydrogens")
 
     if 'stereo' in out:
-        print("I don't quite know how to handle stereo yet...")
+        LOGGER.warning('Atom "%s" contains stereochemical information that will be discarded.', atom)
 
     return out
 

diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py
@@ -497,12 +497,16 @@ def test_invalid_smiles(smiles, error_type):
         read_smiles(smiles)
 
 
-def test_cis_trans():
-    smiles = r'F/C=C/F', r'C(\F)=C/F', r'F\C=C/F', r'C(/F)=C/F'
-    for smile in smiles:
-        read_smiles(smile, explicit_hydrogen=False)
-
-
-def test_extended_stereo():
-    smiles = 'NC(Br)=[C@]=C(O)C'
-    read_smiles(smiles)
+@pytest.mark.parametrize('smiles, n_records',[
+    (r'F/C=C/F', 2),
+    (r'C(\F)=C/F', 2),
+    (r'F\C=C/F', 2),
+    (r'C(/F)=C/F', 2),
+    ('NC(Br)=[C@]=C(O)C', 1),
+    ('c1ccccc1', 0)
+])
+def test_stereo_logging(caplog, smiles, n_records):
+    read_smiles(smiles, explicit_hydrogen=False)
+    assert len(caplog.records) == n_records
+    for record in caplog.records:
+        assert record.levelname == "WARNING"