From 66d9e5c232bba8b4f8968e1e046364015542d2a8 Mon Sep 17 00:00:00 2001 From: Peter C Kroon Date: Thu, 25 Jun 2020 14:05:04 +0200 Subject: [PATCH] Use logging module rather than print for stereo warnings Fixes #12 --- README.md | 20 +++++++++++++++++++- pysmiles/read_smiles.py | 4 +++- pysmiles/smiles_helper.py | 5 ++++- tests/test_read_smiles.py | 22 +++++++++++++--------- 4 files changed, 39 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index c8b693c..23e7b17 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,25 @@ element attribute. of 1.5. If `False`, will create a molecule using *only* the information in the SMILES string. +### Stereochemical information +Currently the library cannot handle stereochemical information, neither E/Z nor +R/S. Any stereochemical information that was in the SMILES string will be +*discarded* upon parsing. This means there will be no difference between +parsing *e.g.* `N[C@](Br)(O)C`, `N[C@@](Br)(O)C` and `NC(Br)(O)C`. Parsing +these *will result in the same molecule*. The same holds for *e.g.* `F/C=C/F` +and `FC=CF`. These will result in the same molecule. + +Whenever stereochemical information is being discarded a warning will be +logged using the built-in `logging` module. If you want to disable all the +messages logged by `pysmiles` you can add the following snippet to your code, +without interfering with any logging by your own code: + +```python +import logging +logging.getLogger('pysmiles').setLevel(logging.CRITICAL) # Anything higher than warning +``` + + ## Writing SMILES The function `write_smiles(molecule, default_element='*', start=None)` can be used to write SMILES strings from a molecule. The function does *not* check @@ -196,7 +215,6 @@ print(write_smiles(mol)) - `correct_aromatic_rings` is fragile. - There is currently no way of specifying stereo chemical information. The parser can deal with it, but it will be discarded. -- It is not on PyPI - It only processes SMILES. This might later be extended to e.g. InChi, SLN, SMARTS, etc. diff --git a/pysmiles/read_smiles.py b/pysmiles/read_smiles.py index 1d55882..f4bf6e0 100644 --- a/pysmiles/read_smiles.py +++ b/pysmiles/read_smiles.py @@ -18,6 +18,7 @@ """ import enum +import logging import networkx as nx @@ -25,6 +26,7 @@ parse_atom, fill_valence, mark_aromatic_edges, mark_aromatic_atoms) +LOGGER = logging.getLogger(__name__) @enum.unique class TokenType(enum.Enum): @@ -175,7 +177,7 @@ def read_smiles(smiles, explicit_hydrogen=False, zero_order_bonds=True, ring_nums[token] = (idx - 1, next_bond) next_bond = None elif tokentype == TokenType.EZSTEREO: - print("I can't deal with stereo yet...") + LOGGER.warning('E/Z stereochemical information, which is specified by "%s", will be discarded', token) if ring_nums: raise KeyError('Unmatched ring indices {}'.format(list(ring_nums.keys()))) diff --git a/pysmiles/smiles_helper.py b/pysmiles/smiles_helper.py index cd78eb0..fb59432 100644 --- a/pysmiles/smiles_helper.py +++ b/pysmiles/smiles_helper.py @@ -18,11 +18,14 @@ some convenience functions for adding hydrogens, and detecting aromaticity. """ +import logging import re import operator import networkx as nx +LOGGER = logging.getLogger(__name__) + ISOTOPE_PATTERN = r'(?P[\d]+)?' ELEMENT_PATTERN = r'(?Pb|c|n|o|s|p|\*|[A-Z][a-z]{0,2})' STEREO_PATTERN = r'(?P@|@@|@TH[1-2]|@AL[1-2]|@SP[1-3]|@OH[\d]{1,2}|'\ @@ -98,7 +101,7 @@ def parse_atom(atom): raise ValueError("A hydrogen atom can't have hydrogens") if 'stereo' in out: - print("I don't quite know how to handle stereo yet...") + LOGGER.warning('Atom "%s" contains stereochemical information that will be discarded.', atom) return out diff --git a/tests/test_read_smiles.py b/tests/test_read_smiles.py index 51390d0..d85e718 100644 --- a/tests/test_read_smiles.py +++ b/tests/test_read_smiles.py @@ -497,12 +497,16 @@ def test_invalid_smiles(smiles, error_type): read_smiles(smiles) -def test_cis_trans(): - smiles = r'F/C=C/F', r'C(\F)=C/F', r'F\C=C/F', r'C(/F)=C/F' - for smile in smiles: - read_smiles(smile, explicit_hydrogen=False) - - -def test_extended_stereo(): - smiles = 'NC(Br)=[C@]=C(O)C' - read_smiles(smiles) +@pytest.mark.parametrize('smiles, n_records',[ + (r'F/C=C/F', 2), + (r'C(\F)=C/F', 2), + (r'F\C=C/F', 2), + (r'C(/F)=C/F', 2), + ('NC(Br)=[C@]=C(O)C', 1), + ('c1ccccc1', 0) +]) +def test_stereo_logging(caplog, smiles, n_records): + read_smiles(smiles, explicit_hydrogen=False) + assert len(caplog.records) == n_records + for record in caplog.records: + assert record.levelname == "WARNING"