Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#1277 SMILES with bad stereochemistry is not parsed #1280

Merged
merged 39 commits into from
Oct 9, 2023
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
20b5f6f
bad stereo
even1024 Sep 15, 2023
74fba33
clang fix
even1024 Sep 15, 2023
e38b5bb
py formatting
even1024 Sep 15, 2023
2a2e4c3
atrop layout
even1024 Sep 18, 2023
10a0f9b
atrop
even1024 Sep 18, 2023
f37353d
Merge branch '1277-bad-stereochemistry-smiles' of https://github.com/…
even1024 Sep 18, 2023
2e10a5e
clang fix
even1024 Sep 18, 2023
c1d27ca
clang fix
even1024 Sep 18, 2023
9144bee
python fix
even1024 Sep 18, 2023
8c42d8c
atropo refactoring
even1024 Sep 27, 2023
b43585e
clang fix
even1024 Sep 27, 2023
e04f16a
test fix
even1024 Sep 27, 2023
9268356
test fix
even1024 Sep 27, 2023
3f759a4
test fix
even1024 Sep 27, 2023
ee18d00
test fix
even1024 Sep 27, 2023
64f4ac9
point 1
even1024 Sep 29, 2023
6f383ef
stereocenters fixed
even1024 Oct 3, 2023
97f7c23
stereocenters fixed
even1024 Oct 3, 2023
dad9634
tests fixed
even1024 Oct 3, 2023
1d83dc9
tests fixed
even1024 Oct 3, 2023
8f9a830
tests fixed
even1024 Oct 3, 2023
b68f89f
fix atropisomers detecytion algorythm
even1024 Oct 3, 2023
7a84a67
wiggly bonds fix
even1024 Oct 4, 2023
a2f39ab
wiggly bonds fix
even1024 Oct 4, 2023
b693ab9
py format
even1024 Oct 4, 2023
5ff9efe
wiggly bonds fix
even1024 Oct 4, 2023
b407380
Merge branch '1277-bad-stereochemistry-smiles' of https://github.com/…
even1024 Oct 4, 2023
66773cd
fix double bond as atropobond
even1024 Oct 8, 2023
efe5cd2
inchi fix
even1024 Oct 8, 2023
8890bc2
inchi fix
even1024 Oct 8, 2023
6e6080b
cml fix
even1024 Oct 8, 2023
434a81b
free center fix
even1024 Oct 8, 2023
e1f5f74
free center fix
even1024 Oct 8, 2023
6089687
bingo fixes
even1024 Oct 8, 2023
0651b15
cml fix
even1024 Oct 9, 2023
3f3395a
tests fixed
even1024 Oct 9, 2023
3201b9c
Merge branch '1277-bad-stereochemistry-smiles' of https://github.com/…
even1024 Oct 9, 2023
5dc3c8c
bingo fix
even1024 Oct 9, 2023
a1c1502
bingo fix
even1024 Oct 9, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions api/c/indigo/src/indigo_layout.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@ CEXPORT int indigoLayout(int object)
ml.max_iterations = self.layout_max_iterations;
ml.bond_length = 1.6f;
ml.layout_orientation = (layout_orientation_value)self.layout_orientation;
bool has_atropisomery = mol->hasAtropisomericCenter();
if (has_atropisomery)
bool has_forced_stereo = mol->forcedStereoBonds().size();
if (has_forced_stereo)
ml.respect_existing_layout = true;

TimeoutCancellationHandler cancellation(self.cancellation_timeout);
Expand All @@ -69,12 +69,15 @@ CEXPORT int indigoLayout(int object)

if (obj.type != IndigoObject::SUBMOLECULE)
{
if (!has_atropisomery)
if (!has_forced_stereo)
mol->clearBondDirections();
try
{
mol->markBondsStereocenters();
mol->markBondsAlleneStereo();
auto& fbonds = mol->forcedStereoBonds();
for (int i = fbonds.begin(); i != fbonds.end(); i = fbonds.next(i))
mol->setBondDirection(fbonds.key(i), fbonds.value(i));
}
catch (Exception e)
{
Expand Down
2 changes: 1 addition & 1 deletion api/tests/integration/ref/formats/mol_to_smiles.py.out
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
C1%82(C%83O%84%85)OC%86(C(O%87%88%89)C(O%90%91)C%921O%93%94%95)O%96%97%98.[*:1]%96.[*:2]%93.[*:3]%84.[*:4]%90.[*:5]%87.[*:6]%94%85.[*:7]%95%97.[*:8]%83.[*:9]%91%88.[*:10]%89%98.[*:11]%86.[*:12]%92.[*:13]%82 |$;;;;;;;;;;;;_R1;_R2;_R3;_R4;_R5;_R6;_R7;_R8;_R9;_R10;_R11;_R12;_R13$|
[O-][N+](C1=NN=NN1CC1N=NNN=1)=O
C1C(O)=C(C2C=CC(C)=CC=2N)C(C)=CC=1 |o1:3,r,wU:3.12|
C1C(O)=C(C2C=CC(C)=CC=2N)C(C)=CC=1 |o1:3,r,wU:3.12,(25.51,-10.61,;25.51,-12.12,;24.22,-12.87,;26.82,-12.86,;26.82,-14.36,;25.51,-15.12,;25.52,-16.62,;26.82,-17.36,;26.82,-18.86,;28.12,-16.61,;28.12,-15.11,;29.18,-14.05,;28.11,-12.11,;29.41,-12.86,;28.11,-10.61,;26.82,-9.86,)|
7 changes: 5 additions & 2 deletions api/tests/integration/ref/formats/smiles.py.out
Original file line number Diff line number Diff line change
Expand Up @@ -73,5 +73,8 @@ chemaxon:
CCCCC |Sg:n:1,2,3::hh|
*** Atropisomers ***
atropisomer:
C1=CC=C(C)C(C2=C(N)C=C(C)C=C2)=C1O |o1:5,r,wU:5.4|
C1=CC=C(C)C(C2=C(N)C=C(C)C=C2)=C1O |o1:5,r,wU:5.4|
C1=CC=C(C)C(C2=C(N)C=C(C)C=C2)=C1O |o1:5,r,wU:5.4,(0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,;0.00,0.00,)|
C1=CC=C(C)C(C2=C(N)C=C(C)C=C2)=C1O |o1:5,r,wU:5.4,(2.40,-1.39,;3.20,-0.00,;2.40,1.39,;0.80,1.39,;0.00,2.77,;0.00,0.00,;-1.60,0.00,;-2.40,1.39,;-1.60,2.77,;-4.00,1.39,;-4.80,0.00,;-6.40,0.00,;-4.00,-1.39,;-2.40,-1.39,;0.80,-1.39,;-0.00,-2.77,)|
atropisomer:
C1C(O)=C(C2C=CC(C)=CC=2N)C(C)=CC=1
C1C(O)=C(C2C=CC(C)=CC=2N)C(C)=CC=1
5 changes: 4 additions & 1 deletion api/tests/integration/tests/formats/smiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,10 @@
print(indigo.loadMolecule(sm).smiles())

print("*** Atropisomers ***")
mols_smiles = ["C1C(O)=C(C2C=CC(C)=CC=2N)C(C)=CC=1 |o1:3,r,wU:3.12|"]
mols_smiles = [
"C1C(O)=C(C2C=CC(C)=CC=2N)C(C)=CC=1 |o1:3,r,wU:3.12|",
"C1=CC=C(C)C(C2=C(N)C=C(C)C=C2)=C1O |wU:5.4,wD:5.5|",
]
for sm in mols_smiles:
print("atropisomer:")
mol = indigo.loadMolecule(sm)
Expand Down
5 changes: 5 additions & 0 deletions core/indigo-core/molecule/base_molecule.h
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,11 @@ namespace indigo

void clearBondDirections();
int getBondDirection(int idx) const;
int* getForcedBondDirection(int idx) const;
void setBondDirection(int idx, int dir);
void setForcedBondDirection(int idx, int dir);
bool isForcedStereoBond(int idx);
const RedBlackMap<int, int>& forcedStereoBonds();

int getBondDirection2(int center_idx, int nei_idx);

Expand Down Expand Up @@ -511,6 +515,7 @@ namespace indigo
Array<int> _sl_bonds;

Array<int> _bond_directions;
RedBlackMap<int, int> _forced_stereo_directions;

Array<Vec3f> _xyz;
RedBlackMap<int, Vec3f> _stereo_flag_positions;
Expand Down
2 changes: 2 additions & 0 deletions core/indigo-core/molecule/smiles_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ namespace indigo
int _balance;
int _current_compno;
bool _inside_smarts_component;
bool _has_atom_coordinates = false;

BaseMolecule* _bmol;
QueryMolecule* _qmol;
Expand All @@ -177,6 +178,7 @@ namespace indigo
bool _isAlleneLike(int i);
void _handleCurlyBrace(_AtomDesc& atom, bool& inside_polymer);
void _handlePolymerRepetition(int i);
void _handleForcedStereo();

void _readAtom(Array<char>& atom_str, bool first_in_brackets, _AtomDesc& atom, std::unique_ptr<QueryMolecule::Atom>& qatom);

Expand Down
26 changes: 24 additions & 2 deletions core/indigo-core/molecule/src/base_molecule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3914,6 +3914,27 @@ int BaseMolecule::getBondDirection(int idx) const
return _bond_directions[idx];
}

int* BaseMolecule::getForcedBondDirection(int idx) const
{
return _forced_stereo_directions.at2(idx);
}

// Records `dir` as the forced stereo direction of bond `idx`.
// A bond that already has an entry is left untouched: the first recorded
// direction is kept and later calls for the same bond are ignored.
void BaseMolecule::setForcedBondDirection(int idx, int dir)
{
    if (_forced_stereo_directions.find(idx))
        return; // already recorded, do not overwrite

    _forced_stereo_directions.insert(idx, dir);
}

// Reports whether bond `idx` has an entry in the forced stereo direction map.
bool BaseMolecule::isForcedStereoBond(int idx)
{
    const bool has_forced_direction = _forced_stereo_directions.find(idx);
    return has_forced_direction;
}

// Read-only view of the forced stereo directions, keyed by bond index with
// the recorded direction as the value.
const RedBlackMap<int, int>& BaseMolecule::forcedStereoBonds()
{
    const RedBlackMap<int, int>& forced_directions = _forced_stereo_directions;
    return forced_directions;
}

int BaseMolecule::getBondDirection2(int center_idx, int nei_idx)
{
int idx = findEdgeIndex(center_idx, nei_idx);
Expand All @@ -3936,6 +3957,7 @@ void BaseMolecule::setBondDirection(int idx, int dir)
// Drops every stored bond direction: both the forced stereo directions and
// the regular per-bond direction array are emptied.
void BaseMolecule::clearBondDirections()
{
    _forced_stereo_directions.clear();
    _bond_directions.clear();
}

bool BaseMolecule::isChiral()
Expand Down Expand Up @@ -4431,7 +4453,7 @@ bool BaseMolecule::isAtropisomerismReferenceAtom(int atom_idx)
// check if the atom has at least one stereo-bond
for (int i = v.neiBegin(); i != v.neiEnd(); i = v.neiNext(i))
{
if (getBondDirection(v.neiEdge(i)))
if (getBondDirection(v.neiEdge(i)) || getForcedBondDirection(v.neiEdge(i)))
{
has_stereo = true;
break;
Expand All @@ -4444,7 +4466,7 @@ bool BaseMolecule::isAtropisomerismReferenceAtom(int atom_idx)
for (int i = v.neiBegin(); i != v.neiEnd(); i = v.neiNext(i))
{
auto bond_idx = v.neiEdge(i);
if (getBondDirection(bond_idx))
if ((getBondDirection(bond_idx) || getForcedBondDirection(v.neiEdge(i))) && getEdgeTopology(bond_idx) == TOPOLOGY_RING)
continue;
if (isRotationBond(bond_idx))
rotation_bonds.insert(bond_idx);
Expand Down
5 changes: 3 additions & 2 deletions core/indigo-core/molecule/src/molecule_json_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1125,9 +1125,10 @@ void MoleculeJsonLoader::loadMolecule(BaseMolecule& mol, bool load_arrows)

for (int i : mol.edges())
{
if (mol.getBondDirection(i) > 0 && !sensible_bond_directions[i])
if (mol.getBondDirection(i) > 0 && (!sensible_bond_directions[i] || mol.getBondTopology(i) == TOPOLOGY_RING))
{
if (!stereochemistry_options.ignore_errors)
mol.setForcedBondDirection(i, mol.getBondDirection(i));
if (!stereochemistry_options.ignore_errors && !sensible_bond_directions[i])
throw Error("direction of bond #%d makes no sense", i);
}
}
Expand Down
12 changes: 8 additions & 4 deletions core/indigo-core/molecule/src/molfile_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2077,14 +2077,18 @@ void MolfileLoader::_postLoad()
_bmol->stereocenters.setType(i, _stereocenter_types[i], _stereocenter_groups[i]);
}

_bmol->buildCisTrans(_ignore_cistrans.ptr());

for (i = 0; i < _bonds_num; i++)
if (_bmol->getBondDirection(i) > 0 && !_sensible_bond_directions[i])
{
if (_bmol->getBondDirection(i) && (!_sensible_bond_directions[i] || _bmol->getBondTopology(i) == TOPOLOGY_RING))
{
if (!stereochemistry_options.ignore_errors)
auto& e = _bmol->getEdge(i);
_bmol->setForcedBondDirection(i, _bmol->getBondDirection(i));
if (!stereochemistry_options.ignore_errors && !_sensible_bond_directions[i])
throw Error("direction of bond #%d makes no sense", i);
}

_bmol->buildCisTrans(_ignore_cistrans.ptr());
}

// Remove adding default R-group logic behavior
/*
Expand Down
48 changes: 33 additions & 15 deletions core/indigo-core/molecule/src/smiles_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ void SmilesLoader::loadMolecule(Molecule& mol)
_bmol = &mol;
_mol = &mol;
_qmol = 0;
_has_atom_coordinates = false;
_loadMolecule();

mol.setIgnoreBadValenceFlag(ignore_bad_valence);
Expand Down Expand Up @@ -356,17 +357,15 @@ void SmilesLoader::_readOtherStuff()
auto bond_idx = _scanner.readUnsigned();
if (wmode)
{
auto& v = _bmol->getEdge(bond_idx);
if (v.end == atom_idx)
_bmol->swapEdgeEnds(bond_idx);
if (v.beg == atom_idx)
if (bond_idx < _bmol->edgeCount() && atom_idx < _bmol->vertexCount())
{
_bmol->setBondDirection(bond_idx, wmode == 'U' ? BOND_UP : BOND_DOWN);
if (_bmol->isAtropisomerismReferenceAtom(atom_idx))
auto& v = _bmol->getEdge(bond_idx);
if (v.end == atom_idx)
_bmol->swapEdgeEnds(bond_idx);

if (v.beg == atom_idx)
{
if (!_bmol->stereocenters.exists(atom_idx))
_bmol->addStereocenters(atom_idx, MoleculeStereocenters::ATOM_ANY, 0, false);
_bmol->stereocenters.setAtropisomeric(atom_idx, true);
_bmol->setForcedBondDirection(bond_idx, wmode == 'U' ? BOND_UP : BOND_DOWN);
}
}
}
Expand Down Expand Up @@ -752,8 +751,7 @@ void SmilesLoader::_readOtherStuff()
}
if (_scanner.readChar() != ')')
throw Error("expected ')' after coordinates");
_bmol->markBondsStereocenters();
_bmol->markBondsAlleneStereo();
_has_atom_coordinates = true;
}
else if (c == 'h') // highlighting (Indigo's own extension)
{
Expand Down Expand Up @@ -1340,16 +1338,28 @@ void SmilesLoader::_readOtherStuff()
_bmol->removeAtoms(to_remove);
}

// Applies the forced stereo bond directions stored on the molecule and then
// marks the matching stereocenters as atropisomeric.
void SmilesLoader::_handleForcedStereo()
{
    const auto& forced = _bmol->forcedStereoBonds();

    // Pass 1: copy every forced direction into the regular bond directions.
    for (int it = forced.begin(); it != forced.end(); it = forced.next(it))
    {
        _bmol->setBondDirection(forced.key(it), forced.value(it));
    }

    // Pass 2: runs only after all directions are applied. The begin atom of
    // each forced bond is flagged as atropisomeric when it already is a
    // stereocenter and isAtropisomerismReferenceAtom() accepts it.
    // NOTE(review): the passes are presumably kept separate so the reference
    // atom check sees the complete set of directions -- do not merge them
    // without confirming.
    for (int it = forced.begin(); it != forced.end(); it = forced.next(it))
    {
        const auto& edge = _bmol->getEdge(forced.key(it));
        if (!_bmol->stereocenters.exists(edge.beg))
            continue;
        if (_bmol->isAtropisomerismReferenceAtom(edge.beg))
            _bmol->stereocenters.setAtropisomeric(edge.beg, true);
    }
}

void SmilesLoader::_validateStereoCenters()
{
for (int i = _bmol->stereocenters.begin(); i < _bmol->stereocenters.end(); i = _bmol->stereocenters.next(i))
{
auto atom_idx = _bmol->stereocenters.getAtomIndex(i);
if (_bmol->isPossibleStereocenter(atom_idx) || _bmol->isAtropisomerismReferenceAtom(atom_idx))
if (_bmol->isPossibleStereocenter(atom_idx) || _bmol->stereocenters.isAtropisomeric(atom_idx))
continue;
if (stereochemistry_options.ignore_errors)
_bmol->stereocenters.remove(i);
else
if (!stereochemistry_options.ignore_errors)
throw Error("atom %d is not a stereocenter", atom_idx);
}
}
Expand Down Expand Up @@ -1928,6 +1938,14 @@ void SmilesLoader::_loadParsedMolecule()
{
_scanner.skip(1);
_readOtherStuff();
if (_has_atom_coordinates)
{
_bmol->markBondsStereocenters();
_bmol->markBondsAlleneStereo();
}

if (_bmol->forcedStereoBonds().size())
_handleForcedStereo();
}

// Update attachment orders for rsites
Expand Down
72 changes: 55 additions & 17 deletions core/indigo-core/molecule/src/smiles_saver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "molecule/molecule.h"
#include "molecule/molecule_rgroups.h"
#include "molecule/molecule_savers.h"
#include "molecule/molecule_stereocenter_options.h"
#include "molecule/molecule_stereocenters.h"
#include "molecule/query_molecule.h"

Expand Down Expand Up @@ -625,7 +626,8 @@ void SmilesSaver::_saveMolecule()
_writeRingBonds();
_writeUnsaturated();
_writeSubstitutionCounts();
_writeWedges();
if (_bmol->hasAtropisomericCenter())
_writeWedges();

if (_comma)
_output.writeChar('|');
Expand Down Expand Up @@ -1840,32 +1842,68 @@ void SmilesSaver::_writeSubstitutionCounts()

void SmilesSaver::_writeWedges()
{
bool is_first = true;

if (_bmol)
{
std::vector<std::pair<int, int>> down_dirs, up_dirs;
for (int i = 0; i < _written_bonds.size(); ++i)
{
auto bond_idx = _written_bonds[i];
auto& e = _bmol->getEdge(bond_idx);
if (_bmol->stereocenters.exists(e.beg) && _bmol->stereocenters.isAtropisomeric(e.beg))
auto bdir = _bmol->getBondDirection(bond_idx);
if (bdir && bdir < BOND_EITHER && _bmol->isForcedStereoBond(bond_idx))
{
const auto& edge = _bmol->getEdge(bond_idx);
auto wa_idx = _written_atoms.find(edge.beg);
if (bdir == BOND_UP)
up_dirs.emplace_back(wa_idx, i);
else
down_dirs.emplace_back(wa_idx, i);
}
}
bool is_first = true;
if (up_dirs.size())
{
for (const auto& kvp : up_dirs)
{
auto bdir = _bmol->getBondDirection(bond_idx);
if (bdir && bdir < BOND_EITHER)
if (is_first)
{
if (is_first)
{
_startExtension();
_output.writeString(bdir == BOND_UP ? "wU:" : "wD:");
is_first = false;
}
else
_output.writeString(",");
const auto& edge = _bmol->getEdge(bond_idx);
auto wa_idx = _written_atoms.find(edge.beg);
_output.printf("%d.%d", wa_idx, i);
_startExtension();
_output.writeString("wU:");
is_first = false;
}
else
_output.writeString(",");
_output.printf("%d.%d", kvp.first, kvp.second);
}
}
is_first = true;
if (down_dirs.size())
{
for (const auto& kvp : down_dirs)
{
if (is_first)
{
_startExtension();
_output.writeString("wD:");
is_first = false;
}
else
_output.writeString(",");
_output.printf("%d.%d", kvp.first, kvp.second);
}
}
if (down_dirs.size() || up_dirs.size())
{
_output.writeString(",(");
for (int i = 0; i < _written_atoms.size(); ++i)
{
if (i)
_output.writeString(";");
auto atom_idx = _written_atoms[i];
const auto& pos = _mol->getAtomXyz(atom_idx);
_output.printf("%.2f,%.2f,", pos.x, pos.y);
}
_output.writeString(")");
}
}
}
Expand Down