Skip to content

Commit

Permalink
Fixed ecfp fingerprints
Browse files Browse the repository at this point in the history
  • Loading branch information
Kviatkovskii, Mikhail (Ext) committed Jul 5, 2024
1 parent a49f06f commit 8367356
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 20 deletions.
20 changes: 10 additions & 10 deletions src/ringo/molecule/model/molecule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use std::collections::{BTreeSet};
use std::collections::hash_map::DefaultHasher;
use std::fmt::Debug;
use std::hash::Hasher;
use crate::ringo::math::similarity::tanimoto::tanimoto_vec;
use crate::ringo::molecule::smiles::reader::molecule::parse_molecule;

pub struct Molecule {
Expand Down Expand Up @@ -92,7 +93,7 @@ impl Molecule {
let mut fp = BitSet::new();

for node in self.graph.node_indices() {
ecfp_recursive(&self.graph, radius, 1, node, &mut fp, fp_length);
ecfp_recursive(&self.graph, radius, 1, node, &mut fp, fp_length, &mut DefaultHasher::new());
}

BitVec::from_fn(fp_length, |idx| fp.contains(idx))
Expand All @@ -106,22 +107,21 @@ fn ecfp_recursive(
node: NodeIndex,
fp: &mut BitSet,
fp_length: usize,
hasher: &mut DefaultHasher,
) {
//println!("ecfp_recursive({}, {}, {})", radius, depth, node.index());

if depth > radius {
return;
}

let mut hasher = DefaultHasher::new();

let atom = graph.node_weight(node).unwrap();
hasher.write_u8(atom.element.atomic_number);
hasher.write_u8(atom.isotope);
hasher.write_i8(atom.charge);
hasher.write_u8(atom.hs);

fp.insert(hasher.finish() as usize % fp_length);
let value = hasher.clone().finish();
fp.insert(value as usize % fp_length);

for edge in graph.edges(node) {
let bond = edge.weight();
Expand All @@ -133,15 +133,15 @@ fn ecfp_recursive(
edge.source()
};

ecfp_recursive(graph, radius, depth + 1, target, fp, fp_length);
ecfp_recursive(graph, radius, depth + 1, target, fp, fp_length, hasher);
}
}


#[test]
fn test_ecfp() {
let ecfp_ibuprofen = parse_molecule("CC(C)CC1=CC=C(C=C1)C(C)C(=O)O").unwrap().1.ecfp(6, 128);
let paracetamol = parse_molecule("CC(=O)NC1=CC=C(C=C1)O").unwrap().1.ecfp(6, 128);
println!("{:?}", ecfp_ibuprofen);
println!("{:?}", paracetamol);
let ecfp_ibuprofen = parse_molecule("CC(C)CC1=CC=C(C=C1)C(C)C(=O)O").unwrap().1.ecfp(2, 128);
let ecfp_naproxen = parse_molecule("CC(C1=CC2=C(C=C1)C=C(C=C2)OC)C(=O)O").unwrap().1.ecfp(2, 128);
let sim = tanimoto_vec(&ecfp_ibuprofen, &ecfp_naproxen);
assert!(0.53 < sim && sim < 0.54);
}
2 changes: 1 addition & 1 deletion src/ringo/molecule/smiles/reader/molecule.rs
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ mod tests {
for smiles in ["C", "CC", "C1COCNC1", "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"] {
println!("{}: ", smiles);
let m = parse_molecule(smiles).unwrap().1;
let result = m.ecfp(2, 128);
let result = m.ecfp(2, 512);
for bit in result {
print!("{}", if bit { 1 } else { 0 });
}
Expand Down
12 changes: 3 additions & 9 deletions src/ringo/ringo/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,22 @@ use std::io::{BufRead};
use crate::ringo::molecule::smiles::reader::molecule::parse_molecule;
use crate::ringo::ringo::index_item::IndexItem;

fn index(smiles_file: &str) -> Vec<IndexItem> {
fn index(smiles_file: &str) {
// open file for reading
let fi = File::open(smiles_file).expect("Could not open file");

let mut result = Vec::new();
// open binary file for index
let mut offset = 0;
// let mut fo = File::create(smiles_file.to_owned() + ".fp");
for line in std::io::BufReader::new(fi).lines() {
let line = line.unwrap();
let molecule = parse_molecule(&line).unwrap().1;
// let ecfp = molecule.ecfp(2, 512);
// write ecfp and offset to binary file
result.push(IndexItem::new(offset, molecule.ecfp(2, 512)));

IndexItem::new(offset, molecule.ecfp(2, 512));
offset += line.len() + 1;
}
return result;
}

#[test]
fn test_index() {
let result = index("molecules.smi");
assert_eq!(result.len(), 2);
index("molecules.smi");
}

0 comments on commit 8367356

Please sign in to comment.