Source code for cgsmiles.rdkit

"""
Functions to interface with rdkit.
"""
import numpy as np
import networkx as nx
from rdkit import Chem
from rdkit.Chem import AllChem
from pysmiles.smiles_helper import add_explicit_hydrogens

BOND_TYPE_MAP = {0: Chem.BondType.ZERO,
                 1: Chem.BondType.SINGLE,
                 2: Chem.BondType.DOUBLE,
                 3: Chem.BondType.TRIPLE,
                 4: Chem.BondType.QUADRUPLE,
                 1.5: Chem.BondType.AROMATIC}

[docs] def rdkit_to_networkx(rdkit_mol): """ Convert an rdkit molecule to a networkx graph. Parameters ---------- rdkit_mol: rdkit.Chem.rdchem.Mol an RDKit molecule Returns ------- networkx.Graph pysmiles compatible molecule graph """ # check if there are 3D coordinates try: conf = rdkit_mol.GetConformer() except ValueError: conf = None out_mol = nx.Graph() for atom in rdkit_mol.GetAtoms(): props = {} props['atomic_num'] = atom.GetAtomicNum() props['symbol'] = atom.GetSymbol() props['charge'] = int(atom.GetFormalCharge()) props['element'] = props['symbol'] props['hcount'] = atom.GetTotalNumHs() if conf: pos = conf.GetAtomPosition(idx) props['position'] = np.array([pos.x, pos.y, pos.z]) out_mol.add_node(atom.GetIdx(), **props) for bond in rdkit_mol.GetBonds(): bt = bond.GetBondTypeAsDouble() if bt != 1.5: bt = int(bt) out_mol.add_edge(bond.GetBeginAtomIdx(), bond.GetEndAtomIdx(), order=bt) return out_mol
[docs] def networkx_to_rdkit(mol_graph): """ Convert a networkx molecule graph to a rdkit molecule. Parameters ---------- mol_graph: networkx.Graph Returns ------- rdkit.Chem.rdchem.Mol the RDKit molecule """ mol = Chem.RWMol() node_to_idx = {} for node, props in mol_graph.nodes(data=True): atom = Chem.Atom(props.get('element', '*')) atom.SetFormalCharge(props.get('charge', 0)) node_to_idx[node] = mol.AddAtom(atom) for u, v, data in mol_graph.edges(data=True): order = data.get('order', 1) bt = BOND_TYPE_MAP.get(order, 1) mol.AddBond(node_to_idx[u], node_to_idx[v], bt) mol = mol.GetMol() # some clean up to get the molecule up to speed Chem.SanitizeMol(mol) return mol
[docs] def embed_3d_via_rdkit(mol_graph): """ Generate 3D coordiantes of a molecule using the rdKit embedding scheme. Coordinates annotated in place. Parameters ---------- mol_graph: networkx.Graph """ # add explicit hydrogen atoms add_explicit_hydrogens(mol_graph) # convert to rdkit mol rdkit_mol = networkx_to_rdkit(mol_graph) # Add hydrogens to the molecule rdkit_mol = Chem.AddHs(rdkit_mol) # Generate 3D coordinates AllChem.EmbedMolecule(rdkit_mol) # Optimize the 3D structure using a force field AllChem.UFFOptimizeMolecule(rdkit_mol) # Get the conformer conf = rdkit_mol.GetConformer() # write the positions to the original molecule graph for ndx, atom in enumerate(rdkit_mol.GetAtoms()): pos = conf.GetAtomPosition(atom.GetIdx()) mol_graph.nodes[ndx]['position'] = np.array([pos.x, pos.y, pos.z]) return mol_graph