Source code for cgsmiles.pysmiles_utils

import logging
import networkx as nx
import pysmiles
from pysmiles.smiles_helper import (_annotate_ez_isomers,
                                    remove_explicit_hydrogens,
                                    add_explicit_hydrogens)

LOGGER = logging.getLogger(__name__)

[docs] def compute_mass(input_molecule): """ Compute the mass of a molecule from the PTE. Parameters ---------- molecule: networkx.Graph molecule which must have element specified per node Returns ------- float the atomic mass """ molecule = input_molecule.copy() # we need to add the hydrogen atoms # for computing the mass rebuild_h_atoms(molecule) mass = 0 for node in molecule.nodes: element = molecule.nodes[node]['element'] mass += pysmiles.PTE[element]['AtomicMass'] return mass
[docs] def rebuild_h_atoms(mol_graph, keep_bonding=False, copy_attrs=['fragid', 'fragname', 'weight']): """ Helper function which add hydrogen atoms to the molecule graph. First the hcount attribute produced by pysmiles is updated, because fragments have no bonds at time of reading so pysmiles does not know the connectivity. Hence the hcount is redone based on the actual connectivity of the final molecule. This function also makes sure to tag single hydrogen fragments, in order to not merge them with adjecent fragments. Otherwise, explicit single hydrogen residues would not be possible. The molecule graph is updated in place with the hydrogen atoms that are missing. Using the keep_bonding argument the hydrogen count is reduced by the number of bonding descriptors. In this way hydrogen atoms can also be added to fragments only. The `copy_attrs` argument defines a list of attributes to copy to the newly added hydrogen atoms. In case the hydrogen atoms are their own fragments attributes are not copied. If an attribute is already assigned, because the hydrogen atom was explicit that attribute is not replaced. Parameters ---------- mol_graph: networkx.Graph graph describing the full molecule without hydrogen atoms copy_attrs: list[collections.abc.Hashable] a list of attributes to copy from the parent node to the hydrogen atom keep_bonding: bool adjust hcount for number of bonding descriptors """ try: pysmiles.smiles_helper.correct_aromatic_rings(mol_graph, strict=True) except SyntaxError as pysmiles_err: print(pysmiles_err) msg = ("Likely you are writing an aromatic molecule that does not " "show delocalization-induced molecular equivalency and thus " "is not considered aromatic. For example, 4-methyl imidazole " "is often written as [nH]1cc(nc1)C, but should be written as " "[NH]1C=C(N=C1)C. A corresponding CGsmiles string would be " "{[#A]1[#B][#C]1}.{#A=[>][<]N,#B=[$]N=C[>],#C=[$]C(C)=C[<]}") raise SyntaxError(msg) nx.set_node_attributes(mol_graph, 0, 'hcount') # first we need to figure out the correct hcounts on each node # this also corrects for simple aromatic problems like in thiophene pysmiles.smiles_helper.fill_valence(mol_graph, respect_hcount=False) # optionally we adjust the hcount by the number of bonding operators if keep_bonding: bonding_nodes = nx.get_node_attributes(mol_graph, 'bonding') for node, bond_ops in bonding_nodes.items(): mol_graph.nodes[node]['hcount'] -= sum([int(bond[-1]) for bond in bond_ops]) # now we add the hydrogen atoms pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph) for node, element in mol_graph.nodes(data='element'): if element == "H" and not mol_graph.nodes[node].get("single_h_frag", False): anchor = next(mol_graph.neighbors(node)) for attr in copy_attrs: if attr in mol_graph.nodes[node]: continue value = mol_graph.nodes[anchor].get(attr, None) mol_graph.nodes[node][attr] = value
[docs] def annotate_ez_isomers_cgsmiles(molecule): """ Small wrapper dealing with ez_isomer annotation. Parameters ---------- molecule: networkx.Graph The molecule of interest, which must of ez_isomer_pairs and ez_isomer_class set as node attributes """ ez_isomer_class = nx.get_node_attributes(molecule, 'ez_isomer_class') _annotate_ez_isomers(molecule, ez_isomer_class) # clean up for node in ez_isomer_class: del molecule.nodes[node]['ez_isomer_class']
[docs] def read_fragment_smiles(smiles_str, fragname, bonding_descrpt={}, ez_isomers={}, attributes={}): """ Read a smiles_str corresponding to a CGsmiles fragment and annotate bonding descriptors, isomers, as well as any other attributes. This function also sets default attributes as follows: - fragname to `fragname` - fragid to 0 - w to 1 Parameters ---------- smiles_str: str string in OpenSMILES format fragname: str the name of the fragment ez_isomers: dict attributes: dict Returns ------- networkx.Graph the graph of the molecular fragment """ if smiles_str == 'H': LOGGER.warning("You define an H fragment, which is not valid SMILES. We'll make it [H].") smiles_str = '[H]' mol_graph = pysmiles.read_smiles(smiles_str, explicit_hydrogen=True, reinterpret_aromatic=False, strict=False) # set some default values nx.set_node_attributes(mol_graph, fragname, 'fragname') nx.set_node_attributes(mol_graph, 0, 'fragid') nx.set_node_attributes(mol_graph, 1, 'weight') # we add all bonding descriptors to the molecule nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding') # set other attributes nx.set_node_attributes(mol_graph, attributes) # set the default atomnames consiting of the element and index atomnames = {node[0]: node[1]['element']+str(node[0]) for node in mol_graph.nodes(data=True)} nx.set_node_attributes(mol_graph, atomnames, 'atomname') # we have just a single atom so no need for any annotations if len(mol_graph) == 1: # we set the hcount for all non-hydrogen elements if mol_graph.nodes[0]['element'] != 'H': mol_graph.nodes[0]['hcount'] = 0 # we tag all single h-atoms else: mol_graph.nodes[0]['single_h_frag'] = True return mol_graph # we need to remove hydrogen atoms except when they are having # attributes; in this case we need to keep them hatoms = set([n for n, e in mol_graph.nodes(data='element') if e == 'H']) hatoms_to_keep = set(attributes.keys()) & hatoms # temp fix until pysmiles util is imporved # we set the element to z so they are ignored when pysmiles removes hatoms nx.set_node_attributes(mol_graph, dict(zip(hatoms_to_keep, len(hatoms_to_keep)*'z')), 'element') pysmiles.remove_explicit_hydrogens(mol_graph) # now we reset the hatoms nx.set_node_attributes(mol_graph, dict(zip(hatoms_to_keep, len(hatoms_to_keep)*'H')), 'element') # we need to split countable node keys and the associated value ez_isomer_class = {idx: val[-1] for idx, val in ez_isomers.items()} nx.set_node_attributes(mol_graph, ez_isomer_class, 'ez_isomer_class') return mol_graph