import logging
import networkx as nx
import pysmiles
from pysmiles.smiles_helper import (_annotate_ez_isomers,
remove_explicit_hydrogens,
add_explicit_hydrogens)
LOGGER = logging.getLogger(__name__)
[docs]
def compute_mass(input_molecule):
"""
Compute the mass of a molecule from the PTE.
Parameters
----------
molecule: networkx.Graph
molecule which must have element specified per node
Returns
-------
float
the atomic mass
"""
molecule = input_molecule.copy()
# we need to add the hydrogen atoms
# for computing the mass
rebuild_h_atoms(molecule)
mass = 0
for node in molecule.nodes:
element = molecule.nodes[node]['element']
mass += pysmiles.PTE[element]['AtomicMass']
return mass
[docs]
def rebuild_h_atoms(mol_graph,
keep_bonding=False,
copy_attrs=['fragid', 'fragname', 'weight']):
"""
Helper function which add hydrogen atoms to the molecule graph.
First the hcount attribute produced by pysmiles is updated, because
fragments have no bonds at time of reading so pysmiles does not
know the connectivity. Hence the hcount is redone based on the
actual connectivity of the final molecule.
This function also makes sure to tag single hydrogen fragments,
in order to not merge them with adjecent fragments. Otherwise,
explicit single hydrogen residues would not be possible.
The molecule graph is updated in place with the hydrogen atoms
that are missing.
Using the keep_bonding argument the hydrogen count is reduced
by the number of bonding descriptors. In this way hydrogen
atoms can also be added to fragments only.
The `copy_attrs` argument defines a list of attributes to copy
to the newly added hydrogen atoms. In case the hydrogen atoms
are their own fragments attributes are not copied. If an attribute
is already assigned, because the hydrogen atom was explicit that
attribute is not replaced.
Parameters
----------
mol_graph: networkx.Graph
graph describing the full molecule without hydrogen atoms
copy_attrs: list[collections.abc.Hashable]
a list of attributes to copy from the parent node to the
hydrogen atom
keep_bonding: bool
adjust hcount for number of bonding descriptors
"""
try:
pysmiles.smiles_helper.correct_aromatic_rings(mol_graph, strict=True)
except SyntaxError as pysmiles_err:
print(pysmiles_err)
msg = ("Likely you are writing an aromatic molecule that does not "
"show delocalization-induced molecular equivalency and thus "
"is not considered aromatic. For example, 4-methyl imidazole "
"is often written as [nH]1cc(nc1)C, but should be written as "
"[NH]1C=C(N=C1)C. A corresponding CGsmiles string would be "
"{[#A]1[#B][#C]1}.{#A=[>][<]N,#B=[$]N=C[>],#C=[$]C(C)=C[<]}")
raise SyntaxError(msg)
nx.set_node_attributes(mol_graph, 0, 'hcount')
# first we need to figure out the correct hcounts on each node
# this also corrects for simple aromatic problems like in thiophene
pysmiles.smiles_helper.fill_valence(mol_graph, respect_hcount=False)
# optionally we adjust the hcount by the number of bonding operators
if keep_bonding:
bonding_nodes = nx.get_node_attributes(mol_graph, 'bonding')
for node, bond_ops in bonding_nodes.items():
mol_graph.nodes[node]['hcount'] -= sum([int(bond[-1]) for bond in bond_ops])
# now we add the hydrogen atoms
pysmiles.smiles_helper.add_explicit_hydrogens(mol_graph)
for node, element in mol_graph.nodes(data='element'):
if element == "H" and not mol_graph.nodes[node].get("single_h_frag", False):
anchor = next(mol_graph.neighbors(node))
for attr in copy_attrs:
if attr in mol_graph.nodes[node]:
continue
value = mol_graph.nodes[anchor].get(attr, None)
mol_graph.nodes[node][attr] = value
[docs]
def annotate_ez_isomers_cgsmiles(molecule):
"""
Small wrapper dealing with ez_isomer annotation.
Parameters
----------
molecule: networkx.Graph
The molecule of interest, which must of ez_isomer_pairs
and ez_isomer_class set as node attributes
"""
ez_isomer_class = nx.get_node_attributes(molecule, 'ez_isomer_class')
_annotate_ez_isomers(molecule, ez_isomer_class)
# clean up
for node in ez_isomer_class:
del molecule.nodes[node]['ez_isomer_class']
[docs]
def read_fragment_smiles(smiles_str,
fragname,
bonding_descrpt={},
ez_isomers={},
attributes={}):
"""
Read a smiles_str corresponding to a CGsmiles fragment and
annotate bonding descriptors, isomers, as well as any other
attributes.
This function also sets default attributes as follows:
- fragname to `fragname`
- fragid to 0
- w to 1
Parameters
----------
smiles_str: str
string in OpenSMILES format
fragname: str
the name of the fragment
ez_isomers: dict
attributes: dict
Returns
-------
networkx.Graph
the graph of the molecular fragment
"""
if smiles_str == 'H':
LOGGER.warning("You define an H fragment, which is not valid SMILES. We'll make it [H].")
smiles_str = '[H]'
mol_graph = pysmiles.read_smiles(smiles_str,
explicit_hydrogen=True,
reinterpret_aromatic=False,
strict=False)
# set some default values
nx.set_node_attributes(mol_graph, fragname, 'fragname')
nx.set_node_attributes(mol_graph, 0, 'fragid')
nx.set_node_attributes(mol_graph, 1, 'weight')
# we add all bonding descriptors to the molecule
nx.set_node_attributes(mol_graph, bonding_descrpt, 'bonding')
# set other attributes
nx.set_node_attributes(mol_graph, attributes)
# set the default atomnames consiting of the element and index
atomnames = {node[0]: node[1]['element']+str(node[0]) for node in mol_graph.nodes(data=True)}
nx.set_node_attributes(mol_graph, atomnames, 'atomname')
# we have just a single atom so no need for any annotations
if len(mol_graph) == 1:
# we set the hcount for all non-hydrogen elements
if mol_graph.nodes[0]['element'] != 'H':
mol_graph.nodes[0]['hcount'] = 0
# we tag all single h-atoms
else:
mol_graph.nodes[0]['single_h_frag'] = True
return mol_graph
# we need to remove hydrogen atoms except when they are having
# attributes; in this case we need to keep them
hatoms = set([n for n, e in mol_graph.nodes(data='element') if e == 'H'])
hatoms_to_keep = set(attributes.keys()) & hatoms
# temp fix until pysmiles util is imporved
# we set the element to z so they are ignored when pysmiles removes hatoms
nx.set_node_attributes(mol_graph,
dict(zip(hatoms_to_keep, len(hatoms_to_keep)*'z')),
'element')
pysmiles.remove_explicit_hydrogens(mol_graph)
# now we reset the hatoms
nx.set_node_attributes(mol_graph,
dict(zip(hatoms_to_keep, len(hatoms_to_keep)*'H')),
'element')
# we need to split countable node keys and the associated value
ez_isomer_class = {idx: val[-1] for idx, val in ez_isomers.items()}
nx.set_node_attributes(mol_graph, ez_isomer_class, 'ez_isomer_class')
return mol_graph