Source code for pyxtal.molecule

"""
Module for handling molecules.
"""

import importlib.resources
import os
import re
from copy import deepcopy
from operator import itemgetter

import networkx as nx
import numpy as np
from monty.serialization import loadfn
from numpy.random import Generator
from pymatgen.core.structure import Molecule
from pymatgen.symmetry.analyzer import PointGroupAnalyzer, generate_full_symmops
from scipy.spatial.distance import cdist
from scipy.spatial.transform import Rotation

from pyxtal.constants import single_smiles
from pyxtal.database.collection import Collection
from pyxtal.database.element import Element
from pyxtal.msg import AtomTypeError, ConformerError
from pyxtal.operations import OperationAnalyzer, SymmOp, angle, rotate_vector
from pyxtal.symmetry import Group
from pyxtal.tolerance import Tol_matrix

with importlib.resources.as_file(importlib.resources.files("pyxtal") / "database" / "bonds.json") as path:
    bonds = loadfn(path)

molecule_collection = Collection("molecules")


[docs] def find_rotor_from_smile(smile): """ Find the positions of rotatable bonds based on a SMILES string. Rotatable bonds are those which are not part of rings and which fit specific chemical patterns. These torsions are filtered by rules such as avoiding atoms with only one neighbor and avoiding equivalent torsions. Args: smile (str): The SMILES string representing the molecule. Returns: list of tuples: Each tuple represents a torsion as (i, j, k, l) where i-j-k-l are atom indices involved in the rotatable bond. """ def cleaner(list_to_clean, neighbors): """ Remove duplicate and invalid torsions from a list of atom index tuples. Filters torsions based on the neighbors count for the atoms involved in the torsion. This avoids torsions that involve terminal atoms and duplicates. Args: list_to_clean (list of tuples): List of torsions (i, j, k, l) neighbors (list of int): List of neighbors for each atom in the molecule. Returns: list of tuples: Cleaned list of torsions. """ for_remove = [] for x in reversed(range(len(list_to_clean))): ix0 = itemgetter(0)(list_to_clean[x]) ix1 = itemgetter(0)(list_to_clean[x]) ix2 = itemgetter(0)(list_to_clean[x]) ix3 = itemgetter(3)(list_to_clean[x]) # for i-j-k-l, we don't want i, l are the ending members # C-C-S=O is not a good choice since O is only 1-coordinated # C-C-NO2 is a good choice since O is only 1-coordinated # Remove torsions that involve terminal atoms with only one neighbor if neighbors[ix0] == 1 and neighbors[ix1] == 2 or neighbors[ix3] == 1 and neighbors[ix2] == 2: for_remove.append(x) else: # Remove duplicate torsions that are equivalent for y in reversed(range(x)): ix1 = itemgetter(1)(list_to_clean[x]) ix2 = itemgetter(2)(list_to_clean[x]) iy1 = itemgetter(1)(list_to_clean[y]) iy2 = itemgetter(2)(list_to_clean[y]) if [ix1, ix2] == [iy1, iy2] or [ix1, ix2] == [iy2, iy1]: for_remove.append(y) clean_list = [] for i, v in enumerate(list_to_clean): if i not in set(for_remove): clean_list.append(v) return clean_list if smile in ["Cl-", "F-", "Br-", "I-", "Li+", "Na+"]: return [] else: from rdkit import Chem # SMARTS patterns to identify rotatable bonds and double bonds smarts_torsion1 = "[*]~[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]~[*]" smarts_torsion2 = "[*]~[^2]=[^2]~[*]" # C=C bonds # smarts_torsion2="[*]~[^1]#[^1]~[*]" # C-C triples bonds, to be fixed mol = Chem.MolFromSmiles(smile) mol_with_H = Chem.AddHs(mol) N_atom = mol.GetNumAtoms() neighbors = [len(a.GetNeighbors()) for a in mol_with_H.GetAtoms()][:N_atom] # make sure that the ending members will be counted # neighbors[0] += 1; neighbors[-1] += 1 patn_tor1 = Chem.MolFromSmarts(smarts_torsion1) torsion1 = cleaner(list(mol.GetSubstructMatches(patn_tor1)), neighbors) patn_tor2 = Chem.MolFromSmarts(smarts_torsion2) torsion2 = cleaner(list(mol.GetSubstructMatches(patn_tor2)), neighbors) # Combine and clean torsions tmp = cleaner(torsion1 + torsion2, neighbors) # Exclude torsions that are part of rings torsions = [] for t in tmp: (i, j, k, l) = t b = mol.GetBondBetweenAtoms(j, k) if not b.IsInRing(): torsions.append(t) # if len(torsions) > 6: torsions[1] = (4, 7, 10, 15) return torsions # + [(6, 7, 8, 3), (6, 5, 4, 3)]
[docs] def has_non_aromatic_ring(smiles): """ Determine if a molecule has a non-aromatic ring. It checks if a cyclic ring system exists that is not aromatic. Args: smiles (str): A SMILES string representing the molecule. Returns: bool: True if it contains a non-aromatic ring, False otherwise. """ from rdkit import Chem # Convert the SMILES string to an RDKit molecule object mol = Chem.MolFromSmiles(smiles) # Check if the molecule has rings at all if not mol.HasSubstructMatch(Chem.MolFromSmarts("[R]")): return False # No rings present # Get information about the rings in the molecule ring_info = mol.GetRingInfo() # Check each ring to see if it is aromatic; return True if a non-aromatic ring is found return any( not all(mol.GetBondWithIdx(idx).GetIsAromatic() for idx in ring) for ring in ring_info.BondRings() ) # No non-aromatic rings found
[docs] def generate_molecules(smile, wps=None, N_iter=5, N_conf=10, tol=0.5): """ generate pyxtal_molecules from smiles codes. Args: smile: smiles code wps: list of wps N_iter: rdkit parameter N_conf: number of conformers tol: rdkit parameter Returns: a list of pyxtal molecules """ from rdkit import Chem from rdkit.Chem import AllChem torsionlist = find_rotor_from_smile(smile) if len(torsionlist) == 0: if has_non_aromatic_ring(smile): Num = 10 else: Num = len(tor) def get_conformers(smile, seed): mol = Chem.MolFromSmiles(smile) mol = Chem.AddHs(mol) ps = AllChem.ETKDGv3() ps.randomSeed = seed ps.runeRmsThresh = tol AllChem.EmbedMultipleConfs(mol, max([4, Num]), ps) return mol m0 = pyxtal_molecule(smile + ".smi", fix=True) _, valid = m0.get_orientations_in_wps(wps) mols = [] if valid: # print('torsion', m0.get_torsion_angles()) mols.append(m0) for i in range(N_iter): mol = get_conformers(smile, seed=i) res = AllChem.MMFFOptimizeMoleculeConfs(mol) for id, conf in enumerate(mol.GetConformers()): m = m0.copy() xyz = m.align(conf) m.reset_positions(xyz) m.get_symmetry(symmetrize=True) m.energy = res[id][1] _, valid = m.get_orientations_in_wps(wps) add = bool(valid) if add: match = False for mol in mols: rms, _ = mol.get_rmsd2(xyz, mol.mol.cart_coords) # print("rms", mol.get_torsion_angles(), rms) if rms < tol: match = True break if not match: # print(len(mols)+1, m.get_torsion_angles(xyz)) mols.append(m) if len(mols) == N_conf: return mols # for m in mols: # print(m.energy, m.pga.sch_symbol, len(torsionlist)) return mols
[docs] class pyxtal_molecule: """ A molecule class to support the descriptin of molecules in a xtal Features: 0. Parse the input from different formats (SMILES, xyz, gjf, etc.). 1. Estimate molecular properties such as volume, tolerance, and radii. 2. Find and store symmetry information of the molecule. 3. Get the principal axis of the molecule. 4. Re-align the molecule to center it at (0, 0, 0). SMILES Format: If a SMILES format is used, the molecular center is defined following RDKit's handling of molecular transformations: https://www.rdkit.org/docs/source/rdkit.Chem.rdMolTransforms.html Otherwise, the center is just the mean of atomic positions Args: mol (str or pymatgen.Molecule): The molecule representation, either as a string (SMILES or filename) or as a pymatgen `Molecule` object. tm (Tol_matrix, optional): A tolerance matrix object, used for molecular tolerances. symmetrize (bool, optional): Whether to symmetrize the molecule using its point group. fix (bool, optional): Fix torsions in the molecule. torsions (list, optional): List of torsions to analyze or fix. seed (int, optional): Random seed for internal processes. Defaults to a hex seed. random_state (int or numpy.Generator, optional): Numpy random state for random number generation. symtol (float, optional): Symmetry tolerance. Default is 0.3. active_sites (list, optional): List of active sites within the molecule. """
[docs] def list_molecules(): """ list the internally supported molecules """ molecule_collection.show_names()
def __init__( self, mol=None, symmetrize=True, fix=False, torsions=None, seed=None, random_state=None, tm=Tol_matrix(prototype="molecular"), symtol=0.3, active_sites=None, ): mo = None self.smile = None self.torsionlist = [] #None self.reflect = False if seed is None: seed = 0xF00D self.seed = seed # Active sites is a two list of tuples [(donors), (acceptors)] self.active_sites = active_sites if isinstance(random_state, Generator): self.random_state = random_state.spawn(1)[0] else: self.random_state = np.random.default_rng(random_state) # Parse molecules: either file or molecule name if isinstance(mol, str): tmp = mol.split(".") self.name = tmp[0] if len(tmp) > 1: # Load the molecule from the given file if tmp[-1] in ["xyz", "gjf", "g03", "json"]: if os.path.exists(mol): mo = Molecule.from_file(mol) else: raise NameError(f"{mol:s} is not a valid path") elif tmp[-1] == "smi": self.smile = tmp[0] res = self.rdkit_mol_init(tmp[0], fix, torsions) (symbols, xyz, self.torsionlist) = res mo = Molecule(symbols, xyz) symmetrize = False else: raise NameError(f"{tmp[-1]:s} is not a supported format") else: # print('\nLoad the molecule {:s} from collections'.format(mol)) mo = molecule_collection[mol] elif hasattr(mol, "sites"): # pymatgen molecule self.name = str(mol.formula) mo = mol if mo is None: msg = f"Could not create molecules from given input: {mol:s}" raise NameError(msg) # Molecule and symmetry analysis self.props = mo.site_properties if len(mo) > 1 and symmetrize: try: pga = PointGroupAnalyzer(mo, symtol) mo = pga.symmetrize_molecule()["sym_mol"] except: print( "Warning: Problem in parsing molecular symmetry with symtol=", symtol, ) print("Proceed with no symmetrization") self.mol = mo self.get_symmetry() # Additional molecular properties self.tm = tm self.box = self.get_box() self.volume = self.box.volume self.get_symbols() self.get_radius() self.tols_matrix = self.get_tols_matrix() xyz = self.mol.cart_coords self.reset_positions(xyz - self.get_center(xyz)) if self.smile is not None and self.smile not in single_smiles: # print(self.smile) ori, _, self.reflect = self.get_orientation(xyz) def __str__(self): return "[" + self.name + "]"
[docs] def save_str(self): """ save the object as a dictionary """ return self.mol.to(fmt="xyz")
[docs] @classmethod def load_str(cls, string): """ load the molecule from a dictionary """ mol = Molecule.from_str(string, fmt="xyz") return cls(mol)
[docs] def copy(self): """ simply copy the structure """ return deepcopy(self)
[docs] def swap_axis(self, ax): """ swap the molecular axis """ coords = self.mol.cart_coords[:, ax] mo = Molecule(self.symbols, coords) return pyxtal_molecule(mo, self.tm)
[docs] def get_box(self, padding=None): """ Given a molecule, find a minimum orthorhombic box containing it. Size is calculated using min and max x, y, and z values, plus the padding defined by the vdw radius For best results, call oriented_molecule first. Args: padding: float (default is 3.4 according to the vdw radius of C) Returns: box: a Box object """ mol, P = reoriented_molecule(self.mol) xyz = mol.cart_coords dims = [0, 0, 0] for i in range(3): dims[i] = np.max(xyz[:, i]) - np.min(xyz[:, i]) if padding is not None: dims[i] += padding dims[i] = max([dims[i], 2.0]) # for planar molecules else: ids = np.argsort(xyz[:, i]) r = Element(mol[ids[0]].species_string).vdw_radius r += Element(mol[ids[-1]].species_string).vdw_radius dims[i] = max([dims[i] + r, 3.4]) # for planar molecules return Box(dims)
[docs] def get_lengths(self): if not hasattr(self, "box"): self.box = self.get_box() return self.box.width, self.box.height, self.box.length
[docs] def get_max_length(self): w, h, l = self.get_lengths() return max([w, h, l])
[docs] def get_box_coordinates(self, xyz, padding=0, resolution=1.0): """ create the points cloud to describe the molecular box Args: center: molecular center position orientation: orientation matrix padding: padding of the box resolution: float in angstrom Return: cell: box axis vertices: [N, 3] array, Cartesian coordinates to describe the box. center: box center """ cell = self.get_principle_axes(xyz).T center = self.get_center(xyz) # , geometry=True) box = self.get_box(padding) # print(box) w, h, l = box.width, box.height, box.length cell[0, :] *= l cell[1, :] *= w cell[2, :] *= h x_ = np.linspace(-1 / 2, 1 / 2, int(l / resolution) + 1) y_ = np.linspace(-1 / 2, 1 / 2, int(w / resolution) + 1) z_ = np.linspace(-1 / 2, 1 / 2, int(h / resolution) + 1) # XY # print(len(x_), len(y_), len(z_)) x, y = np.meshgrid(x_, y_, indexing="ij") size = len(x.flatten()) xy = np.zeros([size * 2, 3]) xy[:size, 0] = x.flatten() xy[size:, 0] = x.flatten() xy[:size, 1] = y.flatten() xy[size:, 1] = y.flatten() xy[:size, 2] = -0.5 xy[size:, 2] = 0.5 # print(xy.shape) # print(xy) y, z = np.meshgrid(y_, z_, indexing="ij") size = len(y.flatten()) yz = np.zeros([size * 2, 3]) yz[:size, 1] = y.flatten() yz[size:, 1] = y.flatten() yz[:size, 2] = z.flatten() yz[size:, 2] = z.flatten() yz[:size, 0] = -0.5 yz[size:, 0] = 0.5 x, z = np.meshgrid(x_, z_, indexing="ij") size = len(z.flatten()) xz = np.zeros([size * 2, 3]) xz[:size, 0] = x.flatten() xz[size:, 0] = x.flatten() xz[:size, 2] = z.flatten() xz[size:, 2] = z.flatten() xz[:size, 1] = -0.5 xz[size:, 1] = 0.5 vertices = np.zeros([len(xy) + len(yz) + len(xz), 3]) vertices[: len(xy), :] = xy vertices[len(xy) : len(xy) + len(yz), :] = yz vertices[len(xy) + len(yz) :, :] = xz vertices = vertices.dot(cell) vertices += center return cell, vertices, center
[docs] def get_radius(self): """ get the radius of a molecule """ r_max = 0 for coord, number in zip(self.mol.cart_coords, self.mol.atomic_numbers): radius = np.linalg.norm(coord) + 0.5 * self.tm.get_tol(number, number) if radius > r_max: r_max = radius self.radius = r_max # reestimate the radius if it has stick shape rmax = max([self.box.width, self.box.height, self.box.length]) rmin = min([self.box.width, self.box.height, self.box.length]) if rmax / rmin > 3 and rmax > 12: self.radius = rmin
[docs] def get_symbols(self): self.symbols = [specie.name for specie in self.mol.species]
[docs] def get_tols_matrix(self, mol2=None, tm=None): """ Compute the 2D tolerance matrix between the current and other molecules Args: mol2: the 2nd pyxtal_molecule object tm: tolerance class Returns: a 2D matrix which is used internally for distance checking. """ if tm is None: tm = self.tm numbers1 = self.mol.atomic_numbers numbers2 = self.mol.atomic_numbers if mol2 is None else mol2.mol.atomic_numbers tols = np.zeros((len(numbers1), len(numbers2))) for i1, number1 in enumerate(numbers1): for i2, number2 in enumerate(numbers2): tols[i1, i2] = tm.get_tol(number1, number2) # allow hydrogen bond if [number1, number2] in [ [1, 7], [1, 8], [1, 9], [7, 1], [8, 1], [9, 1], ]: tols[i1, i2] *= 0.9 if len(self.mol) == 1: tols *= 0.8 # if only one atom, reduce the tolerance return tols
[docs] def set_labels(self): """ Set atom labels for the given molecule for H-bond caculation. Needs to identify the following: - (O)N-H - acid-O - amide-O - alcohol-O - N with NH2 - N with NH """ def search_H(pairs, ref, pos_H): """ quick routine to search for the id of H that is bonded to N/O: Args: pairs: list of atomic pairs ref: reference id for O or N pos_H: the starting position for H """ res = [] for p in pairs: if ref in p and max(p) >= pos_H: res.append(max(p)) return res if len(self.mol) > 1: from rdkit import Chem # template acid1 = Chem.MolFromSmarts("[C,c]C(=O)O") # COOH acid2 = Chem.MolFromSmarts("[CH](=O)O") # COOH amide1 = Chem.MolFromSmarts("[C,c]C(=O)N") # CONH amide2 = Chem.MolFromSmarts("[CH](=O)N") # CONH alcohol = Chem.MolFromSmarts("[c,CX3][OH]") # ROH # alcohol2 = Chem.MolFromSmarts('c[OH]') #ROH Chem.MolFromSmarts("c") # Aromatic NH1 = Chem.MolFromSmarts("[NH1]") # NH1 NH2 = Chem.MolFromSmarts("[NH2]") # NH2 # Initialize mol m = Chem.MolFromSmiles(self.smile) pos_H = m.GetNumAtoms() # starting position for H m = Chem.AddHs(m) labels = [a.GetSymbol() for a in m.GetAtoms()] # Create bonds bonds = m.GetBonds() pairs = np.zeros([len(bonds), 2], dtype=int) for i, bond in enumerate(bonds): pairs[i, 0] = bond.GetBeginAtomIdx() pairs[i, 1] = bond.GetEndAtomIdx() # Assign aromatic # ds = m.GetSubstructMatches(aromatic_carbon) # for d in ds: labels[d[0]] += '_aromatic' # Assign O N_O = labels.count("O") if N_O > 0: count_O = 0 for i, smart in enumerate([acid1, acid2, amide1, amide2, alcohol]): ds = m.GetSubstructMatches(smart) # print(i, ds) for d in ds: if i in [0, 1]: # COOH or COO in general if i == 0: labels[d[2]] += "_acid" id = 3 # labels[d[3]] += '_acid' else: id = 2 labels[d[1]] += "_acid" Hs = search_H(pairs, d[id], pos_H) if len(Hs) > 0: labels[Hs[0]] += "_O" count_O += 2 elif i in [2, 3]: # CONH id = 3 if i == 2 else 2 labels[d[id - 1]] += "_amide" count_O += 1 else: # OH labels[d[-1]] += "_alcohol" labels[search_H(pairs, d[-1], pos_H)[0]] += "_O" count_O += 1 if count_O == N_O: # print(i, count_O, 'break') break # Assign N N_N = labels.count("N") if N_N > 0: count_N = 0 for i, smart in enumerate([NH1, NH2]): ds = m.GetSubstructMatches(smart) for d in ds: if i == 0: labels[d[0]] += "_H1" # N_H2 labels[search_H(pairs, d[0], pos_H)[0]] += "_N" else: labels[d[0]] += "_H2" # N_H2 Hs = search_H(pairs, d[0], pos_H) labels[Hs[0]] += "_N" labels[Hs[1]] += "_N" count_N += 1 if count_N == N_N: break else: labels = self.symbols print(labels) self.labels = labels
[docs] def get_coefs_matrix(self, mol2=None, ignore_error=True): """ Get the Atom-Atom potential parameters E = A*exp(-B*R) - C*R^(-6) according to Gavezotti, Acc. Chem. Res., 27, 1994 in Kcal/mol and angstrom Args: mol2: the 2nd pyxtal_molecule object tm: tolerance class Returns: a 3D matrix for computing the intermolecular energy """ labels1 = self.labels if hasattr(self, "labels") else self.symbols numbers1 = self.mol.atomic_numbers if mol2 is None: numbers2 = self.mol.atomic_numbers labels2 = labels1 else: numbers2 = mol2.mol.atomic_numbers labels2 = mol2.labels if hasattr(mol2, "labels") else mol2.symbols coefs = np.zeros([len(numbers1), len(numbers2), 3]) for i1, n1 in enumerate(numbers1): for i2, n2 in enumerate(numbers2): if [n1, n2] in [[1, 1]]: # H-H coefs[i1, i2, :] = [5774, 4.01, 26.1] elif [n1, n2] in [[1, 6], [6, 1]]: # H-C coefs[i1, i2, :] = [28870, 4.10, 113.0] elif [n1, n2] in [[1, 7]]: # H-N if len(labels1[i1]) > 1: if labels1[i1] == "H_N1": # HB-N(-NH-N): coefs[i1, i2, :] = [7215600, 7.78, 476] else: # HB-N(-NH2-N): coefs[i1, i2, :] = 1803920, 7.37, 165 else: coefs[i1, i2, :] = [54560, 4.52, 120.0] elif [n1, n2] in [[7, 1]]: # N-H if len(labels2[i2]) > 1: if labels2[i2] == "H_N1": # HB-N(-NH-N): coefs[i1, i2, :] = [7215600, 7.78, 476] else: # HB-N(-NH2-N): coefs[i1, i2, :] = 1803920, 7.37, 165 else: coefs[i1, i2, :] = [54560, 4.52, 120.0] elif [n1, n2] in [[1, 8]]: # H-O if len(labels1[i1]) > 1: if labels2[i2] == "O_amide": # HB...O=C-N coefs[i1, i2, :] = [3607810, 7.78, 238] elif labels2[i2] == "O_acid": # HB...O=C-OH coefs[i1, i2, :] = [6313670, 8.75, 205] elif labels2[i2] == "O_alcohol": # HB...OH coefs[i1, i2, :] = [4509750, 7.78, 298] else: # print("Oxygen label problem", labels2[i2]); import sys; sys.exit() coefs[i1, i2, :] = [70610, 4.82, 105.0] else: # Normal cases: coefs[i1, i2, :] = [70610, 4.82, 105.0] elif [n1, n2] in [[8, 1]]: # O-H if len(labels2[i2]) > 1: if labels1[i1] == "O_amide": # HB...O=C-N coefs[i1, i2, :] = [3607810, 7.78, 238] elif labels1[i1] == "O_acid": # HB...O=C-OH coefs[i1, i2, :] = [6313670, 8.75, 205] elif labels1[i1] == "O_alcohol": # HB...OH coefs[i1, i2, :] = [4509750, 7.78, 298] else: # print('Oxygen label problem', labels2[i1]); import sys; sys.exit() coefs[i1, i2, :] = [70610, 4.82, 105.0] else: # Normal cases: coefs[i1, i2, :] = [70610, 4.82, 105.0] elif [n1, n2] in [[1, 16], [16, 1]]: # H-S coefs[i1, i2, :] = [64190, 4.03, 279.0] elif [n1, n2] in [[1, 17], [17, 1]]: # H-Cl coefs[i1, i2, :] = [70020, 4.09, 279.0] elif [n1, n2] in [[6, 6]]: # C-C coefs[i1, i2, :] = [54050, 3.47, 578.0] elif [n1, n2] in [[6, 7], [7, 6]]: # C-N coefs[i1, i2, :] = [117470, 3.86, 667.0] elif [n1, n2] in [[6, 8], [8, 6]]: # C-O coefs[i1, i2, :] = [93950, 3.74, 641.0] elif [n1, n2] in [[6, 16], [16, 6]]: # C-S coefs[i1, i2, :] = [126460, 3.41, 1504.0] elif [n1, n2] in [[6, 17], [17, 6]]: # C-Cl coefs[i1, i2, :] = [93370, 3.52, 923.0] elif [n1, n2] in [[7, 7]]: # N-N coefs[i1, i2, :] = [87300, 3.65, 691.0] elif [n1, n2] in [[7, 8], [8, 7]]: # N-O coefs[i1, i2, :] = [64190, 3.86, 364.0] elif [n1, n2] in [[7, 16], [16, 7], [7, 17], [17, 7]]: # N-S/Cl coefs[i1, i2, :] = [0, 3.65, 0] elif [n1, n2] in [[8, 8]]: # O-O if False: # labels1[i1] == 'O_alcohol' and labels2[i2] == 'O_alcohol': coefs[i1, i2, :] = [3607800, 5.00, 3372.0] else: coefs[i1, i2, :] = [46680, 3.74, 319.0] elif [n1, n2] in [[8, 16], [16, 8]]: # O-S coefs[i1, i2, :] = [110160, 3.63, 906.0] elif [n1, n2] in [[8, 17], [17, 8]]: # O-Cl coefs[i1, i2, :] = [80855, 3.63, 665.0] elif [n1, n2] in [[16, 16]]: # S-S coefs[i1, i2, :] = [259960, 3.52, 2571] elif [n1, n2] in [[16, 17], [17, 16]]: # S-Cl coefs[i1, i2, :] = [1800000, 3.52, 2000] # made elif [n1, n2] in [[17, 17]]: # Cl-Cl coefs[i1, i2, :] = [140050, 3.52, 1385] else: if ignore_error: coefs[i1, i2, :] = [0.0, 0.0, 0.0] else: msg = f"atom type is not supported: {n1:d} {n2:d}" raise AtomTypeError(msg) # return None return coefs
[docs] def show(self): """ show the molecule """ from pyxtal.viz import display_molecules return display_molecules([self.mol])
[docs] def show_box(self, center=np.zeros(3), orientation=None): """ show the molecule """ from pyxtal.viz import display_molecules return display_molecules(self.mol)
[docs] def rdkit_mol(self, N_confs=1): """ initialize the mol xyz and torsion list """ from rdkit import Chem mol = Chem.MolFromMolBlock(self.rdkit_mb, removeHs=False) if N_confs > 1: conf = mol.GetConformer(0) for _i in range(N_confs - 1): mol.AddConformer(conf, True) return mol
[docs] def rdkit_mol_init(self, smile, fix, torsions): """ initialize the mol xyz and torsion list Args: smile: smile string fix: whether or not fix the seed torsions: None or list """ from rdkit import Chem from rdkit.Chem import AllChem if smile not in single_smiles: # ["Cl-", "F-", "Br-", "I-", "Li+", "Na+"]: torsionlist = find_rotor_from_smile(smile) mol = Chem.MolFromSmiles(smile) mol = Chem.AddHs(mol) symbols = [] for id in range(mol.GetNumAtoms()): symbols.append(mol.GetAtomWithIdx(id).GetSymbol()) if len(smile) > 100: # a tmp fix for KEKULN10 AllChem.EmbedMultipleConfs(mol, numConfs=1, randomSeed=3) cid = 0 else: ps = AllChem.ETKDGv3() ps.randomSeed = self.seed AllChem.EmbedMultipleConfs(mol, 1, ps) if mol.GetNumConformers() == 0: AllChem.EmbedMultipleConfs(mol, 3, ps) res = AllChem.MMFFOptimizeMoleculeConfs(mol) engs = [c[1] for c in res] cid = engs.index(min(engs)) self.rdkit_mb = Chem.MolToMolBlock(mol) self.energy = engs[cid] ref_conf = mol.GetConformer(cid) # always the reference molecule if fix or torsions is not None or len(torsionlist) == 0: conf = ref_conf else: randomSeed = -1 if self.random_state is None else 1 AllChem.EmbedMultipleConfs( mol, numConfs=max([1, 4 * len(torsionlist)]), maxAttempts=200, useRandomCoords=True, pruneRmsThresh=0.5, randomSeed=randomSeed, ) N_confs = mol.GetNumConformers() conf_id = int(self.random_state.choice(range(N_confs))) conf = mol.GetConformer(conf_id) # xyz = conf.GetPositions() # res = AllChem.MMFFOptimizeMoleculeConfs(mol) # print("Eng", res[conf_id]) # set tosion angles from random or pre-defined values if torsions is not None: xyz = self.set_torsion_angles(conf, torsions, torsionlist=torsionlist) else: xyz = conf.GetPositions() xyz -= self.get_center(xyz) else: # single atom cation or anions pattern = r"[A-Za-z]+(?=[+\-]?[^A-Za-z]|$)" matches = re.findall(pattern, smile) if matches: symbols = [matches[0]] # ["Cl"] else: raise ValueError("the input smiles cannot be analyzed", smile) xyz = np.zeros([1, 3]) torsionlist = [] return symbols, xyz, torsionlist
[docs] def perturb_torsion(self, xyz): """ slightly perturb the torsion """ angs = self.get_torsion_angles(xyz, self.torsionlist) angs *= 1 + 0.1 * self.random_state.uniform(-1.0, 1.0, len(angs)) xyz = self.set_torsion_angles(conf, angs, torsionlist=self.torsionlist) xyz -= self.get_center(xyz) return xyz
[docs] def align(self, conf, reflect=False, torsionlist=None): """ Align the molecule and return the xyz The default CanonicalizeConformer function may also include inversion """ from rdkit.Chem import rdMolTransforms as rdmt # Rotation if len(self.smile) > 1: trans = rdmt.ComputeCanonicalTransform(conf) if abs(abs(np.linalg.det(trans)) - 1.0) > 1e-1: print("Bug in trans", np.linalg.det(trans)) import sys sys.exit() elif np.linalg.det(trans[:3, :3]) < 0: trans[:3, :3] *= -1 # add reflection if needed if reflect: trans[:3, :3] *= -1 rdmt.TransformConformer(conf, trans) # Translation pt = rdmt.ComputeCentroid(conf) center = np.array([pt.x, pt.y, pt.z]) xyz = conf.GetPositions() - center # adjust cases like H2O if len(self.smile) == 1: xyz -= np.mean(xyz, axis=0) A = get_inertia_tensor(xyz) P = np.linalg.eigh(A)[1] if np.linalg.det(P) < 0: P[0] *= -1 xyz = np.dot(xyz, P) return xyz
[docs] def get_center(self, xyz, geometry=False): """ get the molecular center for a transformed xyz """ if geometry or self.smile is None: return np.mean(xyz, axis=0) else: if self.smile in [ "Cl-", "F-", "Br-", "I-", "Li+", "Na+", "[Cl-]", "[F-]", "[Br-]", "[I-]", "[Li+]", "[Na+]", ]: return xyz[0] else: if len(self.smile) == 1: # return xyz[0] return np.mean(xyz, axis=0) else: # from rdkit from rdkit.Chem import rdMolTransforms as rdmt from rdkit.Geometry import Point3D conf = self.rdkit_mol().GetConformer(0) for i in range(len(xyz)): x, y, z = xyz[i] conf.SetAtomPosition(i, Point3D(x, y, z)) pt = rdmt.ComputeCentroid(conf) return np.array([pt.x, pt.y, pt.z])
[docs] def get_principle_axes(self, xyz, rdmt=True): """ get the principle axis for a rotated xyz, sorted by the moments """ if self.smile is None or len(self.smile) == 1 or not rdmt: Inertia = get_inertia_tensor(xyz) _, matrix = np.linalg.eigh(Inertia) return matrix else: from rdkit.Chem import rdMolTransforms as rdmt from rdkit.Geometry import Point3D conf1 = self.rdkit_mol().GetConformer(0) for i in range(len(self.mol)): x, y, z = xyz[i] conf1.SetAtomPosition(i, Point3D(x, y, z)) return rdmt.ComputePrincipalAxesAndMoments(conf1)[0]
[docs] def get_torsion_angles(self, xyz=None, torsionlist=None): """ get the torsion angles """ if xyz is None: xyz = self.mol.cart_coords if torsionlist is None: torsionlist = self.torsionlist angs = [] if len(torsionlist) > 0: from rdkit.Chem import rdMolTransforms as rdmt from rdkit.Geometry import Point3D conf = self.rdkit_mol().GetConformer(0) for i in range(len(xyz)): x, y, z = xyz[i] conf.SetAtomPosition(i, Point3D(x, y, z)) for torsion in torsionlist: (i, j, k, l) = torsion angs.append(rdmt.GetDihedralDeg(conf, i, j, k, l)) return angs
[docs] def set_torsion_angles(self, conf, angles, reflect=False, torsionlist=None): """ reset the torsion angles and update molecular xyz """ from rdkit.Chem import rdMolTransforms as rdmt if torsionlist is None: torsionlist = self.torsionlist for id, torsion in enumerate(torsionlist): (i, j, k, l) = torsion rdmt.SetDihedralDeg(conf, i, j, k, l, angles[id]) return self.align(conf, reflect)
[docs] def relax(self, xyz, align=False): """ Relax the input xyz coordinates with rdit MMFF Args: xyz: 3D coordinates align: whether or not align the xyz Returns: xyz: new xyz eng: energy value """ from rdkit.Chem import AllChem from rdkit.Geometry import Point3D mol = self.rdkit_mol(1) conf0 = mol.GetConformer(0) # reset the xyz for i in range(len(self.mol)): x, y, z = xyz[i] conf0.SetAtomPosition(i, Point3D(x, y, z)) res = AllChem.MMFFOptimizeMoleculeConfs(mol) xyz = self.align(conf0) if align else mol.GetConformer(0).GetPositions() return xyz, res[0][1]
[docs] def get_rmsd2(self, xyz0, xyz1): """ Compute the rmsd with another 3D xyz coordinates Args: xyz: 3D coordinates Returns: rmsd: transition matrix: """ from rdkit.Chem import RemoveHs, rdMolAlign from rdkit.Geometry import Point3D mol = self.rdkit_mol(3) conf0 = mol.GetConformer(0) conf1 = mol.GetConformer(1) for i in range(len(self.mol)): x0, y0, z0 = xyz0[i] x1, y1, z1 = xyz1[i] conf0.SetAtomPosition(i, Point3D(x0, y0, z0)) conf1.SetAtomPosition(i, Point3D(x1, y1, z1)) mol = RemoveHs(mol) rmsd, trans = rdMolAlign.GetAlignmentTransform(mol, mol, 1, 0) return rmsd, trans
[docs] def get_rmsd(self, xyz, debug=False): """ Compute the rmsd with another 3D xyz coordinates Args: xyz: 3D coordinates Returns: rmsd (float): rmsd values transition matrix: 4*4 matrix match: True or False """ from rdkit.Chem import RemoveHs, rdMolAlign from rdkit.Geometry import Point3D mol = self.rdkit_mol(3) # 3 conformers for comparison conf0 = mol.GetConformer(0) conf1 = mol.GetConformer(1) # reference+reflection conf2 = mol.GetConformer(2) # trial xyz angs = self.get_torsion_angles(xyz) xyz0 = self.set_torsion_angles(conf0, angs) # conf0 with aligned xyz1 = self.set_torsion_angles(conf0, angs, True) # conf0 with aligned+reflect # print('xyz0', xyz0) # reset the xyz for i in range(len(self.mol)): x0, y0, z0 = xyz0[i] x1, y1, z1 = xyz1[i] x, y, z = xyz[i] conf0.SetAtomPosition(i, Point3D(x0, y0, z0)) conf1.SetAtomPosition(i, Point3D(x1, y1, z1)) conf2.SetAtomPosition(i, Point3D(x, y, z)) mol = RemoveHs(mol) rmsd1, trans1 = rdMolAlign.GetAlignmentTransform(mol, mol, 2, 0) rmsd2, trans2 = rdMolAlign.GetAlignmentTransform(mol, mol, 2, 1) if debug: from rdkit.Chem import rdmolfiles rdmolfiles.MolToXYZFile(mol, "1.xyz", 0) rdmolfiles.MolToXYZFile(mol, "2.xyz", 1) rdmolfiles.MolToXYZFile(mol, "3.xyz", 2) print(rmsd1, rmsd2) if rmsd1 <= rmsd2: # return rmsd1, trans1, True return rmsd1, trans1, False else: # return rmsd2, trans2, False return rmsd2, trans2, True
[docs] def get_orientation(self, xyz, rtol=0.15): """ For the given xyz, compute the orientation Args: xyz: molecular xyz """ xyz -= self.get_center(xyz) if self.smile is not None and len(self.smile) > 1: # not in ["O", "o"]: rmsd, trans, reflect = self.get_rmsd(xyz) tol = rtol * len(xyz) if rmsd < tol: trans = trans[:3, :3].T r = Rotation.from_matrix(trans) return r.as_euler("zxy", degrees=True), rmsd, reflect else: msg = "Problem in conformer\n" msg += f"{rmsd1:5.2f} {rmsd2:5.2f}\n" if len(self.torsionlist) > 0: msg += str(self.get_torsion_angles(xyz)) + "\n" msg += str(self.get_torsion_angles(xyz0)) + "\n" msg += str(self.get_torsion_angles(xyz1)) + "\n" raise ConformerError(msg) else: # the orientation of CH4, NH3, H2O ref = np.array( [ [-0.00111384, 0.36313718, 0.0], [-0.82498189, -0.18196256, 0.0], [0.82609573, -0.18117463, 0.0], ] ) Inertia = get_inertia_tensor(xyz) _, matrix = np.linalg.eigh(Inertia) np.dot(xyz, matrix) # identify the rotation matrix libs = np.array( [ [[1, 1, 1]], [[-1, 1, 1]], [[1, -1, 1]], [[1, 1, -1]], [[-1, -1, 1]], [[1, -1, -1]], [[-1, 1, -1]], [[-1, -1, -1]], ] ) dists = np.zeros(8) for i, lib in enumerate(libs): matrix0 = matrix * np.repeat(lib, 3, axis=0) res = np.dot(ref, np.linalg.inv(matrix0)) dists[i] = np.sum((res - xyz[:len(ref)]) ** 2) # print(i, res) id = np.argmin(dists) matrix = matrix * np.repeat(libs[id], 3, axis=0) r = Rotation.from_matrix(np.linalg.inv(matrix).T) ang = r.as_euler("zxy", degrees=True) return ang, 0, False
[docs] def to_ase(self): """ Convert to ase atoms """ return self.mol.to_ase_atoms()
[docs] def reset_positions(self, coors): """ reset the coordinates """ from pymatgen.core.sites import Site if len(coors) != len(self.mol._sites): raise ValueError("number of atoms is inconsistent!") else: for i, coor in enumerate(coors): _site = self.mol._sites[i] new_site = Site(_site.species, coor, properties=_site.properties) self.mol._sites[i] = new_site
[docs] def apply_inversion(self): """ reset the coordinates """ xyz = self.mol.cart_coords center = self.get_center(xyz) xyz -= center xyz *= -1 self.reset_positions(xyz)
[docs] def get_symmetry(self, xyz=None, symmetrize=False, rtol=0.30): """ Set the molecule's point symmetry. - pga: pymatgen.symmetry.analyzer.PointGroupAnalyzer object - pg: pyxtal.symmetry.Group object - symops: a list of SymmOp objects Args: symmetrize: boolean, whether or not symmetrize the coordinates """ mol = deepcopy(self.mol) if xyz is None else Molecule(self.symbols, xyz) if self.smile is not None: mol.remove_species("H") mol._spin_multiplicity = None # don't check spin if symmetrize: pga = PointGroupAnalyzer(mol, rtol, eigen_tolerance=1e-3) mol = pga.symmetrize_molecule()["sym_mol"] pga = PointGroupAnalyzer(mol, rtol, eigen_tolerance=1e-3) self.mol_no_h = mol # print(mol.to(fmt='xyz'), pga.sch_symbol) # For single atoms, no point group using a list of operations if len(mol) == 1: symm_m = [] symbol = "C1" else: symbol = pga.sch_symbol pg = pga.get_pointgroup() symm_m = list(pg) if "*" in symbol: # linear molecules symbol = symbol.replace("*", "6") # Add 12-fold and reflections in place of ininitesimal rotation for i, axis in enumerate(np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])): # op = SymmOp.from_rotation_and_translation(aa2matrix(axis, np.pi/6), [0,0,0]) m1 = Rotation.from_rotvec(np.pi / 6 * axis).as_matrix() op = SymmOp.from_rotation_and_translation(m1, [0, 0, 0]) if pga.is_valid_op(op): symm_m.append(op) # Any molecule with infinitesimal symmetry is linear; # Thus, it possess mirror symmetry for any axis perpendicular # To the rotational axis. pymatgen does not add this symmetry # for all linear molecules - for example, hydrogen if i == 0: symm_m.append(SymmOp.from_xyz_str("x,-y,z")) symm_m.append(SymmOp.from_xyz_str("x,y,-z")) # r = SymmOp.from_xyz_str("-x,y,-z") elif i == 1: symm_m.append(SymmOp.from_xyz_str("-x,y,z")) symm_m.append(SymmOp.from_xyz_str("x,y,-z")) # r = SymmOp.from_xyz_str("-x,-y,z") elif i == 2: symm_m.append(SymmOp.from_xyz_str("-x,y,z")) symm_m.append(SymmOp.from_xyz_str("x,-y,z")) # r = SymmOp.from_xyz_str("x,-y,-z") # Generate a full list of SymmOps for the pointgroup symm_m = generate_full_symmops(symm_m, 1e-3) break self.symops = symm_m self.pga = pga if symbol == "S2": symbol = "Ci" self.pg = Group(symbol, dim=0)
[docs] def get_orientations_in_wps(self, wps=None, rtol=1e-2): """ Compute the valid orientations from a given Wyckoff site symmetry. Args: wp: a pyxtal.symmetry.Wyckoff_position object Returns: a list of operations.Orientation objects """ if wps is None: return None, True else: valid = False valid_oris = [] for wp in wps: allowed = self.get_orientations_in_wp(wp, rtol) if len(allowed) > 0: valid = True valid_oris.append(allowed) return valid_oris, valid
[docs] def get_orientations_in_wp(self, wp, rtol=1e-2): """ Compute the valid orientations from a given Wyckoff site symmetry. Args: wp: a pyxtal.symmetry.Wyckoff_position object Returns: a list of pyxtal.molecule.Orientation objects """ # For single atoms, there are no constraints if len(self.mol) == 1 or wp.index == 0: return [Orientation([[1, 0, 0], [0, 1, 0], [0, 0, 1]], degrees=2, random_state=self.random_state)] # C1 molecule cannot take specical position elif wp.index > 1 and self.pga.sch_symbol == "C1": return [] symm_w = wp.get_site_symm_wo_translation() # symmetry without translation # molecule has fewer symops if len(self.pg[0]) < len(symm_w): return [] symm_m = self.symops opa_m = [] for op_m in symm_m: opa = OperationAnalyzer(op_m) opa_m.append(opa) # Store OperationAnalyzer objects for each Wyckoff symmetry SymmOp opa_w = [] for op_w in symm_w: opa_w.append(OperationAnalyzer(op_w)) """ Check for constraints from the Wyckoff symmetry. If we find ANY two constraints (SymmOps with unique axes), the molecule's point group MUST contain SymmOps which can be aligned to these particular constraints. However, there may be multiple compatible orientations of the molecule consistent with these constraints """ constraint1 = None constraint2 = None for i, op_w in enumerate(symm_w): if opa_w[i].axis is not None: constraint1 = opa_w[i] for j, op_w in enumerate(symm_w): if opa_w[j].axis is not None: dot = np.dot(opa_w[i].axis, opa_w[j].axis) if (not np.isclose(dot, 1, rtol=rtol)) and (not np.isclose(dot, -1, rtol=rtol)): constraint2 = opa_w[j] break break # Indirectly store the angle between the constraint axes if constraint1 is not None and constraint2 is not None: dot_w = np.dot(constraint1.axis, constraint2.axis) # Generate 1st consistent molecular constraints constraints_m = [] if constraint1 is not None: for i, opa1 in enumerate(opa_m): if opa1.is_conjugate(constraint1): constraints_m.append([opa1, []]) # Generate 2nd constraint in opposite direction extra = deepcopy(opa1) extra.axis = [ opa1.axis[0] * -1, opa1.axis[1] * -1, opa1.axis[2] * -1, ] constraints_m.append([extra, []]) # Remove redundancy for the first constraints list_i = list(range(len(constraints_m))) list_j = list(range(len(constraints_m))) copy = deepcopy(constraints_m) for i, c1 in enumerate(copy): if i in list_i: for j, c2 in enumerate(copy): if i > j and j in list_j and j in list_i: # Check if axes are colinear if np.isclose(np.dot(c1[0].axis, c2[0].axis), 1, rtol=rtol): list_i.remove(j) list_j.remove(j) # Check if axes are symmetrically equivalent else: cond1 = False # cond2 = False for opa in opa_m: if opa.type == "rotation": op = opa.op if np.isclose( np.dot(op.operate(c1[0].axis), c2[0].axis), 1, rtol=5 * rtol, ): cond1 = True break if cond1: # or cond2 is True: list_i.remove(j) list_j.remove(j) c_m = deepcopy(constraints_m) constraints_m = [] for i in list_i: constraints_m.append(c_m[i]) # Generate 2nd consistent molecular constraints valid = list(range(len(constraints_m))) if constraint2 is not None: for i, c in enumerate(constraints_m): opa1 = c[0] for j, opa2 in enumerate(opa_m): if opa2.is_conjugate(constraint2): dot_m = np.dot(opa1.axis, opa2.axis) # Ensure that the angles are equal if abs(dot_m - dot_w) < 0.02 or abs(dot_m + dot_w) < 0.02: constraints_m[i][1].append(opa2) # Generate 2nd constraint in opposite direction extra = deepcopy(opa2) extra.axis = [ opa2.axis[0] * -1, opa2.axis[1] * -1, opa2.axis[2] * -1, ] constraints_m[i][1].append(extra) # If no consistent constraints are found, remove first constraint if constraints_m[i][1] == []: valid.remove(i) copy = deepcopy(constraints_m) constraints_m = [] for i in valid: constraints_m.append(copy[i]) # Generate orientations consistent with the possible constraints orientations = [] # Loop over molecular constraint sets for c1 in constraints_m: v1 = c1[0].axis v2 = constraint1.axis T = rotate_vector(v1, v2) # If there is only one constraint if c1[1] == []: o = Orientation(T, degrees=1, axis=constraint1.axis, random_state=self.random_state) orientations.append(o) else: # Loop over second molecular constraints for opa in c1[1]: phi = angle(constraint1.axis, constraint2.axis) phi2 = angle(constraint1.axis, np.dot(T, opa.axis)) if np.isclose(phi, phi2, rtol=rtol): r = np.sin(phi) c = np.linalg.norm(np.dot(T, opa.axis) - constraint2.axis) theta = np.arccos(1 - (c**2) / (2 * (r**2))) # R = aa2matrix(constraint1.axis, theta) R = Rotation.from_rotvec(theta * constraint1.axis).as_matrix() T2 = np.dot(R, T) a = angle(np.dot(T2, opa.axis), constraint2.axis) if not np.isclose(a, 0, rtol=rtol): T2 = np.dot(np.linalg.inv(R), T) o = Orientation(T2, degrees=0, random_state=self.random_state) orientations.append(o) # Ensure the identity orientation is checked if no constraints are found if constraints_m == []: o = Orientation(np.identity(3), degrees=2, random_state=self.random_state) orientations.append(o) # Remove redundancy from orientations list_i = list(range(len(orientations))) list_j = list(range(len(orientations))) for i, o1 in enumerate(orientations): if i in list_i: for j, o2 in enumerate(orientations): if i > j and j in list_j and j in list_i: # m1 = o1.get_matrix(angle=0) # m2 = o2.get_matrix(angle=0) m1 = o1.matrix m2 = o2.matrix new_op = SymmOp.from_rotation_and_translation(np.dot(m2, np.linalg.inv(m1)), [0, 0, 0]) P = SymmOp.from_rotation_and_translation(np.linalg.inv(m1), [0, 0, 0]) old_op = P * new_op * P.inverse if self.pga.is_valid_op(old_op): list_i.remove(j) list_j.remove(j) orientations_new = [] for i in list_i: orientations_new.append(orientations[i]) # Check each of the found orientations for consistency with Wyckoff site. # If consistent, put into an array of valid orientations # print("======", orientations_new) allowed = [] for o in orientations_new: op = o.get_op() mo = deepcopy(self.mol_no_h) mo.apply_operation(op) # print(mo) if is_compatible_symmetry(mo, wp): allowed.append(o) return allowed
[docs] def get_energy(self, xyz1, xyz2): """ Get packing energy between two neighboring molecules """ cdist(xyz1 - xyz2)
[docs] class Box: """ Class for storing the binding box for a molecule. Args: dims: [length, width, height] """ def __init__(self, dims): self.length = dims[0] # float(abs(maxy - miny)) self.width = dims[1] # float(abs(maxx - minx)) self.height = dims[2] # float(abs(maxz - minz)) self.volume = self.width * self.length * self.height def __str__(self): return f"l: {self.length:6.2f}, w: {self.width:6.2f}, d: {self.height:6.2f}"
[docs] def operate(self, rot=np.eye(3), center=np.zeros(3)): """ Perform operation on the box: Args: rot: 3*3 rotation matrix center: center position """ raise NotImplementedError
[docs] class Orientation: """ Stores orientations for molecules based on vector constraints. Can be stored to regenerate orientations consistent with a given constraint vector, without re-calling orientation_in_wyckoff_position. Allows for generating orientations which differ only in their rotation about a given axis. Args: matrix: a 3x3 rotation matrix describing the orientation (and/or inversion) to store degrees: the number of degrees of freedom... 0 - The orientation refers to a single rotation matrix 1 - The orientation can be rotated about a single axis 2 - The orientation can be any pure rotation matrix axis: an optional axis about which the orientation can rotate. Only used if degrees is equal to 1 """ def __init__(self, matrix=None, degrees=2, axis=None, random_state=None): self.matrix = np.array(matrix) self.degrees = degrees if isinstance(random_state, Generator): self.random_state = random_state.spawn(1)[0] else: self.random_state = np.random.default_rng(random_state) if degrees == 1: if axis is None: raise ValueError("axis is required for orientation") axis /= np.linalg.norm(axis) self.axis = axis self.r = Rotation.from_matrix(self.matrix) self.angle = None def __str__(self): s = "-------PyXtal.molecule.Orientation class----\n" s += f"degree of freedom: {self.degrees:d}\n" s += "Rotation matrix:\n" s += "{:6.3f} {:6.3f} {:6.3f}\n".format(*self.matrix[:, 0]) s += "{:6.3f} {:6.3f} {:6.3f}\n".format(*self.matrix[:, 1]) s += "{:6.3f} {:6.3f} {:6.3f}\n".format(*self.matrix[:, 2]) if self.axis is not None: s += "Rotation axis\n" s += "{:6.2f} {:6.2f} {:6.3f}\n".format(*self.axis) return s
[docs] def reset_matrix(self, matrix): self.matrix = matrix self.r = Rotation.from_matrix(matrix)
def __repr__(self): return str(self)
[docs] def copy(self): return deepcopy(self)
[docs] def save_dict(self): return {"matrix": self.matrix, "degrees": self.degrees, "axis": self.axis}
[docs] @classmethod def load_dict(cls, dicts): matrix = dicts["matrix"] degrees = dicts["degrees"] axis = dicts["axis"] return cls(matrix, degrees, axis)
[docs] def change_orientation(self, angle="random", flip=False, update=True): """ Change the orientation of molecule by applying a rotation. It allows for specification of an angle (or a random angle) to rotate about the constraint axis. If the system has 2 degrees of rotational freedom, the molecule can also be flipped with a probability Args: angle (float or str, optional): The angle to rotate about the constraint axis. If "random", a random rotation angle is selected flip (bool, optional): Whether to apply an random flip. This is only applied if the system has 2 degrees of rotational freedom. """ if self.degrees >= 1: # Choose the axis if self.axis is None: self.set_axis() # Parse the angle if angle == "random": angle = (self.random_state.random() - 1) * np.pi * 2 #self.angle = angle # Update the matrix r1 = Rotation.from_rotvec(angle * self.axis) # Optionally flip the molecule if self.degrees == 2 and flip and self.random_state.random() > 0.5: ax = self.random_state.choice(["x", "y", "z"]) angle0 = self.random_state.choice([90, 180, 270]) r2 = Rotation.from_euler(ax, angle0, degrees=True) r1 = r2 * r1 r = r1 * self.r matrix = r.as_matrix() if update: self.r = r self.matrix = matrix return matrix else: return self.matrix
[docs] def set_axis(self): if self.degrees == 2: axis = self.random_state.random(3) - 0.5 self.axis = axis / np.linalg.norm(axis) self.angle = 0
[docs] def rotate_by_matrix(self, matrix, ignore_constraint=True): """ rotate Args: matrix: 3*3 rotation matrix """ if not ignore_constraint: if self.degrees == 0: raise ValueError("cannot rotate") if self.degrees == 1: axis = self.axis vec = Rotation.from_matrix(matrix).as_rotvec() if angle(vec, self.axis) > 1e-2 and angle(vec, -self.axis) > 1e-2: raise ValueError("must rotate along the given axis") else: axis = None matrix = matrix.dot(self.matrix) return Orientation(matrix, self.degrees, axis, self.random_state)
[docs] def get_matrix(self, angle="random"): """ Generate a 3x3 rotation matrix consistent with the orientation's constraints. Allows for specification of an angle (possibly random) to rotate about the constraint axis. Args: angle: an angle to rotate about the constraint axis. If "random", chooses a random rotation angle. If self.degrees==2, chooses a random 3d rotation matrix to multiply by. If the original matrix is wanted, set angle=0, or call self.matrix Returns: a 3x3 rotation (and/or inversion) matrix (numpy array) """ if self.degrees == 2: if angle == "random": axis = self.random_state.sample(3) axis = axis / np.linalg.norm(axis) angle = self.random_state.random() * np.pi * 2 else: axis = self.axis return Rotation.from_rotvec(angle * axis).as_matrix() elif self.degrees == 1: angle = self.random_state.random() * np.pi * 2 if angle == "random" else self.angle return Rotation.from_rotvec(angle * self.axis).as_matrix() elif self.degrees == 0: return self.matrix return None
[docs] def get_op(self): # , angle=None): """ Generate a SymmOp object consistent with the orientation's constraints. Allows for specification of an angle (possibly random) to rotate about the constraint axis. Args: angle: an angle to rotate about the constraint axis. If "random", chooses a random rotation angle. If self.degrees==2, chooses a random 3d rotation matrix to multiply by. If the original matrix is wanted, set angle=0, or call self.matrix Returns: pymatgen.core.structure. SymmOp object """ # if angle is not None: # self.change_orientation(angle) return SymmOp.from_rotation_and_translation(self.matrix, [0, 0, 0])
[docs] def random_orientation(self): """ Applies random rotation (if possible) and returns a new orientation with the new base matrix. Returns: a new orientation object with a different base rotation matrix """ self.change_orientation() return self
[docs] def get_Euler_angles(self): """ get the Euler angles """ return self.r.as_euler("zxy", degrees=True)
[docs] def get_inertia_tensor(coords, weights=None): """ Calculate the symmetric inertia tensor for a molecule. Args: coords: [N, 3] array of coordinates Returns: a 3x3 numpy array representing the inertia tensor """ if weights is None: weights = np.ones(len(coords)) coords -= np.mean(coords, axis=0) Inertia = np.zeros([3, 3]) Inertia[0, 0] = np.sum(weights * coords[:, 1] ** 2 + weights * coords[:, 2] ** 2) Inertia[1, 1] = np.sum(weights * coords[:, 0] ** 2 + weights * coords[:, 2] ** 2) Inertia[2, 2] = np.sum(weights * coords[:, 0] ** 2 + weights * coords[:, 1] ** 2) Inertia[0, 1] = Inertia[1, 0] = -np.sum(weights * coords[:, 0] * coords[:, 1]) Inertia[0, 2] = Inertia[2, 0] = -np.sum(weights * coords[:, 0] * coords[:, 2]) Inertia[1, 2] = Inertia[2, 1] = -np.sum(weights * coords[:, 1] * coords[:, 2]) return Inertia
[docs] def reoriented_molecule(mol): # , nested=False): """ Allign a molecule so that its principal axes is the identity matrix. Args: mol: a Molecule object Returns: new_mol: a reoriented copy of the original molecule. P: the 3x3 rotation matrix used to obtain it. """ coords = mol.cart_coords numbers = mol.atomic_numbers coords -= np.mean(coords, axis=0) A = get_inertia_tensor(coords) # Store the eigenvectors of the inertia tensor P = np.linalg.eigh(A)[1] if np.linalg.det(P) < 0: P[:, 0] *= -1 coords = np.dot(coords, P) return Molecule(numbers, coords), P
[docs] def is_compatible_symmetry(mol, wp): """ Tests if a molecule meets the symmetry requirements of a Wyckoff position Args: mol: a pymatgen Molecule object. wp: a pyxtal.symmetry.Wyckoff_position object """ # For single atoms, there are no constraints if len(mol) == 1 or wp.index == 0: return True pga = PointGroupAnalyzer(mol) return all(pga.is_valid_op(op) for op in wp.get_site_symm_wo_translation())
[docs] def make_graph(mol, tol=0.2): """ make graph object for the input molecule """ # print("making graphs") G = nx.Graph() names = {} for i, site in enumerate(mol._sites): names[i] = site.specie.value if names[i] not in ["C", "H", "O", "N", "S", "P", "Si", "F", "Cl", "Br", "I"]: raise ValueError(f"{names[i]} is not supported") for i in range(len(mol) - 1): site1 = mol.sites[i] for j in range(i + 1, len(mol)): site2 = mol.sites[j] key = f"{names[i]:s}-{names[j]:s}" if site1.distance(site2) < bonds[key]: G.add_edge(i, j) # print(key, site1.distance(site2)) nx.set_node_attributes(G, names, "name") return G
[docs] def compare_mol_connectivity(mol1, mol2, ignore_name=False): """ Compare two molecules by connectivity """ G1 = make_graph(mol1) G2 = make_graph(mol2) if ignore_name: GM = nx.isomorphism.GraphMatcher(G1, G2) else: fun = lambda n1, n2: n1["name"] == n2["name"] GM = nx.isomorphism.GraphMatcher(G1, G2, node_match=fun) return GM.is_isomorphic(), GM.mapping
if __name__ == "__main__": smiles = "CCN1C(=O)c2ccc3C(=O)N(CC)C(=O)c4ccc(C1=O)c2c34" ans1 = [(0, 1, 2, 20), (13, 12, 11, 14)] print(ans1) ans2 = find_rotor_from_smile(smiles) print(ans2) assert ans1 == ans2 smiles = "Nc1c(Cl)cc(cc1N(=O)=O)N(=O)=O" ans2 = find_rotor_from_smile(smiles) print(ans2) ans1 = [(6, 5, 11, 13), (6, 7, 8, 10)] print(ans1) assert ans1 == ans2 smiles = "COc1cc(C=O)ccc1O" ans2 = find_rotor_from_smile(smiles) print(ans2) ans1 = [(0, 1, 2, 9), (6, 5, 4, 7)] print(ans1) assert ans1 == ans2