Source code for pyxtal.io

"""
This module handles reading and write crystal files.
"""
import numpy as np
from pymatgen.core.structure import Structure, Molecule
from pymatgen.core.bonds import CovalentBond
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pyxtal.wyckoff_site import atom_site, mol_site
from pyxtal.molecule import pyxtal_molecule, Orientation, compare_mol_connectivity
from pyxtal.symmetry import Wyckoff_position, Group
from pyxtal.lattice import Lattice
from pyxtal.util import get_symmetrized_pmg
from pyxtal.constants import deg, logo
from pyxtal.msg import ReadSeedError
from pkg_resources import resource_filename
from monty.serialization import loadfn

bonds = loadfn(resource_filename("pyxtal", "database/bonds.json"))

[docs] def in_merged_coords(wp, pt, pts, cell): """ Whether or not the pt in within the pts """ (c, s) = pt for pt0 in pts: (c0, s0) = pt0 if s == s0 and wp.are_equivalent_pts(c, c0, cell): #print(c, c0, 'equivalent') return True return False
[docs] def write_cif(struc, filename=None, header="", permission='w', sym_num=None, style='mp'): """ Export the structure in cif format The default setting for _atom_site follows the materials project cif Args: struc: pyxtal structure object filename: path of the structure file header: additional information permission: write(`w`) or append(`a+`) to the given file sym_num: the number of symmetry operations, None means writing all symops style: `icsd` or `mp` (used in pymatgen) """ if struc.molecular: sites = struc.mol_sites molecule = True #special = struc.has_special_site() #print('===============================================', struc) else: sites = struc.atom_sites molecule = False if sym_num is None: l_type = struc.group.lattice_type number = struc.group.number G1 = struc.group[0] if G1.is_standard_setting(): symbol = struc.group.symbol else: symbol = sites[0].wp.get_hm_symbol() else: #P1 symmetry l_type = 'triclinic' symbol = 'P1' number = 1 G1 = Group(1).Wyckoff_positions[0] lines = logo lines += 'data_' + header + '\n' if hasattr(struc, "energy"): if struc.molecular: eng = struc.energy/sum(struc.numMols) else: eng = struc.energy/sum(struc.numIons) lines += '#Energy: {:} eV/cell\n'.format(eng) lines += "\n_symmetry_space_group_name_H-M '{:s}'\n".format(symbol) lines += '_symmetry_Int_Tables_number {:>15d}\n'.format(number) lines += '_symmetry_cell_setting {:>15s}\n'.format(l_type) a, b, c, alpha, beta, gamma = struc.lattice.get_para(degree=True) lines += '_cell_length_a {:12.6f}\n'.format(a) lines += '_cell_length_b {:12.6f}\n'.format(b) lines += '_cell_length_c {:12.6f}\n'.format(c) lines += '_cell_angle_alpha {:12.6f}\n'.format(alpha) lines += '_cell_angle_beta {:12.6f}\n'.format(beta) lines += '_cell_angle_gamma {:12.6f}\n'.format(gamma) lines += '_cell_volume {:12.6f}\n'.format(struc.lattice.volume) #if struc.molecular: # lines += '_cell_formula_units_Z {:d}\n'.format(sum(struc.numMols)) #else: # lines += '_cell_formula_units_Z {:d}\n'.format(sum(struc.numIons)) lines += '\nloop_\n' lines += ' _symmetry_equiv_pos_site_id\n' lines += ' _symmetry_equiv_pos_as_xyz\n' for i, op in enumerate(G1): lines += "{:d} '{:s}'\n".format(i+1, op.as_xyz_str()) lines += '\nloop_\n' lines += ' _atom_site_label\n' lines += ' _atom_site_type_symbol\n' lines += ' _atom_site_symmetry_multiplicity\n' if style == 'icsd': lines += ' _atom_site_Wyckoff_symbol\n' lines += ' _atom_site_fract_x\n' lines += ' _atom_site_fract_y\n' lines += ' _atom_site_fract_z\n' lines += ' _atom_site_occupancy\n' for site in sites: mul = site.wp.multiplicity letter = site.wp.letter if molecule: if sym_num is None: coord0s, specie0s = site._get_coords_and_species(first=True) if site.wp.index > 0: #print("#Check if the mul is consistent!", site.wp.index) muls = [] coords = [] species = [] merges = [] for coord, specie in zip(coord0s, specie0s): _, wp, _ = G1.merge(coord, struc.lattice.matrix, 0.05) if len(wp) > mul: if not in_merged_coords(G1, [coord, specie], merges, struc.lattice.matrix): #print("General Position", specie, coord) coords.append(coord) species.append(specie) muls.append(len(wp)) merges.append((coord, specie)) else: #print("Special Position", specie, coord) coords.append(coord) species.append(specie) muls.append(mul) else: coords, species = coord0s, specie0s muls = [mul] * len(coords) else: coords = None species = [] for id in range(sym_num): mol = site.get_mol_object(id) tmp = mol.cart_coords.dot(site.lattice.inv_matrix) if coords is None: coords = tmp else: coords = np.append(coords, tmp, axis=0) species.extend([s.value for s in mol.species]) muls = [mul] * len(coords) #coords, species = site._get_coords_and_species(ids=sym_num) else: coords, species, muls = [site.position], [site.specie], [mul] for specie, coord, mul in zip(species, coords, muls): lines += '{:6s} {:6s} {:3d} '.format(specie, specie, mul) if style != 'mp': lines += '{:s} '.format(letter) lines += '{:12.6f}{:12.6f}{:12.6f} 1\n'.format(*coord) lines +='#END\n\n' if filename is None: return lines else: with open(filename, permission) as f: f.write(lines) return
[docs] def read_cif(filename): """ read the cif, mainly for pyxtal cif output Be cautious in using it to read other cif files Args: filename: path of the structure file Return: pyxtal structure """ species = [] coords = [] with open(filename, 'r') as f: lines = f.readlines() for i, line in enumerate(lines): if line.startswith('_symmetry_Int_Tables_number'): sg = int(line.split()[-1]) elif line.startswith('_cell_length_a'): a = float(lines[i].split()[-1]) b = float(lines[i+1].split()[-1]) c = float(lines[i+2].split()[-1]) alpha = float(lines[i+3].split()[-1]) beta = float(lines[i+4].split()[-1]) gamma = float(lines[i+5].split()[-1]) elif line.startswith('_symmetry_cell_setting'): lat_type = line.split()[-1] elif line.startswith('_symmetry_space_group_name_H-M '): symbol = line.split()[-1] if eval(symbol) in ["Pn", "P21/n", "C2/n"]: diag = True else: diag = False elif line.find('_atom_site') >= 0: s = i while True: s += 1 if lines[s].find('_atom_site') >= 0: pass elif len(lines[s].split()) <= 3: break else: tmp = lines[s].split() pos = [float(tmp[-4]), float(tmp[-3]), float(tmp[-2])] species.append(tmp[0]) coords.append(pos) break wp0 = Group(sg)[0] lattice = Lattice.from_para(a, b, c, alpha, beta, gamma, lat_type) sites = [] for specie, coord in zip(species, coords): pt, wp, _ = wp0.merge(coord, lattice.matrix, tol=0.1) sites.append(atom_site(wp, pt, specie, diag)) return lattice, sites
[docs] class structure_from_ext(): def __init__(self, struc, ref_mols, tol=0.2, ignore_HH=False, add_H=False, hn=None): """ extract the mol_site information from the give cif file and reference molecule Args: struc: cif/poscar file or a Pymatgen Structure object ref_mols: a list of reference molecule (xyz file or Pyxtal molecule) tol: scale factor for covalent bond distance ignore_HH: whether or not ignore short H-H in checking molecule add_H: whether or not add the H atoms """ for ref_mol in ref_mols: if isinstance(ref_mol, str): ref_mol = pyxtal_molecule(ref_mol, fix=True) elif isinstance(ref_mol, pyxtal_molecule): ref_mol = ref_mol else: print(type(ref_mol)) raise NameError("reference molecule cannot be defined") if isinstance(struc, str): pmg_struc = Structure.from_file(struc) elif isinstance(struc, Structure): pmg_struc = struc else: print(type(struc)) raise NameError("input structure cannot be intepretted") # reset the hydrogen position if add_H: pmg_struc.remove_species('H') self.ref_mols = ref_mols self.tol = tol self.add_H = add_H sym_struc, number = get_symmetrized_pmg(pmg_struc, hn=hn) if hn is None: group = Group(number) else: group = Group(hn, use_hall=True) self.group = group self.wyc = group[0] molecules = search_molecules_in_crystal(sym_struc, self.tol, ignore_HH=ignore_HH) self.pmg_struc = sym_struc matrix = sym_struc.lattice.matrix ltype = group.lattice_type self.lattice = Lattice.from_matrix(matrix, ltype=ltype) self.resort(molecules) if len(self.ids) == 0: raise RuntimeError('Cannot extract molecules')
[docs] def resort(self, molecules): from pyxtal.operations import apply_ops, find_ids # filter out the molecular generators lat = self.pmg_struc.lattice.matrix inv_lat = self.pmg_struc.lattice.inv_matrix new_lat = self.lattice.matrix positions = np.zeros([len(molecules),3]) for i in range(len(molecules)): positions[i] = np.dot(molecules[i].cart_coords.mean(axis=0), inv_lat) wps = [] ids = [] #id for the generator visited_ids = [] for id, pos in enumerate(positions): if id not in visited_ids: centers = apply_ops(pos, self.wyc) tmp_ids = find_ids(centers, positions) visited_ids.extend(tmp_ids) #print(id, pos, tmp_ids, len(self.wyc), len(molecules[id])) if len(tmp_ids) == len(self.wyc): #general position #if len(molecules[id])==1: print("groups", tmp_ids, '\n', centers) wps.append(self.wyc) ids.append(id) else: #special sites for id0 in tmp_ids: p0 = positions[id0] p1, wp, _ = self.wyc.merge(p0, new_lat, 0.1) diff = p1 - p0 diff -= np.round(diff) if np.abs(diff).sum() < 1e-2: #sort position by mapping wps.append(wp) ids.append(id0) #find the right ids #print("add special", wp.index, id0) break #print("===============================================================", self.wps) # add position and molecule, print("ids", ids, mults) N_sites = len(ids) self.numMols = [0] * len(self.ref_mols) self.positions = [] self.wps = [] self.p_mols = [] self.ids = [] ids_done = [] #search for the matched molecules for j, mol2_ref in enumerate(self.ref_mols): mol2 = mol2_ref.copy() if self.add_H: mol2.mol.remove_species("H") for i, id in enumerate(ids): mol1 = molecules[id] #print("++++++++++++++++++++++++++", id, ids, len(mol2.mol), len(mol1)) #print(mol2.mol.to('xyz')) if id not in ids_done and len(mol2.mol) == len(mol1): p_mol = mol2_ref.copy() # create p_mol match, mapping = compare_mol_connectivity(mol2.mol, mol1) if match: if len(mol1) > 1: # rearrange the order order = [mapping[at] for at in range(len(mol1))] xyz = mol1.cart_coords[order] # add hydrogen positions here if self.add_H: #print(mol2.smile) xyz = self.add_Hydrogens(mol2.smile, xyz) #print(xyz) frac = np.dot(xyz, inv_lat) xyz = np.dot(frac, new_lat) center = p_mol.get_center(xyz) p_mol.reset_positions(xyz-center) position = np.dot(center, np.linalg.inv(new_lat)) else: xyz = mol1.cart_coords[0] position = np.dot(xyz, inv_lat) position -= np.floor(position) self.positions.append(position) self.p_mols.append(p_mol) self.ids.append(j) ids_done.append(id) self.wps.append(wps[i]) self.numMols[j] += len(wps[i]) #print("================================================ADDDDDD", id, len(mol1)) # check if some molecules cannot be matched if len(ids_done) < len(ids): #print("==========================================================Nonmatched molecules", ids_done, ids) for id in ids: if id not in ids_done: msg = "This molecule cannot be matched to the reference\n" msg += 'Molecules extracted from the structure\n' msg += molecules[id].to(fmt='xyz') + '\n' msg += "Reference molecule from smiles or xyz\n" msg += mol2.mol.to(fmt='xyz') raise ReadSeedError(msg)
[docs] def add_Hydrogens(self, smile, xyz): """ add hydrogen for pymtagen molecule """ from rdkit import Chem from rdkit.Chem import AllChem from rdkit.Geometry import Point3D #print("SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSS", smile) m1 = Chem.MolFromSmiles(smile) m2 = Chem.AddHs(m1) if len(smile) > 100: AllChem.EmbedMolecule(m2, randomSeed=3) else: AllChem.EmbedMolecule(m2, randomSeed=0xf00d) m2 = Chem.RemoveHs(m2) conf = m2.GetConformer(0) for i in range(conf.GetNumAtoms()): x, y, z = xyz[i] conf.SetAtomPosition(i, Point3D(x,y,z)) #Chem.MolToPDBFile(m2, 'test.pdb') #conf = m2.GetConformer(0); print(conf.GetPositions()) m1 = Chem.AddHs(m1) if len(smile) > 100: AllChem.EmbedMolecule(m1, randomSeed=3) else: AllChem.EmbedMolecule(m1, randomSeed=0xf00d) #print(m1.GetNumAtoms(), m2.GetNumAtoms()) AllChem.UFFOptimizeMolecule(m1) try: m3 = AllChem.ConstrainedEmbed(m1, m2) except ValueError as e: raise ReadSeedError(str(e)) conf = m3.GetConformer(0) #; print(conf.GetPositions()) return conf.GetPositions()
[docs] def make_mol_sites(self): """ generate the molecular wyckoff sites """ ori = Orientation(np.eye(3)) sites = [] for id, mol, pos, wp in zip(self.ids, self.p_mols, self.positions, self.wps): #print(id, mol.smile, wp.multiplicity) site = mol_site(mol, pos, ori, wp, self.lattice) site.type = id #print(pos) #print(self.lattice.matrix) #print([a.value for a in site.molecule.mol.species]) #print(site.molecule.mol.cart_coords) #print(site._get_coords_and_species(absolute=True)[0][:10]) sites.append(site) return sites
[docs] def align(self): """ compute the orientation wrt the reference molecule """ try: from openbabel import pybel, openbabel except: import pybel, openbabel m1 = pybel.readstring('xyz', self.ref_mol.to('xyz')) m2 = pybel.readstring('xyz', self.molecule.to('xyz')) aligner = openbabel.OBAlign(True, False) aligner.SetRefMol(m1.OBMol) aligner.SetTargetMol(m2.OBMol) aligner.Align() print("RMSD: ", aligner.GetRMSD()) rot=np.zeros([3,3]) for i in range(3): for j in range(3): rot[i,j] = aligner.GetRotMatrix().Get(i,j) coord2 = self.molecule.cart_coords coord2 -= np.mean(coord2, axis=0) coord3 = rot.dot(coord2.T).T + np.mean(self.ref_mol.cart_coords, axis=0) self.mol_aligned = Molecule(self.ref_mol.atomic_numbers, coord3) self.ori = Orientation(rot)
[docs] def show(self, overlay=True): from pyxtal.viz import display_molecules if overlay: return display_molecules([self.ref_mol, self.mol_aligned]) else: return display_molecules([self.ref_mol, self.molecule])
[docs] def search_molecules_in_crystal(struc, tol=0.2, once=False, ignore_HH=True): """ Function to perform to find the molecule in a Pymatgen structure Args: struc: Pymatgen Structure tol: tolerance value to check the connectivity once: search only one molecule or all molecules ignore_HH: whether or not ignore the short H-H in checking molecule Returns: molecules: list of pymatgen molecules positions: list of center positions """ def check_one_layer(struc, sites0, visited): new_members = [] for site0 in sites0: sites_add, visited = check_one_site(struc, site0, visited) new_members.extend(sites_add) return new_members, visited def check_one_site(struc, site0, visited, rmax=2.8): neigh_sites = struc.get_neighbors(site0, rmax) ids = [m.index for m in visited] sites_add = [] ids_add = [] pbc = isinstance(struc, Structure) for site1 in neigh_sites: if (site1.index not in ids+ids_add): try: if CovalentBond.is_bonded(site0, site1, tol): if pbc: (d, image) = site0.distance_and_image(site1) else: d = site0.distance(site1) val1, val2 = site1.specie.value, site0.specie.value key = "{:s}-{:s}".format(val1, val2) #sometime the H-H short distance is not avoidable if key == 'H-H': if not ignore_HH: if pbc: site1.frac_coords += image sites_add.append(site1) ids_add.append(site1.index) else: if d < bonds[key]: if pbc: site1.frac_coords += image sites_add.append(site1) ids_add.append(site1.index) except ValueError: #QZ: use our own bond distance lib if pbc: (d, image) = site0.distance_and_image(site1) else: d = site0.distance(site1) val1, val2 = site1.specie.value, site0.specie.value key = "{:s}-{:s}".format(val1, val2) if d < bonds[key]: if pbc: site1.frac_coords += image sites_add.append(site1) ids_add.append(site1.index) if len(sites_add) > 0: visited.extend(sites_add) return sites_add, visited molecules = [] visited_ids = [] for id, site in enumerate(struc.sites): if id not in visited_ids: first_site = site visited = [first_site] first_site.index = id n_iter, max_iter = 0, len(struc)-len(visited_ids) while n_iter < max_iter: if n_iter == 0: new_sites, visited = check_one_site(struc, first_site, visited) else: new_sites, visited = check_one_layer(struc, new_sites, visited) n_iter += 1 if len(new_sites)==0: break coords = [s.coords for s in visited] coords = np.array(coords) numbers = [s.specie.number for s in visited] molecules.append(Molecule(numbers, coords)) visited_ids.extend([s.index for s in visited]) #print(molecules[-1].to(fmt='xyz')); import sys; sys.exit() if once and len(molecules) == 1: break return molecules
if __name__ == "__main__": from pyxtal.database.collection import Collection pmg = Structure.from_file('pyxtal/database/cifs/resorcinol.cif') mols = search_molecules_in_crystal(pmg, tol=0.2, once=False) print(len(mols)) pmg = Collection("molecules")['xxv'] mols = search_molecules_in_crystal(pmg, tol=0.2, once=False) print(len(mols))