Source code for pyxtal.representation

import numpy as np

from pyxtal.lattice import Lattice
from pyxtal.molecule import find_rotor_from_smile
from pyxtal.symmetry import Group
from pyxtal.wyckoff_site import atom_site, mol_site


[docs] class representation_atom: """ A class to handle the 1D representation of atomic crystal Works for Zprime > 1 Args: x: a list of [cell, site_1, site_2, ...] """ def __init__(self, x): self.x = x def __str__(self): return self.to_string()
[docs] @classmethod def from_pyxtal(cls, struc, standard=True): """ Initialize 1D rep. from the pyxtal object Args: struc: pyxtal object """ if standard and not struc.standard_setting: pmg = struc.to_pymatgen() struc.from_seed(pmg, standard=True) symmetry = [struc.atom_sites[0].wp.hall_number] lat = struc.lattice.encode() vector = [symmetry + lat] vector.extend([site.encode() for site in struc.atom_sites]) x = vector return cls(x)
[docs] def to_standard_setting(self): xtal = self.to_pyxtal() self.x = representation.from_pyxtal(xtal, standard=True).x
[docs] def to_pyxtal(self): """ Export the pyxtal structure Args: smiles: list of smiles compoisition: list of composition """ from pyxtal import pyxtal # symmetry v = self.x[0] struc = pyxtal() struc.group, _number = Group(v[0], use_hall=True), v[0] # lattice ltype = struc.group.lattice_type if ltype == "triclinic": a, b, c, alpha, beta, gamma = v[1], v[2], v[3], v[4], v[5], v[6] elif ltype == "monoclinic": a, b, c, alpha, beta, gamma = v[1], v[2], v[3], 90, v[4], 90 elif ltype == "orthorhombic": a, b, c, alpha, beta, gamma = v[1], v[2], v[3], 90, 90, 90 elif ltype == "tetragonal": a, b, c, alpha, beta, gamma = v[1], v[1], v[2], 90, 90, 90 elif ltype == "hexagonal": a, b, c, alpha, beta, gamma = v[1], v[1], v[2], 90, 90, 120 else: a, b, c, alpha, beta, gamma = v[1], v[1], v[1], 90, 90, 90 try: struc.lattice = Lattice.from_para(a, b, c, alpha, beta, gamma, ltype=ltype) except: print(a, b, c, alpha, beta, gamma, ltype) raise ValueError("Problem in Lattice") # sites struc.numIons = [] struc.atom_sites = [] species = [] for _x in self.x[1:]: dicts = {} specie, index, pos = _x[0], _x[1], _x[2:] dicts["specie"] = specie dicts["index"] = index dicts["dim"] = 3 dicts["PBC"] = [1, 1, 1] dicts["hn"] = struc.group.hall_number wp = struc.group[index] dicts["position"] = wp.get_position_from_free_xyzs(pos) site = atom_site.load_dict(dicts) struc.atom_sites.append(site) if specie not in species: species.append(specie) struc.numIons.append(wp.multiplicity) else: for i, _specie in enumerate(species): if _specie == specie: struc.numIons[i] += site.wp.multiplicity struc.species = species struc._get_formula() struc.source = "1D rep." struc.valid = True struc.standard_setting = site.wp.is_standard_setting() return struc
[docs] def to_array(self): """ Export only varibles to a 1d numpy array """ cells, xyzs = self.x[0][1:], self.x[1:] x = cells for xyz in xyzs: x = np.hstack((x, xyz[2:])) return x
[docs] def to_string(self, time=None, eng=None, tag=None): """ Export string representation Args: time: float eng: float tag: string """ x = self.x strs = f"{int(x[0][0]):3d} " # data for cell if x[0][0] <= 348: num = 4 elif x[0][0] <= 488: num = 3 else: # cubic num = 2 for c in x[0][1:num]: strs += f"{c:5.2f} " for c in x[0][num:]: strs += f"{c:5.1f} " # data for atoms strs += f"{len(x) - 1:d} " # Number of sites for i in range(1, len(x)): strs += f"{x[i][0]:s} " strs += f"{x[i][1]:d} " for v in x[i][2:]: strs += f"{v:6.4f} " if time is not None: strs += f"{time:5.2f}" if eng is not None: strs += f"{eng:11.3f}" if tag is not None: strs += f" {tag:s}" return strs
[docs] class representation: """ A class to handle the 1D representation of molecular crystal Works for Zprime > 1 Args: x: a list of [cell, site_1, site_2, ...] smiles: a list of [smile_1, smile_2, ...] """ def __init__(self, x, smiles=None): if smiles is not None: self.smiles = [] for _i, smile in enumerate(smiles): if smile.endswith(".smi"): smile = smile[:-4] self.smiles.append(smile) else: self.smiles = None self.x = x def __str__(self): return self.to_string()
[docs] @classmethod def from_pyxtal(cls, struc, standard=False): """ Initialize 1D rep. from the pyxtal object Args: struc: pyxtal object """ if standard and not struc.standard_setting: # struc.optimize_lattice(standard=True) pmg = struc.to_pymatgen() struc.from_seed(pmg, molecules=struc.molecules, standard=True) symmetry = [struc.mol_sites[0].wp.hall_number] lat = struc.lattice.encode() vector = [symmetry + lat] smiles = [] for site in struc.mol_sites: vector.append(site.encode()) smiles.append(site.molecule.smile) x = vector if smiles[0] is None: smiles = None return cls(x, smiles)
[docs] @classmethod def from_string(cls, inputs, smiles, composition=None): """ Initialize 1D rep. from the string Args: inputs: input string smiles: list of smiles """ # parse the cell if composition is None: composition = [1] * len(smiles) inputs = [float(tmp) for tmp in inputs.split()] hn = int(inputs[0]) if hn <= 2: n_cell = 8 elif hn <= 107: n_cell = 6 elif hn <= 348: n_cell = 5 elif hn <= 488: n_cell = 4 else: n_cell = 3 # cubic cell = [hn] + inputs[1 : n_cell - 1] x = [cell] n_site = int(inputs[n_cell - 1]) if n_site != sum(composition): msg = f"Composition is inconsistent: {sum(composition):d}/{n_site:d}\n" msg += str(inputs) raise ValueError(msg) # n_cell += 1 for i, smile in enumerate(smiles): if smile.endswith(".smi"): smile = smile[:-4] for _c in range(composition[i]): if smile in ["Cl-"]: n_mol = 4 else: n_torsion = len(find_rotor_from_smile(smile)) n_mol = 8 + n_torsion # (wp_id, x, y, z, ori_x, ori_y, ori_z, inv) + torsion # inversion # print(n_mol, n_cell, len(inputs)) inputs[n_cell] = int(inputs[n_cell]) inputs[n_cell + n_mol - 1] = int(inputs[n_cell + n_mol - 1]) x.append(inputs[n_cell : n_cell + n_mol]) # ; print('string', x[-1]) n_cell += n_mol assert n_cell == len(inputs) return cls(x, smiles)
[docs] def to_standard_setting(self): xtal = self.to_pyxtal() xtal.optimize_lattice(standard=True) rep0 = representation.from_pyxtal(xtal) self.x = rep0.x
[docs] def to_pyxtal(self, smiles=None, composition=None): """ Export the pyxtal structure Args: smiles: list of smiles compoisition: list of composition """ from pyxtal import pyxtal if smiles is None: smiles = self.smiles if composition is None: composition = [1] * len(smiles) if sum(composition) + 1 != len(self.x): msg = "Composition is inconsistent:\n" msg += str(composition) + "\n" msg += self.to_string() raise ValueError(msg) # symmetry v = self.x[0] struc = pyxtal(molecular=True) struc.group, _number = Group(v[0], use_hall=True), v[0] # lattice ltype = struc.group.lattice_type if ltype == "triclinic": a, b, c, alpha, beta, gamma = v[1], v[2], v[3], v[4], v[5], v[6] elif ltype == "monoclinic": a, b, c, alpha, beta, gamma = v[1], v[2], v[3], 90, v[4], 90 elif ltype == "orthorhombic": a, b, c, alpha, beta, gamma = v[1], v[2], v[3], 90, 90, 90 elif ltype == "tetragonal": a, b, c, alpha, beta, gamma = v[1], v[1], v[2], 90, 90, 90 elif ltype == "hexagonal": a, b, c, alpha, beta, gamma = v[1], v[1], v[2], 90, 90, 120 else: a, b, c, alpha, beta, gamma = v[1], v[1], v[1], 90, 90, 90 try: struc.lattice = Lattice.from_para(a, b, c, alpha, beta, gamma, ltype=ltype) except: print(a, b, c, alpha, beta, gamma, ltype) raise ValueError("Problem in Lattice") # sites struc.numMols = [0] * len(smiles) struc.molecules = [] struc.mol_sites = [] count = 1 for i, comp in enumerate(composition): smile = smiles[i] if smile.endswith(".smi"): smile = smile[:-4] for _j in range(comp): v = self.x[count] dicts = {} dicts["smile"] = smile dicts["type"] = i dicts["dim"] = 3 dicts["PBC"] = [1, 1, 1] # dicts['number'] = number dicts["hn"] = struc.group.hall_number dicts["index"] = v[0] dicts["lattice"] = struc.lattice.matrix dicts["lattice_type"] = ltype dicts["center"] = v[1:4] if smile not in ["Cl-"]: dicts["orientation"] = np.array(v[4:7]) dicts["rotor"] = v[7:-1] # ; print('ro', dicts['rotor']) dicts["reflect"] = int(v[-1]) site = mol_site.from_1D_dicts(dicts) bypass = False for mol_id, molecule in enumerate(struc.molecules): if str(site.molecule) == str(molecule): site.type = mol_id struc.numMols[mol_id] += site.wp.multiplicity bypass = True break if not bypass: struc.molecules.append(site.molecule) site.type = len(struc.molecules) - 1 struc.numMols[site.type] += site.wp.multiplicity # site.type = i struc.mol_sites.append(site) # move to next rep count += 1 struc._get_formula() struc.source = "1D rep." struc.valid = True struc.standard_setting = site.wp.is_standard_setting() return struc
[docs] def to_string(self, time=None, eng=None, tag=None): """ Export string representation Args: time: float eng: float tag: string """ x = self.x strs = f"{int(x[0][0]):3d} " # data for cell if x[0][0] <= 348: num = 4 elif x[0][0] <= 488: num = 3 else: # cubic num = 2 for c in x[0][1:num]: strs += f"{c:5.2f} " for c in x[0][num:]: strs += f"{c:5.1f} " # data for molecule strs += f"{len(x) - 1:d} " # ; print(x[1]) for i in range(1, len(x)): strs += f"{x[i][0]:d} " for v in x[i][1:4]: strs += f"{v:4.2f} " for v in x[i][4:-1]: strs += f"{v:6.1f} " strs += f"{int(x[i][-1]):d} " if time is not None: strs += f"{time:5.2f}" if eng is not None: strs += f"{eng:11.3f}" if tag is not None: strs += f" {tag:s}" return strs
[docs] def same_smiles(self, smiles): if len(self.smiles) == smiles: return all(s2 == s2 for s1, s2 in zip(self.smiles, smiles)) else: return False
[docs] def get_dist(self, rep): """ get distance with the other rep1 Now only supports Z'=1 """ from pyxtal.symmetry import Wyckoff_position as WP if self.same_smiles(rep.smiles): msg = "different smiles" print(msg) return None elif len(self.x) != len(rep.x): msg = "different number of sites" print(msg) return None elif self.x[0][0] != rep.x[0][0]: msg = "different space group numbers" print(msg) return None else: diffs = [] wp = WP.from_group_and_index(self.x[0][0], 0, use_hall=True) for i in range(len(self.x)): np.zeros(len(self.x[i])) tmp1 = np.array(self.x[i]) tmp2 = np.array(rep.x[i]) # cell difference if i == 0: diff_cell = tmp2 - tmp1 diffs.extend(diff_cell) # site difference else: # symmmetry variation xyzs = wp.apply_ops(tmp2[:3]) diff_xyzs = xyzs - tmp1[:3] diff_xyzs -= np.rint(diff_xyzs) id = np.argmin(np.linalg.norm(diff_xyzs, axis=1)) diff_xyz = diff_xyzs[id] diff_ori = tmp2[3:6] - tmp1[3:6] diff_ori /= [360.0, 180.0, 360.0] diff_ori -= np.rint(diff_ori) diff_ori *= [360.0, 180.0, 360.0] diff_tor = tmp2[6:] - tmp1[6:] diff_tor /= 360.0 diff_tor -= np.rint(diff_tor) diff_tor *= 360.0 diffs.extend(diff_xyz) diffs.extend(diff_ori) diffs.extend(diff_tor) return np.array(diffs)
if __name__ == "__main__": # aspirin smiles = ["CC(=O)OC1=CC=CC=C1C(=O)O"] x = [ [81, 11.43, 6.49, 11.19, 83.31], [0, 0.77, 0.57, 0.53, 48.55, 24.31, 145.94, -77.85, -4.40, 170.86, False], ] # rep0 = representation(x, smiles) # print(rep0.to_string()) rep1 = representation(x, smiles) xtal = rep1.to_pyxtal() print(xtal) rep2 = representation.from_pyxtal(xtal) print(rep2.to_pyxtal()) print(rep2.to_string()) print("Test read from string") string = "82 11.43 6.49 11.19 83.31 1 0 0.77 0.57 0.53 48.55 24.31 145.9 -77.85 -4.40 170.9 0" rep3 = representation.from_string(string, smiles) print(rep3.to_string()) print(rep3.to_pyxtal()) # x = rep3.to_pyxtal(); x.optimize_lattice(standard=True); print(x) rep3.to_standard_setting() print(rep3.to_pyxtal()) print(rep3.to_string()) print("Test other cases") string1 = "81 14.08 6.36 25.31 83.9 1 0 0.83 0.40 0.63 136.6 -21.6 -151.1 -101.1 -131.2 154.7 -176.4 -147.8 178.2 -179.1 -53.3 0" # noqa: E501 string2 = "81 14.08 6.36 25.31 83.9 1 0 0.03 0.84 0.89 149.1 -8.0 -37.8 -39.9 -104.2 176.2 -179.6 137.8 -178.5 -173.3 -103.6 0" # noqa: E501 smiles = ["CC1=CC=C(C=C1)S(=O)(=O)C2=C(N=C(S2)C3=CC=C(C=C3)NC(=O)OCC4=CC=CC=C4)C"] rep4 = representation.from_string(string1, smiles) rep5 = representation.from_string(string2, smiles) print(string1) print(string2) print(rep4.get_dist(rep5)) from pyxtal import pyxtal xtal = pyxtal() xtal.from_seed("pyxtal/database/cifs/Fd3.cif") xtal.from_seed("pyxtal/database/cifs/NaSb3F10.cif") rep = representation_atom.from_pyxtal(xtal) print(rep) print(xtal) print(rep.to_pyxtal()) # strings = [ # "83 14.08 6.36 25.31 83.9 1 0.72 0.40 0.27 131.6 -17.0 -120.0 -83.8 -134.1 -174.5 -175.7 -168.8 173.9 178.0 -157.4 0", # noqa: E501 # "81 14.08 6.36 25.31 83.9 1 0.59 0.81 0.39 -117.8 -50.1 -95.3 -25.8 -80.6 164.7 155.9 -124.9 -159.2 178.6 -154.7 0", # noqa: E501 # "81 14.08 6.36 25.31 83.9 1 0.75 0.09 0.01 133.8 -19.5 -55.1 -86.7 -91.7 -175.0 -170.4 -176.8 173.3 -164.8 -58.4 0", # noqa: E501 # "81 14.08 6.36 25.31 83.9 1 0.72 0.44 0.01 135.2 27.5 97.2 -101.1 -105.1 -29.7 -169.7 -50.1 172.2 -173.1 131.6 0", # noqa: E501 # "82 14.00 6.34 25.26 83.6 1 0.21 0.08 0.54 146.0 -12.0 50.2 108.0 112.3 -166.3 -158.7 -35.5 172.3 -168.7 133.0 0", # noqa: E501 # "81 14.08 6.36 25.31 83.9 1 0.05 0.30 0.89 -68.2 41.2 148.8 -66.9 -85.0 -167.4 172.3 -166.2 -178.3 166.4 -45.9 0", # noqa: E501 # ] # import pymatgen.analysis.structure_matcher as sm # matcher = sm.StructureMatcher(ltol=0.3, stol=0.3, angle_tol=10) # for i, string in enumerate(strings): # print(str(i) + ' ' +string) # rep4 = representation.from_string(string, smiles) # pmg1 = rep4.to_pyxtal().to_pymatgen(); pmg1.remove_species('H') # rep4.to_standard_setting() # pmg2 = rep4.to_pyxtal().to_pymatgen(); pmg2.remove_species('H') # print(i, rep4.to_string(), matcher.fit(pmg1, pmg2))