Source code for pyxtal.representation

import numpy as np

from pyxtal.lattice import Lattice
from pyxtal.molecule import find_rotor_from_smile
from pyxtal.symmetry import Group
from pyxtal.wyckoff_site import atom_site, mol_site


[docs]
class representation_atom:
    """
    A class to handle the 1D representation of atomic crystal
    Works for Zprime > 1

    Args:
        x: a list of [cell, site_1, site_2, ...]
    """

    def __init__(self, x):
        self.x = x

    def __str__(self):
        return self.to_string()


[docs]
    @classmethod
    def from_pyxtal(cls, struc, standard=True):
        """
        Initialize 1D rep. from the pyxtal object

        Args:
            struc: pyxtal object
        """
        if standard and not struc.standard_setting:
            pmg = struc.to_pymatgen()
            struc.from_seed(pmg, standard=True)
        symmetry = [struc.atom_sites[0].wp.hall_number]
        lat = struc.lattice.encode()
        vector = [symmetry + lat]
        vector.extend([site.encode() for site in struc.atom_sites])
        x = vector
        return cls(x)



[docs]
    def to_standard_setting(self):
        xtal = self.to_pyxtal()
        self.x = representation.from_pyxtal(xtal, standard=True).x



[docs]
    def to_pyxtal(self):
        """
        Export the pyxtal structure

        Args:
            smiles: list of smiles
            composition: list of composition
        """
        from pyxtal import pyxtal

        # symmetry
        v = self.x[0]
        struc = pyxtal()
        struc.group, _number = Group(v[0], use_hall=True), v[0]

        # lattice
        ltype = struc.group.lattice_type #; print(ltype)
        if ltype == "triclinic":
            a, b, c, alpha, beta, gamma = v[1], v[2], v[3], v[4], v[5], v[6]
        elif ltype == "monoclinic":
            a, b, c, alpha, beta, gamma = v[1], v[2], v[3], 90, v[4], 90
        elif ltype == "orthorhombic":
            a, b, c, alpha, beta, gamma = v[1], v[2], v[3], 90, 90, 90
        elif ltype == "tetragonal":
            a, b, c, alpha, beta, gamma = v[1], v[1], v[2], 90, 90, 90
        elif ltype in ["trigonal", "hexagonal"]:
            a, b, c, alpha, beta, gamma = v[1], v[1], v[2], 90, 90, 120
        else:
            a, b, c, alpha, beta, gamma = v[1], v[1], v[1], 90, 90, 90
        try:
            struc.lattice = Lattice.from_para(a, b, c, alpha, beta, gamma, ltype=ltype)
        except:
            print(a, b, c, alpha, beta, gamma, ltype)
            raise ValueError("Problem in Lattice")

        # sites
        struc.numIons = []
        struc.atom_sites = []
        species = []

        for _x in self.x[1:]:
            dicts = {}
            specie, index, pos = _x[0], _x[1], _x[2:]
            dicts["specie"] = specie
            dicts["index"] = index
            dicts["dim"] = 3
            dicts["PBC"] = [1, 1, 1]
            dicts["hn"] = struc.group.hall_number
            wp = struc.group[index]
            dicts["position"] = wp.get_position_from_free_xyzs(pos)
            site = atom_site.load_dict(dicts)
            struc.atom_sites.append(site)

            if specie not in species:
                species.append(specie)
                struc.numIons.append(wp.multiplicity)
            else:
                for i, _specie in enumerate(species):
                    if _specie == specie:
                        struc.numIons[i] += site.wp.multiplicity

        struc.species = species
        struc._get_formula()
        struc.source = "1D rep."
        struc.valid = True
        struc.standard_setting = site.wp.is_standard_setting()

        return struc



[docs]
    def to_array(self):
        """
        Export only variables to a 1d numpy array
        """
        cells, xyzs = self.x[0][1:], self.x[1:]
        x = cells
        for xyz in xyzs:
            x = np.hstack((x, xyz[2:]))
        return x



[docs]
    def to_string(self, time=None, eng=None, tag=None):
        """
        Export string representation

        Args:
            time: float
            eng: float
            tag: string
        """
        x = self.x
        strs = f"{int(x[0][0]):3d} "

        # data for cell
        if x[0][0] <= 348:
            num = 4
        elif x[0][0] <= 488:
            num = 3
        else:  # cubic
            num = 2

        for c in x[0][1:num]:
            strs += f"{c:5.2f} "
        for c in x[0][num:]:
            strs += f"{c:5.1f} "

        # data for atoms
        strs += f"{len(x) - 1:d} "  # Number of sites
        for i in range(1, len(x)):
            strs += f"{x[i][0]:s} "
            strs += f"{x[i][1]:d} "
            for v in x[i][2:]:
                strs += f"{v:6.4f} "

        if time is not None:
            strs += f"{time:5.2f}"

        if eng is not None:
            strs += f"{eng:11.3f}"

        if tag is not None:
            strs += f" {tag:s}"

        return strs





[docs]
class representation:
    """
    A class to handle the 1D representation of molecular crystal
    Works for Zprime > 1

    Args:
        x: a list of [cell, site_1, site_2, ...]
        smiles: a list of [smile_1, smile_2, ...]
    """

    def __init__(self, x, smiles=None):
        if smiles is not None:
            self.smiles = []
            for _i, smile in enumerate(smiles):
                if smile.endswith(".smi"):
                    smile = smile[:-4]
                self.smiles.append(smile)
        else:
            self.smiles = None
        self.x = x

    def __str__(self):
        return self.to_string()


[docs]
    @classmethod
    def from_pyxtal(cls, struc, standard=False):
        """
        Initialize 1D rep. from the pyxtal object

        Args:
            struc: pyxtal object
        """
        if standard and not struc.standard_setting:
            # struc.optimize_lattice(standard=True)
            pmg = struc.to_pymatgen()
            struc.from_seed(pmg, molecules=struc.molecules, standard=True)
        symmetry = [struc.mol_sites[0].wp.hall_number]
        lat = struc.lattice.encode()
        vector = [symmetry + lat]
        smiles = []
        for site in struc.mol_sites:
            vector.append(site.encode())
            smiles.append(site.molecule.smile)
        x = vector
        if smiles[0] is None: smiles = None
        return cls(x, smiles)



[docs]
    @classmethod
    def from_string(cls, inputs, smiles, composition=None):
        """
        Initialize 1D rep. from the string

        Args:
            inputs: input string
            smiles: list of smiles
        """
        # parse the cell
        if composition is None:
            composition = [1] * len(smiles)

        inputs = [float(tmp) for tmp in inputs.split()]
        hn = int(inputs[0])
        if hn <= 2:
            n_cell = 8
        elif hn <= 107:
            n_cell = 6
        elif hn <= 348:
            n_cell = 5
        elif hn <= 488:
            n_cell = 4
        else:
            n_cell = 3  # cubic
        cell = [hn] + inputs[1 : n_cell - 1]

        x = [cell]
        n_site = int(inputs[n_cell - 1])
        if n_site != sum(composition):
            msg = f"Composition is inconsistent: {sum(composition):d}/{n_site:d}\n"
            msg += str(inputs)
            raise ValueError(msg)
        # n_cell += 1

        for i, smile in enumerate(smiles):
            if smile.endswith(".smi"):
                smile = smile[:-4]
            for _c in range(composition[i]):
                if smile in ["Cl-"]:
                    n_mol = 4
                else:
                    n_torsion = len(find_rotor_from_smile(smile))
                    n_mol = 8 + n_torsion  # (wp_id, x, y, z, ori_x, ori_y, ori_z, inv) + torsion
                # inversion
                # print(n_mol, n_cell, len(inputs))
                inputs[n_cell] = int(inputs[n_cell])
                inputs[n_cell + n_mol - 1] = int(inputs[n_cell + n_mol - 1])
                x.append(inputs[n_cell : n_cell + n_mol])  # ; print('string', x[-1])
                n_cell += n_mol
        assert n_cell == len(inputs)
        return cls(x, smiles)



[docs]
    def to_standard_setting(self):
        xtal = self.to_pyxtal()
        xtal.optimize_lattice(standard=True)
        rep0 = representation.from_pyxtal(xtal)
        self.x = rep0.x



[docs]
    def to_pyxtal(self, smiles=None, composition=None, molecules=None):
        """
        Export the pyxtal structure

        Args:
            smiles: list of smiles
            composition: list of composition
            molecules: optional list of pyxtal_molecule objects aligned with smiles
        """
        from pyxtal import pyxtal

        if smiles is None:
            smiles = self.smiles

        if composition is None:
            composition = [1] * len(smiles)

        if sum(composition) + 1 != len(self.x):
            msg = "Composition is inconsistent:\n"
            msg += str(composition) + "\n"
            msg += self.to_string()
            raise ValueError(msg)

        # symmetry
        v = self.x[0]
        struc = pyxtal(molecular=True)
        struc.group = Group(v[0], use_hall=True)

        # lattice
        ltype = struc.group.lattice_type
        if ltype == "triclinic":
            a, b, c, alpha, beta, gamma = v[1], v[2], v[3], v[4], v[5], v[6]
        elif ltype == "monoclinic":
            a, b, c, alpha, beta, gamma = v[1], v[2], v[3], 90, v[4], 90
        elif ltype == "orthorhombic":
            a, b, c, alpha, beta, gamma = v[1], v[2], v[3], 90, 90, 90
        elif ltype == "tetragonal":
            a, b, c, alpha, beta, gamma = v[1], v[1], v[2], 90, 90, 90
        elif ltype in ["trigonal", "hexagonal"]:
            a, b, c, alpha, beta, gamma = v[1], v[1], v[2], 90, 90, 120
        else:
            a, b, c, alpha, beta, gamma = v[1], v[1], v[1], 90, 90, 90
        try:
            struc.lattice = Lattice.from_para(a, b, c, alpha, beta, gamma, ltype=ltype)
        except:
            print(a, b, c, alpha, beta, gamma, ltype)
            raise ValueError("Problem in Lattice")

        # sites
        struc.numMols = [0] * len(smiles)
        struc.molecules = []
        struc.mol_sites = []

        count = 1
        for i, comp in enumerate(composition):
            smile = smiles[i]
            molecule = None if molecules is None else molecules[i]
            if smile.endswith(".smi"):
                smile = smile[:-4]
            for _j in range(comp):
                v = self.x[count]
                dicts = {}
                dicts["smile"] = smile
                dicts["type"] = i
                dicts["dim"] = 3
                dicts["PBC"] = [1, 1, 1]
                # dicts['number'] = number
                dicts["hn"] = struc.group.hall_number
                dicts["index"] = v[0]
                dicts["lattice"] = struc.lattice.matrix
                dicts["lattice_type"] = ltype
                dicts["center"] = v[1:4]
                dicts["molecule"] = molecule
                if smile not in ["Cl-"]:
                    dicts["orientation"] = np.array(v[4:7])
                    dicts["rotor"] = v[7:-1]  # ; print('ro', dicts['rotor'])
                    dicts["reflect"] = int(v[-1])
                site = mol_site.from_1D_dicts(dicts)

                bypass = False
                for mol_id, molecule in enumerate(struc.molecules):
                    if str(site.molecule) == str(molecule):
                        site.type = mol_id
                        struc.numMols[mol_id] += site.wp.multiplicity
                        bypass = True
                        break
                if not bypass:
                    struc.molecules.append(site.molecule)
                    site.type = len(struc.molecules) - 1
                    struc.numMols[site.type] += site.wp.multiplicity

                # site.type = i
                struc.mol_sites.append(site)
                # move to next rep
                count += 1

        struc._get_formula()
        struc.source = "1D rep."
        struc.valid = True
        struc.standard_setting = site.wp.is_standard_setting()

        return struc



[docs]
    def to_string(self, time=None, eng=None, tag=None):
        """
        Export string representation

        Args:
            time: float
            eng: float
            tag: string
        """
        x = self.x
        strs = f"{int(x[0][0]):3d} "

        # data for cell
        if x[0][0] <= 348:
            num = 4
        elif x[0][0] <= 488:
            num = 3
        else:  # cubic
            num = 2

        for c in x[0][1:num]:
            strs += f"{c:5.2f} "
        for c in x[0][num:]:
            strs += f"{c:5.1f} "

        # data for molecule
        strs += f"{len(x) - 1:d} "  # ; print(x[1])
        for i in range(1, len(x)):
            strs += f"{x[i][0]:d} "
            for v in x[i][1:4]:
                strs += f"{v:4.2f} "
            for v in x[i][4:-1]:
                strs += f"{v:6.1f} "
            strs += f"{int(x[i][-1]):d} "

        if time is not None:
            strs += f"{time:5.2f}"

        if eng is not None:
            strs += f"{eng:11.3f}"

        if tag is not None:
            strs += f" {tag:s}"

        return strs



[docs]
    def same_smiles(self, smiles):
        if len(self.smiles) == smiles:
            return all(s1 == s2 for s1, s2 in zip(self.smiles, smiles))
        else:
            return False



[docs]
    def get_dist(self, rep):
        """
        get distance with the other rep1
        Now only supports Z'=1
        """
        from pyxtal.symmetry import Wyckoff_position as WP

        if self.same_smiles(rep.smiles):
            msg = "different smiles"
            print(msg)
            return None
        elif len(self.x) != len(rep.x):
            msg = "different number of sites"
            print(msg)
            return None
        elif self.x[0][0] != rep.x[0][0]:
            msg = "different space group numbers"
            print(msg)
            return None
        else:
            diffs = []
            wp = WP.from_group_and_index(self.x[0][0], 0, use_hall=True)
            for i in range(len(self.x)):
                np.zeros(len(self.x[i]))
                tmp1 = np.array(self.x[i])
                tmp2 = np.array(rep.x[i])
                # cell difference
                if i == 0:
                    diff_cell = tmp2 - tmp1
                    diffs.extend(diff_cell)
                # site difference
                else:
                    # symmmetry variation
                    xyzs = wp.apply_ops(tmp2[:3])
                    diff_xyzs = xyzs - tmp1[:3]
                    diff_xyzs -= np.rint(diff_xyzs)
                    id = np.argmin(np.linalg.norm(diff_xyzs, axis=1))
                    diff_xyz = diff_xyzs[id]
                    diff_ori = tmp2[3:6] - tmp1[3:6]
                    diff_ori /= [360.0, 180.0, 360.0]
                    diff_ori -= np.rint(diff_ori)
                    diff_ori *= [360.0, 180.0, 360.0]
                    diff_tor = tmp2[6:] - tmp1[6:]
                    diff_tor /= 360.0
                    diff_tor -= np.rint(diff_tor)
                    diff_tor *= 360.0
                    diffs.extend(diff_xyz)
                    diffs.extend(diff_ori)
                    diffs.extend(diff_tor)
            return np.array(diffs)



[docs]
    def update_smiles(self, smiles):
        """
        Update the smiles of the representation
        """
        if len(smiles) != len(self.smiles):
            raise ValueError("Number of smiles does not match")
        self.smiles = smiles
        # update the x for the new smiles
        for i, smile in enumerate(smiles):
            if smile.endswith(".smi"):
                smile = smile[:-4]
            for j in range(len(self.x)):
                if self.x[j][0] == i:
                    self.x[j][0] = smile



if __name__ == "__main__":
    # aspirin
    smiles = ["CC(=O)OC1=CC=CC=C1C(=O)O"]
    x = [
        [81, 11.43, 6.49, 11.19, 83.31],
        [0, 0.77, 0.57, 0.53, 48.55, 24.31, 145.94, -77.85, -4.40, 170.86, False],
    ]
    # rep0 = representation(x, smiles)
    # print(rep0.to_string())
    rep1 = representation(x, smiles)
    xtal = rep1.to_pyxtal()
    print(xtal)
    rep2 = representation.from_pyxtal(xtal)
    print(rep2.to_pyxtal())
    print(rep2.to_string())

    print("Test read from string")
    string = "82 11.43  6.49 11.19 83.31 1  0 0.77  0.57  0.53 48.55 24.31 145.9 -77.85 -4.40 170.9 0"
    rep3 = representation.from_string(string, smiles)
    print(rep3.to_string())
    print(rep3.to_pyxtal())
    # x = rep3.to_pyxtal(); x.optimize_lattice(standard=True); print(x)
    rep3.to_standard_setting()
    print(rep3.to_pyxtal())
    print(rep3.to_string())

    print("Test other cases")
    string1 = "81 14.08  6.36 25.31  83.9 1 0 0.83 0.40 0.63  136.6  -21.6 -151.1 -101.1 -131.2  154.7 -176.4 -147.8  178.2 -179.1  -53.3 0"  # noqa: E501
    string2 = "81 14.08  6.36 25.31  83.9 1 0 0.03 0.84 0.89  149.1   -8.0  -37.8  -39.9 -104.2  176.2 -179.6  137.8 -178.5 -173.3 -103.6 0"  # noqa: E501
    smiles = ["CC1=CC=C(C=C1)S(=O)(=O)C2=C(N=C(S2)C3=CC=C(C=C3)NC(=O)OCC4=CC=CC=C4)C"]
    rep4 = representation.from_string(string1, smiles)
    rep5 = representation.from_string(string2, smiles)
    print(string1)
    print(string2)
    print(rep4.get_dist(rep5))

    from pyxtal import pyxtal

    xtal = pyxtal()
    xtal.from_seed("pyxtal/database/cifs/Fd3.cif")
    xtal.from_seed("pyxtal/database/cifs/NaSb3F10.cif")
    rep = representation_atom.from_pyxtal(xtal)
    print(rep)
    print(xtal)
    print(rep.to_pyxtal())
    # strings = [
    # "83 14.08 6.36 25.31 83.9 1 0.72 0.40 0.27  131.6 -17.0 -120.0  -83.8 -134.1 -174.5 -175.7 -168.8  173.9  178.0 -157.4 0",
    # "81 14.08 6.36 25.31 83.9 1 0.59 0.81 0.39 -117.8 -50.1  -95.3  -25.8  -80.6  164.7  155.9 -124.9 -159.2  178.6 -154.7 0",
    # "81 14.08 6.36 25.31 83.9 1 0.75 0.09 0.01  133.8 -19.5  -55.1  -86.7  -91.7 -175.0 -170.4 -176.8  173.3 -164.8  -58.4 0",
    # "81 14.08 6.36 25.31 83.9 1 0.72 0.44 0.01  135.2  27.5   97.2 -101.1 -105.1  -29.7 -169.7  -50.1  172.2 -173.1  131.6 0",
    # "82 14.00 6.34 25.26 83.6 1 0.21 0.08 0.54  146.0 -12.0   50.2  108.0  112.3 -166.3 -158.7  -35.5  172.3 -168.7  133.0 0",
    # "81 14.08 6.36 25.31 83.9 1 0.05 0.30 0.89  -68.2  41.2  148.8  -66.9  -85.0 -167.4  172.3 -166.2 -178.3  166.4  -45.9 0",
    # ]

    # import pymatgen.analysis.structure_matcher as sm
    # matcher = sm.StructureMatcher(ltol=0.3, stol=0.3, angle_tol=10)
    # for i, string in enumerate(strings):
    #    print(str(i) + '  ' +string)
    #    rep4 = representation.from_string(string, smiles)
    #    pmg1 = rep4.to_pyxtal().to_pymatgen(); pmg1.remove_species('H')
    #    rep4.to_standard_setting()
    #    pmg2 = rep4.to_pyxtal().to_pymatgen(); pmg2.remove_species('H')
    #    print(i, rep4.to_string(), matcher.fit(pmg1, pmg2))