Source code for mala.descriptors.descriptor

"""Base class for all descriptor calculators."""

from abc import abstractmethod
import json
import os
import tempfile

import ase
from ase.cell import Cell
from ase.units import m
from ase.neighborlist import NeighborList, NewPrimitiveNeighborList
import numpy as np
from skspatial.objects import Plane

from mala.common.parameters import ParametersDescriptors, Parameters
from mala.common.parallelizer import (
    get_comm,
    printout,
    get_rank,
    get_size,
    barrier,
    parallel_warn,
    set_lammps_instance,
)
from mala.common.physical_data import PhysicalData
from mala.descriptors.lammps_utils import set_cmdlinevars



[docs]
class Descriptor(PhysicalData):
    """
    Base class for all descriptors available in MALA.

    Descriptors encode the atomic fingerprint of a DFT calculation.

    Parameters
    ----------
    parameters : mala.common.parameters.Parameters
        Parameters object used to create this object.

    Attributes
    ----------
    parameters: mala.common.parameters.ParametersDescriptors
        MALA descriptor calculation parameters.
    """

    ##############################
    # Constructors
    ##############################

    def __new__(cls, params: Parameters = None):
        """
        Create a Descriptor instance.

        The correct type of descriptor calculator will automatically be
        instantiated by this class if possible. You can also instantiate
        the desired descriptor directly by calling upon the subclass.

        Parameters
        ----------
        params : mala.common.parametes.Parameters
            Parameters used to create this descriptor calculator.
        """
        descriptors = None
        # Check if we're accessing through base class.
        # If not, we need to return the correct object directly.
        if cls == Descriptor:
            if params.descriptors.descriptor_type == "Bispectrum":
                from mala.descriptors.bispectrum import Bispectrum

                descriptors = super(Descriptor, Bispectrum).__new__(Bispectrum)

            if params.descriptors.descriptor_type == "AtomicDensity":
                from mala.descriptors.atomic_density import AtomicDensity

                descriptors = super(Descriptor, AtomicDensity).__new__(
                    AtomicDensity
                )

            if params.descriptors.descriptor_type == "ACE":
                from mala.descriptors.ace import ACE

                descriptors = super(Descriptor, ACE).__new__(ACE)

            if params.descriptors.descriptor_type == "MinterpyDescriptors":
                from mala.descriptors.minterpy_descriptors import (
                    MinterpyDescriptors,
                )

                descriptors = super(Descriptor, MinterpyDescriptors).__new__(
                    MinterpyDescriptors
                )

            if descriptors is None:
                raise Exception("Unsupported descriptor calculator.")
        else:
            descriptors = super(Descriptor, cls).__new__(cls)

        # For pickling
        setattr(descriptors, "params_arg", params)
        return descriptors

    def __getnewargs__(self):
        """
        Get the necessary arguments to call __new__.

        Used for pickling.


        Returns
        -------
        params : mala.Parameters
            The parameters object with which this object was created.
        """
        return (self.params_arg,)

    def __init__(self, parameters):
        super(Descriptor, self).__init__(parameters)
        self.parameters: ParametersDescriptors = parameters.descriptors
        self.feature_size = 0  # so iterations will fail
        self._in_format_ase = ""
        self._atoms = None
        self._voxel = None

        # If we ever have NON LAMMPS descriptors, these parameters have no
        # meaning anymore and should probably be moved to an intermediate
        # DescriptorsLAMMPS class, from which the LAMMPS descriptors inherit.
        self._lammps_temporary_input = None
        self._lammps_temporary_log = None

    ##############################
    # Properties
    ##############################

    @property
    def si_unit_conversion(self):
        """
        Numeric value of the conversion from MALA (ASE) units to SI.

        Needed for OpenPMD interface.
        """
        return m**3

    @property
    def si_dimension(self):
        """
        Dictionary containing the SI unit dimensions in OpenPMD format.

        Needed for OpenPMD interface.
        """
        import openpmd_api as io

        return {io.Unit_Dimension.L: -3}

    @property
    def descriptors_contain_xyz(self):
        """Control whether descriptor vectors will contain xyz coordinates."""
        return self.parameters.descriptors_contain_xyz

    @descriptors_contain_xyz.setter
    def descriptors_contain_xyz(self, value):
        self.parameters.descriptors_contain_xyz = value

    ##############################
    # Methods
    ##############################

    # File I/O
    ##########


[docs]
    @staticmethod
    def convert_units(array, in_units="1/eV"):
        """
        Convert descriptors from a specified unit into the ones used in MALA.

        Parameters
        ----------
        array : numpy.ndarray
            Data for which the units should be converted.

        in_units : string
            Units of array.

        Returns
        -------
        converted_array : numpy.ndarray
            Data in MALA units.

        """
        raise Exception(
            "No unit conversion method implemented for this"
            " descriptor type."
        )


    @property
    def feature_size(self):
        """Get the feature dimension of this data."""
        return self._feature_size

    @feature_size.setter
    def feature_size(self, value):
        self._feature_size = value


[docs]
    @staticmethod
    def backconvert_units(array, out_units):
        """
        Convert descriptors from MALA units into a specified unit.

        Parameters
        ----------
        array : numpy.ndarray
            Data in MALA units.

        out_units : string
            Desired units of output array.

        Returns
        -------
        converted_array : numpy.ndarray
            Data in out_units.

        """
        raise Exception(
            "No unit back conversion method implemented for "
            "this descriptor type."
        )



[docs]
    def setup_lammps_tmp_files(self, lammps_type, outdir):
        """
        Create the temporary lammps input and log files.

        Parameters
        ----------
        lammps_type: str
            Type of descriptor calculation (e.g. bgrid for bispectrum)
        outdir: str
            Directory where lammps files are kept

        Returns
        -------
        None
        """
        if get_rank() == 0:
            prefix_inp_str = "lammps_" + lammps_type + "_input"
            prefix_log_str = "lammps_" + lammps_type + "_log"
            lammps_tmp_input_file = tempfile.NamedTemporaryFile(
                delete=False, prefix=prefix_inp_str, suffix="_.tmp", dir=outdir
            )
            self._lammps_temporary_input = lammps_tmp_input_file.name
            lammps_tmp_input_file.close()

            lammps_tmp_log_file = tempfile.NamedTemporaryFile(
                delete=False, prefix=prefix_log_str, suffix="_.tmp", dir=outdir
            )
            self._lammps_temporary_log = lammps_tmp_log_file.name
            lammps_tmp_log_file.close()
        else:
            self._lammps_temporary_input = None
            self._lammps_temporary_log = None

        if self.parameters._configuration["mpi"]:
            self._lammps_temporary_input = get_comm().bcast(
                self._lammps_temporary_input, root=0
            )
            self._lammps_temporary_log = get_comm().bcast(
                self._lammps_temporary_log, root=0
            )


    # Calculations
    ##############


[docs]
    @staticmethod
    def enforce_pbc(atoms):
        """
        Explictly enforces the PBC on an ASE atoms object.

        QE (and potentially other codes?) do that internally. Meaning that the
        raw positions of atoms (in Angstrom) can lie outside of the unit cell.
        When setting up the DFT calculation, these atoms get shifted into
        the unit cell. Since we directly use these raw positions for the
        descriptor calculation, we need to enforce that in the ASE atoms
        objects, the atoms are explicitly in the unit cell.

        Parameters
        ----------
        atoms : ase.atoms
            The ASE atoms object for which the PBC need to be enforced.

        Returns
        -------
        new_atoms : ase.Atoms
            The ASE atoms object for which the PBC have been enforced.
        """
        new_atoms = atoms.copy()
        new_atoms.set_scaled_positions(new_atoms.get_scaled_positions())

        # This might be unecessary, but I think it is nice to have some sort of
        # metric here.
        rescaled_atoms = 0
        for i in range(0, len(atoms)):
            if False in (
                np.isclose(
                    new_atoms[i].position, atoms[i].position, atol=0.001
                )
            ):
                rescaled_atoms += 1
        printout(
            "Descriptor calculation: had to enforce periodic boundary "
            "conditions on",
            rescaled_atoms,
            "atoms before calculation.",
            min_verbosity=2,
        )
        return new_atoms



[docs]
    def calculate_from_qe_out(
        self, qe_out_file, working_directory=".", **kwargs
    ):
        """
        Calculate the descriptors based on a Quantum Espresso outfile.

        Parameters
        ----------
        qe_out_file : string
            Name of Quantum Espresso output file for snapshot.

        working_directory : string
            A directory in which to write the output of the LAMMPS calculation.
            Usually the local directory should suffice, given that there
            are no multiple instances running in the same directory.

        kwargs : dict
            A collection of keyword arguments, that are mainly used for
            debugging and development. Different types of descriptors
            may support different keyword arguments. Commonly supported
            are

            - "use_fp64": To use enforce floating point 64 precision for
              descriptors.
            - "keep_logs": To not delete temporary files created during
              LAMMPS calculation of descriptors.

        Returns
        -------
        descriptors : numpy.ndarray
            Numpy array containing the descriptors with the dimension
            (x,y,z,descriptor_dimension)

        """
        self._in_format_ase = "espresso-out"
        printout("Calculating descriptors from", qe_out_file, min_verbosity=0)
        # We get the atomic information by using ASE.
        self._atoms = ase.io.read(qe_out_file, format=self._in_format_ase)

        # Enforcing / Checking PBC on the read atoms.
        self._atoms = self.enforce_pbc(self._atoms)

        # Get the grid dimensions.
        if "grid_dimensions" in kwargs.keys():
            self.grid_dimensions = kwargs["grid_dimensions"]

            # Deleting this keyword from the list to avoid conflict with
            # dict below.
            del kwargs["grid_dimensions"]
        else:
            qe_outfile = open(qe_out_file, "r")
            lines = qe_outfile.readlines()
            self.grid_dimensions = [0, 0, 0]

            for line in lines:
                if "FFT dimensions" in line:
                    tmp = line.split("(")[1].split(")")[0]
                    self.grid_dimensions[0] = int(tmp.split(",")[0])
                    self.grid_dimensions[1] = int(tmp.split(",")[1])
                    self.grid_dimensions[2] = int(tmp.split(",")[2])
                    break

        self._voxel = self._atoms.cell.copy()
        self._voxel[0] = self._voxel[0] / (self.grid_dimensions[0])
        self._voxel[1] = self._voxel[1] / (self.grid_dimensions[1])
        self._voxel[2] = self._voxel[2] / (self.grid_dimensions[2])

        return self._calculate(working_directory, **kwargs)



[docs]
    def calculate_from_json(self, json_file, working_directory=".", **kwargs):
        """
        Calculate the descriptors based on a MALA generated json file.

        These json files are generated by the MALA DataConverter class
        and bundle information about a DFT simulation.

        Parameters
        ----------
        json_file : string
            Name of MALA json output file for snapshot.

        working_directory : string
            A directory in which to write the output of the LAMMPS calculation.
            Usually the local directory should suffice, given that there
            are no multiple instances running in the same directory.

        kwargs : dict
            A collection of keyword arguments, that are mainly used for
            debugging and development. Different types of descriptors
            may support different keyword arguments. Commonly supported
            are

            - "use_fp64": To use enforce floating point 64 precision for
              descriptors.
            - "keep_logs": To not delete temporary files created during
              LAMMPS calculation of descriptors.

        Returns
        -------
        descriptors : numpy.ndarray
            Numpy array containing the descriptors with the dimension
            (x,y,z,descriptor_dimension)

        """
        if isinstance(json_file, str):
            json_dict = json.load(open(json_file, encoding="utf-8"))
        else:
            json_dict = json.load(json_file)
        self.grid_dimensions = json_dict["grid_dimensions"]
        self._atoms = ase.Atoms.fromdict(json_dict["atoms"])
        self._voxel = Cell(json_dict["voxel"]["array"])
        return self._calculate(working_directory, **kwargs)



[docs]
    def calculate_from_atoms(
        self, atoms, grid_dimensions, working_directory=".", **kwargs
    ):
        """
        Calculate the bispectrum descriptors based on atomic configurations.

        Parameters
        ----------
        atoms : ase.Atoms
            Atoms object holding the atomic configuration.

        grid_dimensions : List
            Grid dimensions to be used, in the format [x,y,z].

        working_directory : string
            A directory in which to write the output of the LAMMPS calculation.
            Usually the local directory should suffice, given that there
            are no multiple instances running in the same directory.

        kwargs : dict
            A collection of keyword arguments, that are mainly used for
            debugging and development. Different types of descriptors
            may support different keyword arguments. Commonly supported
            are

            - "use_fp64": To use enforce floating point 64 precision for
              descriptors.
            - "keep_logs": To not delete temporary files created during
              LAMMPS calculation of descriptors.

        Returns
        -------
        descriptors : numpy.ndarray
            Numpy array containing the descriptors with the dimension
            (x,y,z,descriptor_dimension)
        """
        # Enforcing / Checking PBC on the input atoms.
        self._atoms = self.enforce_pbc(atoms)
        self.grid_dimensions = grid_dimensions
        self._voxel = self._atoms.cell.copy()
        self._voxel[0] = self._voxel[0] / (self.grid_dimensions[0])
        self._voxel[1] = self._voxel[1] / (self.grid_dimensions[1])
        self._voxel[2] = self._voxel[2] / (self.grid_dimensions[2])
        return self._calculate(working_directory, **kwargs)



[docs]
    def gather_descriptors(self, descriptors_np, use_pickled_comm=False):
        """
        Gathers all descriptors on rank 0 and sorts them.

        This is useful for e.g. parallel preprocessing.
        This function removes the extra 3 components that come from parallel
        processing.
        I.e. if we have 91 bispectrum descriptors, LAMMPS directly outputs us
        97 (in parallel mode), and this function returns 94, as to retain the
        3 x,y,z ones we by default include.

        Parameters
        ----------
        descriptors_np : numpy.ndarray
            Numpy array with the descriptors of this ranks local grid.

        use_pickled_comm : bool
            If True, the pickled communication route from mpi4py is used.
            If False, a Recv/Sendv combination is used. I am not entirely
            sure what is faster. Technically Recv/Sendv should be faster,
            but I doubt my implementation is all that optimal. For the pickled
            route we can use gather(), which should be fairly quick.
            However, for large grids, one CANNOT use the pickled route;
            too large python objects will break it. Therefore, I am setting
            the Recv/Sendv route as default.
        """
        # Barrier to make sure all ranks have descriptors..
        comm = get_comm()
        barrier()

        # Gather the descriptors into a list.
        if use_pickled_comm:
            all_descriptors_list = comm.gather(descriptors_np, root=0)
        else:
            sendcounts = np.array(
                comm.gather(np.shape(descriptors_np)[0], root=0)
            )
            raw_feature_length = self.feature_size + 3

            if get_rank() == 0:
                # print("sendcounts: {}, total: {}".format(sendcounts,
                #                                          sum(sendcounts)))

                # Preparing the list of buffers.
                all_descriptors_list = []
                for i in range(0, get_size()):
                    all_descriptors_list.append(
                        np.empty(
                            sendcounts[i] * raw_feature_length,
                            dtype=descriptors_np.dtype,
                        )
                    )

                # No MPI necessary for first rank. For all the others,
                # collect the buffers.
                all_descriptors_list[0] = descriptors_np
                for i in range(1, get_size()):
                    comm.Recv(all_descriptors_list[i], source=i, tag=100 + i)
                    all_descriptors_list[i] = np.reshape(
                        all_descriptors_list[i],
                        (sendcounts[i], raw_feature_length),
                    )
            else:
                comm.Send(descriptors_np, dest=0, tag=get_rank() + 100)
            barrier()

        # if get_rank() == 0:
        #     printout(np.shape(all_descriptors_list[0]))
        #     printout(np.shape(all_descriptors_list[1]))
        #     printout(np.shape(all_descriptors_list[2]))
        #     printout(np.shape(all_descriptors_list[3]))

        # Dummy for the other ranks.
        # (For now, might later simply broadcast to other ranks).
        descriptors_full = np.zeros([1, 1, 1, 1])

        # Reorder the list.
        if get_rank() == 0:
            # Prepare the descriptor array.
            nx = self.grid_dimensions[0]
            ny = self.grid_dimensions[1]
            nz = self.grid_dimensions[2]
            descriptors_full = np.zeros([nx, ny, nz, self.feature_size])
            # Fill the full bispectrum descriptors array.
            for idx, local_grid in enumerate(all_descriptors_list):
                # We glue the individual cells back together, and transpose.
                first_x = int(local_grid[0][0])
                first_y = int(local_grid[0][1])
                first_z = int(local_grid[0][2])
                last_x = int(local_grid[-1][0]) + 1
                last_y = int(local_grid[-1][1]) + 1
                last_z = int(local_grid[-1][2]) + 1
                descriptors_full[
                    first_x:last_x, first_y:last_y, first_z:last_z
                ] = np.reshape(
                    local_grid[:, 3:],
                    [
                        last_z - first_z,
                        last_y - first_y,
                        last_x - first_x,
                        self.feature_size,
                    ],
                ).transpose(
                    [2, 1, 0, 3]
                )

                # Leaving this in here for debugging purposes.
                # This is the slow way to reshape the descriptors.
                # for entry in local_grid:
                #     x = int(entry[0])
                #     y = int(entry[1])
                #     z = int(entry[2])
                #     descriptors_full[x, y, z] = entry[3:]
        if self.parameters.descriptors_contain_xyz:
            return descriptors_full
        else:
            return descriptors_full[:, :, :, 3:]



[docs]
    def convert_local_to_3d(self, descriptors_np):
        """
        Convert the desciptors as done in the gather function, but per rank.

        This is useful for e.g. parallel preprocessing.
        This function removes the extra 3 components that come from parallel
        processing.
        I.e. if we have 91 bispectrum descriptors, LAMMPS directly outputs us
        97 (in parallel mode), and this function returns 94, as to retain the
        3 x,y,z ones we by default include.

        Parameters
        ----------
        descriptors_np : numpy.ndarray
            Numpy array with the descriptors of this ranks local grid.
        """
        local_offset = [None, None, None]
        local_reach = [None, None, None]
        local_offset[0] = int(descriptors_np[0][0])
        local_offset[1] = int(descriptors_np[0][1])
        local_offset[2] = int(descriptors_np[0][2])
        local_reach[0] = int(descriptors_np[-1][0]) + 1
        local_reach[1] = int(descriptors_np[-1][1]) + 1
        local_reach[2] = int(descriptors_np[-1][2]) + 1
        nx = local_reach[0] - local_offset[0]
        ny = local_reach[1] - local_offset[1]
        nz = local_reach[2] - local_offset[2]

        descriptors_full = np.zeros([nx, ny, nz, self.feature_size])

        descriptors_full[0:nx, 0:ny, 0:nz] = np.reshape(
            descriptors_np[:, 3:], [nz, ny, nx, self.feature_size]
        ).transpose([2, 1, 0, 3])
        return descriptors_full, local_offset, local_reach



[docs]
    def read_dimensions_from_json(self, json_file):
        """
        Read only the descriptor dimensions from a json file.

        These json files are generated by the MALA DataConverter class
        and bundle information about a DFT simulation.

        Parameters
        ----------
        json_file : string
            Path to the numpy file.

        Returns
        -------
        dimension_info : List or tuple
            If read_dtype is False, then only a list containing the dimensions
            of the saved array is returned. If read_dtype is True, a tuple
            containing this list of dimensions and the dtype of the array will
            be returned.
        """
        if isinstance(json_file, str):
            json_dict = json.load(open(json_file, encoding="utf-8"))
        else:
            json_dict = json.load(json_file)
        grid_dimensions = json_dict["grid_dimensions"] + [
            self._read_feature_dimension_from_json(json_dict)
        ]
        return grid_dimensions


    # Private methods
    #################

    def _process_loaded_array(self, array, units=None):
        """
        Process loaded array (i.e., unit change, reshaping, etc.).

        Parameters
        ----------
        array : numpy.ndarray
            Array to process.

        units : string
            Units of input array.
        """
        array *= self.convert_units(1, in_units=units)

    def _process_loaded_dimensions(self, array_dimensions):
        """
        Process loaded dimensions.

        In this case, cut xyz coordinates from descriptors if
        descriptors_contain_xyz is set in the parameters.

        Parameters
        ----------
        array_dimensions : tuple
            Raw dimensions of the array.
        """
        if self.descriptors_contain_xyz:
            return (
                array_dimensions[0],
                array_dimensions[1],
                array_dimensions[2],
                array_dimensions[3] - 3,
            )
        else:
            return array_dimensions

    def _set_geometry_info(self, mesh):
        """
        Set geometry information to openPMD mesh.

        This has to be done as part of the openPMD saving process.

        Parameters
        ----------
        mesh : openpmd_api.Mesh
            OpenPMD mesh for which to set geometry information.
        """
        # Geometry: Save the cell parameters and angles of the grid.
        if self._atoms is not None:
            import openpmd_api as io

            self._voxel = self._atoms.cell.copy()
            self._voxel[0] = self._voxel[0] / (self.grid_dimensions[0])
            self._voxel[1] = self._voxel[1] / (self.grid_dimensions[1])
            self._voxel[2] = self._voxel[2] / (self.grid_dimensions[2])

            mesh.geometry = io.Geometry.cartesian
            mesh.grid_spacing = self._voxel.cellpar()[0:3]
            mesh.set_attribute("angles", self._voxel.cellpar()[3:])

    def _get_atoms(self):
        """
        Access atoms saved in PhysicalData-derived class.

        For any derived class which is atom based (currently, all are), this
        function returns the atoms, which may not be directly accessible as
        an attribute for a variety of reasons.

        Returns
        -------
        atoms : ase.Atoms
            An ASE atoms object holding the associated atoms of this object.
        """
        return self._atoms

    def _feature_mask(self):
        """
        Return a mask for features that are not part of the feature dimension.

        The mask assumes that the features which do not belong to the feature
        dimension are at the beginning of the array. Here, return 3 if the
        descriptors contain xyz coordinates, otherwise 0.

        Returns
        -------
        mask : int
            Starting index after which the actual feature dimension starts.
        """
        if self.descriptors_contain_xyz:
            return 3
        else:
            return 0

    def _setup_lammps(self, nx, ny, nz, lammps_dict):
        """
        Set up the lammps processor grid.

        Takes into account y/z-splitting.

        Parameters
        ----------
        nx : int
            Number of gridpoints in x-direction.

        ny : int
            Number of gridpoints in y-direction.
        nz : int
            Number of gridpoints in z-direction.

        lammps_dict : dict
            Dictionary with LAMMPS options.

        Returns
        -------
        lmp : lammps.LAMMPS
            LAMMPS instance.
        """
        from lammps import lammps

        # Build LAMMPS arguments from the data we read.
        lmp_cmdargs = [
            "-screen",
            "none",
            "-log",
            self._lammps_temporary_log,
        ]
        lammps_dict["atom_config_fname"] = self._lammps_temporary_input

        if self.parameters._configuration["mpi"]:
            size = get_size()
            # for parallel tem need to set lammps commands: processors and
            # balance current implementation is to match lammps mpi processor
            # grid to QE processor splitting QE distributes grid points in
            # parallel as slices along z axis currently grid points fall on z
            # axix plane cutoff values in lammps this leads to some ranks
            # having 0 grid points and other having 2x gridpoints
            # balance command in lammps aleviates this issue
            # integers for plane cuts in z axis appear to be most important
            #
            # determine if nyfft flag is set so that QE also parallelizes
            # along y axis if nyfft is true lammps mpi processor grid needs to
            # be 1x{ny}x{nz} need to configure separate total_energy_module
            # with nyfft enabled
            if self.parameters.use_y_splitting > 1:
                # If more processors thatn y*z grid dimensions requested
                # send error. More processors than y*z grid dimensions reduces
                # efficiency and scaling of QE.
                nyfft = self.parameters.use_y_splitting
                # number of y processors is equal to nyfft
                yprocs = nyfft
                # number of z processors is equal to total processors/nyfft is
                # nyfft is used else zprocs = size
                if size % yprocs == 0:
                    zprocs = int(size / yprocs)
                else:
                    raise ValueError(
                        "Cannot evenly divide z-planes in y-direction"
                    )

                # check if total number of processors is greater than number of
                # grid sections produce error if number of processors is
                # greater than grid partions - will cause mismatch later in QE
                mpi_grid_sections = yprocs * zprocs
                if mpi_grid_sections < size:
                    raise ValueError(
                        "More processors than grid sections. "
                        "This will cause a crash further in the "
                        "calculation. Choose a total number of "
                        "processors equal to or less than the "
                        "total number of grid sections requsted "
                        "for the calculation (nyfft*nz)."
                    )
                # TODO not sure what happens when size/nyfft is not integer -
                #  further testing required

                # set the mpi processor grid for lammps
                lammps_procs = f"1 {yprocs} {zprocs}"
                printout(
                    "mpi grid with nyfft: ", lammps_procs, min_verbosity=2
                )

                # prepare y plane cuts for balance command in lammps if not
                # integer value
                if int(ny / yprocs) == (ny / yprocs):
                    ycut = 1 / yprocs
                    yint = ""
                    for i in range(0, yprocs - 1):
                        yvals = ((i + 1) * ycut) - 0.00000001
                        yint += format(yvals, ".8f")
                        yint += " "
                else:
                    # account for remainder with uneven number of
                    # planes/processors
                    ycut = 1 / yprocs
                    yrem = ny - (yprocs * int(ny / yprocs))
                    yint = ""
                    for i in range(0, yrem):
                        yvals = (((i + 1) * 2) * ycut) - 0.00000001
                        yint += format(yvals, ".8f")
                        yint += " "
                    for i in range(yrem, yprocs - 1):
                        yvals = ((i + 1 + yrem) * ycut) - 0.00000001
                        yint += format(yvals, ".8f")
                        yint += " "
                # prepare z plane cuts for balance command in lammps
                if int(nz / zprocs) == (nz / zprocs):
                    zcut = 1 / nz
                    zint = ""
                    for i in range(0, zprocs - 1):
                        zvals = ((i + 1) * (nz / zprocs) * zcut) - 0.00000001
                        zint += format(zvals, ".8f")
                        zint += " "
                else:
                    # account for remainder with uneven number of
                    # planes/processors
                    raise ValueError(
                        "Cannot divide z-planes on processors"
                        " without remainder. "
                        "This is currently unsupported."
                    )

                    # zcut = 1/nz
                    # zrem = nz - (zprocs*int(nz/zprocs))
                    # zint = ''
                    # for i in range(0, zrem):
                    #     zvals = (((i+1)*2)*zcut)-0.00000001
                    #     zint += format(zvals, ".8f")
                    #     zint += ' '
                    # for i in range(zrem, zprocs-1):
                    #     zvals = ((i+1+zrem)*zcut)-0.00000001
                    #     zint += format(zvals, ".8f")
                    #     zint += ' '
                lammps_dict["lammps_procs"] = (
                    f"processors {lammps_procs} " f"map xyz"
                )
                lammps_dict["zbal"] = f"balance 1.0 y {yint} z {zint}"
                lammps_dict["ngridx"] = nx
                lammps_dict["ngridy"] = ny
                lammps_dict["ngridz"] = nz
                lammps_dict["switch"] = self.parameters.bispectrum_switchflag

            else:
                if self.parameters.use_z_splitting:
                    # when nyfft is not used only split processors along z axis
                    size = get_size()
                    zprocs = size
                    # check to make sure number of z planes is not less than
                    # processors. If more processors than planes calculation
                    # efficiency decreases
                    if nz < size:
                        raise ValueError(
                            "More processors than grid sections. "
                            "This will cause a crash further in "
                            "the calculation. Choose a total "
                            "number of processors equal to or "
                            "less than the total number of grid "
                            "sections requsted for the "
                            "calculation (nz)."
                        )

                    # match lammps mpi grid to be 1x1x{zprocs}
                    lammps_procs = f"1 1 {zprocs}"
                    # print("mpi grid z only: ", lammps_procs)

                    # prepare z plane cuts for balance command in lammps
                    if int(nz / zprocs) == (nz / zprocs):
                        printout("No remainder in z")
                        zcut = 1 / nz
                        zint = ""
                        for i in range(0, zprocs - 1):
                            zvals = (
                                (i + 1) * (nz / zprocs) * zcut
                            ) - 0.00000001
                            zint += format(zvals, ".8f")
                            zint += " "
                    else:
                        # raise ValueError("Cannot divide z-planes on processors"
                        #                 " without remainder. "
                        #                 "This is currently unsupported.")
                        zcut = 1 / nz
                        zrem = nz - (zprocs * int(nz / zprocs))
                        zint = ""
                        for i in range(0, zrem):
                            zvals = (
                                ((i + 1) * (int(nz / zprocs) + 1)) * zcut
                            ) - 0.00000001
                            zint += format(zvals, ".8f")
                            zint += " "
                        for i in range(zrem, zprocs - 1):
                            zvals = (
                                ((i + 1) * int(nz / zprocs) + zrem) * zcut
                            ) - 0.00000001
                            zint += format(zvals, ".8f")
                            zint += " "

                    lammps_dict["lammps_procs"] = f"processors {lammps_procs}"
                    lammps_dict["zbal"] = f"balance 1.0 z {zint}"
                    lammps_dict["ngridx"] = nx
                    lammps_dict["ngridy"] = ny
                    lammps_dict["ngridz"] = nz
                    lammps_dict["switch"] = (
                        self.parameters.bispectrum_switchflag
                    )

                else:
                    lammps_dict["ngridx"] = nx
                    lammps_dict["ngridy"] = ny
                    lammps_dict["ngridz"] = nz
                    lammps_dict["switch"] = (
                        self.parameters.bispectrum_switchflag
                    )

        else:
            size = 1
            lammps_dict["ngridx"] = nx
            lammps_dict["ngridy"] = ny
            lammps_dict["ngridz"] = nz
            lammps_dict["switch"] = self.parameters.bispectrum_switchflag
        if self.parameters._configuration["gpu"]:
            # Tell Kokkos to use one GPU.
            lmp_cmdargs.append("-k")
            lmp_cmdargs.append("on")
            lmp_cmdargs.append("g")
            lmp_cmdargs.append(str(size))

            # Tell LAMMPS to use Kokkos versions of those commands for
            # which a Kokkos version exists.
            lmp_cmdargs.append("-sf")
            lmp_cmdargs.append("kk")
            pass

        lmp_cmdargs = set_cmdlinevars(lmp_cmdargs, lammps_dict)

        lmp = lammps(cmdargs=lmp_cmdargs)
        set_lammps_instance(lmp)

        return lmp

    def _clean_calculation(self, lmp, keep_logs):
        """
        Clean a LAMMPS calculation.

        This function closes the LAMMPS instance and deletes the temporary
        files created during the calculation, if keep_logs is False.

        Parameters
        ----------
        lmp : lammps.LAMMPS
            LAMMPS instance to close.

        keep_logs : bool
            If True, the temporary files are not deleted.
        """
        lmp.close()
        if not keep_logs:
            if get_rank() == 0:
                os.remove(self._lammps_temporary_log)
                os.remove(self._lammps_temporary_input)

    def _setup_atom_list(self):
        """
        Set up a list of atoms potentially relevant for descriptor calculation.

        If periodic boundary conditions are used, which is usually the case
        for MALA simulation, one has to compute descriptors by also
        incorporating atoms from neighboring cells.

        FURTHER OPTIMIZATION: Probably not that much, this mostly already uses
        optimized python functions.

        Returns
        -------
        all_atoms : numpy.ndarray
            Numpy array containing the positions of all atoms potentially
            relevant for the descriptor calculation.
        """
        if np.any(self._atoms.pbc):

            # To determine the list of relevant atoms we first take the edges
            # of the simulation cell and use them to determine all cells
            # which hold atoms that _may_ be relevant for the calculation.
            edges = list(
                np.array(
                    [
                        [0, 0, 0],
                        [1, 0, 0],
                        [0, 1, 0],
                        [0, 0, 1],
                        [1, 1, 1],
                        [0, 1, 1],
                        [1, 0, 1],
                        [1, 1, 0],
                    ]
                )
                * np.array(self.grid_dimensions)
            )
            all_cells_list = None

            # For each edge point create a neighborhoodlist to all cells
            # given by the cutoff radius.
            for edge in edges:
                edge_point = self._grid_to_coord(edge)
                neighborlist = NeighborList(
                    np.zeros(len(self._atoms) + 1)
                    + [self.parameters.atomic_density_cutoff],
                    bothways=True,
                    self_interaction=False,
                    primitive=NewPrimitiveNeighborList,
                )

                atoms_with_grid_point = self._atoms.copy()

                # Construct a ghost atom representing the grid point.
                atoms_with_grid_point.append(ase.Atom("H", edge_point))
                neighborlist.update(atoms_with_grid_point)
                indices, offsets = neighborlist.get_neighbors(len(self._atoms))

                # Incrementally fill the list containing all cells to be
                # considered.
                if all_cells_list is None:
                    all_cells_list = np.unique(offsets, axis=0)
                else:
                    all_cells_list = np.concatenate(
                        (all_cells_list, np.unique(offsets, axis=0))
                    )

            # Delete the original cell from the list of all cells.
            # This is to avoid double checking of atoms below.
            all_cells = np.unique(all_cells_list, axis=0)
            idx = 0
            for a in range(0, len(all_cells)):
                if (all_cells[a, :] == np.array([0, 0, 0])).all():
                    break
                idx += 1
            all_cells = np.delete(all_cells, idx, axis=0)

            # Create an object to hold all relevant atoms.
            # First, instantiate it by filling it will all atoms from all
            # potentiall relevant cells, as identified above.
            all_atoms = None
            for a in range(0, len(self._atoms)):
                if all_atoms is None:
                    all_atoms = (
                        self._atoms.positions[a]
                        + all_cells @ self._atoms.get_cell()
                    )
                else:
                    all_atoms = np.concatenate(
                        (
                            all_atoms,
                            self._atoms.positions[a]
                            + all_cells @ self._atoms.get_cell(),
                        )
                    )

            # Next, construct the planes forming the unit cell.
            # Atoms from neighboring cells are only included in the list of
            # all relevant atoms, if they have a distance to any of these
            # planes smaller than the cutoff radius. Elsewise, they would
            # not be included in the eventual calculation anyhow.
            planes = [
                [[0, 1, 0], [0, 0, 1], [0, 0, 0]],
                [
                    [self.grid_dimensions[0], 1, 0],
                    [self.grid_dimensions[0], 0, 1],
                    self.grid_dimensions,
                ],
                [[1, 0, 0], [0, 0, 1], [0, 0, 0]],
                [
                    [1, self.grid_dimensions[1], 0],
                    [0, self.grid_dimensions[1], 1],
                    self.grid_dimensions,
                ],
                [[1, 0, 0], [0, 1, 0], [0, 0, 0]],
                [
                    [1, 0, self.grid_dimensions[2]],
                    [0, 1, self.grid_dimensions[2]],
                    self.grid_dimensions,
                ],
            ]
            all_distances = []
            for plane in planes:
                curplane = Plane.from_points(
                    self._grid_to_coord(plane[0]),
                    self._grid_to_coord(plane[1]),
                    self._grid_to_coord(plane[2]),
                )
                distances = []

                # TODO: This may be optimized, and formulated in an array
                # operation.
                for a in range(np.shape(all_atoms)[0]):
                    distances.append(curplane.distance_point(all_atoms[a]))
                all_distances.append(distances)
            all_distances = np.array(all_distances)
            all_distances = np.min(all_distances, axis=0)
            all_atoms = np.squeeze(
                all_atoms[
                    np.argwhere(
                        all_distances < self.parameters.atomic_density_cutoff
                    ),
                    :,
                ]
            )
            return np.concatenate((all_atoms, self._atoms.positions))

        else:
            # If no PBC are used, only consider a single cell.
            return self._atoms.positions

    def _grid_to_coord(self, gridpoint):
        """
        Convert grid indices to a real space coordinate.

        Parameters
        ----------
        gridpoint : List
            List of grid indices in the format [x, y, z].

        Returns
        -------
        coord : numpy.ndarray
            Real space coordinate corresponding to the grid point.
        """
        # Convert grid indices to real space grid point.
        i = gridpoint[0]
        j = gridpoint[1]
        k = gridpoint[2]
        # Orthorhombic cells and triclinic ones have
        # to be treated differently, see domain.cpp

        if self._atoms.cell.orthorhombic:
            return np.diag(self._voxel) * [i, j, k]
        else:
            ret = [0, 0, 0]
            ret[0] = (
                i / self.grid_dimensions[0] * self._atoms.cell[0, 0]
                + j / self.grid_dimensions[1] * self._atoms.cell[1, 0]
                + k / self.grid_dimensions[2] * self._atoms.cell[2, 0]
            )
            ret[1] = (
                j / self.grid_dimensions[1] * self._atoms.cell[1, 1]
                + k / self.grid_dimensions[2] * self._atoms.cell[1, 2]
            )
            ret[2] = k / self.grid_dimensions[2] * self._atoms.cell[2, 2]
            return np.array(ret)

    @abstractmethod
    def _calculate(self, outdir, **kwargs):
        """
        Perform descriptor calculation.

        Has to be implemented by inheriting classes.

        Parameters
        ----------
        outdir : string
            Path to the output directory.

        kwargs : dict
            Additional keyword arguments.
        """
        pass

    @abstractmethod
    def _read_feature_dimension_from_json(self, json_dict):
        """
        Read the feature dimension from a saved JSON file.

        This process may also involve reading additional information from the
        Parameters object.

        Parameters
        ----------
        json_dict : dict
            Dictionary containing info loaded from the JSON file.
        """
        pass

    def _set_feature_size_from_array(self, array):
        """
        Set the feature size from the array.

        Feature sizes are saved in different ways for different physical data
        classes.

        Parameters
        ----------
        array : numpy.ndarray
            Array to extract the feature size from.
        """
        self.feature_size = np.shape(array)[-1]