"""Base class for all descriptor calculators."""
from abc import abstractmethod
import json
import os
import tempfile
import ase
from ase.cell import Cell
from ase.units import m
from ase.neighborlist import NeighborList, NewPrimitiveNeighborList
import numpy as np
from skspatial.objects import Plane
from mala.common.parameters import ParametersDescriptors, Parameters
from mala.common.parallelizer import (
get_comm,
printout,
get_rank,
get_size,
barrier,
parallel_warn,
set_lammps_instance,
)
from mala.common.physical_data import PhysicalData
from mala.descriptors.lammps_utils import set_cmdlinevars
[docs]
class Descriptor(PhysicalData):
    """
    Base class for all descriptors available in MALA.

    Descriptors encode the atomic fingerprint of a DFT calculation.

    Instantiating this base class directly selects a concrete subclass
    based on ``parameters.descriptors.descriptor_type``; the values handled
    here are "Bispectrum", "AtomicDensity" and "MinterpyDescriptors".

    Parameters
    ----------
    parameters : mala.common.parameters.Parameters
        Parameters object used to create this object.

    Attributes
    ----------
    parameters: mala.common.parameters.ParametersDescriptors
        MALA descriptor calculation parameters.
    """
##############################
# Constructors
##############################
def __new__(cls, params: Parameters = None):
    """
    Create a Descriptor instance.

    The correct type of descriptor calculator will automatically be
    instantiated by this class if possible. You can also instantiate
    the desired descriptor directly by calling upon the subclass.

    Parameters
    ----------
    params : mala.common.parameters.Parameters
        Parameters used to create this descriptor calculator. Required
        when the base class itself is instantiated, because the concrete
        type is selected from ``params.descriptors.descriptor_type``.

    Returns
    -------
    descriptors : Descriptor
        Uninitialized instance of the selected descriptor class.

    Raises
    ------
    Exception
        If the base class is instantiated without parameters, or with a
        descriptor type that is not handled here.
    """
    # Check if we're accessing through base class.
    # If not, we need to return the correct object directly.
    if cls == Descriptor:
        if params is None:
            # Without parameters there is nothing to dispatch on; fail with
            # a clear message instead of an AttributeError on None.
            raise Exception(
                "Cannot select a descriptor calculator without parameters."
            )

        descriptor_type = params.descriptors.descriptor_type
        # Subclasses are imported lazily, only for the type requested.
        if descriptor_type == "Bispectrum":
            from mala.descriptors.bispectrum import Bispectrum

            target_class = Bispectrum
        elif descriptor_type == "AtomicDensity":
            from mala.descriptors.atomic_density import AtomicDensity

            target_class = AtomicDensity
        elif descriptor_type == "MinterpyDescriptors":
            from mala.descriptors.minterpy_descriptors import (
                MinterpyDescriptors,
            )

            target_class = MinterpyDescriptors
        else:
            raise Exception("Unsupported descriptor calculator.")

        descriptors = super(Descriptor, target_class).__new__(target_class)
    else:
        descriptors = super(Descriptor, cls).__new__(cls)

    # For pickling: __getnewargs__ hands this back to __new__.
    setattr(descriptors, "params_arg", params)
    return descriptors
def __getnewargs__(self):
"""
Get the necessary arguments to call __new__.
Used for pickling.
Returns
-------
params : mala.Parameters
The parameters object with which this object was created.
"""
return (self.params_arg,)
def __init__(self, parameters):
    """
    Initialize the descriptor state shared by all descriptor types.

    Parameters
    ----------
    parameters : mala.common.parameters.Parameters
        Parameters object; its ``descriptors`` member is stored as
        ``self.parameters``.
    """
    super(Descriptor, self).__init__(parameters)
    self.parameters: ParametersDescriptors = parameters.descriptors

    # A feature size of zero makes iterations fail until it is set.
    self.feature_size = 0

    # Geometry-related state, filled in by the calculate_from_* methods.
    self._in_format_ase = ""
    self._atoms = None
    self._voxel = None

    # LAMMPS-specific temporary file names. Should non-LAMMPS descriptors
    # ever be added, these would belong in an intermediate LAMMPS-only
    # descriptor class instead.
    self._lammps_temporary_input = None
    self._lammps_temporary_log = None
##############################
# Properties
##############################
@property
def si_unit_conversion(self):
    """
    Numeric value of the conversion from MALA (ASE) units to SI.

    Needed for OpenPMD interface. The value is the cube of ``ase.units.m``.
    """
    cubic_meter = m**3
    return cubic_meter
@property
def si_dimension(self):
    """
    Dictionary containing the SI unit dimensions in OpenPMD format.

    Needed for OpenPMD interface. Maps the length dimension to the
    exponent -3.
    """
    # Imported here so that openpmd_api is only needed when this is used.
    import openpmd_api as io

    dimensions = {io.Unit_Dimension.L: -3}
    return dimensions
@property
def descriptors_contain_xyz(self):
"""Control whether descriptor vectors will contain xyz coordinates."""
return self.parameters.descriptors_contain_xyz
@descriptors_contain_xyz.setter
def descriptors_contain_xyz(self, value):
self.parameters.descriptors_contain_xyz = value
##############################
# Methods
##############################
# File I/O
##########
[docs]
@staticmethod
def convert_units(array, in_units="1/eV"):
"""
Convert descriptors from a specified unit into the ones used in MALA.
Parameters
----------
array : numpy.ndarray
Data for which the units should be converted.
in_units : string
Units of array.
Returns
-------
converted_array : numpy.ndarray
Data in MALA units.
"""
raise Exception(
"No unit conversion method implemented for this"
" descriptor type."
)
@property
def feature_size(self):
"""Get the feature dimension of this data."""
return self._feature_size
@feature_size.setter
def feature_size(self, value):
self._feature_size = value
[docs]
@staticmethod
def backconvert_units(array, out_units):
"""
Convert descriptors from MALA units into a specified unit.
Parameters
----------
array : numpy.ndarray
Data in MALA units.
out_units : string
Desired units of output array.
Returns
-------
converted_array : numpy.ndarray
Data in out_units.
"""
raise Exception(
"No unit back conversion method implemented for "
"this descriptor type."
)
[docs]
def setup_lammps_tmp_files(self, lammps_type, outdir):
    """
    Create the temporary lammps input and log files.

    The files are created on rank 0 only. If MPI is enabled in the
    configuration, the resulting file names are broadcast from rank 0 to
    all ranks.

    Parameters
    ----------
    lammps_type: str
        Type of descriptor calculation (e.g. bgrid for bispectrum)

    outdir: str
        Directory where lammps files are kept

    Returns
    -------
    None
    """
    if get_rank() != 0:
        # Other ranks receive the names through the broadcast below.
        self._lammps_temporary_input = None
        self._lammps_temporary_log = None
    else:
        # delete=False keeps the files on disk after the handles close;
        # only the generated names are needed here.
        with tempfile.NamedTemporaryFile(
            delete=False,
            prefix="lammps_" + lammps_type + "_input",
            suffix="_.tmp",
            dir=outdir,
        ) as input_handle:
            self._lammps_temporary_input = input_handle.name

        with tempfile.NamedTemporaryFile(
            delete=False,
            prefix="lammps_" + lammps_type + "_log",
            suffix="_.tmp",
            dir=outdir,
        ) as log_handle:
            self._lammps_temporary_log = log_handle.name

    if self.parameters._configuration["mpi"]:
        communicator = get_comm()
        self._lammps_temporary_input = communicator.bcast(
            self._lammps_temporary_input, root=0
        )
        communicator = get_comm()
        self._lammps_temporary_log = communicator.bcast(
            self._lammps_temporary_log, root=0
        )
# Calculations
##############
[docs]
@staticmethod
def enforce_pbc(atoms):
    """
    Explicitly enforce the PBC on an ASE atoms object.

    QE (and potentially other codes?) do that internally. Meaning that the
    raw positions of atoms (in Angstrom) can lie outside of the unit cell.
    When setting up the DFT calculation, these atoms get shifted into
    the unit cell. Since we directly use these raw positions for the
    descriptor calculation, we need to enforce that in the ASE atoms
    objects, the atoms are explicitly in the unit cell.

    The input object is not modified; a copy is returned.

    Parameters
    ----------
    atoms : ase.atoms
        The ASE atoms object for which the PBC need to be enforced.

    Returns
    -------
    new_atoms : ase.Atoms
        The ASE atoms object for which the PBC have been enforced.
    """
    wrapped = atoms.copy()
    # Round trip through scaled positions to obtain the wrapped positions.
    wrapped.set_scaled_positions(wrapped.get_scaled_positions())

    # Purely informational: count the atoms whose position changed by more
    # than the tolerance in at least one component.
    moved_count = sum(
        1
        for index in range(len(atoms))
        if not np.all(
            np.isclose(
                wrapped[index].position, atoms[index].position, atol=0.001
            )
        )
    )
    printout(
        "Descriptor calculation: had to enforce periodic boundary "
        "conditions on",
        moved_count,
        "atoms before calculation.",
        min_verbosity=2,
    )
    return wrapped
[docs]
def calculate_from_qe_out(
    self, qe_out_file, working_directory=".", **kwargs
):
    """
    Calculate the descriptors based on a Quantum Espresso outfile.

    Parameters
    ----------
    qe_out_file : string
        Name of Quantum Espresso output file for snapshot.

    working_directory : string
        A directory in which to write the output of the LAMMPS calculation.
        Usually the local directory should suffice, given that there
        are no multiple instances running in the same directory.

    kwargs : dict
        A collection of keyword arguments, that are mainly used for
        debugging and development. Different types of descriptors
        may support different keyword arguments. Commonly supported
        are

        - "use_fp64": To use enforce floating point 64 precision for
          descriptors.
        - "keep_logs": To not delete temporary files created during
          LAMMPS calculation of descriptors.
        - "grid_dimensions": Grid dimensions to use instead of the ones
          read from the "FFT dimensions" line of the output file.

    Returns
    -------
    descriptors : numpy.ndarray
        Numpy array containing the descriptors with the dimension
        (x,y,z,descriptor_dimension)

    Raises
    ------
    Exception
        If no grid dimensions are given and the output file contains no
        "FFT dimensions" line.
    """
    self._in_format_ase = "espresso-out"
    printout("Calculating descriptors from", qe_out_file, min_verbosity=0)

    # We get the atomic information by using ASE.
    self._atoms = ase.io.read(qe_out_file, format=self._in_format_ase)

    # Enforcing / Checking PBC on the read atoms.
    self._atoms = self.enforce_pbc(self._atoms)

    # Get the grid dimensions.
    if "grid_dimensions" in kwargs:
        # Removed from kwargs so it is not forwarded to _calculate.
        self.grid_dimensions = kwargs.pop("grid_dimensions")
    else:
        self.grid_dimensions = [0, 0, 0]
        found_fft_dimensions = False
        # Stream the file and close it deterministically; only the first
        # "FFT dimensions" line is needed.
        with open(qe_out_file, "r") as qe_outfile:
            for line in qe_outfile:
                if "FFT dimensions" in line:
                    fields = line.split("(")[1].split(")")[0].split(",")
                    self.grid_dimensions[0] = int(fields[0])
                    self.grid_dimensions[1] = int(fields[1])
                    self.grid_dimensions[2] = int(fields[2])
                    found_fft_dimensions = True
                    break
        if not found_fft_dimensions:
            # A [0, 0, 0] grid would be divided by below, so stop here
            # with an explicit error instead.
            raise Exception(
                "Could not read FFT dimensions from Quantum Espresso "
                "output file."
            )

    # The voxel is the cell with each vector divided by the grid points
    # along that direction.
    self._voxel = self._atoms.cell.copy()
    self._voxel[0] = self._voxel[0] / (self.grid_dimensions[0])
    self._voxel[1] = self._voxel[1] / (self.grid_dimensions[1])
    self._voxel[2] = self._voxel[2] / (self.grid_dimensions[2])

    return self._calculate(working_directory, **kwargs)
[docs]
def calculate_from_json(self, json_file, working_directory=".", **kwargs):
    """
    Calculate the descriptors based on a MALA generated json file.

    These json files are generated by the MALA DataConverter class
    and bundle information about a DFT simulation.

    Parameters
    ----------
    json_file : string or file-like
        Name of MALA json output file for snapshot, or an already opened
        file object to read the json data from.

    working_directory : string
        A directory in which to write the output of the LAMMPS calculation.
        Usually the local directory should suffice, given that there
        are no multiple instances running in the same directory.

    kwargs : dict
        A collection of keyword arguments, that are mainly used for
        debugging and development. Different types of descriptors
        may support different keyword arguments. Commonly supported
        are

        - "use_fp64": To use enforce floating point 64 precision for
          descriptors.
        - "keep_logs": To not delete temporary files created during
          LAMMPS calculation of descriptors.

    Returns
    -------
    descriptors : numpy.ndarray
        Numpy array containing the descriptors with the dimension
        (x,y,z,descriptor_dimension)
    """
    if isinstance(json_file, str):
        # Opened here, so also closed here.
        with open(json_file, encoding="utf-8") as json_handle:
            json_dict = json.load(json_handle)
    else:
        # A file object supplied by the caller stays open; the caller
        # owns it.
        json_dict = json.load(json_file)

    self.grid_dimensions = json_dict["grid_dimensions"]
    self._atoms = ase.Atoms.fromdict(json_dict["atoms"])
    self._voxel = Cell(json_dict["voxel"]["array"])

    return self._calculate(working_directory, **kwargs)
[docs]
def calculate_from_atoms(
    self, atoms, grid_dimensions, working_directory=".", **kwargs
):
    """
    Calculate the descriptors based on atomic configurations.

    Parameters
    ----------
    atoms : ase.Atoms
        Atoms object holding the atomic configuration.

    grid_dimensions : list
        Grid dimensions to be used, in the format [x,y,z].

    working_directory : string
        A directory in which to write the output of the LAMMPS calculation.
        Usually the local directory should suffice, given that there
        are no multiple instances running in the same directory.

    kwargs : dict
        A collection of keyword arguments, that are mainly used for
        debugging and development. Different types of descriptors
        may support different keyword arguments. Commonly supported
        are

        - "use_fp64": To use enforce floating point 64 precision for
          descriptors.
        - "keep_logs": To not delete temporary files created during
          LAMMPS calculation of descriptors.

    Returns
    -------
    descriptors : numpy.ndarray
        Numpy array containing the descriptors with the dimension
        (x,y,z,descriptor_dimension)
    """
    # Enforcing / Checking PBC on the input atoms.
    self._atoms = self.enforce_pbc(atoms)
    self.grid_dimensions = grid_dimensions

    # The voxel is the cell with each vector divided by the number of
    # grid points along that direction.
    self._voxel = self._atoms.cell.copy()
    for axis in range(3):
        self._voxel[axis] = self._voxel[axis] / (
            self.grid_dimensions[axis]
        )

    return self._calculate(working_directory, **kwargs)
[docs]
def gather_descriptors(self, descriptors_np, use_pickled_comm=False):
    """
    Gathers all descriptors on rank 0 and sorts them.

    This is useful for e.g. parallel preprocessing.
    This function removes the extra 3 components that come from parallel
    processing.
    I.e. if we have 91 bispectrum descriptors, LAMMPS directly outputs us
    97 (in parallel mode), and this function returns 94, as to retain the
    3 x,y,z ones we by default include.

    Parameters
    ----------
    descriptors_np : numpy.ndarray
        Numpy array with the descriptors of this ranks local grid.

    use_pickled_comm : bool
        If True, the pickled communication route from mpi4py is used
        (a single gather()). If False, a Recv/Send combination is used.
        For large grids, the pickled route CANNOT be used, since too
        large python objects will break it; therefore Recv/Send is the
        default.

    Returns
    -------
    descriptors_full : numpy.ndarray
        On rank 0, the assembled array over the full grid. On all other
        ranks, a dummy array built from np.zeros([1, 1, 1, 1]). In both
        cases the first three feature components are cut off unless
        descriptors_contain_xyz is set in the parameters.
    """
    # Barrier to make sure all ranks have descriptors..
    comm = get_comm()
    barrier()

    # Gather the descriptors into a list.
    if use_pickled_comm:
        all_descriptors_list = comm.gather(descriptors_np, root=0)
    else:
        local_rows = np.shape(descriptors_np)[0]
        sendcounts = np.array(comm.gather(local_rows, root=0))
        raw_feature_length = self.feature_size + 3

        if get_rank() != 0:
            comm.Send(descriptors_np, dest=0, tag=get_rank() + 100)
        else:
            # Preparing the list of flat receive buffers, one per rank.
            all_descriptors_list = [
                np.empty(
                    sendcounts[rank] * raw_feature_length,
                    dtype=descriptors_np.dtype,
                )
                for rank in range(0, get_size())
            ]

            # No MPI necessary for first rank. For all the others,
            # collect the buffers and restore their 2D shape.
            all_descriptors_list[0] = descriptors_np
            for rank in range(1, get_size()):
                comm.Recv(
                    all_descriptors_list[rank], source=rank, tag=100 + rank
                )
                all_descriptors_list[rank] = np.reshape(
                    all_descriptors_list[rank],
                    (sendcounts[rank], raw_feature_length),
                )
        barrier()

    # Dummy for the other ranks.
    # (For now, might later simply broadcast to other ranks).
    descriptors_full = np.zeros([1, 1, 1, 1])

    # Reorder the list.
    if get_rank() == 0:
        # Prepare the descriptor array.
        grid_x = self.grid_dimensions[0]
        grid_y = self.grid_dimensions[1]
        grid_z = self.grid_dimensions[2]
        descriptors_full = np.zeros(
            [grid_x, grid_y, grid_z, self.feature_size]
        )

        # Fill the full descriptors array.
        for local_grid in all_descriptors_list:
            # The first three columns of the first and last row give the
            # grid index range covered by this chunk.
            head = local_grid[0]
            tail = local_grid[-1]
            first_x, first_y, first_z = (
                int(head[0]),
                int(head[1]),
                int(head[2]),
            )
            last_x, last_y, last_z = (
                int(tail[0]) + 1,
                int(tail[1]) + 1,
                int(tail[2]) + 1,
            )

            # We glue the individual cells back together, and transpose:
            # rows are reshaped as (z, y, x, feature) and reordered to
            # (x, y, z, feature).
            chunk = np.reshape(
                local_grid[:, 3:],
                [
                    last_z - first_z,
                    last_y - first_y,
                    last_x - first_x,
                    self.feature_size,
                ],
            ).transpose([2, 1, 0, 3])
            descriptors_full[
                first_x:last_x, first_y:last_y, first_z:last_z
            ] = chunk

    if self.parameters.descriptors_contain_xyz:
        return descriptors_full
    return descriptors_full[:, :, :, 3:]
[docs]
def convert_local_to_3d(self, descriptors_np):
"""
Convert the desciptors as done in the gather function, but per rank.
This is useful for e.g. parallel preprocessing.
This function removes the extra 3 components that come from parallel
processing.
I.e. if we have 91 bispectrum descriptors, LAMMPS directly outputs us
97 (in parallel mode), and this function returns 94, as to retain the
3 x,y,z ones we by default include.
Parameters
----------
descriptors_np : numpy.ndarray
Numpy array with the descriptors of this ranks local grid.
"""
local_offset = [None, None, None]
local_reach = [None, None, None]
local_offset[0] = int(descriptors_np[0][0])
local_offset[1] = int(descriptors_np[0][1])
local_offset[2] = int(descriptors_np[0][2])
local_reach[0] = int(descriptors_np[-1][0]) + 1
local_reach[1] = int(descriptors_np[-1][1]) + 1
local_reach[2] = int(descriptors_np[-1][2]) + 1
nx = local_reach[0] - local_offset[0]
ny = local_reach[1] - local_offset[1]
nz = local_reach[2] - local_offset[2]
descriptors_full = np.zeros([nx, ny, nz, self.feature_size])
descriptors_full[0:nx, 0:ny, 0:nz] = np.reshape(
descriptors_np[:, 3:], [nz, ny, nx, self.feature_size]
).transpose([2, 1, 0, 3])
return descriptors_full, local_offset, local_reach
[docs]
def read_dimensions_from_json(self, json_file):
"""
Read only the descriptor dimensions from a json file.
These json files are generated by the MALA DataConverter class
and bundle information about a DFT simulation.
Parameters
----------
json_file : string
Path to the numpy file.
Returns
-------
dimension_info : list or tuple
If read_dtype is False, then only a list containing the dimensions
of the saved array is returned. If read_dtype is True, a tuple
containing this list of dimensions and the dtype of the array will
be returned.
"""
if isinstance(json_file, str):
json_dict = json.load(open(json_file, encoding="utf-8"))
else:
json_dict = json.load(json_file)
grid_dimensions = json_dict["grid_dimensions"] + [
self._read_feature_dimension_from_json(json_dict)
]
return grid_dimensions
# Private methods
#################
def _process_loaded_array(self, array, units=None):
"""
Process loaded array (i.e., unit change, reshaping, etc.).
Parameters
----------
array : numpy.ndarray
Array to process.
units : string
Units of input array.
"""
array *= self.convert_units(1, in_units=units)
def _process_loaded_dimensions(self, array_dimensions):
"""
Process loaded dimensions.
In this case, cut xyz coordinates from descriptors if
descriptors_contain_xyz is set in the parameters.
Parameters
----------
array_dimensions : tuple
Raw dimensions of the array.
"""
if self.descriptors_contain_xyz:
return (
array_dimensions[0],
array_dimensions[1],
array_dimensions[2],
array_dimensions[3] - 3,
)
else:
return array_dimensions
def _set_geometry_info(self, mesh):
    """
    Set geometry information to openPMD mesh.

    This has to be done as part of the openPMD saving process. Nothing
    is done if no atoms are stored on this object.

    Parameters
    ----------
    mesh : openpmd_api.Mesh
        OpenPMD mesh for which to set geometry information.
    """
    if self._atoms is None:
        return

    import openpmd_api as io

    # Geometry: Save the cell parameters and angles of the grid.
    # The voxel is recomputed from the current atoms and grid dimensions.
    self._voxel = self._atoms.cell.copy()
    for axis in range(3):
        self._voxel[axis] = self._voxel[axis] / (
            self.grid_dimensions[axis]
        )

    mesh.geometry = io.Geometry.cartesian
    # cellpar() yields six values: the first three are used as grid
    # spacing, the remaining ones are stored as "angles".
    voxel_parameters = self._voxel.cellpar()
    mesh.grid_spacing = voxel_parameters[0:3]
    mesh.set_attribute("angles", voxel_parameters[3:])
def _get_atoms(self):
"""
Access atoms saved in PhysicalData-derived class.
For any derived class which is atom based (currently, all are), this
function returns the atoms, which may not be directly accessible as
an attribute for a variety of reasons.
Returns
-------
atoms : ase.Atoms
An ASE atoms object holding the associated atoms of this object.
"""
return self._atoms
def _feature_mask(self):
"""
Return a mask for features that are not part of the feature dimension.
The mask assumes that the features which do not belong to the feature
dimension are at the beginning of the array. Here, return 3 if the
descriptors contain xyz coordinates, otherwise 0.
Returns
-------
mask : int
Starting index after which the actual feature dimension starts.
"""
if self.descriptors_contain_xyz:
return 3
else:
return 0
def _setup_lammps(self, nx, ny, nz, lammps_dict):
    """
    Set up the lammps processor grid.

    Takes into account y/z-splitting. Besides creating the LAMMPS
    instance, this fills ``lammps_dict`` in place with the grid sizes,
    the switch flag, the name of the temporary input file and, for
    parallel runs with splitting, the "processors" and "balance" commands.

    Parameters
    ----------
    nx : int
        Number of gridpoints in x-direction.

    ny : int
        Number of gridpoints in y-direction.

    nz : int
        Number of gridpoints in z-direction.

    lammps_dict : dict
        Dictionary with LAMMPS options. Modified in place.

    Returns
    -------
    lmp : lammps.LAMMPS
        LAMMPS instance.

    Raises
    ------
    ValueError
        If the requested processor splitting cannot be mapped onto the
        grid (see the individual checks below).
    """
    from lammps import lammps

    # Build LAMMPS arguments from the data we read.
    lmp_cmdargs = [
        "-screen",
        "none",
        "-log",
        self._lammps_temporary_log,
    ]
    lammps_dict["atom_config_fname"] = self._lammps_temporary_input

    if self.parameters._configuration["mpi"]:
        size = get_size()
        # For parallel runs, the LAMMPS "processors" and "balance"
        # commands need to be set. The current implementation matches the
        # LAMMPS MPI processor grid to the QE processor splitting: QE
        # distributes grid points in parallel as slices along the z axis.
        # Currently, grid points fall on the z-axis plane cutoff values in
        # LAMMPS; this leads to some ranks having 0 grid points and others
        # having twice as many. The balance command in LAMMPS alleviates
        # this issue; integers for plane cuts along the z axis appear to
        # be most important.
        #
        # If the nyfft flag is set, QE also parallelizes along the y axis.
        # In that case the LAMMPS MPI processor grid needs to be
        # 1x{ny}x{nz}, and a separate total_energy_module with nyfft
        # enabled needs to be configured.
        if self.parameters.use_y_splitting > 1:
            # TODO automatically pass nyfft into QE from MALA
            # If more processors than y*z grid dimensions are requested,
            # send an error. More processors than y*z grid dimensions
            # reduces efficiency and scaling of QE.
            nyfft = self.parameters.use_y_splitting
            # number of y processors is equal to nyfft
            yprocs = nyfft
            # The number of z processors is the total number of
            # processors divided by nyfft; this requires even division.
            if size % yprocs == 0:
                zprocs = int(size / yprocs)
            else:
                raise ValueError(
                    "Cannot evenly divide z-planes in y-direction"
                )

            # Check if the total number of processors is greater than the
            # number of grid sections; that would cause a mismatch later
            # in QE.
            # NOTE(review): zprocs is size / yprocs at this point, so
            # yprocs * zprocs equals size and this condition looks
            # unreachable - confirm whether a comparison against the grid
            # dimensions was intended.
            mpi_grid_sections = yprocs * zprocs
            if mpi_grid_sections < size:
                raise ValueError(
                    "More processors than grid sections. "
                    "This will cause a crash further in the "
                    "calculation. Choose a total number of "
                    "processors equal to or less than the "
                    "total number of grid sections requsted "
                    "for the calculation (nyfft*nz)."
                )
            # TODO not sure what happens when size/nyfft is not integer -
            #  further testing required

            # set the mpi processor grid for lammps
            lammps_procs = f"1 {yprocs} {zprocs}"
            printout(
                "mpi grid with nyfft: ", lammps_procs, min_verbosity=2
            )

            # Prepare the y plane cuts for the balance command in LAMMPS.
            # Cuts are fractional positions, each shifted down by 1e-8 and
            # written with eight decimals.
            if int(ny / yprocs) == (ny / yprocs):
                # ny divides evenly: equidistant cuts.
                ycut = 1 / yprocs
                yint = ""
                for i in range(0, yprocs - 1):
                    yvals = ((i + 1) * ycut) - 0.00000001
                    yint += format(yvals, ".8f")
                    yint += " "
            else:
                # account for remainder with uneven number of
                # planes/processors
                # NOTE(review): the first yrem cuts are spaced 2 * ycut
                # apart and the rest use (i + 1 + yrem) * ycut; whether
                # this matches the QE distribution cannot be verified
                # from this file - confirm.
                ycut = 1 / yprocs
                yrem = ny - (yprocs * int(ny / yprocs))
                yint = ""
                for i in range(0, yrem):
                    yvals = (((i + 1) * 2) * ycut) - 0.00000001
                    yint += format(yvals, ".8f")
                    yint += " "
                for i in range(yrem, yprocs - 1):
                    yvals = ((i + 1 + yrem) * ycut) - 0.00000001
                    yint += format(yvals, ".8f")
                    yint += " "
            # prepare z plane cuts for balance command in lammps
            if int(nz / zprocs) == (nz / zprocs):
                zcut = 1 / nz
                zint = ""
                for i in range(0, zprocs - 1):
                    zvals = ((i + 1) * (nz / zprocs) * zcut) - 0.00000001
                    zint += format(zvals, ".8f")
                    zint += " "
            else:
                # account for remainder with uneven number of
                # planes/processors
                raise ValueError(
                    "Cannot divide z-planes on processors"
                    " without remainder. "
                    "This is currently unsupported."
                )

                # Disabled handling of the remainder, kept for reference:
                # zcut = 1/nz
                # zrem = nz - (zprocs*int(nz/zprocs))
                # zint = ''
                # for i in range(0, zrem):
                #     zvals = (((i+1)*2)*zcut)-0.00000001
                #     zint += format(zvals, ".8f")
                #     zint += ' '
                # for i in range(zrem, zprocs-1):
                #     zvals = ((i+1+zrem)*zcut)-0.00000001
                #     zint += format(zvals, ".8f")
                #     zint += ' '
            lammps_dict["lammps_procs"] = (
                f"processors {lammps_procs} " f"map xyz"
            )
            lammps_dict["zbal"] = f"balance 1.0 y {yint} z {zint}"
            lammps_dict["ngridx"] = nx
            lammps_dict["ngridy"] = ny
            lammps_dict["ngridz"] = nz
            lammps_dict["switch"] = self.parameters.bispectrum_switchflag
        else:
            if self.parameters.use_z_splitting:
                # when nyfft is not used only split processors along z axis
                size = get_size()
                zprocs = size
                # check to make sure number of z planes is not less than
                # processors. If more processors than planes calculation
                # efficiency decreases
                if nz < size:
                    raise ValueError(
                        "More processors than grid sections. "
                        "This will cause a crash further in "
                        "the calculation. Choose a total "
                        "number of processors equal to or "
                        "less than the total number of grid "
                        "sections requsted for the "
                        "calculation (nz)."
                    )

                # match lammps mpi grid to be 1x1x{zprocs}
                lammps_procs = f"1 1 {zprocs}"
                # print("mpi grid z only: ", lammps_procs)

                # prepare z plane cuts for balance command in lammps
                if int(nz / zprocs) == (nz / zprocs):
                    printout("No remainder in z")
                    zcut = 1 / nz
                    zint = ""
                    for i in range(0, zprocs - 1):
                        zvals = (
                            (i + 1) * (nz / zprocs) * zcut
                        ) - 0.00000001
                        zint += format(zvals, ".8f")
                        zint += " "
                else:
                    # Unlike the y-splitting branch above, a remainder is
                    # handled here rather than rejected: the first zrem
                    # cuts are placed one plane further apart
                    # (int(nz / zprocs) + 1 planes), the remaining ones
                    # int(nz / zprocs) planes apart.
                    zcut = 1 / nz
                    zrem = nz - (zprocs * int(nz / zprocs))
                    zint = ""
                    for i in range(0, zrem):
                        zvals = (
                            ((i + 1) * (int(nz / zprocs) + 1)) * zcut
                        ) - 0.00000001
                        zint += format(zvals, ".8f")
                        zint += " "
                    for i in range(zrem, zprocs - 1):
                        zvals = (
                            ((i + 1) * int(nz / zprocs) + zrem) * zcut
                        ) - 0.00000001
                        zint += format(zvals, ".8f")
                        zint += " "
                lammps_dict["lammps_procs"] = f"processors {lammps_procs}"
                lammps_dict["zbal"] = f"balance 1.0 z {zint}"
                lammps_dict["ngridx"] = nx
                lammps_dict["ngridy"] = ny
                lammps_dict["ngridz"] = nz
                lammps_dict["switch"] = (
                    self.parameters.bispectrum_switchflag
                )

            else:
                # MPI without any splitting: no processors/balance
                # commands are set.
                lammps_dict["ngridx"] = nx
                lammps_dict["ngridy"] = ny
                lammps_dict["ngridz"] = nz
                lammps_dict["switch"] = (
                    self.parameters.bispectrum_switchflag
                )

    else:
        # Serial run.
        size = 1
        lammps_dict["ngridx"] = nx
        lammps_dict["ngridy"] = ny
        lammps_dict["ngridz"] = nz
        lammps_dict["switch"] = self.parameters.bispectrum_switchflag
    if self.parameters._configuration["gpu"]:
        # Enable Kokkos; the value passed after "g" is `size` (1 in the
        # serial case, otherwise the value of get_size()).
        lmp_cmdargs.append("-k")
        lmp_cmdargs.append("on")
        lmp_cmdargs.append("g")
        lmp_cmdargs.append(str(size))

        # Tell LAMMPS to use Kokkos versions of those commands for
        # which a Kokkos version exists.
        lmp_cmdargs.append("-sf")
        lmp_cmdargs.append("kk")
        pass

    # Hand the collected options to LAMMPS via the command line, create
    # the instance and register it via set_lammps_instance.
    lmp_cmdargs = set_cmdlinevars(lmp_cmdargs, lammps_dict)

    lmp = lammps(cmdargs=lmp_cmdargs)
    set_lammps_instance(lmp)

    return lmp
def _clean_calculation(self, lmp, keep_logs):
    """
    Clean a LAMMPS calculation.

    This function closes the LAMMPS instance and deletes the temporary
    files created during the calculation, if keep_logs is False. The
    files are deleted on rank 0 only.

    Parameters
    ----------
    lmp : lammps.LAMMPS
        LAMMPS instance to close.

    keep_logs : bool
        If True, the temporary files are not deleted.
    """
    lmp.close()

    if keep_logs:
        return
    if get_rank() != 0:
        return

    for temporary_file in (
        self._lammps_temporary_log,
        self._lammps_temporary_input,
    ):
        os.remove(temporary_file)
def _setup_atom_list(self):
    """
    Set up a list of atoms potentially relevant for descriptor calculation.

    If periodic boundary conditions are used, which is usually the case
    for MALA simulation, one has to compute descriptors by also
    incorporating atoms from neighboring cells.

    Returns
    -------
    all_atoms : numpy.ndarray
        Numpy array containing the positions of all atoms potentially
        relevant for the descriptor calculation. With periodic boundary
        conditions, the relevant periodic images come first, followed by
        the atoms of the original cell. Without, only the positions of
        the original cell are returned.
    """
    if not np.any(self._atoms.pbc):
        # If no PBC are used, only consider a single cell.
        return self._atoms.positions

    cutoff = self.parameters.atomic_density_cutoff
    number_of_atoms = len(self._atoms)

    # To determine the list of relevant atoms we first take the edges
    # of the simulation cell and use them to determine all cells
    # which hold atoms that _may_ be relevant for the calculation.
    edges = np.array(
        [
            [0, 0, 0],
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [1, 1, 1],
            [0, 1, 1],
            [1, 0, 1],
            [1, 1, 0],
        ]
    ) * np.array(self.grid_dimensions)

    # For each edge point create a neighborhoodlist to all cells
    # given by the cutoff radius.
    cell_offsets = []
    for edge in edges:
        neighborlist = NeighborList(
            np.zeros(number_of_atoms + 1) + [cutoff],
            bothways=True,
            self_interaction=False,
            primitive=NewPrimitiveNeighborList,
        )

        # Construct a ghost atom representing the grid point; it gets
        # the index number_of_atoms in the copied atoms object.
        atoms_with_grid_point = self._atoms.copy()
        atoms_with_grid_point.append(
            ase.Atom("H", self._grid_to_coord(edge))
        )
        neighborlist.update(atoms_with_grid_point)
        _, offsets = neighborlist.get_neighbors(number_of_atoms)
        cell_offsets.append(np.unique(offsets, axis=0))

    all_cells = np.unique(np.concatenate(cell_offsets), axis=0)

    # Drop the original cell from the list of all cells, to avoid double
    # counting the atoms appended at the very end. A mask is used so that
    # this also works if the original cell is not part of the list.
    all_cells = all_cells[np.any(all_cells != 0, axis=1)]

    # Create an array holding all atoms from all potentially relevant
    # cells, as identified above: every atom position shifted by every
    # cell offset, ordered atom by atom.
    shifts = np.asarray(all_cells @ self._atoms.get_cell())
    all_atoms = (
        self._atoms.positions[:, np.newaxis, :] + shifts[np.newaxis, :, :]
    ).reshape(-1, 3)

    # Next, construct the planes forming the unit cell.
    # Atoms from neighboring cells are only included in the list of
    # all relevant atoms, if they have a distance to any of these
    # planes smaller than the cutoff radius. Elsewise, they would
    # not be included in the eventual calculation anyhow.
    planes = [
        [[0, 1, 0], [0, 0, 1], [0, 0, 0]],
        [
            [self.grid_dimensions[0], 1, 0],
            [self.grid_dimensions[0], 0, 1],
            self.grid_dimensions,
        ],
        [[1, 0, 0], [0, 0, 1], [0, 0, 0]],
        [
            [1, self.grid_dimensions[1], 0],
            [0, self.grid_dimensions[1], 1],
            self.grid_dimensions,
        ],
        [[1, 0, 0], [0, 1, 0], [0, 0, 0]],
        [
            [1, 0, self.grid_dimensions[2]],
            [0, 1, self.grid_dimensions[2]],
            self.grid_dimensions,
        ],
    ]
    all_distances = []
    for plane in planes:
        curplane = Plane.from_points(
            self._grid_to_coord(plane[0]),
            self._grid_to_coord(plane[1]),
            self._grid_to_coord(plane[2]),
        )
        # TODO: This may be optimized, and formulated in an array
        # operation.
        all_distances.append(
            [curplane.distance_point(position) for position in all_atoms]
        )
    closest_distances = np.min(np.array(all_distances), axis=0)

    # Boolean-mask selection always yields an (n, 3) array, including
    # when exactly one periodic image lies within the cutoff. Squeezing
    # an index-array selection would collapse that case to shape (3,)
    # and break the concatenation below.
    relevant_images = all_atoms[closest_distances < cutoff]
    return np.concatenate((relevant_images, self._atoms.positions))
def _grid_to_coord(self, gridpoint):
"""
Convert grid indices to a real space coordinate.
Parameters
----------
gridpoint : list
List of grid indices in the format [x, y, z].
Returns
-------
coord : numpy.ndarray
Real space coordinate corresponding to the grid point.
"""
# Convert grid indices to real space grid point.
i = gridpoint[0]
j = gridpoint[1]
k = gridpoint[2]
# Orthorhombic cells and triclinic ones have
# to be treated differently, see domain.cpp
if self._atoms.cell.orthorhombic:
return np.diag(self._voxel) * [i, j, k]
else:
ret = [0, 0, 0]
ret[0] = (
i / self.grid_dimensions[0] * self._atoms.cell[0, 0]
+ j / self.grid_dimensions[1] * self._atoms.cell[1, 0]
+ k / self.grid_dimensions[2] * self._atoms.cell[2, 0]
)
ret[1] = (
j / self.grid_dimensions[1] * self._atoms.cell[1, 1]
+ k / self.grid_dimensions[2] * self._atoms.cell[1, 2]
)
ret[2] = k / self.grid_dimensions[2] * self._atoms.cell[2, 2]
return np.array(ret)
@abstractmethod
def _calculate(self, outdir, **kwargs):
"""
Perform descriptor calculation.
Has to be implemented by inheriting classes.
Parameters
----------
outdir : string
Path to the output directory.
kwargs : dict
Additional keyword arguments.
"""
pass
@abstractmethod
def _read_feature_dimension_from_json(self, json_dict):
"""
Read the feature dimension from a saved JSON file.
This process may also involve reading additional information from the
Parameters object.
Parameters
----------
json_dict : dict
Dictionary containing info loaded from the JSON file.
"""
pass
def _set_feature_size_from_array(self, array):
"""
Set the feature size from the array.
Feature sizes are saved in different ways for different physical data
classes.
Parameters
----------
array : numpy.ndarray
Array to extract the feature size from.
"""
self.feature_size = np.shape(array)[-1]