Source code for tme.density

"""
Representation of N-dimensional densities

Copyright (c) 2023 European Molecular Biology Laboratory

Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
"""

import warnings
from io import BytesIO
from copy import deepcopy
from gzip import open as gzip_open
from typing import Tuple, Dict, Set
from os.path import splitext, basename

import h5py
import mrcfile
import numpy as np

from scipy.ndimage import (
    zoom,
    laplace,
    sobel,
    minimum_filter,
    binary_erosion,
    generic_gradient_magnitude,
)

from .types import NDArray
from .rotations import align_to_axis
from .backends import NumpyFFTWBackend
from .structure import Structure
from .matching_utils import (
    array_to_memmap,
    memmap_to_array,
    minimum_enclosing_box,
    is_gzipped,
)

__all__ = ["Density"]



[docs]
class Density:
    """
    Abstract representation of N-dimensional densities.

    Parameters
    ----------
    data : array_like
        Array of densities.
    origin : array_like, optional
        Origin of the coordinate system, zero by default.
    sampling_rate : array_like, optional
        Sampling rate along data axis, one by default.
    metadata : dict, optional
        Dictionary with metadata information, empty by default.

    Raises
    ------
    ValueError
        If metadata is not a dictionary.

        If sampling rate / origin is not defined for a single or all axes.

    Examples
    --------
    The following achieves the minimal definition of a :py:class:`Density` instance

    >>> import numpy as np
    >>> from tme import Density
    >>> data = np.random.rand(50,70,40)
    >>> Density(data=data)

    Optional parameters ``origin`` correspond to the coordinate system reference,
    ``sampling_rate`` to the spatial length per axis element, and ``metadata`` to
    a dictionary with supplementary information. By default,
    :py:attr:`Density.origin` is set to zero, :py:attr:`Density.sampling_rate`
    to one, and :py:attr:`Density.metadata` is an empty dictionary. If provided,
    ``origin`` and ``sampling_rate`` either need to be a single value

    >>> Density(data=data, origin=0, sampling_rate=1)

    be specified along each data axis

    >>> Density(data=data, origin=(0, 0, 0), sampling_rate=(1.5, 1.1, 1.2))

    or be a combination of both

    >>> Density(data=data, origin=0, sampling_rate=(1.5, 1.1, 1.2))
    """

    def __init__(
        self,
        data: NDArray,
        origin: NDArray = None,
        sampling_rate: NDArray = None,
        metadata: Dict = {},
    ):
        origin = np.zeros(data.ndim) if origin is None else origin
        sampling_rate = 1 if sampling_rate is None else sampling_rate
        origin, sampling_rate = np.asarray(origin), np.asarray(sampling_rate)
        origin = np.repeat(origin, data.ndim // origin.size)
        sampling_rate = np.repeat(sampling_rate, data.ndim // sampling_rate.size)

        if sampling_rate.size != data.ndim:
            raise ValueError(
                "sampling_rate size should be 1 or "
                f"{data.ndim}, not {sampling_rate.size}."
            )
        if origin.size != data.ndim:
            raise ValueError(f"Expected origin size : {data.ndim}, got {origin.size}.")
        if not isinstance(metadata, dict):
            raise ValueError("Argument metadata has to be of class dict.")

        self.data, self.origin, self.sampling_rate = data, origin, sampling_rate
        self.metadata = metadata

    def __repr__(self):
        response = "Density object at {}\nOrigin: {}, Sampling Rate: {}, Shape: {}"
        return response.format(
            hex(id(self)),
            tuple(round(float(x), 3) for x in self.origin),
            tuple(round(float(x), 3) for x in self.sampling_rate),
            self.shape,
        )


[docs]
    @classmethod
    def from_file(
        cls, filename: str, subset: Tuple[slice] = None, use_memmap: bool = False
    ) -> "Density":
        """
        Reads a file into a :py:class:`Density` instance.

        Parameters
        ----------
        filename : str
            Path to a file in CCP4/MRC, EM, HDF5 or a format supported by
            :obj:`skimage.io.imread`. The file can be gzip compressed.
        subset : tuple of slices, optional
            Slices representing the desired subset along each dimension.
        use_memmap : bool, optional
            Memory map the data contained in ``filename`` to save memory.

        Returns
        -------
        :py:class:`Density`
            Class instance representing the data in ``filename``.

        References
        ----------
        .. [1] Burnley T et al., Acta Cryst. D, 2017
        .. [2] Nickell S. et al, Journal of Structural Biology, 2005.
        .. [3] https://scikit-image.org/docs/stable/api/skimage.io.html

        Examples
        --------
        :py:meth:`Density.from_file` reads files in CCP4/MRC, EM, or a format supported
        by skimage.io.imread and converts them into a :py:class:`Density` instance. The
        following outlines how to read a file in the CCP4/MRC format [1]_:

        >>> from tme import Density
        >>> Density.from_file("/path/to/file.mrc")

        In some cases, you might want to read only a specific subset of the data.
        This can be achieved by passing a tuple of slices to the ``subset`` parameter.
        For example, to read only the first 50 voxels along each dimension:

        >>> subset_slices = (slice(0, 50), slice(0, 50), slice(0, 50))
        >>> Density.from_file("/path/to/file.mrc", subset=subset_slices)

        Memory mapping can be used to read the file from disk without loading
        it entirely into memory. This is particularly useful for large datasets
        or when working with limited memory resources:

        >>> Density.from_file("/path/to/large_file.mrc", use_memmap=True)

        Note that use_memmap will be ignored if the file is gzip compressed.

        If the input file has an `.em` or `.em.gz` extension, it will automatically
        be parsed as EM file [2]_.

        >>> Density.from_file("/path/to/file.em")
        >>> Density.from_file("/path/to/file.em.gz")

        If the file format is not CCP4/MRC or EM, :py:meth:`Density.from_file` attempts
        to use :obj:`skimage.io.imread` to read the file [3]_, which does not extract
        origin or sampling_rate information from the file:

        >>> Density.from_file("/path/to/other_format.tif")

        Notes
        -----
        If ``filename`` ends ".em" or ".h5" it will be parsed as EM or HDF5 file.
        Otherwise, the default reader is CCP4/MRC and on failure
        :obj:`skimage.io.imread` is used regardless of extension. The later does
        not extract origin or sampling_rate information from the file.

        See Also
        --------
        :py:meth:`Density.to_file`

        """
        try:
            with warnings.catch_warnings():
                if filename.endswith("em") or filename.endswith("em.gz"):
                    func = cls._load_em
                elif filename.endswith("h5") or filename.endswith("h5.gz"):
                    func = cls._load_hdf5
                else:
                    func = cls._load_mrc
                    warnings.filterwarnings("ignore", category=RuntimeWarning)
                data, origin, sampling_rate, meta = func(
                    filename=filename, subset=subset, use_memmap=use_memmap
                )
        except ValueError:
            data, origin, sampling_rate, meta = cls._load_skio(filename=filename)
            if subset is not None:
                cls._validate_slices(slices=subset, shape=data.shape)
                data = data[subset].copy()

        return cls(data=data, origin=origin, sampling_rate=sampling_rate, metadata=meta)


    @classmethod
    def _load_mrc(
        cls, filename: str, subset: Tuple[int] = None, use_memmap: bool = False
    ) -> Tuple[NDArray, NDArray, NDArray, Dict]:
        """
        Extracts data from a CCP4/MRC file.

        Parameters
        ----------
        filename : str
            Path to a file in CCP4/MRC format.
        subset : tuple of slices, optional
            Slices representing the desired subset along each dimension.
        use_memmap : bool, optional
            Whether the Density objects data attribute should be memmory mapped.

        Returns
        -------
        Tuple[NDArray, NDArray, NDArray, Dict]
            File data, coordinate origin, sampling rate array and metadata dictionary.

        References
        ----------
        .. [1] Burnley T, Palmer C & Winn M (2017) Recent developments in the
            CCP-EM software suite. Acta Cryst. D73:469–477.
            doi: 10.1107/S2059798317007859

        See Also
        --------
        :py:meth:`Density.from_file`

        """
        with mrcfile.open(filename, header_only=True, permissive=True) as mrc:
            data_shape = mrc.header.nz, mrc.header.ny, mrc.header.nx
            data_type = mrcfile.utils.data_dtype_from_header(mrc.header)

            # All map related parameters should be in zyx order
            origin = (
                mrc.header["origin"]
                .astype([("x", "<f4"), ("y", "<f4"), ("z", "<f4")])
                .view(("<f4", 3))
            )
            origin = origin[::-1]

            # nx := column; ny := row; nz := section
            start = np.array([mrc.header[x] for x in ("nzstart", "nystart", "nxstart")])
            # mapc := column; mapr := row; maps := section;
            crs_index = tuple(int(mrc.header[x]) - 1 for x in ("mapc", "mapr", "maps"))
            if not (0 in crs_index and 1 in crs_index and 2 in crs_index):
                raise ValueError(f"Malformatted CRS array in {filename}")

            sampling_rate = mrc.voxel_size.astype(
                [("x", "<f4"), ("y", "<f4"), ("z", "<f4")]
            ).view(("<f4", 3))
            sampling_rate = np.array(sampling_rate)[::-1]

            if np.allclose(origin, 0) and not np.allclose(start, 0):
                origin = np.multiply(start, sampling_rate)

            extended_header = mrc.header.nsymbt

            metadata = {
                "min": float(mrc.header.dmin),
                "max": float(mrc.header.dmax),
                "mean": float(mrc.header.dmean),
                "std": float(mrc.header.rms),
            }

        non_standard_crs = not np.all(crs_index == (0, 1, 2))
        if non_standard_crs:
            warnings.warn("Non standard MAPC, MAPR, MAPS, adapting data and origin.")

        if is_gzipped(filename):
            if use_memmap:
                warnings.warn(
                    f"Cannot open gzipped file {filename} as memmap."
                    f" Please run 'gunzip {filename}' to use memmap functionality."
                )
            use_memmap = False

        kwargs = {"header_only": False, "permissive": True}
        if subset is not None:
            # Format is zyx, but subsets are xyz
            subset = tuple(
                subset[i] if i < len(subset) else slice(0, data_shape[i])
                for i in crs_index[::-1]
            )
            subset_shape = tuple(x.stop - x.start for x in subset)
            if np.allclose(subset_shape, data_shape):
                return cls._load_mrc(
                    filename=filename, subset=None, use_memmap=use_memmap
                )

            data = cls._read_binary_subset(
                filename=filename,
                slices=subset,
                data_shape=data_shape,
                dtype=data_type,
                header_size=1024 + extended_header,
            )
        elif subset is None and not use_memmap:
            with mrcfile.open(filename, **kwargs) as mrc:
                data = mrc.data
        else:
            with mrcfile.mrcmemmap.MrcMemmap(filename, **kwargs) as mrc:
                data = mrc.data

        if non_standard_crs:
            data = np.transpose(data, crs_index)
            origin = np.take(origin, crs_index)
            sampling_rate = np.take(sampling_rate, crs_index)

        return data.T, origin[::-1], sampling_rate[::-1], metadata

    @classmethod
    def _load_em(
        cls, filename: str, subset: Tuple[int] = None, use_memmap: bool = False
    ) -> Tuple[NDArray, NDArray, NDArray, Dict]:
        """
        Extracts data from a EM file.

        Parameters
        ----------
        filename : str
            Path to a file in EM format.
        subset : tuple of slices, optional
            Slices representing the desired subset along each dimension.
        use_memmap : bool, optional
            Whether the Density objects data attribute should be memmory mapped.

        Returns
        -------
        Tuple[NDArray, NDArray, NDArray, Dict]
            File data, coordinate origin, sampling rate array and metadata dictionary.

        References
        ----------
        .. [1] Nickell S. et al, Journal of Structural Biology, 2005.

        Warns
        -----
        If the sampling rate is zero.

        Notes
        -----
        A sampling rate of zero will be treated as missing value and changed to one. This
        function does not yet extract an origin like :py:meth:`Density._load_mrc`.

        See Also
        --------
        :py:meth:`Density.from_file`
        """
        DATA_TYPE_CODING = {
            1: np.byte,
            2: np.int16,
            3: np.int32,
            5: np.float32,
            6: np.float64,
            8: np.complex64,
            9: np.complex128,
        }

        func = gzip_open if is_gzipped(filename) else open
        with func(filename, mode="rb") as f:
            if is_gzipped(filename):
                f = BytesIO(f.read())

            f.seek(3, 1)
            data_type_code = np.frombuffer(f.read(1), dtype="<i1")[0]
            data_type = DATA_TYPE_CODING.get(data_type_code)

            data_shape = np.frombuffer(f.read(3 * 4), dtype="<i4")[::-1]

            f.seek(80, 1)
            user_params = np.frombuffer(f.read(40 * 4), dtype="<i4")

            pixel_size = user_params[6] / 1000.0
            f.seek(256, 1)

            if use_memmap and subset is None:
                data = np.memmap(f, dtype=data_type, mode="r", offset=f.tell()).reshape(
                    data_shape
                )
            elif subset is None:
                data_size = np.prod(data_shape) * np.dtype(data_type).itemsize
                data = np.frombuffer(f.read(data_size), dtype=data_type).reshape(
                    data_shape
                )
                data = data.astype(np.float32)
            else:
                subset_shape = [x.stop - x.start for x in subset]
                if np.allclose(subset_shape, data_shape):
                    return cls._load_em(
                        filename=filename, subset=None, use_memmap=use_memmap
                    )

                data = cls._read_binary_subset(
                    filename=filename,
                    slices=subset,
                    data_shape=data_shape,
                    dtype=data_type(),
                    header_size=f.tell(),
                )

        origin = np.zeros(3, dtype=data.dtype)

        if pixel_size == 0:
            warnings.warn(
                f"Got invalid sampling rate {pixel_size}, overwriting it to 1."
            )
            pixel_size = 1
        sampling_rate = np.repeat(pixel_size, data.ndim).astype(data.dtype)

        return data, origin, sampling_rate, {}

    @staticmethod
    def _validate_slices(slices: Tuple[slice], shape: Tuple[int]):
        """
        Validate whether the given slices fit within the provided data shape.

        Parameters
        ----------
        slices : Tuple[slice]
            A tuple of slice objects, one per dimension of the data.
        shape : Tuple[int]
            The shape of the data being sliced, as a tuple of integers.

        Raises
        ------
        ValueError
            - If the length of `slices` doesn't match the dimension of shape.
            - If any slice has a stop value exceeding any dimension in shape.
            - If any slice has a stop value that is negative.
        """

        n_dims = len(shape)
        if len(slices) != n_dims:
            raise ValueError(
                f"Expected length of slices : {n_dims}, got : {len(slices)}"
            )

        if any(
            [
                slices[i].stop > shape[i] or slices[i].start > shape[i]
                for i in range(n_dims)
            ]
        ):
            raise ValueError(f"Subset exceeds data dimensions ({shape}).")

        if any([slices[i].stop < 0 or slices[i].start < 0 for i in range(n_dims)]):
            raise ValueError("Subsets have to be non-negative.")

    @classmethod
    def _read_binary_subset(
        cls,
        filename: str,
        slices: Tuple[slice],
        data_shape: Tuple[int],
        dtype: type,
        header_size: int,
    ) -> NDArray:
        """
        Read a subset of data from a binary file with a header.

        Parameters
        ----------
        filename : str
            Path to the binary file.
        slices : tuple of slice objects
            Slices representing the desired subset in each dimension.
        data_shape : tuple of ints
            Shape of the complete dataset in the file.
        dtype : numpy dtype
            Data type of the dataset in the file.
        header_size : int
            Size of the file's header in bytes.

        Returns
        -------
        NDArray
            Subset of the dataset as specified by the slices.

        Raises
        ------
        NotImplementedError
            If the data is not three dimensional.

        See Also
        --------
        :py:meth:`Density._load_mrc`
        :py:meth:`Density._load_em`
        """
        n_dims = len(data_shape)
        if n_dims != 3:
            raise NotImplementedError("Only 3-dimensional data can be subsetted.")

        cls._validate_slices(slices=slices, shape=data_shape)
        bytes_per_item = dtype.itemsize

        subset_shape = [s.stop - s.start for s in slices]
        subset_data = np.empty(subset_shape, dtype=dtype)

        row_bytes = (slices[2].stop - slices[2].start) * bytes_per_item
        full_row_bytes = data_shape[2] * bytes_per_item
        x_offset = slices[2].start * bytes_per_item

        func = gzip_open if is_gzipped(filename) else open
        with func(filename, mode="rb") as f:
            if is_gzipped(filename):
                f = BytesIO(f.read())

            for z in range(slices[0].start, slices[0].stop):
                base_offset_z = header_size + z * data_shape[1] * full_row_bytes

                for y in range(slices[1].start, slices[1].stop):
                    offset = base_offset_z + y * full_row_bytes + x_offset
                    f.seek(offset)
                    row = np.frombuffer(f.read(row_bytes), dtype=dtype)
                    subset_data[z - slices[0].start, y - slices[1].start] = row

        return subset_data

    @staticmethod
    def _load_skio(filename: str) -> Tuple[NDArray, NDArray, NDArray, Dict]:
        """
        Uses :obj:`skimage.io.imread` to extract data from filename [1]_.

        Parameters
        ----------
        filename : str
            Path to a file whose format is supported by :obj:`skimage.io.imread`.

        Returns
        -------
        Tuple[NDArray, NDArray, NDArray, Dict]
            File data, coordinate origin, sampling rate array and metadata dictionary.

        References
        ----------
        .. [1] https://scikit-image.org/docs/stable/api/skimage.io.html

        Warns
        -----
            Warns that origin and sampling_rate are not yet extracted from ``filename``.

        See Also
        --------
        :py:meth:`Density.from_file`
        """
        import skimage.io as skio

        swap = filename
        if is_gzipped(filename):
            with gzip_open(filename, "rb") as infile:
                swap = BytesIO(infile.read())

        data = skio.imread(swap)
        warnings.warn(
            "origin and sampling_rate are not yet extracted from non CCP4/MRC files."
        )
        return data, np.zeros(data.ndim), np.ones(data.ndim), {}

    @staticmethod
    def _load_hdf5(
        filename: str, subset: Tuple[slice], use_memmap: bool = False, **kwargs
    ) -> "Density":
        """
        Extracts data from an H5 file.

        Parameters
        ----------
        filename : str
            Path to a file in CCP4/MRC format.
        subset : tuple of slices, optional
            Slices representing the desired subset along each dimension.
        use_memmap : bool, optional
            Whether the Density objects data attribute should be memmory mapped.

        Returns
        -------
        Density
            An instance of the Density class populated with the data from the HDF5 file.

        See Also
        --------
        :py:meth:`Density._save_hdf5`
        """
        subset = ... if subset is None else subset

        with h5py.File(filename, mode="r") as infile:
            data = infile["data"]
            data_attributes = [
                infile["data"].id.get_offset(),
                infile["data"].shape,
                infile["data"].dtype,
            ]
            origin = infile["origin"][...].copy()
            sampling_rate = infile["sampling_rate"][...].copy()
            metadata = {key: val for key, val in infile.attrs.items()}
            if not use_memmap:
                return data[subset], origin, sampling_rate, metadata

        offset, shape, dtype = data_attributes
        data = np.memmap(filename, dtype=dtype, shape=shape, offset=offset)[subset]

        return data, origin, sampling_rate, metadata


[docs]
    @classmethod
    def from_structure(
        cls,
        filename_or_structure: str,
        shape: Tuple[int] = None,
        sampling_rate: NDArray = np.ones(1),
        origin: Tuple[float] = None,
        weight_type: str = "atomic_weight",
        weight_type_args: Dict = {},
        chain: str = None,
        filter_by_elements: Set = None,
        filter_by_residues: Set = None,
    ) -> "Density":
        """
        Reads in an atomic structure and converts it into a :py:class:`Density`
        instance.

        Parameters
        ----------
        filename_or_structure : str or :py:class:`tme.structure.Structure`
            Either :py:class:`tme.structure.Structure` instance or path to
            structure file that can be read by
            :py:meth:`tme.structure.Structure.from_file`.
        shape : tuple of int, optional
            Shape of the new :py:class:`Density` instance. By default,
            computes the minimum 3D box holding all atoms.
        sampling_rate : float, optional
            Sampling rate of the output array along each axis, in the same unit
            as the atoms in the structure. Defaults to one Ångstroms
            per axis unit.
        origin : tuple of float, optional
            Origin of the coordinate system. If provided, its expected to be in
            z, y, x form in the same unit as the atoms in the structure.
            By default, computes origin as distance between minimal coordinate
            and coordinate system origin.
        weight_type : str, optional
            Which weight should be given to individual atoms. For valid values
            see :py:meth:`tme.structure.Structure.to_volume`.
        weight_type_args : dict, optional
            Additional arguments for atom weight computation.
        chain : str, optional
            The chain that should be extracted from the structure. If multiple chains
            should be selected, they needto be a comma separated string,
            e.g. 'A,B,CE'. If chain None, all chains are returned. Default is None.
        filter_by_elements : set, optional
            Set of atomic elements to keep. Default is all atoms.
        filter_by_residues : set, optional
            Set of residues to keep. Default is all residues.

        Returns
        -------
        :py:class:`Density`
            Newly created :py:class:`Density` instance.

        References
        ----------
        .. [1]  Sorzano, Carlos et al (Mar. 2015). Fast and accurate conversion
            of atomic models into electron density maps. AIMS Biophysics 2, 8–20.

        Examples
        --------
        The following outlines the minimal parameters needed to read in an
        atomic structure and convert it into a :py:class:`Density` instance. For
        specification on supported formats refer to
        :py:meth:`tme.structure.Structure.from_file`.

        >>> path_to_structure = "/path/to/structure.cif"
        >>> density = Density.from_structure(path_to_structure)

        :py:meth:`Density.from_structure` will automatically determine the appropriate
        density dimensions based on the structure. The origin will be computed as
        minimal distance required to move the closest atom of the structure to the
        coordinate system origin. Furthermore, all chains will be used and the atom
        densities will be represented by their atomic weight and accumulated
        on a per-voxel basis.

        The following will read in chain A of an atomic structure and discretize
        it on a grid of dimension 100 x 100 x 100 using a sampling rate of
        2.5 Angstrom per voxel.

        >>> density = Density.from_structure(
        >>>    filename_or_structure = path_to_structure,
        >>>    shape = (100, 100, 100),
        >>>    sampling_rate = 2.5,
        >>>    chain = "A"
        >>> )

        We can restrict the generated :py:class:`Density` instance to only contain
        specific elements like carbon and nitrogen:

        >>> density = Density.from_structure(
        >>>    filename_or_structure = path_to_structure,
        >>>    filter_by_elements = {"C", "N"}
        >>> )

        or specified residues such as polar amino acids:

        >>> density = Density.from_structure(
        >>>    filename_or_structure = path_to_structure,
        >>>    filter_by_residues = {"SER", "THR", "CYS", "ASN", "GLN", "TYR"}
        >>> )

        In addtion, :py:meth:`Density.from_structure` supports a variety of methods
        to convert atoms into densities, such as Gaussians

        >>> density = Density.from_structure(
        >>>    filename_or_structure = path_to_structure,
        >>>    weight_type = "gaussian",
        >>>    weight_type_args={"resolution": 20}
        >>> )

        experimentally determined scattering factors

        >>> density = Density.from_structure(
        >>>    filename_or_structure = path_to_structure,
        >>>    weight_type = "scattering_factors",
        >>>    weight_type_args={"source": "dt1969"}
        >>> )

        and their lowpass filtered representation introduced in [1]_:

        >>> density = Density.from_structure(
        >>>    filename_or_structure = path_to_structure,
        >>>    weight_type = "lowpass_scattering_factors",
        >>>    weight_type_args={"source": "dt1969"}
        >>> )

        See Also
        --------
        :py:meth:`tme.structure.Structure.from_file`
        :py:meth:`tme.structure.Structure.to_volume`
        """
        structure = filename_or_structure
        if isinstance(filename_or_structure, str):
            structure = Structure.from_file(
                filename=filename_or_structure,
                filter_by_elements=filter_by_elements,
                filter_by_residues=filter_by_residues,
            )

        volume, origin, sampling_rate = structure.to_volume(
            shape=shape,
            sampling_rate=sampling_rate,
            origin=origin,
            chain=chain,
            weight_type=weight_type,
            weight_type_args=weight_type_args,
        )

        return cls(
            data=volume,
            origin=origin,
            sampling_rate=sampling_rate,
            metadata=structure.metadata.copy(),
        )



[docs]
    def to_file(self, filename: str, gzip: bool = False) -> None:
        """
        Writes class instance to disk.

        Parameters
        ----------
        filename : str
            Path to write to.
        gzip : bool, optional
            Gzip compress the output and add corresponding suffix to filename
            if not present. False by default.

        References
        ----------
        .. [1] Burnley T et al., Acta Cryst. D, 2017
        .. [2] Nickell S. et al, Journal of Structural Biology, 2005
        .. [3] https://scikit-image.org/docs/stable/api/skimage.io.html

        Examples
        --------
        The following creates a :py:class:`Density` instance `dens` holding
        random data values and writes it to disk:

        >>> import numpy as np
        >>> from tme import Density
        >>> data = np.random.rand(50,50,50)
        >>> dens = Density(data=data, origin=(0, 0, 0), sampling_rate=(1, 1, 1))
        >>> dens.to_file("example.mrc")

        The output file can also be directly ``gzip`` compressed. The corresponding
        ".gz" extension will be automatically added if absent [1]_.

        >>> dens.to_file("example.mrc", gzip=True)

        The :py:meth:`Density.to_file` method also supports writing EM files [2]_:

        >>> dens.to_file("example.em")

        In addition, a variety of image file formats are supported [3]_:

        >>> data = np.random.rand(50,50)
        >>> dens = Density(data=data, origin=(0, 0), sampling_rate=(1, 1))
        >>> dens.to_file("example.tiff")

        Notes
        -----
        If ``filename`` endswith ".em" or ".h5" a EM file or HDF5 file will be created.
        The default output format is CCP4/MRC and on failure, :obj:`skimage.io.imsave`
        is used.

        See Also
        --------
        :py:meth:`Density.from_file`
        """
        if gzip:
            filename = filename if filename.endswith(".gz") else f"{filename}.gz"

        try:
            func = self._save_mrc
            if filename.endswith("em") or filename.endswith("em.gz"):
                func = self._save_em
            elif filename.endswith("h5") or filename.endswith("h5.gz"):
                func = self._save_hdf5
            _ = func(filename=filename, gzip=gzip)
        except ValueError:
            _ = self._save_skio(filename=filename, gzip=gzip)


    def _save_mrc(self, filename: str, gzip: bool = False) -> None:
        """
        Writes class instance to disk as mrc file.

        Parameters
        ----------
        filename : str
            Path to write to.
        gzip : bool, optional
            If True, the output will be gzip compressed.

        References
        ----------
        .. [1] Burnley T et al., Acta Cryst. D, 2017
        """
        compression = "gzip" if gzip else None
        data = np.swapaxes(self.data, 0, 2)
        try:
            _ = mrcfile.utils.mode_from_dtype(data.dtype)
        except ValueError:
            warnings.warn(
                "Current data type not supported by MRC format. Defaulting to float32."
            )
            data = data.astype(np.float32)

        with mrcfile.new(filename, overwrite=True, compression=compression) as mrc:
            mrc.set_data(data)
            mrc.header.nxstart, mrc.header.nystart, mrc.header.nzstart = np.rint(
                np.divide(self.origin, self.sampling_rate)
            )
            mrc.header.mapc, mrc.header.mapr, mrc.header.maps = (1, 2, 3)
            mrc.header["origin"] = tuple(self.origin)
            mrc.voxel_size = tuple(self.sampling_rate)

    def _save_em(self, filename: str, gzip: bool = False) -> None:
        """
        Writes data to disk as an .em file.

        Parameters
        ----------
        filename : str
            Path to write to.
        gzip : bool, optional
            If True, the output will be gzip compressed.

        References
        ----------
        .. [1] Nickell S. et al, Journal of Structural Biology, 2005.
        """
        DATA_TYPE_MAPPING = {
            np.dtype(np.int8): 1,
            np.dtype(np.int16): 2,
            np.dtype(np.int32): 3,
            np.dtype(np.float32): 5,
            np.dtype(np.float64): 6,
            np.dtype(np.complex64): 8,
            np.dtype(np.complex128): 9,
        }

        data_type_code = DATA_TYPE_MAPPING.get(self.data.dtype, 5)

        func = gzip_open if gzip else open
        with func(filename, "wb") as f:
            f.write(np.array([0], dtype=np.int8).tobytes())
            f.write(np.array([0, 0, data_type_code], dtype=np.int8).tobytes())
            f.write(np.array(self.data.shape, dtype="<i4").tobytes())
            f.write(b" " * 80)
            user_params = np.zeros(40, dtype="<i4")
            user_params[6] = int(self.sampling_rate[0] * 1000)
            f.write(user_params.tobytes())
            f.write(b" " * 256)
            f.write(self.data.tobytes())

    def _save_skio(self, filename: str, gzip: bool = False) -> None:
        """
        Uses :obj:`skimage.io.imsave` to write data to filename [1]_.

        Parameters
        ----------
        filename : str
            Path to write to with a format supported by :obj:`skimage.io.imsave`.
        gzip : bool, optional
            If True, the output will be gzip compressed.

        References
        ----------
        .. [1] https://scikit-image.org/docs/stable/api/skimage.io.html
        """
        import skimage.io as skio

        swap, kwargs = filename, {}
        if gzip:
            swap = BytesIO()
            kwargs["format"] = splitext(basename(filename.replace(".gz", "")))[
                1
            ].replace(".", "")
        skio.imsave(fname=swap, arr=self.data.astype("float32"), **kwargs)
        if gzip:
            with gzip_open(filename, "wb") as outfile:
                outfile.write(swap.getvalue())

    def _save_hdf5(self, filename: str, gzip: bool = False) -> None:
        """
        Saves the Density instance data to an HDF5 file, with optional compression.

        Parameters
        ----------
        filename : str
            Path to write to.
        gzip : bool, optional
            If True, the output will be gzip compressed.

        See Also
        --------
        :py:meth:`Density._load_hdf5`
        """
        compression = "gzip" if gzip else None
        with h5py.File(filename, mode="w") as f:
            f.create_dataset(
                "data",
                data=self.data,
                shape=self.data.shape,
                dtype=self.data.dtype,
                compression=compression,
            )
            f.create_dataset("origin", data=self.origin)
            f.create_dataset("sampling_rate", data=self.sampling_rate)

            self.metadata["mean"] = self.metadata.get("mean", 0)
            self.metadata["std"] = self.metadata.get("std", 0)
            self.metadata["min"] = self.metadata.get("min", 0)
            self.metadata["max"] = self.metadata.get("max", 0)
            if not isinstance(self.data, np.memmap):
                self.metadata["mean"] = self.data.mean()
                self.metadata["std"] = self.data.std()
                self.metadata["min"] = self.data.min()
                self.metadata["max"] = self.data.max()

            for key, val in self.metadata.items():
                f.attrs[key] = val

    @property
    def empty(self) -> "Density":
        """
        Returns a copy of the class instance with all elements in
        :py:attr:`Density.data` set to zero. :py:attr:`Density.metadata` will be
        initialized accordingly. :py:attr:`Density.origin` and
        :py:attr:`Density.sampling_rate` are copied.

        Returns
        -------
        :py:class:`Density`
            Empty class instance.

        Examples
        --------
        >>> import numpy as np
        >>> from tme import Density
        >>> original_density = Density.from_file("/path/to/file.mrc")
        >>> empty_density = original_density.empty
        >>> np.all(empty_density.data == 0)
        True
        """
        return Density(
            data=np.zeros_like(self.data),
            origin=deepcopy(self.origin),
            sampling_rate=deepcopy(self.sampling_rate),
            metadata={"min": 0, "max": 0, "mean": 0, "std": 0},
        )


[docs]
    def copy(self) -> "Density":
        """
        Create a copy of the class instance.

        Returns
        -------
        :py:class:`Density`
            A copy of the class instance.

        Examples
        --------
        >>> from tme import Density
        >>> original_density = Density.from_file("/path/to/file.mrc")
        >>> copied_density = original_density.copy
        >>> np.all(copied_density.data == original_density.data)
        True
        """
        return Density(
            data=self.data.copy(),
            origin=deepcopy(self.origin[:]),
            sampling_rate=self.sampling_rate,
            metadata=deepcopy(self.metadata),
        )



[docs]
    def to_memmap(self) -> None:
        """
        Converts :py:attr:`Density.data` to a :obj:`numpy.memmap`.

        Examples
        --------
        The following outlines how to use the :py:meth:`Density.to_memmap` method.

        >>> from tme import Density
        >>> large_density = Density.from_file("/path/to/large_file.mrc")
        >>> large_density.to_memmap()

        A more efficient solution to achieve the result outlined above is to
        provide the ``use_memmap`` flag in :py:meth:`Density.from_file`.

        >>> Density.from_file("/path/to/large_file.mrc", use_memmap = True)

        In practice, the :py:meth:`Density.to_memmap` method finds application, if a
        large number of :py:class:`Density` instances need to be in memory at once,
        without occupying the full phyiscal memory required to store
        :py:attr:`Density.data`.


        See Also
        --------
        :py:meth:`Density.to_numpy`
        """
        if isinstance(self.data, np.memmap):
            return None
        self.data = array_to_memmap(arr=self.data)



[docs]
    def to_numpy(self) -> None:
        """
        Converts :py:attr:`Density.data` to an in-memory :obj:`numpy.ndarray`.

        Examples
        --------
        >>> from tme import Density
        >>> density = Density.from_file("/path/to/large_file.mrc")
        >>> density.to_memmap()  # Convert to memory-mapped array first
        >>> density.to_numpy()   # Now, convert back to an in-memory array

        See Also
        --------
        :py:meth:`Density.to_memmap`
        """
        self.data = memmap_to_array(self.data)


    @property
    def shape(self) -> Tuple[int]:
        """
        Returns the dimensions of :py:attr:`Density.data`.

        Returns
        -------
        tuple
            The dimensions of :py:attr:`Density.data`.

        Examples
        --------
        >>> import numpy as np
        >>> from tme import Density
        >>> dens = Density(np.array([0, 1, 1, 1, 0]))
        >>> dens.shape
        (5,)
        """
        return self.data.shape

    @property
    def data(self) -> NDArray:
        """
        Returns the value of :py:attr:`Density.data`.

        Returns
        -------
        NDArray
            Value of the instance's :py:attr:`Density.data` attribute.

        Examples
        --------
        The following outlines the usage of :py:attr:`Density.data`:

        >>> import numpy as np
        >>> from tme import Density
        >>> dens = Density(np.array([0, 1, 1, 1, 0]))
        >>> dens.data
        array([0, 1, 1, 1, 0])

        """
        return self._data

    @data.setter
    def data(self, data: NDArray) -> None:
        """
        Sets the value of the instance's :py:attr:`Density.data` attribute.
        """
        self._data = data

    @property
    def origin(self) -> NDArray:
        """
        Returns the value of the instance's :py:attr:`Density.origin`
        attribute.

        Returns
        -------
        NDArray
            Value of the instance's :py:attr:`Density.origin` attribute.

        Examples
        --------
        The following outlines the usage of :py:attr:`Density.origin`:

        >>> import numpy as np
        >>> from tme import Density
        >>> dens = Density(np.array([0, 1, 1, 1, 0]))
        >>> dens.origin
        array([0.])
        """
        return self._origin

    @origin.setter
    def origin(self, origin: NDArray) -> None:
        """
        Sets the origin of the class instance.
        """
        origin = np.asarray(origin)
        origin = np.repeat(origin, self.data.ndim // origin.size)
        self._origin = origin

    @property
    def sampling_rate(self) -> NDArray:
        """
        Returns the value of the instance's :py:attr:`Density.sampling_rate` attribute.

        Returns
        -------
        NDArray
            Sampling rate along axis.
        """
        return self._sampling_rate

    @sampling_rate.setter
    def sampling_rate(self, sampling_rate: NDArray) -> None:
        """
        Sets the sampling rate of the class instance.
        """
        sampling_rate = np.asarray(sampling_rate)
        sampling_rate = np.repeat(sampling_rate, self.data.ndim // sampling_rate.size)
        self._sampling_rate = sampling_rate

    @property
    def metadata(self) -> Dict:
        """
        Returns the instance's :py:attr:`Density.metadata` attribute.

        Returns
        -------
        Dict
            Metadata dictionary. Empty by default.
        """
        return self._metadata

    @metadata.setter
    def metadata(self, metadata: Dict) -> None:
        """
        Sets the metadata of the class instance.
        """
        self._metadata = metadata


[docs]
    def to_pointcloud(self, threshold: float = 0) -> NDArray:
        """
        Returns data indices that are larger than the given threshold.

        Parameters
        ----------
        threshold : float, optional
            The cutoff value to determine the indices. Default is 0.

        Returns
        -------
        NDArray
            Data indices that are larger than the given threshold with shape
            (dimensions, indices).

        Examples
        --------
        >>> density.to_pointcloud(0)
        """
        return np.array(np.where(self.data > threshold))


    def _pad_slice(self, box: Tuple[slice], pad_kwargs: Dict = {}) -> NDArray:
        """
        Pads the internal data array according to box.

        Negative slices indices will result in a left-hand padding, while
        slice indices larger than the box_size property of the class
        instance will result in a right-hand padding.

        Parameters
        ----------
        box : tuple of slice
            Tuple of slice objects that define the box dimensions.
        pad_kwargs: dict, optional
            Parameter dictionary passed to numpy pad.

        Returns
        -------
        NDArray
            The padded internal data array.
        """
        box_start = np.array([b.start for b in box])
        box_stop = np.array([b.stop for b in box])
        left_pad = -np.minimum(box_start, np.zeros(len(box), dtype=int))

        right_pad = box_stop - box_start * (box_start > 0)
        right_pad -= np.array(self.shape, dtype=int)
        right_pad = np.maximum(right_pad, np.zeros_like(right_pad))
        padding = tuple((left, right) for left, right in zip(left_pad, right_pad))

        ret = np.pad(self.data, padding, **pad_kwargs)
        return ret


[docs]
    def adjust_box(self, box: Tuple[slice], pad_kwargs: Dict = {}) -> None:
        """
        Adjusts :py:attr:`Density.data` and :py:attr:`Density.origin`
        according to the provided box.

        Parameters
        ----------
        box : tuple of slices
            Description of how each axis of :py:attr:`Density.data` should be sliced.
        pad_kwargs: dict, optional
            Parameter dictionary passed to :obj:`numpy.pad`.

        See Also
        --------
        :py:meth:`Density.trim_box`

        Examples
        --------
        The following demonstrates the ability of :py:meth:`Density.adjust_box`
        to extract a subdensity from the current :py:class:`Density` instance.
        :py:meth:`Density.adjust_box` not only operats on :py:attr:`Density.data`,
        but also modifies :py:attr:`Density.origin` according to ``box``.

        >>> import numpy as np
        >>> from tme import Density
        >>> dens = Density(np.ones((5, 5)))
        >>> box = (slice(1, 4), slice(2, 5))
        >>> dens.adjust_box(box)
        >>> dens
        Origin: (1.0, 2.0), sampling_rate: (1, 1), Shape: (3, 3)

        :py:meth:`Density.adjust_box` can also extend the box of the current
        :py:class:`Density` instance. This is achieved by negative start or
        stops that exceed the dimension of the current :py:attr:`Density.data` array.

        >>> box = (slice(-1, 10), slice(2, 10))
        >>> dens.adjust_box(box)
        >>> dens
        Origin: (0.0, 4.0), sampling_rate: (1, 1), Shape: (11, 8)

        However, do note that only the start coordinate of each slice in ``box``
        can be negative.

        >>> box = (slice(-1, 10), slice(2, -10))
        >>> dens.adjust_box(box)
        >>> dens
        Origin: (-1.0, 6.0), sampling_rate: (1, 1), Shape: (11, 0)
        """
        crop_box = tuple(
            slice(max(b.start, 0), min(b.stop, shape))
            for b, shape in zip(box, self.data.shape)
        )
        self.data = self.data[crop_box].copy()

        # In case the box is larger than the current map
        self.data = self._pad_slice(box, pad_kwargs=pad_kwargs)

        # Adjust the origin
        left_shift = np.array([-1 * box[i].start for i in range(len(box))])
        self.origin = self.origin - np.multiply(left_shift, self.sampling_rate)



[docs]
    def trim_box(self, cutoff: float, margin: int = 0) -> Tuple[slice]:
        """
        Computes a rectangle with sufficient dimension that encloses all
        values of the internal data array larger than the specified cutoff,
        expanded by the specified margin.

        The output can be passed to :py:meth:`Density.adjust_box` to crop
        the internal data array.

        Parameters
        ----------
        cutoff : float
            The threshold value for determining the minimum enclosing box. Default is 0.
        margin : int, optional
            The margin to add to the box dimensions. Default is 0.

        Returns
        -------
        tuple
            A tuple containing slice objects representing the box.

        Raises
        ------
        ValueError
            If the cutoff is larger than or equal to the maximum density value.

        Examples
        --------
        The following will compute the bounding box that encloses all values
        in the example array that are larger than zero:

        >>> import numpy as np
        >>> from tme import Density
        >>> dens = Density(np.array([0,1,1,1,0]))
        >>> dens.trim_box(0)
        (slice(1, 4, None),)

        The resulting tuple can be passed to :py:meth:`Density.adjust_box` to trim the
        current :py:class:`Density` instance:

        >>> dens.adjust_box(dens.trim_box(0))
        >>> dens.data.shape
        (3,)

        See Also
        --------
        :py:meth:`Density.adjust_box`
        """
        if cutoff >= self.data.max():
            raise ValueError(
                f"Cutoff exceeds data range ({cutoff} >= {self.data.max()})."
            )
        starts, stops = [], []
        for axis in range(self.data.ndim):
            projected_max = np.max(
                self.data, axis=tuple(i for i in range(self.data.ndim) if i != axis)
            )
            valid = np.where(projected_max > cutoff)[0]
            starts.append(max(0, valid[0] - margin))
            stops.append(min(self.data.shape[axis], valid[-1] + margin + 1))
        slices = tuple(slice(*coord) for coord in zip(starts, stops))
        return slices



[docs]
    def minimum_enclosing_box(
        self,
        cutoff: float,
        use_geometric_center: bool = False,
    ) -> Tuple[slice]:
        """
        Compute the enclosing box that holds all possible rotations of the internal
        data array.

        Parameters
        ----------
        cutoff : float
            Above this value arr elements are considered. Defaults to 0.
        use_geometric_center : bool, optional
            Accommodate the geometric instead of the mass center.

        Returns
        -------
        tuple
            Tuple of slices corresponding to the minimum enclosing box.

        See Also
        --------
        :py:meth:`Density.adjust_box`
        :py:meth:`tme.matching_utils.minimum_enclosing_box`
        """
        if cutoff is None:
            cutoff = self.data.min() - 1

        coordinates = self.to_pointcloud(threshold=cutoff)
        shape = minimum_enclosing_box(
            coordinates=coordinates,
            use_geometric_center=use_geometric_center,
        )

        starts, stops = coordinates.min(axis=1), coordinates.max(axis=1)
        diff = np.maximum(np.subtract(shape, np.subtract(stops, starts)), 0)

        shift_start = np.divide(diff, 2).astype(int)
        shift_stop = shift_start + np.mod(diff, 2)

        # These are purposefully negative to indicate left and right pad
        starts = (starts - shift_start).astype(int)
        stops = (stops + shift_stop).astype(int)
        return tuple(slice(start, stop) for start, stop in zip(starts, stops))



[docs]
    def pad(
        self, new_shape: Tuple[int], center: bool = True, padding_value: float = 0
    ) -> None:
        """
        :py:meth:`Density.pad` extends the internal :py:attr:`Density.data`
        array of the current :py:class:`Density` instance to ``new_shape`` and
        adapts :py:attr:`Density.origin` accordingly.

        Parameters
        ----------
        new_shape : tuple of int
            The desired shape for the new volume.
        center : bool, optional
            Whether the data should be centered in the new box. Default is True.
        padding_value : float, optional
            Value to pad the data array with. Default is zero.

        Raises
        ------
        ValueError
            If the length of ``new_shape`` does not match the dimensionality of
            :py:attr:`Density.data`.

        Examples
        --------
        The following demonstrates the functionality of :py:meth:`Density.pad` on
        a one-dimensional array:

        >>> import numpy as np
        >>> from tme import Density
        >>> dens = Density(np.array([1,1,1]))
        >>> dens.pad(new_shape = (5,), center = True)
        >>> dens.data
        array([0, 1, 1, 1, 0])

        If ``center`` is set to False, the padding values will be appended:

        >>> dens = Density(np.array([1,1,1]))
        >>> dens.pad(new_shape = (5,), center = False)
        >>> dens.data
        array([1, 1, 1, 0, 0])

        It's also possible to pass a user-defined ``padding_value``:

        >>> dens = Density(np.array([1,1,1]))
        >>> dens.pad(new_shape = (5,), center = True, padding_value = -1)
        >>> dens.data
        array([-1, 1, 1, 1, -1])
        """
        if len(new_shape) != self.data.ndim:
            raise ValueError(
                f"new_shape has dimension {len(new_shape)}"
                f" but expected was {self.data.ndim}."
            )

        new_box = tuple(slice(0, stop) for stop in new_shape)
        if center:
            overhang = np.subtract(new_shape, self.shape).astype(int)
            padding = overhang // 2
            left = -padding
            right = np.add(self.shape, padding + overhang % 2)
            new_box = tuple(slice(*box) for box in zip(left, right))

        self.adjust_box(new_box, pad_kwargs={"constant_values": padding_value})



[docs]
    def centered(
        self, cutoff: float = 0.0, order: int = 1
    ) -> Tuple["Density", NDArray]:
        """
        Shifts the data center of mass to the center of the data array using linear
        interpolation. The box size of the returned :py:class:`Density` object is at
        least equal to the box size of the class instance.

        Parameters
        ----------
        cutoff : float, optional
            Consider all elements larger than cutoff for computing the new box.
            Default is 0.0.
        order : int, optional
            Interpolation order, defaults to 1.

        Notes
        -----
        Should any axis of the class instance data array be smaller than the return
        value of :py:meth:`Density.minimum_enclosing_box`, the size of the internal
        data array is adapted to avoid array elements larger than cutoff to fall
        outside the data array.

        Returns
        -------
        :py:class:`Density`
            A centered copy of the class instance.
        NDArray
            The offset between array center and center of mass.

        See Also
        --------
        :py:meth:`Density.trim_box`
        :py:meth:`Density.minimum_enclosing_box`

        Examples
        --------
        :py:meth:`Density.centered` returns a tuple containing a centered version
        of the current :py:class:`Density` instance. Centering is achieved via padding
        and rigid-transform of the internal :py:attr:`Density.data` attribute.
        `centered_dens` is sufficiently large to represent all rotations of the
        :py:attr:`Density.data` attribute.

        >>> import numpy as np
        >>> from tme import Density
        >>> dens = Density(np.ones((5,5,5)))
        >>> centered_dens = dens.centered(0)

        :py:meth:`Density.centered` extended the :py:attr:`Density.data` attribute
        of the current :py:class:`Density` instance and modified
        :py:attr:`Density.origin` accordingly.

        >>> centered_dens
        Origin: (-2.0, -2.0, -2.0), sampling_rate: (1, 1, 1), Shape: (9, 9, 9)
        """
        ret = self.copy()

        box = ret.minimum_enclosing_box(cutoff=cutoff, use_geometric_center=False)
        ret.adjust_box(box)

        new_shape = np.maximum(ret.shape, self.shape)
        new_shape = np.add(new_shape, 1 - np.mod(new_shape, 2))
        ret.pad(new_shape)

        center = self.center_of_mass(ret.data, cutoff)
        shift = np.subtract(np.divide(np.subtract(ret.shape, 1), 2), center)

        ret = ret.rigid_transform(
            translation=shift,
            rotation_matrix=np.eye(ret.data.ndim),
            use_geometric_center=True,
            order=order,
        )
        ret.origin = np.subtract(ret.origin, np.multiply(shift, ret.sampling_rate))
        return ret



[docs]
    def rigid_transform(
        self,
        rotation_matrix: NDArray,
        translation: NDArray = None,
        order: int = 3,
        use_geometric_center: bool = True,
    ) -> "Density":
        """
        Performs a rigid transform of the class instance.

        Parameters
        ----------
        rotation_matrix : NDArray
            Rotation matrix to apply.
        translation : NDArray
            Translation to apply.
        order : int, optional
            Interpolation order to use. Default is 3, has to be in range 0-5.
        use_geometric_center : bool, optional
            Use geometric or mass center as rotation center.

        Returns
        -------
        Density
            The transformed instance of :py:class:`Density`.

        Examples
        --------
        Define the :py:class:`Density` instance

        >>> import numpy as np
        >>> from tme import Density
        >>> dens = Density(np.arange(9).reshape(3,3).astype(np.float32))
        >>> dens, translation = dens.centered(0)

        and apply the rotation, in this case a mirror around the z-axis

        >>> rotation_matrix = np.eye(dens.data.ndim)
        >>> rotation_matrix[0, 0] = -1
        >>> dens_transform = dens.rigid_transform(rotation_matrix = rotation_matrix)
        >>> dens_transform.data
        array([[0.        , 0.        , 0.        , 0.        , 0.        ],
               [0.5       , 3.0833333 , 3.5833333 , 3.3333333 , 0.        ],
               [0.75      , 4.6666665 , 5.6666665 , 5.4166665 , 0.        ],
               [0.25      , 1.6666666 , 2.6666667 , 2.9166667 , 0.        ],
               [0.        , 0.08333334, 0.5833333 , 0.8333333 , 0.        ]],
              dtype=float32)

        Notes
        -----
        This function assumes the internal :py:attr:`Density.data` attribute is
        sufficiently sized to hold the transformation.

        See Also
        --------
        :py:meth:`Density.centered`, :py:meth:`Density.minimum_enclosing_box`
        """
        ret = self.empty
        data = self.data
        if not isinstance(data.dtype, np.floating):
            data = data.astype(np.float32)

        ret.data = ret.data.astype(data.dtype)
        NumpyFFTWBackend().rigid_transform(
            arr=data,
            rotation_matrix=rotation_matrix,
            translation=translation,
            use_geometric_center=use_geometric_center,
            out=ret.data,
            order=order,
        )

        eps = np.finfo(ret.data.dtype).eps
        ret.data[np.abs(ret.data) < eps] = 0
        return ret



[docs]
    def resample(
        self, new_sampling_rate: Tuple[float], method: str = "spline", order: int = 1
    ) -> "Density":
        """
        Resamples :py:attr:`Density.data` to ``new_sampling_rate``.

        Parameters
        ----------
        new_sampling_rate : tuple of floats or float
            Sampling rate to resample to for a single or all axes.
        method: str, optional
            Resampling method to use, defaults to `spline`. Availabe options are:

            +---------+----------------------------------------------------------+
            | spline  | Spline interpolation using :obj:`scipy.ndimage.zoom`     |
            +---------+----------------------------------------------------------+
            | fourier | Fourier cropping                                         |
            +---------+----------------------------------------------------------+
        order : int, optional
            Order of spline used for interpolation, by default 1. Ignored when
            ``method`` is `fourier`.

        Raises
        ------
        ValueError
            If ``method`` is not supported.

        Returns
        -------
        :py:class:`Density`
            A resampled copy of the class instance.

        Examples
        --------
        The following makes use of :py:meth:`tme.matching_utils.create_mask`
        to define a :py:class:`Density` instance containing a 2D circle with
        a sampling rate of 2

        >>> from tme import Density
        >>> from tme.matching_utils import create_mask
        >>> mask = create_mask(
        >>>     mask_type="ellipse",
        >>>     shape=(11,11),
        >>>     center=(5,5),
        >>>     radius=3
        >>> )
        >>> dens = Density(mask, sampling_rate=2)
        >>> dens
        Origin: (0.0, 0.0), sampling_rate: (2, 2), Shape: (11, 11)

        Using :py:meth:`Density.resample` we can modulate the sampling rate
        using spline interpolation of desired order

        >>> dens.resample(new_sampling_rate= 4, method="spline", order=3)
        Origin: (0.0, 0.0), sampling_rate: (4, 4), Shape: (6, 6)

        Or Fourier cropping which results in a less smooth output, but more faithfully
        captures the contained frequency information

        >>> dens.resample(new_sampling_rate=4, method="fourier")
        Origin: (0.0, 0.0), sampling_rate: (4, 4), Shape: (6, 6)

        ``new_sampling_rate`` can also be specified per axis

        >>> dens.resample(new_sampling_rate=(4,1), method="spline", order=3)
        Origin: (0.0, 0.0), sampling_rate: (4, 1), Shape: (6, 22)

        """
        _supported_methods = ("spline", "fourier")
        if method not in _supported_methods:
            raise ValueError(
                f"Expected method to be one of {_supported_methods}, got '{method}'."
            )
        new_sampling_rate = np.array(new_sampling_rate)
        new_sampling_rate = np.repeat(
            new_sampling_rate, self.data.ndim // new_sampling_rate.size
        )

        ret = self.copy()
        scale_factor = np.divide(ret.sampling_rate, new_sampling_rate)
        if method == "spline":
            ret.data = zoom(ret.data, scale_factor, order=order)
        elif method == "fourier":
            ret_shape = np.round(np.multiply(scale_factor, ret.shape)).astype(int)

            axis = range(len(ret_shape))
            mask = np.zeros(self.shape, dtype=bool)
            mask[tuple(slice(0, x) for x in ret_shape)] = 1
            mask = np.roll(
                mask, shift=-np.floor(np.divide(ret_shape, 2)).astype(int), axis=axis
            )
            mask_ret = np.zeros(ret_shape, dtype=bool)
            mask_ret[tuple(slice(0, x) for x in self.shape)] = 1
            mask_ret = np.roll(
                mask_ret,
                shift=-np.floor(np.divide(self.shape, 2)).astype(int),
                axis=axis,
            )

            mask, mask_ret = np.where(mask), np.where(mask_ret)

            arr_ft = np.fft.fftn(self.data)[mask]
            arr_ft *= np.prod(ret_shape) / np.prod(self.shape)
            ret_ft = np.zeros(ret_shape, dtype=arr_ft.dtype)
            np.add.at(ret_ft, mask_ret, arr_ft)
            ret.data = np.real(np.fft.ifftn(ret_ft)).astype(self.data.dtype)
        ret.sampling_rate = new_sampling_rate
        return ret



[docs]
    def density_boundary(
        self, weight: float, fraction_surface: float = 0.1, volume_factor: float = 1.21
    ) -> Tuple[float]:
        """
        Computes the density boundary of the class instance. The density
        boundary in this setting is defined as minimal and maximal density value
        enclosing a certain ``weight``.

        Parameters
        ----------
        weight : float
            Density weight to compute volume cutoff on. This could e.g. be the
            sum of contained atomic weights.
        fraction_surface : float, optional
            Approximate fraction of surface voxels on all voxels enclosing
            ``weight``, by default 0.1. Decreasing this value increases the
            upper volume boundary.
        volume_factor : float, optional
            Factor used to compute how many distinct density values
            can be used to represent ``weight``, by default 1.21.

        Returns
        -------
        tuple
            Tuple containing lower and upper bound on densities.

        References
        ----------
        .. [1] Cragnolini T, Sahota H, Joseph AP, Sweeney A, Malhotra S,
            Vasishtan D, Topf M (2021a) TEMPy2: A Python library with
            improved 3D electron microscopy density-fitting and validation
            workflows. Acta Crystallogr Sect D Struct Biol 77:41–47.
            https://doi.org/10.1107/S2059798320014928

        Raises
        ------
        ValueError
            If input any input parameter is <= 0.
        """
        if weight <= 0 or fraction_surface <= 0 or volume_factor <= 0:
            raise ValueError(
                "weight, fraction_surface and volume_factor need to be >= 0."
            )
        num_voxels = np.min(
            volume_factor * weight / np.power(self.sampling_rate, self.data.ndim)
        ).astype(int)
        surface_included_voxels = int(num_voxels * (1 + fraction_surface))

        map_partition = np.partition(
            self.data.flatten(), (-num_voxels, -surface_included_voxels)
        )
        upper_limit = map_partition[-num_voxels]
        lower_limit = map_partition[-surface_included_voxels]

        return (lower_limit, upper_limit)



[docs]
    def surface_coordinates(
        self, density_boundaries: Tuple[float], method: str = "ConvexHull"
    ) -> NDArray:
        """
        Calculates the surface coordinates of the class instance using
        different boundary and surface detection methods. This method is relevant
        for determining coordinates used in non-exhaustive template matching,
        see :py:class:`tme.matching_optimization.optimize_match`.

        Parameters
        ----------
        density_boundaries : tuple
            Lower and upper bound of density values to be considered
            (can be obtained from :py:meth:`Density.density_boundary`).
        method : str, optional
            Method to use for surface coordinate computation

            +--------------+-----------------------------------------------------+
            | ConvexHull   | Use the lower bound density convex hull vertices.   |
            +--------------+-----------------------------------------------------+
            | Weight       | Use all coordinates within ``density_boundaries``.  |
            +--------------+-----------------------------------------------------+
            | Sobel        | Set densities below the lower bound density to zero |
            |              | apply a sobel filter and return density coordinates |
            |              | larger than 0.5 times the maximum filter value.     |
            +--------------+-----------------------------------------------------+
            | Laplace      | Like 'Sobel', but with a Laplace filter.            |
            +--------------+-----------------------------------------------------+
            | Minimum      | Like 'Sobel' and 'Laplace' but with a spherical     |
            |              | minimum filter on the lower density bound.          |
            +--------------+-----------------------------------------------------+

        Raises
        ------
        ValueError
            If the chosen method is not available.

        Returns
        -------
        NDArray
            An array of surface coordinates with shape (points, dimensions).

        References
        ----------
        .. [1] Cragnolini T, et al. (2021) Acta Crys Sect D Struct Biol

        See Also
        --------
        :py:class:`tme.matching_optimization.NormalVectorScore`
        :py:class:`tme.matching_optimization.PartialLeastSquareDifference`
        :py:class:`tme.matching_optimization.MutualInformation`
        :py:class:`tme.matching_optimization.Envelope`
        :py:class:`tme.matching_optimization.Chamfer`
        """
        _available_methods = ["ConvexHull", "Weight", "Sobel", "Laplace", "Minimum"]

        if method not in _available_methods:
            raise ValueError(
                "Argument method has to be one of the following: %s"
                % ", ".join(_available_methods)
            )

        lower_bound, upper_bound = density_boundaries
        if method == "ConvexHull":
            from scipy.spatial import ConvexHull

            binary = np.transpose(np.where(self.data > lower_bound))
            hull = ConvexHull(binary)
            surface_points = binary[hull.vertices[:]]

        elif method == "Sobel":
            filtered_map = np.multiply(self.data, (self.data > lower_bound))
            magn = generic_gradient_magnitude(filtered_map, sobel)
            surface_points = np.argwhere(magn > 0.5 * magn.max())

        elif method == "Laplace":
            filtered_map = self.data > lower_bound
            magn = laplace(filtered_map)
            surface_points = np.argwhere(magn > 0.5 * magn.max())

        elif method == "Minimum":
            fp = np.zeros((self.data.ndim,) * self.data.ndim)
            center = np.ones(self.data.ndim, dtype=int)
            fp[tuple(center)] = 1
            for i in range(self.data.ndim):
                offset = np.zeros(self.data.ndim, dtype=int)
                offset[i] = 1
                fp[tuple(center + offset)] = 1
                fp[tuple(center - offset)] = 1

            filtered_map = (self.data > lower_bound).astype(int)
            filtered_map_surface = minimum_filter(
                filtered_map, footprint=fp, mode="constant", cval=0.8
            )
            filtered_map_surface = ((filtered_map - filtered_map_surface) == 1).astype(
                int
            )
            surface_points = np.argwhere(filtered_map_surface == 1)

        elif method == "Weight":
            surface_points = np.argwhere(
                np.logical_and(self.data < upper_bound, self.data > lower_bound)
            )

        return surface_points



[docs]
    def normal_vectors(self, coordinates: NDArray) -> NDArray:
        """
        Calculates the normal vectors for the given coordinates on the densities
        of the class instance. If the normal vector to a given coordinate
        can not be computed, the zero vector is returned instead. The output of this
        function can e.g. be used in
        :py:class:`tme.matching_optimization.NormalVectorScore`.

        Parameters
        ----------
        coordinates : NDArray
            An array of integer coordinates with shape (dimensions, coordinates)
            of which to calculate the normal vectors.

        Returns
        -------
        NDArray
            An array with unit normal vectors with same shape as coordinates.

        References
        ----------
        .. [1] Cragnolini T, Sahota H, Joseph AP, Sweeney A, Malhotra S,
            Vasishtan D, Topf M (2021a) TEMPy2: A Python library with
            improved 3D electron microscopy density-fitting and validation
            workflows. Acta Crystallogr Sect D Struct Biol 77:41–47.
            https://doi.org/10.1107/S2059798320014928

        Raises
        ------
        ValueError
            If coordinates.shape[1] does not match self.data.ndim,
            coordinates.ndim != 2 or lies outside self.data.

        See Also
        --------
        :py:class:`tme.matching_optimization.NormalVectorScore`
        :py:class:`tme.matching_optimization.PartialLeastSquareDifference`
        :py:class:`tme.matching_optimization.MutualInformation`
        :py:class:`tme.matching_optimization.Envelope`
        :py:class:`tme.matching_optimization.Chamfer`
        """
        normal_vectors, coordinates = [], np.asarray(coordinates, dtype=int)

        if coordinates.ndim != 2:
            raise ValueError("Coordinates should have shape point x dimension.")
        if coordinates.shape[1] != self.data.ndim:
            raise ValueError(
                f"Expected coordinate dimension {self.data.ndim}, "
                f"got {coordinates.shape[1]}."
            )
        in_box = np.logical_and(
            coordinates < np.array(self.shape), coordinates >= 0
        ).min(axis=1)
        coordinates = coordinates[in_box, :]
        for index in range(coordinates.shape[0]):
            point = coordinates[index, :]
            start = np.maximum(point - 1, 0)
            stop = np.minimum(point + 2, self.data.shape)
            slc = tuple(slice(*coords) for coords in zip(start, stop))

            inner_facing = np.array(np.where(self.data[slc] > self.data[tuple(point)]))
            if inner_facing.size == 0:
                normal_vectors.append(np.zeros_like(point))
                continue
            inner_facing -= np.ones_like(point)[:, None]
            inner_facing = inner_facing.sum(axis=1)
            inner_facing = inner_facing / np.linalg.norm(inner_facing)
            normal_vectors.append(inner_facing)

        return np.array(normal_vectors)



[docs]
    def core_mask(self) -> NDArray:
        """
        Calculates a weighted core mask by performing iterative binary erosion on
        :py:attr:`Density.data`. In each iteration, all mask elements corresponding
        to a non-zero data elemnt are incremented by one. Therefore, a mask element
        with value N corresponds to a data value that remained non-zero for N iterations.
        Mask elements with high values are likely part of the core density [1]_.

        Returns
        -------
        NDArray
            Core-weighted mask with shape of :py:attr:`Density.data`.

        References
        ----------
        .. [1]  Gydo Zundert and Alexandre Bonvin. Fast and sensitive rigid-body
                fitting into cryo-em density maps with powerfit. AIMS Biophysics,
                2:73–87, 04 2015. doi:10.3934/biophy.2015.2.73
        """
        core_indices = np.zeros(self.shape)
        eroded_mask = self.data > 0
        while eroded_mask.sum() > 0:
            core_indices += eroded_mask
            eroded_mask = binary_erosion(eroded_mask)
        return core_indices



[docs]
    def center_of_mass(self, arr: NDArray = None, cutoff: float = None) -> NDArray:
        """
        Computes the center of mass of a numpy ndarray instance.

        Parameters
        ----------
        arr : NDArray, optional
            Array to compute the center of mass of, default is :py:attr:`Density.data`.
        cutoff : float, optional
            Density cutoff for calculation. Defaults to None

        Returns
        -------
        NDArray
            Center of mass with shape (arr.ndim).
        """
        arr = self.data if arr is None else arr
        return NumpyFFTWBackend().center_of_mass(arr, cutoff)



[docs]
    @classmethod
    def match_densities(
        cls,
        target: "Density",
        template: "Density",
        cutoff_target: float = 0,
        cutoff_template: float = 0,
        scoring_method: str = "NormalizedCrossCorrelation",
        **kwargs,
    ) -> Tuple["Density", NDArray, NDArray, NDArray]:
        """
        Aligns two :py:class:`Density` instances target and template and returns
        the aligned template.

        If voxel sizes of target and template dont match coordinates are scaled
        to the numerically smaller voxel size. Instances are prealigned based on their
        center of mass. Finally :py:meth:`tme.matching_optimization.optimize_match` is
        used to determine translation and rotation to map template to target.

        Parameters
        ----------
        target : Density
            The target map for alignment.
        template : Density
            The template that should be aligned to the target.
        cutoff_target : float, optional
            The cutoff value for the target map, by default 0.
        cutoff_template : float, optional
            The cutoff value for the template map, by default 0.
        scoring_method : str, optional
            The scoring method to use for alignment. See
            :py:class:`tme.matching_optimization.create_score_object` for available methods,
            by default "NormalizedCrossCorrelation".
        kwargs : dict, optional
            Optional keyword arguments passed to
            :py:meth:`tme.matching_optimization.optimize_match`.

        Returns
        -------
        Tuple
            Tuple containing template aligned to target as :py:class:`Density` object,
            translation in voxels and rotation matrix used for the transformation.

        Notes
        -----
        No densities below cutoff_template are present in the returned Density object.
        """
        from .matching_utils import standardize
        from .matching_optimization import optimize_match, create_score_object

        template_mask = template.empty
        template_mask.data.fill(1)

        template.data = standardize(
            template=template.data,
            mask=template_mask.data,
            n_observations=template_mask.data.sum(),
        )

        target_sampling_rate = np.array(target.sampling_rate)
        template_sampling_rate = np.array(template.sampling_rate)
        target_sampling_rate = np.repeat(
            target_sampling_rate, target.data.ndim // target_sampling_rate.size
        )
        template_sampling_rate = np.repeat(
            template_sampling_rate, template.data.ndim // template_sampling_rate.size
        )
        if not np.allclose(target_sampling_rate, template_sampling_rate):
            print(
                "Voxel size of target and template do not match. "
                "Using smaller voxel size for refinement."
            )

        target_coordinates = target.to_pointcloud(cutoff_target)

        template_coordinates = template.to_pointcloud(cutoff_template)
        template_weights = template.data[tuple(template_coordinates)]

        refinement_sampling_rate = np.minimum(
            target_sampling_rate, template_sampling_rate
        )
        target_scaling = np.divide(target_sampling_rate, refinement_sampling_rate)
        template_scaling = np.divide(template_sampling_rate, refinement_sampling_rate)
        target_coordinates = target_coordinates * target_scaling[:, None]
        template_coordinates = template_coordinates * template_scaling[:, None]

        mass_center_difference = np.subtract(
            target.center_of_mass(target.data, cutoff_target),
            target.center_of_mass(template.data, cutoff_template),
        ).astype(int)
        template_coordinates += mass_center_difference[:, None]

        coordinates_mask = template_mask.to_pointcloud()
        coordinates_mask = coordinates_mask * template_scaling[:, None]
        coordinates_mask += mass_center_difference[:, None]

        score_object = create_score_object(
            score=scoring_method,
            target=target.data,
            template_coordinates=template_coordinates,
            template_mask_coordinates=coordinates_mask,
            template_weights=template_weights,
            sampling_rate=np.ones(template.data.ndim),
        )

        translation, rotation_matrix, score = optimize_match(
            score_object=score_object, **kwargs
        )

        translation += mass_center_difference
        translation = np.divide(translation, template_scaling)

        template.sampling_rate = template_sampling_rate.copy()
        ret = template.rigid_transform(
            rotation_matrix=rotation_matrix, use_geometric_center=False
        )
        ret.origin = target.origin.copy()
        ret.origin = ret.origin + np.multiply(translation, target_sampling_rate)

        return ret, translation, rotation_matrix



[docs]
    @classmethod
    def match_structure_to_density(
        cls,
        target: "Density",
        template: "Structure",
        cutoff_target: float = 0,
        scoring_method: str = "NormalizedCrossCorrelation",
        optimization_method: str = "basinhopping",
        maxiter: int = 500,
    ) -> Tuple["Structure", NDArray, NDArray]:
        """
        Aligns a :py:class:`tme.structure.Structure` template to :py:class:`Density`
        target and returns an aligned :py:class:`tme.structure.Structure` instance.

        If voxel sizes of target and template dont match coordinates are scaled
        to the numerically smaller voxel size. Prealignment is done by center's
        of mass. Finally :py:class:`tme.matching_optimization.optimize_match` is used to
        determine translation and rotation to match a template to target.

        Parameters
        ----------
        target : :py:class:`Density`
            The target map for template matching.
        template : Structure
            The template that should be aligned to the target.
        cutoff_target : float, optional
            The cutoff value for the target map, by default 0.
        cutoff_template : float, optional
            The cutoff value for the template map, by default 0.
        scoring_method : str, optional
            The scoring method to use for template matching. See
            :py:class:`tme.matching_optimization.create_score_object` for available methods,
            by default "NormalizedCrossCorrelation".
        optimization_method : str, optional
            Optimizer that is used.
            See :py:meth:`tme.matching_optimization.optimize_match`.
        maxiter : int, optional
            Maximum number of iterations for the optimizer.
            See :py:meth:`tme.matching_optimization.optimize_match`.

        Returns
        -------
        Structure
            Tuple containing template aligned to target as
            :py:class:`tme.structure.Structure` object, translation and rotation
            matrix used for the transformation.

        Notes
        -----
        Translation and rotation are in xyz format, different from
        :py:meth:`match_densities`, which is zyx.
        """
        template_density = cls.from_structure(
            filename_or_structure=template, sampling_rate=target.sampling_rate
        )

        ret, translation, rotation_matrix = cls.match_densities(
            target=target,
            template=template_density,
            cutoff_target=cutoff_target,
            cutoff_template=0,
            scoring_method=scoring_method,
            optimization_method=optimization_method,
            maxiter=maxiter,
        )
        out = template.copy()
        final_translation = np.subtract(ret.origin, template_density.origin)

        final_translation = final_translation
        rotation_matrix = rotation_matrix

        out = out.rigid_transform(
            translation=final_translation, rotation_matrix=rotation_matrix
        )

        return out, final_translation, rotation_matrix



[docs]
    def align_to_axis(self, data: NDArray = None, axis: int = 2, flip: bool = False):
        if data is None:
            data = self.data

        coordinates = np.array(np.where(data > 0))
        weights = self.data[tuple(coordinates)]
        return align_to_axis(coordinates.T, weights=weights, axis=axis, flip=flip)



[docs]
    @staticmethod
    def fourier_shell_correlation(density1: "Density", density2: "Density") -> NDArray:
        """
        Computes the Fourier Shell Correlation (FSC) between two instances of `Density`.

        The Fourier transforms of the input maps are divided into shells
        based on their spatial frequency. The correlation between corresponding shells
        in the two maps is computed to give the FSC.

        Parameters
        ----------
        density1 : :py:class:`Density`
            Reference for comparison.
        density2 : :py:class:`Density`
            Target for comparison.

        Returns
        -------
        NDArray
            An array of shape (N, 2), where N is the number of shells.
            The first column represents the spatial frequency for each shell
            and the second column represents the corresponding FSC.

        References
        ----------
        .. [1] https://github.com/tdgrant1/denss/blob/master/saxstats/saxstats.py
        """
        side = density1.data.shape[0]
        df = 1.0 / side

        qx_ = np.fft.fftfreq(side) * side * df
        qx, qy, qz = np.meshgrid(qx_, qx_, qx_, indexing="ij")
        qr = np.sqrt(qx**2 + qy**2 + qz**2)

        qmax = np.max(qr)
        qstep = np.min(qr[qr > 0])
        nbins = int(qmax / qstep)
        qbins = np.linspace(0, nbins * qstep, nbins + 1)
        qbin_labels = np.searchsorted(qbins, qr, "right") - 1

        F1 = np.fft.fftn(density1.data)
        F2 = np.fft.fftn(density2.data)

        qbin_labels = qbin_labels.reshape(-1)
        numerator = np.bincount(
            qbin_labels, weights=np.real(F1 * np.conj(F2)).reshape(-1)
        )
        term1 = np.bincount(qbin_labels, weights=np.abs(F1).reshape(-1) ** 2)
        term2 = np.bincount(qbin_labels, weights=np.abs(F2).reshape(-1) ** 2)
        np.multiply(term1, term2, out=term1)
        denominator = np.sqrt(term1)
        FSC = np.divide(numerator, denominator)

        qidx = np.where(qbins < qx.max())

        return np.vstack((qbins[qidx], FSC[qidx])).T