""" Implements Preprocessor class for filtering operations.
Copyright (c) 2023 European Molecular Biology Laboratory
Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
"""
import os
import pickle
import inspect
from typing import Dict, Tuple
import numpy as np
from scipy import ndimage
from scipy.special import iv as bessel
from scipy.interpolate import interp1d, splrep, BSpline
from scipy.optimize import differential_evolution, minimize
from .types import NDArray
[docs]
class Preprocessor:
"""
Implements filtering operations on density arrays.
"""
[docs]
def apply_method(self, method: str, parameters: Dict):
"""
Invoke ``Preprocessor.method`` using ``parameters``.
Parameters
----------
method : str
The name of the method to be used.
parameters : dict
The parameters for the specified method.
Returns
-------
The output of ``method``.
Raises
------
NotImplementedError
If ``method`` is not a member of :py:class:`Preprocessor`.
"""
if not hasattr(self, method):
raise NotImplementedError(
f"'{method}' is not supported as a filter method on this class."
)
return getattr(self, method)(**parameters)
[docs]
def method_to_id(self, method: str, parameters: Dict) -> str:
"""
Generate a unique ID for a specific method operation.
Parameters
----------
method : str
The name of the method.
parameters : dict
A dictionary containing the parameters used by the method.
Returns
-------
str
A string representation of the method operation, which can be used
as a unique identifier.
Raises
------
NotImplementedError
If ``method`` is not a member of :py:class:`Preprocessor`.
"""
if not hasattr(self, method):
raise NotImplementedError(
f"'{method}' is not supported as a filter method on this class."
)
signature = inspect.signature(getattr(self, method))
default = {
k: v.default
for k, v in signature.parameters.items()
if v.default is not inspect.Parameter.empty
}
default.update(parameters)
return "-".join([str(default[key]) for key in sorted(default.keys())])
[docs]
def gaussian_filter(
self,
template: NDArray,
sigma: Tuple[float],
cutoff_value: float = 4.0,
) -> NDArray:
"""
Convolve an atomic structure with a Gaussian kernel.
Parameters
----------
template : NDArray
Input data.
sigma : float or tuple of floats
The standard deviation of the Gaussian kernel along one or all axes.
cutoff_value : float, optional
Truncates the Gaussian kernel at cutoff_values times sigma.
Returns
-------
NDArray
Gaussian filtered template.
"""
sigma = 0 if sigma is None else sigma
return ndimage.gaussian_filter(template, sigma, cval=cutoff_value)
[docs]
def difference_of_gaussian_filter(
self, template: NDArray, low_sigma: NDArray, high_sigma: NDArray
) -> NDArray:
"""
Apply the Difference of Gaussian (DoG) bandpass filter on
the provided template.
Parameters
----------
template : NDArray
The input template on which to apply the technique.
low_sigma : NDArray
The smaller standard deviation for the Gaussian kernel.
Should be scalar or sequence of scalars of length template.ndim.
high_sigma : NDArray
The larger standard deviation for the Gaussian kernel.
Should be scalar or sequence of scalars of length template.ndim.
Returns
-------
NDArray
The result of applying the Difference of Gaussian technique on the template.
"""
if np.any(low_sigma > high_sigma):
print("low_sigma should be smaller than high_sigma.")
im1 = self.gaussian_filter(template, low_sigma)
im2 = self.gaussian_filter(template, high_sigma)
return im1 - im2
[docs]
def local_gaussian_alignment_filter(
self,
target: NDArray,
template: NDArray,
lbd: float,
sigma_range: Tuple[float, float] = (0.1, 20),
) -> NDArray:
"""
Simulate electron density by optimizing a sum of Gaussians.
For that, the following minimization problem is considered:
.. math::
dl_{\\text{{target}}} = \\frac{\\lambda}{\\sigma_{x}^{2}} + \\epsilon^{2}
.. math::
\\epsilon^{2} = \\| \\text{target} - \\text{template} \\|^{2}
Parameters
----------
target : NDArray
The target electron density map.
template : NDArray
The input atomic structure map.
lbd : float
The lambda hyperparameter.
sigma_range : tuple of float, optional
The range of sigma values for the optimizer. Default is (0.1, 20).
Returns
-------
NDArray
Simulated electron densities.
References
----------
.. [1] Gomez, G (Jan. 2000). Local Smoothness in terms of Variance:
The Adaptive Gaussian Filter. In Procedings of the British
Machine Vision Conference 2000.
"""
class _optimizer(Preprocessor):
def __init__(self, target, template, lbd):
self._target = target
self._template = template
self._dl = np.full(template.shape, 10**9)
self._filter = np.zeros_like(template)
self._lbd = lbd
def __call__(self, x, *args):
x = x[0]
filter = super().gaussian_filter(sigma=x, template=template)
dl = self._lbd / (x**2) + np.power(self._target - filter, 2)
ind = dl < self._dl
self._dl[ind] = dl[ind]
self._filter[ind] = filter[ind]
return np.sum(self._dl)
# This method needs pre normalization
template = template.copy()
target = target.copy()
sd_target = np.std(target)
sd_template = np.std(template)
m_target = np.mean(target)
m_template = np.mean(target)
if sd_target != 0:
target = (target - m_target) / sd_target
if sd_template != 0:
template = (template - m_template) / sd_template
temp = _optimizer(target=target, template=template, lbd=lbd)
_ = differential_evolution(temp, bounds=[sigma_range], seed=2)
# Make sure there is no negative density
temp._filter += np.abs(np.min(temp._filter))
return temp._filter
[docs]
def local_gaussian_filter(
self,
template: NDArray,
lbd: float,
sigma_range: Tuple[float, float],
gaussian_sigma: float,
) -> NDArray:
"""
Wrapper for `Preprocessor.local_gaussian_alignment_filter` if no
target is available.
Parameters
----------
template : NDArray
The input atomic structure map.
apix : float
Ångstrom per voxel passed to `Preprocessor.gaussian_filter`.
lbd : float
The lambda hyperparameter, common values: 2, 5, 20.
sigma_range : tuple of float
The range of sigma values for the optimizer.
gaussian_sigma : float
The sigma value passed to `Preprocessor.gaussian_filter` to
obtain a target.
Returns
-------
NDArray
Simulated electron densities.
"""
filtered_data = self.gaussian_filter(sigma=gaussian_sigma, template=template)
return self.local_gaussian_alignment_filter(
target=filtered_data,
template=template,
lbd=lbd,
sigma_range=sigma_range,
)
[docs]
def edge_gaussian_filter(
self,
template: NDArray,
edge_algorithm: str,
sigma: float,
reverse: bool = False,
) -> NDArray:
"""
Perform Gaussian filterring according to edges in the input template.
Parameters
----------
template : NDArray
The input atomic structure map.
sigma : NDArray
The sigma value for the Gaussian filter.
edge_algorithm : str
The algorithm used to identify edges. Options are:
+-------------------+------------------------------------------------+
| 'sobel' | Applies sobel filter for edge detection. |
+-------------------+------------------------------------------------+
| 'prewitt' | Applies prewitt filter for edge detection. |
+-------------------+------------------------------------------------+
| 'laplace' | Computes edges as second derivative. |
+-------------------+------------------------------------------------+
| 'gaussian' | See scipy.ndimage.gaussian_gradient_magnitude |
+-------------------+------------------------------------------------+
| 'gaussian_laplace | See scipy.ndimage.gaussian_laplace |
+-------------------+------------------------------------------------+
reverse : bool, optional
If true, the filterring is strong along edges. Default is False.
Returns
-------
NDArray
Simulated electron densities.
"""
if edge_algorithm == "sobel":
edges = ndimage.generic_gradient_magnitude(template, ndimage.sobel)
elif edge_algorithm == "prewitt":
edges = ndimage.generic_gradient_magnitude(template, ndimage.prewitt)
elif edge_algorithm == "laplace":
edges = ndimage.laplace(template)
elif edge_algorithm == "gaussian":
edges = ndimage.gaussian_gradient_magnitude(template, sigma / 2)
elif edge_algorithm == "gaussian_laplace":
edges = ndimage.gaussian_laplace(template, sigma / 2)
else:
raise ValueError(
"Supported edge_algorithm values are"
"'sobel', 'prewitt', 'laplace', 'gaussian_laplace', 'gaussian'"
)
edges[edges != 0] = 1
edges /= edges.max()
edges = ndimage.gaussian_filter(edges, sigma)
filt = ndimage.gaussian_filter(template, sigma)
if not reverse:
res = template * edges + filt * (1 - edges)
else:
res = template * (1 - edges) + filt * (edges)
return res
[docs]
def mean_filter(self, template: NDArray, width: NDArray) -> NDArray:
"""
Perform mean filtering.
Parameters
----------
template : NDArray
The input atomic structure map.
width : NDArray
Width of the mean filter along each axis. Can either have length
one or template.ndim.
Returns
-------
NDArray
Simulated electron densities.
"""
template = template.copy()
interpolation_box = template.shape
width = np.array(width)
filter_width = np.repeat(width, template.ndim // width.size)
filter_mask = np.ones(filter_width)
filter_mask = filter_mask / np.sum(filter_mask)
template = ndimage.convolve(template, filter_mask, mode="reflect")
# Sometimes scipy messes up the box sizes ...
template = self.interpolate_box(box=interpolation_box, arr=template)
return template
[docs]
def kaiserb_filter(self, template: NDArray, width: int) -> NDArray:
"""
Apply Kaiser filter defined as:
.. math::
f_{kaiser} = \\frac{I_{0}(\\beta\\sqrt{1-
\\frac{4n^{2}}{(M-1)^{2}}})}{I_{0}(\\beta)}
-\\frac{M-1}{2} \\leq n \\leq \\frac{M-1}{2}
\\text{With } \\beta=3.2
Parameters
----------
template : NDArray
The input atomic structure map.
width : int
Width of the filter window.
normalize : bool, optional
If true, the output is z-transformed. Default is False.
Returns
-------
NDArray
Simulated electron densities.
References
----------
.. [1] Sorzano, Carlos et al (Mar. 2015). Fast and accurate conversion
of atomic models into electron density maps. AIMS Biophysics
2, 8–20.
"""
template, interpolation_box = template.copy(), template.shape
kaiser_window = window_kaiserb(width=width)
template = apply_window_filter(arr=template, filter_window=kaiser_window)
if not np.all(template.shape == interpolation_box):
template = self.interpolate_box(box=interpolation_box, arr=template)
return template
[docs]
def blob_filter(self, template: NDArray, width: int) -> NDArray:
"""
Apply blob filter defined as:
.. math::
f_{blob} = \\frac{\\sqrt{1-(\\frac{4n^{2}}{(M-1)^{2}})^{m}} I_{m}
(\\beta\\sqrt{1-(\\frac{4n^{2}}{(M-1)^{2}})})}
{I_{m}(\\beta)}
-\\frac{M-1}{2} \\leq n \\leq \\frac{M-1}{2}
\\text{With } \\beta=3.2 \\text{ and order=2}
Parameters
----------
template : NDArray
The input atomic structure map.
width : int
Width of the filter window.
Returns
-------
NDArray
Simulated electron densities.
References
----------
.. [1] Sorzano, Carlos et al (Mar. 2015). Fast and accurate conversion
of atomic models into electron density maps. AIMS Biophysics
2, 8–20.
"""
template, interpolation_box = template.copy(), template.shape
blob_window = window_blob(width=width)
template = apply_window_filter(arr=template, filter_window=blob_window)
if not np.all(template.shape == interpolation_box):
template = self.interpolate_box(box=interpolation_box, arr=template)
return template
[docs]
def hamming_filter(self, template: NDArray, width: int) -> NDArray:
"""
Apply Hamming filter defined as:
.. math::
f_{hamming} = 0.54 - 0.46\\cos(\\frac{2\\pi n}{M-1})
0 \\leq n \\leq M-1
Parameters
----------
template : NDArray
The input atomic structure map.
width : int
Width of the filter window.
Returns
-------
NDArray
Simulated electron densities.
"""
template, interpolation_box = template.copy(), template.shape
hamming_window = np.hamming(int(width))
hamming_window /= hamming_window.sum()
template = apply_window_filter(arr=template, filter_window=hamming_window)
if not np.all(template.shape == interpolation_box):
template = self.interpolate_box(box=interpolation_box, arr=template)
return template
[docs]
def rank_filter(self, template: NDArray, rank: int) -> NDArray:
"""
Perform rank filtering.
Parameters
----------
template : NDArray
The input atomic structure map.
rank : int
Footprint value. 0 -> minimum filter, -1 -> maximum filter.
Returns
-------
NDArray
Simulated electron densities.
"""
template = template.copy()
interpolation_box = template.shape
size = rank // 2
if size <= 1:
size = 3
template = ndimage.rank_filter(template, rank=rank, size=size)
template = self.interpolate_box(box=interpolation_box, arr=template)
return template
[docs]
def mipmap_filter(self, template: NDArray, level: int) -> NDArray:
"""
Perform mip map antialiasing filtering.
Parameters
----------
template : NDArray
The input atomic structure map.
level : int
Pyramid layer. Resolution decreases cubically with level.
Returns
-------
NDArray
Simulated electron densities.
"""
array = template.copy()
interpolation_box = array.shape
for k in range(template.ndim):
array = ndimage.decimate(array, q=level, axis=k)
template = ndimage.zoom(array, np.divide(template.shape, array.shape))
template = self.interpolate_box(box=interpolation_box, arr=template)
return template
[docs]
def interpolate_box(
self, arr: NDArray, box: Tuple[int], kind: str = "nearest"
) -> NDArray:
"""
Resample ``arr`` within ``box`` using ``kind`` interpolation.
Parameters
----------
arr : NDArray
The input numpy array.
box : tuple of int
Tuple of integers corresponding to the shape of the output array.
kind : str, optional
Interpolation method used (see scipy.interpolate.interp1d).
Default is 'nearest'.
Raises
------
ValueError
If the shape of box does not match arr.ndim
Returns
-------
NDArray
Interpolated numpy array.
"""
if len(box) != arr.ndim:
raise ValueError(f"Expected box of {arr.ndim}, got {len(box)}")
for axis, size in enumerate(box):
f = interp1d(
np.linspace(0, 1, arr.shape[axis]),
arr,
kind=kind,
axis=axis,
fill_value="extrapolate",
)
arr = f(np.linspace(0, 1, size))
return arr
[docs]
def bandpass_filter(
self,
template: NDArray,
lowpass: float,
highpass: float,
sampling_rate: NDArray = None,
gaussian_sigma: float = 0.0,
) -> NDArray:
"""
Apply a band-pass filter on the provided template, using a
Butterworth approximation.
Parameters
----------
template : NDArray
The input numpy array on which the band-pass filter should be applied.
lowpass : float
The lower boundary of the frequency range to be preserved. Lower values will
retain broader, more global features.
highpass : float
The upper boundary of the frequency range to be preserved. Higher values
will emphasize finer details and potentially noise.
sampling_rate : NDarray, optional
The sampling rate along each dimension.
gaussian_sigma : float, optional
Sigma value for the gaussian smoothing to be applied to the filter.
Returns
-------
NDArray
Bandpass filtered numpy array.
"""
bpf = self.bandpass_mask(
shape=template.shape,
lowpass=lowpass,
highpass=highpass,
sampling_rate=sampling_rate,
gaussian_sigma=gaussian_sigma,
omit_negative_frequencies=False,
)
fft_data = np.fft.fftn(template)
np.multiply(fft_data, bpf, out=fft_data)
ret = np.real(np.fft.ifftn(fft_data))
return ret
[docs]
def bandpass_mask(
self,
shape: Tuple[int],
lowpass: float,
highpass: float,
sampling_rate: NDArray = None,
gaussian_sigma: float = 0.0,
omit_negative_frequencies: bool = True,
) -> NDArray:
"""
Compute an approximate Butterworth bundpass filter. The returned filter
has it's DC component at the origin.
Parameters
----------
shape : tuple of ints
Shape of the returned bandpass filter.
lowpass : float
The lower boundary of the frequency range to be preserved. Lower values will
retain broader, more global features.
maximum_frequency : float
The upper boundary of the frequency range to be preserved. Higher values
will emphasize finer details and potentially noise.
sampling_rate : NDarray, optional
The sampling rate along each dimension.
gaussian_sigma : float, optional
Sigma value for the gaussian smoothing to be applied to the filter.
omit_negative_frequencies : bool, optional
Whether the wedge mask should omit negative frequencies, i.e. be
applicable to non hermitian-symmetric fourier transforms.
Returns
-------
NDArray
Bandpass filtered.
"""
from .preprocessing import BandPassFilter
return BandPassFilter(
sampling_rate=sampling_rate,
lowpass=lowpass,
highpass=highpass,
return_real_fourier=omit_negative_frequencies,
use_gaussian=gaussian_sigma == 0.0,
)(shape=shape)["data"]
[docs]
def step_wedge_mask(
self,
shape: Tuple[int],
tilt_angles: Tuple[float] = None,
opening_axis: int = 0,
tilt_axis: int = 2,
weights: float = None,
infinite_plane: bool = False,
omit_negative_frequencies: bool = True,
) -> NDArray:
"""
Create a wedge mask with the same shape as template by rotating a
plane according to tilt angles. The DC component of the filter is at the origin.
Parameters
----------
tilt_angles : tuple of float
Sequence of tilt angles.
shape : Tuple of ints
Shape of the output wedge array.
tilt_axis : int, optional
Axis that the plane is tilted over.
- 0 for Z-axis
- 1 for Y-axis
- 2 for X-axis
opening_axis : int, optional
Axis running through the void defined by the wedge.
- 0 for Z-axis
- 1 for Y-axis
- 2 for X-axis
sigma : float, optional
Standard deviation for Gaussian kernel used for smoothing the wedge.
weights : float, tuple of float
Weight of each element in the wedge. Defaults to one.
omit_negative_frequencies : bool, optional
Whether the wedge mask should omit negative frequencies, i.e. be
applicable to symmetric Fourier transforms (see :obj:`numpy.fft.fftn`)
Returns
-------
NDArray
A numpy array containing the wedge mask.
See Also
--------
:py:meth:`Preprocessor.continuous_wedge_mask`
"""
from .preprocessing.tilt_series import WedgeReconstructed
return WedgeReconstructed(
angles=tilt_angles,
tilt_axis=tilt_axis,
opening_axis=opening_axis,
frequency_cutoff=None if infinite_plane else 0.5,
create_continuous_wedge=False,
weights=weights,
weight_wedge=weights is not None,
)(shape=shape, return_real_fourier=omit_negative_frequencies,)["data"]
[docs]
def continuous_wedge_mask(
self,
start_tilt: float,
stop_tilt: float,
shape: Tuple[int],
opening_axis: int = 0,
tilt_axis: int = 2,
infinite_plane: bool = True,
omit_negative_frequencies: bool = True,
) -> NDArray:
"""
Generate a wedge in a given shape based on specified tilt angles and axis.
The DC component of the filter is at the origin.
Parameters
----------
start_tilt : float
Starting tilt angle in degrees, e.g. a stage tilt of 70 degrees
would yield a start_tilt value of 70.
stop_tilt : float
Ending tilt angle in degrees, , e.g. a stage tilt of -70 degrees
would yield a stop_tilt value of 70.
tilt_axis : int
Axis that the plane is tilted over.
- 0 for Z-axis
- 1 for Y-axis
- 2 for X-axis
opening_axis : int
Axis running through the void defined by the wedge.
- 0 for Z-axis
- 1 for Y-axis
- 2 for X-axis
shape : Tuple of ints
Shape of the output wedge array.
omit_negative_frequencies : bool, optional
Whether the wedge mask should omit negative frequencies, i.e. be
applicable to symmetric Fourier transforms (see :obj:`numpy.fft.fftn`)
infinite_plane : bool, optional
Whether the plane should be considered to be larger than the shape. In this
case the output wedge mask fill have no spheric component.
Returns
-------
NDArray
Array of the specified shape with the wedge created based on
the tilt angles.
See Also
--------
:py:meth:`Preprocessor.step_wedge_mask`
"""
from .preprocessing.tilt_series import WedgeReconstructed
return WedgeReconstructed(
angles=(start_tilt, stop_tilt),
tilt_axis=tilt_axis,
opening_axis=opening_axis,
frequency_cutoff=None if infinite_plane else 0.5,
create_continuous_wedge=True,
)(shape=shape, return_real_fourier=omit_negative_frequencies)["data"]
def window_kaiserb(width: int, beta: float = 3.2, order: int = 0) -> NDArray:
"""
Create a Kaiser-Bessel window.
Parameters
----------
width : int
Width of the window.
beta : float, optional
Beta parameter of the Kaiser-Bessel window. Default is 3.2.
order : int, optional
Order of the Bessel function. Default is 0.
Returns
-------
NDArray
Kaiser-Bessel window.
References
----------
.. [1] Sorzano, Carlos et al (Mar. 2015). Fast and accurate conversion
of atomic models into electron density maps. AIMS Biophysics
2, 8–20.
"""
window = np.arange(0, width)
alpha = (width - 1) / 2.0
arr = beta * np.sqrt(1 - ((window - alpha) / alpha) ** 2.0)
return bessel(order, arr) / bessel(order, beta)
def window_blob(width: int, beta: float = 3.2, order: int = 2) -> NDArray:
"""
Generate a blob window based on Bessel functions.
Parameters
----------
width : int
Width of the window.
beta : float, optional
Beta parameter. Default is 3.2.
order : int, optional
Order of the Bessel function. Default is 2.
Returns
-------
NDArray
Blob window.
References
----------
.. [1] Sorzano, Carlos et al (Mar. 2015). Fast and accurate conversion
of atomic models into electron density maps. AIMS Biophysics
2, 8–20.
"""
window = np.arange(0, width)
alpha = (width - 1) / 2.0
arr = beta * np.sqrt(1 - ((window - alpha) / alpha) ** 2.0)
arr = np.divide(np.power(arr, order) * bessel(order, arr), bessel(order, beta))
arr[arr != arr] = 0
return arr
def window_sinckb(omega: float, d: float, dw: float):
"""
Compute the sinc window combined with a Kaiser window.
Parameters
----------
omega : float
Reduction factor.
d : float
Ripple.
dw : float
Delta w.
Returns
-------
ndarray
Impulse response of the low-pass filter.
References
----------
.. [1] Sorzano, Carlos et al (Mar. 2015). Fast and accurate conversion
of atomic models into electron density maps. AIMS Biophysics
2, 8–20.
"""
kaiser = kaiser_mask(d, dw)
sinc_m = sinc_mask(np.zeros(kaiser.shape), omega)
mask = sinc_m * kaiser
return mask / np.sum(mask)
def apply_window_filter(
arr: NDArray,
filter_window: NDArray,
mode: str = "reflect",
cval: float = 0.0,
origin: int = 0,
):
"""
Apply a window filter on an input array.
Parameters
----------
arr : NDArray,
Input array.
filter_window : NDArray,
Window filter to apply.
mode : str, optional
Mode for the filtering, default is "reflect".
cval : float, optional
Value to fill when mode is "constant", default is 0.0.
origin : int, optional
Origin of the filter window, default is 0.
Returns
-------
NDArray,
Array after filtering.
"""
filter_window = filter_window[::-1]
for axs in range(arr.ndim):
ndimage.correlate1d(
input=arr,
weights=filter_window,
axis=axs,
output=arr,
mode=mode,
cval=cval,
origin=origin,
)
return arr
def sinc_mask(mask: NDArray, omega: float) -> NDArray:
"""
Create a sinc mask.
Parameters
----------
mask : NDArray
Input mask.
omega : float
Reduction factor.
Returns
-------
NDArray
Sinc mask.
"""
# Move filter origin to the center of the mask
mask_origin = int((mask.size - 1) / 2)
dist = np.arange(-mask_origin, mask_origin + 1)
return np.multiply(omega / np.pi, np.sinc((omega / np.pi) * dist))
def kaiser_mask(d: float, dw: float) -> NDArray:
"""
Create a Kaiser mask.
Parameters
----------
d : float
Ripple.
dw : float
Delta-w.
Returns
-------
NDArray
Kaiser mask.
"""
# convert dw from a frequency normalized to 1 to a frequency normalized to pi
dw *= np.pi
A = -20 * np.log10(d)
M = max(1, np.ceil((A - 8) / (2.285 * dw)))
beta = 0
if A > 50:
beta = 0.1102 * (A - 8.7)
elif A >= 21:
beta = 0.5842 * np.power(A - 21, 0.4) + 0.07886 * (A - 21)
mask_values = np.abs(np.arange(-M, M + 1))
mask = np.sqrt(1 - np.power(mask_values / M, 2))
return np.divide(bessel(0, beta * mask), bessel(0, beta))
def electron_factor(
dist: NDArray, method: str, atom: str, fourier: bool = False
) -> NDArray:
"""
Compute the electron factor.
Parameters
----------
dist : NDArray
Distance.
method : str
Method name.
atom : str
Atom type.
fourier : bool, optional
Whether to compute the electron factor in Fourier space.
Returns
-------
NDArray
Computed electron factor.
"""
data = get_scattering_factors(method)
n_range = len(data.get(atom, [])) // 2
default = np.zeros(n_range * 3)
res = 0.0
a_values = data.get(atom, default)[:n_range]
b_values = data.get(atom, default)[n_range : 2 * n_range]
if method == "dt1969":
b_values = data.get(atom, default)[1 : (n_range + 1)]
for i in range(n_range):
a = a_values[i]
b = b_values[i]
if fourier:
temp = a * np.exp(-b * np.power(dist, 2))
else:
b = b / (4 * np.power(np.pi, 2))
temp = a * np.sqrt(np.pi / b) * np.exp(-np.power(dist, 2) / (4 * b))
if not np.isnan(temp).any():
res += temp
return res / (2 * np.pi)
def optimize_hlfp(profile, M, T, atom, method, filter_method):
"""
Optimize high-low pass filter (HLFP).
Parameters
----------
profile : NDArray
Input profile.
M : int
Scaling factor.
T : float
Time step.
atom : str
Atom type.
method : str
Method name.
filter_method : str
Filter method name.
Returns
-------
float
Fitness value.
References
----------
.. [1] Sorzano, Carlos et al (Mar. 2015). Fast and accurate conversion
of atomic models into electron density maps. AIMS Biophysics
2, 8–20.
"""
# omega, d, dw
initial_params = [1.0, 0.01, 1.0 / 8.0]
if filter_method == "brute":
best_fitness = float("inf")
OMEGA, D, DW = np.meshgrid(
np.arange(0.7, 1.3, 0.015),
np.arange(0.01, 0.2, 0.015),
np.arange(0.05, 0.2, 0.015),
)
for omega, d, dw in zip(OMEGA.ravel(), D.ravel(), DW.ravel()):
current_fitness = _hlpf_fitness([omega, d, dw], T, M, profile, atom, method)
if current_fitness < best_fitness:
best_fitness = current_fitness
initial_params = [omega, d, dw]
final_params = np.array(initial_params)
else:
res = minimize(
_hlpf_fitness,
initial_params,
args=tuple([T, M, profile, atom, method]),
method="SLSQP",
bounds=([0.2, 2], [1e-3, 2], [1e-3, 1]),
)
final_params = res.x
if np.any(final_params != final_params):
print(f"Solver returned NAs for atom {atom} at {M}" % (atom, M))
final_params = final_params
final_params[0] *= np.pi / M
mask = window_sinckb(*final_params)
if profile.shape[0] > mask.shape[0]:
profile_origin = int((profile.size - 1) / 2)
mask = window(mask, profile_origin, profile_origin)
return mask
def _hlpf_fitness(
params: Tuple[float], T: float, M: float, profile: NDArray, atom: str, method: str
) -> float:
"""
Fitness function for high-low pass filter optimization.
Parameters
----------
params : tuple of float
Parameters [omega, d, dw] for optimization.
T : float
Time step.
M : int
Scaling factor.
profile : NDArray
Input profile.
atom : str
Atom type.
method : str
Method name.
Returns
-------
float
Fitness value.
References
----------
.. [1] Sorzano, Carlos et al (Mar. 2015). Fast and accurate conversion
of atomic models into electron density maps. AIMS Biophysics
2, 8–20.
.. [2] https://github.com/I2PC/xmipp/blob/707f921dfd29cacf5a161535034d28153b58215a/src/xmipp/libraries/data/pdb.cpp#L1344
"""
omega, d, dw = params
if not (0.7 <= omega <= 1.3) and (0 <= d <= 0.2) and (1e-3 <= dw <= 0.2):
return 1e38 * np.random.randint(1, 100)
mask = window_sinckb(omega=omega * np.pi / M, d=d, dw=dw)
if profile.shape[0] > mask.shape[0]:
profile_origin = int((profile.size - 1) / 2)
mask = window(mask, profile_origin, profile_origin)
else:
filter_origin = int((mask.size - 1) / 2)
profile = window(profile, filter_origin, filter_origin)
f_mask = ndimage.convolve(profile, mask)
orig = int((f_mask.size - 1) / 2)
dist = np.arange(-orig, orig + 1) * T
t, c, k = splrep(x=dist, y=f_mask, k=3)
i_max = np.ceil(np.divide(f_mask.shape, M)).astype(int)[0]
coarse_mask = np.arange(-i_max, i_max + 1) * M
spline = BSpline(t, c, k)
coarse_values = spline(coarse_mask)
# padding to retain longer fourier response
aux = window(
coarse_values, x0=10 * coarse_values.shape[0], xf=10 * coarse_values.shape[0]
)
f_filter = np.fft.fftn(aux)
f_filter_mag = np.abs(f_filter)
freq = np.fft.fftfreq(f_filter.size)
freq /= M * T
amplitude_f = mask.sum() / coarse_values.sum()
size_f = f_filter_mag.shape[0] * amplitude_f
fourier_form_f = electron_factor(dist=freq, atom=atom, method=method, fourier=True)
valid_freq_mask = freq >= 0
f1_values = np.log10(f_filter_mag[valid_freq_mask] * size_f)
f2_values = np.log10(np.divide(T, fourier_form_f[valid_freq_mask]))
squared_differences = np.square(f1_values - f2_values)
error = np.sum(squared_differences)
error /= np.sum(valid_freq_mask)
return error
def window(arr, x0, xf, constant_values=0):
"""
Window an array by slicing between x0 and xf and padding if required.
Parameters
----------
arr : ndarray
Input array to be windowed.
x0 : int
Start of the window.
xf : int
End of the window.
constant_values : int or float, optional
The constant values to use for padding, by default 0.
Returns
-------
ndarray
Windowed array.
"""
origin = int((arr.size - 1) / 2)
xs = origin - x0
xe = origin - xf
if xs >= 0 and xe <= arr.shape[0]:
if xs <= arr.shape[0] and xe > 0:
arr = arr[xs:xe]
xs = 0
xe = 0
elif xs <= arr.shape[0]:
arr = arr[xs:]
xs = 0
elif xe >= 0 and xe <= arr.shape[0]:
arr = arr[:xe]
xe = 0
xs *= -1
xe *= -1
return np.pad(
arr, (int(xs), int(xe)), mode="constant", constant_values=constant_values
)
def atom_profile(
M, atom, T=0.08333333, method="peng1995", lfilter=True, filter_method="minimize"
):
"""
Generate an atom profile using a variety of methods.
Parameters
----------
M : float
Down sampling factor.
atom : Any
Type or representation of the atom.
T : float, optional
Sampling rate in angstroms/pixel, by default 0.08333333.
method : str, optional
Method to be used for generating the profile, by default "peng1995".
lfilter : bool, optional
Whether to apply filter on the profile, by default True.
filter_method : str, optional
The method for the filter, by default "minimize".
Returns
-------
BSpline
A spline representation of the atom profile.
References
----------
.. [1] Sorzano, Carlos et al (Mar. 2015). Fast and accurate conversion
of atomic models into electron density maps. AIMS Biophysics
2, 8–20.
.. [2] https://github.com/I2PC/xmipp/blob/707f921dfd29cacf5a161535034d28153b58215a/src/xmipp/libraries/data/pdb.cpp#L1344
"""
M = M / T
imax = np.ceil(4 / T * np.sqrt(76.7309 / (2 * np.power(np.pi, 2))))
dist = np.arange(-imax, imax + 1) * T
profile = electron_factor(dist, method, atom)
if lfilter:
window = optimize_hlfp(
profile=profile,
M=M,
T=T,
atom=atom,
method=method,
filter_method=filter_method,
)
profile = ndimage.convolve(profile, window)
indices = np.where(profile > 1e-3)
min_indices = np.maximum(np.amin(indices, axis=1), 0)
max_indices = np.minimum(np.amax(indices, axis=1) + 1, profile.shape)
slices = tuple(slice(*coord) for coord in zip(min_indices, max_indices))
profile = profile[slices]
profile_origin = int((profile.size - 1) / 2)
dist = np.arange(-profile_origin, profile_origin + 1) * T
t, c, k = splrep(x=dist, y=profile, k=3)
return BSpline(t, c, k)
def get_scattering_factors(method: str) -> Dict:
"""
Retrieve scattering factors from a stored file based on the given method.
Parameters
----------
method : str
Method name used to get the scattering factors.
Returns
-------
Dict
Dictionary containing scattering factors for the given method.
Raises
------
ValueError
If the method is not found in the stored data.
"""
path = os.path.join(os.path.dirname(__file__), "data", "scattering_factors.pickle")
with open(path, "rb") as infile:
data = pickle.load(infile)
if method not in data:
raise ValueError(f"{method} is not valid. Use {', '.join(data.keys())}.")
return data[method]