""" Implements class Structure to represent atomic structures.
Copyright (c) 2023 European Molecular Biology Laboratory
Author: Valentin Maurer <valentin.maurer@embl-hamburg.de>
"""
import warnings
from copy import deepcopy
from itertools import groupby
from dataclasses import dataclass
from collections import namedtuple
from typing import List, Dict, Tuple
from os.path import splitext, basename
import numpy as np
from .types import NDArray
from .preprocessor import atom_profile, Preprocessor
from .parser import PDBParser, MMCIFParser
from .matching_utils import rigid_transform, minimum_enclosing_box
[docs]
@dataclass(repr=False)
class Structure:
"""
Represents atomic structures per the Protein Data Bank (PDB) specification.
Examples
--------
The following achieves the definition of a :py:class:`Structure` instance
>>> from tme import Structure
>>> structure = Structure(
>>> record_type=["ATOM", "ATOM", "ATOM"],
>>> atom_serial_number=[0, 1, 2] ,
>>> atom_name=["C", "N", "H"],
>>> atom_coordinate=[[30,15,10], [35, 20, 15], [35,25,20]],
>>> alternate_location_indicator=[".", ".", "."],
>>> residue_name=["GLY", "GLY", "HIS"],
>>> chain_identifier=["A", "A", "B"],
>>> residue_sequence_number=[0, 0, 1],
>>> code_for_residue_insertion=["?", "?", "?"],
>>> occupancy=[0, 0, 0],
>>> temperature_factor=[0, 0, 0],
>>> segment_identifier=["1", "1", "1"],
>>> element_symbol=["C", "N", "C"],
>>> charge=["?", "?", "?"],
>>> metadata={},
>>> )
>>> structure
Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 3]
:py:class:`Structure` instances support a range of subsetting operations based on
atom indices
>>> structure[1]
Unique Chains: A, Atom Range: 1-1 [N = 1], Residue Range: 0-0 [N = 1]
>>> structure[(False, False, True)]
Unique Chains: B, Atom Range: 2-2 [N = 1], Residue Range: 1-1 [N = 1]
>>> structure[(1,2)]
Unique Chains: A-B, Atom Range: 1-2 [N = 2], Residue Range: 0-1 [N = 2]
They can be written to disk in a range of formats using :py:meth:`Structure.to_file`
>>> structure.to_file("test.pdb") # Writes a PDB file to disk
>>> structure.to_file("test.cif") # Writes a mmCIF file to disk
New instances can be created from a range of formats using
:py:meth:`Structure.from_file`
>>> Structure.from_file("test.pdb") # Reads PDB file from disk
Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 3]
>>> Structure.from_file("test.cif") # Reads mmCIF file from disk
Unique Chains: A-B, Atom Range: 0-2 [N = 3], Residue Range: 0-1 [N = 3]
Class instances can be discretized on grids and converted to
:py:class:`tme.density.Density` instances using :py:meth:`Structure.to_volume`
or :py:meth:`tme.density.Density.from_structure`.
>>> volume, origin, sampling_rate = structure.to_volume(shape=(50,40,30))
References
----------
.. [1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html
.. [2] https://www.ccp4.ac.uk/html/mmcifformat.html
"""
#: Array of record types, e.g.ATOM.
record_type: NDArray
#: Array of serial numbers.
atom_serial_number: NDArray
#: Array of atom names.
atom_name: NDArray
#: Array of x,y,z atom coordinates.
atom_coordinate: NDArray
#: Array of alternate location indices.
alternate_location_indicator: NDArray
#: Array of residue names.
residue_name: NDArray
#: Array of chain identifiers.
chain_identifier: NDArray
#: Array of residue ids.
residue_sequence_number: NDArray
#: Array of insertion information.
code_for_residue_insertion: NDArray
#: Array of occupancy factors.
occupancy: NDArray
#: Array of B-factors.
temperature_factor: NDArray
#: Array of segment identifiers.
segment_identifier: NDArray
#: Array of element symbols.
element_symbol: NDArray
#: Array of charges.
charge: NDArray
#: Metadata dictionary.
metadata: dict
def __post_init__(self, *args, **kwargs):
"""
Initialize the structure and populate header metadata.
Raises
------
ValueError
If NDArray attributes does not match the number of atoms.
"""
for attribute in self.__dict__:
value = getattr(self, attribute)
target_type = self.__annotations__.get(attribute, None)
if target_type == NDArray:
setattr(self, attribute, np.atleast_1d(np.array(value)))
n_atoms = self.atom_coordinate.shape[0]
for attribute in self.__dict__:
value = getattr(self, attribute)
if not isinstance(value, np.ndarray):
continue
if value.shape[0] != n_atoms:
raise ValueError(
f"Expected shape of {attribute}: {n_atoms}, got {value.shape[0]}."
)
self._elements = Elements()
self.metadata = self._populate_metadata(self.metadata)
def __getitem__(self, indices: List[int]) -> "Structure":
"""
Get a Structure instance for specified indices.
Parameters
----------
indices : Union[int, bool, NDArray]
The indices to get.
Returns
-------
Structure
The Structure instance for the given indices.
"""
if type(indices) in (int, bool):
indices = (indices,)
indices = np.asarray(indices)
attributes = (
"record_type",
"atom_serial_number",
"atom_name",
"atom_coordinate",
"alternate_location_indicator",
"residue_name",
"chain_identifier",
"residue_sequence_number",
"code_for_residue_insertion",
"occupancy",
"temperature_factor",
"segment_identifier",
"element_symbol",
"charge",
)
kwargs = {attr: getattr(self, attr)[indices] for attr in attributes}
ret = self.__class__(**kwargs, metadata={})
return ret
def __repr__(self):
"""
Return a string representation of the Structure.
"""
unique_chains = "-".join(
[
",".join([str(x) for x in entity])
for entity in self.metadata["unique_chains"]
]
)
min_atom = np.min(self.atom_serial_number)
max_atom = np.max(self.atom_serial_number)
n_atom = self.atom_serial_number.size
min_residue = np.min(self.residue_sequence_number)
max_residue = np.max(self.residue_sequence_number)
n_residue = np.unique(self.residue_sequence_number).size
repr_str = (
f"Structure object at {id(self)}\n"
f"Unique Chains: {unique_chains}, "
f"Atom Range: {min_atom}-{max_atom} [N = {n_atom}], "
f"Residue Range: {min_residue}-{max_residue} [N = {n_residue}]"
)
return repr_str
[docs]
def copy(self) -> "Structure":
"""
Returns a copy of the Structure instance.
Returns
-------
:py:class:`Structure`
The copied Structure instance.
Examples
--------
>>> import numpy as np
>>> structure_copy = structure.copy()
>>> np.allclose(structure_copy.atom_coordinate, structure.atom_coordinate)
True
"""
return deepcopy(self)
def _populate_metadata(self, metadata: Dict = {}) -> Dict:
"""
Populate the metadata dictionary with the data from the Structure instance.
Parameters
----------
metadata : dict, optional
The initial metadata dictionary, by default {}.
Returns
-------
dict
The populated metadata dictionary.
"""
metadata["weight"] = np.sum(
[self._elements[atype].atomic_weight for atype in self.element_symbol]
)
label, idx, chain = np.unique(
self.chain_identifier, return_inverse=True, return_index=True
)
chain_weight = np.bincount(
chain,
[self._elements[atype].atomic_weight for atype in self.element_symbol],
)
labels = self.chain_identifier[idx]
metadata["chain_weight"] = {key: val for key, val in zip(labels, chain_weight)}
# Group non-unique chains in separate lists in metadata["unique_chains"]
metadata["unique_chains"], temp = [], {}
for chain_label in label:
index = len(metadata["unique_chains"])
chain_sequence = "".join(
[
str(y)
for y in self.element_symbol[
np.where(self.chain_identifier == chain_label)
]
]
)
if chain_sequence not in temp:
temp[chain_sequence] = index
metadata["unique_chains"].append([chain_label])
continue
idx = temp.get(chain_sequence)
metadata["unique_chains"][idx].append(chain_label)
filtered_data = [
(label, integer)
for label, integer in zip(
self.chain_identifier, self.residue_sequence_number
)
]
filtered_data = sorted(filtered_data, key=lambda x: x[0])
metadata["chain_range"] = {}
for label, values in groupby(filtered_data, key=lambda x: x[0]):
values = [int(x[1]) for x in values]
metadata["chain_range"][label] = (min(values), max(values))
return metadata
[docs]
@classmethod
def from_file(
cls,
filename: str,
keep_non_atom_records: bool = False,
filter_by_elements: set = None,
filter_by_residues: set = None,
) -> "Structure":
"""
Reads an atomic structure file and into a :py:class:`Structure` instance.
Parameters
----------
filename : str
Input file. Supported extensions are:
+------+-------------------------------------------------------------+
| .pdb | Reads a PDB file |
+------+-------------------------------------------------------------+
| .cif | Reads an mmCIF file |
+------+-------------------------------------------------------------+
keep_non_atom_records : bool, optional
Wheter to keep residues that are not labelled ATOM.
filter_by_elements: set, optional
Which elements to keep. Default corresponds to all elements.
filter_by_residues: set, optional
Which residues to keep. Default corresponds to all residues.
Raises
------
ValueError
If the extension is not supported.
Returns
-------
:py:class:`Structure`
Structure instance representing the read in file.
Examples
--------
>>> from importlib_resources import files
>>> from tme import Structure
>>> fname = str(files("tests.data").joinpath("Structures/5khe.cif"))
>>> structure = Structure.from_file(filename=fname)
>>> structure
Unique Chains: A-B, Atom Range: 1-1564 [N = 1564], Residue Range: 142-239 [N = 1564]
We can include non ATOM entries and restrict the considered elements
and residues
>>> structure = Structure.from_file(
>>> filename=fname,
>>> keep_non_atom_records=True,
>>> filter_by_elements = {"C"},
>>> filter_by_residues = {"GLY"},
>>> )
>>> structure
Unique Chains: A,B, Atom Range: 96-1461 [N = 44], Residue Range: 154-228 [N = 44]
"""
_, file_extension = splitext(basename(filename.upper()))
if file_extension == ".PDB":
func = cls._load_pdb
elif file_extension == ".CIF":
func = cls._load_mmcif
else:
raise NotImplementedError(
"Could not determine structure filetype from extension."
" Supported filetypes are mmcif (.cif) and pdb (.pdb)."
)
data = func(filename)
keep = np.ones(data["element_symbol"].size, dtype=bool)
if filter_by_elements:
keep = np.logical_and(
keep,
np.in1d(data["element_symbol"], np.array(list(filter_by_elements))),
)
if filter_by_residues:
keep = np.logical_and(
keep, np.in1d(data["residue_name"], np.array(list(filter_by_residues)))
)
if not keep_non_atom_records:
keep = np.logical_and(keep, data["record_type"] == "ATOM")
for key in data:
if key == "metadata":
continue
if isinstance(data[key], np.ndarray):
data[key] = data[key][keep]
else:
data[key] = [x for x, flag in zip(data[key], keep) if flag]
data["metadata"]["filepath"] = filename
return cls(**data)
@staticmethod
def _load_mmcif(filename: str) -> Dict:
"""
Parses a macromolecular Crystallographic Information File (mmCIF)
and returns the data in a dictionary format.
Parameters
----------
filename : str
The filename of the mmCIF to load.
Returns
-------
dict
A dictionary of numpy arrays. Keys are the names of the PDB
coordinate section. In addition, some details about the parsed
structure are included. In case of conversion failure, the failing
attribute is set to 0 if its supposed to be an integer value.
"""
result = MMCIFParser(filename)
atom_site_mapping = {
"record_type": ("group_PDB", str),
"atom_serial_number": ("id", int),
"atom_name": ("label_atom_id", str),
"alternate_location_indicator": ("label_alt_id", str),
"residue_name": ("label_comp_id", str),
# "chain_identifier": ("auth_asym_id", str),
"chain_identifier": ("label_asym_id", str),
"residue_sequence_number": ("label_seq_id", int),
"code_for_residue_insertion": ("pdbx_PDB_ins_code", str),
"occupancy": ("occupancy", float),
"temperature_factor": ("B_iso_or_equiv", float),
"segment_identifier": ("pdbx_PDB_model_num", str),
"element_symbol": ("type_symbol", str),
"charge": ("pdbx_formal_charge", str),
}
out = {}
for out_key, (atom_site_key, dtype) in atom_site_mapping.items():
out_data = [
x.strip() for x in result["atom_site"].get(atom_site_key, ["."])
]
if dtype is int:
out_data = [0 if x == "." else int(x) for x in out_data]
try:
out[out_key] = np.asarray(out_data).astype(dtype)
except ValueError:
default = ["."] if dtype is str else 0
print(f"Converting {out_key} to {dtype} failed, set to {default}.")
out[out_key] = np.repeat(default, len(out_data)).astype(dtype)
number_entries = len(max(out.values(), key=len))
for key, value in out.items():
if value.size != 1:
continue
out[key] = np.repeat(value, number_entries // value.size)
out["metadata"] = {}
out["atom_coordinate"] = np.transpose(
np.array(
[
result["atom_site"]["Cartn_x"],
result["atom_site"]["Cartn_y"],
result["atom_site"]["Cartn_z"],
],
dtype=np.float32,
)
)
detail_mapping = {
"resolution": ("em_3d_reconstruction", "resolution", np.nan),
"resolution_method": ("em_3d_reconstruction", "resolution_method", np.nan),
"method": ("exptl", "method", np.nan),
"electron_source": ("em_imaging", "electron_source", np.nan),
"illumination_mode": ("em_imaging", "illumination_mode", np.nan),
"microscope_model": ("em_imaging", "microscope_model", np.nan),
}
for out_key, (base_key, inner_key, default) in detail_mapping.items():
if base_key not in result:
continue
out["metadata"][out_key] = result[base_key].get(inner_key, default)
return out
@staticmethod
def _load_pdb(filename: str) -> Dict:
"""
Parses a Protein Data Bank (PDB) file and returns the data
in a dictionary format.
Parameters
----------
filename : str
The filename of the PDB file to load.
Returns
-------
dict
A dictionary of numpy arrays. Keys are the names of the PDB
coordinate section. In addition, some details about the parsed
structure are included. In case of conversion failure, the failing
attribute is set to 0 if its supposed to be an integer value.
"""
result = PDBParser(filename)
atom_site_mapping = {
"record_type": ("record_type", str),
"atom_serial_number": ("atom_serial_number", int),
"atom_name": ("atom_name", str),
"alternate_location_indicator": ("alternate_location_indicator", str),
"residue_name": ("residue_name", str),
"chain_identifier": ("chain_identifier", str),
"residue_sequence_number": ("residue_sequence_number", int),
"code_for_residue_insertion": ("code_for_residue_insertion", str),
"occupancy": ("occupancy", float),
"temperature_factor": ("temperature_factor", float),
"segment_identifier": ("segment_identifier", str),
"element_symbol": ("element_symbol", str),
"charge": ("charge", str),
}
out = {"metadata": result["details"]}
for out_key, (inner_key, dtype) in atom_site_mapping.items():
out_data = [x.strip() for x in result[inner_key]]
if dtype is int:
out_data = [0 if x == "." else int(x) for x in out_data]
try:
out[out_key] = np.asarray(out_data).astype(dtype)
except ValueError:
default = "." if dtype is str else 0
print(
f"Converting {out_key} to {dtype} failed. Setting {out_key} to {default}."
)
out[out_key] = np.repeat(default, len(out_data)).astype(dtype)
out["atom_coordinate"] = np.array(result["atom_coordinate"], dtype=np.float32)
return out
[docs]
def to_file(self, filename: str) -> None:
"""
Writes the :py:class:`Structure` instance to disk.
Parameters
----------
filename : str
The name of the file to be created. Supported extensions are
+------+-------------------------------------------------------------+
| .pdb | Creates a PDB file |
+------+-------------------------------------------------------------+
| .cif | Creates an mmCIF file |
+------+-------------------------------------------------------------+
Raises
------
ValueError
If the extension is not supported.
Examples
--------
>>> from importlib_resources import files
>>> from tempfile import NamedTemporaryFile
>>> from tme import Structure
>>> fname = str(files("tests.data").joinpath("Structures/5khe.cif"))
>>> oname = NamedTemporaryFile().name
>>> structure = Structure.from_file(filename=fname)
>>> structure.to_file(f"{oname}.cif") # Writes an mmCIF file to disk
>>> structure.to_file(f"{oname}.pdb") # Writes a PDB file to disk
"""
if np.any(np.vectorize(len)(self.chain_identifier) > 2):
warnings.warn("Chain identifiers longer than one will be shortened.")
_, file_extension = splitext(basename(filename.upper()))
if file_extension == ".PDB":
func = self._write_pdb
elif file_extension == ".CIF":
func = self._write_mmcif
else:
raise NotImplementedError(
"Could not determine structure filetype."
" Supported filetypes are mmcif (.cif) and pdb (.pdb)."
)
if self.atom_coordinate.shape[0] > 10**5 and func == self._write_pdb:
warnings.warn(
"The structure contains more than 100,000 atoms. Consider using mmcif."
)
with open(filename, mode="w", encoding="utf-8") as ofile:
ofile.writelines(func())
def _write_pdb(self) -> List[str]:
"""
Returns a PDB string representation of the structure instance.
Returns
-------
list
List containing PDB file coordine lines.
"""
data_out = []
for index in range(self.atom_coordinate.shape[0]):
x, y, z = self.atom_coordinate[index, :]
line = list(" " * 80)
line[0:6] = f"{self.record_type[index]:<6}"
line[6:11] = f"{self.atom_serial_number[index]:>5}"
line[12:16] = f"{self.atom_name[index]:<4}"
line[16] = f"{self.alternate_location_indicator[index]:<1}"
line[17:20] = f"{self.residue_name[index]:<3}"
line[21] = f"{self.chain_identifier[index][0]:<1}"
line[22:26] = f"{self.residue_sequence_number[index]:>4}"
line[26] = f"{self.code_for_residue_insertion[index]:<1}"
line[30:38] = f"{x:>8.3f}"
line[38:46] = f"{y:>8.3f}"
line[46:54] = f"{z:>8.3f}"
line[54:60] = f"{self.occupancy[index]:>6.2f}"
line[60:66] = f"{self.temperature_factor[index]:>6.2f}"
line[72:76] = f"{self.segment_identifier[index]:>4}"
line[76:78] = f"{self.element_symbol[index]:<2}"
line[78:80] = f"{self.charge[index]:>2}"
data_out.append("".join(line))
data_out.append("END")
data_out = "\n".join(data_out)
return data_out
def _write_mmcif(self) -> List[str]:
"""
Returns a MMCIF string representation of the structure instance.
Returns
-------
list
List containing MMCIF file coordinate lines.
"""
model_num, entity_id = 1, 1
data = {
"group_PDB": [],
"id": [],
"type_symbol": [],
"label_atom_id": [],
"label_alt_id": [],
"label_comp_id": [],
"label_asym_id": [],
"label_entity_id": [],
"label_seq_id": [],
"pdbx_PDB_ins_code": [],
"Cartn_x": [],
"Cartn_y": [],
"Cartn_z": [],
"occupancy": [],
"B_iso_or_equiv": [],
"pdbx_formal_charge": [],
"auth_seq_id": [],
"auth_comp_id": [],
"auth_asym_id": [],
"auth_atom_id": [],
"pdbx_PDB_model_num": [],
}
for index in range(self.atom_coordinate.shape[0]):
x, y, z = self.atom_coordinate[index, :]
data["group_PDB"].append(self.record_type[index])
data["id"].append(str(self.atom_serial_number[index]))
data["type_symbol"].append(self.element_symbol[index])
data["label_atom_id"].append(self.atom_name[index])
data["label_alt_id"].append(self.alternate_location_indicator[index])
data["label_comp_id"].append(self.residue_name[index])
data["label_asym_id"].append(self.chain_identifier[index][0])
data["label_entity_id"].append(str(entity_id))
data["label_seq_id"].append(str(self.residue_sequence_number[index]))
data["pdbx_PDB_ins_code"].append(self.code_for_residue_insertion[index])
data["Cartn_x"].append(f"{x:.3f}")
data["Cartn_y"].append(f"{y:.3f}")
data["Cartn_z"].append(f"{z:.3f}")
data["occupancy"].append(f"{self.occupancy[index]:.2f}")
data["B_iso_or_equiv"].append(f"{self.temperature_factor[index]:.2f}")
data["pdbx_formal_charge"].append(self.charge[index])
data["auth_seq_id"].append(str(self.residue_sequence_number[index]))
data["auth_comp_id"].append(self.residue_name[index])
data["auth_asym_id"].append(self.chain_identifier[index][0])
data["auth_atom_id"].append(self.atom_name[index])
data["pdbx_PDB_model_num"].append(str(model_num))
output_data = {"atom_site": data}
original_file = self.metadata.get("filepath", "")
try:
new_data = {k: v for k, v in MMCIFParser(original_file).items()}
index = self.atom_serial_number - 1
new_data["atom_site"] = {
k: [v[i] for i in index] for k, v in new_data["atom_site"].items()
}
new_data["atom_site"]["Cartn_x"] = data["Cartn_x"]
new_data["atom_site"]["Cartn_y"] = data["Cartn_y"]
new_data["atom_site"]["Cartn_z"] = data["Cartn_z"]
output_data = new_data
except Exception:
pass
ret = ""
for category, subdict in output_data.items():
if not len(subdict):
continue
ret += "#\n"
is_loop = isinstance(subdict[list(subdict.keys())[0]], list)
if not is_loop:
for k in subdict:
ret += f"_{category}.{k}\t{subdict[k]}\n"
else:
ret += "loop_\n"
ret += "".join([f"_{category}.{k}\n" for k in subdict])
subdict = {
k: [_format_string(s) for s in v] for k, v in subdict.items()
}
key_length = {
key: len(max(value, key=lambda x: len(x), default=""))
for key, value in subdict.items()
}
padded_subdict = {
key: [s.ljust(key_length[key] + 1) for s in values]
for key, values in subdict.items()
}
data = [
"".join([str(x) for x in content])
for content in zip(*padded_subdict.values())
]
ret += "\n".join([entry for entry in data]) + "\n"
return ret
[docs]
def subset_by_chain(self, chain: str = None) -> "Structure":
"""
Return a subset of the structure that contains only atoms belonging to
a specific chain. If no chain is specified, all chains are returned.
Parameters
----------
chain : str, optional
The chain identifier. If multiple chains should be selected they need
to be a comma separated string, e.g. 'A,B,CE'. If chain None,
all chains are returned. Default is None.
Returns
-------
:py:class:`Structure`
A subset of the class instance containing only the specified chains.
Raises
------
ValueError
If none of the specified chains exist.
Examples
--------
>>> from importlib_resources import files
>>> from tme import Structure
>>> fname = str(files("tests.data").joinpath("Structures/5khe.cif"))
>>> structure = Structure.from_file(filename=fname)
>>> structure.subset_by_chain(chain="A") # Keep A
>>> structure.subset_by_chain(chain="A,B") # Keep A and B
>>> structure.subset_by_chain(chain="B,C") # Keep B, C does not exist
"""
chain = np.unique(self.chain_identifier) if chain is None else chain.split(",")
keep = np.in1d(self.chain_identifier, chain)
return self[keep]
[docs]
def subset_by_range(
self,
start: int,
stop: int,
chain: str = None,
) -> "Structure":
"""
Return a subset of the structure within a specific range of residues.
Parameters
----------
start : int
The starting residue sequence number.
stop : int
The ending residue sequence number.
chain : str, optional
The chain identifier. If multiple chains should be selected they need
to be a comma separated string, e.g. 'A,B,CE'. If chain None,
all chains are returned. Default is None.
Returns
-------
:py:class:`Structure`
A subset of the original structure within the specified residue range.
Raises
------
ValueError
If none of the specified residue chain combinations exist.
Examples
--------
>>> from importlib_resources import files
>>> from tme import Structure
>>> fname = str(files("tests.data").joinpath("Structures/5khe.cif"))
>>> structure = Structure.from_file(filename=fname)
>>> structure.subset_by_range(chain="A",start=150,stop=180)
"""
ret = self.subset_by_chain(chain=chain)
keep = np.logical_and(
ret.residue_sequence_number >= start, ret.residue_sequence_number <= stop
)
return ret[keep]
[docs]
def center_of_mass(self) -> NDArray:
"""
Calculate the center of mass of the structure.
Returns
-------
NDArray
The center of mass of the structure.
Examples
--------
>>> from importlib_resources import files
>>> from tme import Structure
>>> fname = str(files("tests.data").joinpath("Structures/5khe.cif"))
>>> structure = Structure.from_file(filename=fname)
>>> structure.center_of_mass()
array([-0.89391639, 29.94908928, -2.64736741])
"""
weights = [self._elements[atype].atomic_weight for atype in self.element_symbol]
return np.dot(self.atom_coordinate.T, weights) / np.sum(weights)
[docs]
def centered(self) -> Tuple["Structure", NDArray]:
"""
Shifts the structure analogous to :py:meth:`tme.density.Density.centered`.
Returns
-------
Structure
A copy of the class instance whose data center of mass is in the
center of the data array.
NDArray
The coordinate translation.
See Also
--------
:py:meth:`tme.density.Density.centered`
Examples
--------
>>> from importlib_resources import files
>>> from tme import Structure
>>> fname = str(files("tests.data").joinpath("Structures/5khe.cif"))
>>> structure = Structure.from_file(filename=fname)
>>> centered_structure, translation = structure.centered()
>>> translation
array([34.89391639, 4.05091072, 36.64736741])
"""
center_of_mass = self.center_of_mass()
enclosing_box = minimum_enclosing_box(coordinates=self.atom_coordinate.T)
shift = np.subtract(np.divide(enclosing_box, 2), center_of_mass)
transformed_structure = self.rigid_transform(
translation=shift, rotation_matrix=np.eye(shift.size)
)
return transformed_structure, shift
def _coordinate_to_position(
self,
shape: Tuple[int],
sampling_rate: Tuple[float],
origin: Tuple[float],
) -> (NDArray, Tuple[str], Tuple[int], float, Tuple[float]):
"""
Converts coordinates to positions.
Parameters
----------
shape : Tuple[int,]
The desired shape of the output array.
sampling_rate : float
The sampling rate of the output array in unit of self.atom_coordinate.
origin : Tuple[float,]
The origin of the coordinate system.
Returns
-------
Tuple[NDArray, List[str], Tuple[int, ], float, Tuple[float,]]
Returns positions, atom_types, shape, sampling_rate, and origin.
"""
coordinates = self.atom_coordinate.copy()
atom_types = self.element_symbol.copy()
# positions are in x, y, z map is z, y, x
coordinates = coordinates[:, ::-1]
sampling_rate = 1 if sampling_rate is None else sampling_rate
adjust_origin = origin is not None and shape is None
origin = coordinates.min(axis=0) if origin is None else origin
positions = (coordinates - origin) / sampling_rate
positions = np.rint(positions).astype(int)
if adjust_origin:
left_shift = positions.min(axis=0)
positions -= left_shift
shape = positions.max(axis=0) + 1
origin = origin + np.multiply(left_shift, sampling_rate)
if shape is None:
shape = positions.max(axis=0) + 1
valid_positions = np.sum(
np.logical_and(positions < shape, positions >= 0), axis=1
)
positions = positions[valid_positions == positions.shape[1], :]
atom_types = atom_types[valid_positions == positions.shape[1]]
self.metadata["nAtoms_outOfBound"] = 0
if positions.shape[0] != coordinates.shape[0]:
out_of_bounds = coordinates.shape[0] - positions.shape[0]
print(f"{out_of_bounds}/{coordinates.shape[0]} atoms were out of bounds.")
self.metadata["nAtoms_outOfBound"] = out_of_bounds
return positions, atom_types, shape, sampling_rate, origin
def _position_to_vdw_sphere(
self,
positions: Tuple[float],
atoms: Tuple[str],
sampling_rate: Tuple[float],
volume: NDArray,
) -> None:
"""
Updates a volume with van der Waals spheres.
Parameters
----------
positions : Tuple[float, float, float]
The positions of the atoms.
atoms : Tuple[str]
The types of the atoms.
sampling_rate : float
The desired sampling rate in unit of self.atom_coordinate of the
output array.
volume : NDArray
The volume to update.
"""
index_dict, vdw_rad, shape = {}, {}, volume.shape
for atom_index, atom_position in enumerate(positions):
atom_type = atoms[atom_index]
if atom_type not in index_dict.keys():
atom_vdwr = np.ceil(
np.divide(self._elements[atom_type].vdwr, (sampling_rate * 100))
).astype(int)
vdw_rad[atom_type] = atom_vdwr
atom_slice = tuple(slice(-k, k + 1) for k in atom_vdwr)
distances = np.linalg.norm(
np.divide(
np.mgrid[atom_slice],
atom_vdwr.reshape((-1,) + (1,) * volume.ndim),
),
axis=0,
)
index_dict[atom_type] = (distances <= 1).astype(volume.dtype)
footprint = index_dict[atom_type]
start = np.maximum(np.subtract(atom_position, vdw_rad[atom_type]), 0)
stop = np.minimum(np.add(atom_position, vdw_rad[atom_type]) + 1, shape)
volume_slice = tuple(slice(*coord) for coord in zip(start, stop))
start_index = np.maximum(-np.subtract(atom_position, vdw_rad[atom_type]), 0)
stop_index = np.add(
footprint.shape,
np.minimum(
np.subtract(shape, np.add(atom_position, vdw_rad[atom_type]) + 1), 0
),
)
index_slice = tuple(slice(*coord) for coord in zip(start_index, stop_index))
volume[volume_slice] += footprint[index_slice]
def _position_to_scattering_factors(
self,
positions: NDArray,
atoms: NDArray,
sampling_rate: NDArray,
volume: NDArray,
lowpass_filter: bool = True,
downsampling_factor: float = 1.35,
source: str = "peng1995",
) -> None:
"""
Updates a volume with scattering factors.
Parameters
----------
positions : NDArray
The positions of the atoms.
atoms : NDArray
Element symbols.
sampling_rate : float
Sampling rate that was used to convert coordinates to positions.
volume : NDArray
The volume to update.
lowpass_filter : NDArray
Whether the scattering factors should be lowpass filtered.
downsampling_factor : NDArray
Downsampling factor for scattering factor computation.
source : str
Which scattering factors to use
Reference
---------
https://github.com/I2PC/xmipp.
"""
scattering_profiles, shape = dict(), volume.shape
for atom_index, point in enumerate(positions):
if atoms[atom_index] not in scattering_profiles:
spline = atom_profile(
atom=atoms[atom_index],
M=downsampling_factor,
method=source,
lfilter=lowpass_filter,
)
scattering_profiles.update({atoms[atom_index]: spline})
atomic_radius = np.divide(
self._elements[atoms[atom_index]].vdwr, sampling_rate * 100
)
starts = np.maximum(np.ceil(point - atomic_radius), 0).astype(int)
stops = np.minimum(np.floor(point + atomic_radius), shape).astype(int)
grid_index = np.meshgrid(
*[range(start, stop) for start, stop in zip(starts, stops)]
)
distances = np.einsum(
"aijk->ijk",
np.array([(grid_index[i] - point[i]) ** 2 for i in range(len(point))]),
dtype=np.float64,
)
distances = np.sqrt(distances)
if not len(distances):
grid_index, distances = point, 0
np.add.at(
volume,
tuple(grid_index),
scattering_profiles[atoms[atom_index]](distances),
)
@staticmethod
def _position_to_molmap(
positions: NDArray,
weights: Tuple[float],
resolution: float = 4,
sigma_factor: float = 1 / (np.pi * np.sqrt(2)),
cutoff_value: float = 4.0,
sampling_rate: float = None,
) -> NDArray:
"""
Simulates electron densities analogous to Chimera's molmap function [1]_.
Parameters
----------
positions : NDArray
Array containing atomic positions in z,y,x format (n,d).
weights : tuple of float
The weights to use for the entries in positions.
resolution : float, optional
The product of resolution and sigma_factor gives the sigma used to
compute the discretized Gaussian.
sigma_factor : float, optional
The factor used with resolution to compute sigma. Default is 1 / (π√2).
cutoff_value : float, optional
The cutoff value for the Gaussian kernel. Default is 4.0.
sampling_rate : float, optional
Sampling rate along each dimension. One third of resolution by default.
References
----------
..[1] https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/midas/molmap.html
Returns
-------
NDArray
A numpy array containing the simulated electron densities.
"""
if sampling_rate is None:
sampling_rate = resolution / 3
pad = int(3 * resolution)
sigma = sigma_factor * resolution
sigma_grid = sigma / sampling_rate
# Limit padding to numerically stable values
smax = np.max(sigma_grid)
arr = np.arange(0, pad)
gaussian = (
np.exp(-0.5 * (arr / smax) ** 2)
* np.power(2 * np.pi, -1.5)
* np.power(sigma, -3.0)
)
pad_cutoff = np.max(arr[gaussian > 1e-8])
if arr.size != 0:
pad = int(pad_cutoff) + 1
positions = positions[:, ::-1]
origin = positions.min(axis=0) - pad * sampling_rate
positions = np.rint(np.divide((positions - origin), sampling_rate)).astype(int)
shape = positions.max(axis=0).astype(int) + pad + 1
out = np.zeros(shape, dtype=np.float32)
np.add.at(out, tuple(positions.T), weights)
out = Preprocessor().gaussian_filter(
template=out, sigma=sigma_grid, cutoff_value=cutoff_value
)
return out, origin
def _get_atom_weights(
self, atoms: Tuple[str] = None, weight_type: str = "atomic_weight"
) -> Tuple[float]:
"""
Returns weights of individual atoms according to a specified weight type.
Parameters
----------
atoms : Tuple of strings, optional
The atoms to get the weights for. If None, weights for all atoms
are used. Default is None.
weight_type : str, optional
The type of weights to return. This can either be 'atomic_weight',
'atomic_number', or 'van_der_waals_radius'. Default is 'atomic_weight'.
Returns
-------
List[float]
A list containing the weights of the atoms.
"""
atoms = self.element_symbol if atoms is None else atoms
match weight_type:
case "atomic_weight":
weight = [self._elements[atom].atomic_weight for atom in atoms]
case "atomic_number":
weight = [self._elements[atom].atomic_number for atom in atoms]
case _:
raise NotImplementedError(
"weight_type can either be 'atomic_weight' or 'atomic_number'"
)
return weight
[docs]
def to_volume(
self,
shape: Tuple[int] = None,
sampling_rate: Tuple[float] = None,
origin: Tuple[float] = None,
chain: str = None,
weight_type: str = "atomic_weight",
weight_type_args: Dict = dict(),
) -> Tuple[NDArray, NDArray, NDArray]:
"""
Maps class instance to a volume.
Parameters
----------
shape : tuple of ints, optional
Output array shape in (z,y,x) form.
sampling_rate : tuple of float, optional
Sampling rate of the output array in units of
:py:attr:`Structure.atom_coordinate`
origin : tuple of floats, optional
Origin of the coordinate system in (z,y,x) form.
chain : str, optional
Chains to be included. Either single or comma separated string of chains.
Defaults to None which returns all chains.
weight_type : str, optional
Weight given to individual atoms. Supported weights are:
+----------------------------+---------------------------------------+
| atomic_weight | Using element weight point mass |
+----------------------------+---------------------------------------+
| atomic_number | Using atomic number point mass |
+----------------------------+---------------------------------------+
| gaussian | Using element weighted Gaussian mass |
+----------------------------+---------------------------------------+
| van_der_waals_radius | Using binary van der waal spheres |
+----------------------------+---------------------------------------+
| scattering_factors | Using experimental scattering factors |
+----------------------------+---------------------------------------+
| lowpass_scattering_factors | Lowpass filtered scattering_factors |
+----------------------------+---------------------------------------+
weight_type_args : dict, optional
Additional arguments used for individual weight_types. `gaussian`
accepts ``resolution``, `scattering` accepts ``method``.
Returns
-------
Tuple[NDArray, NDArray, NDArray]
Volume, origin and sampling_rate.
Examples
--------
>>> from importlib_resources import files
>>> from tme import Structure
>>> fname = str(files("tests.data").joinpath("Structures/5khe.cif"))
>>> structure = Structure.from_file(filename=fname)
>>> vol, origin, sampling = structure.to_volume()
>>> vol.shape, origin, sampling
((59, 35, 53), array([-30.71, 12.42, -27.15]), array([1., 1., 1.]))
>>> vol, origin, sampling = structure.to_volume(sampling_rate=(2.2,1,3))
((27, 35, 18), array([-30.71, 12.42, -27.15]), array([2.2, 1. , 3. ]))
``sampling_rate`` and ``origin`` can be set to ensure correct alignment
with corresponding density maps such as the ones at EMDB. Analogous to
:py:meth:`Structure.subset_by_chain` only parts of the structure can be
mapped onto grids using a variety of weighting schemes
>>> structure.to_volume(weight_type="van_der_waals_radius")
>>> structure.to_volume(
>>> weight_type="lowpass_scattering_factors",
>>> method_args={"source" : "dt1969", "downsampling_factor" : 1.35},
>>> )
"""
_weight_types = {
"gaussian",
"atomic_weight",
"atomic_number",
"van_der_waals_radius",
"scattering_factors",
"lowpass_scattering_factors",
}
_weight_string = ",".join([f"'{x}'" for x in _weight_types])
if weight_type not in _weight_types:
raise NotImplementedError(f"weight_type needs to be in {_weight_string}")
if sampling_rate is None:
sampling_rate = np.ones(self.atom_coordinate.shape[1])
sampling_rate = np.array(sampling_rate)
if sampling_rate.size == 1:
sampling_rate = np.repeat(sampling_rate, self.atom_coordinate.shape[1])
elif sampling_rate.size != self.atom_coordinate.shape[1]:
raise ValueError(
"sampling_rate should either be single value of array with"
f"size {self.atom_coordinate.shape[1]}."
)
temp = self.subset_by_chain(chain=chain)
positions, atoms, _shape, sampling_rate, origin = temp._coordinate_to_position(
shape=shape, sampling_rate=sampling_rate, origin=origin
)
volume = np.zeros(_shape, dtype=np.float32)
if weight_type in ("atomic_weight", "atomic_number"):
weights = temp._get_atom_weights(atoms=atoms, weight_type=weight_type)
np.add.at(volume, tuple(positions.T), weights)
elif weight_type == "van_der_waals_radius":
self._position_to_vdw_sphere(positions, atoms, sampling_rate, volume)
elif weight_type == "scattering_factors":
self._position_to_scattering_factors(
positions,
atoms,
sampling_rate,
volume,
lowpass_filter=False,
**weight_type_args,
)
elif weight_type == "lowpass_scattering_factors":
self._position_to_scattering_factors(
positions,
atoms,
sampling_rate,
volume,
lowpass_filter=True,
**weight_type_args,
)
elif weight_type == "gaussian":
volume, origin = self._position_to_molmap(
positions=temp.atom_coordinate,
weights=temp._get_atom_weights(
atoms=atoms, weight_type="atomic_number"
),
sampling_rate=sampling_rate,
**weight_type_args,
)
self.metadata.update(temp.metadata)
return volume, origin, sampling_rate
[docs]
@classmethod
def compare_structures(
cls,
structure1: "Structure",
structure2: "Structure",
origin: NDArray = None,
sampling_rate: float = None,
weighted: bool = False,
) -> float:
"""
Compute root mean square deviation (RMSD) between two structures with the
same number of atoms.
Parameters
----------
structure1, structure2 : :py:class:`Structure`
Structure instances to compare.
origin : tuple of floats, optional
Coordinate system origin. For computing RMSD on discretized grids.
sampling_rate : tuple of floats, optional
Sampling rate in units of :py:attr:`atom_coordinate`.
For computing RMSD on discretized grids.
weighted : bool, optional
Whether atoms should be weighted acoording to their atomic weight.
Returns
-------
float
Root Mean Square Deviation between input structures.
Examples
--------
>>> from importlib_resources import files
>>> from tme.matching_utils import get_rotation_matrices
>>> from tme import Structure
>>> fname = str(files("tests.data").joinpath("Structures/5khe.cif"))
>>> structure = Structure.from_file(filename=fname)
>>> transformed = structure.rigid_transform(
>>> rotation_matrix = get_rotation_matrices(60)[2],
>>> translation = (0, 1, -5)
>>> )
>>> Structure.compare_structures(structure, transformed)
31.35238
>>> Structure.compare_structures(structure, structure)
0.0
"""
if origin is None:
origin = np.zeros(structure1.atom_coordinate.shape[1])
coordinates1 = structure1.atom_coordinate
coordinates2 = structure2.atom_coordinate
atoms1, atoms2 = structure1.element_symbol, structure2.element_symbol
if sampling_rate is not None:
coordinates1 = np.rint(
np.divide(np.subtract(coordinates1, origin), sampling_rate)
).astype(int)
coordinates2 = np.rint(
np.divide(np.subtract(coordinates2, origin), sampling_rate)
).astype(int)
weights1 = np.ones_like(structure1.atom_coordinate.shape[0])
weights2 = np.ones_like(structure2.atom_coordinate.shape[0])
if weighted:
weights1 = np.array(structure1._get_atom_weights(atoms=atoms1))
weights2 = np.array(structure2._get_atom_weights(atoms=atoms2))
if not np.allclose(coordinates1.shape, coordinates2.shape):
raise ValueError(
"Input structures need to have the same number of coordinates."
)
if not np.allclose(weights1.shape, weights2.shape):
raise ValueError("Input structures need to have the same number of atoms.")
squared_diff = np.sum(np.square(coordinates1 - coordinates2), axis=1)
weighted_quared_diff = squared_diff * ((weights1 + weights2) / 2)
rmsd = np.sqrt(np.mean(weighted_quared_diff))
return rmsd
[docs]
@classmethod
def align_structures(
cls,
structure1: "Structure",
structure2: "Structure",
origin: NDArray = None,
sampling_rate: float = None,
weighted: bool = False,
) -> Tuple["Structure", float]:
"""
Align ``structure2`` to ``structure1`` using the Kabsch Algorithm. Both
structures need to have the same number of atoms.
Parameters
----------
structure1, structure2 : :py:class:`Structure`
Structure instances to align.
origin : tuple of floats, optional
Coordinate system origin. For computing RMSD on discretized grids.
sampling_rate : tuple of floats, optional
Sampling rate in units of :py:attr:`atom_coordinate`.
For computing RMSD on discretized grids.
weighted : bool, optional
Whether atoms should be weighted by their atomic weight.
Returns
-------
:py:class:`Structure`
``structure2`` aligned to ``structure1``.
float
Alignment RMSD
Examples
--------
>>> from importlib_resources import files
>>> from tme import Structure
>>> from tme.matching_utils import get_rotation_matrices
>>> fname = str(files("tests.data").joinpath("Structures/5khe.cif"))
>>> structure = Structure.from_file(filename=fname)
>>> transformed = structure.rigid_transform(
>>> rotation_matrix = get_rotation_matrices(60)[2],
>>> translation = (0, 1, -5)
>>> )
>>> aligned, rmsd = Structure.align_structures(structure, transformed)
Initial RMSD: 31.07189 - Final RMSD: 0.00000
"""
if origin is None:
origin = np.minimum(
structure1.atom_coordinate.min(axis=0),
structure2.atom_coordinate.min(axis=0),
).astype(int)
initial_rmsd = cls.compare_structures(
structure1=structure1,
structure2=structure2,
origin=origin,
sampling_rate=sampling_rate,
weighted=weighted,
)
reference = structure1.atom_coordinate.copy()
query = structure2.atom_coordinate.copy()
if sampling_rate is not None:
reference, atoms1, shape, _, _ = structure1._coordinate_to_position(
shape=None, sampling_rate=sampling_rate, origin=origin
)
query, atoms2, shape, _, _ = structure2._coordinate_to_position(
shape=None, sampling_rate=sampling_rate, origin=origin
)
reference_mean = reference.mean(axis=0)
query_mean = query.mean(axis=0)
reference = reference - reference_mean
query = query - query_mean
corr = np.dot(query.T, reference)
U, S, Vh = np.linalg.svd(corr)
rotation = np.dot(Vh.T, U.T).T
if np.linalg.det(rotation) < 0:
Vh[2, :] *= -1
rotation = np.dot(Vh.T, U.T).T
translation = reference_mean - np.dot(query_mean, rotation)
temp = structure1.copy()
temp.atom_coordinate = reference + reference_mean
ret = structure2.copy()
ret.atom_coordinate = np.dot(query + query_mean, rotation) + translation
final_rmsd = cls.compare_structures(
structure1=temp,
structure2=ret,
origin=origin,
sampling_rate=None,
weighted=weighted,
)
print(f"Initial RMSD: {initial_rmsd:.5f} - Final RMSD: {final_rmsd:.5f}")
return ret, final_rmsd
@dataclass(frozen=True, repr=True)
class Elements:
"""Lookup table for chemical elements."""
Atom = namedtuple(
"Atom",
[
"atomic_number",
"atomic_radius",
"lattice_constant",
"lattice_structure",
"vdwr",
"covalent_radius_bragg",
"atomic_weight",
],
)
_default = Atom(0, 0, 0, "Atom does not exist in ressource.", 0, 0, 0)
_elements = {
"H": Atom(1, 25, 3.75, "HEX", 110, np.nan, 1.008),
"HE": Atom(2, 120, 3.57, "HEX", 140, np.nan, 4.002602),
"LI": Atom(3, 145, 3.49, "BCC", 182, 150, 6.94),
"BE": Atom(4, 105, 2.29, "HEX", 153, 115, 9.0121831),
"B": Atom(5, 85, 8.73, "TET", 192, np.nan, 10.81),
"C": Atom(6, 70, 3.57, "DIA", 170, 77, 12.011),
"N": Atom(7, 65, 4.039, "HEX", 155, 65, 14.007),
"O": Atom(8, 60, 6.83, "CUB", 152, 65, 15.999),
"F": Atom(9, 50, np.nan, "MCL", 147, 67, 18.998403163),
"NE": Atom(10, 160, 4.43, "FCC", 154, np.nan, 20.1797),
"NA": Atom(11, 180, 4.23, "BCC", 227, 177, 22.98976928),
"MG": Atom(12, 150, 3.21, "HEX", 173, 142, 24.305),
"AL": Atom(13, 125, 4.05, "FCC", 184, 135, 26.9815385),
"SI": Atom(14, 110, 5.43, "DIA", 210, 117, 28.085),
"P": Atom(15, 100, 7.17, "CUB", 180, np.nan, 30.973761998),
"S": Atom(16, 100, 10.47, "ORC", 180, 102, 32.06),
"CL": Atom(17, 100, 6.24, "ORC", 175, 105, 35.45),
"AR": Atom(18, 71, 5.26, "FCC", 188, np.nan, 39.948),
"K": Atom(19, 220, 5.23, "BCC", 275, 207, 39.0983),
"CA": Atom(20, 180, 5.58, "FCC", 231, 170, 40.078),
"SC": Atom(21, 160, 3.31, "HEX", 215, np.nan, 44.955908),
"TI": Atom(22, 140, 2.95, "HEX", 211, 140, 47.867),
"V": Atom(23, 135, 3.02, "BCC", 207, np.nan, 50.9415),
"CR": Atom(24, 140, 2.88, "BCC", 206, 140, 51.9961),
"MN": Atom(25, 140, 8.89, "CUB", 205, 147, 54.938044),
"FE": Atom(26, 140, 2.87, "BCC", 204, 140, 55.845),
"CO": Atom(27, 135, 2.51, "HEX", 200, 137, 58.933194),
"NI": Atom(28, 135, 3.52, "FCC", 197, 135, 58.6934),
"CU": Atom(29, 135, 3.61, "FCC", 196, 137, 63.546),
"ZN": Atom(30, 135, 2.66, "HEX", 201, 132, 65.38),
"GA": Atom(31, 130, 4.51, "ORC", 187, np.nan, 69.723),
"GE": Atom(32, 125, 5.66, "DIA", 211, np.nan, 72.63),
"AS": Atom(33, 115, 4.13, "RHL", 185, 126, 74.921595),
"SE": Atom(34, 115, 4.36, "HEX", 190, 117, 78.971),
"BR": Atom(35, 115, 6.67, "ORC", 185, 119, 79.904),
"KR": Atom(36, np.nan, 5.72, "FCC", 202, np.nan, 83.798),
"RB": Atom(37, 235, 5.59, "BCC", 303, 225, 85.4678),
"SR": Atom(38, 200, 6.08, "FCC", 249, 195, 87.62),
"Y": Atom(39, 180, 3.65, "HEX", 232, np.nan, 88.90584),
"ZR": Atom(40, 155, 3.23, "HEX", 223, np.nan, 91.224),
"NB": Atom(41, 145, 3.3, "BCC", 218, np.nan, 92.90637),
"MO": Atom(42, 145, 3.15, "BCC", 217, np.nan, 95.95),
"TC": Atom(43, 135, 2.74, "HEX", 216, np.nan, 97.90721),
"RU": Atom(44, 130, 2.7, "HEX", 213, np.nan, 101.07),
"RH": Atom(45, 135, 3.8, "FCC", 210, np.nan, 102.9055),
"PD": Atom(46, 140, 3.89, "FCC", 210, np.nan, 106.42),
"AG": Atom(47, 160, 4.09, "FCC", 211, 177, 107.8682),
"CD": Atom(48, 155, 2.98, "HEX", 218, 160, 112.414),
"IN": Atom(49, 155, 4.59, "TET", 193, np.nan, 114.818),
"SN": Atom(50, 145, 5.82, "TET", 217, 140, 118.71),
"SB": Atom(51, 145, 4.51, "RHL", 206, 140, 121.76),
"TE": Atom(52, 140, 4.45, "HEX", 206, 133, 127.6),
"I": Atom(53, 140, 7.72, "ORC", 198, 140, 126.90447),
"XE": Atom(54, np.nan, 6.2, "FCC", 216, np.nan, 131.293),
"CS": Atom(55, 260, 6.05, "BCC", 343, 237, 132.90545196),
"BA": Atom(56, 215, 5.02, "BCC", 268, 210, 137.327),
"LA": Atom(57, 195, 3.75, "HEX", 243, np.nan, 138.90547),
"CE": Atom(58, 185, 5.16, "FCC", 242, np.nan, 140.116),
"PR": Atom(59, 185, 3.67, "HEX", 240, np.nan, 140.90766),
"ND": Atom(60, 185, 3.66, "HEX", 239, np.nan, 144.242),
"PM": Atom(61, 185, np.nan, "", 238, np.nan, 144.91276),
"SM": Atom(62, 185, 9, "RHL", 236, np.nan, 150.36),
"EU": Atom(63, 185, 4.61, "BCC", 235, np.nan, 151.964),
"GD": Atom(64, 180, 3.64, "HEX", 234, np.nan, 157.25),
"TB": Atom(65, 175, 3.6, "HEX", 233, np.nan, 158.92535),
"DY": Atom(66, 175, 3.59, "HEX", 231, np.nan, 162.5),
"HO": Atom(67, 175, 3.58, "HEX", 230, np.nan, 164.93033),
"ER": Atom(68, 175, 3.56, "HEX", 229, np.nan, 167.259),
"TM": Atom(69, 175, 3.54, "HEX", 227, np.nan, 168.93422),
"YB": Atom(70, 175, 5.49, "FCC", 226, np.nan, 173.045),
"LU": Atom(71, 175, 3.51, "HEX", 224, np.nan, 174.9668),
"HF": Atom(72, 155, 3.2, "HEX", 223, np.nan, 178.49),
"TA": Atom(73, 145, 3.31, "BCC", 222, np.nan, 180.94788),
"W": Atom(74, 135, 3.16, "BCC", 218, np.nan, 183.84),
"RE": Atom(75, 135, 2.76, "HEX", 216, np.nan, 186.207),
"OS": Atom(76, 130, 2.74, "HEX", 216, np.nan, 190.23),
"IR": Atom(77, 135, 3.84, "FCC", 213, np.nan, 192.217),
"PT": Atom(78, 135, 3.92, "FCC", 213, np.nan, 195.084),
"AU": Atom(79, 135, 4.08, "FCC", 214, np.nan, 196.966569),
"HG": Atom(80, 150, 2.99, "RHL", 223, np.nan, 200.592),
"TL": Atom(81, 190, 3.46, "HEX", 196, 190, 204.38),
"PB": Atom(82, 180, 4.95, "FCC", 202, np.nan, 207.2),
"BI": Atom(83, 160, 4.75, "RHL", 207, 148, 208.9804),
"PO": Atom(84, 190, 3.35, "SC", 197, np.nan, 209),
"AT": Atom(85, np.nan, np.nan, "", 202, np.nan, 210),
"RN": Atom(86, np.nan, np.nan, "FCC", 220, np.nan, 222),
"FR": Atom(87, np.nan, np.nan, "BCC", 348, np.nan, 223),
"RA": Atom(88, 215, np.nan, "", 283, np.nan, 226),
"AC": Atom(89, 195, 5.31, "FCC", 247, np.nan, 227),
"TH": Atom(90, 180, 5.08, "FCC", 245, np.nan, 232.0377),
"PA": Atom(91, 180, 3.92, "TET", 243, np.nan, 231.03588),
"U": Atom(92, 175, 2.85, "ORC", 241, np.nan, 238.02891),
"NP": Atom(93, 175, 4.72, "ORC", 239, np.nan, 237),
"PU": Atom(94, 175, np.nan, "MCL", 243, np.nan, 244),
"AM": Atom(95, 175, np.nan, "", 244, np.nan, 243),
"CM": Atom(96, np.nan, np.nan, "", 245, np.nan, 247),
"BK": Atom(97, np.nan, np.nan, "", 244, np.nan, 247),
"CF": Atom(98, np.nan, np.nan, "", 245, np.nan, 251),
"ES": Atom(99, np.nan, np.nan, "", 245, np.nan, 252),
"FM": Atom(100, np.nan, np.nan, "", 245, np.nan, 257),
"MD": Atom(101, np.nan, np.nan, "", 246, np.nan, 258),
"NO": Atom(102, np.nan, np.nan, "", 246, np.nan, 259),
"LR": Atom(103, np.nan, np.nan, "", 246, np.nan, 262),
"RF": Atom(104, np.nan, np.nan, "", np.nan, np.nan, 267),
"DB": Atom(105, np.nan, np.nan, "", np.nan, np.nan, 268),
"SG": Atom(106, np.nan, np.nan, "", np.nan, np.nan, 271),
"BH": Atom(107, np.nan, np.nan, "", np.nan, np.nan, 274),
"HS": Atom(108, np.nan, np.nan, "", np.nan, np.nan, 269),
"MT": Atom(109, np.nan, np.nan, "", np.nan, np.nan, 276),
"DS": Atom(110, np.nan, np.nan, "", np.nan, np.nan, 281),
"RG": Atom(111, np.nan, np.nan, "", np.nan, np.nan, 281),
"CN": Atom(112, np.nan, np.nan, "", np.nan, np.nan, 285),
"NH": Atom(113, np.nan, np.nan, "", np.nan, np.nan, 286),
"FL": Atom(114, np.nan, np.nan, "", np.nan, np.nan, 289),
"MC": Atom(115, np.nan, np.nan, "", np.nan, np.nan, 288),
"LV": Atom(116, np.nan, np.nan, "", np.nan, np.nan, 293),
"TS": Atom(117, np.nan, np.nan, "", np.nan, np.nan, 294),
"OG": Atom(118, np.nan, np.nan, "", np.nan, np.nan, 294),
}
def __getitem__(self, key: str):
"""
Retrieve a value from the internal data using a given key.
Parameters
----------
key : str
Key to retrieve the corresponding value for.
Returns
-------
namedtuple
The Atom tuple associated with the provided key.
"""
return self._elements.get(key, self._default)
def _format_string(string: str) -> str:
"""
Formats a string by adding quotation marks if it contains white spaces.
Parameters
----------
string : str
Input string to be formatted.
Returns
-------
str
Formatted string with added quotation marks if needed.
"""
if " " in string:
return f"'{string}'"
# Occurs e.g. for C1' atoms. The trailing whitespace is necessary.
if string.count("'") == 1:
return f'"{string}"'
return string