import logging
from pathlib import Path
from typing import Any, Dict, Optional, Tuple, Union # noqa: F401
from drugforge.data.backend.openeye import (
load_openeye_pdb,
oechem,
oemol_to_pdb_string,
pdb_string_to_oemol,
split_openeye_mol,
)
from drugforge.data.schema.identifiers import TargetIdentifiers
from drugforge.data.schema.schema_base import (
DataModelAbstractBase,
DataStorageType,
MoleculeFilter,
check_strings_for_equality_with_exclusion,
schema_dict_get_val_overload,
write_file_directly,
)
from pydantic import Field, model_validator
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class InvalidTargetError(ValueError):
    """``ValueError`` subclass that adds no behaviour of its own.

    NOTE(review): presumably raised when a target is invalid -- confirm
    against callers.
    """
class Target(DataModelAbstractBase):
    """
    Schema for a Target, wrapper around a PDB file.

    The structure is held as PDB-formatted text in ``data``. Instances can be
    built from a PDB file (``from_pdb``) or an OpenEye molecule
    (``from_oemol``) and converted back with ``to_pdb`` / ``to_oemol``.
    Equality compares the stored PDB text, ignoring ``MASTER`` records.
    """

    # ``None`` is a legitimate value here: the validator below accepts a
    # missing name as long as at least one identifier is populated.
    target_name: Optional[str] = Field(None, description="The name of the target")
    ids: Optional[TargetIdentifiers] = Field(
        None,
        description="TargetIdentifiers Schema for identifiers associated with this target",
    )
    data: str = Field(
        "",
        description="PDB file stored as a string to hold internal data state",
        repr=False,
    )
    data_format: DataStorageType = Field(
        DataStorageType.pdb,
        description="Enum describing the data storage method",
        frozen=True,
    )

    @model_validator(mode="before")
    @classmethod
    def _validate_at_least_one_id(cls, v: Any) -> Any:
        """
        Require either ``target_name`` or at least one populated identifier.

        Runs on the raw input before field validation. The check is bypassed
        entirely when the input carries a truthy ``_skip_validate_ids`` entry.

        Parameters
        ----------
        v
            Raw input data; accessed with ``.get`` so it is expected to be
            dict-like.

        Returns
        -------
        The input ``v``, unchanged.

        Raises
        ------
        ValueError
            If ``target_name`` is missing/``None`` and ``ids`` is either
            ``None`` or has no truthy value.
        """
        # explicit opt-out of the identifier check
        if v.get("_skip_validate_ids"):
            return v

        ids = v.get("ids")
        target_name = v.get("target_name")
        if target_name is None:
            # ``ids`` may arrive as a plain dict or as an already instantiated
            # schema; schema_dict_get_val_overload is expected to yield the
            # values in either case.
            if ids is None or not any(schema_dict_get_val_overload(ids)):
                raise ValueError(
                    "At least one identifier must be provided, or target_name must be provided"
                )
        return v

    @classmethod
    def from_pdb(
        cls, pdb_file: Union[str, Path], target_chains=None, ligand_chain="", **kwargs
    ) -> "Target":
        """
        Build a Target from a PDB file.

        Parameters
        ----------
        pdb_file
            Path to the PDB file to load.
        target_chains
            Chains forwarded to ``MoleculeFilter`` as ``protein_chains``.
            ``None`` (the default) is treated as an empty list.
        ligand_chain
            Chain forwarded to ``MoleculeFilter`` as ``ligand_chain``.
        **kwargs
            Extra fields forwarded to the constructor; any ``data`` entry is
            discarded because the data comes from the PDB file.

        Returns
        -------
        Target
            Built from the ``"prot"`` entry of the split molecule.
        """
        kwargs.pop("data", None)
        # A fresh list per call avoids sharing one mutable default object
        # between invocations.
        protein_chains = [] if target_chains is None else target_chains
        # First load the full complex molecule
        complex_mol = load_openeye_pdb(pdb_file)
        # Split the molecule into parts using the given chains
        mol_filter = MoleculeFilter(
            protein_chains=protein_chains, ligand_chain=ligand_chain
        )
        split_dict = split_openeye_mol(complex_mol, mol_filter)
        return cls.from_oemol(split_dict["prot"], **kwargs)

    def to_pdb(self, filename: Union[str, Path]) -> None:
        """Write the stored PDB text to ``filename`` via ``write_file_directly``."""
        write_file_directly(filename, self.data)

    @classmethod
    def from_oemol(cls, mol: oechem.OEMol, **kwargs) -> "Target":
        """
        Build a Target from an OpenEye molecule.

        The molecule is converted to PDB text with ``oemol_to_pdb_string`` and
        stored in ``data``. Any ``data`` entry in ``kwargs`` is dropped first so
        it cannot clash with the generated text; the remaining ``kwargs`` are
        forwarded to the constructor.
        """
        kwargs.pop("data", None)
        pdb_str = oemol_to_pdb_string(mol)
        return cls(data=pdb_str, **kwargs)

    def to_oemol(self) -> oechem.OEMol:
        """Convert the stored PDB text to an OpenEye molecule."""
        return pdb_string_to_oemol(self.data)

    def __eq__(self, other: Any) -> bool:
        """
        Compare two targets by their PDB text.

        Returns ``NotImplemented`` for operands that are not ``Target``
        instances so Python can try the reflected comparison.
        """
        if not isinstance(other, Target):
            return NotImplemented
        # check if the data is the same, but exclude the MASTER record as it
        # is not always in the same place
        return check_strings_for_equality_with_exclusion(
            self.data, other.data, "MASTER"
        )

    def __ne__(self, other: Any) -> bool:
        """
        Inverse of ``__eq__``.

        ``NotImplemented`` is passed through untouched: negating it would
        yield ``False`` (claiming a Target equals an unrelated object) and
        raises ``TypeError`` on newer Python versions.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return NotImplemented
        return not result

    @property
    def hash(self) -> str:
        """Create a hash based on the pdb file contents (SHA-256 hex digest)."""
        import hashlib

        return hashlib.sha256(self.data.encode()).hexdigest()

    @property
    def crystal_symmetry(self):
        """
        Get the crystal symmetry of the target.

        Returns whatever ``oechem.OEGetCrystalSymmetry`` reports for the
        molecule rebuilt from the stored PDB text.
        """
        return oechem.OEGetCrystalSymmetry(self.to_oemol())