from __future__ import annotations
from pathlib import Path
from typing import Any
from drugforge.data.backend.openeye import (
bytes64_to_oedu,
load_openeye_design_unit,
oedu_to_bytes64,
openeye_perceive_residues,
save_openeye_design_unit,
save_openeye_pdb,
split_openeye_design_unit,
)
from drugforge.data.schema.complex import Complex, ComplexBase
from drugforge.data.schema.identifiers import TargetIdentifiers
from drugforge.data.schema.ligand import Ligand
from drugforge.data.schema.schema_base import (
DataModelAbstractBase,
DataStorageType,
schema_dict_get_val_overload,
)
from openeye import oechem
from pydantic import Field, model_validator
[docs]
class PreppedTarget(DataModelAbstractBase):
"""
Schema for a PreppedTarget, wrapper around an OpenEye Design Unit
"""
target_name: str = Field(None, description="The name of the target")
ids: TargetIdentifiers | None = Field(
None,
description="TargetIdentifiers Schema for identifiers associated with this target",
)
data: bytes = Field(
"",
description="OpenEye oedu file stored as a bytes object **encoded in base64** to hold internal data state",
repr=False,
)
data_format: DataStorageType = Field(
DataStorageType.b64oedu,
description="Enum describing the data storage method",
frozen=True,
)
target_hash: str = Field(
...,
description="A unique reproducible hash based on the contents of the pdb file which created the target.",
frozen=True,
)
crystal_symmetry: Any | None = Field(
None,
description="bounding box of the target, lost in oedu conversion so can be saved as attribute.",
)
@model_validator(mode="before")
@classmethod
def _validate_at_least_one_id(cls, v):
# simpler as we never need to pop attrs off the serialised representation.
ids = v.get("ids")
compound_name = v.get("target_name")
# check if all the identifiers are None
if compound_name is None:
if ids is None or all([not v for v in schema_dict_get_val_overload(ids)]):
raise ValueError(
"At least one identifier must be provided, or target_name must be provided"
)
return v
@classmethod
def from_oedu(cls, oedu: oechem.OEDesignUnit, **kwargs) -> PreppedTarget:
kwargs.pop("data", None)
oedu_bytes = oedu_to_bytes64(oedu)
return cls(data=oedu_bytes, **kwargs)
def to_oedu(self) -> oechem.OEDesignUnit:
return bytes64_to_oedu(self.data)
@classmethod
def from_oedu_file(cls, oedu_file: str | Path, **kwargs) -> PreppedTarget:
kwargs.pop("data", None)
oedu = load_openeye_design_unit(oedu_file)
return cls.from_oedu(oedu=oedu, **kwargs)
def to_oedu_file(self, filename: str | Path) -> None:
oedu = self.to_oedu()
save_openeye_design_unit(oedu, filename)
[docs]
def to_pdb_file(self, filename: str):
"""
Write the prepared target receptor to PDB file using openeye.
Parameters
----------
filename: The name of the pdb file the target should be writen to.
"""
oedu = self.to_oedu()
_, oe_receptor, _ = split_openeye_design_unit(du=oedu)
# As advised by Alex <https://github.com/asapdiscovery/asapdiscovery/pull/608#discussion_r1388067468>
openeye_perceive_residues(oe_receptor)
save_openeye_pdb(oe_receptor, pdb_fn=filename)
@property
def hash(self):
"""Create a hash based on the pdb file contents"""
import hashlib
return hashlib.sha256(self.data).hexdigest()
[docs]
class PreppedComplex(ComplexBase):
"""
Schema for a Complex, containing both a PreppedTarget and Ligand
In this case the PreppedTarget contains the protein and ligand.
"""
target: PreppedTarget = Field(description="PreppedTarget schema object")
ligand: Ligand = Field(description="Ligand schema object")
# Overload from base class to check target and ligand individually
def data_equal(self, other: PreppedComplex):
return self.target.data_equal(other.target) and self.ligand.data_equal(
other.ligand
)
@classmethod
def from_oedu(
cls, oedu: oechem.OEDesignUnit, target_kwargs={}, ligand_kwargs={}
) -> PreppedComplex:
prepped_target = PreppedTarget.from_oedu(oedu, **target_kwargs)
lig_oemol = oechem.OEMol()
oedu.GetLigand(lig_oemol)
return cls(
target=prepped_target,
ligand=Ligand.from_oemol(lig_oemol, **ligand_kwargs),
)
@classmethod
def from_oedu_file(cls, oedu_file: str | Path, **kwargs) -> PreppedComplex:
oedu = load_openeye_design_unit(oedu_file)
return cls.from_oedu(oedu=oedu, **kwargs)
[docs]
@classmethod
def from_complex(cls, complex: Complex, prep_kwargs={}) -> PreppedComplex:
"""
Create a PreppedComplex from a Complex by running ProteinPrepper
on the combined oemol of the complex
Parameters
----------
complex : Complex
Complex to create PreppedComplex from
prep_kwargs : dict
Keyword arguments to pass to ProteinPrepper
Returns
-------
PreppedComplex
PreppedComplex object
"""
# use local import here to avoid circular imports
from drugforge.modeling.protein_prep import ProteinPrepper
# overwrite ligand_chain with ligand_chain from complex if it exists
prep_kwargs.pop("ligand_chain", None)
prep_kwargs["ligand_chain"] = complex.ligand_chain
prepped_complexs = ProteinPrepper(**prep_kwargs).prep(inputs=[complex])
return prepped_complexs[0]
@property
def hash(self):
# Using the target_hash instead hashing the OEDU bytes because prepping is stochastic
return f"{self.target.target_hash}+{self.ligand.fixed_inchikey}"