from __future__ import annotations
import json
from enum import Enum
from pathlib import Path
from pydantic import BaseModel, ByteSize, ConfigDict, Field
_SCHEMA_VERSION = "0.1.0"
[docs]
class DataStorageType(str, Enum):
sdf = "sdf"
pdb = "pdb"
b64oedu = "b64oedu"
[docs]
def read_file_directly(file: str | Path) -> str:
with open(str(file)) as f:
contents = f.read()
return contents
[docs]
def write_file_directly(file: str | Path, data: str, mode: str = "w") -> None:
if mode not in ["w", "a"]:
raise ValueError(f"mode must be either 'w' or 'a', got {mode}")
with open(str(file), mode) as f:
f.write(data)
[docs]
def utf8len(s: str) -> int:
return len(s.encode("utf-8"))
[docs]
def check_strings_for_equality_with_exclusion(string1, string2, exclusion_string):
lines1 = [line for line in string1.split("\n") if exclusion_string not in line]
lines2 = [line for line in string2.split("\n") if exclusion_string not in line]
return lines1 == lines2
[docs]
class DataModelAbstractBase(BaseModel):
"""
Base class for drugforge pydantic models that simplify dictionary, JSON
and other behaviour
"""
def __hash__(self) -> int:
return self.model_dump_json().__hash__()
@classmethod
def from_dict(cls, dict):
return cls.model_validate(dict)
@classmethod
def from_json(cls, json_str):
return cls.model_validate(json.loads(json_str))
@classmethod
def from_json_file(cls, file: str | Path):
# first load the file, then use the json parser
contents = read_file_directly(file)
return cls.from_json(contents)
def to_json_file(self, file: str | Path):
write_file_directly(file, self.model_dump_json())
@property
def size(self) -> ByteSize:
"""Size of the resulting JSON object for this class"""
return ByteSize(utf8len(self.model_dump_json())).human_readable()
def full_equal(self, other: DataModelAbstractBase) -> bool:
return self.model_dump() == other.model_dump()
def data_equal(self, other: DataModelAbstractBase) -> bool:
return self.data == other.data
def get_schema_version(self) -> str:
return _SCHEMA_VERSION
# use data_equal instead
def __eq__(self, other: DataModelAbstractBase) -> bool:
# check if has a data attribute
if hasattr(self, "data"):
return self.data_equal(other)
else:
return self.full_equal(other)
# use data_equal instead
def __ne__(self, other: DataModelAbstractBase) -> bool:
return not self.__eq__(other)
model_config = ConfigDict(validate_assignment=True)
[docs]
def schema_dict_get_val_overload(obj: dict | BaseModel):
"""
Overload for Schema and Dict to get values easily
Parameters
----------
obj : Union[Dict, Schema]
Object to get values from
Returns
-------
Iterable[Any]
"""
if isinstance(obj, dict):
return obj.values()
elif isinstance(obj, BaseModel):
return obj.model_dump().values()
else:
raise TypeError(f"Unsupported type {type(obj)}")
[docs]
class MoleculeComponent(str, Enum):
PROTEIN = "protein"
LIGAND = "ligand"
WATER = "water"
OTHER = "other"
[docs]
class MoleculeFilter(BaseModel):
"""Filter for selecting components of a molecule."""
model_config = ConfigDict(extra="forbid")
protein_chains: list = Field(
list(),
description="List of chains containing the desired protein. An empty list will return all chains.",
)
ligand_chain: str = Field(
None,
description="Chain containing the desired ligand. An empty list will return all chains.",
)
water_chains: list = Field(
list(),
description="List of chains containing the desired water. An empty list will return all chains.",
)
other_chains: list = Field(
list(),
description="List of chains containing other items. An empty list will return all chains.",
)
components_to_keep: list[MoleculeComponent] = Field(
["protein", "ligand", "water", "other"],
description="List of components to keep. An empty list will return all components.",
)