Source code for drugforge.data.operators.state_expanders.protomer_expander

import os
import subprocess
import tempfile
from typing import Literal

from drugforge.data.backend.openeye import (
    load_openeye_sdfs,
    oechem,
    oequacpac,
    save_openeye_sdfs,
)
from drugforge.data.operators.state_expanders.state_expander import StateExpanderBase
from drugforge.data.schema.ligand import Ligand
from pydantic import Field


[docs] class ProtomerExpander(StateExpanderBase): """ Expand a molecule to protomers using OpenEye reasonable protomer state enumeration. Note: The input molecule is included in the output. """ expander_type: Literal["ProtomerExpander"] = "ProtomerExpander" def _provenance(self) -> dict[str, str]: return { "oechem": oechem.OEChemGetVersion(), "quacpac": oequacpac.OEQuacPacGetVersion(), } def _expand(self, ligands: list[Ligand]) -> list[Ligand]: expanded_states = [] provenance = self.provenance() for parent_ligand in ligands: oemol = parent_ligand.to_oemol() for protomer in oequacpac.OEGetReasonableProtomers(oemol): fmol = oechem.OEMol(protomer) # copy the ligand properties over to the new molecule, we may want to have more fine grained control over this # down the track. protomer_ligand = Ligand.from_oemol(fmol, **parent_ligand.dict()) if protomer_ligand.fixed_inchikey != parent_ligand.fixed_inchikey: # only add tags to new microstates of the input molecule protomer_ligand.set_expansion( parent=parent_ligand, provenance=provenance ) expanded_states.append(protomer_ligand) else: expanded_states.append(parent_ligand) # add the parent if it is not present. if parent_ligand not in expanded_states: expanded_states.append(parent_ligand) return expanded_states
[docs] class EpikExpander(StateExpanderBase): """ Expand the protomer and tautomeric states of a molecule using epik and capture the state penalties. Note: The method assumes you have schrodinger software installed and the path to the software is exported as a environment variable named SCHRODINGER. """ expander_type: Literal["EpikExpander"] = "EpikExpander" ph: float = Field( 7.3, description="The ph that should be used when calculating the state penalty.", ) def _create_cmd(self, *programs: str) -> str: """ Create a command which can be used to call some SCHRODINGER software Returns ------- The string which can be passed to subprocess to call epik """ # create a path to epik schrodinger_folder = os.getenv("SCHRODINGER") if schrodinger_folder is None: raise RuntimeError( "Epik enumerator requires the path to the schrodinger software to be set as the " "SCHRODINGER environment variable." ) epik = os.path.join(schrodinger_folder, *programs) return epik def _provenance(self) -> dict[str, str]: """ Run epik to get the version info. Returns ------- The version of epik used. """ epik_cmd = self._create_cmd("epik") # call epik to get the version info output = subprocess.check_output([epik_cmd, "-v"]) for line in output.decode("utf-8").split("\n"): if "Epik version" in line: version = line.split()[-1] break else: version = "unknown" return { "epik": version, } def _prepare_ligands(self, ligands: list[Ligand]): """ Convert the list of Ligands to a SCHRODINGER mae file before running with Epik. """ oe_ligands = [ligand.to_oemol() for ligand in ligands] save_openeye_sdfs(oe_ligands, "input.sdf") convert_cmd = self._create_cmd("utilities", "structconvert") with open("structconvert.log", "w") as log: subprocess.run( convert_cmd + " input.sdf input.mae", shell=True, stdout=log, stderr=log, check=True, ) def _extract_ligands(self) -> list[Ligand]: """ Extract the state expanded ligands from the Epik output file. Returns ------- A list of expanded state ligands. """ convert_cmd = self._create_cmd("utilities", "structconvert") with open("structconvert.log", "w") as log: subprocess.run( convert_cmd + " output.mae output.sdf", shell=True, stdout=log, stderr=log, check=True, ) oe_mols = load_openeye_sdfs(sdf_fn="output.sdf") # parse into ligand objects expanded_ligands = [Ligand.from_oemol(oemol) for oemol in oe_mols] return expanded_ligands def _call_epik(self): """Call Epik on the local ligands file.""" import numpy as np epik_command = self._create_cmd("epik") min_population = np.exp(-6) epik_command += f" -WAIT -ms 16 -ph {self.ph} -p {min_population} -imae input.mae -omae output.mae" with open("epik_log.log", "w") as log: subprocess.run(epik_command, shell=True, stdout=log, stderr=log, check=True) def _expand(self, ligands: list[Ligand]) -> list[Ligand]: """ Expand the protomers and tautomers of the input molecules using Epik and calculate the state penalty. Note: All input molecules are scored by Epik and have the values stored in Ligand.tags. Only new molecules will have an expansion tag however. For example ethane would receive a score but no expansion tag. Parameters ---------- ligands: The list of ligands whose states should be expanded. Returns ------- A list of expanded ligand states. """ # store where we are as we run epik in a tempdir home = os.getcwd() # calculate it once as its expensive to call epik every time provenance = self.provenance() # as epic runs on all molecules we need to keep track of the parent by tagging it parents_by_inchikey = {} for lig in ligands: # store the parent inchi key as a tag which will be included in the sdf file fixed_inchikey = lig.fixed_inchikey lig.set_SD_data({"parent": fixed_inchikey}) parents_by_inchikey[fixed_inchikey] = lig with tempfile.TemporaryDirectory() as tempdir: os.chdir(tempdir) # create the mae file self._prepare_ligands(ligands=ligands) # call epik self._call_epik() # convert the ligands to sdf, epik tags are automatically picked up and stored expanded_ligands = self._extract_ligands() # move back to the home dir os.chdir(home) # set the expansion tag only for new microstate ligands for ligand in expanded_ligands: # do not set the expansion tag if the molecule is the same as the parent and has a score of 0 state_pentalty = float(ligand.tags["r_epik_State_Penalty"]) if ligand.tags["parent"] == ligand.fixed_inchikey and state_pentalty == 0: continue parent = parents_by_inchikey[ligand.tags["parent"]] ligand.set_expansion(parent=parent, provenance=provenance) return expanded_ligands