Source code for drugforge.alchemy.alchemize

from drugforge.alchemy.cli.utils import report_alchemize_clusters
from drugforge.data.schema.ligand import Ligand, write_ligands_to_multi_sdf
from rdkit import Chem
from rdkit.Chem import AllChem, rdFMCS
from rdkit.Chem.rdchem import Mol
from rdkit.Chem.Scaffolds import MurckoScaffold
from rich.padding import Padding
from tqdm import tqdm


[docs] def compute_clusters(ligands: list[Ligand], outsider_number: int, console=None): """Clusters ligands into Bajorath-Murcko scaffolds Args: ligands (list[Ligand]): Ligand objects to cluster outsider_number (int): Number of ligands to consider as outsiders console: Rich console object for logging Returns: tuple[dict[str, list[Ligand]], dict[str, list[Ligand]]]: Outsiders and clusters """ # STEP 1: cluster by Bajorath-Murcko scaffold (fast) PATT = Chem.MolFromSmarts( "[$([D1]=[*])]" ) # selects any atoms that are not connected to another heavy atom bm_scaffs = [] # first get regular Bemis-Murcko scaffolds for lig in tqdm(ligands, desc="Computing Murcko scaffolds"): bm_scaffs.append(MurckoScaffold.GetScaffoldForMol(lig.to_rdkit())) bbm_scaffs_smi = [ Chem.MolToSmiles(AllChem.DeleteSubstructs(scaff, PATT)) for scaff in bm_scaffs ] # make them into Bajorath-Bemis-Murcko scaffolds # now embed back with original molecules mols_with_bbm = [[mol, bbm] for mol, bbm in zip(ligands, bbm_scaffs_smi)] # sort so that we make sure that BBM scaffolds get grouped together nicely mols_with_bbm = sorted(mols_with_bbm, key=lambda x: x[1], reverse=False) # now group them together using the BBM scaffold as dict keys. bbm_groups = {} for mol, bbm_scaff_smi in mols_with_bbm: mol.set_SD_data({"bajorath-bemis-murcko-scaffold": bbm_scaff_smi}) bbm_groups.setdefault(bbm_scaff_smi, []).append(mol) outsiders = {} alchemical_clusters = {} for bbm_scaff, mols in bbm_groups.items(): if len(mols) > outsider_number: alchemical_clusters[bbm_scaff] = mols else: outsiders[bbm_scaff] = mols message = Padding( f"Placed {report_alchemize_clusters(alchemical_clusters, outsiders)[-1]} compounds into " "alchemical clusters", (1, 0, 1, 0), ) if console: console.print(message) return outsiders, alchemical_clusters
[docs] def partial_sanitize(mol: Mol) -> Mol: """Does the minimal number of steps for a molecule object to be workable by rdkit; won't throw errors if the mol is funky. Args: mol (Mol): RDKit molecule object Returns: Mol: Sanitized RDKit molecule object """ mol.UpdatePropertyCache(strict=False) Chem.SanitizeMol( mol, Chem.SanitizeFlags.SANITIZE_FINDRADICALS | Chem.SanitizeFlags.SANITIZE_SETAROMATICITY | Chem.SanitizeFlags.SANITIZE_SETCONJUGATION | Chem.SanitizeFlags.SANITIZE_SETHYBRIDIZATION | Chem.SanitizeFlags.SANITIZE_SYMMRINGS, catchErrors=True, ) return mol
[docs] def calc_mcs_residuals(mol1: Mol, mol2: Mol) -> tuple[int, int]: """Subtract the MCS from two molecules and return the number of heavy atoms remaining after removing the MCS from both Args: mol1 (Mol): RDKit molecule object mol2 (Mol): RDKit molecule object Returns: tuple[int, int]: Number of heavy atoms remaining in mol1 and mol2 after removing MCS """ mcs = Chem.MolFromSmarts( rdFMCS.FindMCS( [mol1, mol2], matchValences=False, ringMatchesRingOnly=True, completeRingsOnly=True, matchChiralTag=False, ).smartsString ) return Chem.rdMolDescriptors.CalcNumHeavyAtoms( AllChem.DeleteSubstructs(mol1, mcs) ), Chem.rdMolDescriptors.CalcNumHeavyAtoms(AllChem.DeleteSubstructs(mol2, mcs))
[docs] def rescue_outsiders( outsiders, alchemical_clusters, max_transform, processors: int, console=None ) -> tuple[dict[str, list[Ligand]], dict[str, list[Ligand]]]: """ STEP 2: rescue outsiders by attempting to place them into Alchemical clusters (slow) now for every singleton try to find a suitable cluster to add it into Args: outsiders: dict of str: list of Ligands alchemical_clusters: dict of str: list of RDKit mols max_transform: int processors: int console: Rich console object for logging Returns: tuple[dict[str, list[Ligand]], dict[str, list[Ligand]]]: Outsiders and clusters """ message = Padding( f"Working to add outsiders into alchemical clusters using {processors} processor(s)", (1, 0, 1, 0), ) if console: console.print(message) singletons_to_move = {} for singleton_bbm_scaff, _ in tqdm(outsiders.items(), desc="Rescuing outsiders"): singleton_bbm_scaff_ps = partial_sanitize( Chem.MolFromSmiles(singleton_bbm_scaff, sanitize=False) ) best_match_total_ha = max_transform * 2 best_match_cluster_bbm_scaff = None # check every BBM scaffold in the clusters we've already found for cluster_bbm_scaff, cluster_mols in alchemical_clusters.items(): cluster_bbm_scaff_ps = partial_sanitize( Chem.MolFromSmiles(cluster_bbm_scaff, sanitize=False) ) cluster_mol_n_residual, singleton_mol_n_residual = calc_mcs_residuals( cluster_bbm_scaff_ps, singleton_bbm_scaff_ps ) # first discard this match if any of the residuals are higher than the defined cutoff if ( cluster_mol_n_residual > max_transform or singleton_mol_n_residual > max_transform ): continue elif ( cluster_mol_n_residual + singleton_mol_n_residual < best_match_total_ha ): best_match_total_ha = cluster_mol_n_residual + singleton_mol_n_residual best_match_cluster_bbm_scaff = cluster_bbm_scaff if best_match_cluster_bbm_scaff: # we can add it to the right cluster and remove it from the singletons. # need to do this outside loop to not break the dict iterator though singletons_to_move[singleton_bbm_scaff] = best_match_cluster_bbm_scaff for singleton_bbm_scaff, cluster_to_move_to in singletons_to_move.items(): # first copy the mols from the singleton over to the intended cluster for singleton_mol in outsiders[singleton_bbm_scaff]: alchemical_clusters[cluster_to_move_to].append(singleton_mol) # then purge the singleton from the dict of singletons outsiders.pop(singleton_bbm_scaff) return outsiders, alchemical_clusters
[docs] def write_clusters(alchemical_clusters, clusterfiles_prefix, outsiders, console=None): """Stores clusters to individual SDF files using the clusterfiles prefix variable. Useful for CLI.""" for i, (bbm_cluster_smiles, ligands) in enumerate(alchemical_clusters.items()): output_filename = f"{clusterfiles_prefix}_{i}.sdf" # add bbm_cluster_smiles to each ligand as an SD tag. This way we can see both the ligand's BBM scaffold # AND the BBM scaffold cluster the ligand has been assigned to. [ lig.set_SD_data( {"bajorath-bemis-murcko-scaffold-cluster": bbm_cluster_smiles} ) for lig in ligands ] # write sdf write_ligands_to_multi_sdf(output_filename, ligands, overwrite=True) message = Padding( f"Wrote cluster {i} ({len(ligands)} assigned to Bajorath-Bemis-Murcko scaffold {bbm_cluster_smiles}) " f"to {output_filename}", (1, 0, 1, 0), ) if console: console.print(message) # also write all outsiders to a single SDF outsider_ligands_nested = [lig for _, lig in outsiders.items()] outsider_ligands = [lig for ligs in outsider_ligands_nested for lig in ligs] output_filename_outsiders = f"{clusterfiles_prefix}_outsiders.sdf" write_ligands_to_multi_sdf( output_filename_outsiders, outsider_ligands, overwrite=True ) message = Padding( f"Wrote {len(outsider_ligands)} outsiders to {output_filename_outsiders}.", (1, 0, 1, 0), ) if console: console.print(message)