Source code for gt4sd.properties.utils

#
# MIT License
#
# Copyright (c) 2022 GT4SD team
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
import ipaddress
import json
from typing import Any, Callable, Dict, List, Tuple, Type, Union

from rdkit import Chem
from modlamp.descriptors import GlobalDescriptor
from tdc import Oracle
from tdc.chem_utils.oracle.oracle import fp2fpfunc
from tdc.metadata import download_oracle_names

from ..domains.materials import MacroMolecule, SmallMolecule
from .core import ApiTokenParameters, PropertyValue
from .scores import SCORING_FACTORY


def to_mol(mol: SmallMolecule) -> Chem.Mol:
    """Turn a SMILES string or a rdkit.Chem.Mol into a rdkit.Chem.Mol.

    Args:
        mol: a SMILES string or a rdkit.Chem.Mol object.

    Raises:
        TypeError: if the input is neither a string nor a rdkit.Chem.Mol.

    Returns:
        the given rdkit.Chem.Mol unchanged, or the result of
        ``Chem.MolFromSmiles`` when a string was given.
    """
    # Molecules are passed straight through.
    if isinstance(mol, Chem.Mol):
        return mol  # type: ignore
    # Strings are handed to RDKit for parsing.
    if isinstance(mol, str):
        return Chem.MolFromSmiles(mol)  # type: ignore
    raise TypeError(
        f"Please provide SMILES string or rdkit.Chem.Mol object not {type(mol)}"
    )
def to_smiles(mol: SmallMolecule) -> str:
    """Safely return a canonical SMILES string from a SMILES or a rdkit.Chem.Mol object.

    Args:
        mol: either a SMILES string or a rdkit.Chem.Mol object.

    Raises:
        ValueError: if a SMILES string could not be converted to a rdkit.Chem.Mol.
        TypeError: if the input is neither a string nor a rdkit.Chem.Mol.

    Returns:
        a canonical SMILES string.
    """
    if isinstance(mol, str):
        smiles = mol
        try:
            mol = Chem.MolFromSmiles(smiles)  # type: ignore
        except Exception as error:
            raise ValueError(
                f"Could not convert SMILES string to rdkit.Chem.Mol: {smiles}"
            ) from error
        # RDKit reports an unparsable SMILES by returning None instead of raising,
        # so the failure has to be detected explicitly to surface as a ValueError.
        if mol is None:
            raise ValueError(
                f"Could not convert SMILES string to rdkit.Chem.Mol: {smiles}"
            )
    elif not isinstance(mol, Chem.Mol):
        raise TypeError(
            f"Pass a SMILES string or rdkit.Chem.Mol object not {type(mol)}"
        )

    return Chem.MolToSmiles(mol, canonical=True)  # type: ignore
def get_similarity_fn(
    target_mol: SmallMolecule, fp_key: str = "FCFP4"
) -> Callable[[SmallMolecule], PropertyValue]:
    """Build a similarity oracle for a fixed target molecule.

    Args:
        target_mol: the target molecule, as SMILES or rdkit.Chem.Mol object.
        fp_key: name of the fingerprint to use; has to be a key of `fp2fpfunc`.

    Raises:
        ValueError: if `fp_key` is not a key of `fp2fpfunc`.

    Returns:
        a `similarity_meta` Oracle configured with the target SMILES and the
        fingerprint, callable with a `SmallMolecule`.
    """
    supported_fingerprints = fp2fpfunc.keys()
    # The fingerprint is validated before the target molecule is converted.
    if fp_key not in supported_fingerprints:
        raise ValueError(f"Choose fp_key from {supported_fingerprints}.")

    return Oracle(
        name="similarity_meta",
        target_smiles=to_smiles(target_mol),
        fp=fp_key,
    )
def get_activity_fn(target: str) -> Callable[[SmallMolecule], PropertyValue]:
    """Build an oracle measuring activity/affinity against a protein target.

    Args:
        target: name of the target protein; has to be listed in
            `download_oracle_names`.

    Raises:
        ValueError: if `target` is not listed in `download_oracle_names`.

    Returns:
        an Oracle for the target that can be called with a `SmallMolecule`.
    """
    if target in download_oracle_names:
        return Oracle(name=target)

    raise ValueError(
        f"Supported targets are: {download_oracle_names}, not {target}"
    )
# for proteins
def get_sequence(protein: MacroMolecule) -> str:
    """Extract the amino acid sequence of a macromolecule.

    Args:
        protein: either an AA sequence or a rdkit.Chem.Mol object that can be
            converted to FASTA.

    Raises:
        TypeError: if the input is neither a string nor a rdkit.Chem.Mol.
        ValueError: if the FASTA conversion of a rdkit.Chem.Mol yields no content.

    Returns:
        for a string, the upper-cased and whitespace-stripped input (returned
        as is, even when empty); for a rdkit.Chem.Mol, the last
        whitespace-separated token of its FASTA representation.
    """
    # Plain strings are only normalized, never validated.
    if isinstance(protein, str):
        return protein.upper().strip()

    if not isinstance(protein, Chem.Mol):
        raise TypeError(f"Pass a string or rdkit.Chem.Mol object not {type(protein)}")

    fasta_tokens = Chem.MolToFASTA(protein).split()
    if not fasta_tokens:
        raise ValueError(
            f"Sequence was empty or rdkit.Chem.Mol could not be converted: {protein}"
        )
    # The last token is taken as the sequence; any preceding tokens are dropped.
    return fasta_tokens[-1]
def get_descriptor(protein: MacroMolecule) -> GlobalDescriptor:
    """Wrap a macromolecule into a modlamp GlobalDescriptor object.

    Args:
        protein: either an AA sequence or a rdkit.Chem.Mol object that can be
            converted to FASTA.

    Returns:
        a GlobalDescriptor built from the sequence returned by `get_sequence`.
    """
    return GlobalDescriptor(get_sequence(protein))
def get_target_parameters(
    target: Union[str, Dict[str, Any]]
) -> Tuple[List[Type[Any]], List[float]]:
    """Generate a tuple of scorers and a weight list.

    Args:
        target: scoring functions and the parameters related to them, either as
            a dictionary mapping scoring function names to parameter
            dictionaries or as a JSON string encoding such a dictionary. An
            optional "weight" entry in the parameters is used as the weight of
            the scorer (default 1.0) and is not passed to the scoring function.

    Raises:
        ValueError: if `target` is neither a string nor a dictionary.

    Returns:
        A tuple containing the scoring functions and the weight list.
    """
    if isinstance(target, str):
        target_dictionary: Dict[str, Any] = json.loads(target)
    elif isinstance(target, dict):
        target_dictionary = target
    else:
        raise ValueError(
            f"{target} of type {type(target)} is not supported: provide 'str' or 'Dict[str, Any]'"
        )

    score_list = []
    weights = []
    for scoring_function_name, parameters in target_dictionary.items():
        # Work on a shallow copy: removing "weight" must not alter the
        # dictionary owned by the caller.
        scorer_parameters = dict(parameters)
        weight = scorer_parameters.pop("weight", 1.0)
        score_list.append(SCORING_FACTORY[scoring_function_name](**scorer_parameters))
        weights.append(weight)
    return (score_list, weights)
def validate_ip(ip: str, message: str = "") -> None:
    """
    Validates whether the parameter configuration contains a correct IP address.

    Args:
        ip: The IP address to validate.
        message: Additional error message to be displayed.

    Raises:
        ValueError: if `ip` is not accepted by `ipaddress.ip_address`.
    """
    try:
        ipaddress.ip_address(ip)
    except ValueError as error:
        # Chain explicitly so the parsing error stays attached as the cause.
        raise ValueError(f"{ip} is not a IPv4 or IPv6 address\n {message}") from error
def validate_api_token(parameters: ApiTokenParameters, message: str = "") -> None:
    """
    Checks that the parameter configuration carries something that _could_
    be a valid API key, i.e. an `api_token` attribute holding a string.

    Args:
        parameters: ApiTokenParameters.
        message: Additional error message to be displayed.

    Raises:
        AttributeError: if `parameters` has no `api_token` attribute.
        TypeError: if `api_token` is not a string.
    """
    token_present = hasattr(parameters, "api_token")
    if not token_present:
        raise AttributeError(f"API key missing in {parameters}")

    token = parameters.api_token
    if isinstance(token, str):
        return
    raise TypeError(f"API key has to be a string not {token}\n {message}")
def docking_import_check() -> None:
    """
    Verifies that __some__ of the required packages for docking are installed.

    Raises:
        ModuleNotFoundError: If a necessary module was not found.
    """
    try:
        import openbabel
        import pdbfixer
        import pyscreener

        # Bare expression; presumably only there so the imports are not
        # reported as unused.
        openbabel, pdbfixer, pyscreener
    except ModuleNotFoundError as error:
        # Each fragment ends with a space: adjacent literals are concatenated
        # verbatim, so without it the sentences run together.
        raise ModuleNotFoundError(
            "You dont seem to have a valid installation for docking. You at "
            "least need `pdbfixer`, `openbabel` and `pyscreener` installed. "
            "See here for details: https://tdcommons.ai/functions/oracles/#docking-scores"
        ) from error