#
# MIT License
#
# Copyright (c) 2022 GT4SD team
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
"""Implementation of scorers."""
from functools import partial
from typing import Any, Callable, Dict, List, Type, Optional
import numpy as np
from rdkit import Chem
from guacamol.common_scoring_functions import (
IsomerScoringFunction,
RdkitScoringFunction,
SMARTSScoringFunction,
TanimotoScoringFunction,
)
from guacamol.score_modifier import (
ClippedScoreModifier,
GaussianModifier,
MaxGaussianModifier,
MinGaussianModifier,
)
from guacamol.scoring_function import ScoringFunction
from guacamol.utils.descriptors import (
bertz,
logP,
mol_weight,
num_aromatic_rings,
num_rings,
num_rotatable_bonds,
qed,
tpsa,
)
MODIFIERS: Dict[str, Callable[..., Any]] = {
"gaussian_modifier": GaussianModifier,
"min_gaussian_modifier": MinGaussianModifier,
"max_gaussian_modifier": MaxGaussianModifier,
"clipped_score_modifier": ClippedScoreModifier,
}
MODIFIERS_PARAMETERS: Dict[str, Dict[str, float]] = {
"gaussian_modifier": {"mu": 2, "sigma": 0.5},
"min_gaussian_modifier": {"mu": 0.75, "sigma": 0.1},
"max_gaussian_modifier": {"mu": 100, "sigma": 10},
"clipped_score_modifier": {"upper_x": 0.8},
}
DESCRIPTOR: Dict[str, Callable[..., Any]] = {
"num_rotatable_bonds": num_rotatable_bonds,
"num_aromatic_rings": num_aromatic_rings,
"log_p": logP,
"tpsa": tpsa,
"bertz": bertz,
"qed": qed,
"mol_weight": mol_weight,
"num_rings": num_rings,
}
[docs]def distance_to_score(distance: float, beta: float) -> float:
"""calculating exponential for a given distance
Args:
distance: A float.
Returns:
An exponential score value for a given SMILES
"""
return np.exp(-beta * distance**2)
[docs]class DistanceScorer(ScoringFunction):
[docs] def __init__(self, beta: float = 0.00000001) -> None:
"""DistanceScorer is used to call a partial copy of distance_to_score function.
Args:
beta: A float value used for getting an exponential score value
"""
self.partical_distance_score = partial(distance_to_score, beta=beta)
[docs] def get_distance(self, smile_distance: float) -> float:
"""Generates a partial copy of distance_to_score function
Args:
smiles: SMILES.
Returns:
An exponential score value for a given SMILES
"""
return self.partical_distance_score(smile_distance)
[docs]class TargetValueScorer(DistanceScorer):
[docs] def __init__(self, target: float, scoring_function: Callable[[str], float]) -> None:
"""Scoring function which is used to generate a score based on a taget and a scoring function.
Args:
target: target score that will be used to get the distance to the score of the SMILES
scoring_function: an instance of a scoring class
"""
super().__init__()
self.target = target
self.scoring_function = scoring_function
[docs] def score(self, smiles: str) -> float:
"""Generates a score for a given SMILES
Args:
smiles: SMILES.
Returns:
A score for the given SMILES
"""
return self.get_distance(self.scoring_function(smiles) - self.target)
[docs] def score_list(self, smiles_list: List[str]) -> List[float]:
"""Generates a list of scores for a given SMILES List
Args:
smiles_list: A List of SMILES.
Returns:
A List of scores
"""
return [
self.score(smiles)
for smiles in smiles_list
if Chem.MolFromSmiles(smiles) and smiles
]
[docs]class CombinedScorer:
[docs] def __init__(
self,
scorer_list: List[Type[Any]],
weights: Optional[List[float]] = None,
) -> None:
"""Scoring function which generates a combined score for a SMILES as per the given scoring functions.
Args:
scorer_list: A list of the scoring functions
weights: A list of weights
"""
self.scorer_list = scorer_list
self.weights = self._normalize_weights(weights)
[docs] def _normalize_weights(self, weights=None) -> List[float]:
"""It is used for normalizing weights.
Args:
weights: A list of weights.
Returns:
Sum of all the scores generated by the given scoring functions
"""
weights = weights if weights else [1.0] * len(self.scorer_list)
offsetted_weights = [weight + min(weights) for weight in weights]
return [weight / float(sum(offsetted_weights)) for weight in offsetted_weights]
[docs] def score(self, smiles: str):
"""Generates a score for a given SMILES
Args:
smiles: SMILES.
Returns:
Sum of all the scores generated by the given scoring functions
"""
return sum(
[
scorer.score(smiles) * weight
for scorer, weight in zip(self.scorer_list, self.weights)
]
)
[docs] def score_list(self, smiles_list: List[str]) -> List[float]:
"""Generates a list of scores for a given SMILES List
Args:
smiles_list: A List of SMILES.
Returns:
A List of scores
"""
return [self.score(smiles) for smiles in smiles_list]
[docs]class RDKitDescriptorScorer(TargetValueScorer):
[docs] def __init__(
self,
target: float,
modifier: str = "gaussian_modifier",
descriptor: str = "num_rotatable_bonds",
) -> None:
"""Scoring function wrapping RDKit descriptors.
Args:
target: target score that will be used to get the distance to the score of the SMILES
modifier: score modifier
descriptor: molecular descriptors
"""
self.target = target
self.modifier = MODIFIERS[modifier](**MODIFIERS_PARAMETERS[modifier])
self.descriptor = DESCRIPTOR[descriptor]
super().__init__(target=target, scoring_function=self.score)
[docs] def score(self, smiles: str) -> float:
"""Generates a score for a given SMILES
Args:
smiles: SMILES.
Returns:
A score for the given SMILES
"""
scoring_function = RdkitScoringFunction(
descriptor=self.descriptor,
score_modifier=self.modifier,
)
return scoring_function.score_mol(Chem.MolFromSmiles(smiles)) # type: ignore
[docs]class TanimotoScorer(TargetValueScorer):
[docs] def __init__(
self,
target: float,
target_smile: str,
fp_type: str = "ECFP4",
modifier: str = "gaussian_modifier",
) -> None:
"""Scoring function that looks at the fingerprint similarity against a target molecule.
Args:
target: target score that will be used to get the distance to the score of the SMILES
target_smile: target molecule to compare similarity
fp_type: fingerprint type
modifier: score modifier
"""
self.target = target
self.target_smile = target_smile
self.fp_type = fp_type
self.modifier = MODIFIERS[modifier](**MODIFIERS_PARAMETERS[modifier])
super().__init__(target=target, scoring_function=self.score)
[docs] def score(self, smiles: str) -> float:
"""Generates a score for a given SMILES
Args:
smiles: SMILES.
Returns:
A score for the given SMILES
"""
scoring_function = TanimotoScoringFunction(
self.target_smile,
fp_type=self.fp_type,
score_modifier=self.modifier,
)
return scoring_function.score_mol(Chem.MolFromSmiles(smiles)) # type: ignore
[docs]class IsomerScorer(TargetValueScorer):
[docs] def __init__(self, target: float, target_smile: str) -> None:
"""Scoring function for closeness to a molecular formula.
Args:
target: target score that will be used to get the distance to the score of the SMILES
target_smile: targeted SMILES to compare closeness with
"""
self.target = target
self.target_smile = target_smile
super().__init__(target=target, scoring_function=self.score)
[docs] def score(self, smiles: str) -> float:
"""Generates a score for a given SMILES
Args:
smiles: SMILES.
Returns:
A score for the given SMILES
"""
scoring_function = IsomerScoringFunction(self.target_smile)
return scoring_function.raw_score(smiles)
[docs]class SMARTSScorer(TargetValueScorer):
[docs] def __init__(self, target: float, target_smile: str, inverse: bool = True) -> None:
"""Scoring function that looks at the fingerprint similarity against a target molecule.
Args:
target: target score that will be used to get the distance to the score of the SMILES
target_smile: The SMARTS string to match
inverse: If True then SMARTS is desired else it is not desired in the molecules
"""
self.target = target
self.target_smile = target_smile
self.inverse = inverse
super().__init__(target=target, scoring_function=self.score)
[docs] def score(self, smiles: str) -> float:
"""Generates a score for a given SMILES
Args:
smiles: SMILES.
Returns:
A score for the given SMILES
"""
scoring_function = SMARTSScoringFunction(self.target_smile, self.inverse)
return scoring_function.score_mol(Chem.MolFromSmiles(smiles)) # type: ignore
[docs]class QEDScorer(TargetValueScorer):
[docs] def __init__(self, target: float) -> None:
"""Scoring function that calculates the weighted sum of ADS mapped properties using QED module of rdkit
Args:
target: target score that will be used to get the distance to the score of the SMILES
"""
self.target = target
super().__init__(target=target, scoring_function=self.score)
[docs] def score(self, smiles: str) -> float:
"""Generates a score for a given SMILES
Args:
smiles: SMILES.
Returns:
A score for the given SMILES
"""
return Chem.QED.qed(Chem.MolFromSmiles(smiles))