@staticmethod
def get_functional_groups(smiles: str) -> List[str]:
    """Return the functional groups found in *smiles*.

    The SMILES is parsed with RDKit and handed to the IFG contrib module
    (``ifg.identify_functional_groups``); the ``atoms`` entry of every group
    it reports is collected.

    Args:
        smiles: SMILES string of the structure to analyse.

    Returns:
        One entry per identified functional group, in the order the IFG
        routine reports them. An empty list is returned when *smiles* is
        empty/``None`` or cannot be parsed.
    """
    if not smiles:
        # Nothing to parse, and therefore no functional groups to report.
        return []

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        # RDKit signals an unparsable SMILES by returning None. Check for it
        # explicitly instead of letting None reach the IFG routine and
        # catching the AttributeError it would raise there, which would also
        # hide unrelated AttributeErrors.
        logger.debug("Could not get functional groups for %s", smiles)
        return []

    # TODO atoms or type?
    # NOTE(review): in the RDKit IFG contrib, `atoms` is presumably the SMILES
    # of the group's own atoms while `type` also includes the neighbouring
    # carbon environment -- confirm which one the highlighting needs.
    return [fg.atoms for fg in ifg.identify_functional_groups(mol)]