"""Molecular descriptor backends (Mordred and PaDEL) behind one interface."""

from abc import ABC, abstractmethod
from typing import List

from mordred import Calculator, descriptors
from padelpy import from_smiles
from rdkit import Chem


class Descriptor(ABC):
    """Interface for calculators that turn a SMILES string into descriptors."""

    @abstractmethod
    def get_molecule_descriptors(self, molecule: str) -> List[float | int] | None:
        """Return the descriptor values computed for ``molecule``.

        Args:
            molecule: SMILES string of the molecule.

        Returns:
            A list of descriptor values; implementations may return ``None``
            or an empty list when the molecule cannot be processed.
        """

    @abstractmethod
    def get_descriptor_names(self) -> List[str]:
        """Return the list of descriptor names for this backend."""


class Mordred(Descriptor):
    """Descriptor backend that runs a Mordred ``Calculator`` on an RDKit mol."""

    # Built once at class-definition time and shared by every instance;
    # 3D descriptors are excluded.
    calc = Calculator(descriptors, ignore_3D=True)

    def get_molecule_descriptors(self, molecule: str) -> List[float | int] | None:
        """Compute the Mordred descriptors for a SMILES string.

        Args:
            molecule: SMILES string of the molecule.

        Returns:
            The calculator's results as a list, or ``None`` when RDKit cannot
            parse ``molecule``.
        """
        mol = Chem.MolFromSmiles(molecule)
        if mol is None:
            # MolFromSmiles signals a parse failure by returning None; passing
            # that on to the calculator would raise instead of reporting it.
            return None
        # NOTE(review): entries are taken from the calculator result as-is;
        # Mordred may yield non-numeric placeholder objects for descriptors it
        # could not compute -- confirm callers handle them.
        return list(self.calc(mol))

    def get_descriptor_names(self) -> List[str]:
        """Return positional names ``Mordred_0 .. Mordred_{n-1}``.

        ``n`` is the number of descriptors registered on ``calc``.
        """
        return [f"Mordred_{i}" for i in range(len(self.calc.descriptors))]


class PaDEL(Descriptor):
    """Descriptor backend that delegates to ``padelpy.from_smiles``."""

    # NOTE(review): no method of this class reads ``calc``; it is kept only so
    # that existing ``PaDEL.calc`` accesses keep working. Consider removing it.
    calc = Calculator(descriptors)

    # Number of names produced by get_descriptor_names.
    # NOTE(review): presumably the number of values from_smiles returns with
    # its default settings -- verify against the installed padelpy version.
    _DESCRIPTOR_COUNT = 1875

    @staticmethod
    def _to_float(value) -> float:
        """Convert one raw PaDEL value to float, using 0.0 if it is not numeric."""
        try:
            return float(value)
        except ValueError:
            return 0.0

    def get_molecule_descriptors(self, molecule: str) -> List[float | int] | None:
        """Compute the PaDEL descriptors for a SMILES string.

        Args:
            molecule: SMILES string of the molecule.

        Returns:
            One float per value returned by ``from_smiles`` (values that do
            not parse as a float become ``0.0``), or an empty list when
            ``from_smiles`` raises ``RuntimeError``.
        """
        try:
            padel_descriptors = from_smiles(molecule, threads=1)
        except RuntimeError:
            return []
        return [self._to_float(v) for v in padel_descriptors.values()]

    def get_descriptor_names(self) -> List[str]:
        """Return positional names ``PaDEL_0 .. PaDEL_1874``."""
        return [f"PaDEL_{i}" for i in range(self._DESCRIPTOR_COUNT)]


if __name__ == "__main__":
    # Smoke run: print the descriptors of one sample molecule per backend.
    mol = "CC1=CC(O)=CC=C1[N+](=O)[O-]"

    m = Mordred()
    # Printed directly (not via list(...)) so a None result does not raise.
    print(m.get_molecule_descriptors(mol))

    p = PaDEL()
    print(p.get_molecule_descriptors(mol))