Files
enviPy-bayer/pepper/impl/descriptors.py
2026-03-06 22:11:22 +13:00

61 lines
1.5 KiB
Python

from abc import ABC, abstractmethod
from typing import List
from mordred import Calculator, descriptors
from padelpy import from_smiles
from rdkit import Chem
class Descriptor(ABC):
@abstractmethod
def get_molecule_descriptors(self, molecule: str) -> List[float | int] | None:
pass
@abstractmethod
def get_descriptor_names(self) -> List[str]:
pass
class Mordred(Descriptor):
    """Descriptor backend that evaluates a Mordred ``Calculator`` on RDKit molecules."""

    # One calculator shared by all instances, built when the class is defined
    # (constructed with ignore_3D=True).
    calc = Calculator(descriptors, ignore_3D=True)

    def get_molecule_descriptors(self, molecule: str) -> List[float | int] | None:
        """Compute the Mordred descriptor values for a SMILES string.

        Args:
            molecule: SMILES representation of the molecule.

        Returns:
            The calculator's results as a list, or ``None`` when RDKit cannot
            parse ``molecule``.
        """
        mol = Chem.MolFromSmiles(molecule)
        # MolFromSmiles signals a parse failure by returning None; handing
        # that to the calculator would fail, so report "no descriptors".
        if mol is None:
            return None
        # NOTE(review): entries may include Mordred's missing-value/error
        # objects rather than plain numbers -- confirm downstream handling.
        return list(self.calc(mol))

    def get_descriptor_names(self) -> List[str]:
        """Return positional names ``Mordred_0 .. Mordred_{n-1}``, one per calculator descriptor."""
        return [f"Mordred_{i}" for i in range(len(self.calc.descriptors))]
class PaDEL(Descriptor):
    """Descriptor backend that obtains its values from padelpy's ``from_smiles``."""

    # NOTE(review): no method in this class reads ``calc``; it is kept so the
    # class attribute stays available to any external code that may use it.
    calc = Calculator(descriptors)

    def get_molecule_descriptors(self, molecule: str) -> List[float | int] | None:
        """Compute PaDEL descriptor values for a SMILES string.

        Args:
            molecule: SMILES representation of the molecule.

        Returns:
            One float per value returned by ``from_smiles``; values that
            cannot be parsed as a float become ``0.0``. An empty list is
            returned when ``from_smiles`` raises ``RuntimeError``.
        """
        try:
            raw_values = from_smiles(molecule, threads=1)
        except RuntimeError:
            return []
        return [self._as_float(value) for value in raw_values.values()]

    @staticmethod
    def _as_float(value) -> float:
        """Convert ``value`` to float, falling back to 0.0 on ``ValueError``."""
        try:
            return float(value)
        except ValueError:
            return 0.0

    def get_descriptor_names(self) -> List[str]:
        """Return positional names ``PaDEL_0 .. PaDEL_1874``.

        NOTE(review): 1875 presumably matches the number of descriptors
        ``from_smiles`` returns with its default settings -- confirm.
        """
        return [f"PaDEL_{index}" for index in range(1875)]
if __name__ == "__main__":
    # Manual smoke check: print the descriptor vector from each backend for
    # one example SMILES string, Mordred first and then PaDEL.
    example_smiles = "CC1=CC(O)=CC=C1[N+](=O)[O-]"
    for backend_cls in (Mordred, PaDEL):
        backend = backend_cls()
        print(list(backend.get_molecule_descriptors(example_smiles)))