# Forked from enviPath/enviPy (61 lines, 1.5 KiB, Python).
from abc import ABC, abstractmethod
|
|
from typing import List
|
|
|
|
from mordred import Calculator, descriptors
|
|
from padelpy import from_smiles
|
|
from rdkit import Chem
|
|
|
|
|
|
class Descriptor(ABC):
|
|
@abstractmethod
|
|
def get_molecule_descriptors(self, molecule: str) -> List[float | int] | None:
|
|
pass
|
|
|
|
@abstractmethod
|
|
def get_descriptor_names(self) -> List[str]:
|
|
pass
|
|
|
|
|
|
class Mordred(Descriptor):
    """Descriptor calculator backed by the ``mordred`` package."""

    # One calculator shared by all instances, built when the class is defined.
    # ignore_3D=True asks mordred to leave out its 3D descriptors.
    calc = Calculator(descriptors, ignore_3D=True)

    def get_molecule_descriptors(self, molecule: str) -> List[float | int] | None:
        """Compute the Mordred descriptors for a SMILES string.

        Args:
            molecule: SMILES representation of the molecule.

        Returns:
            The values produced by the shared calculator as a list, or
            ``None`` when RDKit cannot parse ``molecule``.
        """
        mol = Chem.MolFromSmiles(molecule)
        if mol is None:
            # RDKit reports an unparsable SMILES by returning None instead of
            # raising; handing that None to the calculator would fail with an
            # unrelated-looking error, so report "no result" explicitly.
            return None

        # NOTE(review): mordred may yield non-numeric placeholder objects for
        # descriptors it could not compute — confirm callers handle them.
        return list(self.calc(mol))

    def get_descriptor_names(self) -> List[str]:
        """Return positional names ``Mordred_<i>``, one per registered descriptor."""
        return [f"Mordred_{i}" for i in range(len(self.calc.descriptors))]
|
|
|
|
|
|
class PaDEL(Descriptor):
    """Descriptor calculator backed by PaDEL through ``padelpy``."""

    # NOTE(review): this Mordred calculator is not used by any method of this
    # class. It is kept so that ``PaDEL.calc`` remains available to outside
    # code — confirm nothing relies on it, then remove.
    calc = Calculator(descriptors)

    # Number of names reported by get_descriptor_names().
    # Presumably the size of padelpy's default descriptor set — TODO confirm.
    _NUM_DESCRIPTORS = 1875

    @staticmethod
    def _to_float(value) -> float:
        """Convert one raw PaDEL value to float, falling back to 0.0."""
        try:
            return float(value)
        except (TypeError, ValueError):
            # ValueError: text that is not a number (e.g. an empty field).
            # TypeError: a missing value such as None.
            return 0.0

    def get_molecule_descriptors(self, molecule: str) -> List[float | int] | None:
        """Compute the PaDEL descriptors for a SMILES string.

        Args:
            molecule: SMILES representation of the molecule.

        Returns:
            The descriptor values as floats, in the order returned by
            ``from_smiles``; values that cannot be converted become ``0.0``.
            An empty list is returned when ``from_smiles`` raises
            ``RuntimeError``.
        """
        try:
            padel_descriptors = from_smiles(molecule, threads=1)
        except RuntimeError:
            # Best effort: a failed PaDEL run yields "no descriptors" rather
            # than propagating the error.
            return []

        # Only the values are needed; the descriptor names are not used here.
        return [self._to_float(value) for value in padel_descriptors.values()]

    def get_descriptor_names(self) -> List[str]:
        """Return positional names ``PaDEL_<i>`` for ``_NUM_DESCRIPTORS`` descriptors."""
        return [f"PaDEL_{i}" for i in range(self._NUM_DESCRIPTORS)]
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: compute and print the descriptors of one example
    # molecule with each calculator, Mordred first and then PaDEL.
    example_smiles = "CC1=CC(O)=CC=C1[N+](=O)[O-]"

    for calculator_cls in (Mordred, PaDEL):
        calculator = calculator_cls()
        print(calculator.get_molecule_descriptors(example_smiles))
|