Powered By Blogger

Monday, February 24, 2025

Drug Discovery

import json

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import KNNImputer
from sklearn.linear_model import Ridge
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler


# Start Code:

# Per-row feature table: maps column name -> list of values, one entry per row.
# It is turned into a DataFrame with pd.DataFrame(data) further down, which
# requires every list to have the same length (20 entries are expected).
# Columns written as `[a, b] * 10` alternate between the same two values.
data = {

    # NOTE(review): presumably docking scores in kcal/mol -- confirm units.
    # Values are listed in increasing order (most negative first).
    'Binding Affinity': [-8.297, -8.049, -7.930, -7.724, -7.612, -7.386, -7.342, -7.218, -7.155, -7.042,

                           -6.940, -6.933, -6.885, -6.846, -6.808, -6.770, -6.536, -6.533, -6.529, -6.407],

    # Pocket-level descriptors; each alternates between two fixed values.
    'Volume': [1964.45, 1780.27] * 10,

    'Surface': [2366.68, 2400.50] * 10,

    'Depth': [33.29, 33.51] * 10,

    'Ellipsoid Ratio c/a': [0.15, 0.11] * 10,

    'Ellipsoid Ratio b/a': [0.26, 0.19] * 10,

    'Enclosure': [0.06, 0.16] * 10,

    'Pocket Atoms': [437, 310] * 10,

    'Carbons': [286, 217] * 10,

    'Nitrogens': [61, 44] * 10,

    'Oxygens': [83, 47] * 10,

    'Sulfurs': [2, 0] * 10,

    'Other Elements': [5, 2] * 10,

    'Hydrogen Bond Donors': [61, 47] * 10,

    'Hydrogen Bond Acceptors': [150, 95] * 10,

    'Metals': [4, 2] * 10,

    'Hydrophobic Interactions': [109, 111] * 10,

    'Hydrophobicity Ratio': [0.34, 0.44] * 10,

    'Apolar Amino Acid Ratio': [0.26, 0.44] * 10,

    'Polar Amino Acid Ratio': [0.33, 0.29] * 10,

    'Positive Amino Acid Ratio': [0.15, 0.20] * 10,

    'Negative Amino Acid Ratio': [0.21, 0.05] * 10,


    # Amino-acid count columns, keyed by three-letter residue code.
    # Original note: use this updated set for all models so the data stays
    # consistent across models.

    'ALA':[0,3,0,3,0,3,0,3,0,3,0,3,0,3,0,3,0,3,0,3],

    'ARG':[5,2,5,2,5,2,5,2,5,2,5,2,5,2,5,2,5,2,5,2],

    'ASN':[4,2,4,2,4,2,4,2,4,2,4,2,4,2,4,2,4,2,4,2],

    'ASP':[15,2,15,2,15,2,15,2,15,2,15,2,15,2,15,2,15,2,15,2],

    'CYS':[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],

    'GLN':[0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2,0,2],

    'GLU':[5,1,5,1,5,1,5,1,5,1,5,1,5,1,5,1,5,1,5,1],

    'GLY':[11,6,11,6,11,6,11,6,11,6,11,6,11,6,11,6,11,6,11,6],

    'HIS':[6,3,6,3,6,3,6,3,6,3,6,3,6,3,6,3,6,3,6,3],

    'ILE':[8,2,8,2,8,2,8,2,8,2,8,2,8,2,8,2,8,2,8,2],

    'LEU':[2,9,2,9,2,9,2,9,2,9,2,9,2,9,2,9,2,9,2,9],

    'LYS':[3,8,3,8,3,8,3,8,3,8,3,8,3,8,3,8,3,8,3,8],

    'MET':[2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1,2,1],

    'PHE':[5,3,5,3,5,3,5,3,5,3,5,3,5,3,5,3,5,3,5,3],

    'PRO':[5,6,5,6,5,6,5,6,5,6,5,6,5,6,5,6,5,6,5,6],

    'SER':[5,3,5,3,5,3,5,3,5,3,5,3,5,3,5,3,5,3,5,3],

    'THR':[3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3],

    'TRP':[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1],

    'TYR':[8,3,8,3,8,3,8,3,8,3,8,3,8,3,8,3,8,3,8,3],

    'VAL':[3,4,3,4,3,4,3,4,3,4,3,4,3,4,3,4,3,4,3,4],

    # Residue indicator flags; per the original note these were read from an
    # image via OCR. Every flag is 1 on every row, so the columns are constant.

    'Phe231A': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], # present (from image)

    'Leu218A': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], # present

    'Leu197A': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], # present

    'Thr226A': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], # present

    'His201A': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], # present

    'Tyr223A': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], # present

    'Lys119A': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], # present

    'Lys228A': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1], # present


    # Ligand indicator flags (original note: "what is known for certain from
    # ligand"). All are constant 1 on every row.

    'LIG1': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],

    'C': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],

    'N': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],

    'O': [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1],


    # Run parameters. 'vina_dock_model' and 'Model' both hold 1..20 and are
    # therefore identical columns.

    'vina_dock_model': list(range(1, 21)),

    'Model': list(range(1, 21)),

    'Molweight': [371.52]*20,

    'Number of hydrogen bond acceptors': [2]*20,

    'Number of hydrogen bond donors': [0]*20,

    'Number of rotable bonds': [8]*20,

    # Same values as 'Hydrophobic Interactions' above (duplicate column).
    'hydrophobic_interactions': [109, 111]*10,

    'RESIDUE': [662.408,676.521,667.184,657.759,661.434,662.727,674.381,670.491,671.178,660.898,671.713,672.719,666.313,667.567,667.751,663.67,666.593,665.243,662.001,666.218],


    # Per-row x/y/z coordinates (original note: "coordinates to chain and
    # chain position"). NOTE(review): coordinate frame and units not shown
    # here -- confirm.

    'coord_x': [12.3773,14.1198,13.8247,13.4709,12.7294,13.2786,14.5773,12.9101,13.0883,14.0219,11.6945,13.5996,13.2321,13.9812,13.5193,12.9665,14.3094,14.8302,11.615,12.9273],

    'coord_y': [27.1536,30.3026,29.6459,28.5711,27.6687,19.7112,31.0198,21.5947,29.2522,29.7841,26.8426,21.8593,28.5237,19.6639,27.1798,28.8583,22.542,26.5058,23.464,29.566],

    'coord_z': [60.7158,62.2514,61.8274,61.021,61.2426,61.5766,62.0441,61.4143,61.6266,62.4846,61.2329,62.2616,61.1592,62.5268,61.9475,61.4963,62.572,64.2347,61.1289,61.4714]

}


# ADMET / ligand property table: maps column name -> list of 20 values.
# Every column here is a single value repeated 20 times, so each row carries
# identical values and every column has zero variance across rows.
# It is converted with pd.DataFrame(admet_data) and concatenated column-wise
# onto the main feature table further down.

admet_data = {

    # NOTE(review): 424.14 here differs from 'Molweight_provided' (371.52)
    # below -- confirm which molecular weight is correct.
    'Molecular Weight (MW)': [424.14]*20,

    'Volume_ADMET': [444.27]*20,

    'Density': [0.955]*20,

    'nHA': [6.0]*20,

    'nHD': [1.0]*20,

    'nRot': [4.0]*20,

    'nRing': [4.0]*20,

    'MaxRing': [10.0]*20,

    'nHet': [6.0]*20,

    'fChar': [0.0]*20,

    'nRig': [27.0]*20,

    'Flexibility': [0.148]*20,

    'Stereo Centers': [0.0]*20,

    'TPSA': [81.3]*20,

    'logS': [-4.438]*20,

    'logP': [4.138]*20,

    'logD7.4': [3.039]*20,

    'pka (Acid)': [4.266]*20,

    'pka (Base)': [1.947]*20,

    'Melting point': [198.253]*20,

    'Boiling point': [314.451]*20,

    'QED': [0.511]*20,

    'SAscore': [1]*20,

    'GASA': [1]*20,

    'Fsp3': [0.115]*20,

    'MCE-18': [23.0]*20,

    'NPscore': [-0.742]*20,

    'Colloidal aggregators': [0.73]*20,

    'FLuc inhibitors': [0.175]*20,

    'Blue fluorescence': [0.277]*20,

    'Green fluorescence': [0.766]*20,

    'Reactive compounds': [0.004]*20,

    'Promiscuous compounds': [0.011]*20,

    'Caco-2 Permeability': [-4.882]*20,

    'MDCK Permeability': [-4.666]*20,

    'PPB': [99.0]*20,

    'VDss': [-0.35]*20,

    'Fu': [0.5]*20,

    'CLplasma': [0.555]*20,

    'T1/2': [1.346]*20,

    'Aquatic Toxicity Rule': [0]*20,

    'Genotoxic Carcinogenicity Mutagenicity Rule': [0]*20,

    'NonGenotoxic Carcinogenicity Rule': [0]*20,

    'Skin Sensitization Rule': [1]*20,

    'Acute Toxicity Rule': [0]*20,

    'NonBiodegradable': [0]*20,

    'SureChEMBL Rule': [0]*20,

    'FAF-Drugs4 Rule': [4]*20,

    'hERG Blockers': [0.521]*20,

    'hERG Blockers (10um)': [0.315]*20,

    'DILI': [0.962]*20,

    'AMES Toxicity': [0.81]*20,

    'Rat Oral Acute Toxicity': [0.226]*20,

    'FDAMDD': [0.841]*20,

    'Skin Sensitization_ADMET': [0.354]*20,

    'Carcinogenicity_ADMET': [0.388]*20,

    'Eye Corrosion': [0.0]*20,

    'Eye Irritation': [0.078]*20,

    'Respiratory': [0.436]*20,

    'Human Hepatotoxicity': [0.875]*20,

    'Drug-induced Nephrotoxicity': [0.878]*20,

    'Drug-induced Neurotoxicity': [0.911]*20,

    'Ototoxicity': [0.864]*20,

    'Hematotoxicity': [0.41]*20,

    'Genotoxicity_ADMET': [0.994]*20,

    'RPMI-8226 Immunitoxicity': [0.098]*20,

    'A549 Cytotoxicity': [0.114]*20,

    'Hek293 Cytotoxicity': [0.529]*20,

    'BCF': [0.987]*20,

    'IGC50': [4.088]*20,

    'LC50DM': [5.143]*20,

    'LC50FM': [4.794]*20,


    # Columns suffixed `_provided` repeat ligand properties under a second
    # name; the first, second, third and sixth carry the same values as the
    # similarly named columns in the main `data` table.
    'Molweight_provided': [371.52]*20,

    'Number of hydrogen bond acceptors_provided': [2]*20,

    'Number of hydrogen bond donors_provided': [0]*20,

    'Number of atoms_provided': [28]*20,

    'Number of bonds_provided': [30]*20,

    'Number of rotable bonds_provided': [8]*20,

    'Molecular refractivity': [119.72]*20,

    # NOTE(review): 12.47 here vs 'TPSA' 81.3 above, and logP 6 below vs
    # 'logP' 4.138 above -- the two sets of values disagree; confirm.
    'Topological Polar Surface Area': [12.47]*20,

    'octanolwater partition coefficientlogP': [6]*20,


    # Toxicity and target-activity columns. NOTE(review): the `_PRO` suffix
    # presumably marks a second prediction tool -- confirm.
    'Predicted LD50: 1190mgkg': [1190]*20,

    'Predicted Toxicity Class': [4]*20,

    'Hepatotoxicity_PRO': [0.69]*20,

    'Neurotoxicity_PRO': [0.87]*20,

    'Nephrotoxicity_PRO': [0.90]*20,

    'Respiratory toxicity_PRO': [0.98]*20,

    'Cardiotoxicity': [0.77]*20,

    'Carcinogenicity_PRO': [0.62]*20,

    'Immunotoxicity_PRO': [0.96]*20,

    'Mutagenicity_PRO': [0.97]*20,

    'Cytotoxicity_PRO': [0.93]*20,

    'BBBbarrier': [1.0]*20,

    'Ecotoxicity': [0.73]*20,

    'Clinical toxicity': [0.56]*20,

    'Nutritional toxicity': [0.74]*20,

    'Aryl hydrocarbon ReceptorAhR': [0.97]*20,

    'Androgen ReceptorAR': [0.99]*20,

    'Androgen Receptor Ligand Binding DomainARLBD': [0.99]*20,

    'Aromatase': [1.0]*20,

    'Estrogen Receptor AlphaER': [0.99]*20,

    'Estrogen Receptor Ligand Binding DomainERLBD': [1.0]*20,

    'Peroxisome Proliferator Activated Receptor GammaPPARGamma': [0.99]*20,

    'Nuclear factorerythroidderived 2like 2antioxidant responsive elementnrf2ARE': [0.88]*20,

    'Heat shock factor response elementHSE': [0.88]*20,

    'Mitochondrial Membrane PotentialMMP': [0.70]*20,

    'Phosphoprotein Tumor Supressor p53': [0.96]*20,

    'ATPase family AAA domaincontaining protein 5ATAD5': [0.99]*20,

    'Thyroid hormone receptor alphaTHRα': [0.90]*20,

    'Thyroid hormone receptor betaTHRβ': [0.78]*20,

    'TranstyretrinTTR': [0.97]*20,

    'Ryanodine receptorRYR': [0.98]*20,

    'GABA receptorGABAR': [0.96]*20,

    'Glutamate NmethylDaspartate receptorNMDAR': [0.92]*20,

    'alphaamino3hydroxy5methyl4isoxazolepropionate receptorAMPAR': [0.97]*20,

    'Kainate receptorKAR': [0.99]*20,

    'AchetylcholinesteraseAChE': [0.69]*20,

    'Constitutive androstane receptorCAR': [0.98]*20,

    'Pregnane X receptorPXR': [0.92]*20,

    'NADHquinone oxidoreductaseNADHOX': [0.97]*20,

    'Voltage gated sodium channelVGSC': [0.95]*20,

    'NaI symporterNIS': [0.98]*20,

    'Cytochrome CYP1A2': [0.76]*20,

    'Cytochrome CYP2C19': [0.87]*20,

    'Cytochrome CYP2C9': [0.56]*20,

    'Cytochrome CYP2D6': [0.63]*20,

    'Cytochrome CYP3A4': [0.71]*20,

    'Cytochrome CYP2E1': [0.98]*20

}


# Assemble the modelling table and split it into features (X) and target (y).

# Name of the regression target column.
TARGET_COLUMN = 'Calculated affinity (kcal/mol)'

# Row-aligned, column-wise concatenation of the per-row features and the
# (constant) ADMET descriptors; both frames share the default RangeIndex.
df = pd.DataFrame(data)
admet_df = pd.DataFrame(admet_data)
df = pd.concat([df, admet_df], axis=1)

# The original script merged in a `binding_affinity_df` that is never defined
# (NameError), so the target column was never created. The per-model
# affinities already live in the 'Binding Affinity' column, so expose that
# column under the target name instead. Renaming (rather than copying) also
# keeps the target out of the feature matrix.
# NOTE(review): assumes 'Binding Affinity' is the calculated affinity in
# kcal/mol -- confirm against the data source.
df = df.rename(columns={'Binding Affinity': TARGET_COLUMN})

# Constant distance descriptors; a scalar broadcasts to every row, so the row
# count is no longer hard-coded.
df['Distance-1 ExcludeAliphatic'] = 0.839159
df['Distance-1 ExcludeAromatic'] = 0.839159

# Features are every column except the target.
X = df.drop(TARGET_COLUMN, axis=1)
y = df[TARGET_COLUMN]


# Train a Ridge regressor on X / y and report hold-out and cross-validated
# scores.
#
# Imputation and scaling are chained with the regressor in a single pipeline.
# The original fitted the imputer and scaler on ALL rows before splitting and
# before cross-validation, so statistics from test/validation rows leaked into
# preprocessing. With a pipeline, each fit (the hold-out fit and every CV
# fold) learns its preprocessing from training rows only.
imputer = KNNImputer(n_neighbors=5)
scaler = StandardScaler()
model = make_pipeline(imputer, scaler, Ridge(alpha=1.0))

# Hold out 20% of the rows; the split is done on the raw features so the
# pipeline never sees the test rows during fitting.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

model.fit(X_train, y_train)

# Predict on the held-out rows (preprocessing is applied by the pipeline).
y_pred = model.predict(X_test)

# Hold-out metrics.
r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("R-squared:", r2)
print("MAE:", mae)
print("RMSE:", rmse)

# 5-fold cross-validation on the full table. cross_val_score clones the
# pipeline for every fold, so `model` stays fitted on X_train afterwards.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(model, X, y, cv=kf, scoring='r2')

print("Cross-validation R-squared scores:", cv_scores)
print("Mean cross-validation R-squared score:", np.mean(cv_scores))


Drug Discovery © 2025 by Devise Foundation is licensed under CC BY-NC-ND 4.0 


No comments:

Post a Comment

From Sea to Sapiens: The Epic Journey of Life’s Evolution

  Around 4 billion years ago, Earth’s oceans churned with the raw ingredients of life. In this primordial soup, simple organic molecules for...