Devise Foundation: Drug Discovery

import json

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import KNNImputer
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Start Code:

# Feature table keyed by column name. Every column holds exactly 20 values,
# one per row; the 'Model' / 'vina_dock_model' columns number those rows 1..20.
data = {
    # Per-row affinity scores, listed in ascending order.
    'Binding Affinity': [
        -8.297, -8.049, -7.930, -7.724, -7.612,
        -7.386, -7.342, -7.218, -7.155, -7.042,
        -6.940, -6.933, -6.885, -6.846, -6.808,
        -6.770, -6.536, -6.533, -6.529, -6.407,
    ],

    # Pocket descriptors: each column alternates between two values
    # (first value on rows 1, 3, 5, ...; second on rows 2, 4, 6, ...).
    **{
        descriptor: [odd_row, even_row] * 10
        for descriptor, (odd_row, even_row) in {
            'Volume': (1964.45, 1780.27),
            'Surface': (2366.68, 2400.50),
            'Depth': (33.29, 33.51),
            'Ellipsoid Ratio c/a': (0.15, 0.11),
            'Ellipsoid Ratio b/a': (0.26, 0.19),
            'Enclosure': (0.06, 0.16),
            'Pocket Atoms': (437, 310),
            'Carbons': (286, 217),
            'Nitrogens': (61, 44),
            'Oxygens': (83, 47),
            'Sulfurs': (2, 0),
            'Other Elements': (5, 2),
            'Hydrogen Bond Donors': (61, 47),
            'Hydrogen Bond Acceptors': (150, 95),
            'Metals': (4, 2),
            'Hydrophobic Interactions': (109, 111),
            'Hydrophobicity Ratio': (0.34, 0.44),
            'Apolar Amino Acid Ratio': (0.26, 0.44),
            'Polar Amino Acid Ratio': (0.33, 0.29),
            'Positive Amino Acid Ratio': (0.15, 0.20),
            'Negative Amino Acid Ratio': (0.21, 0.05),
        }.items()
    },

    # Amino-acid counts (updated version; use this one for all models so the
    # data stays consistent across models). Same two-value alternation.
    **{
        residue: [odd_row, even_row] * 10
        for residue, (odd_row, even_row) in {
            'ALA': (0, 3),
            'ARG': (5, 2),
            'ASN': (4, 2),
            'ASP': (15, 2),
            'CYS': (0, 0),
            'GLN': (0, 2),
            'GLU': (5, 1),
            'GLY': (11, 6),
            'HIS': (6, 3),
            'ILE': (8, 2),
            'LEU': (2, 9),
            'LYS': (3, 8),
            'MET': (2, 1),
            'PHE': (5, 3),
            'PRO': (5, 6),
            'SER': (5, 3),
            'THR': (3, 3),
            'TRP': (0, 1),
            'TYR': (8, 3),
            'VAL': (3, 4),
        }.items()
    },

    # Presence flags: residues read from the image via OCR (all present),
    # followed by what is known for certain from the ligand. 1 on every row.
    **{
        flag: [1] * 20
        for flag in (
            'Phe231A', 'Leu218A', 'Leu197A', 'Thr226A',
            'His201A', 'Tyr223A', 'Lys119A', 'Lys228A',
            'LIG1', 'C', 'N', 'O',
        )
    },

    # Parameters.
    'vina_dock_model': list(range(1, 21)),
    'Model': list(range(1, 21)),
    'Molweight': [371.52] * 20,
    'Number of hydrogen bond acceptors': [2] * 20,
    'Number of hydrogen bond donors': [0] * 20,
    'Number of rotable bonds': [8] * 20,
    # Carries the same values as 'Hydrophobic Interactions' above.
    'hydrophobic_interactions': [109, 111] * 10,
    'RESIDUE': [
        662.408, 676.521, 667.184, 657.759, 661.434,
        662.727, 674.381, 670.491, 671.178, 660.898,
        671.713, 672.719, 666.313, 667.567, 667.751,
        663.67, 666.593, 665.243, 662.001, 666.218,
    ],

    # Coordinates to chain and chain position.
    'coord_x': [
        12.3773, 14.1198, 13.8247, 13.4709, 12.7294,
        13.2786, 14.5773, 12.9101, 13.0883, 14.0219,
        11.6945, 13.5996, 13.2321, 13.9812, 13.5193,
        12.9665, 14.3094, 14.8302, 11.615, 12.9273,
    ],
    'coord_y': [
        27.1536, 30.3026, 29.6459, 28.5711, 27.6687,
        19.7112, 31.0198, 21.5947, 29.2522, 29.7841,
        26.8426, 21.8593, 28.5237, 19.6639, 27.1798,
        28.8583, 22.542, 26.5058, 23.464, 29.566,
    ],
    'coord_z': [
        60.7158, 62.2514, 61.8274, 61.021, 61.2426,
        61.5766, 62.0441, 61.4143, 61.6266, 62.4846,
        61.2329, 62.2616, 61.1592, 62.5268, 61.9475,
        61.4963, 62.572, 64.2347, 61.1289, 61.4714,
    ],
}

#ADMET data: properties needed

# ADMET / toxicity descriptor table. Each property has one scalar value that is
# repeated on all 20 rows, so the scalars are listed once and expanded into
# 20-element columns by the comprehension. Key order and int/float types are
# kept exactly as listed.
admet_data = {
    prop: [value] * 20
    for prop, value in {
        'Molecular Weight (MW)': 424.14,
        'Volume_ADMET': 444.27,
        'Density': 0.955,
        'nHA': 6.0,
        'nHD': 1.0,
        'nRot': 4.0,
        'nRing': 4.0,
        'MaxRing': 10.0,
        'nHet': 6.0,
        'fChar': 0.0,
        'nRig': 27.0,
        'Flexibility': 0.148,
        'Stereo Centers': 0.0,
        'TPSA': 81.3,
        'logS': -4.438,
        'logP': 4.138,
        'logD7.4': 3.039,
        'pka (Acid)': 4.266,
        'pka (Base)': 1.947,
        'Melting point': 198.253,
        'Boiling point': 314.451,
        'QED': 0.511,
        'SAscore': 1,
        'GASA': 1,
        'Fsp3': 0.115,
        'MCE-18': 23.0,
        'NPscore': -0.742,
        'Colloidal aggregators': 0.73,
        'FLuc inhibitors': 0.175,
        'Blue fluorescence': 0.277,
        'Green fluorescence': 0.766,
        'Reactive compounds': 0.004,
        'Promiscuous compounds': 0.011,
        'Caco-2 Permeability': -4.882,
        'MDCK Permeability': -4.666,
        'PPB': 99.0,
        'VDss': -0.35,
        'Fu': 0.5,
        'CLplasma': 0.555,
        'T1/2': 1.346,
        'Aquatic Toxicity Rule': 0,
        'Genotoxic Carcinogenicity Mutagenicity Rule': 0,
        'NonGenotoxic Carcinogenicity Rule': 0,
        'Skin Sensitization Rule': 1,
        'Acute Toxicity Rule': 0,
        'NonBiodegradable': 0,
        'SureChEMBL Rule': 0,
        'FAF-Drugs4 Rule': 4,
        'hERG Blockers': 0.521,
        'hERG Blockers (10um)': 0.315,
        'DILI': 0.962,
        'AMES Toxicity': 0.81,
        'Rat Oral Acute Toxicity': 0.226,
        'FDAMDD': 0.841,
        'Skin Sensitization_ADMET': 0.354,
        'Carcinogenicity_ADMET': 0.388,
        'Eye Corrosion': 0.0,
        'Eye Irritation': 0.078,
        'Respiratory': 0.436,
        'Human Hepatotoxicity': 0.875,
        'Drug-induced Nephrotoxicity': 0.878,
        'Drug-induced Neurotoxicity': 0.911,
        'Ototoxicity': 0.864,
        'Hematotoxicity': 0.41,
        'Genotoxicity_ADMET': 0.994,
        'RPMI-8226 Immunitoxicity': 0.098,
        'A549 Cytotoxicity': 0.114,
        'Hek293 Cytotoxicity': 0.529,
        'BCF': 0.987,
        'IGC50': 4.088,
        'LC50DM': 5.143,
        'LC50FM': 4.794,
        'Molweight_provided': 371.52,
        'Number of hydrogen bond acceptors_provided': 2,
        'Number of hydrogen bond donors_provided': 0,
        'Number of atoms_provided': 28,
        'Number of bonds_provided': 30,
        'Number of rotable bonds_provided': 8,
        'Molecular refractivity': 119.72,
        'Topological Polar Surface Area': 12.47,
        'octanolwater partition coefficientlogP': 6,
        'Predicted LD50: 1190mgkg': 1190,
        'Predicted Toxicity Class': 4,
        'Hepatotoxicity_PRO': 0.69,
        'Neurotoxicity_PRO': 0.87,
        'Nephrotoxicity_PRO': 0.90,
        'Respiratory toxicity_PRO': 0.98,
        'Cardiotoxicity': 0.77,
        'Carcinogenicity_PRO': 0.62,
        'Immunotoxicity_PRO': 0.96,
        'Mutagenicity_PRO': 0.97,
        'Cytotoxicity_PRO': 0.93,
        'BBBbarrier': 1.0,
        'Ecotoxicity': 0.73,
        'Clinical toxicity': 0.56,
        'Nutritional toxicity': 0.74,
        'Aryl hydrocarbon ReceptorAhR': 0.97,
        'Androgen ReceptorAR': 0.99,
        'Androgen Receptor Ligand Binding DomainARLBD': 0.99,
        'Aromatase': 1.0,
        'Estrogen Receptor AlphaER': 0.99,
        'Estrogen Receptor Ligand Binding DomainERLBD': 1.0,
        'Peroxisome Proliferator Activated Receptor GammaPPARGamma': 0.99,
        'Nuclear factorerythroidderived 2like 2antioxidant responsive elementnrf2ARE': 0.88,
        'Heat shock factor response elementHSE': 0.88,
        'Mitochondrial Membrane PotentialMMP': 0.70,
        'Phosphoprotein Tumor Supressor p53': 0.96,
        'ATPase family AAA domaincontaining protein 5ATAD5': 0.99,
        'Thyroid hormone receptor alphaTHRα': 0.90,
        'Thyroid hormone receptor betaTHRβ': 0.78,
        'TranstyretrinTTR': 0.97,
        'Ryanodine receptorRYR': 0.98,
        'GABA receptorGABAR': 0.96,
        'Glutamate NmethylDaspartate receptorNMDAR': 0.92,
        'alphaamino3hydroxy5methyl4isoxazolepropionate receptorAMPAR': 0.97,
        'Kainate receptorKAR': 0.99,
        'AchetylcholinesteraseAChE': 0.69,
        'Constitutive androstane receptorCAR': 0.98,
        'Pregnane X receptorPXR': 0.92,
        'NADHquinone oxidoreductaseNADHOX': 0.97,
        'Voltage gated sodium channelVGSC': 0.95,
        'NaI symporterNIS': 0.98,
        'Cytochrome CYP1A2': 0.76,
        'Cytochrome CYP2C19': 0.87,
        'Cytochrome CYP2C9': 0.56,
        'Cytochrome CYP2D6': 0.63,
        'Cytochrome CYP3A4': 0.71,
        'Cytochrome CYP2E1': 0.98,
    }.items()
}

# --- Assemble the modelling table --------------------------------------------
# `data` and `admet_data` both hold 20-element columns, so the two frames share
# the default 0..19 index and a column-wise concat lines the rows up.
df = pd.concat([pd.DataFrame(data), pd.DataFrame(admet_data)], axis=1)

# Constant distance descriptors (same value on every row).
df['Distance-1 ExcludeAliphatic'] = [0.839159] * 20
df['Distance-1 ExcludeAromatic'] = [0.839159] * 20

# --- Features and target -----------------------------------------------------
# NOTE(review): this script previously merged an undefined `binding_affinity_df`
# and read a 'Calculated affinity (kcal/mol)' column that is never created, so
# it raised NameError before any modelling ran. 'Binding Affinity' is the only
# affinity column defined in this file and is used as the target here; confirm
# that it is the intended label.
TARGET_COLUMN = 'Binding Affinity'

# Both columns are just 1..20, and the target is listed in ascending order, so
# they are a monotone function of the target. Leaving them in the feature
# matrix would leak the target's ordering into the model.
ROW_INDEX_COLUMNS = ['vina_dock_model', 'Model']

X = df.drop(columns=[TARGET_COLUMN, *ROW_INDEX_COLUMNS])
y = df[TARGET_COLUMN]

# --- Hold-out evaluation -----------------------------------------------------
# Split first, then fit the preprocessing on the training rows only. Fitting the
# imputer/scaler on all rows before splitting would let test-set statistics
# influence the features the model is trained on.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

imputer = KNNImputer(n_neighbors=5)
scaler = StandardScaler()
# make_pipeline chains impute -> scale -> ridge; calling fit() on the pipeline
# fits every step on the data passed in (here: the training rows).
model = make_pipeline(imputer, scaler, Ridge(alpha=1.0))
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print("R-squared:", r2)
print("MAE:", mae)
print("RMSE:", rmse)

# --- Cross-validation --------------------------------------------------------
# cross_val_score clones the whole pipeline for each fold, so imputation and
# scaling are re-fitted on that fold's training rows only. With 20 rows and
# 5 splits every fold is scored on 4 rows, so individual R-squared values are
# noisy; read the mean together with the spread.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(model, X, y, cv=kf, scoring='r2')

print("Cross-validation R-squared scores:", cv_scores)
print("Mean cross-validation R-squared score:", np.mean(cv_scores))

Devise Foundation

Monday, February 24, 2025

Drug Discovery

No comments:

Post a Comment

From Paikpara’s Lanes to Titagarh’s Bazaar—My Food Memories

Report Abuse

Labels

Popular Posts