Powered By Blogger

Friday, February 28, 2025

Actual vs Predicted Binding Affinity for Curcumin and Methylene Blue with TP53

CODE:


import pandas as pd

import json

from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestRegressor

from sklearn.metrics import mean_squared_error, r2_score

import matplotlib.pyplot as plt

import optuna

import numpy as np  # Import numpy


# Debugging: Start script execution

print("Starting script execution...")


# 1. Data Loading and Preparation

print("Loading data...")

curcumin_data = {

    'Model': list(range(1, 20)),  # Model number (1 to 19)

    'Calculated_affinity': [-7.204, -6.922, -6.488, -6.332, -6.236, -6.235, -6.234, -6.216, -6.159, -6.151,

                            -6.111, -6.052, -6.004, -5.777, -5.773, -5.663, -5.662, -5.622, -5.526]

}

curcumin_df = pd.DataFrame(curcumin_data)

curcumin_df['Molecule'] = 'Curcumin'


# Methylene Blue Binding Affinity Data (Revised)

binding_affinity_json = """

[

    {"Model": 1, "Calculated affinity (kcal/mol)": -5.074},

    {"Model": 2, "Calculated affinity (kcal/mol)": -5.073},

    {"Model": 3, "Calculated affinity (kcal/mol)": -5.031},

    {"Model": 4, "Calculated affinity (kcal/mol)": -4.927},

    {"Model": 5, "Calculated affinity (kcal/mol)": -4.807},

    {"Model": 6, "Calculated affinity (kcal/mol)": -4.766},

    {"Model": 7, "Calculated affinity (kcal/mol)": -4.753},

    {"Model": 8, "Calculated affinity (kcal/mol)": -4.751},

    {"Model": 9, "Calculated affinity (kcal/mol)": -4.735},

    {"Model": 10, "Calculated affinity (kcal/mol)": -4.666},

    {"Model": 11, "Calculated affinity (kcal/mol)": -4.656},

    {"Model": 12, "Calculated affinity (kcal/mol)": -4.621},

    {"Model": 13, "Calculated affinity (kcal/mol)": -4.472},

    {"Model": 14, "Calculated affinity (kcal/mol)": -4.436},

    {"Model": 15, "Calculated affinity (kcal/mol)": -4.399},

    {"Model": 16, "Calculated affinity (kcal/mol)": -4.390},

    {"Model": 17, "Calculated affinity (kcal/mol)": -4.351},

    {"Model": 18, "Calculated affinity (kcal/mol)": -4.266},

    {"Model": 19, "Calculated affinity (kcal/mol)": -4.245},

    {"Model": 20, "Calculated affinity (kcal/mol)": -4.127}

]

"""

binding_affinity_data = json.loads(binding_affinity_json)

mb_df = pd.DataFrame(binding_affinity_data)

mb_df['Molecule'] = 'Methylene Blue'

mb_df.rename(columns={'Calculated affinity (kcal/mol)': 'Calculated_affinity'}, inplace=True)


combined_df = pd.concat([curcumin_df, mb_df], ignore_index=True)


# Debugging: Check combined dataset

print("Combined dataset:")

print(combined_df.head())


X = combined_df[['Model']]

y = combined_df['Calculated_affinity']


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Debugging: Data split check

print(f"Training set size: {len(X_train)}, Test set size: {len(X_test)}")


# Define the objective function for Optuna

def objective(trial):

    n_estimators = trial.suggest_int('n_estimators', 50, 200)

    max_depth = trial.suggest_int('max_depth', 3, 15)

    min_samples_split = trial.suggest_int('min_samples_split', 2, 10)


    model = RandomForestRegressor(n_estimators=n_estimators,

                                  max_depth=max_depth,

                                  min_samples_split=min_samples_split,

                                  random_state=42)


    model.fit(X_train, y_train)


    y_pred = model.predict(X_test)


    mse = mean_squared_error(y_test, y_pred)


    return mse


# Run Optuna optimization

study = optuna.create_study(direction='minimize')

study.optimize(objective, n_trials=50)


best_params = study.best_params

print(f"Best hyperparameters: {best_params}")


final_rf_model = RandomForestRegressor(**best_params)

final_rf_model.fit(X_train, y_train)


# Calculate performance metrics on the test set

y_pred_test = final_rf_model.predict(X_test)

mse_test = mean_squared_error(y_test, y_pred_test)

r2_test = r2_score(y_test, y_pred_test)


# Print performance metrics for the test set

print(f"Test Set Mean Squared Error (MSE): {mse_test:.4f}")

print(f"Test Set R² Score: {r2_test:.4f}")


# Make predictions on the entire dataset for plotting

combined_df['Predicted_affinity'] = final_rf_model.predict(combined_df[['Model']])


plt.figure(figsize=(12, 8))

plt.scatter(combined_df[combined_df['Molecule'] == 'Curcumin']['Calculated_affinity'],

            combined_df[combined_df['Molecule'] == 'Curcumin']['Predicted_affinity'],

            color='blue', label='Curcumin')

plt.scatter(combined_df[combined_df['Molecule'] == 'Methylene Blue']['Calculated_affinity'],

            combined_df[combined_df['Molecule'] == 'Methylene Blue']['Predicted_affinity'],

            color='green', label='Methylene Blue')

plt.plot([combined_df['Calculated_affinity'].min(), combined_df['Calculated_affinity'].max()],

           [combined_df['Calculated_affinity'].min(), combined_df['Calculated_affinity'].max()],

           linestyle='--', color='red', label='Ideal Prediction')

plt.xlabel('Actual Calculated Affinity')

plt.ylabel('Predicted Calculated Affinity')

plt.title('Actual vs Predicted Binding Affinities')

plt.legend()

plt.grid(True)

plt.tight_layout()


# Save and display plot

output_file = 'actual_vs_predicted_affinity_combined.png'

plt.savefig(output_file)

print(f"Plot saved as: {output_file}")


# Show the plot if running interactively

plt.show()


OUTPUT:

Best hyperparameters: {'n_estimators': 104, 'max_depth': 3, 'min_samples_split': 9}
Test Set Mean Squared Error (MSE): 0.6499
Test Set R² Score: -0.1061

No comments:

Post a Comment

Reviving Life and Redefining Medicine: The ConsciousLeaf Vision

  Date : April 08, 2025 Author : Mrinmoy Chakraborty, Chairman of Devise Foundation, in collaboration with Grok 3 (xAI) Introduction At Devi...