Devise Foundation: TP53 KEGG PATHWAY

ML CODE:

import numpy as np
import pandas as pd
from sklearn import svm
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Example DataFrame
#
# Column-wise table with one row per KEGG pathway. The four lists are
# index-aligned: position i in every list describes the same pathway, and the
# dict is turned into a DataFrame right after this literal.

data = {

# KEGG pathway identifiers (all carry the "hsa" prefix).
"Pathway": ["hsa04115", "hsa01524", "hsa05220", "hsa05210", "hsa04215", "hsa04210", "hsa05206", "hsa04110", "hsa05216", "hsa05214", "hsa05218", "hsa05219", "hsa05222", "hsa05169", "hsa01522", "hsa05213", "hsa04218", "hsa05223", "hsa05202", "hsa05212", "hsa05200", "hsa05162", "hsa05226", "hsa05163", "hsa05161", "hsa05225", "hsa04722", "hsa05203", "hsa05217", "hsa04068", "hsa05166", "hsa05224", "hsa05160", "hsa05215", "hsa05167", "hsa04071", "hsa05131", "hsa04217", "hsa04012", "hsa05205", "hsa05165", "hsa05170", "hsa05132", "hsa04064", "hsa04151", "hsa04919", "hsa05418", "hsa04390", "hsa04630", "hsa05164", "hsa05152", "hsa05416", "hsa05130", "hsa05014", "hsa05230", "hsa01521", "hsa04211", "hsa04933", "hsa05168", "hsa04066", "hsa04928", "hsa04010", "hsa05016", "hsa04650", "hsa04932", "hsa04934", "hsa04310"],

# Human-readable pathway name for the identifier at the same position.
"Description": ["p53 signaling pathway", "Platinum drug resistance", "Chronic myeloid leukemia", "Colorectal cancer", "Apoptosis - multiple species", "Apoptosis", "MicroRNAs in cancer", "Cell cycle", "Thyroid cancer", "Glioma", "Melanoma", "Bladder cancer", "Small cell lung cancer", "Epstein-Barr virus infection", "Endocrine resistance", "Endometrial cancer", "Cellular senescence", "Non-small cell lung cancer", "Transcriptional misregulation in cancer", "Pancreatic cancer", "Pathways in cancer", "Measles", "Gastric cancer", "Human cytomegalovirus infection", "Hepatitis B", "Hepatocellular carcinoma", "Neurotrophin signaling pathway", "Viral carcinogenesis", "Basal cell carcinoma", "FoxO signaling pathway", "Human T-cell leukemia virus 1 infection", "Breast cancer", "Hepatitis C", "Prostate cancer", "Kaposi sarcoma-associated herpesvirus infection", "Sphingolipid signaling pathway", "Shigellosis", "Necroptosis", "ErbB signaling pathway", "Proteoglycans in cancer", "Human papillomavirus infection", "Human immunodeficiency virus 1 infection", "Salmonella infection", "NF-kappa B signaling pathway", "PI3K-Akt signaling pathway", "Thyroid hormone signaling pathway", "Fluid shear stress and atherosclerosis", "Hippo signaling pathway", "JAK-STAT signaling pathway", "Influenza A", "Tuberculosis", "Viral myocarditis", "Pathogenic Escherichia coli infection", "Amyotrophic lateral sclerosis", "Central carbon metabolism in cancer", "EGFR tyrosine kinase inhibitor resistance", "Longevity regulating pathway", "AGE-RAGE signaling pathway in diabetic complications", "Herpes simplex virus 1 infection", "HIF-1 signaling pathway", "Parathyroid hormone synthesis, secretion and action", "MAPK signaling pathway", "Huntington disease", "Natural killer cell mediated cytotoxicity", "Non-alcoholic fatty liver disease", "Cushing syndrome", "Wnt signaling pathway"],

# Strings of the form "<k> of <n>".
# NOTE(review): presumably k = query genes found in the pathway and
# n = total genes annotated to the pathway -- confirm against the tool that
# produced this table.
"Values": ["12 of 72", "10 of 70", "8 of 75", "8 of 82", "6 of 30", "9 of 131", "9 of 159", "8 of 120", "5 of 37", "6 of 71", "6 of 72", "5 of 40", "6 of 92", "8 of 192", "6 of 94", "5 of 58", "7 of 150", "5 of 68", "7 of 171", "5 of 71", "12 of 515", "6 of 137", "6 of 146", "7 of 217", "6 of 158", "6 of 161", "5 of 112", "6 of 183", "4 of 63", "5 of 126", "6 of 210", "5 of 146", "5 of 157", "4 of 97", "5 of 187", "4 of 116", "5 of 218", "4 of 147", "3 of 81", "4 of 194", "5 of 324", "4 of 203", "4 of 209", "3 of 101", "5 of 349", "3 of 120", "3 of 129", "3 of 154", "3 of 158", "3 of 163", "3 of 165", "2 of 55", "3 of 187", "4 of 350", "2 of 68", "2 of 77", "2 of 87", "2 of 96", "4 of 478", "2 of 102", "2 of 104", "3 of 286", "3 of 295", "2 of 120", "2 of 146", "2 of 153", "2 of 154"],

# P-values kept as strings, in a mix of scientific ("2.60e-22") and plain
# decimal ("0.0490") notation; they are converted to float after the
# DataFrame is built.
"P-Value": ["2.60e-22", "1.02e-17", "4.19e-13", "5.57e-13", "2.38e-11", "3.44e-13", "7.86e-13", "7.55e-12", "7.90e-09", "2.31e-09", "2.31e-09", "1.04e-08", "7.44e-09", "2.25e-10", "7.90e-09", "5.34e-08", "2.31e-09", "9.87e-08", "4.79e-09", "1.12e-07", "4.19e-13", "5.34e-08", "7.25e-08", "1.76e-08", "1.05e-07", "1.12e-07", "8.51e-07", "2.18e-07", "4.62e-06", "1.45e-06", "4.66e-07", "2.85e-06", "3.91e-06", "2.21e-05", "8.55e-06", "4.28e-05", "1.74e-05", "0.00010", "0.00053", "0.00028", "0.00010", "0.00032", "0.00035", "0.00097", "0.00014", "0.0016", "0.0019", "0.0030", "0.0032", "0.0034", "0.0034", "0.0083", "0.0048", "0.0022", "0.0123", "0.0150", "0.0184", "0.0218", "0.0064", "0.0241", "0.0246", "0.0150", "0.0158", "0.0318", "0.0455", "0.0490", "0.0490"]

}

df = pd.DataFrame(data)

# ---------------------------------------------------------------------------
# Clean and normalize
# ---------------------------------------------------------------------------
# "Values" holds strings such as "12 of 72". Split them into two integer
# columns so the counts can actually be used as numeric features (a column of
# string lists cannot be fed to an estimator).
# NOTE(review): the column names assume "<hits> of <pathway size>" -- confirm
# against the tool that produced the table.
value_parts = df['Values'].str.split(' of ', expand=True).astype(int)
df['Hits'] = value_parts[0]
df['Total'] = value_parts[1]
df['HitRatio'] = df['Hits'] / df['Total']

df['P-Value'] = df['P-Value'].astype(float)

# For demonstration, 'P-Value' is used as the regression target because no
# other target variable is specified.
# NOTE(review): the p-values span ~20 orders of magnitude and are all below
# 0.05, which is smaller than SVR's default epsilon of 0.1; regressing on
# -log10(p) would likely be more meaningful -- left unchanged so the printed
# metrics stay in p-value units.
df['target'] = df['P-Value']

# ---------------------------------------------------------------------------
# Features (X) and target (y)
# ---------------------------------------------------------------------------
# * 'P-Value' must NOT be a feature: it is the target itself, and including
#   it lets every model "predict" by copying its input (target leakage).
# * 'Pathway' and 'Description' are unique per row, so their category codes
#   would only be arbitrary row labels with no predictive meaning.
# .copy() gives an independent frame, so later modifications of X never
# trigger pandas' SettingWithCopyWarning or touch df.
FEATURE_COLUMNS = ['Hits', 'Total', 'HitRatio']
X = df[FEATURE_COLUMNS].copy()
y = df['target']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


def _fit_and_score(model, features_train, features_test, target_train, target_test):
    """Fit *model* and evaluate it on the held-out split.

    Returns a tuple ``(predictions, mse, rmse, mae)`` computed on the test
    features/targets.
    """
    model.fit(features_train, target_train)
    predictions = model.predict(features_test)
    mse = mean_squared_error(target_test, predictions)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(target_test, predictions)
    return predictions, mse, rmse, mae


# Fixed seed so the stochastic models give the same numbers on every run.
RANDOM_STATE = 42

# SVM Model -- SVR is scale-sensitive, so standardize the features first.
svm_model = make_pipeline(StandardScaler(), svm.SVR())
y_pred_svm, mse_svm, rmse_svm, mae_svm = _fit_and_score(
    svm_model, X_train, X_test, y_train, y_test
)

# Gradient Boosting Model -- tree-based, no scaling required.
gb_model = GradientBoostingRegressor(random_state=RANDOM_STATE)
y_pred_gb, mse_gb, rmse_gb, mae_gb = _fit_and_score(
    gb_model, X_train, X_test, y_train, y_test
)

# Neural Networks Model -- MLPs also need standardized inputs to train well.
nn_model = make_pipeline(
    StandardScaler(),
    MLPRegressor(
        hidden_layer_sizes=(50, 50),
        max_iter=1000,
        random_state=RANDOM_STATE,
    ),
)
y_pred_nn, mse_nn, rmse_nn, mae_nn = _fit_and_score(
    nn_model, X_train, X_test, y_train, y_test
)

# Random Forest Model -- tree-based, no scaling required.
rf_model = RandomForestRegressor(random_state=RANDOM_STATE)
y_pred_rf, mse_rf, rmse_rf, mae_rf = _fit_and_score(
    rf_model, X_train, X_test, y_train, y_test
)

# Print metrics
print(f"SVM RMSE: {rmse_svm}, MSE: {mse_svm}, MAE: {mae_svm}")
print(f"Gradient Boosting RMSE: {rmse_gb}, MSE: {mse_gb}, MAE: {mae_gb}")
print(f"Neural Networks RMSE: {rmse_nn}, MSE: {mse_nn}, MAE: {mae_nn}")
print(f"Random Forest RMSE: {rmse_rf}, MSE: {mse_rf}, MAE: {mae_rf}")

OUTPUT:

SVM RMSE: 0.02244412946469833, MSE: 0.0005037389474281397, MAE: 0.021334132565418294

Gradient Boosting RMSE: 0.0009618251524734596, MSE: 9.251076239305937e-07, MAE: 0.0003519680170682877

Neural Networks RMSE: 0.42080324501370675, MSE: 0.1770753710140657, MAE: 0.3331247859087207

Random Forest RMSE: 0.0008759705473688987, MSE: 7.673243998577681e-07, MAE: 0.00039417599079070856

TP53 KEGG Pathway Analysis

KEGG pathway analysis was performed to assess the potential of curcumin and methylene blue to modulate the TP53 signaling pathway.

CODE:

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

# TP53 KEGG Pathway
#
# Draws a small hand-laid-out diagram: TP53 on the left with one arrow to
# each of its four listed partners, saves it as a PNG and then shows it.

tp53_pathway_components = ['TP53', 'MDM2', 'p21', 'BAX', 'PUMA']

# Directed edges as (source, target) pairs; every edge starts at TP53.
tp53_pathway_interactions = [('TP53', 'MDM2'), ('TP53', 'p21'), ('TP53', 'BAX'), ('TP53', 'PUMA')]

plt.figure(figsize=(10, 8))
plt.title('TP53 KEGG Pathway', fontsize=16)

# Node positions - adjust as needed for better visualization
pos = {'TP53': (0, 0), 'MDM2': (2, 1), 'p21': (2, -1), 'BAX': (4, 0.5), 'PUMA': (4, -0.5)}

# Draw nodes: a marker at each position with its label slightly below it.
for component in tp53_pathway_components:
    node_x, node_y = pos[component]
    plt.plot(node_x, node_y, 'o', markersize=12, color='skyblue', alpha=0.7)
    plt.text(node_x, node_y - 0.2, component, ha='center', fontsize=10)

# Draw edges
for start_node, end_node in tp53_pathway_interactions:
    start_pos = pos[start_node]
    end_pos = pos[end_node]
    plt.arrow(
        start_pos[0],
        start_pos[1],
        end_pos[0] - start_pos[0],
        end_pos[1] - start_pos[1],
        head_width=0.1,
        head_length=0.2,
        fc='gray',
        ec='gray',
        alpha=0.5,
        # Count the head as part of the arrow length so the tip stops at the
        # target node instead of overshooting it by head_length.
        length_includes_head=True,
    )

plt.axis('off')
plt.tight_layout()

# Save before show(): the figure may be closed once the window is dismissed.
plt.savefig('tp53_kegg_pathway.png')
plt.show()

# Disease-Gene Association
#
# Builds a small gene/cancer-type table, counts how many genes are listed for
# each cancer type, and renders the counts as a bar chart that is saved to
# disk and then displayed.

disease_gene_data = {
    'Gene': ['TP53', 'BRCA1', 'PTEN', 'AKT1', 'PIK3CA'],
    'Cancer Type': ['Multiple', 'Breast/Ovarian', 'Multiple', 'Multiple', 'Breast'],
    'Association': ['Tumor Suppressor', 'Tumor Suppressor', 'Tumor Suppressor', 'Oncogene', 'Oncogene'],
}
df = pd.DataFrame(disease_gene_data)

# Genes per cancer type, largest group first.
genes_by_cancer_type = df.groupby('Cancer Type')['Gene']
genes_per_cancer_type = genes_by_cancer_type.count()
cancer_gene_counts = genes_per_cancer_type.sort_values(ascending=False)

# Bar chart of the counts.
plt.figure(figsize=(10, 6))
cancer_gene_counts.plot(kind='bar', color='coral', alpha=0.7)

# Title and axis labelling.
plt.title('Number of Genes Associated with Each Cancer Type', fontsize=14)
plt.xlabel('Cancer Type', fontsize=12)
plt.ylabel('Number of Genes', fontsize=12)
plt.xticks(rotation=45, ha='right')

# Fit the layout, write the image, then display it.
plt.tight_layout()
plt.savefig('disease_gene_association.png')
plt.show()

Devise Foundation

Monday, February 24, 2025

TP53 KEGG PATHWAY

No comments:

Post a Comment

From Paikpara’s Lanes to Titagarh’s Bazaar—My Food Memories

Report Abuse

Labels

Popular Posts