ML CODE:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn import svm
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.neural_network import MLPRegressor
# Example DataFrame
# The table is entered column-wise as parallel lists: position i of every list
# describes the same pathway row, so all four lists must stay the same length
# (pd.DataFrame raises ValueError otherwise).
data = {
# KEGG pathway identifiers for human ("hsa" prefix followed by five digits).
"Pathway": ["hsa04115", "hsa01524", "hsa05220", "hsa05210", "hsa04215", "hsa04210", "hsa05206", "hsa04110", "hsa05216", "hsa05214", "hsa05218", "hsa05219", "hsa05222", "hsa05169", "hsa01522", "hsa05213", "hsa04218", "hsa05223", "hsa05202", "hsa05212", "hsa05200", "hsa05162", "hsa05226", "hsa05163", "hsa05161", "hsa05225", "hsa04722", "hsa05203", "hsa05217", "hsa04068", "hsa05166", "hsa05224", "hsa05160", "hsa05215", "hsa05167", "hsa04071", "hsa05131", "hsa04217", "hsa04012", "hsa05205", "hsa05165", "hsa05170", "hsa05132", "hsa04064", "hsa04151", "hsa04919", "hsa05418", "hsa04390", "hsa04630", "hsa05164", "hsa05152", "hsa05416", "hsa05130", "hsa05014", "hsa05230", "hsa01521", "hsa04211", "hsa04933", "hsa05168", "hsa04066", "hsa04928", "hsa04010", "hsa05016", "hsa04650", "hsa04932", "hsa04934", "hsa04310"],
# Human-readable pathway name for the identifier at the same position.
"Description": ["p53 signaling pathway", "Platinum drug resistance", "Chronic myeloid leukemia", "Colorectal cancer", "Apoptosis - multiple species", "Apoptosis", "MicroRNAs in cancer", "Cell cycle", "Thyroid cancer", "Glioma", "Melanoma", "Bladder cancer", "Small cell lung cancer", "Epstein-Barr virus infection", "Endocrine resistance", "Endometrial cancer", "Cellular senescence", "Non-small cell lung cancer", "Transcriptional misregulation in cancer", "Pancreatic cancer", "Pathways in cancer", "Measles", "Gastric cancer", "Human cytomegalovirus infection", "Hepatitis B", "Hepatocellular carcinoma", "Neurotrophin signaling pathway", "Viral carcinogenesis", "Basal cell carcinoma", "FoxO signaling pathway", "Human T-cell leukemia virus 1 infection", "Breast cancer", "Hepatitis C", "Prostate cancer", "Kaposi sarcoma-associated herpesvirus infection", "Sphingolipid signaling pathway", "Shigellosis", "Necroptosis", "ErbB signaling pathway", "Proteoglycans in cancer", "Human papillomavirus infection", "Human immunodeficiency virus 1 infection", "Salmonella infection", "NF-kappa B signaling pathway", "PI3K-Akt signaling pathway", "Thyroid hormone signaling pathway", "Fluid shear stress and atherosclerosis", "Hippo signaling pathway", "JAK-STAT signaling pathway", "Influenza A", "Tuberculosis", "Viral myocarditis", "Pathogenic Escherichia coli infection", "Amyotrophic lateral sclerosis", "Central carbon metabolism in cancer", "EGFR tyrosine kinase inhibitor resistance", "Longevity regulating pathway", "AGE-RAGE signaling pathway in diabetic complications", "Herpes simplex virus 1 infection", "HIF-1 signaling pathway", "Parathyroid hormone synthesis, secretion and action", "MAPK signaling pathway", "Huntington disease", "Natural killer cell mediated cytotoxicity", "Non-alcoholic fatty liver disease", "Cushing syndrome", "Wnt signaling pathway"],
# Strings of the form "<count> of <total>".
# NOTE(review): presumably matched genes out of the genes annotated to the
# pathway -- confirm against the tool that produced this table.
"Values": ["12 of 72", "10 of 70", "8 of 75", "8 of 82", "6 of 30", "9 of 131", "9 of 159", "8 of 120", "5 of 37", "6 of 71", "6 of 72", "5 of 40", "6 of 92", "8 of 192", "6 of 94", "5 of 58", "7 of 150", "5 of 68", "7 of 171", "5 of 71", "12 of 515", "6 of 137", "6 of 146", "7 of 217", "6 of 158", "6 of 161", "5 of 112", "6 of 183", "4 of 63", "5 of 126", "6 of 210", "5 of 146", "5 of 157", "4 of 97", "5 of 187", "4 of 116", "5 of 218", "4 of 147", "3 of 81", "4 of 194", "5 of 324", "4 of 203", "4 of 209", "3 of 101", "5 of 349", "3 of 120", "3 of 129", "3 of 154", "3 of 158", "3 of 163", "3 of 165", "2 of 55", "3 of 187", "4 of 350", "2 of 68", "2 of 77", "2 of 87", "2 of 96", "4 of 478", "2 of 102", "2 of 104", "3 of 286", "3 of 295", "2 of 120", "2 of 146", "2 of 153", "2 of 154"],
# P-values kept as text, in a mix of scientific ("2.60e-22") and plain decimal
# ("0.0016") notation; both forms are accepted by float().
"P-Value": ["2.60e-22", "1.02e-17", "4.19e-13", "5.57e-13", "2.38e-11", "3.44e-13", "7.86e-13", "7.55e-12", "7.90e-09", "2.31e-09", "2.31e-09", "1.04e-08", "7.44e-09", "2.25e-10", "7.90e-09", "5.34e-08", "2.31e-09", "9.87e-08", "4.79e-09", "1.12e-07", "4.19e-13", "5.34e-08", "7.25e-08", "1.76e-08", "1.05e-07", "1.12e-07", "8.51e-07", "2.18e-07", "4.62e-06", "1.45e-06", "4.66e-07", "2.85e-06", "3.91e-06", "2.21e-05", "8.55e-06", "4.28e-05", "1.74e-05", "0.00010", "0.00053", "0.00028", "0.00010", "0.00032", "0.00035", "0.00097", "0.00014", "0.0016", "0.0019", "0.0030", "0.0032", "0.0034", "0.0034", "0.0083", "0.0048", "0.0022", "0.0123", "0.0150", "0.0184", "0.0218", "0.0064", "0.0241", "0.0246", "0.0150", "0.0158", "0.0318", "0.0455", "0.0490", "0.0490"]
}
# Build the table; each dict key becomes a column of strings.
df = pd.DataFrame(data)
# Seed shared by the train/test split and every estimator that accepts one,
# so repeated runs print the same metrics.
RANDOM_STATE = 42

# Model inputs. 'P-Value' is deliberately NOT listed: it is the target below,
# and feeding the target to the model as a feature (target leakage) makes the
# reported errors meaningless.
FEATURE_COLUMNS = ['Pathway', 'Description', 'Hits', 'PathwaySize']


def evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit *model* on the training split and score it on the test split.

    Args:
        model: Unfitted estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training feature matrix.
        X_test: Test feature matrix with the same columns as ``X_train``.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        Tuple ``(mse, rmse, mae)`` computed on the test split.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    mse = mean_squared_error(y_test, predictions)
    return mse, np.sqrt(mse), mean_absolute_error(y_test, predictions)


# Clean and normalize data.
# Every 'Values' entry has the form "<count> of <total>"; expose both numbers
# as integer columns so they can be used as numeric features.
# NOTE(review): presumably <count> is the number of matched genes and <total>
# the pathway size -- confirm against the source of the table.
value_parts = df['Values'].str.split(' of ', expand=True).astype(int)
df['Hits'] = value_parts[0]
df['PathwaySize'] = value_parts[1]
df['P-Value'] = df['P-Value'].astype(float)

# For demonstration, let's assume we have a target variable 'target'.
# Since the actual target variable isn't specified, 'P-Value' is used as an
# example target.
df['target'] = df['P-Value']

# Split data into features (X) and target (y).
# .copy() makes X independent of df, so the column reassignments below do not
# trigger pandas' SettingWithCopyWarning.
X = df[FEATURE_COLUMNS].copy()
y = df['target']

# Convert categorical variables into numerical variables.
# The codes follow the sorted order of the labels and carry no numeric meaning.
X['Pathway'] = pd.Categorical(X['Pathway']).codes
X['Description'] = pd.Categorical(X['Description']).codes

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Models to compare, keyed by the label used in the printed report.
# SVR has no random_state parameter; the other three are seeded.
models = {
    "SVM": svm.SVR(),
    "Gradient Boosting": GradientBoostingRegressor(random_state=RANDOM_STATE),
    "Neural Networks": MLPRegressor(
        hidden_layer_sizes=(50, 50), max_iter=1000, random_state=RANDOM_STATE
    ),
    "Random Forest": RandomForestRegressor(random_state=RANDOM_STATE),
}

# Fit each model, keep its metrics, and print one report line per model.
results = {}
for model_name, model in models.items():
    mse, rmse, mae = evaluate_model(model, X_train, X_test, y_train, y_test)
    results[model_name] = {"mse": mse, "rmse": rmse, "mae": mae}
    print(f"{model_name} RMSE: {rmse}, MSE: {mse}, MAE: {mae}")
OUTPUT (example run; exact values depend on the features used and on random initialisation):
SVM RMSE: 0.02244412946469833, MSE: 0.0005037389474281397, MAE: 0.021334132565418294
Gradient Boosting RMSE: 0.0009618251524734596, MSE: 9.251076239305937e-07, MAE: 0.0003519680170682877
Neural Networks RMSE: 0.42080324501370675, MSE: 0.1770753710140657, MAE: 0.3331247859087207
Random Forest RMSE: 0.0008759705473688987, MSE: 7.673243998577681e-07, MAE: 0.00039417599079070856
TP53 KEGG Pathway Analysis
KEGG pathway analysis was performed to understand the potential of Curcumin and Methylene Blue to modulate the TP53 signalling pathway.
CODE:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# TP53 KEGG Pathway
# Draws a small hand-laid-out diagram: one marker per component and one arrow
# per (source, target) interaction, then saves it to 'tp53_kegg_pathway.png'.
tp53_pathway_components = ['TP53', 'MDM2', 'p21', 'BAX', 'PUMA']
tp53_pathway_interactions = [('TP53', 'MDM2'), ('TP53', 'p21'), ('TP53', 'BAX'), ('TP53', 'PUMA')]

plt.figure(figsize=(10, 8))
plt.title('TP53 KEGG Pathway', fontsize=16)

# Node positions - adjust as needed for better visualization
pos = {'TP53': (0, 0), 'MDM2': (2, 1), 'p21': (2, -1), 'BAX': (4, 0.5), 'PUMA': (4, -0.5)}

# Draw nodes: a marker at each position with its label just below it.
for component in tp53_pathway_components:
    node_x, node_y = pos[component]
    plt.plot(node_x, node_y, 'o', markersize=12, color='skyblue', alpha=0.7)
    plt.text(node_x, node_y - 0.2, component, ha='center', fontsize=10)

# Draw edges: one arrow from the source node to the target node.
for start_node, end_node in tp53_pathway_interactions:
    start_x, start_y = pos[start_node]
    end_x, end_y = pos[end_node]
    plt.arrow(
        start_x, start_y, end_x - start_x, end_y - start_y,
        head_width=0.1, head_length=0.2, fc='gray', ec='gray', alpha=0.5,
        # Without this the head is added beyond (dx, dy) and the arrow tip
        # overshoots the target node by head_length.
        length_includes_head=True,
    )

plt.axis('off')
plt.tight_layout()
# Save before show(): some backends discard the figure once the window closes.
plt.savefig('tp53_kegg_pathway.png')
plt.show()
# Disease-Gene Association
# Small illustrative table (one row per gene), summarised as a bar chart of
# gene counts per cancer type and saved to 'disease_gene_association.png'.
disease_gene_data = {
    'Gene': ['TP53', 'BRCA1', 'PTEN', 'AKT1', 'PIK3CA'],
    'Cancer Type': ['Multiple', 'Breast/Ovarian', 'Multiple', 'Multiple', 'Breast'],
    'Association': ['Tumor Suppressor', 'Tumor Suppressor', 'Tumor Suppressor', 'Oncogene', 'Oncogene'],
}
df = pd.DataFrame(disease_gene_data)

# Count the genes in each cancer-type group, largest group first.
genes_by_cancer_type = df.groupby('Cancer Type')['Gene']
cancer_gene_counts = genes_by_cancer_type.count().sort_values(ascending=False)

# Draw on an explicit figure/axes pair instead of the implicit current axes.
fig, ax = plt.subplots(figsize=(10, 6))
cancer_gene_counts.plot(kind='bar', color='coral', alpha=0.7, ax=ax)
ax.set_title('Number of Genes Associated with Each Cancer Type', fontsize=14)
ax.set_xlabel('Cancer Type', fontsize=12)
ax.set_ylabel('Number of Genes', fontsize=12)
# Slanted, right-aligned tick labels keep the category names readable.
plt.xticks(rotation=45, ha='right')
fig.tight_layout()
fig.savefig('disease_gene_association.png')
plt.show()
KEGG PATHWAY © 2025 by DEVISE FOUNDATION is licensed under CC BY-NC-ND 4.0
No comments:
Post a Comment