Powered By Blogger

Thursday, February 20, 2025

TP53 PATHWAY CODE

import re

import pandas as pd


# Section headers as they appear in a KEGG gene page pasted as plain text.
# A line that starts with one of these ends the section that precedes it.
_KEGG_SECTION_HEADERS = (
    "Entry", "Symbol", "Name", "KO", "Organism", "Pathway", "Network",
    "Element", "Disease", "Drug target", "Brite", "BRITE hierarchy", "SSDB",
    "Motif", "Other DBs", "Structure", "Position", "AA seq", "NT seq",
)
_KEGG_HEADER_RE = re.compile(
    r"^[ \t]*(?:"
    + "|".join(re.escape(header) for header in _KEGG_SECTION_HEADERS)
    + r")(?=\s|$)",
    re.MULTILINE,
)
# Entry identifiers used to recognise real entries inside a section.
_KEGG_PATHWAY_ID_RE = re.compile(r"hsa\d+")
_KEGG_DISEASE_ID_RE = re.compile(r"\bH\d{5}\b")
_KEGG_DRUG_ID_RE = re.compile(r"\bD\d{5}\b")


def _kegg_section(content, header):
    """Return the text that follows the *header* line, or None if absent.

    The header must start a line (leading blanks allowed) and be followed by
    whitespace or end of text, so "Pathway" does not match inside an entry
    such as "hsa05200  Pathways in cancer".  The section runs up to the next
    line that starts with any known section header, or to the end of text.
    """
    start = re.search(
        r"^[ \t]*" + re.escape(header) + r"(?=\s|$)", content, re.MULTILINE
    )
    if start is None:
        return None
    # '^' only matches at real line starts, so searching from mid-line is safe.
    stop = _KEGG_HEADER_RE.search(content, start.end())
    end = stop.start() if stop else len(content)
    return content[start.end():end]


def extract_tp53_features(content):
    """Extract binary/count features from a TP53 KEGG record.

    Args:
        content: The KEGG gene record as one string, with each section header
            ("Pathway", "Disease", "Drug target", ...) at the start of a line.

    Returns:
        A dict holding, for each section that is present in ``content``:

        * ``TP53_in_Cell_Cycle`` -- 1 if pathway ``hsa04110`` is listed in
          the Pathway section, else 0.
        * ``TP53_in_Cancer`` -- 1 if any Disease entry mentions "cancer"
          (case-insensitive), else 0.
        * ``TP53_is_Drug_Target`` / ``TP53_Num_Drug_Target`` -- whether and
          how many Drug target entries are listed.

        Keys belonging to a section whose header line is missing are omitted.
    """
    features = {}

    # --- Pathway information ---
    pathway_text = _kegg_section(content, "Pathway")
    if pathway_text is not None:
        pathways = set(_KEGG_PATHWAY_ID_RE.findall(pathway_text))
        features['TP53_in_Cell_Cycle'] = 1 if "hsa04110" in pathways else 0
        # Add more pathway features here...

    # --- Disease information ---
    disease_text = _kegg_section(content, "Disease")
    if disease_text is not None:
        # Keep only lines that carry a disease identifier (H + 5 digits).
        diseases = [
            line.strip()
            for line in disease_text.splitlines()
            if _KEGG_DISEASE_ID_RE.search(line)
        ]
        features['TP53_in_Cancer'] = (
            1 if any("cancer" in d.lower() for d in diseases) else 0
        )
        # Add more disease features here...

    # --- Drug target information ---
    drug_text = _kegg_section(content, "Drug target")
    if drug_text is not None:
        # Keep only lines that carry a drug identifier (D + 5 digits).
        drugs = [
            line.strip()
            for line in drug_text.splitlines()
            if _KEGG_DRUG_ID_RE.search(line)
        ]
        features['TP53_is_Drug_Target'] = 1 if drugs else 0
        features['TP53_Num_Drug_Target'] = len(drugs)

    return features


# Sample data: plain-text content of the KEGG record for human TP53
# (gene 7157), embedded verbatim. Paste the content of your own file here to
# analyse a different record. The text is passed unchanged to
# extract_tp53_features below, so line breaks must be kept as they are.

data = """

Homo sapiens (human): 7157 Help

Entry

7157              CDS       T01001                                 

Symbol

TP53, BCC7, BMFS5, LFS1, P53, TRP53

Name

(RefSeq) tumor protein p53

  KO

K04451  tumor protein p53

Organism

hsa  Homo sapiens (human)

Pathway

hsa01522  Endocrine resistance

hsa01524  Platinum drug resistance

hsa04010  MAPK signaling pathway

hsa04071  Sphingolipid signaling pathway

hsa04110  Cell cycle

hsa04115  p53 signaling pathway

hsa04137  Mitophagy - animal

hsa04151  PI3K-Akt signaling pathway

hsa04210  Apoptosis

hsa04211  Longevity regulating pathway

hsa04216  Ferroptosis

hsa04218  Cellular senescence

hsa04310  Wnt signaling pathway

hsa04722  Neurotrophin signaling pathway

hsa04919  Thyroid hormone signaling pathway

hsa05012  Parkinson disease

hsa05014  Amyotrophic lateral sclerosis

hsa05016  Huntington disease

hsa05131  Shigellosis

hsa05160  Hepatitis C

hsa05161  Hepatitis B

hsa05162  Measles

hsa05163  Human cytomegalovirus infection

hsa05165  Human papillomavirus infection

hsa05166  Human T-cell leukemia virus 1 infection

hsa05167  Kaposi sarcoma-associated herpesvirus infection

hsa05168  Herpes simplex virus 1 infection

hsa05169  Epstein-Barr virus infection

hsa05200  Pathways in cancer

hsa05202  Transcriptional misregulation in cancer

hsa05203  Viral carcinogenesis

hsa05205  Proteoglycans in cancer

hsa05206  MicroRNAs in cancer

hsa05210  Colorectal cancer

hsa05212  Pancreatic cancer

hsa05213  Endometrial cancer

hsa05214  Glioma

hsa05215  Prostate cancer

hsa05216  Thyroid cancer

hsa05217  Basal cell carcinoma

hsa05218  Melanoma

hsa05219  Bladder cancer

hsa05220  Chronic myeloid leukemia

hsa05222  Small cell lung cancer

hsa05223  Non-small cell lung cancer

hsa05224  Breast cancer

hsa05225  Hepatocellular carcinoma

hsa05226  Gastric cancer

hsa05230  Central carbon metabolism in cancer

hsa05417  Lipid and atherosclerosis

hsa05418  Fluid shear stress and atherosclerosis

Network

nt06160  Human T-cell leukemia virus 1 (HTLV-1)

nt06162  Hepatitis B virus (HBV)

nt06163  Hepatitis C virus (HCV)

nt06164  Kaposi sarcoma-associated herpesvirus (KSHV)

nt06165  Epstein-Barr virus (EBV)

nt06166  Human papillomavirus (HPV)

nt06167  Human cytomegalovirus (HCMV)

nt06168  Herpes simplex virus 1 (HSV-1)

nt06169  Measles virus (MV)

nt06170  Influenza A virus (IAV)

nt06230  Cell cycle (cancer)

nt06240  Transcription (cancer)

nt06260  Colorectal cancer

nt06261  Gastric cancer

nt06262  Pancreatic cancer

nt06263  Hepatocellular carcinoma

nt06265  Bladder cancer

nt06266  Non-small cell lung cancer

nt06267  Small cell lung cancer

nt06268  Melanoma

nt06269  Basal cell carcinoma

nt06270  Breast cancer

nt06271  Endometrial cancer

nt06273  Glioma

nt06274  Thyroid cancer

nt06276  Chronic myeloid leukemia

nt06461  Huntington disease

nt06463  Parkinson disease

  Element

N00066  MDM2-p21-Cell cycle G1/S

N00067  Deleted p14(ARF) to p21-cell cycle G1/S

N00068  Amplified MDM2 to p21-cell cycle G1/S

N00076  Mutation-inactivated p14(ARF) to p21-cell cycle G1/S

N00115  Mutation-inactivated TP53 to transcription

N00131  Amplified MYCN to transcriptional activation

N00167  KSHV vIRF1/3 to p21-cell cycle G1/S

N00169  KSHV LANA to p21-cell cycle G1/S

N00223  EBV EBNA1 to p53-mediated transcription

N00263  EBV EBNA3C to p53-mediated transcription

N00347  p300-p21-Cell cycle G1/S

N00358  HPV E6 to p21-cell cycle G1/S

N00420  HCMV IE2-86 to p21-cell cycle G1/S

N00481  EBV BZLF1 to p53-mediated transcription

N00497  HTLV-1 Tax to p21-cell cycle G1/S

N00499  ATR-p21-Cell cycle G2/M

N00520  HCV NS5A to p21-cell cycle G1/S

N00521  HCV Core to p21-cell cycle G1/S

N00522  HCV NS3 to p21-cell cycle G1/S

N00535  HBV HBx to p53-mediated transcription

N00536  MDM2-p21-Cell cycle G1/S

N00592  HSV ICP0 to p53-mediated transcription

N00697  HV P to p53-mediated transcription

N00982  Mutation-caused aberrant Htt to p53-mediated transcription

N01058  Mutation-inactivated DJ1 to to p53-mediated transcription

Disease

H00004  Chronic myeloid leukemia

H00005  Chronic lymphocytic leukemia

H00006  Hairy cell leukemia

H00008  Burkitt lymphoma

H00009  Adult T-cell leukemia

H00010  Multiple myeloma

H00013  Small cell lung cancer

H00014  Non-small cell lung cancer

H00015  Malignant pleural mesothelioma

H00016  Oral cancer

H00017  Esophageal cancer

H00018  Gastric cancer

H00019  Pancreatic cancer

H00020  Colorectal cancer

H00022  Bladder cancer

H00025  Penile cancer

H00026  Endometrial cancer

H00027  Ovarian cancer

H00028  Choriocarcinoma

H00029  Vulvar cancer

H00031  Breast cancer

H00032  Thyroid cancer

H00033  Adrenal carcinoma

H00036  Osteosarcoma

H00038  Melanoma

H00039  Basal cell carcinoma

H00040  Squamous cell carcinoma

H00041  Kaposi sarcoma

H00042  Glioma

H00044  Cancer of the anal canal

H00046  Cholangiocarcinoma

H00047  Gallbladder cancer

H00048  Hepatocellular carcinoma

H00055  Laryngeal cancer

H00881  Li-Fraumeni syndrome

H01007  Choroid plexus papilloma

H01463  Mycosis fungoides

H01464  Mantle cell lymphoma

H01470  Giant cell tumor of bone

H01554  Fallopian tube cancer

H01555  Merkel cell carcinoma

H01557  Hepatic angiosarcoma

H01559  Oropharyngeal cancer

H01667  Medulloblastoma

H02301  Nephroblastoma

H02411  Chronic myelomonocytic leukemia

H02434  Diffuse large B-cell lymphoma, not otherwise specified

H02529  Bone marrow failure syndrome

Drug target

Cenersen sodium: D08887

Rezatapopt: D12982

Brite

KEGG Orthology (KO) [BR:hsa00001]

 09130 Environmental Information Processing

  09132 Signal transduction

   04310 Wnt signaling pathway

    7157 (TP53)

   04071 Sphingolipid signaling pathway

    7157 (TP53)

   04151 PI3K-Akt signaling pathway

    7157 (TP53)

 09140 Cellular Processes

  09141 Transport and catabolism

   04137 Mitophagy - animal

    7157 (TP53)

 09150 Organismal Systems

  09152 Endocrine system... 04919 Thyroid hormone signaling pathway

    7157 (TP53)

  09156 Nervous system

   04722 Neurotrophin signaling pathway

    7157 (TP53)

  09149 Aging

   04211 Longevity regulating pathway

    7157 (TP53)

 09160 Human Diseases

  09161 Cancer: overview

   05200 Pathways in cancer

    7157 (TP53)

   05202 Transcriptional misregulation in cancer

    7157 (TP53)

   05206 MicroRNAs in cancer

    7157 (TP53)

   05205 Proteoglycans in cancer

    7157 (TP53)

   05203 Viral carcinogenesis

    7157 (TP53)

   05230 Central carbon metabolism in cancer

    7157 (TP53)

  09162 Cancer: specific types

   05210 Colorectal cancer

    7157 (TP53)

   05212 Pancreatic cancer

    7157 (TP53)

   05225 Hepatocellular carcinoma

    7157 (TP53)

   05226 Gastric cancer

    7157 (TP53)

   05214 Glioma

    7157 (TP53)

   05216 Thyroid cancer

    7157 (TP53)

   05220 Chronic myeloid leukemia

    7157 (TP53)

   05217 Basal cell carcinoma

    7157 (TP53)

   05218 Melanoma

    7157 (TP53)

   05219 Bladder cancer

    7157 (TP53)

   05215 Prostate cancer

    7157 (TP53)

   05213 Endometrial cancer

    7157 (TP53)

   05224 Breast cancer

    7157 (TP53)

   05222 Small cell lung cancer

    7157 (TP53)

   05223 Non-small cell lung cancer

    7157 (TP53)

  09172 Infectious disease: viral

   05166 Human T-cell leukemia virus 1 infection

    7157 (TP53)

   05161 Hepatitis B

    7157 (TP53)

   05160 Hepatitis C

    7157 (TP53)

   05162 Measles

    7157 (TP53)

   05163 Human cytomegalovirus infection

    7157 (TP53)

   05167 Kaposi sarcoma-associated herpesvirus infection

    7157 (TP53)

   05169 Epstein-Barr virus infection

    7157 (TP53)

   05165 Human papillomavirus infection

    7157 (TP53)

  09171 Infectious disease: bacterial

   05131 Shigellosis

    7157 (TP53)

  09164 Neurodegenerative disease

   05012 Parkinson disease

    7157 (TP53)

   05014 Amyotrophic lateral sclerosis

    7157 (TP53)

   05016 Huntington disease

    7157 (TP53)

  09166 Cardiovascular disease

   05417 Lipid and atherosclerosis

    7157 (TP53)

   05418 Fluid shear stress and atherosclerosis

    7157 (TP53)

  09176 Drug resistance: antineoplastic

   01524 Platinum drug resistance

    7157 (TP53)

   01522 Endocrine resistance

    7157 (TP53)

 09180 Brite Hierarchies

  09182 Protein families: genetic information processing

   03000 Transcription factors [BR:hsa03000]

    7157 (TP53)

   03036 Chromosome and associated proteins [BR:hsa03036]

    7157 (TP53)

   03400 DNA repair and recombination proteins [BR:hsa03400]

    7157 (TP53)

Transcription factors [BR:hsa03000]

 Eukaryotic type

  beta-Scaffold factors with minor groove contacts

   p53

    7157 (TP53)

Chromosome and associated proteins [BR:hsa03036]

 Eukaryotic type

  Sister chromatid separation proteins

   Aurora kinases

    Regulators of Aurora kinases

     7157 (TP53)

DNA repair and recombination proteins [BR:hsa03400]

 Eukaryotic type

  Check point factors

   Other check point factors

    7157 (TP53)

BRITE hierarchy

SSDB OrthologParalogGene clusterGFIT

Motif

Pfam: P53 TAD2 P53_tetramer P53_TAD

Motif

Other DBs

NCBI-GeneID: 7157

NCBI-ProteinID: NP_000537

OMIM: 191170

HGNC: 11998

Ensembl: ENSG00000141510

UniProt: P04637 K7PPA8 Q53GA5

Structure PDBPDBj

Position

17:complement(7668421..7687490)

Genome browser

AA seq 393 aa AA seqDB search

MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGP

DEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAK

SVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHE

RCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNS

SCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELP

PGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPG

GSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD

NT seq 1182 nt NT seq

atggaggagccgcagtcagatcctagcgtcgagccccctctgagtcaggaaacattttca

gacctatggaaactacttcctgaaaacaacgttctgtcccccttgccgtcccaagcaatg

gatgatttgatgctgtccccggacgatattgaacaatggttcactgaagacccaggtcca

gatgaagctcccagaatgccagaggctgctccccccgtggcccctgcaccagcagctcct

acaccggcggcccctgcaccagccccctcctggcccctgtcatcttctgtcccttcccag

aaaacctaccagggcagctacggtttccgtctgggcttcttgcattctgggacagccaag

tctgtgacttgcacgtactcccctgccctcaacaagatgttttgccaactggccaagacc

tgccctgtgcagctgtgggttgattccacacccccgcccggcacccgcgtccgcgccatg

gccatctacaagcagtcacagcacatgacggaggttgtgaggcgctgcccccaccatgag

cgctgctcagatagcgatggtctggcccctcctcagcatcttatccgagtggaaggaaat

ttgcgtgtggagtatttggatgacagaaacacttttcgacatagtgtggtggtgccctat

gagccgcctgaggttggctctgactgtaccaccatccactacaactacatgtgtaacagt

tcctgcatgggcggcatgaaccggaggcccatcctcaccatcatcacactggaagactcc

agtggtaatctactgggacggaacagctttgaggtgcgtgtttgtgcctgtcctgggaga

gaccggcgcacagaggaagagaatctccgcaagaaaggggagcctcaccacgagctgccc

ccagggagcactaagcgagcactgcccaacaacaccagctcctctccccagccaaagaag

aaaccactggatggagaatatttcacccttcagatccgtgggcgtgagcgcttcgagatg

ttccgagagctgaatgaggccttggaactcaaggatgcccaggctgggaaggagccaggg

gggagcagggctcactccagccacctgaagtccaaaaagggtcagtctacctcccgccat

aaaaaactcatgttcaagacagaagggcctgactcagactga

"""


# Extract features from the embedded KEGG record.
tp53_features = extract_tp53_features(data)

# Wrap the features in a Series named after the gene. After
# ``to_frame().T`` the Series name becomes the row label, and
# ``DataFrame.join`` aligns rows on that label. Without the name the row
# would be labelled 0, nothing would align with 'TP53', and every extracted
# feature would come out as NaN.
tp53_series = pd.Series(tp53_features, name='TP53')

# Sample combined dataset (replace with your actual data).
combined_data = {
    'Gene': ['TP53'],
    'Other_Feature_1': [0.5],
    'Other_Feature_2': [1.2],
}
combined_df = pd.DataFrame(combined_data)
combined_df = combined_df.set_index('Gene')  # Set 'Gene' as index

# Add the extracted features as new columns; rows are matched on the gene
# name held in both indexes.
combined_df = combined_df.join(tp53_series.to_frame().T)

print(combined_df)

# combined_df is now ready for further processing in machine learning.


TP53 PATHWAY CODE © 2025 by Mrinmoy Chakraborty is licensed under CC BY-NC-ND 4.0 


No comments:

Post a Comment

From Sea to Sapiens: The Epic Journey of Life’s Evolution

  Around 4 billion years ago, Earth’s oceans churned with the raw ingredients of life. In this primordial soup, simple organic molecules for...