CODE:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, make_scorer, f1_score
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import warnings
warnings.filterwarnings("ignore")
# Raw data string containing disease information
data = """
DOID:11054  Urinary bladder cancer  5 of 38 2.13  2.53  1.15e-06
DOID:5672 Large intestine cancer  5 of 44 2.07  2.49  1.15e-06
DOID:5093 Thoracic cancer 5 of 71 1.86  2.13  4.77e-06
DOID:9256 Colorectal cancer 4 of 32 2.11  2.1 1.36e-05
DOID:707  B-cell lymphoma 4 of 34 2.09  2.07  1.57e-05
DOID:9561 Nasopharyngeal disease  3 of 9  2.54  1.99  4.49e-05
DOID:0060119  Pharynx cancer  3 of 12 2.41  1.84  8.75e-05
DOID:0060058  Lymphoma  5 of 105  1.69  1.82  1.77e-05
DOID:3910 Lung adenocarcinoma 3 of 13 2.38  1.82  9.71e-05
DOID:4905 Pancreatic carcinoma  3 of 15 2.32  1.77  0.00012
DOID:2531 Hematologic cancer  6 of 190  1.52  1.71  1.11e-05
DOID:684  Hepatocellular carcinoma  3 of 17 2.26  1.7 0.00016
DOID:219  Colon cancer  3 of 17 2.26  1.7 0.00016
DOID:1795 Tumor of exocrine pancreas  3 of 17 2.26  1.7 0.00016
DOID:1612 Breast cancer 4 of 62 1.83  1.65  9.71e-05
DOID:3908 Lung non-small cell carcinoma 3 of 26 2.08  1.51  0.00037
DOID:11166  Obsolete papillomavirus infectious disease  2 of 2  3.02  1.5 0.00062
DOID:0050615  Respiratory system cancer 4 of 81 1.71  1.49  0.00019
DOID:170  Endocrine gland cancer  4 of 93 1.65  1.4 0.00028
DOID:0050686  Organ system cancer 10 of 757 1.14  1.37  1.15e-06
DOID:9261 Nasopharynx carcinoma 2 of 4  2.71  1.33  0.0014
DOID:1240 Leukemia  4 of 104  1.6 1.32  0.00041
DOID:8584 Burkitt lymphoma  2 of 5  2.62  1.27  0.0018
DOID:12704  Ataxia telangiectasia 2 of 5  2.62  1.27  0.0018
DOID:162  Cancer  11 of 978 1.07  1.26  1.15e-06
DOID:1909 Melanoma  3 of 46 1.83  1.22  0.0014
DOID:3498 Pancreatic ductal adenocarcinoma  2 of 6  2.54  1.22  0.0023
DOID:3012 Li-Fraumeni syndrome  2 of 6  2.54  1.22  0.0023
DOID:8557 Oropharynx cancer 2 of 7  2.47  1.18  0.0028
DOID:1037 Lymphoid leukemia 3 of 53 1.77  1.15  0.0019
DOID:2893 Cervix carcinoma  2 of 9  2.36  1.12  0.0036
DOID:3347 Osteosarcoma  2 of 10 2.32  1.09  0.0041
DOID:0060108  Brain glioma  2 of 10 2.32  1.09  0.0041
DOID:4159 Skin cancer 3 of 63 1.69  1.07  0.0028
DOID:5520 Head and neck squamous cell carcinoma 2 of 11 2.28  1.06  0.0047
DOID:8923 Skin melanoma 2 of 12 2.24  1.03  0.0054
DOID:0050621  Respiratory system benign neoplasm  2 of 12 2.24  1.03  0.0054
DOID:345  Uterine disease 3 of 72 1.64  1.01  0.0036
DOID:0050687  Cell type cancer  6 of 451  1.14  1.0 0.00041
DOID:305  Carcinoma 5 of 307  1.23  0.99  0.0010
DOID:786  Laryngeal disease 2 of 14 2.17  0.99  0.0068
DOID:3068 Glioblastoma multiforme 2 of 14 2.17  0.99  0.0068
DOID:768  Retinoblastoma  2 of 16 2.11  0.95  0.0080
DOID:4001 Ovarian carcinoma 2 of 16 2.11  0.95  0.0080
DOID:229  Female reproductive system disease  4 of 192  1.33  0.94  0.0028
DOID:0050745  Diffuse large B-cell lymphoma 2 of 17 2.09  0.93  0.0086
DOID:8618 Oral cavity cancer  2 of 18 2.06  0.92  0.0094
DOID:120  Female reproductive organ cancer  3 of 100  1.49  0.86  0.0077
DOID:77 Gastrointestinal system disease 6 of 576  1.03  0.82  0.0014
DOID:9952 Acute lymphoblastic leukemia  2 of 26 1.9 0.8 0.0165
DOID:2513 Basal cell carcinoma  2 of 27 1.89  0.78  0.0176
DOID:3459 Breast carcinoma  2 of 29 1.85  0.76  0.0195
DOID:403  Mouth disease 3 of 130  1.38  0.74  0.0137
DOID:10534  Stomach cancer  2 of 31 1.83  0.74  0.0216
DOID:289  Endometriosis 2 of 34 1.79  0.71  0.0252
DOID:0070004  Myeloid neoplasm  2 of 38 1.74  0.68  0.0294
DOID:6713 Cerebrovascular disease 2 of 46 1.65  0.61  0.0400
DOID:28 Endocrine system disease  4 of 398  1.02  0.55  0.0243
DOID:225  Syndrome  6 of 1214 0.71  0.41  0.0315
DOID:7  Disease of anatomical entity  12 of 4798  0.41  0.31  0.0185
"""
# Function to parse the raw data into a structured format
def parse_data(data):
    """
    Parse the raw data string into a list of lists containing numerical values and disease names.

    Every non-blank line is expected to have the whitespace-separated layout
    ``<DOID> <disease name...> <successes> of <total> <value1> <value2> <p_value>``.
    Blank or whitespace-only lines (including an entirely empty input) are skipped.

    Args:
        data (str): Raw data string with lines containing DOID, disease name, successes of total, and values.

    Returns:
        list: List of lists, each containing [successes, total, value1, value2, p_value, disease].

    Raises:
        ValueError: If a non-blank line has no 'of' token or a numeric field cannot be converted.
        IndexError: If a non-blank line has fewer than three fields after the total.
    """
    data_list = []
    for line_number, line in enumerate(data.split('\n'), start=1):
        parts = line.split()  # Split on any whitespace
        if not parts:
            # Nothing to parse on this line; tolerate stray blank lines.
            continue
        try:
            # Disease names may themselves contain 'of' (e.g. "Tumor of exocrine
            # pancreas"), so the separator is the LAST 'of' token on the line.
            i = len(parts) - 1 - parts[::-1].index('of')

            disease = ' '.join(parts[1:i-1])  # Disease name from after DOID to before successes
            successes = int(parts[i-1])       # Number before 'of'
            total = int(parts[i+1])           # Number after 'of'
            value1 = float(parts[i+2])        # First number after total
            value2 = float(parts[i+3])        # Second number
            p_value = float(parts[i+4])       # Third number (p-value)
        except (ValueError, IndexError) as exc:
            # Re-raise the same exception type, but say which line was malformed.
            raise type(exc)(
                f"Cannot parse line {line_number}: {line!r} ({exc})"
            ) from exc
        data_list.append([successes, total, value1, value2, p_value, disease])
    return data_list
# Build the base table from the parsed records
data_list = parse_data(data)
df = pd.DataFrame(
    data_list,
    columns=['Successes', 'Total', 'Value1', 'Value2', 'P_Value', 'Disease'],
)
# Feature Engineering: add the derived columns in one step
df = df.assign(
    Accuracy=df['Successes'] / df['Total'],   # Success rate
    LogTotal=np.log1p(df['Total']),           # log(1 + total cases)
    LogPValue=-np.log10(df['P_Value']),       # Negative log10 of p-value
)
# Substrings that mark a disease name as cancer-related
cancer_terms = ["cancer", "carcinoma", "melanoma", "lymphoma", "leukemia",
                "sarcoma", "glioma", "adenocarcinoma", "retinoblastoma", "glioblastoma"]
# Target variable: 1 when the lower-cased name contains any cancer term, else 0
y = np.array([
    int(any(term in name.lower() for term in cancer_terms))
    for name in df['Disease']
])
# Feature matrix: every numeric column (the disease name is used only for labeling)
X = df[[
    'Successes', 'Total', 'Value1', 'Value2',
    'P_Value', 'Accuracy', 'LogTotal', 'LogPValue',
]]
# Hold out 20% of the rows for testing, keeping the class ratio in both splits
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42,
)
# Custom F1 scorer to handle zero division
def safe_f1(y_true, y_pred):
    """
    Compute the binary F1 score, passing ``zero_division=0`` to ``f1_score``.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.

    Returns:
        The value returned by ``f1_score`` for the given labels.
    """
    f1_options = {'average': 'binary', 'zero_division': 0}
    return f1_score(y_true, y_pred, **f1_options)


# Scorer object wrapping safe_f1, used as the `scoring` argument of the grid searches
custom_f1 = make_scorer(safe_f1)
# --- Model Pipelines and Tuning ---
# SVM Pipeline
def create_svm_pipeline():
    """
    Build an unfitted SVM pipeline.

    Returns:
        Pipeline: median imputation ('imputer'), standard scaling ('scaler'),
        then an SVC with balanced class weights ('svm').
    """
    steps = [
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
        ('svm', SVC(random_state=42, class_weight='balanced')),
    ]
    return Pipeline(steps)
def tune_svm(X_train, y_train):
    """
    Grid-search the SVM pipeline and return the best fitted pipeline.

    The search covers C, kernel and gamma (3 x 2 x 4 = 24 candidates) and scores
    each candidate with `custom_f1` over a shuffled, non-stratified 10-fold split.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The `best_estimator_` of the fitted GridSearchCV.
    """
    cv_splitter = KFold(n_splits=10, shuffle=True, random_state=42)
    search_space = {
        'svm__C': [0.1, 1, 10],
        'svm__kernel': ['rbf', 'linear'],
        'svm__gamma': ['scale', 'auto', 0.1, 1],
    }
    search = GridSearchCV(
        estimator=create_svm_pipeline(),
        param_grid=search_space,
        scoring=custom_f1,
        cv=cv_splitter,
        n_jobs=-1,  # use all available cores
    )
    search.fit(X_train, y_train)
    return search.best_estimator_
# Random Forest Pipeline
def create_rf_pipeline():
    """
    Build an unfitted Random Forest pipeline.

    Returns:
        Pipeline: median imputation ('imputer'), standard scaling ('scaler'),
        then a RandomForestClassifier with balanced class weights ('rf').
    """
    steps = [
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
        ('rf', RandomForestClassifier(random_state=42, class_weight='balanced')),
    ]
    return Pipeline(steps)
def tune_rf(X_train, y_train):
    """
    Grid-search the Random Forest pipeline and return the best fitted pipeline.

    The search covers n_estimators, max_depth and min_samples_leaf
    (3 x 3 x 3 = 27 candidates) and scores each candidate with `custom_f1`
    over a shuffled, non-stratified 10-fold split.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The `best_estimator_` of the fitted GridSearchCV.
    """
    cv_splitter = KFold(n_splits=10, shuffle=True, random_state=42)
    search_space = {
        'rf__n_estimators': [50, 100, 200],
        'rf__max_depth': [3, 5, 7],
        'rf__min_samples_leaf': [1, 5, 10],
    }
    search = GridSearchCV(
        estimator=create_rf_pipeline(),
        param_grid=search_space,
        scoring=custom_f1,
        cv=cv_splitter,
        n_jobs=-1,  # use all available cores
    )
    search.fit(X_train, y_train)
    return search.best_estimator_
# Gradient Boosting Pipeline
def create_gb_pipeline():
    """
    Build an unfitted Gradient Boosting pipeline.

    Returns:
        Pipeline: median imputation ('imputer'), standard scaling ('scaler'),
        then a GradientBoostingClassifier ('gb').
    """
    steps = [
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
        ('gb', GradientBoostingClassifier(random_state=42)),
    ]
    return Pipeline(steps)
def tune_gb(X_train, y_train):
    """
    Grid-search the Gradient Boosting pipeline and return the best fitted pipeline.

    The search covers n_estimators, learning_rate and max_depth
    (3 x 3 x 3 = 27 candidates) and scores each candidate with `custom_f1`
    over a shuffled, non-stratified 10-fold split.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The `best_estimator_` of the fitted GridSearchCV.
    """
    cv_splitter = KFold(n_splits=10, shuffle=True, random_state=42)
    search_space = {
        'gb__n_estimators': [50, 100, 200],
        'gb__learning_rate': [0.01, 0.1, 0.2],
        'gb__max_depth': [3, 5, 7],
    }
    search = GridSearchCV(
        estimator=create_gb_pipeline(),
        param_grid=search_space,
        scoring=custom_f1,
        cv=cv_splitter,
        n_jobs=-1,  # use all available cores
    )
    search.fit(X_train, y_train)
    return search.best_estimator_
# --- Training and Evaluation Function ---
def train_evaluate_model(model_name, create_pipeline_func, tune_func, X_train, y_train, X_test, y_test):
    """
    Tune a model on the training split and print its performance on the test split.

    Prints the test-set accuracy, the F1 score (via `safe_f1`) and the full
    classification report, each prefixed with `model_name`.

    Args:
        model_name (str): Label used in the printed output (e.g., "SVM").
        create_pipeline_func (callable): Pipeline factory. Accepted as part of the
            signature but not used inside this function.
        tune_func (callable): Called as ``tune_func(X_train, y_train)``; must return
            a fitted estimator exposing ``predict``.
        X_train, y_train: Training data and labels.
        X_test, y_test: Testing data and labels.

    Returns:
        The fitted estimator returned by `tune_func`.
    """
    print(f"\nTraining and Evaluating {model_name}...")
    tuned_model = tune_func(X_train, y_train)

    # Score the tuned model on the held-out split
    predictions = tuned_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, predictions)
    test_f1 = safe_f1(y_test, predictions)

    print(f"{model_name} Accuracy on Test Set: {test_accuracy:.4f}")
    print(f"{model_name} F1 Score on Test Set: {test_f1:.4f}")
    report = classification_report(y_test, predictions)
    print(f"{model_name} Classification Report:\n{report}")
    return tuned_model
# --- Train and Evaluate Models ---
# Each call tunes one model family on the training split and prints its test-set metrics.
svm_model = train_evaluate_model(
    model_name="SVM",
    create_pipeline_func=create_svm_pipeline,
    tune_func=tune_svm,
    X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
)
rf_model = train_evaluate_model(
    model_name="Random Forest",
    create_pipeline_func=create_rf_pipeline,
    tune_func=tune_rf,
    X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
)
gb_model = train_evaluate_model(
    model_name="Gradient Boosting",
    create_pipeline_func=create_gb_pipeline,
    tune_func=tune_gb,
    X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
)
OUTPUT:
Training and Evaluating SVM...
SVM Accuracy on Test Set: 0.7500
SVM F1 Score on Test Set: 0.8421
SVM Classification Report:
              precision    recall  f1-score   support
           0       0.50      0.33      0.40         3
           1       0.80      0.89      0.84         9
    accuracy                           0.75        12
   macro avg       0.65      0.61      0.62        12
weighted avg       0.72      0.75      0.73        12
Training and Evaluating Random Forest...
Random Forest Accuracy on Test Set: 0.7500
Random Forest F1 Score on Test Set: 0.8421
Random Forest Classification Report:
              precision    recall  f1-score   support
           0       0.50      0.33      0.40         3
           1       0.80      0.89      0.84         9
    accuracy                           0.75        12
   macro avg       0.65      0.61      0.62        12
weighted avg       0.72      0.75      0.73        12
Training and Evaluating Gradient Boosting...
Gradient Boosting Accuracy on Test Set: 0.7500
Gradient Boosting F1 Score on Test Set: 0.8421
Gradient Boosting Classification Report:
              precision    recall  f1-score   support
           0       0.50      0.33      0.40         3
           1       0.80      0.89      0.84         9
    accuracy                           0.75        12
   macro avg       0.65      0.61      0.62        12
weighted avg       0.72      0.75      0.73        12
EXTENDED CODE:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, make_scorer, f1_score
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import warnings
import plotly.express as px  # For dynamic plotting
warnings.filterwarnings("ignore")
# Raw data string (disease dataset)
data = """
DOID:11054  Urinary bladder cancer  5 of 38 2.13  2.53  1.15e-06
DOID:5672 Large intestine cancer  5 of 44 2.07  2.49  1.15e-06
DOID:5093 Thoracic cancer 5 of 71 1.86  2.13  4.77e-06
DOID:9256 Colorectal cancer 4 of 32 2.11  2.1 1.36e-05
DOID:707  B-cell lymphoma 4 of 34 2.09  2.07  1.57e-05
DOID:9561 Nasopharyngeal disease  3 of 9  2.54  1.99  4.49e-05
DOID:0060119  Pharynx cancer  3 of 12 2.41  1.84  8.75e-05
DOID:0060058  Lymphoma  5 of 105  1.69  1.82  1.77e-05
DOID:3910 Lung adenocarcinoma 3 of 13 2.38  1.82  9.71e-05
DOID:4905 Pancreatic carcinoma  3 of 15 2.32  1.77  0.00012
DOID:2531 Hematologic cancer  6 of 190  1.52  1.71  1.11e-05
DOID:684  Hepatocellular carcinoma  3 of 17 2.26  1.7 0.00016
DOID:219  Colon cancer  3 of 17 2.26  1.7 0.00016
DOID:1795 Tumor of exocrine pancreas  3 of 17 2.26  1.7 0.00016
DOID:1612 Breast cancer 4 of 62 1.83  1.65  9.71e-05
DOID:3908 Lung non-small cell carcinoma 3 of 26 2.08  1.51  0.00037
DOID:11166  Obsolete papillomavirus infectious disease  2 of 2  3.02  1.5 0.00062
DOID:0050615  Respiratory system cancer 4 of 81 1.71  1.49  0.00019
DOID:170  Endocrine gland cancer  4 of 93 1.65  1.4 0.00028
DOID:0050686  Organ system cancer 10 of 757 1.14  1.37  1.15e-06
DOID:9261 Nasopharynx carcinoma 2 of 4  2.71  1.33  0.0014
DOID:1240 Leukemia  4 of 104  1.6 1.32  0.00041
DOID:8584 Burkitt lymphoma  2 of 5  2.62  1.27  0.0018
DOID:12704  Ataxia telangiectasia 2 of 5  2.62  1.27  0.0018
DOID:162  Cancer  11 of 978 1.07  1.26  1.15e-06
DOID:1909 Melanoma  3 of 46 1.83  1.22  0.0014
DOID:3498 Pancreatic ductal adenocarcinoma  2 of 6  2.54  1.22  0.0023
DOID:3012 Li-Fraumeni syndrome  2 of 6  2.54  1.22  0.0023
DOID:8557 Oropharynx cancer 2 of 7  2.47  1.18  0.0028
DOID:1037 Lymphoid leukemia 3 of 53 1.77  1.15  0.0019
DOID:2893 Cervix carcinoma  2 of 9  2.36  1.12  0.0036
DOID:3347 Osteosarcoma  2 of 10 2.32  1.09  0.0041
DOID:0060108  Brain glioma  2 of 10 2.32  1.09  0.0041
DOID:4159 Skin cancer 3 of 63 1.69  1.07  0.0028
DOID:5520 Head and neck squamous cell carcinoma 2 of 11 2.28  1.06  0.0047
DOID:8923 Skin melanoma 2 of 12 2.24  1.03  0.0054
DOID:0050621  Respiratory system benign neoplasm  2 of 12 2.24  1.03  0.0054
DOID:345  Uterine disease 3 of 72 1.64  1.01  0.0036
DOID:0050687  Cell type cancer  6 of 451  1.14  1.0 0.00041
DOID:305  Carcinoma 5 of 307  1.23  0.99  0.0010
DOID:786  Laryngeal disease 2 of 14 2.17  0.99  0.0068
DOID:3068 Glioblastoma multiforme 2 of 14 2.17  0.99  0.0068
DOID:768  Retinoblastoma  2 of 16 2.11  0.95  0.0080
DOID:4001 Ovarian carcinoma 2 of 16 2.11  0.95  0.0080
DOID:229  Female reproductive system disease  4 of 192  1.33  0.94  0.0028
DOID:0050745  Diffuse large B-cell lymphoma 2 of 17 2.09  0.93  0.0086
DOID:8618 Oral cavity cancer  2 of 18 2.06  0.92  0.0094
DOID:120  Female reproductive organ cancer  3 of 100  1.49  0.86  0.0077
DOID:77 Gastrointestinal system disease 6 of 576  1.03  0.82  0.0014
DOID:9952 Acute lymphoblastic leukemia  2 of 26 1.9 0.8 0.0165
DOID:2513 Basal cell carcinoma  2 of 27 1.89  0.78  0.0176
DOID:3459 Breast carcinoma  2 of 29 1.85  0.76  0.0195
DOID:403  Mouth disease 3 of 130  1.38  0.74  0.0137
DOID:10534  Stomach cancer  2 of 31 1.83  0.74  0.0216
DOID:289  Endometriosis 2 of 34 1.79  0.71  0.0252
DOID:0070004  Myeloid neoplasm  2 of 38 1.74  0.68  0.0294
DOID:6713 Cerebrovascular disease 2 of 46 1.65  0.61  0.0400
DOID:28 Endocrine system disease  4 of 398  1.02  0.55  0.0243
DOID:225  Syndrome  6 of 1214 0.71  0.41  0.0315
DOID:7  Disease of anatomical entity  12 of 4798  0.41  0.31  0.0185
"""
# Parse data into a structured format
def parse_data(data):
    """
    Parse the raw disease table into a list of records.

    Every non-blank line is expected to have the whitespace-separated layout
    ``<DOID> <disease name...> <successes> of <total> <value1> <value2> <p_value>``.
    Blank or whitespace-only lines (including an entirely empty input) are skipped.

    Args:
        data (str): Raw multi-line data string.

    Returns:
        list: One ``[successes, total, value1, value2, p_value, disease]`` list per record.

    Raises:
        ValueError: If a non-blank line has no 'of' token or a numeric field cannot be converted.
        IndexError: If a non-blank line has fewer than three fields after the total.
    """
    data_list = []
    for line_number, line in enumerate(data.split('\n'), start=1):
        parts = line.split()
        if not parts:
            # Nothing to parse on this line; tolerate stray blank lines.
            continue
        try:
            # Disease names may contain 'of' themselves, so anchor on the LAST one.
            i = len(parts) - parts[::-1].index('of') - 1
            disease = ' '.join(parts[1:i-1])  # tokens between the DOID and the successes count
            successes = int(parts[i-1])
            total = int(parts[i+1])
            value1 = float(parts[i+2])
            value2 = float(parts[i+3])
            p_value = float(parts[i+4])
        except (ValueError, IndexError) as exc:
            # Re-raise the same exception type, but say which line was malformed.
            raise type(exc)(
                f"Cannot parse line {line_number}: {line!r} ({exc})"
            ) from exc
        data_list.append([successes, total, value1, value2, p_value, disease])
    return data_list
# Build the base table from the parsed records
data_list = parse_data(data)
df = pd.DataFrame(
    data_list,
    columns=['Successes', 'Total', 'Value1', 'Value2', 'P_Value', 'Disease'],
)
# Feature Engineering: success rate, log(1 + total), and -log10 of the p-value
df = df.assign(
    Accuracy=df['Successes'] / df['Total'],
    LogTotal=np.log1p(df['Total']),
    LogPValue=-np.log10(df['P_Value']),
)
# Target variable: 1 when the lower-cased disease name contains any cancer term, else 0
cancer_terms = ["cancer", "carcinoma", "melanoma", "lymphoma", "leukemia",
                "sarcoma", "glioma", "adenocarcinoma", "retinoblastoma", "glioblastoma"]
y = np.array([
    int(any(term in name.lower() for term in cancer_terms))
    for name in df['Disease']
])
# Features: every numeric column (the disease name is used only for labeling)
X = df[[
    'Successes', 'Total', 'Value1', 'Value2',
    'P_Value', 'Accuracy', 'LogTotal', 'LogPValue',
]]
# Hold out 20% of the rows for testing, keeping the class ratio in both splits
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42,
)
# Custom F1 scorer
def safe_f1(y_true, y_pred):
    """Return the binary F1 score, calling ``f1_score`` with ``zero_division=0``."""
    f1_options = {'average': 'binary', 'zero_division': 0}
    return f1_score(y_true, y_pred, **f1_options)


# Scorer object wrapping safe_f1, used as the `scoring` argument of the grid searches
custom_f1 = make_scorer(safe_f1)
# Model Pipelines
def create_svm_pipeline():
    """Return an unfitted pipeline: median imputer -> standard scaler -> class-balanced SVC."""
    imputer = SimpleImputer(strategy='median')
    scaler = StandardScaler()
    classifier = SVC(random_state=42, class_weight='balanced')
    return Pipeline([('imputer', imputer), ('scaler', scaler), ('svm', classifier)])
def tune_svm(X_train, y_train):
    """
    Grid-search C, kernel and gamma for the SVM pipeline; return the best fitted pipeline.

    Candidates are scored with `custom_f1` over a shuffled, non-stratified 10-fold split.
    """
    folds = KFold(n_splits=10, shuffle=True, random_state=42)
    candidates = {
        'svm__C': [0.1, 1, 10],
        'svm__kernel': ['rbf', 'linear'],
        'svm__gamma': ['scale', 'auto', 0.1, 1],
    }
    searcher = GridSearchCV(
        estimator=create_svm_pipeline(),
        param_grid=candidates,
        scoring=custom_f1,
        cv=folds,
        n_jobs=-1,  # use all available cores
    )
    searcher.fit(X_train, y_train)
    return searcher.best_estimator_
def create_rf_pipeline():
    """Return an unfitted pipeline: median imputer -> standard scaler -> class-balanced random forest."""
    imputer = SimpleImputer(strategy='median')
    scaler = StandardScaler()
    classifier = RandomForestClassifier(random_state=42, class_weight='balanced')
    return Pipeline([('imputer', imputer), ('scaler', scaler), ('rf', classifier)])
def tune_rf(X_train, y_train):
    """
    Grid-search the Random Forest pipeline; return the best fitted pipeline.

    Searches n_estimators, max_depth and min_samples_leaf, scoring candidates with
    `custom_f1` over a shuffled, non-stratified 10-fold split.
    """
    folds = KFold(n_splits=10, shuffle=True, random_state=42)
    candidates = {
        'rf__n_estimators': [50, 100, 200],
        'rf__max_depth': [3, 5, 7],
        'rf__min_samples_leaf': [1, 5, 10],
    }
    searcher = GridSearchCV(
        estimator=create_rf_pipeline(),
        param_grid=candidates,
        scoring=custom_f1,
        cv=folds,
        n_jobs=-1,  # use all available cores
    )
    searcher.fit(X_train, y_train)
    return searcher.best_estimator_
def create_gb_pipeline():
    """Return an unfitted pipeline: median imputer -> standard scaler -> gradient boosting classifier."""
    imputer = SimpleImputer(strategy='median')
    scaler = StandardScaler()
    classifier = GradientBoostingClassifier(random_state=42)
    return Pipeline([('imputer', imputer), ('scaler', scaler), ('gb', classifier)])
def tune_gb(X_train, y_train):
    """
    Grid-search the Gradient Boosting pipeline; return the best fitted pipeline.

    Searches n_estimators, learning_rate and max_depth, scoring candidates with
    `custom_f1` over a shuffled, non-stratified 10-fold split.
    """
    folds = KFold(n_splits=10, shuffle=True, random_state=42)
    candidates = {
        'gb__n_estimators': [50, 100, 200],
        'gb__learning_rate': [0.01, 0.1, 0.2],
        'gb__max_depth': [3, 5, 7],
    }
    searcher = GridSearchCV(
        estimator=create_gb_pipeline(),
        param_grid=candidates,
        scoring=custom_f1,
        cv=folds,
        n_jobs=-1,  # use all available cores
    )
    searcher.fit(X_train, y_train)
    return searcher.best_estimator_
# Train and Evaluate Models
def train_evaluate_model(model_name, create_func, tune_func, X_train, y_train, X_test, y_test):
    """
    Tune a model, print its test-set metrics, and return the model with a metrics record.

    Args:
        model_name (str): Label used in the printed output and in the metrics record.
        create_func (callable): Pipeline factory. Accepted as part of the signature
            but not used inside this function.
        tune_func (callable): Called as ``tune_func(X_train, y_train)``; must return
            a fitted estimator exposing ``predict``.
        X_train, y_train: Training data and labels.
        X_test, y_test: Testing data and labels.

    Returns:
        tuple: ``(fitted_model, {'Model': ..., 'Accuracy': ..., 'F1 Score': ...})``.
    """
    print(f"\nTraining and Evaluating {model_name}...")
    tuned_model = tune_func(X_train, y_train)

    # Score the tuned model on the held-out split
    predictions = tuned_model.predict(X_test)
    test_accuracy = accuracy_score(y_test, predictions)
    test_f1 = safe_f1(y_test, predictions)

    print(f"{model_name} Accuracy: {test_accuracy:.4f}")
    print(f"{model_name} F1 Score: {test_f1:.4f}")
    report = classification_report(y_test, predictions)
    print(f"{model_name} Classification Report:\n{report}")

    summary = {'Model': model_name, 'Accuracy': test_accuracy, 'F1 Score': test_f1}
    return tuned_model, summary
# Train every model family and collect one metrics record per model.
# The three lists below are parallel: position i names, builds and tunes the same model.
models = ['SVM', 'Random Forest', 'Gradient Boosting']
create_funcs = [create_svm_pipeline, create_rf_pipeline, create_gb_pipeline]
tune_funcs = [tune_svm, tune_rf, tune_gb]
metrics_list = []
for model_name, create_func, tune_func in zip(models, create_funcs, tune_funcs):
    _, metrics = train_evaluate_model(
        model_name=model_name,
        create_func=create_func,
        tune_func=tune_func,
        X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test,
    )
    metrics_list.append(metrics)
# One row per model with columns Model / Accuracy / F1 Score
metrics_df = pd.DataFrame(metrics_list)
# Interactive grouped bar chart: one Accuracy bar and one F1 Score bar per model
fig = px.bar(
    metrics_df,
    x='Model',
    y=['Accuracy', 'F1 Score'],
    barmode='group',
    title='Model Performance Comparison',
    labels={'value': 'Score', 'variable': 'Metric'},
    height=500,
)
fig.update_layout(showlegend=True)
fig.show()
OUTPUT:
Training and Evaluating Random Forest...
Random Forest Accuracy: 0.7500
Random Forest F1 Score: 0.8421
Random Forest Classification Report:
              precision    recall  f1-score   support
           0       0.50      0.33      0.40         3
           1       0.80      0.89      0.84         9
    accuracy                           0.75        12
   macro avg       0.65      0.61      0.62        12
weighted avg       0.72      0.75      0.73        12
Training and Evaluating Gradient Boosting...
Gradient Boosting Accuracy: 0.7500
Gradient Boosting F1 Score: 0.8421
Gradient Boosting Classification Report:
              precision    recall  f1-score   support
           0       0.50      0.33      0.40         3
           1       0.80      0.89      0.84         9
    accuracy                           0.75        12
   macro avg       0.65      0.61      0.62        12
weighted avg       0.72      0.75      0.73        12

No comments:
Post a Comment