|
import os
import warnings

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

warnings.filterwarnings("ignore")
| 13 | + |
# Load data
X, y = load_breast_cancer(return_X_y=True)

# Cross-validation strategy: 5 stratified folds repeated 3 times (15 scores
# per model). The fixed seed means every model is scored on the same splits.
cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=3, random_state=42)

# Define base models.
# Logistic regression, KNN and the (default RBF-kernel) SVM are sensitive to
# feature scale, so each is wrapped in a pipeline that standardizes first.
# Keeping the scaler inside the pipeline means it is refit on the training
# portion of every CV split, so no statistics leak from the held-out fold.
# Without scaling, the solver behind LogisticRegression may stop at max_iter
# before converging, and the file-wide warnings filter would hide that.
# The decision tree splits on per-feature thresholds and is left unscaled.
base_models = {
    "Logistic Regression": make_pipeline(
        StandardScaler(),
        LogisticRegression(max_iter=1000),
    ),
    "KNN": make_pipeline(
        StandardScaler(),
        KNeighborsClassifier(n_neighbors=5),
    ),
    "Decision Tree": DecisionTreeClassifier(max_depth=10, random_state=42),
    "SVM": make_pipeline(
        StandardScaler(),
        SVC(probability=True, random_state=42),
    ),
}
| 27 | + |
| 28 | +# Helper function to evaluate models |
def evaluate_model(model, X, y):
    """Cross-validate ``model`` on ``(X, y)`` and summarize its accuracy.

    The splits come from the module-level ``cv`` object and each fold is
    scored with plain accuracy.

    Args:
        model: Estimator passed straight to ``cross_val_score``.
        X: Feature matrix.
        y: Target labels.

    Returns:
        A ``(mean, std)`` tuple computed over the per-fold accuracy scores.
    """
    fold_accuracies = cross_val_score(model, X, y, scoring="accuracy", cv=cv)
    return np.mean(fold_accuracies), np.std(fold_accuracies)
| 32 | + |
# Score each base model on its own and print one summary row per model.
print("Individual Model Performance")
base_results = {}

for label, estimator in base_models.items():
    accuracy_mean, accuracy_std = evaluate_model(estimator, X, y)
    print(f"{label:20s} | Mean: {accuracy_mean:.4f} | Std: {accuracy_std:.4f}")
    # Only the mean is kept; it feeds the average computed below.
    base_results[label] = accuracy_mean

# Unweighted average of the per-model mean accuracies.
avg_base_accuracy = np.mean(tuple(base_results.values()))
print(f"\nAverage Base Model Accuracy: {avg_base_accuracy:.4f}")
| 44 | + |
# Build the stacked ensemble: every base model feeds a logistic-regression
# meta-learner. ``cv=5`` here is the StackingClassifier's own internal
# cross-validation setting, separate from the outer ``cv`` used for scoring.
stacking_model = StackingClassifier(
    estimators=list(base_models.items()),
    final_estimator=LogisticRegression(max_iter=1000),
    cv=5,
)

# Score the ensemble with the same helper used for the base models.
stack_mean, stack_std = evaluate_model(stacking_model, X, y)

print(
    "\nStacking Model Performance",
    f"Stacking Classifier | Mean: {stack_mean:.4f} | Std: {stack_std:.4f}",
    sep="\n",
)

# Gain of the ensemble over the average (not the best) base model.
improvement = stack_mean - avg_base_accuracy
print(
    "\nPerformance Comparison",
    f"Average Base Accuracy : {avg_base_accuracy:.4f}",
    f"Stacking Accuracy : {stack_mean:.4f}",
    f"Improvement : {improvement:+.4f}",
    sep="\n",
)
0 commit comments