Skip to content

Commit bdc2740

Browse files
committed
Create 01_Stacking.py
1 parent 6c757d1 commit bdc2740

1 file changed

Lines changed: 63 additions & 0 deletions

File tree

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import numpy as np
2+
import pandas as pd
3+
import os
4+
from sklearn.datasets import load_breast_cancer
5+
from sklearn.model_selection import cross_val_score, RepeatedStratifiedKFold
6+
from sklearn.linear_model import LogisticRegression
7+
from sklearn.neighbors import KNeighborsClassifier
8+
from sklearn.tree import DecisionTreeClassifier
9+
from sklearn.svm import SVC
10+
from sklearn.ensemble import StackingClassifier
11+
import warnings
12+
# Install a blanket "ignore" filter: with no category/module arguments this
# suppresses every warning for the rest of the run, including any emitted by
# the libraries imported above.
# NOTE(review): this would also hide solver/convergence warnings from model
# fitting - consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")
13+
14+
# Dataset: the breast-cancer data bundled with scikit-learn, unpacked
# directly into a feature matrix and a label vector.
X, y = load_breast_cancer(return_X_y=True)

# Outer evaluation protocol shared by every model in this script:
# 5 stratified folds, repeated 3 times, with a fixed seed.
cv = RepeatedStratifiedKFold(n_repeats=3, n_splits=5, random_state=42)

# Base learners keyed by their display label. Insertion order is the order in
# which they are evaluated, printed, and later handed to the stacking model.
base_models = {
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "KNN": KNeighborsClassifier(n_neighbors=5),
    "Decision Tree": DecisionTreeClassifier(random_state=42, max_depth=10),
    # probability=True makes SVC expose predict_proba.
    "SVM": SVC(random_state=42, probability=True),
}
27+
28+
# Helper function to evaluate models
29+
def evaluate_model(model, X, y, *, scoring="accuracy", cv_strategy=None):
    """Cross-validate *model* on ``(X, y)`` and summarise the fold scores.

    Args:
        model: Estimator passed unchanged to ``cross_val_score``.
        X: Features passed to ``cross_val_score``.
        y: Targets passed to ``cross_val_score``.
        scoring: Metric forwarded to ``cross_val_score``. Defaults to
            ``"accuracy"``, the value this helper previously hard-coded.
        cv_strategy: Cross-validation splitter (or fold count) to use. When
            ``None`` (the default) the module-level ``cv`` object is used,
            which matches the helper's previous behaviour.

    Returns:
        A ``(mean, std)`` tuple computed over the per-fold scores.
    """
    # Fall back to the script-wide splitter so existing three-argument calls
    # keep producing exactly the same folds as before.
    splitter = cv if cv_strategy is None else cv_strategy
    scores = cross_val_score(model, X, y, cv=splitter, scoring=scoring)
    return scores.mean(), scores.std()
32+
33+
# Score every base learner on its own, printing one report row per model and
# remembering each mean so the ensemble can be compared against them later.
print("Individual Model Performance")
base_results = {}

for label, estimator in base_models.items():
    fold_mean, fold_std = evaluate_model(estimator, X, y)
    print(f"{label:20s} | Mean: {fold_mean:.4f} | Std: {fold_std:.4f}")
    base_results[label] = fold_mean

# Unweighted mean of the per-model mean accuracies.
avg_base_accuracy = np.mean([*base_results.values()])
print(f"\nAverage Base Model Accuracy: {avg_base_accuracy:.4f}")
44+
45+
# Stacked ensemble: every base learner feeds a logistic-regression
# meta-learner. The cv=5 here is StackingClassifier's own internal fold count
# and is separate from the repeated splitter used for the outer evaluation.
stacking_model = StackingClassifier(
    estimators=list(base_models.items()),  # (label, estimator) pairs, in dict order
    final_estimator=LogisticRegression(max_iter=1000),
    cv=5,
)
51+
52+
# Cross-validate the stacked ensemble with the same helper used for the
# individual base learners.
stack_mean, stack_std = evaluate_model(stacking_model, X, y)

# Difference between the ensemble's mean score and the average base-model
# score; a positive value means the stack scored higher.
improvement = stack_mean - avg_base_accuracy

print("\nStacking Model Performance")
print(f"Stacking Classifier | Mean: {stack_mean:.4f} | Std: {stack_std:.4f}")

# Final side-by-side summary, one row per line.
print("\nPerformance Comparison")
print(
    f"Average Base Accuracy : {avg_base_accuracy:.4f}",
    f"Stacking Accuracy : {stack_mean:.4f}",
    f"Improvement : {improvement:+.4f}",
    sep="\n",
)

0 commit comments

Comments
 (0)