# NOTE: Removed non-code residue that was pasted above the script — GitHub page
# chrome ("Notifications", "Expand file tree", file size/LOC metadata) and a
# copied line-number gutter (1–117). None of it was valid Python.
# streamlit_eval_report_with_full_interpretation.py
import streamlit as st
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_recall_curve
import tempfile
import os
import numpy as np
# Page-level configuration; must be the first Streamlit call in the script.
st.set_page_config(page_title="Model Evaluation with Interpretations", layout="wide")
st.title("🧠 Model Evaluation with Interpretations")
# Inputs: a fitted scikit-learn-style estimator serialized with joblib/pickle,
# and a CSV test set that must contain a 'target' column (checked downstream).
uploaded_model = st.file_uploader("Upload your trained model (.pkl or .joblib)", type=["pkl", "joblib"])
uploaded_test_data = st.file_uploader("Upload your test data (.csv with 'target' column)", type="csv")
def generate_interpretation(metric_name, value):
    """Return a one-sentence plain-English reading of *value* for *metric_name*.

    Known metric names are "AUC", "Precision", "Recall", and "F1 Score";
    any other name yields a generic "no interpretation" message.
    """
    if metric_name == "AUC":
        # Bucket AUC into the conventional excellent / good / poor bands.
        if value >= 0.9:
            return "Excellent model performance (AUC ≥ 0.9)."
        if value >= 0.75:
            return "Good discrimination capability (AUC ≥ 0.75)."
        return "Poor discrimination capability. Consider improving the model."
    # The remaining metrics only differ in their message template.
    templates = {
        "Precision": f"Precision of {value:.2f} means {value*100:.0f}% of positive predictions were correct.",
        "Recall": f"Recall of {value:.2f} means the model captured {value*100:.0f}% of actual positives.",
        "F1 Score": f"F1 Score balances Precision and Recall. A score of {value:.2f} indicates overall accuracy in positive class.",
    }
    return templates.get(metric_name, "No interpretation available.")
if uploaded_model and uploaded_test_data:
    # NOTE(review): joblib.load deserializes with pickle and can execute
    # arbitrary code — only load model files from trusted sources.
    model = joblib.load(uploaded_model)
    df = pd.read_csv(uploaded_test_data)
    if 'target' not in df.columns:
        st.error("The dataset must include a 'target' column.")
    else:
        X = df.drop(columns=['target'])
        y = df['target']
        y_pred = model.predict(X)
        # Positive-class scores; fall back to all-zero scores when the model
        # cannot emit probabilities (curves then degenerate but do not crash).
        y_prob = model.predict_proba(X)[:, 1] if hasattr(model, "predict_proba") else np.zeros(len(y_pred), dtype=float)
        # Metrics — pin labels=[0, 1] so the matrix is always 2x2 even when the
        # test split contains a single class (without it, confusion_matrix
        # returns 1x1 and cm[1, 1] raises IndexError). Assumes binary 0/1
        # targets, as the rest of this report already does.
        cm = confusion_matrix(y, y_pred, labels=[0, 1])
        TN, FP, FN, TP = cm.ravel()
        # Guard the divisions: define precision/recall/F1 as 0.0 when undefined.
        precision = TP / (TP + FP) if TP + FP > 0 else 0.0
        recall = TP / (TP + FN) if TP + FN > 0 else 0.0
        f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0
        st.subheader("📊 Evaluation Metrics")
        st.metric("Precision", f"{precision:.2f}")
        st.markdown(f"📝 {generate_interpretation('Precision', precision)}")
        st.metric("Recall", f"{recall:.2f}")
        st.markdown(f"📝 {generate_interpretation('Recall', recall)}")
        st.metric("F1 Score", f"{f1:.2f}")
        st.markdown(f"📝 {generate_interpretation('F1 Score', f1)}")
        # Confusion Matrix
        st.subheader("🔲 Confusion Matrix")
        fig_cm, ax = plt.subplots()
        sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
        ax.set_title("Confusion Matrix")
        st.pyplot(fig_cm)
        plt.close(fig_cm)  # free the figure so Streamlit reruns don't leak memory
        st.markdown("📝 Interpretation: Confusion matrix shows distribution of prediction results.")
        # ROC / AUC are undefined when only one class is present in y.
        if y.nunique() < 2:
            st.warning("ROC and Precision-Recall curves require both classes in the test data.")
        else:
            # ROC Curve
            st.subheader("📈 ROC Curve")
            fpr, tpr, _ = roc_curve(y, y_prob)
            auc_score = auc(fpr, tpr)
            fig_roc, ax = plt.subplots()
            ax.plot(fpr, tpr, label=f"AUC = {auc_score:.2f}")
            ax.plot([0, 1], [0, 1], 'k--')  # chance diagonal for reference
            ax.set_title("ROC Curve")
            ax.set_xlabel("False Positive Rate")
            ax.set_ylabel("True Positive Rate")
            ax.legend(loc="lower right")
            st.pyplot(fig_roc)
            plt.close(fig_roc)
            st.markdown(f"📝 Interpretation: {generate_interpretation('AUC', auc_score)}")
            # Precision-Recall Curve
            st.subheader("📉 Precision-Recall Curve")
            precision_vals, recall_vals, _ = precision_recall_curve(y, y_prob)
            fig_pr, ax = plt.subplots()
            ax.plot(recall_vals, precision_vals)
            ax.set_title("Precision-Recall Curve")
            ax.set_xlabel("Recall")
            ax.set_ylabel("Precision")
            st.pyplot(fig_pr)
            plt.close(fig_pr)
            st.markdown("📝 Interpretation: Shows the tradeoff between precision and recall for different thresholds.")
        # Histogram of prediction probabilities
        st.subheader("📊 Prediction Probability Histogram")
        fig_hist, ax = plt.subplots()
        ax.hist(y_prob, bins=20, color='skyblue', edgecolor='black')
        ax.set_title("Histogram of Prediction Probabilities")
        ax.set_xlabel("Predicted Probability")
        ax.set_ylabel("Frequency")
        st.pyplot(fig_hist)
        plt.close(fig_hist)
        st.markdown("📝 Interpretation: Distribution of model confidence in predictions.")
        # Feature Importance (only for models that expose it, e.g. tree ensembles)
        if hasattr(model, "feature_importances_"):
            st.subheader("📌 Feature Importance")
            importances = model.feature_importances_
            fig_fi, ax = plt.subplots()
            sns.barplot(x=importances, y=X.columns, ax=ax)
            ax.set_title("Feature Importances")
            st.pyplot(fig_fi)
            plt.close(fig_fi)
            top_feature = X.columns[np.argmax(importances)]
            st.markdown(f"📝 Interpretation: Feature '{top_feature}' is the most important in decision-making.")