-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpredict.py
More file actions
155 lines (99 loc) · 5.27 KB
/
predict.py
File metadata and controls
155 lines (99 loc) · 5.27 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import argparse
import torch
import os
from models.basemodel import Network, NetworkMargin
import utils.config as config
from utils.dataset import WaterBirds, CelebaDataset
from torch.utils.data import DataLoader
from utils.utils import compute_accuracy
def parse_args():
    """Build the CLI parser and return the parsed argument namespace.

    Flags:
        --technique  (required) one of 'erm', 'ema', 'swa' — which trained variant to evaluate.
        --dataset    'waterbirds' (default) or 'celeba'.
        --gpu        GPU card ID string, defaults to '0'.
    """
    p = argparse.ArgumentParser(description='Run and compare predictions for baseline and CAML models.')
    p.add_argument('--technique', type=str, required=True, choices=['erm', 'ema', 'swa'],
                   help='Training technique to evaluate (ERM, EMA, or SWA).')
    p.add_argument('--dataset', type=str, default='waterbirds', choices=['waterbirds', 'celeba'],
                   help='Dataset to use for prediction.')
    p.add_argument("--gpu", type=str, default='0', help='GPU card ID.')
    return p.parse_args()
def get_model_paths(technique):
    """Return the checkpoint paths configured for *technique*.

    Args:
        technique: One of 'erm', 'ema', 'swa'.

    Returns:
        Dict with 'baseline' and 'margin' path entries, or an empty dict
        when the technique is not recognized (mirrors the original behavior).
    """
    # Guard-clause chain keeps config attribute access lazy: only the
    # attributes for the requested technique are ever read.
    if technique == 'erm':
        baseline, margin = config.baseline_path_erm, config.margin_path_erm
    elif technique == 'ema':
        baseline, margin = config.baseline_path_ema, config.margin_path_ema
    elif technique == 'swa':
        baseline, margin = config.baseline_path_swa, config.margin_path_swa
    else:
        return {}
    return {'baseline': baseline, 'margin': margin}
def run_evaluation(model, model_path, test_loader, device):
    """Load weights from *model_path* into *model* and evaluate it on *test_loader*.

    Returns:
        The (global, worst-group, average-group) accuracy triple from
        compute_accuracy, or (None, None, None) when the checkpoint file is
        missing or any step of loading/evaluation raises.
    """
    # Missing checkpoint is an expected condition (model not trained yet):
    # warn and signal "no result" rather than raising.
    if not os.path.exists(model_path):
        print(f"Warning: Model file not found at '{model_path}'. Skipping evaluation for this model.")
        return None, None, None

    try:
        state_dict = torch.load(model_path, map_location=device)
        model.load_state_dict(state_dict)
        print(f"Successfully loaded model from {model_path}")

        model.eval()
        with torch.no_grad():
            acc, worst, avg = compute_accuracy(model, test_loader, device)
        return acc, worst, avg
    except Exception as e:
        # Deliberate best-effort: a corrupt/incompatible checkpoint should not
        # abort the whole comparison run — report and return sentinels.
        print(f"An error occurred while evaluating model {model_path}: {e}")
        return None, None, None
def predict_comparison(args):
    """
    Loads baseline and CAML models for a given technique, runs predictions,
    and prints a comparison of their performance.

    Args:
        args: Parsed namespace with 'technique', 'dataset', and 'gpu' fields
              (see parse_args).
    """
    DEVICE = torch.device(f'cuda:{args.gpu}' if torch.cuda.is_available() else 'cpu')

    # 1. Get paths for the models to compare.
    # The baseline is always the standard ERM model for a consistent benchmark.
    baseline_model_path = config.baseline_path_erm
    caml_model_path = get_model_paths(args.technique)['margin']  # Margin model for the specified technique.

    # 2. Load the test dataset.
    if args.dataset == 'waterbirds':
        test_dataset = WaterBirds(split='test')
    else:  # celeba
        test_dataset = CelebaDataset(split=2)
    test_loader = DataLoader(dataset=test_dataset,
                             batch_size=config.base_batch_size,
                             shuffle=False,
                             num_workers=4,
                             pin_memory=True)

    print(f"\nRunning comparison for technique '{args.technique.upper()}' on dataset '{args.dataset}'...")

    # 3. Evaluate Baseline Model (ERM).
    print("\n--- Evaluating Baseline Model (ERM) ---")
    baseline_model = Network(config.model_name, config.num_class, config.mlp_neurons, config.hid_dim).to(DEVICE)
    base_results = run_evaluation(baseline_model, baseline_model_path, test_loader, DEVICE)

    # 4. Evaluate Margin Model (CAML + Technique).
    print(f"\n--- Evaluating CAML Model ({args.technique.upper()}) ---")
    margin_model = NetworkMargin(config.model_name, config.num_class, DEVICE, config.std, config.mlp_neurons, config.hid_dim).to(DEVICE)
    margin_results = run_evaluation(margin_model, caml_model_path, test_loader, DEVICE)

    # 5. Print comparison table.
    print("\n" + "=" * 65)
    print(" Prediction Results Comparison")
    print("=" * 65)
    print(f"Technique: CAML + {args.technique.upper()} | Dataset: {args.dataset}")
    print("-" * 65)
    print(f"{'Metric':<28} | {'ERM Baseline':<17} | {'CAML Model':<17}")
    print("-" * 65)

    metric_names = ['Global Accuracy (%)', 'Worst-Group Accuracy (%)', 'Average-Group Accuracy (%)']
    base_acc, base_worst, base_avg = base_results
    margin_acc, margin_worst, margin_avg = margin_results

    # Format results for printing.
    # NOTE(review): global accuracy is printed as-is while the group metrics are
    # scaled by 100 — presumably compute_accuracy returns the first in percent
    # and the others as fractions; confirm before changing the scaling.
    base_formatted = [f"{base_acc:.2f}", f"{base_worst*100:.2f}", f"{base_avg*100:.2f}"] if all(r is not None for r in base_results) else ["N/A"] * 3
    margin_formatted = [f"{margin_acc:.2f}", f"{margin_worst*100:.2f}", f"{margin_avg*100:.2f}"] if all(r is not None for r in margin_results) else ["N/A"] * 3

    # Fix: data rows previously used width 15 while the header used 17,
    # which misaligned the table columns.
    for i, name in enumerate(metric_names):
        print(f"{name:<28} | {base_formatted[i]:<17} | {margin_formatted[i]:<17}")

    if all(r is None for r in base_results) and all(r is None for r in margin_results):
        print("\nNo trained models found for this technique. Please run training first.")

    # Fix: closing rule was 60 chars wide while every other rule is 65.
    print("=" * 65)
if __name__ == '__main__':
    # Script entry point: parse CLI flags and run the model comparison.
    predict_comparison(parse_args())