-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconsensus_2model.py
More file actions
118 lines (93 loc) · 3.95 KB
/
consensus_2model.py
File metadata and controls
118 lines (93 loc) · 3.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python3
"""
Generate consensus JSONL from 2-model labeling data (Claude + KIMI).

For each diagnostic ID labeled by both models:
- If both agree → the shared verdict, with consensus_type="agree"
- If they disagree → Claude's verdict, with consensus_type="disagree"

Diagnostic IDs labeled by only one model are left out of the output.
"""
import json
from collections import OrderedDict
from pathlib import Path
def load_jsonl(filepath):
    """Load a JSONL file into a dict keyed by diagnostic ID.

    Each non-blank line is parsed as one JSON object and indexed by
    ``record['diagnostic']['id']``.  When an ID appears more than once, only
    the first record with that ID is kept.

    Args:
        filepath: Path of the JSONL file to read (anything ``open`` accepts).

    Returns:
        dict mapping diagnostic ID to the first record seen with that ID.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.  The
            message is prefixed with the file path and 1-based line number.
        KeyError: If a parsed record lacks the ``diagnostic`` / ``id`` keys.
    """
    records = {}
    # Decode explicitly as UTF-8 so parsing does not depend on the platform's
    # default locale encoding.
    with open(filepath, 'r', encoding='utf-8') as f:
        for lineno, line in enumerate(f, start=1):
            line = line.strip()
            if not line:
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as err:
                # Same exception type, but say where in the file it happened.
                raise json.JSONDecodeError(
                    f"{filepath}:{lineno}: {err.msg}", err.doc, err.pos
                ) from err
            # setdefault only stores the record for IDs not seen before,
            # which keeps the first occurrence.
            records.setdefault(record['diagnostic']['id'], record)
    return records
def get_verdict(record):
    """Return the label stored under the record's ``gold`` entry."""
    gold = record['gold']
    return gold['label']
def get_confidence(record):
    """Return the confidence stored under the record's ``gold`` entry."""
    gold = record['gold']
    return gold['confidence']
def main(input_dir=None):
    """Build the 2-model consensus JSONL from the Claude and KIMI label files.

    Reads ``claude-sonnet-4.6.jsonl`` and ``kimi-k2.5.jsonl`` from
    ``input_dir``, keeps the diagnostic IDs present in both, and writes one
    compact JSON object per ID (in sorted ID order) to
    ``consensus_2model.jsonl`` in the same directory.  Each output record is
    based on the Claude record and carries both models' verdicts and
    confidences.  When the verdicts differ, Claude's verdict becomes the
    consensus verdict and the record is marked ``"disagree"``.

    Progress and summary counts are printed to stdout.

    Args:
        input_dir: Directory holding the two input files and receiving the
            output file.  Defaults to the original hard-coded corpus path
            when omitted, so ``main()`` behaves as before.

    Raises:
        OSError: If an input file cannot be read or the output file cannot
            be written.
        KeyError: If a record lacks one of the fields copied below.
    """
    if input_dir is None:
        input_dir = '/Users/andresvlc/WebDev/GoGuard/artifacts/corpus/goguard-go-oss-100'
    input_dir = Path(input_dir)
    claude_file = input_dir / 'claude-sonnet-4.6.jsonl'
    kimi_file = input_dir / 'kimi-k2.5.jsonl'
    output_file = input_dir / 'consensus_2model.jsonl'

    print(f"Loading Claude records from {claude_file}...")
    claude_records = load_jsonl(claude_file)
    print(f" → {len(claude_records)} unique diagnostic IDs")

    print(f"Loading KIMI records from {kimi_file}...")
    kimi_records = load_jsonl(kimi_file)
    print(f" → {len(kimi_records)} unique diagnostic IDs")

    # Only IDs labeled by both models can take part in the consensus.
    common_ids = claude_records.keys() & kimi_records.keys()
    print(f"\nCommon diagnostic IDs: {len(common_ids)}")

    consensus_records = []
    agree_count = 0
    disagree_count = 0

    # Sorted so the output order is deterministic across runs.
    for diag_id in sorted(common_ids):
        claude_rec = claude_records[diag_id]
        kimi_rec = kimi_records[diag_id]

        claude_verdict = get_verdict(claude_rec)
        kimi_verdict = get_verdict(kimi_rec)

        if claude_verdict == kimi_verdict:
            consensus_type = "agree"
            agree_count += 1
        else:
            consensus_type = "disagree"
            disagree_count += 1

        consensus_records.append({
            # Top-level fields are copied from the Claude record.
            'sample_id': claude_rec['sample_id'],
            'repo': claude_rec['repo'],
            'project': claude_rec['project'],
            'diagnostic': claude_rec['diagnostic'],
            'gold': claude_rec['gold'],  # Claude's original gold label
            # Claude's verdict is the consensus in both cases: it equals
            # KIMI's on agreement and is treated as primary on disagreement.
            'consensus_verdict': claude_verdict,
            'consensus_type': consensus_type,
            'claude_verdict': claude_verdict,
            'kimi_verdict': kimi_verdict,
            'claude_confidence': get_confidence(claude_rec),
            'kimi_confidence': get_confidence(kimi_rec),
            'model_count': 2,
        })

    total = len(consensus_records)
    print(f"\nWriting {total} consensus records to {output_file}...")
    with open(output_file, 'w', encoding='utf-8') as f:
        for rec in consensus_records:
            f.write(json.dumps(rec, separators=(',', ':')) + '\n')

    print("\n✓ Done!")
    print(f" Agree: {agree_count}")
    print(f" Disagree: {disagree_count}")
    print(f" Total: {total}")
    if total:
        print(f" Agreement rate: {agree_count / total * 100:.1f}%")
    else:
        # No shared IDs: a rate is undefined, so avoid dividing by zero.
        print(" Agreement rate: n/a (no common diagnostic IDs)")
# Run the consensus build only when this file is executed as a script.
if __name__ == "__main__":
    main()