-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsummarization_claude.py
More file actions
114 lines (89 loc) · 3.42 KB
/
summarization_claude.py
File metadata and controls
114 lines (89 loc) · 3.42 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import re
from collections import defaultdict
from pathlib import Path

import requests
# === OpenRouter credentials and model selection ===
# The key is read from the OPENROUTER_API_KEY environment variable so the
# secret does not have to be written into this file. When the variable is not
# set, the original placeholder string is used; it must be replaced with a real
# OpenRouter API key before requests can be authorized.
API_KEY = os.environ.get("OPENROUTER_API_KEY", "OPENAI_API_KEY")
MODEL = "anthropic/claude-3-opus"

# Headers attached to every request made with this configuration.
headers = {
    "Authorization": f"Bearer {API_KEY}",
    "Content-Type": "application/json",
    # "HTTP-Referer": "https://yourdomain.com",  # optional
    "X-Title": "TranscriptSummary",
}
# === Function to call Claude 3 via OpenRouter ===
def ask_claude(prompt, timeout=300):
    """Send ``prompt`` as a single user message and return the reply text.

    The request goes to the OpenRouter chat-completions endpoint using the
    module-level ``MODEL`` and ``headers``.

    Args:
        prompt: Text used as the ``content`` of the one user message.
        timeout: Seconds handed to ``requests.post``. Without a timeout a
            stalled connection would block the script indefinitely.

    Returns:
        The ``content`` string of the first entry in the response's
        ``choices`` list.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the request exceeds ``timeout``.
        RuntimeError: If the JSON body does not contain
            ``choices[0].message.content``.
    """
    url = "https://openrouter.ai/api/v1/chat/completions"
    payload = {
        "model": MODEL,
        "messages": [{"role": "user", "content": prompt}],
    }
    response = requests.post(url, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    try:
        return data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as err:
        # Include the payload so the cause is visible instead of a bare
        # KeyError/IndexError from the lookup chain.
        raise RuntimeError(f"Unexpected OpenRouter response: {data!r}") from err
# === Diarization label -> display name ===
# Labels missing from this table have no display name here.
# NOTE(review): SPEAKER_01 and SPEAKER_02 resolve to the same person --
# presumably the diarizer split one voice into two labels; confirm.
speaker_name_map = dict(
    SPEAKER_00="Sandra Ulog",
    SPEAKER_01="Craig Kaufman",
    SPEAKER_02="Craig Kaufman",
)
# === Load transcript ===
with open("/mnt/c/Inference/final_speaker_transcript.txt", "r", encoding="utf-8") as f:
    transcript = f.read()

# === Parse speakers and timestamps ===
# A segment is a header line "SPEAKER_<n> – <h:mm:ss>" followed by its text.
# The text extends to the next speaker header (or the end of the input), so
# utterances that wrap over several unindented lines are kept whole rather
# than being cut off after their first line.
pattern = re.compile(
    r"(SPEAKER_\d+) – (\d+:\d+:\d+)\n(.+?)(?=\n\s*SPEAKER_\d+ – \d+:\d+:\d+|\Z)",
    re.DOTALL,
)
matches = pattern.findall(transcript)
if not matches:
    # Nothing downstream fails on an empty parse, so say so explicitly instead
    # of silently producing an empty report.
    print("Warning: no speaker segments matched; check the transcript format.")

speaker_segments = defaultdict(list)  # display name -> list of "time: text" lines
all_segments = []  # every "time: text" line, in transcript order
for speaker_id, timestamp, text in matches:
    # Fall back to the raw label when no display name is configured.
    speaker_name = speaker_name_map.get(speaker_id, speaker_id)
    # Flatten each utterance onto one line so one segment == one line.
    cleaned_text = text.strip().replace("\n", " ")
    line = f"{timestamp}: {cleaned_text}"
    speaker_segments[speaker_name].append(line)
    all_segments.append(line)
full_text = "\n".join(all_segments)
# === Step 1: Chunked Summary + Key Points
# Upper bound, in characters, on the transcript text placed in one prompt.
CHUNK_CHAR_LIMIT = 10000


def _pack_lines(lines, limit=CHUNK_CHAR_LIMIT):
    """Group whole lines into newline-joined chunks of at most ``limit`` chars.

    Lines are never split across chunks, so a "time: text" segment keeps its
    timestamp. Only a single line longer than ``limit`` is cut, into
    ``limit``-sized pieces. An empty ``lines`` yields an empty list.
    """
    packed = []
    current = []
    current_len = 0
    for line in lines:
        # "or [line]" keeps an empty line as one (empty) piece.
        pieces = [line[k:k + limit] for k in range(0, len(line), limit)] or [line]
        for piece in pieces:
            # +1 accounts for the "\n" that will join this piece to the chunk.
            needed = len(piece) + (1 if current else 0)
            if current and current_len + needed > limit:
                packed.append("\n".join(current))
                current, current_len = [], 0
                needed = len(piece)
            current.append(piece)
            current_len += needed
    if current:
        packed.append("\n".join(current))
    return packed


print("⏳ Generating chunked summaries with Claude 3 Opus...")
summary_outputs = []
# Chunk on segment boundaries instead of slicing the joined text at fixed
# offsets, which could cut a line (and its timestamp) in half.
chunks = _pack_lines(all_segments)
for i, chunk in enumerate(chunks):
    print(f"🔹 Summarizing chunk {i+1}/{len(chunks)}...")
    chunk_prompt = f"""
This is chunk {i+1} of a meeting transcript.
Tasks:
1. Write a concise 3–5 sentence summary for this chunk.
2. List key discussion points with any available timestamps.
Transcript:
{chunk}
Format:
📌 CHUNK SUMMARY {i+1}:
📍 KEY POINTS {i+1}:
"""
    result = ask_claude(chunk_prompt)
    summary_outputs.append(result.strip())
combined_summary = "\n\n".join(summary_outputs)

# === Step 2: Action Items per Speaker
print("⏳ Extracting action items by speaker...")
speaker_action_items = {}
for speaker, segments in speaker_segments.items():
    # A speaker whose text exceeds the limit is processed in several requests
    # rather than being truncated, so later action items are not lost. A
    # speaker that fits in one chunk produces exactly one request.
    part_results = []
    for speaker_text in _pack_lines(segments):
        prompt = f"""
You are reviewing a meeting transcript for the speaker {speaker}.
Extract only the **clear, timestamped action items** this speaker committed to or was responsible for.
Format:
• [timestamp] Action item
Transcript:
{speaker_text}
"""
        result = ask_claude(prompt)
        part_results.append(result.strip())
    speaker_action_items[speaker] = "\n".join(part_results)
# === Write the report ===
# The report is the combined chunk summaries followed by one section per
# speaker holding that speaker's extracted action items. It is assembled in
# memory and written to the output file in a single call.
output_path = Path("meeting_summary_claude.txt")
report_parts = [
    "📌 MEETING SUMMARY + KEY POINTS:\n",
    combined_summary + "\n\n",
    "✅ ACTION ITEMS BY SPEAKER:\n",
]
for name, found_items in speaker_action_items.items():
    report_parts.append(f"\n{name}:\n")
    report_parts.append(found_items + "\n")
output_path.write_text("".join(report_parts), encoding="utf-8")
print(f"\n🎉 Summary and action items saved to: {output_path.resolve()}")