-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjsonl_merge
More file actions
executable file
·35 lines (27 loc) · 991 Bytes
/
jsonl_merge
File metadata and controls
executable file
·35 lines (27 loc) · 991 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env python
import json
import sys
from typing import List, Dict
def read_jsonl(file_path: str) -> List[Dict]:
    """Load every record from a JSON Lines file.

    Each non-blank line of the file is parsed as one JSON document.
    Blank or whitespace-only lines (for example a trailing empty line
    left by an editor) are skipped instead of being handed to the parser.

    Args:
        file_path: Path of the JSON Lines file to read.

    Returns:
        The parsed records, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(file_path, 'r', encoding='utf-8') as file:
        return [json.loads(line) for line in file if line.strip()]
def write_jsonl(data: List[Dict], file_path: str) -> None:
    """Serialize records to a file in JSON Lines form.

    The target file is opened in write mode, so any existing content is
    replaced. Every entry of ``data`` is emitted as one JSON document
    followed by a newline.

    Args:
        data: Records to serialize, written in iteration order.
        file_path: Path of the file to create or overwrite.
    """
    with open(file_path, 'w') as sink:
        sink.writelines(json.dumps(record) + '\n' for record in data)
def merge_jsonl_files(file1: str, file2: str, output_file: str | None = None):
    """Combine two JSON Lines files into one stream ordered by ``timestamp``.

    Both inputs are read completely, concatenated (``file1`` records
    first), and sorted by each record's ``'timestamp'`` value. The sort is
    stable, so records with equal timestamps keep that concatenation order.

    Args:
        file1: Path of the first JSON Lines input.
        file2: Path of the second JSON Lines input.
        output_file: Destination path. When falsy (``None`` or an empty
            string) the merged records are printed to standard output,
            one JSON document per line.

    Raises:
        KeyError: If a record has no ``'timestamp'`` key.
    """
    combined = read_jsonl(file1) + read_jsonl(file2)
    combined.sort(key=lambda record: record['timestamp'])

    # No destination given: stream the result to stdout and stop.
    if not output_file:
        for record in combined:
            print(json.dumps(record))
        return

    write_jsonl(combined, output_file)
def main(argv=None):
    """Command-line entry point: merge two JSON Lines files by timestamp.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]`` when ``None``. Expected layout is
            ``<file1> <file2> [output_file]``; extra arguments are ignored.

    Returns:
        Process exit status: ``0`` after a merge, ``1`` when fewer than
        two input paths were supplied.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        # Usage goes to stderr so it never mixes with merged records,
        # which are printed to stdout when no output file is given.
        print('Usage: jsonl_merge <file1> <file2> [output_file]',
              file=sys.stderr)
        return 1

    file1, file2 = args[0], args[1]
    output_file = args[2] if len(args) > 2 else None
    merge_jsonl_files(file1, file2, output_file)
    return 0


# Guard the CLI so importing this module does not parse argv or exit.
if __name__ == '__main__':
    sys.exit(main())