Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 86 additions & 0 deletions diagnostic/build-2b54872c.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
{
"generated_at": "2026-06-22T04:59:36.970904+00:00",
"commit": "2b54872c",
"diagnostic_logd": null,
"diagnostic_logd_error": "3a38c7e6785c22f0e22c",
"chunked": false,
"chunk_size_bytes": null,
"password": null,
"decrypt_command": null,
"total_modules": 10,
"passed": 2,
"failed": 8,
"modules": [
{
"name": "backend",
"status": "FAIL",
"elapsed_seconds": 0,
"artifact": null,
"output": "Command not found: [Errno 2] No such file or directory: 'cargo'"
},
{
"name": "frontend",
"status": "PASS",
"elapsed_seconds": 4.281,
"artifact": "/Users/qingfeng/Desktop/\u81ea\u52a8\u642c\u7816/zeroeye-9904099-parse-errors/frontend/dist",
"output": "> tent-frontend@0.0.0 build\n> tsc -b && vite build\n\nvite v6.4.3 building for production...\ntransforming...\n\u2713 100 modules transformed.\nrendering chunks...\ncomputing gzip size...\ndist/index.html 0.62 kB \u2502 gzip: 0.34 kB\ndist/assets/state-BkjSKDbY.js 8.91 kB \u2502 gzip: 3.55 kB \u2502 map: 57.15 kB\ndist/assets/vendor-CREcWLHI.js 48.93 kB \u2502 gzip: 17.22 kB \u2502 map: 481.27 kB\ndist/assets/index-CyxcoTyU.js 231.32 kB \u2502 gzip: 72.02 kB \u2502 map: 1,044.42 kB\n\u2713 built in 524ms"
},
{
"name": "market",
"status": "FAIL",
"elapsed_seconds": 0,
"artifact": null,
"output": "Command not found: [Errno 2] No such file or directory: 'go'"
},
{
"name": "frailbox",
"status": "FAIL",
"elapsed_seconds": 0.036,
"artifact": null,
"output": "gcc -Wall -Wextra -Wpedantic -std=c2x -O2 -g -D_FORTIFY_SOURCE=3 -fstack-protector-strong -fPIE -Iinclude -MMD -MP -c src/arena.c -o build/src/arena.o\nsrc/arena.c:17:23: error: use of undeclared identifier 'MAP_HUGETLB'\n 17 | mmap_flags |= MAP_HUGETLB;\n | ^\nsrc/arena.c:179:17: warning: comparison of distinct pointer types ('const void *' and 'char *') [-Wcompare-distinct-pointer-types]\n 179 | ptr < (char *)region->start + region->size) {\n | ~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n1 warning and 1 error generated.\nmake: *** [build/src/arena.o] Error 1"
},
{
"name": "engine",
"status": "FAIL",
"elapsed_seconds": 0,
"artifact": null,
"output": "Command not found: [Errno 2] No such file or directory: 'cmake'"
},
{
"name": "compliance",
"status": "FAIL",
"elapsed_seconds": 0.01,
"artifact": null,
"output": "The operation couldn\u2019t be completed. Unable to locate a Java Runtime.\nPlease visit http://www.java.com for information on installing Java."
},
{
"name": "v2-market-stream",
"status": "PASS",
"elapsed_seconds": 0.05,
"artifact": null,
"output": "Syntax OK"
},
{
"name": "nfc-scanner",
"status": "FAIL",
"elapsed_seconds": 0,
"artifact": null,
"output": "Command not found: [Errno 2] No such file or directory: 'luac'"
},
{
"name": "openapi-haskell",
"status": "FAIL",
"elapsed_seconds": 0,
"artifact": null,
"output": "Command not found: [Errno 2] No such file or directory: 'ghc'"
},
{
"name": "openapi-tools",
"status": "FAIL",
"elapsed_seconds": 0,
"artifact": null,
"output": "Command not found: [Errno 2] No such file or directory: 'luac'"
}
],
"pr_note": "Encrypted diagnostic logd artifact was not created; include this JSON report showing why. The encrypted .logd is the required diagnostic content for PR review; this JSON file is metadata. Maintainers may ask you to remove these diagnostic artifacts before merging."
}
86 changes: 81 additions & 5 deletions tools/log_aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@
logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")
logger = logging.getLogger("log_aggregator")

# Matches a credential-looking key together with its value so both can be
# masked in one substitution. Two shapes are recognised:
#   1. key/value pairs such as ``token=abc``, ``password: "abc"`` or the JSON
#      form ``"token":"abc"`` (an optional quote may close the key before the
#      separator). When the value starts with an HTTP auth scheme
#      (``Bearer``/``Basic``) the scheme AND the credential after it are
#      consumed, so ``Authorization: Bearer abc`` does not leave ``abc`` behind.
#   2. a bare ``Bearer <credential>`` with no ``:``/``=`` separator.
# The value itself stops at a quote, whitespace, comma or semicolon.
SECRET_VALUE_PATTERN = re.compile(
    r'(?i)\b(?:'
    r'(?:api[_-]?key|token|secret|password|authorization|bearer)\b'
    r'["\']?\s*[:=]\s*["\']?(?:(?:bearer|basic)\s+)?'
    r'|bearer\s+'
    r')[^"\'\s,;]+'
)
# Matches any run of 32 or more key-like characters (or a 0x-prefixed hex
# string of that length), regardless of the text around it.
LONG_SECRET_PATTERN = re.compile(r'\b(?:0x[a-fA-F0-9]{32,}|[A-Za-z0-9_=-]{32,})\b')

# ---------------------------------------------------------------------------
# LOG PARSERS
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -206,6 +212,7 @@ class LogAggregator:
def __init__(self):
self.parsers = [JSONLogParser(), TextLogParser(), NginxLogParser()]
self.entries: List[Dict[str, Any]] = []
self.parse_errors: List[Dict[str, Any]] = []
self.level_counts: Counter = Counter()
self.service_counts: Counter = Counter()
self.hourly_counts: Counter = Counter()
Expand All @@ -218,13 +225,13 @@ def process_file(self, filepath: str) -> int:
try:
if filepath.endswith('.gz'):
with gzip.open(filepath, 'rt', errors='replace') as f:
for line in f:
if self._parse_line(line):
for line_number, line in enumerate(f, start=1):
if self._parse_line(line, filepath, line_number):
parsed_count += 1
else:
with open(filepath, 'r', errors='replace') as f:
for line in f:
if self._parse_line(line):
for line_number, line in enumerate(f, start=1):
if self._parse_line(line, filepath, line_number):
parsed_count += 1
except Exception as e:
logger.error(f"Error processing {filepath}: {e}")
Expand All @@ -240,7 +247,13 @@ def process_directory(self, dirpath: str, pattern: str = "*.log") -> int:
logger.debug(f" {filepath.name}: {count} entries")
return total

def _parse_line(self, line: str) -> bool:
def _parse_line(
self,
line: str,
filepath: Optional[str] = None,
line_number: Optional[int] = None,
) -> bool:
self._record_json_parse_error_if_needed(line, filepath, line_number)
for parser in self.parsers:
entry = parser.parse(line)
if entry:
Expand All @@ -262,6 +275,47 @@ def _parse_line(self, line: str) -> bool:
return True
return False

def _record_json_parse_error_if_needed(
    self,
    line: str,
    filepath: Optional[str],
    line_number: Optional[int],
) -> None:
    """Record a parse failure when *line* looks like JSON but is not a JSON object.

    A line is treated as a JSON candidate when its first non-blank character
    is ``{`` or ``[``. Candidates that fail to decode, or that decode to
    anything other than a dict, are reported through
    ``self._record_parse_error`` with ``parser_type="json"``.

    Args:
        line: Raw log line.
        filepath: Path of the file the line came from, if known.
        line_number: 1-based position of the line in that file, if known.
    """
    stripped = line.strip()
    # NOTE(review): the check is purely on the first character, so a
    # bracket-prefixed line that is not meant to be JSON is also reported
    # here -- confirm that is acceptable for the formats being ingested.
    if not stripped.startswith(("{", "[")):
        return

    try:
        decoded = json.loads(stripped)
    except json.JSONDecodeError as e:
        error = str(e)
    except RecursionError:
        # Extremely deep nesting (e.g. thousands of '[') exhausts the
        # decoder's recursion budget; report it as a parse failure instead
        # of letting it escape and interrupt the caller.
        error = "JSON nesting is too deep to decode"
    else:
        if isinstance(decoded, dict):
            return
        error = "JSON log entry must be an object"

    self._record_parse_error(
        parser_type="json",
        filepath=filepath,
        line_number=line_number,
        error=error,
    )

def _record_parse_error(
    self,
    parser_type: str,
    filepath: Optional[str],
    line_number: Optional[int],
    error: str,
) -> None:
    """Append one failure record to ``self.parse_errors``.

    The record carries the parser type, the file path (``'<unknown>'`` when
    falsy), the line number (``0`` when falsy) and the error text after it
    has been passed through ``sanitize_error_message``.
    """
    failure = {
        'parser_type': parser_type,
        'file_path': filepath if filepath else '<unknown>',
        'line_number': line_number if line_number else 0,
        'error': sanitize_error_message(error),
    }
    self.parse_errors.append(failure)

def get_summary(self) -> Dict[str, Any]:
return {
'total_entries': len(self.entries),
Expand Down Expand Up @@ -359,6 +413,18 @@ def export_json(self, output_path: str):
}, f, indent=2, default=str)
logger.info(f"Report exported to {output_path}")

def export_parse_error_report(self, output_path: str):
    """Write the collected parse failures to *output_path* as indented JSON.

    The report contains a UTC generation timestamp, the total number of
    failures, a per-parser failure count and the failure records themselves.
    """
    # Tally failures per parser type.
    tally = Counter()
    for failure in self.parse_errors:
        tally[failure['parser_type']] += 1

    payload = {
        'generated_at': datetime.now(timezone.utc).isoformat(),
        'total_failures': len(self.parse_errors),
        'by_parser': {name: tally[name] for name in sorted(tally)},
        'failures': self.parse_errors,
    }
    with open(output_path, 'w') as handle:
        json.dump(payload, handle, indent=2, sort_keys=True)
    logger.info(f"Parse error report exported to {output_path}")

def generate_html_report(self, output_path: str):
summary = self.get_summary()
html = f"""<!DOCTYPE html>
Expand Down Expand Up @@ -410,6 +476,7 @@ def parse_args():
parser.add_argument("--dir", help="Directory containing log files")
parser.add_argument("--output", "-o", default="log_report.json", help="Output file path")
parser.add_argument("--format", choices=["json", "csv", "html"], default="json", help="Output format")
parser.add_argument("--parse-error-report", help="Write sanitized parse failures to this JSON file")
parser.add_argument("--search", help="Search for a string in logs")
parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")
return parser.parse_args()
Expand Down Expand Up @@ -459,8 +526,17 @@ def main():
else:
aggregator.export_json(args.output)

if args.parse_error_report:
aggregator.export_parse_error_report(args.parse_error_report)

return 0


def sanitize_error_message(message: str) -> str:
    """Return *message* with secret-looking text masked, capped at 240 characters.

    Every match of ``SECRET_VALUE_PATTERN`` and then of ``LONG_SECRET_PATTERN``
    is replaced with ``[redacted]``; the result is truncated afterwards.
    """
    scrubbed = message
    for pattern in (SECRET_VALUE_PATTERN, LONG_SECRET_PATTERN):
        scrubbed = pattern.sub("[redacted]", scrubbed)
    return scrubbed[:240]


if __name__ == "__main__":
    # Use main()'s return value as the process exit status so callers that
    # check the exit code (shells, subprocess.run(check=True)) can see it.
    raise SystemExit(main())
68 changes: 68 additions & 0 deletions tools/log_aggregator_parse_error_smoke.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/usr/bin/env python3
"""Smoke coverage for sanitized log parse-error reports."""

import json
import subprocess
import sys
import tempfile
from pathlib import Path


# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Path of the log aggregator script that the smoke run executes.
TOOL = ROOT / "tools" / "log_aggregator.py"


def main() -> int:
    """Run the aggregator on a small mixed log and check its parse-error report.

    The sample holds one valid JSON entry, one truncated JSON entry carrying
    a secret-looking token, one JSON array and one plain-text entry. The
    report is expected to list exactly the two bad JSON lines and to contain
    neither the secret nor any raw log content.

    Returns:
        0 when every check passes; failed checks raise ``AssertionError``.
    """
    with tempfile.TemporaryDirectory() as tempdir:
        workdir = Path(tempdir)
        input_log = workdir / "sample.log"
        output_report = workdir / "report.json"
        parse_errors = workdir / "parse-errors.json"
        secret_value = "sk_test_1234567890abcdefghijklmnopqrstuvwxyz"

        sample_lines = [
            '{"timestamp":1782086400,"level":"info","service":"api","message":"ok"}',
            '{"timestamp":"2026-06-22T00:01:00","token":"' + secret_value + '"',
            '["not", "an", "object"]',
            "2026-06-22 00:02:00 ERROR [api] fallback text log",
        ]
        input_log.write_text("".join(f"{entry}\n" for entry in sample_lines))

        command = [
            sys.executable,
            str(TOOL),
            "--input",
            str(input_log),
            "--output",
            str(output_report),
            "--parse-error-report",
            str(parse_errors),
        ]
        # check=True makes a non-zero exit of the tool fail the smoke run.
        subprocess.run(
            command,
            cwd=str(ROOT),
            check=True,
            capture_output=True,
            text=True,
        )

        report = json.loads(parse_errors.read_text())
        failures = report["failures"]

        assert report["total_failures"] == 2
        assert report["by_parser"] == {"json": 2}
        assert {failure["line_number"] for failure in failures} == {2, 3}
        for failure in failures:
            assert failure["file_path"] == str(input_log)
        for failure in failures:
            assert "parser_type" in failure

        # Nothing from the raw log lines may appear in the serialized report.
        encoded = json.dumps(report)
        for leaked in (secret_value, "timestamp", "fallback text log"):
            assert leaked not in encoded

    return 0


if __name__ == "__main__":
    # Exit with whatever status main() reports.
    sys.exit(main())