diff --git a/tools/health_check.py b/tools/health_check.py
index 5cd0a613..3eb995fb 100644
--- a/tools/health_check.py
+++ b/tools/health_check.py
@@ -150,23 +150,79 @@ def check_disk_usage(path: str = "/") -> Tuple[str, str, float]:
 
 
 def check_memory_usage() -> Tuple[str, str, float]:
+    """Check memory usage with cross-platform fallbacks.
+
+    Reads /proc/meminfo when it exists (Linux); otherwise queries
+    ``sysctl -n hw.memsize`` and ``vm_stat`` (macOS). Returns a
+    ``(status, message, percent_used)`` tuple; the status is WARNING when
+    total memory cannot be determined.
+    """
     try:
-        with open("/proc/meminfo") as f:
-            meminfo = {}
-            for line in f:
-                parts = line.split(":")
-                if len(parts) == 2:
-                    key = parts[0].strip()
-                    value = parts[1].strip().replace(" kB", "")
-                    try:
-                        meminfo[key] = int(value) * 1024
-                    except ValueError:
-                        pass
-
-        total = meminfo.get("MemTotal", 0)
-        available = meminfo.get("MemAvailable", 0)
-        used = total - available
-        pct = (used / total) * 100 if total > 0 else 0
+        # Try Linux /proc/meminfo first
+        if os.path.exists("/proc/meminfo"):
+            with open("/proc/meminfo") as f:
+                meminfo = {}
+                for line in f:
+                    parts = line.split(":")
+                    if len(parts) == 2:
+                        key = parts[0].strip()
+                        value = parts[1].strip().replace(" kB", "")
+                        try:
+                            # Values carry a " kB" suffix; store them as bytes
+                            meminfo[key] = int(value) * 1024
+                        except ValueError:
+                            pass
+
+            total = meminfo.get("MemTotal", 0)
+            available = meminfo.get("MemAvailable", 0)
+            used = total - available
+            pct = (used / total) * 100 if total > 0 else 0
+        else:
+            # Fallback for macOS: total physical memory in bytes via sysctl
+            try:
+                result = subprocess.run(
+                    ["sysctl", "-n", "hw.memsize"],
+                    capture_output=True, text=True, timeout=5
+                )
+                total = int(result.stdout.strip())
+            except (subprocess.TimeoutExpired, ValueError, FileNotFoundError):
+                # Total is unknown; reported as a WARNING below
+                total = 0
+
+            if total > 0:
+                try:
+                    result = subprocess.run(
+                        ["vm_stat"],
+                        capture_output=True, text=True, timeout=5
+                    )
+                    page_size = 4096  # Default page size
+                    available_pages = 0
+                    for line in result.stdout.splitlines():
+                        if "page size of" in line:
+                            # Header ends "(page size of N bytes)", so N is
+                            # the token after "of", not the last token
+                            page_size = int(
+                                line.split("page size of")[1].split()[0]
+                            )
+                        elif line.startswith(
+                            ("Pages free", "Pages inactive", "Pages speculative")
+                        ):
+                            # Inactive and speculative pages can be reclaimed,
+                            # so count them as available alongside free pages
+                            available_pages += int(line.split()[-1].rstrip("."))
+                    available = available_pages * page_size
+                    used = total - available
+                    pct = (used / total) * 100 if total > 0 else 0
+                except (subprocess.TimeoutExpired, ValueError, FileNotFoundError):
+                    pct = 0
+                    used = 0
+                    total = 0
+            else:
+                pct = 0
+                used = 0
+
+        if total == 0:
+            return "WARNING", "Cannot determine memory usage on this platform", 0
 
         if pct < MEMORY_THRESHOLD_WARNING:
             return "OK", f"{pct:.1f}% used ({used // (1024**3)}GB/{total // (1024**3)}GB)", pct
@@ -179,19 +235,36 @@ def check_memory_usage() -> Tuple[str, str, float]:
 
 
 def check_load_average() -> Tuple[str, str, float]:
+    """Check load average with cross-platform fallbacks.
+
+    Reads the 1-minute load from /proc/loadavg when it exists, otherwise
+    from os.getloadavg(). Returns a ``(status, message, load)`` tuple where
+    the status reflects the load as a percentage of the CPU count.
+    """
     try:
-        with open("/proc/loadavg") as f:
-            parts = f.read().strip().split()
-            load = float(parts[0])
-        cpu_count = os.cpu_count() or 1
-        load_pct = (load / cpu_count) * 100
-
-        if load_pct < 70:
-            return "OK", f"Load: {load} ({load_pct:.0f}% of {cpu_count} cores)", load
-        elif load_pct < 90:
-            return "WARNING", f"Load: {load} ({load_pct:.0f}% of {cpu_count} cores)", load
-        else:
-            return "CRITICAL", f"Load: {load} ({load_pct:.0f}% of {cpu_count} cores)", load
+        # Try Linux /proc/loadavg first
+        if os.path.exists("/proc/loadavg"):
+            with open("/proc/loadavg") as f:
+                parts = f.read().strip().split()
+                load = float(parts[0])
+        else:
+            # os.getloadavg() is missing on some platforms (AttributeError)
+            # and raises OSError when the load average is unobtainable
+            try:
+                # 1-minute average, rounded to keep the message readable
+                load = round(os.getloadavg()[0], 2)
+            except (AttributeError, OSError):
+                return "WARNING", "Cannot determine load average on this platform", 0
+
+        cpu_count = os.cpu_count() or 1
+        load_pct = (load / cpu_count) * 100
+
+        if load_pct < 70:
+            return "OK", f"Load: {load} ({load_pct:.0f}% of {cpu_count} cores)", load
+        elif load_pct < 90:
+            return "WARNING", f"Load: {load} ({load_pct:.0f}% of {cpu_count} cores)", load
+        else:
+            return "CRITICAL", f"Load: {load} ({load_pct:.0f}% of {cpu_count} cores)", load
     except Exception as e:
         return "WARNING", f"Cannot check: {e}", 0
 