Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 7 additions & 10 deletions src/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,11 @@ def baseline_step(
"""
job_queue_2d = baseline_state['job_queue'].reshape(-1, 4)

process_ongoing_jobs(baseline_state['nodes'], baseline_cores_available, baseline_running_jobs)
process_ongoing_jobs(baseline_state['nodes'], baseline_cores_available, baseline_running_jobs, metrics, is_baseline=True)

# Age helper queue and fill real queue before new arrivals
age_backlog_queue(baseline_backlog_queue, metrics, _is_baseline=True)
baseline_next_empty_slot, _ = fill_queue_from_backlog(
job_queue_2d, baseline_backlog_queue, baseline_next_empty_slot
)
baseline_next_empty_slot, _ = fill_queue_from_backlog(job_queue_2d, baseline_backlog_queue, baseline_next_empty_slot)

_new_baseline_jobs, baseline_next_empty_slot, baseline_backlog_dropped = add_new_jobs(
job_queue_2d, new_jobs_count, new_jobs_durations,
Expand Down Expand Up @@ -114,14 +112,13 @@ def baseline_step(

baseline_state['job_queue'] = job_queue_2d.flatten()

baseline_cost = power_cost(num_used_nodes, num_idle_nodes, current_price)
num_used_cores = num_on_nodes * CORES_PER_NODE - np.sum(baseline_cores_available)
baseline_cost = power_cost(num_on_nodes, num_used_cores, current_price)
env_print(f" > baseline_cost: €{baseline_cost:.4f} | used nodes: {num_used_nodes}, idle nodes: {num_idle_nodes}")
baseline_cost_off = power_cost(num_used_nodes, 0, current_price)
baseline_cost_off = power_cost(num_used_nodes, num_used_cores, current_price)
env_print(f" > baseline_cost_off: €{baseline_cost_off:.4f} | used nodes: {num_used_nodes}, idle nodes: 0")
baseline_power_mwh = power_consumption_mwh(num_used_nodes, num_idle_nodes)
baseline_power_off_mwh = power_consumption_mwh(num_used_nodes, 0)

num_used_cores = num_on_nodes * CORES_PER_NODE - np.sum(baseline_cores_available)
baseline_power_mwh = power_consumption_mwh(num_on_nodes, num_used_cores)
baseline_power_off_mwh = power_consumption_mwh(num_used_nodes, num_used_cores)
return (
baseline_cost,
baseline_cost_off,
Expand Down
6 changes: 3 additions & 3 deletions src/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def step(self, action: np.ndarray) -> tuple[dict[str, np.ndarray], float, bool,

# Decrement booked time for nodes and complete running jobs
self.env_print("[1] Processing ongoing jobs...")
completed_jobs = process_ongoing_jobs(self.state['nodes'], self.cores_available, self.running_jobs)
completed_jobs = process_ongoing_jobs(self.state['nodes'], self.cores_available, self.running_jobs, self.metrics, is_baseline=False)
self.env_print(f"{len(completed_jobs)} jobs completed: [{' '.join(['#' + str(job_id) for job_id in completed_jobs]) if len(completed_jobs) > 0 else ''}]")

# Age helper queues (jobs waiting outside the fixed queue)
Expand Down Expand Up @@ -472,15 +472,15 @@ def step(self, action: np.ndarray) -> tuple[dict[str, np.ndarray], float, bool,
self.metrics.episode_baseline_used_cores.append(baseline_num_used_cores)

step_reward, step_cost, eff_reward_norm, price_reward, idle_penalty_norm, job_age_penalty_norm = self.reward_calculator.calculate(
num_used_nodes, num_idle_nodes, current_price, average_future_price,
num_used_nodes, num_idle_nodes, num_used_cores, current_price, average_future_price,
num_off_nodes, num_launched_jobs, num_node_changes, job_queue_2d,
num_unprocessed_jobs, self.weights, num_dropped_this_step, self.env_print
)

self.metrics.episode_reward += step_reward
self.metrics.total_cost += step_cost
self.metrics.episode_total_cost += step_cost
step_power_mwh = power_consumption_mwh(num_used_nodes, num_idle_nodes)
step_power_mwh = power_consumption_mwh(num_on_nodes, num_used_cores)
self.metrics.total_power_consumption_mwh += step_power_mwh
self.metrics.episode_total_power_consumption_mwh += step_power_mwh

Expand Down
6 changes: 5 additions & 1 deletion src/evaluation_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,9 @@ def build_episode_summary_line(
f"Agent Occupancy (Cores)={agent_occupancy_cores_pct:.2f}%, "
f"Baseline Occupancy (Cores)={baseline_occupancy_cores_pct:.2f}%, "
f"Agent Occupancy (Nodes)={agent_occupancy_nodes_pct:.2f}%, "
f"Baseline Occupancy (Nodes)={baseline_occupancy_nodes_pct:.2f}% "
f"Baseline Occupancy (Nodes)={baseline_occupancy_nodes_pct:.2f}%, "
f"Power={float(episode_data['agent_power_consumption_mwh']):.1f}/"
f"{float(episode_data['baseline_power_consumption_mwh']):.1f}/"
f"{float(episode_data['baseline_power_consumption_off_mwh']):.1f} MWh "
f"(agent/base/base_off)"
Comment on lines +66 to +70
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Drop the second Power= field.

Line 67 duplicates the same metric already emitted at Lines 40-43, so the episode line now contains two Power= sections. That makes the output noisy and brittle for any downstream log parsing.

✂️ Proposed fix
-        f"Baseline Occupancy (Nodes)={baseline_occupancy_nodes_pct:.2f}%, "
-        f"Power={float(episode_data['agent_power_consumption_mwh']):.1f}/"
-        f"{float(episode_data['baseline_power_consumption_mwh']):.1f}/"
-        f"{float(episode_data['baseline_power_consumption_off_mwh']):.1f} MWh "
-        f"(agent/base/base_off)"
+        f"Baseline Occupancy (Nodes)={baseline_occupancy_nodes_pct:.2f}%"
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
f"Baseline Occupancy (Nodes)={baseline_occupancy_nodes_pct:.2f}%, "
f"Power={float(episode_data['agent_power_consumption_mwh']):.1f}/"
f"{float(episode_data['baseline_power_consumption_mwh']):.1f}/"
f"{float(episode_data['baseline_power_consumption_off_mwh']):.1f} MWh "
f"(agent/base/base_off)"
f"Baseline Occupancy (Nodes)={baseline_occupancy_nodes_pct:.2f}%"
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/evaluation_summary.py` around lines 66 - 70, The episode summary f-string
is emitting a duplicated "Power=" field; update the f-string that builds the
line using baseline_occupancy_nodes_pct and episode_data (keys
agent_power_consumption_mwh, baseline_power_consumption_mwh,
baseline_power_consumption_off_mwh) to remove the redundant "Power=" segment so
only the single Power metric (already emitted earlier) remains; locate the
formatted string construction in evaluation_summary.py and delete the second
Power=/... group while preserving the remaining text and numeric formatting.

)
41 changes: 26 additions & 15 deletions src/job_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,27 +73,36 @@ def validate_next_empty(job_queue_2d: np.ndarray, next_empty: int) -> None:
assert np.all(job_queue_2d[:next_empty, 0] != 0), "hole before next_empty_slot"


def process_ongoing_jobs(nodes: np.ndarray, cores_available: np.ndarray, running_jobs: dict[int, dict[str, Any]]) -> list[int]:
def process_ongoing_jobs(nodes: np.ndarray, cores_available: np.ndarray, running_jobs: dict[int, dict[str, Any]], metrics: MetricsTracker, is_baseline: bool) -> list[int]:
"""
Process ongoing jobs: decrement their duration, complete finished jobs,
and release resources.
release resources, and record completion metrics.

Completion is counted here (when duration hits zero), not at launch time.
'wait_time' on each job is the time spent in the queue before being launched,
which is the standard HPC metric for scheduler responsiveness.
Comment on lines +81 to +83
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Completion-time episode counters now mix different job cohorts.

src/environment.py Lines 294-304 only reset episode metrics between truncations; they do not clear running_jobs, the queue, or backlog. After Lines 117-125 moved these increments to completion time, a job submitted in episode N and finishing in episode N+1 contributes to episode N+1's jobs_completed and avg_wait_time, while record_episode_completion() still divides by that episode's jobs_submitted. That makes the per-episode completion stats drift and can push completion_rate past 100%. Track submission cohort/episode on the job, or keep separate episode-local "started/launched" counters for the summary.

Also applies to: 114-125

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/job_management.py` around lines 81 - 83, Episode metrics are being
polluted because completion-time increments (in job completion handling around
running_jobs and updates to jobs_completed and avg_wait_time) count jobs that
were submitted in previous episodes; update the logic so episode boundaries are
respected by either: (a) tagging each Job object with its submission_episode
(e.g., set submission_episode when the job is enqueued) and only increment
per-episode counters (jobs_completed, avg_wait_time used by
record_episode_completion()) when job.submission_episode == current_episode, or
(b) maintain separate episode-local launch/completion counters (e.g.,
episode_jobs_launched, episode_jobs_completed, episode_wait_accumulator) that
are reset by record_episode_completion() and incremented at launch/completion
time; ensure record_episode_completion() uses those episode-local counters (not
global running_jobs/queue/backlog) so completion_rate cannot exceed 100%.


Args:
nodes: Array of node states
cores_available: Array of available cores per node
running_jobs: Dictionary of currently running jobs
metrics: Metrics tracker; completed job counts and queue wait times
are recorded here
is_baseline: Whether finished jobs belong to the baseline simulation

Returns:
List of completed job IDs
"""
completed_jobs = []
completed_wait_time = 0

for job_id, job_data in running_jobs.items():
job_data['duration'] -= 1

# Check if job is completed
if job_data['duration'] <= 0:
completed_jobs.append(job_id)
completed_wait_time += int(job_data.get('wait_time', 0))
# Release resources
for node_idx, cores_used in job_data['allocation']:
cores_available[node_idx] += cores_used
Expand All @@ -102,6 +111,19 @@ def process_ongoing_jobs(nodes: np.ndarray, cores_available: np.ndarray, running
for job_id in completed_jobs:
del running_jobs[job_id]

if completed_jobs:
completed_count = len(completed_jobs)
if is_baseline:
metrics.baseline_jobs_completed += completed_count
metrics.baseline_total_job_wait_time += completed_wait_time
metrics.episode_baseline_jobs_completed += completed_count
metrics.episode_baseline_total_job_wait_time += completed_wait_time
else:
metrics.jobs_completed += completed_count
metrics.total_job_wait_time += completed_wait_time
metrics.episode_jobs_completed += completed_count
metrics.episode_total_job_wait_time += completed_wait_time

# Update node times based on remaining jobs
# Reset all nodes first
for i in range(MAX_NODES):
Expand Down Expand Up @@ -198,7 +220,7 @@ def assign_jobs_to_available_nodes(
running_jobs: Dictionary of currently running jobs
next_empty_slot: Index of next empty slot in queue
next_job_id: Next available job ID
metrics: MetricsTracker object to update with job completion metrics
metrics: MetricsTracker object to update with drop/rejection counts
is_baseline: Whether this is baseline simulation

Returns:
Expand Down Expand Up @@ -229,6 +251,7 @@ def assign_jobs_to_available_nodes(
running_jobs[next_job_id] = {
"duration": job_duration,
"allocation": job_allocation,
"wait_time": int(job_age), # hours spent in queue; recorded at completion time
}
next_job_id += 1

Expand All @@ -239,18 +262,6 @@ def assign_jobs_to_available_nodes(
if job_idx < next_empty_slot:
next_empty_slot = job_idx

# Track job completion and wait time
if is_baseline:
metrics.baseline_jobs_completed += 1
metrics.baseline_total_job_wait_time += job_age
metrics.episode_baseline_jobs_completed += 1
metrics.episode_baseline_total_job_wait_time += job_age
else:
metrics.jobs_completed += 1
metrics.total_job_wait_time += job_age
metrics.episode_jobs_completed += 1
metrics.episode_total_job_wait_time += job_age

num_processed_jobs += 1
continue

Expand Down
2 changes: 2 additions & 0 deletions src/metrics_tracker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Metrics tracking and episode recording for the PowerSched environment."""



class MetricsTracker:
"""Tracks metrics throughout training episodes."""

Expand Down Expand Up @@ -175,6 +176,7 @@ def record_episode_completion(self, current_episode: int) -> dict[str, float | i
)
savings_vs_baseline: float = self.episode_baseline_cost - self.episode_total_cost
savings_vs_baseline_off: float = self.episode_baseline_cost_off - self.episode_total_cost

dropped_jobs_per_saved_euro: float = self._safe_ratio(
float(self.episode_jobs_dropped), savings_vs_baseline
) if savings_vs_baseline > 0.0 else float("nan")
Expand Down
59 changes: 32 additions & 27 deletions src/reward_calculation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,45 +5,47 @@
import numpy as np

from src.config import (
COST_IDLE_MW, COST_USED_MW, PENALTY_IDLE_NODE,
COST_IDLE_MW, COST_USED_MW, CORES_PER_NODE, PENALTY_IDLE_NODE,
PENALTY_DROPPED_JOB, MAX_NODES, MAX_NEW_JOBS_PER_HOUR, WEEK_HOURS
)
from src.prices import Prices
from src.weights import Weights


def power_cost(num_used_nodes: int, num_idle_nodes: int, current_price: float) -> float:
def power_cost(num_on_nodes: int, cores_used: int, current_price: float) -> float:
"""
Calculate power cost based on node usage and current electricity price.

Proportional model: all on-nodes draw idle power, plus additional compute
power scaled linearly by actual core utilization.
Formula: (COST_IDLE_MW * num_on + (COST_USED_MW - COST_IDLE_MW) * cores_used/CORES_PER_NODE) * price

Args:
num_used_nodes: Number of nodes with jobs running
num_idle_nodes: Number of idle (on but unused) nodes
num_on_nodes: Number of nodes that are on (used + idle)
cores_used: Total number of cores actively running jobs
current_price: Current electricity price

Returns:
Total power cost
"""
idle_cost = COST_IDLE_MW * current_price * num_idle_nodes
usage_cost = COST_USED_MW * current_price * num_used_nodes
total_cost = idle_cost + usage_cost
return total_cost
return (COST_IDLE_MW * num_on_nodes + (COST_USED_MW - COST_IDLE_MW) * (cores_used / CORES_PER_NODE)) * current_price


def power_consumption_mwh(num_used_nodes: int, num_idle_nodes: int) -> float:
def power_consumption_mwh(num_on_nodes: int, cores_used: int) -> float:
"""
Calculate energy consumption for one environment step.

One environment step equals one hour, so this is both average MW and MWh/step.
Uses the same proportional model as power_cost.

Args:
num_used_nodes: Number of nodes with jobs running
num_idle_nodes: Number of idle (on but unused) nodes
num_on_nodes: Number of nodes that are on (used + idle)
cores_used: Total number of cores actively running jobs

Returns:
Energy consumption in MWh for this step
"""
return COST_IDLE_MW * num_idle_nodes + COST_USED_MW * num_used_nodes
return COST_IDLE_MW * num_on_nodes + (COST_USED_MW - COST_IDLE_MW) * (cores_used / CORES_PER_NODE)


class RewardCalculator:
Expand Down Expand Up @@ -80,9 +82,9 @@ def __init__(self, prices: Prices) -> None:

def _compute_bounds(self) -> None:
"""Compute min/max bounds for reward normalization."""
# Efficiency bounds
cost_for_min_efficiency = power_cost(0, MAX_NODES, self.prices.MAX_PRICE)
cost_for_max_efficiency = power_cost(MAX_NODES, 0, self.prices.MIN_PRICE)
# Efficiency bounds (for legacy _reward_efficiency_normalized only)
cost_for_min_efficiency = power_cost(MAX_NODES, 0, self.prices.MAX_PRICE)
cost_for_max_efficiency = power_cost(MAX_NODES, MAX_NODES * CORES_PER_NODE, self.prices.MIN_PRICE)

self._min_efficiency_reward = self._reward_efficiency(0, cost_for_min_efficiency)
self._max_efficiency_reward = max(1.0, self._reward_efficiency(MAX_NODES, cost_for_max_efficiency))
Expand Down Expand Up @@ -230,16 +232,18 @@ def _penalty_job_age_normalized(self, num_off_nodes: int, job_queue_2d: np.ndarr
normalized_penalty = -current_penalty
return float(np.clip(normalized_penalty, -1, 0))

def _reward_energy_efficiency_normalized(self, num_used_nodes: int, num_idle_nodes: int) -> float:
'''Redefine meaning of "efficiency". Use purely as "energy efficiency", aka: How much of the energy (in MW) which is currently needed, gets used for work.
NOTE: Original efficiency function was doing 3 things at once. 1. Handled Blackout logic, with (2.) penalty-ish reward delay for unprocessed jobs, while blackout.
But this log1p function would start to become "harsh" only for a very high number of unprocessed. This rewarded shutting everything off.
3. rewarded used/cost, but cost was defined in units of price. Price reward should handle this solely, otherwise double counting.
Hence, here new efficiency definition.'''
total_work = num_used_nodes * COST_USED_MW + num_idle_nodes * COST_IDLE_MW
if total_work <= 0.0:
def _reward_energy_efficiency_normalized(self, num_on_nodes: int, cores_used: int) -> float:
'''Energy efficiency: fraction of total power draw that goes to actual computation.

Proportional model: total power = COST_IDLE_MW * num_on + compute_delta * cores/CORES_PER_NODE.
Compute power = compute_delta * cores/CORES_PER_NODE (the portion above idle baseline).
Efficiency = compute_power / total_power, scaled to [-1, 1].
'''
compute_power = (COST_USED_MW - COST_IDLE_MW) * (cores_used / CORES_PER_NODE)
total_power = COST_IDLE_MW * num_on_nodes + compute_power
if total_power <= 0.0:
return 0.0 # nothing on => no "efficiency" signal
return 2*(float(np.clip((num_used_nodes * COST_USED_MW) / total_work, 0.0, 1.0))) - 1.0 # scale to [-1, 1] so that it can be weighted in either direction without exceeding bounds.
return 2 * float(np.clip(compute_power / total_power, 0.0, 1.0)) - 1.0 # scale to [-1, 1]

def _blackout_term(self, num_used_nodes: int, num_idle_nodes: int, num_unprocessed_jobs: int) -> float:
"""
Expand All @@ -260,7 +264,7 @@ def _blackout_term(self, num_used_nodes: int, num_idle_nodes: int, num_unprocess
penalty = np.exp(-ratio * SATURATION_FACTOR) - 1.0
return float(np.clip(penalty, -1.0, 0.0))

def calculate(self, num_used_nodes: int, num_idle_nodes: int, current_price: float, average_future_price: float,
def calculate(self, num_used_nodes: int, num_idle_nodes: int, num_used_cores: int, current_price: float, average_future_price: float,
num_off_nodes: int, _num_processed_jobs: int, num_node_changes: int, job_queue_2d: np.ndarray, # noqa: ARG002 - _num_processed_jobs legacy; num_node_changes reserved for future node-change penalty
num_unprocessed_jobs: int, weights: Weights, num_dropped_this_step: int,
env_print: Callable[..., None]) -> tuple[float, float, float, float, float, float]:
Expand All @@ -286,8 +290,9 @@ def calculate(self, num_used_nodes: int, num_idle_nodes: int, current_price: flo
idle_penalty_norm, job_age_penalty_norm)
"""
# 0. Energy efficiency. Reward calculation based on Workload (used nodes) (W) / Cost (C)
total_cost = power_cost(num_used_nodes, num_idle_nodes, current_price)
efficiency_reward_norm = self._reward_energy_efficiency_normalized(num_used_nodes, num_idle_nodes) + self._blackout_term(num_used_nodes, num_idle_nodes, num_unprocessed_jobs)
num_on_nodes = num_used_nodes + num_idle_nodes
total_cost = power_cost(num_on_nodes, num_used_cores, current_price)
efficiency_reward_norm = self._reward_energy_efficiency_normalized(num_on_nodes, num_used_cores) + self._blackout_term(num_used_nodes, num_idle_nodes, num_unprocessed_jobs)
efficiency_reward_weighted = weights.efficiency_weight * efficiency_reward_norm

# 2. Increase reward if current price is favorable and currently used nodes are high.
Expand Down
15 changes: 12 additions & 3 deletions test/test_sanity_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ def parse_args():
p.add_argument("--steps", type=int, default=500)
p.add_argument("--episodes", type=int, default=1)
p.add_argument("--check-determinism", action="store_true")
p.add_argument("--check-gym", action="store_true", help="Run stable-baselines3 check_env() on the environment.")
p.add_argument("--carry-over-state", action="store_true", help="Run carry-over continuity test (state preserved across episodes).")
# mirror train.py-ish knobs (mostly optional)
p.add_argument("--session", default="sanity")
p.add_argument("--render", type=str, default="none", choices=["human", "none"])
Expand Down Expand Up @@ -380,9 +382,16 @@ def cmp(name, a, b):
determinism_test(lambda: make_env_with_carry(), seed=args.seed, n_steps=min(args.steps, 500))
print("[OK] determinism test passed")

# 4) Carry-over continuity
carry_over_test(lambda: make_env_with_carry(), seed=args.seed, n_steps=min(args.steps, 10))
print("[OK] carry-over continuity test passed")
# 4) Gym interface check (optional)
if args.check_gym:
from stable_baselines3.common.env_checker import check_env
check_env(make_env_with_carry(), warn=True)
print("[OK] gym check passed")
Comment on lines +387 to +389
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify check_env usage and whether close() is paired in the same control block.
rg -n -C4 'if args\.check_gym|check_env\(|\.close\(' test/test_sanity_env.py

Repository: FairRootGroup/powersched

Length of output: 2611


🏁 Script executed:

#!/bin/bash
# Search for other usages of check_env in the repository
rg -n 'check_env' --type py

Repository: FairRootGroup/powersched

Length of output: 510


🌐 Web query:

stable-baselines3 check_env closes environment cleanup

💡 Result:

Yes, the stable-baselines3 check_env function calls env.close on the environment during execution, specifically as part of the render check when skip_render_check=False (default is True, so it skips by default). If env.render_mode is set, it calls env.render followed by env.close. For environments wrapped in vectorized environments like DummyVecEnv (used internally in _check_nan), the VecEnv.close method is not explicitly called by check_env, but VecEnvs have a close method for cleanup that closes underlying envs (e.g., SubprocVecEnv sends "close" command and joins processes). check_env does not perform full environment cleanup beyond potential render-related close; it leaves the environment in a stepped state after calling reset and step for validation. Users should handle further cleanup if needed after calling check_env(env). To avoid render close: pass skip_render_check=True (default). Official source code confirms: in check_env, if not skipping render check: _check_render(env), which does env.render; env.close if render_mode present.

Citations:


🏁 Script executed:

#!/bin/bash
# Check test_checkenv.py for check_env usage and cleanup
cat -n test/test_checkenv.py

Repository: FairRootGroup/powersched

Length of output: 1065


Close the environment instance returned by check_env() explicitly.

The environment created at line 388 is not explicitly closed. The check_env() function from stable-baselines3 does not automatically close the passed environment instance, leaving it in a stepped state. In this long-running sanity script that may be invoked repeatedly, explicit cleanup prevents resource/descriptor leaks, consistent with the pattern used elsewhere in this file (lines 115, 136, 198, 378).

♻️ Proposed fix
     if args.check_gym:
         from stable_baselines3.common.env_checker import check_env
-        check_env(make_env_with_carry(), warn=True)
+        gym_env = make_env_with_carry()
+        try:
+            check_env(gym_env, warn=True)
+        finally:
+            gym_env.close()
         print("[OK] gym check passed")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
from stable_baselines3.common.env_checker import check_env
check_env(make_env_with_carry(), warn=True)
print("[OK] gym check passed")
from stable_baselines3.common.env_checker import check_env
gym_env = make_env_with_carry()
try:
check_env(gym_env, warn=True)
finally:
gym_env.close()
print("[OK] gym check passed")
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/test_sanity_env.py` around lines 387 - 389, The environment returned by
make_env_with_carry() passed into check_env() isn't explicitly closed; update
the snippet that calls check_env(make_env_with_carry(), warn=True) to capture
the created env (e.g., env = make_env_with_carry()), pass that env to
check_env(env, warn=True), and then call env.close() (or use a try/finally to
ensure env.close() runs) after the check to match the cleanup pattern used
elsewhere.


# 5) Carry-over continuity (optional)
if args.carry_over_state:
carry_over_test(lambda: make_env_with_carry(), seed=args.seed, n_steps=min(args.steps, 10))
print("[OK] carry-over continuity test passed")

print("done")

Expand Down
Loading