Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 7 additions & 10 deletions src/baseline.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,11 @@ def baseline_step(
"""
job_queue_2d = baseline_state['job_queue'].reshape(-1, 4)

process_ongoing_jobs(baseline_state['nodes'], baseline_cores_available, baseline_running_jobs)
process_ongoing_jobs(baseline_state['nodes'], baseline_cores_available, baseline_running_jobs, metrics, is_baseline=True)

# Age helper queue and fill real queue before new arrivals
age_backlog_queue(baseline_backlog_queue, metrics, _is_baseline=True)
baseline_next_empty_slot, _ = fill_queue_from_backlog(
job_queue_2d, baseline_backlog_queue, baseline_next_empty_slot
)
baseline_next_empty_slot, _ = fill_queue_from_backlog(job_queue_2d, baseline_backlog_queue, baseline_next_empty_slot)

_new_baseline_jobs, baseline_next_empty_slot, baseline_backlog_dropped = add_new_jobs(
job_queue_2d, new_jobs_count, new_jobs_durations,
Expand Down Expand Up @@ -114,14 +112,13 @@ def baseline_step(

baseline_state['job_queue'] = job_queue_2d.flatten()

baseline_cost = power_cost(num_used_nodes, num_idle_nodes, current_price)
num_used_cores = num_on_nodes * CORES_PER_NODE - np.sum(baseline_cores_available)
baseline_cost = power_cost(num_on_nodes, num_used_cores, current_price)
env_print(f" > baseline_cost: €{baseline_cost:.4f} | used nodes: {num_used_nodes}, idle nodes: {num_idle_nodes}")
baseline_cost_off = power_cost(num_used_nodes, 0, current_price)
baseline_cost_off = power_cost(num_used_nodes, num_used_cores, current_price)
env_print(f" > baseline_cost_off: €{baseline_cost_off:.4f} | used nodes: {num_used_nodes}, idle nodes: 0")
baseline_power_mwh = power_consumption_mwh(num_used_nodes, num_idle_nodes)
baseline_power_off_mwh = power_consumption_mwh(num_used_nodes, 0)

num_used_cores = num_on_nodes * CORES_PER_NODE - np.sum(baseline_cores_available)
baseline_power_mwh = power_consumption_mwh(num_on_nodes, num_used_cores)
baseline_power_off_mwh = power_consumption_mwh(num_used_nodes, num_used_cores)
return (
baseline_cost,
baseline_cost_off,
Expand Down
6 changes: 3 additions & 3 deletions src/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ def step(self, action: np.ndarray) -> tuple[dict[str, np.ndarray], float, bool,

# Decrement booked time for nodes and complete running jobs
self.env_print("[1] Processing ongoing jobs...")
completed_jobs = process_ongoing_jobs(self.state['nodes'], self.cores_available, self.running_jobs)
completed_jobs = process_ongoing_jobs(self.state['nodes'], self.cores_available, self.running_jobs, self.metrics, is_baseline=False)
self.env_print(f"{len(completed_jobs)} jobs completed: [{' '.join(['#' + str(job_id) for job_id in completed_jobs]) if len(completed_jobs) > 0 else ''}]")

# Age helper queues (jobs waiting outside the fixed queue)
Expand Down Expand Up @@ -472,15 +472,15 @@ def step(self, action: np.ndarray) -> tuple[dict[str, np.ndarray], float, bool,
self.metrics.episode_baseline_used_cores.append(baseline_num_used_cores)

step_reward, step_cost, eff_reward_norm, price_reward, idle_penalty_norm, job_age_penalty_norm = self.reward_calculator.calculate(
num_used_nodes, num_idle_nodes, current_price, average_future_price,
num_used_nodes, num_idle_nodes, num_used_cores, current_price, average_future_price,
num_off_nodes, num_launched_jobs, num_node_changes, job_queue_2d,
num_unprocessed_jobs, self.weights, num_dropped_this_step, self.env_print
)

self.metrics.episode_reward += step_reward
self.metrics.total_cost += step_cost
self.metrics.episode_total_cost += step_cost
step_power_mwh = power_consumption_mwh(num_used_nodes, num_idle_nodes)
step_power_mwh = power_consumption_mwh(num_on_nodes, num_used_cores)
self.metrics.total_power_consumption_mwh += step_power_mwh
self.metrics.episode_total_power_consumption_mwh += step_power_mwh

Expand Down
6 changes: 5 additions & 1 deletion src/evaluation_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,5 +63,9 @@ def build_episode_summary_line(
f"Agent Occupancy (Cores)={agent_occupancy_cores_pct:.2f}%, "
f"Baseline Occupancy (Cores)={baseline_occupancy_cores_pct:.2f}%, "
f"Agent Occupancy (Nodes)={agent_occupancy_nodes_pct:.2f}%, "
f"Baseline Occupancy (Nodes)={baseline_occupancy_nodes_pct:.2f}% "
f"Baseline Occupancy (Nodes)={baseline_occupancy_nodes_pct:.2f}%, "
f"Power={float(episode_data['agent_power_consumption_mwh']):.1f}/"
f"{float(episode_data['baseline_power_consumption_mwh']):.1f}/"
f"{float(episode_data['baseline_power_consumption_off_mwh']):.1f} MWh "
f"(agent/base/base_off)"
Comment on lines +66 to +70
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Drop the second Power= field.

Line 67 duplicates the same metric already emitted at Lines 40-43, so the episode line now contains two Power= sections. That makes the output noisy and brittle for any downstream log parsing.

✂️ Proposed fix
-        f"Baseline Occupancy (Nodes)={baseline_occupancy_nodes_pct:.2f}%, "
-        f"Power={float(episode_data['agent_power_consumption_mwh']):.1f}/"
-        f"{float(episode_data['baseline_power_consumption_mwh']):.1f}/"
-        f"{float(episode_data['baseline_power_consumption_off_mwh']):.1f} MWh "
-        f"(agent/base/base_off)"
+        f"Baseline Occupancy (Nodes)={baseline_occupancy_nodes_pct:.2f}%"
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
f"Baseline Occupancy (Nodes)={baseline_occupancy_nodes_pct:.2f}%, "
f"Power={float(episode_data['agent_power_consumption_mwh']):.1f}/"
f"{float(episode_data['baseline_power_consumption_mwh']):.1f}/"
f"{float(episode_data['baseline_power_consumption_off_mwh']):.1f} MWh "
f"(agent/base/base_off)"
f"Baseline Occupancy (Nodes)={baseline_occupancy_nodes_pct:.2f}%"
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/evaluation_summary.py` around lines 66 - 70, The episode summary f-string
is emitting a duplicated "Power=" field; update the f-string that builds the
line using baseline_occupancy_nodes_pct and episode_data (keys
agent_power_consumption_mwh, baseline_power_consumption_mwh,
baseline_power_consumption_off_mwh) to remove the redundant "Power=" segment so
only the single Power metric (already emitted earlier) remains; locate the
formatted string construction in evaluation_summary.py and delete the second
Power=/... group while preserving the remaining text and numeric formatting.

)
41 changes: 26 additions & 15 deletions src/job_management.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,27 +73,36 @@ def validate_next_empty(job_queue_2d: np.ndarray, next_empty: int) -> None:
assert np.all(job_queue_2d[:next_empty, 0] != 0), "hole before next_empty_slot"


def process_ongoing_jobs(nodes: np.ndarray, cores_available: np.ndarray, running_jobs: dict[int, dict[str, Any]]) -> list[int]:
def process_ongoing_jobs(nodes: np.ndarray, cores_available: np.ndarray, running_jobs: dict[int, dict[str, Any]], metrics: MetricsTracker, is_baseline: bool) -> list[int]:
"""
Process ongoing jobs: decrement their duration, complete finished jobs,
and release resources.
release resources, and record completion metrics.

Completion is counted here (when duration hits zero), not at launch time.
'wait_time' on each job is the time spent in the queue before being launched,
which is the standard HPC metric for scheduler responsiveness.
Comment on lines +81 to +83
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Completion-time episode counters now mix different job cohorts.

src/environment.py Lines 294-304 only reset episode metrics between truncations; they do not clear running_jobs, the queue, or backlog. After Lines 117-125 moved these increments to completion time, a job submitted in episode N and finishing in episode N+1 contributes to episode N+1's jobs_completed and avg_wait_time, while record_episode_completion() still divides by that episode's jobs_submitted. That makes the per-episode completion stats drift and can push completion_rate past 100%. Track submission cohort/episode on the job, or keep separate episode-local "started/launched" counters for the summary.

Also applies to: 114-125

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/job_management.py` around lines 81 - 83, Episode metrics are being
polluted because completion-time increments (in job completion handling around
running_jobs and updates to jobs_completed and avg_wait_time) count jobs that
were submitted in previous episodes; update the logic so episode boundaries are
respected by either: (a) tagging each Job object with its submission_episode
(e.g., set submission_episode when the job is enqueued) and only increment
per-episode counters (jobs_completed, avg_wait_time used by
record_episode_completion()) when job.submission_episode == current_episode, or
(b) maintain separate episode-local launch/completion counters (e.g.,
episode_jobs_launched, episode_jobs_completed, episode_wait_accumulator) that
are reset by record_episode_completion() and incremented at launch/completion
time; ensure record_episode_completion() uses those episode-local counters (not
global running_jobs/queue/backlog) so completion_rate cannot exceed 100%.


Args:
nodes: Array of node states
cores_available: Array of available cores per node
running_jobs: Dictionary of currently running jobs
metrics: Metrics tracker; completed job counts and queue wait times
are recorded here
is_baseline: Whether finished jobs belong to the baseline simulation

Returns:
List of completed job IDs
"""
completed_jobs = []
completed_wait_time = 0

for job_id, job_data in running_jobs.items():
job_data['duration'] -= 1

# Check if job is completed
if job_data['duration'] <= 0:
completed_jobs.append(job_id)
completed_wait_time += int(job_data.get('wait_time', 0))
# Release resources
for node_idx, cores_used in job_data['allocation']:
cores_available[node_idx] += cores_used
Expand All @@ -102,6 +111,19 @@ def process_ongoing_jobs(nodes: np.ndarray, cores_available: np.ndarray, running
for job_id in completed_jobs:
del running_jobs[job_id]

if completed_jobs:
completed_count = len(completed_jobs)
if is_baseline:
metrics.baseline_jobs_completed += completed_count
metrics.baseline_total_job_wait_time += completed_wait_time
metrics.episode_baseline_jobs_completed += completed_count
metrics.episode_baseline_total_job_wait_time += completed_wait_time
else:
metrics.jobs_completed += completed_count
metrics.total_job_wait_time += completed_wait_time
metrics.episode_jobs_completed += completed_count
metrics.episode_total_job_wait_time += completed_wait_time

# Update node times based on remaining jobs
# Reset all nodes first
for i in range(MAX_NODES):
Expand Down Expand Up @@ -198,7 +220,7 @@ def assign_jobs_to_available_nodes(
running_jobs: Dictionary of currently running jobs
next_empty_slot: Index of next empty slot in queue
next_job_id: Next available job ID
metrics: MetricsTracker object to update with job completion metrics
metrics: MetricsTracker object to update with drop/rejection counts
is_baseline: Whether this is baseline simulation

Returns:
Expand Down Expand Up @@ -229,6 +251,7 @@ def assign_jobs_to_available_nodes(
running_jobs[next_job_id] = {
"duration": job_duration,
"allocation": job_allocation,
"wait_time": int(job_age), # hours spent in queue; recorded at completion time
}
next_job_id += 1

Expand All @@ -239,18 +262,6 @@ def assign_jobs_to_available_nodes(
if job_idx < next_empty_slot:
next_empty_slot = job_idx

# Track job completion and wait time
if is_baseline:
metrics.baseline_jobs_completed += 1
metrics.baseline_total_job_wait_time += job_age
metrics.episode_baseline_jobs_completed += 1
metrics.episode_baseline_total_job_wait_time += job_age
else:
metrics.jobs_completed += 1
metrics.total_job_wait_time += job_age
metrics.episode_jobs_completed += 1
metrics.episode_total_job_wait_time += job_age

num_processed_jobs += 1
continue

Expand Down
2 changes: 2 additions & 0 deletions src/metrics_tracker.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Metrics tracking and episode recording for the PowerSched environment."""



class MetricsTracker:
"""Tracks metrics throughout training episodes."""

Expand Down Expand Up @@ -175,6 +176,7 @@ def record_episode_completion(self, current_episode: int) -> dict[str, float | i
)
savings_vs_baseline: float = self.episode_baseline_cost - self.episode_total_cost
savings_vs_baseline_off: float = self.episode_baseline_cost_off - self.episode_total_cost

dropped_jobs_per_saved_euro: float = self._safe_ratio(
float(self.episode_jobs_dropped), savings_vs_baseline
) if savings_vs_baseline > 0.0 else float("nan")
Expand Down
59 changes: 32 additions & 27 deletions src/reward_calculation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,45 +5,47 @@
import numpy as np

from src.config import (
COST_IDLE_MW, COST_USED_MW, PENALTY_IDLE_NODE,
COST_IDLE_MW, COST_USED_MW, CORES_PER_NODE, PENALTY_IDLE_NODE,
PENALTY_DROPPED_JOB, MAX_NODES, MAX_NEW_JOBS_PER_HOUR, WEEK_HOURS
)
from src.prices import Prices
from src.weights import Weights


def power_cost(num_used_nodes: int, num_idle_nodes: int, current_price: float) -> float:
def power_cost(num_on_nodes: int, cores_used: int, current_price: float) -> float:
"""
Calculate power cost based on node usage and current electricity price.

Proportional model: all on-nodes draw idle power, plus additional compute
power scaled linearly by actual core utilization.
Formula: (COST_IDLE_MW * num_on + (COST_USED_MW - COST_IDLE_MW) * cores_used/CORES_PER_NODE) * price

Args:
num_used_nodes: Number of nodes with jobs running
num_idle_nodes: Number of idle (on but unused) nodes
num_on_nodes: Number of nodes that are on (used + idle)
cores_used: Total number of cores actively running jobs
current_price: Current electricity price

Returns:
Total power cost
"""
idle_cost = COST_IDLE_MW * current_price * num_idle_nodes
usage_cost = COST_USED_MW * current_price * num_used_nodes
total_cost = idle_cost + usage_cost
return total_cost
return (COST_IDLE_MW * num_on_nodes + (COST_USED_MW - COST_IDLE_MW) * (cores_used / CORES_PER_NODE)) * current_price


def power_consumption_mwh(num_used_nodes: int, num_idle_nodes: int) -> float:
def power_consumption_mwh(num_on_nodes: int, cores_used: int) -> float:
"""
Calculate energy consumption for one environment step.

One environment step equals one hour, so this is both average MW and MWh/step.
Uses the same proportional model as power_cost.

Args:
num_used_nodes: Number of nodes with jobs running
num_idle_nodes: Number of idle (on but unused) nodes
num_on_nodes: Number of nodes that are on (used + idle)
cores_used: Total number of cores actively running jobs

Returns:
Energy consumption in MWh for this step
"""
return COST_IDLE_MW * num_idle_nodes + COST_USED_MW * num_used_nodes
return COST_IDLE_MW * num_on_nodes + (COST_USED_MW - COST_IDLE_MW) * (cores_used / CORES_PER_NODE)


class RewardCalculator:
Expand Down Expand Up @@ -80,9 +82,9 @@ def __init__(self, prices: Prices) -> None:

def _compute_bounds(self) -> None:
"""Compute min/max bounds for reward normalization."""
# Efficiency bounds
cost_for_min_efficiency = power_cost(0, MAX_NODES, self.prices.MAX_PRICE)
cost_for_max_efficiency = power_cost(MAX_NODES, 0, self.prices.MIN_PRICE)
# Efficiency bounds (for legacy _reward_efficiency_normalized only)
cost_for_min_efficiency = power_cost(MAX_NODES, 0, self.prices.MAX_PRICE)
cost_for_max_efficiency = power_cost(MAX_NODES, MAX_NODES * CORES_PER_NODE, self.prices.MIN_PRICE)

self._min_efficiency_reward = self._reward_efficiency(0, cost_for_min_efficiency)
self._max_efficiency_reward = max(1.0, self._reward_efficiency(MAX_NODES, cost_for_max_efficiency))
Expand Down Expand Up @@ -230,16 +232,18 @@ def _penalty_job_age_normalized(self, num_off_nodes: int, job_queue_2d: np.ndarr
normalized_penalty = -current_penalty
return float(np.clip(normalized_penalty, -1, 0))

def _reward_energy_efficiency_normalized(self, num_used_nodes: int, num_idle_nodes: int) -> float:
'''Redefine meaning of "efficiency". Use purely as "energy efficiency", aka: How much of the energy (in MW) which is currently needed, gets used for work.
NOTE: Original efficiency function was doing 3 things at once. 1. Handled Blackout logic, with (2.) penalty-ish reward delay for unprocessed jobs, while blackout.
But this log1p function would start to become "harsh" only for a very high number of unprocessed. This rewarded shutting everything off.
3. rewarded used/cost, but cost was defined in units of price. Price reward should handle this solely, otherwise double counting.
Hence, here new efficiency definition.'''
total_work = num_used_nodes * COST_USED_MW + num_idle_nodes * COST_IDLE_MW
if total_work <= 0.0:
def _reward_energy_efficiency_normalized(self, num_on_nodes: int, cores_used: int) -> float:
'''Energy efficiency: fraction of total power draw that goes to actual computation.

Proportional model: total power = COST_IDLE_MW * num_on + compute_delta * cores/CORES_PER_NODE.
Compute power = compute_delta * cores/CORES_PER_NODE (the portion above idle baseline).
Efficiency = compute_power / total_power, scaled to [-1, 1].
'''
compute_power = (COST_USED_MW - COST_IDLE_MW) * (cores_used / CORES_PER_NODE)
total_power = COST_IDLE_MW * num_on_nodes + compute_power
if total_power <= 0.0:
return 0.0 # nothing on => no "efficiency" signal
return 2*(float(np.clip((num_used_nodes * COST_USED_MW) / total_work, 0.0, 1.0))) - 1.0 # scale to [-1, 1] so that it can be weighted in either direction without exceeding bounds.
return 2 * float(np.clip(compute_power / total_power, 0.0, 1.0)) - 1.0 # scale to [-1, 1]

def _blackout_term(self, num_used_nodes: int, num_idle_nodes: int, num_unprocessed_jobs: int) -> float:
"""
Expand All @@ -260,7 +264,7 @@ def _blackout_term(self, num_used_nodes: int, num_idle_nodes: int, num_unprocess
penalty = np.exp(-ratio * SATURATION_FACTOR) - 1.0
return float(np.clip(penalty, -1.0, 0.0))

def calculate(self, num_used_nodes: int, num_idle_nodes: int, current_price: float, average_future_price: float,
def calculate(self, num_used_nodes: int, num_idle_nodes: int, num_used_cores: int, current_price: float, average_future_price: float,
num_off_nodes: int, _num_processed_jobs: int, num_node_changes: int, job_queue_2d: np.ndarray, # noqa: ARG002 - _num_processed_jobs legacy; num_node_changes reserved for future node-change penalty
num_unprocessed_jobs: int, weights: Weights, num_dropped_this_step: int,
env_print: Callable[..., None]) -> tuple[float, float, float, float, float, float]:
Expand All @@ -286,8 +290,9 @@ def calculate(self, num_used_nodes: int, num_idle_nodes: int, current_price: flo
idle_penalty_norm, job_age_penalty_norm)
"""
# 0. Energy efficiency. Reward calculation based on Workload (used nodes) (W) / Cost (C)
total_cost = power_cost(num_used_nodes, num_idle_nodes, current_price)
efficiency_reward_norm = self._reward_energy_efficiency_normalized(num_used_nodes, num_idle_nodes) + self._blackout_term(num_used_nodes, num_idle_nodes, num_unprocessed_jobs)
num_on_nodes = num_used_nodes + num_idle_nodes
total_cost = power_cost(num_on_nodes, num_used_cores, current_price)
efficiency_reward_norm = self._reward_energy_efficiency_normalized(num_on_nodes, num_used_cores) + self._blackout_term(num_used_nodes, num_idle_nodes, num_unprocessed_jobs)
efficiency_reward_weighted = weights.efficiency_weight * efficiency_reward_norm

# 2. Increase reward if current price is favorable and currently used nodes are high.
Expand Down
15 changes: 12 additions & 3 deletions test/test_sanity_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ def parse_args():
p.add_argument("--steps", type=int, default=500)
p.add_argument("--episodes", type=int, default=1)
p.add_argument("--check-determinism", action="store_true")
p.add_argument("--check-gym", action="store_true", help="Run stable-baselines3 check_env() on the environment.")
p.add_argument("--carry-over-state", action="store_true", help="Run carry-over continuity test (state preserved across episodes).")
# mirror train.py-ish knobs (mostly optional)
p.add_argument("--session", default="sanity")
p.add_argument("--render", type=str, default="none", choices=["human", "none"])
Expand Down Expand Up @@ -380,9 +382,16 @@ def cmp(name, a, b):
determinism_test(lambda: make_env_with_carry(), seed=args.seed, n_steps=min(args.steps, 500))
print("[OK] determinism test passed")

# 4) Carry-over continuity
carry_over_test(lambda: make_env_with_carry(), seed=args.seed, n_steps=min(args.steps, 10))
print("[OK] carry-over continuity test passed")
# 4) Gym interface check (optional)
if args.check_gym:
from stable_baselines3.common.env_checker import check_env
check_env(make_env_with_carry(), warn=True)
print("[OK] gym check passed")
Comment on lines +387 to +389
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

#!/bin/bash
# Verify check_env usage and whether close() is paired in the same control block.
rg -n -C4 'if args\.check_gym|check_env\(|\.close\(' test/test_sanity_env.py

Repository: FairRootGroup/powersched

Length of output: 2611


🏁 Script executed:

#!/bin/bash
# Search for other usages of check_env in the repository
rg -n 'check_env' --type py

Repository: FairRootGroup/powersched

Length of output: 510


🌐 Web query:

stable-baselines3 check_env closes environment cleanup

💡 Result:

Yes, the stable-baselines3 check_env function calls env.close on the environment during execution, specifically as part of the render check when skip_render_check=False (default is True, so it skips by default). If env.render_mode is set, it calls env.render followed by env.close. For environments wrapped in vectorized environments like DummyVecEnv (used internally in _check_nan), the VecEnv.close method is not explicitly called by check_env, but VecEnvs have a close method for cleanup that closes underlying envs (e.g., SubprocVecEnv sends "close" command and joins processes). check_env does not perform full environment cleanup beyond potential render-related close; it leaves the environment in a stepped state after calling reset and step for validation. Users should handle further cleanup if needed after calling check_env(env). To avoid render close: pass skip_render_check=True (default). Official source code confirms: in check_env, if not skipping render check: _check_render(env), which does env.render; env.close if render_mode present.

Citations:


🏁 Script executed:

#!/bin/bash
# Check test_checkenv.py for check_env usage and cleanup
cat -n test/test_checkenv.py

Repository: FairRootGroup/powersched

Length of output: 1065


Close the environment instance returned by check_env() explicitly.

The environment created at line 388 is not explicitly closed. The check_env() function from stable-baselines3 does not automatically close the passed environment instance, leaving it in a stepped state. In this long-running sanity script that may be invoked repeatedly, explicit cleanup prevents resource/descriptor leaks, consistent with the pattern used elsewhere in this file (lines 115, 136, 198, 378).

♻️ Proposed fix
     if args.check_gym:
         from stable_baselines3.common.env_checker import check_env
-        check_env(make_env_with_carry(), warn=True)
+        gym_env = make_env_with_carry()
+        try:
+            check_env(gym_env, warn=True)
+        finally:
+            gym_env.close()
         print("[OK] gym check passed")
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
from stable_baselines3.common.env_checker import check_env
check_env(make_env_with_carry(), warn=True)
print("[OK] gym check passed")
from stable_baselines3.common.env_checker import check_env
gym_env = make_env_with_carry()
try:
check_env(gym_env, warn=True)
finally:
gym_env.close()
print("[OK] gym check passed")
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@test/test_sanity_env.py` around lines 387 - 389, The environment returned by
make_env_with_carry() passed into check_env() isn't explicitly closed; update
the snippet that calls check_env(make_env_with_carry(), warn=True) to capture
the created env (e.g., env = make_env_with_carry()), pass that env to
check_env(env, warn=True), and then call env.close() (or use a try/finally to
ensure env.close() runs) after the check to match the cleanup pattern used
elsewhere.


# 5) Carry-over continuity (optional)
if args.carry_over_state:
carry_over_test(lambda: make_env_with_carry(), seed=args.seed, n_steps=min(args.steps, 10))
print("[OK] carry-over continuity test passed")

print("done")

Expand Down
Loading