From 7bd7c00de9b2faf6c0a5e73db82c9938297174b6 Mon Sep 17 00:00:00 2001 From: Wu Bingqian Date: Fri, 26 Jun 2026 16:16:07 +0800 Subject: [PATCH] Arm Pico reference worker before mocap entry --- AGENTS.md | 5 +- README.md | 2 +- docs/docs/tutorials/pico-sim2real.md | 7 +- .../current/tutorials/pico-sim2real.md | 6 +- scripts/view/view_dataset.py | 4 +- teleopit/sim2real/mp/runtime.py | 111 ++++++++++++- tests/test_sim2real_multiprocess.py | 157 ++++++++++++++++++ .../tasks/tracking/tracking_env_cfg.py | 2 +- 8 files changed, 280 insertions(+), 14 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index ffbeb26c..b9293986 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -142,9 +142,10 @@ target_dof_pos = clip(action, -10, 10) × action_scale + default_dof_pos - Default Pico sim2sim keyboard mappings are `Y` → `MOCAP`, `A` → pause/resume mocap, `B` → toggle `MOCAP`/`ARMS`, `X` → back to `STANDING`, `Q` → quit - Pico4 sim2real pause/resume is handled as a mocap-session control event (`toggle_pause`), not as a mode switch to `STANDING` - Default Pico pause button is `A`; resume resets policy/reference state and yaw/XY root-offset alignment while the process-isolated realtime reference worker continues its live input timeline +- Pico4 sim2real arms the process-isolated reference worker only when entering `MOCAP`; `STANDING` and `DAMPING` disarm it so cold startup frames do not warm-start GMR before mocap entry - Pico4 sim2sim/sim2real support `ARMS` mode toggled from `MOCAP` with Pico/controller `B`; retargeting continues, while the control loop sends the motion tracker a composed reference with stand-pose body/legs/waist and live retargeted arms - `ARMS` entering/exiting/resume resets policy/reference alignment and uses Kp ramp; offline BVH sim2real does not use `ARMS`, and Unitree remote `B` remains BVH replay -- Realtime mode switches and pause/resume use a retargeter-preserving soft reset: policy/reference state, smoothers, and reference alignment are reset, while the GMR IK warm-start is retained +- Realtime Pico pause/resume and `MOCAP ↔ ARMS` switches use a retargeter-preserving soft reset: policy/reference state, smoothers, and reference alignment are reset, while the GMR IK warm-start is retained - Optional LinkerHand control uses `hands.enabled=true`, `hands.driver=linkerhand_l6|linkerhand_o6`, and `hands.mode=gripper|vr_hand_pose`; default is disabled - Optional Pico sim2real HDF5 recording uses `--config-name sim2real_record` or `recording.enabled=true`; it requires `input.provider=pico4`, `input.video.enabled=true`, `input.video.source=realsense`, an interactive terminal, and the `recording` extra - Recording is manual only: terminal `R` starts an episode, `S` saves, `D` discards the active episode, and `Q` shuts down; `STANDING`, `MOCAP`, `ARMS`, and paused mocap are recordable @@ -168,7 +169,7 @@ target_dof_pos = clip(action, -10, 10) × action_scale + default_dof_pos - Realtime inferred `motion_joint_vel`, anchor linear velocity, and anchor angular velocity can be EMA-smoothed via `reference_velocity_smoothing_alpha` and `reference_anchor_velocity_smoothing_alpha` - Sim2real Pico pause/resume uses mocap-session states `ACTIVE ↔ PAUSED`; resume clears policy/reference state, rebuilds yaw/XY root alignment, and does not interpolate retarget qpos from the paused pose - Realtime sim2sim with Pico control events uses the same mocap-session pause/resume semantics and rebuilds the realtime reference path on resume, including the configured warmup -- Realtime sim2sim/sim2real `STANDING ↔ MOCAP` transitions use the same retargeter-preserving soft reset, rather than cold-starting the retargeter from its default qpos +- Realtime sim2sim `STANDING ↔ MOCAP` transitions rebuild the realtime reference path on entry; Pico sim2real `STANDING -> MOCAP` additionally rearms and resets the process-isolated reference worker before accepting fresh references - Realtime Pico sim2sim can start directly in `STANDING` with keyboard mode control enabled via top-level `keyboard.enabled` ### Inference Observation diff --git a/README.md b/README.md index 94d4f117..b2990a7f 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ Full docs at **[BotRunner64.github.io/Teleopit](https://BotRunner64.github.io/Te ### v0.4.0 (2026-06-25) -- Improved Pico realtime control with pico-bridge 0.2.1, `ARMS` mode, and retargeter-preserving mode/pause resets. +- Improved Pico realtime control with pico-bridge 0.2.1, `ARMS` mode, armed sim2real mocap entry, and retargeter-preserving pause/arms resets. - Added optional LinkerHand L6/O6 sim2real control, including Pico gripper input and low-latency L6 `vr_hand_pose`. - Added manual Pico sim2real HDF5 recording and an interactive Pico motion recorder for training NPZ clips. - Refined the training data path with minimal HDF5 shards, explicit precompute, rewind sampling, and updated tracking rewards. diff --git a/docs/docs/tutorials/pico-sim2real.md b/docs/docs/tutorials/pico-sim2real.md index 4b89d3f6..ff9395bc 100644 --- a/docs/docs/tutorials/pico-sim2real.md +++ b/docs/docs/tutorials/pico-sim2real.md @@ -151,8 +151,11 @@ When entering `STANDING`, Teleopit releases active Unitree modes, enters debug/low-level control, locks the current joints briefly, resets policy state, and ramps Kp without changing policy targets. -When entering `MOCAP`, Teleopit resets policy/reference state and starts tracking -the live mocap command through the realtime reference timeline. +When entering `MOCAP`, Teleopit rearms the process-isolated reference worker, +resets its GMR state and realtime reference buffer, then waits for fresh +validated references before tracking the live mocap command. `STANDING` and +`DAMPING` keep the reference worker disarmed so cold startup frames cannot +warm-start retargeting before mocap entry. `ARMS` keeps the same live retargeting timeline running, but sends the motion tracker a composed reference: body, waist, and legs stay at the standing pose diff --git a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/tutorials/pico-sim2real.md b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/tutorials/pico-sim2real.md index 23fd2ce4..de155bcb 100644 --- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/tutorials/pico-sim2real.md +++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/tutorials/pico-sim2real.md @@ -144,8 +144,10 @@ Pico body frames -> retarget -> reference buffer -> observation -> policy -> G1 进入 `STANDING` 时,Teleopit 会释放当前 Unitree 模式,进入 debug/low-level 控制, 短暂锁住当前关节,重置 policy 状态,并在不改变 policy target 的情况下执行 Kp ramp。 -进入 `MOCAP` 时,Teleopit 会重置 policy/reference 状态,并通过实时参考时间线开始跟踪 -实时 mocap 命令。 +进入 `MOCAP` 时,Teleopit 会重新 arm 进程隔离的 reference worker,重置其中的 GMR 状态 +和实时 reference buffer,然后等待新的已验证 reference,再开始跟踪实时 mocap 命令。 +`STANDING` 和 `DAMPING` 会让 reference worker 保持 disarmed,避免冷启动帧在进入 mocap +之前 warm-start retargeting。 `ARMS` 会保持同一条实时 retargeting 时间线继续运行,但发送给 motion tracker 的参考会被组合: 身体、腰部和腿部保持站立姿态,双臂跟随实时 retarget 结果。进入或离开 `ARMS` 时会重置 diff --git a/scripts/view/view_dataset.py b/scripts/view/view_dataset.py index 582f50e7..4bd1e133 100644 --- a/scripts/view/view_dataset.py +++ b/scripts/view/view_dataset.py @@ -166,7 +166,7 @@ def __init__( self._session = DatasetSession(discover_dataset_clips(dataset_path), sort_mode) self._server = viser.ViserServer(port=port, label="Dataset Viewer") - self._scene = ViserMujocoScene.create( + self._scene = ViserMujocoScene( server=self._server, mj_model=self._model, num_envs=1, @@ -266,7 +266,7 @@ def _(_) -> None: if action: self._pending_actions.append(action) - self._scene.create_visualization_gui(show_debug_viz_control=False) + self._scene.create_visualization_gui() def _load_current_clip(self) -> None: clip = self._session.current_clip() diff --git a/teleopit/sim2real/mp/runtime.py b/teleopit/sim2real/mp/runtime.py index 7cd4e99a..f600a5bb 100644 --- a/teleopit/sim2real/mp/runtime.py +++ b/teleopit/sim2real/mp/runtime.py @@ -109,6 +109,8 @@ Float32Array = NDArray[np.float32] Float64Array = NDArray[np.float64] PROJECT_ROOT = Path(__file__).resolve().parents[3] +ARM_MOCAP_REFERENCE_COMMAND = "arm_mocap_reference" +DISARM_MOCAP_REFERENCE_COMMAND = "disarm_mocap_reference" class RobotMode(Enum): @@ -735,8 +737,10 @@ def _main() -> None: body_sub = LatestSubscriber(endpoints.body_pub, BODY_TOPIC) health_sub = LatestSubscriber(endpoints.health_pub, HEALTH_TOPIC) command_sub = LatestSubscriber(endpoints.command_pub, COMMAND_TOPIC) + reference_command_sub = LatestSubscriber(endpoints.reference_command_pub, COMMAND_TOPIC) ref_pub = ZmqPublisher(endpoints.reference_pub) idle_sleep_s = float(cfg_get(_mp_cfg(cfg), "retarget_idle_sleep_s", 0.001)) + mocap_armed = False last_body_seq = -1 last_body_timestamp_s: float | None = None body_dt_s_ema: float | None = None @@ -747,6 +751,48 @@ def _main() -> None: runtime_support_validated = ref_cfg.reference_delay_s is not None or not reference_window_builder.requires_timeline last_valid_qpos: Float64Array | None = None + def _reset_realtime_reference_state(*, reset_retargeter: bool) -> None: + nonlocal last_body_timestamp_s + nonlocal body_dt_s_ema + nonlocal resolved_reference_delay_s + nonlocal runtime_support_validated + nonlocal last_valid_qpos + if timeline is not None: + timeline.clear() + if reference_manager is not None: + reference_manager.reset() + last_body_timestamp_s = None + body_dt_s_ema = None + resolved_reference_delay_s = ( + float(ref_cfg.reference_delay_s) if ref_cfg.reference_delay_s is not None else None + ) + runtime_support_validated = ( + ref_cfg.reference_delay_s is not None or not reference_window_builder.requires_timeline + ) + last_valid_qpos = None + if reset_retargeter: + retargeter.reset() + + def _handle_reference_command(command: CommandPacket | None) -> None: + nonlocal mocap_armed + if not isinstance(command, CommandPacket): + return + if command.command == "shutdown": + stop_event.set() + return + if command.command == ARM_MOCAP_REFERENCE_COMMAND: + if mocap_armed: + return + logger.info("reference worker armed for Pico MOCAP") + mocap_armed = True + _reset_realtime_reference_state(reset_retargeter=True) + return + if command.command == DISARM_MOCAP_REFERENCE_COMMAND: + if mocap_armed: + logger.info("reference worker disarmed for Pico STANDING") + mocap_armed = False + _reset_realtime_reference_state(reset_retargeter=True) + def _publish_invalid_reference(packet: BodyFramePacket, *, elapsed_s: float) -> None: qpos = np.zeros(FULL_QPOS_DIM, dtype=np.float64) qpos[3] = 1.0 @@ -767,9 +813,9 @@ def _publish_invalid_reference(packet: BodyFramePacket, *, elapsed_s: float) -> try: while not stop_event.is_set(): - command = command_sub.recv_latest() - if isinstance(command, CommandPacket) and command.command == "shutdown": - stop_event.set() + _handle_reference_command(command_sub.recv_latest()) + _handle_reference_command(reference_command_sub.recv_latest()) + if stop_event.is_set(): break health_packet = health_sub.recv_latest() @@ -784,6 +830,9 @@ def _publish_invalid_reference(packet: BodyFramePacket, *, elapsed_s: float) -> continue if not isinstance(packet, BodyFramePacket) or int(packet.seq) == last_body_seq: continue + if not mocap_armed: + last_body_seq = int(packet.seq) + continue start_s = time.monotonic() frame_valid = _human_frame_is_valid(packet.frame) if not frame_valid: @@ -854,6 +903,7 @@ def _publish_invalid_reference(packet: BodyFramePacket, *, elapsed_s: float) -> body_sub.close() health_sub.close() command_sub.close() + reference_command_sub.close() ref_pub.close() _worker_loop("reference", cfg, _main) @@ -1046,6 +1096,10 @@ def __init__( self._last_commanded_motion_qpos: Float64Array | None = None self._last_mocap_hold_reason: str | None = None self._mocap_reentry_armed = False + self._mocap_entry_requested = False + self._mocap_reference_armed = False + self._mocap_reference_arm_time_s: float | None = None + self._mocap_reference_arm_retry_s = float(cfg_get(_mp_cfg(cfg), "mocap_reference_arm_retry_s", 0.1)) self._mocap_session = MocapSessionManager() self._latest_reference: ReferencePacket | None = None @@ -1165,10 +1219,13 @@ def _handle_transitions(self) -> None: elif self.mode == RobotMode.STANDING: reentry_request = self._mocap_reentry_armed and self.remote.Y.pressed if self.remote.Y.on_pressed or reentry_request: + self._mocap_entry_requested = True + if self._mocap_entry_requested: + self._arm_mocap_reference_if_needed() if self._can_switch_to_mocap(): operator_logger.info("Y -> MOCAP") self._transition_to_mocap() - else: + elif self.remote.Y.on_pressed or reentry_request: operator_logger.warning("Y -> waiting for fresh retarget reference") elif self.mode in (RobotMode.MOCAP, RobotMode.ARMS): if self.provider_kind == "bvh" and self.remote.B.on_pressed: @@ -1315,6 +1372,9 @@ def _compose_arm_reference_window(self, reference_window: ReferenceWindow | None def _enter_standing(self) -> None: prev_mode = self.mode + self._disarm_mocap_reference_if_needed() + self._clear_reference_gate() + self._mocap_entry_requested = False already_in_debug = self.mode in (RobotMode.STANDING, RobotMode.MOCAP, RobotMode.ARMS) if not already_in_debug: logger.info("Entering debug mode...") @@ -1349,6 +1409,8 @@ def _enter_standing(self) -> None: operator_logger.info("mode -> STANDING") def _can_switch_to_mocap(self) -> bool: + if self.provider_kind == "pico4" and not self._mocap_reference_armed: + return False age_s = self._reference_age_s() if self._latest_reference is None or age_s is None: return False @@ -1380,6 +1442,7 @@ def _transition_to_mocap(self) -> None: if self.provider_kind == "bvh": self._send_reference_command("replay_mocap") self.mode = RobotMode.MOCAP + self._mocap_entry_requested = False operator_logger.info("mode -> MOCAP") def _toggle_arms_mode(self) -> None: @@ -1410,6 +1473,9 @@ def _resume_paused_mocap_if_needed(self) -> None: self._resume_paused_mocap() def _enter_damping(self) -> None: + self._disarm_mocap_reference_if_needed() + self._clear_reference_gate() + self._mocap_entry_requested = False if self.mode in (RobotMode.STANDING, RobotMode.MOCAP, RobotMode.ARMS): logger.info("DAMPING: sending LowCmd damping...") self.robot.set_damping() @@ -1526,6 +1592,35 @@ def _send_reference_command(self, command: str) -> None: CommandPacket(command=command, timestamp_s=time.monotonic()), ) + def _arm_mocap_reference_if_needed(self) -> None: + if getattr(self, "provider_kind", None) != "pico4": + return + now_s = time.monotonic() + if not bool(getattr(self, "_mocap_reference_armed", False)): + self._clear_reference_gate() + self._mocap_reference_armed = True + self._mocap_reference_arm_time_s = now_s + elif getattr(self, "_latest_reference", None) is not None: + return + else: + last_arm_s = getattr(self, "_mocap_reference_arm_time_s", None) + retry_s = float(getattr(self, "_mocap_reference_arm_retry_s", 0.1)) + if last_arm_s is not None and now_s - float(last_arm_s) < retry_s: + return + self._send_reference_command(ARM_MOCAP_REFERENCE_COMMAND) + + def _disarm_mocap_reference_if_needed(self) -> None: + if getattr(self, "provider_kind", None) != "pico4" or not bool(getattr(self, "_mocap_reference_armed", False)): + return + self._send_reference_command(DISARM_MOCAP_REFERENCE_COMMAND) + self._mocap_reference_armed = False + self._mocap_reference_arm_time_s = None + + def _clear_reference_gate(self) -> None: + self._latest_reference = None + self._last_reference_seq = -1 + self._consecutive_valid_references = 0 + def _resolve_mocap_hold_qpos(self) -> Float64Array: if self._last_commanded_motion_qpos is not None: return self._last_commanded_motion_qpos.copy() @@ -1663,6 +1758,14 @@ def _reference_age_s(self) -> float | None: def _note_reference_packet(self, reference: ReferencePacket) -> None: if int(reference.seq) <= self._last_reference_seq: return + arm_time_s = getattr(self, "_mocap_reference_arm_time_s", None) + if ( + self.provider_kind == "pico4" + and bool(getattr(self, "_mocap_reference_armed", False)) + and arm_time_s is not None + and float(reference.timestamp_s) < float(arm_time_s) + ): + return self._last_reference_seq = int(reference.seq) self._latest_reference = reference if ( diff --git a/tests/test_sim2real_multiprocess.py b/tests/test_sim2real_multiprocess.py index b59b8e1b..72a7d933 100644 --- a/tests/test_sim2real_multiprocess.py +++ b/tests/test_sim2real_multiprocess.py @@ -30,6 +30,8 @@ from teleopit.sim2real.mp.messages import HandCommandPacket, ModeStatePacket, RecordStepPacket, ReferencePacket, SharedFrameDescriptor from teleopit.sim.reference_timeline import ReferenceSample, ReferenceWindow from teleopit.sim2real.mp.runtime import ( + ARM_MOCAP_REFERENCE_COMMAND, + DISARM_MOCAP_REFERENCE_COMMAND, map_recording_key_to_command, RobotMode, Sim2RealRuntime, @@ -511,6 +513,7 @@ def test_robot_worker_requires_consecutive_valid_references(monkeypatch) -> None worker._consecutive_valid_references = 0 worker._check_frames = 2 worker._max_reference_age_s = 0.25 + worker._mocap_reference_armed = True worker.provider_kind = "pico4" worker._reference_age_s = lambda: 0.0 worker._mocap_session = SimpleNamespace(state=MocapSessionState.ACTIVE) @@ -566,6 +569,160 @@ def test_robot_worker_requires_consecutive_valid_references(monkeypatch) -> None assert worker._consecutive_valid_references == 1 +def test_robot_worker_arms_pico_reference_before_mocap_entry() -> None: + worker = object.__new__(_RobotControlWorker) + commands: list[str] = [] + worker.provider_kind = "pico4" + worker.mode = RobotMode.STANDING + worker._mocap_reference_armed = False + worker._latest_reference = ReferencePacket( + qpos=np.zeros(36, dtype=np.float64), + timestamp_s=1.0, + seq=1, + source_timestamp_s=1.0, + source_seq=1, + frame_valid=True, + ) + worker._last_reference_seq = 1 + worker._consecutive_valid_references = 10 + worker._send_reference_command = commands.append + + worker._arm_mocap_reference_if_needed() + + assert commands == [ARM_MOCAP_REFERENCE_COMMAND] + assert worker._mocap_reference_armed is True + assert worker._latest_reference is None + assert worker._last_reference_seq == -1 + assert worker._consecutive_valid_references == 0 + + +def test_robot_worker_retries_pico_reference_arm_without_clearing_gate(monkeypatch) -> None: + worker = object.__new__(_RobotControlWorker) + commands: list[str] = [] + now_s = [100.0] + monkeypatch.setattr("teleopit.sim2real.mp.runtime.time.monotonic", lambda: now_s[0]) + worker.provider_kind = "pico4" + worker._mocap_reference_armed = False + worker._mocap_reference_arm_retry_s = 0.5 + worker._latest_reference = None + worker._last_reference_seq = 1 + worker._consecutive_valid_references = 2 + worker._send_reference_command = commands.append + + worker._arm_mocap_reference_if_needed() + + assert commands == [ARM_MOCAP_REFERENCE_COMMAND] + assert worker._mocap_reference_armed is True + assert worker._last_reference_seq == -1 + assert worker._consecutive_valid_references == 0 + assert worker._mocap_reference_arm_time_s == 100.0 + + worker._last_reference_seq = 5 + worker._consecutive_valid_references = 6 + now_s[0] = 100.49 + worker._arm_mocap_reference_if_needed() + assert commands == [ARM_MOCAP_REFERENCE_COMMAND] + + now_s[0] = 100.50 + worker._arm_mocap_reference_if_needed() + assert commands == [ARM_MOCAP_REFERENCE_COMMAND, ARM_MOCAP_REFERENCE_COMMAND] + assert worker._last_reference_seq == 5 + assert worker._consecutive_valid_references == 6 + assert worker._mocap_reference_arm_time_s == 100.0 + + worker._latest_reference = ReferencePacket( + qpos=np.zeros(36, dtype=np.float64), + timestamp_s=100.6, + seq=6, + source_timestamp_s=100.6, + source_seq=6, + frame_valid=True, + ) + now_s[0] = 101.0 + worker._arm_mocap_reference_if_needed() + assert commands == [ARM_MOCAP_REFERENCE_COMMAND, ARM_MOCAP_REFERENCE_COMMAND] + + +def test_robot_worker_checks_pico_reference_arm_while_entry_is_pending() -> None: + worker = object.__new__(_RobotControlWorker) + arm_checks: list[str] = [] + worker.mode = RobotMode.STANDING + worker._mocap_entry_requested = True + worker._mocap_reentry_armed = False + worker.remote = SimpleNamespace(Y=SimpleNamespace(on_pressed=False, pressed=False)) + worker._arm_mocap_reference_if_needed = lambda: arm_checks.append("arm") + worker._can_switch_to_mocap = lambda: False + + worker._handle_transitions() + + assert arm_checks == ["arm"] + + +def test_robot_worker_disarms_pico_reference_and_clears_gate() -> None: + worker = object.__new__(_RobotControlWorker) + commands: list[str] = [] + worker.provider_kind = "pico4" + worker._mocap_reference_armed = True + worker._latest_reference = ReferencePacket( + qpos=np.zeros(36, dtype=np.float64), + timestamp_s=1.0, + seq=1, + source_timestamp_s=1.0, + source_seq=1, + frame_valid=True, + ) + worker._last_reference_seq = 1 + worker._consecutive_valid_references = 10 + worker._send_reference_command = commands.append + + worker._disarm_mocap_reference_if_needed() + worker._clear_reference_gate() + + assert commands == [DISARM_MOCAP_REFERENCE_COMMAND] + assert worker._mocap_reference_armed is False + assert worker._latest_reference is None + assert worker._last_reference_seq == -1 + assert worker._consecutive_valid_references == 0 + + +def test_robot_worker_ignores_pico_reference_older_than_arm_time() -> None: + worker = object.__new__(_RobotControlWorker) + worker.provider_kind = "pico4" + worker._mocap_reference_armed = True + worker._mocap_reference_arm_time_s = 10.0 + worker._last_reference_seq = -1 + worker._latest_reference = None + worker._consecutive_valid_references = 0 + + old_packet = ReferencePacket( + qpos=np.zeros(36, dtype=np.float64), + timestamp_s=9.9, + seq=1, + source_timestamp_s=1.0, + source_seq=1, + frame_valid=True, + ) + worker._note_reference_packet(old_packet) + + assert worker._latest_reference is None + assert worker._last_reference_seq == -1 + assert worker._consecutive_valid_references == 0 + + fresh_packet = ReferencePacket( + qpos=np.zeros(36, dtype=np.float64), + timestamp_s=10.1, + seq=2, + source_timestamp_s=1.1, + source_seq=2, + frame_valid=True, + ) + worker._note_reference_packet(fresh_packet) + + assert worker._latest_reference is fresh_packet + assert worker._last_reference_seq == 2 + assert worker._consecutive_valid_references == 1 + + def test_robot_worker_replays_bvh_on_mocap_entry() -> None: worker = object.__new__(_RobotControlWorker) commands: list[str] = [] diff --git a/train_mimic/tasks/tracking/tracking_env_cfg.py b/train_mimic/tasks/tracking/tracking_env_cfg.py index eb1faaae..ece6329a 100644 --- a/train_mimic/tasks/tracking/tracking_env_cfg.py +++ b/train_mimic/tasks/tracking/tracking_env_cfg.py @@ -269,7 +269,7 @@ def make_tracking_env_cfg() -> ManagerBasedRlEnvCfg: params={"command_name": "motion", "std": 3.0}, ), "survival": RewardTermCfg(func=mdp.survival, weight=3.0), - "action_rate_l2": RewardTermCfg(func=mdp.action_rate_l2, weight=-0.5), + "action_rate_l2": RewardTermCfg(func=mdp.action_rate_l2, weight=-0.3), "joint_limit": RewardTermCfg( func=mdp.joint_pos_limits, weight=-10.0,