From d6f82a389a5b32eb90851e38c1ba101ef995f7a7 Mon Sep 17 00:00:00 2001 From: anderdc Date: Tue, 9 Jun 2026 15:15:06 -0500 Subject: [PATCH 1/3] fix(miner): size send-cache discard margin for two timeout extensions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SENT_CACHE_DISCARD_MARGIN_BLOCKS was MAX_EXTENSION_BLOCKS + DEFAULT_FULFILLMENT_TIMEOUT_BLOCKS (300), covering a single extension. The contract permits MAX_EXTENSIONS_PER_SWAP (2) extensions, each pushing timeout_block forward by up to MAX_EXTENSION_BLOCKS relative to its own propose block with no cumulative cap, so a fully-extended live deadline can reach D0 + 2 * MAX_EXTENSION_BLOCKS. When a get_swap gap drops a swap from the active set across both extensions, the cached deadline is never refreshed and cleanup_stale_sends discards the unmarked entry while the swap is still active on-chain, re-sending destination funds on rediscovery (#461) — the duplicate-send #452 set out to prevent. Size the margin to MAX_EXTENSIONS_PER_SWAP * MAX_EXTENSION_BLOCKS + DEFAULT_FULFILLMENT_TIMEOUT_BLOCKS (550) so it tracks the contract's caps, and add a two-extension regression test. Fixes #461 --- allways/constants.py | 10 +++++++--- tests/test_fulfillment.py | 34 +++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/allways/constants.py b/allways/constants.py index 88fa760..f56b6df 100644 --- a/allways/constants.py +++ b/allways/constants.py @@ -137,6 +137,10 @@ DEFAULT_MAX_SWAP_AMOUNT_RAO = 500_000_000 # 0.5 TAO RESERVATION_TTL_BLOCKS = 50 # ~10 min -# Blocks past a retained send-cache entry's last-known timeout_block before the miner discards it. Above -# MAX_EXTENSION_BLOCKS so a still-active (even fully-extended) swap is never dropped early. ~300 blocks ≈ 1h. -SENT_CACHE_DISCARD_MARGIN_BLOCKS = MAX_EXTENSION_BLOCKS + DEFAULT_FULFILLMENT_TIMEOUT_BLOCKS +# Blocks past a retained send-cache entry's last-known timeout_block before the miner discards it. Sized for +# the worst case the contract permits: each of MAX_EXTENSIONS_PER_SWAP extensions can push timeout_block forward +# by up to MAX_EXTENSION_BLOCKS (the per-extension cap is relative to its own propose block, not cumulative — see +# lib.rs propose_extend_timeout), so a fully-extended live deadline can reach D0 + MAX_EXTENSIONS_PER_SWAP * +# MAX_EXTENSION_BLOCKS. A smaller margin can discard a still-active twice-extended swap whose cached deadline +# predates both extensions, re-sending destination funds on rediscovery (#461). ~550 blocks ≈ 1.8h. +SENT_CACHE_DISCARD_MARGIN_BLOCKS = MAX_EXTENSIONS_PER_SWAP * MAX_EXTENSION_BLOCKS + DEFAULT_FULFILLMENT_TIMEOUT_BLOCKS diff --git a/tests/test_fulfillment.py b/tests/test_fulfillment.py index e6c3d67..7151aa9 100644 --- a/tests/test_fulfillment.py +++ b/tests/test_fulfillment.py @@ -12,7 +12,13 @@ import pytest from allways.classes import Swap, SwapStatus -from allways.constants import MINER_TIMEOUT_CUSHION_BLOCKS, SENT_CACHE_DISCARD_MARGIN_BLOCKS +from allways.constants import ( + DEFAULT_FULFILLMENT_TIMEOUT_BLOCKS, + MAX_EXTENSION_BLOCKS, + MAX_EXTENSIONS_PER_SWAP, + MINER_TIMEOUT_CUSHION_BLOCKS, + SENT_CACHE_DISCARD_MARGIN_BLOCKS, +) from allways.miner.fulfillment import SentSwap, SwapFulfiller from allways.miner.swap_poller import MAX_REFRESH_MISSES, SwapPoller @@ -184,6 +190,32 @@ def test_unmarked_stale_retained_inside_deadline_margin(self): fulfiller.cleanup_stale_sends(active_swap_ids=set()) assert set(fulfiller.sent) == {1, 2} + def test_unmarked_stale_retained_across_two_extensions(self): + # Regression for #461: the contract permits MAX_EXTENSIONS_PER_SWAP (2) + # timeout extensions, each pushing timeout_block forward by up to + # MAX_EXTENSION_BLOCKS relative to its own propose block (not cumulative), + # so a live deadline can reach D0 + 2 * MAX_EXTENSION_BLOCKS. If the cached + # snapshot predates both extensions (a get_swap gap drops the swap from the + # active set, so process_swap never refreshes it), the margin must still + # cover the fully-extended deadline. The old margin (1 * MAX_EXTENSION_BLOCKS + # + DEFAULT_FULFILLMENT_TIMEOUT_BLOCKS) would discard here and re-send on + # rediscovery. + d0 = 100 + old_single_extension_margin = MAX_EXTENSION_BLOCKS + DEFAULT_FULFILLMENT_TIMEOUT_BLOCKS + live_deadline = d0 + MAX_EXTENSIONS_PER_SWAP * MAX_EXTENSION_BLOCKS + # Sanity-check this case actually exercises the gap the fix closes: the + # current block is past the old margin but the swap is still live on-chain. + current = d0 + old_single_extension_margin + 1 + assert current > d0 + old_single_extension_margin # would have been discarded pre-fix + assert current <= live_deadline # but the swap is still active on-chain + + fulfiller = make_fulfiller() + fulfiller.sent = {1: SentSwap('twice-extended-tx', 50, marked_fulfilled=False, timeout_block=d0)} + fulfiller.subtensor.get_current_block.return_value = current + + fulfiller.cleanup_stale_sends(active_swap_ids=set()) + assert set(fulfiller.sent) == {1} + def test_legacy_entry_without_deadline_never_discarded(self): fulfiller = make_fulfiller() fulfiller.sent = {1: SentSwap('legacy-tx', 5, marked_fulfilled=False)} # timeout_block defaults to 0 From 8367e35cd2383d27feb55a2ddceed1bf8d404a01 Mon Sep 17 00:00:00 2001 From: anderdc Date: Tue, 9 Jun 2026 15:28:53 -0500 Subject: [PATCH 2/3] fix(miner): don't apply the timeout cushion to the post-send mark_fulfilled retry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit verify_swap_safety enforces MINER_TIMEOUT_CUSHION_BLOCKS, and process_swap ran it on every pass — including the mark_fulfilled retry after dest funds were already sent. Once the chain reached timeout_block - cushion, the gate returned None and aborted the retry, so a transient mark_fulfilled failure in the final ~18-block window left the swap Active at its deadline and the miner was slashed for the full tao_amount despite having already paid the user (#462). The cushion is scoped to STARTING a fulfill (#356); applying it to the retry re-introduced the loss it was added to prevent. Gate verify_swap_safety / verify_user_sent_funds under the first-send branch only; on the retry recompute user_receives_amount from the snapshotted rate and go straight to mark_fulfilled. Add a regression test that runs the real cushion on the retry path inside the cushion window. Fixes #462 --- allways/miner/fulfillment.py | 35 ++++++++++++++++++++++------------- tests/test_fulfillment.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 13 deletions(-) diff --git a/allways/miner/fulfillment.py b/allways/miner/fulfillment.py index 459bc9b..e7cdc39 100644 --- a/allways/miner/fulfillment.py +++ b/allways/miner/fulfillment.py @@ -297,21 +297,22 @@ def process_swap(self, swap: Swap) -> bool: bt.logging.info(f'Processing swap {swap.id}: {swap.from_chain} -> {swap.to_chain}') - # Step 1: Verify swap safety (timeout, rate, collateral) - safety_result = self.verify_swap_safety(swap) - if safety_result is None: - bt.logging.warning(f'Swap {swap.id}: failed safety checks, skipping') - return False - - user_receives_amount, my_source_address = safety_result + if sent is None: + # First pass — gate the SEND on safety (timeout cushion, rate, + # source funds). The cushion deliberately blocks STARTING a fulfill + # with too little runway left for a rescue extension. + # Step 1: Verify swap safety (timeout, rate, collateral) + safety_result = self.verify_swap_safety(swap) + if safety_result is None: + bt.logging.warning(f'Swap {swap.id}: failed safety checks, skipping') + return False + user_receives_amount, my_source_address = safety_result - # Step 2: Verify user sent source funds - if not self.verify_user_sent_funds(swap, my_source_address): - return False + # Step 2: Verify user sent source funds + if not self.verify_user_sent_funds(swap, my_source_address): + return False - # Step 3: Send destination funds — unless we already did on a previous - # pass, in which case we skip straight to the mark_fulfilled retry. - if sent is None: + # Step 3: Send destination funds send_result = self.send_dest_funds(swap, user_receives_amount) if not send_result: bt.logging.error(f'Swap {swap.id}: failed to send dest funds') @@ -326,6 +327,14 @@ def process_swap(self, swap: Swap) -> bool: self.sent[swap.id] = sent self.save_sent_cache() else: + # Funds are already out — never re-apply the cushion/safety gate + # here. The cushion is scoped to STARTING a fulfill; once the user + # is paid, retrying mark_fulfilled right up to the deadline is the + # only thing that turns the swap Fulfilled and avoids a timeout + # slash of a miner that already delivered (#462). Recompute the + # post-fee amount the contract call needs (rate is snapshotted on + # the swap, so this matches the original send). + _, user_receives_amount = expected_swap_amounts(swap, self.fee_divisor) # Keep the retained deadline current with any extension seen while # active, so cleanup never discards a still-extendable swap early. if swap.timeout_block > sent.timeout_block: diff --git a/tests/test_fulfillment.py b/tests/test_fulfillment.py index 7151aa9..28ee884 100644 --- a/tests/test_fulfillment.py +++ b/tests/test_fulfillment.py @@ -126,6 +126,41 @@ def test_unmarked_stale_retained_marked_stale_removed_within_deadline(self): assert set(fulfiller.sent) == {1, 3} assert fulfiller.mark_fulfilled_attempts == {1: 2, 3: 1} + def test_mark_fulfilled_retry_not_gated_by_cushion_after_send(self): + # Regression for #462: once dest funds are out, the swap stays Active + # until mark_fulfilled lands, and an Active swap is slashed at timeout. + # The cushion is scoped to STARTING a fulfill, so it must NOT gate the + # post-send mark_fulfilled retry — otherwise a transient mark_fulfilled + # failure inside the final cushion window guarantees a slash of a miner + # that already paid. Uses the REAL verify_swap_safety (not stubbed) so + # the cushion actually runs on the retry path. + from allways.contract_client import ContractError + + swap = make_swap(timeout_block=500) + fulfiller = make_fulfiller() + fulfiller.fee_divisor = 100 + # Inside the cushion window: a first SEND would be gated off here. + fulfiller.subtensor.get_current_block.return_value = 500 - MINER_TIMEOUT_CUSHION_BLOCKS + # Dest funds already sent on a prior pass, mark_fulfilled not yet landed. + fulfiller.sent[swap.id] = SentSwap('already-sent-dest-tx', 777, marked_fulfilled=False, timeout_block=500) + fulfiller.send_dest_funds = MagicMock() + # Transient (non-rejection) failure — keeps the entry retryable. + fulfiller.client.mark_fulfilled.side_effect = ContractError('transient rpc failure') + + result = fulfiller.process_swap(swap) + + assert result is False # mark_fulfilled didn't land this pass + fulfiller.send_dest_funds.assert_not_called() # never re-send + # The retry was attempted despite being inside the cushion window. + fulfiller.client.mark_fulfilled.assert_called_once_with( + wallet=fulfiller.wallet, + swap_id=swap.id, + to_tx_hash='already-sent-dest-tx', + to_amount=3_415_500_000, + to_tx_block=777, + ) + assert fulfiller.sent[swap.id].marked_fulfilled is False # still retryable next pass + def test_retained_send_blocks_resend_after_poller_misses_and_rediscovery(self): swap = make_swap(timeout_block=500) poll_client = MagicMock() From d8fd08f4aca314479c601e81e37520e899da505f Mon Sep 17 00:00:00 2001 From: anderdc Date: Tue, 9 Jun 2026 15:43:24 -0500 Subject: [PATCH 3/3] docs: trim verbose comments and drop stale step numbering --- allways/constants.py | 9 +++------ allways/miner/fulfillment.py | 25 +++++++++---------------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/allways/constants.py b/allways/constants.py index f56b6df..3a1e460 100644 --- a/allways/constants.py +++ b/allways/constants.py @@ -137,10 +137,7 @@ DEFAULT_MAX_SWAP_AMOUNT_RAO = 500_000_000 # 0.5 TAO RESERVATION_TTL_BLOCKS = 50 # ~10 min -# Blocks past a retained send-cache entry's last-known timeout_block before the miner discards it. Sized for -# the worst case the contract permits: each of MAX_EXTENSIONS_PER_SWAP extensions can push timeout_block forward -# by up to MAX_EXTENSION_BLOCKS (the per-extension cap is relative to its own propose block, not cumulative — see -# lib.rs propose_extend_timeout), so a fully-extended live deadline can reach D0 + MAX_EXTENSIONS_PER_SWAP * -# MAX_EXTENSION_BLOCKS. A smaller margin can discard a still-active twice-extended swap whose cached deadline -# predates both extensions, re-sending destination funds on rediscovery (#461). ~550 blocks ≈ 1.8h. +# Blocks past a retained entry's last-known timeout_block before discard. Sized for the contract's worst case: +# MAX_EXTENSIONS_PER_SWAP extensions each push the deadline up to MAX_EXTENSION_BLOCKS further (not cumulative). +# A smaller margin can discard a still-active twice-extended swap and re-send on rediscovery (#461). ~550 ≈ 1.8h. SENT_CACHE_DISCARD_MARGIN_BLOCKS = MAX_EXTENSIONS_PER_SWAP * MAX_EXTENSION_BLOCKS + DEFAULT_FULFILLMENT_TIMEOUT_BLOCKS diff --git a/allways/miner/fulfillment.py b/allways/miner/fulfillment.py index e7cdc39..bde1af7 100644 --- a/allways/miner/fulfillment.py +++ b/allways/miner/fulfillment.py @@ -298,21 +298,18 @@ def process_swap(self, swap: Swap) -> bool: bt.logging.info(f'Processing swap {swap.id}: {swap.from_chain} -> {swap.to_chain}') if sent is None: - # First pass — gate the SEND on safety (timeout cushion, rate, - # source funds). The cushion deliberately blocks STARTING a fulfill + # First pass — gate the send on safety (timeout cushion, rate, + # source funds), then send. The cushion blocks STARTING a fulfill # with too little runway left for a rescue extension. - # Step 1: Verify swap safety (timeout, rate, collateral) safety_result = self.verify_swap_safety(swap) if safety_result is None: bt.logging.warning(f'Swap {swap.id}: failed safety checks, skipping') return False user_receives_amount, my_source_address = safety_result - # Step 2: Verify user sent source funds if not self.verify_user_sent_funds(swap, my_source_address): return False - # Step 3: Send destination funds send_result = self.send_dest_funds(swap, user_receives_amount) if not send_result: bt.logging.error(f'Swap {swap.id}: failed to send dest funds') @@ -327,13 +324,10 @@ def process_swap(self, swap: Swap) -> bool: self.sent[swap.id] = sent self.save_sent_cache() else: - # Funds are already out — never re-apply the cushion/safety gate - # here. The cushion is scoped to STARTING a fulfill; once the user - # is paid, retrying mark_fulfilled right up to the deadline is the - # only thing that turns the swap Fulfilled and avoids a timeout - # slash of a miner that already delivered (#462). Recompute the - # post-fee amount the contract call needs (rate is snapshotted on - # the swap, so this matches the original send). + # Funds are already out — skip the cushion/safety gate (scoped to + # STARTING a fulfill); retrying mark_fulfilled to the deadline only + # helps and avoids a timeout slash of a miner that paid (#462). + # Recompute the post-fee amount (rate is snapshotted on the swap). _, user_receives_amount = expected_swap_amounts(swap, self.fee_divisor) # Keep the retained deadline current with any extension seen while # active, so cleanup never discards a still-extendable swap early. @@ -342,10 +336,9 @@ def process_swap(self, swap: Swap) -> bool: self.save_sent_cache() bt.logging.info(f'Swap {swap.id}: retrying mark_fulfilled for cached send tx {sent.to_tx_hash[:16]}...') - # Step 4: Mark fulfilled on contract. We pass ``user_receives_amount`` - # as ``to_amount`` because at mark_fulfilled time the contract stores - # the actual sent amount (post-fee), which is what ``swap.to_amount`` - # becomes after the call. + # Mark fulfilled on contract. ``user_receives_amount`` is the post-fee + # amount the contract stores as ``to_amount`` (what ``swap.to_amount`` + # becomes after the call). try: self.client.mark_fulfilled( wallet=self.wallet,