From 29a786ec414be13dd8c80d8e968c7a6f33c3d49e Mon Sep 17 00:00:00 2001 From: anderdc Date: Mon, 8 Jun 2026 17:25:15 -0500 Subject: [PATCH] fix(validator): serialize hotkey contract writes to prevent nonce collisions The forward loop and axon handlers both sign contract writes with the validator hotkey over separate substrate connections. Each auto-fetches the best-block nonce independently, so two concurrent writes can grab the same nonce; one lands and the other is rejected and pool-banned (1012). A reserve flood (e.g. during a halt) made this constant and starved confirm/timeout votes until swaps blew past their deadline. Share one write lock across both clients, held across nonce-fetch -> submit -> inclusion so the nonce advances before the next signer composes. Reads stay on their per-connection locks and remain parallel. --- allways/contract_client.py | 11 ++++++++++- neurons/validator.py | 7 +++++++ tests/test_contract_client.py | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) diff --git a/allways/contract_client.py b/allways/contract_client.py index 672a4cb7..228c8125 100644 --- a/allways/contract_client.py +++ b/allways/contract_client.py @@ -352,6 +352,7 @@ def __init__( subtensor: Optional[bt.Subtensor] = None, reconnect_subtensor: Optional[Callable[[], None]] = None, substrate_lock: Optional[Any] = None, + write_lock: Optional[Any] = None, ): self.contract_address = contract_address or get_contract_address() or '' self.subtensor = subtensor @@ -366,6 +367,10 @@ def __init__( # access so concurrent threads can't both land in recv. Callers sharing # this subtensor elsewhere pass that path's lock to serialize as one. self._substrate_lock = substrate_lock or threading.Lock() + # One hotkey = one nonce sequence. Clients that sign with the same + # hotkey across separate connections must share this so two threads + # can't fetch the same nonce and get one extrinsic pool-banned. + self._write_lock = write_lock or threading.Lock() if not self.contract_address: bt.logging.warning('Allways contract address not set') @@ -573,7 +578,11 @@ def submit_extrinsic(s): return s.submit_extrinsic(extrinsic, wait_for_inclusion=wait_for_inclusion, wait_for_finalization=False) try: - receipt = self.substrate_call(submit_extrinsic) + # Hold the write lock across nonce-fetch → submit → inclusion so the + # best-block nonce advances before the next signer (any connection on + # the same hotkey) composes. Reads stay outside it, so they're parallel. + with self._write_lock: + receipt = self.substrate_call(submit_extrinsic) except Exception as e: raise ContractError(f'{method}: exec failed: {e}') from e diff --git a/neurons/validator.py b/neurons/validator.py index 4d3345d5..b3e58dd8 100644 --- a/neurons/validator.py +++ b/neurons/validator.py @@ -70,9 +70,15 @@ class Validator(BaseValidatorNeuron): def __init__(self, config=None): super().__init__(config=config) + # Forward loop and axon handlers both sign contract writes with the + # validator hotkey over separate connections; share one lock so their + # nonce sequences can't collide (one would get pool-banned). Reads stay + # on their own per-connection locks, so they remain parallel. + self.write_lock = threading.Lock() self.contract_client = AllwaysContractClient( subtensor=self.subtensor, reconnect_subtensor=self.reconnect_and_propagate, + write_lock=self.write_lock, ) self.chain_providers = create_chain_providers(check=True, require_send=False, subtensor=self.subtensor) @@ -159,6 +165,7 @@ def __init__(self, config=None): subtensor=self.axon_subtensor, reconnect_subtensor=self.reconnect_axon_subtensor, substrate_lock=self.axon_lock, + write_lock=self.write_lock, ) self.axon_chain_providers = create_chain_providers(subtensor=self.axon_subtensor) # Read block/bounds via axon_subtensor; the forward loop calls this too, diff --git a/tests/test_contract_client.py b/tests/test_contract_client.py index 16a4fafe..6d7fe936 100644 --- a/tests/test_contract_client.py +++ b/tests/test_contract_client.py @@ -6,6 +6,7 @@ and retries the call once. Anything else propagates as-is. """ +import threading from unittest.mock import MagicMock import pytest @@ -199,3 +200,35 @@ def test_new_error_variants_present(self): 'TargetNotForward', 'InvalidTarget', } + + +class TestWriteLockSerialization: + """One hotkey = one nonce sequence; writes across connections must serialize.""" + + def test_shared_write_lock_object(self): + shared = threading.Lock() + a = AllwaysContractClient(contract_address='5xx', subtensor=make_subtensor(), write_lock=shared) + b = AllwaysContractClient(contract_address='5xx', subtensor=make_subtensor(), write_lock=shared) + assert a._write_lock is b._write_lock is shared + + def test_default_write_lock_is_private(self): + a = AllwaysContractClient(contract_address='5xx', subtensor=make_subtensor()) + b = AllwaysContractClient(contract_address='5xx', subtensor=make_subtensor()) + assert a._write_lock is not b._write_lock + + def test_write_lock_held_during_submit_not_reads(self): + # The submit closure runs under the write lock; the account-nonce/balance + # read (a lambda) runs outside it, so reads stay parallel. + shared = threading.Lock() + client = AllwaysContractClient(contract_address='5xx', subtensor=make_subtensor(), write_lock=shared) + receipt = MagicMock(is_success=True, extrinsic_hash='0xabc') + held_during = {} + + def fake_substrate_call(fn): + held_during[getattr(fn, '__name__', '')] = shared.locked() + return receipt if getattr(fn, '__name__', '') == 'submit_extrinsic' else MagicMock() + + client.substrate_call = fake_substrate_call + client.exec_contract_raw('confirm_swap', args={'swap_id': 1}, keypair=client.readonly_keypair) + assert held_during.get('submit_extrinsic') is True + assert held_during.get('') is False