Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions gittensor/utils/mirror/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ def __init__(
self.timeout = timeout
self.max_attempts = max_attempts
self.session = session or requests.Session()
# Per-client cache of repo → maintainer GitHub IDs. A new MirrorClient is
# created per scoring round, so cache lifetime == round. Used by the
# issue-multiplier tier and the maintainer_cut carve-out so a repo's
# maintainer set is fetched once per round, not once per PR.
self._maintainer_github_ids_cache: Dict[str, frozenset[str]] = {}

def close(self) -> None:
    """Close the HTTP session held in ``self.session``."""
    self.session.close()
Expand Down Expand Up @@ -130,6 +135,35 @@ def get_repo_maintainers(self, repo_full_name: str) -> MirrorRepoMaintainersResp
except Exception as e:
raise MirrorRequestError(f'Mirror response from {path} was invalid: {e}') from e

def get_maintainer_github_ids(self, repo_full_name: str) -> frozenset[str]:
    """Return the current maintainer GitHub IDs for ``repo_full_name``.

    Wraps ``get_repo_maintainers`` with a per-instance cache so the same
    mirror call is not re-issued across PRs being scored for the same repo
    in a single round.

    The result feeds the issue-multiplier tier decision in place of each
    linked issue's stored ``author_association`` field, which the mirror
    snapshots at ingest time and never refreshes (so a role change after
    an issue was filed would otherwise be invisible).

    Args:
        repo_full_name: Repository identifier used both as the mirror
            lookup argument and as the cache key.

    Returns:
        A ``frozenset`` of the non-empty ``github_id`` values reported by
        the mirror. On ``MirrorRequestError`` an empty ``frozenset`` is
        returned instead of raising, so callers treat the repo as having
        no identifiable maintainers — conservative for the issue-bonus
        tier determination. The empty fallback is cached like any other
        result, so a failed lookup is not retried on this client instance.
    """
    cached = self._maintainer_github_ids_cache.get(repo_full_name)
    # Compare against None explicitly: an empty frozenset is a legitimate
    # cached value (no maintainers, or a cached failure) and is falsy.
    if cached is not None:
        return cached
    try:
        # Only the mirror call is guarded; building the set cannot raise
        # MirrorRequestError, so it lives in the ``else`` branch.
        response = self.get_repo_maintainers(repo_full_name)
    except MirrorRequestError as e:
        bt.logging.warning(
            f'Mirror maintainer lookup failed for {repo_full_name} ({e}); '
            f'issue-bonus tier will default to standard for this repo this round'
        )
        ids = frozenset()
    else:
        # Entries with a missing/empty github_id cannot be matched against
        # an issue author, so they are dropped.
        ids = frozenset(m.github_id for m in response.maintainers if m.github_id)
    self._maintainer_github_ids_cache[repo_full_name] = ids
    return ids

def _fetch_windowed(self, path: str, since_by_repo: Optional[Dict[str, datetime]]) -> dict:
"""POST a per-repo ``since`` map when one is given, else GET the
mirror's default window."""
Expand Down
48 changes: 38 additions & 10 deletions gittensor/validator/oss_contributions/mirror/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,14 @@ async def score_pr(
# eligibility and reporting gates keep their evidence signal.
scored.base_score = repo_config.fixed_base_score

_calculate_pr_multipliers(scored, repo_config, scoring_cfg)
# Resolve the repo's current maintainer set so the issue-bonus tier reflects
# live role state, not the issue's stored author_association (cached at
# mirror-ingest time, never refreshed). Same source the maintainer_cut
# carve-out trusts. Cached per-client; first call per repo hits the mirror,
# subsequent PRs in the same repo are free.
maintainer_github_ids = await asyncio.to_thread(client.get_maintainer_github_ids, pr.repo_full_name)

_calculate_pr_multipliers(scored, repo_config, scoring_cfg, maintainer_github_ids)

if pr.state == 'MERGED':
eval_.unique_repos_contributed_to.add(pr.repo_full_name)
Expand Down Expand Up @@ -368,7 +375,12 @@ def calculate_base_score_for_pr_files(
# ============================================================================


def _calculate_pr_multipliers(scored: ScoredPR, repo_config: RepositoryConfig, scoring_cfg: ResolvedScoring) -> None:
def _calculate_pr_multipliers(
scored: ScoredPR,
repo_config: RepositoryConfig,
scoring_cfg: ResolvedScoring,
maintainer_github_ids: frozenset[str],
) -> None:
"""Compute time_decay, review_quality, label, and issue multipliers.

Spam and credibility multipliers are deferred to ``finalize_miner_scores``
Expand All @@ -381,7 +393,7 @@ def _calculate_pr_multipliers(scored: ScoredPR, repo_config: RepositoryConfig, s
scored.label = chosen_label
scored.label_multiplier = label_multiplier

scored.issue_multiplier = round(_calculate_issue_multiplier(scored, scoring_cfg), 2)
scored.issue_multiplier = round(_calculate_issue_multiplier(scored, scoring_cfg, maintainer_github_ids), 2)

if is_merged:
assert pr.merged_at is not None, f'MERGED PR #{pr.pr_number} missing merged_at'
Expand Down Expand Up @@ -410,11 +422,26 @@ def _resolve_trusted_scoring_label(pr: MirrorPullRequest, repo_config: Repositor
# ============================================================================


def _calculate_issue_multiplier(scored: ScoredPR, scoring: ResolvedScoring) -> float:
def _calculate_issue_multiplier(
scored: ScoredPR,
scoring: ResolvedScoring,
maintainer_github_ids: frozenset[str],
) -> float:
"""Return the multiplier earned from valid linked issues on a PR.

Maintainer-authored valid issues bump the multiplier higher
(``maintainer_issue_multiplier`` vs ``standard_issue_multiplier``).
A linked issue's author is treated as a maintainer iff their GitHub ID is
currently in the repo's maintainer set, sourced from the mirror's
``/repos/:repo/maintainers`` endpoint — the same source the
``maintainer_cut`` carve-out trusts.

The issue's stored ``author_association`` is intentionally NOT consulted:
the mirror snapshots that field at ingest time and never refreshes it, so
a role change after the issue was filed would otherwise be invisible (the
bug surfaced via matthewevans / plind-junior on phase-rs/phase where their
issues stayed classified as CONTRIBUTOR even after they became MEMBER /
COLLABORATOR). Consulting the live maintainer set unifies identity
treatment with the carve-out.

Returns 1.0 if no linked issues pass the anti-gaming gates.
"""
pr = scored.pr
Expand All @@ -427,13 +454,14 @@ def _calculate_issue_multiplier(scored: ScoredPR, scoring: ResolvedScoring) -> f
bt.logging.info(f'PR #{pr.pr_number} - Solved no valid linked issues')
return 1.0

# Prefer a maintainer-authored valid issue so the multiplier doesn't depend
# on mirror response ordering of linked_issues (regression seen in PR #673).
# Prefer an issue whose author is currently in the maintainer set so the
# multiplier doesn't depend on mirror response ordering of linked_issues
# (regression seen in PR #673).
issue = next(
(li for li in valid if li.author_association in MAINTAINER_ASSOCIATIONS),
(li for li in valid if li.author_github_id and li.author_github_id in maintainer_github_ids),
valid[0],
)
is_maintainer = issue.author_association in MAINTAINER_ASSOCIATIONS if issue.author_association else False
is_maintainer = bool(issue.author_github_id and issue.author_github_id in maintainer_github_ids)
multiplier = scoring.maintainer_issue_multiplier if is_maintainer else scoring.standard_issue_multiplier
label = 'maintainer' if is_maintainer else 'standard'
bt.logging.info(f'Linked issue #{issue.number} - {label} | multiplier: {multiplier}')
Expand Down
96 changes: 96 additions & 0 deletions tests/utils/test_mirror_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,3 +445,99 @@ def test_default_max_attempts_from_constants(self):

client = MirrorClient()
assert client.max_attempts == MIRROR_MAX_ATTEMPTS


def _maintainers_payload(*entries: tuple[str, str, str]) -> dict:
"""entries: tuples of (github_id, login, association)"""
return {
'repo_full_name': 'phase-rs/phase',
'generated_at': '2026-06-16T00:00:00Z',
'maintainers': [{'github_id': gid, 'login': login, 'association': assoc} for gid, login, assoc in entries],
}


class TestGetMaintainerGithubIds:
    """``get_maintainer_github_ids`` wraps ``get_repo_maintainers`` with a
    per-instance cache, returns a ``frozenset`` of github_ids, and falls back
    to an empty set on transport failure (conservative — caller treats the
    repo as having no identifiable maintainers).
    """

    def test_returns_frozenset_of_ids(self):
        session = Mock()
        session.get.return_value = _ok(_maintainers_payload(('1388610', 'matthewevans', 'MEMBER')))
        client = _make_client(session)

        ids = client.get_maintainer_github_ids('phase-rs/phase')

        assert ids == frozenset({'1388610'})
        assert isinstance(ids, frozenset)

    def test_second_call_uses_cache_no_second_http(self):
        session = Mock()
        session.get.return_value = _ok(_maintainers_payload(('1388610', 'matthewevans', 'MEMBER')))
        client = _make_client(session)

        client.get_maintainer_github_ids('phase-rs/phase')
        client.get_maintainer_github_ids('phase-rs/phase')

        # Only one HTTP call across two lookups for the same repo.
        assert session.get.call_count == 1

    def test_different_repos_each_fetch_once(self):
        session = Mock()
        # side_effect hands out one response per HTTP call, in order.
        session.get.side_effect = [
            _ok(_maintainers_payload(('1388610', 'matthewevans', 'MEMBER'))),
            _ok(_maintainers_payload(('60993791', 'landyndev', 'COLLABORATOR'))),
        ]
        client = _make_client(session)

        a = client.get_maintainer_github_ids('phase-rs/phase')
        b = client.get_maintainer_github_ids('entrius/gittensor')

        assert a == frozenset({'1388610'})
        assert b == frozenset({'60993791'})
        assert session.get.call_count == 2

    # Decorators apply bottom-up, so the warning patch is the first mock
    # argument (``_log``) and the sleep patch the second (``_sleep``).
    @patch('gittensor.utils.mirror.client.time.sleep')
    @patch('gittensor.utils.mirror.client.bt.logging.warning')
    def test_mirror_failure_returns_empty_frozenset_not_raised(self, _log, _sleep):
        session = Mock()
        session.get.return_value = _err(500, 'mirror down')
        client = _make_client(session, max_attempts=1)

        ids = client.get_maintainer_github_ids('phase-rs/phase')

        # Conservative fallback: empty set, NOT an exception.
        assert ids == frozenset()

    @patch('gittensor.utils.mirror.client.time.sleep')
    @patch('gittensor.utils.mirror.client.bt.logging.warning')
    def test_failure_is_cached_too(self, _log, _sleep):
        session = Mock()
        session.get.return_value = _err(500, 'mirror down')
        client = _make_client(session, max_attempts=1)

        client.get_maintainer_github_ids('phase-rs/phase')
        client.get_maintainer_github_ids('phase-rs/phase')

        # Subsequent lookups in the same round don't retry the mirror.
        assert session.get.call_count == 1

    def test_drops_entries_with_missing_github_id(self):
        session = Mock()
        # Payload is spelled out by hand so one entry can carry an empty id.
        session.get.return_value = _ok(
            {
                'repo_full_name': 'phase-rs/phase',
                'generated_at': '2026-06-16T00:00:00Z',
                'maintainers': [
                    {'github_id': '1388610', 'login': 'matthewevans', 'association': 'MEMBER'},
                    {'github_id': '', 'login': 'anon', 'association': 'COLLABORATOR'},
                ],
            }
        )
        client = _make_client(session)

        ids = client.get_maintainer_github_ids('phase-rs/phase')

        assert ids == frozenset({'1388610'})
78 changes: 60 additions & 18 deletions tests/validator/oss_contributions/mirror/test_scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,10 @@ def _config(
)


def _apply_multipliers(scored: ScoredPR, cfg: RepositoryConfig) -> None:
_calculate_pr_multipliers(scored, cfg, resolve_scoring(cfg.scoring))
def _apply_multipliers(
    scored: ScoredPR,
    cfg: RepositoryConfig,
    maintainer_github_ids: frozenset[str] = frozenset(),
) -> None:
    """Run ``_calculate_pr_multipliers`` on ``scored`` using ``cfg``'s scoring.

    Test convenience wrapper: resolves ``cfg.scoring`` via ``resolve_scoring``
    and forwards ``maintainer_github_ids`` unchanged. The default is an empty
    set, i.e. no linked-issue author is treated as a maintainer unless the
    caller passes IDs explicitly.
    """
    _calculate_pr_multipliers(scored, cfg, resolve_scoring(cfg.scoring), maintainer_github_ids)


# ============================================================================
Expand Down Expand Up @@ -718,19 +720,23 @@ def _linked_issue(
class TestIssueMultiplier:
def test_no_linked_issues_returns_neutral(self):
scored = ScoredPR(pr=_pr(linked_issues=[]))
assert _calculate_issue_multiplier(scored, resolve_scoring(None)) == 1.0
assert _calculate_issue_multiplier(scored, resolve_scoring(None), frozenset()) == 1.0

def test_valid_standard_issue(self):
from gittensor.constants import STANDARD_ISSUE_MULTIPLIER

scored = ScoredPR(pr=_pr(linked_issues=[_linked_issue()]))
assert _calculate_issue_multiplier(scored, resolve_scoring(None)) == STANDARD_ISSUE_MULTIPLIER
assert _calculate_issue_multiplier(scored, resolve_scoring(None), frozenset()) == STANDARD_ISSUE_MULTIPLIER

def test_maintainer_authored_issue_gets_maintainer_multiplier(self):
from gittensor.constants import MAINTAINER_ISSUE_MULTIPLIER

scored = ScoredPR(pr=_pr(linked_issues=[_linked_issue(author_association='OWNER')]))
assert _calculate_issue_multiplier(scored, resolve_scoring(None)) == MAINTAINER_ISSUE_MULTIPLIER
# Default _linked_issue has author_github_id='999'. Put it in the maintainer set.
scored = ScoredPR(pr=_pr(linked_issues=[_linked_issue()]))
assert (
_calculate_issue_multiplier(scored, resolve_scoring(None), frozenset({'999'}))
== MAINTAINER_ISSUE_MULTIPLIER
)

def test_first_valid_issue_chosen(self):
# Even if the first issue is invalid, valid second one should be chosen
Expand All @@ -739,7 +745,7 @@ def test_first_valid_issue_chosen(self):
scored = ScoredPR(pr=_pr(linked_issues=[invalid, valid]))
from gittensor.constants import STANDARD_ISSUE_MULTIPLIER

assert _calculate_issue_multiplier(scored, resolve_scoring(None)) == STANDARD_ISSUE_MULTIPLIER
assert _calculate_issue_multiplier(scored, resolve_scoring(None), frozenset()) == STANDARD_ISSUE_MULTIPLIER


class TestLinkedIssueValidity:
Expand Down Expand Up @@ -861,16 +867,17 @@ def test_mismatched_solved_by_pr_collapses_multiplier_to_neutral(self):
li_data = _linked_issue()
li_data['solved_by_pr'] = 999
scored = ScoredPR(pr=_pr(linked_issues=[li_data]))
assert _calculate_issue_multiplier(scored, resolve_scoring(None)) == 1.0
assert _calculate_issue_multiplier(scored, resolve_scoring(None), frozenset()) == 1.0

def test_maintainer_authored_mismatch_still_blocked(self):
# The maintainer-preference path runs after _is_valid_linked_issue;
# a maintainer-authored linked issue with the wrong solver must still
# be rejected (no MAINTAINER_ISSUE_MULTIPLIER shortcut).
li_data = _linked_issue(author_association='OWNER')
# be rejected (no MAINTAINER_ISSUE_MULTIPLIER shortcut). Even with the
# author in the maintainer set, the solver gate rejects the issue.
li_data = _linked_issue()
li_data['solved_by_pr'] = 999
scored = ScoredPR(pr=_pr(linked_issues=[li_data]))
assert _calculate_issue_multiplier(scored, resolve_scoring(None)) == 1.0
assert _calculate_issue_multiplier(scored, resolve_scoring(None), frozenset({'999'})) == 1.0


class TestIssueMultiplierPreference:
Expand All @@ -879,19 +886,54 @@ def test_prefer_maintainer_authored_when_multiple_valid(self):
maintainer-authored valid issue regardless of response ordering."""
from gittensor.constants import MAINTAINER_ISSUE_MULTIPLIER

# Non-maintainer issue listed first, maintainer-authored issue second
non_maint = _linked_issue(number=1, author_association='CONTRIBUTOR', author_github_id='111')
maint = _linked_issue(number=2, author_association='OWNER', author_github_id='222')
# Non-maintainer issue listed first, maintainer issue (by current mirror
# maintainer set) second. Author 222 is in the maintainer set.
non_maint = _linked_issue(number=1, author_github_id='111')
maint = _linked_issue(number=2, author_github_id='222')
scored = ScoredPR(pr=_pr(linked_issues=[non_maint, maint]))
assert _calculate_issue_multiplier(scored, resolve_scoring(None)) == MAINTAINER_ISSUE_MULTIPLIER
assert (
_calculate_issue_multiplier(scored, resolve_scoring(None), frozenset({'222'}))
== MAINTAINER_ISSUE_MULTIPLIER
)

def test_falls_back_to_first_when_no_maintainer_authored(self):
from gittensor.constants import STANDARD_ISSUE_MULTIPLIER

issue_a = _linked_issue(number=1, author_association='CONTRIBUTOR', author_github_id='111')
issue_b = _linked_issue(number=2, author_association='CONTRIBUTOR', author_github_id='222')
issue_a = _linked_issue(number=1, author_github_id='111')
issue_b = _linked_issue(number=2, author_github_id='222')
scored = ScoredPR(pr=_pr(linked_issues=[issue_a, issue_b]))
assert _calculate_issue_multiplier(scored, resolve_scoring(None)) == STANDARD_ISSUE_MULTIPLIER
assert _calculate_issue_multiplier(scored, resolve_scoring(None), frozenset()) == STANDARD_ISSUE_MULTIPLIER

def test_stale_contributor_association_still_gets_maintainer_tier(self):
"""#1340-adjacent fix: the mirror snapshots ``author_association`` at
ingest and never refreshes. A linked issue whose author was a CONTRIBUTOR
at ingest but is now a MEMBER/COLLABORATOR/OWNER should still get the
maintainer-tier multiplier — sourced from the live maintainer set, not
from the stale stored field on the issue.
"""
from gittensor.constants import MAINTAINER_ISSUE_MULTIPLIER

# Stored author_association is CONTRIBUTOR (the stale value), but the
# author is currently in the maintainer set.
stale = _linked_issue(author_association='CONTRIBUTOR', author_github_id='777')
scored = ScoredPR(pr=_pr(linked_issues=[stale]))
assert (
_calculate_issue_multiplier(scored, resolve_scoring(None), frozenset({'777'}))
== MAINTAINER_ISSUE_MULTIPLIER
)

def test_stale_owner_association_drops_to_standard_when_not_currently_maintainer(self):
"""Inverse of the above: a stored OWNER/MEMBER snapshot is ignored when
the author has since left the maintainer set. The live mirror endpoint
is the source of truth — not the issue's stored field.
"""
from gittensor.constants import STANDARD_ISSUE_MULTIPLIER

# Stored author_association is OWNER (stale), but author has since
# departed and is not in the current maintainer set.
stale = _linked_issue(author_association='OWNER', author_github_id='ghost')
scored = ScoredPR(pr=_pr(linked_issues=[stale]))
assert _calculate_issue_multiplier(scored, resolve_scoring(None), frozenset()) == STANDARD_ISSUE_MULTIPLIER


class TestCollateralScoreAcceptsScoredPR:
Expand Down
Loading