OpenScience-Collective · neuromechanist · Jun 9, 2026 · Jun 9, 2026 · Jun 9, 2026
diff --git a/src/api/routers/community.py b/src/api/routers/community.py
@@ -34,7 +34,7 @@
 from src.assistants.registry import AssistantInfo
 from src.core.config.community import WidgetConfig
 from src.core.services.litellm_llm import create_openrouter_llm
-from src.knowledge.search import FAQResult, list_faq_entries
+from src.knowledge.search import FAQResult, get_citation_stats, list_faq_entries
 from src.metrics.cost import COST_BLOCK_THRESHOLD, COST_WARN_THRESHOLD, MODEL_PRICING, estimate_cost
 from src.metrics.db import (
     RequestLogEntry,
@@ -229,6 +229,23 @@ class FAQFeedResponse(BaseModel):
     entries: list[FAQEntryResponse] = Field(default_factory=list, description="FAQ entries")
 
 
+class CitationsFeedResponse(BaseModel):
+    """Public citation dashboard data for a community's canonical papers.
+
+    ``total`` is the sum of the ``per_year`` counts: a citing paper is only
+    counted when it has both a recorded canonical link and a usable
+    four-digit publication year, so papers without a valid year appear in
+    neither figure.
+    """
+
+    community_id: str = Field(..., description="Community identifier")
+    total: int = Field(
+        ...,
+        description=(
+            "Total citing papers with a recorded canonical link and a valid publication year"
+        ),
+    )
+    per_year: dict[str, int] = Field(
+        default_factory=dict, description="Citing-paper count per year across all papers"
+    )
+    by_paper: dict[str, dict[str, int]] = Field(
+        default_factory=dict,
+        description="Stacked breakdown: canonical DOI -> year -> citing-paper count",
+    )
+    canonical_dois: list[str] = Field(
+        default_factory=list, description="Canonical DOIs tracked for this community"
+    )
+
+
 # Matches bare email addresses so they can be stripped from the public feed.
 _EMAIL_PATTERN = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")
 
@@ -1621,6 +1638,50 @@ async def community_faq(
             entries=[_faq_result_to_response(e) for e in entries],
         )
 
+    @router.get("/citations", response_model=CitationsFeedResponse)
+    async def community_citations(response: Response) -> CitationsFeedResponse:
+        """Public, read-only citation dashboard for this community.
+
+        Returns per-year counts of papers citing the community's canonical
+        works, plus a stacked breakdown keyed by the cited DOI (the shape
+        behind a citations-per-year chart). Disabled by default; a community
+        opts in via ``public_feeds.citations: true`` in its config.
+
+        Args:
+            response: Outgoing response, used only to attach the
+                ``Cache-Control`` header.
+
+        Returns:
+            The aggregated counts together with the canonical DOIs listed in
+            the community config (empty when no ``citations`` section exists).
+
+        Raises:
+            HTTPException: 404 when the feed is not enabled, 503 when the
+                knowledge database raises a ``sqlite3.Error``, 500 on any
+                other failure while computing the statistics.
+        """
+        config = info.community_config
+        if config is None or config.public_feeds is None or not config.public_feeds.citations:
+            raise HTTPException(
+                status_code=404,
+                detail="Public citations feed is not enabled for this community.",
+            )
+
+        try:
+            stats = get_citation_stats(project=community_id)
+        except sqlite3.Error as exc:
+            logger.exception("Failed to query citations for community %s", community_id)
+            # Chain the cause so the original database error stays attached
+            # to the HTTP error instead of surfacing as an implicit context.
+            raise HTTPException(
+                status_code=503,
+                detail="Knowledge database is temporarily unavailable.",
+            ) from exc
+        except Exception as exc:
+            logger.exception(
+                "Unexpected error serving citations feed for community %s", community_id
+            )
+            raise HTTPException(
+                status_code=500,
+                detail="An unexpected error occurred while building the citations feed.",
+            ) from exc
+
+        # Copy the configured DOIs so the response never aliases the config list.
+        canonical_dois = list(config.citations.dois) if config.citations else []
+
+        # Only successful responses are marked cacheable (one hour).
+        response.headers["Cache-Control"] = "public, max-age=3600"
+        return CitationsFeedResponse(
+            community_id=community_id,
+            total=stats.total,
+            per_year=stats.per_year,
+            by_paper=stats.by_paper,
+            canonical_dois=canonical_dois,
+        )
+
     return router
 
 

diff --git a/src/knowledge/db.py b/src/knowledge/db.py
@@ -132,6 +132,9 @@ def active_mirror_context(mirror_id: str) -> Iterator[None]:
     url TEXT NOT NULL,
     created_at TEXT,
     synced_at TEXT NOT NULL,
+    -- Canonical DOI this paper cites, when discovered via citation sync.
+    -- NULL for papers found through keyword search rather than a citation link.
+    cites_doi TEXT,
     UNIQUE(source, external_id)
 );
 
@@ -409,6 +412,8 @@ def active_mirror_context(mirror_id: str) -> Iterator[None]:
 CREATE INDEX IF NOT EXISTS idx_github_items_status ON github_items(status);
 CREATE INDEX IF NOT EXISTS idx_github_items_type ON github_items(item_type);
 CREATE INDEX IF NOT EXISTS idx_papers_source ON papers(source);
+-- idx_papers_cites_doi is created in _migrate_db, after the cites_doi column
+-- is ensured, so init_db stays safe on databases predating that column.
 CREATE INDEX IF NOT EXISTS idx_docstrings_repo ON docstrings(repo);
 CREATE INDEX IF NOT EXISTS idx_docstrings_language ON docstrings(language);
 CREATE INDEX IF NOT EXISTS idx_messages_list ON mailing_list_messages(list_name);
@@ -507,6 +512,28 @@ def _migrate_db(conn: sqlite3.Connection) -> None:
         # Table doesn't exist yet - this is fine, schema will create it
         logger.debug("Docstrings table not found during migration (will be created): %s", e)
 
+    # Migration: Add cites_doi column to papers table (added 2026-06-09).
+    # The index lives here (not in SCHEMA_SQL) so executescript never references
+    # cites_doi on a database created before the column existed.
+    try:
+        cursor = conn.execute("PRAGMA table_info(papers)")
+        columns = [row[1] for row in cursor.fetchall()]
+    except sqlite3.OperationalError as e:
+        # Only the PRAGMA is guarded here: a missing papers table is fine since
+        # SCHEMA_SQL creates it. DDL errors below (locked DB, I/O fault) must
+        # propagate rather than be swallowed and leave the table un-indexed.
+        logger.debug("Papers table not found during migration (will be created): %s", e)
+        columns = []
+
+    if columns:  # papers table exists; migrate it in place
+        if "cites_doi" not in columns:
+            logger.info("Migrating papers table: adding cites_doi column")
+            conn.execute("ALTER TABLE papers ADD COLUMN cites_doi TEXT")
+            logger.info("Migration complete: cites_doi column added to papers")
+        # Ensure the index exists for both new and migrated databases.
+        conn.execute("CREATE INDEX IF NOT EXISTS idx_papers_cites_doi ON papers(cites_doi)")
+        conn.commit()
+
 
 def init_db(project: str = "hed") -> None:
     """Initialize database schema for a project.
@@ -586,6 +613,7 @@ def upsert_paper(
     first_message: str | None,
     url: str,
     created_at: str | None,
+    cites_doi: str | None = None,
 ) -> None:
     """Insert or update a paper.
 
@@ -597,6 +625,14 @@ def upsert_paper(
         first_message: Abstract (limited to ~2000 chars)
         url: URL to the paper (DOI or source URL)
         created_at: Publication date (ISO 8601 or year string)
+        cites_doi: Canonical DOI this paper cites, when known from a citation
+            sync. ``None`` for keyword-search results. On conflict the first
+            recorded link is kept (COALESCE), so a later keyword sync passing
+            ``None`` never erases an existing citation link, and a re-sync
+            backfills the link onto rows stored before this column existed.
+            A single column holds one link: a paper citing two tracked DOIs is
+            attributed to whichever was synced first (it is still counted once
+            in the per-year total, only its by-paper bucket is approximate).
     """
     # Limit first_message size
     if first_message and len(first_message) > 2000:
@@ -605,14 +641,15 @@ def upsert_paper(
     conn.execute(
         """
         INSERT INTO papers (source, external_id, title, first_message,
-                            status, url, created_at, synced_at)
-        VALUES (?, ?, ?, ?, 'published', ?, ?, ?)
+                            status, url, created_at, synced_at, cites_doi)
+        VALUES (?, ?, ?, ?, 'published', ?, ?, ?, ?)
         ON CONFLICT(source, external_id) DO UPDATE SET
             title=excluded.title,
             first_message=excluded.first_message,
-            synced_at=excluded.synced_at
+            synced_at=excluded.synced_at,
+            cites_doi=COALESCE(papers.cites_doi, excluded.cites_doi)
         """,
-        (source, external_id, title, first_message, url, created_at, _now_iso()),
+        (source, external_id, title, first_message, url, created_at, _now_iso(), cites_doi),
     )
 
 

diff --git a/src/knowledge/papers_sync.py b/src/knowledge/papers_sync.py
@@ -158,6 +158,7 @@ def _store_papers(
     project: str,
     *,
     force_source: str | None = None,
+    cites_doi: str | None = None,
 ) -> dict[str, int]:
     """Upsert opencite papers into the knowledge DB, returning counts by source.
 
@@ -167,6 +168,8 @@ def _store_papers(
         force_source: When set (a single-source sync), record this OSA source
             label using its native identifier; falls back to the priority
             mapping if that identifier is missing.
+        cites_doi: Canonical DOI these papers cite, recorded on each row when
+            storing the results of a citation sync. ``None`` for keyword search.
     """
     counts: dict[str, int] = {}
     with get_connection(project) as conn:
@@ -193,6 +196,7 @@ def _store_papers(
                 first_message=paper.abstract or None,
                 url=_paper_url(paper),
                 created_at=paper.publication_date or (str(paper.year) if paper.year else None),
+                cites_doi=cites_doi,
             )
             counts[source] = counts.get(source, 0) + 1
         conn.commit()
@@ -420,7 +424,7 @@ def sync_citing_papers(
     total = 0
     for doi, papers in cited:
         try:
-            counts = _store_papers(papers, project)
+            counts = _store_papers(papers, project, cites_doi=doi)
             count = sum(counts.values())
             update_sync_metadata("papers", f"citing_{doi}", count, project)
             logger.info("Synced %d papers citing %s", count, doi)

diff --git a/src/knowledge/search.py b/src/knowledge/search.py
@@ -376,6 +376,77 @@ def search_github_items(
     return results
 
 
+@dataclass
+class CitationStats:
+    """Citation counts aggregated for a community's canonical papers.
+
+    Attributes:
+        total: Number of citing papers that have a recorded canonical link
+            and a valid year.
+        per_year: Publication year mapped to the citing-paper count, summed
+            across canonical DOIs.
+        by_paper: Canonical DOI mapped to its own publication-year to
+            citing-paper-count mapping.
+    """
+
+    total: int
+    per_year: dict[str, int]
+    by_paper: dict[str, dict[str, int]]
+
+
+def get_citation_stats(project: str = "eeglab") -> CitationStats:
+    """Build the aggregates behind the public citations dashboard.
+
+    Only ``papers`` rows with a non-NULL ``cites_doi`` are counted. Each row
+    is bucketed by the first four characters of ``created_at`` (ISO date or
+    bare year); rows whose ``created_at`` is NULL or does not start with four
+    digits are excluded by the query, so a malformed date never produces a
+    bogus year bucket.
+
+    Args:
+        project: Community ID for database isolation. Defaults to 'eeglab'.
+
+    Returns:
+        CitationStats holding the overall ``total``, the ``per_year`` totals,
+        and the stacked ``by_paper`` breakdown (canonical DOI -> year ->
+        count). Years are in ascending order in every mapping.
+
+    Raises:
+        sqlite3.Error: Logged here and re-raised unchanged when the query fails.
+    """
+    query = """
+        SELECT cites_doi, substr(created_at, 1, 4) AS yr, COUNT(*) AS cnt
+        FROM papers
+        WHERE cites_doi IS NOT NULL
+          AND created_at IS NOT NULL
+          AND substr(created_at, 1, 4) GLOB '[0-9][0-9][0-9][0-9]'
+        GROUP BY cites_doi, yr
+    """
+
+    yearly: dict[str, int] = {}
+    stacked: dict[str, dict[str, int]] = {}
+    try:
+        with get_connection(project) as conn:
+            for record in conn.execute(query):
+                doi, year, n = record["cites_doi"], record["yr"], record["cnt"]
+                yearly[year] = yearly.get(year, 0) + n
+                # GROUP BY yields one row per (DOI, year), so assign directly.
+                stacked.setdefault(doi, {})[year] = n
+    except sqlite3.OperationalError as e:
+        logger.error(
+            "Database operational error computing citation stats: %s",
+            e,
+            exc_info=True,
+            extra={"project": project},
+        )
+        raise
+    except sqlite3.Error as e:
+        logger.warning("Database error computing citation stats (project=%s): %s", project, e)
+        raise
+
+    # Every counted row contributes to exactly one year bucket, so the grand
+    # total is the sum of the per-year figures.
+    ordered_by_paper = {doi: dict(sorted(years.items())) for doi, years in stacked.items()}
+    return CitationStats(
+        total=sum(yearly.values()),
+        per_year=dict(sorted(yearly.items())),
+        by_paper=ordered_by_paper,
+    )
+
+
 def search_papers(
     query: str,
     project: str = "hed",