JSONbored · JSONbored · Jun 13, 2026 · Jun 13, 2026 · Jun 13, 2026 · Jun 13, 2026
@@ -0,0 +1,65 @@
+import { useEffect } from "react";
+import { search } from "@/data/search";
+import { absoluteUrl } from "@/lib/seo";
+
+// WebMCP (navigator.modelContext) — exposes directory search to in-browser AI agents.
+// Experimental (Chrome EPP); no-ops where the API is unavailable.
+
+/** One tool offered through the model context: metadata, an input schema object, and an async handler. */
+interface WebMcpTool {
+  name: string;
+  description: string;
+  inputSchema: Record<string, unknown>;
+  /** Receives the caller-supplied arguments and resolves to a list of typed text parts. */
+  execute: (
+    args: Record<string, unknown>,
+  ) => Promise<{ content: { type: string; text: string }[] }>;
+}
+
+/** Argument accepted by `provideContext`: an object carrying the tools array. */
+type WebMcpContext = { tools: WebMcpTool[] };
+
+/** `Navigator` widened with the optional `modelContext` entry point. */
+type ModelContextNavigator = Navigator & {
+  modelContext?: { provideContext: (ctx: WebMcpContext) => void };
+};
+
+/**
+ * Side-effect-only component: once mounted it offers a `search_heyclaude` tool through
+ * `navigator.modelContext` when that API exists, and it always renders nothing.
+ */
+export function WebMcpProvider() {
+  useEffect(() => {
+    const modelContext = (navigator as ModelContextNavigator).modelContext;
+    // No-op where the API is unavailable.
+    if (!modelContext?.provideContext) return;
+
+    const searchTool: WebMcpTool = {
+      name: "search_heyclaude",
+      description:
+        "Search the HeyClaude directory of Claude Code resources (MCP servers, agents, skills, hooks, commands, rules, collections, tools). Returns matching entries with titles, categories, descriptions, and URLs.",
+      inputSchema: {
+        type: "object",
+        properties: {
+          query: { type: "string", description: "Free-text search query." },
+          category: {
+            type: "string",
+            description:
+              "Optional category filter, e.g. mcp, agents, skills, hooks, commands, rules, collections, tools.",
+          },
+        },
+        required: ["query"],
+      },
+      async execute(args) {
+        const text = String(args.query ?? "");
+        // Anything other than a string category means "no filter".
+        const wantedCategory = typeof args.category === "string" ? args.category : "";
+        const matches = search({ q: text });
+        const inCategory = matches.filter(
+          (entry) => !wantedCategory || entry.category === wantedCategory,
+        );
+        // Cap the payload at the first ten matches.
+        const payload = inCategory.slice(0, 10).map((entry) => ({
+          title: entry.title,
+          category: entry.category,
+          description: entry.description,
+          url: absoluteUrl(`/entry/${entry.category}/${entry.slug}`),
+        }));
+        return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
+      },
+    };
+
+    try {
+      modelContext.provideContext({ tools: [searchTool] });
+    } catch {
+      // WebMCP is experimental — ignore registration failures.
+    }
+  }, []);
+
+  return null;
+}
@@ -84,14 +84,25 @@ function sanitizeRenderedHtml(html: string) {
       img: ["https"],
     },
     transformTags: {
-      a: (_tagName, attribs) => ({
-        tagName: "a",
-        attribs: {
-          ...attribs,
-          rel: "nofollow noopener noreferrer",
-          target: "_blank",
-        },
-      }),
+      a: (_tagName, attribs) => {
+        // Only absolute http(s) and mailto destinations remain links. GFM autolinking turns
+        // bare paths in entry content (e.g. ".claude/hooks/foo.sh", "/utils/trpc") into
+        // site-relative links that Google then crawls as 404s.
+        // NOTE(review): fragment-only ("#section") and "tel:" hrefs fail this test too and are
+        // unwrapped — confirm no rendered content relies on them (e.g. footnote anchors).
+        const destination = String(attribs.href ?? "");
+        const staysLinked = /^(https?:|mailto:)/i.test(destination);
+        return staysLinked
+          ? {
+              tagName: "a",
+              attribs: {
+                ...attribs,
+                rel: "nofollow noopener noreferrer",
+                target: "_blank",
+              },
+            }
+          : // Retagging as span unwraps the anchor: span is not in allowedTags, so sanitize-html
+            // drops the tag but keeps the text, leaving no destination-less <a> in the DOM.
+            { tagName: "span", attribs: {} };
+      },
     },
   });
 }

@@ -1,11 +1,20 @@
 import { siteConfig } from "@/lib/site";
 
+// Machine endpoints and generated artifacts should not be crawled: they waste crawl budget
+// and surface as "crawled - not indexed" / 404 noise in Search Console.
+// NOTE(review): if rendered pages still load CSS/JS from /_next/, disallowing it stops crawlers
+// from fetching those assets — confirm the prefix only serves stale or non-page URLs.
+const DISALLOW_PATHS = ["/api/", "/data/", "/downloads/", "/_next/"];
+
+// AI content-usage preferences (contentsignals.org / draft-romm-aipref-contentsignals).
+// Fully open: appear in search + AI answers and allow training.
+const CONTENT_SIGNAL = "ai-train=yes, search=yes, ai-input=yes";
+
 export function getRobotsPolicy() {
   return {
     rules: [
       {
         userAgent: "*",
         allow: "/",
+        disallow: DISALLOW_PATHS,
       },
       {
         userAgent: [
@@ -17,8 +26,10 @@ export function getRobotsPolicy() {
           "Google-Extended",
         ],
         allow: "/",
+        disallow: DISALLOW_PATHS,
       },
     ],
+    contentSignal: CONTENT_SIGNAL,
     sitemap: `${siteConfig.url}/sitemap.xml`,
     host: new URL(siteConfig.url).host,
   };
@@ -33,6 +44,13 @@ export function renderRobotsTxt() {
       lines.push(`User-agent: ${userAgent}`);
     }
     lines.push(`Allow: ${rule.allow}`);
+    for (const path of rule.disallow ?? []) {
+      lines.push(`Disallow: ${path}`);
+    }
+    // Content-Signal applies to all crawlers — emit it once, under the catch-all group.
+    if (policy.contentSignal && rule.userAgent === "*") {
+      lines.push(`Content-Signal: ${policy.contentSignal}`);
+    }
     lines.push("");
   }
   lines.push(`Sitemap: ${policy.sitemap}`);

@@ -56,10 +56,44 @@ const SECURITY_HEADERS = {
   "x-frame-options": "DENY",
 } as const;
 
-export function applySecurityHeaders(headers: Headers) {
+// Preview, staging and local hosts must stay out of search indexes — otherwise Google treats
+// e.g. dev.heyclau.de as duplicate content competing with the canonical production site.
+// A host counts as non-production when it is localhost (or a subdomain of it), begins with
+// "dev.", contains "staging" anywhere, or sits under workers.dev.
+function isNonProdHost(hostname: string) {
+  if (hostname === "localhost" || hostname.startsWith("dev.")) return true;
+  if (hostname.includes("staging")) return true;
+  return [".localhost", ".workers.dev"].some((suffix) => hostname.endsWith(suffix));
+}
+
+// RFC 8288 Link header value advertising agent-discovery resources from every HTML page.
+// Each pair is [path appended to siteConfig.url, link parameters]; the pairs are rendered as
+// `<url>; params` and joined with ", ".
+const AGENT_LINK_HEADER = (
+  [
+    ["/.well-known/api-catalog", 'rel="api-catalog"'],
+    ["/openapi.json", 'rel="service-desc"; type="application/json"'],
+    ["/api-docs", 'rel="service-doc"; type="text/html"'],
+    ["/.well-known/mcp/server-card.json", 'rel="related"; title="MCP server card"'],
+    ["/.well-known/agent-skills/index.json", 'rel="related"; title="Agent skills index"'],
+  ] as const
+)
+  .map(([path, params]) => `<${siteConfig.url}${path}>; ${params}`)
+  .join(", ");
+
+/**
+ * Adds default response headers to `headers` in place and returns the same object.
+ *
+ * @param headers Response headers to augment; values already present for the security
+ *   headers and for `link` are left as they are.
+ * @param request Optional originating request; when given, its hostname decides whether
+ *   an `x-robots-tag: noindex, follow` header is set.
+ * @returns The `headers` object that was passed in.
+ */
+export function applySecurityHeaders(headers: Headers, request?: Request) {
   for (const [name, value] of Object.entries(SECURITY_HEADERS)) {
     if (!headers.has(name)) headers.set(name, value);
   }
+  // Advertise the agent-discovery links on HTML responses only, and never replace a Link
+  // header that is already there.
+  if ((headers.get("content-type") ?? "").includes("text/html") && !headers.has("link")) {
+    headers.set("link", AGENT_LINK_HEADER);
+  }
+  if (request) {
+    try {
+      const { hostname } = new URL(request.url);
+      if (isNonProdHost(hostname)) {
+        // Unlike the headers above, this overwrites any x-robots-tag already present.
+        headers.set("x-robots-tag", "noindex, follow");
+      }
+    } catch {
+      // Malformed request URL — leave indexing headers untouched.
+    }
+  }
   return headers;
 }
 

@@ -0,0 +1,50 @@
+import { ENTRIES } from "@/data/entries";
+import type { Entry } from "@/types/registry";
+
+/**
+ * Converts a raw tag into a URL-safe slug.
+ *
+ * The tag is lower-cased and trimmed, every run of characters outside `a-z0-9` collapses
+ * into a single hyphen, and hyphens at either end are removed. A tag with no ASCII letter
+ * or digit therefore yields an empty string.
+ */
+export function tagSlug(tag: string) {
+  const lowered = tag.toLowerCase().trim();
+  const hyphenated = lowered.replace(/[^a-z0-9]+/g, "-");
+  return hyphenated.replace(/^-+|-+$/g, "");
+}
+
+// One tag hub: `slug` is the tagSlug() form, `name` the display spelling, `entries` its members.
+export type TagGroup = { slug: string; name: string; entries: Entry[] };
+
+// Memoized result of getAllTagGroups(); null until the first call fills it in.
+let cache: TagGroup[] | null = null;
+
+/**
+ * Groups every entry in ENTRIES by the slug of each of its tags.
+ *
+ * The result is computed once and memoized in the module-level `cache`; later calls return
+ * the same array instance, so callers must not mutate it.
+ *
+ * @returns One group per non-empty tag slug, ordered by descending entry count. Each entry
+ *   appears at most once per group, even when several of its tags share a slug.
+ */
+export function getAllTagGroups(): TagGroup[] {
+  if (cache) return cache;
+  const bySlug = new Map<string, { entries: Entry[]; names: Map<string, number> }>();
+  for (const entry of ENTRIES) {
+    for (const tag of entry.tags ?? []) {
+      const slug = tagSlug(tag);
+      // Tags with no ASCII letter or digit slugify to "" and cannot back a URL.
+      if (!slug) continue;
+      let group = bySlug.get(slug);
+      if (!group) {
+        group = { entries: [], names: new Map() };
+        bySlug.set(slug, group);
+      }
+      // One entry may carry several spellings of a tag ("Claude Code", "claude-code") that
+      // share a slug. Entries are visited one at a time, so a repeat can only be the element
+      // pushed last; skipping it keeps hub listings and entry counts free of duplicates.
+      if (group.entries[group.entries.length - 1] !== entry) group.entries.push(entry);
+      group.names.set(tag, (group.names.get(tag) ?? 0) + 1);
+    }
+  }
+  cache = [...bySlug.entries()]
+    .map(([slug, group]) => {
+      // Canonical display name: most frequent raw casing (ties broken alphabetically).
+      const ranked = [...group.names.entries()].sort(
+        (a, b) => b[1] - a[1] || a[0].localeCompare(b[0]),
+      );
+      // Every group records at least one spelling; the slug fallback only satisfies the checker.
+      return { slug, name: ranked[0]?.[0] ?? slug, entries: group.entries };
+    })
+    .sort((a, b) => b.entries.length - a.entries.length);
+  return cache;
+}
+
+/**
+ * Looks up a single tag group by its slug.
+ *
+ * @returns The first group whose slug matches exactly, or `undefined` when none does.
+ */
+export function getTagGroup(slug: string): TagGroup | undefined {
+  for (const candidate of getAllTagGroups()) {
+    if (candidate.slug === slug) return candidate;
+  }
+  return undefined;
+}
+
+// Tag groups holding at least two entries — enough to be a non-thin, indexable hub.
+// The relative order of getAllTagGroups() is kept.
+export function getIndexableTagGroups(): TagGroup[] {
+  const indexable: TagGroup[] = [];
+  for (const group of getAllTagGroups()) {
+    if (group.entries.length >= 2) indexable.push(group);
+  }
+  return indexable;
+}