Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 65 additions & 0 deletions apps/web/src/components/webmcp-provider.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import { useEffect } from "react";
import { search } from "@/data/search";
import { absoluteUrl } from "@/lib/seo";

// WebMCP (navigator.modelContext) — exposes directory search to in-browser AI agents.
// Experimental (Chrome EPP); no-ops where the API is unavailable.
// Local description of one tool object handed to WebMCP. Declared here because the API is
// experimental; only the fields this module actually supplies are modelled.
type WebMcpTool = {
// Identifier the tool is registered under.
name: string;
// Human/agent-readable explanation of what the tool does.
description: string;
// Schema object describing the accepted arguments (this module passes a JSON-Schema-style object).
inputSchema: Record<string, unknown>;
// Handler invoked with the raw, unvalidated arguments; resolves to a list of typed text parts.
execute: (
args: Record<string, unknown>,
) => Promise<{ content: { type: string; text: string }[] }>;
};
// `Navigator` widened with the optional `modelContext` entry point. The property is optional
// so callers must feature-detect it before calling `provideContext`.
type ModelContextNavigator = Navigator & {
modelContext?: { provideContext: (ctx: { tools: WebMcpTool[] }) => void };
};

// Headless component: after mount it registers the `search_heyclaude` tool with WebMCP
// (when the browser exposes `navigator.modelContext`) and renders nothing.
export function WebMcpProvider() {
  useEffect(() => {
    const modelContext = (navigator as ModelContextNavigator).modelContext;
    // Feature-detect first: leave quietly when the API is not there.
    if (!modelContext?.provideContext) return;

    const searchTool: WebMcpTool = {
      name: "search_heyclaude",
      description:
        "Search the HeyClaude directory of Claude Code resources (MCP servers, agents, skills, hooks, commands, rules, collections, tools). Returns matching entries with titles, categories, descriptions, and URLs.",
      inputSchema: {
        type: "object",
        properties: {
          query: { type: "string", description: "Free-text search query." },
          category: {
            type: "string",
            description:
              "Optional category filter, e.g. mcp, agents, skills, hooks, commands, rules, collections, tools.",
          },
        },
        required: ["query"],
      },
      execute: async (args) => {
        // Arguments arrive untyped: coerce the query, and accept a category only if it is a string.
        const queryText = String(args.query ?? "");
        const categoryFilter = typeof args.category === "string" ? args.category : "";

        const matches = search({ q: queryText });
        // An empty category means "no filter".
        const scoped = categoryFilter
          ? matches.filter((entry) => entry.category === categoryFilter)
          : matches;

        // Cap the payload at ten entries and expose only the summary fields plus an absolute URL.
        const payload = scoped.slice(0, 10).map((entry) => ({
          title: entry.title,
          category: entry.category,
          description: entry.description,
          url: absoluteUrl(`/entry/${entry.category}/${entry.slug}`),
        }));

        return { content: [{ type: "text", text: JSON.stringify(payload, null, 2) }] };
      },
    };

    try {
      modelContext.provideContext({ tools: [searchTool] });
    } catch {
      // WebMCP is experimental — a failed registration is deliberately ignored.
    }
  }, []);

  return null;
}
27 changes: 19 additions & 8 deletions apps/web/src/lib/detail-assembly.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,25 @@ function sanitizeRenderedHtml(html: string) {
img: ["https"],
},
transformTags: {
a: (_tagName, attribs) => ({
tagName: "a",
attribs: {
...attribs,
rel: "nofollow noopener noreferrer",
target: "_blank",
},
}),
a: (_tagName, attribs) => {
// Drop relative / scheme-less anchors. GFM autolinking turns bare paths in entry
// content (e.g. ".claude/hooks/foo.sh", "/utils/trpc") into site-relative links that
// Google then crawls as 404s. Real external URLs (http/https/mailto) stay linked.
const href = String(attribs.href ?? "");
if (!/^(https?:|mailto:)/i.test(href)) {
// Unwrap to a non-anchor (span is not in allowedTags, so sanitize-html drops the tag
// but keeps the text) — avoids leaving an orphaned, destination-less <a> in the DOM.
return { tagName: "span", attribs: {} };
}
return {
tagName: "a",
attribs: {
...attribs,
rel: "nofollow noopener noreferrer",
target: "_blank",
},
};
},
},
});
}
Expand Down
18 changes: 18 additions & 0 deletions apps/web/src/lib/robots-policy.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
import { siteConfig } from "@/lib/site";

// Machine endpoints and generated artifacts should not be crawled: they waste crawl budget
// and surface as "crawled - not indexed" / 404 noise in Search Console.
const DISALLOW_PATHS = ["/api/", "/data/", "/downloads/", "/_next/"];

// AI content-usage preferences (contentsignals.org / draft-romm-aipref-contentsignals).
// Fully open: appear in search + AI answers and allow training.
const CONTENT_SIGNAL = "ai-train=yes, search=yes, ai-input=yes";

export function getRobotsPolicy() {
return {
rules: [
{
userAgent: "*",
allow: "/",
disallow: DISALLOW_PATHS,
},
{
userAgent: [
Expand All @@ -17,8 +26,10 @@ export function getRobotsPolicy() {
"Google-Extended",
],
allow: "/",
disallow: DISALLOW_PATHS,
},
],
contentSignal: CONTENT_SIGNAL,
sitemap: `${siteConfig.url}/sitemap.xml`,
host: new URL(siteConfig.url).host,
};
Expand All @@ -33,6 +44,13 @@ export function renderRobotsTxt() {
lines.push(`User-agent: ${userAgent}`);
}
lines.push(`Allow: ${rule.allow}`);
for (const path of rule.disallow ?? []) {
lines.push(`Disallow: ${path}`);
}
// Content-Signal applies to all crawlers — emit it once, under the catch-all group.
if (policy.contentSignal && rule.userAgent === "*") {
lines.push(`Content-Signal: ${policy.contentSignal}`);
}
lines.push("");
}
lines.push(`Sitemap: ${policy.sitemap}`);
Expand Down
36 changes: 35 additions & 1 deletion apps/web/src/lib/security-headers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,44 @@ const SECURITY_HEADERS = {
"x-frame-options": "DENY",
} as const;

export function applySecurityHeaders(headers: Headers) {
// Reports whether `hostname` looks like a preview/staging/local host rather than production.
// Such hosts must stay out of search indexes so they do not compete with the canonical
// production site as duplicate content (e.g. dev.heyclau.de).
function isNonProdHost(hostname: string) {
  const nonProdPrefixes = ["dev."];
  const nonProdSuffixes = [".localhost", ".workers.dev"];

  // Exact local host, or "staging" anywhere in the name.
  if (hostname === "localhost" || hostname.includes("staging")) return true;
  if (nonProdPrefixes.some((prefix) => hostname.startsWith(prefix))) return true;
  return nonProdSuffixes.some((suffix) => hostname.endsWith(suffix));
}

// Precomputed RFC 8288 `Link` header value pointing agents at the site's discovery
// resources. Each row is [path under siteConfig.url, link parameters]; rows are rendered as
// `<url>; params` and joined with ", ".
const AGENT_LINK_HEADER = (
  [
    ["/.well-known/api-catalog", 'rel="api-catalog"'],
    ["/openapi.json", 'rel="service-desc"; type="application/json"'],
    ["/api-docs", 'rel="service-doc"; type="text/html"'],
    ["/.well-known/mcp/server-card.json", 'rel="related"; title="MCP server card"'],
    ["/.well-known/agent-skills/index.json", 'rel="related"; title="Agent skills index"'],
  ] as const
)
  .map(([path, params]) => `<${siteConfig.url}${path}>; ${params}`)
  .join(", ");

// Decorates `headers` in place and returns the same object.
//  - Every SECURITY_HEADERS entry is added unless the response already carries that header.
//  - text/html responses without a `link` header receive AGENT_LINK_HEADER.
//  - When `request` is supplied and its host is non-production, `x-robots-tag` is forced to
//    "noindex, follow".
export function applySecurityHeaders(headers: Headers, request?: Request) {
  Object.entries(SECURITY_HEADERS).forEach(([name, value]) => {
    // Never clobber a value a route has already chosen.
    if (headers.has(name)) return;
    headers.set(name, value);
  });

  const contentType = headers.get("content-type") ?? "";
  if (contentType.includes("text/html") && !headers.has("link")) {
    headers.set("link", AGENT_LINK_HEADER);
  }

  // Without a request there is no host to inspect.
  if (!request) return headers;

  try {
    const host = new URL(request.url).hostname;
    if (isNonProdHost(host)) {
      headers.set("x-robots-tag", "noindex, follow");
    }
  } catch {
    // Malformed request URL — leave indexing headers untouched.
  }
  return headers;
}

Expand Down
50 changes: 50 additions & 0 deletions apps/web/src/lib/tags.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import { ENTRIES } from "@/data/entries";
import type { Entry } from "@/types/registry";

// Converts a raw tag into its URL slug: lower-cased, each run of characters outside a-z/0-9
// collapsed to a single "-", and no dash left at either end. A tag with no a-z/0-9 characters
// yields "".
export function tagSlug(tag: string) {
  const normalized = tag.toLowerCase().trim();
  const dashed = normalized.replace(/[^a-z0-9]+/g, "-");
  return dashed.replace(/^-+|-+$/g, "");
}

export type TagGroup = { slug: string; name: string; entries: Entry[] };

let cache: TagGroup[] | null = null;

// Builds (once, then serves from `cache`) the list of tag groups: every entry in ENTRIES is
// filed under the slug of each of its tags. Groups are returned largest first.
//
// An entry is credited to a given slug at most once, even when several of its tags normalise
// to the same slug (e.g. "MCP" and "mcp", or "next.js" and "next-js"). Without that guard the
// entry would appear twice in the group and inflate `entries.length`, which drives both the
// ordering here and any size threshold applied by callers.
export function getAllTagGroups(): TagGroup[] {
  if (cache) return cache;
  const map = new Map<string, { entries: Entry[]; names: Map<string, number> }>();
  for (const entry of ENTRIES) {
    // Slugs this entry has already been filed under.
    const countedSlugs = new Set<string>();
    // Iterating a Set drops exact duplicate tags, so one entry casts at most one vote per
    // raw spelling when the display name is chosen below.
    for (const tag of new Set(entry.tags ?? [])) {
      const slug = tagSlug(tag);
      // Tags with no slug-able characters cannot form a group.
      if (!slug) continue;
      let group = map.get(slug);
      if (!group) {
        group = { entries: [], names: new Map() };
        map.set(slug, group);
      }
      group.names.set(tag, (group.names.get(tag) ?? 0) + 1);
      if (countedSlugs.has(slug)) continue;
      countedSlugs.add(slug);
      group.entries.push(entry);
    }
  }
  cache = [...map.entries()]
    .map(([slug, group]) => {
      // Canonical display name: most frequent raw spelling (ties broken alphabetically).
      const ranked = [...group.names.entries()].sort(
        (a, b) => b[1] - a[1] || a[0].localeCompare(b[0]),
      );
      return {
        slug,
        // A group always has at least one recorded spelling; the slug is only a type-safe fallback.
        name: ranked[0]?.[0] ?? slug,
        entries: group.entries,
      };
    })
    .sort((a, b) => b.entries.length - a.entries.length);
  return cache;
}

// Looks up a single tag group by its slug; returns undefined when no group has that slug.
export function getTagGroup(slug: string): TagGroup | undefined {
  for (const candidate of getAllTagGroups()) {
    if (candidate.slug === slug) return candidate;
  }
  return undefined;
}

// Subset of tag groups large enough (two or more entries) to be a non-thin, indexable hub.
// Order follows getAllTagGroups().
export function getIndexableTagGroups(): TagGroup[] {
  const minimumEntries = 2;
  const indexable: TagGroup[] = [];
  for (const group of getAllTagGroups()) {
    if (group.entries.length >= minimumEntries) indexable.push(group);
  }
  return indexable;
}
Loading
Loading