From 3a646ea0a94f04d6077a93520d2cd38c5966ed57 Mon Sep 17 00:00:00 2001 From: ghost <49853598+JSONbored@users.noreply.github.com> Date: Sat, 13 Jun 2026 00:55:22 -0700 Subject: [PATCH] fix(submissions): block cross-category website duplicates --- apps/submission-gate/src/duplicates.ts | 2 ++ tests/submission-gate-worker.test.ts | 46 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/apps/submission-gate/src/duplicates.ts b/apps/submission-gate/src/duplicates.ts index 16aebddeb4..ae35cf7f33 100644 --- a/apps/submission-gate/src/duplicates.ts +++ b/apps/submission-gate/src/duplicates.ts @@ -83,12 +83,14 @@ const CROSS_CATEGORY_STRICT_URL_FIELDS = new Set([ "repoUrl", "repositoryUrl", "sourceUrl", + "websiteUrl", "download_url", "github_url", "package_url", "repo_url", "repository_url", "source_url", + "website_url", ]); const DOMAIN_ONLY_EXCLUSIONS = new Set([ "github.com", diff --git a/tests/submission-gate-worker.test.ts b/tests/submission-gate-worker.test.ts index 5fb1c3db50..d474aa5262 100644 --- a/tests/submission-gate-worker.test.ts +++ b/tests/submission-gate-worker.test.ts @@ -2762,6 +2762,52 @@ repoUrl: "https://github.com/langchain-ai/langchain.git" ); }); + it("treats same canonical website across different categories as a strict duplicate", () => { + const existingTool = extractContentDuplicateSignals({ + filePath: "content/tools/acme-claude.mdx", + content: `--- +title: Acme Claude +slug: acme-claude +category: tools +description: Tooling for Acme Claude workflows. +websiteUrl: "https://acme-claude.example/product" +--- +`, + }); + const candidateMcp = extractContentDuplicateSignals({ + filePath: "content/mcp/acme-claude-server.mdx", + content: `--- +title: Acme Claude MCP Server +slug: acme-claude-server +category: mcp +description: MCP server for Acme Claude workflows. +websiteUrl: "https://acme-claude.example/product?utm_source=submission" +--- +`, + }); + + expect( + findStrictContentDuplicateMatch(candidateMcp, [existingTool]), + ).toMatchObject({ + reasons: expect.arrayContaining([ + expect.stringContaining( + "same canonical source URL https://acme-claude.example/product across mcp/tools", + ), + ]), + }); + expect(findRelatedContentMatches(candidateMcp, [existingTool])).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + reasons: expect.arrayContaining([ + expect.stringContaining( + "same canonical source URL https://acme-claude.example/product across mcp/tools", + ), + ]), + }), + ]), + ); + }); + it("treats collection member overlap as related context, not a strict duplicate", () => { const existingTool = extractContentDuplicateSignals({ filePath: "content/tools/storybook-a11y.mdx",