diff --git a/common/known-removable-phrases.js b/common/known-removable-phrases.js index 5773766..664b369 100644 --- a/common/known-removable-phrases.js +++ b/common/known-removable-phrases.js @@ -510,6 +510,7 @@ const knownRemovablePhrases = [ "Christian Petzold:", "A Not-So-Christmas", "Cinema Live:", + "Cinema Night London:", "Cinema Restored:", "Cinema Tehran", "Citra Sasmita: Into Eternal Land:", @@ -668,6 +669,8 @@ const knownRemovablePhrases = [ "Indian 101 Diwali Celebration:", "Islamophobia Awareness Month:", "Jazz In Exile:", + "Jewish Culture Month: ", + "Jewish Culutre Month: ", "Just Evidence 1:", "Just Evidence 1.", "John Smith:", @@ -923,6 +926,7 @@ const knownRemovablePhrases = [ "Wallace & Gromit in ", "Wallace & Gromit :", "Windrush Caribbean Film Festival 2025:", + "Women's Only Event:", "Women of Almodóvar:", "International Women's Day:", "International Dog Day:", diff --git a/common/normalize-title.js b/common/normalize-title.js index 813fb9c..44da3e8 100644 --- a/common/normalize-title.js +++ b/common/normalize-title.js @@ -627,6 +627,7 @@ function normalizeTitle(title, options) { ["Di'Anno - ", "Di'Anno: "], ["Gigi & Olive -", "Gigi & Olive: "], ["Community Cinema at UCL East - ", "Community Cinema at UCL East: "], + ["Cinema Night London - ", "Cinema Night London: "], ["Afronauts + ", "Afronauts & "], ["Cockroach + ", "Cockroach & "], ["The Room + ", "The Room & "], diff --git a/common/tests/test-titles.json b/common/tests/test-titles.json index 5cbc5a0..7de9f08 100644 --- a/common/tests/test-titles.json +++ b/common/tests/test-titles.json @@ -61978,5 +61978,65 @@ { "input": "Community Cinema at UCL East - Monk in Pieces", "output": "monk in pieces" + }, + { + "input": "Film Club: Jewish Culture Month: Hester Street", + "output": "hester street" + }, + { + "input": "Film Club: Jewish Culture Month: Menashe", + "output": "menashe" + }, + { + "input": "Film Club: Jewish Culture Month: Solomon & Gaenor", + "output": "solomon gaenor" + }, + { + "input": "Film Club: Jewish Culture Month: Uncut Gems", + "output": "uncut gems" + }, + { + "input": "Film Club: Jewish Culture Month: Shiva Baby", + "output": "shiva baby" + }, + { + "input": "Film Club: Jewish Culutre Month: A Serious Man", + "output": "a serious man" + }, + { + "input": "Film Club: Jewish Culture Month: Best of British", + "output": "best of british" + }, + { + "input": "Goethe-Kino - Mascha Schilinski - Sound of Falling", + "output": "sound of falling" + }, + { + "input": "LONDON PREMIERE Dracula", + "output": "dracula" + }, + { + "input": "Stand by Me (40th Anniversary 4k Restoration)", + "output": "stand by me" + }, + { + "input": "Women’s Only Event: Cinema Night London - The Devil wears Prada 2", + "output": "devil wears prada 2" + }, + { + "input": "The North + Q&A", + "output": "the north" + }, + { + "input": "undertone", + "output": "undertone" + }, + { + "input": "PINK PALACE: THE FILMS OF THE GAY GIRLS RIDING CLUB", + "output": "films of the gay girls riding club" + }, + { + "input": "Hammersmith & Fulham Friends Of The Earth - Free Film Screening", + "output": "hammersmith fulham friends of the earth" } ] diff --git a/docs/reviewing-title-normalisation-test-cases.md b/docs/reviewing-title-normalisation-test-cases.md index 4c0584a..40e2439 100644 --- a/docs/reviewing-title-normalisation-test-cases.md +++ b/docs/reviewing-title-normalisation-test-cases.md @@ -167,6 +167,14 @@ This removes the prefix _including_ the `(`, leaving `Film Title)`. The trailing ## Key pitfalls +**Phrase removal can expose a leading space that matches other phrases** — +`knownRemovablePhrases` does a plain substring replace with no post-trim. If you +add `"Foo Bar:"` and the title is `"foo bar: uncut gems"`, removing `"foo bar:"` +leaves `" uncut gems"`, and a phrase like `" uncut"` (with a leading space) will +then match, producing just `"gems"`. The fix is to include the trailing space in +the phrase: `"Foo Bar: "`. Do this whenever the phrase is always followed by +more content (i.e. it is a prefix label, not a suffix or mid-title fragment). + **Plural/singular ordering** — `known-removable-phrases.js` uses substring matching. `"Documentary Screening"` will partially match `"Documentary Screenings"`, leaving a stray `s`. Always add the **longer