Skip to content

Commit a0eed14

Browse files
Update title normalization test (#336)
Co-authored-by: alistairjcbrown <635903+alistairjcbrown@users.noreply.github.com>
1 parent 2e6fa72 commit a0eed14

File tree

4 files changed

+73
-0
lines changed

4 files changed

+73
-0
lines changed

common/known-removable-phrases.js

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,7 @@ const knownRemovablePhrases = [
510510
"Christian Petzold:",
511511
"A Not-So-Christmas",
512512
"Cinema Live:",
513+
"Cinema Night London:",
513514
"Cinema Restored:",
514515
"Cinema Tehran",
515516
"Citra Sasmita: Into Eternal Land:",
@@ -668,6 +669,8 @@ const knownRemovablePhrases = [
668669
"Indian 101 Diwali Celebration:",
669670
"Islamophobia Awareness Month:",
670671
"Jazz In Exile:",
672+
"Jewish Culture Month: ",
673+
"Jewish Culutre Month: ",
671674
"Just Evidence 1:",
672675
"Just Evidence 1.",
673676
"John Smith:",
@@ -923,6 +926,7 @@ const knownRemovablePhrases = [
923926
"Wallace & Gromit in ",
924927
"Wallace & Gromit :",
925928
"Windrush Caribbean Film Festival 2025:",
929+
"Women's Only Event:",
926930
"Women of Almodóvar:",
927931
"International Women's Day:",
928932
"International Dog Day:",

common/normalize-title.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,7 @@ function normalizeTitle(title, options) {
627627
["Di'Anno - ", "Di'Anno: "],
628628
["Gigi & Olive -", "Gigi & Olive: "],
629629
["Community Cinema at UCL East - ", "Community Cinema at UCL East: "],
630+
["Cinema Night London - ", "Cinema Night London: "],
630631
["Afronauts + ", "Afronauts & "],
631632
["Cockroach + ", "Cockroach & "],
632633
["The Room + ", "The Room & "],

common/tests/test-titles.json

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61978,5 +61978,65 @@
6197861978
{
6197961979
"input": "Community Cinema at UCL East - Monk in Pieces",
6198061980
"output": "monk in pieces"
61981+
},
61982+
{
61983+
"input": "Film Club: Jewish Culture Month: Hester Street",
61984+
"output": "hester street"
61985+
},
61986+
{
61987+
"input": "Film Club: Jewish Culture Month: Menashe",
61988+
"output": "menashe"
61989+
},
61990+
{
61991+
"input": "Film Club: Jewish Culture Month: Solomon & Gaenor",
61992+
"output": "solomon gaenor"
61993+
},
61994+
{
61995+
"input": "Film Club: Jewish Culture Month: Uncut Gems",
61996+
"output": "uncut gems"
61997+
},
61998+
{
61999+
"input": "Film Club: Jewish Culture Month: Shiva Baby",
62000+
"output": "shiva baby"
62001+
},
62002+
{
62003+
"input": "Film Club: Jewish Culutre Month: A Serious Man",
62004+
"output": "a serious man"
62005+
},
62006+
{
62007+
"input": "Film Club: Jewish Culture Month: Best of British",
62008+
"output": "best of british"
62009+
},
62010+
{
62011+
"input": "Goethe-Kino - Mascha Schilinski - Sound of Falling",
62012+
"output": "sound of falling"
62013+
},
62014+
{
62015+
"input": "LONDON PREMIERE Dracula",
62016+
"output": "dracula"
62017+
},
62018+
{
62019+
"input": "Stand by Me (40th Anniversary 4k Restoration)",
62020+
"output": "stand by me"
62021+
},
62022+
{
62023+
"input": "Women’s Only Event: Cinema Night London - The Devil wears Prada 2",
62024+
"output": "devil wears prada 2"
62025+
},
62026+
{
62027+
"input": "The North + Q&A",
62028+
"output": "the north"
62029+
},
62030+
{
62031+
"input": "undertone",
62032+
"output": "undertone"
62033+
},
62034+
{
62035+
"input": "PINK PALACE: THE FILMS OF THE GAY GIRLS RIDING CLUB",
62036+
"output": "films of the gay girls riding club"
62037+
},
62038+
{
62039+
"input": "Hammersmith & Fulham Friends Of The Earth - Free Film Screening",
62040+
"output": "hammersmith fulham friends of the earth"
6198162041
}
6198262042
]

docs/reviewing-title-normalisation-test-cases.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,14 @@ This removes the prefix _including_ the `(`, leaving `Film Title)`. The trailing
167167

168168
## Key pitfalls
169169

170+
**Phrase removal can expose a leading space that matches other phrases**
171+
`knownRemovablePhrases` does a plain substring replace with no post-trim. If you
172+
add `"Foo Bar:"` and the title is `"foo bar: uncut gems"`, removing `"foo bar:"`
173+
leaves `" uncut gems"`, and a phrase like `" uncut"` (with a leading space) will
174+
then match, producing just `"gems"`. The fix is to include the trailing space in
175+
the phrase: `"Foo Bar: "`. Do this whenever the phrase is always followed by
176+
more content (i.e. it is a prefix label, not a suffix or mid-title fragment).
177+
170178
**Plural/singular ordering** `known-removable-phrases.js` uses substring
171179
matching. `"Documentary Screening"` will partially match
172180
`"Documentary Screenings"`, leaving a stray `s`. Always add the **longer

0 commit comments

Comments
 (0)