diff --git a/common/known-removable-phrases.js b/common/known-removable-phrases.js index 652b7f09..22427fcc 100644 --- a/common/known-removable-phrases.js +++ b/common/known-removable-phrases.js @@ -326,6 +326,7 @@ const knownRemovablePhrases = [ "Live Q&A with Cynthia Erivo", " Q&A with George Mackay", "*WORKSHOP SOLD OUT", + "- sold out", " x Flower Workshop", " x Free Vocafest Screening", " x FIST CLUB for IWD", @@ -590,6 +591,7 @@ const knownRemovablePhrases = [ "FILM CLUB FRIDAYS:", "Festival of Creativity:", "Fetish-Friendly:", + "Fetish Friendly:", "FFC x Marsh:", "FFC x Marsm:", "Fighting Spirit 2025:", @@ -650,6 +652,7 @@ const knownRemovablePhrases = [ "Happy Birthday Rik Mayall:", "Hidden Figures:", "Hiroshima Day:", + "Habeshaview Monthly Cinema:", "Holocaust Memorial Day:", "Hayley Mills:", "Halloween with Distorted Frame:", diff --git a/common/normalize-title.js b/common/normalize-title.js index 44da3e8a..9fd4d606 100644 --- a/common/normalize-title.js +++ b/common/normalize-title.js @@ -404,6 +404,7 @@ function normalizeTitle(title, options) { ], ["Film Club |", "Film Club: "], ["IN-HOUSE - ", "IN-HOUSE: "], + ["RIO FOREVER /", "RIO FOREVER: "], ["BAR TRASH - ", "BAR TRASH: "], [ "BAR TRASH Positive East Fundraiser /", diff --git a/common/tests/test-titles.json b/common/tests/test-titles.json index 8920ee30..59a0247f 100644 --- a/common/tests/test-titles.json +++ b/common/tests/test-titles.json @@ -62190,5 +62190,101 @@ { "input": "Ugetsu (1953) – Japanese Golden Age Classic Screening & Q&A with Irene González-López", "output": "ugetsu (1953)" + }, + { + "input": "RBO Live: Siegfried", + "output": "royal ballet opera 2025 siegfried" + }, + { + "input": "We Are Making a Film About Mark Fisher", + "output": "we are making a film about mark fisher" + }, + { + "input": "The Happy Man Tree (2022)", + "output": "happy man tree (2022)" + }, + { + "input": "BAR TRASH: POISON IVY (1992)", + "output": "poison ivy (1992)" + }, + { + "input": "Shaun of the Dead- Sold Out!", + "output": "shaun of the dead" + }, + { + "input": "LONDON PREMIERE Bouchra", + "output": "bouchra" + }, + { + "input": "Underland + pre-recorded Q&A", + "output": "underland" + }, + { + "input": "Merrily We Roll Along + Q&A", + "output": "merrily we roll along" + }, + { + "input": "Oh My Goodness! (Juste Ciel!)", + "output": "oh my goodness" + }, + { + "input": "The Christophers", + "output": "the christophers" + }, + { + "input": "The Count of Monte Cristo (2024)", + "output": "count of monte cristo (2024)" + }, + { + "input": "The Sheep Detectives", + "output": "sheep detectives" + }, + { + "input": "Tuner", + "output": "tuner" + }, + { + "input": "Tuner - Preview", + "output": "tuner" + }, + { + "input": "Habeshaview Monthly Cinema: Zemetaye", + "output": "zemetaye" + }, + { + "input": "RIO FOREVER / NEVER EVER WITH CATEGORY H: An Evening of Public Access", + "output": "never ever an evening of public access" + }, + { + "input": "Amélie: 25th Anniversary", + "output": "amelie" + }, + { + "input": "Arklink AI Film Festival", + "output": "arklink ai film festival" + }, + { + "input": "Fetish Friendly: Night Stage", + "output": "night stage" + }, + { + "input": "A Bigger Splash + Q&A", + "output": "a bigger splash" + }, + { + "input": "California Schemin", + "output": "california schemin" + }, + { + "input": "LVFF 2026: Invincible", + "output": "invincible" + }, + { + "input": "DRAG ME TO HELL", + "output": "drag me to hell" + }, + { + "input": "FREEWAY: CONFESSIONS OF A TRICKBABY", + "output": "freeway confessions of a trickbaby" } ] diff --git a/docs/reviewing-title-normalisation-test-cases.md b/docs/reviewing-title-normalisation-test-cases.md index b6a5c5f8..30b4c5cb 100644 --- a/docs/reviewing-title-normalisation-test-cases.md +++ b/docs/reviewing-title-normalisation-test-cases.md @@ -55,8 +55,10 @@ Understanding the order matters when debugging why an output looks wrong. **The separator must be preceded by whitespace.** The regex requires `\s+` before the separator character. A suffix like `Title/Q&A` (no space before - `/`) will not be caught — `hasSeparator` won't fire, and the `/Q&A` part will - survive into the output. Add it as a removable phrase instead. + `/`) or `Title- Sold Out` (no space before `-`) will not be caught — + `hasSeparator` won't fire, and the suffix will survive into the output. Add + it as a removable phrase, including the leading separator character (e.g. + `"- sold out"`, `"/Q&A with Maria Petschnig"`). **Em-dash `–` is not in this list.** A title like `Venue – Film` passes through `hasSeparator` untouched. The em-dash is instead collapsed to a space @@ -120,16 +122,22 @@ grep -B1 '"output": "' common/tests/test-titles.json | grep ' - ' The existing test data is the authoritative record of intended behaviour. +When checking whether a series/venue prefix already exists in +`known-removable-phrases.js`, also search for the hyphen/space variant — cinemas +often format the same name both ways (e.g. `"Fetish-Friendly:"` and +`"Fetish Friendly:"`). If only one variant is present, add the other alongside +it. + ### Step 3 — classify the issue -| Symptom | Likely cause | Fix | -| ------------------------------------------------- | --------------------------------------------- | ------------------------------------------------------------------------------- | -| Venue/series name left in output | Phrase not in `known-removable-phrases.js` | Add it | -| Film title stripped, only venue name left | `hasSeparator` ate the film (hyphen format) | Dash→colon correction + removable phrase | -| Venue name + film title run together (em-dash) | `–` not in `hasSeparator`; collapses to space | Removable phrase with trailing space (see [em-dash variant](#em-dash-variant)) | -| Stray character(s) left (e.g. a lone `s`) | A phrase partially matches a longer word | Add the longer form **before** the shorter form in `known-removable-phrases.js` | -| Event suffix not removed (Q&A, anniversary, etc.) | Phrase not in `known-removable-phrases.js` | Add it | -| Film title in parentheses dropped | Parentheses removal rule stripped it | Follow the `"Prefix ("` pattern (see below) | +| Symptom | Likely cause | Fix | +| ------------------------------------------------- | --------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------ | +| Venue/series name left in output | Phrase not in `known-removable-phrases.js` | Add it | +| Film title stripped, only venue name left | `hasSeparator` ate the film (hyphen format) | Dash→colon correction + removable phrase | +| Venue name + film title run together (em-dash) | `–` not in `hasSeparator`; collapses to space | Removable phrase with trailing space (see [em-dash variant](#em-dash-variant)) | +| Stray character(s) left (e.g. a lone `s`) | A phrase partially matches a longer word | Add the longer form **before** the shorter form in `known-removable-phrases.js` | +| Event suffix not removed (Q&A, anniversary, etc.) | Phrase not in `known-removable-phrases.js` | Add it; if suffix is attached directly to the last word (no space before separator), include the leading separator in the phrase (e.g. `"- sold out"`) | +| Film title in parentheses dropped | Parentheses removal rule stripped it | Follow the `"Prefix ("` pattern (see below) | ### Step 4 — apply the right fix