class PicturehouseUk::Internal::TitleSanitizer

Sanitize and standardize film titles

Private Instance Methods

remove() click to toggle source

Strings and regexes to be removed from film titles

# File lib/picturehouse_uk/internal/title_sanitizer.rb, line 18
# Patterns for text that is to be removed from a film title.
#
# The list is returned in a fixed order; a caller that applies the
# patterns one after another sees them in exactly this sequence.
#
# @return [Array<Regexp>] the removal patterns
def remove
  [
    # bracketed certificate, optionally prefixed with "AS LIVE:"
    /\s\[(AS LIVE:\s*)?[ACPGU1258]+\]/,
    # bracketed R18 certificate
    /\s\[R18\]/,
    # 2D / 3D marker, with or without parentheses
    /\s+\(?[23][dD]\)?/,
    # the standalone word IMAX
    /\bIMAX\b/i,
    # [NO CERT]
    /\s\[NO CERT\]/,
    # [TBC]
    /\s\[TBC\]/,
    # [N/A]
    /\s\[N\/A\]/i,
    # (Theatre)
    /\s\(Theatre\)/i,
    # empty certificate brackets
    /\s\[\]/,
    # "ourscreen: " prefix
    /ourscreen\: /,
    # re-release marker such as (Re) or (Re: 2014)
    /\s\(Re(\: \d{0,4})?\)/i,
    # [CERT TBC]
    /\s\[CERT TBC\]/,
    # "- autism ...ing" (autism-friendly screening) suffix
    /\s?\-\s?autism.*ing\s?/i,
    # "+ Q&A", also when the ampersand arrives as "&amp;"
    /\s?\+\s?Q\&(amp;)*A\.?/i,
    # "KIDS CLUB" in any letter case
    /KIDS CLUB\s*/i,
    # "DISCOVER TUE" / "DISCOVER TUEsday:"
    /DISCOVER TUE(sday\:)*\s*/i,
    # "FREE Screening - " prefix
    /FREE Screening\s*-\s*/i,
    # "Big Scream", with optional surrounding dashes
    /\s*-?\s*Big Scream\s*-?\s*/i,
    # "HOH Subtitled", optionally wrapped in asterisks
    /\*?HOH Subtitled\*?/i,
    # "Reminiscence", with optional leading dash
    /\s*\-?\s*Reminiscence/i,
    # "Re-issue", with optional dash and parentheses
    /\s*\-?\s*\(?Re\-issue\)?/i,
    # "Kids Club" / "Kids' Club" (case-sensitive), with optional dashes
    /\s*-?\s*Kids\'? Club\s*-?\s*/,
    # "plus Q&A" and everything after it
    /\s*plus Q\&A.*/i,
    # "Cinemania" followed by a colon or dash
    /Cinemania\s*[\:\-]/i,
    # "@ komedia"
    /\@\s*komedia/i,
    # "+ panel" and everything after it
    /\s*\+ panel.*/i,
    # "toddler time", with optional trailing colons / dashes
    /toddler time\s*\:*\-*/i,
    # parenthesised text ending in four digits, e.g. a year
    /\(.*\d{4}\)/,
    # the standalone word "singalong"
    /\bsingalong\b/i,
    # "Parents & Babies", with optional leading dash
    /\s+\-?\s*Parents \& Babies/i,
    # "Subtitled", with optional trailing colons
    /Subtitled\:*\s*/i,
    # "live" / "(live)" at the very end of the string
    /\(?live\)?\z/i,
    # leftover "amp;" from an HTML-escaped ampersand
    /amp\;/i
  ]
end
replace() click to toggle source

regexes and their replacements

# File lib/picturehouse_uk/internal/title_sanitizer.rb, line 57
# Regexes for recognised event-cinema titles, each mapped to the
# standard prefix string that replaces the matched branding.
#
# Every pattern has a single capture group holding the remainder of the
# title. How the prefix and the captured text are combined is up to the
# caller and is not visible here.
#
# @return [Hash{Regexp => String}] pattern => replacement prefix, in the
#   order listed
def replace
  rules = [
    # Metropolitan Opera
    [/Met\.? Encore:\s*(.*)/, 'Met Opera: '],
    [/Met\.? Opera:\s*(.*)/, 'Met Opera: '],
    # National Theatre
    [/National Theatre:\s*(.*)/, 'NT Live: '],
    [/NT Encore:\s*(.*)/, 'NT Live: '],
    [/NT Live:\s*(.*)/, 'NT Live: '],
    # Royal Opera House
    [/ROH\.? Live:\s*(.*)/, 'Royal Opera House: '],
    [/ROH\.? Encore:\s*(.*)/, 'Royal Opera House: '],
    # Royal Shakespeare Company
    [/RSC\.? Live:\s*(.*)/, 'Royal Shakespeare Company: '],
    [/RSC\.? Encore:\s*(.*)/, 'Royal Shakespeare Company: '],
    # Bolshoi Ballet: here the title comes before the branding
    [/(.*) \(Bolshoi Ballet\)/, 'Bolshoi Ballet: ']
  ]
  Hash[rules]
end