IMDb+ Ron (RoChess) Combination scraper, using the best data from IMDb and RottenTomatoes. 314159265 MovieDetailsFetcher|MovieCoverFetcher various
]*>)(?:[\r\n\t\s\v]*(?:|||]+>))*[\r\n\t\s\v]* ]]> IMDb\s*>[^<]+(?[^\(]+?)\s*\((?\d{4})[^)]*\) ]]> Handlung:[^>]+>\s+?(?[^<]*)(?: \|)?\s+? Sprache:[^>]+>(?.+?)(?:\s\|[^<]+)?< ]]> Trama:[^>]+>\s+?(?[^<]*)(?: \|)?\s+? Idioma:[^>]+>(?.+?)(?:\s\|[^<]+)?< ]]> (?[^<]+)[^<]+(?\d{4}) ]]> (?[^<]+) ]]> Trama:[^>]+>\s+?(?[^<]*)(?: \|)?\s+? Lingua:[^>]+>(?.+?)(?:\s\|[^<]+)?< ]]> Argumento:[^>]+>\s+?(?[^<]*)(?: \|)?\s+? L\&\#xED;ngua:[^>]+>(?.+?)(?:\s\|[^<]+)?< ]]> (?[^<]+).+?Utgivningsår:(?\d{4}) ]]> \s*]+>

[^<]+

\s*(?[^<]+)\s* ]]>
(?[^<]+).*(?\d{4})\-\d+\-\d+ ]]> (?[^<]+) ]]> (?[^<]+) ]]> (?[^<]+)[^<]+(?\d{4}) ]]> (?[^<]+) ]]> [^<]*(?.*?)|]+>]+>(?.*?)) ]]> ]*>(?:]+>)?\s*(?[^<]+)(?:)?
]]>
[^<]*(?.*?)|]+>]+>(?.*?)) ]]> ]*>(?:]+>)?\s*(?[^<]+)(?:)? ]]> ]+>(?[^<]+)|[^<]+[^"]+")) ]]> (?:')?(?[^(]+(?:\(\D+\))?)(?:')?\s+\(\D*(?\d{4})[^)]*\)[^<]* ]]> [^(]+(?:\(\D+\))?)(?:')?\s+\(\D*(?\d{4})[^)]*\)[^"]*"\s*/> ]]> Release Date:\s*(?(?:\d{1,2}\s*)?[^\s]+\s*\d{4})|]+>Release Date]+>]+>]+>]+>(?[^,]+, \d{4}) ]]> Production Co:\s*(?(?:[,]*\s*]+>\s*)?(?:[^<]*]+href="[^"]+"[^>]*>(?:]+>)?[^<]+(?:|)))*|Production Comp[^<]+]+>]+>(?.*?) ]]> ]+href="[^"]+"[^>]*>(?:]+>)?\s*(?[^<]+)(?:)? ]]> (?.+?)|]+>]+>(?.*?)) ]]> ]*\s*>\s*(?.+?) ]]> ]+>Runtime:\s*]+>(?\d+) min\s*|(?\d+)h\s*(?\d+)min\s* [\s\f\r]*|]+>]+>]+>]+>]+>)(?[^<]+) ]]> ]+>Plot Keywords:\s*(?.+?)(?: ||See more|)|"keywords":"(?[^"]+)" ]]> ]+>Language:\s*(?.+?)(?: ||See more|)|Language]+>]+>(?]+>]+>[^<]+){1,} ]]>
\s*USA:|United States:|Rated\s*)(?(?:Not Rated)|(?:Unrated)|(?:G)|(?:PG)|(?:PG-13)|(?:R)|(?:X)|(?:NC-17))(?:|\s*for)||"ContentRating":\s*"(?[^"]+)"|Certificate:\s*(?[^<]+) ]]> (?:U)|(?:PG)|(?:12)|(?:12A)|(?:15)|(?:18)|(?:R18)) ]]> (?:U)|(?:PG)|(?:12)|(?:12A)|(?:15)|(?:18)|(?:R18))(?! \(original rating\)) ]]> (?:G)|(?:PG)|(?:AA)|(?:14A)|(?:18A)|(?:R)) \([^(]*Alberta[^(]*\) ]]> (?:G)|(?:PG)|(?:14A)|(?:18A)|(?:R)) \([^(]*British Columbia[^(]*\) ]]> (?:G)|(?:PG)|(?:14A)|(?:18A)|(?:R)) \([^(]*Manitoba[^(]*\) ]]> (?:G)|(?:PG)|(?:1[48][A]?)|(?:R)|(?:A)) \([^(]*Nova Scotia[^(]*\) ]]> (?:G)|(?:PG)|(?:AA)|(?:14A)|(?:18A)|(?:R)) \([^(]*Ontario[^(]*\) ]]> (?:G)|(?:13[+]?)|(?:16[+]?)|(?:18[+]?)) \([^(]*Quebec[^(]*\) ]]> (?:E)|(?:G)|(?:PG)|(?:M)|(?:MA15[+]?)|(?:R18[+]?)|(?:X18[+]?)|(?:RC))(?! \(original rating\)) ]]> (?:G)|(?:PG)|(?:M)|(?:[RP]*1[3568])|(?:R))(?! \(original rating\)) ]]> ]+>Summaries\s*]+>\s*]+>\s*

(?.+?)\s*Storyline\s*]*>\s*

\s*]+>\s*(?.+?) ]]> \s*(?(?!\s*]+>Add a Plot).+?)(?:\s*]+>See full [^<]+(?:\w+)*)?\s*

|
]*>(?:\s*[^>]+>\s*is a movie(?:[^,]+,){1,2}[^.]+.)?\s*(?(?!\s*]+>Add a Plot).+?)(?:\s*]+>See full [^<]+[^<]*)?\s*]+>Summaries\s*]+>\s*]+>\s*

(?.+?)

|"description":"(?[^"]+)" ]]> \s*(?(?!\s*]+>Add a Plot).+?)(?:\s*]+>See full [^<]+(?:\w+)*)?\s*

|"description":"(?[^"]+)" ]]>
\d+(?:.\d+)?) based on (?[^\s]+) user ratings"(?:.+?
\s*(?\d+).+?>(?[^\s]+) crit[^<]+)?|"aggregateRating":\{"@type":"AggregateRating","ratingCount":(?\d+),"bestRating":\d+,"worstRating":\d+,"ratingValue":(?\d+(?:.\d+)?)}.+?]+>(?\d+)]+>Critic reviews]+>]+>]+>]+>]+>(?\d+)]+>Metascore ]]> ]+>(?\d{1,3}?).+?
]+>(?:(?\d{1,3}?)|No Score Yet...).+?
]+>(?\d{1,3}?)[%]*.+?)?
]+>\s*(?[^<]+?)\s*(?:\(C\)[^<]+)?\s*< ]]> ]+>\s*(?\d+) hr. (?\d+) min.\s* ]]> (?:G|PG-13|PG|R|NR|NC-17))" ]]> User Reviews.+?
\s*

(?.+?)

\s*
]]>
]+>\s+?]+>([^<]+)\s+?]+>([^<]+)?\s+? ]]> ]+>Also Known As \(AKA\)[^<]+.*?]+>[^<]*USA \(new title\)[^<]*\s*]+>(?[^<]+).*? ]]> (?:')?(?[^\(]+?)(?:')?\s*\(\D*(?\d{4})[^)]*\).*?.+?]+>Also Known As \(AKA\)[^<]+.*USA\s*]+>(?[^<]+).*? ]]> (?:')?(?[^\(]+?)(?:')?\s*\(\D*(?\d{4})[^)]*\).*?.+?]+>Also Known As \(AKA\)[^<]+.*]+>UK\s*(?[^<]+).*? ]]> (?:')?(?[^\(]+?)(?:')?\s*\(\D*(?\d{4})[^)]*\).*?.+?]+>Also Known As \(AKA\)[^<]+.*]+>World-wide \(English title\)\s*]+>(?[^<]+).*? ]]> (?:')?(?[^\(]+?)(?:')?\s*\(\D*(?\d{4})[^)]*\).*?.+?]+>Also Known As \(AKA\)[^<]+.+?]+>Hong Kong[^<]*English[^<]*\s*]+>(?[^<]+).*? ]]> (?:')?(?[^\(]+?)(?:')?\s*\(\D*(?\d{4})[^)]*\).*?.+?]+>Also Known As \(AKA\)[^<]+.+?(?!.*?(?:Japan|Israel|Germany|Philippines|working title|short title|alternative spelling|promotional title|promotional abbreviation|informal title|teaser title|review title|IMAX))[^<]*English[^<]*\s*(?[^<]+).*? ]]> (?:')?(?[^\(]+?)(?:')?\s*\(\D*(?\d{4})[^)]*\).*?.+?]+>Also Known As \(AKA\)[^<]+.*]+>USA\s*]+>(?[^<]+).*? ]]> (?:')?(?[^\(]+?)(?:')?\s*\(\D*(?\d{4})[^)]*\).*?.+?]+>Also Known As \(AKA\)[^<]+\s*]+>.+]+>\s*\(original title\)\s*]+>(?[^<]+) ]]> (?:')?(?[^\(]+?)(?:')?\s*\(\D*(?\d{4})[^)]*\).*?.+?generic.monitoring.set_twilight_info\(\s*"title[^"]+",\s*"US",.+?]+>Also Known As \(AKA\)[^<]+.+?]+>\s*]+>(?[^<]+).*? ]]> (?:')?(?[^\(]+?)(?:')?\s*\(\D*(?\d{4})[^)]*\).*?.*? 
.+ [^"]+)")?(?:[\t\s]*foreign="(?[^"]+)")?(?:[\t\s]*sortby="(?[^"]+)")?(?:[\t\s]*collection="(?[^"]+)")?[\t\s]*/> ]]> .+ [^"]+)")?(?:[\t\s]*sortby="(?[^"]+)")?(?:[\t\s]*collection="(?[^"]+)")?[\t\s]*/> ]]> (?:IMDb[\s-]*)?(?:')?(.+?)(?:')?\s*\(\D*(\d{4})[^)]*\).*?.+?(tt\d+)/fullcredits ]]> (?:')?.+?(?:')?\s*\(\D*\d{4}[^)]*\).*?.+ ]]> No results found for [^<]+ ]]> ]*>\s+?([^<]+)\s+?([^<]+)\s* ]]> ]*>.+]*> ]]> ]+>(?:\s*\s*]+>]+>\s*)?\s*\s*tt\d+)[^"]+"\s*>(?[^<]+)</a>(?:\s*\([\/IVX]*\))?\s*\((?<year>\d{4})[^)]*\)(?:(?:\s*<br/>aka\s*<i>")(?<aka>[^"]+)(?:"</i>\s*))?[^<]+</td>\s*</tr> ]]> </set> <!-- process the search page and obtain all the popular results --> <loop name="search_results_verified" on="search_results_block" limit="99"> <parse name="movie_details" input="${search_results_verified}" regex="${rx_search_results}" /> <loop name="curr_details" on="movie_details" limit="99"> <add name="counter" value1="${count}" value2="${offset}" /> <set name="movie[${counter}].imdb_id" value="${curr_details[0]:htmldecode}" /> <set name="movie[${counter}].site_id" value="${curr_details[0]:htmldecode}" /> <set name="movie[${counter}].title" value="${curr_details[1]:htmldecode}" /> <set name="movie[${counter}].year" value="${curr_details[2]:htmldecode}" /> <set name="movie[${counter}].alternate_titles" value="${curr_details[3]:htmldecode}" /> <subtract name="movie[${counter}].popularity" value1="100" value2="${counter}" /> </loop> </loop> <!-- Add AKA results to the first match if none exists, or when this is a non-USA search --> <if test="${movie[0].alternate_titles}="><set name="force_aka_to_first_match" value="true" /></if> <if test="${movie[0].alternate_titles}!="> <set name="rx_import_country"> <![CDATA[ "main",\s*"([A-Z]+)", ]]> </set> <parse name="import_country" input="${search_page}" regex="${rx_import_country}" /> <if test="${import_country[0][0]}!=US"> <set name="force_aka_to_first_match" value="true" /> <log LogLevel="Info" Message="IMDb+ Scraper: Non-US country code 
detected '${import_country[0][0]}'" /> </if> </if> <if test="${force_aka_to_first_match}=true"> <retrieve name="aka_page" url="https://www.imdb.com/title/${movie[0].imdb_id}/releaseinfo" encoding="UTF-8" /> <parse name="akas" input="${aka_page}" regex="${rx_aka_details}" /> <if test="${import_country[0][0]}!=US"> <if test="${akas[0][0]}=(original title)"> <set name="movie[0].alternate_titles" value="${akas[0][1]:htmldecode}" /> </if> </if> <if test="${movie[0].alternate_titles}="> <set name="movie[0].alternate_titles" value="|" /> <loop name="currAka" on="akas" limit="50"> <set name="movie[0].alternate_titles" value="${movie[0].alternate_titles}${currAka[1]:htmldecode}|" /> </loop> </if> </if> </if> </if> <!-- Auto-approve workaround for foreign-users (temporary, will remove once new MovPic gets released) --> <if test="${search.title}!=${movie[0].title}"> <if test="${search.title}=${movie[0].alternate_titles}"> <set name="movie[0].title" value="${search.title}" /> <if test="${search.year}=${movie[0].year}"> <log LogLevel="Info" Message="IMDb+ Scraper: Forcing title as '${search.title}' with match on '${search.year}' to make auto-approve work for foreign users" /> </if> <if test="${search.year}!=${movie[0].year}"> <log LogLevel="Info" Message="IMDb+ Scraper: Forcing title as '${search.title}' with year *NOT* matching to make auto-approve work for foreign users" /> </if> </if> </if> </if> <!-- Using unused 'details_url' to pass special edition strings onto details node --> <parse name="special_edition" input="${search.filename_noext}" regex="${rx_special_editions}" /> <if test="${special_edition[0][0]}!="> <set name="movie[0].details_url" value="###${special_edition}###" /> </if> <if test="${special_edition[0][0]}="> <!-- Filename came up empty; lets try foldername before giving up --> <parse name="special_edition" input="${search.foldername}" regex="${rx_special_editions}" /> <if test="${special_edition[0][0]}!="> <set name="movie[0].details_url" 
value="###${special_edition}###" /> </if> </if> <log LogLevel="Info" Message="########################## IMDb+ Search Results ###########################" /> <log LogLevel="Info" Message="# Search Folder = ${search.foldername}" /> <log LogLevel="Info" Message="# Search File = ${search.filename}" /> <log LogLevel="Info" Message="# Search IMDb ID = ${search.imdb_id}" /> <log LogLevel="Info" Message="# Search Title = ${search.title} (${search.year})" /> <log LogLevel="Info" Message="#" /> <log LogLevel="Info" Message="# IMDb+ IMDb ID = ${movie[0].imdb_id} (Only showing first movie found)" /> <log LogLevel="Info" Message="# IMDb+ Title = ${movie[0].title} (${movie[0].year})" /> <log LogLevel="Info" Message="# IMDb+ Special = ${special_edition}" /> </if> <if test="${skip_cduniverse[0]}!="> <log LogLevel="Info" Message="# Skipping CDUniverse file = ${search.filename}" /> </if> <log LogLevel="Info" Message="###########################################################################" /> </action>
<!-- Covers Node -->
<!--
  get_cover_art: produces a single cover URL (cover_art[0].url) for the current movie.
  Reads:  movie.site_id, movie.imdb_id, movie.title, movie.year
  Writes: movie.site_id (only when it was empty and movie.imdb_id is not),
          details_page, rx_cover, cover_src, cover_art[0].url
  When both movie.site_id and movie.imdb_id are empty the action does nothing at all
  (no retrieval, no log output).
-->
<action name="get_cover_art">
<!-- If the site id is not set, fall back to the IMDb id so the title page URL below can be built -->
<if test="${movie.site_id}="> <if test="${movie.imdb_id}!="> <set name="movie.site_id" value="${movie.imdb_id}" /> </if> </if>
<!-- Everything from here on runs only when a site id (IMDb id) is available -->
<if test="${movie.site_id}!=">
<log LogLevel="Info" Message="############################ IMDb+ Cover Node ############################" />
<log LogLevel="Info" Message="# IMDb+ Title = ${movie.title} (${movie.year}) [(${movie.site_id})]" />
<!-- Retrieve the IMDb title page for this id, decoded as UTF-8 -->
<retrieve name="details_page" url="https://www.imdb.com/title/${movie.site_id}/" encoding="UTF-8" />
<!--
  Get the cover directly by manipulating the details page thumbnail URL.
  rx_cover matches a content="..." attribute value and captures two groups:
    group 1: the text from "http" up to and including the first "_V1"
    group 2: the text from a later "." up to the closing quote
  An optional "._CR..." segment and the run of non-dot characters between the two groups
  are matched but not captured, so they are dropped when the groups are concatenated below.
  NOTE(review): presumably the dropped part is IMDb's resize/crop suffix and group 2 is the
  file extension; confirm against current IMDb page markup.
-->
<set name="rx_cover"> <![CDATA[ content="(http[^_]+_V1)(?:\._CR[^_]+)?[^.]+([^"]+)" ]]> </set>
<parse name="cover_src" input="${details_page}" regex="${rx_cover}" />
<!--
  Set the cover URL from the first match: group 1 immediately followed by group 2.
  NOTE(review): this assignment is not guarded by a check that the parse above matched.
  How the host resolves the cover_src references when there is no match is not visible
  in this file; verify that a failed match cannot yield a bogus URL.
-->
<set name="cover_art[0].url" value="${cover_src[0][0]}${cover_src[0][1]}" 
/>
<log LogLevel="Info" Message="###########################################################################" />
</if>
</action> </ScriptableScraper>