#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from collections import OrderedDict
import json
import pickle
import re
import sys
import time
import xbmcgui
import xbmcplugin
from .singleton import Singleton
from .network import getURL, getURLData, MechanizeLogin
from .logging import Log
from .itemlisting import setContentAndView
from .l10n import *
from .users import *
from .playback import PlayVideo
class PrimeVideo(Singleton):
    """ Wrangler of all things PrimeVideo.com """
    _catalog = {}  # Catalog cache: tree of menu/folder nodes, persisted via pickle in _Flush/_LoadCache
    _videodata = {'urn2gti': {}}  # Video data cache, persisted via json; 'urn2gti' maps compact URNs to GTI ids
    _catalogCache = None  # Catalog cache file name (computed in _LoadCache from DATA_PATH + MarketID)
    _videodataCache = None  # Video data cache file name (computed in _LoadCache)
    _separator = '/'  # Virtual path separator used in plugin browse paths
def __init__(self, globalsInstance, settingsInstance):
    """ Initialise the scraper with the shared Globals and Settings objects, then load the on-disk caches.

    :param globalsInstance: shared Globals object (paths, MarketID, dialogs, …)
    :param settingsInstance: shared Settings object (user preferences)
    """
    self._g = globalsInstance
    self._s = settingsInstance
    # Data for date string deconstruction and reassembly, keyed by Amazon UI locale.
    #
    # Date references:
    # https://www.primevideo.com/detail/0LCQSTWDMN9V770DG2DKXY3GVF/ 09 10 11 12 01 02 03 04 05
    # https://www.primevideo.com/detail/0ND5POOAYD6A4THTH7C1TD3TYE/ 06 07 08 09
    #
    # BUG FIX: this explanatory text used to be a bare triple-quoted string placed
    # INSIDE the dict literal, immediately before the 'da_DK' key. Python
    # concatenates adjacent string literals, so the actual key became
    # "<explanation text>da_DK" and Danish dates could never match. Moved the text
    # into this comment so 'da_DK' is a real key.
    self._dateParserData = {
        'da_DK': {'deconstruct': r'^([0-9]+)\.?\s+([^\s]+)\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'januar': 1, 'februar': 2, 'marts': 3, 'april': 4, 'maj': 5, 'juni': 6, 'juli': 7, 'august': 8, 'september': 9, 'oktober': 10,
                             'november': 11, 'december': 12}},
        'de_DE': {'deconstruct': r'^([0-9]+)\.?\s+([^\s]+)\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'januar': 1, 'februar': 2, 'märz': 3, 'april': 4, 'mai': 5, 'juni': 6, 'juli': 7, 'august': 8, 'september': 9, 'oktober': 10,
                             'november': 11, 'dezember': 12}},
        'en_US': {'deconstruct': r'^([^\s]+)\s+([0-9]+),?\s+([0-9]+)', 'reassemble': '{2}-{0:0>2}-{1:0>2}', 'month': 0,
                  'months': {'january': 1, 'february': 2, 'march': 3, 'april': 4, 'may': 5, 'june': 6, 'july': 7, 'august': 8, 'september': 9, 'october': 10,
                             'november': 11, 'december': 12}},
        'es_ES': {'deconstruct': r'^([0-9]+)\s+de\s+([^\s]+),?\s+de\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'enero': 1, 'febrero': 2, 'marzo': 3, 'abril': 4, 'mayo': 5, 'junio': 6, 'julio': 7, 'agosto': 8, 'septiembre': 9, 'octubre': 10,
                             'noviembre': 11, 'diciembre': 12}},
        'fr_FR': {'deconstruct': r'^([0-9]+)\s+([^\s]+)\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'janvier': 1, 'février': 2, 'mars': 3, 'avril': 4, 'mai': 5, 'juin': 6, 'juillet': 7, 'aout': 8, 'août': 8, 'septembre': 9,
                             'octobre': 10, 'novembre': 11, 'décembre': 12}},
        'hi_IN': {'deconstruct': r'^([0-9]+)\s+([^\s]+)\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'जनवरी': 1, 'फ़रवरी': 2, 'मार्च': 3, 'अप्रैल': 4, 'मई': 5, 'जून': 6, 'जुलाई': 7, 'अगस्त': 8, 'सितंबर': 9, 'अक्तूबर': 10,
                             'नवंबर': 11, 'दिसंबर': 12}},
        'it_IT': {'deconstruct': r'^([0-9]+)\s+([^\s]+)\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'gennaio': 1, 'febbraio': 2, 'marzo': 3, 'aprile': 4, 'maggio': 5, 'giugno': 6, 'luglio': 7, 'agosto': 8, 'settembre': 9,
                             'ottobre': 10, 'novembre': 11, 'dicembre': 12}},
        'nb_NO': {'deconstruct': r'^([0-9]+)\.?\s+([^\s]+)\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'januar': 1, 'februar': 2, 'mars': 3, 'april': 4, 'mai': 5, 'juni': 6, 'juli': 7, 'august': 8, 'september': 9, 'oktober': 10,
                             'november': 11, 'desember': 12}},
        'nl_NL': {'deconstruct': r'^([0-9]+)\s+([^\s]+)\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'januari': 1, 'februari': 2, 'maart': 3, 'april': 4, 'mei': 5, 'juni': 6, 'juli': 7, 'augustus': 8, 'september': 9,
                             'oktober': 10, 'november': 11, 'december': 12}},
        'pl_PL': {'deconstruct': r'^([0-9]+)\s+([^\s]+)\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'stycznia': 1, 'lutego': 2, 'marca': 3, 'kwietnia': 4, 'maja': 5, 'czerwca': 6, 'lipca': 7, 'sierpnia': 8, 'września': 9,
                             'października': 10, 'listopada': 11, 'grudnia': 12}},
        'pt_BR': {'deconstruct': r'^([0-9]+)\s+de\s+([^\s]+),?\s+de\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'janeiro': 1, 'fevereiro': 2, 'março': 3, 'abril': 4, 'maio': 5, 'junho': 6, 'julho': 7, 'agosto': 8, 'setembro': 9, 'outubro': 10,
                             'novembro': 11, 'dezembro': 12}},
        'sv_SE': {'deconstruct': r'^([0-9]+)\s+([^\s]+)\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'januari': 1, 'februari': 2, 'mars': 3, 'april': 4, 'maj': 5, 'juni': 6, 'juli': 7, 'augusti': 8, 'september': 9, 'oktober': 10,
                             'november': 11, 'december': 12}},
        'ta_IN': {'deconstruct': r'^([0-9]+)\s+([^\s]+),?\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'ஜனவரி': 1, 'பிப்ரவரி': 2, 'மார்ச்': 3, 'ஏப்ரல்': 4, 'மே': 5, 'ஜூன்': 6, 'ஜூலை': 7, 'ஆகஸ்ட்': 8, 'செப்டம்பர்': 9,
                             'அக்டோபர்': 10, 'நவம்பர்': 11, 'டிசம்பர்': 12}},
        'te_IN': {'deconstruct': r'^([0-9]+)\s+([^\s]+),?\s+([0-9]+)', 'reassemble': '{2}-{1:0>2}-{0:0>2}', 'month': 1,
                  'months': {'జనవరి': 1, 'ఫిబ్రవరి': 2, 'మార్చి': 3, 'ఏప్రిల్': 4, 'మే': 5, 'జూన్': 6, 'జులై': 7, 'ఆగస్టు': 8, 'సెప్టెంబర్': 9, 'అక్టోబర్': 10,
                             'నవంబర్': 11, 'డిసెంబర్': 12}},
    }
    self._LoadCache()
def _Flush(self, FlushVideoData=False):
    """ Persist the catalog cache (and optionally the video data cache) to disk.

    :param FlushVideoData: also write the video data cache when True
    """
    # NOTE: files are deliberately opened in text mode ('w+'), matching how
    # _LoadCache reads them back with 'r'; the pickle payload uses the default
    # (ASCII) protocol, so this round-trips on this Python 2 code base.
    with open(self._catalogCache, 'w+') as catalogFile:
        pickle.dump(self._catalog, catalogFile)
    if not FlushVideoData:
        return
    with open(self._videodataCache, 'w+') as videoDataFile:
        json.dump(self._videodata, videoDataFile)
def _LoadCache(self):
    """ Load the cached catalog and video data from disk.

    Computes the per-market cache file names, then loads each file if present.
    A corrupted (or pre-'urn2gti', unsafe) file is deleted and the user is
    notified; the in-memory defaults are kept in that case. The catalog is
    only adopted when its 'expiration' timestamp is still in the future.
    """
    from os.path import join as OSPJoin
    from xbmcvfs import exists, delete
    self._catalogCache = OSPJoin(self._g.DATA_PATH, 'PVCatalog{}.pvcp'.format(self._g.MarketID))
    self._videodataCache = OSPJoin(self._g.DATA_PATH, 'PVVideoData{}.pvdp'.format(self._g.MarketID))
    if exists(self._videodataCache):
        try:
            with open(self._videodataCache, 'r') as fp:
                data = json.load(fp)
            if 'urn2gti' not in data:
                raise Exception('Old, unsafe cache data')
            self._videodata = data
        except Exception:  # narrowed from a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt
            Log('Removing corrupted cache file “%s”' % self._videodataCache, Log.DEBUG)
            delete(self._videodataCache)
            self._g.dialog.notification('Corrupted video cache', 'Unable to load the video cache data', xbmcgui.NOTIFICATION_ERROR)
    if exists(self._catalogCache):
        try:
            with open(self._catalogCache, 'r') as fp:
                cached = pickle.load(fp)
            # Adopt the cached catalog only while it hasn't expired yet
            if time.time() < cached['expiration']:
                self._catalog = cached
        except Exception:  # narrowed from a bare `except:` for the same reason as above
            Log('Removing corrupted cache file “%s”' % self._catalogCache, Log.DEBUG)
            delete(self._catalogCache)
            self._g.dialog.notification('Corrupted catalog cache', 'Unable to load the catalog cache data', xbmcgui.NOTIFICATION_ERROR)
def _BeautifyText(self, title):
    """ Correct stylistic errors in Amazon's titles and return the cleaned string """
    fixups = (
        (r'\s+-\s*([^&])', r' – \1'),  # Convert dash from small to medium where needed
        (r'\s*-\s+([^&])', r' – \1'),  # Convert dash from small to medium where needed
        (r'^\s+', ''),                 # Remove leading spaces
        (r'\s+$', ''),                 # Remove trailing spaces
        (r' {2,}', ' '),               # Remove double spacing
        (r'\.\.\.', '…'),              # Replace triple dots with ellipsis
    )
    for pattern, replacement in fixups:
        title = re.sub(pattern, replacement, title)
    return title
def _FQify(self, URL):
    """ Return a fully qualified URL for any relative/protocol-relative input """
    base = self._g.BaseUrl
    if '://' in URL:
        # Already fully qualified
        return URL
    if URL.startswith('//'):
        # Protocol-relative: reuse our base URL's schema
        schema = base.split(':')[0]
        return '{}:{}'.format(schema, URL)
    if URL.startswith('/'):
        # Host-relative path
        return base + URL
    # Bare path: glue it on with a separator — hope and pray we never reach this ¯\_(ツ)_/¯
    return '{}/{}'.format(base, URL)
def _GrabJSON(self, url, bRaw=False):
    """ Extract JSON objects from HTML pages while keeping the API responses intact.

    :param url: page or API endpoint, relative or fully qualified
    :param bRaw: when True, skip pruning of useless/sensitive data
    :return: a single (merged) JSON object, or None when nothing parseable is found

    NOTE(review): this body restores characters that an HTML-stripping pass had
    garbled — the "&#"/"&#x" entity prefixes (standard Fredrik Lundh unescape
    recipe) and the <script type="text/template"> extraction regex. Verify the
    regex against live primevideo.com pages.
    """
    def Unescape(text):
        """ Unescape various html/xml entities in dictionary values, courtesy of Fredrik Lundh """
        def fixup(m):
            """ Unescape entities except for double quotes, lest the JSON breaks """
            import htmlentitydefs
            text = m.group(0)
            if text[:2] == "&#":
                # Numeric character reference
                try:
                    if text[:3] == "&#x":
                        char = int(text[3:-1], 16)
                    else:
                        char = int(text[2:-1])
                    # Keep double quotes escaped so the JSON stays valid
                    return unichr(char) if 34 != char else '\\"'
                except ValueError:
                    pass
            else:
                # Named entity
                try:
                    char = text[1:-1]
                    text = unichr(htmlentitydefs.name2codepoint[char]) if 'quot' != char else '\\"'
                except KeyError:
                    pass
            return text  # leave as is
        text = re.sub(r"&#?\w+;", fixup, text)
        try:
            # Fix mojibake from double-encoded UTF-8
            text = text.encode('latin-1').decode('utf-8')
        except (UnicodeEncodeError, UnicodeDecodeError):
            pass
        return text

    def Merge(o, n):
        """ Merge JSON objects with multiple multi-level collisions """
        if (not n) or (o == n):  # Nothing to do
            return
        elif (type(n) == list) or (type(n) == set):  # Insert into list/set
            for item in n:
                if item not in o:
                    if type(n) == list:
                        o.append(item)
                    else:
                        o.add(item)
        elif type(n) == dict:
            for k in n.keys():
                if k not in o:
                    o[k] = n[k]  # Insert into dictionary
                else:
                    Merge(o[k], n[k])  # Recurse
        else:
            # NOTE: rebinding the local `o` does NOT propagate to the caller —
            # scalar collisions are effectively ignored, as the log admits
            Log('Collision detected during JSON objects merging, overwriting and praying', Log.WARNING)
            o = n

    def Prune(d):
        """ Prune some commonly found sensitive info from JSON response bodies """
        if not d:
            return
        l = d
        if isinstance(l, dict):
            # list() snapshot: we delete keys while iterating (required on Python 3,
            # harmless on Python 2)
            for k in list(l.keys()):
                if k == 'strings':
                    # Keep only the season-selector format string
                    l[k] = {s: l[k][s] for s in ['AVOD_DP_season_selector'] if s in l[k]}
                if (not l[k]) or (k in ['context', 'params', 'playerConfig', 'refine']):
                    del l[k]
            l = d.values()
        for v in l:
            if isinstance(v, dict) or isinstance(v, list):
                Prune(v)

    try:
        from urlparse import urlparse, parse_qs
        from urllib import urlencode
    except ImportError:  # narrowed from a bare `except:`: Python 3 fallback
        from urllib.parse import urlparse, parse_qs, urlencode
    if url.startswith('/search/'):
        # Rewrite plain search pages into API queries
        np = urlparse(url)
        qs = parse_qs(np.query)
        if 'from' in qs:
            qs['startIndex'] = qs['from']
            del qs['from']
        np = np._replace(path='/gp/video/api' + np.path, query=urlencode([(k, v) for k, l in qs.items() for v in l]))
        url = np.geturl()
    r = getURL(self._FQify(url), silent=True, useCookie=True, rjson=False)
    if not r:
        return None
    try:
        r = r.strip()
        if '{' == r[0:1]:
            # Pure API response: a single JSON object
            o = json.loads(Unescape(r))
            if not bRaw:
                Prune(o)
            return o
    except Exception:  # fall through to HTML template scraping below
        pass
    # RESTORED(review): regex garbled by tag stripping — this is the standard
    # pattern for the embedded JSON templates on primevideo.com pages
    matches = re.findall(r'\s*(?:<script type="text/template">|<template>)\s*(\{[^\n]+\})\s*(?:</script>|</template>)\s*', r)
    if not matches:
        Log('No JSON objects found in the page', Log.ERROR)
        return None
    # Create a single object containing all the data from the multiple JSON objects in the page
    o = {}
    for m in matches:
        m = json.loads(Unescape(m))
        if 'props' not in m:
            m = m['widgets']['Storefront']
        else:
            m = m['props']
        if not bRaw:
            # Prune useless/sensitive info (list() snapshot: keys deleted while iterating)
            for k in list(m.keys()):
                if (not m[k]) or (k in ['copyright', 'links', 'logo', 'params', 'playerConfig', 'refine']):
                    del m[k]
            if 'state' in m:
                st = m['state']
                for k in list(st.keys()):
                    if not st[k]:
                        del st[k]
                    elif k in ['features', 'customerPreferences']:
                        del st[k]
        # Prune sensitive context info and merge into o
        if not bRaw:
            Prune(m)
        Merge(o, m)
    return o if o else None
def _TraverseCatalog(self, path, bRefresh=False):
    """ Walk the catalog tree along `path`, returning the reached node and the breadcrumb list.

    :param path: virtual path, components separated by self._separator
    :param bRefresh: stop one component short of the leaf (callers need the parent for mutation)
    :return: (node, breadcrumbs) tuple, or (None, None) when the path is unknown
    """
    from urllib import unquote_plus
    # unquote_plus misbehaves with unicode_literals input: round-trip through latin-1 bytes first
    crumbs = [unquote_plus(p).encode('latin-1').decode('utf-8') for p in path.split(self._separator)]
    if not self._catalog:
        self.BuildRoot()
    node = self._catalog
    lastIndex = len(crumbs) - 1
    for i, name in enumerate(crumbs):
        if bRefresh and (i == lastIndex):
            # Stop one short while refreshing, due to python mutability reasons
            break
        if name not in node:
            self._g.dialog.notification('Catalog error', 'Catalog path not available…', xbmcgui.NOTIFICATION_ERROR)
            return (None, None)
        if 'lazyLoadURL' in node[name]:
            # Materialise lazily-loaded nodes as we pass through them
            self._LazyLoad(node[name], crumbs[0:1 + i])
        node = node[name]
    return (node, crumbs)
def BrowseRoot(self):
    """ Build (if necessary) and display the root PrimeVideo menu """
    # When the catalog is empty and the root cannot be built (network or
    # parsing failure), there is nothing to show
    if (not self._catalog) and (not self.BuildRoot()):
        return
    self.Browse('root')
def BuildRoot(self):
    """ Parse the top menu on primevideo.com and build the root catalog.

    Inserts (best-effort) the watchlist entry, the main navigation sections
    and the search mechanism, then stamps an 11-hour expiration and flushes
    the catalog to disk.

    :return: True on success, False when the home page or its navigation menu
             cannot be fetched/parsed
    """
    home = self._GrabJSON(self._g.BaseUrl)
    if not home:
        return False
    self._catalog['root'] = OrderedDict()
    # Insert the watchlist
    try:
        watchlist = next((x for x in home['yourAccount']['links'] if '/watchlist/' in x['href']), None)
        self._catalog['root']['Watchlist'] = {'title': self._BeautifyText(watchlist['text']), 'lazyLoadURL': watchlist['href']}
    except Exception:  # narrowed from bare `except:` — a missing watchlist is best-effort
        Log('Watchlist link not found', Log.ERROR)
    # Insert the main sections, in order
    try:
        for link in home['mainMenu']['links']:
            self._catalog['root'][link['text']] = {'title': self._BeautifyText(link['text']), 'lazyLoadURL': link['href']}
            if '/home/' in link['href']:
                # The home page itself is already in hand: reuse it instead of refetching
                self._catalog['root'][link['text']]['lazyLoadData'] = home
    except Exception:  # narrowed from bare `except:` — without the menu the catalog is useless
        self._g.dialog.notification('PrimeVideo error', 'Unable to find the navigation menu for primevideo.com', xbmcgui.NOTIFICATION_ERROR)
        Log('Unable to parse the navigation menu for primevideo.com', Log.ERROR)
        return False
    # Insert the searching mechanism
    try:
        sfa = home['searchBar']['searchFormAction']
        # Build the query parametrization
        query = ''
        if 'query' in sfa:
            query += '&'.join(['{}={}'.format(k, v) for k, v in sfa['query'].items()])
        query = query if not query else query + '&'
        self._catalog['root']['Search'] = {
            'title': self._BeautifyText(home['searchBar']['searchFormPlaceholder']),
            'verb': 'pv/search/',
            'endpoint': '{}?{}phrase={{}}'.format(sfa['partialURL'], query)
        }
    except Exception:  # narrowed from bare `except:` — search is optional
        Log('Search functionality not found', Log.ERROR)
    # Set the expiration in 11 hours and flush to disk
    self._catalog['expiration'] = 39600 + int(time.time())
    self._Flush()
    return True
def Browse(self, path, forceSort=None):
    """ Display and navigate the menu for PrimeVideo users.

    :param path: virtual catalog path, components separated by self._separator
    :param forceSort: optional xbmcplugin SORT_METHOD_* override
    """
    # Add multiuser menu if needed
    if (self._s.multiuser) and ('root' == path) and (1 < len(loadUsers())):
        li = xbmcgui.ListItem(getString(30134).format(loadUser('name')))
        li.addContextMenuItems(self._g.CONTEXTMENU_MULTIUSER)
        xbmcplugin.addDirectoryItem(self._g.pluginhandle, '{}pv/browse/root{}SwitchUser'.format(self._g.pluginid, self._separator), li, isFolder=False)
    if ('root' + self._separator + 'SwitchUser') == path:
        if switchUser():
            self.BuildRoot()
        return
    from urllib import quote_plus
    node, breadcrumb = self._TraverseCatalog(path)
    if None is node:
        return
    # Populate children list with empty references
    nodeName = breadcrumb[-1]
    if (nodeName in self._videodata) and ('children' in self._videodata[nodeName]):
        for c in self._videodata[nodeName]['children']:
            if c not in node:
                node[c] = {}
    folderType = 0 if 'root' == path else 1
    metaKeys = ['metadata', 'ref', 'title', 'verb', 'children', 'parent']
    # Exclude me and my sibilings
    # NOTE(review): the span from here to the stray closing `"""` below appears
    # corrupted by extraction — the condition is truncated mid-expression
    # (`0 self._videodata[...]` is missing a comparison operator) and the
    # per-entry ListItem-building loop that must have defined `child`, `entry`,
    # `item`, `bIsVideo`, `title`, `url`, `folder` and `episodeExtraNum` is
    # missing entirely. Reconstruct from upstream history before modifying.
    if (nodeName in self._videodata) and ('siblings' in self._videodata[nodeName]) and (0 self._videodata[child]['metadata']['videometa']['season']):
        sn = self._videodata[child]['metadata']['videometa']['season']
        snid = child
    if None is not snid:
        entry['metadata'] = {'artmeta': self._videodata[snid]['metadata']['artmeta'], 'videometa': {'mediatype': 'tvshow'}}
    """
    if 'metadata' in entry:
        m = entry['metadata']
        if 'artmeta' in m:
            item.setArt(m['artmeta'])
        if 'videometa' in m:
            # episode must contain season number and episode number
            if 'episode' == m['videometa']['mediatype']:
                if 'season' not in m['videometa']:
                    m['videometa']['season'] = self._videodata[nodeName]['metadata']['videometa']['season']
                if 'episode' not in m['videometa']:
                    m['videometa']['episode'] = episodeExtraNum
                    episodeExtraNum += 1
            # https://codedocs.xyz/xbmc/xbmc/group__python__xbmcgui__listitem.html#ga0b71166869bda87ad744942888fb5f14
            item.setInfo('video', m['videometa'])
            try:
                folderType = {'movie': 5, 'episode': 4, 'tvshow': 2, 'season': 3}[m['videometa']['mediatype']]
            except:
                folderType = 5  # Default to movie
        if bIsVideo:
            folder = False
            item.setProperty('IsPlayable', 'true')
            item.setInfo('video', {'title': title})
            if 'runtime' in m:
                item.setInfo('video', {'duration': m['runtime']})
                item.addStreamInfo('video', {'duration': m['runtime']})
    # If it's a video leaf without an actual video, something went wrong with Amazon servers, just hide it
    if (not folder) or (4 > folderType):
        xbmcplugin.addDirectoryItem(self._g.pluginhandle, url, item, isFolder=folder)
    del item
    # Set sort method and view
    # https://codedocs.xyz/xbmc/xbmc/group__python__xbmcplugin.html#ga85b3bff796fd644fb28f87b136025f40
    xbmcplugin.addSortMethod(self._g.pluginhandle, [
        xbmcplugin.SORT_METHOD_NONE,
        xbmcplugin.SORT_METHOD_LABEL_IGNORE_THE,
        xbmcplugin.SORT_METHOD_LABEL_IGNORE_THE,
        xbmcplugin.SORT_METHOD_EPISODE,
        xbmcplugin.SORT_METHOD_EPISODE,
        xbmcplugin.SORT_METHOD_LABEL_IGNORE_THE,
    ][folderType if None is forceSort else forceSort])
    if 'false' == self._g.addon.getSetting("viewenable"):
        # Only vfs and videos to keep Kodi's watched functionalities
        folderType = 0 if 2 > folderType else 1
    # else:
    #     # Actual views, set the main categories as vfs
    #     folderType = 0 if 2 > folderType else 2
    setContentAndView([None, 'videos', 'series', 'season', 'episode', 'movie'][folderType])
def Search(self):
    """ Provide search functionality for PrimeVideo """
    phrase = self._g.dialog.input(getString(24121)).strip(' \t\n\r')
    if not phrase:
        # Nothing typed: close the directory listing as unsuccessful
        xbmcplugin.endOfDirectory(self._g.pluginhandle, succeeded=False)
        return
    Log('Searching "{}"…'.format(phrase), Log.INFO)
    # Seed a lazily-loaded 'search' node from the root search endpoint
    endpoint = self._catalog['root']['Search']['endpoint'].format(phrase)
    self._catalog['search'] = OrderedDict([('lazyLoadURL', endpoint)])
    self.Browse('search', xbmcplugin.SORT_METHOD_NONE)
def Refresh(self, path):
    """ Refresh the cached data for the catalog node at `path`.

    Never-loaded nodes are simply queued for lazy loading; cached movies and
    seasons are reset to a lazyLoadURL pointing at their stored 'ref'; shows
    queue a refresh for every season with a known 'ref'. The queued loads run
    behind a busy dialog.
    """
    refreshes = []
    node, breadcrumb = self._TraverseCatalog(path, True)
    if None is node:
        return
    nodeName = breadcrumb[-1]
    # Only refresh if previously loaded. If not loaded, and specifically asked, perform a full (lazy) loading
    if 'lazyLoadURL' in node[nodeName]:
        refreshes.append((node[nodeName], nodeName, False))
    else:
        bShow = False
        if 'ref' in node[nodeName]:  # ref's in the cache already
            Log('Refreshing element in the cache: {}'.format(nodeName), Log.DEBUG)
            targetURL = node[nodeName]['ref']
        elif 'ref' in self._videodata[nodeName]:  # Movie or Season
            Log('Refreshing element: {}'.format(nodeName), Log.DEBUG)
            targetURL = self._videodata[nodeName]['ref']
        else:  # Show: refresh every season with a known 'ref'
            Log('Refreshing Show: {}'.format(nodeName), Log.DEBUG)
            bShow = True
            for season in [k for k in self._videodata[nodeName]['children'] if (k in self._videodata) and ('ref' in self._videodata[k])]:
                node[nodeName][season] = {'lazyLoadURL': self._videodata[season]['ref']}
                refreshes.append((node[nodeName][season], season, True))
        if not bShow:
            # Reset the basic metadata, preserving only the title
            title = node[nodeName]['title'] if 'title' in node[nodeName] else None
            node[nodeName] = {'lazyLoadURL': targetURL}
            if title:
                node[nodeName]['title'] = title
            refreshes.append((node[nodeName], nodeName, True))
    # BUG FIX: `xbmc` is used below but never visibly imported in this module
    # (it may have been provided by a star import); import it explicitly here
    # so the busy dialog cannot fail with a NameError.
    import xbmc
    from contextlib import contextmanager

    @contextmanager
    def _busy_dialog():
        """ Show the uncancellable busy spinner while the refreshes run """
        xbmc.executebuiltin('ActivateWindow(busydialognocancel)')
        try:
            yield
        finally:
            xbmc.executebuiltin('Dialog.Close(busydialognocancel)')
    with _busy_dialog():
        for r in refreshes:
            self._LazyLoad(r[0], r[1], r[2])
def Action(self, path, parm):
    """ Fire a one-shot action request (e.g. watchlist toggling) against PrimeVideo.

    The POST itself performs the action; the response body is irrelevant.
    """
    getURL(self._FQify(path), silent=True, useCookie=True, rjson=False, postdata=parm)
def _LazyLoad(self, obj, breadcrumb=None, bCacheRefresh=False):
    """ Loader and parser of all the PrimeVideo.com queries.

    Consumes `obj['lazyLoadURL']` (and a pre-fetched `obj['lazyLoadData']` when
    present), scraping pages and following pagination until the request queue
    is exhausted. `obj` — a catalog subtree — is updated in place, and the
    catalog/video-data caches are flushed at the end.

    :param obj: catalog node to populate in place
    :param breadcrumb: path components leading to `obj` — appears unused in
        this body; presumably kept for the callers' signature — TODO confirm
    :param bCacheRefresh: when True, re-scrape even previously cached entries
    """
    def MaxSize(imgUrl):
        """ Strip the dynamic resize triggers from the URL (and other effects, such as blur) """
        return re.sub(r'\._.*_\.', '.', imgUrl)

    def ExtractURN(url):
        """ Extract the unique resource name identifier from a /dp/ or /detail/ URL """
        ret = re.search(r'(?:/gp/video)?/d(?:p|etail)/([^/]+)/', url)
        return None if not ret else ret.group(1)

    def DelocalizeDate(lang, datestr):
        """ Convert language based timestamps into YYYY-MM-DD; falls back to en_US,
            and returns the input unchanged when no format matches """
        if lang not in self._dateParserData:
            Log('Unable to decode date "{}": language "{}" not supported'.format(datestr, lang), Log.WARNING)
            return datestr
        p = re.search(self._dateParserData[lang]['deconstruct'], datestr.lower())
        if None is p:
            Log('Unable to parse date "{}" with language "{}"{}'.format(datestr, lang, '' if 'en_US' != lang else ': trying english'), Log.WARNING)
            if 'en_US' == lang:
                return datestr
            # Sometimes Amazon returns english everything, let's try to figure out if this is the case
            lang = 'en_US'
            p = re.search(self._dateParserData[lang]['deconstruct'], datestr.lower())
            if None is p:
                Log('Unable to parse date "{}" with language "{}": format changed?'.format(datestr, lang), Log.WARNING)
                return datestr
        p = list(p.groups())
        # TODO: format es: "sep 20, 2019 21:00 CEST"
        try:
            # Translate the localized month name into its number
            p[self._dateParserData[lang]['month']] = self._dateParserData[lang]['months'][p[self._dateParserData[lang]['month']]]
        except:
            pass
        return self._dateParserData[lang]['reassemble'].format(p[0], p[1], p[2])

    def NotifyUser(msg):
        """ Pop up messages while scraping to inform users of progress """
        if not hasattr(NotifyUser, 'lastNotification'):
            NotifyUser.lastNotification = 0
        if NotifyUser.lastNotification < time.time():
            # Only update once every other second, to avoid endless message queue
            NotifyUser.lastNotification = 1 + time.time()
            self._g.dialog.notification(self._g.addon.getAddonInfo('name'), msg, time=1000, sound=False)

    def MultiRegexParsing(content, o):
        """ Takes a dictionary of regex and applies them to content, returning a filtered dictionary of results.

        NOTE(review): this looks like dead/legacy code — it references `Unescape`,
        which is only defined inside `_GrabJSON` and is NOT in scope here, so any
        call would raise NameError; the 'cast'/'genre' pattern also appears
        extraction-mangled. Confirm before relying on it.
        """
        for i in o:
            o[i] = re.search(o[i], content, flags=re.DOTALL)
            if None is not o[i]:
                o[i] = o[i].groups()
                o[i] = Unescape(o[i][0]) if 1 == len(o[i]) else list(o[i])
                if 'image' == i:
                    o[i] = MaxSize(o[i])
                elif 'season' == i:
                    o[i] = {'locale': Unescape(o[i][0]), 'season': int(o[i][1]), 'format': Unescape('{} {}'.format(o[i][0], o[i][1]))}
                elif ('episode' == i) or ('year' == i):
                    o[i] = int(o[i])
                elif ('cast' == i) or ('genre' == i) or ('director' == i):
                    o[i] = re.sub(r'\s*?(a|span|input|label.*?/label)\s*[^>]*>\s*', '', o[i][1])  # Strip everything useless
                    o[i] = re.split(r'\s*[,;]\s*', o[i])
                    # Cast is always to be sent as a list, single string is only required/preferred for Genre and Director
                    if ('cast' != i) and (1 == len(o[i])):
                        o[i] = o[i][0]
                elif 'rating' == i:
                    o[i] = int(o[i][0]) + (int(o[i][1]) / 10.0)
                elif 'premiered' == i:
                    o[i] = DelocalizeDate(amzLang, o[i])
        return o

    def ParseSinglePage(o, bCacheRefresh, data=None, url=None):
        """ Parse PrimeVideo.com single movie/season pages.
            `url` is discarded in favour of `data`, if present.

        :return: True when self._videodata was updated, else False
        """
        urn = ExtractURN(url)
        # Load from cache, if available
        if (not bCacheRefresh) and (urn in self._videodata['urn2gti']) and (self._videodata['urn2gti'][urn] in self._videodata):
            gti = self._videodata['urn2gti'][urn]
            vd = self._videodata[gti]
            # Movie
            if 'children' not in vd:
                if gti not in o:
                    o[gti] = vd
                return False
            # TV Series: expose this season and all of its siblings, sorted by season number
            siblings = vd['siblings'][:]
            siblings.append(gti)
            siblings = sorted(siblings, key=(lambda k: self._videodata[k]['metadata']['videometa']['season']))
            for gti in siblings:
                if gti not in o:
                    o[gti] = self._videodata[gti]
                for c in o[gti]['children']:
                    if c not in o[gti]:
                        o[gti][c] = {}
            return False
        if url:
            url = self._FQify(url)
        if not data:
            if not url:
                return False
            data = self._GrabJSON(url)
            # Maybe Error reason: 404 error
            if not data:
                return False
        # Video/season/movie data are in the `state` field of the response
        if 'state' not in data:
            return False
        state = data['state']  # Video info
        GTIs = []  # List of inserted GTIs
        parents = {}  # Map of parents
        bUpdated = False  # Video data updated
        # Seasons
        if 'self' in state:
            # "self": {"amzn1.dv.gti.[…]": {"gti": "amzn1.dv.gti.[…]", "link": "/detail/[…]"}}
            for gti, s in state['self'].items():
                if s['titleType'] != 'season':
                    continue
                if gti not in self._videodata:
                    # The page's own season gets a 'ref', the others lazy-load
                    o[gti] = {('ref' if state['pageTitleId'] == gti else 'lazyLoadURL'): s['link']}
                    self._videodata[gti] = {'ref': s['link'], 'children': [], 'siblings': []}
                    bUpdated = True
                else:
                    o[gti] = self._videodata[gti]
                GTIs.append(gti)
                siblings = [k for k, ss in state['self'].items() if k != gti and ss['titleType'] == s['titleType']]
                if siblings != self._videodata[gti]['siblings']:
                    self._videodata[gti]['siblings'] = siblings
                    bUpdated = True
        # live streaming storefront
        elif 'pageTitleId' in state:
            gti = state['pageTitleId']
            if gti not in self._videodata:
                o[gti] = {'ref': o['ref']}
                self._videodata[gti] = {'ref': o['ref'], 'children': [], 'siblings': []}
            else:
                o[gti] = self._videodata[gti]
            GTIs.append(gti)
        # Episodes lists
        if 'collections' in state:
            # "collections": {"amzn1.dv.gti.[…]": [{"titleIds": ["amzn1.dv.gti.[…]", "amzn1.dv.gti.[…]"]}]}
            for gti, lc in state['collections'].items():
                for le in lc:
                    for e in le['titleIds']:
                        o[gti][e] = {}
                        GTIs.append(e)
                        # Save parent/children relationships
                        parents[e] = gti
                        if e not in self._videodata[gti]['children']:
                            self._videodata[gti]['children'].append(e)
                            bUpdated = True
        # Video info
        if 'detail' not in state:
            return bUpdated
        if urn not in self._videodata['urn2gti']:
            self._videodata['urn2gti'][urn] = state['pageTitleId']
        for gti, item in state['detail']['detail'].items():
            if gti not in GTIs:  # Most likely (surely?) movie
                GTIs.append(gti)
                o[gti] = {}
            if gti not in self._videodata:
                self._videodata[gti] = {}
            vd = self._videodata[gti]
            # Meta prep
            # NOTE(review): `bUpdate` (here and in the three blocks below) is a
            # write-only local — presumably `bUpdated` was intended, so these
            # four mutations never trigger a cache flush on their own; confirm.
            if 'metadata' not in vd:
                vd['metadata'] = {'compactGTI': urn, 'artmeta': {}, 'videometa': {}}
                bUpdate = True
            if 'artmeta' not in vd['metadata']:
                vd['metadata']['artmeta'] = {}
                bUpdate = True
            if 'videometa' not in vd['metadata']:
                vd['metadata']['videometa'] = {}
                bUpdate = True
            # Parent
            if gti in parents:
                vd['parent'] = parents[gti]
                bUpdate = True
            # Title
            if bCacheRefresh or ('title' not in vd):
                if 'seasonNumber' not in item:
                    vd['title'] = self._BeautifyText(item['title'])
                    bUpdated = True
                else:
                    try:
                        # vd['title'] = state['strings']['AVOD_DP_season_selector'].format(seasonNumber=item['seasonNumber'])
                        vd['title'] = self._BeautifyText(item['title'])
                        bUpdated = True
                    except:
                        vd['title'] = 'Season {}'.format(item['seasonNumber'])
                        bUpdated = True
            # Images
            for k, v in {'thumb': 'packshot', 'poster': 'titleshot', 'fanart': 'heroshot'}.items():
                if (bCacheRefresh or (k not in vd['metadata']['artmeta'])) and \
                   ('images' in item) and (v in item['images']) and item['images'][v]:
                    vd['metadata']['artmeta'][k] = item['images'][v]
                    bUpdated = True
            # Synopsis, media type, year, duration
            for k, v in {'plot': 'synopsis', 'mediatype': 'titleType', 'year': 'releaseYear', 'duration': 'duration'}.items():
                if (bCacheRefresh or (k not in vd['metadata']['videometa'])) and (v in item):
                    vd['metadata']['videometa'][k] = item[v]
                    bUpdated = True
            # check mediatype for events:
            if 'mediatype' in vd['metadata']['videometa']:
                mt = vd['metadata']['videometa']['mediatype']
                if 'EVENT' in mt:
                    # Events with children behave like seasons, childless ones like movies
                    if 'children' in vd and 0 < len(vd['children']):
                        vd['metadata']['videometa']['mediatype'] = 'season'
                    else:
                        vd['metadata']['videometa']['mediatype'] = 'movie'
            # Genres
            if (bCacheRefresh or ('genre' not in vd['metadata']['videometa'])) and ('genres' in item) and item['genres']:
                vd['metadata']['videometa']['genre'] = [g['text'] for g in item['genres']]
                bUpdated = True
            # Premiered/Aired
            if (bCacheRefresh or ('premiered' not in vd['metadata']['videometa'])) and ('releaseDate' in item) and item['releaseDate']:
                vd['metadata']['videometa']['premiered'] = DelocalizeDate(amzLang, item['releaseDate'])
                vd['metadata']['videometa']['aired'] = vd['metadata']['videometa']['premiered']
                bUpdated = True
            # MPAA
            if (bCacheRefresh or ('mpaa' not in vd['metadata']['videometa'])) and \
               ('ratingBadge' in item) and ('displayText' in item['ratingBadge']) and item['ratingBadge']['displayText']:
                vd['metadata']['videometa']['mpaa'] = item['ratingBadge']['displayText']
                bUpdated = True
            # Contributors (`producers` are ignored)
            if 'contributors' in item:
                # NOTE(review): duplicate 'cast' key — at dict-literal build time
                # 'starringActors' is overwritten by 'supportingActors', so starring
                # actors are never harvested; verify intent.
                for k, v in {'director': 'directors', 'cast': 'starringActors', 'cast': 'supportingActors'}.items():
                    if v in item['contributors']:
                        for p in item['contributors'][v]:
                            try:
                                vd['metadata']['videometa'][k].append(p['name'])
                                bUpdated = True
                            except KeyError:
                                # First contributor of this kind: start the list
                                vd['metadata']['videometa'][k] = [p['name']]
                                bUpdated = True
            # Season, TV show title
            if ('seasonNumber' in item) and item['seasonNumber']:
                if bCacheRefresh or ('season' not in vd['metadata']['videometa']):
                    vd['metadata']['videometa']['season'] = item['seasonNumber']
                    bUpdated = True
                if bCacheRefresh or ('tvshowtitle' not in vd['metadata']['videometa']):
                    vd['metadata']['videometa']['tvshowtitle'] = item['parentTitle']
                    bUpdated = True
            # Episode, Season, TV show title
            if ('episodeNumber' in item) and item['episodeNumber']:
                if bCacheRefresh or ('episode' not in vd['metadata']['videometa']):
                    vd['metadata']['videometa']['episode'] = item['episodeNumber']
                    bUpdated = True
                if bCacheRefresh or ('season' not in vd['metadata']['videometa']):
                    try:
                        # Inherit the season number from the parent season
                        vd['metadata']['videometa']['season'] = self._videodata[vd['parent']]['metadata']['videometa']['season']
                        bUpdated = True
                    except: pass
                if bCacheRefresh or ('tvshowtitle' not in vd['metadata']['videometa']):
                    try:
                        vd['metadata']['videometa']['tvshowtitle'] = self._videodata[vd['parent']]['metadata']['videometa']['parentTitle']
                        bUpdated = True
                    except: pass
            # when the series is updated it is possible that the metadata
            # will be lost because they are obtained from the series
            # container at the beginning
            if gti == state['pageTitleId'] and 'metadata' not in o:
                o['metadata'] = vd['metadata']
        # IMDB ratings — "imdb": {"amzn1.dv.gti.[…]": {"score": 8.5}}
        if ('imdb' in state) and state['imdb']:
            for gti in state['imdb']:
                vmd = self._videodata[gti]['metadata']['videometa']
                if (bCacheRefresh or ('rating' not in vmd)) and ('score' in state['imdb'][gti]) and state['imdb'][gti]['score']:
                    vmd['rating'] = state['imdb'][gti]['score']
                    bUpdated = True
        # Trailer — "trailer": {"amzn1.dv.gti.[…]": {"playbackID": "amzn1.dv.gti.[…]", "playbackURL": "/detail/[ShortGTI]/ref=atv_dp_watch_trailer?autoplay=trailer"}}
        if ('trailer' in state) and state['trailer']:
            for gti in state['trailer']:
                if 'trailer' not in self._videodata[gti]:
                    self._videodata[gti]['trailer'] = True
                    bUpdated = True
        # Watchlist toggle actions
        if 'watchlist' in state and state['watchlist']:
            for gti, c in state['watchlist'].items():
                if 'query' in c['endpoint']:
                    query = ''
                    query += '&'.join(['{}={}'.format(k, v) for k, v in c['endpoint']['query'].items()])
                    self._videodata[gti]['actions'] = [{'title': c['text']['string'],
                                                       'url': '{}?{}'.format(c['endpoint']['partialURL'], query)}]
                    bUpdated = True
        return bUpdated

    if 'lazyLoadURL' not in obj:
        return
    requestURLs = [obj['lazyLoadURL']]
    amzLang = None
    if None is not requestURLs[0]:
        # Find the locale amazon's using (from the lc-main-av cookie)
        cj = MechanizeLogin()
        if cj:
            amzLang = cj.get('lc-main-av', domain='.primevideo.com', path='/')
    amzLang = amzLang if amzLang else 'en_US'
    bUpdatedVideoData = False  # Whether or not the pvData has been updated
    while 0 < len(requestURLs):
        requestURL = requestURLs.pop(0)
        o = obj
        # Load content
        bCouldNotParse = False
        try:
            cnt = None
            if 'lazyLoadData' in o:
                # Pre-fetched payload (e.g. the home page): consume it once
                cnt = o['lazyLoadData']
                del o['lazyLoadData']
            if not cnt:
                urn = ExtractURN(requestURL)
                if (not bCacheRefresh) and urn and (urn in self._videodata['urn2gti']):
                    # Known URN: serve from the video data cache
                    ParseSinglePage(o, False, url=requestURL)
                    if 'lazyLoadURL' in o:
                        if 'ref' not in o:
                            o['ref'] = o['lazyLoadURL']
                        del o['lazyLoadURL']
                    continue
                else:
                    cnt = self._GrabJSON(requestURL)
                if cnt and ('lazyLoadURL' in o):
                    # Loading succeeded: convert the lazy URL into a stable 'ref'
                    if 'ref' not in o:
                        o['ref'] = o['lazyLoadURL']
                    del o['lazyLoadURL']
        except:
            bCouldNotParse = True
        if bCouldNotParse or (not cnt):
            self._g.dialog.notification(getString(30251), requestURL, xbmcgui.NOTIFICATION_ERROR)
            Log('Unable to fetch the url: {}'.format(requestURL), Log.ERROR)
            continue
        # Categories
        if 'collections' in cnt:
            for collection in cnt['collections']:
                o[collection['text']] = {'title': self._BeautifyText(collection['text'])}
                if 'seeMoreLink' in collection:
                    o[collection['text']]['lazyLoadURL'] = collection['seeMoreLink']['url']
                else:
                    # No dedicated page: reuse this payload when the node is opened
                    o[collection['text']]['lazyLoadURL'] = requestURL
                    o[collection['text']]['lazyLoadData'] = collection
        # Widow list / API Search
        if ('items' in cnt):
            for item in cnt['items']:
                # Search results
                if 'heading' in item:
                    title = item['heading']
                    iu = item['href']
                    try:
                        t = item['watchlistAction']['endpoint']['query']['titleType']
                    except:
                        t = None
                    Log('Found {}, type: {}'.format(title, t))
                    if 'season' != t:
                        bUpdatedVideoData |= ParseSinglePage(o, bCacheRefresh, url=iu)
                    else:
                        o[title] = {
                            'title': self._BeautifyText(title),
                            'lazyLoadURL': iu,
                            'metadata': {
                                'artmeta': {
                                    'thumb': item['imageSrc']
                                },
                                'videometa': {
                                    'mediatype': 'season',
                                    'plot': item['synopsis']
                                }
                            }
                        }
                # movie
                elif 'titleID' in item:
                    bUpdatedVideoData |= ParseSinglePage(o, bCacheRefresh, url=item['link']['url'])
                # Watchlist
                else:
                    Log('Show all seasons in watchlist: {}'.format(self._s.dispShowOnly))
                    title = item['title']
                    iu = item['link']['url']
                    o[title] = {
                        'title': self._BeautifyText(title),
                        'lazyLoadURL': iu,
                        'metadata': {
                            'artmeta': {
                                'thumb': item['image']['url']
                            },
                            'videometa': {
                                'mediatype': 'season',
                            }
                        }
                    }
        # Search/list
        if ('results' in cnt) and ('items' in cnt['results']):
            for item in cnt['results']['items']:
                if 'season' not in item:
                    bUpdatedVideoData |= ParseSinglePage(o, bCacheRefresh, url=item['title']['url'])
                else:
                    if item['title']['text'] not in o:
                        o[item['title']['text']] = {
                            'title': self._BeautifyText(item['title']['text']),
                            'lazyLoadURL': item['title']['url'],
                            'metadata': {
                                'artmeta': {
                                    'thumb': MaxSize(item['packshot']['image']['src'])
                                }
                            }
                        }
        # Watchlist
        if 'filters' in cnt:
            for f in cnt['filters']:
                # if it is not applied in all filters it indicates that it is the initial
                if 'applied' not in cnt['filters'][len(cnt['filters'])-1]:
                    o[f['id']] = {'title': f['text'], 'lazyLoadURL': f['apiUrl' if 'apiUrl' in f else 'href']}
        # NOTE(review): the '*className*' keys here and in the paginator below
        # look mangled (possibly '__className__' originally); verify against
        # live API payloads before relying on these branches.
        if 'content' in cnt and 'items' in cnt['content'] and '*className*' in cnt['content']:
            for item in cnt['content']['items']:
                o[item['titleID']] = {}
                bUpdatedVideoData |= ParseSinglePage(o[item['titleID']], bCacheRefresh, url=item['href'])
        # Single page
        if 'state' in cnt:
            bUpdatedVideoData |= ParseSinglePage(o, bCacheRefresh, data=cnt, url=requestURL)
        # Pagination
        if 'pagination' in cnt:
            page = None
            if 'apiUrl' in cnt['pagination']:
                page = cnt['pagination']['apiUrl']
            elif 'paginator' in cnt['pagination']:
                page = next((x['href'] for x in cnt['pagination']['paginator'] if x['*className*'] == 'atv.wps.PaginatorNext'), None)
            if page:
                requestURLs.append(page)
            else:
                Log('Unknown error while parsing pagination', Log.ERROR)
        # Notify new page
        if 0 < len(requestURLs):
            NotifyUser(getString(30252))
    # Flush catalog and data
    self._Flush(bCacheRefresh or bUpdatedVideoData)