# -*- coding: utf-8 -*-

import re
import requests

import six
from six import ensure_str
from six.moves.urllib_parse import urlparse

from resources.lib.modules import dom_parser
from resources.lib.modules import log_utils

try: # Py2
    from HTMLParser import HTMLParser
    unescape = HTMLParser().unescape
except ImportError: # Py3:
    from html import unescape


# Desktop browser identity (Firefox 97 on 64-bit Windows 10). scrapePage()
# refers to this constant when choosing the User-Agent request header.
UserAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:97.0) Gecko/20100101 Firefox/97.0'
# Mobile browser identity (Firefox 83 on Android 10). Not referenced in the
# visible part of this module; presumably imported by callers.
MobileUserAgent = 'Mozilla/5.0 (Android 10; Mobile; rv:83.0) Gecko/83.0 Firefox/83.0'

# Shared regular expressions for pulling media links out of page source.
# None of them is referenced in the visible part of this module, so they are
# presumably imported by callers. Each pattern has one capture group unless
# noted; no flags are baked in, so "." does not match a newline by default.

# Text starting at "iframe" or "source", then (lazily) anything up to a
# src=<quote>; captures the quoted src value.
regex_pattern1 = r'(?:iframe|source).+?(?:src)=(?:\"|\')(.+?)(?:\"|\')'
# Quoted value of a data-video, data-src or data-href attribute.
regex_pattern2 = r'(?:data-video|data-src|data-href)=(?:\"|\')(.+?)(?:\"|\')'
# "file:" or "source:" (colon directly after the key) followed by optional
# whitespace and a quoted value; captures the value.
regex_pattern3 = r'(?:file|source)(?:\:)\s*(?:\"|\')(.+?)(?:\"|\')'
# A magnet link: "magnet:?" and everything up to the next quote character.
regex_pattern4 = r'''(magnet:\?[^"']+)'''
# Case-insensitive "<iframe ... src=" with a double-quoted value only.
regex_pattern5 = r'<[iI][fF][rR][aA][mM][eE].+?[sS][rR][cC]="(.+?)"'
# A quoted "file" key, a colon, then a quoted NON-empty value.
regex_pattern6 = r'''['"]file['"]\s*:\s*['"]([^'"]+)'''
# Like regex_pattern6, but the quotes around the key are optional and the
# captured value may be empty.
regex_pattern7 = r'''['"]?file['"]?\s*:\s*['"]([^'"]*)'''
# "file", an optional closing quote, a colon, then a quoted value (lazy).
regex_pattern8 = r'file(?:\'|\")?\s*(?:\:)\s*(?:\"|\')(.+?)(?:\"|\')'
# Contents of a "sources: [ ... ]" list, up to the first "]".
regex_pattern9 = r'sources\s*:\s*\[(.+?)\]'
# Contents of a "{ ... }" group, up to the first "}".
regex_pattern10 = r'\{(.+?)\}'


def re_findall(html, regex):
    """Return all non-overlapping matches of ``regex`` in ``html``.

    Convenience wrapper around ``re.findall`` that takes the text first and
    the pattern second.

    :param html: text to search.
    :param regex: pattern string (or compiled pattern).
    :return: list of matches, exactly as ``re.findall`` produces them.
    """
    return re.findall(regex, html)


def re_compile(html, regex):
    """Compile ``regex`` and return every match it finds in ``html``.

    :param html: text to search.
    :param regex: pattern string (or compiled pattern).
    :return: list of matches from the compiled pattern's ``findall``.
    """
    pattern = re.compile(regex)
    return pattern.findall(html)


def unpacked(html):
    """Return ``jsunpack.unpack(html)`` when ``jsunpack.detect(html)`` is truthy.

    :param html: page source to inspect.
    :return: the unpacked result, or an empty string when nothing is detected.
    """
    # Imported lazily, only when this function is actually called.
    from resources.lib.modules import jsunpack
    if not jsunpack.detect(html):
        return ''
    return jsunpack.unpack(html)


def parseDOM(html, name='', attrs=None, ret=False):
    """Query ``html`` through ``dom_parser.parse_dom`` and flatten the result.

    :param html: markup to search.
    :param name: tag name handed to ``dom_parser.parse_dom``.
    :param attrs: optional mapping of attribute name -> regex source. Every
        non-empty value gets a trailing ``$`` appended before being compiled.
    :param ret: attribute name to extract from each match; when falsy the
        content of each match is returned instead.
    :return: list of attribute values (``ret`` given) or of element contents.
    """
    if attrs:
        compiled = {}
        for key, value in six.iteritems(attrs):
            # Anchor non-empty patterns at the end of the attribute value.
            suffix = '$' if value else ''
            compiled[key] = re.compile(value + suffix)
        attrs = compiled

    results = dom_parser.parse_dom(html, name, attrs, ret)

    if not ret:
        return [result.content for result in results]
    # Attribute keys are looked up in lower case.
    return [result.attrs[ret.lower()] for result in results]


def remove_codes(string):
    """Delete every ``<...>`` span (at least one character inside) from ``string``.

    The match is lazy and does not cross newlines, so a tag split over
    several lines is left in place.

    :param string: text possibly containing markup tags.
    :return: the text with all matched tags removed.
    """
    return re.sub('<.+?>', '', string)


def replace_html_entities(string):
    """Replace a fixed set of HTML entities and escaped slashes in ``string``.

    Handles the named and decimal forms of ``<``, ``>``, ``&``, ``"`` and
    ``'``, plus a backslash-escaped forward slash. Replacements are applied
    one after another in the order listed below.

    The quote entities now decode to a bare double quote; previously a
    stray space was emitted in front of it.

    :param string: text to decode.
    :return: the decoded text.
    """
    replacements = (
        ('&lt;', '<'), ('&#60;', '<'),
        ('&gt;', '>'), ('&#62;', '>'),
        ('&amp;', '&'), ('&#38;', '&'),
        ('&quot;', '"'), ('&#34;', '"'),
        ('&apos;', "'"), ('&#39;', "'"),
        # JSON-style escaped slash -> plain slash.
        ('\\/', '/'),
    )
    for entity, char in replacements:
        string = string.replace(entity, char)
    return string


def replaceHTMLCodes(txt):
    """Decode HTML entities in ``txt`` and strip escape noise.

    Steps, in order: add a missing ``;`` to numeric character references,
    run the module-level ``unescape``, replace a handful of entities that
    are still present afterwards (for example input that was encoded twice),
    turn ``%2B`` into ``+``, drop every backslash, and trim surrounding
    whitespace.

    :param txt: text to clean.
    :return: the cleaned text passed through ``six.ensure_str``.
    """
    # "&#39s" -> "&#39;s": terminate numeric references that lack a semicolon.
    # NOTE(review): the second "^" inside the character class is a literal
    # caret, so a reference directly followed by "^" is left untouched -
    # probably unintended; confirm before changing.
    txt = re.sub("(&#[0-9]+)([^;^0-9]+)", "\\1;\\2", txt)
    txt = unescape(txt)
    # Second pass for entities that survived unescape().
    txt = txt.replace("&quot;", "\"")
    txt = txt.replace("&amp;", "&")
    txt = txt.replace("&lt;", "<")
    txt = txt.replace("&gt;", ">")
    txt = txt.replace("&#38;", "&")
    txt = txt.replace("&nbsp;", "")
    txt = txt.replace('&#8230;', '...')
    txt = txt.replace('&#8217;', '\'')
    txt = txt.replace('&#8211;', '-')
    txt = txt.replace("%2B", "+")
    # Escaped slash -> "/". Spelled "\\/" (the same two characters as before)
    # because "\/" is an invalid escape sequence that newer Python versions
    # warn about.
    txt = txt.replace("\\/", "/")
    # Any backslash still left is dropped.
    txt = txt.replace("\\", "")
    txt = txt.strip()
    return ensure_str(txt)


def removeNonAscii(s):
    """Return ``s`` without the characters whose code point is 128 or higher.

    :param s: text to filter.
    :return: a string made of the remaining characters, in original order.
    """
    kept = []
    for char in s:
        if ord(char) < 128:
            kept.append(char)
    return "".join(kept)


def scrapePage(url, referer=None, headers=None, post=None, cookie=None):
    """Fetch ``url`` with a throw-away ``requests`` session.

    Header precedence (highest first):

    * anything supplied in ``headers``;
    * the ``referer`` / ``cookie`` arguments;
    * defaults: the module ``UserAgent`` and a Referer built from the
      scheme and host of ``url``.

    Fixes over the previous version: a Referer passed inside ``headers`` is
    no longer overwritten by the site root, and the module ``UserAgent`` is
    now really sent when the caller gives none (a new ``requests.Session``
    already carries its own default User-Agent, so the old "header missing"
    check never fired).

    :param url: address to fetch; a protocol-relative ``//host/...`` URL is
        given an ``https:`` prefix.
    :param referer: Referer value used when ``headers`` has none.
    :param headers: optional mapping of extra request headers.
    :param post: when truthy, sent as the body of a POST; otherwise a GET
        is issued.
    :param cookie: Cookie header value used when ``headers`` has none.
    :return: the response object with its encoding forced to ``utf-8``, or
        ``None`` when ``url`` is empty or any exception occurs.
    """
    try:
        if not url:
            return
        url = "https:" + url if url.startswith('//') else url
        with requests.Session() as session:
            # Install our default first so caller headers can still override
            # it; the session's header mapping is case-insensitive.
            session.headers.update({'User-Agent': UserAgent})
            if headers:
                session.headers.update(headers)
            if 'Referer' not in session.headers:
                if not referer:
                    # Fall back to the site root of the target URL.
                    elements = urlparse(url)
                    referer = '%s://%s' % (elements.scheme, (elements.netloc or elements.path))
                session.headers.update({'Referer': referer})
            if cookie and 'Cookie' not in session.headers:
                session.headers.update({'Cookie': cookie})
            if post:
                page = session.post(url, data=post, timeout=10)
            else:
                page = session.get(url, timeout=10)
            page.encoding = 'utf-8'
        return page
    except Exception:
        # Best effort: any failure is logged and reported as "no page".
        log_utils.log('scrapePage', 1)
        return


def url_ok(url): #  Old Code Saved.
    """Report whether ``url`` answers with HTTP status 200 or 301.

    :param url: address to probe via ``scrapePage``.
    :return: ``True`` for a 200/301 response; ``False`` for any other status
        or when ``scrapePage`` returns ``None`` (empty URL or failed
        request), which previously raised ``AttributeError``.
    """
    r = scrapePage(url)
    if r is None:
        return False
    return r.status_code in (200, 301)

