import logging
import webbrowser
import csv
import json
import time
import sys
import os
from datetime import datetime, timedelta
import itertools
import argparse
from collections import OrderedDict
import httplib2
from oauth2client.file import Storage
from oauth2client.client import flow_from_clientsecrets
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

WEBMASTER_CREDENTIALS_FILE_PATH = "webmaster_credentials.dat"

def rate_limit(max_per_minute):
    """
    Decorator function to prevent more than x calls per minute of any function
    Args:
        max_per_minute. Numeric type.
        The maximum number of times the function should run per minute.
    """
    min_interval = 60.0 / float(max_per_minute)
    def decorate(func):
        last_time_called = [0.0]
        def rate_limited_function(*args, **kwargs):
            elapsed = time.clock() - last_time_called[0]
            wait_for = min_interval - elapsed
            if wait_for > 0:
                time.sleep(wait_for)
            ret = func(*args, **kwargs)
            last_time_called[0] = time.clock()
            return ret
        return rate_limited_function
    return decorate


def acquire_new_oauth2_credentials(secrets_file):
    """
    Args:
        secrets_file. The file path to a JSON file of client secrets, containing:
            client_id; client_secret; redirect_uris; auth_uri; token_uri.
    Returns:
        credentials for use with Google APIs
    """
    flow = flow_from_clientsecrets(
        secrets_file,
        scope="https://www.googleapis.com/auth/webmasters.readonly",
        redirect_uri="http://localhost")
    auth_uri = flow.step1_get_authorize_url()
    webbrowser.open(auth_uri)
    print("Please enter the following URL in a browser " + auth_uri)
    auth_code = input("Enter the authentication code: ")
    credentials = flow.step2_exchange(auth_code)
    return credentials


def load_oauth2_credentials(secrets_file):
    """
    Args:
        secrets_file. The file path to a JSON file of client secrets.
    Returns:
        credentials for use with Google APIs.
    Side effect:
        If the secrets file did not exist, fetch the appropriate credentials and create a new one.
    """
    storage = Storage(WEBMASTER_CREDENTIALS_FILE_PATH)
    credentials = storage.get()
    if credentials is None or credentials.invalid:
        credentials = acquire_new_oauth2_credentials(secrets_file)
    storage.put(credentials)
    return credentials


def create_search_console_client(credentials):
    """
    The search console client allows us to perform queries against the API.
    To create it, pass in your already authenticated credentials

    Args:
        credentials. An object representing Google API credentials.
    Returns:
        service. An object used to perform queries against the API.
    """
    http_auth = httplib2.Http()
    http_auth = credentials.authorize(http_auth)
    service = build('webmasters', 'v3', http=http_auth)
    return service


def date_range(start_date, end_date, delta=timedelta(days=1)):
    """
    Yields a stream of datetime objects, for all days within a range.
    The range is inclusive, so both start_date and end_date will be returned,
    as well as all dates in between.

    Args:
        start_date: The datetime object representing the first day in the range.
        end_date: The datetime object representing the second day in the range.
        delta: A datetime.timedelta instance, specifying the step interval. Defaults to one day.
    Yields:
        Each datetime object in the range.
    """
    current_date = start_date
    while current_date <= end_date:
        yield current_date
        current_date += delta


def generate_filters(**kwargs):
    """
    Yields a filter list for each combination of the args provided.
    """
    kwargs = OrderedDict((k, v) for k, v in kwargs.items() if v)
    dimensions = kwargs.keys()
    values = list(kwargs.values())
    for vals in itertools.product(*values):
        yield [{
            'dimension': dim,
            'operator': 'equals',
            'expression': val} for dim, val in zip(dimensions, vals)
              ]


@rate_limit(200)
def execute_request(service, property_uri, request, max_retries=5, wait_interval=4,
                    retry_errors=(503, 500)):
    """
    Executes a searchanalytics request.
    Args:
        service: The webmasters service object/client to use for execution.
        property_uri: Matches the URI in Google Search Console.
        request: The request to be executed.
        max_retries. Optional. Sets the maximum number of retry attempts.
        wait_interval. Optional. Sets the number of seconds to wait between each retry attempt.
        retry_errors. Optional. Retry the request whenever these error codes are encountered.
    Returns:
        An array of response rows.
    """

    response = None
    retries = 0
    while retries <= max_retries:
        try:
            response = service.searchanalytics().query(siteUrl=property_uri, body=request).execute()
        except HttpError as err:
            decoded_error_body = err.content.decode('utf-8')
            json_error = json.loads(decoded_error_body)
            if json_error['error']['code'] in retry_errors:
                time.sleep(wait_interval)
                retries += 1
                continue
        break
    return response


def parse_command_line_options():
    """
    Parses arguments from the command line and returns them in the form of an ArgParser object.
    """
    parser = argparse.ArgumentParser(description="Query the Google Search Console API for every day in a date range.")
    parser.add_argument('property_uri', type=str, help='The property URI to query. Must exactly match a property URI in Google Search Console')
    parser.add_argument('start_date', type=str, help='The start date for the query. Should not be more than 90 days ago')
    parser.add_argument('end_date', type=str, help='The last date to query. Should not be sooner than two days ago.')
    parser.add_argument('--secrets_file', type=str, default='credentials.json', help='File path of your Google Client ID and Client Secret')
    parser.add_argument('--config_file', type=str, help='File path of a config file containing settings for this Search Console property.')
    parser.add_argument('--output_location', type=str, help='The folder output location of the script.', default="")
    parser.add_argument('--url_type', type=str, help='A string to add to the beginning of the file', default="")
    parser.add_argument('--max-rows-per-day', '-n', type=int, default=100, help='The maximum number of rows to return for each day in the range')

    filters = parser.add_argument_group('filters')
    filters.add_argument('--page_filters_file', type=str, help='File path of a CSV list of pages to filter by', default="")
    filters.add_argument('--devices', nargs='*', type=str, help='List of devices to filter by. By default we do segment by device.',
                         default=['mobile', 'desktop', 'tablet'])
    filters.add_argument('--countries', nargs='*', type=str, help='List of countries to filter by', default=[])
    return parser.parse_args()


def read_page_paths_from_file(page_filters_file, property_uri):
    """
    Args:
        page_filters_file. The filepath of a plain text file containing a list of URLs
        to filter by in the Google Search Console.
    Returns:
        A list of those URLs, if they all specify the full GSC property correctly.
        Otherwise, will raise an exception.
    """
    pages = []
    with open(page_filters_file, "r") as file_handle:
        for line in file_handle.readlines():
            if property_uri in line:
                pages.append(line.strip("\n"))
            else:
                raise ValueError("Page filter does not include the property uri: {}".format(line))
    return pages

def main():
    """
    Fetch and parse all command line options.
    Dispatch queries to the GSC API.
    """
    args = parse_command_line_options()

    if args.page_filters_file:
        try:
            pages = read_page_paths_from_file(args.page_filters_file, args.property_uri)
        except IOError as err:
            logging.error("%s is not a valid file path", args.page_filters_file)
            sys.exit(err)
        except ValueError as err:
            logging.error("Error: all page filters must include the full URL of the Google Search Console property.")
            sys.exit(err)
    else:
        pages = []

    # Prepare the API service
    credentials = load_oauth2_credentials(args.secrets_file)
    service = create_search_console_client(credentials)

    start_date = datetime.strptime(args.start_date, "%Y-%m-%d")
    end_date = datetime.strptime(args.end_date, "%Y-%m-%d")

    for day in date_range(start_date, end_date):
        output_file = os.path.join(
            args.output_location,
            "{}_{}.csv".format(args.url_type, day.strftime("%Y%m%d"))
        )
        day = day.strftime("%Y-%m-%d")
        output_rows = []

        for filter_set in generate_filters(page=pages, device=args.devices, country=args.countries):

            request = {
                'startDate' : day,
                'endDate' : day,
                'dimensions' : ['query'],
                'rowLimit' : args.max_rows_per_day,
                'dimensionFilterGroups' : [
                    {
                        "groupType" : "and",
                        "filters" : filter_set
                    }
                ]
            }

            response = execute_request(service, args.property_uri, request)

            if response is None:
                logging.error("Request failed %s", json.dumps(request, indent=2))
                continue

            if 'rows' in response:

                if pages:
                    filters = [pages[0], 'worldwide', 'all_devices', args.url_type]
                else:
                    filters = ['gsc_property', 'worldwide', 'all_devices', args.url_type]

                filter_mapping = {'page': 0, 'country': 1, 'device': 2}
                for _filter in filter_set:
                    filters[filter_mapping[_filter['dimension']]] = _filter['expression']

                for row in response['rows']:
                    keys = ','.join(row['keys'])
                    output_row = [keys, row['clicks'], row['impressions'], row['ctr'], row['position']]
                    output_row.extend(filters)
                    output_rows.append(output_row)

        with open(output_file, 'w', newline="", encoding="utf-8-sig") as file_handle:
            csvwriter = csv.writer(file_handle)
            csvwriter.writerows(output_rows)

        logging.info("Query for %s complete", day)


if __name__ == '__main__':
    main()