"""This script walks through using geospatial data and expressions with the Python API.

Covered here: creating a table, inserting WKT data, filtering geospatial data, aggregating geospatial
data, joining geospatial data, and comparing different geospatial equality functions
"""

import argparse
import json
import gpudb

# Tables created externally to this script
geo_schema = "example_geospatial"
nyc_nta_table = geo_schema + ".nyc_neighborhood"
taxi_trip_table = geo_schema + ".taxi_trip_data"
taxi_zones_table = geo_schema + ".taxi_zone"

# Tables/Views created within this script
poi_table = geo_schema + ".points_of_interest"
dist_check_view = geo_schema + ".distance_check"
zones_by_area_proj = geo_schema + ".zones_by_area"
queens_agb = geo_schema + ".queens_neighborhoods"
pickup_point_location = geo_schema + ".pickup_point_neighborhood_location"
top_pickup_locations_by_frequency = geo_schema + ".top_pickup_locations_by_frequency"
top_pickup_locations_by_distance = geo_schema + ".top_pickup_locations_by_distance"
pickup_agb = geo_schema + ".top_pickup_location_groups"
poi_comp_table = geo_schema + ".points_of_interest_src_compare"


def geospatial_examples():

    print("GEOSPATIAL EXAMPLES OUTPUT")
    print("==========================\n")


    print("CREATING A TABLE AND INSERTING WKT DATA")
    print("---------------------------------------\n")

    # Create a type using the GPUdbRecordType object
    columns = [gpudb.GPUdbRecordColumn("src1_poi", "string",
                                       [gpudb.GPUdbColumnProperty.WKT]),
               gpudb.GPUdbRecordColumn("src2_poi", "string",
                                       [gpudb.GPUdbColumnProperty.WKT])]
    poi_record_type = gpudb.GPUdbRecordType(columns, label="poi_record_type")
    poi_record_type.create_type(kinetica)
    poi_type_id = poi_record_type.type_id
    # Create a table from the poi_record_type
    response = kinetica.create_table(table_name=poi_table, type_id=poi_type_id)
    print(f"Table created: {response['status_info']['status']}")

    # Prepare the data to be inserted
    pois = [
        "POINT(-73.8455003 40.7577272)",
        "POLYGON((-73.9258506 40.8287493, -73.9258292 40.828723, -73.9257795 40.8287387, -73.925801 40.8287635, "
        "-73.9258506 40.8287493))",
        "LINESTRING(-73.9942208 40.7504289, -73.993856 40.7500753, -73.9932525 40.7499941)",
        "LINESTRING(-73.9760944 40.6833433, -73.9764404 40.6830626, -73.9763761 40.6828897)"
    ]
    pois_compare = [
        "POINT(-73.84550029 40.75772724)",
        "POLYGON((-73.9257795 40.8287387, -73.925801 40.8287635, -73.9258506 40.8287493, -73.9258506 40.8287493, "
        "-73.9258292 40.828723))",
        "LINESTRING(-70.9942208 42.7504289, -72.993856 43.7500753, -73.9932525 44.7499941)",
        "LINESTRING(-70.9761 41.6834, -72.9765 39.6831, -76.9764 38.6829)"
    ]
    # Insert the data into the table using a list
    encoded_obj_list = []
    for val in range(0, 4):
        poi1_val = pois[val]
        poi2_val = pois_compare[val]
        record = gpudb.GPUdbRecord(poi_record_type, [poi1_val, poi2_val])
        encoded_obj_list.append(record.binary_data)
    response = kinetica.insert_records(table_name=poi_table, data=encoded_obj_list, list_encoding="binary")
    print(f"Number of records inserted:  {response['count_inserted']}\n")

    print("SCALAR FUNCTIONS")
    print("----------------\n")

    # Scalar Example 1

    """ Using GEODIST(), calculate the distance between pickup and dropoff points to see where the calculated distance
        is less than the recorded trip distance """
    response = kinetica.filter(
            table_name=taxi_trip_table,
            view_name=dist_check_view,
            expression="GEODIST(pickup_longitude, pickup_latitude, dropoff_longitude, dropoff_latitude) "
                        "< trip_distance"
    )
    print(f"Number of trips where geographic distance < recorded distance: {response['count']}\n")

    # Scalar Example 2

    # Calculate the area of each taxi zone and copy the selected data to a projection
    kinetica.create_projection(
            table_name=taxi_zones_table,
            projection_name=zones_by_area_proj,
            column_names=["objectid", "zone", "ST_AREA(geom,1) / 1000000 AS area"],
            options={"order_by": "area desc", "limit": "5"}
    )
    # Display the top 5 largest taxi zones
    large_zones = kinetica.get_records(
            table_name=zones_by_area_proj,
            offset=0, limit=gpudb.GPUdb.END_OF_SET,
            encoding="json",
            options={"sort_by":"area", "sort_order": "descending"}
    )['records_json']
    print("Top 5 largest taxi zones:")
    print("{:<7s} {:<27s} {:<30s}".format("Zone_ID", "Zone_Name", "Zone_Area_in_Square_Kilometers"))
    print("{:=<7s} {:=<27s} {:=<30s}".format("", "", ""))
    for zone in large_zones:
        print("{objectid:>7d} {zone:<27s} {area:>30.4f}".format(**json.loads(zone)))

    print("\nGEOSPATIAL AGGREGATIONS")
    print("-----------------------\n")

    # Dissolve the boundaries between Queens neighborhood tabulation areas to create a single borough boundary
    response = kinetica.aggregate_group_by(
            table_name=nyc_nta_table,
            column_names=["ST_DISSOLVE(geom) AS neighborhoods"],
            offset=0, limit=1,
            encoding="json",
            options={
                    "expression": "BoroName = 'Queens'",
                    "result_table": queens_agb
            }
    )
    print(f"Queens neighborhood boundaries dissolved:  {response['status_info']['status']}")

    print("\nGEOSPATIAL JOINS")
    print("----------------\n")

    # Join Example 1

    # Locate which borough and neighborhood taxi pickup points occurred
    kinetica.create_join_table(
            join_table_name=pickup_point_location,
            table_names=[taxi_trip_table + " AS t", nyc_nta_table + " AS n"],
            column_names=[
                    "t.vendor_id", "t.passenger_count",
                    "t.pickup_longitude", "t.pickup_latitude",
                    "n.BoroName", "n.NTAName"
            ],
            expressions=["STXY_CONTAINS(n.geom, t.pickup_longitude, t.pickup_latitude)"]
    )
    # Display a subset of the pickup point locations
    pickup_points = kinetica.get_records(
            table_name=pickup_point_location,
            offset=0, limit=25,
            encoding="json",
            options={"sort_by":"pickup_longitude"}
    )['records_json']
    print("Subset of pickup point neighborhood locations:")
    print("{:<10s} {:<12s} {:<13s} {:<12s} {:<11s} {:<43s}".format("Vendor ID", "Pass. Count", "Pickup Long.",
                                                                   "Pickup Lat.", "Borough", "NTA"))
    print("{:=<10s} {:=<12s} {:=<13s} {:=<12s} {:=<11s} {:=<43s}".format("", "", "", "", "", ""))
    for point in pickup_points:
        print("{vendor_id:<10s} {passenger_count:<12d} {pickup_longitude:<13f} {pickup_latitude:<12f} "
              "{BoroName:<11s} {NTAName:<43s}".format(**json.loads(point)))
    print("")

    # Join Example 2

    # Find the top neighborhood pickup locations
    kinetica.create_join_table(
            join_table_name=top_pickup_locations_by_frequency,
            table_names=[taxi_trip_table + " AS t", nyc_nta_table + " AS n"],
            column_names=["n.NTAName AS NTAName"],
            expressions=["((STXY_INTERSECTS(t.pickup_longitude, t.pickup_latitude, n.geom) = 1))"]
    )
    # Aggregate the neighborhoods and the count of records per neighborhood
    kinetica.aggregate_group_by(
            table_name=top_pickup_locations_by_frequency,
            column_names=["NTAName AS Pickup_NTA", "COUNT(*) AS Total_Pickups"],
            offset=0, limit=gpudb.GPUdb.END_OF_SET,
            options={"result_table": pickup_agb}
    )
    # Display the top 10 neighborhood pickup locations
    response = kinetica.get_records_by_column(
            table_name=pickup_agb,
            column_names=["Pickup_NTA", "Total_Pickups"],
            offset=0, limit=10,
            encoding="json",
            options={"order_by": "Total_Pickups desc"}
    )
    data = kinetica.parse_dynamic_response(response)['response']
    print("Top 10 neighborhood pickup locations:")
    print("{:<43s} {:<14s}".format("Pickup NTA", "Total Pickups"))
    print("{:=<43s} {:=<14s}".format("", ""))
    for pickupLocs in zip(data["Pickup_NTA"], data["Total_Pickups"]):
        print("{:<43s} {:<14d}".format(*pickupLocs))
    print("")

    # Join Example 3

    # Find the geospatial objects representing the top 10 neighborhoods by
    # distance covered over all taxi trips originating in them
    response = kinetica.create_join_table(
            join_table_name=top_pickup_locations_by_distance,
            table_names=[taxi_trip_table + " AS t", nyc_nta_table + " AS n"],
            column_names=["n.geom AS geom", "t.trip_distance AS trip_distance"],
            expressions=["((STXY_INTERSECTS(t.pickup_longitude, t.pickup_latitude, n.geom) = 1))"]
    )
    # Aggregate the neighborhoods and the count of records per neighborhood
    response = kinetica.aggregate_group_by(
            table_name=top_pickup_locations_by_distance,
            column_names=["geom AS Pickup_Geo", "SUM(trip_distance) AS Total_Trip_Distance"],
            encoding = "json",
            offset = 0, limit = 10,
            options = {"sort_by":"value", "sort_order":"descending"}
    )

    # Display the top 10 neighborhood pickup neighborhood geos
    data = json.loads(response["json_encoded_response"])
    print("Top 10 neighborhood pickup geos:")
    print("{:<19s} {:<50s}".format("Total Trip Distance", "Pickup Geo"))
    print("{:=<19s} {:=<50s}".format("", ""))
    for record in zip(data["column_2"], data["column_1"]):
        print("{:>19.2f} {:<50s}".format(record[0], record[1][0:47] + "..."))

    print("\nGEOSPATIAL EQUALITY")
    print("-------------------\n")

    # Calculate types of geospatial equality for differing sources of geometry
    kinetica.create_projection(
        table_name=poi_table,
        projection_name=poi_comp_table,
        column_names=[
            "ST_GEOMETRYTYPE(src1_poi) AS src1type",
            "ST_GEOMETRYTYPE(src2_poi) AS src2type",
            "ST_ALMOSTEQUALS(src1_poi, src2_poi, 7) AS almostequals",
            "ST_EQUALS(src1_poi, src2_poi) AS equals",
            "ST_EQUALSEXACT(src1_poi, src2_poi, 4) AS equalsexact"
        ]
    )
    equality_comps = kinetica.get_records(
            table_name=poi_comp_table,
            offset=0, limit=10,
            encoding="json",
            options={"sort_by":"src1type"}
    )['records_json']
    print("Comparing ST_ALMOSTEQUALS(), ST_EQUALS(), and ST_EQUALSEXACT():")
    print("{:<13s} {:<13s} {:<13s} {:<6s} {:<12s}".format(
            "Source_1_Type", "Source_2_Type", "Almost_Equals", "Equals", "Equals_Exact"
    ))
    print("{:=<13s} {:=<13s} {:=<13s} {:=<6s} {:=<12s}".format("", "", "", "", ""))
    for comp in equality_comps:
        print("{src1type:<13s} {src2type:<13s} {almostequals:>13d} {equals:>6d} {equalsexact:>12d}".format(**json.loads(comp)))

# end geospatial_examples()


def clear_tables():

    # Drop all the tables
    for table_name in reversed([
        poi_table,
        dist_check_view,
        zones_by_area_proj,
        queens_agb,
        pickup_point_location,
        top_pickup_locations_by_frequency,
        pickup_agb,
        top_pickup_locations_by_distance,
        poi_comp_table

    ]):
        kinetica.clear_table(table_name)

# end clear_tables()


if __name__ == '__main__':

    # Set up args
    parser = argparse.ArgumentParser(description='Run geospatial function examples.')
    parser.add_argument('--url', default='http://127.0.0.1:9191', help='Kinetica URL to run example against')
    parser.add_argument('--username', default='', help='Username of user to run example with')
    parser.add_argument('--password', default='', help='Password of user')

    args = parser.parse_args()

    """ Establish connection with an instance of Kinetica,
        using binary encoding to save memory """
    kinetica = gpudb.GPUdb(host = [args.url], username = args.username, password = args.password)

    clear_tables()
    geospatial_examples()