# Playing with places

In [3]:
import pandas as pd
import folium
from folium.plugins import MarkerCluster
from folium.plugins import HeatMap

  return f(*args, **kwds)
  return f(*args, **kwds)


In [24]:
df = pd.read_csv('data/Mapping Places Spreadsheet - Sheet1.csv', keep_default_na=False)
df.head()

Unnamed: 0,Place Name,New Name (if applicable),File Number,Latitude,Longitude,Geonames Link,Wikidata Link,Image url
0,"Aboriginal Tent Embassy (Canberra, A.C.T.)",,FL451088 | FL468008 | FL654335 | FL467349 | FL...,-35.3011° S,149.13° E,http://www.geonames.org/maps/wikipedia_-35.301...,https://www.wikidata.org/wiki/Q189212,
1,Adelaide (S. Aust.),,,-34.83313 S,138.62537 E,http://www.geonames.org/11523930/adelaide.html,https://www.wikidata.org/wiki/Q5112,
2,Alexandria (N.S.W.),,FL4546518 | FL4562149 | FL4547832 | FL4563288,-33.91667 S,151.2 E,http://www.geonames.org/2178136/alexandria.html,https://www.wikidata.org/wiki/Q2915846,
3,Aquatic Club (corner Riley & Cathedral Streets...,Woolloomooloo,FL4515610,-33.87042,151.21968,https://www.geonames.org/2142996/woolloomooloo...,https://www.wikidata.org/wiki/Q2678743,
4,Art Gallery of New South Wales,,FL4510098 | FL4589913 | FL4566059 | FL4567741 ...,-33.86879°S,151.21748°E,http://www.geonames.org/6949391/art-gallery-of...,https://www.wikidata.org/wiki/Q705551,


## Clean up the data

In [25]:
# Clean up the lats and lons
df = df.replace({'°\s*[EWNS]+': ''}, regex=True)
df = df.replace({'\s*[EWNS]+$': ''}, regex=True)
df['Latitude'] = pd.to_numeric(df['Latitude'], errors='coerce')
df['Longitude'] = pd.to_numeric(df['Longitude'], errors='coerce')
df.head()

Unnamed: 0,Place Name,New Name (if applicable),File Number,Latitude,Longitude,Geonames Link,Wikidata Link,Image url
0,"Aboriginal Tent Embassy (Canberra, A.C.T.)",,FL451088 | FL468008 | FL654335 | FL467349 | FL...,-35.3011,149.13,http://www.geonames.org/maps/wikipedia_-35.301...,https://www.wikidata.org/wiki/Q189212,
1,Adelaide (S. Aust.),,,-34.83313,138.62537,http://www.geonames.org/11523930/adelaide.html,https://www.wikidata.org/wiki/Q5112,
2,Alexandria (N.S.W.),,FL4546518 | FL4562149 | FL4547832 | FL4563288,-33.91667,151.2,http://www.geonames.org/2178136/alexandria.html,https://www.wikidata.org/wiki/Q2915846,
3,Aquatic Club (corner Riley & Cathedral Streets...,Woolloomooloo,FL4515610,-33.87042,151.21968,https://www.geonames.org/2142996/woolloomooloo...,https://www.wikidata.org/wiki/Q2678743,
4,Art Gallery of New South Wales,,FL4510098 | FL4589913 | FL4566059 | FL4567741 ...,-33.86879,151.21748,http://www.geonames.org/6949391/art-gallery-of...,https://www.wikidata.org/wiki/Q705551,


## Reorganise the data so we have one row per image

In [4]:
# This cell creates a CSV file with a row for each image, preserving all the parent item metadata

def tidy_split(df, column, sep='|', keep=False):
    """
    Split the values of a column and expand so the new DataFrame has one split
    value per row. Filters rows where the column is missing.

    Params
    ------
    df : pandas.DataFrame
        dataframe with the column to split and expand
    column : str
        the column to split and expand
    sep : str
        the string used to split the column's values
    keep : bool
        whether to retain the presplit value as it's own row

    Returns
    -------
    pandas.DataFrame
        Returns a dataframe with the same columns as `df`.
    """
    indexes = list()
    new_values = list()
    df = df.dropna(subset=[column])
    for i, presplit in enumerate(df[column].astype(str)):
        values = presplit.split(sep)
        if keep and len(values) > 1:
            indexes.append(i)
            new_values.append(presplit)
        for value in values:
            indexes.append(i)
            new_values.append(value)
    new_df = df.iloc[indexes, :].copy()
    new_df[column] = new_values
    return new_df

images_df = tidy_split(df, 'File Number', sep='|')

# Reomve rows with no images
images_df = images_df.loc[images_df['File Number'] != '']
images_df.head()

Unnamed: 0,Place Name,New Name (if applicable),File Number,Latitude,Longitude,Geonames Link,Wikidata Link,Image url
0,"Aboriginal Tent Embassy (Canberra, A.C.T.)",,FL451088,-35.3011,149.13,http://www.geonames.org/maps/wikipedia_-35.301...,https://www.wikidata.org/wiki/Q189212,
0,"Aboriginal Tent Embassy (Canberra, A.C.T.)",,FL468008,-35.3011,149.13,http://www.geonames.org/maps/wikipedia_-35.301...,https://www.wikidata.org/wiki/Q189212,
0,"Aboriginal Tent Embassy (Canberra, A.C.T.)",,FL654335,-35.3011,149.13,http://www.geonames.org/maps/wikipedia_-35.301...,https://www.wikidata.org/wiki/Q189212,
0,"Aboriginal Tent Embassy (Canberra, A.C.T.)",,FL467349,-35.3011,149.13,http://www.geonames.org/maps/wikipedia_-35.301...,https://www.wikidata.org/wiki/Q189212,
0,"Aboriginal Tent Embassy (Canberra, A.C.T.)",,FL456342,-35.3011,149.13,http://www.geonames.org/maps/wikipedia_-35.301...,https://www.wikidata.org/wiki/Q189212,


## Summarise the data

In [None]:
#How many images have been geo-located?
images_df = images_df.loc[images_df['Latitude'].notnull() & images_df['Longitude'].notnull()]
images_df.shape[0]

In [None]:
# Top 50 places!
images_df['Place Name'].value_counts()[:50]

In [None]:
m = folium.Map(
    location=[-30, 135],
    zoom_start=4
)
# We'll cluster the markers for better readability
marker_cluster = MarkerCluster().add_to(m)

for index, img in images_df.iterrows():
    # Create the content of the marker popup -- includes a search link back to Trove!
    #html = '<b>{}</b><br><a target="_blank" href="https://trove.nla.gov.au/newspaper/result?q={}&l-title={}&l-category={}">{} articles'.format(place[0], params['q'], titles, params.get('l-category', ''), total)
    # Add the marker to the map
    html = '<b>{0}</b><br><a target="_blank" href="http://digital.sl.nsw.gov.au/delivery/DeliveryManagerServlet?dps_pid={1}&embedded=true&toolbar=false"><img width="200" src="https://s3-ap-southeast-2.amazonaws.com/wraggetribune/images/500/{1}-500.jpg"></a>'.format(img['Place Name'].replace("'", ';rsquo'), img['File Number'].strip())
    folium.Marker([img['Latitude'], img['Longitude']], popup=html).add_to(marker_cluster)

m

In [None]:
places_df = df.loc[df['Latitude'].notnull() & df['Longitude'].notnull()]
locations = []
# Loop through the places
for index, row in places_df.iterrows():
    # Get the total
    images = row['File Number'].split('|')
    # Add the coordinates of the place to the list of locations as many times as there are articles
    locations += ([[row['Latitude'], row['Longitude']]] * len(images))


# Create another map
m2 = folium.Map(
    location=[-30, 135],
    zoom_start=4
)

#Add the heatmap data!
HeatMap(locations).add_to(m2)
m2

## Create a data file for the web app

In [26]:
df = df.loc[df['File Number'] != '']
df = df.loc[df['Latitude'].notnull() & df['Longitude'].notnull()]
df['images'] = df['File Number'].str.replace(' ', '').str.split('|')
df = df.drop('File Number', 1)
df = df.drop('Image url', 1)
df['place_id'] = df.index
df.columns = ['place_name', 'alternative_name', 'latitude', 'longitude', 'geonames', 'wikidata', 'images', 'place_id']
df.head()

Unnamed: 0,place_name,alternative_name,latitude,longitude,geonames,wikidata,images,place_id
0,"Aboriginal Tent Embassy (Canberra, A.C.T.)",,-35.3011,149.13,http://www.geonames.org/maps/wikipedia_-35.301...,https://www.wikidata.org/wiki/Q189212,"[FL451088, FL468008, FL654335, FL467349, FL456...",0
2,Alexandria (N.S.W.),,-33.91667,151.2,http://www.geonames.org/2178136/alexandria.html,https://www.wikidata.org/wiki/Q2915846,"[FL4546518, FL4562149, FL4547832, FL4563288]",2
3,Aquatic Club (corner Riley & Cathedral Streets...,Woolloomooloo,-33.87042,151.21968,https://www.geonames.org/2142996/woolloomooloo...,https://www.wikidata.org/wiki/Q2678743,[FL4515610],3
4,Art Gallery of New South Wales,,-33.86879,151.21748,http://www.geonames.org/6949391/art-gallery-of...,https://www.wikidata.org/wiki/Q705551,"[FL4510098, FL4589913, FL4566059, FL4567741, F...",4
5,"Australia Square (Sydney, N.S.W.)",,-33.86487,151.20762,http://www.geonames.org/6615611/australia-squa...,https://www.wikidata.org/wiki/Q2872154,"[FL4542461, FL4523517, FL4571894, FL4404355, F...",5


In [27]:
import json
data = json.loads(df.to_json(orient='records'))

In [22]:
data

[{'place_name': 'Aboriginal Tent Embassy (Canberra, A.C.T.)',
  'alternative_name': '',
  'latitude': -35.3011,
  'longitude': 149.13,
  'geonames': 'http://www.geonames.org/maps/wikipedia_-35.3011_149.13.html',
  'wikidata': 'https://www.wikidata.org/wiki/Q189212',
  'images': ['FL451088',
   'FL468008',
   'FL654335',
   'FL467349',
   'FL456342',
   'FL4404229',
   'FL466542',
   'FL454033'],
  'place_id': 0},
 {'place_name': 'Alexandria (N.S.W.)',
  'alternative_name': '',
  'latitude': -33.91667,
  'longitude': 151.2,
  'geonames': 'http://www.geonames.org/2178136/alexandria.html',
  'wikidata': 'https://www.wikidata.org/wiki/Q2915846',
  'images': ['FL4546518', 'FL4562149', 'FL4547832', 'FL4563288'],
  'place_id': 2},
 {'place_name': 'Aquatic Club (corner Riley & Cathedral Streets, Woolloomooloo, N.S.W.)',
  'alternative_name': 'Woolloomooloo',
  'latitude': -33.87042,
  'longitude': 151.21968,
  'geonames': 'https://www.geonames.org/2142996/woolloomooloo.html',
  'wikidata': 'ht

In [28]:
from tinydb import TinyDB
db = TinyDB('places.json')
db.insert_multiple(data)

[108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,
 185,
 186,
 187,
 188,
 189,
 190,
 191,
 192,
 193,
 194,
 195,
 196,
 197,
 198,
 199,
 200,
 201,
 202,
 203,
 204,
 205,
 206,
 207,
 208,
 209,
 210,
 211,
 212,
 213,
 214]