# Imports

In [1]:
import pyaurorax
import datetime
import pprint
import pandas as pd

# Search for data product records

In [2]:
# search parameters: the time window to query plus the program and
# instrument-type filters
start = datetime.datetime(year=2020, month=2, day=1)
end = datetime.datetime(year=2020, month=2, day=5, hour=23, minute=59, second=59)
programs = ["trex"]
instrument_types = ["RGB ASI"]

In [3]:
# perform search, filtering by both program and instrument type
# (verbose=True prints progress messages while the request is processed)
s = pyaurorax.data_products.search(start,
                                   end,
                                   programs=programs,
                                   instrument_types=instrument_types,
                                   verbose=True)

[2022-01-14 01:38:25.285869] Search object created
[2022-01-14 01:38:25.357410] Request submitted
[2022-01-14 01:38:25.357532] Request ID: 091c6ed8-272e-466c-b002-f91781e84fac
[2022-01-14 01:38:25.357544] Request details available at: https://api.aurorax.space/api/v1/data_products/requests/091c6ed8-272e-466c-b002-f91781e84fac
[2022-01-14 01:38:25.357551] Waiting for data ...
[2022-01-14 01:38:26.424644] Checking for data ...
[2022-01-14 01:38:26.521165] Data is now available
[2022-01-14 01:38:26.535137] Retrieving data ...
[2022-01-14 01:38:27.097308] Retrieved 3.7 MB of data containing 1490 records


In [4]:
# output data as a pandas dataframe: one row per returned record, one column
# per record attribute, displayed in order of start time
data_products = [vars(record) for record in s.data]
df = pd.DataFrame(data=data_products)
df.sort_values(by="start")

Unnamed: 0,data_source,data_product_type,start,end,url,metadata
0,"DataSource(identifier=103, program='trex', pla...",keogram,2020-02-01 00:00:00,2020-02-01 00:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly_hires', 'imaging_end_..."
23,"DataSource(identifier=102, program='trex', pla...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_hires_200px', 'imaging..."
24,"DataSource(identifier=102, program='trex', pla...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_hires', 'imaging_end_t..."
26,"DataSource(identifier=102, program='trex', pla...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_moviederived', 'imagin..."
27,"DataSource(identifier=104, program='trex', pla...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily', 'imaging_end_time': ..."
...,...,...,...,...,...,...
1479,"DataSource(identifier=101, program='trex', pla...",montage,2020-02-05 13:00:00,2020-02-05 13:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'montage_type': 'hourly', 'imaging_end_time':..."
1488,"DataSource(identifier=96, program='trex', plat...",montage,2020-02-05 14:00:00,2020-02-05 14:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'montage_type': 'hourly', 'imaging_end_time':..."
1486,"DataSource(identifier=96, program='trex', plat...",keogram,2020-02-05 14:00:00,2020-02-05 14:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly_hires', 'imaging_end_..."
1487,"DataSource(identifier=96, program='trex', plat...",keogram,2020-02-05 14:00:00,2020-02-05 14:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly_hires_200px', 'imagin..."


# Do the search step-by-step

Under the hood, the AuroraX API performs a data product search asynchronously. Note that this does not mean that it can be done using a Python async method; it means that PyAuroraX makes more than a single HTTP request against the AuroraX API when doing a search. Operating this way adds some complexity within PyAuroraX, but it also opens the search up to some very important capabilities. The main capability enabled by this architecture is the ability to perform queries over a large timeframe and/or for a large number of data sources. Queries like this can easily take several minutes and can return hundreds of megabytes - or even gigabytes - of data. A conventional HTTP request would normally time out because of this, ultimately failing to complete the search.

Instead of using the `pyaurorax.data_products.search` method which wraps all logic into an easy function, you can also perform a data product search step-by-step if you want more control over the process. Below, we do a search in this manner.

In [5]:
# set up the search parameters: time window, program, and instrument type
start = datetime.datetime(2020, 2, 1)  # midnight; omitted time fields default to zero
end = datetime.datetime(2020, 2, 5, hour=23, minute=59, second=59)
programs = ["trex"]
instrument_types = ["RGB ASI"]

In [6]:
# create the Search object from the parameters above; printing it shows the
# object's executed/completed flags and its request ID
s = pyaurorax.data_products.Search(
    start,
    end,
    programs=programs,
    instrument_types=instrument_types,
)
print(s)

DataProductsSearch(executed=False, completed=False, request_id='')


In [7]:
# execute the search; printing the object afterwards shows its updated
# executed/completed flags and the request ID it was assigned
s.execute()
print(s)

DataProductsSearch(executed=True, completed=False, request_id='886b355b-bbed-4eb1-a4d3-4a063f45e290')


In [8]:
# get request status: refresh it first, then pretty-print the status held by
# the search object (the printed dictionary includes the request's log entries)
s.update_status()
pprint.pprint(s.status)

{'logs': [{'level': 'debug',
           'summary': 'Search request arrived and saved',
           'timestamp': '2022-01-14T01:38:31.43241Z'},
          {'level': 'info',
           'summary': 'Starting query for 886b355b-bbed-4eb1-a4d3-4a063f45e290',
           'timestamp': '2022-01-14T01:38:31.441468Z'},
          {'level': 'info',
           'summary': 'Finished query in 0.11 seconds.',
           'timestamp': '2022-01-14T01:38:31.561696Z'},
          {'level': 'debug',
           'summary': 'Starting to write data to file',
           'timestamp': '2022-01-14T01:38:31.575273Z'},
          {'level': 'info',
           'summary': 'Finished writing data, found 1490 records in 0.23 '
                      'seconds.',
           'timestamp': '2022-01-14T01:38:31.812434Z'},
          {'level': 'debug',
           'summary': 'File size is 3743875 bytes.',
           'timestamp': '2022-01-14T01:38:31.823212Z'},
          {'level': 'info',
           'summary': 'Finished search in 0.39 secon

In [9]:
# view just the logs for the request; the status must be updated beforehand
# with s.update_status(), which was already done in the cell above
pprint.pprint(s.logs)

[{'level': 'debug',
  'summary': 'Search request arrived and saved',
  'timestamp': '2022-01-14T01:38:31.43241Z'},
 {'level': 'info',
  'summary': 'Starting query for 886b355b-bbed-4eb1-a4d3-4a063f45e290',
  'timestamp': '2022-01-14T01:38:31.441468Z'},
 {'level': 'info',
  'summary': 'Finished query in 0.11 seconds.',
  'timestamp': '2022-01-14T01:38:31.561696Z'},
 {'level': 'debug',
  'summary': 'Starting to write data to file',
  'timestamp': '2022-01-14T01:38:31.575273Z'},
 {'level': 'info',
  'summary': 'Finished writing data, found 1490 records in 0.23 seconds.',
  'timestamp': '2022-01-14T01:38:31.812434Z'},
 {'level': 'debug',
  'summary': 'File size is 3743875 bytes.',
  'timestamp': '2022-01-14T01:38:31.823212Z'},
 {'level': 'info',
  'summary': 'Finished search in 0.39 seconds and found 1490 records.',
  'timestamp': '2022-01-14T01:38:31.838496Z'}]


In [10]:
# wait for the data, then refresh the request status once the wait returns
# NOTE(review): presumably s.wait() blocks until the request has completed - confirm
s.wait()
s.update_status()

In [11]:
# retrieve the search results and print the first record
s.get_data()
print(s.data[0])

# tabulate the attributes of every record in a pandas dataframe and
# display it ordered by start time
data_products = list(map(vars, s.data))
df = pd.DataFrame(data=data_products)
df.sort_values(by="start")

DataProduct(data_source=DataSource(identifier=102, program='trex', platform='pinawa', instrument_type='RGB ASI', source_type='ground', display_name='TREx RGB PINA'), start=datetime.datetime(2020, 2, 1, 0, 0), end=datetime.datetime(2020, 2, 1, 23, 59), data_product_type='keogram', url='https://data.phys.uc...', metadata={'keogram_type': 'da...})


Unnamed: 0,data_source,data_product_type,start,end,url,metadata
0,"DataSource(identifier=102, program='trex', pla...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily', 'imaging_end_time': ..."
23,"DataSource(identifier=103, program='trex', pla...",movie,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'movie_type': 'real-time daily', 'imaging_end..."
24,"DataSource(identifier=103, program='trex', pla...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily', 'imaging_end_time': ..."
26,"DataSource(identifier=103, program='trex', pla...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_hires_200px', 'imaging..."
27,"DataSource(identifier=103, program='trex', pla...",keogram,2020-02-01 00:00:00,2020-02-01 23:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'daily_hires', 'imaging_end_t..."
...,...,...,...,...,...,...
1479,"DataSource(identifier=96, program='trex', plat...",keogram,2020-02-05 13:00:00,2020-02-05 13:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly_hires', 'imaging_end_..."
1488,"DataSource(identifier=96, program='trex', plat...",montage,2020-02-05 14:00:00,2020-02-05 14:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'montage_type': 'hourly', 'imaging_end_time':..."
1486,"DataSource(identifier=96, program='trex', plat...",keogram,2020-02-05 14:00:00,2020-02-05 14:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly_hires', 'imaging_end_..."
1487,"DataSource(identifier=96, program='trex', plat...",keogram,2020-02-05 14:00:00,2020-02-05 14:59:00,https://data.phys.ucalgary.ca/sort_by_project/...,"{'keogram_type': 'hourly_hires_200px', 'imagin..."
