{ "cells": [ { "cell_type": "markdown", "id": "be9afdfe", "metadata": {}, "source": [ "# Imports" ] }, { "cell_type": "code", "execution_count": 1, "id": "6c54fcad", "metadata": {}, "outputs": [], "source": [ "import pyaurorax\n", "import datetime\n", "import pprint\n", "import pandas as pd" ] }, { "cell_type": "markdown", "id": "4c75f795", "metadata": {}, "source": [ "# Search for data product records" ] }, { "cell_type": "code", "execution_count": 2, "id": "0b9bb0cb", "metadata": {}, "outputs": [], "source": [ "# set values\n", "start = datetime.datetime(2020, 2, 1, 0, 0, 0)\n", "end = datetime.datetime(2020, 2, 5, 23, 59, 59)\n", "programs = [\"trex\"]\n", "instrument_types = [\"RGB ASI\"]" ] }, { "cell_type": "code", "execution_count": 3, "id": "05a31412", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[2022-01-14 01:38:25.285869] Search object created\n", "[2022-01-14 01:38:25.357410] Request submitted\n", "[2022-01-14 01:38:25.357532] Request ID: 091c6ed8-272e-466c-b002-f91781e84fac\n", "[2022-01-14 01:38:25.357544] Request details available at: https://api.aurorax.space/api/v1/data_products/requests/091c6ed8-272e-466c-b002-f91781e84fac\n", "[2022-01-14 01:38:25.357551] Waiting for data ...\n", "[2022-01-14 01:38:26.424644] Checking for data ...\n", "[2022-01-14 01:38:26.521165] Data is now available\n", "[2022-01-14 01:38:26.535137] Retrieving data ...\n", "[2022-01-14 01:38:27.097308] Retrieved 3.7 MB of data containing 1490 records\n" ] } ], "source": [ "# perform search, filtered by the programs and instrument types set above\n", "s = pyaurorax.data_products.search(start,\n", " end,\n", " programs=programs,\n", " instrument_types=instrument_types,\n", " verbose=True)" ] }, { "cell_type": "code", "execution_count": 4, "id": "9356aa7f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data_sourcedata_product_typestartendurlmetadata
0DataSource(identifier=103, program='trex', pla...keogram2020-02-01 00:00:002020-02-01 00:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'hourly_hires', 'imaging_end_...
23DataSource(identifier=102, program='trex', pla...keogram2020-02-01 00:00:002020-02-01 23:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'daily_hires_200px', 'imaging...
24DataSource(identifier=102, program='trex', pla...keogram2020-02-01 00:00:002020-02-01 23:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'daily_hires', 'imaging_end_t...
26DataSource(identifier=102, program='trex', pla...keogram2020-02-01 00:00:002020-02-01 23:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'daily_moviederived', 'imagin...
27DataSource(identifier=104, program='trex', pla...keogram2020-02-01 00:00:002020-02-01 23:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'daily', 'imaging_end_time': ...
.....................
1479DataSource(identifier=101, program='trex', pla...montage2020-02-05 13:00:002020-02-05 13:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'montage_type': 'hourly', 'imaging_end_time':...
1488DataSource(identifier=96, program='trex', plat...montage2020-02-05 14:00:002020-02-05 14:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'montage_type': 'hourly', 'imaging_end_time':...
1486DataSource(identifier=96, program='trex', plat...keogram2020-02-05 14:00:002020-02-05 14:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'hourly_hires', 'imaging_end_...
1487DataSource(identifier=96, program='trex', plat...keogram2020-02-05 14:00:002020-02-05 14:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'hourly_hires_200px', 'imagin...
1489DataSource(identifier=96, program='trex', plat...keogram2020-02-05 14:00:002020-02-05 14:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'hourly', 'imaging_end_time':...
\n", "

1490 rows × 6 columns

\n", "
" ], "text/plain": [ " data_source data_product_type \\\n", "0 DataSource(identifier=103, program='trex', pla... keogram \n", "23 DataSource(identifier=102, program='trex', pla... keogram \n", "24 DataSource(identifier=102, program='trex', pla... keogram \n", "26 DataSource(identifier=102, program='trex', pla... keogram \n", "27 DataSource(identifier=104, program='trex', pla... keogram \n", "... ... ... \n", "1479 DataSource(identifier=101, program='trex', pla... montage \n", "1488 DataSource(identifier=96, program='trex', plat... montage \n", "1486 DataSource(identifier=96, program='trex', plat... keogram \n", "1487 DataSource(identifier=96, program='trex', plat... keogram \n", "1489 DataSource(identifier=96, program='trex', plat... keogram \n", "\n", " start end \\\n", "0 2020-02-01 00:00:00 2020-02-01 00:59:00 \n", "23 2020-02-01 00:00:00 2020-02-01 23:59:00 \n", "24 2020-02-01 00:00:00 2020-02-01 23:59:00 \n", "26 2020-02-01 00:00:00 2020-02-01 23:59:00 \n", "27 2020-02-01 00:00:00 2020-02-01 23:59:00 \n", "... ... ... \n", "1479 2020-02-05 13:00:00 2020-02-05 13:59:00 \n", "1488 2020-02-05 14:00:00 2020-02-05 14:59:00 \n", "1486 2020-02-05 14:00:00 2020-02-05 14:59:00 \n", "1487 2020-02-05 14:00:00 2020-02-05 14:59:00 \n", "1489 2020-02-05 14:00:00 2020-02-05 14:59:00 \n", "\n", " url \\\n", "0 https://data.phys.ucalgary.ca/sort_by_project/... \n", "23 https://data.phys.ucalgary.ca/sort_by_project/... \n", "24 https://data.phys.ucalgary.ca/sort_by_project/... \n", "26 https://data.phys.ucalgary.ca/sort_by_project/... \n", "27 https://data.phys.ucalgary.ca/sort_by_project/... \n", "... ... \n", "1479 https://data.phys.ucalgary.ca/sort_by_project/... \n", "1488 https://data.phys.ucalgary.ca/sort_by_project/... \n", "1486 https://data.phys.ucalgary.ca/sort_by_project/... \n", "1487 https://data.phys.ucalgary.ca/sort_by_project/... \n", "1489 https://data.phys.ucalgary.ca/sort_by_project/... 
\n", "\n", " metadata \n", "0 {'keogram_type': 'hourly_hires', 'imaging_end_... \n", "23 {'keogram_type': 'daily_hires_200px', 'imaging... \n", "24 {'keogram_type': 'daily_hires', 'imaging_end_t... \n", "26 {'keogram_type': 'daily_moviederived', 'imagin... \n", "27 {'keogram_type': 'daily', 'imaging_end_time': ... \n", "... ... \n", "1479 {'montage_type': 'hourly', 'imaging_end_time':... \n", "1488 {'montage_type': 'hourly', 'imaging_end_time':... \n", "1486 {'keogram_type': 'hourly_hires', 'imaging_end_... \n", "1487 {'keogram_type': 'hourly_hires_200px', 'imagin... \n", "1489 {'keogram_type': 'hourly', 'imaging_end_time':... \n", "\n", "[1490 rows x 6 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# output data as a pandas dataframe\n", "data_products = [d.__dict__ for d in s.data]\n", "df = pd.DataFrame(data_products)\n", "df.sort_values(\"start\")" ] }, { "cell_type": "markdown", "id": "5dcdf0ef", "metadata": {}, "source": [ "# Do the search step-by-step\n", "\n", "Under the hood, the AuroraX API performs a data product search asynchronously. Note that this does not mean that it can be done using a Python async method; it means that PyAuroraX does more than just a single HTTP request against the AuroraX API when doing a search. With the API operating this way, it adds some more complexity within PyAuroraX but also opens the search up to some very important capabilities. The main capability enabled by this architecture is the ability to perform queries for a large timeframe, and/or for a large number of data sources. Queries like this can easily take several minutes, and can return hundreds of megabytes - or even gigabytes - of data. 
A conventional HTTP request would normally time out because of this, ultimately failing to complete the search.\n", "\n", "Instead of using the `pyaurorax.data_products.search` method, which wraps all logic into an easy function, you can also perform a data product search step-by-step if you want more control over the process. Below, we do a search in this manner." ] }, { "cell_type": "code", "execution_count": 5, "id": "3a54eeac", "metadata": {}, "outputs": [], "source": [ "# set up the search parameters\n", "start = datetime.datetime(2020, 2, 1, 0, 0, 0)\n", "end = datetime.datetime(2020, 2, 5, 23, 59, 59)\n", "programs = [\"trex\"]\n", "instrument_types = [\"RGB ASI\"]" ] }, { "cell_type": "code", "execution_count": 6, "id": "5adfb247", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "DataProductsSearch(executed=False, completed=False, request_id='')\n" ] } ], "source": [ "# create the Search object\n", "s = pyaurorax.data_products.Search(start,\n", " end,\n", " programs=programs,\n", " instrument_types=instrument_types)\n", "print(s)" ] }, { "cell_type": "code", "execution_count": 7, "id": "9de3d908", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "DataProductsSearch(executed=True, completed=False, request_id='886b355b-bbed-4eb1-a4d3-4a063f45e290')\n" ] } ], "source": [ "# execute the search\n", "s.execute()\n", "print(s)" ] }, { "cell_type": "code", "execution_count": 8, "id": "5db14c36", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'logs': [{'level': 'debug',\n", " 'summary': 'Search request arrived and saved',\n", " 'timestamp': '2022-01-14T01:38:31.43241Z'},\n", " {'level': 'info',\n", " 'summary': 'Starting query for 886b355b-bbed-4eb1-a4d3-4a063f45e290',\n", " 'timestamp': '2022-01-14T01:38:31.441468Z'},\n", " {'level': 'info',\n", " 'summary': 'Finished query in 0.11 seconds.',\n", " 'timestamp': '2022-01-14T01:38:31.561696Z'},\n", " {'level': 
'debug',\n", " 'summary': 'Starting to write data to file',\n", " 'timestamp': '2022-01-14T01:38:31.575273Z'},\n", " {'level': 'info',\n", " 'summary': 'Finished writing data, found 1490 records in 0.23 '\n", " 'seconds.',\n", " 'timestamp': '2022-01-14T01:38:31.812434Z'},\n", " {'level': 'debug',\n", " 'summary': 'File size is 3743875 bytes.',\n", " 'timestamp': '2022-01-14T01:38:31.823212Z'},\n", " {'level': 'info',\n", " 'summary': 'Finished search in 0.39 seconds and found 1490 records.',\n", " 'timestamp': '2022-01-14T01:38:31.838496Z'}],\n", " 'search_request': {'query': {'data_product_type_filters': [],\n", " 'data_sources': {'data_product_metadata_filters': {'expressions': [],\n", " 'logical_operator': 'AND'},\n", " 'instrument_types': ['RGB ASI'],\n", " 'platforms': [],\n", " 'programs': ['trex']},\n", " 'end': '2020-02-05T23:59:59',\n", " 'request_id': '886b355b-bbed-4eb1-a4d3-4a063f45e290',\n", " 'start': '2020-02-01T00:00:00'},\n", " 'request_id': '886b355b-bbed-4eb1-a4d3-4a063f45e290',\n", " 'request_type': 'data_product',\n", " 'requested': '2022-01-14T01:38:31.419171Z'},\n", " 'search_result': {'completed_timestamp': '2022-01-14T01:38:31.830882Z',\n", " 'data_uri': '/api/v1/data_products/requests/886b355b-bbed-4eb1-a4d3-4a063f45e290/data',\n", " 'error_condition': False,\n", " 'file_size': 3743875,\n", " 'query_duration': 389,\n", " 'result_count': 1490,\n", " 'result_file_deleted_timestamp': None}}\n" ] } ], "source": [ "# get request status\n", "s.update_status()\n", "pprint.pprint(s.status)" ] }, { "cell_type": "code", "execution_count": 9, "id": "df10d14b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[{'level': 'debug',\n", " 'summary': 'Search request arrived and saved',\n", " 'timestamp': '2022-01-14T01:38:31.43241Z'},\n", " {'level': 'info',\n", " 'summary': 'Starting query for 886b355b-bbed-4eb1-a4d3-4a063f45e290',\n", " 'timestamp': '2022-01-14T01:38:31.441468Z'},\n", " {'level': 'info',\n", " 
'summary': 'Finished query in 0.11 seconds.',\n", " 'timestamp': '2022-01-14T01:38:31.561696Z'},\n", " {'level': 'debug',\n", " 'summary': 'Starting to write data to file',\n", " 'timestamp': '2022-01-14T01:38:31.575273Z'},\n", " {'level': 'info',\n", " 'summary': 'Finished writing data, found 1490 records in 0.23 seconds.',\n", " 'timestamp': '2022-01-14T01:38:31.812434Z'},\n", " {'level': 'debug',\n", " 'summary': 'File size is 3743875 bytes.',\n", " 'timestamp': '2022-01-14T01:38:31.823212Z'},\n", " {'level': 'info',\n", " 'summary': 'Finished search in 0.39 seconds and found 1490 records.',\n", " 'timestamp': '2022-01-14T01:38:31.838496Z'}]\n" ] } ], "source": [ "# view just the logs for the request (update the status beforehand, but we do that in the above cell)\n", "pprint.pprint(s.logs)" ] }, { "cell_type": "code", "execution_count": 10, "id": "0ba4da1f", "metadata": {}, "outputs": [], "source": [ "# wait for the data\n", "s.wait()\n", "s.update_status()" ] }, { "cell_type": "code", "execution_count": 11, "id": "3ecc4eb9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "DataProduct(data_source=DataSource(identifier=102, program='trex', platform='pinawa', instrument_type='RGB ASI', source_type='ground', display_name='TREx RGB PINA'), start=datetime.datetime(2020, 2, 1, 0, 0), end=datetime.datetime(2020, 2, 1, 23, 59), data_product_type='keogram', url='https://data.phys.uc...', metadata={'keogram_type': 'da...})\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
data_sourcedata_product_typestartendurlmetadata
0DataSource(identifier=102, program='trex', pla...keogram2020-02-01 00:00:002020-02-01 23:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'daily', 'imaging_end_time': ...
23DataSource(identifier=103, program='trex', pla...movie2020-02-01 00:00:002020-02-01 23:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'movie_type': 'real-time daily', 'imaging_end...
24DataSource(identifier=103, program='trex', pla...keogram2020-02-01 00:00:002020-02-01 23:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'daily', 'imaging_end_time': ...
26DataSource(identifier=103, program='trex', pla...keogram2020-02-01 00:00:002020-02-01 23:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'daily_hires_200px', 'imaging...
27DataSource(identifier=103, program='trex', pla...keogram2020-02-01 00:00:002020-02-01 23:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'daily_hires', 'imaging_end_t...
.....................
1479DataSource(identifier=96, program='trex', plat...keogram2020-02-05 13:00:002020-02-05 13:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'hourly_hires', 'imaging_end_...
1488DataSource(identifier=96, program='trex', plat...montage2020-02-05 14:00:002020-02-05 14:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'montage_type': 'hourly', 'imaging_end_time':...
1486DataSource(identifier=96, program='trex', plat...keogram2020-02-05 14:00:002020-02-05 14:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'hourly_hires', 'imaging_end_...
1487DataSource(identifier=96, program='trex', plat...keogram2020-02-05 14:00:002020-02-05 14:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'hourly_hires_200px', 'imagin...
1489DataSource(identifier=96, program='trex', plat...keogram2020-02-05 14:00:002020-02-05 14:59:00https://data.phys.ucalgary.ca/sort_by_project/...{'keogram_type': 'hourly', 'imaging_end_time':...
\n", "

1490 rows × 6 columns

\n", "
" ], "text/plain": [ " data_source data_product_type \\\n", "0 DataSource(identifier=102, program='trex', pla... keogram \n", "23 DataSource(identifier=103, program='trex', pla... movie \n", "24 DataSource(identifier=103, program='trex', pla... keogram \n", "26 DataSource(identifier=103, program='trex', pla... keogram \n", "27 DataSource(identifier=103, program='trex', pla... keogram \n", "... ... ... \n", "1479 DataSource(identifier=96, program='trex', plat... keogram \n", "1488 DataSource(identifier=96, program='trex', plat... montage \n", "1486 DataSource(identifier=96, program='trex', plat... keogram \n", "1487 DataSource(identifier=96, program='trex', plat... keogram \n", "1489 DataSource(identifier=96, program='trex', plat... keogram \n", "\n", " start end \\\n", "0 2020-02-01 00:00:00 2020-02-01 23:59:00 \n", "23 2020-02-01 00:00:00 2020-02-01 23:59:00 \n", "24 2020-02-01 00:00:00 2020-02-01 23:59:00 \n", "26 2020-02-01 00:00:00 2020-02-01 23:59:00 \n", "27 2020-02-01 00:00:00 2020-02-01 23:59:00 \n", "... ... ... \n", "1479 2020-02-05 13:00:00 2020-02-05 13:59:00 \n", "1488 2020-02-05 14:00:00 2020-02-05 14:59:00 \n", "1486 2020-02-05 14:00:00 2020-02-05 14:59:00 \n", "1487 2020-02-05 14:00:00 2020-02-05 14:59:00 \n", "1489 2020-02-05 14:00:00 2020-02-05 14:59:00 \n", "\n", " url \\\n", "0 https://data.phys.ucalgary.ca/sort_by_project/... \n", "23 https://data.phys.ucalgary.ca/sort_by_project/... \n", "24 https://data.phys.ucalgary.ca/sort_by_project/... \n", "26 https://data.phys.ucalgary.ca/sort_by_project/... \n", "27 https://data.phys.ucalgary.ca/sort_by_project/... \n", "... ... \n", "1479 https://data.phys.ucalgary.ca/sort_by_project/... \n", "1488 https://data.phys.ucalgary.ca/sort_by_project/... \n", "1486 https://data.phys.ucalgary.ca/sort_by_project/... \n", "1487 https://data.phys.ucalgary.ca/sort_by_project/... \n", "1489 https://data.phys.ucalgary.ca/sort_by_project/... 
\n", "\n", " metadata \n", "0 {'keogram_type': 'daily', 'imaging_end_time': ... \n", "23 {'movie_type': 'real-time daily', 'imaging_end... \n", "24 {'keogram_type': 'daily', 'imaging_end_time': ... \n", "26 {'keogram_type': 'daily_hires_200px', 'imaging... \n", "27 {'keogram_type': 'daily_hires', 'imaging_end_t... \n", "... ... \n", "1479 {'keogram_type': 'hourly_hires', 'imaging_end_... \n", "1488 {'montage_type': 'hourly', 'imaging_end_time':... \n", "1486 {'keogram_type': 'hourly_hires', 'imaging_end_... \n", "1487 {'keogram_type': 'hourly_hires_200px', 'imagin... \n", "1489 {'keogram_type': 'hourly', 'imaging_end_time':... \n", "\n", "[1490 rows x 6 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# get data\n", "s.get_data()\n", "print(s.data[0])\n", "\n", "# show data as pandas dataframe\n", "data_products = [d.__dict__ for d in s.data]\n", "df = pd.DataFrame(data_products)\n", "df.sort_values(\"start\")" ] }, { "cell_type": "code", "execution_count": null, "id": "4917bbb2", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" } }, "nbformat": 4, "nbformat_minor": 5 }