{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Extracting runs from one DB file to another\n",
    "\n",
    "This notebook shows how to use the `extract_runs_into_db` function to extract runs from a database (DB) file (the source DB) into another DB file (the target DB). If the target DB does not exist, it will be created. The runs are **NOT** removed from the original DB file; they are copied over.\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup\n",
    "\n",
    "Let us set up a DB file with some runs in it."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "from qcodes.dataset.database_extract_runs import extract_runs_into_db\n",
    "from qcodes.dataset.experiment_container import Experiment, load_experiment_by_name\n",
    "from qcodes.tests.instrument_mocks import DummyInstrument\n",
    "from qcodes.dataset.measurements import Measurement\n",
    "from qcodes import Station\n",
    "\n",
    "# The following function is imported and used here only for the sake\n",
    "# of explicitness. As a qcodes user, please, consider this function\n",
    "# private to qcodes which means its name, behavior, and location may\n",
    "# change without notice between qcodes versions.\n",
    "from qcodes.dataset.sqlite.database import connect"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "source_path = os.path.join(os.getcwd(), \"extract_runs_notebook_source.db\")\n",
    "target_path = os.path.join(os.getcwd(), \"extract_runs_notebook_target.db\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Upgrading database; v0 -> v1: : 0it [00:00, ?it/s]\n",
      "Upgrading database; v1 -> v2: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 407.89it/s]\n",
      "Upgrading database; v2 -> v3: : 0it [00:00, ?it/s]\n",
      "Upgrading database; v3 -> v4: : 0it [00:00, ?it/s]\n",
      "Upgrading database; v4 -> v5: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 333.17it/s]\n",
      "Upgrading database; v5 -> v6: : 0it [00:00, ?it/s]\n",
      "Upgrading database; v6 -> v7: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 166.47it/s]\n",
      "Upgrading database; v7 -> v8: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 250.05it/s]\n",
      "Upgrading database; v8 -> v9: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 499.56it/s]\n",
      "Upgrading database; v0 -> v1: : 0it [00:00, ?it/s]\n",
      "Upgrading database; v1 -> v2: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 333.33it/s]\n",
      "Upgrading database; v2 -> v3: : 0it [00:00, ?it/s]\n",
      "Upgrading database; v3 -> v4: : 0it [00:00, ?it/s]\n",
      "Upgrading database; v4 -> v5: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 333.38it/s]\n",
      "Upgrading database; v5 -> v6: : 0it [00:00, ?it/s]\n",
      "Upgrading database; v6 -> v7: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 166.56it/s]\n",
      "Upgrading database; v7 -> v8: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 499.74it/s]\n",
      "Upgrading database; v8 -> v9: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 500.45it/s]\n"
     ]
    }
   ],
   "source": [
    "source_conn = connect(source_path)\n",
    "target_conn = connect(target_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "exp = Experiment(\n",
    "    name=\"extract_runs_experiment\", sample_name=\"no_sample\", conn=source_conn\n",
    ")\n",
    "\n",
    "my_inst = DummyInstrument(\"my_inst\", gates=[\"voltage\", \"current\"])\n",
    "station = Station(my_inst)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting experimental run with id: 1. \n",
      "Starting experimental run with id: 2. \n",
      "Starting experimental run with id: 3. \n",
      "Starting experimental run with id: 4. \n",
      "Starting experimental run with id: 5. \n",
      "Starting experimental run with id: 6. \n",
      "Starting experimental run with id: 7. \n",
      "Starting experimental run with id: 8. \n",
      "Starting experimental run with id: 9. \n",
      "Starting experimental run with id: 10. \n"
     ]
    }
   ],
   "source": [
    "meas = Measurement(exp=exp)\n",
    "meas.register_parameter(my_inst.voltage)\n",
    "meas.register_parameter(my_inst.current, setpoints=(my_inst.voltage,))\n",
    "\n",
    "# Add 10 runs with gradually more and more data\n",
    "\n",
    "for run_id in range(1, 11):\n",
    "    with meas.run() as datasaver:\n",
    "        for step, noise in enumerate(np.random.randn(run_id)):\n",
    "            datasaver.add_result((my_inst.voltage, step), (my_inst.current, noise))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Extraction\n",
    "\n",
    "Now let us extract runs 3 and 7 into our desired target DB file. All runs must come from the same experiment. To extract runs from different experiments, one may call the function several times.\n",
    "\n",
    "The function will look in the target DB to see if an experiment with matching attributes already exists. If not, such an experiment is created."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "extract_runs_into_db(source_path, target_path, 3, 7)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "target_exp = load_experiment_by_name(name=\"extract_runs_experiment\", conn=target_conn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "extract_runs_experiment#no_sample#1@C:\\Users\\jenielse\\source\\repos\\Qcodes\\docs\\examples\\DataSet\\extract_runs_notebook_target.db\n",
       "-------------------------------------------------------------------------------------------------------------------------------\n",
       "1-results-1-my_inst_voltage,my_inst_current-3\n",
       "2-results-2-my_inst_voltage,my_inst_current-7"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "target_exp"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The last number printed in each line is the number of data points. As expected, we get 3 and 7.\n",
    "\n",
    "Note that the runs will have different `run_id`s in the new database. Their GUIDs are, however, the same (as they must be)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'aaaaaaaa-0000-0000-0000-017ea07e4a31'"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "exp.data_set(3).guid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'aaaaaaaa-0000-0000-0000-017ea07e4a31'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "target_exp.data_set(1).guid"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Furthermore, note that the original `run_id` preserved as `captured_run_id`. We will demonstrate below how to look up data via the `captured_run_id`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "target_exp.data_set(1).captured_run_id"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Merging data from 2 databases"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "There are occasions where it is convenient to combine data from several databases."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's first demonstrate this by creating some new experiments in another db file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "extra_source_path = os.path.join(os.getcwd(), \"extract_runs_notebook_source_aux.db\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Upgrading database; v0 -> v1: : 0it [00:00, ?it/s]\n",
      "Upgrading database; v1 -> v2: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 500.04it/s]\n",
      "Upgrading database; v2 -> v3: : 0it [00:00, ?it/s]\n",
      "Upgrading database; v3 -> v4: : 0it [00:00, ?it/s]\n",
      "Upgrading database; v4 -> v5: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 333.89it/s]\n",
      "Upgrading database; v5 -> v6: : 0it [00:00, ?it/s]\n",
      "Upgrading database; v6 -> v7: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 199.91it/s]\n",
      "Upgrading database; v7 -> v8: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 333.44it/s]\n",
      "Upgrading database; v8 -> v9: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 333.36it/s]\n"
     ]
    }
   ],
   "source": [
    "source_extra_conn = connect(extra_source_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "exp = Experiment(\n",
    "    name=\"extract_runs_experiment_aux\", sample_name=\"no_sample\", conn=source_extra_conn\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Starting experimental run with id: 1. \n",
      "Starting experimental run with id: 2. \n",
      "Starting experimental run with id: 3. \n",
      "Starting experimental run with id: 4. \n",
      "Starting experimental run with id: 5. \n",
      "Starting experimental run with id: 6. \n",
      "Starting experimental run with id: 7. \n",
      "Starting experimental run with id: 8. \n",
      "Starting experimental run with id: 9. \n",
      "Starting experimental run with id: 10. \n"
     ]
    }
   ],
   "source": [
    "meas = Measurement(exp=exp)\n",
    "meas.register_parameter(my_inst.current)\n",
    "meas.register_parameter(my_inst.voltage, setpoints=(my_inst.current,))\n",
    "\n",
    "# Add 10 runs with gradually more and more data\n",
    "\n",
    "for run_id in range(1, 11):\n",
    "    with meas.run() as datasaver:\n",
    "        for step, noise in enumerate(np.random.randn(run_id)):\n",
    "            datasaver.add_result((my_inst.current, step), (my_inst.voltage, noise))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'aaaaaaaa-0000-0000-0000-017ea07e5986'"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "exp.data_set(3).guid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "extract_runs_into_db(extra_source_path, target_path, 1, 3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "target_exp_aux = load_experiment_by_name(\n",
    "    name=\"extract_runs_experiment_aux\", conn=target_conn\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The GUID should be preserved."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'aaaaaaaa-0000-0000-0000-017ea07e5986'"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "target_exp_aux.data_set(2).guid"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "And the original `run_id` is preserved as `captured_run_id` "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "target_exp_aux.data_set(2).captured_run_id"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Uniquely identifying and loading runs\n",
    "\n",
    "As runs move from one database to the other, uniquely identifying a run becomes non-trivial."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note how we now have 2 runs in the same DB sharing the same `captured_run_id`. This means that `captured_run_id` is **not** a unique key. We can demonstrate that `captured_run_id` is not unique by looking up the `GUID`s that match this `captured_run_id`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "from qcodes.dataset.data_set import (\n",
    "    load_by_guid,\n",
    "    load_by_run_spec,\n",
    "    get_guids_by_run_spec,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['aaaaaaaa-0000-0000-0000-017ea07e4a31',\n",
       " 'aaaaaaaa-0000-0000-0000-017ea07e5986']"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "guids = get_guids_by_run_spec(conn=target_conn, captured_run_id=3)\n",
    "guids"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "results #1@C:\\Users\\jenielse\\source\\repos\\Qcodes\\docs\\examples\\DataSet\\extract_runs_notebook_target.db\n",
       "------------------------------------------------------------------------------------------------------\n",
       "my_inst_voltage - numeric\n",
       "my_inst_current - numeric"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "load_by_guid(guids[0], conn=target_conn)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "results #4@C:\\Users\\jenielse\\source\\repos\\Qcodes\\docs\\examples\\DataSet\\extract_runs_notebook_target.db\n",
       "------------------------------------------------------------------------------------------------------\n",
       "my_inst_current - numeric\n",
       "my_inst_voltage - numeric"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "load_by_guid(guids[1], conn=target_conn)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To enable loading of runs that may share the same `captured_run_id`, the function `load_by_run_data` is supplied.\n",
    "This function takes one or more optional sets of metadata. If more than one run matching this information is found the metadata of the matching runs is printed and an error is raised. It is now possible to suply more information to the function to uniquely identify a specific run."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  captured_run_id    captured_counter  experiment_name              sample_name      sample_id    location    work_station\n",
      "-----------------  ------------------  ---------------------------  -------------  -----------  ----------  --------------\n",
      "                3                   3  extract_runs_experiment      no_sample       2863311530           0               0\n",
      "                3                   3  extract_runs_experiment_aux  no_sample       2863311530           0               0\n",
      "Caught a NameError\n"
     ]
    }
   ],
   "source": [
    "try:\n",
    "    load_by_run_spec(captured_run_id=3, conn=target_conn)\n",
    "except NameError:\n",
    "    print(\"Caught a NameError\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To single out one of these two runs, we can thus specify the `experiment_name`:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "results #4@C:\\Users\\jenielse\\source\\repos\\Qcodes\\docs\\examples\\DataSet\\extract_runs_notebook_target.db\n",
       "------------------------------------------------------------------------------------------------------\n",
       "my_inst_current - numeric\n",
       "my_inst_voltage - numeric"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "load_by_run_spec(\n",
    "    captured_run_id=3, experiment_name=\"extract_runs_experiment_aux\", conn=target_conn\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.0"
  },
  "widgets": {
   "application/vnd.jupyter.widget-state+json": {
    "state": {},
    "version_major": 2,
    "version_minor": 0
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}