{ "cells": [ { "cell_type": "markdown", "id": "9tcjtJ8cvaPv", "metadata": { "id": "9tcjtJ8cvaPv" }, "source": [ "# Query to get intergenic regions of a TARA sample in the OcéanIA Platform" ] }, { "cell_type": "markdown", "id": "yFdcX6L9vmkE", "metadata": { "id": "yFdcX6L9vmkE" }, "source": [ "### 1. Install oceania-query-fasta package" ] }, { "cell_type": "code", "execution_count": null, "id": "55fdb172", "metadata": { "id": "55fdb172" }, "outputs": [], "source": [ "%pip install oceania-query-fasta\n", "# openpyxl is required to load the supplementary info (.xlsx) into pandas.\n", "# The comment is kept on its own line because %pip hands the rest of its line to pip,\n", "# so a trailing comment is only ignored on shells that treat '#' as a comment marker.\n", "%pip install openpyxl" ] }, { "cell_type": "markdown", "id": "5bdf46f4-fbba-4df4-9450-d4e4ab1dc415", "metadata": {}, "source": [ "### 2. Load supplementary information for samples and data of Ocean Microbial Reference Gene Catalog v2" ] }, { "cell_type": "code", "execution_count": null, "id": "42df4bb5-ac1c-4137-bfae-0e40596f9cc4", "metadata": {}, "outputs": [], "source": [ "import pandas\n", "\n", "# Supplementary information spreadsheet, downloaded from Zenodo on every run\n", "SUPP_INFO_LINK = \"https://zenodo.org/record/3539258/files/Salazar_et_al_2019_Suppl_Info.xlsx?download=1\"\n", "\n", "table_W1 = pandas.read_excel(SUPP_INFO_LINK, sheet_name=\"Table_W1\")\n", "\n", "# Select the first sample of the surface water layer (rows where Layer == 'SRF')\n", "selected_sample = table_W1[table_W1.Layer==\"SRF\"].head(1)\n", "print(selected_sample)\n", "\n", "# .item() extracts the single value of the one-row selection (it raises if no SRF row was found)\n", "sample_id = selected_sample[\"PANGAEA sample id\"].item()" ] }, { "cell_type": "markdown", "id": "25c31393-cfed-419b-b02e-b0b26ae01971", "metadata": {}, "source": [ "### 3. 
Get metadata for the first 10 intergenic regions of size greater than 100 for the selected sample" ] }, { "cell_type": "code", "execution_count": null, "id": "f9effea1-ea3b-4cc5-a289-db95795c6eda", "metadata": {}, "outputs": [], "source": [ "from oceania import list_intergenic_regions\n", "\n", "# Ask for the first page of results, limited to 10 regions with min_length=100\n", "intergenic_regions_metadata = list_intergenic_regions(\n", "    sample_id,\n", "    min_length=100,\n", "    page=1,\n", "    page_size=10,\n", ")\n", "\n", "print(intergenic_regions_metadata)" ] }, { "cell_type": "markdown", "id": "5ea0f4b3", "metadata": {}, "source": [ "### 4. Prepare request params" ] }, { "cell_type": "code", "execution_count": null, "id": "4132badf", "metadata": {}, "outputs": [], "source": [ "# Keep only the three columns used to build the request\n", "request_regions = intergenic_regions_metadata[['sequence', 'start', 'stop']].copy()\n", "\n", "# One (sequence, start, stop) tuple per region, with start/stop converted to int\n", "request_params = [\n", "    (sequence, int(start), int(stop))\n", "    for sequence, start, stop in request_regions.itertuples(index=False, name=None)\n", "]\n", "\n", "print(request_params)" ] }, { "cell_type": "markdown", "id": "0159d5e7", "metadata": {}, "source": [ "### 5. Perform call to the OcéanIA services" ] }, { "cell_type": "code", "execution_count": null, "id": "6344f9c6", "metadata": {}, "outputs": [], "source": [ "from oceania import get_sequences_from_fasta\n", "\n", "# Request the sequences of the selected regions for this sample\n", "request_result = get_sequences_from_fasta(sample_id, request_params)\n", "\n", "# The result is loaded as a pandas.DataFrame\n", "print(request_result)" ] } ], "metadata": { "colab": { "name": "Copia de query_dependecy.ipynb", "provenance": [ { "file_id": "https://github.com/Inria-Chile/oceania-lib-demo/blob/main/notebooks/query_dependecy.ipynb", "timestamp": 1623342428289 } ] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.5" } }, "nbformat": 4, "nbformat_minor": 5 }