{ "cells": [ { "cell_type": "markdown", "id": "9tcjtJ8cvaPv", "metadata": { "id": "9tcjtJ8cvaPv" }, "source": [ "# Simple query to OcéanIA Platform TARA_A100000171\n", "## This example shows how to extract arbitrary biological sub-sequences from a FASTA file available in the OcéanIA Platform" ] }, { "cell_type": "markdown", "id": "5ea0f4b3", "metadata": {}, "source": [ "### 1. Prepare request params" ] }, { "cell_type": "code", "execution_count": 2, "id": "4132badf", "metadata": {}, "outputs": [], "source": [ "TARA_SAMPLE_ID = \"TARA_A100000171\"\n", "\n", "# REQUEST_PARAMS is a list of tuples that identify subsequences to extract\n", "# each tuple must have the values (sequence_id, start_index, stop_index, sequence_type)\n", "# sequence type accepted values are [raw, complement, reverse_complement], optional value if ommited defaults to \"raw\".\n", "REQUEST_PARAMS = [\n", " (\"TARA_A100000171_G_scaffold48_1\", 10, 50, \"complement\"),\n", " (\"TARA_A100000171_G_scaffold48_1\", 10, 50),\n", " (\"TARA_A100000171_G_scaffold48_1\", 10, 50, \"reverse_complement\"),\n", " (\"TARA_A100000171_G_scaffold181_1\", 0, 50),\n", " (\"TARA_A100000171_G_scaffold181_1\", 100, 200),\n", " (\"TARA_A100000171_G_scaffold181_1\", 200, 230),\n", " (\"TARA_A100000171_G_scaffold493_2\", 54, 76),\n", " (\"TARA_A100000171_G_scaffold50396_2\", 87, 105),\n", " (\"TARA_A100000171_G_C2001995_1\", 20, 635),\n", " (\"TARA_A100000171_G_C2026460_1\", 0, 100),\n", " ]" ] }, { "cell_type": "markdown", "id": "0159d5e7", "metadata": {}, "source": [ "### 2. Perform call to the OcéanIA services" ] }, { "cell_type": "code", "execution_count": 3, "id": "6344f9c6", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[30-06-2021 07:58:00] Sending request for fasta sequences\n", "[30-06-2021 07:58:01] Request accepted\n", "[30-06-2021 07:58:01] Waiting for results...\n", "[30-06-2021 07:59:38] Done. Elapsed time: 98.09430822399736 seconds\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Result loaded into a DataFrame\n", " id start end type \\\n", "0 TARA_A100000171_G_scaffold48_1 10 50 complement \n", "1 TARA_A100000171_G_scaffold48_1 10 50 raw \n", "2 TARA_A100000171_G_scaffold48_1 10 50 reverse_complement \n", "3 TARA_A100000171_G_scaffold181_1 0 50 raw \n", "4 TARA_A100000171_G_scaffold181_1 100 200 raw \n", "5 TARA_A100000171_G_scaffold181_1 200 230 raw \n", "6 TARA_A100000171_G_scaffold493_2 54 76 raw \n", "7 TARA_A100000171_G_scaffold50396_2 87 105 raw \n", "8 TARA_A100000171_G_C2001995_1 20 635 raw \n", "9 TARA_A100000171_G_C2026460_1 0 100 raw \n", "\n", " sequence \n", "0 ACCGTAACGTAGGCCATATTATTTTCATGGTCTTCCACAA \n", "1 TGGCATTGCATCCGGTATAATAAAAGTACCAGAAGGTGTT \n", "2 AACACCTTCTGGTACTTTTATTATACCGGATGCAATGCCA \n", "3 CCAAGACCAAGCAATTTTAACACCACACTTAGATACTGCGCAAACA... \n", "4 ATTATGTTACCAGCACTTGATAACCAAAAAGTTTGGGcaggattaa... \n", "5 ATCAAACTGATGCTACTAACTCAGAAGCAT \n", "6 TAAGTTTTTATTATTATATTTT \n", "7 AGCTGTTCGGAAAACTAG \n", "8 ACAGCACACCAAGCAGGTCGTCGACCGAAACGATATTGAGAAGAAT... \n", "9 AATTTGAAACAACCCTAAAGTGTTTACCATAATAGGTTCTTAAATC... \n" ] } ], "source": [ "#@title Double click to see the cell of the Python program\n", "\n", "from oceania import get_sequences_from_fasta\n", "\n", "request_result = get_sequences_from_fasta(\n", " TARA_SAMPLE_ID,\n", " REQUEST_PARAMS\n", ")\n", "\n", "# get_sequences_from_fasta returns a pandas.DataFrame with the extracted sequences\n", "print(request_result)" ] } ], "metadata": { "colab": { "name": "Copia de query_dependecy.ipynb", "provenance": [ { "file_id": "https://github.com/Inria-Chile/oceania-lib-demo/blob/main/notebooks/query_dependecy.ipynb", "timestamp": 1623342428289 } ] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.5" } }, "nbformat": 4, "nbformat_minor": 5 }