{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Create a simple ISA descriptor\n", "\n", "This example creates minimal metadata for a single-study ISA descriptor with one assay declared.\n", "\n", "It shows how to serialize (write) the ISA Model content to ISA-Tab and ISA-JSON formats." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# If executing this notebook on `Google Colab`, uncomment the following command\n", "# and run it to install the required Python libraries. Also make sure the test datasets are available.\n", "\n", "# !pip install -r requirements.txt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from isatools.model import (\n", " Comment,\n", " Investigation,\n", " Study,\n", " StudyFactor,\n", " FactorValue,\n", " OntologyAnnotation,\n", " Material,\n", " Sample,\n", " Source,\n", " Protocol,\n", " ProtocolParameter,\n", " ProtocolComponent,\n", " ParameterValue,\n", " Process,\n", " Publication,\n", " Person,\n", " Assay,\n", " DataFile,\n", " plink\n", ")\n", "import datetime" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Study metadata" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "investigation = Investigation()\n", "i_comment = Comment(name=\"i_comment\", value=\"i_value\")\n", "investigation.comments.append(i_comment)\n", "study = Study(filename=\"s_study.txt\")\n", "st_comment = Comment(name=\"st_comment\", value=\"st_value\")\n", "study.comments.append(st_comment)\n", "study.identifier = \"S1\"\n", "study.title = \"My Simple ISA Study\"\n", "study.description = \"We could alternatively use the class constructor's parameters to set some default \" \\\n", " \"values at the time of creation, however we want to demonstrate how to use the \" \\\n", " \"object's instance variables to set values.\"\n", "study.submission_date = str(datetime.datetime.today())\n", 
"study.public_release_date = str(datetime.datetime.today())\n", "study.sources = [Source(name=\"source1\"), Source(name=\"source2\")]\n", "src_comment = Comment(name=\"src_comment\", value=\"src_value\")\n", "study.sources[0].comments.append(src_comment)\n", "smp_comment = Comment(name=\"smp_comment\", value=\"smp_value\")\n", "study.samples = [Sample(name=\"sample1\")]\n", "study.samples[0].comments.append(smp_comment)\n", "study.samples[0].derives_from.append(study.sources[0])\n", "study.samples[0].derives_from.append(study.sources[1])\n", "\n", "\n", "study.protocols = [\n", " Protocol(name=\"sample collection\",\n", " components=[\n", " ProtocolComponent(name=\"magnetic agitator\",\n", " component_type=OntologyAnnotation(term=\"device\"))]),\n", " Protocol(\n", " name=\"data analysis with Galaxy\",\n", " uri=\"https://doi.org/10.5464/workflow.cwl\",\n", " protocol_type=OntologyAnnotation(term=\"data transformation\"),\n", " parameters=[\n", " ProtocolParameter(parameter_name=OntologyAnnotation(term=\"genome assembly\")),\n", " ProtocolParameter(parameter_name=OntologyAnnotation(term=\"cut-off value\"))\n", " ],\n", " ),\n", " Protocol(\n", " name=\"data visualization with Intermine\",\n", " uri=\"https://intermine.org/10.5464/network.svg\",\n", " protocol_type=OntologyAnnotation(term=\"data visualization\"),\n", " comments=[Comment(name=\"pro_comment\", value=\"pro_value\")]\n", " )\n", "]\n", "\n", "study.factors = [\n", " StudyFactor(name=\"Factor\",factor_type=OntologyAnnotation(term=\"factor category\"))\n", "]\n", "\n", "\n", "study.publications = [\n", " Publication(doi=\"10.12314\",pubmed_id=\"1222322\", title=\"publication title\")\n", " \n", "]\n", "\n", "study.contacts = [\n", " Person(first_name=\"bob\", last_name=\"hughes\", affiliation=\"WHO\", email=\"bob.hughes@who.else\",\n", " comments=[Comment(name=\"per_comment\", value=\"per_value\")]\n", " )\n", "]\n", "\n", "study.process_sequence = [\n", " Process(\n", " 
executes_protocol=study.protocols[-1],\n", " inputs=[study.sources[-1]],\n", " outputs=[study.samples[-1]]\n", " )\n", "]\n", "investigation.studies = [study]\n", "\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Next, we build an Assay object and declare the three protocols it uses: extraction, labeling and sequencing.\n", "# Each protocol executed by a Process below is registered in study.protocols so that the serialized\n", "# descriptor declares every protocol the assay table references.\n", "\n", "assay = Assay(filename=\"a_assay.txt\")\n", "\n", "extraction_protocol = Protocol(name='extraction', protocol_type=OntologyAnnotation(term=\"material extraction\"))\n", "study.protocols.append(extraction_protocol)\n", "\n", "labeling_protocol = Protocol(name='labeling', protocol_type=OntologyAnnotation(term=\"labeling\"))\n", "# register the labeling protocol itself (not the extraction protocol a second time)\n", "study.protocols.append(labeling_protocol)\n", "\n", "sequencing_protocol = Protocol(name='sequencing', protocol_type=OntologyAnnotation(term=\"material sequencing\"))\n", "study.protocols.append(sequencing_protocol)\n", "\n", "# To build out assay graphs, we enumerate the samples from the study level, and for each sample we create an\n", "# extraction process, a labeling process and a sequencing process. The extraction process takes as input a\n", "# sample material and produces an extract material. The labeling process takes the extract and produces a\n", "# labeled extract. The sequencing process takes the labeled extract and produces a data file. 
This will\n", "# produce one graph per study sample, from sample material through to data. With three samples they would be:\n", "#\n", "# (sample_material-0)->(extraction)->(extract-0)->(labeling)->(labeled-extract-0)->(sequencing)->(sequenced-data-0)\n", "# (sample_material-1)->(extraction)->(extract-1)->(labeling)->(labeled-extract-1)->(sequencing)->(sequenced-data-1)\n", "# (sample_material-2)->(extraction)->(extract-2)->(labeling)->(labeled-extract-2)->(sequencing)->(sequenced-data-2)\n", "#\n", "# This notebook declares a single study sample, so only the first graph is actually built here.\n", "# Note that the extraction, labeling and sequencing processes are distinct instances for every sample, so the\n", "# graphs are NOT interconnected.\n", "\n", "for i, sample in enumerate(study.samples):\n", "\n", " # create an extraction process that executes the extraction protocol\n", "\n", " extraction_process = Process(executes_protocol=extraction_protocol)\n", "\n", " # extraction process takes as input a sample, and produces an extract material as output\n", "\n", " extraction_process.inputs.append(sample)\n", " material = Material(name=\"extract-{}\".format(i))\n", " mat_comment = Comment(name=\"mat_comment\", value=\"mat_value\")\n", " material.comments.append(mat_comment)\n", " material.type = \"Extract Name\"\n", " extraction_process.outputs.append(material)\n", "\n", " \n", " # create a labeling process that executes the labeling protocol: it takes the extract as input\n", " # and produces a labeled extract material as output\n", " labeling_process = Process(executes_protocol=labeling_protocol)\n", " le = Material(name=\"labeleddextract-{}\".format(i))\n", " le.type = \"Labeled Extract Name\"\n", " labeling_process.inputs.append(extraction_process.outputs[0])\n", " labeling_process.outputs.append(le)\n", " \n", " # create a sequencing process that executes the sequencing protocol on the labeled extract\n", "\n", " sequencing_process = Process(executes_protocol=sequencing_protocol)\n", " sequencing_process.name = \"assay-name-{}\".format(i)\n", " sequencing_process.inputs.append(labeling_process.outputs[0])\n", "\n", " # Sequencing process usually has an output data file\n", "\n", " datafile = DataFile(filename=\"sequenced-data-{}\".format(i), label=\"Raw Data File\")\n", " data_comment = Comment(name=\"data_comment\",value=\"data_value\")\n", 
" datafile.comments.append(data_comment)\n", " sequencing_process.outputs.append(datafile)\n", "\n", " # Ensure Processes are linked forward and backward. plink(from_process, to_process) is a function to set\n", " # these links for you. It is found in the isatools.model package\n", "\n", " plink(extraction_process, labeling_process)\n", " plink(labeling_process, sequencing_process)\n", "\n", " # make sure the extract, data file, and the processes are attached to the assay\n", "\n", " assay.data_files.append(datafile)\n", " assay.samples.append(sample)\n", " assay.other_material.append(material)\n", " assay.other_material.append(le)\n", " assay.process_sequence.append(extraction_process)\n", " assay.process_sequence.append(labeling_process)\n", " assay.process_sequence.append(sequencing_process)\n", " assay.measurement_type = OntologyAnnotation(term=\"genome sequencing\")\n", " assay.technology_type = OntologyAnnotation(term=\"nucleotide sequencing\")\n", "\n", " \n", "study.assays.append(assay)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "isatools.model.Investigation(identifier='', filename='', title='', submission_date='', public_release_date='', ontology_source_references=[], publications=[], contacts=[], studies=[isatools.model.Study(filename='s_study.txt', identifier='S1', title='My Simple ISA Study', description='We could alternatively use the class constructor's parameters to set some default values at the time of creation, however we want to demonstrate how to use the object's instance variables to set values.', submission_date='2021-12-01 22:29:22.101893', public_release_date='2021-12-01 22:29:22.101933', contacts=[isatools.model.Person(last_name='hughes', first_name='bob', mid_initials='', email='bob.hughes@who.else', phone='', fax='', address='', affiliation='WHO', roles=[], comments=[isatools.model.Comment(name='per_comment', value='per_value')])], design_descriptors=[], 
publications=[isatools.model.Publication(pubmed_id='1222322', doi='10.12314', author_list='', title='publication title', status=None, comments=[])], factors=[isatools.model.StudyFactor(name='Factor', factor_type=isatools.model.OntologyAnnotation(term='factor category', term_source=None, term_accession='', comments=[]), comments=[])], protocols=[isatools.model.Protocol(name='sample collection', protocol_type=isatools.model.OntologyAnnotation(term='', term_source=None, term_accession='', comments=[]), uri='', version='', parameters=[], components=[], comments=[]), isatools.model.Protocol(name='data analysis with Galaxy', protocol_type=isatools.model.OntologyAnnotation(term='data transformation', term_source=None, term_accession='', comments=[]), uri='https://doi.org/10.5464/workflow.cwl', version='', parameters=[isatools.model.ProtocolParameter(parameter_name=isatools.model.OntologyAnnotation(term='genome assembly', term_source=None, term_accession='', comments=[]), comments=[]), isatools.model.ProtocolParameter(parameter_name=isatools.model.OntologyAnnotation(term='cut-off value', term_source=None, term_accession='', comments=[]), comments=[])], components=[], comments=[]), isatools.model.Protocol(name='data visualization with Intermine', protocol_type=isatools.model.OntologyAnnotation(term='data visualization', term_source=None, term_accession='', comments=[]), uri='https://intermine.org/10.5464/network.svg', version='', parameters=[], components=[], comments=[isatools.model.Comment(name='pro_comment', value='pro_value')]), isatools.model.Protocol(name='extraction', protocol_type=isatools.model.OntologyAnnotation(term='material extraction', term_source=None, term_accession='', comments=[]), uri='', version='', parameters=[], components=[], comments=[]), isatools.model.Protocol(name='extraction', protocol_type=isatools.model.OntologyAnnotation(term='material extraction', term_source=None, term_accession='', comments=[]), uri='', version='', parameters=[], 
components=[], comments=[]), isatools.model.Protocol(name='sequencing', protocol_type=isatools.model.OntologyAnnotation(term='material sequencing', term_source=None, term_accession='', comments=[]), uri='', version='', parameters=[], components=[], comments=[])], assays=[isatools.model.Assay(measurement_type=isatools.model.OntologyAnnotation(term='genome sequencing', term_source=None, term_accession='', comments=[]), technology_type=isatools.model.OntologyAnnotation(term='nucleotide sequencing', term_source=None, term_accession='', comments=[]), technology_platform='', filename='a_assay.txt', data_files=[isatools.model.DataFile(filename='sequenced-data-0', label='Raw Data File', generated_from=[], comments=[isatools.model.Comment(name='data_comment', value='data_value')])], samples=[isatools.model.Sample(name='sample1', characteristics=[], factor_values=[], derives_from=[isatools.model.Source(name='source1', characteristics=[], comments=[isatools.model.Comment(name='src_comment', value='src_value')]), isatools.model.Source(name='source2', characteristics=[], comments=[])], comments=[isatools.model.Comment(name='smp_comment', value='smp_value')])], process_sequence=[isatools.model.Process(id=\"\". name=\"None\", executes_protocol=Protocol(\n", " name=extraction\n", " protocol_type=material extraction\n", " uri=\n", " version=\n", " parameters=0 ProtocolParameter objects\n", " components=0 OntologyAnnotation objects\n", " comments=0 Comment objects\n", "), date=\"None\", performer=\"None\", inputs=[isatools.model.Sample(name='sample1', characteristics=[], factor_values=[], derives_from=[isatools.model.Source(name='source1', characteristics=[], comments=[isatools.model.Comment(name='src_comment', value='src_value')]), isatools.model.Source(name='source2', characteristics=[], comments=[])], comments=[isatools.model.Comment(name='smp_comment', value='smp_value')])], outputs=[]), isatools.model.Process(id=\"\". 
name=\"None\", executes_protocol=Protocol(\n", " name=labeling\n", " protocol_type=labeling\n", " uri=\n", " version=\n", " parameters=0 ProtocolParameter objects\n", " components=0 OntologyAnnotation objects\n", " comments=0 Comment objects\n", "), date=\"None\", performer=\"None\", inputs=[], outputs=[]), isatools.model.Process(id=\"\". name=\"assay-name-0\", executes_protocol=Protocol(\n", " name=sequencing\n", " protocol_type=material sequencing\n", " uri=\n", " version=\n", " parameters=0 ProtocolParameter objects\n", " components=0 OntologyAnnotation objects\n", " comments=0 Comment objects\n", "), date=\"None\", performer=\"None\", inputs=[], outputs=[isatools.model.DataFile(filename='sequenced-data-0', label='Raw Data File', generated_from=[], comments=[isatools.model.Comment(name='data_comment', value='data_value')])])], other_material=[, ], characteristic_categories=[], comments=[], units=[])], sources=[isatools.model.Source(name='source1', characteristics=[], comments=[isatools.model.Comment(name='src_comment', value='src_value')]), isatools.model.Source(name='source2', characteristics=[], comments=[])], samples=[isatools.model.Sample(name='sample1', characteristics=[], factor_values=[], derives_from=[isatools.model.Source(name='source1', characteristics=[], comments=[isatools.model.Comment(name='src_comment', value='src_value')]), isatools.model.Source(name='source2', characteristics=[], comments=[])], comments=[isatools.model.Comment(name='smp_comment', value='smp_value')])], process_sequence=[isatools.model.Process(id=\"\". 
name=\"None\", executes_protocol=Protocol(\n", " name=data visualization with Intermine\n", " protocol_type=data visualization\n", " uri=https://intermine.org/10.5464/network.svg\n", " version=\n", " parameters=0 ProtocolParameter objects\n", " components=0 OntologyAnnotation objects\n", " comments=1 Comment objects\n", "), date=\"None\", performer=\"None\", inputs=[isatools.model.Source(name='source2', characteristics=[], comments=[])], outputs=[isatools.model.Sample(name='sample1', characteristics=[], factor_values=[], derives_from=[isatools.model.Source(name='source1', characteristics=[], comments=[isatools.model.Comment(name='src_comment', value='src_value')]), isatools.model.Source(name='source2', characteristics=[], comments=[])], comments=[isatools.model.Comment(name='smp_comment', value='smp_value')])])], other_material=[], characteristic_categories=[], comments=[isatools.model.Comment(name='st_comment', value='st_value')], units=[])], comments=[isatools.model.Comment(name='i_comment', value='i_value')])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Let's see the object :\n", "investigation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Writing to ISA-Tab" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2021-12-01 22:29:22,205 [INFO]: isatab.py(_all_end_to_end_paths:1131) >> [1]\n", "2021-12-01 22:29:22,206 [WARNING]: isatab.py(write_study_table_files:1194) >> [3, 2, 1]\n", "2021-12-01 22:29:22,207 [INFO]: isatab.py(_longest_path_and_attrs:1091) >> [[1, 3, 2]]\n", "2021-12-01 22:29:22,229 [INFO]: isatab.py(_all_end_to_end_paths:1131) >> [2]\n", "2021-12-01 22:29:22,230 [INFO]: isatab.py(_longest_path_and_attrs:1091) >> [[2, 4, 5, 6, 7, 8]]\n", "2021-12-01 22:29:22,230 [INFO]: isatab.py(_longest_path_and_attrs:1091) >> [[2, 4, 5, 6, 7, 8]]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ 
"/var/folders/5n/rl6lqnks4rqb59pbtpvvntqw0000gr/T/tmpkd97rbgg/i_investigation.txt\n", "ONTOLOGY SOURCE REFERENCE\n", "Term Source Name\n", "Term Source File\n", "Term Source Version\n", "Term Source Description\n", "INVESTIGATION\n", "Investigation Identifier\t\n", "Investigation Title\t\n", "Investigation Description\t\n", "Investigation Submission Date\t\n", "Investigation Public Release Date\t\n", "Comment[i_comment]\ti_value\n", "INVESTIGATION PUBLICATIONS\n", "Investigation PubMed ID\n", "Investigation Publication DOI\n", "Investigation Publication Author List\n", "Investigation Publication Title\n", "Investigation Publication Status\n", "Investigation Publication Status Term Accession Number\n", "Investigation Publication Status Term Source REF\n", "INVESTIGATION CONTACTS\n", "Investigation Person Last Name\n", "Investigation Person First Name\n", "Investigation Person Mid Initials\n", "Investigation Person Email\n", "Investigation Person Phone\n", "Investigation Person Fax\n", "Investigation Person Address\n", "Investigation Person Affiliation\n", "Investigation Person Roles\n", "Investigation Person Roles Term Accession Number\n", "Investigation Person Roles Term Source REF\n", "STUDY\n", "Study Identifier\tS1\n", "Study Title\tMy Simple ISA Study\n", "Study Description\tWe could alternatively use the class constructor's parameters to set some default values at the time of creation, however we want to demonstrate how to use the object's instance variables to set values.\n", "Study Submission Date\t2021-12-01 22:29:22.101893\n", "Study Public Release Date\t2021-12-01 22:29:22.101933\n", "Study File Name\ts_study.txt\n", "Comment[st_comment]\tst_value\n", "STUDY DESIGN DESCRIPTORS\n", "Study Design Type\n", "Study Design Type Term Accession Number\n", "Study Design Type Term Source REF\n", "STUDY PUBLICATIONS\n", "Study PubMed ID\t1222322\n", "Study Publication DOI\t10.12314\n", "Study Publication Author List\t\n", "Study Publication Title\tpublication 
title\n", "Study Publication Status\t\n", "Study Publication Status Term Accession Number\t\n", "Study Publication Status Term Source REF\t\n", "STUDY FACTORS\n", "Study Factor Name\tFactor\n", "Study Factor Type\tfactor category\n", "Study Factor Type Term Accession Number\t\n", "Study Factor Type Term Source REF\t\n", "STUDY ASSAYS\n", "Study Assay File Name\ta_assay.txt\n", "Study Assay Measurement Type\tgenome sequencing\n", "Study Assay Measurement Type Term Accession Number\t\n", "Study Assay Measurement Type Term Source REF\t\n", "Study Assay Technology Type\tnucleotide sequencing\n", "Study Assay Technology Type Term Accession Number\t\n", "Study Assay Technology Type Term Source REF\t\n", "Study Assay Technology Platform\t\n", "STUDY PROTOCOLS\n", "Study Protocol Name\tsample collection\tdata analysis with Galaxy\tdata visualization with Intermine\textraction\textraction\tsequencing\n", "Study Protocol Type\t\tdata transformation\tdata visualization\tmaterial extraction\tmaterial extraction\tmaterial sequencing\n", "Study Protocol Type Term Accession Number\t\t\t\t\t\t\n", "Study Protocol Type Term Source REF\t\t\t\t\t\t\n", "Study Protocol Description\t\t\t\t\t\t\n", "Study Protocol URI\t\thttps://doi.org/10.5464/workflow.cwl\thttps://intermine.org/10.5464/network.svg\t\t\t\n", "Study Protocol Version\t\t\t\t\t\t\n", "Study Protocol Parameters Name\t\tgenome assembly;cut-off value\t\t\t\t\n", "Study Protocol Parameters Name Term Accession Number\t\t;\t\t\t\t\n", "Study Protocol Parameters Name Term Source REF\t\t;\t\t\t\t\n", "Study Protocol Components Name\t\t\t\t\t\t\n", "Study Protocol Components Type\t\t\t\t\t\t\n", "Study Protocol Components Type Term Accession Number\t\t\t\t\t\t\n", "Study Protocol Components Type Term Source REF\t\t\t\t\t\t\n", "Comment[pro_comment]\t\t\tpro_value\t\t\t\n", "STUDY CONTACTS\n", "Study Person Last Name\thughes\n", "Study Person First Name\tbob\n", "Study Person Mid Initials\t\n", "Study Person 
Email\tbob.hughes@who.else\n", "Study Person Phone\t\n", "Study Person Fax\t\n", "Study Person Address\t\n", "Study Person Affiliation\tWHO\n", "Study Person Roles\t\n", "Study Person Roles Term Accession Number\t\n", "Study Person Roles Term Source REF\t\n", "Comment[per_comment]\tper_value\n", "--------\n", "/var/folders/5n/rl6lqnks4rqb59pbtpvvntqw0000gr/T/tmpkd97rbgg/s_study.txt\n", "Source Name\tProtocol REF\tSample Name\tComment[smp_comment]\n", "source2\tdata visualization with Intermine\tsample1\tsmp_value\n", "--------\n", "/var/folders/5n/rl6lqnks4rqb59pbtpvvntqw0000gr/T/tmpkd97rbgg/a_assay.txt\n", "Sample Name\tComment[smp_comment]\tProtocol REF\tExtract Name\tComment[mat_comment]\tProtocol REF\tLabeled Extract Name\tProtocol REF\tRaw Data File\tComment[data_comment]\n", "sample1\tsmp_value\textraction\textract-0\tmat_value\tlabeling\tlabeleddextract-0\tsequencing\tsequenced-data-0\tdata_value\n", "\n" ] } ], "source": [ "from isatools.isatab import dumps\n", "print(dumps(investigation))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Writing to ISA-JSON" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\n", " \"@id\": \"#investigation/4909215360\",\n", " \"comments\": [\n", " {\n", " \"name\": \"i_comment\",\n", " \"value\": \"i_value\"\n", " }\n", " ],\n", " \"description\": \"\",\n", " \"identifier\": \"\",\n", " \"ontologySourceReferences\": [],\n", " \"people\": [],\n", " \"publicReleaseDate\": \"\",\n", " \"publications\": [],\n", " \"studies\": [\n", " {\n", " \"@id\": \"#study/4909211904\",\n", " \"assays\": [\n", " {\n", " \"@id\": \"#4909152096\",\n", " \"characteristicCategories\": [],\n", " \"comments\": [],\n", " \"dataFiles\": [\n", " {\n", " \"@id\": \"#data/rawdata-4357026720\",\n", " \"comments\": [\n", " {\n", " \"name\": \"data_comment\",\n", " \"value\": \"data_value\"\n", " }\n", " ],\n", " \"name\": \"sequenced-data-0\",\n", " 
\"type\": \"Raw Data File\"\n", " }\n", " ],\n", " \"filename\": \"a_assay.txt\",\n", " \"materials\": {\n", " \"otherMaterials\": [\n", " {\n", " \"@id\": \"#material/extract-4357027824\",\n", " \"characteristics\": [],\n", " \"comments\": [\n", " {\n", " \"name\": \"mat_comment\",\n", " \"value\": \"mat_value\"\n", " }\n", " ],\n", " \"name\": \"extract-0\",\n", " \"type\": \"Extract Name\"\n", " },\n", " {\n", " \"@id\": \"#material/labeledextract-4357027488\",\n", " \"characteristics\": [],\n", " \"comments\": [],\n", " \"name\": \"labeleddextract-0\",\n", " \"type\": \"Labeled Extract Name\"\n", " }\n", " ],\n", " \"samples\": [\n", " {\n", " \"@id\": \"#sample/4909191280\",\n", " \"characteristics\": [],\n", " \"comments\": [\n", " {\n", " \"name\": \"smp_comment\",\n", " \"value\": \"smp_value\"\n", " }\n", " ],\n", " \"factorValues\": [],\n", " \"name\": \"sample1\"\n", " }\n", " ]\n", " },\n", " \"measurementType\": {\n", " \"@id\": \"#annotation_value/33b03c28-95c0-4f2c-8755-9986e71e9290\",\n", " \"annotationValue\": \"genome sequencing\",\n", " \"comments\": [],\n", " \"termAccession\": \"\",\n", " \"termSource\": \"\"\n", " },\n", " \"processSequence\": [\n", " {\n", " \"@id\": \"#process/4357029072\",\n", " \"comments\": [],\n", " \"date\": \"\",\n", " \"executesProtocol\": {\n", " \"@id\": \"#protocol/4909154064\"\n", " },\n", " \"inputs\": [\n", " {\n", " \"@id\": \"#sample/4909191280\"\n", " }\n", " ],\n", " \"name\": \"\",\n", " \"nextProcess\": {\n", " \"@id\": \"#process/4357027536\"\n", " },\n", " \"outputs\": [\n", " {\n", " \"@id\": \"#material/extract-4357027824\"\n", " }\n", " ],\n", " \"parameterValues\": [],\n", " \"performer\": \"\"\n", " },\n", " {\n", " \"@id\": \"#process/4357027536\",\n", " \"comments\": [],\n", " \"date\": \"\",\n", " \"executesProtocol\": {\n", " \"@id\": \"#protocol/4909153968\"\n", " },\n", " \"inputs\": [\n", " {\n", " \"@id\": \"#material/extract-4357027824\"\n", " }\n", " ],\n", " \"name\": \"\",\n", " 
\"nextProcess\": {\n", " \"@id\": \"#process/4357026672\"\n", " },\n", " \"outputs\": [\n", " {\n", " \"@id\": \"#material/labeledextract-4357027488\"\n", " }\n", " ],\n", " \"parameterValues\": [],\n", " \"performer\": \"\",\n", " \"previousProcess\": {\n", " \"@id\": \"#process/4357029072\"\n", " }\n", " },\n", " {\n", " \"@id\": \"#process/4357026672\",\n", " \"comments\": [],\n", " \"date\": \"\",\n", " \"executesProtocol\": {\n", " \"@id\": \"#protocol/4909153920\"\n", " },\n", " \"inputs\": [\n", " {\n", " \"@id\": \"#material/labeledextract-4357027488\"\n", " }\n", " ],\n", " \"name\": \"assay-name-0\",\n", " \"outputs\": [\n", " {\n", " \"@id\": \"#data/rawdata-4357026720\"\n", " }\n", " ],\n", " \"parameterValues\": [],\n", " \"performer\": \"\",\n", " \"previousProcess\": {\n", " \"@id\": \"#process/4357027536\"\n", " }\n", " }\n", " ],\n", " \"technologyPlatform\": \"\",\n", " \"technologyType\": {\n", " \"@id\": \"#annotation_value/5174e4a8-f267-480d-930f-998f81064712\",\n", " \"annotationValue\": \"nucleotide sequencing\",\n", " \"comments\": [],\n", " \"termAccession\": \"\",\n", " \"termSource\": \"\"\n", " },\n", " \"unitCategories\": []\n", " }\n", " ],\n", " \"characteristicCategories\": [],\n", " \"comments\": [\n", " {\n", " \"name\": \"st_comment\",\n", " \"value\": \"st_value\"\n", " }\n", " ],\n", " \"description\": \"We could alternatively use the class constructor's parameters to set some default values at the time of creation, however we want to demonstrate how to use the object's instance variables to set values.\",\n", " \"factors\": [\n", " {\n", " \"@id\": \"#studyfactor/4909204480\",\n", " \"comments\": [],\n", " \"factorName\": \"Factor\",\n", " \"factorType\": {\n", " \"@id\": \"#annotation_value/78be4c7e-965a-4d42-bc4b-fad329c708e6\",\n", " \"annotationValue\": \"factor category\",\n", " \"comments\": [],\n", " \"termAccession\": \"\",\n", " \"termSource\": \"\"\n", " }\n", " }\n", " ],\n", " \"filename\": \"s_study.txt\",\n", " 
\"identifier\": \"S1\",\n", " \"materials\": {\n", " \"otherMaterials\": [],\n", " \"samples\": [\n", " {\n", " \"@id\": \"#sample/4909191280\",\n", " \"characteristics\": [],\n", " \"comments\": [\n", " {\n", " \"name\": \"smp_comment\",\n", " \"value\": \"smp_value\"\n", " }\n", " ],\n", " \"factorValues\": [],\n", " \"name\": \"sample1\"\n", " }\n", " ],\n", " \"sources\": [\n", " {\n", " \"@id\": \"#source/4909191232\",\n", " \"characteristics\": [],\n", " \"comments\": [\n", " {\n", " \"name\": \"src_comment\",\n", " \"value\": \"src_value\"\n", " }\n", " ],\n", " \"name\": \"source1\"\n", " },\n", " {\n", " \"@id\": \"#source/4909191520\",\n", " \"characteristics\": [],\n", " \"comments\": [],\n", " \"name\": \"source2\"\n", " }\n", " ]\n", " },\n", " \"people\": [\n", " {\n", " \"@id\": \"#person/4909204384\",\n", " \"address\": \"\",\n", " \"affiliation\": \"WHO\",\n", " \"comments\": [\n", " {\n", " \"name\": \"per_comment\",\n", " \"value\": \"per_value\"\n", " }\n", " ],\n", " \"email\": \"bob.hughes@who.else\",\n", " \"fax\": \"\",\n", " \"firstName\": \"bob\",\n", " \"lastName\": \"hughes\",\n", " \"midInitials\": \"\",\n", " \"phone\": \"\",\n", " \"roles\": []\n", " }\n", " ],\n", " \"processSequence\": [\n", " {\n", " \"@id\": \"#process/4909204576\",\n", " \"comments\": [],\n", " \"date\": \"\",\n", " \"executesProtocol\": {\n", " \"@id\": \"#protocol/4909204864\"\n", " },\n", " \"inputs\": [\n", " {\n", " \"@id\": \"#source/4909191520\"\n", " }\n", " ],\n", " \"name\": \"\",\n", " \"outputs\": [\n", " {\n", " \"@id\": \"#sample/4909191280\"\n", " }\n", " ],\n", " \"parameterValues\": [],\n", " \"performer\": \"\"\n", " }\n", " ],\n", " \"protocols\": [\n", " {\n", " \"@id\": \"#protocol/4909205488\",\n", " \"comments\": [],\n", " \"components\": [],\n", " \"description\": \"\",\n", " \"name\": \"sample collection\",\n", " \"parameters\": [],\n", " \"protocolType\": {\n", " \"@id\": \"#annotation_value/e71e71d2-00df-4713-94f7-22913308b18b\",\n", " 
\"annotationValue\": \"\",\n", " \"comments\": [],\n", " \"termAccession\": \"\",\n", " \"termSource\": \"\"\n", " },\n", " \"uri\": \"\",\n", " \"version\": \"\"\n", " },\n", " {\n", " \"@id\": \"#protocol/4909205104\",\n", " \"comments\": [],\n", " \"components\": [],\n", " \"description\": \"\",\n", " \"name\": \"data analysis with Galaxy\",\n", " \"parameters\": [\n", " {\n", " \"@id\": \"#parameter/4909205296\",\n", " \"parameterName\": {\n", " \"@id\": \"#annotation_value/264cedf2-119a-406d-8da2-9a9e77ee9a96\",\n", " \"annotationValue\": \"genome assembly\",\n", " \"comments\": [],\n", " \"termAccession\": \"\",\n", " \"termSource\": \"\"\n", " }\n", " },\n", " {\n", " \"@id\": \"#parameter/4909205008\",\n", " \"parameterName\": {\n", " \"@id\": \"#annotation_value/ef75225f-9a81-41ec-9143-31dce1542a95\",\n", " \"annotationValue\": \"cut-off value\",\n", " \"comments\": [],\n", " \"termAccession\": \"\",\n", " \"termSource\": \"\"\n", " }\n", " }\n", " ],\n", " \"protocolType\": {\n", " \"@id\": \"#annotation_value/564e017b-4fac-40e2-ad6d-324ca1a6bf55\",\n", " \"annotationValue\": \"data transformation\",\n", " \"comments\": [],\n", " \"termAccession\": \"\",\n", " \"termSource\": \"\"\n", " },\n", " \"uri\": \"https://doi.org/10.5464/workflow.cwl\",\n", " \"version\": \"\"\n", " },\n", " {\n", " \"@id\": \"#protocol/4909204864\",\n", " \"comments\": [\n", " {\n", " \"name\": \"pro_comment\",\n", " \"value\": \"pro_value\"\n", " }\n", " ],\n", " \"components\": [],\n", " \"description\": \"\",\n", " \"name\": \"data visualization with Intermine\",\n", " \"parameters\": [],\n", " \"protocolType\": {\n", " \"@id\": \"#annotation_value/ae77ae54-aa7b-40c9-9c16-a8ba70953a1b\",\n", " \"annotationValue\": \"data visualization\",\n", " \"comments\": [],\n", " \"termAccession\": \"\",\n", " \"termSource\": \"\"\n", " },\n", " \"uri\": \"https://intermine.org/10.5464/network.svg\",\n", " \"version\": \"\"\n", " },\n", " {\n", " \"@id\": \"#protocol/4909154064\",\n", " 
\"comments\": [],\n", " \"components\": [],\n", " \"description\": \"\",\n", " \"name\": \"extraction\",\n", " \"parameters\": [],\n", " \"protocolType\": {\n", " \"@id\": \"#annotation_value/840a6f25-8ddc-4071-a1f5-60d37c88a9bc\",\n", " \"annotationValue\": \"material extraction\",\n", " \"comments\": [],\n", " \"termAccession\": \"\",\n", " \"termSource\": \"\"\n", " },\n", " \"uri\": \"\",\n", " \"version\": \"\"\n", " },\n", " {\n", " \"@id\": \"#protocol/4909154064\",\n", " \"comments\": [],\n", " \"components\": [],\n", " \"description\": \"\",\n", " \"name\": \"extraction\",\n", " \"parameters\": [],\n", " \"protocolType\": {\n", " \"@id\": \"#annotation_value/840a6f25-8ddc-4071-a1f5-60d37c88a9bc\",\n", " \"annotationValue\": \"material extraction\",\n", " \"comments\": [],\n", " \"termAccession\": \"\",\n", " \"termSource\": \"\"\n", " },\n", " \"uri\": \"\",\n", " \"version\": \"\"\n", " },\n", " {\n", " \"@id\": \"#protocol/4909153920\",\n", " \"comments\": [],\n", " \"components\": [],\n", " \"description\": \"\",\n", " \"name\": \"sequencing\",\n", " \"parameters\": [],\n", " \"protocolType\": {\n", " \"@id\": \"#annotation_value/13330396-e038-4ae6-872b-c7f16ed298e3\",\n", " \"annotationValue\": \"material sequencing\",\n", " \"comments\": [],\n", " \"termAccession\": \"\",\n", " \"termSource\": \"\"\n", " },\n", " \"uri\": \"\",\n", " \"version\": \"\"\n", " }\n", " ],\n", " \"publicReleaseDate\": \"2021-12-01 22:29:22.101933\",\n", " \"publications\": [\n", " {\n", " \"@id\": \"#publication/4909204720\",\n", " \"authorList\": \"\",\n", " \"comments\": [],\n", " \"doi\": \"10.12314\",\n", " \"pubMedID\": \"1222322\",\n", " \"status\": {\n", " \"@id\": \"\"\n", " },\n", " \"title\": \"publication title\"\n", " }\n", " ],\n", " \"studyDesignDescriptors\": [],\n", " \"submissionDate\": \"2021-12-01 22:29:22.101893\",\n", " \"title\": \"My Simple ISA Study\",\n", " \"unitCategories\": []\n", " }\n", " ],\n", " \"submissionDate\": \"\",\n", " \"title\": 
\"\"\n", "}\n" ] } ], "source": [ "import json\n", "from isatools.isajson import ISAJSONEncoder\n", "print(json.dumps(investigation, cls=ISAJSONEncoder, sort_keys=True, indent=4, separators=(',', ': ')))" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"import os\n",
"from isatools import isatab\n",
"\n",
"# Location of the ISA-Tab archive to reload. Point these two constants at any\n",
"# ISA-Tab dataset available next to this notebook, for example:\n",
"#   './BII-I-1/'  with  'i_investigation.txt'\n",
"#   './BII-S-3/'  with  'i_gilbert.txt'\n",
"#   './BII-S-4/'  with  'i_investigation.txt'\n",
"#   './BII-S-7/'  with  'i_matteo.txt'\n",
"ISATAB_DIR = './BII-S-8_FP001RO-isatab-TEST/'\n",
"INVESTIGATION_FILE = 'i_fp001ro-investigation.txt'\n",
"\n",
"investigation_path = os.path.join(ISATAB_DIR, INVESTIGATION_FILE)\n",
"\n",
"# ISA stays None when the dataset is absent, so the notebook still runs from top\n",
"# to bottom (Restart & Run All) on machines without the test datasets.\n",
"ISA = None\n",
"if os.path.isfile(investigation_path):\n",
"    with open(investigation_path) as fp:\n",
"        ISA = isatab.load(fp)\n",
"else:\n",
"    print(f'ISA-Tab investigation file not found: {investigation_path} - skipping the reload example.')" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"from isatools.isatab import dumps\n",
"\n",
"# Serialise back to ISA-Tab only if the previous cell actually loaded an investigation.\n",
"if ISA is not None:\n",
"    print(dumps(ISA))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## About this notebook\n", "\n", "- authors: philippe.rocca-serra@oerc.ox.ac.uk, massimiliano.izzo@oerc.ox.ac.uk\n", "- license: CC-BY 4.0\n", "- support: isatools@googlegroups.com\n", "- issue tracker: https://github.com/ISA-tools/isa-api/issues" ] } ], "metadata": { "kernelspec": { "display_name": "isa-api-py39", "language": "python", "name": "isa-api-py39" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.0" } }, "nbformat": 4, "nbformat_minor": 1 }