# Create a simple ISA descriptor

This example creates minimal metadata for a single study ISA descriptor with no assay declared. 

It shows how to serialize (write) the ISA Model content to ISA-Tab and ISA-JSON formats.

In [1]:
# If executing the notebooks on `Google Colab`,uncomment the following command 
# and run it to install the required python libraries. Also, make the test datasets available.

# !pip install -r requirements.txt

In [2]:
from isatools.model import (
 Comment,
 Investigation,
 Study,
 StudyFactor,
 FactorValue,
 OntologyAnnotation,
 Material,
 Sample,
 Source,
 Protocol,
 ProtocolParameter,
 ProtocolComponent,
 ParameterValue,
 Process,
 Publication,
 Person,
 Assay,
 DataFile,
 plink
)
import datetime

## Study metadata

In [3]:
investigation = Investigation()
i_comment = Comment(name="i_comment", value="i_value")
investigation.comments.append(i_comment)
study = Study(filename="s_study.txt")
st_comment = Comment(name="st_comment", value="st_value")
study.comments.append(st_comment)
study.identifier = "S1"
study.title = "My Simple ISA Study"
study.description = "We could alternatively use the class constructor's parameters to set some default " \
 "values at the time of creation, however we want to demonstrate how to use the " \
 "object's instance variables to set values."
study.submission_date = str(datetime.datetime.today())
study.public_release_date = str(datetime.datetime.today())
study.sources = [Source(name="source1"), Source(name="source2")]
src_comment = Comment(name="src_comment", value="src_value")
study.sources[0].comments.append(src_comment)
smp_comment = Comment(name="smp_comment", value="smp_value")
study.samples = [Sample(name="sample1")]
study.samples[0].comments.append(smp_comment)
study.samples[0].derives_from.append(study.sources[0])
study.samples[0].derives_from.append(study.sources[1])


study.protocols = [
 Protocol(name="sample collection",
 components=[
 ProtocolComponent(name="magnetic agitator",
 component_type=OntologyAnnotation(term="device"))]),
 Protocol(
 name="data analysis with Galaxy",
 uri="https://doi.org/10.5464/workflow.cwl",
 protocol_type=OntologyAnnotation(term="data transformation"),
 parameters=[
 ProtocolParameter(parameter_name=OntologyAnnotation(term="genome assembly")),
 ProtocolParameter(parameter_name=OntologyAnnotation(term="cut-off value"))
 ],
 ),
 Protocol(
 name="data visualization with Intermine",
 uri="https://intermine.org/10.5464/network.svg",
 protocol_type=OntologyAnnotation(term="data visualization"),
 comments=[Comment(name="pro_comment", value="pro_value")]
 )
]

study.factors = [
 StudyFactor(name="Factor",factor_type=OntologyAnnotation(term="factor category"))
]


study.publications = [
 Publication(doi="10.12314",pubmed_id="1222322", title="publication title")
 
]

study.contacts = [
 Person(first_name="bob", last_name="hughes", affiliation="WHO", email="bob.hughes@who.else",
 comments=[Comment(name="per_comment", value="per_value")]
 )
]

study.process_sequence = [
 Process(
 executes_protocol=study.protocols[-1],
 inputs=[study.sources[-1]],
 outputs=[study.samples[-1]]
 )
]
investigation.studies = [study]



In [4]:
# Next, we build n Assay object and attach two protocols, extraction and sequencing.

assay = Assay(filename="a_assay.txt")

extraction_protocol = Protocol(name='extraction', protocol_type=OntologyAnnotation(term="material extraction"))
study.protocols.append(extraction_protocol)

labeling_protocol = Protocol(name='labeling', protocol_type=OntologyAnnotation(term="labeling"))
study.protocols.append(extraction_protocol)

sequencing_protocol = Protocol(name='sequencing', protocol_type=OntologyAnnotation(term="material sequencing"))
study.protocols.append(sequencing_protocol)

# To build out assay graphs, we enumereate the samples from the study-level, and for each sample we create an
# extraction process and a sequencing process. The extraction process takes as input a sample material, and produces
# an extract material. The sequencing process takes the extract material and produces a data file. This will
# produce three graphs, from sample material through to data, as follows:
#
# (sample_material-0)->(extraction)->(extract-0)->(sequencing)->(sequenced-data-0)
# (sample_material-1)->(extraction)->(extract-1)->(sequencing)->(sequenced-data-1)
# (sample_material-2)->(extraction)->(extract-2)->(sequencing)->(sequenced-data-2)
#
# Note that the extraction processes and sequencing processes are distinctly separate instances, where the three
# graphs are NOT interconnected.

for i, sample in enumerate(study.samples):

 # create an extraction process that executes the extraction protocol

 extraction_process = Process(executes_protocol=extraction_protocol)

 # extraction process takes as input a sample, and produces an extract material as output

 extraction_process.inputs.append(sample)
 material = Material(name="extract-{}".format(i))
 mat_comment = Comment(name="mat_comment", value="mat_value")
 material.comments.append(mat_comment)
 material.type = "Extract Name"
 extraction_process.outputs.append(material)

 
 #labeling process
 labeling_process = Process(executes_protocol=labeling_protocol)
 le = Material(name="labeleddextract-{}".format(i))
 le.type = "Labeled Extract Name"
 labeling_process.inputs.append(extraction_process.outputs[0])
 labeling_process.outputs.append(le)
 
 # create a sequencing process that executes the sequencing protocol

 sequencing_process = Process(executes_protocol=sequencing_protocol)
 sequencing_process.name = "assay-name-{}".format(i)
 sequencing_process.inputs.append(labeling_process.outputs[0])

 # Sequencing process usually has an output data file

 datafile = DataFile(filename="sequenced-data-{}".format(i), label="Raw Data File")
 data_comment = Comment(name="data_comment",value="data_value")
 datafile.comments.append(data_comment)
 sequencing_process.outputs.append(datafile)

 # Ensure Processes are linked forward and backward. plink(from_process, to_process) is a function to set
 # these links for you. It is found in the isatools.model package

 plink(extraction_process, labeling_process)
 plink(labeling_process, sequencing_process)

 # make sure the extract, data file, and the processes are attached to the assay

 assay.data_files.append(datafile)
 assay.samples.append(sample)
 assay.other_material.append(material)
 assay.other_material.append(le)
 assay.process_sequence.append(extraction_process)
 assay.process_sequence.append(labeling_process)
 assay.process_sequence.append(sequencing_process)
 assay.measurement_type = OntologyAnnotation(term="genome sequencing")
 assay.technology_type = OntologyAnnotation(term="nucleotide sequencing")

 
study.assays.append(assay)

In [5]:
# Let's see the object :
investigation

isatools.model.Investigation(identifier='', filename='', title='', submission_date='', public_release_date='', ontology_source_references=[], publications=[], contacts=[], studies=[isatools.model.Study(filename='s_study.txt', identifier='S1', title='My Simple ISA Study', description='We could alternatively use the class constructor's parameters to set some default values at the time of creation, however we want to demonstrate how to use the object's instance variables to set values.', submission_date='2021-12-01 22:29:22.101893', public_release_date='2021-12-01 22:29:22.101933', contacts=[isatools.model.Person(last_name='hughes', first_name='bob', mid_initials='', email='bob.hughes@who.else', phone='', fax='', address='', affiliation='WHO', roles=[], comments=[isatools.model.Comment(name='per_comment', value='per_value')])], design_descriptors=[], publications=[isatools.model.Publication(pubmed_id='1222322', doi='10.12314', author_list='', title='publication title', status=None, commen

## Writing to ISA-Tab

In [6]:
from isatools.isatab import dumps
print(dumps(investigation))

2021-12-01 22:29:22,205 [INFO]: isatab.py(_all_end_to_end_paths:1131) >> [1]
2021-12-01 22:29:22,207 [INFO]: isatab.py(_longest_path_and_attrs:1091) >> [[1, 3, 2]]
2021-12-01 22:29:22,229 [INFO]: isatab.py(_all_end_to_end_paths:1131) >> [2]
2021-12-01 22:29:22,230 [INFO]: isatab.py(_longest_path_and_attrs:1091) >> [[2, 4, 5, 6, 7, 8]]
2021-12-01 22:29:22,230 [INFO]: isatab.py(_longest_path_and_attrs:1091) >> [[2, 4, 5, 6, 7, 8]]


/var/folders/5n/rl6lqnks4rqb59pbtpvvntqw0000gr/T/tmpkd97rbgg/i_investigation.txt
ONTOLOGY SOURCE REFERENCE
Term Source Name
Term Source File
Term Source Version
Term Source Description
INVESTIGATION
Investigation Identifier	
Investigation Title	
Investigation Description	
Investigation Submission Date	
Investigation Public Release Date	
Comment[i_comment]	i_value
INVESTIGATION PUBLICATIONS
Investigation PubMed ID
Investigation Publication DOI
Investigation Publication Author List
Investigation Publication Title
Investigation Publication Status
Investigation Publication Status Term Accession Number
Investigation Publication Status Term Source REF
INVESTIGATION CONTACTS
Investigation Person Last Name
Investigation Person First Name
Investigation Person Mid Initials
Investigation Person Email
Investigation Person Phone
Investigation Person Fax
Investigation Person Address
Investigation Person Affiliation
Investigation Person Roles
Investigation Person Roles Term Accession Number
Investiga

## Writing to ISA-JSON

In [7]:
import json
from isatools.isajson import ISAJSONEncoder
print(json.dumps(investigation, cls=ISAJSONEncoder, sort_keys=True, indent=4, separators=(',', ': ')))

{
 "@id": "#investigation/4909215360",
 "comments": [
 {
 "name": "i_comment",
 "value": "i_value"
 }
 ],
 "description": "",
 "identifier": "",
 "ontologySourceReferences": [],
 "people": [],
 "publicReleaseDate": "",
 "publications": [],
 "studies": [
 {
 "@id": "#study/4909211904",
 "assays": [
 {
 "@id": "#4909152096",
 "characteristicCategories": [],
 "comments": [],
 "dataFiles": [
 {
 "@id": "#data/rawdata-4357026720",
 "comments": [
 {
 "name": "data_comment",
 "value": "data_value"
 }
 ],
 "name": "sequenced-data-0",
 "type": "Raw Data File"
 }
 ],
 "filename": "a_assay.txt",
 "materials": {
 "otherMaterials": [
 {
 "@id": "#material/extract-4357027824",
 "characteristics": [],
 "comments": [
 {
 "name": "mat_comment",
 "value": "mat_value"
 }
 ],
 "name": "extract-0",
 "type": "Extract Name"
 },
 {
 "@id": "#material/labeledextract-4357027488",
 "characteristics": [],
 "comments": [],
 "name": "labeleddextract-0",
 "type": "Labeled Extract Name"
 }
 ],
 "samples": [
 {
 "@id"

In [8]:
import os
from isatools import isatab 
# with open(os.path.join('./BII-I-1/', 'i_investigation.txt')) as fp:
#w ith open(os.path.join('./BII-S-3/', 'i_gilbert.txt')) as fp:
# with open(os.path.join('./BII-S-4/', 'i_investigation.txt')) as fp:
# with open(os.path.join('./BII-S-7/', 'i_matteo.txt')) as fp:
with open(os.path.join('./BII-S-8_FP001RO-isatab-TEST/', 'i_fp001ro-investigation.txt')) as fp:
 ISA = isatab.load(fp)

FileNotFoundError: [Errno 2] No such file or directory: './BII-S-8_FP001RO-isatab-TEST/i_fp001ro-investigation.txt'

In [None]:
from isatools.isatab import dumps
print(dumps(ISA))

## About this notebook

- authors: philippe.rocca-serra@oerc.ox.ac.uk, massimiliano.izzo@oerc.ox.ac.uk
- license: CC-BY 4.0
- support: isatools@googlegroups.com
- issue tracker: https://github.com/ISA-tools/isa-api/issues