{ "$schema": "http://json-schema.org/draft-07/schema#", "title": "OpenTargets", "description": "OpenTargets evidence objects", "type": "object", "properties": { "sourceID": { "type": "string", "description": "A source ID (database or study ID) to help identify who this data is from.", "pattern": "^[a-z0-9_]+$" }, "access_level": { "description": "Choose public as default; private is for internal datasets", "type": "string", "enum": [ "public", "private" ] }, "unique_association_fields": { "description": "String based key-value properties that must uniquely identify each JSON object in your data.", "type": "object" }, "target": { "type": "object", "description": "Target in OpenTargets's target ontology", "properties": { "id": { "type": "string", "description": "An Ensembl or UniProt identifier", "pattern": "^http://identifiers.org/ensembl/ENSG[0-9]{4,}$|^http://identifiers.org/uniprot/.{4,}$" }, "tier": { "type": "string", "description": "Cancer Gene Census genes has been split into two tiers", "enum": [ "tier 1", "tier 2" ] }, "target_type": { "type": "string", "enum": [ "http://identifiers.org/cttv.target/gene_allele", "http://identifiers.org/cttv.target/gene_evidence", "http://identifiers.org/cttv.target/gene_in_LD_region", "http://identifiers.org/cttv.target/gene_in_epigenetic_regulation_complex", "http://identifiers.org/cttv.target/gene_variant", "http://identifiers.org/cttv.target/pro_protein", "http://identifiers.org/cttv.target/protein_evidence", "http://identifiers.org/cttv.target/transcript_evidence", "http://identifiers.org/cttv.target/transcript_isoform", "http://identifiers.org/cttv.target/protein_isoform", "http://identifiers.org/cttv.target/gene_or_protein_or_transcript", "http://identifiers.org/cttv.target/chimeric_protein", "http://identifiers.org/cttv.target/protein_complex", "http://identifiers.org/cttv.target/protein_complex_group", "http://identifiers.org/cttv.target/protein_complex_heteropolymer", 
"http://identifiers.org/cttv.target/protein_complex_homopolymer", "http://identifiers.org/cttv.target/protein_family", "http://identifiers.org/cttv.target/protein_protein_interaction", "http://identifiers.org/cttv.target/selectivity_group", "http://identifiers.org/cttv.target/single_protein", "http://identifiers.org/cttv.target/nucleic_acid" ], "description": "Type of target; if you do not have detailed information, select from gene_evidence, protein_evidence or transcript_evidence" }, "activity": { "type": "string", "enum": [ "http://identifiers.org/cttv.activity/decreased_transcript_level", "http://identifiers.org/cttv.activity/decreased_translational_product_level", "http://identifiers.org/cttv.activity/negative_modulator", "http://identifiers.org/cttv.activity/positive_modulator", "http://identifiers.org/cttv.activity/other", "http://identifiers.org/cttv.activity/gain_of_function", "http://identifiers.org/cttv.activity/increased_transcript_level", "http://identifiers.org/cttv.activity/increased_translational_product_level", "http://identifiers.org/cttv.activity/loss_of_function", "http://identifiers.org/cttv.activity/partial_loss_of_function", "http://identifiers.org/cttv.activity/up_or_down", "http://identifiers.org/cttv.activity/up", "http://identifiers.org/cttv.activity/down", "http://identifiers.org/cttv.activity/tolerated", "http://identifiers.org/cttv.activity/predicted", "http://identifiers.org/cttv.activity/damaging", "http://identifiers.org/cttv.activity/damaging_to_target", "http://identifiers.org/cttv.activity/predicted_tolerated", "http://identifiers.org/cttv.activity/predicted_damaging", "http://identifiers.org/cttv.activity/tolerated_by_target", "http://identifiers.org/cttv.activity/unknown" ], "description": "Activity of target in disease context" }, "target_name": { "type": "string", "description": "used by ChEMBL initially if they have a more canonical target name, optional" }, "target_class": { "type": "array", "items": { "type": "string" }, 
"uniqueItems": true } }, "required": [ "id", "target_type", "activity" ] }, "disease": { "type": "object", "description": "Disease in the EFO ontology", "properties": { "id": { "type": "string", "description": "A valid EFO full IRI", "pattern": "^[^\u0020]*$", "minLength": 1, "format": "uri" }, "name": { "type": "string", "description": "Optional - EFO disease name corresponding to the EFO ID" }, "source_name": { "type": "string", "description": "Optional - EFO disease name corresponding to the EFO ID" }, "biosample": { "description": "Tissue names - provide EFO name if you have this", "title": "Disease Biosample", "type": "object", "properties": { "name": { "type": "string", "description": "free text of the tissue / cell name" }, "id": { "type": "string", "format": "uri", "description": "EFO ID of the tissue - optional" } }, "required": [ "name" ] } }, "required": [ "id" ] }, "literature": { "type": "object", "properties": { "references": { "type": "array", "items": { "type": "object", "$ref": "#/definitions/single_lit_reference" }, "minItems": 1, "uniqueItems": true } } }, "evidence": { "type": "object" } }, "required": [ "sourceID", "access_level", "unique_association_fields", "target", "disease", "evidence" ], "oneOf": [ { "properties": { "type": { "const": "rna_expression" }, "evidence": { "allOf": [ { "$ref": "#/definitions/evidence_base" } ], "properties": { "organism_part": { "type": "string" }, "comparison_name": { "type": "string" }, "log2_fold_change": { "type": "object", "properties": { "value": { "type": "number" }, "percentile_rank": { "type": "number", "minimum": 0, "maximum": 100 } }, "required": [ "value", "percentile_rank" ] }, "test_sample": { "description": "Free text - test sample", "type": "string" }, "reference_sample": { "description": "Free text - reference sample", "type": "string" }, "test_replicates_n": { "description": "Count of test replicates", "type": "number", "minimum": 1 }, "reference_replicates_n": { "description": "Count of 
reference replicates", "type": "number", "minimum": 1 }, "confidence_level": { "type": "string", "enum": [ "high", "medium", "low" ], "description": "high = if the disease state is the only variable (i.e. case vs control); medium = if the disease is a variable but there is one or more other variables; low = where all samples have the disease but the variable is something else e.g. a treatment" }, "experiment_overview": { "type": "string" }, "evidence_codes": { "type": "array", "description": "An array of evidence codes", "items": { "type": "string" }, "minItems": 1, "uniqueItems": true }, "urls": { "type": "array", "items": { "type": "object", "$ref": "#/definitions/linkout" }, "uniqueItems": true } }, "required": [ "evidence_codes", "comparison_name", "log2_fold_change", "test_sample", "reference_sample", "experiment_overview", "test_replicates_n", "reference_replicates_n", "confidence_level", "resource_score" ] } }, "required": [ "type", "evidence" ] }, { "properties": { "type": { "const": "known_drug" }, "evidence": { "properties": { "target2drug": { "type": "object", "allOf": [ { "$ref": "#/definitions/evidence_base" } ], "properties": { "evidence_codes": { "type": "array", "description": "An array of evidence codes", "items": { "type": "string" }, "minItems": 1, "uniqueItems": true }, "mechanism_of_action": { "type": "string" }, "action_type": { "type": "string" }, "urls": { "type": "array", "items": { "type": "object", "$ref": "#/definitions/linkout" }, "uniqueItems": true } }, "required": [ "resource_score", "provenance_type", "is_associated", "date_asserted", "evidence_codes", "mechanism_of_action", "action_type" ] }, "drug2clinic": { "type": "object", "allOf": [ { "$ref": "#/definitions/evidence_base" } ], "properties": { "evidence_codes": { "type": "array", "description": "An array of evidence codes", "items": { "type": "string" }, "minItems": 1, "uniqueItems": true }, "clinical_trial_phase": { "type": "object", "$ref": "#/definitions/diseasephase" }, 
"urls": { "type": "array", "items": { "type": "object", "$ref": "#/definitions/linkout" }, "uniqueItems": true }, "status": { "type": "string" } }, "required": [ "resource_score", "provenance_type", "is_associated", "date_asserted", "evidence_codes", "clinical_trial_phase" ] } }, "required": [ "target2drug", "drug2clinic" ] }, "drug": { "type": "object", "properties": { "id": { "type": "string", "description": "A ChEMBL or internal drug identifier", "pattern": "^http://identifiers.org/chembl.compound/CHEMBL[0-9]+$|^http://private/.+$" }, "molecule_name": { "type": "string" }, "molecule_type": { "type": "string" } }, "required": [ "id", "molecule_name", "molecule_type" ] }, "target" : { "properties" :{ "complex_id": { "type": "string", "description": "A ChEMBL protein complex identifier", "pattern": "^CHEMBL[0-9]+$" }, "binding_subunit_known": { "type": "boolean", "description": "Is the complex binding subunit known" } } } }, "required": [ "type", "evidence", "drug" ] }, { "properties": { "type": { "const": "animal_model" }, "evidence": { "properties": { "orthologs": { "type": "object", "allOf": [ { "$ref": "#/definitions/evidence_base" } ], "properties": { "evidence_codes": { "type": "array", "description": "An array of evidence codes", "items": { "type": "string", "uniqueItems": true }, "minItems": 1 }, "human_gene_id": { "type": "string", "description": "Human Ensembl gene identifier", "pattern": "^http://identifiers.org/ensembl/ENSG[0-9]{4,}$" }, "model_gene_id": { "type": "string", "description": "Biological model Ensembl gene identifier (ortholog)", "pattern": "^http://identifiers.org/ensembl/ENS[A-Z]{0,3}G[0-9]{4,}$" }, "species": { "type": "string", "enum": [ "mouse", "human", "rat", "zebrafish", "dog" ] }, "urls": { "type": "array", "items": { "type": "object", "$ref": "#/definitions/linkout" }, "uniqueItems": true } }, "required":[ "resource_score" ] }, "biological_model": { "type": "object", "allOf": [ { "$ref": "#/definitions/evidence_base" } ], 
"properties": { "evidence_codes": { "type": "array", "description": "An array of evidence codes", "items": { "type": "string" }, "minItems": 1, "uniqueItems": true }, "model_gene_id": { "type": "string", "description": "Biological model Ensembl gene identifier (ortholog)", "pattern": "^http://identifiers.org/ensembl/ENS[A-Z]{0,3}G[0-9]{4,}$" }, "model_id": { "type": "string", "description": "Internal identifier for the biological model" }, "allelic_composition": { "type": "string", "description": "Animal model allelic composition" }, "genetic_background": { "type": "string", "description": "Genetic background of the biological model that influences phenotypes" }, "allele_ids": { "type": "string", "description": "List of allele identifiers for this model separated by |" }, "zygosity": { "type": "string", "enum": [ "hom", "het", "hem", "oth" ] }, "species": { "type": "string", "enum": [ "mouse", "human", "rat", "zebrafish", "dog" ] }, "phenotypes": { "type": "array", "description": "List of phenotypes for this model", "items": { "type": "object", "$ref": "#/definitions/phenotype" }, "minItems": 1, "uniqueItems": true }, "urls": { "type": "array", "items": { "type": "object", "$ref": "#/definitions/linkout" }, "uniqueItems": true } }, "required": [ "resource_score", "provenance_type", "is_associated", "date_asserted", "evidence_codes", "model_id", "model_gene_id", "genetic_background", "allele_ids", "allelic_composition", "zygosity", "phenotypes", "species" ] }, "disease_model_association": { "type": "object", "allOf": [ { "$ref": "#/definitions/evidence_base" } ], "properties": { "evidence_codes": { "type": "array", "description": "An array of evidence codes", "items": { "type": "string" }, "minItems": 1, "uniqueItems": true }, "model_id": { "type": "string", "description": "Internal identifier for the biological model" }, "disease_id": { "type": "string", "description": "Disease identifier" }, "human_phenotypes": { "type": "array", "description": "List of human 
phenotypes for this model", "items": { "type": "object", "$ref": "#/definitions/phenotype" }, "minItems": 1, "uniqueItems": true }, "model_phenotypes": { "type": "array", "description": "List of biomodel phenotypes for this model", "items": { "type": "object", "$ref": "#/definitions/phenotype" }, "minItems": 1, "uniqueItems": true }, "urls": { "type": "array", "items": { "type": "object", "$ref": "#/definitions/linkout" }, "uniqueItems": true } }, "required": [ "resource_score" ] } }, "required": [ "orthologs", "biological_model", "disease_model_association" ] } }, "required": [ "type", "evidence" ] }, { "properties": { "type": { "const": "literature" }, "evidence": { "allOf": [ { "$ref": "#/definitions/evidence_base" } ], "properties": { "evidence_codes": { "type": "array", "description": "An array of evidence codes", "items": { "type": "string" }, "minItems": 1, "uniqueItems": true }, "literature_ref": { "type": "object", "$ref": "#/definitions/single_lit_reference" } }, "required": [ "resource_score" ] } }, "required": [ "type", "evidence" ] }, { "properties": { "type": { "const": "genetic_association" }, "variant": { "type": "object", "properties": { "id": { "type": "string", "description": "A single variant identifier", "pattern": "^http://www.ncbi.nlm.nih.gov/clinvar/RCV[0-9]{9}|http://identifiers.org/dbsnp/rs[0-9]{1,}|http://identifiers.org/dbsnp/esv[0-9]{1,}|http://identifiers.org/dbsnp/nsv[0-9]{1,}|.{1,2}_[0-9]+_[ACTG]+_[ACTG]+$" }, "type": { "type": "string", "enum": [ "snp single", "snp snp interaction", "structural variant", "SNP", "deletion", "insertion" ] } }, "required": [ "id", "type" ] }, "evidence": { "properties": { "gene2variant": { "type": "object", "allOf": [ { "$ref": "#/definitions/evidence_base" } ], "properties": { "evidence_codes": { "type": "array", "description": "An array of evidence codes", "items": { "type": "string" }, "minItems": 1, "uniqueItems": true }, "functional_consequence": { "type": "string", "format": "uri" }, "urls": 
{ "type": "array", "items": { "type": "object", "$ref": "#/definitions/linkout" }, "uniqueItems": true } }, "required": [ "provenance_type", "is_associated", "date_asserted", "evidence_codes", "functional_consequence" ] }, "variant2disease": { "type": "object", "allOf": [ { "$ref": "#/definitions/evidence_base" } ], "properties": { "clinical_significance": { "type": "string", "enum": [ "Pathogenic", "Likely pathogenic", "protective", "association", "risk_factor", "Affects", "drug response" ] }, "gwas_panel_resolution": { "description": "Panel resolution of GWAS study", "type": "number", "exclusiveMinimum": 0 }, "gwas_sample_size": { "description": "Sample size of GWAS study", "type": "number", "exclusiveMinimum": 0 }, "evidence_codes": { "type": "array", "description": "An array of evidence codes", "items": { "type": "string" }, "minItems": 1, "uniqueItems": true }, "urls": { "type": "array", "items": { "type": "object", "$ref": "#/definitions/linkout" }, "uniqueItems": true } }, "required": [ "unique_experiment_reference", "resource_score", "provenance_type", "is_associated", "date_asserted", "evidence_codes" ] } }, "required": [ "gene2variant", "variant2disease" ] } }, "required": [ "type", "evidence", "variant" ] }, { "properties": { "type": { "enum": [ "genetic_literature", "affected_pathway", "somatic_mutation" ] }, "evidence": { "type": "object", "allOf": [ { "$ref": "#/definitions/evidence_base" } ], "properties": { "clinical_significance": { "type": "string", "enum": [ "Pathogenic", "Likely pathogenic", "protective", "association", "risk_factor", "Affects", "drug response" ] }, "evidence_codes": { "description": "An array of evidence codes", "type": "array", "items": { "type": "string" }, "minItems": 1, "uniqueItems": true }, "known_mutations": { "description": "An array of mutations", "type": "array", "items": { "type": "object", "properties": { "role_in_cancer": { "description": "The role in cancer can be TSG, oncogene or gene_fusion", "type": "string" }, 
"preferred_name": { "description": "The preferred name for this mutation, e.g. NM_005228.3(EGFR):c.2500G>T (p.Val834Leu)", "type": "string" }, "alternative_names": { "description": "A list of alternative names for this mutation, if known e.g. NC_000007.14:g.55191749G>T", "type": "array", "items": { "type": "string" }, "minItems": 1, "uniqueItems": true }, "functional_consequence": { "description": "For COSMIC: sequence alteration, deletion, missense_variant, stop_gained, terminator_codon_variant, translational product variant, amino acid insertion, conservative decrease in CDS length, mutation causing uncharacterised change of translational product", "type": "string" }, "number_samples_tested": { "description": "The number of samples tested", "type": "number", "minimum": 0 }, "number_samples_with_mutation_type": { "description": "The number of samples for this mutation type if known", "type": "number", "minimum": 0 }, "number_mutated_samples": { "description": "The total number of samples with any type of mutation if known", "type": "number", "minimum": 0 }, "inheritance_pattern": { "description": "dominant (a single copy of the abnormal allele is sufficient to give rise to the disease), semi-dominant, or recessive (requiring both copies of the gene to have an abnormal allele)", "type": "string", "enum": [ "unknown", "dominant", "semi-dominant", "co-dominant", "recessive", "dominant/recessive", "X-linked recessive" ] } }, "required": [ "preferred_name", "functional_consequence" ] }, "minItems": 0 }, "urls": { "type": "array", "items": { "type": "object", "$ref": "#/definitions/linkout" }, "uniqueItems": true }, "cohort":{ "type": "object", "decription": "Information about the cohort where the target was detected as driver", "properties": { "cohort_id": { "type": "string" }, "cohort_short_name": { "type": "string" }, "cohort_description": { "type": "string" } } }, "significant_driver_methods":{ "type": "array", "description": "Cancer driver gene identification 
method that detects target as driver", "items": { "type": "string", "enum": [ "cbase", "combination", "dndscv", "hotmaps", "mutpanning", "oncodriveclustl", "oncodrivefml", "smregions" ] } } }, "required": [ "evidence_codes", "resource_score" ] } }, "required": [ "type", "evidence" ] } ], "definitions": { "single_lit_reference": { "properties": { "lit_id": { "type": "string", "description": "Note for pubmed identifiers, use the URI http://europepmc.org/abstract/MED/[0-9]+", "pattern": "NA|http://europepmc.org/abstract/MED/[0-9]+|http://europepmc.org/articles/PMC[0-9]{4,}|[doi|DOI|(https?://)?(dx\u005C.)?doi.org/]*[\u005Cs\u005C.\u005C:]{0,2}(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\u005C\"&\u005C'])\u005CS)+)$" }, "rank": { "type": "object", "$ref": "#/definitions/score_rank" }, "mined_sentences": { "type": "array", "items": { "type": "object", "properties": { "text": { "type": "string" }, "section": { "type": "string", "enum": [ "title", "abstract", "introduction_and_background", "results", "discussion", "case_study", "conclusion_and_future_work", "appendix", "figure", "table", "other" ], "description": "Section of the article in which this sentence appears" }, "t_start": { "type": "number", "minimum": 0, "description": "Start co-ordinate of target (protein/gene) in text" }, "t_end": { "type": "number", "minimum": 0, "description": "End co-ordinate of target (protein/gene) in text" }, "d_start": { "type": "number", "minimum": 0, "description": "Start co-ordinate of disease name in text" }, "d_end": { "type": "number", "minimum": 0, "description": "End co-ordinate of disease name in text" } }, "required": [ "text", "section" ] }, "minItems": 1 } } }, "score_method": { "type": "object", "properties": { "description": { "type": "string" }, "reference": { "type": "string", "description": "Note for pubmed identifiers, use the URI http://europepmc.org/abstract/MED/[0-9]+", "pattern": "http://europepmc.org/abstract/MED/[0-9]+|http://europepmc.org/articles/PMC[0-9]{4,}$" }, 
"url": { "type": "string", "format": "uri" } } }, "score_pvalue": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "pvalue" ] }, "value": { "type": "number", "maximum": 1, "exclusiveMinimum": 0 }, "mantissa": { "type": "integer", "exclusiveMinimum": 0 }, "exponent": { "type": "number", "maximum": 0 }, "method": { "type": "object", "$ref": "#/definitions/score_method" } }, "required": [ "value" ] }, "score_probability": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "probability", "locus_to_gene_score" ] }, "value": { "type": "number", "maximum": 1, "exclusiveMinimum": 0 }, "method": { "type": "object", "$ref": "#/definitions/score_method" } }, "required": [ "value", "type" ] }, "score_summed_total": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "summed_total" ] }, "value": { "type": "number", "exclusiveMinimum": 0 }, "method": { "type": "object", "$ref": "#/definitions/score_method" } }, "required": [ "value", "type" ] }, "score_rank": { "type": "object", "properties": { "type": { "type": "string", "enum": [ "rank" ] }, "position": { "type": "number", "minimum": 1 }, "sample_size": { "type": "number", "minimum": 1 }, "method": { "type": "object", "$ref": "#/definitions/score_method" } }, "required": [ "position", "sample_size", "type" ] }, "linkout": { "type": "object", "properties": { "nice_name": { "type": "string" }, "url": { "type": "string", "format": "uri" } }, "required": [ "nice_name", "url" ] }, "phenotype": { "type": "object", "properties": { "term_id": { "type": "string", "description": "Phenotype term Identifier from HPO/MP", "pattern": "^http://purl.obolibrary.org/obo/HP_[0-9]{4,}||http://purl.obolibrary.org/obo/MP_[0-9]{4,}$" }, "label": { "type": "string", "description": "Phenotype term label" }, "species": { "type": "string", "enum": [ "mouse", "human", "rat", "zebrafish", "dog" ] } }, "required": [ "id", "term_id", "label", "species" ] }, "diseasephase": { "type": 
"object", "properties": { "numeric_index": { "description": "An integer indicating the position of this study phase. Higher the number = more advanced phase.", "type": "number" }, "label": { "type": "string" } }, "required": [ "numeric_index", "label" ] }, "evidence_base": { "type": "object", "properties": { "unique_experiment_reference": { "type": "string", "description": "A unique experiment identifier or literature reference that uniquely identifies the study in your database", "pattern": "http://europepmc.org/abstract/MED/[0-9]+|http://europepmc.org/articles/PMC[0-9]{4,}|[doi|DOI|https://dx.doi.org/]*[\u005Cs\u005C.\u005C:]{0,2}(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\u005C\"&\u005C'])\u005CS)+)|STUDYID_.+$" }, "is_associated": { "type": "boolean" }, "date_asserted": { "type": "string", "description": "date the evidence was made public", "format": "date-time" }, "resource_score": { "type": "object", "oneOf": [ { "$ref": "#/definitions/score_pvalue" }, { "$ref": "#/definitions/score_probability" }, { "$ref": "#/definitions/score_rank" }, { "$ref": "#/definitions/score_summed_total" } ] }, "provenance_type": { "type": "object", "properties": { "expert": { "type": "object", "properties": { "statement": { "type": "string" }, "author": { "type": "object", "properties": { "organization": { "type": "string" }, "email": { "type": "string", "format": "email" }, "name": { "type": "string" } } }, "status": { "type": "boolean" } }, "required": [ "status" ] }, "literature": { "type": "object", "properties": { "references": { "type": "array", "items": { "type": "object", "$ref": "#/definitions/single_lit_reference" }, "minItems": 1, "uniqueItems": true } }, "required": [ "references" ] }, "database": { "type": "object", "properties": { "dbxref": { "type": "object", "properties": { "id": { "type": "string", "description": "Please provide the original DB name" }, "url": { "type": "string", "format": "uri", "description": "Please provide a pointer to the original resource: e.g. 
http://identifiers.org/orphanet/93298" }, "version": { "type": "string" } }, "required": [ "id", "version" ] }, "id": { "type": "string" }, "version": { "type": "string" } }, "required": [ "id", "version" ] } } } }, "required": [ "provenance_type", "is_associated", "date_asserted" ] } } }