{
  "$schema": "http://json-schema.org/draft-07/schema",
  "title": "GA4GH-VRS-Definitions",
  "type": "object",
  "definitions": {
    "Variation": {
      "description": "A representation of the state of one or more biomolecules.",
      "oneOf": [
        { "$ref": "#/definitions/Allele" },
        { "$ref": "#/definitions/CopyNumberChange" },
        { "$ref": "#/definitions/CopyNumberCount" },
        { "$ref": "#/definitions/Genotype" },
        { "$ref": "#/definitions/Haplotype" },
        { "$ref": "#/definitions/Text" },
        { "$ref": "#/definitions/VariationSet" }
      ],
      "discriminator": { "propertyName": "type" }
    },
    "MolecularVariation": {
      "description": "A variation on a contiguous molecule.",
      "oneOf": [
        { "$ref": "#/definitions/Allele" },
        { "$ref": "#/definitions/Haplotype" }
      ],
      "discriminator": { "propertyName": "type" }
    },
    "UtilityVariation": {
      "description": "A collection of Variation subclasses that cannot be constrained to a specific class of biological variation, but are necessary for some applications of VRS.",
      "oneOf": [
        { "$ref": "#/definitions/Text" },
        { "$ref": "#/definitions/VariationSet" }
      ],
      "discriminator": { "propertyName": "type" }
    },
    "SystemicVariation": {
      "description": "A Variation of multiple molecules in the context of a system, e.g. a genome, sample, or homologous chromosomes.",
      "oneOf": [
        { "$ref": "#/definitions/CopyNumberChange" },
        { "$ref": "#/definitions/CopyNumberCount" },
        { "$ref": "#/definitions/Genotype" }
      ],
      "discriminator": { "propertyName": "type" }
    },
    "Allele": {
      "description": "The state of a molecule at a Location.",
      "type": "object",
      "properties": {
        "_id": {
          "$ref": "#/definitions/CURIE",
          "description": "Variation Id. MUST be unique within document."
        },
        "type": {
          "type": "string",
          "const": "Allele",
          "default": "Allele",
          "description": "MUST be \"Allele\""
        },
        "location": {
          "oneOf": [
            { "$ref": "#/definitions/CURIE" },
            { "$ref": "#/definitions/ChromosomeLocation" },
            { "$ref": "#/definitions/SequenceLocation" }
          ],
          "description": "Where Allele is located"
        },
        "state": {
          "oneOf": [
            { "$ref": "#/definitions/ComposedSequenceExpression" },
            { "$ref": "#/definitions/DerivedSequenceExpression" },
            { "$ref": "#/definitions/LiteralSequenceExpression" },
            { "$ref": "#/definitions/RepeatedSequenceExpression" },
            { "$ref": "#/definitions/SequenceState" }
          ],
          "description": "An expression of the sequence state",
          "deprecated": [
            { "$ref": "#/definitions/SequenceState" }
          ]
        }
      },
      "required": [ "location", "state", "type" ],
      "additionalProperties": false
    },
    "Haplotype": {
      "description": "A set of non-overlapping Allele members that co-occur on the same molecule.",
      "type": "object",
      "properties": {
        "_id": {
          "$ref": "#/definitions/CURIE",
          "description": "Variation Id. MUST be unique within document."
        },
        "type": {
          "type": "string",
          "const": "Haplotype",
          "default": "Haplotype",
          "description": "MUST be \"Haplotype\""
        },
        "members": {
          "type": "array",
          "minItems": 2,
          "uniqueItems": true,
          "ordered": false,
          "items": {
            "oneOf": [
              { "$ref": "#/definitions/Allele" },
              { "$ref": "#/definitions/CURIE" }
            ]
          },
          "description": "List of Alleles, or references to Alleles, that comprise this Haplotype."
        }
      },
      "required": [ "members", "type" ],
      "additionalProperties": false
    },
    "Text": {
      "description": "A free-text definition of variation.",
      "type": "object",
      "properties": {
        "_id": {
          "$ref": "#/definitions/CURIE",
          "description": "Variation Id. MUST be unique within document."
        },
        "type": {
          "type": "string",
          "const": "Text",
          "default": "Text",
          "description": "MUST be \"Text\""
        },
        "definition": {
          "type": "string",
          "description": "A textual representation of variation not representable by other subclasses of Variation."
        }
      },
      "required": [ "definition", "type" ],
      "additionalProperties": false
    },
    "VariationSet": {
      "description": "An unconstrained set of Variation members.",
      "type": "object",
      "properties": {
        "_id": {
          "$ref": "#/definitions/CURIE",
          "description": "Variation Id. MUST be unique within document."
        },
        "type": {
          "type": "string",
          "const": "VariationSet",
          "default": "VariationSet",
          "description": "MUST be \"VariationSet\""
        },
        "members": {
          "type": "array",
          "uniqueItems": true,
          "ordered": false,
          "items": {
            "oneOf": [
              { "$ref": "#/definitions/Allele" },
              { "$ref": "#/definitions/CURIE" },
              { "$ref": "#/definitions/CopyNumberChange" },
              { "$ref": "#/definitions/CopyNumberCount" },
              { "$ref": "#/definitions/Genotype" },
              { "$ref": "#/definitions/Haplotype" },
              { "$ref": "#/definitions/Text" },
              { "$ref": "#/definitions/VariationSet" }
            ]
          },
          "description": "List of Variation objects or identifiers. Attribute is required, but MAY be empty."
        }
      },
      "required": [ "members", "type" ],
      "additionalProperties": false
    },
    "CopyNumberCount": {
      "type": "object",
      "description": "The absolute count of discrete copies of a Location or Feature, within a system (e.g. genome, cell, etc.).",
      "properties": {
        "_id": {
          "$ref": "#/definitions/CURIE",
          "description": "Variation Id. MUST be unique within document."
        },
        "type": {
          "type": "string",
          "const": "CopyNumberCount",
          "default": "CopyNumberCount",
          "description": "MUST be \"CopyNumberCount\""
        },
        "subject": {
          "oneOf": [
            { "$ref": "#/definitions/CURIE" },
            { "$ref": "#/definitions/ChromosomeLocation" },
            { "$ref": "#/definitions/Gene" },
            { "$ref": "#/definitions/SequenceLocation" }
          ],
          "description": "A location for which the number of systemic copies is described."
        },
        "copies": {
          "oneOf": [
            { "$ref": "#/definitions/DefiniteRange" },
            { "$ref": "#/definitions/IndefiniteRange" },
            { "$ref": "#/definitions/Number" }
          ],
          "description": "The integral number of copies of the subject in a system"
        }
      },
      "required": [ "copies", "subject", "type" ],
      "additionalProperties": false
    },
    "CopyNumberChange": {
      "type": "object",
      "maturity": "draft",
      "description": "An assessment of the copy number of a Location or a Feature within a system (e.g. genome, cell, etc.) relative to a baseline ploidy.",
      "properties": {
        "_id": {
          "$ref": "#/definitions/CURIE",
          "description": "Variation Id. MUST be unique within document."
        },
        "type": {
          "type": "string",
          "const": "CopyNumberChange",
          "default": "CopyNumberChange",
          "description": "MUST be \"CopyNumberChange\""
        },
        "subject": {
          "oneOf": [
            { "$ref": "#/definitions/CURIE" },
            { "$ref": "#/definitions/ChromosomeLocation" },
            { "$ref": "#/definitions/Gene" },
            { "$ref": "#/definitions/SequenceLocation" }
          ],
          "description": "A location for which the number of systemic copies is described."
        },
        "copy_change": {
          "type": "string",
          "enum": [
            "efo:0030069",
            "efo:0020073",
            "efo:0030068",
            "efo:0030067",
            "efo:0030064",
            "efo:0030070",
            "efo:0030071",
            "efo:0030072"
          ],
          "description": "MUST be one of \"efo:0030069\" (complete genomic loss), \"efo:0020073\" (high-level loss), \"efo:0030068\" (low-level loss), \"efo:0030067\" (loss), \"efo:0030064\" (regional base ploidy), \"efo:0030070\" (gain), \"efo:0030071\" (low-level gain), \"efo:0030072\" (high-level gain)."
        }
      },
      "required": [ "copy_change", "subject", "type" ],
      "additionalProperties": false
    },
    "Genotype": {
      "description": "A quantified set of MolecularVariation associated with a genomic locus.",
      "type": "object",
      "properties": {
        "_id": {
          "$ref": "#/definitions/CURIE",
          "description": "Variation Id. MUST be unique within document."
        },
        "type": {
          "type": "string",
          "const": "Genotype",
          "default": "Genotype",
          "description": "MUST be \"Genotype\""
        },
        "members": {
          "type": "array",
          "uniqueItems": true,
          "minItems": 1,
          "ordered": false,
          "items": { "$ref": "#/definitions/GenotypeMember" },
          "description": "Each GenotypeMember in `members` describes a MolecularVariation and the count of that variation at the locus."
        },
        "count": {
          "oneOf": [
            { "$ref": "#/definitions/DefiniteRange" },
            { "$ref": "#/definitions/IndefiniteRange" },
            { "$ref": "#/definitions/Number" }
          ],
          "description": "The total number of copies of all MolecularVariation at this locus, MUST be greater than or equal to the sum of GenotypeMember copy counts and MUST be greater than or equal to 1. If greater than the total of GenotypeMember counts, this field describes additional MolecularVariation that exist but are not explicitly described."
        }
      },
      "required": [ "count", "members", "type" ],
      "additionalProperties": false
    },
    "Location": {
      "description": "A contiguous segment of a biological sequence.",
      "oneOf": [
        { "$ref": "#/definitions/ChromosomeLocation" },
        { "$ref": "#/definitions/SequenceLocation" }
      ],
      "discriminator": { "propertyName": "type" }
    },
    "ChromosomeLocation": {
      "description": "A Location on a chromosome defined by a species and chromosome name.",
      "type": "object",
      "properties": {
        "_id": {
          "$ref": "#/definitions/CURIE",
          "description": "Location Id. MUST be unique within document."
        },
        "type": {
          "type": "string",
          "const": "ChromosomeLocation",
          "default": "ChromosomeLocation",
          "description": "MUST be \"ChromosomeLocation\""
        },
        "species_id": {
          "$ref": "#/definitions/CURIE",
          "default": "taxonomy:9606",
          "description": "CURIE representing a species from the [NCBI species taxonomy](https://registry.identifiers.org/registry/taxonomy). Default: \"taxonomy:9606\" (human)"
        },
        "chr": {
          "type": "string",
          "description": "The symbolic chromosome name. For humans, chromosome names MUST be one of 1..22, X, Y (case-sensitive)"
        },
        "interval": {
          "$ref": "#/definitions/CytobandInterval",
          "description": "The chromosome region defined by a CytobandInterval"
        }
      },
      "required": [ "chr", "interval", "species_id", "type" ],
      "additionalProperties": false
    },
    "SequenceLocation": {
      "description": "A Location defined by an interval on a referenced Sequence.",
      "type": "object",
      "properties": {
        "_id": {
          "$ref": "#/definitions/CURIE",
          "description": "Location Id. MUST be unique within document."
        },
        "type": {
          "type": "string",
          "const": "SequenceLocation",
          "default": "SequenceLocation",
          "description": "MUST be \"SequenceLocation\""
        },
        "sequence_id": {
          "$ref": "#/definitions/CURIE",
          "description": "A VRS Computed Identifier for the reference Sequence."
        },
        "interval": {
          "oneOf": [
            { "$ref": "#/definitions/SequenceInterval" },
            { "$ref": "#/definitions/SimpleInterval" }
          ],
          "description": "Reference sequence region defined by a SequenceInterval."
        }
      },
      "required": [ "interval", "sequence_id", "type" ],
      "additionalProperties": false
    },
    "SequenceInterval": {
      "description": "A SequenceInterval represents a span on a Sequence. Positions are always represented by contiguous spans using interbase coordinates or coordinate ranges.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "SequenceInterval",
          "default": "SequenceInterval",
          "description": "MUST be \"SequenceInterval\""
        },
        "start": {
          "oneOf": [
            { "$ref": "#/definitions/DefiniteRange" },
            { "$ref": "#/definitions/IndefiniteRange" },
            { "$ref": "#/definitions/Number" }
          ],
          "description": "The start coordinate or range of the interval. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range less than the value of `end`."
        },
        "end": {
          "oneOf": [
            { "$ref": "#/definitions/DefiniteRange" },
            { "$ref": "#/definitions/IndefiniteRange" },
            { "$ref": "#/definitions/Number" }
          ],
          "description": "The end coordinate or range of the interval. The minimum value of this coordinate or range is 0. MUST represent a coordinate or range greater than the value of `start`."
        }
      },
      "required": [ "end", "start", "type" ],
      "additionalProperties": false
    },
    "CytobandInterval": {
      "description": "A contiguous span on a chromosome defined by cytoband features. The span includes the constituent regions described by the start and end cytobands, as well as any intervening regions.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "CytobandInterval",
          "default": "CytobandInterval",
          "description": "MUST be \"CytobandInterval\""
        },
        "start": {
          "$ref": "#/definitions/HumanCytoband",
          "description": "The start cytoband region. MUST specify a region nearer the terminal end (telomere) of the chromosome p-arm than `end`."
        },
        "end": {
          "$ref": "#/definitions/HumanCytoband",
          "description": "The end cytoband region. MUST specify a region nearer the terminal end (telomere) of the chromosome q-arm than `start`."
        }
      },
      "example": {
        "type": "CytobandInterval",
        "start": "q22.2",
        "end": "q22.3"
      },
      "required": [ "end", "start", "type" ],
      "additionalProperties": false
    },
    "SequenceExpression": {
      "description": "An expression describing a Sequence.",
      "oneOf": [
        { "$ref": "#/definitions/ComposedSequenceExpression" },
        { "$ref": "#/definitions/DerivedSequenceExpression" },
        { "$ref": "#/definitions/LiteralSequenceExpression" },
        { "$ref": "#/definitions/RepeatedSequenceExpression" }
      ],
      "discriminator": { "propertyName": "type" }
    },
    "LiteralSequenceExpression": {
      "description": "An explicit expression of a Sequence.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "LiteralSequenceExpression",
          "default": "LiteralSequenceExpression",
          "description": "MUST be \"LiteralSequenceExpression\""
        },
        "sequence": {
          "$ref": "#/definitions/Sequence",
          "description": "the literal Sequence expressed"
        }
      },
      "required": [ "sequence", "type" ],
      "additionalProperties": false
    },
    "DerivedSequenceExpression": {
      "description": "An approximate expression of a sequence that is derived from a referenced sequence location. Use of this class indicates that the derived sequence is *approximately equivalent* to the reference indicated, and is typically used for describing large regions in contexts where the use of an approximate sequence is inconsequential.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "DerivedSequenceExpression",
          "default": "DerivedSequenceExpression",
          "description": "MUST be \"DerivedSequenceExpression\""
        },
        "location": {
          "$ref": "#/definitions/SequenceLocation",
          "description": "The location from which the approximate sequence is derived"
        },
        "reverse_complement": {
          "type": "boolean",
          "description": "A flag indicating if the expressed sequence is the reverse complement of the sequence referred to by `location`"
        }
      },
      "required": [ "location", "reverse_complement", "type" ],
      "additionalProperties": false
    },
    "RepeatedSequenceExpression": {
      "description": "An expression of a sequence comprised of a tandem repeating subsequence.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "RepeatedSequenceExpression",
          "default": "RepeatedSequenceExpression",
          "description": "MUST be \"RepeatedSequenceExpression\""
        },
        "seq_expr": {
          "oneOf": [
            { "$ref": "#/definitions/DerivedSequenceExpression" },
            { "$ref": "#/definitions/LiteralSequenceExpression" }
          ],
          "description": "An expression of the repeating subsequence"
        },
        "count": {
          "oneOf": [
            { "$ref": "#/definitions/DefiniteRange" },
            { "$ref": "#/definitions/IndefiniteRange" },
            { "$ref": "#/definitions/Number" }
          ],
          "description": "The count of repeated units, as an integer or inclusive range"
        }
      },
      "required": [ "count", "seq_expr", "type" ],
      "additionalProperties": false
    },
    "ComposedSequenceExpression": {
      "description": "An expression of a sequence composed from multiple other SequenceExpression objects. MUST have at least one component that is not a LiteralSequenceExpression. CANNOT be composed from nested composed sequence expressions.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "ComposedSequenceExpression",
          "default": "ComposedSequenceExpression",
          "description": "MUST be \"ComposedSequenceExpression\""
        },
        "components": {
          "type": "array",
          "uniqueItems": true,
          "minItems": 2,
          "ordered": true,
          "items": {
            "oneOf": [
              { "$ref": "#/definitions/DerivedSequenceExpression" },
              { "$ref": "#/definitions/LiteralSequenceExpression" },
              { "$ref": "#/definitions/RepeatedSequenceExpression" }
            ]
          },
          "contains": {
            "oneOf": [
              { "$ref": "#/definitions/RepeatedSequenceExpression" },
              { "$ref": "#/definitions/DerivedSequenceExpression" }
            ]
          },
          "description": "An ordered list of SequenceExpression components comprising the expression."
        }
      },
      "required": [ "components", "type" ],
      "additionalProperties": false
    },
    "GenotypeMember": {
      "description": "A class for expressing the count of a specific MolecularVariation present *in-trans* at a genomic locus represented by a Genotype.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "GenotypeMember",
          "default": "GenotypeMember",
          "description": "MUST be \"GenotypeMember\"."
        },
        "count": {
          "oneOf": [
            { "$ref": "#/definitions/DefiniteRange" },
            { "$ref": "#/definitions/IndefiniteRange" },
            { "$ref": "#/definitions/Number" }
          ],
          "description": "The number of copies of the `variation` at a Genotype locus."
        },
        "variation": {
          "oneOf": [
            { "$ref": "#/definitions/Allele" },
            { "$ref": "#/definitions/Haplotype" }
          ],
          "description": "A MolecularVariation at a Genotype locus."
        }
      },
      "required": [ "count", "type", "variation" ],
      "additionalProperties": false
    },
    "Feature": {
      "description": "A named entity that can be mapped to a Location. Genes, protein domains, exons, and chromosomes are some examples of common biological entities that may be Features.",
      "oneOf": [
        { "$ref": "#/definitions/Gene" }
      ],
      "discriminator": { "propertyName": "type" }
    },
    "Gene": {
      "description": "A reference to a Gene as defined by an authority. For human genes, the use of [hgnc](https://registry.identifiers.org/registry/hgnc) as the gene authority is RECOMMENDED.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "Gene",
          "default": "Gene",
          "description": "MUST be \"Gene\""
        },
        "gene_id": {
          "$ref": "#/definitions/CURIE",
          "description": "A CURIE reference to a Gene concept"
        }
      },
      "required": [ "gene_id", "type" ],
      "additionalProperties": false
    },
    "Number": {
      "description": "A simple integer value as a VRS class.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "Number",
          "default": "Number",
          "description": "MUST be \"Number\""
        },
        "value": {
          "type": "integer",
          "description": "The value represented by Number"
        }
      },
      "required": [ "type", "value" ],
      "additionalProperties": false
    },
    "DefiniteRange": {
      "description": "A bounded, inclusive range of numbers.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "DefiniteRange",
          "default": "DefiniteRange",
          "description": "MUST be \"DefiniteRange\""
        },
        "min": {
          "type": "number",
          "description": "The minimum value; inclusive"
        },
        "max": {
          "type": "number",
          "description": "The maximum value; inclusive"
        }
      },
      "required": [ "max", "min", "type" ],
      "additionalProperties": false
    },
    "IndefiniteRange": {
      "description": "A half-bounded range of numbers represented as a number bound and associated comparator. The bound operator is interpreted as follows: '>=' are all numbers greater than and including `value`, '<=' are all numbers less than and including `value`.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "IndefiniteRange",
          "default": "IndefiniteRange",
          "description": "MUST be \"IndefiniteRange\""
        },
        "value": {
          "type": "integer",
          "description": "The bounded value; inclusive"
        },
        "comparator": {
          "type": "string",
          "enum": [ "<=", ">=" ],
          "description": "MUST be one of \"<=\" or \">=\", indicating which direction the range is indefinite"
        }
      },
      "required": [ "comparator", "type", "value" ],
      "additionalProperties": false
    },
    "CURIE": {
      "description": "A [W3C Compact URI](https://www.w3.org/TR/curie/) formatted string. A CURIE string has the structure ``prefix``:``reference``, as defined by the W3C syntax.",
      "type": "string",
      "pattern": "^\\w[^:]*:.+$",
      "example": "ensembl:ENSG00000139618"
    },
    "HumanCytoband": {
      "description": "A character string representing cytobands derived from the *International System for Human Cytogenomic Nomenclature* (ISCN) [guidelines](http://doi.org/10.1159/isbn.978-3-318-06861-0).",
      "type": "string",
      "pattern": "^(cen|[pq](ter|([1-9][0-9]*(\\.[1-9][0-9]*)?)))$",
      "example": "q22.3"
    },
    "Residue": {
      "description": "A character representing a specific residue (i.e., molecular species) or groupings of these (\"ambiguity codes\"), using [one-letter IUPAC abbreviations](https://en.wikipedia.org/wiki/International_Union_of_Pure_and_Applied_Chemistry#Amino_acid_and_nucleotide_base_codes) for nucleic acids and amino acids.",
      "type": "string",
      "pattern": "^[A-Z*\\-]$"
    },
    "Sequence": {
      "description": "A character string of Residues that represents a biological sequence using the conventional sequence order (5\u2019-to-3\u2019 for nucleic acid sequences, and amino-to-carboxyl for amino acid sequences). IUPAC ambiguity codes are permitted in Sequences.",
      "type": "string",
      "pattern": "^[A-Z*\\-]*$"
    },
    "SequenceState": {
      "deprecated": true,
      "description": "DEPRECATED. A Sequence as a State. This is the State class to use for representing \"ref-alt\" style variation, including SNVs, MNVs, del, ins, and delins. This class is deprecated. Use LiteralSequenceExpression instead.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "SequenceState",
          "default": "SequenceState",
          "description": "MUST be \"SequenceState\""
        },
        "sequence": {
          "$ref": "#/definitions/Sequence",
          "description": "A string of Residues"
        }
      },
      "example": {
        "type": "SequenceState",
        "sequence": "C"
      },
      "required": [ "sequence", "type" ],
      "additionalProperties": false
    },
    "SimpleInterval": {
      "deprecated": true,
      "description": "DEPRECATED: A SimpleInterval represents a span of sequence. Positions are always represented by contiguous spans using interbase coordinates. This class is deprecated. Use SequenceInterval instead.",
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "SimpleInterval",
          "default": "SimpleInterval",
          "description": "MUST be \"SimpleInterval\""
        },
        "start": {
          "type": "integer",
          "description": "The start coordinate"
        },
        "end": {
          "type": "integer",
          "description": "The end coordinate"
        }
      },
      "example": {
        "type": "SimpleInterval",
        "start": 11,
        "end": 22
      },
      "required": [ "end", "start", "type" ],
      "additionalProperties": false
    }
  }
}